R代码9-单因素Cox和多因素Cox回归分析

首席躺平官 2021-07-29 16:13:42 阅读: 3339
rm(list=ls())
library("survival")

setwd("D:/TCGA/")
library("plyr")

#计算函数
func_unicox <- function(x){
  FML <- as.formula(paste0('coxData~',x))
  GCox <- coxph(FML, data = surData)
  GSum <- summary(GCox)
  HR <- round(GSum$coefficients[,2],3)
  PValue <-GSum$coefficients[,5]
  CI <- paste0(round(GSum$conf.int[,3:4],3),collapse = '-')
  unicox <- data.frame(
    'var' = x,
    'Hazard Ratio' = HR,
    'CI95' = CI,
    'P Value' = PValue
  )
  return(unicox)
}

#
#cancerList = c(
#  "ACC","BLCA","BRCA","CESC","CHOL",
#  "COAD","DLBC","ESCA","GBM","HNSC",
#  "KICH","KIRP","LAML","LGG",
#  "LIHC","LUAD","LUSC","MESO","OV",
#  "PAAD","PCPG","PRAD","READ","SARC",
#  "SKCM","STAD","TGCT","THCA","THYM",
#  "UCEC","UCS","UVM"
#)

cancerList = c(
  "KICH"
)


ensg2symbol = read.csv("geneType.csv",header = T)
for(cancer in cancerList){
  #cancer="DLBC"
  print(cancer)
  mRNA = read.csv(paste0("data/",cancer,".csv"),header = T, row.names = 1)
  tumor_mRNA = mRNA[,as.numeric(substr(colnames(mRNA),14,15))<11]
  dim(tumor_mRNA)
  row.names(tumor_mRNA) = substr(row.names(tumor_mRNA),1,15)
  tumor_mRNA_median = data.frame("ense"=substr(row.names(tumor_mRNA),1,15),"median"=apply(tumor_mRNA, 1, median))
  
  tumor_mRNA = log2(tumor_mRNA+0.1)
  
  tumor_mRNA = as.data.frame(t(tumor_mRNA))
  tumor_mRNA$tcgaid=substr(row.names(tumor_mRNA),1,12)
  dim(tumor_mRNA)
  tumor_mRNA = tumor_mRNA[!duplicated(tumor_mRNA$tcgaid),]
  dim(tumor_mRNA)
  row.names(tumor_mRNA)=tumor_mRNA$tcgaid
  
  
  clinical = read.csv(paste0("clinical/",cancer,".csv"),header = T, row.names = 1, stringsAsFactors = F)
  dim(clinical)
  clinical$vital_status = toupper(as.character(clinical$vital_status))
  clinical$submitter_id = gsub("-",".",clinical$submitter_id)
  clinical = clinical[clinical$vital_status =="DEAD" | clinical$vital_status=="ALIVE",]
  OS = data.frame("tcgaid"=clinical$submitter_id, "status"=ifelse(clinical$vital_status=="DEAD",1,0), "time"=ifelse(clinical$vital_status=="DEAD",as.numeric(clinical$days_to_death), as.numeric(clinical$days_to_last_follow_up)))
  OS = OS[!is.na(OS$time) & OS$time!="--" & OS$time!="",]
  #剔除少于30天的数据
  OS = OS[OS$time>30,]
  dim(OS)
  dim(tumor_mRNA)
  surData = merge(OS, tumor_mRNA, by="tcgaid", all=T)
  dim(surData)
  
  coxData = Surv(time =surData$time , event = surData$status )
  
  
  varCox =c(colnames(surData)[4:ncol(surData)])
  
  uniData <- lapply(varCox, func_unicox)
  uniData <- ldply(uniData, data.frame)
  uniData$var = substr(uniData$var,1,15)
  
  uniResult = merge(uniData, ensg2symbol, by.x="var", by.y="ensg", all.x = TRUE)
  
  uniResult = merge(uniResult, tumor_mRNA_median, by.x="var",by.y="ense", all.x=T)
  write.csv(uniResult,file = paste0("unicox/",cancer,".csv"), row.names = F)
}

邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
{{item.nick_name}} 受邀请回答 {{item.create_time}}
{{item.refer_comment.nick_name}} {{item.refer_comment.create_time}}

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
切换到完整回复 发送回复
赞({{item.count_zan}}) 踩({{item.count_cai}}) 删除 回复 关闭
科研狗©2015-2024 科研好助手,京ICP备20005780号-1 建议意见

服务热线

178 0020 3020

微信服务号