用ctree进行引导和递归划分分析

3z6pesqy  于 2023-05-26  发布在  其他
关注(0)|答案(1)|浏览(100)

我目前正在努力递归分区和一些数据的打包/引导。由于数据是保密的,我提供了一个使用“GBSG 2”数据的可重现示例。本质上,我目前正试图用我自己的相同患者群体的数据复制最近发表在《临床肿瘤学杂志》(https://ascopubs.org/doi/abs/10.1200/JCO.22.02222)上的一篇文章。
我附上了他们的方法部分的打印和补充表,这基本上是我希望最终与x1c 0d1xx 1c 1d 1xx 1c 2d 1x
我的问题可以归结为

  • 我想为每个终末节点提取三年生存率,然后为每个患者指定他们属于哪一组-A组> 70%,B组; 70-50,C; 50-25和D小于25%。
  • 当引导之后,同样的需要发生,所以我可以看到每个迭代的特定患者被分配到哪个组,以及这种情况发生的频率。

下面是一些虚拟代码和我到目前为止所做的

library(partykit)
data("GBSG2", package = "TH.data")

#Dataframe
df <- GBSG2

#Ctree object
stree <- ctree(Surv(time,cens)~., data=df, control= ctree_control(minsplit = 50, alpha = 0.1, multiway = T))

#The following part I hope could be done more efficiently
n <- predict(stree, type="node")
nd <- factor(predict(stree, type="node"))
df$node <- n
fit1 <- survfit(Surv(time,cens)~nd, data=df)
summary(fit1, times=365*3)

#Manual input to each node by reading the transcript
df$grp <- ifelse(df$node==3, "A",NA)
df$grp <- ifelse(df$node==4, "A", df$grp)
df$grp <- ifelse(df$node==7, "C", df$grp)
df$grp <- ifelse(df$node==8, "D", df$grp)
df$grp <- ifelse(df$node==9, "B", df$grp)

我相信在我的 Bootstrap 可以完成之前,上述问题需要得到修复,以便获得与所附补充表相匹配的结果(我想做1000次,但我正在做10次,直到它起作用)。

#Bagging
df_bag <- df %>% 
  select(-"node", -"grp")
cf <- cforest(Surv(time,cens)~.,data=df_bag, ntree=10, mtry = Inf)

非常感谢
托比亚斯·贝格

c3frrgcw

c3frrgcw1#

我已经设法找到了我的问题的解决方案

library(partykit)
library(survival)
data("GBSG2", package = "TH.data")

#Data
df <- GBSG2

#Ctree object
stree <- ctree(Surv(time,cens)~., data=df, control= ctree_control(minsplit = 50, alpha = 0.1, multiway = T))

#Prediciton for Recursive partitioning analysis
        n <- predict(stree, type="node")
    node <- factor(predict(stree, type="node"))
    df$node <- n
    fit1 <- survfit(Surv(time,event)~node, data=df)
    res <- summary(fit1, times=365*3) 
    cols <- lapply(c(6, 10), function(x) res[x])
    tbl <- do.call(data.frame, cols)
    tbl$strata <- as.integer(gsub("[^0-9]", "", tbl$strata))
    tbl <- tbl %>% 
      rename(node=strata)
    df <- df %>% 
      left_join(., tbl, by="node") %>% 
      mutate(grp=ifelse(surv>0.699999, "A", NA)) %>% 
      mutate(grp=ifelse(surv<0.70 & surv>0.49999, "B", grp)) %>% 
      mutate(grp=ifelse(surv<0.50 & surv>0.24999, "C", grp)) %>% 
      mutate(grp=ifelse(surv<0.25, "D", grp))
    
#Bootstrapping with 10 iterations
#Function which essentially does the above prediction and returns for each row the corresponding group
classify_abcd = function (df_bag_in, pred_vector) {
  n <- pred_vector
  node <- factor(pred_vector)
  df_bag_in$node <- n
  fit1 <- survfit(Surv(time,event)~node, data=df_bag_in)
  res <- summary(fit1, times=365*3,extend = TRUE) 
  cols <- lapply(c(6, 10), function(x) res[x])
  tbl <- do.call(data.frame, cols)
  tbl$strata <- as.integer(gsub("[^0-9]", "", tbl$strata))
  tbl <- tbl %>% 
    rename(node=strata)
  df_bag_in <- df_bag_in %>% 
    left_join(., tbl, by="node") %>% 
    mutate(grp=ifelse(surv>0.699999, "A", NA)) %>% 
    mutate(grp=ifelse(surv<0.70 & surv>0.49999, "B", grp)) %>% 
    mutate(grp=ifelse(surv<0.50 & surv>0.24999, "C", grp)) %>% 
    mutate(grp=ifelse(surv<0.25, "D", grp))
  
  return(df_bag_in[c('grp')])
}
#Bootstrapping 10 iterations. End result is the data frame with each group assignment per iteration
cf <- cforest(Surv(time,event)~.,data=df, ntree=10, mtry = Inf, trace=T)
all_list_runs <- predict(cf, type="node")
map_id_to_classes = data.frame()
for(pred_vector in all_list_runs) {
  per_id_class = classify_abcd(df_rpa, pred_vector) 
  print(per_id_class)
  
  if (length(map_id_to_classes) == 0) {
    map_id_to_classes = per_id_class
  } else {
    map_id_to_classes = cbind(map_id_to_classes, per_id_class$grp)
  }
  
}

相关问题