R:使用多个条件提取唯一数据

r7knjye2  于 2022-12-25  发布在  其他
关注(0)|答案(1)|浏览(130)

如何创建一个新数据集:对每个唯一的ID,只保留其不超过4年(≤ 4年)的最长时间所在的那一行,并同时提取该时间点对应的状态(status)和癌症(cancer)变量?
我有这样的数据:Data example
我想创建这样的数据集,如data1:Data what I want to extract

# Example input: one row per (ID, follow-up year).
#   ID        - subject identifier (factor): 6 rows for "1", 4 for "2", 8 for "3"
#   timeYears - follow-up time in years, restarting at 0 for every ID (factor)
#   status    - 0/1 indicator recorded at that time point (integer)
#   cancer    - cancer code, constant within an ID (factor)
# Built with data.frame()/factor() so that every column has exactly 18 entries
# and every factor has unique levels.
data <- data.frame(
  ID = factor(rep(c("1", "2", "3"), times = c(6L, 4L, 8L))),
  timeYears = factor(c(0:5, 0:3, 0:7)),
  status = c(
    0L, 0L, 0L, 0L, 1L, 1L,          # ID 1, years 0-5
    0L, 0L, 0L, 0L,                  # ID 2, years 0-3
    0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L   # ID 3, years 0-7
  ),
  cancer = factor(rep(c("1", "2", "1"), times = c(6L, 4L, 8L)))
)
# Expected result: for each ID, the single row at its latest follow-up time
# that does not exceed 4 years, with the status and cancer values of that row.
# Built with data.frame()/factor() so the factor columns have unique levels.
data1 <- data.frame(
  ID = factor(c("1", "2", "3")),
  timeYears = factor(c(4L, 3L, 4L)),
  status = c(1L, 0L, 0L),
  cancer = factor(c("1", "2", "1"))
)
y53ybaqx

y53ybaqx1#

library(dplyr)
# For every ID keep the row(s) at its latest follow-up time that is <= 4 years.
# timeYears is a factor, so it is converted via character to recover the real
# number of years. The same conversion is used for the ordering in slice_max():
# ranking the factor itself would follow level position, which need not match
# numeric order (e.g. a level "10" can sort before "2").
# Rows tied on the maximum time within an ID are all kept (slice_max default).
data %>%
  dplyr::filter(as.integer(as.character(timeYears)) <= 4) %>%
  group_by(ID) %>%
  slice_max(as.integer(as.character(timeYears))) %>%
  ungroup()
# # A tibble: 3 × 4
#   ID    timeYears status cancer
#   <fct> <fct>      <int> <fct> 
# 1 1     4              1 1     
# 2 2     3              0 2     
# 3 3     4              0 1

Base R(不依赖额外的包)

# Base R equivalent: within each ID, flag the row(s) whose time equals the
# largest time that is <= 4 years, then subset `data` with that flag.
# ave() applies FUN per ID and returns the flags (coerced to 0/1) in the
# original row order, hence the `> 0` to turn them back into a logical mask.
data[ave(as.integer(as.character(data$timeYears)), data$ID,
         FUN = function(z) {
           # Rows eligible for selection: known time, not beyond 4 years.
           ok <- !is.na(z) & z <= 4L
           # The -Inf fallback keeps max() silent when an ID has no eligible
           # row; `ok &` forces NA times to FALSE so no all-NA rows appear.
           ok & z == max(z[ok], -Inf)
         }) > 0, ]
#    ID timeYears status cancer
# 5   1         4      1      1
# 10  2         3      0      2
# 15  3         4      0      1

相关问题