R语言 查找纵向数据中连续出现3次以上的观测

hgqdbh6s  于 2023-05-04  发布在  其他
关注(0)|答案(2)|浏览(145)

我有以下数据:

# Input data as dput() output: a 48-row data.frame with integer columns
# ID (values 1-3), A (0/1 values) and Day (restarting at 1 for each ID).
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
17L)), row.names = c(NA, 48L), class = "data.frame")

我想找到A连续出现超过3次观测(即连续>3个A = 1)的ID,并创建以下数据:

# Desired result as dput() output: the same ID, A and Day columns plus two
# new columns, Censor (0/1) and Day_2 (NA for ID 1, 5 for ID 2, 14 for ID 3).
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
17L), Censor = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 1L, 1L, 1L), Day_2 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L)), row.names = c(NA, 
48L), class = "data.frame")

其中,Censor(删失)从该ID的A首次连续出现>3个观测的第一天开始取1,Day_2是该连续段中第一个观测发生的日期(Day)。

7gcisfzg

7gcisfzg1#

rle方法:将其封装在函数f中,该函数标识长度>3的连续1,然后在ave中使用它。

# Flag censoring within a single subject's 0/1 series.
#
# x:       vector of 0/1 values for one subject, in time order.
# min_run: minimum number of consecutive 1s that triggers censoring
#          (default 4L, i.e. "more than 3 in a row").
#
# Returns a numeric vector the same length as `x`: 0 before the first
# qualifying run of 1s, 1 from the first element of that run onwards.
f <- \(x, min_run = 4L) {
  runs <- rle(x)
  # A run qualifies when it consists of 1s and is long enough; NA runs never
  # qualify.
  qualifies <- !is.na(runs$values) & runs$values == 1 &
    runs$lengths >= min_run
  flag <- rep.int(as.numeric(qualifies), runs$lengths)
  # Censoring starts at the first qualifying run and stays on afterwards, so
  # the flag must not drop back to 0 when the run of 1s ends.
  cummax(flag)
}

# Add two columns to `dat`, computed separately for each ID:
#   censor - the per-ID flag returned by f() applied to A
#   Day_2  - the Day of the first row flagged by `censor`, NA if none is flagged
res <- within(dat, {
  censor <- ave(A, ID, FUN = f)
  # Blank out Day on unflagged rows, then take the first remaining Day per ID.
  # Looking up Day itself (rather than the row position within the group)
  # keeps Day_2 correct when Day does not run 1, 2, 3, ... within an ID.
  Day_2 <- ave(replace(Day, censor != 1, NA), ID, FUN = \(d) d[!is.na(d)][1L])
})
res
#    ID A Day Day_2 censor
# 1   1 0   1    NA      0
# 2   1 0   2    NA      0
# 3   1 0   3    NA      0
# 4   1 0   4    NA      0
# 5   1 0   5    NA      0
# 6   1 1   6    NA      0
# 7   1 1   7    NA      0
# 8   1 0   8    NA      0
# 9   1 0   9    NA      0
# 10  1 0  10    NA      0
# 11  1 0  11    NA      0
# 12  1 1  12    NA      0
# 13  1 1  13    NA      0
# 14  1 1  14    NA      0
# 15  1 0  15    NA      0
# 16  1 0  16    NA      0
# 17  1 0  17    NA      0
# 18  2 0   1     5      0
# 19  2 0   2     5      0
# 20  2 0   3     5      0
# 21  2 0   4     5      0
# 22  2 1   5     5      1
# 23  2 1   6     5      1
# 24  2 1   7     5      1
# 25  2 1   8     5      1
# 26  2 1   9     5      1
# 27  2 1  10     5      1
# 28  2 1  11     5      1
# 29  2 1  12     5      1
# 30  2 1  13     5      1
# 31  2 1  14     5      1
# 32  3 0   1    14      0
# 33  3 0   2    14      0
# 34  3 0   3    14      0
# 35  3 0   4    14      0
# 36  3 0   5    14      0
# 37  3 1   6    14      0
# 38  3 1   7    14      0
# 39  3 1   8    14      0
# 40  3 0   9    14      0
# 41  3 1  10    14      0
# 42  3 0  11    14      0
# 43  3 0  12    14      0
# 44  3 0  13    14      0
# 45  3 1  14    14      1
# 46  3 1  15    14      1
# 47  3 1  16    14      1
# 48  3 1  17    14      1
数据:
# Example data: three subjects (ID 1-3) with 17, 14 and 17 rows respectively,
# a 0/1 indicator A, and a Day counter that restarts at 1 for each subject.
dat <- structure(
  list(
    ID = rep(1:3, times = c(17L, 14L, 17L)),
    A = c(
      # ID 1
      0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
      # ID 2
      0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
      # ID 3
      0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L
    ),
    Day = c(1:17, 1:14, 1:17)
  ),
  row.names = c(NA, 48L),
  class = "data.frame"
)
wkftcu5l

wkftcu5l2#

另一种方法是使用zoo包中的rollapply。按ID分组后,在宽度为4的窗口上使用rollapply:如果窗口内A的所有值都是1,则将指示符设为1。接着,cumany会把Censor列中第一个1之后的其余值都设为1。最后,which给出Censor首次等于1的位置索引。

library(tidyverse)
library(zoo)

# For each ID, flag censoring once A equals 1 on four consecutive rows, and
# record the Day on which that first qualifying run starts.
df %>%
  group_by(ID) %>%
  mutate(
    # 1 where this row and the next three rows all have A == 1; positions
    # without a full left-aligned window of 4 are filled with 0.
    Censor = rollapply(A == 1, width = 4, all, fill = 0, align = "left"),
    # Once flagged, stay flagged for the rest of the ID's rows.
    Censor = +cumany(Censor == 1),
    # Look up Day at the first flagged row (NA if none) instead of returning
    # the row position, so Day_2 stays correct when Day is not 1, 2, 3, ...
    Day_2 = Day[which(Censor == 1)[1]]
  ) %>%
  # Drop the grouping so later steps do not silently operate per ID.
  ungroup()

输出

ID     A   Day Censor Day_2
   <int> <int> <int>  <int> <int>
 1     1     0     1      0    NA
 2     1     0     2      0    NA
 3     1     0     3      0    NA
 4     1     0     4      0    NA
 5     1     0     5      0    NA
 6     1     1     6      0    NA
 7     1     1     7      0    NA
 8     1     0     8      0    NA
 9     1     0     9      0    NA
10     1     0    10      0    NA
11     1     0    11      0    NA
12     1     1    12      0    NA
13     1     1    13      0    NA
14     1     1    14      0    NA
15     1     0    15      0    NA
16     1     0    16      0    NA
17     1     0    17      0    NA
18     2     0     1      0     5
19     2     0     2      0     5
20     2     0     3      0     5
21     2     0     4      0     5
22     2     1     5      1     5
23     2     1     6      1     5
24     2     1     7      1     5
25     2     1     8      1     5
26     2     1     9      1     5
27     2     1    10      1     5
28     2     1    11      1     5
29     2     1    12      1     5
30     2     1    13      1     5
31     2     1    14      1     5
32     3     0     1      0    14
33     3     0     2      0    14
34     3     0     3      0    14
35     3     0     4      0    14
36     3     0     5      0    14
37     3     1     6      0    14
38     3     1     7      0    14
39     3     1     8      0    14
40     3     0     9      0    14
41     3     1    10      0    14
42     3     0    11      0    14
43     3     0    12      0    14
44     3     0    13      0    14
45     3     1    14      1    14
46     3     1    15      1    14
47     3     1    16      1    14
48     3     1    17      1    14

相关问题