使用rowSums在R中创建变量

hrirmatl  于 2022-12-25  发布在  其他
关注(0)|答案(1)|浏览(149)

我想创建一个新变量,用来记录每一行的a*变量中"Yes"出现的情况。

library(dplyr)

# Reproducible toy data set with 100 rows.
# a1-a4 each hold 100 values drawn without replacement from a pool of
# "Yes" entries (40 or 50 of them) plus 100 NA entries; b2 is numeric noise.
# The seed is fixed first, and the columns are generated in the same order,
# so the random draws are repeatable.
set.seed(2022)
mydata <- tibble::tibble(
  id = seq_len(100),
  a1 = sample(c(rep("Yes", 40), rep(NA, 100)), size = 100),
  a2 = sample(c(rep("Yes", 50), rep(NA, 100)), size = 100),
  a3 = sample(c(rep("Yes", 40), rep(NA, 100)), size = 100),
  a4 = sample(c(rep("Yes", 50), rep(NA, 100)), size = 100),
  b2 = rnorm(n = 100, mean = 50, sd = 10)
)

# Goal: capture any occurrence of "Yes" across the a* variables

# Row-wise sum of "Yes" matches across the columns named in `vars`.
#
# vars: character vector of column names to inspect.
#
# The columns are read from cur_data(), so this is meant to be called inside
# a dplyr verb such as mutate().
# No NA handling is applied: comparing NA with "Yes" gives NA, and rowSums()
# without na.rm then returns NA for any row containing an NA cell.
anymatch <- function(vars) {
  selected <- select(cur_data(), all_of(vars))
  is_yes <- selected == "Yes"
  rowSums(is_yes)
}

# Names of the indicator columns to scan for "Yes".
avars <- paste0("a", 1:4)

# Add the row-wise "Yes" tally as `afin`, then keep only the inputs and the
# result. `avars` is a character vector that lives outside the data, so it is
# wrapped in all_of(); passing it bare to select() is ambiguous and deprecated.
mydata %>%
  mutate(afin = anymatch(avars)) %>%
  select(all_of(avars), afin)
uqzxnwby

uqzxnwby1#

我们需要在rowSums()中加上na.rm = TRUE,否则只要某一行含有NA,该行的结果就是NA。

# Count, for each row, how many of the columns named in `vars` equal "Yes".
#
# vars: character vector of column names to inspect.
#
# The columns are read from cur_data(), so this is meant to be called inside
# a dplyr verb such as mutate().
# NA cells are dropped from the sum (na.rm = TRUE), so they add nothing to
# the count instead of turning the whole row into NA.
anymatch <- function(vars) {
  current <- cur_data()
  yes_flags <- select(current, all_of(vars)) == "Yes"
  rowSums(yes_flags, na.rm = TRUE)
}

现在它给出了正确的计数

>  mydata %>%
   mutate(afin = anymatch(avars)) %>% 
   select(all_of(avars), afin) 
# A tibble: 100 × 5
   a1    a2    a3    a4     afin
   <chr> <chr> <chr> <chr> <dbl>
 1 <NA>  <NA>  <NA>  <NA>      0
 2 <NA>  Yes   <NA>  Yes       2
 3 Yes   <NA>  <NA>  <NA>      1
 4 <NA>  Yes   Yes   <NA>      2
 5 Yes   Yes   <NA>  <NA>      2
 6 Yes   Yes   Yes   Yes       4
 7 <NA>  Yes   <NA>  <NA>      1
 8 <NA>  <NA>  <NA>  <NA>      0
 9 Yes   Yes   <NA>  Yes       3
10 <NA>  Yes   <NA>  <NA>      1
# … with 90 more rows
# ℹ Use `print(n = ...)` to see more rows

在较新版本的dplyr(1.1.0及以上)中,cur_data()已被弃用,可以改用pick()

# Count, for each row, how many of the columns named in `vars` equal "Yes".
#
# vars: character vector of column names to inspect.
#
# The columns are selected with pick(), so this is meant to be called inside
# a dplyr verb such as mutate().
# NA cells are dropped from the sum (na.rm = TRUE), so they add nothing to
# the count.
anymatch <- function(vars) {
  yes_flags <- pick(all_of(vars)) == "Yes"
  rowSums(yes_flags, na.rm = TRUE)
}
# Append the per-row "Yes" count as `afin` and show it beside the a* columns.
mydata %>%
  mutate(afin = anymatch(avars)) %>%
  select(all_of(c(avars, "afin")))
# A tibble: 100 × 5
   a1    a2    a3    a4     afin
   <chr> <chr> <chr> <chr> <dbl>
 1 <NA>  <NA>  <NA>  <NA>      0
 2 <NA>  Yes   <NA>  Yes       2
 3 Yes   <NA>  <NA>  <NA>      1
 4 <NA>  Yes   Yes   <NA>      2
 5 Yes   Yes   <NA>  <NA>      2
 6 Yes   Yes   Yes   Yes       4
 7 <NA>  Yes   <NA>  <NA>      1
 8 <NA>  <NA>  <NA>  <NA>      0
 9 Yes   Yes   <NA>  Yes       3
10 <NA>  Yes   <NA>  <NA>      1
# … with 90 more rows

相关问题