在dplyr中通过字符串变量进行合并

4dc9hkyq  于 2023-06-19  发布在  其他
关注(0)|答案(2)|浏览(82)

我有一个名为primary的数据框,以及另一个名为secondary的数据框:

library(dplyr)

# Left-hand table of the join: one row per (id, prod) pair, each carrying a
# single country code in `d`.
primary <- data.frame(
  id   = c("A123", "B456", "C789", "D012"),
  prod = c("1111", "2222", "3333", "4444"),
  d    = c("USA", "CAN", "MEX", "BRA")
)

# Right-hand table of the join: same (id, prod) keys, with `rec_d` holding a
# comma-separated string of country codes, or NA when none is recorded.
secondary <- data.frame(
  id    = c("A123", "B456", "C789", "D012"),
  prod  = c("1111", "2222", "3333", "4444"),
  rec_d = c("USA", "CAN,MEX", NA_character_, "BRA")
)

我想按(id, prod)执行left_join(primary, secondary),并检查d是否包含在rec_d中。
我尝试了以下代码,但没有成功:

# Asker's original attempt; it does not produce the desired flags.
# `d %in% rec_d` compares each `d` against the whole `rec_d` column as exact
# strings, so "CAN" does not match "CAN,MEX", and `| is.na(rec_d)` sets the
# flag to 1 for rows where `rec_d` is missing.
result <- left_join(primary, secondary, by = c("id", "prod")) %>%
  mutate(flag = ifelse(d %in% rec_d| is.na(rec_d), 1, 0))

预期的结果是:

# Expected result: the joined columns plus `flag`, which is 1 when `d` appears
# in `rec_d` and 0 otherwise (the row with a missing `rec_d` gets 0).
desired <- structure(
  list(
    id = c("A123", "B456", "C789", "D012"),
    prod = c("1111", "2222", "3333", "4444"),
    d = c("USA", "CAN", "MEX", "BRA"),
    rec_d = c("USA", "CAN,MEX", NA, "BRA"),
    flag = c(1, 1, 0, 1)
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)

有什么思路吗?

46scxncf

46scxncf1#

library(dplyr)
library(stringr)

# Join on the (id, prod) key, then set flag = 1 when the code in `d` occurs in
# the `rec_d` string and 0 otherwise (including rows where `rec_d` is NA).
left_join(primary, secondary, by = c("id", "prod")) %>%
  mutate(
    # fixed() matches `d` literally; without it each value of `d` would be
    # interpreted as a regular expression.
    # NOTE: this is a substring test, so a `d` of "US" would also match "USA".
    flag = ifelse(!is.na(rec_d) & str_detect(rec_d, fixed(d)), 1, 0)
  )

   id prod   d   rec_d flag
1 A123 1111 USA     USA    1
2 B456 2222 CAN CAN,MEX    1
3 C789 3333 MEX    <NA>    0
4 D012 4444 BRA     BRA    1

或:

# Same flag via dplyr::if_else(): str_detect() returns NA when `rec_d` is NA,
# and `missing = 0` maps that NA to 0.
# fixed() makes `d` a literal string rather than a regular expression.
left_join(primary, secondary, by = c("id", "prod")) %>%
  mutate(flag = if_else(str_detect(rec_d, fixed(d)), 1, 0, missing = 0))
lsmd5eda

lsmd5eda2#

我们可以使用dplyr::case_when,并设置.default = 0。我觉得当逻辑判断比较复杂时,它能让mutate语句比if_else更清晰。

library(dplyr)
library(stringr)

# Join on the (id, prod) key and derive `flag` with case_when(): 1 when `d`
# occurs in `rec_d`; every other row, including those where `rec_d` is NA,
# falls through to `.default = 0`.
left_join(primary, secondary, by = c("id", "prod")) |>
  mutate(
    flag = case_when(
      # fixed() makes `d` a literal string rather than a regular expression.
      str_detect(rec_d, fixed(d)) ~ 1,
      .default = 0
    )
  )

    id prod   d   rec_d flag
1 A123 1111 USA     USA    1
2 B456 2222 CAN CAN,MEX    1
3 C789 3333 MEX    <NA>    0
4 D012 4444 BRA     BRA    1

相关问题