如何计算 R 数据框中多列之间的日期差?

lsmd5eda  于 2022-12-30  发布在  其他
关注(0)|答案(1)|浏览(207)

我正在尝试计算数据框中多个日期列之间的差值。我提供了以下示例数据。
我知道可以用 difftime(Date2, Date1, units = "days") 来求两个日期之间的差值。另外,我试过下面的代码,它也能工作。我只是想在数据框中以更整洁的方式来做这件事。

  # Attach the packages used by the snippets on this page.
  library(glue)
  library(tidyverse)

  # Build the column names InstantDate_1 ... InstantDate_11.
  col_of_interest <- "InstantDate"
  col_orders <- paste0(col_of_interest, "_", seq_len(11))

  # Keep only those of the named date columns that are present in `data`.
  data_date <- select(data, any_of(col_orders))

  # Subtract columns 1..10 from columns 2..11, position by position.
  datadiff <- data_date[seq(2, 11)] - data_date[seq_len(10)]

我失败的尝试就在这里。

  # NOTE(review): kept exactly as posted -- this is the asker's non-working attempt.
  # The whole `for` loop is handed to mutate() as a single unnamed argument, so
  # the `"Day_diff_{i}" := ...` expression inside the loop body is never seen by
  # mutate() as a name/value argument.
  # `vars` is not assigned anywhere in the visible code -- TODO confirm what it
  # was meant to hold (presumably the date column names).
  1. data_date <- data %>%
  2. select(any_of(col_orders)) %>%
  3. mutate(for(i in (seq(vars) - 1)) "Day_diff_{i}" := difftime(vars[i+1], vars[i], units = "days"))

下面是示例数据。

  # Sample data in dput() form: a 6-row tibble with three groups of 11 columns,
  # Total_1..Total_11 (character), InstantDate_1..InstantDate_11 (Date) and
  # VisitType_1..VisitType_11 (character). Missing values appear both as NA and
  # as the literal string "NULL".
  1. data <- structure(list(Total_1 = c("NULL", "NULL", "NULL", "NULL", "NULL",
  2. "NULL"), Total_2 = c("17", "5", "3", "13", "NULL", "0"), Total_3 = c("15",
  3. "NULL", NA, "2", "6", NA), Total_4 = c("9", NA, NA, "8", NA,
  4. NA), Total_5 = c("15", NA, NA, "14", NA, NA), Total_6 = c("NULL",
  5. NA, NA, NA, NA, NA), Total_7 = c(NA_character_, NA_character_,
  6. NA_character_, NA_character_, NA_character_, NA_character_),
  7. Total_8 = c(NA_character_, NA_character_, NA_character_,
  8. NA_character_, NA_character_, NA_character_), Total_9 = c(NA_character_,
  9. NA_character_, NA_character_, NA_character_, NA_character_,
  10. NA_character_), Total_10 = c(NA_character_, NA_character_,
  11. NA_character_, NA_character_, NA_character_, NA_character_
  12. ), Total_11 = c(NA_character_, NA_character_, NA_character_,
  13. NA_character_, NA_character_, NA_character_), InstantDate_1 = structure(c(18327,
  14. 18330, 18332, 18332, 18332, 18333), class = "Date"), InstantDate_2 = structure(c(18673,
  15. 18858, 18794, 18527, 18516, 18533), class = "Date"), InstantDate_3 = structure(c(18703,
  16. 19044, NA, 18673, 18726, NA), class = "Date"), InstantDate_4 = structure(c(18786,
  17. NA, NA, 18905, NA, NA), class = "Date"), InstantDate_5 = structure(c(18855,
  18. NA, NA, 19006, NA, NA), class = "Date"), InstantDate_6 = structure(c(19229,
  19. NA, NA, NA, NA, NA), class = "Date"), InstantDate_7 = structure(c(NA_real_,
  20. NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
  21. InstantDate_8 = structure(c(NA_real_, NA_real_, NA_real_,
  22. NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_9 = structure(c(NA_real_,
  23. NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
  24. InstantDate_10 = structure(c(NA_real_, NA_real_, NA_real_,
  25. NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_11 = structure(c(NA_real_,
  26. NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
  27. VisitType_1 = c("NULL", "NULL", "NULL", "NULL", "NULL", "NULL"
  28. ), VisitType_2 = c("FOLLOW UP", "FOLLOW UP", "VIRTUAL VISIT",
  29. "OFFICE VISIT", "NULL", "VIRTUAL VISIT"), VisitType_3 = c("FOLLOW UP",
  30. "FOLLOW UP", NA, "VIRTUAL VISIT", "VIRTUAL VISIT", NA), VisitType_4 = c("FOLLOW UP",
  31. NA, NA, "VIRTUAL VISIT", NA, NA), VisitType_5 = c("FOLLOW UP",
  32. NA, NA, "FOLLOW UP", NA, NA), VisitType_6 = c("FOLLOW UP",
  33. NA, NA, NA, NA, NA), VisitType_7 = c(NA_character_, NA_character_,
  34. NA_character_, NA_character_, NA_character_, NA_character_
  35. ), VisitType_8 = c(NA_character_, NA_character_, NA_character_,
  36. NA_character_, NA_character_, NA_character_), VisitType_9 = c(NA_character_,
  37. NA_character_, NA_character_, NA_character_, NA_character_,
  38. NA_character_), VisitType_10 = c(NA_character_, NA_character_,
  39. NA_character_, NA_character_, NA_character_, NA_character_
  40. ), VisitType_11 = c(NA_character_, NA_character_, NA_character_,
  41. NA_character_, NA_character_, NA_character_)), row.names = c(NA,
  42. -6L), class = c("tbl_df", "tbl", "data.frame"))
trnvg8h3

trnvg8h31#

我们可以使用dplyover中的across2

  library(dplyover)

  # Pair column k + 1 (.x) with column k (.y) of the selected date columns and
  # store each difference, in days, in a new Day_diff_<k> column.
  out <- data %>%
    select(any_of(col_orders)) %>%
    mutate(
      across2(
        .xcols = 2:11,
        .ycols = 1:10,
        .fns = ~ difftime(.x, .y, units = "days"),
        .names = "Day_diff_{idx}"
      )
    )
  • 输出
  1. > out %>% select(starts_with("Day_diff"))
  2. # A tibble: 6 × 10
  3. Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
  4. <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn>
  5. 1 346 days 30 days 83 days 69 days 374 days NA days NA days NA days NA days NA days
  6. 2 528 days 186 days NA days NA days NA days NA days NA days NA days NA days NA days
  7. 3 462 days NA days NA days NA days NA days NA days NA days NA days NA days NA days
  8. 4 195 days 146 days 232 days 101 days NA days NA days NA days NA days NA days NA days
  9. 5 184 days 210 days NA days NA days NA days NA days NA days NA days NA days NA days
  10. 6 200 days NA days NA days NA days NA days NA days NA days NA days NA days NA days

如果我们想使用for循环

  # Start from the date columns only, so that positions 1..11 refer to
  # InstantDate_1..InstantDate_11 (assumes all eleven exist in `data`).
  out1 <- data %>%
    select(any_of(col_orders))

  # For each adjacent pair of date columns, add Day_diff_<i - 1> holding
  # column i minus column i - 1, in days. New columns are appended at the end,
  # so positions 1..11 keep pointing at the date columns throughout the loop.
  # all_of() marks the position as an external value, so tidyselect can never
  # mistake `i` for a column called `i`.
  for (i in 2:11) {
    out1 <- out1 %>%
      mutate(
        "Day_diff_{i-1}" := difftime(
          pick(all_of(i))[[1]],
          pick(all_of(i - 1))[[1]],
          units = "days"
        )
      )
  }
  • 输出
  1. > out1 %>% select(starts_with("Day_diff"))
  2. # A tibble: 6 × 10
  3. Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
  4. <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn>
  5. 1 346 days 30 days 83 days 69 days 374 days NA days NA days NA days NA days NA days
  6. 2 528 days 186 days NA days NA days NA days NA days NA days NA days NA days NA days
  7. 3 462 days NA days NA days NA days NA days NA days NA days NA days NA days NA days
  8. 4 195 days 146 days 232 days 101 days NA days NA days NA days NA days NA days NA days
  9. 5 184 days 210 days NA days NA days NA days NA days NA days NA days NA days NA days
  10. 6 200 days NA days NA days NA days NA days NA days NA days NA days NA days NA days
展开查看全部

相关问题