如何计算r数据框中多列之间的日期差?

lsmd5eda  于 2022-12-30  发布在  其他
关注(0)|答案(1)|浏览(172)

我正在尝试计算数据框架中多个日期之间的差异。我提供了以下示例数据。
我知道我可以用difftime(Date2, Date1, unit = "days")来找出日期之间的差异。另外,我试过下面的代码,它也能工作。我只是想在数据框中以更整洁的方式来做这件事。

library(glue)
library(tidyverse)

col_of_interest <- c("InstantDate")
col_orders <- paste0(col_of_interest, "_", rep(c(1:11), each = 1))

data_date <- data %>% 
  select(any_of(col_orders))

datadiff <- data_date[2:11] - data_date[1:10]

我失败的尝试就在这里。

data_date <- data %>% 
  select(any_of(col_orders)) %>%
  mutate(for(i in (seq(vars) - 1)) "Day_diff_{i}" := difftime(vars[i+1], vars[i], units = "days"))

下面是示例代码。

data <- structure(list(Total_1 = c("NULL", "NULL", "NULL", "NULL", "NULL", 
"NULL"), Total_2 = c("17", "5", "3", "13", "NULL", "0"), Total_3 = c("15", 
"NULL", NA, "2", "6", NA), Total_4 = c("9", NA, NA, "8", NA, 
NA), Total_5 = c("15", NA, NA, "14", NA, NA), Total_6 = c("NULL", 
NA, NA, NA, NA, NA), Total_7 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
Total_8 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), Total_9 = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_), Total_10 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), Total_11 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), InstantDate_1 = structure(c(18327, 
18330, 18332, 18332, 18332, 18333), class = "Date"), InstantDate_2 = structure(c(18673, 
18858, 18794, 18527, 18516, 18533), class = "Date"), InstantDate_3 = structure(c(18703, 
19044, NA, 18673, 18726, NA), class = "Date"), InstantDate_4 = structure(c(18786, 
NA, NA, 18905, NA, NA), class = "Date"), InstantDate_5 = structure(c(18855, 
NA, NA, 19006, NA, NA), class = "Date"), InstantDate_6 = structure(c(19229, 
NA, NA, NA, NA, NA), class = "Date"), InstantDate_7 = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
InstantDate_8 = structure(c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_9 = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
InstantDate_10 = structure(c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_11 = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
VisitType_1 = c("NULL", "NULL", "NULL", "NULL", "NULL", "NULL"
), VisitType_2 = c("FOLLOW UP", "FOLLOW UP", "VIRTUAL VISIT", 
"OFFICE VISIT", "NULL", "VIRTUAL VISIT"), VisitType_3 = c("FOLLOW UP", 
"FOLLOW UP", NA, "VIRTUAL VISIT", "VIRTUAL VISIT", NA), VisitType_4 = c("FOLLOW UP", 
NA, NA, "VIRTUAL VISIT", NA, NA), VisitType_5 = c("FOLLOW UP", 
NA, NA, "FOLLOW UP", NA, NA), VisitType_6 = c("FOLLOW UP", 
NA, NA, NA, NA, NA), VisitType_7 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), VisitType_8 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), VisitType_9 = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_), VisitType_10 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), VisitType_11 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))
trnvg8h3

trnvg8h31#

我们可以使用dplyover中的across2

library(dplyover)
out <- data %>% 
  select(any_of(col_orders)) %>%
   mutate(across2(2:11, 1:10, ~ difftime(.x, .y, units = "days"),
     .names = "Day_diff_{idx}"))
  • 输出
> out %>% select(starts_with("Day_diff"))
# A tibble: 6 × 10
  Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
  <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     
1 346 days    30 days    83 days    69 days   374 days   NA days    NA days    NA days    NA days    NA days    
2 528 days   186 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
3 462 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
4 195 days   146 days   232 days   101 days    NA days   NA days    NA days    NA days    NA days    NA days    
5 184 days   210 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
6 200 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days

如果我们想使用for循环

out1 <- data %>% 
   select(any_of(col_orders))
for(i in 2:11) out1 <- out1 %>%
    mutate( "Day_diff_{i-1}" := difftime(pick(i)[[1]], pick(i-1)[[1]], 
        units = "days"))
  • 输出
> out1 %>% select(starts_with("Day_diff"))
# A tibble: 6 × 10
  Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
  <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     
1 346 days    30 days    83 days    69 days   374 days   NA days    NA days    NA days    NA days    NA days    
2 528 days   186 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
3 462 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
4 195 days   146 days   232 days   101 days    NA days   NA days    NA days    NA days    NA days    NA days    
5 184 days   210 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
6 200 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days

相关问题