R:滚动日期范围内的累计和

dsf9zpds  于 2023-07-31  发布在  其他
关注(0)|答案(4)|浏览(123)

在R中,如何对每一行计算它之前一段指定时间内(含该行)的累计和(cumsum)?如果可能的话,首选dplyr。
例如,如果时间段为10天,则该函数应得到如下的cum_rolling10列:

  1. date value cumsum cum_rolling10
  2. 1/01/2000 9 9 9
  3. 2/01/2000 1 10 10
  4. 5/01/2000 9 19 19
  5. 6/01/2000 3 22 22
  6. 7/01/2000 4 26 26
  7. 8/01/2000 3 29 29
  8. 13/01/2000 10 39 29
  9. 14/01/2000 9 48 38
  10. 18/01/2000 2 50 21
  11. 19/01/2000 9 59 30
  12. 21/01/2000 8 67 38
  13. 25/01/2000 5 72 24
  14. 26/01/2000 1 73 25
  15. 30/01/2000 6 79 20
  16. 31/01/2000 6 85 18

字符串

gcmastyq

gcmastyq1#

使用dplyr、tidyr、lubridate和zoo的解决方案。

  1. library(dplyr)
  2. library(tidyr)
  3. library(lubridate)
  4. library(zoo)
  5. # Pad the missing calendar days with value 0 so a 10-row window spans 10 consecutive days.
  6. dt2 <- dt %>%
  7. mutate(date = dmy(date), cumsum = cumsum(value)) %>%
  8. complete(date = full_seq(date, period = 1), fill = list(value = 0)) %>%
  9. mutate(cum_rolling10 = rollapplyr(value, width = 10, FUN = sum, partial = TRUE)) %>%
  10. drop_na(cumsum) # padded rows have no cumsum, so this removes them again
  11. dt2
  12. # A tibble: 15 x 4
  13. date value cumsum cum_rolling10
  14. <date> <dbl> <int> <dbl>
  15. 1 2000-01-01 9 9 9
  16. 2 2000-01-02 1 10 10
  17. 3 2000-01-05 9 19 19
  18. 4 2000-01-06 3 22 22
  19. 5 2000-01-07 4 26 26
  20. 6 2000-01-08 3 29 29
  21. 7 2000-01-13 10 39 29
  22. 8 2000-01-14 9 48 38
  23. 9 2000-01-18 2 50 21
  24. 10 2000-01-19 9 59 30
  25. 11 2000-01-21 8 67 38
  26. 12 2000-01-25 5 72 24
  27. 13 2000-01-26 1 73 25
  28. 14 2000-01-30 6 79 20
  29. 15 2000-01-31 6 85 18

字符串

数据

  1. # Example input: day-first date strings and integer values, one row per observation.
  2. dt <- data.frame(
  3. date = c(
  4. "1/01/2000", "2/01/2000", "5/01/2000", "6/01/2000", "7/01/2000",
  5. "8/01/2000", "13/01/2000", "14/01/2000", "18/01/2000", "19/01/2000",
  6. "21/01/2000", "25/01/2000", "26/01/2000", "30/01/2000", "31/01/2000"
  7. ),
  8. value = c(9L, 1L, 9L, 3L, 4L, 3L, 10L, 9L, 2L, 9L, 8L, 5L, 1L, 6L, 6L),
  9. stringsAsFactors = FALSE # keep date as character, as in the original structure()
  10. )

展开查看全部
uinbv5nw

uinbv5nw2#

我建议使用runner包,它用于在滚动/运行窗口上计算函数。你可以使用sum_run来实现这一点——下面是一个单行写法(one-liner):

  1. library(runner)
  2. library(dplyr)
  3. df <- df %>% # assign the result back so the df printed below contains the new column
  4. mutate(
  5. cum_rolling_10 = sum_run(
  6. x = value,
  7. k = 10, # window length, measured in units of idx (days here)
  8. idx = as.Date(date, format = "%d/%m/%Y")) # day-first strings parsed to Date for the window index
  9. )
  10. df
  11. # date value cum_rolling_10
  12. # 1 1/01/2000 9 9
  13. # 2 2/01/2000 1 10
  14. # 3 5/01/2000 9 19
  15. # 4 6/01/2000 3 22
  16. # 5 7/01/2000 4 26
  17. # 6 8/01/2000 3 29
  18. # 7 13/01/2000 10 29
  19. # 8 14/01/2000 9 38
  20. # 9 18/01/2000 2 21
  21. # 10 19/01/2000 9 30
  22. # 11 21/01/2000 8 38
  23. # 12 25/01/2000 5 24
  24. # 13 26/01/2000 1 25
  25. # 14 30/01/2000 6 20
  26. # 15 31/01/2000 6 18

字符串
好好享受吧!

展开查看全部
kjthegm6

kjthegm63#

这个解决方案将避免内存开销,并且迁移到sparklyr将很容易。
# Window length added to each row's `order` offset in the pipeline below.
lag <- 7

  1. # NOTE(review): datediff() is not provided by dplyr or lubridate; presumably it is meant to be
  2. # translated to Spark SQL by sparklyr. On a local data frame, as.integer(date - min(date))
  3. # would be the equivalent -- TODO confirm.
  4. # NOTE(review): rows are accumulated within m_order groups rather than over a sliding window;
  5. # verify the output against the expected cum_rolling10 column before relying on it.
  6. dt %>%
  7. mutate(date = dmy(date)) %>%
  8. # presumably the number of days since the earliest date
  9. mutate(order = datediff(date, min(date))) %>%
  10. arrange(desc(order)) %>%
  11. # previous row's offset plus the window length (0 for the first row)
  12. mutate(n_order = lag(order + lag, 1L, default = 0)) %>%
  13. # keep the offset where it is not below n_order, otherwise mark the row with -1
  14. mutate(b_order = ifelse(order - n_order >= 0, order, -1)) %>%
  15. # the running maximum of b_order serves as the group key
  16. mutate(m_order = cummax(b_order)) %>%
  17. group_by(m_order) %>%
  18. # cumulative sum of value within each group; the result stays grouped by m_order
  19. mutate(rolling_value = cumsum(value))

字符串

mwyxok5s

mwyxok5s4#

使用slider中的slide_index_sum(),它的API与purrr相同。

  1. library(slider)
  2. library(dplyr)
  3. # Sample data; the day-first date strings are parsed while the tibble is built.
  4. df <- tibble(
  5. date = as.Date(
  6. c("1/01/2000", "2/01/2000", "5/01/2000", "6/01/2000", "7/01/2000",
  7. "8/01/2000", "13/01/2000", "14/01/2000", "18/01/2000", "19/01/2000",
  8. "21/01/2000", "25/01/2000", "26/01/2000", "30/01/2000", "31/01/2000"),
  9. format = "%d/%m/%Y"
  10. ),
  11. value = c(9L, 1L, 9L, 3L, 4L, 3L, 10L, 9L, 2L, 9L, 8L, 5L, 1L, 6L, 6L)
  12. )
  13. # before = 9L: each window covers the current date plus the 9 days before it.
  14. mutate(
  15. df, cumsum = cumsum(value), cum_rolling10 = slide_index_sum(value, date, before = 9L)
  16. )
  17. #> # A tibble: 15 × 4
  18. #> date value cumsum cum_rolling10
  19. #> <date> <int> <int> <dbl>
  20. #> 1 2000-01-01 9 9 9
  21. #> 2 2000-01-02 1 10 10
  22. #> 3 2000-01-05 9 19 19
  23. #> 4 2000-01-06 3 22 22
  24. #> 5 2000-01-07 4 26 26
  25. #> 6 2000-01-08 3 29 29
  26. #> 7 2000-01-13 10 39 29
  27. #> 8 2000-01-14 9 48 38
  28. #> 9 2000-01-18 2 50 21
  29. #> 10 2000-01-19 9 59 30
  30. #> 11 2000-01-21 8 67 38
  31. #> 12 2000-01-25 5 72 24
  32. #> 13 2000-01-26 1 73 25
  33. #> 14 2000-01-30 6 79 20
  34. #> 15 2000-01-31 6 85 18

字符串

展开查看全部

相关问题