R语言 使用输入向量创建多个新列

yuvru6vn  于 2023-11-14  发布在  其他
关注(0)|答案(1)|浏览(105)

我有一些很大的数据集,需要为它们计算滞后值。结果数据集需要每天一行,列数等于所需的“回溯”期数。在我的例子中,我以5天为一个时间片,回溯之前的10个时期。我想弄清楚如何把它推广到任意的时间片长度和任意的回溯期数(例如,以365天为一个时间片,回溯前5年)。
(原文此处为一张示例数据的截图)
我附上了单个站点60天的数据和3个代码示例。第一个示例通过硬编码来实现;如果有一百个回溯期,这样写会非常繁琐。
第二个示例展示了我如何开始改用参数(仅供参考)。
第三个示例(无法运行)展示了我希望的写法:用某种语句遍历要创建的度量名称向量,并用第二个向量作为对应的滞后量。也许使用{col}比在向量中预先指定名称更好?我只是不知道如何在引用一个“固定”变量(第一个汇总列)的同时,动态创建数量不定、回溯期依次变长的新变量。
谢谢!

  1. pacman::p_load(tidyverse, RcppRoll)
  2. # The input dataset: 60 consecutive daily rows for a single station (one stnID value), with a Date column `date` and a numeric column `measure`
  3. Daily <- structure(list(stnID = c(165638621686, 165638621686, 165638621686,
  4. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  5. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  6. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  7. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  8. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  9. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  10. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  11. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  12. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  13. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  14. 165638621686, 165638621686, 165638621686, 165638621686, 165638621686,
  15. 165638621686, 165638621686), date = structure(c(10957, 10958,
  16. 10959, 10960, 10961, 10962, 10963, 10964, 10965, 10966, 10967,
  17. 10968, 10969, 10970, 10971, 10972, 10973, 10974, 10975, 10976,
  18. 10977, 10978, 10979, 10980, 10981, 10982, 10983, 10984, 10985,
  19. 10986, 10987, 10988, 10989, 10990, 10991, 10992, 10993, 10994,
  20. 10995, 10996, 10997, 10998, 10999, 11000, 11001, 11002, 11003,
  21. 11004, 11005, 11006, 11007, 11008, 11009, 11010, 11011, 11012,
  22. 11013, 11014, 11015, 11016), class = "Date"), measure = c(15.3333333333333,
  23. 15.9791666666667, 16.65, 12.675, 9.32916666666667, 9.71041666666667,
  24. 11.8916666666667, 11.9958333333333, 11.025, 10.94375, 11.3791666666667,
  25. 9.04166666666667, 10.5604166666667, 10.8583333333333, 11.4083333333333,
  26. 10.1979166666667, 10.19375, 13.1645833333333, 13.7604166666667,
  27. 13.21875, 11.16875, 10.43125, 11.0604166666667, 13.4041666666667,
  28. 14.0979166666667, 10.8521739130435, 8.54375, 5.44375, 8.06666666666667,
  29. 9.77291666666667, 10.2676470588235, 11.5979166666667, 12.1375,
  30. 11.7958333333333, 12.3916666666667, 12.7875, 13.4604166666667,
  31. 10.7541666666667, 10.1979166666667, 10.9145833333333, 11.76875,
  32. 13.6291666666667, 12.5, 10.9416666666667, 12.16875, 12.2229166666667,
  33. 12.0541666666667, 11.69375, 11.05, 12.3229166666667, 12.1208333333333,
  34. 11.5020833333333, 13.1770833333333, 11.3833333333333, 9.88, 10.9520833333333,
  35. 11.275, 11.4208333333333, 11.3270833333333, 11.0104166666667)), class = c("tbl_df",
  36. "tbl", "data.frame"), row.names = c(NA, -60L))
  # Hand-coded version: compute one 5-row rolling mean per station with
  # roll_meanr(), then spell out each lagged copy of it by hand
  # (lags of 10, 15, ..., 50 rows). The result stays grouped by stnID.
  rolled1 <- Daily %>%
    group_by(stnID) %>%
    mutate(
      measure_01 = roll_meanr(measure, 5, na.rm = TRUE),
      measure_02 = lag(measure_01, 10),
      measure_03 = lag(measure_01, 15),
      measure_04 = lag(measure_01, 20),
      measure_05 = lag(measure_01, 25),
      measure_06 = lag(measure_01, 30),
      measure_07 = lag(measure_01, 35),
      measure_08 = lag(measure_01, 40),
      measure_09 = lag(measure_01, 45),
      measure_10 = lag(measure_01, 50)
    )
  # The parameter approach
  periodN <- 10 # number of lagged periods we want to create
  periodSize <- 5 # number of days in one look-back period
  # Zero-padded output column names: measure_01, measure_02, ..., one per period.
  # seq_len() replaces seq_along(1:periodN): `1:periodN` is c(1, 0) when
  # periodN is 0, which would silently produce two names instead of none.
  varList <- paste0("measure_", sprintf("%02d", seq_len(periodN)))
  # Parameterised version of the hand-coded pipeline: the first name in
  # varList receives the rolling mean over periodSize rows, and every later
  # name receives that same column lagged by a multiple of periodSize.
  # The source column is looked up by name through .data[[...]], so the
  # right-hand sides no longer assume that varList[1] is literally
  # "measure_01". The result stays grouped by stnID.
  rolled2 <- Daily %>%
    group_by(stnID) %>%
    mutate(!!varList[1] := roll_meanr(measure, periodSize, na.rm = TRUE)) %>%
    mutate(
      !!varList[2] := lag(.data[[varList[1]]], periodSize * 2),
      !!varList[3] := lag(.data[[varList[1]]], periodSize * 3),
      !!varList[4] := lag(.data[[varList[1]]], periodSize * 4),
      !!varList[5] := lag(.data[[varList[1]]], periodSize * 5),
      !!varList[6] := lag(.data[[varList[1]]], periodSize * 6),
      !!varList[7] := lag(.data[[varList[1]]], periodSize * 7),
      !!varList[8] := lag(.data[[varList[1]]], periodSize * 8),
      !!varList[9] := lag(.data[[varList[1]]], periodSize * 9),
      !!varList[10] := lag(.data[[varList[1]]], periodSize * 10)
    )
  64. # The vector approach: a sketch of the desired loop that does NOT run as written (see the review notes on the lines below)
  65. varList <- paste0("measure_",sprintf("%02d", seq(2, periodN, 1))) # names measure_02 ... measure_<periodN>; overwrites the earlier varList
  66. numList <- periodSize * seq(2, periodN, 1) # matching lag sizes: periodSize * 2, ..., periodSize * periodN
  67. # Goal: loop over varList (measure_02 to measure_10) paired element-wise with numList (10 to 50)
  68. rolled3 <- Daily %>% group_by_stnID) %>% # NOTE(review): syntax error, unbalanced ")" -- presumably meant group_by(stnID)
  69. mutate(!!varList[1] := roll_meanr(measure, periodSize, na.rm=TRUE)) %>% # NOTE(review): varList was redefined above, so varList[1] is "measure_02" here, not "measure_01"
  70. mutate(across(varList) ~ lag(measure01, numList)) # NOTE(review): `across(...) ~ ...` is not a valid mutate() argument, and `measure01` (no underscore) is not a column created above

字符串
前两种方法我都能成功运行。我查找过示例,但似乎找不到能做到这一点的。
我是否应该先“创建”结果列,在每一列中放入滞后量,然后再用 mutate 把每一列变成 lag[目标日期, .x]?
我头好痛。

41zrol4v

41zrol4v1#

以下代码改编自这个答案。这是你想要的结果吗?

  1. library(data.table)
  2. library(rlang)
  3. library(tidyverse)
  4. n_lags <- 50 # largest lag to generate; passed to lags() as `n`
  5. step <- 5 # intended spacing between successive lags; passed to lags() as `step`
  # Build a named list of quosures, one `lag(<var>, k)` call for each
  # k in step, 2 * step, ..., up to n.
  #
  # var:  bare column name to lag (captured with enquo(), not evaluated here)
  # n:    largest lag to generate
  # step: spacing between successive lags (also the smallest lag)
  #
  # Returns a list named "measure_<k>" (k zero-padded to two digits), meant
  # to be spliced into mutate() with `!!!`.
  lags <- function(var, n = 50, step = 5) {
    var <- enquo(var)
    # Fail early with a readable message instead of seq()'s "wrong sign" error.
    stopifnot(step >= 1, n >= step)
    # Use `step` here: the previous hard-coded seq(5, n, by = 5) silently
    # ignored the `step` argument.
    indices <- seq(step, n, by = step)
    # create a list of quosures by looping over `indices`
    # then give them names for `mutate` to use later
    map(indices, ~ quo(lag(!!var, !!.x))) %>%
      set_names(sprintf("measure_%02d", indices))
  }
  # Compute the rolling mean once per station, then splice the named list of
  # lag quosures into mutate() with `!!!` so each one becomes its own column.
  # The quosures go straight into mutate(): funs() has been deprecated since
  # dplyr 0.8.0 and mutate_at() is superseded.
  Daily %>%
    group_by(stnID) %>%
    # window width follows `step` rather than a second hard-coded 5
    mutate(measure_01 = frollmean(measure, n = step, na.rm = TRUE)) %>%
    mutate(!!!lags(measure_01, n_lags, step)) %>%
    # drop the grouping so later steps do not inherit it by accident
    ungroup()
  19. #>
  20. #> # A tibble: 60 × 14
  21. #> stnID date measure measure_01 measure_05 measure_10 measure_15
  22. #> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
  23. #> 1 165638621686 2000-01-01 15.3 NA NA NA NA
  24. #> 2 165638621686 2000-01-02 16.0 NA NA NA NA
  25. #> 3 165638621686 2000-01-03 16.6 NA NA NA NA
  26. #> 4 165638621686 2000-01-04 12.7 NA NA NA NA
  27. #> 5 165638621686 2000-01-05 9.33 14.0 NA NA NA
  28. #> 6 165638621686 2000-01-06 9.71 12.9 NA NA NA
  29. #> 7 165638621686 2000-01-07 11.9 12.1 NA NA NA
  30. #> 8 165638621686 2000-01-08 12.0 11.1 NA NA NA
  31. #> 9 165638621686 2000-01-09 11.0 10.8 NA NA NA
  32. #> 10 165638621686 2000-01-10 10.9 11.1 14.0 NA NA
  33. #> # ℹ 50 more rows
  34. #> # ℹ 7 more variables: measure_20 <dbl>, measure_25 <dbl>, measure_30 <dbl>,
  35. #> # measure_35 <dbl>, measure_40 <dbl>, measure_45 <dbl>, measure_50 <dbl>

字符串
创建于2023-10-28使用reprex v2.0.2

展开查看全部

相关问题