如何在R中识别列中的第一个和最后一个非零值并返回两者的位置

fumotvh3  于 2022-12-20  发布在  其他
关注(0)|答案(2)|浏览(109)

如何检查列中的第一个和最后一个非零值并返回其在数据框中的位置。
下面是我正在使用的数据框。

  1. structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
  2. "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
  3. "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
  4. "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
  5. "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
  6. "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
  7. "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
  8. "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
  9. "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
  10. "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0,
  11. 0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906,
  12. 957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534,
  13. 1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201,
  14. 33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df",
  15. "tbl", "data.frame"))

如果能根据检查结果得到这两个值所在的位置(行号),将会非常有帮助。谢谢。

kb5ga3dv

kb5ga3dv1#

使用基础 R(base R)的话,可以这样做:

  # Locate the first and last non-zero entries of column `XYZ|551` in `df`.
  #
  # which() returns the row indices of every TRUE element in row order and
  # drops NA. When the column holds no non-zero value it returns integer(0),
  # so `first_nz` / `last_nz` come back empty instead of pointing at row 1
  # (which is what which.min(x == 0) reports for an all-zero column).
  nz_rows <- which(df$`XYZ|551` != 0)
  # first non-zero
  first_nz <- head(nz_rows, 1)
  df[first_nz, ]
  #> Row Labels XYZ|551
  #> 10 2019-10-01 206
  # last non-zero
  # taken from the same index vector, so no reversed copy of the data frame
  # and no lookup by (possibly duplicated) row label is needed
  last_nz <- tail(nz_rows, 1)
  # the matching `Row Labels` value of the last non-zero row
  (last_nz_rowlbl <- df$`Row Labels`[last_nz])
  #> [1] "2022-04-01"
  df[last_nz, ]
  #> Row Labels XYZ|551
  #> 40 2022-04-01 33
  # location (row index) of first non-zero:
  first_nz
  #> [1] 10
  # location (row index) of last non-zero:
  last_nz
  #> [1] 40

输入:

  1. df <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
  2. "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
  3. "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
  4. "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
  5. "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
  6. "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
  7. "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
  8. "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
  9. "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
  10. "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0,
  11. 0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906,
  12. 957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534,
  13. 1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201,
  14. 33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df",
  15. "tbl", "data.frame"))

创建于2022年12月19日,使用reprex v2.0.2

展开查看全部
gt0wga4j

gt0wga4j2#

您可以执行以下操作:

  1. library(tidyverse)
  2. some_data <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
  3. "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
  4. "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
  5. "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
  6. "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
  7. "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
  8. "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
  9. "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
  10. "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
  11. "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0,
  12. 0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906,
  13. 957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534,
  14. 1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201,
  15. 33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df",
  16. "tbl", "data.frame"))
  # Find the first and last non-zero entries of one column.
  #
  # Arguments:
  #   df            Data frame / tibble containing both columns named below.
  #   var           Name of the column to inspect, as a single string.
  #   rowlabel_var  Name of the column that labels each row, as a single
  #                 string (default "Row Labels").
  #
  # Returns:
  #   NULL when `var` is the row-label column itself, or when `var` contains
  #   no non-zero value. Otherwise a two-row tibble with the columns
  #   `order` ("first" / "last"), the row-label column, `variable_name`
  #   (always `var`) and `value` (the non-zero entry). If only one row is
  #   non-zero, that row is reported as both "first" and "last".
  first_and_last_non_zero_of_column <- function(df, var,
                                                rowlabel_var = "Row Labels") {
    # The label column is not a value column, so there is nothing to report.
    if (identical(var, rowlabel_var)) {
      return(NULL)
    }
    # Column names arrive as strings: all_of() selects them explicitly and
    # .data[[var]] refers to the column inside filter(). `!= 0` keeps negative
    # values too; filter() drops rows where the comparison is NA.
    nonzero <- df |>
      select(all_of(c(rowlabel_var, var))) |>
      filter(.data[[var]] != 0)
    # Nothing non-zero: return NULL, like the early exit above, rather than
    # trying to reshape zero-row slices.
    if (nrow(nonzero) == 0) {
      return(NULL)
    }
    enframe(
      list(
        first = slice_head(nonzero, n = 1),
        last = slice_tail(nonzero, n = 1)
      ),
      name = "order"
    ) |>
      # Spread each one-row slice into ordinary columns ...
      unnest_wider(col = value) |>
      # ... then move the inspected column into name/value form so results
      # for different columns can be row-bound together.
      pivot_longer(
        cols = all_of(var),
        names_to = "variable_name"
      )
  }
  # One column: first and last non-zero rows of `XYZ|551`.
  first_and_last_non_zero_of_column(df = some_data, var = "XYZ|551")
  # Several columns: call the helper once per column name of `some_data` and
  # row-bind whatever each call returns.
  column_names <- names(some_data)
  map_dfr(
    column_names,
    \(column_name) first_and_last_non_zero_of_column(some_data, column_name)
  )
展开查看全部

相关问题