在 R 中如何找出列中第一个和最后一个非零值并返回它们的位置

fumotvh3  于 2022-12-20  发布在  其他
关注(0)|答案(2)|浏览(99)

如何检查列中的第一个和最后一个非零值并返回其在数据框中的位置。
下面是我正在使用的数据框。

# Sample data as a structure() literal: 46 rows (row.names = c(NA, -46L)) with
# a character `Row Labels` column of "YYYY-MM-01" strings and a numeric
# `XYZ|551` column that starts and ends with runs of zeros. The class vector
# marks the object as a tibble ("tbl_df").
structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
"tbl", "data.frame"))

如果还能根据检查结果返回这些值所在的位置,那就更好了。谢谢。

kb5ga3dv

kb5ga3dv1#

使用 base R(基础 R)的话,可以这样做:

# Row indices of all non-zero entries, in ascending order.
# which() ignores NA and returns integer(0) when nothing is non-zero, whereas
# which.min(x == 0) would report row 1 for an all-zero column.
nz_rows <- which(df$`XYZ|551` != 0)

# first non-zero
first_nz <- head(nz_rows, 1)
df[first_nz, ]
#>    Row Labels XYZ|551
#> 10 2019-10-01     206

# last non-zero
# Taken straight from the index vector: no reversed copy of the data frame and
# no lookup by `Row Labels`, so duplicated labels cannot yield extra matches.
last_nz <- tail(nz_rows, 1)
(last_nz_rowlbl <- df$`Row Labels`[last_nz])
#> [1] "2022-04-01"

df[last_nz, ]
#>    Row Labels XYZ|551
#> 40 2022-04-01      33

# location (row index) of first non-zero:
first_nz
#> [1] 10
# location (row index) of last non-zero:
last_nz
#> [1] 40

输入:

# Example input: 46 monthly rows, "2019-01-01" through "2022-10-01".
# The value column has 9 leading zeros, 31 non-zero values and 6 trailing
# zeros. The tibble class vector is attached via structure(), so no package
# needs to be loaded to build the object.
df <- structure(
  list(
    `Row Labels` = format(
      seq(as.Date("2019-01-01"), by = "month", length.out = 46L)
    ),
    `XYZ|551` = c(
      rep(0, 9),
      206, 1814, 2324, 772, 1116, 1636, 1906, 957, 829, 911, 786,
      938, 1313, 2384, 1554, 1777, 1635, 1534, 1015, 827, 982, 685,
      767, 511, 239, 1850, 1301, 426, 261, 201, 33,
      rep(0, 6)
    )
  ),
  row.names = c(NA, -46L),
  class = c("tbl_df", "tbl", "data.frame")
)

创建于2022年12月19日,使用reprex v2.0.2

gt0wga4j

gt0wga4j2#

您可以执行以下操作:

library(tidyverse)

# Example input: 46 monthly rows, "2019-01-01" through "2022-10-01".
# The value column has 9 leading zeros, 31 non-zero values and 6 trailing
# zeros. The tibble class vector is attached via structure().
some_data <- structure(
  list(
    `Row Labels` = format(
      seq(as.Date("2019-01-01"), by = "month", length.out = 46L)
    ),
    `XYZ|551` = c(
      rep(0, 9),
      206, 1814, 2324, 772, 1116, 1636, 1906, 957, 829, 911, 786,
      938, 1313, 2384, 1554, 1777, 1635, 1534, 1015, 827, 982, 685,
      767, 511, 239, 1850, 1301, 426, 261, 201, 33,
      rep(0, 6)
    )
  ),
  row.names = c(NA, -46L),
  class = c("tbl_df", "tbl", "data.frame")
)

#' Locate the first and last non-zero entries of one column
#'
#' @param df A data frame containing the columns named by `var` and
#'   `rowlabel_var`.
#' @param var Name of the column to scan, as a string.
#' @param rowlabel_var Name of the column holding the row labels, as a string.
#' @return `NULL` when `var` is the label column itself; otherwise a long-format
#'   data frame with the columns `order` ("first" / "last"), the label column,
#'   `variable_name` and `value`.
first_and_last_non_zero_of_column <- function(df, var,
                                              rowlabel_var = "Row Labels") {
  # The label column is not a measurement; returning NULL for it lets callers
  # map over names(df) without special-casing.
  if (identical(var, rowlabel_var)) {
    return(NULL)
  }

  nonzero <- df |>
    # all_of() makes it explicit that the names come from character arguments.
    select(all_of(c(rowlabel_var, var))) |>
    # `!= 0` rather than `> 0`, so negative values also count as non-zero.
    filter(.data[[var]] != 0)

  # Stack the two one-row slices; `.id` records which is which in `order`.
  bind_rows(
    first = slice_head(nonzero, n = 1),
    last = slice_tail(nonzero, n = 1),
    .id = "order"
  ) |>
    pivot_longer(
      cols = all_of(var),
      names_to = "variable_name"
    )
}

# Single column: pass its name as a string.
first_and_last_non_zero_of_column(df = some_data, var = "XYZ|551")

# Several columns: call the helper once per column name and row-bind the
# per-column results into one data frame.
map_dfr(
  names(some_data),
  function(column_name) {
    first_and_last_non_zero_of_column(df = some_data, var = column_name)
  }
)

相关问题