有没有办法在dplyr中选择条件不同的组中的第一行?

krcsximq  于 2023-02-01  发布在  其他
关注(0)|答案(4)|浏览(362)

我想为变量a划分出的每个组各选择一行。默认应选择该组中变量c取最大值的那一行;但如果该组中存在b为TRUE(即b = 1)的行,则应选择该组中b为TRUE且c最大的那一行。
下面的代码:

# Reproducible example data: three groups (a) of three rows each, a 0/1 flag
# (b) drawn with replacement, and a random permutation of 1..9 (c).
set.seed(42)
a <- rep(1:3, each = 3)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
b <- sample(c(0, 1), size = 9, replace = TRUE)
c <- sample(1:9, size = 9, replace = FALSE)

df <- data.frame(a = a,
                 b = b,
                 c = c)

# Groups that contain at least one b == 1 row: among the b == 1 rows, keep the
# one with the largest c (sort descending by c, then take the first value of
# every column).
df1 <- df %>%
  group_by(a) %>%
  filter(b == 1) %>%
  arrange(desc(c), .by_group = TRUE) %>%
  summarise(across(everything(), function(x) x[1]))

# Groups without any b == 1 row: keep the row with the largest c.
df2 <- df %>%
  group_by(a) %>%
  filter(all(b != 1)) %>%
  arrange(desc(c), .by_group = TRUE) %>%
  summarise(across(everything(), function(x) x[1]))

# One row per group. Rows from df1 come first, so df3 is not sorted by a.
df3 <- bind_rows(df1, df2)

这是可行的,但我想知道是否有更简单的方法来实现同样的目标。

z18hc3ub

z18hc3ub1#

您可以按组筛选值,然后进行汇总。

# Within each group of `a`, keep only the b == 1 rows; if a group has no such
# row (every b is 0), keep all of its rows. With b restricted to 0/1 the rows
# left in a group all share the same b, so the first b together with the
# largest c describes the wanted row.
df %>%
  group_by(a) %>%
  filter(b == 1 | all(b == 0)) %>%
  summarise(b = b[1], c = max(c))
#       a     b     c
#   <int> <dbl> <int>
# 1     1     0     8
# 2     2     1     5
# 3     3     1     9

也就是说,在每个组内只保留b==1的行;如果某个组的b全部为0,则保留该组的所有行。

h79rfbju

h79rfbju2#

我们可以直接在summarise里使用ifelse,而不需要先用filter按b的值进行筛选。

# Same example data as in the question, except that the third column is named
# `cc` so that it does not share its name with base R's c() function.
set.seed(42)
a <- rep(1:3, each = 3)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
b <- sample(c(0, 1), size = 9, replace = TRUE)
cc <- sample(1:9, size = 9, replace = FALSE)

df <- data.frame(a = a,
                 b = b,
                 cc = cc)

# For each group of `a`: if the group has any b == 1 row, take the largest cc
# among those rows; otherwise take the largest cc of the whole group.
#
# `teste` has to be computed BEFORE `b` is summarised. summarise() evaluates
# its arguments in order, so once `b = max(b)` has run, `b` refers to the
# single summarised value and `cc[b == 1]` no longer picks out the b == 1 rows
# (it would silently return max(cc) over the whole group instead).
df |>
  group_by(a) |>
  summarise(
    teste = ifelse(any(b == 1), max(cc[b == 1]), max(cc)),
    b = max(b)
  ) |>
  select(a, b, teste)  # put the columns back in the order a, b, teste

此外,不要在R中把变量命名为c,因为c()是R中用于组合向量的基础函数。

vfh0ocws

vfh0ocws3#

library(data.table)
setDT(df)  # turn df into a data.table
# Step 1 (inner call): for every (a, b) combination, collect the row numbers
#   (.I) of the rows whose c equals that combination's maximum, and subset df
#   to those rows.
# Step 2 (outer call): order by a, then by b descending so that b == 1 rows
#   come before b == 0 rows, and keep the first row of each a-group.
df[
  df[, .I[c == max(c)], by = .(a, b)]$V1
][
  order(a, -b),
  .SD[1],
  by = a
]
i7uaboj4

i7uaboj44#

library(dplyr)

# Within each group of `a`, sort by b descending (b == 1 rows first) and then
# by c descending. The first row of every group is then the b == 1 row with
# the largest c when such a row exists, and otherwise the row with the
# largest c.
df %>%
  group_by(a) %>%
  arrange(desc(b), desc(c), .by_group = TRUE) %>%
  slice_head(n = 1) %>%
  ungroup()
#> # A tibble: 3 × 3
#>       a     b     c
#>   <int> <dbl> <int>
#> 1     1     0     8
#> 2     2     1     5
#> 3     3     1     9

输入数据:

# Example data: three groups (a) of three rows each, a 0/1 flag (b) drawn with
# replacement, and a random permutation of 1..9 (c).
set.seed(42)
a <- rep(1:3, each = 3)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
b <- sample(c(0, 1), size = 9, replace = TRUE)
c <- sample(1:9, size = 9, replace = FALSE)

df <- data.frame(a = a,
                 b = b,
                 c = c)
df
#>   a b c
#> 1 1 0 8
#> 2 1 0 7
#> 3 1 0 4
#> 4 2 0 1
#> 5 2 1 5
#> 6 2 1 2
#> 7 3 1 9
#> 8 3 1 3
#> 9 3 0 6

创建于2023年1月30日,使用reprex v2.0.2

相关问题