R dplyr RowWise Subtract Group

daupos2t  于 2023-11-14  发布在  其他
关注(0)|答案(5)|浏览(111)
# Example data: one row per student and class, with three integer scores.
quux <- data.frame(
  STUDENT = c(1L, 1L, 2L, 2L, 3L, 3L, 4L),
  CLASS = c("A", "B", "A", "B", "A", "B", "A"),
  ENG = c(9L, 2L, 7L, 5L, 8L, 0L, 1L),
  MATH = c(2L, 10L, 0L, 3L, 4L, 1L, 3L),
  HIST = c(8L, 1L, 0L, 1L, 6L, 6L, 2L),
  stringsAsFactors = FALSE # keep CLASS as character on R < 4.0 as well
)

STUDENT CLASS   ENG MATH    HIST
1   A   9   2   8
1   B   2   10  1
2   A   7   0   0
2   B   5   3   1
3   A   8   4   6
3   B   0   1   6
4   A   1   3   2

字符串
我有上面这份数据,希望把它转换成下面这份数据:

STUDENT ENG.DIF MATH.DIF    HIST.DIF
1   -7  8   -7
2   -2  3   1
3   -8  -3  0
4   NA  NA  NA


对于每个学生,分别对 ENG、MATH、HIST 计算 B 班成绩减去 A 班成绩,例如 ENG[CLASS == 'B'] - ENG[CLASS == 'A']。如果某个学生只有一条记录,则结果为 NA。
我试着:
DAT1 %>% group_by(STUDENT) %>% mutate(ENG.DIF = ENG[CLASS == 'B'] - ENG[CLASS == 'A'])
但没有成功。

v1uwarro

v1uwarro1#

使用 dplyr,可以借助 summarize 来实现。

# Collapse to one row per student holding the class B minus class A score.
# Start from `quux`, the data frame defined in the question.
quux %>%
  summarize(
    across(
      ENG:HIST,
      # Only subtract when the student has exactly two rows; a student with a
      # single row gets NA instead of a zero-length result.
      ~ if (n() == 2) .x[CLASS == "B"] - .x[CLASS == "A"] else NA,
      .names = "{.col}.DIF"
    ),
    .by = STUDENT
  )
#   STUDENT ENG.DIF MATH.DIF HIST.DIF
# 1       1      -7        8       -7
# 2       2      -2        3        1
# 3       3      -8       -3        0
# 4       4      NA       NA       NA

字符串

9avjhtql

9avjhtql2#

我们可以在计算中添加一个简单的if

# Append the per-student class B minus class A difference to every input row.
quux %>%
  group_by(STUDENT) %>%
  mutate(
    across(
      c(ENG, MATH, HIST),
      \(score) {
        # Subtract only when the student has more than one row; otherwise NA.
        if (n() > 1) {
          score[CLASS == "B"] - score[CLASS == "A"]
        } else {
          NA
        }
      },
      .names = "{.col}.dif"
    )
  ) %>%
  ungroup()
# # A tibble: 7 × 8
#   STUDENT CLASS   ENG  MATH  HIST ENG.dif MATH.dif HIST.dif
#     <int> <chr> <int> <int> <int>   <int>    <int>    <int>
# 1       1 A         9     2     8      -7        8       -7
# 2       1 B         2    10     1      -7        8       -7
# 3       2 A         7     0     0      -2        3        1
# 4       2 B         5     3     1      -2        3        1
# 5       3 A         8     4     6      -8       -3        0
# 6       3 B         0     1     6      -8       -3        0
# 7       4 A         1     3     2      NA       NA       NA

字符串
由于每组只需要一个差值,我们可以利用这样一个事实:对长度为 0 的向量取 [1](例如 integer(0)[1])会返回 NA:

# Same per-student B minus A difference, without an explicit if/else.
quux %>%
  group_by(STUDENT) %>%
  mutate(
    across(
      c(ENG, MATH, HIST),
      \(score) {
        b_minus_a <- score[CLASS == "B"] - score[CLASS == "A"]
        # Taking [1] of a zero-length result (no matching rows) yields NA.
        b_minus_a[1]
      },
      .names = "{.col}.dif"
    )
  ) %>%
  ungroup()


数据

# Example data: one row per student and class, with three integer scores.
quux <- data.frame(
  STUDENT = c(1L, 1L, 2L, 2L, 3L, 3L, 4L),
  CLASS = c("A", "B", "A", "B", "A", "B", "A"),
  ENG = c(9L, 2L, 7L, 5L, 8L, 0L, 1L),
  MATH = c(2L, 10L, 0L, 3L, 4L, 1L, 3L),
  HIST = c(8L, 1L, 0L, 1L, 6L, 6L, 2L),
  stringsAsFactors = FALSE # keep CLASS as character on R < 4.0 as well
)

hrirmatl

hrirmatl3#

首先,我们可以使用complete创建填充NA的缺失行,然后使用summarise进行简单的减法。

library(dplyr)
library(tidyr)

# For each student, subtract the class A score from the class B score
# (B - A, as the question asks).
quux |>
  # complete() adds the missing STUDENT/CLASS combinations as NA rows, so a
  # student with only one class yields NA rather than a zero-length result.
  complete(STUDENT, CLASS) |>
  summarise(
    across(
      ENG:HIST,
      \(x) x[CLASS == "B"] - x[CLASS == "A"],
      .names = "{.col}_DIFF"
    ),
    .by = STUDENT
  )
# # A tibble: 4 × 4
#   STUDENT ENG_DIFF MATH_DIFF HIST_DIFF
#     <int>    <int>     <int>     <int>
# 1       1       -7         8        -7
# 2       2       -2         3         1
# 3       3       -8        -3         0
# 4       4       NA        NA        NA

字符串

gpfsuwkq

gpfsuwkq4#

# Per-student difference between consecutive classes via lag().
quux |>
  # Sort so that, within each student, class A precedes class B.
  arrange(CLASS) |>
  mutate(
    across(ENG:HIST, \(score) score - lag(score)),
    .by = STUDENT
  ) |>
  # The first row of each student has no predecessor, so its differences are
  # NA; drop_na() removes every row that contains an NA.
  drop_na() |>
  select(-CLASS) |>
  # Re-add students that lost all their rows, filled with NA.
  complete(STUDENT = unique(quux[["STUDENT"]]))

字符串
结果

# A tibble: 4 × 4
  STUDENT   ENG  MATH  HIST
    <int> <int> <int> <int>
1       1    -7     8    -7
2       2    -2     3     1
3       3    -8    -3     0
4       4    NA    NA    NA

s8vozzvw

s8vozzvw5#

你可以像下面这样尝试pivot_*

# Reshape to long form, take the B minus A difference per student and subject,
# then spread the subjects back out into "<subject>.DIF" columns.
# Start from `quux`, the data frame defined in the question.
quux %>%
  pivot_longer(-c(STUDENT, CLASS)) %>%
  # Sort so that class A precedes class B and diff() yields B - A.
  arrange(CLASS) %>%
  # diff() on a single value is zero-length; [1] turns that into NA.
  reframe(DIF = diff(value)[1], .by = c(STUDENT, name)) %>%
  pivot_wider(
    names_from = name,
    values_from = DIF,
    names_glue = "{name}.DIF"
  )

字符串
结果如下:

# A tibble: 4 × 4
  STUDENT ENG.DIF MATH.DIF HIST.DIF
    <int>   <int>    <int>    <int>
1       1      -7        8       -7
2       2      -2        3        1
3       3      -8       -3        0
4       4      NA       NA       NA

相关问题