在R中替换列中的数字

jdg4fx2g  于 2024-01-03  发布在  其他
关注(0)|答案(4)|浏览(144)

我有若干个数据框(df),每个都有一列 df$col。我想把其中所有的 1 替换成它两侧那个更大的数字,例如 2111112 变成 2222222,311113 变成 333333,依此类推,同时保持整体模式不变,0 等其他值保持原样。也就是说,只需在原位置把 1 替换为那个更大的数字。举个例子:

# Sample input: runs of non-zero values separated by runs of zeros. Apart from
# the first run (all 1s), each run is a larger number at both ends with 1s in
# between.
data <- data.frame(col = c(
  0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
  2, 1, 1, 1, 1, 2, 0, 0, 0, 0,
  3, 1, 1, 3, 0, 0, 0, 0,
  4, 1, 1, 1, 1, 4, 0, 0, 0,
  5, 1, 1, 1, 1, 1, 5, 0, 0, 0
))

# Expected output: same layout, with the 1s inside each run replaced by the
# larger number that encloses them; zeros and the all-1 run are unchanged.
# (Variable name kept exactly as posted.)
desired.outut.data <- data.frame(col = c(
  0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
  2, 2, 2, 2, 2, 2, 0, 0, 0, 0,
  3, 3, 3, 3, 0, 0, 0, 0,
  4, 4, 4, 4, 4, 4, 0, 0, 0,
  5, 5, 5, 5, 5, 5, 5, 0, 0, 0
))

字符串
在不同的数据框中,两个较大数字之间 1 的个数可能不同。提前感谢!

nkoocmlb

nkoocmlb1#

我相信有更优雅的方式:

library(dplyr)
# Label consecutive runs of equal values, then give every non-zero row the
# maximum of `col` taken over a sliding window keyed on the run label.
data |>
  mutate(
    # A new run starts wherever the value differs from the previous row;
    # the first row is compared against the default of 0.
    grp = cumsum(col != lag(col, n = 1, default = 0)),
    # Zeros stay 0. For the rest, `.before = 1` lets the window reach back one
    # step in `grp`, so a run of 1s also sees the larger value just before it.
    new_val = if_else(
      col == 0,
      0,
      slider::slide_index_dbl(.x = col, .i = grp, .f = max, .before = 1)
    )
  )

字符串
结果

   col grp new_val
1    0   0       0
2    0   0       0
3    0   0       0
4    0   0       0
5    1   1       1
6    1   1       1
7    1   1       1
8    1   1       1
9    0   2       0
10   0   2       0
11   0   2       0
12   0   2       0
13   2   3       2
14   1   4       2
15   1   4       2
16   1   4       2
17   1   4       2
18   2   5       2
19   0   6       0
20   0   6       0
21   0   6       0
22   0   6       0
23   3   7       3
24   1   8       3
25   1   8       3
26   3   9       3
27   0  10       0
28   0  10       0
29   0  10       0
30   0  10       0
31   4  11       4
32   1  12       4
33   1  12       4
34   1  12       4
35   1  12       4
36   4  13       4
37   0  14       0
38   0  14       0
39   0  14       0
40   5  15       5
41   1  16       5
42   1  16       5
43   1  16       5
44   1  16       5
45   1  16       5
46   5  17       5
47   0  18       0
48   0  18       0
49   0  18       0

q43xntqr

q43xntqr2#

下面是使用 tidyr::fill() 的一个例子:

library(dplyr)
library(tidyr)
# Turn the 1s into NA so they can be filled from the value above, then return
# the filled column as a plain vector.
data %>%
  # Every 1 becomes NA.
  mutate(col = na_if(col, 1)) %>%
  # Put the 1s back for rows 1-9 so the leading all-1 run is left alone.
  # NOTE(review): the cutoff of 10 is hard-coded for this sample data.
  mutate(col = ifelse(row_number() < 10 & is.na(col), 1, col)) %>%
  # Carry the last non-missing value downwards (fill()'s default direction).
  fill(col) %>%
  pull()

个字符

9gm1akwq

9gm1akwq3#

我们可以先为每一段连续的非零值构造一个分组索引,再用 ave() 配合 FUN = max,把每一段中的值替换为该段的最大值。

# Replace every run of consecutive non-zero values in `col` by the maximum of
# that run; zeros are left as 0.
transform(
  data,
  col = ave(
    col,
    # Grouping index: number the non-zero runs 1, 2, ... and put all zeros in
    # group 0. Prepending FALSE makes a run that starts on the very first row
    # count as a run start too, so it is not lumped together with the zeros.
    replace(cumsum(diff(c(FALSE, col != 0)) == 1), col == 0, 0),
    FUN = max
  )
)
   col
1    0
2    0
3    0
4    0
5    1
6    1
7    1
8    1
9    0
10   0
11   0
12   0
13   2
14   2
15   2
16   2
17   2
18   2
19   0
20   0
21   0
22   0
23   3
24   3
25   3
26   3
27   0
28   0
29   0
30   0
31   4
32   4
33   4
34   4
35   4
36   4
37   0
38   0
39   0
40   5
41   5
42   5
43   5
44   5
45   5
46   5
47   0
48   0
49   0

字符串

数据:
> dput(data)
structure(list(col = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 
1, 1, 1, 1, 2, 0, 0, 0, 0, 3, 1, 1, 3, 0, 0, 0, 0, 4, 1, 1, 1, 
1, 4, 0, 0, 0, 5, 1, 1, 1, 1, 1, 5, 0, 0, 0)), class = "data.frame", row.names = c(NA, 
-49L))

s5a0g9ez

s5a0g9ez4#

与 jay.sf 的方法类似,但改写成了 tidyverse 风格,也许更容易阅读

library(tidyverse)
# Sample input: non-zero runs separated by zeros.
data <- data.frame(col = c(
  0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
  2, 1, 1, 1, 1, 2, 0, 0, 0, 0,
  3, 1, 1, 3, 0, 0, 0, 0,
  4, 1, 1, 1, 1, 4, 0, 0, 0,
  5, 1, 1, 1, 1, 1, 5, 0, 0, 0
))

mod <- data %>%
  mutate(
    # TRUE for non-zero rows; an NA comparison result is treated as FALSE.
    non_zero = replace_na(col != 0, FALSE),
    # One id per consecutive block of zero / non-zero rows.
    group = consecutive_id(non_zero)
  ) %>%
  # Within each block, every row gets the block's maximum; the new column is
  # placed right after `col`.
  mutate(result = max(col), .by = group, .after = col)

library(waldo)

# Expected values for `mod$result`, one per input row.
desired.output <- c(
  0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
  2, 2, 2, 2, 2, 2, 0, 0, 0, 0,
  3, 3, 3, 3, 0, 0, 0, 0,
  4, 4, 4, 4, 4, 4, 0, 0, 0,
  5, 5, 5, 5, 5, 5, 5, 0, 0, 0
)

# Compare the computed column against the expected vector.
waldo::compare(x = mod$result, y = desired.output)
#> ✔ No differences

字符串
创建于2023-12-11使用reprex v2.0.2

相关问题