R语言 在tibble中将数据从右列移动到左列

cyej8jka  于 2023-01-10  发布在  其他
关注(0)|答案(6)|浏览(184)

我有一个包含诊断信息的 tibble:

# Example data: one row per patient id, up to four diagnosis codes per row.
# NA marks an empty diagnosis slot.
data <- tibble(
  id          = 1:10,
  diagnosis_1 = c("F32", "F431", "R58", "S32", "F11", NA, NA, "Y67", "F32", "Z032"),
  diagnosis_2 = c(NA, NA, NA, NA, NA, NA, "G35", NA, NA, NA),
  diagnosis_3 = c("F40", NA, "R67", "F431", NA, "F60", "S58", "R68", "F11", NA),
  diagnosis_4 = c(NA, NA, "F65", NA, "F19", NA, NA, "F32", NA, NA)
)

作为清理过程的一部分,我删除了所有不符合特定标准的诊断(即不是以字母F、G或Z开头的诊断)。

# Set every diagnosis whose first character is R, S or Y to NA.
# Entries that are already NA are left untouched (`%in%` never returns NA).
blank_rsy <- function(codes) {
  replace(codes, str_sub(codes, 1, 1) %in% c("R", "S", "Y"), NA)
}
data$diagnosis_1 <- blank_rsy(data$diagnosis_1)
data$diagnosis_2 <- blank_rsy(data$diagnosis_2)
data$diagnosis_3 <- blank_rsy(data$diagnosis_3)
data$diagnosis_4 <- blank_rsy(data$diagnosis_4)

清理之后得到如下 tibble:

现在我需要把数据移到左边来填充从左到右的列(例如,如果diagnosis_2,diagnosis_3或diagnosis_4有数据,diagnosis_1就不为空).我试过使用ifelse(),因为它是矢量化的,但是我似乎不能让它和几个嵌套的ifelse()一起工作.

# Single-level attempt: take diagnosis_2 wherever diagnosis_1 is NA.
# (The stray trailing ")" of the original line was a syntax error.)
ifelse(is.na(data$diagnosis_1), data$diagnosis_2, data$diagnosis_1)

非常感谢所有建议。
编辑:添加预期输出:

lnxxn5zx

lnxxn5zx1#

使用 dplyr 和 tidyr。先从宽格式重塑为长格式,剔除以 R、S、Y 开头的诊断以及 NA,再从长格式重塑回宽格式。

library(dplyr)
library(tidyr)

# Reshape wide -> long, drop NA and R/S/Y diagnoses, renumber what is left
# within each id, then reshape long -> wide. Only as many diagnosis_N columns
# are produced as the fullest id needs; an id with no remaining diagnosis
# would drop out of the result.
data %>%
  pivot_longer(-id, names_to = "k", values_to = "v") %>%
  # "^[RSY]" is the intended character class; "^[R|S|Y]" would also match a
  # value starting with a literal "|".
  filter(!is.na(v), !grepl("^[RSY]", v)) %>%
  group_by(id) %>%
  mutate(diagN = paste0("diagnosis_", row_number())) %>%
  ungroup() %>%
  select(-k) %>%
  pivot_wider(names_from = diagN, values_from = v)

# # A tibble: 10 x 3
#       id diagnosis_1 diagnosis_2
#    <int> <chr>       <chr>      
#  1     1 F32         F40        
#  2     2 F431        NA         
#  3     3 F65         NA         
#  4     4 F431        NA         
#  5     5 F11         F19        
#  6     6 F60         NA         
#  7     7 G35         NA         
#  8     8 F32         NA         
#  9     9 F32         F11        
# 10    10 Z032        NA
jecbmhm3

jecbmhm32#

我们首先用 replace 将以“R”、“S”或“Y”开头的值替换为 NA,然后将非 NA 值左移。

# Blank out every diagnosis starting with R, S or Y. "^[RSY]" is the intended
# character class; "^[R|S|Y]" would also match a literal "|".
data[-1] <- lapply(data[-1], function(x) replace(x, grepl("^[RSY]", x), NA))
# Shift the remaining values to the left, row by row. Only the diagnosis
# columns go through apply(): passing the whole table would coerce `id` to
# padded character strings such as " 1".
data[-1] <- setNames(
  as.data.frame(
    t(apply(data[-1], 1, function(x) {
      # Keep the non-NA values in order, then pad with NA to the row length.
      `length<-`(unname(x[!is.na(x)]), length(x))
    })),
    stringsAsFactors = FALSE
  ),
  names(data)[-1]
)

data
# A tibble: 10 x 5
#       id diagnosis_1 diagnosis_2 diagnosis_3 diagnosis_4
#    <int> <chr>       <chr>       <chr>       <chr>      
#  1     1 F32         F40         NA          NA         
#  2     2 F431        NA          NA          NA         
#  3     3 F65         NA          NA          NA         
#  4     4 F431        NA          NA          NA         
#  5     5 F11         F19         NA          NA         
#  6     6 F60         NA          NA          NA         
#  7     7 G35         NA          NA          NA         
#  8     8 F32         NA          NA          NA         
#  9     9 F32         F11         NA          NA         
# 10    10 Z032        NA          NA          NA

左移非 NA 值的做法取自 David 在另一个问题下的回答;您也可以尝试该问题下的其他方法来实现左移。

aydmsdu9

aydmsdu93#

你可以试试tidyverse

library(tidyverse)
data %>%
  # Blank out diagnoses starting with R, S or Y. across() replaces the
  # deprecated mutate_at()/funs(); replace() keeps each column's type.
  mutate(across(
    starts_with("diagnosis"),
    ~ replace(.x, str_sub(.x, 1, 1) %in% c("R", "S", "Y"), NA)
  )) %>%
  pivot_longer(-id, names_to = "k", values_to = "v") %>%
  group_by(id) %>%
  # Within each id, overwrite diagnosis_1 with the first non-NA diagnosis;
  # the other columns keep their values.
  mutate(v = ifelse(k == "diagnosis_1", v[!is.na(v)][1], v)) %>%
  pivot_wider(names_from = k, values_from = v)
# A tibble: 10 x 5
# Groups:   id [10]
      id diagnosis_1 diagnosis_2 diagnosis_3 diagnosis_4
   <int> <chr>       <chr>       <chr>       <chr>      
 1     1 F32         NA          F40         NA         
 2     2 F431        NA          NA          NA         
 3     3 F65         NA          NA          F65        
 4     4 F431        NA          F431        NA         
 5     5 F11         NA          NA          F19        
 6     6 F60         NA          F60         NA         
 7     7 G35         G35         NA          NA         
 8     8 F32         NA          NA          F32        
 9     9 F32         NA          F11         NA         
10    10 Z032        NA          NA          NA

由于不清楚OP想要什么(见下面的讨论),您也可以尝试

data %>%
  # Blank out diagnoses starting with R, S or Y. across() replaces the
  # deprecated mutate_at()/funs(); replace() keeps each column's type.
  mutate(across(
    starts_with("diagnosis"),
    ~ replace(.x, str_sub(.x, 1, 1) %in% c("R", "S", "Y"), NA)
  )) %>%
  pivot_longer(-id, names_to = "k", values_to = "v") %>%
  group_by(id) %>%
  # Within each id, move non-NA values to the front and NA to the back.
  # order() is stable, so the non-NA values keep their left-to-right order,
  # and indexing keeps `v` character even when a whole group is NA.
  mutate(v = v[order(is.na(v))]) %>%
  pivot_wider(names_from = k, values_from = v)
# A tibble: 10 x 5
# Groups:   id [10]
      id diagnosis_1 diagnosis_2 diagnosis_3 diagnosis_4
   <int> <chr>       <chr>       <chr>       <chr>      
 1     1 F32         F40         NA          NA         
 2     2 F431        NA          NA          NA         
 3     3 F65         NA          NA          NA         
 4     4 F431        NA          NA          NA         
 5     5 F11         F19         NA          NA         
 6     6 F60         NA          NA          NA         
 7     7 G35         NA          NA          NA         
 8     8 F32         NA          NA          NA         
 9     9 F32         F11         NA          NA         
10    10 Z032        NA          NA          NA
tcomlyy6

tcomlyy64#

您可以将 Reduce 与 dplyr 中的 coalesce 结合使用,即

# Fill diagnosis_1 with the first non-NA value across the diagnosis columns,
# scanning left to right. Uses `data`, the tibble from the question, instead
# of `df`, which is never defined in this file. The R/S/Y diagnoses must
# already have been set to NA.
data$diagnosis_1 <- Reduce(dplyr::coalesce, data[-1])

#id diagnosis_1 diagnosis_2 diagnosis_3 diagnosis_4
#   <int> <chr>       <chr>       <chr>       <chr>      
# 1     1 F32         <NA>        F40         <NA>       
# 2     2 F431        <NA>        <NA>        <NA>       
# 3     3 F65         <NA>        <NA>        F65        
# 4     4 F431        <NA>        F431        <NA>       
# 5     5 F11         <NA>        <NA>        F19        
# 6     6 F60         <NA>        F60         <NA>       
# 7     7 G35         G35         <NA>        <NA>       
# 8     8 F32         <NA>        <NA>        F32        
# 9     9 F32         <NA>        F11         <NA>       
#10    10 Z032        <NA>        <NA>        <NA>
62lalag4

62lalag45#

下面的解决方案使用 dedupewider 包中的函数 na_move:

library(dedupewider)

# Move NA values to the right-hand side of each row ("right" is the default
# direction). All columns are processed, so `id` is printed as <chr> below.
na_move(data, direction = "right")

#> # A tibble: 10 x 5
#>    id    diagnosis_1 diagnosis_2 diagnosis_3 diagnosis_4
#>  * <chr> <chr>       <chr>       <lgl>       <lgl>      
#>  1 1     F32         F40         NA          NA         
#>  2 2     F431        <NA>        NA          NA         
#>  3 3     F65         <NA>        NA          NA         
#>  4 4     F431        <NA>        NA          NA         
#>  5 5     F11         F19         NA          NA         
#>  6 6     F60         <NA>        NA          NA         
#>  7 7     G35         <NA>        NA          NA         
#>  8 8     F32         <NA>        NA          NA         
#>  9 9     F32         F11         NA          NA         
#> 10 10    Z032        <NA>        NA          NA
a6b3iqyw

a6b3iqyw6#

更新:使用 tidyr 的 pivot_longer 和 unnest_wider。

      • dplyr 版本 1.0.10(CRAN 发布日期:2022-09-01)
      • tidyr 版本 1.2.1(CRAN 发布日期:2022-09-08)

第一步:清理数据

library(dplyr)
library(tidyr)

# Replace every diagnosis beginning with R, S or Y by NA in all columns
# whose name starts with "diag".
data <- data %>%
  mutate(across(
    starts_with("diag"),
    function(codes) replace(codes, grepl("^R|^S|^Y", codes), NA)
  ))

第二步:将数据左对齐(把非 NA 值移到左侧)

# Left-align each id's diagnoses: collect the values per id with the non-NA
# entries first, spread that list back out into one column per position, and
# restore the original column names.
data %>%
  pivot_longer(starts_with("diag")) %>%
  group_by(id) %>%
  summarize(col = list(value[order(is.na(value))])) %>%
  unnest_wider(col, names_sep = "_") %>%
  setNames(colnames(data))
# A tibble: 10 × 5
      id diagnosis_1 diagnosis_2 diagnosis_3 diagnosis_4
   <int> <chr>       <chr>       <chr>       <chr>      
 1     1 F32         F40         NA          NA         
 2     2 F431        NA          NA          NA         
 3     3 F65         NA          NA          NA         
 4     4 F431        NA          NA          NA         
 5     5 F11         F19         NA          NA         
 6     6 F60         NA          NA          NA         
 7     7 G35         NA          NA          NA         
 8     8 F32         NA          NA          NA         
 9     9 F32         F11         NA          NA         
10    10 Z032        NA          NA          NA
数据
# Reproducible copy of the example data (dput-style), one field per line.
data <- structure(
  list(
    id = 1:10,
    diagnosis_1 = c(
      "F32", "F431", "R58", "S32", "F11", NA, NA, "Y67", "F32", "Z032"
    ),
    diagnosis_2 = c(NA, NA, NA, NA, NA, NA, "G35", NA, NA, NA),
    diagnosis_3 = c(
      "F40", NA, "R67", "F431", NA, "F60", "S58", "R68", "F11", NA
    ),
    diagnosis_4 = c(NA, NA, "F65", NA, "F19", NA, NA, "F32", NA, NA)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -10L)
)

相关问题