R语言 带有ifelse条件的sapply函数

mspsb9vt  于 2023-05-26  发布在  其他
关注(0)|答案(4)|浏览(181)

我刚开始接触 apply 系列函数,先感谢大家的帮助。我有一个数据集(df),只需要清理 x 列中的一部分行——即含有连字符的行。我在 df 中加入了 x_clean 列,它就是我希望清理后得到的结果。如果 x 列的某个值含有连字符,就在连字符前面的字符串左侧补 0,直到它有 5 位数字,并在连字符后面的字符串左侧补 0,直到它有 4 位数字。如果字符串中没有连字符,则设置为 NA。以下是我尝试过的代码,但都没有成功:

# Example data for the question. `%>%`, mutate() and str_detect() are
# presumably provided by dplyr and stringr, which must be attached first.
df=data.frame(x=c("55555555","4444-444","NULL","hello","0065440006123","22-111"))%>%
  # nchar: number of characters in x; detect: TRUE when x contains a hyphen.
  mutate(nchar=nchar(x), 
         detect=str_detect(x,"-"),
         # Expected cleaned values, one per row (NA where x has no hyphen).
         # NOTE(review): the column name is spelled `xlcean` here.
         xlcean=c(NA,"04444-0444",NA,NA,NA,"00022-0111"))
# First attempt: split every x on "-" and, for each split result, pad the two
# pieces to 5 and 4 digits and join them with "-".
# NOTE(review): inside the anonymous function `x` is a single split result,
# but `detect` is not a parameter, so it presumably still refers to the whole
# column; ifelse() would then return one value per row on every call instead
# of a single value — verify, this looks like the reason the attempt fails.
df%>%mutate(xclean=sapply(strsplit(x,"-"), function(x)
  {ifelse(detect==T,
    paste(sprintf("%05d",as.numeric(x[1])), sprintf("%04d",as.numeric(x[2])), sep="-"),NA)}))

我也试过这个:

# Second attempt: choose between the padded value and NA with if/else.
# NOTE(review): `if` expects a single TRUE/FALSE, while `detect` is a whole
# column, so this cannot decide row by row — ifelse() or a per-row check is
# needed instead.
df%>%mutate(x_clean=
             if (detect==T) {sapply(strsplit(x,"-"), function(x)paste(sprintf("%05d",as.numeric(x[1])), sprintf("%04d",as.numeric(x[2])), sep="-"))}
              else {NA})
yk9xbfzb

yk9xbfzb1#

使用dplyr,不使用sapply的方法

library(dplyr)

# Row-by-row cleaning with dplyr: values containing a hyphen are rebuilt as
# "#####-####" (5 digits, hyphen, 4 digits, zero-padded); all others become NA.
df %>%
  rowwise() %>%
  mutate(
    # Under rowwise() the list-column is unwrapped, so `xclean` below is the
    # character vector of pieces for the current row.
    xclean = strsplit(x, "-"),
    # Each row is a scalar, so plain if/else is used instead of ifelse();
    # NA_character_ keeps the column character even when no row has a hyphen.
    xclean = if (grepl("-", x)) {
      sprintf("%05d-%04d", as.integer(xclean[1]), as.integer(xclean[2]))
    } else {
      NA_character_
    }
  ) %>%
  ungroup()
# A tibble: 6 × 2
  x             xclean    
  <chr>         <chr>     
1 55555555      NA        
2 4444-444      04444-0444
3 NULL          NA        
4 hello         NA        
5 0065440006123 NA        
6 22-111        00022-0111

只有sapply

# Base-R version: split each x on "-" and, when there are exactly two pieces,
# rebuild them as "#####-####" (5 digits, hyphen, 4 digits, zero-padded).
# Anything else becomes NA.
data.frame(
  df,
  # vapply() always returns a character vector (also for zero-row input),
  # whereas sapply() would fall back to an empty list in that case.
  xclean = vapply(
    strsplit(df$x, "-"),
    function(y) {
      if (length(y) == 2) {
        sprintf("%05d-%04d", as.integer(y[1]), as.integer(y[2]))
      } else {
        NA_character_
      }
    },
    character(1)
  )
)
              x     xclean
1      55555555       <NA>
2      4444-444 04444-0444
3          NULL       <NA>
4         hello       <NA>
5 0065440006123       <NA>
6        22-111 00022-0111
j13ufse2

j13ufse22#

# Keep the split pieces in `x_split`, then rebuild values that split into
# exactly two pieces as "#####-####" (5 digits, hyphen, 4 digits, zero-padded).
# Every other value becomes NA.
df |>
  mutate(
    x_split = str_split(x, "-"),
    # vapply() pins the result to one string per row, so the column stays
    # character even for zero-row input (sapply() would return a list there).
    xclean = vapply(
      x_split,
      \(ss) {
        if (length(ss) == 2) {
          sprintf("%05d-%04d", as.integer(ss[1]), as.integer(ss[2]))
        } else {
          NA_character_
        }
      },
      character(1)
    )
  )
#               x       x_split     xclean
# 1      55555555      55555555       <NA>
# 2      4444-444     4444, 444 04444-0444
# 3          NULL          NULL       <NA>
# 4         hello         hello       <NA>
# 5 0065440006123 0065440006123       <NA>
# 6        22-111       22, 111 00022-0111
enxuqcxy

enxuqcxy3#

这里有另一个解决方案:

library(tidyverse)
# Clean `x` by splitting, reshaping to long format, padding each piece and
# joining the pieces back together.
# NOTE(review): the final summarise() yields one row per distinct `x`, so the
# original row order and any duplicate `x` values are not preserved — confirm
# this is acceptable for the data at hand.
df %>%
  mutate(
    # step 1: split `x` on the dash when it looks like digits-dash-digits;
    # every other value becomes NA:
    x_clean = ifelse(str_detect(x, "\\d+-\\d+"),str_split(x, "-"), NA)) %>%
  # step 2: cast to long format, one row per split piece:
  unnest(x_clean) %>%
  # for each `x`:
  group_by(x) %>%
  # left-pad with `0`: the first piece to width 5, later pieces to width 4:
  mutate(x_clean = ifelse(row_number() == 1, 
                          str_pad(x_clean, width = 5, side = "left", pad = "0"),
                          str_pad(x_clean, width = 4, side = "left", pad = "0"))) %>%
  # step 3: join the pieces of each `x` back together with a dash:
  summarise(x_clean = str_c(x_clean, collapse = "-"))
# A tibble: 6 × 2
  x             x_clean   
  <chr>         <chr>     
1 0065440006123 NA        
2 22-111        00022-0111
3 4444-444      04444-0444
4 55555555      NA        
5 hello         NA        
6 NULL          NA
nwwlzxa7

nwwlzxa74#

也可以使用gsubfn

# Build the zero-padded replacement for one regex match.
#   a: digits captured before the hyphen (presumably "" when the fallback
#      branch of the pattern matched instead)
#   b: digits captured after the hyphen
# Returns "#####-####" (5 digits, hyphen, 4 digits), or NA when `a` is empty.
replacement <- function(a, b) {
  if (nzchar(a)) {
    # The part after the hyphen is padded to 4 digits, not 5.
    sprintf("%05i-%04i", as.numeric(a), as.numeric(b))
  } else {
    NA_character_
  }
}

# Either a digits-hyphen-digits match is rewritten, or the whole string is
# consumed by the fallback branch. The return value is pasted into the result,
# so values without a match show up as the string "NA".
gsubfn::gsubfn("(\\d+)-(\\d+)|^.+", replacement, df$x)

# [1] "NA"  "04444-0444"  "NA"  "NA"  "NA"  "00022-0111"

相关问题