R语言 合并逻辑向量创建非逻辑向量

vc6uscn9  于 2023-10-13  发布在  其他
关注(0)|答案(3)|浏览(89)

我在一个 Dataframe 中有两个逻辑向量:

# Example data: two logical columns that will be combined into one label column.
df <- data.frame(
  log1 = c(FALSE, FALSE, TRUE, FALSE, TRUE),
  log2 = c(TRUE, FALSE, FALSE, FALSE, TRUE)
)

我想把这两列合并成第三列。但是这个新列不应该只包含逻辑值,而应该取以下三个值之一:"high"、"outlier" 或 "normal"。其中 "high" 优先,所以在第 5 行(两列都为 TRUE),第三列应该显示 "high",而不是 "outlier"。
我想使用ifelse可以做到这一点,但我无法使用以下代码使其工作:

# Attempt from the question, kept exactly as posted (it does not produce a
# per-row result). `if` expects a single TRUE/FALSE condition, but
# `df$log1 == TRUE` is a logical vector with one element per row, and every
# branch assigns one string to the entire `new` column.
df$new <- NA
if(df$log1 == TRUE){
  df$new <-  "high"
  } else if(df$log2 == TRUE) {
    df$new  <-  "outlier"
    } else {
      df$new  <-  "normal"
      }

有人能帮忙吗?

jk9hmnmh

jk9hmnmh1#

在没有if/else的情况下尝试以下操作:

# Start every row at the lowest-priority label, then overwrite by priority.
df$new <- "normal"
# Index the column, not a row subset of the data frame:
# `df[mask, ]$new <- value` stops with "replacement has 1 row, data has 0"
# whenever no row matches the mask, while `df$new[mask] <- value` is a no-op.
df$new[df$log1] <- "high"
# "high" has priority, so "outlier" only applies where log1 is FALSE.
df$new[!df$log1 & df$log2] <- "outlier"

   log1  log2     new
1 FALSE  TRUE outlier
2 FALSE FALSE  normal
3  TRUE FALSE    high
4 FALSE FALSE  normal
5  TRUE  TRUE    high

**一个更好的解决方案:**试试这个:

library(dplyr)

# Nested ifelse(): log1 decides first; log2 only matters where log1 is FALSE.
df %>%
  mutate(
    new = ifelse(log1, "high", ifelse(log2, "outlier", "normal"))
  )
t30tvxxf

t30tvxxf2#

这个问题用 ifelse 及其各种变体就可以解决。

base R

# Evaluate the nested ifelse() with the columns of df in scope.
with(df, ifelse(log1, "high", ifelse(log2, "outlier", "normal")))
# [1] "outlier" "normal" "high"   "normal" "high"

dplyr

我们可以嵌套dplyr::if_else,但嵌套通常鼓励我们使用case_when

library(dplyr)
df %>%
  mutate(
    # Nested if_else(): the inner call supplies the value where log1 is FALSE.
    new1 = if_else(
      log1,
      "high",
      if_else(log2, "outlier", "normal")
    ),
    # case_when(): conditions are listed in priority order, with a catch-all
    # TRUE condition last.
    new2 = case_when(
      log1 ~ "high",
      log2 ~ "outlier",
      TRUE ~ "normal"
    )
  )
#    log1  log2    new1    new2
# 1 FALSE  TRUE outlier outlier
# 2 FALSE FALSE  normal  normal
# 3  TRUE FALSE    high    high
# 4 FALSE FALSE  normal  normal
# 5  TRUE  TRUE    high    high

data.table

类似地,可以使用 fifelse 和 fcase:

library(data.table)
# new1: fifelse() nested the same way as ifelse().
# new2: fcase() takes alternating condition/value arguments, with a constant
# `default` for rows that match none of the conditions.
as.data.table(df)[, new1 := fifelse(log1, "high", fifelse(log2, "outlier", "normal"))
  ][, new2 := fcase(log1, "high", log2, "outlier", default = "normal")][]
#      log1   log2    new1    new2
#    <lgcl> <lgcl>  <char>  <char>
# 1:  FALSE   TRUE outlier outlier
# 2:  FALSE  FALSE  normal  normal
# 3:   TRUE  FALSE    high    high
# 4:  FALSE  FALSE  normal  normal
# 5:   TRUE   TRUE    high    high

请注意,上面的 dplyr::case_when 使用形如 cond1 ~ value1, cond2 ~ value2 的波浪号公式,而 fcase 则使用交替出现的参数(cond1, value1, cond2, value2, ...)。
此外,default=参数只要是常量就可以工作。如果需要一个动态默认值(即基于表内容),则需要一个全真向量,如fcase(..., rep(TRUE, .N), NEWVALUE)

monwx1rj

monwx1rj3#

使用索引:

# Turn each row into a position: 2 * log1 + log2 + 1 is 1 (neither TRUE),
# 2 (only log2), 3 (only log1) or 4 (both); then look the label up by position.
df$new <- c("normal", "outlier", "high", "high")[2L * df$log1 + df$log2 + 1L]
df
#>    log1  log2     new
#> 1 FALSE  TRUE outlier
#> 2 FALSE FALSE  normal
#> 3  TRUE FALSE    high
#> 4 FALSE FALSE  normal
#> 5  TRUE  TRUE    high

@r2evans 给出的 data.table 方案速度最快,内存效率也最高。在 base R 的各种方案中,索引法胜出。

# Label rows with nested base ifelse(): "high" where log1 is TRUE, otherwise
# "outlier" where log2 is TRUE, otherwise "normal".
# Returns `df` with the labels in column `new`.
f1 <- function(df) {
  df$new <- ifelse(df$log1, "high", ifelse(df$log2, "outlier", "normal"))
  df
}

# Label rows with nested dplyr::if_else(); returns `df` with column `new`.
f2 <- function(df) {
  mutate(
    df,
    new = if_else(log1, "high", if_else(log2, "outlier", "normal"))
  )
}

# Label rows with dplyr::case_when(); conditions are listed in priority order
# and the final TRUE condition catches every remaining row.
f3 <- function(df) {
  mutate(
    df,
    new = case_when(
      log1 ~ "high",
      log2 ~ "outlier",
      TRUE ~ "normal"
    )
  )
}

# Label rows with nested data.table::fifelse().
#
# Works on a data.table made with as.data.table() instead of setDT(): setDT()
# converts its argument by reference, so calling it here would change the
# caller's data frame as a side effect. The result is returned invisibly,
# as `:=` does.
f4 <- function(df) {
  dt <- as.data.table(df)
  dt[, new := fifelse(log1, "high", fifelse(log2, "outlier", "normal"))]
}

# Label rows with data.table::fcase(), which takes alternating condition/value
# arguments and a constant `default` for rows matching no condition.
#
# Works on a data.table made with as.data.table() instead of setDT(): setDT()
# converts its argument by reference, so calling it here would change the
# caller's data frame as a side effect. The result is returned invisibly,
# as `:=` does.
f5 <- function(df) {
  dt <- as.data.table(df)
  dt[, new := fcase(log1, "high", log2, "outlier", default = "normal")]
}

# Label rows by logical subsetting: start everything at "normal", then
# overwrite with "high" where log1 is TRUE and with "outlier" where only log2
# is TRUE. Returns `df` with the labels in column `new`.
f6 <- function(df) {
  # rep() keeps the assignment valid for a zero-row data frame.
  df$new <- rep("normal", nrow(df))
  # Index the column, not a row subset of the data frame:
  # `df[mask, ]$new <- value` stops with "replacement has 1 row, data has 0"
  # when the mask selects no rows.
  df$new[df$log1] <- "high"
  df$new[!df$log1 & df$log2] <- "outlier"
  df
}

# Label rows by position lookup: 2 * log1 + log2 + 1 maps each row to
# 1 (neither TRUE), 2 (only log2), 3 (only log1) or 4 (both).
# Returns `df` with the labels in column `new`.
f7 <- function(df) {
  labels <- c("normal", "outlier", "high", "high")
  position <- 2L * df$log1 + df$log2 + 1L
  df$new <- labels[position]
  df
}

基准:

# Benchmark input: 1e5 rows, each column drawn from TRUE/FALSE with replacement.
df <- data.frame(
  log1 = sample(c(TRUE, FALSE), 1e5, replace = TRUE),
  log2 = sample(c(TRUE, FALSE), 1e5, replace = TRUE)
)

# Time the seven implementations f1..f7, each called on `df`; the names on the
# left become the `expression` labels in the result table.
bench::mark(
  ifelse = f1(df),
  if_else = f2(df),
  case_when = f3(df),
  fifelse = f4(df),
  fcase = f5(df),
  subsetting = f6(df),
  indexing = f7(df),
  check = FALSE # mix of data.table and data.frame
)
#> # A tibble: 7 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 ifelse      40.32ms   41.9ms      24.0   10.32MB     12.0
#> 2 if_else      8.13ms   8.92ms     111.     9.95MB     33.0
#> 3 case_when    6.58ms   7.09ms     137.     7.36MB     45.7
#> 4 fifelse      1.68ms   2.01ms     483.     3.42MB     18.0
#> 5 fcase        1.45ms   1.57ms     618.      1.2MB     22.6
#> 6 subsetting   6.99ms    8.2ms     120.    10.85MB     59.9
#> 7 indexing     1.57ms   2.54ms     389.     4.27MB     50.9

相关问题