如何在R中将数据从长格式整形为宽格式?

xkftehaa  于 2023-06-19  发布在  其他
关注(0)|答案(2)|浏览(103)

现在,我有相同的输入 Dataframe :

AAA <- c('APIS', 'APIS', 'PIPIS', 'AGAROS', 'AGAROS', 'AGAROS', 'NOTHING')
BBB <- c('a', 'a', 'a', 'b', 'b', 'c', NA)
CCC <- c(1, 2, NA, 4, 5, 6, 7)
DDD <- c("Mat", "ASG", "MNT", "NBEH", "DJ", "EU", "DHR")
test.data <- data.frame(AAA, BBB, CCC, DDD)
test.data

我想重塑它,使每个唯一的“AAA”变量变成单行。“AAA”变量的多个条目自动成为新列,后缀或前缀为1、2、3.....就像这样:

AAA <- c('APIS', 'PIPIS', 'AGAROS', 'NOTHING')
BBB_1 <- c('a', 'a', 'b', NA)
CCC_1 <- c(1, NA, 4, 7)
DDD_1 <- c("Mat", "MNT", "NBEH", "DHR")

BBB_2 <- c('a', NA, 'b', NA)
CCC_2 <- c(2, NA, 5, NA)
DDD_2 <- c("ASG", NA, "DJ", NA)

BBB_3 <- c(NA, NA, 'c', NA)
CCC_3 <- c(NA, NA, 6, NA)
DDD_3 <- c(NA, NA, "EU", NA)

output <- data.frame(AAA, BBB_1, CCC_1, DDD_1, BBB_2, CCC_2, DDD_2, 
                        BBB_3, CCC_3, DDD_3)
output

我已经看过熔化和铸造和其他一些东西,但似乎没有做的工作。

dtcbnfnu

dtcbnfnu1#

您可以为AAA的每个唯一值创建一个唯一的行号,然后转换为宽格式。

library(dplyr)
library(tidyr)

test.data %>%
  mutate(row = row_number(), .by = AAA) %>%
  pivot_wider(names_from = row, values_from = BBB:DDD, names_vary = "slowest")

# A tibble: 4 × 10
#  AAA     BBB_1 CCC_1 DDD_1 BBB_2 CCC_2 DDD_2 BBB_3 CCC_3 DDD_3
#  <chr>   <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr>
#1 APIS    a         1 Mat   a         2 ASG   NA       NA NA   
#2 PIPIS   a        NA MNT   NA       NA NA    NA       NA NA   
#3 AGAROS  b         4 NBEH  b         5 DJ    c         6 EU   
#4 NOTHING NA        7 DHR   NA       NA NA    NA       NA NA

使用data.table-

library(data.table)
setDT(test.data)

dcast(test.data[, row := seq_len(.N), AAA], 
      AAA ~ row, value.var = c("BBB", "CCC", "DDD"))
nwwlzxa7

nwwlzxa72#

碱R溶液:

# Input Data: test.data => data.frame
AAA <- c('APIS', 'APIS', 'PIPIS', 'AGAROS', 'AGAROS', 'AGAROS', 'NOTHING')
BBB <- c('a', 'a', 'a', 'b', 'b', 'c', NA)
CCC <- c(1, 2, NA, 4, 5, 6, 7)
DDD <- c("Mat", "ASG", "MNT", "NBEH", "DJ", "EU", "DHR")
test.data <- data.frame(AAA, BBB, CCC, DDD)
test.data

# Function to column bind all columns in a data.frame, grouped by given
# columns: cbind_by => function
cbind_by <- function(df, by){
  # Test that df is a data.frame: 
  # stop execution if not
  stopifnot(is.data.frame(df))
  # Test that the by vector contains values that are valid column names of the
  # df data.frame: 
  stopifnot(all(by %in% colnames(df)))
  # Private function to robustly row-bind two data.frames 
  # together regardless of schema differences: .robust_rbind => function
  .robust_rbind <- function(lhs_df, rhs_df){
    add_names_lhs <- setdiff(names(rhs_df), names(lhs_df))
    add_names_rhs <- setdiff(names(lhs_df), names(rhs_df))
    lhs_df[,add_names_lhs] <- NA
    rhs_df[,add_names_rhs] <- NA
    res_df <- rbind(lhs_df, rhs_df)
    return(res_df)
  }
  # Private function to robustly row-bind all data.frames in a list together
  # .robust_rbind_df_list => function
  .robust_rbind_df_list <- function(df_list){
    res_df <- Reduce(
      function(x, y){
        .robust_rbind(x, y)
      },
      df_list
    )
    return(res_df)
  }
  # Split-apply combine (column bind) by the by vector: 
  res_df <- .robust_rbind_df_list( 
    lapply(
      split(
        test.data, 
        do.call(
          paste, 
          as.data.frame(
            test.data[,by], 
            stringsAsFactors = FALSE
          )
        )
      ),
      function(x){
        # Cbind rows to cols: 
        ir <- suppressWarnings(
          cbind(
            tmp = x[,by],
            do.call(
              cbind, 
              split(
                x[,setdiff(names(x), by)], 
                seq_len(nrow(x))
              )
            )
          )
        )[1,]
        # Derive new col names: 
        nms <- gsub(
          "(\\d+)\\.(.*)", 
          "\\2_\\1", 
          names(ir[-seq_len(length(by))])
        )
        # Set the col names:
        setNames(
          ir,
          c(by, nms)
        )
      }
    )
  )
  return(res_df)
}

# Apply the function: res_df => data.frame
res_df <- cbind_by(
  df = test.data,
  by = "AAA"
)

# Display the result in the console: data.frame => stdout(console)
res_df

相关问题