我有一个数据集，看起来像这样：
| 投资回报率|集团|识别号|每面积检测数|
| - ------|- ------|- ------|- ------|
| 脑干|A类|京科002|0.0327285333827735美元|
| 小脑|A类|京科002|0.149208050073911|
| 脑干|A类|京科002|0.0336959892253705美元|
| 小脑|A类|京科002|无|
已截断：此处为完整数据集

structure(list(ROI = c("Brainstem", "Cerebellum", "Brainstem", 
"Cerebellum", "Brainstem", "Cerebellum", "Brainstem", "Cerebellum", 
"Brainstem", "Brainstem", "Brainstem", "Cerebellum", "Brainstem", 
"Brainstem", "Cerebellum", "Brainstem", "Brainstem", "Brainstem", 
"Cerebellum", "Cerebellum", "Brainstem", "Brainstem", "Brainstem", 
"Cerebellum", "Brainstem", "Brainstem", "Cerebellum", "Cerebellum", 
"Brainstem", "Brainstem", "Cerebellum", "Brainstem", "Brainstem", 
"Brainstem", "Cerebellum", "Cerebellum", "Brainstem", "Brainstem", 
"Brainstem", "Brainstem", "Cerebellum", "Brainstem", "Cerebellum"
), Group = c("A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C"), ID = c("JK002", "JK002", "JK002", "JK002", 
"JK003", "JK003", "JK003", "JK003", "JK003", "JK004", "JK004", 
"JK004", "JK004", "JK005", "JK005", "JK005", "JK005", "JK006", 
"JK006", "JK006", "JK006", "JK006", "JK007", "JK007", "JK007", 
"JK008", "JK008", "JK008", "JK008", "JK008", "JK011", "JK011", 
"JK011", "JK011", "JK009", "JK009", "JK009", "JK009", "JK009", 
"JK010", "JK010", "JK010", "JK010"), Detections_per_area = c(0.0327285333827735, 
0.149208050073911, 0.0336959892253705, 0, 0.615965559353422, 
0.117064703241855, 0.171329148144879, 0, 0.131086542762028, 0.143700717389906, 
0.0590510074394172, 0, 0.0610284572370045, 0.358989653774582, 
0.113758644699529, 0.289455536567144, 0, 0.0355596373594045, 
0, 0.0661718135522336, 0.243313220125351, 0.373564943266101, 
0.591264576854053, 0.322774099148268, 0.725656324981598, 0.46343627586687, 
0, 0.227796467592069, 0.0876146804249713, 0.297011825126973, 
0.163792893059129, 0.336883295806429, 1.08791432091601, 0.370788052321182, 
1.49652281121774, 1.18495456669418, 3.36132331547411, 0.951628515574163, 
1.63575637052095, 0.408866067869112, 0, 1.47956381894319, 0.368780820643061
)), row.names = c(NA, -43L), class = "data.frame")

我尝试使用R将其转换为Graphpad棱镜友好格式。这是预期的输出，对应于Detections_per_Area的值填充在（x）中。令人讨厌的是，棱镜要求所有组的大小相同，无论值的数量如何，因此下面为空。
| | A类|A类|A类|A类|A类|乙|乙|乙|乙|乙|C级|C级|C级|C级|C级|
| - ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|- ------|
| | 京科002|京科002|京科003|JK004|JK005|京科001|JK009|京科010|无效|无效|JK006|JK007|JK008|京科011|无效|
| 脑干|x|x|x|x|x|x|x|x|无效|无效|x|x|x|x|无效|
| 小脑|x||x|x|x|x|x|x|无效|无效|x|x|x|x|无效|
如果没有ID列，我可以使用以下代码生成预期的输出：

library(tidyr)
library(dplyr)

Count<-data_source %>% count(ROI,Group, sort = TRUE)
highest = as.numeric(Count[1,'n'])
range = seq(1, highest)

  data_wide <- data_source %>% arrange(Group) %>% 
    group_by(ROI,Group) %>% mutate(rn = row_number()) %>% complete(rn = range) %>% 
    pivot_wider(
      names_from = c(Group, rn),
      values_from = Detections_per_area
)
  
names(data_wide) = gsub(pattern = "_.*", replacement = "", x = names(data_wide))
names(data_wide) = gsub(pattern = "ROI", replacement = "", x = names(data_wide))

这将对最大的组进行计数，然后在透视之前生成与之对应的行号。
任何关于最好的方法的建议都是很好的，我想在名称中添加ID，然后删除后透视，但是没有任何运气。

# Determine structure (including the placeholders with "Null") ds_skeleton <- data_source |> dplyr::select(ROI, Group) |> dplyr::group_by(ROI, Group) |> dplyr::mutate( rn = dplyr::row_number(Group), ) |> dplyr::ungroup() |> tidyr::complete(ROI, Group, rn) # Order the observed data (with holes) d2 <- data_source |> dplyr::select( ROI, Group, ID, y = Detections_per_area, # Shorten to help with SO's narrow window. ) |> dplyr::group_by(ROI, Group) |> dplyr::mutate( rn = dplyr::row_number(Group), ) |> dplyr::ungroup()

ds_long <- ds_skeleton |> dplyr::left_join(d2, by = c("ROI", "Group", "rn")) |> dplyr::mutate( ID = dplyr::coalesce(ID, "Null"), # tag = paste(Group, rn, ID, sep = "_"), tag = sprintf( "%s_%03i_%s", # Pad the rn to accommodate "001" to "999" Group, rn, ID ), y = dplyr::if_else(ID == "Null", "Null", as.character(y)), ) |> dplyr::select( ROI, tag, y, ) # Pivot the values to the wide format ds_wide <- ds_long |> tidyr::pivot_wider( id_cols = "ROI", names_from = "tag", values_from = "y" )

# Construct the two header rows pattern <- "^([A-Z])_\\d{3}_(.+)$" # Extracting info from tag ds_header <- ds_wide |> dplyr::slice(0) |> # Drop the rows b/c we want only the column header tibble::add_row(ROI = "header") |> tidyr::pivot_longer( cols = -"ROI", names_to = "tag", values_to = "header_1" ) |> dplyr::mutate( header_1 = sub(pattern, "\\1", tag), header_2 = sub(pattern, "\\2", tag), ) |> dplyr::select(-ROI) |> tidyr::pivot_longer( cols = c("header_1", "header_2"), names_to = "ROI") |> tidyr::pivot_wider(names_from = "tag") |> dplyr::mutate( ROI = "" ) # Combine the headers with the data ds_header |> dplyr::union_all(ds_wide)

1条答案

按热度按时间

8fsztsew1#

正如我在评论中所说的，我对如何重用这些ID感到不舒服。从我所知道的来看，没有好的方法来确定“0.0327285333827735”（第1行）应与“0.149208050073911”关联（第3行）或“0”（在第4行）。下面的片段正确地处理了这个问题，假设顺序是有意义的。但是我真的很不舒服“脑干+A+ JK 003”有三行，而“Cerebellum+A+ JK 003”有两个，我强烈建议不要重用这些ID。
我假设这是可以修复的。这就是这个块所模拟的。一个ID不能在ROI-by-Group组合中重用。

data_source <-
  data_source |> 
  dplyr::group_by(ROI, Group, ID) |> 
  dplyr::slice_head() |> 
  dplyr::ungroup()

首先平衡数据集，使每个ROIxGroup具有相同的行数。使用“Null”占位符，如您在上面指定的。

然后将结构与值连接。

最后，构造头部并将其堆叠在真实的数据之上（可能有一种更简洁的方法来处理dplyr::across()）。

要创建csv，请将col_names = FALSE传递给readr::write_csv()。

赞(0）回复(0）举报 2023-01-18

R pivot_widder，带有多个标题

1条答案

相关问题

热门标签

最新问答