如果column不是以substring开始,则将该行的所有内容移到以substring开头的上一行

66bbxpm5  于 2023-07-31  发布在  其他
关注(0)|答案(2)|浏览(96)

如果第一列V1不是以“KEGG_”子字符串开始,则将该行的所有内容移动到以“KEGG_”开始的前一行。

# Flag the rows whose first column starts with "KEGG_". The pattern is
# anchored with "^" so that a value that merely contains "KEGG_" somewhere in
# the middle is not mistaken for a header row. grepl() is base R and returns
# FALSE for NA, so no extra package is needed for the match.
is.kegg.row <- grepl("^KEGG_", kegg$V1)

# Rows that must be appended to the preceding "KEGG_" row.
move.these.rows <- kegg[!is.kegg.row, ]
# Header rows that receive the moved content.
rows.with.kegg <- kegg[is.kegg.row, ]

字符串
输入:

> df <- dput(kegg[26:31,1:5])
structure(list(V1 = c("KEGG_SNARE_INTERACTIONS_IN_VESICULAR_TRANSPORT", 
"KEGG_LYSOSOME", "CD164", "LIPA", "KEGG_CARDIAC_MUSCLE_CONTRACTION", 
"ATP1A3"), V3 = c("STX12", "PLA2G15", "HGSNAT", "AP1M1", "CACNA2D1", 
"ATP1A2"), V4 = c("STX2", "AP3B2", "ABCA2", "LAPTM4B", "CACNB1", 
"UQCRB"), V5 = c("VAMP5", "GGA1", "DNASE2B", "NAPSA", "COX8A", 
"COX6A2"), V6 = c("GOSR2", "SLC11A1", "AGA", "HEXB", "CACNB2", 
"SLC9A6")), row.names = 26:31, class = "data.frame")


所需输出:
| | V3| V4| V5| V6| V7| V8| V9| V10| V11| V12| V13| V12 | V13 |
| --|--|--|--|--|--|--|--|--|--|--|--|--| ------------ |
| STX12| STX2| VAMP5| GOSR2|||||||||||
| PLA2G15| AP3B2| GGA1| SLC11A1| CD164| HGSNAT| ABCA2| DNASE2B| AGA|AP1M1| LAPTM4B| NAPSA| HEXB| HEXB |
| CACNA2D1| CACNB1| COX8A| CACNB2| ATP1A3| ATP1A2| UQCRB| COX6A2| SLC9A6||||||

l2osamch

l2osamch1#

在base R中:

# Flatten the data frame, row by row, into one space-separated string, then
# start a new line in front of every token that begins with "KEGG_". Each
# pathway name and all the genes that follow it end up on a single line.
txt <- gsub("(^| )KEGG_", "\nKEGG_", do.call(paste, c(df, collapse = " ")))

# With fill = TRUE, read.table() decides the number of columns from the first
# five lines only, so a longer line further down would wrap onto extra rows.
# Size the column names from the widest line to avoid that.
con <- textConnection(txt)
n_col <- max(count.fields(con))
close(con)

read.table(text = txt, fill = TRUE, col.names = paste0("V", seq_len(n_col)))

                                              V1       V2     V3    V4      V5     V6     V7    V8      V9    V10  V11   V12     V13   V14  V15
1 KEGG_SNARE_INTERACTIONS_IN_VESICULAR_TRANSPORT    STX12   STX2 VAMP5   GOSR2                                                                 
2                                  KEGG_LYSOSOME  PLA2G15  AP3B2  GGA1 SLC11A1  CD164 HGSNAT ABCA2 DNASE2B    AGA LIPA AP1M1 LAPTM4B NAPSA HEXB
3                KEGG_CARDIAC_MUSCLE_CONTRACTION CACNA2D1 CACNB1 COX8A  CACNB2 ATP1A3 ATP1A2 UQCRB  COX6A2 SLC9A6

字符串
在tidyverse中:

# Requires dplyr, tidyr and stringr (e.g. via library(tidyverse)).
# Reshape to one cell per row, tag every cell with the most recent "KEGG_"
# header, then spread the cells of each header back out into numbered columns.
df %>%
  pivot_longer(everything()) %>%
  # Anchor the match: only a cell that starts with "KEGG_" opens a new group.
  mutate(id = if_else(str_detect(value, "^KEGG_"), value, NA_character_)) %>%
  # Carry each header down over the cells that follow it.
  fill(id) %>%
  # Drop the header cells themselves; they are kept in `id`.
  filter(id != value) %>%
  # Number the remaining cells within each header to build the column names.
  mutate(name = row_number(), .by = id) %>%
  pivot_wider(names_prefix = "V")
  
  # A tibble: 3 × 15
  id    V1    V2    V3    V4    V5    V6    V7    V8    V9    V10   V11   V12   V13   V14  
  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 KEGG… STX12 STX2  VAMP5 GOSR2 NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
2 KEGG… PLA2… AP3B2 GGA1  SLC1… CD164 HGSN… ABCA2 DNAS… AGA   LIPA  AP1M1 LAPT… NAPSA HEXB 
3 KEGG… CACN… CACN… COX8A CACN… ATP1… ATP1… UQCRB COX6… SLC9… NA    NA    NA    NA    NA

mm5n2pyu

mm5n2pyu2#

tidyverse工作流:

library(dplyr)

# Running count of header rows: every row whose V1 starts with "KEGG_" opens
# a new group, and the rows after it inherit the same group number.
kegg_block <- cumsum(grepl("^KEGG_", df$V1))

# For each group, read its rows cell by cell (row-wise, via the transpose)
# into one vector, then spread that vector across columns V1, V2, ...
df %>%
  group_by(rowid = kegg_block) %>%
  summarise(V = list(as.vector(t(pick(everything()))))) %>%
  tidyr::unnest_wider(V, names_sep = '')

# # A tibble: 3 × 16
#   rowid V1                                             V2       V3     V4    V5      V6     V7     V8    V9      V10    V11   V12   V13     V14   V15  
#   <int> <chr>                                          <chr>    <chr>  <chr> <chr>   <chr>  <chr>  <chr> <chr>   <chr>  <chr> <chr> <chr>   <chr> <chr>
# 1     1 KEGG_SNARE_INTERACTIONS_IN_VESICULAR_TRANSPORT STX12    STX2   VAMP5 GOSR2   NA     NA     NA    NA      NA     NA    NA    NA      NA    NA   
# 2     2 KEGG_LYSOSOME                                  PLA2G15  AP3B2  GGA1  SLC11A1 CD164  HGSNAT ABCA2 DNASE2B AGA    LIPA  AP1M1 LAPTM4B NAPSA HEXB 
# 3     3 KEGG_CARDIAC_MUSCLE_CONTRACTION                CACNA2D1 CACNB1 COX8A CACNB2  ATP1A3 ATP1A2 UQCRB COX6A2  SLC9A6 NA    NA    NA      NA    NA

字符串

相关问题