R语言 根据循环或自定义函数的多个条件重新整形数据

oalqel3c  于 2023-01-28  发布在  其他
关注(0)|答案(3)|浏览(121)

我试图找出最有效的方法来转换我的数据从表1到表2如下。因为我有大量的数据在表中,循环是不理想的使用在这种情况下。因此需要Maven的意见,以更快的方式完成这一点。

route_id <- c('FR00020604','FR00020604','FR00020604','FR00020604','FR00026117','FR00026117','FR00026117',
'FR00026117','FR00026117')
 
flow_type <- c('STD','STD','NSTD','NSTD','STD','STD','STD','STD','STD')
    
prod <- c('654495','654495','654495','654495','732919','732919','732921','732921','732921')
    
route_seg_num <- c('01','02','01','02','01','02','01','02','03')
    
ship_from <- c('S22491IE02','S22521DE09','S22491IE02','S22521DE09','S00745BR01','S01480PA01',
'S00745BR01','S01480PA01','S100142300')
    
ship_to <- c('S22521DE09','S85879GB03','S22521DE09','S85879GB03','S01480PA01','S100142300',
'S01480PA01','S100142300','S100153252')

表一

表二

vs3odd8k

vs3odd8k1#

使用tidyr s pivot_wider的方法

library(dplyr)
library(tidyr)

pivot_wider(df, c(route_id, flow_type, prod), 
    names_from=route_seg_num, values_from=c(ship_to, ship_from)) %>% 
  rename(origin = ship_from_01) %>% 
  select(route_id:origin) %>% 
  rename_with(function(x) 
    sub(".*_(\\d)+$", "dest_\\1", x), starts_with("ship")) %>%
  relocate(origin, .after=prod)
# A tibble: 4 × 7
  route_id   flow_type prod   origin     dest_1     dest_2     dest_3    
  <chr>      <chr>     <chr>  <chr>      <chr>      <chr>      <chr>     
1 FR00020604 STD       654495 S22491IE02 S22521DE09 S85879GB03 NA        
2 FR00020604 NSTD      654495 S22491IE02 S22521DE09 S85879GB03 NA        
3 FR00026117 STD       732919 S00745BR01 S01480PA01 S100142300 NA        
4 FR00026117 STD       732921 S00745BR01 S01480PA01 S100142300 S100153252
数据
df <- structure(list(route_id = c("FR00020604", "FR00020604", "FR00020604", 
"FR00020604", "FR00026117", "FR00026117", "FR00026117", "FR00026117", 
"FR00026117"), flow_type = c("STD", "STD", "NSTD", "NSTD", "STD", 
"STD", "STD", "STD", "STD"), prod = c("654495", "654495", "654495", 
"654495", "732919", "732919", "732921", "732921", "732921"), 
    route_seg_num = c("01", "02", "01", "02", "01", "02", "01", 
    "02", "03"), ship_from = c("S22491IE02", "S22521DE09", "S22491IE02", 
    "S22521DE09", "S00745BR01", "S01480PA01", "S00745BR01", "S01480PA01", 
    "S100142300"), ship_to = c("S22521DE09", "S85879GB03", "S22521DE09", 
    "S85879GB03", "S01480PA01", "S100142300", "S01480PA01", "S100142300", 
    "S100153252")), class = "data.frame", row.names = c(NA, -9L
))
pxy2qtax

pxy2qtax2#

df %>%
  group_by(route_id, flow_type) %>%
  mutate(origin = first(ship_from))%>%
  pivot_wider(-ship_from,names_from = route_seg_num, values_from = ship_to,
              names_prefix = 'dest_')


 route_id   flow_type prod   origin     dest_01    dest_02    dest_03   
  <chr>      <chr>     <chr>  <chr>      <chr>      <chr>      <chr>     
1 FR00020604 STD       654495 S22491IE02 S22521DE09 S85879GB03 NA        
2 FR00020604 NSTD      654495 S22491IE02 S22521DE09 S85879GB03 NA        
3 FR00026117 STD       732919 S00745BR01 S01480PA01 S100142300 NA        
4 FR00026117 STD       732921 S00745BR01 S01480PA01 S100142300 S100153252
htzpubme

htzpubme3#

下面是一个基于tidyverse的解决方案。

library(tidyverse)

df |>
  pivot_wider(names_from = route_seg_num, values_from = c(ship_from, ship_to)) |>
  select(-ship_from_03, -ship_to_01) |> 
  rename("origin" = "ship_from_01", 
         "dest1" = "ship_from_02", 
         "dest2" = "ship_to_02", 
         "dest3" = "ship_to_03")

#> # A tibble: 4 × 7
#> # Groups:   route_id, flow_type [3]
#>   route_id   flow_type prod   origin     dest1      dest2      dest3     
#>   <chr>      <chr>     <chr>  <chr>      <chr>      <chr>      <chr>     
#> 1 FR00020604 STD       654495 S22491IE02 S22521DE09 S85879GB03 <NA>      
#> 2 FR00020604 NSTD      654495 S22491IE02 S22521DE09 S85879GB03 <NA>      
#> 3 FR00026117 STD       732919 S00745BR01 S01480PA01 S100142300 <NA>      
#> 4 FR00026117 STD       732921 S00745BR01 S01480PA01 S100142300 S100153252
    • 数据**
route_id <- c(
  "FR00020604", "FR00020604", "FR00020604", "FR00020604", "FR00026117", "FR00026117", "FR00026117",
  "FR00026117", "FR00026117"
)

flow_type <- c("STD", "STD", "NSTD", "NSTD", "STD", "STD", "STD", "STD", "STD")

prod <- c("654495", "654495", "654495", "654495", "732919", "732919", "732921", "732921", "732921")

route_seg_num <- c("01", "02", "01", "02", "01", "02", "01", "02", "03")

ship_from <- c(
  "S22491IE02", "S22521DE09", "S22491IE02", "S22521DE09", "S00745BR01", "S01480PA01",
  "S00745BR01", "S01480PA01", "S100142300"
)

ship_to <- c(
  "S22521DE09", "S85879GB03", "S22521DE09", "S85879GB03", "S01480PA01", "S100142300",
  "S01480PA01", "S100142300", "S100153252"
)

df <- data.frame(
  route_id, flow_type, prod, route_seg_num, ship_from, ship_to
)

相关问题