R语言 根据相应的行替换数据

6rqinv9w  于 2023-03-27  发布在  其他
关注(0)|答案(3)|浏览(186)

我有一些数据看起来像这样:

# A tibble: 10 × 4
   code  country              iso_o iso_d
   <chr> <chr>                <chr> <chr>
 1 ABW   Aruba                ABW   ABW  
 2 AFG   Afghanistan          ABW   AFG  
 3 AGO   Angola               ABW   AGO  
 4 AIA   Anguilla             ABW   AIA  
 5 ALB   Albania              ABW   ALB  
 6 AND   Andorra              ABW   AND  
 7 ANT   Netherland Antilles  ABW   ANT  
 8 ARE   United Arab Emirates ABW   ARE  
 9 ARG   Argentina            ABW   ARG  
10 ARM   Armenia              ABW   ARM

我需要处理图片中的前四个变量:code、country、iso_o、iso_d。变量country包含国家名称,变量code包含国家代码,两者一一对应。如果列iso_o中的值与code中的某个代码匹配,我希望iso_o中的该单元格改为与该代码对应的国家名称。提前感谢。
我期待的是:
如果code = ABW并且它对应的国家是Aruba,我希望iso_o和iso_d在有ABW的地方说Aruba
数据如下:

# Example data as an R object literal: 10 rows with the character columns
# code, country, iso_o and iso_d, carrying the tibble classes
# ("tbl_df", "tbl", "data.frame"). The expression is not assigned to a name here.
structure(list(code = c("ABW", "AFG", "AGO", "AIA", "ALB", "AND", 
"ANT", "ARE", "ARG", "ARM"), country = c("Aruba", "Afghanistan", 
"Angola", "Anguilla", "Albania", "Andorra", "Netherland Antilles", 
"United Arab Emirates", "Argentina", "Armenia"), iso_o = c("ABW", 
"ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW"
), iso_d = c("ABW", "AFG", "AGO", "AIA", "ALB", "AND", "ANT", 
"ARE", "ARG", "ARM")), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))
deikduxw

deikduxw1#

有点笨拙,但是:

library(dplyr)
# Code -> country table, with the code column named iso_o so that the natural
# join below matches on iso_o; the country name arrives as iso_o2.
origin_names <- select(df, iso_o = code, iso_o2 = country)
df %>%
  left_join(origin_names) %>%
  # Keep the original column order, exposing the joined name as iso_o.
  select(code, country, iso_o = iso_o2, iso_d)

结果

Joining with `by = join_by(iso_o)`
# A tibble: 10 × 4
   code  country              iso_o iso_d
   <chr> <chr>                <chr> <chr>
 1 ABW   Aruba                Aruba ABW  
 2 AFG   Afghanistan          Aruba AFG  
 3 AGO   Angola               Aruba AGO  
 4 AIA   Anguilla             Aruba AIA  
 5 ALB   Albania              Aruba ALB  
 6 AND   Andorra              Aruba AND  
 7 ANT   Netherland Antilles  Aruba ANT  
 8 ARE   United Arab Emirates Aruba ARE  
 9 ARG   Argentina            Aruba ARG  
10 ARM   Armenia              Aruba ARM
pexxcrt2

pexxcrt22#

您的问题中存在歧义,并且不完全清楚您希望输出的结果是什么。我将“如果列iso_o中的信息与代码中的信息匹配,我希望iso_o中的那个单元格采用国家名称”解释为您只想修改满足该条件的行。您可以尝试which()。在此示例中,df是您的数据:

# Rows where code equals iso_o; which() drops comparisons that evaluate to NA.
# Columns are addressed by name so the code does not depend on column order,
# and the row set is computed once instead of on both sides of the assignment.
matched_rows <- which(df$code == df$iso_o)
# On those rows only, overwrite iso_o with the country name from the same row.
df$iso_o[matched_rows] <- df$country[matched_rows]

这会用which()找出“code”列和“iso_o”列取值相同的行,并把这些行的“country”列的值写入该行的“iso_o”列。但是,您在问题的后面又说希望同时修改“iso_o”和“iso_d”两列,在这种情况下请使用:

# Rows where code equals iso_o; which() drops comparisons that evaluate to NA.
# Columns are addressed by name so the code does not depend on column order.
matched_rows <- which(df$code == df$iso_o)
# On those rows only, write the country name into both iso_o and iso_d.
# Two explicit assignments avoid relying on a one-column value being recycled
# across two target columns.
df$iso_o[matched_rows] <- df$country[matched_rows]
df$iso_d[matched_rows] <- df$country[matched_rows]
pes8fvy9

pes8fvy93#

有许多可能的方法来解决这个问题;除了已经提供的答案之外,这些解决方案中是否有适合您的用例的?
基于dplyr软件包的解决方案:

# Attach dplyr without the conflict messages. The argument name is spelled out
# in full rather than relying on partial matching of an abbreviated name.
library(dplyr, warn.conflicts = FALSE)

# Example data: 10 rows; code/country form the code -> name mapping, while
# iso_o and iso_d hold codes that are to be replaced by country names.
df <- structure(
  list(
    code = c(
      "ABW", "AFG", "AGO", "AIA", "ALB", "AND", "ANT", "ARE", "ARG", "ARM"
    ),
    country = c(
      "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "Andorra",
      "Netherland Antilles", "United Arab Emirates", "Argentina", "Armenia"
    ),
    iso_o = c(
      "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW"
    ),
    iso_d = c(
      "ABW", "AFG", "AGO", "AIA", "ALB", "AND", "ANT", "ARE", "ARG", "ARM"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
df
#> # A tibble: 10 × 4
#>    code  country              iso_o iso_d
#>    <chr> <chr>                <chr> <chr>
#>  1 ABW   Aruba                ABW   ABW  
#>  2 AFG   Afghanistan          ABW   AFG  
#>  3 AGO   Angola               ABW   AGO  
#>  4 AIA   Anguilla             ABW   AIA  
#>  5 ALB   Albania              ABW   ALB  
#>  6 AND   Andorra              ABW   AND  
#>  7 ANT   Netherland Antilles  ABW   ANT  
#>  8 ARE   United Arab Emirates ABW   ARE  
#>  9 ARG   Argentina            ABW   ARG  
#> 10 ARM   Armenia              ABW   ARM

# Build the lookup as a named character vector:
# the values are country names, the names are the matching codes.
lookup <- setNames(df$country, df$code)
lookup
#>                    ABW                    AFG                    AGO 
#>                "Aruba"          "Afghanistan"               "Angola" 
#>                    AIA                    ALB                    AND 
#>             "Anguilla"              "Albania"              "Andorra" 
#>                    ANT                    ARE                    ARG 
#>  "Netherland Antilles" "United Arab Emirates"            "Argentina" 
#>                    ARM 
#>              "Armenia"

# dplyr::recode() with the named vector spliced in as code = "name" pairs,
# applied to every column whose name starts with "iso".
mutate(
  df,
  across(
    .cols = starts_with("iso"),
    .fns = ~ recode(.x, !!!lookup)
  )
)
#> # A tibble: 10 × 4
#>    code  country              iso_o iso_d               
#>    <chr> <chr>                <chr> <chr>               
#>  1 ABW   Aruba                Aruba Aruba               
#>  2 AFG   Afghanistan          Aruba Afghanistan         
#>  3 AGO   Angola               Aruba Angola              
#>  4 AIA   Anguilla             Aruba Anguilla            
#>  5 ALB   Albania              Aruba Albania             
#>  6 AND   Andorra              Aruba Andorra             
#>  7 ANT   Netherland Antilles  Aruba Netherland Antilles 
#>  8 ARE   United Arab Emirates Aruba United Arab Emirates
#>  9 ARG   Argentina            Aruba Argentina           
#> 10 ARM   Armenia              Aruba Armenia

# Index the named vector by each cell's value; values without an entry in
# `lookup` yield NA there, and coalesce() then falls back to the original value.
mutate(
  df,
  across(
    .cols = starts_with("iso"),
    .fns = ~ coalesce(lookup[.x], .x)
  )
)
#> # A tibble: 10 × 4
#>    code  country              iso_o iso_d               
#>    <chr> <chr>                <chr> <chr>               
#>  1 ABW   Aruba                Aruba Aruba               
#>  2 AFG   Afghanistan          Aruba Afghanistan         
#>  3 AGO   Angola               Aruba Angola              
#>  4 AIA   Anguilla             Aruba Anguilla            
#>  5 ALB   Albania              Aruba Albania             
#>  6 AND   Andorra              Aruba Andorra             
#>  7 ANT   Netherland Antilles  Aruba Netherland Antilles 
#>  8 ARE   United Arab Emirates Aruba United Arab Emirates
#>  9 ARG   Argentina            Aruba Argentina           
#> 10 ARM   Armenia              Aruba Armenia

# purrr::modify_if() with the named vector: only the elements that appear in
# df$code are passed to the replacement function; the rest are left as they are.
library(purrr)
df %>%
  mutate(
    across(
      starts_with("iso"),
      function(col) {
        modify_if(col, col %in% df$code, function(code) lookup[code])
      }
    )
  )
#> # A tibble: 10 × 4
#>    code  country              iso_o iso_d               
#>    <chr> <chr>                <chr> <chr>               
#>  1 ABW   Aruba                Aruba Aruba               
#>  2 AFG   Afghanistan          Aruba Afghanistan         
#>  3 AGO   Angola               Aruba Angola              
#>  4 AIA   Anguilla             Aruba Anguilla            
#>  5 ALB   Albania              Aruba Albania             
#>  6 AND   Andorra              Aruba Andorra             
#>  7 ANT   Netherland Antilles  Aruba Netherland Antilles 
#>  8 ARE   United Arab Emirates Aruba United Arab Emirates
#>  9 ARG   Argentina            Aruba Argentina           
#> 10 ARM   Armenia              Aruba Armenia

# using stringr str_replace_all and named vector
library(stringr)
# str_replace_all() uses the names of the vector as regular-expression
# patterns. Anchor each code with ^...$ so that only cells that are exactly a
# code are replaced, never a code occurring as a substring of a longer value.
# NOTE: assumes the codes contain no regex metacharacters (true for the
# three-letter codes in this data); escape them first if that may not hold.
anchored_lookup <- setNames(lookup, paste0("^", names(lookup), "$"))
df %>%
  mutate(iso_d = str_replace_all(iso_d, anchored_lookup),
         iso_o = str_replace_all(iso_o, anchored_lookup))
#> # A tibble: 10 × 4
#>    code  country              iso_o iso_d               
#>    <chr> <chr>                <chr> <chr>               
#>  1 ABW   Aruba                Aruba Aruba               
#>  2 AFG   Afghanistan          Aruba Afghanistan         
#>  3 AGO   Angola               Aruba Angola              
#>  4 AIA   Anguilla             Aruba Anguilla            
#>  5 ALB   Albania              Aruba Albania             
#>  6 AND   Andorra              Aruba Andorra             
#>  7 ANT   Netherland Antilles  Aruba Netherland Antilles 
#>  8 ARE   United Arab Emirates Aruba United Arab Emirates
#>  9 ARG   Argentina            Aruba Argentina           
#> 10 ARM   Armenia              Aruba Armenia

由 reprex v2.0.2 创建于 2023-03-23
一些基础R解决方案:

# Example data: 10 rows; code/country form the code -> name mapping, while
# iso_o and iso_d hold codes that are to be replaced by country names.
df <- structure(
  list(
    code = c(
      "ABW", "AFG", "AGO", "AIA", "ALB", "AND", "ANT", "ARE", "ARG", "ARM"
    ),
    country = c(
      "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "Andorra",
      "Netherland Antilles", "United Arab Emirates", "Argentina", "Armenia"
    ),
    iso_o = c(
      "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW"
    ),
    iso_d = c(
      "ABW", "AFG", "AGO", "AIA", "ALB", "AND", "ANT", "ARE", "ARG", "ARM"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Lookup table: the code and country columns of the data.
lookup_df <- df[, c("code", "country")]

# Base R match(): locate each iso_* value among the lookup codes, then
# overwrite only the cells that were found with the corresponding country.
df2 <- df
origin_idx <- match(df2$iso_o, lookup_df$code)
origin_hit <- !is.na(origin_idx)
df2$iso_o[origin_hit] <- lookup_df$country[origin_idx[origin_hit]]
dest_idx <- match(df2$iso_d, lookup_df$code)
dest_hit <- !is.na(dest_idx)
df2$iso_d[dest_hit] <- lookup_df$country[dest_idx[dest_hit]]
df2
#>    code              country iso_o                iso_d
#> 1   ABW                Aruba Aruba                Aruba
#> 2   AFG          Afghanistan Aruba          Afghanistan
#> 3   AGO               Angola Aruba               Angola
#> 4   AIA             Anguilla Aruba             Anguilla
#> 5   ALB              Albania Aruba              Albania
#> 6   AND              Andorra Aruba              Andorra
#> 7   ANT  Netherland Antilles Aruba  Netherland Antilles
#> 8   ARE United Arab Emirates Aruba United Arab Emirates
#> 9   ARG            Argentina Aruba            Argentina
#> 10  ARM              Armenia Aruba              Armenia

# Base R for loop over the lookup rows. The comparisons read from the
# untouched `df`, while the replacements are written into the copy `df2`.
df2 <- df
for (row in seq_len(nrow(lookup_df))) {
  iso_code <- lookup_df$code[row]
  country_name <- lookup_df$country[row]
  df2$iso_o[df$iso_o == iso_code] <- country_name
  df2$iso_d[df$iso_d == iso_code] <- country_name
}
df2
#>    code              country iso_o                iso_d
#> 1   ABW                Aruba Aruba                Aruba
#> 2   AFG          Afghanistan Aruba          Afghanistan
#> 3   AGO               Angola Aruba               Angola
#> 4   AIA             Anguilla Aruba             Anguilla
#> 5   ALB              Albania Aruba              Albania
#> 6   AND              Andorra Aruba              Andorra
#> 7   ANT  Netherland Antilles Aruba  Netherland Antilles
#> 8   ARE United Arab Emirates Aruba United Arab Emirates
#> 9   ARG            Argentina Aruba            Argentina
#> 10  ARM              Armenia Aruba              Armenia

由 reprex v2.0.2 创建于 2023-03-23

相关问题