在R中忽略循环索引?

mm9b1k5b  于 2022-12-06  发布在  其他
关注(0)|答案(2)|浏览(128)

我在R中有这个数据集:

# Fix the RNG state so the generated example data is reproducible.
set.seed(123)

# Generate `n` random 10-character IDs: five upper-case letters, a
# zero-padded four-digit number (0001-9999), and one trailing upper-case
# letter. Returns a character vector of length `n`.
myFun <- function(n = 5000) {
  # Five independent draws of n letters each, pasted together position-wise.
  letter_draws <- lapply(
    seq_len(5),
    function(k) sample(LETTERS, size = n, replace = TRUE)
  )
  prefix <- do.call(paste0, letter_draws)

  # The numeric part is drawn before the trailing letter.
  digits <- sprintf("%04d", sample(9999, size = n, replace = TRUE))
  suffix <- sample(LETTERS, size = n, replace = TRUE)

  paste0(prefix, digits, suffix)
}

# Four independent columns of 100 random IDs each.
col1 <- myFun(100)
col2 <- myFun(100)
col3 <- myFun(100)
col4 <- myFun(100)

# Label every row with one of four groups, drawn with replacement.
group <- sample(c("A", "B", "C", "D"), size = 100, replace = TRUE)

# Assemble the example data set: four ID columns plus the group label.
example <- data.frame(col1, col2, col3, col4, group)

       col1       col2       col3       col4 group
1 SKZDZ9876D BTAMF8110T LIBFV6882H ZFIPL4295E     A
2 NXJRX7189Y AIZGY5809C HSMIH4556D YJGJP8022H     C
3 XPTZB2035P EEKXK0873A PCPNW1021S NMROS4134O     A
4 LJMCM3436S KGADK2847O SRMUI5723N RDIXI7301N     B
5 ADITC6567L HUOCT5660P AQCNE3753K FUMGY1428B     D
6 BAEDP8491P IAGQG4816B TXXQH6337M SDACH5752D     C

我现在尝试运行以下双循环:

library(stringdist)
# Distance methods to try; each value is passed as `method =` to
# stringdistmatrix() below.
# NOTE(review): `%>%`, as_tibble() and pivot_longer() are used below but are
# not attached by library(stringdist) -- presumably tidyverse (or magrittr,
# tibble and tidyr) is attached elsewhere; confirm.
method = c("osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine", "jaccard", "jw","soundex")

# Holds one data frame per stored result; see the NOTE where it is filled.
results = list()

# Number of distinct groups in the data.
l = length(unique(example$group))

 # Outer loop: one pass per group. Inner loop: one pass per distance method.
 for (j in 1:l) {
for (i in 1:length(method)) {
   
        
        # Distinct group labels, in order of first appearance in the data.
        g = unique(example$group)

        # Label of the group handled in this outer pass.
        groups_j = g[j]

        # Rows of `example` that belong to the current group.
        my_data_i = example[which(example$group == groups_j  ), ]
        
        
        # Current method and the output column names derived from it.
        method_i = method[i]
        name_1_i = paste0("col1_col_2", method_i)
        name_2_i = paste0("col3_col_4", method_i)
        
        # All pairwise distances between col1 and col2 within the group,
        # reshaped from a matrix into long form (columns: a, b, distance).
        p1_i = stringdistmatrix(my_data_i$col1, my_data_i$col2, method =  method_i, useNames = "string") %>%
            as_tibble(rownames = "a") %>%
            pivot_longer(-1, names_to = "b", values_to = name_1_i)
        
        # Same computation for col3 against col4.
        p2_i = stringdistmatrix(my_data_i$col3, my_data_i$col4, method =  method_i, useNames = "string") %>%
            as_tibble(rownames = "a") %>%
            pivot_longer(-1, names_to = "b", values_to = name_2_i)
        
        # Keep only the third column (the distance values), dropping the
        # "a" and "b" string columns.
        p1_i = p1_i[,3]
        p2_i = p2_i[,3]
        
        # Two distance columns side by side plus a `groups_j` column holding
        # the current group label.
        final_i = cbind(p1_i, p2_i, groups_j)
        # NOTE: the slot index uses only i, not j, so every pass of the outer
        # loop writes into the same length(method) slots and replaces whatever
        # an earlier group stored there; `results` never holds more than one
        # entry per method.
        results[[i]] = final_i
        
    }
    
}

# Bind every stored data frame side by side into one wide data frame.
final = do.call(cbind.data.frame, results)

循环似乎可以正常运行,但当我检查最终结果时,我注意到“j”循环中的其他索引似乎被忽略了:

> table(final$groups_j)

  A 
441

而从原始数据中可以看到,实际上应该有4个组:

> table(example$group)

 A  B  C  D 
21 28 19 32

有人能帮我弄清楚为什么其他3组没有被我的循环处理吗?

谢谢!

3zwjbxry

3zwjbxry1#

这并不算一个正式的答案,我只是在你的代码基础上做了一些尝试。尽管如此,它也许能帮助你进行调试。

library(stringdist)
library(tidyverse)

# Compute, for every group and every distance method, all pairwise string
# distances col1-vs-col2 and col3-vs-col4 within the group, then stack the
# per-group results into a single tibble `test`.
#
# Relies on objects created earlier: `example` (data frame with col1..col4
# and group) and `method` (character vector of stringdist method names).

# Distinct group labels, in order of first appearance.
g <- unique(example$group)

# One slot per group, preallocated instead of grown inside the loop.
res_j <- vector("list", length(g))

for (j in seq_along(g)) {
  groups_j <- g[j]

  # The subset depends only on the group, so build it once per group rather
  # than once per method.
  my_data_j <- example[which(example$group == groups_j), ]

  # Fresh container for each group so nothing carries over from the
  # previous group's pass.
  results <- vector("list", length(method))

  for (i in seq_along(method)) {
    method_i <- method[i]
    name_1_i <- paste0("col1_col_2", method_i)
    name_2_i <- paste0("col3_col_4", method_i)

    # Pairwise distance matrix reshaped to long form (columns: a, b, value).
    p1_i <- stringdistmatrix(
      my_data_j$col1, my_data_j$col2,
      method = method_i, useNames = "string"
    ) %>%
      as_tibble(rownames = "a") %>%
      pivot_longer(-1, names_to = "b", values_to = name_1_i)

    p2_i <- stringdistmatrix(
      my_data_j$col3, my_data_j$col4,
      method = method_i, useNames = "string"
    ) %>%
      as_tibble(rownames = "a") %>%
      pivot_longer(-1, names_to = "b", values_to = name_2_i)

    # Keep only the distance column of each long table.
    results[[i]] <- bind_cols(p1_i[, 3], p2_i[, 3])
  }

  # All methods side by side for this group (two columns per method), plus
  # the group label. bind_cols() replaces the deprecated purrr::flatten().
  res_j[[j]] <- bind_cols(results)
  res_j[[j]]$group <- groups_j
}

# Stack the groups on top of each other. bind_rows() replaces
# map_dfr(res_j, as.tibble): as.tibble() is deprecated in favour of
# as_tibble(), and the pieces are already tibbles.
test <- bind_rows(res_j)

# Here's a summary table of the result set.
library(gtExtras)
gt_plt_summary(test)
g6ll5ycj

g6ll5ycj2#

这里有一个办法。
先用split按组把数据拆分成若干子数据集,再用lapply对每个子数据集执行原来的内层for循环,而不是先取unique(example$group)、再用这些值去遍历整个数据集。
第一个
创建于2022年11月27日,使用reprex v2.0.2

相关问题