R:尝试在filter.data.frame中使用长度为零的变量名

jtw3ybtb  于 2023-02-26  发布在  其他
关注(0)|答案(2)|浏览(261)

bounty将在4天后过期。此问题的答案可获得+50声望奖励。Ben Bolker希望奖励现有答案:我今天遇到了这个问题,我自己可能会花更多的时间来找出答案,如果能找到一个解释为什么会出现这个特别模糊的错误消息的答案,那将是很有趣的(例如,在过程的什么时候,名为NA的列会引发“尝试使用零长度变量名”)?

我完全算不上 R 方面的行家,运行代码时遇到了下面这个错误:
Error in initialize(...) : attempt to use zero-length variable name
Calls: %>% ... filter.data.frame -> filter_rows -> <Anonymous> -> initialize -> .Call
根据日志,我把问题的范围缩小到了下面这一行代码:

# Offending statement: drops the `num` column with subset(), then pipes the
# result into dplyr's filter() to keep rows whose `player` is not 'Player'.
# The "zero-length variable name" error is raised inside the filter() step.
qb_stats_all <- subset(qb_stats_all, select = -c(num)) %>% filter(player != 'Player')

下面是之前的完整代码:

# Reproducible example for the error: scrape quarterback career tables from
# pro-football-reference.com page by page, stack them into one data frame,
# then rename the columns and filter the rows.
library(dplyr)
library(htmltab)
    
# Set earliest quarterback draft year
beginning_qb_year <- 1985

# Set most recent quarterback draft year
# NOTE(review): ending_qb_year is not used anywhere in the code shown; the
# URL below hard-codes year_max=2020 instead.
ending_qb_year <- 2020

# Scrape NFL quarterback career data
# url_start is the query URL up to and including "offset="; the numeric
# offset for each page is appended inside the loop.
url_start <- paste0('https://www.pro-football-reference.com/play-index/psl_finder.cgi?request=1&match=combined&year_min=', beginning_qb_year, '&year_max=2020&season_start=1&season_end=-1&pos%5B%5D=qb&draft_year_min=1936&draft_year_max=2020&draft_slot_min=1&draft_slot_max=500&draft_pick_in_round=pick_overall&conference=any&draft_pos%5B%5D=qb&draft_pos%5B%5D=rb&draft_pos%5B%5D=wr&draft_pos%5B%5D=te&draft_pos%5B%5D=e&draft_pos%5B%5D=t&draft_pos%5B%5D=g&draft_pos%5B%5D=c&draft_pos%5B%5D=ol&draft_pos%5B%5D=dt&draft_pos%5B%5D=de&draft_pos%5B%5D=dl&draft_pos%5B%5D=ilb&draft_pos%5B%5D=olb&draft_pos%5B%5D=lb&draft_pos%5B%5D=cb&draft_pos%5B%5D=s&draft_pos%5B%5D=db&draft_pos%5B%5D=k&draft_pos%5B%5D=p&c5val=1.0&order_by=pass_att&offset=')

# Offsets 0, 100, ..., 500: one request is made per offset.
offset_seq <- seq(0, 500, 100)
# Accumulator for the stacked pages; starts as an empty data frame.
qb_stats_all <- data.frame()

for (url_end in offset_seq) {
  
  print(paste0('Scraping from QB #', url_end, '!'))
  url <- paste0(url_start, url_end)
  # NOTE(review): `F` is a reassignable alias of FALSE; FALSE would be safer.
  # Presumably rm_nodata_cols = F keeps columns that contain no data, which
  # would explain the extra columns mentioned below - confirm against htmltab docs.
  data <- htmltab(url, which = 1, rm_nodata_cols = F)
  # Append this page's rows to the accumulated result.
  # NOTE(review): growing a data frame with rbind() in a loop copies it each time.
  qb_stats_all <- rbind(qb_stats_all, data)
  
}

# Clean NFL quarterback career data
# Exactly 27 names are assigned here. NOTE(review): according to the answers
# below, the scraped table has 32 columns, so columns 28:32 end up with NA
# names - that is what makes filter() fail on the next line.
colnames(qb_stats_all) <- c('num', 'player', 'start_year', 'end_year', 'draft', 'team', 'league', 'g', 'gs', 'cmp', 'att', 'cmp_pct', 'yds', 'td', 'int', 'td_pct', 'int_pct', 'passing_rate', 'sacked', 'sack_yds', 'yards_per_attempt', 'adj_yards_per_attempt', 'adj_net_yards_per_attempt', 'yds_gained_per_game', 'w', 'l', 't')
# Drop the `num` column, then keep only rows whose `player` is not the literal
# 'Player' (presumably header rows repeated inside the scraped table).
qb_stats_all <- subset(qb_stats_all, select = -c(num)) %>% filter(player != 'Player')
2wnc66cl

2wnc66cl1#

出现这个错误是因为有些列的列名是 NA。您可以把这些列删除。

library(dplyr)
# Remove `num` together with columns 28-32 (the ones whose names are NA),
# then keep only the rows whose `player` value is not 'Player'.
qb_stats_all <- qb_stats_all %>%
  select(-num, -(28:32)) %>%
  filter(player != 'Player')

或者给这些列重新命名,赋予它们合适的列名。

# Give columns 28-32 (the NA-named ones) the placeholder names col1 ... col5.
names(qb_stats_all)[28:32] <- paste0('col', 1:5)
# With every column named, filter() can run; keep rows where player is not 'Player'.
qb_stats_all <- filter(qb_stats_all, player != 'Player')
c6ubokkw

c6ubokkw2#

@BenBolker:这看起来像是 dplyr 的一个 bug,我已经提交了 bug 报告。

> library(dplyr)
> dd <- data.frame(1:10, rnorm(10), letters[1:10])
> names(dd) <- c("a", "b", NA)
> options(error = recover)
> filter(dd, b > 0)
Error in initialize(...) : attempt to use zero-length variable name

Enter a frame number, or 0 to exit   

1: filter(dd, b > 0)
2: filter.data.frame(dd, b > 0)
3: filter_rows(.data, dots, by)
4: DataMask$new(data, by, "filter", error_call = error_call)
5: initialize(...)

Selection: 4
Called from: initialize(...)
Browse[1]> debugonce(initialize)
Browse[1]> initialize(...)
debugging in: initialize(...)
debug: {
    rows <- by$data$.rows
    if (length(rows) == 0) {
        rows <- new_list_of(list(integer()), ptype = integer())
    }
    private$rows <- rows
    frame <- caller_env(n = 2)
    local_mask(self, frame)
    names_bindings <- chr_unserialise_unicode(names2(data))
    if (anyDuplicated(names_bindings)) {
        abort("Can't transform a data frame with duplicate names.", 
            call = error_call)
    }
    names(data) <- names_bindings
    private$size <- nrow(data)
    private$current_data <- dplyr_new_list(data)
    private$grouped <- by$type == "grouped"
    private$rowwise <- by$type == "rowwise"
    private$chops <- .Call(dplyr_lazy_vec_chop_impl, data, rows, 
        private$grouped, private$rowwise)
    private$mask <- .Call(dplyr_data_masks_setup, private$chops, 
        data, rows)
    private$keys <- group_keys0(by$data)
    private$by_names <- by$names
    private$verb <- verb
}
Browse[4]>

由于rlang::names2的行为(已记录!),names(data)中的NA在此处替换为""

names_bindings <- chr_unserialise_unicode(names2(data))
    if (anyDuplicated(names_bindings)) {
        abort("Can't transform a data frame with duplicate names.", 
            call = error_call)
    }
    names(data) <- names_bindings

关于零长度变量名的错误最后在这里抛出:

private$chops <- .Call(dplyr_lazy_vec_chop_impl, data, rows, 
        private$grouped, private$rowwise)

相关问题