R语言 在Tidymodels工作流集中实现tune_bayes

4nkexdtk  于 2022-12-06  发布在  其他
关注(0)|答案(1)|浏览(116)

我试图使用tidymodels构建一个workflow_set来比较使用贝叶斯方法调优的模型。我不确定如何将需要调优的参数Map到workflow_set中。

"Error in dials::grid_latin_hypercube(param, size = n) : \n  These arguments contains unknowns: `mtry`. See the `finalize()` function.\n"

如果不使用workflow_set,则需要提供初始值

xgb_wflow <-
   tree_frogs_wflow %>%
   add_model(xgb_spec)
 
   xgb_params <- extract_parameter_set_dials(xgb_spec)%>%
   finalize(tree_frogs_train)
   xgb_res <- 
   tune_bayes(
              object = xgb_wflow,
              resamples = folds,
              param_info=xgb_params,
              metrics=metric_set(pr_auc,roc_auc,accuracy),
              initial = 10,
              control =  ctrl_bayes)

不知道如何使用workflow_sets做到这一点。我的尝试是低于任何帮助是感激的。

library(tidymodels)
library(tidyverse)
library(stacks) 

data("tree_frogs")

tree_frogs <- tree_frogs %>%
  select(-c(clutch, latency)) 
set.seed(1)

tree_frogs_split <- initial_split(tree_frogs)
tree_frogs_train <- training(tree_frogs_split)
tree_frogs_test  <- testing(tree_frogs_split)

tree_frogs_rec <- 
  recipe(reflex ~ ., data = tree_frogs_train) %>%
  step_dummy(all_nominal(), -reflex) %>%
  step_zv(all_predictors())

set.seed(1)
folds <- rsample::vfold_cv(tree_frogs_train, v = 5)

## multinomial 

mlt_spec <- 
  multinom_reg(penalty = tune(), mixture = 1) %>% 
  set_engine("glmnet") %>% 
  set_mode("classification")

## Random Forest 

rand_forest_spec <- 
  rand_forest(
    mtry = tune(),
    min_n = tune(),
    trees = 500
  ) %>%
  set_mode("classification") %>%
  set_engine("ranger")

## XGBoost 

xgb_spec <- 
  boost_tree(
    trees = 1000,
    min_n = tune(),
    learn_rate = tune(),
    loss_reduction = tune(),
    sample_size = tune(),
    mtry = tune(),
    tree_depth = tune()
  ) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

all_workflows <- 
  workflow_set(
    preproc = list("basic_rec" = tree_frogs_rec),
    models = list(mlt = mlt_spec, rf = rand_forest_spec, xgb = xgb_spec)
  )

ctrl_bayes = control_bayes(save_pred = TRUE,
                           parallel_over = "everything",
                           save_workflow = TRUE)

bayes_results <-
  all_workflows %>%
  workflow_map(
    seed = 1,
    fn = "tune_bayes",
    resamples = folds,
    initial = 10,
    control = ctrl_bayes
  )

bayes_results %>% 
  rank_results() %>% 
  filter(.metric == "pr_auc") %>% 
  select(model, .config, pr_auc = mean, rank)
2j4z5cfb

2j4z5cfb1#

我在文档中使用options_add函数找到了答案

相关问题