在RStudio中强制不发生R会话中止

btqmn9zl  于 2023-11-14  发布在  其他
关注(0)|答案(1)|浏览(96)

我想为几个GLMM模型创建一个循环,但我知道有些因素是不可能拟合的。我创建了一些参数来避免错误,如skip_to_next <- FALSEtryCatch和最小点限制(if(length(unique(NEW_DS_F_pred_sub$DATE))>=4))。尽管有所有这些步骤,我总是有R Session Aborted,我找不到任何方法来忽略不太好的拟合因素。
在我的例子中:

library("glmmTMB")
library("dplyr")
library("ggeffects")
library("ggplot2")

NEW_DS_F_pred <- NULL
STAND <- c(rep("A",5),rep("B",3),rep("C",6),rep("D",4))
stands <- unique(STAND)
DATE <- c("2022-01-01","2022-02-12","2022-03-01","2022-04-05","2022-06-01",
"2022-01-01","2022-02-12","2022-03-01",
"2022-01-01","2022-02-12","2022-03-01","2022-04-05","2022-06-01","2022-06-20",
"2022-01-01","2022-02-12","2022-03-01","2022-04-05")
B2_MAX <- runif(n=length(DATE))
B3_MAX <- runif(n=length(DATE))
B4_MAX <- runif(n=length(DATE))
NEW_DS_F_pred <- cbind(STAND,DATE,B2_MAX,B3_MAX,B4_MAX) %>% as.data.frame()

for (i in 1:length(stands)){
        skip_to_next <- FALSE
        tryCatch(print(stands[i]), error = function(e) { skip_to_next <<- TRUE})

NEW_DS_F_pred_sub <- NEW_DS_F_pred%>%filter(STAND==stands[i])

if(length(unique(NEW_DS_F_pred_sub$DATE))>=4){

NEW_DS_F_pred_sub$DATE_TIME <- as.numeric(difftime(NEW_DS_F_pred_sub$DATE, as.Date("2022-06-30"), units = "days"))
NEW_DS_F_pred_sub$DATE_TIME <- as.numeric(NEW_DS_F_pred_sub$DATE_TIME)
NEW_DS_F_pred_sub$B2_MAX <- as.numeric(NEW_DS_F_pred_sub$B2_MAX)
NEW_DS_F_pred_sub$B3_MAX <- as.numeric(NEW_DS_F_pred_sub$B3_MAX)
NEW_DS_F_pred_sub$B4_MAX <- as.numeric(NEW_DS_F_pred_sub$B4_MAX)
NEW_DS_F_pred_sub<-as.data.frame(NEW_DS_F_pred_sub)

# Fit the model B2_MAX
glmm_fit_B2_MAX <- glmmTMB(B2_MAX ~ poly(DATE_TIME,3) + (1|DATE_TIME), data=NEW_DS_F_pred_sub, family=tweedie(link = "log"))
ggeffects::ggpredict(glmm_fit_B2_MAX, terms = "DATE_TIME [all]") %>% plot(add.data = TRUE) + 
  xlab('Time in days') +
  ylab('VI 1')

# Predict the values
glmm_fit_B2_MAX_new <- NULL
glmm_fit_B2_MAX_new$DATE_TIME <- seq(-180,1)
glmm_fit_B2_MAX_new$B2_MAX <- predict(
  glmm_fit_B2_MAX,
  newdata = glmm_fit_B2_MAX_new,
  type = c("response"))
glmm_fit_B2_MAX_new$STAND <- rep(stands[i], length(glmm_fit_B2_MAX_new$DATE_TIME)) 
glmm_fit_B2_MAX_new <- as.data.frame(glmm_fit_B2_MAX_new)
glmm_fit_B2_MAX_new <- glmm_fit_B2_MAX_new%>%dplyr::select(STAND,DATE_TIME,B2_MAX)

# Fit the model B3_MAX
glmm_fit_B3_MAX <- glmmTMB(B3_MAX ~ poly(DATE_TIME,3) + (1|DATE_TIME), data=NEW_DS_F_pred_sub, family=tweedie(link = "log"))

# Predict the values
glmm_fit_B3_MAX_new <- NULL
glmm_fit_B3_MAX_new$DATE_TIME <- seq(-180,1)
glmm_fit_B3_MAX_new$B3_MAX <- predict(
  glmm_fit_B3_MAX,
  newdata = glmm_fit_B3_MAX_new,
  type = c("response"))
glmm_fit_B3_MAX_new$STAND <- rep(stands[i], length(glmm_fit_B3_MAX_new$DATE_TIME)) 
glmm_fit_B3_MAX_new <- as.data.frame(glmm_fit_B3_MAX_new)

# Fit the model B4_MAX
glmm_fit_B4_MAX <- glmmTMB(B4_MAX ~ poly(DATE_TIME,3) + (1|DATE_TIME), data=NEW_DS_F_pred_sub, family=tweedie(link = "log"))

# Predict the values
glmm_fit_B4_MAX_new <- NULL
glmm_fit_B4_MAX_new$DATE_TIME <- seq(-180,1)
glmm_fit_B4_MAX_new$B4_MAX <- predict(
  glmm_fit_B4_MAX,
  newdata = glmm_fit_B4_MAX_new,
  type = c("response"))
glmm_fit_B4_MAX_new$STAND <- rep(stands[i], length(glmm_fit_B4_MAX_new$DATE_TIME)) 
glmm_fit_B4_MAX_new <- as.data.frame(glmm_fit_B4_MAX_new)

if(skip_to_next) { next }     
}
}
#
#

字符串
x1c 0d1x的数据
有没有办法强制循环继续而不中断R会话?
先谢谢你了!

rsaldnfx

rsaldnfx1#

如果您可以通过以下方式安装TMB的最新开发版本:

remotes::install_github("kaskr/adcomp/TMB")

字符串
(you需要安装开发工具),这将应用错误修复并阻止R崩溃。
我做了一个函数,里面有你的代码,格式如下:

do <- function(seed=NULL) {
    if (!is.null(seed)) {   
          cat("seed ", seed, "\n")
          set.seed(seed)
    }
    ## ... all of your code
}


然后跑

for (i in 1:200) { 
   do(i+100)
}


i==11,它终止了我的R会话,
std::length_error'; what():cannot create std::vector larger than max_size()进程R中止(core dumped)at Fri Nov 03 20:17:28 2023
现在我可以运行do(111)并让它立即崩溃。
现在,我可以/将逐步执行代码和/或对函数运行debug(),以查看这个随机数种子的数据的哪些方面导致了问题的爆发。
在逐步通过(使用debug())之后,我发现了崩溃发生的位置(在第四步(展台D),在第一个模型B2处)。

saveRDS(NEW_DS_F_pred_sub, file = "SO77422084_bad.rds")


保存“坏” Dataframe (我试过dput(),但似乎不起作用.)
现在这个最小的代码会使R崩溃:

library(glmmTMB)
NEW_DS_F_pred_sub <- readRDS("SO77422084_bad.rds")
## trim data set to make it even more minimal
bad <- NEW_DS_F_pred_sub[c("DATE_TIME", "B2_MAX")]
glmm_fit_B2_MAX <- glmmTMB(B2_MAX ~ poly(DATE_TIME,3) + 
                                   (1|DATE_TIME), data=bad,
                               family=tweedie(link = "log"))


会让R崩溃
一个最小的例子,有一点更多的代码,但从头开始运行(即不需要我们运行一堆代码并将结果保存到外部文件):

set.seed(111)
dd <- data.frame( STAND = rep(LETTERS[1:4], c(5,3,6,4)),
   DATE = c("2022-01-01","2022-02-12","2022-03-01","2022-04-05",
    "2022-06-01","2022-01-01","2022-02-12","2022-03-01",
    "2022-01-01","2022-02-12","2022-03-01","2022-04-05",
    "2022-06-01","2022-06-20","2022-01-01","2022-02-12",
                              "2022-03-01","2022-04-05"))
    dd$B2_MAX <- runif(n=nrow(dd))
    dd_sub <- subset(dd, STAND == "D")
    dd_sub$DATE_TIME <-as.numeric(difftime(dd_sub$DATE, as.Date("2022-06-30"), units = "days"))
glmmTMB(B2_MAX ~ poly(DATE_TIME,3) + 
                (1|DATE_TIME), data=dd_sub,
            family=tweedie(link = "log"))


奇怪的是,在bad上运行dput()并将bad重新定义为该值:

bad <- structure(list(DATE_TIME = c(-179.791666666667, -137.791666666667, 
                 -120.791666666667, -85.8333333333333),
         B2_MAX = c(0.156202515820041, 0.446427763439715, 
    0.171443687053397, 0.9665342932567)),
                     row.names = c(NA, -4L), class = "data.frame")


不会使R崩溃-在以二进制格式存储的数据集和创建ASCII表示时发生的情况之间一定存在一些非常微妙的差异.
然后我运行了debug(glmmTMB),并逐步通过了glmmTMB。崩溃发生在R试图运行优化时,在代码的这一点上。
更进一步,在nlminb中,我发现崩溃发生在这里:

.Call(C_port_nlminb, obj, grad, hess, rho, low, upp, d = rep_len(as.double(scale), 
    length(par)), iv, v)

值得注意的是,如果我使用

control=glmmTMBControl(optimizer=optim, optArgs=list(method="BFGS"))

因此,这一定不是优化器的错误,而是在优化过程中遇到的某些参数集触发了崩溃。
接下来我要做的事情(不是今晚!)是找出一种适当的方法来跟踪优化器正在尝试的参数,这样我们就可以找出导致崩溃的确切参数(并且,希望修复glmmTMB中看起来像bug的东西)
请参阅here了解更多关于此错误的信息。

相关问题