R语言 使用未缩放的值手动绘制GLM预测

yhqotfr8  于 2023-05-26  发布在  其他
关注(0)|答案(1)|浏览(182)

我有一个有两个变量的模型,crop和pop,两者都有缩放值。
我正在使用GLMMtmb在其上运行一个模型,但我不确定如何手动绘制此模型的预测,反向转换缩放的变量值。
一个数据块是在一个可重复的例子后结束。
所以对于我的数据框列:动物是动物的存在或不存在,作物和流行的变量,可能会影响存在或不存在。
所以我运行模型

model <- glmmTMB(animal~crop+pop,family="poisson",data=dummy)

我从某人那里收到了一些手动绘制预测的代码,但它不起作用。这是代码,例如,'pop'变量。

range <- range(dummy$pop)

nd <- expand.grid(pop = seq(range[1], range[2], by=0.5))

dummy <- as.data.frame(predict(model, newdata=nd, se.fit=TRUE))
dummy$lower <- dummy$fit - 1.96*dummy$se.fit
dummy$upper <- dummy$fit + 1.96*dummy$se.fit

dummy$fit_trans <- plogis(dummy$fit)
dummy$low_trans <- plogis(dummy$lower)
dummy$up_trans <- plogis(dummy$upper)
dummy <- cbind(dummy, nd)

这应该给予我一个dataframe,我可以提供给ggplot,但它出错了,我创建对象'nd'(newdata)。需要添加一些东西,但我不确定是什么。此外,我没有看到在这段代码中,我的变量数据在创建新的数据框之前是未缩放的?

structure(list(animal = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0), crop = structure(c(-0.911367192804466, -0.656468577561402, 
0.937306298620515, -0.748068186743915, -0.950289715380232, -0.950289715380232, 
-0.311140903098454, -0.950289715380232, -0.420299555068266, -0.204030814471523, 
0.50447619733435, -0.950289715380232, -0.777918539996587, -0.950289715380232, 
-0.950289715380232, -0.840253112662893, -0.839667809844811, -0.949704414301948, 
-0.950289715380232, -0.332797072571516, -0.950289715380232, 0.28586616195498, 
-0.950289715380232, -0.949704414301948, -0.950289715380232, -0.217785406339271, 
-0.950289715380232, -0.949704414301948, -0.950289715380232, -0.950289715380232, 
-0.950289715380232, -0.917220203587303, -0.950289715380232, -0.950289715380232, 
-0.950289715380232, -0.950289715380232, -0.683099761835028, -0.950289715380232, 
-0.950289715380232, -0.950289715380232, -0.950289715380232, -0.950289715380232, 
-0.950289715380232, -0.420006934975588, -0.950289715380232, -0.86512840935985, 
-0.950289715380232, -0.880053585986186, -0.950289715380232, -0.947070559449672, 
0.886092437742609, 1.83281709105803, 1.11640850164685, -0.950289715380232, 
-0.946485258371388, 1.37335566631441, -0.126771093015647, -0.949704414301948, 
-0.896734664107575, -0.123551937954986, -0.826498527754337, -0.950289715380232, 
-0.307921748037792, 1.68180931195255, 0.606903807743094, -0.190568898369987, 
-0.669052549874602, 1.74970429966619, -0.941217548666834, 1.81701382450034, 
0.217678665495733, -0.235637069219248, 0.166464804617827, -0.949411763762807, 
-0.945022005675679, 0.0827666616935611, 0.362833263318178, 1.5957700969398, 
0.778982349115678, 1.85652176820044, 1.54836074265497, -0.950289715380232, 
0.153588184375181, 1.1480148120681, -0.950289715380232, -0.860153345844944, 
-0.551699672370031, -0.916927552178262, -0.791673117945951, -0.949704414301948, 
-0.580672067915984, 1.70053900213117, 0.212703567184868, 0.465553605166667, 
-0.490535698380696, 0.512963070798555, -0.40449645553117, -0.949704414301948, 
0.437459181245815, 1.78921202067216, -0.939168994892841), dim = c(101L, 
1L)), pop = structure(c(-0.197547924591922, -0.194351971156629, 
-0.188274258566538, -0.196448483438051, -0.197547924591922, -0.197534996338939, 
-0.197544742856045, -0.19666972080707, -0.184138726259079, -0.187096628792688, 
-0.181474291574343, -0.197126836606957, -0.191909922110918, -0.197541385064299, 
-0.197547924591922, -0.180014490953617, -0.196345541198624, -0.194080008065368, 
-0.197547924591922, -0.169718619816262, -0.1973883809182, -0.19069419228899, 
-0.19750465288443, -0.197547924591922, -0.197547924591922, -0.196222722433591, 
-0.197547924591922, -0.197007381120755, -0.197547924591922, -0.197547924591922, 
-0.197547924591922, -0.196084866482173, -0.196199779840005, -0.197547924591922, 
-0.197547924591922, -0.196775288017525, -0.195128528488411, -0.197547924591922, 
-0.197547924591922, -0.196661275901739, -0.197547924591922, -0.197547924591922, 
-0.197547924591922, -0.195309309220705, -0.197547924591922, -0.196083127378007, 
-0.197547924591922, -0.197047712962236, -0.197547924591922, -0.19661973821873, 
0.624878194166003, -0.194073305851747, -0.165976797458507, -0.197547924591922, 
-0.189654680869701, -0.138488532034927, -0.182502054986232, -0.151862484978888, 
-0.146757177620867, -0.180411925114601, -0.196752195942981, -0.197416680215279, 
-0.183794718471439, -0.173372247384172, -0.196556998242149, -0.174186298469269, 
-0.178206794332005, -0.197547924591922, -0.196558496041342, -0.182985603845477, 
0.0359655292697121, 0.14216237743388, -0.170265068895674, -0.197547924591922, 
-0.19328285943275, -0.148427554030811, -0.115746893472626, -0.189714944074946, 
-0.185416997205207, -0.1959619567473, -0.0975791931695816, -0.197326675563388, 
-0.17579755900438, -0.0809452064393561, -0.197246026550915, 0.888153565621317, 
-0.189168753981969, -0.193326520514112, -0.191939980937669, -0.197547923770278, 
-0.193648598847386, -0.140792688570448, -0.162820943186907, -0.0971587686665226, 
-0.184731765563276, -0.166566876527242, -0.0244512966150544, 
-0.19624995376397, -0.147791915216302, -0.167901037976233, 16.3965287744725
), dim = c(101L, 1L))), class = "data.frame", row.names = c(2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 
43L, 45L, 46L, 47L, 49L, 50L, 51L, 52L, 53L, 3786L, 3787L, 3788L, 
3789L, 3790L, 3791L, 3792L, 3793L, 3794L, 3795L, 3796L, 3797L, 
3798L, 3799L, 3800L, 3801L, 3802L, 3803L, 3804L, 3805L, 3806L, 
3807L, 3808L, 3809L, 3810L, 3811L, 3812L, 3813L, 3814L, 3815L, 
3816L, 3817L, 3818L, 3819L, 3820L, 3821L, 3822L, 3823L, 3824L, 
3825L, 3826L, 3827L, 3828L, 3829L, 3830L, 3831L, 3832L, 3833L, 
3834L, 3835L, 3836L))
pkwftd7m

pkwftd7m1#

这里有几个问题。
1.如果你只测量动物的存在/不存在,那么你需要逻辑回归而不是泊松回归(即你应该在你的模型中有family = "binomial")。
1.目前还不清楚为什么要使用glmmTMB,因为你有一个固定效应模型,这意味着base R的glm也可以。
1.预测数据框的主要问题是,模型需要croppop的值,但仅提供pop的值
1.在虚拟数据框中,crop有一个极端离群值,这意味着整个范围内的等间距值不能代表数据。
1.如果你有两个因变量,很难用置信区间的上限和下限来可视化预测。
为了得到croppop的不同值上的概率预测的2D网格,我们可以做

library(glmmTMB)

model <- glmmTMB(animal ~ crop + pop, family = "binomial", data = dummy)

nd <- expand.grid(crop = seq(min(dummy$crop), max(dummy$crop), length = 10),
                  pop  = seq(-0.2, -0.15, length = 10))

preds <- predict(model, newdata = nd, se.fit = TRUE)

nd$pred  <- plogis(preds$fit)
nd$lower <- plogis(preds$fit - 1.96 * preds$se.fit)
nd$upper <- plogis(preds$fit + 1.96 * preds$se.fit)

head(nd)
#>          crop  pop      pred     lower     upper
#> 1 -0.95028972 -0.2 0.8231875 0.6957251 0.9045785
#> 2 -0.63842177 -0.2 0.7764348 0.6327054 0.8750293
#> 3 -0.32655383 -0.2 0.7215021 0.5445071 0.8488165
#> 4 -0.01468589 -0.2 0.6589997 0.4379221 0.8273954
#> 5  0.29718206 -0.2 0.5904329 0.3280019 0.8098054
#> 6  0.60905000 -0.2 0.5181596 0.2299935 0.7947321

在绘图时,我们可以将预测网格显示为geom_tile,如下所示:

library(ggplot2)

ggplot(nd, aes(crop, pop, fill = pred)) +
  geom_tile() +
  scale_fill_viridis_c("Probability of\nAnimal presence") +
  coord_fixed(60) +
  theme_minimal(base_size = 16)

如果你真的想看到置信带,你需要使用面:

ggplot(within(nd, pop <- factor(paste("pop =", round(pop, 3)))),
              aes(crop, pred)) +
  geom_errorbar(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  geom_point() +
  facet_wrap(. ~ pop) +
  theme_bw()

相关问题