使用ForestPlot包对估计值进行分组时,使用侧表绘制森林图

iszxjhcz  于 2023-06-03  发布在  其他
关注(0)|答案(1)|浏览(347)

我使用forestplot软件包来绘制我在相同变量上计算的多个模型的估计值,这些变量由一个分组变量分开:

# package calls
library(forestplot)
library(dplyr)
library(broom)

model_list <- 
  lapply(split(mtcars, mtcars$cyl),
         \(x) lm(mpg ~ hp + drat, data = x) %>%
           broom::tidy()) %>%
  bind_rows(.id = "cyl")
> model_list
# A tibble: 9 × 6
  cyl   term        estimate std.error statistic p.value
  <chr> <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 4     (Intercept) 22.6       20.2        1.12   0.295 
2 4     hp          -0.0896     0.0714    -1.25   0.245 
3 4     drat         2.82       4.09       0.689  0.510 
4 6     (Intercept) 19.3        5.96       3.24   0.0318
5 6     hp          -0.00956    0.0301    -0.317  0.767 
6 6     drat         0.456      1.53       0.297  0.781 
7 8     (Intercept) 11.6        6.29       1.84   0.0930
8 8     hp          -0.0286     0.0184    -1.56   0.148 
9 8     drat         2.95       2.52       1.17   0.267
> dput(model_list)
structure(list(cyl = c("4", "4", "4", "6", "6", "6", "8", "8", 
"8"), term = c("(Intercept)", "hp", "drat", "(Intercept)", "hp", 
"drat", "(Intercept)", "hp", "drat"), estimate = c(22.5842331369155, 
-0.0896163727194673, 2.82123086860038, 19.276290671286, -0.00955656724590332, 
0.45603134914307, 11.5738076494898, -0.0286147185507174, 2.94579083260769
), std.error = c(20.1598442589198, 0.0714382978379768, 4.09205373078535, 
5.95554159948471, 0.030110518391565, 1.53447737693666, 6.2934127394834, 
0.018398031862226, 2.5187181104732), statistic = c(1.120258313847, 
-1.25445839880898, 0.689441306055121, 3.23669818257232, -0.317383019502595, 
0.297190011398842, 1.83903521484908, -1.55531410995476, 1.16955955506043
), p.value = c(0.29510529382976, 0.245084968663166, 0.510044820506449, 
0.0317731650193357, 0.766829525981112, 0.781115912768507, 0.093035417836973, 
0.148154964992997, 0.266896538389878)), row.names = c(NA, -9L
), class = c("tbl_df", "tbl", "data.frame"))

我想用一个森林图来可视化它,左手边有一个表格,其中包含各种模型的信息(例如,估计的p值)。
仅绘制估计值的工作原理与我预期的一样:

model_list %>%
  mutate(
    mean = estimate,
    lower= estimate-(1.96*std.error),
    upper= estimate+(1.96*std.error)) %>%
  select(cyl, term, mean, std.error, lower, upper, p.value) %>%
  group_by(cyl) %>%
  forestplot(
    labeltext = term,
    boxsize = .1,
    grid = TRUE,
    ci.vertices = TRUE,
    ci.vertices.height = 0.05
  )

...但是当我尝试添加更多列时,我得到一个错误:

model_list %>%
  mutate(
    mean = estimate,
    lower= estimate-(1.96*std.error),
    upper= estimate+(1.96*std.error)) %>%
  select(cyl, term, mean, std.error, lower, upper, p.value) %>%
  group_by(cyl) %>%
  forestplot(
    labeltext = c(term, p.value),
    boxsize = .1,
    grid = TRUE,
    ci.vertices = TRUE,
    ci.vertices.height = 0.05
  )
Error in `dplyr::mutate()`:
! Problem while computing `data = lapply(...)`.
ℹ The error occurred in group 1: .fp_groups = "4".
Caused by error in `if (df$.fp_labels[i] != all_labels[i]) ...`:
! missing value where TRUE/FALSE needed
Run `rlang::last_error()` to see where the error occurred.

我假设它正在p.value列上查找相同的值,但它找不到它们。我想在图中添加一个或多个额外的列,包含与该特定估计相关的所有值。有什么办法可以让我这么做吗?
谢谢你。

km0tfn4u

km0tfn4u1#

我在forestplot软件包中遇到了同样的问题,特别是分组forestplot --这个问题是唯一的“标签”,在这种情况下,你需要为每一行都有完全相同的标签才能有一个分组图。
在您的示例中,您有9个唯一值标签,而您只需要在尝试绘制的三个组之间共享三个标签。

library(tidyverse)
library(forestplot)
#> Loading required package: grid
#> Loading required package: checkmate
#> Loading required package: abind
# Checking erros in 'forestplot' package
# 1. Loading data-Frame
x <- structure(list(
      cyl = c("4", "4", "4", "6", "6", "6", "8", "8", "8"), 
      term = c("(Intercept)", "hp", "drat", "(Intercept)", "hp",  "drat", "(Intercept)", "hp", "drat"), 
      estimate = c(22.5842331369155, -0.0896163727194673, 2.82123086860038, 19.276290671286, -0.00955656724590332, 0.45603134914307, 11.5738076494898, -0.0286147185507174, 2.94579083260769), 
      std.error = c(20.1598442589198, 0.0714382978379768, 4.09205373078535, 5.95554159948471, 0.030110518391565, 1.53447737693666, 6.2934127394834, 0.018398031862226, 2.5187181104732), 
      statistic = c(1.120258313847, -1.25445839880898, 0.689441306055121, 3.23669818257232, -0.317383019502595, 0.297190011398842, 1.83903521484908, -1.55531410995476, 1.16955955506043), 
      p.value = c(0.29510529382976, 0.245084968663166, 0.510044820506449, 0.0317731650193357, 0.766829525981112, 0.781115912768507, 0.093035417836973, 0.148154964992997, 0.266896538389878)), 
      row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame")) %>%
      mutate(
            mean = estimate,
            lower= estimate-(1.96*std.error),
            upper= estimate+(1.96*std.error)) %>%
      select(cyl, term, mean, std.error, lower, upper, p.value) %>%
      group_by(cyl)

# 2. Setting unique terms do display on forest plot and getting mean, lower and upper
lblid <- substitute(c(term, p.value))
mean <- rlang::as_name("mean")
lower <- rlang::as_name("lower")
upper <- rlang::as_name("upper")

# 3. Getting group variable Names
groups <- attr(x, "groups") |>
      dplyr::select(-.rows & where(\(col) length(unique(col)) > 1)) |>
      colnames()

# 4. Creating a fixed Column
core_data <- x |>
      dplyr::ungroup() |>
      dplyr::select({{ lblid }},
                    mean = {{ mean }},
                    lower = {{ lower }},
                    upper = {{ upper }}) |>
      dplyr::bind_cols(x |>
                             tidyr::unite(".fp_groups", dplyr::all_of(groups), sep = " > ", remove = FALSE) |>
                             tidyr::unite(".fp_labels", {{lblid}}, sep = " > ") |>
                             dplyr::select(dplyr::starts_with(".fp"), dplyr::all_of(groups))) |>
      dplyr::group_by(.fp_groups)

# 5. Creating all possible Labels to be Created
all_labels <- core_data |>
      tidyr::nest() |>
      dplyr::pull(data) |>
      lapply(\(x) x$.fp_labels) |>
      unlist() |>
      unique()

all_labels
#> [1] "(Intercept) > 0.29510529382976"   "hp > 0.245084968663166"          
#> [3] "drat > 0.510044820506449"         "(Intercept) > 0.0317731650193357"
#> [5] "hp > 0.766829525981112"           "drat > 0.781115912768507"        
#> [7] "(Intercept) > 0.093035417836973"  "hp > 0.148154964992997"          
#> [9] "drat > 0.266896538389878"

# 6. Cheking problems with Levels
bad_rows <- core_data |>
      dplyr::mutate(level = sapply(.fp_labels, \(lbl) which(all_labels == lbl)[[1]])) |>
      dplyr::filter(level > dplyr::lead(level))
if (nrow(bad_rows) > 0) {
      stop("There are seem be invalid the labels: ", bad_rows$.fp_labels |> paste(collapse = ", "),
           "\n appear in the wrong position.")
}

# 7. Checking problems with Duplicates
bad_rows <- core_data |>
      dplyr::group_by(.fp_groups, .fp_labels) |>
      dplyr::summarise(n = dplyr::n(), .groups = "drop") |>
      dplyr::filter(n > 1)

if (nrow(bad_rows) > 0) {
      stop("There are seem be non-unique labels: ", bad_rows$.fp_labels |> paste(collapse = ", "))
}

# 8. Nesting data-frame and checking unique values per group (they must share same values)
temp_df <- core_data |>
      tidyr::nest()

# -- Showing Lables per Group
temp_df$data[[1]][['.fp_labels']]
#> [1] "(Intercept) > 0.29510529382976" "hp > 0.245084968663166"        
#> [3] "drat > 0.510044820506449"
temp_df$data[[2]][['.fp_labels']]
#> [1] "(Intercept) > 0.0317731650193357" "hp > 0.766829525981112"          
#> [3] "drat > 0.781115912768507"
temp_df$data[[3]][['.fp_labels']]
#> [1] "(Intercept) > 0.093035417836973" "hp > 0.148154964992997"         
#> [3] "drat > 0.266896538389878"

for(df in temp_df$data){
      for (i in 1:length(all_labels)){
            if (df$.fp_labels[i] != all_labels[i]) {
                  new_row <- core_data |>
                        dplyr::ungroup() |>
                        dplyr::select({{lblid}}, .fp_labels) |>
                        dplyr::filter(.fp_labels == all_labels[i]) |>
                        dplyr::distinct(.fp_labels, .keep_all = TRUE)
                  
                  df <- tibble::add_row(df,
                                        new_row,
                                        .before = i)
            }
            
            
      }
}
#> Error in if (df$.fp_labels[i] != all_labels[i]) {: missing value where TRUE/FALSE needed

相关问题