R语言 如何在ggplot生成的图中添加多条模拟图线?

rkue9o1l  于 2023-05-04  发布在  其他
关注(0)|答案(2)|浏览(128)

我如何将多条绘图线(如细蓝线)添加到本文底部代码中显示的现有ggplot代码中,并包含在图例“Simulations”中,用于我的数据框simPaths_df中的模拟运行?模拟在时段500中开始。该simPaths_df Dataframe 具有针对该时间段的单个“x”列,并且多个y轴模拟输出位于 Dataframe 中标记为y1、y2的右侧列中。..等等
抱歉,代码看起来很长。实际的代码很短,但是ggplot部分很长,而且很复杂,因为我对ggplot不是很熟悉。我愿意接受任何简化这个烂摊子的建议!
下面的图片显示了我正在尝试做的事情:

验证码:

library(dplyr)
library(ggplot2)
library(MASS)
library(survival)

lung1 <- lung %>% 
  mutate(time1 = ifelse(time >= 500, 500, time)) %>% 
  mutate(status1 = ifelse(status == 2 & time >= 500, 1, status))

weibCurve <- function(time, survregCoefs) {exp(-(time/exp(survregCoefs[1]))^exp(-survregCoefs[2]))}

fit1 <- survreg(Surv(time1, status1) ~ 1, data = lung1)

lung1.survfit <- survfit(Surv(time1, status1) ~ 1, data = lung1)
lung1.df <- data.frame(time = lung1.survfit$time, 
                       survival = lung1.survfit$surv, 
                       upper_95 = lung1.survfit$upper, 
                       lower_95 = lung1.survfit$lower)

# generate simulation paths
n_simulations <- 10
simPaths <- data.frame(x = seq(from = 500, to = 1000, by = 5))
simPathList <- lapply(1:n_simulations, function(i) {
  newCoef <- MASS::mvrnorm(n = 1, fit1$icoef, vcov(fit1))
  y <- weibCurve(simPaths$x, newCoef)
  simPaths[[paste0("y", i)]] <- y
  simPaths
})

simPaths_df <- Reduce(function(x, y) merge(x, y, by = "x", all = TRUE), simPathList)

lung1.df %>%
  ggplot(aes(x = time, y = survival)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = "Confidence Interval"), 
              alpha = 0.2) +
  scale_fill_manual(values = c("Confidence Interval" = "grey50"), name = NULL) +
  guides(color = guide_legend(order = 1), fill  = guide_legend(order = 2)) +
  geom_line(aes(y = survival, color = "Historical data"), size = 1) +
  scale_x_continuous(limits = c(0, 1500)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0.05)) +
  labs(x = "Time", y = "Survival probability", color = NULL) +
  theme_classic() +
  stat_function(fun = weibCurve, args = list(survregCoefs = fit1$icoef), 
                aes(color = "Weibull distribution fit"), size = 1, n = 1000) +
  scale_color_manual(values = c("blue", "red", "grey50"), 
                     labels = c("Historical data", "Weibull distribution fit", "Confidence intervals")) +
  labs(color = NULL) +
  theme(legend.position = c(0.95, 0.95), legend.justification = c(1, 1),
        legend.title.align = 0.5, legend.box.spacing = unit(0.3, "lines"), 
        legend.margin = margin(t = 0, r = 0, b = 0, l = 0), legend.title = element_text(size = 12), 
        legend.text = element_text(size = 10))
sgtfey8w

sgtfey8w1#

为了实现这一点,需要做一些改变:

  • 修复一个错字,"Confidence Interval"-〉"...vals"
  • 我推荐使用scale_color_manual中的命名值而不是值和标签;它应该是一样的,我发现自己在做循环在过去的决议是这样做。
  • simPaths_df数据进行透视/整形,使其“更长”。
simPaths_df_long <- pivot_longer(simPaths_df, -x, names_to = "sim", values_to = "survival")
simPaths_df_long
# # A tibble: 1,010 × 3
#        x sim   survival
#    <dbl> <chr>    <dbl>
#  1   500 y1       0.282
#  2   500 y2       0.269
#  3   500 y3       0.291
#  4   500 y4       0.260
#  5   500 y5       0.271
#  6   500 y6       0.308
#  7   500 y7       0.294
#  8   500 y8       0.284
#  9   500 y9       0.335
# 10   500 y10      0.289
# # ℹ 1,000 more rows
# # ℹ Use `print(n = ...)` to see more rows

lung1.df %>%
  ggplot(aes(x = time, y = survival)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = "Confidence Interval"), 
              alpha = 0.2) +
  scale_fill_manual(values = c("Confidence Interval" = "grey50"), name = NULL) +
  guides(color = guide_legend(order = 1), fill  = guide_legend(order = 2)) +
  geom_line(aes(y = survival, color = "Historical data"), linewidth = 1) +
  scale_x_continuous(limits = c(0, 1500)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0.05)) +
  labs(x = "Time", y = "Survival probability", color = NULL) +
  theme_classic() +
  stat_function(fun = weibCurve, args = list(survregCoefs = fit1$icoef), 
                aes(color = "Weibull distribution fit"), size = 1, n = 1000) +
  scale_color_manual(values = c("Historical data"="blue", "Weibull distribution fit"="red", "Confidence interval"="grey50", "Simulations"="lightblue")) +
  labs(color = NULL) +
  theme(legend.position = c(0.95, 0.95), legend.justification = c(1, 1),
        legend.title.align = 0.5, legend.box.spacing = unit(0.3, "lines"), 
        legend.margin = margin(t = 0, r = 0, b = 0, l = 0), legend.title = element_text(size = 12), 
        legend.text = element_text(size = 10)) +
  geom_line(aes(x = x, group = sim, color = "Simulations"), data=simPaths_df_long)

秩序很重要。如果您希望蓝色线条更像背景,并且“低于”所有其他元素,那么将其放置在第一个geom_line之前,例如

lung1.df %>%
  ggplot(aes(x = time, y = survival)) +
  geom_line(aes(x = x, group = sim, color = "Simulations"), data=simPaths_df_long) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = "Confidence Interval"), 
              alpha = 0.2) +
  scale_fill_manual(values = c("Confidence Interval" = "grey50"), name = NULL) +
  guides(color = guide_legend(order = 1), fill  = guide_legend(order = 2)) +
  geom_line(aes(y = survival, color = "Historical data"), linewidth = 1) +
  scale_x_continuous(limits = c(0, 1500)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0.05)) +
  labs(x = "Time", y = "Survival probability", color = NULL) +
  theme_classic() +
  stat_function(fun = weibCurve, args = list(survregCoefs = fit1$icoef), 
                aes(color = "Weibull distribution fit"), size = 1, n = 1000) +
  scale_color_manual(values = c("Historical data"="blue", "Weibull distribution fit"="red", "Confidence interval"="grey50", "Simulations"="lightblue")) +
  labs(color = NULL) +
  theme(legend.position = c(0.95, 0.95), legend.justification = c(1, 1),
        legend.title.align = 0.5, legend.box.spacing = unit(0.3, "lines"), 
        legend.margin = margin(t = 0, r = 0, b = 0, l = 0), legend.title = element_text(size = 12), 
        legend.text = element_text(size = 10))

zbdgwd5y

zbdgwd5y2#

有个办法

suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(MASS)
  library(survival)
})

theme_so_q76146318 <- function(){ 
  theme_classic(base_size = 10) %+replace%    #
    theme(
      legend.position = c(0.95, 0.95), 
      legend.justification = c(1, 1),
      legend.title.align = 0.5, 
      legend.box.spacing = unit(0.3, "lines"), 
      legend.margin = margin(t = 0, r = 0, b = 0, l = 0), 
      legend.title = element_text(size = 12), 
      legend.text = element_text(size = 10)
    )
}


lung1 <- lung %>% 
  mutate(time1 = ifelse(time >= 500, 500, time)) %>% 
  mutate(status1 = ifelse(status == 2 & time >= 500, 1, status))

weibCurve <- function(time, survregCoefs) {
  exp(-(time/exp(survregCoefs[1]))^exp(-survregCoefs[2]))
}

fit1 <- survreg(Surv(time1, status1) ~ 1, data = lung1)

lung1.survfit <- survfit(Surv(time1, status1) ~ 1, data = lung1)
lung1.df <- data.frame(time = lung1.survfit$time, 
                       survival = lung1.survfit$surv, 
                       upper_95 = lung1.survfit$upper, 
                       lower_95 = lung1.survfit$lower)

# generate simulation paths
n_simulations <- 10
simPaths <- data.frame(x = seq(from = 500, to = 1500, by = 5))
simPathList <- lapply(1:n_simulations, function(i) {
  newCoef <- MASS::mvrnorm(n = 1, fit1$icoef, vcov(fit1))
  y <- weibCurve(simPaths$x, newCoef)
  simPaths[[paste0("y", i)]] <- y
  simPaths
})

simPaths_df <- Reduce(function(x, y) merge(x, y, by = "x", all = TRUE), simPathList)
simPaths_df_long <- simPaths_df %>% tidyr::pivot_longer(-x)

clrs <- c(`Historical data` = "blue", 
          `Weibull distribution fit` = "red", 
          `Simulations` = "#30a6e9", 
          `Confidence intervals` = "grey50")

lung1.df %>%
  ggplot(aes(x = time, y = survival)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = "Confidence Interval"), 
              alpha = 0.2) +
  geom_line(aes(y = survival, color = "Historical data"), linewidth = 1) +
  geom_line(
    data = simPaths_df_long,
    mapping = aes(x, value, group = name, color = "Simulations"),
    alpha = 0.5
  ) +
  stat_function(fun = weibCurve, args = list(survregCoefs = fit1$icoef), 
                aes(color = "Weibull distribution fit"), size = 1, n = 1000) +
  scale_x_continuous(limits = c(0, 1500)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0.05)) +
  labs(x = "Time", y = "Survival probability", color = NULL) +
  scale_fill_manual(values = c("Confidence Interval" = "grey50"), name = NULL) +
  scale_color_manual(values = clrs) +
  guides(color = guide_legend(order = 1), fill  = guide_legend(order = 2)) +
  theme_so_q76146318()
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.

创建于2023-05-01使用reprex v2.0.2

相关问题