R语言 如何在ggplot2中显示混合效应模型中仅一条回归线的置信区间?

pprl5pva  于 2023-10-13  发布在  其他
关注(0)|答案(1)|浏览(149)

下面的代码创建一个包含3列的数据框。

# Simulate purity measurements for several lots at 3-month intervals.
# Fixing the seed makes the simulated data reproducible.
set.seed(142222)
num_lots <- 5

# Pre-allocate the data frame: 9 time points (0, 3, ..., 24) per lot.
# The response column is named "Purity" up front so that the loop below
# fills it in place instead of leaving an unused all-zero placeholder
# column behind.
data <- data.frame(Lot = rep(seq_len(num_lots), each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Purity = numeric(num_lots * 9))

# Simulate purity data for each lot.
# The loop over lots is kept so random numbers are drawn in the same order
# as before: intercept, slope, then the nine noise terms of that lot.
for (lot in seq_len(num_lots)) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)

  # Linear trend plus noise for all time points of this lot at once
  rows <- data$Lot == lot
  data$Purity[rows] <- intercept + slope * data$Time[rows] +
    rnorm(sum(rows), mean = 0, sd = .35)
}

然后我用一个混合效应模型来拟合模拟数据。具体如下:

# Scatter plot of purity over time with one least-squares line per lot.
# Lot is mapped to both colour and shape, so each lot gets its own line.
ggplot(data, aes(x = Time, y = Purity,
                 color = as.factor(Lot), shape = as.factor(Lot))) +
  geom_point() +
  # `type` is not a geom_smooth() parameter (it was ignored with a warning),
  # so it is dropped; the formula is given explicitly to avoid the
  # "using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  # One axis break per sampled time point (0, 3, ..., 24)
  scale_x_continuous(breaks = seq(0, 24, by = 3))

结果如下所示:

**问题:**我想在worst regression line上只显示95% lower confidence bound。我怎么能这么做呢?

Worst regression line 是指比其他回归线更早与 y = 80 的水平线相交的那条线。我知道如果设置 se = TRUE,所有回归线的置信区间都会显示出来。但我只想得到最差回归线的置信下限。

**额外问题:**我怎样才能修复图例,使其只显示符号(而不是符号上的线)?

knpiaxh1

knpiaxh11#

您可以绘制两个 geom_smooth() 图层——一个用于 4 条“好”的回归线,一个用于 1 条“最差”的回归线,例如:

library(tidyverse)

# Simulate purity measurements for several lots at 3-month intervals.
set.seed(142222)
num_lots <- 5

# Pre-allocate the data frame: 9 time points (0, 3, ..., 24) per lot.
# The response column is named "Purity" so the loop fills it in place
# rather than leaving an unused all-zero placeholder column.
data <- data.frame(Lot = rep(seq_len(num_lots), each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Purity = numeric(num_lots * 9))

# Simulate purity data for each lot (intercept, slope, then nine noise terms)
for (lot in seq_len(num_lots)) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)

  # Linear trend plus noise for all time points of this lot at once
  rows <- data$Lot == lot
  data$Purity[rows] <- intercept + slope * data$Time[rows] +
    rnorm(sum(rows), mean = 0, sd = .35)
}

# Two smoothing layers: the hard-coded "worst" lot (Lot 2) gets a
# confidence band, the remaining lots get the fitted line only.
# `key_glyph = "point"` makes every layer draw its legend key as a point.
# The unknown `type` argument was removed: geom_smooth() ignored it and
# emitted an "Ignoring unknown parameters" warning for each layer.
ggplot(data = data,
       aes(x = Time, y = Purity,
           color = as.factor(Lot),
           shape = as.factor(Lot))) +
  geom_point(key_glyph = "point") +
  geom_smooth(data = data %>% filter(Lot == 2),
              method = "lm", se = TRUE,
              key_glyph = "point") +
  geom_smooth(data = data %>% filter(Lot != 2),
              method = "lm", se = FALSE,
              key_glyph = "point") +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))
#> `geom_smooth()` using formula = 'y ~ x'
#> `geom_smooth()` using formula = 'y ~ x'

创建于2023-10-12使用reprex v2.0.2

编辑

根据@stefan的评论,与其使用key_glyph = "point",不如使用show.legend = FALSE

library(tidyverse)

# Simulate purity measurements for several lots at 3-month intervals.
set.seed(142222)
num_lots <- 5

# Pre-allocate the data frame: 9 time points (0, 3, ..., 24) per lot.
# The response column is named "Purity" so the loop fills it in place
# rather than leaving an unused all-zero placeholder column.
data <- data.frame(Lot = rep(seq_len(num_lots), each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Purity = numeric(num_lots * 9))

# Simulate purity data for each lot (intercept, slope, then nine noise terms)
for (lot in seq_len(num_lots)) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)

  # Linear trend plus noise for all time points of this lot at once
  rows <- data$Lot == lot
  data$Purity[rows] <- intercept + slope * data$Time[rows] +
    rnorm(sum(rows), mean = 0, sd = .35)
}

# Two smoothing layers: the hard-coded "worst" lot (Lot 2) gets a
# confidence band, the remaining lots get the fitted line only.
# `show.legend = FALSE` keeps both smoothing layers out of the legend, so
# the legend keys come from geom_point() alone.
ggplot(data = data,
       aes(x = Time, y = Purity,
           color = as.factor(Lot),
           shape = as.factor(Lot))) +
  geom_point() +
  geom_smooth(data = data %>% filter(Lot == 2),
              method = "lm", formula = y ~ x,
              se = TRUE,
              show.legend = FALSE) +
  geom_smooth(data = data %>% filter(Lot != 2),
              method = "lm", formula = y ~ x,
              se = FALSE,
              show.legend = FALSE) +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))

创建于2023-10-12使用reprex v2.0.2

编辑2

你可以用不同的方式自动选择“最差”的回归线;最简单的方法是选择在时间 = 0 时纯度最低的批次,但合适的标准取决于您的数据(例如,也许您想选择在时间 = 24 时纯度最低的批次?)。你可以只画上界,但你必须自己计算坐标,例如:

library(tidyverse)

# Simulate purity measurements for several lots at 3-month intervals.
set.seed(142222)
num_lots <- 5

# Pre-allocate the data frame: 9 time points (0, 3, ..., 24) per lot.
# The response column is named "Purity" so the loop fills it in place
# rather than leaving an unused all-zero placeholder column.
data <- data.frame(Lot = rep(seq_len(num_lots), each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Purity = numeric(num_lots * 9))

# Simulate purity data for each lot (intercept, slope, then nine noise terms)
for (lot in seq_len(num_lots)) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)

  # Linear trend plus noise for all time points of this lot at once
  rows <- data$Lot == lot
  data$Purity[rows] <- intercept + slope * data$Time[rows] +
    rnorm(sum(rows), mean = 0, sd = .35)
}

# Select the worst regression line: the lot containing the single lowest
# observed purity. `with_ties = FALSE` guarantees exactly one lot is
# returned even if several observations share the minimum.
worst <- data %>%
  slice_min(Purity, n = 1, with_ties = FALSE) %>%
  pull(Lot)

# Build one linear model per lot.
# Inside the row-wise pipeline `data` refers to the nested per-lot data
# frame created by nest_by(), not to the global `data` object.
output <- data %>%
  nest_by(Lot) %>%
  reframe(model = list(lm(Purity ~ Time, data = data)))

# Look the model up by lot label rather than by position, so this still
# works when lot labels are not simply 1, 2, ..., n.
worst_model <- output$model[[match(worst, output$Lot)]]

# Apply the model and extract the ribbon coordinates.
# Predict once per distinct time point (not once per row of `data`), and
# let predict() compute the t-based 95% confidence limits; the previous
# `fit + 2 * se.fit` shortcut is too narrow with only 7 residual degrees
# of freedom. The result has columns Time, fit, lwr and upr.
input_df <- data.frame(Time = sort(unique(data$Time)))
preds <- predict(worst_model, newdata = input_df,
                 interval = "confidence", level = 0.95)
input_df <- cbind(input_df, as.data.frame(preds))

# Plot data and input_df.
# The ribbon covers the area between the fitted line and the upper
# confidence limit; use `ymin = lwr, ymax = fit` to shade the lower
# confidence bound instead.
ggplot(data = data,
       aes(x = Time, y = Purity,
           color = as.factor(Lot),
           shape = as.factor(Lot))) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x,
              se = FALSE,
              show.legend = FALSE) +
  geom_ribbon(data = input_df,
              aes(x = Time, ymin = fit, ymax = upr),
              inherit.aes = FALSE, linetype = 2,
              fill = "blue", alpha = 0.25) +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))

创建于2023-10-12使用reprex v2.0.2

相关问题