R语言如何在分组箱线图(ggplot)上执行t检验并绘制组间比较的p值？

rslzwgfq 于 12个月前发布在其他

关注(0)|答案(1)|浏览(128)

我有一个数据框（data frame），其内容如下所示：

> dput(filtered_lymph)
structure(list(cluster = c("CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells"
), condition = c("B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4", 
"KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5", 
"B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1", 
"KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2", 
"B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3", 
"KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2", 
"B16_NTX_3", "B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", 
"KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", 
"B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", 
"B16_AC484_1", "B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", 
"KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6", 
"B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4", "KPC_NTX_1", 
"KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1", 
"B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2", 
"KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2", "B16_AC484_3", 
"B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4", 
"KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2", "B16_NTX_3", 
"B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", 
"KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", 
"KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", 
"B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", 
"KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6"), 
    `total cells` = c(1.705589084, 1.414617716, 2.009911894, 
    0.857632933, 1.263689975, 1.606186794, 2.006018054, 1.449275362, 
    1.286863271, 4.41495778, 5.954825462, 4.0544723, 4.275011018, 
    1.063829787, 2.304330552, 0.997592019, 1.957585644, 0.386100386, 
    0.500294291, 0.855880729, 0.876460768, 1.538118591, 1.736208345, 
    1.583467525, 1.176075269, 2.027027027, 1.645692159, 14.51062713, 
    4.479622769, 12.61013216, 7.261292167, 3.959561921, 12.55205235, 
    6.218655968, 14.29512516, 3.699731903, 4.680337756, 3.661875428, 
    7.644692046, 6.63287792, 1.968085106, 5.522447358, 10.49191606, 
    3.996737357, 8.273579702, 11.65391407, 12.64494754, 6.385642738, 
    20.17387428, 20.61047326, 22.49060655, 17.37231183, 13.73873874, 
    15.32752501, 3.332458672, 6.635230717, 2.450440529, 3.687821612, 
    4.085930918, 5.770374777, 3.911735206, 5.928853755, 2.252010724, 
    6.899879373, 7.289527721, 4.178272981, 6.170118995, 6.64893617, 
    3.535955503, 6.329549364, 3.752039152, 6.784335356, 5.974102413, 
    7.482054114, 7.429048414, 10.90057958, 9.633155979, 11.54052603, 
    10.68548387, 14.99356499, 16.16650532, 5.510364734, 2.72819131, 
    3.716960352, 5.431675243, 4.886267902, 6.543723974, 6.519558676, 
    5.401844532, 23.21715818, 3.908323281, 3.867214237, 2.69266481, 
    2.732481269, 1.861702128, 7.151370679, 7.258341933, 4.159869494, 
    3.281853282, 3.943496174, 4.389839867, 2.796327212, 4.257690593, 
    5.516661999, 4.911433172, 6.686827957, 8.719433719, 6.808647951
    ), New_Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("NTX", 
    "Anti-PD-1", "AC484"), class = "factor"), Tissue_Expression = c("Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid")), row.names = c(NA, -108L), class = c("tbl_df", 
"tbl", "data.frame"))

> str(filtered_lymph)
tibble [108 × 5] (S3: tbl_df/tbl/data.frame)
 $ cluster          : chr [1:108] "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" ...
 $ condition        : chr [1:108] "B16_NTX_1" "B16_NTX_2" "B16_NTX_3" "B16_NTX_4" ...
 $ total cells      : num [1:108] 1.706 1.415 2.01 0.858 1.264 ...
 $ New_Condition    : Factor w/ 3 levels "NTX","Anti-PD-1",..: 1 1 1 1 1 1 1 1 1 2 ...
 $ Tissue_Expression: chr [1:108] "Lymphoid" "Lymphoid" "Lymphoid" "Lymphoid" ...

> summary(filtered_lymph)
   cluster           condition          total cells     
 Length:108         Length:108         Min.   : 0.3861  
 Class :character   Class :character   1st Qu.: 2.6321  
 Mode  :character   Mode  :character   Median : 4.5800  
                                       Mean   : 6.0848  
                                       3rd Qu.: 7.2591  
                                       Max.   :23.2172  
   New_Condition Tissue_Expression 
 NTX      :36    Length:108        
 Anti-PD-1:32    Class :character  
 AC484    :40    Mode  :character

字符串
我想对 `New_Condition` 变量中的对照组与各处理组分别进行双尾非配对 t 检验：一次是 "NTX" 对 "Anti-PD-1"，另一次是 "NTX" 对 "AC484"。我希望对每个细胞群（`cluster`）分别计算，例如 "CD4+ Tcells"、"Progenitor Tcells" 等。
然后，我希望能够在 ggplot 中相应的箱线图对上方标注 p 值。绘图代码如下：

# Grouped box plot of `total cells` per cell cluster, with one dodged box per
# treatment level (New_Condition).
# Relies on `fill_colors`, `outline_colors` and `x_lym_labels`, which are
# defined outside this snippet.
p_lymph <- ggplot(
  filtered_lymph,
  aes(x = cluster, y = `total cells`, fill = New_Condition, color = New_Condition)
) +
  # `linewidth` replaces `size` for line widths (deprecated for lines since
  # ggplot2 3.4.0); outlier.shape = NA hides the outlier points.
  geom_boxplot(
    position = position_dodge(width = 0.8),
    linewidth = 0.9,
    linetype = "solid",
    outlier.shape = NA
  ) +
  scale_fill_manual(values = fill_colors) +
  # The colour legend is titled "Condition", although both legends are hidden
  # by guides() below.
  scale_color_manual(name = "Condition", values = outline_colors) +
  theme_minimal() +
  guides(fill = "none", color = "none") + # remove the legend
  labs(y = "Total cells (%)", x = NULL) +
  scale_x_discrete(labels = x_lym_labels) +
  theme(
    axis.line = element_line(color = "black", linewidth = 0.5, linetype = "solid"), # add axis lines
    axis.text.x = element_blank(), # x tick labels are not drawn
    axis.ticks.x = element_line(colour = "black", linewidth = 0.5), # manually add tick marks
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    panel.grid = element_blank() # remove grid lines
  ) +
  scale_y_continuous(
    breaks = seq(0, 25, by = 5), # y-axis breaks every 5 units
    limits = c(0, 25),
    labels = c("0", "5", "10", "15", "20", "25") # custom y-axis labels
  )

型
我以前试过t.test函数：

# NOTE(review): this is the failing attempt. The second argument is
# filtered_lymph$cluster (a character column) indexed by the New_Condition
# factor, so t.test() receives a non-numeric `y`; that is what triggers the
# "argument is not numeric or logical" warning and the error shown below.
# Both arguments should be numeric `total cells` values, one vector per
# treatment group being compared.
t.test(filtered_lymph$'total cells',filtered_lymph$cluster[filtered_lymph$New_Condition])$p.value

型
然而得到这个错误

Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : 
  missing value where TRUE/FALSE needed
In addition: Warning messages:
1: In mean.default(y) : argument is not numeric or logical: returning NA
2: In var(y) : NAs introduced by coercion

型
任何帮助将不胜感激！

r

来源：https://stackoverflow.com/questions/77626694/how-to-perform-t-test-and-plot-p-values-for-comparison-between-groups-on-a-group

1条答案

按热度按时间

f0brbegy1#

正如评论中所暗示的，可以使用 ggsignif 包中的 geom_signif 之类的函数，但你必须改变图的结构才能让它工作（把 New_Condition 放在 x 轴上，并按 cluster 分面）：

library(ggplot2)
library(ggsignif)

# One facet per cell cluster with the treatment (New_Condition) on the x axis,
# so that geom_signif() can draw comparison brackets between treatment groups
# inside each panel.
ggplot(filtered_lymph, aes(New_Condition, `total cells`,
                           color = New_Condition,
                           fill = after_scale(alpha(color, 0.5)))) +
  geom_boxplot() +
  # geom_signif() runs wilcox.test unless told otherwise; the question asks
  # for a t-test, so request it explicitly. stats::t.test() is two-sided and
  # unpaired by default.
  geom_signif(comparisons = list(c("NTX", "Anti-PD-1"),
                                 c("NTX", "AC484")),
              test = "t.test",
              step_increase = 0.1, color = "black",
              # print the formatted p-value (e.g. "p=0.012") instead of stars
              map_signif_level = scales::pvalue_format(add_p = TRUE)) +
  scale_x_discrete("Cluster", expand = c(0.25, 0.5)) +
  scale_color_manual("Condition", values = c("#4d4b4c", "#4c517b", "#c63a41")) +
  # switch = "x" moves the cluster strips below the panels
  facet_grid(. ~ cluster, switch = "x") +
  theme_minimal() +
  theme(panel.spacing.x = unit(0, "mm"),
        axis.text.x = element_blank(),
        axis.line = element_line(),
        strip.placement = "outside")

字符串

的数据

赞(0）回复(0）举报 12个月前

我来回答

R语言如何在分组箱线图(ggplot)上执行t检验并绘制组间比较的p值？

1条答案

相关问题

热门标签

最新问答

R语言 如何在分组箱线图(ggplot)上执行t检验并绘制组间比较的p值？

1条答案

相关问题

热门标签

最新问答

R语言如何在分组箱线图(ggplot)上执行t检验并绘制组间比较的p值？