R语言 如何在分组箱线图(ggplot)上执行t检验并绘制组间比较的p值?

rslzwgfq  于 12个月前  发布在  其他
关注(0)|答案(1)|浏览(128)

我有一个数据框(data frame),内容如下所示:

> dput(filtered_lymph)
structure(list(cluster = c("CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", 
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells", 
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", 
"NK Cells", "Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells", 
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells"
), condition = c("B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4", 
"KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5", 
"B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1", 
"KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2", 
"B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3", 
"KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2", 
"B16_NTX_3", "B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", 
"KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", 
"B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", 
"B16_AC484_1", "B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", 
"KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6", 
"B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4", "KPC_NTX_1", 
"KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1", 
"B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2", 
"KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2", "B16_AC484_3", 
"B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4", 
"KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2", "B16_NTX_3", 
"B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", 
"KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", 
"KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", 
"B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", 
"KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6"), 
    `total cells` = c(1.705589084, 1.414617716, 2.009911894, 
    0.857632933, 1.263689975, 1.606186794, 2.006018054, 1.449275362, 
    1.286863271, 4.41495778, 5.954825462, 4.0544723, 4.275011018, 
    1.063829787, 2.304330552, 0.997592019, 1.957585644, 0.386100386, 
    0.500294291, 0.855880729, 0.876460768, 1.538118591, 1.736208345, 
    1.583467525, 1.176075269, 2.027027027, 1.645692159, 14.51062713, 
    4.479622769, 12.61013216, 7.261292167, 3.959561921, 12.55205235, 
    6.218655968, 14.29512516, 3.699731903, 4.680337756, 3.661875428, 
    7.644692046, 6.63287792, 1.968085106, 5.522447358, 10.49191606, 
    3.996737357, 8.273579702, 11.65391407, 12.64494754, 6.385642738, 
    20.17387428, 20.61047326, 22.49060655, 17.37231183, 13.73873874, 
    15.32752501, 3.332458672, 6.635230717, 2.450440529, 3.687821612, 
    4.085930918, 5.770374777, 3.911735206, 5.928853755, 2.252010724, 
    6.899879373, 7.289527721, 4.178272981, 6.170118995, 6.64893617, 
    3.535955503, 6.329549364, 3.752039152, 6.784335356, 5.974102413, 
    7.482054114, 7.429048414, 10.90057958, 9.633155979, 11.54052603, 
    10.68548387, 14.99356499, 16.16650532, 5.510364734, 2.72819131, 
    3.716960352, 5.431675243, 4.886267902, 6.543723974, 6.519558676, 
    5.401844532, 23.21715818, 3.908323281, 3.867214237, 2.69266481, 
    2.732481269, 1.861702128, 7.151370679, 7.258341933, 4.159869494, 
    3.281853282, 3.943496174, 4.389839867, 2.796327212, 4.257690593, 
    5.516661999, 4.911433172, 6.686827957, 8.719433719, 6.808647951
    ), New_Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("NTX", 
    "Anti-PD-1", "AC484"), class = "factor"), Tissue_Expression = c("Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", 
    "Lymphoid", "Lymphoid")), row.names = c(NA, -108L), class = c("tbl_df", 
"tbl", "data.frame"))

> str(filtered_lymph)
tibble [108 × 5] (S3: tbl_df/tbl/data.frame)
 $ cluster          : chr [1:108] "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" ...
 $ condition        : chr [1:108] "B16_NTX_1" "B16_NTX_2" "B16_NTX_3" "B16_NTX_4" ...
 $ total cells      : num [1:108] 1.706 1.415 2.01 0.858 1.264 ...
 $ New_Condition    : Factor w/ 3 levels "NTX","Anti-PD-1",..: 1 1 1 1 1 1 1 1 1 2 ...
 $ Tissue_Expression: chr [1:108] "Lymphoid" "Lymphoid" "Lymphoid" "Lymphoid" ...

> summary(filtered_lymph)
   cluster           condition          total cells     
 Length:108         Length:108         Min.   : 0.3861  
 Class :character   Class :character   1st Qu.: 2.6321  
 Mode  :character   Mode  :character   Median : 4.5800  
                                       Mean   : 6.0848  
                                       3rd Qu.: 7.2591  
                                       Max.   :23.2172  
   New_Condition Tissue_Expression 
 NTX      :36    Length:108        
 Anti-PD-1:32    Class :character  
 AC484    :40    Mode  :character

字符串
我想在对照组与各处理组(即“New_Condition”变量的各水平)之间进行双尾非配对t检验:一次在“NTX”与“Anti-PD-1”之间,另一次在“NTX”与“AC484”之间。我希望对每个细胞群分别进行这一计算,即“CD4+ Tcells”、“Progenitor Tcells”等。
然后,我希望能够在ggplot上的相应箱线图对上方绘制p值-代码详细如下:

# Grouped box plot of `total cells` per cluster, with one dodged box per
# New_Condition level. `fill_colors`, `outline_colors` and `x_lym_labels` are
# not defined in this snippet and must exist before this code is run.
p_lymph <- ggplot(
  filtered_lymph,
  aes(
    x = cluster,
    y = `total cells`,
    fill = New_Condition,
    color = New_Condition
  )
) +
  geom_boxplot(
    position = position_dodge(width = 0.8),
    # `linewidth` replaces `size` for line thickness; `size` is deprecated for
    # lines since ggplot2 3.4.0 and only triggers a deprecation warning.
    linewidth = 0.9,
    linetype = "solid",
    outlier.shape = NA # do not draw outlier points
  ) +
  # Manual fill and outline colours for the three conditions.
  scale_fill_manual(values = fill_colors) +
  scale_color_manual(name = "Condition", values = outline_colors) +
  theme_minimal() +
  guides(fill = "none", color = "none") + # removing the legend
  labs(y = "Total cells (%)", x = NULL) +
  scale_x_discrete(labels = x_lym_labels) +
  theme(
    # adding axis lines
    axis.line = element_line(
      color = "black", linewidth = 0.5, linetype = "solid"
    ),
    axis.text.x = element_blank(),
    # manually adding in tick marks
    axis.ticks.x = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    panel.grid = element_blank() # removing grid lines
  ) +
  scale_y_continuous(
    breaks = seq(0, 25, by = 5), # adjusting the y-axis breaks
    limits = c(0, 25),
    labels = c("0", "5", "10", "15", "20", "25") # customising the y-axis labels
  )


我以前试过t.test函数:

# Attempted test (fails): the second argument is taken from `cluster`, a
# character column, so t.test() receives a non-numeric sample and cannot
# compute its mean or variance.
t.test(filtered_lymph$'total cells',filtered_lymph$cluster[filtered_lymph$New_Condition])$p.value


然而得到了如下错误:

Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : 
  missing value where TRUE/FALSE needed
In addition: Warning messages:
1: In mean.default(y) : argument is not numeric or logical: returning NA
2: In var(y) : NAs introduced by coercion


任何帮助将不胜感激!

f0brbegy

f0brbegy1#

正如评论中所提示的,可以使用 ggsignif 包中的 geom_signif 之类的函数,但你必须改变图的结构才能让它正常工作:

library(ggplot2)
library(ggsignif)

# One facet per cell population (cluster) with the treatment groups on the
# x-axis, so that geom_signif() can draw a bracket over each pair of boxes.
ggplot(filtered_lymph, aes(New_Condition, `total cells`,
                           color = New_Condition,
                           fill = after_scale(alpha(color, 0.5)))) +
  geom_boxplot() +
  # geom_signif() defaults to test = "wilcox.test"; request a t-test
  # explicitly. t.test() is two-sided and unpaired by default (Welch
  # correction). Both comparisons are against the 'NTX' control.
  geom_signif(comparisons = list(c('NTX', 'Anti-PD-1'),
                                 c('NTX', 'AC484')),
              test = 't.test',
              step_increase = 0.1, color = 'black',
              # Print the formatted p-value instead of significance stars.
              map_signif_level = scales::pvalue_format(add_p = TRUE)) +
  scale_x_discrete('Cluster', expand = c(0.25, 0.5)) +
  scale_color_manual("Condition",
                     values = c("#4d4b4c", "#4c517b", "#c63a41")) +
  # Facet strips moved below the panels act as the cluster axis labels.
  facet_grid(.~cluster, switch = 'x') +
  theme_minimal() +
  theme(panel.spacing.x = unit(0, 'mm'),
        axis.text.x = element_blank(),
        axis.line = element_line(),
        strip.placement = 'outside')

字符串


的数据

相关问题