我有一个数据框(data frame),其内容如下(dput 输出):
> dput(filtered_lymph)
structure(list(cluster = c("CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells"
), condition = c("B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4",
"KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5",
"B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1",
"KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2",
"B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3",
"KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2",
"B16_NTX_3", "B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3",
"KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3",
"B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5",
"B16_AC484_1", "B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1",
"KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6",
"B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4", "KPC_NTX_1",
"KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1",
"B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2",
"KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2", "B16_AC484_3",
"B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4",
"KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2", "B16_NTX_3",
"B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4",
"KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4",
"KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1",
"B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2",
"KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6"),
`total cells` = c(1.705589084, 1.414617716, 2.009911894,
0.857632933, 1.263689975, 1.606186794, 2.006018054, 1.449275362,
1.286863271, 4.41495778, 5.954825462, 4.0544723, 4.275011018,
1.063829787, 2.304330552, 0.997592019, 1.957585644, 0.386100386,
0.500294291, 0.855880729, 0.876460768, 1.538118591, 1.736208345,
1.583467525, 1.176075269, 2.027027027, 1.645692159, 14.51062713,
4.479622769, 12.61013216, 7.261292167, 3.959561921, 12.55205235,
6.218655968, 14.29512516, 3.699731903, 4.680337756, 3.661875428,
7.644692046, 6.63287792, 1.968085106, 5.522447358, 10.49191606,
3.996737357, 8.273579702, 11.65391407, 12.64494754, 6.385642738,
20.17387428, 20.61047326, 22.49060655, 17.37231183, 13.73873874,
15.32752501, 3.332458672, 6.635230717, 2.450440529, 3.687821612,
4.085930918, 5.770374777, 3.911735206, 5.928853755, 2.252010724,
6.899879373, 7.289527721, 4.178272981, 6.170118995, 6.64893617,
3.535955503, 6.329549364, 3.752039152, 6.784335356, 5.974102413,
7.482054114, 7.429048414, 10.90057958, 9.633155979, 11.54052603,
10.68548387, 14.99356499, 16.16650532, 5.510364734, 2.72819131,
3.716960352, 5.431675243, 4.886267902, 6.543723974, 6.519558676,
5.401844532, 23.21715818, 3.908323281, 3.867214237, 2.69266481,
2.732481269, 1.861702128, 7.151370679, 7.258341933, 4.159869494,
3.281853282, 3.943496174, 4.389839867, 2.796327212, 4.257690593,
5.516661999, 4.911433172, 6.686827957, 8.719433719, 6.808647951
), New_Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("NTX",
"Anti-PD-1", "AC484"), class = "factor"), Tissue_Expression = c("Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid")), row.names = c(NA, -108L), class = c("tbl_df",
"tbl", "data.frame"))
> str(filtered_lymph)
tibble [108 × 5] (S3: tbl_df/tbl/data.frame)
$ cluster : chr [1:108] "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" ...
$ condition : chr [1:108] "B16_NTX_1" "B16_NTX_2" "B16_NTX_3" "B16_NTX_4" ...
$ total cells : num [1:108] 1.706 1.415 2.01 0.858 1.264 ...
$ New_Condition : Factor w/ 3 levels "NTX","Anti-PD-1",..: 1 1 1 1 1 1 1 1 1 2 ...
$ Tissue_Expression: chr [1:108] "Lymphoid" "Lymphoid" "Lymphoid" "Lymphoid" ...
> summary(filtered_lymph)
cluster condition total cells
Length:108 Length:108 Min. : 0.3861
Class :character Class :character 1st Qu.: 2.6321
Mode :character Mode :character Median : 4.5800
Mean : 6.0848
3rd Qu.: 7.2591
Max. :23.2172
New_Condition Tissue_Expression
NTX :36 Length:108
Anti-PD-1:32 Class :character
AC484 :40 Mode :character
我想找到一种方法,在“New_Condition”变量的对照组与各处理组之间进行双尾非配对 t 检验——一次在“NTX”与“Anti-PD-1”之间,另一次在“NTX”与“AC484”之间。我希望对每个细胞群(即 cluster 列中的“CD4+ Tcells”、“Progenitor Tcells”等)分别计算。
然后,我希望能在 ggplot 图中相应的成对箱线图上方标注 p 值——绘图代码如下:
# Grouped box plot of `total cells` per cluster, with one dodged box per
# New_Condition level. `fill_colors`, `outline_colors` and `x_lym_labels` are
# not defined in this snippet and must already exist in the session.
p_lymph <- ggplot(
  filtered_lymph,
  aes(
    x = cluster,
    y = `total cells`,
    fill = New_Condition,
    color = New_Condition
  )
) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0. Outlier points are not drawn.
  geom_boxplot(
    position = position_dodge(width = 0.8),
    linewidth = 0.9,
    linetype = "solid",
    outlier.shape = NA
  ) +
  # Manual fill / outline palettes; the colour legend would be titled
  # "Condition", but both legends are hidden by guides() below.
  scale_fill_manual(values = fill_colors) +
  scale_color_manual(name = "Condition", values = outline_colors) +
  theme_minimal() +
  guides(fill = "none", color = "none") + # removing the legend
  labs(
    y = "Total cells (%)",
    x = NULL
  ) +
  # NOTE(review): these labels are not rendered while axis.text.x is
  # element_blank() in theme() below -- confirm that is intended.
  scale_x_discrete(labels = x_lym_labels) +
  theme(
    # adding axis lines
    axis.line = element_line(color = "black", linewidth = 0.5, linetype = "solid"),
    axis.text.x = element_blank(),
    # manually adding in tick marks
    axis.ticks.x = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    # removing grid lines
    panel.grid = element_blank()
  ) +
  scale_y_continuous(
    breaks = seq(0, 25, by = 5), # adjusting the y-axis breaks
    limits = c(0, 25),
    labels = c("0", "5", "10", "15", "20", "25") # customising the y-axis labels
  )
我之前尝试过使用 t.test 函数:
# NOTE: this call fails. The second argument is the character `cluster` column
# (subset by the factor's integer codes), not a numeric sample, so t.test()
# cannot compute mean(y) / var(y) -- hence the error shown below.
t.test(filtered_lymph$'total cells',filtered_lymph$cluster[filtered_lymph$New_Condition])$p.value
然而得到了如下错误:
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") :
missing value where TRUE/FALSE needed
In addition: Warning messages:
1: In mean.default(y) : argument is not numeric or logical: returning NA
2: In var(y) : NAs introduced by coercion
任何帮助都将不胜感激!
1条答案
按热度按时间f0brbegy1#
答案正如评论中所暗示的那样:使用 ggsignif 包中的 geom_signif 之类的函数,但你必须改变图的结构才能让它工作:
的数据