R语言 更改ggbiplot pca中的图例和形状

ne5o7dgx  于 2023-02-01  发布在  其他
关注(0)|答案(1)|浏览(390)

你能帮我完善这个主成分分析(PCA)的图吗?我想改变点的颜色和形状:每个物种(species)使用一种不同的颜色,同一物种下的2-3个生物体(organism)分别使用2-3种不同的符号。它应该看起来像这样:[图片:enter image description here]
到目前为止,我尝试了以下代码:

# Read the semicolon-separated measurements from the working directory.
setwd("~/Schwarze Johannisbeeren/SJ Wein mit nicht Sc/PCA/stackoverflow frage")
results <- read.csv(
  "results.csv",
  sep = ";",
  header = TRUE,
  encoding = "UTF-8",
  check.names = FALSE
)

# Grouping vectors: column 1 holds the organism, column 2 the species.
results.organism <- results[, 1]
results.species <- results[, 2]

# PCA on columns 3 to 7 (the numeric measurements), centred and scaled.
results.pca <- prcomp(results[, 3:7], center = TRUE, scale. = TRUE)

summary(results.pca)

library(ggplot2)
library(ggbiplot)

# Biplot of the PCA, grouped by species (ellipse.prob = 0.68); the points are
# re-drawn by geom_point() because ggbiplot is called with alpha = 0.
ggbiplot(
  results.pca,
  alpha = 0,
  obs.scale = 1,
  var.scale = 1,
  ellipse = TRUE,
  ellipse.prob = 0.68,
  circle = FALSE,
  varname.size = 0,
  var.axes = FALSE,
  groups = results$species
) +
  theme_bw() +
  geom_point(aes(colour = factor(results.species)), size = 2) +
  # NOTE: nothing in this call maps a `shape` aesthetic, so this manual shape
  # scale is defined but never applied to the points.
  scale_shape_manual(
    values = c(
      "Mt1" = 1, "Mt2" = 2,
      "Al1" = 1, "Al2" = 2,
      "Bg1" = 1, "Bg2" = 2, "Bg3" = 3,
      "Cs1" = 1, "Cs2" = 2, "Cs3" = 3,
      "Df1" = 1, "Df2" = 2, "Df3" = 3
    )
  ) +
  #scale_color_brewer(name= "organism", type = "qual", palette = 2)+
  #scale_x_continuous (limits = c (-1, 9))+
  theme(
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(
      size = 12, colour = "black", vjust = 0.5, hjust = 1, face = "bold"
    ),
    axis.text.y = element_text(colour = "black", size = 12, face = "bold")
  )

这就是我的数据

> results
   organism species  lactones cyanides alcohols  ethers   acids
1       Mt1      Mt 23435.167    166.4 137653.9  4040.1 1131.52
2       Mt1      Mt 23303.111    168.9 153511.0  4529.1 1148.52
3       Mt1      Mt 22340.556    176.6 150719.9  3255.8 1200.88
4       Mt2      Mt 51519.222    175.9 173401.1  3890.1 1196.12
5       Mt2      Mt 48824.500    166.5 171614.4  3694.1 1132.20
6       Mt2      Mt 50427.278    165.4 168865.1  3693.2 1124.72
7       Al1      Al 25260.222    162.0 211737.4  9563.9 1101.60
8       Al1      Al 23177.556    161.5 199886.7 10403.3 1098.20
9       Al1      Al 27903.000    156.2 240088.4 11897.1 1062.16
10      Al2      Al  5993.722    180.4 289334.9  6673.3 1226.72
11      Al2      Al  7307.389    169.7 275631.1  8333.4 1153.96
12      Al2      Al  9419.167    147.5 277924.5  9622.2 1003.00
13      Bg1      Bg 58216.944    132.4  92275.3  4099.5  900.32
14      Bg1      Bg 69860.222    147.4 105654.9  4080.6 1002.32
15      Bg1      Bg 72809.333    145.8 111731.3  4014.6  991.44
16      Bg2      Bg 51584.611    142.9 105548.2  6450.1  971.72
17      Bg2      Bg 57738.056    141.2 117728.9  6332.4  960.16
18      Bg2      Bg 53356.056    142.7 110260.2  6506.2  970.36
19      Bg3      Bg 41983.389    130.8 103799.4  4781.8  889.44
20      Bg3      Bg 46930.722    148.3 113944.6  5151.6 1008.44
21      Bg3      Bg 49487.611    139.4 121976.5  5318.3  947.92
22      Cs1      Cs  7155.056    161.6 221538.8  8356.0 1098.88
23      Cs1      Cs  8153.611    151.0 179823.0  7961.2 1026.80
24      Cs1      Cs  7445.722    168.6 176978.0  8196.5 1146.48
25      Cs2      Cs 10771.556    126.4 144314.1  8634.6  859.52
26      Cs2      Cs 12239.556    142.6 142913.7  9471.9  969.68
27      Cs2      Cs 13788.611    136.1 131506.7  9390.4  925.48
28      Cs3      Cs 12082.111    152.0 171730.0  6259.6 1033.60
29      Cs3      Cs 14331.556    143.3 141748.7  7532.8  974.44
30      Cs3      Cs 14123.056    158.2 150303.0  7755.8 1075.76
31      Df1      Df 26906.778    156.2 310203.9  5505.5 1062.16
32      Df1      Df 20689.111    163.5 214322.9  5315.6 1111.80
33      Df1      Df 22872.722    154.1 197572.9  4627.7 1047.88
34      Df2      Df 18838.222    159.2 125167.6 12372.9 1082.56
35      Df2      Df 18218.667    155.8 127077.2 11182.0 1059.44
36      Df2      Df 18545.389    156.2 154400.4 10543.6 1062.16
37      Df3      Df 19924.111    156.4 199472.6  4452.3 1063.52
38      Df3      Df 22504.056    158.0 196343.0  3994.1 1074.40
39      Df3      Df 16907.278    151.5 185052.9  4084.6 1030.20
>

顺便问一下,坐标轴标签能否只显示"PC1 (x %)",而不是"PC1 (x % explained var.)"(即去掉"解释方差"字样)?

rta7y2nd

rta7y2nd1#

实现所需结果的一种方法是:首先创建形状和颜色两个调色板,把 organism 名称映射到形状和颜色;其次,在 geom_point 中通过添加一列 organism 来扩展数据(我使用 dplyr::bind_cols),这样就可以把 organism 映射到 shape 和 colour 这两个 aes 上;最后,用 scale_color_discrete(guide = "none") 去掉分组(groups)的颜色图例,并通过 ggnewscale::new_scale_color 和 scale_color_manual 添加第二个颜色标度。
注意:轴标题的简单修复方法是使用+labs(x = ..., y = ...)手动设置它们。

library(ggplot2)
library(ggbiplot)

# Palettes for the manual scales: exactly one named entry per organism.
# A manual scale needs a single value per level, so the lookup is built from
# the distinct organisms instead of from every data row (which would repeat
# each name once per replicate measurement).
organism_key <- results[!duplicated(results$organism), c("organism", "species")]
organism_ids <- as.character(organism_key$organism)

# Shape: the trailing digit of the organism code (Mt1 -> 1, Mt2 -> 2, ...)
# selects the n-th shape of the default shape palette.
replicate_idx <- as.integer(substring(organism_ids, nchar(organism_ids)))
# Fail loudly instead of silently producing NA shapes for unexpected codes.
stopifnot(!anyNA(replicate_idx))
pal_shape <- setNames(
  scales::shape_pal()(max(replicate_idx))[replicate_idx],
  organism_ids
)

# Colour: one default hue per species (hues assigned in sorted species order),
# shared by all organisms of that species.
species_levels <- sort(unique(as.character(results$species)))
species_hues <- setNames(
  scales::hue_pal()(length(species_levels)),
  species_levels
)
pal_color <- setNames(
  unname(species_hues[as.character(organism_key$species)]),
  organism_ids
)

# Biplot with species ellipses; the points are drawn by a separate layer so
# that colour and shape can both be driven by the organism.
ggbiplot(
  results.pca,
  alpha = 0,
  obs.scale = 1,
  var.scale = 1,
  ellipse = TRUE,
  ellipse.prob = 0.68,
  circle = FALSE,
  varname.size = 0,
  var.axes = FALSE,
  groups = results$species
) +
  # Hide the legend of the colour scale used for the `groups` ...
  scale_color_discrete(guide = "none") +
  # ... and start a second, independent colour scale for the points.
  ggnewscale::new_scale_color() +
  geom_point(
    # Append the organism column to the plot data so it can be mapped below.
    data = function(scores) {
      dplyr::bind_cols(scores, organism = results$organism)
    },
    mapping = aes(shape = organism, colour = organism),
    size = 2
  ) +
  scale_shape_manual(values = pal_shape) +
  scale_color_manual(values = pal_color) +
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(
      size = 12, colour = "black", vjust = 0.5, hjust = 1, face = "bold"
    ),
    axis.text.y = element_text(colour = "black", size = 12, face = "bold")
  )

数据

# Reproducible copy of the example data as a plain data.frame with 39 rows:
# two label columns (organism, species) followed by five numeric measurement
# columns (lactones, cyanides, alcohols, ethers, acids).
results <- structure(list(organism = c(
  "Mt1", "Mt1", "Mt1", "Mt2", "Mt2",
  "Mt2", "Al1", "Al1", "Al1", "Al2", "Al2", "Al2", "Bg1", "Bg1",
  "Bg1", "Bg2", "Bg2", "Bg2", "Bg3", "Bg3", "Bg3", "Cs1", "Cs1",
  "Cs1", "Cs2", "Cs2", "Cs2", "Cs3", "Cs3", "Cs3", "Df1", "Df1",
  "Df1", "Df2", "Df2", "Df2", "Df3", "Df3", "Df3"
), species = c(
  "Mt",
  "Mt", "Mt", "Mt", "Mt", "Mt", "Al", "Al", "Al", "Al", "Al", "Al",
  "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Cs", "Cs",
  "Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Df", "Df", "Df", "Df",
  "Df", "Df", "Df", "Df", "Df"
), lactones = c(
  23435.167, 23303.111,
  22340.556, 51519.222, 48824.5, 50427.278, 25260.222, 23177.556,
  27903, 5993.722, 7307.389, 9419.167, 58216.944, 69860.222, 72809.333,
  51584.611, 57738.056, 53356.056, 41983.389, 46930.722, 49487.611,
  7155.056, 8153.611, 7445.722, 10771.556, 12239.556, 13788.611,
  12082.111, 14331.556, 14123.056, 26906.778, 20689.111, 22872.722,
  18838.222, 18218.667, 18545.389, 19924.111, 22504.056, 16907.278
), cyanides = c(
  166.4, 168.9, 176.6, 175.9, 166.5, 165.4, 162,
  161.5, 156.2, 180.4, 169.7, 147.5, 132.4, 147.4, 145.8, 142.9,
  141.2, 142.7, 130.8, 148.3, 139.4, 161.6, 151, 168.6, 126.4,
  142.6, 136.1, 152, 143.3, 158.2, 156.2, 163.5, 154.1, 159.2,
  155.8, 156.2, 156.4, 158, 151.5
), alcohols = c(
  137653.9, 153511,
  150719.9, 173401.1, 171614.4, 168865.1, 211737.4, 199886.7, 240088.4,
  289334.9, 275631.1, 277924.5, 92275.3, 105654.9, 111731.3, 105548.2,
  117728.9, 110260.2, 103799.4, 113944.6, 121976.5, 221538.8, 179823,
  176978, 144314.1, 142913.7, 131506.7, 171730, 141748.7, 150303,
  310203.9, 214322.9, 197572.9, 125167.6, 127077.2, 154400.4, 199472.6,
  196343, 185052.9
), ethers = c(
  4040.1, 4529.1, 3255.8, 3890.1,
  3694.1, 3693.2, 9563.9, 10403.3, 11897.1, 6673.3, 8333.4, 9622.2,
  4099.5, 4080.6, 4014.6, 6450.1, 6332.4, 6506.2, 4781.8, 5151.6,
  5318.3, 8356, 7961.2, 8196.5, 8634.6, 9471.9, 9390.4, 6259.6,
  7532.8, 7755.8, 5505.5, 5315.6, 4627.7, 12372.9, 11182, 10543.6,
  4452.3, 3994.1, 4084.6
), acids = c(
  1131.52, 1148.52, 1200.88,
  1196.12, 1132.2, 1124.72, 1101.6, 1098.2, 1062.16, 1226.72, 1153.96,
  1003, 900.32, 1002.32, 991.44, 971.72, 960.16, 970.36, 889.44,
  1008.44, 947.92, 1098.88, 1026.8, 1146.48, 859.52, 969.68, 925.48,
  1033.6, 974.44, 1075.76, 1062.16, 1111.8, 1047.88, 1082.56, 1059.44,
  1062.16, 1063.52, 1074.4, 1030.2
)), class = "data.frame", row.names = c(
  "1",
  "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
  "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
  "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
  "36", "37", "38", "39"
))

相关问题