如何在 R 中创建 TRUE/FALSE 条件来标记显著基因?

enxuqcxy  于 2023-01-22  发布在  其他
关注(0)|答案(1)|浏览(110)

我想找到一种方法,在火山图(volcano plot)中自动标记上调和下调的基因。目前我一直是对照数据表手动完成这件事的,但我不知道该如何写一条命令,让 R 自动标记我感兴趣的基因。

structure(list(log2FC = c(-0.0794009, -0.113568, 0.615316, -1.16623,
-0.32056, -0.206217, -0.0100415, -0.0448286, -0.0486388, -0.0756063,
-0.0728393, 0.315451, 0.0426718, 0.639178, 0.0925341, 0.0450367,
0.006901, -0.0429835, 0.134707, 0.162444, 0.212584, 0.165407,
-0.0872685, 0.679763, 0.389217, -0.033093, -0.152635, -0.0211448,
0.0847027, -0.195283, 0.0832398, -0.013115, -0.100765, -0.0671384,
-0.329955, 0.0578964, -0.0457076, -0.462796, 0.316622, NA, -0.0127716,
0.15966, NA, 0.447555, -0.00621892, 0.0649842, -0.332575, NA,
0.320113, 0.589376, -0.162403, NA, -0.242632, -0.0594681, 0.0641964,
0.172721, -0.12879, -0.247723, 0.0341975, 0.948706, -0.362608,
0.0129117, 0.148919, 0.39596, -0.170329, 0.192233, 0.0750046,
0.239356, -0.113177, 0.184563, -0.0462517, 0.111383, 0.111771,
0.160304, -0.000317698, -0.0305621, 0.0506452, -0.0694846, -0.122766,
0.127085, -0.737947, 0.0698927, 0.16994, -0.291991, -0.12592,
-0.198342, -0.273148, -0.224852, -0.148406, 0.0278062, -0.12111,
0.154747, -0.0123293, -0.174397, -0.296173, -0.142334, 0.002339,
-0.28758, -0.00561539, 0.351303), logpv = c(0.117565320209322,
0.369598291242942, NA, 0.751054703257298, 2.22594458046808, 0.31656749829483,
0.0549806795912063, 0.910518320182657, 0.1194939793847, 0.175295460825524,
0.685593423679773, 0.671682142129748, 0.279553424344115, 0.665957245629606,
0.115452197262338, 0.0964542712389723, 0.0283108105904126, 0.045686230612953,
0.239329454488972, 0.650852335308652, 0.129085041869883, 0.576503802048531,
0.845411558724963, 1.15680198554459, 0.632866328717744, 0.109898148391564,
0.487910939061688, 0.102004734166744, 1.16721621479739, 1.00082705832325,
0.211467150602276, 0.0773943989155459, 0.682644465007817, 0.153183240729545,
0.547325576464062, 0.238225871919829, 0.159445192411196, 1.13098058477778,
0.195899786158926, NA, 0.056471171238956, 0.126471254156812,
NA, 0.662802232744691, 0.0110990059628997, 0.195349021984076,
1.20932017338426, NA, 0.281021480991908, NA, 0.699824504515865,
NA, 0.398156422666091, 0.293591836627236, 0.110397085099532,
0.494151495842147, 0.990907277089313, 0.786275076845587, 0.0840744769949052,
0.660822228561536, 0.499337725955263, 0.0662268167776316, 0.373436957226925,
0.565853819066506, 0.964699228397632, 0.971176002920955, 0.458470171093662,
0.60133208657991, 0.899234051257062, 0.364765030398401, 0.439855359484562,
0.767135696394782, 0.47627256194497, 1.08942768689603, 0.00112302275044461,
0.109124234131234, 0.453261883835865, 0.278147939249062, 0.343288527072693,
0.781944424204553, 1.07263087927085, 0.450943261704409, 0.9196249283577,
0.275002110784777, 0.423170900052689, 0.338331977539611, 0.481540717547861,
1.82425307026197, 0.559509374951755, 0.263394351228691, 0.815960420377561,
0.17589288151863, 0.0881364167450121, 0.938979021291792, 1.03733665131581,
0.361367867691101, 0.00525996509392326, 1.96102257617475, 0.0218149113494686,
0.589210871333344), diffexpressed = c("Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig", "Non-Sig",
"Non-Sig"), Genes3 = c("Cask", "Ank3", "Ktn1", "R3hdm1", "Lrrfip1",
"Dst", "Clasp1", "Kif1a", "Plppr2", "Slc35a4", "Ktn1", "Agap1",
"Pfkfb2", "Cfap77", "Ank3", "Fat1", "Tns1", "Lrrfip1", "Gm28778",
"Relch", "Swi5", "Macf1", "Arhgap21", "Tsga10", "Mbnl1", "Enah",
"Rgs7", "Map4k4", "Rapgef2", "Pcdh9", "Atp6v1h", "Dclk2", "Prkar2a",
"Lrba", "Rabgap1l", "Pcdh7", "Ncam1", "Plec", "Ldah", "Gbp6",
"Agap3", "Gm43738", "Dclk1", "Klhl5", "Ppp1cc", "Rufy3", "Map4",
"Dhx37", "Rap1gds1", "Lhfpl3", "Camk2d", "Eif2b4", "0610012G03Rik",
"Specc1", "Slc12a5", "Nup205", "Dnm1", "Specc1", "Hmgb1", "Nbdy",
"Chl1", "Gm44596", "Eif4g3", "Cnbp", "Rab11fip5", "C2cd5", "Plppr3",
"Trmt1", "Slc9a3r2", "Lims1", "Pdxdc1", "Prmt5", "Acy1", "Atg4b",
"Ubxn4", "Acadl", "Fahd2a", "Slc17a6", "Rusf1", "Sh3gl3", "Serpina3k",
"Ckap5", "Inpp4a", "Cog7", "Commd5", "Jmy", "Serpina1c", "Eif4g3",
"Igsf8", "Srpk2", "Cluh", "Spag9", "Eef1d", "Ank1", "Plec", "Bscl2",
"Syncrip", "Syt2", "Mia3", "Maz"), genelabels = c(FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)), row.names = c(NA,
100L), class = "data.frame")
# Volcano-plot table: effect size and -log10(p-value) for every gene.
Diestrus1 <- data.frame(
  log2FC = Diestrus_df$log.foldchange_F_dies.M,
  logpv = -log10(Diestrus_df$pvalue)
)

# Every gene starts as "Non-Sig"; genes beyond both cut-offs (|log2FC| > 1 and
# -log10(p) > 1.2) are re-classified. which() drops rows whose comparison is
# NA, so genes with a missing fold change or p-value stay "Non-Sig".
Diestrus1$diffexpressed <- "Non-Sig"

Diestrus1$diffexpressed[
  which(Diestrus1$log2FC > 1 & Diestrus1$logpv > 1.2)
] <- "UP"

Diestrus1$diffexpressed[
  which(Diestrus1$log2FC < -1 & Diestrus1$logpv > 1.2)
] <- "DOWN"

Genes3 <- Diestrus_df$Genes

head(Genes3)

Diestrus2 <- cbind(Diestrus1, Genes3)

head(Diestrus2)

# Genes to annotate on the plot. %in% yields a plain TRUE/FALSE flag per row
# (never NA, even for a missing gene name), replacing the long ==/| chain.
genes_to_label <- c(
  "Serpina1e", "Mtatp6", "Cfap54", "Camk2a", "CSN2",
  "CSN1S1", "CSN1S2", "Krt80", "Galm"
)
Diestrus2$genelabels <- Diestrus2$Genes3 %in% genes_to_label

options(ggrepel.max.overlaps = Inf)

# Colours are matched to classes by name, so a class that happens to be absent
# from the data cannot shift the remaining colours.
ggplot(Diestrus2) +
  geom_point(aes(log2FC, logpv, col = diffexpressed)) +
  geom_text_repel(
    aes(log2FC, logpv),
    label = ifelse(Diestrus2$genelabels, as.character(Diestrus2$Genes3), ""),
    box.padding = unit(0.90, "lines"),
    hjust = 0.40
  ) +
  theme(legend.title = element_blank(), text = element_text(size = 14)) +
  scale_color_manual(
    values = c("DOWN" = "red", "Non-Sig" = "black", "UP" = "blue")
  )

[图片:火山图(仅显示了部分标签)]

s5a0g9ez

s5a0g9ez1#

好的,我想我有一个可行的解决方案。下面的 label_sig() 函数接收数据以及图中 x 和 y 变量的名称(假设你想根据 x 变量的值来决定标记哪些点)。它还需要用作标签的变量名、要标记的点数,以及是否返回图形的开关 plot:如果为 TRUE,则返回一个 ggplot 对象;否则返回带标签的数据。下面是一个使用你的数据的例子。

library(dplyr)
  library(ggplot2)
  library(ggrepel)
  Diestrus1 <- structure(list(log2FC = c(-0.0794009, -0.113568, 0.615316, -1.16623, 
                                         -0.32056, -0.206217, -0.0100415, -0.0448286, -0.0486388, -0.0756063, 
                                         -0.0728393, 0.315451, 0.0426718, 0.639178, 0.0925341, 0.0450367, 
                                         0.006901, -0.0429835, 0.134707, 0.162444, 0.212584, 0.165407, 
                                         -0.0872685, 0.679763, 0.389217, -0.033093, -0.152635, -0.0211448, 
                                         0.0847027, -0.195283, 0.0832398, -0.013115, -0.100765, -0.0671384, 
                                         -0.329955, 0.0578964, -0.0457076, -0.462796, 0.316622, NA, -0.0127716, 
                                         0.15966, NA, 0.447555, -0.00621892, 0.0649842, -0.332575, NA, 
                                         0.320113, 0.589376, -0.162403, NA, -0.242632, -0.0594681, 0.0641964, 
                                         0.172721, -0.12879, -0.247723, 0.0341975, 0.948706, -0.362608, 
                                         0.0129117, 0.148919, 0.39596, -0.170329, 0.192233, 0.0750046, 
                                         0.239356, -0.113177, 0.184563, -0.0462517, 0.111383, 0.111771, 
                                         0.160304, -0.000317698, -0.0305621, 0.0506452, -0.0694846, -0.122766, 
                                         0.127085, -0.737947, 0.0698927, 0.16994, -0.291991, -0.12592, 
                                         -0.198342, -0.273148, -0.224852, -0.148406, 0.0278062, -0.12111, 
                                         0.154747, -0.0123293, -0.174397, -0.296173, -0.142334, 0.002339, 
                                         -0.28758, -0.00561539, 0.351303), 
                              logpv = c(0.117565320209322, 
                                         0.369598291242942, NA, 0.751054703257298, 2.22594458046808, 0.31656749829483, 
                                         0.0549806795912063, 0.910518320182657, 0.1194939793847, 0.175295460825524, 
                                         0.685593423679773, 0.671682142129748, 0.279553424344115, 0.665957245629606, 
                                         0.115452197262338, 0.0964542712389723, 0.0283108105904126, 0.045686230612953, 
                                         0.239329454488972, 0.650852335308652, 0.129085041869883, 0.576503802048531, 
                                         0.845411558724963, 1.15680198554459, 0.632866328717744, 0.109898148391564, 
                                         0.487910939061688, 0.102004734166744, 1.16721621479739, 1.00082705832325, 
                                         0.211467150602276, 0.0773943989155459, 0.682644465007817, 0.153183240729545, 
                                         0.547325576464062, 0.238225871919829, 0.159445192411196, 1.13098058477778, 
                                         0.195899786158926, NA, 0.056471171238956, 0.126471254156812, 
                                         NA, 0.662802232744691, 0.0110990059628997, 0.195349021984076, 
                                         1.20932017338426, NA, 0.281021480991908, NA, 0.699824504515865, 
                                         NA, 0.398156422666091, 0.293591836627236, 0.110397085099532, 
                                         0.494151495842147, 0.990907277089313, 0.786275076845587, 0.0840744769949052, 
                                         0.660822228561536, 0.499337725955263, 0.0662268167776316, 0.373436957226925, 
                                         0.565853819066506, 0.964699228397632, 0.971176002920955, 0.458470171093662, 
                                         0.60133208657991, 0.899234051257062, 0.364765030398401, 0.439855359484562, 
                                         0.767135696394782, 0.47627256194497, 1.08942768689603, 0.00112302275044461, 
                                         0.109124234131234, 0.453261883835865, 0.278147939249062, 0.343288527072693, 
                                         0.781944424204553, 1.07263087927085, 0.450943261704409, 0.9196249283577, 
                                         0.275002110784777, 0.423170900052689, 0.338331977539611, 0.481540717547861, 
                                         1.82425307026197, 0.559509374951755, 0.263394351228691, 0.815960420377561, 
                                         0.17589288151863, 0.0881364167450121, 0.938979021291792, 1.03733665131581, 
                                         0.361367867691101, 0.00525996509392326, 1.96102257617475, 0.0218149113494686, 
                                         0.589210871333344), 
                              Genes3 = c("Cask", "Ank3", "Ktn1", "R3hdm1", 
                                          "Lrrfip1", "Dst", "Clasp1", "Kif1a", "Plppr2", "Slc35a4", "Ktn1", 
                                          "Agap1", "Pfkfb2", "Cfap77", "Ank3", "Fat1", "Tns1", "Lrrfip1", 
                                          "Gm28778", "Relch", "Swi5", "Macf1", "Arhgap21", "Tsga10", "Mbnl1", 
                                          "Enah", "Rgs7", "Map4k4", "Rapgef2", "Pcdh9", "Atp6v1h", "Dclk2", 
                                          "Prkar2a", "Lrba", "Rabgap1l", "Pcdh7", "Ncam1", "Plec", "Ldah", 
                                          "Gbp6", "Agap3", "Gm43738", "Dclk1", "Klhl5", "Ppp1cc", "Rufy3", 
                                          "Map4", "Dhx37", "Rap1gds1", "Lhfpl3", "Camk2d", "Eif2b4", "0610012G03Rik", 
                                          "Specc1", "Slc12a5", "Nup205", "Dnm1", "Specc1", "Hmgb1", "Nbdy", 
                                          "Chl1", "Gm44596", "Eif4g3", "Cnbp", "Rab11fip5", "C2cd5", "Plppr3", 
                                          "Trmt1", "Slc9a3r2", "Lims1", "Pdxdc1", "Prmt5", "Acy1", "Atg4b", 
                                          "Ubxn4", "Acadl", "Fahd2a", "Slc17a6", "Rusf1", "Sh3gl3", "Serpina3k", 
                                          "Ckap5", "Inpp4a", "Cog7", "Commd5", "Jmy", "Serpina1c", "Eif4g3", 
                                          "Igsf8", "Srpk2", "Cluh", "Spag9", "Eef1d", "Ank1", "Plec", "Bscl2", 
                                          "Syncrip", "Syt2", "Mia3", "Maz")), 
                         class = "data.frame", row.names = c(NA, 100L))

  #' Flag and label the points with the most extreme x-values
  #'
  #' Keeps only the three named columns, drops every row with an `NA` in any
  #' of them, and flags the rows whose `abs(x)` is strictly greater than the
  #' (n + 1)-th largest `abs(x)`. Rows tied with that cut-off are not flagged,
  #' so fewer than `n` rows may be flagged when there are ties.
  #'
  #' @param data A data frame containing the columns named by `xvar`, `yvar`
  #'   and `label`.
  #' @param xvar,yvar Column names (strings) plotted on the x and y axes.
  #'   Flagging is based on `abs(data[[xvar]])`, which must be numeric.
  #' @param label Column name (string) whose values become the point labels.
  #' @param n Number of points to flag. `n <= 0` flags nothing; an `n` at
  #'   least as large as the number of complete rows flags every row.
  #' @param plot If `TRUE` (default) return a ggplot object, otherwise return
  #'   the labelled data.
  #' @return A ggplot object, or the reduced data frame with three added
  #'   columns: `sig` (1/0 flag), `lab` (label text, `NA` when not flagged)
  #'   and `col` ("Down", "Non-sig" or "Up").
  label_sig <- function(data, xvar, yvar, label, n = 10, plot = TRUE) {
    cols <- c(xvar, yvar, label)
    stopifnot(
      is.data.frame(data),
      is.character(cols),
      all(cols %in% names(data)),
      is.numeric(n), length(n) == 1L, !is.na(n)
    )

    data <- stats::na.omit(data[, cols, drop = FALSE])
    x <- data[[xvar]]
    ranked <- sort(abs(x))

    # Position of the cut-off in the sorted magnitudes. Positions outside
    # 1..length(ranked) would give an empty or NA threshold, so map them to
    # "flag nothing" / "flag everything" explicitly.
    cut_pos <- trunc(length(ranked) - n)
    thresh <- if (n <= 0) {
      Inf
    } else if (cut_pos < 1) {
      -Inf
    } else {
      ranked[cut_pos]
    }

    is_sig <- abs(x) > thresh

    # Take the label text from the column named by `label`, not from a
    # hard-coded column name.
    lab <- as.character(data[[label]])
    lab[!is_sig] <- NA_character_

    direction <- rep("Non-sig", length(x))
    direction[is_sig & x < 0] <- "Down"
    direction[is_sig & x > 0] <- "Up"

    data$sig <- as.numeric(is_sig)
    data$lab <- lab
    data$col <- direction

    if (!plot) {
      return(data)
    }

    # Namespace-qualified calls: the function does not depend on the caller
    # having attached ggplot2/ggrepel. Colours are matched by name so that a
    # missing "Down" or "Up" class cannot shift the remaining colours.
    ggplot2::ggplot(data) +
      ggplot2::geom_point(
        ggplot2::aes(!!as.name(xvar), !!as.name(yvar), color = col)
      ) +
      ggrepel::geom_text_repel(
        ggplot2::aes(!!as.name(xvar), !!as.name(yvar), label = lab),
        box.padding = grid::unit(0.90, "lines"),
        hjust = 0.40
      ) +
      ggplot2::theme(
        legend.title = ggplot2::element_blank(),
        text = ggplot2::element_text(size = 14)
      ) +
      ggplot2::scale_color_manual(
        values = c("Down" = "red", "Non-sig" = "black", "Up" = "blue")
      )
  }

首先,返回一个图形:

# With the default plot = TRUE the call draws the labelled volcano plot.
label_sig(data = Diestrus1, xvar = "log2FC", yvar = "logpv", label = "Genes3")
#> Warning: Removed 84 rows containing missing values (`geom_text_repel()`).

接下来,返回标记的数据集。

# Ask for the labelled data frame rather than the plot.
out <- label_sig(
  data = Diestrus1,
  xvar = "log2FC",
  yvar = "logpv",
  label = "Genes3",
  plot = FALSE
)
head(out)
#>       log2FC      logpv  Genes3 sig    lab     col
#> 1 -0.0794009 0.11756532    Cask   0   <NA> Non-sig
#> 2 -0.1135680 0.36959829    Ank3   0   <NA> Non-sig
#> 4 -1.1662300 0.75105470  R3hdm1   1 R3hdm1    Down
#> 5 -0.3205600 2.22594458 Lrrfip1   0   <NA> Non-sig
#> 6 -0.2062170 0.31656750     Dst   0   <NA> Non-sig
#> 7 -0.0100415 0.05498068  Clasp1   0   <NA> Non-sig

由 reprex 包(v2.0.1)创建于 2023 年 1 月 18 日

相关问题