R语言 在ggplot突变棒棒糖图中添加分支

lp0sw83n  于 2023-11-14  发布在  其他
关注(0)|答案(2)|浏览(138)

我有一个非常简单的ggplot 2棒棒糖图,来自一些虚拟数据:

mut.df <- data.frame("AA" = c(201, 203, 500, 601), 
                     "Mut" = c("V201L", "R203H", "Q500*", "P601fs"), 
                     "Type" = c("Missense", "Missense", "Nonsense", "Frameshift"), 
                     "Freq" = c(2,3,4,1))

domain.df <- data.frame("Feature" = c("Start", "Dom1", "Dom2", "End"), 
                        "Type" = c("str", "dom", "dom", "str"),
                        "Start" = c(1, 180, 480, 650), 
                        "End" = c(1, 230, 630, 650))

str.fill <- "#E1E1E1"
str.col <- "#16161D"

dom.fill <- c("Dom2" = "#FF0000", "Dom1" = "#AA4BAB")
dom.col <- c("#16161D")

library(ggplot2)
library(ggrepel)

gp <- ggplot() +
  geom_rect(data = subset(domain.df, Type == "str"),
            mapping = aes(xmin = min(Start), xmax = max(End), ymin = 0.3, ymax = 0.7),
            fill = str.fill,
            colour = str.col)

gp <- gp + scale_y_continuous(limits = c(0, 10), breaks = 0:10)

gp <- gp + geom_segment(data = mut.df, 
                  mapping = aes(x = AA, xend = AA, y = 0.7, yend = Freq)) +
  geom_point(data = mut.df,
             mapping = aes(x = AA, y = Freq, fill = Type),
             shape = 21,
             size = 2) +
  geom_text_repel(data = mut.df,
                  mapping = aes(x = AA, y = Freq, label = Mut),
                  bg.colour = "white",
                  seed = 12345,
                  nudge_y = 0.25)
  
gp <- gp + geom_rect(data = subset(domain.df, Type == "dom"),
               mapping = aes(xmin = Start, xmax = End, ymin = 0.2, ymax = 0.8, fill = Feature, group = Feature),
               fill = dom.fill[subset(domain.df, Type == "dom")$Feature],
               colour = dom.col)

gp <- gp +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted")) +
  labs(x = "AA", y = "Freq", fill = "Mutation")

字符串

我想添加一些智能格式的分支,将前两个棒棒糖分开,而不是将它们堆叠在一起。例如:

有人能建议如何在ggplot 2中实现这一点吗?
欢迎提出任何建议!

hfyxw5xn

hfyxw5xn1#

我不知道有任何扩展包允许这种有条件的“分支”类型的回避。我所知道的最接近的事情是将棒棒糖转换为带有手动布局的两元素图。棒棒糖的下端将是固定的,但是如果上端接近AA的另一个值,则它们的位置上会添加一些随机噪声。geom_edge_elbow

library(tidyverse)
library(tidygraph)
library(ggraph)

mut.df %>%
  select(Mut, AA, Type, Freq) %>%
  mutate(Base = paste0(Mut, '_0'), .after = 'Mut') %>%
  as_tbl_graph() %>%
  mutate(AA = rep(mut.df$AA, 2),
         Freq = c(mut.df$Freq, rep(0.5, nrow(mut.df))),
         Type = c(mut.df$Type, rep(NA, nrow(mut.df)))) %>%
  mutate(dist = sapply(AA, \(x) min(abs(x - mut.df$AA[!mut.df$AA %in% x])))) %>%
  mutate(AA = ifelse(!is.na(Type) & dist < 20, 
                     runif(n(), -50, 50), 0) + AA) %>%
  ggraph(layout = 'manual', x = AA, y = Freq) +
  geom_rect(data = subset(domain.df, Type == "str"),
            mapping = aes(xmin = min(Start), xmax = max(End), 
                          ymin = 0.3, ymax = 0.7),
            fill = str.fill,
            colour = str.col) +
  geom_rect(data = subset(domain.df, Type == "dom"),
            mapping = aes(xmin = Start, xmax = End, ymin = 0.2, 
                          ymax = 0.8, fill = Feature, group = Feature),
            fill = dom.fill[subset(domain.df, Type == "dom")$Feature],
            colour = dom.col) +
  geom_edge_elbow(aes(direction = 1), strength = 0.5) +
  geom_node_point(shape = 21, aes(fill = Type, size = Type)) +
  geom_node_text(aes(label = ifelse(is.na(Type), '', name)), 
                 angle = 90, hjust = -0.3) +
  scale_size_manual(values = rep(3, 3), breaks = unique(mut.df$Type),
                    guide = 'none') +
  scale_fill_discrete(breaks = unique(mut.df$Type)) +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted")) +
  labs(x = "AA", y = "Freq", fill = "Mutation") +
  ylim(c(0, 5))

字符串

请注意,您可能需要运行代码几次,以获得一个您满意的躲避图,因为它是随机发生的。您可以 * 安排它,以便计算躲避,但这将是一个非常复杂的计算,需要选择性随机化(ggrepel使用的),或者某种形式的聚类/圆 Package 算法。除非我要产生很多这样的图,或者有很多冲突的聚类,我可能会坚持上面所示的“随机选择最好的”策略。

rqdpfwrv

rqdpfwrv2#

我试着在没有任何花哨的聚类的情况下做到这一点,只是做了一点间距。我认为这将适用于稀疏的点集,但一旦突变变得非常密集,可能会成为一个问题。

# function to shift the x-axis coordinates when points are too close
shift.lollipop.x <- function(mut.pos = NULL, total.length = NULL, shift.factor = 0.05){
  
  pos.dif <- 0
  for (i in 1:length(mut.pos)){
    pos.dif <- c(pos.dif, mut.pos[i+1] - mut.pos[i])
  }
  
  idx <- which(pos.dif < shift.factor*total.length)
  
  ## deal with odd and even sets of points
  if (median(idx) %% 1==0){
    mut.pos[idx[idx < median(idx)]] <- mut.pos[idx[idx < median(idx)]] - shift.factor*total.length
    mut.pos[idx[idx > median(idx)]] <- mut.pos[idx[idx > median(idx)]] + shift.factor*total.length
  } else {
    mut.pos[idx[idx == median(idx)-0.5]] <- mut.pos[idx[idx == median(idx)-0.5]] - 0.5*shift.factor*total.length
    mut.pos[idx[idx == median(idx)+0.5]] <- mut.pos[idx[idx == median(idx)+0.5]] + 0.5*shift.factor*total.length
    
    mut.pos[idx[idx < median(idx)-0.5]] <- mut.pos[idx[idx < median(idx)-0.5]] - shift.factor*total.length
    mut.pos[idx[idx > median(idx)+0.5]] <- mut.pos[idx[idx > median(idx)+0.5]] + shift.factor*total.length
  }
  
  mut.pos
  
}

# function to split the segment into 3 parts
shift.lollipop.y <- function(x, start.y = 0.7){
  mod.start <- x - start.y
  
  set1 <- start.y + mod.start/3
  set2 <- set1 + mod.start/3
  
  as.data.frame(cbind(set1,set2))
}

mut.df$Shift.AA <- shift.lollipop.x(mut.df$AA, 650)
mut.df <- cbind(mut.df, shift.lollipop.y(mut.df$Freq, 0.7))

str.fill <- "#E1E1E1"
str.col <- "#16161D"

dom.fill <- c("Dom2" = "#FF0000", "Dom1" = "#AA4BAB")
dom.col <- c("#16161D")

library(ggplot2)
library(ggrepel)

gp <- ggplot() +
  geom_rect(data = subset(domain.df, Type == "str"),
            mapping = aes(xmin = min(Start), xmax = max(End), ymin = 0.3, ymax = 0.7),
            fill = str.fill,
            colour = str.col)

gp <- gp + scale_y_continuous(limits = c(0, 10), breaks = 0:10)

gp <- gp + 
  geom_segment(data = mut.df, 
                        mapping = aes(x = AA, xend = AA, y = 0.7, yend = set1)) +
  geom_segment(data = mut.df, 
               mapping = aes(x = AA, xend = Shift.AA, y = set1, yend = set2)) +

  geom_segment(data = mut.df, 
               mapping = aes(x = Shift.AA, xend = Shift.AA, y = set2, yend = Freq)) +
  
  geom_point(data = mut.df,
             mapping = aes(x = Shift.AA, y = Freq, fill = Type),
             shape = 21,
             size = 2) +
  geom_text_repel(data = mut.df,
                  mapping = aes(x = Shift.AA, y = Freq, label = Mut),
                  bg.colour = "white",
                  seed = 12345,
                  nudge_y = 0.25,
                  angle = 90)

gp <- gp + geom_rect(data = subset(domain.df, Type == "dom"),
                     mapping = aes(xmin = Start, xmax = End, ymin = 0.2, ymax = 0.8, fill = Feature, group = Feature),
                     fill = dom.fill[subset(domain.df, Type == "dom")$Feature],
                     colour = dom.col)

gp <- gp +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted")) +
  labs(x = "AA", y = "Freq", fill = "Mutation")

gp

字符串

相关问题