R语言 为什么我的(ggplot2)图值与数据集中的值不同?

r9f1avp5  于 2023-03-20  发布在  其他
关注(0)|答案(2)|浏览(338)

我在纠结一件事:我的ggplot图值与我的数据不匹配。有什么想法吗?下面是我的代码:

# data
# dput()-style literal of the example data: 53 rows with character columns
# Day ("dd-mm-YYYY"), Start and End ("HH:MM"), plus the factor Behaviour with
# levels Red, Amber, Green, Asleep.
# Note: the assignment operator must be the ASCII `<-`; a typographic dash
# here is a syntax error.
gar <- structure(list(Day = c("22-02-2023", "22-02-2023", "22-02-2023", 
"23-02-2023", "23-02-2023", "23-02-2023", "24-02-2023", "24-02-2023", 
"24-02-2023", "24-02-2023", "27-02-2023", "27-02-2023", "28-02-2023", 
"28-02-2023", "28-02-2023", "28-02-2023", "28-02-2023", "28-02-2023", 
"22-02-2023", "22-02-2023", "22-02-2023", "23-02-2023", "23-02-2023", 
"24-02-2023", "24-02-2023", "24-02-2023", "27-02-2023", "27-02-2023", 
"28-02-2023", "28-02-2023", "28-02-2023", "22-02-2023", "22-02-2023", 
"22-02-2023", "22-02-2023", "23-02-2023", "23-02-2023", "24-02-2023", 
"24-02-2023", "24-02-2023", "27-02-2023", "27-02-2023", "28-02-2023", 
"28-02-2023", "28-02-2023", "28-02-2023", "28-02-2023", "22-02-2023", 
"23-02-2023", "24-02-2023", "24-02-2023", "27-02-2023", "28-02-2023"
), Behaviour = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Red", 
"Amber", "Green", "Asleep"), class = "factor"), Start = c("12:00", 
"17:00", "21:00", "09:00", "18:00", "21:00", "09:00", "14:00", 
"18:00", "20:00", "10:00", "18:00", "12:00", "17:00", "19:00", 
"00:00", "02:00", "04:00", "09:00", "15:00", "00:00", "06:00", 
"22:00", "06:00", "16:00", "22:00", "06:00", "21:00", "06:00", 
"15:00", "22:00", "07:00", "11:00", "16:00", "20:00", "08:00", 
"20:00", "07:00", "15:00", "17:00", "09:00", "23:00", "07:00", 
"11:00", "16:00", "18:00", "20:00", "13:00", "17:00", "11:00", 
"19:00", "16:00", "08:00"), End = c("15:00", "20:00", "00:00", 
"17:00", "20:00", "22:00", "11:00", "15:00", "19:00", "22:00", 
"16:00", "21:00", "15:00", "18:00", "20:00", "01:00", "03:00", 
"05:00", "11:00", "16:00", "06:00", "08:00", "06:00", "07:00", 
"17:00", "06:00", "09:00", "06:00", "07:00", "16:00", "06:00", 
"09:00", "12:00", "17:00", "21:00", "09:00", "21:00", "09:00", 
"16:00", "18:00", "10:00", "00:00", "08:00", "12:00", "17:00", 
"19:00", "22:00", "14:00", "19:00", "14:00", "20:00", "18:00", 
"11:00")), row.names = c(NA, -53L), class = c("tbl_df", "tbl", 
"data.frame"))

# convert the text columns to date-time / date values
# Explicit formats are required: without `format`, a bare "HH:MM" string is
# not an accepted date-time, and as.Date() would read "22-02-2023" in ISO
# (year-month-day) order, i.e. as year 22 rather than 22 Feb 2023.
# POSIXct with a fixed time zone is used because it stores cleanly as a
# data-frame column and avoids daylight-saving gaps in the local time zone.
gar$Start <- as.POSIXct(gar$Start, format = "%H:%M", tz = "UTC")
gar$End <- as.POSIXct(gar$End, format = "%H:%M", tz = "UTC")
gar$Day <- as.Date(gar$Day, format = "%d-%m-%Y")

# convert Behaviour as factor
gar$Behaviour <- as.factor(gar$Behaviour)

# extract the time component using the format "%H:%M" & "%d-%m-%Y"
# format() returns character vectors, so Start, End and Day are plain
# strings again after this step.
gar$Start <- format(gar$Start, "%H:%M")
gar$End <- format(gar$End, "%H:%M")
gar$Day <- format(gar$Day, "%d-%m-%Y")

# sort the rows by Behaviour, then set the factor level order explicitly
gar <- mutate(
  arrange(gar, Behaviour),
  Behaviour = factor(
    Behaviour,
    levels = c("Red", "Amber", "Green", "Asleep")
  )
)

# plot: one facet per Day, one thick coloured segment per Start-End interval
ggplot(data = gar, mapping = aes(colour = Behaviour)) +
  geom_rect(
    aes(xmin = Start, xmax = End, ymin = Behaviour, ymax = Behaviour)
  ) +
  geom_segment(
    aes(x = Start, xend = End, y = Behaviour, yend = Behaviour),
    size = 6
  ) +
  facet_wrap(~Day, ncol = 1) +
  scale_colour_manual(
    values = c(
      Red = "red",
      Green = "#99CC33",
      Amber = "#FFCC00",
      Asleep = "gray"
    )
  ) +
  theme_minimal() +
  # the newlines pad the axis titles away from the panels
  labs(x = "\n\nDuration", y = "Behaviour\n\n")

然后,我得到了所附的图像。

不过,大家会看到,2023年2月22日的Amber(琥珀色)条并没有画在数据所指定的位置,其他日期的其他类别也存在同样的问题......
任何帮助将不胜感激,谢谢!
详情如上所述。

qltillow

qltillow1#

您的问题在于有一些End时间是00:00。由于您把时间当作字符变量来绘图,00:00会排在x轴的最左侧。因此,第三个Amber条目从21:00开始,却向左一直延伸回同一天的00:00。
我通常建议使用真正的日期时间类型来绘制日期时间,但这里最直接的修正方法是把所有等于00:00的End值改为24:00:

# Relabel End times of "00:00" as "24:00" before plotting; Start and End are
# still plotted as character values.
gar %>%
  mutate(End = ifelse(End == "00:00", "24:00", End)) %>%
  ggplot(mapping = aes(colour = Behaviour)) +
  geom_rect(
    aes(xmin = Start, xmax = End, ymin = Behaviour, ymax = Behaviour)
  ) +
  geom_segment(
    aes(x = Start, xend = End, y = Behaviour, yend = Behaviour),
    size = 6
  ) +
  facet_wrap(~Day, ncol = 1) +
  scale_colour_manual(
    values = c(
      Red = "red",
      Green = "#99CC33",
      Amber = "#FFCC00",
      Asleep = "gray"
    )
  ) +
  theme_minimal() +
  # the newlines pad the axis titles away from the panels
  labs(x = "\n\nDuration", y = "Behaviour\n\n")

最好将时间转换为整数小时,并在x轴上使用自定义标签函数:

# Plot on a numeric hour axis: keep only the "HH" part of each time, treat an
# End hour of 0 as 24, and label the breaks back as "HH:00".
gar %>%
  mutate(
    Start = as.numeric(substr(Start, 1, 2)),
    End = as.numeric(substr(End, 1, 2)),
    End = ifelse(End == 0, 24, End)
  ) %>%
  ggplot(mapping = aes(colour = Behaviour)) +
  geom_segment(
    aes(x = Start, xend = End, y = Behaviour, yend = Behaviour),
    size = 6
  ) +
  facet_wrap(~Day, ncol = 1) +
  scale_x_continuous(
    name = "Duration",
    breaks = 0:24,
    # hour 24 wraps around and is labelled "00:00"
    labels = function(hour) sprintf("%02d:00", hour %% 24)
  ) +
  scale_colour_manual(
    values = c(
      Red = "red",
      Green = "#99CC33",
      Amber = "#FFCC00",
      Asleep = "gray"
    )
  ) +
  theme_minimal() +
  theme(panel.grid.minor.x = element_blank())

yzuktlbb

yzuktlbb2#

@AllanCameron的回答是正确的,这个精简你的数据处理管道的建议应该是一个评论,但是有点太长了......(它确实在技术上解决了问题,但是没有给之前的答案增加任何概念性的东西)。

library(tidyverse)
# Single mutate() that prepares every column for plotting:
# End "00:00" becomes "24:00", Start/End are parsed as times, Day as a date
# (day-month-year), and Behaviour gets an explicit level order.
gar <- gar |>
  mutate(
    End = ifelse(End == "00:00", "24:00", End),
    Start = readr::parse_time(Start),
    End = readr::parse_time(End),
    Day = readr::parse_date(Day, format = "%d-%m-%Y"),
    Behaviour = factor(
      Behaviour,
      levels = c("Red", "Amber", "Green", "Asleep")
    )
  )

相关问题