R语言 ggplot2:错误:美学映射(aesthetics)的长度必须为 1 或与数据行数(16)相同:x、y、group

insrf1ej  于 2023-11-14  发布在  其他
关注(0)|答案(2)|浏览(189)

下面是我的代码,我原以为它只会画出一个简单的折线图:

# NOTE(review): this is the call that fails. `years` and `calculations` are
# used as aesthetics, but (as the first answer points out) `years` is not a
# column of the data frame, so ggplot cannot match the aesthetics to the
# 16 rows of data.
ggplot(data=top15andAllDatasummary.df, aes(x=years, y=calculations, group=1)) +
    geom_line() +
    geom_point()

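我得到了这个错误:
Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group
(错误:美学映射的长度必须为 1 或与数据行数(16)相同:x、y、group)
我把数据放在 R 的一个矩阵中,X 轴是年份,Y 轴是我针对每一年构造的一些计算结果(共 16 个)。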
编辑添加

# Literal definition of a sample of the data frame being plotted:
# 15 numeric columns named "2001" .. "2015" (one per year) and 6 rows whose
# row names identify the summary statistic they hold ("wide" layout).
structure(list(`2001` = c(349.315750645518, 217.47436370343, 
5.17963850977499, 126.661748432313, 57, 39), `2002` = c(703.26693877551, 
429.92, 9.32897959183673, 264.017959183673, 161, 108), `2003` = c(314.897774687065, 
193.792420027816, 4.08936022253129, 117.015994436718, 54, 37), 
    `2004` = c(305.988451086957, 190.680027173913, 3.87839673913043, 
    111.430027173913, 55, 38), `2005` = c(118.528015659408, 74.3175923660387, 
    1.50942011255199, 42.7010031808172, 10, 8), `2006` = c(120.531992244304, 
    73.8279205041202, 1.54362578768783, 45.1604459524964, 10, 
    8), `2007` = c(113.973899988451, 69.7619817530893, 1.44693382607691, 
    42.7649844092851, 10, 8), `2008` = c(110.676242590059, 67.3693570451436, 
    1.36285909712722, 41.9440264477884, 9, 7), `2009` = c(101.965558714192, 
    63.1446534003936, 1.22982724688388, 37.5910780669145, 9, 
    7), `2010` = c(93.9744360902256, 59.8894736842105, 1.14199785177229, 
    32.9429645542427, 9, 7), `2011` = c(91.8911316298046, 58.5660296328108, 
    1.15675327464033, 32.1683487223534, 9, 7), `2012` = c(91.2302181013592, 
    58.598356337583, 1.16773785691708, 31.4641239068591, 8, 6
    ), `2013` = c(87.1390443392165, 55.0509040034438, 1.10277658200603, 
    30.9853637537667, 8, 6), `2014` = c(85.7812132234942, 56.0456831068792, 
    1.09725045469134, 28.6382796619236, 8, 6), `2015` = c(88.331452900479, 
    58.526237360298, 1.22362959020756, 28.5815859499734, 8, 6
    )), .Names = c("2001", "2002", "2003", "2004", "2005", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015"), row.names = c("AllDataMeanByYear", "AllDataMeanAggAssault", 
"AllDataMeanMurderManSlaughter", "AllDataMeanRobbery", "AllDataMedianByYear", 
"AllDataMedianAggAssault"), class = "data.frame")

All Code:

 ## Total
## Per-row total of the three offence columns.
lwdata$total <-
  lwdata$murdermanslaughter +
  lwdata$Robbery +
  lwdata$Aggravated_assault
## Data Calculations Top 15
## The cutoff is the 15th total after ordering rows by Year, then total
## (both descending); every row whose total reaches the cutoff is kept.
## NOTE(review): the cutoff comes from the ordering above but is applied to
## rows of every year -- confirm this is the intended "top 15" definition.
year_total_order <- order(lwdata$Year, lwdata$total, decreasing = TRUE)
cutoff_total <- lwdata$total[year_total_order][15]
top15 <- lwdata[lwdata$total >= cutoff_total, ]
## Yearly summaries: each tapply() call groups one measure by Year and
## applies the given statistic (mean or median) within each year.

## Top 15 subset -- means
Top15MeanByYear <- tapply(top15$total, INDEX = top15$Year, FUN = mean)
Top15MeanAggAssault <- tapply(top15$Aggravated_assault, INDEX = top15$Year, FUN = mean)
Top15MeanMurderManSlaughter <- tapply(top15$murdermanslaughter, INDEX = top15$Year, FUN = mean)
Top15MeanRob <- tapply(top15$Robbery, INDEX = top15$Year, FUN = mean)

## Top 15 subset -- medians
Top15MedianByYear <- tapply(top15$total, INDEX = top15$Year, FUN = median)
Top15MedianAggAssault <- tapply(top15$Aggravated_assault, INDEX = top15$Year, FUN = median)
Top15MedianMurderManSlaughter <- tapply(top15$murdermanslaughter, INDEX = top15$Year, FUN = median)
Top15MedianRob <- tapply(top15$Robbery, INDEX = top15$Year, FUN = median)

## All data -- means
AllDataMeanByYear <- tapply(lwdata$total, INDEX = lwdata$Year, FUN = mean)
AllDataMeanAggAssault <- tapply(lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = mean)
AllDataMeanMurderManSlaughter <- tapply(lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = mean)
AllDataMeanRobbery <- tapply(lwdata$Robbery, INDEX = lwdata$Year, FUN = mean)

## All data -- medians
AllDataMedianByYear <- tapply(lwdata$total, INDEX = lwdata$Year, FUN = median)
AllDataMedianAggAssault <- tapply(lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = median)
AllDataMedianMurderManSlaughter <- tapply(lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = median)
AllDataMedianRobbery <- tapply(lwdata$Robbery, INDEX = lwdata$Year, FUN = median)
## Rounding Data To Two Decimal Points
## Each yearly summary vector is overwritten with its value rounded to
## 2 decimal places.
Top15MeanByYear <- round(Top15MeanByYear, digits = 2)
Top15MeanAggAssault <- round(Top15MeanAggAssault, digits = 2)
Top15MeanMurderManSlaughter <- round(Top15MeanMurderManSlaughter, digits = 2)
Top15MeanRob <- round(Top15MeanRob, digits = 2)
AllDataMeanByYear <- round(AllDataMeanByYear, digits = 2)
AllDataMeanAggAssault <- round(AllDataMeanAggAssault, digits = 2)
# Fix: this line used to round AllDataMeanAggAssault a second time, which
# left AllDataMeanMurderManSlaughter as the only unrounded summary.
AllDataMeanMurderManSlaughter <- round(AllDataMeanMurderManSlaughter, digits = 2)
AllDataMeanRobbery <- round(AllDataMeanRobbery, digits = 2)
Top15MedianByYear <- round(Top15MedianByYear, digits = 2)
Top15MedianAggAssault <- round(Top15MedianAggAssault, digits = 2)
Top15MedianMurderManSlaughter <- round(Top15MedianMurderManSlaughter, digits = 2)
Top15MedianRob <- round(Top15MedianRob, digits = 2)
AllDataMedianByYear <- round(AllDataMedianByYear, digits = 2)
AllDataMedianAggAssault <- round(AllDataMedianAggAssault, digits = 2)
AllDataMedianMurderManSlaughter <- round(AllDataMedianMurderManSlaughter, digits = 2)
AllDataMedianRobbery <- round(AllDataMedianRobbery, digits = 2)
## Summaries
## Stack the yearly summary vectors row-wise: each vector becomes one row
## (named after the variable), each year one column.
AllDataSummary <- rbind(
  AllDataMeanByYear,
  AllDataMeanAggAssault,
  AllDataMeanMurderManSlaughter,
  AllDataMeanRobbery,
  AllDataMedianByYear,
  AllDataMedianAggAssault,
  AllDataMedianMurderManSlaughter,
  AllDataMedianRobbery
)
Top15Summary <- rbind(
  Top15MeanByYear,
  Top15MeanAggAssault,
  Top15MeanMurderManSlaughter,
  Top15MeanRob,
  Top15MedianByYear,
  Top15MedianAggAssault,
  Top15MedianMurderManSlaughter,
  Top15MedianRob
)
## Combined table: the 8 all-data rows followed by the 8 top-15 rows.
Top15andAllDatasummary <- rbind(AllDataSummary, Top15Summary)
## Class of New Items
class(AllDataSummary)
class(Top15Summary)
# Fix: the object is created as `Top15andAllDatasummary` (capital T); the
# lowercase spelling used here before refers to an object that is never
# defined in this script.
class(Top15andAllDatasummary)
## Converting Matrices to Data Frames
AllDataSummary.df <- as.data.frame(AllDataSummary)
Top15Summary.df <- as.data.frame(Top15Summary)
Top15andAllDatasummary.df <- as.data.frame(Top15andAllDatasummary)
## Checking of New Classes
class(AllDataSummary.df)
class(Top15Summary.df)
class(Top15andAllDatasummary.df)
## Verifications for Names of New Components
colnames(Top15andAllDatasummary.df)
rownames(Top15andAllDatasummary.df)
## New Components
## The column names of the summary data frame are the years; the row names
## are the labels of the calculations.
years <- colnames(Top15andAllDatasummary.df)
# Fix: this used colnames() as well, which made `calculations` an exact copy
# of `years` instead of the calculation labels stored in the row names.
calculations <- rownames(Top15andAllDatasummary.df)
## Chicago
## Keep only the top15 rows whose City equals "Chicago"; which() skips rows
## where the comparison is NA.
chicago_rows <- which(top15$City == "Chicago")
Chicago <- top15[chicago_rows, ]
## Basic Plots
## Blue line chart of the Chicago totals against Year.
plot(
  x = Chicago$Year,
  y = Chicago$total,
  type = "l",
  col = "blue",
  main = "Chicago-Specific Data",
  xlab = "Year",
  ylab = "Total       Violent Crime (minus rape)"
)
## Data Types for Chicago
str(Chicago)


完整数据集(>100K)的链接见此处(here)。

g6ll5ycj

g6ll5ycj1#

您的数据框(我们称之为 df)为每一年各设了一列,并用行名标识每个计算变量。这是“宽”格式数据,即同一类数据分散存储在多个列中。ggplot 设计用于处理“长”格式数据,其中每一列只包含数据的一个方面(即变量、年份和数据值各占一列)。
由 Hadley Wickham(他也是 ggplot 的作者)编写的 tidyverse 系列包可以轻松地在宽格式与长格式之间来回转换数据。从 tidyr 1.0 开始,这通过 pivot_wider 和 pivot_longer 函数完成(以前分别是 spread 和 gather)。下面展示了新旧两种写法。

library(tidyverse)

# Current approach: copy the row names into a `variable` column, then let
# pivot_longer() turn every other column into (year, value) pairs.
df.new <- df %>%
    mutate(variable = rownames(.)) %>%
    pivot_longer(cols = -variable, names_to = "year", values_to = "value")

# Older approach: the same reshaping with gather(), which pivot_longer()
# has replaced.
df.new <- df %>%
    mutate(variable = rownames(.)) %>%
    gather(key = year, value = value, -variable)

                        variable year      value
1              AllDataMeanByYear 2001 349.315751
2          AllDataMeanAggAssault 2001 217.474364
3  AllDataMeanMurderManSlaughter 2001   5.179639
4             AllDataMeanRobbery 2001 126.661748
5            AllDataMedianByYear 2001  57.000000
6        AllDataMedianAggAssault 2001  39.000000
7              AllDataMeanByYear 2002 703.266939
8          AllDataMeanAggAssault 2002 429.920000
9  AllDataMeanMurderManSlaughter 2002   9.328980
10            AllDataMeanRobbery 2002 264.017959
11           AllDataMedianByYear 2002 161.000000
12       AllDataMedianAggAssault 2002 108.000000
13             AllDataMeanByYear 2003 314.897775
14         AllDataMeanAggAssault 2003 193.792420
15 AllDataMeanMurderManSlaughter 2003   4.089360
16            AllDataMeanRobbery 2003 117.015994
17           AllDataMedianByYear 2003  54.000000
18       AllDataMedianAggAssault 2003  37.000000
19             AllDataMeanByYear 2004 305.988451
20         AllDataMeanAggAssault 2004 190.680027
... and 70 more rows

这种长格式数据可以直接传给 ggplot。注意,你最初的尝试使用了一个名为“years”的变量,而它在数据框中并不存在。R(和 ggplot)无从知道你的列名(2001 到 2015)代表的是年份。

# Line chart of the long data: year on x, value on y, and one coloured line
# per `variable`.
plot.years <- ggplot(
    df.new,
    aes(year, value, colour = variable, group = variable)
) +
    geom_line()
print(plot.years)


的数据

lhcgjxsq

lhcgjxsq2#

根据你的数据,我会这样做:

library(tidyr)
# Copy the row names into a regular column so they survive the reshape.
top15andAllDatasummary.df[["variable"]] <- rownames(top15andAllDatasummary.df)
# Gather every column except `variable` into key/value pairs:
# the former column name goes to `years`, the cell value to `calculations`.
df.long <- gather(
    top15andAllDatasummary.df,
    key = years,
    value = calculations,
    -variable
)

这个 gather 调用的目的是将数据重新整理成以下形式:

# Preview the first rows of the reshaped data; the commented lines below
# show the printed result (columns: variable, years, calculations).
head(df.long)
#                        variable years calculations
# 1             AllDataMeanByYear  2001   349.315751
# 2         AllDataMeanAggAssault  2001   217.474364
# 3 AllDataMeanMurderManSlaughter  2001     5.179639
# 4            AllDataMeanRobbery  2001   126.661748
# 5           AllDataMedianByYear  2001    57.000000
# 6       AllDataMedianAggAssault  2001    39.000000


完成后,我们可以继续绘制:

# Plot the long data: one coloured line, with point markers, per `variable`.
ggplot(
   df.long,
   aes(years, calculations, group = variable, colour = variable)
) +
   geom_line() +
   geom_point()


这是你想要的结果吗?

相关问题