我需要计算几年数据的平均住院时间,取决于患者人口统计学(CPL状态和人口普查或出院状态)。我已经计算了这些的平均值,包括偶然事件,并将它们分配为值,以便我记住它们是什么。我感兴趣的是将值列表编译成一个表,这样我就可以将整个内容导出到Excel中。
这是我的资料
#2014 average
#census#
y2014_cen_all <- mean(inp[(inp$year == 2014),]$los_years)
y2014_cen_cpl <- mean(inp[(inp$year == 2014 & inp$cpl_stat == "cpl"),]$los_years)
y2014_cen_non <- mean(inp[(inp$year == 2014 & inp$cpl_stat == "non"),]$los_years)
#discharge
y2014_dis_all <- mean(inp[(inp$year == 2014 & inp$census_discharge == "disch"),]$los_years)
y2014_dis_all_count <- nrow(inp[inp$year == 2014 & inp$census_discharge == "disch",])
y2014_dis_cpl <- mean(inp[(inp$year == 2014 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch"),]$los_years)
y2014_dis_cpl_count <- nrow(inp[inp$year == 2014 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch",])
y2014_dis_non <- mean(inp[(inp$year == 2014 & inp$cpl_stat == "non" & inp$census_discharge == "disch"),]$los_years)
y2014_dis_non_count <- nrow(inp[inp$year == 2014 & inp$cpl_stat == "non" & inp$census_discharge == "disch",])
#2015 average
#census#
y2015_cen_all <- mean(inp[(inp$year == 2015),]$los_years)
y2015_cen_cpl <- mean(inp[(inp$year == 2015 & inp$cpl_stat == "cpl"),]$los_years)
y2015_cen_non <- mean(inp[(inp$year == 2015 & inp$cpl_stat == "non"),]$los_years)
#discharge
y2015_dis_all <- mean( inp[(inp$year == 2015 & inp$census_discharge == "disch"),]$los_years)
y2015_dis_all_count <- nrow(inp[inp$year == 2015 & inp$census_discharge == "disch",])
y2015_dis_cpl <- mean( inp[(inp$year == 2015 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch"),]$los_years)
y2015_dis_cpl_count <- nrow(inp[inp$year == 2015 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch",])
y2015_dis_non <- mean( inp[(inp$year == 2015 & inp$cpl_stat == "non" & inp$census_discharge == "disch"),]$los_years)
y2015_dis_non_count <- nrow(inp[inp$year == 2015 & inp$cpl_stat == "non" & inp$census_discharge == "disch",])
#2016 average
#census#
y2016_cen_all <- mean(inp[(inp$year == 2016),]$los_years)
y2016_cen_cpl <- mean(inp[(inp$year == 2016 & inp$cpl_stat == "cpl"),]$los_years)
y2016_cen_non <- mean(inp[(inp$year == 2016 & inp$cpl_stat == "non"),]$los_years)
#discharge
y2016_dis_all <- mean( inp[(inp$year == 2016 & inp$census_discharge == "disch"),]$los_years)
y2016_dis_all_count <- nrow(inp[inp$year == 2016 & inp$census_discharge == "disch",])
y2016_dis_cpl <- mean( inp[(inp$year == 2016 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch"),]$los_years)
y2016_dis_cpl_count <- nrow(inp[inp$year == 2016 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch",])
y2016_dis_non <- mean( inp[(inp$year == 2016 & inp$cpl_stat == "non" & inp$census_discharge == "disch"),]$los_years)
y2016_dis_non_count <- nrow(inp[inp$year == 2016 & inp$cpl_stat == "non" & inp$census_discharge == "disch",])
#2017 average
#census#
y2017_cen_all <- mean(inp[(inp$year == 2017),]$los_years)
y2017_cen_cpl <- mean(inp[(inp$year == 2017 & inp$cpl_stat == "cpl"),]$los_years)
y2017_cen_non <- mean(inp[(inp$year == 2017 & inp$cpl_stat == "non"),]$los_years)
#discharge
y2017_dis_all <- mean( inp[(inp$year == 2017 & inp$census_discharge == "disch"),]$los_years)
y2017_dis_all_count <- nrow(inp[inp$year == 2017 & inp$census_discharge == "disch",])
y2017_dis_cpl <- mean( inp[(inp$year == 2017 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch"),]$los_years)
y2017_dis_cpl_count <- nrow(inp[inp$year == 2017 & inp$cpl_stat == "cpl" & inp$census_discharge == "disch",])
y2017_dis_non <- mean( inp[(inp$year == 2017 & inp$cpl_stat == "non" & inp$census_discharge == "disch"),]$los_years)
y2017_dis_non_count <- nrow(inp[inp$year == 2017 & inp$cpl_stat == "non" & inp$census_discharge == "disch",])
它们都储存在环境中。
我如何创建一个看起来像这样的表:
| 年份|总平均值|CPL平均值|总计,不包括CPL平均值|
| - -----|- -----|- -----|- -----|
| 二○一四年|平均|平均|平均|
| 2015年|平均|平均|平均|
| 2016年|平均|平均|平均|
| 2017年|平均|平均|平均|
编辑
我只使用一个数据集,这里是一个例子:
下面是运行上述代码时的环境截图:
以下是dput(head(inp, n = 10))
的输出,删除了敏感信息:
#> dput(head(inp, n = 10))
structure(list(unit = c("065:Ward 65 Elder Rehab Medical Unit",
"065:Ward 65 Elder Rehab Medical Unit", "065:Ward 65 Elder Rehab Medical Unit",
"065:Ward 65 Elder Rehab Medical Unit", "065:Ward 65 Elder Rehab Medical Unit",
"065:Ward 65 Elder Rehab Medical Unit", "065:Ward 65 Elder Rehab Medical Unit",
"065:Ward 65 Elder Rehab Medical Unit", "065:Ward 65 Elder Rehab Medical Unit",
"065:Ward 65 Elder Rehab Medical Unit"),
admission_date = structure(c(1329091200, 1349222400, 1253059200,
1403568000, 1419292800, 1194393600, 1265241600, 1021420800,
777859200, 1357603200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), discharge_date = c(".", ".", ".", ".", ".", ".",
".", ".", ".", "."), discharge_month = c(".", ".", ".", ".",
".", ".", ".", ".", ".", "."), disposition = c(".", ".",
".", ".", ".", ".", ".", ".", ".", "."), los_days = c(1053,
820, 1933, 191, 9, 2612, 1792, 4614, 7433, 723), los_years = c(2.88493150684932,
2.24657534246575, 5.2958904109589, 0.523287671232877, 0.024657534246575,
7.15616438356164, 4.90958904109589, 12.641095890411, 20.3643835616438,
1.98082191780822), gender = c("Male", "Female", "Male", "Male",
"Female", "Male", "Male", "Female", "Female", "Male"), legal_status = c(21,
32, 32, 32, 20, 21, 32, 32, 32, 32), cpl_stat = c("non",
"non", "non", "non", "non", "non", "non", "non", "non", "non"
), prin_diag = c("Schizophrenia, Undifferentiated Type",
"Schizophrenia", "Schizophrenia", "Schizoaffective Disorder",
"Schizoaffective disorder, Bipolar type", "Schizoaffective disorder, Bipolar type",
"Schizophrenia, Paranoid Type", "Schizophrenia, Undifferentiated Type",
"Schizoaffective disorder, Bipolar type", "Schizophrenia, Paranoid Type"
), year = c(2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,
2014, 2014), census_discharge = c("cen", "cen", "cen", "cen",
"cen", "cen", "cen", "cen", "cen", "cen"), inp_res = c("inp",
"inp", "inp", "inp", "inp", "inp", "inp", "inp", "inp", "inp"
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))
1条答案
按热度按时间xytpbqjk1#
这里最好的办法是保留原始数据集(
inp
),并通过对inp
进行分组和整形来实现摘要。“整洁”表
开始加载您的资源...
...然后应用此工作流:
给定一个
inp
ut,就像你的例子一样这将产生一个整洁的
out
put,其中**year
与census_discharge
和cpl_stat
的每个组合都有一行**。avg_*
和n_*
)。**例如,考虑avg_*
度量:avg_year
冗余地表现为循环year
的平均值;然后avg_year_cendis
冗余地出现为year
和census_discharge
状态的重复组合的平均值;最后avg_year_cendis_cpl
只出现一次,作为year
与census_discharge
状态和cpl_stat
us的 * 唯一 * 组合的平均值。注意事项
完全巧合的是,度量值(
avg_*
和n_*
)在所有级别的分组(_year
与_cendis
和_cpl
)中都是相同的(5.80
和10
)。这是因为样本数据(inp
)只有一个分组的数据:2014
是唯一的year
,"cen"
是唯一的census_discharge
,"non"
是唯一的cpl_stat
。* * 对于 * 完整 *inp
数据集,这些度量值会有所不同,因为它们应该有所不同。**“宽”表
要实现所需的形式,即每个组合都有一个 * 列 *(而不是一个 * 行 *),必须应用进一步的步骤。首先加载附加资源
...然后应用此工作流来调整数据。
如前所述,给定一个
inp
,这应该会产生以下输出:census_discharge
status和cpl_stat
us的每个组合都显示一个单独的度量值列。**例如,对于非CPL的人口普查患者,*_avg
标记为cen_non_avg
。*_all_avg
还用于标记所有CPL状态的总数。最后,请注意
total_*
列如何描述全年所有状态的度量。注意事项
sd()
引入标准差,可以在第一个工作流中将sd_year = sd(los_years)
添加到avg_year = mean(los_years)
的正下方;对于sd_year_cendis
和sd_year_cendis_cpl
等等。inp
数据集。**使用完整的数据集,我们希望看到更多列,如dis_cpl_avg
:显然,你可以把它简化为平均值
通过简单地从第一个工作流中省略
n_* = ...
行。你也可以在最后做同样的事情,比如... %>% select(!ends_with("_n"))
。* * 我建议只在最后排除度量值,这样您就不会丢失任何信息:你可以选择你想要的东西**