R中每小时到半小时的插值

sy5wg1nm  于 2023-01-15  发布在  其他
关注(0)|答案(2)|浏览(298)

我有一个名为“load_demand”的数据框,包含2018年至2022年期间按天分组的每小时电力需求。以下是关于数据框“load_demand”的详细信息:

dput(head(load_demand))
structure(list(Date = structure(c(17532, 17533, 17534, 17535, 
17536, 17537), class = "Date"), HR1 = c(617.3, 611.9, 621.6, 
651.4, 639, 653.9), HR2 = c(589.4, 578.8, 600.3, 622.6, 624.4, 
631.3), HR3 = c(556.1, 569.9, 579.1, 610.6, 611.1, 612.9), HR4 = c(566.3, 
558.8, 580.1, 592, 600, 607.3), HR5 = c(563.4, 573.1, 589.8, 
598.4, 591.6, 608.5), HR6 = c(551.8, 597, 609.2, 624.4, 622, 
601), HR7 = c(523.6, 577.1, 578.5, 605.3, 592.8, 582), HR8 = c(520.7, 
638.5, 647.1, 671.9, 674.8, 606.8), HR9 = c(542.5, 729, 732.3, 
745.7, 760.1, 659.1), HR10 = c(589, 797.4, 796.2, 811.9, 821.4, 
719.7), HR11 = c(617, 815.2, 818.1, 840.7, 845.6, 743.2), HR12 = c(611.3, 
796.8, 792.2, 813.5, 820.7, 723.1), HR13 = c(617.9, 785, 800.6, 
806.1, 810.2, 712.9), HR14 = c(620.7, 822, 840.3, 832.2, 829.8, 
733.4), HR15 = c(624.6, 840.4, 846.6, 854.3, 859.1, 714.9), HR16 = c(631.3, 
833.9, 825.5, 854, 853.9, 702), HR17 = c(632.2, 789.1, 778.1, 
806, 770.1, 694.9), HR18 = c(689.1, 776.8, 794.6, 788.4, 793.9, 
723.4), HR19 = c(758.3, 831.2, 843, 848, 836.6, 785.7), HR20 = c(756.6, 
799.6, 831.5, 826.1, 819.2, 763.2), HR21 = c(744.7, 781.2, 812.3, 
807.1, 784.6, 736.8), HR22 = c(713.7, 734.2, 764.4, 761.5, 748.2, 
677.6), HR23 = c(686.1, 713.6, 732.9, 720.1, 730.6, 673.9), HR24 = c(637.8, 
657.2, 688.9, 676.8, 676.7, 643)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

我想将列2:25中的数据(从名为“HR1”的列到名为“HR24”的列)从每小时到每半小时的数据插入名为halfhourly_load的新数据框中,该数据框保留第一列“Date”并创建48个负载列,其中从2:49开始的每列都被命名为

c("HR0030", "HR0100", "HR0130", "HR0200", "HR0230", "HR0300", 
"HR0330", "HR0400", "HR0430", "HR0500", "HR0530", "HR0600", "HR0630", 
"HR0700", "HR0730", "HR0800", "HR0830", "HR0900", "HR0930", "HR1000", 
"HR1030", "HR1100", "HR1130", "HR1200", "HR1230", "HR1300", "HR1330", 
"HR1400", "HR1430", "HR1500", "HR1530", "HR1600", "HR1630", "HR1700", 
"HR1730", "HR1800", "HR1830", "HR1900", "HR1930", "HR2000", "HR2030", 
"HR2100", "HR2130", "HR2200", "HR2230", "HR2300", "HR2330", "HR2400")

“HR”是一个24小时时钟,例如“HR1”等于01:00AM,“HR24”是记录的特定负载的00:00AM。因此,在新的 Dataframe 中,“HR0030”表示00:30am,这是前一天的“HR24”和当天的“HR1”的插值。

ubof19bj

ubof19bj1#

这里有一个方法,使用基函数approx线性插值数据集的每个行向量,忽略日期列,只有小时列是重要的。
"HR0030"的值需要外推。计算向量前两个值的斜率和y截距,因为前半小时超出x范围,并且approx仅进行内插。

finterp <- function(y, x, col_names = new_col_names) {
  out <- approx(x, y, xout = x - 0.5)
  m <- diff(y[1:2])/diff(x[1:2])
  b <- y[1] - m*x[1]
  out$y[1] <- m*0.5 + b
  ynew <- numeric(length(col_names))
  ynew[c(FALSE, TRUE)] <- y
  ynew[c(TRUE, FALSE)] <- out$y
  setNames(ynew, col_names)
}

i_cols <- grep("H", names(load_demand))
halfhourly_load <- t(apply(load_demand[i_cols], 1, finterp, x = seq_along(i_cols)))

halfhourly_load <- cbind(load_demand["Date"], halfhourly_load)

创建于2023年1月12日,使用reprex v2.0.2

编辑

虽然这里没有要求,但是有一个函数使用行向量对小时的线性回归来预测半小时值。
就像上面一样,该函数对每一行执行apply运算,以获得新的半小时值。

flm <- function(y, x, col_names = new_col_names) {
  fit <- lm(y ~ x, data.frame(x, y))
  ypred <- predict(fit, data.frame(x = x - 0.5))
  ynew <- numeric(length(col_names))
  ynew[c(FALSE, TRUE)] <- y
  ynew[c(TRUE, FALSE)] <- ypred
  setNames(ynew, col_names)
}

i_cols <- grep("H", names(load_demand))
halfhourly_load_lm <- t(apply(load_demand[i_cols], 1, flm, x = seq_along(i_cols)))
halfhourly_load_lm <- cbind(load_demand["Date"], halfhourly_load_lm)

创建于2023年1月12日,使用reprex v2.0.2

编辑

上面的代码中缺少新的列名向量。

new_col_names <- c("HR0030", "HR0100", "HR0130", "HR0200", "HR0230", "HR0300", 
                   "HR0330", "HR0400", "HR0430", "HR0500", "HR0530", "HR0600", "HR0630", 
                   "HR0700", "HR0730", "HR0800", "HR0830", "HR0900", "HR0930", "HR1000", 
                   "HR1030", "HR1100", "HR1130", "HR1200", "HR1230", "HR1300", "HR1330", 
                   "HR1400", "HR1430", "HR1500", "HR1530", "HR1600", "HR1630", "HR1700", 
                   "HR1730", "HR1800", "HR1830", "HR1900", "HR1930", "HR2000", "HR2030", 
                   "HR2100", "HR2130", "HR2200", "HR2230", "HR2300", "HR2330", "HR2400")

创建于2023年1月12日,使用reprex v2.0.2

jm2pwxwz

jm2pwxwz2#

我认为这应该比Rui的解决方案更快,因为它避免了循环。而且我们的0030时间是不同的,因为我在00240010之间插值。

l <- as.matrix(load_demand[-1])
o <- as.numeric(t(l))
s <- c(o[1], o[-length(o)])
i <- matrix((o+s)/2, ncol=24, byrow=TRUE)
n <- rbind(i, l)
dim(n) <- dim(n)*c(0.5, 2)

hr <- head(sprintf("HR%02d%02d", rep(0:24, each=2), c(0, 30))[-1], -1)
colnames(n) <- hr

load_demand_i <- data.frame(load_demand[,1], n)    
load_demand_i[1:6, 1:9]

#         Date HR0030 HR0100 HR0130 HR0200 HR0230 HR0300 HR0330 HR0400
# 1 2018-01-01 617.30  617.3 603.35  589.4 572.75  556.1 561.20  566.3
# 2 2018-01-02 624.85  611.9 595.35  578.8 574.35  569.9 564.35  558.8
# 3 2018-01-03 639.40  621.6 610.95  600.3 589.70  579.1 579.60  580.1
# 4 2018-01-04 670.15  651.4 637.00  622.6 616.60  610.6 601.30  592.0
# 5 2018-01-05 657.90  639.0 631.70  624.4 617.75  611.1 605.55  600.0
# 6 2018-01-06 665.30  653.9 642.60  631.3 622.10  612.9 610.10  607.3

相关问题