在下面的数据表中,我试图找到value1
列和value2
列之间的7天滚动斯皮尔曼相关性。
temp_dt = structure(list(date = structure(c(19390L, 19391L, 19394L, 19395L,
19396L, 19397L, 19398L, 19401L, 19402L, 19403L, 19404L, 19405L,
19409L, 19410L, 19411L, 19412L, 19415L, 19416L, 19417L, 19418L,
19419L, 19422L, 19423L, 19424L, 19425L, 19426L, 19429L, 19430L,
19431L, 19432L, 19433L, 19436L, 19437L, 19438L, 19439L, 19440L,
19443L, 19444L, 19445L, 19446L, 19447L, 19450L, 19451L, 19452L,
19453L, 19457L, 19458L, 19459L, 19460L, 19461L), class = c("IDate",
"Date")), close = c(21.34, 21.4, 21.45, 21.37, 21.26, 21.14,
21.33, 21.28, 21.15, 21.03, 21.11, 21.22, 21.47, 21.38, 20.78,
20.54, 20.24, 19.88, 19.87, 19.87, 20.08, 20.09, 20, 20.11, 19.93,
19.91, 19.63, 19.64, 19.73, 19.68, 19.5, 19.2, 19.34, 19.65,
19.9, 20.19, 20.31, 20.33, 20.32, 20.5, 20.78, 20.9, 20.75, 20.94,
21.05, 21.21, 20.87, 20.64, 20.63, 20.69), bop_green = c(0.00302961,
0.0231944, 0.0374391, 0.0390877, 0.03104, 0.0131761, 0.0131656,
0.00135036, -0.0208444, -0.0470593, -0.0502583, -0.0391544, -0.013647,
-0.0106879, -0.0103398, -0.027718, -0.0562916, -0.0885738, -0.101889,
-0.117401, -0.115863, -0.118662, -0.123646, -0.114466, -0.126912,
-0.13905, -0.158688, -0.17043, -0.165913, -0.165853, -0.165351,
-0.181065, -0.181714, -0.160895, -0.126132, -0.0803255, -0.0442461,
-0.0255004, -0.0293242, -0.0132638, 0.0143276, 0.0355189, 0.0398848,
0.0508185, 0.054249, 0.0759373, 0.0894885, 0.0744317, 0.0643457,
0.0695191)), row.names = c(NA, -50L), class = c("data.table",
"data.frame"))
> head(temp_dt, 20)
date value1 value2
1: 2023-03-17 19.50 -0.1653510
2: 2023-03-20 19.20 -0.1810650
3: 2023-03-21 19.34 -0.1817140
4: 2023-03-22 19.65 -0.1608950
5: 2023-03-23 19.90 -0.1261320
6: 2023-03-24 20.19 -0.0803255
7: 2023-03-27 20.31 -0.0442461
8: 2023-03-28 20.33 -0.0255004
9: 2023-03-29 20.32 -0.0293242
10: 2023-03-30 20.50 -0.0132638
11: 2023-03-31 20.78 0.0143276
12: 2023-04-03 20.90 0.0355189
13: 2023-04-04 20.75 0.0398848
14: 2023-04-05 20.94 0.0508185
15: 2023-04-06 21.05 0.0542490
16: 2023-04-10 21.21 0.0759373
17: 2023-04-11 20.87 0.0894885
18: 2023-04-12 20.64 0.0744317
19: 2023-04-13 20.63 0.0643457
20: 2023-04-14 20.69 0.0695191
因为我希望这个操作尽可能快,所以我从post-Fast rolling correlation using data.table中泛化代码。
我尝试实现的代码如下
library(data.table)
library(Rfast)
temp_dt[, c(rep(NA_real_, 6), cor(Rfast::colRanks(matrix(value1[sequence(rep(7, .N-6), 1:(.N-6))], 7)), Rfast::colRanks(matrix(value2[sequence(rep(7, .N-6), 1:(.N-6))], 7))))]
然而,我得到了一个返回443562个条目的向量。返回向量中的值的预期数量不应超过50。任何关于我做错了什么的帮助都将是有帮助的。这一定是一个愚蠢的错误。
我在下面的代码中尝试了另一种方法,它也给出了错误-Error in (function (x, y = NULL, use = "everything", method = c("pearson", : incompatible dimensions
temp_dt[, corr := frollapply(x = value1, n = 7, cor, fill = NA, align = "right", method = "spearman", y = value2)]
1条答案
按热度按时间pkwftd7m1#
您需要变量对之间的相关性(
colRanks
返回的两个矩阵的每个相应列都是一对变量)。使用Rfast::corpairs
: