如何改进R中for循环中的简单减法?

njthzxwz  于 2023-01-22  发布在  其他
关注(0)|答案(2)|浏览(113)

我想从一个矩阵(S_t)的每一行中减去一个向量(S_0)。不幸的是,由于行数多达100万,我的for循环运行起来非常耗时。

# The asker's original approach: subtract S_0 from S_t one row at a time.
# NOTE(review): S_t and S_0 are not defined in this snippet; the post describes
# them as an n x 10 data frame and a length-10 vector.
i <- 1
n <- 1000000

# Preallocate an n x 10 data frame of zeros to hold the result.
X_t <- data.frame(matrix(0, nrow = n, ncol = 10))

# `i:n` is evaluated once, with i == 1 from above, so the loop visits rows 1..n.
for (i in i:n) {
  # One data-frame row is assigned per iteration.
  X_t[i,] <- S_t[i, ] - S_0 
}

S_0是长度为10的向量。
S_t是维度为n × 10的数据框(data.frame),其中的值来自先前的计算。
我的第一个想法是把S_0转换成一个n x 10维的矩阵(那么所有的行都是相同的)。也许从一个矩阵中减去一个矩阵会更快?不幸的是,如果不使用另一个for循环,我就找不到有效的方法。
此外,我尝试了以下方法:
data.frame(matrix(S_0, nrow = n, ncol = 10))
但是输出并不是我所期望的,因为每行中数字的顺序都混在一起了。

js4nwp54

js4nwp541#

您可以使用col按列号对S_0取值(即S_0[col(S_t)]),把它展开成与S_t形状一致的向量,同时保持S_t的类型:

# col(S_t) gives the column number of every cell of S_t, so S_0[col(S_t)]
# lines up S_0[j] with each cell in column j before the elementwise subtraction.
X_t <- S_t - S_0[col(S_t)]
# Worked example: S_0 is 1..10 and S_t is a 5 x 10 data frame of zeros.
S_0 <- 1:10
S_t <- data.frame(matrix(0, nrow = 5, ncol = 10))

# Subtract S_0[j] from every cell in column j of S_t.
X_t <- S_t - S_0[col(S_t)]

# Every row comes out as -1 .. -10 (printed output below).
X_t
#  X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
#1 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
#2 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
#3 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
#4 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
#5 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10

# The result is still a data.frame, as the str() output shows.
str(X_t)
#'data.frame':   5 obs. of  10 variables:
# $ X1 : num  -1 -1 -1 -1 -1
# $ X2 : num  -2 -2 -2 -2 -2
# $ X3 : num  -3 -3 -3 -3 -3
# $ X4 : num  -4 -4 -4 -4 -4
# $ X5 : num  -5 -5 -5 -5 -5
# $ X6 : num  -6 -6 -6 -6 -6
# $ X7 : num  -7 -7 -7 -7 -7
# $ X8 : num  -8 -8 -8 -8 -8
# $ X9 : num  -9 -9 -9 -9 -9
# $ X10: num  -10 -10 -10 -10 -10
# Same expression with a matrix input: the result is a numeric matrix
# (see the str() output below), so the input type is preserved.
S_t <- matrix(0, nrow = 5, ncol = 10)
X_t <- S_t - S_0[col(S_t)]
str(X_t)
# num [1:5, 1:10] -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 ...

另一个选择是使用sweep,也保留类型。

# MARGIN = 2 sweeps S_0 across the columns of S_t; sweep()'s default FUN is "-",
# so this subtracts S_0[j] from column j.
sweep(S_t, 2, S_0)

另一个选项是使用Map

# Map() pairs the j-th element of S_t with S_0[j] and applies `-` to each pair.
# NOTE(review): this assumes S_t is a data.frame, whose elements are its columns;
# on a matrix Map() would walk individual cells instead — confirm the input type.
Map(`-`, S_t, S_0) #Returning list

# Wrap the list of columns back into a data.frame.
list2DF(Map(`-`, S_t, S_0)) #Returning data.frame

# Bind the list of columns side by side into a matrix.
do.call(cbind, Map(`-`, S_t, S_0)) #Returning matrix

基准(感谢@Maël提供模板)

# Benchmark setup: a 1,000,000 x 10 data frame of zeros and S_0 = 1..10.
S_t <- data.frame(matrix(0, nrow = 1000000, ncol = 10))
S_0 <- 1:10

# Time each approach over 10 iterations. check = FALSE turns off bench::mark's
# result-equality check; the expressions return different types (matrix,
# data.frame, list), so their results would not compare as equal.
bench::mark(check = FALSE, iterations = 10,
tt = t(t(S_t) - S_0),
col = S_t - S_0[col(S_t)], 
sweep = sweep(S_t, 2, S_0),
Map = Map(`-`, S_t, S_0),
MapDf = list2DF(Map(`-`, S_t, S_0)),
MapMatr = do.call(cbind, Map(`-`, S_t, S_0))
)
# Recorded results: the Map-based variants have the lowest median time and
# memory allocation, followed by tt; col and sweep are the slowest.
#  expression     min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#  <bch:expr> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
#1 tt          96.1ms  98.6ms      9.92   228.9MB     4.96    10     5      1.01s
#2 col        282.9ms 310.4ms      3.28   509.5MB     7.55    10    23      3.04s
#3 sweep      312.9ms 314.1ms      3.18   547.6MB     6.36    10    20      3.15s
#4 Map           13ms    19ms     53.6     76.3MB    26.8     10     5   186.54ms
#5 MapDf         13ms  20.3ms     48.0     76.3MB    14.4     10     3    208.5ms
#6 MapMatr     23.2ms    25ms     37.8    152.6MB    26.4     10     7   264.76ms
xdnvmnnf

xdnvmnnf2#

可以使用t两次:

# Example input: a 1,000,000 x 10 data frame of zeros and S_0 = 1..10.
S_t <- data.frame(matrix(0, nrow = 1000000, ncol = 10))
S_0 <- 1:10

# t(S_t) is a 10-row matrix, so the length-10 S_0 recycles down each column
# (one value per original column of S_t); the outer t() restores the layout.
# The result is a matrix, not a data.frame (note the [1,] row labels below).
X_t <- t(t(S_t) - S_0)

# > head(X_t)
#      X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
# [1,] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
# [2,] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
# [3,] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
# [4,] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
# [5,] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
# [6,] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10

基准测试:在下面比较的三种方法中,两次转置(t)的速度最快

# Compare the double transpose, the col() indexing and sweep() over 10
# iterations. check = FALSE turns off the result-equality check, since the
# first expression returns a matrix and the other two return data frames.
bench::mark(t(t(S_t) - S_0),
            S_t - S_0[col(S_t)], 
            sweep(S_t, 2, S_0),
            check = FALSE, iterations = 10)
# Recorded results (column headers truncated by the console): the double
# transpose has the lowest median time and memory allocation of the three.
#  expression               min  median itr/s…¹ mem_a…² gc/se…³ n_itr
#1 t(t(S_t) - S_0)        211ms   321ms    3.10   229MB    2.17    10
#2 S_t - S_0[col(S_t)]    691ms   874ms    1.13   509MB    1.82    10
#3 sweep(S_t, 2, S_0)     638ms   735ms    1.34   548MB    2.54    10

相关问题