name y x intercept
0 a 13.7 7.8 1
1 a -14.7 -9.7 1
2 a -3.4 -0.6 1
3 a 7.4 3.3 1
4 a -5.3 -1.9 1
5 a -8.3 -2.3 1
6 a 8.9 3.7 1
7 a 10.0 7.9 1
8 a 1.8 -0.4 1
9 a 6.7 3.1 1
10 a 17.4 9.9 1
11 a 8.9 7.7 1
12 a -3.1 -1.5 1
13 a -12.2 -7.9 1
14 a 7.6 4.9 1
15 a 4.2 2.3 1
16 a -15.3 -5.6 1
17 a 9.9 6.7 1
18 a 11.0 5.2 1
19 a 5.7 5.1 1
20 a -0.3 -0.6 1
21 a -15.0 -8.7 1
22 a -10.6 -5.7 1
23 a -16.0 -9.1 1
24 b 16.7 8.5 1
25 b 9.2 8.2 1
26 b 4.7 3.4 1
27 b -16.7 -8.7 1
28 b -4.8 -1.5 1
29 b -2.6 -2.2 1
30 b 16.3 9.5 1
31 b 15.8 9.8 1
32 b -10.8 -7.3 1
33 b -5.4 -3.4 1
34 b -6.0 -1.8 1
35 b 1.9 -0.6 1
36 b 6.3 6.1 1
37 b -14.7 -8.0 1
38 b -16.1 -9.7 1
39 b -10.5 -8.0 1
40 b 4.9 1.0 1
41 b 11.1 4.5 1
42 b -14.8 -8.5 1
43 b -0.2 -2.8 1
44 b 6.3 1.7 1
45 b -14.1 -8.7 1
46 b 13.8 8.9 1
47 b -6.2 -3.0 1
from statsmodels.regression.rolling import RollingOLS
from statsmodels.tools.tools import add_constant
import statsmodels.api as sm
import pandas as pd
import numpy as np
# Load the Grunfeld dataset bundled with statsmodels and append 'firm' to the
# existing index, so that each row is addressed by (original index, firm).
data = sm.datasets.grunfeld.load()
df_grunfeld = pd.DataFrame(data.data).set_index(['firm'], append=True)
# Simple Model
# $$invest = \beta_0 + \beta_1 value$$
def invest_params(df_gf, intercept=False):
    """
    Fit a rolling OLS of 'invest' on 'value' for the data of a single firm.

    The regression uses a window of 8 observations; because ``expanding=True``
    is passed to RollingOLS, estimation starts as soon as ``min_obs`` rows are
    available instead of waiting for a full window.

    Parameters
    ----------
    df_gf : pandas.DataFrame
        Data of one firm. Must contain the columns 'invest' and 'value'.
    intercept : bool, default False
        If True, a constant is added to the regressors.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df_gf``.

        * If ``df_gf`` has fewer than ``min_obs`` rows: an all-NaN frame with
          the columns ['coef_intercept', 'coef_value'] (``intercept=True``)
          or ['coef_value'] (``intercept=False``).
        * Otherwise: 'b0' (only when the model contains a 'const' column),
          'b1', 'invest_hat', 'invest_hat_shift', 'mse', 'rmse', 'nobs',
          'ssr', 't_const' (only when the model contains a 'const' column),
          't_value' and 'r2_adj'.
    """
    # we should have at least k + 1 observations
    min_obs = 3 if intercept else 2
    wndw = 8

    # With fewer than min_obs rows RollingOLS would raise, so short inputs are
    # answered with an empty (all-NaN) frame instead.
    # NOTE(review): these column names differ from the regular return value
    # ('b0'/'b1'); kept as-is so existing callers see the same output.
    if df_gf.shape[0] < min_obs:
        cols = ['coef_intercept', 'coef_value'] if intercept else ['coef_value']
        return pd.DataFrame(index=df_gf.index, columns=cols)

    y = df_gf['invest']
    x = add_constant(df_gf['value']) if intercept else df_gf['value']
    model = RollingOLS(y, x, expanding=True, min_nobs=min_obs, window=wndw).fit()

    coefs = model.params
    tvalues = model.tvalues
    mse = model.mse_resid

    # Design matrix used to turn coefficients into fitted values. When the
    # model has no 'const' coefficient, the column-aligned product is NaN for
    # 'const' and sum(min_count=1) skips it; rows with no estimate stay NaN.
    design = add_constant(df_gf['value'])

    # Work on a copy so the frame handed out by the results object is never
    # modified in place.
    parameters = coefs.copy()
    # Fitted value using the coefficients estimated at the same row.
    parameters['invest_hat'] = coefs.mul(design, axis=0).sum(axis=1, min_count=1)
    # Fitted value using the coefficients estimated one row earlier.
    parameters['invest_hat_shift'] = (
        coefs.shift(1).mul(design, axis=0).sum(axis=1, min_count=1)
    )
    parameters['mse'] = mse
    parameters['rmse'] = np.sqrt(mse)
    parameters['nobs'] = model.nobs
    parameters['ssr'] = model.ssr
    # The model only has a 'const' column when an intercept was actually
    # added; reading it unconditionally raised KeyError for intercept=False.
    if 'const' in tvalues.columns:
        parameters['t_const'] = tvalues['const']
    parameters['t_value'] = tvalues['value']
    parameters = parameters.rename(columns={'const': 'b0', 'value': 'b1'})
    parameters['r2_adj'] = model.rsquared_adj
    return parameters
# Estimate the rolling regression separately for every firm.
# group_keys=False keeps the result indexed exactly like df_grunfeld; with the
# default (True) pandas >= 2.0 prepends the group key as an extra 'firm' index
# level, and the join below would no longer line up with df_grunfeld.
grouped = df_grunfeld.groupby('firm', group_keys=False)
df_params = grouped.apply(lambda firm_df: invest_params(firm_df, True))
# Attach the per-row estimates to the original data; overlapping column names
# coming from df_params get the '_coef' suffix.
df_grunfeld_output = df_grunfeld.join(df_params, rsuffix='_coef')
2条答案
按热度按时间t8e9dugd1#
在
statsmodels
中有滚动OLS。样本代码:
样本数据:或可从以下网址下载:https://www.dropbox.com/s/zhklsg5cmfksufm/sample_rolling_regression_OLS.csv?dl=0
ttp71kqs2#
下面是一个使用statsmodels的RollingOLS的工作示例,灵感来自answer to this question on Rolling OLS Regressions and Predictions by Group。
这可以很容易地为您的面板数据进行修改,以执行滚动窗口回归。
model.params
将为您提供各项系数,尤其是按月份、按基金分别输出的截距项。