def r_confidence_interval(r, alpha, n):
z = r_to_z(r)
se = 1.0 / math.sqrt(n - 3)
z_crit = stats.norm.ppf((1 + alpha)/2) # 2-tailed z critical value
lo = z - z_crit * se
hi = z + z_crit * se
# Return a sequence
return (z_to_r(lo), z_to_r(hi))
import numpy as np
from scipy.optimize import curve_fit
import pandas as pd
from sklearn.metrics import r2_score
x = np.array([your numbers here])
y = np.array([your numbers here])
### define list for R2 values
r2s = []
### define dataframe to append your bootstrapped fits for Y hat ranges
ci_df = pd.DataFrame({'x': x})
### define how many samples you want
how_many_straps = 5000
### define your fit function/s
def func_exponential(x,a,b):
return np.exp(b) * np.exp(a * x)
### fit original, using log because fitting exponential
polyfit_original = np.polyfit(x
,np.log(y)
,1
,# w= could supply weight for observations here)
)
for i in range(how_many_straps+1):
### zip into tuples attaching X to Y, can combine more variables as well
zipped_for_boot = pd.Series(tuple(zip(x,y)))
### sample zipped X & Y pairs above with replacement
zipped_resampled = zipped_for_boot.sample(frac=1,
replace=True)
### creater your sampled X & Y
boot_x = []
boot_y = []
for sample in zipped_resampled:
boot_x.append(sample[0])
boot_y.append(sample[1])
### predict sampled using original fit
y_hat_boot_via_original_fit = func_exponential(np.asarray(boot_x),
polyfit_original[0],
polyfit_original[1])
### calculate r2 and append
r2s.append(r2_score(boot_y, y_hat_boot_via_original_fit))
### fit sampled
polyfit_boot = np.polyfit(boot_x
,np.log(boot_y)
,1
,# w= could supply weight for observations here)
)
### predict original via sampled fit or on a range of min(x) to Z
y_hat_original_via_sampled_fit = func_exponential(x,
polyfit_boot[0],
polyfit_boot[1])
### insert y hat into dataframe for calculating y hat confidence intervals
ci_df["trial_" + str(i)] = y_hat_original_via_sampled_fit
### R2 conf interval
low = round(pd.Series(r2s).quantile([0.025, 0.975]).tolist()[0],3)
up = round(pd.Series(r2s).quantile([0.025, 0.975]).tolist()[1],3)
F"r2 confidence interval = {low} - {up}"
5条答案
按热度按时间gcmastyq1#
根据文献[1],直接用Pearson r来计算置信区间,由于它不是正态分布,因此计算起来比较复杂,需要进行以下步骤:
1.将r转换为z ',
1.计算z'的置信区间。z'的抽样分布近似正态分布,标准误差为1/sqrt(n-3)。
1.将置信区间转换回r。
以下是一些示例代码:
参考编号:
dhxwm5r42#
使用rpy2和心理测试库(您需要安装R,并首先在R中运行install.packages(“心理测试”))
其中,0.9是相关性,n是样本量,0.95是置信水平
93ze6v8z3#
这里有一个解决方案,它使用自举法来计算置信区间,而不是Fisher变换(它假设二元正态性等),借用this answer:
fumotvh34#
bennylp给出的答案大部分是正确的,但在计算第三个函数中的临界值时有一个小误差。
它应该改为:
这里有另一个帖子供参考:Scipy - two tail ppf function for a z value?
p4rjhz4m5#
我知道上面已经建议了自举,下面提出了它的另一种变体,它可能更适合一些其他的设置。
#1对数据进行抽样(成对的X和Y,也可以添加其他权重),拟合原始模型,记录r2,附加它。然后从记录的所有R2的分布中提取置信区间。
#2此外,可以拟合抽样数据,并使用抽样数据模型预测非抽样X (也可以提供连续范围来扩展预测,而不是使用原始X),以获得Y帽的置信区间。
因此,在示例代码中: