基于这个回答,我使用以下代码绘制相关性矩阵,图中只显示 p < 0.05 的相关系数:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
# Simulate 3 correlated variables drawn from a multivariate normal distribution.
num_samples = 100
mu = np.array([5.0, 0.0, 10.0])
# The desired covariance matrix.
r = np.array([
    [3.40, -2.75, -2.00],
    [-2.75, 5.50, 1.50],
    [-2.00, 1.50, 1.25],
])
y = np.random.multivariate_normal(mu, r, size=num_samples)
df = pd.DataFrame(y)
df.columns = ["Correlated1", "Correlated2", "Correlated3"]
# Create two random variables: integers in [-2000, 2000) drawn independently
# of the simulated columns above.
for i in range(2):
    df.loc[:, f"Uncorrelated{i}"] = np.random.randint(-2000, 2000, len(df))
def corr_sig(df=None):
    """Return the p-values of the pairwise Pearson correlations of *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are the variables to compare.

    Returns
    -------
    numpy.ndarray
        Square array with one row and one column per column of *df*.
        Entry ``[i, j]`` is the p-value reported by ``scipy.stats.pearsonr``
        for columns ``i`` and ``j``. The diagonal is left at ``0.0`` because
        a column is never tested against itself.
    """
    n_cols = df.shape[1]
    p_matrix = np.zeros(shape=(n_cols, n_cols))
    # Work by position rather than by label: this stays correct when column
    # labels are duplicated and avoids rebuilding the label list per pair.
    # Only the upper triangle is computed; the result is mirrored because the
    # test is symmetric in its two arguments.
    for i, j in zip(*np.triu_indices(n_cols, k=1)):
        _, p = stats.pearsonr(df.iloc[:, i], df.iloc[:, j])
        p_matrix[i, j] = p
        p_matrix[j, i] = p
    return p_matrix
# p-value for every pair of columns.
p_values = corr_sig(df)
# True marks a cell to hide: everything except significant (p < 0.05) cells
# in the lower triangle. np.tril's default k=0 keeps the main diagonal.
mask = ~np.tril(p_values < 0.05)
def plot_cor_matrix(corr, mask=None):
    """Draw *corr* as an annotated heatmap on a new 11x9 figure.

    Parameters
    ----------
    corr :
        Matrix of values to plot; passed straight to ``seaborn.heatmap``.
    mask : optional
        Passed to ``seaborn.heatmap`` as ``mask``; per the seaborn
        documentation, cells where the mask is True are not drawn.
        ``None`` shows every cell.
    """
    _fig, ax = plt.subplots(figsize=(11, 9))
    sns.heatmap(
        corr,
        ax=ax,
        mask=mask,
        # cosmetics
        annot=True,
        cmap='coolwarm',
    )
# Plotting with significance filter
corr = df.corr()  # pairwise correlation of the columns
p_values = corr_sig(df)  # matching p-values
# Hide everything except significant (p < 0.05) cells in the lower triangle.
mask = ~np.tril(p_values < 0.05)
plot_cor_matrix(corr, mask=mask)
如何同时过滤掉对角线上的相关性?对角线上是每个特征与自身的比较(即相关系数为 1)。
1条答案
按热度按时间pes8fvy91#
`np.tril` 函数可以接受关键字参数 `k`。根据文档:`k` 指定一条对角线,该对角线以上的元素会被置零。`k = 0`(默认值)表示主对角线,`k < 0` 表示主对角线之下,`k > 0` 表示主对角线之上。
在您的情况下,需要使用 `k=-1`,例如 `mask = np.invert(np.tril(p_values < 0.05, k=-1))`,这样主对角线也会被遮住。
输出: