pandas 如何在箱线图的不同组的中位数处放置中位数标记

q5lcpyga  于 2023-04-28  发布在  其他
关注(0)|答案(1)|浏览(233)

我用seaborn做了一个分组箱线图。我有两个子图,描述不同类型的数据,为了比较类型(我想保持组的原样),我想在类型1的箱线图上绘制类型2的数据框的中位数,反之亦然。这是我的脚本

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import netCDF4 as nc
# Global seaborn theme; set first because the rcParams overrides below
# are applied on top of it.
sns.set_theme(style='ticks', palette='pastel')

fig = plt.figure(figsize=(15, 5))

# Figure margins and the spacing between subplots, set in a single call.
fig.subplots_adjust(
    left=0.12,
    right=0.98,
    bottom=0.1,
    top=0.98,
    wspace=0.15,
    hspace=0.12,
)

# Text rendering and font sizes used by the axes created afterwards.
plt.rcParams.update({
    'text.usetex': False,
    'axes.labelsize': 12,
    'font.size': 11,
    'legend.fontsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
})

# Two side-by-side subplots: left for type 1, right for type 2.
ax1, ax2 = (fig.add_subplot(1, 2, position) for position in (1, 2))

def grouped_boxplot(axis_type1, axis_type2):
    """Draw grouped box plots of random demo data for two data types.

    For each of four models and seven methods, ten random integers in
    [1, 100] are generated per data type. Each data type is drawn as a
    seaborn box plot grouped by model (x axis) and coloured by method
    (hue), with outliers hidden and ``whis=0``. The medians of the *other*
    data type are then overlaid as red stars at the x tick positions.

    Args:
        axis_type1: Matplotlib axes receiving the type-1 box plot.
        axis_type2: Matplotlib axes receiving the type-2 box plot.

    Returns:
        None. Both axes are modified in place.
    """
    methods = ['m1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7']
    models = ['model1', 'model2', 'model3', 'model4']

    # Build one wide frame per (model, type): one column per method plus a
    # constant 'Model' column identifying the group.
    type1_frames = []
    type2_frames = []
    for model in models:
        frame_type1 = pd.DataFrame()
        frame_type2 = pd.DataFrame()
        for m in methods:
            frame_type1[m] = np.random.randint(1, 101, 10)
            frame_type2[m] = np.random.randint(1, 101, 10)
        type1_frames.append(frame_type1.assign(Model=model))
        type2_frames.append(frame_type2.assign(Model=model))

    df_type1 = pd.concat(type1_frames)
    df_type2 = pd.concat(type2_frames)

    # Long format (Model, Method, var) is what seaborn's x/hue/y API expects.
    df_type1_long = pd.melt(df_type1, 'Model', var_name='Method',
                            value_name='var')
    df_type2_long = pd.melt(df_type2, 'Model', var_name='Method',
                            value_name='var')

    for df_long, axis in ((df_type1_long, axis_type1),
                          (df_type2_long, axis_type2)):
        sns.boxplot(x='Model', hue='Method', y='var', data=df_long,
                    showfliers=False, whis=0, ax=axis)

    # The wide frames also hold the string 'Model' column, so the median
    # must be restricted to numeric columns; without numeric_only=True
    # pandas 2.x raises TypeError instead of silently skipping that column.
    type1_median = df_type1.median(numeric_only=True).to_numpy()
    type2_median = df_type2.median(numeric_only=True).to_numpy()

    # NOTE: these are per-method medians taken over all models, paired
    # positionally with the model ticks; zip() stops at the shorter
    # sequence, so only as many markers as there are x ticks are drawn.
    for axis, other_median in ((axis_type1, type2_median),
                               (axis_type2, type1_median)):
        for xtick, ytick in zip(axis.get_xticks(), other_median):
            axis.scatter(xtick, ytick, s=20, marker='*', color='red')

    # Hide the left legend; show a single shared one below the right plot.
    axis_type1.legend([], [], frameon=False)
    axis_type2.legend(loc='lower center', bbox_to_anchor=(-0.2, -0.25), ncol=7)

# Draw the type-1 plot on the left subplot and the type-2 plot on the right.
grouped_boxplot(ax1, ax2)

# Display the figure interactively; the commented line below is the
# alternative that writes it to a PDF file.
plt.show()
# plt.savefig('the_ultimate_boxplot.pdf')

目前我只做到了把中位数绘制在箱线图的各个 xtick 位置上。

有没有办法用一个符号,把类型2中模型1的m1(蓝色箱线图)的中位数标在类型1中模型1的m1(蓝色箱线图)上,把类型2中模型1的m2(橙色箱线图)的中位数标在类型1中模型1的m2(橙色箱线图)上,[...]以此类推?

xriantvc

xriantvc1#

sns.pointplot可用于计算和定位中位数。
示例代码对pointplot使用以下参数:

  • dodge=.8 - .8 / len(methods):dodge 按 hue(色调)将各点沿类别轴错开。点图和箱线图默认的 dodge 宽度不同,参见相关的 GitHub issue
  • linestyles='':不在点之间画线
  • markers='D':使用菱形marker
  • color='black':标记的颜色(默认颜色来自hue)
  • estimator=np.median:计算y值的中位数;请注意,这些点与箱线图的中位线重合
  • ci=None:不显示置信区间

图例已更改,以删除pointplot中的条目。bbox_to_anchor的x位置设置为wspace的一半,以尝试将图例置于两个子图之间的中心。

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

sns.set_theme(style='ticks', palette='pastel')

fig = plt.figure(figsize=(15, 5))
fig.subplots_adjust(wspace=0.15, right=0.98, left=0.04, bottom=0.14, top=0.98)

axis_type1 = fig.add_subplot(1, 2, 1)
axis_type2 = fig.add_subplot(1, 2, 2)
methods = ['m1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7']
models = ['model1', 'model2', 'model3', 'model4']

# Random long-format demo data: one row per observation, with its model,
# method and an integer value in [1, 100].
df_type1_long = pd.DataFrame({'Model': np.random.choice(models, 500),
                              'Method': np.random.choice(methods, 500),
                              'var': np.random.randint(1, 101, 500)})
df_type2_long = pd.DataFrame({'Model': np.random.choice(models, 800),
                              'Method': np.random.choice(methods, 800),
                              'var': np.random.randint(1, 101, 800)})

for df_long, ax in zip([df_type1_long, df_type2_long], [axis_type1, axis_type2]):
    sns.boxplot(x='Model', hue='Method', y='var', data=df_long,
                showfliers=False, whis=0, ax=ax)
    # Overlay one black diamond per (Model, Method) at the group median.
    # The dodge value is chosen so the markers line up with the dodged
    # boxes; linestyles='' suppresses the connecting lines.
    # NOTE(review): newer seaborn releases deprecate `ci` in favour of
    # `errorbar=None` - confirm against the installed version.
    # To mark another statistic, change `estimator` (e.g. np.min) and,
    # if desired, `markers` (e.g. 'v').
    sns.pointplot(x='Model', hue='Method', y='var', dodge=.8 - .8 / len(methods),
                  linestyles='', markers='D', color='black', estimator=np.median, ci=None,
                  data=df_long, ax=ax)

axis_type1.set_xlabel('')
axis_type2.set_xlabel('')
axis_type1.legend_.remove()

# Both plotting calls contribute one legend entry per method; keep only the
# first len(methods) handles (the box plot entries) for the shared legend.
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and later
# releases dropped the old name, so look up whichever attribute exists.
current_legend = axis_type2.legend_
legend_handles = getattr(current_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = current_legend.legendHandles
axis_type2.legend(handles=legend_handles[:len(methods)],
                  loc='upper center', bbox_to_anchor=(-0.075, -0.06), ncol=len(methods))
plt.show()

相关问题