matplotlib:如何按类别为 countplot 添加百分比标注

u5rb5r59  于 2023-05-18  发布在  其他
关注(0)|答案(2)|浏览(225)

你好,我想给我的 countplot 添加百分比标注,该图有 5 个类别和 2 个取值(年长和年轻)。我尝试借用了《How to add percentages on top of bars in seaborn?》中的函数定义(def)和循环。
我的代码:

# Draw a count plot of 'concern_virus' split by 'age', then try to label
# each bar with a percentage.
# NOTE(review): `plt`, `sb` and `df_x_1` are not defined in this snippet —
# presumably matplotlib.pyplot, seaborn and the poster's DataFrame; confirm.
plt.figure(figsize =(7,5))
ax = sb.countplot(data = df_x_1, x = 'concern_virus', hue = 'age')
plt.xticks(size =12)
plt.xlabel('Level of Concern', size = 14)
plt.yticks(size = 12)
plt.ylabel('Number of People', size = 12)
plt.title("Older and Younger People's Concern over the Virus", size = 16)
# Re-apply the existing tick labels, rotated 40 degrees and right-aligned.
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right");

# Annotate every patch on the axes with its height as a percentage of `total`.
for p in ax.patches:
    # NOTE(review): `total` is never assigned in this snippet, so the value
    # treated as 100% cannot be determined from the code shown here.
    percentage = '{:.1f}%'.format(100 * p.get_height()/total)
    # Left edge plus the full width: this is the bar's right edge, not its
    # centre, even though the label below is drawn with ha='center'.
    x = p.get_x() + p.get_width()
    y = p.get_height()
    ax.annotate(percentage, (x, y),ha='center')
plt.show()

正如你所见,这些百分比没有意义。

svujldwt

svujldwt1#

问题似乎出在上面代码中未定义的变量 total。total 应该是您希望视为 100% 的那个数,例如 DataFrame 的行数。这样,所有显示的百分比之和就是 100。
下面是一些示例代码:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Build a random demo data set: N rows, each with a concern level ('a'..'e')
# and an age group.
N = 250
df_x_1 = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], N),
                       'age': np.random.choice(['younger', 'older'], N)})
plt.figure(figsize=(7, 5))
# `order` / `hue_order` / `palette` pin the x-axis order, the legend order and
# the colours so the figure does not depend on the order of the random data.
ax = sns.countplot(data=df_x_1, x='concern_virus', order=['a', 'b', 'c', 'd', 'e'],
                   hue='age', hue_order=['younger', 'older'],
                   palette=['chartreuse', 'darkviolet'])
plt.xticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.yticks(size=12)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")

# Every bar is labelled with its share of all rows, so the labels sum to 100%.
total = len(df_x_1)
# Walk the bar containers instead of ax.patches: ax.patches holds every
# rectangle on the axes, which can include non-bar artists (e.g. the
# zero-size legend proxy rectangles added by recent seaborn releases) and
# would then produce stray "0.0%" labels at the origin.
for bars in ax.containers:
    for p in bars.patches:
        height = p.get_height()
        # Skip empty bars; written as `not > 0` so NaN heights are skipped too.
        if not height > 0:
            continue
        # The trailing newline lifts the vertically centred text just above
        # the top of the bar.
        percentage = f'{100 * height / total:.1f}%\n'
        # Left edge plus half the width gives the horizontal centre of the bar.
        x = p.get_x() + p.get_width() / 2
        ax.annotate(percentage, (x, height), ha='center', va='center')
plt.tight_layout()
plt.show()

要让文本位于柱的正中,可以设置 ha='center',并在 x 位置上加上柱宽的一半。在文本末尾附加一个换行符,可以让文本恰好落在柱的顶端上方。plt.tight_layout() 有助于让所有标签都完整地显示在图中。
Seaborn 允许您通过 order=... 固定 x 轴的顺序。图例元素的顺序和相应的颜色可以分别通过 hue_order=... 和 palette=... 设置。
PS:对于新提出的问题(以每个年龄组各自的总数作为 100%),不要直接遍历所有的柱,而是先用外层循环逐个访问各个分组:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Demo data: two age groups of different sizes (230 younger, 120 older rows),
# each row carrying a random concern level.
label_younger = 'younger'
label_older = 'older'
concern_levels = ['a', 'b', 'c', 'd', 'e']
df_younger = pd.DataFrame({'concern_virus': np.random.choice(concern_levels, 230)})
df_older = pd.DataFrame({'concern_virus': np.random.choice(concern_levels, 120)})
df_younger['age'] = label_younger
df_older['age'] = label_older
df_x_1 = pd.concat([df_younger, df_older], ignore_index=True)

plt.figure(figsize=(7, 5))
ax = sns.countplot(
    data=df_x_1,
    x='concern_virus',
    order=concern_levels,
    hue='age',
    hue_order=[label_younger, label_older],
    palette=['orangered', 'skyblue'],
)
plt.xticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.yticks(size=12)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")

# Denominator per container label; any label other than the younger one
# falls back to the size of the older group.
younger_size = len(df_younger)
older_size = len(df_older)
for container in ax.containers:
    is_younger = container.get_label() == label_younger
    denominator = younger_size if is_younger else older_size
    for bar in container.patches:
        bar_top = bar.get_height()
        bar_centre = bar.get_x() + bar.get_width() / 2
        # The trailing newline pushes the centred text above the bar top.
        text = f'{100 * bar_top / denominator:.1f}%\n'
        ax.annotate(text, (bar_centre, bar_top), ha='center', va='center')
plt.tight_layout()
plt.show()

c9qzyr3d

c9qzyr3d2#

导入和DataFrame

import seaborn as sns
import pandas as pd
import numpy as np

# Seed the global NumPy generator so the demo data is the same on every run.
np.random.seed(2023)
N = 250

# Draw the two columns in this order (concern level first, then age group) so
# the random stream is consumed exactly as a single dict literal would.
concern_sample = np.random.choice(['a', 'b', 'c', 'd', 'e'], N)
age_sample = np.random.choice(['younger', 'older'], N)
df = pd.DataFrame({'concern_virus': concern_sample, 'age': age_sample})

# Number of non-null entries in the whole age column.
total = df['age'].count()

# Number of rows in each age group, indexed by the group name.
vc = df['age'].value_counts()

按占总数(所有年龄合计)的百分比标注

# Grouped count plot with the x categories in sorted order and the legend
# moved outside the axes, to the right.
category_order = sorted(df.concern_virus.unique())
ax = sns.countplot(data=df, x='concern_virus', hue='age', order=category_order)
sns.move_legend(ax, bbox_to_anchor=(1, 0.5), loc='upper left', frameon=False)


def _share_of_total(count):
    """Format a bar's count as a percentage of `total`, with one decimal."""
    return f'{(count/total)*100:0.1f}%'


# Label every bar in each container with its share of the overall total.
for container in ax.containers:
    ax.bar_label(container, fmt=_share_of_total)

按占各年龄组总数的百分比标注

# Grouped count plot with the x categories in sorted order and the legend
# moved outside the axes, to the right.
ax = sns.countplot(data=df, x='concern_virus', hue='age', order=sorted(df.concern_virus.unique()))
sns.move_legend(ax, bbox_to_anchor=(1, 0.5), loc='upper left', frameon=False)

for c in ax.containers:
    # The container label is used as the key into the per-group counts `vc`.
    label = c.get_label()
    # Use a dedicated name instead of overwriting the module-level `total`
    # (the grand total), so code that still needs the grand total keeps
    # working after this loop has run.
    group_total = vc[label]
    # Binding group_total as a default argument freezes this iteration's
    # value inside the formatter rather than relying on a late lookup.
    ax.bar_label(c, fmt=lambda v, group_total=group_total: f'{(v/group_total)*100:0.1f}%')

相关问题