matplotlib 如何纠正for循环中条形图的重叠注解[重复]

k2fxgqgv  于 2023-06-23  发布在  其他
关注(0)|答案(1)|浏览(103)

此问题已在此处有答案

Stacked Bar Chart with Centered Labels(2个答案)
How to add value labels on a bar chart(7个回答)
14天前关闭
我正在尝试绘制一个包含许多柱子的条形图,并希望代码能方便地用于各种(various)数据。我对 dataframe 中的各列做了 for 循环,柱子看起来没问题,但有些注解重叠了。我想需要更正的是这个循环:
for (month, value), rect in zip(labels[l].items(), other_bars):
下面是我的代码:

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patheffects import Normal, Stroke
import numpy as np

# Raw monthly session statistics; every list has one entry per month in 'month_dt'.
data = {
    'all_ses': [
        3, 3, 7, 19, 27, 37, 59, 71, 101, 119,
        161, 192, 223, 335, 466, 593, 675, 935, 1356, 1142,
    ],
    'canceled_ptc': [
        0.0, 0.0, 28.57, 21.05, 14.81, 18.92, 10.17, 12.68, 10.89, 20.17,
        18.63, 17.19, 15.25, 17.91, 16.09, 18.21, 13.33, 17.11, 15.93, 16.37,
    ],
    'month_dt': [
        '2021-02-01', '2021-03-01', '2021-04-01', '2021-05-01',
        '2021-06-01', '2021-07-01', '2021-08-01', '2021-09-01',
        '2021-10-01', '2021-11-01', '2021-12-01', '2022-01-01',
        '2022-02-01', '2022-03-01', '2022-04-01', '2022-05-01',
        '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01',
    ],
    'ses_canceled': [
        0, 0, 2, 4, 4, 7, 6, 9, 11, 24,
        30, 33, 34, 60, 75, 108, 90, 160, 216, 187,
    ],
    'ses_finished': [
        3, 3, 5, 15, 23, 30, 53, 62, 90, 95,
        131, 159, 189, 275, 391, 485, 585, 775, 1140, 955,
    ],
}

# Build the frame step by step: parse the month strings as datetimes,
# then use them as the index so the remaining columns are all numeric.
df7_2 = pd.DataFrame(data)
df7_2 = df7_2.astype({'month_dt': 'datetime64[ns]'})
df7_2 = df7_2.set_index('month_dt')

# Values that should receive a label: for every column, the min, quartiles and
# max (taken as actual data points via 'nearest'), plus the last row of the frame.
quantile_levels = [0, 0.25, 0.5, 0.75, 1]
quantiles = df7_2.quantile(quantile_levels, numeric_only=True, interpolation='nearest')
quantiles = pd.concat([quantiles, df7_2.iloc[-1:]])


def _matches_selected_values(column):
    """Return a boolean Series marking entries of *column* found in its `quantiles` column."""
    return column.isin(quantiles[column.name])


# Boolean mask of the cells to label, then the frame with every other cell set to NaN.
to_label = df7_2.apply(_matches_selected_values)
labels = df7_2.where(to_label)

# Columns to draw (in stacking order) and the colour used for each of them.
bars_colors = ['#3049BF', '#BF9530', 'grey']
bars_columns_list = [*df7_2[['ses_finished', 'ses_canceled', 'all_ses']].columns]
df7_2_bars = df7_2[bars_columns_list]

# Positional index (numpy integers) of every entry in bars_columns_list.
bars_arr = list(np.arange(len(bars_columns_list)))

# Single figure/axes pair that all bar series are drawn on.
fig, bar_ax = plt.subplots(figsize=(14, 6))

# Draw one bar series per entry of bars_columns_list and annotate the values
# kept in `labels` (cells that are NaN there are skipped).
# i: position of the column, l: column name, c: bar colour.
for i, l, c in zip(list(np.array(bars_arr)), bars_columns_list, bars_colors):
    if l == bars_columns_list[0]:
        # first bar: drawn without a `bottom` argument
        first_bar = bar_ax.bar(
            x=df7_2_bars.index,
            height=df7_2_bars[bars_columns_list[i]],
            label=f'{l}',
            linewidth=0,
            width=20,
            color=c
        )

        # month -> most recent annotation placed for that month; the else-branch
        # reads it to position later labels relative to an existing one
        bar_labels = {}
        for (month, value), rect in zip(labels[l].items(), first_bar):
            # NaN in `labels` means this value was not selected for labelling
            if pd.isna(value):
                continue

            center_x, _ = rect.get_center()

            # label sits 1 point above the top edge of the bar, in the bar's colour
            bar_labels[month] = bar_ax.annotate(
                f'{value:.0f}',
                (center_x, rect.get_y() + rect.get_height()),
                textcoords='offset points',
                xytext=(0, 1),
                ha='center',
                color=rect.get_facecolor(),
                path_effects=[Stroke(linewidth=0, foreground='white'), Normal()]
            )

        # NOTE(review): presumably meant to lay out the labels before their
        # extents are read below; draw_idle may only schedule a redraw -- confirm
        fig.canvas.draw_idle()

    else:
        # remaining bars: each series starts at the row-wise sum of the
        # columns that precede it in bars_columns_list
        other_bars = bar_ax.bar(
            x=df7_2_bars.index,
            height=df7_2_bars[bars_columns_list[i]],
            bottom=df7_2_bars[bars_columns_list[0:i]].sum(axis=1),
            label=f'{l}',
            linewidth=0,
            width=20,
            color=c
        )

        # THIS LOOP MUST BE CORRECTED

        for (month, value), rect in zip(labels[l].items(), other_bars):
            # NaN in `labels` means this value was not selected for labelling
            if pd.isna(value):
                continue

            center_x, _ = rect.get_center()
            existing_label = bar_labels.get(month)

            if existing_label:
                # Convert the existing label's window extent back to data
                # coordinates and keep the last element of the result
                # (presumably the label's upper y edge -- confirm).
                # NOTE(review): the conversion uses the axes transform as it is
                # at this moment; if later bar series rescale the y-axis the
                # value may be stale, which could explain the overlap -- confirm.
                label_top = (
                    bar_ax.transData.inverted()
                    .transform(existing_label.get_window_extent())
                    .flat[-1]
                )
            else:
                label_top = 0

            # anchor the new label at whichever is higher: the top of this bar
            # or the top of the label already placed for this month
            bar_y = max(rect.get_y() + rect.get_height(), label_top)
            bar_labels[month] = bar_ax.annotate(
                f'{value:.0f}',
                (center_x, bar_y),
                textcoords='offset points',
                xytext=(0, 2),
                ha='center',
                color=rect.get_facecolor(),
                path_effects=[Stroke(linewidth=0, foreground='gainsboro'), Normal()]
            )

# final cosmetics: no grid, show the legend built from the bar labels
bar_ax.grid(False)
bar_ax.legend()

plt.show()

我已经标出了需要更正的地方:

如果你能给我推荐有用的文章或指南,那就太好了。

dphi5xsq

dphi5xsq1#

# build the dataframe, indexed by the parsed month
df7_2 = pd.DataFrame(data)
df7_2 = df7_2.astype({'month_dt': 'datetime64[ns]'})
df7_2 = df7_2.set_index('month_dt')

# turn the index into 'YYYY-MM' strings before plotting; they become the xtick labels
df7_2.index = df7_2.index.strftime('%Y-%m')

color = ['#3049BF', '#BF9530', 'grey']

# stacked bar chart of the columns at positions 3, 2 and 0 of df7_2
ax = df7_2.iloc[:, [3, 2, 0]].plot(
    kind='bar',
    stacked=True,
    color=color,
    width=0.85,
    figsize=(14, 6),
    xlabel='',
    rot=0,
)

# keep a tick under every bar, but only print the text of every other one
tick_labels = ['' if i % 2 else v for i, v in enumerate(df7_2.index)]
ax.set_xticks(range(len(df7_2)), tick_labels)


def _segment_label(x):
    """Return *x* rounded to an integer string when it is above 70, else an empty string."""
    if x > 70:
        return f'{x:0.0f}'
    return ''


# label each segment at its centre; passing a callable as fmt
# requires matplotlib >= v3.7.0
for container in ax.containers:
    ax.bar_label(container, fmt=_segment_label, label_type='center', fontsize=8, fontweight='bold')

  • 把总数也作为一段堆叠到柱子上并不是好主意:它会扭曲视觉效果,使 y 轴范围(约 3000)变成数据实际范围的两倍。
  • 更好的做法是只堆叠各组成部分,并在柱子顶部边缘添加总计标签。
# build the dataframe, indexed by the parsed month
df7_2 = pd.DataFrame(data)
df7_2 = df7_2.astype({'month_dt': 'datetime64[ns]'})
df7_2 = df7_2.set_index('month_dt')

# turn the index into 'YYYY-MM' strings before plotting; they become the xtick labels
df7_2.index = df7_2.index.strftime('%Y-%m')

color = ['#3049BF', '#BF9530']

# stacked bar chart of the columns at positions 3 and 2 of df7_2 (no total bar)
ax = df7_2.iloc[:, [3, 2]].plot(
    kind='bar',
    stacked=True,
    color=color,
    width=0.85,
    figsize=(14, 6),
    xlabel='',
    rot=0,
)

# keep a tick under every bar, but only print the text of every other one
tick_labels = ['' if i % 2 else v for i, v in enumerate(df7_2.index)]
ax.set_xticks(range(len(df7_2)), tick_labels)


def _segment_label(x):
    """Return *x* rounded to an integer string when it is above 30, else an empty string."""
    if x > 30:
        return f'{x:0.0f}'
    return ''


# label each segment at its centre; passing a callable as fmt
# requires matplotlib >= v3.7.0
for container in ax.containers:
    ax.bar_label(container, fmt=_segment_label, label_type='center', fontsize=8, fontweight='bold')

# write the all_ses total on the top edge of the last (topmost) container
_ = ax.bar_label(ax.containers[-1], labels=df7_2.all_ses, label_type='edge')

相关问题