pandas 分组DF函数问题

xytpbqjk  于 12个月前  发布在  其他
关注(0)|答案(1)|浏览(66)

我想定义一个 Python 函数:输入一个分组后的 DataFrame,输出一个包含各输入列统计信息的 DataFrame,例如 NA 数量、NA 百分比、唯一值个数(nunique)、基尼指数(Gini Index)、众数、中位数等。
我想不明白为什么这段代码没有给出任何结果:如果我尝试访问其中的任何变量,例如 results、group_results 或 group_df_mode,就会提示 xxx 未定义(is not defined)。
这段代码中的所有变量都会出现这个错误。
你能帮帮我吗

# Split df into one group per distinct (Store_Region, shop_retail) pair.
grouped_df = df.groupby(['Store_Region', 'shop_retail'])

def calculate_group_statistics(grouped_df):
    """Summarise every group of a two-key pandas groupby object.

    All intermediate values (``results``, the per-group statistics, ...) are
    local to this function and do not exist at module level; call the
    function and read the statistics from the frame it returns.

    Args:
        grouped_df: A groupby object built from exactly two keys, e.g.
            ``df.groupby(['Store_Region', 'shop_retail'])``. Iterating it
            must yield ``((key_1, key_2), group_frame)`` pairs.

    Returns:
        A DataFrame with one row per group. The first two columns hold the
        group keys; every other cell holds a pandas object describing the
        group's columns:

        * ``Sum_of_NA`` / ``Percentage_of_NA`` / ``N_Unique`` / ``Mode``:
          Series indexed by all column names of the group.
        * ``Standard_Deviation`` / ``Mean`` / ``Median``: Series indexed by
          the numeric column names only.
        * ``Gini_Values``: DataFrame with columns ``Colonna`` and
          ``Indice Gini`` (one row per column of the group).
    """
    results = []

    for (group_Store_Region, group_shop_retail), group_df in grouped_df:
        na_counts = group_df.isna().sum()

        gini_rows = []
        for colonna in group_df.columns:
            # Absolute frequency of every distinct value; NaN is kept as
            # its own category so the shares always add up to 1.
            counts = group_df.groupby(by=colonna, dropna=False).size()
            shares = counts / counts.sum()
            gini_rows.append({
                'Colonna': colonna,
                'Indice Gini': 1 - (shares ** 2).sum(),
            })

        results.append({
            'Group_Store_Region': group_Store_Region,
            'shop_Retail': group_shop_retail,
            'Sum_of_NA': na_counts,
            'Percentage_of_NA': (na_counts / len(group_df)) * 100,
            'N_Unique': group_df.nunique(),
            'Gini_Values': pd.DataFrame(gini_rows),
            # The group frame still contains the (typically non-numeric) key
            # columns; without numeric_only=True these reductions raise
            # TypeError on pandas >= 2.0.
            'Standard_Deviation': group_df.std(numeric_only=True),
            'Mean': group_df.mean(numeric_only=True),
            # mode() may return several rows on ties; keep the first one.
            'Mode': group_df.mode().iloc[0],
            'Median': group_df.median(numeric_only=True),
        })

    return pd.DataFrame(results)
vltsax25

vltsax251#

import pandas as pd


def calculate_group_statistics(df):
    """Compute per-group column statistics for ``df``.

    The frame is grouped by its ``Store_Region`` and ``shop_retail`` columns
    and one result row is produced per group. All intermediate values are
    local to this function; call it and use the returned frame, e.g.
    ``result_df = calculate_group_statistics(df)``.

    Args:
        df: DataFrame containing at least the columns ``Store_Region`` and
            ``shop_retail``.

    Returns:
        A DataFrame with one row per group. ``Group_Store_Region`` and
        ``shop_Retail`` hold the group keys; every other cell holds a pandas
        object describing the group's columns:

        * ``Sum_of_NA`` / ``Percentage_of_NA`` / ``N_Unique`` / ``Mode``:
          Series indexed by all column names.
        * ``Standard_Deviation`` / ``Mean`` / ``Median``: Series indexed by
          the numeric column names only.
        * ``Gini_Values``: DataFrame with columns ``Colonna`` and
          ``Indice Gini``.

    Raises:
        KeyError: If either grouping column is missing from ``df``.
    """
    results = []

    grouped_df = df.groupby(['Store_Region', 'shop_retail'])

    for (group_Store_Region, group_shop_retail), group_df in grouped_df:
        group_result = {
            'Group_Store_Region': group_Store_Region,
            'shop_Retail': group_shop_retail,
        }

        group_df_na_sum = group_df.isna().sum()
        group_result['Sum_of_NA'] = group_df_na_sum
        group_result['Percentage_of_NA'] = (group_df_na_sum / len(group_df)) * 100
        group_result['N_Unique'] = group_df.nunique()

        gini_values = []
        for colonna in group_df.columns:
            # Frequency of every distinct value, with NaN counted as its own
            # category so the relative frequencies sum to 1.
            fr_absolute = group_df.groupby(by=colonna, dropna=False).size()
            fr_relative = fr_absolute / fr_absolute.sum()
            indice_gini = 1 - (fr_relative ** 2).sum()
            gini_values.append({'Colonna': colonna, 'Indice Gini': indice_gini})
        group_result['Gini_Values'] = pd.DataFrame(gini_values)

        # The group frame still contains the (typically non-numeric) key
        # columns; numeric_only=True keeps these reductions from raising
        # TypeError on pandas >= 2.0.
        group_result['Standard_Deviation'] = group_df.std(numeric_only=True)
        group_result['Mean'] = group_df.mean(numeric_only=True)
        # mode() may return several rows on ties; keep the first one.
        group_result['Mode'] = group_df.mode().iloc[0]
        group_result['Median'] = group_df.median(numeric_only=True)

        results.append(group_result)

    result_df = pd.DataFrame(results)
    return result_df

相关问题