[In]:
pd.set_option('display.max_colwidth', 200)  # let displayed columns show up to 200 characters before truncating
topic_stats_df = corpus_topic_df.groupby('Dominant Topic').agg({  # NOTE: the dict-of-dicts passed here is the "nested renamer" form; this call is where SpecificationError is raised
'Dominant Topic': {  # inner dict maps desired output column names to aggregation functions
'Doc Count': np.size,  # intended: number of rows in each 'Dominant Topic' group
'% Total Docs': np.size }  # same count again; converted to a percentage two statements below
})
topic_stats_df = topic_stats_df['Dominant Topic'].reset_index()  # intended: select the 'Dominant Topic' level of the result and make the group key a regular column
topic_stats_df['% Total Docs'] = topic_stats_df['% Total Docs'].apply(lambda row: round((row*100) / len(papers), 2))  # count * 100 / len(papers), rounded to 2 decimals; presumably `papers` is the full document collection - confirm
topic_stats_df['Topic Desc'] = [topics_df.iloc[t]['Terms per Topic'] for t in range(len(topic_stats_df))]  # positional lookup into topics_df; assumes its row order matches the grouped topics - TODO confirm
topic_stats_df  # last expression of the cell, so the notebook displays the frame
[Out]:
---------------------------------------------------------------------------
SpecificationError Traceback (most recent call last)
Cell In[47], line 2
1 pd.set_option('display.max_colwidth', 200)
----> 2 topic_stats_df = corpus_topic_df.groupby('Dominant Topic').agg({
3 'Dominant Topic': {
4 'Doc Count': np.size,
5 '% Total Docs': np.size }
6 })
7 topic_stats_df = topic_stats_df['Dominant Topic'].reset_index()
8 topic_stats_df['% Total Docs'] = topic_stats_df['% Total Docs'].apply(lambda row: round((row*100) / len(papers), 2))
File ~/miniconda3/envs/nlp/lib/python3.8/site-packages/pandas/core/groupby/generic.py:894, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
891 func = maybe_mangle_lambdas(func)
893 op = GroupByApply(self, func, args, kwargs)
--> 894 result = op.agg()
895 if not is_dict_like(func) and result is not None:
896 return result
File ~/miniconda3/envs/nlp/lib/python3.8/site-packages/pandas/core/apply.py:169, in Apply.agg(self)
166 return self.apply_str()
168 if is_dict_like(arg):
--> 169 return self.agg_dict_like()
170 elif is_list_like(arg):
171 # we require a list, but not a 'str'
172 return self.agg_list_like()
File ~/miniconda3/envs/nlp/lib/python3.8/site-packages/pandas/core/apply.py:478, in Apply.agg_dict_like(self)
475 selected_obj = obj._selected_obj
476 selection = obj._selection
--> 478 arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
480 if selected_obj.ndim == 1:
481 # key only used for output
482 colg = obj._gotitem(selection, ndim=1)
File ~/miniconda3/envs/nlp/lib/python3.8/site-packages/pandas/core/apply.py:594, in Apply.normalize_dictlike_arg(self, how, obj, func)
587 # Can't use func.values(); wouldn't work for a Series
588 if (
589 how == "agg"
590 and isinstance(obj, ABCSeries)
591 and any(is_list_like(v) for _, v in func.items())
592 ) or (any(is_dict_like(v) for _, v in func.items())):
593 # GH 15931 - deprecation of renaming keys
--> 594 raise SpecificationError("nested renamer is not supported")
596 if obj.ndim != 1:
597 # Check for missing columns on a frame
598 cols = set(func.keys()) - set(obj.columns)
SpecificationError: nested renamer is not supported
这段代码出自 Sarkar, D. (2019) 的《Text Analytics with Python》(Apress)一书中的主题建模(Topic modeling)一节。
降级到 Pandas 0.25.3 失败了,因为我用的是 M1 芯片的 Mac。
- 已尝试:`pip install pandas==0.25.3`
- 也已尝试:`arch -x86_64 pip install pandas==0.25.3`
1条答案
按热度按时间kkbh8khc1#
Pandas 已移除对嵌套重命名(nested renamer)的支持,应改用
pd.NamedAgg
此语句可重写如下: