pandas:追加两个 DataFrame 时出现 “ValueError: cannot set a frame with no defined columns”(无法为没有定义列的 DataFrame 赋值)

m1m5dgzv  于 2023-05-27  发布在  其他
关注(0)|答案(1)|浏览(185)

DataFrame 1(result_df)在类的方法内部创建,用来追加并保存从 JSON 中读取的值;DataFrame 2(total_device_df)在 main 函数中创建。当我运行 main 函数、把方法返回的结果追加到 DataFrame 2 时,出现 ValueError: cannot set a frame with no defined columns 错误。
为什么要在 main 函数中创建 DataFrame 2?因为我还要在其他函数中把 DataFrame 2(total_device_df)转换为 CSV。

可复现的代码:

import pandas as pd
import os
import json

class KatsRequest:
    """Read device records from a JSON file and tabulate their status."""

    # Directory the JSON file is looked up in; captured once, when the class
    # body is executed.
    currDir = os.getcwd()

    def parse_json_response(self, filename="my_json_file.json",
                            device_name=("Trona", "Sheldon")):
        """Return the status rows of the requested devices as a DataFrame.

        Args:
            filename: Name of the JSON file, resolved against ``currDir``.
                Defaults to the file name the original snippet hard-coded.
            device_name: Iterable of device-type names to keep. A record is
                kept only when its ``device_types.name`` equals one of them
                exactly. Defaults to the names the original snippet
                hard-coded.

        Returns:
            A DataFrame with the columns ``DEVICE``, ``STATUS`` and
            ``LAST UPDATED``; rows are grouped in the order of
            ``device_name`` and, within one name, in file order.
        """
        column_names = ["DEVICE", "STATUS", "LAST UPDATED"]
        if not device_name:
            # Nothing was requested, so there is no reason to touch the file.
            return pd.DataFrame(columns=column_names)

        my_json_file = os.path.join(self.currDir, filename)

        # Parse the file once instead of once per requested device.
        with open(my_json_file, encoding="utf-8") as f:
            data = json.load(f)

        # Collect plain dicts first and build the frame in one go; growing a
        # DataFrame row by row with .loc copies data on every insertion.
        rows = [
            {
                'DEVICE': record['device_types']['name'],
                'STATUS': record['devices']['status'],
                'LAST UPDATED': record['devices']['last_status_update'],
            }
            for wanted in device_name
            for record in data
            if record['device_types']['name'] == wanted
        ]
        return pd.DataFrame(rows, columns=column_names)

# Entry point of the question's reproduction: creates an empty DataFrame and
# tries to store the parsed result in it as a single new row.
# NOTE(review): the header below has no trailing ':' and `filename` /
# `device_names` are not defined at module level in the code shown, so the
# snippet cannot run exactly as posted.
def main()
    # A DataFrame created without arguments has no columns.
    total_device_df = pd.DataFrame()
    # This row assignment is the statement behind the reported
    # "ValueError: cannot set a frame with no defined columns".
    total_device_df.loc[len(total_device_df)] = KatsRequest().parse_json_response(filename, device_names)

# Run main() only when the file is executed as a script.
if __name__ == '__main__':
    main()

以下是我的JSON文件内容:(保存在当前路径“my_json_file.json”中)

[{"devices": {"id": 34815, "last_status_update": "2023-05-25 07:56:49", "status": "idle" }, "device_types": {"name": "Trona"}}, {"devices": {"id": 34815, "last_status_update": "2023-05-25 07:56:49", "status": "idle" }, "device_types": {"name": "Sheldon"}}]

输出:ValueError: cannot set a frame with no defined columns

这里缺少了什么/错了什么?

vsmadaxz

vsmadaxz1#

(你的示例无法复现)
如果你想把 total_device_df 与 parse_json_response 返回的 DataFrame 合并,必须使用 pd.concat:

def main():
    """Combine an empty frame with the parsed device frame via pd.concat."""
    parsed_df = KatsRequest().parse_json_response(filename, device_names)
    # pd.concat returns a new frame; the result is bound to total_device_df.
    total_device_df = pd.concat([pd.DataFrame(), parsed_df])

或者简单地说:

def main():
    """Bind the parsed device frame directly to total_device_df."""
    # The frame returned by parse_json_response is already the full result,
    # so no empty placeholder DataFrame has to be created first.
    total_device_df = KatsRequest().parse_json_response(filename, device_names)

完整示例:

import os
import json
import pandas as pd

# Working directory, captured once when the module is loaded; the JSON path
# in KatsRequest.parse_json_response is built from it.
currDir = os.getcwd()
# Name of the JSON file that main() passes to parse_json_response.
filename = 'my_json_file.json'
# Device-type names that main() asks parse_json_response to keep.
device_names = ['Trona', 'Sheldon']

class KatsRequest:
    """Read device records from a JSON file and tabulate their status."""

    def parse_json_response(self, filename, device_name):
        """Return the status rows of the requested devices as a DataFrame.

        Args:
            filename: Name of the JSON file, resolved against the
                module-level ``currDir``.
            device_name: Iterable of device-type names to keep. A record is
                kept only when its ``device_types.name`` equals one of them
                exactly.

        Returns:
            A DataFrame with the columns ``DEVICE``, ``STATUS`` and
            ``LAST UPDATED``; rows are grouped in the order of
            ``device_name`` and, within one name, in file order.
        """
        column_names = ["DEVICE", "STATUS", "LAST UPDATED"]
        if not device_name:
            # Nothing was requested, so the file is not opened at all.
            return pd.DataFrame(columns=column_names)

        my_json_file = os.path.join(currDir, filename)

        # Parse the file once instead of once per requested device.
        with open(my_json_file, encoding="utf-8") as f:
            data = json.load(f)

        # Collect plain dicts first and build the frame in one go; growing a
        # DataFrame row by row with .loc copies data on every insertion.
        # Equality alone is enough: a name that is equal is also contained,
        # so the former substring pre-check never changed the outcome.
        rows = [
            {
                'DEVICE': record['device_types']['name'],
                'STATUS': record['devices']['status'],
                'LAST UPDATED': record['devices']['last_status_update'],
            }
            for wanted in device_name
            for record in data
            if record['device_types']['name'] == wanted
        ]
        return pd.DataFrame(rows, columns=column_names)

def main():
    """Parse the device file, combine it with an empty frame and print it."""
    parsed_df = KatsRequest().parse_json_response(filename, device_names)
    total_device_df = pd.concat([pd.DataFrame(), parsed_df])
    print(total_device_df)


# Run main() only when the file is executed as a script.
if __name__ == '__main__':
    main()

输出:

>>> total_device_df
    DEVICE STATUS         LAST UPDATED
0    Trona   idle  2023-05-25 07:56:49
1  Sheldon   idle  2023-05-25 07:56:49

更新

一个简单的方法是使用pd.json_normalize

# Load the raw records from the JSON file.
with open('my_json_file.json') as jp:
    data = json.load(jp)

# Flatten the nested records, keep the requested device types, then strip
# the prefix up to the first dot from each column name and upper-case it.
total_device_df = (
    pd.json_normalize(data)
    .loc[lambda frame: frame['device_types.name'].isin(device_names)]
    .rename(columns=lambda col: col.split('.', maxsplit=1)[-1].upper())
)

输出:

>>> total_device_df
      ID   LAST_STATUS_UPDATE STATUS     NAME
0  34815  2023-05-25 07:56:49   idle    Trona
1  34815  2023-05-25 07:56:49   idle  Sheldon

如何在完整的示例代码中使用pd.json_normalize方法?

import os
import json
import pandas as pd

# Working directory, captured once when the module is loaded; the JSON path
# in KatsRequest.parse_json_response is built from it.
currDir = os.getcwd()
# Name of the JSON file that main() passes to parse_json_response.
filename = 'my_json_file.json'
# Device-type names that main() passes to parse_json_response.
device_names = ['Trona', 'Sheldon']

class KatsRequest:
    """Read device records from a JSON file and tabulate their status."""

    def parse_json_response(self, filename, device_name):
        """Return the status rows of the requested devices as a DataFrame.

        Args:
            filename: Name of the JSON file, resolved against the
                module-level ``currDir``.
            device_name: List-like of device-type names to keep; matched
                against ``device_types.name`` with ``Series.isin``.

        Returns:
            A DataFrame with the columns ``DEVICE``, ``STATUS`` and
            ``LAST STATUS``. Rows stay in file order and keep the index
            labels produced by ``pd.json_normalize``. An empty JSON list
            yields an empty frame with those three columns.
        """
        # Flattened JSON path -> output column label.
        # NOTE(review): the loop-based variant of this method labels the
        # timestamp column 'LAST UPDATED'; confirm which label is intended.
        dmap = {
            'device_types.name': 'DEVICE',
            'devices.status': 'STATUS',
            'devices.last_status_update': 'LAST STATUS',
        }
        my_json_file = os.path.join(currDir, filename)

        with open(my_json_file, encoding="utf-8") as f:
            data = json.load(f)

        if not data:
            # json_normalize of an empty list has no columns, so selecting
            # the mapped columns below would raise KeyError.
            return pd.DataFrame(columns=list(dmap.values()))

        results_df = pd.json_normalize(data)[list(dmap)].rename(columns=dmap)
        # Filter on the argument, not on the module-level device_names list,
        # so that callers actually control which devices are returned.
        return results_df.loc[results_df['DEVICE'].isin(device_name)]

def main():
    """Parse the device file, combine it with an empty frame and print it."""
    parsed_df = KatsRequest().parse_json_response(filename, device_names)
    total_device_df = pd.concat([pd.DataFrame(), parsed_df])
    print(total_device_df)


# Run main() only when the file is executed as a script.
if __name__ == '__main__':
    main()

相关问题