pandas JSON中的多个实体到 Dataframe

von4xj4u  于 2022-12-25  发布在  其他
关注(0)|答案(1)|浏览(108)

我有多个JSON文件。每个JSON文件包含多个实体中的结构化数据,如下所示:

{
  "block": {
    "type": "block",
    "number": 2000000,
    "hash": "0xc0f4906fea23cf6f3cce98cb44e8e1449e455b28d684dfa9ff65426495584de6",
    "parent_hash": "0x57ebf07eb9ed1137d41447020a25e51d30a0c272b5896571499c82c33ecb7288",
    "nonce": "0x3b05c6d5524209f1",
    "sha3_uncles": "0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347",
    "logs_bloom": "0x00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
    "transactions_root": "0xb31f174d27b99cdae8e746bd138a01ce60d8dd7b224f7c60845914def05ecc58",
    "state_root": "0x96dbad955b166f5119793815c36f11ffa909859bbfeb64b735cca37cbf10bef1",
    "receipts_root": "0x84aea4a7aad5c5899bd5cfc7f309cc379009d30179316a2a7baa4a2ea4a438ac",
    "miner": "0x61c808d82a3ac53231750dadc13c777b59310bd9",
    "difficulty": 49824742724615,
    "total_difficulty": 44010101827705409388,
    "size": 650,
    "extra_data": "0xe4b883e5bda9e7a59ee4bb99e9b1bc",
    "gas_limit": 4712388,
    "gas_used": 21000,
    "timestamp": 1470173578,
    "transaction_count": 1,
    "base_fee_per_gas": null,
    "item_id": "block_0xc0f4906fea23cf6f3cce98cb44e8e1449e455b28d684dfa9ff65426495584de6",
    "item_timestamp": "2016-08-02T21:32:58Z"
  },
  "transactions": [
    {
      "type": "transaction",
      "hash": "0xc55e2b90168af6972193c1f86fa4d7d7b31a29c156665d15b9cd48618b5177ef",
      "nonce": 126917,
      "transaction_index": 0,
      "from_address": "0x32be343b94f860124dc4fee278fdcbd38c102d88",
      "to_address": "0x104994f45d9d697ca104e5704a7b77d7fec3537c",
      "value": 149990000000000000000,
      "gas": 333333,
      "gas_price": 30000000000,
      "input": "0x",
      "block_timestamp": 1470173578,
      "block_number": 2000000,
      "block_hash": "0xc0f4906fea23cf6f3cce98cb44e8e1449e455b28d684dfa9ff65426495584de6",
      "max_fee_per_gas": null,
      "max_priority_fee_per_gas": null,
      "transaction_type": 0,
      "receipt_cumulative_gas_used": 21000,
      "receipt_gas_used": 21000,
      "receipt_contract_address": null,
      "receipt_root": "0x5a2a9b14d22f5bb89b188d97312f397e94417d01f9a588eddf8924aa51700f40",
      "receipt_status": null,
      "receipt_effective_gas_price": 30000000000,
      "item_id": "transaction_0xc55e2b90168af6972193c1f86fa4d7d7b31a29c156665d15b9cd48618b5177ef",
      "item_timestamp": "2016-08-02T21:32:58Z"
    }
  ],
  "logs": [],
  "token_transfers": [],
  "traces": [
    {
      "type": "trace",
      "transaction_index": 0,
      "from_address": "0x32be343b94f860124dc4fee278fdcbd38c102d88",
      "to_address": "0x104994f45d9d697ca104e5704a7b77d7fec3537c",
      "value": 149990000000000000000,
      "input": "0x",
      "output": "0x",
      "trace_type": "call",
      "call_type": "call",
      "reward_type": null,
      "gas": 312333,
      "gas_used": 0,
      "subtraces": 0,
      "trace_address": [],
      "error": null,
      "status": 1,
      "transaction_hash": "0xc55e2b90168af6972193c1f86fa4d7d7b31a29c156665d15b9cd48618b5177ef",
      "block_number": 2000000,
      "trace_id": "call_0xc55e2b90168af6972193c1f86fa4d7d7b31a29c156665d15b9cd48618b5177ef_",
      "trace_index": 0,
      "block_timestamp": 1470173578,
      "block_hash": "0xc0f4906fea23cf6f3cce98cb44e8e1449e455b28d684dfa9ff65426495584de6",
      "item_id": "trace_call_0xc55e2b90168af6972193c1f86fa4d7d7b31a29c156665d15b9cd48618b5177ef_",
      "item_timestamp": "2016-08-02T21:32:58Z"
    },
    {
      "type": "trace",
      "transaction_index": null,
      "from_address": null,
      "to_address": "0x61c808d82a3ac53231750dadc13c777b59310bd9",
      "value": 5000000000000000000,
      "input": null,
      "output": null,
      "trace_type": "reward",
      "call_type": null,
      "reward_type": "block",
      "gas": null,
      "gas_used": null,
      "subtraces": 0,
      "trace_address": [],
      "error": null,
      "status": 1,
      "transaction_hash": null,
      "block_number": 2000000,
      "trace_id": "reward_2000000_0",
      "trace_index": 1,
      "block_timestamp": 1470173578,
      "block_hash": "0xc0f4906fea23cf6f3cce98cb44e8e1449e455b28d684dfa9ff65426495584de6",
      "item_id": "trace_reward_2000000_0",
      "item_timestamp": "2016-08-02T21:32:58Z"
    }
  ]
}

我想把这些JSON文件转换成顶键的 Dataframe ,所以每个 Dataframe 对应于blocktransactionslogs等,因为它们是顶实体名称。
我正尝试使用df = pd.read_json("block-data/2000000.json")执行此操作,这会产生错误ValueError: Value is too big!
对于如何在大规模处理数十亿个相同的JSON文件时正确执行此操作,您有什么建议吗?

qpgpyjmq

qpgpyjmq1#

由于某种原因,Pandas试图将149990000000000000000转换为Int64,所以您会得到错误ValueError: Value is too big!。但您的另一个问题是,Pandas不清楚您希望最终的 Dataframe 是什么样子的,因为您的键包含对象和列表的组合。
为了更好地控制最终结果,您可以先将文件作为dict读入,然后使用Pandas创建您要查找的 Dataframe 。例如:

import pandas as pd
import json

with open('file.json', 'r') as myfile:
    data=myfile.read()

obj = json.loads(data)

df_block = pd.DataFrame([obj['block']])
df_traces = pd.DataFrame(obj['traces'])
df_transactions = pd.DataFrame(obj['transactions'])

相关问题