JSON到Python Dataframe :从另一个文件Map值

rsaldnfx  于 2023-02-01  发布在  Python
关注(0)|答案(1)|浏览(114)

我有一些JSON文件,如下所示,每个学生ID将有一个相应的JSON文件与标记的细节。
students.json

{
    "Students": [
        {
            "StudentName": "AAA",
            "Sid": 1020,
            "Saddress": "st.aaa",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "BBB",
            "Sid": 1021,
            "Saddress": "st.bbb",
            "Sdob": "11-11-1999"
        },
        {
            "StudentName": "CCC",
            "Sid": 1022,
            "Saddress": "st.fff",
            "Sdob": "05-12-1999"
        },
        {
            "StudentName": "DDD",
            "Sid": 1023,
            "Saddress": "st.ddd",
            "Sdob": "15-09-1999"
        },
        {
            "StudentName": "EEE",
            "Sid": 1024,
            "Saddress": "st.eee",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "FFF",
            "Sid": 1025,
            "Saddress": "st.ddd",
            "Sdob": "20-11-1999"
        },
        {
            "StudentName": "GGG",
            "Sid": 1026,
            "Saddress": "st.ggg",
            "Sdob": "25-11-1999"
        },
        {
            "StudentName": "JJJ",
            "Sid": 1019,
            "Saddress": "st.aaa",
            "Sdob": "18-11-1999"
        }
    ]
}

1020.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1021.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1022.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1023.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1024.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1025.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1026.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1019.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

我需要得到这样的输出

我的代码:

import json
import pandas as pd

data_JSON = open("students.json")

json_str = data_JSON.read()
data= json.loads(json_str)

print("JSON Data")
print(data)

def Normalize(data_JSON,record_path):
    temp = json.dumps(data_JSON)
    ar = json.loads(temp)
    df = pd.json_normalize(ar[record_path])
    return df

data2 = Normalize(data, "Students")
print("\nAfter normalizing JSON Data")
print(data2)

df0 = pd.DataFrame(data2)
df1 = df0[['StudentName', 'Sid']]
print("\nTaking specific columns from the df")
print(df1)

files = []

for i, row in df1.groupby('Sid').size().iteritems():
    file = str(i)+".json"
    files.append(file)

print("\nFile Names")
print(files)

df2 = pd.DataFrame(columns=['StudentName','Sid'])
merged = pd.DataFrame()

for i in files:
    data_JSON = open(i)
    json_str = data_JSON.read()
    data= json.loads(json_str)
    marks = Normalize(data, "marks")
    df = pd.DataFrame(marks)
    merged = pd.concat([df2, df], ignore_index=True, sort=False)

print("\nMarks")
print(merged)

# saving the dataframe
merged.to_csv('StudentsMark.csv',index=False)

输出:

是否可以将StudentName和Sid与标记一起插入?我不想将StudentName和Sid直接提供给 Dataframe df2,当程序从每个文件中获取标记时,它应该能够将StudentName和SidMap并添加到每个文件的df2中。

yjghlzjz

yjghlzjz1#

使用json_normalize

import json

import pandas as pd

path = "path/to/files"

def get_data(file: str) -> json:
    with open(f"{path}/{file}.json", "r") as f:
        return json.loads(f.read())

students_df = pd.json_normalize(data=get_data("students"), record_path="Students")

student_ids = students_df["Sid"].tolist()
grades_df = pd.concat([pd.json_normalize(data=get_data(x), record_path="marks").assign(Id=x) for x in student_ids])

(pd
 .merge(left=students_df, right=grades_df, left_on="Sid", right_on="Id")
 .drop(columns="Id")
 .to_csv(f"{path}/students_marks.csv", index=False)
 )

相关问题