pandas 循环遍历多个xml文件

piztneat  于 2023-02-02  发布在  其他
关注(0)|答案(2)|浏览(114)

我刚接触 Python,想循环处理多个 XML 文件。目前我使用下面这段现有代码来读取 sample2.xml 文件:

import xml.etree.ElementTree as ET
import pandas as pd
import os

# Parse the document and keep a handle on its root element.
tree = ET.parse("sample2.xml")
root = tree.getroot()

# Lazy iterators over every matching element anywhere below the root.
qty = root.iterfind(".//Qty")
pri = root.iterfind(".//PriceAmount")
cor = root.iterfind(".//AuctionIdentification")

# Pair the elements positionally and keep only their "v" attributes;
# zip() stops as soon as the shortest of the three iterators runs out.
data = [
    (q_el.get("v"), p_el.get("v"), c_el.get("v"))
    for q_el, p_el, c_el in zip(qty, pri, cor)
]

df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
# The attribute values arrive as strings; the numeric columns need floats.
for numeric_col in ("Qty", "Price"):
    df[numeric_col] = df[numeric_col].astype(float)

# Aggregate: total quantity, average price, identification of the first row.
total = df["Qty"].sum()
price = df["Price"].mean()
border = df.loc[0, "Border"]

# One-row summary; only the first 12 characters of the identification are kept.
df2 = pd.DataFrame(
    {
        "Qty": [total],
        "Price": [price],
        "Border": [str(border)[0:12]],
    }
)

我尝试在下面这行代码中加入 soup.xml,但没有成功:
tree=ET.parse("sample2.xml", "soup.xml")
root = tree.getroot()

nzk0hqpo

nzk0hqpo1#

考虑将代码转换为函数,并为所需的各种文件调用该函数:

import xml.etree.ElementTree as ET
import pandas as pd
import os

def my_xml_processor(filename):
    """Summarise the auction data of one XML file as a one-row DataFrame.

    The "v" attributes of all ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` elements found anywhere in the document are
    paired up positionally; pairing stops at the shortest of the three.

    Args:
        filename: Path of the XML file (or an open file object); it is
            passed unchanged to ``xml.etree.ElementTree.parse``.

    Returns:
        A DataFrame with the columns ``Qty`` (sum of all quantities),
        ``Price`` (mean of all prices) and ``Border`` (first 12 characters
        of the first auction identification). When the document contains
        no complete Qty/PriceAmount/AuctionIdentification triple, an empty
        DataFrame with the same columns is returned, so results from many
        files can still be concatenated.
    """
    columns = ["Qty", "Price", "Border"]
    root = ET.parse(filename).getroot()

    rows = [
        (qty.get("v"), price.get("v"), auction.get("v"))
        for qty, price, auction in zip(
            root.iterfind(".//Qty"),
            root.iterfind(".//PriceAmount"),
            root.iterfind(".//AuctionIdentification"),
        )
    ]

    # Without any rows, looking up row 0 below would fail with an opaque
    # "KeyError: 0"; hand back an empty, correctly shaped frame instead.
    if not rows:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows, columns=columns)
    # The attribute values are strings; convert before aggregating.
    total = df["Qty"].astype(float).sum()
    mean_price = df["Price"].astype(float).mean()
    border = df.loc[0, "Border"]

    return pd.DataFrame(
        {
            "Qty": [total],
            "Price": [mean_price],
            "Border": [str(border)[0:12]],
        }
    )

然后,您可以调用它来获取文件:

# Each call returns a one-row summary DataFrame; keep the results instead of
# discarding them, then stack them into a single frame (one row per file).
frames = [my_xml_processor(name) for name in ("sample2.xml", "soup.xml")]
summary = pd.concat(frames, ignore_index=True)
h5qlskok

h5qlskok2#

您可以使用现有的代码,但要为每个文件名循环运行它,类似于:

import xml.etree.ElementTree as ET
import pandas as pd
import os

files = ['sample2.xml', 'sample3.xml', 'sample4.xml']

# One single-row summary frame per processed file. Without collecting them,
# df2 is overwritten on every pass and only the last file's result survives.
summaries = []

for file in files:  # read each filename from above list
    tree = ET.parse(file)
    root = tree.getroot()

    # Lazy iterators over every matching element anywhere below the root.
    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    # Pair the elements positionally and keep only their "v" attributes;
    # zip() stops at the shortest of the three iterators.
    data = [(x.get("v"), y.get("v"), z.get("v")) for x, y, z in zip(qty, pri, cor)]

    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    # The attribute values are strings; convert before aggregating.
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']

    # Summary row for this file; only the first 12 characters of the
    # identification are kept.
    df2 = pd.DataFrame(columns=["Qty", "Price", "Border"])

    df2['Qty'] = [total]
    df2['Price'] = [price]
    df2['Border'] = [str(border)[0:12]]

    summaries.append(df2)

# Combined result: one row per input file, in the order given in `files`.
if summaries:
    result = pd.concat(summaries, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty frame.
    result = pd.DataFrame(columns=["Qty", "Price", "Border"])

相关问题