pandas 当XML标记具有子属性时将XML解析为CSV

4ktjp1zp  于 2022-11-20  发布在  其他
关注(0)|答案(1)|浏览(145)

我写了一个小的python应用程序来打印一些XML标签和选择子属性。XML用于墨西哥的电子发票,下面是一个XML的例子:

<?xml version="1.0" encoding="UTF-8"?><cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="4.0" xsi:schemaLocation="http://www.sat.gob.mx/cfd/4 http://www.sat.gob.mx/sitio_internet/cfd/4/cfdv40.xsd" Serie="V" Folio="10030062" Fecha="2022-11-09T18:55:51" Sello="kWjohv/nlmGBVrIUBxeULiiF2HiUGxAsDC4FTirGnF8GMD7tTVDwpzDOVcyJJupQYJKj/xRPIz46i1RjZYX2jIskXxJwb5QkWfSUC6rO3TdHr4nqJQnLCD2cdp66u2/v+8uYJv+as7uXvuGv1JwQ67Mg037b0IPTjHPKaZvRwIBQCrLukLB4bOX8yuBGWWqrAqJPR/eS/wRt3QedyBhUIbUsebRgtirOQ0ywarSPUJ9Dll0KmaWq3rrHN+jkoAUZSgy+mJoR2WldeIbuiHXml/QXezl4o34ICK32gYyzvzrpLTslxPYTKcoKzDvGo2jK5/T7NctbNrrH29i515lugg==" FormaPago="04" NoCertificado="00001000000503805521" Certificado="MIIGITCCBAmgAwIBAgIUMDAwMDEwMDAwMDA1MDM4MDU1MjEwDQYJKoZIhvcNAQELBQAwggGEMSAwHgYDVQQDDBdBVVRPUklEQUQgQ0VSVElGSUNBRE9SQTEuMCwGA1UECgwlU0VSVklDSU8gREUgQURNSU5JU1RSQUNJT04gVFJJQlVUQVJJQTEaMBgGA1UECwwRU0FULUlFUyBBdXRob3JpdHkxKjAoBgkqhkiG9w0BCQEWG2NvbnRhY3RvLnRlY25pY29Ac2F0LmdvYi5teDEmMCQGA1UECQwdQVYuIEhJREFMR08gNzcsIENPTC4gR1VFUlJFUk8xDjAMBgNVBBEMBTA2MzAwMQswCQYDVQQGEwJNWDEZMBcGA1UECAwQQ0lVREFEIERFIE1FWElDTzETMBEGA1UEBwwKQ1VBVUhURU1PQzEVMBMGA1UELRMMU0FUOTcwNzAxTk4zMVwwWgYJKoZIhvcNAQkCE01yZXNwb25zYWJsZTogQURNSU5JU1RSQUNJT04gQ0VOVFJBTCBERSBTRVJWSUNJT1MgVFJJQlVUQVJJT1MgQUwgQ09OVFJJQlVZRU5URTAeFw0yMDA0MTYyMDE1MTdaFw0yNDA0MTYyMDE1MTdaMIHvMTAwLgYDVQQDEydQUkVNSVVNIFJFU1RBVVJBTlQgQlJBTkRTIFMgREUgUkwgREUgQ1YxMDAuBgNVBCkTJ1BSRU1JVU0gUkVTVEFVUkFOVCBCUkFORFMgUyBERSBSTCBERSBDVjEwMC4GA1UEChMnUFJFTUlVTSBSRVNUQVVSQU5UIEJSQU5EUyBTIERFIFJMIERFIENWMSUwIwYDVQQtExxQUkIxMDA4MDJIMjAgLyBSQVpFNjUwNTAzVUY4MR4wHAYDVQQFExUgLyBSQVpFNjUwNTAzSE5FTUNMMDcxEDAOBgNVBAsUB1BSQl9GQUMwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCacpMWcQqSuS0mc8CfDLBvhLqPL5LyxYcEi/TYqHpje3DeVkkB6uYB19+3MO3oTnGnZgt7Jhs6/eM1+3ch/4EnAxUvVbBAHaXUUmRHTXGwBgqRMHgYYQ/DwsKHjL2fQoCodxSsJCKSg93GO4JXXHIFITALb9aOmPLd/hRc4krOqZT2egVL/HrIY+4Y2L9y9HEH+B8HUC5tbmsal5V9XNQs86nSg8Zc8IPUNMhWRQtKwdIwDwCTccYTTiBK7O2ykiba6/Ef3ORb1bDHv8YSzfjnNpD/yhXn3PyCKR9KjXp1dxGyFsEbqZH5SwUp5/aDDXetI1dal7GYSxqYA54BRKQFAgMBAAGjHTAbMAwGA1UdEwEB/wQCMAAwCwYDVR0PBAQDAgbAMA0GCSqGSIb3DQEBCwUAA4ICAQAU8WJ25ANnPSd09lBj1XsKcDlREx1zr3Tlw9UrFIZZJdsd2f0BeJFtolsWO3afHiVcpk5IfUshjI9fe/uzm8AbbMPpaoBhywoHTBJiG4bGkwQpVEddjufDKKxkuao+NALpwhfFc8kNJTmG0FuOYEVU7pKh/gz2kZOhcKViXGt3OQYLlUZ6+PP99Z2AePkz2x6gtC+A20oxfDLkPXqtEez2mby//bUSgtGsFWTIkrtC7Zro47zNOCYDngKWoke4T91o8xTtcABoeRlZTDovLCFsVm0zg5Cd22PWFkfIvlVZyIRSlJcrq2P3fo0fzeQ+rG+CpntIfOrYZr5eQHOOLUMPavazsTvFDJQpnCbZnNIxnaMKAPmXbgJHyMx24tARd0rGEuM/KLn/ZW2TCUAD5mofsT6++Z/EMsAZ68Tv4ZbwcPlWDbuJEUTsK/z2angnO55xA+NdPz+MltizMUcKXjzzvUanOAXQNIHD2wEbyXHpD3Ytb6BU6OOAx7HNiBnokxkyr7riD/slEL/di09S3Po3Q5X0z4ygUh2lHyxJDDJtNYiYLsscbliVVk0BtPAuTidOlLutw9N19zSE4AZgzIhwIF7oiJlM4EytSIZsM6GUWniN4+tWRDoV+sEgpKnblH4ms3OHB3ZE5LsgHAjcfFyToVaA3GpzLJSkQawmhc+ylw==" SubTotal="145.69" Moneda="MXN" Total="169.00" TipoDeComprobante="I" Exportacion="01" MetodoPago="PUE" LugarExpedicion="11520"><cfdi:Emisor Rfc="PRB100802H20" Nombre="PREMIUM RESTAURANT BRANDS" RegimenFiscal="601"/><cfdi:Receptor Rfc="IST190806QJ7" Nombre="INDRA SISTEMAS TRANSPORTE Y DEFENSA" DomicilioFiscalReceptor="11520" RegimenFiscalReceptor="601" UsoCFDI="G03"/><cfdi:Conceptos><cfdi:Concepto ClaveProdServ="90101503" NoIdentificacion="0385101372231252" ObjetoImp="02" Cantidad="1" ClaveUnidad="XPK" Unidad="Paquete" Descripcion="PQT. DE ALIMENTOS (CONSUMO: 2022-11-08) FOLIO(0385101372231252)" ValorUnitario="145.69" Importe="145.69"><cfdi:Impuestos><cfdi:Traslados><cfdi:Traslado Base="145.69" Impuesto="002" TipoFactor="Tasa" TasaOCuota="0.160000" Importe="23.31"/></cfdi:Traslados></cfdi:Impuestos></cfdi:Concepto></cfdi:Conceptos><cfdi:Impuestos TotalImpuestosTrasladados="23.31"><cfdi:Traslados><cfdi:Traslado Base="145.69" Impuesto="002" TipoFactor="Tasa" TasaOCuota="0.160000" Importe="23.31"/></cfdi:Traslados></cfdi:Impuestos><cfdi:Complemento><tfd:TimbreFiscalDigital xmlns:tfd="http://www.sat.gob.mx/TimbreFiscalDigital" FechaTimbrado="2022-11-09T19:05:56" UUID="67B2DDD8-ABCF-4CD1-B435-C228742542B6" NoCertificadoSAT="00001000000503270882" SelloCFD="kWjohv/nlmGBVrIUBxeULiiF2HiUGxAsDC4FTirGnF8GMD7tTVDwpzDOVcyJJupQYJKj/xRPIz46i1RjZYX2jIskXxJwb5QkWfSUC6rO3TdHr4nqJQnLCD2cdp66u2/v+8uYJv+as7uXvuGv1JwQ67Mg037b0IPTjHPKaZvRwIBQCrLukLB4bOX8yuBGWWqrAqJPR/eS/wRt3QedyBhUIbUsebRgtirOQ0ywarSPUJ9Dll0KmaWq3rrHN+jkoAUZSgy+mJoR2WldeIbuiHXml/QXezl4o34ICK32gYyzvzrpLTslxPYTKcoKzDvGo2jK5/T7NctbNrrH29i515lugg==" SelloSAT="LBOVbhfMGU8T2Tsrz6fFTLkCz90Z0sZIkJqLquayWD5GIhdw6UDvp2Lo5r40jjGC1WwvHMsimi6Ho5xMH70nHH9gkeUIRK3BdsPcUjwFSnYzL1TwG70ZGf7hFBh8uflI1jKzLPRFvWhfHyw1Wznof9NtlXCvSRYhmlcxM6/kj/gOOG0hrq+DJaEsTNJgD7XQzUlMJ9/Casc2kgvOYAdwpXdmkNEtEe9oqQiti4VbPXxEUKpE66hik/Rg4txFMCTPlAMpiz3XfDig/gp6lrFnb/TYkSFr3E9/oPJxoig4xTwPuCZ9uOxfExpxtI3ASXpCoh4isWqqlgxc7abxIoA5Tw==" Version="1.1" RfcProvCertif="TLE011122SC2" xsi:schemaLocation="http://www.sat.gob.mx/TimbreFiscalDigital http://www.sat.gob.mx/sitio_internet/cfd/TimbreFiscalDigital/TimbreFiscalDigitalv11.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"/></cfdi:Complemento><cfdi:Addenda><Referencia xmlns="https://facturacion.prb.com.mx/XSD/"
                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                  xsi:schemaLocation="https://facturacion.prb.com.mx/XSD/ https://facturacion.prb.com.mx/XSD/prb_addenda.xsd"
                  ticket="0385101372231252"/></cfdi:Addenda></cfdi:Comprobante>

下面是我编写的代码:

while True:

    import xml.dom.minidom
    import csv

    from  tkinter import *
    from tkinter import filedialog

    root = Tk()

    root.filename =  filedialog.askopenfilename(title = "Select file", filetypes =[('XML Files', '*.xml')])
    print (root.filename)

    root.mainloop()
    print("------------------------------------------------------------------------------")
    def main():
        # use the parse() function to load and parse an XML file
        doc = xml.dom.minidom.parse(root.filename);

        # print out the document node and the name of the first child tag
        print (doc.nodeName)
        print (doc.firstChild.tagName)
        # get a list of XML tags from the document and print each one

        cfd = doc.getElementsByTagName("cfdi:Comprobante")
        print ("%d Monto:" % cfd.length)
        for skill in cfd:
            print (skill.getAttribute("Total"))
      
        cfd = doc.getElementsByTagName("cfdi:Comprobante")
        print ("%d Fecha:" % cfd.length)
        for skill in cfd:
            print (skill.getAttribute("Fecha"))
        
        cfd = doc.getElementsByTagName("cfdi:Concepto")
        print ("%d Descripción:" % cfd.length)
        for skill in cfd:
            print (skill.getAttribute("Descripcion"))

        cfd = doc.getElementsByTagName("cfdi:Emisor")
        print ("%d RFC_Emisor:" % cfd.length)
        for skill in cfd:
            print (skill.getAttribute("Rfc"))
        
        cfd = doc.getElementsByTagName("cfdi:Emisor")
        print ("%d Emisor:" % cfd.length)
        for skill in cfd:
            print (skill.getAttribute("Nombre"))

        cfd = doc.getElementsByTagName("tfd:TimbreFiscalDigital")
        print ("%d UUID:" % cfd.length)
        for skill in cfd:
            print (skill.getAttribute("UUID"))

        cfd = doc.getElementsByTagName("cfdi:Receptor")
        print ("%d RFC_Receptor:" % cfd.length)
        for skill in cfd:
                print (skill.getAttribute("Rfc"))

        cfd = doc.getElementsByTagName("cfdi:Receptor")
        print ("%d Receptor:" % cfd.length)
        for skill in cfd:
                print (skill.getAttribute("Nombre"))
    print("------------------------------------------------------------------------------")
    if __name__ == "__main__":
        main();
    try_again = int(input("Press 1 to try again, 0 to exit."))
    if try_again == 0:
        break # break out of the outer while loop

现在,我尝试更新它,将所有内容写入CSV文件,我尝试了以下代码

import xml.etree.ElementTree as Xet
import pandas as pd
import xml.dom.minidom
import csv

from tkinter import *
from tkinter import filedialog

root1 = Tk()

root1.filename =  filedialog.askopenfilename(title = "Select file", filetypes =[('XML Files', '*.xml')])
print (root1.filename)

cols = ["Monto","Fecha","Descripcion","RFC_Emisor","Emisor","UUID","RFC_Receptor","Receptor"]
rows = []

# use the parse() function to load and parse an XML file
doc = xml.dom.minidom.parse(root1.filename);
Total = doc.getElementsByTagName("cfdi:Comprobante")
for skill in Total:
    print (skill.getAttribute("Total"))
      
Date = doc.getElementsByTagName("cfdi:Comprobante")
for skill in Date:
    print (skill.getAttribute("Fecha"))
        
Desc = doc.getElementsByTagName("cfdi:Concepto")
for skill in Desc:
    print (skill.getAttribute("Descripcion"))

RFC1 = doc.getElementsByTagName("cfdi:Emisor")
for skill in RFC1:
    print (skill.getAttribute("Rfc"))
        
Name = doc.getElementsByTagName("cfdi:Emisor")
for skill in Name:
    print (skill.getAttribute("Nombre"))

UUI = doc.getElementsByTagName("tfd:TimbreFiscalDigital")
for skill in UUI:
    print (skill.getAttribute("UUID"))

RFC2 = doc.getElementsByTagName("cfdi:Receptor")
for skill in RFC2:
    print (skill.getAttribute("Rfc"))

Name2 = doc.getElementsByTagName("cfdi:Receptor")
for skill in Name2:
    print (skill.getAttribute("Nombre"))

#Parsing the XML file
xmlparse = Xet.parse(root1.filename)
root = xmlparse.getroot()
for i in root:
    Monto = Total
    Fecha = Date
    Descripcion = Desc
    RFC_Emisor = RFC1
    Emisor = Name
    UUID = UUI
    RFC_Receptor = RFC2
    Receptor = Name2

    rows.append({"Monto": Monto,
                 "Fecha": Fecha,
                 "Descripcion": Descripcion,
                 "RFC_Emisor": RFC_Emisor,
                 "Emisor": Emisor,
                 "UUID": UUID,
                 "RFC_Receptor": RFC_Receptor,
                 "Receptor": Receptor})

df= pd.DataFrame(rows, columns=cols)

df.to_csv('output.csv')

但CSV文件只写了以下内容:、监视器、Fecha、描述、RFC_发射器、发射器、UUID、RFC_接收器、接收器0、[〈DOM元素:在0x 2c 76 ad 9a 030处存在问题〉]、[〈DOM元素:在0x 2c 76 ad 9a 030处存在问题〉]、[〈DOM元素:在0x 2c 76 adbdef 0〉处的概念]、[在0x 2c 76 adbdbd 0处的发射器〉]、[〈DOM元素:在0x 2c 76 adbdbd 0处的发射器〉]、[〈DOM元素:在0x 2c 76 adbe 5d 0处的时间文件格式cfdi:0x 2c 76 adbdd 10处的受体〉],[〈DOM元素:cfdi:0x 2c 76 adbdd 10处的受体〉]
我知道我解析了XML两次,但不知道如何解析
预期的CSV应如下所示:
,Monto,Fecha,描述,RFC_发射器,发射器,UUID,RFC_接收器,接收器0,169.00,2022 -11- 09 T18:55:51,产品说明(消费品:2022年11月8日)产品资料(0385101372231252),PRB 100802 H20,高级餐厅品牌,67 B2 DDD 8-ABCF-4CD 1-B435-C228742542 B6,IST 190806 QJ 7,运输和国防工业系统

2022年11月14日编辑

我尝试使用此代码,但无法获得Total或Fecha值

import pandas as pd
import xml.dom.minidom
import xml.etree.ElementTree as Xet

from tkinter import *
from tkinter import filedialog
from lxml import etree

root1 = Tk()

root1.filename = filedialog.askopenfilename(title="Select file", filetypes=[('XML Files','*.xml')])
print (root1.filename)
cols = ["Monto","Fecha","Descripcion","RFC_Emisor","Emisor","UUID"]
rows = []
row =[]
doc = xml.dom.minidom.parse(root1.filename);

xmlparse = Xet.parse(root1.filename)
root = xmlparse.getroot()

for m in root.findall('.//*[@Total]'):
    row.extend(m.attrib.get("Total")) #,m.attrib.get("Fecha")))
for d in root.findall('.//*[@Descripcion]'):
    row.append(d.attrib.get('Descripcion'))
for rf in root.findall('.//*[@Rfc]'):
    row.extend((rf.attrib.get("Rfc"),rf.attrib.get("Nombre")))
for u in root.findall('.//*[@UUID]'):
    row.append(u.attrib.get("UUID"))
    
rows.append(row)
df= pd.DataFrame(rows,columns=cols)
df.to_csv('output.csv',mode='a', index=False, header=False)

编辑日期:2022年11月16日

我能够运行代码,但使用了以下XML:

<?xml version="1.0" encoding="utf-8"?><cfdi:Comprobante xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cfdi="http://www.sat.gob.mx/cfd/3" xsi:schemaLocation="http://www.sat.gob.mx/cfd/3 http://www.sat.gob.mx/sitio_internet/cfd/3/cfdv33.xsd" Version="3.3" Serie="EE" Folio="3963205" Fecha="2022-08-26T11:56:10" Sello="fg7F9EF8YXkWP4UAw96g97gat8D7nzJ10TjtxB/x4t1G10LS7RmRKa/jQ1dcBpJ96ck8FPBnOirF8Ya4IQ7hJgkoWRDkY4cpTI5UChiZsld7frl8x1yz21HIckqWqBtn/xQT4l0iAXda5xIRA6shOf0YErTU6NOkZNLNp4ToNg6hUbaoc4RXTNWcyc25lyXc9nMY6BkYiDNaCgLnIZ/d1jTIrwIPOyAlhAcdmVaPKxyfpMNrUhPBh4FKRy6MW8iNGXw+ZhPSYUncSiuUYA6O7B1qlHGHQSuN4q+dEv8T3C+4gM0PjInYhRt3XSOmwDfXAvjUFy4tyqXIHBHZCnpG+A==" FormaPago="99" NoCertificado="00001000000507261913" Certificado="MIIF6DCCA9CgAwIBAgIUMDAwMDEwMDAwMDA1MDcyNjE5MTMwDQYJKoZIhvcNAQELBQAwggGEMSAwHgYDVQQDDBdBVVRPUklEQUQgQ0VSVElGSUNBRE9SQTEuMCwGA1UECgwlU0VSVklDSU8gREUgQURNSU5JU1RSQUNJT04gVFJJQlVUQVJJQTEaMBgGA1UECwwRU0FULUlFUyBBdXRob3JpdHkxKjAoBgkqhkiG9w0BCQEWG2NvbnRhY3RvLnRlY25pY29Ac2F0LmdvYi5teDEmMCQGA1UECQwdQVYuIEhJREFMR08gNzcsIENPTC4gR1VFUlJFUk8xDjAMBgNVBBEMBTA2MzAwMQswCQYDVQQGEwJNWDEZMBcGA1UECAwQQ0lVREFEIERFIE1FWElDTzETMBEGA1UEBwwKQ1VBVUhURU1PQzEVMBMGA1UELRMMU0FUOTcwNzAxTk4zMVwwWgYJKoZIhvcNAQkCE01yZXNwb25zYWJsZTogQURNSU5JU1RSQUNJT04gQ0VOVFJBTCBERSBTRVJWSUNJT1MgVFJJQlVUQVJJT1MgQUwgQ09OVFJJQlVZRU5URTAeFw0yMTA0MzAxOTMwNTFaFw0yNTA0MzAxOTMwNTFaMIG2MRwwGgYDVQQDExNCQ0QgVFJBVkVMIFNBIERFIENWMRwwGgYDVQQpExNCQ0QgVFJBVkVMIFNBIERFIENWMRwwGgYDVQQKExNCQ0QgVFJBVkVMIFNBIERFIENWMSUwIwYDVQQtExxCVFI3ODAyMjM3VTIgLyBNRVNFNjkwNjEzNTQ3MR4wHAYDVQQFExUgLyBNRVNFNjkwNjEzSERGTk5OMDUxEzARBgNVBAsTCkJDRCBUUkFWRUwwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDcOxH+0iOfuj7fkxJhU3EoUo/aHFcofV2RkJ0XOOTP7H0oL99KE1AofsUbPZZvn4605puQWp2Fu/xg544Fd24fQ3WinhktnFu/bfP4X7O6hFyiL7//Kcme87/sNkFqaO6JUjkGMAASa3XROPUyYrnPszshF4sne+KZZzHg2347l2qPhN6LMMMyqIN0YdS9AsMTdUnSnZYgfgrxHN8RnWrwgmpELGJ6lZBf4mEpltzXYNYOWgm9t2xlnmMXss7MCsvQh6+ctA8iwEe64F2AesQzFNarer48RI8WHhBeUqO6APnom8tgA9K9SlBYxgR7FyRlrR8Q7NWRy122yTDiqUD3AgMBAAGjHTAbMAwGA1UdEwEB/wQCMAAwCwYDVR0PBAQDAgbAMA0GCSqGSIb3DQEBCwUAA4ICAQAJruwufHGLVpyZ6ReQ8AyrkMtxONRmLhv7C2nY2c8+O+k/emdZU0Zm2iTQViXTPo0K0i2o4scEVMZAKtTbqzJk4NDHTYHn6ESNsH1whLcBAtGn2b+GYt4TYMZVy7zP/ty5mL8rlMBmg89zi2NGQRqLl4l5DoI1KgcSk7wue2hCOj3kIi4noWZQdh8kgACthei7aPscfNQXivZ17tBDTzmdWIBcn4KACIwFvkTCeGl1gsV5i1CIdex5p011qXsmIcPMIF/gAsVZRbfEKpu2afxZcCC9ig/xmB2blpv2E8QSMa6S27w7dxH3i92OpFBRXONpFXNJRtx7r6UvUw6Shq2oqTfImGzvdfC8oOa3LIq2AdoD2XGRjuaOQLqRhWldxyiW0N7jTreMnZoeUi2TiN8yp8aJFMRf8pj5sxGsUVBUiktYp6FYk3H+hk3DPEngZje2pJog0suuUyHJJEWaSMQlyFe30lyEMvnXt/xSpMwjt0PU3I9VOhXHrLW0QeCV8AyQ/timemYdJlgke6ROygIU01xqG/SBfM8REYYgtoKFKrtBuqowSrbjlJOXZHFZzKapAipbQ4dfOWoiBIlxHwgOtmOqfBlGRSf/MfSDxIERRGybZzBTnrMBPbgXPNeqs2qFAyrIx/qiTqp85KMgwoANVHUy4rk6mrc3gxZtp3isxA==" SubTotal="1483.10" Descuento="0.00" Moneda="MXN" Total="1713.48" TipoDeComprobante="I" MetodoPago="PPD" LugarExpedicion="11560"><cfdi:Emisor Rfc="BTR7802237U2" Nombre="BCD Travel S.A. de C.V." RegimenFiscal="601"/><cfdi:Receptor Rfc="IST190806QJ7" Nombre="INDRA SISTEMAS TRANSPORTE Y DEFENSA" UsoCFDI="G03"/><cfdi:Conceptos><cfdi:Concepto ClaveProdServ="90121502" NoIdentificacion="AZOEIG-3969614-15936193-16737975" Cantidad="1" ClaveUnidad="E48" Unidad="Unidad de Servicio" Descripcion="Reservaci&#243;n Hotel ( Tasa 16% )" ValorUnitario="1439.90" Importe="1439.90" Descuento="0.00"><cfdi:Impuestos><cfdi:Traslados><cfdi:Traslado Base="1439.90" Impuesto="002" TipoFactor="Tasa" TasaOCuota="0.160000" Importe="230.38"/></cfdi:Traslados></cfdi:Impuestos></cfdi:Concepto><cfdi:Concepto ClaveProdServ="90121502" NoIdentificacion="AZOEIG-3969614-15936193-16737975" Cantidad="1" ClaveUnidad="E48" Unidad="Unidad de Servicio" Descripcion="Otros Impuestos" ValorUnitario="43.20" Importe="43.20" Descuento="0.00"><cfdi:Impuestos><cfdi:Traslados><cfdi:Traslado Base="43.20" Impuesto="002" TipoFactor="Tasa" TasaOCuota="0.000000" Importe="0.00"/></cfdi:Traslados></cfdi:Impuestos></cfdi:Concepto></cfdi:Conceptos><cfdi:Impuestos TotalImpuestosTrasladados="230.38"><cfdi:Traslados><cfdi:Traslado Impuesto="002" TipoFactor="Tasa" TasaOCuota="0.160000" Importe="230.38"/><cfdi:Traslado Impuesto="002" TipoFactor="Tasa" TasaOCuota="0.000000" Importe="0.00"/></cfdi:Traslados></cfdi:Impuestos><cfdi:Complemento><tfd:TimbreFiscalDigital xmlns:tfd="http://www.sat.gob.mx/TimbreFiscalDigital" xsi:schemaLocation="http://www.sat.gob.mx/TimbreFiscalDigital http://www.sat.gob.mx/sitio_internet/cfd/TimbreFiscalDigital/TimbreFiscalDigitalv11.xsd" Version="1.1" UUID="055DC12A-C9F7-4E70-B23B-EB6CA1ABDC4A" FechaTimbrado="2022-08-26T13:10:51" RfcProvCertif="DET080304395" SelloCFD="fg7F9EF8YXkWP4UAw96g97gat8D7nzJ10TjtxB/x4t1G10LS7RmRKa/jQ1dcBpJ96ck8FPBnOirF8Ya4IQ7hJgkoWRDkY4cpTI5UChiZsld7frl8x1yz21HIckqWqBtn/xQT4l0iAXda5xIRA6shOf0YErTU6NOkZNLNp4ToNg6hUbaoc4RXTNWcyc25lyXc9nMY6BkYiDNaCgLnIZ/d1jTIrwIPOyAlhAcdmVaPKxyfpMNrUhPBh4FKRy6MW8iNGXw+ZhPSYUncSiuUYA6O7B1qlHGHQSuN4q+dEv8T3C+4gM0PjInYhRt3XSOmwDfXAvjUFy4tyqXIHBHZCnpG+A==" NoCertificadoSAT="00001000000503726537" SelloSAT="LyWQC2ExMofC25dv/qhchiKH2yVf29BuRzA1WJaPFOGq5+JF+bJL7nPpV2jE6iP1aKbtD7lyPLHRW8/P9KTR47GtGf3iuPpUWddsUA70cVTk1ol6/FJfrfuE1G2CLlUdhhf8MholjYtJNgbZ7hlfdmv0Zrj5vv3waO9FIRr0J/P6fA0uBK0qX0CxGYxNTsxPrwJ3CNkWFa94rVdM4iCfCZeXNGoqTXF+EEe2yPFJUvMR/BcYoiG8w6mKrojzKetDgg3J6bSDhW8XNGvYNt300fwUlU7arvoCo7f36UbI1lh+xGWEIDPy/IO7bccpfOm3T7xf0bfWBT1kl3o8IqxqgQ=="/></cfdi:Complemento><cfdi:Addenda><BCDTravel:AdditionalInformation xmlns:BCDTravel="https://www.bcdtravelmexico.com.mx/Addenda" xsi:schemaLocation="https://www.bcdtravelmexico.com.mx/Addenda https://www.bcdtravelmexico.com.mx/Addenda/BCDTravel.xsd"><BCDTravel:RecordInformation><BCDTravel:Reservacion ClaveReservacion="AZOEIG" NumeroOS="3969614" Pasajero="TEJEDA/EDGAR LEONARDO"/></BCDTravel:RecordInformation><BCDTravel:PaymentInformation><BCDTravel:MetodoPago Metodo="AR" Monto="1713.48"/></BCDTravel:PaymentInformation></BCDTravel:AdditionalInformation></cfdi:Addenda></cfdi:Comprobante>

我得到这个错误:
通过了8列,通过的数据有9列
我假设我必须添加一个异常,但我不知道如何操作。

juzqafwq

juzqafwq1#

看起来你的代码比必要的要复杂得多。
请尝试以下方法:

from lxml import etree

cols = ["Monto","Fecha","Descripcion","RFC_Emisor","Emisor","UUID","RFC_Receptor","Receptor"]
rows = []
row =[]
for t in root.xpath('//*[@Total]'):
    row.extend((t.attrib.get("Total"),t.attrib.get("Fecha")))
for d in doc.xpath('//*[@Descripcion]'):
    row.append(d.attrib.get('Descripcion'))
for rf in doc.xpath('//*[@Rfc]'):
    row.extend((rf.attrib.get("Rfc"),rf.attrib.get("Nombre")))
for u in doc.xpath('//*[@UUID]'):
    row.append(u.attrib.get("UUID"))
    
rows.append(row)
pd.DataFrame(rows,columns=cols)

输出(基于示例xml):

Monto   Fecha   Descripcion     RFC_Emisor  Emisor  UUID    RFC_Receptor    Receptor
0   169.00  2022-11-09T18:55:51     PQT. DE ALIMENTOS (CONSUMO: 2022-11-08) FOLIO(...   PRB100802H20    PREMIUM RESTAURANT BRANDS   IST190806QJ7    INDRA SISTEMAS TRANSPORTE Y DEFENSA     67B2DDD8-ABCF-4CD1-B435-C228742542B6

您可能需要修改它以适合您的实际xml。

相关问题