from sepa import parser
import re
import csv
import pandas as pd
import numpy as np
# Utility function to remove additional namespaces from the XML
def strip_namespace(xml):
return re.sub(' xmlns="[^"]+"', '', xml, count=1)
# Read file
with open('test.xml', 'r') as f:
input_data = f.read()
# Parse the bank statement XML to dictionary
camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))
statements = pd.DataFrame.from_dict(camt_dict['statements'])
all_entries = []
for i, _ in statements.iterrows():
if 'entries' in camt_dict['statements'][i]:
df = pd.DataFrame()
dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries'])
dg = dd['entry_details']
df['Date'] = dd['value_date'].str['date']
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%d-%m-%Y')
iban = camt_dict['statements'][i]['account']['id']['iban']
df['IBAN'] = iban
df['Currency'] = dd['amount'].str['currency']
# Sort Credit/Debit in separate Columns
df['Credit'] = np.where(dd['credit_debit_indicator'] == 'CRDT', dd['amount'].str['_value'], '')
df['Debit'] = np.where(dd['credit_debit_indicator'] == 'DBIT', dd['amount'].str['_value'], '')
# Get destination IBAN
getlength = len(dg.index) #2
for i in range(0, getlength):
result = str(dd['entry_details'][i])
print(result + "Resultat " + str(i))
search_for_iban = re.search("CH\d{2}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{1}|CH\d{19}", result)
if(search_for_iban is None):
print('the search is none')
df['Test'] = 'None'
else:
print('the search is a match')
df['Test'] = 'Yes'
all_entries.append(df)
df_entries = pd.concat(all_entries)
print(df_entries)
我的问题就出在这段代码上
for i in range(0, getlength):
result = str(dd['entry_details'][i])
search_for_iban = re.search("CH\d{2}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{1}|CH\d{19}", result)
if(search_for_iban is None):
df['Test'] = 'None'
else:
df['Test'] = search_for_iban.group()
all_entries.append(df)
我已经尝试过通过索引来解决各种问题,这在变量i中的计数也很高,并且getlength对于2个条目也是正确的
我期待什么如果在'search_for_iban'(使用正则表达式查找(re.search))中有一个IBAN号码与第2行匹配,我希望该iban仅在第2行( Dataframe )“测试”如下:
我得到了什么我得到了第1行和第2行中的两个条目,尽管在第1行中没有找到。我忽略了什么,我的头很痛!:D
what i got
我想我在正常的for循环和panda条目之间犯了一个思维错误
1条答案
按热度按时间3b6akqbq1#
您可以尝试: