# Parallel lists: entry i of each list describes the i-th path in `files`.
book_title, book_author, book_release, book_language = [], [], [], []

# Header patterns, compiled once and reused for every book.
title = re.compile(r'Title: (.*)\n')
author = re.compile(r'Author: (.*)\n')
release_date = re.compile(r'Release Date: (.*)\s')
language = re.compile(r'Language: (.*)\n')


def _first_group(pattern, text, default='-'):
    """Return group 1 of the first match of *pattern* in *text*.

    Falls back to *default* when the pattern does not match, so that a
    missing header field yields a placeholder instead of an
    ``AttributeError`` from calling ``.group`` on ``None``.
    """
    match = pattern.search(text)
    return match.group(1) if match is not None else default


for path in files:
    # NOTE(review): the file is decoded with the platform default encoding;
    # pass an explicit ``encoding=`` once the encoding of the inputs is known.
    with open(path, 'r') as text_file:
        text = text_file.read()

    # Exactly one value is appended to every list per file, so the lists stay
    # the same length and the DataFrame below can always be built.
    book_title.append(_first_group(title, text))
    book_author.append(_first_group(author, text))
    book_language.append(_first_group(language, text))
    # Keep only the part of the release line that precedes the first ' ['.
    book_release.append(_first_group(release_date, text).split(' [')[0])

# One row per file, one column per extracted header field.
books = pd.DataFrame({"Title": book_title, "Author": book_author,
                      "Release_Date": book_release, "Language": book_language})
注:
若要处理书籍中缺少数据的问题，可以使用如下方法：
# Search once and reuse the result; record '-' as a placeholder when the
# author pattern does not match the text.
author_match = author.search(text)
book_author.append('-' if author_match is None else author_match.group(1))
1条答案
按热度按时间wj8zmpe11#
类似下面这样的代码应该可行。但必须指出，只要任何一本书中缺少任何一项书籍详细信息，search 就会返回 None，此时直接对结果调用 .group(1) 会报错。
代码:
注:
若要处理书籍中缺少数据的问题，可以使用如下方法：