# Crawl the paginated listing that starts at `url`, open every detail page it
# links to, and collect one (heading, movie) row per titled table cell.
# `url` and `headers` must already be defined before this point.
data = []
while True:
    print(url)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    for title_tag in soup.find_all("h6", {"class": "text-primary title"}):
        detail_anchor = title_tag.find("a")
        sublink = detail_anchor.get("href") if detail_anchor is not None else None
        if not sublink:
            # A heading without a usable link has no detail page to visit.
            continue

        new_link = "LINK" + sublink
        # Send the same headers as the listing request so both kinds of page
        # are fetched the same way.
        response2 = requests.get(new_link, headers=headers)
        soup2 = BeautifulSoup(response2.content, 'html.parser')

        heading = soup2.find('h1').text
        print(heading)

        table = soup2.find_all('tbody')[0]
        for cell in table.find_all('td', class_='title'):
            movie_link = cell.find('a', class_="text-primary")
            if movie_link is None:
                # find() returns None when the cell has no matching link.
                continue
            # Iterating a Tag yields its child nodes, not titles; take the
            # anchor's text (surrounding whitespace stripped) as the movie.
            data.append((heading, movie_link.get_text(strip=True)))

    # Rewrite the CSV after every listing page so the rows gathered so far
    # are on disk even if a later request fails.
    df = pd.DataFrame(data=data)
    df.to_csv('list.csv', index=False, encoding='utf-8')

    next_page = soup.select_one('li.page-item.next>a')
    next_url = next_page.get('href') if next_page else None
    if not next_url:
        # No "next" link, or one without an href: stop rather than
        # re-requesting the same page forever.
        break
    url = urljoin(url, next_url)
大家好!我怎样才能像这样对CSV的结果进行排序呢?我尽了最大的努力,但是作为一个初学者,我很难做到这一点。
示例
Column1 | Column2
James | Movie1, Movie2, Movie3
Peter | Movie1, Movie2, Movie3
我现在想要的是:
Column1 | Column2
James, movie 1
James, movie 2
James, movie 3
1条答案
按热度按时间0wi1tuuw1#
使用 `", ".join(movies)`,而不是迭代电影。