用Python整理抓取到的列表数据并写入CSV

kd3sttzy  于 2023-01-12  发布在  Python
关注(0)|答案(1)|浏览(96)
# Accumulates one (heading, movie) row per movie link found on the detail pages.
data = []

while True:
    # Fetch and parse the current listing page.
    print(url)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Follow the link inside every <h6 class="text-primary title"> heading.
    for title_tag in soup.find_all("h6", {"class": "text-primary title"}):
        sublink = title_tag.find("a").get("href")
        new_link = "LINK" + sublink
        # NOTE(review): this request omits `headers`, unlike the listing
        # request above -- confirm whether that is intended.
        response2 = requests.get(new_link)
        soup2 = BeautifulSoup(response2.content, 'html.parser')

        heading = soup2.find('h1').text
        print(heading)

        # Only the first <tbody> on the detail page is scanned.
        table = soup2.find_all('tbody')[0]
        for cell in table.find_all('td', class_='title'):
            anchor = cell.find('a', class_="text-primary")
            if anchor is None:
                # find() returns None for a title cell without a matching
                # link; iterating None would raise TypeError.
                continue
            # Take the link's text rather than iterating the Tag, which
            # yields its child nodes (possibly several, possibly Tags).
            data.append((heading, anchor.get_text(strip=True)))

    # Rewrite the CSV once per listing page instead of once per detail page;
    # this still checkpoints progress without quadratic file I/O.
    df = pd.DataFrame(data=data)
    df.to_csv('list.csv', index=False, encoding='utf-8')

    # Advance to the next listing page, or stop when there is none.
    next_page = soup.select_one('li.page-item.next>a')
    if next_page:
        next_url = next_page.get('href')
        url = urljoin(url, next_url)
    else:
        break

大家好!我怎样才能把写入CSV的结果整理成下面这样的格式呢?我已经尽了最大的努力,但作为一个初学者,我很难做到这一点。
示例

Column1 | Column2  
James | Movie1, Movie2, Movie3
Peter | Movie1, Movie2, Movie3

我现在得到的是
Column1 | Column2
James, movie 1
James, movie 2
James, movie 3

0wi1tuuw

0wi1tuuw1#

先把同一标题下的所有电影收集到一个列表中,再用 ", ".join(movies) 合并成一个字符串作为一行写入,而不是遍历电影并为每部电影各追加一行:

# One (heading, "movie1, movie2, ...") row per detail page. Initialized here
# so the snippet does not depend on a `data` list defined elsewhere.
data = []

while True:
    # Fetch and parse the current listing page.
    print(url)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Follow the link inside every <h6 class="text-primary title"> heading.
    for title_tag in soup.find_all("h6", {"class": "text-primary title"}):
        sublink = title_tag.find("a").get("href")
        new_link = "LINK" + sublink
        # NOTE(review): this request omits `headers`, unlike the listing
        # request above -- confirm whether that is intended.
        response2 = requests.get(new_link)
        soup2 = BeautifulSoup(response2.content, 'html.parser')

        heading = soup2.find('h1').text
        print(heading)

        # Only the first <tbody> on the detail page is scanned.
        table = soup2.find_all('tbody')[0]
        movies = []
        for cell in table.find_all('td', class_='title'):
            anchor = cell.find('a', class_="text-primary")
            if anchor is not None:
                # Collect the link text. Extending the list with the Tag
                # itself adds its child nodes, which breaks ", ".join when a
                # child is a nested Tag, and fails outright when find()
                # returns None.
                movies.append(anchor.get_text(strip=True))
        # All movies of one heading are joined into a single CSV cell.
        data.append((heading, ", ".join(movies)))

    # Rewrite the CSV once per listing page instead of once per detail page;
    # this still checkpoints progress without quadratic file I/O.
    df = pd.DataFrame(data=data)
    df.to_csv('list.csv', index=False, encoding='utf-8')

    # Advance to the next listing page, or stop when there is none.
    next_page = soup.select_one('li.page-item.next>a')
    if next_page:
        next_url = next_page.get('href')
        url = urljoin(url, next_url)
    else:
        break

相关问题