Selenium: data in the DataFrame gets overwritten

Asked by 9cbw7uwe on 2022-12-18, in: Other

I am trying to scrape data, but it keeps getting overwritten: the CSV file ends up with only 2 pages' worth of data. I think the loop is overwriting the data on each iteration. How can I fix this? Any suggestions are welcome, thanks. These are the page links: https://www.askgamblers.com/online-casinos/countries/uk/

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer

options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20) 

for page in range(1,3):             # range(1,3) only yields pages 1 and 2
    URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
    driver.get(URL)
    time.sleep(2)

    urls= []
    data = []

    page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
    for link in page_links:
        href=link.get_attribute("href")
        urls.append(href)
        
    product=[]                      # re-created on every page, so earlier pages' rows are lost
  
     
    for url in urls:
        wev={}
        driver.get(url)
        time.sleep(1)

        try:
            title=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text   
        except:
            pass
        
        wev['Title']=title

        soup = BeautifulSoup(driver.page_source,"lxml")

        pays=soup.select("div#tabPayments")

        for pay in pays:
            
            try:
                t1=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
            except:
                pass
            
            wev['deposit_method']=t1
            
            
            try:
                t2=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item+ .review-details__item .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['curriences']=t2
            
            try:
                t3=pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['with_drawl method']=t3
            
            try:
                t4 = pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
                t4 = [i.replace("\n", "") for i in t4 if i.text]
                
            except:
                pass
            
            wev['with_drawl_time']=t4
            
            product.append(wev)
            
    # both of these also run inside the page loop, so every pass rebuilds the
    # DataFrame and rewrites the same file, keeping only the current page's rows
    df=pd.DataFrame(product)
    df.to_csv('casino.csv')
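For reference, the overwrite the asker suspects can be reproduced in miniature. This is a hypothetical toy sketch (made-up file name and data, not the site's), showing why only the last page survives:

for page in (1, 2):
    rows = []                                # re-created every pass: page 1's rows are thrown away
    rows.append(f"row from page {page}")
    with open("out.csv", "w") as f:          # mode "w" truncates the file on every pass
        f.write("\n".join(rows))
# after the loop, out.csv holds only page 2's row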

sr4lhrrt 1#

All of the results in a single file:

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd

options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)

product = []  # created ONCE, before the page loop, so rows from every page accumulate here
for page in range(1, 4):  # pages 1, 2 and 3
    URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
    driver.get(URL)
    time.sleep(2)

    # collect the review URLs listed on this page
    urls = []
    page_links = driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
    for link in page_links:
        urls.append(link.get_attribute("href"))

    for url in urls:
        wev = {}
        driver.get(url)
        time.sleep(1)

        try:
            title = driver.find_element(By.CSS_SELECTOR, "h1.review-intro__title").text
        except Exception:
            title = None  # reset, so a failed lookup can't reuse the previous casino's title
        wev['Title'] = title

        soup = BeautifulSoup(driver.page_source, "lxml")
        pays = soup.select("div#tabPayments")

        for pay in pays:
            try:
                t1 = pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ', strip=True)
            except Exception:
                t1 = None
            wev['deposit_method'] = t1

            try:
                t2 = pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item+ .review-details__item .review-details__text").get_text(' ', strip=True)
            except Exception:
                t2 = None
            wev['curriences'] = t2

            try:
                t3 = pay.select_one(".review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ', strip=True)
            except Exception:
                t3 = None
            wev['with_drawl method'] = t3

            try:
                # join the text nodes directly; iterating over the tag's children
                # broke whenever a child was a nested tag rather than a string
                t4 = pay.select_one(".review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text").get_text(' ', strip=True)
            except Exception:
                t4 = None
            wev['with_drawl_time'] = t4

            product.append(wev)

# build the DataFrame and write the CSV ONCE, after all pages have been scraped
df = pd.DataFrame(product)
df.to_csv('casino.csv', index=False)
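If you would rather write incrementally, so a crash mid-run does not lose everything, pandas' to_csv also supports append mode. A minimal sketch under that assumption; append_rows is a hypothetical helper you would call once per page with that page's rows:

import os
import pandas as pd

def append_rows(rows, path='casino.csv'):
    pd.DataFrame(rows).to_csv(
        path,
        mode='a',                           # append instead of truncating
        header=not os.path.exists(path),    # write the header only for a new file
        index=False,
    )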

cgh8pdjw 2#

1. The outer loop only runs twice: change the range to (1, 4) as shown below, which yields the pages [1, 2, 3]:

for page in range(1,4):

2. The data is then overwritten because the output file name is the same on every pass: change the file name so each page writes its own file:

df.to_csv(f'casino_{page}.csv')
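If you later want everything in one file again, the per-page CSVs can be stitched back together. A minimal sketch; casino_all.csv is a hypothetical output name, and if the files were written with pandas' default index column, pass index_col=0 to read_csv:

import glob
import pandas as pd

files = sorted(glob.glob('casino_*.csv'))
combined = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)
combined.to_csv('casino_all.csv', index=False)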
