当在python中追加列表中的元素时,使用for循环只返回第一次迭代的结果

4nkexdtk  于 2023-03-20  发布在  Python
关注(0)|答案(1)|浏览(107)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import requests
import urllib
import time
import pandas
import re
# Start a Chrome session; ChromeDriverManager().install() supplies the path
# of the chromedriver executable handed to the Service.
driver_service = Service(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=driver_service)

# Paste the URL that you want to scrape here.
driver.get("https://www.rataindia.com/vendor-listing.php?page=")

# Shared explicit wait with a 10 second timeout.
wait = WebDriverWait(driver, 10)
try:
    # Click the element with class "close-button" (presumably a popup that
    # covers the listing -- confirm) as soon as it becomes clickable. The
    # click is issued through JavaScript rather than WebElement.click().
    close_button = wait.until(
        EC.element_to_be_clickable((By.CLASS_NAME, "close-button"))
    )
    driver.execute_script("arguments[0].click();", close_button)
except TimeoutException:
    # Nothing became clickable within the timeout: there is nothing to
    # dismiss, so continue. Any other error is allowed to propagate instead
    # of being silently swallowed by a bare `except`.
    pass
# Rows collected so far; each entry is a {"company": ..., "address": ...} dict.
siteData = []
# 81 passes (nav = 1..81); each pass ends with a click on the "Next" link.
# `nav` itself is never read inside the loop.
for nav in range(1,82,1):
    # range(1,2,1) yields only i == 1, so this body runs once per pass.
    for i in range(1,2,1):
        # listing = driver.find_element(By.ID,'vendor-block')

        # NOTE(review): Selenium's find_elements (plural) returns a list, and
        # a list has no find_elements method, so `listing.find_elements(...)`
        # below looks like it raises AttributeError. The singular call that is
        # commented out above was probably intended -- confirm.
        listing = driver.find_elements(By.ID, "vendor-block")
        lists = listing.find_elements(By.XPATH, "/html/body/section[2]/div/div/div/div["+ str(i) +"]/a")
        for listss in lists:
            time.sleep(2) # wait 2 seconds
            # NOTE(review): both lookups below search from `driver` with a
            # fixed XPath and never use `listss`, so every iteration reads the
            # same first matching element instead of the current entry.
            try:
                name = driver.find_element(By.XPATH,'//*[@id="vendor-block"]/div/div[1]/div/div/a/div/div[2]').text
                print(name)
            except:
                # Bare except: any failure is ignored, leaving `name` at its
                # previous value (or undefined if it was never assigned).
                pass
            try:
                 address = driver.find_element(By.XPATH,'//*[@id="vendor-block"]/div/div[1]/div/div/a/div/div[3]').text
                 print(address)
            except:
                # Same silent handling as for `name` above.
                pass

            time.sleep(2)  # wait 2 seconds

        # Append the data.
        # NOTE(review): this statement sits after the `for listss` loop, not
        # inside it, so `name`/`address` are overwritten on every iteration
        # and only their final values are stored -- one row per pass.
        siteData.append({"company":name, "address":address})
        # The DataFrame is rebuilt and the CSV file rewritten on every pass.
        df = pandas.DataFrame(siteData)
        df.to_csv("rataindia12.csv")

        # next page click

    # Click the pagination <span> whose text is "Next".
    driver.find_element(By.XPATH,'//*[@id="vendor-block"]/div/div[2]/div/ul/li/a/span[contains(@class, "hidden-xs") and text()="Next"]').click()

当在python中追加列表中的元素时,使用for循环只返回第一次迭代的结果
我试图抓取网站上的列表数据并保存到Excel表中,但循环在每一页都只返回第一次迭代的值。请帮我解决这个问题。

wwwo4jvm

wwwo4jvm1#

你的代码有一些问题,最严重的是在每次循环中你都覆盖了 name 和 address。你需要做的是在进入循环之前初始化一个列表或字典,然后在每次循环中向它添加值。更好的做法是删除嵌套循环,通过列表推导式一次获得页面中的所有值(例如 [x.text for x in driver.find_elements(...)])。

# Column name -> values gathered across all visited pages.
data = {'name': [], 'address': []}
wait = WebDriverWait(driver, 10)

while True:

    # Progress line; end='\r' returns the cursor to the start of the line so
    # the next page's message overwrites this one.
    print(f'current page: {driver.find_element(By.CLASS_NAME, "current").text}', end='\r')

    # Wait for the elements of each class to be visible, then read their text.
    logo_elements = wait.until(
        EC.visibility_of_all_elements_located((By.CLASS_NAME, 'v-logo'))
    )
    data['name'].extend([element.text for element in logo_elements])

    name_elements = wait.until(
        EC.visibility_of_all_elements_located((By.CLASS_NAME, 'v-name'))
    )
    # Line breaks inside an entry are flattened to " - ".
    data['address'].extend(
        [element.text.replace('\n', ' - ') for element in name_elements]
    )

    # Stop once the <li> ancestor of the "Next" span has class "disabled".
    next_btn = driver.find_element(By.XPATH, "//span[text()='Next']")
    enclosing_li = next_btn.find_element(By.XPATH, './ancestor::li')
    if enclosing_li.get_attribute('class') == 'disabled':
        print('\nNo more pages')
        break

    next_btn.click()

import pandas as pd
# Build a DataFrame from the `data` dict (keys 'name' and 'address').
# The result is not assigned: in a notebook/REPL it is shown as the cell
# output, while in a plain script it is discarded.
# NOTE(review): pandas requires the two lists to have equal length -- confirm
# that every page yields as many 'v-logo' as 'v-name' elements.
pd.DataFrame(data)

产出

相关问题