使用Selenium在Python中进行Web抓取时出现selenium.common.exceptions.NoSuchElementException

bhmjp9jg  于 2022-11-10  发布在  Python
关注(0)|答案(1)|浏览(184)

What I am trying to scrape(我想要抓取的内容)。我已尝试更改睡眠时间以及两次检查之间的间隔。代码在第一次迭代时能正常返回结果,但随后在while循环中失败。为什么第一次调用get_first_listing函数时,By.CSS_SELECTOR定位器能正确执行,而第二次调用却失败?

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import os

# Configure the Chrome session used by the scraper.
chrome_options = Options()
chrome_options.add_argument('--headless')  # run Chrome without opening a window
# Exclude Chrome's 'enable-logging' switch (presumably to quiet console noise).
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

# Set before ChromeDriverManager is used; presumably silences webdriver_manager's own log output.
os.environ['WDM_LOG_LEVEL'] = '0'
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s, options=chrome_options)

# NOTE: fallback suggested by the original author if the webdriver_manager-based
# driver creation above fails: create the driver from a local chromedriver path.
# (`s=path` looks like a placeholder -- TODO confirm the intended keyword.)
# driver = webdriver.Chrome(s=path, options=chrome_options)

# Settings

classified_link = 'https://classifieds.ksl.com/search/Furniture'  # page loaded by get_first_listing()
time_to_wait_between_checking = 15  # passed to time.sleep() between polls, i.e. seconds

def get_first_listing():
    """Load the search page and return ``(link, title)`` of the first listing.

    Navigates the module-level ``driver`` to ``classified_link``, pauses for a
    fixed 15 seconds, then reads the ``href`` attribute and the visible text of
    the anchor matched by a positional CSS selector.

    Returns:
        A 2-tuple ``(link, title)``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the selector
            matches nothing at the moment of lookup.
    """
    first_listing_selector = (
        '#search-results > div > section > div > div:nth-child(1) > '
        'section:nth-child(4) > div.listing-item-info > h2 > div > a'
    )

    driver.get(classified_link)
    # Unconditional pause after navigation; nothing here checks that the
    # element has actually appeared before the lookups below run.
    time.sleep(15)

    # The anchor is located twice: once for its href, once for its text.
    href = driver.find_element(By.CSS_SELECTOR, first_listing_selector).get_attribute('href')
    heading = driver.find_element(By.CSS_SELECTOR, first_listing_selector).text
    return href, heading

# Remember which listing is on top when the script starts.
first_listing_link_temp, listing_title = get_first_listing()
print(f"First Listing Title: {listing_title}, Link: {first_listing_link_temp}")

# Re-check the page until the top listing's link differs from the remembered one.
check_count = 0
while True:
    check_count += 1
    time.sleep(time_to_wait_between_checking)
    print(f"Checking to see if new listing, this is attempt number {check_count}")
    first_listing_link, title = get_first_listing()
    if first_listing_link != first_listing_link_temp:
        print(f"There is a new ad. Title {title}, Link: {first_listing_link}")
        break

输出为:

Traceback (most recent call last):
  File "C:PATH.py", line 46, in <module>
    new_listing_info = get_first_listing()
  File "C:PATH.py", line 26, in get_first_listing
    link = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) >'
  File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 856, in find_element
    return self.execute(Command.FIND_ELEMENT, {
  File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 429, in execute
    self.error_handler.check_response(response)
  File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 243, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"#search-results > div > section > div > div:nth-child(1) > section:nth-child(4) > div.listing-item-info > h2 > div > a"}
  (Session info: headless chrome=106.0.5249.119)
Stacktrace:
Backtrace:
...

Process finished with exit code 1
bxpogfeg

bxpogfeg1#

要让您的代码正常工作,需要解决以下几个问题:
1.必须先关闭Cookies横幅。
2.需要引入WebDriverWait,以等待元素可见、可点击等。
3.定位器需要改进。
下面的代码应该效果更好:

import os
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Configure the Chrome session used by the scraper.
chrome_options = Options()
chrome_options.add_argument('--headless')  # run Chrome without opening a window
# Exclude Chrome's 'enable-logging' switch (presumably to quiet console noise).
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

# Set before ChromeDriverManager is used; presumably silences webdriver_manager's own log output.
os.environ['WDM_LOG_LEVEL'] = '0'
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s, options=chrome_options)

# NOTE: fallback suggested by the original author if the webdriver_manager-based
# driver creation above fails: create the driver from a local chromedriver path.
# (`s=path` looks like a placeholder -- TODO confirm the intended keyword.)
# driver = webdriver.Chrome(s=path, options=chrome_options)

# Shared explicit wait used by get_first_listing(); 20 is the timeout handed to WebDriverWait.
wait = WebDriverWait(driver, 20)

# Settings

classified_link = 'https://classifieds.ksl.com/search/Furniture'  # page loaded by get_first_listing()
time_to_wait_between_checking = 15  # passed to time.sleep() between polls, i.e. seconds

def get_first_listing(banner_timeout=5):
    """Load the search page and return ``(link, title)`` of the first listing.

    Args:
        banner_timeout: Seconds to wait for the cookie banner's close button
            before assuming that no banner is shown on this page load.

    Returns:
        A 2-tuple ``(link, title)``: the ``href`` attribute and the visible
        text of the first element matching ``.item-info-title-link a``.

    Raises:
        selenium.common.exceptions.TimeoutException: if no listing title link
            becomes visible within the timeout of the module-level ``wait``.
    """
    driver.get(classified_link)

    # Dismissing the cookie banner is best-effort. This function runs on every
    # poll, and a banner that was closed on an earlier load may not be shown
    # again; waiting for it unconditionally would raise TimeoutException and
    # abort the polling loop on the second call.
    try:
        WebDriverWait(driver, banner_timeout).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "#onetrust-close-btn-container button.onetrust-close-btn-handler")
            )
        ).click()
    except TimeoutException:
        pass  # no banner on this load; carry on to the listings

    # Locate the title link once and read both values from the same element.
    title_element = wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, ".item-info-title-link a"))
    )
    return title_element.get_attribute('href'), title_element.text

# Poll the search page until the top listing changes, then report it.
try:
    # Remember which listing is on top when the script starts.
    first_listing_link_temp, listing_title = get_first_listing()
    print(f"First Listing Title: {listing_title}, Link: {first_listing_link_temp}")

    check_count = 0
    while True:
        check_count += 1
        time.sleep(time_to_wait_between_checking)
        print(f"Checking to see if new listing, this is attempt number {check_count}")
        first_listing_link, title = get_first_listing()
        if first_listing_link != first_listing_link_temp:
            print(f"There is a new ad. Title {title}, Link: {first_listing_link}")
            break
finally:
    # Always end the browser session, whether a new ad was found or a lookup
    # raised; otherwise the Chrome session started above is left running.
    driver.quit()

这里我只修复了get_first_listing()方法,没有改动后面的while循环。

相关问题