What I am trying to scrape我已尝试更改睡眠时间和检查之间的时间。它在第一次迭代时返回,然后在While循环中失败。为什么在第一次调用GET_FIRST_LISTING函数时,By.css选择器会正确执行,然后第二次失败?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import os
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
os.environ['WDM_LOG_LEVEL'] = '0'
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s, options=chrome_options)
# driver = webdriver.Chrome(s=path, options=chrome_options) # if you have problems with line 15
# Setting
classified_link = 'https://classifieds.ksl.com/search/Furniture'
time_to_wait_between_checking = 15
def get_first_listing():
driver.get(classified_link)
time.sleep(15)
link = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) > '
'section:nth-child(4) > div.listing-item-info > h2 >'
' div > a').get_attribute('href')
title = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) >'
' section:nth-child(4) > div.listing-item-info > h2 > div > a').text
return (link, title)
listing_info = get_first_listing()
first_listing_link_temp = listing_info[0]
listing_title = listing_info[1]
print(f"First Listing Title: {listing_title}, Link: {first_listing_link_temp}")
check_count = 0
active = True
while active:
check_count += 1
time.sleep(time_to_wait_between_checking)
print(f"Checking to see if new listing, this is attempt number {check_count}")
new_listing_info = get_first_listing()
first_listing_link = new_listing_info[0]
title = new_listing_info[1]
if first_listing_link_temp != first_listing_link:
print(f"There is a new ad. Title {title}, Link: {first_listing_link}")
active = False
break
输出为:
Traceback (most recent call last):
File "C:PATH.py", line 46, in <module>
new_listing_info = get_first_listing()
File "C:PATH.py", line 26, in get_first_listing
link = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) >'
File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 856, in find_element
return self.execute(Command.FIND_ELEMENT, {
File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 429, in execute
self.error_handler.check_response(response)
File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 243, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"#search-results > div > section > div > div:nth-child(1) > section:nth-child(4) > div.listing-item-info > h2 > div > a"}
(Session info: headless chrome=106.0.5249.119)
Stacktrace:
Backtrace:
...
Process finished with exit code 1
1条答案
按热度按时间bxpogfeg1#
这里有几个问题使您的代码开始工作:
1.您必须关闭Cookies横幅。
1.需要引入
WebDriverWait
以等待元素可见、可点击等。1.定位器需要改进。
下面的代码应该更好:
这里我只修复了
get_first_listing()
方法,而不是继续使用while
循环