python 如何使用Selenium向下滚动zillow网页的右侧部分

webghufk  于 2022-12-28  发布在  Python
关注(0)|答案(3)|浏览(192)

我想在下面这个网站上,把网页右侧的部分向下滚动到最底部。
我用下面的代码试过了,但不幸的是,它无法让网站右侧的部分向下滚动:

import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import os, sys
import xlwings as xw
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from fake_useragent import UserAgent

if __name__ == '__main__':
    # Fixed pause (in seconds) between page interactions. The script used WAIT
    # without ever defining it, which raised NameError; 3 is an assumed value.
    WAIT = 3

    print("Checking chromedriver...")
    os.environ['WDM_LOG_LEVEL'] = '0'
    ua = UserAgent()
    userAgent = ua.random
    options = Options()
    # options.add_argument('--headless')
    options.add_argument("start-maximized")
    options.add_experimental_option("prefs", {"profile.default_content_setting_values.notifications": 1})
    # Both switches go in ONE call: add_experimental_option stores the value
    # under its name, so a second "excludeSwitches" call would replace the first.
    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    options.add_experimental_option('useAutomationExtension', False)
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument(f'user-agent={userAgent}')
    srv = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=srv, options=options)

    link = "https://www.zillow.com/clayton-county-ga/houses/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22Clayton%20County%2C%20GA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-84.83476148874033%2C%22east%22%3A-84.0313862445997%2C%22south%22%3A33.22700148452994%2C%22north%22%3A33.70472214817801%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A1622%2C%22regionType%22%3A4%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22beds%22%3A%7B%22min%22%3A3%7D%2C%22baths%22%3A%7B%22min%22%3A2%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%2C%22tow%22%3A%7B%22value%22%3Afalse%7D%2C%22mf%22%3A%7B%22value%22%3Afalse%7D%2C%22con%22%3A%7B%22value%22%3Afalse%7D%2C%22land%22%3A%7B%22value%22%3Afalse%7D%2C%22apa%22%3A%7B%22value%22%3Afalse%7D%2C%22manu%22%3A%7B%22value%22%3Afalse%7D%2C%22apco%22%3A%7B%22value%22%3Afalse%7D%2C%22sqft%22%3A%7B%22min%22%3A1000%2C%22max%22%3A3000%7D%2C%22lot%22%3A%7B%22max%22%3A43560%7D%2C%22built%22%3A%7B%22min%22%3A1965%7D%2C%22gar%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A11%7D"

    try:
        driver.get(link)
        time.sleep(WAIT)

        # The "press and hold" captcha is not necessarily shown on every load;
        # find_elements returns an empty list instead of raising when it is absent.
        captcha = driver.find_elements(By.XPATH, "//div[@id='px-captcha']")
        if captcha:
            element = captcha[0]
            # Separate chains for press and release, so the second perform()
            # cannot replay the click-and-hold still queued in a reused chain.
            ActionChains(driver).click_and_hold(element).perform()
            time.sleep(10)
            ActionChains(driver).release(element).perform()
            time.sleep(WAIT)

        driver.find_element(By.XPATH, "//h1").click()
        time.sleep(WAIT)

        # NOTE: window.scrollTo scrolls the top-level window only, not an inner
        # scrollable container. This is the call the question reports as not
        # scrolling the right-hand results panel.
        driver.execute_script("window.scrollTo(0, 10000)")
        # Long pause, presumably to keep the browser open for manual inspection.
        time.sleep(5000)
    finally:
        # Always shut the browser and chromedriver down, even after an error.
        driver.quit()

我怎样才能向下滚动到页面右侧的最底部?
在网站的以下部分,我想向下滚动到最底部:

kxxlusnw

kxxlusnw1#

要在 zillow 网站上把网页右侧的部分向下滚动到最底部,您可以在分页(pagination)元素可见之后,对该元素调用 scrollIntoView()。为此需要使用 WebDriverWait 配合 visibility_of_element_located() 等待该元素可见,并且可以使用以下定位策略(locator strategy):

  • 代码块:
# Imports needed by this snippet (EC was used without being imported).
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# `driver` is the already-created Selenium WebDriver instance.
driver.get("https://www.zillow.com/clayton-county-ga/houses/3-_beds/2.0-_baths/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22Clayton%20County%2C%20GA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-85.19662367135751%2C%22east%22%3A-83.66952406198251%2C%22south%22%3A33.16207210856734%2C%22north%22%3A33.76924644337602%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A1622%2C%22regionType%22%3A4%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22beds%22%3A%7B%22min%22%3A3%7D%2C%22baths%22%3A%7B%22min%22%3A2%7D%2C%22sqft%22%3A%7B%22min%22%3A1000%2C%22max%22%3A3000%7D%2C%22built%22%3A%7B%22min%22%3A1965%7D%2C%22con%22%3A%7B%22value%22%3Afalse%7D%2C%22apa%22%3A%7B%22value%22%3Afalse%7D%2C%22mf%22%3A%7B%22value%22%3Afalse%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%2C%22lot%22%3A%7B%22max%22%3A43560%7D%2C%22land%22%3A%7B%22value%22%3Afalse%7D%2C%22gar%22%3A%7B%22value%22%3Atrue%7D%2C%22tow%22%3A%7B%22value%22%3Afalse%7D%2C%22manu%22%3A%7B%22value%22%3Afalse%7D%2C%22apco%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A9%7D")

# Wait up to 20 seconds for the pagination element to become visible.
pagination = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//div[@class='search-pagination']"))
)
# Scroll the pagination element into view.
driver.execute_script("return arguments[0].scrollIntoView(true);", pagination)
  • 浏览器快照:

kb5ga3dv

kb5ga3dv2#

zillow 是互联网上做得最精巧的网站之一,而且它也提供 API。如果你真心喜欢并想深入学习网页抓取,何必硬编码去操作页面,为什么不直接从 API 中提取数据呢?下面是一个示例,演示如何只使用 requests 模块,从 API 获取 JSON 响应形式的数据。

    • 脚本:
import json

import requests

# Headers sent with every request made through the session.
headers = {
    "User-Agent": "Mozilla/5.0 ",
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache"
}

SEARCH_URL = 'https://www.zillow.com/search/GetSearchPageState.htm'

# Search state sent as the "searchQueryState" query parameter. It is kept as a
# dict so the page number can be set per request; previously "currentPage" was
# hard-coded to 2, so every loop iteration fetched the same page.
search_query_state = {
    "pagination": {"currentPage": 1},
    "usersSearchTerm": "Clayton County, GA",
    "mapBounds": {
        "west": -84.85604749948251,
        "east": -84.01010023385751,
        "south": 33.18506146243053,
        "north": 33.746411533949434,
    },
    "regionSelection": [{"regionId": 1622, "regionType": 4}],
    "isMapVisible": False,
    "filterState": {
        "beds": {"min": 3},
        "baths": {"min": 2},
        "sqft": {"min": 1000, "max": 3000},
        "built": {"min": 1965},
        "isCondo": {"value": False},
        "isApartment": {"value": False},
        "isMultiFamily": {"value": False},
        "isAllHomes": {"value": True},
        "sortSelection": {"value": "globalrelevanceex"},
        "lotSize": {"max": 43560},
        "isLotLand": {"value": False},
        "hasGarage": {"value": True},
        "isTownhouse": {"value": False},
        "isManufactured": {"value": False},
        "isApartmentOrCondo": {"value": False},
    },
    "isListVisible": True,
}

with requests.Session() as s:
    s.headers.update(headers)
    # Request the home page first; presumably this lets the session pick up
    # cookies before the search endpoint is queried.
    s.head('https://www.zillow.com/')
    for page in range(1, 3):
        # Request the page for this iteration.
        search_query_state["pagination"]["currentPage"] = page
        params = {
            # Compact separators reproduce the whitespace-free JSON format of
            # the original hand-written parameter string.
            "searchQueryState": json.dumps(search_query_state, separators=(",", ":")),
            "wants": '{"cat1":["listResults"],"cat2":["total"]}',
        }

        response = s.get(SEARCH_URL, params=params)
        # Fail with a clear HTTP error instead of a confusing JSON decode
        # error when the request is rejected.
        response.raise_for_status()
        r = response.json()
        for card in r['cat1']['searchResults']['listResults']:
            price = card['price']
            print(price)
    • 输出:
$235,000
$299,000
$259,000
$290,000
$255,000
$300,000
$295,000
$250,000
$250,000
$259,900
$290,000
$315,000
$249,000
$289,900

...等等

eeq64g8w

eeq64g8w3#

一个简单的办法是使用 Keys.END 键。我试过上面建议的答案,但完全无法让它工作——也许是因为 Zillow 后来改了网站,所以现在失效了。后来我找到了下面这段代码,它运行良好,也更容易理解。
这段代码先点击搜索结果区域上的按钮,让焦点进入网页的那一部分,然后按下 End 键,运行正常。不过页面加载后我必须加一个 3 秒的等待计时器,给它足够的时间完成加载。

# Imports needed by this snippet (Keys was used without being imported).
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# `driver` is the already-created Selenium WebDriver with the search page open.
# Wait for the button to become clickable instead of sleeping a fixed time
# after the page loads.
search_results = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CLASS_NAME, "left-option"))
)
# Click first so that the key press below is sent to this part of the page.
search_results.click()
# Send the END key to jump to the bottom.
search_results.send_keys(Keys.END)

相关问题