使用Selenium从URL下载多个文件并对其重命名

t5zmwmid  于 2023-02-08  发布在  其他
关注(0)|答案(1)|浏览(232)

下面的代码把“tickers”列表中的每个股票代码依次插入到URL中,生成一组链接,并尝试从这些链接下载数据。
我试着把重命名的部分放进遍历URL的for循环里,但是它产生了一个错误,因为它试图遍历整个股票代码列表。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os
import time
import pandas as pd
import datetime
from datetime import datetime

# Date range typed into the page's "from" / "to" date fields.
start = '10/26/2020'
end = '1/22/2023'

# Ticker symbols substituted into the quote URL below.
# Fixes: ",mreit" carried a stray leading comma (which produced a broken URL)
# and "dmc" was listed twice (which only caused a redundant download).
tickers = ["ap","dmc","creit","chib","fli","fb","fph","gma7","ltg",
           "mbt","mreit","nikl","pse","rcr","rlc","rrhi","scc","secb"]

# One historical-prices page per ticker.
urls = [f'https://www.wsj.com/market-data/quotes/PH/{ticker}/historical-prices' for ticker in tickers]

# Location of the chromedriver binary and of the download directory.
path = "/Users/sef/Documents/Py-MSC/chromedriver_mac_arm64/chromedriver"
folder = "/Users/sef/Documents/PSE_Data Repository"

# Build ONE options object and hand that same object to the driver.
# Previously the "detach" flag was misspelled ('detatch') and set on an
# object that was never passed to webdriver.Chrome, so it had no effect.
options = Options()
options.add_experimental_option('detach', True)  # keep the browser open after the script ends
prefs = {"download.default_directory" : folder}  # save downloads into `folder`
options.add_experimental_option("prefs", prefs)
chromeOptions = options  # legacy alias kept so existing references still resolve

s = Service(path)
driver = webdriver.Chrome(service=s, options=options)

# Shared explicit wait (10 s timeout) for code that needs to wait on page elements.
wait = WebDriverWait(driver, 10)

# Visit every ticker page, set the date range, and trigger the CSV download.
for url in urls:
    driver.get(url)
    time.sleep(4)  # fixed pause to let the page load

    # Fill the "from" field first, then the "to" field.
    for field_xpath, date_text in (
        ('//*[@id="selectDateFrom"]', start),
        ('//*[@id="selectDateTo"]', end),
    ):
        date_box = driver.find_element("xpath", field_xpath)
        date_box.clear()
        date_box.send_keys(date_text)

    # Apply the date range, then click the spreadsheet download link.
    for button_xpath in ('//*[@id="datPickerButton"]', '//*[@id="dl_spreadsheet"]'):
        driver.find_element("xpath", button_xpath).click()

    time.sleep(5)  # fixed pause to give the download time to finish
# Rename the downloaded CSV to "<TICKER>.csv" for each ticker.
# The loop body is now indented consistently; the first body line used to sit
# at column 0, which is an IndentationError.
# NOTE: every pass looks for the same "HistoricalPrices.csv" source file.
for ticker in tickers:
    label = ticker.upper()
    old = r'/Users/sef/Documents/PSE_Data Repository/HistoricalPrices.csv'
    new = f'/Users/sef/Documents/PSE_Data Repository/{label}.csv'
    os.rename(old, new)

我该如何修改下面这段代码,使它每次只使用股票代码列表中的一个项目(先用第一个),然后在URL的下一次循环中再移动到下一个项目?

# Rename the downloaded CSV to "<TICKER>.csv" for each ticker.
# The loop body is indented consistently here; with the first body line at
# column 0 Python rejects the snippet with an IndentationError.
for ticker in tickers:
    label = ticker.upper()
    old = r'/Users/sef/Documents/PSE_Data Repository/HistoricalPrices.csv'
    new = f'/Users/sef/Documents/PSE_Data Repository/{label}.csv'
    os.rename(old, new)
wfypjpf4

wfypjpf41#

我不熟悉Selenium,但是如果你重新组织for循环,让它遍历股票代码(tickers)而不是URL,你就可以得到想要的结果。
我认为你的问题是,每次获取数据的循环都会创建一个名为“HistoricalPrices.csv”的文件,并且每次迭代都会覆盖它。你需要在每次下载文件后立即执行重命名操作,然后再执行下一次循环。要做到这一点,请遍历股票代码而不是URL。

# Longest time (seconds) to wait for one CSV download to appear on disk.
download_timeout = 30

# Download and rename one ticker at a time, so each "HistoricalPrices.csv"
# is moved out of the way before the next download starts.
for ticker in tickers:
    label = ticker.upper()
    old = os.path.join(folder, 'HistoricalPrices.csv')
    new = os.path.join(folder, f'{label}.csv')

    # A leftover file from an earlier run would make the browser save this
    # download under a different name, and the rename below would then pick
    # up stale data for the wrong ticker.
    if os.path.exists(old):
        os.remove(old)

    # get data & download file
    url = f'https://www.wsj.com/market-data/quotes/PH/{ticker}/historical-prices'
    driver.get(url)

    # Wait for the form instead of sleeping a fixed 4 s; `wait` raises
    # TimeoutException if the field never shows up.
    beg_date = wait.until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="selectDateFrom"]'))
    )
    beg_date.clear()
    beg_date.send_keys(start)
    end_date = driver.find_element(By.XPATH, '//*[@id="selectDateTo"]')
    end_date.clear()
    end_date.send_keys(end)
    driver.find_element(By.XPATH, '//*[@id="datPickerButton"]').click()
    wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="dl_spreadsheet"]'))
    ).click()

    # Poll until the file exists rather than trusting a fixed 5 s sleep;
    # renaming a file that has not arrived yet raises FileNotFoundError.
    # NOTE(review): assumes the browser only exposes the final file name once
    # the download is complete (partial data goes to a temporary name) — confirm.
    deadline = time.monotonic() + download_timeout
    while not os.path.exists(old):
        if time.monotonic() > deadline:
            raise TimeoutError(
                f'download for {label} did not appear within {download_timeout} s'
            )
        time.sleep(0.5)

    # rename file; os.replace overwrites an existing <TICKER>.csv on every
    # platform, so re-running the script refreshes the data.
    os.replace(old, new)

希望这能解决你的问题。

相关问题