from bs4 import BeautifulSoup
import pandas as pd
from datetime import date, timedelta
import requests
import os
# Inclusive range of daily fishing-report pages to scrape.
start_date = date(2018, 8, 2)
end_date = date(2018, 8, 8)

# Report pages are named after the date formatted as MMDDYY.
REPORT_URL = "https://littleriveroutfitters.com/WEBSITE-2008/pages/fishing/{stamp}.htm"
# Cells of the table row that holds the "Fishing Gauge" image.
GAUGE_SELECTOR = 'table table tr:has(img[alt*="Fishing Gauge"]) td:has(p)'
# Seconds to wait for the server before giving up on one page.
REQUEST_TIMEOUT = 30


def fetch_report(day, url):
    """Download one report page and return its gauge readings as a dict.

    The first matched cell supplies the labels and the second the values;
    they are paired positionally. The report date is stored under ``'date'``.

    Raises:
        requests.RequestException: the download failed or returned an
            HTTP error status.
        LookupError: fewer than two gauge cells matched the selector.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # NOTE(review): no parser is named, so bs4 uses whichever one is
    # installed; pin it once it is confirmed which parser the selector
    # was written against.
    soup = BeautifulSoup(response.text)
    cells = soup.select(GAUGE_SELECTOR)
    if len(cells) < 2:
        raise LookupError(f"expected 2 gauge cells, found {len(cells)}")
    row = dict(zip(cells[0].stripped_strings, cells[1].stripped_strings))
    row["date"] = day
    return row


data = []
# Walk the range by day offset: an empty range (end before start) simply
# yields no iterations, and start_date itself is left unmodified.
for offset in range((end_date - start_date).days + 1):
    day = start_date + timedelta(days=offset)
    url = REPORT_URL.format(stamp=day.strftime("%m%d%y"))
    try:
        data.append(fetch_report(day, url))
    except (requests.RequestException, LookupError) as exc:
        # Best effort: report the failing page with its cause and move on.
        print(day, url, exc)

df_1 = pd.DataFrame(data)
save_path = "C:/Users/Bay/Documents/Python_Files/fish_info_scrape/"
#df_1.to_csv(save_path + 'report_output.csv', mode = 'w')
df_1
字符串
上述代码对 2018 年 8 月 2 日的页面有效，但对之后的任何日期都不起作用。我尝试修改这一行：d = soup.select('table table tr:has(img[alt*="Fishing Gauge"]) td:has(p)')，但没有成功。任何帮助都将不胜感激。
我也尝试过 d = soup.select('blockquote p:has(img[alt*="Fishing Gauge"])')，同样没有成功。
我期望得到的输出中，8 月 3 日至 8 月 8 日的各行都像 8 月 2 日那样填有数据，而目前的输出如下：
Water Temperature Little River Stream Flow Sunrise Sunset Rainfall 2018 YTD Knoxville Apt Rainfall Normal YTD Knoxville Apt date
0 64.4 Fahrenheit 2.93 Feet 678 CFS 6:44 8:39 33.62" 30.45" 2018-08-02
1 NaN NaN NaN NaN NaN NaN 2018-08-03
2 NaN NaN NaN NaN NaN NaN 2018-08-04
3 NaN NaN NaN NaN NaN NaN 2018-08-05
4 NaN NaN NaN NaN NaN NaN 2018-08-06
5 NaN NaN NaN NaN NaN NaN 2018-08-07
6 NaN NaN NaN NaN NaN NaN 2018-08-08
型
1条答案
按热度按时间p4rjhz4m1#
不幸的是，该网站的页面结构在 8 月 2 日和 8 月 3 日之间发生了细微变化。从那以后你的脚本就失效了，因为它无法再在原来的位置找到数据。
您可以尝试用以下代码以更稳定的方式获取数据：
字符串
这将取代您的
型