import requests
from bs4 import BeautifulSoup
from texttable import Texttable
url = "https://realpython.github.io/fake-jobs/"
site = requests.get(url) #send a request to the site
table = Texttable() #create a table
table.set_chars(['-', '|', '+', '='])
table.header(['Titel','Company','Location'])
table.set_cols_dtype(['t','i','a'])
table.set_cols_align(["c", "c", "c"])
table.set_cols_valign(["m", "m", "m"])
table.set_cols_width([20,20,20])
table.set_deco(Texttable.BORDER|Texttable.HEADER |Texttable.HLINES| Texttable.VLINES)
with open('Shore.txt', 'w') as f: #create a file
pass
soup = BeautifulSoup(site.content, "html.parser")
results = soup.find(id="ResultsContainer")
job_elements = results.find_all("div", class_="card-content") #find all div with class "card-content"
for job_element in job_elements:
title_element = job_element.find("h2", class_="title") #get the different elements from divs with class "card-content"
company_element = job_element.find("h3", class_="company") #get the different elements from divs with class "card-content"
location_element = job_element.find("p", class_="location") #get the different elements from divs with class "card-content"
item_element = job_element.find("a", class_="card-footer-item") #get the link with divs from class "card-content"
item_site = requests.get(item_element["href"]) #send a request to the site from link
item_soup = BeautifulSoup(item_site.content, "html.parser")
results_site = item_soup.find(id="ResultsContainer")
item_element_elements = results_site.find("div", class_="content")
item_element_element = item_element_elements.find("p", class_=False) #get the text without class
print(title_element.text.strip()) #get it all data received into the console
print(company_element.text.strip()) #get all data received into the console
print(location_element.text.strip()) #get all data received into the console
print(item_element_element.text.strip()) # get all data received into the console
table.add_row([title_element.text.strip(),company_element.text.strip(),location_element.text.strip()]) #add rows in corrects rows "add_rows"
with open('Shore.txt', 'w') as f: #enter all data received into a table file
f.write(table.draw())
f.write(str(len(job_elements)))
f.close
print(len(job_elements)) #get the number of elements with the class
错误:
line 38, in <module>
item_element_elements = results_site.find("div", class_="content")
AttributeError: 'NoneType' object has no attribute 'find'
正在尝试解析div:
item_element_elements = results_site.find("div", class_="content")
item_element_element = item_element_elements.find("p", class_=False)
但是我得到了一个错误。Can't find the "find" attribute
。我能够解析所有其他元素。不知道如何修复这个错误。
1条答案
按热度按时间j8ag8udp1#
尝试更具体地选择元素-这里的问题是您选择了第一个链接,而不是指向详细信息的链接:
或
.find()
执行此操作,但可以 checkoutcss selectors
*示例