for header in response.xpath("//h2"):
role = header.xpath("./text()").get()
for sibling in header.xpath("./following-sibling::*"):
if sibling.root.tag == "h2":
break
name = sibling.xpath(".//h3/*/text()").get()
location = sibling.xpath(".//p[@class='mb-2']/text()").get()
if name and location:
yield{
"role": role.strip(),
"name": name.strip(),
"location": location.strip()
}
def parse(self, response):
    """Yield one record per ``<h3>`` found inside ``div.container``.

    Each record is a dict with three string fields:

    * ``role`` -- text of the nearest ``<h2>`` preceding the ``<h3>``
    * ``name`` -- text of the ``<h3>`` itself
    * ``location`` -- text of the first ``<p>`` sibling after the ``<h3>``

    All three values come from XPath ``normalize-space()``, so they are
    whitespace-collapsed strings and a missing node yields ``""`` rather
    than ``None``. Records lacking a name or a location are skipped.
    """
    for h3_node in response.xpath('//div[@class="container"]//h3'):
        # ``preceding::h2[1]`` is the closest <h2> before this node in
        # document order, i.e. the heading of the section it belongs to.
        role = h3_node.xpath('normalize-space(./preceding::h2[1])').get()
        name = h3_node.xpath('normalize-space(.)').get()
        location = h3_node.xpath("normalize-space(./following-sibling::p[1])").get()
        # An empty string means the node was absent or blank.
        if name and location:
            yield {
                "role": role,
                "name": name,
                "location": location,
            }
2条答案
按热度按时间f8rj6qna1#
您可以使用相对 XPath 表达式和 following-sibling 轴,并结合选择器的 root.tag 属性来检测下一个角色标题(h2),这样就可以准确地确定每个人的角色。例如:
输出
pieyvz9o2#
结果相同,但使用了另一种方法(只需一个 for 循环)。我先找到每个 h3 元素(即 name),再使用 preceding XPath 轴取得 role(即它上方最近的 h2 元素):