import scrapy
from scrapy.http import Request
from bs4 import BeautifulSoup
class TestSpider(scrapy.Spider):
    """Spider that walks the lawyer listing and prints selected profile fields.

    ``parse`` collects profile links from the listing page named in
    ``start_urls``; ``parse_book`` prints the "Decizia de intrare" value and
    the phone number found on each profile page.
    """

    name = 'test'
    start_urls = ['https://www.baroul-bucuresti.ro/tablou-definitivi']
    page_number = 1  # not read by any method of this class

    def parse(self, response):
        """Yield one ``Request`` per profile link found on the listing page.

        Args:
            response: the downloaded listing page.

        Yields:
            Request objects whose callback is ``parse_book``.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        for panel in soup.find_all('div', class_='panel-title'):
            # The first anchor of every panel title is skipped on purpose
            # (presumably it is not a profile link -- confirm against the page).
            for link in panel.find_all('a', href=True)[1:]:
                # urljoin resolves the href against the page URL, so absolute
                # hrefs and hrefs without a leading slash still work.
                profile_url = response.urljoin(link['href'])
                yield Request(profile_url, callback=self.parse_book)

    def parse_book(self, response):
        """Print the entry decision and the formatted phone number of a profile.

        Args:
            response: the downloaded profile page.

        Each ``<p>`` inside ``div.av_bot_left.left`` is matched by the label
        contained in its markup; the last non-blank text node of the matching
        paragraph is taken as the value.
        """
        for paragraph in response.xpath("//div[@class='av_bot_left left']//p"):
            markup = paragraph.get()
            if 'Decizia de intrare:' in markup:
                decision = self._last_text(paragraph)
                if decision:
                    print(decision)
            elif 'Telefon:' in markup:
                phone = self._last_text(paragraph)
                if phone:
                    print(self._format_phone(phone))

    @staticmethod
    def _last_text(selector):
        """Return the last non-blank text node under ``selector``, or ''."""
        texts = [text.strip() for text in selector.xpath('.//text()').getall()]
        texts = [text for text in texts if text]
        return texts[-1] if texts else ''

    @staticmethod
    def _format_phone(raw):
        """Turn '0752.172.817' into '+752 172 817'.

        Dots become spaces. Only a leading zero is swapped for '+'; a plain
        ``replace('0', '+')`` would also corrupt zeros inside the number.
        """
        spaced = raw.replace('.', ' ')
        if spaced.startswith('0'):
            return '+' + spaced[1:]
        return spaced
这是我的输出(见下方)。我想删除电话号码中的点,并将开头的 0 替换为 +。
这是页面链接:https://www.baroul-bucuresti.ro/avocat/15655/aanegroae-ana-maria
0752.172.817
我希望输出如下所示:
+752 172 817
1条答案
按热度按时间iugsix8n1#
可以使用 replace() 函数将点替换为空格;另外还需要对字符串进行切片,去掉开头的第一个零,再在前面加上 +,例如:
d2 = '+' + d2[1:].replace('.', ' ')
请注意,不能使用 replace() 来处理开头的零,因为电话号码的其他位置也可能包含零,它们同样会被替换为 + 字符。
输出:+752 172 817