我写了这个爬虫(spider),用来从 Google Play 抓取应用的评论,目前只取得了部分成功:已经能够提取评论者姓名、日期和评论内容。
我的疑问:
- 如何获取全部评论?目前我只能抓到 41 条。
- 如何从 `<div>` 元素(class 为 `review-info-star-rating`)中获取评分?
import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from urlparse import urljoin
class CompItem(scrapy.Item):
    """Container for one scraped app review.

    Each field is filled in by the spider's ``parse`` callback from a
    single review node on the page.
    """

    # Star-rating information for the review.
    rating = scrapy.Field()
    # Text fragments extracted from the review body.
    data = scrapy.Field()
    # Display name of the review's author.
    name = scrapy.Field()
    # Date string shown next to the review.
    date = scrapy.Field()
def _first_or_none(values):
    """Return the first element of *values*, or None when it is empty."""
    return values[0] if values else None


class criticspider(CrawlSpider):
    """Scrape user reviews of the Gaana app from its Google Play page.

    Only the reviews present in the HTML of the start URL are parsed.
    NOTE(review): the page appears to load further reviews dynamically;
    fetching them would need additional requests -- confirm against the
    live site.
    """

    name = "gaana"
    allowed_domains = ["play.google.com"]
    start_urls = ["https://play.google.com/store/apps/details?id=com.gaana&hl=en"]

    # NOTE: no ``rules`` are defined on purpose.  CrawlSpider implements its
    # rule handling inside ``parse``; since ``parse`` is overridden below,
    # any rules added here would never be followed.  Use a differently named
    # callback if link-following rules are introduced.

    def parse(self, response):
        """Extract one ``CompItem`` per review found in *response*.

        Args:
            response: the Scrapy response for an app details page.

        Returns:
            A list of ``CompItem`` objects.  ``data`` and ``rating`` are
            lists of strings; ``name`` and ``date`` are single strings, or
            ``None`` when the corresponding node is missing.
        """
        items = []
        for review in response.xpath('//div[@class="single-review"]'):
            item = CompItem()
            item['data'] = review.xpath(
                './/div[@class="review-body"]/text()').extract()
            # Fall back to None instead of raising IndexError, so that one
            # malformed review does not discard every item collected so far.
            item['name'] = _first_or_none(review.xpath(
                './/div/div/span[@class="author-name"]/a/text()').extract())
            item['date'] = _first_or_none(review.xpath(
                './/span[@class="review-date"]/text()').extract())
            # ``aria-label`` is an attribute, not a child element, so it is
            # selected with ``@``.  The leading ``.//`` makes the lookup
            # relative to this review at any depth.
            labels = review.xpath(
                './/div[@class="review-info-star-rating"]//@aria-label'
            ).extract()
            item['rating'] = [label.strip() for label in labels]
            items.append(item)
        return items
2条答案
按热度按时间qco9c6ql1#
您现在的写法是:
是不是应该改成下面这样:
我不确定这样是否可行,不过可以试一试 :)
vmjh9lq92#
你可以试试这个: