from scrapy.exceptions import DropItem
class DropIfEmptyFieldPipeline(object):
    """Item pipeline that discards any item having at least one empty field.

    A field counts as empty when its value is falsy (``None``, ``""``,
    ``[]``, ``0``, ...), which is the same test ``all(item.values())`` applies.
    """

    def process_item(self, item, spider):
        """Return ``item`` unchanged, or drop it when any field is empty.

        To test only whether "job_id" is empty, replace the check below with
        ``if not item["job_id"]:``.

        Args:
            item: The scraped item, read through its mapping interface.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` object when every field value is truthy.

        Raises:
            DropItem: If at least one field value is falsy.
        """
        # Collect the offending field names so the drop reason is useful in logs.
        empty_fields = [name for name, value in item.items() if not value]
        if empty_fields:
            raise DropItem(
                "Empty field(s): %s" % ", ".join(str(name) for name in empty_fields)
            )
        return item
from scrapy.exceptions import DropItem
import re
class DropIfEmptyFieldPipeline(object):
    """Item pipeline that keeps only items whose "job_id" mentions "nurse"."""

    # Case-insensitive search for the string "nurse"; compiled once for the class.
    REGEX_NURSE = re.compile(r'nurse', re.IGNORECASE)

    def process_item(self, item, spider):
        """Return ``item`` if its "job_id" contains "nurse", otherwise drop it.

        Args:
            item: The scraped item, read through its mapping interface.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` object when "job_id" contains "nurse"
            (in any letter case).

        Raises:
            DropItem: If "job_id" is missing, empty, or does not contain "nurse".
        """
        # A missing or None "job_id" is dropped like any other non-match
        # instead of crashing the pipeline with KeyError / TypeError.
        job_id = item.get("job_id")
        # Use .search() and not .match() to test for a substring match.
        if not job_id or not self.REGEX_NURSE.search(job_id):
            raise DropItem('"job_id" does not contain "nurse": %r' % (job_id,))
        return item
1条答案
按热度按时间41ik7eoe1#
您可以按照 Scrapy 官方文档中关于 Item Pipeline 的说明,编写并配置一个 Item Pipeline,在其中对 item 的各个字段值进行检查。
在 pipeline.py 文件中添加以下内容:
并在您的 settings.py 中通过 ITEM_PIPELINES 启用该管道(请根据您的项目名称进行调整)。
在 OP 关于“护士”测试的评论之后编辑: