[专题] 2026-01-19 圈点610
摘要:[待测] Scrapy 监控 spider 的启动和关闭
这是一段从网络上找到的、用于监控 Scrapy spider 启动和关闭的代码段,尚未验证是否有效。从逻辑上看思路是对的,但从日期看这段代码已经是 N 年前写的了,估计在新版 Scrapy 上会有问题(例如旧的 `scrapy.xlib.pydispatch` 导入方式很可能已不可用),有待测试和修改。
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy.exceptions import DropItem
class DuplicatesPipeline(object):
    """Item pipeline that drops items whose ``id`` was already seen.

    Seen ids are tracked separately for every spider, in a set stored under
    the spider object in ``self.duplicates``.  The set is created when the
    spider opens and discarded when it closes.

    The open/close notifications are wired up in :meth:`from_crawler` through
    ``crawler.signals`` rather than through the legacy global
    ``scrapy.xlib.pydispatch.dispatcher``, so this class no longer needs that
    import.  NOTE(review): the legacy module is reported as deprecated and
    later removed from Scrapy -- confirm against the installed version before
    dropping the import from the file.
    """

    def __init__(self):
        # Maps each spider to the set of item ids already emitted by it.
        self.duplicates = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to spider open/close signals.

        Args:
            crawler: the running crawler; only its ``signals`` manager is used.

        Returns:
            A new pipeline instance whose ``spider_opened`` and
            ``spider_closed`` methods are connected to the matching signals.
        """
        pipeline = cls()
        crawler.signals.connect(
            pipeline.spider_opened, signal=signals.spider_opened
        )
        crawler.signals.connect(
            pipeline.spider_closed, signal=signals.spider_closed
        )
        return pipeline

    def spider_opened(self, spider):
        """Start tracking ids for ``spider`` with an empty set."""
        self.duplicates[spider] = set()

    def spider_closed(self, spider):
        """Forget every id recorded for ``spider``.

        Closing a spider that is not being tracked is a no-op instead of a
        ``KeyError``.
        """
        self.duplicates.pop(spider, None)

    def process_item(self, item, spider):
        """Return ``item`` unless its ``id`` was already seen for ``spider``.

        Args:
            item: mapping-like item; must provide an ``"id"`` key.
            spider: the spider that produced the item (used as tracking key).

        Returns:
            The same ``item`` object, after recording its id.

        Raises:
            DropItem: if an item with the same id was already processed for
                this spider.
            KeyError: if ``item`` has no ``"id"`` key.
        """
        # setdefault keeps the pipeline usable even when the open signal was
        # never delivered (e.g. the class was instantiated directly).
        seen = self.duplicates.setdefault(spider, set())
        item_id = item["id"]
        if item_id in seen:
            # Wrap in a 1-tuple so tuple-like items are formatted as a whole.
            raise DropItem("Duplicate item found: %s" % (item,))
        seen.add(item_id)
        return item