def spider_closed(self, spider, reason):
    """Post the final crawl statistics to the store named by ``spider.logger``.

    Nothing is posted when ``spider.debug`` is truthy, or when
    ``spider.logger`` is missing or empty.  Any failure while connecting
    or posting is logged and swallowed (best effort).

    :param spider: the spider that has just been closed.
    :param reason: the close reason; stored as ``finish_reason``.
    """
    if hasattr(spider, 'debug') and spider.debug:
        log.msg(utils.Y(u'disable logger'), level=log.WARNING)
        return

    if not hasattr(spider, 'logger'):
        return

    try:
        uri = spider.logger
        if not uri:
            return

        log.msg('post bot stats to <{}>'.format(uri))
        cnn, _, tbl = utils.connect_uri(uri)
        try:
            # Take a single timestamp so that a missing 'start_time'
            # yields an elapsed time of exactly zero.
            now = datetime.utcnow()
            ago = self.stats.get_value('start_time', now)

            self.stats.set_value('finish_time', now, spider=spider)
            self.stats.set_value('elapsed_time', (now - ago).total_seconds(), spider=spider)
            self.stats.set_value('finish_reason', reason, spider=spider)
            self.stats.set_value('bot_ip', utils.get_ipaddr('eth0'))
            self.stats.set_value('bot_name', self.crawler.settings.get('BOT_NAME', 'unknown'))
            self.stats.set_value('spider_name', spider.name)
            self.stats.set_value('config_path', spider.config)
            self.stats.set_value('job_id', os.getenv('SCRAPY_JOB', None))

            # Dots in stat names are replaced before storing -- presumably
            # because the target store rejects dotted keys (TODO confirm).
            # NOTE(review): if ``tbl`` is a pymongo collection, ``insert`` is
            # deprecated in newer pymongo releases -- confirm the version.
            tbl.insert({k.replace('.', '_'): v for k, v in self.stats.get_stats().items()})
        finally:
            # Release the connection even when collecting or posting fails.
            cnn.close()
    except Exception as ex:
        log.err('cannot post bot stats: {}'.format(ex))
def open_spider(self, spider):
    """Connect to the MongoDB target named by ``spider.mongo``.

    On success ``self.cnn``, ``self.db`` and ``self.tbl`` hold the three
    values returned by ``utils.connect_uri``.  When the spider has no
    ``mongo`` attribute, or the connection attempt fails (the error is
    logged), all three attributes are set to ``None``.

    :param spider: the spider being opened.
    """
    if hasattr(spider, 'mongo'):
        try:
            uri = spider.mongo
            log.msg('connect <{}>'.format(uri))
            self.cnn, self.db, self.tbl = utils.connect_uri(uri)
            return
        except Exception as ex:
            log.err('cannot connect to mongodb: {}'.format(ex))

    # Reset every handle the success path sets, so later code never sees
    # a missing or stale ``tbl`` next to a ``None`` connection.
    self.cnn = self.db = self.tbl = None
def open_spider(self, spider):
    """Set up the ZeroMQ handles for the endpoint named by ``spider.zmq``.

    The ``zmq`` module is imported lazily and kept on ``self.zmq``; the
    first two values returned by ``utils.connect_uri`` are stored as
    ``self.ctx`` and ``self.skt`` (the third is ignored).  Without a
    ``zmq`` attribute on the spider, or after a logged failure, both
    ``self.ctx`` and ``self.skt`` are ``None``.

    :param spider: the spider being opened.
    """
    # Spider does not ask for zmq: leave the handles disabled.
    if not hasattr(spider, 'zmq'):
        self.ctx = None
        self.skt = None
        return

    try:
        self.zmq = __import__('zmq')
        endpoint = spider.zmq
        log.msg('connect <{}>'.format(endpoint))
        context, socket, _ = utils.connect_uri(endpoint)
        self.ctx = context
        self.skt = socket
    except Exception as error:
        log.err('cannot connect to zmq: {}'.format(error))
        self.ctx = None
        self.skt = None
def open_spider(self, spider):
    """Connect to the MySQL target named by ``spider.mysql``.

    On success ``self.cnn`` and ``self.tbl`` hold the first and third
    values returned by ``utils.connect_uri`` and ``self.cur`` is a cursor
    obtained from the connection.  When the spider has no ``mysql``
    attribute, or setup fails (traceback printed and error logged),
    ``self.cnn``, ``self.cur`` and ``self.tbl`` are all set to ``None``.

    :param spider: the spider being opened.
    """
    if hasattr(spider, 'mysql'):
        cnn = None
        try:
            uri = spider.mysql
            log.msg('connect <{}>'.format(uri))
            cnn, _, tbl = utils.connect_uri(uri)
            cur = cnn.cursor()
        except Exception as ex:
            traceback.print_exc()
            log.err('cannot connect to mysql: {}'.format(ex))
            if cnn is not None:
                # The connection was opened but the cursor could not be
                # created: close it rather than dropping it unreleased.
                # Assumes a DB-API style ``close()`` -- TODO confirm.
                try:
                    cnn.close()
                except Exception as close_ex:
                    log.err('cannot close mysql connection: {}'.format(close_ex))
        else:
            self.cnn, self.tbl, self.cur = cnn, tbl, cur
            return

    # Reset every handle the success path sets, including ``tbl``.
    self.cnn = self.cur = self.tbl = None
def spider_closed(self, spider, reason):
    """Post the final crawl statistics to the store named by ``spider.logger``.

    Posting is skipped when ``spider.debug`` is truthy, or when
    ``spider.logger`` is missing or empty.  Errors raised while connecting
    or posting are logged and not re-raised (best effort).

    :param spider: the spider that has just been closed.
    :param reason: the close reason; stored as ``finish_reason``.
    """
    if hasattr(spider, 'debug') and spider.debug:
        log.msg(utils.Y(u'disable logger'), level=log.WARNING)
        return

    if not hasattr(spider, 'logger'):
        return

    try:
        uri = spider.logger
        if not uri:
            return

        log.msg('post bot stats to <{}>'.format(uri))
        cnn, _, tbl = utils.connect_uri(uri)
        try:
            # One timestamp serves as both the finish time and the
            # fallback start time, so a missing 'start_time' gives 0s.
            now = datetime.utcnow()
            ago = self.stats.get_value('start_time', now)

            self.stats.set_value('finish_time', now, spider=spider)
            self.stats.set_value(
                'elapsed_time', (now - ago).total_seconds(), spider=spider)
            self.stats.set_value('finish_reason', reason, spider=spider)
            self.stats.set_value('bot_ip', utils.get_ipaddr('eth0'))
            self.stats.set_value(
                'bot_name', self.crawler.settings.get('BOT_NAME', 'unknown'))
            self.stats.set_value('spider_name', spider.name)
            self.stats.set_value('config_path', spider.config)
            self.stats.set_value('job_id', os.getenv('SCRAPY_JOB', None))

            # Stat names have their dots replaced before storing --
            # presumably the target store rejects dotted keys (TODO confirm).
            tbl.insert({
                k.replace('.', '_'): v
                for k, v in self.stats.get_stats().items()
            })
        finally:
            # Always release the connection, even if posting failed.
            cnn.close()
    except Exception as ex:
        log.err('cannot post bot stats: {}'.format(ex))