def spider_closed(self, spider, reason):
    """Record final crawl stats and post them to the store at ``spider.logger``.

    Does nothing unless ``self.enabled`` is truthy and the spider has a
    non-empty ``logger`` attribute; that value is handed to
    ``utils.connect_uri`` as the destination URI.  Posting is best-effort:
    any failure is logged through ``log.err`` and never raised.

    :param spider: the spider that was closed; ``name`` and ``config`` are
        read from it and stored as ``spider_name`` / ``config_path``.
    :param reason: close reason, stored as the ``finish_reason`` stat.
    """
    if not (self.enabled and hasattr(spider, 'logger')):
        return

    cnn = None
    try:
        uri = spider.logger
        if not uri:
            return

        log.msg('post bot stats to <{}>'.format(uri))
        cnn, _, tbl = utils.connect_uri(uri)

        # Fall back to "now" when no start time was recorded, which yields
        # an elapsed time of (roughly) zero instead of an error.
        ago = self.stats.get_value('start_time', datetime.utcnow())
        now = datetime.utcnow()

        self.stats.set_value('finish_time', now, spider=spider)
        self.stats.set_value(
            'elapsed_time', (now - ago).total_seconds(), spider=spider)
        self.stats.set_value('finish_reason', reason, spider=spider)
        self.stats.set_value('bot_ip', utils.get_ipaddr())
        self.stats.set_value(
            'bot_name', self.crawler.settings.get('BOT_NAME', 'unknown'))
        self.stats.set_value('spider_name', spider.name)
        self.stats.set_value('config_path', spider.config)
        self.stats.set_value('job_id', os.getenv('SCRAPY_JOB', None))

        # Dots in stat names are replaced before insertion; presumably the
        # backing store rejects dotted field names -- TODO confirm.
        # ``items()`` (not ``iteritems()``) works on Python 2 and 3 alike.
        tbl.insert({
            k.replace('.', '_'): v
            for k, v in self.stats.get_stats().items()
        })
    except Exception as ex:
        log.err('cannot post bot stats: {}'.format(ex))
    finally:
        # Release the connection even when collecting or inserting failed.
        if cnn is not None:
            try:
                cnn.close()
            except Exception as close_ex:
                log.err('cannot close bot stats connection: {}'.format(
                    close_ex))
def spider_closed(self, spider, reason):
    """Record final crawl stats and post them to the store at ``spider.logger``.

    Does nothing unless ``self.enabled`` is truthy and the spider has a
    non-empty ``logger`` attribute; that value is handed to
    ``utils.connect_uri`` as the destination URI.  Posting is best-effort:
    any failure is logged through ``log.err`` and never raised.

    :param spider: the spider that was closed; ``name`` and ``config`` are
        read from it and stored as ``spider_name`` / ``config_path``.
    :param reason: close reason, stored as the ``finish_reason`` stat.
    """
    if not (self.enabled and hasattr(spider, 'logger')):
        return

    cnn = None
    try:
        uri = spider.logger
        if not uri:
            return

        log.msg('post bot stats to <{}>'.format(uri))
        cnn, _, tbl = utils.connect_uri(uri)

        # Fall back to "now" when no start time was recorded, which yields
        # an elapsed time of (roughly) zero instead of an error.
        ago = self.stats.get_value('start_time', datetime.utcnow())
        now = datetime.utcnow()

        self.stats.set_value('finish_time', now, spider=spider)
        self.stats.set_value(
            'elapsed_time', (now - ago).total_seconds(), spider=spider)
        self.stats.set_value('finish_reason', reason, spider=spider)
        self.stats.set_value('bot_ip', utils.get_ipaddr())
        self.stats.set_value(
            'bot_name', self.crawler.settings.get('BOT_NAME', 'unknown'))
        self.stats.set_value('spider_name', spider.name)
        self.stats.set_value('config_path', spider.config)
        self.stats.set_value('job_id', os.getenv('SCRAPY_JOB', None))

        # Dots in stat names are replaced before insertion; presumably the
        # backing store rejects dotted field names -- TODO confirm.
        # ``items()`` (not ``iteritems()``) works on Python 2 and 3 alike.
        tbl.insert({
            k.replace('.', '_'): v
            for k, v in self.stats.get_stats().items()
        })
    except Exception as ex:
        log.err('cannot post bot stats: {}'.format(ex))
    finally:
        # Release the connection even when collecting or inserting failed.
        if cnn is not None:
            try:
                cnn.close()
            except Exception as close_ex:
                log.err('cannot close bot stats connection: {}'.format(
                    close_ex))
def open_spider(self, spider):
    """Open the connection named by ``spider.mongo`` when a spider starts.

    On success ``self.cnn``, ``self.db`` and ``self.tbl`` hold the three
    values returned by ``utils.connect_uri`` and ``self.upsert_keys`` holds
    the result of ``self.get_upsert_keys()``.  If the spider has no
    ``mongo`` attribute the handles are simply cleared; if anything in the
    setup raises, the error is logged via ``log.err`` and the handles are
    cleared.  Nothing is raised to the caller.

    :param spider: the spider being opened; its optional ``mongo``
        attribute is passed to ``utils.connect_uri`` as the URI.
    """
    if hasattr(spider, 'mongo'):
        try:
            self.upsert_keys = self.get_upsert_keys()
            uri = spider.mongo
            log.msg('connect <{}>'.format(uri))
            self.cnn, self.db, self.tbl = utils.connect_uri(uri)
            return
        except Exception as ex:
            log.err('cannot connect to mongodb: {}'.format(ex))

    # Not configured, or the connection failed.  Clear every handle together
    # (including ``tbl``) so no stale or missing attribute is left behind.
    self.cnn = self.db = self.tbl = None
def open_spider(self, spider):
    """Open the connection named by ``spider.mysql`` when a spider starts.

    On success ``self.cnn`` and ``self.tbl`` hold the first and third
    values returned by ``utils.connect_uri`` and ``self.cur`` holds a
    cursor obtained from the connection.  If the spider has no ``mysql``
    attribute the handles are simply cleared; if anything in the setup
    raises, the traceback is printed, the error is logged via ``log.err``
    and the handles are cleared.  Nothing is raised to the caller.

    :param spider: the spider being opened; its optional ``mysql``
        attribute is passed to ``utils.connect_uri`` as the URI.
    """
    if hasattr(spider, 'mysql'):
        cnn = None
        try:
            uri = spider.mysql
            log.msg('connect <{}>'.format(uri))
            cnn, _, tbl = utils.connect_uri(uri)
            cur = cnn.cursor()
        except Exception as ex:
            traceback.print_exc()
            log.err('cannot connect to mysql: {}'.format(ex))
            if cnn is not None:
                # The connection opened but cursor creation failed: close
                # it rather than dropping the only reference to it.
                try:
                    cnn.close()
                except Exception as close_ex:
                    log.err('cannot close mysql connection: {}'.format(
                        close_ex))
        else:
            # Publish the handles only once the whole setup succeeded.
            self.cnn, self.cur, self.tbl = cnn, cur, tbl
            return

    # Not configured, or the connection failed.  Clear every handle together
    # (including ``tbl``) so no stale or missing attribute is left behind.
    self.cnn = self.cur = self.tbl = None