def run(self):
    """Validate the spider settings, then schedule ``worker`` and start the IO loop.

    ``DELAY``, ``LIMIT`` and ``DEPTH`` are read from ``self.settings``; each
    must be present and be a positive integer. If any of them is missing or
    invalid, a critical message naming the offending setting is logged and
    the method returns without starting anything (no exception is raised to
    the caller).

    When all settings are valid, ``worker`` is registered with
    ``ioloop.PeriodicCallback`` using ``DELAY`` as the callback interval and
    the current IO loop is started.
    """
    validated = {}
    for name in ("DELAY", "LIMIT", "DEPTH"):
        try:
            value = getattr(self.settings, name)
        except AttributeError:
            # Report the setting that is actually missing, not always DELAY.
            spider_log.critical("settings.%s not found" % name)
            return

        # Must be `or`: a value is invalid if it is not an int OR it is not
        # positive. With `and`, ints such as 0 or -5 were never rejected.
        if not isinstance(value, int) or value <= 0:
            spider_log.critical(
                "settings.%s should be a positive integer" % name)
            return

        validated[name] = value

    # LIMIT and DEPTH are only validated here; DELAY is the one value this
    # method consumes directly.
    # NOTE(review): if `ioloop` is tornado.ioloop, PeriodicCallback interprets
    # the interval in milliseconds -- confirm DELAY is meant to be in ms.
    delay = validated["DELAY"]

    # Started outside any try/except so that errors raised while the loop is
    # running are not mistaken for a configuration problem and swallowed.
    io_loop = ioloop.IOLoop.current()
    ioloop.PeriodicCallback(worker, delay).start()
    io_loop.start()
# -*- coding: utf-8 -*- import datetime import sys import MySQLdb import settings from spider.log import spider_log try: db_setting = settings.DATABASE except AttributeError: spider_log.critical("settings.DATABASE not found!") sys.exit(-1) host = db_setting["host"] port = db_setting["port"] user = db_setting["user"] password = db_setting["password"] database_name = db_setting["database_name"] # a global variable of db connect spider_session = MySQLdb.connect(host=host, port=port, user=user, passwd=password, db=database_name, charset="utf8") # a global variable to store url hash hash_list = list() class SpiderResult(object): """A simple class to store spider result """