def _init_uri_routes(self):
    """Initialize the URI routes.

    Hands ``self.handler_pathes`` to ``route.make_routes`` and stores the
    returned routing table on ``self.handlers``.
    """
    logger.info('init uri routes start >>>', caller=self)
    self.handlers = route.make_routes(self.handler_pathes)
    logger.info('init uri routes done <<<', caller=self)
async def start_crawling(self, _type='crawler'):
    """Trigger the enabled spiders of one type that are not already running.

    :param _type: key into ``self.enabled_crawlers`` selecting which group
        of spiders to start.
    :return: list of the keys that were started; ``None`` when ``_type`` is
        ``'crawler'`` and ``self.crawler_condition()`` evaluates falsy.
    """
    # The crawler condition only gates the 'crawler' group.
    if _type == 'crawler':
        if not await self.crawler_condition():
            return

    launched = []
    for spider in self.enabled_crawlers[_type]:
        key = build_key(spider)
        # A truthy check result means this spider is already registered.
        if await self.check(key=key):
            continue

        # Register first; the returned value is logged as the start marker.
        start_mark = await self.register_status(key)
        # TODO: specify settings
        logger.info('Started %s at %s. Key: %s.' % (start_mark, spider, key))
        # Invoked directly on this coroutine's thread (no executor hand-off).
        self.run_crawler(spider)
        launched.append(key)
    return launched
def _init_application(self):
    """Initialize the HTTP listening service.

    Builds an ``Application`` from ``self.handlers`` with the ``debug`` and
    ``cookie_secret`` settings and makes it listen on ``self.http_port``.
    """
    app_settings = dict(debug=self.debug, cookie_secret=self.cookie_secret)
    application = Application(self.handlers, **app_settings)
    application.listen(self.http_port)
    logger.info('listen http port at:', self.http_port, caller=self)
async def start_crawling(self):
    """Trigger every enabled spider that is not already running.

    For each spider in ``self.enabled_spiders`` whose key is not reported by
    ``self.check``, the status is registered, the crawl is started through
    ``crawler_runner`` and ``self.callback_unregister_status`` is attached
    to the returned deferred.

    :return: list of keys of the spiders that were started.
    """
    started = []
    for spider in self.enabled_spiders:
        key = build_key(spider)
        if await self.check(key=key):
            # Already registered as running; leave it alone.
            continue

        st = await self.register_status(key)
        started.append(key)

        # TODO: specify settings
        # NOTE(review): ``conf`` is assembled here but never handed to
        # ``crawler_runner.crawl`` below.
        conf = {} if config.CONSOLE_OUTPUT else {
            'LOG_FILE': '%s%s.log' % (key_prefix, spider.name),
        }

        logger.info('Started %s at %s. Key: %s.' % (st, spider, key))
        # Unregister the status once the crawl finishes, whatever the outcome.
        crawler_runner.crawl(spider).addBoth(
            self.callback_unregister_status, st=st, key=key)
    return started
def do_something(x, y, *args, **kwargs):
    """Heartbeat task: log ``x`` and ``y`` on every tenth heartbeat.

    :param x: value that is logged.
    :param y: value that is logged.
    :param kwargs: ``heart_beat_count`` is read from here (default 0).
    """
    beat = kwargs.get('heart_beat_count', 0)
    logger.debug('heart_beat_count:', beat)
    if beat % 10 != 0:
        return
    logger.info('do something here every 10 seconds.')
    logger.info('x:', x, 'y:', y)
def start(self):
    """Start the service by running the current ``IOLoop``."""
    logger.info('start io loop ...')
    io_loop = IOLoop.current()
    io_loop.start()
async def _patch_(self, *args, **kwargs):
    """Handle PATCH: log the request path and body, then reply ``{'ok': 1}``."""
    request_body = self.get_body()
    logger.info('test handler PATCH:', self.request.path, 'body:', request_body, caller=self)
    self.do_success({'ok': 1})
async def _post_(self, *args, **kwargs):
    """Handle POST: log the path and the ``info`` field, then reply ``{'ok': 1}``.

    ``info`` is taken from ``self.data`` through ``validators.dict_field``.
    """
    payload_info = validators.dict_field(self.data, 'info')
    logger.info('test handler POST:', self.request.path, 'info:', payload_info, caller=self)
    self.do_success({'ok': 1})
async def _get_(self, *args, **kwargs):
    """Handle GET: log the path and the ``page`` parameter, then reply ``{'ok': 1}``.

    ``page`` is taken from ``self.query_params`` through
    ``validators.int_field``.
    """
    page_number = validators.int_field(self.query_params, 'page')
    logger.info('test handler GET:', self.request.path, 'page:', page_number, caller=self)
    self.do_success({'ok': 1})
async def initRedisPool(host='localhost', port=6379, db=None, *args, **kw):
    """Initialize the redis connection pool.

    Builds a ``redis://host:port`` address, creates an aioredis pool for it
    and publishes the pool in the module-level ``REDIS_CONN_POOL``.

    :param host: redis host name or IP.
    :param port: redis port.
    :param db: database passed through to ``aioredis.create_redis_pool``.
    :param args: accepted but not used.
    :param kw: accepted but not used.
    """
    global REDIS_CONN_POOL
    address = 'redis://{}:{}'.format(host, port)
    REDIS_CONN_POOL = await aioredis.create_redis_pool(
        address, db=db, encoding='utf-8')
    logger.info('Create redis pool success. Got pool: %s.' % REDIS_CONN_POOL)
def add_failure(self, key):
    """Record one more failure for ``key``, removing it after too many.

    The current count is read from the ``fail_times`` field of the hash
    stored at ``key`` (a missing field counts as 0). When the count is
    already above 9 the key is deleted; otherwise the field is incremented
    by one.

    :param key: name of the hash whose ``fail_times`` field is tracked.
    """
    fail_times = int(self.cli.hget(key, 'fail_times') or 0)
    if fail_times > 9:
        self.cli.delete(key)
        logger.info('Remove key: %s' % key)
        # Stop here: incrementing after the delete would recreate the key
        # as a hash holding nothing but ``fail_times``.
        return
    self.cli.hincrby(key, 'fail_times', 1)
    logger.debug('Key: %s Fail times: %s' % (key, fail_times + 1))
def initMongodb(host='127.0.0.1:27017', username='', password='', dbname='admin'):
    """Initialize the mongodb connection.

    Builds a ``mongodb://`` URI, creates a ``MotorClient`` for it and
    publishes the client in the module-level ``MONGO_CONN``.

    :param host: ``host:port`` of the server.
    :param username: user name; credentials are only put into the URI when
        both ``username`` and ``password`` are non-empty.
    :param password: password for ``username``.
    :param dbname: database name appended to the URI.
    """
    global MONGO_CONN
    if username and password:
        # NOTE(review): the host is percent-encoded only in this branch,
        # not in the credential-less one below.
        uri = 'mongodb://{}:{}@{}/{}'.format(
            quote_plus(username), quote_plus(password), quote_plus(host), dbname)
    else:
        uri = 'mongodb://{}/{}'.format(host, dbname)
    MONGO_CONN = motor.motor_tornado.MotorClient(uri)
    logger.info('create mongodb connection pool.')
def start(self):
    """Run one heartbeat tick and schedule the next one.

    Increments ``self._count`` (wrapping back to 1 once it exceeds
    9999999), logs every fifth tick, re-schedules this method after
    ``self._interval`` seconds and queues every task in ``self.tasks`` on
    the IOLoop with the current count passed as ``heart_beat_count``.
    """
    tick = self._count + 1
    if tick > 9999999:
        tick = 1
    self._count = tick

    if tick % 5 == 0:
        logger.info('do server heartbeat, count:', self._count, caller=self)

    io_loop = IOLoop.current()
    # Re-arm the timer before dispatching this tick's tasks.
    io_loop.add_timeout(datetime.timedelta(seconds=self._interval), self.start)

    for task in self.tasks:
        func, args, kwargs = task['func'], task['args'], task['kwargs']
        # Written into the task's own kwargs dict, so it persists on the task.
        kwargs['heart_beat_count'] = self._count
        io_loop.add_callback(func, *args, **kwargs)
def make_routes(cls, dirs):
    """Register and return all handlers.

    Every module path in ``dirs`` is imported, then the entries found in
    ``cls._routes`` are turned into routing tuples. (Presumably importing a
    handler module is what fills ``cls._routes`` -- confirm against the
    route decorator.)

    :param dirs: list of dotted module paths whose URI handlers should be
        registered.
    :return: list of ``(uri, handler)`` tuples, one per entry in
        ``cls._routes``.
    """
    import importlib

    # Import by name instead of exec-ing a generated ``import`` statement:
    # nothing in ``dirs`` is ever evaluated as code.
    for module_path in dirs:
        importlib.import_module(module_path)

    routes = []
    for handler_dic in cls._routes:
        logger.info('register uri:', handler_dic['uri'], 'handler:', handler_dic.get('handler'), caller=cls)
        routes.append((handler_dic.get('uri'), handler_dic.get('handler')))
    return routes
def callback_unregister_status(self, _, st, key, *args, **kw):
    """Callback run when a crawl ends: log the duration and delete the key.

    The delete goes through ``self.blocking_cli`` because Twisted does not
    support aioredis.

    :param _: reserved for the result of the fired deferred; not used.
    :param st: start time; converted with ``int`` and compared to now.
    :param key: key in the db to delete.
    :param args: reserved, not used.
    :param kw: reserved, not used.
    :return: tuple of (result of the delete call, elapsed whole seconds).
    """
    elapsed = int(time.time()) - int(st)
    logger.info("One spider finished working. "
                "Delete key: %s. Total time: %s" % (key, elapsed))
    delete_result = self.blocking_cli.delete(key)
    return delete_result, elapsed
def _init_db_instance(self):
    """Initialize the database objects.

    For each of ``self.mysql_config``, ``self.mongo_config`` and
    ``self.redis_config`` that is truthy, the matching ``tbag.core.db``
    initializer is imported on demand and called with the config as keyword
    arguments. The redis initializer is a coroutine function, so it is
    queued on the IOLoop rather than called directly.

    NOTE(review): each config is logged verbatim, including any
    credentials it carries.
    """
    logger.info('init db instance start >>>', caller=self)

    mysql_config = self.mysql_config
    if mysql_config:
        from tbag.core.db.mysql import initMySQL
        logger.info('mysql config:', mysql_config, caller=self)
        initMySQL(**mysql_config)

    mongo_config = self.mongo_config
    if mongo_config:
        from tbag.core.db.mongo import initMongodb
        logger.info('mongodb config:', mongo_config, caller=self)
        initMongodb(**mongo_config)

    redis_config = self.redis_config
    if redis_config:
        from tbag.core.db.redis import initRedisPool
        logger.info('redis config:', redis_config, caller=self)
        # Scheduled as a callback; it runs once the IOLoop is running.
        IOLoop.current().add_callback(initRedisPool, **redis_config)

    logger.info('init db instance done <<<', caller=self)
def initMySQL(host='127.0.0.1', port=3306, username='', password='', db='mysql'):
    """Initialize the mysql connection pool.

    Builds the connection settings, creates a ``pools.Pool`` from them and
    publishes the pool in the module-level ``CONN_POOL``.

    :param host: MySQL server IP.
    :param port: MySQL server port.
    :param username: MySQL user name.
    :param password: MySQL password.
    :param db: name of the database to connect to.
    """
    mysql_config = {
        'host': host,
        'port': port,
        'user': username,
        'passwd': password,
        'db': db,
        'cursorclass': cursors.DictCursor,
        'charset': 'utf8',
    }
    # Log a copy with the password masked so the secret never reaches the
    # log output; the pool below still receives the real settings.
    loggable_config = dict(mysql_config, passwd='******' if password else password)
    logger.info('mysql_config:', loggable_config)

    global CONN_POOL
    CONN_POOL = pools.Pool(mysql_config, max_idle_connections=1, max_recycle_sec=3)
    logger.info('create mysql connection pool.')
def _init_middlewares(self):
    """Load the middlewares.

    Each entry of ``self.middlewares`` is a dotted ``module.ClassName``
    path. The class is imported and instantiated; instances that are not a
    ``Middleware`` are skipped with a warning. The accepted instances are
    registered as the ``middlewares`` option.
    """
    logger.info('load middleware start >>>', caller=self)
    loaded = []
    for m in self.middlewares:
        # Everything before the last dot is the module, the rest the class.
        module_path, _, class_name = m.rpartition('.')
        module = __import__(module_path, {}, {}, ['classes'])
        middleware = getattr(module, class_name)
        instance = middleware()
        if isinstance(instance, Middleware):
            loaded.append(instance)
            logger.info('middleware:', middleware, caller=self)
        else:
            logger.warn(
                'middleware must inherit from tbag.core.middleware.Middleware:',
                m, caller=self)
    options.define('middlewares', loaded, help='set web api middlewares')
    logger.info('load middleware done <<<', caller=self)
def _do_heartbeat(self):
    """Server heartbeat: schedule ``heartbeat.start`` three seconds from now."""
    from tbag.core.heartbeat import heartbeat

    logger.info('Heartbeat started...')
    delay_seconds = 3
    IOLoop.current().call_later(delay_seconds, heartbeat.start)
async def finish(self, response):
    """Log that the test middleware is on its way out.

    :param response: accepted but not used.
    """
    message = 'test middleware out'
    logger.info(message, caller=self)
async def _get_(self, *args, **kwargs):
    """Handle GET: log the request path, then reply ``{'ok': 1}``."""
    request_path = self.request.path
    logger.info('test handler GET:', request_path, caller=self)
    self.do_success({'ok': 1})
async def prepare(self, request):
    """Log that the test middleware was entered.

    :param request: accepted but not used.
    """
    message = 'test middleware in'
    logger.info(message, caller=self)
async def initRedisPool(host='redis://127.0.0.1:6379', db=None):
    """Initialize the redis connection pool.

    Creates an aioredis pool for ``host`` and publishes it in the
    module-level ``REDIS_CONN_POOL``.

    :param host: redis address handed to ``aioredis.create_redis_pool``.
    :param db: database passed through to ``aioredis.create_redis_pool``.
    """
    global REDIS_CONN_POOL
    pool = await aioredis.create_redis_pool(host, db=db, encoding='utf-8')
    REDIS_CONN_POOL = pool
    logger.info('create redis pool success.')