Пример #1
0
def update_train_rank(user_id):
    """Recompute and store the training rank of one user.

    Each of the user's accounts gets a rank of ``solved / max_solved * 1000``,
    where ``max_solved`` is the larger of the account's own ``solved`` and the
    ``solved`` of the top account on the same OJ (1000 when that maximum is 0).
    ``UserInfo.train_rank`` is set to the mean of those per-account ranks.

    Nothing is written when the user has no ``UserInfo`` row or no accounts.

    :param user_id: id of the user whose rank is refreshed.
    """
    logger.info('[Account] update_train_rank <User #{}>'.format(user_id))
    user_info = session.query(UserInfo).filter_by(user_id=user_id).first()
    if user_info is None:
        logger.warn(
            "[Account] update_train_rank => UserInfo of User #{} does't exists"
            .format(user_id))
        return
    accounts = session.query(Account).filter_by(user_id=user_id)
    ranks = []
    for account in accounts:
        solved = account.solved
        oj_name = account.oj_name
        # Best account on the same OJ: most solved, ties broken by submitted.
        top_account = session.query(Account).filter_by(oj_name=oj_name)\
            .order_by(Account.solved.desc(), Account.submitted.desc())\
            .first()
        max_solved = max(top_account.solved, solved)
        if max_solved == 0:
            this_rank = 1000
        else:
            this_rank = (solved / max_solved) * 1000
        ranks.append(this_rank)
        logger.debug("[Account] update_train_rank <User #{}> {} => {}".format(
            user_id, oj_name, this_rank))
    if not ranks:
        # Without accounts there is nothing to average; dividing by
        # len(ranks) would raise ZeroDivisionError.
        logger.warn(
            "[Account] update_train_rank => User #{} has no accounts"
            .format(user_id))
        return
    end_rank = sum(ranks) / len(ranks)
    user_info.train_rank = end_rank
    user_info.save()
    logger.info("[Account] update_train_rank success <User #{}> => {}".format(
        user_id, end_rank))
Пример #2
0
def push_submit_to_queue(submit_id):
    """Log the push, then left-push a JSON ``submit`` message for ``submit_id``
    onto the ``RedisKey.achieve_mq`` list."""
    logger.info('[redis] push submit #{} to queue'.format(submit_id))
    payload = {'type': 'submit', 'id': submit_id}
    message = json.dumps(payload)
    redis.lpush(RedisKey.achieve_mq, message)
Пример #3
0
 def set_general(self, solved, submitted):
     """Store the solved/submitted counters, save, and schedule a rank refresh.

     This is a generator: it yields whatever ``ThreadPool.submit`` returns for
     ``update_train_rank`` and logs only after being resumed.
     NOTE(review): presumably driven by a coroutine decorator that is not
     visible here; confirm.
     """
     self.solved, self.submitted = solved, submitted
     self.save()
     rank_job = ThreadPool.submit(update_train_rank, self.user_id)
     yield rank_job
     summary = '{} 更新 solved: {} / submitted: {}'.format(
         self, solved, submitted)
     logger.info(summary)
Пример #4
0
 def init_http_client():
     """Configure ``httpclient.AsyncHTTPClient`` to use the curl-based client.

     Any exception raised while configuring is logged and swallowed.
     """
     curl_client = "tornado.curl_httpclient.CurlAsyncHTTPClient"
     try:
         httpclient.AsyncHTTPClient.configure(curl_client)
         logger.info('[ACM-Spider] 配置 CurlAsyncHTTPClient 成功')
     except Exception as err:
         failure = '[ACM-Spider] 配置 CurlAsyncHTTPClient 失败: {}'.format(err)
         logger.error(failure)
Пример #5
0
async def account_producer():
    """Producer of accounts waiting to be crawled.

    Loops forever: asks ``account.get_available_account()`` for an account and,
    when one is returned and the spider for its OJ is open, puts it on
    ``AccountQueue``; otherwise sleeps 10 seconds before trying again.
    """
    logger.info('[AccountProducer] 开始获取可用账号放入队列 ...')
    while True:
        candidate = account.get_available_account()
        if not candidate or not is_spider_open(candidate.oj_name):
            # nothing to enqueue right now
            await gen.sleep(10)
            continue
        await AccountQueue.put(candidate)
        queue_size = AccountQueue.qsize()
        logger.info('{0} ===> 账号入队列 AccountQueue(size={1})'.format(
            candidate, queue_size))
Пример #6
0
 async def fetch_cookie(self):
     """Fetch a cookie from the index page and cache it on ``self.cookie``.

     Returns ``True`` when a cookie is already cached or was obtained, and
     ``False`` when the index page could not be loaded or the response carries
     no ``Set-Cookie`` header.
     """
     if self.cookie:
         return True
     response = await self.load_page(self.index_url)
     if not response:
         return False
     # .get() rather than indexing: a missing header means "no cookie",
     # not a KeyError escaping to the caller.
     raw_cookie = response.headers.get('Set-Cookie')
     if not raw_cookie:
         return False
     # Keep only the text before the first ';'.
     self.cookie = raw_cookie.split(';')[0] + ';'
     logger.info('{} fetch cookie success {}'.format(self.TAG, self.account))
     return True
Пример #7
0
 async def fetch_cookie(self):
     """Fetch a cookie from the index page and cache it on ``self.cookie``.

     The cached value is the first ``Set-Cookie`` segment followed by a
     ``username=<nickname>;`` pair built from the bound account.

     Returns ``True`` when a cookie is already cached or was obtained, and
     ``False`` when the index page could not be loaded or the response carries
     no ``Set-Cookie`` header.
     """
     if self.cookie:
         return True
     response = await self.load_page(self.index_url)
     if not response:
         return False
     # .get() rather than indexing: a missing header means "no cookie",
     # not a KeyError escaping to the caller.
     raw_cookie = response.headers.get('Set-Cookie')
     if not raw_cookie:
         return False
     # Keep only the text before the first ';', then append the username pair.
     self.cookie = raw_cookie.split(';')[0] + '; username={};'.format(self.account.nickname)
     logger.info('{} fetch cookie success'.format(self.TAG))
     return True
Пример #8
0
def spider_init():
    """Instantiate spiders and fill every queue in ``SpiderFactory``.

    For each OJ the spider class ``<SUPPORT_OJ[oj]>Spider`` is looked up in the
    already-imported module ``app.spiders.<class name>`` and instances are
    added until the queue holds ``maxsize`` items.
    """
    logger.info('[SpiderInit] 生成 SpiderRunner 缓存 ...')
    for oj_key, pool in SpiderFactory.items():
        class_name = settings.SUPPORT_OJ[oj_key] + 'Spider'
        spider_module = sys.modules['app.spiders.' + class_name]
        spider_cls = getattr(spider_module, class_name)
        while pool.qsize() < pool.maxsize:
            pool.put_nowait(spider_cls())
        logger.info('[{0}] 缓存池初始化 OK => size {1}'.format(
            class_name, pool.qsize()))
Пример #9
0
async def account_producer():
    """Producer of accounts waiting to be crawled.

    Loops forever: asks ``account.get_available_account()`` for an account and,
    when one is returned and the spider for its OJ is open, puts it on
    ``AccountQueue``; otherwise sleeps 10 seconds before trying again.
    """
    logger.info('[AccountProducer] 开始获取可用账号放入队列 ...')
    while True:
        candidate = account.get_available_account()
        if not candidate or not is_spider_open(candidate.oj_name):
            # nothing to enqueue right now
            await gen.sleep(10)
            continue
        await AccountQueue.put(candidate)
        queue_size = AccountQueue.qsize()
        logger.info('{0} ===> 账号入队列 AccountQueue(size={1})'.format(candidate, queue_size))
Пример #10
0
def spider_init():
    """Instantiate spiders and fill every queue in ``SpiderFactory``.

    For each OJ the spider class ``<SUPPORT_OJ[oj]>Spider`` is looked up in the
    already-imported module ``app.spiders.<class name>`` and instances are
    added until the queue holds ``maxsize`` items.
    """
    logger.info('[SpiderInit] 生成 SpiderRunner 缓存 ...')
    for oj_key, pool in SpiderFactory.items():
        class_name = settings.SUPPORT_OJ[oj_key] + 'Spider'
        spider_module = sys.modules['app.spiders.' + class_name]
        spider_cls = getattr(spider_module, class_name)
        while pool.qsize() < pool.maxsize:
            pool.put_nowait(spider_cls())
        filled = pool.qsize()
        logger.info('[{0}] 缓存池初始化 OK => size {1}'.format(class_name, filled))
Пример #11
0
 async def fetch_cookie(self):
     """Fetch a cookie from the index page and cache it on ``self.cookie``.

     Returns ``True`` when a cookie is already cached or was obtained.
     Returns ``False`` when the index page could not be loaded, or when the
     response has no (or an empty) ``Set-Cookie`` header; in the latter case
     ``self.cookie`` is reset to ``''``.
     """
     if self.cookie:
         return True
     response = await self.load_page(self.index_url)
     if not response:
         return False
     # .get() rather than indexing, so a missing header reaches the
     # "no cookie" branch below instead of raising KeyError.
     raw_cookie = response.headers.get('Set-Cookie')
     if not raw_cookie:
         self.cookie = ''
         return False
     # Keep only the text before the first ';'.
     self.cookie = raw_cookie.split(';')[0] + ';'
     logger.info('{} {} fetch cookie success'.format(self.TAG, self.account))
     return True
Пример #12
0
def update_train_rank(user_id):
    """Recompute and store ``train_rank`` for one user.

    The rank is the sum of:

    * one "normal OJ" component: the user's total ``solved`` over the
      bnu/hdu/poj/vj accounts divided by the highest such per-user total,
      times 1000 (1000 when there is no top row or the top total is 0);
    * one component per cf/bc account: the account's rating (read from
      ``solved``) divided by the top rating on that OJ, times 1000
      (1000 when there is no top account or the top rating is 0).

    Nothing is written when the user does not exist.

    :param user_id: id of the user whose rank is refreshed.
    """
    user = session.query(User).filter_by(id=user_id).first()
    logger.info('[Account] update_train_rank #{}'.format(user))
    if user is None:
        # ``user`` is None on this path, so report the id that was looked up.
        logger.warn("[Account] update_train_rank => UserInfo of #{} does't exists".format(user_id))
        return

    ranks = []
    # normal_oj => sum(solved) / sum(top_solved)
    normal_oj = ['bnu', 'hdu', 'poj', 'vj']
    accounts = session.query(Account).filter_by(user_id=user_id)\
        .filter(Account.oj_name.in_(normal_oj))
    solved_sum = sum(account.solved for account in accounts)
    # Highest per-user total of solved problems across the normal OJs.
    top_account = session.query(func.sum(Account.solved), Account.user_id)\
        .filter(Account.oj_name.in_(normal_oj))\
        .group_by(Account.user_id)\
        .order_by(func.sum(Account.solved).desc())\
        .first()
    if top_account:
        top_solved_sum = int(top_account[0])
        this_rank = (solved_sum / top_solved_sum) * 1000 if top_solved_sum > 0 else 1000
        ranks.append(this_rank)
    else:
        ranks.append(1000)

    # rating_oj => sum(rating / top_rating for every account)
    rating_oj = ['cf', 'bc']
    accounts = session.query(Account).filter_by(user_id=user_id) \
        .filter(Account.oj_name.in_(rating_oj))
    for account in accounts:
        rating = account.solved
        oj_name = account.oj_name
        top_account = session.query(Account).filter_by(oj_name=oj_name)\
            .order_by(Account.solved.desc(), Account.submitted.desc())\
            .first()
        if not top_account:
            this_rank = 1000
        else:
            top_rating = max(top_account.solved, rating)
            # A top rating of 0 would divide by zero; treat it like the
            # "nobody has solved anything" case of the normal OJs.
            this_rank = (rating / top_rating) * 1000 if top_rating != 0 else 1000
        ranks.append(this_rank)

    # end_rank = sum(ranks)
    end_rank = sum(ranks)
    user.train_rank = end_rank
    user.save()
    logger.info("[Account] update_train_rank success #{} => sum({}) => {}".format(user, ranks, end_rank))
Пример #13
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs the account's nickname and password to ``self.login_url`` together
     with the cached cookie. Returns ``True`` when already logged in, or when
     the response status is 200/302 and the gb2312-decoded body contains
     'Sign Out'; returns ``False`` otherwise.
     """
     if self.has_login:
         return True
     form = {
         'username': self.account.nickname,
         'userpass': self.account.password,
         'login': '******'
     }
     encoded_form = parse.urlencode(form)
     response = await self.fetch(self.login_url,
                                 method=HttpMethod.POST,
                                 headers={'Cookie': self.cookie},
                                 body=encoded_form)
     status = response.code
     html = response.body.decode('gb2312')
     accepted = status in (200, 302) and 'Sign Out' in html
     if not accepted:
         return False
     logger.info('{} {} login success'.format(self.TAG, self.account))
     self.has_login = True
     return True
Пример #14
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs nickname, password and ``remember=on`` to ``self.login_url`` with
     the cached cookie and a fixed ``Host`` header. Returns ``True`` when
     already logged in, or when the response status is 200/302 and the
     gb2312-decoded body contains 'Logout'; returns ``False`` otherwise.
     """
     if self.has_login:
         return True
     form = {
         'username': self.account.nickname,
         'password': self.account.password,
         'remember': 'on'
     }
     encoded_form = parse.urlencode(form)
     request_headers = {'Host': 'bestcoder.hdu.edu.cn', 'Cookie': self.cookie}
     response = await self.fetch(self.login_url,
                                 method=HttpMethod.POST,
                                 headers=request_headers,
                                 body=encoded_form)
     status = response.code
     html = response.body.decode('gb2312')
     accepted = status in (200, 302) and 'Logout' in html
     if not accepted:
         return False
     self.has_login = True
     logger.info('{} login success {}'.format(self.TAG, self.account))
     return True
Пример #15
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs the login form (``user_id1``, ``password1``, ``B1``, ``url``) to
     ``self.login_url`` with the cached cookie. Returns ``True`` when already
     logged in, or when the response status is 200/302 and the decoded body
     contains 'Log Out'; returns ``False`` otherwise.
     """
     if self.has_login:
         return True
     form = {
         'user_id1': self.account.nickname,
         'password1': self.account.password,
         'B1': 'login',
         'url': '/'
     }
     encoded_form = parse.urlencode(form)
     request_headers = {'Cookie': self.cookie}
     response = await self.fetch(self.login_url,
                                 method=HttpMethod.POST,
                                 body=encoded_form,
                                 headers=request_headers)
     status = response.code
     html = response.body.decode()
     accepted = status in (200, 302) and 'Log Out' in html
     if not accepted:
         return False
     self.has_login = True
     logger.info('{} login success {}'.format(self.TAG, self.account))
     return True
Пример #16
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs the account's nickname and password to ``self.login_url`` together
     with the cached cookie. Returns ``True`` when already logged in, or when
     the response status is 200/302 and the gb2312-decoded body contains
     'Sign Out'; returns ``False`` otherwise.
     """
     if self.has_login:
         return True
     credentials = {
         'username': self.account.nickname,
         'userpass': self.account.password,
         'login': '******'
     }
     encoded_form = parse.urlencode(credentials)
     cookie_header = {'Cookie': self.cookie}
     response = await self.fetch(
         self.login_url, method=HttpMethod.POST,
         headers=cookie_header, body=encoded_form)
     status = response.code
     html = response.body.decode('gb2312')
     if status not in (200, 302) or 'Sign Out' not in html:
         return False
     logger.info('{} {} login success'.format(self.TAG, self.account))
     self.has_login = True
     return True
Пример #17
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs the login form (``user_id1``, ``password1``, ``B1``, ``url``) to
     ``self.login_url`` with the cached cookie. Returns ``True`` when already
     logged in, or when the response status is 200/302 and the decoded body
     contains 'Log Out'; returns ``False`` otherwise.
     """
     if self.has_login:
         return True
     credentials = {
         'user_id1': self.account.nickname,
         'password1': self.account.password,
         'B1': 'login',
         'url': '/'
     }
     encoded_form = parse.urlencode(credentials)
     cookie_header = {'Cookie': self.cookie}
     response = await self.fetch(
         self.login_url, method=HttpMethod.POST,
         body=encoded_form, headers=cookie_header)
     status = response.code
     html = response.body.decode()
     if status not in (200, 302) or 'Log Out' not in html:
         return False
     self.has_login = True
     logger.info('{} login success {}'.format(self.TAG, self.account))
     return True
Пример #18
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs nickname and password to ``self.login_url`` (certificate validation
     disabled). Login counts as successful when the status is 200/302 and the
     decoded body is exactly 'success'; the response's ``Set-Cookie`` header is
     then stored on ``self.cookie``. Returns ``True`` on success or when
     already logged in, ``False`` otherwise.
     """
     if self.has_login:
         return True
     form = {
         'username': self.account.nickname,
         'password': self.account.password
     }
     encoded_form = parse.urlencode(form)
     request_headers = {
         'Host': 'vjudge.net',
         'Origin': self.domain,
         'Referer': 'http://vjudge.net/index'
     }
     response = await self.fetch(self.login_url,
                                 method=HttpMethod.POST,
                                 body=encoded_form,
                                 headers=request_headers,
                                 validate_cert=False)
     status = response.code
     reply = response.body.decode()
     accepted = status in (200, 302) and reply == 'success'
     if not accepted:
         return False
     self.cookie = response.headers['Set-Cookie']
     self.has_login = True
     logger.info('{} login success {}'.format(self.TAG, self.account))
     return True
Пример #19
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs nickname, password and ``remember=on`` to ``self.login_url`` with
     the cached cookie and a fixed ``Host`` header. Returns ``True`` when
     already logged in, or when the response status is 200/302 and the
     gb2312-decoded body contains 'Logout'; returns ``False`` otherwise.
     """
     if self.has_login:
         return True
     credentials = {
         'username': self.account.nickname,
         'password': self.account.password,
         'remember': 'on'
     }
     encoded_form = parse.urlencode(credentials)
     request_headers = {'Host': 'bestcoder.hdu.edu.cn', 'Cookie': self.cookie}
     response = await self.fetch(
         self.login_url, method=HttpMethod.POST,
         headers=request_headers, body=encoded_form)
     status = response.code
     html = response.body.decode('gb2312')
     if status not in (200, 302) or 'Logout' not in html:
         return False
     self.has_login = True
     logger.info('{} login success {}'.format(self.TAG, self.account))
     return True
Пример #20
0
async def spider_runner(idx):
    """ Spider worker coroutine: crawl accounts taken from AccountQueue forever.

    Each iteration takes one account, borrows a spider from the
    ``SpiderFactory`` queue of the account's OJ, runs it, records the outcome
    in the account status, and hands the spider back to its queue.

    :param idx: number of this runner; only used in log messages.
    """
    logger.info('[SpiderRunner #{0}] 开始运行 ...'.format(idx))
    while True:
        cur_account = await AccountQueue.get()
        logger.info('[SpiderRunner #{0}] {1} <=== account_queue(size={2})'
                    .format(idx, cur_account, AccountQueue.qsize()))
        # borrow a spider for this account's OJ and bind the account to it
        worker = await SpiderFactory[cur_account.oj_name].get()
        worker.account = cur_account

        try:
            # persist the UPDATING status before the crawl starts
            cur_account.set_status(account.AccountStatus.UPDATING)
            cur_account.save()
            await worker.run()
            cur_account.set_status(account.AccountStatus.NORMAL)
        except LoginException as ex:
            logger.error(ex)
            cur_account.set_status(account.AccountStatus.ACCOUNT_ERROR)
            # pause this runner for two minutes after a login failure
            await gen.sleep(60 * 2)
        except Exception as ex:
            # any other failure: log it with the traceback and mark the update as failed
            logger.error(ex)
            logger.error(traceback.format_exc())
            cur_account.set_status(account.AccountStatus.UPDATE_ERROR)
            await gen.sleep(60 * 2)
        finally:
            # persist whichever status was set above
            cur_account.save()

        # work done: acknowledge both queue items, then return the spider to its queue
        logger.info('[SpiderRunner #{0}] {1} work done'.format(idx, cur_account))
        SpiderFactory[cur_account.oj_name].task_done()
        AccountQueue.task_done()
        await SpiderFactory[cur_account.oj_name].put(worker)
Пример #21
0
async def spider_runner(idx):
    """ Spider worker coroutine: crawl accounts taken from AccountQueue forever.

    Each iteration takes one account, borrows a spider from the
    ``SpiderFactory`` queue of the account's OJ, runs it, records the outcome
    in the account status, and hands the spider back to its queue.

    :param idx: number of this runner; only used in log messages.
    """
    logger.info('[SpiderRunner #{0}] 开始运行 ...'.format(idx))
    while True:
        cur_account = await AccountQueue.get()
        logger.info(
            '[SpiderRunner #{0}] {1} <=== account_queue(size={2})'.format(
                idx, cur_account, AccountQueue.qsize()))
        # borrow a spider for this account's OJ and bind the account to it
        worker = await SpiderFactory[cur_account.oj_name].get()
        worker.account = cur_account

        try:
            # persist the UPDATING status before the crawl starts
            cur_account.set_status(account.AccountStatus.UPDATING)
            cur_account.save()
            await worker.run()
            cur_account.set_status(account.AccountStatus.NORMAL)
        except LoginException as ex:
            logger.error(ex)
            cur_account.set_status(account.AccountStatus.ACCOUNT_ERROR)
        except Exception as ex:
            # any other failure: log it with the traceback and mark the update as failed
            logger.error(ex)
            logger.error(traceback.format_exc())
            cur_account.set_status(account.AccountStatus.UPDATE_ERROR)
        finally:
            # persist whichever status was set above
            cur_account.save()

        # work done: acknowledge both queue items, then return the spider to its queue
        logger.info('[SpiderRunner #{0}] {1} work done'.format(
            idx, cur_account))
        SpiderFactory[cur_account.oj_name].task_done()
        AccountQueue.task_done()
        await SpiderFactory[cur_account.oj_name].put(worker)
Пример #22
0
 async def login(self):
     """Log the bound account in unless this spider is already logged in.

     POSTs nickname and password to ``self.login_url`` (certificate validation
     disabled). Login counts as successful when the status is 200/302 and the
     decoded body is exactly 'success'; the response's ``Set-Cookie`` header is
     then stored on ``self.cookie``. Returns ``True`` on success or when
     already logged in, ``False`` otherwise.
     """
     if self.has_login:
         return True
     credentials = {
         'username': self.account.nickname,
         'password': self.account.password
     }
     encoded_form = parse.urlencode(credentials)
     request_headers = {
         'Host': 'vjudge.net',
         'Origin': self.domain,
         'Referer': 'http://vjudge.net/index'
     }
     response = await self.fetch(
         self.login_url, method=HttpMethod.POST, body=encoded_form,
         headers=request_headers, validate_cert=False)
     status = response.code
     reply = response.body.decode()
     if status not in (200, 302) or reply != 'success':
         return False
     self.cookie = response.headers['Set-Cookie']
     self.has_login = True
     logger.info('{} login success {}'.format(self.TAG, self.account))
     return True
Пример #23
0
async def data_pool_consumer():
    """Consumer coroutine for crawled data.

    Loops forever: waits (polling every 10 seconds) until ``DataPool`` is not
    empty, takes one item and dispatches on its ``'type'``:

    * ``DataType.Submit``: ``submit.create_submit``; logged on success.
    * ``DataType.Code``: ``submit.update_code``; logged on success, otherwise
      the item is put back on ``DataPool``.

    ``DataPool.task_done()`` is called once per item taken.
    """
    logger.info('[DataPoolConsumer] 数据消费协程开启 ... ')
    while True:
        while DataPool.empty():
            await gen.sleep(10)
        item = await DataPool.get()
        kind = item['type']
        if kind == DataType.Submit:
            # new submit
            stored = submit.create_submit(item)
            if stored:
                owner = item['account']
                logger.info('[DataPoolConsumer] 存入新提交 for <{} {} {}>'.format(
                    owner.oj_name, item['run_id'], owner.nickname))
        elif kind == DataType.Code:
            # save the code
            if not submit.update_code(item):
                # could not be stored now: queue it again
                await DataPool.put(item)
            else:
                owner = item['account']
                logger.info('[DataPoolConsumer] 更新代码 for <{} {} {}>'.format(
                    owner.oj_name, item['run_id'], owner.nickname))
        DataPool.task_done()
Пример #24
0
async def data_pool_consumer():
    """Consumer coroutine for crawled data.

    Loops forever: waits (polling every 10 seconds) until ``DataPool`` is not
    empty, takes one item and dispatches on its ``'type'``:

    * ``DataType.Submit``: ``submit.create_submit``; logged on success.
    * ``DataType.Code``: ``submit.update_code``; logged on success, otherwise
      the item is put back on ``DataPool``.

    ``DataPool.task_done()`` is called once per item taken.
    """
    logger.info('[DataPoolConsumer] 数据消费协程开启 ... ')
    while True:
        while DataPool.empty():
            await gen.sleep(10)
        item = await DataPool.get()
        kind = item['type']
        if kind == DataType.Submit:
            # new submit
            stored = submit.create_submit(item)
            if stored:
                owner = item['account']
                logger.info('[DataPoolConsumer] 存入新提交 for <{} {} {}>'.format(
                    owner.oj_name, item['run_id'], owner.nickname))
        elif kind == DataType.Code:
            # save the code
            if not submit.update_code(item):
                # could not be stored now: queue it again
                await DataPool.put(item)
            else:
                owner = item['account']
                logger.info('[DataPoolConsumer] 更新代码 for <{} {} {}>'.format(
                    owner.oj_name, item['run_id'], owner.nickname))
        DataPool.task_done()
Пример #25
0
def init_all():
    """Reset every account whose status is not NOT_INIT or STOP to NORMAL.

    The bulk UPDATE is committed before the success message is logged, so the
    log line is only written once the reset has actually happened. If the
    update or the commit fails, the session is rolled back and the error is
    re-raised.
    """
    resettable = session.query(Account).filter(
        ~Account.status.in_([AccountStatus.NOT_INIT, AccountStatus.STOP]))
    try:
        resettable.update({Account.status: AccountStatus.NORMAL},
                          synchronize_session=False)
        session.commit()
    except Exception:
        # Leave the shared session usable for later queries.
        session.rollback()
        raise
    logger.info("[AccountInit] 所有非 [NOT_INIT, STOP] 账号已经重置为 NORMAL")
Пример #26
0
 def set_general(self, solved, submitted):
     """Store the solved/submitted counters, save, and schedule a rank refresh.

     This is a generator: it yields whatever ``ThreadPool.submit`` returns for
     ``update_train_rank`` and logs only after being resumed.
     NOTE(review): presumably driven by a coroutine decorator that is not
     visible here; confirm.
     """
     self.solved, self.submitted = solved, submitted
     self.save()
     rank_job = ThreadPool.submit(update_train_rank, self.user_id)
     yield rank_job
     summary = '{} 更新 solved: {} / submitted: {}'.format(
         self, solved, submitted)
     logger.info(summary)
Пример #27
0
def log_spider_status():
    """Log the value returned by ``get_all_open_spider()``."""
    open_spiders = get_all_open_spider()
    logger.info('[OPEN Spider] {0}'.format(open_spiders))
Пример #28
0
def setup_redis():
    """Create the spider switch hash in redis when missing, then log spider status.

    When ``RedisKey.switch`` does not exist, every OJ in ``SUPPORT_OJ`` is
    written to it with the value 1.
    """
    switch_key = RedisKey.switch
    if not redis.exists(switch_key):
        defaults = dict.fromkeys(SUPPORT_OJ, 1)
        created = redis.hmset(switch_key, defaults)
        if created:
            logger.info('[redis] setup switch key success')
    log_spider_status()
Пример #29
0
def init_all():
    """Reset every account whose status is not NOT_INIT or STOP to NORMAL.

    The bulk UPDATE is committed before the success message is logged, so the
    log line is only written once the reset has actually happened. If the
    update or the commit fails, the session is rolled back and the error is
    re-raised.
    """
    resettable = session.query(Account).filter(
        ~Account.status.in_([AccountStatus.NOT_INIT, AccountStatus.STOP]))
    try:
        resettable.update({Account.status: AccountStatus.NORMAL},
                          synchronize_session=False)
        session.commit()
    except Exception:
        # Leave the shared session usable for later queries.
        session.rollback()
        raise
    logger.info("[AccountInit] 所有非 [NOT_INIT, STOP] 账号已经重置为 NORMAL")
Пример #30
0
 def set_general(self, solved, submitted):
     """Store the solved/submitted counters on this object and log them."""
     self.solved, self.submitted = solved, submitted
     summary = '{} 更新 solved: {} / submitted: {}'.format(
         self, solved, submitted)
     logger.info(summary)
Пример #31
0
from tornado import ioloop

from app import make_spider_app
from app.api import make_api_app
from app.helpers.logger import setup_logger, logger
from app.helpers.redis_utils import setup_redis
from config import settings

# Script entry point: set up logging, build both apps, prepare redis, then
# run the IOLoop until the process is stopped.
if __name__ == '__main__':
    setup_logger(settings.log_level, settings.log_dir)
    logger.info('--------------------------------------')
    logger.info('--------------------------------------')
    logger.info('[ACM-Spider] 程序启动,初始化中 .........')

    # Attach SpiderApp and ApiApp to io_loop
    # NOTE(review): IOLoop() builds a new loop instance before .current() is
    # called on it; IOLoop.current() may be what was intended. Confirm.
    io_loop = ioloop.IOLoop().current()
    make_spider_app(io_loop)
    api_app = make_api_app()

    # Configure redis
    setup_redis()

    # Start serving: listen on the configured port and run the loop
    api_app.listen(settings.app_port)
    io_loop.start()
Пример #32
0
def log_spider_status():
    """Log the value returned by ``get_all_open_spider()``."""
    open_spiders = get_all_open_spider()
    logger.info('[OPEN Spider] {0}'.format(open_spiders))
Пример #33
0
def push_submit_to_queue(submit_id):
    """Log the push, then left-push a JSON ``submit`` message for ``submit_id``
    onto the ``RedisKey.achieve_mq`` list."""
    logger.info('[redis] push submit #{} to queue'.format(submit_id))
    payload = {'type': 'submit', 'id': submit_id}
    message = json.dumps(payload)
    redis.lpush(RedisKey.achieve_mq, message)
Пример #34
0
def setup_redis():
    """Create the spider switch hash in redis when missing, then log spider status.

    When ``RedisKey.switch`` does not exist, every OJ in ``SUPPORT_OJ`` is
    written to it with the value 1.
    """
    switch_key = RedisKey.switch
    if not redis.exists(switch_key):
        defaults = dict.fromkeys(SUPPORT_OJ, 1)
        created = redis.hmset(switch_key, defaults)
        if created:
            logger.info('[redis] setup switch key success')
    log_spider_status()
Пример #35
0
 def init_http_client():
     """Configure ``httpclient.AsyncHTTPClient`` to use the curl-based client.

     Any exception raised while configuring is logged and swallowed.
     """
     curl_client = "tornado.curl_httpclient.CurlAsyncHTTPClient"
     try:
         httpclient.AsyncHTTPClient.configure(curl_client)
         logger.info('[ACM-Spider] 配置 CurlAsyncHTTPClient 成功')
     except Exception as err:
         failure = '[ACM-Spider] 配置 CurlAsyncHTTPClient 失败: {}'.format(err)
         logger.error(failure)