예제 #1
0
def terminate(*msg):
    """Hand back the in-use account and current task, publish status, exit.

    Reads the module-level ``status``, ``task`` and ``finished`` objects.

    Args:
        *msg: Optional. When at least one argument is given, the first one is
            logged, stored in ``status`` as the error message, and passed to
            ``sys.exit``. With no arguments the exit is recorded as graceful
            and the exit code is 0.

    Raises:
        SystemExit: Always, via ``sys.exit``.
    """
    has_reason = len(msg) > 0

    if has_reason:
        reason = msg[0]
        logger.error(reason)
        outcome = {
            'status': 'error',
            'message': str(reason)
        }
    else:
        outcome = {
            'status': 'exit',
            'message': 'Exit gracefully',
        }
    status.update(outcome)

    logger.error('Terminate gracefully...')

    # Give the account currently in use back to rh, if there is one.
    current_account = Account.get_using()
    if 'username' in current_account:
        rh.push_acct(current_account)
        logger.error('return account %s', current_account['username'])

    # Give the unfinished task back to rh; an empty task means nothing to return.
    if task:
        rh.lpush_task_user(task)
        logger.error('return task %s', task)

    # Publish one last status snapshot before leaving.
    status.update(
        finished=finished,
        task=task,
        account=current_account,
        update_time=int(time.time()),
    )
    rh.publish_status(status)

    logger.error('Exit')
    sys.exit(reason if has_reason else 0)
예제 #2
0
def start(instance_id):
    """Run the crawl loop for one worker instance.

    Publishes an initial status record through ``rh``, then loops forever:
    fetch a task from ``rh``, skip it if ``rh`` reports it as already
    crawled, crawl it with ``FollowingsCrawler``, push the result, and bump
    the module-level ``finished`` counters.

    The loop has no exit condition, so the function only leaves through an
    exception. ``ResponseException``, ``NetworkException`` and any other
    ``Exception`` are routed to ``terminate()``; ``RedisException`` exits
    directly with ``sys.exit``.

    Args:
        instance_id: Identifier of this worker; joined with the hostname to
            form the ``id`` field of the published status.

    Raises:
        SystemExit: On every handled failure path (directly or through
            ``terminate()``).
    """
    global task
    global status
    task = ''

    try:
        logger.warning('Instance id: %s', instance_id)

        hostname = socket.gethostname()
        ip = socket.gethostbyname(hostname)
        start_time = int(time.time())

        session = SessionHelper()

        status = {
            'id': hostname + '-' + str(instance_id),
            'hostname': hostname,
            'ip': ip,
            'finished': finished,
            'task': '',
            'status': 'init',
            'message': '',
            'account': Account.get_using(),
            'start_time': start_time,
            'update_time': int(time.time())
        }

        rh.publish_status(status)

        while True:
            # Clear the previous task before fetching the next one: if the
            # fetch raises, terminate() sees an empty task and does not
            # re-queue the one that was already handled.
            task = ''
            task = rh.get_task_user()
            # Lazy %-args instead of string concatenation, so a non-str task
            # cannot raise TypeError inside the log call.
            # NOTE(review): a non-str task (e.g. None) used to abort here via
            # TypeError; it now reaches rh.is_user_crawled -- confirm that
            # rh.get_task_user() always yields a usable task.
            logger.warning('Get task: %s', task)

            if rh.is_user_crawled(task):
                logger.warning("User %s crawled, skip", task)
                continue

            status.update({
                'finished': finished,
                'task': task,
                'status': 'crawling',
                'account': Account.get_using(),
                'update_time': int(time.time())
            })
            rh.publish_status(status)

            try:
                fc = FollowingsCrawler(session, task)
                user = fc.get()
                logger.warning('Push result: %s', task)
                rh.push_result_user(user)

                finished['user'] += 1
                finished['followings'] += len(user['followings'])

                # Pause a random 1-5 seconds before taking the next task.
                time.sleep(random.uniform(1, 5))
            except NotFoundException:
                # A missing user is not fatal; move on to the next task.
                logger.error("User %s not found, continue", task)
                continue
    except ResponseException:
        message = 'Crawling response error, push back task, quit'
        terminate(message)
    except RedisException:
        # Exits without terminate(), presumably because terminate() itself
        # goes through rh -- TODO confirm.
        logger.error('Redis connection error, quit')
        sys.exit('Redis Error!')
    except NetworkException:
        message = 'Network connection error, quit'
        terminate(message)
    except Exception as e:
        # Last-resort boundary: dump the traceback, then shut down cleanly.
        print(traceback.format_exc())
        terminate(e)