Пример #1
0
def publish_toutiao():
    """Drain the ``fetched_article`` Redis list and publish unseen articles.

    Every popped entry is JSON-decoded and rebuilt into an ``Article``. The
    ``repr`` of its title is the de-duplication key: when that key is not yet
    a member of the ``published_articles`` Redis set the article is published
    and the key is added to the set, otherwise the entry is logged and
    skipped. Any exception raised while handling an entry is logged and
    processing continues with the next entry.
    """
    redis_client = db.get_redis_client(config.get('app.redis'))
    seen_key = 'published_articles'
    toutiao = Toutiao()
    logger.info('Start toutiao publish-processing...')

    raw = redis_client.lpop('fetched_article')
    while raw:
        logger.info('Fetched article str from redis')
        try:
            # The client may hand back bytes; normalise to text first.
            payload = raw.decode() if isinstance(raw, bytes) else raw
            article = Article()
            article.rebuild(json.loads(payload))
            title = repr(article.title)
            logger.info('Pre-publish article [%s]' % title)

            # Guard clause: nothing to do for an empty or already-seen article.
            if not article or redis_client.sismember(seen_key, title):
                logger.error(
                    'Pre-publish article [%s] error, due to published before' %
                    title)
                continue

            toutiao.publish(article)
            redis_client.sadd(seen_key, title)
        except Exception as err:
            logger.error('Pickle loads article error')
            logger.error(err)
        finally:
            # Always advance to the next queued entry, even after a failure
            # or the ``continue`` above.
            raw = redis_client.lpop('fetched_article')
Пример #2
0
def publish_toutiao(event, context):
    """Publish every queued article that has not been published before.

    Pops serialized articles from the head of the ``fetched_article`` Redis
    list until it is empty. Each entry is JSON-decoded, rebuilt into an
    ``Article`` and identified by the upper-case MD5 hex digest of
    ``repr(article.title)``. Articles whose digest is not yet a member of the
    ``published_articles`` Redis set are published and their digest is added
    to that set; all others are logged and skipped.

    Any exception raised while handling one entry is logged and the loop
    moves on to the next entry (the failed entry has already been popped).

    Args:
        event: Not used by this function.
        context: Not used by this function.
    """
    import json
    from publisher.toutiao.publisher import ToutiaoPublisher
    from core import logger, config, db
    from entities import Article
    import hashlib

    client = db.get_redis_client(config.get('app.redis'))
    published_key = 'published_articles'
    publisher = ToutiaoPublisher()
    logger.info('Start toutiao publish-processing...')
    article_str = client.lpop('fetched_article')
    while article_str:
        logger.info('Fetched article str from redis')
        try:
            if isinstance(article_str, bytes):
                article_str = article_str.decode()
            article_json = json.loads(article_str)
            article = Article()
            article.rebuild(article_json)
            title = repr(article.title)
            hashed_title = hashlib.md5(title.encode('utf8')).hexdigest().upper()
            logger.info('Pre-publish article [%s] hash value [%s]' % (title, hashed_title))
            # The membership test must use the same value that is stored
            # below. The builtin hash() returns a per-process salted int that
            # never equals the stored MD5 digest, so checking it would let
            # every article through as "new".
            if article and (not client.sismember(published_key, hashed_title)):
                publisher.publish(article)
                client.sadd(published_key, hashed_title)
            else:
                logger.error('Pre-publish article [%s] error, due to published before' % title)
        except Exception as e:
            # This handler covers decoding, rebuilding and publishing
            # failures alike; the log text is kept unchanged.
            logger.error('Pickle loads article error')
            logger.error(e)
        finally:
            # Always advance to the next queued entry, even after a failure.
            article_str = client.lpop('fetched_article')
Пример #3
0
def fetch(event, context):
    """Fetch articles and queue them on the ``fetched_article`` Redis list.

    Builds a Jianshu fetcher from the configured seminars, limit and debug
    flag, gathers everything it yields, orders the result in descending
    order (using the articles' own comparison), clears each article's
    ``summary`` and pushes its JSON form onto the tail of the list.

    Args:
        event: Not used by this function.
        context: Not used by this function.
    """
    # Function-local imports, kept in their original order.
    import json
    from fetcher import jianshu
    from core import config, logger, db

    def _as_dict(obj):
        # Fallback serializer for objects json cannot encode natively.
        return obj.__dict__

    settings = {
        'seminars': config.get('fetcher.jianshu.seminars'),
        'limit': config.get('fetcher.jianshu.limit'),
        'debug': config.get('app.debug'),
    }
    sources = [jianshu.Jianshu(jianshu.Config(settings))]
    redis_client = db.get_redis_client(config.get('app.redis'))

    collected = [item for source in sources for item in source.fetch()]
    collected.sort(reverse=True)

    for item in collected:
        # The summary is dropped before the article is serialized.
        item.summary = None
        payload = json.dumps(item, default=_as_dict)
        if redis_client.rpush('fetched_article', payload):
            logger.info('Push [%s] to redis successfully' % repr(item.title))
Пример #4
0
 def __init__(self):
     """Read the Jianshu fetcher settings from ``config`` and obtain a
     Redis client through ``db.get_redis_client``."""
     read = config.get
     self._seminars = read('fetcher.jianshu.seminars')
     self._limit = read('fetcher.jianshu.limit')
     self._up_to_last_time = read('fetcher.jianshu.up_to_last_time')
     self._debug = read('app.debug')
     # Client built from the 'app.redis' configuration entry.
     self._set_manager = db.get_redis_client(read('app.redis'))
Пример #5
0
    while True:
        try:
            t = time.localtime(time.time())
            if t.tm_hour not in time_point:
                continue
            cfg = jianshu.Config({
                'seminars':
                config.get('fetcher.jianshu.seminars'),
                'limit':
                config.get('fetcher.jianshu.limit'),
                'debug':
                config.get('app.debug')
            })
            fetchers = [jianshu.Jianshu(cfg)]
            analytizer = Analytizer()
            client = db.get_redis_client(config.get('app.redis'))

            articles = []
            for f in fetchers:
                for article in f.fetch():
                    analytizer.estimate(article)
                    articles.append(article)
                del f
            del analytizer
            articles = sorted(articles, reverse=True)
            for article in articles:
                article.summary = None
                article_str = json.dumps(article,
                                         default=lambda obj: obj.__dict__)
                rtn = client.rpush('fetched_article', article_str)
                if rtn:
Пример #6
0
 def __init__(self, cfg: Config):
     """Keep *cfg* and obtain a Redis client through ``db.get_redis_client``.

     Args:
         cfg: Configuration object stored on the instance as ``_config``.
     """
     self._config = cfg
     redis_settings = config.get('app.redis')
     self._set_manager = db.get_redis_client(redis_settings)