Example #1
def _do_find(url, max_trys, printer, rss_proxy_url, rss_proxy_token):
    def message_handler(msg):
        print(msg)

    finder = FeedFinder(
        url,
        max_trys=max_trys,
        rss_proxy_url=rss_proxy_url,
        rss_proxy_token=rss_proxy_token,
        message_handler=message_handler,
    )
    with finder:
        found = finder.find()
    if found:
        response, raw_result = found
        printer('-> {}'.format(response))
        result = FeedParser().parse(raw_result)
        printer("-> {}".format(result))
        printer('-' * 79)
        printer(pretty_format_json(result.feed))
        for i, story in enumerate(result.storys):
            printer('{:03d}{}'.format(i, '-' * 76))
            story['content'] = shorten(story['content'], 60)
            story['summary'] = shorten(story['summary'], 60)
            printer(pretty_format_json(story))
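The helper above expects a printer callable and proxy settings from its caller. As a minimal sketch of how it might be wired to a command line, assuming a click-based entry point whose option names and defaults are illustrative rather than the project's actual CLI:

import click

@click.command()
@click.argument('url')
@click.option('--max-trys', type=int, default=10, help='Maximum number of candidate URLs to try (illustrative default).')
@click.option('--rss-proxy-url', default=None)
@click.option('--rss-proxy-token', default=None)
def find(url, max_trys, rss_proxy_url, rss_proxy_token):
    # plain print stands in for the printer callable here
    _do_find(url, max_trys, print, rss_proxy_url, rss_proxy_token)

if __name__ == '__main__':
    find()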
Example #2
def write(self, response: FeedResponse):
    content_length = 0
    if response.content:
        content_length = len(response.content)
    feed_type = response.feed_type.value if response.feed_type else None
    filename = None
    if response.content:
        file_ext = self._get_file_ext(response)
        filename = os.path.basename(self._filename) + file_ext
    meta = dict(
        filename=filename,
        url=response.url,
        status=response.status,
        content_length=content_length,
        encoding=response.encoding,
        feed_type=feed_type,
        mime_type=response.mime_type,
        use_proxy=response.use_proxy,
        etag=response.etag,
        last_modified=response.last_modified,
    )
    os.makedirs(self._output_dir, exist_ok=True)
    with open(self._meta_filepath, 'w') as f:
        f.write(pretty_format_json(meta))
    if filename:
        filepath = _normalize_path(os.path.join(self._output_dir, filename))
        with open(filepath, 'wb') as f:
            f.write(response.content)
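The meta file written here contains plain text produced by pretty_format_json, so it can be read back with the standard library. A minimal sketch, assuming the output is standard JSON; the path below is hypothetical:

import json

meta_filepath = 'output/feed.meta.json'  # hypothetical path; the class tracks the real one
with open(meta_filepath) as f:
    meta = json.load(f)
print(meta['url'], meta['status'], meta['feed_type'])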
Example #3
def main(verify, filepath, verify_bias):
    if verify:
        if verify != '-':
            with open(verify) as f:
                data = json.load(f)
            result = [(x['name'], x['count']) for x in data['tables']]
        else:
            result = pg_count()
        if filepath and filepath != '-':
            with open(filepath) as f:
                content = f.read()
        else:
            content = sys.stdin.read()
        expect_data = json.loads(content)
        expect_result = [(x['name'], x['count'])
                         for x in expect_data['tables']]
        is_ok = pg_verify(result, expect_result, verify_bias)
        sys.exit(0 if is_ok else 1)
    else:
        result = pg_count()
        tables = [dict(name=name, count=count) for name, count in result]
        content = pretty_format_json(dict(tables=tables))
        if filepath and filepath != '-':
            with open(filepath, 'w') as f:
                f.write(content)
        else:
            print(content)
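pg_verify is not defined in this snippet. One plausible reading, shown only as a hedged sketch rather than the project's actual implementation, compares the two lists of (name, count) pairs and treats verify_bias as the maximum allowed difference per table:

def pg_verify(result, expect_result, verify_bias):
    # Sketch: every expected table must be present, and its actual row count
    # may differ from the expected count by at most verify_bias rows.
    actual = dict(result)
    for name, expect_count in expect_result:
        count = actual.get(name)
        if count is None or abs(count - expect_count) > verify_bias:
            return False
    return True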
Example #4
def _do_parse(
    url: str,
    printer,
    checksum,
    save_checksum,
    proxy_url,
    rss_proxy_url,
    rss_proxy_token,
):
    if not url.startswith('http://') and not url.startswith('https://'):
        response_file = FeedResponseFile(url)
        response = response_file.read()
    else:
        reader = FeedReader(
            proxy_url=proxy_url,
            rss_proxy_url=rss_proxy_url,
            rss_proxy_token=rss_proxy_token,
        )
        with reader:
            response = reader.read(url, use_proxy=reader.has_proxy)
    print('-> {}'.format(response))
    if not response.ok:
        return
    if checksum:
        with open(checksum, 'rb') as f:
            data = f.read()
        checksum = FeedChecksum.load(data)
        print('-> {}'.format(checksum))
    else:
        checksum = None
    raw_result = RawFeedParser().parse(response)
    if raw_result.warnings:
        print('Warning: ' + '; '.join(raw_result.warnings))
    result = FeedParser(checksum=checksum).parse(raw_result)
    print("-> {}".format(result))
    printer('-' * 79)
    printer(pretty_format_json(result.feed))
    for i, story in enumerate(result.storys):
        printer('{:03d}{}'.format(i, '-' * 76))
        story['content'] = shorten(story['content'], 60)
        story['summary'] = shorten(story['summary'], 60)
        printer(pretty_format_json(story))
    if save_checksum:
        print('-> save {}'.format(save_checksum))
        data = result.checksum.dump()
        with open(save_checksum, 'wb') as f:
            f.write(data)
Example #5
File: rss.py  Project: sun816/rssant
def delete_invalid_feeds(days=1, limit=100, threshold=99):
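    # Feeds with more than 3 responses of the same error status (status < 200
    # or >= 400) within the window, joined with the feed's title/link/url.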
    sql = """
    SELECT feed_id, title, link, url, status_code, count FROM (
        SELECT feed_id, status_code, count(1) as count FROM rssant_api_rawfeed
        WHERE dt_created >= %s and (status_code < 200 or status_code >= 400)
        group by feed_id, status_code
        having count(1) > 3
        order by count desc
        limit %s
    ) error_feed
    join rssant_api_feed
        on error_feed.feed_id = rssant_api_feed.id
    order by feed_id, status_code, count;
    """
    sql_ok_count = """
    SELECT feed_id, count(1) as count FROM rssant_api_rawfeed
    WHERE dt_created >= %s and (status_code >= 200 and status_code < 400)
        AND feed_id=ANY(%s)
    group by feed_id
    """
    t_begin = timezone.now() - timezone.timedelta(days=days)
    error_feeds = defaultdict(dict)
    with connection.cursor() as cursor:
        cursor.execute(sql, [t_begin, limit])
        for feed_id, title, link, url, status_code, count in cursor.fetchall():
            error_feeds[feed_id].update(feed_id=feed_id,
                                        title=title,
                                        link=link,
                                        url=url)
            error = error_feeds[feed_id].setdefault('error', {})
            error_name = FeedResponseStatus.name_of(status_code)
            error[error_name] = count
            error_feeds[feed_id]['error_count'] = sum(error.values())
            error_feeds[feed_id].update(ok_count=0, error_percent=100)
        cursor.execute(sql_ok_count, [t_begin, list(error_feeds)])
        for feed_id, ok_count in cursor.fetchall():
            feed = error_feeds[feed_id]
            total = feed['error_count'] + ok_count
            error_percent = round((feed['error_count'] / total) * 100)
            feed.update(ok_count=ok_count, error_percent=error_percent)
    error_feeds = list(
        sorted(error_feeds.values(),
               key=lambda x: x['error_percent'],
               reverse=True))
    delete_feed_ids = []
    for feed in error_feeds:
        if feed['error_percent'] >= threshold:
            delete_feed_ids.append(feed['feed_id'])
            click.echo(pretty_format_json(feed))
    if delete_feed_ids:
        confirm_delete = click.confirm(f'Delete {len(delete_feed_ids)} feeds?')
        if not confirm_delete:
            click.echo('Abort!')
        else:
            UnionFeed.bulk_delete(delete_feed_ids)
            click.echo('Done!')
    return error_feeds
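For a concrete sense of the threshold check above, the percentage uses only responses inside the window:

error_count, ok_count = 7, 3
error_percent = round(error_count / (error_count + ok_count) * 100)  # 70
# 70 < the default threshold of 99, so such a feed would not be deleted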
Example #6
async def do_register(ctx: ActorContext, node: NodeSpecSchema) -> T.dict(nodes=T.list(NodeSpecSchema)):
    LOG.info(f'register node:\n{pretty_format_json(node)}')
    existed = ctx.registery.get(node['name'])
    if existed and existed.to_spec() == node:
        LOG.info(f'register node {node["name"]} already existed and no changes')
    else:
        ctx.registery.add(node)
        LOG.info('current registery info:\n' + pretty_format_json(ctx.registery.to_spec()))
        await ctx.tell('scheduler.save_registery')
    return dict(nodes=ctx.registery.to_spec())
Example #7
def do_load_registery(ctx: ActorContext):
    registery_node = ctx.registery.registery_node.name
    LOG.info(f'load registery info for {registery_node}')
    registery = Registery.get(registery_node)
    if registery:
        ctx.registery.update(registery.node_specs)
        title = 'loaded'
    else:
        title = 'current'
    LOG.info(f'{title} registery info:\n' +
             pretty_format_json(ctx.registery.to_spec()))
    ctx.tell('scheduler.boardcast_registery')
Example #8
def on_startup(app):
    while True:
        try:
            r = app.ask('scheduler.register', dict(node=app.registery.current_node.to_spec()))
        except Exception as ex:
            LOG.warning(f'ask scheduler.register failed: {ex}')
            time.sleep(3)
        else:
            app.registery.update(r['nodes'])
            break
    nodes = pretty_format_json(app.registery.to_spec())
    LOG.info('current registery:\n' + nodes)
Example #9
def on_startup(app):
    while True:
        try:
            r = app.ask('scheduler.register', dict(node=app.registery.current_node.to_spec()))
        except Exception as ex:
            LOG.warning(f'ask scheduler.register failed: {ex}')
            time.sleep(3)
        else:
            app.registery.update(r['nodes'])
            break
    nodes = pretty_format_json(app.registery.to_spec())
    LOG.info(f'current registery:\n' + nodes)
    if app.kong_client:
        LOG.info(f'kong register {app.name} url={app.kong_actor_url}')
        while True:
            try:
                app.kong_client.register(app.name, app.kong_actor_url)
            except Exception as ex:
                LOG.warning(f'kong register failed: {ex}')
                time.sleep(3)
            else:
                break
Example #10
def do_update_registery(ctx, nodes: T.list(NodeSpecSchema)):
    LOG.info(f'update registery {ctx.message}')
    ctx.registery.update(nodes)
    nodes = pretty_format_json(ctx.registery.to_spec())
    LOG.info(f'current registery:\n' + nodes)
Example #11
def do_load_registery(ctx: ActorContext):
    registery_node = ctx.registery.registery_node.name
    registery_info = pretty_format_json(ctx.registery.to_spec())
    LOG.info(f'load registery info for {registery_node}:\n' + registery_info)
    ctx.tell('scheduler.boardcast_registery')
Example #12
def print_health(self):
    print(pretty_format_json(self.health()))
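Every example on this page routes structured data through pretty_format_json. Its implementation is not shown here; as a minimal sketch, assuming it is essentially an indenting json.dumps wrapper that keeps non-ASCII text readable (the real rssant helper may differ):

import json

def pretty_format_json(data):
    # sketch only: human-readable, indented JSON output
    return json.dumps(data, ensure_ascii=False, indent=4)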