Пример #1
0
def aggregate_file(client, file_path, categs):
    """
    Aggregate all links listed in a text file.

    Each line of the file is treated as one URL; surrounding whitespace
    (including the trailing newline) is stripped before it is handed on.

    Args:
        client: Passed through unchanged to ``feeds.aggregate_all``.
        file_path: Path of the text file to read, one URL per line.
        categs: Categories value paired, unchanged, with every URL.
    """
    log.info('Loading from file: %s' % file_path)

    def read_data():
        # The ``with`` block guarantees the handle is closed once the
        # generator is exhausted (or closed/collected early) instead of
        # leaking it. Iterating the file object directly is the lazy,
        # line-by-line replacement for the deprecated ``xreadlines()``.
        with open(file_path) as handle:
            for line in handle:
                yield line.strip(), categs

    feeds.aggregate_all(client, read_data(), get_hbase_client)
Пример #2
0
def aggregate_opml(client, file, categs):
    """
    Aggregate every feed referenced by an OPML file.

    The file is loaded with ``OpmlLoader``. If the loader reports it as
    invalid, an error is logged and nothing is yielded; otherwise the
    ``xmlUrl`` of each element is paired with ``categs``.

    Args:
        client: Passed through unchanged to ``feeds.aggregate_all``.
        file: OPML source handed to ``OpmlLoader``.
        categs: Categories value paired, unchanged, with every feed URL.
    """
    log.info('Loading from file: %s' % file)

    def opml_entries():
        opml = OpmlLoader(file)
        if opml.is_valid():
            for outline in opml:
                yield outline.xmlUrl, categs
        else:
            # Nothing is yielded for an invalid file; just report it.
            log.error('Invalid opml file: %s' % file)

    feeds.aggregate_all(client, opml_entries(), get_hbase_client)
Пример #3
0
def refresh_feeds(client, allowed_categs):
    """
    Re-aggregate the feeds stored in the 'Feeds' table.

    Rows are scanned from the database and handed to
    ``feeds.aggregate_all``. When ``allowed_categs`` is non-empty after
    splitting, only rows for which ``any_in`` matches the row's split
    categories against the allowed ones are refreshed.

    NOTE(review): any thread pooling presumably happens inside
    ``feeds.aggregate_all``; it is not visible in this function.

    Args:
        client: Database client used for the scan and passed on to
            ``feeds.aggregate_all``.
        allowed_categs: Value given to ``split_csv`` to obtain the allowed
            categories; an empty result disables filtering.
    """
    log.info('Starting to refresh all feeds')
    wanted = split_csv(allowed_categs)

    def stored_feeds():
        for row in db.Scanner(client, 'Feeds', ['Meta:']):
            feed = row.row
            categs = row.columns['Meta:categs'].value
            # No filter configured, or the row matches the filter.
            if not wanted or any_in(split_csv(categs), wanted):
                yield feed, categs

    feeds.aggregate_all(client, stored_feeds(), get_hbase_client)