Exemplo n.º 1
0
def load_metas(path, pattern, offset, count, load):
    """Yield every item that ``parse_meta`` produces for the selected records.

    Records are obtained from ``load(path, offset)``, then passed through
    ``match_names(records, pattern)`` and ``head(records, count)``.
    NOTE(review): presumably ``match_names`` filters on record names and
    ``head`` keeps the first ``count`` records -- confirm against those
    helpers, which are not defined in this block.

    Args:
        path: Passed as the first positional argument to ``load``.
        pattern: Passed to ``match_names`` to select records.
        offset: Passed as the second positional argument to ``load``.
        count: Passed to ``head`` to limit the records processed.
        load: Callable invoked as ``load(path, offset)`` that returns the
            records; each record must expose a ``file`` attribute.

    Yields:
        The items produced by ``parse_meta(record.file)``, record by record.
    """
    selected = head(match_names(load(path, offset), pattern), count)
    for entry in selected:
        for meta in parse_meta(entry.file):
            yield meta
Exemplo n.º 2
0
def load_taiga_social(path, offset=3985892864, count=4):
    """Yield the items parsed from the social-network text files of an archive.

    Records are read with ``load_tar(path, offset=offset)``, narrowed with
    ``match_names(records, '*/texts/*.txt')`` and limited with
    ``head(records, count)``. For each remaining record the network is looked
    up in ``NETWORKS`` using the id parsed from the record name, and the
    record's file is handed to ``parse_social`` together with that network.

    Args:
        path: Archive path passed to ``load_tar``.
        offset: Forwarded to ``load_tar`` as ``offset``.
            NOTE(review): the default looks like a byte position inside one
            specific archive -- confirm before reusing with other files.
        count: Passed to ``head`` to limit the records processed.

    Yields:
        Each item produced by ``parse_social(record.file, network)``.

    Raises:
        KeyError: If the id parsed from a record name is missing from
            ``NETWORKS`` (assuming ``NETWORKS`` is a plain mapping).
    """
    records = load_tar(path, offset=offset)
    records = match_names(records, '*/texts/*.txt')
    records = head(records, count)
    for record in records:
        # Keep the parsed id and the looked-up network under separate names.
        network_id = parse_filename_id(record.name)
        network = NETWORKS[network_id]
        # Use a distinct loop variable so the archive record being processed
        # is not shadowed by the items parsed out of it.
        for item in parse_social(record.file, network):
            yield item
Exemplo n.º 3
0
def load_texts(path, pattern, offset, count, parse_id, load, encoding='utf8'):
    """Yield one ``TaigaRecord`` with decoded text per selected record.

    Records are obtained from ``load(path, offset=offset)``, then passed
    through ``match_names(records, pattern)`` and ``head(records, count)``.
    Each remaining record's file is decoded in full and emitted with
    ``meta=None``.

    Args:
        path: Passed as the first argument to ``load``.
        pattern: Passed to ``match_names`` to select records.
        offset: Forwarded to ``load`` as the ``offset`` keyword argument.
        count: Passed to ``head`` to limit the records processed.
        parse_id: Callable mapping ``record.name`` to the record id.
        load: Callable invoked as ``load(path, offset=offset)`` that returns
            the records; each record must expose ``name`` and ``file``.
        encoding: Text encoding used to decode each record's file.

    Yields:
        ``TaigaRecord(id=..., meta=None, text=...)`` for every record.
    """
    records = load(path, offset=offset)
    records = match_names(records, pattern)
    records = head(records, count)
    for record in records:
        # Local names avoid shadowing the ``id`` builtin (and Python 2 ``file``).
        record_id = parse_id(record.name)
        # Close the decoding wrapper deterministically once the text has been
        # read instead of leaving the cleanup to garbage collection.
        with TextIOWrapper(record.file, encoding) as reader:
            text = reader.read()
        yield TaigaRecord(id=record_id, meta=None, text=text)