def load_id(id, dir, set):
    """Build a FactruMarkup for one document from its text, spans and objects.

    The three source locations are resolved with ``txt_path``,
    ``spans_path`` and ``objects_path`` from the same ``(id, dir, set)``
    triple. Spans are fully materialised before objects are parsed because
    ``parse_objects`` receives the span list as its second argument.

    Returns:
        ``FactruMarkup(id, text, objects)``.
    """
    text = load_text(txt_path(id, dir, set))

    span_lines = load_lines(spans_path(id, dir, set))
    spans = list(parse_spans(span_lines))

    object_lines = load_lines(objects_path(id, dir, set))
    objects = list(parse_objects(object_lines, spans))

    return FactruMarkup(id, text, objects)
def load_id(id, dir):
    """Build an Ne5Markup for one document from its text and annotation file.

    The text location comes from ``txt_path(id, dir)`` and the annotation
    location from ``ann_path(id, dir)``; the annotation lines are parsed
    into a list of spans with ``parse_spans``.

    Returns:
        ``Ne5Markup(id, text, spans)``.
    """
    text = load_text(txt_path(id, dir))
    annotation_lines = load_lines(ann_path(id, dir))
    spans = list(parse_spans(annotation_lines))
    return Ne5Markup(id, text, spans)
def load_id(id, dir, set):
    """Build a FactruMarkup for one document from its five part files.

    Parts are read in the order TXT, SPANS, OBJECTS, COREF, FACTS. Each
    parsed part is turned into a list before the next one is read, since
    later parsers take earlier results as arguments (objects need spans,
    corefs need objects, facts need corefs and spans).

    Returns:
        ``FactruMarkup(id, text, objects, corefs, facts)``.
    """

    def part_lines(part):
        # Every non-text part is read the same way: resolve its path,
        # then load its lines.
        return load_lines(part_path(id, dir, set, part))

    text = load_text(part_path(id, dir, set, TXT))
    spans = list(parse_spans(part_lines(SPANS)))
    objects = list(parse_objects(part_lines(OBJECTS), spans))
    corefs = list(parse_corefs(part_lines(COREF), objects))
    facts = list(parse_facts(part_lines(FACTS), corefs, spans))

    return FactruMarkup(id, text, objects, corefs, facts)
def load_rudrec(path):
    """Load the file at ``path`` and parse it as RuDReC records.

    The lines are first passed through ``parse_jsonl`` and the resulting
    items through ``parse_rudrec``.

    Returns:
        Whatever ``parse_rudrec`` returns for the parsed items.
    """
    return parse_rudrec(parse_jsonl(load_lines(path)))
def load_annotated(records):
    """Yield one parsed annotation per entry of ``records``.

    Each entry must expose ``path`` and ``name`` attributes: the lines at
    ``path`` are loaded and handed to ``parse_annotated`` together with
    ``name``.

    Yields:
        The result of ``parse_annotated(name, lines)`` for each entry, in
        input order.
    """
    for entry in records:
        entry_lines = load_lines(entry.path)
        yield parse_annotated(entry.name, entry_lines)
def load_ud(path):
    """Load the lines at ``path`` and return ``parse_ud`` applied to them."""
    return parse_ud(load_lines(path))
def load_id(id, dir):
    """Load and parse the ``<id>.txt.iob`` file located in ``dir``.

    Returns:
        Whatever ``parse_conll`` returns for the file's lines.
    """
    # %-formatting is kept deliberately so the file name is built exactly
    # as before for any kind of ``id`` value.
    filename = '%s.txt.iob' % id
    return parse_conll(load_lines(join_path(dir, filename)))
def load_russe(path):
    """Load the lines at ``path`` and return ``parse_russe`` applied to them."""
    return parse_russe(load_lines(path))
def load_toloka_lrwc(path):
    """Load the lines at ``path`` and return ``parse_toloka_lrwc`` applied to them."""
    return parse_toloka_lrwc(load_lines(path))
def load_ruadrect(path):
    """Load the lines at ``path`` and return ``parse_ruadrect`` applied to them."""
    return parse_ruadrect(load_lines(path))
def load_simlex(path):
    """Load the lines at ``path`` and return ``parse_simlex`` applied to them."""
    return parse_simlex(load_lines(path))
def load_mokoron(path):
    """Yield a MokoronRecord for every RECORD match in the file at ``path``.

    Only lines that start with ``INSERT`` are scanned; all other lines are
    skipped. Within a qualifying line, each match of the ``RECORD`` pattern
    is converted with ``MokoronRecord.from_match``.
    """
    for row in load_lines(path):
        if not row.startswith(INSERT):
            continue
        yield from (
            MokoronRecord.from_match(found)
            for found in RECORD.finditer(row)
        )
def load_morphoru_corpora(path):
    """Load the lines at ``path`` and parse them with ``parse_morphoru``.

    ``parse_morphoru_corpora_sent`` is passed as the second argument of
    ``parse_morphoru``.
    """
    return parse_morphoru(load_lines(path), parse_morphoru_corpora_sent)
def load_morphoru_rnc(path):
    """Load the lines at ``path`` and return ``parse_morphoru_rnc`` applied to them."""
    return parse_morphoru_rnc(load_lines(path))
def load_morphoru_gicrya(path):
    """Load the lines at ``path`` and parse them with ``parse_morphoru``.

    ``parse_morphoru_gicrya_sent`` is passed as the second argument of
    ``parse_morphoru``.
    """
    return parse_morphoru(load_lines(path), parse_morphoru_gicrya_sent)