def genia_line_IIH(inf, outf):
    """Rewrite tab-separated token rows as one output line per group.

    The input is passed through ``tools.line_stripper`` and then grouped by
    ``tools.blank_line_splitter`` (presumably one group per blank-line
    separated sentence -- confirm against ``tools``).  Every row of a group
    is split on tabs into five fields; the first four are treated as word,
    lemma, tag and chunk label, and the fifth is discarded.  The chunk
    labels of the group are converted with ``chunk_BII2IIH``.

    For each group a single line is written to ``outf``: four columns
    separated by tabs (words, lemmas, tags, converted chunk labels), the
    items inside each column separated by single spaces.

    Args:
        inf: Iterable of input lines (e.g. an open text file).
        outf: Writable text stream that receives the output lines.

    Raises:
        ValueError: If a group does not unpack into exactly five columns
            (for example, a row with a different number of tab-separated
            fields, or an empty group).
    """
    lines = tools.line_stripper(inf)
    for sublines in tools.blank_line_splitter(lines):
        rows = [line.split('\t') for line in sublines]
        # Transpose rows into per-column tuples; the fifth column is unused.
        word, lemma, tag, chunk, _ = zip(*rows)
        chunk = chunk_BII2IIH(chunk)
        # Write to outf explicitly instead of temporarily rebinding the
        # process-wide sys.stdout for every group: same bytes on the stream,
        # but no global side effect visible to other threads or callers.
        print(*word, end='\t', file=outf)
        print(*lemma, end='\t', file=outf)
        print(*tag, end='\t', file=outf)
        print(*chunk, file=outf)
def run(self):
    """Convert the task input into one output line per group of rows.

    Opens ``self.input()`` for reading and ``self.output()`` for writing.
    Input lines go through ``tools.line_stripper`` and are grouped by
    ``tools.blank_line_splitter`` (presumably blank-line separated
    sentences -- confirm against ``tools``).  Each row is split on tabs
    into five fields: word, lemma, tag, chunk label, and a fifth field
    that is ignored.  Chunk labels are converted with ``chunk_BII2IIH``.

    Each group becomes one output line of four tab-separated columns
    (words, lemmas, tags, converted chunk labels) whose items are joined
    by single spaces.

    Raises:
        ValueError: If a group does not unpack into exactly five columns.
    """
    with self.input().open('r') as reader:
        with self.output().open('w') as writer:
            stripped = tools.line_stripper(reader)
            for group in tools.blank_line_splitter(stripped):
                # Transpose the rows into per-column tuples; the last
                # column is not used.
                words, lemmas, tags, chunks, _ = zip(
                    *[row.split('\t') for row in group]
                )
                # Convert before writing anything for this group, so a
                # failure here leaves no partial line in the output.
                converted = chunk_BII2IIH(chunks)
                for column in (words, lemmas, tags):
                    print(*column, end='\t', file=writer)
                print(*converted, file=writer)