if __name__ == '__main__':
    # Script entry point: annotate every AMR read from each input file and
    # append the annotated AMRs to "<input path>.features".
    import argparse

    from stog.data.dataset_readers.amr_parsing.io import AMRIO

    parser = argparse.ArgumentParser('feature_annotator.py')
    parser.add_argument('files', nargs='+', help='files to annotate.')
    parser.add_argument('--compound_file', default='')
    # The URL used to be hard-coded in the FeatureAnnotator call below; the
    # default keeps existing command lines working unchanged.
    parser.add_argument(
        '--server_url',
        default='http://localhost:9000',
        help='URL passed to FeatureAnnotator as its first argument.',
    )

    args = parser.parse_args()

    annotator = FeatureAnnotator(args.server_url, args.compound_file)

    for file_path in args.files:
        logger.info('Processing {}'.format(file_path))
        output_path = file_path + '.features'
        for i, amr in enumerate(AMRIO.read(file_path), 1):
            # Progress report every 1000 AMRs (i is 1-based).
            if i % 1000 == 0:
                logger.info('{} processed.'.format(i))

            annotation = annotator(amr.sentence)
            amr.tokens = annotation['tokens']
            amr.lemmas = annotation['lemmas']
            amr.pos_tags = annotation['pos_tags']
            amr.ner_tags = annotation['ner_tags']
            amr.original = annotation['original']

            # The output is reopened in append mode for each AMR, so every
            # record is written out as soon as it has been annotated.
            # NOTE: because of mode 'a', an already existing ".features"
            # file is extended rather than overwritten.
            with open(output_path, 'a', encoding='utf-8') as f:
                AMRIO.dump([amr], f)
    logger.info('Done!')
def dump_amr_features(amr, annotation, f):
    """Copy annotated features onto ``amr`` and dump it to ``f``.

    Args:
        amr: Object whose ``tokens``, ``lemmas``, ``pos_tags`` and
            ``ner_tags`` attributes are overwritten in place.
        annotation: Mapping indexed with the keys ``'tokens'``,
            ``'lemmas'``, ``'pos_tags'`` and ``'ner_tags'``.
        f: Output target handed to ``AMRIO.dump`` (presumably an open,
            writable text file -- confirm against callers).

    Raises:
        KeyError: If ``annotation`` is a dict missing one of the keys above.
    """
    # Imported locally: the only other visible import of AMRIO sits inside
    # the ``if __name__ == '__main__'`` guard, so without this the name is
    # undefined whenever the module is imported instead of run as a script.
    from stog.data.dataset_readers.amr_parsing.io import AMRIO

    amr.tokens = annotation['tokens']
    amr.lemmas = annotation['lemmas']
    amr.pos_tags = annotation['pos_tags']
    amr.ner_tags = annotation['ner_tags']
    # NOTE(review): the script's main loop also sets ``amr.original`` from
    # ``annotation['original']``; this helper does not -- confirm whether
    # that difference is intended.
    AMRIO.dump([amr], f)