def _tagged_data_to_parsed(overwrite=False):
    """Convert every tagged data file into a parsed-tree file.

    Walks ``<_EVENT_EVAL_DATA_PATH>/tagged`` recursively. Each file found is
    loaded with ``load_file(path, "tagged")``, converted with
    ``EventChunkParser._tagged_to_parse`` and the result of ``tree.pprint()``
    is written to ``<_EVENT_EVAL_DATA_PATH>/parsed/<name>.parsed``, where
    ``<name>`` is the input file name without its extension.

    Output names are built from the bare file name only, so files with the
    same name in different sub-directories of ``tagged`` map to the same
    output path.

    Args:
        overwrite: When false (the default), an input whose output file
            already exists is skipped with a message; when true, the
            existing output file is rewritten.
    """
    # Both directories are loop-invariant, so build them once.
    tagged_dir = os.path.join(_EVENT_EVAL_DATA_PATH, "tagged")
    parsed_dir = os.path.join(_EVENT_EVAL_DATA_PATH, "parsed")

    for root, dirs, files in os.walk(tagged_dir):
        for f in files:
            parsed_fn = os.path.splitext(f)[0] + '.parsed'
            parsed_filepath = os.path.join(parsed_dir, parsed_fn)

            if os.path.exists(parsed_filepath) and not overwrite:
                print("file {0} already exists, skipping..."
                      "".format(parsed_filepath))
                continue

            tagged_tokens = load_file(os.path.join(root, f), "tagged")
            tree = EventChunkParser._tagged_to_parse(tagged_tokens)

            # Render before opening the output file: if rendering fails, no
            # empty '.parsed' file is left behind to be skipped as "already
            # exists" on the next run.
            # NOTE(review): in NLTK 3+, Tree.pprint() reportedly prints and
            # returns None, while Tree.pformat() returns the string - confirm
            # which NLTK version this targets.
            parsed_text = tree.pprint()

            with open(parsed_filepath, 'w') as parsed_fh:
                parsed_fh.write(parsed_text)