def run(filepaths, out_file, s_parsed):
    """Code the events read from ``filepaths`` and write them out.

    This is the routine called from main().

    Steps, in order:
      1. ``PETRreader.read_xml_input(filepaths, s_parsed)`` loads the events.
      2. When ``s_parsed`` is falsy the events are passed through
         ``utilities.stanford_parse``.
      3. If the module-level ``gna`` flag is truthy, ``get_nullactor`` is
         called on the events and nothing is coded or written.
      4. Otherwise the events go through ``do_coding`` and are handed to one
         of the ``PETRwriter`` functions, selected by
         ``PETRglobals.NullVerbs`` / ``PETRglobals.NullActors``.

    Args:
        filepaths: Forwarded to ``PETRreader.read_xml_input``.
        out_file: Output file name; prefixed with ``'nullverbs.'`` in
            null-verbs mode, ignored in null-actors mode (which always writes
            ``'nullactors.txt'``), used as-is otherwise.
        s_parsed: Truthy when the input is already parsed.

    Returns:
        The value returned by the selected ``PETRwriter`` function, or
        ``None`` when the ``gna`` branch is taken.
    """
    print(filepaths)
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        events = utilities.stanford_parse(events)

    if gna:
        get_nullactor(events)
        # NOTE(review): nothing is coded on this path, so there is no writer
        # result to hand back. Return None explicitly rather than falling
        # through to names that were never bound.
        return None

    updated_events = do_coding(events)
    print("update_event:")

    if PETRglobals.NullVerbs:
        output_event = PETRwriter.write_nullverbs(updated_events,
                                                  'nullverbs.' + out_file)
    elif PETRglobals.NullActors:
        output_event = PETRwriter.write_nullactors(updated_events,
                                                   'nullactors.txt')
    else:
        output_event = PETRwriter.write_events(updated_events, out_file)
    return output_event
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Code events from ``data`` and either return them or write them out.

    The function initialises the ``PETRARCH.log`` logger, parses the
    configuration (the user-supplied ``config`` if given, otherwise the
    packaged ``data/config/PETR_config.ini``), calls ``read_dictionaries``,
    reads the events with ``PETRreader.read_pipeline_input`` and codes them
    with ``do_coding(events, None)``.

    Args:
        data: Forwarded to ``PETRreader.read_pipeline_input``.
        out_file: Output file name; required when ``write_output`` is truthy.
        config: Optional configuration path passed to
            ``PETRreader.parse_Config``.
        write_output: When falsy the coded events are returned via
            ``PETRwriter.pipe_output`` instead of being written.
        parsed: Truthy when ``data`` is already parsed; otherwise the events
            are passed through ``utilities.stanford_parse`` first.

    Returns:
        The result of ``PETRwriter.pipe_output`` when ``write_output`` is
        falsy; ``None`` after writing to ``out_file``.

    Raises:
        SystemExit: ``write_output`` is truthy but no ``out_file`` was given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        # Resolve the packaged default once and reuse it for both the log
        # line and the parser.
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Using default config file.')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped explicitly: a bare "\_" is an invalid
        # escape sequence that newer Python versions warn about. The logged
        # text is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()

    PETRwriter.write_events(updated_events, out_file)
def run(filepaths, out_file, s_parsed, sub_command_args):
    """Read, code and store events from a story or from XML input.

    This is the routine called from main().

    When ``filepaths`` equals ``'javainfo'`` the events come from
    ``PETRreader.read_story_input`` using the ``story_*`` attributes of
    ``sub_command_args``; otherwise they come from
    ``PETRreader.read_xml_input``. The events are parsed with
    ``utilities.stanford_parse`` when ``s_parsed`` is falsy, coded with
    ``do_coding`` and then written by the writer selected through
    ``PETRglobals.NullVerbs`` / ``PETRglobals.NullActors``; the default
    branch hands them to ``databasewriter.write_events``.

    NOTE(review): the original text had lost its indentation; this layout
    assumes the parse/coding/writing steps run for both input sources.
    """
    reading_story = filepaths == 'javainfo'
    if reading_story:
        args = sub_command_args
        events = PETRreader.read_story_input(
            args.story_content,
            args.story_title,
            args.story_date,
            args.story_src,
            args.story_url,
            args.story_id,
        )
        # The StanfordCoreNLP call inside read_story_input leaves a
        # StreamHandler attached to the root logger; strip every root
        # handler, last one first.
        root_logger = logging.root
        while root_logger.handlers:
            root_logger.removeHandler(root_logger.handlers[-1])
    else:
        events = PETRreader.read_xml_input(filepaths, s_parsed)

    print("events before coding:", events)
    if not s_parsed:
        events = utilities.stanford_parse(events)

    updated_events = do_coding(events)
    print("updated_events after coding:", updated_events)

    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(updated_events, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(updated_events, 'nullactors.' + out_file)
        return

    print("updated_events:")
    print(updated_events)
    databasewriter.write_events(updated_events, None, False)
def run(filepaths, out_file, s_parsed):
    """Read XML input, code it, and write the events to ``'evts.' + out_file``.

    This is the routine called from main().

    Args:
        filepaths: Forwarded to ``PETRreader.read_xml_input``.
        out_file: Passed to ``do_coding`` and, prefixed with ``'evts.'``,
            to ``PETRwriter.write_events``.
        s_parsed: Truthy when the input is already parsed; otherwise the
            events are passed through ``utilities.stanford_parse``.
    """
    loaded = PETRreader.read_xml_input(filepaths, s_parsed)
    to_code = loaded if s_parsed else utilities.stanford_parse(loaded)
    coded = do_coding(to_code, out_file)
    PETRwriter.write_events(coded, 'evts.' + out_file)
def run(filepaths, out_file, s_parsed):
    """Read XML input, code it, and write the result in the active mode.

    This is the routine called from main().

    The writer is chosen after coding: ``write_nullverbs`` when
    ``PETRglobals.NullVerbs`` is truthy, else ``write_nullactors`` when
    ``PETRglobals.NullActors`` is truthy, else ``write_events``. Each one
    receives ``out_file`` with its own prefix (``'nullverbs.'``,
    ``'nullactors.'`` or ``'evts.'``).

    Args:
        filepaths: Forwarded to ``PETRreader.read_xml_input``.
        out_file: Base name of the output file.
        s_parsed: Truthy when the input is already parsed; otherwise the
            events are passed through ``utilities.stanford_parse``.
    """
    sentences = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        sentences = utilities.stanford_parse(sentences)
    coded = do_coding(sentences)

    # Guard clauses: the first mode that is switched on wins.
    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(coded, 'nullactors.' + out_file)
        return
    PETRwriter.write_events(coded, 'evts.' + out_file)
def run(filepaths, out_file, s_parsed):
    """Read XML input, code it with ``out_file``, and write the result.

    This is the routine called from main().

    After ``do_coding(events, out_file)`` the output goes to exactly one
    writer: ``write_nullverbs`` (prefix ``'nullverbs.'``) when
    ``PETRglobals.NullVerbs`` is truthy, else ``write_nullactors`` (prefix
    ``'nullactors.'``) when ``PETRglobals.NullActors`` is truthy, else
    ``write_events`` (prefix ``'evts.'``).

    Args:
        filepaths: Forwarded to ``PETRreader.read_xml_input``.
        out_file: Passed to ``do_coding`` and used as the base output name.
        s_parsed: Truthy when the input is already parsed; otherwise the
            events are passed through ``utilities.stanford_parse``.
    """
    raw_events = PETRreader.read_xml_input(filepaths, s_parsed)
    if s_parsed:
        parsed_events = raw_events
    else:
        parsed_events = utilities.stanford_parse(raw_events)
    coded_events = do_coding(parsed_events, out_file)

    # Pick the writer and its file-name prefix together, then call once.
    if PETRglobals.NullVerbs:
        write, prefix = PETRwriter.write_nullverbs, 'nullverbs.'
    elif PETRglobals.NullActors:
        write, prefix = PETRwriter.write_nullactors, 'nullactors.'
    else:
        write, prefix = PETRwriter.write_events, 'evts.'
    write(coded_events, prefix + out_file)
def run(filepaths, out_file, s_parsed):
    """Read XML input, code it, and print the coded events as JSON.

    This is the routine called from main().

    Args:
        filepaths: Printed, then forwarded to ``PETRreader.read_xml_input``.
        out_file: Not used by the visible part of this function.
        s_parsed: Truthy when the input is already parsed; otherwise the
            events are passed through ``utilities.stanford_parse``.

    NOTE(review): the original line ends with an opening triple quote whose
    contents are not visible here. It presumably wrapped disabled
    output-writing code; confirm against the full file.
    """
    print(filepaths)
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    print("123")
    if not s_parsed:
        events = utilities.stanford_parse(events)

    updated_events = do_coding(events)
    print("update_event:")
    # json.dumps() no longer accepts an ``encoding`` keyword on Python 3
    # (it raises TypeError). 'utf-8' was already the Python 2 default, so
    # omitting it keeps the output the same there.
    print(json.dumps(updated_events, ensure_ascii=False))
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Code events from ``data`` and either return them or write them out.

    This is called externally.

    The function initialises the ``PETRARCH.log`` logger, parses the
    configuration (the user-supplied ``config`` if given, otherwise the
    packaged ``data/config/PETR_config.ini``), calls ``read_dictionaries``,
    reads the events with ``PETRreader.read_pipeline_input`` and codes them
    with ``do_coding``.

    Args:
        data: Forwarded to ``PETRreader.read_pipeline_input``.
        out_file: Output file name; required when ``write_output`` is truthy.
        config: Optional configuration path passed to
            ``PETRreader.parse_Config``.
        write_output: When falsy the coded events are returned via
            ``PETRwriter.pipe_output`` instead of being written.
        parsed: Truthy when ``data`` is already parsed; otherwise the events
            are passed through ``utilities.stanford_parse`` first.

    Returns:
        The result of ``PETRwriter.pipe_output`` when ``write_output`` is
        falsy; ``None`` after writing to ``out_file``.

    Raises:
        SystemExit: ``write_output`` is truthy but no ``out_file`` was given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        # Resolve the packaged default once and reuse it for both the log
        # line and the parser.
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Using default config file.')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped explicitly: a bare "\_" is an invalid
        # escape sequence that newer Python versions warn about. The logged
        # text is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()

    PETRwriter.write_events(updated_events, out_file)
def run(filepaths, out_file, s_parsed):
    """Read XML input, code it, and write the events to ``out_file``.

    Args:
        filepaths: Forwarded to ``PETRreader.read_xml_input``.
        out_file: Passed to both ``do_coding`` and
            ``PETRwriter.write_events``.
        s_parsed: Truthy when the input is already parsed; otherwise the
            events are passed through ``utilities.stanford_parse``.
    """
    source_events = PETRreader.read_xml_input(filepaths, s_parsed)
    if s_parsed:
        ready_events = source_events
    else:
        ready_events = utilities.stanford_parse(source_events)
    PETRwriter.write_events(do_coding(ready_events, out_file), out_file)