Пример #1
0
def run(filepaths, out_file, s_parsed):
    """Entry point called from main(): read, parse, code and write events.

    Args:
        filepaths: Input location(s) handed to ``PETRreader.read_xml_input``;
            also echoed to stdout.
        out_file: Output file name. Used as-is for regular events and
            prefixed with ``'nullverbs.'`` in null-verb mode.
        s_parsed: When falsy, the events are run through
            ``utilities.stanford_parse`` before coding.

    Returns:
        Whatever the selected ``PETRwriter`` function returns, or ``None``
        when the module-level ``gna`` switch is truthy.
    """
    print(filepaths)
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        events = utilities.stanford_parse(events)

    # NOTE(review): `gna` is not defined in this snippet; presumably a
    # module-level switch for null-actor collection -- confirm.
    if gna:
        get_nullactor(events)
        return None

    coded = do_coding(events)
    print("update_event:")

    if PETRglobals.NullVerbs:
        return PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
    if PETRglobals.NullActors:
        # This mode writes to a fixed file name and does not use out_file.
        return PETRwriter.write_nullactors(coded, 'nullactors.txt')
    return PETRwriter.write_events(coded, out_file)
Пример #2
0
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Code events supplied in memory, then write or return the result.

    Args:
        data: Input handed to ``PETRreader.read_pipeline_input``.
        out_file: Destination passed to ``PETRwriter.write_events``. Required
            whenever ``write_output`` is true.
        config: Optional path to a config file. When falsy, the packaged
            ``data/config/PETR_config.ini`` is used.
        write_output: When false, nothing is written and the result of
            ``PETRwriter.pipe_output`` is returned instead.
        parsed: When true the Stanford parse step is skipped.

    Returns:
        The value of ``PETRwriter.pipe_output(...)`` when ``write_output`` is
        false; otherwise ``None``.

    Raises:
        SystemExit: via ``sys.exit()`` when ``write_output`` is true but no
            ``out_file`` was given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    if config:
        message = 'Using user-specified config: {}'.format(config)
        print(message)
        logger.info(message)
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        # Resolve the packaged config path once and reuse it for both the
        # log line and the parser.
        default_config = utilities._get_data('data/config/',
                                             'PETR_config.ini')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    if not out_file:
        print('Please specify an output file...')
        # Raw string: "\_" is not a valid escape sequence and triggers a
        # warning on modern Python; the emitted text is unchanged.
        logger.warning(r'Need an output file. ¯\_(ツ)_/¯')
        sys.exit()

    PETRwriter.write_events(updated_events, out_file)
Пример #3
0
def run(filepaths, out_file, s_parsed, sub_command_args):
    """Entry point called from main(): read, code and store events.

    Args:
        filepaths: Either the literal ``'javainfo'`` (a single story is then
            taken from ``sub_command_args``) or input for
            ``PETRreader.read_xml_input``.
        out_file: Output file name; only used to build the file names for the
            null-verb / null-actor modes.
        s_parsed: When falsy, events go through ``utilities.stanford_parse``
            before coding.
        sub_command_args: Object exposing ``story_content``, ``story_title``,
            ``story_date``, ``story_src``, ``story_url`` and ``story_id``.
    """
    if filepaths != 'javainfo':
        events = PETRreader.read_xml_input(filepaths, s_parsed)
    else:
        story = sub_command_args
        events = PETRreader.read_story_input(story.story_content,
                                             story.story_title,
                                             story.story_date,
                                             story.story_src,
                                             story.story_url,
                                             story.story_id)
        # The StanfordCoreNLP call made by read_story_input leaves a
        # StreamHandler attached to the root logger as a side effect, so
        # strip every handler from the root logger again.
        root_logger = logging.root
        while root_logger.handlers:
            root_logger.removeHandler(root_logger.handlers[-1])

    print("events before coding:", events)
    if not s_parsed:
        events = utilities.stanford_parse(events)

    coded = do_coding(events)
    print("updated_events after coding:", coded)

    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(coded, 'nullactors.' + out_file)
        return

    # Regular events go to the database writer; the file-based
    # PETRwriter.write_events path is not used here.
    print("updated_events:")
    print(coded)
    databasewriter.write_events(coded, None, False)
Пример #4
0
def run(filepaths, out_file, s_parsed):
    """Entry point called from main(): read, code and write events.

    Args:
        filepaths: Input for ``PETRreader.read_xml_input``.
        out_file: Passed to ``do_coding`` and, prefixed with ``'evts.'``,
            used as the name of the events output file.
        s_parsed: When falsy, events go through ``utilities.stanford_parse``
            before coding.
    """
    raw_events = PETRreader.read_xml_input(filepaths, s_parsed)
    parsed_events = (raw_events if s_parsed
                     else utilities.stanford_parse(raw_events))
    coded = do_coding(parsed_events, out_file)
    target = 'evts.' + out_file
    PETRwriter.write_events(coded, target)
Пример #5
0
def run(filepaths, out_file, s_parsed):
    """Entry point called from main(): read, code and write events.

    The writer and the file-name prefix depend on the ``PETRglobals`` mode
    flags: ``NullVerbs`` takes precedence over ``NullActors``; with neither
    set, regular events are written to ``'evts.' + out_file``.

    Args:
        filepaths: Input for ``PETRreader.read_xml_input``.
        out_file: Base name of the output file.
        s_parsed: When falsy, events go through ``utilities.stanford_parse``
            before coding.
    """
    raw_events = PETRreader.read_xml_input(filepaths, s_parsed)
    parsed_events = (raw_events if s_parsed
                     else utilities.stanford_parse(raw_events))
    coded = do_coding(parsed_events)

    # Choose the output routine and its file-name prefix, then write once.
    if PETRglobals.NullVerbs:
        writer, prefix = PETRwriter.write_nullverbs, 'nullverbs.'
    elif PETRglobals.NullActors:
        writer, prefix = PETRwriter.write_nullactors, 'nullactors.'
    else:
        writer, prefix = PETRwriter.write_events, 'evts.'
    writer(coded, prefix + out_file)
Пример #6
0
def run(filepaths, out_file, s_parsed):
    """Entry point called from main(): read, code and write events.

    Args:
        filepaths: Input for ``PETRreader.read_xml_input``.
        out_file: Passed to ``do_coding`` and used as the base name of the
            output file (prefixed according to the active mode).
        s_parsed: When falsy, events go through ``utilities.stanford_parse``
            before coding.
    """
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        events = utilities.stanford_parse(events)
    coded = do_coding(events, out_file)

    # The null-verb mode wins over the null-actor mode.
    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(coded, 'nullactors.' + out_file)
        return
    PETRwriter.write_events(coded, 'evts.' + out_file)
Пример #7
0
def run(filepaths, out_file, s_parsed):
    # this is the routine called from main()
    print(filepaths)
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    print("123")
    if not s_parsed:
        events = utilities.stanford_parse(events)

    #print("events_input:",events)

    updated_events = do_coding(events)
    print("update_event:")
    print(json.dumps(updated_events, ensure_ascii=False, encoding='utf-8'))
    """
Пример #8
0
def run_pipeline(data,
                 out_file=None,
                 config=None,
                 write_output=True,
                 parsed=False):
    """Code events supplied in memory, then write or return the result.

    This is the entry point intended for external callers.

    Args:
        data: Input handed to ``PETRreader.read_pipeline_input``.
        out_file: Destination passed to ``PETRwriter.write_events``. Required
            whenever ``write_output`` is true.
        config: Optional path to a config file. When falsy, the packaged
            ``data/config/PETR_config.ini`` is used.
        write_output: When false, nothing is written and the result of
            ``PETRwriter.pipe_output`` is returned instead.
        parsed: When true the Stanford parse step is skipped.

    Returns:
        The value of ``PETRwriter.pipe_output(...)`` when ``write_output`` is
        false; otherwise ``None``.

    Raises:
        SystemExit: via ``sys.exit()`` when ``write_output`` is true but no
            ``out_file`` was given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    if config:
        message = 'Using user-specified config: {}'.format(config)
        print(message)
        logger.info(message)
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        # Resolve the packaged config path once and reuse it for both the
        # log line and the parser.
        default_config = utilities._get_data('data/config/',
                                             'PETR_config.ini')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    if not out_file:
        print('Please specify an output file...')
        # Raw string: "\_" is not a valid escape sequence and triggers a
        # warning on modern Python; the emitted text is unchanged.
        logger.warning(r'Need an output file. ¯\_(ツ)_/¯')
        sys.exit()

    PETRwriter.write_events(updated_events, out_file)
Пример #9
0
def run(filepaths, out_file, s_parsed):
    """Read events from XML input, code them and write them to ``out_file``.

    Args:
        filepaths: Input for ``PETRreader.read_xml_input``.
        out_file: Passed to ``do_coding`` and used unchanged as the name of
            the events output file.
        s_parsed: When falsy, events go through ``utilities.stanford_parse``
            before coding.
    """
    raw_events = PETRreader.read_xml_input(filepaths, s_parsed)
    parsed_events = (raw_events if s_parsed
                     else utilities.stanford_parse(raw_events))
    coded = do_coding(parsed_events, out_file)
    PETRwriter.write_events(coded, out_file)