Пример #1
0
def main():
    """Run the command-line entry point.

    For the ``parse`` command this loads the pickled chunker and POS tagger,
    reads the input sentences, parses them, optionally post-processes them
    (when geolocation and/or feature extraction was requested) and writes a
    plain-text report of every event to the output path given on the command
    line. Any other command only prints an error message.
    """
    # The configuration is still parsed and unpacked, but neither value is
    # used by this function.
    actors, verbs = parse_config()
    cli_args = parse_cli_args()
    cli_command = cli_args.command_name
    inputs = cli_args.inputs
    out_path = cli_args.output
    username = cli_args.username
    geo_boolean = cli_args.geolocate
    feature_boolean = cli_args.features

    def announce(label):
        """Print ``label`` followed by an hour:minute.second stamp."""
        # Read the clock once so all three fields describe the same instant.
        now = datetime.now()
        print('{}...{}:{}.{}'.format(label, now.hour, now.minute, now.second))

    if cli_command != 'parse':
        print('Please enter a valid command!')
        return

    announce('Reading in data')
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # presumably these are data files shipped with the project - confirm.
    # The files are opened in a ``with`` block so the handles are closed.
    with open(_get_data('ubt_chunker_trained.pickle')) as chunker_file:
        ubt_chunker = pickle.load(chunker_file)
    with open(_get_data('maxent_treebank_pos_tagger.pickle')) as tagger_file:
        pos_tagger = pickle.load(tagger_file)

    announce('Reading in sentences')
    events = read_data(inputs)

    announce('Parsing sentences')
    # The return value is ignored; the report below reads the parse output
    # from ``events`` itself.
    parse.parse(events, ubt_chunker, pos_tagger, cli_args.n_cores)
    announce('Done processing')

    if geo_boolean or feature_boolean:
        announce('Feature extraction')
        postprocess.process(events, username, geo_boolean, feature_boolean)
        announce('Done feature extraction')

    print('Writing the events to file...')

    # Collect the pieces and join once instead of growing a string with +=.
    report = []
    for event_id in events:
        record = events[event_id]
        report.append('\n=======================\n\n')
        report.append('event id: {}\n\n'.format(event_id))
        report.append('POS tagged sent:\n {}\n\n'.format(record['tagged']))
        report.append('NP tagged sent:\n {}\n\n'.format(record['sent_tree']))
        report.append('Noun phrases: \n {}\n'.format(record['noun_phrases']))
        report.append('Verb phrases: \n {}\n\n'.format(record['verb_phrases']))
        report.append('Parse time: \n {}\n'.format(record['parse_chunk_time']))

        if geo_boolean:
            report.append('\nGeolocate: \n {}, {}\n'.format(record['lat'],
                                                            record['lon']))

        if feature_boolean:
            report.append('Feature extract: \n {}\n'.format(
                record['num_involved']))

    with open(out_path, 'w') as f:
        f.write(''.join(report))
Пример #2
0
def process_plot():
    """Post-process the CSV directory, then plot every file and a comparison.

    Runs ``postprocess.process`` on ``csvdirname``, draws one interval plot
    per entry of ``config.map_pid_filename`` and finally one comparison plot
    over all of the CSV files.
    """
    print("Post processing")
    postprocess.process(csvdirname)

    for entry in config.map_pid_filename.values():
        name = entry["filename"]
        # Two spaces after the colon: the literal's trailing space plus the
        # separator the original two-item print statement inserted.
        print("Plot and Store:  %s" % name)
        csv_path = csvdirname + name
        print(csv_path)
        intervalplot.plot(csv_path, seq=resultdirname + name)

    # Second pass over the mapping, done after all the individual plots.
    all_csv_paths = []
    for entry in config.map_pid_filename.values():
        all_csv_paths.append(csvdirname + entry["filename"])
    compareintervalplot.plot(all_csv_paths, seq=resultdirname + "compare")

    print("Results can be found in: " + csvdirname)
Пример #3
0
def execute(benchmark):
    """Run ``benchmark`` while a logging thread records to a fresh CSV file.

    A new numbered folder is chosen under ``<base>/data`` (one above the
    highest existing number, or ``0`` when ``data/0`` does not exist), the
    logging thread is started with ``out.csv`` in that folder, and the
    benchmark is run. The logging thread is always signalled and joined,
    even when the benchmark raises.

    Args:
        benchmark: object providing ``logging_delay`` and ``run()``.

    Returns:
        Whatever ``postprocess.process(output_name, score)`` returns.
    """
    # NOTE(review): drops the last 10 characters of __file__, presumably the
    # module's own file name - confirm against the actual file name.
    base_path = __file__[:-10]
    data_dir = base_path + '/data'

    if os.path.exists(data_dir + '/0'):
        x = []
        for entry in os.listdir(data_dir):
            try:
                x.append(int(entry))
            except ValueError:
                # Ignore stray non-numeric entries (e.g. hidden files)
                # instead of aborting the whole run.
                continue
        x.sort()
        print(x)
        benchmark_num = x[-1] + 1
    else:
        benchmark_num = 0
    print('using folder ' + str(benchmark_num))
    output_name = data_dir + '/' + str(benchmark_num) + '/out.csv'
    os.makedirs(os.path.dirname(output_name), exist_ok=True)
    logging_delay = benchmark.logging_delay

    # Event that tells the logging thread the benchmark has finished.
    finished = threading.Event()

    # Start logging, run the task, then always stop and join the logger.
    log = logThread(finished, output_name, logging_delay)
    log.start()
    try:
        score = benchmark.run()
        print('score', score)
    finally:
        finished.set()
        log.join()

    return postprocess.process(output_name, score)
def signal_handler(signal, frame):
    """Handle Ctrl+C: post-process, plot every file, then exit with status 0.

    Runs ``postprocess.process`` on ``csvdirname``, draws one interval plot
    per entry of ``config.map_pid_filename``, draws a comparison plot over
    all CSV files and terminates the process via ``sys.exit(0)``.

    Args:
        signal: first handler argument; not used here.
        frame: second handler argument; not used here.
    """
    print("You pressed Ctrl+C")
    print("Post processing")
    postprocess.process(csvdirname)

    for entry in config.map_pid_filename.values():
        name = entry['filename']
        # Two spaces after the colon: the literal's trailing space plus the
        # separator the original two-item print statement inserted.
        print("Plot and Store:  %s" % name)
        intervalplot.plot(csvdirname + name, seq=resultdirname + name)

    # Second pass over the mapping, done after all the individual plots.
    all_csv_paths = []
    for entry in config.map_pid_filename.values():
        all_csv_paths.append(csvdirname + entry['filename'])
    compareintervalplot.plot(all_csv_paths, seq=resultdirname + 'compare')

    print("Results can be found in: " + csvdirname)
    sys.exit(0)
Пример #5
0
def main():
    """Run the command-line entry point.

    Supported commands:

    * ``parse`` - read the input sentences and parse them in-process.
    * ``batch_parse`` - hand the inputs to ``parse.batch_parse``.
    * ``parallel_parse`` - read the input sentences, split them into chunks
      and parse the chunks through a ``pp`` job server.

    After parsing, a plain-text report of the results is written to the
    output path given on the command line. Any other command prints an error
    message and returns without writing anything.
    """
    # actors and verbs are unpacked to match parse_config()'s return value
    # but are not used by this function.
    actors, verbs, stanford_dir = parse_config()

    cli_args = parse_cli_args()
    cli_command = cli_args.command_name
    inputs = cli_args.inputs
    out_path = cli_args.output
    username = cli_args.username
    geo_boolean = cli_args.geolocate
    feature_boolean = cli_args.features

    def announce(label):
        """Print ``label`` followed by an hour:minute.second stamp."""
        # Read the clock once so all three fields describe the same instant.
        now = datetime.now()
        print('{}...{}:{}.{}'.format(label, now.hour, now.minute, now.second))

    def run_postprocess(events):
        """Run post-processing on ``events`` when it was requested."""
        if geo_boolean or feature_boolean:
            announce('Feature extraction')
            postprocess.process(events, username, geo_boolean, feature_boolean)
            announce('Done')

    if cli_command == 'parse':
        announce('Reading in sentences')
        events = read_data(inputs)

        announce('Parsing sentences')
        results = parse.parse(events, stanford_dir)
        announce('Done processing')

        run_postprocess(events)
    elif cli_command == 'batch_parse':
        announce('Running')

        announce('Parsing sentences')
        results = parse.batch_parse(inputs, stanford_dir)
        announce('Done processing')

        if geo_boolean or feature_boolean:
            # This mode never reads an ``events`` mapping, so there is
            # nothing to hand to postprocess.process(); the previous code
            # referenced the unassigned local and always crashed here.
            # NOTE(review): confirm whether ``results`` should be
            # post-processed instead.
            print('Feature extraction is not available for batch_parse; '
                  'skipping.')
    elif cli_command == 'parallel_parse':
        import pp

        # n_cores is only read for this command, as before.
        cpus = cli_args.n_cores

        announce('Reading in sentences')
        events = read_data(inputs)

        ppservers = ()
        if cpus == -1:
            job_server = pp.Server(ppservers=ppservers)
        else:
            job_server = pp.Server(cpus, ppservers=ppservers)

        ncpus = job_server.get_ncpus()
        total = len(events)
        per_cpu = total // ncpus
        # When an even split would leave exactly one event over, spread the
        # events over one CPU fewer. ``per_cpu`` is checked first because it
        # is zero whenever there are fewer events than CPUs.
        if per_cpu and total % per_cpu == 1:
            chunk_size = total // (ncpus - 1)
        else:
            chunk_size = per_cpu
        # A zero step would make the chunking loop below fail.
        chunk_size = max(1, chunk_size)

        # Materialise the items once instead of once per chunk.
        event_items = list(events.items())
        chunks = [dict(event_items[start:start + chunk_size])
                  for start in range(0, total, chunk_size)]

        announce('Parsing sentences')
        jobs = [job_server.submit(parse.parse, (chunk, stanford_dir,),
                                  (parse.parse_sents,),
                                  ("corenlp", "nltk.tree", "utilities",))
                for chunk in chunks]

        # Calling a job yields its result; merge them all into one mapping.
        parallel_results = [job() for job in jobs]
        results = dict()
        for result in parallel_results:
            results.update(result)

        announce('Done processing')

        run_postprocess(events)
    else:
        print('Please enter a valid command!')
        # Nothing was parsed, so there is nothing to write.
        return

    print('Writing the events to file...')
    event_output = _format_parse_report(results)

    with open(out_path, 'w') as f:
        f.write(event_output)


def _format_parse_report(results):
    """Return the plain-text report for every event in ``results``.

    Sentence fields are appended one by one; when a field is missing the
    fields already appended for that sentence are kept and the available
    keys are printed. Missing coreference fields are skipped silently.
    """
    pieces = []
    for event in results:
        pieces.append('\n=======================\n\n')
        pieces.append('event id: {}\n\n'.format(event))
        sent_inf = results[event]['sent_info']
        sentences = sent_inf['sents']
        for sent in sentences:
            try:
                pieces.append('Sentence {}:\n'.format(sent))
                info = sentences[sent]
                pieces.append('Word info:\n {}\n\n'.format(info['word_info']))
                pieces.append('Parse tree:\n {}\n\n'.format(
                    info['parse_tree']))
                pieces.append('Word dependencies:\n {}\n\n'.format(
                    info['dependencies']))
                pieces.append('Coref info:\n\n')
                try:
                    pieces.append('Corefs:\n {}\n\n'.format(
                        sent_inf['coref_info'][sent]['corefs']))
                    pieces.append('Coref tree:\n {}\n\n'.format(
                        info['coref_tree']))
                except KeyError:
                    # Coreference data is optional for a sentence.
                    pass
                pieces.append('----------------------\n\n')
            except KeyError:
                print('There was a key error')
                print(results[event].keys())
                print(sent_inf['sents'][sent].keys())
    return ''.join(pieces)
Пример #6
0
def main():
    """Run the command-line entry point.

    For the ``parse`` command this loads the pickled chunker and POS tagger,
    reads the input sentences, parses them, optionally post-processes them
    (when geolocation and/or feature extraction was requested) and writes a
    plain-text report of every event to the output path given on the command
    line. Any other command only prints an error message.
    """
    # The configuration is still parsed and unpacked, but neither value is
    # used by this function.
    actors, verbs = parse_config()
    cli_args = parse_cli_args()
    cli_command = cli_args.command_name
    inputs = cli_args.inputs
    out_path = cli_args.output
    username = cli_args.username
    geo_boolean = cli_args.geolocate
    feature_boolean = cli_args.features

    def log_step(message):
        """Print ``message`` followed by an hour:minute.second stamp."""
        # A single clock reading keeps the three fields consistent.
        stamp = datetime.now()
        print('{}...{}:{}.{}'.format(message, stamp.hour, stamp.minute,
                                     stamp.second))

    def load_pickle(name):
        """Unpickle the data file that ``_get_data(name)`` points to."""
        # NOTE(review): pickle.load can execute arbitrary code from the
        # file; presumably these are data files shipped with the project -
        # confirm. The ``with`` block closes the handle after loading.
        with open(_get_data(name)) as handle:
            return pickle.load(handle)

    if cli_command != 'parse':
        print('Please enter a valid command!')
        return

    log_step('Reading in data')
    ubt_chunker = load_pickle('ubt_chunker_trained.pickle')
    pos_tagger = load_pickle('maxent_treebank_pos_tagger.pickle')

    log_step('Reading in sentences')
    events = read_data(inputs)

    log_step('Parsing sentences')
    # The return value is ignored; the report below reads the parse output
    # from ``events`` itself.
    parse.parse(events, ubt_chunker, pos_tagger, cli_args.n_cores)
    log_step('Done processing')

    if geo_boolean or feature_boolean:
        log_step('Feature extraction')
        postprocess.process(events, username, geo_boolean, feature_boolean)
        log_step('Done feature extraction')

    print('Writing the events to file...')

    # Collect the pieces and join once instead of growing a string with +=.
    sections = []
    for event in events:
        details = events[event]
        sections.extend([
            '\n=======================\n\n',
            'event id: {}\n\n'.format(event),
            'POS tagged sent:\n {}\n\n'.format(details['tagged']),
            'NP tagged sent:\n {}\n\n'.format(details['sent_tree']),
            'Noun phrases: \n {}\n'.format(details['noun_phrases']),
            'Verb phrases: \n {}\n\n'.format(details['verb_phrases']),
            'Parse time: \n {}\n'.format(details['parse_chunk_time']),
        ])

        if geo_boolean:
            sections.append('\nGeolocate: \n {}, {}\n'.format(
                details['lat'], details['lon']))

        if feature_boolean:
            sections.append('Feature extract: \n {}\n'.format(
                details['num_involved']))

    with open(out_path, 'w') as f:
        f.write(''.join(sections))