def __init__(self, petrGlobal=None, config_folder='data/config/', config_file='PETR_config.ini'):
    """Set up PETRARCH state, either from the config files or from a saved map.

    Args:
        petrGlobal: Value handed to ``self.load`` when it is truthy
            (presumably a saved snapshot of the PETRARCH globals -- confirm
            against ``load``). When omitted or empty, the state is built
            from the config file and dictionaries instead.
        config_folder: Folder passed to ``utilities._get_data`` to locate
            the config file.
        config_file: Name of the config file inside ``config_folder``.
    """
    # The default used to be a literal ``{}``, i.e. one dict object shared by
    # every call.  ``None`` is just as falsy, so the branch taken below is
    # unchanged, but no mutable object is shared through the signature.
    if petrGlobal:
        print("LOADING FROM MAP")
        self.load(petrGlobal)
        return

    # Cold start: configure logging, stamp the run time, then parse the
    # config file before reading the dictionaries.
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    PETRglobals.RunTimeString = time.asctime()
    logger.info('Using Config file: ' + config_file)
    PETRreader.parse_Config(utilities._get_data(config_folder, config_file))
    petrarch2.read_dictionaries()
    print("SUCCESSFULL ON LOADING DICTIONARIES")
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Code a batch of stories on behalf of an external caller.

    Args:
        data: Input handed to ``PETRreader.read_pipeline_input``.
        out_file: Destination passed to ``PETRwriter.write_events``.
            Required whenever ``write_output`` is true.
        config: Path of a user-supplied config file. When falsy, the packaged
            ``data/config/PETR_config.ini`` is used.
        write_output: When true, events are written to ``out_file``; when
            false, they are returned instead.
        parsed: When true, the events are coded as read; otherwise they are
            first passed through ``utilities.stanford_parse``.

    Returns:
        The result of ``PETRwriter.pipe_output`` when ``write_output`` is
        false, otherwise ``None``.

    Raises:
        SystemExit: If ``write_output`` is true but no ``out_file`` is given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    # Fail fast: this check used to run only after the config, dictionaries,
    # parsing and coding had all been done, so a missing output file threw
    # away the whole run.  Nothing below can make the call valid, so reject
    # it before doing any work.
    if write_output and not out_file:
        print('Please specify an output file...')
        # Backslash escaped explicitly; the logged text is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()

    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    # out_file is guaranteed to be set here by the check at the top.
    PETRwriter.write_events(updated_events, out_file)
def main():
    """Command-line entry point: configure PETRARCH, pick inputs, run coding.

    Reads the options from ``parse_cli_args()``, parses either the
    user-supplied or the packaged config file, optionally switches on the
    null-verbs or null-actors mode, loads the dictionaries, resolves the list
    of input files and hands them to ``run``.

    Raises:
        SystemExit: If ``cli_args.inputs`` is neither an existing directory
            nor an existing file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info(
            'Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        mode_message = 'Coding in null verbs mode; no events will be generated'
        print(mode_message)
        logger.info(mode_message)
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        # The log entry used to say "null verbs" here (copy/paste slip);
        # one shared message keeps the console and the log in agreement.
        mode_message = 'Coding in null actors mode; no events will be generated'
        print(mode_message)
        logger.info(mode_message)
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    # Fall back to the file list held in PETRglobals unless inputs were
    # given on the command line.
    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            # os.path.join copes with the directory being given with or
            # without a trailing separator.
            paths = glob.glob(os.path.join(cli_args.inputs, '*.xml'))
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' +
                cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            sys.exit()

    out = cli_args.outputs or ""
    if cli_args.command_name == 'parse':
        run(paths, out, cli_args.parsed)
    else:
        # Every other command passes True as the third argument
        # (presumably the "input is already parsed" flag -- confirm in run).
        run(paths, out, True)

    print("Coding time:", time.time() - start_time)
    print("Finished")
# REST resource built on flask-restful that forwards a sentence and its parse
# tree to a phrase-extraction routine.  This is Python 2 code (``print``
# statement, ``unicode``, ``ConfigParser``).
from petrarch2 import petrarch2, PETRglobals, PETRreader, PETRtree, utilities
from ConfigParser import ConfigParser
from flask import jsonify, make_response
from flask.ext.httpauth import HTTPBasicAuth
from flask.ext.restful import Resource, reqparse
from flask.ext.restful.representations.json import output_json
import os

# Import-time setup: the config file at a fixed path is parsed before the
# dictionaries are read, so both happen once when this module is imported.
# Keep the two calls in this order.
config = "/app/resources/PETR_config.ini"
PETRreader.parse_Config(config)
petrarch2.read_dictionaries()


# Resource with a GET handler that returns a usage message and a POST handler
# that runs phrase extraction on the submitted text/parse pair.
class PhraseExtractAPI(Resource):
    # Builds the request parser: two arguments, 'text' and 'parse', both read
    # from the JSON body and converted with ``unicode``.
    def __init__(self):
        self.reqparse = reqparse.RequestParser()
        self.reqparse.add_argument('text', type=unicode, location='json')
        self.reqparse.add_argument('parse', type=unicode, location='json')
        super(PhraseExtractAPI, self).__init__()

    # GET: returns a fixed help string describing the expected POST payload.
    # NOTE(review): the example JSON in the message has a doubled quote
    # before "Airstrikes" -- looks like a typo; left untouched because the
    # string is returned to clients.
    def get(self):
        return """ This service expects a POST in the form '{"text":""Airstrikes and artillery...", "parse" : "(ROOT (S (S (NP (NP (NNP Airstrikes)) (CC and) (NP (NN artillery)))..."}' It will return a list of nouns and verbs...TBD"""

    # POST: parses the request arguments, echoes them to stdout, and returns
    # whatever ``self.get_phrases(text, parse)`` produces.
    # NOTE(review): ``get_phrases`` is not defined in the visible part of
    # this class -- presumably it follows further down; confirm.
    def post(self):
        args = self.reqparse.parse_args()
        print args
        text = args['text']
        parse = args['parse']
        output = self.get_phrases(text, parse)
        return output