def __init__(self, petrGlobal=None, config_folder='data/config/', config_file='PETR_config.ini'):
    """Set up PETRARCH either from the config/dictionary files or from a map.

    petrGlobal    -- mapping handed to ``self.load()``. When it is omitted or
                     empty, the config file is parsed and the dictionaries
                     are read instead.
    config_folder -- folder passed to ``utilities._get_data`` to locate the
                     config file.
    config_file   -- config file name passed to ``utilities._get_data``.
    """
    # ``None`` replaces the former ``{}`` default: a mutable default object is
    # shared between calls. Only truthiness is tested, so callers that omit
    # the argument or pass an empty dict still take the file-loading path.
    if petrGlobal:
        print("LOADING FROM MAP")
        self.load(petrGlobal)
        return

    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    PETRglobals.RunTimeString = time.asctime()
    logger.info('Using Config file: ' + config_file)
    PETRreader.parse_Config(utilities._get_data(config_folder, config_file))
    petrarch2.read_dictionaries()
    print("SUCCESSFULL ON LOADING DICTIONARIES")
示例#2
0
def run_pipeline(data,
                 out_file=None,
                 config=None,
                 write_output=True,
                 parsed=False):
    """Code events from pipeline input; meant to be called from external code.

    data         -- input handed to ``PETRreader.read_pipeline_input``.
    out_file     -- destination passed to ``PETRwriter.write_events``;
                    required when ``write_output`` is true.
    config       -- path of a config file to parse; when falsy the packaged
                    ``data/config/PETR_config.ini`` is used.
    write_output -- when true, events are written to ``out_file`` and nothing
                    is returned; when false, the result of
                    ``PETRwriter.pipe_output`` is returned instead.
    parsed       -- when true the events go straight to ``do_coding``;
                    otherwise they first pass through
                    ``utilities.stanford_parse``.

    Calls ``sys.exit()`` when ``write_output`` is true but no ``out_file``
    was given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    # Validate the arguments before any expensive work: previously the whole
    # parse/coding pipeline ran and its result was thrown away before this
    # check was reached.
    if write_output and not out_file:
        print('Please specify an output file...')
        # Backslash is escaped explicitly; a bare "\_" is an invalid escape.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()

    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)
    PETRwriter.write_events(updated_events, out_file)
示例#3
0
def main():
    """Command-line entry point.

    Parses the CLI arguments, loads the config and dictionaries, resolves the
    input paths and hands them to ``run``. Calls ``sys.exit()`` when the
    ``inputs`` argument is neither an existing directory nor an existing file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # The log line used to say "null verbs" here (copy-paste slip); it
        # now matches the message printed above.
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    # Default to the file list held in PETRglobals unless inputs were given
    # on the command line.
    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            # os.path.join copes with a directory given with or without a
            # trailing separator.
            paths = glob.glob(os.path.join(cli_args.inputs, '*.xml'))
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            sys.exit()

    out = cli_args.outputs or ""  # PETRglobals.EventFileName

    if cli_args.command_name == 'parse':
        run(paths, out, cli_args.parsed)
    else:
        # Any other command always calls run() with True as its third
        # argument (where 'parse' passes cli_args.parsed).
        run(paths, out, True)

    print("Coding time:", time.time() - start_time)

    print("Finished")
示例#4
0
from petrarch2 import petrarch2, PETRglobals, PETRreader, PETRtree, utilities
from ConfigParser import ConfigParser
from flask import jsonify, make_response
from flask.ext.httpauth import HTTPBasicAuth
from flask.ext.restful import Resource, reqparse
from flask.ext.restful.representations.json import output_json
import os

# Module-level setup, executed once when this module is imported: parse the
# PETRARCH config at a hard-coded path, then load the dictionaries.
# NOTE(review): read_dictionaries() presumably relies on the settings that
# parse_Config() just loaded, so keep this order -- confirm against petrarch2.
config = "/app/resources/PETR_config.ini"
PETRreader.parse_Config(config)
petrarch2.read_dictionaries()

class PhraseExtractAPI(Resource):
    def __init__(self):
        """Build the request parser for the JSON fields 'text' and 'parse'."""
        parser = reqparse.RequestParser()
        # Both fields are unicode strings read from the JSON body.
        for field_name in ('text', 'parse'):
            parser.add_argument(field_name, type=unicode, location='json')
        self.reqparse = parser
        super(PhraseExtractAPI, self).__init__()

    def get(self):
        """Return a plain-text usage note describing the expected POST body."""
        # The example previously showed a doubled quote after "text": which
        # made the sample JSON malformed; the rest of the message, including
        # its embedded line breaks, is unchanged.
        return """ This service expects a POST in the form '{"text":"Airstrikes 
    and artillery...", "parse" : "(ROOT (S (S (NP (NP (NNP Airstrikes)) 
    (CC and) (NP (NN artillery)))..."}' It will return a list of nouns and verbs...TBD"""

    def post(self):
        """Handle a POST request.

        Parses the request with ``self.reqparse``, takes the 'text' and
        'parse' arguments and returns the result of ``self.get_phrases`` for
        them.
        """
        args = self.reqparse.parse_args()
        # Parenthesised print: same output as the Python 2 print statement
        # for a single argument, and also valid Python 3 syntax.
        print(args)
        text = args['text']
        parse = args['parse']
        return self.get_phrases(text, parse)