Example #1
	def __init__(self, port, host, request_handler, parameters):

		# Load configuration settings and set up the logger and request handler
		self.config_dict = utilities.load_config(CONFIG_FILE)

		self.logger = logging.getLogger()
		utilities.init_logger(self.logger, self.config_dict)

		check_config(self.config_dict, self.logger)
		
		self.request_handler = request_handler
		self.parameters = parameters

		self.servSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		# setsockopt (not getsockopt) is required to actually set SO_REUSEADDR
		self.servSock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
		self.servSock.bind((host, port))
		self.servSock.listen(self.config_dict['listen_connections'])
		self.servSock.setblocking(0)

		if self.config_dict['tcp_nagle']:
			# Disable Nagle's algorithm; again, this must be setsockopt
			self.servSock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

		# Initializing client dicts
		self.connections = {}
		self.responses = {}
				
		self.epoll = select.epoll()
		
		# Register the listening socket for edge-triggered (EPOLLET) read events
		self.epoll.register(self.servSock.fileno(), select.EPOLLIN | select.EPOLLET)

		self.logger.info('[%s:%d] started' % (host, port))
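
A minimal sketch of how this constructor might be driven. The class name Server and the handler signature below are assumptions for illustration; the example does not show the caller:

def echo_handler(epoll_context, parameters):
    request, host, port = epoll_context
    return request  # echo the request bytes back

server = Server(port=8080, host='0.0.0.0',
                request_handler=echo_handler, parameters=[])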
Example #2
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        logger.info('Config path: {}'.format(utilities._get_data('data/config/',
                                                                 'PETR_config.ini')))
        PETRreader.parse_Config(utilities._get_data('data/config/',
                                                    'PETR_config.ini'))

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
        updated_events = do_coding(events, None)
    else:
        events = utilities.stanford_parse(events)
        updated_events = do_coding(events, None)
    if not write_output:
        output_events = PETRwriter.pipe_output(updated_events)
        return output_events
    elif write_output and not out_file:
        print('Please specify an output file...')
        logger.warning(r'Need an output file. ¯\_(ツ)_/¯')  # raw string avoids the invalid \_ escape
        sys.exit()
    elif write_output and out_file:
        PETRwriter.write_events(updated_events, out_file)
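
A hedged usage sketch for run_pipeline: the shape of data is whatever PETRreader.read_pipeline_input accepts (not shown in this example), and the file names are illustrative:

# Return the coded events in memory instead of writing a file
events = run_pipeline(data, write_output=False, parsed=True)

# Or write directly to disk with a user-supplied config
run_pipeline(data, out_file='events.txt', config='my_PETR_config.ini')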
Example #3
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    # this is called externally
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        logger.info('Config path: {}'.format(utilities._get_data('data/config/',
                                                                 'PETR_config.ini')))
        PETRreader.parse_Config(utilities._get_data('data/config/',
                                                    'PETR_config.ini'))

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
        updated_events = do_coding(events, None)
#     else:
#         events = utilities.stanford_parse(events)
#         updated_events = do_coding(events, None)
    # NOTE: with the Stanford-parse branch commented out, callers must pass
    # parsed=True, or updated_events is never assigned below.
    if not write_output:
        output_events = PETRwriter.pipe_output(updated_events)
        return output_events
    elif write_output and not out_file:
        print('Please specify an output file...')
        logger.warning(r'Need an output file. ¯\_(ツ)_/¯')  # raw string avoids the invalid \_ escape
        sys.exit()
    elif write_output and out_file:
        PETRwriter.write_events(updated_events, out_file)
Example #4
def process_target(queue, cli_args, multi_log_lock):
    # Log that the child process has started
    write_multiprocess_log(
        multi_log_lock, '{}Process {}: {}'.format(u'', os.getpid(),
                                                  u'started.'))

    # The child process first loads everything it needs to run
    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    print('\n\n')

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    # Create a session for talking to the database
    session = Session()

    while True:
        if queue.qsize() > 0:  # qsize is a method; the original compared the method object itself
            # Fetch a task from the queue
            task = queue.get()
            # Log that a task was received
            write_multiprocess_log(
                multi_log_lock,
                '{}Process {} get one task: {}'.format(u'', os.getpid(), task))
            # Run the task
            process_task(task, out, multi_log_lock, session)
        else:
            time.sleep(0.5 * random.random())
            continue
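
One plausible way to wire this worker up, assuming the queue and log lock come from the multiprocessing module and cli_args from a surrounding parse_cli_args() call (all assumptions; the example does not show the caller):

import multiprocessing

task_queue = multiprocessing.Queue()
log_lock = multiprocessing.Lock()
for task in ['story-001', 'story-002']:  # illustrative task payloads
    task_queue.put(task)
workers = [multiprocessing.Process(target=process_target,
                                   args=(task_queue, cli_args, log_lock))
           for _ in range(4)]
for w in workers:
    w.start()

Note that process_target as written never returns, so the parent process would have to terminate the workers once the queue stays empty.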
Example #5
def main():

    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log', cli_args.debug)
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.command_name == 'parse' or cli_args.command_name == 'batch':

        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRglobals.ConfigFileName = cli_args.config

            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRglobals.ConfigFileName = 'PETR_config.ini'
            PETRreader.parse_Config(utilities._get_data('data/config/',
                                                        'PETR_config.ini'))

        read_dictionaries()

        start_time = time.time()
        print('\n\n')

        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                if cli_args.inputs[-1] != '/':
                    paths = glob.glob(cli_args.inputs + '/*.xml')
                else:
                    paths = glob.glob(cli_args.inputs + '*.xml')
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                sys.exit()

        out = "" #PETRglobals.EventFileName
        if cli_args.outputs:
                out = cli_args.outputs

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)

        else:
            run(paths, out, True)

        print("Coding time:", time.time() - start_time)

    print("Finished")
Example #6
def main():

    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()


    if cli_args.command_name == 'parse' or cli_args.command_name == 'batch':

        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(utilities._get_data('data/config/',
                                                        'PETR_config.ini'))

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                if cli_args.inputs[-1] != '/':
                    paths = glob.glob(cli_args.inputs + '/*.xml')
                else:
                    paths = glob.glob(cli_args.inputs + '*.xml')
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                sys.exit()
        
        out = "" #PETRglobals.EventFileName
        if cli_args.outputs:
                out = cli_args.outputs
             
        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)

        else:
            run(paths, out, True)

        print("Coding time:", time.time() - start_time)

    print("Finished")
Example #7
def __init__(self,
             config_folder='data/config/',
             config_file='PETR_config.ini'):
    # cli_args = petrarch2.parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    PETRglobals.RunTimeString = time.asctime()
    logger.info('Using Config file: ' + config_file)
    PETRreader.parse_Config(utilities._get_data(config_folder,
                                                config_file))
    petrarch2.read_dictionaries()
Example #8
def __init__(self, petrGlobal={}, config_folder='data/config/', config_file='PETR_config.ini'):
    # cli_args = petrarch2.parse_cli_args()
    if not petrGlobal:
        utilities.init_logger('PETRARCH.log', debug=False)
        logger = logging.getLogger('petr_log')
        PETRglobals.RunTimeString = time.asctime()
        logger.info('Using Config file: ' + config_file)
        PETRreader.parse_Config(utilities._get_data(config_folder, config_file))
        petrarch_ud.read_dictionaries()
        print("SUCCESSFULLY LOADED DICTIONARIES")
    else:
        print("LOADING FROM MAP")
        self.load(petrGlobal)
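
A sketch of the two construction paths this wrapper supports. The class name Petrarch and the save() counterpart to load() are assumptions for illustration:

p = Petrarch()                    # cold start: parse config, read dictionaries
state = p.save()                  # hypothetical: capture PETRglobals as a map
p2 = Petrarch(petrGlobal=state)   # warm start: restore state, skip dictionary loading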
Example #9
def main(message, logger_file=None, run_date='', version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    process_date = datetime.datetime.utcnow()
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    server_details = ''

    logger.info("Extracting date.")
    print("Extracting date.")
    date = formatter.get_date(message, process_date)

    logger.info("Sending to Hypnos.")
    story_id = message['entry_id']
    print(story_id)
    text = message['cleaned_text']
    headers = {'Content-Type': 'application/json'}
    payload = {'text': text, 'id': story_id, 'date': date}
    data = json.dumps(payload)
    hypnos_ip = os.environ['HYPNOS_PORT_5002_TCP_ADDR']
    hypnos_url = 'http://{}:5002/hypnos/extract'.format(hypnos_ip)
    r = requests.get(hypnos_url, data=data, headers=headers)

    print(r.status_code)

    if r.status_code == 200:
        logger.info("Running postprocess.py")
        print("Running postprocess.py")

        hypnos_res = r.json()
        print(hypnos_res)
        events = []
        for k, v in hypnos_res[story_id]['sents'].items():  # items() works on both Python 2 and 3
            if 'events' in v:
                sent = hypnos_res[story_id]['sents'][k]
                for event in v['events']:
                    event_tup = (date, event[0], event[1], event[2])
                    formatted, actors = postprocess.main(
                        event_tup, sent, version, server_details)
                    logger.info(formatted)
                    logger.info(actors)
                    print(formatted, actors)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #10
def main(cli_args=None):
    if not cli_args:
        cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            if cli_args.inputs[-1] != '/':
                paths = glob.glob(cli_args.inputs + '/*.xml')
            else:
                paths = glob.glob(cli_args.inputs + '*.xml')
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            sys.exit()

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    if cli_args.command_name == 'parse':
        events = run(paths, out, cli_args.parsed)
    else:
        events = run(paths, out, True)
    print("Coding time:", time.time() - start_time)
    print("Finished")
    return events
Example #11
def process_target_bak(q, l, first_task, cli_args, multi_log_lock):

    # The child process first loads everything it needs to run
    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    print('\n\n')

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    # Create a session for talking to the database
    session = Session()

    # The child process completes its first task before entering the loop
    write_multiprocess_log(
        multi_log_lock, '{}Process {}: {}'.format(u'', os.getpid(),
                                                  first_task))
    process_task(first_task, out, multi_log_lock, session)

    while l.acquire():
        # The queue is not empty; empty() is unreliable, so use qsize()
        if q.qsize() != 0:
            # Fetch the next task from the queue
            task = q.get()
            # Release the lock once the task has been fetched
            l.release()
            # Run the fetched task
            write_multiprocess_log(
                multi_log_lock,
                '{}Process {}: {}'.format(u'', os.getpid(), task))
            process_task(task, out, multi_log_lock, session)
        # The queue is empty
        else:
            # Release the lock
            l.release()
            # Break out of the loop
            break

    write_multiprocess_log(
        multi_log_lock, '{}Process {}: {}'.format(u'', os.getpid(),
                                                  u'exited...'))
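
The acquire/qsize/release dance above exists because checking the queue and taking from it are separate steps. For comparison only (this is not how the original is written), the standard library supports a lock-free version using a non-blocking get:

import queue  # the module is named Queue on Python 2

while True:
    try:
        task = q.get_nowait()  # raises queue.Empty instead of blocking
    except queue.Empty:
        break
    process_task(task, out, multi_log_lock, session)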
Example #12
def main(file_details, geo_details, server_details, petrarch_version, logger_file=None, run_filter=None,
         run_date='', version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    geo_details: Named tuple.
                  Settings for geocoding.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")


    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if geo_details.geo_service == "Mordecai":
        dest = "{0}:{1}/places".format(geo_details.mordecai_host, geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print("Mordecai geolocation service not responding. Continuing anyway...")
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print("Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing...")


    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)
    logger.info("Running PETRARCH")
    file_details.fullfile_stem + date_string  # no-op expression left over in the original
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version, file_details,
                         server_details, geo_details)
    else:
        print("Please specify a data version number. Program ending.")

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    try:
        uploader.main(date_string, server_details, file_details)
    except Exception as e:
        logger.warning("Error on the upload portion. {}".format(e))
        print("""Error on the uploader. This step isn't absolutely necessary.
              Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #13
def main(file_details, server_details, logger_file=None, run_filter=None):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if len(sys.argv) > 1:
        date_string = sys.argv[1]
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    file_details.fullfile_stem + date_string
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        petrarch.run_pipeline(formatted,
                              '{}{}.txt'.format(file_details.fullfile_stem,
                                                date_string), parsed=True)
        results = ''
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("Can't run with the options you've specified. You need to fix something.")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        oneaday_formatter.main(petr_results, date_string, server_details,
                               file_details)

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    uploader.main(date_string, server_details, file_details)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #14
def main():

    cli_args = parse_cli_args()
    """print(cli_args)
    sys.exit()"""
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()


    if cli_args.command_name == 'parse' or cli_args.command_name == 'batch':  # 16.06.27: no longer needed, right?

        print(cli_args)
        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(utilities._get_data('data/config/',
                                                        'PETR_config.ini'))

        if cli_args.nullverbs:
            print('Coding in null verbs mode; no events will be generated')
            logger.info('Coding in null verbs mode; no events will be generated')
            PETRglobals.NullVerbs = True  # Only get verb phrases that are not in the dictionary but are associated with coded noun phrases
        elif cli_args.nullactors:
            print('Coding in null actors mode; no events will be generated')
            logger.info('Coding in null actors mode; no events will be generated')
            PETRglobals.NullActors = True  # Only get actor phrases that are not in the dictionary but associated with coded verb phrases
            PETRglobals.NewActorLength = int(cli_args.nullactors)

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                if cli_args.inputs[-1] != '/':
                    paths = glob.glob(cli_args.inputs + '/*.xml')
                else:
                    paths = glob.glob(cli_args.inputs + '*.xml')
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                sys.exit()
        
        out = "" #PETRglobals.EventFileName
        if cli_args.outputs:
                out = cli_args.outputs
             
        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)

        else:
            run(paths, out, True)

        print("Coding time:", time.time() - start_time)

    print("Finished")
Example #15
def main(file_details,
         server_details,
         logger_file=None,
         run_filter=None,
         run_date='',
         version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details, process_date,
                               date_string)

    logger.info("Running PETRARCH")
    file_details.fullfile_stem + date_string
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)
        petr_results = petrarch.run_pipeline(formatted,
                                             write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted,
                                             write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version, file_details,
                         server_details)
    else:
        print("Please specify a data version number. Program ending.")

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    try:
        uploader.main(date_string, server_details, file_details)
    except Exception as e:
        logger.warning("Error on the upload portion. {}".format(e))
        print("""Error on the uploader. This step isn't absolutely necessary.
              Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #16
def main(file_details, server_details, logger_file=None, run_filter=None,
         run_date=None):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    file_details.fullfile_stem + date_string
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        # petrarch.run_pipeline(formatted,
        #                       '{}{}.txt'.format(file_details.fullfile_stem,
        #                                         date_string), parsed=True)
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    postprocess.main(formatted_results, date_string, file_details)

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    uploader.main(date_string, server_details, file_details)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #17
def main():
    cli_args = parse_cli_args()

    # miaoweixin added begin
    # Run indefinitely as a background program
    if cli_args.command_name == 'background':
        try:
            # infinite loop
            run_in_background(cli_args)
        except KeyboardInterrupt:
            print("Program exited due to keyboard interrupt.\n")
            return None
    # miaoweixin added end

    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            if cli_args.inputs[-1] != '/':
                paths = glob.glob(cli_args.inputs + '/*.xml')
            else:
                paths = glob.glob(cli_args.inputs + '*.xml')
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            sys.exit()
    elif cli_args.command_name == 'javainfo':
        # add else to java info 0904
        paths = 'javainfo'

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    if cli_args.command_name == 'parse':
        run(paths, out, cli_args.parsed, cli_args)
    else:
        run(paths, out, True, cli_args)

    print("Coding time:", time.time() - start_time)

    print("Finished")
Example #18
def main(file_details,
         geo_details,
         server_details,
         petrarch_version,
         run_date,
         mongo_details,
         logger_file=None,
         run_filter=None,
         version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    geo_details: Named tuple.
                  Settings for geocoding.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    petrarch_version: String.
                       Which version of Petrarch to use. Must be '1' or '2'

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    # Get the stories for the desired date from the DB
    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if geo_details.geo_service == "Mordecai":
        dest = "{0}:{1}/places".format(geo_details.mordecai_host,
                                       geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print(
                "Mordecai geolocation service not responding. Continuing anyway..."
            )
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print(
            "Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing..."
        )

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details, process_date,
                               date_string)
    logger.info("Running PETRARCH")
    file_details.fullfile_stem + date_string
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)

        petr_results = petrarch.run_pipeline(formatted,
                                             config="petr_config.ini",
                                             write_output=False,
                                             parsed=True)
        # DGM test variant writing straight to a file:
        # petrarch.run_pipeline(formatted, out_file="TESTOUT.txt",
        #                       config="petr_config.ini", write_output=True,
        #                       parsed=True)
        # sys.exit()

    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted,
                                             config="petr_config.ini",
                                             write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version, file_details,
                         server_details, geo_details)
    else:
        print("Please specify a data version number. Program ending.")

    #logger.info("Running phox_uploader.py")
    # print("Running phox_uploader.py")
    # try:
    #     uploader.main(date_string, server_details, file_details)
    # except Exception as e:
    #     logger.warning("Error on the upload portion. {}".format(e))
    #     print("""Error on the uploader. This step isn't absolutely necessary.
    #           Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #19
	if not (sport and fport and log and host):
		logger.error('%s is not correctly configured' % CONFIG_FILE)
		sys.exit(-1)


def proxy_request_handler(epoll_context, parameters):
	startTime = time.time()
	request, host, port = epoll_context
	

def server_request_handler(epoll_context, parameters):
	startTime = time.time()
	request, host, port = epoll_context


def receive_signal(signum, stack):
	print('Received: %s' % signum)


if __name__ == '__main__':
	opts = argparse.ArgumentParser(description=__description__)
	parse_config(opts)
	args = opts.parse_args()

	config_dict = utilities.load_config(args.config)
	utilities.init_logger(logger, config_dict)
	check_config(config_dict, logger)
	signal.signal(signal.SIGUSR1, receive_signal)

	#server = epoll.Server(int(args.port), args.host, request_handler, [])
	#thisserver.run()
Example #20
def main(file_details, geo_details, server_details, petrarch_version, run_date, mongo_details, logger_file=None, run_filter=None,
         version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    geo_details: Named tuple.
                  Settings for geocoding.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    petrarch_version: String.
                       Which version of Petrarch to use. Must be '1' or '2'

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")


    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if geo_details.geo_service == "Mordecai":
        dest = "{0}:{1}/places".format(geo_details.mordecai_host, geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print("Mordecai geolocation service not responding. Continuing anyway...")
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print("Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing...")


    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)
    logger.info("Running PETRARCH")
    file_details.fullfile_stem + date_string
    
    print('Running PETRARCH in Null Actors Mode.')
    # DGM Run this in actor-gen mode
    ACTOR_OUTPUT_FILE = "events_null_actors_mode_" + run_date + ".txt"
    petrarch.run_pipeline(formatted, out_file=ACTOR_OUTPUT_FILE,
                          config="petr_config.ini", write_output=True,
                          parsed=True)

    print("See events results in: " + ACTOR_OUTPUT_FILE)   
    print("See actors list in: nullactors." + ACTOR_OUTPUT_FILE)
    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #21
def main(message, logger_file=None, run_date='', version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    process_date = datetime.datetime.utcnow()
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    server_details = ''

    logger.info("Extracting date.")
    print("Extracting date.")
    date = formatter.get_date(message, process_date)

    logger.info("Sending to Hypnos.")
    story_id = message['entry_id']
    print(story_id)
    text = message['cleaned_text']
    headers = {'Content-Type': 'application/json'}
    payload = {'text': text, 'id': story_id, 'date': date}
    data = json.dumps(payload)
    hypnos_ip = os.environ['HYPNOS_PORT_5002_TCP_ADDR']
    hypnos_url = 'http://{}:5002/hypnos/extract'.format(hypnos_ip)
    r = requests.get(hypnos_url, data=data, headers=headers)

    print(r.status_code)

    if r.status_code == 200:
        logger.info("Running postprocess.py")
        print("Running postprocess.py")

        hypnos_res = r.json()
        print(hypnos_res)
        events = []
        for k, v in hypnos_res[story_id]['sents'].items():  # items() works on both Python 2 and 3
            if 'events' in v:
                sent = hypnos_res[story_id]['sents'][k]
                for event in v['events']:
                    event_tup = (date, event[0], event[1], event[2])
                    formatted, actors = postprocess.main(event_tup, sent, version, server_details)
                    logger.info(formatted)
                    logger.info(actors)
                    print(formatted, actors)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example #22
import os
import sys
from shutil import copyfile

from keras.models import load_model
from keras.utils import to_categorical

from alagent import ALAgent
from tagger import CRFTagger
import utilities
import tensorflow as tf
import numpy as np
import time

args = utilities.get_args()
logger = utilities.init_logger()

max_len = args.max_seq_length
VOCABULARY = args.vocab_size
EPISODES = args.episodes
BUDGET = args.annotation_budget
k = args.k

rootdir = args.root_dir
train_file = args.train_file
dev_file = args.dev_file
test_file = args.test_file
emb_file = args.word_vec_file
DATASET_NAME = args.dataset_name
policy_path = args.policy_path
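
Since every example on this page calls utilities.init_logger with slightly different signatures, here is a minimal sketch of what such a helper could look like. This is a reconstruction for illustration only, not the actual PETRARCH or pipeline source:

import logging

def init_logger(logger_filename='run.log', debug=False):
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG if debug else logging.INFO)
    handler = logging.FileHandler(logger_filename)
    handler.setFormatter(logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s: %(message)s'))
    logger.addHandler(handler)
    return logger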
Example #23
                source = triple[0] if isinstance(
                    triple[0], basestring) else triple[0].text
                target = triple[1] if isinstance(
                    triple[1], basestring) else triple[1].text
                others = ""
                for other in triple[3]:
                    others = others + other.text + ","
                tuples = tuples + "source: " + source + "\ttarget: " + target + "\tverb: " + triple[
                    2].text + "\tother_noun: " + others + "\n"
            ET.SubElement(sentence, "Triplets").text = tuples

    tree = ET.ElementTree(root)
    tree.write(outputfile, 'UTF-8')


utilities.init_logger('PETRARCH.log', True)
config = utilities._get_data('data/config/', 'PETR_config.ini')
print("reading config")
sys.stdout.write('Mk1\n')
PETRreader.parse_Config(config)
print("reading dicts")
petrarch_ud.read_dictionaries()
inputFile = sys.argv[1]
#inputFile=sys.argv[1].replace(".xml","")+"_parsed.xml"
outputFile = inputFile.replace("_parsed.xml", "") + "_phrase.xml"
events = read_xml_input([inputFile], True)
'''
print(len(events))
for key in events.keys():
	print(len(events[key]['sents']))
	for subkey,v in events[key]['sents'].items():