def do_the_business():
    """Run the judgment-processing pipeline end to end.

    Each stage is gated by a module-level ``do_*`` flag and reports its
    wall-clock duration via ``broadcast`` to both the console and the
    logfile.  Stages, in order:

      1. optionally delete the existing database (``do_delete_db``)
      2. analyse       -- scan ``file_list`` into the database
      3. crossreference -- link judgments to one another
      4. convert       -- emit HTML into ``public_html_dir``, then
                          optionally prune stale HTML (``do_delete_html``)
      5. disambiguation -- resolve ambiguous citations
      6. index         -- build the index pages

    Relies on module-level configuration globals (``logfile_name``,
    ``file_list``, ``dbfile_name``, ``output_dir`` etc.) set elsewhere
    in this file.  Returns ``None``.
    """
    # The logfile stays open for the whole run; every stage logs into it.
    with open(logfile_name, 'w') as logfile:

        # some details
        broadcast(logfile, "File list contains %d files" % len(file_list))

        # delete the database so the run starts from a clean slate
        if do_delete_db:
            os.remove(dbfile_name)

        # analysis stage
        if do_analyse:
            start = datetime.now()
            analyse.analyse(file_list=file_list,
                            dbfile_name=dbfile_name,
                            logfile=logfile,
                            use_multiprocessing=use_multiprocessing,
                            rel_judgment_dir=rel_judgment_dir)
            elapsed = datetime.now() - start
            broadcast(logfile, "Analyse phase took %s" % elapsed)

        # crossreference stage
        if do_crossreference:
            start = datetime.now()
            crossreference.crossreference(file_list=file_list,
                                          dbfile_name=dbfile_name,
                                          logfile=logfile,
                                          use_multiprocessing=use_multiprocessing)
            elapsed = datetime.now() - start
            broadcast(logfile, "Crossreference phase took %s" % elapsed)

        # convert stage
        if do_convert:
            # delete_html needs the epoch timestamp of when conversion
            # began, so it can identify HTML files older than this run.
            conversion_start = time.time()
            start = datetime.now()
            convert.convert(file_list=file_list,
                            dbfile_name=dbfile_name,
                            logfile=logfile,
                            public_html_dir=public_html_dir,
                            use_multiprocessing=use_multiprocessing,
                            do_legislation=do_legislation)
            elapsed = datetime.now() - start
            broadcast(logfile, "Convert phase took %s" % elapsed)

            # NOTE: this must stay nested inside do_convert --
            # conversion_start is only bound when conversion ran.
            if do_delete_html:
                delete_html.delete_html(conversion_start, output_dir)

        # disambiguation stage
        if do_disambiguation:
            start = datetime.now()
            disambiguation.disambiguation(file_list=file_list,
                                          dbfile_name=dbfile_name,
                                          logfile=logfile,
                                          output_dir=output_dir,
                                          use_multiprocessing=use_multiprocessing)
            elapsed = datetime.now() - start
            broadcast(logfile, "Disambiguation phase took %s" % elapsed)

        # index stage
        if do_index:
            start = datetime.now()
            indexes.make_indexes(dbfile_name=dbfile_name,
                                 logfile=logfile,
                                 output_dir=output_dir,
                                 use_multiprocessing=use_multiprocessing)
            elapsed = datetime.now() - start
            broadcast(logfile, "Index phase took %s" % elapsed)
log.error( "error during tokenization of file '{0}', exiting".format( filename)) continue tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]]) else: tokenized = text log.info("Parsing") drs = get_all(tokenized) if not drs: log.error("error during the execution of Boxer on file '{0}', exiting". format(filename)) continue log.info("Word sense disambiguation and entity linking") synsets, entities = disambiguation(tokenized, drs) if synsets == None or entities == None: log.error( "error during the disambiguation of file '{0}', exiting".format( filename)) continue # extracting co-mentions if options.comentions: dbpedia_entities = set(map(lambda x: x['entity'], entities)) for entity1, entity2 in combinations(dbpedia_entities, 2): if (entity1 != 'null' and entity2 != 'null'): triples.append( ('<{0}>'.format(entity1), '<{0}#comention>', '<{2}>'.format(config.get('namespace', 'relation'), entity2)))
log.info("Parsing with Boxer") semantics = candc.get_all(tokenized) if not semantics: log.error("error during the execution of Boxer on file '{0}', exiting".format(filename)) continue elif config.get('semantics', 'module') == 'semafor': log.info("Parsing with Semafor") semantics, tokenized = semafor.parse(text) if not semantics: log.error("error during the execution of Semafor on file '{0}', exiting".format(filename)) continue log.info("Word sense disambiguation and entity linking") synsets, entities = disambiguation(tokenized, semantics) if synsets==None or entities==None: log.error("error during the disambiguation of file '{0}', exiting".format(filename)) continue # extracting co-mentions if options.comentions: dbpedia_entities = set(map(lambda x: x['entity'], entities)) for entity1, entity2 in combinations(dbpedia_entities, 2): if (entity1 != 'null' and entity2 != 'null'): triples.append(('<{0}>'.format(entity1), '<{0}#comention>', '<{2}>'.format(config.get('namespace', 'relation'), entity2))) # build dictionary of variables try: variables = dict()
tokens = tokenize(text) if not tokens: log.error("error during tokenization of file '{0}', exiting".format(filename)) continue tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]]) else: tokenized = text log.info("Parsing") drs = get_all(tokenized) if not drs: log.error("error during the execution of Boxer on file '{0}', exiting".format(filename)) continue log.info("Word sense disambiguation and entity linking") synsets, entities = disambiguation(tokenized, drs) if synsets==None or entities==None: log.error("error during the disambiguation of file '{0}', exiting".format(filename)) continue # extracting co-mentions if options.comentions: dbpedia_entities = set(map(lambda x: x['entity'], entities)) for entity1, entity2 in combinations(dbpedia_entities, 2): if (entity1 != 'null' and entity2 != 'null'): triples.append(('<{0}>'.format(entity1), '<{0}#comention>', '<{2}>'.format(config.get('namespace', 'relation'), entity2))) # build dictionary of variables try: variables = dict()