def read_dictionaries(validation=False):
    """
    Load every dictionary named in PETRglobals.

    The verb dictionary, each actor dictionary, the agent dictionary and the
    discard list are read in that order; the issues list is read only when
    ``PETRglobals.IssueFileName`` is non-empty.  Each file is located with
    ``utilities._get_data('data/dictionaries', <name>)`` and its name is
    echoed to stdout before it is read.

    Parameters
    ----------
    validation: bool
                Accepted for interface compatibility; not consulted here.
    """
    dict_dir = 'data/dictionaries'

    verb_name = PETRglobals.VerbFileName
    print('Verb dictionary:', verb_name)
    PETRreader.read_verb_dictionary(utilities._get_data(dict_dir, verb_name))

    actor_names = PETRglobals.ActorFileList
    print('Actor dictionaries:', actor_names)
    for actor_name in actor_names:
        PETRreader.read_actor_dictionary(
            utilities._get_data(dict_dir, actor_name))

    agent_name = PETRglobals.AgentFileName
    print('Agent dictionary:', agent_name)
    PETRreader.read_agent_dictionary(utilities._get_data(dict_dir, agent_name))

    discard_name = PETRglobals.DiscardFileName
    print('Discard dictionary:', discard_name)
    PETRreader.read_discard_list(utilities._get_data(dict_dir, discard_name))

    # The issues list is optional: an empty file name means "not configured".
    if PETRglobals.IssueFileName == "":
        return
    issue_name = PETRglobals.IssueFileName
    print('Issues dictionary:', issue_name)
    PETRreader.read_issue_list(utilities._get_data(dict_dir, issue_name))
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """
    Code a batch of stories handed over by a processing pipeline.

    Initialises logging, loads the configuration and dictionaries, reads
    `data` with ``PETRreader.read_pipeline_input``, runs the Stanford parse
    unless the input is already parsed, codes the events and then either
    returns them or writes them to `out_file`.

    Parameters
    ----------
    data: object
          Pipeline input, passed unchanged to
          ``PETRreader.read_pipeline_input``.

    out_file: string or None
              Destination for the coded events; required when
              `write_output` is true.

    config: string or None
            Path of a user config file.  When falsy the packaged
            ``data/config/PETR_config.ini`` is used.

    write_output: bool
                  If false, the events are formatted with
                  ``PETRwriter.pipe_output`` and returned instead of written.

    parsed: bool
            True when `data` already carries parse information, in which
            case the Stanford parse step is skipped.

    Returns
    -------
    The value of ``PETRwriter.pipe_output`` when `write_output` is false,
    otherwise None.

    Raises
    ------
    SystemExit
        When `write_output` is true but no `out_file` was given.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)
    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped explicitly: '\_' is not a valid escape
        # sequence, and the logged text is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()
    PETRwriter.write_events(updated_events, out_file)
def main():
    """
    Command-line entry point.

    For the 'parse' and 'batch' commands: load the configuration (user
    supplied or packaged default), read the dictionaries, work out which
    XML files to process and hand them to run().  'batch' always passes
    True as the third argument of run(); 'parse' passes ``cli_args.parsed``.
    "Finished" is printed for every command.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    command = cli_args.command_name
    if command in ('parse', 'batch'):
        user_config = cli_args.config
        if user_config:
            print('Using user-specified config: {}'.format(user_config))
            logger.info('Using user-specified config: {}'.format(user_config))
            PETRreader.parse_Config(user_config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(
                utilities._get_data('data/config/', 'PETR_config.ini'))

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        # Default to the files listed in the config; --inputs overrides it.
        paths = PETRglobals.TextFileList
        source = cli_args.inputs
        if source:
            if os.path.isdir(source):
                tail = '*.xml' if source[-1] == '/' else '/*.xml'
                paths = glob.glob(source + tail)
            elif os.path.isfile(source):
                paths = [source]
            else:
                print('\nFatal runtime error:\n"' + source +
                      '" could not be located\nPlease enter a valid directory or file of source texts.')
                sys.exit()

        out = cli_args.outputs if cli_args.outputs else ""

        already_parsed = cli_args.parsed if command == 'parse' else True
        run(paths, out, already_parsed)

        print("Coding time:", time.time() - start_time)

    print("Finished")
def test_date_check():
    """The code for "Carl XVI Gustaf" must follow the sentence date."""
    parse = "(S (NP (NNP CARL ) (NN XVI ) (NNP GUSTAF ) ) )"
    # (sentence date, expected meaning of the noun phrase)
    cases = (
        ("20150813", ["SWEGOV"]),
        ("19720813", ["SWEELI"]),
        ("19010813", ["SWEELI"]),
    )
    for date_string, expected in cases:
        sentence = ptree.Sentence(parse, "Carl XVI Gustaf",
                                  PETRreader.dstr_to_ordate(date_string))
        noun_phrase = sentence.tree.children[0]
        assert noun_phrase.get_meaning() == expected
def check_date(self, match):
    """
    Pick the actor code whose date restriction covers this phrase's date.

    Parameters
    -----------
    match: list
           Entries of the form (code, dates) taken from the dictionary.
           `dates` holds zero, one or two date strings; a single date is
           prefixed with "<" or ">".

    Returns
    -------
    code: string
          The first truthy code whose restriction admits ``self.date``.
          If nothing matches, the last value assigned to `code` is returned
          (None when `match` is empty).  Any exception raised while
          evaluating an entry is swallowed and the current value of `code`
          is returned.
    """
    code = None
    try:
        for entry in match:
            raw_dates = entry[1]
            bounds = []
            code = ""
            # Convert each date string to its ordinal form, keeping any
            # leading "<" / ">" marker in front of the number.
            for raw in raw_dates:
                if raw[0] in "<>":
                    bounds.append(
                        raw[0] + str(PETRreader.dstr_to_ordate(raw[1:])))
                else:
                    bounds.append(str(PETRreader.dstr_to_ordate(raw)))

            today = self.date
            if not bounds:
                # No restriction at all: the entry always applies.
                code = entry[0]
            elif len(bounds) == 1:
                only = bounds[0]
                if only[0] == "<":
                    applies = today < int(only[1:])
                else:
                    applies = today >= int(only[1:])
                if applies:
                    code = entry[0]
            elif today < int(bounds[1]) and today >= int(bounds[0]):
                # Two dates form a half-open interval [first, second).
                code = entry[0]

            if code:
                return code
    except Exception:
        return code

    return code
def gen_cameo_event(jsonString):
    """
    Code the events described by a JSON string.

    The string is read with ``PETRreader.read_json``; a falsy result yields
    an empty dict, otherwise the output of ``do_coding(events, None)`` is
    returned.
    """
    events = PETRreader.read_json(jsonString)
    if not events:
        return {}
    return do_coding(events, None)
def run(filepaths, out_file, s_parsed):
    """
    Read, code and write the given XML files; this is the routine called
    from main().

    The events are written to ``'evts.' + out_file``.  The Stanford parse
    step for unparsed input is disabled in this version, so `s_parsed` is
    only forwarded to ``PETRreader.read_xml_input``.
    """
    coded = do_coding(PETRreader.read_xml_input(filepaths, s_parsed), out_file)
    PETRwriter.write_events(coded, 'evts.' + out_file)
def run(filepaths, out_file, s_parsed):
    """
    Read, parse if necessary, code and write the given XML files; this is
    the routine called from main().

    The output writer and file-name prefix depend on the coding mode:
    null-verbs mode writes 'nullverbs.<out_file>', null-actors mode writes
    'nullactors.<out_file>', and normal coding writes 'evts.<out_file>'.
    """
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events, out_file)

    if PETRglobals.NullVerbs:
        writer, prefix = PETRwriter.write_nullverbs, 'nullverbs.'
    elif PETRglobals.NullActors:
        writer, prefix = PETRwriter.write_nullactors, 'nullactors.'
    else:
        writer, prefix = PETRwriter.write_events, 'evts.'
    writer(updated_events, prefix + out_file)
def main():
    """
    Command-line entry point.

    For the 'parse' and 'batch' commands: load the configuration, switch on
    null-verbs or null-actors mode if requested, read the dictionaries,
    resolve the input XML files and hand them to run().  'parse' forwards
    ``cli_args.parsed`` as the third argument of run(); 'batch' always
    passes True.  "Finished" is printed for every command.

    Raises
    ------
    SystemExit
        When ``cli_args.inputs`` is neither a directory nor a file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    # 16.06.27: no longer needed, right?
    if cli_args.command_name == 'parse' or cli_args.command_name == 'batch':
        print(cli_args)
        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(
                utilities._get_data('data/config/', 'PETR_config.ini'))

        if cli_args.nullverbs:
            print('Coding in null verbs mode; no events will be generated')
            logger.info(
                'Coding in null verbs mode; no events will be generated')
            # Only get verb phrases that are not in the dictionary but are
            # associated with coded noun phrases
            PETRglobals.NullVerbs = True
        elif cli_args.nullactors:
            print('Coding in null actors mode; no events will be generated')
            # The log line now names the mode actually selected (it used to
            # say "null verbs").
            logger.info(
                'Coding in null actors mode; no events will be generated')
            # Only get actor phrases that are not in the dictionary but
            # associated with coded verb phrases
            PETRglobals.NullActors = True
            PETRglobals.NewActorLength = int(cli_args.nullactors)

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        # Default to the files listed in the config; --inputs overrides it.
        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                # os.path.join copes with a missing or present trailing slash.
                paths = glob.glob(os.path.join(cli_args.inputs, '*.xml'))
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                sys.exit()

        out = ""  # PETRglobals.EventFileName
        if cli_args.outputs:
            out = cli_args.outputs

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)
        else:
            run(paths, out, True)  # batch: third argument is always True

        print("Coding time:", time.time() - start_time)

    print("Finished")
def test_reflexive():
    """The reflexive pronoun "himself" must take the meaning of "Obama"."""
    tree_string = "(S (NP (NNP Obama ) ) (VP (VBD asked ) (NP (PRP himself ) ) (SBAR (WHADVP (WRB why ) ) (S (NP (NNP Biden ) ) (VP (VBD was ) (ADJP (VBN tired ) ) ) ) ) ) )".upper()
    sentence_date = PETRreader.dstr_to_ordate("20150813")
    sentence = ptree.Sentence(
        tree_string, "Obama asked himself why Biden was tired", sentence_date)

    # children[1] is the VP; its children[1] is the NP holding the pronoun.
    verb_phrase = sentence.tree.children[1]
    reflexive_np = verb_phrase.children[1]
    assert reflexive_np.get_meaning() == ["USAGOV"]
def main():
    """
    Command-line entry point.

    For the 'parse' and 'batch' commands: load the configuration, switch on
    null-verbs or null-actors mode if requested, read the dictionaries,
    resolve the input XML files and hand them to run().  'parse' forwards
    ``cli_args.parsed`` as the third argument of run(); 'batch' always
    passes True.  "Finished" is printed for every command.

    Raises
    ------
    SystemExit
        When ``cli_args.inputs`` is neither a directory nor a file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    # 16.06.27: no longer needed, right?
    if cli_args.command_name == 'parse' or cli_args.command_name == 'batch':
        print(cli_args)
        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info('Using user-specified config: {}'.format(
                cli_args.config))
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(
                utilities._get_data('data/config/', 'PETR_config.ini'))

        if cli_args.nullverbs:
            print('Coding in null verbs mode; no events will be generated')
            logger.info(
                'Coding in null verbs mode; no events will be generated')
            # Only get verb phrases that are not in the dictionary but are
            # associated with coded noun phrases
            PETRglobals.NullVerbs = True
        elif cli_args.nullactors:
            print('Coding in null actors mode; no events will be generated')
            # The log line now names the mode actually selected (it used to
            # say "null verbs").
            logger.info(
                'Coding in null actors mode; no events will be generated')
            # Only get actor phrases that are not in the dictionary but
            # associated with coded verb phrases
            PETRglobals.NullActors = True
            PETRglobals.NewActorLength = int(cli_args.nullactors)

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        # Default to the files listed in the config; --inputs overrides it.
        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                # os.path.join copes with a missing or present trailing slash.
                paths = glob.glob(os.path.join(cli_args.inputs, '*.xml'))
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' + cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.'
                )
                sys.exit()

        out = ""  # PETRglobals.EventFileName
        if cli_args.outputs:
            out = cli_args.outputs

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)
        else:
            run(paths, out, True)  # batch: third argument is always True

        print("Coding time:", time.time() - start_time)

    print("Finished")
def do_coding(event_dict, out_file):
    """
    Main coding loop.

    Walks the stories of `event_dict` in sorted key order and, for every
    sentence that carries a 'parsed' entry, builds a ``PETRtree.Sentence``,
    codes it and stores the results back into `event_dict`.  Sentences or
    whole stories matched by check_discards() are skipped; a discarded
    story has its 'sents' entry set to None.  A summary is printed at the
    end.

    Note that entering any character other than 'Enter' at the prompt will
    stop the program: this is deliberate.
    <14.02.28>: Bug: PETRglobals.PauseByStory actually pauses after the first
                sentence of the *next* story

    Parameters
    ----------
    event_dict: dict
                Story id -> {'meta': {'date': ...}, 'sents': {sent id: {...}}}.
                Updated in place.

    out_file: string
              Unused at present; the TeX output it controlled is disabled
              <16.06.18 pas>.

    Returns
    -------
    event_dict: dict
                The same object that was passed in.
    """
    NStory = 0
    NSent = 0
    NEvents = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0
    for key, val in sorted(event_dict.items()):
        NStory += 1
        SkipStory = False
        print('\n\nProcessing story {}'.format(key))
        StoryDate = event_dict[key]['meta']['date']

        for sent in val['sents']:
            NSent += 1
            # Built before the parse check so that the "no parse" log line
            # names the sentence being skipped; it used to be assigned only
            # for parsed sentences, giving an UnboundLocalError or a stale ID.
            SentenceID = '{}_{}'.format(key, sent)
            sent_rec = event_dict[key]['sents'][sent]

            if 'parsed' not in sent_rec:
                logger.info(
                    '{} has no parse information. Passing.'.format(SentenceID))
                continue

            if 'config' in sent_rec:
                for config in sent_rec['config'].values():
                    change_Config_Options(config)

            SentenceText = sent_rec['content']
            SentenceDate = sent_rec['date'] if 'date' in sent_rec else StoryDate
            Date = PETRreader.dstr_to_ordate(SentenceDate)

            print("\n", SentenceID)
            treestr = sent_rec['parsed']

            # disc[0]: 0 = keep, 1 = discard this sentence, anything else
            # above 0 = discard the whole story.
            disc = check_discards(SentenceText)
            if disc[0] > 0:
                if disc[0] == 1:
                    print("Discard sentence:", disc[1])
                    logger.info('\tSentence discard. {}'.format(disc[1]))
                    NDiscardSent += 1
                    continue
                print("Discard story:", disc[1])
                logger.info('\tStory discard. {}'.format(disc[1]))
                SkipStory = True
                NDiscardStory += 1
                break

            t1 = time.time()
            sentence = PETRtree.Sentence(treestr, SentenceText, Date)
            print(sentence.txt)
            # this is the entry point into the processing in PETRtree
            coded_events, meta = sentence.get_events()
            code_time = time.time() - t1

            if PETRglobals.NullVerbs or PETRglobals.NullActors:
                event_dict[key]['meta'] = meta
                event_dict[key]['text'] = sentence.txt
            elif PETRglobals.NullActors:
                # NOTE(review): unreachable, because the branch above already
                # catches NullActors.  Left as is: enabling it would set
                # coded_events to None and break the len() calls below.
                event_dict[key]['events'] = coded_events
                coded_events = None  # skips additional processing
                event_dict[key]['text'] = sentence.txt
            else:
                # 16.04.30 pas: we're using the key value 'meta' at two very
                # different levels of event_dict -- see the code about ten
                # lines below -- and this is potentially confusing, so it
                # probably would be useful to change one of those
                event_dict[key]['meta']['verbs'] = meta

            # <16.06.18 pas> sentence.print_to_file() output is disabled.
            del sentence
            times += code_time
            sents += 1

            if coded_events:
                sent_rec['events'] = coded_events
                sent_rec['meta'] = meta

                if (PETRglobals.WriteActorText or PETRglobals.WriteEventText
                        or PETRglobals.WriteActorRoot):
                    text_dict = utilities.extract_phrases(sent_rec, SentenceID)
                    if text_dict:
                        sent_rec['meta']['actortext'] = {}
                        sent_rec['meta']['eventtext'] = {}
                        sent_rec['meta']['actorroot'] = {}
                        for evt in coded_events:
                            # 16.04.30 pas bypasses problems with expansion
                            # of compounds
                            if evt in text_dict:
                                sent_rec['meta']['actortext'][evt] = text_dict[evt][:2]
                                sent_rec['meta']['eventtext'][evt] = text_dict[evt][2]
                                sent_rec['meta']['actorroot'][evt] = text_dict[evt][3:5]

            if coded_events and PETRglobals.IssueFileName != "":
                event_issues = get_issues(SentenceText)
                if event_issues:
                    sent_rec['issues'] = event_issues

            if PETRglobals.PauseBySentence:
                if len(input("Press Enter to continue...")) > 0:
                    sys.exit()

            NEvents += len(coded_events)
            if len(coded_events) == 0:
                NEmpty += 1

        if SkipStory:
            event_dict[key]['sents'] = None

    print("\nSummary:")
    print(
        "Stories read:",
        NStory,
        " Sentences coded:",
        NSent,
        " Events generated:",
        NEvents)
    print(
        "Discards: Sentence",
        NDiscardSent,
        " Story",
        NDiscardStory,
        " Sentences without events:",
        NEmpty)
    print("Average Coding time = ", times / sents if sents else 0)
    return event_dict
def check_date(self, match):
    """
    Pick the actor code whose date restriction covers this phrase's date.

    Parameters
    -----------
    match: list
           Entries of the form (code, dates) taken from the dictionary.
           `dates` holds zero, one or two date strings; a single date is
           prefixed with "<" or ">".

    Returns
    -------
    code: string
          The first truthy code whose restriction admits ``self.date``.
          If nothing matches, the last value assigned to `code` is returned
          (None when `match` is empty).

    Note <16.06.10 pas>
    -------------------
    In a very small set of cases involving a reflexive PRP inside a PP, the
    system can get into an infinite recursion where it first backs up a
    couple levels from the (PP, then this call to child.get_meaning() drops
    back down to the same point via the two child invocations in
    NounPhrase.get_meaning()

                elif child.label == "PP":
                    m = self.resolve_codes(child.get_meaning())

    and in PrepPhrase.get_meaning()

                self.meaning = self.children[1].get_meaning() if isinstance(self.children[1],NounPhrase) else ""

    which takes one back to the same point at one deeper level of recursion.
    These structures occurred about five times in a 20M sentence corpus, and
    no fix was found that didn't break something else, so the exception was
    trapped here.  That try/except is commented out in this version of the
    method, so exceptions now propagate to the caller.

    The full record for one of the offending cases is:

    <Sentence date = "20150824" id ="e35ef55a-fa30-4c34-baae-965dea33d8d8_3" source = "ANOTHER INFINITE RECURSION" sentence = "True">
    <Text>
    He started out at the bottom of the Hollywood rung, directed his own
    movie and managed to get noticed by Steven Spielberg himself to nab a
    tiny role in 1998s Saving Private Ryan .
    </Text>
    <Parse>
    (ROOT (S (S (NP (PRP He)) (VP (VBD started) (PRT (RP out)) (PP (IN at)
    (NP (NP (DT the) (NN bottom)) (PP (IN of) (NP (DT the) (NNP Hollywood)
    )))))) (VP (VBD rung)) (, ,) (S (VP (VP (VBD directed) (NP (PRP$ his)
    (JJ own) (NN movie))) (CC and) (VP (VBD managed) (S (VP (TO to) (VP (VB
    get) (VP (VBN noticed) (PP (IN by) (NP (NNP Steven) (NNP Spielberg)
    (PRP himself)) ) (S (VP (TO to) (VP (VB nab) (NP (NP (DT a) (JJ tiny)
    (NN role)) (PP (IN in) (NP (NP (NNS 1998s)) (VP (VBG Saving) (NP (JJ
    Private) (NNP Ryan)) )))))))))))))) (. .)))
    </Parse>
    </Sentence>
    """
    code = None
    for entry in match:
        raw_dates = entry[1]
        bounds = []
        code = ""
        # Convert each date string to its ordinal form, keeping any leading
        # "<" / ">" marker in front of the number.
        for raw in raw_dates:
            if raw[0] in '<>':
                bounds.append(raw[0] + str(PETRreader.dstr_to_ordate(raw[1:])))
            else:
                bounds.append(str(PETRreader.dstr_to_ordate(raw)))

        today = self.date
        if not bounds:
            # No restriction at all: the entry always applies.
            code = entry[0]
        elif len(bounds) == 1:
            only = bounds[0]
            if only[0] == '<':
                applies = today < int(only[1:])
            else:
                applies = today >= int(only[1:])
            if applies:
                code = entry[0]
        elif today < int(bounds[1]) and today >= int(bounds[0]):
            # Two dates form a half-open interval [first, second).
            code = entry[0]

        if code:
            return code

    return code
def do_coding(event_dict):
    """
    Main coding loop.

    Walks the stories of `event_dict` in sorted key order and, for every
    sentence that carries a 'parsed' entry, builds a ``PETRtree.Sentence``,
    codes it and stores the results back into `event_dict`.  Sentences or
    whole stories matched by check_discards() are skipped; a discarded
    story has its 'sents' entry set to None.  A summary is printed at the
    end.

    Note that entering any character other than 'Enter' at the prompt will
    stop the program: this is deliberate.
    <14.02.28>: Bug: PETRglobals.PauseByStory actually pauses after the first
                sentence of the *next* story

    Parameters
    ----------
    event_dict: dict
                Story id -> {'meta': {'date': ...}, 'sents': {sent id: {...}}}.
                Updated in place.

    Returns
    -------
    event_dict: dict
                The same object that was passed in.
    """
    NStory = 0
    NSent = 0
    NEvents = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0
    for key, val in sorted(event_dict.items()):
        NStory += 1
        SkipStory = False
        print('\n\nProcessing story {}'.format(key))
        StoryDate = event_dict[key]['meta']['date']

        for sent in val['sents']:
            NSent += 1
            # Built before the parse check so that the "no parse" log line
            # names the sentence being skipped; it used to be assigned only
            # for parsed sentences, giving an UnboundLocalError or a stale ID.
            SentenceID = '{}_{}'.format(key, sent)
            sent_rec = event_dict[key]['sents'][sent]

            if 'parsed' not in sent_rec:
                logger.info(
                    '{} has no parse information. Passing.'.format(SentenceID))
                continue

            if 'config' in sent_rec:
                for config in sent_rec['config'].values():
                    change_Config_Options(config)

            SentenceText = sent_rec['content']
            SentenceDate = sent_rec['date'] if 'date' in sent_rec else StoryDate
            Date = PETRreader.dstr_to_ordate(SentenceDate)

            print("\n", SentenceID)
            treestr = sent_rec['parsed']

            # disc[0]: 0 = keep, 1 = discard this sentence, anything else
            # above 0 = discard the whole story.
            disc = check_discards(SentenceText)
            if disc[0] > 0:
                if disc[0] == 1:
                    print("Discard sentence:", disc[1])
                    logger.info('\tSentence discard. {}'.format(disc[1]))
                    NDiscardSent += 1
                    continue
                print("Discard story:", disc[1])
                logger.info('\tStory discard. {}'.format(disc[1]))
                SkipStory = True
                NDiscardStory += 1
                break

            t1 = time.time()
            sentence = PETRtree.Sentence(treestr, SentenceText, Date)
            print(sentence.txt)
            # this is the entry point into the processing in PETRtree
            coded_events, meta = sentence.get_events()
            code_time = time.time() - t1

            if PETRglobals.NullVerbs or PETRglobals.NullActors:
                event_dict[key]['meta'] = meta
                event_dict[key]['text'] = sentence.txt
            elif PETRglobals.NullActors:
                # NOTE(review): unreachable, because the branch above already
                # catches NullActors.  Left as is: enabling it would set
                # coded_events to None and break the len() calls below.
                event_dict[key]['events'] = coded_events
                coded_events = None  # skips additional processing
                event_dict[key]['text'] = sentence.txt
            else:
                # 16.04.30 pas: we're using the key value 'meta' at two very
                # different levels of event_dict -- see the code about ten
                # lines below -- and this is potentially confusing, so it
                # probably would be useful to change one of those
                event_dict[key]['meta']['verbs'] = meta

            del sentence
            times += code_time
            sents += 1

            if coded_events:
                sent_rec['events'] = coded_events
                sent_rec['meta'] = meta

                if (PETRglobals.WriteActorText or PETRglobals.WriteEventText
                        or PETRglobals.WriteActorRoot):
                    text_dict = utilities.extract_phrases(sent_rec, SentenceID)
                    if text_dict:
                        sent_rec['meta']['actortext'] = {}
                        sent_rec['meta']['eventtext'] = {}
                        sent_rec['meta']['actorroot'] = {}
                        for evt in coded_events:
                            # 16.04.30 pas bypasses problems with expansion
                            # of compounds
                            if evt in text_dict:
                                sent_rec['meta']['actortext'][evt] = text_dict[evt][:2]
                                sent_rec['meta']['eventtext'][evt] = text_dict[evt][2]
                                sent_rec['meta']['actorroot'][evt] = text_dict[evt][3:5]

            if coded_events and PETRglobals.IssueFileName != "":
                event_issues = get_issues(SentenceText)
                if event_issues:
                    sent_rec['issues'] = event_issues

            if PETRglobals.PauseBySentence:
                if len(input("Press Enter to continue...")) > 0:
                    sys.exit()

            NEvents += len(coded_events)
            if len(coded_events) == 0:
                NEmpty += 1

        if SkipStory:
            event_dict[key]['sents'] = None

    print("\nSummary:")
    print("Stories read:", NStory, " Sentences coded:", NSent,
          " Events generated:", NEvents)
    print("Discards: Sentence", NDiscardSent, " Story", NDiscardStory,
          " Sentences without events:", NEmpty)
    print("Average Coding time = ", times / sents if sents else 0)
    return event_dict
def process_target_bak(q, l, first_task, cli_args, multi_log_lock):
    """
    Worker-process body: set up PETRARCH, then process tasks until the
    shared queue is empty.

    Parameters
    ----------
    q: queue
       Shared task queue; only ``qsize()`` and ``get()`` are used.

    l: lock
       Guards the "check size, then get" step on `q`; only ``acquire()``
       and ``release()`` are used.

    first_task: object
                Task handed to the worker at start-up; processed before the
                queue is polled.

    cli_args: namespace
              Parsed command-line arguments (config, nullverbs, nullactors,
              outputs).

    multi_log_lock: lock
                    Passed through to write_multiprocess_log() and
                    process_task().
    """
    # The child process first loads everything it needs in order to run.
    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # The log line now names the mode actually selected (it used to say
        # "null verbs").
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    print('\n\n')

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    # Create a session for talking to the database.
    session = Session()

    # The child process completes its first task before polling the queue.
    write_multiprocess_log(
        multi_log_lock,
        '{}Process {}: {}'.format(u'', os.getpid(), first_task))
    process_task(first_task, out, multi_log_lock, session)

    while l.acquire():
        try:
            # empty() is unreliable, so qsize() is used to test for an
            # empty queue.
            if q.qsize() == 0:
                break
            # Fetch the next task while still holding the lock.
            task = q.get()
        finally:
            # Always release the lock, even if qsize()/get() raises, so the
            # other workers cannot be left blocked.
            l.release()

        # Carry out the task that was fetched.
        write_multiprocess_log(
            multi_log_lock,
            '{}Process {}: {}'.format(u'', os.getpid(), task))
        process_task(task, out, multi_log_lock, session)

    write_multiprocess_log(
        multi_log_lock,
        '{}Process {}: {}'.format(u'', os.getpid(), u'exited...'))
def main():
    """
    Command-line entry point.

    The 'background' command hands control to run_in_background(); a
    KeyboardInterrupt raised there ends the program.  Otherwise the
    configuration is loaded, null-verbs or null-actors mode is switched on
    if requested, the dictionaries are read, the input is resolved and
    run() is called.  'parse' forwards ``cli_args.parsed`` as the third
    argument of run(); every other command passes True.

    Raises
    ------
    SystemExit
        When ``cli_args.inputs`` is neither a directory nor a file.
    """
    cli_args = parse_cli_args()

    # miaoweixin added begin
    # Run as a background program in an infinite loop.
    if cli_args.command_name == 'background':
        try:
            # infinite loop
            run_in_background(cli_args)
        except KeyboardInterrupt:
            print("Program exited due to keyboard interrupt.\n")
            return None
    # miaoweixin added end

    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # The log line now names the mode actually selected (it used to say
        # "null verbs").
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    # Default to the files listed in the config; --inputs overrides it.
    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            # os.path.join copes with a missing or present trailing slash.
            paths = glob.glob(os.path.join(cli_args.inputs, '*.xml'))
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            sys.exit()
    elif cli_args.command_name == 'javainfo':
        # add else to java info 0904
        paths = 'javainfo'

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    if cli_args.command_name == 'parse':
        run(paths, out, cli_args.parsed, cli_args)
    else:
        run(paths, out, True, cli_args)  # third argument is always True here

    print("Coding time:", time.time() - start_time)

    print("Finished")
def main():
    """
    Command-line entry point.

    'parse' and 'batch' load the configuration and dictionaries, resolve
    the input files and call run() ('parse' forwards ``cli_args.parsed`` as
    the third argument, 'batch' passes True).  'preprocess' loads the
    configuration only, resolves the input files and calls preprocess().
    "Finished" is printed for every command.

    Raises
    ------
    SystemExit
        When ``cli_args.inputs`` is neither a directory nor a file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log', cli_args.debug)
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.command_name == 'parse' or cli_args.command_name == 'batch':
        _load_config(cli_args, logger)
        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        paths = _resolve_input_paths(cli_args)

        out = ""  # PETRglobals.EventFileName
        if cli_args.outputs:
            out = cli_args.outputs

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)
        else:
            run(paths, out, True)  # batch: third argument is always True

        print("Coding time:", time.time() - start_time)

    elif cli_args.command_name == 'preprocess':
        _load_config(cli_args, logger)
        start_time = time.time()
        print('\n\n')

        paths = _resolve_input_paths(cli_args)
        preprocess(paths)

        print("Preprocessing time:", time.time() - start_time)

    print("Finished")


def _load_config(cli_args, logger):
    """Parse the user-supplied config file, or the packaged default."""
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(
            cli_args.config))
        PETRglobals.ConfigFileName = cli_args.config
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRglobals.ConfigFileName = 'PETR_config.ini'
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))


def _resolve_input_paths(cli_args):
    """Return the XML files to process; exit if --inputs cannot be found."""
    # Default to the files listed in the config; --inputs overrides it.
    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            # os.path.join copes with a missing or present trailing slash.
            paths = glob.glob(os.path.join(cli_args.inputs, '*.xml'))
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            sys.exit()
    return paths
def read_dictionaries(validation=False):
    """
    Load the coding ontology and every dictionary named in PETRglobals.

    Read order: internal coding ontology, verb dictionary, Petrarch 1 verb
    dictionary (only when ``PETRglobals.CodeWithPetrarch1`` is set), actor
    dictionaries, agent dictionaries, discard list and -- only when
    ``PETRglobals.IssueFileName`` is non-empty -- the issues list.  Each
    file is located with ``utilities._get_data('data/dictionaries', <name>)``
    and its name is echoed to stdout before it is read.

    Parameters
    ----------
    validation: bool
                Accepted for interface compatibility; not consulted here.
    """
    dict_dir = 'data/dictionaries'

    ontology_name = PETRglobals.InternalCodingOntologyFileName
    print('Internal Coding Ontology:', ontology_name)
    PETRreader.read_internal_coding_ontology(
        utilities._get_data(dict_dir, ontology_name))

    verb_name = PETRglobals.VerbFileName
    print('Verb dictionary:', verb_name)
    PETRreader.read_verb_dictionary(utilities._get_data(dict_dir, verb_name))

    if PETRglobals.CodeWithPetrarch1:
        p1_verb_name = PETRglobals.P1VerbFileName
        print('Petrarch 1 Verb dictionary:', p1_verb_name)
        PETRreader.read_petrarch1_verb_dictionary(
            utilities._get_data(dict_dir, p1_verb_name))

    actor_names = PETRglobals.ActorFileList
    print('Actor dictionaries:', actor_names)
    for actor_name in actor_names:
        PETRreader.read_actor_dictionary(
            utilities._get_data(dict_dir, actor_name))

    agent_names = PETRglobals.AgentFileList
    print('Agent dictionary:', agent_names)
    for agent_name in agent_names:
        PETRreader.read_agent_dictionary(
            utilities._get_data(dict_dir, agent_name))

    discard_name = PETRglobals.DiscardFileName
    print('Discard dictionary:', discard_name)
    PETRreader.read_discard_list(utilities._get_data(dict_dir, discard_name))

    # The issues list is optional: an empty file name means "not configured".
    if PETRglobals.IssueFileName == "":
        return
    issue_name = PETRglobals.IssueFileName
    print('Issues dictionary:', issue_name)
    PETRreader.read_issue_list(utilities._get_data(dict_dir, issue_name))
def test_personal1():
    """The personal pronoun "he" must take the meaning of "Obama"."""
    tree_string = "(S (NP (NNP Obama ) ) (VP (VBD said ) (SBAR (S (NP (PRP he ) ) (VP (VBD was ) (ADJP (VBN tired ) ) ) ) ) ) ) ".upper()
    sentence_date = PETRreader.dstr_to_ordate("20150813")
    sentence = ptree.Sentence(tree_string, "Obama said he was tired",
                              sentence_date)

    # Walk VP -> SBAR -> S -> NP to reach the phrase holding the pronoun.
    verb_phrase = sentence.tree.children[1]
    sbar = verb_phrase.children[1]
    embedded_clause = sbar.children[0]
    pronoun_np = embedded_clause.children[0]
    assert pronoun_np.get_meaning() == ["USAGOV"]
def preprocess(filepaths):
    """
    Run the dependency-parse preprocessing step; called from main().

    Parameters
    ----------
    filepaths: list
               Input XML files, passed unchanged to
               ``PETRreader.depparse_xml_input``.

    Returns
    -------
    None.  The value returned by ``depparse_xml_input`` is discarded (it
    was previously bound to a local that was never read, as was a logger).
    """
    PETRreader.depparse_xml_input(filepaths)
def do_coding(event_dict):
    """
    Main coding loop.

    Walks the stories of `event_dict` in sorted key order and, for every
    sentence that carries a 'parsed' entry, builds a ``PETRgraph.Sentence``
    and codes it with the Petrarch 2 coder, the Petrarch 1 pattern coder,
    or both, depending on ``PETRglobals.CodeWithPetrarch2`` and
    ``PETRglobals.CodeWithPetrarch1``.  Results are stored back into the
    sentence record ('events', 'verbs', 'nouns', 'triplets', 'issues').
    Sentences or whole stories matched by check_discards() are skipped; a
    discarded story has its 'sents' entry set to None.

    Note that entering any character other than 'Enter' at the prompt will
    stop the program: this is deliberate.
    <14.02.28>: Bug: PETRglobals.PauseByStory actually pauses after the first
                sentence of the *next* story

    Parameters
    ----------
    event_dict: dict
                Story id -> {'meta': {'date': ...}, 'sents': {sent id: {...}}}.
                Updated in place.

    Returns
    -------
    event_dict: dict
                The same object that was passed in.
    """
    NStory = 0
    NSent = 0
    NEvents = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0
    for key, val in sorted(event_dict.items()):
        NStory += 1
        SkipStory = False
        StoryDate = event_dict[key]['meta']['date']

        for sent in val['sents']:
            NSent += 1
            SentenceID = '{}_{}'.format(key, sent)
            sent_rec = event_dict[key]['sents'][sent]

            if 'parsed' not in sent_rec:
                logger.info(
                    '{} has no parse information. Passing.'.format(SentenceID))
                continue

            if 'config' in sent_rec:
                for config in sent_rec['config'].values():
                    change_Config_Options(config)

            SentenceText = sent_rec['content']
            SentenceDate = sent_rec['date'] if 'date' in sent_rec else StoryDate
            Date = PETRreader.dstr_to_ordate(SentenceDate)

            print("\n", SentenceID)
            treestr = sent_rec['parsed']

            # disc[0]: 0 = keep, 1 = discard this sentence, anything else
            # above 0 = discard the whole story.
            disc = check_discards(SentenceText)
            if disc[0] > 0:
                if disc[0] == 1:
                    print("Discard sentence:", disc[1])
                    logger.info('\tSentence discard. {}'.format(disc[1]))
                    NDiscardSent += 1
                    continue
                print("Discard story:", disc[1])
                logger.info('\tStory discard. {}'.format(disc[1]))
                SkipStory = True
                NDiscardStory += 1
                break

            t1 = time.time()
            sentence = PETRgraph.Sentence(treestr, SentenceText, Date)
            # this is the entry point into the processing in PETRgraph
            coded_events = {}

            if PETRglobals.CodeWithPetrarch2:
                p2_coded_events = sentence.get_events()
                coded_events.update(p2_coded_events)

                sent_rec['events'] = sentence.events
                sent_rec['verbs'] = sentence.verbs
                sent_rec['nouns'] = sentence.nouns
                sent_rec['triplets'] = sentence.triplets

            if PETRglobals.CodeWithPetrarch1:
                p1_coded_events = sentence.get_events_from_petrarch1_patterns()
                # Petrarch 1 results are merged into whatever Petrarch 2
                # stored, under ids 'p1_0', 'p1_1', ...
                events_out = sent_rec.setdefault('events', {})
                triplets_out = sent_rec.setdefault('triplets', {})
                for i, p1_event in enumerate(p1_coded_events):
                    event_id = 'p1_' + str(i)
                    events_out[event_id] = [[p1_event[0]], [p1_event[1]],
                                            p1_event[2]]
                    triplets_out[event_id] = {}
                    triplets_out[event_id]['matched_txt'] = p1_event[5]
                    triplets_out[event_id]['source_text'] = (
                        p1_event[3] if p1_event[3] is not None else "---")
                    triplets_out[event_id]['target_text'] = (
                        p1_event[4] if p1_event[4] is not None else "---")
                    triplets_out[event_id]['verb_text'] = p1_event[6]
                    coded_events[event_id] = events_out[event_id]

            # .get() keeps the debug dump from raising KeyError when neither
            # coder is enabled and nothing was stored for this sentence.
            logger.debug("check events of id:" + SentenceID)
            for eventID, event in sent_rec.get('events', {}).items():
                logger.debug("event:" + eventID)
                logger.debug(event)
            for tID, triplet in sent_rec.get('triplets', {}).items():
                logger.debug("triplet:" + tID)
                logger.debug(triplet['matched_txt'])

            code_time = time.time() - t1

            # The null-verbs / null-actors handling that stored `meta` on the
            # story record is disabled in this version of the loop.

            del sentence
            times += code_time
            sents += 1

            if coded_events and PETRglobals.IssueFileName != "":
                event_issues = get_issues(SentenceText)
                if event_issues:
                    sent_rec['issues'] = event_issues

            if PETRglobals.PauseBySentence:
                if len(input("Press Enter to continue...")) > 0:
                    sys.exit()

            NEvents += len(coded_events)
            if len(coded_events) == 0:
                NEmpty += 1

        if SkipStory:
            event_dict[key]['sents'] = None

    print("\nSummary:")
    print("Stories read:", NStory, " Sentences coded:", NSent,
          " Events generated:", NEvents)
    print("Discards: Sentence", NDiscardSent, " Story", NDiscardStory,
          " Sentences without events:", NEmpty)
    print("Average Coding time = ", times / sents if sents else 0)
    return event_dict
def do_coding(event_dict, out_file):
    """
    Main coding loop: code every parsed sentence of every story.

    Stories are visited in sorted key order. For each sentence that carries
    a 'parsed' entry the discard list is checked, a PETRtree.Sentence is
    built and coded, and the resulting events (and issues, when an issue
    file is configured) are stored back into event_dict in place.

    Parameters
    ----------
    event_dict: dict
        Story key -> {'meta': {'date': ...}, 'sents': {sent key -> {...}}}.
        Each sentence dict is read for 'content' and optionally 'parsed',
        'date' and 'config'.
    out_file: str or None
        When truthy, a TeX file is opened with open_tex() and every coded
        sentence tree is printed to it.

    Returns
    -------
    dict
        The same event_dict object, updated in place. The 'sents' entry of
        a story hit by a story-level discard is replaced with None.

    Notes
    -----
    Entering any character other than 'Enter' at the PauseBySentence prompt
    stops the program: this is deliberate.
    """
    NStory = 0
    NSent = 0
    NEvents = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0

    tex_file = open_tex(out_file) if out_file else None
    try:
        for key, val in sorted(event_dict.items()):
            NStory += 1
            SkipStory = False
            print('\n\nProcessing {}'.format(key))
            StoryDate = event_dict[key]['meta']['date']

            for sent in val['sents']:
                NSent += 1
                sent_record = event_dict[key]['sents'][sent]
                # Build the ID before testing for a parse: the "no parse"
                # log message needs it, and it used to be unbound (or left
                # over from the previous sentence) on that path.
                SentenceID = '{}_{}'.format(key, sent)

                if 'parsed' not in sent_record:
                    logger.info(
                        '{} has no parse information. Passing.'.format(
                            SentenceID))
                    continue

                # Per-sentence configuration overrides, applied before coding.
                if 'config' in sent_record:
                    for config in sent_record['config'].values():
                        change_Config_Options(config)

                SentenceText = sent_record['content']
                # A sentence-level date wins over the story date.
                SentenceDate = sent_record.get('date', StoryDate)
                Date = PETRreader.dstr_to_ordate(SentenceDate)

                print("\t\t", SentenceID)
                treestr = sent_record['parsed']

                # disc[0]: 0 = keep, 1 = discard sentence, other = discard story.
                disc = check_discards(SentenceText)
                if disc[0] > 0:
                    if disc[0] == 1:
                        print("Discard sentence:", disc[1])
                        logger.info('\tSentence discard. {}'.format(disc[1]))
                        NDiscardSent += 1
                        continue
                    print("Discard story:", disc[1])
                    logger.info('\tStory discard. {}'.format(disc[1]))
                    SkipStory = True
                    NDiscardStory += 1
                    break

                t1 = time.time()
                sentence = PETRtree.Sentence(treestr, SentenceText, Date)
                coded_events, meta = sentence.get_events()
                code_time = time.time() - t1
                # Story-level 'verbs' metadata is overwritten by each coded
                # sentence, so it ends up holding the last sentence's value.
                event_dict[key]['meta']['verbs'] = meta
                if out_file:
                    sentence.print_to_file(sentence.tree, file=tex_file)

                del sentence
                times += code_time
                sents += 1
                print('\t\t', code_time)

                if coded_events:
                    sent_record['events'] = coded_events
                if coded_events and PETRglobals.IssueFileName != "":
                    event_issues = get_issues(SentenceText)
                    if event_issues:
                        sent_record['issues'] = event_issues

                if PETRglobals.PauseBySentence:
                    if len(input("Press Enter to continue...")) > 0:
                        sys.exit()

                NEvents += len(coded_events)
                if len(coded_events) == 0:
                    NEmpty += 1

            if SkipStory:
                event_dict[key]['sents'] = None
    finally:
        # Always finish the TeX file, including when coding raises or the
        # user stops the run from the pause prompt (SystemExit).
        if out_file:
            close_tex(tex_file)

    print("\nSummary:")
    print(
        "Stories read:",
        NStory,
        " Sentences coded:",
        NSent,
        " Events generated:",
        NEvents)
    print(
        "Discards: Sentence",
        NDiscardSent,
        " Story",
        NDiscardStory,
        " Sentences without events:",
        NEmpty)
    print("Average Coding time = ", times / sents if sents else 0)
    return event_dict
def run(filepaths, out_file, s_parsed):
    """
    Read XML input, code it, and write the resulting events to out_file.

    filepaths and s_parsed are handed to PETRreader.read_xml_input(); when
    s_parsed is falsy the stories are first run through
    utilities.stanford_parse() before coding.
    """
    stories = PETRreader.read_xml_input(filepaths, s_parsed)
    needs_parsing = not s_parsed
    if needs_parsing:
        stories = utilities.stanford_parse(stories)
    PETRwriter.write_events(do_coding(stories, out_file), out_file)
def test_reflexive2():
    """
    Check that a reflexive pronoun in an embedded clause takes the meaning
    of that clause's subject (PUTIN -> RUSGOV).
    """
    tree_text = ("(S (NP (NNP Obama ) ) (VP (VBD knew ) (SBAR (IN that ) "
                 "(S (NP (NNP Putin ) ) (VP (VBD liked ) "
                 "(NP (PRP himself ) ) ) ) ) ) ) ").upper()
    ordinal_date = PETRreader.dstr_to_ordate("20150813")
    # NOTE(review): the plain-text sentence does not match the parse
    # (Biden/him vs. Putin/himself); the assertion follows the parse.
    sentence = ptree.Sentence(
        tree_text, "Obama knew that Biden liked him", ordinal_date)

    # Walk down the second child five times to reach the reflexive phrase.
    node = sentence.tree
    for child_index in (1, 1, 1, 1, 1):
        node = node.children[child_index]
    assert node.get_meaning() == ["RUSGOV"]
def run(filepaths, out_file, s_parsed):
    """
    Routine called from main(): read the XML input and code it.

    In this variant out_file is accepted but not used, and the value
    returned by do_coding() is discarded. The Stanford-parse step for
    unparsed input (s_parsed falsy) is disabled here.
    """
    stories = PETRreader.read_xml_input(filepaths, s_parsed)
    do_coding(stories)
others = "" for other in triple[3]: others = others + other.text + "," tuples = tuples + "source: " + source + "\ttarget: " + target + "\tverb: " + triple[ 2].text + "\tother_noun: " + others + "\n" ET.SubElement(sentence, "Triplets").text = tuples tree = ET.ElementTree(root) tree.write(outputfile, 'UTF-8') utilities.init_logger('PETRARCH.log', True) config = utilities._get_data('data/config/', 'PETR_config.ini') print("reading config") sys.stdout.write('Mk1\n') PETRreader.parse_Config(config) print("reading dicts") petrarch_ud.read_dictionaries() inputFile = sys.argv[1] #inputFile=sys.argv[1].replace(".xml","")+"_parsed.xml" outputFile = inputFile.replace("_parsed.xml", "") + "_phrase.xml" events = read_xml_input([inputFile], True) ''' print(len(events)) for key in events.keys(): print(len(events[key]['sents'])) for subkey,v in events[key]['sents'].items(): print(subkey) print(v) ''' updated_events = extract_actor_code(events)
def do_coding(event_dict):
    """
    Main coding loop with release/report-time tracking.

    Keys of event_dict are expected to look like '<articleId>-<paragraphId>'.
    Paragraphs are visited in sorted key order; for every sentence carrying
    a 'parsed' entry the discard list is checked, a PETRtree.Sentence is
    built, classified and coded, and events plus metadata are written back
    into event_dict in place. Time information is appended to the file
    'timeinfo.txt' in the current working directory as a side effect.

    Parameters
    ----------
    event_dict: dict
        Paragraph key -> {'meta': {'date': ...}, 'sents': {sent key -> {...}}}.

    Returns
    -------
    dict
        The same event_dict object, updated in place.

    Notes
    -----
    Entering any character other than 'Enter' at the PauseBySentence prompt
    stops the program: this is deliberate.
    """
    NStory = 0
    NSent = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0

    # Get the release (publication) time of each article.
    realiseTimeDic = get_releasetime(event_dict)
    if not realiseTimeDic:
        print(
            "realiseTimeDic have no timeinfo ,please check “get_releasetime” method"
        )
    # Get the report time of each article.
    reporttimeDic = get_reporttime(event_dict, realiseTimeDic)

    for key, val in sorted(event_dict.items()):
        NStory += 1
        SkipStory = False
        print('\n\nProcessing paragraph {}'.format(key))
        StoryDate = event_dict[key]['meta']['date']
        if StoryDate == 'NULL':
            continue

        id_parts = key.split("-")
        articleId = id_parts[0]
        paraghId = id_parts[1]

        # Set release and report time; a missing report time falls back to
        # the release time.
        val["meta"]["realiseTime"] = realiseTimeDic[articleId]
        if articleId in reporttimeDic:
            val["meta"]["reportTime"] = reporttimeDic[articleId]
        else:
            val["meta"]["reportTime"] = realiseTimeDic[articleId]

        # NOTE(review): the .decode("utf-8").encode("utf-8") round trips
        # below only work on Python 2 byte strings -- confirm the target
        # interpreter before porting.
        if paraghId == "0000":
            with open("timeinfo.txt", "a") as f:
                f.writelines(("发布时间:" + val["meta"]["realiseTime"]
                              ).decode("utf-8").encode("utf-8") + "\n")
                f.writelines(("报道时间:" + val["meta"]["reportTime"]
                              ).decode("utf-8").encode("utf-8") + "\n")
        with open("timeinfo.txt", "a") as f:
            f.writelines(("文章段落ID:" + articleId + " " + paraghId +
                          "\n").decode("utf-8").encode("utf-8"))

        for sent in sorted(val['sents']):
            print('\n\nProcessing sentence {}'.format(sent))
            NSent += 1
            # Build the ID before testing for a parse: the "no parse" log
            # message needs it, and it used to be unbound (or left over from
            # the previous sentence) on that path.
            SentenceID = '{}_{}'.format(key, sent)

            if 'parsed' not in event_dict[key]['sents'][sent]:
                logger.info(
                    '{} has no parse information. Passing.'.format(SentenceID))
                continue

            SentenceText = event_dict[key]['sents'][sent]['content']
            SentenceDate = event_dict[key]['sents'][sent][
                'date'] if 'date' in event_dict[key]['sents'][
                    sent] else StoryDate
            # Keep only the date part and strip dashes before conversion.
            Date = PETRreader.dstr_to_ordate(
                SentenceDate.split(' ')[0].replace('-', ''))
            treestr = event_dict[key]['sents'][sent]['parsed']

            # disc[0]: 0 = keep, 1 = discard sentence, other = discard story.
            disc = check_discards(SentenceText)
            if disc[0] > 0:
                if disc[0] == 1:
                    print("Discard sentence:", disc[1])
                    logger.info('\tSentence discard. {}'.format(disc[1]))
                    NDiscardSent += 1
                    continue
                else:
                    print("Discard story:", disc[1])
                    logger.info('\tStory discard. {}'.format(disc[1]))
                    # NOTE(review): this leaves SkipStory False, so the
                    # story's 'sents' are NOT cleared after the loop;
                    # confirm whether True was intended.
                    SkipStory = False
                    NDiscardStory += 1
                    break

            t1 = time.time()
            try:
                sentence = PETRtree.Sentence(treestr, SentenceText, Date)
                # The next line invokes the syntax-tree classifier.
                sentence.classify_tree()
            except Exception as e:
                message = "ERROR IN PETRARCH2 DO_CODING:" + SentenceID + "\n" + SentenceText + str(
                    e) + "\n"
                logging.exception(message)
                continue

            set_nt_textList(sentence)
            set_sentenceTimeByReport(sentence, val["meta"]["reportTime"],
                                     val['sents'], sent)

            with open("timeinfo.txt", "a") as f:
                f.writelines((" 句子ID:" + sent +
                              "\n").decode("utf-8").encode("utf-8"))
                f.write(" " + sentence.txt.decode("utf-8").encode("utf-8") +
                        "\n")
                f.write(" 时间词列表: ")
                for text in sentence.ntTextList:
                    f.write(text + ",")
                f.write("\n 句子时间:" +
                        str(sentence.sentenceTime).decode("utf-8").encode(
                            "utf-8") + "\n\n")

            timeText = sentence.ntTextList
            sentenceTime = sentence.sentenceTime

            try:
                coded_events, meta = sentence.get_events()
            except Exception as e:
                message = "ERROR IN PETRARCH2 DO_CODING:" + SentenceID + "\n" + SentenceText + str(
                    e) + "\n"
                logging.exception(message)
                # Skip this sentence: falling through would either hit
                # unbound names or attach the previous sentence's events
                # and metadata to this one.
                continue

            code_time = time.time() - t1

            # For now only the last branch is actually taken.
            if PETRglobals.NullVerbs or PETRglobals.NullActors:
                event_dict[key]['meta'] = meta
                event_dict[key]['text'] = sentence.txt
            elif PETRglobals.NullActors:
                # NOTE(review): unreachable -- NullActors is already covered
                # by the condition above.
                event_dict[key]['events'] = coded_events
                coded_events = None  # skips additional processing
                event_dict[key]['text'] = sentence.txt
            else:
                # 16.04.30 pas: the key 'meta' is used at two very different
                # levels of event_dict (paragraph level here, sentence level
                # further down); this is potentially confusing, so it would
                # probably be useful to rename one of them.
                event_dict[key]['meta']['verbs'] = meta

            del sentence
            times += code_time
            sents += 1

            if coded_events:
                event_dict[key]['sents'][sent]['events'] = coded_events
                event_dict[key]['sents'][sent]['meta'] = meta

                if PETRglobals.WriteActorText or PETRglobals.WriteEventText or PETRglobals.WriteActorRoot:
                    text_dict = utilities.extract_phrases(
                        event_dict[key]['sents'][sent], SentenceID)
                    print('DC-td1:', text_dict)  # --
                    if text_dict:
                        sent_meta = event_dict[key]['sents'][sent]['meta']
                        sent_meta['actortext'] = {}
                        sent_meta['eventtext'] = {}
                        sent_meta['actorroot'] = {}
                        sent_meta['eventroot'] = {}
                        sent_meta['Source'] = {}
                        sent_meta['Target'] = {}
                        sent_meta['timeText'] = timeText
                        # NOTE(review): stored as a one-element set, as in
                        # the original; confirm consumers expect a set.
                        sent_meta['sentenceTime'] = {sentenceTime}
                        # (A disabled experiment that filtered the 'ner'
                        # locations by text position used to live here.)
                        for evt in coded_events:
                            # 16.04.30 pas: the membership test bypasses
                            # problems with expansion of compounds.
                            if evt in text_dict:
                                sent_meta['actortext'][evt] = text_dict[evt][:2]
                                sent_meta['eventtext'][evt] = text_dict[evt][2]
                                sent_meta['actorroot'][evt] = text_dict[evt][3:5]
                                sent_meta['eventroot'][evt] = text_dict[evt][5]
                                sent_meta['Source'][evt] = text_dict[evt][0]
                                sent_meta['Target'][evt] = text_dict[evt][1]

            if coded_events and PETRglobals.IssueFileName != "":
                event_issues = get_issues(SentenceText)
                if event_issues:
                    event_dict[key]['sents'][sent]['issues'] = event_issues

            if PETRglobals.PauseBySentence:
                if len(input("Press Enter to continue...")) > 0:
                    sys.exit()

            # The event total is not accumulated in this variant.
            if coded_events is not None and len(coded_events) == 0:
                NEmpty += 1

        if SkipStory:
            event_dict[key]['sents'] = None

    # Only the header is printed; the detailed summary (stories, sentences,
    # discards, average coding time) is disabled in this variant.
    print("\nSummary:")
    return event_dict