class BTime(TarsqiComponent):

    """Fledgling component to add Timex tags to a document. It now only
    serves to fill in some timexes that GUTime does not get, doing it this
    way is easier than modifying GUTime.

    Instance variables:
       NAME - a string
       doctree - a Document"""

    def __init__(self):
        """Set the NAME instance variable."""
        self.NAME = BTIME

    def process(self, infile, outfile):
        """Process a fragment file and add TIMEX3 tags that were missed by
        Tempex.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        # close the input file deterministically instead of leaking the handle
        with open(infile, 'r') as fh:
            xmldoc = Parser().parse_file(fh)
        self.doctree = FragmentConverter(xmldoc, infile).convert()
        #self.print_doctree(BTIME)
        self.find_timexes()
        self.doctree.printOut(outfile)

    def find_timexes(self):
        """Loop through all sentences in self.doctree and through all nodes in
        each sentence and search for missed timexes in noun groups."""
        for sentence in self.doctree:
            for node in sentence:
                if node.isNounChunk():
                    self.find_timex_in_noungroup(node)

    def find_timex_in_noungroup(self, noungroup):
        """Find missing timexes in noun groups. Searches for years. Assumes
        that the noun group is not contained in a Timex tag."""
        for idx, node in enumerate(noungroup):
            if node.isToken() and node.pos == POS_CD:
                text = node.getText()
                # a four-digit cardinal number is taken to be a year
                if text.isdigit() and len(text) == 4:
                    timex = TimexTag({'TYPE': 'DATE', 'VAL': text})
                    timex.dtrs = [node]      # this changes doctree.sentenceList
                    noungroup[idx] = timex   # and this changes doctree.nodeList
                    self.doctree.addTimexTag(timex)
class Evita(TarsqiComponent):

    """Class that implements Evita's event recognizer.

    Instance variables:
       NAME - a string
       doctree - a Document instance"""

    def __init__(self):
        """Set the NAME instance variable."""
        self.NAME = EVITA

    def process(self, infile, outfile):
        """Process a fragment file and write a file with EVENT tags.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        # The old event parser is kept around for reference but disabled.
        use_old = False
        if use_old:
            self.doctree = parseFile(infile)
        else:
            # close the input file deterministically instead of leaking it
            with open(infile, 'r') as fh:
                xmldoc = Parser().parse_file(fh)
            # creating the document tree takes way too long, needs
            # to be optimized
            self.doctree = FragmentConverter(xmldoc, infile).convert()
        self.extractEvents()
        self.doctree.printOut(outfile)

    def extractEvents(self):
        """Loop through all sentences in self.doctree and through all nodes in
        each sentence and determine if the node contains an event."""
        for sentence in self.doctree:
            logger.debug("> SENTENCE:" + str(getWordList(sentence)))
            for node in sentence:
                # nodes already visited (e.g. as part of a larger chunk)
                # are skipped
                if not node.flagCheckedForEvents:
                    node.createEvent()
                else:
                    logger.debug("PASSING, already checked!")
class Evita(TarsqiComponent):

    """Class that implements Evita's event recognizer.

    Instance variables:
       NAME - a string
       doctree - a Document instance"""

    def __init__(self):
        """Set the NAME instance variable."""
        self.NAME = EVITA

    def process(self, infile, outfile):
        """Process a fragment file and write a file with EVENT tags.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        # The old event parser is kept around for reference but disabled.
        use_old = False
        if use_old:
            self.doctree = parseFile(infile)
        else:
            # close the input file deterministically instead of leaking it
            with open(infile, "r") as fh:
                xmldoc = Parser().parse_file(fh)
            # creating the document tree takes way too long, needs
            # to be optimized
            self.doctree = FragmentConverter(xmldoc, infile).convert()
        self.extractEvents()
        self.doctree.printOut(outfile)

    def extractEvents(self):
        """Loop through all sentences in self.doctree and through all nodes in
        each sentence and determine if the node contains an event."""
        for sentence in self.doctree:
            logger.debug("> SENTENCE:" + str(getWordList(sentence)))
            for node in sentence:
                # nodes already visited (e.g. as part of a larger chunk)
                # are skipped
                if not node.flagCheckedForEvents:
                    node.createEvent()
                else:
                    logger.debug("PASSING, already checked!")
class Slinket(TarsqiComponent):

    """Class that implements the Slinket SLINK and ALINK parser.

    Instance variables:
       NAME - a string
       doctree - a Document"""

    def __init__(self):
        """Load the Slinket dictionaries if they have not been loaded yet."""
        self.NAME = SLINKET
        self.doctree = None
        SLINKET_DICTS.load()

    def process(self, infile, outfile):
        """Run Slinket on the input file and write the results to the output
        file. Both input and output file are fragments. Uses the xml parser as
        well as the fragment converter to prepare the input and create the
        shallow tree that Slinket requires.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        # The old event parser is kept around for reference but disabled.
        use_old = False
        if use_old:
            self.doctree = eventParser.readFileWithEvents(infile)
        else:
            # close the input file deterministically instead of leaking it
            with open(infile, 'r') as fh:
                xmldoc = Parser().parse_file(fh)
            self.doctree = FragmentConverter(xmldoc, infile).convert(user=SLINKET)
        for sentence in self.doctree:
            self._find_links(self.doctree, sentence)
        self.doctree.printOut(outfile)

    def _find_links(self, doc, sentence):
        """For each event in the sentence, check whether an Alink or Slink can
        be created for it."""
        self.currSent = sentence
        for eventNum, (eLocation, eId) in enumerate(self.currSent.eventList):
            event_expr = EventExpression(eId, eLocation, eventNum,
                                         doc.taggedEventsDict[eId])
            # alinks
            if event_expr.can_introduce_alink():
                logger.debug("EVENT: '" + event_expr.form + "' is candidate to Alinking")
                self._find_alinks(event_expr)
            # lexical slinks
            if event_expr.can_introduce_slink():
                logger.debug("EVENT: '" + event_expr.form + "' is candidate to Slinking")
                self._find_lexically_based_slinks(event_expr)
            # syntactic slinks
            self._find_purpose_clause_slinks(event_expr)
            self._find_conditional_slinks(event_expr)

    def _find_alinks(self, event_expr):
        """Try to create Alinks for the event, first with the forward and then
        with the backward FSA patterns. No return value.
        Arguments:
           event_expr - an EventExpression"""
        evNode = self.currSent[event_expr.locInSent]
        if evNode is None:
            # bail out: proceeding would call methods on None
            logger.warning("No event node found at locInSent")
            return
        forwardFSAs = event_expr.alinkingContexts('forward')
        if forwardFSAs:
            logger.debug("PROCESS for FORWARD alinks")
            evNode.createForwardAlink(forwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0
                return
        backwardFSAs = event_expr.alinkingContexts('backwards')
        if backwardFSAs:
            logger.debug("PROCESS for BACKWARD alinks")
            evNode.createBackwardAlink(backwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0

    def _find_lexically_based_slinks(self, event_expr):
        """Try to find lexically based Slinks using forward, backward and
        reporting FSA patterns. No return value, if an Slink is found, it will
        be created by the chunk that embeds the Slink triggering event.
        Arguments:
           event_expr - an EventExpression"""
        evNode = self.currSent[event_expr.locInSent]
        if evNode is None:
            # bail out: proceeding would call methods on None
            logger.error("No event node found at locInSent")
            return
        forwardFSAs = event_expr.slinkingContexts('forward')
        if forwardFSAs:
            evNode.find_forward_slink(forwardFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0
                return
        backwardFSAs = event_expr.slinkingContexts('backwards')
        if backwardFSAs:
            logger.debug("PROCESS for BACKWARD slinks")
            evNode.find_backward_slink(backwardFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0
                return
        reportingFSAs = event_expr.slinkingContexts('reporting')
        if reportingFSAs:
            logger.debug("PROCESS for REPORTING slinks")
            evNode.find_reporting_slink(reportingFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0

    def _find_purpose_clause_slinks(self, event_expr):
        """Not yet implemented. But note that some purpose clause SLINKS are
        already introduced in the lexically-triggered process. This is so for
        those events that discoursively tend to appear modified by a Purpose
        Clause (e.g., 'address'). The data are based on TimeBank."""
        pass

    def _find_conditional_slinks(self, event_expr):
        """Not yet implemented."""
        pass
class Slinket (TarsqiComponent):

    """Class that implements the Slinket SLINK and ALINK parser.

    Instance variables:
       NAME - a string
       doctree - a Document"""

    def __init__(self):
        """Load the Slinket dictionaries if they have not been loaded yet."""
        self.NAME = SLINKET
        self.doctree = None
        SLINKET_DICTS.load()

    def process(self, infile, outfile):
        """Run Slinket on the input file and write the results to the output
        file. Both input and output file are fragments. Uses the xml parser as
        well as the fragment converter to prepare the input and create the
        shallow tree that Slinket requires.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        # The old event parser is kept around for reference but disabled.
        use_old = False
        if use_old:
            self.doctree = eventParser.readFileWithEvents(infile)
        else:
            # close the input file deterministically instead of leaking it
            with open(infile, 'r') as fh:
                xmldoc = Parser().parse_file(fh)
            self.doctree = FragmentConverter(xmldoc, infile).convert(user=SLINKET)
        for sentence in self.doctree:
            self._find_links(self.doctree, sentence)
        self.doctree.printOut(outfile)

    def _find_links(self, doc, sentence):
        """For each event in the sentence, check whether an Alink or Slink can
        be created for it."""
        self.currSent = sentence
        for eventNum, (eLocation, eId) in enumerate(self.currSent.eventList):
            event_expr = EventExpression(eId, eLocation, eventNum,
                                         doc.taggedEventsDict[eId])
            # alinks
            if event_expr.can_introduce_alink():
                logger.debug("EVENT: '"+event_expr.form+"' is candidate to Alinking")
                self._find_alinks(event_expr)
            # lexical slinks
            if event_expr.can_introduce_slink():
                logger.debug("EVENT: '"+event_expr.form+"' is candidate to Slinking")
                self._find_lexically_based_slinks(event_expr)
            # syntactic slinks
            self._find_purpose_clause_slinks(event_expr)
            self._find_conditional_slinks(event_expr)

    def _find_alinks(self, event_expr):
        """Try to create Alinks for the event, first with the forward and then
        with the backward FSA patterns. No return value.
        Arguments:
           event_expr - an EventExpression"""
        evNode = self.currSent[event_expr.locInSent]
        if evNode is None:
            # bail out: proceeding would call methods on None
            logger.warning("No event node found at locInSent")
            return
        forwardFSAs = event_expr.alinkingContexts('forward')
        if forwardFSAs:
            logger.debug("PROCESS for FORWARD alinks")
            evNode.createForwardAlink(forwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0
                return
        backwardFSAs = event_expr.alinkingContexts('backwards')
        if backwardFSAs:
            logger.debug("PROCESS for BACKWARD alinks")
            evNode.createBackwardAlink(backwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0

    def _find_lexically_based_slinks(self, event_expr):
        """Try to find lexically based Slinks using forward, backward and
        reporting FSA patterns. No return value, if an Slink is found, it will
        be created by the chunk that embeds the Slink triggering event.
        Arguments:
           event_expr - an EventExpression"""
        evNode = self.currSent[event_expr.locInSent]
        if evNode is None:
            # bail out: proceeding would call methods on None
            logger.error("No event node found at locInSent")
            return
        forwardFSAs = event_expr.slinkingContexts('forward')
        if forwardFSAs:
            evNode.find_forward_slink(forwardFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0
                return
        backwardFSAs = event_expr.slinkingContexts('backwards')
        if backwardFSAs:
            logger.debug("PROCESS for BACKWARD slinks")
            evNode.find_backward_slink(backwardFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0
                return
        reportingFSAs = event_expr.slinkingContexts('reporting')
        if reportingFSAs:
            logger.debug("PROCESS for REPORTING slinks")
            evNode.find_reporting_slink(reportingFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0

    def _find_purpose_clause_slinks(self, event_expr):
        """Not yet implemented. But note that some purpose clause SLINKS are
        already introduced in the lexically-triggered process. This is so for
        those events that discoursively tend to appear modified by a Purpose
        Clause (e.g., 'address'). The data are based on TimeBank."""
        pass

    def _find_conditional_slinks(self, event_expr):
        """Not yet implemented."""
        pass