Пример #1
0
 def _getRestSent(self, structure_type):
     """Return the part of the sentence that follows this element.

     Arguments:
        structure_type - 'flat' to get the rest of the sentence as a list of
           tokens, 'chunked' to get it as a list of constituents

     For any other structure type a warning is logged and the list of
     constituents is returned, just as for 'chunked'."""
     # Everything in the parent to the right of this element.
     rest = self.parent[self.position + 1:]
     if structure_type == 'flat':
         return utils.get_tokens(rest)
     if structure_type != 'chunked':
         # This check used to be nested inside the 'chunked' branch, where it
         # could never fire; an unknown type then left the result unbound and
         # raised UnboundLocalError instead of falling back to constituents.
         logger.warn("unknown structure type: %s" % structure_type)
     return rest
Пример #2
0
 def _getRestSent(self, structure_type):
     """Obtain the rest of the sentence, that is, everything in the parent
     that comes after this element. The result is a list of tokens if
     structure_type is 'flat' and a list of constituents if structure_type is
     'chunked'. For an unknown structure type a warning is logged and the
     list of constituents is returned."""
     if structure_type == 'flat':
         restSentence = utils.get_tokens(self.parent[self.position + 1:])
     else:
         # Covers 'chunked' as well as the fallback for unknown types. The
         # warning was previously placed inside the 'chunked' branch, which
         # made it unreachable and left restSentence unassigned for any
         # unknown type.
         if structure_type != 'chunked':
             logger.warn("unknown structure type: %s" % structure_type)
         restSentence = self.parent[self.position + 1:]
     return restSentence
Пример #3
0
 def dribble(self, header, text):
     """Write a tab-separated record on the sentence an event was added to.

     Nothing happens unless the DRIBBLE flag is set. The record holds the
     header, the text and the source text of the sentence (both with
     whitespace runs collapsed to single spaces), followed by the begin and
     end offsets of the last daughter of this chunk."""
     if not DRIBBLE:
         return
     sentence_tokens = utils.get_tokens(self.parent.dtrs)
     # The sentence span runs from the first token's begin to the last
     # token's end.
     sent_begin, sent_end = sentence_tokens[0].begin, sentence_tokens[-1].end
     last_dtr = self.dtrs[-1]
     event_begin, event_end = last_dtr.begin, last_dtr.end
     squeezed_text = ' '.join(text.split())
     raw_sentence = self.tree.tarsqidoc.sourcedoc.text[sent_begin:sent_end]
     squeezed_sentence = ' '.join(raw_sentence.split())
     fields = (header, squeezed_text, squeezed_sentence, event_begin, event_end)
     VerbChunk.DRIBBLE_FH.write("%s\t%s\t%s\t%s:%s\n" % fields)
Пример #4
0
 def dribble(self, header, text):
     """Log one line about the sentence in which an event was added.

     Only active when DRIBBLE is set. The line written to
     VerbChunk.DRIBBLE_FH has four tab-separated fields: the header, the
     whitespace-normalized text, the whitespace-normalized sentence taken
     from the source document, and begin:end of this chunk's last daughter."""
     if not DRIBBLE:
         return

     all_tokens = utils.get_tokens(self.parent.dtrs)
     first_token, final_token = all_tokens[0], all_tokens[-1]
     start = first_token.begin
     stop = final_token.end
     final_dtr = self.dtrs[-1]
     offsets = (final_dtr.begin, final_dtr.end)
     # Collapse any run of whitespace (including newlines) to one space so
     # that each record stays on a single line.
     clean_text = ' '.join(text.split())
     source_text = self.tree.tarsqidoc.sourcedoc.text
     clean_sentence = ' '.join(source_text[start:stop].split())
     record = "%s\t%s\t%s\t%s:%s\n" % ((header, clean_text, clean_sentence)
                                       + offsets)
     VerbChunk.DRIBBLE_FH.write(record)
Пример #5
0
 def _lookForMultiChunk(self, FSA_set, structure_type='flat'):
     """Match the right context of this chunk against a set of FSAs.

     The rest of the sentence is retrieved in the format given by
     structure_type (a flat, token-level representation or a chunked one)
     and handed to _identify_substring together with FSA_set. Returns the
     matching prefix of the rest of the sentence if a match was found and 0
     otherwise. Used for finding specific right contexts of verb chunks."""
     own_text = self.getText().strip()
     logger.debug("Entering _lookForMultiChunk for '%s' with %d FSAs"
                  % (own_text, len(FSA_set)))
     logger.debug("\tstructure_type = %s" % structure_type)
     rest = self._getRestSent(structure_type)
     # Log the right context twice: once as class names, once as text/pos.
     class_names = [element.__class__.__name__ for element in rest]
     logger.debug("\trest = %s" % ' '.join(class_names))
     tagged_tokens = ["%s/%s" % (token.getText(), token.pos)
                      for token in utils.get_tokens(rest)]
     logger.debug("\trest = %s" % ' '.join(tagged_tokens))
     match_length, fsa_index = self._identify_substring(rest, FSA_set)
     if not match_length:
         logger.debug("\tREJECTED by all FSAs")
         return 0
     logger.debug("\tACCEPTED by FSA %d, LENGTH=%d" % (fsa_index, match_length))
     return rest[:match_length]
Пример #6
0
 def _lookForMultiChunk(self, FSA_set, structure_type='flat'):
     """Return the prefix of the rest of the sentence if it matches one of
     the FSAs in FSA_set, and 0 if there is no match. The structure_type
     argument selects the structural format of the rest of the sentence:
     either a flat, token-level representation or a chunked one. This method
     is used for finding specific right contexts of verb chunks."""
     entry_args = (self.getText().strip(), len(FSA_set))
     logger.debug("Entering _lookForMultiChunk for '%s' with %d FSAs"
                  % entry_args)
     logger.debug("\tstructure_type = %s" % structure_type)

     remainder = self._getRestSent(structure_type)
     type_summary = ' '.join([x.__class__.__name__ for x in remainder])
     logger.debug("\trest = %s" % type_summary)
     token_summary = ' '.join(["%s/%s" % (tok.getText(), tok.pos)
                               for tok in utils.get_tokens(remainder)])
     logger.debug("\trest = %s" % token_summary)

     length, fsa_number = self._identify_substring(remainder, FSA_set)
     # A length of zero means that no match was found.
     if not length:
         logger.debug("\tREJECTED by all FSAs")
         return 0
     logger.debug("\tACCEPTED by FSA %d, LENGTH=%d" % (fsa_number, length))
     return remainder[:length]
Пример #7
0
 def _processEventInMultiVChunk(self, substring):
     """Try to add an event for this chunk extended with substring.

     The tokens of this chunk are concatenated with substring, a
     VChunkFeaturesList is built over the result and its first element is
     handed to _conditionallyAddEvent. Afterwards
     update_event_checked_marker is applied to each element of substring."""
     token_list = utils.get_tokens(self) + substring
     verbfeatureslist = VChunkFeaturesList(tokens=token_list)
     GramMultiVChunk = verbfeatureslist[0]
     self._conditionallyAddEvent(GramMultiVChunk)
     # Explicit loop rather than map(): on Python 3 map() is lazy, so using
     # it only for its side effect would never update the markers.
     for element in substring:
         update_event_checked_marker(element)
Пример #8
0
 def _processEventInMultiVChunk(self, substring):
     """Process a verb chunk together with the right context in substring.

     Builds a VChunkFeaturesList from the tokens of this chunk followed by
     substring, passes its first element to _conditionallyAddEvent, and then
     calls update_event_checked_marker on every element of substring."""
     token_list = utils.get_tokens(self) + substring
     verbfeatureslist = VChunkFeaturesList(tokens=token_list)
     GramMultiVChunk = verbfeatureslist[0]
     self._conditionallyAddEvent(GramMultiVChunk)
     # map() returns a lazy iterator on Python 3 and was never consumed here,
     # so the marker update silently did not run; iterate explicitly.
     for item in substring:
         update_event_checked_marker(item)
Пример #9
0
 def _processEventInMultiVChunk(self, substring):
     """Try to add an event for this chunk combined with substring.

     The tokens of this chunk plus substring are wrapped in a GramVChunkList,
     whose first element is given to _conditionallyAddEvent. After that,
     update_event_checked_marker is called on each element of substring."""
     chunk_list = utils.get_tokens(self) + substring
     gramvchunklist = GramVChunkList(tokens=chunk_list)
     GramMultiVChunk = gramvchunklist[0]
     self._conditionallyAddEvent(GramMultiVChunk)
     # Use a plain loop for the side effect: an unconsumed map() does nothing
     # on Python 3 because it is evaluated lazily.
     for member in substring:
         update_event_checked_marker(member)