Пример #1
0
 def __handleSentence(self, xmlSentence, sentence):
     """Populate ``sentence`` from a sentence XML element and return it.

     The element's ``ID`` attribute becomes ``sentence.idx``.  Direct
     children are then visited in document order: a ``W`` child is parsed
     into a new ``Word`` that is added to the sentence, and a ``MARKABLE``
     child is delegated to ``__handleMarkable``.  Children with any other
     node name are only echoed to stdout.

     Args:
         xmlSentence: DOM-style element exposing ``attributes``,
             ``hasChildNodes()`` and ``childNodes``.
         sentence: object to fill in; it is mutated in place.

     Returns:
         The same ``sentence`` object that was passed in.
     """
     sentence.idx = xmlSentence.attributes['ID'].nodeValue

     # Number of words seen so far, i.e. the index the next word will get.
     wordCount = 0
     children = xmlSentence.childNodes if xmlSentence.hasChildNodes() else ()
     for child in children:
         tag = child.nodeName
         # Debug trace; a single parenthesised argument prints the same
         # text whether ``print`` is a statement or a function.
         print('xmlNode.nodeName: '+tag)
         if tag == 'MARKABLE':
             # __handleMarkable returns the index that follows the last
             # word it consumed, so it is taken over unchanged.
             wordCount = self.__handleMarkable(child, sentence, sentence, wordCount)
         elif tag == 'W':
             wordCount += 1
             word = Word()
             self.__handleWord(child, word)
             sentence.addWord(word)
             word.sentence = sentence

     print(sentence)
     print('s.words: '+str(sentence.words))
     print('s.markables: '+str(sentence.markables))
     return sentence
Пример #2
0
 def __handleMarkable(self, xmlMarkable, xmlMarkableParent, s, nextWordIndex, appendString="...."):
     """Parse a MARKABLE element and register it, and its words, on ``s``.

     Direct ``W`` children are parsed into new ``Word`` objects and added
     to ``s``; nested ``MARKABLE`` children are handled recursively.  Once
     the children have been walked, a ``Markable`` carrying the element's
     ``ID`` and ``COMMENT`` attributes is added to ``s``.  Its ``slice`` is
     the string ``"first:last"``, where both values are word indexes within
     the sentence and ``last`` is inclusive.

     Every markable, however deeply nested, is added directly to the
     sentence; nesting can only be recovered from the word slices.

     Args:
         xmlMarkable: DOM-style element for the markable being parsed.
         xmlMarkableParent: enclosing node; not read by this method.
         s: sentence object receiving the words and markables.
         nextWordIndex: index the next parsed word will occupy in ``s``.
         appendString: prefix that grows by ``"...."`` per nesting level;
             it is only forwarded to recursive calls (apparently a
             leftover from debug output).

     Returns:
         The index following the last word covered by this markable.

     Raises:
         Exception: if the markable covers no word at all.  This includes
             an element whose only children are whitespace text or other
             non-word nodes, which would otherwise yield an inverted
             slice such as ``"3:2"``.
     """
     # Index of the first word covered by this markable.
     initialWordIndex = nextWordIndex

     for xmlNode in xmlMarkable.childNodes:
         nodeName = xmlNode.nodeName
         if nodeName == 'W':
             # A word directly inside this markable: parse it and advance
             # the running word index.
             nextWordIndex += 1
             w = Word()
             self.__handleWord(xmlNode, w)
             s.addWord(w)
             w.sentence = s
         elif nodeName == 'MARKABLE':
             # A nested markable consumes its own words; continue from the
             # index it reports back.
             nextWordIndex = self.__handleMarkable(xmlNode, xmlMarkable, s, nextWordIndex, appendString+"....")

     # Checking for "no child nodes" is not enough: pretty-printed XML puts
     # whitespace text nodes inside otherwise empty elements.  What matters
     # is whether any word was actually consumed.
     if nextWordIndex == initialWordIndex:
         raise Exception("There should be at least one word inside the markable id="+
                         xmlMarkable.attributes['ID'].nodeValue)

     # Create the markable
     m = Markable()
     m.idx = xmlMarkable.attributes['ID'].nodeValue
     # NOTE(review): COMMENT is looked up unconditionally, so it is treated
     # as a mandatory attribute here - confirm against the corpus schema.
     m.comment = xmlMarkable.attributes['COMMENT'].nodeValue
     # nextWordIndex points one past the last word, hence the -1 for the
     # inclusive end of the slice.
     m.slice = str(initialWordIndex) + ":" + str(nextWordIndex-1)
     m.sentence = s

     # Add the markable to the sentence
     s.addMarkable(m)

     # TODO: handle CoRef

     return nextWordIndex