def __handleSentence(self, xmlSentence, sentence):
    """Fill ``sentence`` from the child nodes of the ``xmlSentence`` element.

    The sentence's ``idx`` is taken from the element's ``ID`` attribute.
    Each direct ``W`` child becomes a new ``Word`` that is parsed with
    ``self.__handleWord`` and attached to the sentence; each direct
    ``MARKABLE`` child is delegated to ``self.__handleMarkable``, which
    attaches the words and markables it contains. Children with any other
    node name are only reported in the debug output.

    Args:
        xmlSentence: DOM-style element exposing ``attributes``,
            ``hasChildNodes()`` and ``childNodes``.
        sentence: Object to populate; must provide ``addWord`` and is
            read back through ``words`` and ``markables`` for debugging.

    Returns:
        The same ``sentence`` object that was passed in.
    """
    sentence.idx = xmlSentence.attributes['ID'].nodeValue

    # Running count of the words attached so far; it doubles as the index
    # of the next word to be added to the sentence.
    wordCount = 0

    children = xmlSentence.childNodes if xmlSentence.hasChildNodes() else ()
    for child in children:
        tag = child.nodeName
        # Single-argument print(...) emits the same text whether it is
        # parsed as a Python 2 statement or a Python 3 call.
        print('xmlNode.nodeName: ' + tag)

        if tag == 'MARKABLE':
            # The markable handler returns the updated running word count
            # (it is not decremented), so it is adopted unchanged.
            wordCount = self.__handleMarkable(child, sentence, sentence, wordCount)
        elif tag == 'W':
            wordCount += 1
            word = Word()
            self.__handleWord(child, word)
            sentence.addWord(word)
            word.sentence = sentence

    # Debug dump of the populated sentence.
    print(sentence)
    print('s.words: ' + str(sentence.words))
    print('s.markables: ' + str(sentence.markables))
    return sentence
def __handleMarkable(self, xmlMarkable, xmlMarkableParent, s, nextWordIndex, appendString="...."):
    """Parse one ``MARKABLE`` element and register it on sentence ``s``.

    Direct ``W`` children become ``Word`` objects added to ``s``; nested
    ``MARKABLE`` children are handled recursively. Every markable, however
    deeply nested, is added to the sentence itself and is identified by
    its word slice. A nested markable is added before the markable that
    encloses it, because the recursion finishes first.

    Args:
        xmlMarkable: DOM-style element for the markable being parsed.
        xmlMarkableParent: Enclosing node; not used by this method.
        s: Sentence that receives the words and markables.
        nextWordIndex: Index the next word added to ``s`` will occupy.
        appendString: Only extended and forwarded to recursive calls;
            presumably a leftover debug-indentation prefix.

    Returns:
        The index the next word after this markable will occupy.

    Raises:
        Exception: If ``xmlMarkable`` has no child nodes at all.
    """
    if not xmlMarkable.hasChildNodes():
        raise Exception("There should be at least one word inside the markable id="+ xmlMarkable.attributes['ID'].nodeValue)

    # Index of the first word that belongs to this markable.
    firstIndex = nextWordIndex

    for child in xmlMarkable.childNodes:
        tag = child.nodeName
        if tag == 'W':
            nextWordIndex += 1
            word = Word()
            self.__handleWord(child, word)
            s.addWord(word)
            word.sentence = s
        elif tag == 'MARKABLE':
            # The nested call consumes its own words and hands back the
            # advanced index.
            nextWordIndex = self.__handleMarkable(
                child, xmlMarkable, s, nextWordIndex, appendString + "....")

    markable = Markable()
    markable.idx = xmlMarkable.attributes['ID'].nodeValue
    markable.comment = xmlMarkable.attributes['COMMENT'].nodeValue
    # The slice is stored as the string "first:last", where last is the
    # index of the final word in the markable (inclusive).
    markable.slice = "%s:%s" % (firstIndex, nextWordIndex - 1)
    markable.sentence = s
    s.addMarkable(markable)

    # TODO: handle CoRef
    return nextWordIndex