def __handleParagraph__(self, dstree, dsnode, paragraph):
    """Split ``paragraph`` into sentences and add each one under ``dsnode``.

    The paragraph is split with ``self.sentence``. The result is read as
    alternating text chunks (even positions) and separator matches (odd
    positions) -- this assumes the pattern has exactly one capturing
    group; confirm where ``self.sentence`` is defined.

    For every non-empty text chunk:

    * the last character of the preceding separator match, if any, is
      prepended to the chunk;
    * a ``'.'`` is appended unless the chunk already ends with one
      (checked before surrounding whitespace is stripped);
    * if the sentence starts with a double or single quote symbol and
      contains an odd number of them, a closing quote is appended;
    * the stripped sentence is passed to ``__handleTextSentence__``.

    Finally ``DocumentStructure.deleteValue`` is called on ``dsnode``.

    Args:
        dstree: Tree object, forwarded to ``__handleTextSentence__``.
        dsnode: Node under which the sentences are added.
        paragraph: Paragraph text to split.
    """
    quote_symbols = (
        DocumentStructure.SYMBOL_DBLQUOTE,
        DocumentStructure.SYMBOL_SINGLEQUOTE,
    )

    separator = ''
    for index, sentence in enumerate(self.sentence.split(paragraph)):
        if index % 2 != 0:
            # Odd positions hold the captured separator; remember it for
            # the text chunk that follows.
            separator = sentence
            continue
        if not sentence:
            continue

        if separator:
            # Presumably the separator match consumed the first character
            # of this sentence -- put it back in front.
            first_char = separator[-1]
            sentence = first_char + sentence
            separator = ''
        else:
            first_char = sentence[0]

        # Explicit last-character test. The previous form indexed the
        # sentence with len(separator) - 1, which was only correct because
        # the separator is always empty by this point.
        if not sentence.endswith('.'):
            sentence += '.'

        # Balance an opening quote that has no closing counterpart.
        if first_char in quote_symbols and sentence.count(first_char) % 2 != 0:
            sentence += first_char

        self.__handleTextSentence__(dstree, dsnode, sentence.strip())

    DocumentStructure.deleteValue(dsnode)
def __handleTextClause__(self, dstree, dsnode, clause):
    """Add ``clause`` under ``dsnode`` as a ``TEXT_CLAUSE`` node.

    The stripped clause is split with ``re.split`` using ``self.phrase``.
    If the split yields a single item, one ``TEXT_CLAUSE`` node is added
    with the clause (as received, not stripped) as its value. Otherwise a
    ``TEXT_CLAUSE`` node without a value is added and every split item
    that is not ``None`` and not blank after stripping is passed to
    ``__handleTextPhrase__`` with that new node as parent.

    Finally ``DocumentStructure.deleteValue`` is called on ``dsnode``.

    Args:
        dstree: Tree object providing ``addNode``.
        dsnode: Node under which the clause node is added.
        clause: Clause text.
    """
    parts = re.split(self.phrase, clause.strip())

    if len(parts) == 1:
        dstree.addNode(dsnode, DocumentStructure.TEXT_CLAUSE, value=clause)
    else:
        clause_node = dstree.addNode(dsnode, DocumentStructure.TEXT_CLAUSE)
        for part in parts:
            # re.split yields None for capturing groups that did not
            # take part in a match.
            if part is None:
                continue
            text = part.strip()
            if text:
                self.__handleTextPhrase__(dstree, clause_node, text)

    DocumentStructure.deleteValue(dsnode)
def __handleTextSentence__(self, dstree, dsnode, sentence):
    """Add ``sentence`` under ``dsnode`` as a ``TEXT_SENTENCE`` node.

    The stripped sentence is split with ``self.clause``. If the split
    yields a single item, one ``TEXT_SENTENCE`` node is added with the
    sentence (as received, not stripped) as its value. Otherwise a
    ``TEXT_SENTENCE`` node without a value is added and the split result
    is read as alternating text chunks (even positions) and separator
    matches (odd positions) -- this assumes the pattern has exactly one
    capturing group; confirm where ``self.clause`` is defined. Each text
    chunk, prefixed with the separator that preceded it, is passed
    unchanged to ``__handleTextClause__``; empty chunks are passed too
    and no terminating period is added.

    Finally ``DocumentStructure.deleteValue`` is called on ``dsnode``.

    Args:
        dstree: Tree object providing ``addNode``.
        dsnode: Node under which the sentence node is added.
        sentence: Sentence text.
    """
    pieces = self.clause.split(sentence.strip())

    if len(pieces) == 1:
        dstree.addNode(dsnode, DocumentStructure.TEXT_SENTENCE, value=sentence)
    else:
        sentence_node = dstree.addNode(dsnode, DocumentStructure.TEXT_SENTENCE)
        separator = ''
        for index, piece in enumerate(pieces):
            if index % 2 != 0:
                # Odd positions hold the captured separator; it is kept
                # with the clause that follows it.
                separator = piece
                continue
            if separator:
                piece = separator + piece
                separator = ''
            self.__handleTextClause__(dstree, sentence_node, piece)

    DocumentStructure.deleteValue(dsnode)