def parseText(text):
    """Parse *text* into a tree of Section objects and return the root.

    The text is split with ``LINESEP`` and every non-empty chunk is parsed
    on its own with ``ROOT.parseString``:

    * The first chunk that starts with a HEADER creates the root Section.
    * Every later HEADER chunk becomes a subsection of the nearest
      enclosing section whose level is smaller than the header's level.
    * TAG results following a header are added to that section.
    * Any other chunk is added to the current section as a paragraph, and
      the text of each TAG / CITATION result is added to the paragraph's
      ``'tags'`` / ``'citations'`` entry.

    Returns the root Section, or None when no header chunk was seen.

    Raises whatever ``ROOT.parseString`` raises for a chunk it cannot
    parse; the offending chunk is logged at INFO level first.
    """
    rootSection = None
    currentSection = None

    # Parse paragraph by paragraph, which is easier than making the
    # parser itself more complex.
    for current in LINESEP.split(text):
        if current == "":
            continue

        try:
            results = ROOT.parseString(current)
        except Exception:
            logging.info('Parser Issue with: {}'.format(current))
            # Bare raise re-raises the active exception with its traceback.
            raise

        # An empty parse result has no first token; treat the chunk as a
        # plain paragraph in that case.
        first = results[0] if results else None

        if isinstance(first, HEADER):
            if rootSection is None:
                # First header seen: it becomes the root section.
                currentSection = Section(first.title, first.level)
                rootSection = currentSection
            else:
                # Climb until the current section is shallower than the new
                # header, then attach the header as its subsection. When the
                # header is already deeper, no climbing happens.
                # NOTE(review): a header at or above the root's level appears
                # to climb past the root (get_parent() presumably returns
                # None there) and fail with AttributeError - confirm intent.
                while first.level <= currentSection._level:
                    currentSection = currentSection.get_parent()
                currentSection = currentSection.add_subsection(first.title,
                                                               first.level)
            # Tags written on the header line belong to the section itself.
            for res in results[1:]:
                if isinstance(res, TAG):
                    currentSection.add_tag(res.text)
        else:
            # Paragraphs of a section.
            # NOTE(review): a paragraph appearing before any header reaches
            # this line with currentSection still None - confirm whether such
            # input is expected.
            currentParagraph = currentSection.add_paragraph(current)
            for res in results:
                if isinstance(res, TAG):
                    currentParagraph['tags'].add(res.text)
                elif isinstance(res, CITATION):
                    currentParagraph['citations'].add(res.text)

    return rootSection
def test_bad_set_parent(self):
    # A level-1 section must refuse a level-2 section as its parent
    # (presumably because the would-be parent is the deeper of the two).
    shallow, deeper = Section('aTitle', 1), Section('AnotherTitle', 2)

    # Precondition: a freshly built section has no parent.
    self.assertIsNone(shallow.get_parent())

    # Callable form of assertRaises: invokes shallow.set_parent(deeper).
    self.assertRaises(Exception, shallow.set_parent, deeper)
def test_set_parent(self):
    # A level-2 section accepts a level-1 section as its parent.
    parent, child = Section('aTitle', 1), Section('AnotherTitle', 2)

    # Before linking, the child reports no parent.
    self.assertIsNone(child.get_parent())

    child.set_parent(parent)

    # After linking, a parent is reported.
    self.assertIsNotNone(child.get_parent())