Exemplo n.º 1
0
def parseText(text):
    """
    Parse text into a tree of Section objects.

    The text is split with LINESEP and each non-empty piece is parsed on its
    own with ROOT.parseString, which is simpler than making the grammar
    handle a whole document at once.

    A piece whose first parse result is a HEADER opens a section: the first
    header seen becomes the root, and later headers are attached below the
    nearest enclosing section with a strictly lower level. TAG results
    following the header are added to that section. Any other piece is added
    as a paragraph of the current section, with its TAG and CITATION results
    recorded on the paragraph.

    Returns the root Section, or None when the text contains no non-empty
    pieces.

    Raises ValueError when a paragraph appears before any header, or when a
    header has no ancestor section of a lower level. Exceptions raised by
    the parser are logged and re-raised unchanged.
    """
    rootSection = None
    currentSection = None
    # Plain iteration: the split result is local to this function, so there
    # is no need to consume it destructively with pop(0).
    for current in LINESEP.split(text):
        if current == "":
            continue
        try:
            results = ROOT.parseString(current)
        except Exception:
            logging.info('Parser Issue with: {}'.format(current))
            raise

        # An empty parse result has no leading token; such a piece is
        # treated as a plain paragraph below.
        first = results[0] if results else None

        if isinstance(first, HEADER):
            if rootSection is None:
                #a new, root section
                currentSection = Section(first.title, first.level)
                rootSection = currentSection
            else:
                #a subsection of the current section or of one of its
                #ancestors: climb until a shallower section is found
                while first.level <= currentSection._level:
                    parent = currentSection.get_parent()
                    # NOTE(review): assumes get_parent() returns None at the
                    # root - confirm against Section.
                    if parent is None:
                        raise ValueError(
                            'Header {!r} (level {}) has no ancestor section '
                            'of a lower level'.format(first.title, first.level))
                    currentSection = parent
                currentSection = currentSection.add_subsection(first.title,
                                                               first.level)
            for res in results[1:]:
                if isinstance(res, TAG):
                    currentSection.add_tag(res.text)
            continue

        #Paragraphs of a section
        if currentSection is None:
            raise ValueError(
                'Paragraph found before any header: {!r}'.format(current))
        currentParagraph = currentSection.add_paragraph(current)
        for res in results:
            if isinstance(res, TAG):
                currentParagraph['tags'].add(res.text)
            elif isinstance(res, CITATION):
                currentParagraph['citations'].add(res.text)

    return rootSection
Exemplo n.º 2
0
 def test_subsection(self):
     # Adding a subsection must register it on the parent and make it
     # retrievable by title as a Section instance.
     aSection = Section('aTitle', 1)
     # The return value is not needed here; only the effect on the parent
     # is checked.
     aSection.add_subsection('other', 2)
     self.assertEqual(len(aSection._ordered_subsections), 1)
     retrieved = aSection.section('other')
     self.assertIsNotNone(retrieved)
     self.assertIsInstance(retrieved, Section)
Exemplo n.º 3
0
 def test_deep_tags_sections(self):
     # A tag added to a subsection must be reported by has_tag on the
     # parent section.
     parent = Section('aTitle', 1)
     parent.add_subsection('subsection', 2).add_tag('blah')
     self.assertTrue(parent.has_tag('blah'))