Example #1
 def htmlparser(chunks):
     b = Body()
     for block in chunks:
         tagtype = Preformatted if block.name == "pre" else Paragraph
         t = util.normalize_space(''.join(block.findAll(text=True)))
         block.extract()  # to avoid seeing it again
         if t:
             b.append(tagtype([t]))
     return b
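A minimal driver sketch for the htmlparser above (not from the original project; the sample HTML is made up). It assumes the chunks come from BeautifulSoup and that Body, Paragraph, Preformatted and util are ferenda's, as elsewhere on this page:

from bs4 import BeautifulSoup
from ferenda.elements import Body, Paragraph, Preformatted
from ferenda import util

soup = BeautifulSoup("<body><p>Hello   world</p><pre>x = 1</pre></body>",
                     "html.parser")
body = htmlparser(soup.find_all(["p", "pre"]))
# -> Body([Paragraph(["Hello world"]), Preformatted(["x = 1"])])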
Example #2
    def test_serialize_roundtrip(self):
        # Create an elements object tree
        tree = Body([
            Section([Paragraph(["Hello"]),
                     Paragraph(["World"])],
                    ordinal="1",
                    title="Main section"),
            Section([
                42,
                date(2013, 11, 27),
                datetime(2013, 11, 27, 12, 0, 0), b'bytestring', {
                    'foo': 'bar',
                    'x': 'y'
                }
            ],
                    ordinal=2,
                    title="Native types")
        ])
        # roundtrip using the default XML format
        serialized = serialize(tree)
        self.assertIsInstance(serialized, str)
        newtree = deserialize(serialized, caller_globals=globals())
        self.assertEqual(tree, newtree)

        # make another section with special (but commonly used) types
        # and try to roundtrip them. The XML serialization format does
        # not support this.
        graph = Graph().parse(
            data="""@prefix dcterms: <http://purl.org/dc/terms/> .

<http://example.org/1> dcterms:title "Hello world"@en .
""",
            format="turtle")
        parseresult = urlparser.parseString("http://example.org/1")
        tree.append(Section([parseresult, graph], meta=graph))

        # roundtrip using JSON (which uses fully qualified classnames,
        # so we don't need to pass globals() into deserialize())
        serialized = serialize(tree, format="json")
        self.assertIsInstance(serialized, str)
        newtree = deserialize(serialized, format="json")

        # two pyparsing.ParseResults objects cannot be directly
        # compared (they don't implement __eq__), therefore we compare
        # their XML representations
        tree[2][0] = util.parseresults_as_xml(tree[2][0])
        newtree[2][0] = util.parseresults_as_xml(newtree[2][0])
        self.assertEqual(tree, newtree)
Example #3
    def test_parse_existing(self):
        # make sure parse_recursive doesn't mess with existing structure.
        class MyHeader(UnicodeElement):
            pass

        doc = Body([
            MyHeader("My document"),
            Paragraph([
                "It's a very very fine document.",
                MyHeader("Subheading"), "And now we're done."
            ])
        ])
        want = serialize(doc)

        # first test a blank CitationParser, w/o patterns or formatter
        cp = CitationParser()

        doccopy = deepcopy(doc)
        cp.parse_recursive(doccopy)
        got = serialize(doccopy)
        self.assertEqual(want, got)

        cp = CitationParser(ferenda.citationpatterns.url)
        cp.set_formatter(URIFormatter(("url", ferenda.uriformats.url)))
        doccopy = deepcopy(doc)
        cp.parse_recursive(doccopy)
        got = serialize(doccopy)
        self.assertEqual(want, got)
Example #4
 def toc_generate_page_body(self, documentlist, nav):
     # make a copy because toc_generate_page_body_thread will eat
     # it, and we need to reuse it
     documentlist = list(documentlist)
     # for item in documentlist:
     #     print(repr(str(item[0]))+",")
     rootul = self.toc_generate_page_body_thread(documentlist)
     assert len(
         documentlist
     ) == 0, "toc_generate_page_body_thread left some items in the documentlist"
     uls = OrderedDict()
     # create one ul per two-char-prefix (eg "Ab", "Ac", "Ad", "Af" and so on)
     for li in rootul:
         strdoc = str(li)
         # strip spaces and dashes, then use the first two chars as the
         # prefix (maybe clean even more, eg remove anything
         # non-alphanumerical?)
         prefix = strdoc.replace(" ", "").replace("-", "")[:2].capitalize()
         if prefix not in uls:
             uls[prefix] = UnorderedList()
         currentul = uls[prefix]  # look up the right ul even when prefixes interleave
         currentul.append(li)
     d = Div(**{'class': 'threecol'})
     for k, v in uls.items():
         if len(k) > 2:
             continue
         d.append(H2([k]))
         d.append(v)
     return Body([nav, d])
Example #5
 def parse(self, doc):
     # create a dummy text document
     d = Describer(doc.meta, doc.uri)
     d.rdftype(self.rdf_type)
     d.value(self.ns['dcterms'].title, Literal(doc.basefile, lang=doc.lang))
     d.value(self.ns['prov'].wasGeneratedBy, self.qualified_class_name())
     doc.body = Body()  # can be empty, all content in doc.meta
     return True
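For orientation, a small standalone sketch (assumed setup, not from the repo) of the kind of triple that Describer calls like the ones above add to doc.meta:

from rdflib import Graph, URIRef
from ferenda import Describer

meta = Graph()
d = Describer(meta, "http://example.org/doc/1")
d.value(URIRef("http://purl.org/dc/terms/title"), "A dummy title", lang="sv")
# the graph now holds one triple, roughly:
# <http://example.org/doc/1> dcterms:title "A dummy title"@sv .
print(meta.serialize(format="turtle"))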
Example #6
    def test_parse_recursive(self):
        doc_citation = ("Doc" + Word(nums).setResultsName("ordinal") + "/" +
                        Word(nums, exact=4).setResultsName("year")
                        ).setResultsName("DocRef")

        def doc_uri_formatter(parts):
            return "http://example.org/docs/%(year)s/%(ordinal)s/" % parts

        doc = Body([
            Heading(["About Doc 43/2012 and it's interpretation"]),
            Paragraph([
                "According to Doc 43/2012",
                Footnote(["Available at http://example.org/xyz"]),
                " the bizbaz should be frobnicated"
            ])
        ])

        result = Body([
            Heading([
                "About ",
                LinkSubject("Doc 43/2012",
                            predicate="dcterms:references",
                            uri="http://example.org/docs/2012/43/"),
                " and it's interpretation"
            ]),
            Paragraph([
                "According to ",
                LinkSubject("Doc 43/2012",
                            predicate="dcterms:references",
                            uri="http://example.org/docs/2012/43/"),
                Footnote([
                    "Available at ",
                    LinkSubject("http://example.org/xyz",
                                predicate="dcterms:references",
                                uri="http://example.org/xyz")
                ]), " the bizbaz should be frobnicated"
            ])
        ])

        cp = CitationParser(ferenda.citationpatterns.url, doc_citation)
        cp.set_formatter(
            URIFormatter(("url", ferenda.uriformats.url),
                         ("DocRef", doc_uri_formatter)))
        doc = cp.parse_recursive(doc)
        self.maxDiff = 4096
        self.assertEqual(serialize(doc), serialize(result))
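As a standalone illustration (pyparsing only; the input string is made up), here is how the named results above feed the URI template in doc_uri_formatter:

from pyparsing import Word, nums

doc_citation = ("Doc" + Word(nums).setResultsName("ordinal") + "/" +
                Word(nums, exact=4).setResultsName("year")
                ).setResultsName("DocRef")
parts = doc_citation.parseString("Doc 43/2012")
# ParseResults supports mapping-style access, so %-formatting works:
print("http://example.org/docs/%(year)s/%(ordinal)s/" % parts)
# -> http://example.org/docs/2012/43/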
Example #7
 def toc_generate_page_body(self, documentlist, nav):
     dl = DL(**{'class': 'dl-horizontal'})
     for label, doclist in documentlist:
         dl.append(DT(label))
         for doc in doclist:
             dl.append(DD(doc))
     return Body([nav, dl])
Example #8
    def parse_from_pdfreader(self, pdfreader, doc):
        doc.body = Body([pdfreader])

        d = Describer(doc.meta, doc.uri)
        d.rdftype(self.rdf_type)
        d.value(self.ns['prov'].wasGeneratedBy, self.qualified_class_name())

        return doc
Example #9
    def parse_pdfs(self, basefile, pdffiles):
        doc = Body()
        for pdffile in pdffiles:
            # FIXME: downloaded_path must be more fully mocked
            # (support attachments) by testutil.RepoTester. In the
            # meantime, we do some path munging ourselves

            pdf_path = self.store.downloaded_path(basefile).replace("index.html", pdffile)
            intermediate_path = self.store.intermediate_path(basefile, attachment=pdffile)
            intermediate_dir = os.path.dirname(intermediate_path)
            try:
                pdf = self.parse_pdf(pdf_path, intermediate_dir)
                for page in pdf:
                    pass
                    # page.crop(left=50,top=0,bottom=900,right=700)
                doc.append(pdf)
            except ValueError:
                (exc_type, exc_value, exc_traceback) = sys.exc_info()
                self.log.warning("Ignoring exception %s (%s), skipping PDF %s" %
                                 (exc_type, exc_value, pdffile))
        return doc
Example #10
 def postprocess_doc(self, doc):
     next_is_title = False
     newbody = Body()
     glue = lambda x, y, z: False
     for para in doc.body.textboxes(gluefunc=glue, pageobjects=True):
         strpara = str(para).strip()
         if strpara == "Kommittédirektiv":
             next_is_title = True
         elif next_is_title:
             doc.meta.add((URIRef(doc.uri), DCTERMS.title, Literal(strpara)))
             next_is_title = False
         elif strpara.startswith("Beslut vid regeringssammanträde den "):
             datestr = strpara[36:]  # length of above prefix
             if datestr.endswith("."):
                 datestr = datestr[:-1]
             doc.meta.add((URIRef(doc.uri), DCTERMS.issued,
                           Literal(self.parse_swedish_date(datestr),
                                   datatype=XSD.date)))
         if isinstance(para, Page):
             newbody.append(Sidbrytning(ordinal=para.number,
                                        width=para.width,
                                        height=para.height,
                                        src=para.src))
         else:
             newbody.append(para)
     doc.body = newbody
Example #11
 def test_compound(self):
     x = CompoundElement(["hello", "world"], id="42", foo="bar")
     x.foo = "baz"
     with self.assertRaises(AttributeError):
         x.y = "z"
     x.append(os.listdir)  # a non-serializable object (in this case a function)
     self.assertEqual(
         b'<compoundelement xmlns="http://www.w3.org/1999/xhtml" id="42">helloworld&lt;built-in function listdir&gt;</compoundelement>',
         etree.tostring(x.as_xhtml()))
     self.assertEqual(
         Body([Section([Paragraph(["Hello"]),
                        Paragraph(["World"])])]).as_plaintext(),
         "Hello World")
Example #12
    def test_serialize_pyparsing(self):
        # these objects can't be roundtripped
        from ferenda.citationpatterns import url
        x = url.parseString("http://example.org/foo?param=val")
        serialized = serialize(Body([x]))
        self.assertEqual(
            """<Body>
  <url>
    <netloc>example.org</netloc>
    <path>/foo</path>
    <query>param=val</query>
    <scheme>http</scheme>
  </url>
</Body>
""", serialized)
Example #13
 def parse(tokenstream):
     current_type = None
     body = Body()
     for p in tokenstream:
         new_type = guess_type(p, current_type)
         # if not new_type == None:
         #    print "Guessed %s for %r" % (new_type.__name__,p[:20])
         if new_type is None:
             pass
         elif new_type == Continuation and len(body) > 0:
             # Don't create a new text node, add this text to the last
             # text node created
             para = body.pop()
             para.append(p)
             body.append(para)
         else:
             if new_type == Continuation:
                 new_type = Paragraph
             body.append(new_type([p]))
             current_type = new_type
     return body
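guess_type is not shown in this example. Here is a hypothetical minimal version, consistent with how parse() uses it (it must return an element class, a Continuation marker, or None); real heuristics would be more involved:

from ferenda.elements import Heading, Paragraph

class Continuation(object):
    """Hypothetical marker class: 'this chunk continues the previous node'."""

def guess_type(p, current_type):
    p = p.strip()
    if not p:
        return None                      # nothing recognizable: skip the chunk
    if len(p) < 60 and p == p.upper():
        return Heading                   # short all-caps chunk: treat as a heading
    if current_type is not None and p[:1].islower():
        return Continuation              # starts lowercase: continues previous node
    return Paragraph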
Example #14
 def make_body(parser):
     return parser.make_children(Body())
Example #15
    def toc_generate_page_body(self, documentlist, nav):
        # move documentlist into an OrderedDict keyed on uri,
        # concatenating rpubl_konsolideringsunderlag as we go
        documents = OrderedDict()
        # make sure all rpubl:KonsolideradGrundforfattning rows come first in the list
        for row in documentlist:
            row = dict(row)
            if row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning):
                if row['uri'] not in documents:
                    documents[row['uri']] = row
                    # transform single value to a list, so we can
                    # append more if other rows are about the same
                    # rpubl:KonsolideradGrundforfattning
                    row['rpubl_konsolideringsunderlag'] = [
                        row['rpubl_konsolideringsunderlag']
                    ]
                else:
                    documents[
                        row['uri']]['rpubl_konsolideringsunderlag'].append(
                            row['rpubl_konsolideringsunderlag'])
        # then the rest
        for row in documentlist:
            if row['rdf_type'] != str(RPUBL.KonsolideradGrundforfattning):
                documents[row['uri']] = row

        # now that we have all documents, check if some of them change
        # some other of them
        for uri in list(documents):
            row = documents[uri]
            if 'rpubl_andrar' in row:
                if row['rpubl_andrar'] not in documents:
                    self.log.warning(
                        "%(uri)s: changes %(rpubl_andrar)s, but that doc doesn't exist"
                        % row)
                    continue
                if 'andras_av' not in documents[row['rpubl_andrar']]:
                    documents[row['rpubl_andrar']]['andras_av'] = []
                documents[row['rpubl_andrar']]['andras_av'].insert(0, uri)
                documents.move_to_end(uri)

        dl = html.DL(role='main')
        for uri in list(documents):
            if uri not in documents:
                continue  # we must have removed it earlier in the loop
            row = documents[uri]
            label = row.get('dcterms_title',
                            row.get('dcterms_identifier', '(Titel saknas)'))
            if row['dcterms_identifier'] not in label:
                label = "%s: %s" % (row['dcterms_identifier'], label)
            # in most cases we want to link this thing, but not if
            # this is the base act of a non-consolidated act (we link
            # to it in the DD element below instead)
            if (row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning)
                    or 'andras_av' not in row):
                label = Link(label, uri=uri)
            dl.append(html.DT([label]))
            # groups of base+change acts may be present whether we have
            # consolidated acts or not, and they might be grouped a
            # little differently, but we need to do the same things
            # with them.
            relevant_docs = []
            if row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning):
                relevant_docs = row['rpubl_konsolideringsunderlag']
            elif 'andras_av' in row:
                relevant_docs = [uri] + row['andras_av']
            if relevant_docs:
                fs = []
                for f in relevant_docs:
                    if f in documents:
                        fs.append(
                            Link(documents[f]['dcterms_identifier'],
                                 uri=documents[f]['uri']))
                        fs.append(", ")
                        del documents[f]
                if fs:
                    dl.append(
                        html.DD(
                            ["Grund- och ändringsförfattningar: ", *fs[:-1]]))
        return Body([nav, dl])
Example #16
    def parse(self, doc):
        # some very simple heuristic rules for determining
        # what an individual paragraph is

        def is_heading(p):
            # If it's on a single line and it isn't indented with spaces
            # it's probably a heading.
            if p.count("\n") == 0 and not p.startswith(" "):
                return True

        def is_pagebreak(p):
            # if it contains a form feed character, it represents a page break
            return "\f" in p

        # Parsing a document consists mainly of two parts:
        # 1: First we parse the body of text and store it in doc.body
        from ferenda.elements import Body, Preformatted, Title, Heading
        from ferenda import Describer
        reader = TextReader(self.store.downloaded_path(doc.basefile))

        # First paragraph of an RFC is always a header block
        header = reader.readparagraph()
        # Preformatted is a ferenda.elements class representing a
        # block of preformatted text. It is derived from the built-in
        # list type, and must thus be initialized with an iterable, in
        # this case a single-element list of strings. (Note: if you
        # try to initialize it with a string, because strings are
        # iterables as well, you'll end up with a list where each
        # character in the string is an element, which is not what you
        # want).
        preheader = Preformatted([header])
        # Doc.body is a ferenda.elements.Body class, which is also
        # is derived from list, so it has (amongst others) the append
        # method. We build our document by adding to this root
        # element.
        doc.body.append(preheader)

        # Second paragraph is always the title, and we don't include
        # this in the body of the document, since we'll add it to the
        # metadata -- once is enough
        title = reader.readparagraph()

        # After that, just iterate over the document and guess what
        # everything is. TextReader.getiterator is useful for
        # iterating through a text in other chunks than single lines
        for para in reader.getiterator(reader.readparagraph):
            if is_heading(para):
                # Heading is yet another of these ferenda.elements
                # classes.
                doc.body.append(Heading([para]))
            elif is_pagebreak(para):
                # Just drop these remnants of a page-and-paper-based past
                pass
            else:
                # If we don't know that it's something else, it's a
                # preformatted section (the safest bet for RFC text).
                doc.body.append(Preformatted([para]))

        # 2: Then we create metadata for the document and store it in
        # doc.meta (in this case using the convenience
        # ferenda.Describer class).

        desc = Describer(doc.meta, doc.uri)

        # Set the rdf:type of the document
        desc.rdftype(self.rdf_type)

        # Set the title we've captured as the dcterms:title of the document and
        # specify that it is in English
        desc.value(self.ns['dcterms'].title,
                   util.normalize_space(title),
                   lang="en")

        # Construct the dcterms:identifier (eg "RFC 6991") for this document from the basefile
        desc.value(self.ns['dcterms'].identifier, "RFC " + doc.basefile)

        # find and convert the publication date in the header to a datetime
        # object, and set it as the dcterms:issued date for the document
        re_date = re.compile(
            r"(January|February|March|April|May|June|July|August|September|October|November|December) (\d{4})"
        ).search
        # This is a context manager that temporarily sets the system
        # locale to the "C" locale in order to be able to use strptime
        # with a string on the form "August 2013", even though the
        # system may use another locale.
        dt_match = re_date(header)
        if dt_match:
            with util.c_locale():
                dt = datetime.strptime(dt_match.group(0), "%B %Y")
            pubdate = date(dt.year, dt.month, dt.day)
            # Note that using some python types (cf. datetime.date)
            # results in a datatyped RDF literal, ie in this case
            #   <http://localhost:8000/res/rfc/6994> dcterms:issued "2013-08-01"^^xsd:date
            desc.value(self.ns['dcterms'].issued, pubdate)

        # find any older RFCs that this document updates or obsoletes
        obsoletes = re.search(r"^Obsoletes: ([\d+, ]+)", header, re.MULTILINE)
        updates = re.search(r"^Updates: ([\d+, ]+)", header, re.MULTILINE)

        # Find the category of this RFC, store it as dcterms:subject
        cat_match = re.search(r"^Category: ([\w ]+?)(  |$)", header,
                              re.MULTILINE)
        if cat_match:
            desc.value(self.ns['dcterms'].subject, cat_match.group(1))

        for predicate, matches in ((self.ns['rfc'].updates, updates),
                                   (self.ns['rfc'].obsoletes, obsoletes)):
            if matches is None:
                continue
            # add references between this document and these older rfcs,
            # using either rfc:updates or rfc:obsoletes
            for match in matches.group(1).strip().split(", "):
                uri = self.canonical_uri(match)
                # Note that this uses our own unofficial
                # namespace/vocabulary
                # http://example.org/ontology/rfc/
                desc.rel(predicate, uri)

        # And now we're done. We don't need to return anything as
        # we've modified the Document object that was passed to
        # us. The calling code will serialize this modified object to
        # XHTML and RDF and store it on disk

# end parse1
# Now do it again
        reader.seek(0)
        reader.readparagraph()
        reader.readparagraph()
        doc.body = Body()
        doc.body.append(preheader)
        # doc.body.append(Title([util.normalize_space(title)]))
        # begin parse2
        from ferenda.elements import Section, Subsection, Subsubsection

        # More heuristic rules: Section headers start at the beginning
        # of a line and are numbered. Subsections and subsubsections
        # have dotted numbers, optionally with a trailing period, ie
        # '9.2.' or '11.3.1'
        def is_section(p):
            return re.match(r"\d+\.? +[A-Z]", p)

        def is_subsection(p):
            return re.match(r"\d+\.\d+\.? +[A-Z]", p)

        def is_subsubsection(p):
            return re.match(r"\d+\.\d+\.\d+\.? +[A-Z]", p)

        def split_sectionheader(p):
            # returns a tuple of title, ordinal, identifier
            ordinal, title = p.split(" ", 1)
            ordinal = ordinal.strip(".")
            return title.strip(), ordinal, "RFC %s, section %s" % (
                doc.basefile, ordinal)

        # Use a list as a simple stack to keep track of the nesting
        # depth of a document. Every time we create a Section,
        # Subsection or Subsubsection object, we push it onto the
        # stack (and clear the stack down to the appropriate nesting
        # depth). Every time we create some other object, we append it
        # to whatever object is at the top of the stack. As your rules
        # for representing the nesting of structure become more
        # complicated, you might want to use the
        # :class:`~ferenda.FSMParser` class, which lets you define
        # heuristic rules (recognizers), states and transitions, and
        # takes care of putting your structure together.
        stack = [doc.body]

        for para in reader.getiterator(reader.readparagraph):
            if is_section(para):
                title, ordinal, identifier = split_sectionheader(para)
                s = Section(title=title,
                            ordinal=ordinal,
                            identifier=identifier)
                stack[1:] = []  # clear all but bottom element
                stack[0].append(s)  # add new section to body
                stack.append(s)  # push new section on top of stack
            elif is_subsection(para):
                title, ordinal, identifier = split_sectionheader(para)
                s = Subsection(title=title,
                               ordinal=ordinal,
                               identifier=identifier)
                stack[2:] = []  # clear all but bottom two elements
                stack[1].append(s)  # add new subsection to current section
                stack.append(s)
            elif is_subsubsection(para):
                title, ordinal, identifier = split_sectionheader(para)
                s = Subsubsection(title=title,
                                  ordinal=ordinal,
                                  identifier=identifier)
                stack[3:] = []  # clear all but bottom three
                stack[-1].append(s)  # add new subsubsection to current subsection
                stack.append(s)
            elif is_heading(para):
                stack[-1].append(Heading([para]))
            elif is_pagebreak(para):
                pass
            else:
                pre = Preformatted([para])
                stack[-1].append(pre)
# end parse2

# begin citation1
        from pyparsing import Word, CaselessLiteral, nums
        section_citation = (
            CaselessLiteral("section") +
            Word(nums + ".").setResultsName("Sec")).setResultsName("SecRef")
        rfc_citation = ("[RFC" + Word(nums).setResultsName("RFC") +
                        "]").setResultsName("RFCRef")
        section_rfc_citation = (section_citation + "of" +
                                rfc_citation).setResultsName("SecRFCRef")

        # end citation1

        # begin citation2
        def rfc_uriformatter(parts):
            uri = ""
            if 'RFC' in parts:
                uri += self.canonical_uri(parts['RFC'].lstrip("0"))
            if 'Sec' in parts:
                uri += "#S" + parts['Sec']
            return uri
# end citation2

# begin citation3

        from ferenda import CitationParser, URIFormatter
        citparser = CitationParser(section_rfc_citation, section_citation,
                                   rfc_citation)
        citparser.set_formatter(
            URIFormatter(("SecRFCRef", rfc_uriformatter),
                         ("SecRef", rfc_uriformatter),
                         ("RFCRef", rfc_uriformatter)))
        citparser.parse_recursive(doc.body)
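A quick standalone check (pyparsing only; the citation string is made up) that the combined grammar above matches as intended:

from pyparsing import Word, CaselessLiteral, nums

section_citation = (CaselessLiteral("section") +
                    Word(nums + ".").setResultsName("Sec")).setResultsName("SecRef")
rfc_citation = ("[RFC" + Word(nums).setResultsName("RFC") +
                "]").setResultsName("RFCRef")
section_rfc_citation = (section_citation + "of" +
                        rfc_citation).setResultsName("SecRFCRef")

parts = section_rfc_citation.parseString("section 4.1 of [RFC2822]")
print(parts["Sec"], parts["RFC"])   # -> 4.1 2822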
Example #17
 def test_serialize_newstr(self):
     # really a test for future.types.newstr.newstr, here aliased
     # to str() -- this is only ever an issue on py2.
     tree = Body([], a=str("x"), b="y")
     serialized = serialize(tree, format="xml")
     self.assertEqual('<Body a="x" b="y" />\n', serialized)
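A minimal roundtrip sketch in the same vein as the assertion above (assuming serialize and deserialize are importable from ferenda.elements, as the other tests on this page use them):

from ferenda.elements import Body, Paragraph, serialize, deserialize

tree = Body([Paragraph(["Hello"])])
xml = serialize(tree)                    # XML is the default format
restored = deserialize(xml, caller_globals=globals())
assert restored == tree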
Example #18
 def get_parser(self, basefile, sanitized, parseconfig="default"):
     return lambda stream: Body(list(stream))
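The returned parser is just a pass-through; a quick hypothetical check of what it produces:

from ferenda.elements import Body

parser = lambda stream: Body(list(stream))
print(parser(["first chunk", "second chunk"]))
# -> a Body (a list subclass) holding the two chunks in order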
Example #19
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

# begin makedoc
from ferenda.elements import Body, Heading, Paragraph, Footnote

doc = Body([Heading(["About Doc 43/2012 and it's interpretation"],predicate="dcterms:title"),
            Paragraph(["According to Doc 43/2012",
                       Footnote(["Available at http://example.org/xyz"]),
                       " the bizbaz should be frobnicated"])
           ])
# end makedoc

# begin derived-class
from ferenda.elements import CompoundElement, OrdinalElement

class Preamble(CompoundElement): pass
class PreambleRecital(CompoundElement,OrdinalElement):
    tagname = "div"
    rdftype = "eurlex:PreambleRecital"

doc = Preamble([PreambleRecital("Un",ordinal=1)],
               [PreambleRecital("Deux",ordinal=2)],
               [PreambleRecital("Trois",ordinal=3)])
# end derived-class

# begin as-xhtml
from ferenda.elements import SectionalElement
p = SectionalElement(["Some content"],
                     ordinal = "1a",
                     identifier = "Doc pt 1(a)",
                     title="Title or name of the part")
body = Body([p])
from lxml import etree
etree.tostring(body.as_xhtml("http://example.org/doc"))
# end as-xhtml
return_value = etree.tostring(body.as_xhtml("http://example.org/doc"),
                              pretty_print=True)
Example #20
 def test_create_body(self):
     b = Body()
     doc = Document(body=b)
     self.assertIs(b, doc.body)