Exemplo n.º 1
0
    def stringSubstitutions(self,
                            ElementTree,
                            w3c_compat=False,
                            w3c_compat_substitutions=False,
                            w3c_compat_crazy_substitutions=False,
                            **kwargs):
        """Replace placeholder strings throughout the document tree.

        Each substitution is a ``(regex, replacement, identifier)`` triple.
        For every node yielded by ``ElementTree.iter()`` the regex is applied
        to the node's text, its tail and each attribute value, but only when
        ``identifier`` occurs in that string.

        The module-level ``string_subs`` and the title substitution are
        always applied.  ``w3c_compat`` or ``w3c_compat_substitutions`` adds
        the status and long-status substitutions, and
        ``w3c_compat_crazy_substitutions`` adds the W3C stylesheet one.

        The tree is modified in place; nothing is returned.  A ``KeyError``
        propagates if ``self.w3c_status`` is missing from ``longstatus_map``
        while the compat substitutions are enabled.
        """
        # Get doc_title from the title element
        try:
            doc_title = utils.textContent(
                ElementTree.getroot().find(u"head").find(u"title"))
        except (AttributeError, TypeError):
            doc_title = u""

        # The title is literal text taken from the document, but a string
        # replacement handed to regex.sub() has its backslash escapes
        # processed.  Double the backslashes so that a title such as
        # u"C:\dir" is inserted verbatim instead of being mangled or
        # rejected as a bad escape.
        doc_title_sub = doc_title.replace(u"\\", u"\\\\")

        if w3c_compat or w3c_compat_substitutions:
            # Get the right long status
            doc_longstatus = longstatus_map[self.w3c_status]

        if w3c_compat_crazy_substitutions:
            # Get the right stylesheet
            doc_w3c_stylesheet = u"http://www.w3.org/StyleSheets/TR/W3C-" + \
                                 self.w3c_status

        # Get all the subs we want
        instance_string_subs = string_subs + \
                               ((title, doc_title_sub, title_identifier), )

        # And even more in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            instance_string_subs += ((status, self.w3c_status,
                                      status_identifier),
                                     (longstatus, doc_longstatus,
                                      longstatus_identifier))

        # And more that aren't even enabled by default in compat. mode
        if w3c_compat_crazy_substitutions:
            instance_string_subs += ((w3c_stylesheet, doc_w3c_stylesheet,
                                      w3c_stylesheet_identifier), )

        for node in ElementTree.iter():
            for regex, sub, identifier in instance_string_subs:
                # The cheap substring test avoids running the regex on
                # strings that cannot contain the placeholder.
                if node.text is not None and identifier in node.text:
                    node.text = regex.sub(sub, node.text)
                if node.tail is not None and identifier in node.tail:
                    node.tail = regex.sub(sub, node.tail)
                for name, value in node.attrib.items():
                    if identifier in value:
                        node.attrib[name] = regex.sub(sub, value)
Exemplo n.º 2
0
    def getTerm(self,
                element,
                w3c_compat=False,
                w3c_compat_xref_normalization=False,
                **kwargs):
        """Return the normalised cross-reference term for ``element``.

        The term comes from the first of the ``data-anolis-xref``,
        ``data-x`` and ``title`` attributes that is present; when none is
        set, the element's text content is used.  The result has
        ``utils.spaceCharacters`` stripped from both ends, is lowercased, and
        has every match of ``utils.spacesRegex`` replaced by a single space.

        The ``w3c_compat*`` arguments are accepted but not used here.
        """
        for attribute in ("data-anolis-xref", "data-x", "title"):
            raw = element.get(attribute)
            if raw is not None:
                break
        else:
            # No attribute supplied a term: fall back to the element's text.
            raw = utils.textContent(element)

        lowered = raw.strip(utils.spaceCharacters).lower()
        return utils.spacesRegex.sub(" ", lowered)
Exemplo n.º 3
0
    def getTerm(self, element, w3c_compat=False,
                w3c_compat_xref_normalization=False, **kwargs):
        """Return the normalised cross-reference term for ``element``.

        ``data-anolis-xref`` takes precedence over ``title``; if neither
        attribute is present the element's text content is used.  The value
        has ``utils.spaceCharacters`` stripped from both ends, is lowercased
        and has each ``utils.spacesRegex`` match replaced by one space.  When
        ``w3c_compat`` or ``w3c_compat_xref_normalization`` is true, every
        match of ``non_alphanumeric_spaces`` is removed as well.
        """
        explicit = element.get(u"data-anolis-xref")
        if explicit is None:
            explicit = element.get(u"title")
        raw = utils.textContent(element) if explicit is None else explicit

        normalized = utils.spacesRegex.sub(
            u" ", raw.strip(utils.spaceCharacters).lower())

        if not (w3c_compat or w3c_compat_xref_normalization):
            return normalized
        return non_alphanumeric_spaces.sub(u"", normalized)
Exemplo n.º 4
0
    def stringSubstitutions(self, ElementTree, w3c_compat=False,
                            w3c_compat_substitutions=False,
                            w3c_compat_crazy_substitutions=False, **kwargs):
        """Apply the string placeholder substitutions to the whole tree.

        The substitutions are ``(regex, replacement, identifier)`` triples:
        the module-level ``string_subs``, the document title, and, depending
        on the flags, the W3C status / long status (``w3c_compat`` or
        ``w3c_compat_substitutions``) and the W3C stylesheet URL
        (``w3c_compat_crazy_substitutions``).

        Every node from ``ElementTree.iter()`` is visited and its text, tail
        and attribute values are rewritten in place wherever ``identifier``
        occurs in them.  Nothing is returned.  ``longstatus_map`` is indexed
        with ``self.w3c_status``, so an unknown status raises ``KeyError``
        when the compat substitutions are enabled.
        """
        # Get doc_title from the title element
        try:
            doc_title = utils.textContent(ElementTree.getroot().find(u"head")
                                                               .find(u"title"))
        except (AttributeError, TypeError):
            doc_title = u""

        # regex.sub() expands backslash escapes found in a string
        # replacement.  The title is arbitrary document text and must be
        # inserted literally, so its backslashes are doubled first.
        literal_title = doc_title.replace(u"\\", u"\\\\")

        if w3c_compat or w3c_compat_substitutions:
            # Get the right long status
            doc_longstatus = longstatus_map[self.w3c_status]

        if w3c_compat_crazy_substitutions:
            # Get the right stylesheet
            doc_w3c_stylesheet = u"http://www.w3.org/StyleSheets/TR/W3C-" + \
                                 self.w3c_status

        # Get all the subs we want
        instance_string_subs = string_subs + \
                               ((title, literal_title, title_identifier), )

        # And even more in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            instance_string_subs += ((status, self.w3c_status,
                                      status_identifier),
                                     (longstatus, doc_longstatus,
                                      longstatus_identifier))

        # And more that aren't even enabled by default in compat. mode
        if w3c_compat_crazy_substitutions:
            instance_string_subs += ((w3c_stylesheet, doc_w3c_stylesheet,
                                      w3c_stylesheet_identifier), )

        for node in ElementTree.iter():
            for regex, sub, identifier in instance_string_subs:
                # Only run the regex where the plain identifier is present.
                if node.text is not None and identifier in node.text:
                    node.text = regex.sub(sub, node.text)
                if node.tail is not None and identifier in node.tail:
                    node.tail = regex.sub(sub, node.tail)
                for name, value in node.attrib.items():
                    if identifier in value:
                        node.attrib[name] = regex.sub(sub, value)
Exemplo n.º 5
0
    def getTerm(self,
                element,
                w3c_compat=False,
                w3c_compat_xref_normalization=False,
                **kwargs):
        """Compute the normalised term that ``element`` refers to or defines.

        The source text is the ``data-anolis-xref`` attribute if present,
        otherwise the ``title`` attribute, otherwise the element's text
        content.  It is stripped of ``utils.spaceCharacters``, lowercased, and
        has each ``utils.spacesRegex`` match replaced by a single space.  With
        ``w3c_compat`` or ``w3c_compat_xref_normalization`` set, matches of
        ``non_alphanumeric_spaces`` are additionally deleted.
        """
        for attribute_name in (u"data-anolis-xref", u"title"):
            source_text = element.get(attribute_name)
            if source_text is not None:
                break
        else:
            # Neither attribute is set, so use the element's own text.
            source_text = utils.textContent(element)

        stripped = source_text.strip(utils.spaceCharacters)
        result = utils.spacesRegex.sub(u" ", stripped.lower())

        compat_normalization = w3c_compat or w3c_compat_xref_normalization
        if compat_normalization:
            result = non_alphanumeric_spaces.sub(u"", result)

        return result
Exemplo n.º 6
0
    def readDoc(self, ElementTree, name, localName, captionLocalName, figures):
        """Number the ``localName`` elements and collect their captions.

        Every descendant of the root whose tag is ``localName`` is counted.
        Elements with the ``no-num`` class are skipped, but they still consume
        a number.  For each remaining element:

        * an ``id`` of the form ``anolis-<localName>-<n>`` is assigned when
          the element has none;
        * its first ``captionLocalName`` descendant is looked up, and an
          ``(untitled)`` caption is appended when there is none;
        * the caption's leading text is prefixed with ``"<name> <n>: "``;
        * ``(id, caption text before prefixing)`` is appended to ``figures``.

        The tree and the ``figures`` list are modified in place; nothing is
        returned.
        """
        i = 0
        for element in ElementTree.getroot().findall(u".//%s" % localName):
            i += 1
            if utils.elementHasClass(element, u"no-num"):
                continue

            if u"id" not in element.attrib:
                element.set(u"id", u"anolis-%s-%d" % (localName, i))
            element_id = element.get(u"id")

            cap = element.find(u".//%s" % captionLocalName)
            if cap is None:
                cap = etree.Element(u"%s" % captionLocalName)
                cap.text = u"(untitled)"
                element.append(cap)

            # Capture the caption before the numbering prefix is added.
            caption = utils.textContent(cap)
            # cap.text is None when the caption starts with a child element;
            # fall back to an empty string so the literal "None" is never
            # written into the document.
            cap.text = u"%s %d: %s" % (name, i, cap.text or u"")

            figures.append((element_id, caption))
Exemplo n.º 7
0
    def commentSubstitutions(self, ElementTree, w3c_compat=False,
                             w3c_compat_substitutions=False,
                             w3c_compat_crazy_substitutions=False, **kwargs):
        """Expand comment-driven substitutions in the document tree.

        Two kinds of markers are handled, both written as comments:

        * ``begin-link`` / ``end-link``: an ``<a>`` element is inserted after
          the ``begin-link`` comment and receives that comment's tail as its
          text.  Every node up to ``end-link`` is scheduled for removal, and
          the ones that are direct children of the marker's parent are
          deep-copied into the link.  At ``end-link`` the link is passed to
          ``utils.removeInteractiveContentChildren`` and its ``href`` is set
          to its text content.
        * for each ``(comment, sub)`` pair in ``basic_comment_subs`` (plus the
          logo and copyright pairs when ``w3c_compat`` or
          ``w3c_compat_substitutions`` is true): between ``begin-<comment>``
          and ``end-<comment>`` all nodes are scheduled for removal and a
          deep copy of ``sub`` is inserted after the begin marker; a bare
          ``<comment>`` comment is replaced by a begin marker, a copy of
          ``sub`` and an end marker.

        The tree is modified in place; nothing is returned.

        Raises:
            DifferentParentException: if a begin marker and its matching end
                marker do not have the same parent.
        """
        # Basic substitutions
        instance_basic_comment_subs = basic_comment_subs

        # Add more basic substitutions in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            instance_basic_comment_subs += ((logo, logo_sub),
                                            (copyright, copyright_sub))

        # Set of nodes to remove; removal is deferred until both passes are
        # done so the tree is not pruned while it is being iterated.
        to_remove = set()

        # Link
        in_link = False
        for node in ElementTree.iter():
            if in_link:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == u"end-link":
                    if node.getparent() is not link_parent:
                        raise DifferentParentException(
                            u"begin-link and end-link have different parents")
                    utils.removeInteractiveContentChildren(link)
                    link.set(u"href", utils.textContent(link))
                    in_link = False
                else:
                    # Only direct children of the marker's parent are copied;
                    # their descendants come along with the deep copy.
                    if node.getparent() is link_parent:
                        link.append(deepcopy(node))
                    to_remove.add(node)
            elif node.tag is etree.Comment and \
                 node.text.strip(utils.spaceCharacters) == u"begin-link":
                link_parent = node.getparent()
                in_link = True
                link = etree.Element(u"a")
                link.text = node.tail
                node.tail = None
                node.addnext(link)

        # Basic substitutions
        for comment, sub in instance_basic_comment_subs:
            begin_sub = u"begin-" + comment
            end_sub = u"end-" + comment
            in_sub = False
            for node in ElementTree.iter():
                if in_sub:
                    if node.tag is etree.Comment and \
                       node.text.strip(utils.spaceCharacters) == end_sub:
                        if node.getparent() is not sub_parent:
                            # Both names must be passed as one tuple: applying
                            # % to a single value with two %s placeholders
                            # raises TypeError instead of this exception.
                            raise DifferentParentException(
                                u"%s and %s have different parents" %
                                (begin_sub, end_sub))
                        in_sub = False
                    else:
                        to_remove.add(node)
                elif node.tag is etree.Comment:
                    if node.text.strip(utils.spaceCharacters) == begin_sub:
                        sub_parent = node.getparent()
                        in_sub = True
                        node.tail = None
                        node.addnext(deepcopy(sub))
                    elif node.text.strip(utils.spaceCharacters) == comment:
                        node.addprevious(etree.Comment(begin_sub))
                        node.addprevious(deepcopy(sub))
                        node.addprevious(etree.Comment(end_sub))
                        # Move the tail to the new end marker, since the
                        # original comment is about to be removed.
                        node.getprevious().tail = node.tail
                        to_remove.add(node)

        # Remove nodes
        for node in to_remove:
            node.getparent().remove(node)
Exemplo n.º 8
0
    def buildTerms(self, ElementTree, w3c_compat=False, **kwargs):
        """Populate ``self.terms`` with an index built from the <dfn> elements.

        All ``dfn`` elements in ``ElementTree`` are collected and sorted by
        their lowercased text content.  For each one that has an ``id`` (its
        own, or failing that its parent's) a ``<dl>`` index entry is appended
        to ``self.terms``.  The entry holds:

        * a ``<dt>`` with a copy of the term (retagged as ``span``, id
          removed);
        * optionally a ``<dd class="dfn-excerpt">`` with a copy of the
          ``dfn``'s parent, when more than one ``dfn`` has the same
          case-insensitive text;
        * one ``<dd>`` per distinct ancestor heading link of the nodes that
          link to, or carry, the term's id, with numbered counter links for
          repeated headings.

        ``div.index-nav`` navigation bars with per-letter links are inserted
        at the top, before each new first letter, and at the end.

        Side effects on the document: every node that links to or carries a
        term id gets its ``id`` attribute set to the value returned by
        ``utils.generateID``.  ``w3c_compat`` is not used in this body;
        ``kwargs`` is forwarded to ``utils.generateID``.  Nothing is returned.
        """
        self.terms.text = "\n"
        # make a list of all the defining instances of "terms" in the document
        # -- <dfn> elements
        dfnList = ElementTree.findall("//dfn")
        if dfnList:
            indexNavTop = etree.Element(u"div", {
                u"class": "index-nav",
                u"id": "index-terms_top"
            })
            indexNavTop.text = "\n"
            indexNavTop.tail = "\n"
            # maps a nav key ("top" or a first letter) to its nav <div>
            indexNavHelpers = {"top": indexNavTop}
            self.terms.append(indexNavHelpers["top"])
            termFirstLetter = None
            prevTermFirstLetter = None
            firstLetters = ["top"]
            # sort the list of <dfn> terms by the lowercase value of the DOM
            # textContent of the <dfn> element (concatenation of the <dfn>
            # text nodes and that of any of its descendant elements)
            dfnList.sort(key=lambda dfn: utils.textContent(dfn).lower())
            for dfn in dfnList:
                # we don't need the tail, so copy the <dfn> and drop the tail
                term = deepcopy(dfn)
                term.tail = None
                termID = None
                dfnHasID = False
                if dfn.get("id"):
                    # if this <dfn> itself has an id, we'll use it as part of the
                    # id on the index entry for this term
                    termID = dfn.get("id")
                    dfnHasID = True
                elif dfn.getparent().get("id"):
                    # if this <dfn> itself has no id, use the id of its parent
                    # node as the id on the index entry for this term
                    termID = dfn.getparent().get("id")
                # if we found an id, then create an index entry for this <dfn>
                # term; otherwise, do nothing further
                if termID:
                    # NOTE(review): this element is rebound in both branches
                    # of the if/else just below
                    indexEntry = etree.Element(u"dl")
                    # we want to give this index entry an id attribute based on
                    # the <dfn> or parent of a <dfn> we got the id-attribute
                    # value from earlier; but, if this <dfn> has no id attribute
                    # and has any sibling <dfn>s that also lack id attributes,
                    # we need to further qualify the id attribute here to make
                    # it unique
                    dfnSiblings = int(
                        dfn.xpath("count(preceding-sibling::dfn[not(@id)])"))
                    if not dfnHasID and dfnSiblings > 0:
                        indexEntry = etree.Element(u"dl", {
                            u"id":
                            termID + "_" + str(dfnSiblings) + "_index"
                        })
                    else:
                        indexEntry = etree.Element(u"dl",
                                                   {u"id": termID + "_index"})
                    indexEntry.text = "\n"
                    # termName is the container of the name of the term as it
                    # appears in the index
                    termName = etree.Element(u"dt")
                    if "id" in term.attrib:
                        del term.attrib["id"]
                    term.tag = "span"
                    term.tail = "\n"
                    termName.append(term)
                    termName.tail = "\n"
                    indexEntry.append(termName)
                    # normalize the text content of each <dfn> in the document
                    # and then normalize the text content of this <dfn>, then
                    # do a case-insensitive comparison of them and count how
                    # many matches we have
                    expr = "count(//dfn\
                            [normalize-space(translate(.,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'))\
                            =normalize-space(translate($content,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'))])"

                    if ElementTree.xpath(expr,
                                         content=utils.textContent(term)) > 1:
                        # we have more than one <dfn> in the document whose
                        # content is a case-insensitive match for the
                        # textContent of this <dfn>; so, we attempt to
                        # disambiguate them by copying the parent node of the
                        # <dfn> and including that in our output as an excerpt,
                        # to provide the context for the term
                        dfnContext = etree.Element(u"dd",
                                                   {u"class": u"dfn-excerpt"})
                        dfnContext.text = "\n"
                        dfnContext.tail = "\n"
                        dfnParentNode = deepcopy(dfn.getparent())
                        # if length of the parent node isn't greater than 1,
                        # then the <dfn> is the only child node of its parent,
                        # and so there is no useful context we can provide, so
                        # we do nothing. Also, if the parent node is an h1-h6
                        # heading, we are already listing it in the entry, so
                        # it'd be redundant to include it here too, so we don't
                        if len(dfnParentNode) > 1 and not re.match(
                                "^[hH][1-6]$", dfnParentNode.tag):
                            # we just drop all of the text in this parent up to
                            # the first child element, because it's often just
                            # part of phrase like "The foo attribute" or
                            # something, and we don't need that. But, after we
                            # drop it, we don't want the node to end up starting
                            # with no text at all (because it looks odd in our
                            # output), so we replace it with some characters to
                            # indicate that something has been elided
                            if not dfnParentNode[0].tag == "dfn":
                                dfnParentNode.text = "*** "
                            # ...except for the case where we know our current
                            # dfn is the first child element, and then we deal
                            # with handling of that a little further down
                            else:
                                dfnParentNode.text = ""
                            dfnParentNode.tag = "span"
                            # remove ID so that we don't duplicate it
                            if "id" in dfnParentNode.attrib:
                                del dfnParentNode.attrib["id"]
                            descendants = dfnParentNode.xpath(
                                ".//*[self::dfn or @id]")
                            for descendant in descendants:
                                if descendant.tag == "dfn":
                                    descendant.tag = "span"
                                if "id" in descendant.attrib:
                                    del descendant.attrib["id"]
                                # if the text content of this descendant is the
                                # same as the text content of the term, then we
                                # don't want to repeat it, so instead we
                                # replace it with ellipses
                                if utils.textContent(descendant).lower(
                                ) == utils.textContent(term).lower():
                                    tail = ""
                                    if descendant.tail is not None:
                                        tail = descendant.tail
                                    # drop any children this element might have,
                                    # and just put ellipsis in place of it
                                    descendant.clear()
                                    descendant.text = "..." + tail
                                elif descendant == descendants[0]:
                                    # if we get here it means that the first dfn
                                    # child of this parent node is _not_ our
                                    # current dfn, so we use some alternative
                                    # characters (other than ellipses) to
                                    # indicate that we've elided something
                                    dfnParentNode.text = "*** "
                            dfnContext.append(dfnParentNode)
                            indexEntry.append(dfnContext)
                    # we need a first letter so that we can build navigational
                    # links for the alphabetic nav bars injected into the index
                    termFirstLetter = utils.textContent(term)[0].upper()
                    if termFirstLetter != prevTermFirstLetter and termFirstLetter.isalpha(
                    ):
                        firstLetters.append(termFirstLetter)
                        indexNavHelpers[termFirstLetter] = etree.Element(
                            u"div", {
                                u"class": "index-nav",
                                u"id": "index-terms_" + termFirstLetter
                            })
                        prevTermFirstLetter = termFirstLetter
                        self.terms.append(indexNavHelpers[termFirstLetter])
                    # #########################################################
                    # make a list of all the instances of terms in the document
                    # that are hyperlinked references back to the <dfn> term
                    # that is the defining instance of this term, as well as
                    # the <dfn> defining instance itself
                    # #########################################################
                    instanceList = ElementTree.xpath(
                        "//a[substring-after(@href,'#')=$targetID]|//*[@id=$targetID]",
                        targetID=termID)
                    if instanceList:
                        instanceItem = None
                        lastLinkToHeading = None
                        lastInstanceItem = None
                        for instance in instanceList:
                            # each of these term instances is an <a> hyperlink
                            # without an id attribute, but we need each to have
                            # an id attribute so that we can link back to it
                            # from the index of terms; so, create an id for each
                            instanceID = utils.generateID(instance, **kwargs)
                            instance.set(u"id", instanceID)
                            # make a link that's a copy of the node of the h1-h6
                            # heading for the section that contains this
                            # instance hyperlink
                            linkToHeading = self.getAncestorHeadingLink(
                                instance, instanceID)
                            if not instance.tag == u"a":
                                linkToHeading.set(u"class", "dfn-ref")
                            # if this heading is not the same as one that we've
                            # already added to the index entry for this term,
                            # then process the heading
                            if lastLinkToHeading is None or \
                               utils.textContent(linkToHeading) != utils.textContent(lastLinkToHeading):
                                instanceItem = etree.Element(u"dd")
                                instanceItem.text = "\n"
                                lastLinkToHeading = linkToHeading
                                # n counts the links recorded under the
                                # current heading
                                n = 1
                                # we wait to add the item for the previous
                                # instance at this point because we need to
                                # delay adding in order to see if for this
                                # instance there are multiple references to the
                                # same ancestor heading (if there are, we append
                                # link numbers to them, instead of repeating the
                                # heading; see below)
                                if lastInstanceItem is not None:
                                    #print(etree.tostring(lastInstanceItem,method="text"))
                                    indexEntry.append(lastInstanceItem)
                                lastInstanceItem = instanceItem
                                linkToHeading.tail = "\n"
                                instanceItem.append(linkToHeading)
                                instanceItem.tail = "\n"
                            # otherwise, this heading is the same as one that
                            # we've already added to the index entry for this
                            # term; so instead of reprocessing the heading, we
                            # just append one or more link numbers to it
                            else:
                                n += 1
                                counterLink = etree.Element(
                                    u"a", {
                                        u"href": "#" + instanceID,
                                        u"class": "index-counter"
                                    })
                                if not instance.tag == u"a":
                                    counterLink.set(u"class", "dfn-ref")
                                else:
                                    counterLink.set(u"class", "index-counter")
                                counterLink.text = "(" + str(n) + ")"
                                counterLink.tail = "\n"
                                instanceItem.append(counterLink)
                            # if the value of our n counter is still at 1 at
                            # this point, it means that so far there is only
                            # one reference to this term under the current
                            # heading, so we need to add that instance now
                            if n == 1:
                                indexEntry.append(instanceItem)
                    if not len(instanceList) > 1:
                        # if we don't have more than one item in this list, it
                        # means the <dfn> defining instance is the only item in
                        # the list, and the document contains no hyperlinked
                        # references back to that defining instance at all, so
                        # we need to set a flag to indicate that
                        indexEntry.set(u"class", "has-norefs")
                    self.terms.append(indexEntry)
                    indexEntry.tail = "\n"
            # ######################################################################
            # inject some alphabetic nav hyperlink bars into the index, strictly
            # for convenience purposes
            # ######################################################################
            navLetters = etree.Element(u"p")
            navLetters.text = "\n"
            navLetters.tail = "\n"
            navLettersClones = {}
            # reverse the letters list so that we can just pop off it
            firstLetters.append("end")
            firstLetters.reverse()
            while (firstLetters):
                letter = firstLetters.pop()
                navLetter = etree.Element(u"a",
                                          {u"href": "#index-terms_" + letter})
                navLetter.text = letter
                navLetter.tail = "\n"
                navLetters.append(navLetter)
            for key, navNode in indexNavHelpers.items():
                # this seems really hacky... but we need some way to manage multiple
                # copies of the sets of nav hyperlink letters we inject into the
                # index; otherwise, how to do it without just moving a single node
                # around instead of copying it...
                navLettersClones[key] = deepcopy(navLetters)
                navNode.text = "\n"
                navNode.append(navLettersClones[key])
                navNode.tail = "\n"
            navLettersEnd = deepcopy(navLetters)
            indexNavEnd = etree.Element(u"div", {
                u"class": "index-nav",
                u"id": "index-terms_end"
            })
            indexNavEnd.text = "\n"
            indexNavEnd.tail = "\n"
            indexNavEnd.append(navLettersEnd)
            # the helpers dict is rebound here to hold only the end nav bar
            indexNavHelpers = {"end": indexNavEnd}
            self.terms.append(indexNavHelpers["end"])
        self.terms.tail = "\n"
Exemplo n.º 9
0
    def commentSubstitutions(self,
                             ElementTree,
                             w3c_compat=False,
                             w3c_compat_substitutions=False,
                             w3c_compat_crazy_substitutions=False,
                             enable_woolly=False,
                             **kwargs):
        """Expand comment markers in the document tree.

        Link markers: after a ``begin-link`` comment an ``<a>`` element is
        inserted, taking the comment's tail as its text.  Every node up to the
        ``end-link`` comment is scheduled for removal, and those sharing the
        marker's parent are deep-copied into the link.  At ``end-link`` the
        link goes through ``utils.removeInteractiveContentChildren`` and its
        ``href`` is set to its text content.

        Basic substitutions: every ``(comment, sub)`` pair is handed to
        ``utils.replaceComment`` together with ``kwargs``.  The pairs come
        from ``basic_comment_subs``; with ``w3c_compat`` or
        ``w3c_compat_substitutions`` a logo paragraph (including the CSS WG
        logo when ``enable_woolly`` is true) and a copyright paragraph dated
        with the year of ``self.pubdate`` are added.

        Raises ``utils.DifferentParentException`` when ``begin-link`` and
        ``end-link`` have different parents.  The tree is modified in place.
        """

        def matches_marker(candidate, marker):
            # True for a comment node whose stripped text equals the marker.
            return (candidate.tag is etree.Comment and
                    candidate.text.strip(utils.spaceCharacters) == marker)

        comment_subs = basic_comment_subs

        # The W3C-specific boilerplate is only wanted in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            publication_year = time.strftime("%Y", self.pubdate)
            copyright_sub = etree.fromstring(
                '<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &#xA9; %s <a href="http://www.w3.org/"><abbr title="World Wide Web Consortium">W3C</abbr></a><sup>&#xAE;</sup> (<a href="http://www.csail.mit.edu/"><abbr title="Massachusetts Institute of Technology">MIT</abbr></a>, <a href="http://www.ercim.eu/"><abbr title="European Research Consortium for Informatics and Mathematics">ERCIM</abbr></a>, <a href="http://www.keio.ac.jp/">Keio</a>, <a href="http://ev.buaa.edu.cn/">Beihang</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>'
                % publication_year)

            logo_markup = '<a href="http://www.w3.org/"><img height="48" width="72" alt="W3C" src="https://www.w3.org/Icons/w3c_home"/></a>'
            if enable_woolly:
                logo_markup += '<a class="logo" href="https://www.w3.org/Style/Group/" rel="in-activity"><img alt="CSS WG" src="https://www.w3.org/Style/Woolly/woolly-icon"/></a>'
            logo_sub = etree.fromstring('<p>%s</p>' % logo_markup)

            comment_subs += (("logo", logo_sub), ("copyright", copyright_sub))

        # Removal is deferred until the very end
        doomed = set()

        # Link handling: link_parent doubles as the "inside a link" flag
        link_parent = None
        link = None
        for node in ElementTree.iter():
            if link_parent is None:
                if matches_marker(node, "begin-link"):
                    link_parent = node.getparent()
                    link = etree.Element("a")
                    link.text = node.tail
                    node.tail = None
                    node.addnext(link)
                continue

            if not matches_marker(node, "end-link"):
                # Content of the link: copy top-level nodes, drop originals
                if node.getparent() is link_parent:
                    link.append(deepcopy(node))
                doomed.add(node)
                continue

            if node.getparent() is not link_parent:
                raise utils.DifferentParentException(
                    "begin-link and end-link have different parents")
            utils.removeInteractiveContentChildren(link)
            link.set("href", utils.textContent(link))
            link_parent = None

        # Basic substitutions
        for marker_name, replacement in comment_subs:
            utils.replaceComment(ElementTree, marker_name, replacement,
                                 **kwargs)

        # Remove nodes
        for stale in doomed:
            stale.getparent().remove(stale)
Exemplo n.º 10
0
    def stringSubstitutions(self,
                            ElementTree,
                            w3c_compat=False,
                            w3c_compat_substitutions=False,
                            w3c_compat_crazy_substitutions=False,
                            w3c_shortname='',
                            **kwargs):
        """Replace ``[NAME...]`` placeholders throughout the document.

        Every node yielded by ``ElementTree.iter()`` has its text, its tail
        and each of its attribute values rewritten in place.

        Always substituted: ``[YEAR``, ``[DATE``, ``[CDATE`` and ``[UDATE``
        placeholders (all formatted from ``self.pubdate``) and the
        module-level ``title`` pattern, which receives the text content of
        ``head/title`` (an empty string when that lookup fails).

        Args:
            ElementTree: the parsed document; must provide ``getroot()`` and
                ``iter()``.
            w3c_compat: enables the status, long status, short name, latest
                version and this version substitutions.
            w3c_compat_substitutions: has the same effect as ``w3c_compat``
                in this method.
            w3c_compat_crazy_substitutions: additionally enables the W3C
                stylesheet URL substitution.
            w3c_shortname: short name used in the W3C URLs; when empty, the
                base name of the current working directory is used instead.
            **kwargs: accepted and ignored.
        """
        # Get doc_title from the title element. A missing head makes the
        # chained find() raise AttributeError; both listed errors are treated
        # as "the document has no title".
        try:
            doc_title = utils.textContent(
                ElementTree.getroot().find("head").find("title"))
        except (AttributeError, TypeError):
            doc_title = ""

        compat = w3c_compat or w3c_compat_substitutions

        year_sub = time.strftime("%Y", self.pubdate)
        # lstrip removes the leading zero of a single digit day of month
        date_sub = time.strftime("%d %B %Y", self.pubdate).lstrip("0")
        cdate_sub = time.strftime("%Y%m%d", self.pubdate)
        udate_sub = time.strftime("%Y-%m-%d", self.pubdate)

        # Each entry is (compiled pattern, replacement text, identifier);
        # the identifier is a plain substring used as a cheap pre-check
        # before the regular expression is run.
        string_subs = [
            (re.compile(r"\[YEAR[^\]]*\]"), year_sub, "[YEAR"),
            (re.compile(r"\[DATE[^\]]*\]"), date_sub, "[DATE"),
            (re.compile(r"\[CDATE[^\]]*\]"), cdate_sub, "[CDATE"),
            (re.compile(r"\[UDATE[^\]]*\]"), udate_sub, "[UDATE"),
            (title, doc_title, title_identifier),
        ]

        # And even more in compat. mode
        if compat:
            # Get the right long status
            doc_longstatus = longstatus_map[self.w3c_status]
            try:
                shortname_sub = w3c_shortname or os.path.basename(os.getcwd())
            except OSError:
                shortname_sub = ""
            latest_sub = "http://www.w3.org/TR/%s/" % (shortname_sub, )
            version_sub = "http://www.w3.org/TR/%s/%s-%s-%s/" % (
                year_sub, self.w3c_status, shortname_sub, cdate_sub)
            string_subs += [
                (status, self.w3c_status, status_identifier),
                (longstatus, doc_longstatus, longstatus_identifier),
                (shortname, shortname_sub, shortname_identifier),
                (latest, latest_sub, latest_identifier),
                (version, version_sub, version_identifier),
            ]

        # And more that aren't even enabled by default in compat. mode
        if w3c_compat_crazy_substitutions:
            # Get the right stylesheet
            doc_w3c_stylesheet = "http://www.w3.org/StyleSheets/TR/W3C-%s" % (
                self.w3c_status, )
            string_subs.append((w3c_stylesheet, doc_w3c_stylesheet,
                                w3c_stylesheet_identifier))

        # Wrap every replacement in a callable: re.sub() inserts the return
        # value of a callable verbatim, whereas a plain string replacement
        # has its backslash escapes processed (so a title containing a
        # backslash would be mangled or rejected).
        literal_subs = [(regex, (lambda match, text=sub: text), identifier)
                        for regex, sub, identifier in string_subs]

        for node in ElementTree.iter():
            for regex, replacement, identifier in literal_subs:
                if node.text is not None and identifier in node.text:
                    node.text = regex.sub(replacement, node.text)
                if node.tail is not None and identifier in node.tail:
                    node.tail = regex.sub(replacement, node.tail)
                # Snapshot the items, as attributes are rewritten in the loop
                for name, value in list(node.attrib.items()):
                    if identifier in value:
                        node.attrib[name] = regex.sub(replacement, value)
Exemplo n.º 11
0
    def commentSubstitutions(self, ElementTree, w3c_compat=False,
                             w3c_compat_substitutions=False,
                             w3c_compat_crazy_substitutions=False, **kwargs):
        """Expand marker comments in the document, modifying it in place.

        Two kinds of markers are handled:

        * ``<!--begin-link-->`` ... ``<!--end-link-->``: an ``a`` element is
          inserted after the begin marker, the nodes between the markers are
          copied into it, and its ``href`` is set to its own text content.
        * For every ``(comment, sub)`` pair of the substitution table: the
          content between ``<!--begin-NAME-->`` and ``<!--end-NAME-->`` is
          replaced by a copy of ``sub``, and a lone ``<!--NAME-->`` comment is
          replaced by a begin marker, a copy of ``sub`` and an end marker.

        Args:
            ElementTree: the parsed document; iterated with ``iter()``.
            w3c_compat: adds the logo and copyright substitutions.
            w3c_compat_substitutions: has the same effect as ``w3c_compat``
                in this method.
            w3c_compat_crazy_substitutions: accepted but not used here.
            **kwargs: accepted and ignored.

        Raises:
            DifferentParentException: if an end marker does not share its
                parent with the matching begin marker.
        """
        def is_marker(node, marker):
            # True for a comment node whose stripped text equals marker
            return node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == marker

        # Basic substitutions. Work on a copy so that the += below can never
        # extend the module-level sequence in place (which would happen if
        # it were a list rather than a tuple).
        instance_basic_comment_subs = tuple(basic_comment_subs)

        # Add more basic substitutions in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            instance_basic_comment_subs += ((logo, logo_sub),
                                            (copyright, copyright_sub))

        # Set of nodes to remove; removal is deferred until every pass over
        # the tree has finished
        to_remove = set()

        # Link
        in_link = False
        for node in ElementTree.iter():
            if in_link:
                if is_marker(node, u"end-link"):
                    if node.getparent() is not link_parent:
                        raise DifferentParentException(
                            u"begin-link and end-link have different parents")
                    utils.removeInteractiveContentChildren(link)
                    link.set(u"href", utils.textContent(link))
                    in_link = False
                else:
                    # Only siblings of the begin marker are copied; deeper
                    # nodes travel along inside the copy of their ancestor
                    if node.getparent() is link_parent:
                        link.append(deepcopy(node))
                    to_remove.add(node)
            elif is_marker(node, u"begin-link"):
                link_parent = node.getparent()
                in_link = True
                link = etree.Element(u"a")
                # The text following the marker becomes the link's text
                link.text = node.tail
                node.tail = None
                node.addnext(link)

        # Basic substitutions
        for comment, sub in instance_basic_comment_subs:
            begin_sub = u"begin-" + comment
            end_sub = u"end-" + comment
            in_sub = False
            for node in ElementTree.iter():
                if in_sub:
                    if is_marker(node, end_sub):
                        if node.getparent() is not sub_parent:
                            # The arguments must be a tuple: without the
                            # parentheses % would bind to begin_sub alone
                            # and raise TypeError instead
                            raise DifferentParentException(
                                u"%s and %s have different parents" %
                                (begin_sub, end_sub))
                        in_sub = False
                    else:
                        to_remove.add(node)
                elif is_marker(node, begin_sub):
                    sub_parent = node.getparent()
                    in_sub = True
                    node.tail = None
                    node.addnext(deepcopy(sub))
                elif is_marker(node, comment):
                    node.addprevious(etree.Comment(begin_sub))
                    node.addprevious(deepcopy(sub))
                    node.addprevious(etree.Comment(end_sub))
                    # Hand the text after the old comment to the end marker
                    node.getprevious().tail = node.tail
                    to_remove.add(node)

        # Remove nodes
        for node in to_remove:
            node.getparent().remove(node)
Exemplo n.º 12
0
    def buildToc(self, ElementTree, min_depth=2, max_depth=6, w3c_compat=False,
                 w3c_compat_class_toc=False, **kwargs):
        """Number the document's section headings and fill ``self.toc``.

        The document outline is walked depth first. For every section any
        existing ``span`` with class ``secno`` is removed from its heading.
        Sections whose depth lies within ``min_depth``..``max_depth`` are
        numbered, given an ``id`` and listed in ``self.toc`` as nested
        ``ol``/``li`` elements, unless the heading carries the ``no-num``
        (no number) or ``no-toc`` (no TOC entry) class.

        Args:
            ElementTree: the parsed document handed to the outliner.
            min_depth: shallowest outline depth (1 based) to include.
            max_depth: deepest outline depth (1 based) to include.
            w3c_compat: when true, nested ``ol`` elements created here get
                ``class="toc"``.
            w3c_compat_class_toc: has the same effect as ``w3c_compat`` in
                this method.
            **kwargs: forwarded to ``outliner.Outliner``, its ``build``
                method, ``self.indentNode`` and ``utils.generateID``.
        """
        # Build the outline of the document
        outline_creator = outliner.Outliner(ElementTree, **kwargs)
        outline = outline_creator.build(**kwargs)

        # Get a list of all the top level sections, and their depth (0)
        sections = [(section, 0) for section in reversed(outline)]

        # Numbering: one counter per numbered nesting level
        num = []

        # Loop over all sections in a DFS
        while sections:
            # Get the section and depth at the end of list
            section, depth = sections.pop()

            # If we have a header, regardless of how deep we are
            if section.header is not None:
                # Get the element that represents the section header's text
                if section.header.tag == u"hgroup":
                    # Use the highest ranked heading (h1 first) in the group
                    for level in range(1, 7):
                        header_text = section.header.find(u".//h%d" % level)
                        if header_text is not None:
                            break
                    else:
                        header_text = None
                else:
                    header_text = section.header
            else:
                header_text = None

            # If we have a section heading text element, regardless of depth
            if header_text is not None:
                # Remove any existing number
                for element in header_text.findall(u".//span"):
                    if utils.elementHasClass(element, u"secno"):
                        # Copy content, to prepare for the node being
                        # removed
                        utils.copyContentForRemoval(element, text=False,
                                                    children=False)
                        # Remove the element (we can do this as we're not
                        # iterating over the elements, but over a list)
                        element.getparent().remove(element)

            # Check we're in the valid depth range (min/max_depth are 1 based,
            # depth is 0 based)
            if depth >= min_depth - 1 and depth <= max_depth - 1:
                # Calculate the corrected depth (i.e., the actual depth within
                # the numbering/TOC)
                corrected_depth = depth - min_depth + 1

                # Numbering:
                # No children, no sibling, move back to parent's sibling
                if corrected_depth + 1 < len(num):
                    del num[corrected_depth + 1:]
                # Children
                elif corrected_depth == len(num):
                    num.append(0)

                # Increment the current section's number
                if header_text is not None and \
                   not utils.elementHasClass(header_text, u"no-num") or \
                   header_text is None and section:
                    num[-1] += 1

                # Get the current TOC section for this depth, and add another
                # item to it
                if header_text is not None and \
                   not utils.elementHasClass(header_text, u"no-toc") or \
                   header_text is None and section:
                    # Find the appropriate section of the TOC
                    i = 0
                    toc_section = self.toc
                    while i < corrected_depth:
                        try:
                            # If the final li has no children, or the last
                            # children isn't an ol element
                            if len(toc_section[-1]) == 0 or \
                               toc_section[-1][-1].tag != u"ol":
                                toc_section[-1].append(etree.Element(u"ol"))
                                self.indentNode(toc_section[-1][-1],
                                                (i + 1) * 2, **kwargs)
                                if w3c_compat or w3c_compat_class_toc:
                                    toc_section[-1][-1].set(u"class", u"toc")
                        except IndexError:
                            # If the current ol has no li in it
                            toc_section.append(etree.Element(u"li"))
                            self.indentNode(toc_section[0], (i + 1) * 2 - 1,
                                            **kwargs)
                            toc_section[0].append(etree.Element(u"ol"))
                            self.indentNode(toc_section[0][0], (i + 1) * 2,
                                            **kwargs)
                            if w3c_compat or w3c_compat_class_toc:
                                toc_section[0][0].set(u"class", u"toc")
                        # TOC Section is now the final child (ol) of the final
                        # item (li) in the previous section
                        assert toc_section[-1].tag == u"li"
                        assert toc_section[-1][-1].tag == u"ol"
                        toc_section = toc_section[-1][-1]
                        i += 1
                    # Add the current item to the TOC
                    item = etree.Element(u"li")
                    toc_section.append(item)
                    self.indentNode(item, (i + 1) * 2 - 1, **kwargs)

                # If we have a header
                if header_text is not None:
                    # Add ID to header: the id is generated from the heading
                    # text element but stored on the section header itself
                    header_id = utils.generateID(header_text, **kwargs)
                    if header_text.get(u"id") is not None:
                        del header_text.attrib[u"id"]
                    section.header.set(u"id", header_id)

                    # Add number, if @class doesn't contain no-num
                    if not utils.elementHasClass(header_text, u"no-num"):
                        header_text[0:0] = [etree.Element(u"span", {u"class":
                                                                    u"secno"})]
                        # The heading's leading text now follows the number
                        header_text[0].tail = header_text.text
                        header_text.text = None
                        # Dotted section number followed by a space
                        header_text[0].text = \
                            u".".join(u"%d" % n for n in num) + u" "
                    # Add to TOC, if @class doesn't contain no-toc
                    if not utils.elementHasClass(header_text, u"no-toc"):
                        link = deepcopy(header_text)
                        item.append(link)
                        # Make it link to the header
                        link.tag = u"a"
                        link.set(u"href", u"#" + header_id)
                        # Remove interactive content child elements
                        utils.removeInteractiveContentChildren(link)
                        # Remove other child elements
                        for element_name in remove_elements_from_toc:
                            # Iterate over all the descendants of the new
                            # link with that element name
                            for element in link.findall(u".//" + element_name):
                                # Copy content, to prepare for the node being
                                # removed
                                utils.copyContentForRemoval(element)
                                # Remove the element (we can do this as we're
                                # not iterating over the elements, but over a
                                # list)
                                element.getparent().remove(element)
                        # Remove unwanted attributes
                        for element in link.iter(tag=etree.Element):
                            for attribute_name in remove_attributes_from_toc:
                                if element.get(attribute_name) is not None:
                                    del element.attrib[attribute_name]
                        # We don't want the old tail
                        link.tail = None
                        # Check we haven't changed the content in all of that
                        assert utils.textContent(header_text) == \
                               utils.textContent(link)
            # Add subsections in reverse order (so the next one is executed
            # next) with a higher depth value
            sections.extend([(child_section, depth + 1)
                             for child_section in reversed(section)])
Exemplo n.º 13
0
def buildToc(ElementTree, **kwargs):
    """Return an ``ol`` element listing every section of the document.

    The outline produced by ``outliner.Outliner`` is walked depth first.
    Each section becomes an ``li`` holding the text content of its heading,
    or an italic "Untitled Section" when it has no heading; subsections are
    placed in an ``ol`` nested inside the ``li`` of their parent section.

    Args:
        ElementTree: the parsed document handed to the outliner.
        **kwargs: forwarded to ``outliner.Outliner`` and to its ``build``
            method.

    Returns:
        The root ``ol`` element of the generated table of contents.
    """
    # Create root element of TOC
    toc = etree.Element(u"ol")

    # Build the outline of the document
    outline_creator = outliner.Outliner(ElementTree, **kwargs)
    outline = outline_creator.build(**kwargs)

    # Get a list of all the top level sections, and their depth (0)
    sections = [(section, 0) for section in reversed(outline)]

    # Loop over all sections in a DFS
    while sections:
        # Get the section and depth at the end of list
        section, depth = sections.pop()

        # If we have a header, regardless of how deep we are
        if section.header is not None:
            # Get the element that represents the section header's text
            if section.header.tag == u"hgroup":
                # Use the highest ranked heading (h1 first) in the group
                for level in range(1, 7):
                    header_text = section.header.find(u".//h%d" % level)
                    if header_text is not None:
                        break
                else:
                    header_text = None
            else:
                header_text = section.header
        else:
            header_text = None

        # Find the appropriate section of the TOC by descending one nested
        # list per level of depth, creating missing li/ol elements on the way
        toc_section = toc
        for _ in range(depth):
            try:
                # If the final li has no children, or the last
                # children isn't an ol element
                if len(toc_section[-1]) == 0 or \
                   toc_section[-1][-1].tag != u"ol":
                    toc_section[-1].append(etree.Element(u"ol"))
            except IndexError:
                # If the current ol has no li in it
                toc_section.append(etree.Element(u"li"))
                toc_section[0].append(etree.Element(u"ol"))
            # TOC Section is now the final child (ol) of the final
            # item (li) in the previous section
            assert toc_section[-1].tag == u"li"
            assert toc_section[-1][-1].tag == u"ol"
            toc_section = toc_section[-1][-1]

        # Add the current item to the TOC
        item = etree.Element(u"li")
        toc_section.append(item)

        # If we have a header
        if header_text is not None:
            item.text = utils.textContent(header_text)
        else:
            italics = etree.Element(u"i")
            italics.text = "Untitled Section"
            item.append(italics)

        # Add subsections in reverse order (so the next one is executed
        # next) with a higher depth value
        sections.extend([(child_section, depth + 1)
                         for child_section in reversed(section)])

    return toc