Пример #1
0
def loadDTD(modelXbrl):
    """Parse ``edbody.dtd`` once and cache it in the global ``edbodyDTD``.

    The DTD file is looked up in the controller's ``configDir``.  When the
    module-level cache is already populated the call does nothing.
    """
    global edbodyDTD
    if edbodyDTD is not None:
        # Already parsed by an earlier call; keep the cached object.
        return
    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           "edbody.dtd")
    with open(dtdPath) as dtdFile:
        edbodyDTD = DTD(dtdFile)
Пример #2
0
def loadDTD(modelXbrl):
    """Load the DTD matching the document type into the global ``edbodyDTD``.

    ``xhtml1-strict-ix.dtd`` is used when the model document's type equals
    ``ModelDocumentTypeINLINEXBRL``; otherwise ``edbody.dtd`` is used.  Both
    files are read from the controller's ``configDir``.  The parsed DTD is
    cached and only reloaded when the inline/non-inline kind changes.

    Any exception from ``open`` or ``DTD`` propagates to the caller; in that
    case the cached state is left untouched so the next call retries.
    """
    global edbodyDTD, isInlineDTD
    initModelDocumentTypeReferences()
    _isInline = modelXbrl.modelDocument.type == ModelDocumentTypeINLINEXBRL
    if isInlineDTD is None or isInlineDTD != _isInline:
        dtdName = "xhtml1-strict-ix.dtd" if _isInline else "edbody.dtd"
        with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                               dtdName)) as fh:
            loadedDTD = DTD(fh)
        # Commit the cache flag only after a successful parse; setting it
        # first would make a failed load look cached and skip later reloads.
        edbodyDTD = loadedDTD
        isInlineDTD = _isInline
Пример #3
0
def run(xhtml_files: List[Path], dtd_file: Path, images: bool,
        links: bool) -> bool:
    """
    Check every XHTML file against the DTD.

    :param xhtml_files: the XHTML files to check
    :param dtd_file: path to the DTD file
    :param images: passed through to ``test`` for each file
    :param links: passed through to ``test`` for each file
    :return: False if the DTD cannot be parsed or ``test`` fails for any
             file, True otherwise
    """
    try:
        # Parse once up front so a broken DTD is reported a single time
        # before any file is processed.
        DTD(str(dtd_file))
    except DTDParseError as e:
        print(e.error_log, file=stderr)
        clear_error_log()
        return False

    success = True
    for xhtml_file in xhtml_files:
        # if you reuse the parser on too many documents it gets confused
        parser = XHTMLParser(dtd_validation=True, ns_clean=True)
        dtd = DTD(str(dtd_file))
        if settings.verbose:
            # The original printed an undefined name here (NameError).
            print(xhtml_file)
        if not test(xhtml_file, parser, dtd, images, links):
            success = False
    return success
Пример #4
0
def loadDTD(modelXbrl):
    """Load the DTD matching the document type into the global ``edbodyDTD``.

    ``xhtml1-strict-ix.dtd`` is used when the model document's type equals
    ``Type.INLINEXBRL``; otherwise ``edbody.dtd`` is used.  Both files are
    read from the controller's ``configDir``.  The parsed DTD is cached and
    only reloaded when the inline/non-inline kind changes.

    Any exception from ``open`` or ``DTD`` propagates to the caller; in that
    case the cached state is left untouched so the next call retries.
    """
    global edbodyDTD, isInlineDTD, ModelDocumentTypeINLINEXBRL
    if ModelDocumentTypeINLINEXBRL is None:
        # Resolved on first use and remembered in the module global.
        from arelle.ModelDocument import Type
        ModelDocumentTypeINLINEXBRL = Type.INLINEXBRL
    _isInline = modelXbrl.modelDocument.type == ModelDocumentTypeINLINEXBRL
    if isInlineDTD is None or isInlineDTD != _isInline:
        dtdName = "xhtml1-strict-ix.dtd" if _isInline else "edbody.dtd"
        with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                               dtdName)) as fh:
            loadedDTD = DTD(fh)
        # Commit the cache flag only after a successful parse; setting it
        # first would make a failed load look cached and skip later reloads.
        edbodyDTD = loadedDTD
        isInlineDTD = _isInline
Пример #5
0
def open_dtd(dtd_file: Path) -> DTD:
    """
    Open and validate an XML DTD. Exit the program on failure.

    A parse error is reported on stderr, prefixed with the file name and
    line 1, before exiting with status 1.

    :param dtd_file: path to a DTD file
    :return: A DTD object
    """
    try:
        parsed = DTD(str(dtd_file))
    except DTDParseError as parse_error:
        print(f"{dtd_file}:1: {parse_error}", file=stderr)
        exit(1)
    else:
        return parsed
Пример #6
0
 def normalize_samples(self, norm, name, version):
     """Validate a normalizer definition and check its name and version.

     The XML file ``norm`` (relative to ``self.normalizer_path``) is parsed
     and validated against ``normalizer.dtd``, a Normalizer is built from
     it, and its ``name``/``version`` are compared with the expected values
     before ``Normalizer.validate()`` is run.
     """
     # open parser; the file is closed as soon as the tree is built
     with open(os.path.join(self.normalizer_path, norm)) as norm_file:
         n = parse(norm_file)
     # validate DTD
     with open(os.path.join(self.normalizer_path,
                            'normalizer.dtd')) as dtd_file:
         dtd = DTD(dtd_file)
     self.assertTrue(dtd.validate(n))
     # Create normalizer from xml definition
     normalizer = Normalizer(
         n, os.path.join(self.normalizer_path, 'common_tagTypes.xml'))
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(normalizer.name, name)
     self.assertEqual(normalizer.version, version)
     self.assertTrue(normalizer.validate())
Пример #7
0
    def __init__(self, normalizers_paths, active_normalizers=None):
        """
        Instantiates a flow manager. The default behavior is to activate every
        available normalizer.
        
        @param normalizers_paths: a list (or tuple) of absolute paths to the
        normalizer XML definitions to use, or just a single path as str.
        @param active_normalizers: a dictionary of active normalizers
        in the form {name-version : [True|False]}. Defaults to a new empty
        dictionary for each instance.
        @raise ValueError: if one of the paths is not a directory.
        @raise StandardError: if normalizer.dtd, common_tagTypes.xml or
        common_callBacks.xml was not found in any of the paths.
        """
        # "list or tuple" evaluates to just "list"; a tuple of types is
        # needed so that tuples are accepted as-is as well.
        if not isinstance(normalizers_paths, (list, tuple)):
            normalizers_paths = [
                normalizers_paths,
            ]
        self.normalizers_paths = normalizers_paths
        # A literal {} default would be one dict shared by every instance.
        if active_normalizers is None:
            active_normalizers = {}
        self.active_normalizers = active_normalizers
        self.dtd, self.ctt, self.ccb = None, None, None

        # Walk through paths for normalizer.dtd and common_tagTypes.xml
        # /!\ dtd file and common elements will be overridden if present in
        # many directories.
        for norm_path in self.normalizers_paths:
            if not os.path.isdir(norm_path):
                raise ValueError(
                    "Invalid normalizer directory : %s" % norm_path)
            dtd = os.path.join(norm_path, 'normalizer.dtd')
            ctt = os.path.join(norm_path, 'common_tagTypes.xml')
            ccb = os.path.join(norm_path, 'common_callBacks.xml')
            if os.path.isfile(dtd):
                # Close the handle once the DTD has been parsed.
                with open(dtd) as dtd_file:
                    self.dtd = DTD(dtd_file)
            if os.path.isfile(ctt):
                self.ctt = ctt
            if os.path.isfile(ccb):
                self.ccb = ccb
        # Technically the common elements files should NOT be mandatory.
        # But many normalizers use them, so better safe than sorry.
        if not self.dtd or not self.ctt or not self.ccb:
            raise StandardError("Missing DTD or common library files")
        self._cache = []
        self.reload()
Пример #8
0
def xhtmlValidate(modelXbrl, elt):
    """Validate ``elt`` against ``xhtml1-strict-ix.dtd``.

    The DTD is read from the controller's ``configDir``.  ``elt`` is first
    converted with ``XmlUtil.ixToXhtml`` and the result is DTD-validated.
    A failed validation, or an ``XMLSyntaxError`` raised along the way, is
    reported through ``modelXbrl.error`` with code ``xmlDTD:error``.
    """
    from lxml.etree import DTD, XMLSyntaxError

    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           "xhtml1-strict-ix.dtd")
    with open(dtdPath) as dtdFile:
        dtd = DTD(dtdFile)

    def reportDtdError(errorDetail):
        # Single place for the message shared by both failure paths.
        modelXbrl.error("xmlDTD:error",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=errorDetail)

    try:
        isValid = dtd.validate(XmlUtil.ixToXhtml(elt))
        if not isValid:
            reportDtdError(', '.join(
                entry.message
                for entry in dtd.error_log.filter_from_errors()))
    except XMLSyntaxError:
        # Here the filtered error log object itself is passed, unjoined.
        reportDtdError(dtd.error_log.filter_from_errors())
Пример #9
0
def validate_norm(fn, nn, version, it):
    """Validate one normalizer definition and record how long validation takes.

    fn      -- normalizer XML file name, relative to the global ``path``
    nn      -- expected normalizer name (compared case-insensitively)
    version -- expected normalizer version
    it      -- number of iterations handed to ``timeit``

    The parsed normalizer is stored in the global ``norm`` because the timed
    statement imports it from ``__main__``.  The measured time, divided by
    ``samples_amount``, is added to the global ``result``.  The function
    returns early, after printing a message, when the name or version does
    not match or when there is nothing to validate.
    """
    global norm
    global result

    # open XML parser; close the file as soon as the tree is built
    with open(os.path.join(path, fn)) as norm_file:
        n = parse(norm_file)
    # validate DTD
    with open(os.path.join(path, 'normalizer.dtd')) as dtd_file:
        dtd = DTD(dtd_file)
    assert dtd.validate(n) == True
    # Create normalizer from xml definition
    norm = Normalizer(n, os.path.join(path, 'common_tagTypes.xml'),
                      os.path.join(path, 'common_callBacks.xml'))
    # Plain comparisons instead of assert/except AssertionError, so the
    # checks still run when Python is started with -O.
    if norm.name.lower() != nn.lower():
        print("\n[%s] and [%s] don't match" % (norm.name, nn))
        return
    if norm.name != nn:
        print("Warning, %s has name attribute set to %s" % (fn, norm.name))
    if norm.version != version:
        print("\n[%s] and [%s] don't match" % (norm.version, version))
        return
    # NOTE(review): this counts one entry per pattern, not per example; the
    # original nested comprehension did the same.  Confirm whether the
    # individual examples were meant to be counted.
    samples_amount = len([v.examples for v in norm.patterns.values()])
    if samples_amount <= 0:
        print("No samples to validate in %s" % fn)
        return
    # Time normalizer validation
    t = timeit.Timer("assert norm.validate() == True",
                     "from __main__ import norm")
    s = t.timeit(it)
    # Normalize result against number of validated samples
    s = s / float(samples_amount)
    # Add result
    result.add_res(norm.name, norm.version, norm.authors, s)
Пример #10
0
        'definition':
        template('PCDATA_OPERATOR_DEFINITION').render({
            'class': element,
            'type': 'int'
        })
    }

# Script entry point: parse the command line, load the DTD and assemble the
# metadata dictionary.
if __name__ == '__main__':
    import argparse

    # Three positional arguments: dtd, hxx and cxx.
    cmdline = argparse.ArgumentParser()
    cmdline.add_argument("dtd")
    cmdline.add_argument("hxx")
    cmdline.add_argument("cxx")
    args = cmdline.parse_args()
    dtd = DTD(args.dtd)
    metadata = {
        'dtd':
        dtd,
        'enumerations':
        enumerations,
        'extra_methods':
        methods,
        # Sorted (name, key) pairs taken from the enumerations mapping.
        # NOTE(review): the filter tests the whole value `v` (indexed as a
        # mapping via v['name']) against a list of element names, and that
        # list is rebuilt for every item. Presumably a name or key was
        # meant to be compared - confirm.
        'enum_classes':
        sorted([(v['name'], k) for k, v in enumerations.items()
                if not v in [e.name for e in dtd.iterelements()]]),
        'forwards_for': {
            'ornament': ['ornament_type'],
            'score': ['score_data', 'score_header']
        }
    }
Пример #11
0
def xhtmlValidate(modelXbrl, elt):
    """Validate the inline XBRL document rooted at ``elt``.

    The tree under ``elt`` is copied into a fresh lxml tree without the
    inline XBRL (ix) elements.  During the copy, ix element names,
    attributes and hierarchy constraints are checked.  The copy is then
    validated against the DTD that ``XHTML_DTD`` maps to the document's ix
    namespace.  When the disclosure system's validation type is "EFM",
    ``ValidateFilingText.validateHtmlContent`` is run as well.

    All findings are reported through ``modelXbrl.error``; nothing is
    returned.
    """
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    # NOTE(review): ixNsStartTags is not used anywhere in this function.
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Check one attribute of ``elt``.

        ``attrTag`` is the attribute name, in ``{namespace}localName`` form
        when qualified.  ``isIxElt`` says whether ``elt`` is an inline XBRL
        element.  Problems are reported through ``modelXbrl.error``.
        """
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI,
                                          EMPTYDICT).get(elt.localName, ())
        # Split a qualified attribute name into namespace and local name.
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        # Case 1: qualified attribute from a non-ix namespace that is not
        # defined for this element.
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass  # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references",
                        "relationship", "tuple"
                }:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                # NOTE(review): the enclosing condition already requires
                # ns not in XbrlConst.ixbrlAll, so this first branch looks
                # unreachable - confirm.
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        ixMsgCode("inlineAttributeMisplaced",
                                  elt,
                                  name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        # Case 2: any other attribute on an ix element. A KeyError from the
        # type lookup, or raised explicitly below, is reported as
        # "attributeNotExpected".
        elif isIxElt:
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                # A dict entry carries the base type under "type" and is
                # itself passed on as the facets.
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and
                         (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                # Start from the full set of attribute names and subtract
                # those listed for this element's local name.
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs
                                      if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(
                        ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)
        # Case 3: unqualified attribute on a non-ix element; its value is
        # validated only when htmlAttrType has an entry for it.
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

    def checkHierarchyConstraints(elt):
        """Check ``elt`` against its ``ixHierarchyConstraints`` entries.

        Afterwards runs static checks for ix:nonFraction, ix:fraction and
        the ``format`` of nonFraction, numerator, denominator and
        nonNumeric elements.  Problems are reported through
        ``modelXbrl.error``; some also mark ``elt`` invalid.
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                # The first character of _rel is the requirement code; the
                # remainder names the relation.
                reqt = _rel[0]
                rel = _rel[1:]
                # For these codes every related element is fetched and then
                # compared with `names` below.
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                if nameFilter == ('*', ):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[
                        0] and nameFilter[0][0] == "{":
                    # A single "{namespace}name" entry supplies its own
                    # namespace.
                    namespaceFilter, _sep, nameFilter = nameFilter[0][
                        1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt, namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                # Dispatch on the relation name to the matching XmlUtil
                # lookup.
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "child-or-text": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, namespaceFilter, nameFilter)
                # The result for these two relations is wrapped in a list
                # (or replaced by an empty list when it is None).
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt,
                                                        ixExclude=True,
                                                        ixEscape=False,
                                                        ixContinuation=False)
                issue = ''
                if reqt in ('^', ):
                    if not any(r.localName in names
                               and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1', ) and not elt.isNil:
                    if sum(r.localName in names
                           and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(
                            names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names
                                 and r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        # Each relation's position in `names` must not be
                        # lower than the highest position seen so far.
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(
                    reqt == '&' and rel in ("child-sequence", "child-choice")
                    and elt.textValue.strip())
                if ((reqt == '+' and not relations)
                        or (reqt == '-' and relations) or (issue)
                        or disallowedChildText):
                    # NOTE(review): when elt.localName is missing from the
                    # ixSect entry, .get() returns the string "other" and
                    # indexing it with "constraint" would raise TypeError -
                    # confirm the intended fallback.
                    code = "{}:{}".format(
                        ixSect[elt.namespaceURI].get(elt.localName,
                                                     "other")["constraint"],
                        {
                            'ancestor': "ancestorNode",
                            'parent': "parentNode",
                            'child-choice': "childNodes",
                            'child-sequence': "childNodes",
                            'child-or-text': "childNodesOrText",
                            'descendant': "descendantNodes"
                        }[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"
                        }.get(reqt, "Specified"))
                    # NOTE(review): the key '+' appears twice in the first
                    # dict literal below; both entries have the same value.
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}"
                            ).format(
                                elt.localName, {
                                    '+': "must",
                                    '-': "may not",
                                    '&': "may only",
                                    '?': "may",
                                    '+': "must",
                                    '^': "must",
                                    '1': "must"
                                }[reqt], {
                                    'ancestor': "be nested in",
                                    'parent': "have parent",
                                    'child-choice': "have child",
                                    'child-sequence': "have child",
                                    'child-or-text': "have child or text,",
                                    'descendant': "have as descendant"
                                }[rel],
                                '' if rel == 'child-or-text' else ', '.join(
                                    str(r.elementQname) for r in relations) if
                                names == ('*', ) and relations else ", ".join(
                                    "{}:{}".format(namespacePrefix, n)
                                    for n in names), issue,
                                " and no child text (\"{}\")".format(
                                    elt.textValue.strip()[:32])
                                if disallowedChildText else "")
                    modelXbrl.error(
                        code,
                        msg,
                        modelObject=[elt] + relations,
                        requirement=reqt,
                        messageCodes=
                        ("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                         "ix{ver.sect}:childNodesOrTextRequired",
                         "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                         "ix{ver.sect}:descendantNodesDisallowed",
                         "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object
        # model, context, units, etc.)
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            # Truthy when the element has leading text or any child has
            # tail text.
            hasText = (elt.text or "") or any(
                (childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(
                    elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(
                        ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"
                          ),
                        modelObject=[elt] + ancestorNonFractions,
                        fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid(
                    )  # prevent further validation or cascading errors
            else:
                # Error when there are children other than exactly one
                # ix:nonFraction, or children together with text.
                if ((childElts and
                     (len(childElts) != 1 or childElts[0].namespaceURI != _ixNS
                      or childElts[0].localName != "nonFraction"))
                        or (childElts and hasText)):
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS,
                                                      elt.localName)
                if ancestorFractions:
                    modelXbrl.error(
                        ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"
                          ),
                        modelObject=[elt] + ancestorFractions,
                        fact=elt.qname)
            else:
                # ix-namespace children other than fraction, numerator and
                # denominator.
                nonFrChildren = [
                    e for e in XmlUtil.children(elt, _ixNS, '*')
                    if e.localName not in ("fraction", "numerator",
                                           "denominator")
                ]
                if nonFrChildren:
                    modelXbrl.error(
                        ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"
                          ),
                        modelObject=[elt] + nonFrChildren,
                        fact=elt.qname,
                        children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(
                        elt, XbrlConst.ixbrl11, "fraction"):  # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(
                            ancestorFraction.get("unitRef")):
                        # NOTE(review): modelObject lists nonFrChildren
                        # rather than ancestorFraction - confirm this is
                        # intended.
                        modelXbrl.error(
                            ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"
                              ),
                            modelObject=[elt] + nonFrChildren,
                            fact=elt.qname,
                            unitRef=elt.get("unitRef"),
                            unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator",
                             "nonNumeric"):
            fmt = elt.format
            if fmt:
                # A format found in the custom transforms is accepted
                # without further checks.
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[
                        fmt.namespaceURI]:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        name=fmt.localName)
                    elt.setInvalid()

    def ixToXhtml(fromRoot):
        """Return a new tree holding the non-ix content under ``fromRoot``.

        The new root is named after ``fromRoot.localName``.  Root
        attributes are checked with ``checkAttribute`` and copied, except
        ``version`` and ``xsi:schemaLocation``.
        """
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in (
                    'version',  # used in inline test cases but not valid xhtml
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'
            ):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Recursively copy the children of ``fromElt`` into ``toElt``.

        ix elements are checked (name, hierarchy constraints, attributes,
        required attributes) but not copied as such.  With
        ``excludeSubtree`` set, descendants are still visited but no new
        elements are created for them.
        """
        for fromChild in fromElt.iterchildren():
            # Children that are not ModelObject instances are skipped.
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[
                            fromChild.namespaceURI]:
                        modelXbrl.error(
                            ixMsgCode("elementNameInvalid", ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"
                              ),
                            modelObject=fromChild,
                            element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[
                                fromChild.namespaceURI].get(
                                    fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(
                                    ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"
                                      ),
                                    modelObject=fromChild,
                                    attribute=attrTag,
                                    element=fromChild.localName)
                # ix:references and ix:resources subtrees (and anything
                # below an already excluded subtree) are walked for checks
                # only.
                if excludeSubtree or (fromChild.localName
                                      in {"references", "resources"}
                                      and isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    # These ix elements are replaced by an
                    # "ixNestedContent" placeholder element that receives
                    # their children, text and tail.
                    if fromChild.localName in {
                            "footnote", "nonNumeric", "continuation"
                    } and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        # Other ix elements are dropped; their children are
                        # copied straight into the current target element.
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        # Non-ix element: copy it with its attributes, text
                        # and tail, checking each attribute on the way.
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag,
                                           attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("html:syntaxError",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl,
                                                   elt,
                                                   elt,
                                                   "InlineXBRL",
                                                   "EFM.5.02.05.",
                                                   isInline=True)
    except XMLSyntaxError as err:
        # Here the filtered error log object itself is passed, not a joined
        # string, and `err` is not used.
        modelXbrl.error("html:syntaxError",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
Пример #12
0
def run(ebook: Path, bigbook: Path, ubercoordinator: Path,
        files: List[Path]) -> None:
    """
    Add Big Book of Key articles to the ebook and bring ``book.xml`` up to date.

    The function works in four passes:

    1. every section already listed in ``book.xml`` is copied from the Big
       Book if it is missing in the ebook, and the images it uses are
       registered in the ``illustrations`` element;
    2. every requested article that is not yet a section is copied into the
       ebook and appended to the ``contents`` element, along with its images;
    3. every registered image missing from the ebook is copied over;
    4. if the set of sections or images changed, ``book.xml`` is backed up to
       ``book.xml.bak`` and rewritten.

    :param ebook: the ebook source directory
    :param bigbook: the Big Book of Key
    :param ubercoordinator: the ubercoordinator source directory, for the DTD
    :param files: the XHTML files from the Big Book of Key that need adding
    :return: None
    """

    index = Index(bigbook)

    # Parse the DTD inside a context manager so the file handle is released
    # as soon as parsing is done instead of being leaked.
    with (ubercoordinator / 'src' / 'book.dtd').open() as dtd_file:
        book_dtd = DTD(dtd_file)
    book = xml.read(ebook / 'book.xml', dtd=book_dtd)

    illustrations = xml.get_one(book, 'illustrations')
    contents = xml.get_one(book, 'contents')

    sections = set(
        xml.get_all_str(contents, '//section[not(@template="yes")]/@file'))
    images = set(xml.get_all_str(illustrations, '//image/@file'))

    # Snapshots used at the end to decide whether book.xml must be rewritten.
    initial_sections = sections.copy()
    initial_images = images.copy()

    # Pass 1: make sure the already-listed sections and their images exist.
    for filename in sections:
        ebook_file = ebook / 'Text' / filename
        bigbook_file = bigbook / 'Text' / filename
        if not ebook_file.exists() and bigbook_file.exists():
            copyfile(bigbook_file, ebook_file)
        if ebook_file.exists():
            for img_filename in find_images(ebook_file):
                if img_filename not in images:
                    illustrations.append(file_element('image', img_filename))
                    images.add(img_filename)
        else:
            print(
                f"{ebook / 'book.xml'}:0:0:WARNING: is this missing?: {filename}"
            )

    # Pass 2: add the requested articles as new sections.
    for file in files:
        article_id = file.stem
        article = index.articles_by_id[article_id]
        section_name = article.file.name

        if section_name not in sections:
            copyfile(article.file, ebook / 'Text' / section_name)
            title = xml.rewrap('title', XML(article.link))
            section = file_element('section', section_name)
            section.append(title)
            contents.append(section)
            # Record the same name that was tested and written above, so a
            # repeated request for this article cannot add a second section.
            sections.add(section_name)

        for img_filename in find_images(article.file):
            if img_filename not in images:
                illustrations.append(file_element('image', img_filename))
                images.add(img_filename)

    # Pass 3: copy over any registered image the ebook does not have yet.
    for img_filename in images:
        file = ebook / 'Images' / img_filename
        if not file.exists():
            copyfile(bigbook / 'Images' / img_filename, file)

    book.attrib['date'] = strftime("%Y-%m-%d")

    # Pass 4: only touch book.xml (after backing it up) when something changed.
    if sections != initial_sections or images != initial_images:
        copyfile(ebook / 'book.xml', ebook / 'book.xml.bak')
        xml.save(ebook / 'book.xml', book, doctype='book')
Пример #13
0
 def test_00_validate_fake_syslog(self):
     """Validate the fake normalizer"""
     # Open the DTD in a context manager so the file handle is closed once
     # the DTD has been parsed rather than being left to the garbage collector.
     dtd_path = os.path.join(self.normalizer_path, 'normalizer.dtd')
     with open(dtd_path) as dtd_file:
         dtd = DTD(dtd_file)
     self.assertTrue(dtd.validate(self.n))
Пример #14
0
def validateXbrlFinally(val, *args, **kwargs):
    """Run the EFM HTM checks on the loaded document.

    Returns immediately unless ``val.validateEFMHTMplugin`` is truthy.
    Otherwise the document tree is validated against
    ``resources/efm-htm.dtd`` (located next to this module) and every element
    is scanned for javascript in ``<a href>`` / ``<img src>``, disallowed
    external references, unacceptable graphics references and nested tables.
    Every finding is reported through ``modelXbrl.error``.
    """
    if not val.validateEFMHTMplugin:
        return

    modelXbrl = val.modelXbrl
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern

    # --- DTD validation of the whole document -------------------------------
    dtdPath = os.path.join(os.path.dirname(__file__), "resources",
                           "efm-htm.dtd")
    efmHtmDTD = None
    with open(dtdPath) as dtdFile:
        efmHtmDTD = DTD(dtdFile)
    if efmHtmDTD and not efmHtmDTD.validate(
            modelXbrl.modelDocument.xmlRootElement.getroottree()):
        # Errors containing this text are deliberately not reported.
        ignoredText = "declared in the external subset contains white spaces nodes"
        for dtdError in efmHtmDTD.error_log.filter_from_errors():
            if ignoredText in dtdError.message:
                continue
            modelXbrl.error("html.syntax",
                            _("HTML error %(error)s"),
                            error=dtdError.message)

    # --- per-element checks ---------------------------------------------------
    for elt in modelXbrl.modelDocument.xmlRootElement.iter():
        eltTag = elt.tag
        if isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
            continue  # comment or other non-parsed element

        for attrTag, attrValue in elt.items():
            isAnchorHref = attrTag == "href" and eltTag == "a"
            isImageSrc = attrTag == "src" and eltTag == "img"
            if not (isAnchorHref or isImageSrc):
                continue

            if "javascript:" in attrValue:
                modelXbrl.error(
                    "EFM.5.02.02.10.activeContent",
                    _("Element has javascript in '%(attribute)s' for <%(element)s>"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=eltTag)
            else:
                # An anchor href passes when no pattern is configured or the
                # configured pattern matches; image sources never pass here.
                hrefIsAllowed = isAnchorHref and (
                    not allowedExternalHrefPattern
                    or allowedExternalHrefPattern.match(attrValue))
                if not hrefIsAllowed and scheme(attrValue) in ("http", "https",
                                                               "ftp"):
                    modelXbrl.error(
                        "EFM.6.05.16.externalReference",
                        _("Element has an invalid external reference in '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrTag,
                        element=eltTag)

            if isImageSrc and attrValue not in checkedGraphicsFiles:
                if scheme(attrValue) == "data":
                    modelXbrl.error(
                        "EFM.5.02.02.10.graphicDataUrl",
                        _("Element references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrValue[:32],
                        element=eltTag)
                elif not attrValue.lower().endswith(('.jpg', '.gif')):
                    modelXbrl.error(
                        "EFM.5.02.02.10.graphicFileType",
                        _("Element references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrValue,
                        element=eltTag)

        # A table with any table ancestor is a nested table.
        if eltTag == "table" and next(elt.iterancestors("table"),
                                      None) is not None:
            modelXbrl.error("EFM.5.02.02.10.nestedTable",
                            _("Element is a disallowed nested <table>."),
                            modelObject=elt)
Пример #15
0
def xhtmlValidate(modelXbrl, elt):
    """Validate the Inline XBRL and XHTML structure rooted at ``elt``.

    The tree under ``elt`` is copied into a fresh lxml tree with the Inline
    XBRL elements removed or replaced (see ``ixToXhtml``); while copying,
    element names, attributes and hierarchy constraints of the ix elements
    are checked.  The resulting plain XHTML tree is then validated against
    ``xhtml1-strict-ix.dtd`` from the controller's config directory.

    All problems are reported through ``modelXbrl.error``; nothing is
    returned.

    :param modelXbrl: model whose ``error`` method receives the findings and
        whose ``modelManager.cntlr.configDir`` locates the DTD
    :param elt: root element of the document to validate
    """
    from lxml.etree import DTD, XMLSyntaxError

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Check one attribute of an ix element (``isIxElt``) or html element."""
        if attrTag.startswith("{"):
            # Namespace-qualified attribute in Clark notation: {ns}localName
            ns, sep, localName = attrTag[1:].partition("}")
            if isIxElt:
                allowedNs = nonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        "ix:qualifiedAttributeNotExpected",
                        _("Inline XBRL element %(element)s: has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        "ix:inlineAttributeMisplaced",
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        "ix:extensionAttributeMisplaced",
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        elif isIxElt:
            # Unqualified attribute on an ix element: must be declared in
            # ixAttrType for this namespace.  NOTE: a KeyError raised anywhere
            # in this try body is reported as "attribute not expected".
            try:
                _xsdType = ixAttrType[elt.namespaceURI][attrTag]
                if isinstance(_xsdType, dict):
                    # dict entries carry the base type plus facets
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

                # The listed attributes are disallowed except the ones
                # permitted for this particular ix element.
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = [
                    a for a in disallowedXbrliAttrs if elt.get(a) is not None
                ]
                if disallowedAttrs:
                    modelXbrl.error(
                        "ix:inlineElementAttributes",
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    "ix:attributeNotExpected",
                    _("Attribute %(attribute)s is not expected on element element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)

    def checkHierarchyConstraints(elt):
        """Check ``elt`` against its entries in ``ixHierarchyConstraints``.

        Each constraint is a ``(_rel, names)`` pair: the first character of
        ``_rel`` is the requirement symbol (``+ - & ^ ?``) and the remainder
        is the relation name (ancestor, parent, child-choice, child-sequence
        or descendant).
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^'):
                    # need every related element to find the disallowed ones
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, '*' if nameFilter == ('*', ) else elt.namespaceURI,
                       nameFilter)
                if rel in ("ancestor", "parent"):
                    # these lookups yield one element or None; make it a list
                    if relations is None: relations = []
                    else: relations = [relations]
                issue = ''
                if reqt == '^':
                    if not any(r.localName in names
                               and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names
                                 and r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        # children must appear in the order given by names
                        sequencePosition = 0
                        for r in relations:
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have more than 1 but none present "
                if ((reqt == '+' and not relations)
                        or (reqt == '-' and relations) or (issue)):
                    code = "ix:" + {
                        'ancestor': "ancestorNode",
                        'parent': "parentNode",
                        'child-choice': "childNodes",
                        'child-sequence': "childNodes",
                        'descendant': "descendantNodes"
                    }[rel] + {
                        '+': "Required",
                        '-': "Disallowed",
                        '&': "Allowed",
                        '^': "Specified"
                    }.get(reqt, "Specified")
                    # Every requirement symbol that can reach this point needs
                    # a verb; '^' was previously missing, which raised
                    # KeyError instead of reporting the violation.
                    requirementVerb = {
                        '+': "must",
                        '-': "may not",
                        '&': "may only",
                        '?': "may",
                        '^': "must"
                    }[reqt]
                    relationPhrase = {
                        'ancestor': "be nested in",
                        'parent': "have parent",
                        'child-choice': "have child",
                        'child-sequence': "have child",
                        'descendant': "have as descendant"
                    }[rel]
                    msg = _("Inline XBRL 1.0 ix:{0} {1} {2} {3} {4} element"
                            ).format(
                                elt.localName, requirementVerb, relationPhrase,
                                ', '.join(
                                    str(r.elementQname)
                                    for r in relations) if names == ('*', )
                                and relations else ", ".join("ix:" + n
                                                             for n in names),
                                issue)
                    modelXbrl.error(code,
                                    msg,
                                    modelObject=[elt] + relations,
                                    requirement=reqt)

    def ixToXhtml(fromRoot):
        """Return a fresh tree mirroring ``fromRoot`` without ix elements."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in (
                    'version',  # used in inline test cases but not valid xhtml
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'
            ):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt):
        """Recursively copy children of ``fromElt`` into ``toElt``.

        ix elements are checked as they are met.  ix ``references`` and
        ``resources`` are dropped, ix ``footnote`` / ``nonNumeric`` /
        ``continuation`` become ``ixNestedContent`` placeholders, other ix
        elements are replaced by their children, and non-ix elements are
        copied with their attributes, text and tail.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[
                            fromChild.namespaceURI]:
                        modelXbrl.error(
                            "ix:elementNameInvalid",
                            _("Inline XBRL element name %(element)s is not valid"
                              ),
                            modelObject=fromChild,
                            element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[
                                fromChild.namespaceURI].get(
                                    fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                # Attach the error to the element that lacks
                                # the attribute, not to the document root.
                                modelXbrl.error(
                                    "ix:attributeRequired",
                                    _("Attribute %(attribute)s required on element ix:%(element)s"
                                      ),
                                    modelObject=fromChild,
                                    attribute=attrTag,
                                    element=fromChild.localName)
                if not (fromChild.localName in {"references", "resources"}
                        and isIxNs):
                    if fromChild.localName in {
                            "footnote", "nonNumeric", "continuation"
                    } and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        # other ix elements are transparent: hoist children
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag,
                                           attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(
            os.path.join(modelXbrl.modelManager.cntlr.configDir,
                         "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("ix:DTDelementUnexpected",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
    except XMLSyntaxError:
        modelXbrl.error("ix:DTDerror",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
Пример #16
0
 def read(self, file):
     """Parse *file* as a DTD and append one ``Unit`` per declared entity.

     Each unit is built from the entity's name and content and added to
     ``self.units`` in the order the DTD reports its entities.
     """
     for declared in DTD(file).entities():
         self.units.append(Unit(declared.name, declared.content))
Пример #17
0
 def _validate_dtd_name(self, identifier:str):
     """Return whether *identifier* is accepted as a DTD ``ID`` attribute value.

     A throwaway DTD declares an empty element ``S`` with a required ``id``
     attribute of type ``ID``; a sample ``S`` element carrying *identifier*
     is validated against it and the validation result is returned.
     """
     validator = DTD(StringIO("<!ELEMENT S EMPTY><!ATTLIST S id ID #REQUIRED>"))
     return validator.validate(Element("S", id = identifier))