def loadDTD(modelXbrl):
    """Parse ``edbody.dtd`` into the module-level ``edbodyDTD`` on first use.

    The DTD file is read from the controller's ``configDir``.  When
    ``edbodyDTD`` is already set, the function returns without touching
    the file system.
    """
    global edbodyDTD
    if edbodyDTD is not None:
        return
    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           "edbody.dtd")
    with open(dtdPath) as dtdFile:
        edbodyDTD = DTD(dtdFile)
def loadDTD(modelXbrl):
    """(Re)load the DTD matching the document type into ``edbodyDTD``.

    Inline XBRL documents use ``xhtml1-strict-ix.dtd``; everything else
    uses ``edbody.dtd``.  The DTD is only re-parsed when no DTD has been
    loaded yet or the inline/non-inline kind differs from the one cached
    in ``isInlineDTD``.
    """
    global edbodyDTD, isInlineDTD
    initModelDocumentTypeReferences()
    wantInline = modelXbrl.modelDocument.type == ModelDocumentTypeINLINEXBRL
    if isInlineDTD is not None and isInlineDTD == wantInline:
        return  # cached DTD is already the right kind
    # Record the kind before opening the file, as the cache flag and the
    # DTD are updated in that order.
    isInlineDTD = wantInline
    if wantInline:
        dtdName = "xhtml1-strict-ix.dtd"
    else:
        dtdName = "edbody.dtd"
    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir, dtdName)
    with open(dtdPath) as dtdFile:
        edbodyDTD = DTD(dtdFile)
def run(xhtml_files: List[Path], dtd_file: Path, images: bool, links: bool) -> bool:
    """
    Run ``test`` on every XHTML file against the given DTD.

    :param xhtml_files: the XHTML files to check
    :param dtd_file: path to the DTD file
    :param images: forwarded unchanged to ``test``
    :param links: forwarded unchanged to ``test``
    :return: False if the DTD cannot be parsed or any file fails ``test``,
        True otherwise
    """
    try:
        # Parse once up front so a broken DTD is reported a single time
        # instead of once per file.
        DTD(str(dtd_file))
    except DTDParseError as e:
        print(e.error_log, file=stderr)
        clear_error_log()
        return False

    success = True
    for file in xhtml_files:
        # if you reuse the parser on too many documents it gets confused
        parser = XHTMLParser(dtd_validation=True, ns_clean=True)
        dtd = DTD(str(dtd_file))
        if settings.verbose:
            # The loop variable is ``file``; the previous code printed an
            # undefined name here and crashed in verbose mode.
            print(file)
        if not test(file, parser, dtd, images, links):
            success = False
    return success
def loadDTD(modelXbrl):
    """(Re)load the DTD matching the document type into ``edbodyDTD``.

    ``ModelDocumentTypeINLINEXBRL`` is resolved on first call through a
    function-level import of ``arelle.ModelDocument.Type``.  Inline XBRL
    documents use ``xhtml1-strict-ix.dtd``; everything else uses
    ``edbody.dtd``.  The DTD is only re-parsed when none is loaded yet or
    the inline/non-inline kind differs from the cached ``isInlineDTD``.
    """
    global edbodyDTD, isInlineDTD, ModelDocumentTypeINLINEXBRL
    if ModelDocumentTypeINLINEXBRL is None:
        from arelle.ModelDocument import Type
        ModelDocumentTypeINLINEXBRL = Type.INLINEXBRL
    wantInline = modelXbrl.modelDocument.type == ModelDocumentTypeINLINEXBRL
    if isInlineDTD is not None and isInlineDTD == wantInline:
        return  # cached DTD is already the right kind
    isInlineDTD = wantInline
    if wantInline:
        dtdName = "xhtml1-strict-ix.dtd"
    else:
        dtdName = "edbody.dtd"
    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir, dtdName)
    with open(dtdPath) as dtdFile:
        edbodyDTD = DTD(dtdFile)
def open_dtd(dtd_file: Path) -> DTD:
    """
    Open and validate an XML DTD. Exit the program on failure.

    :param dtd_file: path to a DTD file
    :return: A DTD object
    :raises SystemExit: with exit status 1 when the DTD cannot be parsed
    """
    try:
        return DTD(str(dtd_file))
    except DTDParseError as e:
        print(f"{dtd_file}:1: {e}", file=stderr)
        # ``exit()`` is a helper injected by the ``site`` module and is not
        # guaranteed to exist; raising SystemExit terminates the same way.
        raise SystemExit(1)
def normalize_samples(self, norm, name, version):
    """Test logparser.normalize validate for syslog normalizer.

    Parses the normalizer definition ``norm`` found under
    ``self.normalizer_path``, validates it against ``normalizer.dtd``,
    builds a ``Normalizer`` from it and checks its name, version and
    self-validation.
    """
    # open parser; the handle is closed as soon as the tree is built
    with open(os.path.join(self.normalizer_path, norm)) as norm_file:
        n = parse(norm_file)
    # validate DTD
    with open(os.path.join(self.normalizer_path, 'normalizer.dtd')) as dtd_file:
        dtd = DTD(dtd_file)
    self.assertTrue(dtd.validate(n))
    # Create normalizer from xml definition
    normalizer = Normalizer(
        n, os.path.join(self.normalizer_path, 'common_tagTypes.xml'))
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(normalizer.name, name)
    self.assertEqual(normalizer.version, version)
    self.assertTrue(normalizer.validate())
def __init__(self, normalizers_paths, active_normalizers=None):
    """
    Instantiates a flow manager. The default behavior is to activate every
    available normalizer.

    @param normalizers_paths: a list (or tuple) of absolute paths to the
    normalizer XML definitions to use or a just a single path as str.
    @param active_normalizers: a dictionary of active normalizers
    in the form {name-version : [True|False]}. Defaults to a new empty
    dictionary for each instance.
    @raise ValueError: if one of the paths is not a directory.
    @raise StandardError: if normalizer.dtd, common_tagTypes.xml or
    common_callBacks.xml was not found in any of the directories.
    """
    # ``isinstance(x, list or tuple)`` only ever tested for ``list``, so a
    # tuple of paths was wrapped as if it were one single path.
    if isinstance(normalizers_paths, tuple):
        normalizers_paths = list(normalizers_paths)
    elif not isinstance(normalizers_paths, list):
        normalizers_paths = [normalizers_paths, ]
    self.normalizers_paths = normalizers_paths
    # A ``{}`` default would be one dict shared by every instance.
    if active_normalizers is None:
        active_normalizers = {}
    self.active_normalizers = active_normalizers
    self.dtd, self.ctt, self.ccb = None, None, None

    # Walk through paths for normalizer.dtd and common_tagTypes.xml
    # /!\ dtd file and common elements will be overrriden if present in
    # many directories.
    for norm_path in self.normalizers_paths:
        if not os.path.isdir(norm_path):
            raise ValueError("Invalid normalizer directory : %s" % norm_path)
        dtd = os.path.join(norm_path, 'normalizer.dtd')
        ctt = os.path.join(norm_path, 'common_tagTypes.xml')
        ccb = os.path.join(norm_path, 'common_callBacks.xml')
        if os.path.isfile(dtd):
            # Close the handle once the DTD has been parsed.
            with open(dtd) as dtd_file:
                self.dtd = DTD(dtd_file)
        if os.path.isfile(ctt):
            self.ctt = ctt
        if os.path.isfile(ccb):
            self.ccb = ccb
    # Technically the common elements files should NOT be mandatory.
    # But many normalizers use them, so better safe than sorry.
    if not self.dtd or not self.ctt or not self.ccb:
        raise StandardError("Missing DTD or common library files")
    self._cache = []
    self.reload()
def xhtmlValidate(modelXbrl, elt):
    """Validate ``elt`` against ``xhtml1-strict-ix.dtd``.

    The element is converted with ``XmlUtil.ixToXhtml`` and the result is
    DTD-validated.  Validation failures and ``XMLSyntaxError`` are both
    reported through ``modelXbrl.error`` under code ``xmlDTD:error``.
    """
    from lxml.etree import DTD, XMLSyntaxError

    # copy xhtml elements to fresh tree
    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           "xhtml1-strict-ix.dtd")
    with open(dtdPath) as dtdFile:
        dtd = DTD(dtdFile)

    try:
        if dtd.validate(XmlUtil.ixToXhtml(elt)):
            return
        modelXbrl.error(
            "xmlDTD:error",
            _("%(element)s error %(error)s"),
            modelObject=elt,
            element=elt.localName.title(),
            error=', '.join(
                entry.message
                for entry in dtd.error_log.filter_from_errors()))
    except XMLSyntaxError as err:
        modelXbrl.error(
            "xmlDTD:error",
            _("%(element)s error %(error)s"),
            modelObject=elt,
            element=elt.localName.title(),
            error=dtd.error_log.filter_from_errors())
def validate_norm(fn, nn, version, it):
    """Validate one normalizer definition and record its timing.

    fn      -- file name of the normalizer XML definition, relative to
               the module-level ``path``
    nn      -- expected normalizer name (compared case-insensitively)
    version -- expected normalizer version
    it      -- number of timeit iterations

    The parsed normalizer is stored in the module-level ``norm`` because
    the timeit setup imports it from ``__main__``.  The average time per
    sample is added to the module-level ``result``.

    Raises AssertionError if the definition does not validate against
    normalizer.dtd.  Name/version mismatches and an empty sample set are
    printed and the function returns without recording a result.
    """
    global norm
    global result
    # open XML parser; close the handle once the tree is built
    with open(os.path.join(path, fn)) as norm_file:
        n = parse(norm_file)
    # validate DTD
    with open(os.path.join(path, 'normalizer.dtd')) as dtd_file:
        dtd = DTD(dtd_file)
    # Explicit raise instead of ``assert`` so the check survives ``-O``.
    if not dtd.validate(n):
        raise AssertionError("%s does not validate against normalizer.dtd" % fn)
    # Create normalizer from xml definition
    norm = Normalizer(n,
                      os.path.join(path, 'common_tagTypes.xml'),
                      os.path.join(path, 'common_callBacks.xml'))
    # Time normalizer validation
    if norm.name.lower() != nn.lower():
        print("\n[%s] and [%s] don't match" % (norm.name, nn))
        return
    if norm.name != nn:
        print("Warning, %s has name attribute set to %s" % (fn, norm.name))
    if norm.version != version:
        print("\n[%s] and [%s] don't match" % (norm.version, version))
        return
    # Count the samples themselves; the previous expression returned the
    # number of patterns, whatever the amount of examples each one held.
    # NOTE(review): assumes every pattern's ``examples`` is a sized
    # collection -- confirm against Normalizer.
    samples_amount = sum(len(v.examples) for v in norm.patterns.values())
    if samples_amount <= 0:
        print("No samples to validate in %s" % fn)
        return
    t = timeit.Timer("assert norm.validate() == True",
                     "from __main__ import norm")
    s = t.timeit(it)
    # Normalize result against number of validated samples
    s = s / float(samples_amount)
    # Add result
    result.add_res(norm.name, norm.version, norm.authors, s)
'definition': template('PCDATA_OPERATOR_DEFINITION').render({ 'class': element, 'type': 'int' }) } if __name__ == '__main__': import argparse cmdline = argparse.ArgumentParser() cmdline.add_argument("dtd") cmdline.add_argument("hxx") cmdline.add_argument("cxx") args = cmdline.parse_args() dtd = DTD(args.dtd) metadata = { 'dtd': dtd, 'enumerations': enumerations, 'extra_methods': methods, 'enum_classes': sorted([(v['name'], k) for k, v in enumerations.items() if not v in [e.name for e in dtd.iterelements()]]), 'forwards_for': { 'ornament': ['ornament_type'], 'score': ['score_data', 'score_header'] } }
def xhtmlValidate(modelXbrl, elt):
    """Validate the inline XBRL tree rooted at ``elt``.

    Non-ix content of ``elt`` is copied into a fresh tree (``ixToXhtml``).
    While copying, every ix element is checked against the hierarchy
    constraints, attribute definitions and required attributes held in the
    module-level tables.  The copy is then validated against the DTD file
    that ``XHTML_DTD`` maps to the document's ix namespace.  All findings
    are reported through ``modelXbrl.error``; nothing is returned.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below was reconstructed from the logic and
    should be confirmed against the upstream file.
    """
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Report attributes that are not allowed on ``elt``.

        ``attrTag`` is the attribute name, in ``{namespace}local`` form
        when it is qualified.  ``isIxElt`` tells whether ``elt`` is being
        checked as an ix element or as an html element.
        """
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        # split a qualified attribute name into namespace and local name
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            # qualified attribute from a non-ix namespace that is not
            # explicitly defined for this element
            if ns == XbrlConst.xsi:
                pass  # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references",
                        "relationship", "tuple"
                }:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                # NOTE(review): the enclosing condition already requires
                # ns not in XbrlConst.ixbrlAll, so this first test looks
                # unreachable -- confirm.
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        elif isIxElt:
            try:
                # a missing entry raises KeyError, handled below as
                # "attribute not expected"
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)
                # attribute has a known type but is not defined for this
                # particular element: reuse the KeyError path
                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and
                         (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                # the full set of restricted attribute names minus the
                # ones permitted on this element
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs
                                      if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(
                        ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)
        elif ns is None:
            # unqualified attribute on an html element: validate its value
            # when a type is registered for the attribute name
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

    def checkHierarchyConstraints(elt):
        """Check ``elt`` against ``ixHierarchyConstraints`` and static rules.

        Each constraint is a pair ``(_rel, names)``: the first character
        of ``_rel`` is the requirement code and the remainder names the
        relation (ancestor, parent, child-choice, child-sequence,
        child-or-text, descendant).
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                # for these requirement codes every related node is
                # fetched and then compared with ``names`` below
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                if nameFilter == ('*', ):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[
                        0] and nameFilter[0][0] == "{":
                    # single name given in {namespace}local form
                    namespaceFilter, _sep, nameFilter = nameFilter[0][
                        1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt, namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "child-or-text": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, namespaceFilter, nameFilter)
                # ancestor/parent results are wrapped so that
                # ``relations`` is always a list from here on
                if rel in ("ancestor", "parent"):
                    if relations is None:
                        relations = []
                    else:
                        relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt,
                                                        ixExclude=True,
                                                        ixEscape=False,
                                                        ixContinuation=False)
                issue = ''
                if reqt in ('^', ):
                    if not any(r.localName in names and
                               r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1', ) and not elt.isNil:
                    if sum(r.localName in names and
                           r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(
                            names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names and
                                 r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        # every relation is in ``names`` here, so compare
                        # positions to detect out-of-order children
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(
                    reqt == '&' and rel in ("child-sequence", "child-choice") and
                    elt.textValue.strip())
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or (issue) or
                        disallowedChildText):
                    # message code: section prefix from ixSect plus the
                    # relation and requirement names
                    code = "{}:{}".format(
                        ixSect[elt.namespaceURI].get(elt.localName,
                                                     "other")["constraint"], {
                                                         'ancestor': "ancestorNode",
                                                         'parent': "parentNode",
                                                         'child-choice': "childNodes",
                                                         'child-sequence': "childNodes",
                                                         'child-or-text': "childNodesOrText",
                                                         'descendant': "descendantNodes"
                                                     }[rel] + {
                                                         '+': "Required",
                                                         '-': "Disallowed",
                                                         '&': "Allowed",
                                                         '^': "Specified",
                                                         '1': "Specified"
                                                     }.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}"
                            ).format(
                                elt.localName, {
                                    '+': "must",
                                    '-': "may not",
                                    '&': "may only",
                                    '?': "may",
                                    '+': "must",
                                    '^': "must",
                                    '1': "must"
                                }[reqt], {
                                    'ancestor': "be nested in",
                                    'parent': "have parent",
                                    'child-choice': "have child",
                                    'child-sequence': "have child",
                                    'child-or-text': "have child or text,",
                                    'descendant': "have as descendant"
                                }[rel],
                                '' if rel == 'child-or-text' else ', '.join(
                                    str(r.elementQname) for r in relations)
                                if names == ('*', ) and relations else ", ".join(
                                    "{}:{}".format(namespacePrefix, n)
                                    for n in names), issue,
                                " and no child text (\"{}\")".format(
                                    elt.textValue.strip()[:32])
                                if disallowedChildText else "")
                    modelXbrl.error(
                        code,
                        msg,
                        modelObject=[elt] + relations,
                        requirement=reqt,
                        messageCodes=
                        ("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                         "ix{ver.sect}:childNodesOrTextRequired",
                         "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                         "ix{ver.sect}:descendantNodesDisallowed",
                         "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            # truthy when the element has text or any child has tail text
            hasText = (elt.text or "") or any(
                (childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(
                    elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(
                        ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"
                          ),
                        modelObject=[elt] + ancestorNonFractions,
                        fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid(
                    )  # prevent further validation or cascading errors
            else:
                if ((childElts and
                     (len(childElts) != 1 or
                      childElts[0].namespaceURI != _ixNS or
                      childElts[0].localName != "nonFraction")) or
                        (childElts and hasText)):
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorFractions:
                    modelXbrl.error(
                        ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"
                          ),
                        modelObject=[elt] + ancestorFractions,
                        fact=elt.qname)
            else:
                nonFrChildren = [
                    e for e in XmlUtil.children(elt, _ixNS, '*')
                    if e.localName not in ("fraction", "numerator",
                                           "denominator")
                ]
                if nonFrChildren:
                    modelXbrl.error(
                        ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"
                          ),
                        modelObject=[elt] + nonFrChildren,
                        fact=elt.qname,
                        children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(
                        elt, XbrlConst.ixbrl11, "fraction"):  # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(
                            ancestorFraction.get("unitRef")):
                        # NOTE(review): modelObject lists nonFrChildren
                        # rather than ancestorFraction -- confirm intent.
                        modelXbrl.error(
                            ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"
                              ),
                            modelObject=[elt] + nonFrChildren,
                            fact=elt.qname,
                            unitRef=elt.get("unitRef"),
                            unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator",
                             "nonNumeric"):
            fmt = elt.format
            if fmt:
                # custom transforms are accepted as-is; otherwise both the
                # namespace and the local name must be known to FunctionIxt
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[
                        fmt.namespaceURI]:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        name=fmt.localName)
                    elt.setInvalid()

    def ixToXhtml(fromRoot):
        """Return a fresh element tree holding the non-ix content of ``fromRoot``."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in (
                    'version',  # used in inline test cases but not valid xhtml
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'
            ):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Recursively copy children of ``fromElt`` into ``toElt``.

        ix elements are checked (name, hierarchy constraints, attributes,
        required attributes) as they are visited.  With ``excludeSubtree``
        set, or below ix references/resources, the subtree is still
        walked for checking but nothing new is created in the copy.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[
                            fromChild.namespaceURI]:
                        modelXbrl.error(
                            ixMsgCode("elementNameInvalid", ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"
                              ),
                            modelObject=fromChild,
                            element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[
                                fromChild.namespaceURI].get(
                                    fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(
                                    ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"
                                      ),
                                    modelObject=fromChild,
                                    attribute=attrTag,
                                    element=fromChild.localName)
                if excludeSubtree or (fromChild.localName
                                      in {"references", "resources"} and
                                      isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {
                            "footnote", "nonNumeric", "continuation"
                    } and isIxNs:
                        # these ix elements are represented in the copy by
                        # a placeholder element
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        # other ix elements are dropped; their children are
                        # attached to the current target element
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("html:syntaxError",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl,
                                                   elt,
                                                   elt,
                                                   "InlineXBRL",
                                                   "EFM.5.02.05.",
                                                   isInline=True)
    except XMLSyntaxError as err:
        modelXbrl.error("html:syntaxError",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
def run(ebook: Path, bigbook: Path, ubercoordinator: Path, files: List[Path]) -> None:
    """
    Add articles from the Big Book of Key to an ebook and update its book.xml.

    Missing section files and images are copied from the Big Book of Key
    into the ebook, new ``section`` and ``image`` entries are appended to
    book.xml, and book.xml is rewritten (after a ``.bak`` copy is made)
    when the set of sections or images changed.

    :param ebook: the ebook source directory
    :param bigbook: the Big Book of Key
    :param ubercoordinator: the ubercoordinator source directory, for the DTD
    :param files: the XHTML files from the Big Book of Key that need adding
    :return:
    """
    index = Index(bigbook)
    # Close the DTD file once it has been parsed instead of leaking the
    # handle returned by Path.open().
    with (ubercoordinator / 'src' / 'book.dtd').open() as dtd_fh:
        book_dtd = DTD(dtd_fh)
    book = xml.read(ebook / 'book.xml', dtd=book_dtd)
    illustrations = xml.get_one(book, 'illustrations')
    contents = xml.get_one(book, 'contents')
    sections = set(
        xml.get_all_str(contents, '//section[not(@template="yes")]/@file'))
    images = set(xml.get_all_str(illustrations, '//image/@file'))
    # Snapshots used at the end to decide whether book.xml changed.
    initial_sections = sections.copy()
    initial_images = images.copy()

    # Make sure every section already listed exists in the ebook and that
    # all images it uses are declared.
    for filename in sections:
        ebook_file = ebook / 'Text' / filename
        bigbook_file = bigbook / 'Text' / filename
        if not ebook_file.exists() and bigbook_file.exists():
            copyfile(bigbook_file, ebook_file)
        if ebook_file.exists():
            for img_filename in find_images(ebook_file):
                if img_filename not in images:
                    illustrations.append(file_element('image', img_filename))
                    images.add(img_filename)
        else:
            print(
                f"{ebook / 'book.xml'}:0:0:WARNING: is this missing?: {filename}"
            )

    # Add the requested articles as new sections.
    for file in files:
        article_id = file.stem
        article = index.articles_by_id[article_id]
        if article.file.name not in sections:
            copyfile(article.file, ebook / 'Text' / article.file.name)
            title = xml.rewrap('title', XML(article.link))
            section = file_element('section', article.file.name)
            section.append(title)
            contents.append(section)
            # NOTE(review): membership is tested with article.file.name
            # but file.name is what gets recorded -- confirm the two are
            # always equal.
            sections.add(file.name)
            for img_filename in find_images(article.file):
                if img_filename not in images:
                    illustrations.append(file_element('image', img_filename))
                    images.add(img_filename)

    # Copy over any declared image that is not in the ebook yet.
    for img_filename in images:
        file = ebook / 'Images' / img_filename
        if not file.exists():
            copyfile(bigbook / 'Images' / img_filename, file)

    book.attrib['date'] = strftime("%Y-%m-%d")
    # NOTE(review): backup and save are treated as one unit that only runs
    # when sections or images changed -- confirm the save is not meant to
    # run unconditionally.
    if sections != initial_sections or images != initial_images:
        copyfile(ebook / 'book.xml', ebook / 'book.xml.bak')
        xml.save(ebook / 'book.xml', book, doctype='book')
def test_00_validate_fake_syslog(self):
    """Validate the fake normalizer"""
    # Use a context manager so the DTD file handle is closed after parsing.
    with open(os.path.join(self.normalizer_path, 'normalizer.dtd')) as dtd_file:
        dtd = DTD(dtd_file)
    self.assertTrue(dtd.validate(self.n))
def validateXbrlFinally(val, *args, **kwargs):
    """Run the EFM HTM checks on the document held by ``val.modelXbrl``.

    Does nothing unless ``val.validateEFMHTMplugin`` is truthy.  The
    document is validated against ``resources/efm-htm.dtd`` (located next
    to this module), then every element is scanned for javascript or
    external references in ``a/@href`` and ``img/@src``, unsupported
    graphics references, and nested tables.  All findings are reported
    through ``modelXbrl.error``.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below was reconstructed from the logic and
    should be confirmed against the upstream file.
    """
    if not (val.validateEFMHTMplugin):
        return
    modelXbrl = val.modelXbrl
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern
    efmHtmDTD = None
    with open(
            os.path.join(os.path.dirname(__file__), "resources",
                         "efm-htm.dtd")) as fh:
        efmHtmDTD = DTD(fh)
    if efmHtmDTD and not efmHtmDTD.validate(
            modelXbrl.modelDocument.xmlRootElement.getroottree()):
        for e in efmHtmDTD.error_log.filter_from_errors():
            # DTD errors containing this text are deliberately not reported
            if "declared in the external subset contains white spaces nodes" not in e.message:
                modelXbrl.error("html.syntax",
                                _("HTML error %(error)s"),
                                error=e.message)
    for elt in modelXbrl.modelDocument.xmlRootElement.iter():
        eltTag = elt.tag
        if isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
            continue  # comment or other non-parsed element
        for attrTag, attrValue in elt.items():
            if ((attrTag == "href" and eltTag == "a") or
                    (attrTag == "src" and eltTag == "img")):
                if "javascript:" in attrValue:
                    modelXbrl.error(
                        "EFM.5.02.02.10.activeContent",
                        _("Element has javascript in '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrTag,
                        element=eltTag)
                elif eltTag == "a" and (
                        not allowedExternalHrefPattern or
                        allowedExternalHrefPattern.match(attrValue)):
                    # <a> links are accepted when no pattern is configured
                    # or when the href matches the configured pattern
                    pass
                elif scheme(attrValue) in ("http", "https", "ftp"):
                    modelXbrl.error(
                        "EFM.6.05.16.externalReference",
                        _("Element has an invalid external reference in '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrTag,
                        element=eltTag)
                # NOTE(review): placed inside the href/src branch, so only
                # img/@src reaches it -- confirm it is not meant to apply
                # to src attributes of other elements.
                if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                    if scheme(attrValue) == "data":
                        # only the first 32 characters of the data URL are
                        # included in the message
                        modelXbrl.error(
                            "EFM.5.02.02.10.graphicDataUrl",
                            _("Element references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"
                              ),
                            modelObject=elt,
                            attribute=attrValue[:32],
                            element=eltTag)
                    elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                        modelXbrl.error(
                            "EFM.5.02.02.10.graphicFileType",
                            _("Element references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"
                              ),
                            modelObject=elt,
                            attribute=attrValue,
                            element=eltTag)
        # a table with any table ancestor is reported as nested
        if eltTag == "table" and any(a is not None
                                     for a in elt.iterancestors("table")):
            modelXbrl.error("EFM.5.02.02.10.nestedTable",
                            _("Element is a disallowed nested <table>."),
                            modelObject=elt)
def xhtmlValidate(modelXbrl, elt):
    """Validate the inline XBRL tree rooted at ``elt``.

    Non-ix content of ``elt`` is copied into a fresh tree (``ixToXhtml``).
    While copying, every ix element is checked against the hierarchy
    constraints, attribute types and required attributes held in the
    module-level tables.  The copy is then validated against
    ``xhtml1-strict-ix.dtd`` from the controller's config directory.  All
    findings are reported through ``modelXbrl.error``; nothing is returned.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below was reconstructed from the logic and
    should be confirmed against the upstream file.
    """
    from lxml.etree import DTD, XMLSyntaxError
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Report attributes that are not allowed on ``elt``.

        ``attrTag`` is the attribute name, in ``{namespace}local`` form
        when it is qualified.  ``isIxElt`` tells whether ``elt`` is being
        checked as an ix element or as an html element.
        """
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
            if isIxElt:
                allowedNs = nonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        "ix:qualifiedAttributeNotExpected",
                        _("Inline XBRL element %(element)s: has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        "ix:inlineAttributeMisplaced",
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        "ix:extensionAttributeMisplaced",
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        elif isIxElt:
            try:
                # a missing entry raises KeyError, handled below as
                # "attribute not expected"
                _xsdType = ixAttrType[elt.namespaceURI][attrTag]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)
                # the full set of restricted attribute names minus the
                # ones permitted on this element
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = [
                    a for a in disallowedXbrliAttrs if elt.get(a) is not None
                ]
                if disallowedAttrs:
                    modelXbrl.error(
                        "ix:inlineElementAttributes",
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    "ix:attributeNotExpected",
                    _("Attribute %(attribute)s is not expected on element element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)

    def checkHierarchyConstraints(elt):
        """Check ``elt`` against its entries in ``ixHierarchyConstraints``.

        Each constraint is a pair ``(_rel, names)``: the first character
        of ``_rel`` is the requirement code and the remainder names the
        relation (ancestor, parent, child-choice, child-sequence,
        descendant).
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                # for these requirement codes every related node is
                # fetched and then compared with ``names`` below
                if reqt in ('&', '^'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt,
                       '*' if nameFilter == ('*', ) else elt.namespaceURI,
                       nameFilter)
                # ancestor/parent results are wrapped so that
                # ``relations`` is always a list from here on
                if rel in ("ancestor", "parent"):
                    if relations is None:
                        relations = []
                    else:
                        relations = [relations]
                issue = ''
                if reqt == '^':
                    if not any(r.localName in names and
                               r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names and
                                 r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        # every relation is in ``names`` here, so compare
                        # positions to detect out-of-order children
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have more than 1 but none present "
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or (issue)):
                    code = "ix:" + {
                        'ancestor': "ancestorNode",
                        'parent': "parentNode",
                        'child-choice': "childNodes",
                        'child-sequence': "childNodes",
                        'descendant': "descendantNodes"
                    }[rel] + {
                        '+': "Required",
                        '-': "Disallowed",
                        '&': "Allowed",
                        '^': "Specified"
                    }.get(reqt, "Specified")
                    # The verb table now covers '^' as well; without that
                    # entry a violated '^' constraint raised KeyError while
                    # the message was being built.
                    msg = _("Inline XBRL 1.0 ix:{0} {1} {2} {3} {4} element"
                            ).format(
                                elt.localName, {
                                    '+': "must",
                                    '-': "may not",
                                    '&': "may only",
                                    '?': "may",
                                    '^': "must"
                                }[reqt], {
                                    'ancestor': "be nested in",
                                    'parent': "have parent",
                                    'child-choice': "have child",
                                    'child-sequence': "have child",
                                    'descendant': "have as descendant"
                                }[rel],
                                ', '.join(
                                    str(r.elementQname) for r in relations)
                                if names == ('*', ) and relations else
                                ", ".join("ix:" + n for n in names), issue)
                    modelXbrl.error(code,
                                    msg,
                                    modelObject=[elt] + relations,
                                    requirement=reqt)

    def ixToXhtml(fromRoot):
        """Return a fresh element tree holding the non-ix content of ``fromRoot``."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in (
                    'version',  # used in inline test cases but not valid xhtml
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'
            ):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt):
        """Recursively copy children of ``fromElt`` into ``toElt``.

        ix elements are checked (name, hierarchy constraints, attributes,
        required attributes) as they are visited.  ix references and
        resources subtrees are neither copied nor descended into.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[
                            fromChild.namespaceURI]:
                        modelXbrl.error(
                            "ix:elementNameInvalid",
                            _("Inline XBRL element name %(element)s is not valid"
                              ),
                            modelObject=fromChild,
                            element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[
                                fromChild.namespaceURI].get(
                                    fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                # Report the element that actually lacks
                                # the attribute, not the tree being
                                # validated as a whole.
                                modelXbrl.error(
                                    "ix:attributeRequired",
                                    _("Attribute %(attribute)s required on element ix:%(element)s"
                                      ),
                                    modelObject=fromChild,
                                    attribute=attrTag,
                                    element=fromChild.localName)
                if not (fromChild.localName in {"references", "resources"} and
                        isIxNs):
                    if fromChild.localName in {
                            "footnote", "nonNumeric", "continuation"
                    } and isIxNs:
                        # these ix elements are represented in the copy by
                        # a placeholder element
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        # other ix elements are dropped; their children are
                        # attached to the current target element
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(
            os.path.join(modelXbrl.modelManager.cntlr.configDir,
                         "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("ix:DTDelementUnexpected",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
    except XMLSyntaxError as err:
        modelXbrl.error("ix:DTDerror",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
def read(self, file):
    """Append one ``Unit`` to ``self.units`` per entity declared in the DTD.

    ``file`` is handed to ``DTD`` as-is; each unit is built from the
    entity's ``name`` and ``content``.
    """
    for declared in DTD(file).entities():
        self.units.append(Unit(declared.name, declared.content))
def _validate_dtd_name(self, identifier:str):
    """Return the DTD validation result for ``identifier`` used as an ID.

    A one-element DTD declares ``S`` with a required ``id`` attribute of
    type ``ID``; a sample ``S`` element carrying ``identifier`` as its
    ``id`` is validated against it.
    """
    validator = DTD(StringIO("<!ELEMENT S EMPTY><!ATTLIST S id ID #REQUIRED>"))
    return validator.validate(Element("S", id=identifier))