def htmlLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    """Parse ``filepath`` as HTML and wrap the parsed tree in an HTML model document.

    The loader only acts when ``filepath`` equals the module-level
    ``lastFilePath`` and ``lastFilePathIsHTML`` is set (both are maintained
    elsewhere in this module); otherwise it declines by returning None.

    Parser errors are reported as ``html:syntax`` errors on ``modelXbrl``,
    except unknown-tag errors whose tag name is listed in
    ``edgarAdditionalTags``.

    :param modelXbrl: model receiving the document and any error messages.
    :param mappedUri: URI used only for the file name shown in messages.
    :param filepath: path of the file to open and parse.
    :returns: the created model document, or None when the file is not the
        pending HTML file, parsing failed, or no document could be created.
    """
    if filepath != lastFilePath or not lastFilePathIsHTML:
        return None  # not an HTML file

    cntlr = modelXbrl.modelManager.cntlr
    cntlr.showStatus(_("Loading HTML file: {0}").format(os.path.basename(filepath)))
    # parse html
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem and
                modelXbrl.modelManager.disclosureSystem.validateFileText):
            htmlFile, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            htmlFile, _encoding = modelXbrl.fileSource.file(filepath, stripDeclaration=False)
        try:
            _parser = HTMLParser()
            htmlTree = parse(htmlFile, _parser, base_url=filepath)
            for error in _parser.error_log:
                isAllowedUnknownTag = (
                    error.type_name == "HTML_UNKNOWN_TAG" and
                    error.message.startswith("Tag ") and
                    error.message.lower()[4:].partition(" ")[0] in edgarAdditionalTags)
                if not isAllowedUnknownTag:
                    modelXbrl.error(
                        "html:syntax",
                        _("%(error)s, %(fileName)s, line %(line)s, column %(column)s"),
                        fileName=os.path.basename(mappedUri),
                        error=error.message, line=error.line, column=error.column)
        finally:
            # close on every path, including a failed parse, so the handle is not leaked
            htmlFile.close()
    except Exception as err:
        modelXbrl.error(
            type(err).__name__,
            _("Unrecoverable error: %(error)s, %(fileName)s"),
            fileName=os.path.basename(mappedUri),
            error=str(err), exc_info=True)
        return None

    if modelXbrl:  # pull loader implementation
        modelXbrl.blockDpmDBrecursion = True
        modelXbrl.modelDocument = doc = createModelDocument(
            modelXbrl,
            Type.HTML,
            filepath,
            isEntry=True,
            documentEncoding="utf-8",
            base=filepath)
    else:  # API implementation
        modelXbrl = ModelXbrl.create(
            cntlr.modelManager,
            Type.HTML,
            filepath,
            isEntry=True,
            base=filepath)
        doc = modelXbrl.modelDocument
    # guard before touching doc: a missing document must yield None, not AttributeError
    if doc is None:
        return None  # not an HTML file
    doc.xmlRootElement = htmlTree.getroot()
    modelXbrl.loadedFromHTML = True
    return doc
def xhtmlValidate(modelXbrl, elt):
    """Validate the XHTML and Inline XBRL markup of the tree rooted at ``elt``.

    The tree is copied into a fresh lxml tree with the inline XBRL elements
    removed (or replaced by ``ixNestedContent`` placeholders); while copying,
    every element's attributes and every ix element's hierarchy constraints
    are checked.  The copy is then validated against the DTD selected by the
    document's inline namespace, and when the disclosure system's validation
    type is "EFM" the HTML content checks of ``ValidateFilingText`` are run.

    All findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model receiving the error messages.
    :param elt: root element of the inline document to validate.
    """
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}

    def validateTypedValue(elt, attrTag, attrValue, _xsdType):
        """Validate attrValue against a type entry that is either a type name or a facets dict."""
        if isinstance(_xsdType, dict):
            baseXsdType = _xsdType["type"]
            facets = _xsdType
        else:
            baseXsdType = _xsdType
            facets = None
        XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Check one attribute (Clark-notation attrTag) of an ix or html element."""
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, _sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass  # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            # any KeyError below (unknown attribute type, or the explicit raise)
            # is reported as an unexpected attribute
            try:
                validateTypedValue(elt, attrTag, attrValue, ixAttrType[elt.namespaceURI][localName])
                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = (
                    {"scheme", "periodType", "balance", "contextRef", "unitRef", "precision", "decimals"} -
                    {"fraction": {"contextRef", "unitRef"},
                     "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                     "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                # sorted so the reported attribute list is deterministic
                disallowedAttrs = sorted(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(
                        ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                validateTypedValue(elt, attrTag, attrValue, _xsdType)

    def checkHierarchyConstraints(elt):
        """Check the ancestor/parent/child/descendant constraints declared for an ix element."""
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                # first character is the requirement code, the remainder the relation name
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                if nameFilter == ('*', ):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[0] and nameFilter[0][0] == "{":
                    namespaceFilter, _sep, nameFilter = nameFilter[0][1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt, namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "child-or-text": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, namespaceFilter, nameFilter)
                if rel in ("ancestor", "parent"):
                    # these lookups yield a single element or None; normalize to a list
                    if relations is None:
                        relations = []
                    else:
                        relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False,
                                                        ixContinuation=False)
                issue = ''
                if reqt in ('^', ):
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1', ) and not elt.isNil:
                    if sum(r.localName in names and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname)
                        for r in relations
                        if not (r.tag in names or
                                (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for r in relations:
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(reqt == '&' and
                                           rel in ("child-sequence", "child-choice") and
                                           elt.textValue.strip())
                if ((reqt == '+' and not relations) or
                        (reqt == '-' and relations) or
                        (issue) or disallowedChildText):
                    # NOTE(review): the "other" fallback is a plain string, so indexing it
                    # with "constraint" would fail if localName were absent from ixSect;
                    # presumably an "other" entry lookup was intended - confirm.
                    code = "{}:{}".format(
                        ixSect[elt.namespaceURI].get(elt.localName, "other")["constraint"],
                        {
                            'ancestor': "ancestorNode",
                            'parent': "parentNode",
                            'child-choice': "childNodes",
                            'child-sequence': "childNodes",
                            'child-or-text': "childNodesOrText",
                            'descendant': "descendantNodes"
                        }[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"
                        }.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}").format(
                        elt.localName,
                        {
                            '+': "must",
                            '-': "may not",
                            '&': "may only",
                            '?': "may",
                            '^': "must",
                            '1': "must"
                        }[reqt],
                        {
                            'ancestor': "be nested in",
                            'parent': "have parent",
                            'child-choice': "have child",
                            'child-sequence': "have child",
                            'child-or-text': "have child or text,",
                            'descendant': "have as descendant"
                        }[rel],
                        '' if rel == 'child-or-text' else (
                            ', '.join(str(r.elementQname) for r in relations)
                            if names == ('*', ) and relations else
                            ", ".join("{}:{}".format(namespacePrefix, n) for n in names)),
                        issue,
                        " and no child text (\"{}\")".format(elt.textValue.strip()[:32])
                        if disallowedChildText else "")
                    modelXbrl.error(
                        code, msg,
                        modelObject=[elt] + relations, requirement=reqt,
                        messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                      "ix{ver.sect}:childNodesOrTextRequired",
                                      "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                      "ix{ver.sect}:descendantNodesDisallowed",
                                      "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            hasText = (elt.text or "") or any((childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(
                        ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"),
                        modelObject=[elt] + ancestorNonFractions, fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid()  # prevent further validation or cascading errors
            else:
                if ((childElts and (len(childElts) != 1 or
                                    childElts[0].namespaceURI != _ixNS or
                                    childElts[0].localName != "nonFraction")) or
                        (childElts and hasText)):
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorFractions:
                    modelXbrl.error(
                        ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"),
                        modelObject=[elt] + ancestorFractions, fact=elt.qname)
            else:
                nonFrChildren = [e for e in XmlUtil.children(elt, _ixNS, '*')
                                 if e.localName not in ("fraction", "numerator", "denominator")]
                if nonFrChildren:
                    modelXbrl.error(
                        ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"),
                        modelObject=[elt] + nonFrChildren, fact=elt.qname,
                        children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(elt, XbrlConst.ixbrl11, "fraction"):  # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(ancestorFraction.get("unitRef")):
                        modelXbrl.error(
                            ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"),
                            modelObject=[elt] + nonFrChildren, fact=elt.qname,
                            unitRef=elt.get("unitRef"), unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator", "nonNumeric"):
            fmt = elt.format
            if fmt:
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[fmt.namespaceURI]:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, name=fmt.localName)
                    elt.setInvalid()

    def ixToXhtml(fromRoot):
        """Return a fresh copy of fromRoot's tree without ix elements, checking as it copies."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in ('version',  # used in inline test cases but not valid xhtml
                               '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Recursively copy fromElt's children into toElt, validating ix elements on the way.

        With excludeSubtree set, descendants are still checked but nothing is copied.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[fromChild.namespaceURI]:
                        modelXbrl.error(
                            ixMsgCode("elementNameInvalid", ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"),
                            modelObject=fromChild, element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(
                                    ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"),
                                    modelObject=fromChild, attribute=attrTag, element=fromChild.localName)
                if excludeSubtree or (fromChild.localName in {"references", "resources"} and isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                        # placeholder element standing in for ix elements that carry nested content
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error(
                "html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.",
                                                   isInline=True)
    except XMLSyntaxError as err:
        # report readable messages; fall back to the exception text when the DTD log is empty
        dtdErrors = ', '.join(e.message for e in dtd.error_log.filter_from_errors())
        modelXbrl.error(
            "html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(),
            error=dtdErrors or str(err))
def load(modelXbrl, uri, base=None, referringElement=None, isEntry=False, isDiscovered=False, isIncluded=None, namespace=None, reloadCache=False):
    """Load, classify and discover the XML document at ``uri``.

    The URI is normalized against ``base``, checked against the disclosure
    system's reference rules, mapped through the disclosure system's mapped
    files/paths, resolved to a file path (archive member or web cache entry),
    parsed with lxml, classified by its root element, wrapped in a model
    document and finally passed to the discovery method matching its type.

    :param modelXbrl: model the document belongs to; receives all errors.
    :param uri: document URI, possibly relative to ``base``.
    :param base: base URI used for normalization.
    :param referringElement: object reported with errors; defaults to ``modelXbrl``.
    :param isEntry: True for the entry document (also sets ``modelXbrl.uri``/``uriDir``).
    :param isDiscovered: True when reached through discovery; marks the document as in the DTS.
    :param isIncluded: passed to schema discovery and used to word syntax error messages.
    :param namespace: passed to schema discovery.
    :param reloadCache: passed to the web cache as ``reload``.
    :returns: the model document (an already-loaded one from ``modelXbrl.urlDocs``
        when present), or None when the reference is blocked, the file cannot be
        loaded or parsed, or the parsed document has no root element.
    """
    if referringElement is None:  # used for error messages
        referringElement = modelXbrl
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri  # for error logging during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        # climb one directory level per allowed submission subdirectory nesting
        for _nesting in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(("EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06"),
                        _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
                        modelObject=referringElement, url=normalizedUri,
                        blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri  # for error logging during loading
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                        _("File can not be loaded: %(fileName)s"),
                        modelObject=referringElement, fileName=mappedUri)
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem and
                modelXbrl.modelManager.disclosureSystem.validateFileText):
            sourceFile, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            sourceFile, _encoding = modelXbrl.fileSource.file(filepath)
        try:
            _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath)
            xmlDocument = etree.parse(sourceFile, parser=_parser, base_url=filepath)
        finally:
            # close on every path, including exceptions not handled below
            sourceFile.close()
    except (EnvironmentError, KeyError) as err:  # missing zip file raises KeyError
        modelXbrl.error("IOerror",
                        _("%(fileName)s: file error: %(error)s"),
                        modelObject=referringElement, fileName=os.path.basename(uri), error=str(err))
        return None
    except (etree.LxmlError,
            ValueError) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error("xmlSchema:syntax",
                        _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                        modelObject=referringElement, fileName=os.path.basename(uri),
                        error=str(err), sourceAction=("including" if isIncluded else "importing"))
        return None

    # identify document
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    rootNode = xmlDocument.getroot()
    if rootNode is not None:
        ln = rootNode.localName
        ns = rootNode.namespaceURI

        # type classification; default covers roots in a recognized namespace
        # whose local name matches none of the cases below
        docType = Type.Unknown
        if ns == XbrlConst.xsd and ln == "schema":
            docType = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                docType = Type.LINKBASE
            elif ln == "xbrl":
                docType = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                docType = Type.INSTANCE
        elif ns == XbrlConst.xhtml and \
                (ln == "html" or ln == "xhtml"):
            if XbrlConst.ixbrl in rootNode.nsmap.values():
                docType = Type.INLINEXBRL
        elif ln == "report" and ns == XbrlConst.ver:
            docType = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            docType = Type.TESTCASESINDEX
        elif ln == "testcase":
            docType = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            docType = Type.REGISTRY
        elif ln == "rss":
            docType = Type.RSSFEED
        else:
            # look for an inline XBRL html/xhtml element nested inside another root
            nestedInline = None
            for htmlElt in rootNode.iter(tag="{http://www.w3.org/1999/xhtml}html"):
                nestedInline = htmlElt
                break
            if nestedInline is None:
                for htmlElt in rootNode.iter(tag="{http://www.w3.org/1999/xhtml}xhtml"):
                    nestedInline = htmlElt
                    break
            if nestedInline is not None:
                if XbrlConst.ixbrl in nestedInline.nsmap.values():
                    docType = Type.INLINEXBRL
                    rootNode = nestedInline

        # create modelDocument object or subtype as identified
        if docType == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, docType, mappedUri, filepath, xmlDocument)
        elif docType == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject
            modelDocument = ModelRssObject(modelXbrl, docType, mappedUri, filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, docType, mappedUri, filepath, xmlDocument)
        rootNode.init(modelDocument)
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)
        modelDocument.documentEncoding = _encoding

        if isEntry or isDiscovered:
            modelDocument.inDTS = True

        # discovery (parsing)
        if docType == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif docType == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif docType == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif docType == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif docType == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif docType == Type.TESTCASESINDEX:
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif docType == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif docType == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif docType == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
    return modelDocument
def xhtmlValidate(modelXbrl, elt):
    """Validate the XHTML and Inline XBRL markup of the tree rooted at ``elt``.

    The tree is copied into a fresh lxml tree with the inline XBRL elements
    removed (or replaced by ``ixNestedContent`` placeholders); while copying,
    every element's attributes and every ix element's hierarchy constraints
    are checked.  The copy is then validated against the DTD selected by the
    document's inline namespace, and when the disclosure system's validation
    type is "EFM" the HTML content checks of ``ValidateFilingText`` are run.

    All findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model receiving the error messages.
    :param elt: root element of the inline document to validate.
    """
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}

    def validateTypedValue(elt, attrTag, attrValue, _xsdType):
        """Validate attrValue against a type entry that is either a type name or a facets dict."""
        if isinstance(_xsdType, dict):
            baseXsdType = _xsdType["type"]
            facets = _xsdType
        else:
            baseXsdType = _xsdType
            facets = None
        XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Check one attribute (Clark-notation attrTag) of an ix or html element."""
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, _sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass  # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            # any KeyError below (unknown attribute type, or the explicit raise)
            # is reported as an unexpected attribute
            try:
                validateTypedValue(elt, attrTag, attrValue, ixAttrType[elt.namespaceURI][localName])
                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = (
                    {"scheme", "periodType", "balance", "contextRef", "unitRef", "precision", "decimals"} -
                    {"fraction": {"contextRef", "unitRef"},
                     "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                     "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                # sorted so the reported attribute list is deterministic
                disallowedAttrs = sorted(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(
                        ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                validateTypedValue(elt, attrTag, attrValue, _xsdType)

    def checkHierarchyConstraints(elt):
        """Check the ancestor/parent/child/descendant constraints declared for an ix element."""
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                # first character is the requirement code, the remainder the relation name
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*',)
                else:
                    nameFilter = names
                if nameFilter == ('*',):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[0] and nameFilter[0][0] == "{":
                    namespaceFilter, _sep, nameFilter = nameFilter[0][1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt, namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "child-or-text": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, namespaceFilter, nameFilter)
                if rel in ("ancestor", "parent"):
                    # these lookups yield a single element or None; normalize to a list
                    if relations is None:
                        relations = []
                    else:
                        relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False,
                                                        ixContinuation=False, ixResolveUris=False)
                issue = ''
                if reqt in ('^',):
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1',) and not elt.isNil:
                    if sum(r.localName in names and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname)
                        for r in relations
                        if not (r.tag in names or
                                (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for r in relations:
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(reqt == '&' and
                                           rel in ("child-sequence", "child-choice") and
                                           elt.textValue.strip())
                if ((reqt == '+' and not relations) or
                        (reqt == '-' and relations) or
                        (issue) or disallowedChildText):
                    # NOTE(review): the "other" fallback is a plain string, so indexing it
                    # with "constraint" would fail if localName were absent from ixSect;
                    # presumably an "other" entry lookup was intended - confirm.
                    code = "{}:{}".format(
                        ixSect[elt.namespaceURI].get(elt.localName, "other")["constraint"],
                        {
                            'ancestor': "ancestorNode",
                            'parent': "parentNode",
                            'child-choice': "childNodes",
                            'child-sequence': "childNodes",
                            'child-or-text': "childNodesOrText",
                            'descendant': "descendantNodes"
                        }[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"
                        }.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}").format(
                        elt.localName,
                        {
                            '+': "must",
                            '-': "may not",
                            '&': "may only",
                            '?': "may",
                            '^': "must",
                            '1': "must"
                        }[reqt],
                        {
                            'ancestor': "be nested in",
                            'parent': "have parent",
                            'child-choice': "have child",
                            'child-sequence': "have child",
                            'child-or-text': "have child or text,",
                            'descendant': "have as descendant"
                        }[rel],
                        '' if rel == 'child-or-text' else (
                            ', '.join(str(r.elementQname) for r in relations)
                            if names == ('*',) and relations else
                            ", ".join("{}:{}".format(namespacePrefix, n) for n in names)),
                        issue,
                        " and no child text (\"{}\")".format(elt.textValue.strip()[:32])
                        if disallowedChildText else "")
                    modelXbrl.error(
                        code, msg,
                        modelObject=[elt] + relations, requirement=reqt,
                        messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                      "ix{ver.sect}:childNodesOrTextRequired",
                                      "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                      "ix{ver.sect}:descendantNodesDisallowed",
                                      "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            hasText = (elt.text or "") or any((childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(
                        ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"),
                        modelObject=[elt] + ancestorNonFractions, fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid()  # prevent further validation or cascading errors
            else:
                if ((childElts and (len(childElts) != 1 or
                                    childElts[0].namespaceURI != _ixNS or
                                    childElts[0].localName != "nonFraction")) or
                        (childElts and hasText)):
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorFractions:
                    modelXbrl.error(
                        ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"),
                        modelObject=[elt] + ancestorFractions, fact=elt.qname)
            else:
                nonFrChildren = [e for e in XmlUtil.children(elt, _ixNS, '*')
                                 if e.localName not in ("fraction", "numerator", "denominator")]
                if nonFrChildren:
                    modelXbrl.error(
                        ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"),
                        modelObject=[elt] + nonFrChildren, fact=elt.qname,
                        children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(elt, XbrlConst.ixbrl11, "fraction"):  # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(ancestorFraction.get("unitRef")):
                        modelXbrl.error(
                            ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"),
                            modelObject=[elt] + nonFrChildren, fact=elt.qname,
                            unitRef=elt.get("unitRef"), unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator", "nonNumeric"):
            fmt = elt.format
            if fmt:
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[fmt.namespaceURI]:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, name=fmt.localName)
                    elt.setInvalid()

    def ixToXhtml(fromRoot):
        """Return a fresh copy of fromRoot's tree without ix elements, checking as it copies."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in ('version',  # used in inline test cases but not valid xhtml
                               '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Recursively copy fromElt's children into toElt, validating ix elements on the way.

        With excludeSubtree set, descendants are still checked but nothing is copied.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[fromChild.namespaceURI]:
                        modelXbrl.error(
                            ixMsgCode("elementNameInvalid", ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"),
                            modelObject=fromChild, element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(
                                    ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"),
                                    modelObject=fromChild, attribute=attrTag, element=fromChild.localName)
                if excludeSubtree or (fromChild.localName in {"references", "resources"} and isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                        # placeholder element standing in for ix elements that carry nested content
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error(
                "html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.",
                                                   isInline=True)
    except XMLSyntaxError as err:
        # report readable messages; fall back to the exception text when the DTD log is empty
        dtdErrors = ', '.join(e.message for e in dtd.error_log.filter_from_errors())
        modelXbrl.error(
            "html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(),
            error=dtdErrors or str(err))
def validateXbrlFinally(val, *args, **kwargs):
    """Apply the FERC filing checks to the loaded instance and log violations.

    Does nothing unless ``val.validateFERCplugin`` is truthy and the model has
    an entry document.  Findings are reported through ``modelXbrl.error``;
    the function itself always returns ``None``.

    Checks performed, in order:
      * text-block fact content and, for non-inline instances, footnote
        content (delegated to ``ValidateFilingText``);
      * FERC.6.05.03 / FERC.6.05.01 - entity identifiers must be unique and
        match the disclosure system's scheme/value patterns;
      * FERC.6.05.07 / FERC.6.05.04 - duplicate contexts and contexts with a
        ``segment`` child;
      * FERC.22.00 - schema references other than the entry points derived
        from the ``FormType`` fact;
      * FERC.6.05.08 - contexts not referenced by any fact;
      * FERC.6.05.17 - numeric facts carrying a ``precision`` attribute;
      * FERC.6.05.14 - non-default-language text without a default-language
        counterpart for the same concept and context.

    :param val: validation object supplying ``validateFERCplugin``,
        ``modelXbrl`` and ``disclosureSystem``.
    :param args: ignored.
    :param kwargs: ignored.
    """
    if not (val.validateFERCplugin):
        return

    modelXbrl = val.modelXbrl
    modelDocument = modelXbrl.modelDocument
    if not modelDocument:
        return  # never loaded properly
    disclosureSystem = val.disclosureSystem

    _statusMsg = _("validating {0} filing rules").format(val.disclosureSystem.name)
    modelXbrl.profileActivity()
    modelXbrl.modelManager.showStatus(_statusMsg)

    inlineTypes = (ModelDocument.Type.INLINEXBRL,
                   ModelDocument.Type.INLINEXBRLDOCUMENTSET)
    isInlineXbrl = modelDocument.type in inlineTypes
    defaultLang = disclosureSystem.defaultXmlLang
    requiredFactLang = defaultLang.lower() if defaultLang else defaultLang

    # an inline document set has several html roots to check; otherwise the
    # single root of the entry point document is used
    if modelDocument.type == ModelDocument.Type.INLINEXBRLDOCUMENTSET:
        xbrlInstRoots = modelXbrl.ixdsHtmlElements
    else:
        xbrlInstRoots = [modelDocument.xmlDocument.getroot()]

    # 6.5.15 facts with xml in text blocks
    ValidateFilingText.validateTextBlockFacts(
        modelXbrl,
        {True: ("gif", "jpg", "jpeg", "png"),  # img file extensions
         False: ("gif", "jpeg", "png")})  # mime types: jpg is not a valid mime type

    # check footnotes text
    if isInlineXbrl:
        _linkEltIter = (
            linkPrototype
            for linkKey, links in modelXbrl.baseSets.items()
            for linkPrototype in links
            if linkPrototype.modelDocument.type in inlineTypes
            and linkKey[1] and linkKey[2] and linkKey[3]  # fully specified roles
            and linkKey[0] != "XBRL-footnotes")
    else:
        _linkEltIter = xbrlInstRoots[0].iterdescendants(
            tag="{http://www.xbrl.org/2003/linkbase}footnoteLink")
    for footnoteLinkElt in _linkEltIter:
        if isinstance(footnoteLinkElt, (ModelObject, LinkPrototype)):
            for child in footnoteLinkElt:
                if isinstance(child, (ModelObject, LocPrototype, ArcPrototype)):
                    xlinkType = child.get("{http://www.w3.org/1999/xlink}type")
                    if xlinkType == "resource" or isinstance(child, ModelInlineFootnote):  # footnote
                        # inline content was validated before and needs continuations assembly
                        if not isInlineXbrl:
                            ValidateFilingText.validateFootnote(modelXbrl, child)

    # same identifier in all contexts (EFM 6.5.3)
    entityIdentifiers = set()
    for xbrlInstRoot in xbrlInstRoots:  # check all inline docs in ix doc set
        for entityIdentifierElt in xbrlInstRoot.iterdescendants(
                "{http://www.xbrl.org/2003/instance}identifier"):
            if isinstance(entityIdentifierElt, ModelObject):
                entityIdentifiers.add("{}#{}".format(
                    entityIdentifierElt.get("scheme"),
                    XmlUtil.text(entityIdentifierElt)))
    if len(entityIdentifiers) > 1:
        modelXbrl.error(
            "FERC.6.05.03",
            _("There are more than one entity identifiers: %(entityIdentifiers)s."),
            modelObject=modelXbrl,
            entityIdentifiers=", ".join(sorted(entityIdentifiers)))
    for ei in sorted(entityIdentifiers):
        # entries were stored as "scheme#identifier"; split on the last '#'
        scheme, _sep, identifier = ei.rpartition("#")
        if (not disclosureSystem.identifierSchemePattern.match(scheme)
                or not disclosureSystem.identifierValuePattern.match(identifier)):
            modelXbrl.error(
                "FERC.6.05.01",
                _("Entity identifier %(identifier)s, or scheme %(scheme)s does not adhere "
                  "to the standard naming convention of <identifier scheme='http://www.ferc.gov/CID'>Cnnnnnn</identifier>'. "),
                modelObject=modelXbrl, scheme=scheme, identifier=identifier)

    # 6.5.4 scenario
    segContexts = set()
    uniqueContextHashes = {}
    contextIDs = set()
    precisionFacts = set()
    formType = None
    factsForLang = {}
    keysNotDefaultLang = {}
    allFormEntryXsd = ()

    for c in modelXbrl.contexts.values():
        if XmlUtil.hasChild(c, xbrli, "segment"):
            segContexts.add(c)
        h = c.contextDimAwareHash
        if h in uniqueContextHashes:
            # same hash is only a candidate; confirm with a full comparison
            if c.isEqualTo(uniqueContextHashes[h]):
                modelXbrl.error(
                    "FERC.6.05.07",
                    _("The instance document contained more than one context equivalent to %(context)s (%(context2)s). "
                      "Please remove duplicate contexts from the instance."),
                    modelObject=(c, uniqueContextHashes[h]),
                    context=c.id, context2=uniqueContextHashes[h].id)
        else:
            uniqueContextHashes[h] = c
        contextIDs.add(c.id)

    if segContexts:
        modelXbrl.error(
            "FERC.6.05.04",
            _("There must be no contexts with segment, but %(count)s was(were) found: %(context)s."),
            modelObject=segContexts, count=len(segContexts),
            context=", ".join(sorted(c.id for c in segContexts)))

    # unused contexts: start from every context id and drop each one a fact uses
    for f in modelXbrl.facts:
        contextIDs.discard(f.contextID)
        if f.isNumeric:
            if f.precision is not None:
                precisionFacts.add(f)
        elif not f.isNil:
            langTestKey = "{0},{1}".format(f.qname, f.contextID)
            factsForLang.setdefault(langTestKey, []).append(f)
            lang = f.xmlLang
            if lang and lang.lower() != requiredFactLang:
                keysNotDefaultLang[langTestKey] = f
        # A nil FormType fact is skipped: the regex parsing below needs a
        # string value (presumably nil facts have none - confirm).
        if (not f.isNil and getattr(f, "xValid", 0) >= VALID
                and f.qname.localName == "FormType"):
            formType = f.xValue
            formNum = re.sub("([0-9]+).*", r"\1", formType)
            formLtr = re.match("[^A-Z]*([A-Z]?)", formType).group(1)
            txDate = re.sub(
                "http://ferc.gov/form/([0-9]{4}-[0-9]{2}-[0-9]{2})/ferc",
                r"\1", f.qname.namespaceURI)
            formEntryXsd = "https://eCollection.ferc.gov/taxonomy/form{}/{}/form/form{}{}/form-{}{}_{}.xsd".format(
                formNum, txDate, formNum, formLtr, formNum, formLtr, txDate)
            confFormEntryXsd = "https://eCollection.ferc.gov/taxonomy/form{}/{}/ferc-core-footnote-roles_{}.xsd".format(
                formNum, txDate, txDate)
            # each entry point is accepted on the production host and on the
            # uat/test/dev hosts
            allFormEntryXsd = [
                entryXsd.replace("eCollection", host)
                for entryXsd in (formEntryXsd, confFormEntryXsd)
                for host in ("eCollection", "uat.eforms", "test.eforms", "dev.eforms")]

    unexpectedXsds = set(
        doc.modelDocument.uri
        for doc, referencingDoc in modelXbrl.modelDocument.referencesDocument.items()
        if "href" in referencingDoc.referenceTypes
        if doc.modelDocument.uri not in allFormEntryXsd)
    if unexpectedXsds:
        modelXbrl.error(
            "FERC.22.00",
            _("The instance document contained unexpected schema references %(schemaReferences)s."),
            modelXbrl=modelXbrl,
            schemaReferences=", ".join(sorted(unexpectedXsds)))

    if contextIDs:  # check if contextID is on any undefined facts
        for undefinedFact in modelXbrl.undefinedFacts:
            contextIDs.discard(undefinedFact.get("contextRef"))
        if contextIDs:
            modelXbrl.error(
                "FERC.6.05.08",
                _("The instance document contained context(s) %(contextIDs)s that was(were) not used in any fact."),
                modelXbrl=modelXbrl,
                # sorted so the message is stable from run to run
                contextIDs=", ".join(sorted(str(c) for c in contextIDs)))

    if precisionFacts:
        modelXbrl.error(
            "FERC.6.05.17",
            _("The instance document contains elements using the precision attribute."),
            modelObject=precisionFacts)

    # 6.5.14 facts without english text
    for keyNotDefaultLang, factNotDefaultLang in keysNotDefaultLang.items():
        anyDefaultLangFact = False
        for fact in factsForLang[keyNotDefaultLang]:
            # facts with no xml:lang are collected too, so guard before lower()
            factLang = fact.xmlLang
            if factLang and factLang.lower() == requiredFactLang:
                anyDefaultLangFact = True
                break
        if not anyDefaultLangFact:
            val.modelXbrl.error(
                "FERC.6.05.14",
                _("Element %(fact)s in context %(contextID)s has text with xml:lang other than '%(lang2)s' (%(lang)s) without matching English text. "),
                modelObject=factNotDefaultLang,
                fact=factNotDefaultLang.qname,
                contextID=factNotDefaultLang.contextID,
                lang=factNotDefaultLang.xmlLang,
                lang2=disclosureSystem.defaultXmlLang)  # report lexical format default lang

    modelXbrl.profileActivity(_statusMsg, minTimeToShow=0.0)
    modelXbrl.modelManager.showStatus(None)
def load(modelXbrl, uri, base=None, isEntry=False, isIncluded=None, namespace=None, reloadCache=False):
    """Load the XML document at *uri* with minidom and wrap it in a model document.

    The URI is normalized against *base*, checked against the disclosure
    system's href rules, mapped through the disclosure system's file and path
    mappings, and resolved to a file path (directly when it is inside the
    file source archive, through the web cache otherwise).  A document
    already registered in ``modelXbrl.urlDocs`` under the mapped URI is
    returned as is.  Otherwise the file is parsed, classified from its first
    element node, wrapped in a ``ModelDocument`` (or ``ModelVersReport`` /
    ``ModelRssObject``) and the discovery method for its type is run.

    :param modelXbrl: model the document is loaded into; also receives errors.
    :param uri: URI of the document, possibly relative to *base*.
    :param base: base URI used to normalize *uri*.
    :param isEntry: when true, records the URI and its directory on
        *modelXbrl* and marks the document as in the DTS.
    :param isIncluded: passed through to ``schemaDiscover``.
    :param namespace: passed through to ``schemaDiscover``.
    :param reloadCache: passed to the web cache as ``reload``.
    :returns: the model document, or ``None`` when the reference is blocked,
        the file cannot be obtained or parsed, or it has no element node.
    """
    modelManager = modelXbrl.modelManager
    disclosureSystem = modelManager.disclosureSystem
    webCache = modelManager.cntlr.webCache

    def declaresInlineXbrl(node):
        # true when any attribute value (e.g. an xmlns declaration) is the ix namespace
        return any(attrValue == XbrlConst.ixbrl
                   for _attrName, attrValue in node.attributes.items())

    normalizedUri = webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        # NOTE: the loop variable must not be "_", which is the gettext function
        for _nesting in range(disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if (modelManager.validateDisclosureSystem
            and not normalizedUri.startswith(modelXbrl.uriDir)
            and not disclosureSystem.hrefValid(normalizedUri)):
        blocked = disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            "Prohibited file for filings{1}: {0}".format(normalizedUri, _(" blocked") if blocked else ""),
            "err", "EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None

    if normalizedUri in disclosureSystem.mappedFiles:
        mappedUri = disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break

    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error(
            "File can not be loaded: {0}".format(mappedUri),
            "err", "FileNotLoadable")
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if modelManager.validateDisclosureSystem:
            file = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file = modelXbrl.fileSource.file(filepath)
        xmlDocument = xml.dom.minidom.parse(file)
    except EnvironmentError as err:
        modelXbrl.error(
            "{0}: file error: {1}".format(os.path.basename(uri), err),
            "err", "IOerror")
        return None
    except (xml.parsers.expat.ExpatError, xml.dom.DOMException, ValueError) as err:
        # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
            "{0}: import error: {1}".format(os.path.basename(uri), err),
            "err", "XMLsyntax")
        return None
    finally:
        # closed on every path, including exceptions not handled above
        if file:
            file.close()

    # identify document
    modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None
    for rootNode in xmlDocument.childNodes:
        if rootNode.nodeType != 1:  # skip anything that is not an element
            continue
        ln = rootNode.localName
        ns = rootNode.namespaceURI

        # type classification; starts as Unknown so a recognized namespace
        # with an unexpected root name cannot leave the type unassigned
        docType = Type.Unknown
        if ns == XbrlConst.xsd and ln == "schema":
            docType = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                docType = Type.LINKBASE
            elif ln == "xbrl":
                docType = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                docType = Type.INSTANCE
        elif ns == XbrlConst.xhtml and (ln == "html" or ln == "xhtml"):
            # parenthesized: both root names require the xhtml namespace
            if declaresInlineXbrl(rootNode):
                docType = Type.INLINEXBRL
            XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability
        elif ln == "report" and ns == XbrlConst.ver:
            docType = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            docType = Type.TESTCASESINDEX
        elif ln == "testcase":
            docType = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            docType = Type.REGISTRY
        elif ln == "rss":
            docType = Type.RSSFEED
        else:
            # look for inline XBRL nested inside some other root element
            nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml, ("html", "xhtml"))
            if nestedInline:
                if declaresInlineXbrl(nestedInline):
                    docType = Type.INLINEXBRL
                    rootNode = nestedInline
                XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability

        # create modelDocument object or subtype as identified
        if docType == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, docType, mappedUri, filepath, xmlDocument)
        elif docType == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject
            modelDocument = ModelRssObject(modelXbrl, docType, mappedUri, filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, docType, mappedUri, filepath, xmlDocument)
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)

        if isEntry:
            modelDocument.inDTS = True

        # discovery (parsing)
        if docType == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif docType == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif docType == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif docType == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif docType == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif docType == Type.TESTCASESINDEX:
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif docType == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif docType == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif docType == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
        break  # only the first element node is the document root
    return modelDocument
def xhtmlValidate(modelXbrl, elt):
    """Validate an inline XBRL html tree against the ix rules and the xhtml DTD.

    A copy of *elt* is built with the inline XBRL elements removed.  While
    copying, each ix element is checked for a valid name, its hierarchy
    constraints, its attributes and its required attributes, and each html
    element's attributes are checked.  The copy is then validated against the
    ``xhtml1-strict-ix.dtd`` file in the controller's config directory.  When
    the disclosure system's validation type is "EFM" the html content is also
    passed to ``ValidateFilingText.validateHtmlContent``.

    All findings are reported through ``modelXbrl.error``.

    :param modelXbrl: model that receives the error messages.
    :param elt: root html element of the inline document.
    """
    from lxml.etree import DTD, XMLSyntaxError
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        # Report attributes that are not permitted on elt, and type-check
        # the value of unqualified/ix attributes on ix elements.
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, _sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                # NOTE(review): this first test cannot succeed here because the
                # enclosing condition already requires ns not in ixbrlAll.
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            try:
                # an unknown attribute name raises KeyError, reported below
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)

                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                # xbrli-style attributes, minus those this ix element may carry
                disallowedXbrliAttrs = (
                    {"scheme", "periodType", "balance", "contextRef", "unitRef", "precision", "decimals"} -
                    {"fraction": {"contextRef", "unitRef"},
                     "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                     "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)

    def checkHierarchyConstraints(elt):
        # Check elt against each (requirement+relation, names) constraint
        # registered for its local name in ixHierarchyConstraints.
        constraints = ixHierarchyConstraints.get(elt.localName)
        if not constraints:
            return
        relationFinders = {"ancestor": XmlUtil.ancestor,
                           "parent": XmlUtil.parent,
                           "child-choice": XmlUtil.children,
                           "child-sequence": XmlUtil.children,
                           "child-or-text": XmlUtil.children,
                           "descendant": XmlUtil.descendants}
        relationCodes = {'ancestor': "ancestorNode",
                         'parent': "parentNode",
                         'child-choice': "childNodes",
                         'child-sequence': "childNodes",
                         'child-or-text': "childNodesOrText",
                         'descendant': "descendantNodes"}
        requirementCodes = {'+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified"}
        # '^' is included: without it a failed '^' constraint raised KeyError
        # while the message was being formatted
        requirementVerbs = {'+': "must", '-': "may not", '&': "may only",
                            '?': "may", '^': "must"}
        relationPhrases = {'ancestor': "be nested in",
                           'parent': "have parent",
                           'child-choice': "have child",
                           'child-sequence': "have child",
                           'child-or-text': "have child or text,",
                           'descendant': "have as descendant"}
        for _rel, names in constraints:
            reqt = _rel[0]   # first character is the requirement code
            rel = _rel[1:]   # remainder names the relationship
            if reqt in ('&', '^'):
                nameFilter = ('*',)
            else:
                nameFilter = names
            if nameFilter == ('*',):
                namespaceFilter = namespacePrefix = '*'
            else:
                namespaceFilter = elt.namespaceURI
                namespacePrefix = elt.prefix
            relations = relationFinders[rel](elt, namespaceFilter, nameFilter)
            if rel in ("ancestor", "parent"):
                # these finders yield one element or None; normalize to a list
                if relations is None:
                    relations = []
                else:
                    relations = [relations]
            if rel == "child-or-text":
                relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False, ixContinuation=False)
            issue = ''
            if reqt == '^':
                if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                           for r in relations):
                    issue = " and is missing one of " + ', '.join(names)
            if reqt in ('&', '^'):
                disallowed = [str(r.elementQname)
                              for r in relations
                              if not (r.tag in names or
                                      (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                if disallowed:
                    issue += " and may not have " + ", ".join(disallowed)
                elif rel == "child-sequence":
                    sequencePosition = 0
                    for r in relations:
                        rPos = names.index(str(r.localName))
                        if rPos < sequencePosition:
                            issue += " and is out of sequence: " + str(r.elementQname)
                        else:
                            sequencePosition = rPos
            if reqt == '?' and len(relations) > 1:
                issue = " may only have 0 or 1 but {0} present ".format(len(relations))
            if reqt == '+' and len(relations) == 0:
                issue = " must have at least 1 but none present "
            if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or
                    (issue)):
                # NOTE(review): the "other" fallback is a str, so indexing it
                # with "constraint" would raise TypeError; it presumably never
                # triggers because only known ix element names reach here.
                code = "{}:{}".format(
                    ixSect[elt.namespaceURI].get(elt.localName, "other")["constraint"],
                    relationCodes[rel] + requirementCodes.get(reqt, "Specified"))
                if rel == 'child-or-text':
                    relatedNames = ''
                elif names == ('*',) and relations:
                    relatedNames = ', '.join(str(r.elementQname) for r in relations)
                else:
                    relatedNames = ", ".join("{}:{}".format(namespacePrefix, n) for n in names)
                msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element").format(
                    elt.localName,
                    requirementVerbs[reqt],
                    relationPhrases[rel],
                    relatedNames,
                    issue)
                modelXbrl.error(code, msg,
                                modelObject=[elt] + relations, requirement=reqt,
                                messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                              "ix{ver.sect}:childNodesOrTextRequired",
                                              "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                              "ix{ver.sect}:descendantNodesDisallowed",
                                              "ix{ver.sect}:parentNodeRequired"))

    def ixToXhtml(fromRoot):
        # Build the ix-free copy of the document rooted at fromRoot.
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in ('version',  # used in inline test cases but not valid xhtml
                               '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        # Copy html descendants of fromElt under toElt, validating ix
        # elements on the way; excludeSubtree validates without copying.
        for fromChild in fromElt.iterchildren():
            if not isinstance(fromChild, ModelObject):
                continue
            isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
            if isIxNs:
                if fromChild.localName not in ixElements[fromChild.namespaceURI]:
                    modelXbrl.error(ixMsgCode("elementNameInvalid", ns=_ixNS),
                        _("Inline XBRL element name %(element)s is not valid"),
                        modelObject=fromChild, element=str(fromChild.elementQname))
                else:
                    checkHierarchyConstraints(fromChild)
                    for attrTag, attrValue in fromChild.items():
                        checkAttribute(fromChild, True, attrTag, attrValue)
                    for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName, []):
                        if fromChild.get(attrTag) is None:
                            # report against the offending element, not the document root
                            modelXbrl.error(ixMsgCode("attributeRequired", fromChild),
                                _("Attribute %(attribute)s required on element ix:%(element)s"),
                                modelObject=fromChild, attribute=attrTag, element=fromChild.localName)
            if excludeSubtree or (fromChild.localName in {"references", "resources"} and isIxNs):
                copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
            elif fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                # these ix elements are replaced by an ixNestedContent element
                toChild = etree.Element("ixNestedContent")
                toElt.append(toChild)
                copyNonIxChildren(fromChild, toChild)
                if fromChild.text is not None:
                    toChild.text = fromChild.text
                if fromChild.tail is not None:
                    toChild.tail = fromChild.tail
            elif isIxNs:
                # other ix elements are dropped; their children attach to toElt
                copyNonIxChildren(fromChild, toElt)
            else:
                toChild = etree.Element(fromChild.localName)
                toElt.append(toChild)
                copyNonIxChildren(fromChild, toChild)
                for attrTag, attrValue in fromChild.items():
                    checkAttribute(fromChild, False, attrTag, attrValue)
                    toChild.set(attrTag, attrValue)
                if fromChild.text is not None:
                    toChild.text = fromChild.text
                if fromChild.tail is not None:
                    toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.", isInline=True)
    except XMLSyntaxError:
        modelXbrl.error("html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(),
            error=dtd.error_log.filter_from_errors())
def load(modelXbrl, uri, base=None, referringElement=None, isEntry=False, isDiscovered=False, isIncluded=None, namespace=None, reloadCache=False):
    """Load the XML document at *uri* with lxml and wrap it in a model document.

    The URI is normalized against *base*, checked against the disclosure
    system's href rules, mapped through the disclosure system's file and path
    mappings, and resolved to a file path (directly when it is inside the
    file source archive, through the web cache otherwise).  A document
    already registered in ``modelXbrl.urlDocs`` under the mapped URI is
    returned as is.  Otherwise the file is parsed, classified from its root
    element, wrapped in a ``ModelDocument`` (or ``ModelVersReport`` /
    ``ModelRssObject``) and the discovery method for its type is run.

    :param modelXbrl: model the document is loaded into; also receives errors.
    :param uri: URI of the document, possibly relative to *base*.
    :param base: base URI used to normalize *uri*.
    :param referringElement: object attached to error messages; defaults to
        *modelXbrl*.
    :param isEntry: when true, records the URI and its directory on
        *modelXbrl*.
    :param isDiscovered: with *isEntry*, decides whether the document is
        marked as in the DTS.
    :param isIncluded: passed to ``schemaDiscover``; also selects the
        "including"/"importing" wording of syntax errors.
    :param namespace: passed to ``schemaDiscover``; when the file cannot be
        read and the namespace has a standard schema location, that location
        is tried instead.
    :param reloadCache: passed to the web cache as ``reload``.
    :returns: the model document; a ``Type.UnknownNonXML`` document for a
        non-entry file that does not start with ``<``; or ``None`` when the
        reference is blocked, the file cannot be obtained or parsed, or the
        parsed document has no root element.
    """
    if referringElement is None:  # used for error messages
        referringElement = modelXbrl
    modelManager = modelXbrl.modelManager
    disclosureSystem = modelManager.disclosureSystem
    webCache = modelManager.cntlr.webCache

    normalizedUri = webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri  # for error logging during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        # NOTE: the loop variable must not be "_", which is the gettext function
        for _nesting in range(disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if (modelManager.validateDisclosureSystem
            and not normalizedUri.startswith(modelXbrl.uriDir)
            and not disclosureSystem.hrefValid(normalizedUri)):
        blocked = disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            ("EFM.6.22.02", "GFM.1.1.3",
             "SBR.NL.2.1.0.06" if normalizedUri.startswith("http") else "SBR.NL.2.2.0.17"),
            _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
            modelObject=referringElement, url=normalizedUri,
            blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None

    if normalizedUri in disclosureSystem.mappedFiles:
        mappedUri = disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri  # for error logging during loading

    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                        _("File can not be loaded: %(fileName)s"),
                        modelObject=referringElement, fileName=mappedUri)
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if (modelManager.validateDisclosureSystem and
                disclosureSystem.validateFileText):
            file, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file, _encoding = modelXbrl.fileSource.file(filepath)
        _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl, filepath)
        xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        file.close()
    except (EnvironmentError, KeyError) as err:  # missing zip file raises KeyError
        if file:
            file.close()
        # retry in case of well known schema locations
        if (not isIncluded and namespace and
                namespace in XbrlConst.standardNamespaceSchemaLocations and
                uri != XbrlConst.standardNamespaceSchemaLocations[namespace]):
            return load(modelXbrl, XbrlConst.standardNamespaceSchemaLocations[namespace],
                        base, referringElement, isEntry, isDiscovered, isIncluded,
                        namespace, reloadCache)
        modelXbrl.error("IOerror",
                        _("%(fileName)s: file error: %(error)s"),
                        modelObject=referringElement,
                        fileName=os.path.basename(uri), error=str(err))
        return None
    except (etree.LxmlError, ValueError) as err:
        # ValueError raised on bad format of qnames, xmlns'es, or parameters
        if file:
            file.close()
        if not isEntry and str(err) == "Start tag expected, '<' not found, line 1, column 1":
            # a referenced file that is not XML at all is kept as a placeholder
            return ModelDocument(modelXbrl, Type.UnknownNonXML, mappedUri, filepath, None)
        modelXbrl.error("xmlSchema:syntax",
                        _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                        modelObject=referringElement,
                        fileName=os.path.basename(uri), error=str(err),
                        sourceAction=("including" if isIncluded else "importing"))
        return None

    # identify document
    modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    rootNode = xmlDocument.getroot()
    if rootNode is None:
        return modelDocument

    ln = rootNode.localName
    ns = rootNode.namespaceURI

    # type classification; starts as UnknownXML so a recognized namespace
    # with an unexpected root name cannot leave the type unassigned
    docType = Type.UnknownXML
    if ns == XbrlConst.xsd and ln == "schema":
        docType = Type.SCHEMA
    elif ns == XbrlConst.link:
        if ln == "linkbase":
            docType = Type.LINKBASE
        elif ln == "xbrl":
            docType = Type.INSTANCE
    elif ns == XbrlConst.xbrli:
        if ln == "xbrl":
            docType = Type.INSTANCE
    elif ns == XbrlConst.xhtml and (ln == "html" or ln == "xhtml"):
        if XbrlConst.ixbrl in rootNode.nsmap.values():
            docType = Type.INLINEXBRL
    elif ln == "report" and ns == XbrlConst.ver:
        docType = Type.VERSIONINGREPORT
    elif ln == "testcases" or ln == "documentation":
        docType = Type.TESTCASESINDEX
    elif ln == "testcase":
        docType = Type.TESTCASE
    elif ln == "registry" and ns == XbrlConst.registry:
        docType = Type.REGISTRY
    elif ln == "rss":
        docType = Type.RSSFEED
    elif ln == "ptvl":
        docType = Type.ARCSINFOSET
    elif ln == "facts":
        docType = Type.FACTDIMSINFOSET
    else:
        # look for inline XBRL nested inside some other root element:
        # first html element in document order, else first xhtml element
        nestedInline = next(rootNode.iter(tag="{http://www.w3.org/1999/xhtml}html"), None)
        if nestedInline is None:
            nestedInline = next(rootNode.iter(tag="{http://www.w3.org/1999/xhtml}xhtml"), None)
        if nestedInline is not None:
            if XbrlConst.ixbrl in nestedInline.nsmap.values():
                docType = Type.INLINEXBRL
                rootNode = nestedInline

    # create modelDocument object or subtype as identified
    if docType == Type.VERSIONINGREPORT:
        from arelle.ModelVersReport import ModelVersReport
        modelDocument = ModelVersReport(modelXbrl, docType, mappedUri, filepath, xmlDocument)
    elif docType == Type.RSSFEED:
        from arelle.ModelRssObject import ModelRssObject
        modelDocument = ModelRssObject(modelXbrl, docType, mappedUri, filepath, xmlDocument)
    else:
        modelDocument = ModelDocument(modelXbrl, docType, mappedUri, filepath, xmlDocument)
    rootNode.init(modelDocument)
    modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
    modelDocument.parserLookupName = _parserLookupName
    modelDocument.parserLookupClass = _parserLookupClass
    modelDocument.xmlRootElement = rootNode
    modelDocument.schemaLocationElements.add(rootNode)
    modelDocument.documentEncoding = _encoding

    if isEntry or isDiscovered:
        modelDocument.inDTS = True

    # discovery (parsing)
    if docType == Type.SCHEMA:
        modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
    elif docType == Type.LINKBASE:
        modelDocument.linkbaseDiscover(rootNode)
    elif docType == Type.INSTANCE:
        modelDocument.instanceDiscover(rootNode)
    elif docType == Type.INLINEXBRL:
        modelDocument.inlineXbrlDiscover(rootNode)
    elif docType == Type.VERSIONINGREPORT:
        modelDocument.versioningReportDiscover(rootNode)
    elif docType == Type.TESTCASESINDEX:
        modelDocument.testcasesIndexDiscover(xmlDocument)
    elif docType == Type.TESTCASE:
        modelDocument.testcaseDiscover(rootNode)
    elif docType == Type.REGISTRY:
        modelDocument.registryDiscover(rootNode)
    elif docType == Type.RSSFEED:
        modelDocument.rssFeedDiscover(rootNode)
    return modelDocument
def _declaresInlineXbrl(element):
    """Return True if any attribute value of ``element`` equals ``XbrlConst.ixbrl``.

    Namespace declarations show up as ordinary attributes in minidom, so this
    is how an inline XBRL namespace declaration on the element is detected.
    """
    attributes = element.attributes
    return any(attributes.item(i).value == XbrlConst.ixbrl
               for i in range(len(attributes)))


def load(modelXbrl, uri, base=None, isEntry=False, isIncluded=None,
         namespace=None, reloadCache=False):
    """Load the document at ``uri``, classify it and run its discovery step.

    The uri is normalized against ``base``, checked against the disclosure
    system (when disclosure system validation is on), mapped through the
    disclosure system's ``mappedFiles`` / ``mappedPaths``, resolved to a file
    path (archive member or web cache entry), parsed with minidom and
    classified by the namespace and local name of its first element node.

    Parameters:
        modelXbrl: model the document is loaded into; errors are reported via
            ``modelXbrl.error``.
        uri: location of the document, absolute or relative to ``base``.
        base: base location passed to the web cache's ``normalizeUrl``.
        isEntry: when true, ``modelXbrl.uri`` / ``modelXbrl.uriDir`` are set
            from the normalized uri and the document is flagged ``inDTS``.
        isIncluded: passed through to ``schemaDiscover`` for schema documents.
        namespace: passed through to ``schemaDiscover`` for schema documents.
        reloadCache: passed to the web cache's ``getfilename`` as ``reload``.

    Returns:
        The model document (an already loaded one from ``modelXbrl.urlDocs``
        if present), or ``None`` when the reference is blocked, the file can
        not be obtained, it can not be read or parsed, or the parsed document
        has no element node.
    """
    modelManager = modelXbrl.modelManager
    disclosureSystem = modelManager.disclosureSystem
    webCache = modelManager.cntlr.webCache

    normalizedUri = webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        # The loop variable must not be named "_": that name is the gettext
        # translation function used further down in this function.
        for _nestingLevel in range(
                disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)

    if (modelManager.validateDisclosureSystem
            and not normalizedUri.startswith(modelXbrl.uriDir)
            and not disclosureSystem.hrefValid(normalizedUri)):
        blocked = disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            "Prohibited file for filings{1}: {0}".format(
                normalizedUri, _(" blocked") if blocked else ""),
            "err", "EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None

    if normalizedUri in disclosureSystem.mappedFiles:
        mappedUri = disclosureSystem.mappedFiles[normalizedUri]
    else:
        # handle mapped paths: the first matching prefix wins
        mappedUri = normalizedUri
        for mapFrom, mapTo in disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break

    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("File can not be loaded: {0}".format(mappedUri),
                        "err", "FileNotLoadable")
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if modelManager.validateDisclosureSystem:
            file = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file = modelXbrl.fileSource.file(filepath)
        xmlDocument = xml.dom.minidom.parse(file)
    except EnvironmentError as err:
        modelXbrl.error(
            "{0}: file error: {1}".format(os.path.basename(uri), err),
            "err", "IOerror")
        return None
    except (xml.parsers.expat.ExpatError,
            xml.dom.DOMException,
            ValueError) as err:
        # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
            "{0}: import error: {1}".format(os.path.basename(uri), err),
            "err", "XMLsyntax")
        return None
    finally:
        # close on every path, including exceptions not handled above
        if file is not None:
            file.close()

    # identify document
    modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None
    for rootNode in xmlDocument.childNodes:
        if rootNode.nodeType != 1:  # only element nodes are of interest
            continue
        ln = rootNode.localName
        ns = rootNode.namespaceURI

        # type classification; start from Unknown so that roots in the link
        # or xbrli namespaces with an unexpected local name are still typed
        docType = Type.Unknown
        if ns == XbrlConst.xsd and ln == "schema":
            docType = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                docType = Type.LINKBASE
            elif ln == "xbrl":
                docType = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                docType = Type.INSTANCE
        elif ns == XbrlConst.xhtml and ln in ("html", "xhtml"):
            # the namespace test applies to both local names
            if _declaresInlineXbrl(rootNode):
                docType = Type.INLINEXBRL
            XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability
        elif ln == "report" and ns == XbrlConst.ver:
            docType = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            docType = Type.TESTCASESINDEX
        elif ln == "testcase":
            docType = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            docType = Type.REGISTRY
        elif ln == "rss":
            docType = Type.RSSFEED
        else:
            # unknown root: look for an html/xhtml descendant that declares
            # the inline XBRL namespace and, if found, treat it as the root
            nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml,
                                              ("html", "xhtml"))
            if nestedInline and _declaresInlineXbrl(nestedInline):
                docType = Type.INLINEXBRL
                rootNode = nestedInline
            XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability

        # create modelDocument object or subtype as identified
        if docType == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, docType, mappedUri,
                                            filepath, xmlDocument)
        elif docType == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject
            modelDocument = ModelRssObject(modelXbrl, docType, mappedUri,
                                           filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, docType, mappedUri,
                                          filepath, xmlDocument)
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)
        if isEntry:
            modelDocument.inDTS = True

        # discovery (parsing)
        if docType == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif docType == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif docType == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif docType == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif docType == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif docType == Type.TESTCASESINDEX:
            # the testcases index discovery receives the whole document
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif docType == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif docType == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif docType == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
        break  # only the first element node is processed
    return modelDocument