Exemplo n.º 1
0
def htmlLoader(modelXbrl, mappedUri, filepath, *args, **kwargs):
    if filepath != lastFilePath or not lastFilePathIsHTML:
        return None  # not an HTML file

    cntlr = modelXbrl.modelManager.cntlr
    cntlr.showStatus(
        _("Loading HTML file: {0}").format(os.path.basename(filepath)))
    # parse html
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem
                and modelXbrl.modelManager.disclosureSystem.validateFileText):
            file, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file, _encoding = modelXbrl.fileSource.file(filepath,
                                                        stripDeclaration=False)
        _parser = HTMLParser()
        htmlTree = parse(file, _parser, base_url=filepath)
        for error in _parser.error_log:
            if not (error.type_name == "HTML_UNKNOWN_TAG"
                    and error.message.startswith("Tag ")
                    and error.message.lower()[4:].partition(" ")[0]
                    in edgarAdditionalTags):
                modelXbrl.error(
                    "html:syntax",
                    _("%(error)s, %(fileName)s, line %(line)s, column %(column)s"
                      ),
                    fileName=os.path.basename(mappedUri),
                    error=error.message,
                    line=error.line,
                    column=error.column)
        file.close()
    except Exception as err:
        modelXbrl.error(type(err).__name__,
                        _("Unrecoverable error: %(error)s, %(fileName)s"),
                        fileName=os.path.basename(mappedUri),
                        error=str(err),
                        exc_info=True)
        return None
    if modelXbrl:  # pull loader implementation
        modelXbrl.blockDpmDBrecursion = True
        modelXbrl.modelDocument = doc = createModelDocument(
            modelXbrl,
            Type.HTML,
            filepath,
            isEntry=True,
            documentEncoding="utf-8",
            base=filepath)
    else:  # API implementation
        modelXbrl = ModelXbrl.create(cntlr.modelManager,
                                     Type.HTML,
                                     filepath,
                                     isEntry=True,
                                     base=filepath)
        doc = modelXbrl.modelDocument
    doc.xmlRootElement = htmlTree.getroot()

    if doc is None:
        return None  # not an HTML file
    modelXbrl.loadedFromHTML = True
    return doc
Exemplo n.º 2
0
def xhtmlValidate(modelXbrl, elt):
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI,
                                          EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass  # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references",
                        "relationship", "tuple"
                }:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        ixMsgCode("inlineAttributeMisplaced",
                                  elt,
                                  name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        elif isIxElt:
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and
                         (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs
                                      if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(
                        ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

    def checkHierarchyConstraints(elt):
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                if nameFilter == ('*', ):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[
                        0] and nameFilter[0][0] == "{":
                    namespaceFilter, _sep, nameFilter = nameFilter[0][
                        1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt, namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "child-or-text": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, namespaceFilter, nameFilter)
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt,
                                                        ixExclude=True,
                                                        ixEscape=False,
                                                        ixContinuation=False)
                issue = ''
                if reqt in ('^', ):
                    if not any(r.localName in names
                               and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1', ) and not elt.isNil:
                    if sum(r.localName in names
                           and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(
                            names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names
                                 and r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(
                    reqt == '&' and rel in ("child-sequence", "child-choice")
                    and elt.textValue.strip())
                if ((reqt == '+' and not relations)
                        or (reqt == '-' and relations) or (issue)
                        or disallowedChildText):
                    code = "{}:{}".format(
                        ixSect[elt.namespaceURI].get(elt.localName,
                                                     "other")["constraint"],
                        {
                            'ancestor': "ancestorNode",
                            'parent': "parentNode",
                            'child-choice': "childNodes",
                            'child-sequence': "childNodes",
                            'child-or-text': "childNodesOrText",
                            'descendant': "descendantNodes"
                        }[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"
                        }.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}"
                            ).format(
                                elt.localName, {
                                    '+': "must",
                                    '-': "may not",
                                    '&': "may only",
                                    '?': "may",
                                    '+': "must",
                                    '^': "must",
                                    '1': "must"
                                }[reqt], {
                                    'ancestor': "be nested in",
                                    'parent': "have parent",
                                    'child-choice': "have child",
                                    'child-sequence': "have child",
                                    'child-or-text': "have child or text,",
                                    'descendant': "have as descendant"
                                }[rel],
                                '' if rel == 'child-or-text' else ', '.join(
                                    str(r.elementQname) for r in relations) if
                                names == ('*', ) and relations else ", ".join(
                                    "{}:{}".format(namespacePrefix, n)
                                    for n in names), issue,
                                " and no child text (\"{}\")".format(
                                    elt.textValue.strip()[:32])
                                if disallowedChildText else "")
                    modelXbrl.error(
                        code,
                        msg,
                        modelObject=[elt] + relations,
                        requirement=reqt,
                        messageCodes=
                        ("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                         "ix{ver.sect}:childNodesOrTextRequired",
                         "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                         "ix{ver.sect}:descendantNodesDisallowed",
                         "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            hasText = (elt.text or "") or any(
                (childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(
                    elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(
                        ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"
                          ),
                        modelObject=[elt] + ancestorNonFractions,
                        fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid(
                    )  # prevent further validation or cascading errors
            else:
                if ((childElts and
                     (len(childElts) != 1 or childElts[0].namespaceURI != _ixNS
                      or childElts[0].localName != "nonFraction"))
                        or (childElts and hasText)):
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS,
                                                      elt.localName)
                if ancestorFractions:
                    modelXbrl.error(
                        ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"
                          ),
                        modelObject=[elt] + ancestorFractions,
                        fact=elt.qname)
            else:
                nonFrChildren = [
                    e for e in XmlUtil.children(elt, _ixNS, '*')
                    if e.localName not in ("fraction", "numerator",
                                           "denominator")
                ]
                if nonFrChildren:
                    modelXbrl.error(
                        ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"
                          ),
                        modelObject=[elt] + nonFrChildren,
                        fact=elt.qname,
                        children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(
                        elt, XbrlConst.ixbrl11, "fraction"):  # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(
                            ancestorFraction.get("unitRef")):
                        modelXbrl.error(
                            ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"
                              ),
                            modelObject=[elt] + nonFrChildren,
                            fact=elt.qname,
                            unitRef=elt.get("unitRef"),
                            unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator",
                             "nonNumeric"):
            fmt = elt.format
            if fmt:
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[
                        fmt.namespaceURI]:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        name=fmt.localName)
                    elt.setInvalid()

    def ixToXhtml(fromRoot):
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in (
                    'version',  # used in inline test cases but not valid xhtml
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'
            ):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[
                            fromChild.namespaceURI]:
                        modelXbrl.error(
                            ixMsgCode("elementNameInvalid", ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"
                              ),
                            modelObject=fromChild,
                            element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[
                                fromChild.namespaceURI].get(
                                    fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(
                                    ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"
                                      ),
                                    modelObject=fromChild,
                                    attribute=attrTag,
                                    element=fromChild.localName)
                if excludeSubtree or (fromChild.localName
                                      in {"references", "resources"}
                                      and isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {
                            "footnote", "nonNumeric", "continuation"
                    } and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag,
                                           attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("html:syntaxError",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl,
                                                   elt,
                                                   elt,
                                                   "InlineXBRL",
                                                   "EFM.5.02.05.",
                                                   isInline=True)
    except XMLSyntaxError as err:
        modelXbrl.error("html:syntaxError",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
Exemplo n.º 3
0
def load(modelXbrl, uri, base=None, referringElement=None, isEntry=False, isDiscovered=False, isIncluded=None, namespace=None, reloadCache=False):
    if referringElement is None: # used for error messages
        referringElement = modelXbrl
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri   # for error loggiong during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(("EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06"),
                _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
                modelObject=referringElement, url=normalizedUri, blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri   # for error loggiong during loading
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None: # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                _("File can not be loaded: %(fileName)s"),
                modelObject=referringElement, fileName=mappedUri)
        type = Type.Unknown
        return None
    
    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument
    
    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem and 
            modelXbrl.modelManager.disclosureSystem.validateFileText):
            file, _encoding = ValidateFilingText.checkfile(modelXbrl,filepath)
        else:
            file, _encoding = modelXbrl.fileSource.file(filepath)
        _parser, _parserLookupName, _parserLookupClass = parser(modelXbrl,filepath)
        xmlDocument = etree.parse(file,parser=_parser,base_url=filepath)
        file.close()
    except (EnvironmentError, KeyError) as err:  # missing zip file raises KeyError
        modelXbrl.error("IOerror",
                _("%(fileName)s: file error: %(error)s"),
                modelObject=referringElement, fileName=os.path.basename(uri), error=str(err))
        type = Type.Unknown
        if file:
            file.close()
        return None
    except (etree.LxmlError,
            ValueError) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error("xmlSchema:syntax",
                _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                modelObject=referringElement, fileName=os.path.basename(uri), 
                error=str(err), sourceAction=("including" if isIncluded else "importing"))
        type = Type.Unknown
        if file:
            file.close()
        return None
    
    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None
    
    rootNode = xmlDocument.getroot()
    if rootNode is not None:
        ln = rootNode.localName
        ns = rootNode.namespaceURI
        
        # type classification
        if ns == XbrlConst.xsd and ln == "schema":
            type = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                type = Type.LINKBASE
            elif ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xhtml and \
             (ln == "html" or ln == "xhtml"):
            type = Type.Unknown
            if XbrlConst.ixbrl in rootNode.nsmap.values():
                type = Type.INLINEXBRL
        elif ln == "report" and ns == XbrlConst.ver:
            type = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            type = Type.TESTCASESINDEX
        elif ln == "testcase":
            type = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            type = Type.REGISTRY
        elif ln == "rss":
            type = Type.RSSFEED
        else:
            type = Type.Unknown
            nestedInline = None
            for htmlElt in rootNode.iter(tag="{http://www.w3.org/1999/xhtml}html"):
                nestedInline = htmlElt
                break
            if nestedInline is None:
                for htmlElt in rootNode.iter(tag="{http://www.w3.org/1999/xhtml}xhtml"):
                    nestedInline = htmlElt
                    break
            if nestedInline is not None:
                if XbrlConst.ixbrl in nestedInline.nsmap.values():
                    type = Type.INLINEXBRL
                    rootNode = nestedInline

        #create modelDocument object or subtype as identified
        if type == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, type, mappedUri, filepath, xmlDocument)
        elif type == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject 
            modelDocument = ModelRssObject(modelXbrl, type, mappedUri, filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath, xmlDocument)
        rootNode.init(modelDocument)
        modelDocument.parser = _parser # needed for XmlUtil addChild's makeelement 
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)
        modelDocument.documentEncoding = _encoding

        if isEntry or isDiscovered:
            modelDocument.inDTS = True
        
        # discovery (parsing)
        if type == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif type == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif type == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif type == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.TESTCASESINDEX:
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif type == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif type == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
    return modelDocument
Exemplo n.º 4
0
def xhtmlValidate(modelXbrl, elt):
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}
    
    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                    "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:                
                    modelXbrl.error(ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)
                
                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = ({"scheme", "periodType",     "balance", "contextRef", "unitRef", "precision", "decimals"} -
                                        {"fraction": {"contextRef", "unitRef"},
                                         "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                                         "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(ixMsgCode("inlineElementAttributes",elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(ixMsgCode("attributeNotExpected",elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)
                
    def checkHierarchyConstraints(elt):
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*',)
                else:
                    nameFilter = names
                if nameFilter == ('*',):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[0] and nameFilter[0][0] == "{":
                    namespaceFilter, _sep, nameFilter = nameFilter[0][1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt,namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {"ancestor": XmlUtil.ancestor, 
                             "parent": XmlUtil.parent, 
                             "child-choice": XmlUtil.children, 
                             "child-sequence": XmlUtil.children,
                             "child-or-text": XmlUtil.children,
                             "descendant": XmlUtil.descendants}[rel](
                            elt, 
                            namespaceFilter,
                            nameFilter)
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False, ixContinuation=False, ixResolveUris=False)
                issue = ''
                if reqt in ('^',):
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1',) and not elt.isNil:
                    if sum(r.localName in names and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [str(r.elementQname)
                                  for r in relations
                                  if not (r.tag in names or
                                          (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(reqt == '&' and 
                                           rel in ("child-sequence", "child-choice") 
                                           and elt.textValue.strip())
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or
                    (issue) or disallowedChildText):
                    code = "{}:{}".format(ixSect[elt.namespaceURI].get(elt.localName,"other")["constraint"], {
                           'ancestor': "ancestorNode",
                           'parent': "parentNode",
                           'child-choice': "childNodes",
                           'child-sequence': "childNodes",
                           'child-or-text': "childNodesOrText",
                           'descendant': "descendantNodes"}[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"}.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}").format(
                                elt.localName,
                                {'+': "must", '-': "may not", '&': "may only",
                                 '?': "may", '+': "must", '^': "must", '1': "must"}[reqt],
                                {'ancestor': "be nested in",
                                 'parent': "have parent",
                                 'child-choice': "have child",
                                 'child-sequence': "have child",
                                 'child-or-text': "have child or text,",
                                 'descendant': "have as descendant"}[rel],
                                '' if rel == 'child-or-text' else
                                ', '.join(str(r.elementQname) for r in relations)
                                if names == ('*',) and relations else
                                ", ".join("{}:{}".format(namespacePrefix, n) for n in names),
                                issue,
                                " and no child text (\"{}\")".format(elt.textValue.strip()[:32]) if disallowedChildText else "")
                    modelXbrl.error(code, msg, 
                                    modelObject=[elt] + relations, requirement=reqt,
                                    messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                                  "ix{ver.sect}:childNodesOrTextRequired",
                                                  "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                                  "ix{ver.sect}:descendantNodesDisallowed",
                                                  "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            hasText = (elt.text or "") or any((childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"),
                        modelObject=[elt] + ancestorNonFractions, fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid() # prevent further validation or cascading errors
            else:
                if ((childElts and (len(childElts) != 1 or childElts[0].namespaceURI != _ixNS or childElts[0].localName != "nonFraction")) or
                    (childElts and hasText)):
                    modelXbrl.error(ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorFractions:
                    modelXbrl.error(ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"),
                        modelObject=[elt] + ancestorFractions, fact=elt.qname)
            else:
                nonFrChildren = [e for e in XmlUtil.children(elt, _ixNS, '*') if e.localName not in ("fraction", "numerator", "denominator")]
                if nonFrChildren:
                    modelXbrl.error(ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"),
                        modelObject=[elt] + nonFrChildren, fact=elt.qname, children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(elt, XbrlConst.ixbrl11, "fraction"): # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(ancestorFraction.get("unitRef")):
                        modelXbrl.error(ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"),
                            modelObject=[elt] + nonFrChildren, fact=elt.qname, unitRef=elt.get("unitRef"), unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator", "nonNumeric"):
            fmt = elt.format
            if fmt:
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[fmt.namespaceURI]:
                    modelXbrl.error(ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, name=fmt.localName)
                    elt.setInvalid()

                    
    def ixToXhtml(fromRoot):
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in ('version', # used in inline test cases but not valid xhtml
                               '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[fromChild.namespaceURI]:
                        modelXbrl.error(ixMsgCode("elementNameInvalid",ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"),
                            modelObject=fromChild, element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName,[]):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"),
                                    modelObject=fromChild, attribute=attrTag, element=fromChild.localName)
                if excludeSubtree or (fromChild.localName in {"references", "resources"} and isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail    
                            
    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate( ixToXhtml(elt) ):
            modelXbrl.error("html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.", isInline=True) 
    except XMLSyntaxError as err:
        modelXbrl.error("html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(), error=dtd.error_log.filter_from_errors())
Exemplo n.º 5
0
def validateXbrlFinally(val, *args, **kwargs):
    if not (val.validateFERCplugin):
        return

    _xhtmlNs = "{{{}}}".format(xhtml)
    _xhtmlNsLen = len(_xhtmlNs)
    modelXbrl = val.modelXbrl
    modelDocument = modelXbrl.modelDocument
    if not modelDocument:
        return  # never loaded properly
    disclosureSystem = val.disclosureSystem

    _statusMsg = _("validating {0} filing rules").format(
        val.disclosureSystem.name)
    modelXbrl.profileActivity()
    modelXbrl.modelManager.showStatus(_statusMsg)

    isInlineXbrl = modelXbrl.modelDocument.type in (
        ModelDocument.Type.INLINEXBRL,
        ModelDocument.Type.INLINEXBRLDOCUMENTSET)
    requiredFactLang = disclosureSystem.defaultXmlLang.lower(
    ) if disclosureSystem.defaultXmlLang else disclosureSystem.defaultXmlLang

    # inline doc set has multiple instance names to check
    if modelXbrl.modelDocument.type == ModelDocument.Type.INLINEXBRLDOCUMENTSET:
        instanceNames = [
            ixDoc.basename
            for ixDoc in modelXbrl.modelDocument.referencesDocument.keys()
            if ixDoc.type == ModelDocument.Type.INLINEXBRL
        ]
        xbrlInstRoots = modelXbrl.ixdsHtmlElements
    else:  # single instance document to check is the entry point document
        instanceNames = [modelXbrl.modelDocument.basename]
        xbrlInstRoots = [modelXbrl.modelDocument.xmlDocument.getroot()]

    #6.5.15 facts with xml in text blocks
    ValidateFilingText.validateTextBlockFacts(
        modelXbrl,
        {
            True: ("gif", "jpg", "jpeg", "png"),  # img file extensions
            False:
            ("gif", "jpeg", "png")  # mime types: jpg is not a valid mime type
        })

    # check footnotes text
    if isInlineXbrl:
        _linkEltIter = (
            linkPrototype for linkKey, links in modelXbrl.baseSets.items()
            for linkPrototype in links if linkPrototype.modelDocument.type in (
                ModelDocument.Type.INLINEXBRL,
                ModelDocument.Type.INLINEXBRLDOCUMENTSET) and linkKey[1]
            and linkKey[2] and linkKey[3]  # fully specified roles
            and linkKey[0] != "XBRL-footnotes")
    else:
        _linkEltIter = xbrlInstRoots[0].iterdescendants(
            tag="{http://www.xbrl.org/2003/linkbase}footnoteLink")
    for footnoteLinkElt in _linkEltIter:
        if isinstance(footnoteLinkElt, (ModelObject, LinkPrototype)):
            for child in footnoteLinkElt:
                if isinstance(child,
                              (ModelObject, LocPrototype, ArcPrototype)):
                    xlinkType = child.get("{http://www.w3.org/1999/xlink}type")
                    if xlinkType == "resource" or isinstance(
                            child, ModelInlineFootnote):  # footnote
                        if not isInlineXbrl:  # inline content was validated before and needs continuations assembly
                            ValidateFilingText.validateFootnote(
                                modelXbrl, child)

    # same identifier in all contexts (EFM 6.5.3)
    entityIdentifiers = set()
    for xbrlInstRoot in xbrlInstRoots:  # check all inline docs in ix doc set
        for entityIdentifierElt in xbrlInstRoot.iterdescendants(
                "{http://www.xbrl.org/2003/instance}identifier"):
            if isinstance(entityIdentifierElt, ModelObject):
                entityIdentifiers.add("{}#{}".format(
                    entityIdentifierElt.get("scheme"),
                    XmlUtil.text(entityIdentifierElt)))
    if len(entityIdentifiers) > 1:
        modelXbrl.error(
            "FERC.6.05.03",
            _("There are more than one entity identifiers: %(entityIdentifiers)s."
              ),
            modelObject=modelXbrl,
            entityIdentifiers=", ".join(sorted(entityIdentifiers)))
    for ei in sorted(entityIdentifiers):
        scheme, _sep, identifier = ei.rpartition("#")
        if not disclosureSystem.identifierSchemePattern.match(
                scheme) or not disclosureSystem.identifierValuePattern.match(
                    identifier):
            modelXbrl.error(
                "FERC.6.05.01",
                _("Entity identifier %(identifier)s, or scheme %(scheme)s does not adhere "
                  "to the standard naming convention of <identifier scheme='http://www.ferc.gov/CID'>Cnnnnnn</identifier>'.  "
                  ),
                modelObject=modelXbrl,
                scheme=scheme,
                identifier=identifier)

    #6.5.4 scenario
    segContexts = set()
    uniqueContextHashes = {}
    contextIDs = set()
    precisionFacts = set()
    formType = None
    formEntrySchema = None
    factsForLang = {}
    keysNotDefaultLang = {}
    allFormEntryXsd = ()

    for c in modelXbrl.contexts.values():
        if XmlUtil.hasChild(c, xbrli, "segment"):
            segContexts.add(c)
        h = c.contextDimAwareHash
        if h in uniqueContextHashes:
            if c.isEqualTo(uniqueContextHashes[h]):
                modelXbrl.error(
                    "FERC.6.05.07",
                    _("The instance document contained more than one context equivalent to %(context)s (%(context2)s).  "
                      "Please remove duplicate contexts from the instance."),
                    modelObject=(c, uniqueContextHashes[h]),
                    context=c.id,
                    context2=uniqueContextHashes[h].id)
        else:
            uniqueContextHashes[h] = c
        contextIDs.add(c.id)

    if segContexts:
        modelXbrl.error(
            "FERC.6.05.04",
            _("There must be no contexts with segment, but %(count)s was(were) found: %(context)s."
              ),
            modelObject=segContexts,
            count=len(segContexts),
            context=", ".join(sorted(c.id for c in segContexts)))

    # unused contexts
    for f in modelXbrl.facts:
        factContextID = f.contextID
        contextIDs.discard(factContextID)
        if f.isNumeric:
            if f.precision is not None:
                precisionFacts.add(f)
        elif not f.isNil:
            langTestKey = "{0},{1}".format(f.qname, f.contextID)
            factsForLang.setdefault(langTestKey, []).append(f)
            lang = f.xmlLang
            if lang and lang.lower(
            ) != requiredFactLang:  # not lang.startswith(factLangStartsWith):
                keysNotDefaultLang[langTestKey] = f
        if getattr(f, "xValid", 0) >= VALID:
            if f.qname.localName == "FormType":
                formType = f.xValue
                formNum = re.sub("([0-9]+).*", r"\1", formType)
                formLtr = re.match("[^A-Z]*([A-Z]?)", formType).group(1)
                txDate = re.sub(
                    "http://ferc.gov/form/([0-9]{4}-[0-9]{2}-[0-9]{2})/ferc",
                    r"\1", f.qname.namespaceURI)

                formEntryXsd = "https://eCollection.ferc.gov/taxonomy/form{}/{}/form/form{}{}/form-{}{}_{}.xsd".format(
                    formNum, txDate, formNum, formLtr, formNum, formLtr,
                    txDate)

                formEntryXsdUAT = formEntryXsd.replace("eCollection",
                                                       "uat.eforms")
                formEntryXsdTest = formEntryXsd.replace(
                    "eCollection", "test.eforms")
                formEntryXsdDev = formEntryXsd.replace("eCollection",
                                                       "dev.eforms")

                confFormEntryXsd = "https://eCollection.ferc.gov/taxonomy/form{}/{}/ferc-core-footnote-roles_{}.xsd".format(
                    formNum, txDate, txDate)

                confFormEntryXsdUAT = confFormEntryXsd.replace(
                    "eCollection", "uat.eforms")
                confFormEntryXsdTest = confFormEntryXsd.replace(
                    "eCollection", "test.eforms")
                confFormEntryXsdDev = confFormEntryXsd.replace(
                    "eCollection", "dev.eforms")

                allFormEntryXsd = [
                    formEntryXsd, formEntryXsdUAT, formEntryXsdTest,
                    formEntryXsdDev, confFormEntryXsd, confFormEntryXsdUAT,
                    confFormEntryXsdTest, confFormEntryXsdDev
                ]

    unexpectedXsds = set(doc.modelDocument.uri for doc, referencingDoc in
                         modelXbrl.modelDocument.referencesDocument.items()
                         if "href" in referencingDoc.referenceTypes
                         if doc.modelDocument.uri not in allFormEntryXsd)
    if unexpectedXsds:
        modelXbrl.error(
            "FERC.22.00",
            _("The instance document contained unexpected schema references %(schemaReferences)s."
              ),
            modelXbrl=modelXbrl,
            schemaReferences=", ".join(sorted(unexpectedXsds)))

    if contextIDs:  # check if contextID is on any undefined facts
        for undefinedFact in modelXbrl.undefinedFacts:
            contextIDs.discard(undefinedFact.get("contextRef"))
        if contextIDs:
            modelXbrl.error(
                "FERC.6.05.08",
                _("The instance document contained context(s) %(contextIDs)s that was(were) not used in any fact."
                  ),
                modelXbrl=modelXbrl,
                contextIDs=", ".join(str(c) for c in contextIDs))
    if precisionFacts:
        modelXbrl.error(
            "FERC.6.05.17",
            _("The instance document contains elements using the precision attribute."
              ),
            modelObject=precisionFacts)

    #6.5.14 facts without english text
    for keyNotDefaultLang, factNotDefaultLang in keysNotDefaultLang.items():
        anyDefaultLangFact = False
        for fact in factsForLang[keyNotDefaultLang]:
            if fact.xmlLang.lower(
            ) == requiredFactLang:  #.startswith(factLangStartsWith):
                anyDefaultLangFact = True
                break
        if not anyDefaultLangFact:
            val.modelXbrl.error(
                "FERC.6.05.14",
                _("Element %(fact)s in context %(contextID)s has text with xml:lang other than '%(lang2)s' (%(lang)s) without matching English text.  "
                  ),
                modelObject=factNotDefaultLang,
                fact=factNotDefaultLang.qname,
                contextID=factNotDefaultLang.contextID,
                lang=factNotDefaultLang.xmlLang,
                lang2=disclosureSystem.defaultXmlLang
            )  # report lexical format default lang

    modelXbrl.profileActivity(_statusMsg, minTimeToShow=0.0)
    modelXbrl.modelManager.showStatus(None)
Exemplo n.º 6
0
def load(modelXbrl, uri, base=None, isEntry=False, isIncluded=None, namespace=None, reloadCache=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
                "Prohibited file for filings{1}: {0}".format(normalizedUri, _(" blocked") if blocked else ""), 
                "err", "EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None: # error such as HTTPerror is already logged
        modelXbrl.error(
                "File can not be loaded: {0}".format(
                mappedUri),
                "err", "FileNotLoadable")
        type = Type.Unknown
        return None
    
    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument
    
    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if modelXbrl.modelManager.validateDisclosureSystem:
            file = ValidateFilingText.checkfile(modelXbrl,filepath)
        else:
            file = modelXbrl.fileSource.file(filepath)
        xmlDocument = xml.dom.minidom.parse(file)
        file.close()
    except EnvironmentError as err:
        modelXbrl.error(
                "{0}: file error: {1}".format(
                os.path.basename(uri), err),
                "err", "IOerror")
        type = Type.Unknown
        if file:
            file.close()
        return None
    except (xml.parsers.expat.ExpatError,
            xml.dom.DOMException,
            ValueError) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
                "{0}: import error: {1}".format(
                os.path.basename(uri), err),
                "err", "XMLsyntax")
        type = Type.Unknown
        if file:
            file.close()
        return None
    
    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None
    
    for rootNode in xmlDocument.childNodes:
        if rootNode.nodeType == 1: #element
            ln = rootNode.localName
            ns = rootNode.namespaceURI
            
            # type classification
            if ns == XbrlConst.xsd and ln == "schema":
                type = Type.SCHEMA
            elif ns == XbrlConst.link:
                if ln == "linkbase":
                    type = Type.LINKBASE
                elif ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xbrli:
                if ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xhtml and \
                 ln == "html" or ln == "xhtml":
                type = Type.Unknown
                for i in range(len(rootNode.attributes)):
                    if rootNode.attributes.item(i).value == XbrlConst.ixbrl:
                        type = Type.INLINEXBRL
                        break
                XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability
            elif ln == "report" and ns == XbrlConst.ver:
                type = Type.VERSIONINGREPORT
            elif ln == "testcases" or ln == "documentation":
                type = Type.TESTCASESINDEX
            elif ln == "testcase":
                type = Type.TESTCASE
            elif ln == "registry" and ns == XbrlConst.registry:
                type = Type.REGISTRY
            elif ln == "rss":
                type = Type.RSSFEED
            else:
                type = Type.Unknown
                nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml, ("html", "xhtml"))
                if nestedInline:
                    for i in range(len(nestedInline.attributes)):
                        if nestedInline.attributes.item(i).value == XbrlConst.ixbrl:
                            type = Type.INLINEXBRL
                            rootNode = nestedInline
                            break
                XmlUtil.markIdAttributes(rootNode)  # required for minidom searchability

            #create modelDocument object or subtype as identified
            if type == Type.VERSIONINGREPORT:
                from arelle.ModelVersReport import ModelVersReport
                modelDocument = ModelVersReport(modelXbrl, type, mappedUri, filepath, xmlDocument)
            elif type == Type.RSSFEED:
                from arelle.ModelRssObject import ModelRssObject 
                modelDocument = ModelRssObject(modelXbrl, type, mappedUri, filepath, xmlDocument)
            else:
                modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath, xmlDocument)
            modelDocument.xmlRootElement = rootNode
            modelDocument.schemaLocationElements.add(rootNode)

            if isEntry:
                modelDocument.inDTS = True
            
            # discovery (parsing)
            if type == Type.SCHEMA:
                modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
            elif type == Type.LINKBASE:
                modelDocument.linkbaseDiscover(rootNode)
            elif type == Type.INSTANCE:
                modelDocument.instanceDiscover(rootNode)
            elif type == Type.INLINEXBRL:
                modelDocument.inlineXbrlDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.TESTCASESINDEX:
                modelDocument.testcasesIndexDiscover(xmlDocument)
            elif type == Type.TESTCASE:
                modelDocument.testcaseDiscover(rootNode)
            elif type == Type.REGISTRY:
                modelDocument.registryDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.RSSFEED:
                modelDocument.rssFeedDiscover(rootNode)
            break
    return modelDocument
Exemplo n.º 7
0
def xhtmlValidate(modelXbrl, elt):
    from lxml.etree import DTD, XMLSyntaxError
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    
    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                    "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:                
                    modelXbrl.error(ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)
                
                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = ({"scheme", "periodType",     "balance", "contextRef", "unitRef", "precision", "decimals"} -
                                        {"fraction": {"contextRef", "unitRef"},
                                         "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                                         "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(ixMsgCode("inlineElementAttributes",elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(ixMsgCode("attributeNotExpected",elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)
                
    def checkHierarchyConstraints(elt):
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^'):
                    nameFilter = ('*',)
                else:
                    nameFilter = names
                if nameFilter == ('*',):
                    namespaceFilter = namespacePrefix = '*'
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {"ancestor": XmlUtil.ancestor, 
                             "parent": XmlUtil.parent, 
                             "child-choice": XmlUtil.children, 
                             "child-sequence": XmlUtil.children,
                             "child-or-text": XmlUtil.children,
                             "descendant": XmlUtil.descendants}[rel](
                            elt, 
                            namespaceFilter,
                            nameFilter)
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False, ixContinuation=False)
                issue = ''
                if reqt == '^':
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [str(r.elementQname)
                                  for r in relations
                                  if not (r.tag in names or
                                          (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or
                    (issue)):
                    code = "{}:{}".format(ixSect[elt.namespaceURI].get(elt.localName,"other")["constraint"], {
                           'ancestor': "ancestorNode",
                           'parent': "parentNode",
                           'child-choice': "childNodes",
                           'child-sequence': "childNodes",
                           'child-or-text': "childNodesOrText",
                           'descendant': "descendantNodes"}[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified"}.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element").format(
                                elt.localName,
                                {'+': "must", '-': "may not", '&': "may only",
                                 '?': "may", '+': "must"}[reqt],
                                {'ancestor': "be nested in",
                                 'parent': "have parent",
                                 'child-choice': "have child",
                                 'child-sequence': "have child",
                                 'child-or-text': "have child or text,",
                                 'descendant': "have as descendant"}[rel],
                                '' if rel == 'child-or-text' else
                                ', '.join(str(r.elementQname) for r in relations)
                                if names == ('*',) and relations else
                                ", ".join("{}:{}".format(namespacePrefix, n) for n in names),
                                issue)
                    modelXbrl.error(code, msg, 
                                    modelObject=[elt] + relations, requirement=reqt,
                                    messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                                  "ix{ver.sect}:childNodesOrTextRequired",
                                                  "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                                  "ix{ver.sect}:descendantNodesDisallowed",
                                                  "ix{ver.sect}:parentNodeRequired"))
                
    def ixToXhtml(fromRoot):
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in ('version', # used in inline test cases but not valid xhtml
                               '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[fromChild.namespaceURI]:
                        modelXbrl.error(ixMsgCode("elementNameInvalid",ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"),
                            modelObject=fromChild, element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName,[]):
                            if fromChild.get(attrTag) is None:
                                modelXbrl.error(ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"),
                                    modelObject=elt, attribute=attrTag, element=fromChild.localName)
                if excludeSubtree or (fromChild.localName in {"references", "resources"} and isIxNs):
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail    
                            
    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate( ixToXhtml(elt) ):
            modelXbrl.error("html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.", isInline=True) 
    except XMLSyntaxError as err:
        modelXbrl.error("html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(), error=dtd.error_log.filter_from_errors())
Exemplo n.º 8
0
def load(modelXbrl,
         uri,
         base=None,
         referringElement=None,
         isEntry=False,
         isDiscovered=False,
         isIncluded=None,
         namespace=None,
         reloadCache=False):
    if referringElement is None:  # used for error messages
        referringElement = modelXbrl
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, base)
    if isEntry:
        modelXbrl.entryLoadingUrl = normalizedUri  # for error loggiong during loading
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            ("EFM.6.22.02", "GFM.1.1.3", "SBR.NL.2.1.0.06"
             if normalizedUri.startswith("http") else "SBR.NL.2.2.0.17"),
            _("Prohibited file for filings %(blockedIndicator)s: %(url)s"),
            modelObject=referringElement,
            url=normalizedUri,
            blockedIndicator=_(" blocked") if blocked else "")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[
            normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if isEntry:
        modelXbrl.entryLoadingUrl = mappedUri  # for error loggiong during loading
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
            mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("FileNotLoadable",
                        _("File can not be loaded: %(fileName)s"),
                        modelObject=referringElement,
                        fileName=mappedUri)
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if (modelXbrl.modelManager.validateDisclosureSystem
                and modelXbrl.modelManager.disclosureSystem.validateFileText):
            file, _encoding = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file, _encoding = modelXbrl.fileSource.file(filepath)
        _parser, _parserLookupName, _parserLookupClass = parser(
            modelXbrl, filepath)
        xmlDocument = etree.parse(file, parser=_parser, base_url=filepath)
        file.close()
    except (EnvironmentError,
            KeyError) as err:  # missing zip file raises KeyError
        if file:
            file.close()
        # retry in case of well known schema locations
        if not isIncluded and namespace and namespace in XbrlConst.standardNamespaceSchemaLocations and uri != XbrlConst.standardNamespaceSchemaLocations[
                namespace]:
            return load(modelXbrl,
                        XbrlConst.standardNamespaceSchemaLocations[namespace],
                        base, referringElement, isEntry, isDiscovered,
                        isIncluded, namespace, reloadCache)
        modelXbrl.error("IOerror",
                        _("%(fileName)s: file error: %(error)s"),
                        modelObject=referringElement,
                        fileName=os.path.basename(uri),
                        error=str(err))
        return None
    except (
            etree.LxmlError, ValueError
    ) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        if file:
            file.close()
        if not isEntry and str(
                err) == "Start tag expected, '<' not found, line 1, column 1":
            return ModelDocument(modelXbrl, Type.UnknownNonXML, mappedUri,
                                 filepath, None)
        else:
            modelXbrl.error(
                "xmlSchema:syntax",
                _("%(error)s, %(fileName)s, %(sourceAction)s source element"),
                modelObject=referringElement,
                fileName=os.path.basename(uri),
                error=str(err),
                sourceAction=("including" if isIncluded else "importing"))
            return None

    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    rootNode = xmlDocument.getroot()
    if rootNode is not None:
        ln = rootNode.localName
        ns = rootNode.namespaceURI

        # type classification
        if ns == XbrlConst.xsd and ln == "schema":
            type = Type.SCHEMA
        elif ns == XbrlConst.link:
            if ln == "linkbase":
                type = Type.LINKBASE
            elif ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xbrli:
            if ln == "xbrl":
                type = Type.INSTANCE
        elif ns == XbrlConst.xhtml and \
             (ln == "html" or ln == "xhtml"):
            type = Type.UnknownXML
            if XbrlConst.ixbrl in rootNode.nsmap.values():
                type = Type.INLINEXBRL
        elif ln == "report" and ns == XbrlConst.ver:
            type = Type.VERSIONINGREPORT
        elif ln == "testcases" or ln == "documentation":
            type = Type.TESTCASESINDEX
        elif ln == "testcase":
            type = Type.TESTCASE
        elif ln == "registry" and ns == XbrlConst.registry:
            type = Type.REGISTRY
        elif ln == "rss":
            type = Type.RSSFEED
        elif ln == "ptvl":
            type = Type.ARCSINFOSET
        elif ln == "facts":
            type = Type.FACTDIMSINFOSET
        else:
            type = Type.UnknownXML
            nestedInline = None
            for htmlElt in rootNode.iter(
                    tag="{http://www.w3.org/1999/xhtml}html"):
                nestedInline = htmlElt
                break
            if nestedInline is None:
                for htmlElt in rootNode.iter(
                        tag="{http://www.w3.org/1999/xhtml}xhtml"):
                    nestedInline = htmlElt
                    break
            if nestedInline is not None:
                if XbrlConst.ixbrl in nestedInline.nsmap.values():
                    type = Type.INLINEXBRL
                    rootNode = nestedInline

        #create modelDocument object or subtype as identified
        if type == Type.VERSIONINGREPORT:
            from arelle.ModelVersReport import ModelVersReport
            modelDocument = ModelVersReport(modelXbrl, type, mappedUri,
                                            filepath, xmlDocument)
        elif type == Type.RSSFEED:
            from arelle.ModelRssObject import ModelRssObject
            modelDocument = ModelRssObject(modelXbrl, type, mappedUri,
                                           filepath, xmlDocument)
        else:
            modelDocument = ModelDocument(modelXbrl, type, mappedUri, filepath,
                                          xmlDocument)
        rootNode.init(modelDocument)
        modelDocument.parser = _parser  # needed for XmlUtil addChild's makeelement
        modelDocument.parserLookupName = _parserLookupName
        modelDocument.parserLookupClass = _parserLookupClass
        modelDocument.xmlRootElement = rootNode
        modelDocument.schemaLocationElements.add(rootNode)
        modelDocument.documentEncoding = _encoding

        if isEntry or isDiscovered:
            modelDocument.inDTS = True

        # discovery (parsing)
        if type == Type.SCHEMA:
            modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
        elif type == Type.LINKBASE:
            modelDocument.linkbaseDiscover(rootNode)
        elif type == Type.INSTANCE:
            modelDocument.instanceDiscover(rootNode)
        elif type == Type.INLINEXBRL:
            modelDocument.inlineXbrlDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.TESTCASESINDEX:
            modelDocument.testcasesIndexDiscover(xmlDocument)
        elif type == Type.TESTCASE:
            modelDocument.testcaseDiscover(rootNode)
        elif type == Type.REGISTRY:
            modelDocument.registryDiscover(rootNode)
        elif type == Type.VERSIONINGREPORT:
            modelDocument.versioningReportDiscover(rootNode)
        elif type == Type.RSSFEED:
            modelDocument.rssFeedDiscover(rootNode)
    return modelDocument
Exemplo n.º 9
0
def load(modelXbrl,
         uri,
         base=None,
         isEntry=False,
         isIncluded=None,
         namespace=None,
         reloadCache=False):
    normalizedUri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        uri, base)
    if isEntry:
        modelXbrl.uri = normalizedUri
        modelXbrl.uriDir = os.path.dirname(normalizedUri)
        for i in range(modelXbrl.modelManager.disclosureSystem.
                       maxSubmissionSubdirectoryEntryNesting):
            modelXbrl.uriDir = os.path.dirname(modelXbrl.uriDir)
    if modelXbrl.modelManager.validateDisclosureSystem and \
       not normalizedUri.startswith(modelXbrl.uriDir) and \
       not modelXbrl.modelManager.disclosureSystem.hrefValid(normalizedUri):
        blocked = modelXbrl.modelManager.disclosureSystem.blockDisallowedReferences
        modelXbrl.error(
            "Prohibited file for filings{1}: {0}".format(
                normalizedUri,
                _(" blocked") if blocked else ""), "err", "EFM.6.22.02",
            "GFM.1.1.3", "SBR.NL.2.1.0.06")
        if blocked:
            return None
    if normalizedUri in modelXbrl.modelManager.disclosureSystem.mappedFiles:
        mappedUri = modelXbrl.modelManager.disclosureSystem.mappedFiles[
            normalizedUri]
    else:  # handle mapped paths
        mappedUri = normalizedUri
        for mapFrom, mapTo in modelXbrl.modelManager.disclosureSystem.mappedPaths:
            if normalizedUri.startswith(mapFrom):
                mappedUri = mapTo + normalizedUri[len(mapFrom):]
                break
    if modelXbrl.fileSource.isInArchive(mappedUri):
        filepath = mappedUri
    else:
        filepath = modelXbrl.modelManager.cntlr.webCache.getfilename(
            mappedUri, reload=reloadCache)
        if filepath:
            uri = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(filepath)
    if filepath is None:  # error such as HTTPerror is already logged
        modelXbrl.error("File can not be loaded: {0}".format(mappedUri), "err",
                        "FileNotLoadable")
        type = Type.Unknown
        return None

    modelDocument = modelXbrl.urlDocs.get(mappedUri)
    if modelDocument:
        return modelDocument

    # load XML and determine type of model document
    modelXbrl.modelManager.showStatus(_("parsing {0}").format(uri))
    file = None
    try:
        if modelXbrl.modelManager.validateDisclosureSystem:
            file = ValidateFilingText.checkfile(modelXbrl, filepath)
        else:
            file = modelXbrl.fileSource.file(filepath)
        xmlDocument = xml.dom.minidom.parse(file)
        file.close()
    except EnvironmentError as err:
        modelXbrl.error(
            "{0}: file error: {1}".format(os.path.basename(uri), err), "err",
            "IOerror")
        type = Type.Unknown
        if file:
            file.close()
        return None
    except (
            xml.parsers.expat.ExpatError, xml.dom.DOMException, ValueError
    ) as err:  # ValueError raised on bad format of qnames, xmlns'es, or parameters
        modelXbrl.error(
            "{0}: import error: {1}".format(os.path.basename(uri), err), "err",
            "XMLsyntax")
        type = Type.Unknown
        if file:
            file.close()
        return None

    # identify document
    #modelXbrl.modelManager.addToLog("discovery: {0}".format(
    #            os.path.basename(uri)))
    modelXbrl.modelManager.showStatus(_("loading {0}").format(uri))
    modelDocument = None

    for rootNode in xmlDocument.childNodes:
        if rootNode.nodeType == 1:  #element
            ln = rootNode.localName
            ns = rootNode.namespaceURI

            # type classification
            if ns == XbrlConst.xsd and ln == "schema":
                type = Type.SCHEMA
            elif ns == XbrlConst.link:
                if ln == "linkbase":
                    type = Type.LINKBASE
                elif ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xbrli:
                if ln == "xbrl":
                    type = Type.INSTANCE
            elif ns == XbrlConst.xhtml and \
                 ln == "html" or ln == "xhtml":
                type = Type.Unknown
                for i in range(len(rootNode.attributes)):
                    if rootNode.attributes.item(i).value == XbrlConst.ixbrl:
                        type = Type.INLINEXBRL
                        break
                XmlUtil.markIdAttributes(
                    rootNode)  # required for minidom searchability
            elif ln == "report" and ns == XbrlConst.ver:
                type = Type.VERSIONINGREPORT
            elif ln == "testcases" or ln == "documentation":
                type = Type.TESTCASESINDEX
            elif ln == "testcase":
                type = Type.TESTCASE
            elif ln == "registry" and ns == XbrlConst.registry:
                type = Type.REGISTRY
            elif ln == "rss":
                type = Type.RSSFEED
            else:
                type = Type.Unknown
                nestedInline = XmlUtil.descendant(rootNode, XbrlConst.xhtml,
                                                  ("html", "xhtml"))
                if nestedInline:
                    for i in range(len(nestedInline.attributes)):
                        if nestedInline.attributes.item(
                                i).value == XbrlConst.ixbrl:
                            type = Type.INLINEXBRL
                            rootNode = nestedInline
                            break
                XmlUtil.markIdAttributes(
                    rootNode)  # required for minidom searchability

            #create modelDocument object or subtype as identified
            if type == Type.VERSIONINGREPORT:
                from arelle.ModelVersReport import ModelVersReport
                modelDocument = ModelVersReport(modelXbrl, type, mappedUri,
                                                filepath, xmlDocument)
            elif type == Type.RSSFEED:
                from arelle.ModelRssObject import ModelRssObject
                modelDocument = ModelRssObject(modelXbrl, type, mappedUri,
                                               filepath, xmlDocument)
            else:
                modelDocument = ModelDocument(modelXbrl, type, mappedUri,
                                              filepath, xmlDocument)
            modelDocument.xmlRootElement = rootNode
            modelDocument.schemaLocationElements.add(rootNode)

            if isEntry:
                modelDocument.inDTS = True

            # discovery (parsing)
            if type == Type.SCHEMA:
                modelDocument.schemaDiscover(rootNode, isIncluded, namespace)
            elif type == Type.LINKBASE:
                modelDocument.linkbaseDiscover(rootNode)
            elif type == Type.INSTANCE:
                modelDocument.instanceDiscover(rootNode)
            elif type == Type.INLINEXBRL:
                modelDocument.inlineXbrlDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.TESTCASESINDEX:
                modelDocument.testcasesIndexDiscover(xmlDocument)
            elif type == Type.TESTCASE:
                modelDocument.testcaseDiscover(rootNode)
            elif type == Type.REGISTRY:
                modelDocument.registryDiscover(rootNode)
            elif type == Type.VERSIONINGREPORT:
                modelDocument.versioningReportDiscover(rootNode)
            elif type == Type.RSSFEED:
                modelDocument.rssFeedDiscover(rootNode)
            break
    return modelDocument