Exemplo n.º 1
0
def validateFootnote(modelXbrl, footnote):
    """Validate the XHTML content of a footnote and report any problems.

    The footnote content is copied (via ``copyHtml``) under a fresh
    ``<body>`` element, validated with ``edbodyDTD`` and then every element
    is scanned for:

    * ``javascript:`` inside ``<a href>`` / ``<img src>`` values,
    * ``http:`` / ``https:`` / ``ftp:`` references (``<a>`` links that start
      with ``http://www.sec.gov/Archives/edgar/data/`` are accepted),
    * ``<img src>`` values that do not end in ``.jpg`` / ``.gif``, or for
      which ``validateGraphicFile`` returns something other than the
      three-letter extension,
    * ``<table>`` elements that have a ``<table>`` ancestor.

    Findings are reported through ``modelXbrl.error`` with ``EFM.6.05.34``
    codes; nothing is returned.

    :param modelXbrl: object whose ``error`` method receives the findings;
        it is also passed to ``loadDTD``.
    :param footnote: footnote element; its xlink:label attribute identifies
        the footnote in every message.
    """
    loadDTD(modelXbrl)
    # The label is the same for every message, so look it up once.
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")
    checkedGraphicsFiles = set()  # only check any graphics file reference once per footnote

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error("EFM.6.05.34.dtdError",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote, xlinkLabel=xlinkLabel,
                error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if ((attrTag == "href" and eltTag == "a") or
                    (attrTag == "src" and eltTag == "img")):
                    if "javascript:" in attrValue:
                        modelXbrl.error("EFM.6.05.34.activeContent",
                            _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                            modelObject=footnote, xlinkLabel=xlinkLabel,
                            attribute=attrTag, element=eltTag)
                    elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                        pass  # links into the EDGAR archive are permitted
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        modelXbrl.error("EFM.6.05.34.externalReference",
                            _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                            modelObject=footnote, xlinkLabel=xlinkLabel,
                            attribute=attrTag, element=eltTag, value=attrValue)
                    if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                        loweredValue = attrValue.lower()
                        if loweredValue[-4:] not in ('.jpg', '.gif'):
                            modelXbrl.error("EFM.6.05.34.graphicFileType",
                                _("Footnote %(xlinkLabel)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                modelObject=footnote, xlinkLabel=xlinkLabel,
                                attribute=attrValue, element=eltTag)
                        else:   # test file contents
                            try:
                                if validateGraphicFile(footnote, attrValue) != loweredValue[-3:]:
                                    modelXbrl.error("EFM.6.05.34.graphicFileContent",
                                        _("Footnote %(xlinkLabel)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                        modelObject=footnote, xlinkLabel=xlinkLabel,
                                        attribute=attrValue, element=eltTag)
                            except IOError as err:
                                modelXbrl.error("EFM.6.05.34.graphicFileError",
                                    _("Footnote %(xlinkLabel)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                    modelObject=footnote, xlinkLabel=xlinkLabel,
                                    attribute=attrValue, element=eltTag, error=err)
                        checkedGraphicsFiles.add(attrValue)
            if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                modelXbrl.error("EFM.6.05.34.nestedTable",
                    _("Footnote %(xlinkLabel)s has nested <table> elements."),
                    modelObject=footnote, xlinkLabel=xlinkLabel)
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised.  edbodyDTD.error_log
        # belongs to DTD validation, so it does not describe a parse or
        # decode failure caught here.
        modelXbrl.error("EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote, xlinkLabel=xlinkLabel,
            error=err)
Exemplo n.º 2
0
def validateFootnote(modelXbrl, footnote):
    """Validate the XHTML content of a footnote and report any problems.

    The footnote content is copied (via ``copyHtml``) under a fresh
    ``<body>`` element, validated with ``edbodyDTD`` and then every element
    is scanned for:

    * ``javascript:`` inside ``<a href>`` / ``<img src>`` values,
    * ``http:`` / ``https:`` / ``ftp:`` references (``<a>`` links that start
      with ``http://www.sec.gov/Archives/edgar/data/`` are accepted),
    * ``<img src>`` values that do not end in ``.jpg`` / ``.gif``, or for
      which ``validateGraphicFile`` returns something other than the
      three-letter extension,
    * ``<table>`` elements that have a ``<table>`` ancestor.

    Findings are reported through ``modelXbrl.error`` with ``EFM.6.05.34``
    codes; nothing is returned.

    :param modelXbrl: object whose ``error`` method receives the findings;
        it is also passed to ``loadDTD``.
    :param footnote: footnote element; its xlink:label attribute identifies
        the footnote in every message.
    """
    loadDTD(modelXbrl)
    # The label is the same for every message, so look it up once.
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")
    checkedGraphicsFiles = set()  # only check any graphics file reference once per footnote

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error("EFM.6.05.34.dtdError",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote, xlinkLabel=xlinkLabel,
                error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if ((attrTag == "href" and eltTag == "a") or
                    (attrTag == "src" and eltTag == "img")):
                    if "javascript:" in attrValue:
                        modelXbrl.error("EFM.6.05.34.activeContent",
                            _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                            modelObject=footnote, xlinkLabel=xlinkLabel,
                            attribute=attrTag, element=eltTag)
                    elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                        pass  # links into the EDGAR archive are permitted
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        modelXbrl.error("EFM.6.05.34.externalReference",
                            _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                            modelObject=footnote, xlinkLabel=xlinkLabel,
                            attribute=attrTag, element=eltTag, value=attrValue)
                    if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                        loweredValue = attrValue.lower()
                        if loweredValue[-4:] not in ('.jpg', '.gif'):
                            modelXbrl.error("EFM.6.05.34.graphicFileType",
                                _("Footnote %(xlinkLabel)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                modelObject=footnote, xlinkLabel=xlinkLabel,
                                attribute=attrValue, element=eltTag)
                        else:   # test file contents
                            try:
                                if validateGraphicFile(footnote, attrValue) != loweredValue[-3:]:
                                    modelXbrl.error("EFM.6.05.34.graphicFileContent",
                                        _("Footnote %(xlinkLabel)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                        modelObject=footnote, xlinkLabel=xlinkLabel,
                                        attribute=attrValue, element=eltTag)
                            except IOError as err:
                                modelXbrl.error("EFM.6.05.34.graphicFileError",
                                    _("Footnote %(xlinkLabel)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                    modelObject=footnote, xlinkLabel=xlinkLabel,
                                    attribute=attrValue, element=eltTag, error=err)
                        checkedGraphicsFiles.add(attrValue)
            if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                modelXbrl.error("EFM.6.05.34.nestedTable",
                    _("Footnote %(xlinkLabel)s has nested <table> elements."),
                    modelObject=footnote, xlinkLabel=xlinkLabel)
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised.  edbodyDTD.error_log
        # belongs to DTD validation, so it does not describe a parse or
        # decode failure caught here.
        modelXbrl.error("EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote, xlinkLabel=xlinkLabel,
            error=err)
Exemplo n.º 3
0
def validateFootnote(modelXbrl, footnote):
    """Validate the XHTML content of a footnote and report any problems.

    The footnote content is copied (via ``copyHtml``) under a fresh
    ``<body>`` element and validated with ``edbodyDTD``.  Afterwards the
    ``href`` of every ``<a>`` and the ``src`` of every ``<img>`` is checked
    for ``javascript:`` and for ``http:`` / ``https:`` / ``ftp:``
    references; ``<a>`` links that start with
    ``http://www.sec.gov/Archives/edgar/data/`` are accepted.

    All findings are reported through ``modelXbrl.error`` under code
    ``EFM.6.05.34``; nothing is returned.

    :param modelXbrl: object whose ``error`` method receives the findings;
        it is also passed to ``loadDTD``.
    :param footnote: footnote element; its xlink:label attribute identifies
        the footnote in every message.
    """
    loadDTD(modelXbrl)
    # The label is the same for every message, so look it up once.
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error(
                "EFM.6.05.34",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote,
                xlinkLabel=xlinkLabel,
                error=', '.join(
                    e.message
                    for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if ((attrTag == "href" and eltTag == "a")
                        or (attrTag == "src" and eltTag == "img")):
                    if "javascript:" in attrValue:
                        modelXbrl.error(
                            "EFM.6.05.34",
                            _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"
                              ),
                            modelObject=footnote,
                            xlinkLabel=xlinkLabel,
                            attribute=attrTag,
                            element=eltTag)
                    elif attrValue.startswith(
                            "http://www.sec.gov/Archives/edgar/data/"
                    ) and eltTag == "a":
                        pass  # links into the EDGAR archive are permitted
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        modelXbrl.error(
                            "EFM.6.05.34",
                            _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"
                              ),
                            modelObject=footnote,
                            xlinkLabel=xlinkLabel,
                            attribute=attrTag,
                            element=eltTag,
                            value=attrValue)
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised.  edbodyDTD.error_log
        # belongs to DTD validation, so it does not describe a parse or
        # decode failure caught here.
        modelXbrl.error(
            "EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote,
            xlinkLabel=xlinkLabel,
            error=err)
Exemplo n.º 4
0
def validateFootnote(modelXbrl, footnote):
    """Validate the XHTML content of a footnote and report any problems.

    The footnote content is copied (via ``copyHtml``) under a fresh
    ``<body>`` element and validated with ``edbodyDTD``.  Afterwards the
    ``href`` of every ``<a>`` and the ``src`` of every ``<img>`` is checked
    for ``javascript:`` and for ``http:`` / ``https:`` / ``ftp:``
    references; ``<a>`` links that start with
    ``http://www.sec.gov/Archives/edgar/data/`` are accepted.

    All findings are reported through ``modelXbrl.error`` under code
    ``EFM.6.05.34``; nothing is returned.

    :param modelXbrl: object whose ``error`` method receives the findings;
        it is also passed to ``loadDTD``.
    :param footnote: footnote element; its xlink:label attribute identifies
        the footnote in every message.
    """
    loadDTD(modelXbrl)
    # The label is the same for every message, so look it up once.
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error(
                "EFM.6.05.34",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote,
                xlinkLabel=xlinkLabel,
                error=", ".join(e.message for e in edbodyDTD.error_log.filter_from_errors()),
            )
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if (attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img"):
                    if "javascript:" in attrValue:
                        modelXbrl.error(
                            "EFM.6.05.34",
                            _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                            modelObject=footnote,
                            xlinkLabel=xlinkLabel,
                            attribute=attrTag,
                            element=eltTag,
                        )
                    elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                        pass  # links into the EDGAR archive are permitted
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        modelXbrl.error(
                            "EFM.6.05.34",
                            _(
                                "Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"
                            ),
                            modelObject=footnote,
                            xlinkLabel=xlinkLabel,
                            attribute=attrTag,
                            element=eltTag,
                            value=attrValue,
                        )
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised.  edbodyDTD.error_log
        # belongs to DTD validation, so it does not describe a parse or
        # decode failure caught here.
        modelXbrl.error(
            "EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote,
            xlinkLabel=xlinkLabel,
            error=err,
        )
Exemplo n.º 5
0
def validateTextBlockFacts(modelXbrl):
    """Check the markup carried by every text-block fact of ``modelXbrl``.

    A fact is examined when it is not nil, has a concept whose
    ``isTextBlock`` is true and its value matches ``XMLpattern``.  For such
    a fact:

    * every match of ``namedEntityPattern`` must be in ``xhtmlEntities``;
    * the value, and each ``CDATApattern`` match within it, is wrapped in
      ``<body>`` (after ``removeEntities``), parsed, validated with
      ``edbodyDTD`` and scanned for javascript, external references,
      unacceptable graphics files and nested tables.

    Findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model supplying ``facts`` and the ``error`` reporter;
        it is also passed to ``loadDTD``.
    """
    loadDTD(modelXbrl)
    graphicsSeen = set()  # graphics references already checked; emptied after each text

    for modelFact in modelXbrl.facts:
        concept = modelFact.concept
        # Skip nil facts, facts without a text-block concept, and values
        # that do not match XMLpattern.
        if (modelFact.xsiNil == "true"
                or concept is None
                or not concept.isTextBlock
                or not XMLpattern.match(modelFact.value)):
            continue

        # test encoded entity tags
        for entityMatch in namedEntityPattern.finditer(modelFact.value):
            entity = entityMatch.group()
            if entity in xhtmlEntities:
                continue
            modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                entity=entity, error=entity)

        # test html: the whole value first, then each CDATA section
        candidateTexts = [modelFact.value] + CDATApattern.findall(modelFact.value)
        for xmltext in candidateTexts:
            bodyMarkup = "<body>\n{0}\n</body>\n".format(removeEntities(xmltext))
            try:
                bodyElt = XML(bodyMarkup)
                if not edbodyDTD.validate(bodyElt):
                    dtdErrors = edbodyDTD.error_log.filter_from_errors()
                    if any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                           for e in dtdErrors):
                        dtdCode = "EFM.6.05.16"
                    else:
                        dtdCode = ("EFM.6.05.15.dtdError", "GFM.1.02.14")
                    modelXbrl.error(dtdCode,
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                        error=', '.join(e.message for e in dtdErrors))

                for elt in bodyElt.iter():
                    tagName = elt.tag
                    for attrName, attrValue in elt.items():
                        isAnchorHref = attrName == "href" and tagName == "a"
                        isImageSrc = attrName == "src" and tagName == "img"
                        if not (isAnchorHref or isImageSrc):
                            continue

                        if "javascript:" in attrValue:
                            modelXbrl.error("EFM.6.05.16.activeContent",
                                _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                                attribute=attrName, element=tagName)
                        elif (not (tagName == "a"
                                   and attrValue.startswith("http://www.sec.gov/Archives/edgar/data/"))
                              and any(scheme in attrValue for scheme in ("http:", "https:", "ftp:"))):
                            modelXbrl.error("EFM.6.05.16.externalReference",
                                _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                                attribute=attrName, element=tagName)

                        # Only <img src> values reach the graphics checks.
                        if not isImageSrc or attrValue in graphicsSeen:
                            continue
                        loweredValue = attrValue.lower()
                        if loweredValue[-4:] in ('.jpg', '.gif'):
                            # extension is acceptable: test file contents
                            try:
                                if validateGraphicFile(modelFact, attrValue) != loweredValue[-3:]:
                                    modelXbrl.error("EFM.6.05.16.graphicFileContent",
                                        _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                        modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                                        attribute=attrValue, element=tagName)
                            except IOError as ioErr:
                                modelXbrl.error("EFM.6.05.16.graphicFileError",
                                    _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                    modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                                    attribute=attrValue, element=tagName, error=ioErr)
                        else:
                            modelXbrl.error("EFM.6.05.16.graphicFileType",
                                _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                                attribute=attrValue, element=tagName)
                        graphicsSeen.add(attrValue)

                    # A table with any <table> ancestor is a nested table.
                    if tagName == "table" and next(elt.iterancestors("table"), None) is not None:
                        modelXbrl.error("EFM.6.05.16.nestedTable",
                            _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                            modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID)
            except (XMLSyntaxError, UnicodeDecodeError) as parseErr:
                modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                    _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                    modelObject=modelFact, fact=modelFact.qname, contextID=modelFact.contextID,
                    error=parseErr)

            graphicsSeen.clear()
Exemplo n.º 6
0
def validateTextBlockFacts(modelXbrl):
    """Check the markup carried by every text-block fact of ``modelXbrl``.

    A fact is examined when it is not nil, has a concept whose
    ``isTextBlock`` is true and its value matches ``XMLpattern``.  For such
    a fact:

    * every match of ``entityPattern`` must be in ``xhtmlEntities``;
    * the value, and each ``CDATApattern`` match within it, is wrapped in
      ``<body>`` (after ``removeEntities``), parsed, validated with
      ``edbodyDTD`` and its ``<a href>`` / ``<img src>`` values are checked
      for ``javascript:`` and for ``http:`` / ``https:`` / ``ftp:``
      references (``<a>`` links that start with
      ``http://www.sec.gov/Archives/edgar/data/`` are accepted).

    Findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model supplying ``facts`` and the ``error`` reporter;
        it is also passed to ``loadDTD``.
    """
    loadDTD(modelXbrl)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if f1.xsiNil != "true" and concept is not None and concept.isTextBlock and XMLpattern.match(f1.value):
            # test encoded entity tags
            for match in entityPattern.finditer(f1.value):
                entity = match.group()
                if entity not in xhtmlEntities:
                    modelXbrl.error(
                        ("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        entity=entity,
                    )
            # test html: the whole value first, then each CDATA section
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                try:
                    textblockXml = XML("<body>\n{0}\n</body>\n".format(removeEntities(xmltext)))
                    if not edbodyDTD.validate(textblockXml):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") for e in errors)
                        modelXbrl.error(
                            "EFM.6.05.16" if htmlError else ("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1,
                            fact=f1.qname,
                            contextID=f1.contextID,
                            error=", ".join(e.message for e in errors),
                        )
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        for attrTag, attrValue in elt.items():
                            if (attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img"):
                                if "javascript:" in attrValue:
                                    # The template needs the trailing "s" conversion on
                                    # %(contextID)s; without it "%(contextID) ha" is parsed
                                    # as a format spec and the message is garbled.
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _(
                                            "Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"
                                        ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag,
                                    )
                                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                                    pass  # links into the EDGAR archive are permitted
                                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _(
                                            "Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"
                                        ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag,
                                    )
                except (XMLSyntaxError, UnicodeDecodeError) as err:
                    modelXbrl.error(
                        ("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        error=err,
                    )
Exemplo n.º 7
0
def createTargetInstance(modelXbrl,
                         targetUrl,
                         targetDocumentSchemaRefs,
                         filingFiles,
                         baseXmlLang=None,
                         defaultXmlLang=None):
    def addLocallyReferencedFile(elt, filingFiles):
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(
                        attrValue) and not os.path.isabs(attrValue):
                    attrValue = attrValue.partition('#')[0]  # remove anchor
                    if attrValue:  # ignore anchor references to base document
                        attrValue = os.path.normpath(
                            attrValue
                        )  # change url path separators to host separators
                        file = os.path.join(sourceDir, attrValue)
                        if modelXbrl.fileSource.isInArchive(
                                file,
                                checkExistence=True) or os.path.exists(file):
                            filingFiles.add(file)

    targetInstance = ModelXbrl.create(
        modelXbrl.modelManager,
        newDocumentType=Type.INSTANCE,
        url=targetUrl,
        schemaRefs=targetDocumentSchemaRefs,
        isEntry=True,
        discover=False)  # don't attempt to load DTS
    ixTargetRootElt = modelXbrl.ixTargetRootElements[getattr(
        modelXbrl, "ixdsTarget", None)]
    langIsSet = False
    # copy ix resources target root attributes
    for attrName, attrValue in ixTargetRootElt.items():
        if attrName != "target":  # ix:references target is not mapped to xbrli:xbrl
            targetInstance.modelDocument.xmlRootElement.set(
                attrName, attrValue)
        if attrName == "{http://www.w3.org/XML/1998/namespace}lang":
            langIsSet = True
            defaultXmlLang = attrValue
        if attrName.startswith("{"):
            ns, _sep, ln = attrName[1:].rpartition("}")
            if ns:
                prefix = xmlnsprefix(ixTargetRootElt, ns)
                if prefix not in (None, "xml"):
                    setXmlns(targetInstance.modelDocument, prefix, ns)

    if not langIsSet and baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set(
            "{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
        if defaultXmlLang is None:
            defaultXmlLang = baseXmlLang  # allows facts/footnotes to override baseXmlLang
    ValidateXbrlDimensions.loadDimensionDefaults(
        targetInstance)  # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement,
                     roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in sorted(modelXbrl.contexts.values(),
                          key=lambda c: c.objectIndex
                          ):  # contexts may come from multiple IXDS files
        ignore = targetInstance.createContext(
            context.entityIdentifier[0],
            context.entityIdentifier[1],
            'instant' if context.isInstantPeriod else
            'duration' if context.isStartEndPeriod else 'forever',
            context.startDatetime,
            context.endDatetime,
            None,
            context.qnameDims, [], [],
            id=context.id)
    for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex
                       ):  # units may come from multiple IXDS files
        measures = unit.measures
        ignore = targetInstance.createUnit(measures[0],
                                           measures[1],
                                           id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}

    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:  # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                for attrName, attrValue in fact.items():
                    if attrName.startswith("{"):
                        attrs[qname(
                            attrName, fact.nsmap
                        )] = attrValue  # using qname allows setting prefix in extracted instance
                newFact = targetInstance.createFact(fact.qname,
                                                    attributes=attrs,
                                                    text=text,
                                                    parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                if filingFiles is not None and fact.concept is not None and fact.concept.isTextBlock:
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(
                                    xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                attrs = {}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                for attrName, attrValue in fact.items():
                    if attrName.startswith("{"):
                        attrs[qname(attrName, fact.nsmap)] = attrValue
                newTuple = targetInstance.createFact(fact.qname,
                                                     attributes=attrs,
                                                     parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(
        _("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and any(
                    lP.modelDocument.type == Type.INLINEXBRL
                    for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID,
                                                     0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(
                            attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink,
                                        linkChild.qname,
                                        attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != defaultXmlLang:  # default
                        newChild.set(
                            "{http://www.w3.org/XML/1998/namespace}lang",
                            xmlLang)
                    copyIxFootnoteHtml(
                        linkChild,
                        newChild,
                        targetModelDocument=targetInstance.modelDocument,
                        withText=True)

                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)
    return targetInstance
Exemplo n.º 8
0
def saveTargetDocument(modelXbrl,
                       targetDocumentFilename,
                       targetDocumentSchemaRefs,
                       outputZip=None,
                       filingFiles=None,
                       *args,
                       **kwargs):
    """Build the extracted XBRL instance for an inline XBRL model and save it.

    The output URL is the normalized ``targetDocumentFilename`` with
    ``_extracted`` inserted before its extension.  Role/arcrole refs,
    contexts, units, facts (items and tuples, recursively) and inline
    footnote links are copied from ``modelXbrl`` into a newly created
    instance, which is then saved with ``saveInstance``.

    :param modelXbrl: source model holding the inline facts and links.
    :param targetDocumentFilename: file name used to derive the output URL.
    :param targetDocumentSchemaRefs: schemaRefs passed to ``ModelXbrl.create``.
    :param outputZip: forwarded to ``saveInstance`` as ``outputZip``.
    :param filingFiles: optional collection exposing ``add``; every non-HTTP,
        non-absolute ``href``/``src`` value found on ``<a>``/``<img>``
        elements of text block facts and footnotes is added to it.  Pass
        ``None`` to disable collection (an empty collection is filled).
    :param args: unused; accepted for call compatibility.
    :param kwargs: unused; accepted for call compatibility.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        targetDocumentFilename, modelXbrl.modelDocument.filepath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(
        _("Extracting instance ") + os.path.basename(targetUrl))
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True)
    ValidateXbrlDimensions.loadDimensionDefaults(
        targetInstance)  # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement,
                     roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in modelXbrl.contexts.values():
        targetInstance.createContext(
            context.entityIdentifier[0],
            context.entityIdentifier[1],
            'instant' if context.isInstantPeriod else
            'duration' if context.isStartEndPeriod else 'forever',
            context.startDatetime,
            context.endDatetime,
            None,
            context.qnameDims, [], [],
            id=context.id)
    # units
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    def addLocalReferences(elt):
        # Record href/src targets of <a>/<img> that are neither HTTP URLs nor
        # absolute paths.  Only called when filingFiles is not None.
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if (attrTag in ("href", "src")
                        and not isHttpUrl(attrValue)
                        and not os.path.isabs(attrValue)):
                    filingFiles.add(attrValue)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}

    def createFacts(facts, parent):
        # Copy each fact under ``parent``; tuples recurse into their children.
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname,
                                                    attributes=attrs,
                                                    text=text,
                                                    parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has no text to scan (text is None).
                if (filingFiles is not None and text is not None
                        and fact.concept is not None
                        and fact.concept.isTextBlock):
                    # check for img and other filing references
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            # .iter() walks all descendants, not only the
                            # direct children of <body>
                            for elt in XML("<body>\n{0}\n</body>\n".format(
                                    xmltext)).iter():
                                addLocalReferences(elt)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # best effort: unparsable text is skipped
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    # footnote links
    footnoteIdCount = {}
    modelXbrl.modelManager.showStatus(
        _("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and any(
                    lP.modelDocument.type == Type.INLINEXBRL
                    for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the newly created fact
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID,
                                                     0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(
                            attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink,
                                        linkChild.qname,
                                        attributes=attributes)
                    copyIxFootnoteHtml(
                        linkChild,
                        newChild,
                        targetModelDocument=targetInstance.modelDocument,
                        withText=True)
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocalReferences(elt)

    targetInstance.saveInstance(overrideFilepath=targetUrl,
                                outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True  # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
Exemplo n.º 9
0
def validateTextBlockFacts(modelXbrl):
    """Validate the embedded (X)HTML of every non-nil text block fact.

    For each fact of ``modelXbrl.facts`` whose concept is a text block and
    whose value matches ``XMLpattern``, the value (and each CDATA section in
    it) is parsed and checked; every problem is reported through
    ``modelXbrl.error``:

    * named entities not in ``xhtmlEntities``;
    * DTD validation failures against ``edbodyDTD``;
    * elements/attributes blocked for inline documents (when ``isInlineDTD``);
    * ``javascript:`` or disallowed external references in ``<a href>`` and
      ``<img src>``;
    * graphics that are data URLs, not ``.jpg``/``.gif``, unreadable, or whose
      content does not match their extension;
    * nested ``<table>`` elements;
    * text that cannot be parsed as XML.

    :param modelXbrl: the model whose facts are validated and which receives
        the error messages.
    """
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set() #  only check any graphics file reference once per fact
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern
    
    if isInlineDTD:
        htmlBodyTemplate = "<body><div>\n{0}\n</div></body>\n"
    else:
        htmlBodyTemplate = "<body>\n{0}\n</body>\n"
    _xhtmlNs = "{{{}}}".format(xhtml)
    _xhtmlNsLen = len(_xhtmlNs)
    
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil == "true" or concept is None or not concept.isTextBlock:
            continue
        factValue = f1.value
        if not XMLpattern.match(factValue):
            continue
        # test encoded entity tags
        for match in namedEntityPattern.finditer(factValue):
            entity = match.group()
            if entity not in xhtmlEntities:
                modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                    _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                    modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
        # test html
        for xmltext in [factValue] + CDATApattern.findall(factValue):
            xmlBodyWithoutEntities = htmlBodyTemplate.format(removeEntities(xmltext))
            try:
                textblockXml = XML(xmlBodyWithoutEntities)
                if not edbodyDTD.validate( textblockXml ):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") 
                                    for e in errors)
                    modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, 
                        error=', '.join(e.message for e in errors),
                        messageCodes=("EFM.6.05.16", "EFM.6.05.15.dtdError", "GFM.1.02.14"))
                for elt in textblockXml.iter():
                    # normalize the tag to a local name when it is in the xhtml namespace
                    if isinstance(elt, ModelObject) and elt.namespaceURI == xhtml:
                        eltTag = elt.localName
                    elif isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
                        continue # comment or other non-parsed element
                    else:
                        eltTag = elt.tag
                        if eltTag.startswith(_xhtmlNs):
                            eltTag = eltTag[_xhtmlNsLen:]
                    if isInlineDTD and eltTag in efmBlockedInlineHtmlElements:
                        modelXbrl.error("EFM.5.02.05.disallowedElement",
                            _("%(validatedObjectLabel)s has disallowed element <%(element)s>"),
                            modelObject=elt, validatedObjectLabel=f1.qname,
                            element=eltTag)
                    for attrTag, attrValue in elt.items():
                        if isInlineDTD:
                            if attrTag in efmBlockedInlineHtmlElementAttributes.get(eltTag,()):
                                # label with the fact's qname, as for disallowed elements
                                modelXbrl.error("EFM.5.02.05.disallowedAttribute",
                                    _("%(validatedObjectLabel)s has disallowed attribute on element <%(element)s>: %(attribute)s=\"%(value)s\""),
                                    modelObject=elt, validatedObjectLabel=f1.qname,
                                    element=eltTag, attribute=attrTag, value=attrValue)
                        if ((attrTag == "href" and eltTag == "a") or 
                            (attrTag == "src" and eltTag == "img")):
                            if "javascript:" in attrValue:
                                modelXbrl.error("EFM.6.05.16.activeContent",
                                    _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                    attribute=attrTag, element=eltTag)
                            elif eltTag == "a" and (not allowedExternalHrefPattern or allowedExternalHrefPattern.match(attrValue)):
                                pass
                            elif scheme(attrValue) in ("http", "https", "ftp"):
                                modelXbrl.error("EFM.6.05.16.externalReference",
                                    _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                    attribute=attrTag, element=eltTag)
                            if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                                if scheme(attrValue)  == "data":
                                    modelXbrl.error("EFM.6.05.16.graphicDataUrl",
                                        _("Fact %(fact)s of context %(contextID)s references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrValue[:32], element=eltTag)
                                elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                    modelXbrl.error("EFM.6.05.16.graphicFileType",
                                        _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrValue, element=eltTag)
                                else:   # test file contents
                                    try:
                                        if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                            modelXbrl.error("EFM.6.05.16.graphicFileContent",
                                                _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                                modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                attribute=attrValue, element=eltTag)
                                    except IOError as err:
                                        modelXbrl.error("EFM.6.05.16.graphicFileError",
                                            _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrValue, element=eltTag, error=err)
                                checkedGraphicsFiles.add(attrValue)
                    if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                        modelXbrl.error("EFM.6.05.16.nestedTable",
                            _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID)
            except (XMLSyntaxError,
                    UnicodeDecodeError) as err:
                #if not err.endswith("undefined entity"):
                modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                    _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                    modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
                
            # reset after each parsed text so later texts re-check their graphics
            checkedGraphicsFiles.clear()
Exemplo n.º 10
0
def createTargetInstance(modelXbrl, targetUrl, targetDocumentSchemaRefs, filingFiles, baseXmlLang=None, defaultXmlLang=None):
    """Create an XBRL instance populated from an inline XBRL model.

    A new instance is created at ``targetUrl`` (without DTS discovery) and
    filled with the role/arcrole refs, contexts, units, facts (items and
    tuples, recursively) and inline footnote links of ``modelXbrl``.

    :param modelXbrl: source model holding the inline facts and links.
    :param targetUrl: URL given to the new instance document.
    :param targetDocumentSchemaRefs: schemaRefs passed to ``ModelXbrl.create``.
    :param filingFiles: collection handed to ``addLocallyReferencedFile`` for
        every element of text block facts and footnotes, or ``None`` to skip
        that scan (an empty collection is still filled).
    :param baseXmlLang: if truthy, set as ``xml:lang`` on the root element.
    :param defaultXmlLang: language that facts/footnotes need not restate;
        defaults to ``baseXmlLang`` when that is given.
    :returns: the newly created target instance.
    """
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False) # don't attempt to load DTS
    if baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set("{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
        if defaultXmlLang is None:
            defaultXmlLang = baseXmlLang # allows facts/footnotes to override baseXmlLang
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults 
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())
    
    # contexts
    for context in sorted(modelXbrl.contexts.values(), key=lambda c: c.objectIndex): # contexts may come from multiple IXDS files
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     'instant' if context.isInstantPeriod else
                                     'duration' if context.isStartEndPeriod
                                     else 'forever',
                                     context.startDatetime,
                                     context.endDatetime,
                                     None,
                                     context.qnameDims, [], [],
                                     id=context.id)
    for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex): # units may come from multiple IXDS files
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}
    def createFacts(facts, parent):
        # Copy each fact under ``parent``; tuples recurse into their children.
        for fact in facts:
            if fact.isItem: # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"): # default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != defaultXmlLang:
                            attrs["{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None; there is nothing to scan and
                # CDATApattern.findall(None) would raise TypeError.
                if (filingFiles is not None and text is not None
                        and fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)
                
    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement, 
                               linkPrototype.qname, 
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the newly created fact
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, 
                             attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1: # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, 
                                        attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != defaultXmlLang: # default
                        newChild.set("{http://www.w3.org/XML/1998/namespace}lang", xmlLang)
                    copyIxFootnoteHtml(linkChild, newChild, targetModelDocument=targetInstance.modelDocument, withText=True)

                    # same "is not None" test as for facts, so an empty
                    # filingFiles collection is still populated
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt,filingFiles)
    return targetInstance
Exemplo n.º 11
0
def validateTextBlockFacts(modelXbrl):
    """Validate the HTML markup carried by the text block facts of *modelXbrl*.

    Every non-nil fact whose concept is a text block and whose value matches
    ``XMLpattern`` is examined.  The value, and each CDATA section found in
    it, is:

    * scanned for named entities that are not in ``xhtmlEntities``;
    * parsed and validated against ``edbodyDTD``;
    * checked element by element for blocked inline elements/attributes
      (only when ``isInlineDTD`` is set), javascript and disallowed external
      references in ``<a href>`` / ``<img src>``, unacceptable or unreadable
      graphics files, and nested ``<table>`` elements.

    All findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model whose ``facts`` are validated and which receives
        the error messages.
    """
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl) # presumably sets up the module-level edbodyDTD / isInlineDTD used below -- confirm
    checkedGraphicsFiles = set() # graphics references already checked; cleared after each text segment
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern
    
    # wrapper put around the fact text so that it parses as a single document
    if isInlineDTD:
        htmlBodyTemplate = "<body><div>\n{0}\n</div></body>\n"
    else:
        htmlBodyTemplate = "<body>\n{0}\n</body>\n"
    _xhtmlNs = "{{{}}}".format(xhtml) # Clark-notation prefix, "{namespace}"
    _xhtmlNsLen = len(_xhtmlNs)
    
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil != "true" and \
           concept is not None and \
           concept.isTextBlock and \
           XMLpattern.match(f1.value):
            #handler.fact = f1
            # test encoded entity tags
            for match in namedEntityPattern.finditer(f1.value):
                entity = match.group()
                if entity not in xhtmlEntities:
                    modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
            # test html: the whole value first, then each CDATA section on its own
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                xmlBodyWithoutEntities = htmlBodyTemplate.format(removeEntities(xmltext))
                try:
                    textblockXml = XML(xmlBodyWithoutEntities)
                    if not edbodyDTD.validate( textblockXml ):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        # invalid child / unknown attribute are reported under the HTML rule, anything else as a DTD error
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") 
                                        for e in errors)
                        modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID, 
                            error=', '.join(e.message for e in errors),
                            messageCodes=("EFM.6.05.16", "EFM.6.05.15.dtdError", "GFM.1.02.14"))
                    for elt in textblockXml.iter():
                        # reduce the tag to a local name when it is in the xhtml namespace
                        if isinstance(elt, ModelObject) and elt.namespaceURI == xhtml:
                            eltTag = elt.localName
                        elif isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
                            continue # comment or other non-parsed element
                        else:
                            eltTag = elt.tag
                            if eltTag.startswith(_xhtmlNs):
                                eltTag = eltTag[_xhtmlNsLen:]
                        if isInlineDTD and eltTag in efmBlockedInlineHtmlElements:
                            modelXbrl.error("EFM.5.02.05.disallowedElement",
                                _("%(validatedObjectLabel)s has disallowed element <%(element)s>"),
                                modelObject=elt, validatedObjectLabel=f1.qname,
                                element=eltTag)
                        for attrTag, attrValue in elt.items():
                            if isInlineDTD:
                                if attrTag in efmBlockedInlineHtmlElementAttributes.get(eltTag,()):
                                    # label is the fact's qname, same as for the disallowedElement message above
                                    modelXbrl.error("EFM.5.02.05.disallowedAttribute",
                                        _("%(validatedObjectLabel)s has disallowed attribute on element <%(element)s>: %(attribute)s=\"%(value)s\""),
                                        modelObject=elt, validatedObjectLabel=f1.qname,
                                        element=eltTag, attribute=attrTag, value=attrValue)
                            if ((attrTag == "href" and eltTag == "a") or 
                                (attrTag == "src" and eltTag == "img")):
                                if "javascript:" in attrValue:
                                    modelXbrl.error("EFM.6.05.16.activeContent",
                                        _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                elif eltTag == "a" and (not allowedExternalHrefPattern or allowedExternalHrefPattern.match(attrValue)):
                                    pass # no pattern configured, or the href is explicitly allowed
                                elif scheme(attrValue) in ("http", "https", "ftp"):
                                    modelXbrl.error("EFM.6.05.16.externalReference",
                                        _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                                    if scheme(attrValue)  == "data":
                                        # only the first 32 characters of the data URL are put in the message
                                        modelXbrl.error("EFM.6.05.16.graphicDataUrl",
                                            _("Fact %(fact)s of context %(contextID)s references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrValue[:32], element=eltTag)
                                    elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                        modelXbrl.error("EFM.6.05.16.graphicFileType",
                                            _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrValue, element=eltTag)
                                    else:   # test file contents
                                        try:
                                            # result of validateGraphicFile must equal the file extension
                                            if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                                modelXbrl.error("EFM.6.05.16.graphicFileContent",
                                                    _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                    attribute=attrValue, element=eltTag)
                                        except IOError as err:
                                            modelXbrl.error("EFM.6.05.16.graphicFileError",
                                                _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                                modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                attribute=attrValue, element=eltTag, error=err)
                                    checkedGraphicsFiles.add(attrValue)
                        if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                            modelXbrl.error("EFM.6.05.16.nestedTable",
                                _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                                modelObject=f1, fact=f1.qname, contextID=f1.contextID)
                except (XMLSyntaxError,
                        UnicodeDecodeError) as err:
                    #if not err.endswith("undefined entity"):
                    modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
                    
                checkedGraphicsFiles.clear()
Exemplo n.º 12
0
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None):
    """Build a new XBRL instance from *modelXbrl* and save it.

    A new instance is created whose URL is the normalized
    *targetDocumentFilename* with ``_extracted`` inserted before the file
    extension.  Role/arcrole references, contexts, units, facts (tuples are
    copied recursively) and the footnote links of inline XBRL documents are
    copied into it, then it is written with ``saveInstance``.

    :param modelXbrl: source model (facts, contexts, units, base sets).
    :param targetDocumentFilename: file name the extracted URL is derived from.
    :param targetDocumentSchemaRefs: schema references for the new instance.
    :param outputZip: passed through to ``saveInstance``.
    :param filingFiles: optional set; when given and non-empty, the relative
        ``href``/``src`` targets of ``<a>``/``<img>`` elements found in text
        block facts and footnotes are added to it.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(targetDocumentFilename, modelXbrl.modelDocument.filepath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))
    targetInstance = ModelXbrl.create(modelXbrl.modelManager, 
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True)
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults 

    def addLocalFileRefs(elt):
        # Record the non-http, non-absolute href/src of an <a>/<img> element in filingFiles.
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue):
                    filingFiles.add(attrValue)

    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname, 
                     attributes=roleRefElt.items())
    
    # contexts
    for context in modelXbrl.contexts.values():
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     'instant' if context.isInstantPeriod else
                                     'duration' if context.isStartEndPeriod
                                     else 'forever',
                                     context.startDatetime,
                                     context.endDatetime,
                                     None, 
                                     context.qnameDims, [], [],
                                     id=context.id)
    # units
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {} # source fact objectIndex -> fact created in the target instance
    def createFacts(facts, parent):
        # Copy facts under parent (None = instance root), recursing into tuples.
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                # a nil fact has no text (None), so there is nothing to scan
                if filingFiles and text and fact.concept is not None and fact.concept.isTextBlock:
                    # check for img and other filing references
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            # iter() so that elements nested below <body> children are seen too,
                            # as is done for footnotes below
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocalFileRefs(elt)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass # best effort: text that does not parse contributes no file references
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)
                
    createFacts(modelXbrl.facts, None)
    # footnote links
    footnoteIdCount = {} # footnote id -> number of links it has been written to so far
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement, 
                               linkPrototype.qname, 
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the copied fact in the target instance
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, 
                             attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1: # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, 
                                        attributes=attributes)
                    copyIxFootnoteHtml(linkChild, newChild, withText=True)
                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocalFileRefs(elt)
        
    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
class CoverletterCreator(QtWidgets.QMainWindow, mainWindow.Ui_MainWindow):
    """Main window of the Coverletter Creator application.

    Holds the form fields of a cover-letter project, loads and saves them as
    an XML project file, and hands the generated XML tree to ``PdfCreator`` /
    ``TextCreator`` to render the letter.
    """

    # Characters removed from generated output file names.
    _FILENAME_STRIP_CHARS = ".\\/:*?<>|"

    def __init__(self, parent=None):
        """Build the UI, wire up actions and load the last (or example) project."""
        super(CoverletterCreator, self).__init__(parent)
        self.setupUi(self)

        self.mainTitle = "Coverletter Creator"
        self.config = QSettings()
        self.settings = SettingsHandler(parent=self, settings=self.config)

        self.clipboard = QtWidgets.QApplication.clipboard()

        self.actionNew.triggered.connect(self.new_project)
        self.actionSave.triggered.connect(self.save_project)
        self.actionSave_As.triggered.connect(self.saveas_project)
        self.actionOpen.triggered.connect(self.open_project)
        self.actionExit.triggered.connect(self.close)
        self.actionSettings.triggered.connect(self.settings.show)

        # Set default values
        self.filename = "Examples/example_project.xml"
        self.file_dirty = False

        # readSettings may replace self.filename with the stored project path
        self.readSettings()
        self.load_file(self.filename)

        self.pb_browsePhoto.clicked.connect(self.browse_photo)
        self.pb_generatePdf.clicked.connect(self.generate_pdf)
        self.pb_generateText.clicked.connect(self.generate_text)

        # connected after the initial load so that loading does not mark the project dirty
        self.connect_all_fields()
        self.connect_mandatory_fields()

        # Connect all labels to click handler
        for child in self.centralwidget.findChildren(QtWidgets.QLabel):
            child.mousePressEvent = functools.partial(self.label_clicked,
                                                      source=child)
        for child in self.centralwidget.findChildren(QtWidgets.QCheckBox):
            child.mousePressEvent = functools.partial(self.checkbox_clicked,
                                                      source=child)
        self.RECEIPIENTGENDER.mousePressEvent = functools.partial(
            self.combobox_clicked, source=self.RECEIPIENTGENDER)
        self.RECEIPIENTSALUTATION.mousePressEvent = functools.partial(
            self.combobox_clicked, source=self.RECEIPIENTSALUTATION)

        # keep the short name in sync with the company name once editing ends
        self.COMPANYNAME.editingFinished.connect(
            lambda: self.COMPANYSHORTNAME.setText(self.COMPANYNAME.text()))

    @staticmethod
    def _ensure_xml_suffix(filename):
        """Return *filename*, with '.xml' appended when it does not contain '.xml'."""
        if ".xml" not in filename:
            filename = filename + '.xml'
        return filename

    def _output_basename(self):
        """Return '<company short name>_<job ref id>_Coverletter' as a file-name stem.

        Characters in ``_FILENAME_STRIP_CHARS`` are dropped and spaces are
        replaced by underscores.
        """
        raw = (self.COMPANYSHORTNAME.text() + '_' + self.JOBREFID.text() +
               '_Coverletter')
        kept = "".join(ch for ch in raw
                       if ch not in self._FILENAME_STRIP_CHARS)
        return kept.replace(' ', '_')

    def connect_all_fields(self):
        """Mark the project as unsaved whenever any input widget changes."""
        for child in self.centralwidget.findChildren(QtWidgets.QLineEdit):
            child.textChanged.connect(self.setWindowTitleUnsaved)
        for child in self.centralwidget.findChildren(QtWidgets.QPlainTextEdit):
            child.textChanged.connect(self.setWindowTitleUnsaved)
        for child in self.centralwidget.findChildren(QtWidgets.QCheckBox):
            child.clicked.connect(self.setWindowTitleUnsaved)
        for child in self.centralwidget.findChildren(SpellTextEdit):
            child.textChanged.connect(self.setWindowTitleUnsaved)
        for child in self.centralwidget.findChildren(QtWidgets.QComboBox):
            child.currentIndexChanged.connect(self.setWindowTitleUnsaved)

    def connect_mandatory_fields(self):
        """Enable the generate buttons only while every mandatory field is filled."""
        mandatory_fields_list = [
            self.FIRSTNAME, self.LASTNAME, self.MOBILE, self.EMAIL,
            self.COMPANYNAME
        ]

        def update_generate_buttons(*_args):
            # One shared slot that inspects all fields: per-field lambdas created
            # in the loop would all close over the last loop variable.
            complete = all(field.text() != ""
                           for field in mandatory_fields_list)
            self.pb_generatePdf.setEnabled(complete)
            self.pb_generateText.setEnabled(complete)

        for textBox in mandatory_fields_list:
            textBox.textChanged[str].connect(update_generate_buttons)

    def label_clicked(self, event, source):
        """Copy the clicked label's accessible name to the clipboard."""
        var_code = source.accessibleName()
        self.clipboard.setText(str(var_code))
        event.accept()

    def checkbox_clicked(self, event, source):
        """Copy the checkbox's accessible name to the clipboard and toggle it."""
        var_code = source.accessibleName()
        self.clipboard.setText(str(var_code))
        source.toggle()

    def combobox_clicked(self, event, source):
        """Copy the combo box's accessible name to the clipboard and open its popup."""
        var_code = source.accessibleName()
        self.clipboard.setText(str(var_code))
        source.showPopup()

    def setWindowTitleUnsaved(self):
        """Flag the project as dirty and show the file name with a trailing '*'."""
        self.file_dirty = True
        _, fname = os.path.split(self.filename)
        self.setWindowTitle(self.mainTitle + " - " + fname + "*")

    def setWindowTitleSaved(self):
        """Flag the project as clean and show the plain file name in the title."""
        self.file_dirty = False
        _, fname = os.path.split(self.filename)
        self.setWindowTitle(self.mainTitle + " - " + fname)

    def new_project(self):
        """Ask for a file name and start an empty, unsaved project under it."""
        filename, _ = QFileDialog.getSaveFileName(self, "New Project", "./",
                                                  "XML Files (*.xml)")
        if not filename:
            return

        self.reset_all_fields()
        self.filename = self._ensure_xml_suffix(filename)
        self.setWindowTitleUnsaved()

    def reset_all_fields(self):
        """Clear all text inputs, uncheck all checkboxes and remove the photo."""
        for child in self.centralwidget.findChildren(QtWidgets.QLineEdit):
            child.clear()
        for child in self.centralwidget.findChildren(QtWidgets.QPlainTextEdit):
            child.clear()
        for child in self.centralwidget.findChildren(QtWidgets.QCheckBox):
            child.setChecked(False)
        for child in self.centralwidget.findChildren(SpellTextEdit):
            child.clear()
        self.label_pic.clear()

    def save_project(self):
        """Write the current fields to ``self.filename``.

        If that file cannot be written, the user is asked for another
        location; cancelling that dialog aborts the save.  An ``OSError``
        from writing to the newly chosen location is not caught.
        """
        # Serialize before touching the file so a failure here cannot leave
        # an empty or truncated project behind.
        root = self.generate_root()
        payload = tostring(root, pretty_print=True)

        filename = self._ensure_xml_suffix(self.filename)
        try:
            with open(filename, 'wb') as f:
                f.write(payload)
        except OSError:
            filename, _ = QFileDialog.getSaveFileName(self, "Save Project",
                                                      "./",
                                                      "XML Files (*.xml)")
            if not filename:
                return
            filename = self._ensure_xml_suffix(filename)
            with open(filename, 'wb') as f:
                f.write(payload)

        self.root = root
        self.filename = filename
        self.setWindowTitleSaved()

    def generate_root(self):
        """Return an XML tree ('root' element) holding the current field values.

        Child elements are named after the widgets' object names.  Checkbox
        states are stored as the text 'True' / 'False'.
        """

        def add_text_child(parent, tag, text):
            child = Element(tag)
            child.text = text
            parent.append(child)

        root = Element('root')

        personal_info = Element('personal_info')
        root.append(personal_info)
        for qW in [
                self.FIRSTNAME, self.LASTNAME, self.MOBILE, self.EMAIL,
                self.HOMEPAGE, self.GITHUBNAME, self.LINKEDINNAME
        ]:
            add_text_child(personal_info, qW.objectName(), qW.text())
        add_text_child(personal_info, 'PERSONALADDRESS',
                       self.PERSONALADDRESS.toPlainText())

        company_info = Element('company_info')
        root.append(company_info)
        for qW in [
                self.COMPANYNAME, self.COMPANYSHORTNAME, self.DEPARTMENT,
                self.LETTERTITLE, self.JOBTITLE, self.JOBREFID,
                self.RECEIPIENTNAME
        ]:
            add_text_child(company_info, qW.objectName(), qW.text())
        add_text_child(company_info, 'COMPANYADDRESS',
                       self.COMPANYADDRESS.toPlainText())
        add_text_child(company_info, 'RECEIPIENTGENDER',
                       str(self.RECEIPIENTGENDER.currentText()))
        add_text_child(company_info, 'RECEIPIENTSALUTATION',
                       str(self.RECEIPIENTSALUTATION.currentText()))

        add_text_child(root, 'TEXTABOUTME', self.TEXTABOUTME.toPlainText())
        add_text_child(root, 'TEXTWHYTHISFIRM',
                       self.TEXTWHYTHISFIRM.toPlainText())
        add_text_child(root, 'TEXTWHYYOU', self.TEXTWHYYOU.toPlainText())

        misc = Element('misc')
        root.append(misc)
        for qW in [
                self.CLOSINGSALUTATION, self.ENCLOSINGPREFIX, self.PHOTOPATH
        ]:
            add_text_child(misc, qW.objectName(), qW.text())
        for qW in [
                self.CERTIFICATESATTACHED,
                self.CVATTACHED,
                self.REFLETTERSATTACHED,
                self.TRANSCRIPTSATTACHED,
        ]:
            add_text_child(misc, qW.objectName(), str(qW.isChecked()))

        return root

    def saveas_project(self):
        """Ask for a file name, write the project there and reload it from that file."""
        filename, _ = QFileDialog.getSaveFileName(self, "Save Project As",
                                                  "./", "XML Files (*.xml)")

        if filename:
            filename = self._ensure_xml_suffix(filename)
            with open(filename, 'wb') as f:
                f.write(tostring(self.generate_root(), pretty_print=True))
            self.load_file(filename)

    def open_project(self):
        """Ask for a project file and load it."""
        filename, _ = QFileDialog.getOpenFileName(self, "Open Project", "./",
                                                  "XML Files (*.xml)")
        if not filename:
            return

        self.load_file(self._ensure_xml_suffix(filename))

    def load_file(self, filename):
        """Load the project stored in *filename* into the form.

        Each XML element with text is matched by tag name against a child
        widget, trying QLineEdit, QPlainTextEdit, QComboBox, QCheckBox and
        SpellTextEdit in that order.  A missing file switches to an
        'untitled.xml' project; unparsable XML is reported in a message box.
        """
        try:
            # Read bytes: lxml then honours the file's own encoding declaration
            # (it rejects str input that carries one).
            with open(filename, 'rb') as f:
                self.root = XML(f.read())  #.replace("\n", ""))
            self.reset_all_fields()
            for element in self.root.iter():
                if element.text is None:
                    continue  # nothing to put into any widget
                name = str(element.tag)
                text = str(element.text)

                widget = self.findChild(QtWidgets.QLineEdit, name)
                if widget is not None:
                    widget.setText(text)
                    continue

                widget = self.findChild(QtWidgets.QPlainTextEdit, name)
                if widget is not None:
                    widget.setPlainText(text)
                    continue

                widget = self.findChild(QtWidgets.QComboBox, name)
                if widget is not None:
                    index = widget.findText(element.text,
                                            QtCore.Qt.MatchFixedString)
                    if index >= 0:
                        widget.setCurrentIndex(index)
                    elif text.isdigit():
                        # value is an item index rather than an item text
                        widget.setCurrentIndex(int(element.text))
                    else:
                        widget.setCurrentText(text)
                    continue

                widget = self.findChild(QtWidgets.QCheckBox, name)
                if widget is not None:
                    widget.setChecked(text == 'True')
                    continue

                widget = self.findChild(SpellTextEdit, name)
                if widget is not None:
                    # text widget (generate_root reads it with toPlainText),
                    # so restore its text rather than a checked state
                    widget.setPlainText(text)

            self.filename = filename
            self.get_photo(self.PHOTOPATH.text())
            self.setWindowTitleSaved()

        except FileNotFoundError:
            # Warning: File not found!
            self.filename = "untitled.xml"
            self.setWindowTitleUnsaved()
            self.file_dirty = False

        except XMLSyntaxError:
            QtWidgets.QMessageBox.critical(
                self, "XML Read Failed",
                "Cannot read xml file %s. \n\nMake sure the xml file is not blank "
                % filename)

    def browse_photo(self):
        """Let the user pick a profile photo and display it."""
        fname, _ = QFileDialog.getOpenFileName(self, 'Open profile photo',
                                               './',
                                               "Image files (*.jpg *.png)")
        if fname:
            self.get_photo(fname)

    def get_photo(self, fname):
        """Show the image *fname* in the photo label and store its path.

        A message box is shown, and nothing else changes, when the image
        cannot be loaded.
        """
        image = QtGui.QImage(fname)
        if image.isNull():
            QtWidgets.QMessageBox.information(self, "Image Viewer",
                                              "Cannot load %s." % fname)
            return
        self.PHOTOPATH.setText(fname)
        self.label_pic.setPixmap(
            QtGui.QPixmap(fname).scaled(160, 160, QtCore.Qt.KeepAspectRatio,
                                        QtCore.Qt.FastTransformation))

    def generate_pdf(self):
        """Render the LaTeX template with the current fields and compile it to PDF."""
        pdfcreator = PdfCreator(data=self.generate_root(), parent=self)
        pdfcreator.read_template(template=self.settings.latex_template)
        pdfcreator.convert_to_dict()
        pdfcreator.render_template()
        filename = self._output_basename()

        self.pb_generatePdf.setEnabled(False)
        try:
            pdfcreator.compile_xelatex(
                compiler=self.settings.get_latex_compiler(),
                pdfname=filename + ".pdf",
                outputDir=self.settings.latex_dir,
                open_pdf=self.settings.open_pdf,
                keep_tex=self.settings.keep_tex)
        except FileNotFoundError as e:
            QtWidgets.QMessageBox.critical(
                self, "PDF Compilation Failed: " + str(e),
                "Cannot complete command {}.".format(
                    self.settings.get_latex_compiler()))
        finally:
            # re-enable even if an unexpected error escapes
            self.pb_generatePdf.setEnabled(True)

    def generate_text(self):
        """Render the text template with the current fields and write a .txt file."""
        textcreator = TextCreator(data=self.generate_root())
        try:
            textcreator.read_template(template=self.settings.text_template)
        except FileNotFoundError as e:
            QtWidgets.QMessageBox.critical(
                self, "Error: " + repr(e),
                "Cannot find template file {}.\n".format(
                    self.settings.text_template))
            return  # no template was read, so there is nothing to render
        textcreator.convert_to_dict()
        textcreator.render_template()
        filename = self._output_basename()

        self.pb_generateText.setEnabled(False)
        try:
            textcreator.compile_text(textname=filename + ".txt",
                                     outputDir=self.settings.text_dir,
                                     open_text=self.settings.open_text)
        finally:
            # re-enable even if an unexpected error escapes
            self.pb_generateText.setEnabled(True)

    def writeSettings(self):
        """Persist window geometry and, when the project is clean, its file name."""
        self.config.beginGroup("MainWindow")
        self.config.setValue("size", self.size())
        self.config.setValue("pos", self.pos())
        self.config.endGroup()

        if not self.file_dirty:
            self.config.beginGroup("Project")
            self.config.setValue("filename", str(self.filename))
            self.config.endGroup()

        self.config.sync()

    def readSettings(self):
        """Restore window geometry and the last project file name from the settings."""
        self.config.beginGroup("MainWindow")
        self.resize(self.config.value("size", QtCore.QSize(616, 466)))
        self.move(self.config.value("pos", QtCore.QPoint(200, 200)))
        self.config.endGroup()

        self.config.beginGroup("Project")
        self.filename = str(self.config.value("filename", self.filename))
        self.config.endGroup()

    # event : QCloseEvent
    def closeEvent(self, event):
        """Offer to save a dirty project, then store the settings and close.

        Choosing Cancel keeps the window open.
        """
        if self.file_dirty:
            choice = QtWidgets.QMessageBox.question(
                self, 'Project not saved', "Save Project before exit?",
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No
                | QtWidgets.QMessageBox.Cancel)
            if choice == QtWidgets.QMessageBox.Cancel:
                event.ignore()
                return
            if choice == QtWidgets.QMessageBox.Yes:
                self.save_project()

        self.writeSettings()
        event.accept()
Exemplo n.º 14
0
def validateTextBlockFacts(modelXbrl):
    """Validate the markup carried by the text block facts of *modelXbrl*.

    A fact is examined when it is not nil, has a concept flagged as a text
    block, and its value matches ``XMLpattern``.  For each such fact the
    following are reported through ``modelXbrl.error``:

    * entities matched by ``entityPattern`` that are not in
      ``xhtmlEntities``;
    * DTD validation failures of the value, and of every CDATA section
      found in it, after wrapping in a ``<body>`` element;
    * ``href`` on ``<a>`` or ``src`` on ``<img>`` that contains
      ``javascript:`` or an external http/https/ftp reference (links that
      start with the SEC EDGAR archive URL are allowed on ``<a>``);
    * XML syntax or Unicode decoding errors raised while parsing.

    :param modelXbrl: model whose ``facts`` are checked and whose
        ``error`` method receives the findings.
    """
    loadDTD(modelXbrl)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if f1.xsiNil != "true" and \
           concept is not None and \
           concept.isTextBlock and \
           XMLpattern.match(f1.value):
            # test encoded entity tags
            for match in entityPattern.finditer(f1.value):
                entity = match.group()
                if entity not in xhtmlEntities:
                    modelXbrl.error(
                        ("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"
                          ),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        entity=entity)
            # test html: the whole value first, then each CDATA section
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                try:
                    textblockXml = XML("<body>\n{0}\n</body>\n".format(
                        removeEntities(xmltext)))
                    if not edbodyDTD.validate(textblockXml):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        # invalid children / unknown attributes are reported
                        # under the HTML rule instead of the XML rule
                        htmlError = any(
                            e.type_name in ("DTD_INVALID_CHILD",
                                            "DTD_UNKNOWN_ATTRIBUTE")
                            for e in errors)
                        modelXbrl.error(
                            "EFM.6.05.16" if htmlError else
                            ("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"
                              ),
                            modelObject=f1,
                            fact=f1.qname,
                            contextID=f1.contextID,
                            error=', '.join(e.message for e in errors))
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        for attrTag, attrValue in elt.items():
                            if ((attrTag == "href" and eltTag == "a")
                                    or (attrTag == "src" and eltTag == "img")):
                                if "javascript:" in attrValue:
                                    # the contextID placeholder needs its "s"
                                    # conversion, otherwise the following
                                    # characters are parsed as the conversion
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"
                                          ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag)
                                elif attrValue.startswith(
                                        "http://www.sec.gov/Archives/edgar/data/"
                                ) and eltTag == "a":
                                    pass  # links into the EDGAR archive are allowed
                                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"
                                          ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag)
                except (XMLSyntaxError, UnicodeDecodeError) as err:
                    modelXbrl.error(
                        ("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"
                          ),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        error=err)
Exemplo n.º 15
0
def saveTargetDocument(modelXbrl,
                       targetDocumentFilename,
                       targetDocumentSchemaRefs,
                       outputZip=None,
                       filingFiles=None,
                       *args,
                       **kwargs):
    """Build an extracted instance document from *modelXbrl* and save it.

    The target URL is *targetDocumentFilename* normalized against the
    source document's file path, with ``_extracted`` inserted before the
    extension.  A new instance is created there and populated with the
    source model's role/arcrole refs, contexts, units, facts (tuples
    recursively) and footnote links, then saved.

    :param modelXbrl: source model to extract from.
    :param targetDocumentFilename: file name used to derive the target URL.
    :param targetDocumentSchemaRefs: schema refs passed to the new instance.
    :param outputZip: forwarded to ``saveInstance``.
    :param filingFiles: when not ``None``, a collection with an ``add``
        method that receives the paths of local files referenced by
        ``<a>``/``<img>`` elements in text block facts and footnotes.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        targetDocumentFilename, modelXbrl.modelDocument.filepath)

    def addLocallyReferencedFile(elt, filingFiles):
        """Add the local file referenced by an <a>/<img> element, if it exists."""
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(
                        attrValue) and not os.path.isabs(attrValue):
                    attrValue = attrValue.partition('#')[0]  # remove anchor
                    if attrValue:  # ignore anchor references to base document
                        # change url path separators to host separators
                        attrValue = os.path.normpath(attrValue)
                        # NOTE(review): sourceDir is not defined in this
                        # function; presumably a module-level name - confirm,
                        # otherwise this line raises NameError.
                        filePath = os.path.join(sourceDir, attrValue)
                        if modelXbrl.fileSource.isInArchive(
                                filePath, checkExistence=True
                        ) or os.path.exists(filePath):
                            filingFiles.add(filePath)

    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(
        _("Extracting instance ") + os.path.basename(targetUrl))
    rootElt = modelXbrl.modelDocument.xmlRootElement
    # take baseXmlLang from the root element, then let <body> override it
    baseXmlLang = rootElt.get(
        "{http://www.w3.org/XML/1998/namespace}lang") or rootElt.get("lang")
    for ixElt in modelXbrl.modelDocument.xmlRootElement.iterdescendants(
            tag="{http://www.w3.org/1999/xhtml}body"):
        # NOTE(review): the second operand reads "lang" from rootElt, not
        # from ixElt; possibly meant ixElt.get("lang") - confirm intent.
        baseXmlLang = ixElt.get("{http://www.w3.org/XML/1998/namespace}lang"
                                ) or rootElt.get("lang") or baseXmlLang
    targetInstance = ModelXbrl.create(
        modelXbrl.modelManager,
        newDocumentType=Type.INSTANCE,
        url=targetUrl,
        schemaRefs=targetDocumentSchemaRefs,
        isEntry=True,
        discover=False)  # don't attempt to load DTS
    if baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set(
            "{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
    ValidateXbrlDimensions.loadDimensionDefaults(
        targetInstance)  # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement,
                     roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in sorted(modelXbrl.contexts.values(),
                          key=elementChildSequence):
        targetInstance.createContext(
            context.entityIdentifier[0],
            context.entityIdentifier[1],
            'instant' if context.isInstantPeriod else
            'duration' if context.isStartEndPeriod else 'forever',
            context.startDatetime,
            context.endDatetime,
            None,
            context.qnameDims, [], [],
            id=context.id)
    # units
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}  # source fact objectIndex -> fact created in target

    def createFacts(facts, parent):
        """Recreate *facts* under *parent* in the target instance."""
        for fact in facts:
            if fact.isItem:  # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in (
                            "string", "normalizedString"):  # default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != baseXmlLang:
                            attrs[
                                "{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
                newFact = targetInstance.createFact(fact.qname,
                                                    attributes=attrs,
                                                    text=text,
                                                    parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None, which the CDATA regex cannot
                # search; there is nothing to scan in that case.
                if (filingFiles is not None and text is not None
                        and fact.concept is not None
                        and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(
                                    xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(
        _("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}  # footnoteID -> number of times it has been emitted
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and any(
                    lP.modelDocument.type == Type.INLINEXBRL
                    for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the fact created in the target
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID,
                                                     0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(
                            attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink,
                                        linkChild.qname,
                                        attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != baseXmlLang:  # default
                        newChild.set(
                            "{http://www.w3.org/XML/1998/namespace}lang",
                            xmlLang)
                    copyIxFootnoteHtml(
                        linkChild,
                        newChild,
                        targetModelDocument=targetInstance.modelDocument,
                        withText=True)

                    # "is not None" (as for facts) so that an empty
                    # collection still receives footnote references
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)
    targetInstance.saveInstance(overrideFilepath=targetUrl,
                                outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True  # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
Exemplo n.º 16
0
def validateTextBlockFacts(modelXbrl):
    """Check the markup of non-inline text block facts and report problems.

    A fact is examined when it is not nil, its concept is a text block,
    its namespace is not one of ``ixbrlAll`` and its value matches
    ``XMLpattern``.  Findings are reported through ``modelXbrl.error``:
    disallowed named entities, DTD validation failures, javascript or
    external references in ``<a href>``/``<img src>``, graphics files with
    an unexpected extension, content or that cannot be opened, nested
    tables, and XML syntax / decoding errors.

    :param modelXbrl: model whose ``facts`` are checked.
    """
    loadDTD(modelXbrl)
    # graphics references already examined; emptied after each text fragment
    seenGraphics = set()

    for factObj in modelXbrl.facts:
        factConcept = factObj.concept
        if factObj.xsiNil == "true" or factConcept is None:
            continue
        if not factConcept.isTextBlock or factObj.namespaceURI in ixbrlAll:
            continue
        if not XMLpattern.match(factObj.value):
            continue

        # test encoded entity tags
        for found in namedEntityPattern.finditer(factObj.value):
            entityText = found.group()
            if entityText in xhtmlEntities:
                continue
            modelXbrl.error(
                ("EFM.6.05.16", "GFM.1.2.15"),
                _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                modelObject=factObj,
                fact=factObj.qname,
                contextID=factObj.contextID,
                entity=entityText,
                error=entityText)

        # test html: the whole value first, then every CDATA section in it
        fragments = [factObj.value]
        fragments.extend(CDATApattern.findall(factObj.value))
        for fragment in fragments:
            wrappedBody = "<body>\n{0}\n</body>\n".format(
                removeEntities(fragment))
            try:
                bodyElt = XML(wrappedBody)
                if not edbodyDTD.validate(bodyElt):
                    dtdErrors = edbodyDTD.error_log.filter_from_errors()
                    isHtmlError = any(
                        e.type_name in ("DTD_INVALID_CHILD",
                                        "DTD_UNKNOWN_ATTRIBUTE")
                        for e in dtdErrors)
                    if isHtmlError:
                        dtdCode = "EFM.6.05.16"
                    else:
                        dtdCode = ("EFM.6.05.15.dtdError", "GFM.1.02.14")
                    modelXbrl.error(
                        dtdCode,
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=factObj,
                        fact=factObj.qname,
                        contextID=factObj.contextID,
                        error=', '.join(e.message for e in dtdErrors),
                        messageCodes=("EFM.6.05.16",
                                      "EFM.6.05.15.dtdError",
                                      "GFM.1.02.14"))
                for node in bodyElt.iter():
                    tagName = node.tag
                    for attrName, attrValue in node.items():
                        isLinkAttr = ((attrName == "href" and tagName == "a")
                                      or (attrName == "src"
                                          and tagName == "img"))
                        if not isLinkAttr:
                            continue
                        if "javascript:" in attrValue:
                            modelXbrl.error(
                                "EFM.6.05.16.activeContent",
                                _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                modelObject=factObj,
                                fact=factObj.qname,
                                contextID=factObj.contextID,
                                attribute=attrName,
                                element=tagName)
                        elif not (attrValue.startswith(
                                "http://www.sec.gov/Archives/edgar/data/")
                                  and tagName == "a") and (
                                      "http:" in attrValue
                                      or "https:" in attrValue
                                      or "ftp:" in attrValue):
                            # anchors into the EDGAR archive are the only
                            # external references that are tolerated
                            modelXbrl.error(
                                "EFM.6.05.16.externalReference",
                                _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                modelObject=factObj,
                                fact=factObj.qname,
                                contextID=factObj.contextID,
                                attribute=attrName,
                                element=tagName)
                        if attrName != "src" or attrValue in seenGraphics:
                            continue
                        if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                            modelXbrl.error(
                                "EFM.6.05.16.graphicFileType",
                                _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                modelObject=factObj,
                                fact=factObj.qname,
                                contextID=factObj.contextID,
                                attribute=attrValue,
                                element=tagName)
                        else:  # test file contents
                            try:
                                detectedType = validateGraphicFile(
                                    factObj, attrValue)
                                if detectedType != attrValue.lower()[-3:]:
                                    modelXbrl.error(
                                        "EFM.6.05.16.graphicFileContent",
                                        _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                        modelObject=factObj,
                                        fact=factObj.qname,
                                        contextID=factObj.contextID,
                                        attribute=attrValue,
                                        element=tagName)
                            except IOError as ioErr:
                                modelXbrl.error(
                                    "EFM.6.05.16.graphicFileError",
                                    _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                    modelObject=factObj,
                                    fact=factObj.qname,
                                    contextID=factObj.contextID,
                                    attribute=attrValue,
                                    element=tagName,
                                    error=ioErr)
                        seenGraphics.add(attrValue)
                    # a <table> with any <table> ancestor is nested
                    if tagName == "table" and any(
                            ancestor is not None
                            for ancestor in node.iterancestors("table")):
                        modelXbrl.error(
                            "EFM.6.05.16.nestedTable",
                            _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                            modelObject=factObj,
                            fact=factObj.qname,
                            contextID=factObj.contextID)
            except (XMLSyntaxError, UnicodeDecodeError) as parseErr:
                modelXbrl.error(
                    ("EFM.6.05.15", "GFM.1.02.14"),
                    _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                    modelObject=factObj,
                    fact=factObj.qname,
                    contextID=factObj.contextID,
                    error=parseErr)

            seenGraphics.clear()
Exemplo n.º 17
0
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None, *args, **kwargs):
    """Build an extracted instance document from *modelXbrl* and save it.

    The target URL is *targetDocumentFilename* normalized against the
    source document's file path, with ``_extracted`` inserted before the
    extension.  A new instance is created there and populated with the
    source model's role/arcrole refs, contexts, units, facts (tuples
    recursively) and footnote links, then saved.

    :param modelXbrl: source model to extract from.
    :param targetDocumentFilename: file name used to derive the target URL.
    :param targetDocumentSchemaRefs: schema refs passed to the new instance.
    :param outputZip: forwarded to ``saveInstance``.
    :param filingFiles: when not ``None``, a collection with an ``add``
        method that receives the paths of local files referenced by
        ``<a>``/``<img>`` elements in text block facts and footnotes.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(targetDocumentFilename, modelXbrl.modelDocument.filepath)
    def addLocallyReferencedFile(elt,filingFiles):
        """Add the local file referenced by an <a>/<img> element, if it exists."""
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue):
                    attrValue = attrValue.partition('#')[0] # remove anchor
                    if attrValue: # ignore anchor references to base document
                        attrValue = os.path.normpath(attrValue) # change url path separators to host separators
                        # NOTE(review): sourceDir is not defined in this
                        # function; presumably a module-level name - confirm,
                        # otherwise this line raises NameError.
                        filePath = os.path.join(sourceDir,attrValue)
                        if modelXbrl.fileSource.isInArchive(filePath, checkExistence=True) or os.path.exists(filePath):
                            filingFiles.add(filePath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))
    rootElt = modelXbrl.modelDocument.xmlRootElement
    # take baseXmlLang from the root element, then let <body> override it
    baseXmlLang = rootElt.get("{http://www.w3.org/XML/1998/namespace}lang") or rootElt.get("lang")
    for ixElt in modelXbrl.modelDocument.xmlRootElement.iterdescendants(tag="{http://www.w3.org/1999/xhtml}body"):
        # NOTE(review): the second operand reads "lang" from rootElt, not
        # from ixElt; possibly meant ixElt.get("lang") - confirm intent.
        baseXmlLang = ixElt.get("{http://www.w3.org/XML/1998/namespace}lang") or rootElt.get("lang") or baseXmlLang
    targetInstance = ModelXbrl.create(modelXbrl.modelManager, 
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False) # don't attempt to load DTS
    if baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set("{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults 
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname, 
                     attributes=roleRefElt.items())
    
    # contexts
    for context in sorted(modelXbrl.contexts.values(), key=elementChildSequence):
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     'instant' if context.isInstantPeriod else
                                     'duration' if context.isStartEndPeriod
                                     else 'forever',
                                     context.startDatetime,
                                     context.endDatetime,
                                     None, 
                                     context.qnameDims, [], [],
                                     id=context.id)
    # units
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {} # source fact objectIndex -> fact created in target
    def createFacts(facts, parent):
        """Recreate *facts* under *parent* in the target instance."""
        for fact in facts:
            if fact.isItem: # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"): # default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != baseXmlLang:
                            attrs["{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None, which the CDATA regex cannot
                # search; there is nothing to scan in that case.
                if (filingFiles is not None and text is not None and
                    fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)
                
    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {} # footnoteID -> number of times it has been emitted
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement, 
                               linkPrototype.qname, 
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the fact created in the target
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, 
                             attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1: # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, 
                                        attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != baseXmlLang: # default
                        newChild.set("{http://www.w3.org/XML/1998/namespace}lang", xmlLang)
                    copyIxFootnoteHtml(linkChild, newChild, targetModelDocument=targetInstance.modelDocument, withText=True)

                    # "is not None" (as for facts) so that an empty
                    # collection still receives footnote references
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt,filingFiles)
    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)