Exemplo n.º 1
0
def validate_norm(fn, nn, version, it):
    """Time the validation of one normalizer definition.

    The XML definition ``fn`` is parsed, checked against ``normalizer.dtd``
    and turned into a ``Normalizer``. Its ``validate()`` method is then timed
    with ``timeit``; the elapsed time divided by the number of samples is
    handed to the module-level ``result`` object.

    Nothing is recorded (and a message is printed) when the name or version
    found in the definition does not match, or when there are no samples.

    @param fn: file name of the normalizer definition, relative to ``path``.
    @param nn: expected normalizer name (compared case-insensitively).
    @param version: expected normalizer version.
    @param it: number of iterations passed to ``timeit``.
    """
    global norm
    global result

    # Parse the definition; the handle is only needed while parsing.
    with open(os.path.join(path, fn)) as definition:
        n = parse(definition)
    # Validate the definition against the DTD
    with open(os.path.join(path, 'normalizer.dtd')) as dtd_file:
        dtd = DTD(dtd_file)
    assert dtd.validate(n) == True
    # Create normalizer from xml definition. It must live in the module
    # global ``norm`` because the timeit setup below imports it from __main__.
    norm = Normalizer(n, os.path.join(path, 'common_tagTypes.xml'),
                      os.path.join(path, 'common_callBacks.xml'))
    # Explicit comparisons (rather than assert/except AssertionError) so the
    # checks are still performed under ``python -O``.
    if norm.name.lower() != nn.lower():
        print("\n[%s] and [%s] don't match" % (norm.name, nn))
        return
    if norm.name != nn:
        print("Warning, %s has name attribute set to %s" % (fn, norm.name))
    if norm.version != version:
        print("\n[%s] and [%s] don't match" % (norm.version, version))
        return
    # Count the samples themselves, not the patterns that hold them
    # (assumes each pattern's ``examples`` is a sized collection).
    samples_amount = sum(len(pattern.examples)
                         for pattern in norm.patterns.values())
    if samples_amount <= 0:
        print("No samples to validate in %s" % fn)
        return
    # Time normalizer validation
    t = timeit.Timer("assert norm.validate() == True", "from __main__ import norm")
    s = t.timeit(it)
    # Normalize result against number of validated samples
    s = s / float(samples_amount)
    # Add result
    result.add_res(norm.name, norm.version, norm.authors, s)
Exemplo n.º 2
0
 def normalize_samples(self, norm, name, version):
     """Validate one normalizer definition and check its identity.

     The definition file ``norm`` (relative to ``self.normalizer_path``) is
     parsed and validated against ``normalizer.dtd``; a ``Normalizer`` is
     built from it, its name and version are compared with the expected
     values and its own ``validate()`` must succeed.

     @param norm: file name of the normalizer XML definition.
     @param name: expected normalizer name.
     @param version: expected normalizer version.
     """
     # Parse the definition; the handle is only needed while parsing.
     with open(os.path.join(self.normalizer_path, norm)) as definition:
         n = parse(definition)
     # validate DTD
     with open(os.path.join(self.normalizer_path,
                            'normalizer.dtd')) as dtd_file:
         dtd = DTD(dtd_file)
     dtd.assertValid(n)
     # Create normalizer from xml definition
     normalizer = Normalizer(
         n,
         os.path.join(self.normalizer_path, 'common_tagTypes.xml'),
         os.path.join(self.normalizer_path, 'common_callBacks.xml'))
     # assertEqual is the supported spelling; the assertEquals alias is
     # deprecated and no longer exists in recent Python versions.
     self.assertEqual(normalizer.name, name)
     self.assertEqual(normalizer.version, version)
     self.assertTrue(normalizer.validate())
Exemplo n.º 3
0
 def normalize_samples(self, norm, name, version):
     """Validate one normalizer definition and check its identity.

     The definition file ``norm`` (relative to ``self.normalizer_path``) is
     parsed and validated against ``normalizer.dtd``; a ``Normalizer`` is
     built from it, its name and version are compared with the expected
     values and its own ``validate()`` must succeed.

     @param norm: file name of the normalizer XML definition.
     @param name: expected normalizer name.
     @param version: expected normalizer version.
     """
     # Parse the definition; the handle is only needed while parsing.
     with open(os.path.join(self.normalizer_path, norm)) as definition:
         n = parse(definition)
     # validate DTD
     with open(os.path.join(self.normalizer_path,
                            'normalizer.dtd')) as dtd_file:
         dtd = DTD(dtd_file)
     self.assertTrue(dtd.validate(n))
     # Create normalizer from xml definition
     normalizer = Normalizer(
         n, os.path.join(self.normalizer_path, 'common_tagTypes.xml'))
     # assertEqual is the supported spelling; the assertEquals alias is
     # deprecated and no longer exists in recent Python versions.
     self.assertEqual(normalizer.name, name)
     self.assertEqual(normalizer.version, version)
     self.assertTrue(normalizer.validate())
Exemplo n.º 4
0
def xhtmlValidate(modelXbrl, elt):
    """Validate the XHTML form of ``elt`` against ``xhtml1-strict-ix.dtd``.

    The DTD is read from the controller's config directory, ``elt`` is
    converted with ``XmlUtil.ixToXhtml`` and the result is validated. Any
    DTD violation, or an ``XMLSyntaxError`` raised on the way, is reported
    through ``modelXbrl.error`` with code ``xmlDTD:error``.

    :param modelXbrl: model whose ``error`` method receives the report and
        whose controller supplies the config directory.
    :param elt: element to convert and validate.
    """
    from lxml.etree import DTD, XMLSyntaxError
    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)

    def dtdErrors():
        # Comma-separated messages of the errors currently in the DTD log.
        return ', '.join(e.message for e in dtd.error_log.filter_from_errors())

    try:
        if not dtd.validate( XmlUtil.ixToXhtml(elt) ):
            modelXbrl.error("xmlDTD:error",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=dtdErrors())
    except XMLSyntaxError as err:
        # Report the DTD log the same way as above; when it holds nothing,
        # fall back to the exception text so the cause is not lost.
        modelXbrl.error("xmlDTD:error",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(),
            error=dtdErrors() or str(err))
Exemplo n.º 5
0
  def _validate_xml_tree(
      self, tree: etree._ElementTree, dtd: etree.DTD
  ) -> None:
    """Check a parsed tree against a DTD.

    Nothing is checked when ``dtd`` is falsy (for example ``None``).

    Arguments:
        tree {etree._ElementTree} -- tree to verify
        dtd {etree.DTD} -- DTD used for validation

    Raises:
        ValidationError: If ``dtd.validate(tree)`` returns ``False``; the
            message is the string form of the DTD's filtered error log.
    """
    if not dtd:
      return
    is_valid = dtd.validate(tree)
    if is_valid is False:
      # TODO: convert the list to text and raise for each entry
      failures = dtd.error_log.filter_from_errors()
      raise ValidationError(str(failures))


#TODO: work out simple metrics that can be counted during parsing
#TODO define metric modules
#TODO toplevel class provides add_stats -> sub module references stats
#TODO open/close functions for reset/finish checking 
#TODO raw metrics: number of fbs, fbtypes, ecc states, events, inputs, outputs, ecc vertices
#TODO further metrics: undefined datatypes, cycles in ecc, unreachable ecc states, locks in ecc,...
#TODO each checker has a check function -> sorted list by prio -> do_check iterates over all checkers
#TODO define output format of msg, write list to file, maybe GUI output?? (super extra) would be useful for graphs
#TODO output would be filename, element name, type (err/warn), message
#TODO how to handle incomplete types
Exemplo n.º 6
0
def test(xhtml_file: Path, dtd: DTD, schematron: Schematron) -> bool:
    """
    Run every check on one XHTML file: parse it, validate it against the
    DTD, apply the Schematron rules, then check its links and images.
    An unreadable file is reported on stderr; parser, DTD and Schematron
    failures are passed to the error-log printing helpers.

    :param xhtml_file: the XHTML file to test
    :param dtd: the DTD
    :param schematron: the Schematron
    :return: False as soon as one stage fails, otherwise the combined
        outcome of the link and image checks
    """
    if settings.verbose:
        print(xhtml_file)

    clear_error_log()

    xhtml_parser = XHTMLParser(dtd_validation=True, ns_clean=True)
    try:
        root = parse(source=str(xhtml_file), parser=xhtml_parser).getroot()
    except IOError as e:
        print(f"{xhtml_file}: {e.strerror}", file=stderr)
        return False
    except XMLSyntaxError:
        print_error_log(xhtml_parser.error_log)
        return False

    dtd_ok = dtd.validate(root)
    if not dtd_ok:
        print_error_log(dtd.error_log)
        return False

    schematron_ok = schematron.validate(root)
    if not schematron_ok:
        print_schematron_error_log(root, schematron)
        return False

    # Images are only checked when the link check succeeded.
    links_ok = test_links(xhtml_file, root)
    return test_images(xhtml_file, root) if links_ok else links_ok
Exemplo n.º 7
0
def loadDTD(modelXbrl):
    """Load ``edbody.dtd`` into the module global ``edbodyDTD`` on first use.

    Does nothing when ``edbodyDTD`` is already set. The DTD file is looked
    up in the controller's config directory.
    """
    global edbodyDTD
    if edbodyDTD is not None:
        return
    dtdPath = os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           "edbody.dtd")
    with open(dtdPath) as fh:
        edbodyDTD = DTD(fh)
Exemplo n.º 8
0
def loadDTD(modelXbrl):
    """(Re)load the DTD matching the current document type into ``edbodyDTD``.

    ``xhtml1-strict-ix.dtd`` is used when the model document is inline XBRL,
    ``edbody.dtd`` otherwise. The DTD is only read again when the inline
    flag differs from the one recorded in ``isInlineDTD`` by the previous
    successful load.
    """
    global edbodyDTD, isInlineDTD
    initModelDocumentTypeReferences()
    _isInline = modelXbrl.modelDocument.type == ModelDocumentTypeINLINEXBRL
    if isInlineDTD is None or isInlineDTD != _isInline:
        dtdName = "xhtml1-strict-ix.dtd" if _isInline else "edbody.dtd"
        with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                               dtdName)) as fh:
            edbodyDTD = DTD(fh)
        # Record the flavour only after the DTD was actually loaded, so a
        # failed open/parse is retried on the next call instead of leaving
        # a stale (or missing) edbodyDTD marked as current.
        isInlineDTD = _isInline
Exemplo n.º 9
0
def run(xhtml_files: List[Path], dtd_file: Path, images: bool,
        links: bool) -> bool:
    """
    Test every XHTML file against the DTD.

    :param xhtml_files: the XHTML files to test
    :param dtd_file: path to the DTD file
    :param images: passed through to ``test`` for each file
    :param links: passed through to ``test`` for each file
    :return: False if the DTD cannot be parsed or if ``test`` fails for at
        least one file, True otherwise
    """
    try:
        # Parse the DTD once up front so that a broken DTD is reported a
        # single time instead of once per file.
        DTD(str(dtd_file))
    except DTDParseError as e:
        print(e.error_log, file=stderr)
        clear_error_log()
        return False

    success = True
    for xhtml_file in xhtml_files:
        # if you reuse the parser on too many documents it gets confused
        parser = XHTMLParser(dtd_validation=True, ns_clean=True)
        dtd = DTD(str(dtd_file))
        if settings.verbose:
            print(xhtml_file)
        if not test(xhtml_file, parser, dtd, images, links):
            success = False
    return success
Exemplo n.º 10
0
def loadDTD(modelXbrl):
    """(Re)load the DTD matching the current document type into ``edbodyDTD``.

    ``xhtml1-strict-ix.dtd`` is used when the model document is inline XBRL,
    ``edbody.dtd`` otherwise. The DTD is only read again when the inline
    flag differs from the one recorded in ``isInlineDTD`` by the previous
    successful load.
    """
    global edbodyDTD, isInlineDTD, ModelDocumentTypeINLINEXBRL
    if ModelDocumentTypeINLINEXBRL is None:
        # Resolved on first use rather than at module import time.
        from arelle.ModelDocument import Type
        ModelDocumentTypeINLINEXBRL = Type.INLINEXBRL
    _isInline = modelXbrl.modelDocument.type == ModelDocumentTypeINLINEXBRL
    if isInlineDTD is None or isInlineDTD != _isInline:
        dtdName = "xhtml1-strict-ix.dtd" if _isInline else "edbody.dtd"
        with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                               dtdName)) as fh:
            edbodyDTD = DTD(fh)
        # Record the flavour only after the DTD was actually loaded, so a
        # failed open/parse is retried on the next call instead of leaving
        # a stale (or missing) edbodyDTD marked as current.
        isInlineDTD = _isInline
Exemplo n.º 11
0
def open_dtd(dtd_file: Path) -> DTD:
    """
    Open and validate an XML DTD. Exit program on failure.

    :param dtd_file: path to a DTD file
    :return: A DTD object
    :raises SystemExit: with status 1 when the DTD cannot be parsed; the
        parse error is printed to stderr first
    """
    try:
        return DTD(str(dtd_file))
    except DTDParseError as e:
        print(f"{dtd_file}:1: {e}", file=stderr)
        # Raise SystemExit directly: the exit() builtin is injected by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)
Exemplo n.º 12
0
def xhtmlValidate(modelXbrl, elt):
    """Validate the XHTML form of ``elt`` against ``xhtml1-strict-ix.dtd``.

    The DTD is read from the controller's config directory, ``elt`` is
    converted with ``XmlUtil.ixToXhtml`` and the result is validated. Any
    DTD violation, or an ``XMLSyntaxError`` raised on the way, is reported
    through ``modelXbrl.error`` with code ``xmlDTD:error``.

    :param modelXbrl: model whose ``error`` method receives the report and
        whose controller supplies the config directory.
    :param elt: element to convert and validate.
    """
    from lxml.etree import DTD, XMLSyntaxError
    # copy xhtml elements to fresh tree
    with open(
            os.path.join(modelXbrl.modelManager.cntlr.configDir,
                         "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)

    def dtdErrors():
        # Comma-separated messages of the errors currently in the DTD log.
        return ', '.join(
            e.message for e in dtd.error_log.filter_from_errors())

    try:
        if not dtd.validate(XmlUtil.ixToXhtml(elt)):
            modelXbrl.error("xmlDTD:error",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=dtdErrors())
    except XMLSyntaxError as err:
        # Report the DTD log the same way as above; when it holds nothing,
        # fall back to the exception text so the cause is not lost.
        modelXbrl.error("xmlDTD:error",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtdErrors() or str(err))
Exemplo n.º 13
0
    def __init__(self, normalizers_paths, active_normalizers=None):
        """
        Instantiates a flow manager. The default behavior is to activate every
        available normalizer.

        @param normalizers_paths: a list (or tuple) of absolute paths to the
        normalizer XML definitions to use or a just a single path as str.
        @param active_normalizers: a dictionary of active normalizers
        in the form {name-version : [True|False]}. A new empty dictionary
        is used when omitted.
        @raise ValueError: if one of the paths is not a directory.
        @raise StandardError: if normalizer.dtd, common_tagTypes.xml or
        common_callBacks.xml was not found in any of the directories.
        """
        # "list or tuple" evaluates to just "list"; a tuple of classes is
        # needed for tuples of paths to be accepted as-is.
        if not isinstance(normalizers_paths, (list, tuple)):
            normalizers_paths = [
                normalizers_paths,
            ]
        # A fresh dict per instance: a literal {} default would be one
        # object shared by every instance created without this argument.
        if active_normalizers is None:
            active_normalizers = {}
        self.normalizers_paths = normalizers_paths
        self.active_normalizers = active_normalizers
        self.dtd, self.ctt, self.ccb = None, None, None

        # Walk through paths for normalizer.dtd and common_tagTypes.xml
        # /!\ dtd file and common elements will be overridden if present in
        # many directories.
        for norm_path in self.normalizers_paths:
            if not os.path.isdir(norm_path):
                raise ValueError(
                    "Invalid normalizer directory : %s" % (norm_path,))
            dtd = os.path.join(norm_path, 'normalizer.dtd')
            ctt = os.path.join(norm_path, 'common_tagTypes.xml')
            ccb = os.path.join(norm_path, 'common_callBacks.xml')
            if os.path.isfile(dtd):
                # The handle is only needed while the DTD is being built.
                with open(dtd) as dtd_file:
                    self.dtd = DTD(dtd_file)
            if os.path.isfile(ctt):
                self.ctt = ctt
            if os.path.isfile(ccb):
                self.ccb = ccb
        # Technically the common elements files should NOT be mandatory.
        # But many normalizers use them, so better safe than sorry.
        if not self.dtd or not self.ctt or not self.ccb:
            raise StandardError("Missing DTD or common library files")
        self._cache = []
        self.reload()
Exemplo n.º 14
0
def test(xhtml_file: Path, parser: XHTMLParser, dtd: DTD, images: bool,
         links: bool) -> bool:
    """
    Parse one XHTML file, validate it against the DTD and, when requested,
    run the image and link checks on it.

    Read, syntax and validation errors are printed to stderr. The global
    error log is cleared before returning, whatever the outcome.

    :param xhtml_file: the XHTML file to test
    :param parser: parser used to read the file
    :param dtd: DTD the document must be valid against
    :param images: whether to run ``test_images``
    :param links: whether to run ``test_links``
    :return: a false value if any stage failed, otherwise the outcome of
        the last requested check (True when none was requested)
    """
    outcome = False
    try:
        root = parse(source=str(xhtml_file), parser=parser).getroot()
        dtd.assertValid(root)
    except IOError as e:
        print(f"{xhtml_file}: {e.strerror}", file=stderr)
    except (XMLSyntaxError, DocumentInvalid) as e:
        print(str(e.error_log), file=stderr)
    else:
        outcome = True
        if images:
            outcome = test_images(xhtml_file, root)
        if links:
            # Links are only checked while everything so far succeeded.
            outcome = outcome and test_links(xhtml_file, root)
    finally:
        clear_error_log()
    return outcome
Exemplo n.º 15
0
def validate_norm(fn, nn, version, it):
    """Time the validation of one normalizer definition.

    The XML definition ``fn`` is parsed, checked against ``normalizer.dtd``
    and turned into a ``Normalizer``. Its ``validate()`` method is then timed
    with ``timeit``; the elapsed time divided by the number of samples is
    handed to the module-level ``result`` object.

    Nothing is recorded (and a message is printed) when the name or version
    found in the definition does not match, or when there are no samples.

    @param fn: file name of the normalizer definition, relative to ``path``.
    @param nn: expected normalizer name (compared case-insensitively).
    @param version: expected normalizer version.
    @param it: number of iterations passed to ``timeit``.
    """
    global norm
    global result

    # Parse the definition; the handle is only needed while parsing.
    with open(os.path.join(path, fn)) as definition:
        n = parse(definition)
    # Validate the definition against the DTD
    with open(os.path.join(path, 'normalizer.dtd')) as dtd_file:
        dtd = DTD(dtd_file)
    assert dtd.validate(n) == True
    # Create normalizer from xml definition. It must live in the module
    # global ``norm`` because the timeit setup below imports it from __main__.
    norm = Normalizer(n, os.path.join(path, 'common_tagTypes.xml'),
                      os.path.join(path, 'common_callBacks.xml'))
    # Explicit comparisons (rather than assert/except AssertionError) so the
    # checks are still performed under ``python -O``.
    if norm.name.lower() != nn.lower():
        print("\n[%s] and [%s] don't match" % (norm.name, nn))
        return
    if norm.name != nn:
        print("Warning, %s has name attribute set to %s" % (fn, norm.name))
    if norm.version != version:
        print("\n[%s] and [%s] don't match" % (norm.version, version))
        return
    # Count the samples themselves, not the patterns that hold them
    # (assumes each pattern's ``examples`` is a sized collection).
    samples_amount = sum(len(pattern.examples)
                         for pattern in norm.patterns.values())
    if samples_amount <= 0:
        print("No samples to validate in %s" % fn)
        return
    # Time normalizer validation
    t = timeit.Timer("assert norm.validate() == True",
                     "from __main__ import norm")
    s = t.timeit(it)
    # Normalize result against number of validated samples
    s = s / float(samples_amount)
    # Add result
    result.add_res(norm.name, norm.version, norm.authors, s)
Exemplo n.º 16
0
 def __init__(self, normalizers_path, active_normalizers=None):
     """
     Instantiates a flow manager. The default behavior is to activate every
     available normalizer.

     @param normalizers_path: absolute path to the normalizer XML
     definitions to use; normalizer.dtd is read from this directory.
     @param active_normalizers: a dictionary of active normalizers
     in the form {name: [True|False]}. A new empty dictionary is used
     when omitted.
     """
     # A fresh dict per instance: a literal {} default would be one object
     # shared by every instance created without this argument.
     if active_normalizers is None:
         active_normalizers = {}
     self.normalizers_path = normalizers_path
     self.active_normalizers = active_normalizers
     # The handle is only needed while the DTD is being built.
     with open(os.path.join(self.normalizers_path,
                            "normalizer.dtd")) as dtd_file:
         self.dtd = DTD(dtd_file)
     self._cache = []
     self.reload()
Exemplo n.º 17
0
 def __init__(self, normalizers_paths, active_normalizers=None):
     """
     Instantiates a flow manager. The default behavior is to activate every
     available normalizer.

     @param normalizers_paths: a list (or tuple) of absolute paths to the
     normalizer XML definitions to use or a just a single path as str.
     @param active_normalizers: a dictionary of active normalizers
     in the form {name-version : [True|False]}. A new empty dictionary
     is used when omitted.
     @raise ValueError: if one of the paths is not a directory.
     @raise StandardError: if normalizer.dtd, common_tagTypes.xml or
     common_callBacks.xml was not found in any of the directories.
     """
     # "list or tuple" evaluates to just "list"; a tuple of classes is
     # needed for tuples of paths to be accepted as-is.
     if not isinstance(normalizers_paths, (list, tuple)):
         normalizers_paths = [normalizers_paths,]
     # A fresh dict per instance: a literal {} default would be one object
     # shared by every instance created without this argument.
     if active_normalizers is None:
         active_normalizers = {}
     self.normalizers_paths = normalizers_paths
     self.active_normalizers = active_normalizers
     self.dtd, self.ctt, self.ccb = None, None, None

     # Walk through paths for normalizer.dtd and common_tagTypes.xml
     # /!\ dtd file and common elements will be overridden if present in
     # many directories.
     for norm_path in self.normalizers_paths:
         if not os.path.isdir(norm_path):
             raise ValueError(
                 "Invalid normalizer directory : %s" % (norm_path,))
         dtd = os.path.join(norm_path, 'normalizer.dtd')
         ctt = os.path.join(norm_path, 'common_tagTypes.xml')
         ccb = os.path.join(norm_path, 'common_callBacks.xml')
         if os.path.isfile(dtd):
             # The handle is only needed while the DTD is being built.
             with open(dtd) as dtd_file:
                 self.dtd = DTD(dtd_file)
         if os.path.isfile(ctt):
             self.ctt = ctt
         if os.path.isfile(ccb):
             self.ccb = ccb
     # Technically the common elements files should NOT be mandatory.
     # But many normalizers use them, so better safe than sorry.
     if not self.dtd or not self.ctt or not self.ccb:
         raise StandardError("Missing DTD or common library files")
     self._cache = []
     self.reload()
Exemplo n.º 18
0
 def __init__(self, normalizers_paths, active_normalizers=None):
     """
     Instantiates a flow manager. The default behavior is to activate every
     available normalizer.

     @param normalizers_paths: a list (or tuple) of absolute paths to the
     normalizer XML definitions to use or a just a single path as str.
     @param active_normalizers: a dictionary of active normalizers
     in the form {name: [True|False]}. A new empty dictionary is used
     when omitted.
     """
     # "list or tuple" evaluates to just "list"; a tuple of classes is
     # needed for tuples of paths to be accepted as-is.
     if not isinstance(normalizers_paths, (list, tuple)):
         normalizers_paths = [normalizers_paths,]
     # A fresh dict per instance: a literal {} default would be one object
     # shared by every instance created without this argument.
     if active_normalizers is None:
         active_normalizers = {}
     self.normalizers_paths = normalizers_paths
     self.active_normalizers = active_normalizers
     # Walk through paths for normalizer.dtd and common_tagTypes.xml
     # NOTE(review): self.dtd / self.ctt are only assigned when the files
     # are found; nothing here sets them otherwise - confirm that callers
     # always supply a directory containing both files.
     for norm_path in self.normalizers_paths:
         dtd = os.path.join(norm_path, 'normalizer.dtd')
         ctt = os.path.join(norm_path, 'common_tagTypes.xml')
         if os.path.isfile(dtd):
             # The handle is only needed while the DTD is being built.
             with open(dtd) as dtd_file:
                 self.dtd = DTD(dtd_file)
         if os.path.isfile(ctt):
             self.ctt = ctt
     self._cache = []
     self.reload()
Exemplo n.º 19
0
def xhtmlValidate(modelXbrl, elt):
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}
    
    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                    "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:                
                    modelXbrl.error(ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)
                
                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = ({"scheme", "periodType",     "balance", "contextRef", "unitRef", "precision", "decimals"} -
                                        {"fraction": {"contextRef", "unitRef"},
                                         "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                                         "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(ixMsgCode("inlineElementAttributes",elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(ixMsgCode("attributeNotExpected",elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)
                
    def checkHierarchyConstraints(elt):
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*',)
                else:
                    nameFilter = names
                if nameFilter == ('*',):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[0] and nameFilter[0][0] == "{":
                    namespaceFilter, _sep, nameFilter = nameFilter[0][1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt,namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {"ancestor": XmlUtil.ancestor, 
                             "parent": XmlUtil.parent, 
                             "child-choice": XmlUtil.children, 
                             "child-sequence": XmlUtil.children,
                             "child-or-text": XmlUtil.children,
                             "descendant": XmlUtil.descendants}[rel](
                            elt, 
                            namespaceFilter,
                            nameFilter)
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False, ixContinuation=False, ixResolveUris=False)
                issue = ''
                if reqt in ('^',):
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1',) and not elt.isNil:
                    if sum(r.localName in names and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [str(r.elementQname)
                                  for r in relations
                                  if not (r.tag in names or
                                          (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(reqt == '&' and 
                                           rel in ("child-sequence", "child-choice") 
                                           and elt.textValue.strip())
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or
                    (issue) or disallowedChildText):
                    code = "{}:{}".format(ixSect[elt.namespaceURI].get(elt.localName,"other")["constraint"], {
                           'ancestor': "ancestorNode",
                           'parent': "parentNode",
                           'child-choice': "childNodes",
                           'child-sequence': "childNodes",
                           'child-or-text': "childNodesOrText",
                           'descendant': "descendantNodes"}[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"}.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}").format(
                                elt.localName,
                                {'+': "must", '-': "may not", '&': "may only",
                                 '?': "may", '+': "must", '^': "must", '1': "must"}[reqt],
                                {'ancestor': "be nested in",
                                 'parent': "have parent",
                                 'child-choice': "have child",
                                 'child-sequence': "have child",
                                 'child-or-text': "have child or text,",
                                 'descendant': "have as descendant"}[rel],
                                '' if rel == 'child-or-text' else
                                ', '.join(str(r.elementQname) for r in relations)
                                if names == ('*',) and relations else
                                ", ".join("{}:{}".format(namespacePrefix, n) for n in names),
                                issue,
                                " and no child text (\"{}\")".format(elt.textValue.strip()[:32]) if disallowedChildText else "")
                    modelXbrl.error(code, msg, 
                                    modelObject=[elt] + relations, requirement=reqt,
                                    messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                                  "ix{ver.sect}:childNodesOrTextRequired",
                                                  "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                                  "ix{ver.sect}:descendantNodesDisallowed",
                                                  "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            hasText = (elt.text or "") or any((childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"),
                        modelObject=[elt] + ancestorNonFractions, fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid() # prevent further validation or cascading errors
            else:
                if ((childElts and (len(childElts) != 1 or childElts[0].namespaceURI != _ixNS or childElts[0].localName != "nonFraction")) or
                    (childElts and hasText)):
                    modelXbrl.error(ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."),
                        modelObject=[elt] + childElts, fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS, elt.localName)
                if ancestorFractions:
                    modelXbrl.error(ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"),
                        modelObject=[elt] + ancestorFractions, fact=elt.qname)
            else:
                nonFrChildren = [e for e in XmlUtil.children(elt, _ixNS, '*') if e.localName not in ("fraction", "numerator", "denominator")]
                if nonFrChildren:
                    modelXbrl.error(ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"),
                        modelObject=[elt] + nonFrChildren, fact=elt.qname, children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(elt, XbrlConst.ixbrl11, "fraction"): # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(ancestorFraction.get("unitRef")):
                        modelXbrl.error(ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"),
                            modelObject=[elt] + nonFrChildren, fact=elt.qname, unitRef=elt.get("unitRef"), unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator", "nonNumeric"):
            fmt = elt.format
            if fmt:
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[fmt.namespaceURI]:
                    modelXbrl.error(ixMsgCode("invalidTransformation", elt, sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"),
                        modelObject=elt, fact=elt.qname, transform=fmt, name=fmt.localName)
                    elt.setInvalid()

                    
    def ixToXhtml(fromRoot):
        """Build a fresh element tree mirroring fromRoot and return its root.

        The children are mirrored first (through copyNonIxChildren), then every
        attribute of fromRoot is checked with checkAttribute and copied onto
        the new root unless it is one of the omitted attributes below.
        """
        # attributes that are checked but never copied to the new root
        omittedAttrs = ('version', # used in inline test cases but not valid xhtml
                        '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation')
        xhtmlRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, xhtmlRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag in omittedAttrs:
                continue
            xhtmlRoot.set(attrTag, attrValue)
        return xhtmlRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Check the inline XBRL children of fromElt and mirror the rest under toElt.

        Only ModelObject children are visited.  Children in an inline XBRL
        namespace are checked (element name, hierarchy constraints, attributes,
        required attributes) and are not copied themselves:
          * footnote, nonNumeric and continuation are replaced by an
            "ixNestedContent" element holding their mirrored content;
          * other inline elements have their content mirrored directly
            under toElt.
        Children in any other namespace are copied with their attributes (each
        attribute is checked first), text and tail.

        With excludeSubtree set, or below an inline references/resources
        element, descendants are still checked but nothing is copied.
        """
        uncopiedContainers = {"references", "resources"}
        nestingElements = {"footnote", "nonNumeric", "continuation"}
        for fromChild in fromElt.iterchildren():
            if not isinstance(fromChild, ModelObject):
                continue
            childName = fromChild.localName
            childNs = fromChild.namespaceURI
            isIxNs = childNs in XbrlConst.ixbrlAll
            if isIxNs:
                if childName in ixElements[childNs]:
                    checkHierarchyConstraints(fromChild)
                    for attrTag, attrValue in fromChild.items():
                        checkAttribute(fromChild, True, attrTag, attrValue)
                    for attrTag in ixAttrRequired[childNs].get(childName,[]):
                        if fromChild.get(attrTag) is None:
                            modelXbrl.error(ixMsgCode("attributeRequired", fromChild),
                                _("Attribute %(attribute)s required on element ix:%(element)s"),
                                modelObject=fromChild, attribute=attrTag, element=childName)
                else:
                    modelXbrl.error(ixMsgCode("elementNameInvalid",ns=_ixNS),
                        _("Inline XBRL element name %(element)s is not valid"),
                        modelObject=fromChild, element=str(fromChild.elementQname))
            if excludeSubtree or (isIxNs and childName in uncopiedContainers):
                # keep checking the subtree, but copy nothing from it
                copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                continue
            if isIxNs and childName not in nestingElements:
                # transparent inline element: its content lands directly in toElt
                copyNonIxChildren(fromChild, toElt)
                continue
            isHtmlChild = not isIxNs
            toChild = etree.Element(childName if isHtmlChild else "ixNestedContent")
            toElt.append(toChild)
            copyNonIxChildren(fromChild, toChild)
            if isHtmlChild:
                for attrTag, attrValue in fromChild.items():
                    checkAttribute(fromChild, False, attrTag, attrValue)
                    toChild.set(attrTag, attrValue)
            if fromChild.text is not None:
                toChild.text = fromChild.text
            if fromChild.tail is not None:
                toChild.tail = fromChild.tail
                            
    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate( ixToXhtml(elt) ):
            modelXbrl.error("html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.", isInline=True) 
    except XMLSyntaxError as err:
        modelXbrl.error("html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(), error=dtd.error_log.filter_from_errors())
Exemplo n.º 20
0
 def test_00_validate_fake_syslog(self):
     """Validate the fake normalizer against normalizer.dtd.

     The DTD is read from self.normalizer_path and the parsed definition
     under test is taken from self.n.
     """
     dtd_path = os.path.join(self.normalizer_path, 'normalizer.dtd')
     # The DTD is built while the file is open; the handle is then closed
     # instead of being left to the garbage collector.
     with open(dtd_path) as dtd_file:
         dtd = DTD(dtd_file)
     self.assertTrue(dtd.validate(self.n))
Exemplo n.º 21
0
def xhtmlValidate(modelXbrl, elt):
    from lxml.etree import DTD, XMLSyntaxError
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    
    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Report problems with one attribute of elt through modelXbrl.error.

        elt       -- element carrying the attribute
        isIxElt   -- True when elt is an inline XBRL element, False for html
        attrTag   -- attribute name, in "{namespace}local" form when qualified
        attrValue -- attribute value

        Qualified attributes are checked against the namespaces allowed for
        the kind of element.  Unqualified attributes are only checked on
        inline XBRL elements: the value is validated against the type found in
        ixAttrType, and the element is checked for xbrli-style attributes that
        its local name does not allow.  Unqualified attributes on html elements
        are not checked here.
        """
        if attrTag.startswith("{"):
            attrNs, _sep, attrLocalName = attrTag[1:].partition("}")
            if not isIxElt:
                if attrNs in XbrlConst.ixbrlAll:
                    modelXbrl.error("ix:inlineAttributeMisplaced",
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=attrLocalName)
                elif attrNs not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error("ix:extensionAttributeMisplaced",
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
            elif attrNs not in (XbrlConst.xml, XbrlConst.xsi):
                modelXbrl.error("ix:qualifiedAttributeNotExpected",
                    _("Inline XBRL element %(element)s: has qualified attribute %(name)s"),
                    modelObject=elt, element=str(elt.elementQname), name=attrTag)
            return
        if not isIxElt:
            return
        try:
            _xsdType = ixAttrType[elt.namespaceURI][attrTag]
            # a dict entry carries the base type under "type" and doubles as the facets
            hasFacets = isinstance(_xsdType, dict)
            XmlValidate.validateValue(modelXbrl, elt, attrTag,
                                      _xsdType["type"] if hasFacets else _xsdType,
                                      attrValue,
                                      facets=_xsdType if hasFacets else None)

            xbrliAttrs = {"scheme", "periodType", "balance", "contextRef", "unitRef", "precision", "decimals"}
            allowedHere = {"fraction": {"contextRef", "unitRef"},
                           "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                           "nonNumeric": {"contextRef"}}.get(elt.localName, set())
            disallowedAttrs = [name for name in xbrliAttrs - allowedHere
                               if elt.get(name) is not None]
            if disallowedAttrs:
                modelXbrl.error("ix:inlineElementAttributes",
                    _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                    modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
        except KeyError:
            # any KeyError raised above is reported as an unexpected attribute
            modelXbrl.error("ix:attributeNotExpected",
                _("Attribute %(attribute)s is not expected on element element ix:%(element)s"),
                modelObject=elt, attribute=attrTag, element=elt.localName)
                
    def checkHierarchyConstraints(elt):
        """Check elt against the ixHierarchyConstraints entries for its local name.

        Each constraint is a (_rel, names) pair.  The first character of _rel
        is the requirement code and the remainder is the relationship
        ("ancestor", "parent", "child" or "descendant").  Requirement codes:
          '+'  at least one related element named in names must be present
          '-'  no related element named in names may be present
          '?'  at most one related element named in names may be present
          '&'  every related element must be one of names, in elt's namespace
          '^'  as '&', and at least one element named in names must be present
        Violations are reported through modelXbrl.error; nothing is returned.
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                # '&' and '^' need every related element, so that the ones
                # which are not in names can be reported
                if reqt in ('&', '^'):
                    nameFilter = ('*',)
                else:
                    nameFilter = names
                relations = {"ancestor": XmlUtil.ancestor,
                             "parent": XmlUtil.parent,
                             "child": XmlUtil.children,
                             "descendant": XmlUtil.descendants}[rel](
                            elt,
                            '*' if nameFilter == ('*',) else elt.namespaceURI,
                            nameFilter)
                # ancestor/parent lookups give a single element or None;
                # normalize to a list like the child/descendant lookups
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                issue = ''
                if reqt == '^':
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [str(r.elementQname)
                                  for r in relations
                                  if r.localName not in names or r.namespaceURI != elt.namespaceURI]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have more than 1 but none present "
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or
                    (issue)):
                    code = "ix:" + {
                           'ancestor': "ancestorNode",
                           'parent': "parentNode",
                           'child': "childNodes",
                           'descendant': "descendantNodes"}[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified"}.get(reqt, "Specified")
                    # The verb table needs an entry for every requirement code
                    # that can reach this point; '^' was missing, so a '^'
                    # violation raised KeyError instead of being reported.
                    msg = _("Inline XBRL 1.0 ix:{0} {1} {2} {3} {4} element").format(
                                elt.localName,
                                {'+': "must", '-': "may not", '&': "may only",
                                 '?': "may", '^': "must"}[reqt],
                                {'ancestor': "be nested in",
                                 'parent': "have parent",
                                 'child': "have child",
                                 'descendant': "have as descendant"}[rel],
                                ', '.join(str(r.elementQname) for r in relations)
                                if names == ('*',) and relations else
                                ", ".join("ix:" + n for n in names),
                                issue)
                    modelXbrl.error(code, msg,
                                    modelObject=[elt] + relations, requirement=reqt)
                
    def ixToXhtml(fromRoot):
        """Build a fresh element tree mirroring fromRoot and return its root.

        The children are mirrored first (through copyNonIxChildren), then every
        attribute of fromRoot is checked with checkAttribute and copied onto
        the new root unless it is one of the omitted attributes below.
        """
        # attributes that are checked but never copied to the new root
        omittedAttrs = ('version', # used in inline test cases but not valid xhtml
                        '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation')
        xhtmlRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, xhtmlRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag in omittedAttrs:
                continue
            xhtmlRoot.set(attrTag, attrValue)
        return xhtmlRoot

    def copyNonIxChildren(fromElt, toElt):
        """Check the inline XBRL children of fromElt and mirror the rest under toElt.

        Only ModelObject children are visited.  Children in an inline XBRL
        namespace are checked (hierarchy constraints, attributes, required
        attributes) and are not copied themselves:
          * references and resources are dropped together with their content;
          * footnote, nonNumeric and continuation are replaced by an
            "ixNestedContent" element holding their mirrored content;
          * other inline elements have their content mirrored directly
            under toElt.
        Children in any other namespace are copied with their attributes (each
        attribute is checked first), text and tail.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    checkHierarchyConstraints(fromChild)
                    for attrTag, attrValue in fromChild.items():
                        checkAttribute(fromChild, True, attrTag, attrValue)
                    for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName,[]):
                        if fromChild.get(attrTag) is None:
                            # report against the element that lacks the
                            # attribute, not the root passed to xhtmlValidate
                            modelXbrl.error("ix:attributeRequired",
                                _("Attribute %(attribute)s required on element ix:%(element)s"),
                                modelObject=fromChild, attribute=attrTag, element=fromChild.localName)
                if not (fromChild.localName in {"references", "resources"} and isIxNs):
                    if fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        # transparent inline element: content lands directly in toElt
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                            
    # copy xhtml elements to fresh tree
    # Load the DTD file "xhtml1-strict-ix.dtd" from the controller's config
    # directory; the file is closed as soon as the DTD object has been built.
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        # Building the copy with ixToXhtml also runs the inline XBRL checks;
        # the copy is then validated against the DTD and every message in the
        # DTD error log is reported in a single error.
        if not dtd.validate( ixToXhtml(elt) ):
            modelXbrl.error("ix:DTDelementUnexpected",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
    except XMLSyntaxError as err:
        # NOTE(review): err itself is unused; the message is built from the
        # DTD error log object (not joined as in the branch above) - confirm
        # that this is intended.
        modelXbrl.error("ix:DTDerror",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(), error=dtd.error_log.filter_from_errors())
Exemplo n.º 22
0
  methods[element] = {
    'declaration': template('PCDATA_OPERATOR_DECLARATION').render(
      {'class': element, 'type': 'int'}),
    'definition': template('PCDATA_OPERATOR_DEFINITION').render(
      {'class': element, 'type': 'int'})
  }

if __name__ == '__main__':
  import argparse

  # Command line: the DTD to read, then the two output files to write
  # (rendered from the LIBRARY_HEADER and LIBRARY_IMPLEMENTATION templates).
  cmdline = argparse.ArgumentParser()
  cmdline.add_argument("dtd")
  cmdline.add_argument("hxx")
  cmdline.add_argument("cxx")
  args = cmdline.parse_args()
  dtd = DTD(args.dtd)
  # Collect the DTD element names once instead of once per enumeration.
  # This stays a list: the values tested against it are not hashable.
  element_names = [e.name for e in dtd.iterelements()]
  metadata = {
    'dtd': dtd,
    'enumerations': enumerations,
    'extra_methods': methods,
    # NOTE(review): v is the enumeration entry itself (it is indexed with
    # v['name'] below) while element_names holds e.name values - confirm
    # whether v['name'] was meant in the filter.
    'enum_classes': sorted([(v['name'], k) for k, v in enumerations.items()
                            if v not in element_names]),
    'forwards_for': {'ornament': ['ornament_type'],
                     'score': ['score_data', 'score_header']}
  }
  with open(args.hxx, 'w') as hxx:
    print(template('LIBRARY_HEADER').render(metadata), file=hxx)
  with open(args.cxx, 'w') as cxx:
    print(template('LIBRARY_IMPLEMENTATION').render(metadata), file=cxx)

Exemplo n.º 23
0
class LogNormalizer():
    """Basic normalization flow manager.
    Normalizers definitions are loaded from a path and checked against the DTD.
    If the definitions are syntactically correct, the normalizers are
    instantiated and populate the manager's cache.
    Normalization priority is established as follows:
    
    * Maximum priority assigned to normalizers where the "appliedTo" tag is set
      to "raw". They MUST be mutually exclusive.
    * Medium priority assigned to normalizers where the "appliedTo" tag is set
      to "body".
    * Lowest priority assigned to any remaining normalizers.
    
    Some extra treatment is also done prior and after the log normalization:
    
    * Assignment of a unique ID, under the tag "uuid"
    * Conversion of date tags to UTC, if the "_timezone" was set prior to
      the normalization process."""
    def __init__(self, normalizers_paths, active_normalizers=None):
        """
        Instantiates a flow manager. The default behavior is to activate every
        available normalizer.
        
        @param normalizers_paths: a list (or tuple) of absolute paths to the
        normalizer XML definitions to use or a just a single path as str.
        @param active_normalizers: a dictionary of active normalizers
        in the form {name-version : [True|False]}. None or an empty dictionary
        activates every normalizer.
        @raise ValueError: if one of the paths is not a directory.
        @raise StandardError: if normalizer.dtd, common_tagTypes.xml or
        common_callBacks.xml was not found in any of the paths.
        """
        # "list or tuple" evaluates to just "list"; a tuple of paths has to be
        # accepted as a sequence too instead of being wrapped as one path.
        if not isinstance(normalizers_paths, (list, tuple)):
            normalizers_paths = [
                normalizers_paths,
            ]
        self.normalizers_paths = normalizers_paths
        if active_normalizers is None:
            active_normalizers = {}
        self.active_normalizers = active_normalizers
        self.dtd, self.ctt, self.ccb = None, None, None

        # Walk through paths for normalizer.dtd and common_tagTypes.xml
        # /!\ dtd file and common elements will be overridden if present in
        # many directories.
        for norm_path in self.normalizers_paths:
            if not os.path.isdir(norm_path):
                raise ValueError("Invalid normalizer directory : %s" % norm_path)
            dtd = os.path.join(norm_path, 'normalizer.dtd')
            ctt = os.path.join(norm_path, 'common_tagTypes.xml')
            ccb = os.path.join(norm_path, 'common_callBacks.xml')
            if os.path.isfile(dtd):
                # close the DTD file once the DTD object has been built
                with open(dtd) as dtd_file:
                    self.dtd = DTD(dtd_file)
            if os.path.isfile(ctt):
                self.ctt = ctt
            if os.path.isfile(ccb):
                self.ccb = ccb
        # Technically the common elements files should NOT be mandatory.
        # But many normalizers use them, so better safe than sorry.
        if not self.dtd or not self.ctt or not self.ccb:
            raise StandardError("Missing DTD or common library files")
        self._cache = []
        self.reload()

    def _all_normalizers(self):
        """Returns every loaded normalizer as one flat list."""
        return sum(self.normalizers.values(), [])

    def reload(self):
        """Refreshes this instance's normalizers pool.

        Every definition found by iter_normalizer is parsed and validated
        against the DTD; invalid definitions are skipped with a warning.
        The activation state is then recomputed."""
        self.normalizers = {'raw': [], 'body': []}
        for path in self.iter_normalizer():
            with open(path) as source:
                norm = parse(source)
            if not self.dtd.validate(norm):
                warnings.warn('Skipping %s : invalid DTD' % path)
                # same text as the former "print 'invalid normalizer ', path"
                print('invalid normalizer  %s' % path)
            else:
                normalizer = Normalizer(norm, self.ctt, self.ccb)
                normalizer.uuid = self._compute_norm_uuid(normalizer)
                self.normalizers.setdefault(normalizer.appliedTo,
                                            []).append(normalizer)
        self.activate_normalizers()

    def _compute_norm_uuid(self, normalizer):
        """Returns the "name-version" identifier of a normalizer."""
        return "%s-%s" % (normalizer.name, normalizer.version)

    def iter_normalizer(self):
        """ Iterates through normalizers and returns the normalizers' paths.

        Every *.xml file found under the normalizers paths is yielded, except
        the files whose name starts with common_tagTypes or common_callBacks.
        
        @return: a generator of absolute paths.
        """
        common_prefixes = ('common_tagTypes', 'common_callBacks')
        for path in self.normalizers_paths:
            for root, dirs, files in os.walk(path):
                for name in files:
                    if name.endswith('.xml') and \
                       not name.startswith(common_prefixes):
                        yield os.path.join(root, name)

    def __len__(self):
        """ Returns the amount of available normalizers.
        """
        return sum(1 for _ in self.iter_normalizer())

    def update_normalizer(self, raw_xml_contents, name=None, dir_path=None):
        """used to add or update a normalizer.
        @param raw_xml_contents: XML description of normalizer as flat XML. It
        must comply to the DTD.
        @param name: if set, the XML description will be saved as name.xml.
        If left blank, name will be fetched from the XML description.
        @param dir_path: the path to the directory where to copy the given
        normalizer. It is ignored unless it is one of the normalizers paths;
        the first normalizers path is used otherwise.
        @raise ValueError: if the description does not validate against the DTD.
        """
        path = self.normalizers_paths[0]
        if dir_path and dir_path in self.normalizers_paths:
            path = dir_path
        xmlconf = XMLfromstring(raw_xml_contents).getroottree()
        if not self.dtd.validate(xmlconf):
            raise ValueError("This definition file does not follow the normalizers DTD :\n\n%s" % \
                               self.dtd.error_log.filter_from_errors())
        if not name:
            name = xmlconf.getroot().get('name')
        if not name.endswith('.xml'):
            name += '.xml'
        # the file must be closed before reload() reads it back
        with open(os.path.join(path, name), 'w') as target:
            xmlconf.write(target,
                          encoding='utf8',
                          method='xml',
                          pretty_print=True)
        self.reload()

    def get_normalizer_by_uuid(self, uuid):
        """Returns normalizer by uuid.

        @raise ValueError: if no loaded normalizer has this uuid."""
        for norm in self._all_normalizers():
            if norm.uuid == uuid:
                return norm
        raise ValueError("Normalizer uuid : %s not found" % uuid)

    def get_normalizer_source(self, uuid):
        """Returns the raw XML source of normalizer uuid."""
        return self.get_normalizer_by_uuid(uuid).get_source()

    def get_normalizer_path(self, uuid):
        """Returns the filesystem path of a normalizer."""
        return self.get_normalizer_by_uuid(uuid).sys_path

    def activate_normalizers(self):
        """Activates normalizers according to what was set by calling
        set_active_normalizers. If no call to the latter function has been
        made so far, this method activates every normalizer."""
        if not self.active_normalizers:
            self.active_normalizers = dict(
                (n.uuid, True) for n in self._all_normalizers())
        # fool-proof the list
        self.set_active_normalizers(self.active_normalizers)
        # Application order: the "raw" normalizers first, then the applicative
        # normalization on "body", then everything else.
        ordered = list(self.normalizers['raw'])
        ordered.extend(self.normalizers['body'])
        for applied_to in self.normalizers:
            if applied_to not in ('raw', 'body'):
                ordered.extend(self.normalizers[applied_to])
        # build an ordered cache to speed things up; a normalizer is considered
        # inactive if it is not explicitly in our list
        self._cache = [norm for norm in ordered
                       if self.active_normalizers.get(norm.uuid, False)]

    def get_active_normalizers(self):
        """Returns a dictionary of normalizers; keys are normalizers' uuid and
        values are True|False according to the normalizer's activation state."""
        return self.active_normalizers

    def set_active_normalizers(self, norms=None):
        """Sets the active/inactive normalizers. Default behavior is to
        deactivate every normalizer.

        The new state only takes effect on normalization once
        activate_normalizers has rebuilt the cache.
        
        @param norms: a dictionary, similar to the one returned by
        get_active_normalizers."""
        default = dict((n.uuid, False) for n in self._all_normalizers())
        if norms:
            default.update(norms)
        self.active_normalizers = default

    def lognormalize(self, data):
        """ This method is the entry point to normalize data (a log).

        data is passed through every activated normalizer
        and extra tagging occurs accordingly.
        
        data receives also an extra uuid tag.

        Nothing is returned: the results are read from data itself, as in the
        example below.

        @param data: must be a dictionary with at least a key 'raw' or 'body'
                     with BaseString values (preferably Unicode).
        
        Here an example :
        >>> from logsparser import lognormalizer
        >>> from pprint import pprint
        >>> ln = lognormalizer.LogNormalizer('/usr/local/share/normalizers/')
        >>> mylog = {'raw' : 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)'}
        >>> ln.lognormalize(mylog)
        >>> pprint(mylog)
        {'body': '(root) CMD (/srv/git/redmine-changesets.sh)',
        'date': datetime.datetime(2011, 7, 18, 15, 35, 1),
        'pid': '14338',
        'program': '/USR/SBIN/CRON',
        'raw': 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)',
        'source': 'zoo',
        'uuid': 70851882840934161193887647073096992594L}
        """
        data = self.uuidify(data)
        self.normalize(data)

    # some more functions for clarity
    def uuidify(self, log):
        """Adds a unique UID to the normalized log."""
        log["uuid"] = _UUID_.uuid4().int
        return log

    def normalize(self, log):
        """plain normalization."""
        for norm in self._cache:
            log = norm.normalize(log)
        return log

    def _normalize(self, log):
        """Used for testing only, the normalizers' tags prerequisite are
        deactivated."""
        for norm in self._cache:
            log = norm.normalize(log, do_not_check_prereq=True)
        return log
Exemplo n.º 24
0
def run(ebook: Path, bigbook: Path, ubercoordinator: Path,
        files: List[Path]) -> None:
    """
    Add articles from the Big Book of Key to the ebook and sync its files.

    Missing section files and images are copied from the Big Book into the
    ebook, every image referenced by a section is listed under
    ``illustrations``, and each requested article that is not yet a section
    is appended to ``contents``.  ``book.xml`` is rewritten (after a
    ``book.xml.bak`` copy is made) only when the set of sections or images
    changed.

    :param ebook: the ebook source directory
    :param bigbook: the Big Book of Key
    :param ubercoordinator: the ubercoordinator source directory, for the DTD
    :param files: the XHTML file from the Big Book of Key that need adding
    :return: None
    """

    index = Index(bigbook)

    # Build the DTD inside a context manager so the file handle is closed
    # instead of being left open until garbage collection.
    with (ubercoordinator / 'src' / 'book.dtd').open() as dtd_file:
        book_dtd = DTD(dtd_file)
    book = xml.read(ebook / 'book.xml', dtd=book_dtd)

    illustrations = xml.get_one(book, 'illustrations')
    contents = xml.get_one(book, 'contents')

    sections = set(
        xml.get_all_str(contents, '//section[not(@template="yes")]/@file'))
    images = set(xml.get_all_str(illustrations, '//image/@file'))

    # snapshots used at the end to decide whether book.xml must be rewritten
    initial_sections = sections.copy()
    initial_images = images.copy()

    # Existing sections: restore missing files from the Big Book and register
    # any image they reference.
    for filename in sections:
        ebook_file = ebook / 'Text' / filename
        bigbook_file = bigbook / 'Text' / filename
        if not ebook_file.exists() and bigbook_file.exists():
            copyfile(bigbook_file, ebook_file)
        if ebook_file.exists():
            for img_filename in find_images(ebook_file):
                if img_filename not in images:
                    illustrations.append(file_element('image', img_filename))
                    images.add(img_filename)
        else:
            print(
                f"{ebook / 'book.xml'}:0:0:WARNING: is this missing?: {filename}"
            )

    # Requested articles: add a section for each one not listed yet.
    for file in files:
        article_id = file.stem
        article = index.articles_by_id[article_id]

        if article.file.name not in sections:
            copyfile(article.file, ebook / 'Text' / article.file.name)
            title = xml.rewrap('title', XML(article.link))
            section = file_element('section', article.file.name)
            section.append(title)
            contents.append(section)
            # NOTE(review): the membership test above uses article.file.name
            # but file.name is what gets recorded - confirm the two always match.
            sections.add(file.name)

        for img_filename in find_images(article.file):
            if img_filename not in images:
                illustrations.append(file_element('image', img_filename))
                images.add(img_filename)

    # Make sure every listed image exists in the ebook.
    for img_filename in images:
        file = ebook / 'Images' / img_filename
        if not file.exists():
            copyfile(bigbook / 'Images' / img_filename, file)

    book.attrib['date'] = strftime("%Y-%m-%d")

    if sections != initial_sections or images != initial_images:
        copyfile(ebook / 'book.xml', ebook / 'book.xml.bak')
        xml.save(ebook / 'book.xml', book, doctype='book')
Exemplo n.º 25
0
def read(filepath: Path, dtd: DTD = None) -> Element:
    """Parse the UTF-8 XML file at *filepath* and return its root element.

    When *dtd* is given, its ``validate`` method is called on the root.  The
    outcome of that call is not inspected here, so a document that fails
    validation is returned just like a valid one.
    """
    with filepath.open(encoding='utf-8') as source:
        tree = parse(source, XMLParser())
    document_root = tree.getroot()
    if not dtd:
        return document_root
    dtd.validate(document_root)
    return document_root
Exemplo n.º 26
0
def xhtmlValidate(modelXbrl, elt):
    """Validate an inline XBRL document element against the XHTML DTD.

    The tree rooted at *elt* is copied into a fresh lxml tree with the
    inline-XBRL (ix) elements removed or replaced by ``ixNestedContent``
    placeholders.  While copying, ix element names, attributes and hierarchy
    constraints are checked.  The copy is then validated with the
    ``xhtml1-strict-ix.dtd`` file found in the controller's config directory.

    Every problem is reported through ``modelXbrl.error``; nothing is
    returned.

    @param modelXbrl: model object providing ``error`` and ``modelManager``.
    @param elt: root element of the document to validate.
    """
    from lxml.etree import DTD, XMLSyntaxError

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Report attributes that are not acceptable on *elt*.

        Qualified (``{ns}name``) attributes are checked against the allowed
        namespaces; unqualified attributes on ix elements are type-checked
        through ``ixAttrType`` and ``XmlValidate.validateValue``.
        """
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
            if isIxElt:
                allowedNs = nonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        "ix:qualifiedAttributeNotExpected",
                        _("Inline XBRL element %(element)s: has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        "ix:inlineAttributeMisplaced",
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        "ix:extensionAttributeMisplaced",
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        elif isIxElt:
            try:
                # A KeyError from this lookup means the attribute is not
                # defined for the element; it is reported in the handler.
                _xsdType = ixAttrType[elt.namespaceURI][attrTag]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

                # Start from the full set and remove the attributes that the
                # current ix element is permitted to carry.
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = [
                    a for a in disallowedXbrliAttrs if elt.get(a) is not None
                ]
                if disallowedAttrs:
                    modelXbrl.error(
                        "ix:inlineElementAttributes",
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    "ix:attributeNotExpected",
                    _("Attribute %(attribute)s is not expected on element element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)

    def checkHierarchyConstraints(elt):
        """Check *elt* against its entry in ``ixHierarchyConstraints``.

        Each constraint is a ``(spec, names)`` pair.  The first character of
        ``spec`` is the requirement code ('+', '-', '&', '^' or '?') and the
        remainder names the relation (ancestor, parent, child-choice,
        child-sequence or descendant).
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                # '&' and '^' need every related element, not only the named
                # ones, so that unexpected relatives can be reported.
                if reqt in ('&', '^'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, '*' if nameFilter == ('*', ) else elt.namespaceURI,
                       nameFilter)
                # ancestor/parent lookups give a single element or None;
                # normalize to a list like the other relations.
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                issue = ''
                if reqt == '^':
                    if not any(r.localName in names
                               and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names
                                 and r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        # Children must appear in the order given by names.
                        sequencePosition = 0
                        for r in relations:
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have more than 1 but none present "
                if ((reqt == '+' and not relations)
                        or (reqt == '-' and relations) or (issue)):
                    code = "ix:" + {
                        'ancestor': "ancestorNode",
                        'parent': "parentNode",
                        'child-choice': "childNodes",
                        'child-sequence': "childNodes",
                        'descendant': "descendantNodes"
                    }[rel] + {
                        '+': "Required",
                        '-': "Disallowed",
                        '&': "Allowed",
                        '^': "Specified"
                    }.get(reqt, "Specified")
                    # The verb table must cover every requirement code that
                    # can reach this point; '^' was previously missing (a
                    # duplicate '+' stood in its place), which raised
                    # KeyError instead of reporting the violation.
                    msg = _("Inline XBRL 1.0 ix:{0} {1} {2} {3} {4} element"
                            ).format(
                                elt.localName, {
                                    '+': "must",
                                    '-': "may not",
                                    '&': "may only",
                                    '?': "may",
                                    '^': "must"
                                }[reqt], {
                                    'ancestor': "be nested in",
                                    'parent': "have parent",
                                    'child-choice': "have child",
                                    'child-sequence': "have child",
                                    'descendant': "have as descendant"
                                }[rel], ', '.join(
                                    str(r.elementQname)
                                    for r in relations) if names == ('*', )
                                and relations else ", ".join("ix:" + n
                                                             for n in names),
                                issue)
                    modelXbrl.error(code,
                                    msg,
                                    modelObject=[elt] + relations,
                                    requirement=reqt)

    def ixToXhtml(fromRoot):
        """Return a copy of *fromRoot* with inline XBRL content stripped."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in (
                    'version',  # used in inline test cases but not valid xhtml
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'
            ):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt):
        """Recursively copy the children of *fromElt* under *toElt*.

        ix elements are validated on the way.  ix ``references`` and
        ``resources`` are dropped, ix ``footnote``/``nonNumeric``/
        ``continuation`` become ``ixNestedContent`` elements, other ix
        elements are replaced by their children, and non-ix elements are
        copied with their attributes, text and tail.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[
                            fromChild.namespaceURI]:
                        modelXbrl.error(
                            "ix:elementNameInvalid",
                            _("Inline XBRL element name %(element)s is not valid"
                              ),
                            modelObject=fromChild,
                            element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[
                                fromChild.namespaceURI].get(
                                    fromChild.localName, []):
                            if fromChild.get(attrTag) is None:
                                # NOTE(review): modelObject is the document
                                # root passed to xhtmlValidate, not
                                # fromChild -- confirm this is intended.
                                modelXbrl.error(
                                    "ix:attributeRequired",
                                    _("Attribute %(attribute)s required on element ix:%(element)s"
                                      ),
                                    modelObject=elt,
                                    attribute=attrTag,
                                    element=fromChild.localName)
                if not (fromChild.localName in {"references", "resources"}
                        and isIxNs):
                    if fromChild.localName in {
                            "footnote", "nonNumeric", "continuation"
                    } and isIxNs:
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag,
                                           attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(
            os.path.join(modelXbrl.modelManager.cntlr.configDir,
                         "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("ix:DTDelementUnexpected",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
    except XMLSyntaxError:
        modelXbrl.error("ix:DTDerror",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
Exemplo n.º 27
0
        'definition':
        template('PCDATA_OPERATOR_DEFINITION').render({
            'class': element,
            'type': 'int'
        })
    }

# Script entry point: read the DTD named on the command line and assemble the
# metadata dictionary from it and from module-level tables.
if __name__ == '__main__':
    import argparse

    # Three required positional arguments.  Only "dtd" is consumed in the
    # lines shown here; "hxx" and "cxx" are parsed but not used in this span.
    cmdline = argparse.ArgumentParser()
    cmdline.add_argument("dtd")
    cmdline.add_argument("hxx")
    cmdline.add_argument("cxx")
    args = cmdline.parse_args()
    dtd = DTD(args.dtd)
    # `enumerations` and `methods` are defined elsewhere in the module.
    metadata = {
        'dtd':
        dtd,
        'enumerations':
        enumerations,
        'extra_methods':
        methods,
        # Sorted (name, key) pairs for the enumerations kept by the filter.
        # NOTE(review): the filter compares the enumeration *value* `v`
        # (which is indexed with 'name' on the line above) against DTD
        # element names -- confirm whether `k` or `v['name']` was intended.
        'enum_classes':
        sorted([(v['name'], k) for k, v in enumerations.items()
                if not v in [e.name for e in dtd.iterelements()]]),
        'forwards_for': {
            'ornament': ['ornament_type'],
            'score': ['score_data', 'score_header']
        }
    }
Exemplo n.º 28
0
 def read(self, file):
     """Load the DTD in *file* and append one Unit per entity to self.units.

     Each Unit is built from the entity's ``name`` and ``content``.
     """
     parsed_dtd = DTD(file)
     for declared in parsed_dtd.entities():
         entity_name, entity_text = declared.name, declared.content
         self.units.append(Unit(entity_name, entity_text))
Exemplo n.º 29
0
class LogNormalizer():
    """Basic normalization flow manager.
    Normalizers definitions are loaded from a path and checked against the DTD.
    If the definitions are syntactically correct, the normalizers are
    instantiated and populate the manager's cache.
    Normalization priority is established as follows:
    
    * Maximum priority assigned to normalizers where the "appliedTo" tag is set
      to "raw". They MUST be mutually exclusive.
    * Medium priority assigned to normalizers where the "appliedTo" tag is set
      to "body".
    * Lowest priority assigned to any remaining normalizers.
    
    Some extra treatment is also done prior and after the log normalization:
    
    * Assignment of a unique ID, under the tag "uuid"
    * Conversion of date tags to UTC, if the "_timezone" was set prior to
      the normalization process."""
    
    def __init__(self, normalizers_paths, active_normalizers = None):
        """
        Instantiates a flow manager. The default behavior is to activate every
        available normalizer.
        
        @param normalizers_paths: a list or tuple of absolute paths to the
        normalizer XML definitions to use, or just a single path as str.
        @param active_normalizers: a dictionary of active normalizers
        in the form {name: [True|False]}. None (the default) or an empty
        dictionary activates every normalizer.
        """
        # A lone path is wrapped in a list; lists and tuples are used as given.
        if not isinstance(normalizers_paths, (list, tuple)):
            normalizers_paths = [normalizers_paths,]
        self.normalizers_paths = normalizers_paths
        if active_normalizers is None:
            active_normalizers = {}
        self.active_normalizers = active_normalizers
        # Defined up front so the attributes exist even when no path
        # provides the corresponding file.
        self.dtd = None
        self.ctt = None
        # Walk through paths for normalizer.dtd and common_tagTypes.xml;
        # the last path providing a file wins.
        for norm_path in self.normalizers_paths:
            dtd = os.path.join(norm_path, 'normalizer.dtd')
            ctt = os.path.join(norm_path, 'common_tagTypes.xml')
            if os.path.isfile(dtd):
                with open(dtd) as dtd_file:
                    self.dtd = DTD(dtd_file)
            if os.path.isfile(ctt):
                self.ctt = ctt
        self._cache = []
        self.reload()
        
    def reload(self):
        """Refreshes this instance's normalizers pool."""
        self.normalizers = { 'raw' : [], 'body' : [] }
        for path in self.iter_normalizer():
            with open(path) as source:
                norm = parse(source)
            if not self.dtd.validate(norm):
                warnings.warn('Skipping %s : invalid DTD' % path)
                print('invalid normalizer  %s' % path)
            else:
                normalizer = Normalizer(norm, self.ctt)
                normalizer.uuid = self._compute_norm_uuid(normalizer)
                self.normalizers.setdefault(normalizer.appliedTo, [])
                self.normalizers[normalizer.appliedTo].append(normalizer)
        self.activate_normalizers()

    def _compute_norm_uuid(self, normalizer):
        """Returns the "<name>-<version>" identifier of a normalizer."""
        return "%s-%s" % (normalizer.name, normalizer.version)

    def _all_normalizers(self):
        """Returns every loaded normalizer, of any category, as a flat list."""
        flat = []
        for group in self.normalizers.values():
            flat.extend(group)
        return flat

    def iter_normalizer(self):
        """ Iterates through normalizers and returns the normalizers' paths.
        
        Every file ending in ".xml" found under the configured paths is
        yielded, except those whose name starts with "common_tagTypes".
        
        @return: a generator of absolute paths.
        """
        for path in self.normalizers_paths:
            for root, dirs, files in os.walk(path):
                for name in files:
                    if not name.startswith('common_tagTypes') and \
                           name.endswith('.xml'):
                        yield os.path.join(root, name)

    def __len__(self):
        """ Returns the amount of available normalizers.
        """
        return sum(1 for _ in self.iter_normalizer())

    def update_normalizer(self, raw_xml_contents, name = None, dir_path = None ):
        """used to add or update a normalizer.
        @param raw_xml_contents: XML description of normalizer as flat XML. It
        must comply to the DTD.
        @param name: if set, the XML description will be saved as name.xml.
        If left blank, name will be fetched from the XML description.
        @param dir_path: the path to the directory where to copy the given
        normalizer. It is ignored unless it is one of the configured paths,
        in which case the first configured path is used instead.
        @raise ValueError: if the definition does not validate against the
        DTD, or if no name is given and the definition has none.
        """
        path = self.normalizers_paths[0]
        if dir_path and dir_path in self.normalizers_paths:
            path = dir_path
        xmlconf = XMLfromstring(raw_xml_contents).getroottree()
        if not self.dtd.validate(xmlconf):
            raise ValueError("This definition file does not follow the normalizers DTD :\n\n%s" % \
                               self.dtd.error_log.filter_from_errors())
        if not name:
            name = xmlconf.getroot().get('name')
        if not name:
            raise ValueError("No name given and none found in the definition")
        if not name.endswith('.xml'):
            name += '.xml'
        # The file is closed before reload() reads the directory again.
        with open(os.path.join(path, name), 'wb') as target:
            xmlconf.write(target,
                          encoding = 'utf8',
                          method = 'xml',
                          pretty_print = True)
        self.reload()

    def get_normalizer_by_uuid(self, uuid):
        """Returns normalizer by uuid.
        
        @raise ValueError: if no loaded normalizer has this uuid."""
        for norm in self._all_normalizers():
            if norm.uuid == uuid:
                return norm
        raise ValueError("Normalizer uuid : %s not found" % uuid)
        
    def get_normalizer_source(self, uuid):
        """Returns the raw XML source of normalizer uuid."""
        return self.get_normalizer_by_uuid(uuid).get_source()
    
    def get_normalizer_path(self, uuid):
        """Returns the filesystem path of a normalizer."""
        return self.get_normalizer_by_uuid(uuid).sys_path

    
    def activate_normalizers(self):
        """Activates normalizers according to what was set by calling
        set_active_normalizers. If no call to the latter function has been
        made so far, this method activates every normalizer."""
        if not self.active_normalizers:
            self.active_normalizers = dict((n.uuid, True)
                                           for n in self._all_normalizers())
        # fool-proof the list
        self.set_active_normalizers(self.active_normalizers)
        # build an ordered cache to speed things up: "raw" normalizers
        # first, then "body", then everything else.
        ordered = list(self.normalizers['raw']) + list(self.normalizers['body'])
        for category in self.normalizers:
            if category not in ('raw', 'body'):
                ordered.extend(self.normalizers[category])
        # consider a normalizer to be inactive if not explicitly in our
        # list; this applies to every category.
        self._cache = [norm for norm in ordered
                       if self.active_normalizers.get(norm.uuid, False)]

    def get_active_normalizers(self):
        """Returns a dictionary of normalizers; keys are normalizers' uuid and
        values are True|False according to the normalizer's activation state."""
        return self.active_normalizers

    def set_active_normalizers(self, norms = None):
        """Sets the active/inactive normalizers. Default behavior is to
        deactivate every normalizer.
        
        @param norms: a dictionary, similar to the one returned by
        get_active_normalizers."""
        default = dict((n.uuid, False) for n in self._all_normalizers())
        if norms:
            default.update(norms)
        self.active_normalizers = default
        
    def lognormalize(self, data):
        """ This method is the entry point to normalize data (a log).

        data is passed through every activated normalizer
        and extra tagging occurs accordingly.
        
        data receives also an extra uuid tag.
        
        Nothing is returned; see the example below, where the dictionary
        passed in carries the result.

        @param data: must be a dictionary with at least a key 'raw' or 'body'
                     with BaseString values (preferably Unicode).
        
        Here an example :
        >>> from logsparser import lognormalizer
        >>> from pprint import pprint
        >>> ln = lognormalizer.LogNormalizer('/usr/local/share/normalizers/')
        >>> mylog = {'raw' : 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)'}
        >>> ln.lognormalize(mylog)
        >>> pprint(mylog)
        {'body': '(root) CMD (/srv/git/redmine-changesets.sh)',
        'date': datetime.datetime(2011, 7, 18, 15, 35, 1),
        'pid': '14338',
        'program': '/USR/SBIN/CRON',
        'raw': 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)',
        'source': 'zoo',
        'uuid': 70851882840934161193887647073096992594L}
        """
        data = self.uuidify(data)
        data = self.normalize(data)

    
    # some more functions for clarity
    def uuidify(self, log):
        """Adds a unique UID to the normalized log."""
        log["uuid"] = _UUID_.uuid4().int
        return log
        
    def normalize(self, log):
        """plain normalization."""
        for norm in self._cache:
            log = norm.normalize(log)
        return log

    def _normalize(self, log):
        """Used for testing only, the normalizers' tags prerequisite are
        deactivated."""
        for norm in self._cache:
            log = norm.normalize(log, do_not_check_prereq = True)
        return log
Exemplo n.º 30
0
def validateXbrlFinally(val, *args, **kwargs):
    """Run the EFM HTML checks on the document held by ``val.modelXbrl``.

    Does nothing unless ``val.validateEFMHTMplugin`` is truthy.  Otherwise
    the document is validated against ``resources/efm-htm.dtd`` (located
    next to this module) and every element is scanned for javascript in
    links, disallowed external references, unacceptable image sources and
    nested tables.  All findings are reported through ``modelXbrl.error``.
    """
    if not val.validateEFMHTMplugin:
        return

    modelXbrl = val.modelXbrl
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern

    # --- DTD validation of the whole document ---
    dtdPath = os.path.join(os.path.dirname(__file__), "resources",
                           "efm-htm.dtd")
    with open(dtdPath) as dtdFile:
        efmHtmDTD = DTD(dtdFile)
    if efmHtmDTD and not efmHtmDTD.validate(
            modelXbrl.modelDocument.xmlRootElement.getroottree()):
        # DTD errors containing this text are deliberately not reported.
        ignoredText = "declared in the external subset contains white spaces nodes"
        for dtdError in efmHtmDTD.error_log.filter_from_errors():
            if ignoredText in dtdError.message:
                continue
            modelXbrl.error("html.syntax",
                            _("HTML error %(error)s"),
                            error=dtdError.message)

    # --- element by element checks ---
    nonParsedTypes = (_ElementTree, _Comment, _ProcessingInstruction)
    for elt in modelXbrl.modelDocument.xmlRootElement.iter():
        eltTag = elt.tag
        if isinstance(elt, nonParsedTypes):
            continue  # comment or other non-parsed element

        for attrTag, attrValue in elt.items():
            isAnchorHref = eltTag == "a" and attrTag == "href"
            isImageSrc = eltTag == "img" and attrTag == "src"
            if not (isAnchorHref or isImageSrc):
                continue

            if "javascript:" in attrValue:
                modelXbrl.error(
                    "EFM.5.02.02.10.activeContent",
                    _("Element has javascript in '%(attribute)s' for <%(element)s>"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=eltTag)
            else:
                # An anchor is accepted outright when no pattern is
                # configured or when the href matches the pattern.
                anchorAccepted = isAnchorHref and (
                    not allowedExternalHrefPattern
                    or allowedExternalHrefPattern.match(attrValue))
                if not anchorAccepted and scheme(attrValue) in (
                        "http", "https", "ftp"):
                    modelXbrl.error(
                        "EFM.6.05.16.externalReference",
                        _("Element has an invalid external reference in '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrTag,
                        element=eltTag)

            if isImageSrc and attrValue not in checkedGraphicsFiles:
                if scheme(attrValue) == "data":
                    modelXbrl.error(
                        "EFM.5.02.02.10.graphicDataUrl",
                        _("Element references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrValue[:32],
                        element=eltTag)
                elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                    modelXbrl.error(
                        "EFM.5.02.02.10.graphicFileType",
                        _("Element references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"
                          ),
                        modelObject=elt,
                        attribute=attrValue,
                        element=eltTag)

        if eltTag != "table":
            continue
        # A table with any <table> ancestor is a nested table.
        if any(ancestor is not None
               for ancestor in elt.iterancestors("table")):
            modelXbrl.error("EFM.5.02.02.10.nestedTable",
                            _("Element is a disallowed nested <table>."),
                            modelObject=elt)
Exemplo n.º 31
0
def xhtmlValidate(modelXbrl, elt):
    """Validate the XHTML and inline XBRL (ix) structure of an inline document.

    A fresh element tree is built from ``elt`` in which ix elements are either
    dropped, flattened into their parent, or replaced by an ``ixNestedContent``
    placeholder.  While copying, every ix element has its hierarchy constraints,
    its attributes and its required attributes checked.  The copy is then
    validated against ``xhtml1-strict-ix.dtd`` from the controller's config
    directory.  When the disclosure system's validation type is ``"EFM"``,
    ``ValidateFilingText.validateHtmlContent`` is run as well.

    All findings are reported through ``modelXbrl.error``; nothing is returned.

    Args:
        modelXbrl: model providing ``error`` and ``modelManager`` (disclosure
            system and controller config directory).
        elt: root element of the inline document; ``elt.modelDocument.ixNS``
            is used to build message codes.
    """
    from lxml.etree import DTD, XMLSyntaxError
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        """Report an attribute that is not allowed, or not valid, on ``elt``.

        ``attrTag`` is the attribute name, in Clark notation (``{ns}local``)
        when it is namespace-qualified.  ``isIxElt`` tells whether ``elt`` is
        an inline XBRL element (True) or a plain html element (False).
        """
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI, EMPTYDICT).get(elt.localName, ())
        # split a Clark-notation name into namespace and local name
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            # qualified attribute from a non-ix namespace that is not explicitly defined for this element
            if isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                    "fraction", "nonFraction", "nonNumeric", "references", "relationship", "tuple"}:
                    modelXbrl.error(ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"),
                        modelObject=elt, element=str(elt.elementQname), name=attrTag)
            else:
                # NOTE(review): the enclosing condition already requires
                # ``ns not in XbrlConst.ixbrlAll``, so this first branch cannot
                # be taken as written -- confirm the intended handling of ix
                # attributes on html elements.
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(ixMsgCode("inlineAttributeMisplaced", elt, name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"),
                        modelObject=elt, name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"),
                        modelObject=elt, tag=attrTag)
        elif isIxElt:
            # A KeyError anywhere in this block (unknown attribute type, or the
            # explicit raise below) is reported as "attributeNotExpected".
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    # dict entries carry the base type under "type"; the whole dict is passed as facets
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl, elt, attrTag, baseXsdType, attrValue, facets=facets)

                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                # xbrli-style attribute names, minus the ones this particular ix element may carry
                disallowedXbrliAttrs = ({"scheme", "periodType",     "balance", "contextRef", "unitRef", "precision", "decimals"} -
                                        {"fraction": {"contextRef", "unitRef"},
                                         "nonFraction": {"contextRef", "unitRef", "decimals", "precision"},
                                         "nonNumeric": {"contextRef"}}.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(ixMsgCode("inlineElementAttributes",elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"),
                        modelObject=elt, element=elt.elementQname, attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(ixMsgCode("attributeNotExpected",elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"),
                    modelObject=elt, attribute=attrTag, element=elt.localName)

    def checkHierarchyConstraints(elt):
        """Check ``elt`` against its entries in ``ixHierarchyConstraints``.

        Each constraint is a ``(_rel, names)`` pair: the first character of
        ``_rel`` is the requirement code (``+ - & ^ ?`` are handled here) and
        the remainder names the relationship (ancestor, parent, child-choice,
        child-sequence, child-or-text, descendant).
        """
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                # '&' and '^' need every related node, so that unlisted ones can be reported
                if reqt in ('&', '^'):
                    nameFilter = ('*',)
                else:
                    nameFilter = names
                if nameFilter == ('*',):
                    namespaceFilter = namespacePrefix = '*'
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {"ancestor": XmlUtil.ancestor,
                             "parent": XmlUtil.parent,
                             "child-choice": XmlUtil.children,
                             "child-sequence": XmlUtil.children,
                             "child-or-text": XmlUtil.children,
                             "descendant": XmlUtil.descendants}[rel](
                            elt,
                            namespaceFilter,
                            nameFilter)
                # ancestor/parent lookups yield a single node or None; normalize to a list
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt, ixExclude=True, ixEscape=False, ixContinuation=False)
                issue = ''
                if reqt == '^':
                    if not any(r.localName in names and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('&', '^'):
                    disallowed = [str(r.elementQname)
                                  for r in relations
                                  if not (r.tag in names or
                                          (r.localName in names and r.namespaceURI == elt.namespaceURI))]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        # every relation is in names here, so names.index cannot fail
                        sequencePosition = 0
                        for r in relations:
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                if ((reqt == '+' and not relations) or
                    (reqt == '-' and relations) or
                    (issue)):
                    # NOTE(review): if elt.localName is missing from ixSect[ns], .get returns the
                    # string "other" and the ["constraint"] subscript would fail -- confirm every
                    # constrained element has an ixSect entry.
                    code = "{}:{}".format(ixSect[elt.namespaceURI].get(elt.localName,"other")["constraint"], {
                           'ancestor': "ancestorNode",
                           'parent': "parentNode",
                           'child-choice': "childNodes",
                           'child-sequence': "childNodes",
                           'child-or-text': "childNodesOrText",
                           'descendant': "descendantNodes"}[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified"}.get(reqt, "Specified"))
                    # '^' must be present in the verb table: a '^' constraint can set ``issue``
                    # above, and a missing key here would raise KeyError instead of reporting it.
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element").format(
                                elt.localName,
                                {'+': "must", '-': "may not", '&': "may only",
                                 '?': "may", '^': "must"}[reqt],
                                {'ancestor': "be nested in",
                                 'parent': "have parent",
                                 'child-choice': "have child",
                                 'child-sequence': "have child",
                                 'child-or-text': "have child or text,",
                                 'descendant': "have as descendant"}[rel],
                                '' if rel == 'child-or-text' else
                                ', '.join(str(r.elementQname) for r in relations)
                                if names == ('*',) and relations else
                                ", ".join("{}:{}".format(namespacePrefix, n) for n in names),
                                issue)
                    modelXbrl.error(code, msg,
                                    modelObject=[elt] + relations, requirement=reqt,
                                    messageCodes=("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                                                  "ix{ver.sect}:childNodesOrTextRequired",
                                                  "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                                                  "ix{ver.sect}:descendantNodesDisallowed",
                                                  "ix{ver.sect}:parentNodeRequired"))

    def ixToXhtml(fromRoot):
        """Return a new tree mirroring ``fromRoot`` with ix content removed or replaced."""
        toRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, toRoot)
        for attrTag, attrValue in fromRoot.items():
            checkAttribute(fromRoot, False, attrTag, attrValue)
            if attrTag not in ('version', # used in inline test cases but not valid xhtml
                               '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation'):
                toRoot.set(attrTag, attrValue)
        return toRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Recursively copy children of ``fromElt`` into ``toElt``, checking ix elements.

        With ``excludeSubtree`` set the subtree is still walked (so checks run)
        but nothing is appended to ``toElt``.  Children that are not
        ``ModelObject`` instances are skipped.
        """
        for fromChild in fromElt.iterchildren():
            if isinstance(fromChild, ModelObject):
                isIxNs = fromChild.namespaceURI in XbrlConst.ixbrlAll
                if isIxNs:
                    if fromChild.localName not in ixElements[fromChild.namespaceURI]:
                        modelXbrl.error(ixMsgCode("elementNameInvalid",ns=_ixNS),
                            _("Inline XBRL element name %(element)s is not valid"),
                            modelObject=fromChild, element=str(fromChild.elementQname))
                    else:
                        checkHierarchyConstraints(fromChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, True, attrTag, attrValue)
                        for attrTag in ixAttrRequired[fromChild.namespaceURI].get(fromChild.localName,[]):
                            if fromChild.get(attrTag) is None:
                                # NOTE(review): modelObject is the outer document root ``elt``,
                                # not ``fromChild`` -- confirm this is intended.
                                modelXbrl.error(ixMsgCode("attributeRequired", fromChild),
                                    _("Attribute %(attribute)s required on element ix:%(element)s"),
                                    modelObject=elt, attribute=attrTag, element=fromChild.localName)
                if excludeSubtree or (fromChild.localName in {"references", "resources"} and isIxNs):
                    # ix:references / ix:resources content is checked but left out of the copy
                    copyNonIxChildren(fromChild, toElt, excludeSubtree=True)
                else:
                    if fromChild.localName in {"footnote", "nonNumeric", "continuation"} and isIxNs:
                        # these ix elements are represented by a placeholder element in the copy
                        toChild = etree.Element("ixNestedContent")
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail
                    elif isIxNs:
                        # other ix elements are flattened: their children go straight into the parent
                        copyNonIxChildren(fromChild, toElt)
                    else:
                        # plain element: copy by local name, with attributes, text and tail
                        toChild = etree.Element(fromChild.localName)
                        toElt.append(toChild)
                        copyNonIxChildren(fromChild, toChild)
                        for attrTag, attrValue in fromChild.items():
                            checkAttribute(fromChild, False, attrTag, attrValue)
                            toChild.set(attrTag, attrValue)
                        if fromChild.text is not None:
                            toChild.text = fromChild.text
                        if fromChild.tail is not None:
                            toChild.tail = fromChild.tail

    # copy xhtml elements to fresh tree
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir, "xhtml1-strict-ix.dtd")) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        if not dtd.validate( ixToXhtml(elt) ):
            modelXbrl.error("html:syntaxError",
                _("%(element)s error %(error)s"),
                modelObject=elt, element=elt.localName.title(),
                error=', '.join(e.message for e in dtd.error_log.filter_from_errors()))
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl, elt, elt, "InlineXBRL", "EFM.5.02.05.", isInline=True)
    except XMLSyntaxError as err:
        modelXbrl.error("html:syntaxError",
            _("%(element)s error %(error)s"),
            modelObject=elt, element=elt.localName.title(), error=dtd.error_log.filter_from_errors())
Exemplo n.º 32
0
 def _validate_dtd_name(self, identifier: str):
     """Validate ``identifier`` as the value of a DTD ``ID``-typed attribute.

     A minimal DTD declaring an empty element ``S`` with a required ``id``
     attribute of type ``ID`` is compiled, a sample ``S`` element carrying
     ``identifier`` is built, and the outcome of validating that element
     against the DTD is returned.
     """
     id_checker = DTD(StringIO("<!ELEMENT S EMPTY><!ATTLIST S id ID #REQUIRED>"))
     return id_checker.validate(Element("S", id=identifier))
Exemplo n.º 33
0
def xhtmlValidate(modelXbrl, elt):
    from lxml.etree import DTD, XMLSyntaxError
    from arelle import FunctionIxt
    ixNsStartTags = ["{" + ns + "}" for ns in XbrlConst.ixbrlAll]
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    # find ix version for messages
    _ixNS = elt.modelDocument.ixNS
    _xhtmlDTD = XHTML_DTD[_ixNS]
    _customTransforms = modelXbrl.modelManager.customTransforms or {}

    def checkAttribute(elt, isIxElt, attrTag, attrValue):
        ixEltAttrDefs = ixAttrDefined.get(elt.namespaceURI,
                                          EMPTYDICT).get(elt.localName, ())
        if attrTag.startswith("{"):
            ns, sep, localName = attrTag[1:].partition("}")
        else:
            ns = None
            localName = attrTag
        if ns is not None and ns not in XbrlConst.ixbrlAll and attrTag not in ixEltAttrDefs:
            if ns == XbrlConst.xsi:
                pass  # xsi attributes are always allowed
            elif isIxElt:
                allowedNs = allowedNonIxAttrNS.get(elt.localName, None)
                if allowedNs != "##other" and ns != allowedNs:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeNotExpected", elt),
                        _("Inline XBRL element %(element)s has qualified attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
                if ns == XbrlConst.xbrli and elt.localName in {
                        "fraction", "nonFraction", "nonNumeric", "references",
                        "relationship", "tuple"
                }:
                    modelXbrl.error(
                        ixMsgCode("qualifiedAttributeDisallowed", elt),
                        _("Inline XBRL element %(element)s has disallowed attribute %(name)s"
                          ),
                        modelObject=elt,
                        element=str(elt.elementQname),
                        name=attrTag)
            else:
                if ns in XbrlConst.ixbrlAll:
                    modelXbrl.error(
                        ixMsgCode("inlineAttributeMisplaced",
                                  elt,
                                  name="other"),
                        _("Inline XBRL attributes are not allowed on html elements: ix:%(name)s"
                          ),
                        modelObject=elt,
                        name=localName)
                elif ns not in {XbrlConst.xml, XbrlConst.xsi, XbrlConst.xhtml}:
                    modelXbrl.error(
                        ixMsgCode("extensionAttributeMisplaced", ns=_ixNS),
                        _("Extension attributes are not allowed on html elements: %(tag)s"
                          ),
                        modelObject=elt,
                        tag=attrTag)
        elif isIxElt:
            try:
                _xsdType = ixAttrType[elt.namespaceURI][localName]
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

                if not (attrTag in ixEltAttrDefs or
                        (localName in ixEltAttrDefs and
                         (not ns or ns in XbrlConst.ixbrlAll))):
                    raise KeyError
                disallowedXbrliAttrs = ({
                    "scheme", "periodType", "balance", "contextRef", "unitRef",
                    "precision", "decimals"
                } - {
                    "fraction": {"contextRef", "unitRef"},
                    "nonFraction":
                    {"contextRef", "unitRef", "decimals", "precision"},
                    "nonNumeric": {"contextRef"}
                }.get(elt.localName, set()))
                disallowedAttrs = set(a for a in disallowedXbrliAttrs
                                      if elt.get(a) is not None)
                if disallowedAttrs:
                    modelXbrl.error(
                        ixMsgCode("inlineElementAttributes", elt),
                        _("Inline XBRL element %(element)s has disallowed attributes %(attributes)s"
                          ),
                        modelObject=elt,
                        element=elt.elementQname,
                        attributes=", ".join(disallowedAttrs))
            except KeyError:
                modelXbrl.error(
                    ixMsgCode("attributeNotExpected", elt),
                    _("Attribute %(attribute)s is not expected on element ix:%(element)s"
                      ),
                    modelObject=elt,
                    attribute=attrTag,
                    element=elt.localName)
        elif ns is None:
            _xsdType = htmlAttrType.get(localName)
            if _xsdType is not None:
                if isinstance(_xsdType, dict):
                    baseXsdType = _xsdType["type"]
                    facets = _xsdType
                else:
                    baseXsdType = _xsdType
                    facets = None
                XmlValidate.validateValue(modelXbrl,
                                          elt,
                                          attrTag,
                                          baseXsdType,
                                          attrValue,
                                          facets=facets)

    def checkHierarchyConstraints(elt):
        constraints = ixHierarchyConstraints.get(elt.localName)
        if constraints:
            for _rel, names in constraints:
                reqt = _rel[0]
                rel = _rel[1:]
                if reqt in ('&', '^', '1'):
                    nameFilter = ('*', )
                else:
                    nameFilter = names
                if nameFilter == ('*', ):
                    namespaceFilter = namespacePrefix = '*'
                elif len(nameFilter) == 1 and "}" in nameFilter[
                        0] and nameFilter[0][0] == "{":
                    namespaceFilter, _sep, nameFilter = nameFilter[0][
                        1:].partition("}")
                    namespacePrefix = XmlUtil.xmlnsprefix(elt, namespaceFilter)
                else:
                    namespaceFilter = elt.namespaceURI
                    namespacePrefix = elt.prefix
                relations = {
                    "ancestor": XmlUtil.ancestor,
                    "parent": XmlUtil.parent,
                    "child-choice": XmlUtil.children,
                    "child-sequence": XmlUtil.children,
                    "child-or-text": XmlUtil.children,
                    "descendant": XmlUtil.descendants
                }[rel](elt, namespaceFilter, nameFilter)
                if rel in ("ancestor", "parent"):
                    if relations is None: relations = []
                    else: relations = [relations]
                if rel == "child-or-text":
                    relations += XmlUtil.innerTextNodes(elt,
                                                        ixExclude=True,
                                                        ixEscape=False,
                                                        ixContinuation=False)
                issue = ''
                if reqt in ('^', ):
                    if not any(r.localName in names
                               and r.namespaceURI == elt.namespaceURI
                               for r in relations):
                        issue = " and is missing one of " + ', '.join(names)
                if reqt in ('1', ) and not elt.isNil:
                    if sum(r.localName in names
                           and r.namespaceURI == elt.namespaceURI
                           for r in relations) != 1:
                        issue = " and must have exactly one of " + ', '.join(
                            names)
                if reqt in ('&', '^'):
                    disallowed = [
                        str(r.elementQname) for r in relations
                        if not (r.tag in names or
                                (r.localName in names
                                 and r.namespaceURI == elt.namespaceURI))
                    ]
                    if disallowed:
                        issue += " and may not have " + ", ".join(disallowed)
                    elif rel == "child-sequence":
                        sequencePosition = 0
                        for i, r in enumerate(relations):
                            rPos = names.index(str(r.localName))
                            if rPos < sequencePosition:
                                issue += " and is out of sequence: " + str(
                                    r.elementQname)
                            else:
                                sequencePosition = rPos
                if reqt == '?' and len(relations) > 1:
                    issue = " may only have 0 or 1 but {0} present ".format(
                        len(relations))
                if reqt == '+' and len(relations) == 0:
                    issue = " must have at least 1 but none present "
                disallowedChildText = bool(
                    reqt == '&' and rel in ("child-sequence", "child-choice")
                    and elt.textValue.strip())
                if ((reqt == '+' and not relations)
                        or (reqt == '-' and relations) or (issue)
                        or disallowedChildText):
                    code = "{}:{}".format(
                        ixSect[elt.namespaceURI].get(elt.localName,
                                                     "other")["constraint"],
                        {
                            'ancestor': "ancestorNode",
                            'parent': "parentNode",
                            'child-choice': "childNodes",
                            'child-sequence': "childNodes",
                            'child-or-text': "childNodesOrText",
                            'descendant': "descendantNodes"
                        }[rel] + {
                            '+': "Required",
                            '-': "Disallowed",
                            '&': "Allowed",
                            '^': "Specified",
                            '1': "Specified"
                        }.get(reqt, "Specified"))
                    msg = _("Inline XBRL ix:{0} {1} {2} {3} {4} element{5}"
                            ).format(
                                elt.localName, {
                                    '+': "must",
                                    '-': "may not",
                                    '&': "may only",
                                    '?': "may",
                                    '+': "must",
                                    '^': "must",
                                    '1': "must"
                                }[reqt], {
                                    'ancestor': "be nested in",
                                    'parent': "have parent",
                                    'child-choice': "have child",
                                    'child-sequence': "have child",
                                    'child-or-text': "have child or text,",
                                    'descendant': "have as descendant"
                                }[rel],
                                '' if rel == 'child-or-text' else ', '.join(
                                    str(r.elementQname) for r in relations) if
                                names == ('*', ) and relations else ", ".join(
                                    "{}:{}".format(namespacePrefix, n)
                                    for n in names), issue,
                                " and no child text (\"{}\")".format(
                                    elt.textValue.strip()[:32])
                                if disallowedChildText else "")
                    modelXbrl.error(
                        code,
                        msg,
                        modelObject=[elt] + relations,
                        requirement=reqt,
                        messageCodes=
                        ("ix{ver.sect}:ancestorNode{Required|Disallowed}",
                         "ix{ver.sect}:childNodesOrTextRequired",
                         "ix{ver.sect}:childNodes{Required|Disallowed|Allowed}",
                         "ix{ver.sect}:descendantNodesDisallowed",
                         "ix{ver.sect}:parentNodeRequired"))
        # other static element checks (that don't require a complete object model, context, units, etc
        if elt.localName == "nonFraction":
            childElts = XmlUtil.children(elt, '*', '*')
            hasText = (elt.text or "") or any(
                (childElt.tail or "") for childElt in childElts)
            if elt.isNil:
                ancestorNonFractions = XmlUtil.ancestors(
                    elt, _ixNS, elt.localName)
                if ancestorNonFractions:
                    modelXbrl.error(
                        ixMsgCode("nonFractionAncestors", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an ancestor ix:nonFraction"
                          ),
                        modelObject=[elt] + ancestorNonFractions,
                        fact=elt.qname)
                if childElts or hasText:
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a nil nonFraction and MUST not have an child elements or text"
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid(
                    )  # prevent further validation or cascading errors
            else:
                if ((childElts and
                     (len(childElts) != 1 or childElts[0].namespaceURI != _ixNS
                      or childElts[0].localName != "nonFraction"))
                        or (childElts and hasText)):
                    modelXbrl.error(
                        ixMsgCode("nonFractionTextAndElementChildren", elt),
                        _("Fact %(fact)s is a non-nil nonFraction and MUST have exactly one ix:nonFraction child element or text."
                          ),
                        modelObject=[elt] + childElts,
                        fact=elt.qname)
                    elt.setInvalid()
        if elt.localName == "fraction":
            if elt.isNil:
                ancestorFractions = XmlUtil.ancestors(elt, _ixNS,
                                                      elt.localName)
                if ancestorFractions:
                    modelXbrl.error(
                        ixMsgCode("fractionAncestors", elt),
                        _("Fact %(fact)s is a nil fraction and MUST not have an ancestor ix:fraction"
                          ),
                        modelObject=[elt] + ancestorFractions,
                        fact=elt.qname)
            else:
                nonFrChildren = [
                    e for e in XmlUtil.children(elt, _ixNS, '*')
                    if e.localName not in ("fraction", "numerator",
                                           "denominator")
                ]
                if nonFrChildren:
                    modelXbrl.error(
                        ixMsgCode("fractionElementChildren", elt),
                        _("Fact %(fact)s is a non-nil fraction and not have any child elements except ix:fraction, ix:numerator and ix:denominator: %(children)s"
                          ),
                        modelObject=[elt] + nonFrChildren,
                        fact=elt.qname,
                        children=", ".join(e.localName for e in nonFrChildren))
                for ancestorFraction in XmlUtil.ancestors(
                        elt, XbrlConst.ixbrl11, "fraction"):  # only ix 1.1
                    if normalizeSpace(elt.get("unitRef")) != normalizeSpace(
                            ancestorFraction.get("unitRef")):
                        modelXbrl.error(
                            ixMsgCode("fractionNestedUnitRef", elt),
                            _("Fact %(fact)s fraction and ancestor fractions must have matching unitRefs: %(unitRef)s, %(unitRef2)s"
                              ),
                            modelObject=[elt] + nonFrChildren,
                            fact=elt.qname,
                            unitRef=elt.get("unitRef"),
                            unitRef2=ancestorFraction.get("unitRef"))
        if elt.localName in ("nonFraction", "numerator", "denominator",
                             "nonNumeric"):
            fmt = elt.format
            if fmt:
                if fmt in _customTransforms:
                    pass
                elif fmt.namespaceURI not in FunctionIxt.ixtNamespaceFunctions:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation namespace %(namespace)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        namespace=fmt.namespaceURI)
                    elt.setInvalid()
                elif fmt.localName not in FunctionIxt.ixtNamespaceFunctions[
                        fmt.namespaceURI]:
                    modelXbrl.error(
                        ixMsgCode("invalidTransformation",
                                  elt,
                                  sect="validation"),
                        _("Fact %(fact)s has unrecognized transformation name %(name)s"
                          ),
                        modelObject=elt,
                        fact=elt.qname,
                        transform=fmt,
                        name=fmt.localName)
                    elt.setInvalid()

    def ixToXhtml(fromRoot):
        """Build a fresh element tree from fromRoot with ix elements removed.

        A new root named after fromRoot's local name is created, the children
        are copied via copyNonIxChildren, and every root attribute is checked
        with checkAttribute.  All attributes are copied onto the new root
        except the two listed in attrsNotCopied.

        Returns the new root element.
        """
        attrsNotCopied = (
            'version',  # used in inline test cases but not valid xhtml
            '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation',
        )
        xhtmlRoot = etree.Element(fromRoot.localName)
        copyNonIxChildren(fromRoot, xhtmlRoot)
        for name, value in fromRoot.items():
            # every attribute is checked, including those that are not copied
            checkAttribute(fromRoot, False, name, value)
            if name in attrsNotCopied:
                continue
            xhtmlRoot.set(name, value)
        return xhtmlRoot

    def copyNonIxChildren(fromElt, toElt, excludeSubtree=False):
        """Recursively copy the children of fromElt under toElt, dropping ix markup.

        Children that are not ModelObject instances are ignored.  For each
        child in an inline XBRL namespace the element name, hierarchy
        constraints, attributes and required attributes are checked first.
        Then, depending on the child:

        * when excludeSubtree is set, or the child is ix references/resources,
          its subtree is only walked (with excludeSubtree=True) and no
          element is created for it;
        * ix footnote / nonNumeric / continuation become an "ixNestedContent"
          element that receives the child's content, text and tail;
        * any other ix element is transparent: its children are copied
          directly under toElt;
        * a non-ix element is recreated by local name with its attributes
          (each checked with checkAttribute), text and tail.
        """
        for child in fromElt.iterchildren():
            if not isinstance(child, ModelObject):
                continue
            childNs = child.namespaceURI
            childName = child.localName
            inIxNs = childNs in XbrlConst.ixbrlAll

            if inIxNs:
                if childName in ixElements[childNs]:
                    checkHierarchyConstraints(child)
                    for name, value in child.items():
                        checkAttribute(child, True, name, value)
                    for requiredAttr in ixAttrRequired[childNs].get(
                            childName, []):
                        if child.get(requiredAttr) is None:
                            modelXbrl.error(
                                ixMsgCode("attributeRequired", child),
                                _("Attribute %(attribute)s required on element ix:%(element)s"
                                  ),
                                modelObject=child,
                                attribute=requiredAttr,
                                element=childName)
                else:
                    modelXbrl.error(
                        ixMsgCode("elementNameInvalid", ns=_ixNS),
                        _("Inline XBRL element name %(element)s is not valid"
                          ),
                        modelObject=child,
                        element=str(child.elementQname))

            # subtree is walked for its checks only; nothing is created for it
            if excludeSubtree or (inIxNs and childName in ("references",
                                                           "resources")):
                copyNonIxChildren(child, toElt, excludeSubtree=True)
                continue

            # remaining ix elements (other than the three below) are
            # transparent: their children land directly under toElt
            if inIxNs and childName not in ("footnote", "nonNumeric",
                                            "continuation"):
                copyNonIxChildren(child, toElt)
                continue

            if inIxNs:
                copied = etree.Element("ixNestedContent")
                toElt.append(copied)
                copyNonIxChildren(child, copied)
            else:
                copied = etree.Element(childName)
                toElt.append(copied)
                copyNonIxChildren(child, copied)
                for name, value in child.items():
                    checkAttribute(child, False, name, value)
                    copied.set(name, value)

            if child.text is not None:
                copied.text = child.text
            if child.tail is not None:
                copied.tail = child.tail

    # copy xhtml elements to fresh tree
    # Load the DTD named by _xhtmlDTD from the controller's config directory;
    # the file handle is closed as soon as the DTD object has been built.
    with open(os.path.join(modelXbrl.modelManager.cntlr.configDir,
                           _xhtmlDTD)) as fh:
        dtd = DTD(fh)
    try:
        #with open("/users/hermf/temp/testDtd.htm", "w") as fh:
        #    fh.write(etree.tostring(ixToXhtml(elt), encoding=_STR_UNICODE, pretty_print=True))
        # Validate the ix-stripped copy of elt (built by ixToXhtml) against the
        # DTD and report all DTD error messages as a single html:syntaxError.
        if not dtd.validate(ixToXhtml(elt)):
            modelXbrl.error("html:syntaxError",
                            _("%(element)s error %(error)s"),
                            modelObject=elt,
                            element=elt.localName.title(),
                            error=', '.join(
                                e.message
                                for e in dtd.error_log.filter_from_errors()))
        # When isEFM is set, additionally run the filing-text HTML content
        # checks on the original element (not on the stripped copy).
        if isEFM:
            ValidateFilingText.validateHtmlContent(modelXbrl,
                                                   elt,
                                                   elt,
                                                   "InlineXBRL",
                                                   "EFM.5.02.05.",
                                                   isInline=True)
    except XMLSyntaxError as err:
        # NOTE(review): unlike the branch above, the error log object itself
        # (not a joined string of messages) is passed as `error`, and `err`
        # is not used in the message -- confirm this is intended.
        modelXbrl.error("html:syntaxError",
                        _("%(element)s error %(error)s"),
                        modelObject=elt,
                        element=elt.localName.title(),
                        error=dtd.error_log.filter_from_errors())
Exemplo n.º 34
0
 def test_00_validate_fake_syslog(self):
     """Validate the fake normalizer (self.n) against normalizer.dtd.

     The DTD is read from self.normalizer_path.  The file is opened in a
     ``with`` block so the handle is closed once the DTD object has been
     built instead of being left for the garbage collector.
     """
     dtd_path = os.path.join(self.normalizer_path, 'normalizer.dtd')
     with open(dtd_path) as dtd_file:
         dtd = DTD(dtd_file)
     self.assertTrue(dtd.validate(self.n))
Exemplo n.º 35
0
	def read(self, file):
		"""Append one Unit to self.units for every entity declared in a DTD.

		`file` is handed unchanged to DTD(); each entity's name and content
		are used to build the Unit.
		"""
		parsed = DTD(file)
		for declared in parsed.entities():
			self.units.append(Unit(declared.name, declared.content))
Exemplo n.º 36
0
class LogNormalizer:
    """Basic normalization flow manager.

    Normalizer definitions are loaded from a path and checked against the DTD.
    If the definitions are syntactically correct, the normalizers are
    instantiated and populate the manager's cache.
    Normalization priority is established as follows:

    * Maximum priority assigned to normalizers where the "appliedTo" tag is set
      to "raw". They MUST be mutually exclusive.
    * Medium priority assigned to normalizers where the "appliedTo" tag is set
      to "body".
    * Lowest priority assigned to any remaining normalizers.

    Some extra treatment is also done prior and after the log normalization:

    * Assignment of a unique ID, under the tag "uuid"
    * Conversion of date tags to UTC, if the "_timezone" was set prior to
      the normalization process."""

    def __init__(self, normalizers_path, active_normalizers=None):
        """
        Instantiates a flow manager. The default behavior is to activate every
        available normalizer.

        @param normalizers_path: absolute path to the normalizer XML
        definitions to use; it must contain "normalizer.dtd".
        @param active_normalizers: a dictionary of active normalizers
        in the form {name: [True|False]}. None or an empty dictionary
        activates every normalizer.
        """
        self.normalizers_path = normalizers_path
        # None instead of a shared mutable {} default
        if active_normalizers is None:
            active_normalizers = {}
        self.active_normalizers = active_normalizers
        dtd_path = os.path.join(self.normalizers_path, "normalizer.dtd")
        # close the handle as soon as the DTD object is built
        with open(dtd_path) as dtd_file:
            self.dtd = DTD(dtd_file)
        self._cache = []
        self.reload()

    def _all_normalizers(self):
        """Returns every loaded normalizer as one flat list, group by group."""
        return [norm
                for group in self.normalizers.values()
                for norm in group]

    def reload(self):
        """Refreshes this instance's normalizers pool.

        Every definition yielded by iter_normalizer is parsed and validated
        against the DTD; invalid ones are skipped with a warning. The
        activation state and the ordered cache are then rebuilt."""
        self.normalizers = {"raw": [], "body": []}
        tag_types_path = os.path.join(self.normalizers_path,
                                      "common_tagTypes.xml")
        for path in self.iter_normalizer():
            with open(path) as definition:
                norm = parse(definition)
            if not self.dtd.validate(norm):
                warnings.warn("Skipping %s : invalid DTD" % path)
            else:
                normalizer = Normalizer(norm, tag_types_path)
                self.normalizers.setdefault(normalizer.appliedTo, [])
                self.normalizers[normalizer.appliedTo].append(normalizer)
        self.activate_normalizers()

    def iter_normalizer(self):
        """ Iterates through normalizers and returns the normalizers' paths.

        Every ".xml" file found under normalizers_path (recursively) is
        yielded, except files whose name starts with "common_tagTypes".

        @return: a generator of absolute paths.
        """
        path = self.normalizers_path
        for root, dirs, files in os.walk(path):
            for name in files:
                if not name.startswith("common_tagTypes") and name.endswith(".xml"):
                    yield os.path.join(root, name)

    def __len__(self):
        """ Returns the amount of available normalizers, i.e. the number of
        definition files found on disk by iter_normalizer.
        """
        return sum(1 for _ in self.iter_normalizer())

    def update_normalizer(self, raw_xml_contents, name=None):
        """used to add or update a normalizer.
        @param raw_xml_contents: XML description of normalizer as flat XML. It
        must comply to the DTD.
        @param name: if set, the XML description will be saved as name.xml.
        If left blank, name will be fetched from the XML description.
        @raise ValueError: if the description does not validate against the
        DTD.
        """
        xmlconf = XMLfromstring(raw_xml_contents).getroottree()
        if not self.dtd.validate(xmlconf):
            raise ValueError("This definition file does not follow the normalizers DTD :\n\n%s" % self.dtd.error_log.filter_from_errors())
        if not name:
            name = xmlconf.getroot().get("name")
        if not name.endswith(".xml"):
            name += ".xml"
        path = self.normalizers_path
        # Writing to a path lets lxml open and close the file itself, so the
        # definition is fully on disk before reload() reads it back.
        xmlconf.write(os.path.join(path, name), encoding="utf8", method="xml", pretty_print=True)
        self.reload()

    def get_normalizer_source(self, name):
        """Returns the raw XML source of normalizer name.

        @raise ValueError: if no loaded normalizer has this name, or if its
        source cannot be retrieved."""
        try:
            norm = [u for u in self._all_normalizers() if u.name == name][0]
            return norm.get_source()
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not turned into "not found".
            raise ValueError("Normalizer %s not found" % name)

    def activate_normalizers(self):
        """Activates normalizers according to what was set by calling
        set_active_normalizers. If no call to the latter function has been
        made so far, this method activates every normalizer.

        The ordered cache is rebuilt: "raw" normalizers first, then "body"
        ones, then every other group; a normalizer is only cached when it is
        explicitly flagged active."""
        if not self.active_normalizers:
            self.active_normalizers = dict((n.name, True) for n in self._all_normalizers())
        # fool-proof the list
        self.set_active_normalizers(self.active_normalizers)
        # build an ordered cache to speed things up
        ordered = list(self.normalizers["raw"])
        ordered.extend(self.normalizers["body"])
        for applied_to in self.normalizers:
            if applied_to not in ("raw", "body"):
                ordered.extend(self.normalizers[applied_to])
        # consider the normalizer to be inactive if not explicitly in our
        # list; this also applies to the "everything else" group, which was
        # previously cached regardless of its activation state.
        self._cache = [norm for norm in ordered
                       if self.active_normalizers.get(norm.name, False)]

    def get_active_normalizers(self):
        """Returns a dictionary of normalizers; keys are normalizers' names and
        values are True|False according to the normalizer's activation state."""
        return self.active_normalizers

    def set_active_normalizers(self, norms=None):
        """Sets the active/inactive normalizers. Default behavior is to
        deactivate every normalizer.

        The ordered cache is not rebuilt here; activate_normalizers does it.

        @param norms: a dictionary, similar to the one returned by
        get_active_normalizers."""
        default = dict((n.name, False) for n in self._all_normalizers())
        if norms:
            default.update(norms)
        self.active_normalizers = default

    def lognormalize(self, data):
        """ This method is the entry point to normalize data (a log).

        data is passed through every activated normalizer
        and extra tagging occurs accordingly.

        data receives also an extra uuid tag.

        If data contains a key called _timezone, its value is used to convert
        any date into UTC. This value must be a valid timezone name; see
        the pytz module for more information. If the conversion fails a
        warning is emitted and the date is left untouched.

        Nothing is returned: the result is expected to be read back from
        data itself, as in the example below.

        @param data: must be a dictionary with at least a key 'raw' or 'body'
                     with BaseString values (preferably Unicode).

        Here an example :
        >>> from logsparser import lognormalizer
        >>> from pprint import pprint
        >>> ln = lognormalizer.LogNormalizer('/usr/local/share/normalizers/')
        >>> mylog = {'raw' : 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)'}
        >>> ln.lognormalize(mylog)
        >>> pprint(mylog)
        {'body': '(root) CMD (/srv/git/redmine-changesets.sh)',
        'date': datetime.datetime(2011, 7, 18, 15, 35, 1),
        'pid': '14338',
        'program': '/USR/SBIN/CRON',
        'raw': 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)',
        'source': 'zoo',
        'uuid': 70851882840934161193887647073096992594L}
        """
        data = self.uuidify(data)
        data = self.normalize(data)
        # convert date to UTC
        if "_timezone" in data:
            try:
                timezone = pytz.timezone(data["_timezone"])
                loc_date = timezone.localize(data["date"])
                data["date"] = loc_date.astimezone(pytz.utc)
                # turn the date into a "naive" object
                data["date"] = data["date"].replace(tzinfo=None)
                del data["_timezone"]
            except Exception:
                # best effort: any failure (unknown zone, missing or
                # unusable date) only skips the conversion
                warnings.warn("Invalid timezone %s, skipping UTC conversion" % data["_timezone"])

    # some more functions for clarity
    def uuidify(self, log):
        """Adds a unique UID to the normalized log, under the key "uuid",
        and returns the same log object."""
        log["uuid"] = _UUID_.uuid4().int
        return log

    def normalize(self, log):
        """plain normalization: the log goes through every cached normalizer
        in order, and the result of the last one is returned."""
        for norm in self._cache:
            log = norm.normalize(log)
        return log

    def _normalize(self, log):
        """Used for testing only, the normalizers' tags prerequisite are
        deactivated. Returns the normalized log, like normalize does."""
        for norm in self._cache:
            log = norm.normalize(log, do_not_check_prereq=True)
        return log