예제 #1
0
    def resolve_all(self, document, base_url):
        """Recursively resolve directives and expand URL fields in `document`.

        Lists and dicts are walked recursively and updated in place; any
        other value is returned unchanged.  A dict carrying an 'id',
        'import' or 'include' key is first handed to `self.resolve_ref`;
        for 'include' the result of that call is returned immediately.
        Fields named in `self.url_fields` are expanded against `base_url`
        with `expand_url`.

        Raises `validate.ValidationException` with the offending field name
        or list position prepended when a child fails to resolve.
        """
        if not isinstance(document, (list, dict)):
            return document

        if isinstance(document, list):
            entries = enumerate(document)
        else:
            was_include = 'include' in document
            if was_include or 'id' in document or 'import' in document:
                document = self.resolve_ref(document, base_url)
            if was_include:
                # An include yields whatever resolve_ref returned; it is not
                # walked any further.
                return document

            for field in self.url_fields:
                if field not in document:
                    continue
                current = document[field]
                if isinstance(current, basestring):
                    document[field] = expand_url(current, base_url)
                elif isinstance(current, list):
                    document[field] = [
                        expand_url(item, base_url)
                        if isinstance(item, basestring) else item
                        for item in current
                    ]
            entries = document.iteritems()

        for key, val in entries:
            try:
                document[key] = self.resolve_all(val, base_url)
            except validate.ValidationException as err:
                # String keys come from dicts, integer keys from lists.
                if isinstance(key, basestring):
                    template = "Validation error in field %s:\n%s"
                else:
                    template = "Validation error in position %i:\n%s"
                raise validate.ValidationException(
                    template % (key, validate.indent(str(err))))

        return document
 def validate_link(self, field, link):
     """Check that `link`, found in `field`, refers to something known.

     Fields listed in `self.nolinkcheck` are accepted unconditionally.
     A string link must be present in `self.idx` or `self.rvocab` (or in
     `self.vocab` as well when `field` is one of `self.vocab_fields`);
     failing that, `self.check_file(link)` gets the final say.  Lists are
     checked element by element with all failures reported together, and
     dicts are delegated to `self.validate_links`.

     Returns True on success, raises `validate.ValidationException`
     otherwise.
     """
     if field in self.nolinkcheck:
         return True

     if isinstance(link, basestring):
         if field in self.vocab_fields:
             resolved = (link in self.vocab or link in self.idx or
                         link in self.rvocab)
         else:
             resolved = link in self.idx or link in self.rvocab
         # check_file is only consulted for links that are otherwise unknown.
         if not resolved and not self.check_file(link):
             raise validate.ValidationException(
                 "Field `%s` contains undefined reference to `%s`" %
                 (field, link))
     elif isinstance(link, list):
         failures = []
         for entry in link:
             try:
                 self.validate_link(field, entry)
             except validate.ValidationException as exc:
                 failures.append(str(exc))
         if failures:
             raise validate.ValidationException("\n".join(failures))
     elif isinstance(link, dict):
         self.validate_links(link)

     return True
예제 #3
0
    def validate_links(self, document):
        """Recursively verify that every URL field points into `self.idx`.

        For each dict, the fields named in `self.url_fields` are examined:
        a string value, or every string element of a list value, must be a
        key of `self.idx`.  Lists and dicts are then walked recursively;
        any other value is ignored.

        Raises `validate.ValidationException`; errors from nested values
        are wrapped with the field name or list position they came from.
        """
        if isinstance(document, list):
            children = enumerate(document)
        elif isinstance(document, dict):
            for field in self.url_fields:
                if field not in document:
                    continue
                value = document[field]
                if isinstance(value, basestring):
                    candidates = [value]
                elif isinstance(value, list):
                    # Non-string list elements are not treated as links.
                    candidates = [item for item in value
                                  if isinstance(item, basestring)]
                else:
                    candidates = []
                for link in candidates:
                    if link not in self.idx:
                        raise validate.ValidationException(
                            "Invalid link `%s` in field `%s`" % (link, field))
            children = document.iteritems()
        else:
            return

        for key, val in children:
            try:
                self.validate_links(val)
            except validate.ValidationException as err:
                if isinstance(key, basestring):
                    template = "At field `%s`\n%s"
                else:
                    template = "At position %s\n%s"
                raise validate.ValidationException(
                    template % (key, validate.indent(str(err))))

        return
    def validate_links(self, document):
        """Recursively validate the links of `document`, collecting errors.

        For a dict, every field in `self.url_fields` that is not in
        `self.identity_links` is checked with `self.validate_link`.
        Children of lists and dicts are then validated recursively.
        Errors raised for a child whose key is in `self.nolinkcheck` are
        dropped; all others are wrapped with the child's id (when
        `self.getid` returns one), its field name, or its list position.

        Raises `validate.ValidationException`: the single collected error
        as-is, or one exception joining all messages when there are
        several.
        """
        # The original looks up the id of `document` up front; the value is
        # never used afterwards, but the call is kept.
        self.getid(document)

        problems = []
        if isinstance(document, list):
            children = enumerate(document)
        elif isinstance(document, dict):
            try:
                for field in self.url_fields:
                    if field in document and field not in self.identity_links:
                        self.validate_link(field, document[field])
            except validate.ValidationException as exc:
                problems.append(exc)
            children = document.iteritems()
        else:
            return

        for key, val in children:
            try:
                self.validate_links(val)
            except validate.ValidationException as exc:
                if key in self.nolinkcheck:
                    continue
                detail = validate.indent(str(exc))
                child_id = self.getid(val)
                if child_id:
                    message = "While checking object `%s`\n%s" % (
                        child_id, detail)
                elif isinstance(key, basestring):
                    message = "While checking field `%s`\n%s" % (key, detail)
                else:
                    message = "While checking position %s\n%s" % (key, detail)
                problems.append(validate.ValidationException(message))

        if len(problems) > 1:
            raise validate.ValidationException("\n".join(
                [str(p) for p in problems]))
        if problems:
            raise problems[0]
        return
 def validate_requirements(self, tool, field):
     """Validate each entry of `tool[field]` against its declared class.

     Every entry is looked up by its "class" value via
     `self.names.get_name` and validated with `validate.validate_ex`;
     nested "requirements" and "hints" lists are validated recursively.

     When `field` is "hints" a failure is only logged as a warning; for
     any other field it is re-raised as `validate.ValidationException`
     with context prepended.
     """
     for r in tool.get(field, []):
         try:
             schema = self.names.get_name(r["class"], "")
             if schema is None:
                 raise validate.ValidationException(
                     "Unknown requirement %s" % (r["class"]))
             validate.validate_ex(schema, r)
             for nested in ("requirements", "hints"):
                 if nested in r:
                     self.validate_requirements(r, nested)
         except validate.ValidationException as v:
             err = "While validating %s %s\n%s" % (
                 field, r["class"], validate.indent(str(v)))
             if field == "hints":
                 # Logger.warn is a deprecated alias of Logger.warning.
                 _logger.warning(err)
             else:
                 raise validate.ValidationException(err)
def validate_doc(schema_names, validate_doc, loader, strict):
    """Validate a document against the schema's document roots.

    `validate_doc` may be a single dict or a list of items.  Each item
    must validate (via `validate.validate_ex`) against at least one entry
    of `schema_names.names` whose "documentRoot" property is truthy.

    Raises `validate.ValidationException` when the schema defines no
    document root, when the document is neither a dict nor a list, or when
    any item fails to validate against every root.  Failures are reported
    per item, labelled by the item's identifier field when it has one and
    by its position otherwise.
    """
    roots = [r for r in schema_names.names.values()
             if r.get_prop("documentRoot")]
    if not roots:
        raise validate.ValidationException(
            "No document roots defined in the schema")

    if isinstance(validate_doc, dict):
        validate_doc = [validate_doc]
    elif not isinstance(validate_doc, list):
        raise validate.ValidationException("Document must be dict or list")

    anyerrors = []
    for pos, item in enumerate(validate_doc):
        errors = []
        success = False
        for r in roots:
            try:
                validate.validate_ex(
                    r,
                    item,
                    loader.identifiers,
                    strict,
                    foreign_properties=loader.foreign_properties)
                success = True
                break
            except validate.ValidationException as e:
                errors.append("Could not validate as `%s` because\n%s" %
                              (r.get_prop("name"),
                               validate.indent(str(e), nolead=False)))
        if success:
            continue

        objerr = "Validation error at position %i" % pos
        # Only mappings can carry an identifier field.  Without this guard a
        # scalar item would raise TypeError here and mask the validation
        # error that is about to be reported.
        if isinstance(item, dict):
            for ident in loader.identifiers:
                if ident in item:
                    objerr = "Validation error in object %s" % (item[ident])
                    break
        anyerrors.append("%s\n%s" %
                         (objerr, validate.indent("\n".join(errors))))

    if anyerrors:
        raise validate.ValidationException("\n".join(anyerrors))
예제 #7
0
    def resolve_ref(self, ref, base_url=None):
        """Resolve `ref` to the object it designates, loading it if needed.

        `ref` is either a string reference or a dict.  A dict must be one
        of: `{"import": <ref>}`, `{"include": <ref>}` (each with no other
        keys), or an object carrying an "id" field.  The reference is
        expanded against `base_url` (defaulting to a file:// URL for the
        current working directory) and looked up in `self.idx`.

        Returns the indexed object, or the raw text from `self.fetch_text`
        for an "include".

        Raises ValueError for a malformed directive or non-string
        reference, `validate.ValidationException` when the containing
        document is already indexed but the reference is not, and
        RuntimeError when the reference is still missing from the index
        after loading.
        """
        base_url = base_url or 'file://%s/' % os.path.abspath('.')

        obj = None

        # If `ref` is a dict, look for special directives.
        if isinstance(ref, dict):
            obj = ref
            if "import" in ref:
                if len(obj) == 1:
                    ref = obj["import"]
                    # Clearing `obj` marks this as a reference to an external
                    # document rather than an inline object.
                    obj = None
                else:
                    raise ValueError("'import' must be the only field in %s" % (str(obj)))
            elif "include" in obj:
                if len(obj) == 1:
                    # `obj` is kept so the "include" check further down can
                    # recognise the directive.
                    ref = obj["include"]
                else:
                    raise ValueError("'include' must be the only field in %s" % (str(obj)))
            else:
                if "id" in obj:
                    ref = obj["id"]
                else:
                    raise ValueError("Object `%s` does not have `id` field" % obj)

        if not isinstance(ref, basestring):
            raise ValueError("Must be string: `%s`" % str(ref))

        url = expand_url(ref, base_url)

        # Has this reference been loaded already?
        if url in self.idx:
            return self.idx[url]

        # "include" directive means load raw text
        if obj and "include" in obj:
            return self.fetch_text(url)

        if obj:
            # Inline object: normalise its id to the expanded URL and index it
            # before recursing.
            obj["id"] = url
            self.idx[url] = obj
        else:
            # Load structured document
            doc_url, frg = urlparse.urldefrag(url)
            if doc_url in self.idx:
                # The document is already indexed, yet the full URL was not
                # found above, so the fragment cannot be resolved.
                raise validate.ValidationException("Reference `#%s` not found in file `%s`." % (frg, doc_url))
            obj = self.fetch(doc_url)

        # Recursively expand urls and resolve directives
        self.resolve_all(obj, url)

        # Requested reference should be in the index now, otherwise it's a bad reference
        if self.idx.get(url) is not None:
            return self.idx[url]
        else:
            raise RuntimeError("Reference `%s` is not in the index.  Index contains:\n  %s" % (url, "\n  ".join(self.idx)))
    def add_context(self, newcontext, baseuri=""):
        """Merge `newcontext` into `self.ctx` and rebuild the derived tables.

        All entries of `newcontext` except "@context" are copied into
        `self.ctx`.  The field classification sets (`url_fields`,
        `vocab_fields`, `identifiers`, `identity_links`, `nolinkcheck`) and
        the `vocab` / `rvocab` mappings are then recomputed from the whole
        context.  `baseuri` is accepted but not used here.

        Raises `validate.ValidationException` if `self.vocab` is already
        populated.
        """
        if self.vocab:
            raise validate.ValidationException(
                "Refreshing context that already has stuff in it")

        self.url_fields = set()
        self.vocab_fields = set()
        self.identifiers = set()
        self.identity_links = set()
        self.standalone = set()
        self.nolinkcheck = set()
        self.vocab = {}
        self.rvocab = {}

        additions = dict((name, value)
                         for name, value in newcontext.iteritems()
                         if name != "@context")
        self.ctx.update(additions)

        _logger.debug("ctx is %s", self.ctx)

        for term in self.ctx:
            entry = self.ctx[term]
            is_mapping = isinstance(entry, dict)
            link_type = entry.get("@type") if is_mapping else None

            if entry == "@id":
                self.identifiers.add(term)
                self.identity_links.add(term)
            elif link_type == "@id":
                self.url_fields.add(term)
                if entry.get("identity", False):
                    self.identity_links.add(term)
            elif link_type == "@vocab":
                self.url_fields.add(term)
                self.vocab_fields.add(term)

            if is_mapping and entry.get("noLinkCheck"):
                self.nolinkcheck.add(term)

            if is_mapping and "@id" in entry:
                self.vocab[term] = entry["@id"]
            elif isinstance(entry, basestring):
                self.vocab[term] = entry

        # Reverse vocabulary: expanded URL -> short term.
        for term, target in self.vocab.items():
            self.rvocab[self.expand_url(target, "", scoped=False)] = term

        _logger.debug("identifiers is %s", self.identifiers)
        _logger.debug("identity_links is %s", self.identity_links)
        _logger.debug("url_fields is %s", self.url_fields)
        _logger.debug("vocab_fields is %s", self.vocab_fields)
        _logger.debug("vocab is %s", self.vocab)
 def fetch(self, url):
     """Return the parsed YAML document at `url`, caching it in `self.idx`.

     A cached entry is returned as-is.  Otherwise the text from
     `self.fetch_text` is parsed; a dict result without an "id" gets
     `url` as its id and is indexed under its expanded id, while any
     other result is indexed under `url`.

     Raises `validate.ValidationException` on a YAML parser error.
     """
     if url in self.idx:
         return self.idx[url]

     try:
         # NOTE(security): yaml.load without an explicit safe loader can
         # construct arbitrary objects; confirm `url` is always trusted.
         result = yaml.load(self.fetch_text(url))
     except yaml.parser.ParserError as e:
         raise validate.ValidationException("Error loading '%s' %s" %
                                            (url, str(e)))

     index_key = url
     if isinstance(result, dict):
         result.setdefault("id", url)
         index_key = expand_url(result["id"], url)
     self.idx[index_key] = result
     return result
 def fetch(self, url):
     """Return the parsed YAML document at `url`, caching it in `self.idx`.

     A cached entry is returned as-is.  Otherwise the text from
     `self.fetch_text` is wrapped in a named stream and parsed.  When the
     result is a dict and `self.identifiers` is non-empty, every
     identifier field missing from it is set to `url` and the document is
     indexed under each expanded identifier; otherwise it is indexed
     under `url`.

     Raises `validate.ValidationException` on a YAML parser error.
     """
     if url in self.idx:
         return self.idx[url]

     try:
         stream = StringIO.StringIO(self.fetch_text(url))
         stream.name = url
         # NOTE(security): yaml.load without an explicit safe loader can
         # construct arbitrary objects; confirm `url` is always trusted.
         result = yaml.load(stream)
     except yaml.parser.ParserError as e:
         raise validate.ValidationException("Syntax error %s" % (e))

     if not (isinstance(result, dict) and self.identifiers):
         self.idx[url] = result
         return result

     for name in self.identifiers:
         result.setdefault(name, url)
         self.idx[self.expand_url(result[name], url)] = result
     return result
    def expand_url(self, url, base_url, scoped=False, vocab_term=False):
        """Expand `url` to an absolute URL, honouring vocabulary prefixes.

        - "@id" and "@type" are returned untouched.
        - With `vocab_term`, a `url` that is itself a key of `self.vocab`
          is returned untouched.
        - A `prefix:rest` form whose prefix is a key of `self.vocab` has
          the prefix replaced by the vocabulary value.
        - A URL that has a scheme, or starts with "$(" or "${", is left
          as-is (presumably these are expressions -- TODO confirm).
        - With `scoped` and no fragment in `url`, the path of `url` becomes
          the fragment of `base_url`, nested under any existing fragment.
        - Otherwise `url` is joined onto `base_url`.

        With `vocab_term`, a result found in `self.rvocab` is mapped back
        to its short term.
        """
        if url in ("@id", "@type"):
            return url

        if vocab_term and url in self.vocab:
            return url

        if self.vocab and ":" in url:
            prefix = url.split(":")[0]
            if prefix in self.vocab:
                url = self.vocab[prefix] + url[len(prefix) + 1:]

        split = urlparse.urlsplit(url)

        # A leftover debugging trap that raised for the literal URL
        # "cwl:hints" used to sit here.  An unresolved prefix is now handled
        # like any other URL that has a scheme.
        if split.scheme or url.startswith("$(") or url.startswith("${"):
            pass
        elif scoped and not split.fragment:
            splitbase = urlparse.urlsplit(base_url)
            if splitbase.fragment:
                frg = splitbase.fragment + "/" + split.path
            else:
                frg = split.path
            url = urlparse.urlunsplit((splitbase.scheme, splitbase.netloc,
                                       splitbase.path, splitbase.query, frg))
        else:
            url = urlparse.urljoin(base_url, url)

        if vocab_term and url in self.rvocab:
            return self.rvocab[url]
        return url
def extend_and_specialize(items, loader):
    """Apply 'extends' and 'specialize' to fully materialize derived types.

    `items` is a list of type definitions (dicts with at least "name").
    Each definition is deep-copied.  For a definition with "extends":

    - a "record" receives the fields of every base type (after applying
      the "specialize" substitutions through `replace_type`), tagged with
      "inherited_from", followed by its own fields; a field called "class"
      is narrowed to a one-symbol enum named after the record;
    - an "enum" gathers the symbols of its bases followed by its own.

    Finally, field types are rewritten through `replace_type` using the
    `extended_by` table built from abstract base types.

    Returns the new list of definitions; `items` itself is not modified.

    Raises Exception when a base type is unknown and
    `validate.ValidationException` when a field name appears twice.
    """

    types = {t["name"]: t for t in items}
    n = []

    for t in items:
        t = copy.deepcopy(t)
        if "extends" in t:
            if "specialize" in t:
                spec = {
                    sp["specializeFrom"]: sp["specializeTo"]
                    for sp in aslist(t["specialize"])
                }
            else:
                spec = {}

            exfields = []
            exsym = []
            for ex in aslist(t["extends"]):
                if ex not in types:
                    raise Exception(
                        "Extends %s in %s refers to invalid base type" %
                        (t["extends"], t["name"]))

                basetype = copy.deepcopy(types[ex])

                if t["type"] == "record":
                    if spec:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), spec, loader, set())

                    for f in basetype.get("fields", []):
                        if "inherited_from" not in f:
                            f["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif t["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if t["type"] == "record":
                exfields.extend(t.get("fields", []))
                t["fields"] = exfields

                fieldnames = set()
                for field in t["fields"]:
                    if field["name"] in fieldnames:
                        raise validate.ValidationException(
                            "Field name %s appears twice in %s" %
                            (field["name"], t["name"]))
                    else:
                        fieldnames.add(field["name"])

                # The "class" field of a record only admits the record's own
                # name.  The original referenced an undefined name `r` here
                # (NameError); the record being processed is `t`.
                for y in [x for x in t["fields"] if x["name"] == "class"]:
                    y["type"] = {
                        "type": "enum",
                        "symbols": [t["name"]],
                        "name": t["name"] + "_class",
                    }
                    y["doc"] = "Must be `%s` to indicate this is a %s object." % (
                        t["name"], t["name"])
            elif t["type"] == "enum":
                exsym.extend(t.get("symbols", []))
                # NOTE(review): the merged list is stored under "symbol"
                # while it was read from "symbols" -- looks like a typo, but
                # fixing it changes which symbols downstream code sees.
                # Confirm before changing.
                t["symbol"] = exsym

            types[t["name"]] = t

        n.append(t)

    ex_types = {t["name"]: t for t in n}

    extended_by = {}
    for t in n:
        if "extends" in t:
            for ex in aslist(t["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[t["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for t in n:
        if "fields" in t:
            t["fields"] = replace_type(t["fields"], extended_by, loader, set())

    return n
    def resolve_all(self, document, base_url, file_base=None):
        """Recursively resolve directives and expand URLs in `document`.

        `base_url` is the base for expanding identifiers and links;
        `file_base` (defaulting to `base_url`) is the base used for
        `$import` / `$include` and `$schemas`.

        Dicts are processed in this order: `$import` / `$include` are
        delegated to `self.resolve_ref`; `$base`, `$profile`,
        `$namespaces` and `$schemas` adjust the base URL or switch to a
        `SubLoader`; `$graph` replaces the document with its graph and
        turns the remaining keys into metadata; identity links are expanded
        and indexed; keys are renamed to their vocabulary terms; URL fields
        are expanded; finally every value is resolved recursively.  Lists
        are resolved element by element, with a list-valued `$import`
        spliced in place.

        Returns a `(document, metadata)` tuple.  Values that are neither
        dict nor list are returned unchanged with empty metadata.

        Raises `validate.ValidationException` with field or position
        context when a child fails to resolve.
        """
        loader = self
        metadata = {}
        if file_base is None:
            file_base = base_url

        if isinstance(document, dict):
            # Handle $import and $include
            if ('$import' in document or '$include' in document):
                return self.resolve_ref(document, file_base)
        elif isinstance(document, list):
            pass
        else:
            return document, metadata

        newctx = None
        if isinstance(document, dict):
            # Handle $base, $profile, $namespaces, $schemas and $graph
            if "$base" in document:
                base_url = document["$base"]

            if "$profile" in document:
                if not newctx:
                    newctx = SubLoader(self)
                # Fetched for its side effect on the index; the returned
                # document is not used here.
                self.fetch(document["$profile"])
                newctx.add_namespaces(document.get("$namespaces", {}),
                                      document["$profile"])
                newctx.add_schemas(document.get("$schemas", []),
                                   document["$profile"])

            if "$namespaces" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document["$namespaces"])

            if "$schemas" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_schemas(document["$schemas"], file_base)

            if newctx:
                loader = newctx

            if "$graph" in document:
                metadata = {k: v for k, v in document.items() if k != "$graph"}
                document = document["$graph"]
                metadata, _ = loader.resolve_all(metadata, base_url, file_base)

        if isinstance(document, dict):
            for identifer in loader.identity_links:
                if identifer in document:
                    if isinstance(document[identifer], basestring):
                        document[identifer] = loader.expand_url(
                            document[identifer], base_url, scoped=True)
                        if document[identifer] not in loader.idx or isinstance(
                                loader.idx[document[identifer]], basestring):
                            loader.idx[document[identifer]] = document
                        # Nested identifiers are scoped under this one.
                        base_url = document[identifer]
                    elif isinstance(document[identifer], list):
                        for n, v in enumerate(document[identifer]):
                            document[identifer][n] = loader.expand_url(
                                document[identifer][n], base_url, scoped=True)
                            if document[identifer][n] not in loader.idx:
                                loader.idx[document[identifer]
                                           [n]] = document[identifer][n]

            # Rename keys to their vocabulary terms.  Iterate over a snapshot
            # of the keys: adding and deleting entries while iterating the
            # dict itself can skip keys, revisit renamed ones, or raise.
            for d in list(document.keys()):
                d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
                if d != d2:
                    document[d2] = document[d]
                    del document[d]

            for d in loader.url_fields:
                if d in document:
                    if isinstance(document[d], basestring):
                        document[d] = loader.expand_url(
                            document[d],
                            base_url,
                            scoped=False,
                            vocab_term=(d in loader.vocab_fields))
                    elif isinstance(document[d], list):
                        document[d] = [
                            loader.expand_url(
                                url,
                                base_url,
                                scoped=False,
                                vocab_term=(d in loader.vocab_fields))
                            if isinstance(url, basestring) else url
                            for url in document[d]
                        ]

            try:
                for key, val in document.items():
                    document[key], _ = loader.resolve_all(
                        val, base_url, file_base)
            except validate.ValidationException as v:
                _logger.debug("loader is %s", id(loader))
                raise validate.ValidationException(
                    "(%s) (%s) Validation error in field %s:\n%s" %
                    (id(loader), file_base, key, validate.indent(str(v))))

        elif isinstance(document, list):
            i = 0
            try:
                while i < len(document):
                    val = document[i]
                    if isinstance(val, dict) and "$import" in val:
                        l, _ = loader.resolve_ref(val, file_base)
                        if isinstance(l, list):
                            # Splice the imported list in place of the
                            # directive, keeping `i` just past the insertion.
                            del document[i]
                            for item in aslist(l):
                                document.insert(i, item)
                                i += 1
                        else:
                            document[i] = l
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(
                            val, base_url, file_base)
                        i += 1
            except validate.ValidationException as v:
                raise validate.ValidationException(
                    "(%s) (%s) Validation error in position %i:\n%s" %
                    (id(loader), file_base, i, validate.indent(str(v))))

            # For a $graph document, index the graph list under the
            # identifier carried in the metadata.
            for identifer in loader.identity_links:
                if identifer in metadata:
                    if isinstance(metadata[identifer], basestring):
                        metadata[identifer] = loader.expand_url(
                            metadata[identifer], base_url, scoped=True)
                        loader.idx[metadata[identifer]] = document

        return document, metadata
    def resolve_ref(self, ref, base_url=None):
        """Resolve `ref` to the object it designates, loading it if needed.

        `ref` is either a string reference or a dict.  A dict must be one
        of: `{"$import": <ref>}`, `{"$include": <ref>}` (each with no
        other keys), or an object carrying one of `self.identifiers`.  The
        reference is expanded against `base_url` (defaulting to a file://
        URL for the current working directory) and looked up in
        `self.idx`.

        Returns an `(object, metadata)` tuple.  For `$include` the object
        is the raw text from `self.fetch_text`; when the resolved object
        contains "$graph", the graph is returned as the object and the
        remaining keys as metadata.

        Raises ValueError for a malformed directive, a missing identifier
        or a non-string reference, `validate.ValidationException` when the
        containing document is already indexed but the reference is not,
        and RuntimeError when the reference is still missing from the index
        after loading.
        """
        base_url = base_url or 'file://%s/' % os.path.abspath('.')

        obj = None
        inc = False
        # NOTE(review): `merge` is never assigned anything but None in this
        # block, so the `if merge:` branch below is never taken.
        merge = None

        # If `ref` is a dict, look for special directives.
        if isinstance(ref, dict):
            obj = ref
            if "$import" in ref:
                if len(obj) == 1:
                    ref = obj["$import"]
                    obj = None
                else:
                    raise ValueError("'$import' must be the only field in %s" %
                                     (str(obj)))
            elif "$include" in obj:
                if len(obj) == 1:
                    ref = obj["$include"]
                    inc = True
                    obj = None
                else:
                    raise ValueError(
                        "'$include' must be the only field in %s" % (str(obj)))
            else:
                # Inline object: use the first identifier field it carries.
                ref = None
                for identifier in self.identifiers:
                    if identifier in obj:
                        ref = obj[identifier]
                        break
                if not ref:
                    raise ValueError(
                        "Object `%s` does not have identifier field in %s" %
                        (obj, self.identifiers))

        if not isinstance(ref, basestring):
            raise ValueError("Must be string: `%s`" % str(ref))

        # Identifiers of inline objects are scoped; plain references and
        # directive targets are not.
        url = self.expand_url(ref, base_url, scoped=(obj is not None))

        # Has this reference been loaded already?
        if url in self.idx:
            if merge:
                obj = self.idx[url].copy()
            else:
                return self.idx[url], {}

        # "$include" directive means load raw text
        if inc:
            return self.fetch_text(url), {}

        if obj:
            # Inline object: normalise every identifier field to the expanded
            # URL before recursing.
            for identifier in self.identifiers:
                obj[identifier] = url
            doc_url = url
        else:
            # Load structured document
            doc_url, frg = urlparse.urldefrag(url)
            if doc_url in self.idx:
                # The document is already indexed, yet the full URL was not
                # found above, so the fragment cannot be resolved.
                raise validate.ValidationException(
                    "Reference `#%s` not found in file `%s`." % (frg, doc_url))
            obj = self.fetch(doc_url)

        # Recursively expand urls and resolve directives
        obj, metadata = self.resolve_all(obj, doc_url)

        # Requested reference should be in the index now, otherwise it's a bad reference
        if url is not None:
            if url in self.idx:
                obj = self.idx[url]
            else:
                raise RuntimeError(
                    "Reference `%s` is not in the index.  Index contains:\n  %s"
                    % (url, "\n  ".join(self.idx)))

        # Both branches return the same pair; only the "$graph" case rewrites
        # `obj` and `metadata` first.
        if "$graph" in obj:
            metadata = {k: v for k, v in obj.items() if k != "$graph"}
            obj = obj["$graph"]
            return obj, metadata
        else:
            return obj, metadata