Exemplo n.º 1
0
    def load_locally(url):
        """
        Loads a document for the current test from local files, emulating
        the HTTP response described by the test's `option` entry.

        Raises a plain Exception with the message 'loading document failed'
        when the file cannot be read, and 'multiple context link headers'
        when more than one context link is present.

        :param url: the URL of the document to load.

        :return: the RemoteDocument, a dict with `contentType`,
          `contextUrl`, `documentUrl` and `document` keys.
        """
        options = test.data.get('option', {})
        content_type = options.get('contentType')

        # no explicit content type: guess one from the file extension
        url_no_frag = strip_fragment(url)
        if not content_type:
            for suffix, guessed_type in (
                    ('.jsonld', 'application/ld+json'),
                    ('.json', 'application/json'),
                    ('.html', 'text/html')):
                if url_no_frag.endswith(suffix):
                    content_type = guessed_type
                    break
            else:
                content_type = 'application/octet-stream'
        doc = {
            'contentType': content_type,
            'contextUrl': None,
            'documentUrl': url,
            'document': None
        }
        # the simulated HTTP options only apply to the test's own base URL
        if options and url == test.base:
            # a missing httpStatus means "no redirect"; comparing None with
            # an int would raise TypeError
            http_status = options.get('httpStatus') or 0
            if 'redirectTo' in options and http_status >= 300:
                doc['documentUrl'] = (test.manifest.data['baseIri'] +
                                      options['redirectTo'])
            elif 'httpLink' in options:
                link_header = options.get('httpLink', '')
                if isinstance(link_header, list):
                    link_header = ','.join(link_header)
                # parse once and reuse for both link relations
                links = jsonld.parse_link_header(link_header)
                linked_context = links.get(
                    'http://www.w3.org/ns/json-ld#context')
                # a context link is ignored when the document is JSON-LD
                if linked_context and content_type != 'application/ld+json':
                    if isinstance(linked_context, list):
                        raise Exception('multiple context link headers')
                    doc['contextUrl'] = linked_context['target']
                linked_alternate = links.get('alternate')
                # if not JSON-LD, alternate may point there
                if (linked_alternate and linked_alternate.get('type')
                        == 'application/ld+json' and not re.match(
                            r'^application\/(\w*\+)?json$', content_type)):
                    doc['contentType'] = 'application/ld+json'
                    doc['documentUrl'] = jsonld.prepend_base(
                        url, linked_alternate['target'])
        global ROOT_MANIFEST_DIR
        if doc['documentUrl'].find(':') == -1:
            # no ':' in the URL: treat it as a path under the manifest dir
            filename = os.path.join(ROOT_MANIFEST_DIR, doc['documentUrl'])
            doc['documentUrl'] = 'file://' + filename
        else:
            filename = test.dirname + strip_fragment(
                strip_base(doc['documentUrl']))
        try:
            doc['document'] = read_file(filename)
        except Exception as cause:
            # keep the original error attached for debugging
            raise Exception('loading document failed') from cause
        return doc
Exemplo n.º 2
0
 def load_locally(url):
     """
     Loads a JSON document for the current test from local files, emulating
     the HTTP response described by the test's `option` entry.

     Raises a plain Exception with the message 'loading document failed'
     when the file cannot be read, and 'multiple context link headers'
     when more than one context link is present.

     :param url: the URL of the document to load.

     :return: the RemoteDocument, a dict with `contextUrl`, `documentUrl`
       and `document` keys.
     """
     doc = {'contextUrl': None, 'documentUrl': url, 'document': None}
     options = test.data.get('option')
     # the simulated HTTP options only apply to the test's own base URL
     if options and url == test.base:
         # a missing httpStatus means "no redirect"; comparing None with
         # an int would raise TypeError
         http_status = options.get('httpStatus') or 0
         if 'redirectTo' in options and http_status >= 300:
             doc['documentUrl'] = (
                 test.manifest.data['baseIri'] + options['redirectTo'])
         elif 'httpLink' in options:
             content_type = options.get('contentType')
             if not content_type and url.endswith('.jsonld'):
                 content_type = 'application/ld+json'
             link_header = options.get('httpLink', '')
             if isinstance(link_header, list):
                 link_header = ','.join(link_header)
             linked_context = jsonld.parse_link_header(link_header).get(
                 'http://www.w3.org/ns/json-ld#context')
             # a context link is ignored when the document is JSON-LD
             if linked_context and content_type != 'application/ld+json':
                 if isinstance(linked_context, list):
                     raise Exception('multiple context link headers')
                 doc['contextUrl'] = linked_context['target']
     global ROOT_MANIFEST_DIR
     if doc['documentUrl'].find(':') == -1:
         # no ':' in the URL: treat it as a path under the manifest dir
         filename = os.path.join(ROOT_MANIFEST_DIR, doc['documentUrl'])
         doc['documentUrl'] = 'file://' + filename
     else:
         filename = ROOT_MANIFEST_DIR + strip_base(doc['documentUrl'])
     try:
         doc['document'] = read_json(filename)
     except Exception as cause:
         # keep the original error attached for debugging
         raise Exception('loading document failed') from cause
     return doc
Exemplo n.º 3
0
 def load_locally(url):
     """
     Loads a JSON document for the current test from local files, emulating
     the HTTP response described by the test's `option` entry.

     Raises a plain Exception with the message "loading document failed"
     when the file cannot be read, and "multiple context link headers"
     when more than one context link is present.

     :param url: the URL of the document to load.

     :return: the RemoteDocument, a dict with `contextUrl`, `documentUrl`
       and `document` keys.
     """
     doc = {"contextUrl": None, "documentUrl": url, "document": None}
     options = test.data.get("option")
     # the simulated HTTP options only apply to the test's own base URL
     if options and url == test.base:
         # a missing httpStatus means "no redirect"; comparing None with
         # an int would raise TypeError
         http_status = options.get("httpStatus") or 0
         if "redirectTo" in options and http_status >= 300:
             doc["documentUrl"] = (
                 test.manifest.data["baseIri"] + options["redirectTo"])
         elif "httpLink" in options:
             content_type = options.get("contentType")
             if not content_type and url.endswith(".jsonld"):
                 content_type = "application/ld+json"
             link_header = options.get("httpLink", "")
             if isinstance(link_header, list):
                 link_header = ",".join(link_header)
             linked_context = jsonld.parse_link_header(link_header).get(
                 "http://www.w3.org/ns/json-ld#context")
             # a context link is ignored when the document is JSON-LD
             if linked_context and content_type != "application/ld+json":
                 if isinstance(linked_context, list):
                     raise Exception("multiple context link headers")
                 doc["contextUrl"] = linked_context["target"]
     global ROOT_MANIFEST_DIR
     if doc["documentUrl"].find(":") == -1:
         # no ':' in the URL: treat it as a path under the manifest dir
         filename = os.path.join(ROOT_MANIFEST_DIR, doc["documentUrl"])
         doc["documentUrl"] = "file://" + filename
     else:
         # drop the first len(base) characters and map the rest under the
         # manifest dir (assumes the URL starts with `base` -- TODO confirm)
         filename = ROOT_MANIFEST_DIR + doc["documentUrl"][len(base):]
     try:
         doc["document"] = read_json(filename)
     except Exception as cause:
         # keep the original error attached for debugging
         raise Exception("loading document failed") from cause
     return doc
Exemplo n.º 4
0
 def load_locally(url):
     """
     Loads a JSON document for the current test from local files, emulating
     the HTTP response described by the test's `option` entry.

     Raises a plain Exception with the message 'loading document failed'
     when the file cannot be read, and 'multiple context link headers'
     when more than one context link is present.

     :param url: the URL of the document to load.

     :return: the RemoteDocument, a dict with `contextUrl`, `documentUrl`
       and `document` keys.
     """
     doc = {'contextUrl': None, 'documentUrl': url, 'document': None}
     options = test.data.get('option')
     # the simulated HTTP options only apply to the test's own base URL
     if options and url == test.base:
         # a missing httpStatus means "no redirect"; comparing None with
         # an int would raise TypeError
         http_status = options.get('httpStatus') or 0
         if 'redirectTo' in options and http_status >= 300:
             doc['documentUrl'] = (
                 test.manifest.data['baseIri'] + options['redirectTo'])
         elif 'httpLink' in options:
             content_type = options.get('contentType')
             if not content_type and url.endswith('.jsonld'):
                 content_type = 'application/ld+json'
             link_header = options.get('httpLink', '')
             if isinstance(link_header, list):
                 link_header = ','.join(link_header)
             linked_context = jsonld.parse_link_header(link_header).get(
                 'http://www.w3.org/ns/json-ld#context')
             # a context link is ignored when the document is JSON-LD
             if linked_context and content_type != 'application/ld+json':
                 if isinstance(linked_context, list):
                     raise Exception('multiple context link headers')
                 doc['contextUrl'] = linked_context['target']
     global ROOT_MANIFEST_DIR
     if doc['documentUrl'].find(':') == -1:
         # no ':' in the URL: treat it as a path under the manifest dir
         filename = os.path.join(ROOT_MANIFEST_DIR, doc['documentUrl'])
         doc['documentUrl'] = 'file://' + filename
     else:
         filename = ROOT_MANIFEST_DIR + strip_base(doc['documentUrl'])
     try:
         doc['document'] = read_json(filename)
     except Exception as cause:
         # keep the original error attached for debugging
         raise Exception('loading document failed') from cause
     return doc
Exemplo n.º 5
0
    def loader(url):
        """
        Retrieves JSON-LD at the given URL.

        Raises JsonLdError when the URL is rejected, when more than one
        context Link header is present, or when any other error occurs
        while retrieving or parsing the document.

        :param url: the URL to retrieve.

        :return: the RemoteDocument.
        """
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `set(netloc) > set(allowed)` is a strict-superset
            # test, so it only fires when netloc contains every allowed
            # character plus at least one more; a character whitelist was
            # presumably intended. Left as is because tightening it would
            # reject URLs that are accepted today -- confirm before changing.
            if (not all([pieces.scheme, pieces.netloc]) or
                    pieces.scheme not in ['http', 'https'] or
                    set(pieces.netloc) > set(
                        string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; only "http" and "https" '
                    'URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            headers = {
                'Accept': 'application/ld+json, application/json'
            }
            response = requests.get(url, headers=headers, **kwargs)

            doc = {
                'contextUrl': None,
                'documentUrl': response.url,
                'document': response.json()
            }
            content_type = response.headers.get('content-type')
            link_header = response.headers.get('link')
            # compare the bare media type only, so that a value such as
            # "application/ld+json; charset=utf-8" still counts as JSON-LD
            media_type = (content_type or '').split(';', 1)[0].strip().lower()
            if link_header and media_type != 'application/ld+json':
                linked_context = parse_link_header(link_header).get(
                    LINK_HEADER_REL)
                # only 1 related link header permitted
                if isinstance(linked_context, list):
                    raise JsonLdError(
                        'URL could not be dereferenced, it has more than one '
                        'associated HTTP Link Header.',
                        'jsonld.LoadDocumentError',
                        {'url': url},
                        code='multiple context link headers')
                if linked_context:
                    doc['contextUrl'] = linked_context['target']
            return doc
        except JsonLdError:
            # already in its final form; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError', code='loading document failed',
                cause=cause)
Exemplo n.º 6
0
    def loader(url):
        """
        Retrieves JSON-LD at the given URL.

        Raises JsonLdError when the URL is rejected, when more than one
        context Link header is present, or when any other error occurs
        while retrieving or parsing the document.

        :param url: the URL to retrieve.

        :return: the RemoteDocument.
        """
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `set(netloc) > set(allowed)` is a strict-superset
            # test, so it only fires when netloc contains every allowed
            # character plus at least one more; a character whitelist was
            # presumably intended. Left as is because tightening it would
            # reject URLs that are accepted today -- confirm before changing.
            if (not all([pieces.scheme, pieces.netloc])
                    or pieces.scheme not in ['http', 'https']
                    or set(pieces.netloc) >
                    set(string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; only "http" and "https" '
                    'URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            headers = {'Accept': 'application/ld+json, application/json'}
            response = requests.get(url, headers=headers, **kwargs)

            doc = {
                'contextUrl': None,
                'documentUrl': response.url,
                'document': response.json()
            }
            content_type = response.headers.get('content-type')
            link_header = response.headers.get('link')
            # compare the bare media type only, so that a value such as
            # "application/ld+json; charset=utf-8" still counts as JSON-LD
            media_type = (content_type or '').split(';', 1)[0].strip().lower()
            if link_header and media_type != 'application/ld+json':
                linked_context = parse_link_header(link_header).get(
                    LINK_HEADER_REL)
                # only 1 related link header permitted
                if isinstance(linked_context, list):
                    raise JsonLdError(
                        'URL could not be dereferenced, it has more than one '
                        'associated HTTP Link Header.',
                        'jsonld.LoadDocumentError', {'url': url},
                        code='multiple context link headers')
                if linked_context:
                    doc['contextUrl'] = linked_context['target']
            return doc
        except JsonLdError:
            # already in its final form; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError',
                code='loading document failed',
                cause=cause)
Exemplo n.º 7
0
    def loader(url):
        """
        Retrieves JSON-LD at the given URL.

        Raises JsonLdError when the URL is rejected, when more than one
        context Link header is present, or when any other error occurs
        while retrieving or parsing the document.

        :param url: the URL to retrieve.

        :return: the RemoteDocument.
        """
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `set(netloc) > set(allowed)` is a strict-superset
            # test, so it only fires when netloc contains every allowed
            # character plus at least one more; a character whitelist was
            # presumably intended. Left as is because tightening it would
            # reject URLs that are accepted today -- confirm before changing.
            if (not all([pieces.scheme, pieces.netloc])
                    or pieces.scheme not in ["http", "https"]
                    or set(pieces.netloc) >
                    set(string.ascii_letters + string.digits + "-.:")):
                raise JsonLdError(
                    'URL could not be dereferenced; only "http" and "https" '
                    "URLs are supported.",
                    "jsonld.InvalidUrl",
                    {"url": url},
                    code="loading document failed",
                )
            if secure and pieces.scheme != "https":
                raise JsonLdError(
                    "URL could not be dereferenced; secure mode enabled and "
                    'the URL\'s scheme is not "https".',
                    "jsonld.InvalidUrl",
                    {"url": url},
                    code="loading document failed",
                )
            headers = {"Accept": "application/ld+json, application/json"}
            response = session.get(url, headers=headers, **kwargs)

            doc = {
                "contextUrl": None,
                "documentUrl": response.url,
                "document": response.json(),
            }
            content_type = response.headers.get("content-type")
            link_header = response.headers.get("link")
            # compare the bare media type only, so that a value such as
            # "application/ld+json; charset=utf-8" still counts as JSON-LD
            media_type = (content_type or "").split(";", 1)[0].strip().lower()
            if link_header and media_type != "application/ld+json":
                linked_context = parse_link_header(link_header).get(
                    LINK_HEADER_REL)
                # only 1 related link header permitted
                if isinstance(linked_context, list):
                    raise JsonLdError(
                        "URL could not be dereferenced, it has more than one "
                        "associated HTTP Link Header.",
                        "jsonld.LoadDocumentError",
                        {"url": url},
                        code="multiple context link headers",
                    )
                if linked_context:
                    doc["contextUrl"] = linked_context["target"]
            return doc
        except JsonLdError:
            # already in its final form; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                "Could not retrieve a JSON-LD document from the URL.",
                "jsonld.LoadDocumentError",
                code="loading document failed",
                cause=cause,
            )
Exemplo n.º 8
0
    def loader(url, options=None):
        """
        Retrieves JSON-LD at the given URL.

        Raises JsonLdError when the URL is rejected, when more than one
        context Link header is present, or when any other error occurs
        while retrieving or parsing the document.

        :param url: the URL to retrieve.
        :param options: optional dict; its 'headers' entry, when present,
          is sent instead of the default Accept header.

        :return: the RemoteDocument.
        """
        # None instead of a shared mutable default; behaves like the old {}
        if options is None:
            options = {}
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `set(netloc) > set(allowed)` is a strict-superset
            # test, so it only fires when netloc contains every allowed
            # character plus at least one more; a character whitelist was
            # presumably intended. Left as is because tightening it would
            # reject URLs that are accepted today -- confirm before changing.
            if (not all([pieces.scheme, pieces.netloc])
                    or pieces.scheme not in ['http', 'https']
                    or set(pieces.netloc) >
                    set(string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; only "http" and "https" '
                    'URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            headers = options.get('headers')
            if headers is None:
                headers = {'Accept': 'application/ld+json, application/json'}
            response = requests.get(url, headers=headers, **kwargs)

            content_type = response.headers.get('content-type')
            if not content_type:
                content_type = 'application/octet-stream'
            doc = {
                'contentType': content_type,
                'contextUrl': None,
                'documentUrl': response.url,
            }
            # compare the bare media type only, so that a value such as
            # "application/ld+json; charset=utf-8" still counts as JSON-LD
            media_type = content_type.split(';', 1)[0].strip().lower()
            link_header = response.headers.get('link')
            if link_header:
                # parse once and reuse for both link relations
                links = parse_link_header(link_header)
                linked_context = links.get(LINK_HEADER_REL)
                # only 1 related link header permitted
                if linked_context and media_type != 'application/ld+json':
                    if isinstance(linked_context, list):
                        raise JsonLdError(
                            "URL could not be dereferenced, "
                            "it has more than one "
                            "associated HTTP Link Header.",
                            "jsonld.LoadDocumentError", {"url": url},
                            code="multiple context link headers")
                    doc["contextUrl"] = linked_context["target"]
                linked_alternate = links.get('alternate')
                # if not JSON-LD, alternate may point there
                if (linked_alternate and linked_alternate.get('type')
                        == 'application/ld+json' and not re.match(
                            r'^application\/(\w*\+)?json$', media_type)):
                    doc['contentType'] = 'application/ld+json'
                    doc['documentUrl'] = prepend_base(
                        url, linked_alternate['target'])
                    # fetch the alternate representation with the same options
                    return loader(doc['documentUrl'], options=options)
            # parsed only here, after the alternate check, so a non-JSON
            # body that points at a JSON-LD alternate is never parsed
            doc["document"] = response.json()
            return doc
        except JsonLdError:
            # already in its final form; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError',
                code='loading document failed',
                cause=cause)
Exemplo n.º 9
0
    async def async_loader(url, headers):
        """
        Retrieves JSON-LD at the given URL asynchronously.

        Raises JsonLdError when the URL is rejected, when more than one
        context Link header is present, or when any other error occurs
        while retrieving or parsing the document.

        :param url: the URL to retrieve.
        :param headers: the headers sent with the HTTP GET request.

        :return: the RemoteDocument.
        """
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `set(netloc) > set(allowed)` is a strict-superset
            # test, so it only fires when netloc contains every allowed
            # character plus at least one more; a character whitelist was
            # presumably intended. Left as is because tightening it would
            # reject URLs that are accepted today -- confirm before changing.
            if (not all([pieces.scheme, pieces.netloc])
                    or pieces.scheme not in ['http', 'https']
                    or set(pieces.netloc) >
                    set(string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; '
                    'only "http" and "https" URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; '
                    'secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            async with aiohttp.ClientSession(loop=loop) as session:
                async with session.get(url, headers=headers,
                                       **kwargs) as response:
                    # Allow any content_type in trying to parse json
                    # similar to requests library
                    json_body = await response.json(content_type=None)
                    content_type = response.headers.get('content-type')
                    if not content_type:
                        content_type = 'application/octet-stream'
                    doc = {
                        'contentType': content_type,
                        'contextUrl': None,
                        'documentUrl': response.url.human_repr(),
                        'document': json_body
                    }
                    # compare the bare media type only, so that a value
                    # such as "application/ld+json; charset=utf-8" still
                    # counts as JSON-LD
                    media_type = content_type.split(';', 1)[0].strip().lower()
                    link_header = response.headers.get('link')
                    if link_header:
                        # parse once and reuse for both link relations
                        links = parse_link_header(link_header)
                        linked_context = links.get(LINK_HEADER_REL)
                        # only 1 related link header permitted
                        if (linked_context
                                and media_type != 'application/ld+json'):
                            if isinstance(linked_context, list):
                                raise JsonLdError(
                                    'URL could not be dereferenced, '
                                    'it has more than one '
                                    'associated HTTP Link Header.',
                                    'jsonld.LoadDocumentError', {'url': url},
                                    code='multiple context link headers')
                            doc['contextUrl'] = linked_context['target']
                        linked_alternate = links.get('alternate')
                        # if not JSON-LD, alternate may point there
                        # NOTE(review): only contentType and documentUrl are
                        # rewritten here; the alternate URL is not fetched,
                        # so `document` stays the body of this response --
                        # confirm this is intended.
                        if (linked_alternate and linked_alternate.get('type')
                                == 'application/ld+json' and
                                not re.match(r'^application\/(\w*\+)?json$',
                                             media_type)):
                            doc['contentType'] = 'application/ld+json'
                            doc['documentUrl'] = jsonld.prepend_base(
                                url, linked_alternate['target'])

                    return doc
        except JsonLdError:
            # already in its final form; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError',
                code='loading document failed',
                cause=cause)