示例#1
0
def _digest_chunks(chunks):
    """Hash an iterable of byte chunks and return the finalized digest.

    The UTF-8 encoded ``XSD.base64Binary`` value is fed into the hash
    before the chunks themselves.
    """
    h = get_hashob()
    h.update(XSD.base64Binary.encode("utf8"))
    for chunk in chunks:
        h.update(chunk)
    return h.finalize()


def _reserve_download_size(c_length, current_size):
    """Check the announced content length and add it to ``current_size[0]``.

    Raises:
        ValidationError: code ``invalid_size`` when the length is missing
            or rejected by ``verify_download_size``.
    """
    if (
        c_length is None or
        not verify_download_size(c_length, current_size[0])
    ):
        raise exceptions.ValidationError(
            _("Content too big or size unset: %(size)s"),
            params={"size": c_length},
            code="invalid_size"
        )
    current_size[0] += int(c_length)


def retrieve_object(obj, current_size, graph=None, session=None):
    """Load ``obj`` into ``graph`` or, without a graph, return its hash.

    Args:
        obj: either a URL (``str``) or a non-string object. A non-string
            object is passed to ``graph.parse`` as-is, or read in
            ``BUFFER_SIZE`` chunks via ``obj.read`` when hashing.
        current_size: mutable sequence whose item ``0`` holds the number of
            bytes accounted for so far; the announced ``content-length`` of
            a fetched URL is added to it.
        graph: optional object exposing ``parse(source, format=...)``
            (presumably an rdflib graph -- confirm against callers). When
            given, the content is parsed as turtle and nothing is hashed.
        session: optional object exposing a requests-compatible ``get``;
            the ``requests`` module is used when it is falsy.

    Returns:
        The finalized hash when ``graph`` is None, otherwise None.

    Raises:
        ValidationError: with code ``error_code:<status>``,
            ``invalid_size``, ``timeout_url`` or ``invalid_url`` for
            retrieval problems, and ``invalid_format`` for any other
            failure (e.g. a parser error).
    """
    ret = None
    try:
        if not isinstance(obj, str):
            if graph is not None:
                graph.parse(obj, format="turtle")
                return None
            # b'' sentinel: stop as soon as read() reports end of data
            return _digest_chunks(
                iter(lambda: obj.read(BUFFER_SIZE), b'')
            )

        # obj is url
        params, inline_domain = get_requests_params(obj)
        if inline_domain:
            resp = Client().get(obj, SERVER_NAME=inline_domain)
            # try/finally so the response is closed on every exit path
            try:
                if resp.status_code != 200:
                    # NOTE(review): Client looks like Django's test client,
                    # whose responses expose `reason_phrase` rather than
                    # `reason`; support both -- confirm against the import.
                    reason = getattr(resp, "reason_phrase", None)
                    if reason is None:
                        reason = getattr(resp, "reason", "")
                    raise exceptions.ValidationError(
                        _("Retrieval failed: %(reason)s"),
                        params={"reason": reason},
                        code="error_code:{}".format(resp.status_code)
                    )

                _reserve_download_size(
                    resp.get("content-length", None), current_size
                )
                if graph is not None:
                    # Only touch `content` when the response is not
                    # streaming, and always hand the parser a file-like
                    # object instead of a bare chunk iterator.
                    if hasattr(resp, "streaming_content"):
                        body = b"".join(resp.streaming_content)
                    else:
                        body = resp.content
                    graph.parse(io.BytesIO(body), format="turtle")
                else:
                    ret = _digest_chunks(resp)
            finally:
                resp.close()
        else:
            if not session:
                session = requests
            try:
                with session.get(
                    obj, stream=True, **params
                ) as resp:
                    if resp.status_code != 200:
                        raise exceptions.ValidationError(
                            _("Retrieval failed: %(reason)s"),
                            params={"reason": resp.reason},
                            code="error_code:{}".format(resp.status_code)
                        )

                    _reserve_download_size(
                        resp.headers.get("content-length", None),
                        current_size
                    )
                    if graph is not None:
                        graph.parse(resp.raw, format="turtle")
                    else:
                        ret = _digest_chunks(
                            resp.iter_content(BUFFER_SIZE)
                        )
            except requests.exceptions.Timeout as exc:
                raise exceptions.ValidationError(
                    _('url timed out: %(url)s'),
                    params={"url": obj},
                    code="timeout_url"
                ) from exc
            except requests.exceptions.ConnectionError as exc:
                raise exceptions.ValidationError(
                    _('invalid url: %(url)s'),
                    params={"url": obj},
                    code="invalid_url"
                ) from exc
    except exceptions.ValidationError:
        # Keep the specific message/code raised above instead of replacing
        # it with the generic "invalid_format" error below.
        raise
    except Exception as exc:
        logger.error("Parsing graph failed", exc_info=settings.DEBUG)
        raise exceptions.ValidationError(
            _('Invalid graph fromat'),
            code="invalid_format"
        ) from exc
    return ret