def retrieve_object(obj, current_size, graph=None, session=None):
    """Retrieve a turtle document from a file-like object or an url.

    When ``graph`` is given, the content is parsed into it and ``None``
    is returned; without ``graph`` the raw content is hashed and the
    hash digest is returned instead.

    Args:
        obj: a file-like object (anything that is not ``str``) or an url.
        current_size: one-element mutable sequence used as an in/out
            accumulator of bytes downloaded so far; ``current_size[0]``
            is increased by the content length of this retrieval.
        graph: optional rdflib-style graph to parse the content into.
        session: optional requests-compatible session; defaults to the
            ``requests`` module itself.

    Returns:
        bytes hash digest when ``graph`` is ``None``, else ``None``.

    Raises:
        exceptions.ValidationError: on non-200 responses, oversized or
            unsized content, timeouts, invalid urls, or unparsable graphs.
    """
    ret = None
    try:
        if not isinstance(obj, str):
            # File-like object: no download, parse or hash directly.
            if graph is not None:
                graph.parse(obj, format="turtle")
                return None
            h = get_hashob()
            h.update(XSD.base64Binary.encode("utf8"))
            for chunk in iter(lambda: obj.read(BUFFER_SIZE), b''):
                h.update(chunk)
            return h.finalize()
        # obj is an url
        params, inline_domain = get_requests_params(obj)
        if inline_domain:
            # Inline/local domain: served via the django test Client
            # instead of a real network request.
            resp = Client().get(obj, SERVER_NAME=inline_domain)
            if resp.status_code != 200:
                raise exceptions.ValidationError(
                    _("Retrieval failed: %(reason)s"),
                    params={"reason": resp.reason},
                    code="error_code:{}".format(resp.status_code)
                )
            # Django responses expose headers via item access.
            c_length = resp.get("content-length", None)
            if (
                c_length is None or
                not verify_download_size(c_length, current_size[0])
            ):
                resp.close()
                raise exceptions.ValidationError(
                    _("Content too big or size unset: %(size)s"),
                    params={"size": c_length},
                    code="invalid_size"
                )
            c_length = int(c_length)
            current_size[0] += c_length
            try:
                if graph is not None:
                    # Streaming responses expose streaming_content;
                    # otherwise fall back to the buffered content.
                    graph.parse(
                        getattr(
                            resp, "streaming_content",
                            io.BytesIO(resp.content)
                        ),
                        format="turtle"
                    )
                else:
                    h = get_hashob()
                    h.update(XSD.base64Binary.encode("utf8"))
                    for chunk in resp:
                        h.update(chunk)
                    ret = h.finalize()
            finally:
                # BUGFIX: close the response even if parsing/hashing
                # raises (previously leaked on error).
                resp.close()
        else:
            if not session:
                session = requests
            try:
                with session.get(
                    obj, stream=True, **params
                ) as resp:
                    if resp.status_code != 200:
                        raise exceptions.ValidationError(
                            _("Retrieval failed: %(reason)s"),
                            params={"reason": resp.reason},
                            code="error_code:{}".format(resp.status_code)
                        )
                    c_length = resp.headers.get("content-length", None)
                    if (
                        c_length is None or
                        not verify_download_size(c_length, current_size[0])
                    ):
                        raise exceptions.ValidationError(
                            _("Content too big or size unset: %(size)s"),
                            params={"size": c_length},
                            code="invalid_size"
                        )
                    c_length = int(c_length)
                    current_size[0] += c_length
                    if graph is not None:
                        graph.parse(resp.raw, format="turtle")
                    else:
                        h = get_hashob()
                        h.update(XSD.base64Binary.encode("utf8"))
                        for chunk in resp.iter_content(BUFFER_SIZE):
                            h.update(chunk)
                        ret = h.finalize()
            except requests.exceptions.Timeout:
                raise exceptions.ValidationError(
                    _('url timed out: %(url)s'),
                    params={"url": obj},
                    code="timeout_url"
                )
            except requests.exceptions.ConnectionError:
                raise exceptions.ValidationError(
                    _('invalid url: %(url)s'),
                    params={"url": obj},
                    code="invalid_url"
                )
    except exceptions.ValidationError:
        # BUGFIX: previously the catch-all below swallowed the specific
        # validation errors raised above (wrong status, invalid_size,
        # timeout_url, invalid_url) and re-wrapped them all as a generic
        # "invalid_format" error, destroying their codes and params.
        raise
    except Exception:
        logger.error("Parsing graph failed", exc_info=settings.DEBUG)
        raise exceptions.ValidationError(
            # BUGFIX: typo "fromat" -> "format" in the user-facing message.
            _('Invalid graph format'),
            code="invalid_format"
        )
    return ret