def add_file_headers(request, media_type, file_size, upload_name):
    """Adds the correct response headers in preparation for responding with the
    media.

    Args:
        request (twisted.web.http.Request)
        media_type (str): The media/content type.
        file_size (int): Size in bytes of the media, if known.
        upload_name (str): The name of the requested file, if any.
    """
    def _quote(x):
        # %-encode the UTF-8 bytes of the filename so the result is ASCII.
        return urllib.parse.quote(x.encode("utf-8"))

    request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
    if upload_name:
        if is_ascii(upload_name):
            disposition = "inline; filename=%s" % (_quote(upload_name),)
        else:
            # Non-ASCII names use the RFC 5987 extended "filename*" form.
            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)

        # The %-quoted value is pure ASCII, so this encode cannot fail.
        # (Previously this %-formatted a str into a bytes template via the
        # removed-in-py3 `urllib.quote`, which breaks on Python 3.)
        request.setHeader(b"Content-Disposition", disposition.encode("ascii"))

    # cache for at least a day.
    # XXX: we might want to turn this off for data we don't want to
    # recommend caching as it's sensitive or private - or at least
    # select private. don't bother setting Expires as all our
    # clients are smart enough to be happy with Cache-Control
    request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
    request.setHeader(b"Content-Length", b"%d" % (file_size,))
def add_file_headers(request, media_type, file_size, upload_name):
    """Adds the correct response headers in preparation for responding with the
    media.

    Args:
        request (twisted.web.http.Request)
        media_type (str): The media/content type.
        file_size (int): Size in bytes of the media, if known.
        upload_name (str): The name of the requested file, if any.
    """
    request.setHeader(b"Content-Type", media_type.encode("UTF-8"))

    if upload_name:
        # %-encode the UTF-8 bytes of the name so the header value is ASCII.
        quoted_name = urllib.parse.quote(upload_name.encode("utf-8"))
        if is_ascii(upload_name):
            disposition = "inline; filename=" + quoted_name
        else:
            # Non-ASCII names use the RFC 5987 extended "filename*" form.
            disposition = "inline; filename*=utf-8''" + quoted_name
        request.setHeader(b"Content-Disposition", disposition.encode('ascii'))

    # cache for at least a day.
    # XXX: we might want to turn this off for data we don't want to
    # recommend caching as it's sensitive or private - or at least
    # select private. don't bother setting Expires as all our
    # clients are smart enough to be happy with Cache-Control
    request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
    request.setHeader(b"Content-Length", b"%d" % (file_size,))
def get_filename_from_headers(headers):
    """
    Get the filename of the downloaded file by inspecting the
    Content-Disposition HTTP header.

    Args:
        headers (dict[bytes, list[bytes]]): The HTTP request headers.

    Returns:
        A Unicode string of the filename, or None.
    """
    content_disposition = headers.get(b"Content-Disposition", [b""])

    # No header, bail out.
    if not content_disposition[0]:
        return

    # NOTE(review): _parse_header is a project helper - presumably it splits
    # the header into its main value plus a dict of bytes parameters; confirm.
    _, params = _parse_header(content_disposition[0])

    upload_name = None

    # First check if there is a valid UTF-8 filename
    upload_name_utf8 = params.get(b"filename*", None)
    if upload_name_utf8:
        if upload_name_utf8.lower().startswith(b"utf-8''"):
            # Strip the RFC 5987 "utf-8''" charset/language prefix.
            upload_name_utf8 = upload_name_utf8[7:]

        # We have a filename*= section. This MUST be ASCII, and any UTF-8
        # bytes are %-quoted.
        if PY3:
            try:
                # Once it is decoded, we can then unquote the %-encoded
                # parts strictly into a unicode string.
                upload_name = urllib.parse.unquote(
                    upload_name_utf8.decode("ascii"), errors="strict")
            except UnicodeDecodeError:
                # Incorrect UTF-8.
                pass
        else:
            # On Python 2, we first unquote the %-encoded parts and then
            # decode it strictly using UTF-8.
            try:
                upload_name = urllib.parse.unquote(
                    upload_name_utf8).decode("utf8")
            except UnicodeDecodeError:
                pass

    # If there isn't check for an ascii name.
    if not upload_name:
        upload_name_ascii = params.get(b"filename", None)
        if upload_name_ascii and is_ascii(upload_name_ascii):
            upload_name = upload_name_ascii.decode("ascii")

    # This may be None here, indicating we did not find a matching name.
    return upload_name
def get_filename_from_headers(headers):
    """
    Get the filename of the downloaded file by inspecting the
    Content-Disposition HTTP header.

    Args:
        headers (dict[bytes, list[bytes]]): The HTTP request headers.

    Returns:
        A Unicode string of the filename, or None.
    """
    content_disposition = headers.get(b"Content-Disposition", [b''])

    # No header, bail out.
    if not content_disposition[0]:
        return

    _, params = _parse_header(content_disposition[0])

    # Prefer the RFC 5987 "filename*" parameter, which carries %-quoted UTF-8.
    name = None
    encoded_name = params.get(b"filename*", None)
    if encoded_name:
        if encoded_name.lower().startswith(b"utf-8''"):
            # Drop the "utf-8''" charset/language prefix.
            encoded_name = encoded_name[7:]

        # The filename*= value MUST be ASCII, with any UTF-8 bytes %-quoted.
        if PY3:
            try:
                # Decode to ASCII first, then strictly unquote the %-escapes
                # into a unicode string.
                name = urllib.parse.unquote(
                    encoded_name.decode('ascii'), errors="strict"
                )
            except UnicodeDecodeError:
                # Invalid UTF-8 in the escapes; ignore this parameter.
                name = None
        else:
            # Python 2: unquote the raw bytes, then strictly decode as UTF-8.
            try:
                name = urllib.parse.unquote(encoded_name).decode('utf8')
            except UnicodeDecodeError:
                name = None

    if name:
        return name

    # Fall back to the plain "filename" parameter, which must be ASCII.
    ascii_name = params.get(b"filename", None)
    if ascii_name and is_ascii(ascii_name):
        return ascii_name.decode('ascii')

    # No usable name found.
    return None
def _respond_with_file(self, request, media_type, file_path,
                       file_size=None, upload_name=None):
    """Stream the file at `file_path` back over `request`, or 404 it.

    NOTE(review): this is an inlineCallbacks-style generator (it yields the
    Deferred returned by FileSender) - presumably decorated at the
    definition site; confirm.  The header code below is Python 2 only:
    `urllib.quote` and %-formatting a str into a bytes template both fail
    on Python 3.
    """
    logger.debug("Responding with %r", file_path)

    if os.path.isfile(file_path):
        request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
        if upload_name:
            if is_ascii(upload_name):
                # Plain ASCII name: %-quote it into a filename= parameter.
                request.setHeader(
                    b"Content-Disposition",
                    b"inline; filename=%s" % (
                        urllib.quote(upload_name.encode("utf-8")),
                    ),
                )
            else:
                # Non-ASCII name: use the RFC 5987 filename* form.
                request.setHeader(
                    b"Content-Disposition",
                    b"inline; filename*=utf-8''%s" % (
                        urllib.quote(upload_name.encode("utf-8")),
                    ),
                )

        # cache for at least a day.
        # XXX: we might want to turn this off for data we don't want to
        # recommend caching as it's sensitive or private - or at least
        # select private. don't bother setting Expires as all our
        # clients are smart enough to be happy with Cache-Control
        request.setHeader(
            b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
        )

        if file_size is None:
            # Size not supplied by the caller; stat the file instead.
            stat = os.stat(file_path)
            file_size = stat.st_size

        request.setHeader(
            b"Content-Length", b"%d" % (file_size,)
        )

        # Stream the file asynchronously, then finish the response.
        with open(file_path, "rb") as f:
            yield FileSender().beginFileTransfer(f, request)

        finish_request(request)
    else:
        self._respond_404(request)
def _download_url(self, url, user):
    """Download `url` into the local media store and record it in the DB.

    NOTE(review): inlineCallbacks-style generator; Python 2 era code (see
    the `urlparse.unquote` / `str.decode` usage below).

    Args:
        url (str): The URL to fetch.
        user (str): The user ID on whose behalf the fetch happens.

    Returns:
        Deferred[dict]: media-info dict describing the downloaded file.
    """
    # TODO: we should probably honour robots.txt... except in practice
    # we're most likely being explicitly triggered by a human rather than a
    # bot, so are we really a robot?

    # XXX: horrible duplication with base_resource's _download_remote_file()
    file_id = random_string(24)

    fname = self.filepaths.local_media_filepath(file_id)
    self.media_repo._makedirs(fname)

    try:
        with open(fname, "wb") as f:
            logger.debug("Trying to get url '%s'" % url)
            length, headers, uri, code = yield self.client.get_file(
                url, output_stream=f, max_size=self.max_spider_size,
            )
            # FIXME: pass through 404s and other error messages nicely

        media_type = headers["Content-Type"][0]
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get("Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0],)
            download_name = None

            # First check if there is a valid UTF-8 filename
            download_name_utf8 = params.get("filename*", None)
            if download_name_utf8:
                if download_name_utf8.lower().startswith("utf-8''"):
                    # Strip the RFC 5987 "utf-8''" prefix; the value is
                    # %-unquoted further below.
                    download_name = download_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not download_name:
                download_name_ascii = params.get("filename", None)
                if download_name_ascii and is_ascii(download_name_ascii):
                    download_name = download_name_ascii

            if download_name:
                # NOTE(review): Python 2 only - on Python 3 `str` has no
                # .decode() and this would raise AttributeError.
                download_name = urlparse.unquote(download_name)
                try:
                    download_name = download_name.decode("utf-8")
                except UnicodeDecodeError:
                    download_name = None
        else:
            download_name = None

        yield self.store.store_local_media(
            media_id=file_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=download_name,
            media_length=length,
            user_id=user,
        )

    except Exception as e:
        # Don't leave a partial file on disk; surface the failure as a 500.
        os.remove(fname)
        raise SynapseError(
            500, ("Failed to download content: %s" % e),
            Codes.UNKNOWN
        )

    defer.returnValue({
        "media_type": media_type,
        "media_length": length,
        "download_name": download_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
        "filename": fname,
        "uri": uri,
        "response_code": code,
        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        "expires": 60 * 60 * 1000,
        "etag": headers["ETag"][0] if "ETag" in headers else None,
    })
def _download_remote_file(self, server_name, media_id):
    """Fetch a piece of remote media into the local cache and record it.

    NOTE(review): inlineCallbacks-style generator; Python 2 era code (see
    the `urlparse.unquote` / `str.decode` usage below).

    Args:
        server_name (str): Originating server.
        media_id (str): Media ID as known to the remote server.

    Returns:
        Deferred[dict]: media-info dict for the cached file.
    """
    # Locally-generated id under which we store the remote content.
    file_id = random_string(24)

    fname = self.filepaths.remote_media_filepath(
        server_name, file_id
    )
    self._makedirs(fname)

    try:
        with open(fname, "wb") as f:
            request_path = "/".join((
                "/_matrix/media/v1/download", server_name, media_id,
            ))
            try:
                length, headers = yield self.client.get_file(
                    server_name, request_path, output_stream=f,
                    max_size=self.max_upload_size,
                )
            except Exception as e:
                logger.warn("Failed to fetch remoted media %r", e)
                raise SynapseError(502, "Failed to fetch remoted media")

        media_type = headers["Content-Type"][0]
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get("Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0],)
            upload_name = None

            # First check if there is a valid UTF-8 filename
            upload_name_utf8 = params.get("filename*", None)
            if upload_name_utf8:
                if upload_name_utf8.lower().startswith("utf-8''"):
                    # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                    upload_name = upload_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not upload_name:
                upload_name_ascii = params.get("filename", None)
                if upload_name_ascii and is_ascii(upload_name_ascii):
                    upload_name = upload_name_ascii

            if upload_name:
                # NOTE(review): Python 2 only - `str.decode` does not exist
                # on Python 3.
                upload_name = urlparse.unquote(upload_name)
                try:
                    upload_name = upload_name.decode("utf-8")
                except UnicodeDecodeError:
                    upload_name = None
        else:
            upload_name = None

        yield self.store.store_cached_remote_media(
            origin=server_name,
            media_id=media_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=upload_name,
            media_length=length,
            filesystem_id=file_id,
        )
    except:
        # Clean up the partial file, then let the original exception
        # propagate.  (Bare except is the PEP 8-sanctioned
        # cleanup-and-reraise case: it must run even for BaseException.)
        os.remove(fname)
        raise

    media_info = {
        "media_type": media_type,
        "media_length": length,
        "upload_name": upload_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
    }

    yield self._generate_remote_thumbnails(
        server_name, media_id, media_info
    )

    defer.returnValue(media_info)
def _download_url(self, url, user):
    """Download `url` into the URL-preview cache and record it in the DB.

    Same as the non-cache variant but stores under the url_cache path and
    records `url_cache=url` in the DB row.

    NOTE(review): inlineCallbacks-style generator; Python 2 era code (see
    the `urlparse.unquote` / `str.decode` usage below).

    Args:
        url (str): The URL to fetch.
        user (str): The user ID on whose behalf the fetch happens.

    Returns:
        Deferred[dict]: media-info dict describing the downloaded file.
    """
    # TODO: we should probably honour robots.txt... except in practice
    # we're most likely being explicitly triggered by a human rather than a
    # bot, so are we really a robot?

    # XXX: horrible duplication with base_resource's _download_remote_file()
    file_id = random_string(24)

    fname = self.filepaths.url_cache_filepath(file_id)
    self.media_repo._makedirs(fname)

    try:
        with open(fname, "wb") as f:
            logger.debug("Trying to get url '%s'" % url)
            length, headers, uri, code = yield self.client.get_file(
                url, output_stream=f, max_size=self.max_spider_size,
            )
            # FIXME: pass through 404s and other error messages nicely

        media_type = headers["Content-Type"][0]
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get("Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0], )
            download_name = None

            # First check if there is a valid UTF-8 filename
            download_name_utf8 = params.get("filename*", None)
            if download_name_utf8:
                if download_name_utf8.lower().startswith("utf-8''"):
                    # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                    download_name = download_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not download_name:
                download_name_ascii = params.get("filename", None)
                if download_name_ascii and is_ascii(download_name_ascii):
                    download_name = download_name_ascii

            if download_name:
                # NOTE(review): Python 2 only - on Python 3 `str` has no
                # .decode() and this would raise AttributeError.
                download_name = urlparse.unquote(download_name)
                try:
                    download_name = download_name.decode("utf-8")
                except UnicodeDecodeError:
                    download_name = None
        else:
            download_name = None

        yield self.store.store_local_media(
            media_id=file_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=download_name,
            media_length=length,
            user_id=user,
            url_cache=url,
        )

    except Exception as e:
        # Don't leave a partial file on disk; surface the failure as a 500.
        os.remove(fname)
        raise SynapseError(500, ("Failed to download content: %s" % e), Codes.UNKNOWN)

    defer.returnValue({
        "media_type": media_type,
        "media_length": length,
        "download_name": download_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
        "filename": fname,
        "uri": uri,
        "response_code": code,
        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        "expires": 60 * 60 * 1000,
        "etag": headers["ETag"][0] if "ETag" in headers else None,
    })
def _download_remote_file(self, server_name, media_id, file_id):
    """Attempt to download the remote file from the given server name,
    using the given file_id as the local id.

    NOTE(review): inlineCallbacks-style generator.

    Args:
        server_name (str): Originating server
        media_id (str): The media ID of the content (as defined by the
            remote server). This is different than the file_id, which is
            locally generated.
        file_id (str): Local file ID

    Returns:
        Deferred[MediaInfo]
    """
    file_info = FileInfo(
        server_name=server_name,
        file_id=file_id,
    )

    # media_storage handles writing the stream to its final location(s);
    # `finish` must be yielded before the file is considered stored.
    with self.media_storage.store_into_file(file_info) as (f, fname, finish):
        request_path = "/".join((
            "/_matrix/media/v1/download", server_name, media_id,
        ))
        try:
            length, headers = yield self.client.get_file(
                server_name, request_path, output_stream=f,
                max_size=self.max_upload_size, args={
                    # tell the remote server to 404 if it doesn't
                    # recognise the server_name, to make sure we don't
                    # end up with a routing loop.
                    "allow_remote": "false",
                })
        except twisted.internet.error.DNSLookupError as e:
            # Unknown host: report as not-found rather than a gateway error.
            logger.warn("HTTP error fetching remote media %s/%s: %r",
                        server_name, media_id, e)
            raise NotFoundError()
        except HttpResponseException as e:
            logger.warn("HTTP error fetching remote media %s/%s: %s",
                        server_name, media_id, e.response)
            if e.code == twisted.web.http.NOT_FOUND:
                # Propagate the remote 404 as-is to our caller.
                raise SynapseError.from_http_response_exception(e)
            raise SynapseError(502, "Failed to fetch remote media")
        except SynapseError:
            logger.exception("Failed to fetch remote media %s/%s",
                             server_name, media_id)
            raise
        except NotRetryingDestination:
            logger.warn("Not retrying destination %r", server_name)
            raise SynapseError(502, "Failed to fetch remote media")
        except Exception:
            # Anything else is mapped to a generic 502.
            logger.exception("Failed to fetch remote media %s/%s",
                             server_name, media_id)
            raise SynapseError(502, "Failed to fetch remote media")

        yield finish()

    media_type = headers["Content-Type"][0]
    time_now_ms = self.clock.time_msec()

    content_disposition = headers.get("Content-Disposition", None)
    if content_disposition:
        _, params = cgi.parse_header(content_disposition[0], )
        upload_name = None

        # First check if there is a valid UTF-8 filename
        upload_name_utf8 = params.get("filename*", None)
        if upload_name_utf8:
            if upload_name_utf8.lower().startswith("utf-8''"):
                # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                upload_name = upload_name_utf8[7:]

        # If there isn't check for an ascii name.
        if not upload_name:
            upload_name_ascii = params.get("filename", None)
            if upload_name_ascii and is_ascii(upload_name_ascii):
                upload_name = upload_name_ascii

        if upload_name:
            # NOTE(review): Python 2 only - `str.decode` does not exist on
            # Python 3, so this branch would raise AttributeError there.
            upload_name = urlparse.unquote(upload_name)
            try:
                upload_name = upload_name.decode("utf-8")
            except UnicodeDecodeError:
                upload_name = None
    else:
        upload_name = None

    logger.info("Stored remote media in file %r", fname)

    yield self.store.store_cached_remote_media(
        origin=server_name,
        media_id=media_id,
        media_type=media_type,
        time_now_ms=self.clock.time_msec(),
        upload_name=upload_name,
        media_length=length,
        filesystem_id=file_id,
    )

    media_info = {
        "media_type": media_type,
        "media_length": length,
        "upload_name": upload_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
    }

    yield self._generate_thumbnails(
        server_name, media_id, file_id, media_type,
    )

    defer.returnValue(media_info)
def _download_remote_file(self, server_name, media_id):
    """Fetch remote media into the primary store (and backup) and record it.

    NOTE(review): inlineCallbacks-style generator; Python 2 era code (see
    the `urlparse.unquote` / `str.decode` usage below).

    Args:
        server_name (str): Originating server.
        media_id (str): Media ID as known to the remote server.

    Returns:
        Deferred[dict]: media-info dict for the cached file.
    """
    # Locally-generated id under which we store the remote content.
    file_id = random_string(24)

    # Relative path is kept so copy_to_backup can mirror it elsewhere.
    fpath = self.filepaths.remote_media_filepath_rel(server_name, file_id)
    fname = os.path.join(self.primary_base_path, fpath)
    self._makedirs(fname)

    try:
        with open(fname, "wb") as f:
            request_path = "/".join((
                "/_matrix/media/v1/download", server_name, media_id,
            ))
            try:
                length, headers = yield self.client.get_file(
                    server_name, request_path, output_stream=f,
                    max_size=self.max_upload_size, args={
                        # tell the remote server to 404 if it doesn't
                        # recognise the server_name, to make sure we don't
                        # end up with a routing loop.
                        "allow_remote": "false",
                    })
            except twisted.internet.error.DNSLookupError as e:
                # Unknown host: report as not-found, not a gateway error.
                logger.warn("HTTP error fetching remote media %s/%s: %r",
                            server_name, media_id, e)
                raise NotFoundError()
            except HttpResponseException as e:
                logger.warn("HTTP error fetching remote media %s/%s: %s",
                            server_name, media_id, e.response)
                if e.code == twisted.web.http.NOT_FOUND:
                    # Propagate the remote 404 as-is to our caller.
                    raise SynapseError.from_http_response_exception(e)
                raise SynapseError(502, "Failed to fetch remote media")
            except SynapseError:
                logger.exception("Failed to fetch remote media %s/%s",
                                 server_name, media_id)
                raise
            except NotRetryingDestination:
                logger.warn("Not retrying destination %r", server_name)
                raise SynapseError(502, "Failed to fetch remote media")
            except Exception:
                # Anything else is mapped to a generic 502.
                logger.exception("Failed to fetch remote media %s/%s",
                                 server_name, media_id)
                raise SynapseError(502, "Failed to fetch remote media")

        # Mirror the newly-written file into the backup store, if any.
        yield self.copy_to_backup(fpath)

        media_type = headers["Content-Type"][0]
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get("Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0], )
            upload_name = None

            # First check if there is a valid UTF-8 filename
            upload_name_utf8 = params.get("filename*", None)
            if upload_name_utf8:
                if upload_name_utf8.lower().startswith("utf-8''"):
                    # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                    upload_name = upload_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not upload_name:
                upload_name_ascii = params.get("filename", None)
                if upload_name_ascii and is_ascii(upload_name_ascii):
                    upload_name = upload_name_ascii

            if upload_name:
                # NOTE(review): Python 2 only - `str.decode` does not exist
                # on Python 3.
                upload_name = urlparse.unquote(upload_name)
                try:
                    upload_name = upload_name.decode("utf-8")
                except UnicodeDecodeError:
                    upload_name = None
        else:
            upload_name = None

        logger.info("Stored remote media in file %r", fname)

        yield self.store.store_cached_remote_media(
            origin=server_name,
            media_id=media_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=upload_name,
            media_length=length,
            filesystem_id=file_id,
        )
    except Exception:
        # Clean up the partial file, then let the exception propagate.
        os.remove(fname)
        raise

    media_info = {
        "media_type": media_type,
        "media_length": length,
        "upload_name": upload_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
    }

    yield self._generate_thumbnails(server_name, media_id, media_info)

    defer.returnValue(media_info)
def get_filename_from_headers(headers):
    """
    Get the filename of the downloaded file by inspecting the
    Content-Disposition HTTP header.

    Args:
        headers (twisted.web.http_headers.Headers): The HTTP request headers.

    Returns:
        A Unicode string of the filename, or None.
    """
    content_disposition = headers.get(b"Content-Disposition", [b''])

    # No header, bail out.
    if not content_disposition[0]:
        return

    # dict of unicode: bytes, corresponding to the key value sections of the
    # Content-Disposition header.
    params = {}
    parts = content_disposition[0].split(b";")
    for i in parts:
        # Split into key-value pairs, if able
        # We don't care about things like `inline`, so throw it out
        if b"=" not in i:
            continue

        # Only split on the first "=": the value itself may contain "="
        # (e.g. inside a quoted string), and an unbounded split() would
        # raise ValueError on the tuple unpacking here.
        key, value = i.strip().split(b"=", 1)
        params[key.decode('ascii')] = value

    upload_name = None

    # First check if there is a valid UTF-8 filename
    upload_name_utf8 = params.get("filename*", None)
    if upload_name_utf8:
        if upload_name_utf8.lower().startswith(b"utf-8''"):
            # Strip the RFC 5987 "utf-8''" charset/language prefix.
            upload_name_utf8 = upload_name_utf8[7:]

        # We have a filename*= section. This MUST be ASCII, and any UTF-8
        # bytes are %-quoted.
        if PY3:
            try:
                # Once it is decoded, we can then unquote the %-encoded
                # parts strictly into a unicode string.
                upload_name = urllib.parse.unquote(
                    upload_name_utf8.decode('ascii'), errors="strict")
            except UnicodeDecodeError:
                # Incorrect UTF-8.
                pass
        else:
            # On Python 2, we first unquote the %-encoded parts and then
            # decode it strictly using UTF-8.
            try:
                upload_name = urllib.parse.unquote(
                    upload_name_utf8).decode('utf8')
            except UnicodeDecodeError:
                pass

    # If there isn't check for an ascii name.
    if not upload_name:
        upload_name_ascii = params.get("filename", None)
        if upload_name_ascii and is_ascii(upload_name_ascii):
            # Make sure there's no %-quoted bytes. If there is, reject it as
            # non-valid ASCII.
            if b"%" not in upload_name_ascii:
                upload_name = upload_name_ascii.decode('ascii')

    # This may be None here, indicating we did not find a matching name.
    return upload_name
def _download_url(self, url, user):
    """Download `url` into the URL-preview cache and record it in the DB.

    NOTE(review): inlineCallbacks-style generator.  The Content-Disposition
    handling below looks py2/py3-inconsistent: `cgi.parse_header` is given
    bytes, so on Python 3 the resulting params keys would be bytes and the
    str lookups ("filename*"/"filename") would never match - confirm.

    Args:
        url (str): The URL to fetch.
        user (str): The user ID on whose behalf the fetch happens.

    Returns:
        Deferred[dict]: media-info dict describing the downloaded file.
    """
    # TODO: we should probably honour robots.txt... except in practice
    # we're most likely being explicitly triggered by a human rather than a
    # bot, so are we really a robot?

    # Date-prefixed id so expired url-cache entries can be swept by day.
    file_id = datetime.date.today().isoformat() + '_' + random_string(16)

    file_info = FileInfo(
        server_name=None,
        file_id=file_id,
        url_cache=True,
    )

    with self.media_storage.store_into_file(file_info) as (f, fname, finish):
        try:
            logger.debug("Trying to get url '%s'" % url)
            length, headers, uri, code = yield self.client.get_file(
                url, output_stream=f, max_size=self.max_spider_size,
            )
        except Exception as e:
            # FIXME: pass through 404s and other error messages nicely
            logger.warn("Error downloading %s: %r", url, e)
            raise SynapseError(
                500, "Failed to download content: %s" % (
                    traceback.format_exception_only(sys.exc_info()[0], e),
                ),
                Codes.UNKNOWN,
            )
        yield finish()

    try:
        if b"Content-Type" in headers:
            media_type = headers[b"Content-Type"][0].decode('ascii')
        else:
            # No Content-Type supplied; fall back to the generic binary type.
            media_type = "application/octet-stream"
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get(b"Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0], )
            download_name = None

            # First check if there is a valid UTF-8 filename
            download_name_utf8 = params.get("filename*", None)
            if download_name_utf8:
                if download_name_utf8.lower().startswith("utf-8''"):
                    # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                    download_name = download_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not download_name:
                download_name_ascii = params.get("filename", None)
                if download_name_ascii and is_ascii(download_name_ascii):
                    download_name = download_name_ascii

            if download_name:
                # NOTE(review): `str.decode` is Python 2 only.
                download_name = urlparse.unquote(download_name)
                try:
                    download_name = download_name.decode("utf-8")
                except UnicodeDecodeError:
                    download_name = None
        else:
            download_name = None

        yield self.store.store_local_media(
            media_id=file_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=download_name,
            media_length=length,
            user_id=user,
            url_cache=url,
        )
    except Exception as e:
        logger.error("Error handling downloaded %s: %r", url, e)
        # TODO: we really ought to delete the downloaded file in this
        # case, since we won't have recorded it in the db, and will
        # therefore not expire it.
        raise

    defer.returnValue({
        "media_type": media_type,
        "media_length": length,
        "download_name": download_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
        "filename": fname,
        "uri": uri,
        "response_code": code,
        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        "expires": 60 * 60 * 1000,
        "etag": headers["ETag"][0] if "ETag" in headers else None,
    })
def _download_url(self, url, user):
    """Download `url` into the URL-preview cache and record it in the DB.

    NOTE(review): inlineCallbacks-style generator.  The Content-Disposition
    handling below looks py2/py3-inconsistent: `cgi.parse_header` is given
    bytes, so on Python 3 the resulting params keys would be bytes and the
    str lookups ("filename*"/"filename") would never match - confirm.

    Args:
        url (str): The URL to fetch.
        user (str): The user ID on whose behalf the fetch happens.

    Returns:
        Deferred[dict]: media-info dict describing the downloaded file.
    """
    # TODO: we should probably honour robots.txt... except in practice
    # we're most likely being explicitly triggered by a human rather than a
    # bot, so are we really a robot?

    # Date-prefixed id so expired url-cache entries can be swept by day.
    file_id = datetime.date.today().isoformat() + '_' + random_string(16)

    file_info = FileInfo(
        server_name=None,
        file_id=file_id,
        url_cache=True,
    )

    with self.media_storage.store_into_file(file_info) as (f, fname, finish):
        try:
            logger.debug("Trying to get url '%s'" % url)
            length, headers, uri, code = yield self.client.get_file(
                url, output_stream=f, max_size=self.max_spider_size,
            )
        except Exception as e:
            # FIXME: pass through 404s and other error messages nicely
            logger.warn("Error downloading %s: %r", url, e)
            raise SynapseError(
                500, "Failed to download content: %s" % (
                    traceback.format_exception_only(sys.exc_info()[0], e),
                ),
                Codes.UNKNOWN,
            )
        yield finish()

    try:
        if b"Content-Type" in headers:
            media_type = headers[b"Content-Type"][0].decode('ascii')
        else:
            # No Content-Type supplied; fall back to the generic binary type.
            media_type = "application/octet-stream"
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get(b"Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0],)
            download_name = None

            # First check if there is a valid UTF-8 filename
            download_name_utf8 = params.get("filename*", None)
            if download_name_utf8:
                if download_name_utf8.lower().startswith("utf-8''"):
                    # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                    download_name = download_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not download_name:
                download_name_ascii = params.get("filename", None)
                if download_name_ascii and is_ascii(download_name_ascii):
                    download_name = download_name_ascii

            if download_name:
                # NOTE(review): `str.decode` is Python 2 only.
                download_name = urlparse.unquote(download_name)
                try:
                    download_name = download_name.decode("utf-8")
                except UnicodeDecodeError:
                    download_name = None
        else:
            download_name = None

        yield self.store.store_local_media(
            media_id=file_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=download_name,
            media_length=length,
            user_id=user,
            url_cache=url,
        )
    except Exception as e:
        logger.error("Error handling downloaded %s: %r", url, e)
        # TODO: we really ought to delete the downloaded file in this
        # case, since we won't have recorded it in the db, and will
        # therefore not expire it.
        raise

    defer.returnValue({
        "media_type": media_type,
        "media_length": length,
        "download_name": download_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
        "filename": fname,
        "uri": uri,
        "response_code": code,
        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        "expires": 60 * 60 * 1000,
        "etag": headers["ETag"][0] if "ETag" in headers else None,
    })
def _download_remote_file(self, server_name, media_id, file_id):
    """Attempt to download the remote file from the given server name,
    using the given file_id as the local id.

    NOTE(review): inlineCallbacks-style generator.

    Args:
        server_name (str): Originating server
        media_id (str): The media ID of the content (as defined by the
            remote server). This is different than the file_id, which is
            locally generated.
        file_id (str): Local file ID

    Returns:
        Deferred[MediaInfo]
    """
    file_info = FileInfo(
        server_name=server_name,
        file_id=file_id,
    )

    # media_storage handles writing the stream to its final location(s);
    # `finish` must be yielded before the file is considered stored.
    with self.media_storage.store_into_file(file_info) as (f, fname, finish):
        request_path = "/".join((
            "/_matrix/media/v1/download", server_name, media_id,
        ))
        try:
            length, headers = yield self.client.get_file(
                server_name, request_path, output_stream=f,
                max_size=self.max_upload_size, args={
                    # tell the remote server to 404 if it doesn't
                    # recognise the server_name, to make sure we don't
                    # end up with a routing loop.
                    "allow_remote": "false",
                }
            )
        except twisted.internet.error.DNSLookupError as e:
            # Unknown host: report as not-found rather than a gateway error.
            logger.warn("HTTP error fetching remote media %s/%s: %r",
                        server_name, media_id, e)
            raise NotFoundError()
        except HttpResponseException as e:
            logger.warn("HTTP error fetching remote media %s/%s: %s",
                        server_name, media_id, e.response)
            if e.code == twisted.web.http.NOT_FOUND:
                # Propagate the remote 404 as-is to our caller.
                raise SynapseError.from_http_response_exception(e)
            raise SynapseError(502, "Failed to fetch remote media")
        except SynapseError:
            logger.exception("Failed to fetch remote media %s/%s",
                             server_name, media_id)
            raise
        except NotRetryingDestination:
            logger.warn("Not retrying destination %r", server_name)
            raise SynapseError(502, "Failed to fetch remote media")
        except Exception:
            # Anything else is mapped to a generic 502.
            logger.exception("Failed to fetch remote media %s/%s",
                             server_name, media_id)
            raise SynapseError(502, "Failed to fetch remote media")

        yield finish()

    media_type = headers["Content-Type"][0]
    time_now_ms = self.clock.time_msec()

    content_disposition = headers.get("Content-Disposition", None)
    if content_disposition:
        _, params = cgi.parse_header(content_disposition[0],)
        upload_name = None

        # First check if there is a valid UTF-8 filename
        upload_name_utf8 = params.get("filename*", None)
        if upload_name_utf8:
            if upload_name_utf8.lower().startswith("utf-8''"):
                # Strip the RFC 5987 "utf-8''" prefix; %-unquoted below.
                upload_name = upload_name_utf8[7:]

        # If there isn't check for an ascii name.
        if not upload_name:
            upload_name_ascii = params.get("filename", None)
            if upload_name_ascii and is_ascii(upload_name_ascii):
                upload_name = upload_name_ascii

        if upload_name:
            # NOTE(review): `str.decode` is Python 2 only; this branch would
            # raise AttributeError on Python 3.
            upload_name = urlparse.unquote(upload_name)
            try:
                upload_name = upload_name.decode("utf-8")
            except UnicodeDecodeError:
                upload_name = None
    else:
        upload_name = None

    logger.info("Stored remote media in file %r", fname)

    yield self.store.store_cached_remote_media(
        origin=server_name,
        media_id=media_id,
        media_type=media_type,
        time_now_ms=self.clock.time_msec(),
        upload_name=upload_name,
        media_length=length,
        filesystem_id=file_id,
    )

    media_info = {
        "media_type": media_type,
        "media_length": length,
        "upload_name": upload_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
    }

    yield self._generate_thumbnails(
        server_name, media_id, file_id, media_type,
    )

    defer.returnValue(media_info)
def get_filename_from_headers(headers):
    """
    Get the filename of the downloaded file by inspecting the
    Content-Disposition HTTP header.

    Args:
        headers (twisted.web.http_headers.Headers): The HTTP request headers.

    Returns:
        A Unicode string of the filename, or None.
    """
    content_disposition = headers.get(b"Content-Disposition", [b''])

    # No header, bail out.
    if not content_disposition[0]:
        return

    # dict of unicode: bytes, corresponding to the key value sections of the
    # Content-Disposition header.
    params = {}
    parts = content_disposition[0].split(b";")
    for i in parts:
        # Split into key-value pairs, if able
        # We don't care about things like `inline`, so throw it out
        if b"=" not in i:
            continue

        # Split only on the first "=": the value may itself contain "="
        # (e.g. inside a quoted string), and an unbounded split() would
        # make this tuple unpacking raise ValueError.
        key, value = i.strip().split(b"=", 1)
        params[key.decode('ascii')] = value

    upload_name = None

    # First check if there is a valid UTF-8 filename
    upload_name_utf8 = params.get("filename*", None)
    if upload_name_utf8:
        if upload_name_utf8.lower().startswith(b"utf-8''"):
            # Strip the RFC 5987 "utf-8''" charset/language prefix.
            upload_name_utf8 = upload_name_utf8[7:]

        # We have a filename*= section. This MUST be ASCII, and any UTF-8
        # bytes are %-quoted.
        if PY3:
            try:
                # Once it is decoded, we can then unquote the %-encoded
                # parts strictly into a unicode string.
                upload_name = urllib.parse.unquote(
                    upload_name_utf8.decode('ascii'), errors="strict"
                )
            except UnicodeDecodeError:
                # Incorrect UTF-8.
                pass
        else:
            # On Python 2, we first unquote the %-encoded parts and then
            # decode it strictly using UTF-8.
            try:
                upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8')
            except UnicodeDecodeError:
                pass

    # If there isn't check for an ascii name.
    if not upload_name:
        upload_name_ascii = params.get("filename", None)
        if upload_name_ascii and is_ascii(upload_name_ascii):
            # Make sure there's no %-quoted bytes. If there is, reject it as
            # non-valid ASCII.
            if b"%" not in upload_name_ascii:
                upload_name = upload_name_ascii.decode('ascii')

    # This may be None here, indicating we did not find a matching name.
    return upload_name