def post(self, app_id="blob", session_id = "session"):
  """ Handle a post request from a user uploading a blob.

  Stores each uploaded file as a blob, then forwards an urlencoded
  callback (blob metadata, not the raw bytes) to the app's upload
  success handler and relays its response/redirect to the user.

  Args:
    app_id: The application triggering the upload.
    session_id: Authentication token to validate the upload.
  """
  global datastore_path
  # Point the datastore_v3 API at this app's distributed datastore; the
  # session lookup/delete and blob storage below all go through it.
  db = datastore_distributed.DatastoreDistributed(
    app_id, datastore_path, require_indexes=False)
  apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', db)
  os.environ['APPLICATION_ID'] = app_id # Setup the app id in the datastore.

  # Get session info and upload success path.
  blob_session = get_session(session_id)
  if not blob_session:
    self.finish('Session has expired. Contact the owner of the ' + \
                'app for support.\n\n')
    return

  success_path = blob_session["success_path"]
  # Derive "host[:port]" from the success URL so the forwarded request
  # carries the app server's Host header, not this upload server's.
  server_host = success_path[:success_path.rfind("/", 3)]
  if server_host.startswith("http://"):
    # Strip off the beginning of the server host
    server_host = server_host[len("http://"):]
  server_host = server_host.split('/')[0]

  blob_storage = datastore_blob_storage.DatastoreBlobStorage(app_id)
  uploadhandler = dev_appserver_upload.UploadCGIHandler(blob_storage)

  # Sessions are single-use: consume it before forwarding.
  datastore.Delete(blob_session)

  # This request is sent to the upload handler of the app
  # in the hope it returns a redirect to be forwarded to the user
  urlrequest = urllib2.Request(success_path)

  # Forward all relevant headers and create data for request
  content_type = self.request.headers["Content-Type"]
  kv = split_content_type(content_type)
  boundary = None
  if "boundary" in kv:
    boundary = kv["boundary"]

  # The callback body is urlencoded metadata, not the original multipart.
  urlrequest.add_header("Content-Type", 'application/x-www-form-urlencoded')

  for name, value in self.request.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      urlrequest.add_header(name, value)

  # Get correct redirect addresses, otherwise it will redirect back
  # to this port.
  urlrequest.add_header("Host", server_host)

  form = MultiPartForm(boundary)
  creation = datetime.datetime.now()

  # Loop on all files in the form.
  for filekey in self.request.files.keys():
    # NOTE(review): 'data' is re-created on every iteration, so only the
    # last file's blob_info_metadata reaches the callback below — confirm
    # whether multi-file uploads are expected here.
    data = {"blob_info_metadata": {filekey: []}}
    file = self.request.files[filekey][0]
    body = file["body"]
    size = len(body)
    filename = file["filename"]
    file_content_type = file["content_type"]

    blob_entity = uploadhandler.StoreBlob(file, creation)

    blob_key = str(blob_entity.key().name())
    if not blob_key:
      self.finish('Status: 500\n\n')
      return
    creation_formatted = blobstore._format_creation(creation)
    form.add_file(filekey, filename, cStringIO.StringIO(blob_key), blob_key,
                  blobstore.BLOB_KEY_HEADER, size, creation_formatted)

    md5_handler = hashlib.md5(str(body))
    data["blob_info_metadata"][filekey].append(
      {"filename": filename, "creation-date": creation_formatted,
       "key": blob_key, "size": str(size),
       "content-type": file_content_type,
       "md5-hash": md5_handler.hexdigest()})

  # Loop through form fields
  for fieldkey in self.request.arguments.keys():
    form.add_field(fieldkey, self.request.arguments[fieldkey][0])
    data[fieldkey] = self.request.arguments[fieldkey][0]

  logging.debug("Callback data: \n{}".format(data))
  data = urllib.urlencode(data)
  urlrequest.add_header("Content-Length", str(len(data)))
  urlrequest.add_data(data)

  # We are catching the redirect error here
  # and extracting the Location to post the redirect.
  try:
    response = urllib2.urlopen(urlrequest)
    output = response.read()
    # Transparently decompress a gzip-encoded response before relaying it.
    if response.info().get('Content-Encoding') == 'gzip':
      buf = StringIO(output)
      f = gzip.GzipFile(fileobj=buf)
      data = f.read()
      output = data
    self.finish(output)
  except urllib2.HTTPError, e:
    if "Location" in e.hdrs:
      # Catch any errors, use the success path to
      # get the ip and port, use the redirect path
      # for the path. We split redirect_path just in case
      # its a full path.
      redirect_path = e.hdrs["Location"]
      self.redirect(redirect_path)
      return
    else:
      self.finish(UPLOAD_ERROR + "</br>" + str(e.hdrs) + "</br>" + str(e))
      return
def _GenerateMIMEMessage(self, form, boundary=None):
  """Generate a new post from original form.

  Also responsible for storing blobs in the datastore.

  Args:
    form: Instance of cgi.FieldStorage representing the whole form
      derived from original post data.
    boundary: Boundary to use for resulting form. Used only in tests so
      that the boundary is always consistent.

  Returns:
    A MIMEMultipart instance representing the new HTTP post which should be
    forwarded to the developers actual CGI handler. DO NOT use the return
    value of this method to generate a string unless you know what you're
    doing and properly handle folding whitespace (from rfc822) properly.
  """
  message = multipart.MIMEMultipart('form-data', boundary)
  # Carry over the original request headers, minus stripped ones.
  for name, value in form.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      message.add_header(name, value)

  def IterateForm():
    """Flattens form in to single sequence of cgi.FieldStorage instances.

    The resulting cgi.FieldStorage objects are a little bit irregular in
    their structure. A single name can have mulitple sub-items. In this
    case, the root FieldStorage object has a list associated with that
    field name. Otherwise, the root FieldStorage object just refers to a
    single nested instance.

    Lists of FieldStorage instances occur when a form has multiple values
    for the same name.

    Yields:
      cgi.FieldStorage irrespective of their nesting level.
    """
    # Sorted so results (and generated blob keys) are deterministic.
    for key in sorted(form):
      form_item = form[key]
      if isinstance(form_item, list):
        for list_item in form_item:
          yield list_item
      else:
        yield form_item

  creation = self.__now_func()
  for form_item in IterateForm():
    disposition_parameters = {'name': form_item.name}
    if form_item.filename is None:
      # Plain (non-file) form field: copied through as text.
      variable = base.MIMEBase('text', 'plain')
      variable.set_payload(form_item.value)
    else:
      # An empty filename means the file field was left blank; skip it.
      if not form_item.filename:
        continue
      disposition_parameters['filename'] = form_item.filename
      main_type, sub_type = _SplitMIMEType(form_item.type)

      blob_entity = self.StoreBlob(form_item, creation)

      # Replace the raw bytes with an external-body part referencing the
      # stored blob, so the app receives a blob key instead of file data.
      variable = base.MIMEBase('message', 'external-body',
                               access_type=blobstore.BLOB_KEY_HEADER,
                               blob_key=blob_entity.key().name())

      # Seek to the end to measure the upload size, then rewind.
      form_item.file.seek(0, 2)
      content_length = form_item.file.tell()
      form_item.file.seek(0)

      external = base.MIMEBase(main_type, sub_type,
                               **form_item.type_options)
      headers = dict(form_item.headers)
      headers['Content-Length'] = str(content_length)
      headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
          blobstore._format_creation(creation))
      for key, value in headers.iteritems():
        external.add_header(key, value)

      external_disposition_parameters = dict(disposition_parameters)
      external_disposition_parameters['filename'] = form_item.filename
      if not external.get('Content-Disposition'):
        external.add_header('Content-Disposition', 'form-data',
                            **external_disposition_parameters)
      variable.set_payload([external])

    variable.add_header('Content-Disposition', 'form-data',
                        **disposition_parameters)
    message.attach(variable)
  return message
def _GenerateMIMEMessage(self, form, boundary=None, max_bytes_per_blob=None,
                         max_bytes_total=None, bucket_name=None):
  """Generate a new post from original form.

  Also responsible for storing blobs in the datastore.

  Args:
    form: Instance of cgi.FieldStorage representing the whole form
      derived from original post data.
    boundary: Boundary to use for resulting form. Used only in tests so
      that the boundary is always consistent.
    max_bytes_per_blob: The maximum size in bytes that any single blob
      in the form is allowed to be.
    max_bytes_total: The maximum size in bytes that the total of all blobs
      in the form is allowed to be.
    bucket_name: The name of the Google Storage bucket to uplad the file.

  Returns:
    A MIMEMultipart instance representing the new HTTP post which should be
    forwarded to the developers actual CGI handler. DO NOT use the return
    value of this method to generate a string unless you know what you're
    doing and properly handle folding whitespace (from rfc822) properly.

  Raises:
    UploadEntityTooLargeError: The upload exceeds either the
      max_bytes_per_blob or max_bytes_total limits.
    FilenameOrContentTypeTooLargeError: The filename or the content_type of
      the upload is larger than the allowed size for a string type in the
      datastore.
  """
  message = multipart.MIMEMultipart('form-data', boundary)
  for name, value in form.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      message.add_header(name, value)

  def IterateForm():
    """Flattens form in to single sequence of cgi.FieldStorage instances.

    The resulting cgi.FieldStorage objects are a little bit irregular in
    their structure. A single name can have mulitple sub-items. In this
    case, the root FieldStorage object has a list associated with that
    field name. Otherwise, the root FieldStorage object just refers to a
    single nested instance.

    Lists of FieldStorage instances occur when a form has multiple values
    for the same name.

    Yields:
      cgi.FieldStorage irrespective of their nesting level.
    """
    for key in sorted(form):
      form_item = form[key]
      if isinstance(form_item, list):
        for list_item in form_item:
          yield list_item
      else:
        yield form_item

  creation = self.__now_func()
  total_bytes_uploaded = 0
  # Blobs stored so far; deleted again if a limit check fails below.
  created_blobs = []
  upload_too_large = False
  filename_too_large = False
  content_type_too_large = False

  for form_item in IterateForm():
    disposition_parameters = {'name': form_item.name}
    if form_item.filename is None:
      # Plain (non-file) form field: copied through as text.
      variable = base.MIMEBase('text', 'plain')
      variable.set_payload(form_item.value)
    else:
      # An empty filename means the file field was left blank; skip it.
      if not form_item.filename:
        continue
      disposition_parameters['filename'] = form_item.filename
      main_type, sub_type = _SplitMIMEType(form_item.type)

      # Measure the upload size, then rewind.
      form_item.file.seek(0, 2)
      content_length = form_item.file.tell()
      form_item.file.seek(0)

      # Enforce per-blob/total size and name-length limits BEFORE storing
      # this blob; on violation the loop breaks and everything stored so
      # far is rolled back after the loop.
      total_bytes_uploaded += content_length
      if max_bytes_per_blob is not None:
        if max_bytes_per_blob < content_length:
          upload_too_large = True
          break
      if max_bytes_total is not None:
        if max_bytes_total < total_bytes_uploaded:
          upload_too_large = True
          break
      if form_item.filename is not None:
        if MAX_STRING_NAME_LENGTH < len(form_item.filename):
          filename_too_large = True
          break
      if form_item.type is not None:
        if MAX_STRING_NAME_LENGTH < len(form_item.type):
          content_type_too_large = True
          break

      blob_entity = self.StoreBlob(form_item, creation)
      created_blobs.append(blob_entity)

      variable = base.MIMEBase('message', 'external-body',
                               access_type=blobstore.BLOB_KEY_HEADER,
                               blob_key=blob_entity.key().name())

      # MD5 the upload in 1 MiB chunks to avoid loading it all in memory.
      form_item.file.seek(0)
      digester = hashlib.md5()
      while True:
        block = form_item.file.read(1 << 20)
        if not block:
          break
        digester.update(block)

      # NOTE(review): base64 of the hex digest (not of the binary digest);
      # presumably kept for compatibility with the SDK's behavior.
      blob_key = base64.urlsafe_b64encode(digester.hexdigest())
      form_item.file.seek(0)

      external = base.MIMEBase(main_type, sub_type,
                               **form_item.type_options)
      headers = dict(form_item.headers)
      headers['Content-Length'] = str(content_length)
      headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
          blobstore._format_creation(creation))
      if bucket_name:
        headers[blobstore.CLOUD_STORAGE_OBJECT_HEADER] = (
            '/gs/%s/fake-%s-%s' % (bucket_name, blob_entity.key().name(),
                                   blob_key))
      headers['Content-MD5'] = blob_key
      for key, value in headers.iteritems():
        external.add_header(key, value)

      external_disposition_parameters = dict(disposition_parameters)
      external_disposition_parameters['filename'] = form_item.filename
      if not external.get('Content-Disposition'):
        external.add_header('Content-Disposition', 'form-data',
                            **external_disposition_parameters)
      variable.set_payload([external])

    variable.add_header('Content-Disposition', 'form-data',
                        **disposition_parameters)
    message.attach(variable)

  if upload_too_large or filename_too_large or content_type_too_large:
    # Roll back every blob stored before the violating item.
    for blob in created_blobs:
      datastore.Delete(blob)
    if upload_too_large:
      raise UploadEntityTooLargeError()
    elif filename_too_large:
      raise FilenameOrContentTypeTooLargeError('filename')
    else:
      raise FilenameOrContentTypeTooLargeError('content-type')

  return message
def _GenerateMIMEMessage(self, form, boundary=None):
  """Generate a new post from original form.

  Also responsible for storing blobs in the datastore.

  Args:
    form: Instance of cgi.FieldStorage representing the whole form
      derived from original post data.
    boundary: Boundary to use for resulting form. Used only in tests so
      that the boundary is always consistent.

  Returns:
    A MIMEMultipart instance representing the new HTTP post which should be
    forwarded to the developers actual CGI handler. DO NOT use the return
    value of this method to generate a string unless you know what you're
    doing and properly handle folding whitespace (from rfc822) properly.
  """
  message = multipart.MIMEMultipart('form-data', boundary)
  # Carry over the original request headers, minus stripped ones.
  for name, value in form.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      message.add_header(name, value)

  def IterateForm():
    """Flattens form in to single sequence of cgi.FieldStorage instances.

    The resulting cgi.FieldStorage objects are a little bit irregular in
    their structure. A single name can have mulitple sub-items. In this
    case, the root FieldStorage object has a list associated with that
    field name. Otherwise, the root FieldStorage object just refers to a
    single nested instance.

    Lists of FieldStorage instances occur when a form has multiple values
    for the same name.

    Yields:
      cgi.FieldStorage irrespective of their nesting level.
    """
    # Sorted so results (and generated blob keys) are deterministic.
    for key in sorted(form):
      form_item = form[key]
      if isinstance(form_item, list):
        for list_item in form_item:
          yield list_item
      else:
        yield form_item

  creation = self.__now_func()
  for form_item in IterateForm():
    disposition_parameters = {'name': form_item.name}
    if form_item.filename is None:
      # Plain (non-file) form field: copied through as text.
      variable = base.MIMEBase('text', 'plain')
      variable.set_payload(form_item.value)
    else:
      # An empty filename means the file field was left blank; skip it.
      if not form_item.filename:
        continue
      disposition_parameters['filename'] = form_item.filename
      main_type, sub_type = _SplitMIMEType(form_item.type)

      blob_entity = self.StoreBlob(form_item, creation)

      # Replace the raw bytes with an external-body part referencing the
      # stored blob, so the app receives a blob key instead of file data.
      variable = base.MIMEBase('message', 'external-body',
                               access_type=blobstore.BLOB_KEY_HEADER,
                               blob_key=blob_entity.key().name())

      # Seek to the end to measure the upload size, then rewind.
      form_item.file.seek(0, 2)
      content_length = form_item.file.tell()
      form_item.file.seek(0)

      external = base.MIMEBase(main_type, sub_type,
                               **form_item.type_options)
      headers = dict(form_item.headers)
      headers['Content-Length'] = str(content_length)
      headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
          blobstore._format_creation(creation))
      for key, value in headers.iteritems():
        external.add_header(key, value)

      external_disposition_parameters = dict(disposition_parameters)
      external_disposition_parameters[
          'filename'] = form_item.filename
      if not external.get('Content-Disposition'):
        external.add_header('Content-Disposition', 'form-data',
                            **external_disposition_parameters)
      variable.set_payload([external])

    variable.add_header('Content-Disposition', 'form-data',
                        **disposition_parameters)
    message.attach(variable)
  return message
def post(self, session_id="session"):
  """ Handle a post request from a user uploading a blob.

  Depending on the session, the upload is stored either in GCS (via a
  resumable upload) or in the datastore, then an urlencoded callback with
  the blob metadata is forwarded to the app's success handler.

  Args:
    session_id: Authentication token to validate the upload.
  """
  # The app id comes from the trusted inbound-appid header, not the URL.
  app_id = self.request.headers.get('X-Appengine-Inbound-Appid', '')
  global datastore_path
  db = datastore_distributed.DatastoreDistributed(app_id, datastore_path)
  apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', db)
  os.environ['APPLICATION_ID'] = app_id # Setup the app id in the datastore.

  # Get session info and upload success path.
  blob_session = get_session(session_id)
  if not blob_session:
    self.finish('Session has expired. Contact the owner of the ' + \
                'app for support.\n\n')
    return

  success_path = blob_session["success_path"]
  # Resolve a relative success path against this request's URL.
  if success_path.startswith('/'):
    success_path = urlparse.urljoin(self.request.full_url(), success_path)

  # Derive "host[:port]" from the success URL for the Host header below.
  server_host = success_path[:success_path.rfind("/", 3)]
  if server_host.startswith("http://"):
    # Strip off the beginning of the server host
    server_host = server_host[len("http://"):]
  server_host = server_host.split('/')[0]

  blob_storage = datastore_blob_storage.DatastoreBlobStorage(app_id)
  uploadhandler = dev_appserver_upload.UploadCGIHandler(blob_storage)

  # Sessions are single-use: consume it before forwarding.
  datastore.Delete(blob_session)

  # This request is sent to the upload handler of the app
  # in the hope it returns a redirect to be forwarded to the user
  urlrequest = urllib2.Request(success_path)

  # Forward all relevant headers and create data for request
  content_type = self.request.headers["Content-Type"]
  kv = split_content_type(content_type)
  boundary = None
  if "boundary" in kv:
    boundary = kv["boundary"]

  urlrequest.add_header("Content-Type", 'application/x-www-form-urlencoded')
  # Mark the forwarded request as a blob-upload callback.
  urlrequest.add_header('X-AppEngine-BlobUpload', 'true')

  for name, value in self.request.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      urlrequest.add_header(name, value)

  # Get correct redirect addresses, otherwise it will redirect back
  # to this port.
  urlrequest.add_header("Host", server_host)

  form = MultiPartForm(boundary)
  creation = datetime.datetime.now()

  # Loop on all files in the form.
  for filekey in self.request.files.keys():
    # NOTE(review): 'data' is re-created on every iteration, so only the
    # last file's blob_info_metadata reaches the callback below.
    data = {"blob_info_metadata": {filekey: []}}
    file = self.request.files[filekey][0]
    body = file["body"]
    size = len(body)
    filename = file["filename"]
    file_content_type = file["content_type"]

    gs_path = ''
    if 'gcs_bucket' in blob_session:
      # GCS-backed upload: stream the body to the GCS service using a
      # resumable upload session instead of the datastore.
      gcs_config = {'scheme': 'https', 'port': 443}
      try:
        gcs_config.update(deployment_config.get_config('gcs'))
      except ConfigInaccessible:
        self.send_error('Unable to fetch GCS configuration.')
        return

      if 'host' not in gcs_config:
        self.send_error('GCS host is not defined.')
        return

      gcs_path = '{scheme}://{host}:{port}'.format(**gcs_config)
      gcs_bucket_name = blob_session['gcs_bucket']
      gcs_url = '/'.join([gcs_path, gcs_bucket_name, filename])
      # Start the resumable upload; the service replies 201 with an
      # upload id header on success.
      response = requests.post(gcs_url,
                               headers={'x-goog-resumable': 'start'})
      if (response.status_code != 201
          or GCS_UPLOAD_ID_HEADER not in response.headers):
        self.send_error(
            reason='Unable to start resumable GCS upload.')
        return
      upload_id = response.headers[GCS_UPLOAD_ID_HEADER]

      total_chunks = int(math.ceil(float(size) / GCS_CHUNK_SIZE))
      for chunk_num in range(total_chunks):
        offset = GCS_CHUNK_SIZE * chunk_num
        current_chunk_size = min(GCS_CHUNK_SIZE, size - offset)
        end_byte = offset + current_chunk_size
        current_range = '{}-{}'.format(offset, end_byte - 1)
        content_range = 'bytes {}/{}'.format(current_range, size)
        response = requests.put(
            gcs_url, data=body[offset:end_byte],
            headers={'Content-Range': content_range},
            params={'upload_id': upload_id})
        if chunk_num == total_chunks - 1:
          # Last chunk: 200 means the upload is complete.
          if response.status_code != 200:
            self.send_error(
                reason='Unable to complete GCS upload.')
            return
        else:
          # Intermediate chunk: 308 "Resume Incomplete" is expected.
          if response.status_code != 308:
            self.send_error(
                reason='Unable to continue GCS upload.')
            return

      gs_path = '/gs/{}/{}'.format(gcs_bucket_name, filename)
      blob_key = 'encoded_gs_key:' + base64.b64encode(gs_path)
    else:
      # Datastore-backed upload: wrap the body in a FieldStorage so the
      # upload handler can store it as a blob.
      form_item = cgi.FieldStorage(
          headers={'content-type': file_content_type})
      form_item.file = cStringIO.StringIO(body)
      form_item.filename = filename

      blob_entity = uploadhandler.StoreBlob(form_item, creation)

      blob_key = str(blob_entity.key().name())
      if not blob_key:
        self.finish('Status: 500\n\n')
        return

    creation_formatted = blobstore._format_creation(creation)
    form.add_file(filekey, filename, cStringIO.StringIO(blob_key), blob_key,
                  blobstore.BLOB_KEY_HEADER, size, creation_formatted)

    md5_handler = hashlib.md5(str(body))
    blob_info = {
      "filename": filename,
      "creation-date": creation_formatted,
      "key": blob_key,
      "size": str(size),
      "content-type": file_content_type,
      "md5-hash": md5_handler.hexdigest()
    }
    if 'gcs_bucket' in blob_session:
      blob_info['gs-name'] = gs_path
    data["blob_info_metadata"][filekey].append(blob_info)

  # Loop through form fields
  for fieldkey in self.request.arguments.keys():
    form.add_field(fieldkey, self.request.arguments[fieldkey][0])
    data[fieldkey] = self.request.arguments[fieldkey][0]

  logger.debug("Callback data: \n{}".format(data))
  data = urllib.urlencode(data)
  urlrequest.add_header("Content-Length", str(len(data)))
  urlrequest.add_data(data)

  # We are catching the redirect error here
  # and extracting the Location to post the redirect.
  try:
    response = urllib2.urlopen(urlrequest)
    output = response.read()
    # Transparently decompress a gzip-encoded response before relaying it.
    if response.info().get('Content-Encoding') == 'gzip':
      buf = StringIO(output)
      f = gzip.GzipFile(fileobj=buf)
      data = f.read()
      output = data
    self.finish(output)
  except urllib2.HTTPError, e:
    if "Location" in e.hdrs:
      # Catch any errors, use the success path to
      # get the ip and port, use the redirect path
      # for the path. We split redirect_path just in case
      # its a full path.
      redirect_path = e.hdrs["Location"]
      self.redirect(redirect_path)
      return
    else:
      self.finish(UPLOAD_ERROR + "</br>" + str(e.hdrs) + "</br>" + str(e))
      return
def post(self, app_id="blob", session_id = "session"):
  """ Handle a post request from a user uploading a blob.

  Older variant of the upload handler: stores each file as a blob, then
  rebuilds the multipart form (with blob keys in place of file bytes) and
  forwards it to the app's upload success handler.

  Args:
    app_id: The application triggering the upload.
    session_id: Authentication token to validate the upload.
  """
  global datastore_path
  db = datastore_distributed.DatastoreDistributed(
      app_id, datastore_path, False, False)
  apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', db)
  os.environ['APPLICATION_ID'] = app_id # setup the app id in the datastore

  # Get session info and upload success path
  blob_session = get_session(session_id)
  if not blob_session:
    self.finish('Session has expired. Contact the owner of the app for support.\n\n')
    return

  success_path = blob_session["success_path"]
  # Derive "host[:port]" from the success URL for the Host header below.
  server_host = success_path[:success_path.rfind("/",3)]
  if server_host.startswith("http://"):
    # strip off the beginning
    server_host = server_host[len("http://"):]
  server_host = server_host.split('/')[0]

  blob_storage = datastore_blob_storage.DatastoreBlobStorage("", app_id)
  uploadhandler = dev_appserver_upload.UploadCGIHandler(blob_storage)

  # Sessions are single-use: consume it before forwarding.
  datastore.Delete(blob_session)

  # This request is sent to the upload handler of the app
  # in the hope it returns a redirect to be forwarded to the user
  urlrequest = urllib2.Request(success_path)

  # Forward all relevant headers
  # Create data for request
  # NOTE(review): reqbody is never used after this assignment.
  reqbody = self.request.body
  content_type = self.request.headers["Content-Type"]
  main, kv = split_content_type(content_type)
  boundary = None
  if "boundary" in kv:
    boundary = kv["boundary"]

  # This variant forwards a rebuilt multipart body (see MultiPartForm
  # below), so the original multipart content type is preserved.
  urlrequest.add_header("Content-Type",
                        'multipart/form-data; boundary="%s"' % boundary)

  for name, value in self.request.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      urlrequest.add_header(name, value)

  # Get correct redirect addresses, otherwise it will redirect back
  # to this port
  urlrequest.add_header("Host", server_host)

  form = MultiPartForm(boundary)
  creation = datetime.datetime.now()

  # Loop on all files in the form
  for filekey in self.request.files.keys():
    file = self.request.files[filekey][0]
    body = file["body"]
    size = len(body)
    filetype = file["content_type"]
    filename = file["filename"]

    blob_entity = uploadhandler.StoreBlob(file, creation)

    blob_key = str(blob_entity.key().name())
    if not blob_key:
      self.finish('Status: 500\n\n')
      return
    creation_formatted = blobstore._format_creation(creation)
    form.add_file(filekey, filename, cStringIO.StringIO(blob_key), blob_key,
                  blobstore.BLOB_KEY_HEADER, size, creation_formatted)

  # Loop through form fields
  for fieldkey in self.request.arguments.keys():
    form.add_field(fieldkey, self.request.arguments[fieldkey][0])

  request_body = str(form)
  urlrequest.add_header("Content-Length", str(len(request_body)))
  urlrequest.add_data(request_body)

  # Redirect responses are surfaced as HTTPError by this opener.
  opener = urllib2.build_opener(SmartRedirectHandler())
  f = None
  redirect_path = None

  # We are catching the redirect error here
  # and extracting the Location to post the redirect
  try:
    f = opener.open(urlrequest)
    output = f.read()
    self.finish(output)
  except urllib2.HTTPError, e:
    if "Location" in e.hdrs:
      # catch any errors, use the success path to
      # get the ip and port, use the redirect path
      # for the path. We split redirect_path just in case
      # its a full path
      redirect_path = e.hdrs["Location"]
      success_path_toks = success_path.split('/')
      redirect_toks = redirect_path.split("/")
      # Rebuild "scheme//host/last-path-segment" from the two URLs.
      final_redirect_path = success_path_toks[0] + '//' + \
          success_path_toks[2] + '/' + redirect_toks[len(redirect_toks)-1]
      self.redirect(final_redirect_path)
      return
    else:
      self.finish(UPLOAD_ERROR + "</br>" + str(e.hdrs) + "</br>" + str(e))
      return
def post(self, session_id = "session"):
  """ Handle a post request from a user uploading a blob.

  Depending on the session, the upload is stored either in GCS (via a
  resumable upload) or in the datastore, then an urlencoded callback with
  the blob metadata is forwarded to the app's success handler.

  Args:
    session_id: Authentication token to validate the upload.
  """
  # The app id comes from the trusted inbound-appid header, not the URL.
  app_id = self.request.headers.get('X-Appengine-Inbound-Appid', '')
  global datastore_path
  db = datastore_distributed.DatastoreDistributed(
      app_id, datastore_path)
  apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', db)
  os.environ['APPLICATION_ID'] = app_id # Setup the app id in the datastore.

  # Get session info and upload success path.
  blob_session = get_session(session_id)
  if not blob_session:
    self.finish('Session has expired. Contact the owner of the ' + \
                'app for support.\n\n')
    return

  success_path = blob_session["success_path"]
  # Resolve a relative success path against this request's URL.
  if success_path.startswith('/'):
    success_path = urlparse.urljoin(self.request.full_url(), success_path)

  # Derive "host[:port]" from the success URL for the Host header below.
  server_host = success_path[:success_path.rfind("/", 3)]
  if server_host.startswith("http://"):
    # Strip off the beginning of the server host
    server_host = server_host[len("http://"):]
  server_host = server_host.split('/')[0]

  blob_storage = datastore_blob_storage.DatastoreBlobStorage(app_id)
  uploadhandler = dev_appserver_upload.UploadCGIHandler(blob_storage)

  # Sessions are single-use: consume it before forwarding.
  datastore.Delete(blob_session)

  # This request is sent to the upload handler of the app
  # in the hope it returns a redirect to be forwarded to the user
  urlrequest = urllib2.Request(success_path)

  # Forward all relevant headers and create data for request
  content_type = self.request.headers["Content-Type"]
  kv = split_content_type(content_type)
  boundary = None
  if "boundary" in kv:
    boundary = kv["boundary"]

  # The callback body is urlencoded metadata, not the original multipart.
  urlrequest.add_header("Content-Type", 'application/x-www-form-urlencoded')

  for name, value in self.request.headers.items():
    if name.lower() not in STRIPPED_HEADERS:
      urlrequest.add_header(name, value)

  # Get correct redirect addresses, otherwise it will redirect back
  # to this port.
  urlrequest.add_header("Host", server_host)

  form = MultiPartForm(boundary)
  creation = datetime.datetime.now()

  # Loop on all files in the form.
  for filekey in self.request.files.keys():
    # NOTE(review): 'data' is re-created on every iteration, so only the
    # last file's blob_info_metadata reaches the callback below.
    data = {"blob_info_metadata": {filekey: []}}
    file = self.request.files[filekey][0]
    body = file["body"]
    size = len(body)
    filename = file["filename"]
    file_content_type = file["content_type"]

    gs_path = ''
    if 'gcs_bucket' in blob_session:
      # GCS-backed upload: stream the body to the GCS service using a
      # resumable upload session instead of the datastore.
      gcs_config = {'scheme': 'https', 'port': 443}
      try:
        gcs_config.update(deployment_config.get_config('gcs'))
      except ConfigInaccessible:
        self.send_error('Unable to fetch GCS configuration.')
        return

      if 'host' not in gcs_config:
        self.send_error('GCS host is not defined.')
        return

      gcs_path = '{scheme}://{host}:{port}'.format(**gcs_config)
      gcs_bucket_name = blob_session['gcs_bucket']
      gcs_url = '/'.join([gcs_path, gcs_bucket_name, filename])
      # Start the resumable upload; the service replies 201 with an
      # upload id header on success.
      response = requests.post(gcs_url,
                               headers={'x-goog-resumable': 'start'})
      if (response.status_code != 201
          or GCS_UPLOAD_ID_HEADER not in response.headers):
        self.send_error(reason='Unable to start resumable GCS upload.')
        return
      upload_id = response.headers[GCS_UPLOAD_ID_HEADER]

      total_chunks = int(math.ceil(float(size) / GCS_CHUNK_SIZE))
      for chunk_num in range(total_chunks):
        offset = GCS_CHUNK_SIZE * chunk_num
        current_chunk_size = min(GCS_CHUNK_SIZE, size - offset)
        end_byte = offset + current_chunk_size
        current_range = '{}-{}'.format(offset, end_byte - 1)
        content_range = 'bytes {}/{}'.format(current_range, size)
        response = requests.put(gcs_url, data=body[offset:end_byte],
                                headers={'Content-Range': content_range},
                                params={'upload_id': upload_id})
        if chunk_num == total_chunks - 1:
          # Last chunk: 200 means the upload is complete.
          if response.status_code != 200:
            self.send_error(reason='Unable to complete GCS upload.')
            return
        else:
          # Intermediate chunk: 308 "Resume Incomplete" is expected.
          if response.status_code != 308:
            self.send_error(reason='Unable to continue GCS upload.')
            return

      gs_path = '/gs/{}/{}'.format(gcs_bucket_name, filename)
      blob_key = 'encoded_gs_key:' + base64.b64encode(gs_path)
    else:
      # Datastore-backed upload: wrap the body in a FieldStorage so the
      # upload handler can store it as a blob.
      form_item = cgi.FieldStorage(
          headers={'content-type': file_content_type})
      form_item.file = cStringIO.StringIO(body)
      form_item.filename = filename

      blob_entity = uploadhandler.StoreBlob(form_item, creation)

      blob_key = str(blob_entity.key().name())
      if not blob_key:
        self.finish('Status: 500\n\n')
        return

    creation_formatted = blobstore._format_creation(creation)
    form.add_file(filekey, filename, cStringIO.StringIO(blob_key), blob_key,
                  blobstore.BLOB_KEY_HEADER, size, creation_formatted)

    md5_handler = hashlib.md5(str(body))
    blob_info = {"filename": filename,
                 "creation-date": creation_formatted,
                 "key": blob_key,
                 "size": str(size),
                 "content-type": file_content_type,
                 "md5-hash": md5_handler.hexdigest()}
    if 'gcs_bucket' in blob_session:
      blob_info['gs-name'] = gs_path
    data["blob_info_metadata"][filekey].append(blob_info)

  # Loop through form fields
  for fieldkey in self.request.arguments.keys():
    form.add_field(fieldkey, self.request.arguments[fieldkey][0])
    data[fieldkey] = self.request.arguments[fieldkey][0]

  logger.debug("Callback data: \n{}".format(data))
  data = urllib.urlencode(data)
  urlrequest.add_header("Content-Length", str(len(data)))
  urlrequest.add_data(data)

  # We are catching the redirect error here
  # and extracting the Location to post the redirect.
  try:
    response = urllib2.urlopen(urlrequest)
    output = response.read()
    # Transparently decompress a gzip-encoded response before relaying it.
    if response.info().get('Content-Encoding') == 'gzip':
      buf = StringIO(output)
      f = gzip.GzipFile(fileobj=buf)
      data = f.read()
      output = data
    self.finish(output)
  except urllib2.HTTPError, e:
    if "Location" in e.hdrs:
      # Catch any errors, use the success path to
      # get the ip and port, use the redirect path
      # for the path. We split redirect_path just in case
      # its a full path.
      redirect_path = e.hdrs["Location"]
      self.redirect(redirect_path)
      return
    else:
      self.finish(UPLOAD_ERROR + "</br>" + str(e.hdrs) + "</br>" + str(e))
      return
def store_and_build_forward_message(self, form, boundary=None,
                                    max_bytes_per_blob=None,
                                    max_bytes_total=None,
                                    bucket_name=None):
  """Reads form data, stores blobs data and builds the forward request.

  This finds all of the file uploads in a set of form fields, converting them
  into blobs and storing them in the blobstore. It also generates the HTTP
  request to forward to the user's application.

  Args:
    form: cgi.FieldStorage instance representing the whole form derived
      from original POST data.
    boundary: The optional boundary to use for the resulting form. If
      omitted, one is randomly generated.
    max_bytes_per_blob: The maximum size in bytes that any single blob
      in the form is allowed to be.
    max_bytes_total: The maximum size in bytes that the total of all blobs
      in the form is allowed to be.
    bucket_name: The name of the Google Storage bucket to store the
      uploaded files.

  Returns:
    A tuple (content_type, content_text), where content_type is the value
    of the Content-Type header, and content_text is a string containing
    the body of the HTTP request to forward to the application.

  Raises:
    webob.exc.HTTPException: The upload failed.
  """
  message = multipart.MIMEMultipart('form-data', boundary)

  creation = self._now_func()
  total_bytes_uploaded = 0
  created_blobs = []
  mime_type_error = None
  too_many_conflicts = False
  upload_too_large = False
  filename_too_large = False
  content_type_too_large = False

  # Extract all of the individual form items out of the FieldStorage.
  form_items = []
  # Sorting of forms is done merely to make testing a little easier since
  # it means blob-keys are generated in a predictable order.
  for key in sorted(form):
    form_item = form[key]
    if isinstance(form_item, list):
      form_items.extend(form_item)
    else:
      form_items.append(form_item)

  for form_item in form_items:
    disposition_parameters = {'name': form_item.name}

    variable = email.message.Message()

    if form_item.filename is None:
      # Copy as is
      variable.add_header('Content-Type', 'text/plain')
      variable.set_payload(form_item.value)
    else:
      # If there is no filename associated with this field it means that the
      # file form field was not filled in. This blob should not be created
      # and forwarded to success handler.
      if not form_item.filename:
        continue

      disposition_parameters['filename'] = form_item.filename

      try:
        main_type, sub_type = _split_mime_type(form_item.type)
      except _InvalidMIMETypeFormatError, ex:
        mime_type_error = str(ex)
        break

      # Seek to the end of file and use the pos as the length.
      form_item.file.seek(0, os.SEEK_END)
      content_length = form_item.file.tell()
      form_item.file.seek(0)

      total_bytes_uploaded += content_length

      if max_bytes_per_blob is not None:
        if content_length > max_bytes_per_blob:
          upload_too_large = True
          break
      if max_bytes_total is not None:
        if total_bytes_uploaded > max_bytes_total:
          upload_too_large = True
          break
      if form_item.filename is not None:
        if len(form_item.filename) > _MAX_STRING_NAME_LENGTH:
          filename_too_large = True
          break
      if form_item.type is not None:
        if len(form_item.type) > _MAX_STRING_NAME_LENGTH:
          content_type_too_large = True
          break

      # Compute the MD5 hash of the upload.
      digester = hashlib.md5()
      while True:
        block = form_item.file.read(1 << 20)
        if not block:
          break
        digester.update(block)
      form_item.file.seek(0)

      # Create the external body message containing meta-data about the blob.
      external = email.message.Message()
      external.add_header('Content-Type', '%s/%s' % (main_type, sub_type),
                          **form_item.type_options)
      # NOTE: This is in violation of RFC 2616 (Content-MD5 should be the
      # base-64 encoding of the binary hash, not the hex digest), but it is
      # consistent with production.
      blob_key = base64.urlsafe_b64encode(digester.hexdigest())
      # Create header MIME message
      headers = dict(form_item.headers)
      for name in _STRIPPED_FILE_HEADERS:
        if name in headers:
          del headers[name]
      headers['Content-Length'] = str(content_length)
      headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
          blobstore._format_creation(creation))
      headers['Content-MD5'] = blob_key
      if bucket_name:
        headers[blobstore.CLOUD_STORAGE_OBJECT_HEADER] = (
            '/gs/%s/fake-%s' % (bucket_name, blob_key))
      for key, value in headers.iteritems():
        external.add_header(key, value)
      # Add disposition parameters (a clone of the outer message's field).
      if not external.get('Content-Disposition'):
        external.add_header('Content-Disposition', 'form-data',
                            **disposition_parameters)

      # Store the actual contents in the blobstore.
      base64_encoding = (form_item.headers.get('Content-Transfer-Encoding')
                         == 'base64')
      try:
        blob_entity = self.store_blob(external['content-type'],
                                      form_item.filename, digester,
                                      form_item.file, creation,
                                      base64_encoding=base64_encoding)
      except _TooManyConflictsError:
        too_many_conflicts = True
        break

      # Track created blobs in case we need to roll them back.
      created_blobs.append(blob_entity)

      variable.add_header('Content-Type', 'message/external-body',
                          access_type=blobstore.BLOB_KEY_HEADER,
                          blob_key=blob_entity.key().name())
      variable.set_payload([external])

    # Set common information.
    variable.add_header('Content-Disposition', 'form-data',
                        **disposition_parameters)
    message.attach(variable)
  # NOTE(review): the block visible in this file ends here — the error
  # flags (mime_type_error, too_many_conflicts, upload_too_large,
  # filename_too_large, content_type_too_large) are set but never checked,
  # created_blobs is never rolled back, and nothing is returned. The
  # remainder of this function appears to be truncated in this file.
def post(self, app_id="blob", session_id="session"): """ Handler a post request from a user uploading a blob. Args: app_id: The application triggering the upload. session_id: Authentication token to validate the upload. """ global datastore_path db = datastore_distributed.DatastoreDistributed(app_id, datastore_path, require_indexes=False) apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', db) os.environ['APPLICATION_ID'] = app_id # Setup the app id in the datastore. # Get session info and upload success path. blob_session = get_session(session_id) if not blob_session: self.finish('Session has expired. Contact the owner of the ' + \ 'app for support.\n\n') return success_path = blob_session["success_path"] server_host = success_path[:success_path.rfind("/", 3)] if server_host.startswith("http://"): # Strip off the beginging of the server host server_host = server_host[len("http://"):] server_host = server_host.split('/')[0] blob_storage = datastore_blob_storage.DatastoreBlobStorage(app_id) uploadhandler = dev_appserver_upload.UploadCGIHandler(blob_storage) datastore.Delete(blob_session) # This request is sent to the upload handler of the app # in the hope it returns a redirect to be forwarded to the user urlrequest = urllib2.Request(success_path) # Forward all relevant headers and create data for request content_type = self.request.headers["Content-Type"] kv = split_content_type(content_type) boundary = None if "boundary" in kv: boundary = kv["boundary"] urlrequest.add_header("Content-Type", 'application/x-www-form-urlencoded') for name, value in self.request.headers.items(): if name.lower() not in STRIPPED_HEADERS: urlrequest.add_header(name, value) # Get correct redirect addresses, otherwise it will redirect back # to this port. urlrequest.add_header("Host", server_host) form = MultiPartForm(boundary) creation = datetime.datetime.now() # Loop on all files in the form. 
for filekey in self.request.files.keys(): data = {"blob_info_metadata": {filekey: []}} file = self.request.files[filekey][0] body = file["body"] size = len(body) filename = file["filename"] file_content_type = file["content_type"] blob_entity = uploadhandler.StoreBlob(file, creation) blob_key = str(blob_entity.key().name()) if not blob_key: self.finish('Status: 500\n\n') return creation_formatted = blobstore._format_creation(creation) form.add_file(filekey, filename, cStringIO.StringIO(blob_key), blob_key, blobstore.BLOB_KEY_HEADER, size, creation_formatted) md5_handler = hashlib.md5(str(body)) data["blob_info_metadata"][filekey].append({ "filename": filename, "creation-date": creation_formatted, "key": blob_key, "size": str(size), "content-type": file_content_type, "md5-hash": md5_handler.hexdigest() }) # Loop through form fields for fieldkey in self.request.arguments.keys(): form.add_field(fieldkey, self.request.arguments[fieldkey][0]) data[fieldkey] = self.request.arguments[fieldkey][0] logging.debug("Callback data: \n{}".format(data)) data = urllib.urlencode(data) urlrequest.add_header("Content-Length", str(len(data))) urlrequest.add_data(data) # We are catching the redirect error here # and extracting the Location to post the redirect. try: response = urllib2.urlopen(urlrequest) output = response.read() if response.info().get('Content-Encoding') == 'gzip': buf = StringIO(output) f = gzip.GzipFile(fileobj=buf) data = f.read() output = data self.finish(output) except urllib2.HTTPError, e: if "Location" in e.hdrs: # Catch any errors, use the success path to # get the ip and port, use the redirect path # for the path. We split redirect_path just in case # its a full path. redirect_path = e.hdrs["Location"] self.redirect(redirect_path) return else: self.finish(UPLOAD_ERROR + "</br>" + str(e.hdrs) + "</br>" + str(e)) return
def store_and_build_forward_message(self, form, boundary=None,
                                    max_bytes_per_blob=None,
                                    max_bytes_total=None,
                                    bucket_name=None):
  """Reads form data, stores blobs data and builds the forward request.

  This finds all of the file uploads in a set of form fields, converting them
  into blobs and storing them in the blobstore. It also generates the HTTP
  request to forward to the user's application.

  Args:
    form: cgi.FieldStorage instance representing the whole form derived from
      original POST data.
    boundary: The optional boundary to use for the resulting form. If omitted,
      one is randomly generated.
    max_bytes_per_blob: The maximum size in bytes that any single blob in the
      form is allowed to be.
    max_bytes_total: The maximum size in bytes that the total of all blobs in
      the form is allowed to be.
    bucket_name: The name of the Google Storage bucket to store the uploaded
      files.

  Returns:
    A tuple (content_type, content_text), where content_type is the value of
    the Content-Type header, and content_text is a string containing the body
    of the HTTP request to forward to the application.

  Raises:
    webob.exc.HTTPException: The upload failed.
  """
  message = multipart.MIMEMultipart('form-data', boundary)

  creation = self._now_func()
  total_bytes_uploaded = 0
  created_blobs = []
  # Error flags: the per-item loop `break`s on the first failure and the
  # flags are inspected (and blobs rolled back) after the loop.
  mime_type_error = None
  too_many_conflicts = False
  upload_too_large = False
  filename_too_large = False
  content_type_too_large = False

  # Extract all of the individual form items out of the FieldStorage.
  form_items = []
  # Sorting of forms is done merely to make testing a little easier since
  # it means blob-keys are generated in a predictable order.
  for key in sorted(form):
    form_item = form[key]
    if isinstance(form_item, list):
      form_items.extend(form_item)
    else:
      form_items.append(form_item)

  for form_item in form_items:
    disposition_parameters = {'name': form_item.name}
    variable = email.message.Message()

    if form_item.filename is None:
      # Copy as is
      variable.add_header('Content-Type', 'text/plain')
      variable.set_payload(form_item.value)
    else:
      # If there is no filename associated with this field it means that the
      # file form field was not filled in. This blob should not be created
      # and forwarded to success handler.
      if not form_item.filename:
        continue

      disposition_parameters['filename'] = form_item.filename

      try:
        main_type, sub_type = _split_mime_type(form_item.type)
      except _InvalidMIMETypeFormatError as ex:
        mime_type_error = str(ex)
        break

      # Seek to the end of file and use the pos as the length.
      form_item.file.seek(0, os.SEEK_END)
      content_length = form_item.file.tell()
      form_item.file.seek(0)

      total_bytes_uploaded += content_length
      if max_bytes_per_blob is not None:
        if content_length > max_bytes_per_blob:
          upload_too_large = True
          break
      if max_bytes_total is not None:
        if total_bytes_uploaded > max_bytes_total:
          upload_too_large = True
          break
      if form_item.filename is not None:
        if len(form_item.filename) > _MAX_STRING_NAME_LENGTH:
          filename_too_large = True
          break
      if form_item.type is not None:
        if len(form_item.type) > _MAX_STRING_NAME_LENGTH:
          content_type_too_large = True
          break

      # Compute the MD5 hash of the upload.
      # Read in 1 MiB chunks so arbitrarily large uploads are not held
      # in memory, then rewind for the storage step below.
      digester = hashlib.md5()
      while True:
        block = form_item.file.read(1 << 20)
        if not block:
          break
        digester.update(block)
      form_item.file.seek(0)

      # Create the external body message containing meta-data about the blob.
      external = email.message.Message()
      external.add_header('Content-Type', '%s/%s' % (main_type, sub_type),
                          **form_item.type_options)
      # NOTE: This is in violation of RFC 2616 (Content-MD5 should be the
      # base-64 encoding of the binary hash, not the hex digest), but it is
      # consistent with production.
      content_md5 = base64.urlsafe_b64encode(digester.hexdigest())
      # Create header MIME message
      headers = dict(form_item.headers)
      for name in _STRIPPED_FILE_HEADERS:
        if name in headers:
          del headers[name]
      headers['Content-Length'] = str(content_length)
      headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
          blobstore._format_creation(creation))
      headers['Content-MD5'] = content_md5
      gs_filename = None
      if bucket_name:
        # Cloud Storage target: fabricate an object name from a random key.
        random_key = str(self._generate_blob_key())
        gs_filename = '%s/fake-%s' % (bucket_name, random_key)
        headers[blobstore.CLOUD_STORAGE_OBJECT_HEADER] = (
            blobstore.GS_PREFIX + gs_filename)
      for key, value in six.iteritems(headers):
        external.add_header(key, value)
      # Add disposition parameters (a clone of the outer message's field).
      if not external.get('Content-Disposition'):
        external.add_header('Content-Disposition', 'form-data',
                            **disposition_parameters)

      base64_encoding = (form_item.headers.get(
          'Content-Transfer-Encoding') == 'base64')
      content_type, blob_file, filename = self._preprocess_data(
          external['content-type'], form_item.file, form_item.filename,
          base64_encoding)

      # Store the actual contents to storage.
      if gs_filename:
        info_entity = self.store_gs_file(content_type, gs_filename,
                                         blob_file, filename)
      else:
        try:
          info_entity = self.store_blob(content_type, filename, digester,
                                        blob_file, creation)
        except _TooManyConflictsError:
          too_many_conflicts = True
          break

      # Track created blobs in case we need to roll them back.
      created_blobs.append(info_entity)

      # Replace the file contents in the forwarded form with an
      # external-body part that references the stored blob by key.
      variable.add_header('Content-Type', 'message/external-body',
                          access_type=blobstore.BLOB_KEY_HEADER,
                          blob_key=info_entity.key().name())
      variable.set_payload([external])

    # Set common information.
    variable.add_header('Content-Disposition', 'form-data',
                        **disposition_parameters)
    message.attach(variable)

  # Any failure: delete every blob created so far, then abort with the
  # status code matching the first error encountered.
  if (mime_type_error or too_many_conflicts or upload_too_large or
      filename_too_large or content_type_too_large):
    for blob in created_blobs:
      datastore.Delete(blob)
    if mime_type_error:
      self.abort(400, detail=mime_type_error)
    elif too_many_conflicts:
      self.abort(500, detail='Could not generate a blob key.')
    elif upload_too_large:
      self.abort(413)
    else:
      if filename_too_large:
        invalid_field = 'filename'
      elif content_type_too_large:
        invalid_field = 'Content-Type'
      detail = 'The %s exceeds the maximum allowed length of %s.' % (
          invalid_field, _MAX_STRING_NAME_LENGTH)
      self.abort(400, detail=detail)

  # Flatten the MIME message; maxheaderlen=0 disables header wrapping so
  # the forwarded request headers stay on single lines.
  message_out = io.StringIO()
  gen = email.generator.Generator(message_out, maxheaderlen=0)
  gen.flatten(message, unixfrom=False)

  # Get the content text out of the message.
  message_text = message_out.getvalue()
  # The body starts after the first blank line separating the top-level
  # headers from the payload; normalize line endings to CRLF for HTTP.
  content_start = message_text.find('\n\n') + 2
  content_text = message_text[content_start:]
  content_text = content_text.replace('\n', '\r\n')

  return message.get('Content-Type'), content_text