def test_validate_data_pass(self):
    """Valid user data validates when restricted fields are permitted."""
    # Call under test
    self.assertTrue(
        ds.validate_data(self.valid_user_data,
                         allow_restricted_fields=True,
                         kind=self.user_kind))
def test_validate_data_field_restricted_fail(self):
    """Data containing restricted fields fails when they are not allowed."""
    # Call under test
    self.assertFalse(
        ds.validate_data(self.valid_user_data,
                         allow_restricted_fields=False,
                         kind=self.user_kind))
def test_validate_data_field_not_allowed_fail(self):
    """Unknown fields mixed into otherwise-valid data are rejected."""
    # Same merge precedence as the original: valid fields win on collision.
    payload = dict({'bad': 'data', 'evil': 'monsters'},
                   **self.valid_user_data)
    # Call under test
    self.assertFalse(
        ds.validate_data(payload,
                         allow_restricted_fields=True,
                         kind=self.user_kind))
def _create_datastore_entry(self, datastore_client, filename,
                            original_filename, user=None,
                            upload_session_id=None, image_bucket=None,
                            cc0_agree=False, public_agree=False):
    """
    Creates and returns a datastore entity for a file with name filename
    uploaded by user user.

    Filename should be the new name we have generated that is <uuid>.<ext>;
    original_filename is the name the client supplied. The entity starts
    unconfirmed, not yet in GCS, and unprocessed.

    Returns the entity, or None if the assembled data fails validation.
    """
    # Create datastore entity (exif_json is too large/opaque to index)
    key = datastore_client.key(self._datastore_kind, filename)
    entity = self.datastore.Entity(key=key,
                                   exclude_from_indexes=["exif_json"])
    # Set datastore entity data
    entity.update({
        'user': datastore_client.key(self._user_datastore_kind, user),
        'upload_session_id': upload_session_id,
        'confirmed_by_user': False,
        'original_filename': original_filename,
        'in_gcs': False,
        'processed': False,
        'uploaded_date': self.datetime.now(),
        'image_bucket': image_bucket,
        'cc0_agree': cc0_agree,
        'public_agree': public_agree,
    })
    if ds.validate_data(entity, True, ds.DATASTORE_PHOTO):
        return entity
    self.logger.error('Invalid entity: {0}'.format(entity))
    return None
def test_validate_data_entity_not_allowed(self):
    """An unrecognized datastore kind fails validation outright."""
    # Call under test
    self.assertFalse(
        ds.validate_data(self.valid_user_data, True, kind='evilEntity'))
def _record_status_in_datastore(fpaths, success):
    """
    Records GCS upload status in datastore for each file fpaths.

    `success` is a boolean corresponding to whether the files in fpaths
    were uploaded successfully to GCS or not.

    A list of files that failed to have their upload status updated are
    returned (empty list when everything was recorded).
    """
    # Single failure sentinel: once set, every later stage is skipped and
    # the message is logged at the bottom.
    failure_reason = None

    try:
        client = _get_client('datastore')
    except CouldNotObtainCredentialsError as e:
        failure_reason = 'Could not obtain datastore credentials: {0}'.format(e)

    if failure_reason is None:
        keys = [_get_ds_key_for_file(p) for p in fpaths]
        try:
            entities = client.get_multi(keys)
        except GCloudError as e:
            failure_reason = str(e)

    if failure_reason is None:
        # Add new entities as necessary
        if len(entities) != len(fpaths):
            entities = _insert_missing_entities(entities, fpaths)
        new_data = ({'gcs_upload_failed': True} if success is False
                    else {'in_gcs': True})
        # We only want to validate the new data, as there may be restricted
        # fields in the entities we pulled from datastore. All new data must
        # be validated as follows before adding it to the entities that will
        # be pushed to datastore.
        if not ds.validate_data(new_data,
                                allow_restricted_fields=False,
                                kind=ds.DATASTORE_PHOTO):
            failure_reason = 'Invalid data: {0}'.format(new_data)

    if failure_reason is None:
        # Update entities and push them back in one batch.
        for entity in entities:
            entity.update(new_data)
        try:
            client.put_multi(entities)
        except GCloudError as e:
            failure_reason = str(e)

    if failure_reason is None:
        return list()
    msg = 'Failed to record {0} upload statuses in datastore: {1}'
    logging.error(msg.format(len(fpaths), failure_reason))
    return fpaths
def _upload_single(fpath):
    """
    Uploads single file to GCS.

    Returns a tuple containing (upload_success, fpath).
    """
    try:
        bucket_name = config.GCS_BUCKET
        success = True
        # Datastore client is needed to look up / update the photo entity.
        try:
            datastore_client = _get_client('datastore')
        except CouldNotObtainCredentialsError as e:
            error_msg = 'Could not obtain datastore credentials: {0}'.format(str(e))
            logging.error(error_msg)
            return False, fpath
        # Storage client is needed for the actual GCS upload.
        try:
            client = _get_client('storage')
        except CouldNotObtainCredentialsError as e:
            logging.error('Could not obtain GCS credentials: {0}'.format(str(e)))
            return False, fpath
        bucket = client.bucket(bucket_name)
        # Verify that filename already exists as key in database
        filename = os.path.basename(fpath)
        key = datastore_client.key('Photo', filename)
        entity = datastore_client.get(key)
        if entity is None:
            logging.error('Failed to find file: ' + filename)
            return False, fpath
        try:
            # PIL path: for TIFF images, derive and upload a JPEG copy.
            img = Image.open(fpath)
            format_ = img.format
            if format_ == 'TIFF':
                output_file = "/tmp/" + filename + ".jpg"
                img.save(output_file)
                _upload_derived(output_file, bucket)
                os.unlink(output_file)
        except IOError as e:
            # PIL could not read the file; fall back to rawkit (camera RAW).
            try:
                with Raw(filename=fpath) as raw:
                    tiff_output_file = "/tmp/" + filename + ".tiff"
                    raw.save(filename=tiff_output_file)
            except Exception as e:
                logging.error("Failed to parse file with PIL or rawkit: %s (error: %s)" % (fpath, str(e)))
                # move the file out of the pending tree so it won't be processed next loop
                try:
                    shutil.move(fpath, "/tmp/%s" % os.path.basename(fpath))
                except IOError as e:
                    logging.error("Unable to move bad file out of the way: %s (error: %s)" % (fpath, str(e)))
                return False, fpath
            # RAW parsed: derive a JPEG from the intermediate TIFF, upload
            # it, then remove both temp files.
            jpg_output_file = "/tmp/" + filename + ".jpg"
            img = Image.open(tiff_output_file)
            img.save(jpg_output_file)
            _upload_derived(jpg_output_file, bucket)
            os.unlink(tiff_output_file)
            os.unlink(jpg_output_file)
            format_ = 'raw'
        # Adult-content gate: flagged files are recorded in datastore,
        # deleted locally, and the original is NOT uploaded.
        is_adult = _check_adult_content(img)
        if is_adult:
            entity.update({'is_adult_content': True})
            datastore_client.put(entity)
            os.unlink(fpath)
            return False, fpath
        else:
            entity.update({'is_adult_content': False})
        # Seed empty review fields, then merge extracted image metadata.
        metadata = {}
        metadata['reviews'] = []
        metadata['num_reviews'] = 0
        entity.update(metadata)
        width = img.width
        height = img.height
        metadata = _extract_image_metadata(filename, format_, width, height, bucket_name)
        entity.update(metadata)
        if not ds.validate_data(entity, True, ds.DATASTORE_PHOTO):
            logging.error('Invalid entity: {0}'.format(entity))
            return False, fpath
        datastore_client.put(entity)
        # Finally, upload the original file itself.
        blob = storage.Blob(os.path.basename(fpath), bucket)
        try:
            blob.upload_from_filename(fpath)
            msg = 'Successfully uploaded {0} to GCS'
            logging.debug(msg.format(fpath))
        except Exception as e:
            msg = '{0} failed to upload to GCS: {1}'
            logging.error(msg.format(fpath, e))
            success = False
        return success, fpath
    except Exception as e:
        # Catch-all boundary so one bad file cannot kill the upload worker.
        logging.error("Failed to upload file: %s" % fpath)
        traceback.print_exc(limit=50)
        logging.error("Returning false")
        return False, fpath
def _upload_post(self):
    """
    Request handler for upload POST requests.

    Accepts files in the POST request body and saves them to the local
    file system as <self._dir>/<uuid>.<file extension as uploaded>.

    Creates a datastore record for each uploaded file indicating that it
    has yet to be uploaded to Cloud Storage.

    Returns constants.HTTP_ERROR status if an error occurs with a short
    message. Returns constants.HTTP_OK response on success with no message.
    Returns constants.HTTP_OOM status if the server is under too much load
    to handle the request.
    """
    # Fetch the user's identifier from the request, which
    # contains the oauth2 creds.
    try:
        token = flask.request.headers['X-IDTOKEN']
    except Exception:
        return flask.Response('Missing credential token header', 405)
    try:
        idinfo = client.verify_id_token(token, sk.GOOGLE_OAUTH2_CLIENT_ID)
        if idinfo['iss'] not in [
                'accounts.google.com', 'https://accounts.google.com'
        ]:
            raise crypt.AppIdentityError("Wrong issuer.")
    except crypt.AppIdentityError:
        # Invalid token
        return flask.Response('Application identity error.', 405)
    user_id = idinfo['sub']
    # Encode before hashing: hashlib requires bytes on Python 3, and
    # .encode('utf-8') is a no-op byte-wise for the ASCII subject IDs
    # Google issues, so Python 2 behavior is unchanged.
    hash_id = hashlib.sha256(user_id.encode('utf-8')).hexdigest()
    datastore_client = self.datastore.Client(self.config['PROJECT_ID'])
    batch = datastore_client.batch()
    for file_ in self.request.files.getlist('datafile'):
        # Rename the upload to <uuid>.<original extension> so names never
        # collide on the local file system or in GCS.
        filename = file_.filename
        ext = self.os.path.splitext(filename)[1].strip('.')
        name = '.'.join((str(uuid4()), ext))
        entity = self._create_datastore_entry(datastore_client, name,
                                              user=hash_id)
        if entity:
            batch.put(entity)
        # Local file system file paths. Data is written to a temp path
        # first; the file only appears under its final name once fully
        # written (rename below).
        local_file = self.os.path.join(self._dir, name)
        temp_file = local_file + self._file_not_ready_suffix
        try:
            self._write_data_to_file(temp_file, file_)
        except IOError as e:
            self.logger.error(
                'Error occured writing to file: {0}'.format(e))
            return self.Response('Failed to save file.',
                                 status=constants.HTTP_ERROR)
        except ClientDisconnected:
            # This error will occur if Gunicorn/Flask fails to respond before
            # the load balancer times the request out. In this situation, the
            # load balancer responds to the client with a 502 error, however
            # this is not detected by Flask until it reads to the end of the
            # buffered request from nginx at which point this exception will
            # be thrown by the call to self.request.stream.read in
            # _write_data_to_file.
            try:
                self.util.retry_func(self.os.remove, self._retrys,
                                     (OSError, ), temp_file)
            except RuntimeError:
                pass
            self.logger.error('Upload failed. Client disconnected.')
            return self.Response(status=constants.HTTP_ERROR)
        try:
            self.util.retry_func(self.os.rename, self._retrys, (OSError, ),
                                 temp_file, local_file)
        except RuntimeError:
            return self.Response('Failed to save file.',
                                 status=constants.HTTP_ERROR)
    try:
        batch.commit()
    except FailedToSaveToDatastoreError as e:
        self.logger.error(str(e))
        # Continue on for now. The upload daemon will create a datastore
        # entity if it doesn't find one, it will just be missing the user
        # information.
    return self.Response(status=constants.HTTP_OK)

def _create_datastore_entry(self, datastore_client, filename, user=None):
    """
    Creates and returns a datastore entity for a file with name filename
    uploaded by user user. Filename should be the new name we have
    generated that is <uuid>.<ext>.

    Returns None if the entity data fails validation (note: despite the
    previous docstring, this method never raised
    FailedToSaveToDatastoreError).
    """
    # Create datastore entity
    key = datastore_client.key(self._datastore_kind, filename)
    entity = self.datastore.Entity(key=key)
    # Set datastore entity data
    entity['user'] = datastore_client.key(self._user_datastore_kind, user)
    entity['in_gcs'] = False
    entity['processed'] = False
    entity['uploaded_date'] = self.datetime.now()
    if not ds.validate_data(entity, True, ds.DATASTORE_PHOTO):
        # Fix: this message was previously built but never emitted,
        # silently swallowing the diagnostic. Log it like the sibling
        # _create_datastore_entry implementation does.
        msg = 'Invalid entity: {0}'.format(entity)
        self.logger.error(msg)
        return None
    return entity