def test_update_or_create__when_area_exists__retrieves_db_record(self):
    db_area = self.create_upload_area()

    area = UploadArea(uuid=db_area.uuid)
    area.update_or_create()

    self.assertEqual(db_area.id, area.db_id)
class ChecksumDaemonTest(UploadTestCaseUsingMockAWS):

    def _make_dbfile(self, upload_area, test_file, checksums=None):
        return DbFile(s3_key=f"{upload_area.uuid}/{test_file.name}",
                      s3_etag=test_file.e_tag,
                      upload_area_id=upload_area.db_id,
                      name=test_file.name,
                      size=test_file.size,
                      checksums=checksums)

    def setUp(self):
        super().setUp()
        # Environment
        self.environment = {
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogoimage'
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()
        # Upload area
        self.area_uuid = str(uuid.uuid4())
        self.upload_area = UploadArea(self.area_uuid)
        self.upload_area.update_or_create()
        # Daemon
        context = Mock()
        self.daemon = ChecksumDaemon(context)
        # File
        self.small_file = FixtureFile.factory('foo')
        self.file_key = f"{self.area_uuid}/{self.small_file.name}"
        self.object = self.upload_bucket.Object(self.file_key)
        self.object.put(Key=self.file_key,
                        Body=self.small_file.contents,
                        ContentType=self.small_file.content_type)
        # Event
        self.events = {'Records': [
            {'eventVersion': '2.0',
             'eventSource': 'aws:s3',
             'awsRegion': 'us-east-1',
             'eventTime': '2017-09-15T00:05:10.378Z',
             'eventName': 'ObjectCreated:Put',
             'userIdentity': {'principalId': 'AWS:AROAI4WRRXW2K3Y2IFL6Q:upload-api-dev'},
             'requestParameters': {'sourceIPAddress': '52.91.56.220'},
             'responseElements': {'x-amz-request-id': 'FEBC85CADD1E3A66',
                                  'x-amz-id-2': 'xxx'},
             's3': {'s3SchemaVersion': '1.0',
                    'configurationId': 'NGZjNmM0M2ItZTk0Yi00YTExLWE2NDMtMzYzY2UwN2EyM2Nj',
                    'bucket': {'name': self.upload_config.bucket_name,
                               'ownerIdentity': {'principalId': 'A29PZ5XRQWJUUM'},
                               'arn': f'arn:aws:s3:::{self.upload_config.bucket_name}'},
                    'object': {'key': self.file_key,
                               'size': self.small_file.size,
                               'eTag': self.small_file.e_tag,
                               'sequencer': '0059BB193641C4EAB0'}}}]}
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
def test_format_and_send_notification(self, mock_send_notification):
    area_uuid = str(uuid.uuid4())
    upload_area = UploadArea(area_uuid)
    upload_area.update_or_create()
    upload_area._db_load()
    file = upload_area.store_file("test_file_name",
                                  "test_file_content",
                                  "application/json; dcp-type=data")
    ingest_notifier = IngestNotifier("file_uploaded", file_id=file.db_id)
    test_payload = {
        'names': "[test_file_name]",
        'upload_area_id': area_uuid
    }

    notification_id = ingest_notifier.format_and_send_notification(test_payload)

    record = UploadDB().get_pg_record("notification", notification_id, column="id")
    self.assertEqual(record['status'], "DELIVERED")
    self.assertEqual(record['file_id'], file.db_id)
    self.assertEqual(record['payload'], test_payload)
class TestUploadedFile(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

    def create_file_record(self, s3object, checksums=None):
        record = DbFile(s3_key=s3object.key,
                        s3_etag=s3object.e_tag.strip('"'),
                        name=os.path.basename(s3object.key),
                        upload_area_id=self.upload_area.db_id,
                        size=s3object.content_length,
                        checksums=checksums)
        self.db.add(record)
        self.db.commit()
        return record

    def tearDown(self):
        super().tearDown()

    def test_create__creates_a_new_s3_object_and_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        content_type = "application/octet-stream; dcp-type=data"
        file_content = "file1_content"

        uf = UploadedFile.create(upload_area=self.upload_area,
                                 name=filename,
                                 content_type=content_type,
                                 data=file_content)

        self.assertIsInstance(uf, UploadedFile)
        # S3 Object
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.upload_bucket.Object(s3_key)
        self.assertEqual(content_type, s3object.content_type)
        self.assertEqual(file_content.encode('utf8'), s3object.get()['Body'].read())
        # DB Record
        record = self.db.query(DbFile).filter(DbFile.s3_key == s3_key,
                                              DbFile.s3_etag == s3object.e_tag.strip('"')).one()
        self.assertEqual(s3_key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('"'), record.s3_etag)
        self.assertEqual(len(file_content), record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__given_existing_entities__initializes_properties_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        # Links to objects
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(self.upload_area, uf.upload_area)
        # Persisted properties
        self.assertEqual(file_record.id, uf.db_id)
        self.assertEqual(s3object.key, uf.s3_key)
        self.assertEqual(s3object.e_tag.strip('"'), uf.s3_etag)
        self.assertEqual(self.upload_area.db_id, uf._properties['upload_area_id'])
        self.assertEqual(file_record.name, uf.name)
        self.assertEqual(s3object.content_length, uf.size)

    def test_init__when_no_db_record_exists__creates_a_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        with self.assertRaises(NoResultFound):
            self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                         DbFile.s3_etag == s3object.e_tag.strip('"')).one()

        uf = UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record = self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                              DbFile.s3_etag == s3object.e_tag.strip('"')).one()
        self.assertEqual(record.id, uf.db_id)
        self.assertEqual(s3object.key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('"'), record.s3_etag)
        self.assertEqual(s3object.content_length, record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__doesnt_create_db_record_if_one_already_exists(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.create_s3_object(s3_key)
        self.create_file_record(s3object)
        record_count_before = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()

        UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record_count_after = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()
        self.assertEqual(record_count_before, record_count_after)

    def test_from_s3_key__initializes_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_s3_key(self.upload_area, s3_key=s3object.key)

        self.assertEqual(self.upload_area, uf.upload_area)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_from_db_id__initializes_correctly_and_figures_out_which_upload_area_to_use(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_db_id(file_record.id)

        self.assertEqual(self.upload_area.uuid, uf.upload_area.uuid)
        self.assertEqual(self.upload_area.db_id, uf.upload_area.db_id)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_refresh__picks_up_changed_content_type(self):
        filename = f"file-{random.randint(0, 999999999)}"
        old_content_type = "application/octet-stream"  # missing dcp-type
        new_content_type = "application/octet-stream; dcp-type=data"
        s3object = self.create_s3_object(object_key=f"{self.upload_area.uuid}/{filename}",
                                         content_type=old_content_type)
        # Create UploadedFile
        uf = UploadedFile.from_s3_key(upload_area=self.upload_area, s3_key=s3object.key)
        # Change media type on S3 object
        s3object.copy_from(CopySource={'Bucket': self.upload_config.bucket_name, 'Key': s3object.key},
                           MetadataDirective="REPLACE",
                           ContentType=new_content_type)

        self.assertEqual(old_content_type, uf.content_type)
        uf.refresh()
        self.assertEqual(new_content_type, uf.content_type)

    def test_checksums_setter_saves_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)
        uf = UploadedFile.from_db_id(file_record.id)

        uf.checksums = {'foo': 'bar'}

        self.db.refresh(file_record)
        self.assertEqual({'foo': 'bar'}, file_record.checksums)

    def test_info(self):
        test_file = FixtureFile.factory("foo")
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo", content=test_file.contents)
        file_record = self.create_file_record(s3object, checksums=test_file.checksums)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        self.assertEqual({
            'upload_area_id': self.upload_area.uuid,
            'name': file_record.name,
            'size': s3object.content_length,
            'content_type': s3object.content_type,
            'url': f"s3://{s3object.bucket_name}/{s3object.key}",
            'checksums': test_file.checksums,
            'last_modified': s3object.last_modified.isoformat()
        }, uf.info())
class TestValidationScheduler(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

    def tearDown(self):
        super().tearDown()

    @patch('upload.common.upload_area.UploadedFile.size', MAX_FILE_SIZE_IN_BYTES + 1)
    def test_check_files_can_be_validated__when_files_are_too_large_for_validation__returns_false(self):
        uploaded_file = UploadedFile.create(upload_area=self.upload_area,
                                            name="file2",
                                            content_type="application/octet-stream; dcp-type=data",
                                            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])

        file_validatable = scheduler.check_files_can_be_validated()

        self.assertEqual(False, file_validatable)

    def test__create_validation_event__creates_event_with_correct_status(self):
        uploaded_file = UploadedFile.create(upload_area=self.upload_area,
                                            name="file2#",
                                            content_type="application/octet-stream; dcp-type=data",
                                            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])
        validation_id = str(uuid.uuid4())

        validation_event = scheduler._create_validation_event("test_docker_image", validation_id, None)

        self.assertEqual(validation_event.docker_image, "test_docker_image")
        self.assertEqual(validation_event.status, "SCHEDULING_QUEUED")

    def test__update_validation_event__updates_event_status(self):
        uploaded_file = UploadedFile.create(upload_area=self.upload_area,
                                            name="file2#",
                                            content_type="application/octet-stream; dcp-type=data",
                                            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])
        scheduler.batch_job_id = "123456"
        validation_id = str(uuid.uuid4())
        validation_event = scheduler._create_validation_event("test_docker_image", validation_id, None)
        self.assertEqual(validation_event.job_id, None)

        validation_event = scheduler._update_validation_event("test_docker_image", validation_id, None)

        self.assertEqual(validation_event.job_id, "123456")
        self.assertEqual(validation_event.status, "SCHEDULED")

    @patch('upload.common.upload_area.UploadedFile.size', MAX_FILE_SIZE_IN_BYTES - 1)
    def test_check_files_can_be_validated__when_file_is_not_too_large__returns_true(self):
        uploaded_file = UploadedFile.create(upload_area=self.upload_area,
                                            name="file2",
                                            content_type="application/octet-stream; dcp-type=data",
                                            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])

        file_validatable = scheduler.check_files_can_be_validated()

        self.assertEqual(True, file_validatable)

    def test_add_to_validation_sqs__adds_correct_event_to_queue(self):
        uploaded_file = UploadedFile.create(upload_area=self.upload_area,
                                            name="file2",
                                            content_type="application/octet-stream; dcp-type=data",
                                            data="file2_content")
        validation_scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])

        validation_uuid = validation_scheduler.add_to_validation_sqs(["filename123"],
                                                                     "test_docker_image",
                                                                     {"variable": "variable"},
                                                                     "123456")

        message = self.sqs.meta.client.receive_message(QueueUrl='test_validation_q_url')
        message_body = json.loads(message['Messages'][0]['Body'])
        record = UploadDB().get_pg_record("validation", validation_uuid, column='id')
        self.assertEqual(message_body["filenames"], ["filename123"])
        self.assertEqual(message_body["validation_id"], validation_uuid)
        self.assertEqual(message_body["validator_docker_image"], "test_docker_image")
        self.assertEqual(message_body["environment"], {"variable": "variable"})
        self.assertEqual(message_body["orig_validation_id"], "123456")
        self.assertEqual(message_body["upload_area_uuid"], uploaded_file.upload_area.uuid)
        self.assertEqual(record["status"], "SCHEDULING_QUEUED")
class TestDssChecksums(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()
        self.checksum_id = str(uuid.uuid4())
        self.job_id = str(uuid.uuid4())
        self.s3client = boto3.client('s3')

    def tearDown(self):
        super().tearDown()

    def test_it_acts_like_a_dict(self):
        checksums = DssChecksums(s3_object=None,
                                 checksums={'crc32c': 'a', 'sha1': 'b', 'sha256': 'c', 's3_etag': 'd'})

        self.assertEqual(4, len(checksums))
        self.assertEqual('b', checksums['sha1'])
        self.assertIn('sha256', checksums)
        self.assertEqual(['crc32c', 's3_etag', 'sha1', 'sha256'], sorted(checksums.keys()))

    def test_are_present__for_an_object_with_no_checksums__returns_false(self):
        filename = 'file1'
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id, filename, checksums={})

        self.assertFalse(DssChecksums(s3_object=s3obj).are_present())

    def test_are_present__for_an_object_with_partial_checksums__returns_false(self):
        filename = 'file2'
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id, filename,
                                            checksums={'sha1': '1', 'sha256': '2'})

        self.assertFalse(DssChecksums(s3_object=s3obj).are_present())

    def test_are_present__for_an_object_with_all_checksums__returns_true(self):
        filename = 'file3'
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id, filename,
                                            checksums={'sha1': '1', 'sha256': '2',
                                                       's3_etag': '3', 'crc32c': '4'})

        self.assertTrue(DssChecksums(s3_object=s3obj).are_present())

    def test_init_reads_checksums_from_s3_object(self):
        s3obj = self.create_s3_object(object_key="file4")
        tagging = [
            {'Key': 'hca-dss-sha1', 'Value': '1'},
            {'Key': 'hca-dss-sha256', 'Value': '2'},
            {'Key': 'hca-dss-crc32c', 'Value': '3'},
            {'Key': 'hca-dss-s3_etag', 'Value': '4'}
        ]
        self.s3client.put_object_tagging(Bucket=s3obj.bucket_name,
                                         Key=s3obj.key,
                                         Tagging={'TagSet': tagging})

        checksums = DssChecksums(s3_object=s3obj)

        self.assertEqual({'crc32c': '3', 'sha1': '1', 'sha256': '2', 's3_etag': '4'}, checksums)

    def test_compute(self):
        test_file = FixtureFile.factory("foo")
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id, test_file.name,
                                            contents=test_file.contents)

        self.assertEqual(DssChecksums(s3_object=s3obj).compute(), test_file.checksums)

    def test_save_as_tags_on_s3_object(self):
        s3obj = self.create_s3_object(object_key="foo")
        checksums = DssChecksums(s3obj, checksums={'sha1': 'a', 'sha256': 'b',
                                                   'crc32c': 'c', 's3_etag': 'd'})

        checksums.save_as_tags_on_s3_object()

        self.assertEqual(
            [
                {'Key': 'hca-dss-sha1', 'Value': 'a'},
                {'Key': 'hca-dss-sha256', 'Value': 'b'},
                {'Key': 'hca-dss-crc32c', 'Value': 'c'},
                {'Key': 'hca-dss-s3_etag', 'Value': 'd'}
            ],
            self.s3client.get_object_tagging(Bucket=self.upload_area.bucket_name,
                                             Key=s3obj.key)['TagSet'])