class ChecksumDaemonTest(UploadTestCaseUsingMockAWS):

    def _make_dbfile(self, upload_area, test_file, checksums=None):
        return DbFile(s3_key=f"{upload_area.uuid}/{test_file.name}",
                      s3_etag=test_file.e_tag,
                      upload_area_id=upload_area.db_id,
                      name=test_file.name,
                      size=test_file.size,
                      checksums=checksums)

    def setUp(self):
        super().setUp()
        # Environment
        self.environment = {
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogoimage'
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()
        # Upload area
        self.area_uuid = str(uuid.uuid4())
        self.upload_area = UploadArea(self.area_uuid)
        self.upload_area.update_or_create()
        # Daemon
        context = Mock()
        self.daemon = ChecksumDaemon(context)
        # File
        self.small_file = FixtureFile.factory('foo')
        self.file_key = f"{self.area_uuid}/{self.small_file.name}"
        self.object = self.upload_bucket.Object(self.file_key)
        self.object.put(Key=self.file_key,
                        Body=self.small_file.contents,
                        ContentType=self.small_file.content_type)
        # Event
        self.events = {
            'Records': [
                {'eventVersion': '2.0',
                 'eventSource': 'aws:s3',
                 'awsRegion': 'us-east-1',
                 'eventTime': '2017-09-15T00:05:10.378Z',
                 'eventName': 'ObjectCreated:Put',
                 'userIdentity': {'principalId': 'AWS:AROAI4WRRXW2K3Y2IFL6Q:upload-api-dev'},
                 'requestParameters': {'sourceIPAddress': '52.91.56.220'},
                 'responseElements': {'x-amz-request-id': 'FEBC85CADD1E3A66',
                                      'x-amz-id-2': 'xxx'},
                 's3': {'s3SchemaVersion': '1.0',
                        'configurationId': 'NGZjNmM0M2ItZTk0Yi00YTExLWE2NDMtMzYzY2UwN2EyM2Nj',
                        'bucket': {'name': self.upload_config.bucket_name,
                                   'ownerIdentity': {'principalId': 'A29PZ5XRQWJUUM'},
                                   'arn': f'arn:aws:s3:::{self.upload_config.bucket_name}'},
                        'object': {'key': self.file_key,
                                   'size': self.small_file.size,
                                   'eTag': self.small_file.e_tag,
                                   'sequencer': '0059BB193641C4EAB0'}}}
            ]
        }
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
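
    # Illustrative sketch only, not part of the original suite: it shows how a test might drive
    # the daemon with the fabricated S3 event built in setUp() above. The consume_event() entry
    # point is an assumption (adjust to the daemon's real API); the expectation that an inline
    # checksum run populates DbFile.checksums mirrors the functional tests further down.
    def test_consume_event__sketch(self):
        self.daemon.consume_event(self.events)  # hypothetical entry point

        file_record = self.db.query(DbFile).filter(DbFile.s3_key == self.file_key).one()
        self.assertEqual(self.small_file.checksums, file_record.checksums)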


class TestUploadedFile(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

    def create_file_record(self, s3object, checksums=None):
        record = DbFile(s3_key=s3object.key,
                        s3_etag=s3object.e_tag.strip('"'),
                        name=os.path.basename(s3object.key),
                        upload_area_id=self.upload_area.db_id,
                        size=s3object.content_length,
                        checksums=checksums)
        self.db.add(record)
        self.db.commit()
        return record

    def tearDown(self):
        super().tearDown()

    def test_create__creates_a_new_s3_object_and_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        content_type = "application/octet-stream; dcp-type=data"
        file_content = "file1_content"

        uf = UploadedFile.create(upload_area=self.upload_area,
                                 name=filename,
                                 content_type=content_type,
                                 data=file_content)

        self.assertIsInstance(uf, UploadedFile)
        # S3 Object
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.upload_bucket.Object(s3_key)
        self.assertEqual(content_type, s3object.content_type)
        self.assertEqual(file_content.encode('utf8'), s3object.get()['Body'].read())
        # DB Record
        record = self.db.query(DbFile).filter(DbFile.s3_key == s3_key,
                                              DbFile.s3_etag == s3object.e_tag.strip('"')).one()
        self.assertEqual(s3_key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('"'), record.s3_etag)
        self.assertEqual(len(file_content), record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__given_existing_entities__initializes_properties_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        # Links to objects
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(self.upload_area, uf.upload_area)
        # Persisted properties
        self.assertEqual(file_record.id, uf.db_id)
        self.assertEqual(s3object.key, uf.s3_key)
        self.assertEqual(s3object.e_tag.strip('"'), uf.s3_etag)
        self.assertEqual(self.upload_area.db_id, uf._properties['upload_area_id'])
        self.assertEqual(file_record.name, uf.name)
        self.assertEqual(s3object.content_length, uf.size)

    def test_init__when_no_db_record_exists__creates_a_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        with self.assertRaises(NoResultFound):
            self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                         DbFile.s3_etag == s3object.e_tag.strip('"')).one()

        uf = UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record = self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                              DbFile.s3_etag == s3object.e_tag.strip('"')).one()
        self.assertEqual(record.id, uf.db_id)
        self.assertEqual(s3object.key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('"'), record.s3_etag)
        self.assertEqual(s3object.content_length, record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__doesnt_create_db_record_if_one_already_exists(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.create_s3_object(s3_key)
        self.create_file_record(s3object)
        record_count_before = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()

        UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record_count_after = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()
        self.assertEqual(record_count_before, record_count_after)

    def test_from_s3_key__initializes_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_s3_key(self.upload_area, s3_key=s3object.key)

        self.assertEqual(self.upload_area, uf.upload_area)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_from_db_id__initializes_correctly_and_figures_out_which_upload_area_to_use(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_db_id(file_record.id)

        self.assertEqual(self.upload_area.uuid, uf.upload_area.uuid)
        self.assertEqual(self.upload_area.db_id, uf.upload_area.db_id)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_refresh__picks_up_changed_content_type(self):
        filename = f"file-{random.randint(0, 999999999)}"
        old_content_type = "application/octet-stream"  # missing dcp-type
        new_content_type = "application/octet-stream; dcp-type=data"
        s3object = self.create_s3_object(object_key=f"{self.upload_area.uuid}/{filename}",
                                         content_type=old_content_type)
        # Create UploadedFile
        uf = UploadedFile.from_s3_key(upload_area=self.upload_area, s3_key=s3object.key)
        # Change media type on S3 object
        s3object.copy_from(CopySource={'Bucket': self.upload_config.bucket_name, 'Key': s3object.key},
                           MetadataDirective="REPLACE",
                           ContentType=new_content_type)

        self.assertEqual(old_content_type, uf.content_type)
        uf.refresh()
        self.assertEqual(new_content_type, uf.content_type)

    def test_checksums_setter_saves_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)
        uf = UploadedFile.from_db_id(file_record.id)

        uf.checksums = {'foo': 'bar'}

        self.db.refresh(file_record)
        self.assertEqual({'foo': 'bar'}, file_record.checksums)

    def test_info(self):
        test_file = FixtureFile.factory("foo")
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo", content=test_file.contents)
        file_record = self.create_file_record(s3object, checksums=test_file.checksums)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        self.assertEqual({
            'upload_area_id': self.upload_area.uuid,
            'name': file_record.name,
            'size': s3object.content_length,
            'content_type': s3object.content_type,
            'url': f"s3://{s3object.bucket_name}/{s3object.key}",
            'checksums': test_file.checksums,
            'last_modified': s3object.last_modified.isoformat()
        }, uf.info())


class UploadAreaTest(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
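
    # Illustrative sketch only, not part of the original suite: it shows the kind of assertion this
    # fixture is set up for, querying DbUploadArea the way the functional suites below do. The
    # 'UNLOCKED' status of a freshly created area is taken from those suites; treat the exact
    # column names as assumptions.
    def test_update_or_create__creates_db_record__sketch(self):
        area_uuid = str(uuid.uuid4())

        UploadArea(area_uuid).update_or_create()

        record = self.db.query(DbUploadArea).filter(DbUploadArea.uuid == area_uuid).one()
        self.assertEqual('UNLOCKED', record.status)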


class TestUploadService(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.batch = boto3.client('batch')
        self.uri = None
        self.db_session_maker = DBSessionMaker()

    def setUp(self):
        _start_time = time.time()
        self.api_url = f"https://{os.environ['API_HOST']}/v1"
        self.upload_config = UploadConfig()
        self.auth_headers = {'Api-Key': self.upload_config.api_key}
        self.deployment_stage = os.environ['DEPLOYMENT_STAGE']
        self.upload_area_uuid = "deadbeef-dead-dead-dead-%012d" % random.randint(0, 999999999999)
        self.verbose = True
        _end_time = time.time()
        print(f"Total startup time: {_end_time - _start_time} seconds.")

    def test__upload_small_file__successful(self):
        # Test variables
        _start_time = time.time()
        _small_file = FixtureFile.factory('small_file')

        # Run test
        print(f"\n\nUsing environment {self.deployment_stage} at URL {self.api_url}.\n")
        self._execute_create_upload_area()
        self._execute_upload_file_using_cli(_small_file.path)
        self._verify_file_was_checksummed_inline(_small_file)
        _validation_id = self._execute_validate_file(_small_file)
        # Default parameters check for success in validation.
        self._verify_file_validation_status(_validation_id)
        self._execute_forget_upload_area()
        self._execute_delete_upload_area()

        _end_time = time.time()
        print(f"Total test__upload_small_file__successful time: {_end_time - _start_time} seconds.")

    def test__upload_large_file__successful(self):
        # Test variables
        _start_time = time.time()
        _large_file = FixtureFile.factory('10241MB_file')

        # Run test
        print(f"\n\nUsing environment {self.deployment_stage} at URL {self.api_url}.\n")
        self._execute_create_upload_area()
        self._execute_upload_file_using_cli(_large_file.url)
        self._verify_file_is_checksummed_via_batch(_large_file)
        self._execute_forget_upload_area()
        self._execute_delete_upload_area()

        _end_time = time.time()
        print(f"Total test__upload_large_file__successful time: {_end_time - _start_time} seconds.")

    def test__upload_invalid_file__validation_result_shows_invalid_state(self):
        # Test variables
        _start_time = time.time()
        _invalid_file = FixtureFile.factory('small_invalid_file')

        # Run test
        print(f"\n\nUsing environment {self.deployment_stage} at URL {self.api_url}.\n")
        self._execute_create_upload_area()
        self._execute_upload_file_using_cli(_invalid_file.path)
        self._verify_file_was_checksummed_inline(_invalid_file)
        _validation_id = self._execute_validate_file(_invalid_file)
        # Verify that the validation result of the file is invalid. This is indicated by an exit code
        # of 1 and the presence of an error message saying that the file is invalid.
        self._verify_file_validation_status(_validation_id, 1, "invalid")
        self._execute_forget_upload_area()
        self._execute_delete_upload_area()

        _end_time = time.time()
        print(f"Total test__upload_invalid_file__validation_result_shows_invalid_state time: "
              f"{_end_time - _start_time} seconds.")

    def _execute_create_upload_area(self):
        response = self._make_request(description="CREATE UPLOAD AREA",
                                      verb='POST',
                                      url=f"{self.api_url}/area/{self.upload_area_uuid}",
                                      headers=self.auth_headers,
                                      expected_status=201)
        data = json.loads(response)
        self.uri = data['uri']
        self.assertEqual('UNLOCKED', self._get_upload_area_record_status())

    def _execute_upload_file_using_cli(self, file_location):
        self._run_cli_command("SELECT UPLOAD AREA", ['hca', 'upload', 'select', self.uri])
        self._run_cli_command("UPLOAD FILE USING CLI", ['hca', 'upload', 'files', file_location])

    def _execute_validate_file(self, test_file):
        response = self._make_request(
            description="VALIDATE",
            verb='PUT',
            url=f"{self.api_url}/area/{self.upload_area_uuid}/{test_file.name}/validate",
            expected_status=200,
            headers=self.auth_headers,
            json={"validator_image": "humancellatlas/upload-validator-example:14"})
        validation_id = json.loads(response)['validation_id']
        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('SCHEDULED', timeout_seconds=MINUTE_SEC)
        validation_job_id = self._get_validation_record_job_id(validation_id)
        WaitFor(self._get_batch_job_status, validation_job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)
        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('VALIDATED', timeout_seconds=MINUTE_SEC)
        return validation_id

    def _execute_forget_upload_area(self):
        self._run_cli_command("FORGET UPLOAD AREA", ['hca', 'upload', 'forget', self.upload_area_uuid])

    def _execute_delete_upload_area(self):
        self._make_request(description="DELETE UPLOAD AREA",
                           verb='DELETE',
                           url=f"{self.api_url}/area/{self.upload_area_uuid}",
                           headers=self.auth_headers,
                           expected_status=202)
        WaitFor(self._get_upload_area_record_status) \
            .to_return_value('DELETED', timeout_seconds=MINUTE_SEC)

    def _verify_file_was_checksummed_inline(self, test_file):
        """
        Files smaller than 10GB are expected to be checksummed inline. No batch job needs to be
        scheduled, so no job id is assigned to the checksum record.
        """
        print("VERIFYING FILE WAS CHECKSUMMED INLINE...")
        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('CHECKSUMMED', timeout_seconds=300)

        # Verify that the inline checksum was not assigned a job id.
        checksum_record = self._get_checksum_record(test_file.name)
        self.assertIsNone(checksum_record.job_id)

        # Check file record now contains checksums.
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        self.assertEqual(test_file.checksums, file_record.checksums)

        # Check S3 object has checksum tags.
        tagging = boto3.client('s3').get_object_tagging(Bucket=self.upload_config.bucket_name,
                                                        Key=f"{self.upload_area_uuid}/{test_file.name}")
        self.assertEqual(sorted(tagging['TagSet'], key=lambda x: x['Key']), test_file.s3_tagset)

    def _verify_file_is_checksummed_via_batch(self, test_file):
        """
        Files of 10GB or larger are expected to be checksummed via Batch. The checksum is first
        scheduled, and the checksum record is assigned the id of the Batch job.
        """
        print("VERIFYING FILE WAS CHECKSUMMED VIA BATCH...")
        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('SCHEDULED', timeout_seconds=30)
        checksum_record = self._get_checksum_record(test_file.name)
        WaitFor(self._get_batch_job_status, checksum_record.job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)
        checksum_record = self._get_checksum_record(test_file.name)
        self.assertEqual('CHECKSUMMED', checksum_record.status)

        # Check file record now contains checksums.
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        self.assertEqual(test_file.checksums, file_record.checksums)

        # Check S3 object has checksum tags.
        tagging = boto3.client('s3').get_object_tagging(Bucket=self.upload_config.bucket_name,
                                                        Key=f"{self.upload_area_uuid}/{test_file.name}")
        self.assertEqual(sorted(tagging['TagSet'], key=lambda x: x['Key']), test_file.s3_tagset)

    def _verify_file_validation_status(self, validation_id, expected_exit_code=0, expected_error_msg=''):
        # Get the validation status of the file.
        _validation_results = self._get_validation_record(validation_id).results
        _actual_exit_code = _validation_results['exit_code']
        _actual_error_msg = _validation_results['stdout']
        self.assertEqual(expected_exit_code, _actual_exit_code)
        self.assertIn(expected_error_msg, _actual_error_msg)

    def _get_upload_area_record_status(self):
        record = self._get_upload_area_record()
        return record.status if record else None

    def _get_checksum_record_status(self, filename):
        record = self._get_checksum_record(filename)
        return record.status if record else None

    def _get_validation_record_job_id(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.job_id if record else None

    def _get_validation_record_status(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.status if record else None

    def _get_upload_area_record(self):
        db = self.db_session_maker.session()
        return db.query(DbUploadArea).filter(DbUploadArea.uuid == self.upload_area_uuid).one_or_none()

    def _get_checksum_record(self, filename):
        db = self.db_session_maker.session()
        s3_key = f"{self.upload_area_uuid}/{filename}"
        file_record = db.query(DbFile).filter(DbFile.s3_key == s3_key).one_or_none()
        if file_record is None:
            return None
        checksum_record = db.query(DbChecksum).filter(DbChecksum.file_id == file_record.id).one_or_none()
        return checksum_record

    def _get_validation_record(self, validation_id):
        db = self.db_session_maker.session()
        return db.query(DbValidation).filter(DbValidation.id == validation_id).one_or_none()

    def _get_batch_job_status(self, job_id):
        response = self.batch.describe_jobs(jobs=[job_id])
        self.assertEqual(1, len(response['jobs']))
        return response['jobs'][0]['status']

    def _make_request(self, description, verb, url, expected_status=None, **options):
        print(description + ": ")
        print(f"{verb.upper()} {url}")
        method = getattr(requests, verb.lower())
        response = method(url, **options)
        print(f"-> {response.status_code}")
        if expected_status:
            self.assertEqual(expected_status, response.status_code)
        if response.content:
            print(response.content.decode('utf8'))
        return response.content

    def _run_cli_command(self, description, command, expected_returncode=0):
        print("\n" + description + ": ")
        print(' '.join(command))
        completed_process = subprocess.run(command, stdout=None, stderr=None)
        self.assertEqual(expected_returncode, completed_process.returncode)


class TestUploadService(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.batch = boto3.client('batch')
        self.uri = None
        self.db_session_maker = DBSessionMaker()

    def setUp(self):
        self.test_start_time = time.time()
        self.upload_config = UploadConfig()
        self.upload_client = UploadService(deployment_stage=os.environ['DEPLOYMENT_STAGE'],
                                           api_token=self.upload_config.api_key)
        self.upload_area_uuid = "deadbeef-dead-dead-dead-%012d" % random.randint(0, 999999999999)
        print("")
        self._execute_create_upload_area()
        print("\tstartup time: %0.02f seconds." % (time.time() - self.test_start_time))

    def tearDown(self):
        test_end_time = time.time()
        print("\t%s took %0.02f seconds." % (self._testMethodName, test_end_time - self.test_start_time))
        self._execute_delete_upload_area()
        print("\tteardown time: %0.02f seconds." % (time.time() - test_end_time))

    # All tests are formatted into 2-3 sections separated by blank lines:
    #
    #   Setup preconditions (optional)
    #
    #   Do the thing we are testing
    #
    #   Test the thing was done

    def test_store_file_using_api(self):
        metadata_file = FixtureFile.factory('metadata_file.json')

        self.upload_area.store_file(filename=metadata_file.name,
                                    file_content=metadata_file.contents,
                                    content_type='application/json; dcp-type=metadata')

        # Implicitly tests that the file was created.
        self._verify_file_was_checksummed_inline(metadata_file)

    def test_store_file_using_cli(self):
        """Store a file directly in S3 using the CLI, then notify Upload via the REST API."""
        small_file = FixtureFile.factory('small_file')

        self._execute_upload_file_using_cli(small_file.path)

        # Implicitly tests that the file was created.
        self._verify_file_was_checksummed_inline(small_file)

    def test_store_file_using_cli__with_large_file__triggers_batch_checksumming(self):
        large_file = FixtureFile.factory('10241MB_file')

        self._execute_upload_file_using_cli(large_file.url)

        self._verify_file_is_checksummed_via_batch(large_file)

    def test_validate_file__with_valid_file__reports_validation_results(self):
        small_file = FixtureFile.factory('small_file')
        self.upload_area.store_file(filename=small_file.name,
                                    file_content=small_file.contents,
                                    content_type='application/json; dcp-type=data')

        response = self.upload_area.validate_files(file_list=[small_file.name],
                                                   validator_image="humancellatlas/upload-validator-example:14")

        validation_id = response['validation_id']
        self._wait_for_validation_to_complete(validation_id)
        # Default parameters check for success in validation.
        self._verify_file_validation_status(validation_id)

    def test__upload_invalid_file__validation_result_shows_invalid_state(self):
        invalid_file = FixtureFile.factory('small_invalid_file')
        self.upload_area.store_file(filename=invalid_file.name,
                                    file_content=invalid_file.contents,
                                    content_type='application/json; dcp-type=data')

        response = self.upload_area.validate_files(file_list=[invalid_file.name],
                                                   validator_image="humancellatlas/upload-validator-example:14")

        validation_id = response['validation_id']
        self._wait_for_validation_to_complete(validation_id)
        # Verify that the validation result of the file is invalid. This is indicated by an exit code
        # of 1 and the presence of an error message saying that the file is invalid.
        self._verify_file_validation_status(validation_id, expected_exit_code=1, expected_error_msg="invalid")

    def _execute_create_upload_area(self):
        self.upload_area = self.upload_client.create_area(self.upload_area_uuid)
        self.assertEqual('UNLOCKED', self._get_upload_area_record_status())
        print(f"\tCreated upload area {self.upload_area_uuid}")

    def _execute_upload_file_using_cli(self, file_location):
        self._run_cli_command('hca', 'upload', 'select', str(self.upload_area.uri))
        self._run_cli_command('hca', 'upload', 'files', file_location)
        self._run_cli_command('hca', 'upload', 'forget', self.upload_area.uuid)

    def _wait_for_validation_to_complete(self, validation_id):
        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('SCHEDULED', timeout_seconds=MINUTE_SEC)
        validation_job_id = self._get_validation_record_job_id(validation_id)
        WaitFor(self._get_batch_job_status, validation_job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)
        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('VALIDATED', timeout_seconds=MINUTE_SEC)

    def _execute_delete_upload_area(self):
        print(f"\tDeleting upload area {self.upload_area.uuid}")
        self.upload_area.delete()
        WaitFor(self._get_upload_area_record_status) \
            .to_return_value('DELETED', timeout_seconds=MINUTE_SEC)

    def _verify_file_was_checksummed_inline(self, test_file):
        """
        Files smaller than 10GB are expected to be checksummed inline. No batch job needs to be
        scheduled, so no job id is assigned to the checksum record.
        """
        print("\tVerifying file was checksummed inline...")
        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('CHECKSUMMED', timeout_seconds=300)

        # Verify that the inline checksum was not assigned a job id.
        checksum_record = self._get_checksum_record(test_file.name)
        self.assertIsNone(checksum_record.job_id)

        # Check file record now contains checksums.
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        self.assertEqual(test_file.checksums, file_record.checksums)

        # Check S3 object has checksum tags.
        tagging = boto3.client('s3').get_object_tagging(Bucket=self.upload_config.bucket_name,
                                                        Key=f"{self.upload_area_uuid}/{test_file.name}")
        _actual_checksums = self._get_dict_representation_of_tagset_case_insensitive(tagging['TagSet'])
        _expected_checksums = self._get_dict_representation_of_tagset_case_insensitive(test_file.s3_tagset)
        self.assertDictEqual(_actual_checksums, _expected_checksums)

    def _verify_file_is_checksummed_via_batch(self, test_file):
        """
        Files of 10GB or larger are expected to be checksummed via Batch. The checksum is first
        scheduled, and the checksum record is assigned the id of the Batch job.
        """
        print("\tVerifying file was checksummed via batch...")
        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('SCHEDULED', timeout_seconds=30)
        checksum_record = self._get_checksum_record(test_file.name)
        WaitFor(self._get_batch_job_status, checksum_record.job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)
        checksum_record = self._get_checksum_record(test_file.name)
        self.assertEqual('CHECKSUMMED', checksum_record.status)

        # Check file record now contains checksums (comparison is case-insensitive).
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        for _checksum_function in set(list(test_file.checksums.keys()) + list(file_record.checksums.keys())):
            self.assertEqual(test_file.checksums[_checksum_function].lower(),
                             file_record.checksums[_checksum_function].lower())

        # Check S3 object has checksum tags.
        tagging = boto3.client('s3').get_object_tagging(Bucket=self.upload_config.bucket_name,
                                                        Key=f"{self.upload_area_uuid}/{test_file.name}")
        _actual_checksums = self._get_dict_representation_of_tagset_case_insensitive(tagging['TagSet'])
        _expected_checksums = self._get_dict_representation_of_tagset_case_insensitive(test_file.s3_tagset)
        self.assertDictEqual(_actual_checksums, _expected_checksums)

    def _verify_file_validation_status(self, validation_id, expected_exit_code=0, expected_error_msg=''):
        # Get the validation status of the file.
        _validation_results = self._get_validation_record(validation_id).results
        _actual_exit_code = _validation_results['exit_code']
        _actual_error_msg = _validation_results['stdout']
        self.assertEqual(expected_exit_code, _actual_exit_code)
        self.assertIn(expected_error_msg, _actual_error_msg)

    def _get_upload_area_record_status(self):
        record = self._get_upload_area_record()
        return record.status if record else None

    def _get_checksum_record_status(self, filename):
        record = self._get_checksum_record(filename)
        return record.status if record else None

    def _get_validation_record_job_id(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.job_id if record else None

    def _get_validation_record_status(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.status if record else None

    def _get_upload_area_record(self):
        db = self.db_session_maker.session()
        return db.query(DbUploadArea).filter(DbUploadArea.uuid == self.upload_area_uuid).one_or_none()

    def _get_checksum_record(self, filename):
        db = self.db_session_maker.session()
        s3_key = f"{self.upload_area_uuid}/{filename}"
        file_record = db.query(DbFile).filter(DbFile.s3_key == s3_key).one_or_none()
        if file_record is None:
            return None
        checksum_record = db.query(DbChecksum).filter(DbChecksum.file_id == file_record.id).one_or_none()
        return checksum_record

    def _get_validation_record(self, validation_id):
        db = self.db_session_maker.session()
        return db.query(DbValidation).filter(DbValidation.id == validation_id).one_or_none()

    def _get_batch_job_status(self, job_id):
        response = self.batch.describe_jobs(jobs=[job_id])
        self.assertEqual(1, len(response['jobs']))
        return response['jobs'][0]['status']

    def _run_cli_command(self, *command, expected_returncode=0):
        print("\t" + ' '.join(command))
        completed_process = subprocess.run(command, stdout=None, stderr=None)
        self.assertEqual(expected_returncode, completed_process.returncode)

    def _get_dict_representation_of_tagset_case_insensitive(self, tagset):
        _tagset_dict = {}
        for _item in tagset:
            _tagset_dict[_item['Key'].lower()] = _item['Value'].lower()
        return _tagset_dict