def test_validating_status_file_validation(self, mock_format_and_send_notification):
    """Posting a VALIDATING update stamps the start time on the validation
    record without setting an end time or results, and does not notify ingest.
    """
    validation_id = str(uuid.uuid4())
    orig_val_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    # Seed a SCHEDULED validation record that the POST below will update.
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED",
                                       docker_image="test_docker_image",
                                       original_validation_id=orig_val_id)
    validation_event.create_record()
    data = {
        "status": "VALIDATING",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    # Fields set at scheduling time survive the status update.
    self.assertEqual("test_docker_image", record["docker_image"])
    self.assertEqual(validation_id, record["id"])
    self.assertEqual(orig_val_id, record["original_validation_id"])
    self.assertEqual("VALIDATING", record["status"])
    # Start timestamp is set once validation begins; end/results stay unset.
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual(None, record["validation_ended_at"])
    self.assertEqual(None, record.get("results"))
    # The per-file validation status endpoint reflects the new state.
    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "VALIDATING")
    # Ingest is only notified on terminal states, not on VALIDATING.
    mock_format_and_send_notification.assert_not_called()
def test_post_checksum__with_a_checksumming_payload__updates_db_record(self, mock_format_and_send_notification):
    """A CHECKSUMMING status POST updates the checksum record's status and
    sends no notification to ingest.
    """
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)
    s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json')
    uf = UploadedFile(area, s3object=s3_object)
    checksum_id = str(uuid.uuid4())
    event = ChecksumEvent(file_id=uf.db_id,
                          checksum_id=checksum_id,
                          job_id='12345',
                          status="SCHEDULED")
    event.create_record()

    response = self.client.post(f"/v1/area/{area.uuid}/update_checksum/{checksum_id}",
                                headers=self.authentication_header,
                                json={
                                    "status": "CHECKSUMMING",
                                    "job_id": event.job_id,
                                    "payload": uf.info()
                                })

    self.assertEqual(204, response.status_code)
    record = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
    self.assertEqual("CHECKSUMMING", record.status)
    # Non-terminal status: ingest must not be notified.
    mock_format_and_send_notification.assert_not_called()
def test_update_event_with_validation_event(self, mock_format_and_send_notification):
    """update_event() moves a validation record through VALIDATING (start
    timestamp only) and then VALIDATED (end timestamp and results recorded).
    """
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED")
    validation_event.create_record()
    # Phase 1: VALIDATING — start timestamp set, no end time, no results.
    validation_event.status = "VALIDATING"
    response = update_event(validation_event, uploaded_file.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATING", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual(None, record["validation_ended_at"])
    self.assertEqual(None, record.get("results"))
    # Phase 2: VALIDATED — end timestamp set and the payload stored as results.
    validation_event.status = "VALIDATED"
    response = update_event(validation_event, uploaded_file.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATED", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_ended_at"))))
    self.assertEqual(uploaded_file.info(), record.get("results"))
def test_update_event_with_checksum_event(self, mock_format_and_send_notification):
    """update_event() moves a checksum record through CHECKSUMMING and then
    CHECKSUMMED, stamping the start and end timestamps as it goes.
    """
    area_uuid = self._create_area()
    area = UploadArea(area_uuid)
    s3_object = self.mock_upload_file_to_s3(area_uuid, 'foo.json')
    uf = UploadedFile(area, s3object=s3_object)
    checksum_id = str(uuid.uuid4())
    event = ChecksumEvent(file_id=uf.db_id,
                          checksum_id=checksum_id,
                          job_id='12345',
                          status="SCHEDULED")
    event.create_record()

    def assert_is_datetime(value):
        # Records come back from postgres; verify the column holds a datetime.
        self.assertEqual("<class 'datetime.datetime'>", str(type(value)))

    # Phase 1: CHECKSUMMING — start timestamp set, end still empty.
    event.status = "CHECKSUMMING"
    response = update_event(event, uf.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("checksum", checksum_id)
    self.assertEqual("CHECKSUMMING", record["status"])
    assert_is_datetime(record.get("checksum_started_at"))
    self.assertEqual(None, record["checksum_ended_at"])

    # Phase 2: CHECKSUMMED — both timestamps populated.
    event.status = "CHECKSUMMED"
    response = update_event(event, uf.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("checksum", checksum_id)
    self.assertEqual("CHECKSUMMED", record["status"])
    assert_is_datetime(record.get("checksum_started_at"))
    assert_is_datetime(record.get("checksum_ended_at"))
def test_post_checksum__for_an_obj_without_tags__updates_db_but_and_does_not_notify_ingest(
        self, mock_fasn):
    """A CHECKSUMMED update for an object uploaded with no checksum tags
    still updates the DB record but does not notify ingest.
    """
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)
    # Upload with an empty checksums dict: the S3 object carries no tags.
    s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json', checksums={})
    uf = UploadedFile(area, s3object=s3_object)
    checksum_id = str(uuid.uuid4())
    event = ChecksumEvent(file_id=uf.db_id,
                          checksum_id=checksum_id,
                          job_id='12345',
                          status="SCHEDULED")
    event.create_record()

    response = self.client.post(
        f"/v1/area/{area.uuid}/update_checksum/{checksum_id}",
        json={
            "status": "CHECKSUMMED",
            "job_id": event.job_id,
            "payload": uf.info()
        })

    self.assertEqual(204, response.status_code)
    record = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
    self.assertEqual("CHECKSUMMED", record.status)
    mock_fasn.assert_not_called()
def test_checksum_statuses_for_upload_area(
        self, mock_format_and_send_notification):
    """The /checksums endpoint tallies files per checksum status; files with
    no checksum record at all count as CHECKSUMMING_UNSCHEDULED.
    """
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)
    # Five files in the area; only the first three get checksum records.
    files = []
    for n in range(1, 6):
        s3_object = self.mock_upload_file_to_s3(area.uuid, f'foo{n}.json')
        files.append(UploadedFile(area, s3object=s3_object))
    records = [
        (files[0], '123', "SCHEDULED"),
        (files[1], '456', "CHECKSUMMING"),
        (files[2], '789', "CHECKSUMMED"),
    ]
    for uf, job_id, status in records:
        ChecksumEvent(file_id=uf.db_id,
                      checksum_id=str(uuid.uuid4()),
                      job_id=job_id,
                      status=status).create_record()

    response = self.client.get(f"/v1/area/{area.uuid}/checksums")

    assert response.get_json() == {
        'CHECKSUMMED': 1,
        'CHECKSUMMING': 1,
        'CHECKSUMMING_UNSCHEDULED': 2,
        'SCHEDULED': 1,
        'TOTAL_NUM_FILES': 5
    }
def test_unscheduled_status_file_validation(self, mock_format_and_send_notification):
    """A file with no validation record reports status UNSCHEDULED."""
    area_id = self._create_area()
    s3_object = self.mock_upload_file_to_s3(area_id, 'foo.json')
    # Constructing the UploadedFile persists the file record; no validation
    # is ever scheduled for it.
    UploadedFile(UploadArea(area_id), s3object=s3_object)

    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")

    self.assertEqual(response.get_json()['validation_status'], "UNSCHEDULED")
def test_validation_statuses_for_upload_area(
        self, mock_format_and_send_notification):
    """The /validations endpoint tallies validation records by status."""
    area_id = self._create_area()
    area = UploadArea(area_id)
    # One validation record per file: (filename, job id, status, results).
    specs = [
        ('foo1.json', '12345', "SCHEDULED", None),
        ('foo2.json', '23456', "VALIDATING", None),
        ('foo3.json', '34567', "VALIDATED", 'VALID'),
        ('foo4.json', '45678', "VALIDATING", None),
    ]
    for filename, job_id, status, results in specs:
        s3_object = self.mock_upload_file_to_s3(area_id, filename)
        uf = UploadedFile(area, s3object=s3_object)
        event = ValidationEvent(file_ids=[uf.db_id],
                                validation_id=str(uuid.uuid4()),
                                job_id=job_id,
                                status=status)
        if results is not None:
            event.results = results
        event.create_record()

    response = self.client.get(f"/v1/area/{area_id}/validations")

    self.assertEqual({'SCHEDULED': 1, 'VALIDATED': 1, 'VALIDATING': 2},
                     response.get_json())
def test_init__doesnt_create_db_record_if_one_already_exists(self):
    """Constructing an UploadedFile over an existing file record must not
    insert a duplicate row.
    """
    s3_object = self.create_s3_object(f"{self.upload_area_id}/foo")
    self.create_file_record(s3_object)
    count_before = self.db.query(DbFile).count()

    UploadedFile(upload_area=self.upload_area, s3object=s3_object)

    self.assertEqual(count_before, self.db.query(DbFile).count())
def test_get_checksum__for_a_file_with_no_checksum_records__returns_status_unscheduled(self, mock_fasn):
    """GET /checksum for a file that has no checksum records yields
    UNSCHEDULED.
    """
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)
    s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json')
    UploadedFile(area, s3object=s3_object)  # persists the file record

    response = self.client.get(f"/v1/area/{area.uuid}/foo.json/checksum")

    self.assertEqual("UNSCHEDULED", response.get_json()['checksum_status'])
def test_init__doesnt_create_db_record_if_one_already_exists(self):
    """Constructing an UploadedFile for an S3 object that already has a file
    record must not insert a second record for the same key.

    Fix: ``filename`` was generated but never used — the S3 key was built
    with a literal placeholder instead of the random filename, so the test
    was not exercising a unique key as intended.
    """
    filename = f"file-{random.randint(0, 999999999)}"
    s3_key = f"{self.upload_area_id}/{filename}"
    s3object = self.create_s3_object(s3_key)
    self.create_file_record(s3object)
    record_count_before = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()

    UploadedFile(upload_area=self.upload_area, s3object=s3object)

    record_count_after = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()
    self.assertEqual(record_count_before, record_count_after)
def test_scheduled_status_file_validation(self, mock_format_and_send_notification):
    """A file whose only validation record is SCHEDULED reports SCHEDULED."""
    area_id = self._create_area()
    s3_object = self.mock_upload_file_to_s3(area_id, 'foo.json')
    area = UploadArea(area_id)
    uf = UploadedFile(area, s3object=s3_object)
    ValidationEvent(file_ids=[uf.db_id],
                    validation_id=str(uuid.uuid4()),
                    job_id='12345',
                    status="SCHEDULED").create_record()

    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")

    self.assertEqual(response.get_json()['validation_status'], "SCHEDULED")
def test_info(self):
    """UploadedFile.info() combines the file record, its checksums and the
    S3 object's metadata into a single dict.
    """
    fixture = FixtureFile.factory("foo")
    s3_object = self.create_s3_object(f"{self.upload_area_id}/foo", content=fixture.contents)
    file_record = self.create_file_record(s3_object, checksums=fixture.checksums)
    uploaded = UploadedFile(self.upload_area, s3object=s3_object)

    expected = {
        'upload_area_id': self.upload_area.uuid,
        'name': file_record.name,
        'size': s3_object.content_length,
        'content_type': s3_object.content_type,
        'url': f"s3://{s3_object.bucket_name}/{s3_object.key}",
        'checksums': fixture.checksums,
        'last_modified': s3_object.last_modified.isoformat(),
    }
    self.assertEqual(expected, uploaded.info())
def test_init__given_existing_entities__initializes_properties_correctly(self):
    """An UploadedFile built over an existing S3 object and file record
    exposes both the linked objects and the persisted DB properties.
    """
    s3object = self.create_s3_object(f"{self.upload_area_id}/foo")
    file_record = self.create_file_record(s3object)
    uf = UploadedFile(self.upload_area, s3object=s3object)
    # Links to objects
    self.assertEqual(s3object, uf.s3object)
    self.assertEqual(self.upload_area, uf.upload_area)
    # Persisted properties
    self.assertEqual(file_record.id, uf.db_id)
    self.assertEqual(s3object.key, uf.s3_key)
    # S3 reports ETags wrapped in literal double quotes; the stored value is unquoted.
    self.assertEqual(s3object.e_tag.strip('\"'), uf.s3_etag)
    self.assertEqual(self.upload_area.db_id, uf._properties['upload_area_id'])
    self.assertEqual(file_record.name, uf.name)
    self.assertEqual(s3object.content_length, uf.size)
def test_validated_status_file_validation(self, mock_format_and_send_notification):
    """Driving a validation through VALIDATING and then VALIDATED records
    both timestamps and results, and notifies ingest exactly once with the
    file's info payload.
    """
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED",
                                       docker_image="test_docker_image")
    validation_event.create_record()
    # First update: move the record to VALIDATING (response deliberately unchecked).
    data = {
        "status": "VALIDATING",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    # Second update: terminal VALIDATED state, which triggers notification.
    data = {
        "status": "VALIDATED",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    self.assertEqual(204, response.status_code)
    # Ingest is notified once with the full file-info payload.
    mock_format_and_send_notification.assert_called_once_with({
        'upload_area_id': area_id,
        'name': 'foo.json',
        'size': 3,
        'last_modified': s3obj.last_modified.isoformat(),
        'content_type': "application/json",
        'url': f"s3://{self.upload_config.bucket_name}/{area_id}/foo.json",
        'checksums': {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
    })
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATED", record["status"])
    self.assertEqual("test_docker_image", record["docker_image"])
    # Both timestamps are set and the payload is stored as the results.
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_ended_at"))))
    self.assertEqual(uploaded_file.info(), record.get("results"))
    # The per-file status endpoint reflects the terminal state.
    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "VALIDATED")
def test_get_checksum__for_a_file_with_checksum_records__returns_the_most_recent_record_status(self, mock_fasn):
    """GET /checksum reports the latest checksum record's status together
    with the file's stored checksums.
    """
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)
    s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json')
    uf = UploadedFile(area, s3object=s3_object)
    checksum_id = str(uuid.uuid4())
    ChecksumEvent(file_id=uf.db_id,
                  checksum_id=checksum_id,
                  job_id='12345',
                  status="SCHEDULED").create_record()

    response = self.client.get(f"/v1/area/{area.uuid}/{uf.name}/checksum")

    body = response.get_json()
    self.assertEqual("SCHEDULED", body['checksum_status'])
    self.assertEqual(uf.checksums, body['checksums'])
def test_init__when_no_db_record_exists__creates_a_db_record(self):
    """Constructing an UploadedFile with no matching file record inserts one
    populated from the S3 object's key, etag and size.
    """
    s3object = self.create_s3_object(f"{self.upload_area_id}/foo")

    def find_record():
        # Look up the file record by (key, unquoted etag); .one() raises
        # NoResultFound when the record does not exist yet.
        return self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                            DbFile.s3_etag == s3object.e_tag.strip('\"')).one()

    with self.assertRaises(NoResultFound):
        find_record()

    uf = UploadedFile(upload_area=self.upload_area, s3object=s3object)

    record = find_record()
    self.assertEqual(record.id, uf.db_id)
    self.assertEqual(s3object.key, record.s3_key)
    self.assertEqual("foo", record.name)
    self.assertEqual(s3object.e_tag.strip('\"'), record.s3_etag)
    self.assertEqual(s3object.content_length, record.size)
    self.assertEqual(self.upload_area.db_id, record.upload_area_id)
def test_post_checksum__with_a_checksummed_payload__updates_db_records_and_notifies_ingest(
        self, mock_fasn):
    """A CHECKSUMMED update stores the payload's checksums on the file
    record, marks the checksum record CHECKSUMMED, and notifies ingest.
    """
    checksum_id = str(uuid.uuid4())
    db_area = self.create_upload_area()
    upload_area = UploadArea(db_area.uuid)
    s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                   checksum_id=checksum_id,
                                   job_id='12345',
                                   status="SCHEDULED")
    checksum_event.create_record()
    checksums = {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
    response = self.client.post(
        f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
        json={
            "status": "CHECKSUMMED",
            "job_id": checksum_event.job_id,
            "payload": {
                "upload_area_id": upload_area.db_id,
                "name": uploaded_file.name,
                "checksums": checksums
            }
        })
    self.assertEqual(204, response.status_code)
    # Checksum record status should be updated
    db_checksum = self.db.query(DbChecksum).filter(
        DbChecksum.id == checksum_id).one()
    self.assertEqual("CHECKSUMMED", db_checksum.status)
    # Checksums should be stored in File record
    db_file = self.db.query(DbFile).filter(
        DbFile.id == uploaded_file.db_id).one()
    self.assertEqual(checksums, db_file.checksums)
    # Ingest should be notified
    mock_fasn.assert_called()