Example #1
    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()

        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()
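All of the examples below obtain sessions through DBSessionMaker. As a rough, hypothetical sketch of the factory they rely on (assuming SQLAlchemy and a connection-URI config; the real class may differ):

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

class DBSessionMaker:
    # Hypothetical session factory matching the usage in these examples.
    def __init__(self, database_uri="postgresql://localhost/upload"):  # assumed default
        self.engine = create_engine(database_uri)
        self.session_maker = sessionmaker(bind=self.engine)

    def session(self, **kwargs):
        # Each call hands back a new Session bound to the engine.
        return self.session_maker(**kwargs)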
Example #2
    def setUp(self):
        super().setUp()
        # Environment
        self.environment = {
            'CSUM_DOCKER_IMAGE': 'bogo_image',
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()

        self.db = DBSessionMaker().session()

        # Setup app
        self.client = client_for_test_api_server()
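Examples #2 through #4 wrap environment-variable mutation in an EnvironmentSetup enter/exit pair. A minimal sketch of that pattern, assuming it saves and restores os.environ (inferred from the setUp/tearDown usage, not confirmed by the snippets):

import os

class EnvironmentSetup:
    def __init__(self, env_vars):
        self.env_vars = env_vars
        self.saved = {}

    def enter(self):
        # Remember the current values, then apply the overrides.
        for name, value in self.env_vars.items():
            self.saved[name] = os.environ.get(name)
            os.environ[name] = value

    def exit(self):
        # Restore the remembered values, dropping variables that were unset.
        for name, old_value in self.saved.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value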
Example #3
    def setUp(self):
        super().setUp()
        # Environment
        self.api_key = "foo"
        self.environment = {
            'INGEST_API_KEY': self.api_key,
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogo_image',
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()

        self.db = DBSessionMaker().session()

        # Authentication
        self.authentication_header = {'Api-Key': self.api_key}
        # Setup app
        self.client = client_for_test_api_server()
Example #4
    def setUp(self):
        super().setUp()
        # Environment
        self.environment = {
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogoimage'
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()

        # Upload area
        self.area_uuid = str(uuid.uuid4())
        self.upload_area = UploadArea(self.area_uuid)
        self.upload_area.update_or_create()
        # daemon
        context = Mock()
        self.daemon = ChecksumDaemon(context)
        # File
        self.small_file = FixtureFile.factory('foo')
        self.file_key = f"{self.area_uuid}/{self.small_file.name}"
        self.object = self.upload_bucket.Object(self.file_key)
        self.object.put(Key=self.file_key, Body=self.small_file.contents, ContentType=self.small_file.content_type)
        # Event
        self.events = {'Records': [
            {'eventVersion': '2.0', 'eventSource': 'aws:s3', 'awsRegion': 'us-east-1',
             'eventTime': '2017-09-15T00:05:10.378Z', 'eventName': 'ObjectCreated:Put',
             'userIdentity': {'principalId': 'AWS:AROAI4WRRXW2K3Y2IFL6Q:upload-api-dev'},
             'requestParameters': {'sourceIPAddress': '52.91.56.220'},
             'responseElements': {'x-amz-request-id': 'FEBC85CADD1E3A66',
                                  'x-amz-id-2': 'xxx'},
             's3': {'s3SchemaVersion': '1.0',
                    'configurationId': 'NGZjNmM0M2ItZTk0Yi00YTExLWE2NDMtMzYzY2UwN2EyM2Nj',
                    'bucket': {'name': self.upload_config.bucket_name,
                               'ownerIdentity': {'principalId': 'A29PZ5XRQWJUUM'},
                               'arn': f'arn:aws:s3:::{self.upload_config.bucket_name}'},
                    'object': {'key': self.file_key,
                               'size': self.small_file.size,
                               'eTag': self.small_file.e_tag,
                               'sequencer': '0059BB193641C4EAB0'}}}]}
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
Example #5
 def create_upload_area(self,
                        area_uuid=None,
                        status='UNLOCKED',
                        db_session=None):
     area_uuid = area_uuid or str(uuid.uuid4())
     db_session = db_session or DBSessionMaker().session()
     db_area = DbUploadArea(uuid=area_uuid,
                            bucket_name=self.upload_config.bucket_name,
                            status=status)
     db_session.add(db_area)
     db_session.commit()
     return db_area
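A hedged usage sketch for this helper inside a test case (the test name and assertions are illustrative only):

 def test_area_defaults(self):
     db_area = self.create_upload_area()           # fresh UUID, default status
     self.assertEqual('UNLOCKED', db_area.status)

     locked_area = self.create_upload_area(status='LOCKED')
     self.assertEqual('LOCKED', locked_area.status)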
Example #6
class DbDumper:
    def __init__(self):
        self.db = DBSessionMaker().session()

    def dump_all(self):
        for area in self.db.query(DbUploadArea).all():
            self.print_area(area)

    def dump_one_area(self, upload_area_id, filename):
        area = self.db.query(DbUploadArea).filter(
            DbUploadArea.id == upload_area_id).one()
        self.print_area(area)
        if filename:
            file = self.db.query(DbFile).filter(
                DbFile.upload_area_id == upload_area_id,
                DbFile.name == filename).one()
            self.print_file(file)
        else:
            for file in area.files:
                self.print_file(file)

    def print_area(self, area):
        print(
            f"\nUPLOAD AREA {area.bucket_name}/{area.id}:\n"
            f"\tStatus {area.status} Created {area.created_at} Updated {area.updated_at}"
        )

    def print_file(self, file):
        print(f"\t{file.name}")
        for csum in file.checksums:
            print(f"\t\tchecksum: {csum.id} {csum.status}")
            print(f"\t\t          job_id {csum.job_id}")
            print(
                f"\t\t          started_at {csum.checksum_ended_at} ended_at {csum.checksum_ended_at}"
            )
        for validation in file.validations:
            print(
                f"\t\tvalidation: {validation.id} {validation.status} ended_at {validation.validation_ended_at}"
            )
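A hypothetical command-line entry point for DbDumper (not part of the example above; the argument handling is assumed):

if __name__ == '__main__':
    import sys
    dumper = DbDumper()
    if len(sys.argv) > 1:
        # dump a single area, optionally narrowed to one file
        dumper.dump_one_area(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
    else:
        dumper.dump_all()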
Example #7
class TestChecksumApi(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        # Environment
        self.api_key = "foo"
        self.environment = {
            'INGEST_API_KEY': self.api_key,
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogo_image',
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()

        self.db = DBSessionMaker().session()

        # Authentication
        self.authentication_header = {'Api-Key': self.api_key}
        # Setup app
        self.client = client_for_test_api_server()

    def tearDown(self):
        super().tearDown()
        self.environmentor.exit()

    @patch('upload.lambdas.api_server.v1.area.IngestNotifier.format_and_send_notification')
    def test_get_checksum__for_a_file_with_no_checksum_records__returns_status_unscheduled(self, mock_fasn):
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        UploadedFile(upload_area, s3object=s3obj)  # creates file record

        response = self.client.get(f"/v1/area/{upload_area.uuid}/foo.json/checksum")

        checksum_status = response.get_json()['checksum_status']
        self.assertEqual("UNSCHEDULED", checksum_status)

    @patch('upload.lambdas.api_server.v1.area.IngestNotifier.format_and_send_notification')
    def test_get_checksum__for_a_file_with_checksum_records__returns_the_most_recent_record_status(self, mock_fasn):
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                       checksum_id=checksum_id,
                                       job_id='12345',
                                       status="SCHEDULED")
        checksum_event.create_record()

        response = self.client.get(f"/v1/area/{upload_area.uuid}/{uploaded_file.name}/checksum")

        info = response.get_json()
        self.assertEqual("SCHEDULED", info['checksum_status'])
        self.assertEqual(uploaded_file.checksums, info['checksums'])

    @patch('upload.lambdas.api_server.v1.area.IngestNotifier.format_and_send_notification')
    def test_post_checksum__with_a_checksumming_payload__updates_db_record(self, mock_format_and_send_notification):
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                       checksum_id=checksum_id,
                                       job_id='12345',
                                       status="SCHEDULED")
        checksum_event.create_record()

        response = self.client.post(f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
                                    headers=self.authentication_header,
                                    json={
                                        "status": "CHECKSUMMING",
                                        "job_id": checksum_event.job_id,
                                        "payload": uploaded_file.info()
                                    })

        self.assertEqual(204, response.status_code)
        db_checksum = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
        self.assertEqual("CHECKSUMMING", db_checksum.status)

        mock_format_and_send_notification.assert_not_called()

    @patch('upload.lambdas.api_server.v1.area.IngestNotifier.format_and_send_notification')
    def test_post_checksum__with_a_checksummed_payload__updates_db_records_and_notifies_ingest(self, mock_fasn):
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                       checksum_id=checksum_id,
                                       job_id='12345',
                                       status="SCHEDULED")
        checksum_event.create_record()
        checksums = {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
        response = self.client.post(f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
                                    headers=self.authentication_header,
                                    json={
                                        "status": "CHECKSUMMED",
                                        "job_id": checksum_event.job_id,
                                        "payload": {
                                            "upload_area_id": upload_area.db_id,
                                            "name": uploaded_file.name,
                                            "checksums": checksums
                                        }
                                    })

        self.assertEqual(204, response.status_code)

        # Checksum record status should be updated
        db_checksum = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
        self.assertEqual("CHECKSUMMED", db_checksum.status)

        # Checksums should be stored in File record
        db_file = self.db.query(DbFile).filter(DbFile.id == uploaded_file.db_id).one()
        self.assertEqual(checksums, db_file.checksums)

        # Ingest should be notified
        mock_fasn.assert_called()

    @patch('upload.lambdas.api_server.v1.area.IngestNotifier.format_and_send_notification')
    def test_post_checksum__for_an_obj_without_tags__updates_db_but_does_not_notify_ingest(self, mock_fasn):
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json', checksums={})
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                       checksum_id=checksum_id,
                                       job_id='12345',
                                       status="SCHEDULED")
        checksum_event.create_record()
        response = self.client.post(f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
                                    headers=self.authentication_header,
                                    json={
                                        "status": "CHECKSUMMED",
                                        "job_id": checksum_event.job_id,
                                        "payload": uploaded_file.info()
                                    })

        self.assertEqual(204, response.status_code)
        db_checksum = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
        self.assertEqual("CHECKSUMMED", db_checksum.status)

        mock_fasn.assert_not_called()

    @patch('upload.lambdas.api_server.v1.area.IngestNotifier.format_and_send_notification')
    def test_checksum_statuses_for_upload_area(self, mock_format_and_send_notification):
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)

        checksum1_id = str(uuid.uuid4())
        checksum2_id = str(uuid.uuid4())
        checksum3_id = str(uuid.uuid4())

        s3obj1 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo1.json')
        s3obj2 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo2.json')
        s3obj3 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo3.json')
        s3obj4 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo4.json')
        s3obj5 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo5.json')

        f1 = UploadedFile(upload_area, s3object=s3obj1)
        f2 = UploadedFile(upload_area, s3object=s3obj2)
        f3 = UploadedFile(upload_area, s3object=s3obj3)
        UploadedFile(upload_area, s3object=s3obj4)
        UploadedFile(upload_area, s3object=s3obj5)

        checksum1_event = ChecksumEvent(file_id=f1.db_id, checksum_id=checksum1_id, job_id='123', status="SCHEDULED")
        checksum2_event = ChecksumEvent(file_id=f2.db_id, checksum_id=checksum2_id, job_id='456', status="CHECKSUMMING")
        checksum3_event = ChecksumEvent(file_id=f3.db_id, checksum_id=checksum3_id, job_id='789', status="CHECKSUMMED")
        checksum1_event.create_record()
        checksum2_event.create_record()
        checksum3_event.create_record()

        response = self.client.get(f"/v1/area/{upload_area.uuid}/checksums")
        expected_data = {
            'CHECKSUMMED': 1,
            'CHECKSUMMING': 1,
            'CHECKSUMMING_UNSCHEDULED': 2,
            'SCHEDULED': 1,
            'TOTAL_NUM_FILES': 5
        }

        assert response.get_json() == expected_data
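The final test expects per-status counts from the /checksums endpoint. One hedged sketch of how such a tally could be computed with SQLAlchemy (illustrative only; the service's real query, and derived keys such as TOTAL_NUM_FILES and CHECKSUMMING_UNSCHEDULED, are not shown in these snippets):

from sqlalchemy import func

def checksum_status_counts(db, upload_area_db_id):
    # Count checksum records per status for one upload area.
    rows = (db.query(DbChecksum.status, func.count(DbChecksum.id))
              .join(DbFile, DbFile.id == DbChecksum.file_id)
              .filter(DbFile.upload_area_id == upload_area_db_id)
              .group_by(DbChecksum.status)
              .all())
    return dict(rows)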
Example #8
class TestUploadedFile(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()

        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

    def create_file_record(self, s3object, checksums=None):
        record = DbFile(s3_key=s3object.key,
                        s3_etag=s3object.e_tag.strip('\"'),
                        name=os.path.basename(s3object.key),
                        upload_area_id=self.upload_area.db_id,
                        size=s3object.content_length,
                        checksums=checksums)
        self.db.add(record)
        self.db.commit()
        return record

    def tearDown(self):
        super().tearDown()

    def test_create__creates_a_new_s3_object_and_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        content_type = "application/octet-stream; dcp-type=data"
        file_content = "file1_content"

        uf = UploadedFile.create(upload_area=self.upload_area,
                                 name=filename,
                                 content_type=content_type,
                                 data=file_content)

        self.assertIsInstance(uf, UploadedFile)
        # S3 Object
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.upload_bucket.Object(s3_key)
        self.assertEqual(content_type, s3object.content_type)
        self.assertEqual(file_content.encode('utf8'), s3object.get()['Body'].read())
        # DB Record
        record = self.db.query(DbFile).filter(DbFile.s3_key == s3_key,
                                              DbFile.s3_etag == s3object.e_tag.strip('\"')).one()
        self.assertEqual(s3_key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('\"'), record.s3_etag)
        self.assertEqual(len(file_content), record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__given_existing_entities__initializes_properties_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        # Links to objects
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(self.upload_area, uf.upload_area)
        # Persisted properties
        self.assertEqual(file_record.id, uf.db_id)
        self.assertEqual(s3object.key, uf.s3_key)
        self.assertEqual(s3object.e_tag.strip('\"'), uf.s3_etag)
        self.assertEqual(self.upload_area.db_id, uf._properties['upload_area_id'])
        self.assertEqual(file_record.name, uf.name)
        self.assertEqual(s3object.content_length, uf.size)

    def test_init__when_no_db_record_exists__creates_a_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")

        with self.assertRaises(NoResultFound):
            self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                         DbFile.s3_etag == s3object.e_tag.strip('\"')).one()

        uf = UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record = self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                              DbFile.s3_etag == s3object.e_tag.strip('\"')).one()
        self.assertEqual(record.id, uf.db_id)
        self.assertEqual(s3object.key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('\"'), record.s3_etag)
        self.assertEqual(s3object.content_length, record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__doesnt_create_db_record_if_one_already_exists(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.create_s3_object(s3_key)
        self.create_file_record(s3object)
        record_count_before = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()

        UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record_count_after = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()
        self.assertEqual(record_count_before, record_count_after)

    def test_from_s3_key__initializes_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_s3_key(self.upload_area, s3_key=s3object.key)

        self.assertEqual(self.upload_area, uf.upload_area)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_from_db_id__initializes_correctly_and_figures_out_which_upload_area_to_use(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_db_id(file_record.id)

        self.assertEqual(self.upload_area.uuid, uf.upload_area.uuid)
        self.assertEqual(self.upload_area.db_id, uf.upload_area.db_id)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_refresh__picks_up_changed_content_type(self):
        filename = f"file-{random.randint(0, 999999999)}"
        old_content_type = "application/octet-stream"  # missing dcp-type
        new_content_type = "application/octet-stream; dcp-type=data"
        s3object = self.create_s3_object(object_key=f"{self.upload_area.uuid}/{filename}",
                                         content_type=old_content_type)
        # create UploadedFile
        uf = UploadedFile.from_s3_key(upload_area=self.upload_area, s3_key=s3object.key)
        # Change media type on S3 object
        s3object.copy_from(CopySource={'Bucket': self.upload_config.bucket_name, 'Key': s3object.key},
                           MetadataDirective="REPLACE",
                           ContentType=new_content_type)

        self.assertEqual(old_content_type, uf.content_type)

        uf.refresh()

        self.assertEqual(new_content_type, uf.content_type)

    def test_checksums_setter_saves_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)
        uf = UploadedFile.from_db_id(file_record.id)

        uf.checksums = {'foo': 'bar'}

        self.db.refresh(file_record)
        self.assertEqual({'foo': 'bar'}, file_record.checksums)

    def test_info(self):
        test_file = FixtureFile.factory("foo")
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo", content=test_file.contents)
        file_record = self.create_file_record(s3object, checksums=test_file.checksums)
        uf = UploadedFile(self.upload_area, s3object=s3object)

        self.assertEqual({
            'upload_area_id': self.upload_area.uuid,
            'name': file_record.name,
            'size': s3object.content_length,
            'content_type': s3object.content_type,
            'url': f"s3://{s3object.bucket_name}/{s3object.key}",
            'checksums': test_file.checksums,
            'last_modified': s3object.last_modified.isoformat()
        }, uf.info())
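The tests above lean on a create_s3_object helper inherited from UploadTestCaseUsingMockAWS. A hedged sketch of its assumed shape, consistent with the call sites (object_key, optional content and content_type):

 def create_s3_object(self, object_key, content="file_content",
                      content_type="application/octet-stream; dcp-type=data"):
     # Write a small object into the mock upload bucket and return it.
     s3object = self.upload_bucket.Object(object_key)
     body = content if isinstance(content, bytes) else content.encode('utf8')
     s3object.put(Body=body, ContentType=content_type)
     return s3object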
Example #9
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self.batch = boto3.client('batch')
     self.uri = None
     self.db_session_maker = DBSessionMaker()
Example #10
class TestUploadService(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.batch = boto3.client('batch')
        self.uri = None
        self.db_session_maker = DBSessionMaker()

    def setUp(self):
        _start_time = time.time()
        self.api_url = f"https://{os.environ['API_HOST']}/v1"
        self.upload_config = UploadConfig()
        self.auth_headers = {'Api-Key': self.upload_config.api_key}
        self.deployment_stage = os.environ['DEPLOYMENT_STAGE']
        self.upload_area_uuid = "deadbeef-dead-dead-dead-%012d" % random.randint(
            0, 999999999999)
        self.verbose = True
        _end_time = time.time()
        print(f"Total startup time: {_end_time - _start_time} seconds.")

    def test__upload_small_file__successful(self):
        # Test variables
        _start_time = time.time()
        _small_file = FixtureFile.factory('small_file')

        # Run test
        print(
            f"\n\nUsing environment {self.deployment_stage} at URL {self.api_url}.\n"
        )
        self._execute_create_upload_area()

        self._execute_upload_file_using_cli(_small_file.path)
        self._verify_file_was_checksummed_inline(_small_file)

        _validation_id = self._execute_validate_file(_small_file)
        self._verify_file_validation_status(
            _validation_id
        )  # default parameters checks for success in validation

        self._execute_forget_upload_area()
        self._execute_delete_upload_area()

        _end_time = time.time()
        print(
            f"Total test_upload__small_file__successful time: {_end_time - _start_time} seconds."
        )

    def test__upload_large_file__successful(self):
        # Test variables
        _start_time = time.time()
        _large_file = FixtureFile.factory('10241MB_file')

        # Run test
        print(
            f"\n\nUsing environment {self.deployment_stage} at URL {self.api_url}.\n"
        )
        self._execute_create_upload_area()

        self._execute_upload_file_using_cli(_large_file.url)
        self._verify_file_is_checksummed_via_batch(_large_file)

        self._execute_forget_upload_area()
        self._execute_delete_upload_area()

        _end_time = time.time()
        print(
            f"Total test__upload_large_file__successful time: {_end_time - _start_time} seconds."
        )

    def test__upload_invalid_file__validation_result_shows_invalid_state(self):
        # Test variables
        _start_time = time.time()
        _invalid_file = FixtureFile.factory('small_invalid_file')

        # Run test
        print(
            f"\n\nUsing environment {self.deployment_stage} at URL {self.api_url}.\n"
        )
        self._execute_create_upload_area()

        self._execute_upload_file_using_cli(_invalid_file.path)
        self._verify_file_was_checksummed_inline(_invalid_file)

        _validation_id = self._execute_validate_file(_invalid_file)

        # Verify that the validation result of the file is invalid. This is designated by an exit code of 1 and the
        # presence of an error message saying that file is invalid.
        self._verify_file_validation_status(_validation_id, 1, "invalid")

        self._execute_forget_upload_area()
        self._execute_delete_upload_area()

        _end_time = time.time()
        print(
            f"Total test__upload_invalid_file__validation_result_shows_invalid_state time: {_end_time - _start_time} "
            f"seconds.")

    def _execute_create_upload_area(self):
        response = self._make_request(
            description="CREATE UPLOAD AREA",
            verb='POST',
            url=f"{self.api_url}/area/{self.upload_area_uuid}",
            headers=self.auth_headers,
            expected_status=201)
        data = json.loads(response)
        self.uri = data['uri']
        self.assertEqual('UNLOCKED', self._get_upload_area_record_status())

    def _execute_upload_file_using_cli(self, file_location):
        self._run_cli_command("SELECT UPLOAD AREA",
                              ['hca', 'upload', 'select', self.uri])
        self._run_cli_command("UPLOAD FILE USING CLI",
                              ['hca', 'upload', 'files', file_location])

    def _execute_validate_file(self, test_file):
        response = self._make_request(
            description="VALIDATE",
            verb='PUT',
            url=f"{self.api_url}/area/{self.upload_area_uuid}/{test_file.name}/validate",
            expected_status=200,
            headers=self.auth_headers,
            json={
                "validator_image": "humancellatlas/upload-validator-example:14"
            })
        validation_id = json.loads(response)['validation_id']

        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('SCHEDULED', timeout_seconds=MINUTE_SEC)

        validation_job_id = self._get_validation_record_job_id(validation_id)

        WaitFor(self._get_batch_job_status, validation_job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)

        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('VALIDATED', timeout_seconds=MINUTE_SEC)

        return validation_id

    def _execute_forget_upload_area(self):
        self._run_cli_command(
            "FORGET UPLOAD AREA",
            ['hca', 'upload', 'forget', self.upload_area_uuid])

    def _execute_delete_upload_area(self):
        self._make_request(description="DELETE UPLOAD AREA",
                           verb='DELETE',
                           url=f"{self.api_url}/area/{self.upload_area_uuid}",
                           headers=self.auth_headers,
                           expected_status=202)
        WaitFor(self._get_upload_area_record_status) \
            .to_return_value('DELETED', timeout_seconds=MINUTE_SEC)

    def _verify_file_was_checksummed_inline(self, test_file):
        """ For files that are smaller than 10G, we expect that the file will be check-summed inline. This means that
        there is no need to schedule a job in batch and no job id is given to the checksum record."""
        print("VERIFYING FILE WAS CHECKSUMMED INLINE...")

        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('CHECKSUMMED', timeout_seconds=300)

        # Verify that the inline checksum was not assigned a job id.
        checksum_record = self._get_checksum_record(test_file.name)
        self.assertIsNone(checksum_record.job_id)

        # Check file record now contains checksums
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        self.assertEqual(test_file.checksums, file_record.checksums)

        # Check S3 object has checksum tags
        tagging = boto3.client('s3').get_object_tagging(
            Bucket=self.upload_config.bucket_name,
            Key=f"{self.upload_area_uuid}/{test_file.name}")
        self.assertEqual(sorted(tagging['TagSet'], key=lambda x: x['Key']),
                         test_file.s3_tagset)

    def _verify_file_is_checksummed_via_batch(self, test_file):
        """ For files that are 10G or larger, we expect that the file will check-summed via batch. This means that it
        first will need to be scheduled and the checksum record will be given a respective job id."""
        print("VERIFYING FILE WAS CHECKSUMMED VIA BATCH...")

        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('SCHEDULED', timeout_seconds=30)
        checksum_record = self._get_checksum_record(test_file.name)
        WaitFor(self._get_batch_job_status, checksum_record.job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)
        checksum_record = self._get_checksum_record(test_file.name)

        self.assertEqual('CHECKSUMMED', checksum_record.status)

        # Check file record now contains checksums
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        self.assertEqual(test_file.checksums, file_record.checksums)

        # Check S3 object has checksum tags
        tagging = boto3.client('s3').get_object_tagging(
            Bucket=self.upload_config.bucket_name,
            Key=f"{self.upload_area_uuid}/{test_file.name}")
        self.assertEqual(sorted(tagging['TagSet'], key=lambda x: x['Key']),
                         test_file.s3_tagset)

    def _verify_file_validation_status(self,
                                       validation_id,
                                       expected_exit_code=0,
                                       expected_error_msg=''):
        # Get the validation status of the file
        _validation_results = self._get_validation_record(
            validation_id).results
        _actual_exit_code = _validation_results['exit_code']
        _actual_error_msg = _validation_results['stdout']

        self.assertEqual(expected_exit_code, _actual_exit_code)
        self.assertIn(expected_error_msg, _actual_error_msg)

    def _get_upload_area_record_status(self):
        record = self._get_upload_area_record()
        return record.status if record else None

    def _get_checksum_record_status(self, filename):
        record = self._get_checksum_record(filename)
        return record.status if record else None

    def _get_validation_record_job_id(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.job_id if record else None

    def _get_validation_record_status(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.status if record else None

    def _get_upload_area_record(self):
        db = self.db_session_maker.session()
        return db.query(DbUploadArea).filter(
            DbUploadArea.uuid == self.upload_area_uuid).one_or_none()

    def _get_checksum_record(self, filename):
        db = self.db_session_maker.session()
        s3_key = f"{self.upload_area_uuid}/{filename}"
        file_record = db.query(DbFile).filter(
            DbFile.s3_key == s3_key).one_or_none()
        if file_record is None:
            return None
        checksum_record = db.query(DbChecksum).filter(
            DbChecksum.file_id == file_record.id).one_or_none()
        return checksum_record

    def _get_validation_record(self, validation_id):
        db = self.db_session_maker.session()
        return db.query(DbValidation).filter(
            DbValidation.id == validation_id).one_or_none()

    def _get_batch_job_status(self, job_id):
        response = self.batch.describe_jobs(jobs=[job_id])
        self.assertEqual(1, len(response['jobs']))
        return response['jobs'][0]['status']

    def _make_request(self,
                      description,
                      verb,
                      url,
                      expected_status=None,
                      **options):
        print(description + ": ")
        print(f"{verb.upper()} {url}")

        method = getattr(requests, verb.lower())
        response = method(url, **options)

        print(f"-> {response.status_code}")
        if expected_status:
            self.assertEqual(expected_status, response.status_code)

        if response.content:
            print(response.content.decode('utf8'))

        return response.content

    def _run_cli_command(self, description, command, expected_returncode=0):
        print("\n" + description + ": ")
        print(' '.join(command))
        completed_process = subprocess.run(command, stdout=None, stderr=None)
        self.assertEqual(expected_returncode, completed_process.returncode)
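These integration tests poll through a WaitFor utility. A minimal sketch of the assumed interface (the poll interval and failure behavior are guesses):

import time

class WaitFor:
    def __init__(self, func, *args):
        self.func = func
        self.args = args

    def to_return_value(self, value, timeout_seconds=60):
        # Re-invoke func(*args) until it returns value or the timeout lapses.
        deadline = time.time() + timeout_seconds
        while time.time() < deadline:
            result = self.func(*self.args)
            if result == value:
                return result
            time.sleep(5)
        raise RuntimeError(f"Timed out waiting for {value!r}")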
Example #11
 def setUp(self):
     super().setUp()
     self.db_session_maker = DBSessionMaker()
     self.db = self.db_session_maker.session()
Example #12
class UploadAreaTest(UploadTestCaseUsingMockAWS):
    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
Example #13
 def __init__(self):
     self.db = DBSessionMaker().session()
Example #14
class TestUploadService(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.batch = boto3.client('batch')
        self.uri = None
        self.db_session_maker = DBSessionMaker()

    def setUp(self):
        self.test_start_time = time.time()
        self.upload_config = UploadConfig()
        self.upload_client = UploadService(
            deployment_stage=os.environ['DEPLOYMENT_STAGE'],
            api_token=self.upload_config.api_key)
        self.upload_area_uuid = "deadbeef-dead-dead-dead-%012d" % random.randint(
            0, 999999999999)
        print("")
        self._execute_create_upload_area()
        print("\tstartup time: %0.02f seconds." %
              (time.time() - self.test_start_time))

    def tearDown(self):
        test_end_time = time.time()
        print("\t%s took %0.02f seconds." %
              (self._testMethodName, test_end_time - self.test_start_time))
        self._execute_delete_upload_area()
        print("\tteardown time: %0.02f seconds." %
              (time.time() - test_end_time))

    # All tests are formatted into 2-3 sections separated by blank lines:
    #
    #   Setup preconditions (optional)
    #
    #   Do the thing we are testing
    #
    #   Test the thing was done

    def test_store_file_using_api(self):
        metadata_file = FixtureFile.factory('metadata_file.json')

        self.upload_area.store_file(
            filename=metadata_file.name,
            file_content=metadata_file.contents,
            content_type='application/json; dcp-type=metadata')

        self._verify_file_was_checksummed_inline(
            metadata_file)  # Implicitly tests file was created.

    def test_store_file_using_cli(self):
        """ Tests storing of a file directly in S3, then notification of Upload via REST API """
        small_file = FixtureFile.factory('small_file')

        self._execute_upload_file_using_cli(small_file.path)

        self._verify_file_was_checksummed_inline(
            small_file)  # Implicitly tests file was created.

    def test_store_file_using_cli__with_large_file__triggers_batch_checksumming(
            self):
        large_file = FixtureFile.factory('10241MB_file')

        self._execute_upload_file_using_cli(large_file.url)

        self._verify_file_is_checksummed_via_batch(large_file)

    def test_validate_file__with_valid_file__reports_validation_results(self):
        small_file = FixtureFile.factory('small_file')
        self.upload_area.store_file(
            filename=small_file.name,
            file_content=small_file.contents,
            content_type='application/json; dcp-type=data')

        response = self.upload_area.validate_files(
            file_list=[small_file.name],
            validator_image="humancellatlas/upload-validator-example:14")

        validation_id = response['validation_id']
        self._wait_for_validation_to_complete(validation_id)
        self._verify_file_validation_status(
            validation_id
        )  # default parameters checks for success in validation

    def test__upload_invalid_file__validation_result_shows_invalid_state(self):
        invalid_file = FixtureFile.factory('small_invalid_file')
        self.upload_area.store_file(
            filename=invalid_file.name,
            file_content=invalid_file.contents,
            content_type='application/json; dcp-type=data')

        response = self.upload_area.validate_files(
            file_list=[invalid_file.name],
            validator_image="humancellatlas/upload-validator-example:14")

        validation_id = response['validation_id']
        self._wait_for_validation_to_complete(validation_id)
        # Verify that the validation result of the file is invalid. This is designated by an exit code of 1 and the
        # presence of an error message saying that file is invalid.
        self._verify_file_validation_status(validation_id,
                                            expected_exit_code=1,
                                            expected_error_msg="invalid")

    def _execute_create_upload_area(self):
        self.upload_area = self.upload_client.create_area(
            self.upload_area_uuid)
        self.assertEqual('UNLOCKED', self._get_upload_area_record_status())
        print(f"\tCreated upload area {self.upload_area_uuid}")

    def _execute_upload_file_using_cli(self, file_location):
        self._run_cli_command('hca', 'upload', 'select',
                              str(self.upload_area.uri))
        self._run_cli_command('hca', 'upload', 'files', file_location)
        self._run_cli_command('hca', 'upload', 'forget', self.upload_area.uuid)

    def _wait_for_validation_to_complete(self, validation_id):
        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('SCHEDULED', timeout_seconds=MINUTE_SEC)

        validation_job_id = self._get_validation_record_job_id(validation_id)

        WaitFor(self._get_batch_job_status, validation_job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)

        WaitFor(self._get_validation_record_status, validation_id) \
            .to_return_value('VALIDATED', timeout_seconds=MINUTE_SEC)

    def _execute_delete_upload_area(self):
        print(f"\tDeleting upload area {self.upload_area.uuid}")
        self.upload_area.delete()
        WaitFor(self._get_upload_area_record_status) \
            .to_return_value('DELETED', timeout_seconds=MINUTE_SEC)

    def _verify_file_was_checksummed_inline(self, test_file):
        """ For files that are smaller than 10G, we expect that the file will be check-summed inline. This means that
        there is no need to schedule a job in batch and no job id is given to the checksum record."""
        print("\tVerifying file was checksummed inline...")

        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('CHECKSUMMED', timeout_seconds=300)

        # Verify that the inline checksum was not assigned a job id.
        checksum_record = self._get_checksum_record(test_file.name)
        self.assertIsNone(checksum_record.job_id)

        # Check file record now contains checksums
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        self.assertEqual(test_file.checksums, file_record.checksums)

        # Check S3 object has checksum tags
        tagging = boto3.client('s3').get_object_tagging(
            Bucket=self.upload_config.bucket_name,
            Key=f"{self.upload_area_uuid}/{test_file.name}")

        _actual_checksums = self._get_dict_representation_of_tagset_case_insensitive(
            tagging['TagSet'])
        _expected_checksums = self._get_dict_representation_of_tagset_case_insensitive(
            test_file.s3_tagset)
        self.assertDictEqual(_actual_checksums, _expected_checksums)

    def _verify_file_is_checksummed_via_batch(self, test_file):
        """ For files that are 10G or larger, we expect that the file will check-summed via batch. This means that it
        first will need to be scheduled and the checksum record will be given a respective job id."""
        print("\tVerifying file was checksummed via batch...")

        WaitFor(self._get_checksum_record_status, test_file.name) \
            .to_return_value('SCHEDULED', timeout_seconds=30)
        checksum_record = self._get_checksum_record(test_file.name)
        WaitFor(self._get_batch_job_status, checksum_record.job_id) \
            .to_return_value('SUCCEEDED', timeout_seconds=20 * MINUTE_SEC)
        checksum_record = self._get_checksum_record(test_file.name)

        self.assertEqual('CHECKSUMMED', checksum_record.status)

        # Check file record now contains checksums
        db = self.db_session_maker.session()
        file_record = db.query(DbFile).get(checksum_record.file_id)
        # Compare checksums case-insensitively across the union of keys.
        for _checksum_function in set(
                list(test_file.checksums.keys()) +
                list(file_record.checksums.keys())):
            self.assertEqual(
                test_file.checksums[_checksum_function].lower(),
                file_record.checksums[_checksum_function].lower())

        # Check S3 object has checksum tags
        tagging = boto3.client('s3').get_object_tagging(
            Bucket=self.upload_config.bucket_name,
            Key=f"{self.upload_area_uuid}/{test_file.name}")

        _actual_checksums = self._get_dict_representation_of_tagset_case_insensitive(
            tagging['TagSet'])
        _expected_checksums = self._get_dict_representation_of_tagset_case_insensitive(
            test_file.s3_tagset)
        self.assertDictEqual(_actual_checksums, _expected_checksums)

    def _verify_file_validation_status(self,
                                       validation_id,
                                       expected_exit_code=0,
                                       expected_error_msg=''):
        # Get the validation status of the file
        _validation_results = self._get_validation_record(
            validation_id).results
        _actual_exit_code = _validation_results['exit_code']
        _actual_error_msg = _validation_results['stdout']

        self.assertEqual(expected_exit_code, _actual_exit_code)
        self.assertIn(expected_error_msg, _actual_error_msg)

    def _get_upload_area_record_status(self):
        record = self._get_upload_area_record()
        return record.status if record else None

    def _get_checksum_record_status(self, filename):
        record = self._get_checksum_record(filename)
        return record.status if record else None

    def _get_validation_record_job_id(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.job_id if record else None

    def _get_validation_record_status(self, validation_id):
        record = self._get_validation_record(validation_id)
        return record.status if record else None

    def _get_upload_area_record(self):
        db = self.db_session_maker.session()
        return db.query(DbUploadArea).filter(
            DbUploadArea.uuid == self.upload_area_uuid).one_or_none()

    def _get_checksum_record(self, filename):
        db = self.db_session_maker.session()
        s3_key = f"{self.upload_area_uuid}/{filename}"
        file_record = db.query(DbFile).filter(
            DbFile.s3_key == s3_key).one_or_none()
        if file_record is None:
            return None
        checksum_record = db.query(DbChecksum).filter(
            DbChecksum.file_id == file_record.id).one_or_none()
        return checksum_record

    def _get_validation_record(self, validation_id):
        db = self.db_session_maker.session()
        return db.query(DbValidation).filter(
            DbValidation.id == validation_id).one_or_none()

    def _get_batch_job_status(self, job_id):
        response = self.batch.describe_jobs(jobs=[job_id])
        self.assertEqual(1, len(response['jobs']))
        return response['jobs'][0]['status']

    def _run_cli_command(self, *command, expected_returncode=0):
        print("\t" + ' '.join(command))
        completed_process = subprocess.run(command, stdout=None, stderr=None)
        self.assertEqual(expected_returncode, completed_process.returncode)

    def _get_dict_representation_of_tagset_case_insensitive(self, tagset):
        _tagset_dict = {}
        for _item in tagset:
            _tagset_dict[_item['Key'].lower()] = _item['Value'].lower()
        return _tagset_dict
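An illustrative input/output pair for the tag-set helper above (the tag keys shown are hypothetical):

tagset = [{'Key': 'hca-dss-sha1', 'Value': 'ABC123'},
          {'Key': 'hca-dss-crc32c', 'Value': 'DEF456'}]
# _get_dict_representation_of_tagset_case_insensitive(tagset)
# -> {'hca-dss-sha1': 'abc123', 'hca-dss-crc32c': 'def456'}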