Example #1

# NOTE: enclosing TestCase assumed; the source snippet begins mid-class.
class UploadAreaTest(UploadTestCaseUsingMockAWS):

    def test_update_or_create__when_area_exists__retrieves_db_record(self):
        db_area = self.create_upload_area()

        area = UploadArea(uuid=db_area.uuid)
        area.update_or_create()

        self.assertEqual(db_area.id, area.db_id)
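
# For context, a minimal self-contained sketch of the find-or-create semantics
# that update_or_create() is expected to provide: the test above asserts that
# an existing area is retrieved rather than duplicated. The in-memory "table"
# below is an illustrative stand-in for the real database, not the actual API.
_AREAS_BY_UUID = {}

def update_or_create_sketch(area_uuid):
    record = _AREAS_BY_UUID.get(area_uuid)
    if record is None:
        # No row for this UUID yet: create and "persist" one.
        record = {'id': len(_AREAS_BY_UUID) + 1, 'uuid': area_uuid}
        _AREAS_BY_UUID[area_uuid] = record
    return record
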
class ChecksumDaemonTest(UploadTestCaseUsingMockAWS):

    def _make_dbfile(self, upload_area, test_file, checksums=None):
        return DbFile(s3_key=f"{upload_area.uuid}/{test_file.name}", s3_etag=test_file.e_tag,
                      upload_area_id=upload_area.db_id, name=test_file.name, size=test_file.size,
                      checksums=checksums)

    def setUp(self):
        super().setUp()
        # Environment
        self.environment = {
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogoimage'
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()

        # Upload area
        self.area_uuid = str(uuid.uuid4())
        self.upload_area = UploadArea(self.area_uuid)
        self.upload_area.update_or_create()
        # Daemon
        context = Mock()
        self.daemon = ChecksumDaemon(context)
        # File
        self.small_file = FixtureFile.factory('foo')
        self.file_key = f"{self.area_uuid}/{self.small_file.name}"
        self.object = self.upload_bucket.Object(self.file_key)
        self.object.put(Body=self.small_file.contents, ContentType=self.small_file.content_type)
        # Event: a minimal S3 ObjectCreated:Put notification, shaped like what the daemon consumes
        self.events = {'Records': [
            {'eventVersion': '2.0', 'eventSource': 'aws:s3', 'awsRegion': 'us-east-1',
             'eventTime': '2017-09-15T00:05:10.378Z', 'eventName': 'ObjectCreated:Put',
             'userIdentity': {'principalId': 'AWS:AROAI4WRRXW2K3Y2IFL6Q:upload-api-dev'},
             'requestParameters': {'sourceIPAddress': '52.91.56.220'},
             'responseElements': {'x-amz-request-id': 'FEBC85CADD1E3A66',
                                  'x-amz-id-2': 'xxx'},
             's3': {'s3SchemaVersion': '1.0',
                    'configurationId': 'NGZjNmM0M2ItZTk0Yi00YTExLWE2NDMtMzYzY2UwN2EyM2Nj',
                    'bucket': {'name': self.upload_config.bucket_name,
                               'ownerIdentity': {'principalId': 'A29PZ5XRQWJUUM'},
                               'arn': f'arn:aws:s3:::{self.upload_config.bucket_name}'},
                    'object': {'key': self.file_key,
                               'size': self.small_file.size,
                               'eTag': self.small_file.e_tag,
                               'sequencer': '0059BB193641C4EAB0'}}}]}
        # Database
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()

    # The mock argument implies a patch on send_notification; the decorator
    # (assumed target) is restored here:
    @patch('upload.common.ingest_notifier.IngestNotifier.send_notification')
    def test_format_and_send_notification(self, mock_send_notification):
        area_uuid = str(uuid.uuid4())
        upload_area = UploadArea(area_uuid)
        upload_area.update_or_create()
        upload_area._db_load()
        file = upload_area.store_file("test_file_name", "test_file_content",
                                      "application/json; dcp-type=data")
        ingest_notifier = IngestNotifier("file_uploaded", file_id=file.db_id)

        test_payload = {
            'names': "[test_file_name]",
            'upload_area_id': area_uuid
        }
        notification_id = ingest_notifier.format_and_send_notification(
            test_payload)

        record = UploadDB().get_pg_record("notification",
                                          notification_id,
                                          column="id")
        self.assertEqual(record['status'], "DELIVERED")
        self.assertEqual(record['file_id'], file.db_id)
        self.assertEqual(record['payload'], test_payload)
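
# A rough, self-contained sketch of the flow the test above verifies: wrap the
# payload in a notification record, attempt delivery, then mark the record
# DELIVERED. Transport and persistence details here are assumptions, not the
# real IngestNotifier implementation.
import json
import uuid

class IngestNotifierSketch:
    def __init__(self, notification_type, file_id=None):
        self.notification_type = notification_type
        self.file_id = file_id
        self.records = {}  # stand-in for the "notification" table

    def format_and_send_notification(self, payload):
        notification_id = str(uuid.uuid4())
        self.records[notification_id] = {'status': 'QUEUED',
                                         'file_id': self.file_id,
                                         'payload': payload}
        self._deliver(json.dumps(payload))  # stand-in for the actual delivery call
        self.records[notification_id]['status'] = 'DELIVERED'
        return notification_id

    def _deliver(self, body):
        pass  # the real service sends this to the ingest system
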
Example #4
class TestUploadedFile(UploadTestCaseUsingMockAWS):

    def setUp(self):
        super().setUp()
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()

        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

    def create_file_record(self, s3object, checksums=None):
        record = DbFile(s3_key=s3object.key,
                        s3_etag=s3object.e_tag.strip('\"'),
                        name=os.path.basename(s3object.key),
                        upload_area_id=self.upload_area.db_id,
                        size=s3object.content_length,
                        checksums=checksums)
        self.db.add(record)
        self.db.commit()
        return record

    def tearDown(self):
        super().tearDown()

    def test_create__creates_a_new_s3_object_and_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        content_type = "application/octet-stream; dcp-type=data"
        file_content = "file1_content"

        uf = UploadedFile.create(upload_area=self.upload_area,
                                 name=filename,
                                 content_type=content_type,
                                 data=file_content)

        self.assertIsInstance(uf, UploadedFile)
        # S3 Object
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.upload_bucket.Object(s3_key)
        self.assertEqual(content_type, s3object.content_type)
        self.assertEqual(file_content.encode('utf8'), s3object.get()['Body'].read())
        # DB Record
        record = self.db.query(DbFile).filter(DbFile.s3_key == s3_key,
                                              DbFile.s3_etag == s3object.e_tag.strip('\"')).one()
        self.assertEqual(s3_key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('\"'), record.s3_etag)
        self.assertEqual(len(file_content), record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__given_existing_entities__initializes_properties_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        # Links to objects
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(self.upload_area, uf.upload_area)
        # Persisted properties
        self.assertEqual(file_record.id, uf.db_id)
        self.assertEqual(s3object.key, uf.s3_key)
        self.assertEqual(s3object.e_tag.strip('\"'), uf.s3_etag)
        self.assertEqual(self.upload_area.db_id, uf._properties['upload_area_id'])
        self.assertEqual(file_record.name, uf.name)
        self.assertEqual(s3object.content_length, uf.size)

    def test_init__when_no_db_record_exists__creates_a_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")

        with self.assertRaises(NoResultFound):
            self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                         DbFile.s3_etag == s3object.e_tag.strip('\"')).one()

        uf = UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record = self.db.query(DbFile).filter(DbFile.s3_key == s3object.key,
                                              DbFile.s3_etag == s3object.e_tag.strip('\"')).one()
        self.assertEqual(record.id, uf.db_id)
        self.assertEqual(s3object.key, record.s3_key)
        self.assertEqual(filename, record.name)
        self.assertEqual(s3object.e_tag.strip('\"'), record.s3_etag)
        self.assertEqual(s3object.content_length, record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)

    def test_init__doesnt_create_db_record_if_one_already_exists(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.create_s3_object(s3_key)
        self.create_file_record(s3object)
        record_count_before = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()

        UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record_count_after = self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()
        self.assertEqual(record_count_before, record_count_after)

    def test_from_s3_key__initializes_correctly(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_s3_key(self.upload_area, s3_key=s3object.key)

        self.assertEqual(self.upload_area, uf.upload_area)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_from_db_id__initializes_correctly_and_figures_out_which_upload_area_to_use(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile.from_db_id(file_record.id)

        self.assertEqual(self.upload_area.uuid, uf.upload_area.uuid)
        self.assertEqual(self.upload_area.db_id, uf.upload_area.db_id)
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(file_record.id, uf.db_id)

    def test_refresh__picks_up_changed_content_type(self):
        filename = f"file-{random.randint(0, 999999999)}"
        old_content_type = "application/octet-stream"  # missing dcp-type
        new_content_type = "application/octet-stream; dcp-type=data"
        s3object = self.create_s3_object(object_key=f"{self.upload_area.uuid}/{filename}",
                                         content_type=old_content_type)
        # create UploadedFile
        uf = UploadedFile.from_s3_key(upload_area=self.upload_area, s3_key=s3object.key)
        # Change media type on S3 object
        s3object.copy_from(CopySource={'Bucket': self.upload_config.bucket_name, 'Key': s3object.key},
                           MetadataDirective="REPLACE",
                           ContentType=new_content_type)

        self.assertEqual(old_content_type, uf.content_type)

        uf.refresh()

        self.assertEqual(new_content_type, uf.content_type)

    def test_checksums_setter_saves_db_record(self):
        filename = f"file-{random.randint(0, 999999999)}"
        s3object = self.create_s3_object(f"{self.upload_area_id}/{filename}")
        file_record = self.create_file_record(s3object)
        uf = UploadedFile.from_db_id(file_record.id)

        uf.checksums = {'foo': 'bar'}

        self.db.refresh(file_record)
        self.assertEqual({'foo': 'bar'}, file_record.checksums)

    def test_info(self):
        test_file = FixtureFile.factory("foo")
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo", content=test_file.contents)
        file_record = self.create_file_record(s3object, checksums=test_file.checksums)
        uf = UploadedFile(self.upload_area, s3object=s3object)

        self.assertEqual({
            'upload_area_id': self.upload_area.uuid,
            'name': file_record.name,
            'size': s3object.content_length,
            'content_type': s3object.content_type,
            'url': f"s3://{s3object.bucket_name}/{s3object.key}",
            'checksums': test_file.checksums,
            'last_modified': s3object.last_modified.isoformat()
        }, uf.info())
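
# Sketch of the get-or-create behavior the two __init__ tests above pin down:
# look up a record by (s3_key, s3_etag) and create one only when none exists,
# so constructing an UploadedFile twice never duplicates records. Dict-backed
# and illustrative only; the real class is SQLAlchemy- and boto3-backed.
_FILE_RECORDS = {}

def load_or_create_file_record(s3_key, s3_etag, name, size, upload_area_id):
    key = (s3_key, s3_etag)
    if key not in _FILE_RECORDS:
        _FILE_RECORDS[key] = {'id': len(_FILE_RECORDS) + 1, 's3_key': s3_key,
                              's3_etag': s3_etag, 'name': name, 'size': size,
                              'upload_area_id': upload_area_id}
    return _FILE_RECORDS[key]
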
Example #5
class TestValidationScheduler(UploadTestCaseUsingMockAWS):
    def setUp(self):
        super().setUp()

        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

    def tearDown(self):
        super().tearDown()

    @patch('upload.common.upload_area.UploadedFile.size',
           MAX_FILE_SIZE_IN_BYTES + 1)
    def test_check_files_can_be_validated__when_files_are_too_large_for_validation__returns_false(
            self):
        uploaded_file = UploadedFile.create(
            upload_area=self.upload_area,
            name="file2",
            content_type="application/octet-stream; dcp-type=data",
            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])

        file_validatable = scheduler.check_files_can_be_validated()

        self.assertEqual(False, file_validatable)

    def test__create_validation_event__creates_event_with_correct_status(self):
        uploaded_file = UploadedFile.create(
            upload_area=self.upload_area,
            name="file2#",
            content_type="application/octet-stream; dcp-type=data",
            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])
        validation_id = str(uuid.uuid4())

        validation_event = scheduler._create_validation_event(
            "test_docker_image", validation_id, None)

        self.assertEqual(validation_event.docker_image, "test_docker_image")
        self.assertEqual(validation_event.status, "SCHEDULING_QUEUED")

    def test__update_validation_event__updates_event_status(self):
        uploaded_file = UploadedFile.create(
            upload_area=self.upload_area,
            name="file2#",
            content_type="application/octet-stream; dcp-type=data",
            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])
        scheduler.batch_job_id = "123456"
        validation_id = str(uuid.uuid4())
        validation_event = scheduler._create_validation_event(
            "test_docker_image", validation_id, None)

        self.assertEqual(validation_event.job_id, None)
        validation_event = scheduler._update_validation_event(
            "test_docker_image", validation_id, None)

        self.assertEqual(validation_event.job_id, "123456")
        self.assertEqual(validation_event.status, "SCHEDULED")

    @patch('upload.common.upload_area.UploadedFile.size',
           MAX_FILE_SIZE_IN_BYTES - 1)
    def test_check_files_can_be_validated__when_files_are_not_too_large__returns_true(
            self):
        uploaded_file = UploadedFile.create(
            upload_area=self.upload_area,
            name="file2",
            content_type="application/octet-stream; dcp-type=data",
            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])

        file_validatable = scheduler.check_files_can_be_validated()

        self.assertEqual(True, file_validatable)

    def test_add_to_validation_sqs__adds_correct_event_to_queue(self):
        uploaded_file = UploadedFile.create(
            upload_area=self.upload_area,
            name="file2",
            content_type="application/octet-stream; dcp-type=data",
            data="file2_content")
        validation_scheduler = ValidationScheduler(self.upload_area_id,
                                                   [uploaded_file])

        validation_uuid = validation_scheduler.add_to_validation_sqs(
            ["filename123"], "test_docker_image", {"variable": "variable"},
            "123456")

        message = self.sqs.meta.client.receive_message(
            QueueUrl='test_validation_q_url')
        message_body = json.loads(message['Messages'][0]['Body'])
        record = UploadDB().get_pg_record("validation",
                                          validation_uuid,
                                          column='id')
        self.assertEqual(message_body["filenames"], ["filename123"])
        self.assertEqual(message_body["validation_id"], validation_uuid)
        self.assertEqual(message_body["validator_docker_image"],
                         "test_docker_image")
        self.assertEqual(message_body["environment"], {"variable": "variable"})
        self.assertEqual(message_body["orig_validation_id"], "123456")
        self.assertEqual(message_body["upload_area_uuid"],
                         uploaded_file.upload_area.uuid)
        self.assertEqual(record["status"], "SCHEDULING_QUEUED")
Example #6
class TestDssChecksums(UploadTestCaseUsingMockAWS):
    def setUp(self):
        super().setUp()

        self.upload_area_id = str(uuid.uuid4())
        self.upload_area = UploadArea(self.upload_area_id)
        self.upload_area.update_or_create()

        self.checksum_id = str(uuid.uuid4())
        self.job_id = str(uuid.uuid4())

        self.s3client = boto3.client('s3')

    def tearDown(self):
        super().tearDown()

    def test_it_acts_like_a_dict(self):
        checksums = DssChecksums(s3_object=None,
                                 checksums={
                                     'crc32c': 'a',
                                     'sha1': 'b',
                                     'sha256': 'c',
                                     's3_etag': 'd'
                                 })
        self.assertEqual(4, len(checksums))
        self.assertEqual('b', checksums['sha1'])
        self.assertIn('sha256', checksums)
        self.assertEqual(['crc32c', 's3_etag', 'sha1', 'sha256'],
                         sorted(checksums.keys()))

    def test_are_present__for_an_object_with_no_checksums__returns_false(self):
        filename = 'file1'
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id,
                                            filename,
                                            checksums={})

        self.assertFalse(DssChecksums(s3_object=s3obj).are_present())

    def test_are_present__for_an_object_with_partial_checksums__returns_false(
            self):
        filename = 'file2'
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id,
                                            filename,
                                            checksums={
                                                'sha1': '1',
                                                'sha256': '2'
                                            })

        self.assertFalse(DssChecksums(s3_object=s3obj).are_present())

    def test_are_present__for_an_object_with_all_checksums__returns_true(self):
        filename = 'file3'
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id,
                                            filename,
                                            checksums={
                                                'sha1': '1',
                                                'sha256': '2',
                                                's3_etag': '3',
                                                'crc32c': '4'
                                            })

        self.assertTrue(DssChecksums(s3_object=s3obj).are_present())

    def test_init_reads_checksums_from_s3_object(self):
        s3obj = self.create_s3_object(object_key="file4")
        tagging = [{
            'Key': 'hca-dss-sha1',
            'Value': '1'
        }, {
            'Key': 'hca-dss-sha256',
            'Value': '2'
        }, {
            'Key': 'hca-dss-crc32c',
            'Value': '3'
        }, {
            'Key': 'hca-dss-s3_etag',
            'Value': '4'
        }]
        self.s3client.put_object_tagging(Bucket=s3obj.bucket_name,
                                         Key=s3obj.key,
                                         Tagging={'TagSet': tagging})

        checksums = DssChecksums(s3_object=s3obj)

        self.assertEqual(
            {
                'crc32c': '3',
                'sha1': '1',
                'sha256': '2',
                's3_etag': '4'
            }, checksums)

    def test_compute(self):
        test_file = FixtureFile.factory("foo")
        s3obj = self.mock_upload_file_to_s3(self.upload_area_id,
                                            test_file.name,
                                            contents=test_file.contents)

        self.assertEqual(
            DssChecksums(s3_object=s3obj).compute(), test_file.checksums)

    def test_save_as_tags_on_s3_object(self):
        s3obj = self.create_s3_object(object_key="foo")

        checksums = DssChecksums(s3obj,
                                 checksums={
                                     'sha1': 'a',
                                     'sha256': 'b',
                                     'crc32c': 'c',
                                     's3_etag': 'd'
                                 })
        checksums.save_as_tags_on_s3_object()

        self.assertEqual([{
            'Key': 'hca-dss-sha1',
            'Value': 'a'
        }, {
            'Key': 'hca-dss-sha256',
            'Value': 'b'
        }, {
            'Key': 'hca-dss-crc32c',
            'Value': 'c'
        }, {
            'Key': 'hca-dss-s3_etag',
            'Value': 'd'
        }],
                         self.s3client.get_object_tagging(
                             Bucket=self.upload_area.bucket_name,
                             Key=s3obj.key)['TagSet'])
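
# Round-trip sketch of the tag naming scheme the tests above rely on:
# DssChecksums persists each checksum as an S3 object tag prefixed with
# "hca-dss-". These helpers only demonstrate the dict <-> TagSet mapping;
# they are not the real class.
def checksums_to_tagset(checksums):
    return [{'Key': f"hca-dss-{name}", 'Value': value}
            for name, value in checksums.items()]

def tagset_to_checksums(tagset):
    prefix = 'hca-dss-'
    return {tag['Key'][len(prefix):]: tag['Value']
            for tag in tagset if tag['Key'].startswith(prefix)}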