def test_sync_fail_boto3_client_exception(self, mock_boto3): """Test that if an client error, we raise that error.""" client_error = ClientError(error_response={"Error": {"Code": fake.word()}}, operation_name=Mock()) source_bucket_name = fake.slug() destination_bucket_name = fake.slug() schema_name = self.schema start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) source_object = Mock() source_object.key = f"{settings.S3_BUCKET_PATH}/{schema_name}{fake.file_path()}" source_object.bucket_name = source_bucket_name self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = (source_object,) mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from mock_copy_from.side_effect = client_error with self.assertRaises(ClientError): syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(schema_name, destination_bucket_name, date_range) source_object.restore_object.assert_not_called()
def test_sync_file_fail_disabled(self, mock_boto3): """Test syncing a file from one S3 bucket to another fails due to it being disabled.""" source_bucket_name = fake.slug() destination_bucket_name = fake.slug() account = fake.word() start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) source_object = Mock() source_object.key = f"{settings.S3_BUCKET_PATH}/{account}{fake.file_path()}" source_object.bucket_name = source_bucket_name self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = (source_object,) mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from with self.settings(ENABLE_S3_ARCHIVING=False): syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(account, destination_bucket_name, date_range) mock_resource.assert_called_with("s3", settings.S3_REGION) mock_buckets.assert_called_once_with(source_bucket_name) mock_filter.assert_not_called() mock_destination_object.assert_not_called() mock_copy_from.assert_not_called()
def test_sync_file_in_glacier(self, mock_boto3): """Test syncing a file in glacier will call restore, and raise an exception.""" client_error_glacier = ClientError( error_response={'Error': { 'Code': 'InvalidObjectState' }}, operation_name=Mock(), ) source_bucket_name = fake.slug() destination_bucket_name = fake.slug() schema_name = self.schema start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) source_object = Mock() source_object.key = f'{settings.S3_BUCKET_PATH}/{schema_name}{fake.file_path()}' source_object.bucket_name = source_bucket_name source_object.storage_class = 'GLACIER' self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = (source_object, ) mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from mock_copy_from.side_effect = client_error_glacier with self.assertRaises(SyncedFileInColdStorageError): syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(schema_name, destination_bucket_name, date_range) source_object.restore_object.assert_called()
def test_sync_glacier_file_restore_in_progress(self, mock_boto3): """Test syncing a file that is currently being restored from glacier will raise an exception.""" restore_in_progress_error = ClientError( error_response={"Error": {"Code": "RestoreAlreadyInProgress"}}, operation_name=Mock() ) source_bucket_name = fake.slug() destination_bucket_name = fake.slug() schema_name = self.schema start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) source_object = Mock() source_object.key = f"{settings.S3_BUCKET_PATH}/{schema_name}{fake.file_path()}" source_object.bucket_name = source_bucket_name source_object.storage_class = "GLACIER" self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = (source_object,) mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from mock_copy_from.side_effect = restore_in_progress_error with self.assertRaises(SyncedFileInColdStorageError): syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(schema_name, destination_bucket_name, date_range) source_object.restore_object.assert_not_called()
def test_sync_single_file_success(self, mock_boto3): """ Test syncing a file from one S3 bucket to another succeeds. Also assert that all the appropriate provider filters are iterated. """ source_bucket_name = fake.slug() destination_bucket_name = fake.slug() schema_name = self.schema start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) end_date = end_date - timedelta(days=1) days = rrule(DAILY, dtstart=start_date, until=end_date) months = rrule(MONTHLY, dtstart=start_date, until=end_date) source_object = Mock() source_object.key = f'{settings.S3_BUCKET_PATH}/{schema_name}{fake.file_path()}' source_object.bucket_name = source_bucket_name self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = (source_object, ) mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(schema_name, destination_bucket_name, date_range) mock_resource.assert_called_with('s3', settings.S3_REGION) mock_buckets.assert_any_call(source_bucket_name) mock_buckets.assert_any_call(destination_bucket_name) expected_filter_calls = self.get_expected_filter_calls( schema_name, days, months) mock_filter.assert_has_calls(expected_filter_calls, any_order=True) self.assertEqual(len(mock_filter.call_args_list), len(expected_filter_calls)) mock_destination_object.assert_called_with(source_object.key) mock_copy_from.assert_called_with( ACL='bucket-owner-full-control', CopySource={ 'Bucket': source_bucket_name, 'Key': source_object.key }, )
def test_sync_file_success(self, mock_boto3): """Test syncing a file from one S3 bucket to another succeeds.""" source_bucket_name = fake.slug() destination_bucket_name = fake.slug() account = fake.word() start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) end_date = end_date - timedelta(days=1) days = rrule(DAILY, dtstart=start_date, until=end_date) months = rrule(MONTHLY, dtstart=start_date, until=end_date) source_object = Mock() source_object.key = f'{settings.S3_BUCKET_PATH}/{account}{fake.file_path()}' source_object.bucket_name = source_bucket_name self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = (source_object, ) mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(account, destination_bucket_name, date_range) mock_resource.assert_called_with('s3', settings.S3_REGION) mock_buckets.assert_any_call(source_bucket_name) mock_buckets.assert_any_call(destination_bucket_name) for day, provider_type in product(days, _PROVIDER_TYPES): mock_filter.assert_any_call( Prefix=(f'{settings.S3_BUCKET_PATH}/{account}/{provider_type}/' f'{day.year:04d}/{day.month:02d}/{day.day:02d}/')) for month, provider_type in product(months, _PROVIDER_TYPES): mock_filter.assert_any_call( Prefix=(f'{settings.S3_BUCKET_PATH}/{account}/{provider_type}/' f'{day.year:04d}/{day.month:02d}/{day.day:02d}/')) mock_destination_object.assert_called_with(source_object.key) mock_copy_from.assert_called_with( ACL='bucket-owner-full-control', CopySource={ 'Bucket': source_bucket_name, 'Key': source_object.key }, )
def sync_data_to_customer(dump_request_uuid): """ Scheduled task to sync normalized data to our customers S3 bucket. If the sync request raises SyncedFileInColdStorageError, this task will automatically retry in a set amount of time. This time is to give the storage solution time to retrieve a file from cold storage. This task will retry 5 times, and then fail. """ dump_request = DataExportRequest.objects.get(uuid=dump_request_uuid) dump_request.status = DataExportRequest.PROCESSING dump_request.save() try: syncer = AwsS3Syncer(settings.S3_BUCKET_NAME) syncer.sync_bucket( dump_request.created_by.customer.schema_name, dump_request.bucket_name, (dump_request.start_date, dump_request.end_date), ) except ClientError: LOG.exception( f"Encountered an error while processing DataExportRequest " f"{dump_request.uuid}, for {dump_request.created_by}." ) dump_request.status = DataExportRequest.ERROR dump_request.save() return except SyncedFileInColdStorageError: LOG.info( f"One of the requested files is currently in cold storage for " f"DataExportRequest {dump_request.uuid}. This task will automatically retry." ) dump_request.status = DataExportRequest.WAITING dump_request.save() try: raise sync_data_to_customer.retry(countdown=10, max_retries=5) except MaxRetriesExceededError: LOG.exception( f"Max retires exceeded for restoring a file in cold storage for " f"DataExportRequest {dump_request.uuid}, for {dump_request.created_by}." ) dump_request.status = DataExportRequest.ERROR dump_request.save() return dump_request.status = DataExportRequest.COMPLETE dump_request.save()
def test_sync_file_fail_no_file(self, mock_boto3): """Test syncing a file from one S3 bucket to another fails due to no matching files.""" source_bucket_name = fake.slug() destination_bucket_name = fake.slug() account = fake.word() start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) end_date = end_date - timedelta(days=1) days = rrule(DAILY, dtstart=start_date, until=end_date) months = rrule(MONTHLY, dtstart=start_date, until=end_date) self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = () mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(account, destination_bucket_name, date_range) mock_resource.assert_called_with('s3', settings.S3_REGION) mock_buckets.assert_any_call(source_bucket_name) mock_buckets.assert_any_call(destination_bucket_name) for day in days: mock_filter.assert_any_call( Prefix= f'{settings.S3_BUCKET_PATH}/{account}/{day.month:02d}/{day.day:02d}/' ) for month in months: mock_filter.assert_any_call( Prefix= f'{settings.S3_BUCKET_PATH}/{account}/{month.month:02d}/00/') mock_destination_object.assert_not_called() mock_copy_from.assert_not_called()
def test_sync_file_fail_no_file(self, mock_boto3): """Test syncing a file from one S3 bucket to another fails due to no matching files.""" source_bucket_name = fake.slug() destination_bucket_name = fake.slug() schema_name = self.schema start_date = date(2019, 1, 1) end_date = date(2019, 3, 1) date_range = (start_date, end_date) end_date = end_date - timedelta(days=1) days = rrule(DAILY, dtstart=start_date, until=end_date) months = rrule(MONTHLY, dtstart=start_date, until=end_date) self.assertNotEqual(source_bucket_name, destination_bucket_name) mock_resource = mock_boto3.resource mock_buckets = mock_resource.return_value.Bucket mock_filter = mock_buckets.return_value.objects.filter mock_filter.return_value = () mock_destination_object = mock_buckets.return_value.Object mock_copy_from = mock_destination_object.return_value.copy_from syncer = AwsS3Syncer(source_bucket_name) syncer.sync_bucket(schema_name, destination_bucket_name, date_range) mock_resource.assert_called_with("s3", settings.S3_REGION) mock_buckets.assert_any_call(source_bucket_name) mock_buckets.assert_any_call(destination_bucket_name) expected_filter_calls = self.get_expected_filter_calls( schema_name, days, months) mock_filter.assert_has_calls(expected_filter_calls, any_order=True) self.assertEqual(len(mock_filter.call_args_list), len(expected_filter_calls)) mock_destination_object.assert_not_called() mock_copy_from.assert_not_called()