def test_execute_no_suffix_without_destination_object(self, mock_hook):
    """Run the operator on SOURCE_OBJECT_2 with no destination object.

    The hook's ``list`` is stubbed to return SOURCE_FILES_LIST and the
    hook's ``copy`` mock is checked against MOCK_CALLS_EMPTY.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_2,
        destination_bucket=DESTINATION_BUCKET,
    )
    task.execute(None)

    # NOTE(review): MOCK_CALLS_EMPTY is defined outside this view; if it is an
    # empty list, assert_has_calls passes regardless of what was called -
    # confirm this assertion checks what is intended.
    hook.copy.assert_has_calls(MOCK_CALLS_EMPTY)
def test_execute_no_suffix_without_destination_object(self, mock_hook):
    """Run the operator on SOURCE_OBJECT_2 with no destination object.

    The hook's ``list`` is stubbed to return SOURCE_FILES_LIST and the
    hook's ``copy`` mock is checked against MOCK_CALLS_EMPTY.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_2,
        destination_bucket=DESTINATION_BUCKET,
    )
    task.execute(None)

    # NOTE(review): MOCK_CALLS_EMPTY is defined outside this view; if it is an
    # empty list, assert_has_calls passes regardless of what was called -
    # confirm this assertion checks what is intended.
    hook.copy.assert_has_calls(MOCK_CALLS_EMPTY)
def test_execute_prefix_and_suffix(self, mock_hook):
    """Check the listing arguments for SOURCE_OBJECT_WILDCARD_MIDDLE.

    After execution the hook's ``list`` must have been called exactly once
    with the source bucket, ``prefix="test"`` and ``delimiter="object"``.
    """
    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_MIDDLE,
        destination_bucket=DESTINATION_BUCKET,
    )
    task.execute(None)

    list_mock = mock_hook.return_value.list
    list_mock.assert_called_once_with(TEST_BUCKET, prefix="test", delimiter="object")
def test_execute_prefix_and_suffix(self, mock_hook):
    """Check the listing arguments for SOURCE_OBJECT_WILDCARD_MIDDLE.

    After execution the hook's ``list`` must have been called exactly once
    with the source bucket, ``prefix="test"`` and ``delimiter="object"``.
    """
    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_MIDDLE,
        destination_bucket=DESTINATION_BUCKET,
    )
    task.execute(None)

    list_mock = mock_hook.return_value.list
    list_mock.assert_called_once_with(TEST_BUCKET, prefix="test", delimiter="object")
def test_no_prefix_with_last_modified_time_with_false_cond(self, mock_hook):
    """No rewrite may happen when ``is_updated_after`` reports False.

    Uses a non-wildcard source object with ``last_modified_time=MOD_TIME_1``
    and the hook's ``is_updated_after`` stubbed to return False.
    """
    hook = mock_hook.return_value
    hook.is_updated_after.return_value = False

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=SOURCE_OBJECT_NO_WILDCARD,
        last_modified_time=MOD_TIME_1,
    )
    task.execute(None)

    hook.rewrite.assert_not_called()
def test_execute_no_prefix_with_no_last_modified_time(self, mock_hook):
    """A non-wildcard object with ``last_modified_time=None`` is rewritten once.

    The hook's ``rewrite`` must be called exactly once, with
    'test_object.txt' as both the source and the destination object name.
    """
    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=SOURCE_OBJECT_NO_WILDCARD,
        last_modified_time=None,
    )
    task.execute(None)

    rewrite_mock = mock_hook.return_value.rewrite
    rewrite_mock.assert_called_once_with(
        TEST_BUCKET, 'test_object.txt', DESTINATION_BUCKET, 'test_object.txt')
def test_execute_no_prefix_with_no_last_modified_time(self, mock_hook):
    """A non-wildcard object with ``last_modified_time=None`` is rewritten once.

    The hook's ``rewrite`` must be called exactly once, with
    'test_object.txt' as both the source and the destination object name.
    """
    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=SOURCE_OBJECT_NO_WILDCARD,
        last_modified_time=None,
    )
    task.execute(None)

    rewrite_mock = mock_hook.return_value.rewrite
    rewrite_mock.assert_called_once_with(
        TEST_BUCKET, 'test_object.txt', DESTINATION_BUCKET, 'test_object.txt')
def test_no_prefix_with_last_modified_time_with_false_cond(self, mock_hook):
    """No rewrite may happen when ``is_updated_after`` reports False.

    Uses a non-wildcard source object with ``last_modified_time=MOD_TIME_1``
    and the hook's ``is_updated_after`` stubbed to return False.
    """
    hook = mock_hook.return_value
    hook.is_updated_after.return_value = False

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=SOURCE_OBJECT_NO_WILDCARD,
        last_modified_time=MOD_TIME_1,
    )
    task.execute(None)

    hook.rewrite.assert_not_called()
def test_wc_with_last_modified_time_with_one_true_cond(self, mock_hook):
    """Only the object reported as updated is rewritten.

    ``list`` returns SOURCE_FILES_LIST and ``is_updated_after`` yields
    True, False, False on successive calls; ``rewrite`` must be called
    exactly once, for 'test_object/file1.txt'.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST
    hook.is_updated_after.side_effect = [True, False, False]

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=DESTINATION_BUCKET,
        last_modified_time=MOD_TIME_1,
    )
    task.execute(None)

    hook.rewrite.assert_called_once_with(
        TEST_BUCKET, 'test_object/file1.txt',
        DESTINATION_BUCKET, 'test_object/file1.txt')
def test_wc_with_last_modified_time_with_one_true_cond(self, mock_hook):
    """Only the object reported as updated is rewritten.

    ``list`` returns SOURCE_FILES_LIST and ``is_updated_after`` yields
    True, False, False on successive calls; ``rewrite`` must be called
    exactly once, for 'test_object/file1.txt'.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST
    hook.is_updated_after.side_effect = [True, False, False]

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=DESTINATION_BUCKET,
        last_modified_time=MOD_TIME_1,
    )
    task.execute(None)

    hook.rewrite.assert_called_once_with(
        TEST_BUCKET, 'test_object/file1.txt',
        DESTINATION_BUCKET, 'test_object/file1.txt')
def test_execute_more_than_1_wildcard(self, mock_hook):
    """Executing with several wildcards in ``source_object`` must raise.

    The operator is built with SOURCE_OBJECT_MULTIPLE_WILDCARDS and
    ``execute`` is expected to raise AirflowException with a message that
    matches the single-wildcard error text and the number of wildcards found.
    """
    mock_hook.return_value.list.return_value = SOURCE_FILES_LIST
    operator = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID, source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_MULTIPLE_WILDCARDS,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=DESTINATION_OBJECT_PREFIX)

    total_wildcards = operator.source_object.count(WILDCARD)

    # The message is used as a regular expression, so '[*]' is a character
    # class that matches a literal asterisk. Only one placeholder exists, so
    # only the wildcard count is passed to format().
    error_msg = "Only one wildcard '[*]' is allowed in source_object parameter. " \
                "Found {}".format(total_wildcards)

    # six.assertRaisesRegex works on both Python 2 and 3; the
    # assertRaisesRegexp alias is deprecated and removed in Python 3.12.
    with six.assertRaisesRegex(self, AirflowException, error_msg):
        operator.execute(None)
def test_execute_wildcard_without_destination_object(self, mock_hook):
    """Wildcard source without a destination object keeps the object names.

    With ``list`` returning SOURCE_FILES_LIST, ``rewrite`` must have been
    called for file1.txt and file2.txt with identical source and
    destination object names.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=DESTINATION_BUCKET,
    )
    task.execute(None)

    # Source and destination names are the same for every expected call.
    expected_calls = [
        mock.call(TEST_BUCKET, name, DESTINATION_BUCKET, name)
        for name in ('test_object/file1.txt', 'test_object/file2.txt')
    ]
    hook.rewrite.assert_has_calls(expected_calls)
def test_execute_with_empty_destination_bucket(self, mock_hook):
    """A ``None`` destination bucket falls back to the source bucket.

    Executes the operator with ``destination_bucket=None`` and checks that
    a warning is logged with the source bucket name and that, afterwards,
    the operator's ``destination_bucket`` equals its ``source_bucket``.
    """
    mock_hook.return_value.list.return_value = SOURCE_FILES_LIST
    operator = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID, source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=None,
        destination_object=DESTINATION_OBJECT_PREFIX)

    # Patch the logger's warning method so the emitted message can be checked.
    with patch.object(operator.log, 'warning') as mock_warn:
        operator.execute(None)
        mock_warn.assert_called_with(
            'destination_bucket is None. Defaulting it to source_bucket (%s)',
            TEST_BUCKET
        )
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(operator.destination_bucket, operator.source_bucket)
def test_execute_with_empty_destination_bucket(self, mock_hook):
    """A ``None`` destination bucket falls back to the source bucket.

    Executes the operator with ``destination_bucket=None`` and checks that
    a warning is logged with the source bucket name and that, afterwards,
    the operator's ``destination_bucket`` equals its ``source_bucket``.
    """
    mock_hook.return_value.list.return_value = SOURCE_FILES_LIST
    operator = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID, source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=None,
        destination_object=DESTINATION_OBJECT_PREFIX)

    # Patch the logger's warning method so the emitted message can be checked.
    with patch.object(operator.log, 'warning') as mock_warn:
        operator.execute(None)
        mock_warn.assert_called_with(
            'destination_bucket is None. Defaulting it to source_bucket (%s)',
            TEST_BUCKET
        )
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(operator.destination_bucket, operator.source_bucket)
def test_execute_wildcard_without_destination_object(self, mock_hook):
    """Wildcard source without a destination object keeps the object names.

    With ``list`` returning SOURCE_FILES_LIST, ``rewrite`` must have been
    called for file1.txt and file2.txt with identical source and
    destination object names.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=DESTINATION_BUCKET,
    )
    task.execute(None)

    # Source and destination names are the same for every expected call.
    expected_calls = [
        mock.call(TEST_BUCKET, name, DESTINATION_BUCKET, name)
        for name in ('test_object/file1.txt', 'test_object/file2.txt')
    ]
    hook.rewrite.assert_has_calls(expected_calls)
def test_execute_more_than_1_wildcard(self, mock_hook):
    """Executing with several wildcards in ``source_object`` must raise.

    The operator is built with SOURCE_OBJECT_MULTIPLE_WILDCARDS and
    ``execute`` is expected to raise AirflowException with a message that
    matches the single-wildcard error text and the number of wildcards found.
    """
    mock_hook.return_value.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_MULTIPLE_WILDCARDS,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=DESTINATION_OBJECT_PREFIX,
    )

    wildcard_count = task.source_object.count(WILDCARD)
    # Used as a regular expression: '[*]' matches a literal asterisk.
    expected_pattern = (
        "Only one wildcard '[*]' is allowed in source_object parameter. "
        "Found {}"
    ).format(wildcard_count)

    with six.assertRaisesRegex(self, AirflowException, expected_pattern):
        task.execute(None)
def test_wc_with_no_last_modified_time(self, mock_hook):
    """With ``last_modified_time=None`` both listed files are rewritten.

    ``list`` returns SOURCE_FILES_LIST; ``rewrite`` must have been called
    for file1.txt and file2.txt with identical source and destination
    object names.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_4,
        destination_bucket=DESTINATION_BUCKET,
        last_modified_time=None,
    )
    task.execute(None)

    # Source and destination names are the same for every expected call.
    expected_calls = [
        mock.call(TEST_BUCKET, name, DESTINATION_BUCKET, name)
        for name in ('test_object/file1.txt', 'test_object/file2.txt')
    ]
    hook.rewrite.assert_has_calls(expected_calls)
def test_execute_wildcard_with_destination_object(self, mock_hook):
    """Wildcard source with a destination object maps names under 'foo/bar/'.

    ``list`` returns SOURCE_FILES_LIST; the hook's ``copy`` mock is expected
    to have been called for file1.txt and file2.txt with destination names
    'foo/bar/file1.txt' and 'foo/bar/file2.txt'.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_4,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=DESTINATION_OBJECT_PREFIX,
    )
    task.execute(None)

    name_pairs = (
        ('test_object/file1.txt', 'foo/bar/file1.txt'),
        ('test_object/file2.txt', 'foo/bar/file2.txt'),
    )
    expected_calls = [
        mock.call(TEST_BUCKET, src, DESTINATION_BUCKET, dst)
        for src, dst in name_pairs
    ]
    # NOTE(review): sibling tests in this file assert on ``rewrite`` rather
    # than ``copy`` - confirm which hook method the operator actually calls.
    hook.copy.assert_has_calls(expected_calls)
def test_wc_with_no_last_modified_time(self, mock_hook):
    """With ``last_modified_time=None`` both listed files are rewritten.

    ``list`` returns SOURCE_FILES_LIST; ``rewrite`` must have been called
    for file1.txt and file2.txt with identical source and destination
    object names.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_4,
        destination_bucket=DESTINATION_BUCKET,
        last_modified_time=None,
    )
    task.execute(None)

    # Source and destination names are the same for every expected call.
    expected_calls = [
        mock.call(TEST_BUCKET, name, DESTINATION_BUCKET, name)
        for name in ('test_object/file1.txt', 'test_object/file2.txt')
    ]
    hook.rewrite.assert_has_calls(expected_calls)
def test_execute_wildcard_with_destination_object(self, mock_hook):
    """Wildcard source with a destination object maps names under 'foo/bar/'.

    ``list`` returns SOURCE_FILES_LIST; the hook's ``copy`` mock is expected
    to have been called for file1.txt and file2.txt with destination names
    'foo/bar/file1.txt' and 'foo/bar/file2.txt'.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_4,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=DESTINATION_OBJECT_PREFIX,
    )
    task.execute(None)

    name_pairs = (
        ('test_object/file1.txt', 'foo/bar/file1.txt'),
        ('test_object/file2.txt', 'foo/bar/file2.txt'),
    )
    expected_calls = [
        mock.call(TEST_BUCKET, src, DESTINATION_BUCKET, dst)
        for src, dst in name_pairs
    ]
    # NOTE(review): sibling tests in this file assert on ``rewrite`` rather
    # than ``copy`` - confirm which hook method the operator actually calls.
    hook.copy.assert_has_calls(expected_calls)
def test_execute_wildcard_with_destination_object_retained_prefix(self, mock_hook):
    """Destination built from the prefix plus the trimmed wildcard suffix.

    The destination object is DESTINATION_OBJECT_PREFIX joined by '/' with
    SOURCE_OBJECT_WILDCARD_SUFFIX minus its last character. ``rewrite`` must
    have been called for both files with destinations under
    'foo/bar/test_object/'.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    # Drop the final character of the suffix constant before joining.
    trimmed_suffix = SOURCE_OBJECT_WILDCARD_SUFFIX[:-1]
    destination = '{}/{}'.format(DESTINATION_OBJECT_PREFIX, trimmed_suffix)

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=destination,
    )
    task.execute(None)

    name_pairs = (
        ('test_object/file1.txt', 'foo/bar/test_object/file1.txt'),
        ('test_object/file2.txt', 'foo/bar/test_object/file2.txt'),
    )
    expected_calls = [
        mock.call(TEST_BUCKET, src, DESTINATION_BUCKET, dst)
        for src, dst in name_pairs
    ]
    hook.rewrite.assert_has_calls(expected_calls)
def test_execute_wildcard_with_destination_object_retained_prefix(self, mock_hook):
    """Destination built from the prefix plus the trimmed wildcard suffix.

    The destination object is DESTINATION_OBJECT_PREFIX joined by '/' with
    SOURCE_OBJECT_WILDCARD_SUFFIX minus its last character. ``rewrite`` must
    have been called for both files with destinations under
    'foo/bar/test_object/'.
    """
    hook = mock_hook.return_value
    hook.list.return_value = SOURCE_FILES_LIST

    # Drop the final character of the suffix constant before joining.
    trimmed_suffix = SOURCE_OBJECT_WILDCARD_SUFFIX[:-1]
    destination = '{}/{}'.format(DESTINATION_OBJECT_PREFIX, trimmed_suffix)

    task = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=DESTINATION_BUCKET,
        destination_object=destination,
    )
    task.execute(None)

    name_pairs = (
        ('test_object/file1.txt', 'foo/bar/test_object/file1.txt'),
        ('test_object/file2.txt', 'foo/bar/test_object/file2.txt'),
    )
    expected_calls = [
        mock.call(TEST_BUCKET, src, DESTINATION_BUCKET, dst)
        for src, dst in name_pairs
    ]
    hook.rewrite.assert_has_calls(expected_calls)