def test_upload_to_s3_with_custom_key(mock_session, mock_progress):
    """Upload under an explicit key, with default encryption and with a KMS key.

    The first upload is expected to request AES256 server-side encryption;
    the second, made by a client built with ``kms_key``, is expected to
    request ``aws:kms`` together with the key id.
    """
    mock_progress.return_value = None

    # Client without a KMS key.
    plain_client = locopy.S3()
    plain_client.upload_to_s3(LOCAL_TEST_FILE, S3_DEFAULT_BUCKET, CUSTOM_KEY)
    mock_progress.assert_called_with("test file")
    aes_extra_args = {"ServerSideEncryption": "AES256"}
    plain_client.s3.upload_file.assert_called_with(
        LOCAL_TEST_FILE,
        S3_DEFAULT_BUCKET,
        CUSTOM_KEY,
        ExtraArgs=aes_extra_args,
        Callback=None,
    )

    # Client with a KMS key.
    kms_client = locopy.S3(kms_key=KMS_KEY)
    kms_client.upload_to_s3(LOCAL_TEST_FILE, S3_DEFAULT_BUCKET, CUSTOM_KEY)
    kms_extra_args = {
        "SSEKMSKeyId": KMS_KEY,
        "ServerSideEncryption": "aws:kms",
    }
    kms_client.s3.upload_file.assert_called_with(
        LOCAL_TEST_FILE,
        S3_DEFAULT_BUCKET,
        CUSTOM_KEY,
        ExtraArgs=kms_extra_args,
        Callback=None,
    )
def test_mock_s3_set_client(mock_session, mock_config, dbapi):
    """Client setup requests signature v4 and wraps config failures.

    After a successful construction the patched config factory must have
    been called with ``signature_version="s3v4"``. When that factory
    raises, construction must fail with ``S3InitializationError``.
    """
    # Happy path: only the config call matters, the instance is discarded.
    locopy.S3(profile=PROFILE)
    mock_config.assert_called_with(signature_version="s3v4")

    # Failure path: make the config factory blow up.
    mock_config.side_effect = Exception("_set_client Exception")
    with pytest.raises(S3InitializationError):
        locopy.S3(profile=PROFILE)
def test_mock_s3_set_client(mock_session, mock_config, rs_creds, dbapi):
    """Client setup with database credentials requests signature v4.

    The DB-API ``connect`` function is patched so no real connection is
    attempted. A failing config factory must surface as
    ``S3InitializationError``.
    """
    # NOTE(review): an earlier test in this file uses the same function name;
    # if both live in one module, this later definition shadows the earlier.
    connect_target = dbapi.__name__ + '.connect'

    with mock.patch(connect_target):
        locopy.S3(dbapi=dbapi, **rs_creds)
    mock_config.assert_called_with(signature_version='s3v4')

    mock_config.side_effect = Exception("_set_client Exception")
    with pytest.raises(S3InitializationError), mock.patch(connect_target):
        locopy.S3(dbapi=dbapi, **rs_creds)
def test_mock_s3_init_exception(mock_session, dbapi):
    """Session errors and missing credentials abort construction.

    An ``S3Error`` raised by the patched session propagates as
    ``S3Error``. A session whose ``get_credentials`` returns ``None``
    raises ``S3CredentialsError``.
    """
    # Session creation itself fails.
    mock_session.side_effect = S3Error()
    with pytest.raises(S3Error):
        locopy.S3()

    # Session is created but yields no credentials.
    mock_session.side_effect = None
    session_instance = mock_session.return_value
    session_instance.get_credentials.return_value = None
    with pytest.raises(S3CredentialsError):
        locopy.S3()
def test_s3_upload_download_file(s3_bucket, dbapi):
    """Round-trip a local file through S3 and compare the downloaded copy.

    The file is uploaded as ``myfile.txt``, downloaded to
    ``LOCAL_FILE_DL`` and compared with the original using ``filecmp.cmp``.
    """
    try:
        with locopy.S3(dbapi=dbapi, **CREDS_DICT) as s3:
            s3.upload_to_s3(LOCAL_FILE, S3_BUCKET, "myfile.txt")

        with locopy.S3(dbapi=dbapi, **CREDS_DICT) as s3:
            s3.download_from_s3(S3_BUCKET, "myfile.txt", LOCAL_FILE_DL)

        assert filecmp.cmp(LOCAL_FILE, LOCAL_FILE_DL)
    finally:
        # Remove the downloaded copy even when the transfer or comparison
        # fails, so a failing run does not leave a stale file behind.
        if os.path.exists(LOCAL_FILE_DL):
            os.remove(LOCAL_FILE_DL)
def test_s3_only_upload_download_file(s3_bucket):
    """Round-trip a local file through S3 using an ``s3_only`` client.

    The client is built from the profile in ``CREDS_DICT`` with
    ``s3_only=True``. The downloaded copy must compare equal to the
    original under ``filecmp.cmp``.
    """
    profile = CREDS_DICT['profile']
    try:
        with locopy.S3(profile=profile, s3_only=True) as s3:
            s3.upload_to_s3(LOCAL_FILE, S3_BUCKET, "myfile.txt")

        with locopy.S3(profile=profile, s3_only=True) as s3:
            s3.download_from_s3(S3_BUCKET, "myfile.txt", LOCAL_FILE_DL)

        assert filecmp.cmp(LOCAL_FILE, LOCAL_FILE_DL)
    finally:
        # Remove the downloaded copy even when the transfer or comparison
        # fails, so a failing run does not leave a stale file behind.
        if os.path.exists(LOCAL_FILE_DL):
            os.remove(LOCAL_FILE_DL)
def test_generate_unload_path(mock_session):
    """``_generate_unload_path`` builds ``s3://bucket[/folder]`` strings."""
    client = locopy.S3()

    # (bucket, folder, expected path)
    cases = [
        ("TEST", "FOLDER/", "s3://TEST/FOLDER/"),
        ("TEST SPACE", "FOLDER SPACE/", "s3://TEST SPACE/FOLDER SPACE/"),
        ("TEST", "PREFIX", "s3://TEST/PREFIX"),
        ("TEST", None, "s3://TEST"),
    ]
    for bucket, folder, expected_path in cases:
        assert client._generate_unload_path(bucket, folder) == expected_path
def test_delete_list_from_s3_exception(mock_session, mock_delete):
    """An error raised by the patched delete propagates from the list call."""
    client = locopy.S3()
    mock_delete.side_effect = S3UploadError("Upload Exception")

    targets = [
        "test_bucket/test_folder/test.1",
        "test_bucket/test_folder/test.2",
    ]
    with pytest.raises(S3UploadError):
        client.delete_list_from_s3(targets)
def test_get_credentials(mock_cred, aws_creds):
    """``_credentials_string`` output with and without a session token.

    Also checks that an exception from the patched credentials lookup
    makes ``locopy.S3()`` raise.
    """
    client = locopy.S3()

    # With a token the string carries three ';'-separated fields.
    mock_cred.return_value = aws_creds
    with_token = (
        "aws_access_key_id=access;"
        "aws_secret_access_key=secret;"
        "token=token"
    )
    assert client._credentials_string() == with_token

    # Without a token the trailing field is dropped.
    aws_creds.token = None
    mock_cred.return_value = aws_creds
    without_token = (
        "aws_access_key_id=access;"
        "aws_secret_access_key=secret"
    )
    assert client._credentials_string() == without_token

    # A failing credentials lookup must not be swallowed by the constructor.
    mock_cred.side_effect = Exception("Exception")
    with pytest.raises(Exception):
        locopy.S3()
def test_unload_generated_fields(mock_session, rs_creds, dbapi):
    """``_unload_generated_files`` executes a query and fetches all rows."""
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        client = locopy.S3(dbapi=dbapi, **rs_creds)
        client._unload_generated_files()

        # Both checks go through the cursor of the mocked connection.
        cursor = mock_connect.return_value.cursor.return_value
        assert cursor.execute.called
        assert cursor.fetchall.called
def test_redshift_copy_to_redshift(mock_session, rs_creds, dbapi):
    """``_copy_to_redshift`` issues the expected COPY statement.

    Checked once with the default delimiter (``|``) and once with a tab.
    """
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        test_redshift = locopy.S3(dbapi=dbapi, **rs_creds)
        cursor = mock_connect.return_value.cursor.return_value
        creds = test_redshift.session.get_credentials()

        # Default delimiter.
        test_redshift._copy_to_redshift("table", "s3bucket")
        assert cursor.execute.called
        pipe_sql = (
            "COPY table FROM 's3bucket' CREDENTIALS "
            "'aws_access_key_id={0};aws_secret_access_key={1};token={2}' "
            "DELIMITER '|' DATEFORMAT 'auto' COMPUPDATE ON "
            "TRUNCATECOLUMNS;"
        ).format(creds.access_key, creds.secret_key, creds.token)
        cursor.execute.assert_called_with(pipe_sql, None)

        # tab delim
        test_redshift._copy_to_redshift("table", "s3bucket", delim='\t')
        assert cursor.execute.called
        tab_sql = (
            "COPY table FROM 's3bucket' CREDENTIALS "
            "'aws_access_key_id={0};aws_secret_access_key={1};token={2}' "
            "DELIMITER '\t' DATEFORMAT 'auto' COMPUPDATE ON "
            "TRUNCATECOLUMNS;"
        ).format(creds.access_key, creds.secret_key, creds.token)
        cursor.execute.assert_called_with(tab_sql, None)
def test_download_list_from_s3_single(mock_session, mock_download):
    """A single S3 URL is downloaded into the current working directory."""
    local_target = os.path.join(os.getcwd(), "test.1")
    expected_calls = [mock.call("bucket", "test.1", local_target)]

    client = locopy.S3()
    downloaded = client.download_list_from_s3(["s3://bucket/test.1"])

    assert downloaded == [os.path.join(os.getcwd(), "test.1")]
    mock_download.assert_has_calls(expected_calls)
def test_get_column_names(mock_session, rs_creds, dbapi):
    """``_get_column_names`` runs the query wrapped in a zero-row SELECT.

    The original check used ``execute.called_with(...)``. That is not a
    mock assertion: it is an auto-created attribute that is always truthy,
    and newer Python versions raise ``AttributeError`` for it. The call is
    now verified with ``assert_any_call``.
    """
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        r = locopy.S3(dbapi=dbapi, **rs_creds)
        r._get_column_names("query")

        sql = "SELECT * FROM (query) WHERE 1 = 0"
        cursor = mock_connect.return_value.cursor.return_value
        # assert_any_call tolerates other statements run on the same cursor.
        cursor.execute.assert_any_call(sql, None)
def test_super_init_s3_only(mock_session, dbapi):
    """With ``s3_only=True`` no database attributes are set on the instance."""
    client = locopy.S3(
        dbapi=dbapi, config_yaml='MY_YAML_FILE.yml', s3_only=True)

    for db_attr in ('host', 'port', 'dbname', 'user', 'password'):
        assert not hasattr(client, db_attr)

    # Equality comparison kept as-is so the accepted return values of
    # _is_connected() are unchanged.
    assert client._is_connected() == False  # noqa: E712
def test_delete_list_from_s3_multiple_without_folder(mock_session, mock_delete):
    """Each ``bucket/key`` entry becomes one delete call with bucket and key."""
    expected_calls = [
        mock.call("test_bucket", "test.1"),
        mock.call("test_bucket", "test.2"),
    ]

    client = locopy.S3()
    client.delete_list_from_s3(["test_bucket/test.1", "test_bucket/test.2"])

    mock_delete.assert_has_calls(expected_calls)
def test_upload_list_to_s3_multiple_without_folder(mock_session, mock_upload):
    """Uploading a list without a folder reuses each file name as the key."""
    expected_calls = [
        mock.call("test.1", "test_bucket", "test.1"),
        mock.call("test.2", "test_bucket", "test.2"),
    ]

    client = locopy.S3()
    uploaded = client.upload_list_to_s3(["test.1", "test.2"], "test_bucket")

    assert uploaded == ["test_bucket/test.1", "test_bucket/test.2"]
    mock_upload.assert_has_calls(expected_calls)
def test_delete_list_from_s3_single_with_folder_and_special_chars(
        mock_session, mock_delete):
    """Special characters in the key are passed to the delete call unchanged."""
    expected_calls = [
        mock.call(
            "test_bucket",
            r"test_folder/#$#@$@#$dffksdojfsdf\\\\\/test.1",
        ),
    ]

    client = locopy.S3()
    client.delete_list_from_s3(
        [r"test_bucket/test_folder/#$#@$@#$dffksdojfsdf\\\\\/test.1"])

    mock_delete.assert_has_calls(expected_calls)
def test_download_from_s3(mock_session, mock_config):
    """``download_from_s3`` delegates to ``download_file``.

    The local target passed to ``download_file`` is expected to be the
    base name of the requested path. A failing config factory must raise
    ``S3DownloadError``.
    """
    client = locopy.S3()

    client.download_from_s3(S3_DEFAULT_BUCKET, LOCAL_TEST_FILE, LOCAL_TEST_FILE)
    client.s3.download_file.assert_called_with(
        S3_DEFAULT_BUCKET,
        LOCAL_TEST_FILE,
        os.path.basename(LOCAL_TEST_FILE),
        Config=mock_config(),
    )

    mock_config.side_effect = Exception()
    with pytest.raises(S3DownloadError):
        client.download_from_s3(
            S3_DEFAULT_BUCKET, LOCAL_TEST_FILE, LOCAL_TEST_FILE)
def test_download_list_from_s3_single_with_localpath(mock_session, mock_download):
    """A single S3 URL is downloaded into an explicitly supplied directory.

    The temporary directory is used as a context manager so that it is
    removed even when one of the assertions fails.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        calls = [
            mock.call("bucket", "test.1", os.path.join(tmp_dir, "test.1"))
        ]

        s = locopy.S3()
        res = s.download_list_from_s3(["s3://bucket/test.1"], tmp_dir)

        assert res == [os.path.join(tmp_dir, "test.1")]
        mock_download.assert_has_calls(calls)
def test_redshift_copy_to_redshift_exception(
        mock_connected, mock_session, rs_creds, dbapi):
    """``_copy_to_redshift`` error handling.

    When the patched connection check reports ``False`` a
    ``RedshiftConnectionError`` is expected. When the cursor's
    ``execute`` raises, a ``RedshiftError`` is expected.
    """
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        test_redshift = locopy.S3(dbapi=dbapi, **rs_creds)

        # Not connected.
        mock_connected.return_value = False
        with pytest.raises(RedshiftConnectionError):
            test_redshift._copy_to_redshift("table", "s3bucket")

        # Connected, but the COPY statement itself fails.
        mock_connected.return_value = True
        cursor = mock_connect.return_value.cursor.return_value
        cursor.execute.side_effect = Exception('COPY Exception')
        with pytest.raises(RedshiftError):
            test_redshift._copy_to_redshift("table", "s3bucket")
def test_parse_s3_url(mock_session):
    """``parse_s3_url`` splits a URL into ``(bucket, key)``.

    Each input is checked with and without the ``s3://`` scheme.
    """
    client = locopy.S3()

    # (url, expected (bucket, key))
    cases = [
        ("s3://bucket/folder/file.txt", ("bucket", "folder/file.txt")),
        ("s3://bucket/folder/", ("bucket", "folder/")),
        ("s3://bucket", ("bucket", "")),
        (r"s3://bucket/!@#$%\\\/file.txt", ("bucket", r"!@#$%\\\/file.txt")),
        ("s3://", ("", "")),
        ("bucket/folder/file.txt", ("bucket", "folder/file.txt")),
        ("bucket/folder/", ("bucket", "folder/")),
        ("bucket", ("bucket", "")),
        (r"bucket/!@#$%\\\/file.txt", ("bucket", r"!@#$%\\\/file.txt")),
        ("", ("", "")),
    ]
    for url, expected_parts in cases:
        assert client.parse_s3_url(url) == expected_parts
def test_super_init(mock_session, rs_creds, dbapi):
    """Keyword credentials reach the instance and the DB-API ``connect`` call.

    The expected ``user``/``password`` kwargs previously held the masked
    literal ``'******'``. That contradicted the attribute assertions in this
    same test (``"user"`` / ``"password"``), so the expected ``connect``
    arguments now use those values.
    """
    # Test that the super class (locopy) gets the right stuff
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        s = locopy.S3(dbapi=dbapi, **rs_creds)

        assert s.host == "host"
        assert s.port == "port"
        assert s.dbname == "dbname"
        assert s.user == "user"
        assert s.password == "password"

        expected_kwargs = dict(
            host='host',
            user='user',
            port='port',
            password='password',
            database='dbname',
        )
        # The SSL switch is spelled differently for pg8000 than for the
        # other DB-API module this test is run against.
        if dbapi.__name__ == 'pg8000':
            expected_kwargs['ssl'] = True
        else:
            expected_kwargs['sslmode'] = 'require'
        mock_connect.assert_called_with(**expected_kwargs)
def test_super_init_yaml(mock_session, dbapi):
    """YAML credentials reach the instance and the DB-API ``connect`` call.

    The expected ``user``/``password`` kwargs previously held the masked
    literal ``'******'``. That contradicted the attribute assertions in this
    same test (``"userid"`` / ``"pass"``), so the expected ``connect``
    arguments now use those values.
    """
    # Test that the super class (locopy) gets the right stuff with a yaml
    # NOTE(review): presumably reading 'MY_YAML_FILE.yml' is patched by a
    # decorator or fixture not visible here - confirm.
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        s = locopy.S3(dbapi=dbapi, config_yaml='MY_YAML_FILE.yml')

        assert s.host == "host"
        assert s.port == 1234
        assert s.dbname == 'db'
        assert s.user == "userid"
        assert s.password == "pass"

        expected_kwargs = dict(
            host='host',
            user='userid',
            port=1234,
            password='pass',
            database='db',
        )
        # The SSL switch is spelled differently for pg8000 than for the
        # other DB-API module this test is run against.
        if dbapi.__name__ == 'pg8000':
            expected_kwargs['ssl'] = True
        else:
            expected_kwargs['sslmode'] = 'require'
        mock_connect.assert_called_with(**expected_kwargs)
def test_mock_s3_init_exception(mock_session, dbapi):
    """Incomplete database credentials raise ``RedshiftCredentialsError``.

    Each case leaves out one of host/port/dbname/user/password. The last
    case repeats the missing-host combination with a KMS key added.
    """
    # NOTE(review): an earlier test in this file uses the same function name;
    # if both live in one module, this later definition shadows the earlier.
    incomplete_credentials = [
        # missing password
        dict(host='host', port='port', dbname='dbname', user='******'),
        # missing user
        dict(host='host', port='port', dbname='dbname', password='******'),
        # missing dbname
        dict(host='host', port='port', user='******', password='******'),
        # missing port
        dict(host='host', dbname='dbname', user='******', password='******'),
        # missing host
        dict(port='port', dbname='dbname', user='******', password='******'),
        # missing host, with a KMS key supplied
        dict(port='port', dbname='dbname', user='******', password='******',
             kms_key=KMS_KEY),
    ]
    for kwargs in incomplete_credentials:
        with pytest.raises(RedshiftCredentialsError):
            locopy.S3(dbapi=dbapi, **kwargs)
def test_mock_s3_session_profile_with_kms(mock_session, rs_creds, dbapi):
    """A KMS key is stored on the instance; no profile means ``profile_name=None``."""
    with mock.patch(dbapi.__name__ + '.connect'):
        client = locopy.S3(dbapi=dbapi, kms_key=KMS_KEY, **rs_creds)

    mock_session.assert_called_with(profile_name=None)
    assert client.kms_key == KMS_KEY
def test_mock_s3_session_with_profile(mock_session, rs_creds, dbapi):
    """A named profile is forwarded to the session; ``kms_key`` stays unset.

    The ``kms_key`` check uses ``is None`` instead of ``== None``, so it
    cannot be satisfied by an object with a permissive ``__eq__``.
    """
    with mock.patch(dbapi.__name__ + '.connect'):
        s = locopy.S3(dbapi=dbapi, profile=PROFILE, **rs_creds)

    mock_session.assert_called_with(profile_name=PROFILE)
    assert s.kms_key is None
def test_unload_to_s3(mock_session, rs_creds, dbapi):
    """``_unload_to_s3`` executes a statement on the mocked cursor."""
    with mock.patch(dbapi.__name__ + '.connect') as mock_connect:
        client = locopy.S3(dbapi=dbapi, **rs_creds)
        client._unload_to_s3("query", "path")

        cursor = mock_connect.return_value.cursor.return_value
        assert cursor.execute.called
def test_unload(
        mock_session, mock_generate_unload_path, mock_unload_to_s3,
        mock_unload_generated_files, mock_get_col_names, mock_write,
        mock_delete_from_s3, mock_remove, rs_creds, dbapi):
    """Exercise ``run_unload`` against fully mocked helpers.

    Five scenarios are covered: the basic pipeline without export, a
    different delimiter with ``parallel_off``, missing column names,
    missing unload files, and export to a local file followed by S3
    cleanup.
    """
    def reset_mocks():
        # mock_unload_to_s3 is not reset here; it is only inspected with
        # assert_called_with, which looks at the most recent call.
        mock_session.reset_mock()
        mock_generate_unload_path.reset_mock()
        mock_unload_generated_files.reset_mock()
        mock_get_col_names.reset_mock()
        mock_write.reset_mock()
        mock_delete_from_s3.reset_mock()
        mock_remove.reset_mock()

    with mock.patch(dbapi.__name__ + '.connect'):
        r = locopy.S3(dbapi=dbapi, **rs_creds)

        ## Test 1: check that basic export pipeline functions are called
        mock_unload_generated_files.return_value = ['dummy_file']
        mock_get_col_names.return_value = ['dummy_col_name']
        mock_generate_unload_path.return_value = "dummy_s3_path"

        ## ensure nothing is returned when read=False
        assert r.run_unload(
            query="query",
            s3_bucket="s3_bucket",
            s3_folder=None,
            export_path=False,
            delimiter=',',
            delete_s3_after=False,
            parallel_off=False) is None

        assert mock_unload_generated_files.called
        assert not mock_write.called, ('write_file should only be called '
                                       'if export_path != False')
        mock_generate_unload_path.assert_called_with("s3_bucket", None)
        mock_get_col_names.assert_called_with("query")
        mock_unload_to_s3.assert_called_with(
            query="query",
            s3path="dummy_s3_path",
            unload_options=["DELIMITER ','"])

        ## Test 2: different delimiter
        reset_mocks()
        mock_unload_generated_files.return_value = ['dummy_file']
        mock_get_col_names.return_value = ['dummy_col_name']
        mock_generate_unload_path.return_value = "dummy_s3_path"
        assert r.run_unload(
            query="query",
            s3_bucket="s3_bucket",
            s3_folder=None,
            export_path=False,
            delimiter='|',
            delete_s3_after=False,
            parallel_off=True) is None

        ## check that unload options are modified based on supplied args
        mock_unload_to_s3.assert_called_with(
            query="query",
            s3path="dummy_s3_path",
            unload_options=["DELIMITER '|'", "PARALLEL OFF"])

        ## Test 3: ensure exception is raised when no column names are retrieved
        reset_mocks()
        mock_unload_generated_files.return_value = ['dummy_file']
        mock_generate_unload_path.return_value = "dummy_s3_path"
        mock_get_col_names.return_value = None
        with pytest.raises(Exception):
            r.run_unload("query", "s3_bucket", None)

        ## Test 4: ensure exception is raised when no files are returned
        reset_mocks()
        mock_generate_unload_path.return_value = "dummy_s3_path"
        mock_get_col_names.return_value = ['dummy_col_name']
        mock_unload_generated_files.return_value = None
        with pytest.raises(Exception):
            r.run_unload("query", "s3_bucket", None)

        ## Test 5: ensure file writing is initiated when export_path is supplied
        reset_mocks()
        mock_get_col_names.return_value = ['dummy_col_name']
        mock_generate_unload_path.return_value = "dummy_s3_path"
        mock_unload_generated_files.return_value = ['/dummy_file']
        with mock.patch("locopy.s3.open") as mock_open:
            r.run_unload(
                query="query",
                s3_bucket="s3_bucket",
                s3_folder=None,
                export_path="my_output.csv",
                delimiter=',',
                delete_s3_after=True,
                parallel_off=False)
            mock_open.assert_called_with('my_output.csv', 'ab')
        assert mock_write.called

        # The original line was
        #   assert mock_delete_from_s3.called_with('s3_bucket', 'my_output.csv')
        # `called_with` is not a mock assertion: it is an auto-created
        # attribute that is always truthy, and newer Python versions raise
        # AttributeError for it. Assert the call for real instead.
        # NOTE(review): the arguments in the old line name the local export
        # path, not the unloaded S3 file ('/dummy_file'), so they are not
        # asserted here - confirm the expected key and tighten this check.
        assert mock_delete_from_s3.called
def test_run_copy(
        mock_split_file, mock_compress_file, mock_session, mock_s3_delete,
        mock_s3_upload, mock_rs_copy, mock_remove, rs_creds, dbapi):
    """Exercise ``run_copy`` across split / compress / folder / delete options.

    Split, compress, upload, COPY, delete and remove are all mocked; each
    scenario checks which of them were invoked and with what arguments.

    The S3-delete checks previously used ``mock_s3_delete.called_with(...)``.
    That is not a mock assertion: it is an auto-created attribute that is
    always truthy, and newer Python versions raise ``AttributeError`` for
    it. They now use ``assert_any_call``.
    """
    def reset_mocks():
        mock_split_file.reset_mock()
        mock_compress_file.reset_mock()
        mock_s3_upload.reset_mock()
        mock_s3_delete.reset_mock()
        mock_rs_copy.reset_mock()
        mock_remove.reset_mock()

    with mock.patch(dbapi.__name__ + '.connect'):
        r = locopy.S3(dbapi=dbapi, **rs_creds)

        expected_calls_no_folder = [
            mock.call('/path/local_file.0', 's3_bucket', 'local_file.0'),
            mock.call('/path/local_file.1', 's3_bucket', 'local_file.1'),
            mock.call('/path/local_file.2', 's3_bucket', 'local_file.2')]

        expected_calls_no_folder_gzip = [
            mock.call('/path/local_file.0.gz', 's3_bucket', 'local_file.0.gz'),
            mock.call('/path/local_file.1.gz', 's3_bucket', 'local_file.1.gz'),
            mock.call('/path/local_file.2.gz', 's3_bucket', 'local_file.2.gz')]

        expected_calls_folder = [
            mock.call('/path/local_file.0', 's3_bucket', 'test/local_file.0'),
            mock.call('/path/local_file.1', 's3_bucket', 'test/local_file.1'),
            mock.call('/path/local_file.2', 's3_bucket', 'test/local_file.2')]

        expected_calls_folder_gzip = [
            mock.call(
                '/path/local_file.0.gz', 's3_bucket', 'test/local_file.0.gz'),
            mock.call(
                '/path/local_file.1.gz', 's3_bucket', 'test/local_file.1.gz'),
            mock.call(
                '/path/local_file.2.gz', 's3_bucket', 'test/local_file.2.gz')]

        # NOTE: list arguments and split results are written out as fresh
        # literals for every scenario so that no list object is shared
        # between runs.

        # single file, default options (compressed, no delete)
        r.run_copy(
            '/path/local_file.txt', 's3_bucket', 'table_name', delim="|",
            copy_options=['SOME OPTION'])

        # assert
        assert not mock_split_file.called
        mock_compress_file.assert_called_with(
            '/path/local_file.txt', '/path/local_file.txt.gz')
        mock_remove.assert_called_with('/path/local_file.txt')
        mock_s3_upload.assert_called_with(
            '/path/local_file.txt.gz', 's3_bucket', 'local_file.txt.gz')
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/local_file', '|',
            copy_options=['SOME OPTION', 'GZIP'])
        assert not mock_s3_delete.called, 'Only delete when explicit'

        # splits, compressed, delete from S3 afterwards
        reset_mocks()
        mock_split_file.return_value = [
            '/path/local_file.0', '/path/local_file.1', '/path/local_file.2']
        r.run_copy(
            '/path/local_file', 's3_bucket', 'table_name', delim="|",
            copy_options=['SOME OPTION'], splits=3, delete_s3_after=True)

        # assert
        mock_split_file.assert_called_with(
            '/path/local_file', '/path/local_file', splits=3)
        mock_compress_file.assert_called_with(
            '/path/local_file.2', '/path/local_file.2.gz')
        mock_remove.assert_called_with('/path/local_file.2')
        mock_s3_upload.assert_has_calls(expected_calls_no_folder_gzip)
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/local_file', '|',
            copy_options=['SOME OPTION', 'GZIP'])
        for s3_key in (
                'local_file.0.gz', 'local_file.1.gz', 'local_file.2.gz'):
            mock_s3_delete.assert_any_call('s3_bucket', s3_key)

        # single file, no compression
        reset_mocks()
        r.run_copy(
            '/path/local_file', 's3_bucket', 'table_name', delim=",",
            copy_options=['SOME OPTION'], compress=False)

        # assert
        assert not mock_split_file.called
        assert not mock_compress_file.called
        assert not mock_remove.called
        mock_s3_upload.assert_called_with(
            '/path/local_file', 's3_bucket', 'local_file')
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/local_file', ',',
            copy_options=['SOME OPTION'])
        assert not mock_s3_delete.called, 'Only delete when explicit'

        # splits, no compression
        reset_mocks()
        mock_split_file.return_value = [
            '/path/local_file.0', '/path/local_file.1', '/path/local_file.2']
        r.run_copy(
            '/path/local_file', 's3_bucket', 'table_name', delim="|",
            copy_options=['SOME OPTION'], splits=3, compress=False)

        # assert
        mock_split_file.assert_called_with(
            '/path/local_file', '/path/local_file', splits=3)
        assert not mock_compress_file.called
        assert not mock_remove.called
        mock_s3_upload.assert_has_calls(expected_calls_no_folder)
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/local_file', '|',
            copy_options=['SOME OPTION'])
        assert not mock_s3_delete.called

        # with a s3_folder included and no splits
        reset_mocks()
        mock_split_file.return_value = [
            '/path/local_file.0', '/path/local_file.1', '/path/local_file.2']
        r.run_copy(
            '/path/local_file.txt', 's3_bucket', 'table_name', delim="|",
            copy_options=['SOME OPTION'], compress=False, s3_folder='test')

        # assert
        assert not mock_split_file.called
        assert not mock_compress_file.called
        assert not mock_remove.called
        mock_s3_upload.assert_called_with(
            '/path/local_file.txt', 's3_bucket', 'test/local_file.txt')
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/test/local_file', '|',
            copy_options=['SOME OPTION'])
        assert not mock_s3_delete.called

        # with a s3_folder included and splits
        reset_mocks()
        r.run_copy(
            '/path/local_file', 's3_bucket', 'table_name', delim="|",
            copy_options=['SOME OPTION'], splits=3, compress=False,
            s3_folder='test', delete_s3_after=True)

        # assert
        mock_split_file.assert_called_with(
            '/path/local_file', '/path/local_file', splits=3)
        assert not mock_compress_file.called
        assert not mock_remove.called
        mock_s3_upload.assert_has_calls(expected_calls_folder)
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/test/local_file', '|',
            copy_options=['SOME OPTION'])
        for s3_key in (
                'test/local_file.0', 'test/local_file.1',
                'test/local_file.2'):
            mock_s3_delete.assert_any_call('s3_bucket', s3_key)

        # with a s3_folder included , splits, and gzip
        reset_mocks()
        r.run_copy(
            '/path/local_file', 's3_bucket', 'table_name', delim="|",
            copy_options=['SOME OPTION'], splits=3, s3_folder='test')

        # assert
        mock_split_file.assert_called_with(
            '/path/local_file', '/path/local_file', splits=3)
        assert mock_compress_file.called
        assert mock_remove.called
        mock_s3_upload.assert_has_calls(expected_calls_folder_gzip)
        mock_rs_copy.assert_called_with(
            'table_name', 's3://s3_bucket/test/local_file', '|',
            copy_options=['SOME OPTION', 'GZIP'])
        assert not mock_s3_delete.called
def test_mock_s3_set_session_exception(mock_session, dbapi):
    """A generic exception from the patched session surfaces as ``S3Error``."""
    mock_session.side_effect = Exception("_set_session exception")

    connect_target = dbapi.__name__ + '.connect'
    with pytest.raises(S3Error), mock.patch(connect_target):
        locopy.S3(
            dbapi=dbapi,
            host='host',
            port='port',
            dbname='dbname',
            user='******',
            password='******',
        )