def test_copy_into_operation_failure(mock_redshift):
    expected = [
        RedshiftOperation(
            extracted_schema={
                "s3://test/test.json.*": [
                    Column(
                        dataset_name="s3://test/test.json",
                        name="*",
                        alias="s3://test/test.json.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="copy into FAIL from s3://test/test.json CREDENTIALS = (AWS_KEY_ID = 'test' AWS_SECRET_KEY = 'test');",
            query_id=None,
            success=False,
            op_type=DbndTargetOperationType.read,
            error="Exception Mock",
            source_name="s3://test/test.json",
            target_name="FAIL",
        ),
        RedshiftOperation(
            extracted_schema={
                "FAIL.*": [
                    Column(
                        dataset_name="FAIL",
                        name="*",
                        alias="FAIL.*",
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="copy into FAIL from s3://test/test.json CREDENTIALS = (AWS_KEY_ID = 'test' AWS_SECRET_KEY = 'test');",
            query_id=None,
            success=False,
            op_type=DbndTargetOperationType.write,
            error="Exception Mock",
            source_name="s3://test/test.json",
            target_name="FAIL",
        ),
    ]

    with pytest.raises(Exception) as e:
        run_tracker_custom_query(COPY_INTO_TABLE_FAIL_QUERY, expected)
    assert str(e.value) == ERROR_MESSAGE


def test_copy_into_s3_graceful_failure(mock_redshift):
    expected = [
        RedshiftOperation(
            extracted_schema={
                "s3://my/bucket/file.csv.*": [
                    Column(
                        dataset_name="s3://my/bucket/file.csv",
                        name="*",
                        alias="s3://my/bucket/file.csv.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="COPY \"schema_fail\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.read,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="schema_fail",
        ),
        RedshiftOperation(
            extracted_schema={
                '"schema_fail".*': [
                    Column(
                        dataset_name='"schema_fail"',
                        name="*",
                        alias='"schema_fail".*',
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="COPY \"schema_fail\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.write,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="schema_fail",
        ),
    ]
    return run_tracker_custom_query(
        COPY_INTO_TABLE_FROM_S3_FILE_GRACEFUL_FAIL_QUERY, expected
    )


def test_copy_into_failure(mock_snowflake):
    expected = [
        SqlOperation(
            extracted_schema={
                "s3://test/test.json.*": [
                    Column(
                        dataset_name="s3://test/test.json",
                        alias="s3://test/test.json.*",
                        name="*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=0,
            query=COPY_INTO_TABLE_FAIL_QUERY,
            query_id=SFQID,
            success=False,
            op_type=DbndTargetOperationType.read,
            error=ERROR_MESSAGE,
        ),
        SqlOperation(
            extracted_schema={
                "FAIL.*": [
                    Column(
                        dataset_name="FAIL",
                        alias="FAIL.*",
                        name="*",
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=0,
            query=COPY_INTO_TABLE_FAIL_QUERY,
            query_id=SFQID,
            success=False,
            op_type=DbndTargetOperationType.write,
            error=ERROR_MESSAGE,
        ),
    ]

    with pytest.raises(Exception) as e:
        run_tracker_custom_query(mock_snowflake, COPY_INTO_TABLE_FAIL_QUERY, expected)
    assert str(e.value) == ERROR_MESSAGE


def generate_sql_operation_mock(op_type, table, is_file, is_stage):
    return SqlOperation(
        extracted_schema={
            table: [
                Column(
                    dataset_name=table,
                    alias=table,
                    name="*",
                    is_file=is_file,
                    is_stage=is_stage,
                )
            ]
        },
        dtypes=None,
        records_count=1,
        query="",
        query_id=1,
        success=True,
        op_type=op_type,
        error=None,
    )


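# Hypothetical usage sketch (not part of the original suite): generate_sql_operation_mock
# builds a minimal SqlOperation with a single wildcard column, which keeps tests that only
# care about a read/write pair short. The table names below are made up for illustration.
def _example_read_write_pair():
    read_op = generate_sql_operation_mock(
        op_type=DbndTargetOperationType.read,
        table="s3://bucket/file.csv",  # assumed source path, illustration only
        is_file=True,
        is_stage=False,
    )
    write_op = generate_sql_operation_mock(
        op_type=DbndTargetOperationType.write,
        table="MY_TABLE",  # assumed target table, illustration only
        is_file=False,
        is_stage=False,
    )
    return [read_op, write_op]

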
def test_multiple_psycopg2_connections(mock_redshift):
    expected1 = [
        RedshiftOperation(
            extracted_schema={
                "s3://my/bucket/file.csv.*": [
                    Column(
                        dataset_name="s3://my/bucket/file.csv",
                        name="*",
                        alias="s3://my/bucket/file.csv.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="COPY \"MY_TABLE\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.read,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="MY_TABLE",
        ),
        RedshiftOperation(
            extracted_schema={
                '"MY_TABLE".*': [
                    Column(
                        dataset_name='"MY_TABLE"',
                        name="*",
                        alias='"MY_TABLE".*',
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="COPY \"MY_TABLE\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.write,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="MY_TABLE",
        ),
    ]
    expected2 = [
        RedshiftOperation(
            extracted_schema={
                "s3://my/bucket/file2.csv.*": [
                    Column(
                        dataset_name="s3://my/bucket/file2.csv",
                        name="*",
                        alias="s3://my/bucket/file2.csv.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="COPY \"MY_TABLE\" from 's3://my/bucket/file2.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.read,
            error=None,
            source_name="s3://my/bucket/file2.csv",
            target_name="MY_TABLE",
        ),
        RedshiftOperation(
            extracted_schema={
                '"MY_TABLE".*': [
                    Column(
                        dataset_name='"MY_TABLE"',
                        name="*",
                        alias='"MY_TABLE".*',
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query="COPY \"MY_TABLE\" from 's3://my/bucket/file2.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.write,
            error=None,
            source_name="s3://my/bucket/file2.csv",
            target_name="MY_TABLE",
        ),
    ]

    COPY_INTO_TABLE_FROM_S3_FILE_QUERY_2 = """COPY "MY_TABLE" from 's3://my/bucket/file2.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv"""

    redshift_tracker = RedshiftTracker()

    with redshift_tracker as tracker:
        with _redshift_connect() as con1, _redshift_connect() as con2:
            c1 = con1.cursor()
            c2 = con2.cursor()
            try:
                c1.execute(COPY_INTO_TABLE_FROM_S3_FILE_QUERY)
                c2.execute(COPY_INTO_TABLE_FROM_S3_FILE_QUERY_2)
            finally:
                assert compare_eq_operations(
                    redshift_tracker.connections.get_operations(c1), expected1
                )
                assert compare_eq_operations(
                    redshift_tracker.connections.get_operations(c2), expected2
                )

    # flush operations
    assert redshift_tracker.connections.get_operations(c1) == []
    assert redshift_tracker.connections.get_operations(c2) == []


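# Minimal sketch of what the compare_eq_operations helper used above is assumed to do:
# an order-sensitive, element-wise equality check over two lists of operations. This is
# an illustration only; the project's actual helper may compare selected fields instead.
def compare_eq_operations_sketch(actual_operations, expected_operations):
    if len(actual_operations) != len(expected_operations):
        return False
    return all(
        actual == expected
        for actual, expected in zip(actual_operations, expected_operations)
    )

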
def parse_first_query(sqlquery):
    return sqlparse.parse(sqlquery)[0]


@pytest.mark.parametrize(
    "sqlquery, expected",
    [
        (
            """
            copy into JSON_TABLE from s3://bucket/file.json CREDENTIALS = (AWS_KEY_ID = '12345' AWS_SECRET_KEY = '12345')
            """,
            {
                DbndTargetOperationType.read: {
                    "s3://bucket/file.json.*": [
                        Column(
                            dataset_name="s3://bucket/file.json",
                            name="*",
                            alias="s3://bucket/file.json.*",
                            is_file=True,
                        )
                    ]
                },
                DbndTargetOperationType.write: {
                    "JSON_TABLE.*": [
                        Column(
                            dataset_name="JSON_TABLE",
                            name="*",
                            alias="JSON_TABLE.*",
                            is_file=False,
                            is_stage=False,
                        )
                    ]
                },
)
@pytest.mark.parametrize(
    "query, expected",
    [
        (
            COPY_INTO_TABLE_FROM_S3_FILE_QUERY,
            [
                SqlOperation(
                    extracted_schema={
                        "s3://test/test.json.*": [
                            Column(
                                dataset_name="s3://test/test.json",
                                alias="s3://test/test.json.*",
                                name="*",
                                is_file=True,
                                is_stage=False,
                            )
                        ]
                    },
                    dtypes=None,
                    records_count=NUMBER_OF_ROWS_INSERTED,
                    query=COPY_INTO_TABLE_FROM_S3_FILE_QUERY,
                    query_id=SFQID,
                    success=True,
                    op_type=DbndTargetOperationType.read,
                    error=None,
                ),
                SqlOperation(
                    extracted_schema={