def test_trigger_connection_fail(): ab_resource = airbyte_resource( build_init_resource_context(config={ "host": "some_host", "port": "8000" })) with pytest.raises(Failure, match="Exceeded max number of retries"): ab_resource.sync_and_poll("some_connection")
def test_trigger_connection_fail(): ab_resource = airbyte_resource( build_init_resource_context( config={"host": "some_host", "port": "8000", "request_max_retries": 1} ) ) responses.add( method=responses.POST, url=ab_resource.api_base_url + "/connections/sync", status=500 ) with pytest.raises(Failure, match="Exceeded max number of retries"): ab_resource.sync_and_poll("some_connection")
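# NOTE: the tests below reference get_sample_connection_json() and get_sample_job_json(),
# which are not defined in this section. The helpers sketched here are an assumption of
# what those fixtures might return, mirroring the inline payloads used in the later
# test_assets variants (streams foo/bar/baz, columns a/b on foo, 1234 bytesEmitted and
# 4321 recordsCommitted per stream); the project's real fixtures may differ.
def get_sample_connection_json(prefix=""):
    return {
        "name": "xyz",
        "syncCatalog": {
            "streams": [
                {
                    "stream": {
                        "name": prefix + "foo",
                        "jsonSchema": {
                            "properties": {"a": {"type": "str"}, "b": {"type": "int"}}
                        },
                    },
                    "config": {"selected": True},
                },
                {
                    "stream": {
                        "name": prefix + "bar",
                        "jsonSchema": {"properties": {"c": {"type": "str"}}},
                    },
                    "config": {"selected": True},
                },
                {
                    "stream": {
                        "name": prefix + "baz",
                        "jsonSchema": {"properties": {"d": {"type": "str"}}},
                    },
                    "config": {"selected": True},
                },
            ]
        },
    }


def get_sample_job_json(schema_prefix=""):
    return {
        "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
        "attempts": [
            {
                "attempt": {
                    "streamStats": [
                        {
                            "streamName": schema_prefix + stream,
                            "stats": {"bytesEmitted": 1234, "recordsCommitted": 4321},
                        }
                        for stream in ["foo", "bar", "baz"]
                    ]
                }
            }
        ],
    }
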
@responses.activate
@pytest.mark.parametrize(
    # Assumed parameter set, inferred from the branches below: one terminal success
    # state plus the three failure branches.
    "state",
    [AirbyteState.SUCCEEDED, AirbyteState.CANCELLED, AirbyteState.ERROR, "unrecognized"],
)
def test_sync_and_poll(state):
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json=get_sample_connection_json(),
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={"job": {"id": 1, "status": state}},
        status=200,
    )
    if state == AirbyteState.ERROR:
        with pytest.raises(Failure, match="Job failed"):
            ab_resource.sync_and_poll("some_connection", 0)
    elif state == AirbyteState.CANCELLED:
        with pytest.raises(Failure, match="Job was cancelled"):
            ab_resource.sync_and_poll("some_connection", 0)
    elif state == "unrecognized":
        with pytest.raises(Failure, match="unexpected state"):
            ab_resource.sync_and_poll("some_connection", 0)
    else:
        result = ab_resource.sync_and_poll("some_connection", 0)
        assert result == AirbyteOutput(
            job_details={"job": {"id": 1, "status": state}},
            connection_details=get_sample_connection_json(),
        )

@responses.activate
def test_sync_and_poll_timeout():
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json={},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={"job": {"id": 1, "status": "pending"}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={"job": {"id": 1, "status": "running"}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={"job": {"id": 1, "status": "running"}},
        status=200,
    )
    # The poll interval is longer than the timeout, so the job never reaches a
    # terminal state before the deadline.
    poll_wait_second = 2
    timeout = 1
    with pytest.raises(Failure, match="Timeout: Airbyte job"):
        ab_resource.sync_and_poll("some_connection", poll_wait_second, timeout)

@responses.activate
def test_get_job_status_bad_out_fail():
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json=None,
        status=204,
    )
    with pytest.raises(check.CheckError):
        ab_resource.get_job_status("some_connection")

@responses.activate
def test_trigger_connection():
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    resp = ab_resource.start_sync("some_connection")
    assert resp == {"job": {"id": 1}}

@responses.activate
def test_assets():
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json=get_sample_connection_json(),
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json=get_sample_job_json(),
        status=200,
    )
    airbyte_output = ab_resource.sync_and_poll("some_connection", 0, None)
    materializations = list(generate_materializations(airbyte_output, []))
    assert len(materializations) == 3
    assert MetadataEntry("bytesEmitted", value=1234) in materializations[0].metadata_entries
    assert MetadataEntry("recordsCommitted", value=4321) in materializations[0].metadata_entries

@responses.activate
def test_logging_multi_attempts(capsys):
    def _get_attempt(ls):
        return {"logs": {"logLines": ls}}

    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json={},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={"job": {"id": 1, "status": "pending"}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={
            "job": {"id": 1, "status": "running"},
            "attempts": [_get_attempt(ls) for ls in [["log1a"]]],
        },
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={
            "job": {"id": 1, "status": "running"},
            "attempts": [_get_attempt(ls) for ls in [["log1a", "log1b"]]],
        },
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={
            "job": {"id": 1, "status": "running"},
            "attempts": [
                _get_attempt(ls) for ls in [["log1a", "log1b", "log1c"], ["log2a", "log2b"]]
            ],
        },
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={
            "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
            "attempts": [
                _get_attempt(ls) for ls in [["log1a", "log1b", "log1c"], ["log2a", "log2b"]]
            ],
        },
        status=200,
    )
    ab_resource.sync_and_poll("some_connection", 0, None)
    captured = capsys.readouterr()
    # Each log line should be printed exactly once, even when it reappears in later polls
    # or spans multiple attempts.
    assert captured.out == "\n".join(["log1a", "log1b", "log1c", "log2a", "log2b"]) + "\n"

@responses.activate
@pytest.mark.parametrize(
    # Assumed parametrization: exercise the assets both without and with a table prefix.
    "schema_prefix",
    ["", "the_prefix_"],
)
def test_assets(schema_prefix):
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    destination_tables = ["foo", "bar"]
    if schema_prefix:
        destination_tables = [schema_prefix + t for t in destination_tables]
    ab_assets = build_airbyte_assets(
        "12345",
        destination_tables=destination_tables,
        asset_key_prefix=["some", "prefix"],
    )
    assert ab_assets[0].asset_keys == {
        AssetKey(["some", "prefix", t]) for t in destination_tables
    }
    assert len(ab_assets[0].op.output_defs) == 2
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json=get_sample_connection_json(prefix=schema_prefix),
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json=get_sample_job_json(schema_prefix=schema_prefix),
        status=200,
    )
    ab_job = build_assets_job(
        "ab_job",
        ab_assets,
        resource_defs={
            "airbyte": airbyte_resource.configured({"host": "some_host", "port": "8000"})
        },
    )
    res = ab_job.execute_in_process()
    materializations = [
        event.event_specific_data.materialization
        for event in res.events_for_node("airbyte_sync_12345")
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 3
    assert {m.asset_key for m in materializations} == {
        AssetKey(["some", "prefix", schema_prefix + "foo"]),
        AssetKey(["some", "prefix", schema_prefix + "bar"]),
        AssetKey(["some", "prefix", schema_prefix + "baz"]),
    }
    assert MetadataEntry("bytesEmitted", value=1234) in materializations[0].metadata_entries
    assert MetadataEntry("recordsCommitted", value=4321) in materializations[0].metadata_entries
    assert (
        MetadataEntry(
            "schema",
            value=TableSchema(
                columns=[
                    TableColumn(name="a", type="str"),
                    TableColumn(name="b", type="int"),
                ]
            ),
        )
        in materializations[0].metadata_entries
    )

@responses.activate
def test_assets():
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    ab_assets = build_airbyte_assets("12345", ["foo", "bar"], asset_key_prefix=["some", "prefix"])
    assert len(ab_assets[0].op.output_defs) == 2
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json={
            "name": "xyz",
            "syncCatalog": {
                "streams": [
                    {
                        "stream": {
                            "name": "foo",
                            "jsonSchema": {
                                "properties": {"a": {"type": "str"}, "b": {"type": "int"}}
                            },
                        },
                        "config": {"selected": True},
                    },
                    {
                        "stream": {
                            "name": "bar",
                            "jsonSchema": {"properties": {"c": {"type": "str"}}},
                        },
                        "config": {"selected": True},
                    },
                    {
                        "stream": {
                            "name": "baz",
                            "jsonSchema": {"properties": {"d": {"type": "str"}}},
                        },
                        "config": {"selected": True},
                    },
                ]
            },
        },
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={
            "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
            "attempts": [
                {
                    "attempt": {
                        "streamStats": [
                            {
                                "streamName": "foo",
                                "stats": {"bytesEmitted": 1234, "recordsCommitted": 4321},
                            },
                            {
                                "streamName": "bar",
                                "stats": {"bytesEmitted": 1234, "recordsCommitted": 4321},
                            },
                            {
                                "streamName": "baz",
                                "stats": {"bytesEmitted": 1111, "recordsCommitted": 1111},
                            },
                        ]
                    }
                }
            ],
        },
        status=200,
    )
    ab_job = build_assets_job(
        "ab_job",
        ab_assets,
        resource_defs={
            "airbyte": airbyte_resource.configured({"host": "some_host", "port": "8000"})
        },
    )
    res = ab_job.execute_in_process()
    materializations = [
        event
        for event in res.events_for_node("airbyte_sync_12345")
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 3
    assert (
        MetadataEntry.text("a,b", "columns")
        in materializations[0].event_specific_data.materialization.metadata_entries
    )
    assert (
        MetadataEntry.int(1234, "bytesEmitted")
        in materializations[0].event_specific_data.materialization.metadata_entries
    )
    assert (
        MetadataEntry.int(4321, "recordsCommitted")
        in materializations[0].event_specific_data.materialization.metadata_entries
    )

@responses.activate
def test_assets():
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/get",
        json={
            "name": "xyz",
            "syncCatalog": {
                "streams": [
                    {
                        "stream": {
                            "name": "foo",
                            "jsonSchema": {
                                "properties": {"a": {"type": "str"}, "b": {"type": "int"}}
                            },
                        },
                        "config": {"selected": True},
                    },
                    {
                        "stream": {
                            "name": "bar",
                            "jsonSchema": {"properties": {"c": {"type": "str"}}},
                        },
                        "config": {"selected": True},
                    },
                    {
                        # "baz" is deselected, so it should not produce a materialization.
                        "stream": {
                            "name": "baz",
                            "jsonSchema": {"properties": {"d": {"type": "str"}}},
                        },
                        "config": {"selected": False},
                    },
                ]
            },
        },
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/connections/sync",
        json={"job": {"id": 1}},
        status=200,
    )
    responses.add(
        method=responses.POST,
        url=ab_resource.api_base_url + "/jobs/get",
        json={
            "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
            "attempts": [
                {
                    "attempt": {
                        "streamStats": [
                            {
                                "streamName": "foo",
                                "stats": {"bytesEmitted": 1234, "recordsCommitted": 4321},
                            },
                            {
                                "streamName": "bar",
                                "stats": {"bytesEmitted": 1234, "recordsCommitted": 4321},
                            },
                        ]
                    }
                }
            ],
        },
        status=200,
    )
    airbyte_output = ab_resource.sync_and_poll("some_connection", 0, None)
    materializations = list(generate_materializations(airbyte_output, []))
    assert len(materializations) == 2
    assert MetadataEntry.text("a,b", "columns") in materializations[0].metadata_entries
    assert MetadataEntry.int(1234, "bytesEmitted") in materializations[0].metadata_entries
    assert MetadataEntry.int(4321, "recordsCommitted") in materializations[0].metadata_entries