def test_s3_file_manager_resource_with_profile():
    """Check that an unknown ``profile_name`` breaks S3 file manager initialization.

    The op is invoked with a configured ``s3_file_manager`` whose config names
    the profile "some-profile". The test expects a
    ``DagsterResourceFunctionError`` whose ``user_exception`` is an
    ``exceptions.ProfileNotFound`` carrying the exact message asserted below.
    """
    resource_config = {
        "use_unsigned_session": True,
        "region_name": "us-west-1",
        "endpoint_url": "http://alternate-s3-host.io",
        "s3_bucket": "some-bucket",
        "s3_prefix": "some-prefix",
        "profile_name": "some-profile",
    }

    @op(required_resource_keys={"file_manager"})
    def test_op(context):
        # Placeholder body: the test is only about resource initialization.
        return context.log.info("return from test_solid")

    with pytest.raises(DagsterResourceFunctionError) as exc_info:
        file_manager_def = configured(s3_file_manager)(resource_config)
        context = build_op_context(resources={"file_manager": file_manager_def})
        test_op(context)

    # The wrapped exception is what the resource function actually raised.
    user_exception = exc_info.value.user_exception
    assert isinstance(user_exception, exceptions.ProfileNotFound)
    assert str(user_exception) == "The config profile (some-profile) could not be found"
def test_pagerduty_resource():
    """Send one PagerDuty event through ``pagerduty_resource`` from inside an op.

    The enqueue endpoint is registered on a ``responses.RequestsMock`` with a
    202 status, so the POST is answered by the mock.
    """

    @op(required_resource_keys={"pagerduty"})
    def pagerduty_op(context):
        pagerduty = context.resources.pagerduty
        assert pagerduty

        mocked_reply = {"status": "success", "message": "Event processed", "dedup_key": "foobar"}
        with responses.RequestsMock() as rsps:
            rsps.add(
                rsps.POST,
                "https://events.pagerduty.com/v2/enqueue/",
                status=202,
                json=mocked_reply,
            )
            pagerduty.EventV2_create(
                summary="PING OK - Packet loss = 0%, RTA = 1.41 ms Host 'acme-andromeda-sv1-c40"
                ":: 179.21.24.50' is DOWN",
                source="prod05.theseus.acme-widgets.com",
                severity="error",
                event_action="trigger",
                dedup_key="foobar",
                timestamp="2015-07-17T08:42:58.315+0000",
                component="mysql",
                group="prod-datapipe",
                event_class="High CPU",
                custom_details={"ping time": "1500ms", "load avg": 0.75},
            )
        return True

    configured_pagerduty = pagerduty_resource.configured(
        {"routing_key": "0123456789abcdef0123456789abcdef"}
    )
    with build_op_context(resources={"pagerduty": configured_pagerduty}) as context:
        assert pagerduty_op(context)
def test_adls_file_manager_resource(MockADLS2FileManager, MockADLS2Resource):
    """Check that ``adls2_file_manager`` builds its file manager from the supplied config.

    Both collaborators are injected as mocks. The op asserts on how they were
    called, and a flag set at the end of the op proves the body actually ran.
    """
    run_state = {"it_ran": False}
    resource_config = {
        "storage_account": "some-storage-account",
        "credential": {
            "key": "some-key",
        },
        "adls2_file_system": "some-file-system",
        "adls2_prefix": "some-prefix",
    }

    @op(required_resource_keys={"file_manager"})
    def test_solid(context):
        # The resource handed to the op must be the mocked file manager instance.
        assert context.resources.file_manager == MockADLS2FileManager.return_value

        # The file manager must have been initialized with the config supplied above.
        MockADLS2FileManager.assert_called_once_with(
            adls2_client=MockADLS2Resource.return_value.adls2_client,
            file_system=resource_config["adls2_file_system"],
            prefix=resource_config["adls2_prefix"],
        )
        MockADLS2Resource.assert_called_once_with(
            resource_config["storage_account"],
            resource_config["credential"]["key"],
        )

        run_state["it_ran"] = True

    file_manager_def = configured(adls2_file_manager)(resource_config)
    test_solid(build_op_context(resources={"file_manager": file_manager_def}))

    assert run_state["it_ran"]
def test_op_config():
    """Check that op config reaches the op via direct invocation, a graph, and a job."""

    @op(config_schema={"conf_str": str})
    def my_op(context):
        assert context.op_config == {"conf_str": "foo"}

    def make_run_config():
        # Build a fresh dict per call so each execution gets its own config object.
        return {"ops": {"my_op": {"config": {"conf_str": "foo"}}}}

    # 1) Direct invocation with a hand-built context.
    direct_context = build_op_context(config={"conf_str": "foo"})
    my_op(direct_context)

    @graph
    def basic():
        my_op()

    # 2) In-process graph execution with run config supplied at call time.
    graph_result = basic.execute_in_process(run_config=make_run_config())
    assert graph_result.success

    # 3) The same config supplied when converting the graph to a job.
    job = basic.to_job(config=make_run_config())
    job_result = job.execute_in_process()
    assert job_result.success
def test_database_resource():
    """Invoke ``op_requires_resources`` with a stub object as the ``database`` resource."""

    class BasicDatabase:
        def execute_query(self, query):
            """Accept any query and do nothing."""

    context = build_op_context(resources={"database": BasicDatabase()})
    op_requires_resources(context)
def test_seed_op(conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """Run ``dbt_seed_op`` with a configured dbt CLI resource and expect one result entry."""
    dbt_config = {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    context = build_op_context(resources={"dbt": dbt_cli_resource.configured(dbt_config)})

    seed_output = dbt_seed_op(context)

    assert len(seed_output.result["results"]) == 1
def test_another_new_op(capsys):
    """Invoke ``another_configured_example`` with config 6 and count "wheaties" on stderr."""
    assert another_configured_example.name == "another_configured_example"

    another_configured_example(build_op_context(config=6))

    stderr_text = capsys.readouterr().err
    assert stderr_text.count("wheaties") == 6
def test_invoking_asset_with_context():
    """Call an asset directly with a built op context and check the value it returns."""

    @asset
    def asset_with_context(context, arg1):
        # The context passed at invocation must arrive as an OpExecutionContext.
        assert isinstance(context, OpExecutionContext)
        return arg1

    assert asset_with_context(build_op_context(), 1) == 1
def test_partition_config_ops_compile_and_execute():
    """Invoke each partition-configured op with a ``date`` config value."""
    # The loop variable is deliberately not called ``op`` so it cannot be
    # confused with the decorator of the same name.
    for partitioned_op in (my_partitioned_asset_op,):
        partitioned_op(build_op_context(config={"date": "2020-01-01"}))
def test_gcs_resource():
    """Check that ``gcs_resource`` configured with a project exposes that project to an op."""

    @op(required_resource_keys={"gcs"})
    def gcs_op(context):
        gcs = context.resources.gcs
        assert gcs
        assert gcs.project == PROJECT_ID
        return 1

    configured_gcs = gcs_resource.configured({"project": PROJECT_ID})
    assert gcs_op(build_op_context(resources={"gcs": configured_gcs}))
def test_op(mock_athena_client):  # pylint: disable=unused-argument
    """Run a query through ``fake_athena_resource`` from an op and check the returned rows."""
    from dagster import build_op_context, op
    from dagster_aws.athena import fake_athena_resource

    @op(required_resource_keys={"athena"})
    def example_athena_op(context):
        athena = context.resources.athena
        return athena.execute_query("SELECT 1", fetch_results=True)

    rows = example_athena_op(build_op_context(resources={"athena": fake_athena_resource}))
    assert rows == [("1",)]
def test_add_output_metadata_after_output():
    """Logging metadata for an output that was already yielded must raise.

    The op yields its only output and then calls ``add_output_metadata``.
    Draining the generator is expected to raise
    ``DagsterInvariantViolationError`` with the message below.
    """
    # Local import so this block stays self-contained.
    import re

    @op
    def the_op(context):
        yield Output(value=1)
        # Too late: the output above has already been yielded.
        context.add_output_metadata({"foo": "bar"})

    expected_message = "In op 'the_op', attempted to log output metadata for output 'result' which has already been yielded. Metadata must be logged before the output is yielded."

    # ``match`` is applied as a regular expression, so the literal message is
    # escaped; otherwise each "." would match any character.
    with pytest.raises(DagsterInvariantViolationError, match=re.escape(expected_message)):
        list(the_op(build_op_context()))
def test_log_metadata_after_dynamic_output():
    """Logging metadata for a dynamic output that was already yielded must raise.

    The op yields a ``DynamicOutput`` with mapping key "one" and then calls
    ``add_output_metadata`` for that same mapping key. Draining the generator
    is expected to raise ``DagsterInvariantViolationError`` with the message below.
    """
    # Local import so this block stays self-contained.
    import re

    @op(out=DynamicOut())
    def the_op(context):
        yield DynamicOutput(1, mapping_key="one")
        # Too late: the output for mapping key "one" has already been yielded.
        context.add_output_metadata({"foo": "bar"}, mapping_key="one")

    expected_message = "In op 'the_op', attempted to log output metadata for output 'result' with mapping_key 'one' which has already been yielded. Metadata must be logged before the output is yielded."

    # ``match`` is applied as a regular expression, so the literal message is
    # escaped; otherwise each "." would match any character.
    with pytest.raises(DagsterInvariantViolationError, match=re.escape(expected_message)):
        list(the_op(build_op_context()))
def test_run_op(
    dbt_seed, conn_string, test_project_dir, dbt_config_dir
):  # pylint: disable=unused-argument
    """Run ``dbt_run_op`` and check the number of yielded events and of dbt results."""
    dbt_config = {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    context = build_op_context(resources={"dbt": dbt_cli_resource.configured(dbt_config)})

    run_events = list(dbt_run_op(context))

    # includes asset materializations
    assert len(run_events) == 5

    # The last yielded event carries the dbt result payload.
    final_event = run_events[-1]
    assert len(final_event.value.result["results"]) == 4
def test_context_manager_resource():
    """Exercise a context-manager-based resource through direct op invocation and a graph.

    Setup, compute, and teardown each append a marker to ``event_list`` so the
    order in which they run can be asserted. Three scenarios are covered:

    1. ``build_op_context`` used as a context manager: setup, compute, teardown.
    2. ``build_op_context`` used without ``with``: invoking the op must raise
       ``DagsterInvariantViolationError``.
    3. The op run inside a graph via ``execute_in_process``: the same
       setup/compute/teardown order as scenario 1.
    """
    event_list = []

    @resource
    @contextmanager
    def cm_resource():
        try:
            # Setup marker, recorded before the resource value is handed out.
            event_list.append("foo")
            yield "foo"
        finally:
            # Teardown marker, recorded when the generator is closed or resumed.
            event_list.append("finally")

    @op(required_resource_keys={"cm"})
    def basic(context):
        event_list.append("compute")
        assert context.resources.cm == "foo"

    with build_op_context(resources={"cm": cm_resource}) as context:
        basic(context)

    assert event_list == ["foo", "compute", "finally"]  # Ensures that we teardown after compute

    # Without the ``with`` block the generator-backed resource cannot be accessed.
    with pytest.raises(
        DagsterInvariantViolationError,
        match="At least one provided resource is a generator, but attempting to access resources "
        "outside of context manager scope.",
    ):
        basic(build_op_context(resources={"cm": cm_resource}))

    @graph
    def call_basic():
        basic()

    # Rebinding (not clearing) is enough: the closures above look up
    # ``event_list`` in this enclosing scope each time they run, so they now
    # append to this fresh list.
    event_list = []

    assert call_basic.execute_in_process(resources={"cm": cm_resource}).success
    assert event_list == ["foo", "compute", "finally"]
def test_add_output_metadata():
    """Metadata logged before each output is yielded can be read back from the context."""

    @op(out={"out1": Out(), "out2": Out()})
    def the_op(context):
        context.add_output_metadata({"foo": "bar"}, output_name="out1")
        yield Output(value=1, output_name="out1")
        context.add_output_metadata({"bar": "baz"}, output_name="out2")
        yield Output(value=2, output_name="out2")

    context = build_op_context()
    yielded = list(the_op(context))
    assert len(yielded) == 2

    expected_metadata = {
        "out1": {"foo": "bar"},
        "out2": {"bar": "baz"},
    }
    for output_name, metadata in expected_metadata.items():
        assert context.get_output_metadata(output_name) == metadata
def test_s3_file_manager_resource(MockS3FileManager, mock_boto3_resource):
    """Check that ``s3_file_manager`` wires its config into boto3 and the file manager.

    Both collaborators are injected as mocks. The op asserts on how they were
    called, and a flag set at the end of the op proves the body actually ran.
    """
    run_state = {"it_ran": False}
    resource_config = {
        "use_unsigned_session": True,
        "region_name": "us-west-1",
        "endpoint_url": "http://alternate-s3-host.io",
        "s3_bucket": "some-bucket",
        "s3_prefix": "some-prefix",
    }

    mock_s3_session = mock_boto3_resource.return_value.meta.client

    @op(required_resource_keys={"file_manager"})
    def test_op(context):
        # The resource handed to the op must be the mocked file manager instance.
        assert context.resources.file_manager == MockS3FileManager.return_value

        # The file manager must have been initialized with the config supplied above.
        MockS3FileManager.assert_called_once_with(
            s3_session=mock_s3_session,
            s3_bucket=resource_config["s3_bucket"],
            s3_base_key=resource_config["s3_prefix"],
        )

        # The ``config`` argument is taken from the recorded call and fed back
        # into the call assertion, then inspected separately below.
        boto_config = mock_boto3_resource.call_args[1]["config"]
        mock_boto3_resource.assert_called_once_with(
            "s3",
            region_name=resource_config["region_name"],
            endpoint_url=resource_config["endpoint_url"],
            use_ssl=True,
            config=boto_config,
        )
        assert boto_config.retries["max_attempts"] == 5

        run_state["it_ran"] = True

    file_manager_def = configured(s3_file_manager)(resource_config)
    test_op(build_op_context(resources={"file_manager": file_manager_def}))

    assert run_state["it_ran"]
def test_logged_user_events():
    """Events passed to ``context.log_event`` are retrievable, in order, via ``get_events``.

    The expected list holds only the four logged events; the yielded
    ``AssetMaterialization("fifth")`` and the ``Output`` are not in it.
    """

    @op
    def logs_events(context):
        context.log_event(AssetMaterialization("first"))
        context.log_event(Materialization("second"))
        context.log_event(ExpectationResult(success=True))
        context.log_event(AssetObservation("fourth"))
        yield AssetMaterialization("fifth")
        yield Output("blah")

    context = build_op_context()
    # Drain the generator so the whole op body executes.
    list(logs_events(context))

    logged_types = list(map(type, context.get_events()))
    assert logged_types == [
        AssetMaterialization,
        Materialization,
        ExpectationResult,
        AssetObservation,
    ]
def test_hello():
    """
    Example test for a Dagster op: invoke ``id_range_for_time`` with its two resources.

    For hints on how to test your Dagster ops, see our documentation tutorial on Testing:
    https://docs.dagster.io/tutorial/testable
    """
    bounds = partition_bounds.configured(
        {
            "start": "2020-12-30 00:00:00",
            "end": "2020-12-30 01:00:00",
        }
    )
    resources = {
        "partition_bounds": bounds,
        "hn_client": hn_snapshot_client,
    }
    with build_op_context(resources=resources) as context:
        id_range_for_time(context)
def test_missing_column():
    """``download_items`` must produce a ``score`` column even when items lack that field."""

    def fetch_item_by_id(_):
        # Every item has the same shape and deliberately has no "score" key.
        return {
            "id": 5,
            "parent": 1.0,
            "time": 5,
            "type": "a",
            "by": "a",
            "text": "a",
            "kids": ["a", "b"],
            "title": "a",
            "descendants": 1.0,
            "url": "a",
        }

    hn_client = MagicMock(fetch_item_by_id=fetch_item_by_id)
    context = build_op_context(resources={"hn_client": hn_client})

    output = download_items(context, id_range=(0, 1))

    assert "score" in output.value.columns
def test_log_metadata_multiple_dynamic_outputs():
    """Metadata is tracked separately per (output name, mapping key) pair.

    The op interleaves ``add_output_metadata`` calls and ``DynamicOutput``
    yields across two outputs; each pair's metadata is then read back.
    """

    @op(out={"out1": DynamicOut(), "out2": DynamicOut()})
    def the_op(context):
        context.add_output_metadata({"one": "one"}, output_name="out1", mapping_key="one")
        yield DynamicOutput(value=1, output_name="out1", mapping_key="one")
        context.add_output_metadata({"two": "two"}, output_name="out1", mapping_key="two")
        context.add_output_metadata({"three": "three"}, output_name="out2", mapping_key="three")
        yield DynamicOutput(value=2, output_name="out1", mapping_key="two")
        yield DynamicOutput(value=3, output_name="out2", mapping_key="three")
        context.add_output_metadata({"four": "four"}, output_name="out2", mapping_key="four")
        yield DynamicOutput(value=4, output_name="out2", mapping_key="four")

    context = build_op_context()
    yielded = list(the_op(context))
    assert len(yielded) == 4

    expected = [
        ("out1", "one", {"one": "one"}),
        ("out1", "two", {"two": "two"}),
        ("out2", "three", {"three": "three"}),
        ("out2", "four", {"four": "four"}),
    ]
    for output_name, mapping_key, metadata in expected:
        assert context.get_output_metadata(output_name, mapping_key=mapping_key) == metadata
def test_cm_resource_op():
    """Invoke ``use_db_connection`` with ``db_connection`` inside a context-managed op context."""
    resources = {"db_connection": db_connection}
    with build_op_context(resources=resources) as context:
        use_db_connection(context)
def test_op_with_context():
    """``op_requires_foo`` reports the plain-value ``foo`` resource it was given."""
    result = op_requires_foo(build_op_context(resources={"foo": "bar"}))
    assert result == "found bar"
def test_op_resource_def():
    """``op_requires_foo`` reports the value from a configured ``my_foo_resource``."""
    configured_foo = my_foo_resource.configured({"my_str": "bar"})
    result = op_requires_foo(build_op_context(resources={"foo": configured_foo}))
    assert result == "found bar"
def test_hey():
    """Invoking ``hey`` with a default context and "Yo" returns ``None``."""
    result = hey(build_op_context(), "Yo")
    assert result is None
def test_datadog_resource(
    event,
    gauge,
    increment,
    decrement,
    histogram,
    distribution,
    statsd_set,
    service_check,
    timed,
    timing,
):
    """Call each datadog resource method from an op and check it reaches the matching mock.

    Every parameter is an injected mock. After each call on the resource, the
    corresponding mock is asserted to have received the same arguments.
    """

    @op(required_resource_keys={"datadog"})
    def datadog_op(context):
        datadog = context.resources.datadog
        assert datadog

        # event
        datadog.event("Man down!", "This server needs assistance.")
        event.assert_called_with("Man down!", "This server needs assistance.")

        # gauge
        datadog.gauge("users.online", 1001, tags=["protocol:http"])
        gauge.assert_called_with("users.online", 1001, tags=["protocol:http"])

        # increment
        datadog.increment("page.views")
        increment.assert_called_with("page.views")

        # decrement
        datadog.decrement("page.views")
        decrement.assert_called_with("page.views")

        # histogram
        datadog.histogram("album.photo.count", 26, tags=["gender:female"])
        histogram.assert_called_with("album.photo.count", 26, tags=["gender:female"])

        # distribution
        datadog.distribution("album.photo.count", 26, tags=["color:blue"])
        distribution.assert_called_with("album.photo.count", 26, tags=["color:blue"])

        # set
        datadog.set("visitors.uniques", 999, tags=["browser:ie"])
        statsd_set.assert_called_with("visitors.uniques", 999, tags=["browser:ie"])

        # service check
        datadog.service_check("svc.check_name", datadog.WARNING)
        service_check.assert_called_with("svc.check_name", datadog.WARNING)

        # timing
        datadog.timing("query.response.time", 1234)
        timing.assert_called_with("query.response.time", 1234)

        # timed decorator
        @datadog.timed("run_fn")
        def run_fn():
            pass

        run_fn()
        timed.assert_called_with("run_fn")

        return True

    configured_datadog = datadog_resource.configured(
        {"api_key": "NOT_USED", "app_key": "NOT_USED"}
    )
    context = build_op_context(resources={"datadog": configured_datadog})
    assert datadog_op(context)
def test_uses_context():
    """``uses_context`` returns "bar" when the ``foo`` resource is "bar"."""
    assert uses_context(build_op_context(resources={"foo": "bar"})) == "bar"
def test_my_configurable_op():
    """Invoke ``my_configurable_op`` with an ``api_endpoint`` config value."""
    context = build_op_context(config={"api_endpoint": "https://localhost:3000"})
    my_configurable_op(context)
def test_context_op():
    """Invoke ``context_op`` with a ``name`` config value."""
    context = build_op_context(config={"name": "my_name"})
    context_op(context)
def test_ops_compile_and_execute():
    """Invoke the observation ops with ``None`` and the partitioned op with a ``date`` config."""
    # These two ops are called with ``None`` in place of a context.
    for contextless_op in (observation_op, observes_dataset_op):
        contextless_op(None)

    partitioned_dataset_op(build_op_context(config={"date": "2020-01-01"}))