def test_execute_display_command():
    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {"config": {"input_str": "apple"}},
                "take_string_1_asset": {"config": {"input_str": "apple"}},
            },
            "resources": {"object_manager": {"config": {"base_dir": temp_dir}}},
        }

        # Write the run config to a temp file; the file is temporary because the
        # intermediate storage directory is itself temporary.
        with open(os.path.join(temp_dir, "pipeline_config.yaml"), "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (os.path.join(temp_dir, "pipeline_config.yaml"),),
            "pipeline": "asset_pipeline",
            "python_file": file_relative_path(
                __file__, "../../core_tests/execution_tests/memoized_dev_loop_pipeline.py"
            ),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

        # Execute the pipeline once so that addresses have been populated.
        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output
def get_ephemeral_instance(temp_dir):
    run_store = SqliteRunStorage.from_local(temp_dir)
    event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
    compute_log_manager = LocalComputeLogManager(temp_dir)
    instance = DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=run_store,
        event_storage=event_store,
        compute_log_manager=compute_log_manager,
        run_launcher=DefaultRunLauncher(),
        run_coordinator=DefaultRunCoordinator(),
    )
    return instance
@contextmanager
def broken_compute_log_manager_instance(fail_on_setup=False, fail_on_teardown=False):
    # Yields a temp-dir-backed instance whose compute log manager fails on demand.
    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            yield DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=SqliteRunStorage.from_local(temp_dir),
                event_storage=SqliteEventLogStorage(temp_dir),
                compute_log_manager=BrokenComputeLogManager(
                    fail_on_setup=fail_on_setup, fail_on_teardown=fail_on_teardown
                ),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            compute_log_manager = LocalComputeLogManager(temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=compute_log_manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
                settings={"telemetry": {"enabled": False}},
            )

            result = execute_pipeline(simple, instance=instance)

            assert run_store.has_run(result.run_id)
            assert run_store.get_run_by_id(result.run_id).status == PipelineRunStatus.SUCCESS
            assert DagsterEventType.PIPELINE_SUCCESS in [
                event.dagster_event.event_type
                for event in event_store.get_logs_for_run(result.run_id)
                if event.is_dagster_event
            ]
            stats = event_store.get_stats_for_run(result.run_id)
            assert stats.steps_succeeded == 1
            assert stats.end_time is not None
def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(
                bucket=mock_s3_bucket.name, prefix=PREFIX, skip_empty_files=True
            )
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()
def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
def test_compute_log_manager_with_envvar(gcs_bucket):
    @job
    def simple():
        @op
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with open(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")) as f:
        with tempfile.TemporaryDirectory() as temp_dir:
            with environ({"ENV_VAR": f.read(), "DAGSTER_HOME": temp_dir}):
                run_store = SqliteRunStorage.from_local(temp_dir)
                event_store = SqliteEventLogStorage(temp_dir)
                manager = GCSComputeLogManager(
                    bucket=gcs_bucket,
                    prefix="my_prefix",
                    local_dir=temp_dir,
                    json_credentials_envvar="ENV_VAR",
                )
                instance = DagsterInstance(
                    instance_type=InstanceType.PERSISTENT,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=run_store,
                    event_storage=event_store,
                    compute_log_manager=manager,
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=DefaultRunLauncher(),
                    ref=InstanceRef.from_dir(temp_dir),
                )
                result = simple.execute_in_process(instance=instance)
                compute_steps = [
                    event.step_key
                    for event in result.all_node_events
                    if event.event_type == DagsterEventType.STEP_START
                ]
                assert len(compute_steps) == 1
                step_key = compute_steps[0]

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data

                # Check GCS directly
                stderr_gcs = (
                    storage.Client()
                    .bucket(gcs_bucket)
                    .blob(f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
                    .download_as_bytes()
                    .decode("utf-8")
                )
                for expected in EXPECTED_LOGS:
                    assert expected in stderr_gcs

                # Check download behavior by deleting locally cached logs
                compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
                for filename in os.listdir(compute_logs_dir):
                    os.unlink(os.path.join(compute_logs_dir, filename))

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data
def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(
                bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
            )
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)
            compute_steps = [
                event.step_key
                for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err"
            )
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data
def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_launcher=DefaultRunLauncher(),
            run_coordinator=DefaultRunCoordinator(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {"config": {"input_str": "apple"}},
                "take_string_1_asset": {"config": {"input_str": "apple"}},
            },
            "resources": {"object_manager": {"config": {"base_dir": temp_dir}}},
        }

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(asset_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_1_asset"]["config"]["input_str"] = "banana"
        assert get_step_keys_to_execute(asset_pipeline, run_config, "only_mode") == [
            "take_string_1_asset"
        ]

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(asset_pipeline, run_config, "only_mode")