def test_reconstructable_cli_args():
    """Check the CLI argument strings produced by ``get_cli_args``.

    Three targets are exercised: a file target with an explicit working
    directory, a file target without one (the expected string then uses the
    current working directory), and a module target.
    """
    expected_file = os.path.abspath(os.path.expanduser("foo_file"))

    # File target, explicit working directory.
    explicit_dir_repo = ReconstructableRepository.for_file(
        "foo_file", "bar_function", "/path/to/working_dir"
    )
    expected_explicit = "-f {foo_file} -a bar_function -d {working_directory}".format(
        foo_file=expected_file,
        working_directory=os.path.abspath(os.path.expanduser("/path/to/working_dir")),
    )
    assert explicit_dir_repo.get_cli_args() == expected_explicit

    # File target, no working directory supplied.
    default_dir_repo = ReconstructableRepository.for_file("foo_file", "bar_function")
    expected_default = "-f {foo_file} -a bar_function -d {working_dir}".format(
        foo_file=expected_file,
        working_dir=os.getcwd(),
    )
    assert default_dir_repo.get_cli_args() == expected_default

    # Module target.
    module_repo = ReconstructableRepository.for_module("foo_module", "bar_function")
    assert module_repo.get_cli_args() == "-m foo_module -a bar_function"
def test_reconstructable_cli_args():
    '''Check the CLI argument strings produced by ``get_cli_args``.

    Covers a file target with an explicit working directory (expected
    verbatim in the output), a file target without one (expected to use the
    current working directory), and a module target.
    '''
    expected_file = os.path.abspath(os.path.expanduser('foo_file'))

    # File target, explicit working directory.
    explicit_dir_repo = ReconstructableRepository.for_file(
        'foo_file', 'bar_function', '/path/to/working_dir'
    )
    expected_explicit = '-f {foo_file} -a bar_function -d /path/to/working_dir'.format(
        foo_file=expected_file
    )
    assert explicit_dir_repo.get_cli_args() == expected_explicit

    # File target, no working directory supplied.
    default_dir_repo = ReconstructableRepository.for_file('foo_file', 'bar_function')
    expected_default = '-f {foo_file} -a bar_function -d {working_dir}'.format(
        foo_file=expected_file,
        working_dir=os.getcwd(),
    )
    assert default_dir_repo.get_cli_args() == expected_default

    # Module target.
    module_repo = ReconstructableRepository.for_module('foo_module', 'bar_function')
    assert module_repo.get_cli_args() == '-m foo_module -a bar_function'
def test_bad_should_execute():
    '''Launching ``bad_should_execute_schedule`` must fail and record a failed tick.

    The launch result is expected to be a ``ScheduledExecutionFailed`` with no
    run id, and both the result and the first stored tick must carry the
    should_execute error message.
    '''
    expected_error = (
        'Error occurred during the execution of should_execute for schedule bad_should_execute_schedule'
    )

    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        instance = DagsterInstance.get()
        repo = ReconstructableRepository.for_file(__file__, 'the_repo')
        schedule = repo.get_reconstructable_schedule('bad_should_execute_schedule')

        result = sync_launch_scheduled_execution(schedule.get_origin())

        assert isinstance(result, ScheduledExecutionFailed)
        assert expected_error in result.errors[0].to_string()
        assert not result.run_id

        ticks = instance.get_schedule_ticks(schedule.get_origin_id())
        first_tick = ticks[0]
        assert first_tick.status == ScheduleTickStatus.FAILURE
        assert expected_error in first_tick.error.message
def test_run_finished(self, graphql_context):
    """Cancelling an already-successful run must return a termination failure.

    Runs ``noop_pipeline`` to completion, then issues the run cancellation
    query and checks that the response type is
    ``TerminatePipelineExecutionFailure`` with a message reporting the
    SUCCESS status.
    """
    instance = graphql_context.instance

    repo_path = file_relative_path(__file__, "setup.py")
    recon_repo = ReconstructableRepository.for_file(repo_path, "test_repo")
    pipeline = recon_repo.get_reconstructable_pipeline("noop_pipeline")

    pipeline_result = execute_pipeline(pipeline, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.run_id

    time.sleep(0.05)  # guarantee execution finish

    result = execute_dagster_graphql(
        graphql_context,
        RUN_CANCELLATION_QUERY,
        variables={"runId": pipeline_result.run_id},
    )

    termination = result.data["terminatePipelineExecution"]
    assert termination["__typename"] == "TerminatePipelineExecutionFailure"
    assert "is not in a started state. Current status is SUCCESS" in termination["message"]
def cli_api_schedule_origin(schedule_name):
    '''Yield the origin of the named schedule from ``the_repo`` in this file.

    ``DAGSTER_HOME`` points at a fresh temporary directory for as long as the
    generator is suspended at the ``yield``.
    NOTE(review): presumably wrapped by a context-manager decorator outside
    this block -- confirm.
    '''
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        repo = ReconstructableRepository.for_file(__file__, 'the_repo')
        yield repo.get_reconstructable_schedule(schedule_name).get_origin()
def recon_repo_for_cli_args(kwargs):
    """Build a ReconstructableRepository from parsed CLI keyword arguments.

    Supported combinations:

    * ``module_name`` + ``fn_name``  -> ``ReconstructableRepository.for_module``
    * ``python_file`` + ``fn_name``  -> ``ReconstructableRepository.for_file``
      (the file path is made absolute first)

    ``pipeline_name`` must not be set, ``workspace`` is reported as not
    implemented, and any other combination is rejected through
    ``_cli_load_invariant``.
    """
    check.dict_param(kwargs, "kwargs")
    _cli_load_invariant(kwargs.get("pipeline_name") is None)

    module_name = kwargs.get("module_name")
    python_file = kwargs.get("python_file")
    fn_name = kwargs.get("fn_name")
    repository_yaml_unset = kwargs.get("repository_yaml") is None

    if kwargs.get("workspace"):
        check.not_implemented("Workspace not supported yet in this cli command")
    elif module_name and fn_name:
        # A module target excludes both a repository yaml and a python file.
        _cli_load_invariant(repository_yaml_unset)
        _cli_load_invariant(python_file is None)
        return ReconstructableRepository.for_module(
            module_name,
            fn_name,
            get_working_directory_from_kwargs(kwargs),
        )
    elif python_file and fn_name:
        # A file target excludes both a repository yaml and a module name.
        _cli_load_invariant(repository_yaml_unset)
        _cli_load_invariant(module_name is None)
        return ReconstructableRepository.for_file(
            os.path.abspath(python_file),
            fn_name,
            get_working_directory_from_kwargs(kwargs),
        )
    else:
        _cli_load_invariant(False)
def recon_repo_for_cli_args(kwargs):
    '''Build a ReconstructableRepository from parsed CLI keyword arguments.

    Supported combinations:

    * ``repository_yaml`` (or ``all_none(kwargs)``) -> ``from_yaml``; without
      an explicit path, ``DEFAULT_REPOSITORY_YAML_FILENAME`` is used, and the
      file must exist.
    * ``module_name`` + ``fn_name`` -> ``for_module``
    * ``python_file`` + ``fn_name`` -> ``for_file`` (path made absolute)

    ``pipeline_name`` must not be set; any other combination is rejected
    through ``_cli_load_invariant``.
    '''
    check.dict_param(kwargs, 'kwargs')
    _cli_load_invariant(kwargs.get('pipeline_name') is None)

    repository_yaml = kwargs.get('repository_yaml')
    module_name = kwargs.get('module_name')
    python_file = kwargs.get('python_file')
    fn_name = kwargs.get('fn_name')

    if repository_yaml or all_none(kwargs):
        # A yaml target excludes every other way of naming the repository.
        _cli_load_invariant(module_name is None)
        _cli_load_invariant(python_file is None)
        _cli_load_invariant(fn_name is None)

        if repository_yaml:
            repo_yaml = os.path.abspath(repository_yaml)
        else:
            repo_yaml = DEFAULT_REPOSITORY_YAML_FILENAME

        _cli_load_invariant(
            os.path.exists(repo_yaml),
            'Expected to use file "{}" to load repository but it does not exist. '
            'Verify your current working directory or CLI arguments.'.format(repo_yaml),
        )
        return ReconstructableRepository.from_yaml(repo_yaml)
    elif module_name and fn_name:
        _cli_load_invariant(repository_yaml is None)
        _cli_load_invariant(python_file is None)
        return ReconstructableRepository.for_module(module_name, fn_name)
    elif python_file and fn_name:
        _cli_load_invariant(repository_yaml is None)
        _cli_load_invariant(module_name is None)
        return ReconstructableRepository.for_file(os.path.abspath(python_file), fn_name)
    else:
        _cli_load_invariant(False)
def get_test_project_external_pipeline(pipeline_name):
    '''Return the full external pipeline ``pipeline_name`` from the demo repo.

    The repository is loaded in-process from ``test_pipelines/repo.py``
    (relative to this file) via ``define_demo_execution_repo``.
    '''
    repo_file = file_relative_path(__file__, 'test_pipelines/repo.py')
    recon_repo = ReconstructableRepository.for_file(repo_file, 'define_demo_execution_repo')
    location = InProcessRepositoryLocation(recon_repo)
    external_repo = location.get_repository('demo_execution_repo')
    return external_repo.get_full_external_pipeline(pipeline_name)
def test_origin_ids_stable(monkeypatch):
    '''Pin the origin ids of file- and module-based repositories and schedules.

    The ids are persisted to ScheduleStorage, so this test asserts fixed
    values to catch any change that would accidentally shift them.
    '''
    # Origin ids are checked with a fixed interpreter path, so pin it.
    monkeypatch.setattr(sys, 'executable', '/fake/python')

    repo_from_file = ReconstructableRepository.for_file('/path/to/file', 'the_repo')

    # Make sure the monkeypatch actually took effect.
    assert repo_from_file.get_origin().executable_path == '/fake/python'
    assert repo_from_file.get_origin_id() == '9ce1e0f6f059725bb6f85deeaea0322734bd77d6'

    file_schedule = repo_from_file.get_reconstructable_schedule('simple_schedule')
    assert file_schedule.get_origin_id() == 'f1a21671fccf4c986d20f40cac0730e47daa0183'

    repo_from_module = ReconstructableRepository.for_module('dummy_module', 'the_repo')
    assert repo_from_module.get_origin_id() == '86503fc349d4ecf44bd22ca1de64c10f8ffcebbd'

    module_schedule = repo_from_module.get_reconstructable_schedule('simple_schedule')
    assert module_schedule.get_origin_id() == 'e4c7131b74ad600969876d8fa461f215ced9631a'
def test_reconstructable_cli_args():
    '''Check the CLI argument strings for a file target and a module target.'''
    file_repo = ReconstructableRepository.for_file('foo_file', 'bar_function')
    expected_file_args = '-f {foo_file} -a bar_function'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file'))
    )
    assert file_repo.get_cli_args() == expected_file_args

    module_repo = ReconstructableRepository.for_module('foo_module', 'bar_function')
    assert module_repo.get_cli_args() == '-m foo_module -a bar_function'
def get_test_project_recon_pipeline(pipeline_name, container_image=None):
    """Return a re-originated reconstructable pipeline from the demo repo.

    Args:
        pipeline_name: Name of the pipeline to look up in the repository
            defined by ``define_demo_execution_repo``.
        container_image: Forwarded unchanged to
            ``ReconstructableRepository.for_file``.
    """
    repo_file = file_relative_path(__file__, "test_pipelines/repo.py")
    recon_repo = ReconstructableRepository.for_file(
        repo_file,
        "define_demo_execution_repo",
        container_image=container_image,
    )
    recon_pipeline = recon_repo.get_reconstructable_pipeline(pipeline_name)
    return ReOriginatedReconstructablePipelineForTest(recon_pipeline)
def get_test_project_external_pipeline(pipeline_name):
    """Return the full external pipeline ``pipeline_name`` from the demo repo.

    An in-process location origin is built for ``test_pipelines/repo.py``
    (relative to this file), turned into a handle and then a location, from
    which the ``demo_execution_repo`` repository is read.
    """
    repo_file = file_relative_path(__file__, "test_pipelines/repo.py")
    recon_repo = ReconstructableRepository.for_file(repo_file, "define_demo_execution_repo")
    origin = InProcessRepositoryLocationOrigin(recon_repo)
    location = origin.create_handle().create_location()
    external_repo = location.get_repository("demo_execution_repo")
    return external_repo.get_full_external_pipeline(pipeline_name)
def define_subprocess_context_for_file(python_file, fn_name, instance):
    '''Build an in-process GraphQL repository context with a subprocess executor.

    Args:
        python_file: Path of the file defining the repository target.
        fn_name: Name of the target inside ``python_file``.
        instance: The ``DagsterInstance`` to attach (type-checked).
    '''
    check.inst_param(instance, 'instance', DagsterInstance)

    recon_repo = ReconstructableRepository.for_file(python_file, fn_name)
    execution_manager = SubprocessExecutionManager(instance)
    return DagsterGraphQLInProcessRepositoryContext(
        recon_repo=recon_repo,
        instance=instance,
        execution_manager=execution_manager,
    )
def test_origin_ids_stable(monkeypatch):
    """Pin the origin ids of file- and module-based repositories and schedules.

    The ids are persisted to ScheduleStorage, so this test asserts fixed
    values to catch any change that would accidentally shift them.
    """
    # Origin ids are checked with a fixed interpreter path, so pin it.
    monkeypatch.setattr(sys, "executable", "/fake/python")

    repo_from_file = ReconstructableRepository.for_file(
        "/path/to/file", "the_repo", "/path/to/working_dir"
    )

    # Make sure the monkeypatch actually took effect.
    assert repo_from_file.get_origin().executable_path == "/fake/python"
    assert repo_from_file.get_origin_id() == "3766b1c554fd961b88b9301756250febff3d0ffa"

    file_schedule = repo_from_file.get_reconstructable_schedule("simple_schedule")
    assert file_schedule.get_origin_id() == "7c60d01588673ffcaea16b6fd59d998dc63ed3c3"

    repo_from_module = ReconstructableRepository.for_module("dummy_module", "the_repo")
    assert repo_from_module.get_origin_id() == "86503fc349d4ecf44bd22ca1de64c10f8ffcebbd"

    module_schedule = repo_from_module.get_reconstructable_schedule("simple_schedule")
    assert module_schedule.get_origin_id() == "e4c7131b74ad600969876d8fa461f215ced9631a"
def _get_unloadable_sensor_origin():
    """Return a job origin that points at a target named ``doesnt_exist``.

    The reconstructable repository references this file with the attribute
    name ``doesnt_exist`` and this file's directory as working directory.
    """
    this_dir = os.path.dirname(__file__)
    recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", this_dir)
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    repository_origin = ExternalRepositoryOrigin(location_origin, "fake_repository")
    return repository_origin.get_job_origin("doesnt_exist")
def get_test_project_external_repo(container_image=None):
    """Yield the ``demo_execution_repo`` repository from an in-process location.

    The location is created as a context manager and stays open while the
    generator is suspended at the ``yield``.
    NOTE(review): presumably wrapped by a context-manager or fixture
    decorator outside this block -- confirm.

    Args:
        container_image: Forwarded unchanged to
            ``ReconstructableRepository.for_file``.
    """
    repo_file = file_relative_path(__file__, "test_pipelines/repo.py")
    recon_repo = ReconstructableRepository.for_file(
        repo_file,
        "define_demo_execution_repo",
        container_image=container_image,
    )
    with InProcessRepositoryLocationOrigin(recon_repo).create_location() as location:
        yield location.get_repository("demo_execution_repo")
def define_context_for_file(python_file, fn_name, instance):
    '''Build a ``DagsterGraphQLContext`` with one in-process repository location.

    Args:
        python_file: Path of the file defining the repository target.
        fn_name: Name of the target inside ``python_file``.
        instance: The ``DagsterInstance`` to attach (type-checked).
    '''
    check.inst_param(instance, 'instance', DagsterInstance)

    recon_repo = ReconstructableRepository.for_file(python_file, fn_name)
    location = InProcessRepositoryLocation(recon_repo)
    return DagsterGraphQLContext(locations=[location], instance=instance)
def workspace(instance, image):
    """Yield an in-process test workspace for the ``repo`` module's repository.

    NOTE(review): presumably registered as a fixture or context manager by a
    decorator outside this block -- confirm.

    Args:
        instance: Passed through to ``in_process_test_workspace``.
        image: Used as the ``container_image`` of the reconstructable repository.
    """
    recon_repo = ReconstructableRepository.for_file(
        repo.__file__,
        repo.repository.__name__,
        container_image=image,
    )
    with in_process_test_workspace(instance, recon_repo) as test_workspace:
        yield test_workspace
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """A launched run must finish even after the gRPC client has been released.

    Launches ``slow_pipeline`` through the gRPC run launcher, leaves the
    ephemeral client context, and then checks that the run still reaches
    SUCCESS and that the server process exits within five seconds afterwards.
    """
    launcher_overrides = {
        "run_launcher": {
            "module": "dagster.core.launcher.grpc_run_launcher",
            "class": "GrpcRunLauncher",
        }
    }

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir, overrides=launcher_overrides)

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        recon_repo = ReconstructableRepository.for_file(__file__, "nope")
        target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

        server_process = GrpcServerProcess(loadable_target_origin=target_origin, max_workers=4)
        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repo = repository_location.get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("slow_pipeline")

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.run_launcher.launch_run(
                instance=instance,
                run=pipeline_run,
                external_pipeline=external_pipeline,
            )

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        wait_started = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - wait_started < 5

        server_process.wait()
def get_reconstructable_repository_from_origin_kwargs(kwargs):
    '''Build a ReconstructableRepository from origin-style CLI kwargs.

    ``python_file`` takes precedence: when set, ``module_name`` is validated
    via ``_check_cli_arguments_none`` and a file-based repository is returned.
    Otherwise ``module_name`` selects a module-based repository. The
    ``attribute`` entry names the target in both cases. If neither is set,
    ``check.failed`` is invoked.
    '''
    python_file = kwargs.get('python_file')
    if python_file:
        _check_cli_arguments_none(kwargs, 'module_name')
        return ReconstructableRepository.for_file(python_file, kwargs.get('attribute'))

    module_name = kwargs.get('module_name')
    if module_name:
        return ReconstructableRepository.for_module(module_name, kwargs.get('attribute'))

    check.failed('invalid')
def get_test_project_external_repo(container_image=None):
    """Return the ``demo_execution_repo`` repository loaded in-process.

    Builds origin -> handle -> location for ``test_pipelines/repo.py``
    (relative to this file) and reads the repository from that location.

    Args:
        container_image: Forwarded unchanged to
            ``ReconstructableRepository.for_file``.
    """
    repo_file = file_relative_path(__file__, "test_pipelines/repo.py")
    recon_repo = ReconstructableRepository.for_file(
        repo_file,
        "define_demo_execution_repo",
        container_image=container_image,
    )
    origin = InProcessRepositoryLocationOrigin(recon_repo)
    handle = RepositoryLocationHandle.create_from_repository_location_origin(origin)
    location = RepositoryLocation.from_handle(handle)
    return location.get_repository("demo_execution_repo")
def define_in_process_context(python_file, fn_name, instance):
    """Create a request context over a single in-process repository location.

    Args:
        python_file: Path of the file defining the repository target.
        fn_name: Name of the target inside ``python_file``.
        instance: The ``DagsterInstance`` to attach (type-checked).
    """
    check.inst_param(instance, "instance", DagsterInstance)

    recon_repo = ReconstructableRepository.for_file(python_file, fn_name)
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    process_context = WorkspaceProcessContext(
        workspace=Workspace([location_origin]),
        instance=instance,
    )
    return process_context.create_request_context()
def get_test_project_workspace(instance, container_image=None):
    """Yield an in-process test workspace for the demo execution repository.

    NOTE(review): presumably wrapped by a context-manager or fixture
    decorator outside this block -- confirm.

    Args:
        instance: Passed through to ``in_process_test_workspace``.
        container_image: Forwarded unchanged to
            ``ReconstructableRepository.for_file``.
    """
    demo_repo = ReconstructableRepository.for_file(
        file_relative_path(__file__, "test_pipelines/repo.py"),
        "define_demo_execution_repo",
        container_image=container_image,
    )
    with in_process_test_workspace(instance, recon_repo=demo_repo) as test_workspace:
        yield test_workspace
def test_dev_pipelines():
    """Execute every pipeline of ``graph_job_dev_repo`` via its reconstructable form.

    The repository is expected to expose exactly five pipelines.
    """
    with instance_for_test() as instance:
        recon_repo = ReconstructableRepository.for_file(__file__, "graph_job_dev_repo")

        pipelines = graph_job_dev_repo.get_all_pipelines()
        assert len(pipelines) == 5

        for pipeline_def in pipelines:
            recon_pipeline = recon_repo.get_reconstructable_pipeline(pipeline_def.name)
            execute_pipeline(recon_pipeline, instance=instance)
def define_subprocess_context_for_file(python_file, fn_name, instance):
    '''Build a ``DagsterGraphQLContext`` with one subprocess-executing environment.

    Args:
        python_file: Path of the file defining the repository target.
        fn_name: Name of the target inside ``python_file``.
        instance: The ``DagsterInstance`` to attach (type-checked); also
            handed to the ``SubprocessExecutionManager``.
    '''
    check.inst_param(instance, 'instance', DagsterInstance)

    recon_repo = ReconstructableRepository.for_file(python_file, fn_name)
    environment = InProcessDagsterEnvironment(
        recon_repo,
        execution_manager=SubprocessExecutionManager(instance),
    )
    return DagsterGraphQLContext(environments=[environment], instance=instance)
def define_test_snapshot_context():
    '''Build a ``DagsterGraphQLContext`` for snapshot tests.

    Uses an ephemeral instance and a single in-process environment that loads
    ``define_repository`` from this file with a synchronous execution manager.
    '''
    # The instance is created first, matching the original argument order.
    ephemeral_instance = DagsterInstance.ephemeral()
    recon_repo = ReconstructableRepository.for_file(__file__, 'define_repository')
    environment = InProcessDagsterEnvironment(
        recon_repo,
        execution_manager=SynchronousExecutionManager(),
    )
    return DagsterGraphQLContext(
        instance=ephemeral_instance,
        environments=[environment],
    )
def test_skip():
    '''Launching ``skip_schedule`` must be reported and recorded as skipped.

    The launch result is expected to be a ``ScheduledExecutionSkipped`` and the
    first stored tick must have status ``SKIPPED``.
    '''
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        instance = DagsterInstance.get()
        repo = ReconstructableRepository.for_file(__file__, 'the_repo')
        skip_schedule = repo.get_reconstructable_schedule('skip_schedule')

        result = sync_launch_scheduled_execution(skip_schedule.get_origin())
        assert isinstance(result, ScheduledExecutionSkipped)

        ticks = instance.get_schedule_ticks(skip_schedule.get_origin_id())
        assert ticks[0].status == ScheduleTickStatus.SKIPPED
def test_execute_hammer_through_dagit():
    '''Run ``hammer_pipeline`` through the GraphQL layer and check its event log.

    Starts the pipeline with the start-execution mutation, drains outstanding
    executions, then subscribes to the run logs and asserts that both a
    ``PipelineStartEvent`` and a ``PipelineSuccessEvent`` were emitted.
    '''
    hammer_file = file_relative_path(
        __file__, '../../../../examples/dagster_examples/toys/hammer.py'
    )
    recon_repo = ReconstructableRepository.for_file(hammer_file, 'hammer_pipeline')
    instance = DagsterInstance.local_temp()

    context = DagsterGraphQLContext(
        locations=[InProcessRepositoryLocation(recon_repo)],
        instance=instance,
    )

    selector = get_legacy_pipeline_selector(context, 'hammer_pipeline')
    executor = SyncExecutor()

    run_config = {
        'storage': {'filesystem': {}},
        'execution': {'dask': {'config': {'cluster': {'local': {}}}}},
    }
    variables = {
        'executionParams': {
            'runConfigData': run_config,
            'selector': selector,
            'mode': 'default',
        }
    }

    start_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_result.errors:
        raise Exception('{}'.format(start_result.errors))

    run_id = start_result.data['startPipelineExecution']['run']['runId']

    context.drain_outstanding_executions()

    subscription = execute_dagster_graphql(
        context, SUBSCRIPTION_QUERY, variables={'runId': run_id}
    )

    # Collect whatever the subscription pushes; only the first payload is inspected.
    received = []
    subscription.subscribe(received.append)

    log_messages = received[0].data['pipelineRunLogs']['messages']
    event_types = [message['__typename'] for message in log_messages]

    assert 'PipelineStartEvent' in event_types
    assert 'PipelineSuccessEvent' in event_types
def test_bad_load():
    """Launching a schedule from an unloadable repository must fail cleanly.

    The repository target ``doesnt_exist`` is not defined in this file, so the
    launch result is expected to be a ``ScheduledExecutionFailed`` and the
    first stored tick a ``FAILURE``, both mentioning the missing attribute.
    """
    expected_error = "doesnt_exist not found at module scope in file"

    with _default_instance() as instance:
        # NOTE(review): rebinding right after the ``with`` binding mirrors the
        # original; whether both refer to the same instance is not visible here.
        instance = DagsterInstance.get()

        this_dir = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", this_dir)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")

        result = sync_launch_scheduled_execution(schedule.get_origin())
        assert isinstance(result, ScheduledExecutionFailed)
        assert expected_error in result.errors[0].to_string()

        ticks = instance.get_schedule_ticks(schedule.get_origin_id())
        first_tick = ticks[0]
        assert first_tick.status == ScheduleTickStatus.FAILURE
        assert expected_error in first_tick.error.message
def test_bad_load():
    """A running schedule whose repository cannot be loaded records failed ticks.

    With time frozen just before midnight on 2019-02-27 (UTC), the schedule
    state is stored as RUNNING. Time is then advanced twice (one second, then
    one day); after each ``launch_scheduled_runs`` call no runs may exist, the
    tick count must have grown by one, and the first returned tick must be a
    FAILURE at the current frozen time mentioning the missing attribute.
    """
    expected_error = "doesnt_exist not found at module scope in file"

    with schedule_instance() as instance:
        this_dir = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", this_dir)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()

        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )

        with freeze_time(initial_datetime) as frozen_datetime:
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                get_timestamp_from_utc_datetime(get_current_datetime_in_utc()),
            )
            instance.add_schedule_state(schedule_state)

            # (how far to advance the clock, total ticks expected afterwards)
            phases = (
                (timedelta(seconds=1), 1),
                (timedelta(days=1), 2),
            )
            for advance_by, expected_tick_count in phases:
                frozen_datetime.tick(delta=advance_by)

                launch_scheduled_runs(instance, get_current_datetime_in_utc())

                assert instance.get_runs_count() == 0

                ticks = instance.get_schedule_ticks(fake_origin.get_id())
                assert len(ticks) == expected_tick_count
                assert ticks[0].status == ScheduleTickStatus.FAILURE
                assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                    get_current_datetime_in_utc()
                )
                assert expected_error in ticks[0].error.message