def dagster_instance_config(base_dir, config_filename=DAGSTER_CONFIG_YAML_FILENAME, overrides=None):
    """Load, merge, and validate the dagster instance config found in ``base_dir``.

    Reads ``config_filename`` from ``base_dir`` (warning if it is absent), merges
    ``overrides`` on top, validates the result against the instance config schema,
    and returns the validated config value.

    Raises:
        DagsterInvalidConfigError: if the merged config fails schema validation.
    """
    check.str_param(base_dir, "base_dir")
    check.invariant(os.path.isdir(base_dir), "base_dir should be a directory")
    overrides = check.opt_dict_param(overrides, "overrides")

    config_path = os.path.join(base_dir, config_filename)
    if not os.path.exists(config_path):
        # Informational only: a missing file just means all defaults apply.
        missing_file_message = (
            "The dagster instance configuration file ({config_filename}) is not present at "
            "{base_dir}. Dagster uses this file to know where and how to store "
            "local artifacts, information about past runs, and structured events.\n"
            "If nothing is specified, Dagster will store this information "
            "in the local filesystem in the {base_dir} directory."
        ).format(config_filename=config_filename, base_dir=base_dir)
        warnings.warn(missing_file_message)

    merged_config = merge_dicts(load_yaml_from_globs(config_path), overrides)
    validation = validate_config(dagster_instance_config_schema(), merged_config)
    if validation.success:
        return validation.value
    raise DagsterInvalidConfigError(
        "Errors whilst loading dagster instance config at {}.".format(config_filename),
        validation.errors,
        merged_config,
    )
def test_from_glob_list():
    """Loading YAML via glob lists merges documents; globs and explicit paths agree."""
    only_one = {'key_one': {'key_one_one': 'value_one'}}
    merged = {'key_one': {'key_one_one': 'value_one', 'key_one_two': 'value_two'}}

    path_one = script_relative_path('yamls/yaml_one.yaml')
    path_two = script_relative_path('yamls/yaml_two.yaml')

    # Single explicit file.
    assert load_yaml_from_glob_list([path_one]) == only_one
    # Two explicit files are merged key-wise.
    assert load_yaml_from_glob_list([path_one, path_two]) == merged
    # A glob matching both files yields the same merge.
    assert load_yaml_from_glob_list([script_relative_path('yamls/*.yaml')]) == merged
    # Varargs form behaves like the list form.
    assert load_yaml_from_globs(path_one, path_two) == merged
def dagster_instance_config(base_dir):
    """Load and evaluate the dagster instance config stored under ``base_dir``.

    Raises:
        DagsterInvalidConfigError: if the loaded YAML fails config evaluation.
    """
    raw_config = load_yaml_from_globs(os.path.join(base_dir, DAGSTER_CONFIG_YAML_FILENAME))
    config_type = define_dagster_config_cls().inst()
    evaluated = evaluate_config(config_type, raw_config)
    if evaluated.success:
        return evaluated.value
    raise DagsterInvalidConfigError(None, evaluated.errors, raw_config)
def dagster_instance_config(
    base_dir,
    config_filename=DAGSTER_CONFIG_YAML_FILENAME,
    overrides=None,
):
    """Load and validate the dagster instance config from ``base_dir``.

    Supports an optional ``instance_class`` entry in the config: when present, the
    referenced class is loaded and its config schema is merged into the base
    instance schema before validation.

    Returns:
        A ``(validated_config_value, custom_instance_class_or_None)`` tuple.

    Raises:
        DagsterInvalidConfigError: if either the custom class stanza or the full
            merged config fails validation.
    """
    check.str_param(base_dir, "base_dir")
    check.invariant(os.path.isdir(base_dir), "base_dir should be a directory")
    overrides = check.opt_dict_param(overrides, "overrides")

    config_path = os.path.join(base_dir, config_filename)
    # Only warn when DAGSTER_HOME is set; a missing file otherwise is unremarkable.
    if not os.path.exists(config_path) and is_dagster_home_set():
        warnings.warn(
            f"No dagster instance configuration file ({config_filename}) found at "
            f"{base_dir}. Defaulting to loading and storing all metadata with {base_dir}. "
            f"If this is the desired behavior, create an empty {config_filename} file in {base_dir}."
        )

    config_dict = merge_dicts(load_yaml_from_globs(config_path), overrides)

    custom_instance_class = None
    schema = dagster_instance_config_schema()
    if "instance_class" in config_dict:
        class_data = config_dict["instance_class"]
        class_validation = validate_config(configurable_class_schema(), class_data)
        if not class_validation.success:
            raise DagsterInvalidConfigError(
                "Errors whilst loading dagster custom class config at {}".format(config_filename),
                class_validation.errors,
                class_data,
            )
        custom_instance_class = class_from_code_pointer(
            class_data["module"], class_data["class"]
        )
        # The custom class may declare extra config keys; widen the schema to accept them.
        schema = merge_dicts(schema, custom_instance_class.config_schema())

    validation = validate_config(schema, config_dict)
    if not validation.success:
        raise DagsterInvalidConfigError(
            "Errors whilst loading dagster instance config at {}.".format(config_filename),
            validation.errors,
            config_dict,
        )
    return (validation.value, custom_instance_class)
def dagster_instance_config(base_dir, config_filename=DAGSTER_CONFIG_YAML_FILENAME, overrides=None):
    """Load the instance config from ``base_dir``, apply ``overrides``, and evaluate it.

    Raises:
        DagsterInvalidConfigError: if the merged config fails evaluation.
    """
    overrides = check.opt_dict_param(overrides, 'overrides')
    loaded = load_yaml_from_globs(os.path.join(base_dir, config_filename))
    merged_config = merge_dicts(loaded, overrides)

    config_type = define_dagster_config_cls().inst()
    evaluated = evaluate_config(config_type, merged_config)
    if evaluated.success:
        return evaluated.value
    raise DagsterInvalidConfigError(None, evaluated.errors, merged_config)
def test_dask_pipeline():
    """End-to-end: run the dask hello-world pipeline from its docs snippet config."""
    snippet_config = load_yaml_from_globs(
        file_relative_path(__file__, "../../docs_snippets/deploying/dask_hello_world.yaml")
    )
    result = execute_pipeline(
        reconstructable(dask_pipeline),
        run_config=snippet_config,
        instance=DagsterInstance.local_temp(),
    )
    assert result.success
    hello_output = result.result_for_solid("hello_world").output_value()
    assert hello_output == "Hello, World!"
def dagster_instance_config(base_dir, config_filename=DAGSTER_CONFIG_YAML_FILENAME, overrides=None):
    """Load the instance config from ``base_dir``, apply ``overrides``, and validate it.

    Raises:
        DagsterInvalidConfigError: if the merged config fails validation.
    """
    overrides = check.opt_dict_param(overrides, 'overrides')
    merged_config = merge_dicts(
        load_yaml_from_globs(os.path.join(base_dir, config_filename)),
        overrides,
    )
    config_type = resolve_to_config_type(define_dagster_config_cls())
    validation = validate_config(config_type, merged_config)
    if validation.success:
        return validation.value
    raise DagsterInvalidConfigError(
        'Errors whilst loading dagster instance config at {}.'.format(config_filename),
        validation.errors,
        merged_config,
    )
def dagster_instance_config(base_dir, config_filename=DAGSTER_CONFIG_YAML_FILENAME, overrides=None):
    """Load, merge, and schema-validate the dagster instance config in ``base_dir``.

    Raises:
        DagsterInvalidConfigError: if the merged config fails schema validation.
    """
    check.str_param(base_dir, 'base_dir')
    check.invariant(os.path.isdir(base_dir), 'base_dir should be a directory')
    overrides = check.opt_dict_param(overrides, 'overrides')

    merged_config = merge_dicts(
        load_yaml_from_globs(os.path.join(base_dir, config_filename)),
        overrides,
    )
    validation = validate_config(dagster_instance_config_schema(), merged_config)
    if validation.success:
        return validation.value
    raise DagsterInvalidConfigError(
        'Errors whilst loading dagster instance config at {}.'.format(config_filename),
        validation.errors,
        merged_config,
    )
def test_from_glob_list():
    """YAML glob-list loading merges documents; a non-matching glob raises."""
    only_one = {'key_one': {'key_one_one': 'value_one'}}
    merged = {'key_one': {'key_one_one': 'value_one', 'key_one_two': 'value_two'}}

    path_one = script_relative_path('yamls/yaml_one.yaml')
    path_two = script_relative_path('yamls/yaml_two.yaml')

    # Single explicit file.
    assert load_yaml_from_glob_list([path_one]) == only_one
    # Two explicit files are merged key-wise.
    assert load_yaml_from_glob_list([path_one, path_two]) == merged
    # A glob matching both files yields the same merge.
    assert load_yaml_from_glob_list([script_relative_path('yamls/*.yaml')]) == merged
    # Varargs form behaves like the list form.
    assert load_yaml_from_globs(path_one, path_two) == merged

    # A glob matching nothing is a check failure, not an empty result.
    with pytest.raises(check.CheckError):
        load_yaml_from_glob_list(['flskhfhjsdf'])
def test_yaml_schema():
    """The docs-snippet instance and pipeline-run YAMLs parse to the documented shapes."""
    snippet_folder = file_relative_path(__file__, "../../../docs_snippets/overview/instances/")

    instance_config = dagster_instance_config(snippet_folder)
    assert sorted(instance_config.keys()) == [
        "compute_logs",
        "event_log_storage",
        "local_artifact_storage",
        "opt_in",
        "run_launcher",
        "run_storage",
        "schedule_storage",
        "scheduler",
        "telemetry",
    ]

    run_config = load_yaml_from_globs(
        file_relative_path(__file__, "../../../docs_snippets/overview/instances/pipeline_run.yaml")
    )
    assert run_config == {
        "execution": {"multiprocess": {"config": {"max_concurrent": 4}}},
        "storage": {"filesystem": None},
        "loggers": {"console": {"config": {"log_level": "DEBUG"}}},
    }
def test_yaml_schema():
    """The docs-snippet instance and pipeline-run YAMLs parse to the documented shapes."""
    snippet_folder = file_relative_path(__file__, '../../../docs_snippets/overview/instances/')

    instance_config = dagster_instance_config(snippet_folder)
    assert sorted(instance_config.keys()) == [
        'compute_logs',
        'dagit',
        'event_log_storage',
        'local_artifact_storage',
        'run_launcher',
        'run_storage',
        'schedule_storage',
        'scheduler',
    ]

    run_config = load_yaml_from_globs(
        file_relative_path(__file__, '../../../docs_snippets/overview/instances/pipeline_run.yaml')
    )
    assert run_config == {
        'execution': {'multiprocess': {'config': {'max_concurrent': 4}}},
        'storage': {'filesystem': None},
        'loggers': {'console': {'config': {'log_level': 'DEBUG'}}},
    }
def test_from_glob_list():
    """YAML glob-list loading merges documents; a non-matching glob is empty."""
    only_one = {"key_one": {"key_one_one": "value_one"}}
    merged = {"key_one": {"key_one_one": "value_one", "key_one_two": "value_two"}}

    path_one = file_relative_path(__file__, "yamls/yaml_one.yaml")
    path_two = file_relative_path(__file__, "yamls/yaml_two.yaml")

    # Single explicit file.
    assert load_yaml_from_glob_list([path_one]) == only_one
    # Two explicit files are merged key-wise.
    assert load_yaml_from_glob_list([path_one, path_two]) == merged
    # A glob matching both files yields the same merge.
    assert load_yaml_from_glob_list([file_relative_path(__file__, "yamls/*.yaml")]) == merged
    # Varargs form behaves like the list form.
    assert load_yaml_from_globs(path_one, path_two) == merged

    # A glob matching nothing quietly produces an empty dict.
    assert load_yaml_from_glob_list(["flskhfhjsdf"]) == {}
def test_from_glob_list():
    """YAML glob-list loading merges documents; a non-matching glob is empty."""
    only_one = {'key_one': {'key_one_one': 'value_one'}}
    merged = {'key_one': {'key_one_one': 'value_one', 'key_one_two': 'value_two'}}

    path_one = file_relative_path(__file__, 'yamls/yaml_one.yaml')
    path_two = file_relative_path(__file__, 'yamls/yaml_two.yaml')

    # Single explicit file.
    assert load_yaml_from_glob_list([path_one]) == only_one
    # Two explicit files are merged key-wise.
    assert load_yaml_from_glob_list([path_one, path_two]) == merged
    # A glob matching both files yields the same merge.
    assert load_yaml_from_glob_list([file_relative_path(__file__, 'yamls/*.yaml')]) == merged
    # Varargs form behaves like the list form.
    assert load_yaml_from_globs(path_one, path_two) == merged

    # A glob matching nothing quietly produces an empty dict.
    assert load_yaml_from_glob_list(['flskhfhjsdf']) == {}
def _dagster_config(base_dir):
    """Return the raw dagster.yaml contents from ``base_dir`` as a dict (no validation)."""
    config_path = os.path.join(base_dir, "dagster.yaml")
    return load_yaml_from_globs(config_path)