def intro_tutorial_path(path):
    return script_relative_path('../../../dagster/tutorials/intro_tutorial/{}'.format(path))
def build_docker_image(docker_client):
    with pushd(script_relative_path('test_project')):
        # Passing a list together with shell=True is error-prone (only the
        # first element reaches the shell command); invoke the script directly.
        subprocess.check_output(['./build.sh'])
    return IMAGE
import dagstermill as dm

from dagster import InputDefinition, job
from dagster.utils import script_relative_path

from docs_snippets.legacy.data_science.download_file import download_file

k_means_iris = dm.define_dagstermill_op(
    "k_means_iris",
    script_relative_path("iris-kmeans_2.ipynb"),
    output_notebook_name="iris_kmeans_output",
    input_defs=[InputDefinition("path", str, description="Local path to the Iris dataset")],
)


@job(
    resource_defs={
        "output_notebook_io_manager": dm.local_output_notebook_io_manager,
    }
)
def iris_classify():
    k_means_iris(download_file())
def csv_hello_world_solids_config_fs_storage():
    return {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('../data/num.csv')}}},
        'storage': {'filesystem': {}},
    }
import dagstermill as dm

from dagster import Field, InputDefinition, Int, pipeline
from dagster.utils import script_relative_path
from dagster_examples.util import download_file

k_means_iris = dm.define_dagstermill_solid(
    'k_means_iris',
    script_relative_path('iris-kmeans_2.ipynb'),
    input_defs=[InputDefinition('path', str, description='Local path to the Iris dataset')],
    config=Field(
        Int, default_value=3, is_required=False, description='The number of clusters to find'
    ),
)


@pipeline
def iris_pipeline():
    k_means_iris(download_file())
import dagstermill as dm

from dagster import PipelineDefinition
from dagster.utils import script_relative_path

k_means_iris_solid = dm.define_dagstermill_solid(
    'k_means_iris', script_relative_path('iris-kmeans.ipynb')
)


def define_iris_pipeline():
    return PipelineDefinition(name='iris_pipeline', solid_defs=[k_means_iris_solid])
                    ),
                }
            )
        ),
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            name='prod',
            environment_files=[script_relative_path('../environments/csv_hello_world_prod.yaml')],
        ),
        PresetDefinition.from_files(
            name='test',
            environment_files=[script_relative_path('../environments/csv_hello_world_test.yaml')],
        ),
    ]
)
def csv_hello_world():
    return sum_sq_solid(sum_df=sum_solid())


@pipeline
def csv_hello_world_with_expectations():
    ss = sum_solid()
    sum_sq_solid(sum_df=ss)
def test_notebook_view():
    notebook_path = script_relative_path('render_uuid_notebook.ipynb')
    html_response, code = notebook_view({'path': notebook_path})
    assert '6cac0c38-2c97-49ca-887c-4ac43f141213' in html_response
    assert code == 200
def test_successful_host_dagit_ui():
    with mock.patch('gevent.pywsgi.WSGIServer'):
        handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
        host_dagit_ui(log=False, log_dir=None, handle=handle, use_sync=True, host=None, port=2343)
            'field_six_nullable_int_list': Field(List[Optional[Int]], is_optional=True),
        })),
    })),
)
def a_solid_with_multilayered_config(_):
    return None

return a_solid_with_multilayered_config()


@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            name='prod',
            environment_files=[script_relative_path('../environments/csv_hello_world_prod.yaml')],
        ),
        PresetDefinition.from_files(
            name='test',
            environment_files=[script_relative_path('../environments/csv_hello_world_test.yaml')],
        ),
        PresetDefinition(
            name='test_inline',
            environment_dict={
                'solids': {
                    'sum_solid': {
                        'inputs': {
                            'num': script_relative_path("../data/num.csv")
def test_create_app():
    handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
    pipeline_run_storage = PipelineRunStorage()
    assert create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=True)
    assert create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=False)
    PipelineDefinition,
    RepositoryDefinition,
    SolidDefinition,
)
from dagster.utils import script_relative_path

from dagster_graphql.implementation.context import DagsterGraphQLContext
from dagster_graphql.implementation.pipeline_execution_manager import SynchronousExecutionManager
from dagster_graphql.implementation.pipeline_run_storage import PipelineRunStorage
from dagster_graphql.test.utils import execute_dagster_graphql
from dagster_graphql_tests.graphql.setup import define_context, define_repository

# This is needed to find production query in all cases
sys.path.insert(0, os.path.abspath(script_relative_path('.')))

from production_query import (  # pylint: disable=wrong-import-position,wrong-import-order
    PRODUCTION_QUERY,
)
from setup import PoorMansDataFrame  # pylint: disable=no-name-in-module


def test_enum_query():
    ENUM_QUERY = '''
    {
      pipeline(params: { name: "pipeline_with_enum_config" }) {
        name
        configTypes {
          __typename
          name
          ... on EnumConfigType {
            values
def test_intro_tutorial_cli_actual_dag():
    check_cli_execute_file_pipeline(
        script_relative_path('../../dagster_examples/intro_tutorial/actual_dag.py'),
        'define_diamond_dag_pipeline',
    )
import os

from dagster_airflow_tests.marks import nettest

from dagster import ExecutionTargetHandle
from dagster.core.utils import make_new_run_id
from dagster.utils import load_yaml_from_glob_list, script_relative_path

from .utils import validate_pipeline_execution

# TODO (Nate): Will remove in follow-up diff
ENVIRONMENTS_PATH = script_relative_path(
    os.path.join(
        '..',
        '..',
        '..',
        '..',
        '.buildkite',
        'images',
        'docker',
        'test_project',
        'test_pipelines',
        'environments',
    )
)


@nettest
def test_s3_storage(
    dagster_airflow_k8s_operator_pipeline, dagster_docker_image, environments_path
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
def path_to_tutorial_file(path):
    return script_relative_path(os.path.join("../../docs_snippets/intro_tutorial/", path))
def test_list_command():
    runner = CliRunner()

    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': script_relative_path('test_cli_commands.py'),
            'module_name': None,
            'fn_name': 'define_bar_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-f', script_relative_path('test_cli_commands.py'), '-n', 'define_bar_repo'],
    )
    assert result.exit_code == 0
    assert result.output == (
        'Repository bar\n'
        '**************\n'
        'Pipeline: baz\n'
        'Description:\n'
        'Not much tbh\n'
        'Solids: (Execution Order)\n'
        ' do_input\n'
        '*************\n'
        'Pipeline: foo\n'
        'Solids: (Execution Order)\n'
        ' do_something\n'
    )

    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': None,
            'module_name': 'dagster_examples.intro_tutorial.repos',
            'fn_name': 'define_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-m', 'dagster_examples.intro_tutorial.repos', '-n', 'define_repo'],
    )
    assert result.exit_code == 0
    assert result.output == (
        'Repository demo_repository\n'
        '**************************\n'
        'Pipeline: repo_demo_pipeline\n'
        'Solids: (Execution Order)\n'
        ' hello_world\n'
    )

    execute_list_command(
        {
            'repository_yaml': script_relative_path('repository.yaml'),
            'python_file': None,
            'module_name': None,
            'fn_name': None,
        },
        no_print,
    )

    result = runner.invoke(pipeline_list_command, ['-y', script_relative_path('repository.yaml')])
    assert result.exit_code == 0
    assert result.output == (
        'Repository demo_repository\n'
        '**************************\n'
        'Pipeline: repo_demo_pipeline\n'
        'Solids: (Execution Order)\n'
        ' hello_world\n'
    )

    with pytest.raises(CliUsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': 'foo.py',
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': 'define_repo',
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command,
        ['-f', 'foo.py', '-m', 'dagster_examples.intro_tutorial.repos', '-n', 'define_repo'],
    )
    assert result.exit_code == 1
    assert isinstance(result.exception, CliUsageError)

    with pytest.raises(CliUsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': None,
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(pipeline_list_command, ['-m', 'dagster_examples.intro_tutorial.repos'])
    assert result.exit_code == 1
    assert isinstance(result.exception, CliUsageError)

    with pytest.raises(CliUsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': script_relative_path('test_cli_commands.py'),
                'module_name': None,
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command, ['-f', script_relative_path('test_cli_commands.py')]
    )
    assert result.exit_code == 1
    assert isinstance(result.exception, CliUsageError)
def load_custom_trip_dataframe(_) -> DataFrame:
    return read_csv(
        script_relative_path('./ebike_trips.csv'),
        parse_dates=['start_time', 'end_time'],
        date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f'),
    )
def test_presets():
    @solid(config_field=Field(Dict(fields={'error': Field(Bool)})))
    def can_fail(context):
        if context.solid_config['error']:
            raise Exception('I did an error')
        return 'cool'

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    pipeline = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                'passing',
                environment_files=[script_relative_path('pass_env.yaml')],
                solid_subset=['can_fail'],
            ),
            PresetDefinition(
                'passing_direct_dict',
                environment_dict={'solids': {'can_fail': {'config': {'error': False}}}},
                solid_subset=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_1',
                environment_files=[script_relative_path('fail_env.yaml')],
                solid_subset=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_2', environment_files=[script_relative_path('pass_env.yaml')]
            ),
        ],
    )

    with pytest.raises(DagsterInvalidDefinitionError):
        PresetDefinition.from_files(
            'invalid_1', environment_files=[script_relative_path('not_a_file.yaml')]
        )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2', environment_files=[script_relative_path('test_repository_definition.py')]
        )

    assert execute_pipeline_with_preset(pipeline, 'passing').success
    assert execute_pipeline_with_preset(pipeline, 'passing_direct_dict').success

    with pytest.raises(DagsterExecutionStepExecutionError):
        execute_pipeline_with_preset(pipeline, 'failing_1')

    with pytest.raises(DagsterExecutionStepExecutionError):
        execute_pipeline_with_preset(pipeline, 'failing_2')

    with pytest.raises(DagsterInvariantViolationError, match="Could not find preset"):
        execute_pipeline_with_preset(pipeline, 'not_failing')
def test_sort():
    with open(script_relative_path("../../../docs_snippets/intro_tutorial/cereal.csv"), "r") as fd:
        cereals = [row for row in csv.DictReader(fd)]

    execute_solid(sort_by_calories, input_values={"cereals": cereals})
def create_persisted_context():
    full_path = script_relative_path('testdb.db')
    engine = sa.create_engine('sqlite:///{full_path}'.format(full_path=full_path), echo=False)
    return dagster_sqlalchemy.common.create_sql_alchemy_context_params_from_engine(engine=engine)
def csv_hello_world_solids_config():
    return {'solids': {'sum_solid': {'inputs': {'num': script_relative_path('../data/num.csv')}}}}
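# A config helper like csv_hello_world_solids_config() above is presumably
# consumed by execute_pipeline as its environment dict. A hedged usage sketch,
# assuming the csv_hello_world pipeline from the preset snippets is importable
# alongside the helper:
from dagster import execute_pipeline


def run_csv_hello_world_sketch():
    # Hypothetical call site: wires the num.csv path into sum_solid's input.
    result = execute_pipeline(csv_hello_world, environment_dict=csv_hello_world_solids_config())
    assert result.success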
def _get_sql_script_path(name):
    return script_relative_path(
        '../../dagster_sqlalchemy/examples/sql_project_example/sql_files/{name}.sql'.format(
            name=name
        )
    )
def path_to_file(path):
    return script_relative_path(os.path.join("./", path))
def test_import_module_from_path():
    foo_module = seven.import_module_from_path('foo_module', script_relative_path('foo_module.py'))
    assert foo_module.FOO == 7
def environment_dict(s3_bucket):
    env_dict = load_yaml_from_path(script_relative_path('test_project/env.yaml'))
    env_dict['storage'] = {'s3': {'s3_bucket': s3_bucket}}
    yield env_dict
def test_execute_hammer_through_dagit():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        script_relative_path('../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()
    execution_manager = MultiprocessingExecutionManager()
    context = DagsterGraphQLContext(
        handle=handle, execution_manager=execution_manager, instance=instance
    )
    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {
                'storage': {'filesystem': {}},
                'execution': {'dask': {}},
            },
            'selector': {'name': handle.build_pipeline_definition().name},
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    run_id = start_pipeline_result.data['startPipelineExecution']['run']['runId']
    context.execution_manager.join()

    subscription = execute_dagster_graphql(context, SUBSCRIPTION_QUERY, variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [x['__typename'] for x in subscribe_results[0].data['pipelineRunLogs']['messages']]

    assert 'PipelineProcessStartEvent' in messages
    assert 'PipelineProcessStartedEvent' in messages
    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
    assert 'PipelineProcessExitedEvent' in messages
def test_tutorial_config_schema():
    with pushd(script_relative_path("../../../docs_snippets/intro_tutorial/basics/e02_solids/")):
        result = execute_pipeline(
            config_pipeline,
            run_config={
                "solids": {
                    "read_csv": {"inputs": {"csv_path": {"value": "cereal.csv"}}},
                    "sort_by_calories": {"config": {"reverse": False}},
                }
            },
        )

    assert result.success
    assert len(result.solid_result_list) == 2
    assert isinstance(result.result_for_solid("sort_by_calories").output_value(), dict)
    assert result.result_for_solid("sort_by_calories").output_value() == {
        "least_caloric": OrderedDict(
            [
                ("name", "All-Bran with Extra Fiber"),
                ("mfr", "K"),
                ("type", "C"),
                ("calories", "50"),
                ("protein", "4"),
                ("fat", "0"),
                ("sodium", "140"),
                ("fiber", "14"),
                ("carbo", "8"),
                ("sugars", "0"),
                ("potass", "330"),
                ("vitamins", "25"),
                ("shelf", "3"),
                ("weight", "1"),
                ("cups", "0.5"),
                ("rating", "93.704912"),
            ]
        ),
        "most_caloric": OrderedDict(
            [
                ("name", "Mueslix Crispy Blend"),
                ("mfr", "K"),
                ("type", "C"),
                ("calories", "160"),
                ("protein", "3"),
                ("fat", "2"),
                ("sodium", "150"),
                ("fiber", "3"),
                ("carbo", "17"),
                ("sugars", "13"),
                ("potass", "160"),
                ("vitamins", "25"),
                ("shelf", "3"),
                ("weight", "1.5"),
                ("cups", "0.67"),
                ("rating", "30.313351"),
            ]
        ),
    }
    return result
"""isort:skip_file""" import sys from dagster import repository from dagster.utils import script_relative_path sys.path.append(script_relative_path(".")) from hello_cereal import hello_cereal_pipeline from complex_pipeline import complex_pipeline @repository def hello_cereal_repository(): # Note that we can pass a dict of functions, rather than a list of # pipeline definitions. This allows us to construct pipelines lazily, # if, e.g., initializing a pipeline involves any heavy compute return { "pipelines": { "hello_cereal_pipeline": lambda: hello_cereal_pipeline, "complex_pipeline": lambda: complex_pipeline, } }
def nb_test_path(name):
    return script_relative_path('notebooks/{name}.ipynb'.format(name=name))
def read_sauce(_context, path):
    with open(script_relative_path(path), 'r') as fd:
        return Sauce(fd.read())
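# Every snippet above resolves fixture paths with dagster.utils.script_relative_path,
# which returns paths relative to the calling script's file rather than the
# process's working directory. A minimal sketch of that behavior, assuming a
# stack-inspection approach (not necessarily how the library implements it):
import inspect
import os


def script_relative_path_sketch(file_path):
    # Resolve file_path against the directory of the caller's source file, so
    # fixtures load no matter where the test runner was invoked from.
    caller_file = inspect.stack()[1].filename
    return os.path.abspath(os.path.join(os.path.dirname(caller_file), file_path))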