Пример #1
0
 def RF_solid():
     """Build the dagstermill solid named ``random_forest_regression``.

     The solid is backed by the notebook path produced by
     ``nb_test_path("tutorial_RF")`` and declares a single input, ``df``,
     typed as ``DataFrame``.
     """
     notebook = nb_test_path("tutorial_RF")
     frame_input = InputDefinition(name="df", dagster_type=DataFrame)
     return dagstermill.define_dagstermill_solid(
         "random_forest_regression",
         notebook,
         input_defs=[frame_input],
     )
Пример #2
0
def define_hello_world_with_output():
    """Build the dagstermill solid named ``hello_world_output``.

    The third and fourth positional arguments are an empty list and a list
    holding one default ``OutputDefinition()``.
    """
    notebook = nb_test_path('hello_world_output')
    no_inputs = []
    single_output = [OutputDefinition()]
    return dagstermill.define_dagstermill_solid(
        'hello_world_output',
        notebook,
        no_inputs,
        single_output,
    )
Пример #3
0
 def clean_data_solid():
     """Build the dagstermill solid named ``clean_data``.

     It is backed by the notebook path from ``nb_test_path('clean_data')``
     and declares one output typed as ``DataFrame``.
     """
     notebook = nb_test_path('clean_data')
     frame_output = OutputDefinition(DataFrame)
     return dagstermill.define_dagstermill_solid(
         'clean_data',
         notebook,
         output_defs=[frame_output],
     )
Пример #4
0
def define_hello_world_with_output_notebook_solid():
    """Build the dagstermill solid named ``hello_world_with_output_notebook``.

    The solid is backed by the ``hello_world`` notebook path and is created
    with ``output_notebook='notebook'``.
    """
    # NOTE(review): presumably names the output that carries the executed
    # notebook -- confirm against the installed dagstermill version.
    notebook_output_name = 'notebook'
    source_notebook = nb_test_path('hello_world')
    return dagstermill.define_dagstermill_solid(
        'hello_world_with_output_notebook',
        source_notebook,
        output_notebook=notebook_output_name,
    )
Пример #5
0
    svd.fit(user_story_matrix.matrix)

    total_explained_variance = svd.explained_variance_ratio_.sum()

    yield Output(
        svd,
        metadata={
            "Total explained variance ratio": total_explained_variance,
            "Number of components": n_components,
        },
    )


# Dagstermill solid named "recommender_model_perf", backed by the notebook
# "../notebooks/recommender_model_perf.ipynb" (path built with
# file_relative_path from this module's __file__).
# It declares one input, "recommender_model", typed as TruncatedSVD.
# NOTE(review): output_notebook_name presumably names the output that holds
# the executed notebook -- confirm against the installed dagstermill version.
model_perf_notebook = define_dagstermill_solid(
    "recommender_model_perf",
    notebook_path=file_relative_path(__file__, "../notebooks/recommender_model_perf.ipynb"),
    input_defs=[InputDefinition(dagster_type=TruncatedSVD, name="recommender_model")],
    output_notebook_name="perf_notebook",
)


@op(
    ins={
        "story_titles": In(
            root_manager_key="warehouse_loader",
            metadata={
                "table": "hackernews.stories",
                "columns": ["id", "title"],
            },
        ),
    },
    out=Out(
Пример #6
0

# Builds the dagstermill solid 'bad_kernel_solid' from the notebook path
# returned by nb_test_path('bad_kernel').
# NOTE(review): what @solid_definition does with this factory is not visible
# here -- confirm before relying on the decorated name's type.
@solid_definition
def bad_kernel_solid():
    notebook = nb_test_path('bad_kernel')
    return dagstermill.define_dagstermill_solid('bad_kernel_solid', notebook)


def define_bad_kernel_pipeline():
    """Return a ``PipelineDefinition`` named ``bad_kernel_pipeline``.

    Its ``solid_defs`` list contains only ``bad_kernel_solid``.
    """
    pipeline_solids = [bad_kernel_solid]
    return PipelineDefinition(
        name='bad_kernel_pipeline',
        solid_defs=pipeline_solids,
    )


# Dagstermill solid named 'reimport', backed by the notebook path returned by
# nb_test_path('reimport'). It declares one input 'l' typed List[int] and one
# default OutputDefinition().
reimport_solid = dagstermill.define_dagstermill_solid(
    'reimport',
    nb_test_path('reimport'),
    input_defs=[InputDefinition('l', List[int])],
    output_defs=[OutputDefinition()],
)


# Solid that ignores its single argument and returns the list [1, 2, 3].
@solid
def lister(_):
    numbers = [1, 2, 3]
    return numbers


# Pipeline that passes the result of invoking lister to reimport_solid.
@pipeline
def reimport_pipeline():
    numbers = lister()
    reimport_solid(numbers)


@solid_definition
Пример #7
0
def bad_kernel_solid():
    """Build the dagstermill solid ``bad_kernel_solid`` from the ``bad_kernel`` notebook path."""
    notebook = nb_test_path('bad_kernel')
    return dagstermill.define_dagstermill_solid('bad_kernel_solid', notebook)
Пример #8
0
def define_hello_world_with_output_notebook_solid():
    """Build the dagstermill solid named ``hello_world_with_output_notebook``.

    The solid is backed by the ``hello_world`` notebook path and is created
    with ``output_notebook="notebook"``.
    """
    # NOTE(review): presumably names the output that carries the executed
    # notebook -- confirm against the installed dagstermill version.
    notebook_output_name = "notebook"
    source_notebook = nb_test_path("hello_world")
    return dagstermill.define_dagstermill_solid(
        "hello_world_with_output_notebook",
        source_notebook,
        output_notebook=notebook_output_name,
    )
Пример #9
0
        description=
        'An inventory of the right sizing operations that are recommended and validated.',
        metadata_entries=[
            EventMetadataEntry.path(output_path, 'operation_inventory_path')
        ],
    )
    yield Output(None)


# Dagstermill solid named 'right_size_report', backed by the notebook path
# script_relative_path('rightsizereport.ipynb').
# It declares seven inputs: two Dict[str, RightSizeAnalysis] analyses, three
# UtilizationDataFrame inputs (cpu, mem, disk), the AzureComputeSpecifications
# and a ResourcesDataFrame.
# NOTE(review): output_notebook='output_notebook' presumably names the output
# that carries the executed notebook -- confirm against the installed
# dagstermill version.
right_size_report = dm.define_dagstermill_solid(
    'right_size_report',
    script_relative_path('rightsizereport.ipynb'),
    input_defs=[
        InputDefinition('advisor_analysis', Dict[str, RightSizeAnalysis]),
        InputDefinition('local_analysis', Dict[str, RightSizeAnalysis]),
        InputDefinition('cpu_utilization', UtilizationDataFrame),
        InputDefinition('mem_utilization', UtilizationDataFrame),
        InputDefinition('disk_utilization', UtilizationDataFrame),
        InputDefinition('compute_specs', AzureComputeSpecifications),
        InputDefinition('resources', ResourcesDataFrame),
    ],
    output_notebook='output_notebook')


@solid(input_defs=[InputDefinition('report_notebook', FileHandle)])
def write_html_report(context: SolidExecutionContext,
                      report_notebook: FileHandle) -> Nothing:
    with context.file_manager.read(report_notebook) as node_file:
        node = nbformat.read(node_file, nbformat.NO_CONVERT)
    html = convert_nodebook_node_to_html(node, full_width=True)
    handle = context.file_manager.write_data(html.encode(), ext='html')
Пример #10
0
def define_hello_world_solid():
    """Build the dagstermill solid ``hello_world`` from the ``hello_world`` notebook path."""
    notebook = nb_test_path("hello_world")
    return dagstermill.define_dagstermill_solid("hello_world", notebook)
Пример #11
0
def define_hello_world_config_solid():
    """Build the dagstermill solid named ``hello_world_config``.

    Its config schema has one optional ``String`` field, ``greeting``, whose
    default value is ``"hello"``.
    """
    greeting_field = Field(String, is_required=False, default_value="hello")
    notebook = nb_test_path("hello_world_config")
    return dagstermill.define_dagstermill_solid(
        "hello_world_config",
        notebook,
        config_schema={"greeting": greeting_field},
    )
Пример #12
0
def yield_obj_solid():
    """Build the dagstermill solid named ``yield_obj``.

    The third and fourth positional arguments are an empty list and a list
    holding one ``OutputDefinition(Any)``.
    """
    notebook = nb_test_path("yield_obj")
    no_inputs = []
    any_output = [OutputDefinition(Any)]
    return dagstermill.define_dagstermill_solid("yield_obj", notebook, no_inputs, any_output)
Пример #13
0
def yield_3_solid():
    """Build the dagstermill solid named ``yield_3``.

    The third and fourth positional arguments are an empty list and a list
    holding one ``OutputDefinition(Int)``.
    """
    notebook = nb_test_path("yield_3")
    no_inputs = []
    int_output = [OutputDefinition(Int)]
    return dagstermill.define_dagstermill_solid("yield_3", notebook, no_inputs, int_output)
Пример #14
0
def bad_kernel_solid():
    """Build the dagstermill solid ``bad_kernel_solid`` from the ``bad_kernel`` notebook path."""
    notebook = nb_test_path("bad_kernel")
    return dagstermill.define_dagstermill_solid("bad_kernel_solid", notebook)
Пример #15
0
def test_default_description():
    """A solid defined without ``description`` gets one starting with the default prefix."""
    expected_prefix = "This solid is backed by the notebook at "
    solid_under_test = define_dagstermill_solid(BACKING_NB_NAME, BACKING_NB_PATH)
    assert solid_under_test.description.startswith(expected_prefix)
Пример #16
0
    pipeline,
    repository,
)

from ..data_frame import DataFrame
from .pandas_hello_world.pipeline import pandas_hello_world


def nb_test_path(name):
    """Return the path for the notebook ``notebooks/<name>.ipynb``.

    The relative path is handed to ``file_relative_path`` together with this
    module's ``__file__``.
    """
    # NOTE(review): file_relative_path presumably resolves against the
    # directory containing this module -- confirm in dagster.utils.
    relative_notebook = f"notebooks/{name}.ipynb"
    return file_relative_path(__file__, relative_notebook)


# Dagstermill solid named "papermill_pandas_hello_world", backed by the
# notebook path nb_test_path("papermill_pandas_hello_world").
# It takes one input "df" typed DataFrame and declares one DataFrame output.
hello_world = dagstermill.define_dagstermill_solid(
    name="papermill_pandas_hello_world",
    notebook_path=nb_test_path("papermill_pandas_hello_world"),
    input_defs=[InputDefinition(name="df", dagster_type=DataFrame)],
    output_defs=[OutputDefinition(DataFrame)],
)


@pipeline(
    mode_defs=[ModeDefinition(resource_defs={"io_manager": fs_io_manager})],
    preset_defs=[
        PresetDefinition.from_files(
            "test",
            config_files=[
                file_relative_path(
                    __file__,
                    "pandas_hello_world/environments/papermill_pandas_hello_world_test.yaml",
                )
            ],
Пример #17
0
def test_custom_description():
    """A ``description`` passed to ``define_dagstermill_solid`` is kept verbatim on the solid."""
    wanted = "custom description"
    solid_under_test = define_dagstermill_solid(
        BACKING_NB_NAME,
        BACKING_NB_PATH,
        description=wanted,
    )
    actual = solid_under_test.description
    assert actual == wanted
Пример #18
0

def build_hello_world_job():
    """Create and return the job ``hello_world_job``.

    The job body invokes ``hello_world_op`` and the job is given
    ``local_output_notebook_io_manager`` under the resource key
    ``"output_notebook_io_manager"``.
    """
    notebook_resources = {
        "output_notebook_io_manager": local_output_notebook_io_manager,
    }

    # No docstring on the inner function: dagster may use it as the job
    # description.
    @job(resource_defs=notebook_resources)
    def hello_world_job():
        hello_world_op()

    return hello_world_job


# Dagstermill solid named "hello_world_custom", backed by the "hello_world"
# notebook path, created with the tag {"foo": "bar"} and the description
# "custom description".
# NOTE(review): output_notebook_name presumably names the output that holds
# the executed notebook -- confirm against the installed dagstermill version.
hello_world_with_custom_tags_and_description = dagstermill.define_dagstermill_solid(
    name="hello_world_custom",
    notebook_path=nb_test_path("hello_world"),
    output_notebook_name="notebook",
    tags={"foo": "bar"},
    description="custom description",
)


# Pipeline (configured with default_mode_defs) whose body invokes the single
# solid hello_world_with_custom_tags_and_description.
@pipeline(mode_defs=default_mode_defs)
def hello_world_with_custom_tags_and_description_pipeline():
    hello_world_with_custom_tags_and_description()


hello_world_config = test_nb_solid(
    "hello_world_config",
    config_schema={
        "greeting": Field(String, is_required=False, default_value="hello")
    },
Пример #19
0
 def RF_solid():
     """Build the dagstermill solid named ``random_forest_regression``.

     The solid is backed by the notebook path produced by
     ``nb_test_path('tutorial_RF')`` and declares a single input, ``df``,
     typed as ``DataFrame``.
     """
     notebook = nb_test_path('tutorial_RF')
     frame_input = InputDefinition(name='df', dagster_type=DataFrame)
     return dagstermill.define_dagstermill_solid(
         'random_forest_regression',
         notebook,
         input_defs=[frame_input],
     )
Пример #20
0
import dagstermill as dm
from docs_snippets.legacy.data_science.download_file import download_file

from dagster import InputDefinition, pipeline
from dagster.utils import script_relative_path

# Dagstermill solid named 'k_means_iris', backed by the notebook path
# script_relative_path('iris-kmeans_2.ipynb').
# It takes one str input, 'path', described as the local path to the Iris
# dataset.
k_means_iris = dm.define_dagstermill_solid(
    'k_means_iris',
    script_relative_path('iris-kmeans_2.ipynb'),
    input_defs=[InputDefinition('path', str, description='Local path to the Iris dataset')],
)


# Pipeline that feeds the result of invoking download_file into k_means_iris.
@pipeline
def iris_pipeline():
    downloaded = download_file()
    k_means_iris(downloaded)
Пример #21
0
def define_hello_world_solid():
    """Build the dagstermill solid ``hello_world`` from the ``hello_world`` notebook path."""
    notebook = nb_test_path('hello_world')
    return dagstermill.define_dagstermill_solid('hello_world', notebook)
Пример #22
0
from dagster import pipeline
from dagster.utils import file_relative_path
from dagstermill import define_dagstermill_solid

# Dagstermill solid named "hello_world_notebook_solid", backed by
# "hello_world.ipynb" (path built with file_relative_path from this module's
# __file__). No inputs, outputs or config are declared here.
hello_world_notebook_solid = define_dagstermill_solid(
    "hello_world_notebook_solid",
    file_relative_path(__file__, "hello_world.ipynb"),
)


# Pipeline whose body invokes the single solid hello_world_notebook_solid.
@pipeline
def hello_world_notebook_pipeline():
    hello_world_notebook_solid()
Пример #23
0
# Pipeline (configured with default_mode_defs) whose body invokes the single
# solid hello_world_with_custom_tags_and_description.
@pipeline(mode_defs=default_mode_defs)
def hello_world_with_custom_tags_and_description_pipeline():
    hello_world_with_custom_tags_and_description()


# Created through the test_nb_solid helper with the name "hello_world_config"
# and a config schema holding one optional String field, "greeting", that
# defaults to "hello".
# NOTE(review): test_nb_solid is defined elsewhere; presumably it wraps
# define_dagstermill_solid -- confirm.
hello_world_config = test_nb_solid(
    "hello_world_config",
    config_schema={
        "greeting": Field(String, is_required=False, default_value="hello")
    },
)

# Dagstermill solid named "goodbye_config", backed by the notebook path
# nb_test_path("print_dagstermill_context_solid_config").
# Its config schema holds one optional String field, "farewell", that
# defaults to "goodbye".
# NOTE(review): output_notebook="notebook" presumably names the output that
# carries the executed notebook -- confirm against the installed dagstermill
# version.
goodbye_config = dagstermill.define_dagstermill_solid(
    name="goodbye_config",
    notebook_path=nb_test_path("print_dagstermill_context_solid_config"),
    output_notebook="notebook",
    config_schema={
        "farewell": Field(String, is_required=False, default_value="goodbye")
    },
)


# Pipeline (configured with default_mode_defs) that invokes hello_world_config
# and goodbye_config; neither invocation receives the other's result.
@pipeline(mode_defs=default_mode_defs)
def hello_world_config_pipeline():
    hello_world_config()
    goodbye_config()


# Pipeline (configured with default_mode_defs) that invokes hello_world_config
# under the alias "aliased_greeting" and goodbye_config under the alias
# "aliased_goodbye".
@pipeline(mode_defs=default_mode_defs)
def alias_config_pipeline():
    greeting_alias = hello_world_config.alias("aliased_greeting")
    greeting_alias()
    goodbye_alias = goodbye_config.alias("aliased_goodbye")
    goodbye_alias()
Пример #24
0
def define_error_pipeline():
    """Return a ``PipelineDefinition`` named ``error_pipeline``.

    Its only solid is the dagstermill solid ``error_solid``, backed by the
    ``error_notebook`` notebook path.
    """
    # NOTE(review): this passes solids=; confirm the keyword matches the
    # installed dagster version.
    error_solid = dm.define_dagstermill_solid('error_solid', nb_test_path('error_notebook'))
    return PipelineDefinition(name='error_pipeline', solids=[error_solid])
Пример #25
0
def notebook_solid(name, notebook_path, input_defs, output_defs):
    """Define a dagstermill solid after mapping the path through ``_notebook_path``.

    Args:
        name: Passed through as the first positional argument.
        notebook_path: Handed to ``_notebook_path``; its result becomes the
            second positional argument.
        input_defs: Passed through as the third positional argument.
        output_defs: Passed through as the fourth positional argument.

    Returns:
        Whatever ``define_dagstermill_solid`` returns.
    """
    resolved_path = _notebook_path(notebook_path)
    return define_dagstermill_solid(name, resolved_path, input_defs, output_defs)
Пример #26
0
def no_repo_reg_solid():
    """Build the dagstermill solid named ``no_repo_reg``.

    It is backed by the ``no_repo_reg_error`` notebook path and declares one
    output, ``df``, typed as ``ComplexDagsterType``.
    """
    # NOTE(review): this passes outputs=; confirm the keyword matches the
    # installed dagstermill version.
    notebook = nb_test_path('no_repo_reg_error')
    df_output = OutputDefinition(name='df', dagster_type=ComplexDagsterType)
    return dm.define_dagstermill_solid(
        'no_repo_reg',
        notebook,
        outputs=[df_output],
    )
Пример #27
0
def define_hello_logging_solid():
    """Build the dagstermill solid ``hello_logging`` from the ``hello_logging`` notebook path."""
    notebook = nb_test_path('hello_logging')
    return dagstermill.define_dagstermill_solid('hello_logging', notebook)
Пример #28
0
def test_reserved_tags_not_overridden():
    """Passing a ``notebook_path`` or ``kind`` tag must raise ``CheckError``."""
    reserved_tag_sets = ({"notebook_path": "~"}, {"kind": "py"})
    # Each tag set is checked on its own, in the order listed above.
    for reserved_tags in reserved_tag_sets:
        with pytest.raises(CheckError, match="key is reserved for use by Dagster"):
            define_dagstermill_solid(BACKING_NB_NAME, BACKING_NB_PATH, tags=reserved_tags)
Пример #29
0
 def LR_solid():
     """Build the dagstermill solid named ``linear_regression``.

     The solid is backed by the notebook path produced by
     ``nb_test_path('tutorial_LR')`` and declares a single input, ``df``,
     typed as ``DataFrame``.
     """
     notebook = nb_test_path('tutorial_LR')
     frame_input = InputDefinition(name='df', dagster_type=DataFrame)
     return dagstermill.define_dagstermill_solid(
         'linear_regression',
         notebook,
         input_defs=[frame_input],
     )
Пример #30
0
 def LR_solid():
     """Build the dagstermill solid named ``linear_regression``.

     The solid is backed by the notebook path produced by
     ``nb_test_path("tutorial_LR")`` and declares a single input, ``df``,
     typed as ``DataFrame``.
     """
     notebook = nb_test_path("tutorial_LR")
     frame_input = InputDefinition(name="df", dagster_type=DataFrame)
     return dagstermill.define_dagstermill_solid(
         "linear_regression",
         notebook,
         input_defs=[frame_input],
     )