Example #1
    def launch_step(
        self,
        step_context: StepExecutionContext,
        prior_attempts_count: int,
    ) -> Iterator[DagsterEvent]:
        step_run_ref = step_context_to_step_run_ref(step_context, prior_attempts_count)
        run_id = step_context.pipeline_run.run_id

        step_run_dir = os.path.join(self.scratch_dir, run_id, step_run_ref.step_key)
        os.makedirs(step_run_dir)

        step_run_ref_file_path = os.path.join(step_run_dir, PICKLED_STEP_RUN_REF_FILE_NAME)
        with open(step_run_ref_file_path, "wb") as step_pickle_file:
            pickle.dump(step_run_ref, step_pickle_file)

        command_tokens = [
            sys.executable,
            "-m",
            "dagster.core.execution.plan.local_external_step_main",
            step_run_ref_file_path,
        ]
        # If this is being called within a `capture_interrupts` context, allow interrupts
        # while waiting for the subprocess to complete, so that we can terminate slow or
        # hanging steps
        with raise_execution_interrupts():
            subprocess.call(command_tokens, stdout=sys.stdout, stderr=sys.stderr)

        events_file_path = os.path.join(step_run_dir, PICKLED_EVENTS_FILE_NAME)
        file_manager = LocalFileManager(".")
        events_file_handle = LocalFileHandle(events_file_path)
        events_data = file_manager.read_data(events_file_handle)
        events = pickle.loads(events_data)

        yield from events
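
The launcher above writes a pickled step run ref into a scratch directory, shells out to the local_external_step_main module, and reads back the pickled events the subprocess leaves behind; Examples #3, #4, and #10 below show variants of that subprocess entry point. A minimal driver sketch, assuming a launcher instance and a step_context constructed elsewhere (both names are hypothetical placeholders):

# Hypothetical driver: consume the launcher's event stream as it arrives.
# Only launch_step's signature comes from the example above; `launcher`
# and `step_context` must be built elsewhere.
for event in launcher.launch_step(step_context, prior_attempts_count=0):
    print(event.event_type_value, event.message)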
Example #2
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(step_context,
                                                    prior_attempts_count)
        run_id = step_context.pipeline_run.run_id

        step_run_dir = os.path.join(self.scratch_dir, run_id,
                                    step_run_ref.step_key)
        os.makedirs(step_run_dir)

        step_run_ref_file_path = os.path.join(step_run_dir,
                                              PICKLED_STEP_RUN_REF_FILE_NAME)
        with open(step_run_ref_file_path, 'wb') as step_pickle_file:
            pickle.dump(step_run_ref, step_pickle_file)

        command_tokens = [
            'python',
            '-m',
            'dagster.core.execution.plan.local_external_step_main',
            step_run_ref_file_path,
        ]
        subprocess.call(command_tokens, stdout=sys.stdout, stderr=sys.stderr)

        events_file_path = os.path.join(step_run_dir, PICKLED_EVENTS_FILE_NAME)
        file_manager = LocalFileManager('.')
        events_file_handle = LocalFileHandle(events_file_path)
        events_data = file_manager.read_data(events_file_handle)
        events = pickle.loads(events_data)

        for event in events:
            yield event
Example #3
def main(step_run_ref_path):
    file_manager = LocalFileManager(".")
    file_handle = LocalFileHandle(step_run_ref_path)
    step_run_ref = pickle.loads(file_manager.read_data(file_handle))

    events = list(run_step_from_ref(step_run_ref))
    events_out_path = os.path.join(os.path.dirname(step_run_ref_path),
                                   PICKLED_EVENTS_FILE_NAME)
    with open(events_out_path, "wb") as events_file:
        pickle.dump(events, events_file)
Example #4
def main(step_run_ref_path: str) -> None:
    file_manager = LocalFileManager(".")
    file_handle = LocalFileHandle(step_run_ref_path)
    step_run_ref = pickle.loads(file_manager.read_data(file_handle))

    with DagsterInstance.ephemeral() as instance:
        events = list(run_step_from_ref(step_run_ref, instance))
        events_out_path = os.path.join(os.path.dirname(step_run_ref_path),
                                       PICKLED_EVENTS_FILE_NAME)
        with open(events_out_path, "wb") as events_file:
            pickle.dump(events, events_file)
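
Both main functions above are the subprocess half of the protocol from Examples #1 and #2: they unpickle the step run ref, execute the step, and pickle the resulting events next to the input file. A sketch of the module-level glue that would make this runnable via python -m, assuming the entry point simply forwards the first CLI argument (the real local_external_step_main module may wire this differently):

if __name__ == "__main__":
    # The launcher invokes this module with the pickled step run ref path
    # as the sole argument (see command_tokens in Examples #1 and #2).
    import sys

    main(sys.argv[1])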
Example #5
def create_test_pipeline_execution_context(logger_defs=None):
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    environment_dict = {'loggers': {key: {} for key in loggers}}
    pipeline_run = PipelineRun(
        pipeline_name='test_legacy_context', environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, environment_dict=environment_dict)
    creation_data = create_context_creation_data(
        pipeline_def, environment_dict, pipeline_run, instance, execution_plan
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor_config = create_executor_config(creation_data)
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_instance(instance, pipeline_run.run_id),
        ),
        log_manager=log_manager,
        executor_config=executor_config,
        raise_on_error=True,
    )
Example #6
def create_mem_system_storage_data(init_context):
    return SystemStorageData(
        run_storage=InMemoryRunStorage(),
        intermediates_manager=InMemoryIntermediatesManager(),
        file_manager=LocalFileManager.for_run_id(
            init_context.run_config.run_id),
    )
Example #7
def create_test_pipeline_execution_context(logger_defs=None):
    from dagster.core.storage.intermediate_storage import build_in_mem_intermediates_storage

    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(execution_plan, run_config, pipeline_run, instance)
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return SystemPipelineExecutionContext(
        construct_execution_context_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
            intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
            system_storage_data=SystemStorageData(
                intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
                file_manager=LocalFileManager.for_instance(instance, pipeline_run.run_id),
            ),
            log_manager=log_manager,
            retries=executor.retries,
            raise_on_error=True,
        ),
        executor=executor,
        log_manager=log_manager,
    )
Example #8
def create_test_pipeline_execution_context(
    logger_defs=None, scoped_resources_builder=None, tags=None, run_config_loggers=None
):
    run_id = str(uuid.uuid4())
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    run_config_loggers = check.opt_list_param(
        run_config_loggers, 'run_config_loggers', of_type=logging.Logger
    )
    run_config = RunConfig(run_id, tags=tags, loggers=run_config_loggers)
    environment_dict = {'loggers': {key: {} for key in loggers}}
    creation_data = create_context_creation_data(pipeline_def, environment_dict, run_config)
    log_manager = create_log_manager(creation_data)

    scoped_resources_builder = check.opt_inst_param(
        scoped_resources_builder,
        'scoped_resources_builder',
        ScopedResourcesBuilder,
        default=ScopedResourcesBuilder(),
    )
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            run_storage=InMemoryRunStorage(),
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_run_id(run_id),
        ),
        log_manager=log_manager,
    )
Example #9
# The yield/finally pattern below indicates a contextlib-style context
# manager; the @contextmanager decorator is assumed to have been present
# in the original source.
@contextmanager
def my_local_file_manager(instance, run_id):
    manager = None
    try:
        manager = LocalFileManager.for_instance(instance, run_id)
        yield manager
    finally:
        if manager:
            manager.delete_local_temp()
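
A usage sketch for the context manager above, relying on DagsterInstance.ephemeral() as seen in Example #4; the run id and payload here are illustrative:

# Hypothetical usage: delete_local_temp() runs when the with-block exits,
# so files created through the manager do not outlive the block.
with DagsterInstance.ephemeral() as instance:
    with my_local_file_manager(instance, "illustrative-run-id") as file_manager:
        handle = file_manager.write_data(b"some bytes")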
Example #10
def main(step_run_ref_path: str) -> None:
    file_manager = LocalFileManager(".")
    file_handle = LocalFileHandle(step_run_ref_path)
    step_run_ref = pickle.loads(file_manager.read_data(file_handle))

    all_events: List[EventLogEntry] = []

    try:
        instance = external_instance_from_step_run_ref(
            step_run_ref, event_listener_fn=all_events.append)
        # consume entire step iterator
        list(run_step_from_ref(step_run_ref, instance))
    finally:
        events_out_path = os.path.join(os.path.dirname(step_run_ref_path),
                                       PICKLED_EVENTS_FILE_NAME)
        with open(events_out_path, "wb") as events_file:
            pickle.dump(serialize_value(all_events), events_file)
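
Because this variant wraps the event list in serialize_value before pickling, the reading side has to invert both layers. A sketch of that read-back, assuming dagster's deserialize_value mirrors the serialize_value call above, and reusing events_out_path from the example:

# Hypothetical reader: unpickle the serialized payload, then let dagster's
# serdes layer rebuild the event objects.
with open(events_out_path, "rb") as events_file:
    events = deserialize_value(pickle.load(events_file))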
Example #11
def fs_system_storage(init_context):
    base_dir = init_context.system_storage_config.get('base_dir')
    return SystemStorageData(
        file_manager=LocalFileManager.for_run_id(init_context.run_config.run_id),
        run_storage=FileSystemRunStorage(base_dir=base_dir),
        intermediates_manager=IntermediateStoreIntermediatesManager(
            FileSystemIntermediateStore(
                run_id=init_context.run_config.run_id,
                type_storage_plugin_registry=init_context.type_storage_plugin_registry,
                base_dir=base_dir,
            )
        ),
    )
Example #12
def create_test_pipeline_execution_context(logger_defs=None,
                                           scoped_resources_builder=None,
                                           tags=None):
    run_id = str(uuid.uuid4())
    loggers = check.opt_dict_param(logger_defs,
                                   'logger_defs',
                                   key_type=str,
                                   value_type=LoggerDefinition)
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(name='test_legacy_context',
                                      solid_defs=[],
                                      mode_defs=[mode_def])
    run_config = RunConfig(run_id, tags=tags)
    environment_dict = {'loggers': {key: {} for key in loggers}}
    instance = DagsterInstance.ephemeral()
    creation_data = create_context_creation_data(pipeline_def,
                                                 environment_dict, run_config,
                                                 instance)
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = check.opt_inst_param(
        scoped_resources_builder,
        'scoped_resources_builder',
        ScopedResourcesBuilder,
        default=ScopedResourcesBuilder(),
    )
    executor_config = create_executor_config(creation_data)
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_instance(instance, run_id),
        ),
        log_manager=log_manager,
        executor_config=executor_config,
        raise_on_error=True,
    )
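
A usage sketch for the create_test_pipeline_execution_context factories in Examples #5, #7, #8, and #12, assuming only that the returned context exposes the log manager built inside it (the info call is illustrative):

# Hypothetical usage: build an empty test context and exercise its logger.
context = create_test_pipeline_execution_context()
context.log.info("context constructed for test")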