Пример #1
0
        def get_run():
            """Look up the stored run with ``pipeline_run``'s id and verify it matches.

            ``self`` and ``pipeline_run`` are captured from the enclosing scope.

            Returns:
                The run returned by ``self.get_run_by_id`` when
                ``_check_run_equality`` reports no differences.

            Raises:
                DagsterRunConflict: if ``_check_run_equality`` reports any
                    differing fields between the two runs.
            """
            existing_run = self.get_run_by_id(pipeline_run.run_id)
            differences = _check_run_equality(pipeline_run, existing_run)

            # Happy path first: nothing differs, so the stored run is the answer.
            if not differences:
                return existing_run

            raise DagsterRunConflict(
                'Found conflicting existing run with same id {run_id}. Runs differ in:'
                '\n{field_diff}'.format(
                    run_id=pipeline_run.run_id,
                    field_diff=_format_field_diff(differences),
                ),
            )
Пример #2
0
    def get_or_create_run(
        self,
        pipeline_name=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        solid_subset=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
    ):
        """Return the stored run for these arguments, adding it to storage if absent.

        All arguments are forwarded unchanged to
        ``self._construct_run_with_snapshots`` to build the requested run.

        Returns:
            The already-stored run when one with the same run id exists and
            ``_check_run_equality`` reports no differences; otherwise the
            result of ``self._run_storage.add_run`` for the newly built run.

        Raises:
            DagsterRunConflict: if a run with the same id is already stored
                but differs from the requested one.
        """
        # The only remaining caller of this method lives in dagster-airflow. Delete this
        # method once that usage is gone.
        # https://github.com/dagster-io/dagster/issues/2412
        requested_run = self._construct_run_with_snapshots(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
        )

        # Nothing stored under this id yet: persist the freshly built run.
        if not self.has_run(requested_run.run_id):
            return self._run_storage.add_run(requested_run)

        existing_run = self.get_run_by_id(requested_run.run_id)
        differences = _check_run_equality(requested_run, existing_run)

        if not differences:
            return existing_run

        raise DagsterRunConflict(
            'Found conflicting existing run with same id {run_id}. Runs differ in:'
            '\n{field_diff}'.format(
                run_id=requested_run.run_id,
                field_diff=_format_field_diff(differences),
            ),
        )
Пример #3
0
    def get_or_create_run(self, pipeline_run):
        """Return the stored run equal to ``pipeline_run``, creating it if absent.

        Args:
            pipeline_run: the run to look up by its ``run_id`` or to create.

        Returns:
            The stored run when one with the same id exists and compares equal
            to ``pipeline_run``; otherwise the result of ``self.create_run``.
            If creation reports ``DagsterRunAlreadyExists``, the run fetched by
            id is returned instead (without an equality check).

        Raises:
            DagsterRunConflict: if a stored run with the same id does not
                compare equal to ``pipeline_run``.
        """
        # This eventually needs transactional/locking semantics
        if not self.has_run(pipeline_run.run_id):
            # We will need a more principled way of doing this
            try:
                return self.create_run(pipeline_run)
            except DagsterRunAlreadyExists:
                # Creation reported the run as already existing; confirm it is
                # really there before handing it back.
                still_missing = not self.has_run(pipeline_run.run_id)
                if still_missing:
                    check.failed(
                        'Inconsistent run storage: could not get or create pipeline run with run_id {run_id}'
                        .format(run_id=pipeline_run.run_id))

                return self.get_run_by_id(pipeline_run.run_id)

        existing_run = self.get_run_by_id(pipeline_run.run_id)
        if existing_run == pipeline_run:
            return existing_run

        conflict_message = (
            'Found conflicting existing run with same id. Expected {pipeline_run}, found {candidate_run}.'
            .format(pipeline_run=pipeline_run, candidate_run=existing_run)
        )
        raise DagsterRunConflict(conflict_message)
Пример #4
0
    def get_or_create_run(
        self,
        pipeline_name=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
    ):
        """Build a PipelineRun from the arguments and return the stored copy of it.

        Any supplied snapshots are added to run storage if they are not already
        present, and their ids are attached to the run. If a run with the same
        id is already stored it is compared against the requested run;
        otherwise the requested run is added to storage.

        Args:
            pipeline_name, run_id, environment_dict, mode, selector,
            step_keys_to_execute, status, root_run_id, parent_run_id:
                forwarded unchanged to the ``PipelineRun`` constructor.
            tags: forwarded to ``PipelineRun``. NOTE: when it contains
                ``IS_AIRFLOW_INGEST_PIPELINE_STR`` but not
                ``AIRFLOW_EXECUTION_DATE_STR``, the latter key is written into
                the caller's dict in place.
            pipeline_snapshot: optional snapshot to persist and link to the run.
            execution_plan_snapshot: optional snapshot to persist and link to
                the run. Requires ``pipeline_snapshot`` to be given as well.

        Returns:
            The already-stored run when one with the same id exists and
            ``_check_run_equality`` reports no differences; otherwise the
            result of ``self._run_storage.add_run``.

        Raises:
            DagsterRunConflict: if a run with the same id is already stored
                but differs from the requested one.
            A ``check`` failure if ``execution_plan_snapshot`` is given without
            ``pipeline_snapshot``, or if snapshot ids are inconsistent.
        """
        # The execution plan branch below compares against the pipeline snapshot
        # id, which only exists when a pipeline snapshot was supplied. Reject the
        # invalid combination up front with a clear message instead of failing
        # later with an UnboundLocalError.
        check.invariant(
            not (pipeline_snapshot is None and execution_plan_snapshot is not None),
            'It is illegal to have an execution plan snapshot and not have a pipeline snapshot.',
        )

        # Airflow ingest pipelines must carry an execution date tag; default it
        # to the current time when the caller did not provide one.
        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            selector=selector,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        if pipeline_snapshot is not None:
            from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)

            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)

                # Storage must agree with the locally computed snapshot id.
                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap.execution_plan_snapshot import create_execution_plan_snapshot_id

            # The plan snapshot must have been built against the same pipeline
            # snapshot that is being attached to this run.
            check.invariant(execution_plan_snapshot.pipeline_snapshot_id ==
                            pipeline_snapshot_id)

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                # Storage must agree with the locally computed snapshot id.
                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        # No stored run with this id: persist the new one.
        if not self.has_run(pipeline_run.run_id):
            return self._run_storage.add_run(pipeline_run)

        candidate_run = self.get_run_by_id(pipeline_run.run_id)
        field_diff = _check_run_equality(pipeline_run, candidate_run)

        if field_diff:
            raise DagsterRunConflict(
                'Found conflicting existing run with same id {run_id}. Runs differ in:'
                '\n{field_diff}'.format(
                    run_id=pipeline_run.run_id,
                    field_diff=_format_field_diff(field_diff),
                ), )
        return candidate_run