示例#1
0
class _TaskScaffolding:
    """Per-task bookkeeping used while a `QuantumGraph` is being assembled.

    Each instance gathers what is known about a single `PipelineTask`.  The
    overall layout of the scaffolding structures is described, from the top
    down, in `_PipelineScaffolding`.

    Parameters
    ----------
    taskDef : `TaskDef`
        Identifies the task class together with its configuration.
    parent : `_PipelineScaffolding`
        Pipeline-level structure that will own the instance being built.
    datasetTypes : `TaskDatasetTypes`
        Categorization of the dataset types this task uses.

    Raises
    ------
    GraphBuilderError
        Raised when the dimensions of the task are not a subset of
        ``parent.dimensions``.
    """
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding, datasetTypes: TaskDatasetTypes):
        pipelineDimensions = parent.dimensions
        dimensionUniverse = pipelineDimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(dimensionUniverse, names=taskDef.connections.dimensions)
        isContained = self.dimensions.issubset(pipelineDimensions)
        if not isContained:
            message = (f"Task with label '{taskDef.label}' has dimensions "
                       f"{self.dimensions} that are not a subset of "
                       f"the pipeline dimensions {pipelineDimensions}.")
            raise GraphBuilderError(message)

        # Every per-task dict is carved out of one or two of the dicts held
        # by the parent _PipelineScaffolding.
        subset = _DatasetScaffoldingDict.fromSubset
        self.initInputs = subset(datasetTypes.initInputs, parent.initInputs, parent.initIntermediates)
        self.initOutputs = subset(datasetTypes.initOutputs, parent.initIntermediates, parent.initOutputs)
        self.inputs = subset(datasetTypes.inputs, parent.inputs, parent.intermediates)
        self.outputs = subset(datasetTypes.outputs, parent.intermediates, parent.outputs)
        self.prerequisites = subset(datasetTypes.prerequisites, parent.prerequisites)

        # Register this task as a consumer of everything it reads ...
        for consumedDict in (self.initInputs, self.inputs, self.prerequisites):
            for consumed in consumedDict.values():
                consumed.consumers[self.taskDef.label] = self
        # ... and as the producer of everything it writes.  No dataset may
        # already have a producer at this point.
        for producedDict in (self.initOutputs, self.outputs):
            for produced in producedDict.values():
                assert produced.producer is None
                produced.producer = self

        self.dataIds = set()
        self.quanta = []

    taskDef: TaskDef
    """The task class and its configuration (`TaskDef`).
    """

    dimensions: DimensionGraph
    """Dimensions of one `Quantum` of this task (`DimensionGraph`).
    """

    initInputs: _DatasetScaffoldingDict
    """Information about the datasets needed to construct this task
    (`_DatasetScaffoldingDict`).
    """

    initOutputs: _DatasetScaffoldingDict
    """Information about the datasets produced as a side-effect of
    constructing this task (`_DatasetScaffoldingDict`).
    """

    inputs: _DatasetScaffoldingDict
    """Information about the regular, graph-constraining input datasets of
    this task (`_DatasetScaffoldingDict`).
    """

    outputs: _DatasetScaffoldingDict
    """Information about the datasets this task produces
    (`_DatasetScaffoldingDict`).
    """

    prerequisites: _DatasetScaffoldingDict
    """Information about input datasets that have to exist in the repository
    before any Pipeline containing this task is run
    (`_DatasetScaffoldingDict`).
    """

    dataIds: Set[ExpandedDataCoordinate]
    """Data IDs of every quantum of this task in the graph (`set` of
    `ExpandedDataCoordinate`).

    Filled in after construction by `_PipelineScaffolding.fillDataIds`.
    """

    quanta: List[Quantum]
    """Every quantum of this task in the graph (`list` of `Quantum`).

    Filled in after construction by `_PipelineScaffolding.fillQuanta`.
    """

    def addQuantum(self, quantum: Quantum):
        """Adjust the predicted inputs of a quantum and record it.

        A fresh instance of the connections class named by the task's config
        is created and its ``adjustQuantum`` method is applied to
        ``quantum.predictedInputs``; the result replaces the quantum's
        predicted inputs before the quantum is appended to `quanta`.

        Parameters
        ----------
        quantum : `Quantum`
            The quantum to adjust and append.

        Notes
        -----
        Any exception raised by ``adjustQuantum`` is deliberately allowed to
        propagate; in that case the quantum is not added.
        """
        taskConfig = self.taskDef.config
        connections = taskConfig.connections.ConnectionsClass(config=taskConfig)
        # Not wrapped in try/except on purpose: a failed check must abort
        # the addition of this quantum.
        adjustedInputs = connections.adjustQuantum(quantum.predictedInputs)
        quantum._predictedInputs = NamedKeyDict(adjustedInputs)

        # Only reached when the adjustment succeeded.
        self.quanta.append(quantum)

    def makeQuantumGraphTaskNodes(self) -> QuantumGraphTaskNodes:
        """Build the `QuantumGraphTaskNodes` that represents this task.

        Returns
        -------
        nodes : `QuantumGraphTaskNodes`
            The `QuantumGraph` elements corresponding to this task, built
            from the task definition, the accumulated quanta and the
            unpacked init-input and init-output references.
        """
        nodes = QuantumGraphTaskNodes(
            taskDef=self.taskDef,
            quanta=self.quanta,
            initInputs=self.initInputs.unpackRefs(),
            initOutputs=self.initOutputs.unpackRefs(),
        )
        return nodes
示例#2
0
class _TaskScaffolding:
    """Per-task bookkeeping used while a `QuantumGraph` is being assembled.

    Each instance gathers what is known about a single `PipelineTask`.  The
    overall layout of the scaffolding structures is described, from the top
    down, in `_PipelineScaffolding`.

    Parameters
    ----------
    taskDef : `TaskDef`
        Identifies the task class together with its configuration.
    parent : `_PipelineScaffolding`
        Pipeline-level structure that will own the instance being built.
    datasetTypes : `TaskDatasetTypes`
        Categorization of the dataset types this task uses.
    """
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding,
                 datasetTypes: TaskDatasetTypes):
        pipelineDimensions = parent.dimensions
        dimensionUniverse = pipelineDimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(
            dimensionUniverse,
            names=taskDef.connections.dimensions,
        )
        # The task dimensions are expected to lie within the pipeline
        # dimensions; this is checked as an invariant only.
        assert self.dimensions.issubset(pipelineDimensions)

        # Every per-task dict is carved out of one or two of the dicts held
        # by the parent _PipelineScaffolding.
        subset = _DatasetDict.fromSubset
        self.initInputs = subset(
            datasetTypes.initInputs,
            parent.initInputs,
            parent.initIntermediates,
        )
        self.initOutputs = subset(
            datasetTypes.initOutputs,
            parent.initIntermediates,
            parent.initOutputs,
        )
        self.inputs = subset(
            datasetTypes.inputs,
            parent.inputs,
            parent.intermediates,
        )
        self.outputs = subset(
            datasetTypes.outputs,
            parent.intermediates,
            parent.outputs,
        )
        self.prerequisites = subset(
            datasetTypes.prerequisites,
            parent.prerequisites,
        )

        self.dataIds = set()
        self.quanta = {}

    def __repr__(self):
        # A dataclass-generated __repr__ would recurse endlessly through
        # the back-references, so only the task definition is shown.
        return f"_TaskScaffolding(taskDef={self.taskDef}, ...)"

    taskDef: TaskDef
    """The task class and its configuration (`TaskDef`).
    """

    dimensions: DimensionGraph
    """Dimensions of one `Quantum` of this task (`DimensionGraph`).
    """

    initInputs: _DatasetDict
    """Information about the datasets needed to construct this task
    (`_DatasetDict`).
    """

    initOutputs: _DatasetDict
    """Information about the datasets produced as a side-effect of
    constructing this task (`_DatasetDict`).
    """

    inputs: _DatasetDict
    """Information about the regular, graph-constraining input datasets of
    this task (`_DatasetDict`).
    """

    outputs: _DatasetDict
    """Information about the datasets this task produces (`_DatasetDict`).
    """

    prerequisites: _DatasetDict
    """Information about input datasets that have to exist in the repository
    before any Pipeline containing this task is run (`_DatasetDict`).
    """

    quanta: Dict[DataCoordinate, _QuantumScaffolding]
    """Mapping from data ID to the scaffolding object of the Quantum of this
    task that has that data ID.
    """

    def makeQuantumSet(self) -> Set[Quantum]:
        """Build the `Quantum` objects described by this task's scaffolding.

        Returns
        -------
        nodes : `set` of `Quantum`
            One `Quantum` for every entry in `quanta`, obtained by calling
            ``makeQuantum`` on each scaffolding object.
        """
        return {scaffold.makeQuantum() for scaffold in self.quanta.values()}