Example #1
 def _createEitherOrStep(self, inputs: Tuple[StepInformation, ...], pipeline):
     # The merged step supersedes its inputs as the pipeline's final step.
     for input_step in inputs:
         input_step.step.last = False
     # Key each input by name plus position so same-named inputs stay distinct.
     step = EitherOrStep({x.step.name + f"{i}": x.step for i, x in enumerate(inputs)})
     step_id = pipeline.add(module=step,
                            input_ids=list(map(lambda x: x.step.id, inputs)))
     step.id = step_id
     return StepInformation(step, pipeline)
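A minimal, self-contained sketch of the naming scheme above (FakeStep and FakeInfo are stand-ins for illustration, not pyWATTS API): keying the dict by name plus enumeration index keeps two same-named inputs distinct.

    # Hypothetical stand-ins mirroring the attributes _createEitherOrStep touches.
    class FakeStep:
        def __init__(self, name):
            self.name = name
            self.last = True

    class FakeInfo:
        def __init__(self, step):
            self.step = step

    inputs = (FakeInfo(FakeStep("scaler")), FakeInfo(FakeStep("scaler")))
    keys = {x.step.name + f"{i}" for i, x in enumerate(inputs)}
    print(keys)  # {'scaler0', 'scaler1'} -- same-named inputs stay distinct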
Example #2
 def __getitem__(self, item: str):
     """
     Returns the StepInformation for the start step corresponding to item,
     creating and registering the start step on first access.
     """
     if item not in self.start_steps:
         start_step = StartStep(item)
         # Cache the raw step together with its StepInformation wrapper.
         self.start_steps[item] = start_step, StepInformation(
             step=start_step, pipeline=self)
         start_step.id = self.add(module=start_step,
                                  input_ids=[],
                                  target_ids=[])
     # Return the cached StepInformation (last element of the tuple).
     return self.start_steps[item][-1]
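A hedged usage sketch, assuming pipeline is an instance of the class providing this __getitem__: the first lookup creates and registers a StartStep, later lookups return the cached StepInformation.

    power_in = pipeline["load_power"]  # first access: creates the StartStep
    same_in = pipeline["load_power"]   # later access: served from start_steps
    assert power_in is same_in         # both are the same cached StepInformation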
Example #3
    def from_folder(load_path, file_manager_path=None):
        """
        Loads the pipeline from the pipeline.json in the specified folder.

        .. warning::
            from_folder may use unpickling for loading modules. Note that this is not safe.
            Consequently, load only pipelines you trust with `from_folder`.
            For more details about pickling see https://docs.python.org/3/library/pickle.html

        :param load_path: path to the folder containing the pipeline.json
        :type load_path: str
        :param file_manager_path: path for the results and outputs
        :type file_manager_path: str
        """
        if not os.path.isdir(load_path):
            logger.error("Path %s for loading pipeline does not exist",
                         load_path)
            raise IOException(
                f"Path {load_path} does not exist. "
                f"Check the path which you passed to the from_folder method.")

        # load json file
        file_path = os.path.join(load_path, 'pipeline.json')
        with open(file_path, 'r') as infile:
            json_dict = json.load(infile)

        # load general pipeline config
        if file_manager_path is None:
            file_manager_path = json_dict.get('path', ".")

        batch = pd.Timedelta(
            json_dict.get("batch")) if json_dict.get("batch") else None

        pipeline = Pipeline(file_manager_path, batch)
        # 1. load all modules
        modules = {}  # create a dict of all modules with their id from the json
        for i, json_module in enumerate(json_dict["modules"]):
            modules[i] = pipeline._load_modules(json_module)

        # 2. Load all steps
        for json_step in json_dict["steps"]:
            step = pipeline._load_step(modules, json_step)
            pipeline.id_to_step[step.id] = step

        # 3. Restore the start steps with their StepInformation wrappers
        pipeline.start_steps = {
            element.index:
            (element, StepInformation(step=element, pipeline=pipeline))
            for element in filter(lambda x: isinstance(x, StartStep),
                                  pipeline.id_to_step.values())
        }

        return pipeline
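A hedged usage sketch (the paths and the import line are assumptions; the module path may differ between pyWATTS versions). Per the warning above, only load folders you trust, since module loading may unpickle arbitrary objects.

    from pywatts.core.pipeline import Pipeline  # import path may vary by version

    # Restore a previously saved pipeline and redirect its outputs.
    pipeline = Pipeline.from_folder("results/my_pipeline",
                                    file_manager_path="results/rerun")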
Example #4
    def create_step(self,
                    module: Base,
                    kwargs: Dict[str, Union[StepInformation, Tuple[StepInformation, ...]]],
                    use_inverse_transform: bool, use_predict_proba: bool,
                    callbacks: List[Union[BaseCallback, Callable[[Dict[str, xr.DataArray]], None]]],
                    condition,
                    batch_size,
                    computation_mode,
                    train_if):
        """
        Creates a appropriate step for the current situation.

        :param module: The module which should be added to the pipeline
        :param kwargs: The input steps for the current step
        :param targets: The target steps for the currrent step
        :param use_inverse_transform: Should inverse_transform be called instead of transform
        :param use_predict_proba: Should probabilistic_transform be called instead of transform
        :param callbacks: Callbacks to use after results are processed.
        :param condition: A function returning True or False which indicates if the step should be performed
        :param batch_size: The size of the past time range which should be used for relearning the module
        :param computation_mode: The computation mode of the step
        :param train_if: A method for determining if the step should be fitted at a specific timestamp.
        :return: StepInformation
        """

        arguments = inspect.signature(module.transform).parameters.keys()

        if "kwargs" not in arguments and not isinstance(module, Pipeline):
            for argument in arguments:
                if argument not in kwargs.keys():
                    raise StepCreationException(
                        f"The module {module.name} misses {argument} as input. The module needs {arguments} as input. "
                        f"{kwargs} are given as input. "
                        f"Add {argument}=<desired_input> when adding {module.name} to the pipeline.",
                        module
                    )

        # TODO needs to check that inputs are unambiguous -> i.e. check that each input has only one output
        pipeline = self._check_ins(kwargs)

        input_steps, target_steps = self._split_input_target_steps(kwargs, pipeline)

        if isinstance(module, Pipeline):
            step = PipelineStep(module, input_steps, pipeline.file_manager, targets=target_steps,
                                callbacks=callbacks, computation_mode=computation_mode, condition=condition,
                                batch_size=batch_size, train_if=train_if)
        elif use_inverse_transform:
            step = InverseStep(module, input_steps, pipeline.file_manager, targets=target_steps,
                               callbacks=callbacks, computation_mode=computation_mode, condition=condition)
        elif use_predict_proba:
            step = ProbablisticStep(module, input_steps, pipeline.file_manager, targets=target_steps,
                                    callbacks=callbacks, computation_mode=computation_mode, condition=condition)
        else:
            step = Step(module, input_steps, pipeline.file_manager, targets=target_steps,
                        callbacks=callbacks, computation_mode=computation_mode, condition=condition,
                        batch_size=batch_size, train_if=train_if)

        step_id = pipeline.add(module=step,
                               input_ids=[in_step.id for in_step in input_steps.values()],
                               target_ids=[t_step.id for t_step in target_steps.values()])
        step.id = step_id

        if len(target_steps) > 1:
            step.last = False
            # Register one result step per target so each output gets its own node.
            for target in target_steps:
                r_step = step.get_result_step(target)
                r_id = pipeline.add(module=r_step, input_ids=[step_id])
                r_step.id = r_id

        return StepInformation(step, pipeline)
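A self-contained sketch of the signature check at the top of create_step (Demo is a hypothetical module, not pyWATTS API): every named parameter of module.transform must be supplied via kwargs, otherwise a StepCreationException is raised.

    import inspect

    # Hypothetical module whose transform requires two named inputs.
    class Demo:
        def transform(self, x, y):
            return x

    arguments = inspect.signature(Demo().transform).parameters.keys()
    provided = {"x": "..."}  # 'y' is missing
    missing = [a for a in arguments if a not in provided]
    print(missing)  # ['y'] -- create_step would raise StepCreationException here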