示例#1
0
def test_cache_historical_state_using(data_setup):
    """Check that a model cached after an earlier fit is found again
    once the chain is returned to its earlier structure."""
    operations_cache = OperationsCache()
    fit_data, _ = data_setup
    pipeline = chain_first()

    # first fit: the fitted chain is stored in the cache
    pipeline.fit(input_data=fit_data)
    operations_cache.save_chain(pipeline)
    replacement = SecondaryNode(operation_type='logit')
    original_child = pipeline.root_node.nodes_from[0]

    # replace the first parent of the root with a fresh, unfitted node
    pipeline.update_node(old_node=original_child, new_node=replacement)
    # the cache is expected to hold nothing for the modified root
    assert not operations_cache.get(pipeline.root_node)

    # refit the modified chain and store it as well
    pipeline.fit(input_data=fit_data)
    operations_cache.save_chain(pipeline)
    # the modified root is now expected to be found in the cache
    assert operations_cache.get(pipeline.root_node)

    # put the original child back in place of the replacement
    current_child = pipeline.root_node.nodes_from[0]
    pipeline.update_node(old_node=current_child, new_node=original_child)
    # no refit is needed here: the model for this structure
    # was already saved after the very first fit
    assert operations_cache.get(pipeline.root_node)
示例#2
0
def test_cache_actuality_after_model_change(data_setup):
    """Nodes not affected by a model change keep their cached state,
    while the replaced node and the root lose theirs."""

    operations_cache = OperationsCache()

    pipeline = chain_first()
    fit_data, _ = data_setup
    pipeline.fit(input_data=fit_data)
    operations_cache.save_chain(pipeline)

    # swap the first parent of the root for a new 'logit' node
    replacement = SecondaryNode(operation_type='logit')
    replaced = pipeline.root_node.nodes_from[0]
    pipeline.update_node(old_node=replaced, new_node=replacement)

    first_root_parent = pipeline.root_node.nodes_from[0]

    # the root and its (new) first parent are expected to be missing from the cache
    stale_nodes = [pipeline.root_node, first_root_parent]
    fresh_nodes = []
    for candidate in pipeline.nodes:
        if candidate not in stale_nodes:
            fresh_nodes.append(candidate)

    # every untouched node must still be found in the cache
    assert all([
        entry is not None for entry in map(operations_cache.get, fresh_nodes)
    ])
    # none of the affected nodes may be found in the cache
    assert all([
        entry is None for entry in map(operations_cache.get, stale_nodes)
    ])
示例#3
0
def test_cache_actuality_after_primary_node_changed_to_subtree(data_setup):
    """Nodes not affected by replacing a primary node with a pre-fitted
    subtree keep their cached state; the root and its first parent do not."""
    operations_cache = OperationsCache()
    fit_data, _ = data_setup
    pipeline = chain_first()
    donor_pipeline = chain_second()

    pipeline.fit(input_data=fit_data)
    operations_cache.save_chain(pipeline)
    donor_pipeline.fit(input_data=fit_data)

    # replace a node two levels below the root with the donor's fitted subtree
    replaced_node = pipeline.root_node.nodes_from[0].nodes_from[0]
    donor_subtree = donor_pipeline.root_node.nodes_from[0]
    pipeline.update_subtree(replaced_node, donor_subtree)

    # store the donor subtree in the cache as a chain of its own
    fitted_subtree_chain = Chain(donor_pipeline.root_node.nodes_from[0])
    operations_cache.save_chain(fitted_subtree_chain)
    first_root_parent = pipeline.root_node.nodes_from[0]

    stale_nodes = [pipeline.root_node, first_root_parent]
    fresh_nodes = list(
        filter(lambda candidate: candidate not in stale_nodes, pipeline.nodes))

    # untouched nodes of the initial chain and the nodes of the new subtree are cached
    assert all(list(map(operations_cache.get, fresh_nodes)))
    # the root and its first parent have no cached state
    assert not any(list(map(operations_cache.get, stale_nodes)))
示例#4
0
def test_cache_actuality_after_subtree_change_to_identical(data_setup):
    """After a subtree is replaced with another pre-fitted subtree,
    every node except the root is still found in the cache."""
    operations_cache = OperationsCache()
    fit_data, _ = data_setup
    pipeline = chain_first()
    donor_pipeline = chain_second()

    pipeline.fit(input_data=fit_data)
    operations_cache.save_chain(pipeline)
    donor_pipeline.fit(input_data=fit_data)

    # cache the donor subtree as a standalone chain before it is inserted
    donor_subtree_chain = Chain(donor_pipeline.root_node.nodes_from[0])
    operations_cache.save_chain(donor_subtree_chain)

    # replace the first parent of the root with the donor's fitted subtree
    replaced_subtree = pipeline.root_node.nodes_from[0]
    donor_subtree = donor_pipeline.root_node.nodes_from[0]
    pipeline.update_subtree(replaced_subtree, donor_subtree)

    excluded = [pipeline.root_node]
    fresh_nodes = []
    for candidate in pipeline.nodes:
        if candidate not in excluded:
            fresh_nodes.append(candidate)

    # untouched nodes of the initial chain and the nodes of the new subtree are cached
    assert all([
        entry is not None for entry in map(operations_cache.get, fresh_nodes)
    ])
    # only the root, which sits above the replaced subtree, has no cached state
    assert operations_cache.get(pipeline.root_node) is None
示例#5
0
def test_multi_chain_caching_with_cache(data_setup):
    """Parts of a never-fitted chain that are identical to parts of other,
    already cached chains are found in the cache; the differing parts are not."""
    fit_data, _ = data_setup
    operations_cache = OperationsCache()

    target_chain = chain_second()
    donor_chain = chain_first()

    # only the donor chain is fitted and saved; the target chain is never fitted
    donor_chain.fit(input_data=fit_data)
    operations_cache.save_chain(donor_chain)

    target_root = target_chain.root_node
    first_parent = target_chain.root_node.nodes_from[0]
    # the root, its first parent and that parent's own parents are expected to be uncached
    uncached_nodes = [target_root, first_parent, *first_parent.nodes_from]
    cached_nodes = []
    for candidate in target_chain.nodes:
        if candidate not in uncached_nodes:
            cached_nodes.append(candidate)

    # nodes shared with the donor chain are found in the cache
    # although target_chain.fit() was never called
    assert all(list(map(operations_cache.get, cached_nodes)))
    # the parts that differ from the donor chain are not in the cache
    assert not any(list(map(operations_cache.get, uncached_nodes)))

    # same scenario with a fresh cache filled from two other chains
    operations_cache = OperationsCache()

    target_chain = chain_fourth()

    first_donor = chain_third()
    second_donor = chain_fifth()

    first_donor.fit(input_data=fit_data)
    operations_cache.save_chain(first_donor)
    second_donor.fit(input_data=fit_data)
    operations_cache.save_chain(second_donor)

    uncached_nodes = [
        target_chain.root_node,
        target_chain.root_node.nodes_from[1],
    ]
    cached_nodes = list(target_chain.root_node.nodes_from[0].nodes_from)

    assert not any(list(map(operations_cache.get, uncached_nodes)))
    assert all(list(map(operations_cache.get, cached_nodes)))
示例#6
0
class GPComposer(Composer):
    """
    Genetic programming based composer

    :param optimiser: optimiser generated in GPComposerBuilder
    :param composer_requirements: requirements for composition process
    :param metrics: metrics used to define the quality of found solution.
    :param initial_chain: defines the initial state of the population. If None then initial population is random.
    :param logger: log object used by the composer; if not given, ``default_log(__name__)`` is used
    """
    def __init__(
            self,
            optimiser=None,
            composer_requirements: Optional[GPComposerRequirements] = None,
            metrics: Optional[Union[List[MetricsEnum], MetricsEnum]] = None,
            initial_chain: Optional[Chain] = None,
            logger: Optional[Log] = None):

        super().__init__(metrics=metrics,
                         composer_requirements=composer_requirements,
                         initial_chain=initial_chain)

        # in-memory cache by default; compose_chain() swaps it for a
        # path-based cache when cache_path is set
        self.cache = OperationsCache()

        self.optimiser = optimiser
        self.cache_path = None
        self.use_existing_cache = False

        self.log = logger if logger else default_log(__name__)

    def compose_chain(
        self,
        data: InputData,
        is_visualise: bool = False,
        is_tune: bool = False,
        on_next_iteration_callback: Optional[Callable] = None
    ) -> Union[Chain, List[Chain]]:
        """ Function for optimal chain structure searching

        :param data: InputData for chain composing
        :param is_visualise: is it needed to visualise (not used inside this method)
        :param is_tune: is it needed to tune chain after composing TODO integrate new tuner
            (tune_chain currently raises NotImplementedError)
        :param on_next_iteration_callback: passed through unchanged to ``optimiser.optimise``

        :return best_chain: obtained result after composing: one chain for single-objective optimization;
            For the multi-objective case, the list of the chain is returned.
            In the list, the chains are ordered by the descending of primary metric (the first is the best)
        :raises AttributeError: if the optimiser is not defined
        """

        # fail fast: validate the configuration before touching any
        # process-wide state (the multiprocessing start method)
        if not self.optimiser:
            raise AttributeError(
                'Optimiser for chain composition is not defined')

        if self.composer_requirements.max_chain_fit_time:
            set_multiprocess_start_method()

        train_data, test_data = train_test_data_setup(
            data, sample_split_ration_for_tasks[data.task.task_type])

        if self.cache_path is None:
            self.cache.clear()
        else:
            # 'clear_exiting' is the keyword as spelled by OperationsCache
            self.cache = OperationsCache(
                self.cache_path, clear_exiting=not self.use_existing_cache)

        # the optimiser only supplies the chain; metrics and data are bound here
        metric_function_for_nodes = partial(self.composer_metric, self.metrics,
                                            train_data, test_data)

        best_chain = self.optimiser.optimise(
            metric_function_for_nodes,
            on_next_iteration_callback=on_next_iteration_callback)

        self.log.info('GP composition finished')

        if is_tune:
            self.tune_chain(best_chain, data,
                            self.composer_requirements.max_lead_time)
        return best_chain

    def composer_metric(self, metrics, train_data: InputData,
                        test_data: InputData,
                        chain: Chain) -> Optional[Tuple[Any, ...]]:
        """ Validate, fit (reusing the cache where possible) and evaluate a chain

        :param metrics: a single metric or a list of metrics; each is either a callable
            or an id resolved through MetricsRepository
        :param train_data: data used to fit the chain
        :param test_data: data passed to each metric as ``reference_data``
        :param chain: the chain to assess

        :return: tuple with one value per metric, or None if any step raised an exception
        """
        try:
            validate(chain)
            chain.log = self.log

            # isinstance also accepts list subclasses, unlike an exact type comparison
            if not isinstance(metrics, list):
                metrics = [metrics]

            cache_enabled = self.cache is not None
            if cache_enabled:
                # TODO improve cache
                chain.fit_from_cache(self.cache)

            if not chain.is_fitted:
                self.log.debug(
                    f'Chain {chain.root_node.descriptive_id} fit started')
                chain.fit(input_data=train_data,
                          time_constraint=self.composer_requirements.
                          max_chain_fit_time)
                # guarded like fit_from_cache above: without the guard a disabled
                # cache would make every freshly fitted chain look like a failure
                if cache_enabled:
                    self.cache.save_chain(chain)

            evaluated_metrics = tuple(
                self._metric_function(metric)(chain, reference_data=test_data)
                for metric in metrics)

            self.log.debug(
                f'Chain {chain.root_node.descriptive_id} with metrics: {list(evaluated_metrics)}'
            )

        except Exception as ex:
            # deliberate best-effort: a chain that cannot be assessed is reported
            # as None instead of aborting the whole composition
            self.log.info(f'Chain assessment warning: {ex}. Continue.')
            evaluated_metrics = None

        return evaluated_metrics

    @staticmethod
    def _metric_function(metric) -> Callable:
        """ Return ``metric`` itself if it is callable, otherwise look it up by id in MetricsRepository """
        if callable(metric):
            return metric
        return MetricsRepository().metric_by_id(metric)

    @staticmethod
    def tune_chain(chain: Chain, data: InputData, time_limit):
        """ Placeholder for chain tuning; always raises NotImplementedError """
        raise NotImplementedError()

    @property
    def history(self):
        """ History object exposed by the optimiser (``optimiser.history``) """
        return self.optimiser.history