Example #1
def generate_history(generations, pop_size):
    """Builds a synthetic OptHistory with `generations` populations of `pop_size` individuals."""
    history = OptHistory()
    converter = GraphGenerationParams().adapter
    for gen in range(generations):
        new_pop = []
        for idx in range(pop_size):
            pipeline = pipeline_first()
            ind = Individual(converter.adapt(pipeline))
            ind.fitness = 1 / (gen * idx + 1)
            new_pop.append(ind)
        history.add_to_history(new_pop)
    return history
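A minimal usage sketch for the helper above; the sizes are arbitrary, and the assumption that OptHistory exposes one list of individuals per generation via its individuals attribute is mine, not taken from the snippet:

# Hypothetical usage of generate_history (sizes are illustrative).
# `history.individuals` is an assumption about how OptHistory stores
# the populations added by add_to_history.
history = generate_history(generations=2, pop_size=10)
print(len(history.individuals))  # expected to equal the number of generations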
Example #2
    def build(self) -> Composer:
        optimiser_type = GPGraphOptimiser
        if self.optimiser_parameters.genetic_scheme_type == GeneticSchemeTypesEnum.parameter_free:
            optimiser_type = GPGraphParameterFreeOptimiser

        graph_generation_params = GraphGenerationParams(
            adapter=PipelineAdapter(self._composer.log),
            advisor=PipelineChangeAdvisor())

        archive_type = None
        if len(self._composer.metrics) > 1:
            archive_type = tools.ParetoFront()
            # TODO add possibility of using regularization in MO alg
            self.optimiser_parameters.regularization_type = RegularizationTypesEnum.none
            self.optimiser_parameters.multi_objective = True

        if self.optimiser_parameters.mutation_types is None:
            self.optimiser_parameters.mutation_types = [
                boosting_mutation, parameter_change_mutation,
                single_edge_mutation, single_change_mutation,
                single_drop_mutation, single_add_mutation
            ]

        optimiser = optimiser_type(
            initial_graph=self._composer.initial_pipeline,
            requirements=self._composer.composer_requirements,
            graph_generation_params=graph_generation_params,
            parameters=self.optimiser_parameters,
            log=self._composer.log,
            archive_type=archive_type,
            metrics=self._composer.metrics)

        self._composer.optimiser = optimiser

        return self._composer
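The build() method above is normally reached through the builder chain used in Example #6; a condensed sketch of that usage, with the task, requirements and metric taken from that example:

# Condensed from Example #6: obtain a Composer through the builder chain.
task = Task(TaskTypesEnum.classification)
available_model_types, _ = OperationTypesRepository().suitable_operation(
    task_type=task.task_type)
composer_requirements = GPComposerRequirements(
    primary=available_model_types, secondary=available_model_types)
composer = GPComposerBuilder(task=task) \
    .with_requirements(composer_requirements) \
    .with_metrics(ClassificationMetricsEnum.ROCAUC_penalty) \
    .build()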
Example #3
def test_drop_mutation_for_linear_graph():
    """
    Tests that the single_drop mutation can remove a node
    """

    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))

    linear_one_node = OptGraph(OptNode('logit'))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=DirectAdapter(), rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_drop = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_drop],
                                        params=graph_params,
                                        ind=Individual(linear_two_nodes),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if not successful_mutation_drop:
            successful_mutation_drop = \
                graph_after_mutation.root_node.descriptive_id == linear_one_node.root_node.descriptive_id
        else:
            break
    assert successful_mutation_drop
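Examples #3, #4, #5 and #10 all rely on the same retry loop: the mutation is stochastic, so it is applied up to 100 times and the test passes as soon as the mutated graph matches the expected one. A hypothetical helper capturing that pattern (the name and signature are illustrative, not part of the library; the mutation call mirrors the examples above):

def mutation_produces(expected, mutation_types, ind, requirements,
                      graph_params, max_depth, attempts=100):
    # Apply the stochastic mutation up to `attempts` times and report whether
    # the expected graph is obtained at least once.
    for _ in range(attempts):
        mutated = mutation(types=mutation_types,
                           params=graph_params,
                           ind=ind,
                           requirements=requirements,
                           log=default_log(__name__),
                           max_depth=max_depth).graph
        if mutated.root_node.descriptive_id == expected.root_node.descriptive_id:
            return True
    return False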
Example #4
def test_edge_mutation_for_graph():
    """
    Tests that the single_edge mutation can add an edge between nodes
    """
    graph_without_edge = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    primary = OptNode('scaling')
    graph_with_edge = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [primary]), primary]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling', 'one_hot_encoding'],
        secondary=['logit', 'scaling'],
        mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=DirectAdapter(), rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_edge = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_edge],
                                        params=graph_params,
                                        ind=Individual(graph_without_edge),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=graph_with_edge.depth).graph
        if not successful_mutation_edge:
            successful_mutation_edge = \
                graph_after_mutation.root_node.descriptive_id == graph_with_edge.root_node.descriptive_id
        else:
            break
    assert successful_mutation_edge
Example #5
def test_intermediate_add_mutation_for_linear_graph():
    """
    Tests that the single_add mutation can add a node between two existing nodes
    """

    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))
    linear_three_nodes_inner = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling'], secondary=['one_hot_encoding'], mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=DirectAdapter(), rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_inner = False

    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_add],
                                        params=graph_params,
                                        ind=Individual(linear_two_nodes),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=3).graph
        if not successful_mutation_inner:
            successful_mutation_inner = \
                graph_after_mutation.root_node.descriptive_id == linear_three_nodes_inner.root_node.descriptive_id
        else:
            break

    assert successful_mutation_inner
Example #6
def test_evaluate_individuals():
    full_path_train = os.path.join(str(fedot_project_root()),
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)

    composer = builder.build()

    pipelines_to_evaluate = [
        pipeline_first(),
        pipeline_second(),
        pipeline_third(),
        pipeline_fourth()
    ]

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ratio_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)
    adapter = PipelineAdapter()
    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=0.001)
    params = GraphGenerationParams(adapter=PipelineAdapter(),
                                   advisor=PipelineChangeAdvisor())
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=5)
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    assert all([ind.fitness is not None for ind in population])
Example #7
def test_selection():
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())

    selected_individuals = selection(types=[SelectionTypesEnum.tournament],
                                     population=population,
                                     pop_size=num_of_inds,
                                     params=graph_params)
    assert (all([ind in population for ind in selected_individuals])
            and len(selected_individuals) == num_of_inds)
Example #8
def test_individuals_selection_random_individuals():
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    types = [SelectionTypesEnum.tournament]
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())
    selected_individuals = individuals_selection(types=types,
                                                 individuals=population,
                                                 pop_size=num_of_inds,
                                                 graph_params=graph_params)
    selected_individuals_ref = [str(ind) for ind in selected_individuals]
    assert (len(set(selected_individuals_ref)) == len(selected_individuals)
            and len(selected_individuals) == num_of_inds)
Example #9
def rand_population_gener_and_eval(pop_size=4):
    models_set = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=models_set,
                                          secondary=models_set,
                                          max_depth=1)
    pipeline_gener_params = GraphGenerationParams(
        advisor=PipelineChangeAdvisor(), adapter=PipelineAdapter())
    random_pipeline_function = partial(random_graph,
                                       params=pipeline_gener_params,
                                       requirements=requirements)
    population = [
        Individual(random_pipeline_function()) for _ in range(pop_size)
    ]
    # evaluation
    for ind in population:
        ind.fitness = obj_function()
    return population
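This helper produces the evaluated populations consumed by Examples #7 and #8; a condensed sketch of that usage, with the sizes mirroring those examples:

population = rand_population_gener_and_eval(pop_size=4)
graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                     adapter=PipelineAdapter())
selected = selection(types=[SelectionTypesEnum.tournament],
                     population=population,
                     pop_size=2,
                     params=graph_params)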
Example #10
def test_boosting_mutation_for_linear_graph():
    """
    Tests that the boosting mutation can add a correct boosting cascade
    """

    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])

    boosting_graph = \
        OptGraph(
            OptNode('logit',
                    [model_node, OptNode('linear',
                                         [OptNode('class_decompose',
                                                  [model_node, init_node])])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_boosting = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if not successful_mutation_boosting:
            successful_mutation_boosting = \
                graph_after_mutation.root_node.descriptive_id == boosting_graph.root_node.descriptive_id
        else:
            break
    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None
Example #11
def run_custom_example(
        timeout: datetime.timedelta = datetime.timedelta(minutes=0.2)):
    data = pd.read_csv(
        os.path.join(fedot_project_root(), 'examples', 'data',
                     'custom_encoded.csv'))
    nodes_types = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10']
    rules = [has_no_self_cycled_nodes, has_no_cycle, _has_no_duplicates]

    initial = CustomGraphModel(nodes=[
        CustomGraphNode(nodes_from=None, content=node_type)
        for node_type in nodes_types
    ])

    requirements = GPComposerRequirements(primary=nodes_types,
                                          secondary=nodes_types,
                                          max_arity=10,
                                          max_depth=10,
                                          pop_size=5,
                                          num_of_generations=5,
                                          crossover_prob=0.8,
                                          mutation_prob=0.9,
                                          timeout=timeout)

    optimiser_parameters = GPGraphOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
        mutation_types=[custom_mutation],
        crossover_types=[CrossoverTypesEnum.none],
        regularization_type=RegularizationTypesEnum.none)

    graph_generation_params = GraphGenerationParams(
        adapter=DirectAdapter(base_graph_class=CustomGraphModel,
                              base_node_class=CustomGraphNode),
        rules_for_constraint=rules)

    optimizer = GPGraphOptimiser(
        graph_generation_params=graph_generation_params,
        metrics=[],
        parameters=optimiser_parameters,
        requirements=requirements,
        initial_graph=initial,
        log=default_log(logger_name='Bayesian', verbose_level=1))

    optimized_network = optimizer.optimise(partial(custom_metric, data=data))

    optimized_network.show()
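A plausible entry point for the example above; the __main__ guard is my addition, and the timeout simply restates the function's default:

if __name__ == '__main__':
    # Run the custom-graph optimisation example with a short time budget.
    run_custom_example(timeout=datetime.timedelta(minutes=0.2))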
Example #12
def test_mutation():
    adapter = PipelineAdapter()
    ind = Individual(adapter.adapt(pipeline_first()))
    mutation_types = [MutationTypesEnum.none]
    log = default_log(__name__)
    graph_gener_params = GraphGenerationParams()
    task = Task(TaskTypesEnum.classification)
    primary_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    secondary_model_types = ['xgboost', 'knn', 'lda', 'qda']
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=1)
    new_ind = mutation(mutation_types,
                       graph_gener_params,
                       ind,
                       composer_requirements,
                       log=log,
                       max_depth=3)
    assert new_ind.graph == ind.graph
    mutation_types = [MutationTypesEnum.growth]
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=0)
    new_ind = mutation(mutation_types,
                       graph_gener_params,
                       ind,
                       composer_requirements,
                       log=log,
                       max_depth=3)
    assert new_ind.graph == ind.graph
    ind = Individual(adapter.adapt(pipeline_fifth()))
    new_ind = mutation(mutation_types,
                       graph_gener_params,
                       ind,
                       composer_requirements,
                       log=log,
                       max_depth=3)
    assert new_ind.graph == ind.graph
Example #13
def test_custom_graph_opt():
    nodes_types = ['A', 'B', 'C', 'D']
    rules = [has_no_self_cycled_nodes]

    requirements = GPComposerRequirements(primary=nodes_types,
                                          secondary=nodes_types,
                                          max_arity=3,
                                          max_depth=3,
                                          pop_size=5,
                                          num_of_generations=5,
                                          crossover_prob=0.8,
                                          mutation_prob=0.9)

    optimiser_parameters = GPGraphOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
        mutation_types=[
            MutationTypesEnum.simple, MutationTypesEnum.reduce,
            MutationTypesEnum.growth, MutationTypesEnum.local_growth
        ],
        regularization_type=RegularizationTypesEnum.none)

    graph_generation_params = GraphGenerationParams(
        adapter=DirectAdapter(CustomModel, CustomNode),
        rules_for_constraint=rules)

    optimizer = GPGraphOptimiser(
        graph_generation_params=graph_generation_params,
        metrics=[],
        parameters=optimiser_parameters,
        requirements=requirements,
        initial_graph=None)

    optimized_network = optimizer.optimise(custom_metric)

    assert optimized_network is not None
    assert isinstance(optimized_network, CustomModel)
    assert isinstance(optimized_network.nodes[0], CustomNode)

    assert 'custom_A' in [str(_) for _ in optimized_network.nodes]