def build(self) -> Composer:
    """
    Create the graph optimiser, attach it to the composer and return the composer.

    The optimiser class is chosen from ``optimiser_parameters.genetic_scheme_type``.
    When the composer has more than one metric, a Pareto-front archive is used,
    regularization is switched off and the multi-objective flag is set.
    Missing mutation types are replaced by a default list.

    :return: the composer held by this builder, with its ``optimiser`` attribute set
    """
    composer = self._composer
    params = self.optimiser_parameters

    is_parameter_free = params.genetic_scheme_type == GeneticSchemeTypesEnum.parameter_free
    optimiser_cls = GPGraphParameterFreeOptimiser if is_parameter_free else GPGraphOptimiser

    generation_params = GraphGenerationParams(
        adapter=PipelineAdapter(composer.log),
        advisor=PipelineChangeAdvisor())

    if len(composer.metrics) > 1:
        # several metrics -> multi-objective optimisation
        archive = tools.ParetoFront()
        # TODO add possibility of using regularization in MO alg
        params.regularization_type = RegularizationTypesEnum.none
        params.multi_objective = True
    else:
        archive = None

    if params.mutation_types is None:
        # the caller did not choose mutations, fall back to the default list
        params.mutation_types = [
            boosting_mutation,
            parameter_change_mutation,
            single_edge_mutation,
            single_change_mutation,
            single_drop_mutation,
            single_add_mutation,
        ]

    composer.optimiser = optimiser_cls(
        initial_graph=composer.initial_pipeline,
        requirements=composer.composer_requirements,
        graph_generation_params=generation_params,
        parameters=params,
        log=composer.log,
        archive_type=archive,
        metrics=composer.metrics)

    return composer
class ComposerRequirements:
    """
    Requirements of the composition process, validated in ``__post_init__``.

    :attribute primary: List of operation types (str) for Primary Nodes
    :attribute secondary: List of operation types (str) for Secondary Nodes
    :attribute timeout: max time available for composition process
        (``datetime.timedelta``, 5 minutes by default)
    :attribute max_pipeline_fit_time: time constraint for operation fitting
        (``datetime.timedelta`` or None, None by default)
    :attribute max_depth: max depth of the result pipeline (must not be negative)
    :attribute max_arity: maximal number of parent for node (must not be negative)
    :attribute min_arity: minimal number of parent for node (must not be negative)
    :attribute cv_folds: number of folds for KFold cross validation (2 or more) or None
    :attribute advisor: PipelineChangeAdvisor instance or None
    """
    primary: List[str]
    secondary: List[str]
    timeout: Optional[datetime.timedelta] = datetime.timedelta(minutes=5)
    max_pipeline_fit_time: Optional[datetime.timedelta] = None
    max_depth: int = 3
    max_arity: int = 2
    min_arity: int = 2
    cv_folds: Optional[int] = None
    # NOTE(review): this default is built once, when the class body is executed,
    # so every object that keeps the default refers to the same advisor instance
    # - confirm PipelineChangeAdvisor carries no per-run state.
    advisor: Optional[PipelineChangeAdvisor] = PipelineChangeAdvisor()

    def __post_init__(self):
        """
        Validate the numeric limits.

        :raises ValueError: if max_depth, max_arity or min_arity is negative,
            or if cv_folds is given and is not at least 2
        """
        # checked in this order so the first offending attribute is the one reported
        for attribute_name in ('max_depth', 'max_arity', 'min_arity'):
            if getattr(self, attribute_name) < 0:
                raise ValueError(f'invalid {attribute_name} value')

        folds = self.cv_folds
        if folds is not None and folds <= 1:
            raise ValueError('Number of folds for KFold cross validation must be 2 or more.')
def test_evaluate_individuals():
    """
    Runs evaluate_individuals on four pipelines under two timeouts.

    With a timeout of 0.001 minutes the population is expected to hold a single
    evaluated individual afterwards; with a timeout of 5 minutes all four
    individuals are expected to be kept and to have a fitness.
    """
    project_root_path = str(fedot_project_root())
    file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv')
    # NOTE(review): the root is joined a second time here; os.path.join returns
    # the last argument unchanged when it is absolute, so this looks redundant
    # - confirm fedot_project_root() always yields an absolute path.
    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)
    composer = (GPComposerBuilder(task=task)
                .with_requirements(composer_requirements)
                .with_metrics(metric_function)
                .build())

    pipelines_to_evaluate = [
        pipeline_first(),
        pipeline_second(),
        pipeline_third(),
        pipeline_fourth(),
    ]

    split_ratio = sample_split_ratio_for_tasks[dataset_to_compose.task.task_type]
    train_data, test_data = train_test_data_setup(dataset_to_compose, split_ratio)
    metric_function_for_nodes = partial(composer.composer_metric, composer.metrics,
                                        train_data, test_data)

    adapter = PipelineAdapter()
    params = GraphGenerationParams(adapter=PipelineAdapter(), advisor=PipelineChangeAdvisor())

    def evaluate_fresh_population(timeout):
        # Build new individuals from the pipelines and evaluate them under the timer.
        individuals = [Individual(adapter.adapt(pipeline)) for pipeline in pipelines_to_evaluate]
        with OptimisationTimer(timeout=timeout) as t:
            evaluate_individuals(individuals_set=individuals,
                                 objective_function=metric_function_for_nodes,
                                 graph_generation_params=params,
                                 is_multi_objective=False,
                                 timer=t)
        return individuals

    # very short timeout: only one evaluated individual is expected to remain
    population = evaluate_fresh_population(datetime.timedelta(minutes=0.001))
    assert len(population) == 1
    assert population[0].fitness is not None

    # generous timeout: every individual is expected to be evaluated
    population = evaluate_fresh_population(datetime.timedelta(minutes=5))
    assert len(population) == 4
    assert all(ind.fitness is not None for ind in population)
def test_selection():
    """
    Tournament selection must return the requested number of individuals,
    each of which is a member of the source population.
    """
    expected_size = 2
    candidates = rand_population_gener_and_eval(pop_size=4)
    generation_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                              adapter=PipelineAdapter())

    chosen = selection(types=[SelectionTypesEnum.tournament],
                       population=candidates,
                       pop_size=expected_size,
                       params=generation_params)

    assert all(ind in candidates for ind in chosen)
    assert len(chosen) == expected_size
def test_individuals_selection_random_individuals():
    """
    individuals_selection must return the requested number of individuals
    whose string representations are all different.
    """
    expected_size = 2
    candidates = rand_population_gener_and_eval(pop_size=4)
    generation_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                              adapter=PipelineAdapter())

    chosen = individuals_selection(types=[SelectionTypesEnum.tournament],
                                   individuals=candidates,
                                   pop_size=expected_size,
                                   graph_params=generation_params)

    # a set drops repeated string representations, so equal sizes mean no duplicates
    unique_refs = {str(ind) for ind in chosen}
    assert len(unique_refs) == len(chosen)
    assert len(chosen) == expected_size
def rand_population_gener_and_eval(pop_size=4):
    """
    Generate ``pop_size`` individuals wrapping random graphs and give each a fitness.

    Graphs are produced by ``random_graph`` from the operations 'knn', 'logit'
    and 'rf' with ``max_depth=1``; fitness values come from ``obj_function()``.

    :param pop_size: number of individuals to create
    :return: list of individuals with the ``fitness`` attribute assigned
    """
    operations = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=operations, secondary=operations, max_depth=1)
    generation_params = GraphGenerationParams(
        advisor=PipelineChangeAdvisor(), adapter=PipelineAdapter())

    # generation: all graphs are created before any fitness is assigned
    population = []
    for _ in range(pop_size):
        graph = random_graph(params=generation_params, requirements=requirements)
        population.append(Individual(graph))

    # evaluation
    for individual in population:
        individual.fitness = obj_function()

    return population
def test_boosting_mutation_for_linear_graph():
    """
    Tests boosting mutation can add correct boosting cascade

    The mutation is applied to a linear 'scaling' -> 'knn' graph up to 100 times
    until the root node's descriptive_id equals that of the expected boosting
    graph. The matching graph is then restored to a pipeline, which must fit
    and predict.
    """
    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    # expected result: 'logit' on top of the model and a 'linear' model
    # built over the 'class_decompose' node
    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])
    boosting_graph = OptGraph(
        OptNode('logit',
                [model_node,
                 OptNode('linear',
                         [OptNode('class_decompose', [model_node, init_node])])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'], mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)

    expected_root_id = boosting_graph.root_node.descriptive_id

    successful_mutation_boosting = False
    graph_after_mutation = None
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if graph_after_mutation.root_node.descriptive_id == expected_root_id:
            # Stop right away: another pass would overwrite graph_after_mutation
            # with a graph that has not been compared to the expected one.
            successful_mutation_boosting = True
            break

    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None