def two_level_chain():
    """Build a chain of two primary nodes feeding a single secondary node.

    The 'logit' and 'knn' primary nodes are the parents of an 'xgboost'
    secondary node. All three nodes are registered on the chain explicitly,
    parents first.
    """
    logit_node = PrimaryNode(model_type='logit')
    knn_node = PrimaryNode(model_type='knn')
    xgboost_node = SecondaryNode(model_type='xgboost',
                                 nodes_from=[logit_node, knn_node])

    chain = Chain()
    chain.add_node(logit_node)
    chain.add_node(knn_node)
    chain.add_node(xgboost_node)
    return chain
def chain_with_self_cycle():
    """Build a chain whose secondary node lists itself among its parents."""
    source = PrimaryNode(model_type='logit')
    looped = SecondaryNode(model_type='logit', nodes_from=[source])
    # The node is appended to its own parent list to form the self-cycle.
    looped.nodes_from.append(looped)

    chain = Chain()
    for node in (source, looped):
        chain.add_node(node)
    return chain
def get_class_chain():
    """Compose a chain: 'xgboost' and 'knn' primaries feeding a 'logit' node.

    Only the final (secondary) node is passed to ``Chain.add_node``.
    """
    # Chain composition
    parents = [PrimaryNode(model_type='xgboost'),
               PrimaryNode(model_type='knn')]
    root = SecondaryNode(model_type='logit', nodes_from=parents)

    chain = Chain()
    chain.add_node(root)
    return chain
def get_regr_chain():
    """Compose a chain: 'xgbreg' and 'knnreg' primaries feeding a 'linear' node.

    Only the final (secondary) node is passed to ``Chain.add_node``.
    """
    # Chain composition
    parents = [PrimaryNode(model_type='xgbreg'),
               PrimaryNode(model_type='knnreg')]
    root = SecondaryNode(model_type='linear', nodes_from=parents)

    chain = Chain()
    chain.add_node(root)
    return chain
def chain_third():
    """Build a three-node chain: a 'qda' root with two 'rf' primary parents.

    The root is registered on the chain first, followed by each of its
    parents in the order they were attached.
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # A plain loop is used because add_node is called only for its side
    # effect; a list comprehension would build a throwaway list of results.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
def chain_second():
    """Return ``chain_first()`` with the root's first parent replaced.

    The replacement is a 'dt' secondary node that has two 'knn' primary
    parents; it is swapped in through ``replace_node_with_parents``.
    """
    #      XG
    #   |      \
    #   DT      KNN
    #  |  \     |  \
    # KNN KNN   LR LDA
    chain = chain_first()

    dt_node = SecondaryNode('dt')
    dt_node.nodes_from.append(PrimaryNode('knn'))
    dt_node.nodes_from.append(PrimaryNode('knn'))

    replaced = chain.root_node.nodes_from[0]
    chain.replace_node_with_parents(replaced, dt_node)
    return chain
def compose_chain() -> Chain:
    """Compose a chain with 'svc' and 'lda' primaries feeding an 'rf' node.

    Only the 'rf' secondary node is passed to ``Chain.add_node``.
    """
    chain = Chain()

    parents = [PrimaryNode(name) for name in ('svc', 'lda')]
    root = SecondaryNode('rf')
    for parent in parents:
        root.nodes_from.append(parent)

    chain.add_node(root)
    return chain
def chain_fifth():
    """Return ``chain_first()`` with the root and two leaves swapped for 'knn'.

    The root is replaced by a 'knn' secondary node, then both parents of the
    root's second parent are replaced by one shared 'knn' primary node.
    """
    #    KNN
    #  |     \
    # XG      KNN
    # |  \    |  \
    # LR LDA KNN  KNN
    chain = chain_first()

    knn_root = SecondaryNode('knn')
    chain.update_node(chain.root_node, knn_root)

    # The same PrimaryNode instance is used for both leaf positions.
    knn_leaf = PrimaryNode('knn')
    for position in (0, 1):
        chain.update_node(
            chain.root_node.nodes_from[1].nodes_from[position], knn_leaf)
    return chain
def test_chain_sequential_fit_correct(data_setup):
    """Fit a four-node sequential 'logit' chain and check its structure.

    Verifies the root's descriptive id, the chain length and depth, that the
    training prediction has one row per training target, and that the final
    node has an actual cached state after fitting.
    """
    train, _ = train_test_data_setup(data_setup)

    # One primary node followed by three secondary nodes, each fed by the
    # previous one.
    nodes = [PrimaryNode(model_type='logit')]
    for _ in range(3):
        nodes.append(SecondaryNode(model_type='logit', nodes_from=[nodes[-1]]))
    final = nodes[-1]

    chain = Chain()
    for node in nodes:
        chain.add_node(node)

    train_predicted = chain.fit(input_data=train, use_cache=False)

    expected_id = ('(((/n_logit_default_params;)/'
                   'n_logit_default_params;)/'
                   'n_logit_default_params;)/'
                   'n_logit_default_params')
    assert chain.root_node.descriptive_id == expected_id

    assert chain.length == 4
    assert chain.depth == 4
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.cache.actual_cached_state is not None
def test_cache_actuality_after_model_change(data_setup):
    """Non-affected nodes keep an actual cache after a model is changed."""
    chain = chain_first()
    train, _ = data_setup
    chain.fit(input_data=train)

    replacement = SecondaryNode(model_type='logit')
    chain.update_node(old_node=chain.root_node.nodes_from[0],
                      new_node=replacement)

    root_parent_first = chain.root_node.nodes_from[0]
    stale_nodes = [chain.root_node, root_parent_first]
    fresh_nodes = [node for node in chain.nodes if node not in stale_nodes]

    # Non-affected nodes still have an actual cache.
    fresh_states = [node.cache.actual_cached_state for node in fresh_nodes]
    assert all(fresh_states)

    # The replaced node and the root that depends on it have no actual cache.
    stale_states = [node.cache.actual_cached_state for node in stale_nodes]
    assert not any(stale_states)
def chain_first():
    """Build a seven-node tree-shaped chain.

    The 'xgboost' root has two secondary parents ('xgboost' and 'knn'), and
    each of those has a 'logit' and an 'lda' primary parent. Nodes are
    registered leaves first, then their branch, and the root last.
    """
    #    XG
    #  |     \
    # XG     KNN
    # |  \    |  \
    # LR LDA LR  LDA
    chain = Chain()

    root = SecondaryNode('xgboost')
    left_branch = SecondaryNode('xgboost')
    right_branch = SecondaryNode('knn')

    for branch in (left_branch, right_branch):
        for leaf_model in ('logit', 'lda'):
            leaf = PrimaryNode(leaf_model)
            branch.nodes_from.append(leaf)
            chain.add_node(leaf)
        chain.add_node(branch)
        root.nodes_from.append(branch)

    chain.add_node(root)
    return chain
def test_chain_with_custom_params_for_model(data_setup):
    """Custom params on the root node must change the chain's predictions.

    Two structurally identical chains are fitted on the same data: one keeps
    its defaults, the other gets ``custom_params`` on its 'knn' root. Their
    predictions on that data are asserted to differ.
    """
    knn_overrides = {'n_neighbors': 1, 'weights': 'uniform', 'p': 1}

    parents = [PrimaryNode(model_type='logit'), PrimaryNode(model_type='lda')]
    root = SecondaryNode(model_type='knn', nodes_from=parents)

    chain = Chain()
    chain.add_node(root)

    # The default chain is copied before the custom params are attached.
    baseline_chain = deepcopy(chain)
    chain.root_node.custom_params = knn_overrides

    baseline_chain.fit(data_setup)
    chain.fit(data_setup)

    tuned_prediction = chain.predict(data_setup).predict
    baseline_prediction = baseline_chain.predict(data_setup).predict

    assert not np.array_equal(tuned_prediction, baseline_prediction)
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    """Fit a baseline pipeline and a chain on the same data and print ROC AUC.

    Both models are trained on the CSV at ``train_file_path`` and scored on
    the CSV at ``test_file_path``. The ROC AUC of each is printed, first the
    pipeline's and then the chain's.

    :param train_file_path: path to the training CSV, read by ``InputData.from_csv``
    :param test_file_path: path to the test CSV, read by ``InputData.from_csv``
    :return: ROC AUC of the chain (the second value printed)
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    x_train, x_test = train_data.features, test_data.features
    y_train, y_test = train_data.target, test_data.target

    # --- Baseline pipeline (presumably exported from TPOT; verify) ---
    # Average CV score on the training set was: 0.93755
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()),
        RandomForestClassifier(),
    )
    # Fix random state for all the steps in exported pipeline
    set_param_recursive(exported_pipeline.steps, 'random_state', 1)

    exported_pipeline.fit(x_train, y_train)
    # Column 1 of predict_proba is used as the score.
    pipeline_scores = exported_pipeline.predict_proba(x_test)[:, 1]

    roc_auc_value = roc_auc(y_true=y_test, y_score=pipeline_scores)
    print(roc_auc_value)

    # --- Chain with the same data ---
    chain = Chain()
    parents = [PrimaryNode('direct_data_model'), PrimaryNode('bernb')]
    root = SecondaryNode('rf')
    for parent in parents:
        root.nodes_from.append(parent)
    chain.add_node(root)

    chain.fit(train_data)
    chain_output = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=y_test, y_score=chain_output.predict)
    print(roc_auc_value)

    return roc_auc_value
def real_chain(chain_template, with_cache=True):
    """Build a ``Chain`` of real nodes from a layered collection of templates.

    Each template becomes a ``PrimaryNode`` when it has no parents, otherwise
    a ``SecondaryNode`` whose parents are resolved by ``real_parents`` from
    the nodes created so far. The template's ``model_instance`` is assigned
    to the node's ``model``.

    :param chain_template: levels of templates, iterated in order; each
        template exposes ``parents``, ``model_type``, ``model_instance``,
        ``preprocessor`` and ``fitted_model``.
        NOTE(review): assumes an ordered sequence (e.g. a list of lists) -
        confirm against callers.
    :param with_cache: when true, each node gets a ``FittedModelCache``
        holding one ``CachedState`` built from the template's
        ``preprocessor`` and ``fitted_model``.
    :return: a ``Chain`` with the nodes added in template traversal order
    """
    nodes_by_templates = []

    # Levels are iterated directly rather than indexed through
    # range(len(...)).
    for level_templates in chain_template:
        for template in level_templates:
            if not template.parents:
                node = PrimaryNode(model_type=template.model_type)
            else:
                # Parents are looked up among the nodes already created for
                # earlier templates.
                node = SecondaryNode(
                    nodes_from=real_parents(nodes_by_templates, template),
                    model_type=template.model_type)

            node.model = template.model_instance

            if with_cache:
                cache = FittedModelCache(related_node=node)
                cache.append(
                    CachedState(preprocessor=template.preprocessor,
                                model=template.fitted_model))
                node.cache = cache

            nodes_by_templates.append((node, template))

    chain = Chain()
    for node, _ in nodes_by_templates:
        chain.add_node(node)

    return chain