Пример #1
0
def test_chain_hierarchy_fit_correct(data_setup):
    """Fit a diamond-shaped chain of logit nodes and check its structure.

    One primary node feeds two secondary nodes, which are both parents of a
    final secondary node. After fitting without cache the test checks the
    root's descriptive id, the chain length and depth, and that the training
    prediction has the same shape as the training target.
    """
    train_part, _ = train_test_data_setup(data_setup)

    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    merger = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[left, right])

    chain = Chain()
    for node in (source, left, right, merger):
        chain.add_node(node)

    fit_output = chain.fit(input_data=train_part, use_cache=False)

    expected_root_id = (
        '((/n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;;(/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams')
    assert chain.root_node.descriptive_id == expected_root_id

    assert chain.length == 4
    assert chain.depth == 3
    assert fit_output.predict.shape == train_part.target.shape
Пример #2
0
    def compose_chain(self,
                      data: InputData,
                      initial_chain: Optional[Chain],
                      composer_requirements: ComposerRequirements,
                      metrics: Optional[Callable],
                      optimiser_parameters=None,
                      is_visualise: bool = False) -> Chain:
        """Build a fixed-topology chain from the models in the requirements.

        The topology is selected by ``self.dummy_chain_type``:

        * hierarchical - every model in ``composer_requirements.primary``
          becomes a primary node, and all of them are parents of a single
          secondary node built from ``composer_requirements.secondary[0]``;
        * flat - a primary node built from ``composer_requirements.primary[0]``
          is followed by one secondary node per entry of
          ``composer_requirements.secondary``, each taking the previous node
          as its only parent.

        ``data``, ``initial_chain``, ``metrics``, ``optimiser_parameters`` and
        ``is_visualise`` are not read by this implementation.

        Returns:
            The newly assembled chain.

        Raises:
            NotImplementedError: for any other ``dummy_chain_type``.
        """
        result = Chain()
        chain_type = self.dummy_chain_type

        if chain_type == DummyChainTypeEnum.hierarchical:
            # (y1, y2) -> y
            merger = NodeGenerator.secondary_node(
                composer_requirements.secondary[0])
            for model in composer_requirements.primary:
                leaf = NodeGenerator.primary_node(model)
                result.add_node(leaf)
                merger.nodes_from.append(leaf)
            result.add_node(merger)
            return result

        if chain_type == DummyChainTypeEnum.flat:
            # (y1) -> (y2) -> y
            tail = NodeGenerator.primary_node(composer_requirements.primary[0])
            result.add_node(tail)
            for model in composer_requirements.secondary:
                successor = NodeGenerator.secondary_node(model)
                successor.nodes_from = [tail]
                result.add_node(successor)
                tail = successor
            return result

        raise NotImplementedError()
Пример #3
0
def chain_with_secondary_nodes_only():
    """Return a two-node chain that contains no primary node.

    Both nodes are secondary logit nodes: the first is created with an empty
    parent list and the second takes the first as its only parent.
    """
    logit = ModelTypesIdsEnum.logit
    parentless = NodeGenerator.secondary_node(model_type=logit, nodes_from=[])
    dependent = NodeGenerator.secondary_node(model_type=logit,
                                             nodes_from=[parentless])

    result = Chain()
    for node in (parentless, dependent):
        result.add_node(node)
    return result
def baseline_chain():
    """Return a chain where knn and logit primary nodes feed an xgboost node.

    The primary nodes are added to the chain first; the xgboost secondary
    node that takes them as parents is added last.
    """
    result = Chain()
    root = NodeGenerator.secondary_node(model_type=ModelTypesIdsEnum.xgboost,
                                        nodes_from=[])
    for model in (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.logit):
        leaf = NodeGenerator.primary_node(model)
        result.add_node(leaf)
        root.nodes_from.append(leaf)
    result.add_node(root)
    return result
Пример #5
0
def chain_third():
    """Return a chain with a qda root node fed by two rf primary nodes.

    The root is added to the chain first, followed by its parents in the
    order they were attached.
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    root = NodeGenerator.secondary_node(ModelTypesIdsEnum.qda)
    for model_type in (ModelTypesIdsEnum.rf, ModelTypesIdsEnum.rf):
        root.nodes_from.append(NodeGenerator.primary_node(model_type))
    chain.add_node(root)
    # A plain loop instead of a comprehension: the calls are made only for
    # their side effect, so no result list should be built.
    for parent in root.nodes_from:
        chain.add_node(parent)
    return chain
Пример #6
0
def chain_with_self_cycle():
    """Return a chain whose secondary node lists itself among its parents.

    A logit primary node feeds a logit secondary node; the secondary node is
    then appended to its own ``nodes_from`` list.
    """
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    looped = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[source])
    # the node becomes its own parent
    looped.nodes_from.append(looped)

    result = Chain()
    for node in (source, looped):
        result.add_node(node)
    return result
Пример #7
0
def chain_with_cycle():
    """Return a three-node logit chain in which two nodes are mutual parents.

    The second node is a parent of the third, and the third is afterwards
    appended to the second node's ``nodes_from`` list.
    """
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[source])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[middle, source])
    # close the loop: middle -> top -> middle
    middle.nodes_from.append(top)

    result = Chain()
    result.add_node(source)
    result.add_node(middle)
    result.add_node(top)
    return result
Пример #8
0
def chain_with_multiple_roots():
    """Return a chain where one primary node feeds two unrelated secondary nodes.

    Neither secondary node is a parent of the other, so no single node sits
    on top of both.
    """
    logit = ModelTypesIdsEnum.logit
    shared_parent = NodeGenerator.primary_node(model_type=logit)
    top_a = NodeGenerator.secondary_node(model_type=logit,
                                         nodes_from=[shared_parent])
    top_b = NodeGenerator.secondary_node(model_type=logit,
                                         nodes_from=[shared_parent])

    result = Chain()
    result.add_node(shared_parent)
    result.add_node(top_a)
    result.add_node(top_b)
    return result
Пример #9
0
def chain_second():
    """Return ``chain_first()`` with the root's first parent swapped for a dt subtree.

    A dt secondary node with two knn primary parents is passed to
    ``replace_node_with_parents`` in place of ``root_node.nodes_from[0]``.
    """
    #    XG
    #  |     \
    # DT      KNN
    # |  \    |  \
    # KNN KNN LR  LDA
    chain = chain_first()

    subtree_root = NodeGenerator.secondary_node(ModelTypesIdsEnum.dt)
    for _ in range(2):
        subtree_root.nodes_from.append(
            NodeGenerator.primary_node(ModelTypesIdsEnum.knn))

    replaced = chain.root_node.nodes_from[0]
    chain.replace_node_with_parents(replaced, subtree_root)
    return chain
Пример #10
0
def test_secondary_nodes_is_invariant_to_inputs_order(data_setup):
    """Check that the order of a secondary node's parents does not affect results.

    Two chains are built from the same models (logit, lda, knn feeding
    xgboost): one with the parents in their original order and one, made of
    deep copies, with the parents and the node list shuffled. Their
    descriptive ids, training predictions and test predictions must match.
    Finally the parents of the already fitted first chain are reordered, its
    node cache is cleared, the chain is refitted, and the test prediction
    must still match.
    """
    train_part, test_part = train_test_data_setup(data_setup)

    logit_node = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.logit)
    lda_node = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.lda)
    knn_node = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.knn)
    root = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.xgboost,
        nodes_from=[logit_node, lda_node, knn_node])

    chain = Chain()
    for node in (logit_node, lda_node, knn_node, root):
        chain.add_node(node)

    # copies are taken before any fitting happens
    logit_copy = deepcopy(logit_node)
    lda_copy = deepcopy(lda_node)
    knn_copy = deepcopy(knn_node)
    root_shuffled = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.xgboost,
        nodes_from=[knn_copy, logit_copy, lda_copy])

    chain_shuffled = Chain()
    # nodes are also registered in a different order
    for node in (root_shuffled, knn_copy, logit_copy, lda_copy):
        chain_shuffled.add_node(node)

    fitted = chain.fit(input_data=train_part)
    fitted_shuffled = chain_shuffled.fit(input_data=train_part)

    # training results must not depend on the order
    assert chain.root_node.descriptive_id == chain_shuffled.root_node.descriptive_id
    assert all(np.equal(fitted.predict, fitted_shuffled.predict))

    predicted = chain.predict(input_data=test_part)
    predicted_shuffled = chain_shuffled.predict(input_data=test_part)

    # prediction results must not depend on the order
    assert all(np.equal(predicted.predict, predicted_shuffled.predict))

    # reorder the parents of the final node in the already fitted chain
    final_node = chain.nodes[3]
    parents = final_node.nodes_from
    final_node.nodes_from = [parents[position] for position in (2, 0, 1)]
    final_node.cache.clear()
    chain.fit(train_part)
    predicted_re_shuffled = chain.predict(input_data=test_part)

    # prediction results must still be the same
    assert all(np.equal(predicted.predict, predicted_re_shuffled.predict))
Пример #11
0
def default_valid_chain():
    """Return a diamond-shaped chain made of four logit nodes.

    One primary node feeds two secondary nodes, and both of those are the
    parents of a final secondary node.
    """
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    merger = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[left, right])

    result = Chain()
    result.add_node(source)
    result.add_node(left)
    result.add_node(right)
    result.add_node(merger)
    return result
Пример #12
0
def chain_fifth():
    """Return ``chain_first()`` with its root and two of its leaves updated to knn.

    The root is updated to a knn secondary node, then both parents of
    ``root_node.nodes_from[1]`` are updated to a knn primary node.
    """
    #    KNN
    #  |     \
    # XG      KNN
    # |  \    |  \
    # LR LDA KNN  KNN
    chain = chain_first()

    knn_root = NodeGenerator.secondary_node(ModelTypesIdsEnum.knn)
    chain.update_node(chain.root_node, knn_root)

    # NOTE(review): the same primary node instance is passed for both
    # positions - confirm that sharing one object is intended.
    knn_leaf = NodeGenerator.primary_node(ModelTypesIdsEnum.knn)
    for position in (0, 1):
        chain.update_node(
            chain.root_node.nodes_from[1].nodes_from[position], knn_leaf)

    return chain
Пример #13
0
def chain_with_isolated_nodes():
    """Return a linear three-node logit chain plus one disconnected node.

    The extra node is a secondary logit node created with an empty parent
    list, and no other node takes it as a parent.
    """
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[source])
    top = NodeGenerator.secondary_node(model_type=logit, nodes_from=[middle])
    loner = NodeGenerator.secondary_node(model_type=logit, nodes_from=[])

    result = Chain()
    result.add_node(source)
    result.add_node(middle)
    result.add_node(top)
    result.add_node(loner)
    return result
Пример #14
0
def chain_third():
    """Return a chain with an xgboost root fed by knn, lda and knn primary nodes.

    The three primary nodes are added to the chain first, the root last.
    """
    #      XG
    #   |  |  \
    #  KNN LDA KNN
    leaf_models = (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.lda,
                   ModelTypesIdsEnum.knn)

    root = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    for leaf_model in leaf_models:
        root.nodes_from.append(NodeGenerator.primary_node(leaf_model))

    result = Chain()
    for parent in root.nodes_from:
        result.add_node(parent)
    result.add_node(root)

    return result
Пример #15
0
def chain_second():
    """Return ``chain_first()`` with one second-level node swapped for an xgboost subtree.

    An xgboost secondary node with knn and lda primary parents is passed to
    ``replace_node_with_parents`` in place of
    ``root_node.nodes_from[0].nodes_from[1]``.
    """
    #      XG
    #   |      \
    #  XG      KNN
    #  | \      |  \
    # LR XG   LR   LDA
    #    |  \
    #   KNN  LDA
    subtree_root = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    subtree_root.nodes_from.append(
        NodeGenerator.primary_node(ModelTypesIdsEnum.knn))
    subtree_root.nodes_from.append(
        NodeGenerator.primary_node(ModelTypesIdsEnum.lda))

    chain = chain_first()
    replaced = chain.root_node.nodes_from[0].nodes_from[1]
    chain.replace_node_with_parents(replaced, subtree_root)

    return chain
Пример #16
0
def chain_fourth():
    """Return ``chain_third()`` with the root's second parent swapped for a subtree.

    An xgboost secondary node with two knn primary parents is passed to
    ``replace_node_with_parents`` in place of ``root_node.nodes_from[1]``.
    """
    #      XG
    #   |  \  \
    #  KNN  XG  KNN
    #      |  \
    #    KNN   KNN

    chain = chain_third()
    new_node = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    # A plain loop instead of a comprehension: the appends are made only for
    # their side effect, so no result list should be built.
    for _ in range(2):
        new_node.nodes_from.append(
            NodeGenerator.primary_node(ModelTypesIdsEnum.knn))
    chain.replace_node_with_parents(chain.root_node.nodes_from[1], new_node)

    return chain
Пример #17
0
def test_node_factory_log_reg_correct(data_setup):
    """Check that the node factory builds a primary node holding a logit model.

    The created node must be exactly a ``PrimaryNode`` and its model must have
    the same class as a ``Model`` constructed directly with the same type.
    The ``data_setup`` fixture is requested but not read here.
    """
    model_type = ModelTypesIdsEnum.logit
    # Call the factory on the class itself, as every other call site does;
    # creating a NodeGenerator instance first is unnecessary.
    node = NodeGenerator.primary_node(model_type=model_type)

    expected_model = Model(model_type=model_type).__class__
    actual_model = node.model.__class__

    assert node.__class__ == PrimaryNode
    assert expected_model == actual_model
Пример #18
0
def chain_fourth():
    """Return ``chain_first()`` with both parents of the root's first parent replaced.

    First a qda secondary node with two rf primary parents is passed to
    ``replace_node_with_parents`` in place of
    ``root_node.nodes_from[0].nodes_from[1]``; then a knn secondary node with
    two knn primary parents is passed in place of
    ``root_node.nodes_from[0].nodes_from[0]``.
    """
    #          XG
    #      |         \
    #     XG          KNN
    #   |    \        |  \
    # QDA     KNN     LR  LDA
    # |  \    |    \
    # RF  RF  KNN KNN
    chain = chain_first()

    # (model of the new secondary node, model of its two parents,
    #  index of the node being replaced), applied in this order
    replacements = (
        (ModelTypesIdsEnum.qda, ModelTypesIdsEnum.rf, 1),
        (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.knn, 0),
    )
    for subtree_model, leaf_model, position in replacements:
        subtree_root = NodeGenerator.secondary_node(subtree_model)
        for _ in range(2):
            subtree_root.nodes_from.append(
                NodeGenerator.primary_node(leaf_model))
        chain.replace_node_with_parents(
            chain.root_node.nodes_from[0].nodes_from[position], subtree_root)
    return chain
Пример #19
0
def test_nodes_sequence_fit_correct(data_fixture, request):
    """Fit a diamond of nodes through its final node and check the outcome.

    A logit primary node feeds an lda and a qda secondary node, which are both
    parents of a knn secondary node. ``fit`` is called on the final node
    directly; the test then checks its descriptive id, the shape of the
    training prediction and that the node's cache holds an actual state.
    """
    dataset = request.getfixturevalue(data_fixture)
    train_part, _ = train_test_data_setup(dataset)

    source = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.logit)
    lda_branch = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.lda, nodes_from=[source])
    qda_branch = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.qda, nodes_from=[source])
    final = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.knn, nodes_from=[lda_branch, qda_branch])

    fit_output = final.fit(input_data=train_part)

    expected_id = (
        '((/n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.lda_defaultparams;;(/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.qda_defaultparams;)/'
        'n_ModelTypesIdsEnum.knn_defaultparams')
    assert final.descriptive_id == expected_id

    assert fit_output.predict.shape == train_part.target.shape
    assert final.cache.actual_cached_state is not None
Пример #20
0
def chain_example():
    """Return a three-level chain: an xgboost root over xgboost and knn nodes.

    Each of the two second-level nodes gets its own pair of logit and lda
    primary parents. Nodes are added to the chain bottom-up: the leaves of a
    branch, then the branch node, and the root last.
    """
    #    XG
    #  |     \
    # XG     KNN
    # |  \    |  \
    # LR LDA LR  LDA
    result = Chain()

    root = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    left_branch = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    right_branch = NodeGenerator.secondary_node(ModelTypesIdsEnum.knn)

    leaf_models = (ModelTypesIdsEnum.logit, ModelTypesIdsEnum.lda)
    for branch in (left_branch, right_branch):
        for leaf_model in leaf_models:
            leaf = NodeGenerator.primary_node(leaf_model)
            branch.nodes_from.append(leaf)
            result.add_node(leaf)
        result.add_node(branch)
        root.nodes_from.append(branch)

    result.add_node(root)
    return result
Пример #21
0
def test_eval_strategy_logreg(data_setup):
    """Compare a logit primary node against a directly fitted LogisticRegression.

    Both are fitted on the training split and asked to predict the test
    split. Only the lengths of the two prediction results are compared.
    """
    train_part, test_part = train_test_data_setup(data=data_setup)

    reference_model = LogisticRegression(C=10.,
                                         random_state=1,
                                         solver='liblinear',
                                         max_iter=10000,
                                         verbose=0)
    reference_model.fit(train_part.features, train_part.target)
    reference_prediction = reference_model.predict(test_part.features)

    logit_node = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.logit)
    logit_node.fit(input_data=train_part)
    node_output = logit_node.predict(input_data=test_part)

    assert len(node_output.predict) == len(reference_prediction)
Пример #22
0
def test_cache_actuality_after_model_change(data_setup):
    """Nodes not affected by a model change keep an actual cache.

    After fitting, the root's first parent is updated to a new logit
    secondary node. The root and that new parent must have no actual cached
    state, while every other node in the chain must still have one.
    """
    chain = chain_first()
    train, _ = data_setup
    chain.fit(input_data=train)

    replacement = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.logit)
    chain.update_node(old_node=chain.root_node.nodes_from[0],
                      new_node=replacement)

    changed_parent = chain.root_node.nodes_from[0]
    affected = [chain.root_node, changed_parent]
    untouched = [node for node in chain.nodes if node not in affected]

    # nodes outside the changed path still have an actual cache
    untouched_states = [node.cache.actual_cached_state for node in untouched]
    assert all(untouched_states)

    # the changed node and the root above it have no actual cache
    affected_states = [node.cache.actual_cached_state for node in affected]
    assert not any(affected_states)
Пример #23
0
def test_cache_historical_state_using(data_setup):
    """A node put back into the chain makes the root's earlier cache actual again.

    The chain is fitted, the root's first parent is updated to a new logit
    node (root cache stops being actual), the chain is refitted (root cache is
    actual), and finally the original parent is restored. The root cache must
    then be actual without another fit.
    """
    train, _ = data_setup
    chain = chain_first()

    # first fit: the fitted models go to the cache
    chain.fit(input_data=train)
    replacement = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.logit)
    original_parent = chain.root_node.nodes_from[0]

    # swap the root's first parent for the new node
    chain.update_node(old_node=original_parent, new_node=replacement)
    # the root cache no longer matches the chain
    assert not chain.root_node.cache.actual_cached_state

    # refit the modified chain
    chain.fit(input_data=train)
    # the root cache is actual again
    assert chain.root_node.cache.actual_cached_state

    # put the original parent back
    current_parent = chain.root_node.nodes_from[0]
    chain.update_node(old_node=current_parent, new_node=original_parent)
    # no new fit is needed: the state cached by the first fit is reused
    assert chain.root_node.cache.actual_cached_state
from core.composer.node import NodeGenerator
from core.models.model import *
from benchmark.benchmark_utils import get_scoring_case_data_paths

# Load the train and test splits from the CSV paths returned by
# get_scoring_case_data_paths().
train_file_path, test_file_path = get_scoring_case_data_paths()
train_data = InputData.from_csv(train_file_path)
test_data = InputData.from_csv(test_file_path)

training_features, training_target = train_data.features, train_data.target
testing_features, testing_target = test_data.features, test_data.target

# Two primary nodes (tpot and lda) are the parents of one rf secondary node.
node0 = NodeGenerator.primary_node(ModelTypesIdsEnum.tpot)
node1 = NodeGenerator.primary_node(ModelTypesIdsEnum.lda)
node2 = NodeGenerator.secondary_node(ModelTypesIdsEnum.rf)
node2.nodes_from.extend([node0, node1])

chain = Chain()
chain.add_node(node0)
chain.add_node(node1)
chain.add_node(node2)

# Fit on the training split, predict the test split and print the ROC AUC.
chain.fit(train_data)
results = chain.predict(test_data)

roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
print(roc_auc_value)