Пример #1
0
def run_chain_from_automl(train_file_path: str,
                          test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    """Fit a ('tpot', 'lda') -> 'rf' chain and report ROC AUC on the test data.

    Args:
        train_file_path: path handed to ``InputData.from_csv`` for fitting.
        test_file_path: path handed to ``InputData.from_csv`` for evaluation.
        max_run_time: time budget forwarded to the 'tpot' node as the
            ``max_run_time_sec`` model parameter (whole seconds).

    Returns:
        The value returned by ``roc_auc`` for the test target against the
        chain prediction; the same value is also printed.
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_tpot = PrimaryNode('tpot')
    # ``timedelta.seconds`` is only the sub-day component (0..86399) and would
    # silently drop whole days; ``total_seconds()`` covers the full duration.
    run_time_sec = int(max_run_time.total_seconds())
    node_tpot.model.params = {'max_run_time_sec': run_time_sec}

    node_lda = PrimaryNode('lda')
    node_rf = SecondaryNode('rf')

    node_rf.nodes_from = [node_tpot, node_lda]

    # Only the root is added explicitly; its parents are reachable through
    # ``nodes_from``.
    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Пример #2
0
def test_chain_hierarchy_fit_correct(data_setup):
    """Fit a diamond-shaped chain of four logit nodes and check its structure.

    One primary node feeds two secondary nodes, which both feed the final
    secondary node. The test checks the root's descriptive id, the chain
    length and depth, and that the fit output has the shape of the target.
    """
    train_part, _ = train_test_data_setup(data_setup)

    logit = ModelTypesIdsEnum.logit
    bottom = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[bottom])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[bottom])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[left, right])

    chain = Chain()
    chain.add_node(bottom)
    chain.add_node(left)
    chain.add_node(right)
    chain.add_node(top)

    fit_output = chain.fit(input_data=train_part, use_cache=False)

    expected_root_id = (
        '((/n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;;(/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams')
    assert chain.root_node.descriptive_id == expected_root_id

    assert chain.length == 4
    assert chain.depth == 3
    assert fit_output.predict.shape == train_part.target.shape
Пример #3
0
def chain_with_secondary_nodes_only():
    """Return a two-node chain that contains secondary nodes only.

    The first secondary node has an empty parent list; the second one uses
    the first as its only parent.
    """
    parentless = SecondaryNode(model_type='logit', nodes_from=[])
    child = SecondaryNode(model_type='logit', nodes_from=[parentless])

    result = Chain()
    for item in (parentless, child):
        result.add_node(item)
    return result
Пример #4
0
def chain_with_cycle():
    """Return a three-node chain whose two secondary nodes reference each other."""
    source = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[source])
    closing = SecondaryNode(model_type='logit', nodes_from=[middle, source])
    # Close the loop: ``closing`` already lists ``middle`` as a parent.
    middle.nodes_from.append(closing)

    result = Chain()
    result.add_node(source)
    result.add_node(middle)
    result.add_node(closing)
    return result
Пример #5
0
def chain_with_multiple_roots():
    """Return a chain where one primary node feeds two separate secondary nodes.

    Neither secondary node is a parent of the other, so the chain ends in
    two nodes instead of one.
    """
    shared_input = PrimaryNode(model_type='logit')
    top_a = SecondaryNode(model_type='logit', nodes_from=[shared_input])
    top_b = SecondaryNode(model_type='logit', nodes_from=[shared_input])

    result = Chain()
    result.add_node(shared_input)
    result.add_node(top_a)
    result.add_node(top_b)
    return result
Пример #6
0
def baseline_chain():
    """Return a chain with 'knn' and 'logit' primary nodes feeding an 'xgboost' node.

    Each primary node is added to the chain as soon as it is created; the
    'xgboost' secondary node is added last.
    """
    result = Chain()
    top = SecondaryNode(model_type='xgboost', nodes_from=[])
    for primary_model in ('knn', 'logit'):
        parent = PrimaryNode(primary_model)
        result.add_node(parent)
        top.nodes_from.append(parent)
    result.add_node(top)
    return result
Пример #7
0
def get_regr_chain():
    """Return a chain: 'xgbreg' and 'knnreg' primary nodes feeding a 'linear' node.

    Only the final secondary node is added to the chain explicitly.
    """
    parents = [
        PrimaryNode(model_type='xgbreg'),
        PrimaryNode(model_type='knnreg'),
    ]
    top = SecondaryNode(model_type='linear', nodes_from=parents)

    result = Chain()
    result.add_node(top)
    return result
Пример #8
0
def chain_with_self_cycle():
    """Return a two-node chain whose secondary node lists itself as a parent."""
    source = PrimaryNode(model_type='logit')
    looped = SecondaryNode(model_type='logit', nodes_from=[source])
    # The node becomes its own parent in addition to ``source``.
    looped.nodes_from.append(looped)

    result = Chain()
    for item in (source, looped):
        result.add_node(item)
    return result
Пример #9
0
def get_class_chain():
    """Return a chain: 'xgboost' and 'knn' primary nodes feeding a 'logit' node.

    Only the final secondary node is added to the chain explicitly.
    """
    parents = [
        PrimaryNode(model_type='xgboost'),
        PrimaryNode(model_type='knn'),
    ]
    top = SecondaryNode(model_type='logit', nodes_from=parents)

    result = Chain()
    result.add_node(top)
    return result
Пример #10
0
def chain_with_secondary_nodes_only():
    """Return a two-node chain built from secondary logit nodes only.

    The first node is created with an empty parent list and serves as the
    only parent of the second node.
    """
    logit = ModelTypesIdsEnum.logit
    parentless = NodeGenerator.secondary_node(model_type=logit, nodes_from=[])
    child = NodeGenerator.secondary_node(model_type=logit,
                                         nodes_from=[parentless])

    result = Chain()
    for item in (parentless, child):
        result.add_node(item)
    return result
Пример #11
0
def two_level_chain():
    """Return a chain: 'logit' and 'knn' primary nodes feeding an 'xgboost' node.

    All three nodes are added to the chain, parents first.
    """
    logit_input = PrimaryNode(model_type='logit')
    knn_input = PrimaryNode(model_type='knn')
    top = SecondaryNode(model_type='xgboost',
                        nodes_from=[logit_input, knn_input])

    result = Chain()
    result.add_node(logit_input)
    result.add_node(knn_input)
    result.add_node(top)
    return result
Пример #12
0
def valid_chain():
    """Return a linear chain of four logit nodes.

    One primary node is followed by three secondary nodes, each taking the
    previously created node as its only parent.
    """
    sequence = [PrimaryNode(model_type='logit')]
    for _ in range(3):
        previous = sequence[-1]
        sequence.append(SecondaryNode(model_type='logit',
                                      nodes_from=[previous]))

    result = Chain()
    for item in sequence:
        result.add_node(item)
    return result
Пример #13
0
def chain_third():
    """Return a chain with a QDA secondary node fed by two RF primary nodes.

    The secondary node is added to the chain first, followed by its parents
    in the order they were appended.
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = NodeGenerator.secondary_node(ModelTypesIdsEnum.qda)
    for model_type in (ModelTypesIdsEnum.rf, ModelTypesIdsEnum.rf):
        new_node.nodes_from.append(NodeGenerator.primary_node(model_type))
    chain.add_node(new_node)
    # Plain loop instead of a list comprehension: ``add_node`` is called for
    # its side effect only, so building a throwaway list is misleading.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
Пример #14
0
def chain_with_isolated_components():
    """Return a chain made of two node pairs that share no connection.

    The first pair is primary -> secondary; the second pair is a secondary
    node with no parents followed by a secondary node that depends on it.
    """
    source = PrimaryNode(model_type='logit')
    source_child = SecondaryNode(model_type='logit', nodes_from=[source])
    detached = SecondaryNode(model_type='logit', nodes_from=[])
    detached_child = SecondaryNode(model_type='logit', nodes_from=[detached])

    result = Chain()
    result.add_node(source)
    result.add_node(source_child)
    result.add_node(detached)
    result.add_node(detached_child)
    return result
def baseline_chain():
    """Return a chain with knn and logit primary nodes feeding an xgboost node.

    Each primary node is added to the chain as soon as it is created; the
    xgboost secondary node is added last.
    """
    result = Chain()
    top = NodeGenerator.secondary_node(model_type=ModelTypesIdsEnum.xgboost,
                                       nodes_from=[])
    for primary_model in (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.logit):
        parent = NodeGenerator.primary_node(primary_model)
        result.add_node(parent)
        top.nodes_from.append(parent)
    result.add_node(top)
    return result
Пример #16
0
def chain_with_self_cycle():
    """Return a two-node logit chain whose secondary node is its own parent."""
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    looped = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[source])
    # The node becomes its own parent in addition to ``source``.
    looped.nodes_from.append(looped)

    result = Chain()
    for item in (source, looped):
        result.add_node(item)
    return result
Пример #17
0
def chain_with_isolated_nodes():
    """Return a three-node linear chain plus one unconnected secondary node.

    The extra node has an empty parent list and is not referenced by any
    other node.
    """
    source = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[source])
    top = SecondaryNode(model_type='logit', nodes_from=[middle])
    loner = SecondaryNode(model_type='logit', nodes_from=[])

    result = Chain()
    result.add_node(source)
    result.add_node(middle)
    result.add_node(top)
    result.add_node(loner)
    return result
Пример #18
0
def chain_third():
    """Return a chain with a 'qda' secondary node fed by two 'rf' primary nodes.

    The secondary node is added to the chain first, followed by its parents
    in the order they were appended.
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # Plain loop instead of a list comprehension: ``add_node`` is called for
    # its side effect only, so building a throwaway list is misleading.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
Пример #19
0
def compose_chain() -> Chain:
    """Return a chain: 'svc' and 'lda' primary nodes feeding an 'rf' node.

    Only the 'rf' secondary node is added to the chain explicitly.
    """
    result = Chain()
    svc_input = PrimaryNode('svc')
    lda_input = PrimaryNode('lda')
    top = SecondaryNode('rf')

    for parent in (svc_input, lda_input):
        top.nodes_from.append(parent)

    result.add_node(top)
    return result
Пример #20
0
def chain_with_multiple_roots():
    """Return a chain where one primary logit node feeds two secondary nodes.

    Neither secondary node is a parent of the other, so the chain ends in
    two nodes instead of one.
    """
    logit = ModelTypesIdsEnum.logit
    shared_input = NodeGenerator.primary_node(model_type=logit)
    top_a = NodeGenerator.secondary_node(model_type=logit,
                                         nodes_from=[shared_input])
    top_b = NodeGenerator.secondary_node(model_type=logit,
                                         nodes_from=[shared_input])

    result = Chain()
    result.add_node(shared_input)
    result.add_node(top_a)
    result.add_node(top_b)
    return result
Пример #21
0
def chain_with_cycle():
    """Return a three-node logit chain whose secondary nodes reference each other."""
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[source])
    closing = NodeGenerator.secondary_node(model_type=logit,
                                           nodes_from=[middle, source])
    # Close the loop: ``closing`` already lists ``middle`` as a parent.
    middle.nodes_from.append(closing)

    result = Chain()
    result.add_node(source)
    result.add_node(middle)
    result.add_node(closing)
    return result
Пример #22
0
def test_secondary_nodes_is_invariant_to_inputs_order(data_setup):
    """Check that an xgboost secondary node ignores the order of its parents.

    Two chains are built from the same three primary models (logit, lda,
    knn); the second one uses deep copies of the primary nodes, listed in a
    different order. Their descriptive ids and their train and test
    predictions must match. The parents of the first chain's node at index 3
    are then reordered in place, the chain is refitted, and the test
    prediction must still match.
    """
    data = data_setup
    train, test = train_test_data_setup(data)
    first = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.logit)
    second = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.lda)
    third = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.knn)
    final = NodeGenerator.secondary_node(model_type=ModelTypesIdsEnum.xgboost,
                                         nodes_from=[first, second, third])

    chain = Chain()
    for node in [first, second, third, final]:
        chain.add_node(node)

    # Independent copies so that the two chains share no node objects.
    first = deepcopy(first)
    second = deepcopy(second)
    third = deepcopy(third)
    final_shuffled = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.xgboost,
        nodes_from=[third, first, second])

    chain_shuffled = Chain()
    # change order of nodes in list
    for node in [final_shuffled, third, first, second]:
        chain_shuffled.add_node(node)

    train_predicted = chain.fit(input_data=train)

    train_predicted_shuffled = chain_shuffled.fit(input_data=train)

    # train results should be invariant
    assert chain.root_node.descriptive_id == chain_shuffled.root_node.descriptive_id
    # np.array_equal compares whole arrays at once; all(np.equal(...)) walks
    # the first axis in Python and raises on 2-D predictions with several
    # columns.
    assert np.array_equal(train_predicted.predict,
                          train_predicted_shuffled.predict)

    test_predicted = chain.predict(input_data=test)
    test_predicted_shuffled = chain_shuffled.predict(input_data=test)

    # predict results should be invariant
    assert np.array_equal(test_predicted.predict,
                          test_predicted_shuffled.predict)

    # change parents order for the nodes fitted chain
    # NOTE(review): index 3 is presumably the xgboost node added last; this
    # assumes chain.nodes keeps insertion order - confirm.
    nodes_for_change = chain.nodes[3].nodes_from
    chain.nodes[3].nodes_from = [
        nodes_for_change[2], nodes_for_change[0], nodes_for_change[1]
    ]
    chain.nodes[3].cache.clear()
    chain.fit(train)
    test_predicted_re_shuffled = chain.predict(input_data=test)

    # predict results should be invariant
    assert np.array_equal(test_predicted.predict,
                          test_predicted_re_shuffled.predict)
Пример #23
0
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=64,
                                     is_visualise=False):
    """Fit three forecasting chains on one dataset and print their validation metrics.

    The composite chain from ``get_composite_lstm_chain``, a single 'lstm'
    node and a single 'ridge' node are fitted on the training file and each
    is scored on the validation file with ``calculate_validation_metric``.

    Args:
        train_file_path: training CSV path, joined onto ``project_root()``.
        test_file_path: validation CSV path, joined onto ``project_root()``.
        forecast_length: forwarded to ``TsForecastingParams``; also used in
            the label passed to ``calculate_validation_metric``.
        max_window_size: forwarded to ``TsForecastingParams``.
        is_visualise: forwarded to ``calculate_validation_metric``.

    Returns:
        The metric obtained for the single-'ridge' chain.
    """
    forecasting_params = TsForecastingParams(forecast_length=forecast_length,
                                             max_window_size=max_window_size)
    # the task shared by both datasets
    task = Task(TaskTypesEnum.ts_forecasting, forecasting_params)

    def _load_ts(relative_path):
        # Paths are resolved against the project root before loading.
        absolute_path = os.path.join(str(project_root()), relative_path)
        return InputData.from_csv(absolute_path,
                                  task=task,
                                  data_type=DataTypesEnum.ts)

    train_set = _load_ts(train_file_path)
    # held-out data for the final validation of every chain
    validation_set = _load_ts(test_file_path)

    composite_chain = get_composite_lstm_chain()

    ridge_chain = Chain()
    ridge_chain.add_node(PrimaryNode('ridge'))

    lstm_chain = Chain()
    lstm_chain.add_node(PrimaryNode('lstm'))

    def _fit_and_score(candidate, label):
        # Fit on the training set, then score the validation prediction.
        candidate.fit(input_data=train_set, verbose=False)
        prediction = candidate.predict(validation_set)
        return calculate_validation_metric(prediction, validation_set,
                                           label, is_visualise)

    rmse_on_valid = _fit_and_score(
        composite_chain, f'full-composite_{forecast_length}')
    rmse_on_valid_lstm_only = _fit_and_score(
        lstm_chain, f'full-lstm-only_{forecast_length}')
    rmse_on_valid_simple = _fit_and_score(
        ridge_chain, f'full-simple_{forecast_length}')

    print(f'RMSE composite: {rmse_on_valid}')
    print(f'RMSE simple: {rmse_on_valid_simple}')
    print(f'RMSE LSTM only: {rmse_on_valid_lstm_only}')

    return rmse_on_valid_simple
Пример #24
0
def get_composite_lstm_chain():
    """Return a two-branch chain merged by an 'additive_data_model' node.

    Branch one: 'trend_data_model' -> 'lasso'.
    Branch two: 'residual_data_model' -> 'ridge'.
    Only the merging node is added to the chain explicitly.
    """
    result = Chain()

    trend_branch = SecondaryNode(
        'lasso', nodes_from=[PrimaryNode('trend_data_model')])
    residual_branch = SecondaryNode(
        'ridge', nodes_from=[PrimaryNode('residual_data_model')])

    merged = SecondaryNode('additive_data_model',
                           nodes_from=[residual_branch, trend_branch])
    result.add_node(merged)
    return result
Пример #25
0
    def compose_chain(self,
                      data: InputData,
                      initial_chain: Optional[Chain],
                      composer_requirements: ComposerRequirements,
                      metrics: Optional[Callable],
                      optimiser_parameters=None,
                      is_visualise: bool = False) -> Chain:
        """Build a fixed-shape chain from the models named in the requirements.

        The shape depends on ``self.dummy_chain_type``:

        * hierarchical - every model in ``composer_requirements.primary``
          becomes a primary node feeding one secondary node built from
          ``composer_requirements.secondary[0]``;
        * flat - a primary node built from ``composer_requirements.primary[0]``
          is followed by one secondary node per entry of
          ``composer_requirements.secondary``, linked one after another.

        ``data``, ``initial_chain``, ``metrics``, ``optimiser_parameters`` and
        ``is_visualise`` are not used by this method.

        Raises:
            NotImplementedError: for any other ``dummy_chain_type``.
        """
        new_chain = Chain()

        if self.dummy_chain_type == DummyChainTypeEnum.hierarchical:
            # (y1, y2) -> y
            top = NodeGenerator.secondary_node(
                composer_requirements.secondary[0])
            for primary_model in composer_requirements.primary:
                parent = NodeGenerator.primary_node(primary_model)
                new_chain.add_node(parent)
                top.nodes_from.append(parent)
            new_chain.add_node(top)
            return new_chain

        if self.dummy_chain_type == DummyChainTypeEnum.flat:
            # (y1) -> (y2) -> y
            tail = NodeGenerator.primary_node(
                composer_requirements.primary[0])
            new_chain.add_node(tail)
            for secondary_model in composer_requirements.secondary:
                successor = NodeGenerator.secondary_node(secondary_model)
                successor.nodes_from = [tail]
                new_chain.add_node(successor)
                tail = successor
            return new_chain

        raise NotImplementedError()
Пример #26
0
def chain_with_isolated_nodes():
    """Return a three-node linear logit chain plus one unconnected secondary node.

    The extra node has an empty parent list and is not referenced by any
    other node.
    """
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[source])
    top = NodeGenerator.secondary_node(model_type=logit, nodes_from=[middle])
    loner = NodeGenerator.secondary_node(model_type=logit, nodes_from=[])

    result = Chain()
    result.add_node(source)
    result.add_node(middle)
    result.add_node(top)
    result.add_node(loner)
    return result
Пример #27
0
def default_valid_chain():
    """Return a diamond-shaped chain of four logit nodes.

    One primary node feeds two secondary nodes, which both feed the final
    secondary node.
    """
    logit = ModelTypesIdsEnum.logit
    bottom = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[bottom])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[bottom])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[left, right])

    result = Chain()
    result.add_node(bottom)
    result.add_node(left)
    result.add_node(right)
    result.add_node(top)
    return result
Пример #28
0
def get_composite_lstm_chain():
    """Return a two-branch chain merged by an 'additive_data_model' node.

    Branch one: 'trend_data_model' -> 'linear'.
    Branch two: 'residual_data_model' -> 'linear'.
    The trend node and the merging node get ``labels = ["fixed"]``. Only the
    merging node is added to the chain explicitly.
    """
    chain = Chain()
    node_trend = PrimaryNode('trend_data_model')
    node_trend.labels = ["fixed"]
    node_lstm_trend = SecondaryNode('linear', nodes_from=[node_trend])
    # NOTE(review): the original assigned node_trend.labels = ["fixed"] a
    # second time here. The repeat was redundant and has been dropped; it may
    # have been meant for another node (e.g. the residual node) - confirm.
    node_residual = PrimaryNode('residual_data_model')
    node_ridge_residual = SecondaryNode('linear', nodes_from=[node_residual])

    node_final = SecondaryNode(
        'additive_data_model',
        nodes_from=[node_ridge_residual, node_lstm_trend])
    node_final.labels = ["fixed"]
    chain.add_node(node_final)
    return chain
Пример #29
0
def test_regression_chain_fit_correct():
    """Fit a single-node 'rfr' chain and check its test RMSE against a threshold.

    The threshold is 1.5 times the standard deviation of the full target.
    """
    dataset = get_synthetic_ts_data()

    single_node_chain = Chain()
    single_node_chain.add_node(PrimaryNode('rfr'))

    train_part, test_part = train_test_data_setup(dataset)
    single_node_chain.fit(input_data=train_part)

    # The first returned value is not needed here.
    _, test_rmse = get_rmse_value(single_node_chain, train_part, test_part)

    assert test_rmse < np.std(dataset.target) * 1.5
Пример #30
0
def chain_third():
    """Return a chain with an xgboost node fed by knn, lda and knn primary nodes.

    The primary nodes are added to the chain first, in the order they were
    appended as parents; the xgboost node is added last.
    """
    #      XG
    #   |  |  \
    #  KNN LDA KNN
    top = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    parent_models = (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.lda,
                     ModelTypesIdsEnum.knn)
    for parent_model in parent_models:
        top.nodes_from.append(NodeGenerator.primary_node(parent_model))

    result = Chain()
    for parent in top.nodes_from:
        result.add_node(parent)
    result.add_node(top)

    return result