Пример #1
0
def get_model(x, y, model_config):
    """
    builds and trains the model selected by ``model_config["model"]["type"]``
    :param x: first data argument forwarded to the model's ``train`` (presumably features)
    :param y: second data argument forwarded to the model's ``train`` (presumably targets)
    :param model_config: mapping with a ``"model_name"`` entry and a ``"model"`` sub-mapping
        holding ``"type"`` plus the type specific settings
    :return: the trained model, or None when the type is neither "regression" nor "neural_net"
    """
    settings = model_config["model"]
    kind = settings["type"]

    if kind == "regression":
        # the polynomial degree is a constructor argument, training only needs the data
        model = PolynomialRegressionModel(
            model_config["model_name"], settings["polynomial_degree"])
        model.train(x, y)
        return model

    if kind == "neural_net":
        # the whole "model" section is handed to train() as its third argument
        model = NeuralNetModel(model_config["model_name"])
        model.train(x, y, settings)
        return model

    # unknown model type
    return None
Пример #2
0
def get_model(x, y, model_config):
    """
    builds and trains the model selected by ``model_config["model"]["type"]``
    :param x: first data argument forwarded to the model's ``train`` (presumably features)
    :param y: second data argument forwarded to the model's ``train`` (presumably targets)
    :param model_config: mapping with a ``"model_name"`` entry and a ``"model"`` sub-mapping
        holding ``"type"`` plus the type specific settings
    :return: the trained model, or None when the type is neither "regression" nor "neural_net"
    """
    settings = model_config["model"]
    kind = settings["type"]

    if kind == "regression":
        # the polynomial degree is a constructor argument, training only needs the data
        model = PolynomialRegressionModel(
            model_config["model_name"], settings["polynomial_degree"])
        model.train(x, y)
        return model

    if kind == "neural_net":
        model = NeuralNetModel(model_config["model_name"])
        # hyperparameters are passed to train() one by one, in this exact order
        hidden_layer_sizes = settings["hidden_layer_sizes"]
        learning_rate = settings["learning_rate"]
        max_iter = settings["max_iter"]
        model.train(x, y, hidden_layer_sizes, learning_rate, max_iter)
        return model

    # unknown model type
    return None
Пример #3
0
def _evaluate_candidates(candidates):
    """
    fits every candidate model and keeps the best result for each metric
    :param candidates: iterable of already defined models exposing ``fit_predict()``;
        it is consumed lazily, so a generator is built/fitted one model at a time
    :return: tuple (best_auc, best_mae_weighted, best_mae_unweighted, best_model_auc,
        best_model_mae_weighted, best_model_mae_unweighted, mae_guessed) where
        ``mae_guessed`` is the value reported by the LAST candidate
    """
    (best_auc, best_mae_weighted, best_mae_unweighted,
     best_model_auc, best_model_mae_weighted, best_model_mae_unweighted) = initialize_metrics()
    mae_guessed = None

    for candidate in candidates:
        auc, mae_guessed, mae_weighted, mae_unweighted, model = candidate.fit_predict()
        # higher AUC is better, lower MAE is better
        if auc > best_auc:
            best_auc = auc
            best_model_auc = model
        if mae_weighted < best_mae_weighted:
            best_mae_weighted = mae_weighted
            best_model_mae_weighted = model
        if mae_unweighted < best_mae_unweighted:
            best_mae_unweighted = mae_unweighted
            best_model_mae_unweighted = model

    return (best_auc, best_mae_weighted, best_mae_unweighted,
            best_model_auc, best_model_mae_weighted, best_model_mae_unweighted, mae_guessed)


def _print_best_models(model_kind, results):
    """
    prints the summary of a finished model search
    :param model_kind: short model family tag used in the printed labels ('nn' or 'lr')
    :param results: tuple returned by ``_evaluate_candidates``
    :return:
    """
    (best_auc, best_mae_weighted, best_mae_unweighted,
     best_model_auc, best_model_mae_weighted, best_model_mae_unweighted, mae_guessed) = results

    print(
        'best {} model by AUC: '.format(model_kind).upper() + best_model_auc.upper() +
        " with AUC {:.2f}".format(best_auc))
    # NOTE(review): this is the value from the last fitted candidate, not a tracked best;
    # presumably the guessed baseline does not depend on the model -- confirm
    print(
        'best guessed revenue MAE: '.upper() + '{:.2f}'.format(mae_guessed))
    print(
        'best {} model by predicted revenue MAE: '.format(model_kind).upper() +
        best_model_mae_weighted.upper() + " with MAE {:.2f}".format(best_mae_weighted))
    print(
        'best {} model by strictly predicted revenue MAE: '.format(model_kind).upper() +
        best_model_mae_unweighted.upper() + " with MAE {:.2f}".format(best_mae_unweighted))


def make_predictions(data_raw, data_name, regression_model):
    """
    makes predictions for NN and LR models

    Preprocesses the raw data, trains the regression model, prints the metrics of the
    guessed probabilities, then runs a grid search over neural net and logistic
    regression settings and prints the best models of each family.
    :param data_raw: input data not preprocessed
    :param data_name: name of data
    :param regression_model: initialized model for regression task
    :return:
    """
    # define column names in data. Usually would be a console input
    key_columns = '(Opportunity_Name,Product)'
    update_col = 'Upload_date'
    created_col = 'Created'
    opp_name_col = 'Opportunity_Name'
    product_name_col = 'Product'

    # strip the surrounding brackets and split into the individual column names
    key_columns = key_columns[1:-1].split(',')
    target = 'future stage'  # target for the first part --> stage Won or Lost
    target_second_part = 'time_diff_to_close'  # target for the second part --> time till closing

    # Models parameters
    nn_activation_list = ['identity', 'logistic', 'tanh', 'relu']
    nn_solver_list = ['lbfgs', 'sgd', 'adam']
    nn_nodes_list = ['(2, 2, 2)', '(100, 100)', '(20, 16, 10, 4)', '(100, 80, 60, 40)']
    lr_solver_list = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
    c_list = [x * .1 for x in range(1, 11)]

    # preprocess data
    X, y, index_train, index_test, second_part, y_proba_guessed, updates, data_won = preprocess(
        data_raw, target, key_columns, update_col, created_col, opp_name_col, product_name_col)

    # train model for periods prediction first, because the same model will be used each time
    train_regression_model(regression_model, second_part, index_train, target_second_part)

    X_train = X.loc[index_train]
    X_test = X.loc[index_test]
    y_train = y.loc[index_train]
    y_test = y.loc[index_test]

    # baseline: threshold the guessed probabilities at 0.5 and report their quality
    y_guessed = [1 if x >= 0.5 else 0 for x in y_proba_guessed]
    print('Guessed probabilities'.upper())
    print(classification_report(y_test, y_guessed))
    print('Confusion matrix'.upper())
    print(confusion_matrix(y_test, y_guessed))
    ns_auc = roc_auc_score(y_test, y_proba_guessed)
    print('No Skill: ROC AUC=%.3f' % ns_auc)

    # both model classes are constructed with exactly the same positional arguments
    model_args = (X_train, X_test, y_train, y_test, index_test,
                  index_train, second_part, target, update_col, y_proba_guessed,
                  updates, data_won, data_name, regression_model)

    def nn_candidates():
        # generator: each model is built only right before it gets fitted
        for nn_activation in nn_activation_list:
            for nn_solver in nn_solver_list:
                for nn_nodes in nn_nodes_list:
                    nn = NeuralNetModel(*model_args)
                    nn.define_model(solver=nn_solver, activation=nn_activation, n_nodes=nn_nodes)
                    yield nn

    def lr_candidates():
        # generator: each model is built only right before it gets fitted
        for lr_solver in lr_solver_list:
            for c in c_list:
                lr = LogRegModel(*model_args)
                lr.define_model(solver=lr_solver, c=c)
                yield lr

    # the NN search is completed and reported before the LR search starts
    _print_best_models('nn', _evaluate_candidates(nn_candidates()))
    _print_best_models('lr', _evaluate_candidates(lr_candidates()))