def get_model(x, y, model_config):
    """
    builds and trains the model selected by model_config["model"]["type"]

    :param x: training inputs, handed to the model's train method unchanged
    :param y: training targets, handed to the model's train method unchanged
    :param model_config: dict with a "model" sub-dict (holding at least "type")
        and, for the supported types, a "model_name" entry
    :return: the trained model, or None when the type is neither
        "regression" nor "neural_net"
    """
    settings = model_config["model"]
    kind = settings["type"]

    if kind == "regression":
        # polynomial regression needs its degree at construction time
        trained = PolynomialRegressionModel(
            model_config["model_name"], settings["polynomial_degree"])
        trained.train(x, y)
        return trained

    if kind == "neural_net":
        # the whole "model" sub-dict is passed on as the training settings
        trained = NeuralNetModel(model_config["model_name"])
        trained.train(x, y, settings)
        return trained

    # unsupported model type
    return None
def get_model(x, y, model_config):
    """
    creates the model named by model_config["model"]["type"] and trains it

    :param x: training inputs, forwarded to the model's train method
    :param y: training targets, forwarded to the model's train method
    :param model_config: dict with a "model" sub-dict (holding at least "type")
        and, for the supported types, a "model_name" entry
    :return: the trained model, or None for any type other than
        "regression" / "neural_net"
    """
    params = model_config["model"]
    requested = params["type"]

    if requested == "regression":
        model = PolynomialRegressionModel(
            model_config["model_name"], params["polynomial_degree"])
        model.train(x, y)
        return model

    if requested == "neural_net":
        model = NeuralNetModel(model_config["model_name"])
        # hyper-parameters are read from the config and passed positionally
        model.train(
            x,
            y,
            params["hidden_layer_sizes"],
            params["learning_rate"],
            params["max_iter"],
        )
        return model

    # nothing to build for an unrecognised type
    return None
def _search_best_models(label, candidates):
    """
    fits every candidate and prints the best model per metric

    :param label: short model-family tag inserted into the printed summary
        ('nn' or 'lr' for the callers in this file)
    :param candidates: iterable of already configured model wrappers exposing
        fit_predict(); consumed lazily, one candidate at a time
    :return: None, the summary is printed
    """
    (best_auc, best_mae_weighted, best_mae_unweighted,
     best_model_auc, best_model_mae_weighted,
     best_model_mae_unweighted) = initialize_metrics()

    for candidate in candidates:
        auc, mae_guessed, mae_weighted, mae_unweighted, model = candidate.fit_predict()
        # higher AUC is better, lower MAE is better
        if auc > best_auc:
            best_auc = auc
            best_model_auc = model
        if mae_weighted < best_mae_weighted:
            best_mae_weighted = mae_weighted
            best_model_mae_weighted = model
        if mae_unweighted < best_mae_unweighted:
            best_mae_unweighted = mae_unweighted
            best_model_mae_unweighted = model

    print('best {} model by AUC: '.format(label).upper()
          + best_model_auc.upper()
          + " with AUC {:.2f}".format(best_auc))
    # NOTE(review): mae_guessed is the value returned for the last candidate
    # fitted, it is not tracked as a minimum -- confirm it is the same for
    # every candidate, otherwise the "best" label is misleading.
    print('best guessed revenue MAE: '.upper() + '{:.2f}'.format(mae_guessed))
    print('best {} model by predicted revenue MAE: '.format(label).upper()
          + best_model_mae_weighted.upper()
          + " with MAE {:.2f}".format(best_mae_weighted))
    print('best {} model by strictly predicted revenue MAE: '.format(label).upper()
          + best_model_mae_unweighted.upper()
          + " with MAE {:.2f}".format(best_mae_unweighted))


def make_predictions(data_raw, data_name, regression_model):
    """
    makes predictions for NN and LR models

    Preprocesses the raw data, trains the regression model, prints a report
    for the guessed probabilities and then runs one grid search over the
    neural-net settings and one over the logistic-regression settings,
    printing the best model of each family per metric.

    :param data_raw: input data not preprocessed
    :param data_name: name of data
    :param regression_model: initialized model for regression task
    :return: None, all results are printed
    """
    # define column names in data. Usually would be a console input
    key_columns = '(Opportunity_Name,Product)'
    update_col = 'Upload_date'
    created_col = 'Created'
    opp_name_col = 'Opportunity_Name'
    product_name_col = 'Product'
    # strip the surrounding parentheses and split into single column names
    key_columns = key_columns[1:-1].split(',')
    target = 'future stage'  # target for the first part --> stage Won or Lost
    target_second_part = 'time_diff_to_close'  # target for the second part --> time till closing

    # Models parameters
    # (the option names presumably mirror scikit-learn's MLPClassifier and
    # LogisticRegression -- confirm against the define_model implementations)
    nn_activation_list = ['identity', 'logistic', 'tanh', 'relu']
    nn_solver_list = ['lbfgs', 'sgd', 'adam']
    # layer layouts are handed over as strings, exactly as written here
    nn_nodes_list = ['(2, 2, 2)', '(100, 100)', '(20, 16, 10, 4)', '(100, 80, 60, 40)']
    lr_solver_list = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
    c_list = [x * .1 for x in range(1, 11)]

    # preprocess data
    X, y, index_train, index_test, second_part, y_proba_guessed, updates, data_won = preprocess(
        data_raw, target, key_columns, update_col, created_col, opp_name_col, product_name_col)

    # train model for periods prediction first, because the same model will be used each time
    train_regression_model(regression_model, second_part, index_train, target_second_part)

    X_train = X.loc[index_train]
    X_test = X.loc[index_test]
    y_train = y.loc[index_train]
    y_test = y.loc[index_test]

    # baseline: threshold the guessed probabilities at 0.5
    y_guessed = [1 if x >= 0.5 else 0 for x in y_proba_guessed]
    print('Guessed probabilities'.upper())
    print(classification_report(y_test, y_guessed))
    print('Confusion matrix'.upper())
    print(confusion_matrix(y_test, y_guessed))
    ns_auc = roc_auc_score(y_test, y_proba_guessed)
    print('No Skill: ROC AUC=%.3f' % ns_auc)

    # positional constructor arguments shared by both model wrappers
    shared_args = (X_train, X_test, y_train, y_test, index_test, index_train,
                   second_part, target, update_col, y_proba_guessed, updates,
                   data_won, data_name, regression_model)

    def nn_candidates():
        # one freshly built wrapper per (activation, solver, layout) combination
        for nn_activation in nn_activation_list:
            for nn_solver in nn_solver_list:
                for nn_nodes in nn_nodes_list:
                    nn = NeuralNetModel(*shared_args)
                    nn.define_model(solver=nn_solver, activation=nn_activation,
                                    n_nodes=nn_nodes)
                    yield nn

    def lr_candidates():
        # one freshly built wrapper per (solver, C) combination
        for lr_solver in lr_solver_list:
            for c in c_list:
                lr = LogRegModel(*shared_args)
                lr.define_model(solver=lr_solver, c=c)
                yield lr

    # generators keep the construct -> define -> fit order per candidate
    _search_best_models('nn', nn_candidates())
    _search_best_models('lr', lr_candidates())