Example #1
def predict_with_dwt(dataset, testnum, featurenum):
    ca, cd = dwt.dwt(dataset)
    ca_matrix = ca[np.newaxis, :]
    print('DWT finish.')
    x_train, x_test, y_train, y_test = generate_data(ca_matrix,
                                                     int(testnum / 2),
                                                     featurenum)
    min_max_scaler = MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    x_test = min_max_scaler.transform(x_test)
    dbn1 = dbn.DBN(x_train=x_train,
                   y_train=y_train,
                   x_test=x_test,
                   y_test=y_test,
                   hidden_layer=[250],
                   learning_rate_rbm=0.0005,
                   batch_size_rbm=150,
                   n_epochs_rbm=200,
                   verbose_rbm=1,
                   random_seed_rbm=500,
                   activation_function_nn='tanh',
                   learning_rate_nn=0.005,
                   batch_size_nn=150,
                   n_epochs_nn=1500,
                   verbose_nn=1,
                   decay_rate=0)
    dbn1.pretraining()
    dbn1.finetuning()
    ca_pred = dbn1.result[:, 0]
    print('Lowpass coefficient estimation finish.')
    mu, sigma_2, cd_pred = generateData(cd[0:len(cd) - int(testnum / 2)],
                                        outputnum=int(testnum / 2))
    print('Highpass coefficient estimation finish.')
    dataset_pred = dwt.idwt(ca_pred, cd_pred)
    print('IDWT finish.')
    dataset_test = dataset[len(dataset) - testnum:len(dataset)]
    ca_test, cd_test = dwt.dwt(dataset_test)
    plt.figure(figsize=(12, 9), dpi=100)
    plt.subplot(3, 1, 1)
    plt.plot(ca_test)
    plt.plot(ca_pred)
    plt.legend(['lowpass_real', 'lowpass_prediction'], loc='upper right')
    plt.title('lowpass coefficient prediction result', fontsize=16)
    plt.subplot(3, 1, 2)
    plt.plot(cd_test)
    plt.plot(cd_pred)
    plt.legend(['highpass_real', 'highpass_prediction'], loc='upper right')
    plt.title('highpass coefficient prediction result', fontsize=16)
    plt.subplot(3, 1, 3)
    mse = mean_squared_error(dataset_pred, dataset_test)
    plt.plot(dataset_test)
    plt.plot(dataset_pred)
    plt.legend(['dataset_real', 'dataset_prediction'], loc='upper right')
    plt.title('sequence prediction result', fontsize=16)
    plt.xlabel('MSE = %f' % mse)
    plt.draw()
    #plt.show()
    return dataset_pred, mse
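The dwt module used above is not included in this example. A minimal sketch, assuming it simply wraps PyWavelets' single-level transform (the pywt dependency and the 'db4' wavelet are assumptions, not taken from the original code):

import pywt

def dwt(dataset, wavelet='db4'):
    # single-level discrete wavelet transform -> (lowpass/approximation, highpass/detail)
    ca, cd = pywt.dwt(dataset, wavelet)
    return ca, cd

def idwt(ca, cd, wavelet='db4'):
    # inverse transform: rebuild the sequence from both coefficient bands
    return pywt.idwt(ca, cd, wavelet)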
Example #2
def train_model_func(learning_rate_rbm, learning_rate, batch_size, feature,
                     label, path_out_png, pred_num, train_deep):
    X_train, X_test, Y_train, Y_test = train_test_split(feature,
                                                        label,
                                                        test_size=0.2,
                                                        shuffle=False)

    print("Training model...")
    print("RMSE (on training data):")
    root_mean_squared_errors = []
    message_queue = Queue()

    for deep in range(1, train_deep + 1):
        RMSE_total = 0

        for i in range(0, pred_num):
            starttime = datetime.datetime.now()

            x_train = np.array(X_train[X_train.shape[0] - i -
                                       deep:X_train.shape[0] - i])
            y_train = np.array(Y_train[Y_train.shape[0] - i -
                                       deep:Y_train.shape[0] - i])
            x_test = np.array(X_test)
            y_test = np.array(Y_test)

            _process = Process(target=train_model,
                               args=(learning_rate_rbm, learning_rate,
                                     batch_size, x_train, y_train, x_test,
                                     message_queue))
            _process.start()
            _process.join()
            predictions = message_queue.get()

            root_mean_squared_error = math.sqrt(
                mean_squared_error(y_test, predictions))
            endtime = datetime.datetime.now()
            print("\t\ti:\t", root_mean_squared_error, "\t\tusing seconds:\t",
                  (endtime - starttime).seconds)
            RMSE_total += root_mean_squared_error

        RMSE_avg = RMSE_total / pred_num
        root_mean_squared_errors.append(RMSE_avg)
        print("train_deep:", deep, "\tRMSE_avg:", RMSE_avg)

        # Output a graph of loss metrics over periods.
        # plt.subplot(1, 2, 2)
        plt.ylabel('RMSE')
        plt.xlabel('train_deep')
        plt.title("Root Mean Squared Error vs. Train Deep")
        plt.tight_layout()
        plt.plot(root_mean_squared_errors)
        plt.savefig(path_out_png)

    print("finished.")
Example #3
def _print_regressionMetrics(_linear, _X, _y, _predict):
	metrics = [['Regresión Lineal', 'Datos obtenidos'],
			   ['Coeficiente', _linear.coef_],
			   ['Interceptación', _linear.intercept_],
			   ['Calificación (score)', _linear.score(_X, _y)],
			   ['Variance Score', r2_score(_y, _predict)],
			   ['Explained Variance Score', explained_variance_score(_y, _predict)],
			   ['Mean Squared Error', mean_squared_error(_y, _predict)],
			   ['Mean Absolute Error', mean_absolute_error(_y, _predict)], ]
	
	print('\nMinería de Datos - Regresión Lineal - <VORT>', '\n')
	print(_linear, '\n')
	print(look(metrics))
Example #4
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    clf = MLPClassifier(solver='sgd',
                        alpha=1e-5,
                        hidden_layer_sizes=(100, 1),
                        activation='logistic',
                        batch_size=BATCH_HUMAN,
                        shuffle=True,
                        max_iter=5000)

    scaler = StandardScaler()
    scaler.fit(train)
    train1 = scaler.transform(train)
    # apply same transformation to test and validation data
    test = scaler.transform(test)
    val = scaler.transform(val)
    train_out = train_out.astype(float)
    clf.fit(X=train1, y=train_out)
    predict_test = clf.predict(test)
    predict_val = clf.predict(val)
    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_test))
Example #5
def run_gradient_desc_gsc_con():
    train, test, val, train_out, test_out, val_out, BigSigma_inv = process_gsc__data_con(
    )
    Weight_final = np.ones((31, ), dtype=float64)

    erms = []
    for start in range(0, len(train), BATCH):
        end = start + BATCH
        phi_train = getphi(train[start:end], BigSigma_inv)
        phi_train = np.insert(phi_train, 0, 1, 1)
        hx = phi_train.dot(Weight_final)
        hx = hx.astype(np.int64)
        Weight_final = gradient_desc(phi_train, hx, Weight_final,
                                     train_out[start:end])

    hx = phi_train.dot(Weight_final)
    hx = hx.astype(np.int64)
    Gx = getGX(hx)
    phi_test = getphi(test, BigSigma_inv)
    phi_test = np.insert(phi_test, 0, 1, 1)
    hx_test = phi_test.dot(Weight_final)
    hx_test = hx_test.astype(np.int64)
    Gx_test = getGX(hx_test)
    phi_val = getphi(val, BigSigma_inv)
    phi_val = np.insert(phi_val, 0, 1, 1)
    hx_val = phi_val.dot(Weight_final)
    hx_val = hx_val.astype(np.int64)
    Gx_val = getGX(hx_val)

    print(
        "=========================Logistic Regression using Gradient Descent for Concatenated GSC Data==========================="
    )
    print("ERMS Train", mean_squared_error(train_out[start:end], Gx),
          "Accuracy", acc_manual(train_out[start:end], Gx))
    print("ERMS Test", mean_squared_error(test_out, Gx_test), "Accuracy",
          acc_manual(test_out, Gx))
    print("ERMS VAL", mean_squared_error(val_out, Gx_val), "Accuracy",
          acc_manual(val_out, Gx_val))
Example #6
def run_logistic_human_sub(train, test, val, train_out, test_out, val_out,
                           BigSigma_inv):

    Weight_final = np.ones((CENTERS + 1, ), dtype=float64)

    erms = []
    for start in range(0, len(train), BATCH_HUMAN):
        end = start + BATCH_HUMAN
        phi_train = getphi(train[start:end], BigSigma_inv)
        phi_train = np.insert(phi_train, 0, 1, 1)
        hx = phi_train.dot(Weight_final)
        hx = hx.astype(np.int64)
        Weight_final = gradient_desc(phi_train, hx, Weight_final,
                                     train_out[start:end])

    hx = phi_train.dot(Weight_final)
    hx = hx.astype(np.int64)
    Gx = getGX(hx)
    phi_test = getphi(test, BigSigma_inv)
    phi_test = np.insert(phi_test, 0, 1, 1)
    hx_test = phi_test.dot(Weight_final)
    hx_test = hx_test.astype(np.int64)
    Gx_test = getGX(hx_test)
    phi_val = getphi(val, BigSigma_inv)
    phi_val = np.insert(phi_val, 0, 1, 1)
    hx_val = phi_val.dot(Weight_final)
    hx_val = hx_val.astype(np.int64)
    Gx_val = getGX(hx_val)

    print(
        "=========================Logistic Regression using Gradient Descent for Subtracted HOD Data==========================="
    )
    print("ERMS Train", mean_squared_error(train_out[start:end], Gx),
          "Accuracy", acc_manual(train_out[start:end], Gx))
    print("ERMS Test", mean_squared_error(test_out, Gx_test), "Accuracy",
          acc_manual(test_out, Gx))
    print("ERMS VAL", mean_squared_error(val_out, Gx_val), "Accuracy",
          acc_manual(val_out, Gx_val))
 def score(self, curr_sk_ids=[]):
     """ This method take current ids and return Previous application score
     """
     orig_data = pd.read_csv(self.path_to_data_store + '/previous_application.csv')
     orig_data = orig_data.loc[ (orig_data['SK_ID_CURR'].isin(curr_sk_ids)) & (orig_data['FLAG_LAST_APPL_PER_CONTRACT'] == 'Y') \
                               & (orig_data['NAME_CONTRACT_STATUS'] != 'Canceled')]
     sk_ids_from_payments = orig_data['SK_ID_CURR']
     orig_data['NAME_CONTRACT_STATUS'].replace(self.target_map, inplace=True)
     test_data = self.__curate__(orig_data)
     y_test = test_data['NAME_CONTRACT_STATUS']
     X_test = test_data.drop(['NAME_CONTRACT_STATUS'], axis=1)
     preds = self.model.predict(X_test)
     print('MSE Score : ', mean_squared_error(y_test, preds))
     return self.__adjustDuplicates__(preds, sk_ids_from_payments, curr_sk_ids)
Example #8
def evaluate(ytrue, ypred):
    """

    :param ytrue: true value of the dependent variable, numpy array
    :param ypred: predictions for the dependent variable, numpy array
    :return: evaluation metrics: R squared, mean squared error, mean absolute error, fraction of variance
    explained, and Spearman rank correlation coefficient
    """
    r2 = r2_score(ytrue, ypred)
    mse = mean_squared_error(ytrue, ypred)
    mae = mean_absolute_error(ytrue, ypred)
    variance_explained = explained_variance_score(ytrue, ypred)
    spearman = spearmanr(ytrue, ypred)[0]
    return r2, mse, mae, variance_explained, spearman
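A hypothetical call to evaluate() with small numpy arrays (values are illustrative only; the sklearn and scipy functions used inside are assumed to be imported as in the original module):

import numpy as np

ytrue = np.array([3.0, -0.5, 2.0, 7.0])
ypred = np.array([2.5, 0.0, 2.0, 8.0])
r2, mse, mae, variance_explained, spearman = evaluate(ytrue, ypred)
print('R2=%.3f MSE=%.3f MAE=%.3f ExplVar=%.3f Spearman=%.3f'
      % (r2, mse, mae, variance_explained, spearman))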
def test_DBN(finetune_lr=0.1,
             pretraining_epochs=100,
             pretrain_lr=0.01,
             k=1,
             training_epochs=100,
             dataset=3,
             batch_size=10,
             layers=[1000, 1000, 1000]):

    # title
    temp_title = [
        "DNA Methylation", "Gene Expression HTSeq", "miRNA Expression"
    ]
    print("\nSurvival Rate Regression with " + temp_title[dataset - 1] +
          " (Tensorflow)\n")

    # Loading dataset
    X, Y = load_data(dataset)

    # Splitting data
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=1337)

    # Data scaling
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)

    # Training
    regressor = SupervisedDBNRegression(hidden_layers_structure=layers,
                                        learning_rate_rbm=pretrain_lr,
                                        learning_rate=finetune_lr,
                                        n_epochs_rbm=pretraining_epochs,
                                        n_iter_backprop=training_epochs,
                                        batch_size=batch_size,
                                        activation_function='relu')
    regressor.fit(X_train, Y_train)

    # Test
    X_test = min_max_scaler.transform(X_test)
    Y_pred = regressor.predict(X_test)
    try:
        print('Done.\nR-squared: %f\nMSE: %f' %
              (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
    except Exception as e:
        print(
            "Infinity or a value too large for dtype('float32'). Please try different layer settings."
        )
Example #10
    def update_mse(tmp_input_element, tmp_list):
        data_train, label_train, data_test, label_test = \
            HSMemory.create_train_and_test_data(tmp_list, tmp_input_element.number_visible_input)
        tmp_regression = SupervisedDBNRegression(
            hidden_layers_structure=[
                tmp_input_element.number_visible_input,
                tmp_input_element.number_hidden_input
            ],
            learning_rate_rbm=tmp_input_element.learning_rate_rbm,
            learning_rate=tmp_input_element.learning_rate,
            n_epochs_rbm=tmp_input_element.n_epochs_rbm,
            n_iter_backprop=tmp_input_element.n_iter_back_prop,
            contrastive_divergence_iter=tmp_input_element.contrastive_divergence_iter,
            batch_size=tmp_input_element.batch_size,
            activation_function=tmp_input_element.activation_function,
            n_hidden_layers_mlp=tmp_input_element.n_hidden_layers_mlp,
            cost_function_name=tmp_input_element.cost_function_name)

        tmp_regression.fit(data_train, label_train)  # train data
        tmp_input_element.train_mse = sum(
            tmp_regression.train_loss) / HSElement.config_n_iter_back_prop

        y_pred_test = tmp_regression.predict(data_test)
        check_nan = np.isnan(y_pred_test).any()

        if check_nan:
            tmp_input_element.test_mse = 1000
        else:
            tmp_input_element.test_mse = mean_squared_error(
                label_test, y_pred_test)
        if np.isnan(tmp_input_element.train_mse) or np.isinf(
                tmp_input_element.train_mse):
            tmp_input_element.train_mse = 1000

        # add to export result
        tmp_result_data = [
            tmp_input_element.learning_rate_rbm,
            tmp_input_element.learning_rate,
            tmp_input_element.number_visible_input,
            tmp_input_element.number_hidden_input, tmp_input_element.train_mse,
            tmp_input_element.test_mse, '', '', '', '', '', '', '', ''
        ]
        TensorGlobal.followHs.append(tmp_result_data)

        TensorGlobal.sessFlg = True
        tf.reset_default_graph()
        del tmp_regression
        return tmp_input_element
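The NaN/Inf guard used above (falling back to a sentinel MSE of 1000 when the regressor diverges) can be factored into a small standalone helper; a sketch, not part of the original code:

import numpy as np
from sklearn.metrics import mean_squared_error

def guarded_mse(y_true, y_pred, sentinel=1000):
    # return the sentinel instead of a NaN/Inf score when predictions are unusable
    if np.isnan(y_pred).any() or np.isinf(y_pred).any():
        return sentinel
    mse = mean_squared_error(y_true, y_pred)
    return sentinel if (np.isnan(mse) or np.isinf(mse)) else mse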
Example #11
def get_regression_metrics(ground_truth_value, predicted_value):
    regression_metric_dict = dict({})
    regression_metric_dict['r2_score'] = r2_score(ground_truth_value,
                                                  predicted_value)
    regression_metric_dict['mean_squared_error'] = mean_squared_error(
        ground_truth_value, predicted_value)
    #regression_metric_dict['mean_squared_log_error'] = mean_squared_log_error(ground_truth_value, predicted_value)
    regression_metric_dict['mean_absolute_error'] = mean_absolute_error(
        ground_truth_value, predicted_value)
    regression_metric_dict[
        'explained_variance_score'] = explained_variance_score(
            ground_truth_value, predicted_value)
    regression_metric_dict['median_absolute_error'] = median_absolute_error(
        ground_truth_value, predicted_value)
    regression_metric_dict['max_error'] = max_error(ground_truth_value,
                                                    predicted_value)

    return regression_metric_dict
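A hypothetical call to get_regression_metrics() (the sklearn.metrics functions it wraps, including max_error, are assumed to be imported as in the original module):

ground_truth = [3.0, -0.5, 2.0, 7.0]
predicted = [2.5, 0.0, 2.0, 8.0]
for name, value in get_regression_metrics(ground_truth, predicted).items():
    print(name, round(value, 4))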
Example #12
def read_input_data():
    X_train, Y_train, X_test, Y_test = normalize_data()
    rng = numpy.random.RandomState(123)
    # print(normalized_X_train.values, normalized_Y_train)
    # print(normalized_X_train.shape, normalized_Y_train.shape)
    dbn = DBN(input=X_train,
              label=Y_train,
              n_ins=X_train.shape[1],
              hidden_layer_sizes=[80] * 10,
              n_outs=1,
              rng=rng)
    dbn.pretrain(lr=0.001, k=1, epochs=1000)
    dbn.finetune(lr=0.001, epochs=200)
    results = dbn.predict(X_test)
    print("results", results, Y_test)
    print(Y_test.shape)
    print(r2_score(Y_test, results), mean_squared_error(Y_test, results))
    print(mean_absolute_error(Y_test, results))
def get_resutls_column(model, trainfolds_dfs, testfolds_dfs, train_set,
                       test_set, feature_set, target_col_name):
    MSEs = [None] * len(testfolds_dfs)
    MAEs = [None] * len(testfolds_dfs)
    SPs = [None] * len(testfolds_dfs)
    PNs = [None] * len(testfolds_dfs)
    for i in range(len(testfolds_dfs)):
        train_X = trainfolds_dfs[i].loc[:, feature_set].values
        train_Y = trainfolds_dfs[i].loc[:, target_col_name].values
        test_X = testfolds_dfs[i].loc[:, feature_set].values
        test_Y = testfolds_dfs[i].loc[:, target_col_name].values
        model.fit(train_X, train_Y)
        test_pred = model.predict(test_X)
        MAEs[i] = MAE(test_pred, test_Y)
        MSEs[i] = MSE(test_pred, test_Y)
        SPs[i] = SPC(test_pred, test_Y)
        PNs[i] = PNC(test_pred, test_Y)

    train_cvavg_MAE = numpy.mean(MAEs)
    train_cvavg_MSE = numpy.mean(MSEs)
    train_cvavg_PN = numpy.mean(PNs)
    train_cvavg_SP = numpy.mean(SPs)

    test_Y = test_set.loc[:, target_col_name].values
    test_X = test_set.loc[:, feature_set].values
    train_X = train_set.loc[:, feature_set].values
    train_Y = train_set.loc[:, target_col_name].values

    model.fit(train_X, train_Y)
    test_pred = model.predict(test_X)

    testset_pn, _ = pearsonr(test_Y, test_pred)
    testset_sp, _ = spearmanr(test_Y, test_pred)
    testset_mae = mean_absolute_error(test_Y, test_pred)
    testset_mse = mean_squared_error(test_Y, test_pred)

    column = [
        testset_mae, testset_mse, testset_pn, testset_sp, train_cvavg_MAE,
        train_cvavg_MSE, train_cvavg_PN, train_cvavg_SP, feature_set,
        len(feature_set)
    ]
    column += list(test_pred)
    return column
    def score(self):
        test_target = None
        test_data = pd.read_csv(self.path_to_data_store +
                                '/application_test.csv')
        sk_id_curr = test_data[['SK_ID_CURR']]
        if self.score_type == 'actual':
            test_data = self.createEnsembleData(test_data[['SK_ID_CURR']])
        else:
            test_target = test_data['TARGET']
            test_data = self.createEnsembleData(test_data[['SK_ID_CURR']])

        score = self.ensemble.predict(test_data)
        if test_target is not None:
            print(mean_squared_error(test_target.values, score))
            output = self.format_output(sk_id_curr, score, test_target)
            output.to_csv('submission.csv', index=False)
        else:
            output = self.format_output(sk_id_curr, score)
            output.to_csv('submission.csv', index=False)
def test_DBN(finetune_lr=0.1,
             pretraining_epochs=100,
             pretrain_lr=0.01,
             training_epochs=100,
             dataset=6,
             batch_size=10,
             layers=[1000, 1000, 1000],
             dropout=0.2,
             pca=2,
             optimizer=1):

    # title
    temp_title = ["DNA Methylation Platform GPL8490",
                  "DNA Methylation Platform GPL16304",
                  "Gene Expression HTSeq Count",
                  "Gene Expression HTSeq FPKM",
                  "Gene Expression HTSeq FPKM-UQ",
                  "miRNA Expression"]
    print("\nSurvival Rate Regression with " + temp_title[dataset-1] + " (Tensorflow)\n")
    
    # Loading dataset
    X, Y = load_data(dataset, pca)

    # Splitting data
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1337)

    # Training
    regressor = SupervisedDBNRegression(hidden_layers_structure=layers,
                                        learning_rate_rbm=pretrain_lr,
                                        learning_rate=finetune_lr,
                                        n_epochs_rbm=pretraining_epochs,
                                        n_iter_backprop=training_epochs,
                                        batch_size=batch_size,
                                        activation_function='relu',
                                        dropout_p=dropout)
    regressor.fit(X_train, Y_train)

    # Test
    Y_pred = regressor.predict(X_test)
    Y_pred = numpy.transpose(Y_pred)[0]
    Y_pred_train = regressor.predict(X_train)
    Y_pred_train = numpy.transpose(Y_pred_train)[0]
    print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
    print('Done. \ntraining R-squared: %f\nMSE: %f' % (r2_score(Y_train, Y_pred_train), mean_squared_error(Y_train, Y_pred_train)))
 def test_data(self):
    housing_data      = self.strat_test_set
    X                 = housing_data.drop("median_house_value", axis=1)
    y                 = housing_data["median_house_value"].copy()
    X_prepared        = self.full_pipeline.transform(X)
    final_predictions = self.final_model.predict(X_prepared)
    
    final_mse  = mean_squared_error(y, final_predictions)
    final_rmse = np.sqrt(final_mse)
    
    print('\n')
    print('final root mean squared error (RMSE):\n{0}\n'.format(final_rmse))
    
    confidence     = 0.95
    squared_errors = (final_predictions - y) ** 2
    mean           = squared_errors.mean()
    scale          = stats.sem(squared_errors)
    m              = len(squared_errors)
    interval95     = np.sqrt(stats.t.interval(confidence, m-1, loc=mean,scale=scale))
    print('95% confidence interval for the RMSE:\n{0}\n'.format(interval95))
Example #17
    def score(self, X, y, step=1, method="r2"):
        """
        Produce multi-step prediction of y, and compute the metrics against y.
        Nan is ignored when computing the metrics.

        :param array-like X: exogenous input time series, shape = (n_samples,
                             n_exog_inputs)
        :param array-like y: target time series to predict, shape = (n_samples)
        :param int step: prediction step.
        :param string method: could be "r2" (R Square) or "mse" (Mean Square
                              Error).

        :return: prediction metric. Nan is ignored when computing the metrics.
        """
        ypred = self.predict(X, y, step=step)
        mask = np.isnan(y) | np.isnan(ypred)
        if method == "r2":
            return r2_score(y[~mask], ypred[~mask])
        elif method == "mse":
            return mean_squared_error(y[~mask], ypred[~mask])
 def _get_error(self, used, used_bounds):
     if self.cv:
         cv_errors = []
         for train_idx, test_idx in self.kf.split(self.X):
             self._refit_model(
                 self.types[used], self.bounds[used_bounds],
                 self.X[train_idx.reshape(-1, 1), used],
                 self.y[train_idx])  # refit the model every round
             pred = self.model.predict(self.X[test_idx.reshape(-1, 1),
                                              used])[0]
             cv_errors.append(
                 np.sqrt(mean_squared_error(self.y[test_idx], pred)))
         cve = np.mean(cv_errors)
     else:
         cve = np.inf
     self._refit_model(self.types[used], self.bounds[used_bounds],
                       self.X[:, used], self.y)  # refit the model every round
     oob = self.model.rf.out_of_bag_error()
     return oob, cve
Example #19
 def train(self, data, training):
     """This function runs 10-fold CV to train the network with an expansion of between 20-75%.
     The training algorithm is Stochastic Gradient Descent or Extreme Learning Machine."""
     # 10-Fold Cross Validation
     folds = 10
     iters = 10
     kf = KFold(data.shape[0], n_folds=folds)
     hiddenNodes = arange(2*data.shape[1])+1
     Error_HNodes = []
     Nets_HNodes = []
     for j in hiddenNodes:
         self.setHiddenNodes([j])
         Mean_error_iter = []
         Mean_nets_iter = []
         for train_index, val_index in kf:
             X, Xval = data[train_index], data[val_index]
             Error_iter = []
             Nets_iter = []
             for i in np.arange(iters):
                 self.initialization()  # Common initializations
                 if training == 'elm':
                     Out,H,N = self.sim(X)
                     H = H[-1]
                     pseudoinverse = pinv(H)
                     beta = np.dot(pseudoinverse,X)
                     self.Weights[-1] = beta
                     # Validation
                     Out_val,H_val,N_val = self.sim(Xval)
                     # Store the error and the network
                     MSE = [mean_squared_error(Xval,Out_val)]
                     Networks = [self.Weights]
                 Error_iter.append(np.min(MSE))
                 Nets_iter.append(Networks[np.argmin(MSE)])
             Mean_error_iter.append(np.mean(Error_iter))
             Mean_nets_iter.append(Nets_iter[np.argmin(Error_iter)])
         Error_HNodes.append(np.mean(Mean_error_iter))
         Nets_HNodes.append(Mean_nets_iter[np.argmin(Mean_error_iter)])
     self.Weights = Nets_HNodes[np.argmin(Error_HNodes)]
     Final_Error = np.min(Error_HNodes)
     selected_Nodes = hiddenNodes[np.argmin(Error_HNodes)]
     self.setHiddenNodes([selected_Nodes])
     return Final_Error
def fnLinearRegression(TrainData, Target, Title):

    regr = linear_model.LinearRegression()
    regr.fit(TrainData, Target)

    prediction = regr.predict(TrainData)

    plt.figure(1)

    plt.title(Title)
    plt.scatter(TrainData, Target)
    plt.plot(TrainData, prediction, color='blue')

    plt.show()
    ''' Find out mean squared error between prediction and target '''

    MSE = mean_squared_error(Target, prediction)

    R2_Score = r2_score(Target, prediction)

    return MSE, regr.coef_, regr.intercept_, R2_Score
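A hypothetical call with synthetic one-feature data (names and values are illustrative; TrainData must be a single column so the scatter plot works):

import numpy as np

rng = np.random.RandomState(0)
TrainData = np.arange(30, dtype=float).reshape(-1, 1)
Target = 2.5 * TrainData.ravel() + rng.normal(0, 1, 30)
mse, coef, intercept, r2 = fnLinearRegression(TrainData, Target, 'Synthetic linear fit')
print('MSE=%.3f coef=%s intercept=%.3f R2=%.3f' % (mse, coef, intercept, r2))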
Example #21
    def score(self, X, y, method="r2", verbose=False):
        """
        Produce multi-step prediction of y, and compute the metrics against y.
        Nan is ignored when computing the metrics.

        :param array-like X: exogenous input time series, shape = (n_samples,
                             n_exog_inputs)
        :param array-like y: target time series to predict, shape = (n_samples)
        :param string method: could be "r2" (R Square) or "mse" (Mean Square
                              Error).

        :return: prediction metric. Nan is ignored when computing the metrics.
        """
        ypred = self.predict(X, y)
        mask = np.isnan(y) | np.isnan(ypred)
        if verbose:
            print('Evaluating {} score, {} of {} data points are evaluated.'.
                  format(method, np.sum(~mask), y.shape[0]))
        if method == "r2":
            return r2_score(y[~mask], ypred[~mask])
        elif method == "mse":
            return mean_squared_error(y[~mask], ypred[~mask])
Example #22
def predict_without_dwt(dataset, testnum, featurenum):
    dataset = dataset[np.newaxis, :]
    x_train, x_test, y_train, y_test = generate_data(dataset, testnum,
                                                     featurenum)
    min_max_scaler = MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    x_test = min_max_scaler.transform(x_test)
    dbn1 = dbn.DBN(x_train=x_train,
                   y_train=y_train,
                   x_test=x_test,
                   y_test=y_test,
                   hidden_layer=[250],
                   learning_rate_rbm=0.0005,
                   batch_size_rbm=150,
                   n_epochs_rbm=200,
                   verbose_rbm=1,
                   random_seed_rbm=500,
                   activation_function_nn='tanh',
                   learning_rate_nn=0.005,
                   batch_size_nn=150,
                   n_epochs_nn=1500,
                   verbose_nn=1,
                   decay_rate=0)
    dbn1.pretraining()
    dbn1.finetuning()
    dataset_pred = dbn1.result[:, 0]
    dataset_test = dataset[0, dataset.shape[1] - testnum:dataset.shape[1]]
    mse = mean_squared_error(dataset_pred, dataset_test)
    plt.figure(figsize=(12, 9), dpi=100)
    plt.plot(dataset_test)
    plt.plot(dataset_pred)
    plt.legend(['dataset_real', 'dataset_prediction'], loc='upper right')
    plt.title('sequence prediction result', fontsize=16)
    plt.xlabel('MSE = %f' % mse)
    plt.draw()
    #plt.show()
    return dataset_pred, mse
Example #23
def regressionSummary(y_true, y_pred):
    """ print regression performance metrics 
    
    Input:
        y_true: actual values
        y_pred: predicted values
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_res = y_true - y_pred
    metrics = [
        ('Mean Error (ME)', sum(y_res) / len(y_res)),
        ('Root Mean Squared Error (RMSE)',
         math.sqrt(regression.mean_squared_error(y_true, y_pred))),
        ('Mean Absolute Error (MAE)', sum(abs(y_res)) / len(y_res)),
        ('Mean Percentage Error (MPE)',
         100 * sum(y_res / y_true) / len(y_res)),
        ('Mean Absolute Percentage Error (MAPE)',
         100 * sum(abs(y_res / y_true) / len(y_res))),
    ]
    fmt1 = '{{:>{}}} : {{:.4f}}'.format(max(len(m[0]) for m in metrics))
    print('\nRegression statistics\n')
    for metric, value in metrics:
        print(fmt1.format(metric, value))
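A hypothetical call to regressionSummary() (numpy, math, and the sklearn regression metrics are assumed to be imported as in the original module; y_true must not contain zeros because MPE/MAPE divide by it):

y_true = [20.0, 30.0, 40.0, 50.0]
y_pred = [22.0, 29.0, 37.0, 55.0]
regressionSummary(y_true, y_pred)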
Example #24
def train_model_func(learning_rate_rbm, learning_rate, batch_size, feature, label, path_out_png, pred_num, train_deep):
    X_train, X_test, Y_train, Y_test = train_test_split(feature, label, test_size=0.2, shuffle=False)

    print("Training model...")
    print("RMSE (on training data):")
    root_mean_squared_errors = []
    for deep in range(1, train_deep + 1):
        RMSE_total = 0
        for i in range(0, pred_num):
            x_train = np.array(X_train[X_train.shape[0] - i - deep:X_train.shape[0] - i])
            y_trian = np.array(Y_train[Y_train.shape[0] - i - deep:Y_train.shape[0] - i])
            x_test = np.array(X_test)
            y_test = np.array(Y_test)

            predictions = train_model(learning_rate_rbm=learning_rate_rbm, learning_rate=learning_rate,
                                      batch_size=batch_size, x_train=x_train,
                                      y_trian=y_trian, x_test=x_test)

            root_mean_squared_error = math.sqrt(mean_squared_error(y_test, predictions))
            print("\t\ti:\t", root_mean_squared_error)
            RMSE_total += root_mean_squared_error

        RMSE_avg = RMSE_total / pred_num
        root_mean_squared_errors.append(RMSE_avg)
        print("train_deep:", deep, "\tRMSE_avg:", RMSE_avg)

        # Output a graph of loss metrics over periods.
        # plt.subplot(1, 2, 2)
        plt.ylabel('RMSE')
        plt.xlabel('train_deep')
        plt.title("Root Mean Squared Error vs. Train Deep")
        plt.tight_layout()
        plt.plot(root_mean_squared_errors)
        plt.savefig(path_out_png)

    print("finished.")
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from dbn.tensorflow import SupervisedDBNRegression


# Loading dataset
boston = load_boston()
X, Y = boston.data, boston.target

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Data scaling
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

# Training
regressor = SupervisedDBNRegression(hidden_layers_structure=[100],
                                    learning_rate_rbm=0.01,
                                    learning_rate=0.01,
                                    n_epochs_rbm=20,
                                    n_iter_backprop=200,
                                    batch_size=16,
                                    activation_function='relu')
regressor.fit(X_train, Y_train)

# Test
X_test = min_max_scaler.transform(X_test)
Y_pred = regressor.predict(X_test)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
Example #26
X_train = min_max_scaler.fit_transform(X_train)

#print('x_train number: ', X_train[0])
#print('x_test number: ', X_test.shape[1])
#print('Y_train number: ', Y_train[0])
#print('y_test number: ', Y_test.shape[0])

# Training
regressor = SupervisedDBNRegression(hidden_layers_structure=[100],
                                    learning_rate_rbm=0.01,
                                    learning_rate=0.01,
                                    n_epochs_rbm=20,
                                    n_iter_backprop=200,
                                    batch_size=16,
                                    activation_function='relu')
#regressor.fit(X_train, Y_train)

# Save the model
#regressor.save('model_regression_128.pkl')

# Restore it
regressor = SupervisedDBNRegression.load('models/model_regression.pkl')

# Test
X_test = min_max_scaler.transform(X_test)
Y_pred = regressor.predict(X_test)
print('Done.\nR-squared: %f\nMSE: %f' %
      (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))

#print(Y_pred)
Example #27
# Computing the RMSE
def ComputeRMSE(result, predicted):
    # The body is missing from the original snippet; a minimal assumed implementation:
    return np.sqrt(mean_squared_error(result, predicted))
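A hypothetical call (numpy and sklearn's mean_squared_error are assumed to be imported):

result = [3.0, -0.5, 2.0, 7.0]
predicted = [2.5, 0.0, 2.0, 8.0]
print(ComputeRMSE(result, predicted))  # RMSE of the toy vectors above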
Example #28
# =============================================================================
# REGRESSION EVALUATION METRICS
# =============================================================================

from sklearn.metrics import (r2_score, explained_variance_score,
                             mean_absolute_error,
                             median_absolute_error,
                             mean_squared_error,
                             mean_squared_log_error)

y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
mean_squared_error(y_true, y_pred)  # 0.375
r2_score(y_true, y_pred)  # ~0.9486

y_true = [[0.5, 1], [-1, 1], [7, -6]]
y_pred = [[0, 2], [-1, 2], [8, -5]]
mean_squared_error(y_true, y_pred)
r2_score(y_true, y_pred)

# =============================================================================
# CLUSTERING EVALUATION METRICS
# =============================================================================

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
Example #29
    regressor.fit(data_train, label_train)
    tmp_train_mse = sum(regressor.train_loss) / RandomRegression.number_iter_backprop
    tmp_min_mse_label = 'MORE BAD'
    stop_time = time.time()
    print("THE TIME FOR TRAINING: " + str((stop_time - start_time)) + ' second')

    # Test
    start_time = time.time()
    Y_pred_test = regressor.predict(data_test)
    print("Begin to call mean_squared_error")
    check_nan = np.isnan(Y_pred_test).any()

    if check_nan:
        tmp_test_mse = 1000
    else:
        tmp_test_mse = mean_squared_error(label_test, Y_pred_test)
    if np.isnan(tmp_train_mse) or np.isinf(tmp_train_mse):
        tmp_train_mse = 1000

    stop_time = time.time()
    print("THE TIME FOR TEST: " + str((stop_time - start_time)) + ' second')
    TensorGlobal.sessFlg = True
    tf.reset_default_graph()
    del regressor
    tmp_element_data = [tmp_lrr, tmp_lr, RandomRegression.number_visible_input, RandomRegression.number_hidden_input,
                        tmp_train_mse, tmp_test_mse]
    result_data.append(tmp_element_data)

# Export result to excel file
print("Begin to print result")
now = datetime.now()
Example #30
def build_predictor(fname):

    s = []
    d1, d2 = 0, 0
    for line in open(fname):
        s.append(map(float, line.split()))
    # s = s[5000:]

    # print '--- Error on complete set'
    # y = map(lambda u: u[0], s)
    # for idx, val in enumerate(["reqtime", "tsafrir", "sgd"]):
    #     xi = map(lambda u: u[idx+1], s)
    #     print regression.mean_squared_error(y, xi), val
    #     accuracy(y, xi)

    # s = []
    # for i in range(-100,100,3):
    #     # s.append([7*(i**3)-5*i*i-17*i+8, i,i*i,i*i*i])
    #     s.append([7*(i**3)-5*i*i-17*i+8, i])

    import random
    random.shuffle(s)

    # s=[ [1, 6,8,9], [5, 6,8,9], [7, 6,8,9], [2, 7,8,9]]

    # keyfunc = lambda u: u[0]
    # s.sort(key=keyfunc)

    print "original data size", len(s)

    # mp = {}
    # for r in s:
    #     key, val = r[0], r[1:]
    #     if not key in mp:
    #         mp[key] = []
    #     mp[key].append(val)

    # s = []
    # for key in mp:
    #     l = mp[key]
    #     if len (l) > 1:
    #         t = reduce(lambda u,v: [i+j for i,j in izip(u,v)], l)
    #         t = map(lambda u: 1.0*u/len(l), t)
    #         mp[key] = t
    #     else:
    #         mp[key] = l[0]
    #     s.append([key] + mp[key])

    # print "data size later", len(s)

    mp = {}
    for r in s:
        key, val = r[0], tuple(r[1:])
        if not val in mp:
            mp[val] = []
        mp[val].append(key)

    s = []
    for key in mp:
        l = mp[key]
        mp[key] = sum(l)/len(l)
        s.append([mp[key]] + list(key))

    print "data size later", len(s)

    X = map(lambda u: u[1:], s)
    # X = map(lambda u: [u[1]], s)
    y = map(lambda u: u[0], s)

    # print len(X), len(set(tuple(u) for u in X))

    import itertools

    for x in X:
        t = []
        for i in x:
            t.extend([i*i,i**3])
        for a,b in itertools.combinations(x, 2):
            t.append(a*b)
        for a,b,c in itertools.combinations(x, 3):
            t.append(a*b*c)
        x.extend(t)


    # prepare the training and testing data for the model
    nCases = len(y)
    nTrain = int(np.floor(nCases * 0.8))
    trainX = X[:nTrain]
    trainY = y[:nTrain]
    testX = X[nTrain:]
    testY = y[nTrain:]


    # print type(X), X[0]
    svr = SVR(kernel='linear', C=1.0, epsilon=0.2)
    svr = SVR(kernel='rbf', C=1.0, epsilon=0.2, gamma=.0001)
    log = LinearRegression(normalize=True)

    # train both models
    # svr.fit(trainX, trainY)
    log.fit(trainX, trainY)

    # predict test labels from both models
    predLog = log.predict(testX)
    # predSvr = svr.predict(testX)

    # show it on the plot
    # plt.plot(testY, testY, label='true data')
    # # plt.plot(testY, predSvr, 'co', label='SVR')
    # plt.plot(testY, predLog, 'mo', label='LogReg')
    # plt.legend()
    # plt.show()


    print '--- Error on test set'
    meta_mse = regression.mean_squared_error(testY, predLog)
    print int(meta_mse), "meta predictor"
    # print regression.mean_squared_error(testY, predSvr)

    well_estimated = sum([1 if abs(u-v)<2*u else 0 for u,v in izip(testY, predLog)])
    print "well estimated, all, percent: ", well_estimated, len(testY), int(100.0*well_estimated/len(testY)), "%"
    exit(0)

    for idx, val in enumerate(["reqtime", "tsafrir", "sgd"]):
        mse = regression.mean_squared_error(testY, map(lambda u: u[idx], testX))
        print "%d %.2f %% %s" % (mse, 100*(mse-meta_mse)/mse, val)
Example #31
    path3 = 'modelo_treinamento_2019_11_01_14_10_48_modelo_1_[90, 90, 90].pkl'

    pathCompleto = path1 + path2 + path3

    regressor = SupervisedDBNRegression.load(pathCompleto)

    # Teste
    Y_pred = regressor.predict(X_test)

    # if conjTreino == 'degrauUnitario.csv':

    #     Y_pred = Y_pred / 4.6    # 4.62073146825719

    r2Score = r2_score(Y_test, Y_pred)
    MSE = mean_squared_error(Y_test, Y_pred)

    print('\nDone.\nR-squared: %f\nMSE: %f' % (r2Score, MSE))

    arquivoResultados = pd.DataFrame(data={
        "Arquivo": [conjTreino],
        "r2Score": [r2Score],
        "MSE": [MSE]
    })

    arquivoResultados.to_csv(r'./Resultados/resultados_teste_' +
                             indiceTreinamento + '.csv',
                             sep=',',
                             index=False,
                             mode='a',
                             header=primeiraExecucao)
def test_DBN(finetune_lr=0.1,
             pretraining_epochs=100,
             pretrain_lr=0.01,
             k=1,
             training_epochs=100,
             dataset=6,
             batch_size=10,
             layers=[1000, 1000, 1000],
             dropout=0.2,
             pca=2,
             optimizer=1):

    # Title
    temp_title = ["DNA Methylation Platform GPL8490",
                  "DNA Methylation Platform GPL16304",
                  "Gene Expression HTSeq Count",
                  "Gene Expression HTSeq FPKM",
                  "Gene Expression HTSeq FPKM-UQ",
                  "miRNA Expression"]
    print("\Survival Rate Regression with " + temp_title[dataset-1] + " (Theano)\n")
    
    #########################
    #### PREPARE DATASET ####
    #########################
    # Load datasets
    datasets = load_data(dataset, pca)

    # Split dataset into training and test set
    train_input_set, test_input_set, train_label_set, test_label_set = train_test_split(datasets[0], datasets[1], test_size=0.25, random_state=100)
    
    # Size of input layer
    _, nr_in = train_input_set.shape
    
    # Number of training batches
    n_train_batches = train_input_set.shape[0] // batch_size

    # cast inputs and labels as shared variable to accelerate computation
    train_set_x, train_set_y = shared_dataset(data_xy = (train_input_set,train_label_set))
    test_set_x, test_set_y = shared_dataset(data_xy = (test_input_set,test_label_set))

    
    #########################
    ##### BUILD NN MODEL ####
    #########################
    print('Build NN Model')
    numpy_rng = numpy.random.RandomState(123)
    dbn = DBN(numpy_rng=numpy_rng, n_ins=nr_in, hidden_layers_sizes=layers, n_outs=1)


    #########################
    ### PRETRAIN NN MODEL ###
    #########################
    print('Pretrain NN Model')
    
    # Get the pretraining functions. It is on the amount of the number of layers.
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k)

    # iterate for each RBMs
    for i in range(dbn.n_layers):
        # iterate for pretraining epochs
        for epoch in range(pretraining_epochs):
            c = []
            # iterate for number of training batches
            for batch_index in range(n_train_batches):
                # c is a list of monitoring cost per batch for RBM[i]
                c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr))
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
            print(numpy.mean(c, dtype='float64'))

    
    #########################
    ### FINETUNE NN MODEL ###
    #########################
    print('Train NN Model')
    
    # Get the training functions.
    train_fn = dbn.build_finetune_functions(train_set_x=train_set_x, train_set_y=train_set_y, batch_size=batch_size, learning_rate=finetune_lr, dropout=dropout, optimizer=optimizer)

    # iterate for training epochs
    for j in range(training_epochs):
        # iterate for number of training batches
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)

    
    #########################
    ##### TEST NN MODEL #####
    #########################
    print('Test NN Model')
    
    # Get the test functions.
    test_model = dbn.predict(test_set_x=test_set_x, dropout=dropout)

    # take the test result
    test_predicted_label_set = test_model()
    print(test_label_set)
    print(test_predicted_label_set)

    # accuracy, p, r, f, s
    mse = mean_squared_error(test_label_set, test_predicted_label_set)
    r2 = r2_score(test_label_set, test_predicted_label_set)

    # print results
    print("MSE = " + str(mse))
    print("R2 = " + str(r2))
        momentum = 1e-4
        # do weight updates in imperative
        for pname, W, G in zip(cost_classification.list_arguments(),
                               executor.arg_arrays, executor.grad_arrays):
            # Don't update inputs
            # MXNet makes no distinction between weights and data.
            if pname in ['data', 'lro']:
                continue
            # what ever fancy update to modify the parameters
            auto_momentum = mx.nd.minimum(momentum,
                                          mx.nd.power(mx.nd.sum(G), 2.0))
            auto_k = mx.nd.minimum(0, mx.nd.minimum(1, 1 - auto_momentum))
            vw = W * auto_momentum - .001 * G
            vn = W * momentum - .001 * G
            # print(auto_momentum.asnumpy(), auto_k.asnumpy())
            W[:] = W + auto_k * vn + (1 - auto_k) * vw

    # Evaluation at each epoch
    output = []
    for x in range(0, len(teIdx), batch_size):
        batchX = teIdx[x:x + batch_size]
        batchY = teIdy[x:x + batch_size]
        if batchX.shape[0] != batch_size:
            continue
        # use the test executor as we don't care about gradients
        executor_test.arg_dict['data'][:] = batchX
        executor_test.forward()
        output.extend(executor_test.outputs[0].asnumpy().tolist())
    # print (str(num_correct) + ",")
    print(mean_squared_error(teIdy[:len(output)], output))
 #rd = lm.LogisticRegression(penalty='l2', dual=True, tol=0.0001, 
 #                        C=1, fit_intercept=True, intercept_scaling=1.0, 
 #                       class_weight=None, random_state=None)
 rd=SVR(kernel='linear', degree=3, gamma='auto', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=-1)
 
 
 
  
 print "Training data"
  
 rd.fit(data.toarray(),y_train.values)
 scores1 = cross_val_score(rd, data , y_train, cv=cv, scoring='mean_squared_error')
 print scores1
 pred = rd.predict(tfv.transform(list(np.asarray(x_test))).toarray())
 print pred
 print sqrt(mean_squared_error(pred,y_test))
 
 pred=np.round(pred)
 pred=pred.astype(int)
 print 'predicting actual test set...'
 predicted=rd.predict(tfv.transform(list(np.asarray(test))).toarray())
 
 #for p in predicted:
     #print p
 predicted=np.round(predicted)
 predicted=predicted.astype(int)
 test["publication year"]=predicted
 test["record Id"]=record_id
 test.to_csv(submission,
 columns=['record Id','publication year'],index=False,sep='\t')
 
Example #35
'''
norm1 =  np.linalg.norm(y_train)    
if norm1 != 0:   
    y_train, y_test =  y_train/norm1, y_test/norm1
print norm1
'''

print y_train.shape

model = SVR(C=1.0, gamma=1.0)
model = LinearRegression()

lasso = Lasso(alpha=0.1).fit(X_train, y_train)
enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train)

y_pred = lasso.predict(X_test)

print "MSE", mean_squared_error(y_test, y_pred)
m = np.mean(y_test)
print "MSE (Mean)",mean_squared_error(y_test, m*np.ones(len(y_test)))


print "r^2 on test data", r2_score(y_test, y_pred)

plt.plot(enet.coef_, label='Elastic net coefficients')
plt.plot(lasso.coef_, label='Lasso coefficients')
plt.legend(loc='best')
plt.title("Lasso R^2: %f, Elastic Net R^2: %f"
          % (r2_score(y_test, lasso.predict(X_test)), r2_score(y_test, enet.predict(X_test))))
plt.show()