示例#1
0
def score_model(model, alpha=False):
    '''
    Fit *model* on the global training set, predict on the global test set,
    and print goodness-of-fit metrics (R2, MSE, MAE, MEDAE, EVS).

    Parameters:
    model : an sklearn-style regressor (must expose .fit / .predict, and
            .alpha_ when alpha=True — e.g. RidgeCV / LassoCV)
    alpha : when truthy, also report the fitted model.alpha_

    NOTE: relies on module-level Xtrain/ytrain/Xtest/ytest and on the
    metric aliases (r2_score, mse, mae, medae, evs) being in scope.

    MSLE is deliberately omitted: it raises a ValueError when targets or
    predictions are negative (the log of a non-positive value is undefined).
    '''
    model.fit(Xtrain, ytrain)
    yhat = model.predict(Xtest)
    r2 = r2_score(ytest, yhat)
    me = mse(ytest, yhat)
    ae = mae(ytest, yhat)
    mede = medae(ytest, yhat)
    ev = evs(ytest, yhat)

    # Build the report once; append the alpha line only when requested so
    # the two branches cannot drift apart.
    report = ("Results from {}: \nr2={:0.3f} \nMSE={:0.3f}               "
              "\nMAE={:0.3f} \nMEDAE={:0.3f} \nEVS={:0.3f}"
              .format(model, r2, me, ae, mede, ev))
    if alpha:  # idiomatic truthiness instead of "== True"
        report += " \nalpha={:0.3f}".format(model.alpha_)
    print(report)
def eval_reg(y_test, predictions):
    '''
    Evaluate a regression model by printing its main metrics.

    Parameters:
    y_test      : ground-truth target values (array-like)
    predictions : predicted target values (array-like)

    Prints R2, RMSE, MSE, MSLE (when applicable), MAE and EVS.
    MSLE is skipped when either array contains a negative value, because
    the squared-log error is undefined for negative inputs.
    '''
    print("### MEASURES OF REGRESSION MODEL ###")
    print("------------------------------------\n")

    print("R2 = {0:.4f}\n".format(r2_score(y_test, predictions)))  # R2
    print("RMSE = {0:.4f}\n".format(mse(
        y_test, predictions, squared=False)))  # Root Mean Squared Error
    print("MSE = {0:.4f}\n".format(mse(y_test, predictions,
                                       squared=True)))  # Mean Squared Error

    # BUG FIX: msle raises a ValueError when *either* y_test or the
    # predictions contain negatives; the original only checked predictions.
    if min(predictions) < 0 or min(y_test) < 0:
        print(
            "MSLE not possible to be applied. Values contain negatives.\n"
        )
    else:
        print("MSLE = {0:.4f}\n".format(msle(
            y_test, predictions)))  # Mean Squared Log Error

    print("MAE = {0:.4f}\n".format(mae(y_test,
                                       predictions)))  # Mean Absolute Error
    print("EVS = {0:.4%}\n".format(evs(
        y_test, predictions)))  # Explained Variance Score
def val(mdl, crit, ldr):
    '''
    Run one validation pass over the dataloader.

    Arguments:
    mdl         : the model to be validated                     // nn.Module
    crit        : the criterion (loss) function                 // nn loss function
    ldr         : the dataloader for validation                 // dataloader

    Returns:
    val_los_avg : the averaged validation loss                  // float
    val_acc_avg : the averaged validation accuracy (EVS)        // float
    '''
    mdl.eval()  # evaluation mode: disables dropout / batch-norm updates
    val_los, val_acc = 0.0, 0.0
    with torch.no_grad():  # hoisted out of the loop: no grads needed below
        for X, y in ldr:
            X, y = X.to(device), y.to(device)
            y_pred = mdl(X)  # model predictions for this batch
            los = crit(y_pred, y.unsqueeze(1))
            # BUG FIX: accumulate plain Python floats via .item() so the
            # function returns floats (not 0-dim tensors) and the batch
            # loss tensors are not kept alive across iterations.
            val_los += los.item() * X.size(0)
            # use explained_variance_score as the accuracy proxy
            val_acc += evs(y.unsqueeze(1).cpu().numpy(),
                           y_pred.cpu().numpy()) * X.size(0)

    # dataset-size-weighted averages
    return val_los / len(ldr.dataset), val_acc / len(ldr.dataset)
示例#4
0
def modela_kvalitate(y_test, resultats):
    """Print model-quality metrics (explained variance and R2) in bold.

    A quality score above 0.6 is considered acceptable.
    """
    variance = evs(y_test, resultats)
    determination = r2(y_test, resultats)
    print(cl('Explained Variance Score (dispersija): {}'.format(variance),
             attrs=['bold']))
    print(cl('R-Squared (kvadratiska novirze): {}'.format(determination),
             attrs=['bold']))
示例#5
0
def score_model(model):
    """
    Fit a model on the training set, predict on the test set, and print
    the goodness-of-fit metrics together with the fitted alpha.

    NOTE(review): unconditionally reads ``model.alpha_`` — presumably only
    regularised models (Ridge/Lasso CV) are passed in; confirm at call sites.
    """
    model.fit(Xtrain, ytrain)
    predictions = model.predict(Xtest)
    metrics = (
        r2_score(ytest, predictions),
        mse(ytest, predictions),
        mae(ytest, predictions),
        medae(ytest, predictions),
        evs(ytest, predictions),
    )
    template = ("Results from {}: \nr2={:0.3f} \nMSE={:0.3f} \nMAE={:0.3f} "
                "\nMEDAE={:0.3f} \nEVS={:0.3f} \nalpha={:0.3f}")
    print(template.format(model, *metrics, model.alpha_))
def score_model(model, alpha=False):
    '''
    Fit *model* on the global training set, predict on the global test
    set, then print goodness-of-fit metrics; include the fitted alpha
    when requested (only meaningful for regularised models exposing
    ``alpha_``).
    '''
    model.fit(Xtrain, ytrain)
    yhat = model.predict(Xtest)
    r2 = r2_score(ytest, yhat)
    me = mse(ytest, yhat)
    ae = mae(ytest, yhat)
    mede = medae(ytest, yhat)
    ev = evs(ytest, yhat)

    # Single shared template; the alpha suffix is appended only when
    # requested, so the two code paths cannot drift apart.
    msg = ("Results from {}: \nr2={:0.3f} \nMSE={:0.3f}               "
           "\nMAE={:0.3f} \nMEDAE={:0.3f} \nEVS={:0.3f}"
           .format(model, r2, me, ae, mede, ev))
    if alpha:  # idiomatic truthiness instead of "== True"
        msg += " \nalpha={:0.3f}".format(model.alpha_)
    print(msg)
def trn(mdl, opti, crit, ldr):
    '''
    Run one training epoch over the dataloader.

    Arguments:
    mdl         : the model to be trained                       // nn.Module
    opti        : the optimiser object                          // optim
    crit        : the criterion (loss) function                 // nn loss function
    ldr         : the dataloader for training                   // dataloader

    Returns:
    trn_los_avg : the averaged training loss                    // float
    trn_acc_avg : the averaged training accuracy (EVS)          // float
    '''
    mdl.train()  # set model to training mode
    trn_los, trn_acc = 0.0, 0.0
    for X, y in ldr:
        X, y = X.to(device), y.to(device)
        opti.zero_grad()  # clear gradients from the previous step
        y_pred = mdl(X)  # model predictions for this batch
        los = crit(y_pred,
                   y.unsqueeze(1))  # loss from the criterion function
        los.backward()  # backpropagate
        # BUG FIX: accumulate a plain float via .item(); keeping the loss
        # tensor would retain every batch's autograd graph in memory for
        # the whole epoch.
        trn_los += los.item() * X.size(0)
        # use explained_variance_score as the accuracy proxy
        trn_acc += evs(y.cpu().numpy(),
                       y_pred.detach().cpu().numpy()) * X.size(0)
        opti.step()  # apply the parameter update

    # dataset-size-weighted averages
    return trn_los / len(ldr.dataset), trn_acc / len(ldr.dataset)
示例#8
0
def model_significance(model, actual, predicted):
    '''
    Takes in a fitted regression results object (must expose ``f_pvalue``),
    the actual y values, and the predicted y values.
    Determines model significance by comparing the model's F-test p-value
    to an alpha of 0.05 and prints the verdict.

    Returns:
    float : the explained variance score of the predictions
    '''
    p = model.f_pvalue
    alpha = 0.05

    EVS = evs(actual, predicted)

    if p < alpha:  # BUG FIX: compare against the named threshold, not a literal
        print(
            f'p: {p} is less than alpha: {alpha}, therefore our model is significant.'
        )
    else:
        print(
            f'p: {p} is more than alpha: {alpha}, therefore our model is not significant.'
        )
    print('\n')
    print(f'EVS: {EVS}')
    # BUG FIX: return the computed score, not the metric function itself.
    return EVS
示例#9
0
def accuracy(data1, data2):
    '''
    Compare a reference pose dataframe (data1) with a user pose dataframe
    (data2), joint by joint, using the explained variance score.

    The user's frame is first brought to the reference length: when it is
    shorter, each column is padded (top and bottom) with its mean value;
    when it is longer or equal, rows are trimmed from both ends.  Both
    frames are then min-max normalised and EVS is computed per joint for
    the x and y coordinates.

    Returns:
    evs_t : list of mean (x, y) accuracy scores per joint
    evs_f : list of human-readable verdicts per joint
    '''
    poi_rem = [5, 6, 7, 8, 11, 12, 13, 15, 17]
    poi_ori = [l for l in range(18)]
    poi_new = [l for l in poi_ori if l not in poi_rem]
    poi_list = ['Nose', 'Neck', 'Right Shoulder', 'Right Elbow', 'Right Wrist', 'Right Knee', 'Right Ankle', 'Right Eye', 'Right Ear']
    col_x, col_y, col_c = [], [], []
    for i in poi_new:
        col_x.append('x_'+str(i))
        col_y.append('y_'+str(i))
        col_c.append('c_'+str(i))
    col = col_x + col_y + col_c
    d2 = {}
    # Split the length difference across the top (m rows) and bottom (n
    # rows); for an odd difference the extra row goes to the top.
    diff = abs(len(data1) - len(data2))
    m = int(np.ceil(diff / 2))
    n = int(np.floor(diff / 2))
    for i in col:
        l = list(data2[i])
        if len(data1) > len(data2):
            # pad the user's column with its mean value at both ends
            col_mean = np.mean(l)  # hoisted: one mean per column
            d2[i] = [col_mean] * m + l + [col_mean] * n
        else:
            # trim m rows from the top and n rows from the bottom.
            # BUG FIX: the original used l[m:][:-n], which produces an
            # empty list when n == 0 (i.e. when the frames have equal
            # length); slicing to len(l) - n handles that case correctly.
            d2[i] = l[m:len(l) - n]
    data2 = pd.DataFrame(d2, columns=col)  # dictionary back to dataframe
    d1 = minMaxNorm(data1)
    d2 = minMaxNorm(data2)
    evs_x = []
    evs_y = []
    for i, j in zip(col_x, col_y):
        evs_x.append(evs(d1[i], d2[i]))  # per-joint accuracy, x dimension
        evs_y.append(evs(d1[j], d2[j]))  # per-joint accuracy, y dimension
    # mean accuracy across the x and y dimensions
    evs_t = [np.mean([a, b]) for a, b in zip(evs_x, evs_y)]
    evs_f = []
    for score, joint in zip(evs_t, poi_list):  # categorise each joint's score
        if score < 0:
            evs_f.append(joint + ' movement is entirely incorrect')
        elif score < 0.2:
            evs_f.append(joint + ' movement is majorly incorrect')
        elif score < 0.4:
            evs_f.append(joint + ' movement is minorly incorrect')
        elif score < 0.6:
            evs_f.append(joint + ' movement is minorly correct')
        elif score < 0.8:
            evs_f.append(joint + ' movement is majorly correct')
        else:
            evs_f.append(joint + ' Movement is entirely correct')
    return evs_t, evs_f
     rmse_m.append(rmse(test_y, mlr_p))
     rmse_m.append(rmse(test_y, svr_p))
     rmse_m.append(rmse(test_y, dt_p))
     rmse_m.append(rmse(test_y, pr_p))
     #MAE for models
     mae_m.append(mae(test_y, mlr_p))
     mae_m.append(mae(test_y, svr_p))
     mae_m.append(mae(test_y, dt_p))
     mae_m.append(mae(test_y, pr_p))
     #MDAE for models
     mdae_m.append(mdae(test_y, mlr_p))
     mdae_m.append(mdae(test_y, svr_p))
     mdae_m.append(mdae(test_y, dt_p))
     mdae_m.append(mdae(test_y, pr_p))
     #EVS for models
     evs_m.append(evs(test_y, mlr_p))
     evs_m.append(evs(test_y, svr_p))
     evs_m.append(evs(test_y, dt_p))
     evs_m.append(evs(test_y, pr_p))
     #R2 for models
     r2_m.append(r2(test_y, mlr_p))
     r2_m.append(r2(test_y, svr_p))
     r2_m.append(r2(test_y, dt_p))
     r2_m.append(r2(test_y, pr_p))
     c += 1
 #Converting the results to dict to dataframe
 d = {}
 d['Method'] = meth
 d['MSE'] = mse_m
 d['RMSE'] = rmse_m
 d['MAE'] = mae_m
示例#11
0
dt_yhat = dt.predict(x_test)

# Modelling: Linear Regression
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_yhat = lr.predict(x_test)

# Modelling: Bayesian
bayesian = BayesianRidge()
bayesian.fit(x_train, y_train)
# BUG FIX: predict on the held-out test set, not the training set, so the
# Bayesian model is evaluated on the same data as the other models.
bayesian_yhat = bayesian.predict(x_test)

# Determining accuracy

rf_accuracy = rf.score(x_test, y_test)
rf_evs = evs(y_test, rf_yhat)

print("Random Forest Training Accuracy:", rf.score(x_train, y_train))
print("Random Forest Testing Accuracy:", rf_accuracy)
print("Random Forest Explained Variance Score:", rf_evs)

dt_accuracy = dt.score(x_test, y_test)
dt_evs = evs(y_test, dt_yhat)

print("Decision Tree Training Accuracy:", dt.score(x_train, y_train))
print("Decision Tree Testing Accuracy:", dt_accuracy)
print("Decision Tree Explained Variance Score:", dt_evs)

lr_accuracy = lr.score(x_test, y_test)
lr_evs = evs(y_test, lr_yhat)
示例#12
0
     test_x = test.drop(columns=['District', 'Index', 'Rainfall'])
     test_y = test['Rainfall']
     for j in deg:
         print(i, c, j)
         poly = pf(degree=j)
         poly_tr = poly.fit_transform(train_x)
         poly_ts = poly.fit_transform(test_x)
         pr.fit(poly_tr, train_y)
         pr_p = pr.predict(poly_ts)
         #Error values
         d.append(j)
         mse_d.append(mse(test_y, pr_p))
         rmse_d.append(rmse(test_y, pr_p))
         mae_d.append(mae(test_y, pr_p))
         mdae_d.append(mdae(test_y, pr_p))
         evs_d.append(evs(test_y, pr_p))
         r2_d.append(r2(test_y, pr_p))
     c += 1
 t = {}
 t['Degree'] = d
 t['MSE'] = mse_d
 t['RMSE'] = rmse_d
 t['MAE'] = mae_d
 t['MDAE'] = mdae_d
 t['EVS'] = evs_d
 t['R2'] = r2_d
 tf = pd.DataFrame(
     t, columns=['Degree', 'MSE', 'RMSE', 'MAE', 'MDAE', 'EVS', 'R2'])
 for j in deg:
     temp = tf[tf['Degree'] == j]
     dl.append(i)
示例#13
0
def better_than_baseline(RMSE, RMSE_baseline) -> None:
    '''
    Takes in the RMSE for a model as well as the RMSE for the baseline and compares the two.
    Prints out if the model is better or worse than the baseline.
    '''
    # Lower RMSE is better.
    if RMSE < RMSE_baseline:
        print('Model is better than baseline')
    else:
        # FIX: professional wording; also repaired the broken indentation above.
        print('Model performs no better than baseline')

def model_significance(model, actual, predicted):
    '''
    Takes in a fitted regression results object (must expose ``f_pvalue``),
    the actual y values, and the predicted y values.
    Determines significance by comparing the model's p-value to an alpha
    of 0.05 and prints the verdict.

    Returns:
    float : the explained variance score of the predictions
    '''
    p = model.f_pvalue
    alpha = 0.05
    # BUG FIX: removed a stray 'ß' character that sat at module indentation
    # here and broke the function body.

    EVS = evs(actual, predicted)

    if p < alpha:  # BUG FIX: compare against the named threshold, not a literal
        print(f'p: {p} is less than alpha: {alpha}, therefore our model is significant.')
    else:
        # BUG FIX: corrected the 'greather' typo in the message
        print(f'p: {p} is greater than alpha: {alpha}, therefore our model is not significant.')

    print('\n')
    print(f"EVS: {EVS}")

    # BUG FIX: return the computed score, not the metric function itself.
    return EVS
示例#14
0
    def train(self):
        """Run the active-learning regression experiment.

        For each round: split the data via alibox, fit an initial neural
        network on the labelled pool, then iteratively select new samples
        to label (by query strategy or at random, depending on
        ``self.phase``), retrain, and append MSE/MAE/max-error/EVS/R2 and
        sample counts to the corresponding ``self.*`` lists until the
        stopping criterion fires.  Metrics, model checkpoints and the
        final predictions are dumped under ``self.path/<round>/``.
        """
        # NOTE(review): `max` is deliberately shadowed by sklearn's
        # max_error here, and `all` / `round` are rebound below — confined
        # to this method, but easy to trip over when editing.
        from sklearn.metrics import (mean_squared_log_error as msle, max_error
                                     as max, mean_absolute_error as mae,
                                     mean_squared_error as mse,
                                     explained_variance_score as evs, r2_score
                                     as r2, mean_tweedie_deviance as tweedie)
        for round in range(1):
            try:
                os.mkdir('%s/%d' % (self.path, round))
            except FileExistsError:
                pass  # output directory already exists; reuse it

            # get data split of one fold
            train_idx, test_idx, label_ind, unlab_ind = self.alibox.get_split(
                round)
            # get intermediate results saver for one fold experiment
            saver = self.alibox.get_stateio(round)

            # set initial performance point
            model = self.model
            net = NN.NeuralNetworkRegressor(model=model,
                                            batch_size=1,
                                            device_ids=[0],
                                            epochs=50)
            net.lr_fc = 0.01

            # fit the initial network on the initially-labelled pool
            net.initiate(self.dataset[label_ind.index],
                         self.labels[label_ind.index])

            net.predict(self.testset)
            pred = net.preds

            # evaluation of the initial model on the held-out test set
            all = len(label_ind) + len(unlab_ind)
            lab_init = len(label_ind)
            self.mse.append(mse(self.testlab, pred))
            self.mae.append(mae(self.testlab, pred))
            self.max.append(max(self.testlab, pred))
            self.evs.append(evs(self.testlab, pred))
            self.r2.append(r2(self.testlab, pred))
            self.sample.append(len(label_ind.index))

            saver.set_initial_point(mse(self.testlab, pred))
            iteration = 0

            while not self.stopping_criterion.is_stop():
                # select subsets of Uind samples according to query strategy
                iteration += 1

                # decay the learning rate as the labelled pool grows
                lr_fc = net.lr_fc * (1 - len(label_ind.index) / (all * 1.001))
                for p in net.optimizer.param_groups:
                    p['lr'] = lr_fc
                print('learning rate is',
                      net.optimizer.state_dict()['param_groups'][0]['lr'])

                if self.phase == 'active':
                    if self.measure != 'residue':
                        net.predict(self.dataset[unlab_ind.index])
                    else:
                        net.predict(self.dataset[label_ind])
                    pred = net.preds

                    if self.measure == 'distance':
                        # keep a history of predictions so the strategy can
                        # measure how much they moved between iterations
                        if iteration == 1:
                            self._update_previous_prediction(pred)
                        else:
                            self._update_previous_prediction(
                                pred, select_ind, unlab_ind_save)
                        previous = self._get_previous_prediction()
                    else:
                        previous = None

                    # below 60% labelled: mostly query-strategy batches with
                    # an occasional random batch (every 10th iteration)
                    if len(label_ind) < all * 0.6:
                        if iteration % 10:
                            select_ind = self.query_strategy.select_by_prediction(
                                unlabel_index=unlab_ind,
                                predict=pred,
                                labels=self.labels[label_ind.index],
                                batch_size=int(lab_init * 1),
                                X_lab=self.dataset[label_ind.index],
                                X_unlab=self.dataset[unlab_ind.index],
                                previous=previous)
                        else:
                            select_ind = self.random.select(label_ind,
                                                            unlab_ind,
                                                            batch_size=int(
                                                                lab_init * 1))
                    else:
                        select_ind = self.query_strategy.select_by_prediction(
                            unlabel_index=unlab_ind,
                            predict=pred,
                            labels=self.labels[label_ind.index],
                            batch_size=int(len(label_ind) * 0.3),
                            X_lab=self.dataset[label_ind.index],
                            X_unlab=self.dataset[unlab_ind.index],
                            previous=previous)
                elif self.phase == 'passive':
                    # passive baseline: purely random sample selection
                    if len(label_ind) < all * 0.6:
                        select_ind = self.random.select(label_ind,
                                                        unlab_ind,
                                                        batch_size=int(
                                                            lab_init * 1))
                        # select_ind = self.random.select(label_ind, unlab_ind, batch_size=1)
                    else:
                        select_ind = self.random.select(
                            label_ind,
                            unlab_ind,
                            batch_size=int(len(label_ind) * 0.3))
                        # select_ind = self.random.select(label_ind, unlab_ind, batch_size=1)

                # update the datasets and previous prediction
                unlab_ind_save = unlab_ind.index
                label_ind.update(select_ind)
                unlab_ind.difference_update(select_ind)

                # update model and calc performance accoding to the updated model
                loss = net.train(self.dataset[label_ind.index],
                                 self.labels[label_ind.index])

                # if not iteration%2:
                net.predict(self.testset)
                pred = net.preds

                # evaluation after this round of labelling + retraining
                self.mse.append(mse(self.testlab, pred))
                self.mae.append(mae(self.testlab, pred))
                self.max.append(max(self.testlab, pred))
                self.evs.append(evs(self.testlab, pred))
                self.r2.append(r2(self.testlab, pred))
                self.sample.append(len(label_ind.index))
                self.loss.append(loss)

                # save the results
                st = self.alibox.State(select_ind, mse(self.testlab, pred))
                saver.add_state(st)
                saver.save()

                self.stopping_criterion.update_information(saver)
                # checkpoint the model for this iteration
                torch.save(self.model,
                           './%s/%d/model%d' % (self.path, round, iteration))

            self.stopping_criterion.reset()
            self.unc_result.append(copy.deepcopy(saver))
            # persist all metric curves and final predictions for this round
            joblib.dump(self.mse, './%s/%d/mse' % (self.path, round))
            joblib.dump(self.mae, './%s/%d/mae' % (self.path, round))
            joblib.dump(self.max, './%s/%d/max' % (self.path, round))
            joblib.dump(self.evs, './%s/%d/evs' % (self.path, round))
            joblib.dump(self.r2, './%s/%d/r2' % (self.path, round))
            joblib.dump(self.sample, './%s/%d/sample' % (self.path, round))
            joblib.dump(self.loss, './%s/%d/loss' % (self.path, round))
            joblib.dump(self.testlab, './%s/%d/testlab' % (self.path, round))
            joblib.dump(pred, './%s/%d/pred' % (self.path, round))
        # summarise the experiment across rounds
        self.analyser = self.alibox.get_experiment_analyser(
            x_axis='num_of_queries')
        self.analyser.add_method(
            method_name='QueryInstanceDistribution-distance',
            method_results=self.unc_result)
        print(self.analyser)
示例#15
0
     mse_clus.append(mse(test_clus_y, clus_p))
     mse_gen.append(mse(test_gen_y, gen_p))
     #RMSE for models
     rmse_ds.append(rmse(test_ds_y, ds_p))
     rmse_clus.append(rmse(test_clus_y, clus_p))
     rmse_gen.append(rmse(test_gen_y, gen_p))
     #MAE for models
     mae_ds.append(mae(test_ds_y, ds_p))
     mae_clus.append(mae(test_clus_y, clus_p))
     mae_gen.append(mae(test_gen_y, gen_p))
     #MDAE for models
     mdae_ds.append(mdae(test_ds_y, ds_p))
     mdae_clus.append(mdae(test_clus_y, clus_p))
     mdae_gen.append(mdae(test_gen_y, gen_p))
     #EVS for models
     evs_ds.append(evs(test_ds_y, ds_p))
     evs_clus.append(evs(test_clus_y, clus_p))
     evs_gen.append(evs(test_gen_y, gen_p))
     #R2 for models
     r2_ds.append(r2(test_ds_y, ds_p))
     r2_clus.append(r2(test_clus_y, clus_p))
     r2_gen.append(r2(test_gen_y, gen_p))
 #Mean of MSE for models
 mse_ts_ds.append(np.mean(mse_ds))
 mse_ts_clus.append(np.mean(mse_clus))
 mse_ts_gen.append(np.mean(mse_gen))
 #Mean of RMSE for models
 rmse_ts_ds.append(np.mean(rmse_ds))
 rmse_ts_clus.append(np.mean(rmse_clus))
 rmse_ts_gen.append(np.mean(rmse_gen))
 #Mean of MAE for models
# principal component analysis (currently disabled)
'''from sklearn.decomposition import PCA
pca=PCA(n_components=65)
x_train=pca.fit_transform(x_train)
variance=pca.explained_variance_ratio_'''
# train/test split
from sklearn.model_selection import train_test_split
x_train1, x_test1, y_train1, y_test1 = train_test_split(x_train,
                                                        y_train,
                                                        test_size=0.2,
                                                        random_state=0)

# model creation
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train1, y_train1)
y_pred = regressor.predict(x_test1)

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import explained_variance_score as evs
from sklearn.metrics import r2_score
# BUG FIX: store the metric values under new names instead of rebinding
# the imported metric functions (`mse`, `evs`) to floats, which would
# break any later call to those functions in this module.
mse_score = mse(y_test1, y_pred)
evs_score = evs(y_test1, y_pred)
r2 = r2_score(y_test1, y_pred)

# more advanced evaluation: K-fold cross validation across the data
from sklearn.model_selection import cross_val_score as cvs
accuracies = cvs(estimator=regressor, X=x_train1, y=y_train1, cv=10)
# NOTE(review): results are discarded — presumably inspected interactively
accuracies.std()
accuracies.mean()
# Accumulators for each goodness-of-fit metric, one entry per combination.
mse_f, rmse_f, mae_f, mdae_f, evs_f, r2_f = [], [], [], [], [], []
y = data['Actual']
for i in comb_names:
    print(i)
    df = data[i]
    p = df.mean(axis=1)  # ensemble prediction: row-wise mean of the members
    # Evaluate every metric against the actuals and file each result.
    for bucket, metric in ((mse_f, mse), (rmse_f, rmse), (mae_f, mae),
                           (mdae_f, mdae), (evs_f, evs), (r2_f, r2)):
        bucket.append(metric(y, p))
# Assemble the results into a table and persist it.
d = {
    'Combinations': comb_names,
    'MSE': mse_f,
    'RMSE': rmse_f,
    'MAE': mae_f,
    'MDAE': mdae_f,
    'EVS': evs_f,
    'R2': r2_f,
}
df = pd.DataFrame(
    d, columns=['Combinations', 'MSE', 'RMSE', 'MAE', 'MDAE', 'EVS', 'R2'])
print(df)
df.to_csv(
    'C:\\Users\\Preetham G\\Documents\\Research Projects\\Ensemble Rainfall\\Results\\Main - SA Final.csv',
    index=False)
     train_y = train['Rainfall']
     test_x = test.drop(
         columns=['District', 'Index', 'Rainfall', 'Minimum Temperature'])
     test_y = test['Rainfall']
     for j in ker:
         print(i, c, j)
         svr = SVR(kernel=j, C=1, epsilon=0.1)
         svr.fit(train_x, train_y)
         svr_p = svr.predict(test_x)
         #Error values
         k.append(j)
         mse_k.append(mse(test_y, svr_p))
         rmse_k.append(rmse(test_y, svr_p))
         mae_k.append(mae(test_y, svr_p))
         mdae_k.append(mdae(test_y, svr_p))
         evs_k.append(evs(test_y, svr_p))
         r2_k.append(r2(test_y, svr_p))
     c += 1
 t = {}
 t['Kernel'] = k
 t['MSE'] = mse_k
 t['RMSE'] = rmse_k
 t['MAE'] = mae_k
 t['MDAE'] = mdae_k
 t['EVS'] = evs_k
 t['R2'] = r2_k
 tf = pd.DataFrame(
     t, columns=['Kernel', 'MSE', 'RMSE', 'MAE', 'MDAE', 'EVS', 'R2'])
 for j in ker:
     temp = tf[tf['Kernel'] == j]
     dl.append(i)
示例#19
0
# Paths for the serialized model/vectorizer artifacts and the test data.
training_objects_dir = 'training_objects/'
testing_file = 'small_subset_test.csv'
model_file = 'small_subset_model.obj'
vectorizer_file = 'small_subset_vectorizer.obj'

# NOTE(review): `input_files_dir`, `pandas`, `pickle`, `hstack` and `evs`
# are expected to be defined/imported earlier in this file.
print(f"Reading testing data from {input_files_dir + testing_file}...")
data = pandas.read_csv(input_files_dir + testing_file,
                       header=None,
                       encoding='latin-1')

print(
    f"Loading model from {training_objects_dir + model_file} and vectorizer from {training_objects_dir + vectorizer_file}..."
)
# SECURITY NOTE: pickle.load executes arbitrary code from the file —
# only load artifacts produced by a trusted training run.
with open(training_objects_dir + vectorizer_file, 'rb') as f:
    vectorizer = pickle.load(f)
with open(training_objects_dir + model_file, 'rb') as f:
    clf = pickle.load(f)

# Vectorize the tweet text (column 5) into the model's feature space.
x_test = vectorizer.transform(data.iloc[:, 5].values)
x = hstack([x_test])

# predict on test data
print("Predicting test data...")
results = clf.predict(x)

# for tweet, prediction, correct in zip(data.iloc[:, 5].values, results, data.iloc[:, 0].values):
#     print(tweet, "prediction:", prediction, ", correct:", correct)

# Column 0 holds the ground-truth labels; score predictions against them.
print("Explained variance score (1.0 is best):",
      evs(data.iloc[:, 0].values, results))
    mdae_t = []
    evs_t = []
    r2_t = []
    for c in range(1, 101):
        print(i, c)
        data = pd.read_csv(
            'C:\\Users\\Preetham G\\Documents\\Research Projects\\Ensemble Rainfall\\Results\\Adaboost\\Main Results\\R'
            + str(c) + '.csv')
        y = data['True']
        data = data[i]
        p = data.mean(axis=1)
        mse_t.append(mse(y, p))
        rmse_t.append(rmse(y, p))
        mae_t.append(mae(y, p))
        mdae_t.append(mdae(y, p))
        evs_t.append(evs(y, p))
        r2_t.append(r2(y, p))
    mse_f.append(np.mean(mse_t))
    rmse_f.append(np.mean(rmse_t))
    mae_f.append(np.mean(mae_t))
    mdae_f.append(np.mean(mdae_t))
    evs_f.append(np.mean(evs_t))
    r2_f.append(np.mean(r2_t))
# Collect the per-combination metric lists into one table-shaped dict.
d = {
    'Combinations': comb_names,
    'MSE': mse_f,
    'RMSE': rmse_f,
    'MAE': mae_f,
    'MDAE': mdae_f,
    'EVS': evs_f,
    'R2': r2_f,
}
示例#21
0
    lasso.fit(x_train, y_train)
    lasso_result = lasso.predict(x_test)
    lasso_file = open("data/lasso.model", 'wb')
    pickle.dump(lasso, lasso_file)
    lasso_file.close()

    bayesian = BayesianRidge()
    bayesian.fit(x_train, y_train)
    bayesian_result = bayesian.predict(x_test)
    bayesian_file = open("data/bayesian.model", 'wb')
    pickle.dump(bayesian, bayesian_file)
    bayesian_file.close()

    elastic = ElasticNet(alpha=0.01)
    elastic.fit(x_train, y_train)
    elastic_result = elastic.predict(x_test)
    elastic_file = open("data/elastic.model", 'wb')
    pickle.dump(elastic, elastic_file)
    elastic_file.close()

    return y_test, [
        model_1_result, ridge_result, elastic_result, lasso_result,
        bayesian_result
    ]


y_test, results = model()
for result in results:
    # BUG FIX: explained_variance_score expects (y_true, y_pred); the
    # original passed the prediction as the ground truth, which changes
    # the score because EVS is not symmetric.
    score = evs(y_test, result)
    print(score)
示例#22
0
# Target is the IMDB score; drop it (and the title) from the feature matrix.
Y = X["imdb_score"]
X.drop(["imdb_score"], axis=1, inplace=True)
X.drop(["movie_title"], axis=1, inplace=True)

# clean and scale: fill missing values, then bring the large-magnitude
# columns down to comparable ranges
X = X.fillna(0)
for column, divisor in (("gross", 10000),
                        ("num_voted_users", 10000),
                        ("cast_total_facebook_likes", 1000),
                        ("movie_facebook_likes", 1000),
                        ("budget", 10000),
                        ("actor_1_facebook_likes", 1000),
                        ("actor_2_facebook_likes", 1000),
                        ("actor_3_facebook_likes", 1000)):
    X[column] = X[column] / divisor

# split into train and test sets (80/20)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8,
                                                    test_size=0.2)

# create regressor: degree-2 polynomial features feeding a linear model
reg = Pipeline([('poly', PolynomialFeatures(degree=2)),
                ('linear', linear_model.LinearRegression(fit_intercept=False))])

# train
reg.fit(X_train, Y_train)

# predict
Y_pred = reg.predict(X_test)

# metrics
print('Variance:' + str(evs(Y_test, Y_pred)))
print('Mean square error:' + str(mse(Y_test, Y_pred)))
示例#23
0
    r2_t = []
    for tr_i, ts_i in rkf.split(data):
        print(i, c)
        train, test = data.iloc[tr_i], data.iloc[ts_i]
        train_x = train.drop(columns=['Rainfall'])
        train_y = train['Rainfall']
        test_x = test.drop(columns=['Rainfall'])
        test_y = test['Rainfall']
        model = DecisionTreeRegressor(criterion='mse', splitter='best', max_depth=i)#, min_samples_leaf=j, min_samples_split=k)
        model.fit(train_x, train_y)
        ts_p = model.predict(test_x)
        mse_t.append(mse(test_y, ts_p))
        rmse_t.append(rmse(test_y, ts_p))
        mae_t.append(mae(test_y, ts_p))
        mdae_t.append(mdae(test_y, ts_p))
        evs_t.append(evs(test_y, ts_p))
        r2_t.append(r2(test_y, ts_p))
        c += 1
    dep_f.append(i)
#    saml_f.append(j)
#    sams_f.append(k)
    mse_f.append(np.mean(mse_t))
    rmse_f.append(np.mean(rmse_t))
    mae_f.append(np.mean(mae_t))
    mdae_f.append(np.mean(mdae_t))
    evs_f.append(np.mean(evs_t))
    r2_f.append(np.mean(r2_t))
# Collect the sweep results; the leaf/split sweeps are currently disabled.
d = {'Max Depth': dep_f}
#d['Min Samples Leaf'] = saml_f
#d['Min Samples Split'] = sams_f
# 1. OLS — fit ordinary least squares on the training split and predict
ols = LinearRegression()
ols.fit(X_train, y_train)
ols_yhat = ols.predict(X_test)

#print(X_test)
#print(ols_yhat)

# Wrap the predictions in a one-column frame for display
df_ols = pd.DataFrame(data=ols_yhat)
df_ols.columns = ['ols predicted']

print(df_ols)
#print(cl(df_ols.dtypes, attrs = ['bold']))

# Report explained variance and R2 for the OLS model (cl() renders styled text)
print(
    cl('Explained Variance Score of OLS model is {}'.format(
        evs(y_test, ols_yhat)),
       attrs=['bold']))
print(
    cl('R-Squared of OLS model is {}'.format(r2(y_test, ols_yhat)),
       attrs=['bold']))

# 2. Ridge

# Ridge regression with a fixed regularisation strength of 0.5
ridge = Ridge(alpha=0.5)
ridge.fit(X_train, y_train)
ridge_yhat = ridge.predict(X_test)

#print(ridge_yhat)

# Wrap the ridge predictions in a one-column frame for display
df_ridge = pd.DataFrame(data=ridge_yhat)
df_ridge.columns = ['ridge predicted']
示例#25
0
     train_y = train['Rainfall']
     test_x = test.drop(columns=['Rainfall'])
     test_y = test['Rainfall']
     model = AdaBoostRegressor(n_estimators=i)
     model.fit(train_x, train_y)
     tr_p = model.predict(train_x)
     ts_p = model.predict(test_x)
     mse_tr_t.append(mse(train_y, tr_p))
     mse_ts_t.append(mse(test_y, ts_p))
     rmse_tr_t.append(rmse(train_y, tr_p))
     rmse_ts_t.append(rmse(test_y, ts_p))
     mae_tr_t.append(mae(train_y, tr_p))
     mae_ts_t.append(mae(test_y, ts_p))
     mdae_tr_t.append(mdae(train_y, tr_p))
     mdae_ts_t.append(mdae(test_y, ts_p))
     evs_tr_t.append(evs(train_y, tr_p))
     evs_ts_t.append(evs(test_y, ts_p))
     r2_tr_t.append(r2(train_y, tr_p))
     r2_ts_t.append(r2(test_y, ts_p))
 mse_tr_f.append(np.mean(mse_tr_t))
 mse_ts_f.append(np.mean(mse_ts_t))
 rmse_tr_f.append(np.mean(rmse_tr_t))
 rmse_ts_f.append(np.mean(rmse_ts_t))
 mae_tr_f.append(np.mean(mae_tr_t))
 mae_ts_f.append(np.mean(mae_ts_t))
 mdae_tr_f.append(np.mean(mdae_tr_t))
 mdae_ts_f.append(np.mean(mdae_ts_t))
 evs_tr_f.append(np.mean(evs_tr_t))
 evs_ts_f.append(np.mean(evs_ts_t))
 r2_tr_f.append(np.mean(r2_tr_t))
 r2_ts_f.append(np.mean(r2_ts_t))
示例#26
0
     train_x = train.drop(columns=['District', 'Index', 'Rainfall'])
     train_y = train['Rainfall']
     test_x = test.drop(columns=['District', 'Index', 'Rainfall'])
     test_y = test['Rainfall']
     for j in dep:
         print(i, c, j)
         dt = dtr(max_depth=j)
         dt.fit(train_x, train_y)
         dt_p = dt.predict(test_x)
         #Error values
         d.append(j)
         mse_d.append(mse(test_y, dt_p))
         rmse_d.append(rmse(test_y, dt_p))
         mae_d.append(mae(test_y, dt_p))
         mdae_d.append(mdae(test_y, dt_p))
         evs_d.append(evs(test_y, dt_p))
         r2_d.append(r2(test_y, dt_p))
     c += 1
 t = {}
 t['Depth'] = d
 t['MSE'] = mse_d
 t['RMSE'] = rmse_d
 t['MAE'] = mae_d
 t['MDAE'] = mdae_d
 t['EVS'] = evs_d
 t['R2'] = r2_d
 tf = pd.DataFrame(
     t, columns=['Depth', 'MSE', 'RMSE', 'MAE', 'MDAE', 'EVS', 'R2'])
 for j in dep:
     temp = tf[tf['Depth'] == j]
     dl.append(i)