# *note there is a function model.fit(x_train, y_train, ... ) that wraps
# get_hyperparameters() and train(), so that you do not have to manually
# pass hyperparameters to train(). However, get_hyperparameters() can
# be expensive, so I recommend running it once per dataset and using those
# hyperparameter values in train() to save computational time
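
# A sketch of caching those tuned values so the expensive search runs
# only once per dataset; get_cached_hyperparameters() is a hypothetical
# helper, not part of djinn:
import os
import pickle

def get_cached_hyperparameters(model, x_train, y_train, cache_path):
    """Run get_hyperparameters() once and cache the resulting dict."""
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    optimal = model.get_hyperparameters(x_train, y_train, random_state=1)
    with open(cache_path, 'wb') as f:
        pickle.dump(optimal, f)
    return optimal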

# make predictions
m = model.predict(x_test)  #returns the median prediction if more than one tree

#evaluate results
acc = sklearn.metrics.accuracy_score(y_test, m.flatten())
print('Accuracy', acc)

#close model
model.close_model()

print("Reload model and continue training")
# reload model; can also open it using cPickle.load()
model2 = djinn.load(model_name="class_djinn_test")
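
# The cPickle route mentioned above, sketched out: on Python 3 the
# module is just pickle (cPickle was merged into it), and the on-disk
# filename used here is an assumption that depends on how save_model
# named the file.
# import pickle
# with open("class_djinn_test.pkl", "rb") as f:
#     model2 = pickle.load(f)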

#continue training for 20 epochs using the same learning rate, etc. as before
model2.continue_training(x_train,
                         y_train,
                         20,
                         learnrate,
                         batchsize,
                         random_state=1)

#make updated predictions
m2 = model2.predict(x_test)

#evaluate results
acc2 = sklearn.metrics.accuracy_score(y_test, m2.flatten())
print('Accuracy', acc2)

# ----- Example 2 -----

# evaluate results for each of the two output columns
for i in [0, 1]:
    mse = sklearn.metrics.mean_squared_error(y_test[:, i], m[:, i])
    mabs = sklearn.metrics.mean_absolute_error(y_test[:, i], m[:, i])
    exvar = sklearn.metrics.explained_variance_score(y_test[:, i], m[:, i])
    print('Output', i, 'MSE', mse)
    print('Output', i, 'M Abs Err', mabs)
    print('Output', i, 'Expl. Var.', exvar)

# close model
model.close_model()

print("Reload model and continue training for 50 epochs")

# reload model and continue training for 50 more epochs
model2 = djinn.load(model_name="multireg_djinn_test")

model2.continue_training(x_train, y_train, 50, learnrate, batchsize)

m2 = model2.predict(x_test)

# evaluate results
mse2 = sklearn.metrics.mean_squared_error(y_test, m2)
mabs2 = sklearn.metrics.mean_absolute_error(y_test, m2)
exvar2 = sklearn.metrics.explained_variance_score(y_test, m2)
print('MSE', mse2)
print('M Abs Err', mabs2)
print('Expl. Var.', exvar2)

print("Create Bayesian-DJINN model with multiple outputs")
modelname = "multireg_bdjinn_test"  #name the model

# ----- Example 3 -----

m = model.predict(x_test)  #returns the median prediction if more than one tree

#evaluate results
mse = sklearn.metrics.mean_squared_error(y_test, m)
mabs = sklearn.metrics.mean_absolute_error(y_test, m)
exvar = sklearn.metrics.explained_variance_score(y_test, m)
print('MSE', mse)
print('M Abs Err', mabs)
print('Expl. Var.', exvar)

#close model
model.close_model()

print("Reload model and continue training for 10 epochs")
# reload model; can also open it using cPickle.load()
model2 = djinn.load(model_name="djinn_test")

#continue training for 20 epochs using the same learning rate, etc. as before
model2.continue_training(x_train,
                         y_train,
                         20,
                         learnrate,
                         batchsize,
                         random_state=1)

#make updated predictions
m2 = model2.predict(x_test)

#evaluate results
mse2 = sklearn.metrics.mean_squared_error(y_test, m2)
mabs2 = sklearn.metrics.mean_absolute_error(y_test, m2)
exvar2 = sklearn.metrics.explained_variance_score(y_test, m2)
print('MSE', mse2)
print('M Abs Err', mabs2)
print('Expl. Var.', exvar2)

# ----- Example 4 -----

# imports needed to make this example self-contained; numpy is also used
# by the uncertainty sketch and the commented-out plotting code below
import numpy as np
import sklearn.metrics
from djinn import djinn  # DJINN package import, as in the library's examples

def computeMSE(x_train, x_test, y_train, y_test, feature):
    """Zero out one feature column in the train and test sets, run the
    DJINN and Bayesian-DJINN examples on the ablated data, and return
    the Bayesian model's test MSE (on its median predictions)."""
    x_train[:, feature] = 0
    x_test[:, feature] = 0

    print("djinn example")
    modelname = "reg_djinn_test"  # name the model
    ntrees = 1  # number of trees = number of neural nets in ensemble
    maxdepth = 4  # max depth of tree -- optimize this for each data set
    dropout_keep = 1.0  # dropout typically set to 1 for non-Bayesian models

    #initialize the model
    model = djinn.DJINN_Regressor(ntrees, maxdepth, dropout_keep)

    # find optimal settings: this function returns dict with hyper-parameters
    # each djinn function accepts random seeds for reproducible behavior
    optimal = model.get_hyperparameters(x_train, y_train, random_state=1)
    batchsize = optimal['batch_size']
    learnrate = optimal['learn_rate']
    epochs = optimal['epochs']

    # train the model with hyperparameters determined above
    model.train(x_train,
                y_train,
                epochs=epochs,
                learn_rate=learnrate,
                batch_size=batchsize,
                display_step=1,
                save_files=True,
                file_name=modelname,
                save_model=True,
                model_name=modelname,
                random_state=1)

    # *note there is a function model.fit(x_train,y_train, ... ) that wraps
    # get_hyperparameters() and train(), so that you do not have to manually
    # pass hyperparameters to train(). However, get_hyperparameters() can
    # be expensive, so I recommend running it once per dataset and using those
    # hyperparameter values in train() to save computational time

    # make predictions
    m = model.predict(x_test)  # returns the median prediction if more than one tree

    #evaluate results
    mse = sklearn.metrics.mean_squared_error(y_test, m)
    mabs = sklearn.metrics.mean_absolute_error(y_test, m)
    exvar = sklearn.metrics.explained_variance_score(y_test, m)
    print('MSE', mse)
    print('M Abs Err', mabs)
    print('Expl. Var.', exvar)

    #close model
    model.close_model()

    print("Reload model and continue training for 20 epochs")
    # reload model; can also open it using cPickle.load()
    model2 = djinn.load(model_name="reg_djinn_test")

    #continue training for 20 epochs using the same learning rate, etc. as before
    model2.continue_training(x_train,
                             y_train,
                             20,
                             learnrate,
                             batchsize,
                             random_state=1)

    #make updated predictions
    m2 = model2.predict(x_test)

    #evaluate results
    mse2 = sklearn.metrics.mean_squared_error(y_test, m2)
    mabs2 = sklearn.metrics.mean_absolute_error(y_test, m2)
    exvar2 = sklearn.metrics.explained_variance_score(y_test, m2)
    print('MSE', mse2)
    print('M Abs Err', mabs2)
    print('Expl. Var.', exvar2)

    # Bayesian formulation with dropout. Recommend dropout keep
    # probability ~0.95, 5-10 trees.
    print("Bayesian djinn example")
    ntrees = 10
    dropout_keep = 0.95
    modelname = "reg_bdjinn_test"

    # initialize a model
    bmodel = djinn.DJINN_Regressor(ntrees, maxdepth, dropout_keep)

    # "fit()" does what get_hyperparameters + train does, in one step:
    bmodel.fit(x_train,
               y_train,
               display_step=1,
               save_files=True,
               file_name=modelname,
               save_model=True,
               model_name=modelname,
               random_state=1)

    # evaluate: niters is the number of times you evaluate the network for
    # a single sample. higher niters = better resolved distribution of predictions
    niters = 100
    bl, bm, bu, results = bmodel.bayesian_predict(x_test,
                                                  n_iters=niters,
                                                  random_state=1)
    # bayesian_predict returns the 25th, 50th, and 75th percentiles, plus a
    # results dict containing all of the raw predictions
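
    # If you want intervals other than the default quartiles, the raw
    # per-draw predictions in the results dict can be re-summarized. A
    # sketch, assuming collect_tree_predictions() (used in the commented
    # plotting code below) stacks the niters*ntrees draws along axis 0:
    p = bmodel.collect_tree_predictions(results['predictions'])
    b05, b95 = np.percentile(p, [5, 95], axis=0)  # 90% band per test point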

    # evaluate performance on median predictions
    mse = sklearn.metrics.mean_squared_error(y_test, bm)
    mabs = sklearn.metrics.mean_absolute_error(y_test, bm)
    exvar = sklearn.metrics.explained_variance_score(y_test, bm)
    print('MSE', mse)
    print('M Abs Err', mabs)
    print('Expl. Var.', exvar)

    # # make a pretty plot (requires matplotlib.pyplot as plt)
    # g = np.linspace(np.min(y_test), np.max(y_test), 10)
    # fig, axs = plt.subplots(1, 1, figsize=(8, 8), facecolor='w', edgecolor='k')
    # fig.subplots_adjust(hspace=.15, wspace=.1)
    # sc = axs.scatter(y_test, bm, linewidth=0, s=6, alpha=0.8, c='#68d1ca')
    # a, b, c = axs.errorbar(y_test, bm, yerr=[bm - bl, bu - bm], marker='', ls='',
    #                        zorder=0, alpha=0.5, ecolor='black')
    # axs.set_xlabel("True")
    # axs.set_ylabel("B-DJINN Prediction")
    # axs.plot(g, g, color='red')
    # # axs.text(0.5, 0.9, filename, fontsize=28, ha='center', va='center',
    # #          transform=axs.transAxes)
    # axs.text(0.4, 0.75, 'mseTest=%.5f' % mse, fontsize=28, ha='center',
    #          va='center', transform=axs.transAxes)
    # # plt.show()
    # plt.savefig(resultPath + 'allTest', bbox_inches="tight", dpi=300)

    # collect_tree_predictions gathers predictions in the results dict
    # in a more intuitive way for easy plotting, etc.
    # p = bmodel.collect_tree_predictions(results['predictions'])
    return mse
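
# Usage sketch for the ablation above: computeMSE() zeroes the chosen
# feature column in place, so pass copies of the inputs. The data arrays
# (x_train, x_test, y_train, y_test) are assumed to exist in the caller.
scores = {f: computeMSE(x_train.copy(), x_test.copy(), y_train, y_test, f)
          for f in range(x_train.shape[1])}
for f, s in sorted(scores.items(), key=lambda kv: -kv[1]):
    print('feature', f, 'ablated MSE', s)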