Пример #1
0
    def train_pass(self):
        """Run one training epoch over ``self.trn_set``.

        Shows an in-place progress line with elapsed / estimated remaining
        time, feeds each batch to the spiking network, and triggers a
        train/validation evaluation every ``EVAL_INTERVAL`` processed
        samples plus once after the final batch.
        """
        total = len(self.trn_set)
        t_start = t()
        for step, batch in enumerate(self.trn_set, start=1):
            elapsed = t() - t_start
            remaining = (total - step) * (elapsed / step)
            print(f"\r{step}/{total} processed batches (elapsed time: "
                  f"{utils.format_time(elapsed)} exp. rem. time: "
                  f"{utils.format_time(remaining)})",
                  end="")

            # Fetch the next encoded input, moving it to GPU when enabled.
            input_enc = batch["encoded_image"]
            if P.GPU:
                input_enc = input_enc.cuda()

            # Present the input in training mode, switch back to inference
            # mode, then clear the network's internal state for the next batch.
            self.network.train(True)
            self.network.run(inputs={"X": input_enc}, time=self.config.TIME)
            self.network.train(False)
            self.network.reset_state_variables()

            # Periodic evaluation: every EVAL_INTERVAL processed samples,
            # and unconditionally after the last batch.
            samples_done = step * self.config.BATCH_SIZE
            if samples_done % self.config.EVAL_INTERVAL == 0 or step == total:
                print("\nEvaluating...")
                print("Computing train accuracy...")
                self.eval_pass(self.trn_set4eval, train=True)
                print("Computing validation accuracy...")
                self.eval_pass(self.val_set, train=False)
                # Report the latest metrics tracked by the stats manager.
                print(f"Current evaluation step: "
                      f"{len(self.stats_manager.eval_accuracy)}")
                print(f"Current trn. accuracy: "
                      f"{100 * self.stats_manager.trn_accuracy[-1]}%")
                print(f"Current val. accuracy: "
                      f"{100 * self.stats_manager.eval_accuracy[-1]}%")
                print(f"Top accuracy so far: "
                      f"{100 * self.stats_manager.best_acc}%"
                      f" at evaluation step: "
                      f"{self.stats_manager.best_step}")
                # Persist an accuracy plot alongside the other results.
                utils.plot_performance(
                    self.stats_manager.trn_accuracy,
                    self.stats_manager.eval_accuracy,
                    self.config.RESULT_FOLDER + "/accuracy.png")
                # Save a checkpoint whenever the top accuracy improves.
                if self.stats_manager.check_improvement():
                    print("Top accuracy improved! Saving new best model...")
                    self.network.save(self.config.RESULT_FOLDER + "/model.pt")
                    print("Model saved!")
                print("Evaluation complete!")
                print("Continuing training...")
Пример #2
0
def train_lazy():
    """Train (or load) an XGBoost classifier and report train/val/test metrics.

    Loads the dataset, splits and normalizes it, then either restores a
    previously saved model from ``config.MODEL_PATH`` or fits a new one and
    saves it.  Prints performance on the train, validation and test splits
    and finally plots the model's performance.
    """
    # Load and split the dataset.
    X, y = load_data()
    X_train, X_val, y_train, y_val = split_dataset(X, y)
    # Normalize features.
    # NOTE(review): normalize() is applied independently to each split —
    # confirm it does not fit statistics on validation/test data (leakage).
    X_train = normalize(X_train)
    X_val = normalize(X_val)

    # uncomment to check the performance of the 25 models
    # clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
    # scores, _ = clf.fit(X_train, X_val, y_train, y_val)
    # print(scores)

    # Final model: reuse a saved model when available, otherwise train and
    # save one.  (The classifier is constructed once, not per branch.)
    model = XGBClassifier()
    if os.path.isfile(config.MODEL_PATH):
        model.load_model(config.MODEL_PATH)
    else:
        model.fit(X_train,
                  y_train,
                  eval_metric="error",
                  eval_set=[(X_train, y_train), (X_val, y_val)],
                  verbose=True)
        # save model
        model.save_model(config.MODEL_PATH)

    # Performance on the train split.
    y_pred = model.predict(X_train)
    print_performance(y_train, y_pred, 'train')

    # Performance on the validation split.
    y_pred = model.predict(X_val)
    print_performance(y_val, y_pred, 'val')

    # Performance on the held-out test set.
    X_test, y_test = load_test_data()
    X_test = normalize(X_test)
    y_pred = model.predict(X_test)
    print_performance(y_test, y_pred, 'test')

    # Plot the model's performance.
    plot_performance(model)
Пример #3
0
def main():
    """Train the dual-stream 3D CNN and persist its history and weights."""
    epochs = 20
    batch_size = 128
    x_list, y_list = generate_training_data_lists()
    # Ceil division: steps_per_epoch must be an integer that covers every
    # sample — the original float `len / batch_size` is not a valid step
    # count for Keras.
    steps = (len(x_list) + batch_size - 1) // batch_size
    train_sequence = TrainSequence(x_list, y_list, batch_size)
    model_name = 'Dual-stream 3D Convolution Neural Network'
    # Variable-size volumetric inputs with 2 channels for both streams.
    model = get_model(model_name)(protein_data_shape=(None, None, None, 2),
                                  ligand_data_shape=(None, None, None, 2))

    plot_model(model, to_file='./{}.png'.format(model_name))
    print('Model Summary:')
    model.summary()
    model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['acc'])
    history = model.fit_generator(train_sequence,
                                  epochs=epochs,
                                  steps_per_epoch=steps,
                                  verbose=1).history
    # Persist training history, final weights, and a loss/accuracy plot.
    dump_pickle('./history.pkl', history)
    model.save_weights('./{}_weights.h5'.format(model_name))
    plot_performance(history, model_name, epochs, batch_size)
Пример #4
0
def main():
    """Train the MLP on the generated data; save history, plot, and weights.

    Relies on module-level ``batch_size``, ``epochs`` and ``model_name``
    (defined elsewhere in this file).
    """
    # Prepare data.  steps_per_epoch must be an integer covering every
    # sample, so use ceil division instead of the original float division.
    x_list, y_list = generate_training_data_lists()
    steps = (len(x_list) + batch_size - 1) // batch_size
    train_sequence = TrainSequence(x_list, y_list, batch_size)

    # Prepare model
    model = mlp(10000)
    model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['acc'])

    # Plot model architecture
    plot_model(model, to_file='./{}.png'.format(model_name))

    # Fit model
    history = model.fit_generator(train_sequence,
                                  epochs=epochs,
                                  steps_per_epoch=steps,
                                  verbose=1).history

    # Plot loss vs accuracy
    plot_performance(history, model_name, epochs, batch_size)

    # Write loss and acc to file
    dump_pickle('./history.pkl', history)

    # Save model weights
    model.save_weights('./{}_weights.h5'.format(model_name))
] = nn.trainNN(sess, loss, train_step, num_epochs, x, y, y_, weights, biases,
               trainDataCombined, trainTargetsCombined, validationDataCombined,
               validationTargetsCombined)

# Output - Final weights: restore the final training weights/biases into the
# live TF graph, then recompute outputs on all three splits.
for index, weight in enumerate(weights):
    sess.run(weight.assign(final_weights[index]))
for index, bias in enumerate(biases):
    sess.run(bias.assign(final_biases[index]))
trainOutputFinal = sess.run(y, feed_dict={x: trainDataCombined})
validationOutputFinal = sess.run(y, feed_dict={x: validationDataCombined})
testOutputFinal = sess.run(y, feed_dict={x: testDataCombined})

# Evaluate Model
name = "noisy+symbols-noisy_symbols - " + str(datetime.datetime.now())
utils.plot_performance(train_losses, validation_losses, num_epochs,
                       "Noisy with Symbols to Noisy with Symbols", name)
#utils.render_output(validationOutputFinal, testOutputFinal, validationDataCombined, testDataCombined, validationTargetsCombined, name + "-final")
[testScoreFinal, testResultsFinal] = utils.validate(testOutputFinal,
                                                    testTargets)
#[trainScoreFinal, trainResultsFinal] = utils.validate(trainOutputFinal, trainTargetClasses)
[validationScoreFinal,
 validationResultsFinal] = utils.validate(validationOutputFinal,
                                          validationTargetClasses)
testSymbolsScore = utils.validateSymbols(testOutputFinal, testTargetsSymbols)
# Write the run summary; a context manager guarantees the handle is flushed
# and closed (the original opened the file and never closed it).
with open("summaries/" + name + ".txt", mode="w") as f:
    f.write(
        "Noisy with Symbols to Noisy with Symbols - 2 hidden layers - 100 units each - 1000 epochs - AdamOptimizer - Learning Rate: 0.01 \n"
    )
    f.write("Minimum Training Loss: " + str(min_training_loss) + "\n")
    f.write("Minimum Validation Loss: " + str(min_validation_loss) + "\n")
    f.write("Test Score: " + str(testScoreFinal) + "\n")
Пример #6
0
# Outputs with the weights currently loaded in the session (presumably the
# optimum checkpoint, given the "-optimum" label below — confirm upstream).
trainOutput = sess.run(y, feed_dict={x: trainData})
validationOutput = sess.run(y, feed_dict={x: validationData})
testOutput = sess.run(y, feed_dict={x: testData})

# Output - Final weights: overwrite the graph variables with the weights
# from the end of training, then recompute outputs on every split.
for index, weight in enumerate(weights):
    sess.run(weight.assign(final_weights[index]))
for index, bias in enumerate(biases):
    sess.run(bias.assign(final_biases[index]))
trainOutputFinal = sess.run(y, feed_dict={x: trainData})
validationOutputFinal = sess.run(y, feed_dict={x: validationData})
testOutputFinal = sess.run(y, feed_dict={x: testData})

# Evaluate Model: plot loss curves, render sample outputs for both weight
# sets, and score each output against its targets.
name = "noisy-noisy - " + str(datetime.datetime.now())
utils.plot_performance(train_losses, validation_losses, num_epochs,
                       "Noisy to Noisy", name)
utils.render_output(validationOutput, testOutput, validationData, testData,
                    validationTargets, name + "-optimum")
utils.render_output(validationOutputFinal, testOutputFinal, validationData,
                    testData, validationTargets, name + "-final")
# utils.validate returns a [score, results] pair for each output set.
[testScore, testResults] = utils.validate(testOutput, testTargets)
[testScoreFinal, testResultsFinal] = utils.validate(testOutputFinal,
                                                    testTargets)
[trainScore, trainResults] = utils.validate(trainOutput, trainTargetClasses)
[trainScoreFinal, trainResultsFinal] = utils.validate(trainOutputFinal,
                                                      trainTargetClasses)
[validationScore, validationResults] = utils.validate(validationOutput,
                                                      validationTargetClasses)
[validationScoreFinal,
 validationResultsFinal] = utils.validate(validationOutputFinal,
                                          validationTargetClasses)
Пример #7
0
            # Incremental mean: fold this batch's loss into the running
            # average without storing all batch losses.
            running_loss += (loss_t - running_loss) / (batch_index + 1)

            # compute the accuracy and fold it into the running mean
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'])
            running_acc += (acc_t - running_acc) / (batch_index + 1)
            val_bar.set_postfix(loss=running_loss,
                                acc=running_acc,
                                epoch=epoch_index)
            val_bar.update()

        # Record this epoch's validation metrics.
        train_state['val_loss'].append(running_loss)
        train_state['val_acc'].append(running_acc)

        # Update training bookkeeping; this sets the 'stop_early' flag
        # consumed below.
        train_state = update_train_state(args=args,
                                         model=classifier,
                                         train_state=train_state)

        # Step the LR scheduler using the latest validation loss.
        scheduler.step(train_state['val_loss'][-1])

        if train_state['stop_early']:
            break

        # Reset the per-epoch progress bars and advance the epoch bar.
        train_bar.n = 0
        val_bar.n = 0
        epoch_bar.update()

except KeyboardInterrupt:
    # Ctrl-C stops training gracefully; we still fall through to plotting.
    print("Exiting loop")

plot_performance(train_state)
Пример #8
0
from sklearn.ensemble import RandomForestRegressor
# Public import path: `sklearn.datasets.base` is a private module and was
# removed in modern scikit-learn releases.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

from skpro.parametric import ParametricEstimator
from skpro.parametric.estimators import Constant
from skpro.metrics import log_loss

# Define the parametric model: a random forest predicts the point estimate,
# the standard deviation is held constant at std(y), with a normal shape.
model = ParametricEstimator(point=RandomForestRegressor(),
                            std=Constant('std(y)'),
                            shape='norm')

# Train and predict on boston housing data
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
y_pred = model.fit(X_train, y_train).predict(X_test)

# Obtain the loss (with its standard deviation, hence the two format slots)
loss = log_loss(y_test, y_pred, sample=True, return_std=True)
print('Loss: %f+-%f' % loss)

# Plot the performance
import sys
sys.path.append('../')
import utils
utils.plot_performance(y_test, y_pred)
Пример #9
0
def backtest(start_date, end_date, start_asset, mode="lstm"):
    """Back-test a portfolio rebalancing strategy between two dates.

    Args:
        start_date: First trading date (exclusive) of the back test.
        end_date: Last trading date (inclusive) of the back test.
        start_asset: Initial asset value.
        mode: "uniform", "random", or anything else for the default
            (model-based) rebalancing strategy.

    Side effects: prints progress, saves a performance plot and two JSON
    record files named after ``mode`` and ``start_date``.
    """
    # Select the rebalancing strategy for this run.
    if mode == "uniform":
        rebalance = uniform_portfolio
    elif mode == "random":
        rebalance = random_portfolio
    else:
        rebalance = rebalance_porfolio

    # ================= Initialize Configurations ====================
    # 1. Set Start Asset
    current_asset = start_asset
    current_portfolio = dict()

    # 2. Set Back Test Dates — trading dates are taken from AAPL's data
    # file (assumed to cover every market day in the range — verify).
    day_out = 5  # rebalancing interval, in trading days
    path = os.path.join(DIR_CONFIG["DATA_DIR"], 'AAPL_data.csv')
    df = pd.read_csv(path)
    df = df[(df["Date"] > start_date) & (df["Date"] <= end_date)]
    backtest_dates = df["Date"].values
    print("Back Test Dates: ", backtest_dates)

    # 3. Initialize Back Test Records
    benchmark_records = []
    backtest_records = []
    portfolio_records = []

    # 4. Load Companies List
    companies_list = []
    with open(FILE_CONFIG["COMPANY_LIST"], 'r') as f:
        for line in f:
            companies_list.append(line.rstrip("\n"))

    # 5. Load All Companies Data
    companies_data = dict()
    for company in companies_list:
        path = os.path.join(DIR_CONFIG["DATA_DIR"],
                            '{}_data.csv'.format(company))
        companies_data[company] = pd.read_csv(path)

    # ================================================================

    print("======================Start Backtest==========================")
    print("Start Date: ", start_date)
    print("End Date: ", end_date)
    print("Start Asset: ", start_asset)
    print("Mode: ", mode)

    # Start Back Test
    for i in range(len(backtest_dates)):
        # Rebalance the portfolio every `day_out` trading days (the
        # original hard-coded 5 here, duplicating the constant above),
        # skipping rebalances too close to the end of the window.
        if (i % day_out) == 0 and (i + day_out) < len(backtest_dates):
            current_portfolio, percentages_portfolio = rebalance(
                date=backtest_dates[i],
                asset=current_asset,
                companies_data=companies_data)
            portfolio_records.append(percentages_portfolio)
        # Evaluate Portfolio Every Day
        current_asset = evaluate_portfolio(date=backtest_dates[i],
                                           portfolio=current_portfolio,
                                           companies_data=companies_data)
        backtest_records.append(current_asset)

        print("Current Asset Value: ", current_asset)
        print("Records: ", backtest_records)

    # Plot Portfolio Performance Graph
    plot_performance(filename='portfolio_graph_{}_{}'.format(mode, start_date),
                     backtest_dates=backtest_dates,
                     backtest_records=backtest_records)

    # Save Performance and Portfolio Records
    save_json(filename='portfolio_performance_{}_{}'.format(mode, start_date),
              records=backtest_records)
    save_json(filename='portfolio_records_{}_{}'.format(mode, start_date),
              records=portfolio_records)
Пример #10
0
# Public import path: `sklearn.datasets.base` is a private module and was
# removed in modern scikit-learn releases.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

from skpro.baselines import DensityBaseline
from skpro.metrics import log_loss

# Load boston housing data
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Train and predict on boston housing data using a baseline model
y_pred = DensityBaseline().fit(X_train, y_train)\
                          .predict(X_test)
# Obtain the loss (with its standard deviation, hence the two format slots)
loss = log_loss(y_test, y_pred, sample=True, return_std=True)

print('Loss: %f+-%f' % loss)

# Plot performance
from utils import plot_performance
plot_performance(y_test, y_pred)
Пример #11
0
def test(model,
         saver,
         sess,
         exp_string,
         data_generator,
         test_num_updates=None):
    """Evaluate saved checkpoints (steps 1000..60000) and plot the results.

    For each checkpoint the model is restored and ``NUM_TEST_POINTS``
    meta-validation batches are scored; the metric after ``inner_loops``
    inner-gradient updates is collected per checkpoint and passed to
    ``plot_performance``.
    """
    num_classes = data_generator.num_classes  # for classification, 1 otherwise

    # Fix seeds so every checkpoint is evaluated on identical task batches.
    np.random.seed(1)
    random.seed(1)

    steps = range(1000, 61000, 1000)
    accs = []

    inner_loops = 5
    for step in steps:
        print(f"Load model {step}")
        load_model(FLAGS.logdir, exp_string, saver, sess, step)

        metaval_accuracies = []

        for _ in range(NUM_TEST_POINTS):
            if 'generate' not in dir(data_generator):
                # No generator: data comes from the model's own input
                # pipeline; only the meta learning rate is overridden.
                # (The original assigned an empty dict first — dead code.)
                feed_dict = {model.meta_lr: 0.0}
            else:
                batch_x, batch_y, amp, phase = data_generator.generate(
                    train=False)

                if FLAGS.baseline == 'oracle':  # NOTE - this flag is specific to sinusoid
                    # Append amplitude/phase as two extra input channels.
                    batch_x = np.concatenate([
                        batch_x,
                        np.zeros([batch_x.shape[0], batch_x.shape[1], 2])
                    ], 2)
                    batch_x[0, :, 1] = amp[0]
                    batch_x[0, :, 2] = phase[0]

                # Split each task into support (a) and query (b) halves.
                inputa = batch_x[:, :num_classes * FLAGS.update_batch_size, :]
                inputb = batch_x[:, num_classes * FLAGS.update_batch_size:, :]
                labela = batch_y[:, :num_classes * FLAGS.update_batch_size, :]
                labelb = batch_y[:, num_classes * FLAGS.update_batch_size:, :]

                feed_dict = {
                    model.inputa: inputa,
                    model.inputb: inputb,
                    model.labela: labela,
                    model.labelb: labelb,
                    model.meta_lr: 0.0
                }

            if model.classification:
                result = sess.run([model.metaval_total_accuracy1] +
                                  model.metaval_total_accuracies2, feed_dict)
            else:  # this is for sinusoid
                result = sess.run([model.total_loss1] + model.total_losses2,
                                  feed_dict)
            # Keep only the metric after `inner_loops` inner updates.
            metaval_accuracies.append(result[inner_loops])

        accs.append(np.array(metaval_accuracies))

    plot_performance(steps, accs, NUM_TEST_POINTS)
Пример #12
0
    with model:
        # Priors: intercept centered at the sample mean of y; weak normal
        # priors on the regression weights (one per feature of X) and a
        # half-normal prior on the noise scale.
        alpha = pm.Normal('alpha', mu=y.mean(), sd=10)
        betas = pm.Normal('beta', mu=0, sd=10, shape=X.get_value(borrow=True).shape[1])
        sigma = pm.HalfNormal('sigma', sd=1)

        # Model (defines y_pred): linear predictor with a Gaussian
        # likelihood observed on y.
        mu = alpha + pm.math.dot(betas, X.T)
        y_pred = pm.Normal("y_pred", mu=mu, sd=sigma, observed=y)


# Plug the model definition into the PyMC interface

model = BayesianVendorEstimator(
    model=PymcInterface(model_definition=pymc_linear_regression)
)


# Run prediction, print and plot the results

data = DataManager('boston')
y_pred = model.fit(data.X_train, data.y_train).predict(data.X_test)
print('Log loss: ', log_loss(data.y_test, y_pred, return_std=True))

# Plot the performance
# NOTE(review): sys.path hack to import the sibling utils module; prefer a
# proper package import if the project layout allows it.
import sys
sys.path.append('../')
import utils
utils.plot_performance(data.y_test, y_pred)