Example No. 1
def evaluate_model(train, test, model):
    n_input = 365
    # the model is fitted elsewhere and passed in
    # model = build_model(train)
    # history is a list of yearly data
    history = [x for x in train]
    # walk-forward validation over each year
    prediction = list()
    for i in range(len(test)):
        # predict the year
        yhat_sequence = forecast(model, history, n_input)
        # store the predictions
        prediction.append(yhat_sequence)
        # get real observation and add to history for predicting the next year
        history.append(test[i, :])
    # get array of predictions
    prediction = np.array(prediction)
    prediction = np.ravel(prediction)
    # get array of actual values from test set
    actual = test[:, :, 0]
    # replace zero readings with the mean so MAPE avoids division by zero
    actual[actual == 0] = np.nanmean(actual)
    actual = np.ravel(actual)
    # calculate and print scores
    rmse, mape = calculate_scores(actual, prediction)
    print('RMSE: %.3f' % rmse)
    print('MAPE: %.3f' % mape)
    # plot prediction
    plot_prediction(actual, prediction)
    # clear keras model
    K.clear_session()
    return mape
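The calculate_scores helper that evaluate_model relies on is not part of this excerpt. A minimal numpy sketch, assuming actual and prediction are equal-length 1-D arrays:

import numpy as np

def calculate_scores(actual, prediction):
    # root mean squared error across all forecast days
    rmse = np.sqrt(np.mean((actual - prediction) ** 2))
    # mean absolute percentage error, as a percentage
    mape = np.mean(np.abs((actual - prediction) / actual)) * 100
    return rmse, mape

Replacing zeros in actual with the series mean, as the example does before scoring, keeps the MAPE division well-defined.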
Example No. 2
def evaluate_model(train, test, n_input, epochs, batch_size, filters,
                   kernel_size, pool_size, dense1, dense2):
    # fit model
    model = build_model(train, n_input, epochs, batch_size, filters,
                        kernel_size, pool_size, dense1, dense2)
    # history is a list of yearly data
    history = [x for x in train]
    # walk-forward validation over each year
    prediction = list()
    for i in range(len(test)):
        # predict the year
        yhat_sequence = forecast(model, history, n_input)
        # store the predictions
        prediction.append(yhat_sequence)
        # get real observation and add to history for predicting the next year
        history.append(test[i, :])
    # get array of predictions
    prediction = np.array(prediction)
    prediction = np.ravel(prediction)
    # get array of actual values from test set
    actual = test[:, :, 0]
    # replace zero readings with the mean so MAPE avoids division by zero
    actual[actual == 0] = np.nanmean(actual)
    actual = np.ravel(actual)
    # print test parameters
    print(
        'Epochs: %d Batch Size: %d Filters: %d Kernels: %d Pool Size: %d Dense 1: %d Dense 2: %d'
        %
        (epochs, batch_size, filters, kernel_size, pool_size, dense1, dense2))
    # calculate and print scores
    scores = calculate_various_scores(actual, prediction)
    rmse, mape = calculate_scores(actual, prediction)
    print('RMSE: %.3f' % rmse)
    print('MAPE: %.3f' % mape)
    # save prediction plot
    save_prediction_plot(actual, prediction, epochs, batch_size, filters,
                         kernel_size, pool_size, dense1, dense2)
    # save result summary
    result_summary = [
        epochs, batch_size, filters, kernel_size, pool_size, dense1, dense2,
        rmse, mape
    ]
    save_results('temp/multihead_gridsearch_result_summary.csv',
                 result_summary)
    # save all results incl prediction values and various scores
    result_all = [
        epochs, batch_size, filters, kernel_size, pool_size, dense1, dense2,
        scores, prediction
    ]
    save_results('temp/multihead_gridsearch_results.csv', result_all)
    # clear keras model
    K.clear_session()
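save_results is likewise defined elsewhere in the repository. A minimal sketch, assuming each call appends one result row to a CSV file:

import csv

def save_results(path, row):
    # append a single result row, creating the file if it does not exist
    with open(path, 'a', newline='') as f:
        csv.writer(f).writerow(row)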
Example No. 3
def get_activation_vector(model, dataset, layer_number, save_path):
    # create directory for saving activations
    if not os.path.exists(os.path.dirname(save_path)):
        try:
            os.makedirs(os.path.dirname(save_path))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    # load model
    m = load_model(model)

    # define layer
    layer = m.layers[layer_number].output  # e.g. 4
    # build layer model
    layer_model = Model(inputs=m.input, outputs=layer)
    layer_model.summary()

    # load dataset
    df = pd.read_pickle(dataset)
    df = pd.DataFrame(data=df.values, index=df.index, columns=['netto'])
    # assign features for month, weekday, year
    df = df.assign(month=df.index.month)
    df = df.assign(weekday=df.index.weekday)
    df = df.assign(year=df.index.year)
    # define the time period for which to calculate the activation vectors
    df = df.loc['2014-01-01':]  # has to be divisible into whole years (365 days)
    # split into first_year and other_years of time period
    first_year, other_years = df[:365], df[365:]
    # restructure into windows of yearly data
    first_year = np.array(np.split(first_year.values, len(first_year) // 365))
    # prepare input data: restructure the remaining years the same way
    other_years = np.array(np.split(other_years.values, len(other_years) // 365))
    # define input size (365 days)
    n_input = 365

    # history is a list of yearly data
    history = [x for x in first_year]

    # walk-forward validation over each year
    prediction = list()
    for i in range(len(other_years)):
        # predict the year based on last year history
        yhat_year = forecast(layer_model, history, n_input)
        # store the predictions
        prediction.append(yhat_year)
        # get real observation and add to history for predicting the next year
        history.append(other_years[i, :])
    # predict the last year in history
    last_year = forecast(layer_model, history, n_input)
    prediction.append(last_year)

    activations = np.array(prediction)
    num_data_points, number_inputs, neurons = activations.shape
    reshaped_activations = activations.reshape(
        (num_data_points * number_inputs, neurons))
    np.savetxt(save_path + str(layer_number) + ".csv",
               reshaped_activations,
               delimiter=",")
    del m, layer_model
    K.clear_session()
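All three examples above delegate prediction to a forecast helper whose definition is not shown. A plausible sketch, assuming the model consumes the last n_input days of the first feature shaped (1, n_input, 1):

import numpy as np

def forecast(model, history, n_input):
    # flatten the list of yearly windows into one continuous series
    data = np.array(history)
    data = data.reshape((data.shape[0] * data.shape[1], data.shape[2]))
    # take the last n_input days of the first feature as model input
    input_x = data[-n_input:, 0].reshape((1, n_input, 1))
    # predict the next window in one shot and drop the batch dimension
    yhat = model.predict(input_x, verbose=0)
    return yhat[0]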
Example No. 4
    date_range = map(lambda dt: dt.date().isoformat(),
                     pd.date_range(region.index[0], region.index[-1]))

    # Forecast date is equal to the date of the last known datapoint, unless manually supplied
    forecast_date = region.index[-1]
    region = region[region.index <= forecast_date]

    # Early exit: skip this region if there are fewer than DATAPOINT_COUNT_MIN datapoints
    if len(region) < DATAPOINT_COUNT_MIN: continue

    # Define the subfolder that will hold the output assets
    forecast_chart = ROOT / 'output' / 'charts' / ('%s_US_%s.svg' %
                                                   (forecast_date, key))

    # Perform forecast
    forecast_data = forecast(region['Confirmed'], predict_window)

    # Output charts as SVG files
    plot_forecast(forecast_chart, region['Confirmed'], forecast_data)

    # Aggregate forecast data
    for idx in forecast_data.index:
        forecast_df.loc[(key, idx), 'CountryCode'] = country_code
        forecast_df.loc[(key, idx), 'CountryName'] = country_name
        forecast_df.loc[(key, idx), 'ForecastDate'] = forecast_date
        forecast_df.loc[(key, idx), 'Estimated'] = '%.03f' % forecast_data[idx]
        forecast_df.loc[(key, idx),
                        'ForecastChart'] = forecast_chart.relative_to(ROOT /
                                                                      'output')
    for idx in region['Confirmed'].index:
        forecast_df.loc[(key, idx), 'Confirmed'] = int(region.loc[idx, 'Confirmed'])
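plot_forecast is assumed here to draw the known case counts next to the forecast and write the chart to the SVG path built above. A minimal matplotlib sketch:

import matplotlib.pyplot as plt

def plot_forecast(path, confirmed, forecast_data):
    # draw known datapoints and forecast on a single axis
    fig, ax = plt.subplots()
    confirmed.plot(ax=ax, label='Confirmed')
    forecast_data.plot(ax=ax, label='Forecast', linestyle='--')
    ax.legend()
    # matplotlib infers the SVG format from the file extension
    fig.savefig(path)
    plt.close(fig)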
Example No. 5
        #model.fit(X_train, y_train, batch_size=450, nb_epoch=10, validation_split=0.05)
    else:
        print('Loading model...')
        model = model_from_json(open(arch_fname).read())
        model.load_weights(weights_fname)

    if save_model:
        print("Saving model...")
        json_string = model.to_json()
        open(arch_fname, 'w+').write(json_string)
        model.save_weights(weights_fname, overwrite=True)

    if run_model:
        print('Running forecast...')
        window = 1
        predicted = forecast(model, X_test[0, :, :], n_points=len(X_test))
        wrong_predicted = model.predict(X_test)

        rmse = np.sqrt(((predicted - y_test)**2).mean(axis=0)).mean()
        print("RMSE:", rmse)

    if save_results:
        print('Saving results...')
        pickle.dump((predicted, y_test), open(results_fname, 'wb+'))
else:
    print('Loading results...')
    predicted, y_test = pickle.load(open(results_fname, 'rb'))

if plot_results:
    print('Plotting results...')
    fig = plt.figure()
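The forecast used in this script is seeded with a single input window and rolled forward point by point, feeding each prediction back in. A sketch, assuming the model maps a (1, timesteps, features) window to one (1, features) step:

import numpy as np

def forecast(model, seed, n_points):
    # iteratively roll the model forward from the seed window
    window = seed.copy()
    outputs = []
    for _ in range(n_points):
        yhat = model.predict(window[np.newaxis, :, :])[0]
        outputs.append(yhat)
        # slide the window: drop the oldest step, append the prediction
        window = np.vstack([window[1:], yhat])
    return np.array(outputs)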
Example No. 6
def main():
    p = optparse.OptionParser()
    p.add_option('--load_data', action="store_true", default=False)
    p.add_option('--save_data', action="store_true", default=False)
    p.add_option('--load_model', action="store_true", default=False)
    p.add_option('--no_run_model', action="store_false", dest="run_model", default=True)
    p.add_option('--no_save_model', action="store_false", dest="save_model", default=True)
    p.add_option('--load_results', action="store_true", default=False)
    p.add_option('--no_save_results', action="store_false", dest="save_results", default=True)
    p.add_option('--no_plot_results', action="store_false", dest="plot_results", default=True)
    p.add_option('--model_name', default='shallow_RNN', type="string",
                 help='Options: shallow_RNN, shallow_LSTM, shallow_GRU, '
                      'deep_RNN, deep_LSTM, deep_GRU, seq2seq')
    p.add_option('--base_path', default="~/machine_learning/stock_sandbox/")
    p.add_option('--dataset', default='jigsaw', type="string", help='Options: jigsaw, synthetic, sp500')
    p.add_option('--n_samples', default=100, type="int")
    p.add_option('--n_ahead', default=50, type="int")
    p.add_option('--patience', default=5, type="int")
    p.add_option('--batch_size', default=20, type="int")
    p.add_option('--max_epochs', default=1000, type="int")
    ops, args = p.parse_args()

    if (not ops.load_results and not ops.run_model) and ops.save_results:
        raise ValueError("Cannot save what has not been loaded or run")

    if not os.path.exists(os.path.expanduser(ops.base_path + 'results')):
        os.makedirs(os.path.expanduser(ops.base_path + 'results'))
    if not os.path.exists(os.path.expanduser(ops.base_path + 'data')):
        os.makedirs(os.path.expanduser(ops.base_path + 'data'))
    base_name = ops.dataset + '_' + ops.model_name
    data_fname = ops.base_path + 'data/' + ops.dataset + "_data.pkl"
    data_fname = os.path.expanduser(data_fname)
    arch_fname = ops.base_path + 'results/' + base_name + '_model_architecture.json'
    arch_fname = os.path.expanduser(arch_fname)
    weights_fname = ops.base_path + 'results/' + base_name + '_model_weights.h5'
    weights_fname = os.path.expanduser(weights_fname)
    plot_fname = ops.base_path + 'results/' + base_name + '_results.png'
    plot_fname = os.path.expanduser(plot_fname)
    results_fname = ops.base_path + 'results/' + ops.model_name + '_results.pkl'
    results_fname = os.path.expanduser(results_fname)


    #########################BEGIN CODE#######################################
    # tickers = ['AAPL','VZ','NKE','KMI','M','MS','WMT','DOW','MPC']
    tickers = None

    if not ops.load_results:

        if ops.load_data:
            print('Loading data...')
            data = pickle.load(open(data_fname, 'rb'))
            if tickers:
                data = data[tickers]
        else:

            if ops.dataset == "sp500":
                ##### Real Stock Data
                print('Using sp500 data')
                data = load_s_and_p_data(start="2014-1-1", tickers=tickers)
            elif ops.dataset == "synthetic":
                ##### Synthetic data for testing purposes
                print('Using Synthetic data')
                values = 10000
                s = pd.Series(range(values))
                noise = pd.Series(np.random.randn(values))
                s = s / 1000  # + noise / 100
                d = {'one': s * s * 100 / values,
                     'two': np.sin(s * 10.),
                     'three': np.cos(s * 10),
                     'four': np.sin(s * s / 10) * np.sqrt(s)}
                data = pd.DataFrame(d)
            elif ops.dataset == "jigsaw":
                ##### Easy synthetic data for testing purposes
                print('Using jigsaw data')
                flow = (list(range(1, 10, 1)) + list(range(10, 1, -1))) * 1000
                pdata = pd.DataFrame({"a": flow, "b": flow})
                pdata.b = pdata.b.shift(9)
                data = pdata.iloc[10:] * random.random()  # some noise
            else:
                raise ValueError('Not a legal dataset name')

        if ops.save_data:
            print('Saving data...')
            pickle.dump(data, open(data_fname, 'wb+'))

        if ops.model_name == 'seq2seq':
            (X_train, y_train), (X_test, y_test) = test_train_split(data, splitting_method='seq2seq',
                                                                    n_samples=ops.n_samples, n_ahead=ops.n_ahead)
            print(X_train.shape, y_train.shape)
        else:
            (X_train, y_train), (X_test, y_test) = test_train_split(data, n_samples=ops.n_samples, n_ahead=ops.n_ahead)

        if not ops.load_model:
            print('compiling model')
            in_out_neurons = len(data.columns)

            if ops.model_name == "shallow_RNN":
                model = make_RNN(X_train.shape, [300], SimpleRNN, dropout=0)
            elif ops.model_name == "shallow_LSTM":
                model = make_RNN(X_train.shape, [300], LSTM, dropout=0)
            elif ops.model_name == "shallow_GRU":
                model = make_RNN(X_train.shape, [300], GRU, dropout=0)
            elif ops.model_name == "deep_RNN":
                model = make_RNN(X_train.shape, [300, 500, 200], SimpleRNN, dropout=.2)
            elif ops.model_name == "deep_LSTM":
                model = make_RNN(X_train.shape, [300, 500, 200], LSTM, dropout=.2)
            elif ops.model_name == "deep_GRU":
                model = make_RNN(X_train.shape, [300, 500, 200], GRU, dropout=.2)
            elif ops.model_name == "seq2seq":
                maxlen = 100  # length of input sequence and output sequence
                hidden_dim = 500  # memory size of seq2seq
                seq2seq = Seq2seq(input_length=X_train.shape[1], input_dim=X_train.shape[2], hidden_dim=hidden_dim,
                                  output_dim=X_train.shape[2], output_length=y_train.shape[1],
                                  batch_size=ops.batch_size, depth=4)

                model = Sequential()
                model.add(seq2seq)
            else:
                raise ValueError('Not a legal model name')

            model.compile(loss="mean_squared_error", optimizer="rmsprop")
            print('Training model...')
            early_stopping = EarlyStopping(monitor='val_loss', patience=ops.patience, verbose=0)
            model.fit(X_train, y_train, batch_size=ops.batch_size, nb_epoch=ops.max_epochs,
                      validation_split=0.1, callbacks=[early_stopping])
        else:
            print('Loading model...')
            model = model_from_json(open(arch_fname).read())
            model.load_weights(weights_fname)

        if ops.save_model:
            print("Saving model...")
            json_string = model.to_json()
            open(arch_fname, 'w+').write(json_string)
            model.save_weights(weights_fname, overwrite=True)

        if ops.run_model:
            print('Running forecast...')
            forecasted = forecast(model, X_train[-1, :, :], n_ahead=len(y_test[0]))
            predicted = model.predict(X_test)
            rmse = np.sqrt(((predicted - y_test) ** 2).mean(axis=0)).mean()
            print("RMSE:", rmse)

        if ops.save_results:
            print('Saving results...')
            pickle.dump((predicted, forecasted, y_test), open(results_fname, 'wb+'))
    else:
        print('Loading results...')
        predicted, forecasted, y_test = pickle.load(open(results_fname, 'rb'))

    if ops.plot_results:
        print('Plotting results...')
        print(predicted.shape, y_test.shape, forecasted.shape)
        fig = plt.figure()
        for i in range(min(4, predicted.shape[2])):
            ax = fig.add_subplot(2, 2, i + 1)
            ax.plot(forecasted[:, i], color='r')
            ax.plot(predicted[0, :, i], color='g')
            ax.plot(y_test[0, :, i], color='b')
            if tickers:
                ax.set_title(tickers[i])

        fig.savefig(plot_fname)
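make_RNN, which builds the recurrent models selected above, is defined elsewhere. A plausible sketch, consistent with the 3-D predictions plotted at the end of main (one output vector per input timestep); the layer layout is an assumption:

from keras.models import Sequential
from keras.layers import Dense, Dropout, TimeDistributed

def make_RNN(input_shape, layer_sizes, cell, dropout=0.0):
    # stack recurrent layers that return full sequences
    model = Sequential()
    for i, size in enumerate(layer_sizes):
        kwargs = {'return_sequences': True}
        if i == 0:
            # input_shape is (n_samples, timesteps, features)
            kwargs['input_shape'] = (input_shape[1], input_shape[2])
        model.add(cell(size, **kwargs))
        if dropout:
            model.add(Dropout(dropout))
    # one output value per feature at every timestep
    model.add(TimeDistributed(Dense(input_shape[2])))
    return model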
Example No. 7
                print('{} - dayout: {}'.format(state, dayout))
                dayout = datetime.strptime(dayout, '%d/%m/%Y')

                print('Print inputs/outputs shapes: \n X: {} \n y: {}'.format(
                    X_train.shape, y_train.shape))
                X_train, y_train = utils.check_inputs(X_train, y_train)
                print(
                    'Print inputs/outputs shapes corrected: \n X: {} \n y: {}'.
                    format(X_train.shape, y_train.shape))

                # Fit model
                model.fit(X_train, y_train)
                # Forecast
                df_out = utils.forecast(model,
                                        future=DAYS_TO_PREDICT,
                                        dayone=dayone,
                                        date_string='%d/%m/%Y',
                                        dayout=dayout,
                                        date_string_output='%d/%m/%Y')

                # Rescale predictions so they do not fall below the last observed value
                last_value = df_filtered.cases.iloc[y_train.shape[0]]
                df_out['yhat'] = utils.rescale_yhat(df_out['yhat'].values,
                                                    last_value)

                print(df_out)
                df_out.set_index('ds', inplace=True)
                # Set vars
                column_name = f'yhat_model_{int(X_train[0])}_to_{int(X_train[-1])}'
                # Update columns
                new_column = pd.Series(data=df_out.yhat,
                                       name=column_name,