Example #1
import json
import os
import sys

import pandas

# NOTE: DataLoader, Model and plot_results are project-specific helpers;
# the import paths below are assumptions about this repo's layout.
from core.dataloader import DataLoader
from core.model import Model
from core.plot import plot_results

configs = json.load(open(sys.argv[1], 'r'))  # config path given as first CLI argument
model_id = configs['model']['model_id']
save_dir = configs['model']['save_dir']

dataloader = DataLoader()
x_scaler_filename = os.path.join(save_dir, model_id + "-x.scaler")
y_scaler_filename = os.path.join(save_dir, model_id + "-y.scaler")
dataloader.restore_scalers(x_scaler_filename, y_scaler_filename)

filename = os.path.join('data', configs['data']['filename'])
dataframe = pandas.read_csv(filename, sep=',', encoding='utf-8')
dataframe.index.name = 'fecha'
x_data = dataframe.get(configs['data']['x_cols']).values

in_seq_len = configs['data']['input_sequence_length']
x_data = x_data[:, :]  # placeholder slice: keeps everything; narrow it to predict on specific sequences
input_data = dataloader.prepare_input_data(x_data, in_seq_len)
print("Input vector shape: " + str(x_data.shape))

model_filename = sys.argv[2]
model = Model(configs['data']['output_mode'])
model.load_model(filepath=model_filename)

print("Plotting predictions point by point on validation set")
predictions = model.predict_point_by_point(input_data)
print(predictions.shape)
unscaled_predictions = dataloader.recompose_results(predictions[:, 0, :],
                                                    side="y")
plot_results(unscaled_predictions, x_data[in_seq_len:, :])
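
DataLoader.prepare_input_data above is project-specific. As a rough
illustration only, a sliding-window preparation for LSTM input usually
reduces to something like the following numpy sketch (make_windows is a
hypothetical name, not this repo's API):

import numpy as np

def make_windows(x_data, in_seq_len):
    # Stack overlapping windows of length in_seq_len along a new batch
    # axis, giving shape (n_windows, in_seq_len, n_features).
    windows = [x_data[i:i + in_seq_len]
               for i in range(len(x_data) - in_seq_len)]
    return np.stack(windows)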
Example #2
import json
import os
import sys

import pandas as pd

# NOTE: DataLoader, train_network, plot_historials and plot_results are
# project-specific helpers; the import paths below are assumptions.
from core.dataloader import DataLoader
from core.training import train_network
from core.plot import plot_historials, plot_results


def main():
    """
    Keras Regression Metrics
        •Mean Squared Error: mean_squared_error, MSE, mse
        •Mean Absolute Error: mean_absolute_error, MAE, mae
        •Mean Absolute Percentage Error: mean_absolute_percentage_error, MAPE, mape
        •Cosine Proximity: cosine_proximity, cosine

    Keras Classification Metrics
        •Binary Accuracy: binary_accuracy, acc
        •Categorical Accuracy: categorical_accuracy, acc
        •Sparse Categorical Accuracy: sparse_categorical_accuracy
        •Top k Categorical Accuracy: top_k_categorical_accuracy (requires you to specify a k parameter)
        •Sparse Top k Categorical Accuracy: sparse_top_k_categorical_accuracy (requires you to specify a k parameter)
    """

    configs = json.load(open(sys.argv[1], 'r'))
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    # check that the given configuration makes sense
    assert configs['data']['output_mode'] in ["many_to_many", "many_to_one"]
    assert configs['data']['input_mode'] in ["many_to_many", "one_to_many"]
    if configs['data']['input_mode'] == "one_to_many":  # 1-1 dense net
        assert configs['data']['input_sequence_length'] == 0
        assert configs['model']['validation_folds'] >= 1
    else:  # lstm mode
        assert configs['data']['input_sequence_length'] > 0
        assert configs['model']['validation_folds'] == 1
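
    # For illustration, a config that passes the checks above in LSTM mode
    # might look like this (the keys are the ones this script reads; the
    # values are made up):
    #   {"model": {"model_id": "demo", "save_dir": "saved_models",
    #              "validation_folds": 1},
    #    "data": {"filename": "series.csv", "input_mode": "many_to_many",
    #             "output_mode": "many_to_many", "input_sequence_length": 50,
    #             "train_test_split": 0.8, "x_cols": [...], "y_cols": [...],
    #             "cathegorical_cols": [...]}}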

    # load datasets
    filename = os.path.join('data', configs['data']['filename'])
    dataframe = pd.read_csv(filename, sep=',', encoding='utf-8')
    if configs['data']['input_mode'] == "many_to_many":
        dataframe.index.name = 'fecha'

    dataloader = DataLoader()
    dataloader.load_dataset(
        dataframe,
        configs['data']['train_test_split'],
        configs['data']['x_cols'],
        configs['data']['y_cols'],
        configs['data']['cathegorical_cols'],
        model_id=configs['model']['model_id'],
        save_dir=configs['model']['save_dir'],
    )

    # start the k-folded cross-validation
    scores = []
    for i in range(configs['model']['validation_folds']):
        print("Training with fold %d" % i)
        model, history = train_network(configs, dataloader)
        loss = history.history['val_loss'][-1]
        scores.append((loss, model, history))
        if configs['model']['validation_folds'] > 1:
            print("Shuffling data for next fold validation!")
            dataloader.shuffle_data()

    scores.sort(key=lambda x: x[0])  # sort in place; a bare sorted() would discard the result
    model = scores[0][1]  # TODO: is the final val_loss the best metric to sort by?
    model.save(save_dir=configs['model']['save_dir'],
               model_id=configs['model']['model_id'])
    print("Best model has %f validation loss!" % scores[0][0])
    plot_historials([x[2] for x in scores])

    # test the thing!
    out_seq_len = (configs['data']['input_sequence_length']
                   if configs['data']['output_mode'] == "many_to_many"
                   else 1)
    x_test, y_test = dataloader.get_test_data(
        in_seq_len=configs['data']['input_sequence_length'],
        out_seq_len=out_seq_len)
    unscaled_y_test = dataloader.recompose_results(y_test[:, 0, :],
                                                   side="y").values

    # predict point by point
    print("Plotting predictions point by point on validation set")
    predictions = model.predict_point_by_point(x_test)
    unscaled_predictions = dataloader.recompose_results(predictions[:, 0, :],
                                                        side="y").values
    plot_results(unscaled_predictions, unscaled_y_test)

    if model.predictions_are_refeedeable():  # only for lstm mode
        print("Plotting predictions as refeeding window on validation set")
        predictions = model.predict_sequence_full(x_test[1, :, :],
                                                  len(unscaled_y_test))
        unscaled_predictions = dataloader.recompose_results(
            predictions[:, 0, :], side="y").values
        plot_results(unscaled_predictions, unscaled_y_test)


if __name__ == '__main__':
    main()
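
Model.predict_sequence_full and predictions_are_refeedeable are methods of
this repo's Model wrapper. The numpy sketch below shows the general
refeeding idea they suggest; predict_full_sequence is a hypothetical
stand-in assuming a Keras-style model.predict, not this repo's API:

import numpy as np

def predict_full_sequence(model, first_window, steps):
    # Predict one step, append it to the window, drop the oldest row and
    # repeat, so every later prediction is made from earlier predictions.
    # This only works when the model's output width equals its input
    # feature width, which is what "refeedable" means above.
    window = first_window.copy()  # shape (seq_len, n_features)
    predicted = []
    for _ in range(steps):
        step = model.predict(window[np.newaxis, :, :])[0]
        predicted.append(step)
        window = np.vstack([window[1:], step.reshape(1, -1)])
    return np.array(predicted)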