Example #1
import argparse
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Dataset, Model and the metric helpers (mean_squared_error,
# mean_absolute_error, mean_absolute_percentage_error) come from project
# imports not shown in this snippet.


def main(args: argparse.Namespace):
    dataset = Dataset(args.train)

    model_args = {
        'hidden_size': args.hidden_size,
        'input_size': args.input_size,
        'feature_size': len(dataset.dataframe.columns),
    }

    train_x, train_y = dataset(**model_args)
    test_frame = dataset.load(args.test)
    test = np.concatenate(
        (train_x[0],
         dataset.transform(
             np.hstack((test_frame, np.zeros((len(test_frame), 1)))))))

    model = Model.from_file(args.weight)

    # Test sequences
    print('Testing ...')
    for index in range(len(test) - args.input_size):
        test_input = np.expand_dims(test[index:index + args.input_size], 0)
        pred = model.predict(test_input).squeeze()
        test[index + args.input_size, -1] = pred

    # `out` is not defined anywhere in this snippet; assume an output directory
    # taken from args.output, as in Example #2.
    out = Path(args.output)
    out.mkdir(exist_ok=True, parents=True)

    test_frame[dataset.dataframe.columns[-1]] = dataset.inverse_transform(
        test[args.input_size:, -1])
    test_frame.to_csv(str(out.joinpath('test-prediction.csv')), index=False)

    prediction = test_frame[dataset.dataframe.columns[-1]]
    prediction += abs(prediction.min())
    label = pd.read_csv(args.label).values[:, -1]

    label = MinMaxScaler().fit_transform(label.reshape(-1, 1))
    # `prediction` is a pandas Series, which has no .reshape; go through .values.
    prediction = MinMaxScaler().fit_transform(prediction.values.reshape(-1, 1))

    mse = mean_squared_error(label, prediction)
    mae = mean_absolute_error(label, prediction)
    mape = mean_absolute_percentage_error(label, prediction, args.epsilon)

    print(f'MSE: {mse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.4f}')
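
A minimal sketch of a command-line entry point that could drive Example #1. Only the attribute names (train, test, label, weight, output, hidden_size, input_size, epsilon) are taken from the snippet above; the flags, types and defaults are assumptions, and the real project may define them differently.

import argparse


def build_parser() -> argparse.ArgumentParser:
    # Hypothetical parser; it only mirrors the attributes read by main() above.
    parser = argparse.ArgumentParser(description='Evaluate a saved model on a test CSV.')
    parser.add_argument('--train', required=True, help='training CSV used to fit the scaler')
    parser.add_argument('--test', required=True, help='test CSV to predict')
    parser.add_argument('--label', required=True, help='CSV holding the ground-truth column')
    parser.add_argument('--weight', required=True, help='saved model weights, e.g. model.h5')
    parser.add_argument('--output', default='output', help='directory for prediction CSVs')
    parser.add_argument('--hidden-size', type=int, required=True)
    parser.add_argument('--input-size', type=int, required=True)
    parser.add_argument('--epsilon', type=float, default=1e-7, help='MAPE denominator stabiliser')
    return parser


if __name__ == '__main__':
    main(build_parser().parse_args())
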
Example #2
import argparse
from pathlib import Path

import numpy as np
from tqdm import tqdm

# init, Dataset, Model and mean_absolute_percentage_error come from project
# imports not shown in this snippet.


def main(args: argparse.Namespace):
    init(args.seed)

    dataset = Dataset(args.train)

    model_args = {
        'hidden_size': args.hidden_size,
        'input_size': args.input_size,
        'feature_size': len(dataset.dataframe.columns),
        'nested': args.nested,
        'dropout': args.dropout,
    }
    optim_args = {
        'lr': args.lr,
        'beta_1': args.beta_1,
        'beta_2': args.beta_2,
        'decay': args.decay,
    }

    train_x, train_y = dataset(**model_args)
    test_frame = dataset.load(args.test)
    label = test_frame.values[:, -1]
    test = np.concatenate((train_x[0],
                           dataset.transform(
                               np.hstack((test_frame.values[:, :-1],
                                          np.zeros((len(test_frame), 1)))))))

    if args.use_test:
        test_y = test_frame.values[args.input_size:, -1]
        test_x = np.empty((len(test_frame) - args.input_size, args.input_size,
                           len(dataset.dataframe.columns)))

        # Build one sliding window per test target so every row of test_x is filled.
        for i in range(len(test_x)):
            test_x[i] = test_frame.values[i:i + args.input_size]

        train_x = np.vstack((train_x, test_x))
        train_y = np.concatenate((train_y, test_y))

    out = Path(args.output)
    out.mkdir(exist_ok=True, parents=True)

    if args.model:
        print(f'Model load from {args.model} ...')
        model = Model.from_file(args.model)

    else:
        model = Model(model_args, optim_args)

        if not args.silence:
            model.summary()

        # Train sequences
        print('Training ...')
        model.fit(train_x,
                  train_y,
                  epochs=args.epoch,
                  shuffle=False,
                  batch_size=args.batch,
                  verbose=not args.silence,
                  callbacks=model.callbacks(early_stop=not args.no_stop))
        model.save(str(out.joinpath('model.h5')))

    # Test sequences
    print('Testing ...')
    for index in tqdm(range(len(test) - args.input_size)):
        test_input = np.expand_dims(test[index:index + args.input_size], 0)
        pred = model.predict(test_input).squeeze()
        test[index + args.input_size, -1] = pred

    test_frame[dataset.dataframe.columns[-1]] = test[args.input_size:, -1]
    test_frame.to_csv(str(out.joinpath('prediction.csv')), index=False)

    test_frame[dataset.dataframe.columns[-1]] = dataset.inverse_transform(
        test[args.input_size:, -1])
    test_frame.to_csv(str(out.joinpath('prediction-scaled.csv')), index=False)

    if not args.no_fig:
        def scaler(values: np.ndarray) -> np.ndarray:
            # Min-max normalise to the [0, 1] range.
            min_, max_ = values.min(), values.max()
            return (values - min_) / (max_ - min_)

        import matplotlib
        matplotlib.use('Agg')  # pick the non-interactive backend before pyplot is imported
        import matplotlib.pyplot as plt

        label_scale = scaler(label)
        prediction = scaler(test_frame.values[:, -1])
        x_range = np.arange(np.size(prediction, 0))

        error = mean_absolute_percentage_error(label_scale, prediction)
        plt.title(f'MAPE: {error:.4}')
        plt.ylim(0, 1)
        plt.plot(x_range, label_scale, c='r')
        plt.plot(x_range, prediction, c='b')
        plt.savefig(str(out.joinpath('figure.jpg')), dpi=400)

        print(f'MAPE: {error:.4}')
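
The test loop in both examples performs a rolling one-step-ahead forecast: each window of input_size rows is fed to the model, the scalar prediction is written into the target (last) column of the following row, and that row then becomes part of the next window. A self-contained sketch of that feedback mechanism, with a stub standing in for the real model:

import numpy as np

INPUT_SIZE = 3  # illustrative window length

# Toy scaled data: two feature columns plus the target in the last column.
test = np.random.rand(10, 3)
test[INPUT_SIZE:, -1] = 0.0  # future targets are unknown, so they start zero-filled


def predict_stub(window: np.ndarray) -> float:
    # Stand-in for model.predict(); returns the mean of the window's target column.
    return float(window[0, :, -1].mean())


for index in range(len(test) - INPUT_SIZE):
    window = np.expand_dims(test[index:index + INPUT_SIZE], 0)  # shape (1, INPUT_SIZE, features)
    pred = predict_stub(window)
    # Feed the prediction back so the next window sees it in its target column.
    test[index + INPUT_SIZE, -1] = pred

print(test[INPUT_SIZE:, -1])  # the rolled-forward predictions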