Example #1
def run(model):
    app = QtGui.QApplication(sys.argv)
    app.setStyle(QtGui.QStyleFactory.create('Cleanlooks'))
    sol = ModelSolver(model)
    print "free:", list(sol.get_freevars())
    root = get_control_for_model(model)(model, sol)
    root.build(None)
    root.widget.show()
    sol.invoke_observers()
    model.invoke_observers()
    return app.exec_()
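
The snippet calls `invoke_observers()` on both the solver and the model before entering the Qt event loop. A minimal sketch of what such an observer mix-in could look like, assuming a plain callback list (the class below is hypothetical, not the project's actual `ModelSolver` API):

class Observable(object):
    # Hypothetical observer mix-in; the real solver/model classes may
    # register and notify observers differently.
    def __init__(self):
        self._observers = []

    def add_observer(self, callback):
        # Each callback receives the observable itself when notified.
        self._observers.append(callback)

    def invoke_observers(self):
        for callback in self._observers:
            callback(self)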
Example #2
def main():
    parse = argparse.ArgumentParser()
    # ---------- environment setting: which gpu -------
    parse.add_argument('-gpu',
                       '--gpu',
                       type=str,
                       default='0',
                       help='which gpu to use: 0 or 1')
    parse.add_argument('-folder_name',
                       '--folder_name',
                       type=str,
                       default='datasets/citibike-data/data/')
    parse.add_argument('-output_folder_name',
                       '--output_folder_name',
                       type=str,
                       default='output/citibike-data/data/')
    # ---------- input/output settings -------
    parse.add_argument('-input_steps',
                       '--input_steps',
                       type=int,
                       default=6,
                       help='number of input steps')
    # ---------- model ----------
    parse.add_argument('-model',
                       '--model',
                       type=str,
                       default='GCN',
                       help='model: FC_LSTM, FC_GRU, GCN, flow_GCN, Coupled_GCN')
    parse.add_argument('-num_layers',
                       '--num_layers',
                       type=int,
                       default=2,
                       help='number of layers in model')
    parse.add_argument('-num_units',
                       '--num_units',
                       type=int,
                       default=64,
                       help='dim of hidden states')
    parse.add_argument('-trained_adj_mx',
                       '--trained_adj_mx',
                       type=int,
                       default=0,
                       help='whether to train the adjacency matrix')
    parse.add_argument('-filter_type',
                       '--filter_type',
                       type=str,
                       default='dual_random_walk',
                       help='laplacian, random_walk, or dual_random_walk')
    parse.add_argument('-delta',
                       '--delta',
                       type=float,
                       default=1e7,
                       help='delta to calculate rescaled weighted matrix')
    parse.add_argument('-epsilon',
                       '--epsilon',
                       type=float,
                       default=0.8,
                       help='epsilon to calculate rescaled weighted matrix')
    #
    parse.add_argument(
        '-dy_temporal',
        '--dy_temporal',
        type=int,
        default=0,
        help='whether to use a temporal attention module before the output layer')
    parse.add_argument(
        '-multi_loss',
        '--multi_loss',
        type=int,
        default=0,
        help='whether to include only the last prediction in the loss function.')
    parse.add_argument('-att_units',
                       '--att_units',
                       type=int,
                       default=64,
                       help='dim of hidden states in the attention module')
    #
    parse.add_argument(
        '-dy_adj',
        '--dy_adj',
        type=int,
        default=1,
        help='whether to use a dynamic adjacency matrix for the lower '
             'feature extraction layer')
    parse.add_argument(
        '-dy_filter',
        '--dy_filter',
        type=int,
        default=0,
        help='whether to use a dynamic filter to generate region-specific filters')
    #parse.add_argument('-att_dynamic_adj', '--att_dynamic_adj', type=int, default=1, help='whether to use dynamic adjacent matrix in attention parts')
    parse.add_argument('-model_save',
                       '--model_save',
                       type=str,
                       default='gcn',
                       help='folder name to save model')
    parse.add_argument('-pretrained_model',
                       '--pretrained_model_path',
                       type=str,
                       default=None,
                       help='path to the pretrained model')
    # ---------- params for CNN ------------
    parse.add_argument('-num_filters',
                       '--num_filters',
                       type=int,
                       default=32,
                       help='number of filters in CNN')
    parse.add_argument('-pooling_units',
                       '--pooling_units',
                       type=int,
                       default=64,
                       help='number of pooling units')
    parse.add_argument('-dropout_keep_prob',
                       '--dropout_keep_prob',
                       type=float,
                       default=0.5,
                       help='keep probability in dropout layer')
    # ---------- training parameters --------
    parse.add_argument('-n_epochs',
                       '--n_epochs',
                       type=int,
                       default=20,
                       help='number of epochs')
    parse.add_argument('-batch_size',
                       '--batch_size',
                       type=int,
                       default=8,
                       help='batch size for training')
    parse.add_argument('-show_batches',
                       '--show_batches',
                       type=int,
                       default=100,
                       help='report progress every this many batches')
    parse.add_argument('-lr',
                       '--learning_rate',
                       type=float,
                       default=0.0002,
                       help='learning rate')
    parse.add_argument('-update_rule',
                       '--update_rule',
                       type=str,
                       default='adam',
                       help='update rule')
    # ---------- train or predict -------
    parse.add_argument('-train',
                       '--train',
                       type=int,
                       default=1,
                       help='whether to train')
    parse.add_argument('-test', '--test', type=int, default=0, help='whether to test')
    #
    args = parse.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    print('load train, test data...')
    # train: 20140401 - 20140831
    # validate: 20140901 - 20140910
    # test: 20140911 - 20140930
    split = [3672, 240, 480]
    #split = [3912, 480]
    data, train_data, val_data, test_data = load_npy_data(filename=[
        args.folder_name + 'd_station.npy', args.folder_name + 'p_station.npy'
    ],
                                                          split=split)
    # data: [num, station_num, 2]
    #f_data, train_f_data, val_f_data, test_f_data = load_pkl_data(args.folder_name + 'f_data_list.pkl', split=split)
    f_data, train_f_data, val_f_data, test_f_data = load_npy_data(
        filename=[args.folder_name + 'citibike_flow_data.npy'], split=split)
    print(len(f_data))
    print('preprocess train/val/test flow data...')
    #f_preprocessing = StandardScaler()
    f_preprocessing = MinMaxNormalization01()
    f_preprocessing.fit(train_f_data)
    train_f_data = f_preprocessing.transform(train_f_data)
    if val_f_data is not None:
        val_f_data = f_preprocessing.transform(val_f_data)
    test_f_data = f_preprocessing.transform(test_f_data)
    print('preprocess train/val/test data...')
    pre_process = MinMaxNormalization01()
    #pre_process = StandardScaler()
    pre_process.fit(train_data)
    train_data = pre_process.transform(train_data)
    if val_data is not None:
        val_data = pre_process.transform(val_data)
    test_data = pre_process.transform(test_data)
    #
    num_station = data.shape[1]
    print('number of stations: %d' % num_station)
    #
    train_loader = DataLoader_graph(train_data,
                                    train_f_data,
                                    args.input_steps,
                                    flow_format='identity')
    if val_data is not None:
        val_loader = DataLoader_graph(val_data,
                                      val_f_data,
                                      args.input_steps,
                                      flow_format='identity')
    else:
        val_loader = None
    test_loader = DataLoader_graph(test_data,
                                   test_f_data,
                                   args.input_steps,
                                   flow_format='identity')
    # f_adj_mx = None
    if os.path.isfile(args.folder_name + 'f_adj_mx.npy'):
        f_adj_mx = np.load(args.folder_name + 'f_adj_mx.npy')
    else:
        f_adj_mx = train_loader.get_flow_adj_mx()
        np.save(args.folder_name + 'f_adj_mx.npy', f_adj_mx)
    #
    #
    if args.filter_type == 'laplacian':
        w = np.load(args.folder_name + 'w.npy')
        # w = np.array(w, dtype=np.float32)
        W = get_rescaled_W(w, delta=args.delta, epsilon=args.epsilon)
        # Calculate graph kernel
        L = scaled_laplacian(W)
        #
        f_adj_mx = L

    if args.model == 'FC_LSTM':
        model = FC_LSTM(num_station,
                        args.input_steps,
                        num_layers=args.num_layers,
                        num_units=args.num_units,
                        batch_size=args.batch_size)
    if args.model == 'FC_GRU':
        model = FC_GRU(num_station,
                       args.input_steps,
                       num_layers=args.num_layers,
                       num_units=args.num_units,
                       batch_size=args.batch_size)
    if args.model == 'GCN':
        model = GCN(num_station,
                    args.input_steps,
                    num_layers=args.num_layers,
                    num_units=args.num_units,
                    dy_adj=args.dy_adj,
                    dy_filter=args.dy_filter,
                    f_adj_mx=f_adj_mx,
                    trained_adj_mx=args.trained_adj_mx,
                    filter_type=args.filter_type,
                    batch_size=args.batch_size)
    if args.model == 'flow_GCN':
        model = flow_GCN(num_station,
                         args.input_steps,
                         num_layers=args.num_layers,
                         num_units=args.num_units,
                         f_adj_mx=f_adj_mx,
                         trained_adj_mx=args.trained_adj_mx,
                         filter_type=args.filter_type,
                         batch_size=args.batch_size)
    if args.model == 'Coupled_GCN':
        model = Coupled_GCN(num_station,
                            args.input_steps,
                            num_layers=args.num_layers,
                            num_units=args.num_units,
                            f_adj_mx=f_adj_mx,
                            trained_adj_mx=args.trained_adj_mx,
                            filter_type=args.filter_type,
                            dy_temporal=args.dy_temporal,
                            att_units=args.att_units,
                            multi_loss=args.multi_loss,
                            batch_size=args.batch_size)
    #
    model_path = os.path.join(args.output_folder_name, 'model_save',
                              args.model_save)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    #model_path = os.path.join(args.folder_name, 'model_save', args.model_save)
    solver = ModelSolver(
        model,
        train_loader,
        val_loader,
        test_loader,
        pre_process,
        batch_size=args.batch_size,
        show_batches=args.show_batches,
        n_epochs=args.n_epochs,
        pretrained_model=args.pretrained_model_path,
        update_rule=args.update_rule,
        learning_rate=args.learning_rate,
        model_path=model_path,
    )
    results_path = os.path.join(model_path, 'results')
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if args.train:
        print('==================== begin training ======================')
        test_target, test_prediction = solver.train(
            os.path.join(model_path, 'out'))
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
    if args.test:
        print('==================== begin test ==========================')
        test_target, test_prediction = solver.test()
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
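
`get_rescaled_W` and `scaled_laplacian` are not shown in the example. A plausible sketch, assuming the usual construction from the graph-convolution literature (a thresholded Gaussian kernel over pairwise distances, then a normalized Laplacian rescaled to [-1, 1]); the exact roles of `delta` and `epsilon` are assumptions:

import numpy as np

def get_rescaled_W(w, delta=1e7, epsilon=0.8):
    # Gaussian kernel over squared pairwise distances; entries below
    # epsilon are zeroed to sparsify the graph (assumed semantics).
    W = np.exp(-np.square(w) / delta)
    W[W < epsilon] = 0.0
    np.fill_diagonal(W, 0.0)
    return W

def scaled_laplacian(W):
    # Symmetric normalized Laplacian, rescaled for Chebyshev-style
    # filters: L~ = 2 L / lambda_max - I.
    d = W.sum(axis=1)
    d_inv_sqrt = np.where(d > 0, d ** -0.5, 0.0)
    n = W.shape[0]
    L = np.eye(n) - d_inv_sqrt[:, None] * W * d_inv_sqrt[None, :]
    lambda_max = np.linalg.eigvalsh(L).max()
    return 2.0 * L / lambda_max - np.eye(n)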
Example #3
def main():
    parse = argparse.ArgumentParser()
    # ---------- environment setting: which gpu -------
    parse.add_argument('-gpu',
                       '--gpu',
                       type=str,
                       default='0',
                       help='which gpu to use: 0 or 1')
    parse.add_argument('-folder_name',
                       '--folder_name',
                       type=str,
                       default='datasets/citibike-data/data/')
    parse.add_argument(
        '-if_minus_mean',
        '--if_minus_mean',
        type=int,
        default=0,
        help='use MinMaxNormalization01 or MinMaxNormalization01_minus_mean')
    # ---------- input/output settings -------
    parse.add_argument('-input_steps',
                       '--input_steps',
                       type=int,
                       default=6,
                       help='number of input steps')
    parse.add_argument('-output_steps',
                       '--output_steps',
                       type=int,
                       default=1,
                       help='number of output steps')
    # ---------- station embeddings --------
    parse.add_argument('-pretrained_embeddings',
                       '--pretrained_embeddings',
                       type=int,
                       default=1,
                       help='whether to use pretrained embeddings')
    parse.add_argument('-embedding_size',
                       '--embedding_size',
                       type=int,
                       default=100,
                       help='dim of embedding')
    # ---------- model ----------
    #parse.add_argument('-model', '--model', type=str, default='DyST', help='model: NN, LSTM, biLSTM, CNN')
    parse.add_argument('-dynamic_context',
                       '--dynamic_context',
                       type=int,
                       default=1,
                       help='whether to add dynamic_context part')
    parse.add_argument('-dynamic_spatial',
                       '--dynamic_spatial',
                       type=int,
                       default=1,
                       help='whether to add dynamic_spatial part')
    parse.add_argument('-add_ext',
                       '--add_ext',
                       type=int,
                       default=1,
                       help='whether to add external factors')
    parse.add_argument('-model_save',
                       '--model_save',
                       type=str,
                       default='',
                       help='folder name to save model')
    parse.add_argument('-pretrained_model',
                       '--pretrained_model_path',
                       type=str,
                       default=None,
                       help='path to the pretrained model')
    # ---------- params for CNN ------------
    parse.add_argument('-num_filters',
                       '--num_filters',
                       type=int,
                       default=32,
                       help='number of filters in CNN')
    parse.add_argument('-pooling_units',
                       '--pooling_units',
                       type=int,
                       default=64,
                       help='number of pooling units')
    parse.add_argument('-dropout_keep_prob',
                       '--dropout_keep_prob',
                       type=float,
                       default=0.5,
                       help='keep probability in dropout layer')
    # ---------- training parameters --------
    parse.add_argument('-n_epochs',
                       '--n_epochs',
                       type=int,
                       default=50,
                       help='number of epochs')
    parse.add_argument('-batch_size',
                       '--batch_size',
                       type=int,
                       default=8,
                       help='batch size for training')
    parse.add_argument('-show_batches',
                       '--show_batches',
                       type=int,
                       default=100,
                       help='report progress every this many batches')
    parse.add_argument('-lr',
                       '--learning_rate',
                       type=float,
                       default=0.002,
                       help='learning rate')
    parse.add_argument('-update_rule',
                       '--update_rule',
                       type=str,
                       default='adam',
                       help='update rule')
    # ------ train or predict -------
    parse.add_argument('-train',
                       '--train',
                       type=int,
                       default=1,
                       help='whether to train')
    parse.add_argument('-test', '--test', type=int, default=0, help='whether to test')
    #
    parse.add_argument('-pretrain',
                       '--pretrain',
                       type=int,
                       default=0,
                       help='whether to pretrain')
    parse.add_argument('-partial_pretrain',
                       '--partial_pretrain',
                       type=int,
                       default=0,
                       help='whether to load pretrained vars')
    args = parse.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    print('load train, test data...')
    # train: 20140401 - 20140910
    # test: 20140911 - 20140930
    split = [3912, 480]
    data, train_data, test_data, _ = load_npy_data(filename=[
        args.folder_name + 'd_station.npy', args.folder_name + 'p_station.npy'
    ],
                                                   split=split)
    # data: [num, station_num, 2]
    f_data, train_f_data, test_f_data, _ = load_pkl_data(args.folder_name +
                                                         'f_data_list.pkl',
                                                         split=split)
    print(len(f_data))
    # e_data: [num, ext_dim]
    e_data, train_e_data, test_e_data, _ = load_mat_data(args.folder_name +
                                                         'fea2.mat',
                                                         'fea',
                                                         split=split)
    # e_preprocess = MinMaxNormalization01()
    # e_preprocess.fit(train_e_data)
    # train_e_data = e_preprocess.transform(train_e_data)
    # test_e_data = e_preprocess.transform(test_e_data)
    print('preprocess train/test data...')
    #pre_process = MinMaxNormalization01_by_axis()
    if args.if_minus_mean:
        pre_process = MinMaxNormalization01_minus_mean()
        pre_process.fit(train_data)
        norm_mean_data = pre_process.transform(data)
        train_data = norm_mean_data[:split[0]]
        test_data = norm_mean_data[split[0]:]
    else:
        pre_process = MinMaxNormalization01()
        pre_process.fit(train_data)
        train_data = pre_process.transform(train_data)
        test_data = pre_process.transform(test_data)
    # embeddings
    #id_map = load_pickle(args.folder_name+'station_map.pkl')
    #num_station = len(id_map)
    num_station = data.shape[1]
    print('number of stations: %d' % num_station)
    if args.pretrained_embeddings:
        print('load pretrained embeddings...')
        embeddings = get_embedding_from_file(
            args.folder_name + 'embeddings.txt', num_station)
    else:
        print('train station embeddings via Word2Vec model...')
        trip_data = load_pickle(args.folder_name + 'all_trip_data.pkl')
        word2vec_model = Word2Vec(sentences=trip_data,
                                  size=args.embedding_size)
        print('save Word2Vec model and embeddings...')
        word2vec_model.save(args.folder_name + 'word2vec_model')
        word2vec_model.wv.save_word2vec_format(args.folder_name +
                                               'embeddings.txt',
                                               binary=False)
        del word2vec_model
        embeddings = get_embedding_from_file(
            args.folder_name + 'embeddings.txt', num_station)
    train_loader = DataLoader(train_data, train_f_data, train_e_data,
                              args.input_steps, args.output_steps, num_station)
    # val_loader = DataLoader(val_data, val_f_data,
    #                           args.input_steps, args.output_steps,
    #                           num_station, pre_process)
    test_loader = DataLoader(test_data, test_f_data, test_e_data,
                             args.input_steps, args.output_steps, num_station)
    model = DyST2(num_station,
                  args.input_steps,
                  args.output_steps,
                  embedding_dim=args.embedding_size,
                  embeddings=embeddings,
                  ext_dim=e_data.shape[-1],
                  batch_size=args.batch_size,
                  dynamic_context=args.dynamic_context,
                  dynamic_spatial=args.dynamic_spatial,
                  add_ext=args.add_ext)
    model_path = os.path.join(args.folder_name, 'model_save', args.model_save)
    solver = ModelSolver(model,
                         train_loader,
                         test_loader,
                         pre_process,
                         batch_size=args.batch_size,
                         show_batches=args.show_batches,
                         n_epochs=args.n_epochs,
                         pretrained_model=args.pretrained_model_path,
                         update_rule=args.update_rule,
                         learning_rate=args.learning_rate,
                         model_path=model_path,
                         partial_pretrain=args.partial_pretrain)
    results_path = os.path.join(model_path, 'results')
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if args.pretrain:
        print('==================== begin pretrain ======================')
        w_att_1, w_att_2, w_h_in, w_h_out = solver.pretrain(
            os.path.join(model_path, 'pretrain_out'))
        np.save(os.path.join(model_path, 'w_att_1.npy'), w_att_1)
        np.save(os.path.join(model_path, 'w_att_2.npy'), w_att_2)
        np.save(os.path.join(model_path, 'w_h_in.npy'), w_h_in)
        np.save(os.path.join(model_path, 'w_h_out.npy'), w_h_out)
    if args.train:
        print('==================== begin training ======================')
        test_target, test_prediction = solver.train(
            os.path.join(model_path, 'out'))
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
    if args.test:
        print('==================== begin test ==========================')
        test_target, test_prediction = solver.test()
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
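
`MinMaxNormalization01` is fitted on the training split only and then applied to every split, so no test statistics leak into training. A minimal sketch of the assumed `fit`/`transform`/`inverse_transform` interface; the project's actual class may differ in details:

import numpy as np

class MinMaxNormalization01(object):
    # Scale values into [0, 1] using the min/max of the fitted data.
    def fit(self, data):
        self._min = np.min(data)
        self._max = np.max(data)

    def transform(self, data):
        return (data - self._min) / (self._max - self._min)

    def inverse_transform(self, data):
        # Used after prediction to map outputs back to original units.
        return data * (self._max - self._min) + self._min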
Example #4
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    # preprocessing class
    pre_process = MinMaxNormalization01()
    print('load train, validate, test data...')
    split = [43824, 8760, 8760]
    data, train_data, val_data, test_data = load_data(filename=['data/taxi/p_map.mat', 'data/taxi/d_map.mat'],
                                                      split=split)
    # data: [num, row, col, channel]
    print('preprocess train data...')
    pre_process.fit(train_data)

    if 'ResNet' in FLAGS.model:
        pre_index = max(FLAGS.closeness * 1, FLAGS.period * 7, FLAGS.trend * 7 * 24)
        all_timestamps = gen_timestamps(['2009', '2010', '2011', '2012', '2013', '2014', '2015'])
        data = pre_process.transform(data)
        # train_data = train_data
        train_data = data[:split[0]]
        val_data = data[split[0] - pre_index:split[0] + split[1]]
        test_data = data[split[0] + split[1] - pre_index:split[0] + split[1] + split[2]]
        del data
        # get train, validate, test timestamps
        train_timestamps = all_timestamps[:split[0]]
        val_timestamps = all_timestamps[split[0] - pre_index:split[0] + split[1]]
        test_timestamps = all_timestamps[split[0] + split[1] - pre_index:split[0] + split[1] + split[2]]
        # get x, y
        train_x, train_y = batch_data_cpt_ext(train_data, train_timestamps,
                                              batch_size=FLAGS.batch_size, close=FLAGS.closeness, period=FLAGS.period,
                                              trend=FLAGS.trend)
        val_x, val_y = batch_data_cpt_ext(val_data, val_timestamps,
                                          batch_size=FLAGS.batch_size, close=FLAGS.closeness, period=FLAGS.period,
                                          trend=FLAGS.trend)
        test_x, test_y = batch_data_cpt_ext(test_data, test_timestamps,
                                            batch_size=FLAGS.batch_size, close=FLAGS.closeness, period=FLAGS.period,
                                            trend=FLAGS.trend)
        train = {'x': train_x, 'y': train_y}
        val = {'x': val_x, 'y': val_y}
        test = {'x': test_x, 'y': test_y}
        nb_flow = train_data.shape[-1]
        row = train_data.shape[1]
        col = train_data.shape[2]
        if FLAGS.model == 'AttResNet':
            print('k-means to cluster...')
            model_path = 'taxi-results/model_save/AttResNet/'
            log_path = 'taxi-results/log/AttResNet/'
            if FLAGS.pre_saved_cluster:
                cluster_centroid = np.load(model_path + 'cluster_centroid.npy')
            else:
                vector_data = np.reshape(train_data, (train_data.shape[0], -1))
                kmeans = KMeans(n_clusters=FLAGS.cluster_num, init='random', n_init=FLAGS.kmeans_run_num,
                                tol=0.00000001).fit(vector_data)
                cluster_centroid = kmeans.cluster_centers_
                cluster_centroid = np.reshape(cluster_centroid,
                                              (-1, train_data.shape[1], train_data.shape[2], train_data.shape[3]))
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                if not os.path.exists(log_path):
                    os.makedirs(log_path)
                np.save(model_path + 'cluster_centroid.npy', cluster_centroid)
            print('build AttResNet model...')
            model = AttResNet(input_conf=[[FLAGS.closeness, nb_flow, row, col], [FLAGS.period, nb_flow, row, col],
                                          [FLAGS.trend, nb_flow, row, col], [8]],
                              att_inputs=cluster_centroid, att_nodes=FLAGS.att_nodes,
                              att_layer=['conv', 'conv'],
                              att_layer_param=[[[3, 3], [1, 1, 1, 1], 8], [[3, 3], [1, 1, 1, 1], 2]],
                              batch_size=FLAGS.batch_size,
                              layer=['conv', 'res_net', 'conv'],
                              layer_param=[[[3, 3], [1, 1, 1, 1], 64],
                                           [3, [[[3, 3], [1, 1, 1, 1], 64], [[3, 3], [1, 1, 1, 1], 64]]],
                                           [[3, 3], [1, 1, 1, 1], 2]]
                              )
        else:
            print('build ResNet model...')
            model_path = 'taxi-results/model_save/ResNet/'
            log_path = 'taxi-results/log/ResNet/'
            model = ResNet(input_conf=[[FLAGS.closeness, nb_flow, row, col], [FLAGS.period, nb_flow, row, col],
                                       [FLAGS.trend, nb_flow, row, col], [8]], batch_size=FLAGS.batch_size,
                           layer=['conv', 'res_net', 'conv'],
                           layer_param=[[[3, 3], [1, 1, 1, 1], 64],
                                        [3, [[[3, 3], [1, 1, 1, 1], 64], [[3, 3], [1, 1, 1, 1], 64]]],
                                        [[3, 3], [1, 1, 1, 1], 2]]
                           )
        print('model solver...')
        solver = ModelSolver(model, train, val, preprocessing=pre_process,
                             n_epochs=FLAGS.n_epochs,
                             batch_size=FLAGS.batch_size,
                             update_rule=FLAGS.update_rule,
                             learning_rate=FLAGS.lr, save_every=FLAGS.save_every,
                             pretrained_model=FLAGS.pretrained_model, model_path=model_path,
                             test_model=model_path + 'model-' + str(FLAGS.n_epochs),
                             log_path=log_path,
                             cross_val=False, cpt_ext=True)
        if FLAGS.train:
            print('begin training...')
            test_n = {'data': test_data, 'timestamps': test_timestamps}
            _, test_prediction = solver.train(test, test_n, output_steps=FLAGS.output_steps)
            # get test_target and test_prediction
            i = pre_index
            test_target = []
            while i < len(test_data) - FLAGS.output_steps:
                test_target.append(test_data[i:i + FLAGS.output_steps])
                i += 1
            test_target = np.asarray(test_target)
        if FLAGS.test:
            print('begin testing for predicting next 1 step')
            solver.test(test)
            print('begin testing for predicting next ' + str(FLAGS.output_steps) + ' steps')
            test_n = {'data': test_data, 'timestamps': test_timestamps}
            solver.test_1_to_n(test_n)
    else:
        train_data = pre_process.transform(train_data)
        train_x, train_y = batch_data(data=train_data, batch_size=FLAGS.batch_size,
                                      input_steps=FLAGS.input_steps, output_steps=FLAGS.output_steps)
        val_data = pre_process.transform(val_data)
        val_x, val_y = batch_data(data=val_data, batch_size=FLAGS.batch_size,
                                  input_steps=FLAGS.input_steps, output_steps=FLAGS.output_steps)
        test_data = pre_process.transform(test_data)
        test_x, test_y = batch_data(data=test_data, batch_size=FLAGS.batch_size,
                                    input_steps=FLAGS.input_steps, output_steps=FLAGS.output_steps)
        train = {'x': train_x, 'y': train_y}
        val = {'x': val_x, 'y': val_y}
        test = {'x': test_x, 'y': test_y}
        input_dim = [train_data.shape[1], train_data.shape[2], train_data.shape[3]]
        if FLAGS.model == 'ConvLSTM':
            print('build ConvLSTM model...')
            model = ConvLSTM(input_dim=input_dim, batch_size=FLAGS.batch_size,
                             layer={'encoder': ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                                    'decoder': ['conv_lstm', 'conv_lstm', 'conv', 'conv']},
                             layer_param={'encoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                      [[3, 3], [1, 2, 2, 1], 16],
                                                      [[16, 16], [3, 3], 64],
                                                      [[16, 16], [3, 3], 64]],
                                          'decoder': [[[16, 16], [3, 3], 64],
                                                      [[16, 16], [3, 3], 64],
                                                      [[3, 3], [1, 2, 2, 1], 8],
                                                      [[3, 3], [1, 2, 2, 1], 2]]},
                             input_steps=10, output_steps=10)
            print('model solver...')
            solver = ModelSolver(model, train, val, preprocessing=pre_process,
                                 n_epochs=FLAGS.n_epochs,
                                 batch_size=FLAGS.batch_size,
                                 update_rule=FLAGS.update_rule,
                                 learning_rate=FLAGS.lr, save_every=FLAGS.save_every,
                                 pretrained_model=FLAGS.pretrained_model,
                                 model_path='taxi-results/model_save/ConvLSTM/',
                                 test_model='taxi-results/model_save/ConvLSTM/model-' + str(FLAGS.n_epochs),
                                 log_path='taxi-results/log/ConvLSTM/')
        elif 'AttConvLSTM' in FLAGS.model:
            # train_data: [num, row, col, channel]
            if FLAGS.use_ae:
                # auto-encoder to cluster train_data
                print('auto-encoder to cluster...')
                model_path = 'taxi-results/model_save/AEAttConvLSTM/'
                log_path = 'taxi-results/log/AEAttConvLSTM/'
                if FLAGS.pre_saved_cluster:
                    cluster_centroid = np.load(model_path + 'cluster_centroid.npy')
                else:
                    ae = AutoEncoder(input_dim=input_dim, z_dim=[16, 16, 16],
                                     layer={'encoder': ['conv', 'conv'],
                                            'decoder': ['conv', 'conv']},
                                     layer_param={'encoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                              [[3, 3], [1, 2, 2, 1], 16]],
                                                  'decoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                              [[3, 3], [1, 2, 2, 1], 2]]},
                                     model_save_path=model_path,
                                     batch_size=FLAGS.batch_size)
                    if FLAGS.ae_train:
                        ae.train(train_data, batch_size=FLAGS.batch_size, learning_rate=FLAGS.lr, n_epochs=20,
                                 pretrained_model=FLAGS.ae_pretrained_model)
                    train_z_data = ae.get_z(train_data, pretrained_model=FLAGS.ae_pretrained_model)
                    print(train_z_data.shape)
                    # k-means to cluster train_z_data
                    vector_data = np.reshape(train_z_data, (train_z_data.shape[0], -1))
                    kmeans = KMeans(n_clusters=FLAGS.cluster_num, init='random', n_init=FLAGS.kmeans_run_num,
                                    tol=0.00000001).fit(vector_data)
                    cluster_centroid = kmeans.cluster_centers_
                    print(np.array(cluster_centroid).shape)
                    # reshape to [cluster_num, row, col, channel]
                    cluster_centroid = np.reshape(cluster_centroid,
                                                  (-1, train_z_data.shape[1], train_z_data.shape[2],
                                                   train_z_data.shape[3]))
                    # decoder to original space
                    cluster_centroid = ae.get_y(cluster_centroid, pretrained_model=FLAGS.ae_pretrained_model)
                    print(cluster_centroid.shape)
                    np.save(model_path + 'cluster_centroid.npy', cluster_centroid)
            else:
                # k-means to cluster train_data
                print('k-means to cluster...')
                model_path = 'taxi-results/model_save/' + FLAGS.model + '/'
                log_path = 'taxi-results/log/' + FLAGS.model + '/'
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                if not os.path.exists(log_path):
                    os.makedirs(log_path)
                if FLAGS.pre_saved_cluster:
                    cluster_centroid = np.load(model_path + 'cluster_centroid.npy')
                else:
                    vector_data = np.reshape(train_data, (train_data.shape[0], -1))
                    # init_vectors = vector_data[:FLAGS.cluster_num, :]
                    # cluster_centroid = init_vectors
                    kmeans = KMeans(n_clusters=FLAGS.cluster_num, init='random', n_init=FLAGS.kmeans_run_num,
                                    tol=0.00000001).fit(vector_data)
                    cluster_centroid = kmeans.cluster_centers_
                    # reshape to [cluster_num, row, col, channel]
                    cluster_centroid = np.reshape(cluster_centroid,
                                                  (-1, train_data.shape[1], train_data.shape[2], train_data.shape[3]))
                    np.save(model_path + 'cluster_centroid.npy', cluster_centroid)
            # build model
            print('build ' + FLAGS.model + ' model...')
            if FLAGS.model == 'AttConvLSTM':
                model = AttConvLSTM(input_dim=input_dim,
                                    att_inputs=cluster_centroid, att_nodes=FLAGS.att_nodes,
                                    batch_size=FLAGS.batch_size,
                                    layer={'encoder': ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                                           'decoder': ['conv_lstm', 'conv_lstm', 'conv', 'conv'],
                                           'attention': ['conv', 'conv']},
                                    layer_param={'encoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                             [[3, 3], [1, 2, 2, 1], 16],
                                                             [[16, 16], [3, 3], 64],
                                                             [[16, 16], [3, 3], 64]],
                                                 'decoder': [[[16, 16], [3, 3], 64],
                                                             [[16, 16], [3, 3], 64],
                                                             [[3, 3], [1, 2, 2, 1], 8],
                                                             [[3, 3], [1, 2, 2, 1], 2]],
                                                 'attention': [[[3, 3], [1, 2, 2, 1], 8],
                                                               [[3, 3], [1, 2, 2, 1], 16]]},
                                    input_steps=10, output_steps=10)
            elif FLAGS.model == 'MultiAttConvLSTM':
                model = MultiAttConvLSTM(input_dim=input_dim,
                                         att_inputs=cluster_centroid, att_nodes=FLAGS.att_nodes,
                                         batch_size=FLAGS.batch_size,
                                         layer={'encoder': ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                                                'decoder': ['conv_lstm', 'conv_lstm', 'conv', 'conv'],
                                                'attention': ['conv', 'conv']},
                                         layer_param={'encoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                                  [[3, 3], [1, 2, 2, 1], 16],
                                                                  [[16, 16], [3, 3], 64],
                                                                  [[16, 16], [3, 3], 64]],
                                                      'decoder': [[[16, 16], [3, 3], 64],
                                                                  [[16, 16], [3, 3], 64],
                                                                  [[3, 3], [1, 2, 2, 1], 8],
                                                                  [[3, 3], [1, 2, 2, 1], 2]],
                                                      'attention': [[[3, 3], [1, 2, 2, 1], 8],
                                                                    [[3, 3], [1, 2, 2, 1], 16]]},
                                         input_steps=10, output_steps=10)
            print('model solver...')
            solver = ModelSolver(model, train, val, preprocessing=pre_process,
                                 n_epochs=FLAGS.n_epochs,
                                 batch_size=FLAGS.batch_size,
                                 update_rule=FLAGS.update_rule,
                                 learning_rate=FLAGS.lr, save_every=FLAGS.save_every,
                                 pretrained_model=FLAGS.pretrained_model, model_path=model_path,
                                 test_model=model_path + 'model-' + str(FLAGS.n_epochs), log_path=log_path)
        if FLAGS.train:
            print('begin training...')
            test_prediction, _ = solver.train(test)
            test_target = np.asarray(test_y)
        if FLAGS.test:
            print('test trained model...')
            solver.test_model = solver.model_path + FLAGS.pretrained_model
            test_prediction = solver.test(test)
            test_target = np.asarray(test_y)
    results_path = 'taxi-results/results/' + FLAGS.model + '/'
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    np.save(results_path + 'test_target.npy', test_target)
    np.save(results_path + 'test_prediction.npy', test_prediction)
    print(test_prediction.shape)
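
`batch_data` pairs each window of `input_steps` frames with the `output_steps` frames that follow it. A sketch of the assumed sliding-window behaviour (the actual loader may handle batching and remainders differently):

import numpy as np

def batch_data(data, batch_size, input_steps, output_steps):
    # Slide a window over the time axis: x is the input window, y is the
    # window immediately after it. Samples that do not fill a complete
    # batch are dropped (assumed convention).
    num = len(data) - input_steps - output_steps + 1
    num -= num % batch_size
    x = [data[i:i + input_steps] for i in range(num)]
    y = [data[i + input_steps:i + input_steps + output_steps]
         for i in range(num)]
    return np.asarray(x), np.asarray(y)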
Example #5
def main():
    parse = argparse.ArgumentParser()
    # ---------- environment setting: which gpu -------
    parse.add_argument('-gpu',
                       '--gpu',
                       type=str,
                       default='0',
                       help='which gpu to use: 0 or 1')
    parse.add_argument('-folder_name',
                       '--folder_name',
                       type=str,
                       default='datasets/didi-data/data/')
    parse.add_argument('-output_folder_name',
                       '--output_folder_name',
                       type=str,
                       default='output/didi-data/data/')
    # ---------- input/output settings -------
    parse.add_argument('-input_steps',
                       '--input_steps',
                       type=int,
                       default=6,
                       help='number of input steps')
    # ---------- model ----------
    parse.add_argument('-model',
                       '--model',
                       type=str,
                       default='GCN',
                       help='model: FC_GRU, GCN, ConvGRU, Coupled_ConvGRU, flow_ConvGRU_2, Stack_ConvGRU')
    parse.add_argument('-num_layers',
                       '--num_layers',
                       type=int,
                       default=2,
                       help='number of layers in model')
    parse.add_argument('-num_units',
                       '--num_units',
                       type=int,
                       default=64,
                       help='dim of hidden states')
    parse.add_argument('-kernel_size',
                       '--kernel_size',
                       type=int,
                       default=3,
                       help='kernel size in convolutional operations')
    #
    parse.add_argument(
        '-dy_adj',
        '--dy_adj',
        type=int,
        default=1,
        help='whether to use a dynamic adjacency matrix for the lower '
             'feature extraction layer')
    parse.add_argument(
        '-dy_filter',
        '--dy_filter',
        type=int,
        default=0,
        help='whether to use a dynamic filter to generate region-specific filters')
    parse.add_argument(
        '-att_dynamic_adj',
        '--att_dynamic_adj',
        type=int,
        default=0,
        help='whether to use a dynamic adjacency matrix in the attention parts')
    #
    parse.add_argument('-model_save',
                       '--model_save',
                       type=str,
                       default='gcn',
                       help='folder name to save model')
    parse.add_argument('-pretrained_model',
                       '--pretrained_model_path',
                       type=str,
                       default=None,
                       help='path to the pretrained model')
    # ---------- params for CNN ------------
    parse.add_argument('-num_filters',
                       '--num_filters',
                       type=int,
                       default=32,
                       help='number of filters in CNN')
    parse.add_argument('-pooling_units',
                       '--pooling_units',
                       type=int,
                       default=64,
                       help='number of pooling units')
    parse.add_argument('-dropout_keep_prob',
                       '--dropout_keep_prob',
                       type=float,
                       default=0.5,
                       help='keep probability in dropout layer')
    # ---------- training parameters --------
    parse.add_argument('-n_epochs',
                       '--n_epochs',
                       type=int,
                       default=20,
                       help='number of epochs')
    parse.add_argument('-batch_size',
                       '--batch_size',
                       type=int,
                       default=8,
                       help='batch size for training')
    parse.add_argument('-show_batches',
                       '--show_batches',
                       type=int,
                       default=100,
                       help='report progress every this many batches')
    parse.add_argument('-lr',
                       '--learning_rate',
                       type=float,
                       default=0.0002,
                       help='learning rate')
    parse.add_argument('-update_rule',
                       '--update_rule',
                       type=str,
                       default='adam',
                       help='update rule')
    # ---------- train or predict -------
    parse.add_argument('-train',
                       '--train',
                       type=int,
                       default=1,
                       help='whether to train')
    parse.add_argument('-test', '--test', type=int, default=0, help='whether to test')
    #
    args = parse.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    print('load train, test data...')
    #
    # train: 20161101 - 20161125
    # validate: 20161126 - 20161127
    # test: 20161128 - 20161130
    split = [2400, 192, 288]
    data, train_data, val_data, test_data = load_npy_data(
        filename=[args.folder_name + 'cd_didi_data.npy'], split=split)
    # data: [num, station_num, 2]
    print(data.shape)
    #
    if 'GCN' in args.model or 'FC' in args.model:
        dataloader = DataLoader_graph
    else:
        data = np.reshape(data, (-1, 20, 20, 2))
        train_data = np.reshape(train_data, (-1, 20, 20, 2))
        val_data = np.reshape(val_data, (-1, 20, 20, 2))
        test_data = np.reshape(test_data, (-1, 20, 20, 2))
        # data: [num, height, width, 2]
        print(data.shape)
        #
        dataloader = DataLoader_map
    #
    map_size = data.shape[1:-1]
    input_dim = data.shape[-1]
    num_station = np.prod(data.shape[1:-1])
    #
    f_data, train_f_data, val_f_data, test_f_data = load_npy_data(
        [args.folder_name + 'cd_didi_flow_in.npy'], split=split)
    print(len(f_data))
    print('preprocess train/val/test flow data...')
    #f_preprocessing = StandardScaler()
    f_preprocessing = MinMaxNormalization01()
    f_preprocessing.fit(train_f_data)
    train_f_data = f_preprocessing.transform(train_f_data)
    val_f_data = f_preprocessing.transform(val_f_data)
    test_f_data = f_preprocessing.transform(test_f_data)
    print('preprocess train/val/test data...')
    # pre_process = StandardScaler()
    pre_process = MinMaxNormalization01()
    pre_process.fit(train_data)
    train_data = pre_process.transform(train_data)
    val_data = pre_process.transform(val_data)
    test_data = pre_process.transform(test_data)
    #

    print('number of stations: %d' % num_station)
    #
    train_loader = dataloader(train_data,
                              train_f_data,
                              args.input_steps,
                              flow_format='identity')
    val_loader = dataloader(val_data,
                            val_f_data,
                            args.input_steps,
                            flow_format='identity')
    test_loader = dataloader(test_data,
                             test_f_data,
                             args.input_steps,
                             flow_format='identity')
    # f_adj_mx = None
    if os.path.isfile(args.folder_name + 'f_adj_mx.npy'):
        f_adj_mx = np.load(args.folder_name + 'f_adj_mx.npy')
    else:
        f_adj_mx = train_loader.get_flow_adj_mx()
        np.save(args.folder_name + 'f_adj_mx.npy', f_adj_mx)
    #
    # if args.model == 'FC_LSTM':
    #     model = FC_LSTM(num_station, args.input_steps,
    #                     num_layers=args.num_layers, num_units=args.num_units,
    #                     batch_size=args.batch_size)
    if args.model == 'FC_GRU':
        model = FC_GRU(num_station,
                       args.input_steps,
                       num_layers=args.num_layers,
                       num_units=args.num_units,
                       batch_size=args.batch_size)
    if args.model == 'GCN':
        model = GCN(num_station,
                    args.input_steps,
                    num_layers=args.num_layers,
                    num_units=args.num_units,
                    dy_adj=args.dy_adj,
                    dy_filter=args.dy_filter,
                    f_adj_mx=f_adj_mx,
                    batch_size=args.batch_size)
    if args.model == 'ConvGRU':
        model = ConvGRU(input_shape=[map_size[0], map_size[1], input_dim],
                        input_steps=args.input_steps,
                        num_layers=args.num_layers,
                        num_units=args.num_units,
                        kernel_shape=[args.kernel_size, args.kernel_size],
                        batch_size=args.batch_size)
    # if args.model == 'flow_ConvGRU':
    #     model = flow_ConvGRU(input_shape=[20, 20, input_dim], input_steps=args.input_steps,
    #                           num_layers=args.num_layers, num_units=args.num_units, kernel_shape=[args.kernel_size, args.kernel_size],
    #                           f_adj_mx=f_adj_mx,
    #                           batch_size=args.batch_size)
    if args.model == 'Coupled_ConvGRU':
        model = CoupledConvGRU(
            input_shape=[20, 20, input_dim],
            input_steps=args.input_steps,
            num_layers=args.num_layers,
            num_units=args.num_units,
            kernel_shape=[args.kernel_size, args.kernel_size],
            batch_size=args.batch_size)

    ##
    # flow_ConvGRU_2 is Stack_ConvGRU with 2 conv layers.
    if args.model == 'flow_ConvGRU_2':
        model = flow_ConvGRU_2(
            input_shape=[20, 20, input_dim],
            input_steps=args.input_steps,
            num_layers=args.num_layers,
            num_units=args.num_units,
            kernel_shape=[args.kernel_size, args.kernel_size],
            f_adj_mx=f_adj_mx,
            batch_size=args.batch_size)

    if args.model == 'Stack_ConvGRU':
        model = Stack_ConvGRU(
            input_shape=[20, 20, input_dim],
            input_steps=args.input_steps,
            num_layers=args.num_layers,
            num_units=args.num_units,
            kernel_shape=[args.kernel_size, args.kernel_size],
            f_adj_mx=f_adj_mx,
            batch_size=args.batch_size)
    #
    model_path = os.path.join(args.output_folder_name, 'model_save',
                              args.model_save)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    #model_path = os.path.join(args.folder_name, 'model_save', args.model_save)
    solver = ModelSolver(
        model,
        train_loader,
        val_loader,
        test_loader,
        pre_process,
        batch_size=args.batch_size,
        show_batches=args.show_batches,
        n_epochs=args.n_epochs,
        pretrained_model=args.pretrained_model_path,
        update_rule=args.update_rule,
        learning_rate=args.learning_rate,
        model_path=model_path,
    )
    results_path = os.path.join(model_path, 'results')
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if args.train:
        print('==================== begin training ======================')
        test_target, test_prediction = solver.train(
            os.path.join(model_path, 'out'))
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
    if args.test:
        print('==================== begin test ==========================')
        test_target, test_prediction = solver.test()
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
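
When no precomputed `f_adj_mx.npy` exists, the loader derives a static adjacency matrix from the observed flows. A hedged sketch of what `get_flow_adj_mx` could compute, assuming `f_data` holds one station-to-station flow matrix per time step (the real method may normalize or threshold differently):

import numpy as np

def get_flow_adj_mx(f_data):
    # f_data: [num_steps, num_station, num_station] flow volumes.
    # Average over time and symmetrize to obtain one static graph.
    adj = np.mean(f_data, axis=0)
    return np.maximum(adj, adj.T)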
Example #6
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    # preprocessing class
    pre_process = MinMaxNormalization01()
    print('load train, validate, test data...')
    split = [17520, 4416, 4368]
    data, train_data, val_data, test_data = load_npy_data(
        filename=['data/citybike/p_map.npy', 'data/citybike/d_map.npy'],
        split=split)
    # data: [num, row, col, channel]
    print('preprocess train data...')
    pre_process.fit(train_data)

    train_data = pre_process.transform(train_data)
    train_x, train_y = batch_data(data=train_data,
                                  batch_size=FLAGS.batch_size,
                                  input_steps=FLAGS.input_steps,
                                  output_steps=FLAGS.output_steps)
    val_data = pre_process.transform(val_data)
    val_x, val_y = batch_data(data=val_data,
                              batch_size=FLAGS.batch_size,
                              input_steps=FLAGS.input_steps,
                              output_steps=FLAGS.output_steps)
    test_data = pre_process.transform(test_data)
    test_x, test_y = batch_data(data=test_data,
                                batch_size=FLAGS.batch_size,
                                input_steps=FLAGS.input_steps,
                                output_steps=FLAGS.output_steps)
    train = {'x': train_x, 'y': train_y}
    val = {'x': val_x, 'y': val_y}
    test = {'x': test_x, 'y': test_y}
    input_dim = [train_data.shape[1], train_data.shape[2], train_data.shape[3]]
    if 'AttConvLSTM' in FLAGS.model:
        # train_data: [num, row, col, channel]
        model_path = 'citybike-results/model_save/' + FLAGS.model + '/'
        log_path = 'citybike-results/log/' + FLAGS.model + '/'
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        if not os.path.exists(log_path):
            os.makedirs(log_path)
        if FLAGS.pre_saved_cluster:
            cluster_centroid = np.load(model_path + 'cluster_centroid.npy')
        else:
            vector_data = np.reshape(train_data, (train_data.shape[0], -1))
            cluster_centroid_1 = None
            cluster_centroid_2 = None
            cluster_centroid = None
            if FLAGS.kmeans_cluster:
                print('k-means to cluster...')
                kmeans = KMeans(n_clusters=FLAGS.cluster_num,
                                init='random',
                                n_init=FLAGS.kmeans_run_num,
                                tol=0.00000001).fit(vector_data)
                cluster_centroid_1 = kmeans.cluster_centers_
            if FLAGS.average_cluster:
                print('average cluster...')
                if FLAGS.average_cluster == 24:
                    cluster_centroid_2 = average_cluster_24(vector_data)
                elif FLAGS.average_cluster == 48:
                    cluster_centroid_2 = average_cluster_48(vector_data)
            if cluster_centroid_1 is not None:
                cluster_centroid = cluster_centroid_1
            if cluster_centroid_2 is not None:
                if cluster_centroid is not None:
                    cluster_centroid = np.concatenate(
                        (cluster_centroid_1, cluster_centroid_2), axis=0)
                else:
                    cluster_centroid = cluster_centroid_2
            # reshape to [cluster_num, row, col, channel]
            cluster_centroid = np.reshape(
                cluster_centroid, (-1, train_data.shape[1],
                                   train_data.shape[2], train_data.shape[3]))
            np.save(model_path + 'cluster_centroid.npy', cluster_centroid)
        # build model
        print('build ' + FLAGS.model + ' model...')
        if FLAGS.model == 'AttConvLSTM':
            model = AttConvLSTM(input_dim=input_dim,
                                att_inputs=cluster_centroid,
                                att_nodes=FLAGS.att_nodes,
                                batch_size=FLAGS.batch_size,
                                layer={
                                    'encoder':
                                    ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                                    'decoder':
                                    ['conv_lstm', 'conv_lstm', 'conv', 'conv'],
                                    'attention': ['conv', 'conv']
                                },
                                layer_param={
                                    'encoder': [[[3, 3], [1, 1, 1, 1], 8],
                                                [[3, 3], [1, 1, 1, 1], 16],
                                                [[16, 16], [3, 3], 64],
                                                [[16, 16], [3, 3], 64]],
                                    'decoder': [[[16, 16], [3, 3], 64],
                                                [[16, 16], [3, 3], 64],
                                                [[3, 3], [1, 1, 1, 1], 8],
                                                [[3, 3], [1, 1, 1, 1], 2]],
                                    'attention': [[[3, 3], [1, 1, 1, 1], 8],
                                                  [[3, 3], [1, 1, 1, 1], 16]]
                                },
                                input_steps=10,
                                output_steps=10)
        elif FLAGS.model == 'MultiAttConvLSTM':
            model = MultiAttConvLSTM(
                input_dim=input_dim,
                att_inputs=cluster_centroid,
                att_nodes=FLAGS.att_nodes,
                batch_size=FLAGS.batch_size,
                layer={
                    'encoder': ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                    'decoder': ['conv_lstm', 'conv_lstm', 'conv', 'conv'],
                    'attention': ['conv', 'conv']
                },
                layer_param={
                    'encoder': [[[3, 3], [1, 1, 1, 1], 8],
                                [[3, 3], [1, 1, 1, 1], 16],
                                [[16, 16], [3, 3], 64], [[16, 16], [3, 3],
                                                         64]],
                    'decoder': [[[16, 16], [3, 3], 64], [[16, 16], [3, 3], 64],
                                [[3, 3], [1, 1, 1, 1], 8],
                                [[3, 3], [1, 1, 1, 1], 2]],
                    'attention': [[[3, 3], [1, 1, 1, 1], 8],
                                  [[3, 3], [1, 1, 1, 1], 16]]
                },
                input_steps=10,
                output_steps=10)
        print('model solver...')
        solver = ModelSolver(model,
                             train,
                             val,
                             preprocessing=pre_process,
                             n_epochs=FLAGS.n_epochs,
                             batch_size=FLAGS.batch_size,
                             update_rule=FLAGS.update_rule,
                             learning_rate=FLAGS.lr,
                             save_every=FLAGS.save_every,
                             pretrained_model=FLAGS.pretrained_model,
                             model_path=model_path,
                             test_model=model_path + 'model-' +
                             str(FLAGS.n_epochs),
                             log_path=log_path)
    if FLAGS.train:
        print('begin training...')
        test_prediction, _ = solver.train(test)
        test_target = np.asarray(test_y)
    if FLAGS.test:
        print('test trained model...')
        solver.test_model = solver.model_path + FLAGS.pretrained_model
        test_prediction = solver.test(test)
        test_target = np.asarray(test_y)
    results_path = 'citybike-results/results/' + FLAGS.model + '/'
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    np.save(results_path + 'test_target.npy', test_target)
    np.save(results_path + 'test_prediction.npy', test_prediction)
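
`average_cluster_24` and `average_cluster_48` are not shown. A sketch under the assumption that they average frames by their position within a 24- or 48-step daily cycle, yielding one "typical" frame per slot; the original functions may align the cycle differently:

import numpy as np

def average_cluster_24(vector_data):
    # vector_data: [num_steps, dim]. Group frames by position in a
    # 24-step cycle and average each group, producing 24 centroids
    # (assumed behaviour).
    num = len(vector_data) - len(vector_data) % 24
    grouped = np.reshape(vector_data[:num], (-1, 24, vector_data.shape[1]))
    return np.mean(grouped, axis=0)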