Example #1
# Assumed context: vgg16, ConvLSTM, and spatial_decoder come from this
# repository's own modules; torch.nn supplies the Module base class.
import torch.nn as nn


class Generator_Net(nn.Module):
    def __init__(self, is_rgb=False):
        super(Generator_Net, self).__init__()
        if is_rgb:
            self.vgg16 = vgg16(channel_num=3)
        else:
            self.vgg16 = vgg16(channel_num=1)

        self.conlstm1 = ConvLSTM(input_size=(28, 28),
                                 input_dim=512,
                                 hidden_dim=[256],
                                 kernel_size=(3, 3),
                                 num_layers=1,
                                 batch_first=True,
                                 bias=True,
                                 return_all_layers=True)

        self.conlstm2 = ConvLSTM(input_size=(28, 28),
                                 input_dim=256,
                                 hidden_dim=[512],
                                 kernel_size=(3, 3),
                                 num_layers=1,
                                 batch_first=True,
                                 bias=True,
                                 return_all_layers=True)

        if is_rgb:
            self.spatial_decoder = spatial_decoder(channel_num=3)
        else:
            self.spatial_decoder = spatial_decoder(channel_num=1)
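
A minimal usage sketch (an assumption, not from the source): it instantiates the
generator and feeds the first ConvLSTM a hypothetical batch of 512-channel,
28x28 feature maps, matching the input_size/input_dim arguments above.

import torch

net = Generator_Net(is_rgb=True)
frames = torch.randn(1, 8, 512, 28, 28)   # [batch, time, channels, H, W], hypothetical
out, state = net.conlstm1(frames)         # assumed ConvLSTM call signature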
Example #2
# Assumed context: ConvLSTM comes from this repository; torch.nn is imported as nn.
class conv_lstm(nn.Module):
    def __init__(self, channel=256):
        super(conv_lstm, self).__init__()
        self.lstm = ConvLSTM(channel,
                             hidden_dim=channel,
                             kernel_size=(3, 3),
                             num_layers=1)
        self.conv = nn.Conv2d(channel * 2, channel, kernel_size=1)
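
The 1x1 convolution expects channel * 2 inputs, which suggests the ConvLSTM
output is concatenated with its input along the channel axis. A hypothetical
forward pass under that assumption (the ConvLSTM return signature is not shown
in the source):

    def forward(self, x, state=None):
        out, state = self.lstm(x, state)               # assumed ConvLSTM API
        fused = self.conv(torch.cat([x, out], dim=1))  # fuse along channels
        return fused, state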
Example #3
# Assumed context: torch and torch.nn (as nn) are imported; Cos_Attn_self and
# ConvLSTM come from this repository.
class CriterionLSTMGAN(nn.Module):
    def __init__(self, ignore_index=255, mode='fea', use_weight=True, pool=2):
        super(CriterionLSTMGAN, self).__init__()
        self.ignore_index = ignore_index
        self.use_weight = use_weight
        self.pool = pool
        self.mode = mode
        self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index)

        self.attn = Cos_Attn_self('relu')
        self.criterion_sd = torch.nn.MSELoss()
        nf = 1
        self.convlstm = ConvLSTM(input_size=1, hidden_size=nf * 4, kernel_size=3)
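
A minimal sketch of the cross-entropy term with hypothetical shapes; pixels
labeled 255 are excluded from the loss, matching ignore_index above:

crit = CriterionLSTMGAN()
logits = torch.randn(2, 19, 64, 64)         # [batch, classes, H, W], hypothetical
labels = torch.randint(0, 19, (2, 64, 64))  # [batch, H, W]
labels[0, 0, 0] = 255                       # this pixel is ignored
print(crit.criterion(logits, labels))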
Example #4
# Assumed imports: load_npy_data, MinMaxNormalization01, DataLoader_graph,
# DataLoader_map, ModelSolver, and the model classes come from this repository.
import argparse
import os

import numpy as np


def main():
    parse = argparse.ArgumentParser()
    # ---------- environment setting: which gpu -------
    parse.add_argument('-gpu',
                       '--gpu',
                       type=str,
                       default='0',
                       help='which gpu to use: 0 or 1')
    parse.add_argument('-folder_name',
                       '--folder_name',
                       type=str,
                       default='datasets/taxi-data/graph-data/')
    parse.add_argument('-output_folder_name',
                       '--output_folder_name',
                       type=str,
                       default='output/taxi-data/graph-data/')
    # ---------- input/output settings -------
    parse.add_argument('-input_steps',
                       '--input_steps',
                       type=int,
                       default=6,
                       help='number of input steps')
    # ---------- model ----------
    parse.add_argument('-model',
                       '--model',
                       type=str,
                       default='GCN',
                       help='model: GCN, ConvLSTM, flow_ConvLSTM')
    parse.add_argument('-num_layers',
                       '--num_layers',
                       type=int,
                       default=2,
                       help='number of layers in model')
    parse.add_argument('-num_units',
                       '--num_units',
                       type=int,
                       default=64,
                       help='dim of hidden states')
    parse.add_argument('-kernel_size',
                       '--kernel_size',
                       type=int,
                       default=3,
                       help='kernel size in convolutional operations')
    #
    parse.add_argument('-dy_adj',
                       '--dy_adj',
                       type=int,
                       default=1,
                       help='whether to use a dynamic adjacency matrix for the '
                       'lower feature-extraction layer')
    parse.add_argument('-dy_filter',
                       '--dy_filter',
                       type=int,
                       default=0,
                       help='whether to use a dynamic filter to generate '
                       'region-specific filters')
    #parse.add_argument('-att_dynamic_adj', '--att_dynamic_adj', type=int, default=0, help='whether to use a dynamic adjacency matrix in attention parts')
    #
    parse.add_argument('-model_save',
                       '--model_save',
                       type=str,
                       default='gcn',
                       help='folder name to save model')
    parse.add_argument('-pretrained_model',
                       '--pretrained_model_path',
                       type=str,
                       default=None,
                       help='path to the pretrained model')
    # ---------- params for CNN ------------
    parse.add_argument('-num_filters',
                       '--num_filters',
                       type=int,
                       default=32,
                       help='number of filters in CNN')
    parse.add_argument('-pooling_units',
                       '--pooling_units',
                       type=int,
                       default=64,
                       help='number of pooling units')
    parse.add_argument('-dropout_keep_prob',
                       '--dropout_keep_prob',
                       type=float,
                       default=0.5,
                       help='keep probability in dropout layer')
    # ---------- training parameters --------
    parse.add_argument('-n_epochs',
                       '--n_epochs',
                       type=int,
                       default=20,
                       help='number of epochs')
    parse.add_argument('-batch_size',
                       '--batch_size',
                       type=int,
                       default=8,
                       help='batch size for training')
    parse.add_argument('-show_batches',
                       '--show_batches',
                       type=int,
                       default=100,
                       help='show how many batches have been processed.')
    parse.add_argument('-lr',
                       '--learning_rate',
                       type=float,
                       default=0.0002,
                       help='learning rate')
    parse.add_argument('-update_rule',
                       '--update_rule',
                       type=str,
                       default='adam',
                       help='update rule')
    # ---------- train or predict -------
    parse.add_argument('-train',
                       '--train',
                       type=int,
                       default=1,
                       help='whether to train')
    parse.add_argument('-test', '--test', type=int, default=0, help='if test')
    #
    args = parse.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    print('load train, test data...')
    # train: 20140101 - 20150430
    # validate: 20150501 - 20150531
    # test: 20150601 - 20150630
    split = [11640, 744, 720]
    data, train_data, val_data, test_data = load_npy_data(
        filename=[args.folder_name + 'nyc_taxi_data.npy'], split=split)
    # data: [num, station_num, 2]
    print(data.shape)
    #
    if 'GCN' in args.model or 'FC' in args.model:
        dataloader = DataLoader_graph
    else:
        data = np.reshape(data, (-1, 20, 10, 2))
        train_data = np.reshape(train_data, (-1, 20, 10, 2))
        val_data = np.reshape(val_data, (-1, 20, 10, 2))
        test_data = np.reshape(test_data, (-1, 20, 10, 2))
        # data: [num, height, width, 2]
        print(data.shape)
        #
        dataloader = DataLoader_map
    #
    map_size = data.shape[1:-1]
    input_dim = data.shape[-1]
    num_station = np.prod(data.shape[1:-1])
    #
    f_data, train_f_data, val_f_data, test_f_data = load_npy_data(
        [args.folder_name + 'nyc_taxi_flow_in.npy'], split=split)
    print(len(f_data))
    print('preprocess train/val/test flow data...')
    #f_preprocessing = StandardScaler()
    f_preprocessing = MinMaxNormalization01()
    f_preprocessing.fit(train_f_data)
    train_f_data = f_preprocessing.transform(train_f_data)
    val_f_data = f_preprocessing.transform(val_f_data)
    test_f_data = f_preprocessing.transform(test_f_data)
    print('preprocess train/val/test data...')
    # pre_process = StandardScaler()
    pre_process = MinMaxNormalization01()
    pre_process.fit(train_data)
    train_data = pre_process.transform(train_data)
    val_data = pre_process.transform(val_data)
    test_data = pre_process.transform(test_data)
    #

    print('number of stations: %d' % num_station)
    #
    train_loader = dataloader(train_data,
                              train_f_data,
                              args.input_steps,
                              flow_format='identity')
    val_loader = dataloader(val_data,
                            val_f_data,
                            args.input_steps,
                            flow_format='identity')
    test_loader = dataloader(test_data,
                             test_f_data,
                             args.input_steps,
                             flow_format='identity')
    # f_adj_mx = None
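    # Compute the flow adjacency matrix once from the training loader and cache
    # it as f_adj_mx.npy; delete the cached file to force recomputation.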
    if os.path.isfile(args.folder_name + 'f_adj_mx.npy'):
        f_adj_mx = np.load(args.folder_name + 'f_adj_mx.npy')
    else:
        f_adj_mx = train_loader.get_flow_adj_mx()
        np.save(args.folder_name + 'f_adj_mx.npy', f_adj_mx)

    if args.model == 'FC_LSTM':
        model = FC_LSTM(num_station,
                        args.input_steps,
                        num_layers=args.num_layers,
                        num_units=args.num_units,
                        batch_size=args.batch_size)
    if args.model == 'FC_GRU':
        model = FC_GRU(num_station,
                       args.input_steps,
                       num_layers=args.num_layers,
                       num_units=args.num_units,
                       batch_size=args.batch_size)
    if args.model == 'GCN':
        model = GCN(num_station,
                    args.input_steps,
                    num_layers=args.num_layers,
                    num_units=args.num_units,
                    dy_adj=args.dy_adj,
                    dy_filter=args.dy_filter,
                    f_adj_mx=f_adj_mx,
                    batch_size=args.batch_size)
    if args.model == 'ConvGRU':
        model = ConvGRU(input_shape=[map_size[0], map_size[1], input_dim],
                        input_steps=args.input_steps,
                        num_layers=args.num_layers,
                        num_units=args.num_units,
                        kernel_shape=[args.kernel_size, args.kernel_size],
                        batch_size=args.batch_size)
    if args.model == 'ConvLSTM':
        model = ConvLSTM(input_shape=[map_size[0], map_size[1], input_dim],
                         input_steps=args.input_steps,
                         num_layers=args.num_layers,
                         num_units=args.num_units,
                         kernel_shape=[args.kernel_size, args.kernel_size],
                         batch_size=args.batch_size)
    # if args.model == 'flow_ConvGRU':
    #     model = flow_ConvGRU(input_shape=[20, 10, input_dim], input_steps=args.input_steps,
    #                           num_layers=args.num_layers, num_units=args.num_units,kernel_shape=[args.kernel_size, args.kernel_size],
    #                           f_adj_mx=f_adj_mx,
    #                           batch_size=args.batch_size)
    if args.model == 'Coupled_ConvGRU':
        model = CoupledConvGRU(
            input_shape=[20, 10, input_dim],
            input_steps=args.input_steps,
            num_layers=args.num_layers,
            num_units=args.num_units,
            kernel_shape=[args.kernel_size, args.kernel_size],
            batch_size=args.batch_size)
    ##
    # flow_ConvGRU_2 is stack_ConvGRU with 2 layers.
    if args.model == 'flow_ConvGRU_2':
        model = flow_ConvGRU_2(
            input_shape=[20, 10, input_dim],
            input_steps=args.input_steps,
            num_layers=args.num_layers,
            num_units=args.num_units,
            kernel_shape=[args.kernel_size, args.kernel_size],
            f_adj_mx=f_adj_mx,
            batch_size=args.batch_size)
    if args.model == 'Stack_ConvGRU':
        model = Stack_ConvGRU(
            input_shape=[20, 10, input_dim],
            input_steps=args.input_steps,
            num_layers=args.num_layers,
            num_units=args.num_units,
            kernel_shape=[args.kernel_size, args.kernel_size],
            f_adj_mx=f_adj_mx,
            batch_size=args.batch_size)
    #
    model_path = os.path.join(args.output_folder_name, 'model_save',
                              args.model_save)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    #model_path = os.path.join(args.folder_name, 'model_save', args.model_save)
    solver = ModelSolver(
        model,
        train_loader,
        val_loader,
        test_loader,
        pre_process,
        batch_size=args.batch_size,
        show_batches=args.show_batches,
        n_epochs=args.n_epochs,
        pretrained_model=args.pretrained_model_path,
        update_rule=args.update_rule,
        learning_rate=args.learning_rate,
        model_path=model_path,
    )
    results_path = os.path.join(model_path, 'results')
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if args.train:
        print('==================== begin training ======================')
        test_target, test_prediction = solver.train(
            os.path.join(model_path, 'out'))
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
    if args.test:
        print('==================== begin test ==========================')
        test_target, test_prediction = solver.test()
        np.save(os.path.join(results_path, 'test_target.npy'), test_target)
        np.save(os.path.join(results_path, 'test_prediction.npy'),
                test_prediction)
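
With the defaults above, the script trains the GCN variant on the taxi graph
data. A hypothetical invocation for the ConvLSTM variant (the file name main.py
is an assumption):

# python main.py --model ConvLSTM --num_layers 2 --num_units 64 \
#                --kernel_size 3 --batch_size 8 --train 1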
Example #5
# Assumed imports: the model classes, ModelSolver, AutoEncoder, batch_data
# helpers, load_npy_data, MinMaxNormalization01, gen_timestamps, and FLAGS
# come from this repository.
import os

import numpy as np
from sklearn.cluster import KMeans


def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    # preprocessing class
    pre_process = MinMaxNormalization01()
    print('load train, validate, test data...')
    split = [17520, 4416, 4368]
    data, train_data, val_data, test_data = load_npy_data(
        filename=['data/citybike/p_map.npy', 'data/citybike/d_map.npy'],
        split=split)
    # data: [num, row, col, channel]
    print('preprocess train data...')
    pre_process.fit(train_data)

    if 'ResNet' in FLAGS.model:
        pre_index = max(FLAGS.closeness * 1, FLAGS.period * 7,
                        FLAGS.trend * 7 * 24)
        all_timestamps = gen_timestamps(['2013', '2014', '2015', '2016'])
        all_timestamps = all_timestamps[4344:-4416]
        data = pre_process.transform(data)
        # train_data = train_data
        train_data = data[:split[0]]
        val_data = data[split[0] - pre_index:split[0] + split[1]]
        test_data = data[split[0] + split[1] - pre_index:split[0] + split[1] +
                         split[2]]
        # get train, validate, test timestamps
        train_timestamps = all_timestamps[:split[0]]
        val_timestamps = all_timestamps[split[0] - pre_index:split[0] +
                                        split[1]]
        test_timestamps = all_timestamps[split[0] + split[1] -
                                         pre_index:split[0] + split[1] +
                                         split[2]]
        # get x, y
        train_x, train_y = batch_data_cpt_ext(train_data,
                                              train_timestamps,
                                              batch_size=FLAGS.batch_size,
                                              close=FLAGS.closeness,
                                              period=FLAGS.period,
                                              trend=FLAGS.trend)
        val_x, val_y = batch_data_cpt_ext(val_data,
                                          val_timestamps,
                                          batch_size=FLAGS.batch_size,
                                          close=FLAGS.closeness,
                                          period=FLAGS.period,
                                          trend=FLAGS.trend)
        test_x, test_y = batch_data_cpt_ext(test_data,
                                            test_timestamps,
                                            batch_size=FLAGS.batch_size,
                                            close=FLAGS.closeness,
                                            period=FLAGS.period,
                                            trend=FLAGS.trend)
        train = {'x': train_x, 'y': train_y}
        val = {'x': val_x, 'y': val_y}
        test = {'x': test_x, 'y': test_y}
        nb_flow = train_data.shape[-1]
        row = train_data.shape[1]
        col = train_data.shape[2]
        if FLAGS.model == 'AttResNet':
            print('k-means to cluster...')
            model_path = 'citybike-results/model_save/AttResNet/'
            log_path = 'citybike-results/log/AttResNet/'
            if FLAGS.pre_saved_cluster:
                cluster_centroid = np.load(model_path + 'cluster_centroid.npy')
            else:
                vector_data = np.reshape(train_data, (train_data.shape[0], -1))
                # init_vectors = vector_data[:FLAGS.cluster_num, :]
                # cluster_centroid = init_vectors
                kmeans = KMeans(n_clusters=FLAGS.cluster_num,
                                init='random',
                                n_init=FLAGS.kmeans_run_num,
                                tol=0.00000001).fit(vector_data)
                cluster_centroid = kmeans.cluster_centers_
                # reshape to [cluster_num, row, col, channel]
                cluster_centroid = np.reshape(
                    cluster_centroid,
                    (-1, train_data.shape[1], train_data.shape[2],
                     train_data.shape[3]))
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                if not os.path.exists(log_path):
                    os.makedirs(log_path)
                np.save(model_path + 'cluster_centroid.npy', cluster_centroid)
            print('build AttResNet model...')
            model = AttResNet(input_conf=[[FLAGS.closeness, nb_flow, row, col],
                                          [FLAGS.period, nb_flow, row, col],
                                          [FLAGS.trend, nb_flow, row, col],
                                          [8]],
                              att_inputs=cluster_centroid,
                              att_nodes=FLAGS.att_nodes,
                              att_layer=['conv', 'conv'],
                              att_layer_param=[[[3, 3], [1, 1, 1, 1], 8],
                                               [[3, 3], [1, 1, 1, 1], 2]],
                              batch_size=FLAGS.batch_size,
                              layer=['conv', 'res_net', 'conv'],
                              layer_param=[[[3, 3], [1, 1, 1, 1], 64],
                                           [
                                               3,
                                               [[[3, 3], [1, 1, 1, 1], 64],
                                                [[3, 3], [1, 1, 1, 1], 64]]
                                           ], [[3, 3], [1, 1, 1, 1], 2]])
        else:
            print('build ResNet model...')
            model_path = 'citybike-results/model_save/ResNet/'
            log_path = 'citybike-results/log/ResNet/'
            model = ResNet(input_conf=[[FLAGS.closeness, nb_flow, row, col],
                                       [FLAGS.period, nb_flow, row, col],
                                       [FLAGS.trend, nb_flow, row, col], [8]],
                           batch_size=FLAGS.batch_size,
                           layer=['conv', 'res_net', 'conv'],
                           layer_param=[[[3, 3], [1, 1, 1, 1], 64],
                                        [
                                            3,
                                            [[[3, 3], [1, 1, 1, 1], 64],
                                             [[3, 3], [1, 1, 1, 1], 64]]
                                        ], [[3, 3], [1, 1, 1, 1], 2]])
        print('model solver...')
        solver = ModelSolver(
            model,
            train,
            val,
            preprocessing=pre_process,
            n_epochs=FLAGS.n_epochs,
            batch_size=FLAGS.batch_size,
            update_rule=FLAGS.update_rule,
            learning_rate=FLAGS.lr,
            save_every=FLAGS.save_every,
            pretrained_model=FLAGS.pretrained_model,
            model_path=model_path,
            test_model='citybike-results/model_save/ResNet/model-' +
            str(FLAGS.n_epochs),
            log_path=log_path,
            cross_val=False,
            cpt_ext=True)
        if FLAGS.train:
            print('begin training...')
            test_n = {'data': test_data, 'timestamps': test_timestamps}
            _, test_prediction = solver.train(test,
                                              test_n,
                                              output_steps=FLAGS.output_steps)
            # get test_target and test_prediction
            i = pre_index
            test_target = []
            while i < len(test_data) - FLAGS.output_steps:
                test_target.append(test_data[i:i + FLAGS.output_steps])
                i += 1
            test_target = np.asarray(test_target)
            # np.save('results/ResNet/test_target.npy', test_target)
            # np.save('results/ResNet/test_prediction.npy', test_prediction)
        if FLAGS.test:
            print('begin testing for predicting next 1 step')
            solver.test(test)
            # test 1 to n
            print('begin testing for predicting next ' +
                  str(FLAGS.output_steps) + ' steps')
            test_n = {'data': test_data, 'timestamps': test_timestamps}
            solver.test_1_to_n(test_n)
            #solver.test_1_to_n(test_n, n=FLAGS.output_steps, close=FLAGS.closeness, period=FLAGS.period, trend=FLAGS.trend)
    else:
        train_data = pre_process.transform(train_data)
        train_x, train_y = batch_data(data=train_data,
                                      batch_size=FLAGS.batch_size,
                                      input_steps=FLAGS.input_steps,
                                      output_steps=FLAGS.output_steps)
        val_data = pre_process.transform(val_data)
        val_x, val_y = batch_data(data=val_data,
                                  batch_size=FLAGS.batch_size,
                                  input_steps=FLAGS.input_steps,
                                  output_steps=FLAGS.output_steps)
        test_data = pre_process.transform(test_data)
        test_x, test_y = batch_data(data=test_data,
                                    batch_size=FLAGS.batch_size,
                                    input_steps=FLAGS.input_steps,
                                    output_steps=FLAGS.output_steps)
        train = {'x': train_x, 'y': train_y}
        val = {'x': val_x, 'y': val_y}
        test = {'x': test_x, 'y': test_y}
        input_dim = [
            train_data.shape[1], train_data.shape[2], train_data.shape[3]
        ]
        if FLAGS.model == 'ConvLSTM':
            print('build ConvLSTM model...')
            model = ConvLSTM(input_dim=input_dim,
                             batch_size=FLAGS.batch_size,
                             layer={
                                 'encoder':
                                 ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                                 'decoder':
                                 ['conv_lstm', 'conv_lstm', 'conv', 'conv']
                             },
                             layer_param={
                                 'encoder': [[[3, 3], [1, 1, 1, 1], 8],
                                             [[3, 3], [1, 1, 1, 1], 16],
                                             [[16, 16], [3, 3], 64],
                                             [[16, 16], [3, 3], 64]],
                                 'decoder': [[[16, 16], [3, 3], 64],
                                             [[16, 16], [3, 3], 64],
                                             [[3, 3], [1, 1, 1, 1], 8],
                                             [[3, 3], [1, 1, 1, 1], 2]]
                             },
                             input_steps=10,
                             output_steps=10)
            print('model solver...')
            solver = ModelSolver(
                model,
                train,
                val,
                preprocessing=pre_process,
                n_epochs=FLAGS.n_epochs,
                batch_size=FLAGS.batch_size,
                update_rule=FLAGS.update_rule,
                learning_rate=FLAGS.lr,
                save_every=FLAGS.save_every,
                pretrained_model=FLAGS.pretrained_model,
                model_path='citybike-results/model_save/ConvLSTM/',
                test_model='citybike-results/model_save/ConvLSTM/model-' +
                str(FLAGS.n_epochs),
                log_path='citybike-results/log/ConvLSTM/')
        elif 'AttConvLSTM' in FLAGS.model:
            # train_data: [num, row, col, channel]
            if FLAGS.use_ae:
                # auto-encoder to cluster train_data
                print('auto-encoder to cluster...')
                model_path = 'citybike-results/model_save/AEAttConvLSTM/'
                log_path = 'citybike-results/log/AEAttConvLSTM/'
                if FLAGS.pre_saved_cluster:
                    cluster_centroid = np.load(model_path +
                                               'cluster_centroid.npy')
                else:
                    ae = AutoEncoder(input_dim=input_dim,
                                     z_dim=[4, 4, 16],
                                     layer={
                                         'encoder': ['conv', 'conv'],
                                         'decoder': ['conv', 'conv']
                                     },
                                     layer_param={
                                         'encoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                     [[3, 3], [1, 2, 2, 1],
                                                      16]],
                                         'decoder': [[[3, 3], [1, 2, 2, 1], 8],
                                                     [[3, 3], [1, 2, 2, 1], 2]]
                                     },
                                     model_save_path=model_path,
                                     batch_size=FLAGS.batch_size)
                    if FLAGS.ae_train:
                        ae.train(train_data,
                                 batch_size=FLAGS.batch_size,
                                 learning_rate=FLAGS.lr,
                                 n_epochs=20,
                                 pretrained_model=FLAGS.ae_pretrained_model)
                    train_z_data = ae.get_z(
                        train_data, pretrained_model=FLAGS.ae_pretrained_model)
                    print(train_z_data.shape)
                    # k-means to cluster train_z_data
                    vector_data = np.reshape(train_z_data,
                                             (train_z_data.shape[0], -1))
                    # save vector data to visualize
                    np.save(model_path + 'vector_data.npy', vector_data)
                    kmeans = KMeans(n_clusters=FLAGS.cluster_num,
                                    init='random',
                                    n_init=FLAGS.kmeans_run_num,
                                    tol=0.00000001).fit(vector_data)
                    cluster_centroid = kmeans.cluster_centers_
                    print(np.array(cluster_centroid).shape)
                    # reshape to [cluster_num, row, col, channel]
                    cluster_centroid = np.reshape(
                        cluster_centroid,
                        (-1, train_z_data.shape[1], train_z_data.shape[2],
                         train_z_data.shape[3]))
                    # decoder to original space
                    cluster_centroid = ae.get_y(
                        cluster_centroid,
                        pretrained_model=FLAGS.ae_pretrained_model)
                    print(cluster_centroid.shape)
                    np.save(model_path + 'cluster_centroid.npy',
                            cluster_centroid)
            else:
                # k-means to cluster train_data
                print('k-means to cluster...')
                model_path = 'citybike-results/model_save/' + FLAGS.model + '/'
                log_path = 'citybike-results/log/' + FLAGS.model + '/'
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                if not os.path.exists(log_path):
                    os.makedirs(log_path)
                if FLAGS.pre_saved_cluster:
                    cluster_centroid = np.load(model_path +
                                               'cluster_centroid.npy')
                else:
                    vector_data = np.reshape(train_data,
                                             (train_data.shape[0], -1))
                    #init_vectors = vector_data[:FLAGS.cluster_num, :]
                    #cluster_centroid = init_vectors
                    kmeans = KMeans(n_clusters=FLAGS.cluster_num,
                                    init='random',
                                    n_init=FLAGS.kmeans_run_num,
                                    tol=0.00000001).fit(vector_data)
                    cluster_centroid = kmeans.cluster_centers_
                    # reshape to [cluster_num, row, col, channel]
                    cluster_centroid = np.reshape(
                        cluster_centroid,
                        (-1, train_data.shape[1], train_data.shape[2],
                         train_data.shape[3]))
                    np.save(model_path + 'cluster_centroid.npy',
                            cluster_centroid)
            # build model
            print('build ' + FLAGS.model + ' model...')
            if FLAGS.model == 'AttConvLSTM':
                model = AttConvLSTM(
                    input_dim=input_dim,
                    att_inputs=cluster_centroid,
                    att_nodes=FLAGS.att_nodes,
                    batch_size=FLAGS.batch_size,
                    layer={
                        'encoder': ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                        'decoder': ['conv_lstm', 'conv_lstm', 'conv', 'conv'],
                        'attention': ['conv', 'conv']
                    },
                    layer_param={
                        'encoder': [[[3, 3], [1, 1, 1, 1], 8],
                                    [[3, 3], [1, 1, 1, 1], 16],
                                    [[16, 16], [3, 3], 64],
                                    [[16, 16], [3, 3], 64]],
                        'decoder': [[[16, 16], [3, 3], 64],
                                    [[16, 16], [3, 3], 64],
                                    [[3, 3], [1, 1, 1, 1], 8],
                                    [[3, 3], [1, 1, 1, 1], 2]],
                        'attention': [[[3, 3], [1, 1, 1, 1], 8],
                                      [[3, 3], [1, 1, 1, 1], 16]]
                    },
                    input_steps=10,
                    output_steps=10)
            elif FLAGS.model == 'MultiAttConvLSTM':
                model = MultiAttConvLSTM(
                    input_dim=input_dim,
                    att_inputs=cluster_centroid,
                    att_nodes=FLAGS.att_nodes,
                    batch_size=FLAGS.batch_size,
                    layer={
                        'encoder': ['conv', 'conv', 'conv_lstm', 'conv_lstm'],
                        'decoder': ['conv_lstm', 'conv_lstm', 'conv', 'conv'],
                        'attention': ['conv', 'conv']
                    },
                    layer_param={
                        'encoder': [[[3, 3], [1, 1, 1, 1], 8],
                                    [[3, 3], [1, 1, 1, 1], 16],
                                    [[16, 16], [3, 3], 64],
                                    [[16, 16], [3, 3], 64]],
                        'decoder': [[[16, 16], [3, 3], 64],
                                    [[16, 16], [3, 3], 64],
                                    [[3, 3], [1, 1, 1, 1], 8],
                                    [[3, 3], [1, 1, 1, 1], 2]],
                        'attention': [[[3, 3], [1, 1, 1, 1], 8],
                                      [[3, 3], [1, 1, 1, 1], 16]]
                    },
                    input_steps=10,
                    output_steps=10)
            print('model solver...')
            solver = ModelSolver(model,
                                 train,
                                 val,
                                 preprocessing=pre_process,
                                 n_epochs=FLAGS.n_epochs,
                                 batch_size=FLAGS.batch_size,
                                 update_rule=FLAGS.update_rule,
                                 learning_rate=FLAGS.lr,
                                 save_every=FLAGS.save_every,
                                 pretrained_model=FLAGS.pretrained_model,
                                 model_path=model_path,
                                 test_model=model_path + 'model-' +
                                 str(FLAGS.n_epochs),
                                 log_path=log_path)
        if FLAGS.train:
            print('begin training...')
            test_prediction, _ = solver.train(test)
            test_target = np.asarray(test_y)
        if FLAGS.test:
            print('test trained model...')
            solver.test_model = solver.model_path + FLAGS.pretrained_model
            test_prediction = solver.test(test)
            test_target = np.asarray(test_y)
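    # Note (an assumption, not from the source): the results directory below is
    # never created by this script, so a first run may fail inside np.save; a
    # guard such as
    #     os.makedirs('citybike-results/results/' + FLAGS.model, exist_ok=True)
    # would avoid that.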
    np.save('citybike-results/results/' + FLAGS.model + '/test_target.npy',
            test_target)
    np.save('citybike-results/results/' + FLAGS.model + '/test_prediction.npy',
            test_prediction)