import os
import time

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint

# Project-specific helpers assumed to be importable from the surrounding
# repository: build_model, read_cache, cache, evaluate, multi_step_2D,
# save_map, save_to_csv, TaxiBJ3d, BikeNYC3d.

## single-step prediction, no transfer learning
# Module-level training configuration; train_model() below reads nb_epoch
# from module scope and additionally expects the Roma training tensors
# (X_train_all, Y_train_all, X_test, Y_test, mmn, external_dim,
# len_closeness, ...) to be loaded at module scope.
nb_epoch = 150
batch_size = 16
hyperparams_name = 'model3_roma32x32'
fname_param = os.path.join('MODEL_ROMA_BERGAMO',
                           '{}.best.h5'.format(hyperparams_name))
model_checkpoint = ModelCheckpoint(fname_param,
                                   monitor='val_rmse',
                                   verbose=0,
                                   save_best_only=True,
                                   mode='min')

def taxibj_evaluation():
    # parameters
    DATAPATH = '../data'
    T = 48  # number of time intervals in one day
    CACHEDATA = True  # cache data or NOT

    len_closeness = 4  # length of closeness dependent sequence
    len_period = 0  # length of period dependent sequence
    len_trend = 0  # length of trend dependent sequence

    nb_flow = 2  # there are two types of flows: new-flow and end-flow
    days_test = 4 * 7  # 4 weeks
    len_test = T * days_test
    len_val = 2 * len_test

    map_height, map_width = 32, 32  # grid size

    cache_folder = 'Autoencoder/model3'
    path_cache = os.path.join(DATAPATH, 'CACHE', cache_folder)  # cache path
    if CACHEDATA and not os.path.isdir(path_cache):
        os.makedirs(path_cache)  # create the nested cache folders in one call

    # load data
    print("loading data...")
    fname = os.path.join(
        path_cache,
        'TaxiBJ_withMeteo_C{}_P{}_T{}.h5'.format(len_closeness, len_period,
                                                 len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train_all, Y_train_all, X_train, Y_train, \
        X_val, Y_val, X_test, Y_test, mmn, external_dim, \
        timestamp_train_all, timestamp_train, timestamp_val, timestamp_test = read_cache(
            fname, 'preprocessing_bj.pkl')
        print("load %s successfully" % fname)
    else:
        X_train_all, Y_train_all, X_train, Y_train, \
        X_val, Y_val, X_test, Y_test, mmn, external_dim, \
        timestamp_train_all, timestamp_train, timestamp_val, timestamp_test = TaxiBJ3d.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend, len_test=len_test,
            len_val=len_val, preprocess_name='preprocessing_bj.pkl', meta_data=True, meteorol_data=True, holiday_data=True, datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train_all, Y_train_all, X_train, Y_train, X_val,
                  Y_val, X_test, Y_test, external_dim, timestamp_train_all,
                  timestamp_train, timestamp_val, timestamp_test)

    print(external_dim)
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])

    # build model
    model = build_model(len_closeness,
                        len_period,
                        len_trend,
                        nb_flow,
                        map_height,
                        map_width,
                        external_dim=external_dim,
                        encoder_blocks=3,
                        filters=[64, 64, 64, 64, 16],
                        kernel_size=3,
                        num_res=2)

    model_fname = 'model3resunit_doppia_attention.TaxiBJ9.c4.p0.t0.encoderblocks_3.kernel_size_3.lr_0.0007.batchsize_16.best.h5'
    model.load_weights(os.path.join('../best_models', 'model3', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model,
                                     X_test,
                                     Y_test,
                                     mmn,
                                     len_closeness,
                                     step=5)

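    # multi_step_2D returns one score dictionary per prediction horizon
    # (here steps 1 through 5); write each horizon to its own CSV.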
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join('results', f'taxibj_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)

def train_model(batch_size,
                encoder_block,
                filters,
                save_results=False,
                i=0,
                freeze=True,
                spatial=False):
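    """Train one Roma transfer-learning iteration.

    Assumed reading of the code below: encoder_block == 3 selects the
    32x32 grid with TaxiBJ source weights, any other value the 16x8 grid
    with TaxiNYC source weights; `freeze` loads pretrained weights and
    freezes everything except the attention layers (`spatial` restricts
    training to the spatial attention block only); `i` is the iteration
    index used for seeding and output file names; `save_results` appends
    evaluation scores to a CSV under 'results/'.
    """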
    # build model
    model = build_model(len_closeness,
                        len_period,
                        len_trend,
                        nb_flow,
                        map_height,
                        map_width,
                        external_dim=external_dim,
                        encoder_blocks=encoder_block,
                        filters=filters,
                        kernel_size=3,
                        num_res=2)
    if encoder_block == 3:
        if freeze:
            # load the pretrained TaxiBJ source weights
            model_fname = 'model3resunit_doppia_attention.TaxiBJ1.c4.p0.t0.encoderblocks_3.kernel_size_3.lr_0.0007.batchsize_16.noMeteo.best.h5'
            model.load_weights(
                os.path.join('../best_models', 'model3', model_fname))
            if not spatial:
                #freeze all layers except attention
                for layer in model.layers[:-28]:
                    layer.trainable = False
                hyperparams_name = 'Roma_32x32_iterazione{}_trained_attention_accuracy'.format(
                    i)
            else:
                # freeze all layers except the spatial attention block
                for layer in model.layers[:-13]:
                    layer.trainable = False
                hyperparams_name = 'Roma_32x32_iterazione{}_trained_only_spatial_accuracy'.format(
                    i)
        else:
            hyperparams_name = 'Roma_32x32_iterazione{}_trained_random_weight_accuracy'.format(
                i)
    else:
        if freeze:
            # load the pretrained TaxiNYC source weights
            model_fname = 'model3resunit_doppia_attention.TaxiNYC5.c4.p0.t0.encoderblocks_2.kernel_size_3.lr_0.00086.batchsize_48.best.h5'
            model.load_weights(
                os.path.join('../best_models', 'model3', model_fname))
            if not spatial:
                # freeze all layers except attention
                for layer in model.layers[:-28]:
                    layer.trainable = False
                hyperparams_name = 'Roma_16x8_iterazione{}_trained_attention_accuracy'.format(
                    i)
            else:
                # freeze all layers except the spatial attention block
                for layer in model.layers[:-13]:
                    layer.trainable = False
                hyperparams_name = 'Roma_16x8_iterazione{}_trained_only_spatial_accuracy'.format(
                    i)
        else:
            hyperparams_name = 'Roma_16x8_iterazione{}_trained_random_weight_accuracy'.format(
                i)

    fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))
    model_checkpoint = ModelCheckpoint(fname_param,
                                       monitor='val_rmse',
                                       verbose=0,
                                       save_best_only=True,
                                       mode='min')

    # train model
    print("training model...")
    ts = time.time()
    print(f'Iteration {i}')
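    # iteration-dependent seeds so repeated runs are reproducible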
    np.random.seed(i * 18)
    tf.random.set_seed(i * 18)
    history = model.fit(X_train_all,
                        Y_train_all,
                        epochs=nb_epoch,
                        batch_size=batch_size,
                        validation_data=(X_test, Y_test),
                        callbacks=[model_checkpoint],
                        verbose=0)
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))
    tempo = time.time() - ts

    # evaluate
    model.load_weights(fname_param)
    score = model.evaluate(X_test, Y_test, batch_size=128, verbose=0)
    print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))

    if save_results:
        print(
            'evaluating using the model that has the best loss on the valid set'
        )
        model.load_weights(
            fname_param)  # load best weights for current iteration

        Y_pred = model.predict(X_test)  # compute predictions
        score = evaluate(Y_test, Y_pred, mmn,
                         rmse_factor=1)  # evaluate performance

        # save h5 file to generate map
        save_map(Y_pred, i, freeze, spatial)

        # save to csv
        if freeze:
            if not spatial:
                csv_name = os.path.join(
                    'results',
                    f'Roma_{map_height}x{map_width}_trained_attention_results.csv'
                )
            else:
                csv_name = os.path.join(
                    'results',
                    f'Roma_{map_height}x{map_width}_trained_only_spatial_results.csv'
                )
        else:
            csv_name = os.path.join(
                'results',
                f'Roma_{map_height}x{map_width}_trained_random_weight_results.csv'
            )
        if not os.path.isdir('results'):
            os.mkdir('results')
        if not os.path.isfile(csv_name):
            with open(csv_name, 'a', encoding="utf-8") as file:
                file.write('iteration,'
                           'rmse_in,rmse_out,rmse_tot,'
                           'mape_in,mape_out,mape_tot,'
                           'ape_in,ape_out,ape_tot,'
                           'tempo_esecuzione')
                file.write("\n")
        with open(csv_name, 'a', encoding="utf-8") as file:
            file.write(
                f'{i},{score[0]},{score[1]},{score[2]},{score[3]},'
                f'{score[4]},{score[5]},{score[6]},{score[7]},{score[8]},'
                f'{tempo}')
            file.write("\n")
        K.clear_session()
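
# Example driver (a sketch, not part of the original experiment code):
# run the three Roma training regimes over several seeded iterations.
# The name run_roma_experiments and the filter list are illustrative,
# mirroring the encoder_blocks=3 settings used above.
def run_roma_experiments(iterations=10):
    for it in range(iterations):
        # transfer learning: train the attention layers only
        train_model(batch_size=16, encoder_block=3,
                    filters=[64, 64, 64, 64, 16],
                    save_results=True, i=it, freeze=True, spatial=False)
        # transfer learning: train the spatial attention block only
        train_model(batch_size=16, encoder_block=3,
                    filters=[64, 64, 64, 64, 16],
                    save_results=True, i=it, freeze=True, spatial=True)
        # baseline: random initialization, nothing frozen
        train_model(batch_size=16, encoder_block=3,
                    filters=[64, 64, 64, 64, 16],
                    save_results=True, i=it, freeze=False)
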
def bikenyc_evaluation():
    DATAPATH = '../data'
    T = 24  # number of time intervals in one day
    CACHEDATA = True  # cache data or NOT

    len_closeness = 4  # length of closeness dependent sequence
    len_period = 0  # length of period dependent sequence
    len_trend = 0  # length of trend dependent sequence

    nb_flow = 2
    days_test = 10
    len_test = T * days_test
    len_val = 2 * len_test

    map_height, map_width = 16, 8

    cache_folder = 'Autoencoder/model3'
    path_cache = os.path.join(DATAPATH, 'CACHE', cache_folder)
    if CACHEDATA and not os.path.isdir(path_cache):
        os.makedirs(path_cache)  # create the nested cache folders in one call

    # load data
    print("loading data...")
    fname = os.path.join(
        path_cache, 'BikeNYC_C{}_P{}_T{}.h5'.format(len_closeness, len_period,
                                                    len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train_all, Y_train_all, X_train, Y_train, \
        X_val, Y_val, X_test, Y_test, mmn, external_dim, \
        timestamp_train_all, timestamp_train, timestamp_val, timestamp_test = read_cache(
            fname, 'preprocessing_bikenyc.pkl')
        print("load %s successfully" % fname)
    else:
        X_train_all, Y_train_all, X_train, Y_train, \
        X_val, Y_val, X_test, Y_test, mmn, external_dim, \
        timestamp_train_all, timestamp_train, timestamp_val, timestamp_test = BikeNYC3d.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend,
            len_test=len_test,
            len_val=len_val, preprocess_name='preprocessing_bikenyc.pkl', meta_data=True, datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train_all, Y_train_all, X_train, Y_train, X_val,
                  Y_val, X_test, Y_test, external_dim, timestamp_train_all,
                  timestamp_train, timestamp_val, timestamp_test)

    # build model
    model = build_model(len_closeness,
                        len_period,
                        len_trend,
                        nb_flow,
                        map_height,
                        map_width,
                        external_dim=external_dim,
                        encoder_blocks=2,
                        filters=[64, 64, 64, 16],
                        kernel_size=3,
                        num_res=2)

    model_fname = 'model3resunit_doppia_attention.BikeNYC6.c4.p0.t0.encoderblocks_2.kernel_size_3.lr_0.0001.batchsize_16.best2.h5'
    model.load_weights(os.path.join('../best_models', 'model3', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model,
                                     X_test,
                                     Y_test,
                                     mmn,
                                     len_closeness,
                                     step=5)

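    # as in taxibj_evaluation: one score dictionary per horizon (steps 1..5)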
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join('results', f'bikenyc_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)
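
# Hypothetical entry point (illustrative; the original file may be driven
# differently): run both multi-step evaluations back to back.
if __name__ == '__main__':
    taxibj_evaluation()
    bikenyc_evaluation()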