Exemplo n.º 1
0
def test_cities(cities, repeat=1):
    """Train ``repeat`` RLSTM models per city, then reload and evaluate them.

    The first pass fits one model per (city, run index) pair on the stations
    listed in ``city2stations[city]``.  The second pass rebuilds every model
    from the ``.yaml``, ``_norm_info.pkl`` and ``_weights.hdf5`` files under
    ``model_savedir`` and calls ``test_model`` on the train and validation
    splits.

    Args:
        cities: keys into ``city2stations``.  Traversed twice, so it must be
            re-iterable (e.g. a list, not a generator).
        repeat: number of training runs per city; run ``i`` is named
            ``city + str(i)``.
    """
    beijing_only = True

    # Pass 1: train every (city, run) model.
    for city in cities:
        for i in range(repeat):
            train_data, valid_data = load_data3(stations=city2stations[city],
                                                starttime=starttime,
                                                endtime=endtime,
                                                filter=(not beijing_only),
                                                normalize_target=False)
            X_train, y_train, X_valid, y_valid = build_lstm_dataset(
                train_data, valid_data, pred_range=pred_range, hist_len=3)
            print('X_train[0].shape = %s' % (X_train[0].shape,))
            name = city
            n_features = X_train[0].shape[-1]
            rlstm = build_rlstm(n_features,
                                h0_dim=20,
                                h1_dim=20,
                                rec_layer_init='zero',
                                fix_b_f=True,
                                base_name=name,
                                add_input_noise=beijing_only,
                                add_target_noise=False)
            rlstm.name = name + str(i)
            rlstm.data = [train_data, valid_data]

            # All-ones mask, except the last feature (annotated "pm25 mean"
            # by the original author), which is zeroed when beijing_only.
            rlstm.X_mask = np.ones((n_features, ), dtype='int')
            rlstm.X_mask[-1:] = not beijing_only

            print('\ntraining %s' % rlstm.name)
            X_train[0], X_valid[0] = normalize(X_train[0], X_valid[0], rlstm)
            # Write under model_savedir: pass 2 loads the file from exactly
            # this path, so a bare relative name would not be found.
            rlstm.save_normalization_info(model_savedir + name +
                                          '_norm_info.pkl')
            batch_size = (1 + int(not beijing_only)) * 64
            patience = 10
            train(X_train,
                  y_train,
                  X_valid,
                  y_valid,
                  rlstm,
                  batch_size=batch_size,
                  patience=patience,
                  nb_epoch=300)

    # Pass 2: restore every model from disk and evaluate it.
    for city in cities:
        for i in range(repeat):
            name = city
            # Context manager closes the YAML file as soon as it is read.
            with open(model_savedir + name + '.yaml') as yaml_file:
                rlstm = model_from_yaml(yaml_file.read())
            rlstm.name = name + str(i)
            rlstm.load_normalization_info(model_savedir + name +
                                          '_norm_info.pkl')
            rlstm.load_weights(model_savedir + rlstm.name + '_weights.hdf5')

            # NOTE(review): unlike pass 1, this call relies on load_data3's
            # defaults for `filter` and `normalize_target` -- confirm that
            # is intended.
            train_data, valid_data = load_data3(stations=city2stations[city],
                                                starttime=starttime,
                                                endtime=endtime)
            rlstm.data = [train_data, valid_data]
            print('\n' + rlstm.name)
            test_model(rlstm, dataset='train', show_details=False)
            test_model(rlstm, dataset='valid', show_details=True)
Exemplo n.º 2
0
def _fit_rlstm(fit_data, valid_data, model_data, model_name, base_name,
               is_city, nb_epoch):
    """Build, normalise and train one RLSTM model, then return it.

    Args:
        fit_data: training split handed to ``build_lstm_dataset``.
        valid_data: validation split handed to ``build_lstm_dataset``.
        model_data: value stored on the model as ``rlstm.data``.
        model_name: stored as ``rlstm.name``; also the stem of the
            normalisation-info file written under ``model_savedir``.
        base_name: forwarded to ``build_rlstm`` as ``base_name``.
        is_city: toggles ``fix_b_f`` / ``add_input_noise``, the last
            ``X_mask`` entry, the batch size (64 for cities, 128 otherwise)
            and the patience (20 for cities, 10 otherwise).
        nb_epoch: forwarded to ``train`` as ``nb_epoch``.

    Returns:
        The model object returned by ``build_rlstm`` after ``train`` ran.
    """
    X_train, y_train, X_valid, y_valid = build_lstm_dataset(
        fit_data, valid_data, pred_range=pred_range, hist_len=3)
    print('X_train[0].shape = %s' % (X_train[0].shape,))
    n_features = X_train[0].shape[-1]
    rlstm = build_rlstm(n_features, h0_dim=20, h1_dim=20,
                        rec_layer_init='zero', fix_b_f=is_city,
                        base_name=base_name,
                        add_input_noise=is_city, add_target_noise=False)
    rlstm.name = model_name
    rlstm.data = model_data

    # All-ones mask, except the last feature (annotated "pm25 mean" by the
    # original author), which is zeroed for city models.
    rlstm.X_mask = np.ones((n_features,), dtype='int')
    rlstm.X_mask[-1:] = not is_city

    print('\ntraining %s' % rlstm.name)
    X_train[0], X_valid[0] = normalize(X_train[0], X_valid[0], rlstm)
    rlstm.save_normalization_info(model_savedir + rlstm.name + '_norm_info.pkl')
    batch_size = (1 + int(not is_city)) * 64
    patience = 10 + int(is_city) * 10
    train(X_train, y_train, X_valid, y_valid, rlstm,
          batch_size=batch_size, patience=patience, nb_epoch=nb_epoch)
    return rlstm


def train_model(name, is_city=False, latest=True):
    """Train a validation model, then a final model with a derived epoch count.

    Stage 1 trains ``name + '_valid'`` for up to 300 epochs, appends its
    result string to ``model_savedir + name + '.log'`` and asks
    ``estimate_early_stop_epoch(name)`` for an epoch count.  Stage 2 trains
    the model called ``name`` on ``train_data2`` for that many epochs.

    Args:
        name: key into ``city2stations`` when ``is_city`` is true, otherwise
            a key into ``area2lonlat``.
        is_city: select data by station list instead of lon/lat range.
        latest: forwarded to ``load_data3``; not used for ``'beijing'``,
            which always passes ``latest=True`` with a fixed date window.
    """
    if is_city and name == 'beijing':
        train_data, valid_data, train_data2 = load_data3(
            stations=city2stations[name],
            starttime='20150901', endtime='20160123',
            train_stop=630, valid_start=680,
            latest=True,
            filter=False)
    elif is_city:
        train_data, valid_data, train_data2 = load_data3(
            stations=city2stations[name],
            latest=latest,
            filter=False)
    else:
        train_data, valid_data, train_data2 = load_data3(
            lon_range=area2lonlat[name][0], lat_range=area2lonlat[name][1],
            latest=latest,
            filter=True)

    # Stage 1: validation run used to estimate the early-stopping epoch.
    rlstm = _fit_rlstm(train_data, valid_data, [train_data, valid_data],
                       name + '_valid', name, is_city, 300)

    result_str = get_train_result(rlstm)
    print('result_str = %s' % result_str)
    with open(model_savedir + name + '.log', 'a') as f:
        f.write(result_str + '\n')
    epoch = estimate_early_stop_epoch(name)

    # Stage 2: final run on train_data2 for the estimated number of epochs.
    # NOTE(review): the model's .data still holds train_data (not
    # train_data2), exactly as in the original code -- confirm intent.
    _fit_rlstm(train_data2, valid_data, [train_data, valid_data],
               name, name, is_city, epoch)
Exemplo n.º 3
0
def test_areas(areas, repeat=1):
    """Train ``repeat`` RLSTM models per area, then reload and evaluate them.

    The first pass fits one model per (area, run index) pair on the data
    selected by the lon/lat ranges in ``area2lonlat[area]``.  The second pass
    rebuilds each model from ``<area>.yaml``, ``<area>_norm_info.pkl`` and
    ``<area><i>_weights.hdf5`` (paths relative to the working directory) and
    calls ``test_model`` on the train and validation splits.

    Args:
        areas: keys into ``area2lonlat``.  Traversed twice, so it must be
            re-iterable (e.g. a list, not a generator).
        repeat: number of training runs per area; run ``i`` is named
            ``area + str(i)``.
    """
    beijing_only = False

    # Pass 1: train every (area, run) model.
    for area in areas:
        lon_range, lat_range = area2lonlat[area][0], area2lonlat[area][1]
        for i in range(repeat):
            train_data, valid_data = load_data3(
                lon_range=lon_range,
                lat_range=lat_range,
                starttime=starttime,
                endtime=endtime,
                filter=(not beijing_only),
            )
            X_train, y_train, X_valid, y_valid = build_lstm_dataset(
                train_data, valid_data, pred_range=pred_range, hist_len=3
            )
            print("X_train[0].shape = %s" % (X_train[0].shape,))
            name = area
            n_features = X_train[0].shape[-1]
            rlstm = build_rlstm(
                n_features,
                h0_dim=20,
                h1_dim=20,
                rec_layer_init="zero",
                fix_b_f=False,
                base_name=name,
                add_input_noise=beijing_only,
                add_target_noise=False,
            )
            rlstm.name = name + str(i)
            rlstm.data = [train_data, valid_data]

            # All-ones mask; the last feature (annotated "pm25 mean" by the
            # original author) stays enabled because beijing_only is False.
            rlstm.X_mask = np.ones((n_features,), dtype="int")
            rlstm.X_mask[-1:] = not beijing_only

            print("\ntraining %s" % rlstm.name)
            X_train[0], X_valid[0] = normalize(X_train[0], X_valid[0], rlstm)
            rlstm.save_normalization_info(name + "_norm_info.pkl")
            batch_size = (1 + int(not beijing_only)) * 64
            patience = 10
            train(X_train, y_train, X_valid, y_valid, rlstm, batch_size=batch_size, patience=patience, nb_epoch=300)

    # Pass 2: restore every model from disk and evaluate it.
    for area in areas:
        for i in range(repeat):
            name = area
            # Context manager closes the YAML file as soon as it is read.
            with open(name + ".yaml") as yaml_file:
                rlstm = model_from_yaml(yaml_file.read())
            rlstm.name = name + str(i)
            rlstm.load_normalization_info(name + "_norm_info.pkl")
            rlstm.load_weights(rlstm.name + "_weights.hdf5")

            # NOTE(review): unlike pass 1, `filter` is left at load_data3's
            # default here -- confirm that is intended.
            train_data, valid_data = load_data3(
                lon_range=area2lonlat[area][0], lat_range=area2lonlat[area][1], starttime=starttime, endtime=endtime
            )
            rlstm.data = [train_data, valid_data]
            print("\n" + rlstm.name)
            test_model(rlstm, dataset="train", show_details=False)
            test_model(rlstm, dataset="valid", show_details=True)
Exemplo n.º 4
0
def test_cities(cities, repeat=1):
    """Fit per-city RLSTM models, then reload each one and report its scores.

    Training and evaluation are two separate passes over ``cities``: first
    every (city, run) model is trained, then each one is reconstructed from
    its ``.yaml``, ``_norm_info.pkl`` and ``_weights.hdf5`` files under
    ``model_savedir`` and passed to ``test_model`` for the train and
    validation splits.

    Args:
        cities: keys into ``city2stations``; iterated twice, so pass a
            re-iterable container such as a list.
        repeat: how many runs to train per city (run ``i`` is named
            ``city + str(i)``).
    """
    beijing_only = True

    for city in cities:
        for i in range(repeat):
            train_data, valid_data = load_data3(stations=city2stations[city],
                                                starttime=starttime, endtime=endtime,
                                                filter=(not beijing_only), normalize_target=False)
            X_train, y_train, X_valid, y_valid = build_lstm_dataset(train_data, valid_data, pred_range=pred_range, hist_len=3)
            print('X_train[0].shape = %s' % (X_train[0].shape,))
            name = city
            input_dim = X_train[0].shape[-1]
            rlstm = build_rlstm(input_dim, h0_dim=20, h1_dim=20,
                                rec_layer_init='zero', fix_b_f=True, base_name=name,
                                add_input_noise=beijing_only, add_target_noise=False)
            rlstm.name = name + str(i)
            rlstm.data = [train_data, valid_data]

            # Mask is 1 for every feature except the last one (the author's
            # note calls it "pm25 mean"), which becomes 0 when beijing_only.
            rlstm.X_mask = np.ones((input_dim,), dtype='int')
            rlstm.X_mask[-1:] = not beijing_only

            print('\ntraining %s' % rlstm.name)
            X_train[0], X_valid[0] = normalize(X_train[0], X_valid[0], rlstm)
            # The evaluation loop below reads this file from model_savedir,
            # so it has to be written there rather than to a bare filename.
            rlstm.save_normalization_info(model_savedir + name + '_norm_info.pkl')
            batch_size = (1 + int(not beijing_only)) * 64
            patience = 10
            train(X_train, y_train, X_valid, y_valid, rlstm, batch_size=batch_size, patience=patience, nb_epoch=300)

    for city in cities:
        for i in range(repeat):
            name = city
            # `with` guarantees the YAML file handle is released after reading.
            with open(model_savedir + name + '.yaml') as yaml_file:
                rlstm = model_from_yaml(yaml_file.read())
            rlstm.name = name + str(i)
            rlstm.load_normalization_info(model_savedir + name + '_norm_info.pkl')
            rlstm.load_weights(model_savedir + rlstm.name + '_weights.hdf5')

            # NOTE(review): `filter` and `normalize_target` fall back to
            # load_data3's defaults here, unlike in training -- confirm.
            train_data, valid_data = load_data3(stations=city2stations[city],
                                                starttime=starttime, endtime=endtime)
            rlstm.data = [train_data, valid_data]
            print('\n' + rlstm.name)
            test_model(rlstm, dataset='train', show_details=False)
            test_model(rlstm, dataset='valid', show_details=True)
Exemplo n.º 5
0
def test_area_on_cities(area, i, cities):
    """Evaluate run ``i`` of an area-level model on each city's stations.

    The model is rebuilt from ``<area>.yaml``, ``<area>_norm_info.pkl`` and
    ``<area><i>_weights.hdf5`` (paths relative to the working directory).
    For every city, the city's data is attached as ``rlstm.data`` and
    ``test_model`` is called for the train and validation splits.

    Args:
        area: base name of the saved area model.
        i: run index appended to ``area`` to form the model name.
        cities: iterable of keys into ``city2stations``.
    """
    name = area
    # Context manager closes the YAML file as soon as it is read.
    with open(name + ".yaml") as yaml_file:
        rlstm = model_from_yaml(yaml_file.read())
    rlstm.name = name + str(i)
    rlstm.load_normalization_info(name + "_norm_info.pkl")
    rlstm.load_weights(rlstm.name + "_weights.hdf5")
    print("\n In" + area)
    for city in cities:
        train_data, valid_data = load_data3(stations=city2stations[city], starttime=starttime, endtime=endtime)
        rlstm.data = [train_data, valid_data]
        print("\n" + city)
        test_model(rlstm, dataset="train", show_details=False)
        test_model(rlstm, dataset="valid", show_details=True)
Exemplo n.º 6
0
def test_area_on_cities(area, i, cities):
    """Run a saved area model (run ``i``) against the data of several cities.

    The model architecture, normalisation info and weights are read from
    ``model_savedir``.  The same model object is reused for every city: its
    ``data`` attribute is replaced with that city's splits before
    ``test_model`` is called on the train and validation sets.

    Args:
        area: base name of the saved area model.
        i: run index; the weights file is ``area + str(i) + '_weights.hdf5'``.
        cities: iterable of keys into ``city2stations``.
    """
    name = area
    # `with` releases the YAML file handle instead of leaving it to the GC.
    with open(model_savedir + name + '.yaml') as yaml_file:
        rlstm = model_from_yaml(yaml_file.read())
    rlstm.name = name + str(i)
    rlstm.load_normalization_info(model_savedir + name + '_norm_info.pkl')
    rlstm.load_weights(model_savedir + rlstm.name + '_weights.hdf5')
    print('\n In' + area)
    for city in cities:
        train_data, valid_data = load_data3(stations=city2stations[city],
                                            starttime=starttime, endtime=endtime,
                                            )
        rlstm.data = [train_data, valid_data]
        print('\n' + city)
        test_model(rlstm, dataset='train', show_details=False)
        test_model(rlstm, dataset='valid', show_details=True)
Exemplo n.º 7
0
def test_area_on_cities(area, i, cities):
    """Score one trained area model on the station data of each given city.

    Loads ``<area>.yaml``, ``<area>_norm_info.pkl`` and
    ``<area><i>_weights.hdf5`` from ``model_savedir``, then, city by city,
    swaps the city's train/validation splits into ``rlstm.data`` and calls
    ``test_model`` on both splits (details shown for validation only).

    Args:
        area: base name of the saved area model.
        i: run index appended to ``area`` to form ``rlstm.name``.
        cities: iterable of keys into ``city2stations``.
    """
    name = area
    # Read the architecture inside `with` so the file is closed promptly.
    with open(model_savedir + name + '.yaml') as yaml_file:
        rlstm = model_from_yaml(yaml_file.read())
    rlstm.name = name + str(i)
    rlstm.load_normalization_info(model_savedir + name + '_norm_info.pkl')
    rlstm.load_weights(model_savedir + rlstm.name + '_weights.hdf5')
    print('\n In' + area)
    for city in cities:
        train_data, valid_data = load_data3(
            stations=city2stations[city],
            starttime=starttime,
            endtime=endtime,
        )
        rlstm.data = [train_data, valid_data]
        print('\n' + city)
        test_model(rlstm, dataset='train', show_details=False)
        test_model(rlstm, dataset='valid', show_details=True)
Exemplo n.º 8
0
def train_model(name, is_city=False, latest=True):
    """Run the two-stage training procedure for one city or area model.

    Stage 1 trains a model named ``name + '_valid'`` for up to 300 epochs,
    appends its result string to ``model_savedir + name + '.log'`` and
    obtains an epoch count from ``estimate_early_stop_epoch(name)``.
    Stage 2 trains the model named ``name`` on ``train_data2`` for exactly
    that many epochs.

    Args:
        name: key into ``city2stations`` when ``is_city`` is true, otherwise
            a key into ``area2lonlat``.
        is_city: select data by station list instead of lon/lat range; also
            switches ``fix_b_f`` / ``add_input_noise`` on, halves the batch
            size (64 vs 128) and doubles the patience (20 vs 10).
        latest: forwarded to ``load_data3``; not used for ``'beijing'``,
            which always passes ``latest=True`` with a fixed date window.
    """
    if is_city and name == 'beijing':
        train_data, valid_data, train_data2 = load_data3(
            stations=city2stations[name],
            starttime='20150901',
            endtime='20160123',
            train_stop=630,
            valid_start=680,
            latest=True,
            filter=False)
    elif is_city:
        train_data, valid_data, train_data2 = load_data3(
            stations=city2stations[name], latest=latest, filter=False)
    else:
        train_data, valid_data, train_data2 = load_data3(
            lon_range=area2lonlat[name][0],
            lat_range=area2lonlat[name][1],
            latest=latest,
            filter=True)

    def _run_stage(fit_data, model_name, nb_epoch):
        """Build, normalise and train one model on ``fit_data``; return it."""
        X_train, y_train, X_valid, y_valid = build_lstm_dataset(
            fit_data, valid_data, pred_range=pred_range, hist_len=3)
        print('X_train[0].shape = %s' % (X_train[0].shape,))
        n_features = X_train[0].shape[-1]
        rlstm = build_rlstm(n_features,
                            h0_dim=20,
                            h1_dim=20,
                            rec_layer_init='zero',
                            fix_b_f=is_city,
                            base_name=name,
                            add_input_noise=is_city,
                            add_target_noise=False)
        rlstm.name = model_name
        # NOTE(review): both stages attach train_data here, even when the
        # stage trains on train_data2 -- kept as in the original; confirm.
        rlstm.data = [train_data, valid_data]

        # All-ones mask, except the last feature (annotated "pm25 mean" by
        # the original author), which is zeroed for city models.
        rlstm.X_mask = np.ones((n_features, ), dtype='int')
        rlstm.X_mask[-1:] = not is_city

        print('\ntraining %s' % rlstm.name)
        X_train[0], X_valid[0] = normalize(X_train[0], X_valid[0], rlstm)
        rlstm.save_normalization_info(model_savedir + rlstm.name +
                                      '_norm_info.pkl')
        batch_size = (1 + int(not is_city)) * 64
        patience = 10 + int(is_city) * 10
        train(X_train,
              y_train,
              X_valid,
              y_valid,
              rlstm,
              batch_size=batch_size,
              patience=patience,
              nb_epoch=nb_epoch)
        return rlstm

    # Stage 1: validation run, logged so the stop epoch can be estimated.
    rlstm = _run_stage(train_data, name + '_valid', 300)

    result_str = get_train_result(rlstm)
    print('result_str = %s' % result_str)
    with open(model_savedir + name + '.log', 'a') as f:
        f.write(result_str + '\n')
    epoch = estimate_early_stop_epoch(name)

    # Stage 2: final run on train_data2 for the estimated number of epochs.
    _run_stage(train_data2, name, epoch)