def process_latents_vae():
    '''
    Processes and saves latents for the SplitVAE models
    '''
    df_20 = pd.read_feather(data_path / 'df_20.feather')
    df_20 = df_20.set_index('id')
    table = df_20
    seeds = list(range(10))
    latent_dims = [6]
    X_raw, distances, labels = generate_theme(table, 'all_towns', bandwise=True, max_dist=800)
    X_trans = StandardScaler().fit_transform(X_raw)
    # prepare the splits
    n_d = len(distances)
    epochs = 5
    batch = 256
    split_input_dims = (int(2 * n_d), int(9 * n_d), int(2 * n_d))
    split_latent_dims = (4, 6, 2)
    split_hidden_layer_dims = ([24, 24, 24], [32, 32, 32], [8, 8, 8])
    betas = [0, 1, 2, 4, 8, 16, 32, 64]
    theme_base = f'VAE_e{epochs}'
    # iterate the models
    for latent_dim in latent_dims:
        for beta in betas:
            # capacity only applies for non-zero beta
            caps = [0, 4, 8, 12, 16, 20]
            if beta == 0:
                caps = [0]
            for cap in caps:
                for seed in seeds:
                    print(f'...collecting data from model seed {seed}')
                    vae = sig_models.SplitVAE(raw_dim=X_trans.shape[1],
                                              latent_dim=latent_dim,
                                              beta=beta,
                                              capacity=cap,
                                              epochs=epochs,
                                              split_input_dims=split_input_dims,
                                              split_latent_dims=split_latent_dims,
                                              split_hidden_layer_dims=split_hidden_layer_dims,
                                              theme_base=theme_base,
                                              seed=seed,
                                              name='VAE')
                    print(vae.theme)
                    dir_path = pathlib.Path(
                        weights_path / f'signatures_weights/seed_{seed}/{vae.theme}_epochs_{epochs}_batch_{batch}_train')
                    # load the trained weights
                    vae.load_weights(str(dir_path / 'weights'))
                    print('preparing latents...')
                    # encode and save the latents
                    Z_mu, Z_log_var, Z = vae.encode(X_trans, training=False)
                    # paths
                    p = str(weights_path / f'signatures_weights/data/model_{vae.theme}')
                    np.save(p + '_latent', Z)
                    np.save(p + '_z_mu', Z_mu)
                    np.save(p + '_z_log_var', Z_log_var)
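# a minimal sketch: reloading the saved latent arrays for downstream analysis,
# assuming the same path convention as above; the 'theme' string here is
# hypothetical - substitute an actual vae.theme
theme = 'VAE_e5_example'
p = str(weights_path / f'signatures_weights/data/model_{theme}')
# np.save appends the .npy suffix, so it is restored here for np.load
Z = np.load(p + '_latent.npy')
Z_mu = np.load(p + '_z_mu.npy')
Z_log_var = np.load(p + '_z_log_var.npy')
print(Z.shape, Z_mu.shape, Z_log_var.shape)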
def load_data(selected_data, max_dist=None):
    '''
    Map boundary level targets to individual points
    '''
    # load data and prep new town band data
    # the feather dataset only contains towns 19 through to 650 to save on space
    # i.e. smallest through largest new-town-like locations
    df_20 = pd.read_feather(data_path / 'df_20_all.feather')
    df_20 = df_20.set_index('id')
    X_raw, distances, labels = generate_theme(df_20,
                                              'all_towns',
                                              bandwise=True,
                                              add_city_pop_id=True,
                                              max_dist=max_dist)
    # create a targets column on X_raw
    X_raw['targets'] = 0
    # map targets from the boundaries to their respective points
    for pop_idx, row in selected_data.iterrows():
        X_raw.loc[X_raw.city_pop_id == pop_idx, 'targets'] = row.targets
    # prepare the cleaned data, excluding city_pop_id and targets
    X_clean = X_raw.drop(['city_pop_id', 'targets'], axis=1)
    # also drop city_pop_id from labels
    labels = labels[1:]
    return df_20, X_raw, X_clean, distances, labels
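# a usage sketch for load_data: the mapping loop implies that selected_data is
# indexed by city_pop_id and carries a 'targets' column, so a toy frame of that
# shape is enough to exercise it - the index and target values are hypothetical
selected_data = pd.DataFrame({'targets': [1, 0, 1]}, index=[19, 20, 21])
df_20, X_raw, X_clean, distances, labels = load_data(selected_data, max_dist=800)
# every point within a boundary inherits that boundary's target
print(X_raw.groupby('city_pop_id')['targets'].first())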
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

from src import util_funcs
from src.explore.theme_setup import data_path
from src.explore.theme_setup import generate_theme

# %% load from disk
df_full = pd.read_feather(data_path / 'df_full_all.feather')
df_full = df_full.set_index('id')
X_raw, distances, labels = generate_theme(df_full, 'all_towns', bandwise=False, add_city_pop_id=True)

# %%
# db connection params
db_config = {
    'host': 'localhost',
    'port': 5433,
    'user': '******',
    'database': 'gareth',
    'password': ''
}
# load boundaries data
bound_data = util_funcs.load_data_as_pd_df(
    db_config,
    [
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from src import util_funcs
from src.explore import plot_funcs
from src.explore.signatures import sig_models, sig_model_runners
from src.explore.theme_setup import data_path, logs_path, weights_path
from src.explore.theme_setup import generate_theme

# load data and prep
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
X_raw, distances, labels = generate_theme(df_20, 'all_towns', bandwise=True, max_dist=800)
X_trans = StandardScaler().fit_transform(X_raw)
test_idx = util_funcs.train_test_idxs(df_20, 200)  # 200 modulo gives about 25%
# setup parameters
epochs = 5
batch = 256
theme_base = f'VAE_e{epochs}'
n_d = len(distances)
split_input_dims = (int(2 * n_d), int(9 * n_d), int(2 * n_d))
split_latent_dims = (4, 6, 2)
split_hidden_layer_dims = ([24, 24, 24], [32, 32, 32], [8, 8, 8])
latent_dim = 6
lr = 1e-3
# seed = 0
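# a minimal sketch: the parameters above match the SplitVAE constructor
# signature used in process_latents_vae, so a single model can be instantiated
# directly; the beta, cap, and seed values here are illustrative
beta = 4
cap = 8
seed = 0
vae = sig_models.SplitVAE(raw_dim=X_trans.shape[1],
                          latent_dim=latent_dim,
                          beta=beta,
                          capacity=cap,
                          epochs=epochs,
                          split_input_dims=split_input_dims,
                          split_latent_dims=split_latent_dims,
                          split_hidden_layer_dims=split_hidden_layer_dims,
                          theme_base=theme_base,
                          seed=seed,
                          name='VAE')
print(vae.theme)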
# towns vs london
location = 'london'
theme = f'MMML_{location}_hmmz'
title = f'{theme}_iter{iters}_seed{seed}_cs{cap_step}_cj{cap_jitter}_mc{max_cap}_nt{new_threshold}_dd{dwell_density}'
graph_a = step_C1_graphs.york_burb()
graph_b = step_C1_graphs.grid_ville()
graph_c = step_C1_graphs.suburb()
# prepare data for the transformer
# required by the ML models for mapping the data space to the prediction space
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
X_raw, distances, labels = generate_theme(df_20, 'pred_sim', bandwise=False)
# load models into a dictionary
epochs = 100
models = {}
transformers = {}
target_distances = [400, 1600, 400]
target_col_templates = [
    'ac_commercial_{dist}',
    'ac_manufacturing_{dist}',
    'ac_retail_food_{dist}'
]
for col_template, target_dist in zip(target_col_templates, target_distances):
    # prepare model
    target_w_dist = col_template.format(dist=target_dist)
    reg = pred_models.LandUsePredictor(
        theme_base=f'{target_w_dist}_e{epochs}_{location}_{target_dist}')
    ml_model_path = pathlib.Path(weights_path / f'{reg.theme}')
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import backend as K
from tensorflow.keras import losses
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, TerminateOnNaN, \
    ModelCheckpoint

from src import util_funcs
from src.explore import plot_funcs
from src.explore.theme_setup import data_path, logs_path, weights_path
from src.explore.theme_setup import generate_theme

# %%
# load data and prep
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
# generate theme
X_raw, distances, labels = generate_theme(df_20, 'pred_lu', bandwise=True)
# transform X
X_trans_all = StandardScaler().fit_transform(X_raw).astype(np.float32)
# get y
y_all = df_20['ac_eating_400'].values
# test split - use spatial splitting - 300 modulo gives about 11%
xy_test_idx = util_funcs.train_test_idxs(df_20, 300)
X_trans_train = X_trans_all[~xy_test_idx]
X_trans_test = X_trans_all[xy_test_idx]
y_train = y_all[~xy_test_idx]
y_test = y_all[xy_test_idx]
# validation split - 200 modulo gives about 25%
xy_val_idx = util_funcs.train_test_idxs(df_20[~xy_test_idx], 200)
# slice validation rows first, before repurposing the variable name
X_trans_val = X_trans_train[xy_val_idx]
X_trans_train = X_trans_train[~xy_val_idx]
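# a short sanity-check sketch, assuming train_test_idxs returns boolean masks
# (as implied by the ~ negations above): the proportions should come out at
# roughly 11% test and 25% of the remainder for validation
print(f'test share: {xy_test_idx.sum() / len(df_20):.1%}')
print(f'val share of train: {xy_val_idx.sum() / (~xy_test_idx).sum():.1%}')
print(f'shapes: train {X_trans_train.shape}, val {X_trans_val.shape}, test {X_trans_test.shape}')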
import pandas as pd
from sklearn.preprocessing import StandardScaler

from src.explore import plot_funcs
from src.explore.signatures import sig_models, sig_model_runners
from src.explore.theme_setup import data_path, weights_path
from src.explore.theme_setup import generate_theme

# %%
'''
VaDE
'''
# PREPARE
# %% load data and prep
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
table = df_20
X_raw, distances, labels = generate_theme(table, 'all', bandwise=True, max_dist=800)
X_trans = StandardScaler().fit_transform(X_raw)
# setup parameters
seed = 0
latent_dim = 32
dropout = 0
epochs = 25
n_components = 21
theme_base = 'VaDE'
#
vade = sig_models.VaDE(raw_dim=X_trans.shape[1],
                       latent_dim=latent_dim,
                       n_components=n_components,
                       theme_base=theme_base,
                       seed=seed,