示例#1
0
def process_latents_vae():
    '''
    Encode the 'all_towns' theme with each trained SplitVAE and save the latents.

    Loads ``df_20.feather``, builds the bandwise 'all_towns' theme
    (max_dist=800) and standardises it. Then, for every
    latent_dim / beta / capacity / seed combination, a SplitVAE is rebuilt,
    its saved weights are restored, and the encoder outputs are written with
    ``np.save`` (suffixes ``_latent``, ``_z_mu`` and ``_z_log_var``) under
    ``weights_path / 'signatures_weights/data'``.

    Takes no arguments and returns None. Relies on the module-level names
    ``data_path``, ``weights_path``, ``generate_theme`` and ``sig_models``.
    '''
    df_20 = pd.read_feather(data_path / 'df_20.feather')
    df_20 = df_20.set_index('id')
    # the labels are not used here, only the data and the distance bands
    X_raw, distances, _labels = generate_theme(df_20,
                                               'all_towns',
                                               bandwise=True,
                                               max_dist=800)
    X_trans = StandardScaler().fit_transform(X_raw)
    # parameter grid
    seeds = range(10)
    latent_dims = [6]
    betas = [0, 1, 2, 4, 8, 16, 32, 64]
    all_caps = [0, 4, 8, 12, 16, 20]
    # epochs and batch are part of the weights directory name built below
    epochs = 5
    batch = 256
    theme_base = f'VAE_e{epochs}'
    # prepare the splits - input widths are multiples of the number of distances
    n_d = len(distances)
    split_input_dims = (2 * n_d, 9 * n_d, 2 * n_d)
    split_latent_dims = (4, 6, 2)
    split_hidden_layer_dims = ([24, 24, 24], [32, 32, 32], [8, 8, 8])
    # common root for both the per-seed weights and the saved outputs
    sig_weights_dir = pathlib.Path(weights_path / 'signatures_weights')
    # iterate
    for latent_dim in latent_dims:
        for beta in betas:
            # with beta == 0 only a capacity of 0 is processed
            caps = [0] if beta == 0 else all_caps
            for cap in caps:
                for seed in seeds:
                    print(f'...collecting data from model seed {seed}')
                    vae = sig_models.SplitVAE(
                        raw_dim=X_trans.shape[1],
                        latent_dim=latent_dim,
                        beta=beta,
                        capacity=cap,
                        epochs=epochs,
                        split_input_dims=split_input_dims,
                        split_latent_dims=split_latent_dims,
                        split_hidden_layer_dims=split_hidden_layer_dims,
                        theme_base=theme_base,
                        seed=seed,
                        name='VAE')
                    print(vae.theme)
                    # restore the weights saved for this seed / theme
                    dir_path = sig_weights_dir / (
                        f'seed_{seed}/{vae.theme}_epochs_{epochs}_batch_{batch}_train'
                    )
                    vae.load_weights(str(dir_path / 'weights'))
                    print('preparing latents...')
                    # encode the full standardised dataset
                    Z_mu, Z_log_var, Z = vae.encode(X_trans, training=False)
                    # NOTE(review): the output name is built from vae.theme only;
                    # unless the theme encodes the seed, each seed overwrites
                    # the files written by the previous one - confirm.
                    p = str(sig_weights_dir / f'data/model_{vae.theme}')
                    np.save(p + '_latent', Z)
                    np.save(p + '_z_mu', Z_mu)
                    np.save(p + '_z_log_var', Z_log_var)
示例#2
0
def load_data(selected_data, max_dist=None):
    '''
    Map boundary level targets onto the individual points of the theme data.

    selected_data: table indexed by city population id with a ``targets``
        column; each row's target is copied to every point whose
        ``city_pop_id`` equals the row index.
    max_dist: passed straight through to ``generate_theme``.

    Returns a tuple of:
        - the feather table indexed by 'id'
        - the theme data including the ``city_pop_id`` and ``targets`` columns
        - the theme data without those two columns
        - the distances returned by ``generate_theme``
        - the labels with their first entry removed
    '''
    # the feather dataset only contains towns 19 through to 650 to save on space
    # i.e. smallest through largest new town like locations
    points_df = pd.read_feather(data_path / 'df_20_all.feather').set_index('id')
    theme_data, distances, labels = generate_theme(
        points_df,
        'all_towns',
        bandwise=True,
        add_city_pop_id=True,
        max_dist=max_dist,
    )
    # points outside every selected boundary keep a target of 0
    theme_data['targets'] = 0
    for pop_idx, boundary_row in selected_data.iterrows():
        in_boundary = theme_data['city_pop_id'] == pop_idx
        theme_data.loc[in_boundary, 'targets'] = boundary_row.targets
    # feature-only view: neither the id column nor the targets
    features_only = theme_data.drop(columns=['city_pop_id', 'targets'])
    # the first label is skipped (it corresponds to city_pop_id)
    feature_labels = labels[1:]
    return points_df, theme_data, features_only, distances, feature_labels
示例#3
0
'''

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

from src import util_funcs
from src.explore.theme_setup import data_path
from src.explore.theme_setup import generate_theme

#  %% load from disk
# read the full feather table and index it by 'id'
df_full = pd.read_feather(data_path / 'df_full_all.feather')
df_full = df_full.set_index('id')
# build the 'all_towns' theme, non-bandwise, with add_city_pop_id enabled
# NOTE(review): presumably add_city_pop_id adds a city_pop_id column to X_raw - confirm in generate_theme
X_raw, distances, labels = generate_theme(df_full,
                                          'all_towns',
                                          bandwise=False,
                                          add_city_pop_id=True)

#  %%
# db connection params
# NOTE(review): connection settings are hard-coded here; consider loading them
# from the environment or a config file instead
db_config = {
    'host': 'localhost',
    'port': 5433,
    'user': '******',
    'database': 'gareth',
    'password': ''
}

# load boundaries data
bound_data = util_funcs.load_data_as_pd_df(
    db_config, [
示例#4
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from src import util_funcs
from src.explore import plot_funcs
from src.explore.signatures import sig_models, sig_model_runners
from src.explore.theme_setup import data_path, logs_path, weights_path
from src.explore.theme_setup import generate_theme

# load data and prep
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
# bandwise 'all_towns' theme with max_dist=800
X_raw, distances, labels = generate_theme(df_20,
                                          'all_towns',
                                          bandwise=True,
                                          max_dist=800)
# standardise the theme data
X_trans = StandardScaler().fit_transform(X_raw)
test_idx = util_funcs.train_test_idxs(df_20, 200)  # 200 gives about 25%

# setup parameters
epochs = 5
batch = 256
# the epoch count is embedded in the theme name
theme_base = f'VAE_e{epochs}'
# split input widths are multiples of the number of distances
n_d = len(distances)
split_input_dims = (int(2 * n_d), int(9 * n_d), int(2 * n_d))
# one latent size and one list of hidden layer sizes per split
split_latent_dims = (4, 6, 2)
split_hidden_layer_dims = ([24, 24, 24], [32, 32, 32], [8, 8, 8])
latent_dim = 6
lr = 1e-3
# seed = 0
示例#5
0
# towns vs london
location = 'london'

# theme and run title
# NOTE(review): iters, seed, cap_step, cap_jitter, max_cap, new_threshold and
# dwell_density are not defined in the visible lines - presumably set earlier in the file
theme = f'MMML_{location}_hmmz'
title = f'{theme}_iter{iters}_seed{seed}_cs{cap_step}_cj{cap_jitter}_mc{max_cap}_nt{new_threshold}_dd{dwell_density}'

# three graphs from step_C1_graphs
graph_a = step_C1_graphs.york_burb()
graph_b = step_C1_graphs.grid_ville()
graph_c = step_C1_graphs.suburb()

# prepare data for transformer
# required by ML models for mapping data space to prediction data space
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
X_raw, distances, labels = generate_theme(df_20, 'pred_sim', bandwise=False)

# load models into a dictionary
epochs = 100
models = {}
transformers = {}
# target_distances and target_col_templates are paired positionally:
# each column template is formatted with the distance at the same index
target_distances = [400, 1600, 400]
target_col_templates = [
    'ac_commercial_{dist}', 'ac_manufacturing_{dist}', 'ac_retail_food_{dist}'
]
for col_template, target_dist in zip(target_col_templates, target_distances):
    # prepare model
    target_w_dist = col_template.format(dist=target_dist)
    reg = pred_models.LandUsePredictor(
        theme_base=f'{target_w_dist}_e{epochs}_{location}_{target_dist}')
    ml_model_path = pathlib.Path(weights_path / f'{reg.theme}')
示例#6
0
from tensorflow.keras import backend as K
from tensorflow.keras import losses
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, TerminateOnNaN, \
    ModelCheckpoint

from src import util_funcs
from src.explore import plot_funcs
from src.explore.theme_setup import data_path, logs_path, weights_path
from src.explore.theme_setup import generate_theme

#  %%
# load data and prep
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
# generate theme
X_raw, distances, labels = generate_theme(df_20, 'pred_lu', bandwise=True)
# transform X - standardise and cast to float32
X_trans_all = StandardScaler().fit_transform(X_raw).astype(np.float32)
# get y
y_all = df_20['ac_eating_400'].values
# test split - use spatial splitting - 300 modulo gives about 11%
# xy_test_idx is used below as a boolean mask (it is negated with ~)
xy_test_idx = util_funcs.train_test_idxs(df_20, 300)
X_trans_train = X_trans_all[~xy_test_idx]
X_trans_test = X_trans_all[xy_test_idx]
y_train = y_all[~xy_test_idx]
y_test = y_all[xy_test_idx]
# validation split - 200 modulo gives about 25%
# computed on the non-test rows only, so the mask lines up with X_trans_train
xy_val_idx = util_funcs.train_test_idxs(df_20[~xy_test_idx], 200)
# NOTE(review): y_train is not split by xy_val_idx in the lines visible here -
# presumably that follows; confirm X and y stay aligned
X_trans_val = X_trans_train[
    xy_val_idx]  # do first before repurposing variable name
X_trans_train = X_trans_train[~xy_val_idx]
示例#7
0
from src.explore import plot_funcs
from src.explore.signatures import sig_models, sig_model_runners
from src.explore.theme_setup import data_path, weights_path
from src.explore.theme_setup import generate_theme

#  %%
'''
VaDE
'''
# PREPARE
#  %% load data and prep
df_20 = pd.read_feather(data_path / 'df_20.feather')
df_20 = df_20.set_index('id')
table = df_20
# bandwise 'all' theme with max_dist=800
X_raw, distances, labels = generate_theme(table,
                                          'all',
                                          bandwise=True,
                                          max_dist=800)
# standardise the theme data
X_trans = StandardScaler().fit_transform(X_raw)
# setup parameters
seed = 0
latent_dim = 32
dropout = 0
epochs = 25
n_components = 21
# NOTE(review): f-string has no placeholders; a plain string would do
theme_base = f'VaDE'
# build the VaDE model
vade = sig_models.VaDE(raw_dim=X_trans.shape[1],
                       latent_dim=latent_dim,
                       n_components=n_components,
                       theme_base=theme_base,
                       seed=seed,