示例#1
0
def run_model():
    # Loads/generate data
    dataset = load_lorenz()
    params = set_parameters(dataset)
    print(params)
    print 'Dimensionality of the observations: ', dataset['dim_observations']
    print 'Data type of features:', dataset['data_type']
    for dtype in ['train','valid','test']:
        print 'dtype: ',dtype, ' type(dataset[dtype]): ', type(dataset[dtype])
        print [(k,type(dataset[dtype][k]), dataset[dtype][k].shape) for k in dataset[dtype]]
        print '--------\n'

    # Specify the file where `params` corresponding for this choice of model and data will be saved
    pfile= params['savedir']+'/'+params['unique_id']+'-config.pkl'

    print 'Checkpoint prefix: ', pfile
    dmm = DMM(params, paramFile=pfile)

    # savef specifies the prefix for the checkpoints - we'll use the same save directory as before
    savef = os.path.join(params['savedir'],params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0,
                               epoch_end=params['epochs'],
                               batch_size=200,
                               savefreq=params['savefreq'],
                               savefile=savef,
                               dataset_eval=dataset['valid'],
                               shuffle=True)
示例#2
0
def sample_from_model():
    # Sampling from the model
    DIR = './chkpt/lorenz/'
    prefix = 'DMM_lr-0_0008-dh-40-ds-'+str(DIM_STOCHASTIC)+'-nl-relu-bs-200-ep-1000-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
    pfile = os.path.join(DIR, prefix + '-config.pkl')
    params = readPickle(pfile, quiet=True)[0]
    EP = '-EP990'
    reloadFile = os.path.join(DIR, prefix + EP + '-params.npz')
    print 'Model parameters in: ', reloadFile
    params['validate_only'] = True
    dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reloadFile)

    # (mu, logcov): parameters of emission distributions
    # z_vec = sample in latent space
    (mu, logcov), zvec = DMM_evaluate.sample(dmm_reloaded, T=40, nsamples=10)

    print("mu.shape=" + str(mu.shape))
    print("zvec.shape=" + str(mu.shape))

    visualize_data(mu, n_samples=10)
    plt.title("Mean trajectories")


    fig, axlist_x = plt.subplots(3, 1, figsize=(8, 10))
    nsamples = 10
    T = zvec.shape[1]
    SNUM = range(nsamples)
    for idx, ax in enumerate(axlist_x.ravel()):
         z = zvec[SNUM, :, idx]
         ax.plot(np.arange(T), np.transpose(z), '-*', label='Dim' + str(idx))
         ax.legend()
    ax.set_xlabel('Time')
    plt.suptitle('3 dimensional samples of latent space')
    plt.show()
示例#3
0
def reload_dmm(prefix, dmm_dir='./dmm_models/', ep='-EP100'):
    pfile = os.path.join(dmm_dir, prefix + '-config.pkl')
    print 'Hyperparameters in: ', pfile, 'Found: ', os.path.exists(pfile)
    params = readPickle(pfile, quiet=True)[0]

    reload_file = os.path.join(dmm_dir, prefix + ep + '-params.npz')
    print 'Model parameters in: ', reload_file

    # Don't load the training functions for the model since its time consuming
    params['validate_only'] = True
    dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reload_file)
    return dmm_reloaded
示例#4
0
def reload_model(epoch):
    """Rebuild the coupled-Lorenz DMM from the checkpoint saved at ``epoch``.

    The hyperparameters are read from the run's ``-config.pkl`` file and the
    weights from ``-EP<epoch>-params.npz``, both under
    ``./chkpt/lorenz_coupled/``. Returns the ``DMM`` instance, built with
    ``validate_only`` set to True.
    """
    DIR = './chkpt/lorenz_coupled/'
    # Prefix shared by the config pickle and every checkpoint of this run
    run_prefix = DIR + 'DMM_lr-0_0008-dh-40-ds-9-nl-relu-bs-100-ep-10000-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
    config_path = run_prefix + '-config.pkl'
    weights_path = run_prefix + '-EP' + str(epoch) + '-params.npz'

    # The hyperparameters live in a pickle file; load them first
    settings = readPickle(config_path, quiet=True)[0]

    # Skip the training functions: loading them is time-consuming
    settings['validate_only'] = True
    return DMM(settings, paramFile=config_path, reloadFile=weights_path)
示例#5
0
def get_latent_space(dmm, x, id, dim_latent):
    """Lay out the latent states inferred for ``x`` as a long-format table.

    dmm        -- model handed to ``DMM._q_z_x``
    x          -- observations handed to ``DMM._q_z_x``
    id         -- one identifier per sequence; the first ``len(id)``
                  sequences of the evaluated latent array are used
    dim_latent -- number of latent dimensions, used to name the columns

    Returns a ``pandas.DataFrame`` with one row per (sequence, time step):
    columns ``dmm0 .. dmm<dim_latent-1>``, then ``TRR_ID`` (the sequence's
    identifier) and ``t_elapsed`` (the 1-based time-step index).
    """
    latent, _, _ = DMM._q_z_x(dmm, x)
    latent = latent.eval()
    n_steps = latent.shape[1]

    # Neither of these depends on the sequence, so build them once
    ones_column = np.ones((n_steps, 1))
    step_column = np.reshape(range(1, n_steps + 1), (n_steps, 1))

    per_sequence = []
    for row in range(len(id)):
        per_sequence.append(
            np.hstack((latent[row, :, :], ones_column * id[row], step_column)))
    table = np.concatenate(np.array(per_sequence), axis=0)

    latent_names = ['dmm{}'.format(k) for k in range(int(dim_latent))]
    column_names = np.concatenate((latent_names, ['TRR_ID', 't_elapsed']))
    return pandas.DataFrame(data=table, columns=column_names)
示例#6
0
def run_model(dataset, params):
    set_extra_parameters(params=params, dataset=dataset)
    for key, value in params.items():
        print(key, value)

    # Specify the file where `params` corresponding for this choice of model and data will be saved
    pfile= params['savedir']+'/'+params['unique_id']+'-config.pkl'

    print 'Checkpoint prefix: ', pfile
    dmm = DMM(params, paramFile=pfile)

    # savef specifies the prefix for the checkpoints - we'll use the same save directory as before
    savef = os.path.join(params['savedir'],params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0,
                               epoch_end=params['epochs'],
                               batch_size=params['batch_size'],
                               savefreq=params['savefreq'],
                               savefile=savef,
                               dataset_eval=dataset['valid'],
                               shuffle=True)
示例#7
0
# Notebook-style cell: shrink the default hyperparameters for a small dataset,
# build a DMM and train it. Expects `params` and `dataset` to exist already.
params['dim_observations'] = dataset['dim_observations']

# The dataset is small, so override some of the default parameters and patch
# the unique ID so that it reflects the new values.
params['dim_stochastic'] = 2
params['dim_hidden']     = 40
params['rnn_size']       = 80
params['epochs']         = 40
params['batch_size']     = 200
# NOTE(review): these substitutions only take effect if the default id contains
# exactly 'ds-100', 'dh-200', 'rs-600', 'ep-2000' and 'bs-20' - confirm.
params['unique_id'] = params['unique_id'].replace('ds-100','ds-2').replace('dh-200','dh-40').replace('rs-600','rs-80')
params['unique_id'] = params['unique_id'].replace('ep-2000','ep-40').replace('bs-20','bs-200')

# Create a 'synthetic/' sub-directory of the save directory for checkpoints
# (shells out to `mkdir -p`).
params['savedir']   = params['savedir']+'synthetic/'
os.system('mkdir -p '+params['savedir'])

# File in which `params` for this choice of model and data will be saved.
# savedir already ends in '/', so this path contains a double slash.
pfile= params['savedir']+'/'+params['unique_id']+'-config.pkl'

print 'Checkpoint prefix: ', pfile
dmm  = DMM(params, paramFile = pfile)

# savef is the prefix for the checkpoints - same save directory as above.
savef = os.path.join(params['savedir'],params['unique_id'])
# batch_size is passed as a literal 200 here, matching params['batch_size'] set above.
savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start =0 ,
                                epoch_end = params['epochs'],
                                batch_size = 200,
                                savefreq   = params['savefreq'],
                                savefile   = savef,
                                dataset_eval=dataset['valid'],
                                shuffle    = True )
示例#8
0
# Print the name and shape of every entry in `opt_params`
# (`opt_params` is defined outside this snippet).
for k in opt_params:
    print k, opt_params[k].shape

# Import the DMM package from the parent directory and report, via
# displayTime, the time taken by the model imports.
import glob, os, sys, time
# Make the parent directory importable so `utils` and `model_th` resolve
sys.path.append('../')
from utils.misc import getConfigFile, readPickle, displayTime
# Start the clock just before the model imports
start_time = time.time()
from model_th.dmm import DMM
import model_th.learning as DMM_learn
import model_th.evaluate as DMM_evaluate
displayTime('importing DMM', start_time, time.time())

# Notebook-style cell: reload the DMM from the checkpoint written at epoch 30.

# Directory and file-name prefix shared by the config pickle and the checkpoints
DIR = './chkpt-ipython/'
prefix = 'DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
pfile = os.path.join(DIR, prefix + '-config.pkl')
print 'Hyperparameters in: ', pfile, 'Found: ', os.path.exists(pfile)

# The hyperparameters are saved in a pickle file - load them here
params = readPickle(pfile, quiet=True)[0]

# Reload the model at epoch 30
EP = '-EP30'
# File containing the model parameters
reloadFile = os.path.join(DIR, prefix + EP + '-params.npz')
print 'Model parameters in: ', reloadFile
# Don't load the training functions for the model, since that is time-consuming
params['validate_only'] = True
dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reloadFile)

print 'Model Reloaded: ', type(dmm_reloaded)
示例#9
0
文件: train.py 项目: yzhou26/dmm
import model_th.learning as DMM_learn
import model_th.evaluate as DMM_evaluate
displayTime('import DMM', start_time, time.time())
# Build the DMM, either by reloading a saved model or by creating a new one,
# then work out the checkpoint prefix used for training.
dmm = None
"""
Reload from savefile or train new model
"""
start_time = time.time()
# NOTE(review): presumably clears the './NOSUCHFILE' placeholder so that the
# existence check below only succeeds for a real reload file - helper not shown.
removeIfExists('./NOSUCHFILE')
# 'reloadFile' is removed from params before params is handed to DMM
reloadFile = params.pop('reloadFile')
if os.path.exists(reloadFile):
    # Reloading needs the matching hyperparameter file as well
    pfile = params.pop('paramFile')
    assert os.path.exists(pfile), pfile + ' not found. Need paramfile'
    print 'Reloading trained model from : ', reloadFile
    print 'Assuming ', pfile, ' corresponds to model'
    dmm = DMM(params, paramFile=pfile, reloadFile=reloadFile)
else:
    # Fresh model: the config path is derived from the save directory and id.
    # 'paramFile' is left in params on this branch.
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
    print 'Training model from scratch. Parameters in: ', pfile
    dmm = DMM(params, paramFile=pfile)
displayTime('Building dmm', start_time, time.time())
"""
Savefile where model checkpoints will be saved
"""
savef = os.path.join(params['savedir'], params['unique_id'])
print 'Savefile: ', savef
# Restart the clock before the training call that follows
start_time = time.time()
""" Training loop """
savedata = DMM_learn.learn(dmm,
                           dataset['train'],
                           epoch_start=0,
示例#10
0
def train_dmm(train_data,
              test_data,
              cols,
              dim_latent,
              unique_id,
              time_col='time_since_transplant',
              id_col='TRR_ID'):
    df = pandas.concat([train_data, test_data])

    # Format time so that it can be indexed
    df['time2'] = df[time_col].apply(mod_time)

    # This is different than time since transplant since some people miss albumin, bilirubin,
    # creatinine or acute rej episode at time of transplant
    df['t_elapsed'] = df.groupby(
        id_col, group_keys=False).apply(lambda g: g.time2 - g.time2.min())
    df.t_elapsed = df.t_elapsed.astype(int)

    nontest_ids = train_data[id_col].drop_duplicates()
    train_ids = nontest_ids.sample(frac=0.9)
    train = df[df[id_col].isin(train_ids)]
    val = df[np.logical_not(df[id_col].isin(train_ids))]
    test = df[df[id_col].isin(test_data[id_col])]

    # Reformat to a matrix
    x_train, id_train = df_to_padded(df=train,
                                     column_names=cols,
                                     id_col=id_col,
                                     t_col='t_elapsed')
    x_val, id_val = df_to_padded(df=val,
                                 column_names=cols,
                                 id_col=id_col,
                                 t_col='t_elapsed',
                                 max_seq_len=x_train.shape[1])
    x_test, id_test = df_to_padded(df=test,
                                   column_names=cols,
                                   id_col=id_col,
                                   t_col='t_elapsed',
                                   max_seq_len=x_train.shape[1])
    mask_value = -1.3371337
    x_train_masked = x_train.copy()
    x_train_masked[np.isnan(x_train_masked)] = mask_value
    x_val_masked = x_val.copy()
    x_val_masked[np.isnan(x_val_masked)] = mask_value
    x_test_masked = x_test.copy()
    x_test_masked[np.isnan(x_test_masked)] = mask_value

    dataset = {
        'dim_observations': cols.shape[0],
        'data_type': 'real',
        'train': {
            'tensor': x_train_masked,
            'mask': np.logical_not(np.isnan(x_train[:, :, 0])),
            'id': id_train
        },
        'valid': {
            'tensor': x_val_masked,
            'mask': np.logical_not(np.isnan(x_val[:, :, 0])),
            'id': id_val
        },
        'test': {
            'tensor': x_test_masked,
            'mask': np.logical_not(np.isnan(x_test[:, :, 0])),
            'id': id_test
        },
    }

    max_visits = x_train.shape[1]
    params = {
        'dim_observations': dataset['dim_observations'],
        'data_type': dataset['data_type'],
        'dataset': 'srtr',
        'epochs': 10,
        'seed': 1,
        'init_weight': 0.1,
        'dim_stochastic': dim_latent,
        'expt_name': 'something',
        'reg_value': 0.05,
        'reloadFile': './NOSUCHFILE',
        'reg_spec': '_',
        'dim_hidden': max_visits,
        'lr': 0.0008,
        'reg_type': 'l2',
        'init_scheme': 'uniform',
        'optimizer': 'adam',
        'use_generative_prior': 'approx',
        'maxout_stride': 4,
        'batch_size': 512,
        'savedir': './dmm_models',
        'forget_bias': -5.0,
        'inference_model': 'R',
        'emission_layers': 2,
        'savefreq': 100,
        'rnn_cell': 'lstm',
        'rnn_size': max_visits,
        'paramFile': './NOSUCHFILE',
        'nonlinearity': 'relu',
        'rnn_dropout': 0.1,
        'transition_layers': 2,
        'anneal_rate': 2.0,
        'debug': False,
        'validate_only': False,
        'transition_type': 'mlp',
        'unique_id': unique_id,
        'leaky_param': 0.0
    }

    # Create a temporary directory to save checkpoints
    os.system('mkdir -p ' + params['savedir'])

    # Specify the file where `params` corresponding for this choice of model and data will be saved
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'

    print 'Checkpoint prefix: ', pfile
    dmm = DMM(params, paramFile=pfile)

    savef = os.path.join(params['savedir'], params['unique_id'])
    savedata = DMM_learn.learn(dmm,
                               dataset['train'],
                               epoch_start=0,
                               epoch_end=101,
                               batch_size=params['batch_size'],
                               savefreq=params['savefreq'],
                               savefile=savef,
                               dataset_eval=dataset['valid'],
                               shuffle=True)
    return savedata