def run_model():
    # Load/generate the Lorenz data and set up the model hyperparameters
    dataset = load_lorenz()
    params = set_parameters(dataset)
    print params
    print 'Dimensionality of the observations: ', dataset['dim_observations']
    print 'Data type of features: ', dataset['data_type']
    for dtype in ['train', 'valid', 'test']:
        print 'dtype: ', dtype, ' type(dataset[dtype]): ', type(dataset[dtype])
        print [(k, type(dataset[dtype][k]), dataset[dtype][k].shape) for k in dataset[dtype]]
        print '--------\n'
    # Specify the file where the `params` for this choice of model and data will be saved
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
    print 'Checkpoint prefix: ', pfile
    dmm = DMM(params, paramFile=pfile)
    # savef is the prefix for the checkpoints - we use the same save directory as before
    savef = os.path.join(params['savedir'], params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0,
                               epoch_end=params['epochs'], batch_size=200,
                               savefreq=params['savefreq'], savefile=savef,
                               dataset_eval=dataset['valid'], shuffle=True)
def sample_from_model():
    # Sample trajectories from a trained model
    DIR = './chkpt/lorenz/'
    prefix = 'DMM_lr-0_0008-dh-40-ds-' + str(DIM_STOCHASTIC) + '-nl-relu-bs-200-ep-1000-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
    pfile = os.path.join(DIR, prefix + '-config.pkl')
    params = readPickle(pfile, quiet=True)[0]
    EP = '-EP990'
    reloadFile = os.path.join(DIR, prefix + EP + '-params.npz')
    print 'Model parameters in: ', reloadFile
    # Don't compile the training functions - we only need the model for sampling
    params['validate_only'] = True
    dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reloadFile)
    # (mu, logcov): parameters of the emission distributions
    # zvec: samples in the latent space
    (mu, logcov), zvec = DMM_evaluate.sample(dmm_reloaded, T=40, nsamples=10)
    print 'mu.shape = ', mu.shape
    print 'zvec.shape = ', zvec.shape
    visualize_data(mu, n_samples=10)
    plt.title('Mean trajectories')
    fig, axlist_x = plt.subplots(3, 1, figsize=(8, 10))
    nsamples = 10
    T = zvec.shape[1]
    SNUM = range(nsamples)
    for idx, ax in enumerate(axlist_x.ravel()):
        z = zvec[SNUM, :, idx]
        ax.plot(np.arange(T), np.transpose(z), '-*', label='Dim' + str(idx))
        ax.legend()
        ax.set_xlabel('Time')
    plt.suptitle('3 dimensional samples of latent space')
    plt.show()
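# Shape note (inferred from the plotting code above rather than from the DMM API):
# zvec is indexed as zvec[sample, time, latent_dim], so with T=40, nsamples=10 and
# DIM_STOCHASTIC=3 it should be (10, 40, 3); mu holds the per-step emission means
# with the observation dimension last. Print the shapes after sampling to confirm.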
def reload_dmm(prefix, dmm_dir='./dmm_models/', ep='-EP100'):
    pfile = os.path.join(dmm_dir, prefix + '-config.pkl')
    print 'Hyperparameters in: ', pfile, 'Found: ', os.path.exists(pfile)
    params = readPickle(pfile, quiet=True)[0]
    reload_file = os.path.join(dmm_dir, prefix + ep + '-params.npz')
    print 'Model parameters in: ', reload_file
    # Don't compile the training functions for the model since it's time-consuming
    params['validate_only'] = True
    dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reload_file)
    return dmm_reloaded
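# Usage sketch for reload_dmm - the prefix below is a hypothetical placeholder; it
# must match the unique_id of a checkpoint that actually exists under dmm_dir, with
# a saved '-params.npz' file for the requested epoch suffix.
# dmm = reload_dmm(prefix='DMM_lr-0_0008-...-uid', dmm_dir='./dmm_models/', ep='-EP100')
# (mu, logcov), zvec = DMM_evaluate.sample(dmm, T=40, nsamples=10)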
def reload_model(epoch):
    DIR = './chkpt/lorenz_coupled/'
    prefix = 'DMM_lr-0_0008-dh-40-ds-9-nl-relu-bs-100-ep-10000-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
    # The hyperparameters are saved in a pickle file - let's load them here
    pfile = DIR + prefix + '-config.pkl'
    hparam = readPickle(pfile, quiet=True)[0]
    reloadFile = DIR + prefix + '-EP' + str(epoch) + '-params.npz'
    # Don't compile the training functions for the model since it's time-consuming
    hparam['validate_only'] = True
    dmm = DMM(hparam, paramFile=pfile, reloadFile=reloadFile)
    return dmm
def get_latent_space(dmm, x, id, dim_latent):
    # Run the inference network q(z|x) and evaluate the resulting Theano tensor
    z_q, _, _ = DMM._q_z_x(dmm, x)
    z_q_arr = z_q.eval()
    # For each sequence, append its ID and the elapsed time step to the latent codes
    temp = np.array([
        np.hstack((
            z_q_arr[i, :, :],
            np.ones((z_q_arr.shape[1], 1)) * id[i],
            np.reshape(range(1, z_q_arr.shape[1] + 1), (z_q_arr.shape[1], 1)),
        )) for i in range(len(id))
    ])
    arr = np.concatenate(temp, axis=0)
    return pandas.DataFrame(
        data=arr,
        columns=np.concatenate(
            (['dmm{}'.format(i) for i in range(int(dim_latent))],
             ['TRR_ID', 't_elapsed'])))
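# Usage sketch: project a held-out split into the latent space as a long-format
# DataFrame (one row per patient-visit, columns dmm0..dmm{K-1}, TRR_ID, t_elapsed).
# Assumes `dmm` is a reloaded model and `dataset` is the dict built in train_dmm below.
# z_df = get_latent_space(dmm=dmm,
#                         x=dataset['valid']['tensor'],
#                         id=dataset['valid']['id'],
#                         dim_latent=params['dim_stochastic'])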
def run_model(dataset, params):
    set_extra_parameters(params=params, dataset=dataset)
    for key, value in params.items():
        print key, value
    # Specify the file where the `params` for this choice of model and data will be saved
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
    print 'Checkpoint prefix: ', pfile
    dmm = DMM(params, paramFile=pfile)
    # savef is the prefix for the checkpoints - we use the same save directory as before
    savef = os.path.join(params['savedir'], params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0,
                               epoch_end=params['epochs'], batch_size=params['batch_size'],
                               savefreq=params['savefreq'], savefile=savef,
                               dataset_eval=dataset['valid'], shuffle=True)
params['dim_observations'] = dataset['dim_observations']
# The dataset is small - let's change some of the default parameters and the unique ID
params['dim_stochastic'] = 2
params['dim_hidden'] = 40
params['rnn_size'] = 80
params['epochs'] = 40
params['batch_size'] = 200
params['unique_id'] = params['unique_id'].replace('ds-100', 'ds-2').replace('dh-200', 'dh-40').replace('rs-600', 'rs-80')
params['unique_id'] = params['unique_id'].replace('ep-2000', 'ep-40').replace('bs-20', 'bs-200')
# Create a temporary directory to save checkpoints
params['savedir'] = params['savedir'] + 'synthetic/'
os.system('mkdir -p ' + params['savedir'])
# Specify the file where the `params` for this choice of model and data will be saved
pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
print 'Checkpoint prefix: ', pfile
dmm = DMM(params, paramFile=pfile)
# savef is the prefix for the checkpoints - we use the same save directory as before
savef = os.path.join(params['savedir'], params['unique_id'])
savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0,
                           epoch_end=params['epochs'], batch_size=200,
                           savefreq=params['savefreq'], savefile=savef,
                           dataset_eval=dataset['valid'], shuffle=True)
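# For reference, the fields of unique_id encode the hyperparameters overridden above
# (mapping inferred by matching the params dict against the prefix string, not taken
# from the library's docs): lr = learning rate, dh = dim_hidden, ds = dim_stochastic,
# nl = nonlinearity, bs = batch_size, ep = epochs, rs = rnn_size, rd = rnn_dropout,
# infm = inference_model, tl = transition_layers, el = emission_layers,
# ar = anneal_rate, use_p = use_generative_prior, rc = rnn_cell.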
for k in opt_params:
    print k, opt_params[k].shape

import glob, os, sys, time
sys.path.append('../')
from utils.misc import getConfigFile, readPickle, displayTime
start_time = time.time()
from model_th.dmm import DMM
import model_th.learning as DMM_learn
import model_th.evaluate as DMM_evaluate
displayTime('importing DMM', start_time, time.time())

# This is the prefix we will use
DIR = './chkpt-ipython/'
prefix = 'DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
pfile = os.path.join(DIR, prefix + '-config.pkl')
print 'Hyperparameters in: ', pfile, 'Found: ', os.path.exists(pfile)
# The hyperparameters are saved in a pickle file - let's load them here
params = readPickle(pfile, quiet=True)[0]
# Reload the model at epoch 30
EP = '-EP30'
# File containing the model parameters
reloadFile = os.path.join(DIR, prefix + EP + '-params.npz')
print 'Model parameters in: ', reloadFile
# Don't compile the training functions for the model since it's time-consuming
params['validate_only'] = True
dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reloadFile)
print 'Model Reloaded: ', type(dmm_reloaded)
import model_th.learning as DMM_learn
import model_th.evaluate as DMM_evaluate
displayTime('import DMM', start_time, time.time())
dmm = None

""" Reload from savefile or train a new model """
start_time = time.time()
removeIfExists('./NOSUCHFILE')
reloadFile = params.pop('reloadFile')
if os.path.exists(reloadFile):
    pfile = params.pop('paramFile')
    assert os.path.exists(pfile), pfile + ' not found. Need paramFile'
    print 'Reloading trained model from: ', reloadFile
    print 'Assuming ', pfile, ' corresponds to the model'
    dmm = DMM(params, paramFile=pfile, reloadFile=reloadFile)
else:
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
    print 'Training model from scratch. Parameters in: ', pfile
    dmm = DMM(params, paramFile=pfile)
displayTime('Building dmm', start_time, time.time())

""" Savefile where model checkpoints will be saved """
savef = os.path.join(params['savedir'], params['unique_id'])
print 'Savefile: ', savef
start_time = time.time()

""" Training loop (call completed to match the learn() invocations above) """
savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0,
                           epoch_end=params['epochs'], batch_size=params['batch_size'],
                           savefreq=params['savefreq'], savefile=savef,
                           dataset_eval=dataset['valid'], shuffle=True)
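# Note: './NOSUCHFILE' acts as a sentinel for "nothing to reload". removeIfExists
# guarantees the path is absent, so os.path.exists(reloadFile) is False and the model
# is built from scratch. To resume training instead, set params['reloadFile'] and
# params['paramFile'] to an existing '-params.npz' / '-config.pkl' checkpoint pair.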
def train_dmm(train_data, test_data, cols, dim_latent, unique_id,
              time_col='time_since_transplant', id_col='TRR_ID'):
    df = pandas.concat([train_data, test_data])
    # Format time so that it can be indexed
    df['time2'] = df[time_col].apply(mod_time)
    # This differs from time since transplant, since some people are missing albumin,
    # bilirubin, creatinine or an acute rejection episode at the time of transplant
    df['t_elapsed'] = df.groupby(
        id_col, group_keys=False).apply(lambda g: g.time2 - g.time2.min())
    df.t_elapsed = df.t_elapsed.astype(int)
    nontest_ids = train_data[id_col].drop_duplicates()
    train_ids = nontest_ids.sample(frac=0.9)
    train = df[df[id_col].isin(train_ids)]
    val = df[np.logical_not(df[id_col].isin(train_ids))]
    test = df[df[id_col].isin(test_data[id_col])]
    # Reformat each split into a padded tensor
    x_train, id_train = df_to_padded(df=train, column_names=cols, id_col=id_col,
                                     t_col='t_elapsed')
    x_val, id_val = df_to_padded(df=val, column_names=cols, id_col=id_col,
                                 t_col='t_elapsed', max_seq_len=x_train.shape[1])
    x_test, id_test = df_to_padded(df=test, column_names=cols, id_col=id_col,
                                   t_col='t_elapsed', max_seq_len=x_train.shape[1])
    # Replace missing values with a sentinel; the masks below mark observed time steps
    mask_value = -1.3371337
    x_train_masked = x_train.copy()
    x_train_masked[np.isnan(x_train_masked)] = mask_value
    x_val_masked = x_val.copy()
    x_val_masked[np.isnan(x_val_masked)] = mask_value
    x_test_masked = x_test.copy()
    x_test_masked[np.isnan(x_test_masked)] = mask_value
    dataset = {
        'dim_observations': cols.shape[0],
        'data_type': 'real',
        'train': {'tensor': x_train_masked,
                  'mask': np.logical_not(np.isnan(x_train[:, :, 0])),
                  'id': id_train},
        'valid': {'tensor': x_val_masked,
                  'mask': np.logical_not(np.isnan(x_val[:, :, 0])),
                  'id': id_val},
        'test': {'tensor': x_test_masked,
                 'mask': np.logical_not(np.isnan(x_test[:, :, 0])),
                 'id': id_test},
    }
    max_visits = x_train.shape[1]
    params = {
        'dim_observations': dataset['dim_observations'],
        'data_type': dataset['data_type'],
        'dataset': 'srtr',
        'epochs': 10,
        'seed': 1,
        'init_weight': 0.1,
        'dim_stochastic': dim_latent,
        'expt_name': 'something',
        'reg_value': 0.05,
        'reloadFile': './NOSUCHFILE',
        'reg_spec': '_',
        'dim_hidden': max_visits,
        'lr': 0.0008,
        'reg_type': 'l2',
        'init_scheme': 'uniform',
        'optimizer': 'adam',
        'use_generative_prior': 'approx',
        'maxout_stride': 4,
        'batch_size': 512,
        'savedir': './dmm_models',
        'forget_bias': -5.0,
        'inference_model': 'R',
        'emission_layers': 2,
        'savefreq': 100,
        'rnn_cell': 'lstm',
        'rnn_size': max_visits,
        'paramFile': './NOSUCHFILE',
        'nonlinearity': 'relu',
        'rnn_dropout': 0.1,
        'transition_layers': 2,
        'anneal_rate': 2.0,
        'debug': False,
        'validate_only': False,
        'transition_type': 'mlp',
        'unique_id': unique_id,
        'leaky_param': 0.0
    }
    # Create a temporary directory to save checkpoints
    os.system('mkdir -p ' + params['savedir'])
    # Specify the file where the `params` for this choice of model and data will be saved
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
    print 'Checkpoint prefix: ', pfile
    dmm = DMM(params, paramFile=pfile)
    savef = os.path.join(params['savedir'], params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'], epoch_start=0, epoch_end=101,
                               batch_size=params['batch_size'],
                               savefreq=params['savefreq'], savefile=savef,
                               dataset_eval=dataset['valid'], shuffle=True)
    return savedata
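# Usage sketch for train_dmm - all names here are hypothetical placeholders:
# `train_df` and `test_df` are long-format DataFrames with one row per patient visit
# (containing TRR_ID and time_since_transplant columns), and `feature_cols` is an
# array of the covariate columns to model (an array, since cols.shape[0] is used above).
# feature_cols = np.array(['albumin', 'bilirubin', 'creatinine'])
# savedata = train_dmm(train_data=train_df, test_data=test_df, cols=feature_cols,
#                      dim_latent=9, unique_id='dmm-srtr-ds-9')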