def batch_feed_dict(self, e, i, j, t, x, outcomes):
    """Build the feed dictionary for the mini-batch spanning rows i..j."""
    batch_x = x[i:j, :]
    batch_t = t[i:j]
    batch_risk = risk_set(batch_t)
    batch_impute_mask = get_missing_mask(batch_x, self.imputation_values)
    batch_e = e[i:j]
    idx_observed = batch_e == 1
    feed_dict = {
        self.x: batch_x,
        self.impute_mask: batch_impute_mask,
        self.t: batch_t,
        self.t_lab: batch_t[idx_observed],
        self.e: batch_e,
        self.risk_set: batch_risk,
        self.batch_size_tensor: len(batch_t),
        self.is_training: False,
        self.noise_alpha: np.ones(shape=self.noise_dim)
    }
    # TODO replace with abstract methods
    updated_feed_dict = self.outcomes_function(idx=i, j=j, feed_dict=feed_dict, outcomes=outcomes)
    return updated_feed_dict
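# Illustrative sketch only (not part of the original file): `risk_set` is imported from a
# repo utility module. It is assumed here to build the at-risk indicator matrix consumed
# by the ranking / partial-likelihood loss, i.e. R[i, j] = 1 when subject j is still at
# risk at subject i's event time (t[j] >= t[i]). A minimal version under that assumption:
def risk_set_sketch(data_t):
    # data_t: 1-D array of event/censoring times for the batch
    data_t = np.asarray(data_t).reshape(-1)
    # R[i, j] = 1 if t[j] >= t[i], else 0
    return (data_t[None, :] >= data_t[:, None]).astype(np.float32)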
def get_dict(x, t, e):
    """Feed dictionary for a full data split (no risk set needed)."""
    # `self` is captured from the enclosing scope (nested helper).
    observed_idx = e == 1
    feed_dict = {
        self.x: x,
        self.impute_mask: get_missing_mask(x, self.imputation_values),
        self.t: t,
        self.t_lab: t[observed_idx],
        self.e: e,
        self.batch_size_tensor: len(t),
        self.is_training: False,
        self.noise_alpha: np.ones(shape=self.noise_dim)
    }
    return {'feed_dict': feed_dict, 'outcomes': {}}
def get_dict(x, t, e):
    """Feed dictionary that also splits covariates and times into labelled (observed) and unlabelled (censored) subsets."""
    # `self` is captured from the enclosing scope (nested helper).
    observed_idx = e == 1
    feed_dict = {
        self.x: x,
        self.x_lab: x[observed_idx],
        self.x_unlab: x[np.logical_not(observed_idx)],
        self.impute_mask: get_missing_mask(x, self.imputation_values),
        self.t: t,
        self.t_lab: t[observed_idx],
        self.t_unlab: t[np.logical_not(observed_idx)],
        self.e: e,
        self.batch_size_tensor: len(t),
        self.is_training: False
    }
    return {'feed_dict': feed_dict, 'outcomes': {}}
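# Illustrative sketch only: `get_missing_mask` is a repo utility. It is assumed to flag
# covariate entries that hold a per-feature imputation placeholder value (i.e. were
# originally missing). Whether the mask marks observed entries with 1 or imputed entries
# with 1 depends on the repo's convention; the version below uses 1 = observed.
def get_missing_mask_sketch(x, imputation_values):
    # x: (n_samples, n_features) covariates; imputation_values: one placeholder per feature
    x = np.asarray(x, dtype=np.float32)
    placeholders = np.asarray(imputation_values, dtype=np.float32).reshape(1, -1)
    # 1.0 where the entry differs from the placeholder (observed), 0.0 where it was imputed
    return (x != placeholders).astype(np.float32)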
def save_time_samples(self, x, t, e, name, cens=False):
    """Draw predicted-time samples for a data split (saving/plotting is currently disabled)."""
    observed = e == 1
    feed_dict = {
        self.x: x,
        self.impute_mask: get_missing_mask(x, self.imputation_values),
        self.t: t,
        self.t_lab: t[observed],
        self.e: e,
        self.risk_set: risk_set(t),
        self.batch_size_tensor: len(t),
        self.is_training: False
    }
    mean, log_var = self.session.run([self.t_mu, self.t_log_var], feed_dict=feed_dict)
    predicted_time = sample_log_normal(log_var=log_var, mean=mean, sample_size=self.sample_size)
    # print("predicted_time_samples:{}".format(predicted_time.shape))
    # np.save('matrix/{}_{}_samples_predicted_time'.format('Test', name), predicted_time)
    # np.save('{}_{}_samples_predicted_time'.format('Test', name), predicted_time)
    # plot_predicted_distribution(predicted=predicted_time, empirical=t, data='Test_' + name, cens=cens)
    return
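# Illustrative sketch only: `sample_log_normal` is a repo utility. It is assumed to draw
# `sample_size` Monte Carlo time samples per subject from a log-normal parameterised by
# the per-subject mean and log-variance of log(t). A minimal version under that assumption:
def sample_log_normal_sketch(log_var, mean, sample_size):
    mean = np.asarray(mean).reshape(-1)                         # (n_subjects,)
    std = np.sqrt(np.exp(np.asarray(log_var))).reshape(-1)      # std dev on the log-time scale
    # Gaussian draws on the log-time scale, exponentiated back to time; shape (sample_size, n_subjects)
    gaussian = np.random.normal(loc=mean, scale=std, size=(sample_size, mean.shape[0]))
    return np.exp(gaussian)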
def generate_time_samples(self, e, x):
    """Draw `sample_size` predicted-time samples per subject from the generator."""
    # observed = e == 1
    feed_dict = {
        self.x: x,
        self.impute_mask: get_missing_mask(x, self.imputation_values),
        # self.t: t,
        # self.t_lab: t[observed],
        self.e: e,
        # self.risk_set: risk_set(t),
        self.batch_size_tensor: len(x),
        self.is_training: False,
        self.noise_alpha: np.ones(shape=self.noise_dim)
    }
    predicted_time = []
    for _ in range(self.sample_size):
        gen_time = self.session.run(self.predicted_time, feed_dict=feed_dict)
        predicted_time.append(gen_time)
    return np.array(predicted_time)
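# Illustrative usage sketch (not from the original file): one way to collapse the
# (sample_size, n_subjects) sample matrix returned above into a point prediction per
# subject is the per-subject median. Whether this repo uses the median, the mean, or the
# full sample set downstream is an assumption here.
def median_predicted_time_sketch(model, x, e):
    samples = model.generate_time_samples(e=e, x=x)   # shape: (sample_size, n_subjects)
    return np.median(samples, axis=0)                 # shape: (n_subjects,)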
def train_neural_network(self):
    train_print = "Training {0} Model:".format(self.model)
    params_print = "Parameters:, l2_reg:{}, learning_rate:{}," \
                   " momentum: beta1={} beta2={}, batch_size:{}, batch_norm:{}," \
                   " hidden_dim:{}, latent_dim:{}, num_of_batches:{}, keep_prob:{}, disc_update:{}" \
        .format(self.l2_reg, self.learning_rate, self.beta1, self.beta2, self.batch_size, self.batch_norm,
                self.hidden_dim, self.latent_dim, self.num_batches, self.keep_prob, self.disc_updates)
    # print(train_print)
    # print(params_print)
    # logging.debug(train_print)
    # logging.debug(params_print)
    self.session.run(tf.global_variables_initializer())
    best_ci = 0
    best_t_reg = np.inf
    best_validation_epoch = 0
    last_improvement = 0
    start_time = time.time()
    epochs = 0
    # show_all_variables()
    j = 0

    for i in range(self.num_iterations):
        # Batch training: pull the next mini-batch from the input queue.
        run_options = tf.RunOptions(timeout_in_ms=4000)
        x_batch, t_batch, e_batch = self.session.run(
            [self.x_batch, self.t_batch, self.e_batch], options=run_options)
        risk_batch = risk_set(data_t=t_batch)
        batch_impute_mask = get_missing_mask(x_batch, self.imputation_values)
        batch_size = len(t_batch)
        idx_observed = e_batch == 1
        # TODO simplify batch processing
        feed_dict_train = {
            self.x: x_batch,
            self.impute_mask: batch_impute_mask,
            self.t: t_batch,
            self.t_lab: t_batch[idx_observed],
            self.e: e_batch,
            self.risk_set: risk_batch,
            self.batch_size_tensor: batch_size,
            self.is_training: True,
            self.noise_alpha: np.ones(shape=self.noise_dim)
        }
        # Alternate discriminator and generator updates (adversarial training).
        for k in range(self.disc_updates):
            _ = self.session.run([self.disc_solver], feed_dict=feed_dict_train)
        for m in range(self.gen_updates):
            _ = self.session.run([self.gen_solver], feed_dict=feed_dict_train)
        summary, train_time, train_cost, train_ranking, train_rae, train_reg, train_gen, train_layer_one_recon, \
            train_t_reg, train_t_mse, train_disc = self.session.run(
                [self.merged, self.predicted_time, self.cost, self.ranking_partial_lik, self.total_rae,
                 self.reg_loss, self.gen_one_loss, self.layer_one_recon, self.t_regularization_loss,
                 self.t_mse, self.disc_one_loss],
                feed_dict=feed_dict_train)
        try:
            train_ci = concordance_index(
                event_times=t_batch,
                predicted_scores=np.nan_to_num(train_time).reshape(t_batch.shape),
                event_observed=e_batch)
        except IndexError:
            train_ci = 0.0
            print("C-Index IndexError")

        ##### ibs / ibll #####
        train_time_grid = np.linspace(t_batch.min(), t_batch.max(), 100)
        train_ds = np.array(train_time_grid - np.array([0.0] + train_time_grid[:-1].tolist()))
        # print(t_batch)
        # print(e_batch)
        train_bs, train_bll = get_scores(
            y_train=t_batch, delta_train=e_batch,
            y_test=t_batch, delta_test=e_batch,
            pred_train=train_time.reshape(t_batch.shape),
            pred_test=train_time.reshape(t_batch.shape),
            time_grid=train_time_grid,
            surv_residual=False, cens_residual=False)
        train_ibs = sum(train_bs * train_ds) / (train_time_grid.max() - train_time_grid.min())
        train_ibll = sum(train_bll * train_ds) / (train_time_grid.max() - train_time_grid.min())
        ######################

        tf.verify_tensor_all_finite(train_cost, "Training Cost has Nan or Infinite")
        if j >= self.num_examples:
            epochs += 1
            is_epoch = True
            # idx = 0
            j = 0
        else:
            # idx = j
            j += self.batch_size
            is_epoch = False
        if i % 100 == 0:
            train_print = "it:{}, trainCI:{}, train_ranking:{}, train_RAE:{}, train_Gen:{}, train_Disc:{}, " \
                          "train_reg:{}, train_t_reg:{}, train_t_mse:{}, train_layer_one_recon:{}".format(
                              i, train_ci, train_ranking, train_rae, train_gen, train_disc, train_reg,
                              train_t_reg, train_t_mse, train_layer_one_recon)
            # print(train_print)
            # logging.debug(train_print)

        if is_epoch or (i == (self.num_iterations - 1)):
            improved_str = ''
            # Calculate the validation CI and track the training metrics.
            self.train_ci.append(train_ci)
            self.train_cost.append(train_cost)
            self.train_t_rae.append(train_rae)
            self.train_gen.append(train_gen)
            self.train_disc.append(train_disc)
            self.train_ranking.append(train_ranking)
            self.train_layer_one_recon.append(train_layer_one_recon)
            self.train_writer.add_summary(summary, i)

            valid_ci, valid_cost, valid_rae, valid_ranking, valid_gen, valid_reg, valid_disc, \
                valid_layer_one_recon, valid_t_reg, valid_t_mse = self.predict_concordance_index(
                    x=self.valid_x, e=self.valid_e, t=self.valid_t)
            self.valid_cost.append(valid_cost)
            self.valid_ci.append(valid_ci)
            self.valid_t_rae.append(valid_rae)
            self.valid_gen.append(valid_gen)
            self.valid_disc.append(valid_disc)
            self.valid_ranking.append(valid_ranking)
            self.valid_layer_one_recon.append(valid_layer_one_recon)
            tf.verify_tensor_all_finite(valid_cost, "Validation Cost has Nan or Infinite")

            if valid_t_reg < best_t_reg:
                # Save the best-performing variables of the TensorFlow graph to file
                # and update the best validation statistics.
                self.saver.save(sess=self.session, save_path=self.save_path)
                best_validation_epoch = epochs
                best_t_reg = valid_t_reg
                last_improvement = epochs
                improved_str = '*'

            # optimization_print = "Iteration: {} epochs:{}, Training: RAE:{}, Loss: {}," \
            #                      " Ranking:{}, Reg:{}, Gen:{}, Disc:{}, Recon_One:{}, T_Reg:{},T_MSE:{}, CI:{}" \
            #                      " Validation RAE:{} Loss:{}, Ranking:{}, Reg:{}, Gen:{}, Disc:{}, " \
            #                      "Recon_One:{}, T_Reg:{}, T_MSE:{}, CI:{}, {}" \
            #     .format(i + 1, epochs, train_rae, train_cost, train_ranking, train_reg, train_gen,
            #             train_disc, train_layer_one_recon, train_t_reg, train_t_mse,
            #             train_ci, valid_rae, valid_cost, valid_ranking, valid_reg, valid_gen, valid_disc,
            #             valid_layer_one_recon, valid_t_reg, valid_t_mse, valid_ci, improved_str)
            optimization_print = "Iteration: {} || TRAIN loss: {}, CI: {}, IBS: {}, IBLL: {} || " \
                                 "VAL loss: {}, CI: {}, improved: {}".format(
                                     i + 1, np.round(train_cost, 4), np.round(train_ci, 4),
                                     np.round(train_ibs, 4), np.round(train_ibll, 4),
                                     np.round(valid_cost, 4), np.round(valid_ci, 4), improved_str)
            if (i + 1) % 50 == 0:
                print(optimization_print)
                # logging.debug(optimization_print)

            if epochs - last_improvement > self.require_improvement or math.isnan(train_cost) \
                    or epochs >= self.max_epochs:
                # if i - last_improvement > self.require_improvement:
                print("No improvement found in a while, stopping optimization.")
                # Break out from the for-loop.
                break

    # Ending time.
    end_time = time.time()
    time_dif = end_time - start_time
    time_dif_print = "Time usage: " + str(timedelta(seconds=int(round(time_dif))))
    # print(time_dif_print)
    # logging.debug(time_dif_print)
    # Shut down the input queue and reader threads to avoid zombies.
    self.session.run(self.queue.close(cancel_pending_enqueues=True))
    self.coord.request_stop()
    self.coord.join(self.threads)
    return best_validation_epoch, epochs
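# Illustrative sketch only: the training loop above integrates the per-time-point Brier
# scores (and binomial log-likelihoods) returned by `get_scores` with a left rectangle
# rule over a 100-point time grid, then normalises by the grid span. The helper below
# reproduces just that integration step; the per-time-point scores themselves are assumed
# to come from `get_scores`.
def integrate_scores_sketch(scores, time_grid):
    scores = np.asarray(scores, dtype=np.float64)
    time_grid = np.asarray(time_grid, dtype=np.float64)
    # Rectangle widths; the first width is measured from 0, matching the loop above.
    widths = time_grid - np.concatenate(([0.0], time_grid[:-1]))
    return float(np.sum(scores * widths) / (time_grid.max() - time_grid.min()))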