def net(TRAIN_VAE=True, load_model=True, real_data=True, bayesian_opt=True, anomaly_threshold=10,
        fp_ratio=1, fn_ratio=10, verbose=True, plots=True, batch_norm=False, dropout=True):
    # Set random generator seed for reproducibility
    np.random.seed(168)
    tf.set_random_seed(168)
    # Reset the default graph
    tf.reset_default_graph()
    ###############################################################################
    # ======================= file and directory names ========================== #
    ###############################################################################
    mode = "supervised"
    run_comment = "lin_VAE_rnn_ecg200_test10zsamples"  # "lin_VAE_rnn_ecg200_10xRcLoss"
    dataset = "ecg200"
    save_dir = ("./" + dataset + "/")
    save_sum_dir = save_dir + "logs/"
    save_print_dir = save_dir + "prints/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(save_sum_dir):
        os.makedirs(save_sum_dir)
    if not os.path.exists(save_print_dir):
        os.makedirs(save_print_dir)
    ###############################################################################
    ##########                 Set model parameters                     ##########
    ###############################################################################
    # RNN parameters
    RNN_size = 128; n_rnn_layers = 3
    # hidden and latent space parameters
    z_dim = 10; h_dim = 512
    # running parameters (hyper-parameters)
    epochs = 56; batch_size = 48; early_stopping = 20
    learning_rate = 1e-4; l2_reg = 1e-3; drop_probs = 0.25
    val_epochs = 1; anneal_rate = 1e-0; n_test_steps = 100
    # sampling parameters & classification criterion
    num_z_samples = 10; output_samples = 1; criterion = 'precision'
    # load data directory
    # directory1 = 'LSTM_Data/'
    # data_filename = directory1+'ecg_data3.npz'
    data_filename = 'dataset/' + dataset + "_normal_anomaly_seperate.npz"
    # run parameters
    split_ratio = 0.5
    anom_split = str(np.round(1 - split_ratio, decimals=3)).replace(".", "_")
    file_name = run_comment + '_' + dataset + "_h_" + str(h_dim) + "_z_" + str(z_dim)
    load_file_name = file_name
    ###############################################################################
    # ========================= get data and variables ========================== #
    ###############################################################################
    # get data
    x_train, x_valid, x_test, anom_valid, anom_test = fetch_data()  # get_data(data_filename,split_ratio,real_data=real_data)
    # calculate sizes
    train_size, valid_size, test_size, anom_valid_size, anom_test_size, X_dim = \
        x_train.shape[0], x_valid.shape[0], x_test.shape[0], anom_valid.shape[0], anom_test.shape[0], x_train.shape[1]
    # other
    num_batches = train_size // batch_size; epoch = 0; save_epochs = 0
    b_t_ratio = 2  # train_size//batch_size
    # Training losses containers
    best_eval_loss = np.inf
    training_loss = []
    validation_loss = []
    kl_training_loss = []
    rc_training_loss = []
    # initialize regularizer and graph
    regularize = tf.contrib.layers.l2_regularizer(l2_reg, scope=None)
    initialize = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)
    graph = tf.Graph()
    # put placeholders on graph
    with graph.as_default():
        # ========================= Placeholders ==================================
        with tf.name_scope("input"):
            X = tf.placeholder(tf.float32, shape=[None, X_dim], name="input_X")
            # y = tf.placeholder(tf.float32, shape=[None, y_dim],name="label")
            drop_prob = tf.placeholder(tf.float32, shape=(), name='dropout_prob')
            alpha_KL = tf.placeholder_with_default(input=1.0, shape=(), name='KL_annealing')
            rc_weight = tf.placeholder_with_default(input=1.0, shape=(), name='reconstruction_loss_weight')
            is_train = tf.placeholder_with_default(input=False, shape=(), name='train_test_state')
            l_rate = tf.placeholder_with_default(input=learning_rate, shape=(), name='var_learning_rate')
        with tf.name_scope("latent"):
            z = tf.placeholder(tf.float32, shape=[None, z_dim], name="latent_vars")
        # introduce convenience function for batch norm
        batch_norm_layer = partial(tf.layers.batch_normalization, training=is_train, momentum=0.95)

        # =============================== Q(z|X) ====================================
        def encode(x, scope='encoder', reuse=False, drop_prob=drop_probs, is_train=is_train,
                   batch_norm=batch_norm, dropout=dropout):
            '''
            Discriminative model (encoder)
            Input:
                x : input data
            Returns:
                z_mu, z_logvar : mean and log-variance of z
            '''
            with tf.variable_scope("encoder", reuse=reuse):
                # ====== Qz(x) ======#
                inputs = x
                h = tf.layers.dense(inputs, h_dim, activation=None, kernel_initializer=initialize,
                                    kernel_regularizer=regularize, name='e_hidden_1')
                if dropout:
                    h = tf.layers.dropout(h, training=is_train, rate=drop_probs, seed=128)
                if batch_norm:
                    h = batch_norm_layer(h)
                h = tf.nn.elu(h)
                z_mu = tf.layers.dense(h, z_dim, activation=None, kernel_initializer=initialize,
                                       kernel_regularizer=regularize, name='z_mu')
                if batch_norm:
                    z_mu = batch_norm_layer(z_mu)
                z_logvar = tf.layers.dense(h, z_dim, activation=None, kernel_initializer=initialize,
                                           kernel_regularizer=regularize, name='z_logvar')
                if batch_norm:
                    z_logvar = batch_norm_layer(z_logvar)
            return z_mu, z_logvar

        def sample_z(mu, log_var):
            eps = tf.random_normal(shape=tf.shape(mu))
            return mu + tf.exp(log_var / 2) * eps

        # =============================== P(X|z) ====================================
        def decode(z, scope='decoder', reuse=False, drop_prob=drop_probs, is_train=is_train,
                   batch_norm=batch_norm, dropout=dropout):
            '''
            Generative model (decoder)
            Input:
                z : latent space data
            Returns:
                x : generated data
            '''
            with tf.variable_scope("decoder", reuse=reuse):
                # ====== Px(z) ======#
                inputs = z  # tf.concat(axis=1, values=[z])
                # calculate hidden
                h = tf.layers.dense(inputs, 2 * n_rnn_layers * RNN_size, activation=None, kernel_initializer=initialize,
                                    kernel_regularizer=regularize, name='RNN_state_init_layer')
                if dropout:
                    h = tf.layers.dropout(h, training=is_train, rate=drop_probs, seed=128)
                if batch_norm:
                    h = batch_norm_layer(h)
                h = tf.nn.elu(h)
                # h = tf.unstack(h, axis=0)
                h = tf.reshape(h, [n_rnn_layers, 2, -1, RNN_size])
                init_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(h[idx][0], h[idx][1])
                                    for idx in range(n_rnn_layers)])
                memory_cell = [tf.contrib.rnn.LSTMCell(RNN_size, use_peepholes=False, forget_bias=1.0,
                                                       state_is_tuple=True) for cell in range(n_rnn_layers)]
                memory_cell = tf.contrib.rnn.MultiRNNCell(memory_cell)
                # memory_cell = tf.layers.dropout(memory_cell, rate=drop_prob, training=is_train, seed=128)
                inputs = tf.expand_dims(inputs, -1)                                     # [10,50,1]
                rnn_outputs, states = tf.nn.dynamic_rnn(memory_cell, inputs=inputs,
                                                        initial_state=init_state, dtype=tf.float32)  # [10,50,128]
                stacked_outputs = tf.reshape(rnn_outputs, [-1, RNN_size * z_dim])
                if batch_norm:
                    stacked_outputs = batch_norm_layer(stacked_outputs)                 # [10,50*128]
                # calculate the mean of the output (Gaussian)
                x_mu = tf.layers.dense(stacked_outputs, X_dim, activation=None, kernel_initializer=initialize,
                                       kernel_regularizer=regularize, name='x_mu')
                if batch_norm:
                    x_mu = batch_norm_layer(x_mu)                                       # [10,100]
                # x_mu = tf.reshape(x_mu,[-1,X_dim])                                    # [500,100] vs [10,100]
                # x_logvar = tf.layers.dense(stacked_outputs, X_dim, activation=None, kernel_initializer=initialize,
                #                            kernel_regularizer=regularize, name='x_logvar')
                # if batch_norm:
                #     x_logvar = batch_norm_layer(x_logvar)
                # x_logvar = tf.reshape(x_logvar,[-1,X_dim])
                # assert ph.shape(x_logvar)==(50,100)
                # print(ph.shape(x_logvar))
            return x_mu  # , x_logvar

        # =============================== ELBO ====================================
        def loss(X, x_sample, z_mu, z_logvar, reuse=None, n_z_samples=1, alpha_KL=alpha_KL, rc_weight=rc_weight):
            with tf.name_scope("loss"):
                # E[log P(X|z)]
                # print(ph.shape(x_sample))
                # print(ph.shape(X))
                recon_loss = 0.5 * tf.reduce_sum(tf.square(x_sample - X), axis=1) / n_z_samples
                # tf.cond(is_train,False):
                for i in range(n_z_samples - 1):
                    z_sample = sample_z(z_mu, z_logvar)
                    # x_mu, x_logvar = decode(z_sample, reuse=reuse)
                    x_mu = decode(z_sample, reuse=reuse)
                    # x_sample = sample_z(x_mu, x_logvar)
                    x_sample = x_mu
                    recon_loss += 0.5 * tf.reduce_sum(tf.square(X - x_sample), axis=1) / n_z_samples
                # D_KL(Q(z|X) || P(z)); calculate in closed form as both dist. are Gaussian
                kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, axis=1)
                # Regularisation cost
                reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                # VAE loss
                ELBO = rc_weight * recon_loss + alpha_KL * kl_loss
                vae_loss = tf.add_n([tf.reduce_mean(ELBO) + tf.reduce_sum(reg_variables)])
                # batch_loss = tf.reduce_mean(ELBO)
                # summary
                with tf.name_scope("Summaries"):
                    # tf.summary.scalar("ELBO",ELBO)
                    tf.summary.scalar("Batch_loss", tf.reduce_mean(ELBO))
                    merger = tf.summary.merge_all()
            return vae_loss, ELBO, merger, tf.reduce_mean(kl_loss), tf.reduce_mean(recon_loss)

        # =============================== TRAINING ====================================
        # embed (encode)
        z_mu, z_logvar = encode(X)
        with tf.name_scope("latent"):
            z_sample = sample_z(z_mu, z_logvar)
        # generate (decode)
        # x_mu, x_logvar = decode(z_sample, reuse=None)
        x_mu = decode(z_sample, reuse=None)
        # sample x
        # x_sample = sample_z(x_mu, x_logvar)
        x_sample = x_mu
        # loss
        vae_loss, ELBO, merger, kl_loss, rec_loss = loss(X, x_sample, z_mu, z_logvar, reuse=True,
                                                         n_z_samples=10, alpha_KL=alpha_KL, rc_weight=rc_weight)
        with tf.name_scope("optimiser"):
            # updater
            train_step = tf.train.AdamOptimizer(learning_rate=l_rate)
            grads = train_step.compute_gradients(vae_loss)
            clipped_grads = [(tf.clip_by_value(grad, -2, 2), var) for grad, var in grads]
            solver = train_step.apply_gradients(clipped_grads)
            bn_update = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # =============================== TESTING =================================== #
        with tf.name_scope("Testing"):
            z_mu_t, z_logvar_t = encode(X, reuse=True)
            z_sample_t = sample_z(z_mu_t, z_logvar_t)
            # x_mu_t, x_logvar_t = decode(z_sample_t, reuse=True)
            x_mu_t = decode(z_sample_t, reuse=True)
            # sample x
            # x_sample_t = sample_z(x_mu_t, x_logvar_t)
            x_sample_t = x_mu_t
            # loss
            _, ELBO_t, _, _, _ = loss(X, x_sample_t, z_mu_t, z_logvar_t, reuse=True,
                                      n_z_samples=num_z_samples, alpha_KL=1.0)

        # =============================== Session =============================== #
        if TRAIN_VAE:
            sum_writter = tf.summary.FileWriter(save_sum_dir, graph)
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer(), {is_train: False})
            if load_model:
                print("Loading saved model")
                if save_dir is None:
                    raise ValueError("Filename and path not supplied! Aborting...")
                else:
                    try:
                        tf.train.Saver().restore(sess, save_dir + load_file_name)
                        print("Done! Continuing training...\n")
                        loaded = np.load(save_dir + file_name + '.npz')
                        best_eval_loss = loaded['best_eval_loss']
                    except Exception:
                        print("Could not find saved file. Training from scratch")
            # ========================= START TRAINING ========================== #
            # normalize data
            train_mean, train_std, x_train = ph.normalize(x_train)
            x_valid = ph.p_normalize(x_valid, train_mean, train_std)
            print("Training set length: {}".format(x_train.shape[0]))
            print(" -----Training Started-----")
            count = 0
            for epoch in range(epochs):
                epoch_loss = 0
                r_loss = 0
                kl = 0
                epoch_time = time.time()
                x_train = ph.shuffle_x_data(x_train)
                # For each epoch train with mini-batches of size (batch_size)
                for batch_num in range(train_size // batch_size):
                    # anneal KL cost (full cost after 50,000 batches)
                    kl_a = 2 * (-.5 + 1 / (1 + np.exp(-count * anneal_rate)))
                    X_mb = x_train[batch_num * batch_size:(batch_num + 1) * batch_size, ]
                    X_mb = random_rescale(X_mb)
                    X_mb = add_noise(X_mb)
                    # X_mb = random_shift(X_mb)
                    # train
                    train_dict = {X: X_mb, is_train: True, drop_prob: drop_probs, alpha_KL: kl_a,
                                  rc_weight: b_t_ratio, l_rate: learning_rate}
                    _, loss, k_, r_ = sess.run([solver, vae_loss, kl_loss, rec_loss], feed_dict=train_dict)  # DELETED ,bn_update
                    epoch_loss += loss; kl += k_; r_loss += r_; count += 1
                # print progress
                ph.progress(epoch, (epoch_loss / num_batches), (r_loss / num_batches), (kl / num_batches),
                            time.time() - epoch_time)
                training_loss.append(epoch_loss / num_batches)
                rc_training_loss.append(r_loss / num_batches)
                kl_training_loss.append(kl / num_batches)
                # validate
                if epoch > 0 and epoch % val_epochs == 0:
                    vloss = 0
                    valid_dict = {X: x_valid, is_train: False, drop_prob: 0.0, alpha_KL: 1.0}
                    vloss, vaeloss = sess.run([vae_loss, merger], feed_dict=valid_dict)
                    sum_writter.add_summary(vaeloss, epoch)
                    # print progress
                    print('Validation_Training_Epoch: {}'.format(epoch))
                    print('Loss: {}'.format(vloss))
                    validation_loss.append(vloss)
                    if vloss < best_eval_loss:
                        # update best result and save checkpoint
                        best_eval_loss = vloss
                        saver.save(sess, save_dir + file_name)
                        save_epochs = epoch
                # early stopping condition
                if epoch - save_epochs > early_stopping // 2:
                    learning_rate /= 2
                if epoch - save_epochs > early_stopping:
                    print("Early stopping condition reached. No progress for {} epochs".format(early_stopping))
                    break
            # write summary to npz file
            description = "Dataset: " + dataset + ", model: " + run_comment + ", h: " + str(h_dim)\
                          + ", z: " + str(z_dim) + ", learning_rate: " + str(learning_rate) + ", L2: " + str(l2_reg)\
                          + ", batch_size: " + str(batch_size) + ", split: " + str(split_ratio)\
                          + ", epochs" + str(save_epochs)
            np.savez(save_dir + file_name,
                     training_loss=training_loss, validation_loss=validation_loss,
                     best_eval_loss=best_eval_loss, description=description)
            ph.save2txt(save_print_dir, file_name, dataset, run_comment, h_dim, z_dim, num_z_samples,
                        learning_rate, batch_size, drop_probs, l2_reg, save_epochs, early_stopping,
                        X_dim, train_size, valid_size, test_size, 0, anom_valid_size, anom_test_size, save_dir)
            # print training curves
            plt.figure()
            tl = np.array(rc_training_loss) + np.array(kl_training_loss)
            plt.plot(tl, 'b', label='training loss')
            plt.plot(rc_training_loss, 'm', label='reconstruction loss')
            plt.plot(validation_loss, 'r', label='validation loss')
            plt.plot(kl_training_loss, 'g', label='KL loss')
            plt.title('Training Curves\nDataset:{}, Method:{}'.format(dataset, mode))
            plt.xlabel('Training epoch')
            plt.ylabel('Loss')
            plt.legend(loc="upper right")
            plt.show()
            plt.figure()
            plt.plot(tl, 'b', label='training loss')
            plt.plot(validation_loss, 'r', label='validation loss')
            plt.title('Training Curves\nDataset:{}, Method:{}'.format(dataset, mode))
            plt.xlabel('Training epoch')
            plt.ylabel('Loss')
            plt.legend(loc="upper right")
            plt.show()
        else:
            # load saved model
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
            print("Loading saved model")
            if save_dir is None:
                raise ValueError("Filename and path not supplied! Aborting...")
            else:
                try:
                    saver.restore(sess, save_dir + load_file_name)
                    print("Done! \n")
                except Exception:
                    print("Could not find saved file.")
        # normalize data
        if not TRAIN_VAE:
            train_mean, train_std, x_train = ph.normalize(x_train)
            x_valid = ph.p_normalize(x_valid, train_mean, train_std)
        anom_valid = ph.p_normalize(anom_valid, train_mean, train_std)
        # break the validation set evaluation into 'n_test_steps' steps to avoid memory overflow
        normal_valid_size = x_valid.shape[0]
        normal_elbo = np.zeros([normal_valid_size, 1])
        anomaly_elbo = np.zeros([anom_valid_size, 1])
        # evaluate ELBO on the normal validation-set
        for j in range(n_test_steps - 1):
            start = j * (normal_valid_size // n_test_steps)
            stop = (j + 1) * (normal_valid_size // n_test_steps)
            normal_valid_dict = {X: x_valid[start:stop], is_train: False, drop_prob: 0.0}
            x_elbo_v = sess.run([ELBO_t], feed_dict=normal_valid_dict)
            normal_elbo[start:stop, 0] = x_elbo_v[0]
        # compute the last slice separately since it might have more points
        normal_valid_dict = {X: x_valid[stop:], is_train: False, drop_prob: 0.0}
        x_elbo_v = sess.run([ELBO_t], feed_dict=normal_valid_dict)
        normal_elbo[stop:, 0] = x_elbo_v[0]
        normal_elbo = np.clip(normal_elbo, None, 1e4)
        # evaluate ELBO on the anomaly validation-set
        for j in range(n_test_steps - 1):
            start = j * (anom_valid_size // n_test_steps)
            stop = (j + 1) * (anom_valid_size // n_test_steps)
            anomalous_valid_dict = {X: anom_valid.reshape([-1, X_dim])[start:stop], is_train: False, drop_prob: 0.0}
            a_elbo_v = sess.run([ELBO_t], feed_dict=anomalous_valid_dict)
            anomaly_elbo[start:stop, 0] = a_elbo_v[0]
        # compute the last slice separately since it might have more points
        anomalous_valid_dict = {X: anom_valid.reshape([-1, X_dim])[stop:], is_train: False, drop_prob: 0.0}
        a_elbo_v = sess.run([ELBO_t], feed_dict=anomalous_valid_dict)
        anomaly_elbo[stop:, 0] = a_elbo_v[0]
        anomaly_elbo = np.clip(anomaly_elbo, None, 1e4)
        ph.plot_hist(normal_elbo, title='ELBO distribution\n validation set (normal data)', print_dir=save_print_dir,
                     f_name="valid_normal", figsize=(8, 5), dataset=dataset, plots=plots)
        ph.plot_hist(anomaly_elbo, title='ELBO distribution\n validation set (anomaly data)', print_dir=save_print_dir,
                     f_name="valid_anomaly", figsize=(8, 5), dataset=dataset, plots=plots)
        # print stats
        ph.report_stats("\nValidation Statistics - Normal Data\n", normal_elbo, verbose=verbose)
        ph.report_stats("\nValidation Statistics - Anomaly Data\n", anomaly_elbo, verbose=verbose)
        ###########################################################################################
        ######                          THRESHOLD SELECTION                                 #######
        ###########################################################################################
        x_mean_elbo_valid = np.mean(normal_elbo)
        a_mean_elbo_valid = np.mean(anomaly_elbo)
        # set threshold value
        valid_elbo_dict = {'n_mean': np.mean(normal_elbo), 'n_std': np.std(normal_elbo),
                           'a_mean': np.mean(anomaly_elbo), 'a_std': np.std(anomaly_elbo)}
        if a_mean_elbo_valid > x_mean_elbo_valid:
            # anomaly_threshold = 25
            anomaly_threshold = ph.select_threshold(valid_elbo_dict, criterion)
        else:
            print('Training error! Anomaly loss smaller than normal loss! Aborting...')
            anomaly_threshold = ph.select_threshold(valid_elbo_dict, criterion)
        # If Bayesian optimisation is selected then send the data to the scoring function and call
        # the bayesian opt routine -> and collect the results
        if bayesian_opt:
            # anomaly_threshold = 150
            anomaly_threshold, bo_results = threshold_selector(normal_elbo, anomaly_elbo,
                                                               fp_ratio=fp_ratio, fn_ratio=fn_ratio)
            plot_bayes_opt(bo_results, fn_ratio, 'figure 3', save_print_dir, dataset, plots=plots)
        elbo_hist(normal_elbo, anomaly_elbo, anomaly_threshold, 'Minas_Validation Set',
                  'figure 4', save_print_dir, dataset, plots=plots)
        # =================== Evaluation on Test Set ========================== #
        # normalize data
        x_test = ph.p_normalize(x_test, train_mean, train_std)
        anom_test = ph.p_normalize(anom_test, train_mean, train_std)
        print(x_test.shape)
        print(anom_test.shape)
        if verbose:
            print("#=========================Test Set=============================#")
            print('Anomaly threshold: {}'.format(anomaly_threshold))
        start_time = time.time()
        t_normal_elbo = np.zeros([test_size, 1])
        t_anomaly_elbo = np.zeros([anom_test_size, 1])
        x_samples_normal = np.zeros([test_size, X_dim])
        x_samples_anomaly = np.zeros([anom_test_size, X_dim])
        # evaluate ELBO on the normal test-set
        for j in range(n_test_steps - 1):
            start = j * (test_size // n_test_steps)
            stop = (j + 1) * (test_size // n_test_steps)
            normal_test_dict = {X: x_test[start:stop], is_train: False, drop_prob: 0.0}
            x_elbo_t = sess.run([ELBO_t], feed_dict=normal_test_dict)
            t_normal_elbo[start:stop, 0] = x_elbo_t[0]
            x_samples_normal[start:stop, :] = sess.run([x_sample_t], feed_dict=normal_test_dict)[0]
        # compute the last slice separately since it might have more points
        normal_test_dict = {X: x_test[stop:], is_train: False, drop_prob: 0.0}  # .reshape([-1,X_dim])
        x_elbo_t = sess.run([ELBO_t], feed_dict=normal_test_dict)
        t_normal_elbo[stop:, 0] = x_elbo_t[0]
        t_normal_elbo = np.clip(t_normal_elbo, None, 1e4)
        x_samples_normal[stop:, :] = sess.run([x_sample_t], feed_dict=normal_test_dict)[0]
        start = stop = 0
        # evaluate ELBO on the anomaly test-set
        for j in range(n_test_steps - 1):
            start = j * (anom_test_size // n_test_steps)
            stop = (j + 1) * (anom_test_size // n_test_steps)
            anomalous_test_dict = {X: anom_test[start:stop].reshape([-1, X_dim]), is_train: False, drop_prob: 0.0}
            a_elbo_t = sess.run([ELBO_t], feed_dict=anomalous_test_dict)
            t_anomaly_elbo[start:stop, 0] = a_elbo_t[0]
            x_samples_anomaly[start:stop, :] = sess.run([x_sample_t], feed_dict=anomalous_test_dict)[0]
        # compute the last slice separately since it might have more points
        anomalous_test_dict = {X: anom_test[stop:].reshape([-1, X_dim]), is_train: False, drop_prob: 0.0}
        a_elbo_t = sess.run([ELBO_t], feed_dict=anomalous_test_dict)
        t_anomaly_elbo[stop:, 0] = a_elbo_t[0]
        t_anomaly_elbo = np.clip(t_anomaly_elbo, None, 1e4)
        x_samples_anomaly[stop:, :] = sess.run([x_sample_t], feed_dict=anomalous_test_dict)[0]
        # save accuracy rates
        tn = np.sum(t_normal_elbo < anomaly_threshold)
        fp = np.sum(t_normal_elbo > anomaly_threshold)
        tp = np.sum(t_anomaly_elbo > anomaly_threshold)
        fn = np.sum(t_anomaly_elbo < anomaly_threshold)
        y_pred_n = t_normal_elbo > anomaly_threshold
        y_pred_a = t_anomaly_elbo > anomaly_threshold
        end_time = time.time()
        y_pred = np.concatenate((y_pred_n, y_pred_a), axis=0)
        y_true = np.concatenate((np.zeros([test_size]), np.ones([anom_test_size])), axis=0)
        # calculate the AUC-score
        t_elbo = np.concatenate((t_normal_elbo, t_anomaly_elbo), axis=0)
        # calculate and report total stats
        # scores
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)
        f01 = (1 + (1 / fn_ratio) ** 2) * precision * recall / ((1 / fn_ratio ** 2) * precision + recall)
        auc, thresh = ph.auc_plot(t_elbo, y_true, anomaly_threshold, f01)
        print("AUC:", auc)
        ph.report_stats("\nTest Statistics - Normal Data", t_normal_elbo, verbose=verbose)
        ph.report_stats("\nTest Statistics - Anomaly Data", t_anomaly_elbo, verbose=verbose)
        ph.scores_x(tp, fp, fn, tn, verbose=verbose)
        elbo_hist(t_normal_elbo, t_anomaly_elbo, anomaly_threshold, 'Test Set', 'Minas_figure 5',
                  save_print_dir, dataset, plots=plots)
        ph.plot_hist(t_normal_elbo, title='ELBO distribution\n test set (normal data)', print_dir=save_print_dir,
                     f_name="test_normal", figsize=(8, 5), dataset=dataset, plots=plots)
        ph.plot_hist(t_anomaly_elbo, title='ELBO distribution\n test set (anomaly data)', print_dir=save_print_dir,
                     f_name="test_anomaly", figsize=(8, 5), dataset=dataset, plots=plots)
        # Compute confusion matrix
        cnf_matrix = np.array([[int(tp), int(fn)],
                               [int(fp), int(tn)]])
        np.set_printoptions(precision=2)
        class_names = np.array(['Anomaly', 'Normal'], dtype='<U10')
        # Plot non-normalized confusion matrix
        if plots:
            ph.plot_confusion_matrix(cnf_matrix, classes=class_names,
                                     title=" Confusion matrix\n" + "Dataset: " + dataset + " - " + mode, plots=plots)
            plt.savefig(save_print_dir + dataset + "_threshold_" + str(round(anomaly_threshold, 2)) + '_conf_mat.png')
            plt.show()
        if verbose:
            print('total inference time for {} data points:{:6.3}s '.format(y_true.shape[0],
                                                                            (end_time - start_time)))
        test_elbo_dict = {'n_mean': t_normal_elbo.mean(), 'n_std': t_normal_elbo.std(),
                          'a_mean': t_anomaly_elbo.mean(), 'a_std': t_anomaly_elbo.std()}
        # save all elbo results to a file for post-processing
        np.savez(save_dir + file_name + 'res', descr=run_comment,
                 val_norm_elbo=normal_elbo, val_anom_elbo=anomaly_elbo, x_val=x_valid,
                 tst_norm_elbo=t_normal_elbo, tst_anom_elbo=t_anomaly_elbo, x_tst=x_test)
        ph.saveres2txt(save_print_dir, file_name, dataset, round(anomaly_threshold, 2), tp, fp, tn, fn, f1,
                       auc, f01, precision, recall, valid_elbo_dict, test_elbo_dict, learning_rate)
        # return statements
        return [tp, fp, tn, fn], x_samples_normal, x_samples_anomaly, x_test, anom_test, \
            normal_elbo, anomaly_elbo, t_normal_elbo, t_anomaly_elbo
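
# ---------------------------------------------------------------------------- #
# Illustrative sketch (not part of the model above, never called): a NumPy-only
# check of the two quantities the graph computes symbolically, i.e. the
# reparameterisation trick used in sample_z() and the closed-form Gaussian KL
# term used in loss(). The helper name and toy shapes are assumptions added
# for illustration only.
def _sketch_reparameterization_and_kl(batch=4, z_dim=10, seed=0):
    import numpy as np
    rng = np.random.RandomState(seed)
    z_mu = rng.randn(batch, z_dim)
    z_logvar = rng.randn(batch, z_dim)
    # reparameterisation: z = mu + sigma * eps, with sigma = exp(logvar / 2)
    eps = rng.randn(batch, z_dim)
    z = z_mu + np.exp(z_logvar / 2) * eps
    # closed-form KL(q(z|x) || N(0, I)), summed over latent dimensions,
    # matching 0.5 * sum(exp(logvar) + mu^2 - 1 - logvar) in loss()
    kl = 0.5 * np.sum(np.exp(z_logvar) + z_mu**2 - 1.0 - z_logvar, axis=1)
    return z, kl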
def net(TRAIN_VAE=True, load_model=True, real_data=True, batch_norm=False, dropout=False,
        anomaly_threshold=10, verbose=True, plots=True):
    tf.reset_default_graph()
    ###############################################################################
    # ======================= file and directory names ========================== #
    ###############################################################################
    run_comment = "lin_VAE_rnn_state_TEST"
    dataset = "ECG"
    save_dir = ("./" + dataset + "/")
    save_sum_dir = save_dir + "logs/"
    save_print_dir = save_dir + "prints/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(save_sum_dir):
        os.makedirs(save_sum_dir)
    if not os.path.exists(save_print_dir):
        os.makedirs(save_print_dir)
    ###############################################################################
    ##########                 Set model parameters                     ##########
    ###############################################################################
    # RNN parameters
    RNN_size = 64; n_rnn_layers = 2
    # hidden and latent space parameters
    z_dim = 100; h_dim = 512
    # running parameters (hyper-parameters)
    epochs = 51; batch_size = 100; early_stopping = 10
    learning_rate = 1e-4; l2_reg = 3e-3; drop_probs = 0.2
    # sampling parameters & classification criterion
    n_z_samples = 10; output_samples = 1; criterion = 'precision'
    # load data directory
    directory1 = 'LSTM_Data/'
    data_filename = directory1 + 'ecg_data.npz'
    # run parameters
    split_ratio = 0.5
    anom_split = str(np.round(1 - split_ratio, decimals=3)).replace(".", "_")
    file_name = run_comment + '_' + dataset + "_h_dim_" + str(h_dim) + "_zDim_" + str(z_dim)\
                + "_split_ratio" + anom_split
    load_file_name = file_name
    ###############################################################################
    # ========================= get data and variables ========================== #
    ###############################################################################
    # get data
    x_train, x_valid, x_test, l_norm_train, l_norm_valid, l_norm_test, \
        anom_valid, anom_test, l_anom_valid, l_anom_test = get_data(data_filename, split_ratio, real_data=real_data)
    # calculate sizes
    train_size, valid_size, test_size, anom_valid_size, anom_test_size, X_dim = \
        x_train.shape[0], x_valid.shape[0], x_test.shape[0], anom_valid.shape[0], anom_test.shape[0], x_train.shape[1]
    # other
    num_batches = train_size // batch_size; epoch = 0; save_epochs = 0
    # Training losses containers
    # print(train_size)
    # print(valid_size)
    # print(test_size)
    # print(anom_valid_size)
    # print(anom_test_size)
    best_eval_loss = np.inf
    training_loss = []
    validation_loss = []
    # initialize regularizer and graph
    regularize = tf.contrib.layers.l2_regularizer(l2_reg, scope=None)
    initialize = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)
    graph = tf.Graph()
    # put placeholders on graph
    with graph.as_default():
        # ========================= Placeholders ==================================
        with tf.name_scope("input"):
            X = tf.placeholder(tf.float32, shape=[None, X_dim], name="input_X")
            # y = tf.placeholder(tf.float32, shape=[None, y_dim],name="label")
            aux = tf.placeholder(tf.float32, shape=[None, X_dim], name='auxiliary_variable')
            drop_prob = tf.placeholder(tf.float32, shape=(), name='dropout_prob')
            alpha_KL = tf.placeholder(tf.float32, shape=(), name='KL_annealing')
            l_rate = tf.placeholder(tf.float32, shape=(), name='learning_rate')
            is_train = tf.placeholder_with_default(input=False, shape=(), name='train_test_state')
        with tf.name_scope("latent"):
            z = tf.placeholder(tf.float32, shape=[None, z_dim], name="latent_vars")
        # introduce convenience function for batch norm
        batch_norm_layer = partial(tf.layers.batch_normalization, training=is_train, momentum=0.95)

        # =============================== Q(z|X) ====================================
        def encode(x, scope='encoder', reuse=False, drop_prob=drop_probs, is_train=is_train):
            '''
            Discriminative model (encoder)
            Input:
                x : input data
            Returns:
                z_mu, z_logvar : mean and log-variance of z
            '''
            with tf.variable_scope("encoder", reuse=reuse):
                # ====== Qz(x) ======#
                inputs = x
                h = tf.layers.dense(inputs, h_dim, activation=None, kernel_initializer=initialize,
                                    kernel_regularizer=regularize, name='e_hidden_1')
                if dropout:
                    h = tf.layers.dropout(h, training=is_train, rate=drop_probs, seed=128)
                if batch_norm:
                    h = batch_norm_layer(h)
                h = tf.nn.elu(h)
                z_mu = tf.layers.dense(h, z_dim, activation=None, kernel_initializer=initialize,
                                       kernel_regularizer=regularize, name='z_mu')
                if batch_norm:
                    z_mu = batch_norm_layer(z_mu)
                z_logvar = tf.layers.dense(h, z_dim, activation=None, kernel_initializer=initialize,
                                           kernel_regularizer=regularize, name='z_logvar')
                if batch_norm:
                    z_logvar = batch_norm_layer(z_logvar)
            return z_mu, z_logvar

        def sample_z(mu, log_var):
            eps = tf.random_normal(shape=tf.shape(mu))
            return mu + tf.exp(log_var / 2) * eps

        # =============================== P(X|z) ====================================
        def decode(z, aux, scope='decoder', reuse=False, drop_prob=drop_probs, is_train=is_train):
            '''
            Generative model (decoder)
            Input:
                z : latent space data
            Returns:
                x : generated data
            '''
            with tf.variable_scope("decoder", reuse=reuse):
                # ====== Px(z) ======#
                inputs = z  # tf.concat(axis=1, values=[z,aux])
                # calculate hidden
                h = tf.layers.dense(inputs, 2 * n_rnn_layers * RNN_size, activation=None, kernel_initializer=initialize,
                                    kernel_regularizer=regularize, name='RNN_state_init_layer')
                if dropout:
                    h = tf.layers.dropout(h, training=is_train, rate=drop_probs, seed=128)
                if batch_norm:
                    h = batch_norm_layer(h)
                h = tf.nn.elu(h)
                # h = tf.unstack(h, axis=0)
                h = tf.reshape(h, [n_rnn_layers, 2, -1, RNN_size])
                init_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(h[idx][0], h[idx][1])
                                    for idx in range(n_rnn_layers)])
                memory_cell = [tf.contrib.rnn.LSTMCell(RNN_size, use_peepholes=False, forget_bias=1.0,
                                                       state_is_tuple=True) for cell in range(n_rnn_layers)]
                memory_cell = tf.contrib.rnn.MultiRNNCell(memory_cell)
                # memory_cell = tf.layers.dropout(memory_cell, rate=drop_prob, training=is_train, seed=128)
                # inputs = tf.expand_dims(inputs, -1)
                inputs = tf.stack([inputs, aux], axis=-1)                               # [10,50,1]
                rnn_outputs, states = tf.nn.dynamic_rnn(memory_cell, inputs=inputs,
                                                        initial_state=init_state, dtype=tf.float32)  # [10,50,128]
                stacked_outputs = tf.reshape(rnn_outputs, [-1, RNN_size * z_dim])
                if batch_norm:
                    stacked_outputs = batch_norm_layer(stacked_outputs)                 # [10,50*128]
                # calculate the mean of the output (Gaussian)
                x_mu = tf.layers.dense(stacked_outputs, X_dim, activation=None, kernel_initializer=initialize,
                                       kernel_regularizer=regularize, name='x_mu')
                if batch_norm:
                    x_mu = batch_norm_layer(x_mu)                                       # [10,100]
                # x_mu = tf.reshape(x_mu,[-1,X_dim])                                    # [500,100] vs [10,100]
                x_logvar = tf.layers.dense(stacked_outputs, X_dim, activation=None, kernel_initializer=initialize,
                                           kernel_regularizer=regularize, name='x_logvar')
                if batch_norm:
                    x_logvar = batch_norm_layer(x_logvar)
                # x_logvar = tf.reshape(x_logvar,[-1,X_dim])
                # assert ph.shape(x_logvar)==(50,100)
                # print(ph.shape(x_logvar))
            return x_mu, x_logvar

        # =============================== ELBO ====================================
        def loss(X, x_sample, z_mu, z_logvar, aux, reuse=None, n_z_samples=1):
            with tf.name_scope("loss"):
                # E[log P(X|z)]
                # print(ph.shape(x_sample))
                # print(ph.shape(X))
                recon_loss = 0.5 * tf.reduce_sum(tf.square(x_sample - X), axis=1)
                # tf.cond(is_train,False):
                for i in range(n_z_samples - 1):
                    z_sample = sample_z(z_mu, z_logvar)
                    x_mu, x_logvar = decode(z_sample, aux, reuse=reuse)
                    x_sample = sample_z(x_mu, x_logvar)
                    recon_loss += 0.5 * tf.reduce_sum(tf.square(X - x_sample), axis=1)
                # D_KL(Q(z|X) || P(z)); calculate in closed form as both dist. are Gaussian
                kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, axis=1)
                # Regularisation cost
                reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                # VAE loss
                ELBO = (recon_loss / n_z_samples) + kl_loss
                vae_loss = tf.add_n([tf.reduce_mean(ELBO) + tf.reduce_sum(reg_variables)])
                # batch_loss = tf.reduce_mean(ELBO)
                # summary
                with tf.name_scope("Summaries"):
                    # tf.summary.scalar("ELBO",ELBO)
                    tf.summary.scalar("Batch_loss", tf.reduce_mean(ELBO))
                    merger = tf.summary.merge_all()
            return vae_loss, ELBO, merger

        # =============================== TRAINING ====================================
        # embed (encode)
        z_mu, z_logvar = encode(X)
        with tf.name_scope("latent"):
            z_sample = sample_z(z_mu, z_logvar)
        # generate (decode)
        x_mu, x_logvar = decode(z_sample, aux, reuse=None)
        # sample x
        x_sample = sample_z(x_mu, x_logvar)
        # loss
        vae_loss, ELBO, merger = loss(X, x_sample, z_mu, z_logvar, aux, reuse=True, n_z_samples=1)
        with tf.name_scope("optimiser"):
            # updater
            train_step = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads = train_step.compute_gradients(vae_loss)
            clipped_grads = [(tf.clip_by_value(grad, -2, 2), var) for grad, var in grads]
            solver = train_step.apply_gradients(clipped_grads)
            bn_update = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # =============================== TESTING =================================== #
        with tf.name_scope("Testing"):
            z_mu_t, z_logvar_t = encode(X, reuse=True)
            z_sample_t = sample_z(z_mu_t, z_logvar_t)
            x_mu_t, x_logvar_t = decode(z_sample_t, aux, reuse=True)
            # sample x
            x_sample_t = sample_z(x_mu_t, x_logvar_t)
            # loss
            _, ELBO_t, _ = loss(X, x_sample_t, z_mu_t, z_logvar_t, aux, reuse=True, n_z_samples=100)

        # =============================== Session =============================== #
        if TRAIN_VAE:
            sum_writter = tf.summary.FileWriter(save_sum_dir, graph)
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer(), {is_train: False})
            if load_model:
                print("Loading saved model")
                if save_dir is None:
                    raise ValueError("Filename and path not supplied! Aborting...")
                else:
                    try:
                        tf.train.Saver().restore(sess, save_dir + load_file_name)
                        print("Done! Continuing training...\n")
                        loaded = np.load(save_dir + file_name + '.npz')
                        best_eval_loss = loaded['best_eval_loss']
                    except Exception:
                        print("Could not find saved file. Training from scratch")
            # ========================= START TRAINING ========================== #
            # normalize data
            train_mean, train_std, x_train = ph.normalize(x_train)
            x_valid = ph.p_normalize(x_valid, train_mean, train_std)
            print(" -----Training Started-----")
            for epoch in range(epochs):
                epoch_loss = 0
                epoch_time = time.time()
                # x_epoch = tf.add(x_train, tf.random_normal(x_train.shape, mean=0.0, stddev=0.05, dtype=tf.float64, seed=None, name=None))
                x_epoch, l_x_train = ph.shuffle_data(x_train, l_norm_train)
                # For each epoch train with mini-batches of size (batch_size)
                for batch_num in range(train_size // batch_size):
                    X_mb = x_epoch[batch_num * batch_size:(batch_num + 1) * batch_size, ]
                    lx_mb = l_x_train[batch_num * batch_size:(batch_num + 1) * batch_size, ]
                    # y_mb = y_train[batch_num*batch_size:(batch_num+1)*batch_size,]
                    # train
                    _, loss, _ = sess.run([solver, vae_loss, bn_update],
                                          feed_dict={X: X_mb, aux: lx_mb, is_train: True, drop_prob: drop_probs})
                    epoch_loss += loss
                # print progress
                ph.progress(epoch, (epoch_loss / num_batches), 1, 1, time.time() - epoch_time)
                training_loss.append(epoch_loss / num_batches)
                # validate
                if epoch > 0 and epoch % 5 == 0:
                    vloss = 0
                    vloss, vaeloss = sess.run([vae_loss, merger],
                                              feed_dict={X: x_valid, aux: l_norm_valid, is_train: False, drop_prob: drop_probs})
                    sum_writter.add_summary(vaeloss, epoch)
                    # print progress
                    print('Validation_Training_Epoch: {}'.format(epoch))
                    print('Loss: {}'.format(vloss))
                    validation_loss.append(vloss)
                    if vloss < best_eval_loss:
                        # update best result and save checkpoint
                        best_eval_loss = vloss
                        saver.save(sess, save_dir + file_name)
                        save_epochs = epoch
                # early stopping condition
                if epoch - save_epochs > early_stopping:
                    print("Early stopping condition reached. No progress for {} epochs".format(early_stopping))
                    break
            # write summary to npz file
            description = "Dataset: " + dataset + ", model: " + run_comment + ", h: " + str(h_dim)\
                          + ", z: " + str(z_dim) + ", learning_rate: " + str(learning_rate) + ", L2: " + str(l2_reg)\
                          + ", batch_size: " + str(batch_size) + ", split: " + str(split_ratio)\
                          + ", epochs" + str(save_epochs)
            np.savez(save_dir + file_name,
                     training_loss=training_loss, validation_loss=validation_loss,
                     best_eval_loss=best_eval_loss, description=description)
        else:
            # load saved model
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
            print("Loading saved model")
            if save_dir is None:
                raise ValueError("Filename and path not supplied! Aborting...")
            else:
                try:
                    saver.restore(sess, save_dir + load_file_name)
                    print("Done! \n")
                except Exception:
                    print("Could not find saved file.")
        # normalize data
        if not TRAIN_VAE:
            train_mean, train_std, x_train = ph.normalize(x_train)
            x_valid = ph.p_normalize(x_valid, train_mean, train_std)
        anom_valid = ph.p_normalize(anom_valid, train_mean, train_std)
        # calculate ELBO statistics on the normal validation set
        x_elbo = sess.run([ELBO_t], feed_dict={X: x_valid, aux: l_norm_valid, is_train: False, drop_prob: drop_probs})
        x_elbo = x_elbo[0]
        ph.plot_hist(x_elbo, title='ELBO distribution\n validation set (normal data)', print_dir=save_print_dir,
                     f_name="valid_normal", figsize=(8, 5), dataset=dataset, plots=plots)
        x_mean_elbo_valid = np.mean(x_elbo)
        x_std_elbo_valid = np.std(x_elbo)
        x_median_elbo_valid = np.median(x_elbo)
        # print stats
        ph.report_stats("\nValidation Statistics - Normal Data\n", x_elbo, verbose=verbose)
        # calculate ELBO statistics on the anomaly validation set
        a_elbo = sess.run([ELBO_t], feed_dict={X: anom_valid.reshape([-1, X_dim]), aux: l_anom_valid,
                                               is_train: False, drop_prob: drop_probs})
        a_elbo = a_elbo[0]
        ph.plot_hist(a_elbo, title='ELBO distribution\n validation set (anomaly data)', print_dir=save_print_dir,
                     f_name="valid_anomaly", figsize=(8, 5), dataset=dataset, plots=plots)
        a_mean_elbo_valid = np.mean(a_elbo)
        a_std_elbo_valid = np.std(a_elbo)
        a_median_elbo_valid = np.median(a_elbo)
        ph.report_stats("\nValidation Statistics - Anomaly Data\n", a_elbo, verbose=verbose)
        # set threshold value
        elbo_dict = {'n_mean': x_mean_elbo_valid, 'n_std': x_std_elbo_valid,
                     'a_mean': a_mean_elbo_valid, 'a_std': a_std_elbo_valid}
        if a_mean_elbo_valid > x_mean_elbo_valid:
            anomaly_threshold = ph.select_threshold(elbo_dict, criterion)
        else:
            print('Training error! Anomaly loss smaller than normal loss! Aborting...')
            anomaly_threshold = ph.select_threshold(elbo_dict, criterion)
        # =================== Evaluation on Test Set ========================== #
        # normalize data
        x_test = ph.p_normalize(x_test, train_mean, train_std)
        anom_test = ph.p_normalize(anom_test, train_mean, train_std)
        print(x_test.shape)
        print(anom_test.shape)
        if verbose:
            print("#=========================Test Set=============================#")
            print('Anomaly threshold: {}'.format(anomaly_threshold))
        tn, fn, fp, tp = [np.zeros([output_samples]) for i in range(4)]
        y_pred_n = np.zeros([test_size, output_samples])
        y_pred_a = np.zeros([anom_test_size, output_samples])
        start_time = time.time()
        for i in range(output_samples):
            # evaluate ELBO on the normal test-set
            x_elbo = sess.run([ELBO_t], feed_dict={X: x_test, aux: l_norm_test, is_train: False, drop_prob: drop_probs})
            x_elbo = x_elbo[0]
            # visualize normal test-set
            x_samples_normal = sess.run([x_sample_t], feed_dict={X: x_test, aux: l_norm_test,
                                                                 is_train: False, drop_prob: drop_probs})
            # evaluate ELBO on the anomaly test-set
            a_elbo = sess.run([ELBO_t], feed_dict={X: anom_test.reshape([-1, X_dim]), aux: l_anom_test,
                                                   is_train: False, drop_prob: drop_probs})
            a_elbo = a_elbo[0]
            # visualize anomaly test-set
            x_samples_anomaly = sess.run([x_sample_t], feed_dict={X: anom_test.reshape([-1, X_dim]), aux: l_anom_test,
                                                                  is_train: False, drop_prob: drop_probs})
            # save accuracy rates
            tn[i] = np.sum(x_elbo < anomaly_threshold)
            fp[i] = np.sum(x_elbo > anomaly_threshold)
            tp[i] = np.sum(a_elbo > anomaly_threshold)
            fn[i] = np.sum(a_elbo < anomaly_threshold)
            y_pred_n[:, i] = x_elbo > anomaly_threshold
            y_pred_a[:, i] = a_elbo > anomaly_threshold
        end_time = time.time()
        y_pred = np.concatenate((y_pred_n, y_pred_a), axis=0)
        y_true = np.concatenate((np.zeros([test_size]), np.ones([anom_test_size])), axis=0)
        # calculate and report total stats
        # scores
        precision = tp.mean() / (tp.mean() + fp.mean())
        recall = tp.mean() / (tp.mean() + fn.mean())
        f1 = 2 * precision * recall / (precision + recall)
        ph.report_stats("\nTest Statistics - Normal Data", x_elbo, verbose=verbose)
        ph.report_stats("\nTest Statistics - Anomaly Data", a_elbo, verbose=verbose)
        ph.scores_x(tp, fp, fn, tn, verbose=verbose)
        y_out = ph.uncertainty(y_true, y_pred, verbose=verbose)
        ph.plot_hist(x_elbo, title='ELBO distribution\n test set (normal data)', print_dir=save_print_dir,
                     f_name="test_normal", figsize=(8, 5), dataset=dataset, plots=plots)
        ph.plot_hist(a_elbo, title='ELBO distribution\n test set (anomaly data)', print_dir=save_print_dir,
                     f_name="test_anomaly", figsize=(8, 5), dataset=dataset, plots=plots)
        # Compute confusion matrix
        cnf_matrix = np.array([[int(tp.mean()), int(fn.mean())],
                               [int(fp.mean()), int(tn.mean())]])
        np.set_printoptions(precision=2)
        class_names = np.array(['Anomaly', 'Normal'], dtype='<U10')
        # Plot non-normalized confusion matrix
        if plots:
            plt.figure()
            ph.plot_confusion_matrix(cnf_matrix, classes=class_names,
                                     title=" Confusion matrix\n" + "Dataset: " + dataset + " - " + run_comment, plots=plots)
            plt.savefig(save_print_dir + dataset + "_split_" + str(split_ratio) + 'conf_mat.png')
            plt.show()
        if verbose:
            print('total inference time for {} data points (x{} samples each):{:6.3}s '.format(y_true.shape[0],
                                                                                                output_samples,
                                                                                                (end_time - start_time)))
        return [tp.mean(), fp.mean(), tn.mean(), fn.mean()], x_samples_normal[0], x_samples_anomaly[0], x_test, anom_test
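
# ---------------------------------------------------------------------------- #
# Illustrative sketch (assumed toy numbers, not project output, never called):
# how the ELBO scores above are turned into a confusion matrix and the
# precision / recall / F1 / F0.1 figures reported in the test-set blocks.
# Points whose ELBO exceeds the threshold are flagged as anomalies.
def _sketch_elbo_thresholding(fn_ratio=10):
    import numpy as np
    normal_elbo = np.array([3.0, 8.0, 12.0, 4.0])     # assumed scores of normal test points
    anomaly_elbo = np.array([25.0, 9.0, 40.0])        # assumed scores of anomalous test points
    threshold = 10.0                                   # assumed anomaly threshold
    tn = np.sum(normal_elbo < threshold); fp = np.sum(normal_elbo > threshold)
    tp = np.sum(anomaly_elbo > threshold); fn = np.sum(anomaly_elbo < threshold)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    # F-beta with beta = 1/fn_ratio, matching the f01 formula used above
    f01 = (1 + (1 / fn_ratio) ** 2) * precision * recall / ((1 / fn_ratio ** 2) * precision + recall)
    return (tn, fp, tp, fn), precision, recall, f1, f01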
def net(TRAIN_VAE=True, load_model=True, real_data=True, bayesian_opt=True, anomaly_threshold=10,
        fp_ratio=1, fn_ratio=10, verbose=True, plots=True, batch_norm=False, dropout=True):
    # Set random generator seed for reproducibility
    np.random.seed(168)
    tf.set_random_seed(168)
    # Reset the default graph
    tf.reset_default_graph()
    ###############################################################################
    # ======================= file and directory names ========================== #
    ###############################################################################
    mode = "supervised"
    run_comment = mode + "_linVAElin_2"
    dataset = "Shuttle"
    save_dir = ("./" + mode + "_" + dataset + "/")
    save_sum_dir = save_dir + "logs/"
    save_print_dir = save_dir + "prints/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(save_sum_dir):
        os.makedirs(save_sum_dir)
    if not os.path.exists(save_print_dir):
        os.makedirs(save_print_dir)
    ###############################################################################
    ##########                 Set model parameters                     ##########
    ###############################################################################
    # RNN parameters
    RNN_size = 128
    n_rnn_layers = 1
    input_keep_prob = 1.0
    output_keep_prob = 1.0
    # hidden and latent space parameters
    z_dim = 10
    h_dim = 1024
    # running parameters (hyper-parameters)
    epochs = 251
    batch_size = 100
    early_stopping = 40
    learning_rate = 1e-5
    l2_reg = 1e-2
    drop_probs = 0.15
    val_epochs = 1
    anneal_rate = 5e-5
    n_test_steps = 100
    # sampling parameters & classification criterion
    num_z_samples = 100
    output_samples = 1
    kl_a = 1
    # load data directory
    # directory1 = 'Data/'
    data_filename = 'dataset/' + dataset + "_normal_anomaly_seperate.npz"  # directory1+'xy_o_30.npz'
    # run parameters
    split_ratio = 0.5
    anom_split = str(np.round(1 - split_ratio, decimals=3)).replace(".", "_")
    file_name = run_comment + '_' + dataset + "_h_" + str(h_dim) + "_z_" + str(z_dim)
    load_file_name = file_name
    ###############################################################################
    # ========================= get data and variables ========================== #
    ###############################################################################
    # get data
    x_train, x_valid, x_test, anom_valid, anom_test = get_data(data_filename, split_ratio, real_data=real_data)
    # calculate sizes
    train_size, valid_size, test_size, anom_valid_size, anom_test_size, X_dim = \
        x_train.shape[0], x_valid.shape[0], x_test.shape[0], anom_valid.shape[0], anom_test.shape[0], x_train.shape[1]
    # other
    num_batches = train_size // batch_size
    epoch = 0
    save_epochs = 0
    b_t_ratio = 2  # train_size//batch_size
    # Training losses containers
    best_eval_loss = np.inf
    training_loss = []
    validation_loss = []
    kl_training_loss = []
    rc_training_loss = []
    # initialize regularizer and graph
    regularize = tf.contrib.layers.l2_regularizer(l2_reg, scope=None)
    initialize = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)
    graph = tf.Graph()
    # put placeholders on graph
    with graph.as_default():
        # ========================= Placeholders ==================================
        with tf.name_scope("input"):
            X = tf.placeholder(tf.float32, shape=[None, X_dim], name="input_X")
            # y = tf.placeholder(tf.float32, shape=[None, y_dim],name="label")
            drop_prob = tf.placeholder(tf.float32, shape=(), name='dropout_prob')
            alpha_KL = tf.placeholder_with_default(input=1.0, shape=(), name='KL_annealing')
            rc_weight = tf.placeholder_with_default(input=1.0, shape=(), name='reconstruction_loss_weight')
            is_train = tf.placeholder_with_default(input=False, shape=(), name='train_test_state')
            l_rate = tf.placeholder_with_default(input=learning_rate, shape=(), name='var_learning_rate')
        with tf.name_scope("latent"):
            z = tf.placeholder(tf.float32, shape=[None, z_dim], name="latent_vars")
        # introduce convenience function for batch norm
        batch_norm_layer = partial(tf.layers.batch_normalization, training=is_train, momentum=0.95)

        # =============================== Q(z|X) ====================================
        def encode(x, scope='encoder', reuse=False, drop_prob=drop_probs, is_train=is_train,
                   batch_norm=batch_norm, dropout=dropout):
            '''
            Discriminative model (encoder)
            Input:
                x : input data
            Returns:
                z_mu, z_logvar : mean and log-variance of z
            '''
            with tf.variable_scope("encoder", reuse=reuse):
                inputs = x
                h = tf.layers.dense(inputs, h_dim, activation=None, kernel_initializer=initialize,
                                    kernel_regularizer=regularize, name='e_hidden_1')
                if dropout:
                    h = tf.layers.dropout(h, training=is_train, rate=drop_probs, seed=128)
                if batch_norm:
                    h = batch_norm_layer(h)
                h = tf.nn.elu(h)
                z_mu = tf.layers.dense(h, z_dim, activation=None, kernel_initializer=initialize,
                                       kernel_regularizer=regularize, name='z_mu')
                if batch_norm:
                    z_mu = batch_norm_layer(z_mu)
                z_logvar = tf.layers.dense(h, z_dim, activation=None, kernel_initializer=initialize,
                                           kernel_regularizer=regularize, name='z_logvar')
                if batch_norm:
                    z_logvar = batch_norm_layer(z_logvar)
            return z_mu, z_logvar

        def sample_z(mu, log_var):
            eps = tf.random_normal(shape=tf.shape(mu))
            return mu + tf.exp(log_var / 2) * eps

        # =============================== P(X|z) ====================================
        def decode(z, scope='decoder', reuse=False, drop_prob=drop_probs, is_train=is_train,
                   batch_norm=batch_norm, dropout=dropout):
            '''
            Generative model (decoder)
            Input:
                z : latent space data
            Returns:
                x : generated data
            '''
            with tf.variable_scope("decoder", reuse=reuse):
                inputs = z
                # calculate hidden
                h = tf.layers.dense(inputs, h_dim, activation=None, kernel_initializer=initialize,
                                    kernel_regularizer=regularize, name='d_hidden_1')
                if dropout:
                    h = tf.layers.dropout(h, training=is_train, rate=drop_probs, seed=128)
                if batch_norm:
                    h = batch_norm_layer(h)
                h = tf.nn.elu(h)
                # calculate the mean of the output (Gaussian)
                x_mu = tf.layers.dense(h, X_dim, activation=None, kernel_initializer=initialize,
                                       kernel_regularizer=regularize, name='x_mu')
                if batch_norm:
                    x_mu = batch_norm_layer(x_mu)
                # calculate the std of the output (Gaussian)
                x_logvar = tf.layers.dense(h, X_dim, activation=None, kernel_initializer=initialize,
                                           kernel_regularizer=regularize, name='x_logvar')
                if batch_norm:
                    x_logvar = batch_norm_layer(x_logvar)
            return x_mu, x_logvar

        # =============================== ELBO ====================================
        def loss(X, x_sample, z_mu, z_logvar, reuse=None, n_z_samples=1, alpha_KL=alpha_KL, rc_weight=rc_weight):
            with tf.name_scope("loss"):
                # E[log P(X|z)]
                recon_loss = tf.clip_by_value(0.5 * tf.reduce_sum(tf.square(X - x_sample), axis=1), 1e-8, 1e8)
                # loop for number of MC samples
                for i in range(n_z_samples - 1):
                    z_sample = sample_z(z_mu, z_logvar)
                    x_mu, x_logvar = decode(z_sample, reuse=reuse)
                    x_sample = sample_z(x_mu, x_logvar)
                    recon_loss += tf.clip_by_value(0.5 * tf.reduce_sum(tf.square(X - x_sample), axis=1), 1e-8, 1e8)
                # D_KL(Q(z|X) || P(z)); calculate in closed form as both dist. are Gaussian
                kl_loss = tf.clip_by_value(0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, axis=1),
                                           1e-8, 1e8)
                # Regularisation cost
                reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                # VAE loss
                ELBO = rc_weight * (recon_loss / n_z_samples) + alpha_KL * kl_loss
                vae_loss = tf.add_n([tf.reduce_mean(ELBO) + tf.reduce_sum(reg_variables)])
                # summary
                with tf.name_scope("Summaries"):
                    tf.summary.scalar("Batch_loss", tf.reduce_mean(ELBO))
                    merger = tf.summary.merge_all()
            return vae_loss, ELBO, merger, tf.reduce_mean(kl_loss), tf.reduce_mean(recon_loss)

        # =============================== TRAINING ====================================
        # embed (encode)
        z_mu, z_logvar = encode(X)
        with tf.name_scope("latent"):
            z_sample = sample_z(z_mu, z_logvar)
        # generate (decode)
        x_mu, x_logvar = decode(z_sample, reuse=None)
        # sample x
        x_sample = sample_z(x_mu, x_logvar)
        # loss
        vae_loss, ELBO, merger, kl_loss, rec_loss = loss(X, x_sample, z_mu, z_logvar, reuse=True,
                                                         n_z_samples=1, alpha_KL=alpha_KL, rc_weight=rc_weight)
        with tf.name_scope("optimiser"):
            # optimiser
            optimizer = tf.train.AdamOptimizer(learning_rate=l_rate)
            # collect batch norm updates and make the train op depend on them
            if batch_norm:
                bn_update = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(bn_update):
                    solver = optimizer.minimize(vae_loss)
            else:
                solver = optimizer.minimize(vae_loss)

        # =============================== TESTING =================================== #
        with tf.name_scope("Testing"):
            z_mu_t, z_logvar_t = encode(X, reuse=True)
            z_sample_t = sample_z(z_mu_t, z_logvar_t)
            x_mu_t, x_logvar_t = decode(z_sample_t, reuse=True)
            # sample x
            x_sample_t = sample_z(x_mu_t, x_logvar_t)
            # loss
            _, ELBO_t, _, _, _ = loss(X, x_sample_t, z_mu_t, z_logvar_t, reuse=True,
                                      n_z_samples=num_z_samples, alpha_KL=1.0)

        # =============================== Session =============================== #
        if TRAIN_VAE:
            sum_writter = tf.summary.FileWriter(save_sum_dir, graph)
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
            if load_model:
                print("Loading saved model")
                if save_dir is None:
                    raise ValueError("Filename and path not supplied! Aborting...")
                else:
                    try:
                        tf.train.Saver().restore(sess, save_dir + load_file_name)
                        print("Done! Continuing training...\n")
                        loaded = np.load(save_dir + file_name + '.npz')
                        best_eval_loss = loaded['best_eval_loss']
                    except Exception:
                        print("Could not find saved file. Training from scratch")
            # ========================= START TRAINING ========================== #
            # normalize data
            train_mean, train_std, x_train = ph.normalize(x_train)
            x_valid = ph.p_normalize(x_valid, train_mean, train_std)
            print("Training set length: {}".format(x_train.shape[0]))
            print(" -----Training Started-----")
            count = 0
            for epoch in range(epochs):
                epoch_loss = 0
                r_loss = 0
                kl = 0
                epoch_time = time.time()
                x_train = ph.shuffle_x_data(x_train)
                # For each epoch train with mini-batches of size (batch_size)
                for batch_num in range(train_size // batch_size):
                    # anneal KL cost (full cost after 50,000 batches)
                    kl_a = 2 * (-.5 + 1 / (1 + np.exp(-count * anneal_rate)))
                    # print("batch_number:{}".format(batch_num))
                    X_mb = x_train[batch_num * batch_size:(batch_num + 1) * batch_size, ]
                    # train
                    train_dict = {X: X_mb, is_train: True, drop_prob: drop_probs, alpha_KL: kl_a,
                                  rc_weight: b_t_ratio, l_rate: learning_rate}
                    _, loss, k_, r_ = sess.run([solver, vae_loss, kl_loss, rec_loss], feed_dict=train_dict)
                    epoch_loss += loss
                    kl += k_
                    r_loss += r_
                    count += 1
                # print progress
                ph.progress(epoch, (epoch_loss / num_batches), (r_loss / num_batches), (kl / num_batches),
                            time.time() - epoch_time)
                training_loss.append(epoch_loss / num_batches)
                rc_training_loss.append(r_loss / num_batches)
                kl_training_loss.append(kl / num_batches)
                # validate
                if epoch > 0 and epoch % val_epochs == 0:
                    vloss = 0
                    valid_dict = {X: x_valid, is_train: False, drop_prob: 0.0, alpha_KL: 1.0}
                    vloss, vaeloss = sess.run([vae_loss, merger], feed_dict=valid_dict)
                    sum_writter.add_summary(vaeloss, epoch)
                    # print progress
                    print('Validation_Training_Epoch: {}'.format(epoch))
                    print('Loss: {}'.format(vloss))
                    validation_loss.append(vloss)
                    if vloss < best_eval_loss:
                        # update best result and save checkpoint
                        best_eval_loss = vloss
                        saver.save(sess, save_dir + file_name)
                        save_epochs = epoch
                # early stopping condition
                if epoch - save_epochs > early_stopping // 2:
                    learning_rate /= 2
                if epoch - save_epochs > early_stopping:
                    print("Early stopping condition reached. No progress for {} epochs".format(early_stopping))
                    break
            # write summary to npz file
            description = "Dataset: " + dataset + ", model: " + run_comment + ", h: " + str(h_dim)\
                          + ", z: " + str(z_dim) + ", learning_rate: " + str(learning_rate) + ", L2: " + str(l2_reg)\
                          + ", batch_size: " + str(batch_size) + ", split: " + str(split_ratio)\
                          + ", epochs" + str(save_epochs)
            np.savez(save_print_dir + file_name,
                     training_loss=training_loss, validation_loss=validation_loss,
                     best_eval_loss=best_eval_loss, description=description)
            ph.save2txt(save_print_dir, file_name, dataset, run_comment, h_dim, z_dim, num_z_samples,
                        learning_rate, batch_size, drop_probs, l2_reg, save_epochs, early_stopping,
                        X_dim, train_size, valid_size, test_size, 0, anom_valid_size, anom_test_size, save_dir)
            # print training curves
            plt.figure()
            tl = np.array(rc_training_loss) + np.array(kl_training_loss)
            plt.plot(tl, 'b', label='training loss')
            plt.plot(rc_training_loss, 'm', label='reconstruction loss')
            plt.plot(validation_loss, 'r', label='validation loss')
            plt.plot(kl_training_loss, 'g', label='KL loss')
            plt.title('Training Curves\nDataset:{}, Method:{}'.format(dataset, mode))
            plt.xlabel('Training epoch')
            plt.ylabel('Loss')
            plt.legend(loc="upper right")
            plt.show()
        else:
            # load saved model
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
            print("Loading saved model")
            if save_dir is None:
                raise ValueError("Filename and path not supplied! Aborting...")
            else:
                try:
                    saver.restore(sess, save_dir + load_file_name)
                    print("Done! \n")
                except Exception:
                    print("Could not find saved file.")
        # normalize data
        if not TRAIN_VAE:
            train_mean, train_std, x_train = ph.normalize(x_train)
            x_valid = ph.p_normalize(x_valid, train_mean, train_std)
        anom_valid = ph.p_normalize(anom_valid, train_mean, train_std)
        ##############################################################################
        # ===================== Threshold Selection Routine ======================= #
        ##############################################################################
        # break the validation set evaluation into 'n_test_steps' steps to avoid memory overflow
        normal_valid_size = x_valid.shape[0]
        normal_elbo = np.zeros([normal_valid_size, 1])
        anomaly_elbo = np.zeros([anom_valid_size, 1])
        # evaluate ELBO on the normal validation-set
        for j in range(n_test_steps - 1):
            start = j * (normal_valid_size // n_test_steps)
            stop = (j + 1) * (normal_valid_size // n_test_steps)
            normal_valid_dict = {X: x_valid[start:stop], is_train: False, drop_prob: 0.0}
            x_elbo_v = sess.run([ELBO_t], feed_dict=normal_valid_dict)
            normal_elbo[start:stop, 0] = x_elbo_v[0]
        # compute the last slice separately since it might have more points
        normal_valid_dict = {X: x_valid[stop:], is_train: False, drop_prob: 0.0}
        x_elbo_v = sess.run([ELBO_t], feed_dict=normal_valid_dict)
        normal_elbo[stop:, 0] = x_elbo_v[0]
        normal_elbo = np.clip(normal_elbo, None, 1e3)
        # evaluate ELBO on the anomaly validation-set
        for j in range(n_test_steps - 1):
            start = j * (anom_valid_size // n_test_steps)
            stop = (j + 1) * (anom_valid_size // n_test_steps)
            anomalous_valid_dict = {X: anom_valid.reshape([-1, X_dim])[start:stop], is_train: False, drop_prob: 0.0}
            a_elbo_v = sess.run([ELBO_t], feed_dict=anomalous_valid_dict)
            anomaly_elbo[start:stop, 0] = a_elbo_v[0]
        # compute the last slice separately since it might have more points
        anomalous_valid_dict = {X: anom_valid.reshape([-1, X_dim])[stop:], is_train: False, drop_prob: 0.0}
        a_elbo_v = sess.run([ELBO_t], feed_dict=anomalous_valid_dict)
        anomaly_elbo[stop:, 0] = a_elbo_v[0]
        anomaly_elbo = np.clip(anomaly_elbo, None, 1e3)
        # send the data to the scoring function and call the bayesian opt routine
        # and collect the results
        if bayesian_opt:
            # anomaly_threshold = 150
            anomaly_threshold, bo_results = threshold_selector(normal_elbo, anomaly_elbo,
                                                               fp_ratio=fp_ratio, fn_ratio=fn_ratio)
            plot_bayes_opt(bo_results, fn_ratio, 'figure 3', save_print_dir, dataset, plots=plots)
        elbo_hist(normal_elbo, anomaly_elbo, anomaly_threshold, 'Validation Set',
                  'figure 4', save_print_dir, dataset, plots=plots)
        # =================== Evaluation on Test Set ========================== #
        # normalize data
        x_test = ph.p_normalize(x_test, train_mean, train_std)
        anom_test = ph.p_normalize(anom_test, train_mean, train_std)
        # anomalous_test_dict = {X: anom_test.reshape([-1,X_dim]), is_train: False, drop_prob: 0.0}
        if verbose:
            print("#=========================Test Set=============================#")
            print('Anomaly threshold: {}'.format(anomaly_threshold))
        start_time = time.time()
        t_normal_elbo = np.zeros([test_size, 1])
        t_anomaly_elbo = np.zeros([anom_test_size, 1])
        # evaluate ELBO on the normal test-set
        for j in range(n_test_steps - 1):
            start = j * (test_size // n_test_steps)
            stop = (j + 1) * (test_size // n_test_steps)
            normal_test_dict = {X: x_test[start:stop], is_train: False, drop_prob: 0.0}
            x_elbo_t = sess.run([ELBO_t], feed_dict=normal_test_dict)
            t_normal_elbo[start:stop, 0] = x_elbo_t[0]
        # compute the last slice separately since it might have more points
        normal_test_dict = {X: x_test[stop:], is_train: False, drop_prob: 0.0}
        x_elbo_t = sess.run([ELBO_t], feed_dict=normal_test_dict)
        t_normal_elbo[stop:, 0] = x_elbo_t[0]
        t_normal_elbo = np.clip(t_normal_elbo, None, 1e3)
        start = stop = 0
        # evaluate ELBO on the anomaly test-set
        for j in range(n_test_steps - 1):
            start = j * (anom_test_size // n_test_steps)
            stop = (j + 1) * (anom_test_size // n_test_steps)
            anomalous_test_dict = {X: anom_test[start:stop], is_train: False, drop_prob: 0.0}
            a_elbo_t = sess.run([ELBO_t], feed_dict=anomalous_test_dict)
            t_anomaly_elbo[start:stop, 0] = a_elbo_t[0]
        # compute the last slice separately since it might have more points
        anomalous_test_dict = {X: anom_test[stop:], is_train: False, drop_prob: 0.0}
        a_elbo_t = sess.run([ELBO_t], feed_dict=anomalous_test_dict)
        t_anomaly_elbo[stop:, 0] = a_elbo_t[0]
        t_anomaly_elbo = np.clip(t_anomaly_elbo, None, 1e3)
        # save accuracy rates
        tn = np.sum(t_normal_elbo < anomaly_threshold)
        fp = np.sum(t_normal_elbo > anomaly_threshold)
        tp = np.sum(t_anomaly_elbo > anomaly_threshold)
        fn = np.sum(t_anomaly_elbo < anomaly_threshold)
        y_pred_n = t_normal_elbo > anomaly_threshold
        y_pred_a = t_anomaly_elbo > anomaly_threshold
        end_time = time.time()
        y_pred = np.concatenate((y_pred_n, y_pred_a), axis=0)
        y_true = np.concatenate((np.zeros([test_size]), np.ones([anom_test_size])), axis=0)
        # calculate the AUC-score
        t_elbo = np.concatenate((t_normal_elbo, t_anomaly_elbo), axis=0)
        # calculate and report total stats
        # scores
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)
        f01 = (1 + (1 / fn_ratio)**2) * precision * recall / ((1 / fn_ratio**2) * precision + recall)
        auc, thresh = ph.auc_plot(t_elbo, y_true, anomaly_threshold, f01)
        print("AUC:", auc)
        ph.report_stats("\nTest Statistics - Normal Data", t_normal_elbo, verbose=verbose)
        ph.report_stats("\nTest Statistics - Anomaly Data", t_anomaly_elbo, verbose=verbose)
        ph.scores_x(tp, fp, fn, tn, verbose=verbose)
        elbo_hist(t_normal_elbo, t_anomaly_elbo, anomaly_threshold, 'Test Set', 'figure 5',
                  save_print_dir, dataset, plots=plots)
        # ph.plot_2hist(t_normal_elbo, t_anomaly_elbo, save_print_dir, dataset, title='Normal', f_name="", figsize=(5, 5), plots=plots)
        # ph.plot_hist(t_anomaly_elbo, save_print_dir, dataset, title='Anomaly', f_name="", figsize=(5, 5), plots=plots)
        # Compute confusion matrix
        cnf_matrix = np.array([[int(tp), int(fn)],
                               [int(fp), int(tn)]])
        np.set_printoptions(precision=2)
        class_names = np.array(['Anomaly', 'Normal'], dtype='<U10')
        # Plot non-normalized confusion matrix
        if plots:
            ph.plot_confusion_matrix(cnf_matrix, classes=class_names,
                                     title=" Confusion matrix\n" + "Dataset: " + dataset + " - " + mode, plots=plots)
            plt.savefig(save_print_dir + dataset + "_threshold_" + str(round(anomaly_threshold, 2)) + '_conf_mat.png')
            plt.show()
        if verbose:
            print('total inference time for {} data points (x{} samples each):{:6.3}s '.format(y_true.shape[0],
                                                                                                output_samples,
                                                                                                (end_time - start_time)))
        # save all elbo results to a file for post-processing
        np.savez(save_dir + file_name + 'res', descr=run_comment,
                 val_norm_elbo=normal_elbo, val_anom_elbo=anomaly_elbo, x_val=x_valid,
                 tst_norm_elbo=t_normal_elbo, tst_anom_elbo=t_anomaly_elbo, x_tst=x_test)
        ph.saveres2txt(save_print_dir, file_name, dataset, round(anomaly_threshold, 2),
                       tp, fp, tn, fn, f1, auc, f01, precision, recall)
        # return statements
        if bayesian_opt:
            return [tp, fp, tn, fn], bo_results, normal_elbo, anomaly_elbo, t_normal_elbo, t_anomaly_elbo, \
                save_dir + file_name + 'res'
        else:
            return [tp, fp, tn, fn], {}, normal_elbo, anomaly_elbo, t_normal_elbo, t_anomaly_elbo, \
                save_dir + file_name + 'res'
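
# ---------------------------------------------------------------------------- #
# Illustrative sketch (NumPy only, assumed scorer, never called): the chunked
# evaluation pattern used in the validation/test loops above, where the data is
# scored in 'n_test_steps' slices to avoid feeding the whole set through the
# graph at once, and the final slice absorbs the remainder.
# e.g. _sketch_chunked_scoring(np.random.randn(1000, 5), lambda b: b.sum(axis=1))
def _sketch_chunked_scoring(data, score_fn, n_test_steps=100):
    import numpy as np
    size = data.shape[0]
    out = np.zeros([size, 1])
    step = size // n_test_steps
    stop = 0
    for j in range(n_test_steps - 1):
        start = j * step
        stop = (j + 1) * step
        out[start:stop, 0] = score_fn(data[start:stop])
    # the last slice may hold more points than the others
    out[stop:, 0] = score_fn(data[stop:])
    return out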
if __name__ == "__main__":
    conf_matrix, bo_results, valid_elbo, test_elbo, elbo_threshold, y_valid, y_test, res_np_file = \
        net(TRAIN_VAE=False, load_model=True, real_data=True, fp_ratio=1, fn_ratio=10, anom_rate=0.0078,
            bayesian_opt=False, anomaly_threshold=50, batch_norm=False, dropout=True, verbose=True, plots=True)
    dataset = 'http'
    # produce validation plot
    ph.post_plot_unsuper2(valid_elbo, elbo_threshold)
    # post processing
    # get ratios for different values of anomaly rates
    precision, recall, f1, f01, tn, fn, tp, fp, ratios = ph.unsupervised_elbo_analysis(valid_elbo, test_elbo,
                                                                                       y_test, fn_ratio=10)
    print(precision, recall, f1, f01)
    print(tp, fp, tn, fn)
    # get ratios by selecting the cluster of anomalies
    precision, recall, f1, f01, tn, fn, tp, fp = ph.select_elbo_region(valid_elbo, test_elbo, y_test,
                                                                       40, 60, fn_ratio=10)
    cnf_matrix = np.array([[int(tp), int(fn)],
                           [int(fp), int(tn)]])
    np.set_printoptions(precision=2)
    class_names = np.array(['Anomaly', 'Normal'], dtype='<U10')
    # Plot non-normalized confusion matrix
    ph.plot_confusion_matrix(cnf_matrix, classes=class_names,
                             title=" Confusion matrix\n" + "Dataset: " + dataset + " - Selecting clustered anomalies",
                             plots=True)
    # plot elbos and zoom around cluster of anomalies
    start, stop, threshold = 40, 60, 30
    ph.post_plot_unsuper(valid_elbo, test_elbo, threshold, start, stop)
    ph.auc_plot(test_elbo, y_test, threshold, f01[0])
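
# ---------------------------------------------------------------------------- #
# Illustrative sketch (not called anywhere): the KL-annealing schedule used in
# the training loops above, kl_a = 2 * (-0.5 + 1 / (1 + exp(-count * rate))),
# which grows smoothly from 0 towards 1 as the batch counter increases. The
# sample counts and rate below are assumptions for illustration.
def _sketch_kl_annealing(counts=(0, 100, 1000, 10000, 100000), anneal_rate=5e-5):
    import numpy as np
    return [2 * (-.5 + 1 / (1 + np.exp(-c * anneal_rate))) for c in counts]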