prediction = tf.tensordot( tf.reshape(outputs, shape=(batch_size, number_of_lstm_units)), weights_dense, 2) + bias_dense # loss evaluation loss = tf.nn.l2_loss(target - prediction) # optimization algorithm optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) # extract train and test x_train, y_train, x_valid, y_valid, x_test, y_test = utils.generate_batches( filename='../data/sin.csv', window=sequence_len, stride=stride, mode='validation', non_train_percentage=.5, val_rel_percentage=.5, normalize='maxmin-11', time_difference=True, td_method=None) # suppress second axis on Y values (the algorithms expects shapes like (n,) for the prediction) y_train = y_train[:, 0] y_valid = y_valid[:, 0] y_test = y_test[:, 0] # if the dimensions mismatch (somehow, due tu bugs in generate_batches function, # make them match) mismatch = False if len(x_train) > len(y_train):
l2_regularizer, nn_params) regularized_elbo = -elbo + l1_regularization_penalty + l2_regularization_penalty optimizer_elbo = tf.train.AdamOptimizer(learning_rate_elbo).minimize( regularized_elbo) if random_stride == False: # extract train and test x_train, y_train, x_valid, y_valid, x_test, y_test = utils.generate_batches( filename=data_path, window=sequence_len, stride=stride, mode='strided-validation', non_train_percentage=.5, val_rel_percentage=.6, normalize=normalization, time_difference=False, td_method=None, subsampling=subsampling, rounding=rounding) # suppress second axis on Y values (the algorithms expects shapes like (n,) for the prediction) y_train = y_train[:, 0] y_valid = y_valid[:, 0] y_test = y_test[:, 0] if len(x_train) > len(y_train): x_train = x_train[:len(y_train)]
prior = tf.contrib.distributions.MultivariateNormalDiag(tf.constant(np.zeros(vae_hidden_size, dtype='float32')), tf.constant(np.ones(vae_hidden_size, dtype='float32'))) divergence = tf.contrib.distributions.kl_divergence(vae_hidden_distr, prior) elbo = tf.reduce_mean(likelihood - divergence) optimizer_elbo = tf.train.AdamOptimizer(learning_rate_elbo).minimize(elbo) # # extract clusters information from clean data x_train_tmp, y_train_tmp, x_valid_tmp, y_valid_tmp, x_test_tmp, y_test_tmp = utils.generate_batches( filename='../data/power_consumption.csv', window=sequence_len, stride=stride, mode='validation', non_train_percentage=.3, val_rel_percentage=.8, normalize='maxmin01', time_difference=False, td_method=None) # cluster info relative to signal's value (cluster's means) clusters_info = clst.k_means(x_train_tmp, n_clusters) # extract train and test x_train, y_train, x_valid, y_valid, x_test, y_test = utils.generate_batches( filename='../data/power_consumption.csv', window=sequence_len, stride=stride, mode='validation', non_train_percentage=.3,
def cnn_lstm(
        filename,
        sequence_len,
        stride,
        batch_size,
        cnn_kernels,  # (kernel_size, stride, number_of_filters)
        cnn_activations,  # tf activations (list)
        cnn_pooling,
        lstm_params,
        lstm_activation,  # tf activations (list)
        dense_activation,  # tf activation
        l_rate,
        non_train_percentage,
        training_epochs,
        l_rate_test,
        val_rel_percentage,
        normalize,
        time_difference,
        td_method,
        stop_on_growing_error=False,
        stop_valid_percentage=1.,
        auxiliary_loss=None,
        l_rate_auxiliary=1e-3,
        sigma_threshold=3.):
    """Train a conv1d + LSTM + dense predictor and flag anomalies on the test split.

    The default TensorFlow graph is reset and rebuilt, the model is trained
    with gradient descent on the L2 loss, and the test split is scored: a
    point is flagged when the Gaussian density of its residual is below the
    density found `sigma_threshold` standard deviations away from the mean of
    the validation residuals. Results are plotted and printed, and the
    best-fitting distribution names and the sMAPE are appended to
    '../__tmp/__tmp_res.csv' and '../__tmp/__tmp_err.csv'.

    Args:
        filename: path forwarded to `utils.generate_batches`.
        sequence_len: window length, also the second axis of the input.
        stride: stride forwarded to `utils.generate_batches`.
        batch_size: number of windows fed per step (third axis of the input).
        cnn_kernels: one `(kernel_size, stride, number_of_filters)` per
            convolutional layer.
        cnn_activations: one callable per convolutional layer.
        lstm_params: first argument of each `LSTMCell`, one entry per layer.
        lstm_activation: one activation per LSTM layer.
        dense_activation: callable applied to the dense output.
        l_rate: learning rate of the gradient-descent optimizer.
        training_epochs: maximum number of training epochs.
        normalize, time_difference, td_method: forwarded to
            `utils.generate_batches`.
        stop_on_growing_error: stop as soon as the validation error grows.
        stop_valid_percentage: fraction of the validation batches used for
            the early-stopping check.
        sigma_threshold: distance from the mean, in standard deviations, at
            which the density threshold is taken.
        cnn_pooling, non_train_percentage, l_rate_test, val_rel_percentage,
        auxiliary_loss, l_rate_auxiliary: accepted for interface
            compatibility; not used by this function.

    Raises:
        ValueError: if the validation split holds less than one batch.
    """
    # training settings (taken from the arguments instead of being overwritten)
    epochs = training_epochs

    # reset computational graph
    tf.reset_default_graph()

    # define input/output pairs
    input_ = tf.placeholder(
        tf.float32, [None, sequence_len, batch_size])  # (batch, input, time)
    target = tf.placeholder(tf.float32, [None, batch_size])  # (batch, output)

    # one filter bank per batch column and one bias vector, for every layer;
    # the two lists must be distinct objects
    weights_conv = []
    bias_conv = []
    for i in range(len(cnn_kernels)):
        weights_conv.append([
            tf.Variable(
                tf.truncated_normal(
                    shape=[cnn_kernels[i][0], cnn_kernels[i][2], 1]))
            for _ in range(batch_size)
        ])
        bias_conv.append(tf.Variable(tf.zeros(shape=[batch_size])))

    # convolutional stack: every layer consumes the output of the previous one
    layer_conv = input_
    for j in range(len(cnn_kernels)):
        # stack one copy of the input for each filter of the battery
        stacked = tf.stack([layer_conv] * cnn_kernels[j][2], axis=3)
        columns = [
            tf.nn.conv1d(stacked[:, :, i, :],
                         filters=weights_conv[j][i],
                         stride=cnn_kernels[j][1],
                         padding='SAME') for i in range(batch_size)
        ]
        # squeeze and stack the per-column outputs back together
        layer_conv = tf.squeeze(tf.stack(columns, axis=-2), axis=-1)
        layer_conv = tf.add(layer_conv, bias_conv[j])
        # non-linear activation before feeding the next layer / the lstm
        layer_conv = cnn_activations[j](layer_conv)

    # reshape the output so it can be fed to the lstm (batch, time, input)
    number_of_lstm_inputs = layer_conv.get_shape().as_list()[1]
    layer_conv_flatten = tf.reshape(layer_conv,
                                    (-1, batch_size, number_of_lstm_inputs))

    # define the LSTM cells
    cells = [
        tf.contrib.rnn.LSTMCell(
            lstm_params[i],
            forget_bias=1.,
            state_is_tuple=True,
            activation=lstm_activation[i],
            initializer=tf.contrib.layers.xavier_initializer())
        for i in range(len(lstm_params))
    ]
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    outputs, _ = tf.nn.dynamic_rnn(multi_rnn_cell,
                                   layer_conv_flatten,
                                   dtype="float32")

    # final dense layer: declare variable shapes: weights and bias
    weights_dense = tf.get_variable(
        'weights',
        shape=[lstm_params[-1], batch_size, batch_size],
        initializer=tf.truncated_normal_initializer())
    bias_dense = tf.get_variable('bias',
                                 shape=[1, batch_size],
                                 initializer=tf.truncated_normal_initializer())

    # dense layer: prediction
    y_hat = tf.tensordot(
        tf.reshape(outputs, shape=(batch_size, lstm_params[-1])),
        weights_dense, 2) + bias_dense
    # activation of the last, dense layer
    y_hat = dense_activation(y_hat)

    # estimate error as the difference between prediction and target
    error = target - y_hat
    loss = tf.nn.l2_loss(error)
    opt = tf.train.GradientDescentOptimizer(
        learning_rate=l_rate).minimize(loss)

    # extract train and test. The split is kept at 50%/50% on purpose: the
    # ground-truth anomaly window used below is an absolute position in the
    # test split and is only valid for this split.
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.generate_batches(
        filename=filename,
        window=sequence_len,
        stride=stride,
        mode='validation',
        non_train_percentage=.5,
        val_rel_percentage=.5,
        normalize=normalize,
        time_difference=time_difference,
        td_method=td_method)

    # suppress second axis on Y values (the algorithm expects shapes like (n,))
    y_train = y_train[:, 0]
    y_valid = y_valid[:, 0]
    y_test = y_test[:, 0]

    # generate_batches may return more windows than targets: trim the windows
    mismatch = False
    if len(x_train) > len(y_train):
        x_train = x_train[:len(y_train)]
        mismatch = True
    if len(x_valid) > len(y_valid):
        x_valid = x_valid[:len(y_valid)]
        mismatch = True
    if len(x_test) > len(y_test):
        x_test = x_test[:len(y_test)]
        mismatch = True
    if mismatch:
        print(
            "Mismatched dimensions due to generate batches: this will be corrected automatically."
        )
    print("Datasets shapes: ", x_train.shape, y_train.shape, x_valid.shape,
          y_valid.shape, x_test.shape, y_test.shape)

    def _window_batch(windows, k):
        # k-th group of `batch_size` windows, laid out as (1, input, time)
        return windows[k * batch_size:(k + 1) * batch_size, :].T.reshape(
            1, sequence_len, batch_size)

    def _target_batch(values, k):
        # matching targets, laid out as (1, batch_size)
        return values[np.newaxis, k * batch_size:(k + 1) * batch_size]

    n_train_batches = int(np.floor(x_train.shape[0] / batch_size))
    n_valid_batches = int(np.floor(x_valid.shape[0] / batch_size))
    n_stop_batches = int(stop_valid_percentage *
                         np.floor(x_valid.shape[0] / batch_size))
    n_test_batches = int(np.floor(x_test.shape[0] / batch_size))

    # train the model
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        last_error_on_valid = np.inf
        for e in range(epochs):
            print("epoch", e + 1)
            for k in range(n_train_batches):
                sess.run(opt,
                         feed_dict={
                             input_: _window_batch(x_train, k),
                             target: _target_batch(y_train, k)
                         })

            if stop_on_growing_error:
                # accumulate the validation error and check the stop condition
                current_error_on_valid = .0
                for k in range(n_stop_batches):
                    current_error_on_valid += np.abs(
                        np.sum(
                            sess.run(error,
                                     feed_dict={
                                         input_: _window_batch(x_valid, k),
                                         target: _target_batch(y_valid, k)
                                     })))
                print("Past error on valid: ", last_error_on_valid)
                print("Current total error on valid: ", current_error_on_valid)
                if current_error_on_valid > last_error_on_valid:
                    print("Stop learning at epoch ", e, " out of ", epochs)
                    break
                last_error_on_valid = current_error_on_valid

        # validation: one row of errors per batch (no zero padding to cut off)
        errors_valid = np.zeros(shape=(n_valid_batches, batch_size))
        for k in range(n_valid_batches):
            errors_valid[k] = sess.run(error,
                                       feed_dict={
                                           input_: _window_batch(x_valid, k),
                                           target: _target_batch(y_valid, k)
                                       })

        #######################################################################
        # TEST WITH DYNAMIC ERROR'S FUNCTION FITTING
        #######################################################################
        # NOTE(review): means_valid / stds_valid / weights_valid / threshold
        # were referenced without being defined. They are rebuilt here as a
        # single-Gaussian fit (n_mixtures == 1) of the validation residuals,
        # using the same sign convention as the test statistic below
        # (prediction - target). Confirm against the intended mixture model.
        n_mixtures = 1
        valid_residuals = -errors_valid.ravel()
        if valid_residuals.size == 0:
            raise ValueError(
                "validation split holds less than one batch: cannot fit the "
                "error distribution")
        means_valid = np.full(n_mixtures, valid_residuals.mean())
        # avoid a degenerate (zero-width) Gaussian
        stds_valid = np.full(n_mixtures,
                             max(valid_residuals.std(), np.finfo(float).eps))
        weights_valid = np.full(n_mixtures, 1. / n_mixtures)
        threshold = [
            scistats.norm.pdf(mean - sigma_threshold * std, mean, std)
            for (mean, std) in zip(means_valid, stds_valid)
        ]

        predictions = np.zeros(shape=(n_test_batches, batch_size))
        errors_test = np.zeros(shape=(n_test_batches, batch_size))
        gaussian_error_statistics = np.zeros(shape=(n_test_batches,
                                                    batch_size))
        anomalies = np.zeros(shape=(n_test_batches, batch_size), dtype=bool)

        for k in range(n_test_batches):
            batch_x = _window_batch(x_test, k)
            batch_y = _target_batch(y_test, k)
            predictions[k] = sess.run(y_hat,
                                      feed_dict={
                                          input_: batch_x,
                                          target: batch_y
                                      }).flatten()
            errors_test[k] = batch_y - predictions[k]

            for i in range(batch_size):
                residual = predictions[k, i] - batch_y[0, i]
                # evaluate Pr(Z=1|X) for each gaussian distribution; the
                # normalising constant does not change the arg-max
                num = np.array([
                    w * scistats.norm.pdf(residual, mean, std)
                    for (mean, std,
                         w) in zip(means_valid, stds_valid, weights_valid)
                ])
                index = np.argmax(num)
                gaussian_error_statistics[k, i] = scistats.norm.pdf(
                    residual, means_valid[index], stds_valid[index])
                anomalies[k, i] = (gaussian_error_statistics[k, i] <
                                   threshold[index])

    anomalies = np.argwhere(anomalies.flatten())
    errors_test = errors_test.flatten()
    predictions = predictions.flatten()
    n_scored = n_test_batches * batch_size

    # plot results
    fig, ax1 = plt.subplots()
    # plot data series
    ax1.plot(y_test[:n_scored], 'b', label='index')
    ax1.set_xlabel('Time')
    ax1.set_ylabel('Index Value')
    # plot predictions
    ax1.plot(predictions[:n_scored], 'r', label='prediction')
    ax1.set_ylabel('Prediction')
    plt.legend(loc='best')
    # highlights anomalies
    for i in anomalies.ravel():
        if i <= len(y_test):
            plt.axvspan(i, i + 1, color='yellow', alpha=0.5, lw=0)
    fig.tight_layout()
    plt.show()

    print("Total test error:", np.sum(np.abs(errors_test)))

    # plot reconstructed signal (running sums of targets and predictions)
    tot_y = 0.
    tot_y_hat = 0.
    recovered_plot_y = np.zeros(shape=len(predictions[:n_scored]) + 1)
    recovered_plot_y_hat = np.zeros(shape=len(predictions[:n_scored]) + 1)
    for i in range(1, len(recovered_plot_y)):
        recovered_plot_y[i] = tot_y
        recovered_plot_y_hat[i] = tot_y_hat
        tot_y += y_test[i - 1]
        tot_y_hat += predictions[i - 1]

    fig, ax1 = plt.subplots()
    # plot data series
    print("\nReconstruction:")
    ax1.plot(recovered_plot_y, 'b', label='index')
    ax1.set_xlabel('RECONSTRUCTION: Date')
    ax1.set_ylabel('Space Shuttle')
    # plot predictions
    ax1.plot(recovered_plot_y_hat, 'r', label='prediction')
    ax1.set_ylabel('RECONSTRUCTION: Prediction')
    plt.legend(loc='best')
    fig.tight_layout()
    plt.show()

    # errors on test
    print("\nTest errors:")
    plt.hist(np.array(errors_test).ravel(), bins=30)

    # performances
    target_anomalies = np.zeros(shape=n_scored)
    # caveat: the anomalies are defined by absolute position in the test set
    # (i.e. size matters!): train 50%, validation_relative 50%
    target_anomalies[520:540] = 1

    # real values
    condition_positive = np.argwhere(target_anomalies == 1)
    condition_negative = np.argwhere(target_anomalies == 0)
    # predictions
    predicted_positive = anomalies

    true_positives = len(np.intersect1d(condition_positive,
                                        predicted_positive))
    false_positives = len(np.intersect1d(predicted_positive,
                                         condition_negative))
    # each ratio is reported as 0 when its denominator is empty
    precision = (true_positives / len(predicted_positive)
                 if len(predicted_positive) else .0)
    fall_out = (false_positives / len(condition_negative)
                if len(condition_negative) else .0)
    recall = (true_positives / len(condition_positive)
              if len(condition_positive) else .0)

    print("Anomalies: ", condition_positive.T)
    print("Anomalies Detected: ", predicted_positive.T)
    print("Precision: ", precision)
    print("Fallout: ", fall_out)
    print("Recall: ", recall)

    # top-n distributions that fit the test errors.
    top_n = 10
    cols = list(
        bfd.best_fit_distribution(np.array(errors_test).ravel(), top_n=top_n))
    top_n_distr = pd.DataFrame(cols, index=['NAME', 'PARAMS', 'ERRORS'])
    print("\n\nTop distributions: NAME ERRORS PARAM ", top_n_distr)

    file_ptr = np.loadtxt('../__tmp/__tmp_res.csv', dtype=object)
    for i in range(top_n):
        file_ptr = np.append(file_ptr, top_n_distr[i]['NAME'])
    np.savetxt('../__tmp/__tmp_res.csv', file_ptr, fmt='%s')

    # save sMAPE of each model
    sMAPE_error_len = len(np.array(errors_test).ravel())
    sMAPE_den = np.abs(
        np.array(predictions).ravel()[:sMAPE_error_len]) + np.abs(
            np.array(y_test).ravel()[:sMAPE_error_len])
    perc_error = np.mean(
        200 * (np.abs(np.array(errors_test).ravel()[:sMAPE_error_len])) /
        sMAPE_den)
    print("Percentage error: ", perc_error)

    file_ptr = np.loadtxt('../__tmp/__tmp_err.csv', dtype=object)
    file_ptr = np.append(file_ptr, str(perc_error))
    np.savetxt('../__tmp/__tmp_err.csv', file_ptr, fmt='%s')
def vae(filename,
        sequence_len,
        stride,
        batch_size,
        cnn_sizes,
        cnn_activations,
        cnn_pooling,
        cnn_l_rate,  # used with "auxiliary" loss
        global_features,
        vae_hidden_size,
        tstud_degrees_of_freedom,
        l_rate,
        non_train_percentage,
        training_epochs,
        l_rate_test,
        val_rel_percentage,
        normalize,
        time_difference,
        td_method,
        stop_on_growing_error=False,
        stop_valid_percentage=1.):
    """Train a dense variational auto-encoder and plot the test-set encoding densities.

    The default TensorFlow graph is reset and rebuilt, the negative ELBO is
    minimised with Adam (optionally early-stopped on the validation split),
    and for every test batch the density of one sample drawn from the learnt
    hidden distribution is plotted against a fixed Student-t threshold.

    Args:
        filename: path forwarded to `utils.generate_batches`.
        sequence_len: window length, also the second axis of the input.
        stride: stride forwarded to `utils.generate_batches`.
        batch_size: number of windows fed per step (third axis of the input).
        vae_hidden_size: dimension of the hidden Gaussian.
        tstud_degrees_of_freedom: degrees of freedom of the Student-t used
            for the reconstruction term and for the plotted threshold.
        l_rate: learning rate of the Adam optimizer.
        non_train_percentage, val_rel_percentage, normalize, time_difference,
        td_method: forwarded to `utils.generate_batches`.
        training_epochs: maximum number of training epochs.
        stop_on_growing_error: enable early stopping on the validation loss.
        stop_valid_percentage: fraction of the validation batches used for
            the early-stopping check.
        cnn_sizes, cnn_activations, cnn_pooling, cnn_l_rate, global_features,
        l_rate_test: accepted for interface compatibility; not used by this
            function.
    """
    # reset computational graph
    tf.reset_default_graph()

    # settings taken from the arguments (they used to be overwritten here)
    epochs = training_epochs
    learning_rate_elbo = l_rate

    # settings that are not exposed through the signature
    sigma_threshold_elbo = 1e-3
    samples_per_iter = 1  # number of samplings per iteration
    min_loss_improvment = .005  # minimum relative loss decrease (.01 is 1%)

    # define the input
    input_ = tf.placeholder(
        tf.float32, [None, sequence_len, batch_size])  # (batch, input, time)

    # parameters' initialization
    vae_encoder_shape_weights = [
        batch_size * sequence_len, vae_hidden_size * 2
    ]
    vae_decoder_shape_weights = [vae_hidden_size, batch_size * sequence_len]

    zip_weights_encoder = zip(vae_encoder_shape_weights[:-1],
                              vae_encoder_shape_weights[1:])
    weights_vae_encoder = [
        tf.Variable(tf.truncated_normal(shape=[shape, next_shape]))
        for (shape, next_shape) in zip_weights_encoder
    ]
    bias_vae_encoder = [
        tf.Variable(tf.truncated_normal(shape=[shape]))
        for shape in vae_encoder_shape_weights[1:]
    ]

    zip_weights_decoder = zip(vae_decoder_shape_weights[:-1],
                              vae_decoder_shape_weights[1:])
    weights_vae_decoder = [
        tf.Variable(tf.truncated_normal(shape=[shape, next_shape]))
        for (shape, next_shape) in zip_weights_decoder
    ]
    bias_vae_decoder = [
        tf.Variable(tf.truncated_normal(shape=[shape]))
        for shape in vae_decoder_shape_weights[1:]
    ]

    # VAE graph's definition
    flattened_input = tf.layers.flatten(input_)

    vae_encoder = tf.matmul(flattened_input,
                            weights_vae_encoder[0]) + bias_vae_encoder[0]
    for (w_vae, b_vae) in zip(weights_vae_encoder[1:], bias_vae_encoder[1:]):
        vae_encoder = tf.nn.leaky_relu(vae_encoder)
        vae_encoder = tf.matmul(vae_encoder, w_vae) + b_vae

    # means and variances' vectors of the learnt hidden distribution;
    # the hidden gaussian's covariance matrix is assumed to be diagonal
    loc = tf.slice(tf.nn.relu(vae_encoder), [0, 0], [-1, vae_hidden_size])
    loc = tf.squeeze(loc, axis=0)
    scale = tf.slice(tf.nn.softplus(vae_encoder), [0, vae_hidden_size],
                     [-1, vae_hidden_size])
    scale = tf.squeeze(scale, 0)

    vae_hidden_distr = tf.contrib.distributions.MultivariateNormalDiag(
        loc, scale)
    vae_hidden_state = tf.reduce_mean(
        [vae_hidden_distr.sample() for _ in range(samples_per_iter)], axis=0)

    # probability of the *single* sample (no multisampling) --> used in test phase
    vae_hidden_pdf = vae_hidden_distr.prob(vae_hidden_distr.sample())

    feed_decoder = tf.reshape(vae_hidden_state, shape=(-1, vae_hidden_size))
    vae_decoder = tf.matmul(feed_decoder,
                            weights_vae_decoder[0]) + bias_vae_decoder[0]
    vae_decoder = tf.nn.leaky_relu(vae_decoder)
    for (w_vae, b_vae) in zip(weights_vae_decoder[1:], bias_vae_decoder[1:]):
        vae_decoder = tf.matmul(vae_decoder, w_vae) + b_vae
        vae_decoder = tf.nn.leaky_relu(vae_decoder)

    # time-series reconstruction and ELBO loss
    # NOTE(review): the reconstruction distribution has constant location and
    # scale and `vae_decoder` is not referenced by the loss, so the likelihood
    # term does not depend on the decoder -- confirm this is intended.
    vae_reconstruction = tf.contrib.distributions.StudentT(
        tstud_degrees_of_freedom,
        tf.constant(np.zeros(batch_size * sequence_len, dtype='float32')),
        tf.constant(np.ones(batch_size * sequence_len, dtype='float32')))
    likelihood = vae_reconstruction.log_prob(flattened_input)

    prior = tf.contrib.distributions.MultivariateNormalDiag(
        tf.constant(np.zeros(vae_hidden_size, dtype='float32')),
        tf.constant(np.ones(vae_hidden_size, dtype='float32')))

    divergence = tf.contrib.distributions.kl_divergence(
        vae_hidden_distr, prior)
    elbo = tf.reduce_mean(likelihood - divergence)

    optimizer_elbo = tf.train.AdamOptimizer(learning_rate_elbo).minimize(-elbo)

    # extract train and test
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.generate_batches(
        filename=filename,
        window=sequence_len,
        stride=stride,
        mode='validation',
        non_train_percentage=non_train_percentage,
        val_rel_percentage=val_rel_percentage,
        normalize=normalize,
        time_difference=time_difference,
        td_method=td_method)

    # suppress second axis on Y values (the algorithm expects shapes like (n,))
    y_train = y_train[:, 0]
    y_valid = y_valid[:, 0]
    y_test = y_test[:, 0]

    # generate_batches may return more windows than targets: trim the windows
    mismatch = False
    if len(x_train) > len(y_train):
        x_train = x_train[:len(y_train)]
        mismatch = True
    if len(x_valid) > len(y_valid):
        x_valid = x_valid[:len(y_valid)]
        mismatch = True
    if len(x_test) > len(y_test):
        x_test = x_test[:len(y_test)]
        mismatch = True
    if mismatch:
        print(
            "Mismatched dimensions due to generate batches: this will be corrected automatically."
        )
    print("Datasets shapes: ", x_train.shape, y_train.shape, x_valid.shape,
          y_valid.shape, x_test.shape, y_test.shape)

    def _window_batch(windows, k):
        # k-th group of `batch_size` windows, laid out as (1, input, time)
        return windows[k * batch_size:(k + 1) * batch_size, :].T.reshape(
            1, sequence_len, batch_size)

    n_train_batches = int(np.floor(x_train.shape[0] / batch_size))
    n_stop_batches = int(stop_valid_percentage *
                         np.floor(x_valid.shape[0] / batch_size))
    n_test_batches = int(np.floor(x_test.shape[0] / batch_size))

    # train + early-stopping
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        last_error_on_valid = np.inf
        for e in range(epochs):
            print("epoch", e + 1)
            for k in range(n_train_batches):
                # run VAE encoding-decoding
                sess.run(optimizer_elbo,
                         feed_dict={input_: _window_batch(x_train, k)})

            if stop_on_growing_error:
                # accumulate the validation loss and check the stop condition
                current_error_on_valid = .0
                for k in range(n_stop_batches):
                    current_error_on_valid += np.abs(
                        np.sum(
                            sess.run(
                                -elbo,
                                feed_dict={input_: _window_batch(x_valid,
                                                                 k)})))

                # stop when the loss grows, or (after the first epoch) when
                # its relative reduction is below `min_loss_improvment`
                loss_grew = current_error_on_valid > last_error_on_valid
                loss_stalled = (
                    not loss_grew and e != 0 and
                    np.abs(current_error_on_valid / last_error_on_valid) >
                    1 - min_loss_improvment)
                if loss_grew or loss_stalled:
                    if loss_grew:
                        print(
                            "Loss function has increased wrt to past iteration."
                        )
                    else:
                        print("Loss' decrement is below 1% (relative).")
                    print("Stop learning at epoch ", e, " out of ", epochs)
                    break
                last_error_on_valid = current_error_on_valid

        # test
        y_test = y_test[:x_test.shape[0]]
        mean_elbo = .0
        std_elbo = 1.
        vae_anomalies = np.zeros(shape=n_test_batches)
        threshold_elbo = scistats.t.pdf(mean_elbo - sigma_threshold_elbo,
                                        df=tstud_degrees_of_freedom,
                                        loc=mean_elbo,
                                        scale=std_elbo)
        for k in range(n_test_batches):
            # get probability of the encoding
            vae_anomalies[k] = sess.run(
                vae_hidden_pdf, feed_dict={input_: _window_batch(x_test, k)})

    # plot vae likelihood values
    fig, ax1 = plt.subplots()
    ax1.plot(vae_anomalies, 'b', label='likelihood')
    ax1.set_ylabel('VAE: Anomalies likelihood')
    plt.legend(loc='best')
    # highlights elbo's boundary
    ax1.plot(np.array([threshold_elbo for _ in range(len(vae_anomalies))]),
             'r',
             label='threshold')
    plt.legend(loc='best')
    fig.tight_layout()
    plt.show()

    # plot the test series
    fig, ax1 = plt.subplots()
    ax1.plot(y_test, 'b', label='test set')
    ax1.set_ylabel('time')
    plt.legend(loc='best')
    fig.tight_layout()
    plt.show()
def vae_experiment(data_path, sequence_len, stride, activation,
                   vae_hidden_size, learning_rate_elbo, normalization):
    """Train a dense VAE and search the anomaly threshold with the best precision.

    The default TensorFlow graph is reset and rebuilt, the negative ELBO is
    minimised with Adam (early-stopped on the validation split), and every
    test window receives a Monte-Carlo estimate of the probability that a
    draw from the learnt hidden distribution is component-wise below the
    encoder's third output slice. Windows whose estimate is not greater than
    a candidate threshold are flagged, and the flags are compared with a
    hard-coded ground-truth window.

    Args:
        data_path: path forwarded to `utils.generate_batches`.
        sequence_len: window length, also the second axis of the input.
        stride: stride forwarded to `utils.generate_batches`.
        activation: callable applied in the encoder, to the location slice
            and in the decoder.
        vae_hidden_size: dimension of the hidden Gaussian.
        learning_rate_elbo: learning rate of the Adam optimizer.
        normalization: forwarded to `utils.generate_batches` as `normalize`.

    Returns:
        Tuple `(best_precision, best_recall, best_threshold)`; among
        thresholds with equal precision the last one tried is kept.
    """
    # reset computational graph
    tf.reset_default_graph()

    # parameters that are constant
    batch_size = 1
    subsampling = 3
    # candidate thresholds
    sigma_threshold_elbo = [round(i * 1e-5, 5) for i in range(1, 300, 20)]
    # training epochs
    epochs = 100
    # early-stopping parameters
    stop_on_growing_error = True
    stop_valid_percentage = 1.  # fraction of validation used for early-stopping
    min_loss_improvment = .005  # minimum relative loss decrease (.01 is 1%)

    # define the input
    input_ = tf.placeholder(
        tf.float32, [None, sequence_len, batch_size])  # (batch, input, time)

    # encoder/decoder parameters + initialization
    vae_encoder_shape_weights = [
        batch_size * sequence_len, vae_hidden_size * 3
    ]
    vae_decoder_shape_weights = [vae_hidden_size, batch_size * sequence_len]

    zip_weights_encoder = zip(vae_encoder_shape_weights[:-1],
                              vae_encoder_shape_weights[1:])
    weights_vae_encoder = [
        tf.Variable(tf.truncated_normal(shape=[shape, next_shape]))
        for (shape, next_shape) in zip_weights_encoder
    ]
    bias_vae_encoder = [
        tf.Variable(tf.truncated_normal(shape=[shape]))
        for shape in vae_encoder_shape_weights[1:]
    ]

    zip_weights_decoder = zip(vae_decoder_shape_weights[:-1],
                              vae_decoder_shape_weights[1:])
    weights_vae_decoder = [
        tf.Variable(tf.truncated_normal(shape=[shape, next_shape]))
        for (shape, next_shape) in zip_weights_decoder
    ]
    bias_vae_decoder = [
        tf.Variable(tf.truncated_normal(shape=[shape]))
        for shape in vae_decoder_shape_weights[1:]
    ]

    # VAE graph's definition
    flattened_input = tf.layers.flatten(input_)

    vae_encoder = tf.matmul(flattened_input,
                            weights_vae_encoder[0]) + bias_vae_encoder[0]
    for (w_vae, b_vae) in zip(weights_vae_encoder[1:], bias_vae_encoder[1:]):
        vae_encoder = activation(vae_encoder)
        vae_encoder = tf.matmul(vae_encoder, w_vae) + b_vae

    # the encoder output is split in three slices of `vae_hidden_size`:
    # location, scale (diagonal covariance assumed) and hidden sample
    loc = tf.slice(activation(vae_encoder), [0, 0], [-1, vae_hidden_size])
    loc = tf.squeeze(loc, axis=0)
    scale = tf.slice(tf.nn.softplus(vae_encoder), [0, vae_hidden_size],
                     [-1, vae_hidden_size])
    scale = tf.squeeze(scale, 0)
    hidden_sample = tf.slice(tf.nn.softplus(vae_encoder),
                             [0, 2 * vae_hidden_size], [-1, vae_hidden_size])
    hidden_sample = tf.squeeze(hidden_sample, 0)

    # hidden distribution
    vae_hidden_distr = tf.contrib.distributions.MultivariateNormalDiag(
        loc, scale)

    # the hidden state fed to the decoder is the third encoder slice
    vae_hidden_state = hidden_sample

    # Monte-Carlo estimate of P(draw <= hidden_sample, component-wise)
    s = vae_hidden_distr.sample(int(100e4))
    in_box = tf.cast(tf.reduce_all(s <= hidden_sample, axis=-1),
                     vae_hidden_distr.dtype)
    vae_hidden_prob = tf.reduce_mean(in_box, axis=0)

    feed_decoder = tf.reshape(vae_hidden_state, shape=(-1, vae_hidden_size))
    vae_decoder = tf.matmul(feed_decoder,
                            weights_vae_decoder[0]) + bias_vae_decoder[0]
    vae_decoder = activation(vae_decoder)
    for (w_vae, b_vae) in zip(weights_vae_decoder[1:], bias_vae_decoder[1:]):
        vae_decoder = tf.matmul(vae_decoder, w_vae) + b_vae
        vae_decoder = activation(vae_decoder)

    # time-series reconstruction and ELBO loss
    # NOTE(review): the reconstruction distribution has constant location and
    # scale and `vae_decoder` is not referenced by the loss, so the likelihood
    # term does not depend on the decoder -- confirm this is intended.
    vae_reconstruction = tf.contrib.distributions.MultivariateNormalDiag(
        tf.constant(np.zeros(batch_size * sequence_len, dtype='float32')),
        tf.constant(np.ones(batch_size * sequence_len, dtype='float32')))
    likelihood = vae_reconstruction.log_prob(flattened_input)

    prior = tf.contrib.distributions.MultivariateNormalDiag(
        tf.constant(np.zeros(vae_hidden_size, dtype='float32')),
        tf.constant(np.ones(vae_hidden_size, dtype='float32')))

    divergence = tf.contrib.distributions.kl_divergence(
        vae_hidden_distr, prior)
    elbo = tf.reduce_mean(likelihood - divergence)

    optimizer_elbo = tf.train.AdamOptimizer(learning_rate_elbo).minimize(-elbo)

    # extract train and test
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.generate_batches(
        filename=data_path,
        window=sequence_len,
        stride=stride,
        mode='validation',
        non_train_percentage=.5,
        val_rel_percentage=.5,
        normalize=normalization,
        time_difference=False,
        td_method=None,
        subsampling=subsampling)

    # suppress second axis on Y values (the algorithm expects shapes like (n,))
    y_train = y_train[:, 0]
    y_valid = y_valid[:, 0]
    y_test = y_test[:, 0]

    # generate_batches may return more windows than targets: trim the windows
    if len(x_train) > len(y_train):
        x_train = x_train[:len(y_train)]
    if len(x_valid) > len(y_valid):
        x_valid = x_valid[:len(y_valid)]
    if len(x_test) > len(y_test):
        x_test = x_test[:len(y_test)]

    def _window_batch(windows, k):
        # k-th group of `batch_size` windows, laid out as (1, input, time)
        return windows[k * batch_size:(k + 1) * batch_size, :].T.reshape(
            1, sequence_len, batch_size)

    n_train_batches = int(np.floor(x_train.shape[0] / batch_size))
    n_stop_batches = int(stop_valid_percentage *
                         np.floor(x_valid.shape[0] / batch_size))
    n_test_batches = int(np.floor(x_test.shape[0] / batch_size))

    # train + early-stopping
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        last_error_on_valid = np.inf
        for e in range(epochs):
            for k in range(n_train_batches):
                # run VAE encoding-decoding
                sess.run(optimizer_elbo,
                         feed_dict={input_: _window_batch(x_train, k)})

            if stop_on_growing_error:
                # accumulate the validation loss and check the stop condition
                current_error_on_valid = .0
                for k in range(n_stop_batches):
                    current_error_on_valid += np.abs(
                        np.sum(
                            sess.run(
                                -elbo,
                                feed_dict={input_: _window_batch(x_valid,
                                                                 k)})))

                # stop when the loss grows, or (after the first epoch) when
                # its relative reduction is below `min_loss_improvment`
                if current_error_on_valid > last_error_on_valid or (
                        e != 0 and
                        np.abs(current_error_on_valid / last_error_on_valid)
                        > 1 - min_loss_improvment):
                    break
                last_error_on_valid = current_error_on_valid

        # The probability of each test window does not depend on the
        # threshold: estimate it once (one million draws per window) and
        # reuse it for every candidate, so that all the thresholds are also
        # compared on the same estimates.
        test_probabilities = np.array([
            sess.run(vae_hidden_prob,
                     feed_dict={input_: _window_batch(x_test, k)})
            for k in range(n_test_batches)
        ],
                                      dtype=float)

    # anomaly detection on test set
    y_test = y_test[:x_test.shape[0]]

    # caveat: the anomalies are defined by absolute position in the test set
    # (i.e. size matters!): train 50%, validation_relative 50%
    target_anomalies = np.zeros(
        shape=int(np.floor(y_test.shape[0] / batch_size)) * batch_size)
    target_anomalies[170 - sequence_len:210 - sequence_len] = 1
    condition_positive = np.argwhere(target_anomalies == 1)

    # the last `sequence_len` batches are never flagged
    can_be_flagged = np.arange(n_test_batches) < n_test_batches - sequence_len
    within_batch = np.arange(batch_size)

    # find the threshold that maximizes the precision
    best_precision = best_recall = .0
    best_threshold = .0
    for t in sigma_threshold_elbo:
        flagged_batches = np.flatnonzero((test_probabilities <= t)
                                         & can_be_flagged)
        # every position covered by a flagged batch is a predicted anomaly
        predicted_positive = (flagged_batches[:, np.newaxis] * batch_size +
                              within_batch).ravel()

        true_positives = len(
            np.intersect1d(condition_positive, predicted_positive))
        # both scores are 0 when nothing is flagged or nothing is expected
        if len(predicted_positive) and len(condition_positive):
            precision = true_positives / len(predicted_positive)
            recall = true_positives / len(condition_positive)
        else:
            precision = recall = .0

        print("Precision and recall for threshold: ", t, " is ",
              (precision, recall))

        if precision >= best_precision:
            best_threshold = t
            best_precision = precision
            best_recall = recall

    return best_precision, best_recall, best_threshold