def test_fit_args(self):
    values, labels, missing, excludes = self._payload()
    tf.set_random_seed(1234)
    donut = Donut(
        h_for_p_x=lambda x: x,
        h_for_q_z=lambda x: x,
        x_dims=5,
        z_dims=3
    )
    trainer = DonutTrainer(donut, max_epoch=1)
    with self.test_session():
        # test no exclude
        trainer.fit(values=values, labels=labels, missing=missing,
                    mean=1., std=2.)

        # test shape error
        with pytest.raises(
                ValueError, match='`values` must be a 1-D array'):
            trainer.fit(values=np.array([[1.]]), labels=labels,
                        missing=missing, mean=1., std=2.)
        with pytest.raises(
                ValueError, match='The shape of `labels` does not agree '
                                  'with the shape of `values`'):
            trainer.fit(values=values, labels=labels[:-1],
                        missing=missing, mean=1., std=2.)
        with pytest.raises(
                ValueError, match='The shape of `missing` does not agree '
                                  'with the shape of `values`'):
            trainer.fit(values=values, labels=labels,
                        missing=missing[:-1], mean=1., std=2.)
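# The tests above rely on a `_payload` helper that is not shown here.
# A minimal sketch of what such a fixture might return (hypothetical; the
# real fixture may differ), assuming four equal-length 1-D arrays so that
# the shape assertions above hold:
def _payload(self):
    values = np.random.normal(size=30).astype(np.float32)
    labels = np.random.binomial(1, 0.1, size=30).astype(np.int32)
    missing = np.random.binomial(1, 0.1, size=30).astype(np.int32)
    excludes = np.logical_or(labels, missing)
    return values, labels, missing, excludes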
def test_fit(self):
    values, labels, missing, excludes = self._payload()
    with TemporaryDirectory() as tmpdir:
        tf.set_random_seed(1234)
        donut = Donut(
            h_for_p_x=lambda x: x,
            h_for_q_z=lambda x: x,
            x_dims=5,
            z_dims=3
        )
        trainer = DonutTrainer(
            donut, max_epoch=3, batch_size=7, valid_step_freq=50,
            lr_anneal_epochs=2
        )
        with self.test_session():
            trainer.fit(
                values=values, labels=labels, missing=missing,
                mean=1., std=2., excludes=excludes, summary_dir=tmpdir
            )
with tf.variable_scope('model') as model_vs:
    model = Donut(
        h_for_p_x=Sequential([
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
        ]),
        h_for_q_z=Sequential([
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
        ]),
        x_dims=120,
        z_dims=5,
    )

trainer = DonutTrainer(model=model, model_vs=model_vs, max_epoch=300)
predictor = DonutPredictor(model)

with tf.Session().as_default():
    trainer.fit(train_values, train_labels, train_missing, mean, std)

    print('Testing size:', np.shape(test_values))
    start_time = time.time()
    test_score = predictor.get_score(test_values, test_missing)
    end_time = time.time()
    print('time consuming', end_time - start_time)

    writer = csv.writer(open('donut_result.csv', 'w', newline=''))
    print(len(test_labels), len(test_score), len(test_values))
    for i in range(len(test_score)):
        # The row contents were truncated in the source; writing the
        # score alone here is an assumption.
        writer.writerow([test_score[i]])
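# A note on lengths: `get_score` returns one score per sliding window, so
# with x_dims=120 the scores cover only the last len(test_values) - 119
# points of the test series. A sanity check, assuming that convention:
assert len(test_score) == len(test_values) - 120 + 1
# When pairing scores with labels, drop the first x_dims - 1 labels, as
# the restore script below does with `test_labels[119:]`.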
with tf.variable_scope('model') as model_vs:
    model = Donut(
        h_for_p_x=Sequential([
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
        ]),
        h_for_q_z=Sequential([
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
        ]),
        x_dims=120,
        z_dims=5,
    )

# Train the Donut model, or use a trained model for prediction.
trainer = DonutTrainer(model=model, model_vs=model_vs)
predictor = DonutPredictor(model)

with tf.Session().as_default():
    # Training and saving are disabled here; the variables are restored
    # from an existing checkpoint instead.
    # trainer.fit(train_values, train_labels, train_missing, mean, std)
    # var_dict = get_variables_as_dict(model_vs)
    # saver = VariableSaver(var_dict, "donut_without_label_2.ckpt")
    # saver.save()

    # Restore variables from the checkpoint directory.
    saver = VariableSaver(get_variables_as_dict(model_vs),
                          "donut_without_label_2.ckpt")
    saver.restore()

    test_score = predictor.get_score(test_values, test_missing)
    # Scores start at index x_dims - 1 = 119 of the test series.
    result = np.array([test_labels[119:], test_score])
    # The remaining `np.savetxt` arguments were truncated in the source;
    # writing `result` with a comma delimiter is an assumption.
    np.savetxt('result_arti_sin2.csv', result, delimiter=',')
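# For the first run, before "donut_without_label_2.ckpt" exists, the
# commented-out lines above would be used instead; as one coherent block,
# based directly on those lines:
with tf.Session().as_default():
    trainer.fit(train_values, train_labels, train_missing, mean, std)
    var_dict = get_variables_as_dict(model_vs)
    saver = VariableSaver(var_dict, "donut_without_label_2.ckpt")
    saver.save()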
def test_construction_args(self):
    values, labels, missing, excludes = self._payload()
    tf.set_random_seed(1234)
    donut = Donut(h_for_p_x=lambda x: x, h_for_q_z=lambda x: x,
                  x_dims=5, z_dims=3)

    # test feed_dict
    is_training = tf.placeholder(tf.bool, ())
    trainer = DonutTrainer(donut, max_epoch=1,
                           feed_dict={is_training: True})
    with self.test_session():
        trainer.fit(values=values, labels=labels, missing=missing,
                    mean=1., std=2., excludes=excludes)

    # test valid_feed_dict
    trainer = DonutTrainer(donut, max_epoch=1,
                           valid_feed_dict={is_training: True})
    with self.test_session():
        trainer.fit(values=values, labels=labels, missing=missing,
                    mean=1., std=2., excludes=excludes)

    # test max_epoch is None and max_step is None
    with pytest.raises(ValueError,
                       match='At least one of `max_epoch` and `max_step` '
                             'should be specified'):
        _ = DonutTrainer(donut, max_epoch=None, max_step=None)

    # test optimizer and optimizer_params
    trainer = DonutTrainer(donut, max_epoch=1,
                           optimizer=tf.train.MomentumOptimizer,
                           optimizer_params={'momentum': 0.01})
    with self.test_session():
        trainer.fit(values=values, labels=labels, missing=missing,
                    mean=1., std=2., excludes=excludes)
def donut_test(src_dir, output_dir, file, batch):
    if os.path.exists(output_dir + "performance-donut-" + str(batch) + ".csv"):
        perform = pd.read_csv(output_dir + "performance-donut-" + str(batch) + ".csv")
    else:
        perform = pd.DataFrame({
            "file": [], "storage": [], "train-time": [], "codisp-time": [],
            "test-time": [], "precision": [], "recall": [],
            "best-F1": [], "best-threshold": []
        })
    perform = perform.append([{
        'file': file, "storage": 0.0, "train-time": 0.0, "codisp-time": 0.0,
        "test-time": 0.0, "precision": 0.0, "recall": 0.0,
        "best-F1": 0.0, "best-threshold": 0.0
    }], ignore_index=True)
    perform.index = perform["file"]

    data = pd.read_csv(src_dir + file)
    timestamp, value, labels = data["timestamp"], data["value"], data["anomaly"]
    missing = np.zeros(len(timestamp))

    test_portion = 0.5
    test_n = int(len(value) * test_portion)
    train_values, test_values = value[:-test_n], value[-test_n:]
    train_labels, test_labels = labels[:-test_n], labels[-test_n:]
    train_time, test_time = timestamp[:-test_n], timestamp[-test_n:]
    train_missing, test_missing = missing[:-test_n], missing[-test_n:]

    train_values, mean, std = standardize_kpi(
        train_values, excludes=np.logical_or(train_labels, train_missing))
    test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)

    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=120,
            z_dims=5,
        )

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    predictor = DonutPredictor(model)

    with tf.Session().as_default():
        start = time.time()
        trainer.fit(train_values, train_labels, train_missing, mean, std)
        end = time.time()
        perform.loc[file, "train-time"] = end - start

        start = time.time()
        test_score = predictor.get_score(test_values, test_missing)
        end = time.time()
        perform.loc[file, "test-time"] = end - start

        storage = get_size(trainer) + get_size(predictor)
        perform.loc[file, "storage"] = storage

        pd.DataFrame({
            "timestamp": test_time[-len(test_score):],
            "score": test_score
        }).to_csv(output_dir + "test-donut" + file, index=False)

        best_F1, best_threshold, precision, recall = compute_best_F1(
            src_dir + file, output_dir + "test-donut" + file,
            reverse=True, mean_start=False)
        perform.loc[file, "best-F1"] = best_F1
        perform.loc[file, "best-threshold"] = best_threshold
        perform.loc[file, "precision"] = precision
        perform.loc[file, "recall"] = recall

    perform.to_csv(output_dir + "performance-donut-" + str(batch) + ".csv",
                   index=False)
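# A minimal NumPy sketch of the assumed behavior of `standardize_kpi`:
# compute mean/std over the non-excluded points only, then apply
# (values - mean) / std to the whole series. The real implementation in
# donut/preprocessing.py may differ in details:
def standardize_kpi_sketch(values, mean=None, std=None, excludes=None):
    values = np.asarray(values, dtype=np.float32)
    if mean is None:
        keep = (~np.asarray(excludes, dtype=bool) if excludes is not None
                else np.ones_like(values, dtype=bool))
        mean, std = values[keep].mean(), values[keep].std()
    return (values - mean) / std, mean, std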
def generate_score(number):
    # Read the raw data.
    data_dir_path = 'C:/Users/Administrator/Downloads/research/donut-master/SMD/data_concat/data-' + number + '.csv'
    data = np.array(pd.read_csv(data_dir_path, header=None), dtype=np.float64)
    tag_dir_path = './SMD/test_label/machine-' + number + '.csv'
    tag = np.array(pd.read_csv(tag_dir_path, header=None), dtype=np.int32)
    labels = np.append(np.zeros(int(len(data) / 2)), tag)

    # Pick one column.
    values = data[:, 1]
    timestamp = np.arange(len(data)) + 1

    # If there is no label, simply use all zeros.
    # labels = np.zeros_like(values, dtype=np.int32)

    # Complete the timestamp, and obtain the missing point indicators.
    # (`complete_timestamp` returns three values, including `missing`.)
    timestamp, missing, (values, labels) = \
        complete_timestamp(timestamp, (values, labels))

    # Split the training and testing data.
    test_portion = 0.5
    test_n = int(len(values) * test_portion)
    train_values, test_values = values[:-test_n], values[-test_n:]
    train_labels, test_labels = labels[:-test_n], labels[-test_n:]
    train_missing, test_missing = missing[:-test_n], missing[-test_n:]

    # Standardize the training and testing data.
    train_values, mean, std = standardize_kpi(train_values,
                                              excludes=train_labels)
    test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)

    import tensorflow as tf
    from donut import Donut
    from tensorflow import keras as K
    from tfsnippet.modules import Sequential

    # We build the entire model within the scope of `model_vs`;
    # it should hold exactly all the variables of `model`, including
    # the variables created by Keras layers.
    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=120,
            z_dims=5,
        )

    from donut import DonutTrainer, DonutPredictor

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    predictor = DonutPredictor(model)

    with tf.Session().as_default():
        # `fit` requires the `missing` indicator as a positional argument.
        trainer.fit(train_values, train_labels, train_missing, mean, std)
        test_score = predictor.get_score(test_values, test_missing)

    if not os.path.exists('./score'):
        os.makedirs('./score')
    np.save('./score/' + number + '.npy', test_score)
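# A toy demonstration of `complete_timestamp`: given timestamps with a
# uniform interval of 1 and one gap, it fills the gap, zero-fills the
# arrays at the missing point, and returns a missing indicator. The
# expected values in the comments assume that zero-filling behavior:
ts = np.array([1, 2, 4, 5])
vals = np.array([10., 11., 13., 14.])
ts_full, missing_demo, (vals_full,) = complete_timestamp(ts, (vals,))
# ts_full      -> [1, 2, 3, 4, 5]
# missing_demo -> [0, 0, 1, 0, 0]
# vals_full    -> [10., 11., 0., 13., 14.]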
def vae_donut(ts_obj, window_size, mcmc_iteration, latent_dim,
              gaussian_window_size, step_size,
              plot_reconstruction=False, plot_anomaly_score=False):
    # The authors use window_size = 120 and mcmc_iteration = 10.
    # https://github.com/kratzert/finetune_alexnet_with_tensorflow/issues/8
    tf.reset_default_graph()

    start = time.time()

    # If there are missing time steps, we do NOT fill them with NaNs,
    # because donut will replace them with 0s via `complete_timestamp`.
    # See line 6 in https://github.com/NetManAIOps/donut/blob/master/donut/preprocessing.py
    timestamp = ts_obj.dataframe["timestamp"].values
    values = ts_obj.dataframe["value"].values
    labels = np.zeros_like(ts_obj.dataframe["value"].values, dtype=np.int32)

    # Complete the timestamp and obtain the missing point indicators;
    # missing points are replaced with 0s.
    # donut cannot handle this date format for some reason.
    if ts_obj.dateformat == "%Y-%m":
        rng = pd.date_range('2000-01-01', periods=len(values), freq='T')
        timestamp, missing, (values, labels) = complete_timestamp(
            rng, (values, labels))
    else:
        timestamp, missing, (values, labels) = complete_timestamp(
            timestamp, (values, labels))

    # Standardize the training and testing data.
    values, mean, std = standardize_kpi(
        values, excludes=np.logical_or(labels, missing))

    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=window_size,
            z_dims=latent_dim,
        )

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    predictor = DonutPredictor(model)

    with tf.Session().as_default():
        trainer.fit(values, labels, missing, mean, std)
        score = predictor.get_score(values, missing)

        # If the time series is [1, 2, 3, 4, ...] and the window size is 3,
        # this gives us [[1, 2, 3], [2, 3, 4], ...].
        ts_strided = ah.as_sliding_window(values, window_size)
        ts_strided = my_func_float(np.array(ts_strided, dtype=np.float32))
        missing_strided = ah.as_sliding_window(missing, window_size)
        missing_strided = my_func_int(np.array(missing_strided, dtype=np.int32))

        x = model.vae.reconstruct(
            iterative_masked_reconstruct(reconstruct=model.vae.reconstruct,
                                         x=ts_strided,
                                         mask=missing_strided,
                                         iter_count=mcmc_iteration,
                                         back_prop=False))

        # `x` is a :class:`tfsnippet.stochastic.StochasticTensor`, from which
        # you may derive many useful outputs, for example:
        # x.tensor.eval()                             # the `x` samples
        # x.log_prob(group_ndims=0).eval()            # element-wise log p(x|z) of sampled x
        # x.distribution.log_prob(ts_strided).eval()  # the reconstruction probability
        # x.distribution.mean.eval(), x.distribution.std.eval()  # mean and std of p(x|z)
        tensor_reconstruction_probabilities = \
            x.distribution.log_prob(ts_strided).eval()

    # Because of the way striding works, we take all scores from the first
    # slide, and then the last point's score from each remaining slide.
    reconstruction_probabilities = list(tensor_reconstruction_probabilities[0])
    for i in range(1, len(tensor_reconstruction_probabilities)):
        slide = tensor_reconstruction_probabilities[i]
        reconstruction_probabilities.append(slide[-1])

    if ts_obj.miss:
        ref_date_range = ch.get_ref_date_range(ts_obj.dataframe,
                                               ts_obj.dateformat,
                                               ts_obj.timestep)
        gaps = ref_date_range[~ref_date_range.isin(ts_obj.dataframe["timestamp"])]
        filled_df = ch.fill_df(ts_obj.dataframe, ts_obj.timestep,
                               ref_date_range, "fill_nan")
        filled_df["reconstruction_probabilities"] = reconstruction_probabilities
        # Remove NaNs.
        filled_df = filled_df.dropna()
        reconstruction_probabilities = list(
            filled_df["reconstruction_probabilities"].values)

    reconstruction_probabilities = [abs(item)
                                    for item in reconstruction_probabilities]

    anomaly_scores = ah.determine_anomaly_scores_error(
        reconstruction_probabilities,
        np.zeros_like(reconstruction_probabilities),
        ts_obj.get_length(), gaussian_window_size, step_size)

    end = time.time()

    if plot_reconstruction:
        plt.subplot(211)
        # See lines 98-100 of https://github.com/NetManAIOps/donut/blob/master/donut/prediction.py
        plt.title("Negative of Reconstruction Probabilities")
        plt.plot(reconstruction_probabilities)
        # plt.ylim([.99, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(), color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    if plot_anomaly_score:
        plt.subplot(211)
        plt.title("Anomaly Scores")
        plt.plot(anomaly_scores)
        plt.ylim([.998, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(), color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    return {
        "Anomaly Scores": anomaly_scores,
        "Time": end - start,
        "Reconstruction Probabilities": reconstruction_probabilities
    }
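# `ah.as_sliding_window` is a project-specific helper. A generic NumPy
# stand-in with the behavior described above (overlapping windows with
# stride 1), assuming NumPy >= 1.20:
def as_sliding_window_sketch(arr, window_size):
    # [1, 2, 3, 4] with window_size=3 -> [[1, 2, 3], [2, 3, 4]]
    return np.lib.stride_tricks.sliding_window_view(np.asarray(arr),
                                                    window_size)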
with tf.variable_scope('model') as model_vs:
    model = Donut(
        h_for_p_x=Sequential([
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
        ]),
        h_for_q_z=Sequential([
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
            K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                           activation=tf.nn.relu),
        ]),
        x_dims=120,
        z_dims=5,
    )

# Remember to get the model variables after the birth of a `predictor`
# or a `trainer`. A :class:`Donut` instance does not build the graph
# until :meth:`Donut.get_score` or :meth:`Donut.get_training_objective`
# is called, which is done by the `predictor` or the `trainer`.

# Save variables to `save_dir`.
trainer = DonutTrainer(model=model, model_vs=model_vs)
trainer.fit(train_values, train_labels, train_missing, mean, std)
var_dict = get_variables_as_dict(model_vs)
saver = VariableSaver(var_dict, save_dir)
saver.save()

with tf.Session().as_default():
    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=120,
            z_dims=5,
        )
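    # The original snippet is truncated here. Presumably it continues by
    # restoring the saved variables, as in the earlier restore example;
    # a minimal sketch using the same helpers (an assumption, including
    # the `test_values` / `test_missing` names):
    predictor = DonutPredictor(model)
    saver = VariableSaver(get_variables_as_dict(model_vs), save_dir)
    saver.restore()
    test_score = predictor.get_score(test_values, test_missing)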