def trainModelPipe(self, loss_type):
    encoder = EncoderRNN(input_size=self.n_features, hidden_size=self.hidden_size,
                         num_grulstm_layers=self.num_grulstm_layers,
                         batch_size=self.batch_size).to(self.device)
    decoder = DecoderRNN(input_size=1, hidden_size=self.hidden_size,
                         num_grulstm_layers=self.num_grulstm_layers,
                         fc_units=16, output_size=1).to(self.device)
    net_gru = Net_GRU(encoder, decoder, self.N_output, self.device).to(self.device)
    self.train_model(net_gru, batch_size=self.batch_size, loss_type=loss_type,
                     learning_rate=0.001, epochs=500, gamma=self.gamma,
                     print_every=50, eval_every=50, verbose=1, alpha=self.alpha)
    return net_gru
def run_seq2seq():
    (x_train, y_train), (x_test, y_test) = dataloader.load_HDFS(
        'data/HDFS_100k.log_structured.csv',
        label_file='data/anomaly_label.csv',
        train_ratio=0.8)

    tknzr = Tokenizer(lower=True, split=" ")
    tknzr.fit_on_texts(x_train)

    # making sequences:
    X_train = tknzr.texts_to_sequences(x_train)
    X_test = tknzr.texts_to_sequences(x_test)

    # autoencoder setup: each sequence is both the input and the target
    X_train = [(x, x) for x in X_train]
    X_test = [(x, x) for x in X_test]

    print(tknzr.word_index.keys())
    print(X_test)

    hidden_size = 32
    # +3 leaves room for special token indices (e.g. padding/SOS/EOS)
    encoder1 = EncoderRNN(len(tknzr.word_index.keys()) + 3, hidden_size).to(device)
    decoder1 = DecoderRNN(hidden_size, len(tknzr.word_index.keys()) + 3).to(device)

    trainIters(encoder1, decoder1, X_train, 5000, print_every=100)

    # Encode every test sequence and keep the final encoder output as its embedding.
    testing_pairs = [tensorsFromPair(i) for i in X_test]
    y_pred_outputs = []
    for iter in range(1, len(testing_pairs) + 1):
        testing_pair = testing_pairs[iter - 1]
        input_tensor = testing_pair[0]
        target_tensor = testing_pair[1]
        input_length = input_tensor.size(0)

        # Re-initialise the hidden state per sequence so embeddings do not
        # leak across test samples.
        encoder_hidden = encoder1.initHidden()
        encoder_outputs = torch.zeros(MAX_LENGTH, encoder1.hidden_size, device=device)
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder1(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]
        y_pred_outputs.append(encoder_output.cpu().data.numpy().flatten())

    # x_test_output = encoder1(x_test)
    # kmeans = KMeans(n_clusters=2)
    # y_pred = kmeans.fit_predict(y_pred_outputs)

    # Cluster the embeddings; DBSCAN noise points (-1) are treated as anomalies.
    dbscan = DBSCAN(eps=0.075, min_samples=100, metric="cosine")
    y_pred = dbscan.fit_predict(y_pred_outputs).tolist()
    y_pred = np.array([1 if i == -1 else 0 for i in y_pred])

    print(len(y_pred))
    print(len(y_test))
    print(y_pred)
    print(y_test)
    print(Counter(y_pred))
    print(Counter(y_test))

    print("Homogeneity Score: %s" % str(homogeneity_score(y_test, y_pred)))
    print("completeness_score: %s" % str(completeness_score(y_test, y_pred)))
    print("v_measure_score: %s" % str(v_measure_score(y_test, y_pred)))
    print("F1 score %s" % str(f1_score(y_test, y_pred)))
    print("Precision score %s" % str(precision_score(y_test, y_pred)))
    print("Recall score %s" % str(recall_score(y_test, y_pred)))
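# A minimal sketch of the tensorsFromPair helper assumed by run_seq2seq above,
# modelled on the PyTorch seq2seq tutorial: each (input, target) pair of integer
# sequences becomes a pair of column LongTensors with an end-of-sequence index
# appended. The EOS_token value and the device setup are placeholders, not taken
# from the original code; the "+ 3" vocabulary size above presumably reserves
# indices for such special tokens.
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EOS_token = 0  # placeholder index; Keras word indices start at 1, so 0 is unused

def tensorsFromPair(pair):
    def seq_to_tensor(seq):
        return torch.tensor(list(seq) + [EOS_token],
                            dtype=torch.long, device=device).view(-1, 1)
    return seq_to_tensor(pair[0]), seq_to_tensor(pair[1])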
# print statistics
losses_mse.append(loss_mse.item())
losses_dtw.append(loss_dtw)
losses_tdi.append(loss_tdi)
print(' Eval mse= ', np.array(losses_mse).mean(),
      ' dtw= ', np.array(losses_dtw).mean(),
      ' tdi= ', np.array(losses_tdi).mean())

print(f"batch_size: {batch_size}")

## TODO run with dtw implementation
encoder = EncoderRNN(input_size=3, hidden_size=128, num_grulstm_layers=2,
                     batch_size=batch_size).to(device)
decoder = DecoderRNN(input_size=1, hidden_size=128, num_grulstm_layers=2,
                     fc_units=16, output_size=1).to(device)
net_gru_dtw = Net_GRU(encoder, decoder, N_output, device).to(device)
train_model(net_gru_dtw, batch_size=batch_size, loss_type='dtw',
            learning_rate=0.001, epochs=500, gamma=gamma, print_every=50,
            eval_every=50, verbose=1)  # trailing args assumed; the original call is cut off here
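# One way the per-sample dtw / tdi values aggregated in the eval prints above could
# be computed (a sketch, assuming tslearn is installed; the tdi normalisation used
# here, a mean squared time shift along the optimal DTW alignment path, is an
# assumption rather than the original implementation).
from tslearn.metrics import dtw, dtw_path

def dtw_and_tdi(target, prediction):
    # target, prediction: 1-D sequences of length N_output
    loss_dtw = dtw(target, prediction)
    path, _ = dtw_path(target, prediction)
    loss_tdi = sum((i - j) ** 2 for i, j in path) / (len(target) ** 2)
    return loss_dtw, loss_tdi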
x_train = torch.from_numpy(x_train).contiguous()
y_train = torch.from_numpy(y_train).contiguous()
x_val = torch.from_numpy(x_val).contiguous()
y_val = torch.from_numpy(y_val).contiguous()

# Channel 0 holds the prediction target; the remaining channels are input features.
targets_train = y_train[:, :, :, [0]]
features_train = y_train[:, :, :, 1:]
targets_val = y_val[:, :, :, [0]]
features_val = y_val[:, :, :, 1:]
targets_test = y_test[:, :, :, [0]]
features_test = y_test[:, :, :, 1:]

encoder = EncoderRNN(input_size, hidden_size, n_layers, dropout)
decoder = DecoderRNN(input_size, hidden_size, output_size, n_layers, dropout)

# Resume from checkpoints if they exist.
if os.path.isfile(encoder_checkpoint):
    print("Loading encoder checkpoint...")
    encoder.load_state_dict(torch.load(encoder_checkpoint))
if os.path.isfile(decoder_checkpoint):
    print("Loading decoder checkpoint...")
    decoder.load_state_dict(torch.load(decoder_checkpoint))

encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr)

if use_cuda:
    # Assumed completion: the original snippet is truncated here; move both models to the GPU.
    encoder = encoder.cuda()
    decoder = decoder.cuda()
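# Saving counterpart to the checkpoint loading above (a sketch; it assumes the same
# encoder_checkpoint / decoder_checkpoint paths and would typically run after each
# training epoch or once training finishes).
torch.save(encoder.state_dict(), encoder_checkpoint)
torch.save(decoder.state_dict(), decoder_checkpoint)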