def train(self, train_features, train_targets, valid_features, valid_targets):
    """Train self.model with mini-batch Adam and MSE loss, using early stopping
    on validation MSE, then save the trained model under Models/<exp_name>.

    NOTE(review): this chunk appears to be a stray duplicate of
    DNN_regressor.train defined later in the file — confirm and deduplicate.

    Args:
        train_features, train_targets: arrays used to build the training Dataset.
        valid_features, valid_targets: arrays used to build the validation Dataset.
    """
    # build up datasets
    config = self.config
    #print(np.shape(train_features))
    #print(np.shape(valid_features))
    self.train_dataset = Dataset()
    self.train_dataset.build_from_data(train_features, train_targets)
    self.valid_dataset = Dataset()
    self.valid_dataset.build_from_data(valid_features, valid_targets)
    stop_flag = False
    batch_size = config.batch_size
    iters = 0
    # NOTE(review): lr_schedule is built but never used — the active optimizer
    # below uses a constant learning rate; only the commented-out Adadelta
    # consumed the schedule. Confirm which behavior is intended.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=config.dnn_lr,
        decay_steps=1000,
        decay_rate=0.90,
        staircase=True)
    #optimizer = tf.keras.optimizers.Adadelta(learning_rate=lr_schedule)
    optimizer = tf.keras.optimizers.Adam(learning_rate=config.dnn_lr)
    best_valid_metric = 1e10  # tracks the lowest validation MSE seen so far
    no_progress_count = 0     # consecutive validation checks without improvement
    while not stop_flag:
        batch_data = self.train_dataset.next_batch(batch_size)
        x_train = batch_data['input']
        y_train = batch_data['target']
        with tf.GradientTape() as tape:
            predictions = self.model(x_train)
            # flatten (batch, 1) output to (batch,) so it aligns with targets
            predictions = tf.reshape(predictions, [-1])
            loss = tf.keras.losses.MSE(y_train, predictions)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        optimizer.apply_gradients(
            zip(gradients, self.model.trainable_variables))
        # Periodic validation; early-stop after >10 checks without improvement.
        if iters % config.dnn_valid_freq == 0:
            valid_metric = self.test(self.valid_dataset, metrics=['MSE'])[0]
            print('Iter: {}, train_MSE: {}, valid_MSE: {}'.
                  format(iters, loss.numpy(), valid_metric))
            if valid_metric > best_valid_metric:
                no_progress_count += 1
            else:
                no_progress_count = 0
                best_valid_metric = valid_metric
            if no_progress_count > 10:
                stop_flag = True
        # Hard cap on total iterations regardless of validation progress.
        if iters > config.max_iterations:
            stop_flag = True
        iters += 1
    # root_path is expected to be defined at module level — not visible here.
    saved_model_path = os.path.join(root_path, 'Models', self.exp_name)
    tf.saved_model.save(self.model, saved_model_path)
    print('Model saved at {}'.format(saved_model_path))
default=10, help= "how many times we repeat the experiments to obtain the average performance" ) parser.add_argument("--output", type=str, default='./results/performance_global.csv', help="the output file path") parser.add_argument("--known_outliers", type=int, default=10, help="the number of labeled outliers available at hand") args = parser.parse_args() data = Dataset(mode="other") def get_train(data, args): x_train, y_train = data.X_train, data.Y_train if mode == "unsupervised": x_train = x_train[np.where(y_train == 0)[0]] if mode == "semi_supervised": outlier_ids = np.where(y_train == 1)[0] outlier_to_keep = np.random.choice(outlier_ids, int(args.known_outliers / 2)) outlier_to_remove = np.setdiff1d(outlier_ids, outlier_to_keep) x_train = np.delete(x_train, outlier_to_remove, axis=0)
parser.add_argument("--output", type=str,
                    default='./results/performance_no_bn.csv',
                    help="the output file path")
parser.add_argument("--weight_decay", type=float, default=0.05,
                    help="the regularization weight")
# Fix: the help text was a copy-paste of --weight_decay's ("the regularization
# weight"); this flag actually selects the compute device.
parser.add_argument("--device", type=str, default="cuda",
                    help="the device to run on")
args = parser.parse_args()
data = Dataset(mode="other")
# Train only on samples labeled as inliers (one-class setting).
data.X_train = data.X_train[np.where(data.Y_train == 0)]


def train_and_save(args, data):
    """Fit a Deep SVDD model ``args.runs`` times and collect AUC-ROC / AP
    on the validation split for each run.

    NOTE(review): this function appears truncated in this chunk — the loop
    tail is the last visible statement; confirm aggregation/saving against
    the full file.
    """
    rauc = np.zeros(args.runs)
    ap = np.zeros(args.runs)
    print("Mode :", args.mode, "Rep dim :", args.rep_dim,
          "L2 weight :", args.weight_decay)
    for i in range(args.runs):
        # DSVDD is configured directly from the parsed CLI namespace.
        d_svdd = DSVDD(**args.__dict__)
        d_svdd.fit(data.X_train, verbose=False)
        scores = d_svdd.decision_function(data.X_val)
        rauc[i], ap[i] = aucPerformance(scores, data.Y_val)
class DNN_regressor(Basic_regressor):
    """Feed-forward MLP regressor (Keras) with early-stopped MSE training.

    The model maps ``config.dim_features`` inputs to a single scalar through
    Dense layers 128-256-64-16-1 (ReLU hidden activations).
    """

    def __init__(self, config=None, exp_name='new_exp'):
        # config: experiment configuration object (dim_features, batch_size,
        # dnn_lr, dnn_valid_freq, max_iterations, neighborhood, ...).
        # exp_name: subdirectory name used when saving the trained model.
        self.config = config
        self.exp_name = exp_name
        self._build_model()

    def _build_model(self):
        """Construct the Keras MLP and store it on ``self.model``.

        NOTE: when ``config.neighborhood`` is truthy no model is built here —
        presumably another code path supplies it; confirm against the caller.
        """
        if not self.config.neighborhood:
            model = tf.keras.Sequential([
                tf.keras.layers.Dense(
                    128,
                    activation='relu',
                    input_shape=(self.config.dim_features, )),
                tf.keras.layers.Dense(256, activation='relu'),
                tf.keras.layers.Dense(64, activation='relu'),
                tf.keras.layers.Dense(16, activation='relu'),
                tf.keras.layers.Dense(1)
            ])
            self.model = model

    def train(self, train_features, train_targets, valid_features,
              valid_targets):
        """Train with mini-batch Adam on MSE, early-stopping on validation MSE,
        then save the model under ``<root_path>/Models/<exp_name>``.

        Args:
            train_features, train_targets: arrays for the training Dataset.
            valid_features, valid_targets: arrays for the validation Dataset.
        """
        config = self.config
        # Build up datasets from raw arrays.
        self.train_dataset = Dataset()
        self.train_dataset.build_from_data(train_features, train_targets)
        self.valid_dataset = Dataset()
        self.valid_dataset.build_from_data(valid_features, valid_targets)

        stop_flag = False
        batch_size = config.batch_size
        iters = 0
        # Constant learning rate; a decaying schedule was previously sketched
        # here but never wired into the active optimizer, so it was removed.
        optimizer = tf.keras.optimizers.Adam(learning_rate=config.dnn_lr)
        best_valid_metric = 1e10  # lowest validation MSE observed so far
        no_progress_count = 0     # consecutive validations without improvement

        while not stop_flag:
            batch_data = self.train_dataset.next_batch(batch_size)
            x_train = batch_data['input']
            y_train = batch_data['target']
            with tf.GradientTape() as tape:
                predictions = self.model(x_train)
                # Flatten (batch, 1) output to (batch,) to match the targets.
                predictions = tf.reshape(predictions, [-1])
                loss = tf.keras.losses.MSE(y_train, predictions)
            gradients = tape.gradient(loss, self.model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, self.model.trainable_variables))

            # Periodic validation; stop after >10 checks with no improvement.
            if iters % config.dnn_valid_freq == 0:
                valid_metric = self.test(self.valid_dataset,
                                         metrics=['MSE'])[0]
                print('Iter: {}, train_MSE: {}, valid_MSE: {}'.format(
                    iters, loss.numpy(), valid_metric))
                if valid_metric > best_valid_metric:
                    no_progress_count += 1
                else:
                    no_progress_count = 0
                    best_valid_metric = valid_metric
                if no_progress_count > 10:
                    stop_flag = True
            # Hard iteration cap regardless of validation progress.
            if iters > config.max_iterations:
                stop_flag = True
            iters += 1

        # root_path is expected to be defined at module level.
        saved_model_path = os.path.join(root_path, 'Models', self.exp_name)
        tf.saved_model.save(self.model, saved_model_path)
        print('Model saved at {}'.format(saved_model_path))

    def predict(self, features, load_model_path=None):
        """Return flat predictions for ``features``; optionally load a saved
        model first from ``load_model_path``."""
        if load_model_path is not None:  # idiom fix: was `not x is None`
            self.model = tf.keras.models.load_model(load_model_path)
        predictions = self.model.predict(features)
        return np.reshape(predictions, [-1])

    def load_model(self, load_model_path):
        """Replace the current model with one loaded from disk."""
        self.model = tf.keras.models.load_model(load_model_path)

    def test(self, dataset, metrics=None):
        """Evaluate the model on one full pass over ``dataset``.

        Args:
            metrics: list of metric names for ``evaluation``; defaults to
                ['MSE'].  (Default moved to a None sentinel to avoid the
                mutable-default-argument pitfall; behavior is unchanged.)

        Returns:
            Whatever ``evaluation`` returns for the requested metrics.
        """
        if metrics is None:
            metrics = ['MSE']
        test_data = dataset.next_batch(dataset._num_examples)
        predictions = self.model.predict_on_batch(test_data['input'])
        predictions = np.reshape(predictions, [-1])
        return evaluation(predictions, test_data['target'], 0, 1,
                          metrics=metrics)
    # Tail of a function whose definition starts before this chunk —
    # computes the PR-based score and optionally persists it.
    auc = plot_precision_recall([scores], [Y_val], name=['Val'],
                                name_model=name)
    if save:
        writeResults(name, dim=contamination, auc=auc,
                     path="./results/auc_performance.csv", std_auc=0.0)
    return auc


if __name__ == '__main__':
    data = Dataset()
    # NOTE(review): keeps samples with Y_train == 1 — looks like the
    # *outlier* class here; confirm the intended split.
    X_train = data.X_train[np.where(data.Y_train == 1)[0]]
    ## Training KNN
    contamination = 1000
    train_and_predict("KNN", data.X_train, data.X_val, data.Y_val, "KNN",
                      contamination=contamination, save=False)
    ## Training Iforest — loop body continues past this chunk.
    auc = np.zeros(10)
    for n_run in range(auc.shape[0]):
def run_devnet(args):
    """Run the deviation-network (DevNet) fraud experiment ``args.runs`` times
    and write averaged AUC-PR results to ``args.output``.

    In "unsupervised" mode, pseudo-labels are first derived from LeSiNN
    outlier scores; otherwise the dataset's own labels are used.
    """
    print("Chosen mode :", args.mode)
    nm = 'fraud'
    network_depth = int(args.network_depth)
    random_seed = args.ramdn_seed  # NOTE(review): arg name looks like a typo of "random_seed"
    runs = args.runs
    rauc = np.zeros(runs)  # per-run AUC-ROC
    ap = np.zeros(runs)    # per-run average precision (AUC-PR)
    filename = nm.strip()
    # data_format is read by helpers elsewhere in the module.
    global data_format
    data_format = int(args.data_format)
    data = Dataset(mode="other")
    if args.mode == "unsupervised":
        # Score the training set against itself; the top known_outliers
        # scores become pseudo-outliers, the rest pseudo-inliers.
        outlier_scores = lesinn(data.X_train, data.X_train)
        ind_scores = np.argsort(outlier_scores.flatten())
        inlier_ids, outlier_ids = ind_scores[:-args.known_outliers:], ind_scores[-args.known_outliers:]
        # Keep only pseudo-inliers that are also labeled inliers.
        inlier_ids = np.intersect1d(inlier_ids, np.where(data.Y_train == 0)[0])
    #print("Original training size: %d, No. outliers: %d" % (x_train.shape[0],
    #    n_outliers))
    train_time = 0
    test_time = 0
    for i in np.arange(runs):
        print(filename + ': round ' + str(i))
        x_train, x_test, y_train, y_test = data.X_train, data.X_val, data.Y_train, data.Y_val
        if args.mode == "unsupervised":
            # NOTE(review): this mutates data.Y_train in place (no copy) —
            # the relabeling persists across runs; confirm intended.
            y_train[inlier_ids] = 0; y_train[outlier_ids] = 1
        outlier_indices = np.where(y_train == 1)[0]
        outliers = x_train[outlier_indices]
        n_outliers_org = outliers.shape[0]
        inlier_indices = np.where(y_train == 0)[0]
        n_outliers = len(outlier_indices)
        # Number of noise points needed so that noise makes up cont_rate of
        # the inlier pool: n_noise / (n_inliers + n_noise) == cont_rate.
        n_noise = len(np.where(y_train == 0)[0]) * args.cont_rate / (1. - args.cont_rate)
        n_noise = int(n_noise)
        rng = np.random.RandomState(random_seed)
        if data_format == 0:
            # Subsample labeled outliers down to the known_outliers budget.
            if n_outliers > args.known_outliers:
                mn = n_outliers - args.known_outliers
                remove_idx = rng.choice(outlier_indices, mn, replace=False)
                x_train = np.delete(x_train, remove_idx, axis=0)
                y_train = np.delete(y_train, remove_idx, axis=0)
            # Contaminate the inlier pool with noisy copies of outliers,
            # labeled 0 so they act as unlabeled anomalies.
            if args.cont_rate > 0:
                noises = inject_noise(outliers, n_noise, random_seed)
                x_train = np.append(x_train, noises, axis = 0)
                y_train = np.append(y_train, np.zeros((noises.shape[0], 1)))
        # Recompute index sets after deletion/injection above.
        outlier_indices = np.where(y_train == 1)[0]
        inlier_indices = np.where(y_train == 0)[0]
        #print(y_train.shape[0], outlier_indices.shape[0], inlier_indices.shape[0], n_noise)
        input_shape = x_train.shape[1:]
        n_samples_trn = x_train.shape[0]
        n_outliers = len(outlier_indices)
        print("Training data size: %d, No. outliers: %d" % (x_train.shape[0], n_outliers))
        start_time = time.time()
        input_shape = x_train.shape[1:]
        epochs = args.epochs
        batch_size = args.batch_size
        nb_batch = args.nb_batch
        model = deviation_network(input_shape)
        #print(model.summary())
        model_name = "./model/" + args.mode + "_" + str(args.cont_rate) + "cr_" + str(args.known_outliers) +"d.h5"
        # Checkpoint best weights (by training loss) for prediction below.
        checkpointer = ModelCheckpoint(model_name, monitor='loss', verbose=0,
                                       save_best_only = True, save_weights_only = True)
        model.fit_generator(batch_generator_sup(x_train, outlier_indices, inlier_indices, batch_size, nb_batch, rng),
                            steps_per_epoch = nb_batch,
                            epochs = epochs,
                            callbacks=[checkpointer], verbose = True)
        train_time += time.time() - start_time
        start_time = time.time()
        # Score the validation split using the checkpointed weights.
        scores = load_model_weight_predict(model_name, input_shape,
                                           network_depth, x_test)
        test_time += time.time() - start_time
        rauc[i], ap[i] = aucPerformance(scores, y_test)
    mean_auc = np.mean(rauc)
    #std_auc = np.std(rauc)
    mean_aucpr = np.mean(ap)
    std_aucpr = np.std(ap)
    train_time = train_time/runs
    test_time = test_time/runs
    print("average AUC-ROC: %.4f, average AUC-PR: %.4f" % (mean_auc, mean_aucpr))
    #print("average runtime: %.4f seconds" % (train_time + test_time))
    writeResults(filename+'_vrai_'+str(network_depth), n_samples_trn,
                 n_outliers_org, n_outliers, mean_aucpr, std_aucpr,
                 args.cont_rate, path=args.output)
def main(args=None):
    """Drive the robot-arm poking simulation for ``args.episodes`` episodes.

    In expert mode (not args.test) the scripted Expert produces each move;
    in test mode a trained torch model predicts the poke direction from the
    camera image(s) and the expert supplies only the orientation part.
    """
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)
    # Resuming from a nonzero index appends to the existing data file.
    if args.start_idx != 0:
        args.file_mode = 'a'
    sim = Simulation(args.verbose, args.stereo_images)
    succes = 0  # count of episodes that reached the goal (sic: "success")
    print("test:", args.test)
    expert = Expert(args.verbose)
    dataset = Dataset(args.verbose, args.stereo_images, args.data_file_name,
                      image_path=args.image_path,
                      data_file_path=args.data_file_path,
                      filemode=args.file_mode, start_idx=args.start_idx)
    if args.test:
        #def get_model(normalise_poke_vec=False, complex_mlp=False, is_stereo=False, p_dropout=0):
        model = get_model(complex_mlp=args.complex_mlp,
                          is_stereo=args.stereo_images)
        model.load_state_dict(torch.load(args.model_name))#, map_location=torch.device('cpu')))
        model.eval()
        # Run inference on whatever device the checkpoint's parameters are on.
        device = next(model.parameters()).device
        #print(device)
    t = tqdm(range(args.episodes))
    for episode in t:
        for _ in range(args.MaxSteps):
            state = sim.get_state()
            if not args.test:
                # Expert (scripted) policy computes the move directly.
                tcp_pose = sim.robotArm.get_tcp_pose()
                #sim.draw_coordinate_frame(*tcp_pose)
                poke = expert.calculate_move(tcp_pose, state.item, state.goal)
                #print("exportPoke")
                #dataset.add(state.image, poke)
            else:
                # Learned policy: ImageNet-normalized image(s) -> poke vector.
                tf = torchvision.transforms.Compose([
                    torchvision.transforms.ToTensor(),
                    torchvision.transforms.Normalize(
                        mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])])
                if args.stereo_images:
                    img1 = state.image[0].convert("RGB")
                    img2 = state.image[1].convert("RGB")
                    x1 = tf(img1).unsqueeze_(0).to(device)
                    x2 = tf(img2).unsqueeze_(0).to(device)
                    y = model(x1, x2)
                else:
                    img = state.image.convert("RGB")
                    x = tf(img).unsqueeze_(0).to(device)
                    y = model(x)
                # Rescale the predicted direction to the expert's step size.
                poke = y.cpu().detach().numpy().flatten()
                poke = Geometry.unit_vector(poke) * expert.step_size
                #print(poke)
                # Take orientation (elements 3:) from the expert's move,
                # position from the model prediction.
                tcp_pose = sim.robotArm.get_tcp_pose()
                poke_for_ori = expert.calculate_move(tcp_pose, state.item,
                                                     state.goal)
                joined = np.concatenate([poke, poke_for_ori[3:]])  # why??
            # NOTE(review): `joined` is only bound in the test branch above;
            # in expert mode this would raise NameError unless this call is
            # meant to sit inside the else branch — indentation was lost in
            # this chunk, confirm against the original file.
            sim.set_robot_pose(*joined, mode="rel", useLimits=True)
            #sim.set_robot_pose(*poke, mode="rel", useLimits=True)
            sim.step(False)
            if expert.STATE == ON_GOAL:
                succes += 1
                break
        t.set_description(f'Succes: {succes} | Succes Rate: {succes / (episode + 1):.4f}')
        dataset.next_episode()
        sim.reset_environment()
    #dataset.next_episode()
    print("Succes: {} | Succes rate: {:.4f}%".format(succes, 100*succes/args.episodes))
    print("Done!\nTerminating...")
    sim.terminate()