def create_adversarial_validation_images(self):
    classifier = Classifier(self._sess, self._data, epochs=350,
                            learning_rate=0.01, batch_size=32)
    classifier.execute()

    length = 2000

    # Creates a surrogate model and returns the perturbed NumPy validation set.
    x_val_adv = Adversarial_Attack(self._sess, self._data,
                                   dataset="_x_val_set_", length=length,
                                   attack="DEEPFOOL",
                                   epochs=12).attack(model=classifier.model)

    scores_leg = classifier.model.evaluate(
        self._data.x_val[self._idx_adv][:length],
        self._data.y_val[self._idx_adv][:length], verbose=1)
    scores = classifier.model.evaluate(
        x_val_adv[:length],
        self._data.y_val[self._idx_adv][:length], verbose=1)

    print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
          (scores_leg[1] * 100))
    print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
          (scores[1] * 100))

    helpers.plot_images(self._data.x_val[self._idx_adv][:length],
                        x_val_adv[:length], x_val_adv.shape)
def train_test(features_vector: FeatureVector, classifier: Classifier,
               data: ClassifierData):
    # Build the train features vector.
    with Timer('building train features', VERBOSE):
        x_train_features = features_vector.convert_to_features(
            data.x_train, VERBOSE)

    # Train.
    with Timer('training', VERBOSE):
        classifier.train(x_train_features, data.y_train)

    # Build the test features vector.
    with Timer('building test features', VERBOSE):
        x_test_features = features_vector.convert_to_features(
            data.x_test, VERBOSE)

    # Test.
    if VERBOSE:
        print(features_vector.name)
        print(classifier.report(x_test_features, data.y_test))

    return classifier.f1_micro(x_test_features, data.y_test)
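# Hedged usage sketch for train_test(). The concrete FeatureVector and
# Classifier subclasses and the ClassifierData constructor below are
# hypothetical placeholders; only the call pattern comes from the function
# above.
data = ClassifierData(x_train, y_train, x_test, y_test)  # hypothetical ctor
f1 = train_test(SomeFeatureVector(), SomeClassifier(), data)
print('f1_micro: %.3f' % f1)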
# anlz = Analyzer(tr_path,
#                 te_path,
#                 cfg['data']['classes_list'],
#                 cfg['analysis']['figures_path'])
# anlz.run()

##############################################################
#                         CNN MODEL                          #
##############################################################
train_path = os.path.join(cfg['data']['sorted_path'], 'training')
test_path = os.path.join(cfg['data']['sorted_path'], 'test')
fig_full_path = os.path.abspath(cfg['model']['figures_path'])
model_full_path = os.path.abspath(cfg['model']['models_path'])
weights_full_path = os.path.abspath(cfg['model']['weights_path'])
training_size = 18966
test_size = 4742

cnn = Classifier(train_path, test_path, training_size, test_size,
                 cfg['data']['classes_list'], cfg['model'],
                 fig_path=fig_full_path)
cnn.compile()
cnn.plot_model()
cnn.train()
cnn.show_training_history()
# One step per batch of 32, plus a partial final batch.
cnn.plot_confusion_matrix(test_size // 32 + 1)
cnn.save_model(model_full_path)
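# Hedged sketch of the `cfg` dictionary the script above expects. Only the
# keys are taken from the code; every value is a placeholder (in the real
# project this would typically be loaded from a config file):
cfg = {
    'data': {
        'sorted_path': 'data/sorted',            # contains 'training'/'test'
        'classes_list': ['class_a', 'class_b'],  # placeholder class names
    },
    'analysis': {
        'figures_path': 'figures/analysis',
    },
    'model': {
        'figures_path': 'figures/model',
        'models_path': 'models',
        'weights_path': 'weights',
        # ...plus whatever hyperparameters Classifier reads from cfg['model']
    },
}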
def __init__(self,
             bert_config_file: str,
             init_checkpoint: str,
             dataset_db_name: str,
             dataset_split: str,
             vocab_file: str,
             output_dir: str,
             split_table_name: str,
             skip_trivial_samples: bool = False,
             seq_len: int = 256,
             batch_size: int = 32,
             layer_indexes: List[int] = [-1, -2, -3, -4],
             learning_rate: float = 2e-6,
             num_train_epochs: float = 1.0,
             warmup_proportion: float = 0.1,
             do_lower_case: bool = True,
             save_checkpoints_steps: int = 1000,
             summary_steps: int = 1,
             margin: float = 2.0,
             steps_per_eval_iter: int = 10,
             loss: str = 'cosine_contrastive',
             beta: float = 1.0,
             num_train_steps: int = None,
             num_query_sentences_per_entity: int = 2):
    self._seq_len = seq_len
    self._batch_size = batch_size
    self._layer_indexes = layer_indexes
    self._do_lower_case = do_lower_case
    self._init_checkpoint = init_checkpoint
    self._bert_config_file = bert_config_file
    self._output_dir = output_dir
    self._save_checkpoints_steps = save_checkpoints_steps
    self._summary_steps = summary_steps
    self._num_train_epochs = num_train_epochs
    self._num_train_steps = num_train_steps
    self._warmup_proportion = warmup_proportion
    self._learning_rate = learning_rate
    self._margin = margin
    self._loss_name = loss
    self._beta = beta
    self._steps_per_eval_iter = steps_per_eval_iter

    self._tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                                 do_lower_case=do_lower_case)

    assert dataset_split in ['train', 'test', 'val']
    train_query_data, train_context_data, train_entities, _ = \
        Classifier.load_datasplit(dataset_db_name=dataset_db_name,
                                  dataset_split=dataset_split,
                                  split_table_name=split_table_name,
                                  skip_trivial_samples=skip_trivial_samples,
                                  load_context=False)
    self._training_data = self.generate_data_pairs(
        train_query_data, train_context_data, train_entities,
        num_query_sentences_per_entity=num_query_sentences_per_entity)

    # Only load the validation split if the training split has been specified.
    self._validation_data = None
    if dataset_split == 'train':
        val_query_data, val_context_data, val_entities, _ = \
            Classifier.load_datasplit(dataset_db_name=dataset_db_name,
                                      dataset_split='val',
                                      split_table_name=split_table_name,
                                      skip_trivial_samples=skip_trivial_samples,
                                      load_context=False)
        self._validation_data = self.generate_data_pairs(
            val_query_data, val_context_data, val_entities,
            num_query_sentences_per_entity=num_query_sentences_per_entity)
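# Hedged instantiation sketch. The owning class is not shown in this
# snippet, so `Trainer` and all paths below are placeholders for
# illustration only; the keyword names come from the constructor above.
trainer = Trainer(bert_config_file='bert/bert_config.json',
                  init_checkpoint='bert/bert_model.ckpt',
                  dataset_db_name='data/databases/dataset.db',
                  dataset_split='train',  # also triggers loading 'val'
                  vocab_file='bert/vocab.txt',
                  output_dir='output/',
                  split_table_name='splits',
                  loss='cosine_contrastive',
                  margin=2.0)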
from classifiers.classifier import Classifier
from preprocess.test_processor import TestProcessor
from time import ctime, sleep
from sys import exit
from utils.result_accumulator import ResultAccumulator

if __name__ != '__main__':
    print("This module must be run as the main module.")
    exit(1)

# Reads the model first and creates a preprocessor.
classifier = Classifier("models/random_forest.pkl")

# Instantiates a result accumulator.
classes = {
    "chicken": 5,
    "number7": 11,
    "sidestep": 10,
    "turnclap": 4,
    "wipers": 5,
    "stationary": 5,
    "cowboy": 7,
    "mermaid": 13,
    "numbersix": 8,
    "salute": 10,
    "swing": 7,
    "logout": 14
}
accumulator = ResultAccumulator(classes)

# Creates a processor for input data.
from classifiers.classifier import Classifier
from bert import tokenization
import numpy as np

db = '../data/databases/dataset_geraete_small.db'
t_q, t_c, t_e, _ = Classifier.load_datasplit(db, 'train')
e_q, e_c, e_e, _ = Classifier.load_datasplit(db, 'test')
v_q, v_c, v_e, _ = Classifier.load_datasplit(db, 'val')


def collect_sentences(query, context):
    out = set()
    for sample in query:
        out.add(sample['sentence'])
    for sample in context:
        out.add(sample['sentence'])
    return out


def get_avg_token_len(data, tokenizer, token_lens):
    for sample in data:
        s = str(sample['sentence'])
        tokens, _ = tokenizer.tokenize(s)
        token_lens.append(len(tokens))
    print("Avg. number of tokens: %s\n"
          "Std. deviation: %s\n"
          "Min: %s \tMax: %s" % (sum(token_lens) / len(token_lens),
                                 np.std(token_lens),
                                 min(token_lens), max(token_lens)))
    return token_lens
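# Example driver for the helpers above, using only names already defined in
# this script. (Note: the tokenizer passed to get_avg_token_len appears to
# be a wrapper whose tokenize() returns a (tokens, mapping) pair, unlike the
# stock bert FullTokenizer; counting unique sentences needs no tokenizer.)
train_sentences = collect_sentences(t_q, t_c)
test_sentences = collect_sentences(e_q, e_c)
val_sentences = collect_sentences(v_q, v_c)
print("Unique sentences -- train: %d, test: %d, val: %d" %
      (len(train_sentences), len(test_sentences), len(val_sentences)))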
def __init__(self):
    Classifier.__init__(self)
    self._clf = LogisticRegression()
def __init__(self):
    Classifier.__init__(self)
    self._clf = RandomForestClassifier()
def all_cases_experiment(self, *args, length=2000):
    """
    Creates a Cartesian product of '*args' in order to run the experiments
    on several different scenarios. All the experiments' results are saved
    in a .TXT file called 'all_cases_experiment.txt'.

    # Attributes:
        *args: each '*args' parameter is a list of possible MultiMagNet
            parameters, crossed in the order the method consumes them:
            NUMBER_EXPERIMENTS: how many times the code will run.
            REDUCTION_MODELS: (1, 3, 5, 7 or 9 for MNIST),
            ATTACKS: ("FGSM", "BIM", "DEEPFOOL", "CW_0.0"),
            DROP_RATE: (values below 1, preferably below 0.1),
            TAU: ("RE" or "minRE"),
            T: temperature (>= 1),
            metric: "RE", "JSD" or "DKL".
        (The dataset itself is fixed by self._data and is not swept here.)
    """
    import itertools
    start = time.time()

    combinations = list(itertools.product(*args))
    att = ""
    f = None

    classifier = Classifier(self._sess, self._data, epochs=350,
                            learning_rate=0.01, batch_size=32)
    classifier.execute()

    for combination in combinations:
        n_experiments = combination[0]
        reduction_models = combination[1]
        attack = combination[2]
        drop_rate = combination[3]
        tau = combination[4]
        try:
            T = combination[5]
            metric = combination[6]
        except IndexError:
            T = 1
            metric = "RE"

        if att != attack:
            f = open("./experiments/experiments_logs/" +
                     self._data.dataset_name + "_" + attack +
                     "_all_cases_experiment.txt", "a+")

        if tau == "RE" and reduction_models == 1:
            continue

        team_stats = np.zeros((n_experiments, 5))

        if att != attack:
            x_test_adv = Adversarial_Attack(self._sess, self._data,
                                            length=length, attack=attack,
                                            epochs=5).attack()
            _, x, y, _ = helpers.join_test_sets(self._data.x_test,
                                                x_test_adv,
                                                self._data.y_test, length,
                                                idx=self._idx_adv[:length])
            att = attack

        multiple_team = Assembly_Team(self._sess, self._data,
                                      reduction_models)

        scores_leg = classifier.model.evaluate(
            self._data.x_test[self._idx_adv][:length],
            self._data.y_test[self._idx_adv][:length], verbose=1)
        scores = classifier.model.evaluate(
            x_test_adv[:length],
            self._data.y_test[self._idx_adv][:length], verbose=1)

        print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
              (scores_leg[1] * 100))
        print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
              (scores[1] * 100))

        for exp in range(n_experiments):
            if metric == "RE":
                multiple_thresholds = multiple_team.get_thresholds(
                    tau=tau, drop_rate=drop_rate, p=1,
                    plot_rec_images=False)
                multiple_x_marks = Image_Reduction.apply_techniques(
                    x, multiple_team, p=1)
            else:
                multiple_thresholds = multiple_team.get_thresholds_pd(
                    tau=tau, classifier=classifier, T=T,
                    drop_rate=drop_rate, p=1, plot_rec_images=False,
                    metric=metric)
                multiple_x_marks = Image_Reduction.apply_techniques_pd(
                    x, multiple_team, classifier, T=T, p=1, metric=metric)

            y_pred_team, _ = poll_votes(x, y, multiple_x_marks,
                                        multiple_thresholds,
                                        reduction_models)

            (team_stats[exp, 0], team_stats[exp, 1], team_stats[exp, 2],
             team_stats[exp, 3], team_stats[exp, 4],
             confusion_matrix_team) = helpers.get_cm_and_statistics(
                 y, y_pred_team)

            print("\nSCENARIO {0}/{1} FINISHED.\nTeam CM \n{2}\n".format(
                exp + 1, n_experiments, confusion_matrix_team))

        print("\nEXPERIMENT TERMINATED. {0} DATASET: {1} Input Images 'x', "
              "{2} Attack, p = {3}, reduction models = {4}, "
              "drop_rate = {5}, tau = {6}, T = {7}\n".format(
                  self._data.dataset_name, len(x), attack, 1,
                  reduction_models, drop_rate, tau, T))

        s1 = helpers.get_statistics_experiments("Team", team_stats)

        if f is not None:
            s0 = ("EXPERIMENT TERMINATED. {0} DATASET: {1} Input Images "
                  "'x', {2} Attack, p = {3}, reduction models = {4}, "
                  "drop_rate = {5}, tau = {6}, T = {7}\n\n").format(
                      self._data.dataset_name, len(x), attack, 1,
                      reduction_models, drop_rate, tau, T)
            sep = '-' * len(s0)
            helpers.write_txt(f, '\n', '\n', s0, s1, '\n', sep, '\n', '\n')

    if f is not None:
        helpers.write_txt(f, "\nExperiment's elapsed time: {0}".format(
            timedelta(seconds=time.time() - start)))
        f.close()
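# Hedged usage sketch (`experiments` is a placeholder for the object
# exposing this method): the lists are crossed via itertools.product in
# the order the method unpacks them.
experiments.all_cases_experiment(
    [10],                  # NUMBER_EXPERIMENTS
    [1, 3, 5, 7, 9],       # REDUCTION_MODELS
    ["FGSM", "DEEPFOOL"],  # ATTACKS
    [0.001, 0.01],         # DROP_RATE
    ["RE", "minRE"],       # TAU
    [1, 10],               # T
    ["RE", "JSD"],         # metric
    length=2000)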
def tuning_team_parameters(self, attack, *args, classifier=None):
    print("\nStarting validation process...\n")

    # Reuse a caller-supplied classifier when given; otherwise train one.
    if classifier is None:
        classifier = Classifier(self._sess, self._data, epochs=350,
                                learning_rate=0.01, batch_size=32)
        classifier.execute()

    path = os.path.join("./adv_attacks/adversarial_images",
                        self._data.dataset_name.lower() + "_val_set_" +
                        attack.lower() + ".plk")
    val_set_adv = helpers.load_pkl(path)

    path = os.path.join("./adv_attacks/adversarial_images/validation_idx.pkl")
    idx = helpers.load_pkl(path)
    val_set_leg = self._data.x_val[idx]

    _, x_val, y_val, _ = helpers.join_test_sets(self._data.x_test,
                                                val_set_adv,
                                                self._data.y_test,
                                                len(val_set_leg), idx=idx)

    import itertools
    combinations = list(itertools.product(*args))
    print(len(combinations))

    team_stats = np.zeros((len(combinations), 3))
    parameters = [[0 for _ in range(5)] for _ in range(len(combinations))]

    k = 0
    for combination in combinations:
        reduction_models = parameters[k][0] = combination[0]
        drop_rate = parameters[k][1] = combination[1]
        tau = parameters[k][2] = combination[2]
        metric = parameters[k][3] = combination[3]

        # 'T' is only swept for CIFAR; default it so later references
        # never hit an unbound local.
        T = 1
        if self._data.dataset_name == "CIFAR":
            T = parameters[k][4] = combination[4]

        team = Assembly_Team(self._sess, self._data, reduction_models)

        if metric == "RE":
            thresholds = team.get_thresholds(tau=tau, drop_rate=drop_rate,
                                             p=1, plot_rec_images=False,
                                             load_thresholds=False)
            val_marks = Image_Reduction.apply_techniques(x_val, team, p=1)
        else:
            thresholds = team.get_thresholds_pd(tau=tau,
                                                classifier=classifier, T=T,
                                                drop_rate=drop_rate, p=1,
                                                plot_rec_images=False,
                                                load_thresholds=False,
                                                metric=metric)
            val_marks = Image_Reduction.apply_techniques_pd(x_val, team,
                                                            classifier, T=T,
                                                            p=1,
                                                            metric=metric)

        y_pred, _ = poll_votes(x_val, y_val, val_marks, thresholds,
                               reduction_models)

        print("\nEXPERIMENT USING {0} DATASET: {1} Input Images 'x', "
              "{2} Attack, p = {3}, reduction models = {4}, "
              "drop_rate = {5}, T = {6}\n".format(self._data.dataset_name,
                                                  len(x_val), attack, 1,
                                                  reduction_models,
                                                  drop_rate, T))

        (team_stats[k, 0], team_stats[k, 1], team_stats[k, 2],
         _, _, cm) = helpers.get_cm_and_statistics(y_val, y_pred)

        print('Threshold used: {0}\nConfusion Matrix:\n{1}\n'
              'ACC: {2}, Positive Precision: {3}, Negative Precision: {4}'
              .format(thresholds, cm, team_stats[k, 0], team_stats[k, 1],
                      team_stats[k, 2]))
        k += 1

    max_acc = max(team_stats[:, 0])
    index = np.argmax(team_stats[:, 0])
    print("\nBest accuracy of {0:.3} was obtained by the following "
          "MultiMagNet hyperparameters:\n{1}".format(max_acc,
                                                     parameters[index]))
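# Hedged usage sketch: the positional lists are crossed in the order the
# method unpacks them (reduction models, drop rates, tau, metric, and, for
# CIFAR only, T). `experiments` stands in for whatever object exposes this
# method.
experiments.tuning_team_parameters(
    "FGSM",
    [3, 5, 7, 9],      # reduction_models
    [0.001, 0.01],     # drop_rate
    ["RE", "minRE"],   # tau
    ["RE", "JSD"],     # metric
    [1, 10, 40])       # T (CIFAR only)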
def choose_team_each_jump_experiment(self, jump=0, magnet=False,
                                     attack="FGSM", drop_rate=0.001,
                                     tau="RE", p=1, length=2000, T=1,
                                     metric='JSD'):
    """
    Evaluates MultiMagNet on a test dataset containing half legitimate and
    half adversarial images, and prints its metrics.

    # Attributes:
        length: the number of legitimate test images used in the
            experiments. A final test dataset is produced containing
            legitimate and adversarial images, with size length * 2.
        jump: forms a different 'R' team at each jump (must be >= 1).
        magnet: if True, a single autoencoder is chosen; if False, a random
            number of autoencoders is chosen.
        attack: can be 'FGSM', 'BIM', 'DEEPFOOL', 'CW_0.0', 'CW_10.0',
            'CW_20.0', 'CW_30.0', 'CW_40.0'.
        drop_rate: the maximum percentage of legitimate images classified
            as 'adversarial'.
        tau: the approach used to compute the thresholds. It can be 'RE',
            which assigns a different threshold based on each autoencoder's
            reconstruction error, or 'minRE', which assigns the minimum
            reconstruction error obtained over all the autoencoders.
    """
    import math

    if jump < 1:
        raise ValueError("'jump' must be >= 1.")

    start = time.time()

    # Test inputs on the main classifier.
    classifier = Classifier(self._sess, self._data, epochs=350,
                            learning_rate=0.01, batch_size=32)
    classifier.execute()

    # Creates a surrogate model and returns the perturbed NumPy test set.
    x_test_adv = Adversarial_Attack(self._sess, self._data, length=length,
                                    attack=attack,
                                    epochs=12).attack(model=classifier.model)

    # Evaluates the brand-new adversarial examples on the main model.
    scores_leg = classifier.model.evaluate(
        self._data.x_test[self._idx_adv][:length],
        self._data.y_test[self._idx_adv][:length], verbose=1)
    scores = classifier.model.evaluate(
        x_test_adv[:length],
        self._data.y_test[self._idx_adv][:length], verbose=1)

    print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
          (scores_leg[1] * 100))
    print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
          (scores[1] * 100))

    # Plots the adversarial images.
    # helpers.plot_images(self._data.x_test[self._idx_adv][:length],
    #                     x_test_adv[:length], x_test_adv.shape)

    # Creates a test set containing 'length * 2' input images 'x', where
    # half are benign images and half are adversarial.
    _, x, y, y_ori = helpers.join_test_sets(self._data.x_test, x_test_adv,
                                            self._data.y_test, length,
                                            idx=self._idx_adv[:length])

    team_stats = np.zeros((math.floor(len(x) / jump), 4))
    i = 0
    k = 0
    while i + jump <= len(x):
        reduction_models = random.choice([3, 5, 7, 9]) if not magnet else 1
        print("\nInput images 'x' {0}-{1}/{2}\n"
              "Number of autoencoders chosen: {3}".format(
                  i + 1, i + jump, len(x), reduction_models))
        print("==============================================")

        team = Assembly_Team(self._sess, self._data, reduction_models)

        if metric == "RE":
            thresholds = team.get_thresholds(tau=tau, drop_rate=drop_rate,
                                             p=p, plot_rec_images=False)
            x_marks = Image_Reduction.apply_techniques(x[i:i + jump], team,
                                                       p=p)
        else:
            thresholds = team.get_thresholds_pd(tau=tau,
                                                classifier=classifier, T=T,
                                                drop_rate=drop_rate, p=p,
                                                plot_rec_images=False,
                                                metric=metric)
            x_marks = Image_Reduction.apply_techniques_pd(x[i:i + jump],
                                                          team, classifier,
                                                          T=T, p=p,
                                                          metric=metric)

        y_pred, filtered_indices = poll_votes(x[i:i + jump], y[i:i + jump],
                                              x_marks, thresholds,
                                              reduction_models)

        print("\nEXPERIMENT USING {0} DATASET: {1} Input Images 'x', "
              "{2} Attack, p = {3}, reduction models = {4}, "
              "drop_rate = {5}, T = {6}\n".format(self._data.dataset_name,
                                                  len(x[i:i + jump]),
                                                  attack, p,
                                                  reduction_models,
                                                  drop_rate, T))

        (team_stats[k, 0], team_stats[k, 1], team_stats[k, 2],
         _, _, cm) = helpers.get_cm_and_statistics(y[i:i + jump], y_pred)
        team_stats[k, 3] = reduction_models

        print('Threshold used: {0}\nConfusion Matrix:\n{1}\n'
              'ACC: {2}, Positive Precision: {3}, Negative Precision: {4}'
              .format(thresholds, cm, team_stats[k, 0], team_stats[k, 1],
                      team_stats[k, 2]))

        ori_acc, ref_acc = Reformer(classifier.model, team,
                                    x[i:i + jump][filtered_indices],
                                    y_ori[i:i + jump][filtered_indices])

        d_acc = classifier.model.evaluate(x[i:i + jump],
                                          y_ori[i:i + jump])[1]

        print("\nModel accuracy on D set: %.2f%%" % (d_acc * 100))
        print("\nModel accuracy on filtered images: %.2f%%" %
              (ori_acc * 100))
        print("Model accuracy on filtered and reformed images: %.2f%%" %
              (ref_acc * 100))

        print("\nExperiment's elapsed time: {0}\n".format(
            timedelta(seconds=time.time() - start)))

        i += jump
        k += 1

    helpers.get_statistics_experiments("Team", team_stats)
    print("Number of autoencoders chosen in each experiment: {0}".format(
        team_stats[:, 3]))
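# Hedged usage sketch (`experiments` is a placeholder for the object
# exposing this method): re-draw a random team every 500 images.
experiments.choose_team_each_jump_experiment(jump=500, magnet=False,
                                             attack="DEEPFOOL",
                                             drop_rate=0.001, tau="minRE",
                                             length=2000)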
def simple_experiment(self, reduction_models, attack="FGSM",
                      drop_rate=0.001, tau="RE", p=1, length=2000, T=1,
                      metric='JSD'):
    """
    Evaluates MultiMagNet on a test dataset containing half legitimate and
    half adversarial images, and prints its metrics.

    # Attributes:
        length: the number of legitimate test images used in the
            experiments. A final test dataset is produced containing
            legitimate and adversarial images, with size length * 2.
        reduction_models: the number of autoencoders randomly chosen to
            form the MultiMagNet ensemble.
        attack: can be 'FGSM', 'BIM', 'DEEPFOOL', 'CW_0.0', 'CW_10.0',
            'CW_20.0', 'CW_30.0', 'CW_40.0'.
        drop_rate: the maximum percentage of legitimate images classified
            as 'adversarial'.
        tau: the approach used to compute the thresholds. It can be 'RE',
            which assigns a different threshold based on each autoencoder's
            reconstruction error, or 'minRE', which assigns the minimum
            reconstruction error obtained over all the autoencoders.
    """
    start = time.time()

    # Test inputs on the main classifier.
    classifier = Classifier(self._sess, self._data, epochs=350,
                            learning_rate=0.01, batch_size=32)
    classifier.execute()

    # Creates a surrogate model and returns the perturbed NumPy test set.
    x_test_adv = Adversarial_Attack(self._sess, self._data, length=length,
                                    attack=attack,
                                    epochs=12).attack(model=classifier.model)

    # Evaluates the brand-new adversarial examples on the main model.
    scores_leg = classifier.model.evaluate(
        self._data.x_test[self._idx_adv][:length],
        self._data.y_test[self._idx_adv][:length], verbose=1)
    scores = classifier.model.evaluate(
        x_test_adv[:length],
        self._data.y_test[self._idx_adv][:length], verbose=1)

    print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
          (scores_leg[1] * 100))
    print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
          (scores[1] * 100))

    # Plots the adversarial images.
    helpers.plot_images(self._data.x_test[self._idx_adv][:length],
                        x_test_adv[:length], x_test_adv.shape)

    # Creates a test set containing 'length * 2' input images 'x', where
    # half are benign images and half are adversarial.
    _, x, y, y_ori = helpers.join_test_sets(self._data.x_test, x_test_adv,
                                            self._data.y_test, length,
                                            idx=self._idx_adv[:length])

    # Creates, trains and returns the 'R' dimensionality-reduction team.
    team = Assembly_Team(self._sess, self._data, reduction_models)

    if metric == "RE":
        thresholds = team.get_thresholds(tau=tau, drop_rate=drop_rate, p=p,
                                         plot_rec_images=False)
        x_marks = Image_Reduction.apply_techniques(x, team, p=p)
    else:
        thresholds = team.get_thresholds_pd(tau=tau, classifier=classifier,
                                            T=T, drop_rate=drop_rate, p=p,
                                            plot_rec_images=False,
                                            metric=metric)
        x_marks = Image_Reduction.apply_techniques_pd(x, team, classifier,
                                                      T=T, p=p,
                                                      metric=metric)

    y_pred, filtered_indices = poll_votes(x, y, x_marks, thresholds,
                                          reduction_models)

    print("\nEXPERIMENT USING {0} DATASET: {1} Input Images 'x', "
          "{2} Attack, p = {3}, reduction models = {4}, "
          "drop_rate = {5}, T = {6}\n".format(self._data.dataset_name,
                                              len(x), attack, p,
                                              reduction_models, drop_rate,
                                              T))

    acc, pp, nn, auc, f1, cm = helpers.get_cm_and_statistics(y, y_pred)

    print('Threshold used: {0}\nConfusion Matrix:\n{1}\n'
          'ACC: {2}, Positive Precision: {3}, Negative Precision: {4}, '
          'AUC: {5:.3}, F1: {6:.3}'.format(thresholds, cm, acc, pp, nn,
                                           auc, f1))

    ori_acc, ref_acc = Reformer(classifier.model, team, x[filtered_indices],
                                y_ori[filtered_indices])

    d_acc = classifier.model.evaluate(x, y_ori)[1]

    print("\nModel accuracy on D set: %.2f%%" % (d_acc * 100))
    print("\nModel accuracy on filtered images: %.2f%%" % (ori_acc * 100))
    print("Model accuracy on filtered and reformed images: %.2f%%" %
          (ref_acc * 100))

    print("\nExperiment's elapsed time: {0}".format(
        timedelta(seconds=time.time() - start)))
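# Hedged usage sketch (`experiments` is a placeholder instance): a single
# run with a 5-autoencoder ensemble against FGSM.
experiments.simple_experiment(reduction_models=5, attack="FGSM",
                              drop_rate=0.001, tau="RE", length=2000)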
dataset = data_lookup_table['raw_dataset_19Oct_1a']

########################################
# Test for SVM
########################################
preprocessor = TrainProcessor(dataset['X_Columns'], dataset['Y_Columns'])
X_train, X_test, y_train, y_test = preprocessor.prepare_train(
    dataset['raw_data_path'])

trainer = SvmTrainer()
trainer.train(X_train, y_train)
trainer.evaluate(X_test, y_test)
trainer.save(dataset['save_data_path'])

classifier = Classifier(dataset['save_data_path'])
prediction_score = classifier.predict(X_test)
print(prediction_score)

########################################
# Test for KNN
########################################
preprocessor = TrainProcessor(dataset['X_Columns'], dataset['Y_Columns'])
X_train, X_test, y_train, y_test = preprocessor.prepare_train(
    dataset['raw_data_path'])

max_knn_value = KnnTrainer.find_best_knn_value(X_train, y_train)
trainer = KnnTrainer(max_knn_value)
trainer.train(X_train, y_train)
def __init__(self):
    Classifier.__init__(self)
    self._clf = XGBClassifier()
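# The same thin-wrapper pattern extends to any scikit-learn-style estimator.
# A hypothetical sketch (GradientBoostingClassifier is a real sklearn class,
# but this wrapper itself is illustrative, not part of the repo):
from sklearn.ensemble import GradientBoostingClassifier


class GradientBoosting(Classifier):
    def __init__(self):
        Classifier.__init__(self)
        self._clf = GradientBoostingClassifier()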