Example #1
    def create_adversarial_validation_images(self):
        classifier = Classifier(self._sess,
                                self._data,
                                epochs=350,
                                learning_rate=0.01,
                                batch_size=32)
        classifier.execute()
        length = 2000
        # Creates a surrogate model and returns the perturbed NumPy validation set
        x_val_adv = Adversarial_Attack(
            self._sess,
            self._data,
            dataset="_x_val_set_",
            length=length,
            attack="DEEPFOOL",
            epochs=12).attack(model=classifier.model)
        scores_leg = classifier.model.evaluate(
            self._data.x_val[self._idx_adv][:length],
            self._data.y_val[self._idx_adv][:length],
            verbose=1)
        scores = classifier.model.evaluate(
            x_val_adv[:length],
            self._data.y_val[self._idx_adv][:length],
            verbose=1)
        print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
              (scores_leg[1] * 100))
        print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
              (scores[1] * 100))

        helpers.plot_images(self._data.x_val[self._idx_adv][:length],
                            x_val_adv[:length], x_val_adv.shape)
Example #2
def train_test(features_vector: FeatureVector, classifier: Classifier,
               data: ClassifierData):
    # Build train features vector
    with Timer('building train features', VERBOSE):
        x_train_features = features_vector.convert_to_features(
            data.x_train, VERBOSE)

    # Train
    with Timer('training', VERBOSE):
        classifier.train(x_train_features, data.y_train)

    # Build test features vector
    with Timer('building test features', VERBOSE):
        x_test_features = features_vector.convert_to_features(
            data.x_test, VERBOSE)

    # Test
    if VERBOSE:
        print(features_vector.name)
        print(classifier.report(x_test_features, data.y_test))

    return classifier.f1_micro(x_test_features, data.y_test)
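
A hypothetical call of train_test; 'fv', 'clf' and 'data' stand in for already-constructed FeatureVector, Classifier and ClassifierData instances (their constructors are not shown in this snippet):

# 'fv', 'clf' and 'data' are assumed to be prepared instances:
f1 = train_test(fv, clf, data)  # trains, optionally reports, returns micro F1
print('F1 (micro): %.3f' % f1)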
Example #3
    # anlz = Analyzer(tr_path,
    #                 te_path,
    #                 cfg['data']['classes_list'],
    #                 cfg['analysis']['figures_path'])
    # anlz.run()

    ##############################################################
    #                        CNN MODEL                           #
    ##############################################################
    train_path = os.path.join(cfg['data']['sorted_path'], 'training')
    test_path = os.path.join(cfg['data']['sorted_path'], 'test')
    fig_full_path = os.path.abspath(cfg['model']['figures_path'])
    model_full_path = os.path.abspath(cfg['model']['models_path'])
    weights_full_path = os.path.abspath(cfg['model']['weights_path'])
    training_size = 18966
    test_size = 4742

    cnn = Classifier(train_path,
                     test_path,
                     training_size,
                     test_size,
                     cfg['data']['classes_list'],
                     cfg['model'],
                     fig_path=fig_full_path)
    cnn.compile()
    cnn.plot_model()
    cnn.train()
    cnn.show_training_history()
    cnn.plot_confusion_matrix(test_size // 32 + 1)  # number of test batches at batch size 32
    cnn.save_model(model_full_path)
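
If save_model writes a standard Keras HDF5 file (an assumption about this Classifier wrapper, not something the snippet confirms), the trained network could later be reloaded without retraining:

# Assumed reload, valid only if save_model() produced a Keras HDF5 file:
from keras.models import load_model
model = load_model(model_full_path)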
Example #4
    def __init__(self,
                 bert_config_file: str,
                 init_checkpoint: str,
                 dataset_db_name: str,
                 dataset_split: str,
                 vocab_file: str,
                 output_dir: str,
                 split_table_name: str,
                 skip_trivial_samples: bool = False,
                 seq_len: int = 256,
                 batch_size: int = 32,
                 layer_indexes: List[int] = [-1, -2, -3, -4],
                 learning_rate: float = 2e-6,
                 num_train_epochs: float = 1.0,
                 warmup_proportion: float = 0.1,
                 do_lower_case: bool = True,
                 save_checkpoints_steps: int = 1000,
                 summary_steps: int = 1,
                 margin: float = 2.0,
                 steps_per_eval_iter: int = 10,
                 loss: str = 'cosine_contrastive',
                 beta: float = 1.0,
                 num_train_steps: int = None,
                 num_query_sentences_per_entity: int = 2):
        self._seq_len = seq_len
        self._batch_size = batch_size
        self._layer_indexes = layer_indexes

        self._do_lower_case = do_lower_case
        self._init_checkpoint = init_checkpoint
        self._bert_config_file = bert_config_file

        self._output_dir = output_dir
        self._save_checkpoints_steps = save_checkpoints_steps
        self._summary_steps = summary_steps

        self._num_train_epochs = num_train_epochs
        self._num_train_steps = num_train_steps
        self._warmup_proportion = warmup_proportion
        self._learning_rate = learning_rate
        self._margin = margin
        self._loss_name = loss
        self._beta = beta

        self._steps_per_eval_iter = steps_per_eval_iter

        self._tokenizer = tokenization.FullTokenizer(
            vocab_file=vocab_file, do_lower_case=do_lower_case)

        assert dataset_split in ['train', 'test', 'val']
        train_query_data, train_context_data, train_entities, _ = Classifier.load_datasplit(
            dataset_db_name=dataset_db_name,
            dataset_split=dataset_split,
            split_table_name=split_table_name,
            skip_trivial_samples=skip_trivial_samples,
            load_context=False)
        self._training_data = self.generate_data_pairs(
            train_query_data,
            train_context_data,
            train_entities,
            num_query_sentences_per_entity=num_query_sentences_per_entity)

        # Only load the validation split if the training split has been specified
        self._validation_data = None
        if dataset_split == 'train':
            val_query_data, val_context_data, val_entities, _ = Classifier.load_datasplit(
                dataset_db_name=dataset_db_name,
                dataset_split='val',
                split_table_name=split_table_name,
                skip_trivial_samples=skip_trivial_samples,
                load_context=False)
            self._validation_data = self.generate_data_pairs(
                val_query_data,
                val_context_data,
                val_entities,
                num_query_sentences_per_entity=num_query_sentences_per_entity)
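
A hypothetical instantiation of this constructor; every path and name below is illustrative, not taken from the project:

# Illustrative arguments (all paths are assumptions):
clf = Classifier(
    bert_config_file='bert/bert_config.json',
    init_checkpoint='bert/bert_model.ckpt',
    dataset_db_name='data/dataset.db',
    dataset_split='train',            # also triggers loading of the 'val' split
    vocab_file='bert/vocab.txt',
    output_dir='output/',
    split_table_name='splits')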
Example #5
from classifiers.classifier import Classifier
from preprocess.test_processor import TestProcessor
from time import ctime, sleep
from sys import exit
from utils.result_accumulator import ResultAccumulator

if __name__ != '__main__':
    print("This module must be run as the main module.")
    exit(1)

# Reads the model first and creates a preprocessor.
classifier = Classifier("models/random_forest.pkl")

# Instantiates a result accumulator.
classes = {
    "chicken": 5,
    "number7": 11,
    "sidestep": 10,
    "turnclap": 4,
    "wipers": 5,
    "stationary": 5,
    "cowboy": 7,
    "mermaid": 13,
    "numbersix": 8,
    "salute": 10,
    "swing": 7,
    "logout": 14
}
accumulator = ResultAccumulator(classes)

# Creates a processor for input data.
Example #6
from classifiers.classifier import Classifier
from bert import tokenization
import numpy as np

db = '../data/databases/dataset_geraete_small.db'
t_q, t_c, t_e, _ = Classifier.load_datasplit(db, 'train')
e_q, e_c, e_e, _ = Classifier.load_datasplit(db, 'test')
v_q, v_c, v_e, _ = Classifier.load_datasplit(db, 'val')


def collect_sentences(query, context):
    out = set()
    for sample in query:
        out.add(sample['sentence'])
    for sample in context:
        out.add(sample['sentence'])
    return out


def get_avg_token_len(data, tokenizer, token_lens):
    for sample in data:
        s = str(sample['sentence'])
        # Note: this tokenize() returns (tokens, mapping), i.e. a customized
        # variant of BERT's FullTokenizer.
        tokens, _ = tokenizer.tokenize(s)
        token_lens.append(len(tokens))

    print("Avg. number of tokens: %s\n"
          "Std. deviation: %s\n"
          "Min: %s \tMax: %s" %
          ((sum(token_lens) / len(token_lens)), np.std(token_lens),
           min(token_lens), max(token_lens)))
    return token_lens
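
A hypothetical way to call these helpers; note that get_avg_token_len expects the customized tokenizer variant whose tokenize() returns a (tokens, mapping) pair, so a stock BERT FullTokenizer would not unpack correctly:

# 'tokenizer' is assumed to be the project's customized tokenizer:
sentences = collect_sentences(t_q, t_c)
token_lens = get_avg_token_len(t_q, tokenizer, [])  # prints the statistics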
Example #7
    def __init__(self):
        Classifier.__init__(self)
        # wraps sklearn's LogisticRegression (assumed import)
        self._clf = LogisticRegression()
Example #8
    def __init__(self):
        Classifier.__init__(self)
        # wraps sklearn's RandomForestClassifier (assumed import)
        self._clf = RandomForestClassifier()
Example #9
    def all_cases_experiment(self, *args, length=2000):
        """
        Creates an cartesian product with '*args' in order to make the experiments on several different scenarios. 
        All the experiments' results are saved in a .TXT file called 'all_cases_experiment.txt'

        # Attributes:
            *args: each '*args' parameter is a list containing all possible MultiMagNet's parameters: 
                NUMBER_EXPERIMENTS: how many times the code will run.
                DATASETS: ("MNIST" or "CIFAR"),
                ATTACKS: ("FGSM", "BIM", "DEEPFOOL", "CW_0.0"),
                DROP_RATE: (values below 1, preferably below 0.1),
                REDUCTION_MODELS: (1,3,5,7,9 for MNIST),
                TAU: ("RE" or "minRE")
                T: Temperature (>= 1)
                metric: "RE", "JSD", "DKL"
        """
        import itertools

        start = time.time()
        combinations = list(itertools.product(*args))
        att = ""

        classifier = Classifier(self._sess,
                                self._data,
                                epochs=350,
                                learning_rate=0.01,
                                batch_size=32)
        classifier.execute()

        for combination in combinations:
            n_experiments = combination[0]
            reduction_models = combination[1]
            attack = combination[2]
            drop_rate = combination[3]
            tau = combination[4]
            try:
                T = combination[5]
                metric = combination[6]
            except IndexError:
                T = 1
                metric = "RE"

            if att != attack:
                f = open(
                    "./experiments/experiments_logs/" +
                    self._data.dataset_name + "_" + attack +
                    "_all_cases_experiment.txt", "a+")

            if tau == "RE" and reduction_models == 1:
                continue
            else:
                team_stats = np.zeros((n_experiments, 5))

                if att != attack:
                    x_test_adv = Adversarial_Attack(self._sess,
                                                    self._data,
                                                    length=length,
                                                    attack=attack,
                                                    epochs=5).attack()
                    _, x, y, _ = helpers.join_test_sets(
                        self._data.x_test,
                        x_test_adv,
                        self._data.y_test,
                        length,
                        idx=self._idx_adv[:length])
                    att = attack

                multiple_team = Assembly_Team(self._sess, self._data,
                                              reduction_models)

                scores_leg = classifier.model.evaluate(
                    self._data.x_test[self._idx_adv][:length],
                    self._data.y_test[self._idx_adv][:length],
                    verbose=1)
                scores = classifier.model.evaluate(
                    x_test_adv[:length],
                    self._data.y_test[self._idx_adv][:length],
                    verbose=1)
                print(
                    "\nMain classifier's accuracy on legitimate examples: %.2f%%"
                    % (scores_leg[1] * 100))
                print(
                    "\nMain classifier's accuracy on adversarial examples: %.2f%%"
                    % (scores[1] * 100))

                for exp in range(n_experiments):
                    if metric == "RE":
                        multiple_thresholds = multiple_team.get_thresholds(
                            tau=tau,
                            drop_rate=drop_rate,
                            p=1,
                            plot_rec_images=False)
                        multiple_x_marks = Image_Reduction.apply_techniques(
                            x, multiple_team, p=1)
                    else:
                        multiple_thresholds = multiple_team.get_thresholds_pd(
                            tau=tau,
                            classifier=classifier,
                            T=T,
                            drop_rate=drop_rate,
                            p=1,
                            plot_rec_images=False,
                            metric=metric)
                        multiple_x_marks = Image_Reduction.apply_techniques_pd(
                            x,
                            multiple_team,
                            classifier,
                            T=T,
                            p=1,
                            metric=metric)

                    y_pred_team, _ = poll_votes(x, y, multiple_x_marks,
                                                multiple_thresholds,
                                                reduction_models)
                    (team_stats[exp, 0], team_stats[exp, 1],
                     team_stats[exp, 2], team_stats[exp, 3],
                     team_stats[exp, 4],
                     confusion_matrix_team) = helpers.get_cm_and_statistics(
                         y, y_pred_team)

                    print(
                        "\nSCENARIO {0}/{1} FINISHED.\nTeam CM \n{2}\n".format(
                            exp + 1, n_experiments, confusion_matrix_team))

                print(
                    "\nEXPERIMENT TERMINATED. {0} DATASET: {1} Input Images 'x', {2} Attack, p = {3}, reduction models = {4}, drop_rate = {5}, tau = {6}, T = {7}\n"
                    .format(self._data.dataset_name, len(x), attack, 1,
                            reduction_models, drop_rate, tau, T))

                s1 = helpers.get_statistics_experiments("Team", team_stats)

                if f is not None:
                    s0 = "EXPERIMENT TERMINATED. {0} DATASET: {1} Input Images 'x', {2} Attack, p = {3}, reduction models = {4}, drop_rate = {5}, tau = {6}, T = {7}\n\n".format(
                        self._data.dataset_name, len(x), attack, 1,
                        reduction_models, drop_rate, tau, T)
                    sep = '-' * len(s0)
                    helpers.write_txt(f, '\n', '\n', s0, s1, '\n', sep, '\n',
                                      '\n')

                helpers.write_txt(
                    f, "\nExperiment's elapsed time: {0}".format(
                        timedelta(seconds=time.time() - start)))
        f.close()
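
A hypothetical invocation, matching the order in which the method unpacks each combination; 'runner' stands in for an instance of this experiments class and the list contents are illustrative:

# Illustrative call (list contents are assumptions):
runner.all_cases_experiment(
    [5],                    # NUMBER_EXPERIMENTS
    [3, 5, 7],              # REDUCTION_MODELS
    ["FGSM", "DEEPFOOL"],   # ATTACKS
    [0.001],                # DROP_RATE
    ["RE", "minRE"],        # TAU
    [1],                    # T (optional)
    ["RE"])                 # metric (optional)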
Example #10
    def tuning_team_parameters(self, attack, *args, classifier=None):

        print("\nStarting validation process...\n")

        # Note: the 'classifier' keyword argument is ignored; a new main
        # classifier is always trained here.
        classifier = Classifier(self._sess,
                                self._data,
                                epochs=350,
                                learning_rate=0.01,
                                batch_size=32)
        classifier.execute()

        path = os.path.join(
            "./adv_attacks/adversarial_images",
            self._data.dataset_name.lower() + "_val_set_" + attack.lower() +
            ".plk")
        val_set_adv = helpers.load_pkl(path)

        path = os.path.join(
            "./adv_attacks/adversarial_images/validation_idx.pkl")
        idx = helpers.load_pkl(path)

        val_set_leg = self._data.x_val[idx]
        _, x_val, y_val, _ = helpers.join_test_sets(self._data.x_test,
                                                    val_set_adv,
                                                    self._data.y_test,
                                                    len(val_set_leg),
                                                    idx=idx)

        import itertools
        combinations = list(itertools.product(*args))
        print(len(combinations))
        team_stats = np.zeros((len(combinations), 3))
        parameters = [[0 for x in range(5)] for y in range(len(combinations))]
        k = 0

        for combination in combinations:
            reduction_models = parameters[k][0] = combination[0]
            drop_rate = parameters[k][1] = combination[1]
            tau = parameters[k][2] = combination[2]
            metric = parameters[k][3] = combination[3]

            if self._data.dataset_name == "CIFAR":
                T = parameters[k][4] = combination[4]

            team = Assembly_Team(self._sess, self._data, reduction_models)

            if metric == "RE":
                thresholds = team.get_thresholds(tau=tau,
                                                 drop_rate=drop_rate,
                                                 p=1,
                                                 plot_rec_images=False,
                                                 load_thresholds=False)
                val_marks = Image_Reduction.apply_techniques(x_val, team, p=1)
            else:
                thresholds = team.get_thresholds_pd(tau=tau,
                                                    classifier=classifier,
                                                    T=T,
                                                    drop_rate=drop_rate,
                                                    p=1,
                                                    plot_rec_images=False,
                                                    load_thresholds=False,
                                                    metric=metric)

                val_marks = Image_Reduction.apply_techniques_pd(x_val,
                                                                team,
                                                                classifier,
                                                                T=T,
                                                                p=1,
                                                                metric=metric)

            y_pred, _ = poll_votes(x_val, y_val, val_marks, thresholds,
                                   reduction_models)

            print(
                "\nEXPERIMENT USING {0} DATASET: {1} Input Images 'x', {2} Attack, p = {3}, reduction models = {4}, drop_rate = {5}, T = {6}\n"
                .format(self._data.dataset_name, len(x_val), attack, 1,
                        reduction_models, drop_rate, T))

            team_stats[k, 0], team_stats[k, 1], team_stats[
                k, 2], _, _, cm = helpers.get_cm_and_statistics(y_val, y_pred)

            print(
                'Threshold used: {0}\nConfusion Matrix:\n{1}\nACC: {2}, Positive Precision: {3}, Negative Precision: {4}'
                .format(thresholds, cm, team_stats[k, 0], team_stats[k, 1],
                        team_stats[k, 2]))
            k = k + 1

        max_acc = max(team_stats[:, 0])
        index = np.argmax(team_stats[:, 0])

        print(
            "\nBest accuracy of {0:.3} was obtained by the following MultiMagNet's hyperparameters:\n{1}"
            .format(max_acc, parameters[index]))
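
A hypothetical call; the lists follow the order in which the method unpacks each combination (reduction_models, drop_rate, tau, metric, and T only for CIFAR), and 'runner' stands in for an instance of this class:

# Illustrative call (values are assumptions):
runner.tuning_team_parameters(
    "FGSM",
    [1, 3, 5],          # reduction_models
    [0.001, 0.01],      # drop_rate
    ["RE", "minRE"],    # tau
    ["RE", "JSD"])      # metric; append a T list when the dataset is CIFAR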
Example #11
    def choose_team_each_jump_experiment(self,
                                         jump=0,
                                         magnet=False,
                                         attack="FGSM",
                                         drop_rate=0.001,
                                         tau="RE",
                                         p=1,
                                         length=2000,
                                         T=1,
                                         metric='JSD'):
        """
        Evaluates MultiMagNet on a test dataset containing half legitimate and half adversarial images, and prints its metrics.

        # Attributes:
            length: the number of legitimate test images used in the experiments. A final test dataset is produced containing legitimate and adversarial images, with size length * 2.

            jump: a different 'R' team is formed for every 'jump' consecutive images (must be > 0).

            magnet: if True, a single autoencoder is chosen; if False, a random number of autoencoders is chosen.

            attack: can be 'FGSM', 'BIM', 'DEEPFOOL', 'CW_0.0', 'CW_10.0', 'CW_20.0', 'CW_30.0', 'CW_40.0'.

            drop_rate: the maximum percentage of legitimate images classified as 'adversarial'.

            tau: the approach used to compute the thresholds. It can be 'RE', which assigns a different threshold based on each autoencoder's reconstruction error, or 'minRE', which assigns the minimum reconstruction error obtained over all the autoencoders.
        """
        import math
        start = time.time()

        # test inputs on main classifier
        classifier = Classifier(self._sess,
                                self._data,
                                epochs=350,
                                learning_rate=0.01,
                                batch_size=32)
        classifier.execute()

        # Creates a surrogate model and returns the perturbed NumPy test set
        x_test_adv = Adversarial_Attack(
            self._sess, self._data, length=length, attack=attack,
            epochs=12).attack(model=classifier.model)

        # Evaluates the brand-new adversarial examples on the main model.
        scores_leg = classifier.model.evaluate(
            self._data.x_test[self._idx_adv][:length],
            self._data.y_test[self._idx_adv][:length],
            verbose=1)
        scores = classifier.model.evaluate(
            x_test_adv[:length],
            self._data.y_test[self._idx_adv][:length],
            verbose=1)
        print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
              (scores_leg[1] * 100))
        print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
              (scores[1] * 100))

        # plots the adversarial images
        #helpers.plot_images(self._data.x_test[self._idx_adv][:length], x_test_adv[:length], x_test_adv.shape)

        # Creates a test set containing 'length * 2' input images 'x', where half are benign images and half are adversarial.
        _, x, y, y_ori = helpers.join_test_sets(self._data.x_test,
                                                x_test_adv,
                                                self._data.y_test,
                                                length,
                                                idx=self._idx_adv[:length])
        team_stats = np.zeros((math.floor(len(x) / jump), 4))

        i = 0
        k = 0

        while i + jump <= len(x):
            reduction_models = random.choice([3, 5, 7, 9]) if not magnet else 1
            print(
                "\nInput images 'x' {0}-{1}/{2}\nNumber of autoencoders chosen: {3}"
                .format(i + 1, i + jump, len(x), reduction_models))
            print("==============================================")
            team = Assembly_Team(self._sess, self._data, reduction_models)

            if metric == "RE":
                thresholds = team.get_thresholds(tau=tau,
                                                 drop_rate=drop_rate,
                                                 p=p,
                                                 plot_rec_images=False)
                x_marks = Image_Reduction.apply_techniques(x[i:i + jump],
                                                           team,
                                                           p=p)
            else:
                thresholds = team.get_thresholds_pd(tau=tau,
                                                    classifier=classifier,
                                                    T=T,
                                                    drop_rate=drop_rate,
                                                    p=p,
                                                    plot_rec_images=False,
                                                    metric=metric)
                x_marks = Image_Reduction.apply_techniques_pd(x[i:i + jump],
                                                              team,
                                                              classifier,
                                                              T=T,
                                                              p=p,
                                                              metric=metric)

            y_pred, filtered_indices = poll_votes(x[i:i + jump], y[i:i + jump],
                                                  x_marks, thresholds,
                                                  reduction_models)

            print(
                "\nEXPERIMENT USING {0} DATASET: {1} Input Images 'x', {2} Attack, p = {3}, reduction models = {4}, drop_rate = {5}, T = {6}\n"
                .format(self._data.dataset_name, len(x[i:i + jump]), attack, p,
                        reduction_models, drop_rate, T))

            (team_stats[k, 0], team_stats[k, 1], team_stats[k, 2],
             _, _, cm) = helpers.get_cm_and_statistics(y[i:i + jump], y_pred)
            team_stats[k, 3] = reduction_models

            print(
                'Threshold used: {0}\nConfusion Matrix:\n{1}\nACC: {2}, Positive Precision: {3}, Negative Precision: {4}'
                .format(thresholds, cm, team_stats[k, 0], team_stats[k, 1],
                        team_stats[k, 2]))

            ori_acc, ref_acc = Reformer(classifier.model, team,
                                        x[i:i + jump][filtered_indices],
                                        y_ori[i:i + jump][filtered_indices])
            d_acc = classifier.model.evaluate(x[i:i + jump],
                                              y_ori[i:i + jump])[1]

            print("\nModel accuracy on D set: %.2f%%" % (d_acc * 100))
            print("\nModel accuracy on filtered images: %.2f%%" %
                  (ori_acc * 100))
            print("Model accuracy on filtered and reformed images: %.2f%%" %
                  (ref_acc * 100))

            print("\nExperiment's elapsed time: {0}\n".format(
                timedelta(seconds=time.time() - start)))

            i = i + jump
            k = k + 1

        helpers.get_statistics_experiments("Team", team_stats)
        print("Number of autoencoders chosen on each experiment: {0}".format(
            team_stats[:, 3]))
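
A hypothetical call; 'runner' stands in for an instance of this class and the values are illustrative (jump must be positive, since it sizes team_stats by division):

# Illustrative call (values are assumptions):
runner.choose_team_each_jump_experiment(jump=500,
                                        attack="DEEPFOOL",
                                        drop_rate=0.001,
                                        tau="minRE",
                                        metric="RE")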
Example #12
    def simple_experiment(self,
                          reduction_models,
                          attack="FGSM",
                          drop_rate=0.001,
                          tau="RE",
                          p=1,
                          length=2000,
                          T=1,
                          metric='JSD'):
        """
        Evaluates MultiMagNet with test dataset containing half legitimate and adversarial images, and prints the its metrics.

        # Attributes:        
            length: the size of the test dataset containing legitimate images that will be used in the experiments. A final test dataset will be produced containing legitimate and adversarial images, with size length * 2.
            
            reduction_models: the number of autoencoders randomly chosen to form the MultiMagNet ensemble. 

            attack: can be 'FGSM', 'BIM', 'DEEPFOOL', 'CW_0.0', 'CW_10.0', 'CW_20.0', 'CW_30.0', 'CW_40.0'.

            drop_rate: the maximum percentage of legitimate images classified as 'adversarial'.

            tau: the approach used to compute the thresholds. It can be 'RE' which assigns a different threshold based on each autoencoder's reconstruction error or 'minRE', which assigns the minimum reconstruction error obtained for all the autoencoders. 
        """
        start = time.time()

        # test inputs on main classifier
        classifier = Classifier(self._sess,
                                self._data,
                                epochs=350,
                                learning_rate=0.01,
                                batch_size=32)
        classifier.execute()

        # Creates a surrogate model and returns the perturbed NumPy test set
        x_test_adv = Adversarial_Attack(
            self._sess, self._data, length=length, attack=attack,
            epochs=12).attack(model=classifier.model)

        # Evaluates the brand-new adversarial examples on the main model.
        scores_leg = classifier.model.evaluate(
            self._data.x_test[self._idx_adv][:length],
            self._data.y_test[self._idx_adv][:length],
            verbose=1)
        scores = classifier.model.evaluate(
            x_test_adv[:length],
            self._data.y_test[self._idx_adv][:length],
            verbose=1)
        print("\nMain classifier's accuracy on legitimate examples: %.2f%%" %
              (scores_leg[1] * 100))
        print("\nMain classifier's accuracy on adversarial examples: %.2f%%" %
              (scores[1] * 100))

        # plots the adversarial images
        helpers.plot_images(self._data.x_test[self._idx_adv][:length],
                            x_test_adv[:length], x_test_adv.shape)

        # Creates a test set containing 'length * 2' input images 'x', where half are benign images and half are adversarial.
        _, x, y, y_ori = helpers.join_test_sets(self._data.x_test,
                                                x_test_adv,
                                                self._data.y_test,
                                                length,
                                                idx=self._idx_adv[:length])

        # Creates, trains and returns the 'R' dimensionality reduction team
        team = Assembly_Team(self._sess, self._data, reduction_models)

        if metric == "RE":
            thresholds = team.get_thresholds(tau=tau,
                                             drop_rate=drop_rate,
                                             p=p,
                                             plot_rec_images=False)
            x_marks = Image_Reduction.apply_techniques(x, team, p=p)
        else:
            thresholds = team.get_thresholds_pd(tau=tau,
                                                classifier=classifier,
                                                T=T,
                                                drop_rate=drop_rate,
                                                p=p,
                                                plot_rec_images=False,
                                                metric=metric)
            x_marks = Image_Reduction.apply_techniques_pd(x,
                                                          team,
                                                          classifier,
                                                          T=T,
                                                          p=p,
                                                          metric=metric)

        y_pred, filtered_indices = poll_votes(x, y, x_marks, thresholds,
                                              reduction_models)

        print(
            "\nEXPERIMENT USING {0} DATASET: {1} Input Images 'x', {2} Attack, p = {3}, reduction models = {4}, drop_rate = {5}, T = {6}\n"
            .format(self._data.dataset_name, len(x), attack, p,
                    reduction_models, drop_rate, T))

        acc, pp, nn, auc, f1, cm = helpers.get_cm_and_statistics(y, y_pred)

        print(
            'Threshold used: {0}\nConfusion Matrix:\n{1}\nACC: {2}, Positive Precision: {3}, Negative Precision: {4}, AUC: {5:.3}, F1: {6:.3}'
            .format(thresholds, cm, acc, pp, nn, auc, f1))

        ori_acc, ref_acc = Reformer(classifier.model, team,
                                    x[filtered_indices],
                                    y_ori[filtered_indices])
        d_acc = classifier.model.evaluate(x, y_ori)[1]

        print("\nModel accuracy on D set: %.2f%%" % (d_acc * 100))
        print("\nModel accuracy on filtered images: %.2f%%" % (ori_acc * 100))
        print("Model accuracy on filtered and reformed images: %.2f%%" %
              (ref_acc * 100))

        print("\nExperiment's elapsed time: {0}".format(
            timedelta(seconds=time.time() - start)))
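
A hypothetical call with the ensemble size spelled out; 'runner' stands in for an instance of this class and the values are illustrative:

# Illustrative call (values are assumptions):
runner.simple_experiment(reduction_models=5,
                         attack="FGSM",
                         drop_rate=0.001,
                         tau="RE",
                         metric="RE")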
Example #13
dataset = data_lookup_table['raw_dataset_19Oct_1a']

########################################
# Test for SVM
########################################

preprocessor = TrainProcessor(dataset['X_Columns'], dataset['Y_Columns'])
X_train, X_test, y_train, y_test = preprocessor.prepare_train(
    dataset['raw_data_path'])
trainer = SvmTrainer()

trainer.train(X_train, y_train)
trainer.evaluate(X_test, y_test)
trainer.save(dataset['save_data_path'])
classifier = Classifier(dataset['save_data_path'])
prediction_score = classifier.predict(X_test)

print(prediction_score)

########################################
# Test for KNN
########################################

preprocessor = TrainProcessor(dataset['X_Columns'], dataset['Y_Columns'])

X_train, X_test, y_train, y_test = preprocessor.prepare_train(
    dataset['raw_data_path'])
max_knn_value = KnnTrainer.find_best_knn_value(X_train, y_train)
trainer = KnnTrainer(max_knn_value)
trainer.train(X_train, y_train)
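
The KNN block stops right after training; a plausible continuation, mirroring the SVM block above and assuming KnnTrainer shares SvmTrainer's interface (this is not part of the original snippet):

# Assumed continuation, following the SVM pattern above:
trainer.evaluate(X_test, y_test)
trainer.save(dataset['save_data_path'])
classifier = Classifier(dataset['save_data_path'])
print(classifier.predict(X_test))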
Example #14
    def __init__(self):
        Classifier.__init__(self)
        # wraps xgboost's XGBClassifier (assumed import)
        self._clf = XGBClassifier()
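
Examples #7, #8 and #14 all follow the same pattern: each subclass only selects the underlying estimator, while a common Classifier base class presumably carries the shared training and prediction plumbing. A minimal sketch of such a base class, assuming sklearn-style estimators (an illustration, not the project's code):

# Hypothetical base class (assumed, sklearn-style interface):
class Classifier:
    def __init__(self):
        self._clf = None  # each subclass assigns its estimator here

    def train(self, x, y):
        self._clf.fit(x, y)

    def predict(self, x):
        return self._clf.predict(x)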