def get_frame_saliency(classifier, inner_config=None, **kwargs):
    """
    Build a FrameSaliencyAttack wrapping an inner attack loaded from config.

    Args:
        classifier: estimator handed both to the inner attack loader and to
            the FrameSaliencyAttack constructor.
        inner_config: attack config passed to `config_loading.load_attack`
            to create the inner attacker.
        **kwargs: forwarded unchanged to the FrameSaliencyAttack constructor.

    Returns:
        The constructed FrameSaliencyAttack instance.
    """
    # Imports are kept at function scope, as in the original definition.
    from art.attacks.evasion import FrameSaliencyAttack
    from armory.utils import config_loading

    inner_attack = config_loading.load_attack(inner_config, classifier)
    return FrameSaliencyAttack(classifier, inner_attack, **kwargs)
def __init__(self, estimator, **kwargs):
    """
    Load every inner attack from config and hand them to the parent class.

    Args:
        estimator: estimator passed to each inner attack and to the parent
            constructor.
        **kwargs: must contain "inner_configs" (an iterable of attack config
            dicts). May contain "targeted" (default False), which is copied
            into every inner attack's kwargs. All remaining entries,
            including "targeted" when present, are forwarded to the parent
            constructor.

    Raises:
        KeyError: if "inner_configs" is not supplied.
    """
    self._check_kwargs(kwargs)
    self.targeted = kwargs.get("targeted", False)
    # "inner_configs" is consumed here; it must not reach the parent constructor.
    inner_configs = kwargs.pop("inner_configs")

    self.attacks = []
    for inner_config in inner_configs:
        # Work on copies so the caller's config dicts are not modified in
        # place, and tolerate inner configs that have no "kwargs" entry.
        inner_config = dict(inner_config)
        inner_kwargs = dict(inner_config.get("kwargs") or {})
        inner_kwargs["targeted"] = self.targeted
        inner_config["kwargs"] = inner_kwargs
        self.attacks.append(load_attack(inner_config, estimator))

    super().__init__(estimator=estimator, attacks=self.attacks, **kwargs)
def _evaluate(self, config: dict) -> dict:
    """
    Evaluate the config and return a results dict

    Optionally fits the classifier (directly, or through a "Trainer" defense),
    then scores it on the test split twice: once on benign inputs and once on
    adversarial inputs produced by the configured attack. Each test sample is
    an array of stacks; the per-stack predictions are averaged over axis 0
    into a single prediction per sample.

    Args:
        config: scenario configuration. The "model", "dataset", "metric" and
            "attack" entries are read; "defense" is optional.

    Returns:
        The results dict produced by the metrics logger.
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        train_epochs = config["model"]["fit_kwargs"]["nb_epochs"]
        batch_size = config["dataset"]["batch_size"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        train_data = load_dataset(
            config["dataset"],
            epochs=train_epochs,
            split_type="train",
            preprocessing_fn=preprocessing_fn,
        )

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
        else:
            logger.info("Fitting classifier on clean train dataset...")

        for epoch in range(train_epochs):
            classifier.set_learning_phase(True)

            for _ in tqdm(
                range(train_data.batches_per_epoch),
                desc=f"Epoch: {epoch}/{train_epochs}",
            ):
                x, y = train_data.get_batch()
                # x_trains consists of one or more videos, each represented as an
                # ndarray of shape (n_stacks, 3, 16, 112, 112).
                # To train, randomly sample a batch of stacks
                x = np.stack([x_i[np.random.randint(x_i.shape[0])] for x_i in x])
                if defense_type == "Trainer":
                    defense.fit(x, y, batch_size=batch_size, nb_epochs=1)
                else:
                    classifier.fit(x, y, batch_size=batch_size, nb_epochs=1)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming classifier with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], classifier)
        classifier = defense()

    classifier.set_learning_phase(False)

    # Evaluate the ART classifier on benign test examples
    logger.info(f"Loading test dataset {config['dataset']['name']}...")
    test_data_generator = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
    )

    logger.info("Running inference on benign examples...")
    metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

    for x_batch, y_batch in tqdm(test_data_generator, desc="Benign"):
        for x, y in zip(x_batch, y_batch):
            # combine predictions across all stacks
            y_pred = np.mean(classifier.predict(x), axis=0)
            metrics_logger.update_task(y, y_pred)
    metrics_logger.log_task()

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating / testing adversarial examples...")

    attack = load_attack(config["attack"], classifier)
    test_data_generator = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
    )
    for x_batch, y_batch in tqdm(test_data_generator, desc="Attack"):
        for x, y in zip(x_batch, y_batch):
            # each x is of shape (n_stack, 3, 16, 112, 112)
            #    n_stack varies
            attack.set_params(batch_size=x.shape[0])
            x_adv = attack.generate(x=x)
            # combine predictions across all stacks; the adversarial metrics
            # must be computed on x_adv, not on the benign input x
            y_pred_adv = np.mean(classifier.predict(x_adv), axis=0)
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            metrics_logger.update_perturbation([x], [x_adv])
    metrics_logger.log_task(adversarial=True)
    return metrics_logger.results()
def _evaluate(
    self,
    config: dict,
    num_eval_batches: Optional[int],
    skip_benign: Optional[bool],
    skip_attack: Optional[bool],
    skip_misclassified: Optional[bool],
) -> dict:
    """
    Evaluate the config and return a results dict

    Steps, in order: load the estimator, optionally wrap it with an internal
    defense, optionally fit it, run the benign pass (unless skip_benign),
    then run the adversarial pass (unless skip_attack) using either a
    preloaded adversarial dataset or a freshly loaded attack.

    Args:
        config: scenario configuration. The "model", "dataset", "attack",
            "metric" and "scenario" entries are read; "defense" is optional.
        num_eval_batches: forwarded as `num_batches` to the dataset loaders
            used for evaluation.
        skip_benign: when truthy, the benign pass is not run.
        skip_attack: when truthy, results are returned right after the
            benign pass.
        skip_misclassified: when truthy, samples whose benign
            "categorical_accuracy" value is 0 are not attacked; the benign
            input is used as the adversarial input instead.

    Returns:
        The results dict produced by the metrics logger.

    Raises:
        NotImplementedError: if fitting raises NotImplementedError.
        ValueError: if the attack is targeted and "use_label" is set.
    """
    model_config = config["model"]
    estimator, _ = load_model(model_config)

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(
            f"Applying internal {defense_type} defense to estimator")
        estimator = load_defense_internal(config["defense"], estimator)

    if model_config["fit"]:
        try:
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split=config["dataset"].get("train_split", "train"),
                shuffle_files=True,
            )
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], estimator)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting estimator on clean train dataset...")
                estimator.fit_generator(train_data, **fit_kwargs)
        except NotImplementedError:
            # Re-raised with a message specific to this scenario.
            raise NotImplementedError(
                "Training has not yet been implemented for object detectors"
            )

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(
            f"Transforming estimator with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], estimator)
        estimator = defense()

    attack_config = config["attack"]
    attack_type = attack_config.get("type")

    # A missing "kwargs" or "targeted" entry is treated as untargeted.
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"],
        skip_benign=skip_benign,
        skip_attack=skip_attack,
        targeted=targeted,
    )

    eval_split = config["dataset"].get("eval_split", "test")
    if skip_benign:
        logger.info("Skipping benign classification...")
    else:
        # Evaluate the ART estimator on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )

        logger.info("Running inference on benign examples...")
        for x, y in tqdm(test_data, desc="Benign"):
            # Ensure that input sample isn't overwritten by estimator
            x.flags.writeable = False
            # Only the predict call is inside the resource context.
            with metrics.resource_context(
                name="Inference",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.
                computational_resource_dict,
            ):
                y_pred = estimator.predict(x)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

    if skip_attack:
        logger.info("Skipping attack generation...")
        return metrics_logger.results()

    # Evaluate the ART estimator on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    if skip_misclassified:
        # Look up the values recorded by the "categorical_accuracy" task;
        # they are indexed by batch position in the attack loop below.
        # NOTE(review): presumably these values come from the benign pass, so
        # combining skip_misclassified with skip_benign may leave this empty
        # - confirm callers never set both.
        acc_task_idx = [i.name for i in metrics_logger.tasks
                        ].index("categorical_accuracy")
        benign_acc = metrics_logger.tasks[acc_task_idx].values()

    if targeted and attack_config.get("use_label"):
        raise ValueError("Targeted attacks cannot have 'use_label'")
    if attack_type == "preloaded":
        preloaded_split = attack_config.get("kwargs", {}).get(
            "split", "adversarial")
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split=preloaded_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, estimator)
        # Warn (without failing) if the config and the attack object disagree.
        if targeted != getattr(attack, "targeted", False):
            logger.warning(
                f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
            )
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(
                attack_config["targeted_labels"])

    export_samples = config["scenario"].get("export_samples")
    if export_samples is not None and export_samples > 0:
        sample_exporter = SampleExporter(self.scenario_output_dir,
                                         test_data.context, export_samples)
    else:
        sample_exporter = None

    for batch_idx, (x, y) in enumerate(tqdm(test_data, desc="Attack")):
        # Only obtaining x_adv (unpacking or generating) is inside the
        # resource context; prediction and metric updates happen after it.
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=metrics_logger.
            computational_resource_dict,
        ):
            if attack_type == "preloaded":
                # A length-2 x is unpacked as (benign, adversarial);
                # otherwise x itself is used as the adversarial input.
                if len(x) == 2:
                    x, x_adv = x
                else:
                    x_adv = x
                if targeted:
                    y, y_target = y
            else:
                # Copy so per-batch edits never touch the config itself.
                generate_kwargs = deepcopy(
                    attack_config.get("generate_kwargs", {}))
                # Temporary workaround for ART code requirement of ndarray mask
                if "mask" in generate_kwargs:
                    generate_kwargs["mask"] = np.array(
                        generate_kwargs["mask"])
                if attack_config.get("use_label"):
                    generate_kwargs["y"] = y
                elif targeted:
                    y_target = label_targeter.generate(y)
                    generate_kwargs["y"] = y_target

                # Batches with a recorded accuracy value of 0 are not attacked.
                if skip_misclassified and benign_acc[batch_idx] == 0:
                    x_adv = x
                else:
                    x_adv = attack.generate(x=x, **generate_kwargs)

        # Ensure that input sample isn't overwritten by estimator
        x_adv.flags.writeable = False
        y_pred_adv = estimator.predict(x_adv)
        metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        if targeted:
            metrics_logger.update_task(y_target,
                                       y_pred_adv,
                                       adversarial=True,
                                       targeted=True)
        metrics_logger.update_perturbation(x, x_adv)
        if sample_exporter is not None:
            sample_exporter.export(x, x_adv, y, y_pred_adv)
    metrics_logger.log_task(adversarial=True)
    if targeted:
        metrics_logger.log_task(adversarial=True, targeted=True)
    return metrics_logger.results()
def _evaluate(self, config: dict, num_eval_batches: Optional[int],
              skip_benign: Optional[bool]) -> dict:
    """
    Evaluate the config and return a results dict

    Optionally fits the classifier one batch at a time (directly, or through
    a "Trainer" defense), runs the benign pass unless skip_benign is set,
    then runs the adversarial pass with either a preloaded adversarial
    dataset or a freshly loaded attack. Predictions for one sample are the
    mean over axis 0 of the per-stack predictions.

    Args:
        config: scenario configuration. The "model", "dataset", "attack" and
            "metric" entries are read; "defense" is optional.
        num_eval_batches: forwarded as `num_batches` to the dataset loaders
            used for evaluation.
        skip_benign: when truthy, the benign pass is not run.

    Returns:
        The results dict produced by the metrics logger.

    Raises:
        ValueError: if the attack is targeted and "use_label" is set.
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(
            f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        train_epochs = config["model"]["fit_kwargs"]["nb_epochs"]
        batch_size = config["dataset"]["batch_size"]

        logger.info(
            f"Loading train dataset {config['dataset']['name']}...")
        train_data = load_dataset(
            config["dataset"],
            epochs=train_epochs,
            split_type="train",
            preprocessing_fn=preprocessing_fn,
            shuffle_files=True,
        )

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
        else:
            logger.info("Fitting classifier on clean train dataset...")

        # Manual training loop: each batch is fitted for a single epoch.
        for epoch in range(train_epochs):
            classifier.set_learning_phase(True)

            for _ in tqdm(
                    range(train_data.batches_per_epoch),
                    desc=f"Epoch: {epoch}/{train_epochs}",
            ):
                x, y = train_data.get_batch()
                # x_trains consists of one or more videos, each represented as an
                # ndarray of shape (n_stacks, 3, 16, 112, 112).
                # To train, randomly sample a batch of stacks
                x = np.stack(
                    [x_i[np.random.randint(x_i.shape[0])] for x_i in x])
                if defense_type == "Trainer":
                    defense.fit(x, y, batch_size=batch_size, nb_epochs=1)
                else:
                    classifier.fit(x, y, batch_size=batch_size, nb_epochs=1)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(
            f"Transforming classifier with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], classifier)
        classifier = defense()

    classifier.set_learning_phase(False)

    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"], skip_benign=skip_benign)
    if skip_benign:
        logger.info("Skipping benign classification...")
    else:
        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )

        logger.info("Running inference on benign examples...")
        for x_batch, y_batch in tqdm(test_data, desc="Benign"):
            for x, y in zip(x_batch, y_batch):
                # combine predictions across all stacks
                with metrics.resource_context(
                        name="Inference",
                        profiler=config["metric"].get("profiler_type"),
                        computational_resource_dict=metrics_logger.
                        computational_resource_dict,
                ):
                    y_pred = np.mean(classifier.predict(x, batch_size=1),
                                     axis=0)
                metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    # A missing "kwargs" or "targeted" entry is treated as untargeted.
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    if targeted and attack_config.get("use_label"):
        raise ValueError("Targeted attacks cannot have 'use_label'")
    if attack_type == "preloaded":
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split_type="adversarial",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, classifier)
        # Warn (without failing) if the config and the attack object disagree.
        if targeted != getattr(attack, "targeted", False):
            logger.warning(
                f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
            )
        attack.set_params(batch_size=1)
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(
                attack_config["targeted_labels"])
    for x_batch, y_batch in tqdm(test_data, desc="Attack"):
        if attack_type == "preloaded":
            # Transpose the preloaded batch so that iterating it below
            # yields one (x, x_adv) pair, and one (y, y_target) pair when
            # targeted, per sample.
            x_batch = list(zip(*x_batch))
            if targeted:
                y_batch = list(zip(*y_batch))
        for x, y in zip(x_batch, y_batch):
            # Only obtaining x_adv is inside the resource context.
            with metrics.resource_context(
                    name="Attack",
                    profiler=config["metric"].get("profiler_type"),
                    computational_resource_dict=metrics_logger.
                    computational_resource_dict,
            ):
                if attack_type == "preloaded":
                    x, x_adv = x
                    if targeted:
                        y, y_target = y
                else:
                    # each x is of shape (n_stack, 3, 16, 112, 112)
                    #    n_stack varies
                    if attack_config.get("use_label"):
                        # expansion required due to preprocessing
                        y_input = np.repeat(y, x.shape[0])
                        x_adv = attack.generate(x=x, y=y_input)
                    elif targeted:
                        y_target = label_targeter.generate(y)
                        y_input = np.repeat(y_target, x.shape[0])
                        x_adv = attack.generate(x=x, y=y_input)
                    else:
                        x_adv = attack.generate(x=x)
            # combine predictions across all stacks
            y_pred_adv = np.mean(classifier.predict(x_adv, batch_size=1),
                                 axis=0)
            # Targeted runs are scored against the target label only;
            # untargeted runs against the true label.
            if targeted:
                metrics_logger.update_task(y_target,
                                           y_pred_adv,
                                           adversarial=True)
            else:
                metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            metrics_logger.update_perturbation([x], [x_adv])
    metrics_logger.log_task(adversarial=True, targeted=targeted)
    return metrics_logger.results()
def _evaluate(self, config: dict) -> dict:
    """
    Evaluate the config and return a results dict

    Loads the classifier, applies an optional defense, optionally fits the
    model with `fit_generator`, and then records task metrics on the test
    split for benign inputs followed by adversarial inputs generated by the
    configured attack.

    Args:
        config: scenario configuration. The "model", "dataset", "metric" and
            "attack" entries are read; "defense" is optional.

    Returns:
        The results dict produced by the metrics logger.
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    defense_type = (config.get("defense") or {}).get("type")

    def _load_split(split_name, n_epochs):
        # Every dataset load in this scenario shares the same preprocessing.
        return load_dataset(
            config["dataset"],
            epochs=n_epochs,
            split_type=split_name,
            preprocessing_fn=preprocessing_fn,
        )

    if defense_type in ("Preprocessor", "Postprocessor"):
        logger.info(f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        train_data = _load_split("train", fit_kwargs["nb_epochs"])

        # Pick the object that performs the fit, then fit once.
        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            fitter = load_defense_wrapper(config["defense"], classifier)
        else:
            logger.info("Fitting classifier on clean train dataset...")
            fitter = classifier
        fitter.fit_generator(train_data, **fit_kwargs)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming classifier with {defense_type} defense...")
        transform = load_defense_wrapper(config["defense"], classifier)
        classifier = transform()

    classifier.set_learning_phase(False)

    # Benign pass over the test split
    logger.info(f"Loading test dataset {config['dataset']['name']}...")
    benign_batches = _load_split("test", 1)

    logger.info("Running inference on benign examples...")
    metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

    for x, y in tqdm(benign_batches, desc="Benign"):
        metrics_logger.update_task(y, classifier.predict(x))
    metrics_logger.log_task()

    # Adversarial pass over a freshly loaded test split
    logger.info("Generating / testing adversarial examples...")

    attack = load_attack(config["attack"], classifier)
    attack_batches = _load_split("test", 1)
    for x, y in tqdm(attack_batches, desc="Attack"):
        x_adv = attack.generate(x=x)
        y_pred_adv = classifier.predict(x_adv)
        metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        metrics_logger.update_perturbation(x, x_adv)
    metrics_logger.log_task(adversarial=True)

    return metrics_logger.results()
def _evaluate(
    self,
    config: dict,
    num_eval_batches: Optional[int],
    skip_benign: Optional[bool],
    skip_attack: Optional[bool],
) -> dict:
    """
    Evaluate the config and return a results dict

    Steps, in order: validate the evaluation batch size, load the classifier,
    optionally wrap it with an internal defense, optionally fit it, run the
    benign pass (unless skip_benign), then run the adversarial pass (unless
    skip_attack) using either a preloaded adversarial dataset or a freshly
    loaded attack.

    Args:
        config: scenario configuration. The "model", "dataset", "attack",
            "metric" and "scenario" entries are read; "defense" is optional.
            config["dataset"]["batch_size"] is temporarily overridden while
            the training dataset is loaded and is always restored afterwards.
        num_eval_batches: forwarded as `num_batches` to the dataset loaders
            used for evaluation.
        skip_benign: when truthy, the benign pass is not run.
        skip_attack: when truthy, results are returned right after the
            benign pass.

    Returns:
        The results dict produced by the metrics logger.

    Raises:
        ValueError: if config["dataset"]["batch_size"] is not 1, or if the
            attack is targeted and "use_label" is set.
    """
    if config["dataset"]["batch_size"] != 1:
        raise ValueError(
            "batch_size must be 1 for evaluation, due to variable length inputs.\n"
            " If training, set config['model']['fit_kwargs']['fit_batch_size']"
        )

    model_config = config["model"]
    classifier, fit_preprocessing_fn = load_model(model_config)

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        # Training may use its own batch size. Override the dataset config
        # only while the train split is loaded, and restore the evaluation
        # batch size even if loading fails, so the config is never left in
        # the overridden state.
        dataset_config = config["dataset"]
        eval_batch_size = dataset_config["batch_size"]
        dataset_config["batch_size"] = fit_kwargs.get(
            "fit_batch_size", eval_batch_size
        )
        try:
            train_data = load_dataset(
                dataset_config,
                epochs=fit_kwargs["nb_epochs"],
                split=dataset_config.get("train_split", "train"),
                preprocessing_fn=fit_preprocessing_fn,
                shuffle_files=True,
            )
        finally:
            dataset_config["batch_size"] = eval_batch_size

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            defense.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting classifier on clean train dataset...")
            classifier.fit_generator(train_data, **fit_kwargs)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming classifier with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], classifier)
        classifier = defense()

    classifier.set_learning_phase(False)

    attack_config = config["attack"]
    attack_type = attack_config.get("type")

    # A missing "kwargs" or "targeted" entry is treated as untargeted.
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"],
        skip_benign=skip_benign,
        skip_attack=skip_attack,
        targeted=targeted,
    )

    # The evaluation batch size is guaranteed to be 1 here: it is validated
    # at the top of this method and restored after the training load.
    eval_split = config["dataset"].get("eval_split", "test")
    if skip_benign:
        logger.info("Skipping benign classification...")
    else:
        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )

        logger.info("Running inference on benign examples...")
        for x, y in tqdm(test_data, desc="Benign"):
            # Ensure that input sample isn't overwritten by classifier
            x.flags.writeable = False
            with metrics.resource_context(
                name="Inference",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.computational_resource_dict,
            ):
                y_pred = classifier.predict(x)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

    if skip_attack:
        logger.info("Skipping attack generation...")
        return metrics_logger.results()

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    if targeted and attack_config.get("use_label"):
        raise ValueError("Targeted attacks cannot have 'use_label'")
    if attack_type == "preloaded":
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split="adversarial",
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, classifier)
        # Warn (without failing) if the config and the attack object disagree.
        if targeted != getattr(attack, "targeted", False):
            logger.warning(
                f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
            )
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(attack_config["targeted_labels"])

    export_samples = config["scenario"].get("export_samples")
    if export_samples is not None and export_samples > 0:
        sample_exporter = SampleExporter(
            self.scenario_output_dir, test_data.context, export_samples
        )
    else:
        sample_exporter = None

    for x, y in tqdm(test_data, desc="Attack"):
        # Only obtaining x_adv (unpacking or generating) is inside the
        # resource context; prediction and metric updates happen after it.
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=metrics_logger.computational_resource_dict,
        ):
            if attack_type == "preloaded":
                x, x_adv = x
                if targeted:
                    y, y_target = y
            else:
                # Copy so per-batch edits never touch the config itself.
                generate_kwargs = deepcopy(attack_config.get("generate_kwargs", {}))
                if attack_config.get("use_label"):
                    generate_kwargs["y"] = y
                elif targeted:
                    y_target = label_targeter.generate(y)
                    generate_kwargs["y"] = y_target
                x_adv = attack.generate(x=x, **generate_kwargs)

        # Ensure that input sample isn't overwritten by classifier
        x_adv.flags.writeable = False
        y_pred_adv = classifier.predict(x_adv)
        metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        if targeted:
            metrics_logger.update_task(
                y_target, y_pred_adv, adversarial=True, targeted=True
            )
        metrics_logger.update_perturbation(x, x_adv)
        if sample_exporter is not None:
            sample_exporter.export(x, x_adv, y, y_pred_adv)
    metrics_logger.log_task(adversarial=True)
    if targeted:
        metrics_logger.log_task(adversarial=True, targeted=True)
    return metrics_logger.results()
def _evaluate(
    self,
    config: dict,
    num_eval_batches: Optional[int],
    skip_benign: Optional[bool],
    skip_attack: Optional[bool],
) -> dict:
    """
    Evaluate the config and return a results dict

    Task metrics are tracked by one logger; perturbation metrics are tracked
    by separate loggers for the SAR and EO parts of the input, chosen by
    `self.attack_modality` ("sar", "eo" or "both"). The perturbation results
    are merged into the returned dict under "sar_" and "eo_" key prefixes.

    Args:
        config: scenario configuration. The "model", "dataset", "attack",
            "metric" and "scenario" entries are read; "defense" is optional.
            config["metric"] must contain "perturbation", and also "task"
            when the attack pass runs, since both are popped from copies.
        num_eval_batches: forwarded as `num_batches` to the dataset loaders
            used for evaluation.
        skip_benign: when truthy, the benign pass is not run.
        skip_attack: when truthy, only the task results are returned, right
            after the benign pass.

    Returns:
        The task results dict, extended with prefixed SAR/EO perturbation
        results when the attack pass runs.

    Raises:
        AssertionError: if configured attack channels are not a list or fall
            outside the range allowed for the selected modality.
        NotImplementedError: if fitting raises NotImplementedError.
        ValueError: if the attack is targeted and "use_label" is set.
    """
    model_config = config["model"]
    estimator, _ = load_model(model_config)

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to estimator")
        estimator = load_defense_internal(config["defense"], estimator)

    attack_config = config["attack"]
    attack_channels = attack_config.get("generate_kwargs", {}).get("channels")

    # Channel indices used below: 0-3 are treated as SAR, 4-13 as EO.
    if attack_channels is None:
        # No explicit channels: default to every channel of the modality.
        # For any other modality value, attack_channels stays None.
        if self.attack_modality == "sar":
            logger.info("No mask configured. Attacking all SAR channels")
            attack_channels = range(4)
        elif self.attack_modality == "eo":
            logger.info("No mask configured. Attacking all EO channels")
            attack_channels = range(4, 14)
        elif self.attack_modality == "both":
            logger.info("No mask configured. Attacking all SAR and EO channels")
            attack_channels = range(14)
    else:
        # Explicit channels: validate them against the selected modality.
        assert isinstance(
            attack_channels, list
        ), "Mask is specified, but incorrect format. Expected list"
        attack_channels = np.array(attack_channels)
        if self.attack_modality == "sar":
            assert np.all(
                np.logical_and(attack_channels >= 0, attack_channels < 4)
            ), "Selected SAR-only attack modality, but specify non-SAR channels"
        elif self.attack_modality == "eo":
            assert np.all(
                np.logical_and(attack_channels >= 4, attack_channels < 14)
            ), "Selected EO-only attack modality, but specify non-EO channels"
        elif self.attack_modality == "both":
            assert np.all(
                np.logical_and(attack_channels >= 0, attack_channels < 14)
            ), "Selected channels are out-of-bounds"

    if model_config["fit"]:
        try:
            estimator.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split=config["dataset"].get("train_split", "train"),
                shuffle_files=True,
            )
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], estimator)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting estimator on clean train dataset...")
                estimator.fit_generator(train_data, **fit_kwargs)
        except NotImplementedError:
            # Re-raised with a message specific to this scenario.
            raise NotImplementedError(
                "Training has not yet been implemented for object detectors"
            )

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming estimator with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], estimator)
        estimator = defense()

    try:
        estimator.set_learning_phase(False)
    except NotImplementedError:
        logger.warning(
            "Unable to set estimator's learning phase. As of ART 1.4.1, "
            "this is not yet supported for object detectors."
        )

    attack_type = attack_config.get("type")
    # A missing "kwargs" or "targeted" entry is treated as untargeted.
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))

    # The performance logger gets the metric config without "perturbation";
    # perturbation metrics are handled by the separate loggers further down.
    performance_metrics = deepcopy(config["metric"])
    performance_metrics.pop("perturbation")
    performance_logger = metrics.MetricsLogger.from_config(
        performance_metrics,
        skip_benign=skip_benign,
        skip_attack=skip_attack,
        targeted=targeted,
    )

    eval_split = config["dataset"].get("eval_split", "test")
    if skip_benign:
        logger.info("Skipping benign classification...")
    else:
        # Evaluate the ART estimator on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )

        logger.info("Running inference on benign examples...")
        for x, y in tqdm(test_data, desc="Benign"):
            # Ensure that input sample isn't overwritten by estimator
            x.flags.writeable = False
            with metrics.resource_context(
                name="Inference",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=performance_logger.computational_resource_dict,
            ):
                y_pred = estimator.predict(x)
            performance_logger.update_task(y, y_pred)
        performance_logger.log_task()

    if skip_attack:
        logger.info("Skipping attack generation...")
        return performance_logger.results()

    # Evaluate the ART estimator on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    # The perturbation loggers get the metric config without "task". One is
    # created per attacked modality; the other stays None.
    perturbation_metrics = deepcopy(config["metric"])
    perturbation_metrics.pop("task")
    if self.attack_modality in ("sar", "both"):
        sar_perturbation_logger = metrics.MetricsLogger.from_config(
            perturbation_metrics,
            skip_benign=True,
            skip_attack=False,
            targeted=targeted,
        )
    else:
        sar_perturbation_logger = None

    if self.attack_modality in ("eo", "both"):
        eo_perturbation_logger = metrics.MetricsLogger.from_config(
            perturbation_metrics,
            skip_benign=True,
            skip_attack=False,
            targeted=targeted,
        )
    else:
        eo_perturbation_logger = None

    if targeted and attack_config.get("use_label"):
        raise ValueError("Targeted attacks cannot have 'use_label'")
    if attack_type == "preloaded":
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split="adversarial",
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, estimator)
        # Warn (without failing) if the config and the attack object disagree.
        if targeted != getattr(attack, "targeted", False):
            logger.warning(
                f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
            )
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(attack_config["targeted_labels"])

    export_samples = config["scenario"].get("export_samples")
    if export_samples is not None and export_samples > 0:
        sample_exporter = SampleExporter(
            self.scenario_output_dir, test_data.context, export_samples
        )
    else:
        sample_exporter = None

    for x, y in tqdm(test_data, desc="Attack"):
        # Only obtaining x_adv (unpacking or generating) is inside the
        # resource context; prediction and metric updates happen after it.
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=performance_logger.computational_resource_dict,
        ):
            if attack_type == "preloaded":
                # This warning is emitted once per batch.
                logger.warning(
                    "Specified preloaded attack. Ignoring `attack_modality` parameter"
                )
                # A length-2 x is unpacked as (benign, adversarial);
                # otherwise x itself is used as the adversarial input.
                if len(x) == 2:
                    x, x_adv = x
                else:
                    x_adv = x
                if targeted:
                    y, y_target = y
            else:
                # Copy so per-batch edits never touch the config itself.
                generate_kwargs = deepcopy(attack_config.get("generate_kwargs", {}))
                # The channel selection is passed to the attack as "mask".
                # NOTE(review): a configured "channels" entry is still present
                # in generate_kwargs and is forwarded too - confirm the
                # attack's generate() accepts it.
                generate_kwargs["mask"] = attack_channels
                if attack_config.get("use_label"):
                    generate_kwargs["y"] = y
                elif targeted:
                    y_target = label_targeter.generate(y)
                    generate_kwargs["y"] = y_target
                x_adv = attack.generate(x=x, **generate_kwargs)

        # Ensure that input sample isn't overwritten by estimator
        x_adv.flags.writeable = False
        y_pred_adv = estimator.predict(x_adv)
        performance_logger.update_task(y, y_pred_adv, adversarial=True)
        if targeted:
            performance_logger.update_task(
                y_target, y_pred_adv, adversarial=True, targeted=True
            )

        # Update perturbation metrics for SAR/EO separately
        # Last-axis channels 0/1 and 2/3 are combined as the real and
        # imaginary parts of two complex values, stacked along axis 3.
        x_sar = np.stack(
            (x[..., 0] + 1j * x[..., 1], x[..., 2] + 1j * x[..., 3]), axis=3
        )
        x_adv_sar = np.stack(
            (
                x_adv[..., 0] + 1j * x_adv[..., 1],
                x_adv[..., 2] + 1j * x_adv[..., 3],
            ),
            axis=3,
        )
        # Last-axis channels from index 4 onward are taken as the EO part.
        x_eo = x[..., 4:]
        x_adv_eo = x_adv[..., 4:]
        if sar_perturbation_logger is not None:
            sar_perturbation_logger.update_perturbation(x_sar, x_adv_sar)
        if eo_perturbation_logger is not None:
            eo_perturbation_logger.update_perturbation(x_eo, x_adv_eo)

        if sample_exporter is not None:
            sample_exporter.export(x, x_adv, y, y_pred_adv)

    performance_logger.log_task(adversarial=True)
    if targeted:
        performance_logger.log_task(adversarial=True, targeted=True)

    # Merge performance, SAR, EO results
    combined_results = performance_logger.results()
    if sar_perturbation_logger is not None:
        combined_results.update(
            {f"sar_{k}": v for k, v in sar_perturbation_logger.results().items()}
        )
    if eo_perturbation_logger is not None:
        combined_results.update(
            {f"eo_{k}": v for k, v in eo_perturbation_logger.results().items()}
        )
    return combined_results
def _evaluate(
    self, config: dict, num_eval_batches: Optional[int], skip_benign: Optional[bool]
) -> dict:
    """
    Evaluate the config and return a results dict

    Loads the model described by ``config["model"]``, optionally applies a
    defense and fits the estimator, then runs a benign pass (unless
    ``skip_benign``) followed by an adversarial pass over the eval split.

    Args:
        config: scenario configuration. This method reads the "model",
            "dataset", "metric" and "attack" entries and, when present,
            "defense" and "adhoc".
        num_eval_batches: forwarded as ``num_batches`` when the evaluation
            datasets are loaded.
        skip_benign: when truthy, the benign inference pass is skipped.

    Returns:
        The value of ``metrics_logger.results()``.
    """
    model_config = config["model"]
    estimator, fit_preprocessing_fn = load_model(model_config)

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to estimator")
        estimator = load_defense_internal(config["defense"], estimator)

    if model_config["fit"]:
        try:
            estimator.set_learning_phase(True)
        except NotImplementedError:
            logger.exception("set_learning_phase error; training may not work.")

        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        # Temporarily swap in the training batch size; the evaluation batch
        # size is restored right after the train dataset has been loaded.
        batch_size = config["dataset"].pop("batch_size")
        config["dataset"]["batch_size"] = fit_kwargs.get("fit_batch_size", batch_size)
        train_data = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split_type=config["dataset"].get("train_split", "train_clean100"),
            preprocessing_fn=fit_preprocessing_fn,
            shuffle_files=True,
        )
        config["dataset"]["batch_size"] = batch_size

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], estimator)
            defense.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting estimator on clean train dataset...")
            estimator.fit_generator(train_data, **fit_kwargs)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming estimator with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], estimator)
        estimator = defense()

    try:
        estimator.set_learning_phase(False)
    except NotImplementedError:
        logger.warning(
            "Unable to set estimator's learning phase. As of ART 1.4.1, "
            "this is not yet supported for speech recognition models."
        )

    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"], skip_benign=skip_benign
    )
    if config["dataset"]["batch_size"] != 1:
        logger.warning("Evaluation batch_size != 1 may not be supported.")

    predict_kwargs = config["model"].get("predict_kwargs", {})
    eval_split = config["dataset"].get("eval_split", "test_clean")

    if skip_benign:
        logger.info("Skipping benign classification...")
    else:
        # Evaluate the ART estimator on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        logger.info("Running inference on benign examples...")
        for x, y in tqdm(test_data, desc="Benign"):
            # Ensure that input sample isn't overwritten by estimator
            x.flags.writeable = False
            with metrics.resource_context(
                name="Inference",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.computational_resource_dict,
            ):
                y_pred = estimator.predict(x, **predict_kwargs)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

    # Imperceptible attack still WIP
    if (config.get("adhoc") or {}).get("skip_adversarial"):
        logger.info("Skipping adversarial classification...")
        return metrics_logger.results()

    # Evaluate the ART estimator on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    targeted = bool(attack_config.get("targeted"))
    if attack_type == "preloaded":
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split_type="adversarial",
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, estimator)
        # Not every attack object exposes a `targeted` attribute; treat a
        # missing attribute as untargeted instead of raising AttributeError.
        attack_targeted = getattr(attack, "targeted", False)
        if targeted != attack_targeted:
            logger.warning(
                f"targeted config {targeted} != attack field {attack_targeted}"
            )
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(attack_config["targeted_labels"])

    for x, y in tqdm(test_data, desc="Attack"):
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=metrics_logger.computational_resource_dict,
        ):
            if attack_type == "preloaded":
                # Preloaded batches carry (benign, adversarial) inputs and,
                # for targeted attacks, (true, target) labels.
                x, x_adv = x
                if targeted:
                    y, y_target = y
            elif attack_config.get("use_label"):
                x_adv = attack.generate(x=x, y=y)
            elif targeted:
                y_target = label_targeter.generate(y)
                x_adv = attack.generate(x=x, y=y_target)
            else:
                x_adv = attack.generate(x=x)

        # Ensure that input sample isn't overwritten by estimator
        x_adv.flags.writeable = False
        y_pred_adv = estimator.predict(x_adv, **predict_kwargs)
        metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        metrics_logger.update_perturbation(x, x_adv)

    # NOTE(review): targeted=True is passed unconditionally although the task
    # metrics above are updated against `y` only - confirm this is intended.
    metrics_logger.log_task(adversarial=True, targeted=True)
    return metrics_logger.results()
def _evaluate(
    self,
    config: dict,
    num_eval_batches: Optional[int],
    skip_benign: Optional[bool],
    skip_attack: Optional[bool],
    skip_misclassified: Optional[bool],
) -> dict:
    """
    Evaluate the config and return a results dict

    Loads the model, optionally prepends an audio channel preprocessor and a
    defense, optionally fits the estimator, then runs the benign pass (unless
    ``skip_benign``) and the adversarial pass (unless ``skip_attack`` or the
    adhoc ``skip_adversarial`` flag is set).

    Args:
        config: scenario configuration. This method reads the "model",
            "dataset", "metric", "attack" and "scenario" entries and, when
            present, "defense" and "adhoc".
        num_eval_batches: forwarded as ``num_batches`` when the evaluation
            datasets are loaded.
        skip_benign: when truthy, the benign inference pass is skipped.
        skip_attack: when truthy, results are returned right after the
            benign pass.
        skip_misclassified: must be falsy for this scenario.

    Returns:
        The value of ``metrics_logger.results()``.

    Raises:
        ValueError: if ``skip_misclassified`` is set, or if an audio channel
            config lacks the "delay" or "attenuation" key.
    """
    if skip_misclassified:
        raise ValueError("skip_misclassified shouldn't be set for ASR scenario")

    model_config = config["model"]
    estimator, fit_preprocessing_fn = load_model(model_config)

    # `adhoc` may be present but set to None; guard against that the same way
    # the skip_adversarial lookup further down does.
    audio_channel_config = (config.get("adhoc") or {}).get("audio_channel")
    if audio_channel_config is not None:
        logger.info("loading audio channel")
        for k in "delay", "attenuation":
            if k not in audio_channel_config:
                raise ValueError(f"audio_channel must have key {k}")
        audio_channel = load_audio_channel(**audio_channel_config)
        # The channel goes first in the estimator's preprocessing defences.
        if estimator.preprocessing_defences:
            estimator.preprocessing_defences.insert(0, audio_channel)
        else:
            estimator.preprocessing_defences = [audio_channel]
        estimator._update_preprocessing_operations()

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to estimator")
        estimator = load_defense_internal(config["defense"], estimator)

    if model_config["fit"]:
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        # Temporarily swap in the training batch size; the evaluation batch
        # size is restored right after the train dataset has been loaded.
        batch_size = config["dataset"].pop("batch_size")
        config["dataset"]["batch_size"] = fit_kwargs.get("fit_batch_size", batch_size)
        train_data = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split=config["dataset"].get("train_split", "train_clean100"),
            preprocessing_fn=fit_preprocessing_fn,
            shuffle_files=True,
        )
        config["dataset"]["batch_size"] = batch_size

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], estimator)
            defense.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting estimator on clean train dataset...")
            estimator.fit_generator(train_data, **fit_kwargs)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming estimator with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], estimator)
        estimator = defense()

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    targeted = bool(attack_config.get("targeted"))

    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"],
        skip_benign=skip_benign,
        skip_attack=skip_attack,
        targeted=targeted,
    )
    if config["dataset"]["batch_size"] != 1:
        logger.warning("Evaluation batch_size != 1 may not be supported.")

    predict_kwargs = config["model"].get("predict_kwargs", {})
    eval_split = config["dataset"].get("eval_split", "test_clean")

    if skip_benign:
        logger.info("Skipping benign classification...")
    else:
        # Evaluate the ART estimator on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        logger.info("Running inference on benign examples...")
        for x, y in tqdm(test_data, desc="Benign"):
            # Ensure that input sample isn't overwritten by estimator
            x.flags.writeable = False
            with metrics.resource_context(
                name="Inference",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.computational_resource_dict,
            ):
                y_pred = estimator.predict(x, **predict_kwargs)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

    if skip_attack:
        logger.info("Skipping attack generation...")
        return metrics_logger.results()

    # Imperceptible attack still WIP
    if (config.get("adhoc") or {}).get("skip_adversarial"):
        logger.info("Skipping adversarial classification...")
        return metrics_logger.results()

    # Evaluate the ART estimator on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    if attack_type == "preloaded":
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split="adversarial",
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, estimator)
        # Not every attack object exposes a `targeted` attribute; treat a
        # missing attribute as untargeted instead of raising AttributeError.
        attack_targeted = getattr(attack, "targeted", False)
        if targeted != attack_targeted:
            logger.warning(
                f"targeted config {targeted} != attack field {attack_targeted}"
            )
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(attack_config["targeted_labels"])

    export_samples = config["scenario"].get("export_samples")
    if export_samples is not None and export_samples > 0:
        sample_exporter = SampleExporter(
            self.scenario_output_dir, test_data.context, export_samples
        )
    else:
        sample_exporter = None

    for x, y in tqdm(test_data, desc="Attack"):
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=metrics_logger.computational_resource_dict,
        ):
            if attack_type == "preloaded":
                # Preloaded batches carry (benign, adversarial) inputs and,
                # for targeted attacks, (true, target) labels.
                x, x_adv = x
                if targeted:
                    y, y_target = y
            elif attack_config.get("use_label"):
                x_adv = attack.generate(x=x, y=y)
            elif targeted:
                y_target = label_targeter.generate(y)
                x_adv = attack.generate(x=x, y=y_target)
            else:
                x_adv = attack.generate(x=x)

        # Ensure that input sample isn't overwritten by estimator
        x_adv.flags.writeable = False
        y_pred_adv = estimator.predict(x_adv, **predict_kwargs)
        metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        if targeted:
            metrics_logger.update_task(
                y_target,
                y_pred_adv,
                adversarial=True,
                targeted=True,
            )
        metrics_logger.update_perturbation(x, x_adv)
        if sample_exporter is not None:
            sample_exporter.export(x, x_adv, y, y_pred_adv)

    metrics_logger.log_task(adversarial=True)
    if targeted:
        metrics_logger.log_task(adversarial=True, targeted=True)
    return metrics_logger.results()
def _evaluate(
    self,
    config: dict,
    num_eval_batches: Optional[int],
    skip_benign: Optional[bool],
    skip_attack: Optional[bool],
    skip_misclassified: Optional[bool],
) -> dict:
    """
    Evaluate the config and return a results dict

    Validates the flags and attack/dataset configuration, loads the model
    (optionally with a defense and a fit step), then generates targeted
    adversarial examples one batch at a time and records task and
    perturbation metrics for them. No benign pass is run.

    Returns:
        The value of ``metrics_logger.results()``.

    Raises:
        ValueError: for unsupported flags, a preloaded attack, a missing
            "targeted_labels" key, a truthy "use_label", or a batch size
            other than 1.
        NotImplementedError: if fitting raises NotImplementedError.
    """
    # --- flag validation ---------------------------------------------------
    if skip_misclassified:
        raise ValueError("skip_misclassified shouldn't be set for D-APRICOT scenario")
    if skip_attack:
        raise ValueError("--skip-attack should not be set for D-APRICOT scenario.")
    if skip_benign:
        logger.warning(
            "--skip-benign is being ignored since the D-APRICOT"
            " scenario doesn't include benign evaluation."
        )

    # --- config validation (each failing check raises, so plain `if`s) -----
    attack_config = config["attack"]
    if attack_config.get("type") == "preloaded":
        raise ValueError(
            "D-APRICOT scenario should not have preloaded set to True in attack config"
        )
    if "targeted_labels" not in attack_config:
        raise ValueError(
            "Attack config must have 'targeted_labels' key, as the "
            "D-APRICOT threat model is targeted."
        )
    if attack_config.get("use_label"):
        raise ValueError(
            "The D-APRICOT scenario threat model is targeted, and"
            " thus 'use_label' should be set to false."
        )
    if config["dataset"].get("batch_size") != 1:
        raise ValueError("batch_size of 1 is required for D-APRICOT scenario")

    # --- model, defense and optional training ------------------------------
    model_config = config["model"]
    estimator, _ = load_model(model_config)

    defense_type = (config.get("defense") or {}).get("type")
    label_targeter = load_label_targeter(attack_config["targeted_labels"])

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to estimator")
        estimator = load_defense_internal(config["defense"], estimator)

    if model_config["fit"]:
        try:
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split=config["dataset"].get("train_split", "train"),
                shuffle_files=True,
            )
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                trainer = load_defense_wrapper(config["defense"], estimator)
                trainer.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting estimator on clean train dataset...")
                estimator.fit_generator(train_data, **fit_kwargs)
        except NotImplementedError:
            raise NotImplementedError(
                "Training has not yet been implemented for object detectors"
            )

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming estimator with {defense_type} defense...")
        transform = load_defense_wrapper(config["defense"], estimator)
        estimator = transform()

    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"],
        skip_benign=True,
        skip_attack=False,
        targeted=True,
    )

    eval_split = config["dataset"].get("eval_split", "test")

    # Evaluate the ART estimator on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack = load_attack(attack_config, estimator)
    test_data = load_dataset(
        config["dataset"],
        epochs=1,
        split=eval_split,
        num_batches=num_eval_batches,
        shuffle_files=False,
    )

    export_samples = config["scenario"].get("export_samples")
    sample_exporter = (
        SampleExporter(self.scenario_output_dir, test_data.context, export_samples)
        if export_samples is not None and export_samples > 0
        else None
    )

    for x, y in tqdm(test_data, desc="Attack"):
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=metrics_logger.computational_resource_dict,
        ):
            if x.shape[0] != 1:
                raise ValueError("D-APRICOT batch size must be set to 1")
            # (nb=1, num_cameras, h, w, c) --> (num_cameras, h, w, c)
            x = x[0]
            y_object, y_patch_metadata = y

            generate_kwargs = deepcopy(attack_config.get("generate_kwargs", {}))
            generate_kwargs["y_patch_metadata"] = y_patch_metadata
            y_target = label_targeter.generate(y_object)
            generate_kwargs["y_object"] = y_target

            x_adv = attack.generate(x=x, **generate_kwargs)

        # Ensure that input sample isn't overwritten by estimator
        x_adv.flags.writeable = False
        y_pred_adv = estimator.predict(x_adv)

        # Metrics are updated one image at a time, each wrapped in a list.
        for img_idx in range(len(y_object)):
            metrics_logger.update_task(
                [y_target[img_idx]],
                [y_pred_adv[img_idx]],
                adversarial=True,
                targeted=True,
            )

        metrics_logger.update_perturbation(x, x_adv)
        if sample_exporter is not None:
            sample_exporter.export(x, x_adv, y_object, y_pred_adv)

    metrics_logger.log_task(adversarial=True, targeted=True)
    return metrics_logger.results()
def _evaluate(self, config: dict) -> dict:
    """
    Evaluate the config and return a results dict

    Loads the classifier, optionally applies a defense, optionally fits it
    and saves the resulting weights, then measures task metrics on the
    benign test split and on adversarial examples (generated or preloaded).

    Returns:
        The value of ``metrics_logger.results()``.

    Raises:
        ValueError: if the attack is targeted and "use_label" is set.
        NotImplementedError: for a targeted, non-preloaded attack.
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    defense_type = (config.get("defense") or {}).get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        train_data = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split_type="train",
            preprocessing_fn=preprocessing_fn,
        )

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            trainer = load_defense_wrapper(config["defense"], classifier)
            trainer.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting classifier on clean train dataset...")
            classifier.fit_generator(train_data, **fit_kwargs)

        # Save weights at the end of training. The checkpoint name is built
        # from the model module path, the "pretrained" flag and nb_epochs.
        pretrained_tag = (
            "_pretrained" if model_config["model_kwargs"]["pretrained"] else ""
        )
        epochs_tag = "_epochs%d.pth" % model_config["fit_kwargs"]["nb_epochs"]
        ckpt_name = model_config["module"].replace(".", "_") + pretrained_tag + epochs_tag
        classifier.save(osp.join(paths.runtime_paths().saved_model_dir, ckpt_name))
        logger.info(f"Saved classifier {ckpt_name} ...")

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming classifier with {defense_type} defense...")
        transform = load_defense_wrapper(config["defense"], classifier)
        classifier = transform()

    classifier.set_learning_phase(False)

    # Evaluate the ART classifier on benign test examples
    logger.info(f"Loading test dataset {config['dataset']['name']}...")
    test_data = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
    )

    logger.info("Running inference on benign examples...")
    metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

    for x, y in tqdm(test_data, desc="Benign"):
        metrics_logger.update_task(y, classifier.predict(x))
    metrics_logger.log_task()

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    use_label = attack_config.get("use_label")
    if targeted and use_label:
        raise ValueError("Targeted attacks cannot have 'use_label'")

    preloaded = attack_type == "preloaded"
    if preloaded:
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split_type="adversarial",
            preprocessing_fn=preprocessing_fn,
        )
    else:
        attack = load_attack(attack_config, classifier)
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
        )

    for x, y in tqdm(test_data, desc="Attack"):
        if preloaded:
            # Preloaded batches carry (benign, adversarial) inputs and, for
            # targeted attacks, (true, target) labels.
            x, x_adv = x
            if targeted:
                y, y_target = y
        elif use_label:
            x_adv = attack.generate(x=x, y=y)
        elif targeted:
            raise NotImplementedError("Requires generation of target labels")
            # x_adv = attack.generate(x=x, y=y_target)
        else:
            x_adv = attack.generate(x=x)

        y_pred_adv = classifier.predict(x_adv)
        # NOTE: does not remove data points where y == y_target
        reference_labels = y_target if targeted else y
        metrics_logger.update_task(reference_labels, y_pred_adv, adversarial=True)
        metrics_logger.update_perturbation(x, x_adv)

    metrics_logger.log_task(adversarial=True, targeted=targeted)
    return metrics_logger.results()
def _evaluate(
    self, config: dict, num_eval_batches: Optional[int], skip_benign: Optional[bool]
) -> dict:
    """
    Evaluate the config and return a results dict

    Loads the classifier, optionally applies a defense and a fit step, runs
    the benign pass unless ``skip_benign`` is truthy, and then evaluates the
    classifier on adversarial examples that are either preloaded or generated
    by the configured attack.

    Args:
        config: scenario configuration ("model", "dataset", "metric",
            "attack" and optionally "defense" are read here).
        num_eval_batches: forwarded as ``num_batches`` to the eval datasets.
        skip_benign: when truthy, the benign inference pass is skipped.

    Returns:
        The value of ``metrics_logger.results()``.

    Raises:
        ValueError: if the attack is targeted and "use_label" is set.
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    defense_type = (config.get("defense") or {}).get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        train_data = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split_type="train",
            preprocessing_fn=preprocessing_fn,
            shuffle_files=True,
        )
        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            trainer = load_defense_wrapper(config["defense"], classifier)
            trainer.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting classifier on clean train dataset...")
            classifier.fit_generator(train_data, **fit_kwargs)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming classifier with {defense_type} defense...")
        transform = load_defense_wrapper(config["defense"], classifier)
        classifier = transform()

    classifier.set_learning_phase(False)

    metrics_logger = metrics.MetricsLogger.from_config(
        config["metric"], skip_benign=skip_benign
    )

    if not skip_benign:
        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        logger.info("Running inference on benign examples...")
        for x, y in tqdm(test_data, desc="Benign"):
            with metrics.resource_context(
                name="Inference",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.computational_resource_dict,
            ):
                y_pred = classifier.predict(x)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()
    else:
        logger.info("Skipping benign classification...")

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    use_label = attack_config.get("use_label")
    if targeted and use_label:
        raise ValueError("Targeted attacks cannot have 'use_label'")

    preloaded = attack_type == "preloaded"
    if preloaded:
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split_type="adversarial",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
    else:
        attack = load_attack(attack_config, classifier)
        # Warn when the config and the loaded attack disagree on targeting;
        # an attack without a `targeted` attribute counts as untargeted.
        attack_targeted = getattr(attack, "targeted", False)
        if targeted != attack_targeted:
            logger.warning(
                f"targeted config {targeted} != attack field {attack_targeted}"
            )
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )
        if targeted:
            label_targeter = load_label_targeter(attack_config["targeted_labels"])

    for x, y in tqdm(test_data, desc="Attack"):
        with metrics.resource_context(
            name="Attack",
            profiler=config["metric"].get("profiler_type"),
            computational_resource_dict=metrics_logger.computational_resource_dict,
        ):
            if preloaded:
                # Preloaded batches carry (benign, adversarial) inputs and,
                # for targeted attacks, (true, target) labels.
                x, x_adv = x
                if targeted:
                    y, y_target = y
            elif use_label:
                x_adv = attack.generate(x=x, y=y)
            elif targeted:
                y_target = label_targeter.generate(y)
                x_adv = attack.generate(x=x, y=y_target)
            else:
                x_adv = attack.generate(x=x)

        y_pred_adv = classifier.predict(x_adv)
        # Targeted runs are scored against the target labels, untargeted
        # runs against the true labels.
        reference_labels = y_target if targeted else y
        metrics_logger.update_task(reference_labels, y_pred_adv, adversarial=True)
        metrics_logger.update_perturbation(x, x_adv)

    metrics_logger.log_task(adversarial=True, targeted=targeted)
    return metrics_logger.results()
def _evaluate(self, config: dict) -> dict:
    """
    Evaluate a config file for classification robustness against attack.

    Every batch is passed through ``segment(x, y, n_tbins)`` before it is
    handed to the classifier. When no weights file is configured the
    classifier is trained first, with a validation pass each time a full
    epoch of batches has been consumed.

    Returns:
        A dict with "mean_benign_accuracy" and "mean_adversarial_accuracy".
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    n_tbins = 100  # number of time bins in spectrogram input to model

    task_metric = metrics.categorical_accuracy

    # Train ART classifier
    if not model_config["weights_file"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]
        train_data_generator = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split_type="train",
            preprocessing_fn=preprocessing_fn,
        )

        for batch_idx, (x, y) in tqdm(enumerate(train_data_generator)):
            x_seg, y_seg = segment(x, y, n_tbins)
            classifier.fit(
                x_seg,
                y_seg,
                batch_size=config["dataset"]["batch_size"],
                nb_epochs=1,
                verbose=True,
            )

            # Only validate once a whole epoch worth of batches was seen.
            if (batch_idx + 1) % train_data_generator.batches_per_epoch != 0:
                continue

            # evaluate on validation examples
            val_data_generator = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="validation",
                preprocessing_fn=preprocessing_fn,
            )

            n_val = 0
            validation_accuracies = []
            for x_val, y_val in tqdm(val_data_generator):
                x_val_seg, y_val_seg = segment(x_val, y_val, n_tbins)
                y_pred = classifier.predict(x_val_seg)
                validation_accuracies.extend(task_metric(y_val_seg, y_pred))
                n_val += len(y_val_seg)
            validation_accuracy = sum(validation_accuracies) / n_val
            logger.info("Validation accuracy: {}".format(validation_accuracy))

    classifier.set_learning_phase(False)

    # Evaluate ART classifier on test examples
    logger.info(f"Loading testing dataset {config['dataset']['name']}...")
    test_data_generator = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
    )

    logger.info("Running inference on benign test examples...")

    n_benign = 0
    benign_accuracies = []
    for x, y in tqdm(test_data_generator, desc="Benign"):
        x_seg, y_seg = segment(x, y, n_tbins)
        y_pred = classifier.predict(x_seg)
        benign_accuracies.extend(task_metric(y_seg, y_pred))
        n_benign += len(y_seg)

    benign_accuracy = sum(benign_accuracies) / n_benign
    logger.info(f"Accuracy on benign test examples: {benign_accuracy:.2%}")

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating / testing adversarial examples...")

    attack = load_attack(config["attack"], classifier)
    # The test split is loaded a second time for the adversarial pass.
    test_data_generator = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
    )

    n_adversarial = 0
    adversarial_accuracies = []
    for x, y in tqdm(test_data_generator, desc="Attack"):
        x_seg, y_seg = segment(x, y, n_tbins)
        x_adv = attack.generate(x=x_seg)
        y_pred = classifier.predict(x_adv)
        adversarial_accuracies.extend(task_metric(y_seg, y_pred))
        n_adversarial += len(y_seg)

    adversarial_accuracy = sum(adversarial_accuracies) / n_adversarial
    logger.info(
        f"Accuracy on adversarial test examples: {adversarial_accuracy:.2%}"
    )

    return {
        "mean_benign_accuracy": benign_accuracy,
        "mean_adversarial_accuracy": adversarial_accuracy,
    }
def _evaluate(self, config: dict) -> dict:
    """
    Evaluate the config and return a results dict

    Loads the classifier (with separate fit/predict preprocessing functions
    when ``load_model`` returns a tuple), optionally applies a defense and a
    fit step, then measures task metrics on the benign test split and on
    adversarial examples. The mean signal-to-noise ratio of the adversarial
    examples is logged as well.

    Returns:
        The value of ``metrics_logger.results()``.

    Raises:
        ValueError: if the attack is targeted and "use_label" is set.
        NotImplementedError: for a targeted, non-preloaded attack.
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)
    if isinstance(preprocessing_fn, tuple):
        fit_preprocessing_fn, predict_preprocessing_fn = preprocessing_fn
    else:
        fit_preprocessing_fn = predict_preprocessing_fn = preprocessing_fn

    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(f"Loading train dataset {config['dataset']['name']}...")
        # Temporarily swap in the training batch size; the evaluation batch
        # size is restored right after the train dataset has been loaded.
        # `adhoc` may be present but None, hence the `or {}` guard.
        batch_size = config["dataset"].pop("batch_size")
        config["dataset"]["batch_size"] = (config.get("adhoc") or {}).get(
            "fit_batch_size", batch_size
        )
        train_data = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split_type="train",
            preprocessing_fn=fit_preprocessing_fn,
        )
        config["dataset"]["batch_size"] = batch_size

        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            defense.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting classifier on clean train dataset...")
            classifier.fit_generator(train_data, **fit_kwargs)

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(f"Transforming classifier with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], classifier)
        classifier = defense()

    # NOTE: an ad-hoc hack that saved the model and optimizer state with
    # torch.save used to live here; it was disabled and has been removed.

    classifier.set_learning_phase(False)

    # Evaluate the ART classifier on benign test examples
    logger.info(f"Loading test dataset {config['dataset']['name']}...")
    test_data = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=predict_preprocessing_fn,
    )

    logger.info("Running inference on benign examples...")
    metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

    for x, y in tqdm(test_data, desc="Benign"):
        y_pred = classifier.predict(x)
        metrics_logger.update_task(y, y_pred)
    metrics_logger.log_task()

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    if targeted and attack_config.get("use_label"):
        raise ValueError("Targeted attacks cannot have 'use_label'")
    if attack_type == "preloaded":
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split_type="adversarial",
            preprocessing_fn=predict_preprocessing_fn,
        )
    else:
        attack = load_attack(attack_config, classifier)
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=predict_preprocessing_fn,
        )

    # Per-batch signal-to-noise ratios (dB) of the adversarial examples.
    snrs = []
    for x, y in tqdm(test_data, desc="Attack"):
        if attack_type == "preloaded":
            # Preloaded batches carry (benign, adversarial) inputs and, for
            # targeted attacks, (true, target) labels.
            x, x_adv = x
            if targeted:
                y, y_target = y
        elif attack_config.get("use_label"):
            x_adv = attack.generate(x=x, y=y)
        elif targeted:
            raise NotImplementedError("Requires generation of target labels")
            # x_adv = attack.generate(x=x, y=y_target)
        else:
            x_adv = attack.generate(x=x)

        # SNR = 10 * log10(mean(x^2) / mean(noise^2)), noise = x_adv - x.
        # NOTE(review): an unperturbed batch makes the denominator zero.
        noise = x_adv - x
        snr = 10 * np.log10(np.mean(x**2) / np.mean(noise**2))
        snrs.append(snr)

        y_pred_adv = classifier.predict(x_adv)
        if targeted:
            # NOTE: does not remove data points where y == y_target
            metrics_logger.update_task(y_target, y_pred_adv, adversarial=True)
        else:
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        metrics_logger.update_perturbation(x, x_adv)
    metrics_logger.log_task(adversarial=True, targeted=targeted)

    mean_snr = np.mean(snrs)
    # Use the module logger like every other message in this method rather
    # than the root logger.
    logger.info(f"MEAN SNR of adversarial samples = {mean_snr}")
    return metrics_logger.results()