def _gather_env_variables(self):
    """
    Fill ``self.extra_env_vars`` with the environment to pass into the
    container or to use when running on the host.

    Reads ``self.armory_global_config["verify_ssl"]`` and
    ``self.config["sysconfig"]``; mutates ``self.extra_env_vars`` in place
    and returns nothing.
    """
    # Forwarded verbatim from the calling environment; a variable that is
    # not set is forwarded as the empty string.
    passthrough_names = (
        "ARMORY_GITHUB_TOKEN",
        "ARMORY_PRIVATE_S3_ID",
        "ARMORY_PRIVATE_S3_KEY",
        "ARMORY_INCLUDE_SUBMISSION_BUCKETS",
    )
    for name in passthrough_names:
        self.extra_env_vars[name] = os.getenv(name, default="")

    if not self.armory_global_config["verify_ssl"]:
        self.extra_env_vars["VERIFY_SSL"] = "false"

    sysconfig = self.config["sysconfig"]
    if sysconfig.get("use_gpu", None):
        # Only restrict visible devices when specific GPUs were requested.
        requested_gpus = sysconfig.get("gpus")
        if requested_gpus is not None:
            self.extra_env_vars["NVIDIA_VISIBLE_DEVICES"] = requested_gpus
    if sysconfig.get("set_pythonhashseed"):
        self.extra_env_vars["PYTHONHASHSEED"] = "0"

    # TORCH_HOME is set here (rather than as a module constant) because we
    # may want to allow specification of ARMORY_TORCH_HOME.
    self.extra_env_vars["TORCH_HOME"] = paths.runtime_paths().pytorch_dir
    self.extra_env_vars[environment.ARMORY_VERSION] = armory.__version__
def _scenario_setup(config: dict):
    """
    Create the output and tmp directories used by a scenario run.

    The output directory is ``<output_dir>/<eval_id>``; the tmp directory is
    the runtime tmp directory itself (no per-eval subdirectory in this
    variant).

    Also pulls external repositories ahead of running the scenario in case
    the scenario itself is found in the external repository.
    """
    dirs = paths.runtime_paths()
    output_dir = os.path.join(dirs.output_dir, config["eval_id"])
    tmp_dir = os.path.join(dirs.tmp_dir)
    for directory in (output_dir, tmp_dir):
        os.makedirs(directory, exist_ok=True)
    logger.warning(f"Outputs will be written to {output_dir}")

    # Download any external repositories and add them to the sys path for use
    if not config["sysconfig"].get("external_github_repo", None):
        return
    external_repo.download_and_extract_repos(
        config["sysconfig"]["external_github_repo"],
        external_repo_dir=os.path.join(tmp_dir, "external"),
    )
def fit(self, *args, save_weights_file=None, **kwargs):
    """
    Run the parent class ``fit`` and optionally save the model weights.

    :param save_weights_file: if truthy, file name (relative to the runtime
        ``saved_model_dir``) to which ``self._model.state_dict()`` is written
        with ``torch.save`` after fitting.

    All other positional and keyword arguments go to the parent ``fit``.
    Returns None; the parent's return value is discarded.
    """
    super(SmoothedDeepSpeech, self).fit(*args, **kwargs)
    if not save_weights_file:
        return
    target_path = os.path.join(
        paths.runtime_paths().saved_model_dir, save_weights_file
    )
    torch.save(self._model.state_dict(), target_path)
def _run_interactive_bash(
    self,
    runner: ArmoryInstance,
    check_run=False,
    num_eval_batches=None,
    skip_benign=None,
    skip_attack=None,
    validate_config=None,
) -> None:
    """
    Log instructions for attaching to the running container, then block.

    When ``self.config`` has a truthy "scenario" entry, the config is also
    written to ``<host tmp_dir>/<eval_id>/interactive-config.json`` (the path
    is stored in ``self.tmp_config``) and the command to run it is logged,
    using the runtime-side path of that same file.

    :param runner: instance whose ``docker_container.short_id`` is shown in
        the ``docker exec`` hint
    :param check_run, num_eval_batches, skip_benign, skip_attack,
        validate_config: forwarded unchanged to ``self._build_options``

    This method does not return normally: it sleeps in a loop until the
    process is interrupted.
    """
    user_group_id = self.get_id()
    lines = [
        "Container ready for interactive use.",
        bold(
            "*** In a new terminal, run the following to attach to the container:"
        ),
        bold(
            red(
                f" docker exec -it -u {user_group_id} {runner.docker_container.short_id} bash"
            )
        ),
    ]
    if self.config.get("scenario"):
        options = self._build_options(
            check_run=check_run,
            num_eval_batches=num_eval_batches,
            skip_benign=skip_benign,
            skip_attack=skip_attack,
            validate_config=validate_config,
        )
        tmp_dir = os.path.join(self.host_paths.tmp_dir, self.config["eval_id"])
        # exist_ok: the per-eval tmp directory may already have been created
        # (e.g. by scenario setup or an earlier run with the same eval_id);
        # that must not abort the interactive session.
        os.makedirs(tmp_dir, exist_ok=True)
        self.tmp_config = os.path.join(tmp_dir, "interactive-config.json")
        docker_config_path = os.path.join(
            paths.runtime_paths().tmp_dir,
            self.config["eval_id"],
            "interactive-config.json",
        )
        with open(self.tmp_config, "w") as f:
            f.write(json.dumps(self.config, sort_keys=True, indent=4) + "\n")

        lines.extend(
            [
                bold("*** To run your scenario in the container:"),
                bold(
                    red(
                        f" python -m armory.scenarios.base {docker_config_path}{options} --load-config-from-file"
                    )
                ),
                bold("*** To gracefully shut down container, press: Ctrl-C"),
                "",
            ]
        )
    logger.info("\n".join(lines))
    # Block until the process is interrupted.
    while True:
        time.sleep(1)
def maybe_download_weights_from_s3(weights_file: str) -> str:
    """
    Return the path of ``weights_file`` inside the runtime ``saved_model_dir``,
    downloading it from S3 first when it is not already present.

    The public bucket is tried first. If that raises ``KeyError`` and the
    ``ARMORY_INCLUDE_SUBMISSION_BUCKETS`` environment variable is set to a
    non-empty value, the submission bucket is tried next.

    :param weights_file: file name of the weights, relative to
        ``saved_model_dir`` and to the ``model-weights/`` prefix in S3
    :return: ``os.path.join(saved_model_dir, weights_file)``
    :raises ValueError: if every attempted download raises ``KeyError``
    """
    saved_model_dir = paths.runtime_paths().saved_model_dir
    filepath = os.path.join(saved_model_dir, weights_file)

    if os.path.isfile(filepath):
        logger.info(f"Using available {weights_file} in Armory `saved_model_dir`")
        return filepath

    logger.info(
        f"{weights_file} not found in Armory `saved_model_dir`. Attempting to pull weights from S3"
    )
    remote_key = f"model-weights/{weights_file}"
    local_target = f"{saved_model_dir}/{weights_file}"
    try:
        download_file_from_s3("armory-public-data", remote_key, local_target)
    except KeyError:
        # Unset and empty both mean "do not look in the submission bucket".
        if not os.getenv("ARMORY_INCLUDE_SUBMISSION_BUCKETS"):
            raise ValueError(
                f"{weights_file} was not found in the armory S3 bucket. If "
                "you're attempting to load a custom set of weights for "
                "your model be sure that they are available in the armory "
                "`saved_model_dir` directory on your host environment."
            )
        try:
            download_private_file_from_s3(
                "armory-submission-data", remote_key, local_target
            )
        except KeyError:
            raise ValueError(
                f"{weights_file} was not found in the armory public & submission S3 buckets."
            )
    return filepath
def __init__(self, scheme, exec_path, return_all=False):
    """
    Configure the ROVER invocation and make sure its working directory exists.

    :param scheme: one of ``'freq'``, ``'conf'`` or ``'max'``; selects the
        command-line options stored in ``self.rover_options``
    :param exec_path: stored as ``self.rover_path``
    :param return_all: stored as ``self.return_all``
    :raises ValueError: if ``scheme`` is not one of the supported values
    """
    self.rover_path = exec_path
    self.rover_directory = os.path.join(paths.runtime_paths().tmp_dir, "rover")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(self.rover_directory, exist_ok=True)
    self.outfile = os.path.join(self.rover_directory, 'out.txt')

    if scheme == 'freq':
        self.rover_options = ['-m', "avgconf", "-a", "1.0", "-c", '0.0']
    elif scheme == 'conf':
        self.rover_options = ['-m', "avgconf"]
    elif scheme == 'max':
        self.rover_options = ['-m', 'maxconf']
    else:
        # An explicit raise instead of `assert`: assertions are stripped
        # under `python -O`, which would silently treat any unknown scheme
        # as 'max'.
        raise ValueError(
            f"scheme must be one of 'freq', 'conf', 'max'; got {scheme!r}"
        )
    self.return_all = return_all
def get_art_model(model_kwargs, wrapper_kwargs, weights_file=None):
    """
    Build a small TF1 graph-mode CNN for ``[None, 28, 28, 1]`` float inputs
    with 10 output logits, and wrap it in an ART ``TFClassifier``.

    :param model_kwargs: accepted for interface compatibility; not used here
    :param wrapper_kwargs: extra keyword arguments forwarded to
        ``TFClassifier``
    :param weights_file: optional name of a tar archive; when given it is
        obtained via ``maybe_download_weights_from_s3`` and extracted into
        the runtime ``saved_model_dir``
    :return: the ``TFClassifier`` wrapping the graph and a fresh session
    """
    input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    labels_ph = tf.placeholder(tf.int32, shape=[None, 10])
    training_ph = tf.placeholder(tf.bool, shape=())

    x = tf.layers.conv2d(input_ph, filters=4, kernel_size=(5, 5), activation=tf.nn.relu)
    x = tf.layers.max_pooling2d(x, 2, 2)
    x = tf.layers.conv2d(x, filters=10, kernel_size=(5, 5), activation=tf.nn.relu)
    x = tf.layers.max_pooling2d(x, 2, 2)
    x = tf.layers.flatten(x)
    x = tf.layers.dense(x, 100, activation=tf.nn.relu)
    logits = tf.layers.dense(x, 10)

    loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if weights_file:
        # Load Model using preferred save/restore method
        filepath = maybe_download_weights_from_s3(weights_file)
        # Context manager so the archive handle is released even if
        # extraction fails part-way.
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; only use archives from a trusted source.
        with tarfile.open(filepath) as tar:
            tar.extractall(path=paths.runtime_paths().saved_model_dir)
        # Restore variables...
        # NOTE(review): nothing here loads the extracted files into `sess`;
        # confirm whether a restore step is still to be written.

    wrapped_model = TFClassifier(
        clip_values=(0.0, 1.0),
        input_ph=input_ph,
        output=logits,
        labels_ph=labels_ph,
        train=train_op,
        loss=loss,
        learning=training_ph,
        sess=sess,
        **wrapper_kwargs,
    )
    return wrapped_model
def _save(self, output: dict):
    """
    Write json-formattable ``output`` to a file in the scenario output dir.

    The file is ``<output_dir>/<eval_id>/<name>_<timestamp>.json`` where
    ``<name>`` is ``sysconfig["output_filename"]`` when that is set to a
    truthy value, and the scenario name otherwise.
    """
    output_root = paths.runtime_paths().output_dir
    run_config = output["config"]
    target_dir = os.path.join(output_root, run_config["eval_id"])

    # A missing or empty override falls back to the scenario's own name.
    scenario_name = (
        run_config["sysconfig"].get("output_filename", None)
        or run_config["scenario"]["name"]
    )
    filename = f"{scenario_name}_{output['timestamp']}.json"
    logger.info(f"Saving evaluation results saved to <output_dir>/{filename}")

    with open(os.path.join(target_dir, filename), "w") as f:
        f.write(json.dumps(output, sort_keys=True, indent=4) + "\n")
def _run_interactive_bash(self, runner: ArmoryInstance) -> None:
    """
    Log instructions for attaching to the running container, then block.

    The ``docker exec`` hint uses the current user and group ids (0:0 when
    ``os.name`` is ``"nt"``). When ``self.config`` has a truthy "scenario"
    entry, the config is also written to
    ``<host tmp_dir>/<eval_id>/interactive-config.json`` (the path is stored
    in ``self.tmp_config``) and the command to run it is logged, using the
    runtime-side path of that same file.

    :param runner: instance whose ``docker_container.short_id`` is shown in
        the ``docker exec`` hint

    This method does not return normally: it sleeps in a loop until the
    process is interrupted.
    """
    user_id = os.getuid() if os.name != "nt" else 0
    group_id = os.getgid() if os.name != "nt" else 0
    lines = [
        "Container ready for interactive use.",
        bold(
            "*** In a new terminal, run the following to attach to the container:"
        ),
        bold(
            red(
                f" docker exec -it -u {user_id}:{group_id} {runner.docker_container.short_id} bash"
            )
        ),
    ]
    if self.config.get("scenario"):
        tmp_dir = os.path.join(self.host_paths.tmp_dir, self.config["eval_id"])
        # exist_ok: the per-eval tmp directory may already have been created
        # (e.g. by scenario setup or an earlier run with the same eval_id);
        # that must not abort the interactive session.
        os.makedirs(tmp_dir, exist_ok=True)
        self.tmp_config = os.path.join(tmp_dir, "interactive-config.json")
        docker_config_path = os.path.join(
            paths.runtime_paths().tmp_dir,
            self.config["eval_id"],
            "interactive-config.json",
        )
        with open(self.tmp_config, "w") as f:
            f.write(json.dumps(self.config, sort_keys=True, indent=4) + "\n")

        lines.extend(
            [
                bold("*** To run your scenario in the container:"),
                bold(
                    red(
                        f" python -m armory.scenarios.base {docker_config_path} --load-config-from-file"
                    )
                ),
                bold("*** To gracefully shut down container, press: Ctrl-C"),
                "",
            ]
        )
    logger.info("\n".join(lines))
    # Block until the process is interrupted.
    while True:
        time.sleep(1)
def _download_weights(weights_file, force_download=False):
    """
    Download ``weights_file`` from the public S3 bucket into the runtime
    ``saved_model_dir`` unless it is already there.

    :param weights_file: weights file name; a falsy value makes this a no-op
    :param force_download: when true, an existing local copy is removed and
        downloaded again
    """
    if not weights_file:
        return

    saved_model_dir = paths.runtime_paths().saved_model_dir
    filepath = os.path.join(saved_model_dir, weights_file)
    already_present = os.path.isfile(filepath)

    if already_present and not force_download:
        logger.info(f"Model weights file {filepath} found, skipping.")
        return

    if already_present:
        logger.info("Forcing overwrite of old file.")
        os.remove(filepath)

    logger.info(f"Downloading weights file {weights_file} from s3...")
    download_file_from_s3(
        "armory-public-data",
        f"model-weights/{weights_file}",
        f"{saved_model_dir}/{weights_file}",
    )
def __init__(self, load_weights_file, cfg_file, apply_fit: bool = True, apply_predict: bool = True):
    """
    Load a SEGAN or WSEGAN model and its pretrained generator weights.

    :param load_weights_file: generator weights file name, relative to the
        runtime ``saved_model_dir``
    :param cfg_file: JSON options file name, relative to ``saved_model_dir``;
        its contents are wrapped in ``ArgParser``
    :param apply_fit: stored as ``self._apply_fit``
    :param apply_predict: stored as ``self._apply_predict``
    """
    # Explicit names: `from segan.models import *` is a SyntaxError inside a
    # function in Python 3 ("import * only allowed at module level").
    # NOTE(review): ArgParser is assumed to be available at module scope
    # already — confirm it was not meant to come from the star import.
    from segan.models import SEGAN, WSEGAN

    saved_model_dir = paths.runtime_paths().saved_model_dir
    model_path = os.path.join(saved_model_dir, load_weights_file)
    opts_path = os.path.join(saved_model_dir, cfg_file)

    with open(opts_path, 'r') as cfg_f:
        args = ArgParser(json.load(cfg_f))
    args.cuda = torch.cuda.is_available()
    self.device = "cuda" if args.cuda else "cpu"

    # The options file selects the WSEGAN variant via a truthy `wsegan` entry.
    if hasattr(args, 'wsegan') and args.wsegan:
        self.model = WSEGAN(args)
    else:
        self.model = SEGAN(args)
    self.model.G.load_pretrained(model_path, True)

    self._apply_fit = apply_fit
    self._apply_predict = apply_predict
def get_art_model(model_kwargs, wrapper_kwargs, weights_file=None):
    """
    Build a TF1 graph-mode model for ``[None, 32, 32, 3]`` float inputs and
    wrap it in an ART ``TFClassifier``.

    The output tensor switches between ``_training_pass`` and
    ``_inference_pass`` based on a boolean placeholder.

    :param model_kwargs: accepted for interface compatibility; not used here
    :param wrapper_kwargs: extra keyword arguments forwarded to
        ``TFClassifier``
    :param weights_file: optional name of a tar archive; when given it is
        obtained via ``maybe_download_weights_from_s3`` and extracted into
        the runtime ``saved_model_dir``
    :return: the ``TFClassifier`` wrapping the graph and a fresh session
    """
    input_ph = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    labels_ph = tf.placeholder(tf.int32, shape=[None, 10])
    training_ph = tf.placeholder(tf.bool, shape=())

    # Conditional for handling training phase or inference phase
    output = tf.cond(
        training_ph,
        true_fn=lambda: _training_pass(input_ph),
        false_fn=lambda: _inference_pass(input_ph),
    )

    loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=output, onehot_labels=labels_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.003)
    train_op = optimizer.minimize(loss)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if weights_file:
        # Load Model using preferred save/restore method
        filepath = maybe_download_weights_from_s3(weights_file)
        # Context manager so the archive handle is released even if
        # extraction fails part-way.
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; only use archives from a trusted source.
        with tarfile.open(filepath) as tar:
            tar.extractall(path=paths.runtime_paths().saved_model_dir)
        # Restore variables...
        # NOTE(review): nothing here loads the extracted files into `sess`;
        # confirm whether a restore step is still to be written.

    wrapped_model = TFClassifier(
        clip_values=(0.0, 1.0),
        input_ph=input_ph,
        output=output,
        labels_ph=labels_ph,
        train=train_op,
        loss=loss,
        learning=training_ph,
        sess=sess,
        **wrapper_kwargs,
    )
    return wrapped_model
def _scenario_setup(config: dict):
    """
    Create the scenario-specific tmp and output directories and register
    external code locations.

    Directories are ``<output_dir>/<eval_id>`` and ``<tmp_dir>/<eval_id>``.
    External repositories are pulled ahead of running the scenario in case
    the scenario itself is found in the external repository. After that,
    every entry of ``sysconfig["external_github_repo_pythonpath"]`` and
    ``sysconfig["local_repo_path"]`` is registered through ``external_repo``.
    Each of those two settings may be a single string, a list, or absent.
    """

    def _as_list(value):
        # A single string means one entry; None means none; anything else is
        # used as given.
        if isinstance(value, str):
            return [value]
        if value is None:
            return []
        return value

    runtime_paths = paths.runtime_paths()
    scenario_output_dir = os.path.join(runtime_paths.output_dir, config["eval_id"])
    scenario_tmp_dir = os.path.join(runtime_paths.tmp_dir, config["eval_id"])
    os.makedirs(scenario_output_dir, exist_ok=True)
    os.makedirs(scenario_tmp_dir, exist_ok=True)
    logger.warning(f"Outputs will be written to {scenario_output_dir}")

    sysconfig = config["sysconfig"]

    # Bound unconditionally: the pythonpath loop below needs it even when no
    # external repository is configured (binding it only inside the `if`
    # made that case fail with UnboundLocalError).
    external_repo_dir = os.path.join(scenario_tmp_dir, "external")

    # Download any external repositories and add them to the sys path for use
    if sysconfig.get("external_github_repo", None):
        external_repo.download_and_extract_repos(
            sysconfig["external_github_repo"],
            external_repo_dir=external_repo_dir,
        )

    for pythonpath in _as_list(sysconfig.get("external_github_repo_pythonpath")):
        external_repo.add_pythonpath(pythonpath, external_repo_dir=external_repo_dir)

    for local_path in _as_list(sysconfig.get("local_repo_path")):
        external_repo.add_local_repo(local_path)
def __init__(
    self,
    *args,
    voting_kwargs,
    niters_forward=1,
    niters_backward=1,
    batch_backward=0,
    batch_forward=0,
    load_weights_file=None,
    use_half=False,
    random_init=False,
    **kwargs,
):
    """
    Load a DeepSpeech model from the runtime ``saved_model_dir`` and set up
    the smoothed estimator around it.

    :param voting_kwargs: keyword arguments passed (together with
        ``**kwargs``) to ``self.set_voting_module``
    :param niters_forward, niters_backward, batch_backward, batch_forward:
        stored on the instance under the same names
    :param load_weights_file: weights file name relative to
        ``saved_model_dir``; defaults to ``librispeech_pretrained_v2.pth``
    :param use_half: forwarded to ``load_model`` and stored on the instance
    :param random_init: when true, the loaded parameters are re-initialised
        (Xavier-uniform for tensors with more than one dimension, zeros
        otherwise)

    ``*args`` and ``**kwargs`` are forwarded to the parent constructor.
    """
    filename = load_weights_file if load_weights_file else "librispeech_pretrained_v2.pth"
    saved_model_dir = paths.runtime_paths().saved_model_dir
    model_path = os.path.join(saved_model_dir, filename)
    model = load_model(device="cpu", model_path=model_path, use_half=use_half)
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=1e-4, weight_decay=1e-5, amsgrad=False
    )
    super(SmoothedDeepSpeech, self).__init__(
        model, *args, optimizer=optimizer, **kwargs
    )

    self.model_path = model_path
    self.use_half = use_half
    self.niters_forward = niters_forward
    self.niters_backward = niters_backward

    if random_init:
        for p in self._model.parameters():
            if p.dim() > 1:
                # In-place initialiser; the un-suffixed `xavier_uniform` is
                # the deprecated spelling of the same operation.
                torch.nn.init.xavier_uniform_(p)
            else:
                torch.nn.init.zeros_(p)

    self.decoder = load_decoder_with_scores(self.decoder)
    self.set_voting_module(**voting_kwargs, **kwargs)
    self.batch_backward = batch_backward
    self.batch_forward = batch_forward
from importlib import import_module import os import numpy as np import pytest from armory.data import datasets, adversarial_datasets from armory import paths from armory.utils.metrics import ( object_detection_AP_per_class, apricot_patch_targeted_AP_per_class, ) DATASET_DIR = paths.runtime_paths().dataset_dir @pytest.mark.usefixtures("ensure_armory_dirs") def test_tf1_mnist(): classifier_module = import_module("armory.baseline_models.tf_graph.mnist") classifier_fn = getattr(classifier_module, "get_art_model") classifier = classifier_fn(model_kwargs={}, wrapper_kwargs={}) train_dataset = datasets.mnist( split="train", epochs=1, batch_size=600, dataset_dir=DATASET_DIR, ) test_dataset = datasets.mnist( split="test", epochs=1,
def _set_output_dir(self, config):
    """Set ``self.scenario_output_dir`` to ``<runtime output_dir>/<eval_id>``."""
    output_root = paths.runtime_paths().output_dir
    self.scenario_output_dir = os.path.join(output_root, config["eval_id"])
def download_and_extract_repo(external_repo_name: str,
                              external_repo_dir: str = None) -> None:
    """
    Downloads and extracts an external repository for use within ARMORY. The
    external repository's project root will be added to the sys path.

    Private repositories require an `ARMORY_GITHUB_TOKEN` environment variable.

    :param external_repo_name: String name of "organization/repo-name" or
        "organization/repo-name@branch"; without "@branch", "master" is used
    :param external_repo_dir: directory to download and extract into;
        defaults to the runtime ``external_repo_dir``
    :raises ConnectionError: if the GitHub API does not answer with status 200
    """
    verify_ssl = get_verify_ssl()

    if external_repo_dir is None:
        external_repo_dir = paths.runtime_paths().external_repo_dir
    os.makedirs(external_repo_dir, exist_ok=True)

    if "@" in external_repo_name:
        # Split on the first "@" only, so a ref that itself contains "@"
        # does not fail to unpack.
        org_repo_name, branch = external_repo_name.split("@", 1)
    else:
        org_repo_name = external_repo_name
        branch = "master"
    repo_name = org_repo_name.split("/")[-1]

    headers = {}
    token = os.getenv("ARMORY_GITHUB_TOKEN")
    if token:
        headers = {"Authorization": f'token {token}'}

    response = requests.get(
        f"https://api.github.com/repos/{org_repo_name}/tarball/{branch}",
        headers=headers,
        stream=True,
        verify=verify_ssl,
    )

    if response.status_code != 200:
        raise ConnectionError(
            "Unable to download repository. If it's private make sure "
            "`ARMORY_GITHUB_TOKEN` environment variable is set\n"
            f"status_code is {response.status_code}\n"
            f"full response is {response.text}")

    logger.info(f"Downloading external repo: {external_repo_name}")

    tar_filename = os.path.join(external_repo_dir, repo_name + ".tar.gz")
    with open(tar_filename, "wb") as f:
        f.write(response.raw.read())

    # Context manager so the archive handle is always closed.
    # NOTE(review): extractall trusts the member paths stored in the archive.
    with tarfile.open(tar_filename, "r:gz") as tar:
        # The first member name is taken as the archive's top-level directory.
        dl_directory_name = tar.getnames()[0]
        tar.extractall(path=external_repo_dir)

    # Always overwrite existing repositories to keep them at HEAD
    final_dir_name = os.path.join(external_repo_dir, repo_name)
    if os.path.isdir(final_dir_name):
        shutil.rmtree(final_dir_name)
    os.rename(
        os.path.join(external_repo_dir, dl_directory_name),
        final_dir_name,
    )
    add_path(final_dir_name, include_parent=True)
def add_pythonpath(subpath: str, external_repo_dir: str = None) -> None:
    """
    Register ``<external_repo_dir>/<subpath>`` through ``add_path`` with
    ``include_parent=True``.

    :param subpath: path relative to the external repository directory
    :param external_repo_dir: base directory; defaults to the runtime
        ``external_repo_dir`` when None
    """
    base_dir = external_repo_dir
    if base_dir is None:
        base_dir = paths.runtime_paths().external_repo_dir
    add_path(os.path.join(base_dir, subpath), include_parent=True)
def add_local_repo(local_repo_name: str) -> None:
    """
    Register ``<runtime local_git_dir>/<local_repo_name>`` through
    ``add_path`` with ``include_parent=True``.
    """
    repo_path = os.path.join(paths.runtime_paths().local_git_dir, local_repo_name)
    add_path(repo_path, include_parent=True)
Model contributed by: MITRE Corporation """ # BEGIN hacks # Save deep speech model to armory # This can be made less hacky after this ART issue: # https://github.com/Trusted-AI/adversarial-robustness-toolbox/issues/693 import os import logging from typing import Optional logger = logging.getLogger(__name__) from armory import paths ART_DATA_PATH = os.path.join(paths.runtime_paths().saved_model_dir, "art") os.makedirs(ART_DATA_PATH, exist_ok=True) from art.estimators.speech_recognition import pytorch_deep_speech pytorch_deep_speech.ART_DATA_PATH = ART_DATA_PATH logger.warning(f"Saving art deep speech model weights to {ART_DATA_PATH}") # END hacks from art.estimators.speech_recognition import PyTorchDeepSpeech def get_art_model( model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None ) -> PyTorchDeepSpeech: return PyTorchDeepSpeech(**wrapper_kwargs)
def _generator_from_tfds(
    dataset_name: str,
    split_type: str,
    batch_size: int,
    epochs: int,
    dataset_dir: str,
    preprocessing_fn: Callable,
    as_supervised: bool = True,
    supervised_xy_keys=None,
    download_and_prepare_kwargs=None,
    variable_length=False,
    shuffle_files=True,
    cache_dataset: bool = True,
    framework: str = "numpy",
    lambda_map: Callable = None,
) -> Union[ArmoryDataGenerator, tf.data.Dataset]:
    """
    Load a TFDS dataset split and return it in the requested framework's form.

    If as_supervised=False, must designate keys as a tuple in
    supervised_xy_keys:
        supervised_xy_keys=('video', 'label')  # ucf101 dataset

    if variable_length=True and batch_size > 1:
        the underlying dataset is batched one element at a time and the
        ArmoryDataGenerator is told to handle variable length itself

    lambda_map - if not None, mapping function to apply to dataset elements

    framework - "numpy" returns an ArmoryDataGenerator, "tf" returns the
        tf.data.Dataset, "pytorch" returns a torch DataLoader; anything else
        raises ValueError
    """
    dataset_dir = dataset_dir or paths.runtime_paths().dataset_dir

    if cache_dataset:
        _cache_dataset(
            dataset_dir,
            dataset_name=dataset_name,
        )

    # Captured before loading; used for the numpy conversion below.
    default_graph = tf.compat.v1.keras.backend.get_session().graph

    ds, ds_info = tfds.load(
        dataset_name,
        split=split_type,
        as_supervised=as_supervised,
        data_dir=dataset_dir,
        with_info=True,
        download_and_prepare_kwargs=download_and_prepare_kwargs,
        shuffle_files=shuffle_files,
    )

    if not as_supervised:
        try:
            x_key, y_key = supervised_xy_keys
        except (TypeError, ValueError):
            raise ValueError(
                f"When as_supervised=False, supervised_xy_keys must be a (x_key, y_key)"
                f" tuple, not {supervised_xy_keys}")
        if not (isinstance(x_key, str) and isinstance(y_key, str)):
            raise ValueError(f"supervised_xy_keys be a tuple of strings,"
                             f" not {type(x_key), type(y_key)}")
        ds = ds.map(lambda x: (x[x_key], x[y_key]))

    if lambda_map is not None:
        ds = ds.map(lambda_map)

    ds = ds.repeat(epochs)
    if shuffle_files:
        ds = ds.shuffle(batch_size * 10, reshuffle_each_iteration=True)

    # With variable-length data and a real batch size, batch singly here and
    # leave the grouping to the generator.
    per_example = bool(variable_length and batch_size > 1)
    ds = ds.batch(1 if per_example else batch_size, drop_remainder=False)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)

    if framework == "numpy":
        numpy_ds = tfds.as_numpy(ds, graph=default_graph)
        return ArmoryDataGenerator(
            numpy_ds,
            size=ds_info.splits[split_type].num_examples,
            batch_size=batch_size,
            epochs=epochs,
            preprocessing_fn=preprocessing_fn,
            variable_length=per_example,
        )

    if framework == "tf":
        return ds

    if framework == "pytorch":
        torch_ds = _get_pytorch_dataset(ds)
        return torch.utils.data.DataLoader(
            torch_ds,
            batch_size=None,
            collate_fn=lambda x: x,
            num_workers=0,
        )

    raise ValueError(
        f"`framework` must be one of ['tf', 'pytorch', 'numpy']. Found {framework}"
    )
def _generator_from_tfds(
    dataset_name: str,
    split: str,
    batch_size: int,
    epochs: int,
    dataset_dir: str,
    preprocessing_fn: Callable,
    label_preprocessing_fn: Callable = None,
    as_supervised: bool = True,
    supervised_xy_keys=None,
    download_and_prepare_kwargs=None,
    variable_length=False,
    variable_y=False,
    shuffle_files=True,
    cache_dataset: bool = True,
    framework: str = "numpy",
    lambda_map: Callable = None,
    context=None,
    class_ids=None,
    index=None,
) -> Union[ArmoryDataGenerator, tf.data.Dataset]:
    """
    Load a TFDS dataset split, optionally filter it, and return it in the
    requested framework's form.

    If as_supervised=False, must designate keys as a tuple in
    supervised_xy_keys:
        supervised_xy_keys=('video', 'label')  # ucf101 dataset
        supervised_xy_keys=('speech', 'text')  # librispeech-dev-clean with ASR
    One (not both) of the two keys may itself be a tuple of strings.

    if variable_length=True and batch_size > 1:
        the underlying dataset is batched one element at a time and the
        ArmoryDataGenerator is told to handle variable length itself

    lambda_map - if not None, mapping function to apply to dataset elements

    class_ids - None, an int, or a list; passed to ``filter_by_class``
    index - None, a list (``filter_by_index``) or a str
        (``filter_by_str_slice``)

    framework - "numpy" returns an ArmoryDataGenerator, "tf" returns the
        tf.data.Dataset, "pytorch" returns a torch DataLoader; preprocessing
        functions are only accepted with "numpy"
    """
    dataset_dir = dataset_dir or paths.runtime_paths().dataset_dir

    if cache_dataset:
        _cache_dataset(
            dataset_dir,
            dataset_name=dataset_name,
        )

    # Captured before loading; used for the numpy conversion below.
    default_graph = tf.compat.v1.keras.backend.get_session().graph

    if not isinstance(split, str):
        raise ValueError(f"split must be str, not {type(split)}")

    # Everything except `split` is identical between the first load attempt
    # and the retry with a parsed split.
    load_kwargs = dict(
        as_supervised=as_supervised,
        data_dir=dataset_dir,
        with_info=True,
        download_and_prepare_kwargs=download_and_prepare_kwargs,
        shuffle_files=shuffle_files,
    )
    try:
        ds, ds_info = tfds.load(dataset_name, split=split, **load_kwargs)
    except AssertionError as e:
        if not str(e).startswith("Unrecognized instruction format: "):
            raise
        logger.warning(f"Caught AssertionError in TFDS load split argument: {e}")
        logger.warning(f"Attempting to parse split {split}")
        split = parse_split_index(split)
        logger.warning(f"Replacing split with {split}")
        ds, ds_info = tfds.load(dataset_name, split=split, **load_kwargs)

    if not as_supervised:
        try:
            x_key, y_key = supervised_xy_keys
        except (TypeError, ValueError):
            raise ValueError(
                f"When as_supervised=False, supervised_xy_keys must be a (x_key, y_key)"
                f" tuple, not {supervised_xy_keys}"
            )
        for key in (x_key, y_key):
            if not isinstance(key, (str, tuple)):
                raise ValueError(
                    f"supervised_xy_keys must be a tuple of strings or a tuple of tuple of strings"
                    f" not {type(x_key), type(y_key)}"
                )

        if isinstance(x_key, tuple):
            if isinstance(y_key, tuple):
                raise ValueError(
                    "Only one of (x_key, y_key) can be a tuple while the other must be a string."
                )
            if not all(isinstance(k, str) for k in x_key):
                raise ValueError(
                    "supervised_xy_keys must be a tuple of strings or a tuple of tuple of strings"
                )
            ds = ds.map(lambda x: (tuple(x[k] for k in x_key), x[y_key]))
        elif isinstance(y_key, tuple):
            if not all(isinstance(k, str) for k in y_key):
                raise ValueError(
                    "supervised_xy_keys must be a tuple of strings or a tuple of tuple of strings"
                )
            ds = ds.map(lambda x: (x[x_key], tuple(x[k] for k in y_key)))
        else:
            ds = ds.map(lambda x: (x[x_key], x[y_key]))

    if lambda_map is not None:
        ds = ds.map(lambda_map)

    dataset_size = ds_info.splits[split].num_examples

    # Add class-based filtering
    if class_ids is not None:
        if split == "train":
            logger.warning(
                "Filtering by class entails iterating over the whole dataset and thus "
                "can be very slow if using the 'train' split"
            )
        if isinstance(class_ids, list):
            wanted_classes = class_ids
        elif isinstance(class_ids, int):
            wanted_classes = [class_ids]
        else:
            raise ValueError(
                f"class_ids must be a list, int, or None, not {type(class_ids)}"
            )
        ds, dataset_size = filter_by_class(ds, class_ids=wanted_classes)

    # Add index-based filtering
    if isinstance(index, list):
        ds, dataset_size = filter_by_index(ds, index, dataset_size)
    elif isinstance(index, str):
        ds, dataset_size = filter_by_str_slice(ds, index, dataset_size)
    elif index is not None:
        raise ValueError(f"index must be a list, str, or None, not {type(index)}")

    ds = ds.repeat(epochs)
    if shuffle_files:
        ds = ds.shuffle(batch_size * 10, reshuffle_each_iteration=True)

    # With variable-length data and a real batch size, batch singly here and
    # leave the grouping to the generator.
    per_example = bool(variable_length and batch_size > 1)
    ds = ds.batch(1 if per_example else batch_size, drop_remainder=False)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)

    has_preprocessing = (
        preprocessing_fn is not None or label_preprocessing_fn is not None
    )
    if framework != "numpy" and has_preprocessing:
        raise ValueError(
            f"Data/label preprocessing functions only supported for numpy framework.  Selected {framework} framework"
        )

    if framework == "numpy":
        numpy_ds = tfds.as_numpy(ds, graph=default_graph)
        return ArmoryDataGenerator(
            numpy_ds,
            size=dataset_size,
            batch_size=batch_size,
            epochs=epochs,
            preprocessing_fn=preprocessing_fn,
            label_preprocessing_fn=label_preprocessing_fn,
            variable_length=per_example,
            variable_y=bool(variable_y and batch_size > 1),
            context=context,
        )

    if framework == "tf":
        return ds

    if framework == "pytorch":
        torch_ds = _get_pytorch_dataset(ds)
        return torch.utils.data.DataLoader(
            torch_ds, batch_size=None, collate_fn=lambda x: x, num_workers=0
        )

    raise ValueError(
        f"`framework` must be one of ['tf', 'pytorch', 'numpy']. Found {framework}"
    )
def maybe_download_weights_from_s3(weights_file: str,
                                   *,
                                   auto_expand_tars: bool = False) -> str:
    """
    Make ``weights_file`` available in the runtime ``saved_model_dir``,
    downloading it from S3 when it is not already there, and optionally
    unpack it when it is a tar archive.

    The public bucket is tried first. If that raises ``KeyError`` and the
    ``ARMORY_INCLUDE_SUBMISSION_BUCKETS`` environment variable is set to a
    non-empty value, the submission bucket is tried next.

    :param weights_file: file name of the weights, relative to
        ``saved_model_dir`` and to the ``model-weights/`` prefix in S3
    :param auto_expand_tars: when true and the file is a tar archive, extract
        it into ``saved_model_dir`` unless its top-level directory already
        exists there
    :return: ``os.path.join(saved_model_dir, weights_file)``; or, when a tar
        archive was handled, the archive's common top-level directory name
        as stored in the archive
    :raises ValueError: if every attempted download raises ``KeyError``
    :raises PermissionError: if the archive does not expand entirely into a
        single subdirectory (this includes an empty archive)
    """
    saved_model_dir = paths.runtime_paths().saved_model_dir
    filepath = os.path.join(saved_model_dir, weights_file)

    if os.path.isfile(filepath):
        logger.info(
            f"Using available {weights_file} in Armory `saved_model_dir`")
    else:
        logger.info(
            f"{weights_file} not found in Armory `saved_model_dir`. Attempting to pull weights from S3"
        )
        try:
            download_file_from_s3(
                "armory-public-data",
                f"model-weights/{weights_file}",
                f"{saved_model_dir}/{weights_file}",
            )
        except KeyError:
            if ("ARMORY_INCLUDE_SUBMISSION_BUCKETS" in os.environ and
                    os.getenv("ARMORY_INCLUDE_SUBMISSION_BUCKETS") != ""):
                try:
                    download_private_file_from_s3(
                        "armory-submission-data",
                        f"model-weights/{weights_file}",
                        f"{saved_model_dir}/{weights_file}",
                    )
                except KeyError:
                    raise ValueError((
                        f"{weights_file} was not found in the armory public & submission S3 buckets."
                    ))
            else:
                raise ValueError((
                    f"{weights_file} was not found in the armory S3 bucket. If "
                    "you're attempting to load a custom set of weights for "
                    "your model be sure that they are available in the armory "
                    "`saved_model_dir` directory on your host environment."))

    if auto_expand_tars and tarfile.is_tarfile(filepath):
        logger.debug(
            f"Detected model weights file {weights_file} as a tar archive")
        with tarfile.open(filepath) as tar:
            # check if the tarfile contains a directory containing all its members
            #    ie if the tarfile expands out entirely into a subdirectory
            dirs = [fi.name for fi in tar.getmembers() if fi.isdir()]
            try:
                commonpath = os.path.commonpath(tar.getnames())
            except ValueError:
                # commonpath() rejects an empty sequence and a mix of
                # absolute and relative names; both mean "no common
                # subdirectory", reported below like any other bad layout.
                commonpath = ""
            if not commonpath or commonpath not in dirs:
                raise PermissionError((
                    f"{weights_file} does not expand into a subdirectory."
                    f" Weights files submitted as tarballs must expand into a subdirectory."
                ))
            full_path = os.path.join(saved_model_dir, commonpath)
            if os.path.exists(full_path):
                logger.warning(
                    f"Model weights folder {commonpath} from {weights_file} already exists"
                )
                logger.warning(
                    f"Skipping auto-unpacking of {weights_file}")
                logger.warning(
                    f"Delete {commonpath} manually to force unpacking")
            else:
                logger.info(
                    f"Auto-unpacking model weights from {weights_file}")
                # NOTE(review): extractall trusts the member paths stored in
                # the archive.
                tar.extractall(path=saved_model_dir)
            # NOTE(review): this is the directory name as stored in the
            # archive, not joined with saved_model_dir — confirm callers
            # expect a relative name here.
            filepath = commonpath

    return filepath
def _evaluate(self, config: dict) -> dict:
    """
    Evaluate the config and return a results dict

    Steps, in order: load the model; apply an internal Preprocessor or
    Postprocessor defense if configured; optionally fit the model (directly
    or through a "Trainer" defense) and save it; optionally apply a
    "Transform" defense; run benign inference on the test split; run
    inference on adversarial examples (preloaded or generated); return the
    metrics logger's results.

    :param config: evaluation configuration; the keys read here are
        "model", "defense" (optional), "dataset", "metric" and "attack"
    :return: the value of ``metrics_logger.results()``
    :raises ValueError: for a targeted attack that also sets "use_label"
    :raises NotImplementedError: for a targeted, non-preloaded attack
    """
    model_config = config["model"]
    classifier, preprocessing_fn = load_model(model_config)

    # A missing or null "defense" entry is treated as "no defense".
    defense_config = config.get("defense") or {}
    defense_type = defense_config.get("type")

    if defense_type in ["Preprocessor", "Postprocessor"]:
        logger.info(
            f"Applying internal {defense_type} defense to classifier")
        classifier = load_defense_internal(config["defense"], classifier)

    if model_config["fit"]:
        classifier.set_learning_phase(True)
        logger.info(
            f"Fitting model {model_config['module']}.{model_config['name']}..."
        )
        fit_kwargs = model_config["fit_kwargs"]

        logger.info(
            f"Loading train dataset {config['dataset']['name']}...")
        train_data = load_dataset(
            config["dataset"],
            epochs=fit_kwargs["nb_epochs"],
            split_type="train",
            preprocessing_fn=preprocessing_fn,
        )
        if defense_type == "Trainer":
            logger.info(f"Training with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            defense.fit_generator(train_data, **fit_kwargs)
        else:
            logger.info("Fitting classifier on clean train dataset...")
            classifier.fit_generator(train_data, **fit_kwargs)

        # Save weights at the end of training. The checkpoint name is built
        # from the model module path (dots replaced by underscores), a
        # "_pretrained" marker when model_kwargs["pretrained"] is truthy,
        # and the configured number of epochs.
        ckpt_name = model_config['module'].replace('.', '_')
        ckpt_name += '_pretrained' if model_config['model_kwargs'][
            'pretrained'] else ''
        ckpt_name += '_epochs%d.pth' % model_config['fit_kwargs'][
            'nb_epochs']
        classifier.save(
            osp.join(paths.runtime_paths().saved_model_dir, ckpt_name))
        logger.info(f"Saved classifier {ckpt_name} ...")

    if defense_type == "Transform":
        # NOTE: Transform currently not supported
        logger.info(
            f"Transforming classifier with {defense_type} defense...")
        defense = load_defense_wrapper(config["defense"], classifier)
        classifier = defense()

    classifier.set_learning_phase(False)

    # Evaluate the ART classifier on benign test examples
    logger.info(f"Loading test dataset {config['dataset']['name']}...")
    test_data = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
    )
    logger.info("Running inference on benign examples...")
    metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

    for x, y in tqdm(test_data, desc="Benign"):
        y_pred = classifier.predict(x)
        metrics_logger.update_task(y, y_pred)
    metrics_logger.log_task()

    # Evaluate the ART classifier on adversarial test examples
    logger.info("Generating or loading / testing adversarial examples...")

    attack_config = config["attack"]
    attack_type = attack_config.get("type")
    targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
    if targeted and attack_config.get("use_label"):
        raise ValueError("Targeted attacks cannot have 'use_label'")
    if attack_type == "preloaded":
        # Preloaded adversarial data comes from its own dataset; no attack
        # object is built in this case.
        test_data = load_adversarial_dataset(
            attack_config,
            epochs=1,
            split_type="adversarial",
            preprocessing_fn=preprocessing_fn,
        )
    else:
        attack = load_attack(attack_config, classifier)
        # The test split is loaded again for the attack pass.
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
        )
    for x, y in tqdm(test_data, desc="Attack"):
        if attack_type == "preloaded":
            # Each preloaded element carries the clean and adversarial
            # inputs together (and, when targeted, both labels).
            x, x_adv = x
            if targeted:
                y, y_target = y
        elif attack_config.get("use_label"):
            x_adv = attack.generate(x=x, y=y)
        elif targeted:
            raise NotImplementedError(
                "Requires generation of target labels")
            # x_adv = attack.generate(x=x, y=y_target)
        else:
            x_adv = attack.generate(x=x)

        y_pred_adv = classifier.predict(x_adv)
        if targeted:
            # NOTE: does not remove data points where y == y_target
            metrics_logger.update_task(y_target,
                                       y_pred_adv,
                                       adversarial=True)
        else:
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
        metrics_logger.update_perturbation(x, x_adv)
    metrics_logger.log_task(adversarial=True, targeted=targeted)
    return metrics_logger.results()
def locate_data(dataset_name, dataset_ver, split):
    """
    Return the paths under ``<runtime dataset_dir>/<dataset_name>/<dataset_ver>``
    whose file name matches ``*<split>*.tfrecord*``.

    :return: list of matching paths, in the order ``glob.glob`` yields them
    """
    version_dir = os.path.join(
        paths.runtime_paths().dataset_dir, dataset_name, dataset_ver
    )
    pattern = f"{version_dir}/*{split}*.tfrecord*"
    return [match for match in glob.glob(pattern)]