def test_num_args_exception(self):
  x = tf.Variable(np.random.randint(0, 256, [224, 224, 3]), dtype=tf.uint8)
  for pp_str in [
      "inception_crop(1)",
      "resize()",
      "resize(1, 1, 1)",
      "flip_lr(1)",
      "central_crop()",
  ]:
    with self.assertRaises(ValueError):
      pipeline_builder.get_preprocess_fn(pp_str)(x)

def create():
  """Loads the VGG ImageNet model."""
  with torch.set_grad_enabled(False):
    model = torch.hub.load("pytorch/vision:v0.6.0", "vgg11", pretrained=True)
    with_cuda = torch.cuda.is_available()
    if with_cuda:
      model.to("cuda")
    else:
      logging.warning("Running on CPU, no CUDA detected.")

  def call(features):
    images = features["image"].numpy()
    # Normalize according to the documentation. Note that the pre-processing
    # will already have the range normalized to [0, 1].
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    images_normalized = (images - mean) / std
    # Reshape from [batch, h, w, c] -> [batch, c, h, w].
    images_torch = torch.from_numpy(
        np.transpose(images_normalized, [0, 3, 1, 2]).astype(np.float32))
    if with_cuda:
      images_torch = images_torch.to("cuda")
    logits = model(images_torch)
    return torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()

  preprocess_config = "resize(256)|central_crop(224)|value_range(0,1)"
  preprocess_fn = pipeline_builder.get_preprocess_fn(preprocess_config,
                                                     remove_tpu_dtypes=False)
  return call, preprocess_fn

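# A minimal usage sketch for the (call, preprocess_fn) pair returned by
# `create()` above; illustrative only, not part of the module. The `dataset`
# argument (a `tf.data.Dataset` of {"image": ...} dicts, e.g. from tfds) and
# the batch size are assumptions, and `tf.data` is not otherwise used in this
# file.
def _example_predict(dataset, batch_size=8):
  call, preprocess_fn = create()
  batched = dataset.map(preprocess_fn).batch(batch_size)
  # `call` consumes eager batches and returns NumPy softmax probabilities of
  # shape [batch_size, 1000].
  return [call(batch) for batch in batched]
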
def create(config=None):
  """Loads the Anti Alias model."""
  del config  # Unused argument.
  with torch.set_grad_enabled(False):
    model = antialiased_cnns.resnet50(pretrained=True)
    model = model.eval()

  image_mean = [0.485, 0.456, 0.406]
  image_std = [0.229, 0.224, 0.225]

  def call(features):
    # Normalize according to the documentation. Note that the pre-processing
    # will already have the range normalized to [0, 1].
    images_normalized = (features["image"] - image_mean) / image_std
    # Reshape from [batch, h, w, c] -> [batch, c, h, w].
    images_torch = torch.tensor(
        np.transpose(images_normalized, [0, 3, 1, 2]).astype(np.float32))
    with torch.no_grad():
      logits = model(images_torch)
    return logits.softmax(dim=-1).cpu().numpy()

  preprocess_config = ("resize_small(256)|"
                       "central_crop(224)|"
                       "value_range(0,1)")
  preprocess_fn = pipeline_builder.get_preprocess_fn(preprocess_config,
                                                     remove_tpu_dtypes=False)
  return call, preprocess_fn

def test_preprocessing_pipeline_multi_channel(self):
  pp_str = ("resize((256,256))|random_crop((224,224))|"
            "value_range_mc(-1,1,0,0,0,0,999,999,999,999)|"
            "select_channels([0,1,3])")
  pp_fn = pipeline_builder.get_preprocess_fn(pp_str)

  # Example multi-channel input.
  x = tf.Variable(np.random.randint(0, 999, [64, 64, 4]))
  result = pp_fn({"image": x})
  image = result["image"].numpy()
  self.assertEqual(image.shape, (224, 224, 3))
  self.assertLessEqual(np.max(image), 1)
  self.assertGreaterEqual(np.min(image), -1)

def test_batched_preprocessing_pipeline(self):
  pp_str = (
      "pad(4)|pad((4,4))|replicate(4)|inception_crop(300)|resize(256)|"
      "resize((256, 256))|random_crop(240)|"
      "central_crop((80, 120))|flip_lr|flip_ud|standardize(0, 1)|"
      "value_range(0,1)|value_range(-1,1)")
  pp_fn = pipeline_builder.get_preprocess_fn(pp_str)

  # Typical image input.
  x = tf.Variable(np.random.randint(0, 256, [640, 480, 3]))
  result = pp_fn({"image": x})
  image = result["image"].numpy()
  self.assertEqual(image.shape, (4, 80, 120, 3))
  self.assertLessEqual(np.max(image), 1)
  self.assertGreaterEqual(np.min(image), -1)

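# A minimal sketch, not the library implementation, of how the pipe-separated
# spec strings exercised above can be split into (name, args) pairs. The
# function name and the use of `ast.literal_eval` are illustration choices;
# the real parsing is done inside `pipeline_builder.get_preprocess_fn`.
import ast
import re


def parse_preprocess_spec(spec):
  """Splits e.g. "resize(256)|flip_lr|value_range(0,1)" into (name, args)."""
  ops = []
  for op_str in spec.split("|"):
    match = re.fullmatch(r"(\w+)(?:\((.*)\))?", op_str.strip())
    name, arg_str = match.group(1), match.group(2)
    # Ops without parentheses (e.g. "flip_lr") take no arguments.
    args = ast.literal_eval(f"({arg_str},)") if arg_str else ()
    ops.append((name, args))
  return ops

# parse_preprocess_spec("resize(256)|flip_lr|value_range(0,1)")
# -> [("resize", (256,)), ("flip_lr", ()), ("value_range", (0, 1))]
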
def create(network):
  """Loads the CLIP model."""
  json_path = os.path.join(os.path.dirname(__file__), "imagenet.json")
  with open(json_path, "r") as fp:
    imagenet_labels = json.load(fp)

  with torch.set_grad_enabled(False):
    model, _ = clip.load(network, device="cuda", jit=False)
    model = model.eval()

  prompts = clip.tokenize(
      [f"This is a photo of a {label}" for label in imagenet_labels])
  with torch.no_grad():
    prompts_features = model.encode_text(prompts.cuda()).float()
    prompts_features /= prompts_features.norm(dim=-1, keepdim=True)

  image_mean = [0.48145466, 0.4578275, 0.40821073]
  image_std = [0.26862954, 0.26130258, 0.27577711]

  def call(features):
    # Normalize according to the documentation. Note that the pre-processing
    # will already have the range normalized to [0, 1].
    images_normalized = (features["image"] - image_mean) / image_std
    # Reshape from [batch, h, w, c] -> [batch, c, h, w].
    images_torch = torch.tensor(
        np.transpose(images_normalized, [0, 3, 1, 2]).astype(np.float32))
    with torch.no_grad():
      image_features = model.encode_image(images_torch.to("cuda")).float()
      image_features /= image_features.norm(dim=-1, keepdim=True)
      similarities = image_features @ prompts_features.T
      # The factor 100 (inverse temperature) comes from the released code.
      return (100.0 * similarities).softmax(dim=-1).cpu().numpy()

  input_resolution = model.visual.input_resolution
  preprocess_config = (f"resize_small({input_resolution})|"
                       f"central_crop({input_resolution})|"
                       f"value_range(0,1)")
  preprocess_fn = pipeline_builder.get_preprocess_fn(preprocess_config,
                                                     remove_tpu_dtypes=False)
  return call, preprocess_fn

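# An illustrative helper (not part of the module) showing how the softmax
# scores returned by `call` map back to the label strings loaded above; the
# helper name and its arguments are assumptions made for the example.
def _example_top1_labels(probs, imagenet_labels):
  # probs: [batch, 1000] NumPy array as returned by `call`.
  return [imagenet_labels[i] for i in probs.argmax(axis=-1)]
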
def create(dataset, network, size, resolution=None):
  """Returns the BiT pre-trained models with the linear classifier layer."""
  # Mapping from dataset names to the BiT qualifiers.
  supported_datasets = {
      "Imagenet1k": "s",
      "Imagenet21k": "m",
  }
  supported_networks = ["R50", "R101", "R152"]
  supported_sizes = ["x1", "x3", "x4"]
  if size not in supported_sizes:
    raise ValueError(f"Size {size} is not in {supported_sizes!r}.")
  if dataset not in supported_datasets:
    raise ValueError(f"Dataset {dataset} is not in {supported_datasets!r}.")
  if network not in supported_networks:
    raise ValueError(f"Network {network} is not in {supported_networks!r}.")

  root = ("https://tfhub.dev/google/bit/{qualifier}-{network_name}{size}/"
          "ilsvrc2012_classification/1")
  path = root.format(qualifier=supported_datasets[dataset],
                     network_name=network.lower(),
                     size=size)
  module = hub.load(path)

  @tf.function
  def model(features):
    return tf.nn.softmax(module(features["image"]), axis=-1)

  if resolution is not None:
    preprocess_config_fmt = "resize_small({})|central_crop({})|value_range(0,1)"
    preprocess_config = preprocess_config_fmt.format(
        int(1.15 * resolution), resolution)
  elif size == "x4":
    preprocess_config = "resize_small(512)|central_crop(480)|value_range(0,1)"
  else:
    preprocess_config = "resize(384)|value_range(0,1)"
  preprocess_fn = pipeline_builder.get_preprocess_fn(preprocess_config,
                                                     remove_tpu_dtypes=False)
  return model, preprocess_fn

def create(hub_path: str,
           preprocess_config: str = None,
           signature: str = "representation",
           logits_key: str = "logits"):
  """Returns a model using the specified hub signature and preprocessing."""
  module = hub.KerasLayer(hub_path, signature=signature, output_key=logits_key)

  @tf.function
  def model(features):
    return tf.nn.softmax(module(features["image"]), axis=-1)

  if preprocess_config:
    preprocess_fn = pipeline_builder.get_preprocess_fn(
        preprocess_config, remove_tpu_dtypes=False)
  else:
    preprocess_fn = None
  return model, preprocess_fn

def create(size="1x", variant="self-supervised", resolution=None): """Returns the SimCLR pre-trained models with the linear classifier layer. Args: size: Width of the ResNet-50 model in ('1x', '2x', '4x'). variant: String in ["self-supervised", "fine-tuned-100"]. resolution: If set, the preprocessing function will first 1) crop the smaller side to `1.15 * resolution`, and then take a square central crop of size `resolution`. Returns: tf.function wrapping the model. """ supported_sizes = ["1x", "2x", "4x"] supported_variants = ["self-supervised", "fine-tuned-10", "fine-tuned-100"] if size not in supported_sizes: raise ValueError(f"Size {size} is not in {supported_sizes!r}.") if variant not in supported_variants: raise ValueError( f"Variant {variant} is not in {supported_variants!r}.") module_id = f"{size}-{variant}" module_path = MODULE_PATHS[module_id] # The default signature is the 2048 dimensional representation, however we # want the ImageNet logits here. module = hub.KerasLayer(module_path, output_key="logits_sup") @tf.function def model(features): return tf.nn.softmax(module(features["image"]), axis=-1) if resolution is not None: preprocess_config_fmt = "resize_small({})|central_crop({})|value_range(0,1)" preprocess_config = preprocess_config_fmt.format( int(1.15 * resolution), resolution) preprocess_fn = pipeline_builder.get_preprocess_fn( preprocess_config, remove_tpu_dtypes=False) else: preprocess_fn = preprocess_fn_default return model, preprocess_fn
def create(model: str, resolution: int, ckpt_path: str):
  """Loads a ViT model from the given checkpoint.

  Args:
    model: The model name; for a list see vit_jax/models.KNOWN_MODELS.
    resolution: The image gets resized to `[resolution, resolution]`.
    ckpt_path: The .npz checkpoint file in the experiment's workdir.

  Returns:
    The model function and the corresponding preprocessing function.
  """
  # Assert that there is only one host.
  assert jax.host_count() == 1, "Multi-host setups are not supported."
  try:
    model = models.KNOWN_MODELS[model].partial(num_classes=1000)
  except KeyError:
    raise ValueError(f"Unknown model {model!r}, available models: "
                     f"{list(models.KNOWN_MODELS)}")
  params = checkpoint.load(ckpt_path)
  params["pre_logits"] = {}  # Need to restore empty leaf for Flax.

  def _model_call(features, params):
    return jax.nn.softmax(model.call(params, features), axis=-1)

  # Map over the sharded image batch and broadcast the (unsharded) parameters
  # to all local devices.
  model_call = jax.pmap(_model_call, in_axes=(0, None))

  def call(features):
    images = features["image"].numpy()
    # We have to pad the images in case the batch size is not divisible by
    # the device count.
    images_for_pmap = _pad_for_pmap(images)
    pmap_result = model_call(images_for_pmap, params)
    n_images = images.shape[0]
    return pmap_result.reshape([-1] + list(pmap_result.shape[2:]))[:n_images]

  preprocess_config = f"resize({resolution})|value_range(-1,1)"
  preprocess_fn = pipeline_builder.get_preprocess_fn(preprocess_config,
                                                     remove_tpu_dtypes=False)
  return call, preprocess_fn

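# A minimal sketch of the `_pad_for_pmap` helper referenced above. The real
# helper is defined elsewhere in the module, so this version is an assumption
# written only to illustrate the shape contract (and it assumes `numpy` is
# imported as `np` here): pad the batch so it is divisible by the local device
# count and reshape it to [num_devices, per_device_batch, ...], which is the
# layout `jax.pmap` expects.
def _pad_for_pmap_sketch(images):
  n_devices = jax.local_device_count()
  n_images = images.shape[0]
  per_device = -(-n_images // n_devices)  # Ceiling division.
  pad = per_device * n_devices - n_images
  if pad:
    padding = np.zeros((pad,) + images.shape[1:], dtype=images.dtype)
    images = np.concatenate([images, padding], axis=0)
  return images.reshape((n_devices, per_device) + images.shape[1:])
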
def create(variant):
  """Loads the model.

  Args:
    variant: One of "32x8d", "32x16d", "32x32d", "32x48d".

  Returns:
    The model and the pre-processing function.
  """
  with torch.set_grad_enabled(False):
    model = torch.hub.load(
        "facebookresearch/WSL-Images", f"resnext101_{variant}_wsl").eval()
    with_cuda = torch.cuda.is_available()
    if with_cuda:
      model.to("cuda")
    else:
      logging.warning("Running on CPU, no CUDA detected.")

  def call(features):
    images = features["image"].numpy()
    # Normalize according to the documentation. Note that the pre-processing
    # will already have the range normalized to [0, 1].
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    images_normalized = (images - mean) / std
    # Reshape from [batch, h, w, c] -> [batch, c, h, w].
    images_normalized_bchw = np.transpose(
        images_normalized, [0, 3, 1, 2]).astype(np.float32).copy()
    with torch.no_grad():
      images_torch = torch.from_numpy(images_normalized_bchw)
      if with_cuda:
        images_torch = images_torch.to("cuda")
      logits = model(images_torch)
      return torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()

  preprocess_config = "resize_small(256)|central_crop(224)|value_range(0,1)"
  preprocess_fn = pipeline_builder.get_preprocess_fn(
      preprocess_config, remove_tpu_dtypes=True)
  return call, preprocess_fn

# limitations under the License.

# Lint as: python3
"""Wrappers for datasets in tfds."""

from typing import Any, Callable, Dict, Optional, Union

from robustness_metrics.common import ops
from robustness_metrics.common import pipeline_builder
from robustness_metrics.common import types
from robustness_metrics.datasets import base
import tensorflow as tf
import tensorflow_datasets as tfds

default_config = "resize_small(256)|central_crop(224)|value_range(-1,1)"
default_imagenet_preprocessing = pipeline_builder.get_preprocess_fn(
    default_config, remove_tpu_dtypes=False)

PreprocessFn = Callable[[types.Features], types.Features]


def _enumerated_to_metadata(position, features):
  features["metadata"]["element_id"] = tf.reshape(position, [1])
  return features


class TFDSDataset(base.Dataset):
  """The base class of all `tensorflow_datasets` (TFDS) datasets.

  Two fields will be added to the wrapped dataset, before preprocessing it
  with the given function in `load` and batching. The two fields are:

def create(model_size="b0", variant="std", resolution=None): """Create EfficientNet models with corresponding preprocessing operations.""" if variant not in ("std", "aa", "adv-prop", "noisy-student"): raise ValueError(f"EfficientNet variant not supported: {variant}") # Note that for the standard EfficientNet variant only B0-B5 architectures are # supported, B0-B7 for all other variants. Noisy-Student also supports L2 # and L2_475 (with a resolution of 475). valid = (variant == "std" and model_size in {f"b{i}" for i in range(6)}) or \ (variant != "std" and model_size in {f"b{i}" for i in range(8)}) or \ (variant == "noisy-student" and model_size in ("l2", "l2_475")) if not valid: raise ValueError( f"Invalid `model_size` {model_size!r} for EfficientNet `variant` " f"{variant!r}!") if model_size.startswith("l2"): noisy_student = hub.KerasLayer(MODEL_PATHS[variant + "-l2"].format(model_size)) else: noisy_student = hub.KerasLayer(MODEL_PATHS[variant].format(model_size)) @tf.function def model(features): images = features["image"] return tf.nn.softmax(noisy_student(images), axis=-1) def preprocess_fn(features): # EfficientNet preprocessing with model-dependent input resolution. # Preprocessing mimicks that of the public EfficientNet code from # https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/preprocessing.py # (both `_resize_image` and `_decode_and_center_crop` taken from that code) def _resize_image(image, image_size, method=None): if method is not None: return tf1.image.resize([image], [image_size, image_size], method)[0] return tf1.image.resize_bicubic([image], [image_size, image_size])[0] def _decode_and_center_crop(image, image_size, resize_method=None): """Crops to center of image with padding then scales image_size.""" shape = tf1.shape(image) image_height = shape[0] image_width = shape[1] padded_center_crop_size = tf1.cast( ((image_size / (image_size + CROP_PADDING)) * tf.cast(tf.minimum(image_height, image_width), tf.float32)), tf.int32) offset_height = ((image_height - padded_center_crop_size) + 1) // 2 offset_width = ((image_width - padded_center_crop_size) + 1) // 2 image = tf1.image.crop_to_bounding_box(image, offset_height, offset_width, padded_center_crop_size, padded_center_crop_size) image = _resize_image(image, image_size, resize_method) return image features["image"] = _decode_and_center_crop( features["image"], EFFICIENTNET_RESOLUTIONS[model_size]) features["image"] = tf1.cast(features["image"], tf1.float32) # We assume the modules expect pixels in [-1, 1]. features["image"] = features["image"] / 127.5 - 1.0 return features if resolution is not None: preprocess_config_fmt = "resize_small({})|central_crop({})|value_range(-1,1)" preprocess_config = preprocess_config_fmt.format( int(1.15 * resolution), resolution) preprocess_fn = pipeline_builder.get_preprocess_fn( preprocess_config, remove_tpu_dtypes=False) return model, preprocess_fn