def __init__(
    self,
    cfg: AttrDict,
    path: str,
    split: str,
    dataset_name="fastmri_dataset",
    data_source="fastmri",
):
    super(FastMRIDataSet, self).__init__()

    assert PathManager.isdir(path), f"Directory {path} does not exist"
    self.dataset_name = "singlecoil"
    self.data_source = "fastmri"
    self.path = path

    data = cfg.get("DATA", AttrDict({}))
    self.key = data.get("KEY", "reconstruction_esc")
    self.index = data.get("INDEX", 12)

    self.split = split.lower()
    self.dataset = self._load_data()
def extract_low_shot_features(args: Namespace, cfg: AttrDict, output_dir: str):
    dataset_name = cfg["SVM"]["low_shot"]["dataset_name"]
    k_values = cfg["SVM"]["low_shot"]["k_values"]
    sample_inds = cfg["SVM"]["low_shot"]["sample_inds"]
    if "voc" in dataset_name:
        # In case of voc07 low-shot, we extract the features on the full
        # train and test sets; both sets have about 5K images.
        launch_distributed(
            cfg,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
    elif "places" in dataset_name:
        # in case of places, since the feature size could become large, we need
        # to extract features on smaller subsamples
        data_paths, label_paths = dataset_catalog.get_data_files(
            split="TRAIN", dataset_config=cfg["DATA"]
        )
        targets = load_file(label_paths[0])
        logging.info("Generating low-shot samples for Places205...")
        generate_places_low_shot_samples(
            targets, k_values, sample_inds, output_dir, data_paths[0]
        )

        test_features_extracted = False
        for idx in sample_inds:
            for k in k_values:
                out_img_file = f"{output_dir}/train_images_sample{idx}_k{k}.npy"
                out_lbls_file = f"{output_dir}/train_labels_sample{idx}_k{k}.npy"
                cfg.DATA.TRAIN.DATA_PATHS = [out_img_file]
                cfg.DATA.TRAIN.LABEL_PATHS = [out_lbls_file]
                cfg.CHECKPOINT.DIR = f"{output_dir}/sample{idx}_k{k}"
                logging.info(
                    f"Extracting features for places low shot: sample{idx}_k{k}"
                )
                # we want to extract the test features only once since the test
                # features are used for testing all low-shot setups.
                if test_features_extracted:
                    cfg.TEST_MODEL = False
                launch_distributed(
                    cfg,
                    args.node_id,
                    engine_name="extract_features",
                    hook_generator=default_hook_generator,
                )
                test_features_extracted = True
        # set the test model to true again after feature extraction is done
        cfg.TEST_MODEL = True
    else:
        raise RuntimeError(f"Dataset not recognised: {dataset_name}")
def _get_data_limit_sampling(cfg: AttrDict, split: str) -> AttrDict:
    default_sampling = AttrDict(
        {"SEED": 0, "IS_BALANCED": False, "SKIP_NUM_SAMPLES": 0}
    )
    return cfg["DATA"][split].get("DATA_LIMIT_SAMPLING", default_sampling)
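# Usage sketch (assumption, not VISSL source): when a split carries no
# DATA_LIMIT_SAMPLING section, the helper above falls back to the seeded,
# unbalanced, no-skip defaults.
_example_cfg = AttrDict({"DATA": {"TRAIN": {}}})
_sampling = _get_data_limit_sampling(_example_cfg, "TRAIN")
# _sampling.SEED == 0, _sampling.IS_BALANCED is False, _sampling.SKIP_NUM_SAMPLES == 0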
def convert_to_attrdict(cfg: DictConfig, cmdline_args: List[Any] = None):
    """
    Given the user input Hydra Config, and some command line input options
    to override the config file:
    1. merge and override the command line options in the config
    2. Convert the Hydra OmegaConf to AttrDict structure to make it easy
       to access the keys in the config file
    3. Also check the config version used is compatible and supported in vissl.
       In the future, we would want to support upgrading the old config versions
       if we make changes to the VISSL default config structure (deleting,
       renaming keys)
    4. We infer values of some parameters in the config file using the other
       parameter values.
    """
    if cmdline_args:
        # convert the command line args to DictConfig
        sys.argv = cmdline_args
        cli_conf = OmegaConf.from_cli(cmdline_args)

        # merge the command line args with config
        cfg = OmegaConf.merge(cfg, cli_conf)

    # convert the config to AttrDict
    cfg = OmegaConf.to_container(cfg)
    cfg = AttrDict(cfg)

    # check the cfg has valid version
    check_cfg_version(cfg)
    # assert the config and infer
    config = cfg.config
    assert_hydra_conf(config)
    return cfg, config
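# Usage sketch (assumption, not VISSL source): compose a Hydra config the same way
# setup_pathmanager() does below, then convert it with convert_to_attrdict().
# The override string is illustrative only.
def _example_convert_to_attrdict():
    with initialize_config_module(config_module="vissl.config"):
        hydra_cfg = compose(
            "defaults", overrides=["config=test/integration_test/quick_swav"]
        )
    cfg, config = convert_to_attrdict(hydra_cfg)
    return cfg, config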
def __init__(self, loss_config: AttrDict):
    """
    Initializer for the sum binary cross-entropy (with logits) loss. For a single
    tensor, this is equivalent to the binary cross-entropy loss. For a list of
    tensors, this computes the sum of the binary cross-entropy losses for each
    tensor in the list against the target.

    Config params:
        reduction: specifies reduction to apply to the output, optional
        normalize_output: whether to L2 normalize the outputs
        world_size: total number of gpus in training. automatically inferred by vissl
    """
    super(BCELogitsMultipleOutputSingleTargetLoss, self).__init__()
    self.loss_config = loss_config
    self._losses = torch.nn.modules.ModuleList([])
    self._reduction = loss_config.get("reduction", "none")
    self._normalize_output = loss_config.get("normalize_output", False)
    self._world_size = loss_config["world_size"]
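# Hedged config sketch (assumption, illustrative values): the loss_config fields
# read by the initializer above; "world_size" has no default and must be provided.
_example_bce_loss_config = AttrDict(
    {"reduction": "none", "normalize_output": False, "world_size": 1}
)
# criterion = BCELogitsMultipleOutputSingleTargetLoss(_example_bce_loss_config)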
def _copy_to_local(cfg: AttrDict):
    available_splits = _get_available_splits(cfg)
    for split in available_splits:
        if cfg.DATA[split].COPY_TO_LOCAL_DISK:
            dest_dir = cfg.DATA[split]["COPY_DESTINATION_DIR"]
            tmp_dest_dir = tempfile.mkdtemp()
            data_files, label_files = get_data_files(split, cfg.DATA)
            data_files.extend(label_files)
            _, output_dir = copy_data_to_local(
                data_files, dest_dir, tmp_destination_dir=tmp_dest_dir
            )
            cfg.DATA[split]["COPY_DESTINATION_DIR"] = output_dir
class TestMLP(unittest.TestCase):
    """
    Unit test to verify correct construction of MLP layers
    and linear evaluation MLP layers
    """

    MODEL_CONFIG = AttrDict(
        {
            "HEAD": {
                "BATCHNORM_EPS": 1e-6,
                "BATCHNORM_MOMENTUM": 0.99,
                "PARAMS_MULTIPLIER": 1.0,
            }
        }
    )

    def test_mlp(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(4, 2048))
        out = mlp(x)
        assert out.shape == torch.Size([4, 100])

        x = torch.randn(size=(1, 2048))
        out = mlp(x)
        assert out.shape == torch.Size([1, 100])

    def test_mlp_reshaping(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(1, 2048, 1, 1))
        out = mlp(x)
        assert out.shape == torch.Size([1, 100])

    def test_mlp_catch_bad_shapes(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(1, 2048, 2, 1))
        with self.assertRaises(AssertionError) as context:
            mlp(x)
        assert context.exception is not None

    def test_eval_mlp_shape(self):
        eval_mlp = LinearEvalMLP(
            self.MODEL_CONFIG, in_channels=2048, dims=[2048 * 2 * 2, 1000]
        )

        resnet_feature_map = torch.randn(size=(4, 2048, 2, 2))
        out = eval_mlp(resnet_feature_map)
        assert out.shape == torch.Size([4, 1000])

        resnet_feature_map = torch.randn(size=(1, 2048, 2, 2))
        out = eval_mlp(resnet_feature_map)
        assert out.shape == torch.Size([1, 1000])
def setup_pathmanager():
    """
    Set up PathManager. A bit hacky -- we use the #set_env_vars method to set up
    the PathManager, and as such we need to create a dummy config and dummy values
    for local_rank and node_id.
    """
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=["config=test/integration_test/quick_swav"],
        )
    config = AttrDict(cfg).config
    set_env_vars(local_rank=0, node_id=0, cfg=config)
def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__, output_dir=get_checkpoint_folder(config))

    # print the configuration used
    print_cfg(config)

    assert config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON, (
        "Feature eval mode is not ON. Can't run train_svm. "
        "Set config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON=True "
        "in your config or from the command line."
    )

    # extract the features
    if not config.SVM_FEATURES_PATH:
        launch_distributed(
            config,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
        config.SVM_FEATURES_PATH = get_checkpoint_folder(config)

    # Get the names of the layers for which we extracted features. If the user
    # doesn't specify the features to evaluate, we use the full model output and
    # freeze both head and trunk as a precaution.
    layers = get_trunk_output_feature_names(config.MODEL)
    if len(layers) == 0:
        layers = ["heads"]

    output_dir = get_checkpoint_folder(config)
    running_tasks = [
        mp.Process(target=train_svm, args=(config, output_dir, layer))
        for layer in layers
    ]
    for running_task in running_tasks:
        running_task.start()
    for running_task in running_tasks:
        running_task.join()

    # collect the mAP stats for all the layers and report
    output_mAP = []
    for layer in layers:
        try:
            ap_file = f"{output_dir}/{layer}/test_ap.npy"
            output_mAP.append(round(100.0 * np.mean(load_file(ap_file)), 3))
        except Exception:
            output_mAP.append(-1)
    logging.info(f"AP for various layers:\n {layers}: {output_mAP}")

    # close the logging streams including the file handlers
    shutdown_logging()
def __init__(self, model_config: AttrDict, model_name: str):
    super().__init__()

    self.model_config = model_config
    assert model_config.INPUT_TYPE in ["rgb", "bgr"], "Input type not supported"
    trunk_config = copy.deepcopy(model_config.TRUNK.VISION_TRANSFORMERS)

    logging.info("Building model: Vision Transformer from yaml config")
    trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

    self.model = ClassyVisionTransformer(
        image_size=trunk_config.image_size,
        patch_size=trunk_config.patch_size,
        num_layers=trunk_config.num_layers,
        num_heads=trunk_config.num_heads,
        hidden_dim=trunk_config.hidden_dim,
        mlp_dim=trunk_config.mlp_dim,
        dropout_rate=trunk_config.dropout_rate,
        attention_dropout_rate=trunk_config.attention_dropout_rate,
        classifier=trunk_config.classifier,
    )
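# Hedged config sketch (assumption, illustrative values only): the
# TRUNK.VISION_TRANSFORMERS keys read by the initializer above, before the
# lower-casing step maps them to the ClassyVisionTransformer arguments.
_example_vit_trunk_config = AttrDict(
    {
        "IMAGE_SIZE": 224,
        "PATCH_SIZE": 16,
        "NUM_LAYERS": 12,
        "NUM_HEADS": 12,
        "HIDDEN_DIM": 768,
        "MLP_DIM": 3072,
        "DROPOUT_RATE": 0.0,
        "ATTENTION_DROPOUT_RATE": 0.0,
        "CLASSIFIER": "token",
    }
)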
def _test_synch_bn_pytorch_worker(
    gpu_id: int, world_size: int, group_size: int, sync_file: str
):
    torch.cuda.set_device(gpu_id)
    init_distributed_on_file(
        world_size=world_size, gpu_id=gpu_id, sync_file=sync_file
    )

    config = AttrDict(
        {
            "MODEL": {
                "SYNC_BN_CONFIG": {
                    "SYNC_BN_TYPE": "pytorch",
                    "GROUP_SIZE": group_size,
                }
            },
            "DISTRIBUTED": {
                "NUM_PROC_PER_NODE": world_size,
                "NUM_NODES": 1,
                "NCCL_DEBUG": False,
                "NCCL_SOCKET_NTHREADS": 4,
            },
        }
    )
    set_env_vars(local_rank=gpu_id, node_id=0, cfg=config)

    channels = 8
    model = nn.Sequential(
        nn.BatchNorm2d(num_features=channels),
        nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    )
    model = convert_sync_bn(config, model).cuda(gpu_id)
    model = DistributedDataParallel(model, device_ids=[gpu_id])
    x = torch.full(size=(5, channels, 4, 4), fill_value=float(gpu_id))
    model(x)
    running_mean = model.module[0].running_mean.cpu()
    print(gpu_id, running_mean)
    if group_size == 1:
        if gpu_id == 0:
            assert torch.allclose(
                running_mean, torch.full(size=(8,), fill_value=0.0)
            )
        elif gpu_id == 1:
            assert torch.allclose(
                running_mean, torch.full(size=(8,), fill_value=0.1)
            )
    else:
        if gpu_id in {0, 1}:
            assert torch.allclose(
                running_mean, torch.full(size=(8,), fill_value=0.05)
            )
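# Hedged launcher sketch (assumption, not VISSL source): spawn one worker per GPU
# with torch.multiprocessing; spawn() passes the process index as the first
# argument, which the worker above uses as gpu_id.
def _run_synch_bn_pytorch_test(world_size: int = 2, group_size: int = 1):
    import tempfile

    import torch.multiprocessing as torch_mp

    with tempfile.NamedTemporaryFile(delete=False) as sync_file:
        torch_mp.spawn(
            _test_synch_bn_pytorch_worker,
            args=(world_size, group_size, sync_file.name),
            nprocs=world_size,
        )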
def test_configuration(
    self,
    temperature: float,
    normalize_output: bool,
    label_smoothing: float,
    batch_size: int = 16,
    target_count: int = 10,
):
    torch.random.manual_seed(0)
    logits = torch.randn(size=(batch_size, target_count))
    target = torch.randint(0, target_count, size=(batch_size,))

    criterion_ref = CrossEntropyMultipleOutputSingleTargetCriterion(
        temperature=temperature,
        normalize_output=normalize_output,
        label_smoothing=label_smoothing,
    )
    config = AttrDict(
        {
            "temperature": temperature,
            "normalize_output": normalize_output,
            "label_smoothing": label_smoothing,
        }
    )
    criterion = CrossEntropyMultipleOutputSingleTargetLoss(config)
    self.assertEqual(criterion(logits, target), criterion_ref(logits, target))
def save_attrdict_to_disk(cfg: AttrDict):
    from vissl.utils.checkpoint import get_checkpoint_folder

    yaml_output_file = f"{get_checkpoint_folder(cfg)}/train_config.yaml"
    save_file(cfg.to_dict(), yaml_output_file)
def __init__(self, model_config: AttrDict, model_name: str):
    super().__init__()

    assert model_config.INPUT_TYPE in ["rgb", "bgr"], "Input type not supported"
    trunk_config = copy.deepcopy(model_config.TRUNK.VISION_TRANSFORMERS)

    logging.info("Building model: Vision Transformer from yaml config")
    # Hacky workaround
    trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

    img_size = trunk_config.image_size
    patch_size = trunk_config.patch_size
    in_chans = 3
    embed_dim = trunk_config.hidden_dim
    depth = trunk_config.num_layers
    num_heads = trunk_config.num_heads
    mlp_ratio = 4.0
    qkv_bias = trunk_config.qkv_bias
    qk_scale = trunk_config.qk_scale
    drop_rate = trunk_config.dropout_rate
    attn_drop_rate = trunk_config.attention_dropout_rate
    drop_path_rate = trunk_config.drop_path_rate
    hybrid_backbone_string = None
    # TODO Implement hybrid backbones
    if "HYBRID" in trunk_config.keys():
        hybrid_backbone_string = trunk_config.HYBRID
    norm_layer = partial(nn.LayerNorm, eps=1e-6)

    self.num_features = (
        self.embed_dim
    ) = embed_dim  # num_features for consistency with other models

    # TODO : Enable Hybrid Backbones
    if hybrid_backbone_string:
        self.patch_embed = globals()[hybrid_backbone_string](
            out_dim=embed_dim, img_size=img_size
        )
    # if hybrid_backbone is not None:
    #     self.patch_embed = HybridEmbed(
    #         hybrid_backbone,
    #         img_size=img_size,
    #         in_chans=in_chans,
    #         embed_dim=embed_dim,
    #     )
    else:
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
    num_patches = self.patch_embed.num_patches

    self.class_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_embedding = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
    self.pos_drop = nn.Dropout(p=drop_rate)
    dpr = [
        x.item() for x in torch.linspace(0, drop_path_rate, depth)
    ]  # stochastic depth decay rule
    self.blocks = nn.ModuleList(
        [
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[i],
                norm_layer=norm_layer,
            )
            for i in range(depth)
        ]
    )
    self.norm = norm_layer(embed_dim)

    # NOTE as per official impl, we could have a pre-logits
    # representation dense layer + tanh here
    # self.repr = nn.Linear(embed_dim, representation_size)
    # self.repr_act = nn.Tanh()

    trunc_normal_(self.pos_embedding, std=0.02)
    trunc_normal_(self.class_token, std=0.02)
    self.apply(self._init_weights)
def __init__(self, model_config: AttrDict, model_name: str):
    super().__init__()

    assert model_config.INPUT_TYPE in ["rgb", "bgr"], "Input type not supported"
    trunk_config = copy.deepcopy(model_config.TRUNK.XCIT)

    logging.info("Building model: XCiT from yaml config")
    # Hacky workaround
    trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

    img_size = trunk_config.image_size
    patch_size = trunk_config.patch_size
    embed_dim = trunk_config.hidden_dim
    depth = trunk_config.num_layers
    num_heads = trunk_config.num_heads
    mlp_ratio = trunk_config.mlp_ratio
    qkv_bias = trunk_config.qkv_bias
    qk_scale = trunk_config.qk_scale
    drop_rate = trunk_config.dropout_rate
    attn_drop_rate = trunk_config.attention_dropout_rate
    drop_path_rate = trunk_config.drop_path_rate
    eta = trunk_config.eta
    tokens_norm = trunk_config.tokens_norm
    norm_layer = partial(nn.LayerNorm, eps=1e-6)

    self.num_features = (
        self.embed_dim
    ) = embed_dim  # num_features for consistency with other models

    self.patch_embed = ConvPatchEmbed(
        img_size=img_size, embed_dim=embed_dim, patch_size=patch_size
    )
    num_patches = self.patch_embed.num_patches

    self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_drop = nn.Dropout(p=drop_rate)

    dpr = [drop_path_rate for i in range(depth)]
    self.blocks = nn.ModuleList(
        [
            XCABlock(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[i],
                norm_layer=norm_layer,
                num_tokens=num_patches,
                eta=eta,
            )
            for i in range(depth)
        ]
    )

    cls_attn_layers = 2
    self.cls_attn_blocks = nn.ModuleList(
        [
            ClassAttentionBlock(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                norm_layer=norm_layer,
                eta=eta,
                tokens_norm=tokens_norm,
            )
            for i in range(cls_attn_layers)
        ]
    )

    self.norm = norm_layer(embed_dim)
    self.pos_embeder = PositionalEncodingFourier(dim=embed_dim)
    self.use_pos = True

    # Classifier head
    trunc_normal_(self.cls_token, std=0.02)
    self.apply(self._init_weights)
from vissl.data.fastmri_dataset import FastMRIDataSet
from vissl.config import AttrDict
from vissl.data.ssl_transforms.freq_to_spatial import FrequencyToSpatial
from vissl.data.ssl_transforms.spatial_to_freq import SpatialToFrequency
from vissl.data.ssl_transforms.freq_apply_mask import ApplyFrequencyMask
# from vissl.data.ssl_transforms.rgb_to_grayscale import RGBToGrayScale
from PIL import Image

attributes = AttrDict({"DATA": {"INDEX": 18}})
data = FastMRIDataSet(cfg=attributes, path="/mnt/d/data", split="train")
# data = FastMRIDataSet(cfg=attributes, path="/Users/ylichman/classes/dl/final/data", split="train")

# print(data.num_samples())
spatial_image, _ = data[18]
# tmp = (tmp * 255 / np.max(tmp)).astype('uint8')
# onedimImage = Image.fromarray(tmp)
# imaget = Image.fromarray(spatial_image[:, :, 0])
# imaget.save("test_spatial.png")

# gray = RGBToGrayScale()(spatial_image)
freq_image = SpatialToFrequency()(spatial_image)  # this needs to be a tensor
print(freq_image.shape)
print(
    f'Processed spatial to freq with shape: {freq_image.shape}, {freq_image[0, 0].dtype}'
)
def __init__(self, meters_config: AttrDict):
    self.num_classes = meters_config.get("num_classes")
    self._total_sample_count = None
    self._curr_sample_count = None
    self.reset()