def flush_report(self):
    """Dump the accumulated prediction report to disk, then clear it.

    Only the master process writes a file. The file name is built as
    ``<dataset>_[<experiment>_]<dataset_type>_<timestamp>`` and the
    extension (``.csv`` or ``.json``) follows the configured prediction
    file format.
    """
    if not is_master():
        # Non-master processes never write; still drop their entries so
        # nothing leaks into the next report.
        self.report = []
        return

    name = self.current_datamodule.dataset_name
    timestamp = self.timer.get_time_hhmmss(None, format="%Y-%m-%dT%H:%M:%S")

    name_parts = [name]
    if len(self.experiment_name) > 0:
        name_parts.append(self.experiment_name)
    name_parts.append(self.dataset_type)
    name_parts.append(timestamp)
    filename = "_".join(name_parts)

    # Either config section may request CSV output; the second one is only
    # consulted when the first does not ask for it.
    wants_csv = self.config.evaluation.predict_file_format == "csv"
    if not wants_csv:
        wants_csv = self.test_reporter_config.predict_file_format == "csv"

    extension = ".csv" if wants_csv else ".json"
    filepath = os.path.join(self.report_folder, filename + extension)
    if wants_csv:
        self.csv_dump(filepath)
    else:
        self.json_dump(filepath)

    logger.info(
        f"Wrote predictions for {name} to {os.path.abspath(filepath)}")
    self.report = []
def __init__(self, embedding_name, *args, **kwargs):
    """Use this if you want to use a pretrained embedding.

    See the description of IntersectedVocab for the list of embeddings
    available from torchtext.

    Parameters
    ----------
    embedding_name : str
        Name of the pretrained alias of the embedding to use.

    Raises
    ------
    RuntimeError
        If ``embedding_name`` is not a key of ``vocab.pretrained_aliases``.
    """
    self.type = "pretrained"

    if embedding_name not in vocab.pretrained_aliases:
        from mmf.common.registry import registry

        writer = registry.get("writer")
        # Build a plain string: the previous trailing `, "error"` turned the
        # message into a tuple that was then logged and raised as-is.
        error = "Unknown embedding type: %s" % embedding_name
        if writer is not None:
            writer.write(error, "error")
        raise RuntimeError(error)

    vector_cache = get_mmf_cache_dir()

    # First test loading the vectors in master so that everybody doesn't
    # download it in case it doesn't exist
    if is_master():
        vocab.pretrained_aliases[embedding_name](cache=vector_cache)
    synchronize()

    embedding = vocab.pretrained_aliases[embedding_name](cache=vector_cache)

    self.UNK_INDEX = 3
    # Unknown words resolve to the UNK index.
    self.stoi = defaultdict(lambda: self.UNK_INDEX)
    self.itos = {}

    self.itos[self.PAD_INDEX] = self.PAD_TOKEN
    self.itos[self.SOS_INDEX] = self.SOS_TOKEN
    self.itos[self.EOS_INDEX] = self.EOS_TOKEN
    self.itos[self.UNK_INDEX] = self.UNK_TOKEN

    self.stoi[self.SOS_TOKEN] = self.SOS_INDEX
    self.stoi[self.EOS_TOKEN] = self.EOS_INDEX
    self.stoi[self.PAD_TOKEN] = self.PAD_INDEX
    self.stoi[self.UNK_TOKEN] = self.UNK_INDEX

    # One row per special token plus one row per pretrained word.
    self.vectors = torch.FloatTensor(
        len(self.itos.keys()) + len(embedding.itos), len(embedding.vectors[0])
    )

    # Rows 0..3 are filled with the constant 0.1 * row_index.
    for i in range(4):
        self.vectors[i] = torch.ones_like(self.vectors[i]) * 0.1 * i

    # Pretrained words are appended after the four reserved rows.
    for index, (word, actual_index) in enumerate(embedding.stoi.items(), start=4):
        self.itos[index] = word
        self.stoi[word] = index
        self.vectors[index] = embedding.vectors[actual_index]
def _try_download(self):
    """Resolve the FastText model file, downloading it when necessary.

    The configured ``model_file`` is used as-is when it exists; otherwise it
    is looked up relative to the MMF cache directory. If the key is missing
    or no file is found at either location, ``self._download_model()`` is
    called. On completion ``self.model_file`` is set,
    ``self._already_downloaded`` is flipped and processes are synchronized.
    """
    _is_master = is_master()

    if self._already_downloaded:
        return

    needs_download = False
    # Read the key defensively: previously a missing key produced a warning
    # and was then dereferenced anyway, feeding a non-path into the checks.
    model_file = getattr(self.config, "model_file", None)

    if model_file is None:
        if _is_master:
            warnings.warn("'model_file' key is required but missing "
                          "from FastTextProcessor's config.")
        needs_download = True
    else:
        # If model_file is already an existing path don't join to cache dir
        if not PathManager.exists(model_file):
            model_file = os.path.join(get_mmf_cache_dir(), model_file)

        if not PathManager.exists(model_file):
            if _is_master:
                warnings.warn(f"No model file present at {model_file}.")
            needs_download = True

    if needs_download:
        logger.info("Downloading FastText bin")
        model_file = self._download_model()

    self.model_file = model_file
    self._already_downloaded = True
    synchronize()
def flush_report(self):
    """Write the collected predictions to a CSV or JSON file and reset them.

    Does nothing on non-master processes. The file is named
    ``<dataset>_[<experiment>_]<task_type>_<timestamp>`` and placed in
    ``self.report_folder``.
    """
    if not is_master():
        return

    name = self.current_dataset.dataset_name
    timestamp = self.timer.get_time_hhmmss(None, format="%Y-%m-%dT%H:%M:%S")

    pieces = [name]
    if len(self.experiment_name) > 0:
        pieces.append(self.experiment_name)
    pieces += [self.task_type, timestamp]
    filename = "_".join(pieces)

    as_csv = self.config.evaluation.predict_file_format == "csv"
    filepath = os.path.join(
        self.report_folder, filename + (".csv" if as_csv else ".json")
    )
    if as_csv:
        self.csv_dump(filepath)
    else:
        self.json_dump(filepath)

    logger.info(
        f"Wrote evalai predictions for {name} to {os.path.abspath(filepath)}"
    )
    self.report = []
def add_to_report(self, report):
    """Gather prediction fields across processes and append them to the report.

    For the "coco" dataset the captions (and tensor image ids) are gathered;
    for every other dataset the scores and, when present, question ids,
    image ids and context tokens are gathered. Only the master process
    formats the gathered report and extends ``self.report``.
    """
    # TODO: Later gather whole report for no opinions
    if self.current_dataset.dataset_name == "coco":
        report.captions = gather_tensor(report.captions)
        if isinstance(report.image_id, torch.Tensor):
            report.image_id = gather_tensor(report.image_id).view(-1)
    else:
        gathered_scores = gather_tensor(report.scores)
        report.scores = gathered_scores.view(-1, report.scores.size(-1))

        if "question_id" in report:
            report.question_id = gather_tensor(report.question_id).view(-1)

        # Both fields are unpacked as two-dimensional; the trailing size is
        # kept while the gathered leading dimensions are flattened.
        for field in ("image_id", "context_tokens"):
            if field in report:
                _, enc_size = getattr(report, field).size()
                setattr(report, field, gather_tensor(getattr(report, field)))
                setattr(report, field, getattr(report, field).view(-1, enc_size))

    if not is_master():
        return

    formatted = self.current_dataset.format_for_evalai(report)
    self.report = self.report + formatted
def summarize_report(
    current_iteration,
    num_updates,
    max_updates,
    meter,
    should_print=True,
    extra=None,
    tb_writer=None,
):
    """Push meter values to tensorboard and/or log a progress line.

    Args:
        current_iteration: Step passed to ``tb_writer.add_scalars``.
        num_updates: Updates done so far; with ``max_updates`` forms the
            ``progress`` entry when both are not None.
        max_updates: Total number of updates.
        meter: Object providing ``get_scalar_dict()`` and ``get_log_dict()``.
        should_print: When False, nothing is logged via ``log_progress``.
        extra: Optional dict merged last into the logged values.
        tb_writer: Optional writer; skipped when falsy.
    """
    extra = {} if extra is None else extra

    # Continue only on the master process or when running on XLA.
    if not (is_master() or is_xla()):
        return

    if tb_writer:
        scalars = meter.get_scalar_dict()
        tb_writer.add_scalars(scalars, current_iteration)

    if not should_print:
        return

    log_dict = {}
    if num_updates is not None and max_updates is not None:
        log_dict["progress"] = f"{num_updates}/{max_updates}"
    for source in (meter.get_log_dict(), extra):
        log_dict.update(source)

    log_progress(log_dict)
def flush_report(self):
    """Serialize the collected predictions to a JSON file and reset them.

    Runs on the master process only. The file is written into
    ``self.report_folder`` under the name
    ``<dataset>_[<experiment>_]<task_type>_<timestamp>.json``.
    """
    if not is_master():
        return

    name = self.current_dataset.dataset_name
    timestamp = self.timer.get_time_hhmmss(None, format="%Y-%m-%dT%H:%M:%S")

    segments = [name]
    if len(self.experiment_name) > 0:
        segments.append(self.experiment_name)
    segments.extend((self.task_type, timestamp))
    filepath = os.path.join(self.report_folder, "_".join(segments) + ".json")

    with PathManager.open(filepath, "w") as f:
        json.dump(self.report, f)

    message = "Wrote evalai predictions for %s to %s" % (
        name,
        os.path.abspath(filepath),
    )
    self.writer.write(message)
    self.report = []
def evaluation_loop(
    self, loader, use_tqdm: bool = False, single_batch: bool = False
) -> Tuple[Dict[str, Any], Type[Meter]]:
    """Run the model over ``loader`` in eval mode and compute metrics.

    Args:
        loader: Iterable of batches fed to ``self._forward``.
        use_tqdm: Show a progress bar (master process only).
        single_batch: Stop after the first batch when exactly ``True``.

    Returns:
        The combined report (with ``metrics`` set) and the meter.
    """
    meter = Meter()

    with torch.no_grad():
        self.model.eval()
        hide_progress = not (use_tqdm and is_master())
        combined_report = None

        for batch in tqdm.tqdm(loader, disable=hide_progress):
            report = self._forward(batch)
            self.update_meter(report, meter)

            # Keep accumulating the fields the metrics need.
            if combined_report is not None:
                combined_report.accumulate_tensor_fields(
                    report, self.metrics.required_params
                )
                combined_report.batch_size += report.batch_size
            else:
                combined_report = report

            if single_batch is True:
                break

        combined_report.metrics = self.metrics(combined_report, combined_report)
        self.update_meter(combined_report, meter, eval_mode=True)

        # Switch the model back to training mode.
        self.model.train()

    return combined_report, meter
def evaluate(self, loader, use_tqdm=False, single_batch=False):
    """Evaluate the model on ``loader`` without gradients.

    Each batch goes through ``self._forward_pass``; the per-batch reports
    are merged into one combined report on which ``self.metrics`` is
    computed. The model is put back into train mode before returning.

    Returns:
        Tuple of (combined report, meter).
    """
    meter = Meter()

    with torch.no_grad():
        self.model.eval()
        show_progress = use_tqdm and is_master()
        combined_report = None

        for batch in tqdm(loader, disable=not show_progress):
            batch_report = self._forward_pass(batch)
            self._update_meter(batch_report, meter)

            # The first report seeds the accumulator; later ones are merged.
            if combined_report is None:
                combined_report = batch_report
            else:
                required = self.metrics.required_params
                combined_report.accumulate_tensor_fields(batch_report, required)
                combined_report.batch_size += batch_report.batch_size

            if single_batch is True:
                break

        combined_report.metrics = self.metrics(combined_report, combined_report)
        self._update_meter(combined_report, meter, eval_mode=True)
        self.model.train()

    return combined_report, meter
def __init__(
    self,
    loaders: Dict[str, DataLoader],
    iteration_strategy: iteration_strategies.IterationStrategy = None,
):
    """Set up iteration over several named dataloaders.

    Args:
        loaders: Mapping from dataset name to its dataloader. A warning is
            issued when it is ``None`` or empty.
        iteration_strategy: Strategy deciding which loader is used next;
            a round-robin strategy is created when omitted.
    """
    nothing_to_load = loaders is None or len(loaders) == 0
    if nothing_to_load:
        warnings.warn(
            "Empty loaders passed into MultiDataLoader. This can have "
            "unintended consequences.")

    if iteration_strategy is None:
        iteration_strategy = iteration_strategies.RoundRobinIterationStrategy(
            OmegaConf.create(), loaders
        )

    self._iteration_strategy = iteration_strategy
    self._loaders = loaders
    self._is_master = is_master()

    self._num_datasets = len(self.loaders)
    self.dataset_list = list(loaders.keys())

    # Per-dataset iterator bookkeeping.
    self._iterators = {}
    self._finished_iterators = {}
    self.current_index = 0

    self.set_lengths()
    self.set_samplers()
def evaluation_loop(
        self,
        dataset_type: str,
        use_tqdm: bool = False,
        single_batch: bool = False) -> Tuple[Dict[str, Any], Type[Meter]]:
    """Evaluate the model on every dataset exposed by the test reporter.

    Args:
        dataset_type: Passed to ``self.dataset_loader.get_test_reporter``.
        use_tqdm: Show a progress bar; it is disabled on non-master
            processes regardless of this flag.
        single_batch: When exactly ``True``, stop after the first batch of
            each dataloader.

    Returns:
        The combined report and the meter.

    NOTE(review): the nesting below was reconstructed from a flattened
    source line; the returned report appears to be that of the last dataset
    iterated, and ``combined_report`` looks unbound if the reporter yields
    no dataset (and would be ``None`` for an empty dataloader) — confirm.
    """
    meter = Meter()
    reporter = self.dataset_loader.get_test_reporter(dataset_type)

    # No gradients are needed while evaluating.
    with torch.no_grad():
        self.model.eval()
        disable_tqdm = not use_tqdm or not is_master()
        # next_dataset() advances the reporter; the loop ends when it
        # returns a falsy value.
        while reporter.next_dataset(flush_report=False):
            dataloader = reporter.get_dataloader()
            combined_report = None

            for batch in tqdm.tqdm(dataloader, disable=disable_tqdm):
                prepared_batch = reporter.prepare_batch(batch)
                prepared_batch = to_device(prepared_batch, self.device)
                model_output = self.model(prepared_batch)
                report = Report(prepared_batch, model_output)

                self.update_meter(report, meter)

                # accumulate necessary params for metric calculation
                if combined_report is None:
                    # make a copy of report since `reporter.add_to_report` will
                    # change some of the report keys later
                    combined_report = Report(report)
                else:
                    combined_report.accumulate_tensor_fields_and_loss(
                        report, self.metrics.required_params)
                    combined_report.batch_size += report.batch_size

                # Each node generates a separate copy of predict JSON from the
                # report, which will be used to evaluate dataset-level metrics
                # (such as mAP in object detection or CIDEr in image captioning)
                # Since `reporter.add_to_report` changes report keys (e.g.
                # scores), do this after
                # `combined_report.accumulate_tensor_fields_and_loss`
                if "__prediction_report__" in self.metrics.required_params:
                    reporter.add_to_report(report, self.model,
                                           execute_on_master_only=False)

                if single_batch is True:
                    break

            reporter.postprocess_dataset_report()
            # the prediction report is used for set-level metrics
            combined_report.prediction_report = reporter.report

            combined_report.metrics = self.metrics(combined_report,
                                                   combined_report)
            self.update_meter(combined_report, meter, eval_mode=True)

        # enable train mode again
        self.model.train()

    return combined_report, meter
def load_requirements(self, *args, **kwargs):
    """Download the zoo models listed under ``zoo_requirements`` in the config.

    The downloads are triggered from the master process only; every process
    then waits in ``synchronize()``. Extra positional and keyword arguments
    are forwarded to ``download_pretrained_model``.
    """
    if is_master():
        configured = self.config.get("zoo_requirements", [])
        # A single requirement may be given as a bare string.
        names = [configured] if isinstance(configured, str) else configured
        for name in names:
            download_pretrained_model(name, *args, **kwargs)

    synchronize()
def _threaded_read(self):
    """Fill the cache for annotation indices 1..N-1 using four threads.

    ``self._fill_cache`` is invoked once per index through a thread pool,
    with a progress bar shown on the master process only.
    """
    # Index 0 is skipped on purpose, as in the original range(1, ...).
    elements = list(range(1, len(self.annotation_db)))

    # The context manager releases the pool even if a worker raises.
    with ThreadPool(processes=4) as pool:
        with tqdm.tqdm(
            total=len(elements), miniters=100, disable=not is_master()
        ) as pbar:
            # Advance by one per finished item so the bar never overshoots
            # its total; `miniters` keeps redraws to roughly every 100 items.
            for _ in pool.imap_unordered(self._fill_cache, elements):
                pbar.update(1)
def save(self, update, iteration=None, update_best=False):
    """Write a checkpoint of the trainer state (master process only).

    Args:
        update: Current update number; also used in the checkpoint file name.
        iteration: Current iteration; falls back to ``update`` when falsy.
        update_best: Also overwrite the "best" checkpoint when True.

    The same payload is saved to the per-update file, optionally to the
    "best" file, and always to the "current" file.
    """
    # Only save in main process
    if not is_master():
        return

    if not iteration:
        iteration = update

    ckpt_filepath = os.path.join(
        self.models_foldername, "model_%d.ckpt" % update
    )
    best_ckpt_filepath = os.path.join(
        self.ckpt_foldername, self.ckpt_prefix + "best.ckpt"
    )
    current_ckpt_filepath = os.path.join(
        self.ckpt_foldername, self.ckpt_prefix + "current.ckpt"
    )

    early_stopping = self.trainer.early_stop_callback.early_stopping
    best_iteration = early_stopping.best_monitored_iteration
    best_update = early_stopping.best_monitored_update
    best_metric = early_stopping.best_monitored_value

    model = self.trainer.model
    # When the registry flags a parallel setup, the underlying model is
    # taken from `.module`.
    data_parallel = registry.get("data_parallel") or registry.get("distributed")
    if data_parallel is True:
        model = model.module

    ckpt = {
        "model": model.state_dict(),
        "optimizer": self.trainer.optimizer.state_dict(),
        "best_iteration": best_iteration,
        "current_iteration": iteration,
        "current_epoch": self.trainer.current_epoch,
        "num_updates": update,
        "best_update": best_update,
        "best_metric_value": best_metric,
        # Convert to container to avoid any dependencies
        "config": OmegaConf.to_container(self.config, resolve=True),
    }

    if self.git_repo:
        ckpt.update(self._get_vcs_fields())

    # The per-update file comes first, "best" only on request, and
    # "current" is always refreshed last.
    destinations = [ckpt_filepath]
    if update_best:
        destinations.append(best_ckpt_filepath)
    destinations.append(current_ckpt_filepath)

    for destination in destinations:
        torch.save(ckpt, destination)
def calculate(self, sample_list, model_output, execute_on_master_only=True,
              *args, **kwargs):
    """Compute COCO-style detection mAP@IoU=0.50:0.95.

    Args:
        sample_list (SampleList): SampleList provided by the DataLoader for
            the current iteration; its ``dataset_name`` and ``dataset_type``
            select the ground-truth annotation file.
        model_output (Dict): Dict returned by the model. It must carry a
            ``prediction_report`` field holding the detection predictions.
        execute_on_master_only (bool): Run the evaluation on the master
            process only and broadcast the value to the others.
            Default: True.

    Returns:
        torch.FloatTensor: COCO-style mAP@IoU=0.50:0.95.
    """
    # The predictions passed in are dataset-level and already gathered from
    # all nodes, so evaluating on a single node and broadcasting the result
    # avoids running the (memory hungry) evaluation concurrently everywhere.
    from mmf.utils.distributed import broadcast_tensor, is_master
    from mmf.utils.general import get_current_device
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    device = get_current_device()
    skip_evaluation = execute_on_master_only and not is_master()

    if not skip_evaluation:
        predictions = model_output.prediction_report
        annotation_file = self.dataset_json_files[sample_list.dataset_name][
            sample_list.dataset_type
        ]
        coco_gt = COCO(annotation_file)
        coco_dt = coco_gt.loadRes(predictions)

        coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        # stats[0] is the value this metric reports.
        mAP = torch.tensor(coco_eval.stats[0], dtype=torch.float, device=device)
    else:
        # Placeholder that the broadcast below overwrites.
        mAP = torch.tensor(-1, dtype=torch.float, device=device)

    if execute_on_master_only:
        mAP = broadcast_tensor(mAP, src=0)

    return mAP
def evaluation_loop(
    self, loader, use_tqdm: bool = False, config=None
) -> Tuple[Dict[str, Any], Type[Meter]]:
    """Visualize and run the model under increasing amounts of head pruning.

    For each batch, head scores are obtained from the explanation generator;
    for each pruning fraction in ``steps`` a binary keep/prune mask over all
    attention heads is built, a visualization is generated, and the model is
    called with that mask and its answer printed.

    Args:
        loader: Iterable of batches.
        use_tqdm: Show a progress bar (master process only).
        config: Optional object with ``num_of_examples`` and ``COCO_path``
            attributes.

    NOTE(review): no ``return`` statement is visible, so this returns
    ``None`` despite the annotation. The loop nesting was reconstructed from
    a flattened source line — confirm against the original file.
    """
    expl = ExplanationGenerator.HeadPrune(self.model)
    vis = VisualizationGenerator.SelfAttentionGenerator(self.model)
    # saving cams per method for all the samples
    self.model.eval()
    disable_tqdm = not use_tqdm or not is_master()
    # Fractions of all heads to prune at each step.
    steps = [0, 0.4, 0.6, 0.9]
    # Number of batches processed so far.
    i = 0

    # ---- Configuration values (fall back to defaults when unset) ----
    NUM_OF_EXAMPLES = config.num_of_examples if (config != None and config.num_of_examples != None) else 10
    COCO_VAL_PATH = config.COCO_path if (config != None and config.COCO_path != None) else '/media/data2/hila_chefer/env_MMF/datasets/coco/subset_val/images/val2014/'
    # -----------------------------------------------------------------

    num_of_layers = len(self.model.model.bert.encoder.layer)
    first_layer = self.model.model.bert.encoder.layer[0]
    # Heads per layer derived from the first layer's attention sizes.
    num_of_heads_in_layer = int(first_layer.attention.self.all_head_size / first_layer.attention.self.attention_head_size)
    total_heads = num_of_heads_in_layer * num_of_layers

    for batch in tqdm.tqdm(loader, disable=disable_tqdm):
        head_scores_grad, _ = expl.generate_expl(batch)
        for step_idx, step in enumerate(steps):
            # calculate num_heads to prune: step * total_heads
            num_of_heads_to_prune = int(step * total_heads)
            # create binary vec of all heads per step
            head_scores = head_scores_grad
            head_scores = torch.cat(head_scores, dim=0)  # flatten list of tensors to one tensor
            # 1 = keep the head, 0 = prune it.
            heads_prune = torch.ones(head_scores.size())
            if num_of_heads_to_prune > 0:  # this is due to a bug in torch.topk(k=0) with float tensors
                # NOTE(review): largest=False selects the LOWEST-scoring
                # heads, although the original comment said "prune largest
                # heads" — confirm which is intended.
                heads_prune_idx = torch.topk(head_scores, num_of_heads_to_prune, largest=False)
                heads_prune[heads_prune_idx.indices] = 0
            heads_prune = heads_prune.reshape([num_of_layers, num_of_heads_in_layer])
            # generate visualization
            # NOTE(review): when config is given but config.COCO_path is
            # None, None is passed here instead of the COCO_VAL_PATH default
            # computed above — likely unintended.
            vis.generate_ours(batch, save_visualization=True, head_prune=heads_prune, prune_step=step*100, COCO_path=(config.COCO_path if config != None else COCO_VAL_PATH))
            # call model with head prune vec
            res = self.model(batch, heads_prune)["scores"]
            print("question:", batch["text"])
            # NOTE(review): `answers` is not defined in this function; it
            # must come from an enclosing/module scope — verify.
            print("answer:", answers[res.argmax().item()])
        i += 1
        if i >= NUM_OF_EXAMPLES:
            break
def __init__(self, dataset_type="train"):
    """Create an empty multi-dataset container for ``dataset_type``.

    All per-dataset collections start empty and all counters at zero.
    """
    self._dataset_type = dataset_type
    self._is_master = is_master()

    # Parallel per-dataset collections.
    self._datasets = []
    self._loaders = []
    self._samplers = []
    self._iterators = []
    self._per_dataset_lengths = []

    # Aggregate counters.
    self._total_length = 0
    self._num_datasets = 0

    self._finished_iterators = {}
def try_fast_read(self):
    """Preload every item into ``self.cache`` when fast reading is enabled.

    Nothing happens for the test set, or unless ``self._should_fast_read``
    exists and is exactly ``True``.
    """
    # Don't fast read in case of test set.
    if self._dataset_type == "test":
        return

    if getattr(self, "_should_fast_read", None) is not True:
        return

    self.writer.write("Starting to fast read {} {} dataset".format(
        self.dataset_name, self.dataset_type))
    self.cache = {}

    # The progress bar is only shown on the master process.
    indices = tqdm.tqdm(
        range(len(self.annotation_db)), miniters=100, disable=not is_master()
    )
    for idx in indices:
        self.cache[idx] = self.load_item(idx)
def try_fast_read(self):
    """Eagerly load all items into ``self.cache`` if fast read is requested.

    Skipped for the test set and when ``self._should_fast_read`` is missing
    or falsy.
    """
    # Don't fast read in case of test set.
    if self._dataset_type == "test":
        return

    if not getattr(self, "_should_fast_read", False):
        return

    logger.info(
        f"Starting to fast read {self.dataset_name} {self.dataset_type} "
        + "dataset")
    self.cache = {}

    total_items = len(self.annotation_db)
    progress = tqdm.tqdm(range(total_items), miniters=100,
                         disable=not is_master())
    for idx in progress:
        self.cache[idx] = self.load_item(idx)
def download_pretrained_model(model_name, *args, **kwargs):
    """Download a model from the MMF zoo and return its local folder.

    Args:
        model_name: Key of the model inside the model zoo config. When the
            entry lacks ``version`` or ``resources``, its ``defaults``
            sub-entry is used instead.
        *args, **kwargs: Forwarded to the recursive calls made for entries
            listed under ``zoo_requirements``.

    Returns:
        Path of the folder the resources are downloaded to.

    Raises:
        omegaconf.errors.OmegaConfBaseException: Re-raised when the zoo
            lookup or the ``defaults`` lookup fails.
    """
    import omegaconf
    from omegaconf import OmegaConf

    from mmf.utils.configuration import load_yaml, get_mmf_env

    model_zoo = load_yaml(get_mmf_env(key="model_zoo"))
    OmegaConf.set_struct(model_zoo, True)
    OmegaConf.set_readonly(model_zoo, True)

    model_data_dir = os.path.join(
        get_absolute_path(get_mmf_env("data_dir")), "models"
    )
    download_path = os.path.join(model_data_dir, model_name)

    try:
        model_config = OmegaConf.select(model_zoo, model_name)
    except omegaconf.errors.OmegaConfBaseException as e:
        print(f"No such model name {model_name} defined in mmf zoo")
        raise e

    fully_specified = "version" in model_config and "resources" in model_config
    if not fully_specified:
        # Version and resources are absent, so fall back to the defaults.
        try:
            model_config = model_config.defaults
            download_path = os.path.join(model_data_dir, model_name + ".defaults")
        except omegaconf.errors.OmegaConfBaseException as e:
            print(
                f"Model name {model_name} doesn't specify 'resources' and 'version' "
                "while no defaults have been provided"
            )
            raise e

    # "zoo_requirements" may be a single name or a list of names; each one
    # is downloaded first through a recursive call.
    if "zoo_requirements" in model_config:
        requirements = model_config.zoo_requirements
        if isinstance(requirements, str):
            requirements = [requirements]
        for requirement in requirements:
            download_pretrained_model(requirement, *args, **kwargs)

    version = model_config.version
    resources = model_config.resources

    # Only the master downloads; everybody waits for it afterwards.
    if is_master():
        download_resources(resources, download_path, version)
    synchronize()

    return download_path
def __init__(self, dataset_type="train"):
    """Create an empty multi-dataset container for ``dataset_type``.

    The writer is fetched from the registry; every collection starts empty
    and every counter at zero.
    """
    self._dataset_type = dataset_type
    self.writer = registry.get("writer")
    self._is_master = is_master()

    # Parallel per-dataset collections.
    self._datasets = []
    self._loaders = []
    self._samplers = []
    self._iterators = []
    self._per_dataset_lengths = []

    # Aggregate counters.
    self._total_length = 0
    self._num_datasets = 0

    # Iterator bookkeeping.
    self._finished_iterators = {}
    self._used_once = {}
def add_to_report(self, report, model):
    """Gather prediction fields and append formatted results to the report.

    Args:
        report: Report whose fields are reshaped and gathered across
            processes through ``self.reshape_and_gather``.
        model: Model that may post-process the predictions through a
            ``format_for_prediction(results, report)`` method, either
            directly or on its wrapped ``module``.

    Only the master process formats the predictions and extends
    ``self.report``.
    """
    keys = ["id", "question_id", "image_id", "context_tokens", "captions", "scores"]
    for key in keys:
        report = self.reshape_and_gather(report, key)

    if not is_master():
        return

    results = self.current_dataset.format_for_prediction(report)

    if hasattr(model, "format_for_prediction"):
        results = model.format_for_prediction(results, report)
    else:
        # Look up `.module` defensively: a model that neither defines the
        # hook nor wraps another module previously raised AttributeError.
        wrapped = getattr(model, "module", None)
        if hasattr(wrapped, "format_for_prediction"):
            results = wrapped.format_for_prediction(results, report)

    self.report = self.report + results
def __init__(self, log_folder="./logs", iteration=0):
    """Create the tensorboard summary writer on the master process.

    Args:
        log_folder: Folder under which a ``tensorboard_<timestamp>``
            directory is created.
        iteration: Accepted but not used in this constructor.
    """
    # This would handle warning of missing tensorboard
    from torch.utils.tensorboard import SummaryWriter

    self.summary_writer = None
    self._is_master = is_master()
    self.timer = Timer()
    self.log_folder = log_folder
    self.time_format = "%Y-%m-%dT%H:%M:%S"

    # Non-master processes keep `summary_writer` as None.
    if not self._is_master:
        return

    current_time = self.timer.get_time_hhmmss(None, format=self.time_format)
    tensorboard_folder = os.path.join(
        self.log_folder, f"tensorboard_{current_time}"
    )
    self.summary_writer = SummaryWriter(tensorboard_folder)
def _summarize_report(self, meter, should_print=True, extra=None):
    """Send meter values to tensorboard and log training progress.

    Args:
        meter: Object providing ``get_scalar_dict()`` and ``get_log_dict()``.
        should_print: When False, only the tensorboard part runs.
        extra: Optional dict merged last into the logged values.

    Does nothing on non-master processes.
    """
    extra = {} if extra is None else extra

    if not is_master():
        return

    if self.training_config.tensorboard:
        scalars = meter.get_scalar_dict()
        self.tb_writer.add_scalars(scalars, self.current_iteration)

    if not should_print:
        return

    log_dict = {"progress": f"{self.num_updates}/{self.max_updates}"}
    for values in (meter.get_log_dict(), extra):
        log_dict.update(values)

    self.writer.log_progress(log_dict)
def add_to_report(self, report, model, execute_on_master_only=True):
    """Gather candidate fields and append formatted predictions to the report.

    Args:
        report: Report whose ``self.candidate_fields`` are reshaped and
            gathered across processes through ``self.reshape_and_gather``.
        model: Model that may post-process the predictions through a
            ``format_for_prediction(results, report)`` method, either
            directly or on its wrapped ``module``.
        execute_on_master_only: When True (default), only the master
            process formats and stores predictions; when False every
            process does.
    """
    for key in self.candidate_fields:
        report = self.reshape_and_gather(report, key)

    if execute_on_master_only and not is_master():
        return

    results = []
    if hasattr(self.current_dataset, "format_for_prediction"):
        results = self.current_dataset.format_for_prediction(report)

    if hasattr(model, "format_for_prediction"):
        results = model.format_for_prediction(results, report)
    else:
        # Look up `.module` defensively: a model that neither defines the
        # hook nor wraps another module previously raised AttributeError.
        wrapped = getattr(model, "module", None)
        if hasattr(wrapped, "format_for_prediction"):
            results = wrapped.format_for_prediction(results, report)

    self.report = self.report + results
def __init__(self, loaders: Dict[str, DataLoader]):
    """Set up iteration over several named dataloaders.

    Args:
        loaders: Mapping from dataset name to its dataloader. A warning is
            issued when it is ``None`` or empty.
    """
    # Warn on missing/empty loaders. The condition used to be inverted and
    # fired for every non-empty mapping while staying silent for empty ones.
    if loaders is None or len(loaders) == 0:
        warnings.warn(
            "Empty loaders passed into MultiDataLoader. This can have "
            "unintended consequences."
        )

    self._loaders = loaders
    self._is_master = is_master()
    self._num_datasets = len(self.loaders)
    self.dataset_list = list(loaders.keys())

    # Per-dataset iterator bookkeeping.
    self._iterators = {}
    self._finished_iterators = {}
    self.current_index = 0

    self.set_lengths()
    self.set_samplers()
    self._infer_dataset_probabilities()
def __call__(self, update, iteration, meter):
    """Decide whether training should stop early; checkpoint along the way.

    Arguments:
        update {number}: Current update number
        iteration {number}: Current iteration number
        meter: Meter whose ``meters`` mapping holds the monitored criteria

    Returns:
        bool -- True when early stopping occurred, False otherwise

    Raises:
        ValueError: If the monitored criteria is absent from the meter.
    """
    # Downstream operations involve synchronization, and on XLA they must
    # run on all cores. Hence only non-master, non-XLA processes bail out.
    if not (is_master() or is_xla()):
        return False

    monitored = meter.meters.get(self.early_stop_criteria, None)
    if monitored is None:
        raise ValueError("Criteria used for early stopping ({}) is not "
                         "present in meter.".format(
                             self.early_stop_criteria))

    value = monitored.global_avg
    if isinstance(value, torch.Tensor):
        value = value.item()

    if self.minimize:
        improved = value < self.best_monitored_value
    else:
        improved = value > self.best_monitored_value

    if improved:
        # New best value: remember it and refresh the best checkpoint.
        self.best_monitored_value = value
        self.best_monitored_iteration = iteration
        self.best_monitored_update = update
        self.checkpoint.save(update, iteration, update_best=True)
        return False

    if self.best_monitored_update + self.patience < update:
        # Patience exhausted.
        self.activated = True
        if self.should_stop is True:
            self.checkpoint.restore()
            self.checkpoint.finalize()
            return True
        return False

    self.checkpoint.save(update, iteration, update_best=False)
    return False
def build_dataset(self, config, dataset_type="train", *args, **kwargs):
    """Build the dataset on the master process, then synchronize.

    Counterpart of the load function, used by MMF to build a dataset the
    first time it is not available. The actual work is delegated to
    ``build``; override that method in your child class.

    Args:
        config (DictConfig): Configuration of this dataset loaded from
            config.
        dataset_type (str): Type of dataset, train|val|test

    .. warning::

        DO NOT OVERRIDE in child class. Instead override ``build``.
    """
    running_on_master = is_master()

    # Build once, on the main process, so the others don't repeat the work.
    if running_on_master:
        self.build(config, dataset_type, *args, **kwargs)

    synchronize()
def load(self):
    """Load the questions for the dataset split and build the vocabularies.

    Sets ``self.image_path`` and ``self.questions``. The question and answer
    vocabularies are built on the master process only, after which all
    processes synchronize.
    """
    self.image_path = os.path.join(
        self._data_folder, _CONSTANTS["images_folder"], self._dataset_type
    )

    questions_file = os.path.join(
        self._data_folder,
        _CONSTANTS["questions_folder"],
        _TEMPLATES["question_json_file"].format(self._dataset_type),
    )

    with open(questions_file) as f:
        self.questions = json.load(f)[_CONSTANTS["questions_key"]]

        # Build the vocab in the main process only; doing it everywhere
        # would just repeat the same task.
        if is_master():
            for constant_name in ("question_key", "answer_key"):
                self._build_vocab(self.questions, _CONSTANTS[constant_name])
        synchronize()
def _download_model(self):
    """Download the FastText ``wiki.en.bin`` vectors into the MMF cache.

    Non-master processes only compute and return the target path. The
    master returns early if the file already exists; otherwise it streams
    the file from ``FASTTEXT_WIKI_URL`` with a byte-based progress bar.

    Returns:
        Path of the model file inside the cache directory.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    _is_master = is_master()

    model_file_path = os.path.join(get_mmf_cache_dir(), "wiki.en.bin")

    if not _is_master:
        return model_file_path

    if PathManager.exists(model_file_path):
        self.writer.write(
            "Vectors already present at {}.".format(model_file_path), "info")
        return model_file_path

    import requests
    from mmf.common.constants import FASTTEXT_WIKI_URL
    from tqdm import tqdm

    PathManager.mkdirs(os.path.dirname(model_file_path))

    # The context manager closes the connection once streaming is done.
    with requests.get(FASTTEXT_WIKI_URL, stream=True) as response:
        # Fail before creating the file so an HTTP error page is never
        # stored as the model.
        response.raise_for_status()

        # Content-Length may be absent; tqdm accepts total=None then.
        content_length = response.headers.get("Content-Length")
        total_bytes = int(content_length) if content_length is not None else None

        # NOTE(review): an interrupted download still leaves a partial file
        # behind that a later call treats as already present.
        with PathManager.open(model_file_path, "wb") as f, tqdm(
            total=total_bytes, unit="B", unit_scale=True
        ) as pbar:
            for data in response.iter_content(chunk_size=4096):
                if data:
                    f.write(data)
                    # Total and updates are both expressed in bytes.
                    pbar.update(len(data))

    self.writer.write(
        "fastText bin downloaded at {}.".format(model_file_path), "info")

    return model_file_path