def test_get_model_dummy_input(self):
    for config in get_test_model_configs():
        model = build_model(config)  # pass in a dummy model for the cuda check
        batchsize = 8
        # input_key is list
        input_key = ["audio", "video"]
        input_shape = [[3, 40, 100], [4, 16, 223, 223]]  # dummy input shapes
        result = util.get_model_dummy_input(model, input_shape, input_key, batchsize)
        self.assertEqual(result.keys(), {"audio", "video"})
        for i in range(len(input_key)):
            self.assertEqual(
                result[input_key[i]].size(), tuple([batchsize] + input_shape[i])
            )
        # input_key is string
        input_key = "video"
        input_shape = [4, 16, 223, 223]
        result = util.get_model_dummy_input(model, input_shape, input_key, batchsize)
        self.assertEqual(result.keys(), {"video"})
        self.assertEqual(result[input_key].size(), tuple([batchsize] + input_shape))
        # input_key is None
        input_key = None
        input_shape = [4, 16, 223, 223]
        result = util.get_model_dummy_input(model, input_shape, input_key, batchsize)
        self.assertEqual(result.size(), tuple([batchsize] + input_shape))
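# For context, a minimal sketch of the contract the test above exercises,
# assuming get_model_dummy_input simply builds zero-filled tensors of shape
# [batchsize] + input_shape (the real helper may also handle device placement
# and non-blocking transfers; _dummy_input_sketch is a hypothetical name):
def _dummy_input_sketch(input_shape, input_key, batchsize):
    import torch

    if isinstance(input_key, list):
        # one dummy tensor per key; shapes are paired with keys positionally
        return {
            key: torch.zeros([batchsize] + shape)
            for key, shape in zip(input_key, input_shape)
        }
    if isinstance(input_key, str):
        return {input_key: torch.zeros([batchsize] + input_shape)}
    return torch.zeros([batchsize] + input_shape)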
def plot_model(
    model: ClassyModel,
    size: Tuple[int, ...] = (3, 224, 224),
    input_key: Optional[Union[str, List[str]]] = None,
    writer: Optional["SummaryWriter"] = None,
    folder: str = "",
    train: bool = True,
) -> None:
    """Visualizes a model in TensorBoard.

    The TensorBoard writer can be either specified directly via `writer` or can
    be specified via a `folder`.

    The model can be run in training or evaluation mode via the `train` argument.

    Example usage on devserver:
    - Install TensorBoard using: `sudo feature install tensorboard`
    - Start TensorBoard using: `tensorboard --port=8098 --logdir <folder>`
    """
    assert (
        writer is not None or folder != ""
    ), "must specify SummaryWriter or folder to create SummaryWriter in"
    input = get_model_dummy_input(model, size, input_key)
    if writer is None:
        writer = SummaryWriter(log_dir=folder, comment="Model graph")
    with writer:
        orig_train = model.training
        model.train(train)  # visualize model in desired mode
        writer.add_graph(model, input_to_model=(input,))
        model.train(orig_train)
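# Hypothetical usage sketch for plot_model. `my_model` stands in for any
# ClassyModel; SummaryWriter comes from torch.utils.tensorboard, which is the
# writer type this function expects, and the log dir is illustrative:
def _plot_model_example(my_model):
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter(log_dir="/tmp/tb_logs")
    plot_model(my_model, size=(3, 224, 224), writer=writer, train=False)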
def compute_complexity(model, compute_fn, input_shape, input_key=None):
    """
    Compute the complexity of a forward pass.
    """
    # assertions, input, and upvalue in which we will perform the count:
    assert isinstance(model, nn.Module)
    if not isinstance(input_shape, abc.Sequence) and not isinstance(input_shape, dict):
        return None
    else:
        input = get_model_dummy_input(model, input_shape, input_key)

    compute_list = []

    # measure FLOPs:
    modify_forward(model, compute_list, compute_fn)
    try:
        # compute complexity in eval mode
        with eval_model(model), torch.no_grad():
            model.forward(input)
    except NotImplementedError as err:
        raise err
    finally:
        restore_forward(model)

    return sum(compute_list)
def torchscript_using_trace(self, model):
    input_shape = model.input_shape if hasattr(model, "input_shape") else None
    if not input_shape:
        logging.warning(
            "This model doesn't implement input_shape. "
            "Cannot save torchscripted model."
        )
        return
    input_data = get_model_dummy_input(
        model,
        input_shape,
        input_key=model.input_key if hasattr(model, "input_key") else None,
    )
    # trace in eval mode without gradient tracking
    with eval_model(model), torch.no_grad():
        torchscript = torch.jit.trace(model, input_data)
    return torchscript
def compute_complexity(
    model,
    compute_fn,
    input_shape,
    input_key=None,
    patch_attr=None,
    compute_unique=False,
):
    """
    Compute the complexity of a forward pass.

    Args:
        compute_unique: If True, the complexity for a given module is only
            calculated once. Otherwise, it is counted every time the module is
            called.

    TODO(@mannatsingh): We have some assumptions about only modules which are
        leaves or have patch_attr defined. This should be fixed and generalized
        if possible.
    """
    # assertions, input, and upvalue in which we will perform the count:
    assert isinstance(model, nn.Module)
    if not isinstance(input_shape, abc.Sequence) and not isinstance(input_shape, dict):
        return None
    else:
        input = get_model_dummy_input(model, input_shape, input_key)

    complexity_computer = ComplexityComputer(compute_fn, compute_unique)

    # measure FLOPs:
    modify_forward(model, complexity_computer, patch_attr=patch_attr)
    try:
        # compute complexity in eval mode
        with eval_model(model), torch.no_grad():
            model.forward(input)
    except NotImplementedError as err:
        raise err
    finally:
        restore_forward(model, patch_attr=patch_attr)

    return complexity_computer.count
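# Hypothetical usage sketch for compute_complexity. The compute_fn signature
# below (module, input, output) is an assumption mirroring what a per-module
# forward hook would see; the actual signature expected by ComplexityComputer
# is not confirmed by this snippet:
def _count_output_activations(module, x, out):
    # count the number of activations this module produces
    return out.numel()

def _complexity_example(my_model):
    return compute_complexity(my_model, _count_output_activations, (3, 224, 224))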
def compute_flops(model, input_shape=(3, 224, 224), input_key=None):
    """
    Compute the number of FLOPs needed for a forward pass.
    """
    # assertions, input, and upvalue in which we will perform the count:
    assert isinstance(model, nn.Module)
    if not isinstance(input_shape, abc.Sequence):
        return None
    input = get_model_dummy_input(model, input_shape, input_key)

    flops_list = []

    # measure FLOPs:
    modify_forward(model, flops_list)
    try:
        model.forward(input)
    except NotImplementedError as err:
        raise err
    finally:
        restore_forward(model)

    return sum(flops_list)
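# Hypothetical usage sketch for compute_flops; torchvision's resnet18 stands
# in for any supported nn.Module (layers the FLOPs counter does not handle
# would surface as NotImplementedError, which the function re-raises):
def _flops_example():
    import torchvision.models as models

    flops = compute_flops(models.resnet18(), input_shape=(3, 224, 224))
    print(f"Estimated FLOPs per forward pass: {flops}")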
def save_torchscript(self, task) -> None:
    model = task.base_model
    input_shape = (
        model.input_shape if hasattr(task.base_model, "input_shape") else None
    )
    if not input_shape:
        logging.warning(
            "This model doesn't implement input_shape. "
            "Cannot save torchscripted model."
        )
        return
    input_data = get_model_dummy_input(
        model,
        input_shape,
        input_key=model.input_key if hasattr(model, "input_key") else None,
    )
    # trace in eval mode without gradient tracking
    with eval_model(model), torch.no_grad():
        torchscript = torch.jit.trace(model, input_data)

    # save torchscript:
    logging.info("Saving torchscript to '{}'...".format(self.torchscript_folder))
    torchscript_name = f"{self.torchscript_folder}/{TORCHSCRIPT_FILE}"
    with PathManager.open(torchscript_name, "wb") as f:
        torch.jit.save(torchscript, f)
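# Once saved, the traced module can be reloaded and run without the original
# Python model class. The path below is illustrative; the actual file name
# depends on TORCHSCRIPT_FILE:
def _load_torchscript_example(path="/path/to/torchscript_folder/model.pt"):
    import torch

    scripted = torch.jit.load(path)
    scripted.eval()
    return scripted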
def profile(
    model,
    batchsize_per_replica=32,
    input_shape=(3, 224, 224),
    use_nvprof=False,
    input_key=None,
):
    """
    Performs CPU or GPU profiling of the specified model on the specified input.
    """
    # assertions:
    if use_nvprof:
        raise NotImplementedError
        # FIXME (mannatsingh): in case of use_nvprof, exit() is called at the end
        # and we do not return a profile.
        assert is_on_gpu(model), "can only nvprof model that lives on GPU"
        logging.info("CUDA profiling: Make sure you are running under nvprof!")

    # input for model:
    input = get_model_dummy_input(
        model,
        input_shape,
        input_key,
        batchsize=batchsize_per_replica,
        non_blocking=False,
    )

    # perform profiling:
    with torch.no_grad():
        model(input)  # warm up CUDA memory allocator and profiler
        if use_nvprof:  # nvprof profiling (TODO: Can we infer this?)
            cudart().cudaProfilerStart()
            model(input)
            cudart().cudaProfilerStop()
            exit()  # exit gracefully
        else:  # regular profiling
            with torch.autograd.profiler.profile(use_cuda=True) as profiler:
                model(input)
            return profiler
def profile(
    model: nn.Module,
    batchsize_per_replica: int = 32,
    input_shape: Tuple[int, ...] = (3, 224, 224),
    use_nvprof: bool = False,
    input_key: Optional[Union[str, List[str]]] = None,
):
    """
    Performs CPU or GPU profiling of the specified model on the specified input.
    """
    # assertions:
    if use_nvprof:
        raise ClassyProfilerError("Profiling not supported with nvprof")
        # FIXME (mannatsingh): in case of use_nvprof, exit() is called at the end
        # and we do not return a profile.
        assert is_on_gpu(model), "can only nvprof model that lives on GPU"
        logging.info("CUDA profiling: Make sure you are running under nvprof!")

    # input for model:
    input = get_model_dummy_input(
        model,
        input_shape,
        input_key,
        batchsize=batchsize_per_replica,
        non_blocking=False,
    )

    # perform profiling in eval mode
    with eval_model(model), torch.no_grad():
        model(input)  # warm up CUDA memory allocator and profiler
        if use_nvprof:  # nvprof profiling (TODO: Can we infer this?)
            cudart().cudaProfilerStart()
            model(input)
            cudart().cudaProfilerStop()
            exit()  # exit gracefully
        else:  # regular profiling
            with torch.autograd.profiler.profile(use_cuda=True) as profiler:
                model(input)
            return profiler
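# Hypothetical usage sketch for the regular (non-nvprof) profiling path on a
# CUDA-enabled machine; key_averages().table() is the standard way to
# summarize a torch.autograd.profiler run (`my_gpu_model` is a placeholder):
def _profile_example(my_gpu_model):
    prof = profile(my_gpu_model, batchsize_per_replica=8)
    print(prof.key_averages().table(sort_by="cpu_time_total"))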