def wrapper(*args, **kwargs) -> Tuple[Any, Stats]:
    from pytorch_memlab import LineProfiler

    model = args[0]
    if not isinstance(model, torch.nn.Module):
        raise AttributeError(
            'First argument for profiling needs to be torch.nn.Module')

    # Init `pytorch_memlab` for analyzing the model forward pass:
    line_profiler = LineProfiler()
    line_profiler.enable()
    line_profiler.add_function(model.forward)

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()

    out = func(*args, **kwargs)

    end.record()
    torch.cuda.synchronize()
    time = start.elapsed_time(end) / 1000

    # Get the global memory statistics collected by `pytorch_memlab`:
    memlab = read_from_memlab(line_profiler)
    max_allocated_cuda, max_reserved_cuda, max_active_cuda = memlab
    line_profiler.disable()

    # Get additional information from `nvidia-smi`:
    free_cuda, used_cuda = get_gpu_memory_from_nvidia_smi()

    stats = Stats(time, max_allocated_cuda, max_reserved_cuda,
                  max_active_cuda, free_cuda, used_cuda)
    return out, stats
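# For context: `wrapper` above closes over an outer `func`, so it is
# typically returned from a decorator factory. A minimal, self-contained
# sketch of that pattern, keeping only the CUDA-event timing part (the
# name `profileit` and the one-field `Stats` tuple are assumptions for
# illustration, not part of the snippet above):
import functools
from collections import namedtuple
from typing import Any, Callable, Tuple

import torch

Stats = namedtuple('Stats', ['time'])  # hypothetical reduced container


def profileit(func: Callable) -> Callable:
    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> Tuple[Any, Stats]:
        if not isinstance(args[0], torch.nn.Module):
            raise AttributeError(
                'First argument for profiling needs to be torch.nn.Module')
        # Time the wrapped call with CUDA events (requires a GPU):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        out = func(*args, **kwargs)
        end.record()
        torch.cuda.synchronize()
        return out, Stats(time=start.elapsed_time(end) / 1000)
    return wrapper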
def test_line_report():
    def work():
        # comment
        linear = torch.nn.Linear(100, 100).cuda()
        linear_2 = torch.nn.Linear(100, 100).cuda()
        linear_3 = torch.nn.Linear(100, 100).cuda()

    def work_3():
        lstm = torch.nn.LSTM(1000, 1000).cuda()

    def work_2():
        # comment
        linear = torch.nn.Linear(100, 100).cuda()
        linear_2 = torch.nn.Linear(100, 100).cuda()
        linear_3 = torch.nn.Linear(100, 100).cuda()
        work_3()

    line_profiler = LineProfiler(work, work_2)
    line_profiler.enable()
    work()
    work_2()
    line_profiler.disable()
    line_profiler.print_stats()
def mem_benchmark():
    from pytorch_memlab import LineProfiler
    n = int(8e6)
    prob = test.random_problem(S=n, T=n)
    with LineProfiler(solve) as prof:
        solve(prob)
    prof.print_stats()
def forward(self, x, r_ij, neighbors, pairwise_mask, f_ij=None):
    """Compute convolution block.

    Args:
        x (torch.Tensor): input representation/embedding of atomic
            environments with (N_b, N_a, n_in) shape.
        r_ij (torch.Tensor): interatomic distances of (N_b, N_a, N_nbh)
            shape.
        neighbors (torch.Tensor): indices of neighbors of
            (N_b, N_a, N_nbh) shape.
        pairwise_mask (torch.Tensor): mask to filter out non-existing
            neighbors introduced via padding.
        f_ij (torch.Tensor, optional): expanded interatomic distances in a
            basis. If None, r_ij.unsqueeze(-1) is used.

    Returns:
        torch.Tensor: block output with (N_b, N_a, n_out) shape.

    """
    if f_ij is None:
        f_ij = r_ij.unsqueeze(-1)

    # pass expanded interatomic distances through filter block
    W = self.filter_network(f_ij)
    # apply cutoff
    if self.cutoff_network is not None:
        C = self.cutoff_network(r_ij)
        W = W * C.unsqueeze(-1)

    def inner(x, neighbors, pairwise_mask):
        # pass initial embeddings through Dense layer
        y = self.in2f(x)
        print(x.shape, y.shape)
        # reshape y for element-wise multiplication by W
        nbh_size = neighbors.size()
        nbh = neighbors.view(-1, nbh_size[1] * nbh_size[2], 1)
        nbh = nbh.expand(-1, -1, y.size(2))
        print(neighbors.shape, nbh.shape)
        y = torch.gather(y, 1, nbh)
        y = y.view(nbh_size[0], nbh_size[1], nbh_size[2], -1)
        print(y.shape)
        print('')

        # element-wise multiplication, aggregating and Dense layer
        y = y * W
        y = self.agg(y, pairwise_mask)
        y = self.f2out(y)
        return y

    with LineProfiler(inner) as prof:
        y = inner(x, neighbors, pairwise_mask)
    prof.print_stats()
    return y
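# The closure trick above (wrapping part of a method body in `inner` and
# handing only that function to `LineProfiler`) is a general way to scope
# the per-line memory report to one code region. A minimal, self-contained
# sketch of the same pattern (`Block` is a hypothetical module for
# illustration; CUDA memory columns are only meaningful on a GPU):
import torch
from pytorch_memlab import LineProfiler


class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = torch.nn.Linear(100, 100)

    def forward(self, x):
        def inner(x):
            # only these lines appear in the profiler report
            y = self.lin(x)
            return torch.relu(y)

        with LineProfiler(inner) as prof:
            y = inner(x)
        prof.print_stats()
        return y


Block()(torch.randn(8, 100))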
def test_display():
    def work():
        # comment
        linear = torch.nn.Linear(100, 100).cuda()
        linear_2 = torch.nn.Linear(100, 100).cuda()
        linear_3 = torch.nn.Linear(100, 100).cuda()

    def work_3():
        lstm = torch.nn.LSTM(1000, 1000).cuda()

    def work_2():
        # comment
        linear = torch.nn.Linear(100, 100).cuda()
        linear_2 = torch.nn.Linear(100, 100).cuda()
        linear_3 = torch.nn.Linear(100, 100).cuda()
        work_3()

    with LineProfiler(work, work_2) as prof:
        work()
        work_2()
    return prof.display()
# Do not change; required for benchmarking
import torch_geometric_benchmark.torchprof_local as torchprof  # noqa
from pytorch_memlab import LineProfiler  # noqa
from torch_geometric_benchmark.utils import count_parameters  # noqa
from torch_geometric_benchmark.utils import get_gpu_memory_nvdia  # noqa
from torch_geometric_benchmark.utils import get_memory_status  # noqa
from torch_geometric_benchmark.utils import get_model_size  # noqa

global_line_profiler = LineProfiler()
global_line_profiler.enable()
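# With a globally enabled profiler like the one above, individual functions
# can still be registered afterwards via `add_function` (the same call the
# first snippet uses) and the collected per-line stats printed on demand.
# A minimal sketch of that workflow:
import torch
from pytorch_memlab import LineProfiler

global_line_profiler = LineProfiler()
global_line_profiler.enable()


def work() -> None:
    linear = torch.nn.Linear(100, 100).cuda()


global_line_profiler.add_function(work)
work()
global_line_profiler.print_stats()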
                    help='Load a model to continue training')
parser.add_argument('--save_every', default=None, type=int,
                    help='How often to save during training')

args = vars(parser.parse_args())

file_folder_path = os.path.dirname(os.path.abspath(__file__))
project_folder_path = os.path.join(file_folder_path, "..")
input_folder = os.path.join(project_folder_path, "TrainingData")
output_folder = os.path.join(project_folder_path, "Output")
save_folder = os.path.join(project_folder_path, "SavedModels")

prof = LineProfiler()
prof.enable()

if args['load_from'] is None:
    opt = Options.get_default()
    for k in args.keys():
        if args[k] is not None:
            opt[k] = args[k]
    dataset = LocalImplicitDataset(opt)
    model = HierarchicalACORN(opt)
    trainer = Trainer(opt)
    trainer.train(model, dataset)

print(prof.display())
prof.disable()
""" from pathlib import Path import torch from pytorch_memlab import LineProfiler from apppath import ensure_existence if __name__ == "__main__": def inner() -> None: """ :rtype: None """ torch.nn.Linear(100, 100).cuda() def outer() -> None: """ :rtype: None """ linear = torch.nn.Linear(100, 100).cuda() linear2 = torch.nn.Linear(100, 100).cuda() inner() with LineProfiler(outer, inner) as prof: outer() with open(ensure_existence(Path("exclude")) / "test.html", "w") as f: f.write(prof.display()._repr_html_())