def test_profiler_fwd_bwd_link(self):
    """Check the forward/backward flow links in an exported Chrome trace.

    Profiles an ``add`` followed by ``binary_cross_entropy_with_logits``
    and a backward pass, exports the trace, and verifies that:

    * exactly two ``fwd_bwd`` flows exist, with ids 1 and 2, each having
      both a start ("s") and a finish ("f") event;
    * every flow timestamp coincides with the timestamp of a complete
      ("X") event;
    * flow 1 starts at ``aten::binary_cross_entropy_with_logits`` and
      flow 2 starts at ``aten::add``.
    """
    with _profile(use_kineto=True) as prof:
        t1, t2 = torch.ones(1, requires_grad=True), torch.ones(1, requires_grad=True)
        z = torch.add(t1, t2)
        y = torch.ones(1)
        loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
        loss.backward()

    with TemporaryFileName(mode="w+") as fname:
        prof.export_chrome_trace(fname)
        with io.open(fname, 'r') as f:
            events = json.load(f)["traceEvents"]

    # Index complete events by timestamp and flow events by flow id.
    ts_to_name = {}
    flow_s_to_ts = {}
    flow_f_to_ts = {}
    for e in events:
        if e["ph"] == "X":
            ts_to_name[e["ts"]] = e["name"]
        if e.get("cat") == "forward_backward" and e.get("name") == "fwd_bwd":
            if e["ph"] == "s":
                flow_s_to_ts[e["id"]] = e["ts"]
            elif e["ph"] == "f":
                flow_f_to_ts[e["id"]] = e["ts"]

    # assertEqual/assertIn report the offending values on failure, which
    # assertTrue(a == b) does not.
    self.assertEqual(len(flow_s_to_ts), 2)
    self.assertEqual(len(flow_f_to_ts), 2)
    for flow_id in (1, 2):
        self.assertIn(flow_id, flow_s_to_ts)
        self.assertIn(flow_id, flow_f_to_ts)

    s_ts_1, f_ts_1 = flow_s_to_ts[1], flow_f_to_ts[1]
    s_ts_2, f_ts_2 = flow_s_to_ts[2], flow_f_to_ts[2]
    for ts in (s_ts_1, f_ts_1, s_ts_2, f_ts_2):
        self.assertIn(ts, ts_to_name)

    self.assertEqual(ts_to_name[s_ts_1], "aten::binary_cross_entropy_with_logits")
    self.assertEqual(ts_to_name[s_ts_2], "aten::add")
def test_conv2d_legacy_jit_model(self):
    """Round-trip a grouped MKLDNN conv that carries a legacy 5-d weight.

    The MKLDNN integration used to serialize grouped convolutions with a
    5-d weight, and that behavior should keep working. The test asserts
    that the in-memory module has a 5-d weight, that the module loaded
    back through ``torch.jit`` has a 4-d weight, and that the loaded
    module produces the same output as the original dense conv.
    """
    groups = 4
    conv2d = torch.nn.Conv2d(16, 16, 3, groups=groups)
    conv2d_mkldnn = torch.utils.mkldnn.to_mkldnn(conv2d)

    # Fabricate the legacy layout: split the output channels by group.
    out_ch, in_ch, kernel_h, kernel_w = conv2d.weight.shape
    legacy_shape = (groups, out_ch // groups, in_ch, kernel_h, kernel_w)
    conv2d_mkldnn.weight = conv2d.weight.reshape(legacy_shape).to_mkldnn()

    x = torch.randn(1, 16, 8, 8)

    with TemporaryFileName() as fname:
        torch.jit.save(conv2d_mkldnn, fname)
        conv2d_loaded = torch.jit.load(fname)

        self.assertEqual(conv2d_mkldnn.weight.ndimension(), 5)
        self.assertEqual(conv2d_loaded.weight.ndimension(), 4)

        expected = conv2d(x)
        actual = conv2d_loaded(x.to_mkldnn()).to_dense()
        self.assertEqual(expected, actual)
def test_profiling(self):
    """Run the linear test on ``TwoLayerNetModule`` with a profiler output path.

    The path of a temporary file is passed to ``self.linear_test`` as
    ``profiler_output_path``.
    """
    with TemporaryFileName() as profiler_path:
        self.linear_test(
            TwoLayerNetModule,
            profiler_output_path=profiler_path,
        )
def test_memory_profiler(self):
    """Check memory accounting reported by the profiler.

    Covers four things:

    * per-op allocation/deallocation metrics for CPU tensors and, when
      available, CUDA and MKLDNN tensors;
    * when kineto is available, the presence and shape of ``[memory]``
      events in an exported Chrome trace;
    * top-level ``[memory]`` deallocation events for tensors freed
      outside any user scope.
    """
    def run_profiler(tensor_creation_fn):
        # collecting allocs / deallocs
        with _profile(profile_memory=True, record_shapes=True, use_kineto=kineto_available()) as prof:
            x = None
            with record_function("test_user_scope_alloc"):
                x = tensor_creation_fn()
            with record_function("test_user_scope_dealloc"):
                del x
        return prof.key_averages(group_by_input_shape=True)

    def check_metrics(stats, metric, allocs=None, deallocs=None):
        # Map each event key to the requested memory metric, then require
        # positive values for allocating keys and negative values for
        # deallocating keys.
        stat_metrics = {stat.key: getattr(stat, metric) for stat in stats}
        if allocs is not None:
            for alloc_fn in allocs:
                self.assertIn(alloc_fn, stat_metrics)
                self.assertGreater(stat_metrics[alloc_fn], 0)
        if deallocs is not None:
            for dealloc_fn in deallocs:
                self.assertIn(dealloc_fn, stat_metrics)
                self.assertLess(stat_metrics[dealloc_fn], 0)

    def create_cpu_tensor():
        return torch.rand(10, 10)

    def create_cuda_tensor():
        return torch.rand(10, 10).cuda()

    def create_mkldnn_tensor():
        return torch.rand(10, 10, dtype=torch.float32).to_mkldnn()

    stats = run_profiler(create_cpu_tensor)
    check_metrics(
        stats,
        "cpu_memory_usage",
        allocs=[
            "aten::empty",
            "aten::rand",
            "test_user_scope_alloc",
        ],
        deallocs=[
            "test_user_scope_dealloc",
        ],
    )

    if kineto_available():
        with TemporaryFileName(mode="w+") as fname:
            with profile(profile_memory=True) as prof:
                x = None
                with record_function("test_user_scope_alloc"):
                    x = create_cpu_tensor()
                with record_function("test_user_scope_dealloc"):
                    del x
            prof.export_chrome_trace(fname)
            with io.open(fname, 'r') as f:
                trace = json.load(f)

            # unittest assertions (not bare ``assert``) so the checks
            # still run under ``python -O``.
            self.assertIn("traceEvents", trace)
            events = trace["traceEvents"]
            found_memory_events = False
            for evt in events:
                self.assertIn("name", evt)
                if evt["name"] == "[memory]":
                    found_memory_events = True
                    self.assertIn("args", evt)
                    self.assertIn("Device Type", evt["args"])
                    self.assertIn("Device Id", evt["args"])
                    self.assertIn("Bytes", evt["args"])
            self.assertTrue(found_memory_events, "no [memory] events in exported trace")

    if torch.cuda.is_available():
        # Called once before profiling; the result is discarded.
        create_cuda_tensor()
        stats = run_profiler(create_cuda_tensor)
        check_metrics(
            stats,
            "cuda_memory_usage",
            allocs=[
                "test_user_scope_alloc",
                "aten::to",
                "aten::empty_strided",
            ],
            deallocs=[
                "test_user_scope_dealloc",
            ],
        )
        check_metrics(
            stats,
            "cpu_memory_usage",
            allocs=[
                "aten::rand",
                "aten::empty",
            ],
        )

    if torch._C.has_mkldnn:
        # Called once before profiling; the result is discarded.
        create_mkldnn_tensor()
        stats = run_profiler(create_mkldnn_tensor)
        check_metrics(
            stats,
            "cpu_memory_usage",
            allocs=[
                "test_user_scope_alloc",
                "aten::rand",
                "aten::empty",
                "aten::to_mkldnn",
            ],
            deallocs=[
                "test_user_scope_dealloc",
            ],
        )

    # check top-level memory events
    with _profile(profile_memory=True, use_kineto=kineto_available()) as prof:
        x = torch.rand(10, 10)
        del x
        if torch.cuda.is_available():
            y = torch.rand(10, 10).cuda()
            del y
        gc.collect()
    stats = prof.key_averages(group_by_input_shape=True)
    check_metrics(
        stats,
        "cpu_memory_usage",
        allocs=["aten::rand", "aten::empty"],
        deallocs=["[memory]"],
    )
    if torch.cuda.is_available():
        check_metrics(stats, "cuda_memory_usage", deallocs=["[memory]"])
def test_pickle_future(self):
    """``torch.save`` on a ``Future`` must raise the expected RuntimeError."""
    future = Future()
    expected_message = "Can not pickle torch.futures.Future"
    with TemporaryFileName() as fname, \
            self.assertRaisesRegex(RuntimeError, expected_message):
        torch.save(future, fname)
def test_module_hierarchy(self):
    """Check the "Module Hierarchy" recorded for ops of a scripted model.

    A small nested module ``C`` (containing ``A`` and ``B``) is scripted
    and run under the profiler with ``with_modules=True``. For every
    exported trace event that carries a "Module Hierarchy" argument and
    whose name is one of the tracked ops, the hierarchy string must be
    one of the expected call paths for that op.
    """
    # NOTE: class, method and attribute names below appear verbatim in
    # the expected hierarchy strings; do not rename them.
    class A(nn.Module):
        def __init__(self):
            super(A, self).__init__()

        def my_new_method(self, x):
            return x * 3

        def forward_impl_(self, x, y):
            return self.my_new_method(x) + y

        def forward(self, x, y):
            y = y - 2
            return self.forward_impl_(x, y)

    class B(nn.Module):
        def __init__(self):
            super(B, self).__init__()

        def forward(self, x):
            return x + 2

    class C(nn.Module):
        def __init__(self):
            super(C, self).__init__()
            self.A0 = A()
            self.B0 = B()

        def call_b(self, x):
            return self.B0.forward(x)

        def forward(self, x, y):
            return self.A0.forward(x, y) + self.call_b(x)

    model = C()
    model = torch.jit.script(model)
    input_a = torch.rand(128, 128)
    input_b = torch.rand(128, 128)

    # Expected call paths per op name.
    op_to_module_hierarchy = {
        "aten::sub": [
            "TOP(C)::forward.A0(A)::forward.",
        ],
        "aten::mul": [
            "TOP(C)::forward.A0(A)::forward.SELF(A)::forward_impl_.SELF(A)::my_new_method.",
        ],
        "aten::add": [
            "TOP(C)::forward.A0(A)::forward.SELF(A)::forward_impl_.",
            "TOP(C)::forward.SELF(C)::call_b.B0(B)::forward.",
            "TOP(C)::forward.",
        ],
    }

    with TemporaryFileName(mode="w+") as fname:
        with profile(
            activities=[torch.profiler.ProfilerActivity.CPU],
            with_modules=True,
        ) as prof:
            model(input_a, input_b)
        prof.export_chrome_trace(fname)
        with io.open(fname, 'r') as f:
            trace = json.load(f)

        # unittest assertions (not bare ``assert``) so the checks still
        # run under ``python -O`` and report the offending values.
        self.assertIn("traceEvents", trace)
        for evt in trace["traceEvents"]:
            self.assertIn("name", evt)
            if "args" not in evt or "Module Hierarchy" not in evt["args"]:
                continue
            op_name = evt["name"]
            if op_name in op_to_module_hierarchy:
                hierarchy = evt["args"]["Module Hierarchy"]
                self.assertIn(hierarchy, op_to_module_hierarchy[op_name])
def test_source(self):
    """Checks that source code attribution works for eager, TS and autograd mode.

    Graph-executor optimization is turned off for the duration of the
    test and the previous setting is restored afterwards, even if an
    assertion fails, so the global flag does not leak into other tests.
    """
    # avoid automatic inlining
    prev_opt = torch._C._get_graph_executor_optimize()
    torch._C._set_graph_executor_optimize(False)
    try:
        # NOTE: the function/class names below are matched against the
        # recorded stacks and trace event names; do not rename them.
        @torch.jit.script
        def ts_method_2(x, y):
            return torch.matmul(x, y)

        @torch.jit.script
        def ts_method_1(x, y, z):
            a = x + z
            w = ts_method_2(x, y) + a
            return w.sum()

        class DummyModule(nn.Module):
            def __init__(self):
                super(DummyModule, self).__init__()
                self.conv = torch.nn.Conv2d(3, 2, kernel_size=1, stride=2, padding=3, bias=False)

            def forward(self, x):
                return self.conv(x)

        mod = DummyModule()

        def call_module(x):
            return mod(x)

        with _profile(with_stack=True, use_kineto=kineto_available()) as p:
            x = torch.randn(10, 10, requires_grad=True)
            y = torch.randn(10, 10, requires_grad=True)
            z = x + y
            w = ts_method_1(x, y, z)
            v = 2 * w
            v.backward()
            a = torch.randn(2, 3, 2, 2, requires_grad=True)
            b = call_module(a)
            c = b.sum()
            c.backward()

        # Every add (forward or backward) must be attributed to this file
        # and to either this test or one of the scripted functions.
        for e in p.function_events:
            if "aten::add" in e.name or "AddBackward" in e.name:
                self.assertTrue(
                    any("test_profiler" in entry for entry in e.stack))
                self.assertTrue(
                    any(
                        "test_source" in entry
                        or "ts_method_1" in entry
                        or "ts_method_2" in entry
                        for entry in e.stack
                    ))

        # TODO: https://github.com/pytorch/kineto/issues/617
        if kineto_available() and not IS_WINDOWS:
            with TemporaryFileName(mode="w+") as fname:
                p.export_chrome_trace(fname)
                with io.open(fname, 'r') as f:
                    events = json.load(f)["traceEvents"]

                def extract(pattern: str):
                    # Return the single trace event whose name matches.
                    matches = [
                        e for e in events if re.search(pattern, e["name"])
                    ]
                    self.assertEqual(len(matches), 1, repr([e["name"] for e in matches]))
                    return matches[0]

                module_event = extract(r"DummyModule_0")
                wrapper_event = extract(r"call_module")
                self.assertEqual(module_event["args"]["Python parent id"],
                                 wrapper_event["args"]["Python id"])
    finally:
        torch._C._set_graph_executor_optimize(prev_opt)