def test_cuda_assert_should_not_stop_common_distributed_test_suite(self, device):
    # Checks that the common_distributed.py override does NOT terminate the
    # suite early after a CUDA device-side assert.
    #
    # The child suite below holds one test that triggers the assert and one
    # trivial test; it is instantiated for CUDA only.
    child_script = """\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_distributed import MultiProcessTestCase

class TestThatContainsCUDAAssertFailure(MultiProcessTestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
"""
    child_stderr = TestCase.runWithPytorchAPIUsageStderr(child_script)
    # Early termination is currently disabled for distributed tests, so both
    # tests must be reported as run.
    self.assertIn('Ran 2 test', child_stderr)
def test_filtering_env_var(self):
    # Exercises the environment variables that select which device types the
    # device-type test generator instantiates.
    test_filter_file_template = """\
#!/usr/bin/env python

import torch
from torch.testing._internal.common_utils import (TestCase, run_tests)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

class TestEnvironmentVariable(TestCase):

    def test_trivial_passing_test(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestEnvironmentVariable,
    globals(),
)

if __name__ == '__main__':
    run_tests()
"""

    def child_stderr(child_env):
        # Run the template in a child process and return its stderr as text.
        _, raw_stderr = TestCase.run_process_no_exception(
            test_filter_file_template, env=child_env)
        return raw_stderr.decode('ascii')

    test_bases_count = len(get_device_type_test_bases())

    # Start from the current environment with every filtering knob removed.
    env = os.environ.copy()
    for key in ('IN_CI',
                PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY,
                PYTORCH_TESTING_DEVICE_EXCEPT_FOR_KEY):
        env.pop(key, None)

    # No filter: one test per available device-type base.
    self.assertIn(f'Ran {test_bases_count} test', child_stderr(env))

    # only_for=cpu: exactly one test.
    env[PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY] = 'cpu'
    self.assertIn('Ran 1 test', child_stderr(env))

    # except_for=cpu: one device type fewer than the default.
    del env[PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY]
    env[PYTORCH_TESTING_DEVICE_EXCEPT_FOR_KEY] = 'cpu'
    self.assertIn(f'Ran {test_bases_count-1} test', child_stderr(env))

    # Both set at once: the child run must not finish with OK.
    env[PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY] = 'cpu'
    self.assertNotIn('OK', child_stderr(env))
def test_cuda_assert_should_stop_test_suite(self, device):
    # Slow: spawns a separate interpreter that runs a whole test suite.
    import subprocess
    import sys

    problematic_test_script = """\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (TestCase, run_tests)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

# This test is added to ensure that test suite terminates early when
# CUDA assert was thrown since all subsequent test will fail.
# See: https://github.com/pytorch/pytorch/issues/49019
# This test file should be invoked from test_testing.py
class TestThatContainsCUDAAssertFailure(TestCase):

    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    except_for=None
)

if __name__ == '__main__':
    run_tests()
"""

    # The child suite is expected to terminate early after the CUDA assert.
    completed = subprocess.run(
        [sys.executable, '-c', problematic_test_script],
        stderr=subprocess.PIPE,
        timeout=120,
    )
    stderr_text = completed.stderr.decode('ascii')

    # The CUDA error must be visible in the child's stderr.
    self.assertIn('CUDA error: device-side assert triggered', stderr_text)
    # Only 3 tests run: 2 on CPU and 1 on CUDA (the remaining CUDA test is skipped).
    self.assertIn('Ran 3 tests', stderr_text)
def test_cuda_assert_should_stop_test_suite(self, device):
    # Slow: the suite below is run in a separate process.
    # It is expected to terminate early once the CUDA assert fires.
    child_script = """\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

# This test is added to ensure that test suite terminates early when
# CUDA assert was thrown since all subsequent test will fail.
# See: https://github.com/pytorch/pytorch/issues/49019
# This test file should be invoked from test_testing.py
class TestThatContainsCUDAAssertFailure(TestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
"""
    child_stderr = TestCase.runWithPytorchAPIUsageStderr(child_script)
    # The CUDA error must be visible in the child's stderr.
    self.assertIn('CUDA error: device-side assert triggered', child_stderr)
    # Only one test runs, because the first one raises an unrecoverable error.
    self.assertIn('Ran 1 test', child_stderr)
def test_cuda_assert_should_stop_common_device_type_test_suite(self, device):
    # Checks that the common_device_type.py override terminates the suite
    # early after a CUDA device-side assert.
    child_script = """\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

class TestThatContainsCUDAAssertFailure(TestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
"""
    child_stderr = TestCase.runWithPytorchAPIUsageStderr(child_script)
    # The CUDA error must be visible in the child's stderr.
    self.assertIn('CUDA error: device-side assert triggered', child_stderr)
    # Only one test runs, because the first one raises an unrecoverable error.
    self.assertIn('Ran 1 test', child_stderr)
# 4d, inner dimensions Fortran x = torch.randn(r, o, n, m, device=device).transpose(-1, -2) check_single_nuclear_norm(x, axes) # 4d, inner dimensions non-contiguous x = torch.randn(r, o, n, 2 * m, device=device)[:, :, :, ::2] check_single_nuclear_norm(x, axes) # 4d, all dimensions non-contiguous x = torch.randn(7 * r, 5 * o, 11 * n, 2 * m, device=device)[::7, ::5, ::11, ::2] check_single_nuclear_norm(x, axes) @skipCUDAIfNoMagma def test_nuclear_norm_exceptions_old(self, device): for lst in [], [1], [1, 2]: x = torch.tensor(lst, dtype=torch.double, device=device) for axes in (), (0,): self.assertRaises(RuntimeError, torch.norm, x, "nuc", axes) self.assertRaises(IndexError, torch.norm, x, "nuc", (0, 1)) x = torch.tensor([[0, 1, 2], [3, 4, 5]], dtype=torch.double, device=device) self.assertRaisesRegex(RuntimeError, "duplicate or invalid", torch.norm, x, "nuc", (0, 0)) self.assertRaisesRegex(IndexError, "Dimension out of range", torch.norm, x, "nuc", (0, 2)) instantiate_device_type_tests(TestLinalg, globals()) if __name__ == '__main__': run_tests()
softmin=lambda *args, **kwargs: apply_masked_normalization_along_dim( torch.nn.functional.softmin, *args, **kwargs), ) masked_ops = [op for op in op_db if op.name.startswith('_masked.')] masked_ops_with_references = [ op for op in masked_ops if op.name.rsplit('.', 1)[-1] in reference_functions ] class TestMasked(TestCase): @onlyNativeDeviceTypes @suppress_warnings @ops(masked_ops_with_references) def test_reference_masked(self, device, dtype, op): ref_op = reference_functions[op.name.rsplit('.', 1)[-1]] sample_inputs = op.sample_inputs(device, dtype) for sample_input in sample_inputs: t_inp, t_args, t_kwargs = sample_input.input, sample_input.args, sample_input.kwargs actual = op.op(t_inp, *t_args, **t_kwargs) expected = ref_op(t_inp, *t_args, **t_kwargs) outmask = torch._masked._output_mask(op.op, t_inp, *t_args, **t_kwargs) actual = torch.where(outmask, actual, actual.new_zeros([])) expected = torch.where(outmask, expected, expected.new_zeros([])) self.assertEqual(actual, expected, exact_device=False) instantiate_device_type_tests(TestMasked, globals())
# Case 5: out= with correct shape and device, but a dtype # that output cannot be "safely" cast to (long). # Expected behavior: error. # NOTE: this case is filtered by dtype since some ops produce # bool tensors, for example, which can be safely cast to any # dtype. It is applied when single tensors are floating point or complex # dtypes, or if an op returns multiple tensors when at least one such # tensor is a floating point or complex dtype. _dtypes = floating_and_complex_types_and(torch.float16, torch.bfloat16) if (isinstance(expected, torch.Tensor) and expected.dtype in _dtypes or (not isinstance(expected, torch.Tensor) and any(t.dtype in _dtypes for t in expected))): def _case_five_transform(t): return make_tensor(t.shape, dtype=torch.long, device=t.device) out = _apply_out_transform(_case_five_transform, expected) msg_fail = "" if not isinstance(expected, torch.Tensor) else \ ("Expected RuntimeError when doing an unsafe cast from a result of dtype " f"{expected.dtype} into an out= with dtype torch.long") with self.assertRaises(RuntimeError, msg=msg_fail): op_out(out=out) instantiate_device_type_tests(TestOpInfo, globals()) instantiate_device_type_tests(TestGradients, globals()) instantiate_device_type_tests(TestCommon, globals()) if __name__ == '__main__': run_tests()
self.assertEqual(actual, tensors1) @onlyCUDA @dtypes(*torch.testing.get_all_fp_dtypes(include_half=False, include_bfloat16=False)) @ops(foreach_pointwise_op_db) def test_pointwise_op_tensors_on_different_devices(self, device, dtype, op): # tensors1: ['cuda', 'cpu] # tensors2: ['cuda', 'cpu] # tensors3: ['cuda', 'cpu] _cuda_tensors = op.sample_inputs(device, dtype, 3, same_size=True) _cpu_tensors = op.sample_inputs('cpu', dtype, 3, same_size=True) tensors1, tensors2, tensors3 = list( tensors for tensors in zip(_cuda_tensors, _cpu_tensors)) foreach_op, foreach_op_, native_op = op.method_variant, op.inplace_variant, op.ref actual = foreach_op(tensors1, tensors2, tensors3) expected = [native_op(*_cuda_tensors), native_op(*_cpu_tensors)] self.assertEqual(expected, actual) # note(mkozuki): Limiting dtypes to FP32&FP64, we can safely run inplace ops. foreach_op_(tensors1, tensors2, tensors3) self.assertEqual(expected, tensors1) instantiate_device_type_tests(TestForeach, globals()) if __name__ == '__main__': run_tests()
def test_cat_out_different_dtypes(self, device):
    # torch.cat with out= must write mixed integer inputs using out's dtype.
    def ints(values, dtype):
        return torch.tensor(values, device=device, dtype=dtype)

    x = ints([1, 2, 3], torch.int8)
    y = ints([4, 5, 6], torch.int32)

    # int8 + int32 inputs into an int16 destination
    out = torch.zeros(6, device=device, dtype=torch.int16)
    torch.cat([x, y], out=out)
    self.assertEqual(out, ints([1, 2, 3, 4, 5, 6], torch.int16), exact_dtype=True)

    # int8 + int32 + int16 inputs into an int64 destination
    z = ints([7, 8, 9], torch.int16)
    out = torch.zeros(9, device=device, dtype=torch.int64)
    torch.cat([x, y, z], out=out)
    self.assertEqual(out, ints([1, 2, 3, 4, 5, 6, 7, 8, 9], torch.int64), exact_dtype=True)

@onlyOnCPUAndCUDA
def test_cat_invalid_dtype_promotion(self, device):
    # Concatenating int16 and float inputs into an int16 out= must be rejected.
    int_part = torch.tensor([1, 2, 3], device=device, dtype=torch.int16)
    float_part = torch.tensor([4, 5, 6], device=device, dtype=torch.float)
    destination = torch.zeros(6, device=device, dtype=torch.int16)

    with self.assertRaisesRegex(RuntimeError, "can't be cast"):
        torch.cat([int_part, float_part], out=destination)


instantiate_device_type_tests(TestTypePromotion, globals())

if __name__ == '__main__':
    run_tests()
check_batched_grad=False, check_batched_forward_grad=check_batched_forward_grad) if op.supports_forward_ad: call_grad_test_helper() else: err_msg = r"Trying to use forward AD with .* that does not support it" hint_msg = ("Running forward AD for an OP that has does not support it did not " "raise any error. If your op supports forward AD, you should set supports_forward_ad=True") with self.assertRaisesRegex(NotImplementedError, err_msg, msg=hint_msg): call_grad_test_helper() @_gradcheck_ops(op_db) def test_forward_mode_AD(self, device, dtype, op): self._skip_helper(op, device, dtype) self._forward_grad_helper(device, dtype, op, op.get_op(), is_inplace=False) @_gradcheck_ops(op_db) def test_inplace_forward_mode_AD(self, device, dtype, op): self._skip_helper(op, device, dtype) if not op.inplace_variant or not op.supports_inplace_autograd: self.skipTest("Skipped! Operation does not support inplace autograd.") self._forward_grad_helper(device, dtype, op, self._get_safe_inplace(op.get_inplace()), is_inplace=True) instantiate_device_type_tests(TestGradients, globals()) if __name__ == '__main__': run_tests()
ind[-1] = 10 self.assertRaises(IndexError, a.__getitem__, ind) self.assertRaises(IndexError, a.__setitem__, ind, 0) ind = torch.ones(20, dtype=torch.int64, device=device) ind[0] = 11 self.assertRaises(IndexError, a.__getitem__, ind) self.assertRaises(IndexError, a.__setitem__, ind, 0) def test_index_is_larger(self, device): # Simple case of fancy index broadcasting of the index. a = torch.zeros((5, 5), device=device) a[[[0], [1], [2]], [0, 1, 2]] = tensor([2., 3., 4.], device=device) self.assertTrue((a[:3, :3] == tensor([2., 3., 4.], device=device)).all()) def test_broadcast_subspace(self, device): a = torch.zeros((100, 100), device=device) v = torch.arange(0., 100, device=device)[:, None] b = torch.arange(99, -1, -1, device=device).long() a[b] = v expected = b.double().unsqueeze(1).expand(100, 100) self.assertEqual(a, expected) instantiate_device_type_tests(TestIndexing, globals()) instantiate_device_type_tests(NumpyTests, globals()) if __name__ == '__main__': run_tests()
high=op.domain[1]) contig = contig[:1, :, :, :, :, :, :, :, :, :, :, :] contig2 = torch.empty(contig.size(), device=device, dtype=dtype) contig2.copy_(contig) self.assertTrue(contig.is_contiguous()) self.assertTrue(contig2.is_contiguous()) self.assertEqual(op(contig), op(contig2)) # Tests that computation on a multiple batches is the same as # per-batch computation. @ops(unary_ufuncs) def test_batch_vs_slicing(self, device, dtype, op): input = _make_tensor((1024, 512), dtype=dtype, device=device, low=op.domain[0], high=op.domain[1]) actual = op(input) expected = torch.stack([op(slice) for slice in input]) self.assertEqual(actual, expected) instantiate_device_type_tests(TestUnaryUfuncs, globals()) if __name__ == '__main__': run_tests()
initial_value = 1000 # some high number expected_result = [ np.full((2, 5), initial_value).tolist(), np.min(data, axis=0).tolist(), ] elif reduction == "sum": expected_result = [ np.full((2, 5), initial_value).tolist(), np.sum(data, axis=0).tolist(), ] for unsafe in [True, False]: self._test_common( reduction, device, val_dtype, unsafe, axis, initial_value, data, lengths, expected_result, expected_grad, check_backward, ) instantiate_device_type_tests(TestSegmentReductions, globals()) if __name__ == "__main__": run_tests()
kwargs = sample.kwargs copy_args = clone_to_device(args, test_device) r_exp = op(*copy_args, **kwargs) r_actual = op(*args, **kwargs) torch._lazy.mark_step() assert_allclose_rec((r_actual, r_exp)) torch._lazy.ir_cache.reset() torch._lazy.config.set_reuse_ir(False) # TODO: after we move to master, add Lazy as a new Device here: # https://github.com/pytorch/pytorch/blob/master/torch/testing/_internal/common_device_type.py#L532 instantiate_device_type_tests(TestLazyOpInfo, globals(), only_for="cpu") class TestLazyDynamicOps(TestCase): @classmethod def setUpClass(cls) -> None: # Setup the dynamic shape mode cls.old_ssa_mode = torch._C._lazy._get_symbolic_shape_mode() torch._C._lazy._set_symbolic_shape_mode(True) return super().setUpClass() @classmethod def tearDownClass(cls) -> None: torch._C._lazy._set_symbolic_shape_mode(cls.old_ssa_mode) return super().tearDownClass()
@ops([op for op in op_db if op.aten_name in custom_rules_works_list])
def test_custom_rules(self, device, dtype, op):
    # Ops listed in custom_rules_works_list, run with the dtype supplied by @ops.
    self.custom_rules_test_base(device, dtype, op)

@ops([op for op in op_db if op.aten_name in custom_rules_works_list])
def test_custom_rules_ints(self, device, dtype, op):
    # OpInfos currently only run on floats, so substitute an integer dtype:
    # float32 becomes int32, anything else becomes int64.
    int_dtype = torch.int32 if dtype == torch.float32 else torch.int64

    # Integer kernels are not always implemented, so eager is allowed to fail.
    self.custom_rules_test_base(device, int_dtype, op, allow_eager_fail=True)

@expectedFailure
@ops([op for op in op_db if op.aten_name in custom_rules_expected_failure_list])
def test_custom_rules_expected_failure(self, device, dtype, op):
    # Ops listed in custom_rules_expected_failure_list are expected to fail.
    self.custom_rules_test_base(device, dtype, op)


TestDtypeCustomRulesCPU = None
# The call below creates TestDtypeCustomRulesCPU
instantiate_device_type_tests(TestDtypeCustomRules, globals(), only_for=("cpu",))
if op.aten_backward_name in decomposition_names or run_all: cotangents = tree_map(lambda x: torch.randn_like(x), decomp_out) decomposed.clear() with enable_torch_dispatch_mode(DecompCrossRefMode): decomp_vjp_fn(cotangents) if not run_all: check_decomposed(op.aten_backward_name) elif aten_name in decomposition_names or run_all: args = [sample_input.input] + list(sample_input.args) kwargs = sample_input.kwargs decomposed.clear() with enable_torch_dispatch_mode(DecompCrossRefMode): func(*args, **kwargs) if not run_all: check_decomposed(aten_name) else: assert op.supports_autograd self.skipTest( "only backwards is decomposed, but dtype doesn't support AD" ) instantiate_device_type_tests(TestDecomp, globals()) if __name__ == "__main__": run_tests()
# Tests that the alias functions perform the same operation as the original
# NOTE(review): everything down to the setattr(...) call uses the free names
# `info` and `cls`, so it presumably sits inside the body of create_alias_tests
# (header outside this excerpt) - confirm nesting when applying.
def _test_alias_computation(self, device, info=info):
    # `info=info` binds the current loop value at definition time so each
    # generated test keeps its own alias description.
    alias_op = info.alias_op
    original_op = info.original_op

    inp = info.get_input(device)
    args = info.get_args(device)

    # Run the alias on its own clone of the input.
    alias_input = clone_inp(inp)
    alias_result = alias_op(alias_input, *args)

    # Run the ORIGINAL op on a second clone. The previous code called
    # alias_op here as well, which compared the alias with itself and made
    # the test pass vacuously.
    original_input = clone_inp(inp)
    original_result = original_op(original_input, *args)

    # Inputs are compared too; the two runs must leave them identical.
    self.assertEqual(alias_input, original_input, atol=0, rtol=0)
    self.assertEqual(alias_result, original_result, atol=0, rtol=0)

# Applies decorators
for decorator in info.decorators:
    _test_alias_computation = decorator(_test_alias_computation)

test_name = "test_alias_computation_" + info.alias_name
setattr(cls, test_name, _test_alias_computation)

create_alias_tests(TestOpNormalization)

instantiate_device_type_tests(TestOpNormalization, globals())

if __name__ == '__main__':
    run_tests()
dst2 = tensor_nc.nonzero(as_tuple=False) self.assertEqual(dst1, dst2, atol=0, rtol=0) dst3 = torch.empty_like(dst1) data_ptr = dst3.data_ptr() # expect dst3 storage to be reused torch.nonzero(tensor, out=dst3) self.assertEqual(data_ptr, dst3.data_ptr()) self.assertEqual(dst1, dst3, atol=0, rtol=0) # discontiguous out dst4 = torch.empty(dst1.size(0), dst1.size(1) * 2, dtype=torch.long, device=device)[:, ::2] data_ptr = dst4.data_ptr() strides = dst4.stride() torch.nonzero(tensor, out=dst4) self.assertEqual(data_ptr, dst4.data_ptr()) self.assertEqual(dst1, dst4, atol=0, rtol=0) self.assertEqual(strides, dst4.stride()) def test_nonzero_non_diff(self, device): x = torch.randn(10, requires_grad=True) nz = x.nonzero() self.assertFalse(nz.requires_grad) instantiate_device_type_tests(TestShapeOps, globals()) if __name__ == '__main__': run_tests()
assert torch.allclose(ref, res) class TestAutocast(TestCase): @unittest.skipIf(not torch.cuda.is_available(), "CUDA is unavailable") @unittest.skipIf(not USE_TORCHVISION, "test requires torchvision") def test_autocast(self): mod = torchvision.models.resnet18().cuda() mod.train() x = torch.randn(16, 3, 32, 32, device="cuda") aot_mod = memory_efficient_fusion(mod) # Ensure that AOT Autograd works with AMP with torch.cuda.amp.autocast(True): res = aot_mod(x) res.sum().backward() only_for = ("cpu") instantiate_device_type_tests( TestPythonKey, globals(), only_for=only_for, ) instantiate_device_type_tests(TestEagerFusionOpInfo, globals(), only_for=only_for) if __name__ == '__main__': run_tests()
def test_isin_different_dtypes(self, device):
    # torch.isin must give the same answer for every pairing of element and
    # test-element dtypes, for short and long test sets, with and without
    # assume_unique.
    if device == 'cpu':
        supported_types = all_types()
    else:
        supported_types = all_types_and(torch.half)

    combos = product([1, 10], [False, True], supported_types, supported_types)
    for mult, assume_unique, dtype1, dtype2 in combos:
        a = torch.tensor([1, 2, 3], device=device, dtype=dtype1)
        b = torch.tensor([3, 4, 5] * mult, device=device, dtype=dtype2)
        # Only the value 3 appears in both tensors.
        ec = torch.tensor([False, False, True], device=device)
        c = torch.isin(a, b, assume_unique=assume_unique)
        self.assertEqual(c, ec)

@onlyCUDA
@dtypes(*all_types())
def test_isin_different_devices(self, device, dtype):
    # Mixing devices must raise whichever operand lives on the CPU.
    for elements_device, test_elements_device in ((device, 'cpu'), ('cpu', device)):
        elements = torch.arange(6, device=elements_device, dtype=dtype).reshape([2, 3])
        test_elements = torch.arange(3, 30, device=test_elements_device, dtype=dtype)
        with self.assertRaises(RuntimeError):
            torch.isin(elements, test_elements)


instantiate_device_type_tests(TestSortAndSelect, globals())

if __name__ == '__main__':
    run_tests()
@unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
@unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
@dtypes(*datatypes)
def test_reduce_scatter(self, device, dtype):
    # After reduce_scatter, GPU i must hold the i-th 32-element slice of the
    # element-wise sum of every GPU's input.
    out_size = 32
    in_size = out_size * nGPUs

    cpu_inputs = [
        torch.zeros(in_size).uniform_().to(dtype=dtype) for _ in range(nGPUs)
    ]

    # Reference result accumulated on CPU, viewed as one row per GPU.
    expected = torch.zeros(in_size, dtype=dtype)
    for cpu_input in cpu_inputs:
        expected.add_(cpu_input)
    expected = expected.view(nGPUs, out_size)

    inputs = [cpu_input.cuda(gpu) for gpu, cpu_input in enumerate(cpu_inputs)]
    outputs = [
        torch.zeros(out_size, device=gpu, dtype=dtype) for gpu in range(nGPUs)
    ]

    nccl.reduce_scatter(inputs, outputs)

    for gpu in range(nGPUs):
        self.assertEqual(outputs[gpu], expected[gpu])


instantiate_device_type_tests(TestNCCL, globals(), only_for='cuda')

if __name__ == '__main__':
    run_tests()
def test_nested_tensor_mul_in_place(self, device, dtype):
    # In-place multiplication must match multiplying the components pairwise.
    nt1, nt2 = self.random_nt_pair(device, dtype, 4, (4, 4))
    products = [lhs * rhs for lhs, rhs in zip(nt1.unbind(), nt2.unbind())]
    ref = torch.nested_tensor(products)
    nt1 *= nt2
    self.nt_equal(ref, nt1)

@dtypes(torch.float, torch.float16)
@skipMeta
@torch.inference_mode()
def test_clone(self, device, dtype):
    original = self.random_nt(device, dtype, 4, (4, 4), (1, 1))
    copy = original.clone()
    # A fresh clone holds the same values as its source.
    self.nt_equal(original, copy)

    # Mutating the clone must leave the source untouched.
    copy.mul_(original)
    original_parts = original.unbind()
    copy_parts = copy.unbind()
    for idx, original_part in enumerate(original_parts):
        self.assertNotEqual(original_part, copy_parts[idx])

    # preserve_format is accepted; any other memory format is rejected.
    original.clone(memory_format=torch.preserve_format)
    msg = "clone_nested only supports memory format Preserve, but got ChannelsLast instead."
    with self.assertRaisesRegex(RuntimeError, msg):
        original.clone(memory_format=torch.channels_last)


instantiate_device_type_tests(TestNestedTensorDeviceType, globals())

if __name__ == '__main__':
    run_tests()
with BytesIOContext() as f: torch.save(my_tensor, f) f.seek(0) new_tensor = torch.load(f) self.assertIsInstance(new_tensor, TestGetStateSubclass) self.assertEqual(new_tensor.elem, my_tensor.elem) self.assertEqual(new_tensor.foo, foo_val) self.assertTrue(new_tensor.reloaded) def test_tensor_subclass_deepcopy(self): wrapped_tensor = torch.rand(2) my_tensor = TestWrapperSubclass(wrapped_tensor) foo_val = "bar" my_tensor.foo = foo_val self.assertEqual(my_tensor.foo, foo_val) new_tensor = deepcopy(my_tensor) self.assertIsInstance(new_tensor, TestWrapperSubclass) self.assertEqual(new_tensor.elem, my_tensor.elem) self.assertEqual(new_tensor.foo, foo_val) instantiate_device_type_tests(TestBothSerialization, globals()) if __name__ == '__main__': run_tests()
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import TestCase


# Device-generic template: its test method takes a `device` argument.
class TestFoo(TestCase):
    def test_bar(self, device):
        pass


# Instantiate the template for the "cpu" device type only.
instantiate_device_type_tests(TestFoo, globals(), only_for="cpu")


# Plain test case: its test takes no `device` argument and the class is not
# passed to instantiate_device_type_tests.
class TestSpam(TestCase):
    def test_ham(self):
        pass
requires_grad=False) new_f = None for sample_input in sample_inputs_itr: args = [sample_input.input] + list(sample_input.args) kwargs = sample_input.kwargs new_f = make_fx(f)(args, kwargs) for arg in args: if isinstance(arg, torch.Tensor) and arg.dtype == torch.float: arg.uniform_(0, 1) try: old_out = f(args, kwargs) except Exception: continue new_out = new_f(args, kwargs) self.assertEqual(new_out, old_out) only_for = ("cpu") instantiate_device_type_tests( TestProxyTensor, globals(), only_for=only_for, ) instantiate_device_type_tests(TestProxyTensorOpInfo, globals(), only_for=only_for) if __name__ == '__main__': run_tests()
self.assertEqual(x1, x2) instantiate_device_type_tests( TestThatContainsCUDAAssertFailure, globals(), only_for='cuda' ) if __name__ == '__main__': run_tests() """) # we are currently disabling CUDA early termination for distributed tests. self.assertIn('Ran 2 test', stderr) instantiate_device_type_tests(TestTesting, globals()) class TestFrameworkUtils(TestCase): tests = [ 'super_long_test', 'long_test1', 'long_test2', 'normal_test1', 'normal_test2', 'normal_test3', 'short_test1', 'short_test2', 'short_test3', 'short_test4', 'short_test5',
args = [sample_input.input] + list(sample_input.args) kwargs = sample_input.kwargs with MetaCrossRefDispatchMode.push(self, dtype=dtype, device=device): expected = func(*args, **kwargs) if isinstance(expected, torch.Tensor) and op.supports_out: func(*args, **kwargs, out=expected) def test_empty_quantized(self): r = torch.empty(2**52, device='meta', dtype=torch.qint8) self.assertEqual(r.device.type, 'meta') instantiate_device_type_tests(TestMeta, globals()) def print_op_str_if_not_supported(op_str): op = OperatorName.parse(op_str) packet = getattr(torch.ops.aten, str(op.name)) overload = getattr(packet, op.overload_name if op.overload_name else "default") if any(overload in d for d in [meta_dispatch_skips, meta_dispatch_device_skips['cuda']]): print(f"{overload} # SKIP") if any(overload in d for d in [ meta_dispatch_expected_failures, meta_dispatch_device_expected_failures['cuda'] ]): print(overload)
namespace_basename = namespace.__name__.split('.')[-1] for module_name in namespace.modules.__all__: # class object for this module (e.g. torch.nn.Linear) module_cls = getattr(namespace.modules, module_name) if module_cls in MODULES_TO_SKIP: continue verify_kwargs = module_cls not in MODULES_WITHOUT_KWARGS_SUPPORT module_is_lazy = module_cls in LAZY_MODULES check_nonexistent_arg = module_cls not in MODULES_WITH_PREVIOUS_KWARGS # Generate a function for testing this module and setattr it onto the test class. run_test = generate_test_func(test_cls, module_cls, constructor_arg_db, verify_kwargs=verify_kwargs, module_is_lazy=module_is_lazy, check_nonexistent_arg=check_nonexistent_arg) test_name = f'test_{namespace_basename}_{module_name}' if module_cls in MODULES_THAT_REQUIRE_FBGEMM: run_test = skipIfNoFBGEMM(run_test) setattr(TestModuleInit, test_name, run_test) class TestModuleInit(TestCase): _ignore_not_implemented_error = False generate_tests(TestModuleInit, build_constructor_arg_db()) instantiate_device_type_tests(TestModuleInit, globals()) if __name__ == '__main__': run_tests()
tensors = (torch.tensor(3, dtype=dtype, device=device), torch.tensor([1, 0, -3], dtype=dtype, device=device), torch.tensor([[3, 0, -1], [3, 5, 4]], dtype=dtype, device=device)) for tensor in tensors: if dtype == torch.bfloat16: with self.assertRaises(TypeError): np_array = tensor.cpu().numpy() continue np_array = tensor.cpu().numpy() for t, a in product( (tensor.flatten()[0], tensor.flatten()[0].item()), (np_array.flatten()[0], np_array.flatten()[0].item())): self.assertEqual(t, a) if dtype == torch.complex64 and torch.is_tensor(t) and type( a) == np.complex64: # TODO: Imaginary part is dropped in this case. Need fix. # https://github.com/pytorch/pytorch/issues/43579 self.assertFalse(t == a) else: self.assertTrue(t == a) instantiate_device_type_tests(TestNumPyInterop, globals()) if __name__ == '__main__': run_tests()