def test_lengths_ops(self):
    """Run the shared 'Lengths' reference-check suite over generated
    (values, lengths) tensors with lengths in [1, 10]."""
    # Build the hypothesis strategy first, then hand it to the tester.
    strategy = hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=10,
        allow_empty=True,
    )
    runner = LengthsTester().test('Lengths', strategy, REFERENCES_ALL)
    runner(self)
def test_lengths_ops(self):
    """Run the shared 'Lengths' reference-check suite (including the
    lengths-only references) over generated (values, lengths) tensors."""
    lengths_strategy = hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    )
    references = REFERENCES_ALL + REFERENCES_LENGTHS_ONLY
    LengthsTester()._test('Lengths', lengths_strategy, references)(self)
def test_lengths_ops(self):
    """Exercise the Lengths* operators via the shared tester, checking
    every reference in REFERENCES_ALL plus the lengths-only ones."""
    LengthsTester()._test(
        'Lengths',
        hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        REFERENCES_ALL + REFERENCES_LENGTHS_ONLY,
    )(self)
def test_lengths_ops(self):
    """Exercise the Lengths* operators against REFERENCES_ALL with
    generated lengths in [1, 10] (empty groups allowed)."""
    tensors = hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=10,
        allow_empty=True,
    )
    check = LengthsTester().test('Lengths', tensors, REFERENCES_ALL)
    check(self)
class TestLengthsPadOp(serial.SerializedTestCase):
    """Checks the LengthsPad operator against a numpy reference."""

    @serial.given(
        inputs=hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        delta_length=st.integers(0, 10),
        padding_value=st.floats(-10.0, 10.0),
        **hu.gcs
    )
    def test_lengths_pad(self, inputs, delta_length, padding_value, gc, dc):
        """Each group of `lengths[i]` rows is padded up to `target_length`
        rows with `padding_value`; target_length is at least 1."""
        data, lengths = inputs
        max_length = np.max(lengths) if len(lengths) > 0 else 0
        target_length = max(max_length + delta_length, 1)

        def lengths_pad_op(data, lengths):
            # Output holds target_length rows per group, pre-filled with
            # the padding value; each group's real rows are copied in.
            num_groups = len(lengths)
            padded = np.full(
                (target_length * num_groups, ) + data.shape[1:],
                padding_value,
                dtype=np.float32,
            )
            dst, src = 0, 0
            for i in range(num_groups):
                count = lengths[i]
                padded[dst:dst + count] = data[src:src + count]
                dst += target_length
                src += count
            return [padded]

        op = core.CreateOperator(
            "LengthsPad",
            ["data", "lengths"],
            ["data_padded"],
            target_length=target_length,
            padding_value=padding_value,
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[data, lengths],
            reference=lengths_pad_op,
        )
class TestUtilityOps(serial.SerializedTestCase):
    """Reference/device/gradient checks for assorted utility operators:
    Slice, ResizeLike, Transpose, NanCheck, Max/Min (+gradients), Sum,
    LengthsGather, LengthsToRanges, Size, Alias and Range."""

    @given(X=hu.tensor(), args=st.booleans(), **hu.gcs)
    @settings(deadline=10000)
    def test_slice(self, X, args, gc, dc):
        """Slice along one randomly chosen dimension; `args` picks whether
        the bounds are operator arguments or extra input tensors."""
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end
        if args:
            op = core.CreateOperator(
                "Slice", ["X"], ["Y"], starts=starts, ends=ends,
                device_option=gc
            )

            def slice_ref(X):
                slc = [slice(None)] * X.ndim
                slc[dim] = slice(slice_start, slice_end)
                # Fix: index with a tuple — indexing an ndarray with a
                # *list* of slices is deprecated and rejected by newer numpy.
                return [X[tuple(slc)]]

            inputs = [X]
        else:
            op = core.CreateOperator(
                "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc
            )

            def slice_ref(x, starts, ends):
                slc = [slice(None)] * x.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [x[tuple(slc)]]

            inputs = [X, starts, ends]

        self.assertReferenceChecks(gc, op, inputs, slice_ref)
        self.assertDeviceChecks(dc, op, inputs, [0])
        self.assertGradientChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
        )

    @given(ndims=st.integers(min_value=1, max_value=10), **hu.gcs)
    @settings(deadline=10000)
    def test_resize_like(self, ndims, gc, dc):
        """ResizeLike reshapes X to Y's shape (element counts match)."""
        X = np.zeros((ndims * 2, ))
        Y = np.zeros((ndims, 2))

        op = core.CreateOperator(
            "ResizeLike", ["X", "Y"], ["Z"],
        )

        def resize_like(X, Y):
            return [X.reshape(Y.shape)]

        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertReferenceChecks(gc, op, [X, Y], resize_like,
                                   ensure_outputs_are_inferred=True)

    @given(dtype=st.sampled_from([np.float32, np.int32]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    @settings(deadline=10000)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with either explicit `axes` or none (full reversal)."""
        if (gc.device_type == caffe2_pb2.CUDA and engine == "CUDNN"):
            # cudnn 5.1 does not support int.
            assume(workspace.GetCuDNNVersion() >= 6000 or dtype != np.int32)

        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            axes = None
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], axes=axes, engine=engine)

        def transpose_ref(x, axes):
            # np.transpose with axes=None reverses all dimensions.
            return (np.transpose(x, axes),)

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @given(m=st.integers(5, 10), n=st.integers(5, 10),
           o=st.integers(5, 10), nans=st.booleans(), **hu.gcs)
    @settings(deadline=10000)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck passes clean tensors through and must raise when the
        input contains a NaN — in both forward and gradient passes."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison one random element.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        def nan_reference(X, Y):
            if not np.isnan(X).any():
                return [X]
            else:
                return [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            pass

    @serial.given(n=st.integers(4, 5), m=st.integers(6, 7),
                  d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Three-input elementwise Max against np.maximum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    @settings(deadline=10000)
    def test_elementwise_max_grad(self, n, m, d, gc, dc):
        """MaxGradient routes the output gradient to every input that
        equals the max (ties receive the gradient in full)."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.maximum(np.maximum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def max_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MaxGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @serial.given(n=st.integers(4, 5), m=st.integers(6, 7),
                  d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_min(self, n, m, d, gc, dc):
        """Three-input elementwise Min against np.minimum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def min_op(X, Y, Z):
            return [np.minimum(np.minimum(X, Y), Z)]

        op = core.CreateOperator(
            "Min",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    @settings(deadline=10000)
    def test_elementwise_min_grad(self, n, m, d, gc, dc):
        """MinGradient mirrors MaxGradient: gradient flows to inputs that
        equal the elementwise minimum."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.minimum(np.minimum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def min_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MinGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(
        n=st.integers(1, 8), m=st.integers(1, 10), d=st.integers(1, 4),
        in_place=st.booleans(),
        engine=st.sampled_from(["", "CUDNN"]),
        seed=st.integers(min_value=0, max_value=65535),
        dtype=st.sampled_from([np.int32, np.int64, np.float32]),
        **hu.gcs)
    @settings(deadline=10000)
    def test_sum(
            self, n, m, d, in_place, engine, seed, dtype, gc, dc):
        """Sum over m input tensors, optionally writing the result in
        place into the first input."""
        input_names = []
        input_vars = []
        np.random.seed(seed)
        for i in range(m):
            X_name = 'X' + str(i)
            input_names.append(X_name)
            # Fix: the original also stored each tensor via
            # `vars()[X_name] = var`, which has no effect inside a
            # function; the list below is what is actually used.
            input_vars.append(np.random.rand(n, d).astype(dtype))

        def sum_op_ref(*args):
            res = np.zeros((n, d))
            for i in range(m):
                res = res + args[i]
            return (res, )

        op = core.CreateOperator(
            "Sum",
            input_names,
            [input_names[0]] if in_place else ['Y'],
            engine=engine,
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=input_vars,
            reference=sum_op_ref,
        )
        self.assertDeviceChecks(dc, op, input_vars, [0])

    @given(
        inputs=hu.lengths_tensor().flatmap(
            lambda pair: st.tuples(
                st.just(pair[0]),
                st.just(pair[1]),
                hu.dims(max_value=len(pair[1])),
            )
        ).flatmap(
            lambda tup: st.tuples(
                st.just(tup[0]),
                st.just(tup[1]),
                hu.arrays(
                    tup[2], dtype=np.int32,
                    elements=st.integers(
                        min_value=0, max_value=len(tup[1]) - 1)),
            )
        ),
        **hu.gcs_cpu_only)
    @settings(deadline=1000)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather concatenates the groups selected by `indices`."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            ends = np.cumsum(lengths)
            return [np.concatenate(
                list(items[ends[i] - lengths[i]:ends[i]] for i in indices))]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(
        inputs=hu.lengths_tensor(),
        **hu.gcs_cpu_only)
    @settings(deadline=1000)
    def test_lengths_to_ranges(self, inputs, gc, dc):
        """LengthsToRanges turns lengths into [offset, length] pairs, and
        its shape/type inference must match the actual output."""
        _, lengths = inputs

        def lengths_to_ranges_op(lengths):
            return [
                [[x, y] for x, y in zip(np.cumsum(np.append([0], lengths)),
                                        lengths)]
            ]

        op = core.CreateOperator(
            "LengthsToRanges",
            ["lengths"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[lengths],
            reference=lengths_to_ranges_op,
        )

        # Test shape inference logic
        net = core.Net("test_shape_inference")
        workspace.FeedBlob("lengths", lengths)
        output = net.LengthsToRanges(
            ["lengths"],
            ["output"]
        )
        (shapes, types) = workspace.InferShapesAndTypes([net])
        workspace.RunNetOnce(net)
        self.assertEqual(shapes[output], list(workspace.blobs[output].shape))
        self.assertEqual(shapes[output], list(lengths.shape) + [2])
        self.assertEqual(types[output], core.DataType.INT32)

    @given(**hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )

    def test_alias_op(self):
        """ Don't use hypothesis because there are only 2 cases to check"""
        for size in [0, 5]:
            X = np.arange(size).astype(np.float32)
            workspace.FeedBlob('X', X)

            op = core.CreateOperator(
                "Alias",
                ["X"],
                ["Y"]
            )
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob('Y')
            np.testing.assert_array_equal(X, Y)

    @given(**hu.gcs)
    @settings(deadline=10000)
    def test_range(self, gc, dc):
        """Range with 1, 2 or 3 inputs mirrors np.arange; a zero step
        must raise."""
        names = [
            ('stop_',),
            ('start_', 'stop_'),
            ('start_', 'stop_', 'step_'),
        ]
        # Most random values aren't great here, so use a fixed set instead of
        # hypothesis.
        for inputs in (
            (10,),
            (np.float32(10.0),),
            (0,),
            (0, 0),
            (10., 5.0, -1.),
            (2, 10000),
            (2, 10000, 20000),
            (2, 10000, -1),
        ):
            inputs = [np.array(v) for v in inputs]
            op = core.CreateOperator(
                "Range",
                names[len(inputs) - 1],
                ["Y"]
            )

            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
            self.assertDeviceChecks(dc, op, inputs, [0])

        inputs = (np.array(0), np.array(10), np.array(0))
        op = core.CreateOperator(
            "Range",
            names[len(inputs) - 1],
            ["Y"]
        )
        with six.assertRaisesRegex(self, RuntimeError,
                                   'Step size cannot be 0'):
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
class TorchIntegration(hu.HypothesisTestCase):
    """Checks that torch.ops._caffe2.* bindings match the corresponding
    caffe2 operators run through the workspace API."""

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_bbox_transform(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """
        Test with rois for multiple images in a batch
        """
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)

        def bbox_transform_ref():
            # Run the caffe2 BBoxTransform operator through the workspace
            # to produce the reference output.
            ref_op = core.CreateOperator(
                "BBoxTransform",
                ["rois", "deltas", "im_info"],
                ["box_out"],
                apply_scale=False,
                rotated=rotated,
                angle_bound_on=angle_bound_on,
                clip_angle_thresh=clip_angle_thresh,
            )
            workspace.FeedBlob("rois", rois)
            workspace.FeedBlob("deltas", deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("box_out")

        box_out = torch.tensor(bbox_transform_ref())
        # Positional args mirror the operator arguments above
        # (weights, apply_scale, rotated, angle_bound_on,
        #  angle_bound_lo, angle_bound_hi, clip_angle_thresh).
        a, b = torch.ops._caffe2.BBoxTransform(
            torch.tensor(rois),
            torch.tensor(deltas),
            torch.tensor(im_info),
            [1.0, 1.0, 1.0, 1.0],
            False,
            rotated,
            angle_bound_on,
            -90,
            90,
            clip_angle_thresh,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(box_out, a)

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_box_with_nms_limits(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """BoxWithNMSLimit: compare torch binding outputs against the
        caffe2 operator for every output blob."""
        rotated = False  # FIXME remove this after rotation is supported
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)
        # First transform the boxes via the torch binding, then feed the
        # results into both implementations of BoxWithNMSLimit.
        pred_bbox, batch_splits = [
            t.detach().numpy()
            for t in torch.ops._caffe2.BBoxTransform(
                torch.tensor(rois),
                torch.tensor(deltas),
                torch.tensor(im_info),
                [1.0, 1.0, 1.0, 1.0],
                False,
                rotated,
                angle_bound_on,
                -90,
                90,
                clip_angle_thresh,
                legacy_plus_one=True,
            )
        ]
        class_prob = np.random.randn(
            sum(roi_counts), num_classes).astype(np.float32)
        score_thresh = 0.5
        nms_thresh = 0.5
        topk_per_image = sum(roi_counts) / 2

        def box_with_nms_limit_ref():
            input_blobs = ["class_prob", "pred_bbox", "batch_splits"]
            output_blobs = [
                "score_nms",
                "bbox_nms",
                "class_nms",
                "batch_splits_nms",
                "keeps_nms",
                "keeps_size_nms",
            ]
            ref_op = core.CreateOperator(
                "BoxWithNMSLimit",
                input_blobs,
                output_blobs,
                score_thresh=float(score_thresh),
                nms=float(nms_thresh),
                detections_per_im=int(topk_per_image),
                soft_nms_enabled=False,
                soft_nms_method="linear",
                soft_nms_sigma=0.5,
                soft_nms_min_score_thres=0.001,
                rotated=rotated,
            )
            workspace.FeedBlob("class_prob", class_prob)
            workspace.FeedBlob("pred_bbox", pred_bbox)
            workspace.FeedBlob("batch_splits", batch_splits)
            workspace.RunOperatorOnce(ref_op)
            # Generator: blobs are fetched lazily during the zip below.
            return (workspace.FetchBlob(b) for b in output_blobs)

        output_refs = box_with_nms_limit_ref()
        outputs = torch.ops._caffe2.BoxWithNMSLimit(
            torch.tensor(class_prob),
            torch.tensor(pred_bbox),
            torch.tensor(batch_splits),
            score_thresh=float(score_thresh),
            nms=float(nms_thresh),
            detections_per_im=int(topk_per_image),
            soft_nms_enabled=False,
            soft_nms_method="linear",
            soft_nms_sigma=0.5,
            soft_nms_min_score_thres=0.001,
            rotated=rotated,
            cls_agnostic_bbox_reg=False,
            input_boxes_include_bg_cls=True,
            output_classes_include_bg_cls=True,
            legacy_plus_one=True,
        )

        for o, o_ref in zip(outputs, output_refs):
            torch.testing.assert_allclose(o, o_ref)

    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals(self, A, H, W, img_count):
        """GenerateProposals (CPU): torch binding vs caffe2 operator on
        fixed-size synthetic inputs."""
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        bbox_deltas = (np.linspace(0, 10,
                                   num=img_count * 4 * A * H * W).reshape(
            (img_count, 4 * A, H, W)).astype(np.float32))
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        rois, rois_probs = generate_proposals_ref()
        rois = torch.tensor(rois)
        rois_probs = torch.tensor(rois_probs)
        # Positional args: spatial_scale, pre_nms_topN, post_nms_topN,
        # nms_thresh, min_size, ... — presumably matching the operator's
        # argument order; verify against the _caffe2 op schema.
        a, b = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores), torch.tensor(bbox_deltas),
            torch.tensor(im_info), torch.tensor(anchors),
            2.0, 6000, 300, 0.7, 16, True, -90, 90, 1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(rois, a)
        torch.testing.assert_allclose(rois_probs, b)

    @given(
        bsz=st.integers(1, 5),
        seq_lens=st.integers(1, 6),
        emb_lens=st.integers(5, 10),
        hidden_size=st.integers(3, 7),
        num_layers=st.integers(1, 4),
        has_biases=st.booleans(),
        is_bidirectional=st.booleans(),
        batch_first=st.booleans(),
    )
    def test_inference_lstm(
        self,
        bsz,
        seq_lens,
        emb_lens,
        hidden_size,
        num_layers,
        has_biases,
        is_bidirectional,
        batch_first,
    ):
        """InferenceLSTM: compare the torch binding against the caffe2
        operator, using the weights of a freshly constructed torch LSTM."""
        num_directions = 2 if is_bidirectional else 1
        # Zero initial hidden/cell state shared by both implementations.
        hx = np.zeros((num_layers * num_directions, bsz, hidden_size),
                      dtype=np.float32)

        if batch_first:
            inputs = np.random.randn(
                bsz, seq_lens, emb_lens).astype(np.float32)
        else:
            inputs = np.random.randn(
                seq_lens, bsz, emb_lens).astype(np.float32)

        torch_lstm = torch.nn.LSTM(
            emb_lens,
            hidden_size,
            batch_first=batch_first,
            bidirectional=is_bidirectional,
            bias=has_biases,
            num_layers=num_layers,
        )

        def inference_lstm_ref():
            input_names = ["inputs", "hidden_0", "hidden_1"]
            workspace.FeedBlob("inputs", inputs)
            workspace.FeedBlob("hidden_0", hx)
            workspace.FeedBlob("hidden_1", hx)
            # Feed the torch LSTM's flat weights as extra inputs so both
            # implementations use identical parameters.
            for i, param in enumerate(torch_lstm._flat_weights):
                input_names.append("param_{}".format(i))
                workspace.FeedBlob("param_{}".format(i),
                                   param.detach().numpy())

            ref_op = core.CreateOperator(
                "InferenceLSTM",
                input_names,
                ["output", "hidden", "cell"],
                num_layers=num_layers,
                has_biases=has_biases,
                batch_first=batch_first,
                bidirectional=is_bidirectional,
            )
            workspace.RunOperatorOnce(ref_op)
            return (workspace.FetchBlob("output"),
                    workspace.FetchBlob("hidden"),
                    workspace.FetchBlob("cell"))

        output, hidden, cell = inference_lstm_ref()
        output = torch.tensor(output)
        hidden = torch.tensor(hidden)
        cell = torch.tensor(cell)
        lstm_in = [
            torch.from_numpy(inputs),
            torch.from_numpy(hx),
            torch.from_numpy(hx),
        ] + [param.detach() for param in torch_lstm._flat_weights]

        a, b, c = torch.ops._caffe2.InferenceLSTM(
            lstm_in, num_layers, has_biases, batch_first, is_bidirectional)
        torch.testing.assert_allclose(output, a)
        torch.testing.assert_allclose(hidden, b)
        torch.testing.assert_allclose(cell, c)

    # Test case is using workspace.has_cuda_support and not
    # workspace.has_gpu_support to exclude it from HIP because tensor
    # interop doesn't work for HIP tensors yet
    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals_cuda(self, A, H, W, img_count):
        """GenerateProposals on CUDA tensors, checked against the CPU
        caffe2 reference."""
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        bbox_deltas = (np.linspace(0, 10,
                                   num=img_count * 4 * A * H * W).reshape(
            (img_count, 4 * A, H, W)).astype(np.float32))
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        rois, rois_probs = generate_proposals_ref()
        rois = torch.tensor(rois)
        rois_probs = torch.tensor(rois_probs)
        a, b = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores).cuda(),
            torch.tensor(bbox_deltas).cuda(),
            torch.tensor(im_info).cuda(),
            torch.tensor(anchors).cuda(),
            2.0, 6000, 300, 0.7, 16, True, -90, 90, 1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(rois, a.cpu())
        torch.testing.assert_allclose(rois_probs, b.cpu())

    # NOTE(review): this helper carries @given but is invoked directly by
    # test_roi_align_cpu/cuda with only `device` — presumably hypothesis
    # fills the remaining parameters; confirm this invocation pattern.
    @given(
        N=st.integers(min_value=1, max_value=2),
        C=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
    )
    def _test_roi_align(self, N, C, H, W, device):
        """RoIAlign on `device`, checked against the CPU caffe2 operator."""
        def rand_roi():
            # [batch_index, x1, y1, x2, y2] with coordinates inside the map.
            return np.array([
                float(int(N * np.random.rand())),
                0.5 * np.random.rand() * W,
                0.5 * np.random.rand() * H,
                (0.5 + 0.5 * np.random.rand()) * W,
                (0.5 + 0.5 * np.random.rand()) * H,
            ]).astype(np.float32)

        feature = np.random.randn(N, C, H, W).astype(np.float32)
        rois = np.array([rand_roi() for _ in range(10)])

        def roi_align_ref(_feature, _rois):
            ref_op = core.CreateOperator(
                "RoIAlign",
                ["feature", "rois"],
                ["roi_feature"],
                spatial_scale=1.0,
                pooled_h=3,
                pooled_w=3,
                sampling_ratio=0,
            )
            workspace.FeedBlob("feature", _feature)
            workspace.FeedBlob("rois", _rois)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("roi_feature")

        roi_feature_ref = roi_align_ref(feature, rois)
        roi_feature = torch.ops._caffe2.RoIAlign(
            torch.Tensor(feature).to(device),
            torch.Tensor(rois).to(device),
            order="NCHW",
            spatial_scale=1.0,
            pooled_h=3,
            pooled_w=3,
            sampling_ratio=0,
            aligned=False,
        )
        torch.testing.assert_allclose(roi_feature_ref, roi_feature.cpu())

    def test_roi_align_cpu(self):
        self._test_roi_align(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_roi_align_cuda(self):
        self._test_roi_align(device="cuda")

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10))
    def test_collect_and_distribute_fpn_rpn_proposals_op(self, roi_counts):
        """CollectAndDistributeFpnRpnProposals must equal CollectRpnProposals
        followed by DistributeFpnProposals."""
        batch_size = len(roi_counts)
        im_dims = np.random.randint(100, 600, batch_size)
        # Five FPN levels of rois followed by five levels of scores.
        rpn_rois_and_scores = []
        for i in range(5):
            rpn_rois_and_scores.append(
                torch.Tensor(generate_rois(roi_counts, im_dims)))
        for i in range(5):
            rpn_rois_and_scores.append(torch.rand(sum(roi_counts)))

        rois = torch.ops._caffe2.CollectRpnProposals(
            rpn_rois_and_scores,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
        )
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            rois,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            legacy_plus_one=True,
        )
        all_outputs = torch.ops._caffe2.CollectAndDistributeFpnRpnProposals(
            rpn_rois_and_scores,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
            legacy_plus_one=True,
        )

        rois_fpn_list = fpn_outputs[:-1]
        rois_idx_restore_int32 = fpn_outputs[-1]

        # [rois] + fpn_outputs should be equal to all_outputs
        torch.testing.assert_allclose(rois, all_outputs[0])
        for x, y in zip(fpn_outputs, all_outputs[1:]):
            torch.testing.assert_allclose(x, y)

    # NOTE(review): @given helper called directly with only `device` by the
    # wrappers below — same pattern as _test_roi_align.
    @given(X=hu.tensor(),
           fast_gelu=st.booleans())
    def _test_gelu_op(self, X, fast_gelu, device):
        """Gelu binding vs the Gaussian-CDF reference (looser tolerance
        for the fast tanh approximation)."""
        def _gelu_ref(_X):
            return (_X * norm.cdf(_X).astype(np.float32), )
        expected_output, = _gelu_ref(X)
        actual_output = torch.ops._caffe2.Gelu(torch.tensor(X), fast_gelu)

        rtol = 1e-3 if fast_gelu else 1e-4
        atol = 1e-5
        torch.testing.assert_allclose(
            expected_output, actual_output.cpu(), rtol=rtol, atol=atol)

    def test_gelu_op(self):
        self._test_gelu_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_gelu_op_cuda(self):
        self._test_gelu_op(device="cuda")

    @given(inputs=hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    ))
    def _test_lengths_op(self, inputs, ref_op_name, torch_op, device):
        """Shared driver for Lengths{Sum,Mean,Max}: run the caffe2 op named
        `ref_op_name` and compare with `torch_op`."""
        data, lengths = inputs

        def _lengths_ref(X, Y):
            ref_op = core.CreateOperator(ref_op_name, ["X", "Y"], "out")
            workspace.FeedBlob("X", X)
            workspace.FeedBlob("Y", Y)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("out")

        expected_output = _lengths_ref(data, lengths)
        actual_output = torch_op(
            torch.tensor(data), torch.tensor(lengths, dtype=torch.int32))

        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def _test_lengths_sum_op(self, device):
        self._test_lengths_op("LengthsSum", torch.ops._caffe2.LengthsSum,
                              device)

    def test_lengths_sum_op(self):
        self._test_lengths_sum_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_sum_op_cuda(self):
        self._test_lengths_sum_op(device="cuda")

    def _test_lengths_mean_op(self, device):
        self._test_lengths_op("LengthsMean", torch.ops._caffe2.LengthsMean,
                              device)

    def test_lengths_mean_op(self):
        self._test_lengths_mean_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_mean_op_cuda(self):
        self._test_lengths_mean_op(device="cuda")

    def _test_lengths_max_op(self, device):
        self._test_lengths_op("LengthsMax", torch.ops._caffe2.LengthsMax,
                              device)

    def test_lengths_max_op(self):
        self._test_lengths_max_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_max_op_cuda(self):
        self._test_lengths_max_op(device="cuda")

    def _test_resize_nearest_op(self, device):
        """ResizeNearest binding vs caffe2 operator on a small NCHW tensor."""
        data = np.random.rand(1, 2, 3, 4).astype(np.float32)

        def _resize_nearest_ref(X):
            ref_op = core.CreateOperator(
                "ResizeNearest", ["X"], ["Y"],
                width_scale=2.0, height_scale=1.5, order="NCHW",
            )
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _resize_nearest_ref(data)
        actual_output = torch.ops._caffe2.ResizeNearest(
            torch.tensor(data).to(device),
            order="NCHW", width_scale=2.0, height_scale=1.5,
        )

        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_resize_nearest_op_cpu(self):
        return self._test_resize_nearest_op("cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_resize_nearest_op_cuda(self):
        return self._test_resize_nearest_op("cuda")
class TestSegmentOps(hu.HypothesisTestCase):
    """Tests for segment/lengths reduction operators (Sorted/Unsorted and
    Sparse Segment ops, Lengths* ops) against shared reference suites."""

    def test_sorted_segment_ops(self):
        """Run the shared reference suite for SortedSegment* operators."""
        SegmentsTester()._test(
            'SortedSegment',
            hu.segmented_tensor(
                dtype=np.float32,
                is_sorted=True,
                allow_empty=True
            ),
            REFERENCES_ALL + REFERENCES_SORTED
        )(self)

    def test_unsorted_segment_ops(self):
        """Run the shared reference suite for UnsortedSegment* operators."""
        SegmentsTester()._test(
            'UnsortedSegment',
            hu.segmented_tensor(
                dtype=np.float32,
                is_sorted=False,
                allow_empty=True
            ),
            REFERENCES_ALL,
        )(self)

    def test_unsorted_segment_ops_gpu(self):
        """UnsortedSegment* on GPU when available; gradients skipped."""
        SegmentsTester()._test(
            'UnsortedSegment',
            hu.segmented_tensor(
                dtype=np.float32,
                is_sorted=False,
                allow_empty=True,
            ),
            REFERENCES_ALL,
            gpu=workspace.has_gpu_support,
            grad_check=False,
        )(self)

    def test_sparse_sorted_segment_ops(self):
        """SparseSortedSegment* with indices into a sparse data tensor."""
        SegmentsTester()._test(
            'SparseSortedSegment',
            hu.sparse_segmented_tensor(
                dtype=np.float32,
                is_sorted=True,
                allow_empty=True
            ),
            REFERENCES_ALL
        )(self)

    def test_sparse_unsorted_segment_ops(self):
        """SparseUnsortedSegment* with unsorted segment ids."""
        SegmentsTester()._test(
            'SparseUnsortedSegment',
            hu.sparse_segmented_tensor(
                dtype=np.float32,
                is_sorted=False,
                allow_empty=True
            ),
            REFERENCES_ALL
        )(self)

    def test_lengths_ops(self):
        """Lengths* operators, including the lengths-only references."""
        LengthsTester()._test(
            'Lengths',
            hu.lengths_tensor(
                dtype=np.float32,
                min_value=1,
                max_value=5,
                allow_empty=True
            ),
            REFERENCES_ALL + REFERENCES_LENGTHS_ONLY,
        )(self)

    def test_sparse_lengths_ops(self):
        """SparseLengths* with both 32- and 64-bit index types."""
        for itype in [np.int32, np.int64]:
            LengthsTester()._test(
                'SparseLengths',
                hu.sparse_lengths_tensor(
                    dtype=np.float32,
                    min_value=1,
                    max_value=5,
                    allow_empty=True,
                    itype=itype,
                ),
                REFERENCES_ALL,
            )(self)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_unsorted_sums_large(self, gc, dc):
        """Device consistency for UnsortedSegmentSum on a large input."""
        X = np.random.rand(10000, 32, 12).astype(np.float32)
        segments = np.random.randint(0, 10000, size=10000).astype(np.int32)
        op = core.CreateOperator("UnsortedSegmentSum", ["X", "segments"],
                                 "out")
        self.assertDeviceChecks(dc, op, [X, segments], [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_sorted_segment_range_mean(self, gc, dc):
        """Device + gradient checks for SortedSegmentRangeMean."""
        X = np.random.rand(6, 32, 12).astype(np.float32)
        segments = np.array([0, 0, 1, 1, 2, 3]).astype(np.int32)
        op = core.CreateOperator(
            "SortedSegmentRangeMean",
            ["X", "segments"],
            "out"
        )
        self.assertDeviceChecks(dc, op, [X, segments], [0])
        self.assertGradientChecks(gc, op, [X, segments], 0, [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_sorted_segment_range_log_mean_exp(self, gc, dc):
        """Device + gradient checks for SortedSegmentRangeLogMeanExp."""
        X = np.random.rand(7, 32, 12).astype(np.float32)
        segments = np.array([0, 0, 1, 1, 2, 2, 3]).astype(np.int32)
        op = core.CreateOperator(
            "SortedSegmentRangeLogMeanExp",
            ["X", "segments"],
            "out"
        )
        self.assertDeviceChecks(dc, op, [X, segments], [0])
        self.assertGradientChecks(gc, op, [X, segments], 0, [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_unsorted_means_large(self, gc, dc):
        """Device consistency for UnsortedSegmentMean on a large input."""
        X = np.random.rand(10000, 31, 19).astype(np.float32)
        segments = np.random.randint(0, 10000, size=10000).astype(np.int32)
        op = core.CreateOperator("UnsortedSegmentMean", ["X", "segments"],
                                 "out")
        self.assertDeviceChecks(dc, op, [X, segments], [0])

    @given(
        inputs=hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        **hu.gcs
    )
    def test_lengths_sum(self, inputs, gc, dc):
        """LengthsSum: sum each group of L[g] consecutive rows."""
        X, Y = inputs
        op = core.CreateOperator("LengthsSum", ["X", "Y"], "out")

        def ref(D, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[line, :]
                    else:
                        R[g] += D[line]
                    line += 1
            return [R]

        self.assertReferenceChecks(gc, op, [X, Y], ref)
        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertGradientChecks(gc, op, [X, Y], 0, [0])

    @given(
        inputs=hu.sparse_lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True
        ),
        **hu.gcs
    )
    def test_sparse_lengths_sum(self, inputs, gc, dc):
        """SparseLengthsSum: like LengthsSum but rows are gathered via the
        index tensor I first."""
        X, Y, Z = inputs
        op = core.CreateOperator("SparseLengthsSum", ["X", "Y", "Z"], "out")

        def ref(D, I, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[I[line], :]
                    else:
                        R[g] += D[I[line]]
                    line += 1
            return [R]

        self.assertReferenceChecks(gc, op, [X, Y, Z], ref)
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])
        self.assertGradientChecks(gc, op, [X, Y, Z], 0, [0])

    @given(
        inputs=hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        **hu.gcs
    )
    def test_lengths_mean(self, inputs, gc, dc):
        """LengthsMean: per-group mean of consecutive rows."""
        X, Y = inputs
        op = core.CreateOperator("LengthsMean", ["X", "Y"], "out")

        def ref(D, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[line, :]
                    else:
                        R[g] += D[line]
                    line += 1
                # Divide only for groups longer than 1: for L[g] == 1 the
                # sum already equals the mean, and L[g] == 0 leaves zeros.
                if L[g] > 1:
                    if len(D.shape) > 1:
                        R[g, :] = R[g, :] / L[g]
                    else:
                        R[g] = R[g] / L[g]
            return [R]

        self.assertReferenceChecks(gc, op, [X, Y], ref)
        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertGradientChecks(gc, op, [X, Y], 0, [0])

    @given(
        inputs=hu.sparse_lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True
        ),
        **hu.gcs
    )
    def test_sparse_lengths_mean(self, inputs, gc, dc):
        """SparseLengthsMean: mean over gathered rows per group."""
        X, Y, Z = inputs
        op = core.CreateOperator("SparseLengthsMean", ["X", "Y", "Z"], "out")

        def ref(D, I, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[I[line], :]
                    else:
                        R[g] += D[I[line]]
                    line += 1
                # Same division convention as the LengthsMean reference.
                if L[g] > 1:
                    if len(D.shape) > 1:
                        R[g, :] = R[g, :] / L[g]
                    else:
                        R[g] = R[g] / L[g]
            return [R]

        self.assertReferenceChecks(gc, op, [X, Y, Z], ref)
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])
        self.assertGradientChecks(gc, op, [X, Y, Z], 0, [0])

    @given(
        grad_on_weights=st.booleans(),
        inputs=hu.sparse_lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True
        ),
        seed=st.integers(min_value=0, max_value=100),
        **hu.gcs
    )
    def test_sparse_lengths_weighted_sum(
            self, grad_on_weights, inputs, seed, gc, dc):
        """SparseLengthsWeightedSum against the shared forward/backward
        references; also checks the weight gradient when enabled."""
        D, I, L = inputs
        np.random.seed(int(seed))
        W = np.random.rand(I.size).astype(np.float32)
        op = core.CreateOperator(
            "SparseLengthsWeightedSum",
            ["D", "W", "I", "L"],
            "out",
            grad_on_weights=grad_on_weights)
        self.assertDeviceChecks(dc, op, [D, W, I, L], [0])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[D, W, I, L],
            reference=sparse_lengths_weighted_sum_ref,
            threshold=1e-4,
            output_to_grad='out',
            grad_reference=partial(
                sparse_lengths_weighted_sum_grad_ref,
                grad_on_weights=grad_on_weights),
        )
        self.assertGradientChecks(gc, op, [D, W, I, L], 0, [0])
        if grad_on_weights:
            self.assertGradientChecks(gc, op, [D, W, I, L], 1, [0])

    @given(**hu.gcs)
    def test_sparse_lengths_indices_in_gradient_sum_gpu(self, gc, dc):
        """Device consistency for the indices-in-gradient Sum gradient op."""
        X = np.random.rand(3, 3, 4, 5).astype(np.float32)
        Y = np.asarray([3, 3, 2]).astype(np.int32)
        Z = np.random.randint(0, 50, size=8).astype(np.int64)
        op = core.CreateOperator(
            "SparseLengthsIndicesInGradientSumGradient",
            ["X", "Y", "Z"],
            "out"
        )
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])

    @given(**hu.gcs)
    def test_sparse_lengths_indices_in_gradient_mean_gpu(self, gc, dc):
        """Device consistency for the indices-in-gradient Mean gradient op."""
        X = np.random.rand(3, 3, 4, 5).astype(np.float32)
        Y = np.asarray([3, 3, 2]).astype(np.int32)
        Z = np.random.randint(0, 50, size=8).astype(np.int64)
        op = core.CreateOperator(
            "SparseLengthsIndicesInGradientMeanGradient",
            ["X", "Y", "Z"],
            "out"
        )
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])

    @given(**hu.gcs_cpu_only)
    def test_legacy_sparse_and_lengths_sum_gradient(self, gc, dc):
        """SparseLengthsSumGradient and LengthsSumGradient must agree on
        identical inputs."""
        X = np.random.rand(3, 64).astype(np.float32)
        Y = np.asarray([20, 20, 10]).astype(np.int32)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)

        test_net = core.Net("test_net")
        test_net.SparseLengthsSumGradient(["X", "Y"], "out1")
        test_net.LengthsSumGradient(["X", "Y"], "out2")
        workspace.RunNetOnce(test_net)
        out1 = workspace.FetchBlob("out1")
        out2 = workspace.FetchBlob("out2")
        self.assertTrue((out1 == out2).all())

    @given(**hu.gcs)
    def test_sparse_lengths_sum_invalid_index(self, gc, dc):
        """Out-of-range indices must make SparseLengthsSum raise."""
        D = np.random.rand(50, 3, 4, 5).astype(np.float32)
        # All indices are >= 10000, far past D's first dimension (50).
        I = (np.random.randint(0, 10000, size=10) + 10000).astype(np.int64)
        L = np.asarray([4, 4, 2]).astype(np.int32)
        op = core.CreateOperator(
            "SparseLengthsSum",
            ["D", "I", "L"],
            "out")
        workspace.FeedBlob('D', D)
        workspace.FeedBlob('I', I)
        workspace.FeedBlob('L', L)
        with self.assertRaises(RuntimeError):
            workspace.RunOperatorOnce(op)

    @given(**hu.gcs_cpu_only)
    def test_sparse_lengths_positional_weighted_sum(
            self, gc, dc):
        """SparseLengthsPositionalWeightedSum equals gathering per-position
        weights (LengthsRangeFill + Gather) and running the plain
        SparseLengthsWeightedSum."""
        D = np.random.rand(50, 3, 4, 5).astype(np.float32)
        W = np.random.rand(50).astype(np.float32)
        indices = np.random.randint(0, 50, size=10).astype(np.int64)
        L = np.asarray([4, 4, 2]).astype(np.int32)
        op = core.CreateOperator(
            "SparseLengthsPositionalWeightedSum",
            ["D", "W", "indices", "L"],
            "out")

        def ref_sparse(D, W, indices, L):
            # Build per-position weight indices 0..L[g]-1 for each group...
            workspace.FeedBlob("L", L)
            lengths_range_fill_op = core.CreateOperator(
                "LengthsRangeFill", ["L"], ["L_pos_seq"])
            workspace.RunOperatorOnce(lengths_range_fill_op)

            # ...gather the corresponding weights...
            workspace.FeedBlob("W", W)
            gather_op = core.CreateOperator(
                "Gather", ["W", "L_pos_seq"], ["W_gathered"])
            workspace.RunOperatorOnce(gather_op)

            # ...and run the non-positional weighted sum with them.
            workspace.FeedBlob("D", D)
            workspace.FeedBlob("indices", indices)
            sparse_op = core.CreateOperator(
                "SparseLengthsWeightedSum",
                ["D", "W_gathered", "indices", "L"],
                "out_ref")
            workspace.RunOperatorOnce(sparse_op)

            return (workspace.FetchBlob("out_ref"),)

        self.assertReferenceChecks(
            gc, op, [D, W, indices, L], ref_sparse)
class TestConcatSplitOps(hu.HypothesisTestCase):
    """Tests for the Concat, Split and SplitByLengths operators.

    Each test compares the operator's output against a pure-numpy
    reference via assertReferenceChecks, and additionally runs device
    and gradient checks where the operator supports them.
    """

    @given(tensor_splits=_tensor_splits(), **hu.gcs)
    def test_concat(self, tensor_splits, gc, dc):
        """Concat along a drawn axis; second output is the split sizes."""
        axis, _, splits = tensor_splits
        op = core.CreateOperator(
            "Concat",
            ['X_{}'.format(i) for i in range(len(splits))],
            ['concat_result', 'split_info'],
            axis=axis)
        # Reference: numpy concatenate + per-input extent along `axis`.
        self.assertReferenceChecks(
            gc, op, splits,
            lambda *splits: (
                np.concatenate(splits, axis=axis),
                np.array([a.shape[axis] for a in splits])))
        self.assertDeviceChecks(dc, op, splits, [0, 1])
        self.assertGradientChecks(gc, op, splits, 0, [0])

    @given(tensor_splits=_tensor_splits(add_axis=True), **hu.gcs)
    def test_concat_add_axis(self, tensor_splits, gc, dc):
        """Concat with add_axis=1: inputs are stacked along a new axis."""
        axis, _, splits = tensor_splits
        op = core.CreateOperator(
            "Concat",
            ['X_{}'.format(i) for i in range(len(splits))],
            ['concat_result', 'split_info'],
            axis=axis, add_axis=1)
        # Reference: expand each input at `axis`, then concatenate; every
        # input contributes exactly one slot, hence split_info of all 1s.
        self.assertReferenceChecks(
            gc, op, splits,
            lambda *splits: (
                np.concatenate(
                    [np.expand_dims(a, axis) for a in splits], axis=axis),
                np.array([1] * len(splits))))
        self.assertDeviceChecks(dc, op, splits, [0, 1])
        # Gradient flows back to every input, so check each one.
        for i in range(len(splits)):
            self.assertGradientChecks(gc, op, splits, i, [0])

    @given(tensor_splits=_tensor_splits(), split_as_arg=st.booleans(),
           **hu.gcs)
    def test_split(self, tensor_splits, split_as_arg, gc, dc):
        """Split a concatenated tensor back into its pieces.

        `split_as_arg` toggles whether split sizes are passed as an
        operator argument or as a second runtime input.
        """
        axis, split_info, splits = tensor_splits
        # NOTE: a stray `split_as_arg = True` previously overrode the
        # hypothesis-drawn value here, leaving the runtime-input branch
        # untested. It has been removed so both code paths are exercised.
        if split_as_arg:
            input_names = ['input']
            input_tensors = [np.concatenate(splits, axis=axis)]
            kwargs = dict(axis=axis, split=split_info)
        else:
            input_names = ['input', 'split']
            input_tensors = [np.concatenate(splits, axis=axis), split_info]
            kwargs = dict(axis=axis)
        op = core.CreateOperator(
            "Split",
            input_names,
            ['X_{}'.format(i) for i in range(len(split_info))],
            **kwargs)

        def split_ref(input, split=split_info):
            # Cumulative offsets delimit each output chunk along `axis`.
            s = np.cumsum([0] + list(split))
            return [
                np.array(input.take(np.arange(s[i], s[i + 1]), axis=axis))
                for i in range(len(split))
            ]

        outputs_with_grad = range(len(split_info))
        self.assertReferenceChecks(gc, op, input_tensors, split_ref)
        self.assertDeviceChecks(dc, op, input_tensors, outputs_with_grad)
        self.assertGradientChecks(gc, op, input_tensors, 0, outputs_with_grad)

    @given(inputs=hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    ), **hu.gcs)
    def test_split_by_lengths(self, inputs, gc, dc):
        """SplitByLengths: partition `data` into num_output pieces, where
        each piece covers an equal count of `lengths` entries."""
        data, lengths = inputs
        len_len = len(lengths)

        def _find_factor_simple(x):
            # Smallest factor of x among 2, 3, 5; falls back to x itself.
            for i in [2, 3, 5]:
                if x % i == 0:
                    return i
            return x

        num_output = _find_factor_simple(len_len)
        axis = 0
        op = core.CreateOperator(
            "SplitByLengths",
            ["data", "lengths"],
            ['X_{}'.format(i) for i in range(num_output)],
            axis=axis,
        )

        def split_by_lengths_ref(data, lengths, num_output=num_output, axis=0):
            # Element offsets of each lengths entry within `data`.
            idxs = np.cumsum([0] + list(lengths)).astype(np.int32)
            return [
                np.array(data.take(
                    np.arange(idxs[i * len_len // num_output],
                              idxs[(i + 1) * len_len // num_output]),
                    axis=axis))
                for i in range(num_output)
            ]

        outputs_with_grad = range(num_output)
        input_tensors = [data, lengths]
        # Reference/gradient checks run on CPU; lengths stays a CPU blob.
        self.assertReferenceChecks(
            hu.cpu_do, op, input_tensors, split_by_lengths_ref)
        self.assertDeviceChecks(dc, op, input_tensors, outputs_with_grad)
        self.assertGradientChecks(
            hu.cpu_do, op, input_tensors, 0, outputs_with_grad,
            input_device_options={"lengths": hu.cpu_do})
class TestUtilityOps(hu.HypothesisTestCase):
    """Tests for assorted utility operators (Slice, Transpose, NanCheck,
    Max/Min and their gradients, LengthsGather, Size, Alias, Range)."""

    @given(X=hu.tensor(), args=st.booleans(), **hu.gcs)
    def test_slice(self, X, args, gc, dc):
        """Slice a random dim; `args` toggles attribute vs runtime inputs."""
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        # -1 end means "to the end" for untouched dimensions.
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end
        if args:
            # starts/ends passed as operator attributes.
            op = core.CreateOperator(
                "Slice", ["X"], ["Y"], starts=starts, ends=ends,
                device_option=gc)

            def slice_ref(X):
                slc = [slice(None)] * X.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [X[slc]]
            inputs = [X]
        else:
            # starts/ends passed as runtime input blobs.
            op = core.CreateOperator(
                "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc)

            def slice_ref(x, starts, ends):
                slc = [slice(None)] * x.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [x[slc]]
            inputs = [X, starts, ends]
        self.assertReferenceChecks(gc, op, inputs, slice_ref)
        self.assertDeviceChecks(dc, op, inputs, [0])
        self.assertGradientChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
        )

    @given(dtype=st.sampled_from([np.float32, np.int32]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with an explicit permutation or (null_axes) with the
        default full-reversal behavior of np.transpose(x, None)."""
        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)
        if null_axes:
            axes = None
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], axes=axes, engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes), )

        # NOTE(review): `axes` may be None here; presumably
        # assertReferenceChecks forwards it to the reference untouched —
        # confirm against the test-harness implementation.
        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def test_gpu_transpose_minusones(self):
        '''
        Repro a problem with earlier version of CuDNN Transpose Op that
        casted ints to floats.
        '''
        X = -np.ones((2, 10)).astype(np.int32)
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            workspace.FeedBlob("X", X)
            print("X:\n{}\n".format(workspace.FetchBlob("X")))
            op = core.CreateOperator("Transpose", ["X"], ["Y"], engine='CUDNN')
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob("Y")
            print("Y:\n{}\n".format(Y))
            # Every element must survive the int transpose as exactly -1.
            for j in list(Y.flatten()):
                self.assertEqual(-1, j)

    @given(m=st.integers(5, 10), n=st.integers(5, 10), o=st.integers(5, 10),
           nans=st.booleans(), **hu.gcs)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must raise iff the input contains a NaN."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison a single random element with NaN.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        # print('nans: {}'.format(nans))
        # print(X)

        def nan_reference(X, Y):
            if not np.isnan(X).any():
                return [X]
            else:
                return [np.array([])]

        op = core.CreateOperator("NanCheck", ["X", "other"], ["Y"])

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            # A RuntimeError is only acceptable when NaNs were injected.
            self.assertTrue(nans, "No NaNs but failed")

        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            # Expected failure path when the gradient contains NaN.
            pass

    @given(n=st.integers(4, 5), m=st.integers(6, 7), d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Elementwise Max over three tensors vs np.maximum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator("Max", ["X", "Y", "Z"], ["mx"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7), d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_max_grad(self, n, m, d, gc, dc):
        """MaxGradient: upstream grad is routed to inputs equal to the max."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.maximum(np.maximum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def max_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                # Gradient passes through wherever `a` achieved the max.
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MaxGradient", ["mx", "go", "X", "Y", "Z"], ["gX", "gY", "gZ"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(n=st.integers(4, 5), m=st.integers(6, 7), d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_min(self, n, m, d, gc, dc):
        """Elementwise Min over three tensors vs np.minimum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def min_op(X, Y, Z):
            return [np.minimum(np.minimum(X, Y), Z)]

        op = core.CreateOperator("Min", ["X", "Y", "Z"], ["mx"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7), d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_min_grad(self, n, m, d, gc, dc):
        """MinGradient: upstream grad is routed to inputs equal to the min."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.minimum(np.minimum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def min_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MinGradient", ["mx", "go", "X", "Y", "Z"], ["gX", "gY", "gZ"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(inputs=hu.lengths_tensor().flatmap(lambda pair: st.tuples(
        st.just(pair[0]),
        st.just(pair[1]),
        hu.dims(max_value=len(pair[1])),
    )).flatmap(lambda tup: st.tuples(
        st.just(tup[0]),
        st.just(tup[1]),
        hu.arrays(tup[2],
                  dtype=np.int32,
                  elements=st.integers(min_value=0,
                                       max_value=len(tup[1]) - 1)),
    )), **hu.gcs_cpu_only)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather: gather whole length-delimited segments by index."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            # ends[i] is the exclusive end offset of segment i in `items`.
            ends = np.cumsum(lengths)
            return [
                np.concatenate(
                    list(items[ends[i] - lengths[i]:ends[i]]
                         for i in indices))
            ]

        op = core.CreateOperator(
            "LengthsGather", ["items", "lengths", "indices"], ["output"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(**hu.gcs)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements in the tensor."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator("Size", ["X"], ["output"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )

    def test_alias_op(self):
        """ Don't use hypothesis because there are only 2 cases to check"""
        # Exercise both the empty-tensor and non-empty cases.
        for size in [0, 5]:
            X = np.arange(size).astype(np.float32)
            workspace.FeedBlob('X', X)
            op = core.CreateOperator("Alias", ["X"], ["Y"])
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob('Y')
            np.testing.assert_array_equal(X, Y)

    @given(**hu.gcs)
    def test_range(self, gc, dc):
        """Range with 1/2/3 runtime inputs matches np.arange; a zero step
        must raise."""
        names = [
            ('stop_', ),
            ('start_', 'stop_'),
            ('start_', 'stop_', 'step_'),
        ]
        # Most random values aren't great here, so use a fixed set instead of
        # hypothesis.
        for inputs in (
            (10, ),
            (np.float32(10.0), ),
            (0, ),
            (0, 0),
            (10., 5.0, -1.),
            (2, 10000),
            (2, 10000, 20000),
            (2, 10000, -1),
        ):
            inputs = [np.array(v) for v in inputs]
            op = core.CreateOperator("Range", names[len(inputs) - 1], ["Y"])
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
            self.assertDeviceChecks(dc, op, inputs, [0])

        with self.assertRaisesRegexp(RuntimeError, 'Step size cannot be 0'):
            inputs = (np.array(0), np.array(10), np.array(0))
            op = core.CreateOperator("Range", names[len(inputs) - 1], ["Y"])
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
class TorchIntegration(hu.HypothesisTestCase):
    """Tests that the torch.ops._caffe2.* bindings of caffe2 operators
    produce the same results as running the operator through the caffe2
    workspace directly."""

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_bbox_transform(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """
        Test with rois for multiple images in a batch
        """
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)

        def bbox_transform_ref():
            # Run BBoxTransform through the caffe2 workspace as reference.
            ref_op = core.CreateOperator(
                "BBoxTransform",
                ["rois", "deltas", "im_info"],
                ["box_out"],
                apply_scale=False,
                rotated=rotated,
                angle_bound_on=angle_bound_on,
                clip_angle_thresh=clip_angle_thresh,
            )
            workspace.FeedBlob("rois", rois)
            workspace.FeedBlob("deltas", deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("box_out")

        box_out = torch.tensor(bbox_transform_ref())
        a, b = torch.ops._caffe2.BBoxTransform(
            torch.tensor(rois),
            torch.tensor(deltas),
            torch.tensor(im_info),
            [1.0, 1.0, 1.0, 1.0],
            False,
            rotated,
            angle_bound_on,
            -90,
            90,
            clip_angle_thresh,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(box_out, a)

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_box_with_nms_limits(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """BoxWithNMSLimit: compare the torch binding's six outputs against
        the workspace-run operator."""
        rotated = False  # FIXME remove this after rotation is supported
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)
        # Use the torch BBoxTransform binding to produce the NMS inputs.
        pred_bbox, batch_splits = [
            t.detach().numpy() for t in torch.ops._caffe2.BBoxTransform(
                torch.tensor(rois),
                torch.tensor(deltas),
                torch.tensor(im_info),
                [1.0, 1.0, 1.0, 1.0],
                False,
                rotated,
                angle_bound_on,
                -90,
                90,
                clip_angle_thresh,
                legacy_plus_one=True,
            )
        ]
        class_prob = np.random.randn(
            sum(roi_counts), num_classes).astype(np.float32)
        score_thresh = 0.5
        nms_thresh = 0.5
        topk_per_image = sum(roi_counts) / 2

        def box_with_nms_limit_ref():
            input_blobs = ["class_prob", "pred_bbox", "batch_splits"]
            output_blobs = [
                "score_nms",
                "bbox_nms",
                "class_nms",
                "batch_splits_nms",
                "keeps_nms",
                "keeps_size_nms",
            ]
            ref_op = core.CreateOperator(
                "BoxWithNMSLimit",
                input_blobs,
                output_blobs,
                score_thresh=float(score_thresh),
                nms=float(nms_thresh),
                detections_per_im=int(topk_per_image),
                soft_nms_enabled=False,
                soft_nms_method="linear",
                soft_nms_sigma=0.5,
                soft_nms_min_score_thres=0.001,
                rotated=rotated,
            )
            workspace.FeedBlob("class_prob", class_prob)
            workspace.FeedBlob("pred_bbox", pred_bbox)
            workspace.FeedBlob("batch_splits", batch_splits)
            workspace.RunOperatorOnce(ref_op)
            # NOTE: returns a generator — blobs are fetched lazily, when the
            # comparison loop below iterates.
            return (workspace.FetchBlob(b) for b in output_blobs)

        output_refs = box_with_nms_limit_ref()
        outputs = torch.ops._caffe2.BoxWithNMSLimit(
            torch.tensor(class_prob),
            torch.tensor(pred_bbox),
            torch.tensor(batch_splits),
            score_thresh=float(score_thresh),
            nms=float(nms_thresh),
            detections_per_im=int(topk_per_image),
            soft_nms_enabled=False,
            soft_nms_method="linear",
            soft_nms_sigma=0.5,
            soft_nms_min_score_thres=0.001,
            rotated=rotated,
            cls_agnostic_bbox_reg=False,
            input_boxes_include_bg_cls=True,
            output_classes_include_bg_cls=True,
            legacy_plus_one=True,
        )

        for o, o_ref in zip(outputs, output_refs):
            torch.testing.assert_allclose(o, o_ref)

    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals(self, A, H, W, img_count):
        """GenerateProposals on fixed-size deterministic inputs (CPU)."""
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        bbox_deltas = (
            np.linspace(0, 10, num=img_count * 4 * A * H * W).reshape(
                (img_count, 4 * A, H, W)).astype(np.float32))
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        rois, rois_probs = generate_proposals_ref()
        rois = torch.tensor(rois)
        rois_probs = torch.tensor(rois_probs)
        a, b = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores),
            torch.tensor(bbox_deltas),
            torch.tensor(im_info),
            torch.tensor(anchors),
            2.0,
            6000,
            300,
            0.7,
            16,
            True,
            -90,
            90,
            1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(rois, a)
        torch.testing.assert_allclose(rois_probs, b)

    @given(
        bsz=st.integers(1, 5),
        seq_lens=st.integers(1, 6),
        emb_lens=st.integers(5, 10),
        hidden_size=st.integers(3, 7),
        num_layers=st.integers(1, 4),
        has_biases=st.booleans(),
        is_bidirectional=st.booleans(),
        batch_first=st.booleans(),
    )
    def test_inference_lstm(
        self,
        bsz,
        seq_lens,
        emb_lens,
        hidden_size,
        num_layers,
        has_biases,
        is_bidirectional,
        batch_first,
    ):
        """InferenceLSTM binding vs the workspace operator, using the
        weights of a freshly constructed torch.nn.LSTM."""
        num_directions = 2 if is_bidirectional else 1
        hx = np.zeros((num_layers * num_directions, bsz, hidden_size),
                      dtype=np.float32)
        if batch_first:
            inputs = np.random.randn(bsz, seq_lens,
                                     emb_lens).astype(np.float32)
        else:
            inputs = np.random.randn(seq_lens, bsz,
                                     emb_lens).astype(np.float32)
        torch_lstm = torch.nn.LSTM(
            emb_lens,
            hidden_size,
            batch_first=batch_first,
            bidirectional=is_bidirectional,
            bias=has_biases,
            num_layers=num_layers,
        )

        def inference_lstm_ref():
            input_names = ["inputs", "hidden_0", "hidden_1"]
            workspace.FeedBlob("inputs", inputs)
            workspace.FeedBlob("hidden_0", hx)
            workspace.FeedBlob("hidden_1", hx)
            # Feed the torch LSTM's flattened weights as extra inputs.
            for i, param in enumerate(torch_lstm._flat_weights):
                input_names.append("param_{}".format(i))
                workspace.FeedBlob("param_{}".format(i),
                                   param.detach().numpy())
            ref_op = core.CreateOperator(
                "InferenceLSTM",
                input_names,
                ["output", "hidden", "cell"],
                num_layers=num_layers,
                has_biases=has_biases,
                batch_first=batch_first,
                bidirectional=is_bidirectional,
            )
            workspace.RunOperatorOnce(ref_op)
            return (workspace.FetchBlob("output"),
                    workspace.FetchBlob("hidden"),
                    workspace.FetchBlob("cell"))

        output, hidden, cell = inference_lstm_ref()
        output = torch.tensor(output)
        hidden = torch.tensor(hidden)
        cell = torch.tensor(cell)
        lstm_in = [
            torch.from_numpy(inputs),
            torch.from_numpy(hx),
            torch.from_numpy(hx),
        ] + [param.detach() for param in torch_lstm._flat_weights]
        a, b, c = torch.ops._caffe2.InferenceLSTM(
            lstm_in, num_layers, has_biases, batch_first, is_bidirectional)
        torch.testing.assert_allclose(output, a)
        torch.testing.assert_allclose(hidden, b)
        torch.testing.assert_allclose(cell, c)

    # Test case is using workspace.has_cuda_support and not
    # workspace.has_gpu_support to exclude it from HIP because tensor
    # interop doesn't work for HIP tensors yet
    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals_cuda(self, A, H, W, img_count):
        """Same as test_generate_proposals but running the binding on CUDA
        tensors and comparing against the CPU workspace reference."""
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        bbox_deltas = (
            np.linspace(0, 10, num=img_count * 4 * A * H * W).reshape(
                (img_count, 4 * A, H, W)).astype(np.float32))
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        rois, rois_probs = generate_proposals_ref()
        rois = torch.tensor(rois)
        rois_probs = torch.tensor(rois_probs)
        a, b = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores).cuda(),
            torch.tensor(bbox_deltas).cuda(),
            torch.tensor(im_info).cuda(),
            torch.tensor(anchors).cuda(),
            2.0,
            6000,
            300,
            0.7,
            16,
            True,
            -90,
            90,
            1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(rois, a.cpu())
        torch.testing.assert_allclose(rois_probs, b.cpu())

    @given(
        N=st.integers(min_value=1, max_value=2),
        C=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
    )
    def _test_roi_align(self, N, C, H, W, device):
        """RoIAlign binding on `device` vs the CPU workspace reference.

        Called by the public test_roi_align_* wrappers; hypothesis fills
        in N/C/H/W while the wrapper supplies `device`.
        """
        def rand_roi():
            # [batch_index, x1, y1, x2, y2] with x1<=x2, y1<=y2 inside
            # the feature map.
            return np.array([
                float(int(N * np.random.rand())),
                0.5 * np.random.rand() * W,
                0.5 * np.random.rand() * H,
                (0.5 + 0.5 * np.random.rand()) * W,
                (0.5 + 0.5 * np.random.rand()) * H,
            ]).astype(np.float32)

        feature = np.random.randn(N, C, H, W).astype(np.float32)
        rois = np.array([rand_roi() for _ in range(10)])

        def roi_align_ref(_feature, _rois):
            ref_op = core.CreateOperator(
                "RoIAlign",
                ["feature", "rois"],
                ["roi_feature"],
                spatial_scale=1.0,
                pooled_h=3,
                pooled_w=3,
                sampling_ratio=0,
            )
            workspace.FeedBlob("feature", _feature)
            workspace.FeedBlob("rois", _rois)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("roi_feature")

        roi_feature_ref = roi_align_ref(feature, rois)
        roi_feature = torch.ops._caffe2.RoIAlign(
            torch.Tensor(feature).to(device),
            torch.Tensor(rois).to(device),
            order="NCHW",
            spatial_scale=1.0,
            pooled_h=3,
            pooled_w=3,
            sampling_ratio=0,
            aligned=False,
        )
        torch.testing.assert_allclose(roi_feature_ref, roi_feature.cpu())

    def test_roi_align_cpu(self):
        self._test_roi_align(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_roi_align_cuda(self):
        self._test_roi_align(device="cuda")

    @given(
        N=st.integers(min_value=1, max_value=2),
        C=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
    )
    def _test_roi_align_rotated(self, N, C, H, W, device):
        """RoIAlignRotated binding on `device` vs the workspace reference."""
        def rand_rotated_roi():
            # [batch_index, ctr_x, ctr_y, w, h, angle(-180..180)].
            return np.array([
                float(int(N * np.random.rand())),
                np.random.rand() * W,
                np.random.rand() * H,
                np.random.rand() * W,
                np.random.rand() * H,
                np.random.rand() * 360 - 180
            ]).astype(np.float32)

        feature = np.random.randn(N, C, H, W).astype(np.float32)
        rois = np.array([rand_rotated_roi() for _ in range(10)])

        def roi_align_ref(_feature, _rois):
            ref_op = core.CreateOperator(
                "RoIAlignRotated",
                ["feature", "rois"],
                ["roi_feature"],
                spatial_scale=1.0,
                pooled_h=3,
                pooled_w=3,
                sampling_ratio=0,
            )
            workspace.FeedBlob("feature", _feature)
            workspace.FeedBlob("rois", _rois)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("roi_feature")

        roi_feature_ref = roi_align_ref(feature, rois)
        roi_feature = torch.ops._caffe2.RoIAlignRotated(
            torch.Tensor(feature).to(device),
            torch.Tensor(rois).to(device),
            order="NCHW",
            spatial_scale=1.0,
            pooled_h=3,
            pooled_w=3,
            sampling_ratio=0,
            aligned=False,
        )
        torch.testing.assert_allclose(roi_feature_ref, roi_feature.cpu())

    def test_roi_align_rotated_cpu(self):
        self._test_roi_align_rotated(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_roi_align_rotated_cuda(self):
        self._test_roi_align_rotated(device="cuda")

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10))
    def test_collect_and_distribute_fpn_rpn_proposals_op(self, roi_counts):
        """CollectRpnProposals + DistributeFpnProposals composed must equal
        the fused CollectAndDistributeFpnRpnProposals op."""
        batch_size = len(roi_counts)
        im_dims = np.random.randint(100, 600, batch_size)
        rpn_rois_and_scores = []
        # Five levels of rois followed by five levels of scores.
        for i in range(5):
            rpn_rois_and_scores.append(
                torch.Tensor(generate_rois(roi_counts, im_dims)))
        for i in range(5):
            rpn_rois_and_scores.append(torch.rand(sum(roi_counts)))

        rois = torch.ops._caffe2.CollectRpnProposals(
            rpn_rois_and_scores,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
        )
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            rois,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            legacy_plus_one=True,
        )
        all_outputs = torch.ops._caffe2.CollectAndDistributeFpnRpnProposals(
            rpn_rois_and_scores,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
            legacy_plus_one=True,
        )

        rois_fpn_list = fpn_outputs[:-1]
        rois_idx_restore_int32 = fpn_outputs[-1]

        # [rois] + fpn_outputs should be equal to all_outputs
        torch.testing.assert_allclose(rois, all_outputs[0])
        for x, y in zip(fpn_outputs, all_outputs[1:]):
            torch.testing.assert_allclose(x, y)

    @given(X=hu.tensor(), fast_gelu=st.booleans())
    def _test_gelu_op(self, X, fast_gelu, device):
        """Gelu binding vs the exact CDF-based reference.

        Wrappers supply `device`; hypothesis draws X and fast_gelu.
        """
        def _gelu_ref(_X):
            return (_X * norm.cdf(_X).astype(np.float32), )

        expected_output, = _gelu_ref(X)
        actual_output = torch.ops._caffe2.Gelu(torch.tensor(X), fast_gelu)

        # The tanh approximation (fast_gelu) needs a looser tolerance.
        rtol = 1e-3 if fast_gelu else 1e-4
        atol = 1e-5
        torch.testing.assert_allclose(
            expected_output, actual_output.cpu(), rtol=rtol, atol=atol)

    def test_gelu_op(self):
        self._test_gelu_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_gelu_op_cuda(self):
        self._test_gelu_op(device="cuda")

    @given(inputs=hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    ))
    def _test_lengths_op(self, inputs, ref_op_name, torch_op, device):
        """Shared driver: run caffe2 op `ref_op_name` in the workspace and
        compare against the bound `torch_op`."""
        data, lengths = inputs

        def _lengths_ref(X, Y):
            ref_op = core.CreateOperator(ref_op_name, ["X", "Y"], "out")
            workspace.FeedBlob("X", X)
            workspace.FeedBlob("Y", Y)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("out")

        expected_output = _lengths_ref(data, lengths)
        actual_output = torch_op(
            torch.tensor(data), torch.tensor(lengths, dtype=torch.int32))
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def _test_lengths_sum_op(self, device):
        self._test_lengths_op("LengthsSum", torch.ops._caffe2.LengthsSum,
                              device)

    def test_lengths_sum_op(self):
        self._test_lengths_sum_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_sum_op_cuda(self):
        self._test_lengths_sum_op(device="cuda")

    def _test_lengths_mean_op(self, device):
        self._test_lengths_op("LengthsMean", torch.ops._caffe2.LengthsMean,
                              device)

    def test_lengths_mean_op(self):
        self._test_lengths_mean_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_mean_op_cuda(self):
        self._test_lengths_mean_op(device="cuda")

    def _test_lengths_max_op(self, device):
        self._test_lengths_op("LengthsMax", torch.ops._caffe2.LengthsMax,
                              device)

    def test_lengths_max_op(self):
        self._test_lengths_max_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_max_op_cuda(self):
        self._test_lengths_max_op(device="cuda")

    def _test_resize_nearest_op(self, device):
        """ResizeNearest binding on `device` vs workspace reference."""
        data = np.random.rand(1, 2, 3, 4).astype(np.float32)

        def _resize_nearest_ref(X):
            ref_op = core.CreateOperator(
                "ResizeNearest",
                ["X"],
                ["Y"],
                width_scale=2.0,
                height_scale=1.5,
                order="NCHW",
            )
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _resize_nearest_ref(data)
        actual_output = torch.ops._caffe2.ResizeNearest(
            torch.tensor(data).to(device),
            order="NCHW",
            width_scale=2.0,
            height_scale=1.5,
        )
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_resize_nearest_op_cpu(self):
        return self._test_resize_nearest_op("cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_resize_nearest_op_cuda(self):
        return self._test_resize_nearest_op("cuda")

    @given(input_data=hu.tensor(min_dim=2, max_dim=2))
    def test_Fused8BitRowwiseQuantizedToFloat(self, input_data):
        """Quantize in the workspace, dequantize through the torch binding,
        and compare with the quantize/dequantize reference."""
        QuantizeOp = core.CreateOperator(
            "FloatToFused8BitRowwiseQuantized",
            ["input_data"],
            ["quantized_data"],
        )
        workspace.FeedBlob("input_data", input_data)
        workspace.RunOperatorOnce(QuantizeOp)
        quantized_data = workspace.FetchBlob("quantized_data")
        dequantized_data = torch.ops._caffe2.Fused8BitRowwiseQuantizedToFloat(
            torch.tensor(quantized_data))
        reference = fused_rowwise_8bit_quantize_dequantize_reference(
            input_data)
        np.testing.assert_array_almost_equal(dequantized_data.numpy(),
                                             reference)

    @given(binary_input=st.booleans())
    def test_piecewise_linear_op(self, binary_input):
        """PiecewiseLinearTransform binding vs workspace reference."""
        # Binary mode uses a single dimension; general mode uses three.
        if binary_input:
            num_dims = 1
        else:
            num_dims = 3
        data = np.random.rand(1024, num_dims).astype(np.float32)
        slopes = np.zeros(4 * num_dims).astype(np.float32)
        # Sorted per-dimension bounds, flattened column-major.
        bounds = np.sort(np.random.rand(5, num_dims).astype(np.float32),
                         axis=0).flatten('F')
        intercepts = np.random.rand(4 * num_dims).astype(np.float32)

        def _piecewise_linear_ref(X):
            ref_op = core.CreateOperator(
                "PiecewiseLinearTransform",
                ["data", "bounds", "slopes", "intercepts"],
                ["calibrated"],
                binary=binary_input,
            )
            workspace.FeedBlob("data", X)
            workspace.FeedBlob("bounds", bounds)
            workspace.FeedBlob("slopes", slopes)
            workspace.FeedBlob("intercepts", intercepts)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("calibrated")

        expected_output = _piecewise_linear_ref(data)
        actual_output = torch.ops._caffe2.PiecewiseLinearTransform(
            torch.tensor(data), bounds.tolist(), slopes.tolist(),
            intercepts.tolist(), binary_input)

        torch.testing.assert_allclose(
            torch.tensor(expected_output), actual_output)

    def test_alias_with_name_is_in_place(self):
        """AliasWithName must alias storage: mutating x is visible in y."""
        device = "cuda" if workspace.has_cuda_support else "cpu"
        x = torch.Tensor([3, 42]).to(device)
        y = torch.ops._caffe2.AliasWithName(x, "new_name")
        x[1] = 6
        torch.testing.assert_allclose(x, torch.Tensor([3, 6]).to(device))
        # y should also change because y is alias of x
        torch.testing.assert_allclose(y, torch.Tensor([3, 6]).to(device))

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_copy_between_cpu_and_gpu(self):
        """CopyCPUToGPU / CopyGPUToCPU round-trip preserves values."""
        x_cpu_ref = torch.Tensor([1, 2, 3])
        x_gpu_ref = x_cpu_ref.to("cuda")

        x_gpu = torch.ops._caffe2.CopyCPUToGPU(x_cpu_ref)
        torch.testing.assert_allclose(x_gpu, x_gpu_ref)
        x_cpu = torch.ops._caffe2.CopyGPUToCPU(x_gpu)
        torch.testing.assert_allclose(x_cpu, x_cpu_ref)

    def test_index_hash_op(self):
        """IndexHash binding vs workspace reference (fixed seed/modulo)."""
        data = np.random.randint(low=0, high=1000, size=(4, 4, 4))

        def _index_hash_ref(X):
            ref_op = core.CreateOperator(
                "IndexHash", ["X"], ["Y"], seed=0, modulo=100)
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _index_hash_ref(data)
        actual_output = torch.ops._caffe2.IndexHash(
            torch.tensor(data), seed=0, modulo=100)

        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_bucketize_op(self):
        """Bucketize binding vs workspace reference."""
        data = np.random.rand(8, 10).astype(np.float32) * 1000
        boundaries = np.array([1, 10, 100, 1000, 100000]).astype(np.float32)

        def _bucketize_ref(X):
            ref_op = core.CreateOperator(
                "Bucketize", ["X"], ["Y"], boundaries=boundaries)
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _bucketize_ref(data)
        actual_output = torch.ops._caffe2.Bucketize(
            torch.tensor(data), boundaries)
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    @given(
        X=hu.tensor(),
        eps=st.floats(min_value=1e-4, max_value=1e-2),
    )
    def test_logit(self, X, eps):
        """Logit binding vs workspace reference for drawn eps."""
        def ref(X, eps):
            ref_op = core.CreateOperator('Logit', ["X"], ["Y"], eps=eps)
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = ref(X, eps)
        actual_output = torch.ops._caffe2.Logit(torch.tensor(X), eps)
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_percentile(self):
        """Percentile binding vs workspace reference on a fixed example."""
        original_values = np.array([[3., 5., 3],
                                    [5., 1., 6.]]).astype(np.float32)
        value_to_pct = np.array([[3, 0.2], [5, 0.5], [1, 0.3],
                                 [3, 0.6]]).astype(np.float32)
        lengths = np.array([2, 1, 1]).astype(np.int32)

        def _percentile_ref(original_values, value_to_pct, lengths):
            ref_op = core.CreateOperator(
                'Percentile',
                ["original_values", "value_to_pct", "lengths"], ["Y"])
            workspace.FeedBlob("original_values", original_values)
            workspace.FeedBlob("value_to_pct", value_to_pct)
            workspace.FeedBlob("lengths", lengths)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _percentile_ref(original_values, value_to_pct,
                                          lengths)
        actual_output = torch.ops._caffe2.Percentile(
            torch.tensor(original_values), torch.Tensor(value_to_pct),
            torch.Tensor(lengths).int())
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_batch_bucket_one_hot_op(self):
        """BatchBucketOneHot binding vs workspace reference."""
        data = np.array([[2, 3], [4, 1], [2, 5]]).astype(np.float32)
        lengths = np.array([2, 3]).astype(np.int32)
        boundaries = np.array([0.1, 2.5, 1, 3.1, 4.5]).astype(np.float32)

        def _batch_bucket_one_hot_ref(data, lengths, boundaries):
            ref_op = core.CreateOperator(
                'BatchBucketOneHot',
                ["data", "lengths", "boundaries"], ["Y"])
            workspace.FeedBlob("data", data)
            workspace.FeedBlob("lengths", lengths)
            workspace.FeedBlob("boundaries", boundaries)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _batch_bucket_one_hot_ref(data, lengths, boundaries)
        actual_output = torch.ops._caffe2.BatchBucketOneHot(
            torch.tensor(data),
            torch.Tensor(lengths).int(), torch.Tensor(boundaries))
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    @given(lengths_0=st.integers(1, 10), lengths_1=st.integers(1, 10))
    @settings(deadline=1000)
    def test_merge_id_lists(self, lengths_0, lengths_1):
        """MergeIdLists binding vs workspace reference on two id lists
        drawn from disjoint value ranges."""
        def _merge_id_lists(lengths, values):
            ref_op = core.CreateOperator(
                'MergeIdLists',
                ["lengths_0", "values_0", "lengths_1", "values_1"],
                ["merged_lengths", "merged_values"])
            workspace.FeedBlob("lengths_0", lengths[0])
            workspace.FeedBlob("values_0", values[0])
            workspace.FeedBlob("lengths_1", lengths[1])
            workspace.FeedBlob("values_1", values[1])
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("merged_lengths"), workspace.FetchBlob(
                "merged_values")

        lengths = [
            np.array([lengths_0]).astype(np.int32),
            np.array([lengths_1]).astype(np.int32)
        ]
        # Disjoint ranges [0,10) and [10,20) keep ids unique after merge.
        values = [
            np.random.choice(np.arange(0, 10), size=lengths_0,
                             replace=False).astype(np.int32),
            np.random.choice(np.arange(10, 20), size=lengths_1,
                             replace=False).astype(np.int32)
        ]

        expected_merged_lengths, expected_merged_values = _merge_id_lists(
            lengths, values)
        output_merged_lengths, output_merged_values = \
            torch.ops._caffe2.MergeIdLists([
                torch.tensor(lengths[0]),
                torch.tensor(values[0]),
                torch.tensor(lengths[1]),
                torch.tensor(values[1])
            ])
        torch.testing.assert_allclose(expected_merged_lengths,
                                      output_merged_lengths)
        torch.testing.assert_allclose(expected_merged_values,
                                      output_merged_values)

    def test_learning_rate(self):
        """LearningRate binding for the 'fixed' and 'step' policies."""
        base_lr = 0.05
        no_iter = torch.tensor([0])
        one_iter = torch.tensor([1])
        two_iter = torch.tensor([2])

        # Fixed policy
        self.assertEqual(
            base_lr,
            torch.ops._caffe2.LearningRate(
                iterations=no_iter, base_lr=base_lr, policy="fixed"),
        )
        self.assertEqual(
            base_lr,
            torch.ops._caffe2.LearningRate(
                iterations=one_iter, base_lr=base_lr, policy="fixed"),
        )

        # Step policy
        gamma = 0.99
        stepsize = 1
        self.assertEqual(
            base_lr,
            torch.ops._caffe2.LearningRate(
                iterations=no_iter,
                base_lr=base_lr,
                policy="step",
                stepsize=stepsize,
                gamma=gamma,
            ),
        )
        self.assertAlmostEqual(
            base_lr * (gamma**(1.0 / stepsize)),
            torch.ops._caffe2.LearningRate(
                iterations=one_iter,
                base_lr=base_lr,
                policy="step",
                stepsize=stepsize,
                gamma=gamma,
            ),
        )
        self.assertAlmostEqual(
            base_lr * (gamma**(2.0 / stepsize)),
            torch.ops._caffe2.LearningRate(
                iterations=two_iter,
                base_lr=base_lr,
                policy="step",
                stepsize=stepsize,
                gamma=gamma,
            ),
        )
class TestUtilityOps(hu.HypothesisTestCase):
    """Hypothesis-driven tests for assorted caffe2 utility operators:
    Slice, Transpose, NanCheck, Max, LengthsGather and Size."""

    @given(X=hu.tensor(), neg=st.booleans(), **hu.gcs)
    def test_slice(self, X, neg, gc, dc):
        """Slice a random dimension of X and compare against numpy slicing."""
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end

        op = core.CreateOperator(
            "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc
        )

        def slice_ref(X, starts, ends):
            slc = [slice(None)] * X.ndim
            slc[dim] = slice(slice_start, slice_end)
            # Fix: index with a tuple — indexing an ndarray with a *list*
            # of slices is deprecated since NumPy 1.15 and an error in
            # recent NumPy versions.
            return [X[tuple(slc)]]

        self.assertReferenceChecks(gc, op, [X, starts, ends], slice_ref)
        self.assertDeviceChecks(dc, op, [X, starts, ends], [0])

    @given(dtype=st.sampled_from([np.float32, np.int32, np.int64]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with random (or absent) axes; compare to np.transpose."""
        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            # No axes attribute: the operator reverses all dimensions,
            # matching np.transpose(x, None).
            axes = None
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], axes=axes, engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes),)

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @given(m=st.integers(5, 10), n=st.integers(5, 10), o=st.integers(5, 10),
           nans=st.booleans(), **hu.gcs)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must raise exactly when the input contains a NaN."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison a single random element with NaN.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        def nan_reference(X, Y):
            # The reference only matters on the NaN-free path; with NaNs
            # the operator is expected to raise before comparison.
            if not np.isnan(X).any():
                return [X]
            else:
                return [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        try:
            # NOTE(review): the op was created with two inputs but only X is
            # passed here — looks intentional in the original test, confirm.
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            pass

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Three-input Max equals nested np.maximum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y, Z],
            reference=max_op,
        )

    @given(
        inputs=hu.lengths_tensor(max_value=30).flatmap(
            lambda pair: st.tuples(
                st.just(pair[0]),
                st.just(pair[1]),
                hu.dims(max_value=len(pair[1])),
            )
        ).flatmap(
            lambda tup: st.tuples(
                st.just(tup[0]),
                st.just(tup[1]),
                hu.arrays(
                    tup[2], dtype=np.int32,
                    elements=st.integers(
                        min_value=0, max_value=len(tup[1]) - 1)),
            )
        ),
        **hu.gcs_cpu_only)
    def test_lengths_gather(self, inputs, gc, dc):
        """Gather whole length-delimited segments of `items` by index."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            # Segment i spans [ends[i] - lengths[i], ends[i]) in `items`.
            ends = np.cumsum(lengths)
            return [np.concatenate(
                list(items[ends[i] - lengths[i]:ends[i]] for i in indices))]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(**hu.gcs)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements in the tensor."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )