Example #1
    def load_save(self, src_device_type, src_gpu_id, dst_device_type,
                  dst_gpu_id):
        workspace.ResetWorkspace()
        dtypes = [
            np.float16, np.float32, np.float64, np.bool_, np.int8, np.int16,
            np.int32, np.int64, np.uint8, np.uint16
        ]
        arrays = [
            np.random.permutation(6).reshape(2, 3).astype(T) for T in dtypes
        ]
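        # Non-GPU device types only use gpu_id == 0; the assume() calls below
        # discard any other combination the test driver may generate.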
        assume(core.IsGPUDeviceType(src_device_type) or src_gpu_id == 0)
        assume(core.IsGPUDeviceType(dst_device_type) or dst_gpu_id == 0)
        src_device_option = core.DeviceOption(src_device_type, src_gpu_id)
        dst_device_option = core.DeviceOption(dst_device_type, dst_gpu_id)

        for i, arr in enumerate(arrays):
            self.assertTrue(workspace.FeedBlob(str(i), arr, src_device_option))
            self.assertTrue(workspace.HasBlob(str(i)))

        # Saves the blobs to a local db.
        tmp_folder = self.make_tempdir()
        op = core.CreateOperator("Save", [str(i) for i in range(len(arrays))],
                                 [],
                                 absolute_path=1,
                                 db=str(tmp_folder / "db"),
                                 db_type=self._db_type)
        self.assertTrue(workspace.RunOperatorOnce(op))

        # Reset the workspace so that anything we load is surely loaded
        # from the serialized proto.
        workspace.ResetWorkspace()
        self.assertEqual(len(workspace.Blobs()), 0)

        def _LoadTest(keep_device, device_type, gpu_id, blobs, load_all):
            """Helper that reloads blobs from the db and verifies device
            placement, with and without keep_device."""
            op = core.CreateOperator("Load", [],
                                     blobs,
                                     absolute_path=1,
                                     db=str(tmp_folder / "db"),
                                     db_type=self._db_type,
                                     device_option=dst_device_option,
                                     keep_device=keep_device,
                                     load_all=load_all)
            self.assertTrue(workspace.RunOperatorOnce(op))
            for i, arr in enumerate(arrays):
                self.assertTrue(workspace.HasBlob(str(i)))
                fetched = workspace.FetchBlob(str(i))
                self.assertEqual(fetched.dtype, arr.dtype)
                np.testing.assert_array_equal(fetched, arr)
                proto = caffe2_pb2.BlobProto()
                proto.ParseFromString(workspace.SerializeBlob(str(i)))
                self.assertTrue(proto.HasField('tensor'))
                self.assertEqual(proto.tensor.device_detail.device_type,
                                 device_type)
                if core.IsGPUDeviceType(device_type):
                    self.assertEqual(proto.tensor.device_detail.device_id,
                                     gpu_id)

        blobs = [str(i) for i in range(len(arrays))]
        # Load using device option stored in the proto, i.e.
        # src_device_option
        _LoadTest(1, src_device_type, src_gpu_id, blobs, 0)
        # Load again, but this time load into dst_device_option.
        _LoadTest(0, dst_device_type, dst_gpu_id, blobs, 0)
        # Load back to the src_device_option to see if both paths are able
        # to reallocate memory.
        _LoadTest(1, src_device_type, src_gpu_id, blobs, 0)
        # Reset the workspace, and load directly into the dst_device_option.
        workspace.ResetWorkspace()
        _LoadTest(0, dst_device_type, dst_gpu_id, blobs, 0)

        # Test load all which loads all blobs in the db into the workspace.
        workspace.ResetWorkspace()
        _LoadTest(1, src_device_type, src_gpu_id, [], 1)
        # Load again making sure that overwrite functionality works.
        _LoadTest(1, src_device_type, src_gpu_id, [], 1)
        # Load again with different device.
        _LoadTest(0, dst_device_type, dst_gpu_id, [], 1)
        workspace.ResetWorkspace()
        _LoadTest(0, dst_device_type, dst_gpu_id, [], 1)
        workspace.ResetWorkspace()
        _LoadTest(1, src_device_type, src_gpu_id, blobs, 1)
        workspace.ResetWorkspace()
        _LoadTest(0, dst_device_type, dst_gpu_id, blobs, 1)
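
A minimal sketch (an assumption, not part of the original file) of how
load_save above might be driven: hypothesis generates the device
combinations, and the assume() calls at the top filter out invalid ones.
It presumes "from hypothesis import given, strategies as st" at module
level:

    @given(src_device_type=st.sampled_from([caffe2_pb2.CPU, caffe2_pb2.CUDA]),
           src_gpu_id=st.integers(min_value=0, max_value=1),
           dst_device_type=st.sampled_from([caffe2_pb2.CPU, caffe2_pb2.CUDA]),
           dst_gpu_id=st.integers(min_value=0, max_value=1))
    def test_load_save(self, src_device_type, src_gpu_id,
                       dst_device_type, dst_gpu_id):
        # Hypothetical driver: delegates to the helper defined above.
        self.load_save(src_device_type, src_gpu_id,
                       dst_device_type, dst_gpu_id)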
Example #2
    def test_sum_reduce_fp16(self, gc, dc):
        assume(core.IsGPUDeviceType(gc.device_type))

        # Set broadcast and no axis, i.e. broadcasting last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(4, 5).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, device_option=gc)

        def ref_op(X, Y):
            res = np.sum(X, axis=0)
            res = np.sum(res, axis=0)
            return [res]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)

        # Set broadcast with axis=0, i.e. broadcasting the first dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(2, 3).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, axis=0)

        def ref_op(X, Y):
            res = np.sum(X, axis=3)
            res = np.sum(res, axis=2)
            return [res]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)

        # broadcasting intermediate dimensions
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(3, 4).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, axis=1)

        def ref_op(X, Y):
            res = np.sum(X, axis=0)
            res = np.sum(res, axis=2)
            return [res]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)

        # broadcasting with single elem dimensions at both ends
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(1, 3, 4, 1).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1)

        def ref_op(X, Y):
            res = np.sum(X, axis=0)
            res = np.sum(res, axis=2)
            return [res.reshape(Y.shape)]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)
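
Restated in plain NumPy (a sketch of the semantics under test, not code
from the original file): with broadcast=1, SumReduceLike sums X over the
axes that Y does not cover, so the output takes Y's shape. For the first
case above:

    import numpy as np

    X = np.random.rand(2, 3, 4, 5).astype(np.float16)
    Y = np.random.rand(4, 5).astype(np.float16)
    out = X.sum(axis=(0, 1))  # sum away the leading axes not present in Y
    assert out.shape == Y.shape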
Example #3
    def _run_test(self, n, m, k, transposed, multi_dim, dtype, engine, gc, dc):
        if dtype == np.float16:
            # fp16 only supported with CUDA/HIP
            assume(core.IsGPUDeviceType(gc.device_type))
            dc = [d for d in dc if core.IsGPUDeviceType(d.device_type)]

        if engine == 'TENSORCORE':
            # TensorCore only makes sense with CUDA
            assume(gc.device_type == caffe2_pb2.CUDA)
            # TensorCore GEMM kernels require m, n, and k to be multiples of 8
            m *= 8
            k *= 8
            n *= 8

        X = np.random.rand(m, k).astype(dtype) - 0.5
        if multi_dim:
            if transposed:
                W = np.random.rand(k, n, 1, 1).astype(dtype) - 0.5
            else:
                W = np.random.rand(n, k, 1, 1).astype(dtype) - 0.5
        else:
            if transposed:
                W = np.random.rand(k, n).astype(dtype) - 0.5
            else:
                W = np.random.rand(n, k).astype(dtype) - 0.5
        b = np.random.rand(n).astype(dtype) - 0.5

        def fc_op(X, W, b):
            return [np.dot(X, W.reshape(n, k).transpose()) + b.reshape(n)]

        def fc_transposed_op(X, W, b):
            return [np.dot(X, W.reshape(k, n)) + b.reshape(n)]

        op = core.CreateOperator(
            'FCTransposed' if transposed else 'FC',
            ['X', 'W', 'b'],
            'out',
            engine=engine,
        )

        if dtype == np.float16 and core.IsGPUDeviceType(gc.device_type):
            # Request fp16 math in the GPU kernel (rather than fp32
            # accumulation) via the float16_compute argument.
            a = caffe2_pb2.Argument()
            a.i = 1
            a.name = "float16_compute"
            op.arg.extend([a])

        # Check against the numpy reference. ReferenceChecks is flaky on
        # ROCm with a threshold of 1e-4 for fp16, so relax it to 1e-3.
        threshold = 1e-3 if (gc.device_type == caffe2_pb2.HIP
                             and dtype == np.float16) else 1e-4
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, W, b],
            reference=fc_transposed_op if transposed else fc_op,
            threshold=threshold)
        # Check over multiple devices
        self.assertDeviceChecks(dc, op, [X, W, b], [0])

        # Gradient checks
        threshold = 0.5 if dtype == np.float16 else 0.005
        stepsize = 0.5 if dtype == np.float16 else 0.05
        for i in range(3):
            self.assertGradientChecks(gc,
                                      op, [X, W, b],
                                      i, [0],
                                      threshold=threshold,
                                      stepsize=stepsize)
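
For reference, the math the two reference functions implement, restated as
a self-contained NumPy sketch (the concrete shapes are illustrative
assumptions):

    import numpy as np

    m, k, n = 2, 3, 4
    X = np.random.rand(m, k).astype(np.float32)
    W = np.random.rand(n, k).astype(np.float32)  # FC stores W as (n, k)
    b = np.random.rand(n).astype(np.float32)
    out = X.dot(W.T) + b                         # output shape (m, n)
    assert out.shape == (m, n)
    # FCTransposed stores W as (k, n) instead, so its reference is
    # X.dot(W) + b with no transpose.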