示例#1
0
def main():
    """Benchmark the "single_net" and "cross_net" nets and print timings.

    Requires NUMA support with at least two NUMA nodes. Both nets are built
    with ``build_net`` (second argument ``False`` for "single_net", ``True``
    for "cross_net"), their init nets are run once, and then four
    measurement rounds are executed. Each round runs every net for
    ``NUM_ITER`` iterations and prints the elapsed wall-clock time, the
    derived bandwidth in GB/s, and finally the single/cross bandwidth ratio.
    """
    assert workspace.IsNUMAEnabled() and workspace.GetNumNUMANodes() >= 2

    single_init, single_net = build_net("single_net", False)
    cross_init, cross_net = build_net("cross_net", True)

    # Initialise each net's blobs once before any measurement.
    for init_net in (single_init, cross_init):
        workspace.CreateNet(init_net)
        workspace.RunNet(init_net.Name())

    for bench_net in (single_net, cross_net):
        workspace.CreateNet(bench_net)

    # (net, label for the elapsed time, template for the bandwidth line)
    measurements = (
        (single_net, "Single socket time:", "Single socket BW: {} GB/s"),
        (cross_net, "Cross socket time:", "Cross socket BW: {} GB/s"),
    )

    for _ in range(4):
        bandwidths = []
        for net, time_label, bw_template in measurements:
            started = time.time()
            workspace.RunNet(net.Name(), NUM_ITER)
            elapsed = time.time() - started
            print(time_label, elapsed)
            # NOTE(review): the leading 4 is presumably bytes per element
            # (float32) -- confirm against build_net.
            bandwidth = (
                4 * SHAPE_LEN * SHAPE_LEN * NUM_REPLICAS * NUM_ITER
                / elapsed / GB
            )
            print(bw_template.format(bandwidth))
            bandwidths.append(bandwidth)

        single_bw, cross_bw = bandwidths
        print("Single BW / Cross BW: {}".format(single_bw / cross_bw))
示例#2
0
def main():
    """Time the "single_net" and "cross_net" nets over four rounds.

    Requires NUMA support with at least two NUMA nodes. Each round runs
    every net for 5000 iterations and prints the elapsed wall-clock time.
    """
    assert workspace.IsNUMAEnabled() and workspace.GetNumNUMANodes() >= 2

    single_net = build_net("single_net", False)
    cross_net = build_net("cross_net", True)
    for net in (single_net, cross_net):
        workspace.CreateNet(net)

    # (net, label printed in front of its elapsed time)
    labelled_nets = (
        (single_net, "Single socket time:"),
        (cross_net, "Cross socket time:"),
    )

    for _ in range(4):
        for net, label in labelled_nets:
            started = time.time()
            workspace.RunNet(net.Name(), 5000)
            print(label, time.time() - started)
示例#3
0
    gpu_device_option = caffe2_pb2.DeviceOption()
    gpu_device_option.device_type = caffe2_pb2.CUDA
    gpu_device_option.device_id = 0

    net.CopyCPUToGPU("output_blob_0",
                     "output_blob_0_gpu",
                     device_option=gpu_device_option)
    net.CopyCPUToGPU("output_blob_1",
                     "output_blob_1_gpu",
                     device_option=gpu_device_option)

    return net


@unittest.skipIf(not workspace.IsNUMAEnabled(), "NUMA is not enabled")
@unittest.skipIf(workspace.GetNumNUMANodes() < 2, "Not enough NUMA nodes")
@unittest.skipIf(not workspace.has_gpu_support, "No GPU support")
class NUMATest(TestCase):
    """Check which NUMA node each output blob of the test net reports.

    Skipped unless NUMA is enabled, at least two NUMA nodes are present and
    GPU support is available.
    """

    def test_numa(self):
        """Run the test net once and verify each output blob's NUMA node."""
        workspace.RunNetOnce(build_test_net("test_numa"))

        # (blob name, NUMA node the blob is expected to report)
        expected_nodes = (
            ("output_blob_0", 0),
            ("output_blob_1", 1),
        )
        for blob_name, node in expected_nodes:
            self.assertEqual(workspace.GetBlobNUMANode(blob_name), node)


# Run the unittest runner only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
示例#4
0
    "--num-workers",
    type=int,
    default=1,
    help="the number of worker per numa node in the thread pool",
)
# NOTE(review): presumably the number of NUMA nodes to spread work over --
# confirm against its use later in the file.
parser.add_argument("--num-numa-nodes", type=int, default=1)
# Options the parser does not recognise are collected in extra_args and
# forwarded to workspace.GlobalInit below.
args, extra_args = parser.parse_known_args()

### some basic setup ###
# Seed NumPy's RNG and set array print precision from the command line.
np.random.seed(args.numpy_rand_seed)
np.set_printoptions(precision=args.print_precision)
# Initialise Caffe2 with the NUMA flag switched on, plus any extra
# command-line options collected above.
global_options = [
    "caffe2", "--caffe2_log_level=2", "--caffe2_cpu_numa_enabled=1"
] + extra_args
workspace.GlobalInit(global_options)
# Stop here if NUMA is still reported as disabled after initialisation.
assert workspace.IsNUMAEnabled()
# Parse the dash-separated integers in args.arch into an int array.
ln = np.fromstring(args.arch, dtype=int, sep="-")
# test prints
print("mlp arch (" + str(ln.size - 1) +
      " layers, with input to output dimensions):")
print(ln)

### prepare training data ###
# Number of mini-batches needed to cover data_size, rounded up.
nbatches = int(np.ceil((args.data_size * 1.0) / args.mini_batch_size))
# inputs
# m0 is the first entry of the architecture array.
m0 = ln[0]
# NOTE(review): presumably filled per batch by the loop that follows -- confirm.
lX = []
# targets
# ml is the last entry of the architecture array.
ml = ln[ln.size - 1]
# NOTE(review): presumably filled per batch by the loop that follows -- confirm.
lT = []
for j in range(0, nbatches):