def testNameAndDeviceScopeTogether(self):
    """Check that a device scope and a name scope apply to the same operator.

    A ``Relu`` operator is created while both ``core.DeviceScope`` (CUDA,
    gpu 1) and ``core.NameScope("foo")`` are active; the test then asserts
    on the device option and on the blob names recorded in the operator.
    """
    gpu_one = caffe2_pb2.DeviceOption(
        device_type=caffe2_pb2.CUDA,
        cuda_gpu_id=1,
    )

    with core.DeviceScope(gpu_one), core.NameScope("foo"):
        relu_op = core.CreateOperator("Relu", "x", "y")

    # Device option comes from the enclosing DeviceScope.
    self.assertTrue(relu_op.HasField('device_option'))
    self.assertEqual(relu_op.device_option.device_type, caffe2_pb2.CUDA)
    self.assertEqual(relu_op.device_option.cuda_gpu_id, 1)

    # Blob names are expected to carry the "foo/" prefix of the NameScope.
    self.assertEqual(len(relu_op.input), 1)
    self.assertEqual(relu_op.input[0], "foo/x")
    self.assertEqual(len(relu_op.output), 1)
    self.assertEqual(relu_op.output[0], "foo/y")
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=800,
    init_fun=None,
    external_loggers=None,
):
    """Build a ``DataInputCoordinator`` with its worker threads and register it.

    The coordinator is created for the current device scope (CPU when no
    scope is active) and the current name scope, using a queue obtained from
    the module-level ``global_coordinator``. ``num_worker_threads`` fetcher
    threads plus one enqueuer thread are constructed and stored on
    ``coordinator._workers``; this function does not start them.

    Returns:
        The module-level ``global_coordinator`` after the new coordinator has
        been added to it.
    """
    global global_coordinator

    current_device = scope.CurrentDeviceScope()
    if current_device is None:
        current_device = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    # Create coordinator object
    coordinator = DataInputCoordinator(
        net,
        input_blob_names,
        batch_size,
        current_device,
        scope.CurrentNameScope(),
        input_source_name,
        global_coordinator.get_queue(input_source_name, max_buffered_batches),
        init_fun=init_fun,
        external_loggers=external_loggers,
    )

    # Launch fetch worker threads: one fresh worker id per fetcher.
    threads = []
    for _ in range(num_worker_threads):
        worker_id = global_coordinator.get_new_worker_id()
        fetch_thread = threading.Thread(
            target=fetcher,
            name="data_workers fetcher id {}".format(worker_id),
            args=[coordinator, worker_id, fetch_fun, batch_size,
                  input_blob_names],
        )
        threads.append(fetch_thread)

    enqueue_thread = threading.Thread(
        target=enqueuer,
        name="Enqueuer {} {}".format(
            input_source_name, scope.CurrentNameScope()),
        args=[coordinator],
    )
    threads.append(enqueue_thread)

    coordinator._workers = threads
    global_coordinator.add(coordinator)
    return global_coordinator
def setUp(self):
    """Prepare random bounding boxes, heatmaps and the operator under test.

    The NumPy RNG is seeded with 0. Bounding boxes are drawn at random and
    reordered so that column 0 <= column 2 and column 1 <= column 3.
    Heatmaps are produced by cubic interpolation of a small random grid up
    to ``HEATMAP_SIZE`` x ``HEATMAP_SIZE``. Results are stored on
    ``self.heatmaps_in``, ``self.bboxes_in`` and ``self.op``.
    """
    super(TestHeatmapMaxKeypointOp, self).setUp()
    np.random.seed(0)

    # Side length of the coarse grid that gets interpolated to HEATMAP_SIZE.
    small_size = 4

    boxes = 500 * np.random.rand(NUM_TEST_ROI, 4).astype(np.float32)
    # Only keep boxes whose first coordinates are the smaller ones.
    for row in range(NUM_TEST_ROI):
        if boxes[row, 0] > boxes[row, 2]:
            boxes[row, 0], boxes[row, 2] = boxes[row, 2], boxes[row, 0]
        if boxes[row, 1] > boxes[row, 3]:
            boxes[row, 1], boxes[row, 3] = boxes[row, 3], boxes[row, 1]

    # Random coarse heatmaps, expanded below by interpolation.
    coarse = np.random.rand(
        NUM_TEST_ROI, NUM_KEYPOINTS, small_size, small_size
    ).astype(np.float32)
    heatmaps = np.zeros(
        (NUM_TEST_ROI, NUM_KEYPOINTS, HEATMAP_SIZE, HEATMAP_SIZE)
    ).astype(np.float32)

    coarse_axis = np.arange(0, 1, 1.0 / small_size)
    fine_axis = np.arange(0, 1, 1.0 / HEATMAP_SIZE)
    for roi in range(NUM_TEST_ROI):
        for keyp in range(NUM_KEYPOINTS):
            interpolator = interpolate.interp2d(
                coarse_axis, coarse_axis, coarse[roi, keyp], kind='cubic')
            heatmaps[roi, keyp] = interpolator(fine_axis, fine_axis)

    self.heatmaps_in = heatmaps
    self.bboxes_in = boxes
    self.op = core.CreateOperator(
        'HeatmapMaxKeypoint',
        ['heatmaps_in', 'bboxes_in'],
        ['keypoints_out'],
        arg=[utils.MakeArgument("should_output_softmax", True)],
        device_option=caffe2_pb2.DeviceOption(),
    )
def LoadModuleFile(fname):
    """Load a Caffe2 ``NetDef`` from ``fname``.

    When the environment variable ``INT8PTXT`` equals ``"1"`` the file is
    parsed as protobuf text format; otherwise it is parsed as a serialized
    (binary) protobuf. If the module-level ``gpu_id`` equals ``-2`` every
    operator's device option is overwritten with an IDEEP device option.

    Args:
        fname: Path of the file to read.

    Returns:
        The parsed ``caffe2_pb2.NetDef``.
    """
    from caffe2.proto import caffe2_pb2

    as_text = os.environ.get('INT8PTXT') == "1"

    # A serialized protobuf is raw bytes, so it must be read in binary mode;
    # text mode would try to decode it (and fail or corrupt it on Python 3).
    with open(fname, 'r' if as_text else 'rb') as f:
        payload = f.read()

    net_def = caffe2_pb2.NetDef()
    if as_text:
        import google.protobuf.text_format as ptxt
        ptxt.Parse(payload, net_def)
    else:
        net_def.ParseFromString(payload)

    if gpu_id == -2:
        device_opts = caffe2_pb2.DeviceOption()
        device_opts.device_type = caffe2_pb2.IDEEP
        for op in net_def.op:
            op.device_option.CopyFrom(device_opts)

    return net_def
def __init__(
    self,
    stepsize,
    threshold,
    device_option=None,
    workspace_name="gradient_check",
    input_device_options=None,
):
    """Store the checker configuration on the instance.

    Args:
        stepsize: Saved as ``self._stepsize``.
        threshold: Saved as ``self._threshold``.
        device_option: Saved as ``self._device_option``; a falsy value is
            replaced by a default ``caffe2_pb2.DeviceOption()``.
        workspace_name: Saved as ``self._workspace_name``.
        input_device_options: Saved as ``self._input_device_options``;
            ``None`` is replaced by a new empty dict.
    """
    if not device_option:
        device_option = caffe2_pb2.DeviceOption()
    if input_device_options is None:
        input_device_options = {}

    self._stepsize = stepsize
    self._threshold = threshold
    self._workspace_name = workspace_name
    self._device_option = device_option
    self._input_device_options = input_device_options
def build_net(net_name, cross_socket):
    """Build an ``async_scheduling`` net that fills a blob and copies it.

    The input blob is filled under a CPU device option with NUMA node 0.
    The copy uses the same device option, switched to NUMA node 1 when
    ``cross_socket`` is truthy.

    Returns:
        The constructed ``core.Net``.
    """
    source_blob = net_name + "/input_blob"
    target_blob = net_name + "/output_blob"

    net = core.Net(net_name)
    net.Proto().type = "async_scheduling"

    numa_opt = caffe2_pb2.DeviceOption()
    numa_opt.device_type = caffe2_pb2.CPU
    numa_opt.numa_node_id = 0
    net.XavierFill(
        [], source_blob, shape=[1024, 1024], device_option=numa_opt)

    # The same option object is reused for the copy; only the node changes.
    if cross_socket:
        numa_opt.numa_node_id = 1
    net.Copy(source_blob, target_blob, device_option=numa_opt)

    return net
def map_ops(proto):
    """Record, for every blob in ``proto``, the device option of its first op.

    Each input/output blob not yet present in the enclosing ``mapping`` is
    associated with the device option of the operator that mentions it
    first. Operators whose type starts with ``RecurrentNetwork`` have their
    ``*step_net`` arguments parsed as text-format ``NetDef`` and processed
    recursively.
    """
    for op in proto.op:
        if op.type == "Iter":
            # Hack for Iters which have blob in CPU context
            blob_device = caffe2_pb2.DeviceOption()
            blob_device.device_type = caffe2_pb2.CPU
        else:
            blob_device = op.device_option

        for blob in list(op.input) + list(op.output):
            if blob in mapping:
                continue
            mapping[blob] = blob_device

        if not op.type.startswith('RecurrentNetwork'):
            continue

        import google.protobuf.text_format as protobuftx
        step_net_args = [a for a in op.arg if a.name.endswith("step_net")]
        for step_net_arg in step_net_args:
            nested_net = caffe2_pb2.NetDef()
            protobuftx.Merge(step_net_arg.s, nested_net)
            map_ops(nested_net)
def get_weigts_map(config):
    """Map each init-net output blob to the ``ints`` of its op's first arg.

    The init net is read from ``config['network']['init_net']`` as a
    serialized ``NetDef``. For every operator, the first output blob name is
    mapped to ``np.asarray(op.arg[0].ints)``.
    NOTE(review): presumably ``arg[0]`` is the weight shape — confirm
    against the models this is used with.

    The device option that used to be built here was never used, so it has
    been removed; ``config['gpu_id']`` is no longer read.

    Args:
        config: Nested dict providing ``config['network']['init_net']``.

    Returns:
        dict from output blob name to a NumPy array.
    """
    init_net_proto = caffe2_pb2.NetDef()
    # Only the read needs the file handle; release it before processing.
    with open(config['network']['init_net'], 'rb') as f:
        init_net_proto.ParseFromString(f.read())

    return {
        op.output[0]: np.asarray(op.arg[0].ints)
        for op in init_net_proto.op
    }
def add_predictor(config, images):
    """Run the serialized predictor on ``images`` and return the softmax blob.

    Args:
        config: dict providing ``gpu_id`` (``None`` keeps the default device
            option, otherwise the CUDA device id), ``init_net`` and
            ``predict_net`` (paths of serialized ``NetDef`` files).
        images: Data fed to the workspace as blob ``"data"``.

    Returns:
        The content of workspace blob ``"softmax"`` after running the net.
    """
    # set device
    device_opt = caffe2_pb2.DeviceOption()
    gpu_id = config['gpu_id']
    if gpu_id is not None:
        device_opt.device_type = caffe2_pb2.CUDA
        device_opt.cuda_gpu_id = gpu_id

    def _read_net_on_device(path):
        # Parse a serialized NetDef and pin every op to the chosen device.
        proto = caffe2_pb2.NetDef()
        with open(path, 'rb') as f:
            proto.ParseFromString(f.read())
        for op in proto.op:
            op.device_option.CopyFrom(device_opt)
        return proto

    # add prediction model
    predict_model = model_helper.ModelHelper(
        name="predictor",
        init_params=False,
    )

    # load param_init_net and run it once
    workspace.RunNetOnce(core.Net(_read_net_on_device(config['init_net'])))

    # load predict_net
    predict_model.net = core.Net(_read_net_on_device(config['predict_net']))

    # feed pre-processed images as input
    workspace.FeedBlob("data", images, device_option=device_opt)

    # run net
    workspace.CreateNet(predict_model.net)
    workspace.RunNet(predict_model.net)
    return workspace.FetchBlob("softmax")
# test_cross_nets: builds an "init" net (XavierFill / ConstantFill ops, plus an
# Add created under a CUDA gpu-1 DeviceScope) and a "test" net (FC and Add),
# passes both to core.InjectDeviceCopiesAmongNets with 'data' pre-mapped to the
# GPU device option, and asserts on the second returned net: ops 0 and 1 are
# CopyCPUToGPU producing fc_w_cuda_1 / fc_b_cuda_1, op 2 is the FC reading those
# copies, op 3 is the Add reading const_cuda_1, and "data", "fc_w", "fc_b" and
# "const_cuda_1" all appear in external_input.
# NOTE(review): the literal 1 compared against device_type is presumably
# caffe2_pb2.CUDA — confirm.
# NOTE(review): this line is whitespace-collapsed and ends with an unterminated
# triple-quote, presumably the start of a truncated trailing string; the code is
# left untouched for that reason.
def test_cross_nets(self): net = core.Net("test") init_net = core.Net("init") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA device_option.cuda_gpu_id = 1 weight = init_net.XavierFill([], 'fc_w', shape=[10, 100]) bias = init_net.ConstantFill([], 'fc_b', shape=[ 10, ]) const = init_net.ConstantFill([], 'const', shape=[], value=1.) with core.DeviceScope(device_option): const = init_net.Add([const, const], [const]) fc_out = net.FC(["data", weight, bias], "fc1") net.Add([fc_out, const], [fc_out]) data_remap = {'data': device_option} nets, _ = core.InjectDeviceCopiesAmongNets( [init_net, net], blob_to_device_init=data_remap) op = nets[1]._net.op[0] self.assertEqual(op.type, "CopyCPUToGPU") self.assertEqual(op.device_option.device_type, 1) self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.output[0], "fc_w_cuda_1") op = nets[1]._net.op[1] self.assertEqual(op.type, "CopyCPUToGPU") self.assertEqual(op.device_option.device_type, 1) self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.output[0], "fc_b_cuda_1") op = nets[1]._net.op[2] self.assertEqual(op.type, "FC") self.assertEqual(op.input[0], "data") self.assertEqual(op.input[1], "fc_w_cuda_1") self.assertEqual(op.input[2], "fc_b_cuda_1") self.assertEqual(op.device_option.device_type, 1) self.assertEqual(op.device_option.cuda_gpu_id, 1) op = nets[1]._net.op[3] self.assertEqual(op.type, "Add") self.assertEqual(op.input[0], "fc1") self.assertEqual(op.input[1], "const_cuda_1") # check that moved blob is in input to the new net for c in ["data", "fc_w", "fc_b", "const_cuda_1"]: self.assertTrue(c in nets[1]._net.external_input) """
def testCreate(self):
    """Check that ``core.CreateOperator`` fills in every OperatorDef field.

    An operator is created with a name, a control input, a CUDA device
    option, an engine and three extra keyword arguments; the test asserts
    that each of these shows up in the resulting proto.
    """
    device_option = caffe2_pb2.DeviceOption()
    device_option.device_type = caffe2_pb2.CUDA
    device_option.cuda_gpu_id = 1
    op = core.CreateOperator(
        "Ludicrous", "x", "y",
        name="ludicrous",
        control_input="z",
        device_option=device_option,
        engine="WARP",
        arg1=1, arg2="2", arg3=[1, 2, 3])

    self.assertEqual(op.type, "Ludicrous")
    self.assertEqual(op.name, "ludicrous")
    self.assertEqual(op.engine, "WARP")
    self.assertEqual(len(op.input), 1)
    self.assertEqual(op.input[0], "x")
    self.assertEqual(len(op.output), 1)
    self.assertEqual(op.output[0], "y")
    self.assertEqual(len(op.control_input), 1)
    self.assertEqual(op.control_input[0], "z")
    self.assertTrue(op.HasField('device_option'))
    self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
    self.assertEqual(op.device_option.cuda_gpu_id, 1)
    # assertTrue(len(op.arg), 3) treated 3 as the failure message and passed
    # for any non-empty arg list; assertEqual actually checks the count.
    self.assertEqual(len(op.arg), 3)

    # can't guarantee ordering of kwargs, so index the args by name
    arg_map = {arg.name: arg for arg in op.arg}

    # Check all elements exist that should
    self.assertIn("arg1", arg_map)
    self.assertIn("arg2", arg_map)
    self.assertIn("arg3", arg_map)

    # Now test that all args were initialized correctly
    self.assertEqual(arg_map["arg1"].i, 1)
    self.assertEqual(arg_map["arg2"].s, b"2")
    self.assertEqual(list(arg_map["arg3"].ints), [1, 2, 3])
def testOperatorDef2NodeProto(self):
    """Check that ``convert.OperatorDefToNodeProto`` carries over all fields.

    A fully populated ``OperatorDef`` is converted, and the resulting node is
    compared field by field: inputs, outputs, name, type, attribute names,
    device option, and the ``engine`` / ``control_input`` /
    ``is_gradient_op`` annotations plus the doc string.
    """
    op_def = caffe2_pb2.OperatorDef()
    op_def.input.extend(["A", "B", "C"])
    op_def.output.extend(["X", "Y"])
    op_def.name = "TestOpName"
    op_def.type = "TestOp"

    arg1 = caffe2_pb2.Argument()
    arg1.name = "TestArg1"
    arg1.i = 1
    arg2 = caffe2_pb2.Argument()
    arg2.name = "TestArg2"
    # This value used to be assigned to arg1.s, which left arg2 without any
    # value and gave arg1 both an int and a string payload.
    arg2.s = "TestInfo".encode("utf-8")
    op_def.arg.extend([arg1, arg2])

    op_def.device_option.CopyFrom(caffe2_pb2.DeviceOption())
    op_def.engine = "TestEngine".encode("utf-8")
    op_def.control_input.extend(["input1", "input2"])
    op_def.is_gradient_op = True
    op_def.debug_info = "TestDebugInfo"

    node = convert.OperatorDefToNodeProto(op_def)

    self.assertEqual(node.input, op_def.input)
    self.assertEqual(node.output, op_def.output)
    self.assertEqual(node.name, op_def.name)
    self.assertEqual(node.op_type, op_def.type)
    self.assertEqual(node.attribute[0].name, op_def.arg[0].name)
    self.assertEqual(node.attribute[1].name, op_def.arg[1].name)
    self.assertEqual(node.device_option, op_def.device_option)

    # Engine, control inputs and the gradient flag are looked up among the
    # node annotations by name.
    node_engine = [
        a.s.decode("utf-8")
        for a in node.annotations
        if a.name == "engine"
    ][0]
    self.assertEqual(node_engine, op_def.engine)

    node_control_input = [
        a.strings
        for a in node.annotations
        if a.name == "control_input"
    ][0]
    self.assertEqual(len(node_control_input), len(op_def.control_input))
    for x, y in zip(node_control_input, op_def.control_input):
        self.assertEqual(x.decode("utf-8"), y)

    self.assertEqual(node.doc_string, op_def.debug_info)

    node_is_gradient_op = [
        a.i
        for a in node.annotations
        if a.name == "is_gradient_op"
    ][0]
    self.assertEqual(node_is_gradient_op, int(op_def.is_gradient_op))
def build_net(net_name, cross_socket):
    """Build an init net and a copy net over ``NUM_REPLICAS`` blobs.

    The init net fills one ``SHAPE_LEN`` x ``SHAPE_LEN`` input blob per
    replica under a CPU device option with NUMA node 0. The main net copies
    each input blob to an output blob using the same device option, switched
    to NUMA node 1 when ``cross_socket`` is truthy. Both nets use the
    ``async_scheduling`` type.

    Returns:
        Tuple ``(init_net, net)``.
    """
    replica_ids = range(NUM_REPLICAS)
    input_blobs = [net_name + "/input_blob_" + str(r) for r in replica_ids]
    output_blobs = [net_name + "/output_blob_" + str(r) for r in replica_ids]

    numa_opt = caffe2_pb2.DeviceOption()
    numa_opt.device_type = caffe2_pb2.CPU
    numa_opt.numa_node_id = 0

    init_net = core.Net(net_name + "_init")
    init_net.Proto().type = "async_scheduling"
    for blob in input_blobs:
        init_net.XavierFill(
            [], blob, shape=[SHAPE_LEN, SHAPE_LEN], device_option=numa_opt)

    net = core.Net(net_name)
    net.Proto().type = "async_scheduling"
    # The copies reuse the option object; only the NUMA node may change.
    if cross_socket:
        numa_opt.numa_node_id = 1
    for src, dst in zip(input_blobs, output_blobs):
        net.Copy(src, dst, device_option=numa_opt)

    return init_net, net
def init_data_input_workers(
    net,
    input_blob_names,
    fetch_fun,
    batch_size,
    num_worker_threads=2,
    input_source_name="train",
    max_buffered_batches=100,
):
    """Build a ``DataInputCoordinator`` with its worker threads and register it.

    The coordinator is created for the current device scope (CPU when no
    scope is active) and the current name scope. ``num_worker_threads``
    fetcher threads receive consecutive ids starting at
    ``global_coordinator._fetcher_id_seq``, which is then advanced by the
    same amount. One enqueuer thread is appended. The threads are stored on
    ``coordinator._workers``; this function does not start them.

    Returns:
        The module-level ``global_coordinator`` after the new coordinator has
        been added to it.
    """
    global global_coordinator

    current_device = scope.CurrentDeviceScope()
    if current_device is None:
        current_device = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    # Create coordinator object
    coordinator = DataInputCoordinator(
        net,
        input_blob_names,
        batch_size,
        current_device,
        scope.CurrentNameScope(),
        input_source_name,
        max_buffered_batches,
    )

    # Launch fetch worker threads
    threads = []
    for offset in range(num_worker_threads):
        fetcher_id = global_coordinator._fetcher_id_seq + offset
        threads.append(threading.Thread(
            target=fetcher,
            args=[coordinator, fetcher_id, fetch_fun, batch_size,
                  input_blob_names],
        ))
    global_coordinator._fetcher_id_seq += num_worker_threads

    threads.append(threading.Thread(target=enqueuer, args=[coordinator]))

    coordinator._workers = threads
    global_coordinator.add(coordinator)
    return global_coordinator
def test_blob_inplace(self):
    """Check cross-device copy injection around an in-place operator.

    ``Adagrad`` updates ``param`` and ``moment`` in place outside any device
    scope, and a ``Relu`` on ``param`` is added under a CUDA gpu-1 scope.
    After ``core.InjectCrossDeviceCopies`` the test expects a
    ``CopyCPUToGPU`` op feeding the Relu. Adding another op that writes
    ``moment`` is then expected to make a second injection raise
    ``RuntimeError``.
    """
    gpu_one = caffe2_pb2.DeviceOption(
        device_type=caffe2_pb2.CUDA,
        cuda_gpu_id=1,
    )

    net = core.Net("test")
    net.Adagrad(['param', 'moment', 'grad', 'lr'], ['param', 'moment'])
    with core.DeviceScope(gpu_one):
        net.Relu("param", "param_relu_no_sense")

    net, _ = core.InjectCrossDeviceCopies(net)
    injected_ops = net._net.op

    copy_op = injected_ops[1]
    self.assertEqual(copy_op.type, 'CopyCPUToGPU')
    self.assertEqual(copy_op.input[0], 'param')
    self.assertEqual(copy_op.output[0], 'param_cuda_1')

    consumer_op = injected_ops[2]
    self.assertEqual(consumer_op.input[0], 'param_cuda_1')

    net.Relu('nonsense_input', 'moment')
    with self.assertRaises(RuntimeError):
        core.InjectCrossDeviceCopies(net)
def build_deploy_model(config):
    """Build the deploy model and initialise it in the workspace.

    The network is built with ``add_model`` on blob ``"data"`` in test mode
    and followed by a softmax named ``'softmax'``. Construction happens under
    a device scope that is CUDA on ``config['gpu_id']`` when that value is
    not ``None`` and the default device option otherwise. The parameter init
    net is run once and the main net is created in the workspace.

    Returns:
        The ``model_helper.ModelHelper`` for the deploy model.
    """
    # set device
    device_opt = caffe2_pb2.DeviceOption()
    gpu_id = config['gpu_id']
    if gpu_id is not None:
        device_opt.device_type = caffe2_pb2.CUDA
        device_opt.cuda_gpu_id = gpu_id

    # build model
    with core.DeviceScope(device_opt):
        deploy_model = model_helper.ModelHelper(
            name='{}_deploy_model'.format(config['name']),
            init_params=False,
        )
        prediction = add_model(deploy_model, config, "data", is_test=True)
        brew.softmax(deploy_model, prediction, 'softmax')

    # init workspace for the deploy net
    workspace.RunNetOnce(deploy_model.param_init_net)
    workspace.CreateNet(deploy_model.net)
    return deploy_model
def load_model():
    """Load the init and predict nets from ``out/`` into the Caffe2 workspace.

    Both ``NetDef`` files are parsed, given a CPU device option at net level,
    and handed to the workspace in serialized form: the init net is run
    once, the predict net is created with ``overwrite=True``.

    Returns:
        Tuple ``(init_def, net_def)`` of the parsed ``NetDef`` protos.
    """
    cpu_opts = caffe2_pb2.DeviceOption()
    cpu_opts.device_type = caffe2_pb2.CPU

    def _parse_on_cpu(path):
        # Read a serialized NetDef and stamp the CPU device option on it.
        proto = caffe2_pb2.NetDef()
        with open(path, 'rb') as f:
            proto.ParseFromString(f.read())
        proto.device_option.CopyFrom(cpu_opts)
        return proto

    init_def = _parse_on_cpu("out/model_init.pb")
    workspace.RunNetOnce(init_def.SerializeToString())

    net_def = _parse_on_cpu("out/model.pb")
    workspace.CreateNet(net_def.SerializeToString(), overwrite=True)

    return init_def, net_def
def build_validation_model(config):
    """Build the validation model and initialise it in the workspace.

    Input, network, loss and accuracy operators are added in test mode under
    a device scope that is CUDA on ``config['gpu_id']`` when that value is
    not ``None`` and the default device option otherwise. The parameter init
    net is run once and the main net is created in the workspace.

    Returns:
        The ``model_helper.ModelHelper`` for the validation model.
    """
    # set device
    device_opt = caffe2_pb2.DeviceOption()
    gpu_id = config['gpu_id']
    if gpu_id is not None:
        device_opt.device_type = caffe2_pb2.CUDA
        device_opt.cuda_gpu_id = gpu_id

    # build model
    with core.DeviceScope(device_opt):
        validation_model = model_helper.ModelHelper(
            name='{}_validation_model'.format(config['name']),
            init_params=False,
        )
        data, label = add_input(validation_model, config, is_test=True)
        prediction = add_model(validation_model, config, data, is_test=True)
        add_loss(validation_model, config, prediction, label)
        add_accuracy(validation_model)

    # init workspace for validation net
    workspace.RunNetOnce(validation_model.param_init_net)
    workspace.CreateNet(validation_model.net)
    return validation_model
def createNet(predict_net_path, device_opts, use_cudnn=False):
    """Load a serialized predict net, assign devices, and create it.

    The module-level ``final_dev`` is set to ``device_opts``. When
    ``use_cudnn`` is true every operator is pinned to ``final_dev``, except
    the ``Concat`` producing ``mbox_priorbox``, which is pinned to CPU. All
    operators other than ``PriorBox``, ``Norm`` and that ``Concat``
    additionally get engine ``'CUDNN'``.

    Args:
        predict_net_path: Path of the serialized ``NetDef``.
        device_opts: Device option used for the bulk of the operators.
        use_cudnn: Whether to rewrite device options and engines.

    Returns:
        The ``caffe2_pb2.NetDef`` that was created in the workspace.
    """
    global final_dev
    final_dev = device_opts

    cpu_dev = caffe2_pb2.DeviceOption()
    cpu_dev.device_type = caffe2_pb2.CPU

    net_def = caffe2_pb2.NetDef()
    # Serialized protobufs are bytes: read in binary mode so the payload is
    # not decoded as text (which fails or corrupts it on Python 3).
    with open(predict_net_path, 'rb') as f:
        net_def.ParseFromString(f.read())

    if use_cudnn:
        for op in net_def.op:
            if op.type == 'Concat' and op.output[0] == 'mbox_priorbox':
                op.device_option.CopyFrom(cpu_dev)
                continue
            op.device_option.CopyFrom(final_dev)
            # PriorBox and Norm run on final_dev but keep their own engine.
            if op.type not in ('PriorBox', 'Norm'):
                op.engine = 'CUDNN'

    workspace.CreateNet(net_def)
    return net_def
def test_special_cases(self):
    """Run reference checks on two hand-picked 3x3 heatmaps.

    Each heatmap is reshaped to ``(1, 1, 3, 3)`` and checked, together with
    a single ``[0, 0, 100, 100]`` box, against
    ``heatmap_approx_keypoint_ref``.
    """
    raw_heatmaps = [
        # special case #1
        [
            [0.14722, 0.807823, 0.447052],
            [0.652919, 0.850923, -0.225462],
            [0.805912, 0.75778, -0.563371],
        ],
        # special case #2
        [
            [3.19541, 3.69551, 3.87579],
            [3.63094, 3.89978, 3.67606],
            [3.78555, 3.87291, 3.28083],
        ],
    ]
    heatmap_tests = [
        np.array(raw).astype(np.float32).reshape((1, 1, 3, 3))
        for raw in raw_heatmaps
    ]
    example_bboxes = np.array([[0, 0, 100, 100]]).astype(np.float32)

    for heatmap_test in heatmap_tests:
        self.assertReferenceChecks(
            device_option=caffe2_pb2.DeviceOption(),
            op=self.op,
            inputs=[heatmap_test, example_bboxes],
            reference=heatmap_approx_keypoint_ref,
        )
def test_cross_nets_no_change(self):
    """Check that no copies are injected when everything is on one device.

    The fills and the FC are created under a CUDA gpu-1 scope and ``data`` is
    pre-mapped to the same device. After
    ``core.InjectDeviceCopiesAmongNetsWithoutB2D`` the first op of the
    second net must still be the FC with its original inputs and device.
    """
    gpu_one = caffe2_pb2.DeviceOption(
        device_type=caffe2_pb2.CUDA,
        cuda_gpu_id=1,
    )
    net = core.Net("test")
    init_net = core.Net("init")

    with core.DeviceScope(gpu_one):
        weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
        bias = init_net.ConstantFill([], 'fc_b', shape=[10, ])
        net.FC(["data", weight, bias], "fc1")

    nets = core.InjectDeviceCopiesAmongNetsWithoutB2D(
        [init_net, net],
        blob_to_device_init={'data': gpu_one},
    )

    fc_op = nets[1]._net.op[0]
    self.assertEqual(fc_op.type, "FC")
    self.assertEqual(fc_op.input[0], "data")
    self.assertEqual(fc_op.input[1], "fc_w")
    self.assertEqual(fc_op.input[2], "fc_b")
    self.assertEqual(fc_op.device_option.device_type, 1)
    self.assertEqual(fc_op.device_option.cuda_gpu_id, 1)
def Inception_v2(order, gpu_engine_ws):
    """Wrap the serialized Inception v2 nets in a ``ModelHelper`` with a loss.

    The init and predict ``NetDef`` files are read from
    ``/work/models/inception_v2`` and given a HIP (gpu 0) device option at
    net level. A ``LabelCrossEntropy`` over ``prob`` / ``label`` and an
    ``AveragedLoss`` named ``loss`` are appended to the predict net.

    Args:
        order: Stored as ``order`` in the model's arg scope.
        gpu_engine_ws: When truthy, stored as ``ws_nbytes_limit`` in the
            model's arg scope.

    Returns:
        Tuple ``(model, 224)``.
    """
    hip_device = caffe2_pb2.DeviceOption()
    hip_device.device_type = caffe2_pb2.HIP
    hip_device.hip_gpu_id = 0

    def _load_on_device(path):
        # Parse a serialized NetDef and stamp the HIP device option on it.
        proto = caffe2_pb2.NetDef()
        with open(path, 'rb') as f:
            proto.ParseFromString(f.read())
        proto.device_option.CopyFrom(hip_device)
        return proto

    init_def = _load_on_device('/work/models/inception_v2/init_net.pb')
    net_def = _load_on_device('/work/models/inception_v2/predict_net.pb')

    init_net = core.Net(init_def)
    predict_net = core.Net(net_def)

    arg_scope = {
        'order': order,
        'use_gpu_engine': True,
        'gpu_engine_exhaustive_search': True,
    }
    if gpu_engine_ws:
        arg_scope['ws_nbytes_limit'] = gpu_engine_ws

    model = model_helper.ModelHelper(name="GoogleNet", arg_scope=arg_scope)
    model.param_init_net = init_net
    model.net = predict_net

    cross_entropy = model.net.LabelCrossEntropy(["prob", "label"], "xent")
    model.net.AveragedLoss(cross_entropy, "loss")
    return model, 224
# test_cross_nets_no_change: creates the weight/bias fills on an "init" net and
# an FC on a "test" net using a CUDA gpu-1 device option, pre-maps 'data' to the
# same device, runs core.InjectDeviceCopiesAmongNetsWithoutB2D over both nets,
# blanks the second net's name, and compares str(proto).strip() of that net
# against ref_str.strip().
# NOTE(review): ref_str is a whitespace-sensitive multi-line literal and appears
# collapsed onto one line here; the code is left untouched because its original
# line breaks and indentation cannot be reproduced with certainty.
def test_cross_nets_no_change(self): net = core.Net("test") init_net = core.Net("init") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA device_option.cuda_gpu_id = 1 with core.DeviceScope(device_option): weight = init_net.XavierFill([], 'fc_w', shape=[10, 100]) bias = init_net.ConstantFill([], 'fc_b', shape=[ 10, ]) net.FC(["data", weight, bias], "fc1") data_remap = {'data': device_option} nets = core.InjectDeviceCopiesAmongNetsWithoutB2D( [init_net, net], blob_to_device_init=data_remap) ref_str = """ name: "" op { input: "data" input: "fc_w" input: "fc_b" output: "fc1" name: "" type: "FC" device_option { device_type: 1 cuda_gpu_id: 1 } } external_input: "data" external_input: "fc_w" external_input: "fc_b" """ nets[1].Proto().name = '' # Ignore the name self.assertEqual(str(nets[1].Proto()).strip(), ref_str.strip())
def build_training_model(config):
    """Build the training model and initialise it in the workspace.

    Input, network, softmax loss, training operators and accuracy are added
    in training mode under a device scope that is CUDA on
    ``config['gpu_id']`` when that value is not ``None`` and the default
    device option otherwise. The parameter init net is run once; when
    ``config['finetune']`` is truthy, ``load_init_net`` is then called with
    ``config['network']['init_net']``. Finally the main net is created.

    Returns:
        The ``model_helper.ModelHelper`` for the training model.
    """
    # set device
    device_opt = caffe2_pb2.DeviceOption()
    gpu_id = config['gpu_id']
    if gpu_id is not None:
        device_opt.device_type = caffe2_pb2.CUDA
        device_opt.cuda_gpu_id = gpu_id

    # build model
    with core.DeviceScope(device_opt):
        training_model = model_helper.ModelHelper(
            name='{}_training_model'.format(config['name']),
        )
        data, label = add_input(training_model, config, is_test=False)
        prediction = add_model_all(
            training_model, config, data, is_test=False)
        loss = add_softmax_loss(training_model, prediction, label)
        add_training_operators(training_model, config, loss)
        add_accuracy(training_model)

    # init workspace for training net
    workspace.RunNetOnce(training_model.param_init_net)

    # in finetune mode the pretrained weights and biases are loaded on top
    if config['finetune']:
        load_init_net(config['network']['init_net'], device_opt)

    workspace.CreateNet(training_model.net)
    return training_model
core, device_checker, gradient_checker, model_helper, test_util, workspace, ) from caffe2.python.gradient_checker import NetGradientChecker from caffe2.python.net_builder import ops, NetBuilder from caffe2.proto import caffe2_pb2 import unittest if workspace.has_gpu_support and workspace.NumGpuDevices() > 0: gpu_device_option = caffe2_pb2.DeviceOption() gpu_device_option.device_type = workspace.GpuDeviceType cpu_device_option = caffe2_pb2.DeviceOption() gpu_device_checker = device_checker.DeviceChecker( 0.01, [gpu_device_option] ) device_checker = device_checker.DeviceChecker( 0.01, [gpu_device_option, cpu_device_option] ) gpu_gradient_checkers = [ gradient_checker.GradientChecker( 0.005, 0.05, gpu_device_option, "gpu_checker_ws" ), ] gradient_checkers = [ gradient_checker.GradientChecker(
def GPU(self, gpu_id=0):
    """Return a new CUDA ``DeviceOption`` for device ``gpu_id``."""
    return caffe2_pb2.DeviceOption(
        device_type=caffe2_pb2.CUDA,
        cuda_gpu_id=gpu_id,
    )
def CPU(self):
    """Return a new ``DeviceOption`` whose device type is CPU."""
    return caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)
def sparse_lengths_tensor(**kwargs):
    """Return ``sparse_segmented_tensor`` using ``lengths`` as generator."""
    return sparse_segmented_tensor(segment_generator=lengths, **kwargs)


def tensors(n, min_dim=1, max_dim=4, dtype=np.float32, elements=None,
            **kwargs):
    """Strategy yielding lists of exactly ``n`` arrays sharing one shape.

    The shape is a list of between ``min_dim`` and ``max_dim`` values drawn
    from ``dims(**kwargs)``; the arrays are built with
    ``arrays(shape, dtype, elements)``.
    """
    def _n_arrays_of(shape):
        return st.lists(
            arrays(shape, dtype, elements), min_size=n, max_size=n)

    shapes = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return shapes.flatmap(_n_arrays_of)


cpu_do = caffe2_pb2.DeviceOption()
gpu_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)

# CPU always; the generic CUDA option only when GPU support is available.
device_options = [cpu_do]
if workspace.has_gpu_support:
    device_options.append(gpu_do)

# Include device option for each GPU
expanded_device_options = [cpu_do]
if workspace.has_gpu_support:
    expanded_device_options.extend(
        caffe2_pb2.DeviceOption(
            device_type=caffe2_pb2.CUDA, cuda_gpu_id=gpu_index)
        for gpu_index in range(workspace.NumCudaDevices())
    )


def device_checker_device_options():
    """Strategy that always yields the full ``device_options`` list."""
    return st.just(device_options)


def gradient_checker_device_option():
    """Strategy that samples a single entry of ``device_options``."""
    return st.sampled_from(device_options)
def _get_device_option(producer_op):
    """Return the device option to associate with ``producer_op``'s output.

    A ``CopyGPUToCPU`` operator yields a fresh default ``DeviceOption``; any
    other operator yields its own ``device_option``.
    """
    if producer_op.type != "CopyGPUToCPU":
        return producer_op.device_option
    return caffe2_pb2.DeviceOption()
def deviceOpts():
    """Return a new CUDA ``DeviceOption`` bound to GPU id 3."""
    return caffe2_pb2.DeviceOption(
        device_type=caffe2_pb2.CUDA,
        cuda_gpu_id=3,
    )