def local_device_setter(num_devices=1,
                        ps_device_type='cpu',
                        worker_device='/cpu:0',
                        ps_ops=None,
                        ps_strategy=None):
    """Return a device chooser for `tf.device` that splits ops between
    parameter-server-style devices and a worker device.

    Variable-creating ops (those whose op type is in `ps_ops`) are assigned
    to `/{ps_device_type}:{index}` where the index comes from `ps_strategy`;
    every other op goes to `worker_device`.

    Args:
      num_devices: number of PS devices to round-robin over when no
        `ps_strategy` is supplied.
      ps_device_type: device type string for variables, e.g. 'cpu' or 'gpu'.
      worker_device: device string for all non-variable ops.
      ps_ops: list of op type names treated as variables; defaults to the
        standard TF variable op names.
      ps_strategy: callable mapping an op to a PS device index; defaults to
        round-robin over `num_devices`.

    Returns:
      A function suitable for passing to `tf.device(...)`.

    Raises:
      TypeError: if `ps_strategy` is provided but not callable.
    """
    if ps_ops is None:  # fixed: was `== None`; identity check is the idiom
        ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
    if ps_strategy is None:
        ps_strategy = device_setter._RoundRobinStrategy(num_devices)
    if not six.callable(ps_strategy):
        raise TypeError("ps_strategy must be callable")

    def _local_device_chooser(op):
        # Preserve any device constraint the op already carries; it is
        # merged into the chosen spec below.
        current_device = pydev.DeviceSpec.from_string(op.device or "")
        node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
        if node_def.op in ps_ops:
            ps_device_spec = pydev.DeviceSpec.from_string(
                '/{}:{}'.format(ps_device_type, ps_strategy(op)))
            ps_device_spec.merge_from(current_device)
            return ps_device_spec.to_string()
        else:
            worker_device_spec = pydev.DeviceSpec.from_string(
                worker_device or "")
            worker_device_spec.merge_from(current_device)
            return worker_device_spec.to_string()

    return _local_device_chooser
def device_setter(self, worker_device, ps_ops=None):
    """Create a device-choosing function for `tf.device`.

    Variable-creating ops are spread over parameter-server devices of type
    `self.ps_type` — round-robin when the PS type is 'CPU', greedy
    load-balancing otherwise — while every remaining op is pinned to
    `worker_device`.
    """
    if ps_ops is None:
        ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']

    if self.ps_type == 'CPU':
        strategy = device_setter._RoundRobinStrategy(self.num_gpu)
    else:
        strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
            self.num_gpu, tf.contrib.training.byte_size_load_fn)

    def device_chooser(op):
        # Keep whatever device constraints the op already carries.
        existing = pydev.DeviceSpec.from_string(op.device or "")
        node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
        if node_def.op not in ps_ops:
            chosen = pydev.DeviceSpec.from_string(worker_device or "")
        else:
            chosen = pydev.DeviceSpec.from_string(
                '/{}:{}'.format(self.ps_type.lower(), strategy(op)))
        chosen.merge_from(existing)
        return chosen.to_string()

    return device_chooser
def test_round_robin_placement(self):
    """Variables rotate across PS devices; other ops stay on the worker."""
    ps_devices = [
        '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4'
    ]
    setter = replicate_model_fn._local_device_setter(
        ps_devices=ps_devices,
        ps_strategy=device_setter._RoundRobinStrategy(
            num_tasks=len(ps_devices)),
        worker_device='/device:GPU:2')

    with ops_lib.device(setter):
        first_var = variables.Variable(0.01)
        self.assertEqual('/device:GPU:0', first_var.device)
        second_var = variables.Variable(0.02)
        self.assertEqual('/device:GPU:1', second_var.device)
        third_var = variables.Variable(0.03)
        self.assertEqual('/device:GPU:3', third_var.device)

        # Non-variable ops always land on the worker device.
        first_op = array_ops.concat(first_var, axis=0)
        self.assertEqual('/device:GPU:2', first_op.device)
        second_op = array_ops.concat(second_var, axis=0)
        self.assertEqual('/device:GPU:2', second_op.device)

        # Round-robin resumes where it stopped and wraps around the list.
        fourth_var = variables.Variable(0.03)
        self.assertEqual('/device:GPU:4', fourth_var.device)
        fifth_var = variables.Variable(0.03)
        self.assertEqual('/device:GPU:0', fifth_var.device)
        third_op = array_ops.concat(fourth_var, axis=0)
        self.assertEqual('/device:GPU:2', third_op.device)
def test_vars_are_on_ps_but_ops_are_on_workers(self):
    """With a single PS device every variable lands on it; ops stay on
    the worker device."""
    ps_devices = ['/device:GPU:3']
    setter = replicate_model_fn._local_device_setter(
        ps_devices=ps_devices,
        ps_strategy=device_setter._RoundRobinStrategy(
            num_tasks=len(ps_devices)),
        worker_device='/device:GPU:2')

    with ops_lib.device(setter):
        # All three variables go to the only PS device.
        first_var = variables.Variable(0.01)
        self.assertEqual('/device:GPU:3', first_var.device)
        second_var = variables.Variable(0.02)
        self.assertEqual('/device:GPU:3', second_var.device)
        third_var = variables.Variable(0.03)
        self.assertEqual('/device:GPU:3', third_var.device)

        # Non-variable ops are pinned to the worker device.
        first_op = array_ops.concat(first_var, axis=0)
        self.assertEqual('/device:GPU:2', first_op.device)
        second_op = array_ops.concat(second_var, axis=0)
        self.assertEqual('/device:GPU:2', second_op.device)
def local_device_setter(num_devices=1,
                        ps_device_type='cpu',
                        worker_device='/cpu:0',
                        ps_ops=None,
                        ps_strategy=None):
    """Return a `tf.device` function placing variable ops on PS devices.

    Ops whose type name is in `ps_ops` are placed on
    `/{ps_device_type}:{index}` with the index chosen by `ps_strategy`;
    all other ops are placed on `worker_device`.

    Args:
      num_devices: number of PS devices for the default round-robin strategy.
      ps_device_type: device type for variables ('cpu' or 'gpu').
      worker_device: device string for non-variable ops.
      ps_ops: op type names treated as variables; defaults to the standard
        TF variable op names.
      ps_strategy: callable mapping an op to a PS device index; defaults to
        round-robin over `num_devices`.

    Returns:
      A callable suitable for `tf.device(...)`.

    Raises:
      TypeError: if `ps_strategy` is not callable.
    """
    if ps_ops is None:  # fixed: was `== None`; compare to None by identity
        ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
    if ps_strategy is None:
        ps_strategy = device_setter._RoundRobinStrategy(num_devices)
    if not six.callable(ps_strategy):
        raise TypeError("ps_strategy must be callable")

    def _local_device_chooser(op):
        # Merge any pre-existing device constraint into the chosen spec.
        current_device = pydev.DeviceSpec.from_string(op.device or "")
        node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
        if node_def.op in ps_ops:
            ps_device_spec = pydev.DeviceSpec.from_string('/{}:{}'.format(
                ps_device_type, ps_strategy(op)))
            ps_device_spec.merge_from(current_device)
            return ps_device_spec.to_string()
        else:
            worker_device_spec = pydev.DeviceSpec.from_string(
                worker_device or "")
            worker_device_spec.merge_from(current_device)
            return worker_device_spec.to_string()

    return _local_device_chooser
def local_device_setter(num_devices=1,
                        ps_device_type='cpu',
                        worker_device='/cpu:0',
                        ps_ops=None,
                        ps_strategy=None):
    """Return a `tf.device` function splitting ops between PS and worker.

    NOTE(review): the PS branch below additionally requires `node_def.op`
    (a string op-type name) to be an IndexedSlices/SparseTensor instance,
    which can never hold, so every op currently falls through to
    `worker_device`. The commented-out check shows the conventional
    condition — confirm whether disabling PS placement was intentional.
    Also note the `ps_strategy` parameter is ignored: it is unconditionally
    replaced by a round-robin strategy.
    """
    # cluster = tf.train.ClusterSpec({"worker":["147.46.15.21:123", "147.46.15.21:124", "147.46.15.23:123", "147.46.15.23:124"], "ps":["147.46.15.21:456", "147.46.15.23:678"]})
    if ps_ops is None:  # fixed: was `== None`; identity check is the idiom
        ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
    ps_strategy = device_setter._RoundRobinStrategy(num_devices)

    def _local_device_chooser(op):
        current_device = pydev.DeviceSpec.from_string(op.device or "")
        node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
        # Always False in practice: `node_def.op` is a str, never a tensor
        # type — see the NOTE(review) in the docstring.
        if node_def.op in ps_ops and isinstance(
                node_def.op, (ops.IndexedSlices, sparse_tensor.SparseTensor)):
            # if node_def.op in ps_ops:
            ps_device_spec = pydev.DeviceSpec.from_string('/{}:{}'.format(
                ps_device_type, ps_strategy(op)))
            ps_device_spec.merge_from(current_device)
            return ps_device_spec.to_string()
        else:
            worker_device_spec = pydev.DeviceSpec.from_string(
                worker_device or "")
            worker_device_spec.merge_from(current_device)
            return worker_device_spec.to_string()

    return _local_device_chooser
def get_device_setter(device_category: DeviceCategory, device):
    """Build a `tf.device` function for variable placement.

    Variable ops are spread over the devices of `device_category` — greedy
    load-balancing for GPUs, round-robin for CPUs — while every other op is
    pinned to `device`.
    """
    ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']

    if device_category == DeviceCategory.GPU:
        strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
            len(get_gpu_devices()[0]), tf.contrib.training.byte_size_load_fn)
    else:
        strategy = _RoundRobinStrategy(len(get_cpu_devices()[0]))

    def _local_device_chooser(op):
        # Respect any device already requested on the op.
        existing = pydev.DeviceSpec.from_string(op.device or "")
        node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
        if node_def.op not in ps_ops:
            chosen = pydev.DeviceSpec.from_string(device or "")
        else:
            chosen = pydev.DeviceSpec.from_string(
                '/{}:{}'.format(device_category.name, strategy(op)))
        chosen.merge_from(existing)
        return chosen.to_string()

    return _local_device_chooser
def local_device_setter(num_devices: int = 1,
                        ps_device_type: str = 'cpu',
                        worker_device: str = '/cpu:0',
                        ps_ops: Optional[List[str]] = None,
                        ps_strategy: Optional[Callable] = None) -> Callable:
    """
    Setter for variable placement: ops whose type is in `ps_ops` go to a
    parameter-server device chosen by `ps_strategy`, all other ops go to
    `worker_device`.

    Parameters
    ----------
    num_devices
        number of devices to round-robin over when no `ps_strategy` is given
    ps_device_type
        device type for setting of the variables, e.g. cpu or gpu
    worker_device
        name of worker device receiving all non-variable ops
    ps_ops
        names of parameter server operations; defaults to the standard
        variable op type names
    ps_strategy
        callable mapping an op to a parameter-server device index; defaults
        to round-robin over `num_devices`

    Returns
    -------
    local_device_chooser
        callable to pass to tf.device

    Raises
    ------
    TypeError
        if `ps_strategy` is not callable

    References
    ----------
    source https://github.com/tensorflow/models/blob/master/tutorials/image/
    cifar10_estimator/cifar10_utils.py
    """
    if ps_ops is None:
        ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']

    if ps_strategy is None:
        ps_strategy = device_setter._RoundRobinStrategy(num_devices)
    if not six.callable(ps_strategy):
        raise TypeError("ps_strategy must be callable")

    def _local_device_chooser(op):
        # Preserve any device constraint the op already carries; it is
        # merged into the chosen spec below.
        current_device = pydev.DeviceSpec.from_string(op.device or "")
        node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
        if node_def.op in ps_ops:
            ps_device_spec = pydev.DeviceSpec.from_string('/{}:{}'.format(
                ps_device_type, ps_strategy(op)))
            ps_device_spec.merge_from(current_device)
            return ps_device_spec.to_string()
        else:
            worker_device_spec = pydev.DeviceSpec.from_string(worker_device or "")
            worker_device_spec.merge_from(current_device)
            return worker_device_spec.to_string()

    return _local_device_chooser
def _get_loss_towers(model_fn,
                     mode,
                     features,
                     labels,
                     params,
                     config,
                     devices,
                     local_ps_devices,
                     loss_reduction=losses.Reduction.SUM,
                     name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
  """Replicate the loss computation across devices.

  Invokes `model_fn` once per entry in `devices` (one "tower" per device),
  each time under a device setter that round-robins variables over
  `local_ps_devices` and pins every other op to that tower's device.
  Variables are shared between towers via variable-scope reuse; the first
  tower keeps unprefixed names so existing user references still resolve.

  Returns:
    A list of EstimatorSpec-like objects, one per tower.
  """
  tower_specs = []

  model_fn_args = util.fn_args(model_fn)
  optional_params = {}
  # Only forward `params`/`config` if the user's model_fn accepts them;
  # deep copies keep towers from mutating shared state.
  if 'params' in model_fn_args:
    optional_params['params'] = copy.deepcopy(params)
  if 'config' in model_fn_args:
    optional_params['config'] = copy.deepcopy(config)

  # pylint: disable=protected-access
  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
      num_tasks=len(local_ps_devices))
  # pylint: enable=protected-access

  for i, device in enumerate(devices):
    is_the_first_tower = (i == 0)

    device_setter = _local_device_setter(worker_device=device,
                                         ps_devices=local_ps_devices,
                                         ps_strategy=round_robin_strategy)

    # We would like to preserve the names of the variables and ops that the user
    # might be relying on. Names without a prefix are going to resolve to
    # variables and ops of the first tower.
    name_scope = name_scope_pattern
    if is_the_first_tower:
      name_scope = ''

    # Reuse variables on every tower after the first so all towers share
    # one set of weights.
    with variable_scope.variable_scope('', reuse=not is_the_first_tower):
      with ops_lib.name_scope(name_scope.format(i)):
        with ops_lib.device(device_setter):
          # `labels` may be None (e.g. prediction mode); only shard when
          # present.
          labels_shard = None
          if labels:
            labels_shard = labels[i]

          tower_spec = model_fn(mode=mode,
                                features=features[i],
                                labels=labels_shard,
                                **optional_params)
          # SUM needs no scaling; other reductions divide each tower's
          # loss by the tower count.
          if loss_reduction != losses.Reduction.SUM:
            tower_spec = _scale_tower_loss(
                tower_spec, number_of_towers=len(devices))
          tower_specs.append(tower_spec)

  return tower_specs
def local_device_setter(cluster, worker_device='/cpu:0'):
    """Return a replica device function for the given cluster.

    Variable-creating ops (including `AutoReloadVariable`) are assigned
    round-robin to the tasks of the 'ps' job of `cluster`; every other op
    is placed on `worker_device`.

    Args:
      cluster: a `tf.train.ClusterSpec`-like object whose `as_dict()`
        contains a 'ps' job.
      worker_device: device string for non-variable ops.

    Returns:
      A device function suitable for `tf.device(...)`.
    """
    ps_ops = ['Variable', 'VariableV2', 'VarHandleOp', 'AutoReloadVariable']
    cluster_spec = cluster.as_dict()
    ps_device = "/job:ps/task:0/cpu:0"
    # Derive the PS job name from the device string rather than hard-coding
    # 'ps' twice.
    ps_job_name = pydev.DeviceSpec.from_string(ps_device).job
    ps_tasks = len(cluster_spec[ps_job_name])
    ps_strategy = device_setter._RoundRobinStrategy(ps_tasks)
    # Removed leftover debug print() statements that logged the chooser
    # configuration on every call.
    chooser = _ReplicaDeviceChooser(ps_tasks, ps_device, worker_device,
                                    True,  # merge_devices
                                    ps_ops, ps_strategy)
    return chooser.device_function
def _get_loss_towers(model_fn,
                     mode,
                     features,
                     labels,
                     params,
                     config,
                     devices,
                     local_ps_devices,
                     loss_reduction,
                     name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
  """Replicate the loss computation across devices.

  Calls `model_fn` once per entry in `devices` (one "tower" per device)
  under a device setter that round-robins variables across
  `local_ps_devices` and pins all other ops to the tower's device.
  Variables are shared across towers via variable-scope reuse; the first
  tower keeps unprefixed names so user references still resolve. Tower
  bookkeeping (reduction, per-tower scopes, optimizer-call consistency) is
  tracked through `TowerOptimizer`'s graph state.

  Returns:
    A list of tower specs (one `model_fn` result per device).

  Raises:
    ValueError: if multiple devices are used with a train_op but no
      TowerOptimizer, or if towers made differing optimizer calls.
  """
  tower_specs = []

  model_fn_args = function_utils.fn_args(model_fn)
  optional_params = {}
  # Forward `params`/`config` only when model_fn accepts them; deep copies
  # keep towers from mutating shared state.
  if 'params' in model_fn_args:
    optional_params['params'] = copy.deepcopy(params)
  if 'config' in model_fn_args:
    optional_params['config'] = copy.deepcopy(config)

  # pylint: disable=protected-access
  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
      num_tasks=len(local_ps_devices))
  # Record reduction and tower count before any tower runs, so the
  # TowerOptimizer can scale gradients consistently.
  TowerOptimizer._graph_state().set_reduction_across_towers(
      loss_reduction, len(devices))

  for i, device in enumerate(devices):
    is_the_first_tower = (i == 0)
    device_setter = _local_device_setter(
        worker_device=device,
        ps_devices=local_ps_devices,
        ps_strategy=round_robin_strategy)

    # We would like to preserve the names of the variables and ops that the user
    # might be relying on. Names without a prefix are going to resolve to
    # variables and ops of the first tower.
    name_scope = name_scope_pattern
    if is_the_first_tower:
      name_scope = ''

    # Reuse variables on every tower after the first so all towers share
    # one set of weights.
    with variable_scope.variable_scope(
        '', reuse=not is_the_first_tower) as var_scope:
      with ops_lib.name_scope(name_scope.format(i)) as name_scope:
        # Register this tower's scopes with the TowerOptimizer graph state
        # so optimizer calls inside model_fn are attributed to tower `i`.
        with TowerOptimizer._graph_state().tower(
            tower_id=i, var_scope=var_scope, name_scope=name_scope):
          with ops_lib.device(device_setter):
            # `labels` may be None (e.g. prediction); only shard when set.
            labels_shard = None
            if labels:
              labels_shard = labels[i]

            tower_spec = model_fn(
                mode=mode,
                features=features[i],
                labels=labels_shard,
                **optional_params)

            if (tower_spec.train_op is not None and len(devices) > 1 and
                not TowerOptimizer.has_been_used()):
              raise ValueError('Please wrap optimizers with TowerOptimizer'
                               ' in order to use replicate_model_fn with'
                               ' multiple `devices`.')

            # Scaling the loss here doesn't actually affect gradients. Another
            # instance of scaling happens inside the TowerOptimizer.
            tower_spec = _scale_tower_loss(
                tower_spec, loss_reduction, number_of_towers=len(devices))
            tower_specs.append(tower_spec)

  if not TowerOptimizer._did_towers_have_same_optimizer_calls():
    raise ValueError('Each invocation of model_fn was supposed to make the same'
                     ' optimizer calls.')
  TowerOptimizer._clear_graph_state()
  # pylint: enable=protected-access
  return tower_specs
def _get_loss_towers(model_fn,
                     mode,
                     features,
                     labels,
                     params,
                     config,
                     devices,
                     local_ps_devices,
                     loss_reduction,
                     name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
  """Replicate the loss computation across devices.

  Runs `model_fn` once per device ("tower") under a device setter that
  round-robins variables over `local_ps_devices` and keeps all other ops
  on the tower's device. Variables are shared between towers via
  variable-scope reuse; the first tower keeps unprefixed names so existing
  user references still resolve. Tower bookkeeping (loss reduction,
  per-tower scopes, optimizer-call consistency) goes through
  `TowerOptimizer`'s graph state.

  Returns:
    A list of tower specs (one `model_fn` result per device).

  Raises:
    ValueError: if multiple devices are used with a train_op but no
      TowerOptimizer, or if towers made differing optimizer calls.
  """
  tower_specs = []

  model_fn_args = util.fn_args(model_fn)
  optional_params = {}
  # Forward `params`/`config` only when model_fn accepts them; deep copies
  # keep towers from mutating shared state.
  if 'params' in model_fn_args:
    optional_params['params'] = copy.deepcopy(params)
  if 'config' in model_fn_args:
    optional_params['config'] = copy.deepcopy(config)

  # pylint: disable=protected-access
  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
      num_tasks=len(local_ps_devices))
  # Record reduction and tower count up front so TowerOptimizer can scale
  # gradients consistently across towers.
  TowerOptimizer._graph_state().set_reduction_across_towers(
      loss_reduction, len(devices))

  for i, device in enumerate(devices):
    is_the_first_tower = (i == 0)
    device_setter = _local_device_setter(
        worker_device=device,
        ps_devices=local_ps_devices,
        ps_strategy=round_robin_strategy)

    # We would like to preserve the names of the variables and ops that the user
    # might be relying on. Names without a prefix are going to resolve to
    # variables and ops of the first tower.
    name_scope = name_scope_pattern
    if is_the_first_tower:
      name_scope = ''

    # Reuse variables on every tower after the first so all towers share
    # one set of weights.
    with variable_scope.variable_scope(
        '', reuse=not is_the_first_tower) as var_scope:
      with ops_lib.name_scope(name_scope.format(i)) as name_scope:
        # Register this tower's scopes with the TowerOptimizer graph state
        # so optimizer calls inside model_fn are attributed to tower `i`.
        with TowerOptimizer._graph_state().tower(
            tower_id=i, var_scope=var_scope, name_scope=name_scope):
          with ops_lib.device(device_setter):
            # `labels` may be None (e.g. prediction); only shard when set.
            labels_shard = None
            if labels:
              labels_shard = labels[i]

            tower_spec = model_fn(
                mode=mode,
                features=features[i],
                labels=labels_shard,
                **optional_params)

            if (tower_spec.train_op is not None and len(devices) > 1 and
                not TowerOptimizer.has_been_used()):
              raise ValueError('Please wrap optimizers with TowerOptimizer'
                               ' in order to use replicate_model_fn with'
                               ' multiple `devices`.')

            # Scaling the loss here doesn't actually affect gradients. Another
            # instance of scaling happens inside the TowerOptimizer.
            tower_spec = _scale_tower_loss(
                tower_spec, loss_reduction, number_of_towers=len(devices))
            tower_specs.append(tower_spec)

  if not TowerOptimizer._did_towers_have_same_optimizer_calls():
    raise ValueError('Each invocation of model_fn was supposed to make the same'
                     ' optimizer calls.')
  TowerOptimizer._clear_graph_state()
  # pylint: enable=protected-access
  return tower_specs