def _register_as(self, type, **kwargs):
    """Register a filler of the given type for this tensor.

    Parameters
    ----------
    type : str
        The filler type, matched case-insensitively.
    **kwargs
        Optional settings. Depending on ``type`` the keys ``value``,
        ``mean``, ``std``, ``low``, ``high``, ``scale`` and ``mode``
        are read; missing keys fall back to the defaults below.

    Returns
    -------
    The self.

    """
    mode_to_norm = {'fan_in': 0, 'fan_out': 1, 'fan_avg': 2}
    filler = dragon_pb2.FillerInfo()
    filler.type = type.lower()
    kind = filler.type
    if kind == 'constant':
        filler.value = kwargs.get('value', 0)
    elif kind in ('normal', 'gaussian'):
        filler.mean = kwargs.get('mean', 0)
        filler.std = kwargs.get('std', 1)
        # 'gaussian' is stored under the canonical name 'normal'.
        filler.type = 'normal'
    elif kind == 'uniform':
        filler.low = kwargs.get('low', 0)
        filler.high = kwargs.get('high', 1)
    elif kind == 'truncated_normal':
        filler.mean = kwargs.get('mean', 0)
        filler.std = kwargs.get('std', 1)
        # Truncate at two standard deviations around the stored mean.
        filler.low = filler.mean - 2.0 * filler.std
        filler.high = filler.mean + 2.0 * filler.std
    elif kind in ('glorot_uniform', 'xavier'):
        filler.scale = kwargs.get('scale', 3)
        filler.variance_norm = mode_to_norm[kwargs.get('mode', 'fan_in')]
    elif kind in ('glorot_normal', 'msra'):
        filler.scale = kwargs.get('scale', 2)
        filler.variance_norm = mode_to_norm[kwargs.get('mode', 'fan_in')]
    # Unrecognized types are still registered, with only ``type`` set.
    workspace.get_workspace().create_tensor(self.name, filler)
    return self
def _set_value(input, value):
    """Feed ``value`` into the workspace tensor identified by ``input.id``.

    Parameters
    ----------
    input : object
        An object exposing an ``id`` attribute.
    value : object
        The value handed to ``feed_tensor``.

    Raises
    ------
    ValueError
        If ``input`` has no ``id`` attribute.

    """
    if not hasattr(input, 'id'):
        raise ValueError('Input is not a legal tensor.')
    current_ws = workspace.get_workspace()
    current_ws.feed_tensor(input.id, value, enforce_cpu=True)
def accumulate(self, momentum):
    """Accumulate the gradient of params.

    Call this method after each ``backward`` pass:

    ```python
    x = torch.ones(1, requires_grad=True)
    optimizer = torch.optim.SGD([x], lr=0.1)
    for epoch in range(2):
        for step in range(3):
            y = x + 1
            y.backward()
            # Note to zero the accumulation at the first step
            optimizer.accumulate(momentum=1 if step > 0 else 0)
        optimizer.step()
    print(x)  # 0.4
    ```

    Parameters
    ----------
    momentum : float, required
        The momentum to the accumulated value.

    """
    # NOTE(review): ``momentum`` is not forwarded to ``accumulate_grad``
    # below, so it currently has no effect here -- confirm against the
    # signature of ``training_funcs.accumulate_grad``.
    current_ws = workspace.get_workspace()
    for group in self.param_groups:
        # Mark the group as being in accumulation mode.
        group['_internal/grad_accum'] = True
        for param in group['params']:
            grad = self._steal_grad(current_ws, param)
            if grad is None:
                continue
            training_funcs.accumulate_grad(grad)
def __init__(self):
    """Initialize empty recording state bound to the current workspace."""
    # Recorded definitions plus the watched / empty-gradient sets.
    self._defs = []
    self._watched = set()
    self._empty_grads = set()
    # Collectors of the workspace that is current at construction time.
    current_ws = workspace.get_workspace()
    self._gc = current_ws.collectors
    # Both retention flags start disabled.
    self._retain_graph = self._retain_op_handles = False
def from_value(cls, value, dtype=None, name=None):
    """Return a tensor converted from the given value.

    Parameters
    ----------
    value : array_like
        The value to convert.
    dtype : str, optional
        The data type used when ``value`` is not already a
        ``numpy.ndarray`` (``'float32'`` if omitted). An ndarray
        input keeps its own dtype.
    name : str, optional
        The optional tensor name (``'Const'`` if omitted).

    Returns
    -------
    dragon.Tensor
        The output tensor.

    """
    if not isinstance(value, numpy.ndarray):
        value = numpy.array(value, dtype or 'float32')
    base_name = name if name else 'Const'
    # The current name scope is always prepended to the base name.
    unique_name = workspace.get_workspace().unique_name(
        name=context.get_name_scope() + base_name,
        suffix=':0',
        namespace='Tensor',
    )
    tensor_ref = TensorRef(unique_name, list(value.shape), str(value.dtype))
    return tensor_ref.set_value(value)
def sum_grad(self):
    """Sum the gradients of all parameters.

    Call this method after each ``backward`` pass:

    ```python
    x = torch.ones(1, requires_grad=True)
    optimizer = torch.optim.SGD([x], lr=0.1)
    for epoch in range(2):
        for step in range(3):
            y = x + 1
            y.backward()
            optimizer.sum_grad()
        optimizer.step()
    print(x)  # 0.4
    ```

    Parameter groups in which no parameter currently has a
    gradient are skipped.

    """
    current_ws = workspace.get_workspace()
    # On the first summation the previous sum is discarded (beta=0);
    # afterwards the new gradients are added to it (beta=1).
    beta = 1. if self._sums_grad else 0.
    for group in self.param_groups:
        grads, sum_grads = [], []
        for param in group['params']:
            grad = self._get_grad(current_ws, param)
            if grad is not None:
                grads.append(grad)
                sum_grads.append(grad.id + '_sum')
        if not grads:
            # Nothing to sum; ``grads[0]`` below would raise IndexError.
            continue
        Function.apply(
            'Axpby',
            grads[0].device,
            grads,
            outputs=sum_grads,
            alpha=1.,
            beta=beta,
        )
    self._sums_grad = True
def backward(outputs, grad_outputs, retain_graph=False):
    """Compute the function derivatives w.r.t graph leaves.

    Parameters
    ----------
    outputs : Sequence
        The tensors to differentiate.
    grad_outputs : Sequence
        The tensors whose ids are passed as gradient targets.
    retain_graph : bool, optional, default=False
        **False** to release the tape handles after execution.

    """
    # Walk from the outputs back through their tapes, merging every
    # tape met on the way and collecting the ids of the leaves.
    graph_tape = tapes.OrderedTape()
    leaf_ids, visited = set(), set()
    pending = list(outputs)
    while pending:
        tensor = pending.pop(0)
        key = id(tensor)
        if key in visited:
            continue
        visited.add(key)
        tape = tensor._tape
        if tape:
            graph_tape.merge_from(tape)
            pending.extend(tape.get_sources())
            # The tape is detached from the tensor once merged.
            tensor._tape = None
            is_leaf = tensor._retains_grad
        else:
            is_leaf = tensor._requires_grad
        if is_leaf:
            leaf_ids.add(tensor.id)
    # Emit to dispatch backward execution.
    execute_ws = workspace.get_workspace()
    execute_ws.run_backward(
        op_defs=graph_tape.get_elements(),
        targets=[y.id for y in outputs],
        grad_targets=[dy.id for dy in grad_outputs],
        sources=list(leaf_ids),
    )
    # Free handles if graph not retained.
    if retain_graph:
        return
    for handle in graph_tape.get_handles():
        execute_ws.release_handle(handle)
def add_update_defs(graph_def, optimizer):
    """Append the optimizer's update operators to ``graph_def``.

    Parameters
    ----------
    graph_def : GraphDef
        The graph definition whose ``op`` field is extended.
    optimizer : object
        The optimizer providing ``_param_group``, ``_extra_kwargs``,
        ``_op_handle``, ``_op_type`` and ``_process_group``.

    """
    # Note: this writes 'handle' into ``optimizer._extra_kwargs`` itself.
    shared_args = optimizer._extra_kwargs
    shared_args['handle'] = optimizer._op_handle
    current_ws = workspace.get_workspace()
    live_grads, new_defs = [], []
    # One update def per (param, grad) pair whose grad tensor exists.
    for (param, grad), arguments in optimizer._param_group:
        if not current_ws.has_tensor(grad):
            logging.info('Skip to update Tensor({}).'.format(param))
            continue
        live_grads.append(grad)
        merged_args = dict(arguments, **shared_args)
        new_defs.append(proto_util.make_operator_def(
            op_type=optimizer._op_type,
            inputs=[grad],
            outputs=[param],
            name=OpDef.get_name(),
            **merged_args))
    # With a process group, an in-place collective over the gradients
    # is placed ahead of all the updates.
    process_group = optimizer._process_group
    if process_group is not None:
        collective_def = proto_util.make_operator_def(
            op_type='Collective',
            inputs=live_grads,
            outputs=live_grads,
            name=OpDef.get_name(),
            operation='MEAN',
            communication='ALLREDUCE',
            **process_group.arguments)
        new_defs.insert(0, collective_def)
    graph_def.op.extend(new_defs)
def __init__(self, model, device, **kwargs):
    """Create a ``BackendRep``.

    Parameters
    ----------
    model : str
        The path of onnx model file.
    device : onnx.Device
        The executing device. Anything that is not already a
        ``Device`` is passed to the ``Device`` constructor.

    Raises
    ------
    ValueError
        If the device type is neither CPU nor CUDA.

    """
    if not isinstance(device, Device):
        device = Device(device)
    execute_ws = workspace.get_workspace()
    if device.type == DeviceType.CPU:
        device_type, device_index = 'cpu', 0
    elif device.type == DeviceType.CUDA:
        device_type, device_index = 'cuda', device.device_id
    else:
        # Format rather than concatenate: ``device.type`` may not be a
        # string, and ``str + non-str`` would raise TypeError instead.
        raise ValueError(
            'Unsupported device type: {}'.format(device.type))
    # Build the graph under the selected device scope.
    with context.device(device_type, device_index):
        self._context = GraphLib.from_onnx(model)
    # Wrap the workspace tensors of the graph inputs/outputs, by name.
    self._input_dict = collections.OrderedDict()
    self._output_dict = collections.OrderedDict()
    for input in self._context._def.input:
        impl = execute_ws.get_tensor(input)
        self._input_dict[input] = Tensor(impl=impl)
    for output in self._context._def.output:
        impl = execute_ws.get_tensor(output)
        self._output_dict[output] = Tensor(impl=impl)
    self._output_tuple = namedtupledict(
        'Outputs', self._context._def.output)
def _from_array(self, array):
    """Create implementation from the array.

    Parameters
    ----------
    array : numpy.ndarray
        The array passed to ``FromNumpy``; its ``size`` is recorded
        as ``_const_size``.

    """
    current_ws = workspace.get_workspace()
    self._const_size = array.size
    self._gc = current_ws.collectors.TENSOR
    self._is_leaf = True
    # Allocate from the tensor collector under the eager scope, then
    # create the workspace tensor and load the array into it.
    allocated = self._gc.alloc(context.get_eager_scope())
    self._impl = current_ws.create_tensor(allocated).FromNumpy(array)
def scalar(input, dtype, device):
    """Return a cached scalar tensor.

    Parameters
    ----------
    input : number
        The scalar value. A ``Tensor`` is returned unchanged.
    dtype : str
        The data type of output tensor.
    device : dragon.vm.torch.device
        The device of output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    Raises
    ------
    ValueError
        If ``input`` cannot be converted to ``float``.

    """
    if isinstance(input, Tensor):
        return input
    try:
        input = float(input)
    except (TypeError, ValueError):
        raise ValueError('<input> should be a python number, got {}.'.format(
            type(input).__name__))
    # Scalars are cached in the workspace under a "dtype(value)" key.
    cache_key = '%s(%s)' % (dtype, input)
    current_ws = workspace.get_workspace()
    cached_impl = current_ws.get_tensor(cache_key)
    if cached_impl is None:
        # First request for this key: create and fill the tensor.
        cached_impl = current_ws.create_tensor(cache_key)
        cached_impl.FromNumpy(numpy.array(input, dtype), True)
    return Tensor(device=device, impl=cached_impl)
def _set_hyper(self, name, value, alias=None):
    """Set the specific hyper parameter.

    Parameters
    ----------
    name : str
        The key of the hyper parameter.
    value : object
        The new value. If the stored entry is a tensor, the value is
        fed into it; otherwise the entry is replaced.
    alias : str, optional
        If given and no alias exists for ``name`` yet, a shared
        alias path is recorded.

    """
    hyper = self._hyper
    if name in hyper and types.is_tensor(hyper[name]):
        # Existing tensor entry: update its value in the workspace.
        workspace.get_workspace().feed_tensor(
            hyper[name].id,
            value,
            dtype='float32',
            enforce_cpu=True,
        )
    else:
        # New key or a plain (non-tensor) entry: store the value.
        hyper[name] = value
    if alias and name not in self._alias:
        self._alias[name] = '/share/hyper/%s/%s' % (self._op_handle, alias)
def _add_grads(graph_def, outputs):
    """Add the gradient operators for ``outputs`` to ``graph_def``.

    Parameters
    ----------
    graph_def : GraphDef
        The graph definition to extend in place.
    outputs : Sequence
        The graph outputs; those carrying a ``_grad_tape`` are used
        as gradient targets and have the attribute cleared.

    Raises
    ------
    RuntimeError
        If the outputs carry different gradient tapes.

    """
    grad_tape = tapes.Tape()
    targets = []
    for output in outputs:
        if not (hasattr(output, '_grad_tape') and output._grad_tape):
            continue
        if output._grad_tape != grad_tape and len(targets) > 0:
            raise RuntimeError('Create graph from multiple gradient tapes.')
        grad_tape = output._grad_tape
        output._grad_tape = None
        targets.append(output)
    # Unreachable as written (``grad_tape`` starts as a Tape and is only
    # replaced by truthy tapes); kept for parity with the original.
    if grad_tape is None:
        return
    op_defs = grad_tape.get_elements()
    if len(op_defs) == 0:
        return
    execute_ws = workspace.get_workspace()
    ys = [y.id for y in targets]
    # A target without a usable ``_grad`` contributes an empty id.
    dys = [getattr(y._grad, 'id', '') for y in targets]
    serialized_defs = [op_def.SerializeToString() for op_def in op_defs]
    grad_defs = backend.GradientTape().CreateGradientDefs(
        serialized_defs, ys, dys)
    for serialized in grad_defs:
        grad_def = dragon_pb2.OperatorDef()
        grad_def.ParseFromString(serialized)
        grad_def.name = execute_ws.create_handle('Op')
        graph_def.op.extend([grad_def])
    if len(grad_defs) > 0:
        xs = [x.id for x in grad_tape.get_sources()]
        graph_def.arg.extend([
            proto_util.make_argument('grad_sources', xs),
            proto_util.make_argument('phase', 'TRAIN'),
        ])
def set_value(self, value):
    """Feed ``value`` to this tensor through the current workspace.

    Parameters
    ----------
    value : array_like
        The value to set.

    Returns
    -------
    dragon.Tensor
        The self.

    """
    current_ws = workspace.get_workspace()
    current_ws.feed_tensor(self, value)
    return self
def load_weights_from_pickle(f, layer, verbose=False):
    """Load the weights of ``layer`` from a pickled name-to-array dict.

    Parameters
    ----------
    f : file-like
        The binary file object to unpickle from.
    layer : object
        The object whose ``weights`` are matched by ``name``.
    verbose : bool, optional, default=False
        **True** to log every weight that is loaded.

    Raises
    ------
    ValueError
        If a stored value's shape differs from the weight's shape.

    """
    def _join_dims(dims):
        return ', '.join([str(d) for d in dims])

    ws = workspace.get_workspace()
    # SECURITY: unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    weight_dict = six.moves.pickle.load(f)
    for weight in layer.weights:
        name = weight.name
        # Weights absent from the file are left untouched.
        if name not in weight_dict:
            continue
        value = weight_dict[name]
        value_shape = list(value.shape)
        weight_shape = list(weight.shape)
        if value_shape != weight_shape:
            raise ValueError(
                'Shape of weight({}) is ({}), \n'
                'While load from shape of ({}).'
                .format(name, _join_dims(weight_shape),
                        _join_dims(value_shape))
            )
        weight_impl = ws.GetTensor(weight.id)
        if weight_impl is None:
            logging.warning(
                'Weight({}) is not created '
                'in current workspace. Skip.'.format(name))
            continue
        # Load a copy so the tensor never aliases the unpickled array.
        weight_impl.FromNumpy(value.copy())
        if verbose:
            logging.info(
                'Weight({}) loaded, Size: ({})'
                .format(name, _join_dims(value_shape)))
def zero_grad(self, reset=False):
    """Set the gradient of params to zero.

    This method is not necessary usually, as we will overwrite the
    gradients in the next computation.

    However, if some gradients are not computed every time,
    remember to reset them before ``step(...)``:

    ```python
    m1 = torch.nn.Linear(3, 3)
    m2 = torch.nn.Linear(3, 3)
    x = torch.ones(1, 3, requires_grad=True)
    for i in range(10):
        x = m1(x)
        if i in (2, 4, 6):
            x += m2(x)
    optimizer.zero_grad(reset=True)
    x.backward()
    optimizer.step()
    ```

    Parameters
    ----------
    reset : bool, optional, default=False
        **True** to reset the memory instead of zeroing.

    """
    current_ws = workspace.get_workspace()
    for group in self.param_groups:
        for param in group['params']:
            grad = self._steal_grad(current_ws, param)
            if grad is None:
                continue
            if reset:
                current_ws.reset_tensor(grad)
            else:
                grad.zero_()
def _build_graphs(self, *args, **kwargs):
    """Trace the wrapped function and build its graphs.

    The traced inputs, outputs and graphs are stored in the attribute
    cache entry of the current workspace.

    Returns
    -------
    list
        The graph created from the tensor outputs (if any), followed
        by every ``GraphExec`` found among the other outputs.

    """
    attributes = self._attribute_cache[workspace.get_workspace()]
    input_signature = self._spec.input_signature
    args, kwargs = self._spec.separate_inputs(*args, **kwargs)
    inputs = []
    for index in range(self._spec.num_inputs):
        arg = args[index]
        spec = input_signature[index] if input_signature is not None else None
        if spec is None and not isinstance(arg, Tensor):
            # Neither a tensor nor covered by a spec: pass through as-is.
            inputs.append(arg)
            continue
        if spec is not None:
            # The signature takes priority over the argument's own info.
            shape, dtype = spec.shape, spec.dtype
        else:
            shape = getattr(arg, 'shape', None)
            dtype = getattr(arg, 'dtype', None)
        inputs.append(Tensor(
            shape, dtype, name='Input_%d' % (index + 1), symbolic=True))
    with eager_context.graph_mode():
        outputs = self._run_function(*inputs, **kwargs)
    # Split the flattened outputs into tensors and everything else.
    graph_outputs, dummies = [], []
    for output in nest.flatten(outputs):
        target = graph_outputs if isinstance(output, Tensor) else dummies
        target.append(output)
    graphs = []
    if len(graph_outputs) > 0:
        graphs.append(GraphLib.from_outputs(graph_outputs))
    graphs.extend(obj for obj in dummies if isinstance(obj, GraphExec))
    attributes['inputs'] = inputs
    attributes['outputs'] = outputs
    attributes['graphs'] = graphs
    return graphs
def placeholder(dtype=None, shape=None, name=None):
    """Return a symbolic tensor as the placeholder.

    Parameters
    ----------
    dtype : str, optional
        The data type provided to cast the input.
    shape : Sequence[int], optional
        The optional tensor shape.
    name : str, optional
        The optional tensor name (``'Placeholder'`` if omitted).

    Returns
    -------
    dragon.Tensor
        The output tensor.

    """
    # Parenthesize the fallback so that the name scope prefix is applied
    # to the default name as well; ``scope + name if name else default``
    # parses as ``(scope + name) if name else default``.
    scoped_name = context.get_name_scope() + (name if name else 'Placeholder')
    return TensorRef(
        workspace.get_workspace().unique_name(
            scoped_name, suffix=':0', namespace='Tensor'),
        dtype=dtype,
        shape=shape,
    ).constant()
def _set_hyper(self, name, value):
    """Set value to a hyper parameter.

    Parameters
    ----------
    name : str
        The key of the hyper parameter.
    value : number
        The value, stored as a 0-d ``float32`` array.

    """
    impl = self._hyper.get(name) if name in self._hyper else None
    if name not in self._hyper:
        # First use: back the parameter with a new workspace tensor
        # named "<self._name>/<name>".
        impl = workspace.get_workspace().create_tensor(self._name + '/' + name)
        self._hyper[name] = impl
    impl.FromNumpy(numpy.array(float(value), 'float32'), False)
def _get_value(input):
    """Return the value stolen from input.

    Parameters
    ----------
    input : object
        Either an object exposing ``id`` or a plain value.

    Returns
    -------
    object
        ``input`` itself when it has no ``id`` attribute; otherwise
        the result of ``ToNumpy(True)`` on the workspace tensor, or
        ``None`` if the workspace has no such tensor.

    """
    if not hasattr(input, 'id'):
        return input
    impl = workspace.get_workspace().GetTensor(input.id)
    return None if impl is None else impl.ToNumpy(True)
def save_weights_to_pickle(f, layer):
    """Pickle the weights of ``layer`` as an ordered name-to-array dict.

    Parameters
    ----------
    f : file-like
        The binary file object to write to.
    layer : object
        The object whose ``weights`` are saved. Weights without a
        tensor in the current workspace are omitted.

    """
    current_ws = workspace.get_workspace()
    arrays = collections.OrderedDict()
    for weight in layer.weights:
        impl = current_ws.GetTensor(weight.id)
        if impl is None:
            continue
        arrays[weight.name] = impl.ToNumpy(True)
    six.moves.pickle.dump(arrays, f, PICKLE_DEFAULT_PROTOCOL)
def from_updates(grads_and_vars, optimizer, name=None):
    """Create a graph from the updates.

    Parameters
    ----------
    grads_and_vars : Sequence
        The gradient/variable pairs passed to ``_add_updates``.
    optimizer : object
        The optimizer passed to ``_add_updates``.
    name : str, optional
        The graph name (``'Graph'`` if omitted).

    Returns
    -------
    GraphExec
        The executable bound to the current workspace.

    """
    if name is None:
        name = 'Graph'
    execute_ws = workspace.get_workspace()
    graph_def = dragon_pb2.GraphDef(name=name)
    GraphLib._add_updates(graph_def, grads_and_vars, optimizer)
    GraphLib._add_device(graph_def)
    # The workspace returns the final name of the created graph.
    final_name = execute_ws.create_graph(graph_def)
    graph_def.name = final_name
    return GraphExec(graph_def, execute_ws)
def _set_name(self, name=None, zero_based=True):
    """Set the module name.

    Parameters
    ----------
    name : str, optional
        The explicit name. If ``None``, a unique name is generated
        from the lower-cased class name.
    zero_based : bool, optional, default=True
        Forwarded to ``unique_name`` when a name is generated.

    """
    if name is not None:
        self._name = name
        return
    current_ws = workspace.get_workspace()
    self._name = current_ws.unique_name(
        name=self.__class__.__name__.lower(),
        namespace='TensorLayerModule',
        zero_based=zero_based,
    )
def add(op_type, inputs, **kwargs):
    """Add operator to output symbols.

    Parameters
    ----------
    op_type : str
        The operator type.
    inputs : nested structure
        The input tensors; flattened before use.
    **kwargs
        ``extra_inputs``, ``name`` and ``num_outputs`` are consumed
        here; the rest is forwarded to the operator def.

    Returns
    -------
    The single output if ``num_outputs`` is 1, else the outputs.

    """
    op_tape = tapes.OrderedTape()
    graph_tape = tapes.get_tape()
    execute_ws = workspace.get_workspace()
    # Add inputs. Gradients are enabled once a tape is recording and
    # some input requires grad or is a target of that tape.
    enable_grad = False
    inputs = nest.flatten(inputs)
    for x in inputs:
        op_tape.add_source(x)
        if graph_tape and (x.requires_grad or graph_tape.is_target(id(x))):
            enable_grad = True
    # Add extra inputs, registered as both source and target.
    for extra in nest.flatten(kwargs.pop('extra_inputs', [])):
        op_tape.add_source(extra)
        op_tape.add_target(extra.id)
    # Add outputs.
    name = kwargs.pop('name', None)
    num_outputs = kwargs.pop('num_outputs', 1)
    outputs = [
        Tensor(
            impl=execute_ws.create_tensor(scope='Tensor'),
            name=name if name else op_type + ':%d' % index,
            symbolic=True,
        )
        for index in range(num_outputs)
    ]
    # Create def.
    op_def = proto_util.make_operator_def(
        op_type=op_type,
        inputs=[x.id for x in inputs],
        outputs=[y.id for y in outputs],
        device_option=proto_util.get_default_device_option(),
        name=execute_ws.create_handle('Op'),
        **kwargs)
    # Record def.
    op_tape.add_element(op_def)
    if enable_grad:
        graph_tape.add_element(op_def)
    # Set tape for outputs.
    for y in outputs:
        y._tape = op_tape
        y._requires_grad = enable_grad
    # Add spec to outputs, falling back to the 'Unchanged' spec.
    spec_func = OpSchema.get_spec(op_type)
    if spec_func is None:
        spec_func = OpSchema.get_spec('Unchanged')
    outputs = spec_func(kwargs, inputs, outputs)
    # Return single or repeated outputs.
    if num_outputs == 1:
        return outputs[0]
    return outputs
def __call__(self, *args, **kwargs):
    """Call the compiled executables.

    On the first call the python function is traced under graph mode
    and compiled; every call then feeds the inputs, runs all
    executables and wraps the tensor outputs as eager tensors.

    Returns
    -------
    The single output if there is exactly one, else the list of outputs.

    Raises
    ------
    ValueError
        If ``input_signature`` covers fewer arguments than the
        function defines.

    """
    if self.executables is None:
        # Graph is not created on the first call.
        # Compile the executables from the python function.
        scope_name = '${%d}' % id(self)
        signature = self.input_signature
        num_inputs = self._function_spec.num_inputs
        symbols = []
        with context.name_scope(scope_name):
            for index in range(num_inputs):
                shape = dtype = None
                if signature is not None:
                    if index >= len(signature):
                        raise ValueError(
                            'When <input_signature> is provided, '
                            'only define arguments covered by it.\n'
                            'Got %d signature(s) and %d argument(s).'
                            % (len(signature),
                               self._function_spec.num_inputs))
                    shape = signature[index].shape
                    dtype = signature[index].dtype
                symbols.append(
                    Tensor(shape, dtype, 'Input:%d' % index).constant())
        with context.name_scope(scope_name), eager_context.graph_mode():
            returns = nest.flatten(self._python_function(*symbols))
        # Tensors feed the main executable; optimizers get their own.
        tensor_returns = [obj for obj in returns if isinstance(obj, Tensor)]
        other_returns = [obj for obj in returns if not isinstance(obj, Tensor)]
        compiled = [function_lib.create_function(outputs=tensor_returns)]
        for obj in other_returns:
            if isinstance(obj, optimizer.Optimizer):
                compiled.append(function_lib.create_function(optimizer=obj))
        self.inputs = symbols
        self.outputs = returns
        self.executables = compiled
    # In this case, we have compiled executables.
    # Notify the backend to run directly.
    executables = self.executables
    values, kwargs = self.canonicalize_inputs(*args, **kwargs)
    current_ws = workspace.get_workspace()
    for symbol, value in zip(self.inputs, values):
        current_ws.feed_tensor(symbol, value)
    executables[0](return_outputs=False, **kwargs)
    for func in executables[1:]:
        func(return_outputs=False)
    # Wrap the tensor outputs; other returned objects pass through.
    results = []
    for output in self.outputs:
        if not isinstance(output, Tensor):
            results.append(output)
            continue
        impl = current_ws.GetTensor(output.id)
        device = device_spec.DeviceSpec(*impl.device)
        results.append(EagerTensor(impl=impl, device=device))
    if len(results) == 1:
        return results[0]
    return results
def get_value(self):
    """Fetch the value of this tensor from the current workspace.

    Returns
    -------
    numpy.ndarray
        The fetched value (described as a deep copy by the original
        documentation of this method).

    """
    current_ws = workspace.get_workspace()
    return current_ws.fetch_tensor(self)
def _init_set_name(self, name=None, zero_based=True):
    """Set the object name, generating a unique one if omitted.

    Parameters
    ----------
    name : str, optional
        The explicit name; must pass ``valid_identifier``.
    zero_based : bool, optional, default=True
        Forwarded to ``unique_name`` when a name is generated.

    Raises
    ------
    ValueError
        If an explicit ``name`` is not a legal identifier.

    """
    if name is not None:
        if not valid_identifier(name):
            raise ValueError('<name> should be a legal identifier.')
        self._name = name
        return
    # Derive the name from the snake-cased class name.
    current_ws = workspace.get_workspace()
    self._name = current_ws.unique_name(
        name=camel_to_snake(self.__class__.__name__),
        namespace='Object',
        zero_based=zero_based,
    )
def _init_set_name(self, name, zero_based=True):
    """Set a name for sharing weights.

    Parameters
    ----------
    name : str
        The explicit name. Any falsy value (``None``, ``''``)
        triggers generation of a unique name instead.
    zero_based : bool, optional, default=True
        Forwarded to ``unique_name`` when a name is generated.

    """
    if name:
        self._name = name
        return
    # Derive the name from the snake-cased class name.
    snake_name = generic_utils.to_snake_case(self.__class__.__name__)
    self._name = workspace.get_workspace().unique_name(
        name=snake_name,
        namespace='Object',
        zero_based=zero_based,
    )
def __call__(self, bottom):
    """Return one constant ``float32`` tensor per entry of ``blob_shapes``.

    Parameters
    ----------
    bottom : object
        Not used by this method.

    Returns
    -------
    list
        The output tensors, named ``<scope>output:<i>`` before being
        made unique by the workspace.

    """
    name_scope = context.get_name_scope()
    current_ws = workspace.get_workspace()
    tops = []
    for index in range(len(self.blob_shapes)):
        unique_name = current_ws.unique_name(
            name_scope + 'output',
            suffix=':{}'.format(index),
            namespace='Tensor',
        )
        ref = TensorRef(
            name=unique_name,
            shape=self.blob_shapes[index],
            dtype='float32',
        )
        tops.append(ref.constant())
    return tops
def __init__(self, **kwargs):
    """Create a ``Optimizer``.

    Parameters
    ----------
    grad_scale : number, optional, default=1
    weight_decay : number, optional, default=0
    clip_norm : number, optional, default=0
    clip_value : number, optional, default=0

    Raises
    ------
    ValueError
        If any other keyword argument is given.

    """
    self._name = workspace.get_workspace().create_handle('Optimizer')
    # The operator type is the name of the concrete class.
    self._op_type = self.__class__.__name__
    self._process_group = distributed.get_group()
    self._hyper = {}
    # Consume the recognized hyper parameters, in a fixed order.
    defaults = (
        ('grad_scale', 1),
        ('weight_decay', 0),
        ('clip_norm', 0),
        ('clip_value', 0),
    )
    for key, default in defaults:
        self._set_hyper(key, kwargs.pop(key, default))
    # Whatever is left was not recognized.
    if kwargs:
        raise ValueError('Unexpected arguments: ' + ','.join(kwargs))