def __init__(self, num_features, eps=1e-5, momentum=0.1,
             affine=True, track_running_stats=True):
    super(_BatchNorm, self).__init__()
    self.num_features = num_features
    self.eps = eps
    self.momentum = momentum
    self.affine = affine
    self.track_running_stats = track_running_stats
    if self.affine:
        self.weight = Parameter(Tensor(num_features))
        self.bias = Parameter(Tensor(num_features))
    else:
        self.register_buffer('weight', ones(num_features))
        self.register_buffer('bias', zeros(num_features))
    self.register_buffer('running_mean', zeros(num_features))
    self.register_buffer('running_var', ones(num_features))
    self.inputs = [
        self.running_mean, self.running_var,
        self.weight, self.bias,
    ]
    self.reset_parameters()
    self.register_op()
    self.op_metas = {'TRAIN': None, 'TEST': None}
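# A minimal numpy sketch of how the `running_mean`/`running_var` buffers
# registered above are typically updated in training mode. The source does
# not show the update itself, so the running = (1 - momentum) * running +
# momentum * batch_stat convention below is an assumption carried over from
# the PyTorch-style `momentum=0.1` default.
import numpy as np

def update_running_stats(running_mean, running_var, batch, momentum=0.1):
    # Blend the batch statistics into the running buffers in place.
    running_mean += momentum * (batch.mean(axis=0) - running_mean)
    running_var += momentum * (batch.var(axis=0) - running_var)

stats_mean, stats_var = np.zeros(8), np.ones(8)
update_running_stats(stats_mean, stats_var, np.random.randn(32, 8))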
def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
             dilation, transposed, output_padding, groups, bias):
    super(_ConvNd, self).__init__()
    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.transposed = transposed
    self.output_padding = output_padding
    self.groups = groups
    if transposed:
        self.weight = Parameter(
            Tensor(in_channels, out_channels // groups, *kernel_size))
    else:
        self.weight = Parameter(
            Tensor(out_channels, in_channels // groups, *kernel_size))
    if bias:
        self.bias = Parameter(Tensor(out_channels))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
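# Weight-shape arithmetic for the two branches above, with illustrative
# numbers (nothing here comes from the source): a regular conv stores
# [out_channels, in_channels // groups, *kernel_size], while a transposed
# conv swaps the channel axes to [in_channels, out_channels // groups,
# *kernel_size].
in_c, out_c, groups, ksize = 16, 32, 4, (3, 3)
regular = (out_c, in_c // groups) + ksize     # -> (32, 4, 3, 3)
transposed = (in_c, out_c // groups) + ksize  # -> (16, 8, 3, 3)
assert regular == (32, 4, 3, 3) and transposed == (16, 8, 3, 3)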
def __init__(self, in_channels, out_channels, kernel_size,
             stride, padding, dilation, bias):
    super(_DepthwiseConvNd, self).__init__()
    if in_channels != out_channels:
        raise ValueError(
            'in_channels must be equal to out_channels '
            'for a depthwise convolution')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    # Depthwise convolution applies one filter per input channel.
    self.weight = Parameter(Tensor(out_channels, 1, *kernel_size))
    if bias:
        self.bias = Parameter(Tensor(out_channels))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
def __init__(self, in_features, out_features, bias=True):
    super(Linear, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.weight = Parameter(Tensor(out_features, in_features))
    if bias:
        self.bias = Parameter(Tensor(out_features))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
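# A plain-numpy sketch of the computation the Linear module above implies,
# assuming the usual y = x @ W^T + b convention (the [out_features,
# in_features] Parameter shape matches that convention). This is an
# illustration, not the framework's actual kernel.
import numpy as np

def linear_forward(x, weight, bias=None):
    # x: [N, in_features], weight: [out_features, in_features]
    y = x @ weight.T
    if bias is not None:
        y = y + bias  # bias: [out_features], broadcast over N
    return y

assert linear_forward(np.zeros((4, 64)), np.zeros((10, 64)),
                      np.zeros(10)).shape == (4, 10)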
def __init__(self, num_features, group=32, eps=1e-5, affine=True):
    super(_GroupNorm, self).__init__()
    self.num_features = num_features
    self.group = group
    self.eps = eps
    self.affine = affine
    if self.affine:
        self.weight = Parameter(Tensor(num_features))
        self.bias = Parameter(Tensor(num_features))
    else:
        self.weight = self.bias = None
    self.inputs = [self.weight, self.bias] if self.affine else []
    self.reset_parameters()
    self.register_op()
def _get_grad(self, param, accumulating=False):
    # Look up the (possibly accumulated) gradient lazily in the workspace;
    # return a non-owning view of it, or None if it has not been computed.
    grad_name = param.name + ('_grad[acc]' if accumulating else '_grad')
    if dragon.workspace.HasTensor(grad_name):
        return Tensor(name=grad_name, own_storage=False, device=param.device)
    return None
def __init__(self, input_size, hidden_size, bias, num_chunks):
    super(RNNCellBase, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    self.weight_ih = Parameter(Tensor(num_chunks * hidden_size, input_size))
    self.weight_hh = Parameter(Tensor(num_chunks * hidden_size, hidden_size))
    if bias:
        self.bias_ih = Parameter(Tensor(num_chunks * hidden_size))
        self.bias_hh = Parameter(Tensor(num_chunks * hidden_size))
    else:
        self.register_parameter('bias_ih', None)
        self.register_parameter('bias_hh', None)
    self.reset_parameters()
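# `num_chunks` above sizes the fused gate weights: in the PyTorch-style
# convention a vanilla RNN cell uses 1 chunk, a GRU 3, and an LSTM 4, so
# weight_ih is [num_chunks * hidden_size, input_size]. The mapping below is
# an assumption based on that convention; the numbers are illustrative.
input_size, hidden_size = 32, 128
for mode, chunks in (('rnn', 1), ('gru', 3), ('lstm', 4)):
    w_ih_shape = (chunks * hidden_size, input_size)
    w_hh_shape = (chunks * hidden_size, hidden_size)
    print(mode, w_ih_shape, w_hh_shape)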
def _masked_assign(output, mask, input):
    if not isinstance(input, Tensor):
        if isinstance(input, (tuple, list)):
            input = Tensor(input, dtype=output.dtype, device=output.device)
        else:
            input = WrapScalar(input, output.dtype, output.device)
    dev = MakeDevice(inputs=[input])
    key = 'MaskedAssign/{}'.format(dev)
    module = get_module(MaskedAssign, key, dev)
    return module.forward(input, output, mask)
def _assign(output, starts, sizes, input):
    if not isinstance(input, Tensor):
        if isinstance(input, (tuple, list)):
            input = Tensor(input, dtype=output.dtype, device=output.device)
        else:
            input = WrapScalar(input, output.dtype, output.device)
    nstarts, nsizes = len(starts), len(sizes)
    dev = MakeDevice(inputs=[input])
    key = 'Assign/{}/nstarts:{}/nsizes:{}'.format(dev, nstarts, nsizes)
    module = get_module(Assign, key, dev, nstarts=nstarts, nsizes=nsizes)
    return module.forward(input, output, starts, sizes)
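# Both helpers above follow the same dispatch pattern: coerce non-Tensor
# inputs, derive a cache key from the device (plus any shape-dependent
# arguments such as nstarts/nsizes), and reuse one op module per key. A
# standalone sketch of that caching pattern; the real `get_module` internals
# are not shown in the source, so everything here is hypothetical.
_module_cache = {}

def _get_module_sketch(module_cls, key, *args, **kwargs):
    # Build the module once per key, then reuse it on later calls.
    if key not in _module_cache:
        _module_cache[key] = module_cls(*args, **kwargs)
    return _module_cache[key]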
def _plan_params(self):
    if self.mode == 'lstm':
        gate_size = 4 * self.hidden_size
    elif self.mode == 'gru':
        gate_size = 3 * self.hidden_size
    else:
        gate_size = self.hidden_size
    # 1. Plan weights
    self._matrix_weights = []
    self._bias_weights = []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_names = [
                'layer_{}/{}/{}'.format(
                    layer, p, 'L' if direction == 0 else 'R')
                for p in ('matrix_ih', 'matrix_hh', 'bias_ih', 'bias_hh')
            ]
            w_ih = dg.Tensor(name=w_names[0],
                             shape=[gate_size, layer_input_size])
            w_hh = dg.Tensor(name=w_names[1],
                             shape=[gate_size, self.hidden_size])
            b_ih = dg.Tensor(name=w_names[2], shape=[gate_size])
            b_hh = dg.Tensor(name=w_names[3], shape=[gate_size])
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_weights.extend([w_ih, w_hh])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_weights.extend([b_ih, b_hh])
    # 2. Compute the total number of parameters
    self._weights_count = 0
    for w in self._matrix_weights + self._bias_weights:
        self._weights_count += np.prod(w.shape)
    # 3. Register the packed weights
    self.weights = Parameter(Tensor(int(self._weights_count)))
    # 4. Create the initialization grids
    if self.mode == 'lstm':
        num_params_per_layer = 8
    elif self.mode == 'gru':
        num_params_per_layer = 6
    else:
        num_params_per_layer = 2
    self._matrix_init_grids = [
        [['orthogonal' for _ in range(num_params_per_layer)]
         for _ in range(self.num_directions)]
        for _ in range(self.num_layers)
    ]
    self._bias_init_grids = [
        [['zero' for _ in range(num_params_per_layer)]
         for _ in range(self.num_directions)]
        for _ in range(self.num_layers)
    ]
    # 5. Set the init flag
    self._init_params = False
def _run_update_ops(self, group):
    """Generate and run UpdateOps.

    Parameters
    ----------
    group : dict
        The param group.

    Returns
    -------
    None

    """
    # Collect the params whose grads exist in the workspace
    params, grads = [], []
    for p in group['params']:
        g_name = p.name + '_grad'
        if not dg.workspace.HasTensor(g_name):
            continue
        g = Tensor(dg_tensor=g_name)
        g._own_storage = False
        g._ctx = p._ctx
        params.append(p)
        grads.append(g)
    # Feed the optimizer parameters to the workspace
    self.feed_parameters(group)
    # Run an all-reduce op to accumulate grads if necessary
    _allreduce(grads)
    # Run the regular update ops
    for p, g in zip(params, grads):
        _update(
            p, g,
            op_type=self._update_type,
            slot=group['slot'],
            lr_mult=group.get('lr_mult', 1.0),
            decay_mult=group.get('decay_mult', 1.0),
        )
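# A minimal numpy sketch of the per-parameter rule that `_update` dispatches
# above, assuming plain SGD with L2 weight decay; `lr_mult`/`decay_mult`
# scale the group's base learning rate and decay, which is the usual meaning
# of those multipliers. Illustrative only, not the framework's actual kernel.
import numpy as np

def sgd_update(param, grad, base_lr=0.1, weight_decay=1e-4,
               lr_mult=1.0, decay_mult=1.0):
    g = grad + (weight_decay * decay_mult) * param  # scaled L2 decay
    param -= (base_lr * lr_mult) * g                # scaled step
    return param

p = sgd_update(np.ones(3), np.full(3, 0.5), lr_mult=2.0)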
def _plan_params(self):
    if self.mode == 'lstm':
        gate_size = 4 * self.hidden_size
    elif self.mode == 'gru':
        gate_size = 3 * self.hidden_size
    else:
        gate_size = self.hidden_size
    # 1. Plan weights
    self._matrix_shape, self._bias_shape = [], []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_ih_shape = [gate_size, layer_input_size]
            w_hh_shape = [gate_size, self.hidden_size]
            b_ih_shape, b_hh_shape = [gate_size], [gate_size]
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_shape.extend([w_ih_shape, w_hh_shape])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_shape.extend([b_ih_shape, b_hh_shape])
    # 2. Compute the total number of parameters
    self._weights_count = 0
    for shape in self._matrix_shape + self._bias_shape:
        self._weights_count += numpy.prod(shape)
    # 3. Register the packed weights
    self.weights = Parameter(Tensor(int(self._weights_count)))
    # 4. Create the initialization grids
    if self.mode == 'lstm':
        num_params_per_layer = 8
    elif self.mode == 'gru':
        num_params_per_layer = 6
    else:
        num_params_per_layer = 2
    self._matrix_init_grids = [
        [['orthogonal' for _ in range(num_params_per_layer)]
         for _ in range(self.num_directions)]
        for _ in range(self.num_layers)
    ]
    self._bias_init_grids = [
        [['zero' for _ in range(num_params_per_layer)]
         for _ in range(self.num_directions)]
        for _ in range(self.num_layers)
    ]
    # 5. Set the init flag
    self._init_params = False
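# A worked count for the packed buffer sized above, with illustrative
# numbers: one unidirectional LSTM layer, input_size=10, hidden_size=20.
# gate_size = 4 * 20 = 80, so w_ih holds 80*10 values, w_hh holds 80*20,
# and the two bias vectors hold 80 each:
gate_size = 4 * 20
weights_count = gate_size * 10 + gate_size * 20 + 2 * gate_size
assert weights_count == 2560  # length of the packed `weights` Parameter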