Example #1
def __init__(self,
             num_features,
             eps=1e-5,
             momentum=0.1,
             affine=True,
             track_running_stats=True):
    super(_BatchNorm, self).__init__()
    self.num_features = num_features
    self.eps = eps
    self.momentum = momentum
    self.affine = affine
    self.track_running_stats = track_running_stats
    if self.affine:
        # Learnable scale/shift parameters.
        self.weight = Parameter(Tensor(num_features))
        self.bias = Parameter(Tensor(num_features))
    else:
        # Fixed scale of ones and shift of zeros, kept as buffers.
        self.register_buffer('weight', ones(num_features))
        self.register_buffer('bias', zeros(num_features))
    self.register_buffer('running_mean', zeros(num_features))
    self.register_buffer('running_var', ones(num_features))
    self.inputs = [
        self.running_mean, self.running_var, self.weight, self.bias
    ]
    self.reset_parameters()
    self.register_op()
    self.op_metas = {'TRAIN': None, 'TEST': None}
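
A minimal usage sketch, assuming a concrete subclass such as BatchNorm2d is built on this base (the subclass name and sizes here are illustrative, not confirmed by the snippet):

bn = BatchNorm2d(num_features=64)  # hypothetical subclass of _BatchNorm
# running_mean/running_var start at zeros/ones; weight and bias are
# learnable Parameters because affine defaults to True.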
Example #2
def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
             dilation, transposed, output_padding, groups, bias):
    super(_ConvNd, self).__init__()
    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.transposed = transposed
    self.output_padding = output_padding
    self.groups = groups
    if transposed:
        # Transposed convolution stores weights as (in, out // groups, *k).
        self.weight = Parameter(
            Tensor(in_channels, out_channels // groups, *kernel_size))
    else:
        # Regular convolution stores weights as (out, in // groups, *k).
        self.weight = Parameter(
            Tensor(out_channels, in_channels // groups, *kernel_size))
    if bias:
        self.bias = Parameter(Tensor(out_channels))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
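
The weight layout above flips the channel axes for transposed convolutions; a minimal sketch of the rule with hypothetical sizes:

in_channels, out_channels, groups, kernel_size = 64, 128, 2, (3, 3)
transposed = False
if transposed:
    weight_shape = (in_channels, out_channels // groups, *kernel_size)
else:
    weight_shape = (out_channels, in_channels // groups, *kernel_size)
print(weight_shape)  # (128, 32, 3, 3)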
Example #3
def __init__(
    self,
    in_channels,
    out_channels,
    kernel_size,
    stride,
    padding,
    dilation,
    bias,
):
    super(_DepthwiseConvNd, self).__init__()
    if in_channels != out_channels:
        raise ValueError('in/out channels must be same')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    # Depthwise: one single-channel filter per channel.
    self.weight = Parameter(Tensor(out_channels, 1, *kernel_size))
    if bias:
        self.bias = Parameter(Tensor(out_channels))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
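
Depthwise convolution requires in_channels == out_channels and keeps exactly one single-channel filter per channel; a small sketch of the resulting weight shape (sizes illustrative):

channels, kernel_size = 32, (3, 3)
weight_shape = (channels, 1, *kernel_size)
print(weight_shape)  # (32, 1, 3, 3)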
Example #4
def __init__(self, in_features, out_features, bias=True):
    super(Linear, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    # Weights are stored transposed: (out_features, in_features).
    self.weight = Parameter(Tensor(out_features, in_features))
    if bias:
        self.bias = Parameter(Tensor(out_features))
    else:
        self.bias = None
    self.reset_parameters()
    self.register_op()
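
A minimal usage sketch of this constructor (sizes illustrative; the API mirrors torch.nn.Linear):

fc = Linear(in_features=512, out_features=10)
# fc.weight has shape (10, 512); fc.bias has shape (10,),
# since bias defaults to True.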
Example #5
def __init__(self, num_features, group=32, eps=1e-5, affine=True):
    super(_GroupNorm, self).__init__()
    self.num_features = num_features
    self.group = group
    self.eps = eps
    self.affine = affine
    if self.affine:
        self.weight = Parameter(Tensor(num_features))
        self.bias = Parameter(Tensor(num_features))
    else:
        self.weight = self.bias = None
    self.inputs = [self.weight, self.bias] if self.affine else []
    self.reset_parameters()
    self.register_op()
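
Unlike the batch-norm base above, no running statistics are registered here; only the optional affine parameters feed the op. A usage sketch with illustrative sizes (in practice a concrete subclass would be constructed rather than the private base):

gn = _GroupNorm(num_features=64, group=32)  # 64 channels in 32 groups
# gn.inputs == [gn.weight, gn.bias], since affine defaults to True.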
Example #6
def _get_grad(self, param, accumulating=False):
    # Gradients live in the workspace under '<param>_grad', with an
    # extra '[acc]' suffix for the accumulation slot.
    grad_name = param.name + ('_grad[acc]' if accumulating else '_grad')
    if dragon.workspace.HasTensor(grad_name):
        # Wrap the existing storage without taking ownership of it.
        return Tensor(name=grad_name,
                      own_storage=False,
                      device=param.device)
    return None
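
The naming convention is the whole contract here; a tiny sketch (the parameter name is hypothetical):

param_name = 'conv1/weight'
print(param_name + '_grad')       # 'conv1/weight_grad': regular slot
print(param_name + '_grad[acc]')  # 'conv1/weight_grad[acc]': accumulation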
Example #7
def __init__(self, input_size, hidden_size, bias, num_chunks):
    super(RNNCellBase, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    # All gates are fused: num_chunks blocks of hidden_size rows each.
    self.weight_ih = Parameter(Tensor(num_chunks * hidden_size,
                                      input_size))
    self.weight_hh = Parameter(
        Tensor(num_chunks * hidden_size, hidden_size))
    if bias:
        self.bias_ih = Parameter(Tensor(num_chunks * hidden_size))
        self.bias_hh = Parameter(Tensor(num_chunks * hidden_size))
    else:
        self.register_parameter('bias_ih', None)
        self.register_parameter('bias_hh', None)
    self.reset_parameters()
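
num_chunks fuses the per-gate weights into one matrix; in the torch-style convention it is 4 for an LSTM cell, 3 for a GRU cell, and 1 for a vanilla RNN cell. A sketch with illustrative sizes:

input_size, hidden_size, num_chunks = 128, 256, 4  # e.g. an LSTM cell
w_ih_shape = (num_chunks * hidden_size, input_size)   # (1024, 128)
w_hh_shape = (num_chunks * hidden_size, hidden_size)  # (1024, 256)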
Example #8
def _masked_assign(output, mask, input):
    if not isinstance(input, Tensor):
        # Promote sequences and scalars to tensors matching the
        # output's dtype and device.
        if isinstance(input, (tuple, list)):
            input = Tensor(input, dtype=output.dtype, device=output.device)
        else:
            input = WrapScalar(input, output.dtype, output.device)
    # One cached module per device.
    dev = MakeDevice(inputs=[input])
    key = 'MaskedAssign/{}'.format(dev)
    module = get_module(MaskedAssign, key, dev)
    return module.forward(input, output, mask)
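
The cache key encodes only the device, so all masked assignments on one device share a module; a sketch of the key (device string hypothetical):

dev = 'cuda:0'                       # hypothetical device string
key = 'MaskedAssign/{}'.format(dev)  # 'MaskedAssign/cuda:0'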
Example #9
def _assign(output, starts, sizes, input):
    if not isinstance(input, Tensor):
        # Promote sequences and scalars to tensors matching the
        # output's dtype and device.
        if isinstance(input, (tuple, list)):
            input = Tensor(input, dtype=output.dtype, device=output.device)
        else:
            input = WrapScalar(input, output.dtype, output.device)
    nstarts, nsizes = len(starts), len(sizes)
    dev = MakeDevice(inputs=[input])
    # The cache key also encodes the slice arity, not just the device.
    key = 'Assign/{}/nstarts:{}/nsizes:{}'.format(dev, nstarts, nsizes)
    module = get_module(Assign, key, dev, nstarts=nstarts, nsizes=nsizes)
    return module.forward(input, output, starts, sizes)
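
Unlike _masked_assign, the key here also distinguishes how many start/size arguments the slice takes; a sketch (values hypothetical):

dev, nstarts, nsizes = 'cuda:0', 2, 2
key = 'Assign/{}/nstarts:{}/nsizes:{}'.format(dev, nstarts, nsizes)
print(key)  # 'Assign/cuda:0/nstarts:2/nsizes:2'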
Example #10
def _plan_params(self):
    # Assumes `import dragon as dg` and `import numpy as np`.
    if self.mode == 'lstm': gate_size = 4 * self.hidden_size
    elif self.mode == 'gru': gate_size = 3 * self.hidden_size
    else: gate_size = self.hidden_size
    # 1. Plan weights
    self._matrix_weights = []
    self._bias_weights = []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_names = [
                'layer_{}/{}/{}'.format(layer, p,
                                        'L' if direction == 0 else 'R')
                for p in ('matrix_ih', 'matrix_hh', 'bias_ih', 'bias_hh')
            ]
            w_ih = dg.Tensor(name=w_names[0],
                             shape=[gate_size, layer_input_size])
            w_hh = dg.Tensor(name=w_names[1],
                             shape=[gate_size, self.hidden_size])
            b_ih = dg.Tensor(name=w_names[2], shape=[gate_size])
            b_hh = dg.Tensor(name=w_names[3], shape=[gate_size])
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_weights.extend([w_ih, w_hh])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_weights.extend([b_ih, b_hh])

    # 2. Compute total number of parameters
    self._weights_count = 0
    for w in self._matrix_weights + self._bias_weights:
        self._weights_count += np.prod(w.shape)

    # 3. Register the packed weights
    self.weights = Parameter(Tensor(int(self._weights_count)))

    # 4. Create the initialization grids
    if self.mode == 'lstm': num_params_per_layer = 8
    elif self.mode == 'gru': num_params_per_layer = 6
    else: num_params_per_layer = 2
    self._matrix_init_grids = [[[
        'orthogonal' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
    self._bias_init_grids = [[[
        'zero' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]

    # 5. Set the init flag
    self._init_params = False
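
The packed size in step 2 is simply the sum over all planned shapes; a worked sketch for a single-layer, unidirectional LSTM (sizes illustrative):

input_size, hidden_size = 10, 20
gate_size = 4 * hidden_size  # LSTM
count = (gate_size * input_size     # w_ih
         + gate_size * hidden_size  # w_hh
         + gate_size + gate_size)   # b_ih, b_hh
print(count)  # 2560 values packed into one flat weights tensor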
Example #11
def _run_update_ops(self, group):
    """Generate and run UpdateOps.

    Parameters
    ----------
    group : dict
        The param group.

    Returns
    -------
    None

    """
    # Collect params and grads
    params = []
    grads = []
    for p in group['params']:
        g_name = p.name + '_grad'
        # Skip parameters that never received a gradient.
        if not dg.workspace.HasTensor(g_name): continue
        g = Tensor(dg_tensor=g_name)
        g._own_storage = False
        g._ctx = p._ctx
        params.append(p)
        grads.append(g)

    # Feed optimizer parameters to the workspace
    self.feed_parameters(group)

    # Run an all-reduce op to accumulate grads if necessary
    _allreduce(grads)

    # Run regular update ops
    for p, g in zip(params, grads):
        _update(p,
                g,
                op_type=self._update_type,
                slot=group['slot'],
                lr_mult=group.get('lr_mult', 1.0),
                decay_mult=group.get('decay_mult', 1.0))
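
The method reads only a handful of keys from the group dict; a sketch of its expected shape (contents hypothetical, defaults taken from the .get() calls above):

params = []  # placeholder; real code passes Parameter objects
group = {
    'params': params,   # parameters to update
    'slot': 'sgd',      # hypothetical slot naming the optimizer state
    'lr_mult': 1.0,     # optional; defaults to 1.0
    'decay_mult': 1.0,  # optional; defaults to 1.0
}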
Example #12
def _plan_params(self):
    if self.mode == 'lstm': gate_size = 4 * self.hidden_size
    elif self.mode == 'gru': gate_size = 3 * self.hidden_size
    else: gate_size = self.hidden_size
    # 1. Plan weights
    self._matrix_shape, self._bias_shape = [], []
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            layer_input_size = self.input_size if layer == 0 \
                else self.hidden_size * self.num_directions
            w_ih_shape = [gate_size, layer_input_size]
            w_hh_shape = [gate_size, self.hidden_size]
            b_ih_shape, b_hh_shape = [gate_size], [gate_size]
            # W (0 ~ 3), R (4 ~ 7)
            self._matrix_shape.extend([w_ih_shape, w_hh_shape])
            # Bw (0 ~ 3), Br (4 ~ 7)
            self._bias_shape.extend([b_ih_shape, b_hh_shape])

    # 2. Compute total number of parameters
    self._weights_count = 0
    for shape in self._matrix_shape + self._bias_shape:
        self._weights_count += numpy.prod(shape)

    # 3. Register the packed weights
    self.weights = Parameter(Tensor(int(self._weights_count)))

    # 4. Create the initialization grids
    if self.mode == 'lstm': num_params_per_layer = 8
    elif self.mode == 'gru': num_params_per_layer = 6
    else: num_params_per_layer = 2
    self._matrix_init_grids = [[[
        'orthogonal' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]
    self._bias_init_grids = [[[
        'zero' for _ in range(num_params_per_layer)
    ] for _ in range(self.num_directions)] for _ in range(self.num_layers)]

    # 5. Set the init flag
    self._init_params = False
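
This variant plans shapes only, deferring tensor creation; the gate sizing at the top follows the usual fused-gate convention. A sketch (mode and size illustrative):

mode, hidden_size = 'gru', 32
if mode == 'lstm':
    gate_size = 4 * hidden_size  # input, forget, cell, output gates
elif mode == 'gru':
    gate_size = 3 * hidden_size  # reset, update, new gates
else:
    gate_size = hidden_size      # vanilla RNN: single transform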