Example #1
def init_weights(net, init_type='normal', init_gain=0.02):
    """
    Initialize network weights.

    Args:
        net (layers.Layer): Network to be initialized
        init_type (str): The name of an initialization method: normal | xavier | constant.
        init_gain (float): Gain factor for normal and xavier.

    """
    for _, layer in net.cells_and_names():
        if isinstance(layer, (layers.Conv2d, layers.Conv2dTranspose)):
            if init_type == 'normal':
                layer.weight.set_data(
                    initializer(Normal(init_gain), layer.weight.shape))
            elif init_type == 'xavier':
                layer.weight.set_data(
                    initializer(XavierUniform(init_gain), layer.weight.shape))
            elif init_type == 'constant':
                layer.weight.set_data(initializer(0.001, layer.weight.shape))
            else:
                raise NotImplementedError(
                    'initialization method [%s] is not implemented' %
                    init_type)
        elif isinstance(layer, layers.BatchNorm2d):
            layer.gamma.set_data(initializer('ones', layer.gamma.shape))
            layer.beta.set_data(initializer('zeros', layer.beta.shape))
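A minimal usage sketch for init_weights follows; the ToyConvNet class and the `from tinyms import layers` import are assumptions inferred from the snippet's `layers.*` usage, not part of the source.

# Usage sketch: ToyConvNet is hypothetical, and the import assumes the
# TinyMS-style `layers` module the snippet appears to use.
from tinyms import layers

class ToyConvNet(layers.Layer):
    """Toy two-layer network, for illustration only."""
    def __init__(self):
        super(ToyConvNet, self).__init__()
        self.conv = layers.Conv2d(3, 16, kernel_size=3)
        self.bn = layers.BatchNorm2d(16)

net = ToyConvNet()
init_weights(net, init_type='xavier', init_gain=0.02)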
Example #2
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0.0,
                 bidirectional=False):
        super(StackLSTM, self).__init__()
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.transpose = P.Transpose()

        # direction number
        num_directions = 2 if bidirectional else 1

        # input_size list
        input_size_list = [input_size]
        for _ in range(num_layers - 1):
            input_size_list.append(hidden_size * num_directions)

        # layer list; named lstm_layers so it does not shadow the `layers` module
        lstm_layers = []
        for i in range(num_layers):
            lstm_layers.append(
                layers.LSTMCell(input_size=input_size_list[i],
                                hidden_size=hidden_size,
                                has_bias=has_bias,
                                batch_first=batch_first,
                                bidirectional=bidirectional,
                                dropout=dropout))

        # weights
        weights = []
        for i in range(num_layers):
            # weight size
            weight_size = (input_size_list[i] +
                           hidden_size) * num_directions * hidden_size * 4
            if has_bias:
                bias_size = num_directions * hidden_size * 4
                weight_size = weight_size + bias_size

            # numpy weight
            stdv = 1 / math.sqrt(hidden_size)
            w_np = np.random.uniform(-stdv, stdv,
                                     (weight_size, 1, 1)).astype(np.float32)

            # lstm weight
            weights.append(
                Parameter(initializer(Tensor(w_np), w_np.shape),
                          name="weight" + str(i)))

        # stacked cells and their flattened weights
        self.lstm = lstm_layers
        self.weight = ParameterTuple(tuple(weights))
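A brief usage sketch for the constructor above, assuming the rest of the StackLSTM class exists as in the source; the sizes are illustrative.

# Usage sketch (sizes are illustrative).
lstm = StackLSTM(input_size=128, hidden_size=256, num_layers=2,
                 has_bias=True, batch_first=False, bidirectional=False)
# Each entry of lstm.weight is a flattened Parameter of size
# (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4,
# plus num_directions * hidden_size * 4 when has_bias is True.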
Example #3
def init_net_param(network, initialize_mode='TruncatedNormal'):
    """Init the parameters in net."""
    params = network.trainable_params()
    for p in params:
        if 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
            if initialize_mode == 'TruncatedNormal':
                p.set_data(
                    initializer(TruncatedNormal(0.02), p.data.shape,
                                p.data.dtype))
            else:
                p.set_data(
                    initializer(initialize_mode, p.data.shape, p.data.dtype))
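A usage sketch for init_net_param; `network` stands for any constructed network, here reusing the hypothetical ToyConvNet from the Example #1 sketch.

# Usage sketch (ToyConvNet is the hypothetical class from the Example #1 sketch).
network = ToyConvNet()
init_net_param(network, initialize_mode='TruncatedNormal')
# Any other value is forwarded to initializer() as-is, e.g. the 'ones' string:
init_net_param(network, initialize_mode='ones')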
Example #4
    def __init__(self,
                 embedding_size,
                 embedding_shape,
                 use_relative_positions=False,
                 use_token_type=False,
                 token_type_vocab_size=16,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 max_position_embeddings=512,
                 dropout_prob=0.1):
        super(EmbeddingPostprocessor, self).__init__()
        self.use_token_type = use_token_type
        self.token_type_vocab_size = token_type_vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.max_position_embeddings = max_position_embeddings
        self.embedding_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [token_type_vocab_size, embedding_size]),
            name='embedding_table')

        self.shape_flat = (-1,)
        self.one_hot = layers.OneHot()
        self.on_value = Tensor(1.0, ts.float32)
        # off_value of 0.1 is kept from the source; a standard one-hot
        # encoding would use 0.0 here
        self.off_value = Tensor(0.1, ts.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.shape = tuple(embedding_shape)
        self.layernorm = layers.LayerNorm((embedding_size,))
        # Dropout takes a keep probability in this API, hence 1 - dropout_prob
        self.dropout = layers.Dropout(1 - dropout_prob)
        self.gather = P.Gather()
        self.use_relative_positions = use_relative_positions
        self.slice = P.StridedSlice()
        self.full_position_embeddings = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [max_position_embeddings, embedding_size]),
            name='full_position_embeddings')
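A construction sketch for EmbeddingPostprocessor; the sizes below are illustrative BERT-base-like values, not taken from the source.

# Construction sketch (all sizes are illustrative, BERT-base-like values).
postprocessor = EmbeddingPostprocessor(embedding_size=768,
                                       embedding_shape=[32, 128, 768],
                                       use_token_type=True,
                                       token_type_vocab_size=2,
                                       max_position_embeddings=512,
                                       dropout_prob=0.1)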
Example #5
    def __init__(self,
                 vocab_size,
                 embedding_size,
                 embedding_shape,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02):
        super(EmbeddingLookup, self).__init__()
        self.vocab_size = vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.embedding_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [vocab_size, embedding_size]))
        self.expand = P.ExpandDims()
        self.shape_flat = (-1,)
        self.gather = P.Gather()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, ts.float32)
        self.off_value = Tensor(0.0, ts.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.shape = tuple(embedding_shape)
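A matching construction sketch for EmbeddingLookup, again with illustrative sizes.

# Construction sketch (vocab and shape values are illustrative).
lookup = EmbeddingLookup(vocab_size=30522,
                         embedding_size=768,
                         embedding_shape=[32, 128, 768],
                         use_one_hot_embeddings=False,
                         initializer_range=0.02)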
Example #6
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [self.vocab_size, self.depth]))

        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)
        self.reshape = P.Reshape()
        self.one_hot = layers.OneHot(depth=self.vocab_size)
        self.shape = P.Shape()
        self.gather = P.Gather()  # index_select
        self.matmul = P.BatchMatMul()
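A construction sketch for RelaPosEmbeddingsGenerator; the length, depth, and clipping values are illustrative.

# Construction sketch (length/depth/max_relative_position are illustrative).
rel_pos = RelaPosEmbeddingsGenerator(length=128,
                                     depth=64,
                                     max_relative_position=16,
                                     initializer_range=0.02)
# The embeddings table then covers 2 * 16 + 1 = 33 clipped relative offsets.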
Example #7
    def __init__(self, network, optimizer, scale_update_layer=None):
        super(BertFinetuneLayer, self).__init__(auto_prefix=False)
        self.network = network
        self.network.set_grad()
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.optimizer.global_step = Parameter(initializer(0.0, [1]),
                                               name='global_step')
        self.grad = P.GradOperation(get_by_list=True, sens_param=True)
        self.allreduce = P.AllReduce()
        self.grad_reducer = None
        self.cast = P.Cast()
        self.gpu_target = False
        if context.get_context("device_target") == "GPU":
            self.gpu_target = True
            self.float_status = P.FloatStatus()
            self.addn = P.AddN()
            self.reshape = P.Reshape()
        else:
            self.alloc_status = P.NPUAllocFloatStatus()
            self.get_status = P.NPUGetFloatStatus()
            self.clear_before_grad = P.NPUClearFloatStatus()
        self.reduce_sum = P.ReduceSum(keep_dims=False)
        self.depend_parameter_use = P.Depend()
        self.base = Tensor(1, ts.float32)
        self.less_equal = P.LessEqual()
        self.hyper_map = P.HyperMap()
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_layer
        if scale_update_layer:
            self.loss_scale = Parameter(
                Tensor(scale_update_layer.get_loss_scale(), dtype=ts.float32),
                name="loss_scale")