def init_weights(net, init_type='normal', init_gain=0.02):
    """
    Initialize network weights.

    Args:
        net (layers.Layer): Network to be initialized.
        init_type (str): The name of an initialization method: normal | xavier | constant.
        init_gain (float): Gain factor for normal and xavier.
    """
    for _, layer in net.cells_and_names():
        if isinstance(layer, (layers.Conv2d, layers.Conv2dTranspose)):
            if init_type == 'normal':
                layer.weight.set_data(
                    initializer(Normal(init_gain), layer.weight.shape))
            elif init_type == 'xavier':
                layer.weight.set_data(
                    initializer(XavierUniform(init_gain), layer.weight.shape))
            elif init_type == 'constant':
                layer.weight.set_data(initializer(0.001, layer.weight.shape))
            else:
                raise NotImplementedError(
                    'initialization method [%s] is not implemented' % init_type)
        elif isinstance(layer, layers.BatchNorm2d):
            layer.gamma.set_data(initializer('ones', layer.gamma.shape))
            layer.beta.set_data(initializer('zeros', layer.beta.shape))
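# Hedged usage sketch (illustrative, not from the original source): assuming the
# `layers` package used above also provides SequentialLayer/ReLU, init_weights can
# be applied to a freshly built network. The small ConvNet below is an assumption
# made purely for demonstration.
def _example_init_weights():
    net = layers.SequentialLayer([
        layers.Conv2d(3, 16, kernel_size=3),
        layers.BatchNorm2d(16),
        layers.ReLU(),
    ])
    # Re-initialize Conv2d weights with Xavier-uniform and reset the BatchNorm2d
    # gamma/beta parameters to ones/zeros.
    init_weights(net, init_type='xavier', init_gain=0.02)
    return net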
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True,
             batch_first=False, dropout=0.0, bidirectional=False):
    super(StackLSTM, self).__init__()
    self.num_layers = num_layers
    self.batch_first = batch_first
    self.transpose = P.Transpose()

    # direction number
    num_directions = 2 if bidirectional else 1

    # input_size list: the first layer sees the raw input, the following layers
    # see the (possibly bidirectional) hidden output of the previous layer
    input_size_list = [input_size]
    for i in range(num_layers - 1):
        input_size_list.append(hidden_size * num_directions)

    # LSTM cells (named lstm_layers so the local list does not shadow the
    # `layers` module used to build the cells)
    lstm_layers = []
    for i in range(num_layers):
        lstm_layers.append(
            layers.LSTMCell(input_size=input_size_list[i],
                            hidden_size=hidden_size,
                            has_bias=has_bias,
                            batch_first=batch_first,
                            bidirectional=bidirectional,
                            dropout=dropout))

    # flattened weights, one Parameter per stacked layer
    weights = []
    for i in range(num_layers):
        # weight size: 4 gates (input, forget, cell, output), each of size
        # hidden_size, fed by the layer input and the recurrent hidden state
        weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
        if has_bias:
            bias_size = num_directions * hidden_size * 4
            weight_size = weight_size + bias_size

        # numpy weight, uniform in [-stdv, stdv]
        stdv = 1 / math.sqrt(hidden_size)
        w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)

        # lstm weight
        weights.append(
            Parameter(initializer(Tensor(w_np), w_np.shape),
                      name="weight" + str(i)))

    #
    self.lstm = lstm_layers
    self.weight = ParameterTuple(tuple(weights))
def init_net_param(network, initialize_mode='TruncatedNormal'):
    """Init the parameters in net."""
    params = network.trainable_params()
    for p in params:
        if 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
            if initialize_mode == 'TruncatedNormal':
                p.set_data(
                    initializer(TruncatedNormal(0.02), p.data.shape, p.data.dtype))
            else:
                # set_data expects a Tensor, so build one from the requested
                # initializer name instead of passing the mode string directly
                p.set_data(
                    initializer(initialize_mode, p.data.shape, p.data.dtype))
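# Hedged usage sketch (illustrative, not from the original source): assuming a
# network built with the same `layers` package, init_net_param re-initializes
# every trainable parameter except BatchNorm gamma/beta and bias parameters.
# The small network below is an assumption made for demonstration.
def _example_init_net_param():
    net = layers.SequentialLayer([
        layers.Conv2d(3, 32, kernel_size=3),
        layers.BatchNorm2d(32),
    ])
    # default: truncated normal with sigma 0.02
    init_net_param(net)
    # alternatively, pass any initializer name understood by `initializer`
    init_net_param(net, initialize_mode='normal')
    return net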
def __init__(self,
             embedding_size,
             embedding_shape,
             use_relative_positions=False,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.embedding_table = Parameter(
        initializer(TruncatedNormal(initializer_range),
                    [token_type_vocab_size, embedding_size]),
        name='embedding_table')
    self.shape_flat = (-1,)
    self.one_hot = layers.OneHot()
    self.on_value = Tensor(1.0, ts.float32)
    self.off_value = Tensor(0.1, ts.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
    self.layernorm = layers.LayerNorm((embedding_size,))
    self.dropout = layers.Dropout(1 - dropout_prob)
    self.gather = P.Gather()
    self.use_relative_positions = use_relative_positions
    self.slice = P.StridedSlice()
    self.full_position_embeddings = Parameter(
        initializer(TruncatedNormal(initializer_range),
                    [max_position_embeddings, embedding_size]),
        name='full_position_embeddings')
def __init__(self,
             vocab_size,
             embedding_size,
             embedding_shape,
             use_one_hot_embeddings=False,
             initializer_range=0.02):
    super(EmbeddingLookup, self).__init__()
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embedding_table = Parameter(
        initializer(TruncatedNormal(initializer_range),
                    [vocab_size, embedding_size]))
    self.expand = P.ExpandDims()
    self.shape_flat = (-1,)
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, ts.float32)
    self.off_value = Tensor(0.0, ts.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
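# Hedged usage sketch (illustrative): assuming the full EmbeddingLookup class is
# available, a BERT-base-style instantiation would look roughly like the call
# below. The concrete sizes (30522, 768, batch/sequence lengths) are assumptions,
# not values taken from the source.
def _example_embedding_lookup(batch_size=32, seq_length=128):
    return EmbeddingLookup(vocab_size=30522,
                           embedding_size=768,
                           embedding_shape=[batch_size, seq_length, 768],
                           use_one_hot_embeddings=False,
                           initializer_range=0.02)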
def __init__(self,
             length,
             depth,
             max_relative_position,
             initializer_range,
             use_one_hot_embeddings=False):
    super(RelaPosEmbeddingsGenerator, self).__init__()
    self.depth = depth
    self.vocab_size = max_relative_position * 2 + 1
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embeddings_table = Parameter(
        initializer(TruncatedNormal(initializer_range),
                    [self.vocab_size, self.depth]))
    self.relative_positions_matrix = RelaPosMatrixGenerator(
        length=length,
        max_relative_position=max_relative_position)
    self.reshape = P.Reshape()
    self.one_hot = layers.OneHot(depth=self.vocab_size)
    self.shape = P.Shape()
    self.gather = P.Gather()  # index_select
    self.matmul = P.BatchMatMul()
def __init__(self, network, optimizer, scale_update_layer=None):
    super(BertFinetuneLayer, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.weights = optimizer.parameters
    self.optimizer = optimizer
    self.optimizer.global_step = Parameter(initializer(0., [1]),
                                           name='global_step')
    self.grad = P.GradOperation(get_by_list=True, sens_param=True)
    self.allreduce = P.AllReduce()
    self.grad_reducer = None
    self.cast = P.Cast()
    self.gpu_target = False
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = P.FloatStatus()
        self.addn = P.AddN()
        self.reshape = P.Reshape()
    else:
        self.alloc_status = P.NPUAllocFloatStatus()
        self.get_status = P.NPUGetFloatStatus()
        self.clear_before_grad = P.NPUClearFloatStatus()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.depend_parameter_use = P.Depend()
    self.base = Tensor(1, ts.float32)
    self.less_equal = P.LessEqual()
    self.hyper_map = P.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_layer
    if scale_update_layer:
        self.loss_scale = Parameter(
            Tensor(scale_update_layer.get_loss_scale(), dtype=ts.float32),
            name="loss_scale")