def train_network(self, num_total_classes, num_neg_samples, sampler,
                  custom_dist, is_sparse):
    """Build an NCE training network.

    Creates the feed variables, the zero-initialized NCE weight/bias
    parameters, the NCE cost, and wires up the optimizer.

    Returns:
        [avg_cost, [feature, label_var]]: the mean NCE loss variable and
        the list of feed variables.
    """
    # Feed variables (names "input"/"label" are part of the feed contract).
    feature = fluid.layers.data(name="input", shape=[10], dtype="float32")
    label_var = fluid.layers.data(name="label", shape=[1], dtype="int64")

    main_block = fluid.default_main_program().global_block()
    # NCE weight and bias parameters, filled with the initializer's
    # default constant; referenced below by name via param_attr/bias_attr.
    main_block.create_parameter(
        shape=[num_total_classes, 10],
        dtype='float32',
        name='nce_w',
        initializer=initializer.ConstantInitializer())
    main_block.create_parameter(
        shape=[num_total_classes, 1],
        dtype='float32',
        name='nce_b',
        initializer=initializer.ConstantInitializer())

    # Fixed seed=1 keeps negative sampling deterministic across runs.
    cost = fluid.layers.nce(
        input=feature,
        label=label_var,
        num_total_classes=num_total_classes,
        sampler=sampler,
        custom_dist=custom_dist,
        sample_weight=None,
        param_attr='nce_w',
        bias_attr='nce_b',
        seed=1,
        num_neg_samples=num_neg_samples,
        is_sparse=is_sparse)
    avg_cost = fluid.layers.mean(cost)

    # Optimizer.
    optimizer = self.get_optimizer()
    optimizer.minimize(avg_cost)
    return [avg_cost, [feature, label_var]]
def __init__(self, attention_dim, input_dim, position_encoding_weight=1., position_rate=1., reduction_factor=1, has_bias=False, bias_dim=0, keep_prob=1.): super(AttentionBlock, self).__init__() # positional encoding omega_default = position_rate / reduction_factor self.omega_default = omega_default # multispeaker case if has_bias: std = np.sqrt(1.0 / bias_dim) initializer = I.NormalInitializer(loc=0., scale=std) self.q_pos_affine = dg.Linear(bias_dim, 1, param_attr=initializer) self.k_pos_affine = dg.Linear(bias_dim, 1, param_attr=initializer) self.omega_initial = self.create_parameter( shape=[1], attr=I.ConstantInitializer(value=omega_default)) # mind the fact that q, k, v have the same feature dimension # so we can init k_affine and q_affine's weight as the same matrix # to get a better init attention init_weight = np.random.normal(size=(input_dim, attention_dim), scale=np.sqrt(1. / input_dim)) initializer = I.NumpyArrayInitializer(init_weight.astype(np.float32)) # 3 affine transformation to project q, k, v into attention_dim q_affine = dg.Linear(input_dim, attention_dim, param_attr=initializer) self.q_affine = weight_norm(q_affine, dim=-1) k_affine = dg.Linear(input_dim, attention_dim, param_attr=initializer) self.k_affine = weight_norm(k_affine, dim=-1) std = np.sqrt(1.0 / input_dim) initializer = I.NormalInitializer(loc=0., scale=std) v_affine = dg.Linear(input_dim, attention_dim, param_attr=initializer) self.v_affine = weight_norm(v_affine, dim=-1) std = np.sqrt(1.0 / attention_dim) initializer = I.NormalInitializer(loc=0., scale=std) out_affine = dg.Linear(attention_dim, input_dim, param_attr=initializer) self.out_affine = weight_norm(out_affine, dim=-1) self.keep_prob = keep_prob self.has_bias = has_bias self.bias_dim = bias_dim self.attention_dim = attention_dim self.position_encoding_weight = position_encoding_weight
def test_constant_initializer_default_value(self):
    """Test the constant initializer with default value.

    A ConstantInitializer created without an explicit value should emit a
    single fill_constant op whose value attribute is 0.0.
    """
    prog = framework.Program()
    main_block = prog.global_block()
    main_block.create_parameter(
        dtype="float32",
        shape=[5, 10],
        lod_level=0,
        name="param",
        initializer=initializer.ConstantInitializer())
    # Exactly one op appended: the parameter's fill_constant init op.
    self.assertEqual(len(main_block.ops), 1)
    fill_op = main_block.ops[0]
    self.assertEqual(fill_op.type, 'fill_constant')
    self.assertAlmostEqual(fill_op.attr('value'), 0.0, delta=DELTA)
def __init__(self, input_size, hidden_size, num_layers=1, dropout=0):
    """Stacked bidirectional LSTM built from BasicLSTMUnit cells.

    Args:
        input_size (int): feature size of the first layer's input.
        hidden_size (int): hidden size of every LSTM cell.
        num_layers (int): number of stacked bidirectional layers.
        dropout (float): dropout rate (stored; applied elsewhere).
    """
    super(BiLSTM, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.dropout = dropout
    self.f_cells = dygraph.LayerList()
    self.b_cells = dygraph.LayerList()

    layer_input_size = input_size
    for _ in range(self.num_layers):
        # Forward and backward cells share identical configuration;
        # append forward first to keep the original creation order.
        for cells in (self.f_cells, self.b_cells):
            cells.append(
                rnn.BasicLSTMUnit(
                    input_size=layer_input_size,
                    hidden_size=hidden_size,
                    param_attr=initializer.Xavier(uniform=False),
                    bias_attr=initializer.ConstantInitializer(value=0.0)))
        # Deeper layers consume the concatenated fwd/bwd hidden states.
        layer_input_size = hidden_size * 2
def test_constant_initializer(self):
    """Test constant initializer with supplied value.

    Also checks that creating the same named parameter twice does not
    append a second init op to the block.
    """
    prog = framework.Program()
    main_block = prog.global_block()
    for _ in range(2):
        main_block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="param",
            initializer=initializer.ConstantInitializer(2.3))
    # Despite two create_parameter calls, only one fill_constant op exists.
    self.assertEqual(len(main_block.ops), 1)
    fill_op = main_block.ops[0]
    self.assertEqual(fill_op.type, 'fill_constant')
    self.assertAlmostEqual(fill_op.attr('value'), 2.3, delta=DELTA)