def __init__(self,
             batch_size,
             input_size,
             hidden_size,
             num_layers,
             dropout,
             task='pre-train'):
    super(ELMoBiLM, self).__init__()
    self._num_layers = num_layers
    self._dropout = dropout
    self._task = task
    self._lstm_layers = []
    # Build one stack of single-layer LSTMs per direction. Note that the
    # 'backward' stack still uses direction='forward'; the input reversal
    # is presumably handled in forward().
    for direction in ['forward', 'backward']:
        layers = []
        for i in range(num_layers):
            lstm = nn.LSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=1,
                direction='forward',
                weight_hh_attr=paddle.ParamAttr(
                    initializer=I.XavierUniform()),
                weight_ih_attr=paddle.ParamAttr(
                    initializer=I.XavierUniform()),
                bias_hh_attr=False,
                bias_ih_attr=paddle.ParamAttr(
                    initializer=I.Constant(value=0.0)))
            self.add_sublayer('{}_lstm_layer_{}'.format(direction, i), lstm)
            # Persistent zero-initialized states, shaped
            # [num_layers, batch_size, hidden_size].
            hidden_state = paddle.zeros(
                shape=[1, batch_size, hidden_size], dtype='float32')
            cell_state = paddle.zeros(
                shape=[1, batch_size, hidden_size], dtype='float32')
            layers.append({
                'lstm': lstm,
                'hidden_state': hidden_state,
                'cell_state': cell_state
            })
        self._lstm_layers.append(layers)
    if dropout:
        self._dropout_layer = nn.Dropout(p=dropout)
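
# A minimal standalone sketch (not part of the original snippet) of the same
# ParamAttr/XavierUniform pattern on a single nn.LSTM, including how a
# zero-initialized (hidden, cell) state pair like the ones built above is
# passed in. All sizes here are illustrative assumptions.
import paddle
import paddle.nn as nn
import paddle.nn.initializer as I

lstm = nn.LSTM(
    input_size=128,
    hidden_size=256,
    num_layers=1,
    direction='forward',
    weight_ih_attr=paddle.ParamAttr(initializer=I.XavierUniform()),
    weight_hh_attr=paddle.ParamAttr(initializer=I.XavierUniform()))
x = paddle.randn([4, 10, 128])                   # [batch, time, input_size]
h0 = paddle.zeros([1, 4, 256], dtype='float32')  # [num_layers, batch, hidden]
c0 = paddle.zeros([1, 4, 256], dtype='float32')
y, (h, c) = lstm(x, (h0, c0))                    # y: [4, 10, 256]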
def __init__(self, d_input, d_hidden, d_output, kernel_size, n_layers):
    super(CNNPostNet, self).__init__()
    self.convs = nn.LayerList()
    kernel_size = kernel_size if isinstance(kernel_size, (
        tuple, list)) else (kernel_size, )
    padding = (kernel_size[0] - 1, 0)
    for i in range(n_layers):
        c_in = d_input if i == 0 else d_hidden
        c_out = d_output if i == n_layers - 1 else d_hidden
        self.convs.append(
            Conv1dBatchNorm(
                c_in,
                c_out,
                kernel_size,
                weight_attr=I.XavierUniform(),
                padding=padding))
    self.last_bn = nn.BatchNorm1D(d_output)
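
# A minimal sketch (plain nn.Conv1D rather than the original Conv1dBatchNorm,
# which is not defined in this snippet) of why padding=(kernel_size - 1, 0)
# is causal: all zeros go on the left, so the output length equals the input
# length and position t never sees inputs after t. Channel and length sizes
# are illustrative assumptions.
import paddle
import paddle.nn as nn
import paddle.nn.initializer as I

k = 5
conv = nn.Conv1D(
    in_channels=80,
    out_channels=256,
    kernel_size=k,
    padding=(k - 1, 0),
    weight_attr=paddle.ParamAttr(initializer=I.XavierUniform()))
x = paddle.randn([2, 80, 100])  # [batch, channels, time]
y = conv(x)
print(y.shape)                  # [2, 256, 100] -- time length preserved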
def test_xavier_uniform_initializer_conv(self):
    """Test the Xavier initializer with uniform distribution for
       convolutions.
    """
    paddle.enable_static()
    program = framework.Program()
    block = program.global_block()
    for _ in range(2):
        param = block.create_parameter(
            dtype="float32",
            shape=[5, 10, 15, 20],
            lod_level=0,
            name="param",
            initializer=initializer.XavierUniform())
    # Creating a parameter with the same name twice reuses it, so only
    # one init op is appended to the block.
    self.assertEqual(len(block.ops), 1)
    init_op = block.ops[0]
    self.assertEqual(init_op.type, 'uniform_random')
    receptive_field_size = float(15 * 20)
    limit = np.sqrt(
        6.0 / ((param.shape[0] + param.shape[1]) * receptive_field_size))
    self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
    self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
    self.assertEqual(init_op.attr('seed'), 0)
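
# A standalone worked check (plain numpy, added for illustration) of the
# bound the test expects. For a conv weight of shape [out_c, in_c, kh, kw],
# Xavier uniform samples from U(-limit, limit) with
#   limit = sqrt(6 / (fan_in + fan_out)),
# where fan_in = in_c * kh * kw and fan_out = out_c * kh * kw, which factors
# into (out_c + in_c) * receptive_field_size as computed in the test above.
import numpy as np

shape = [5, 10, 15, 20]  # [out_channels, in_channels, kh, kw]
receptive_field_size = float(shape[2] * shape[3])
limit = np.sqrt(6.0 / ((shape[0] + shape[1]) * receptive_field_size))
print(limit)  # ~0.0365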