Example #1
 def __init__(self, n_cell, size_hidden, rate_dropout):
     super(ONT_BiGRU, self).__init__()
     self.rate_dropout = rate_dropout
     with self.init_scope():
         self.rnn_a = L.NStepBiGRU(n_cell, 300, size_hidden, rate_dropout)
         self.rnn_b = L.NStepBiGRU(n_cell, 300, size_hidden, rate_dropout)
         self.l1 = L.Linear(size_hidden * 4, size_hidden * 4)
         self.l2 = L.Linear(size_hidden * 4, 4)
Example #2
 def __init__(self):
     super().__init__()
     with self.init_scope():
         self.seq_birnn = L.NStepBiGRU(1, EMBED, EMBED, DROPOUT)
         self.att_linear = L.Linear(4 * EMBED, EMBED)
         self.att_birnn = L.NStepBiGRU(1, EMBED, EMBED, DROPOUT)
         self.att_score = L.Linear(2 * EMBED, 1)
         self.state_linear = L.Linear(4 * EMBED, EMBED)
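Example #3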
 def __init__(self, emb_size, hidden_size, use_dropout=0.1, flag_gpu=True):
     """
     emb_size:入力される分散表現ベクトル次元数
     hidden_size:隠れ層次元数
     use_dropout:float値.どの程度ドロップアウト使うか
     """
     w = chainer.initializers.HeNormal()
     super(SentenceEncoderBiGRU, self).__init__(
         # word_embed=L.EmbedID(n_vocab, emb_size, ignore_label=-1),
         word_embed=L.Linear(emb_size, hidden_size, initialW=w),
         bi_gru=L.NStepBiGRU(n_layers=1,
                             in_size=hidden_size,
                             out_size=hidden_size,
                             dropout=use_dropout))
     """
     n_layers:層数
     in_size:入力ベクトルの次元数
     out_size:出力ベクトルの次元数
     drop_out:dropout率
     """
     self.USE_DROPOUT = use_dropout
     self.USE_DROPOUT_keep = use_dropout
     self.FLAG_GPU = flag_gpu
     # Use cupy when running on the GPU, numpy otherwise
     if flag_gpu:
         self.ARR = cupy
         cupy.cuda.Device(0).use()
     else:
         self.ARR = np
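The docstrings above describe only the constructor arguments. As a reference, a minimal forward-pass sketch (not part of the original snippet; the names bigru, xs and the sizes are illustrative) shows how an NStepBiGRU built this way is called and what it returns:

import numpy as np
import chainer
import chainer.links as L

# Illustrative only: a 1-layer bidirectional GRU run over three variable-length sequences.
bigru = L.NStepBiGRU(n_layers=1, in_size=300, out_size=200, dropout=0.1)
xs = [chainer.Variable(np.random.rand(n, 300).astype(np.float32)) for n in (7, 5, 3)]
hy, ys = bigru(None, xs)  # hx=None starts from zero-initialized hidden states
# hy: final hidden states of both directions, shape (2 * n_layers, batch, out_size)
# ys: list of per-sequence outputs, each of shape (length_i, 2 * out_size)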
Example #4
    def __init__(self,
                 idim,
                 elayers,
                 cdim,
                 hdim,
                 subsample,
                 dropout,
                 typ="lstm"):
        super(BRNNP, self).__init__()
        with self.init_scope():
            for i in six.moves.range(elayers):
                if i == 0:
                    inputdim = idim
                else:
                    inputdim = hdim
                setattr(
                    self, "birnn%d" % i,
                    L.NStepBiLSTM(1, inputdim, cdim, dropout) if typ == "lstm"
                    else L.NStepBiGRU(1, inputdim, cdim, dropout))
                # bottleneck layer to merge
                setattr(self, "bt%d" % i, L.Linear(2 * cdim, hdim))

        self.elayers = elayers
        self.cdim = cdim
        self.subsample = subsample
        self.typ = typ
Example #5
 def __init__(self, n_layers, in_size, hidden_dims, use_dropout):
     super(GaussianEncoder, self).__init__()
     with self.init_scope():
         self.gru = L.NStepBiGRU(n_layers=n_layers,
                                 in_size=in_size,
                                 out_size=hidden_dims,
                                 dropout=use_dropout)
Example #6
 def __init__(self, n_layer, n_vocab, n_units, dropout, cudnn, initialW=None):
     super(BiRNN, self).__init__(
         embed=L.EmbedID(n_vocab, 50, initialW=initialW, ignore_label=0),
         l1=L.NStepBiGRU(n_layer, 50, 50, dropout)
         # l2=L.Linear(n_units / 2, 10),
     )
Example #7
    def setUp(self):
        shape = (self.n_layer * 2, len(self.lengths), self.out_size)
        if self.hidden_none:
            self.h = numpy.zeros(shape, 'f')
        else:
            self.h = numpy.random.uniform(-1, 1, shape).astype('f')
        self.xs = [
            numpy.random.uniform(-1, 1, (l, self.in_size)).astype('f')
            for l in self.lengths
        ]

        self.gh = numpy.random.uniform(-1, 1, shape).astype('f')
        self.gys = [
            numpy.random.uniform(-1, 1, (l, self.out_size * 2)).astype('f')
            for l in self.lengths
        ]
        self.rnn = links.NStepBiGRU(self.n_layer,
                                    self.in_size,
                                    self.out_size,
                                    self.dropout,
                                    use_cudnn=self.use_cudnn)

        for layer in self.rnn:
            for p in layer.params():
                p.data[...] = numpy.random.uniform(-1, 1, p.data.shape)
        self.rnn.zerograds()
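The setUp above only prepares fixtures; a forward call consistent with those shapes would look roughly like the following (a sketch under that assumption, not the original test body):

        # Sketch only: the forward check these fixtures are shaped for.
        h = None if self.hidden_none else self.h  # (n_layer * 2, batch, out_size) or None
        hy, ys = self.rnn(h, self.xs)
        assert hy.shape == self.h.shape
        assert all(y.shape == (l, self.out_size * 2)
                   for y, l in zip(ys, self.lengths))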
Example #8
    def __init__(self, n_embed, n_unit, n_att_unit, n_att_head, dr_hidden,
                 dr_input, pre_vec, gpu_flag):
        initializer1 = chainer.initializers.Uniform()
        initializer2 = chainer.initializers.HeNormal()

        super(GRU_Encoder, self).__init__()
        with self.init_scope():
            self.gru = L.NStepBiGRU(1, n_embed, n_unit, dr_hidden)
            self.decode = L.Linear(None, n_embed, initialW=initializer1)
            self.att_w1 = L.Linear(n_unit * 2,
                                   n_att_unit,
                                   nobias=True,
                                   initialW=initializer1)
            self.att_w2 = L.Linear(n_att_unit,
                                   n_att_head,
                                   nobias=True,
                                   initialW=initializer1)

            for w in self.gru.namedparams():
                name = w[0].split('/')[2]
                if 'w' in name:
                    shape = w[1].shape
                    w[1].initializer = initializer2
                    w[1].initialize(shape)

        self.embed = np.copy(pre_vec)
        self.gpu_flag = gpu_flag
        self.dr_input = dr_input
        self.n_att_head = n_att_head
        self.tmp_weight = None
Example #9
 def __init__(self, n_layer, n_units, n_hidden, dropout, cudnn, initialW=None):
     super(BiRNN_1, self).__init__(
         l1=L.NStepBiGRU(n_layer, n_units, n_hidden, dropout)
     )
     if not cudnn:
         import numpy as xp
     else:
         import cupy as xp
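Example #10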
 def __init__(self,
              n_layer,
              n_units,
              n_hidden,
              dropout,
              cudnn,
              initialW=None):
     super(BiRNN_1, self).__init__(
         l1=L.NStepBiGRU(n_layer, n_units, n_hidden, dropout))
Example #11
 def __init__(self, idim, elayers, cdim, hdim, dropout, typ="lstm"):
     super(BRNN, self).__init__()
     with self.init_scope():
         self.nbrnn = L.NStepBiLSTM(
             elayers, idim, cdim,
             dropout) if typ == "lstm" else L.NStepBiGRU(
                 elayers, idim, cdim, dropout)
         self.l_last = L.Linear(cdim * 2, hdim)
     self.typ = typ
Example #12
 def __init__(self, n_layers, in_size, out_size, dropout):
     super(BiGRUAggregator, self).__init__()
     with self.init_scope():
         self.bigru_layer = links.NStepBiGRU(n_layers, in_size, out_size,
                                             dropout)
         self.out_layer = GraphLinear(2 * out_size, out_size)
     self.n_layers = n_layers
     self.in_size = in_size
     self.out_size = out_size
     self.dropout = dropout
Example #13
 def __init__(self):
     super(CRNN, self).__init__()
     with self.init_scope():
         self.conv1 = L.Convolution2D(3, 32, 3, pad=1)
         self.conv2 = L.Convolution2D(32, 32, 3, pad=1)
         self.conv3 = L.Convolution2D(32, 32, 3, pad=1)
         self.conv4 = L.Convolution2D(32, 64, 3, pad=1)
         self.conv5 = L.Convolution2D(64, 128, 3, pad=1)
         self.conv6 = L.Convolution2D(128, 128, 3, pad=1)
         self.rnn = L.NStepBiGRU(2, in_size=512, out_size=512, dropout=0.2)
         self.embedding = L.Linear(512 * 2, 63)
Example #14
    def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units,
                 type_unit, word_dropout, denoising_rate, direc, attr,
                 loss_type):
        super(Seq2seq, self).__init__()
        with self.init_scope():
            self.embed_x = L.EmbedID(n_source_vocab, n_units)
            self.embed_y = L.EmbedID(n_target_vocab, n_units)
            #self.attention = Attention(n_units)
            if type_unit == 'lstm':
                if direc == 'uni':
                    self.encoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
                    self.decoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
                elif direc == 'bi':
                    self.encoder = L.NStepBiLSTM(n_layers, n_units, n_units,
                                                 0.1)
                    self.decoder = L.NStepBiLSTM(n_layers, n_units, n_units,
                                                 0.1)
            elif type_unit == 'gru':
                if direc == 'uni':
                    self.encoder = L.NStepGRU(n_layers, n_units, n_units, 0.1)
                    self.decoder = L.NStepGRU(n_layers, n_units, n_units, 0.1)
                elif direc == 'bi':
                    self.encoder = L.NStepBiGRU(n_layers, n_units, n_units,
                                                0.1)
                    self.decoder = L.NStepBiGRU(n_layers, n_units, n_units,
                                                0.1)
            if direc == 'uni':
                self.W = L.Linear(n_units, n_target_vocab)
            elif direc == 'bi':
                self.W = L.Linear(2 * n_units, n_target_vocab)
            if attr:
                self.Wc = L.Linear(2 * n_units, n_units)

        self.n_layers = n_layers
        self.n_units = n_units
        self.type_unit = type_unit
        self.word_dropout = word_dropout
        self.denoising_rate = denoising_rate
        self.attr = attr
        self.loss_type = loss_type
Example #15
 def __init__(self, in_size, bank_k, proj_filters1, proj_filters2):
     super(CBHG, self).__init__()
     with self.init_scope():
         self.conv1d_banks = [
             Conv1DwithBatchNorm(in_size, 128, i + 1) for i in range(bank_k)
         ]
         self.conv1d_proj1 = Conv1DwithBatchNorm(128, proj_filters1, 3)
         self.conv1d_proj2 = Conv1DwithBatchNorm(proj_filters1,
                                                 proj_filters2, 3)
         self.highways = [
             L.Highway(proj_filters2) for i in range(4)
         ]  # The parameters of the original paper are probably wrong.
         self.gru = L.NStepBiGRU(1, proj_filters2, 128, dropout=0)
Example #16
 def __init__(self, inv_examples):
     super().__init__()
     self.add_persistent('inv_examples', inv_examples)  # (T, I, 1+L+1)
     # Create model parameters
     with self.init_scope():
         self.embed = L.EmbedID(VOCAB, EMBED, ignore_label=0)
         self.task_embed = L.EmbedID(TASKS, EMBED)
         self.vmap_params = C.Parameter(0.0, (inv_examples.shape[:2]) +
                                        (VOCAB, ),
                                        name='vmap_params')
         self.uni_birnn = L.NStepBiGRU(1, EMBED, EMBED, 0)
         self.uni_linear = L.Linear(EMBED * 2, EMBED, nobias=True)
         self.l1 = L.Linear(LENGTH * EMBED + TASKS, EMBED * 2)
         self.l2 = L.Linear(EMBED * 2, EMBED)
         self.l3 = L.Linear(EMBED, EMBED)
     self.log = None
Example #17
 def __init__(self, vocab_size, character_embed_size, embed_size,
              hidden_size, batch_size, use_dropout, initial_embedding):
     super(EncoderGRU, self).__init__(
         word_embed=L.EmbedID(vocab_size,
                              character_embed_size,
                              initialW=initial_embedding,
                              ignore_label=-1),
         bi_gru=L.NStepBiGRU(n_layers=1,
                             in_size=character_embed_size,
                             out_size=hidden_size,
                             dropout=use_dropout),
         h_e=L.Linear(hidden_size * 2, hidden_size * 2),
         e_o=L.Linear(hidden_size * 2, embed_size),
     )
     self.hidden_size = hidden_size
     self.batch_size = batch_size
     self.use_dropout = use_dropout
Example #18
def construct_RNN(unit_type, bidirection, n_layers, n_input, n_units, dropout):
    rnn = None
    if unit_type == 'lstm':
        if bidirection:
            rnn = L.NStepBiLSTM(n_layers, n_input, n_units, dropout)
        else:
            rnn = L.NStepLSTM(n_layers, n_input, n_units, dropout)
    elif unit_type == 'gru':
        if bidirection:
            rnn = L.NStepBiGRU(n_layers, n_input, n_units, dropout)
        else:
            rnn = L.NStepGRU(n_layers, n_input, n_units, dropout)
    else:
        if bidirection:
            rnn = L.NStepBiRNNTanh(n_layers, n_input, n_units, dropout)
        else:
            rnn = L.NStepRNNTanh(n_layers, n_input, n_units, dropout)

    print('# RNN unit: {}, dropout={}'.format(rnn, rnn.__dict__['dropout']),
          file=sys.stderr)
    for i, c in enumerate(rnn._children):
        print('#   {}-th param'.format(i), file=sys.stderr)
        print('#      0 - W={}, b={}'.format(c.w0.shape, c.b0.shape),
              file=sys.stderr)
        print('#      1 - W={}, b={}'.format(c.w1.shape, c.b1.shape),
              file=sys.stderr)

        if unit_type == 'gru' or unit_type == 'lstm':
            print('#      2 - W={}, b={}'.format(c.w2.shape, c.b2.shape),
                  file=sys.stderr)
            print('#      3 - W={}, b={}'.format(c.w3.shape, c.b3.shape),
                  file=sys.stderr)
            print('#      4 - W={}, b={}'.format(c.w4.shape, c.b4.shape),
                  file=sys.stderr)
            print('#      5 - W={}, b={}'.format(c.w5.shape, c.b5.shape),
                  file=sys.stderr)

        if unit_type == 'lstm':
            print('#      6 - W={}, b={}'.format(c.w6.shape, c.b6.shape),
                  file=sys.stderr)
            print('#      7 - W={}, b={}'.format(c.w7.shape, c.b7.shape),
                  file=sys.stderr)

    return rnn
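A hypothetical call (the argument values below are made up for illustration) shows what the factory returns and what it logs:

# Hypothetical usage of construct_RNN; the sizes are made up.
rnn = construct_RNN(unit_type='gru', bidirection=True, n_layers=2,
                    n_input=100, n_units=200, dropout=0.3)
# Builds an L.NStepBiGRU(2, 100, 200, 0.3) and prints, for each child link,
# the shapes of the GRU weight matrices w0..w5 and biases b0..b5 to stderr.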
Example #19
    def __init__(self, n_voc, emb_dim, hid_dim, seq_len, gpu_num, dropout=0.2):
        super(Discriminator, self).__init__()
        self.hid_dim = hid_dim
        self.emb_dim = emb_dim
        self.seq_len = seq_len
        self.gpu_num = gpu_num
        self.dropout = dropout

        w = I.Normal(1.)
        with self.init_scope():
            self.embeddings = L.EmbedID(n_voc, emb_dim, initialW=w)
            self.gru = L.NStepBiGRU(2, emb_dim, hid_dim, dropout)
            self.gru2hidden = L.Linear(2 * 2 * hid_dim,
                                       hid_dim,
                                       initialW=w,
                                       initial_bias=I.Zero())
            self.dropout_linear = F.dropout
            self.hidden2out = L.Linear(hid_dim,
                                       1,
                                       initialW=w,
                                       initial_bias=I.Zero())
Example #20
 def __init__(self, rule_stories):
     super().__init__()
     # Setup rule repo
     rvctx, rvq, rva, rsupps = vectorise_stories(
         rule_stories)  # (R, Ls, L), (R, Q), (R, A), (R, I)
     self.add_persistent('rvctx', rvctx)
     self.add_persistent('rvq', rvq)
     self.add_persistent('rva', rva)
     self.add_persistent('rsupps', rsupps)
     # Create model parameters
     with self.init_scope():
         self.embed = L.EmbedID(len(word2idx), EMBED, ignore_label=0)
         # self.rulegen = RuleGen()
         self.vmap_params = C.Parameter(0.0, (rvq.shape[0], len(word2idx)),
                                        name='vmap_params')  # (R, V)
         self.mematt = MemAttention()
         self.uni_birnn = L.NStepBiGRU(1, EMBED, EMBED, DROPOUT)
         self.uni_linear = L.Linear(EMBED, EMBED, nobias=True)
         self.rule_linear = L.Linear(EMBED, EMBED, nobias=True)
         self.answer_linear = L.Linear(EMBED, len(word2idx))
     self.log = None
Example #21
    def __init__(
        self,
        out_dim,
        hidden_dim=16,
        n_layers=4,
        n_atom_types=MAX_ATOMIC_NUM,
        concat_hidden=False,
        layer_aggregator=None,
        dropout_rate=0.0,
        batch_normalization=False,
        weight_tying=True,
        use_attention=False,
        update_attention=False,
        attention_tying=True,
        context=False,
        context_layers=1,
        context_dropout=0.,
        message_function='matrix_multiply',
        edge_hidden_dim=16,
        readout_function='graph_level',
        num_timesteps=3,
        num_output_hidden_layers=0,
        output_hidden_dim=16,
        output_activation=functions.relu,
        output_atoms=False,
    ):
        super(GGNN, self).__init__()
        n_readout_layer = n_layers if concat_hidden else 1
        n_message_layer = 1 if weight_tying else n_layers
        n_attention_layer = 1 if attention_tying else n_layers
        self.n_readout_layer = n_readout_layer
        self.n_message_layer = n_message_layer
        self.n_attention_layer = n_attention_layer
        self.out_dim = out_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.concat_hidden = concat_hidden
        self.layer_aggregator = layer_aggregator
        self.dropout_rate = dropout_rate
        self.batch_normalization = batch_normalization
        self.weight_tying = weight_tying
        self.use_attention = use_attention
        self.update_attention = update_attention
        self.attention_tying = attention_tying
        self.context = context
        self.context_layers = context_layers
        self.context_dropout = context_dropout
        self.message_function = message_function
        self.edge_hidden_dim = edge_hidden_dim
        self.readout_function = readout_function
        self.num_timesteps = num_timesteps
        self.num_output_hidden_layers = num_output_hidden_layers
        self.output_hidden_dim = output_hidden_dim
        self.output_activation = output_activation
        self.output_atoms = output_atoms

        with self.init_scope():
            # Update
            self.embed = EmbedAtomID(out_size=hidden_dim, in_size=n_atom_types)

            self.message_layers = chainer.ChainList(*[
                GraphLinear(hidden_dim, self.NUM_EDGE_TYPE * hidden_dim)
                for _ in range(n_message_layer)
            ])

            if self.message_function == 'edge_network':
                del self.message_layers
                self.message_layers = chainer.ChainList(*[
                    EdgeNetwork(in_dim=self.NUM_EDGE_TYPE,
                                hidden_dim=self.edge_hidden_dim,
                                node_dim=self.hidden_dim)
                    for _ in range(n_message_layer)
                ])

            if self.context:
                self.context_bilstm = links.NStepBiLSTM(
                    n_layers=self.context_layers,
                    in_size=self.hidden_dim,
                    out_size=self.hidden_dim // 2,
                    dropout=context_dropout)

            # self-attention layer
            if use_attention or update_attention:
                # these commented-out layers are for a GAT implemented in TensorFlow.
                # self.linear_transform_layer = chainer.ChainList(
                #     *[links.ConvolutionND(1, in_channels=hidden_dim, out_channels=hidden_dim, ksize=1, nobias=True)
                #         for _ in range(n_attention_layer)]
                # )
                # self.conv1d_layer_1 = chainer.ChainList(
                #     *[links.ConvolutionND(1, in_channels=hidden_dim, out_channels=1, ksize=1)
                #         for _ in range(n_attention_layer)]
                # )
                # self.conv1d_layer_2 = chainer.ChainList(
                #     *[links.ConvolutionND(1, in_channels=hidden_dim, out_channels=1, ksize=1)
                #       for _ in range(n_attention_layer)]
                # )
                self.linear_transform_layer = chainer.ChainList(*[
                    links.Linear(
                        in_size=hidden_dim, out_size=hidden_dim, nobias=True)
                    for _ in range(n_attention_layer)
                ])
                self.neural_network_layer = chainer.ChainList(*[
                    links.Linear(
                        in_size=2 * self.hidden_dim, out_size=1, nobias=True)
                    for _ in range(n_attention_layer)
                ])

            # batch normalization
            if batch_normalization:
                self.batch_normalization_layer = links.BatchNormalization(
                    size=hidden_dim)

            self.update_layer = links.GRU(2 * hidden_dim, hidden_dim)
            # Readout
            self.i_layers = chainer.ChainList(*[
                GraphLinear(2 * hidden_dim, out_dim)
                for _ in range(n_readout_layer)
            ])
            self.j_layers = chainer.ChainList(*[
                GraphLinear(hidden_dim, out_dim)
                for _ in range(n_readout_layer)
            ])

            if self.readout_function == 'set2vec':
                del self.i_layers, self.j_layers
                # def __init__(self, node_dim, output_dim, num_timesteps=3, inner_prod='default',
                #   num_output_hidden_layers=0, output_hidden_dim=16, activation=chainer.functions.relu):
                self.readout_layer = chainer.ChainList(*[
                    Set2Vec(node_dim=self.hidden_dim * 2,
                            output_dim=out_dim,
                            num_timesteps=num_timesteps,
                            num_output_hidden_layers=num_output_hidden_layers,
                            output_hidden_dim=output_hidden_dim,
                            activation=output_activation)
                    for _ in range(n_readout_layer)
                ])

            if self.layer_aggregator:
                self.construct_layer_aggregator()

                if self.layer_aggregator in ('gru-attn', 'gru'):
                    self.bigru_layer = links.NStepBiGRU(
                        n_layers=1,
                        in_size=self.hidden_dim,
                        out_size=self.hidden_dim,
                        dropout=0.)
                if self.layer_aggregator in ('lstm-attn', 'lstm'):
                    self.bilstm_layer = links.NStepBiLSTM(
                        n_layers=1,
                        in_size=self.hidden_dim,
                        out_size=self.hidden_dim,
                        dropout=0.)
                if self.layer_aggregator in ('gru-attn', 'lstm-attn', 'attn'):
                    self.attn_dense_layer = links.Linear(
                        in_size=self.n_layers, out_size=self.n_layers)
                if self.layer_aggregator == 'self-attn':
                    self.attn_linear_layer = links.Linear(
                        in_size=self.n_layers, out_size=self.n_layers)

        if self.output_atoms:
            self.atoms = None
Example #22
 def __init__(self, n_layer, in_size, n_units, out_size, dropout=0.5):
     super(BiGRU, self).__init__()
     with self.init_scope():
         self.embed = L.EmbedID(in_size, n_units)
         self.l1 = L.NStepBiGRU(n_layer, n_units, n_units, dropout)
         self.l2 = L.Linear(n_units * 2, out_size)
Example #23
    def __init__(self,
                 input_size,
                 rnn_type,
                 bidirectional,
                 num_units,
                 num_proj,
                 num_layers,
                 dropout_input,
                 dropout_hidden,
                 subsample_list=[],
                 subsample_type='drop',
                 use_cuda=False,
                 merge_bidirectional=False,
                 num_stack=1,
                 splice=1,
                 input_channel=1,
                 conv_channels=[],
                 conv_kernel_sizes=[],
                 conv_strides=[],
                 poolings=[],
                 activation='relu',
                 batch_norm=False,
                 residual=False,
                 dense_residual=False,
                 num_layers_sub=0):

        super(RNNEncoder, self).__init__()

        if len(subsample_list) > 0 and len(subsample_list) != num_layers:
            raise ValueError(
                'subsample_list must be the same size as num_layers.')
        if subsample_type not in ['drop', 'concat']:
            raise TypeError('subsample_type must be "drop" or "concat".')
        if num_layers_sub < 0 or (num_layers_sub > 1
                                  and num_layers < num_layers_sub):
            raise ValueError('Set num_layers_sub between 1 and num_layers.')

        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.num_units = num_units
        self.num_proj = num_proj if num_proj is not None else 0
        self.num_layers = num_layers
        self.dropout_input = dropout_input
        self.dropout_hidden = dropout_hidden
        self.merge_bidirectional = merge_bidirectional
        self.use_cuda = use_cuda

        # TODO: self.clip_activation = clip_activation

        # Setting for hierarchical encoder
        self.num_layers_sub = num_layers_sub

        # Setting for subsampling
        if len(subsample_list) == 0:
            self.subsample_list = [False] * num_layers
        else:
            self.subsample_list = subsample_list
        self.subsample_type = subsample_type
        # This implementation is based on
        # https://arxiv.org/abs/1508.01211
        #     Chan, William, et al. "Listen, attend and spell."
        #         arXiv preprint arXiv:1508.01211 (2015).

        # Setting for residual connection
        assert not (residual and dense_residual)
        self.residual = residual
        self.dense_residual = dense_residual
        subsample_last_layer = 0
        for l_reverse, is_subsample in enumerate(subsample_list[::-1]):
            if is_subsample:
                subsample_last_layer = num_layers - l_reverse
                break
        self.residual_start_layer = subsample_last_layer + 1
        # NOTE: residual connection starts from the last subsampling layer

        with self.init_scope():
            # Setting for CNNs before RNNs
            if len(conv_channels) > 0 and len(conv_channels) == len(
                    conv_kernel_sizes) and len(conv_kernel_sizes) == len(
                        conv_strides):
                assert num_stack == 1 and splice == 1
                self.conv = CNNEncoder(input_size,
                                       input_channel=input_channel,
                                       conv_channels=conv_channels,
                                       conv_kernel_sizes=conv_kernel_sizes,
                                       conv_strides=conv_strides,
                                       poolings=poolings,
                                       dropout_input=0,
                                       dropout_hidden=dropout_hidden,
                                       activation=activation,
                                       use_cuda=use_cuda,
                                       batch_norm=batch_norm)
                input_size = self.conv.output_size
            else:
                input_size = input_size * splice * num_stack
                self.conv = None

            self.rnns = []
            self.projections = []
            for l in range(num_layers):
                if l == 0:
                    encoder_input_size = input_size
                elif self.num_proj > 0:
                    encoder_input_size = num_proj
                    if subsample_type == 'concat' and l > 0 and self.subsample_list[
                            l - 1]:
                        encoder_input_size *= 2
                else:
                    encoder_input_size = num_units * self.num_directions
                    if subsample_type == 'concat' and l > 0 and self.subsample_list[
                            l - 1]:
                        encoder_input_size *= 2

                if rnn_type == 'lstm':
                    if bidirectional:
                        rnn_i = L.NStepBiLSTM(n_layers=1,
                                              in_size=encoder_input_size,
                                              out_size=num_units,
                                              dropout=0)
                    else:
                        rnn_i = L.NStepLSTM(n_layers=1,
                                            in_size=encoder_input_size,
                                            out_size=num_units,
                                            dropout=0)

                elif rnn_type == 'gru':
                    if bidirectional:
                        rnn_i = L.NStepBiGRU(n_layers=1,
                                             in_size=encoder_input_size,
                                             out_size=num_units,
                                             dropout=0)
                    else:
                        rnn_i = L.NStepGRU(n_layers=1,
                                           in_size=encoder_input_size,
                                           out_size=num_units,
                                           dropout=0)

                elif rnn_type == 'rnn':
                    if bidirectional:
                        # rnn_i = L.NStepBiRNNReLU(
                        rnn_i = L.NStepBiRNNTanh(n_layers=1,
                                                 in_size=encoder_input_size,
                                                 out_size=num_units,
                                                 dropout=0)
                    else:
                        # rnn_i = L.NStepRNNReLU(
                        rnn_i = L.NStepRNNTanh(n_layers=1,
                                               in_size=encoder_input_size,
                                               out_size=num_units,
                                               dropout=0)
                else:
                    raise ValueError(
                        'rnn_type must be "lstm" or "gru" or "rnn".')

                if use_cuda:
                    rnn_i.to_gpu()
                setattr(self, rnn_type + '_l' + str(l), rnn_i)

                if l != self.num_layers - 1 and self.num_proj > 0:
                    proj_i = LinearND(num_units * self.num_directions,
                                      num_proj,
                                      dropout=dropout_hidden,
                                      use_cuda=use_cuda)

                    if use_cuda:
                        proj_i.to_gpu()
                    setattr(self, 'proj_l' + str(l), proj_i)
Example #24
 def __init__(self, n_layer, n_vocab, n_units, dropout, cudnn):
     super(BiRNN, self).__init__(
         embed=L.EmbedID(n_vocab, n_units, ignore_label=0),
         l1=L.NStepBiGRU(n_layer, n_units, n_units, dropout),
         l2=L.Linear(n_units, 10),
     )