Example #1
import mxnet as mx

S = mx.symbol  # symbol API alias assumed by this example


def rnn(bptt, vocab_size, num_embed, nhid, num_layers, dropout, num_proj, batch_size):
    """ word embedding + LSTM Projected """
    state_names = []
    data = S.var('data')
    weight = S.var("encoder_weight", stype='row_sparse')
    embed = S.sparse.Embedding(data=data, weight=weight, input_dim=vocab_size,
                               output_dim=num_embed, name='embed', sparse_grad=True)
    states = []
    outputs = S.Dropout(embed, p=dropout)
    for i in range(num_layers):
        prefix = 'lstmp%d_' % i
        init_h = S.var(prefix + 'init_h', shape=(batch_size, num_proj), init=mx.init.Zero())
        init_c = S.var(prefix + 'init_c', shape=(batch_size, nhid), init=mx.init.Zero())
        state_names += [prefix + 'init_h', prefix + 'init_c']
        lstmp = mx.gluon.contrib.rnn.LSTMPCell(nhid, num_proj)
        outputs, next_states = lstmp.unroll(bptt, outputs, begin_state=[init_h, init_c], \
                                            layout='NTC', merge_outputs=True)
        outputs = S.Dropout(outputs, p=dropout)
        # detach the carried states so gradients do not flow across BPTT segments
        states += [S.stop_gradient(s) for s in next_states]
    outputs = S.reshape(outputs, shape=(-1, num_proj))

    trainable_lstm_args = []
    for arg in outputs.list_arguments():
        if 'lstmp' in arg and 'init' not in arg:
            trainable_lstm_args.append(arg)
    return outputs, states, trainable_lstm_args, state_names
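
A minimal usage sketch, assuming the `mx`/`S` aliases above; the hyperparameter values and the softmax decoder head are illustrative, not part of the original example:

pred, states, lstm_args, state_names = rnn(
    bptt=35, vocab_size=10000, num_embed=200, nhid=400,
    num_layers=2, dropout=0.2, num_proj=200, batch_size=32)

# attach an illustrative decoder and softmax loss over the flattened LSTM outputs
label = S.var('label')
logits = S.FullyConnected(pred, num_hidden=10000, name='decode')
loss = S.SoftmaxOutput(logits, label=S.reshape(label, shape=(-1,)), name='softmax')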
Example #2
import mxnet as mx

mxy = mx.symbol  # symbol API alias assumed by this example


def getsymbol(num_classes=136):
    # define a VGG-style network
    data = mxy.Variable(name="data")
    label = mxy.Variable(name="label")

    # group 1
    conv1_1 = mxy.Convolution(data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
    relu1_1 = mxy.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    pool1 = mxy.Pooling(data=relu1_1, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool1")

    # group 2
    conv2_1 = mxy.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
    relu2_1 = mxy.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    pool2 = mxy.Pooling(data=relu2_1, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool2")

    # group 3
    conv3_1 = mxy.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
    relu3_1 = mxy.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mxy.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
    relu3_2 = mxy.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    pool3 = mxy.Pooling(data=relu3_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool3")

    # group 4
    conv4_1 = mxy.Convolution(data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
    relu4_1 = mxy.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mxy.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
    relu4_2 = mxy.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    pool4 = mxy.Pooling(data=relu4_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool4")

    # group 5
    conv5_1 = mxy.Convolution(data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mxy.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mxy.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mxy.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    pool5 = mxy.Pooling(data=relu5_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool5")

    # group 6
    flatten = mxy.Flatten(data=pool5, name="flatten")
    fc6 = mxy.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
    relu6 = mxy.Activation(data=fc6, act_type="relu", name="relu6")
    drop6 = mxy.Dropout(data=relu6, p=0.5, name="drop6")

    # group 7
    fc7 = mxy.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
    relu7 = mxy.Activation(data=fc7, act_type="relu", name="relu7")
    drop7 = mxy.Dropout(data=relu7, p=0.5, name="drop7")

    # output
    fc8 = mxy.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
    loc_loss = mxy.LinearRegressionOutput(data=fc8, label=label, name="loc_loss")

    #loc_loss_ = mxy.smooth_l1(name="loc_loss_", data=(fc8 - label), scalar=1.0)
    #loc_loss_ = mxy.smooth_l1(name="loc_loss_", data=fc8, scalar=1.0)
    #loc_loss = mx.sym.MakeLoss(name='loc_loss', data=loc_loss_)

    return loc_loss
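
A small sanity-check sketch, assuming the `mxy` alias above; the 224x224 input resolution and batch size of 8 are illustrative assumptions:

net = getsymbol(num_classes=136)
# infer output shapes for a hypothetical batch of 8 RGB images
_, out_shapes, _ = net.infer_shape(data=(8, 3, 224, 224), label=(8, 136))
print(out_shapes)   # -> [(8, 136)]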
Example #3
    def __call__(self, inputs, states):
        # zoneout: with some probability keep the previous output/state instead of the new one
        cell, p_outputs, p_states = self.base_cell, self.zoneout_outputs, self.zoneout_states
        next_output, next_states = cell(inputs, states)
        # Bernoulli masks are built by applying Dropout to a tensor of ones
        mask = lambda p, like: symbol.Dropout(symbol.ones_like(like), p=p)

        # before the first step there is no previous output, so use an empty placeholder
        prev_output = self.prev_output if self.prev_output is not None else symbol.zeros((0, 0))

        output = (symbol.where(mask(p_outputs, next_output), next_output, prev_output)
                  if p_outputs != 0. else next_output)
        states = ([symbol.where(mask(p_states, new_s), new_s, old_s) for new_s, old_s in
                   zip(next_states, states)] if p_states != 0. else next_states)

        self.prev_output = output

        return output, states
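
A minimal sketch of how such a zoneout wrapper is typically driven, assuming the symbolic `mx.rnn` cell API (`ZoneoutCell`, `LSTMCell`); the hidden size and zoneout rates are illustrative:

import mxnet as mx
from mxnet import symbol

cell = mx.rnn.ZoneoutCell(mx.rnn.LSTMCell(num_hidden=128),
                          zoneout_outputs=0.1, zoneout_states=0.1)
# single step: the wrapper applies zoneout to the base cell's output and states
out, states = cell(symbol.var('x_t'), cell.begin_state())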
Example #4
    def add_forward(self, data: sym.Variable):
        """Build the forward graph: conv/pool stem, stacked fire modules, dropout, 3x3 conv head."""
        conv1 = sym.Convolution(
            data, name='conv1', num_filter=64, kernel=(3, 3), stride=(2, 2))
        relu1 = sym.Activation(conv1, act_type='relu')
        pool1 = sym.Pooling(relu1, pool_type='max', kernel=(3, 3), stride=(2, 2))
        fire2 = self._fire_layer('fire2', pool1, s1x1=16, e1x1=64, e3x3=64)
        fire3 = self._fire_layer('fire3', fire2, s1x1=16, e1x1=64, e3x3=64)
        pool3 = sym.Pooling(fire3, name='pool3', kernel=(3, 3), stride=(2, 2), pool_type='max')
        fire4 = self._fire_layer('fire4', pool3, s1x1=32, e1x1=128, e3x3=128)
        fire5 = self._fire_layer('fire5', fire4, s1x1=32, e1x1=128, e3x3=128)
        pool5 = sym.Pooling(fire5, name='pool5', kernel=(3, 3), stride=(2, 2), pool_type='max')
        fire6 = self._fire_layer('fire6', pool5, s1x1=48, e1x1=192, e3x3=192)
        fire7 = self._fire_layer('fire7', fire6, s1x1=48, e1x1=192, e3x3=192)
        fire8 = self._fire_layer('fire8', fire7, s1x1=64, e1x1=256, e3x3=256)
        fire9 = self._fire_layer('fire9', fire8, s1x1=64, e1x1=256, e3x3=256)
        fire10 = self._fire_layer('fire10', fire9, s1x1=96, e1x1=384, e3x3=384)
        fire11 = self._fire_layer('fire11', fire10, s1x1=96, e1x1=384, e3x3=384)
        dropout11 = sym.Dropout(fire11, p=0.1, name='drop11')
        return sym.Convolution(
            dropout11, name='conv12', num_filter=NUM_OUT_CHANNELS,
            kernel=(3, 3), stride=(1, 1), pad=(1, 1))
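
The `_fire_layer` helper and `NUM_OUT_CHANNELS` are defined elsewhere in that class. A rough sketch of such a helper, under the assumption that it follows the standard SqueezeNet squeeze/expand pattern (the exact original implementation is not shown, so this is a guess):

    def _fire_layer(self, name, data, s1x1, e1x1, e3x3):
        """Hypothetical fire module: 1x1 squeeze, then parallel 1x1 and 3x3 expand branches."""
        sq = sym.Activation(sym.Convolution(data, name=name + '_s1x1', num_filter=s1x1,
                                            kernel=(1, 1)), act_type='relu')
        ex1 = sym.Activation(sym.Convolution(sq, name=name + '_e1x1', num_filter=e1x1,
                                             kernel=(1, 1)), act_type='relu')
        ex3 = sym.Activation(sym.Convolution(sq, name=name + '_e3x3', num_filter=e3x3,
                                             kernel=(3, 3), pad=(1, 1)), act_type='relu')
        return sym.concat(ex1, ex3, dim=1)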
Example #5
    def __call__(self, inputs, states):
        # apply dropout to the step input; the recurrent states pass through unchanged
        if self.dropout > 0:
            inputs = symbol.Dropout(data=inputs, p=self.dropout)
        return inputs, states
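
A minimal sketch of where such a dropout cell usually sits, assuming the symbolic `mx.rnn` API; the layer sizes, dropout rate, and prefixes are illustrative:

import mxnet as mx
from mxnet import symbol

# interleave dropout between stacked LSTM layers
stack = mx.rnn.SequentialRNNCell()
stack.add(mx.rnn.LSTMCell(num_hidden=256, prefix='l0_'))
stack.add(mx.rnn.DropoutCell(0.3, prefix='d0_'))
stack.add(mx.rnn.LSTMCell(num_hidden=256, prefix='l1_'))
outputs, states = stack.unroll(length=20, inputs=symbol.var('data'), merge_outputs=True)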