import mxnet as mx
import mxnet.symbol as S  # alias inferred from usage


def rnn(bptt, vocab_size, num_embed, nhid, num_layers, dropout, num_proj, batch_size):
    """Word embedding + projected LSTM (LSTMP) language model."""
    state_names = []
    data = S.var('data')
    # Row-sparse embedding weight: only rows touched by the batch are updated.
    weight = S.var("encoder_weight", stype='row_sparse')
    embed = S.sparse.Embedding(data=data, weight=weight, input_dim=vocab_size,
                               output_dim=num_embed, name='embed', sparse_grad=True)
    states = []
    outputs = S.Dropout(embed, p=dropout)
    for i in range(num_layers):
        prefix = 'lstmp%d_' % i
        init_h = S.var(prefix + 'init_h', shape=(batch_size, num_proj), init=mx.init.Zero())
        init_c = S.var(prefix + 'init_c', shape=(batch_size, nhid), init=mx.init.Zero())
        state_names += [prefix + 'init_h', prefix + 'init_c']
        # Explicit prefix keeps parameter names aligned with the 'lstmp' filter below.
        lstmp = mx.gluon.contrib.rnn.LSTMPCell(nhid, num_proj, prefix=prefix)
        outputs, next_states = lstmp.unroll(bptt, outputs, begin_state=[init_h, init_c],
                                            layout='NTC', merge_outputs=True)
        outputs = S.Dropout(outputs, p=dropout)
        # Detach carried-over states so gradients stop at segment boundaries (truncated BPTT).
        states += [S.stop_gradient(s) for s in next_states]
    outputs = S.reshape(outputs, shape=(-1, num_proj))
    # Collect the LSTMP weights/biases, excluding the initial-state placeholders.
    trainable_lstm_args = []
    for arg in outputs.list_arguments():
        if 'lstmp' in arg and 'init' not in arg:
            trainable_lstm_args.append(arg)
    return outputs, states, trainable_lstm_args, state_names
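
# A minimal usage sketch, not from the original source: all hyperparameter
# values below are hypothetical. The returned `state_names` are passed to
# Module so the LSTM states can be fed back in across BPTT segments.
pred, states, lstm_args, state_names = rnn(bptt=20, vocab_size=10000,
                                           num_embed=256, nhid=512, num_layers=2,
                                           dropout=0.2, num_proj=256, batch_size=32)
net = mx.sym.Group(states + [pred])
module = mx.mod.Module(net, data_names=['data'], label_names=None,
                       state_names=state_names)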

import mxnet as mx
mxy = mx.symbol  # alias inferred from usage


def getsymbol(num_classes=136):
    # Define a VGG-style network: five 3x3-convolution groups with max
    # pooling, followed by fully connected layers and a regression output.
    data = mxy.Variable(name="data")
    label = mxy.Variable(name="label")
    # group 1
    conv1_1 = mxy.Convolution(data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
    relu1_1 = mxy.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    pool1 = mxy.Pooling(data=relu1_1, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
    # group 2
    conv2_1 = mxy.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
    relu2_1 = mxy.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    pool2 = mxy.Pooling(data=relu2_1, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
    # group 3
    conv3_1 = mxy.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
    relu3_1 = mxy.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mxy.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
    relu3_2 = mxy.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    pool3 = mxy.Pooling(data=relu3_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool3")
    # group 4
    conv4_1 = mxy.Convolution(data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
    relu4_1 = mxy.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mxy.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
    relu4_2 = mxy.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    pool4 = mxy.Pooling(data=relu4_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
    # group 5
    conv5_1 = mxy.Convolution(data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mxy.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mxy.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mxy.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    pool5 = mxy.Pooling(data=relu5_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool5")
    # group 6
    flatten = mxy.Flatten(data=pool5, name="flatten")
    fc6 = mxy.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
    relu6 = mxy.Activation(data=fc6, act_type="relu", name="relu6")
    drop6 = mxy.Dropout(data=relu6, p=0.5, name="drop6")
    # group 7
    fc7 = mxy.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
    relu7 = mxy.Activation(data=fc7, act_type="relu", name="relu7")
    drop7 = mxy.Dropout(data=relu7, p=0.5, name="drop7")
    # output
    fc8 = mxy.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
    loc_loss = mxy.LinearRegressionOutput(data=fc8, label=label, name="loc_loss")
    # loc_loss_ = mxy.smooth_l1(name="loc_loss_", data=(fc8 - label), scalar=1.0)
    # loc_loss_ = mxy.smooth_l1(name="loc_loss_", data=fc8, scalar=1.0)
    # loc_loss = mx.sym.MakeLoss(name='loc_loss', data=loc_loss_)
    return loc_loss
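
# Sanity-check sketch (batch and image size hypothetical): the five stride-2
# poolings reduce a 224x224 input to 7x7 before the fully connected head.
net = getsymbol(num_classes=136)
_, out_shapes, _ = net.infer_shape(data=(8, 3, 224, 224), label=(8, 136))
print(out_shapes)  # [(8, 136)]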

def __call__(self, inputs, states):
    """One step of zoneout: stochastically keep previous outputs/states instead of new ones."""
    # Assumes `from mxnet import symbol` at module scope.
    cell, p_outputs, p_states = (self.base_cell, self.zoneout_outputs,
                                 self.zoneout_states)
    next_output, next_states = cell(inputs, states)
    # Dropout on an all-ones tensor yields a mask of zeros and 1/(1-p);
    # `where` keeps the new value wherever the mask entry is non-zero.
    mask = lambda p, like: symbol.Dropout(symbol.ones_like(like), p=p)
    prev_output = self.prev_output if self.prev_output else symbol.zeros((0, 0))
    output = (symbol.where(mask(p_outputs, next_output), next_output, prev_output)
              if p_outputs != 0. else next_output)
    states = ([symbol.where(mask(p_states, new_s), new_s, old_s)
               for new_s, old_s in zip(next_states, states)]
              if p_states != 0. else next_states)
    self.prev_output = output
    return output, states
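
# Usage sketch: this __call__ matches mx.rnn.ZoneoutCell, which wraps a base
# cell with separate zoneout probabilities for outputs and states. The cell
# sizes and probabilities below are hypothetical.
import mxnet as mx
base = mx.rnn.LSTMCell(num_hidden=256, prefix='lstm_')
zoneout = mx.rnn.ZoneoutCell(base, zoneout_outputs=0.1, zoneout_states=0.05)
outputs, final_states = zoneout.unroll(length=10, inputs=mx.sym.Variable('seq'),
                                       layout='NTC', merge_outputs=True)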

def add_forward(self, data: sym.Variable):
    """Build the forward pass: SqueezeNet-style fire modules ending in a conv head."""
    # Assumes `import mxnet.symbol as sym`; NUM_OUT_CHANNELS is a module-level constant.
    conv1 = sym.Convolution(data, name='conv1', num_filter=64,
                            kernel=(3, 3), stride=(2, 2))
    relu1 = sym.Activation(conv1, act_type='relu')
    pool1 = sym.Pooling(relu1, pool_type='max', kernel=(3, 3), stride=(2, 2))
    fire2 = self._fire_layer('fire2', pool1, s1x1=16, e1x1=64, e3x3=64)
    fire3 = self._fire_layer('fire3', fire2, s1x1=16, e1x1=64, e3x3=64)
    pool3 = sym.Pooling(fire3, name='pool3', kernel=(3, 3), stride=(2, 2),
                        pool_type='max')
    fire4 = self._fire_layer('fire4', pool3, s1x1=32, e1x1=128, e3x3=128)
    fire5 = self._fire_layer('fire5', fire4, s1x1=32, e1x1=128, e3x3=128)
    pool5 = sym.Pooling(fire5, name='pool5', kernel=(3, 3), stride=(2, 2),
                        pool_type='max')
    fire6 = self._fire_layer('fire6', pool5, s1x1=48, e1x1=192, e3x3=192)
    fire7 = self._fire_layer('fire7', fire6, s1x1=48, e1x1=192, e3x3=192)
    fire8 = self._fire_layer('fire8', fire7, s1x1=64, e1x1=256, e3x3=256)
    fire9 = self._fire_layer('fire9', fire8, s1x1=64, e1x1=256, e3x3=256)
    fire10 = self._fire_layer('fire10', fire9, s1x1=96, e1x1=384, e3x3=384)
    fire11 = self._fire_layer('fire11', fire10, s1x1=96, e1x1=384, e3x3=384)
    dropout11 = sym.Dropout(fire11, p=0.1, name='drop11')
    # 3x3 conv head producing NUM_OUT_CHANNELS detection outputs per location.
    return sym.Convolution(dropout11, name='conv12', num_filter=NUM_OUT_CHANNELS,
                           kernel=(3, 3), stride=(1, 1), pad=(1, 1))
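
# Hypothetical sketch of the `_fire_layer` helper used above, following the
# standard SqueezeNet fire-module design (squeeze 1x1 conv, then parallel
# expand 1x1 and 3x3 convs whose outputs are concatenated along channels).
# The project's real helper may differ in padding, activation, or naming.
def _fire_layer_sketch(name, inputs, s1x1, e1x1, e3x3):
    squeeze = sym.Convolution(inputs, name=name + '_squeeze1x1',
                              num_filter=s1x1, kernel=(1, 1))
    squeeze = sym.Activation(squeeze, act_type='relu')
    expand1 = sym.Convolution(squeeze, name=name + '_expand1x1',
                              num_filter=e1x1, kernel=(1, 1))
    expand1 = sym.Activation(expand1, act_type='relu')
    expand3 = sym.Convolution(squeeze, name=name + '_expand3x3',
                              num_filter=e3x3, kernel=(3, 3), pad=(1, 1))
    expand3 = sym.Activation(expand3, act_type='relu')
    return sym.Concat(expand1, expand3, dim=1, name=name + '_concat')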

def __call__(self, inputs, states):
    # Apply dropout to the step input only; recurrent states pass through untouched.
    if self.dropout > 0:
        inputs = symbol.Dropout(data=inputs, p=self.dropout)
    return inputs, states
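
# Usage sketch: a dropout cell like this is typically stacked between RNN
# layers; mx.rnn.DropoutCell behaves the same way. Hidden sizes, dropout
# rate, and prefixes below are hypothetical.
import mxnet as mx
stacked = mx.rnn.SequentialRNNCell()
stacked.add(mx.rnn.LSTMCell(num_hidden=128, prefix='l0_'))
stacked.add(mx.rnn.DropoutCell(0.3, prefix='d0_'))
stacked.add(mx.rnn.LSTMCell(num_hidden=128, prefix='l1_'))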