def make_stacked_gru(input_dim, hidden_dim, out_dim, layer_num):
    grus = [L.StatefulGRU(input_dim, hidden_dim)]
    grus.extend([
        L.StatefulGRU(hidden_dim * 2, hidden_dim)
        for _ in range(layer_num - 2)
    ])
    grus.append(L.StatefulGRU(hidden_dim * 2, out_dim))
    return chainer.ChainList(*grus)
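# Usage sketch (not from the original source): build a three-layer stack and
# reset all states before a new sequence. The 2 * hidden_dim input sizes of
# the upper layers suggest each step is fed the concatenation of two hidden
# vectors (e.g. forward/backward states); that pairing is an assumption here.
stack = make_stacked_gru(input_dim=100, hidden_dim=50, out_dim=50, layer_num=3)
for gru in stack:
    gru.reset_state()  # StatefulGRU keeps its state in .h between calls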
def __init__(self, n_units, n_vocab, n_turn,
             share=False, listener=False, speaker=False):
    if share:
        super(NaiveLanguage, self).__init__(
            definition=L.EmbedID(n_vocab, n_units),
            interpreter=L.StatefulGRU(n_units, n_units),
            decoder=L.StatefulGRU(n_units, n_units),
        )
    elif listener:
        super(NaiveLanguage, self).__init__(
            definition=L.EmbedID(n_vocab, n_units),
            interpreter=L.StatefulGRU(n_units, n_units),
        )
    elif speaker:
        super(NaiveLanguage, self).__init__(
            definition=L.EmbedID(n_vocab, n_units),
            decoder=L.StatefulGRU(n_units, n_units),
        )
    else:
        print('choose a language type: [share, listener, speaker]')
        exit()
    self.n_vocab = n_vocab
    self.n_units = n_units
    # Learnable <eos>/<bos> embeddings, initialized to zero.
    self.add_param('eos', (n_units,), dtype='f')
    self.eos.data[:] = 0
    self.add_param('bos', (n_units,), dtype='f')
    self.bos.data[:] = 0
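# Minimal speaking-step sketch, assuming the usual pattern for such agents
# (feed the learned `bos` embedding to the decoder GRU first). This forward
# logic is an assumption for illustration, not the class's actual __call__:
import chainer.functions as F

lang = NaiveLanguage(n_units=128, n_vocab=1000, n_turn=3, speaker=True)
lang.decoder.reset_state()
bos = F.broadcast_to(lang.bos, (1, lang.n_units))  # batch of one
h = lang.decoder(bos)  # hidden state after consuming <bos>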
def __init__(self, n_layer, input_dim, n_units, dropout=0.2, cudnn=False):
    super(SMARNN, self).__init__(
        W_vp=L.Linear(input_dim, n_units),
        W_vpa=L.Linear(n_units, n_units),
        W_v=L.Linear(n_units, 1),
        W_f_gru=L.StatefulGRU(input_dim * 2, n_units),
        W_b_gru=L.StatefulGRU(input_dim * 2, n_units))
def __init__(self, n_vocab, n_units, train=True):
    super(RNNLMGRU, self).__init__(
        embed=L.EmbedID(n_vocab, n_units),
        l1=L.StatefulGRU(n_units, n_units),
        l2=L.StatefulGRU(n_units, n_units),
        l3=L.Linear(n_units, n_vocab),
    )
    self.train = train
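# The snippet above defines only the links. A minimal forward sketch in the
# usual RNNLM shape (an assumption about this class's __call__, not a copy
# of it):
def forward_rnnlm(model, x):
    # x: int32 array of word ids, shape (batchsize,)
    h0 = model.embed(x)
    h1 = model.l1(h0)   # both GRUs carry state across timesteps
    h2 = model.l2(h1)
    return model.l3(h2)  # logits over the vocabulary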
def __init__(self):
    super(Encoder_Decoder_Model, self).__init__()
    with self.init_scope():
        self.hidden1 = L.Linear(4, 6)
        self.encoder = L.StatefulGRU(6, 2)
        self.hidden2 = L.Linear(4, 6)
        self.decoder = L.StatefulGRU(6, 2)
        self.output = L.Linear(2, 4)
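# One plausible way to hand the encoder state to the decoder (a sketch with
# hypothetical ReLU activations; the original forward pass is not shown).
# StatefulGRU exposes its hidden state as .h and accepts one via set_state():
import chainer.functions as F

def encode_decode_step(model, x_enc, x_dec):
    # x_enc, x_dec: float32 arrays of shape (batchsize, 4)
    h = model.encoder(F.relu(model.hidden1(x_enc)))
    model.decoder.set_state(h)  # seed the decoder with the encoder state
    d = model.decoder(F.relu(model.hidden2(x_dec)))
    return model.output(d)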
def __init__(self, n_layer, input_dim, n_units, batch_size,
             dropout=0.2, cudnn=False):
    super(MGRU, self).__init__(
        # Integer division: L.Linear needs int sizes, and n_units / 2 is a
        # float under Python 3.
        W_up=L.Linear(input_dim, n_units // 2),
        W_vp=L.Linear(n_units // 2, n_units // 2),
        W_uq=L.Linear(input_dim, n_units // 2),
        W_v=L.Linear(n_units // 2, 1),
        # W_g = L.Linear(input_dim * 2, input_dim * 2),
        W_gru_f=L.StatefulGRU(input_dim * 2, n_units // 2),
        W_gru_b=L.StatefulGRU(input_dim * 2, n_units // 2))
    self.batch_size = batch_size
    self.n_units = n_units
def __init__(self, n_units, n_vocab, agent_type='sender'):
    super(Language, self).__init__(
        meanings=L.EmbedID(n_vocab, n_units))
    if agent_type == 'sender':
        self.add_link('decoder', L.StatefulGRU(n_units, n_units))
        self.add_param('bos', (n_units,), dtype='f')
        self.bos.data[:] = 0
    else:
        self.add_link('encoder', L.StatefulGRU(n_units, n_units))
    self.n_vocab = n_vocab
    self.n_units = n_units
def __init__(self, n_layer, input_dim, n_units, dropout=0.2, cudnn=False):
    super(OutputLayer, self).__init__(
        Wp_h=L.Linear(input_dim, n_units),
        Wa_h=L.Linear(n_units, n_units),
        W_v=L.Linear(n_units, 1),
        W_vQVQ=L.Linear(100, 37),
        W_f_gru=L.StatefulGRU(input_dim, n_units))
def __init__(self, obs_size, layer_sz, act_sz):
    super(RNNAgent, self).__init__(
        ipt=L.StatefulGRU(obs_size, layer_sz),
        out=L.Linear(layer_sz, act_sz),
    )
    self.noise_probability = 0.0  # probability to output noise
    self.action_size = act_sz
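# Hypothetical rollout sketch (not the original agent code): the stateful
# GRU carries memory across timesteps, so the episode loop resets it only
# once at the start.
def rollout(agent, observations):
    agent.ipt.reset_state()
    actions = []
    for obs in observations:  # obs: float32 array, shape (1, obs_size)
        h = agent.ipt(obs)
        actions.append(agent.out(h))
    return actions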
def __init__(self, x_sz, rand_sz, layer_sz, output_sz):
    super(GeneratorNet, self).__init__(
        ipt=L.StatefulGRU(x_sz + rand_sz, layer_sz),
        out=L.Linear(layer_sz, output_sz + 2),
    )
    self.rand_sz = rand_sz
    self.act_size = x_sz
    self.spec = fitter.EnvSpec(4)
def __init__(self, n_layer, input_dim, n_units, batch_size,
             dropout=0.2, cudnn=False):
    super(PointLayer, self).__init__(
        Wp_h=L.Linear(input_dim, n_units),
        Wa_h=L.Linear(n_units, n_units),
        W_v=L.Linear(input_dim, 1),
        # W_vQVQ = L.Linear(input_dim, q_max_word),
        # W_rq = L.Linear(input_dim, n_units),
        W_f_gru=L.StatefulGRU(input_dim, n_units))
    self.n_units = n_units
    self.batch_size = batch_size
def __init__(self, n_units, n_vocab):
    super(Language, self).__init__(
        definition=L.EmbedID(n_vocab, n_units),
        expression=L.Linear(n_units, n_vocab, nobias=True),
        interpreter=L.StatefulGRU(n_units, n_units),
        decoder=L.StatefulGRU(n_units, n_units),
        bn_first_interpreter=L.BatchNormalization(n_units, use_cudnn=False),
        bn_next_interpreter=L.BatchNormalization(n_units, use_cudnn=False),
        bn_first_expression=L.BatchNormalization(n_vocab, use_cudnn=False),
        bn_next_expression=L.BatchNormalization(n_vocab, use_cudnn=False),
    )
    self.n_vocab = n_vocab
    self.n_units = n_units
    self.add_param('eos', (n_units,), dtype='f')
    self.eos.data[:] = 0
    self.add_param('bos', (n_units,), dtype='f')
    self.bos.data[:] = 0
def __init__(self, n_layer, input_dim, n_units, batch_size,
             dropout=0.2, cudnn=False):
    super(GARNN, self).__init__(
        W_up=L.Linear(input_dim, n_units),
        W_vp=L.Linear(n_units, n_units),
        W_uq=L.Linear(input_dim, n_units),
        W_v=L.Linear(n_units, 1),
        W_g=L.Linear(input_dim * 2, input_dim * 2),
        W_gru=L.StatefulGRU(input_dim * 2, n_units))
    self.batch_size = batch_size
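# The links above resemble the gated attention-based recurrent network of
# R-Net. A single-timestep sketch of that recurrence, assuming this class
# follows the paper (its actual __call__ is not shown, so treat this purely
# as an illustration):
import chainer.functions as F

def garnn_step(model, u_q, u_p_t, v_prev):
    # u_q: question encodings (n_q, input_dim); u_p_t: one passage step
    # (1, input_dim); v_prev: previous recurrent output (1, n_units).
    n_q, n_units = u_q.shape[0], model.W_v.W.shape[1]
    # Additive attention scores over the question words.
    s = model.W_v(F.tanh(model.W_uq(u_q) + F.broadcast_to(
        model.W_up(u_p_t) + model.W_vp(v_prev), (n_q, n_units))))
    a = F.softmax(F.transpose(s))        # (1, n_q) attention weights
    c_t = F.matmul(a, u_q)               # attended question context
    x = F.concat((u_p_t, c_t), axis=1)   # [passage step; context]
    x = F.sigmoid(model.W_g(x)) * x      # input gate from W_g
    return model.W_gru(x)                # new recurrent output (1, n_units)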
def __init__(self, dim):
    super(GRULearner, self).__init__(gru0=L.StatefulGRU(dim, dim))
def __init__(self, v, eos_id, k):
    super(Gru, self).__init__(
        embed=L.EmbedID(v, k),
        H=L.StatefulGRU(k, k),
        W=L.Linear(k, v))
    self.eos_id = eos_id
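# A plausible next-token step for this model (a sketch; the class's own
# forward code is not shown here):
import chainer.functions as F

def predict_next(model, token_id):
    # token_id: int32 array of shape (batchsize,)
    e = model.embed(token_id)
    h = model.H(e)  # stateful: carries the hidden state between calls
    return F.softmax(model.W(h))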
def setUp(self):
    in_size, out_size = 10, 8
    self.link = links.StatefulGRU(in_size, out_size)
    self.h = chainer.Variable(
        numpy.random.uniform(-1, 1, (3, out_size)).astype(numpy.float32))
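# A test built on this setUp would typically push the prepared state into
# the link and run one step, e.g. (a sketch, not the actual test method):
def test_forward(self):
    x = chainer.Variable(
        numpy.random.uniform(-1, 1, (3, 10)).astype(numpy.float32))
    self.link.set_state(self.h)  # install the hand-made hidden state
    y = self.link(x)
    self.assertEqual(y.shape, (3, 8))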
def __init__(self):
    super(Tempture_NN, self).__init__()
    with self.init_scope():
        self.c1 = L.StatefulGRU(1, 32)
        self.c2 = L.StatefulGRU(32, 32)
        self.c3 = L.StatefulGRU(32, 10)
def __init__(self, conf, name="mono"):
    self.name = name
    conf.check()
    wscale = 0.1

    # Forward recurrent stack: (embed_dim -> h0), (h0 -> h1), ...
    forward_lstm_attributes = {}
    forward_lstm_units = [(conf.ndim_char_embed, conf.lstm_hidden_units[0])]
    forward_lstm_units += zip(conf.lstm_hidden_units[:-1],
                              conf.lstm_hidden_units[1:])
    for i, (n_in, n_out) in enumerate(forward_lstm_units):
        if conf.rnn_type == "dsgu":
            forward_lstm_attributes["layer_%i" % i] = StatefulDSGU(n_in, n_out)
        elif conf.rnn_type == "lstm":
            if conf.lstm_apply_batchnorm:
                forward_lstm_attributes["layer_%i" % i] = BNLSTM(n_in, n_out)
            else:
                forward_lstm_attributes["layer_%i" % i] = L.LSTM(n_in, n_out)
        elif conf.rnn_type == "gru":
            forward_lstm_attributes["layer_%i" % i] = L.StatefulGRU(n_in, n_out)
        else:
            raise NotImplementedError()
    self.forward_lstm = StackedLSTM(**forward_lstm_attributes)
    self.forward_lstm.n_layers = len(forward_lstm_units)
    self.forward_lstm.apply_dropout = conf.lstm_apply_dropout

    # Backward recurrent stack, mirroring the forward one.
    backward_lstm_attributes = {}
    backward_lstm_units = [(conf.ndim_char_embed, conf.lstm_hidden_units[0])]
    backward_lstm_units += zip(conf.lstm_hidden_units[:-1],
                               conf.lstm_hidden_units[1:])
    for i, (n_in, n_out) in enumerate(backward_lstm_units):
        if conf.rnn_type == "dsgu":
            backward_lstm_attributes["layer_%i" % i] = StatefulDSGU(n_in, n_out)
        elif conf.rnn_type == "lstm":
            if conf.lstm_apply_batchnorm:
                backward_lstm_attributes["layer_%i" % i] = BNLSTM(n_in, n_out)
            else:
                backward_lstm_attributes["layer_%i" % i] = L.LSTM(n_in, n_out)
        elif conf.rnn_type == "gru":
            backward_lstm_attributes["layer_%i" % i] = L.StatefulGRU(n_in, n_out)
        else:
            raise NotImplementedError()
    self.backward_lstm = StackedLSTM(**backward_lstm_attributes)
    self.backward_lstm.n_layers = len(backward_lstm_units)
    self.backward_lstm.apply_dropout = conf.lstm_apply_dropout

    self.char_embed = L.EmbedID(conf.n_vocab, conf.ndim_char_embed,
                                ignore_label=-1)
    self.f_ym = L.Linear(conf.lstm_hidden_units[-1], conf.ndim_m, nobias=True)
    self.f_um = L.Linear(conf.lstm_hidden_units[-1], conf.ndim_m, nobias=True)

    # Attention scorer: maps ndim_m features down to a scalar score.
    attention_fc_attributes = {}
    if len(conf.attention_fc_hidden_units) == 0:
        attention_fc_hidden_units = [(conf.ndim_m, 1)]
    else:
        attention_fc_hidden_units = [(conf.ndim_m,
                                      conf.attention_fc_hidden_units[0])]
        attention_fc_hidden_units += zip(conf.attention_fc_hidden_units[:-1],
                                         conf.attention_fc_hidden_units[1:])
        attention_fc_hidden_units += [(conf.attention_fc_hidden_units[-1], 1)]
    for i, (n_in, n_out) in enumerate(attention_fc_hidden_units):
        attention_fc_attributes["layer_%i" % i] = L.Linear(n_in, n_out,
                                                           wscale=wscale)
    self.attention_fc = FullyConnectedNetwork(**attention_fc_attributes)
    self.attention_fc.n_layers = len(attention_fc_hidden_units)
    self.attention_fc.hidden_activation_function = conf.attention_fc_hidden_activation_function
    self.attention_fc.output_activation_function = conf.attention_fc_output_activation_function
    self.attention_fc.apply_dropout = conf.attention_fc_apply_dropout

    self.f_rg = L.Linear(conf.lstm_hidden_units[-1], conf.ndim_g, nobias=True)
    self.f_ug = L.Linear(conf.lstm_hidden_units[-1], conf.ndim_g, nobias=True)

    # Reader head: maps ndim_g features to vocabulary logits.
    reader_fc_attributes = {}
    if len(conf.reader_fc_hidden_units) == 0:
        reader_fc_hidden_units = [(conf.ndim_g, conf.n_vocab)]
    else:
        reader_fc_hidden_units = [(conf.ndim_g, conf.reader_fc_hidden_units[0])]
        reader_fc_hidden_units += zip(conf.reader_fc_hidden_units[:-1],
                                      conf.reader_fc_hidden_units[1:])
        reader_fc_hidden_units += [(conf.reader_fc_hidden_units[-1],
                                    conf.n_vocab)]
    for i, (n_in, n_out) in enumerate(reader_fc_hidden_units):
        reader_fc_attributes["layer_%i" % i] = L.Linear(n_in, n_out,
                                                        wscale=wscale)
    self.reader_fc = FullyConnectedNetwork(**reader_fc_attributes)
    self.reader_fc.n_layers = len(reader_fc_hidden_units)
    self.reader_fc.hidden_activation_function = conf.reader_fc_hidden_activation_function
    self.reader_fc.output_activation_function = conf.reader_fc_output_activation_function
    # Note: this reuses the attention-FC dropout flag for the reader FC.
    self.reader_fc.apply_dropout = conf.attention_fc_apply_dropout

    if conf.use_gpu:
        self.forward_lstm.to_gpu()
        self.backward_lstm.to_gpu()
        self.char_embed.to_gpu()
        self.attention_fc.to_gpu()
        self.reader_fc.to_gpu()
        self.f_ym.to_gpu()
        self.f_um.to_gpu()
        self.f_rg.to_gpu()
        self.f_ug.to_gpu()

    # One Adam optimizer per component, each with gradient clipping at 10.
    self.optimizer_char_embed = optimizers.Adam(alpha=conf.learning_rate,
                                                beta1=conf.gradient_momentum)
    self.optimizer_char_embed.setup(self.char_embed)
    self.optimizer_char_embed.add_hook(GradientClipping(10.0))

    self.optimizer_forward_lstm = optimizers.Adam(alpha=conf.learning_rate,
                                                  beta1=conf.gradient_momentum)
    self.optimizer_forward_lstm.setup(self.forward_lstm)
    self.optimizer_forward_lstm.add_hook(GradientClipping(10.0))

    self.optimizer_backward_lstm = optimizers.Adam(alpha=conf.learning_rate,
                                                   beta1=conf.gradient_momentum)
    self.optimizer_backward_lstm.setup(self.backward_lstm)
    self.optimizer_backward_lstm.add_hook(GradientClipping(10.0))

    self.optimizer_f_um = optimizers.Adam(alpha=conf.learning_rate,
                                          beta1=conf.gradient_momentum)
    self.optimizer_f_um.setup(self.f_um)
    self.optimizer_f_um.add_hook(GradientClipping(10.0))

    self.optimizer_f_ym = optimizers.Adam(alpha=conf.learning_rate,
                                          beta1=conf.gradient_momentum)
    self.optimizer_f_ym.setup(self.f_ym)
    self.optimizer_f_ym.add_hook(GradientClipping(10.0))

    self.optimizer_attention_fc = optimizers.Adam(alpha=conf.learning_rate,
                                                  beta1=conf.gradient_momentum)
    self.optimizer_attention_fc.setup(self.attention_fc)
    self.optimizer_attention_fc.add_hook(GradientClipping(10.0))

    self.optimizer_f_rg = optimizers.Adam(alpha=conf.learning_rate,
                                          beta1=conf.gradient_momentum)
    self.optimizer_f_rg.setup(self.f_rg)
    self.optimizer_f_rg.add_hook(GradientClipping(10.0))

    self.optimizer_f_ug = optimizers.Adam(alpha=conf.learning_rate,
                                          beta1=conf.gradient_momentum)
    self.optimizer_f_ug.setup(self.f_ug)
    self.optimizer_f_ug.add_hook(GradientClipping(10.0))

    self.optimizer_reader_fc = optimizers.Adam(alpha=conf.learning_rate,
                                               beta1=conf.gradient_momentum)
    self.optimizer_reader_fc.setup(self.reader_fc)
    self.optimizer_reader_fc.add_hook(GradientClipping(10.0))
    # (fragment: tail of an LSTM-style step function)
    self.f = F.sigmoid(f)
    self.o = F.sigmoid(o)
    self.c = self.a * self.i + self.f * c
    self.h = self.o * F.tanh(self.c)
    return self.h


if __name__ == "__main__":
    import numpy
    import chainer
    import chainer.functions as F
    from chainer import links as L

    x = chainer.Variable(numpy.ones((10, 10), dtype=numpy.float32))
    # gru = TreeLSTM(lateral_init=1, upward_init=1, bias_init=0,
    #                forget_bias_init=0, in_size=10, out_size=10, children=2)
    gru = L.StatefulGRU(bias_init=0, inner_init=1, in_size=10, out_size=10)
    lstm = L.LSTM(lateral_init=1, upward_init=1, bias_init=0,
                  forget_bias_init=0, in_size=10, out_size=10)
    # StatefulGRU returns a single Variable; the `[1]` indexing in the
    # original was for the tuple returned by the commented-out TreeLSTM.
    y1 = gru(x)
    y2 = lstm(x)
    y3 = F.sigmoid(x)
    y3 += F.sigmoid(x)
    print(y1.data)
    print(y2.data)
    print(y1.data.shape)
    print(y2.data.shape)
    print(numpy.allclose(y1.data, y2.data))
    # backward() from a non-scalar needs an initial gradient.
    y3.grad = numpy.ones_like(y3.data)
    y3.backward()
def __init__(self, v, k):
    super(MyGRU, self).__init__(
        embed=L.EmbedID(v, k),
        H=L.StatefulGRU(k, k),
        W=L.Linear(k, v),
    )
def __init__(self, action_size, observation_size, layer_sz):
    super(DiscriminatorNet, self).__init__(
        # the recurrent input layer
        ipt=L.StatefulGRU(action_size + observation_size + 2, layer_sz),
        # the feed-forward output layer
        out=L.Linear(layer_sz, 1),
    )
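# Hypothetical scoring sketch (not the original training loop): the GRU
# consumes one (action, observation, extra) vector per timestep and the
# linear layer emits a single real/fake logit per step.
def score_sequence(disc, steps):
    disc.ipt.reset_state()
    logits = []
    for s in steps:  # s: float32, shape (1, action_size + observation_size + 2)
        logits.append(disc.out(disc.ipt(s)))
    return logits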