def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # embedding to gates transformation weights, biases
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = np.zeros((2 * dim,)).astype('float32')

    # recurrent transformation weights for gates
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # embedding to hidden state proposal weights, biases
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    params[_p(prefix, 'bx')] = np.zeros((dim,)).astype('float32')

    # recurrent transformation weights for hidden state proposal
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux

    return params
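# The snippets in this file all rely on a few helpers that are not defined
# here: _p / prfx (parameter-name joiners), norm_weight, ortho_weight and
# zero_vector (get_layer, utils and theano come from the surrounding
# codebases and are not sketched). A minimal sketch of plausible definitions
# follows for reference; the exact scale and dtype conventions are
# assumptions, not the original implementations. Some snippets refer to the
# numpy module as `numpy` rather than `np`.
import numpy as np


def _p(prefix, name):
    # join a layer prefix and a parameter name, e.g. ('gru', 'W') -> 'gru_W'
    return '%s_%s' % (prefix, name)


# several snippets use prfx as an alias for the same helper
prfx = _p


def ortho_weight(ndim):
    # square orthogonal matrix from the SVD of a random Gaussian matrix
    W = np.random.randn(ndim, ndim)
    u, s, v = np.linalg.svd(W)
    return u.astype('float32')


def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian weights; falls back to an orthogonal square matrix
    # when the shape is square and ortho=True
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * np.random.randn(nin, nout)
    return W.astype('float32')


def zero_vector(length):
    # zero-initialised bias vector
    return np.zeros((length,)).astype('float32')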
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Init the LSTM parameters
    """
    assert nin is not None and dim is not None

    # input to hidden weights
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    # hidden to hidden (recurrent) weights
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # biases
    b = numpy.zeros((4 * dim,))
    params[_p(prefix, 'b')] = b.astype(theano.config.floatX)

    return params
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)

    The following equations define the GRU:
        u = sig(x_t Wu + h_t-1 Uu + bu)
        r = sig(x_t Wr + h_t-1 Ur + br)
        h = tanh(x_t Wx + (s_t-1 . r) Ux + bx)
        s_t = (1 - u) . h + u . s_t-1

    Below, some of the parameters are initialized together and later sliced:
        W = [Wu Wr], i.e. the (horizontal) concatenation of Wu and Wr
        b = [bu br]
        U = [Uu Ur]
    """
    if nin is None:
        nin = options['dim_word']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[prfx(prefix, 'W')] = W

    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix, 'U')] = U

    params[prfx(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    return params
def param_init_gru_rmn(params, prefix='gru_rmn', nin=None, dim=None,
                       vocab_size=None, memory_dim=None, memory_size=None):
    assert dim == memory_dim, 'Should be fixed!'

    # first GRU params
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'Wx')] = norm_weight(nin, dim)
    params[_p(prefix, 'Ux')] = ortho_weight(dim)
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    # memory block params
    params[_p(prefix, 'M')] = norm_weight(vocab_size, memory_dim)
    params[_p(prefix, 'C')] = norm_weight(vocab_size, memory_dim)
    params[_p(prefix, 'T')] = norm_weight(memory_size, memory_dim)

    # second GRU params
    params[_p(prefix, 'Wz')] = norm_weight(dim, memory_dim, ortho=False)
    params[_p(prefix, 'Wr')] = norm_weight(dim, memory_dim, ortho=False)
    params[_p(prefix, 'W2')] = norm_weight(dim, memory_dim, ortho=False)
    params[_p(prefix, 'Uz')] = ortho_weight(dim)
    params[_p(prefix, 'Ur')] = ortho_weight(dim)
    params[_p(prefix, 'U2')] = ortho_weight(dim)

    return params
def param_init_lstm_peep(options, params, prefix='lstm', nin=None, dim=None):
    """
    Code based on http://deeplearning.net/tutorial/code/lstm.py and Jamie's GRU code

    Long Short Term Memory Unit (LSTM)

    The LSTM is defined by the following equations, with
        W = [Wi Wf Wc Wo]   # input weights
        b = [bi bf bc bo]   # biases
        U = [Ui Uf Uc Uo]   # recurrent weights
        Pi Pf Po            # peephole params, applied to the previous cell c_t-1

        i_t = sig(Wi x_t + Ui h_t-1 + Pi c_t-1 + bi)
        f_t = sig(Wf x_t + Uf h_t-1 + Pf c_t-1 + bf)
        c_t = f_t c_t-1 + i_t tanh(Wc x_t + Uc h_t-1 + bc)
        o_t = sig(Wo x_t + Uo h_t-1 + Po c_t-1 + bo)
        h_t = o_t tanh(c_t)
    """
    if nin is None:
        nin = options['dim_word']
    if dim is None:
        dim = options['dim_proj']

    # input weight matrix is stacked 4 times: for the input gate, forget gate,
    # output gate, and cell input
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # the recurrent weight matrix
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),  # remember this is ortho_weight(dim, dim)
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # peephole weight vectors, all initialized to zero;
    # peephole weights are diagonal as in Graves's paper
    params[_p(prefix, 'Pi')] = numpy.zeros((dim,)).astype('float32')
    params[_p(prefix, 'Pf')] = numpy.zeros((dim,)).astype('float32')
    params[_p(prefix, 'Po')] = numpy.zeros((dim,)).astype('float32')

    # initial h_0 and cell get made in lstm_layer or passed in
    # initialize forget gates to one?
    return params
def param_init_gru(options, param, prefix='gru', nin=None, dim=None):
    param[prefix + '_W'] = numpy.concatenate([norm_weight(nin, dim),
                                              norm_weight(nin, dim)], axis=1)
    param[prefix + '_U'] = numpy.concatenate([ortho_weight(dim),
                                              ortho_weight(dim)], axis=1)
    param[prefix + '_b'] = zero_vector(2 * dim)
    param[prefix + '_Wx'] = norm_weight(nin, dim)
    param[prefix + '_Ux'] = ortho_weight(dim)
    param[prefix + '_bx'] = zero_vector(dim)
    return param
def param_init(options, params, nin, dim, dimctx, prefix='lstm_cond'):
    # input to LSTM
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    # ctx to LSTM
    V = np.concatenate([norm_weight(dimctx, dim),
                        norm_weight(dimctx, dim),
                        norm_weight(dimctx, dim),
                        norm_weight(dimctx, dim)], axis=1)
    params[_p(prefix, 'V')] = V

    # LSTM to LSTM
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

    return params
def param_init_lstm_cond(self, options, params, prefix='lstm_cond',
                         nin=None, dim=None, dimctx=None):
    # nin=512 dim=512 dimctx=2048
    if nin is None:
        nin = options['word_dim']
    if dim is None:
        dim = options['lstm_dim']
    if dimctx is None:
        dimctx = options['ctx_dim']

    # input to LSTM
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W  # bo_lstm_W:(512,2048)

    # LSTM to LSTM
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U  # bo_lstm_U:(512,2048)

    # bias to LSTM
    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')  # bo_lstm_b:(2048,)

    # attention: context -> hidden
    # Wc_att = norm_weight(dimctx, ortho=False)
    Wc_att = norm_weight(dim, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att  # bo_lstm_Wc_att:(2048,2048)

    # attention: LSTM -> hidden
    # Wd_att = norm_weight(dim, dimctx)
    Wd_att = norm_weight(dim, dim)
    params[_p(prefix, 'Wd_att')] = Wd_att  # bo_lstm_Wd_att:(512,2048)

    # attention: hidden bias
    # b_att = np.zeros((dimctx,)).astype('float32')
    b_att = np.zeros((dim,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att  # bo_lstm_b_att:(2048,)

    # attention:
    # U_att = norm_weight(dimctx, 1)
    U_att = norm_weight(dim, 28)
    params[_p(prefix, 'U_att')] = U_att  # bo_lstm_U_att:(2048,1)
    c_att = np.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_att')] = c_att  # bo_lstm_c_att:(1,)

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel  # bo_lstm_W_sel:(512,1)
        b_sel = np.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel  # bo_lstm_b_sel: 0

    return params
def param_init_lstm(self, params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matrices for faster dot prods
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W  # to_lstm_W:(512,2048)

    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U  # to_lstm_U:(512,2048)

    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')  # to_lstm_b:(2048,)
    return params
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Encoder (behind)
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Encoder (current)
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_c',
                                              nin=options['dim_word'], dim=options['dim'])

    # Encoder (forward)
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_f',
                                              nin=options['dim_word'], dim=options['dim'])

    return params
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    if options['encoder'] != 'bow':
        params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                                  nin=options['dim_word'], dim=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image',
                                nin=options['dim_image'], nout=options['dim'])

    return params
def param_init(params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matrices for faster dot prods
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')
    return params
def init_params(self, options):
    # all parameters
    params = OrderedDict()

    # embedding
    params['Wemb'] = utils.norm_weight(options['n_words'], options['dim_word'])

    ctx_dim = options['ctx_dim']

    params = self.layers.get_layer('ff')[0](options, params, prefix='ff_state',
                                            nin=ctx_dim, nout=options['dim'])
    params = self.layers.get_layer('ff')[0](options, params, prefix='ff_memory',
                                            nin=ctx_dim, nout=options['dim'])

    # decoder: LSTM
    params = self.layers.get_layer('lstm_cond')[0](options, params, prefix='bo_lstm',
                                                   nin=options['dim_word'],
                                                   dim=options['dim'],
                                                   dimctx=ctx_dim)
    params = self.layers.get_layer('lstm')[0](params, nin=options['dim'],
                                              dim=options['dim'], prefix='to_lstm')

    # readout
    params = self.layers.get_layer('ff')[0](options, params, prefix='ff_logit_bo',
                                            nin=options['dim'], nout=options['dim_word'])
    if options['ctx2out']:
        params = self.layers.get_layer('ff')[0](options, params, prefix='ff_logit_ctx',
                                                nin=ctx_dim, nout=options['dim_word'])
    params = self.layers.get_layer('ff')[0](options, params, prefix='ff_logit_to',
                                            nin=options['dim'], nout=options['dim_word'])
    params = self.layers.get_layer('ff')[0](options, params, prefix='ff_logit',
                                            nin=options['dim_word'], nout=options['n_words'])
    return params
def init_params(self):
    options = self.options
    params = OrderedDict()

    # embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # rmn layer
    params = get_layer(options['encoder'])[0](params, prefix='encoder',
                                              nin=options['dim_word'],
                                              dim=options['dim'],
                                              vocab_size=options['n_words'],
                                              memory_dim=options['memory_dim'],
                                              memory_size=options['memory_size'])

    # readout
    params = get_layer('ff')[0](params, prefix='ff_logit_lstm_h1',
                                nin=options['dim'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](params, prefix='ff_logit_lstm_h2',
                                nin=options['dim'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](params, prefix='ff_logit_prev',
                                nin=options['dim_word'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](params, prefix='ff_logit',
                                nin=options['dim_word'], nout=options['n_words'])

    self.params = params
    self.init_tparams()
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # topic_vector encoder1 to gru
    # params = get_layer('ff')[0](options, params, prefix='ff_topic_vector1_emb_gru',
    #                             nin=options['dim_topic'], nout=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image',
                                nin=options['dim_image'], nout=options['dim'])

    # topic encoder
    if options['use_topic']:
        params = get_layer('ff')[0](options, params, prefix='ff_topic',
                                    nin=options['dim_topic'], nout=options['dim'])

    '''
    # topic_vector encoder1
    params = get_layer('ff')[0](options, params, prefix='ff_topic_vector1',
                                nin=options['dim_topic'], nout=options['dim'])
    # topic_vector encoder2
    params = get_layer('ff')[0](options, params, prefix='ff_topic_vector2',
                                nin=options['dim_topic'], nout=options['dim'])
    '''

    return params
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: current sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_c',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim'], nout=options['n_words'])

    return params
def init_params(options):
    params = OrderedDict()

    # embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # readout
    params = get_layer('ff')[0](options, params, prefix='ff_logit_lstm',
                                nin=options['dim'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit_prev',
                                nin=options['dim_word'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim_word'], nout=options['n_words'])

    return params
def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        params['Wemb'] = preemb

    # init state
    params = get_layer('ff')[0](options, params, prefix='ff_state',
                                nin=options['dimctx'], nout=options['dim'])

    # Decoder
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    if options['doutput']:
        params = get_layer('ff')[0](options, params, prefix='ff_hid',
                                    nin=options['dim'], nout=options['dim_word'])
        params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                    nin=options['dim_word'], nout=options['n_words'])
    else:
        params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                    nin=options['dim'], nout=options['n_words'])

    return params
def param_init_fflayer(self, options, params, prefix='ff', nin=None, nout=None):
    if nin is None:
        nin = options['ctx_dim']
    if nout is None:
        nout = options['lstm_dim']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix, 'b')] = np.zeros((nout,)).astype('float32')
    return params
def param_init_lstm_cond(self, options, params, nin, dim, dimctx, prefix='lstm_cond'):
    # input to LSTM
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    # LSTM to LSTM
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # bias to LSTM
    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx, dim * 4)
    params[_p(prefix, 'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = np.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = np.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = np.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
def param_init_gru(prefix='gru', nin=None, dim=None):
    # Gated Recurrent Unit (GRU)
    params = {}
    W = [norm_weight(nin, dim), norm_weight(nin, dim)]
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b1')] = np.zeros((dim,), dtype=np.float32)
    params[_p(prefix, 'b2')] = np.zeros((dim,), dtype=np.float32)
    U = [ortho_weight(dim), ortho_weight(dim)]
    params[_p(prefix, 'U')] = U
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = np.zeros((dim,), dtype=np.float32)

    return params[_p(prefix, 'W')][0], params[_p(prefix, 'W')][1], \
        params[_p(prefix, 'U')][0], params[_p(prefix, 'U')][1], \
        params[_p(prefix, 'b1')], params[_p(prefix, 'b2')], \
        params[_p(prefix, 'Wx')], params[_p(prefix, 'Ux')], params[_p(prefix, 'bx')]
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[prfx(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    params[prfx(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
def param_init_lnlstm(options, params, prefix='lnlstm', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[prfx(prefix, 'W')] = W

    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix, 'U')] = U

    params[prfx(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # layer normalization (LN) parameters: additive shifts b* and multiplicative scales s*
    scale_add = 0.0
    scale_mul = 1.0
    params[prfx(prefix, 'b1')] = scale_add * numpy.ones((4 * dim,)).astype('float32')
    params[prfx(prefix, 'b2')] = scale_add * numpy.ones((4 * dim,)).astype('float32')
    params[prfx(prefix, 'b3')] = scale_add * numpy.ones((1 * dim,)).astype('float32')
    params[prfx(prefix, 's1')] = scale_mul * numpy.ones((4 * dim,)).astype('float32')
    params[prfx(prefix, 's2')] = scale_mul * numpy.ones((4 * dim,)).astype('float32')
    params[prfx(prefix, 's3')] = scale_mul * numpy.ones((1 * dim,)).astype('float32')

    return params
def param_init_gru(options, param, prefix='gru', nin=None, dim=None):
    param[prefix + '_W'] = numpy.concatenate([norm_weight(nin, dim),
                                              norm_weight(nin, dim)], axis=1)
    param[prefix + '_U'] = numpy.concatenate([ortho_weight(dim),
                                              ortho_weight(dim)], axis=1)
    param[prefix + '_b'] = zero_vector(2 * dim)
    param[prefix + '_Wx'] = norm_weight(nin, dim)
    param[prefix + '_Ux'] = ortho_weight(dim)
    param[prefix + '_bx'] = zero_vector(dim)
    return param
def param_init_lngru(options, params, prefix='lngru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU) with LN
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W.astype('float32')
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U.astype('float32')

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx.astype('float32')
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux.astype('float32')
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    # LN parameters
    scale_add = 0.0
    scale_mul = 1.0
    params[_p(prefix, 'b1')] = scale_add * numpy.ones((2 * dim,)).astype('float32')
    params[_p(prefix, 'b2')] = scale_add * numpy.ones((1 * dim,)).astype('float32')
    params[_p(prefix, 'b3')] = scale_add * numpy.ones((2 * dim,)).astype('float32')
    params[_p(prefix, 'b4')] = scale_add * numpy.ones((1 * dim,)).astype('float32')
    params[_p(prefix, 's1')] = scale_mul * numpy.ones((2 * dim,)).astype('float32')
    params[_p(prefix, 's2')] = scale_mul * numpy.ones((1 * dim,)).astype('float32')
    params[_p(prefix, 's3')] = scale_mul * numpy.ones((2 * dim,)).astype('float32')
    params[_p(prefix, 's4')] = scale_mul * numpy.ones((1 * dim,)).astype('float32')

    return params
def init_params(options):
    params = OrderedDict()

    # embedding
    params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])
    params['Wemb_dec'] = norm_weight(options['n_words'], options['dim_word'])

    # encoder: bidirectional RNN
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_r',
                                              nin=options['dim_word'], dim=options['dim'])
    ctxdim = 2 * options['dim']

    # init_state, init_cell
    params = get_layer('ff')[0](options, params, prefix='ff_state',
                                nin=ctxdim, nout=options['dim'])

    # decoder
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder',
                                              nin=options['dim_word'], dim=options['dim'],
                                              dimctx=ctxdim)

    # readout
    params = get_layer('ff')[0](options, params, prefix='ff_logit_lstm',
                                nin=options['dim'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit_prev',
                                nin=options['dim_word'], nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit_ctx',
                                nin=ctxdim, nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim_word'], nout=options['n_words'])

    return params
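# These init_params-style functions only build an OrderedDict of numpy arrays.
# In the Theano codebases they come from, a companion helper typically wraps
# each array in a theano.shared variable before the computation graph is
# built (one snippet above calls self.init_tparams()). A minimal sketch of
# such a helper follows; the body is an illustration under that assumption,
# not any particular repository's implementation.
from collections import OrderedDict

import theano


def init_tparams(params):
    # wrap every numpy parameter in a named Theano shared variable
    tparams = OrderedDict()
    for name, value in params.items():
        tparams[name] = theano.shared(value, name=name)
    return tparams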
def param_init_fflayer(options, param, prefix='ff', nin=None, nout=None, ortho=True):
    param[prefix + '_W'] = norm_weight(nin, nout)
    param[prefix + '_b'] = zero_vector(nout)
    return param
def param_init_attend(self, params, nin, dimctx, prefix='attend'):
    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(nin, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = np.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = np.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_att')] = c_att

    return params
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    params[prefix + '_W'] = norm_weight(nin, nout)
    params[prefix + '_b'] = zero_vector(nout)
    return params
def param_init_gru(options, params, prefix='gru', nin=None, dim=None, hiero=False):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    if not hiero:
        W = numpy.concatenate([norm_weight(nin, dim),
                               norm_weight(nin, dim)], axis=1)
        params[prfx(prefix, 'W')] = W
        params[prfx(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[prfx(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[prfx(prefix, 'Ux')] = Ux
    params[prfx(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    """
    Affine transformation + point-wise nonlinearity
    """
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, ortho=ortho)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
def param_init_lstm_concat(self, options, params, nin, dim, dimctx, prefix='lstm_concat'):
    # input to LSTM
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    # LSTM to LSTM
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # bias to LSTM
    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx, dim * 4)
    params[_p(prefix, 'Wc')] = Wc

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = np.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image',
                                nin=options['dim_image'], nout=options['dim'])

    return params
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim'], nout=options['n_words'])

    return params
def param_init_gru_cond(options, params, prefix='gru_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    params = param_init_gru(options, params, prefix, nin=nin, dim=dim)

    # context to LSTM
    Wc = norm_weight(dimctx, dim * 2)
    params[prfx(prefix, 'Wc')] = Wc
    Wcx = norm_weight(dimctx, dim)
    params[prfx(prefix, 'Wcx')] = Wcx

    # attention: prev -> hidden
    Wi_att = norm_weight(nin, dimctx)
    params[prfx(prefix, 'Wi_att')] = Wi_att

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx)
    params[prfx(prefix, 'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[prfx(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[prfx(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[prfx(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[prfx(prefix, 'c_tt')] = c_att

    return params
def param_init_gru_cond(options, params, prefix='gru_cond', nin=None, dim=None,
                        dimctx=None, nin_nonlin=None, dim_nonlin=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    if nin_nonlin is None:
        nin_nonlin = nin
    if dim_nonlin is None:
        dim_nonlin = dim

    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = np.zeros((2 * dim,)).astype('float32')
    U = np.concatenate([ortho_weight(dim_nonlin),
                        ortho_weight(dim_nonlin)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin_nonlin, dim_nonlin)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim_nonlin)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = np.zeros((dim_nonlin,)).astype('float32')

    U_nl = np.concatenate([ortho_weight(dim_nonlin),
                           ortho_weight(dim_nonlin)], axis=1)
    params[_p(prefix, 'U_nl')] = U_nl
    params[_p(prefix, 'b_nl')] = np.zeros((2 * dim_nonlin,)).astype('float32')

    Ux_nl = ortho_weight(dim_nonlin)
    params[_p(prefix, 'Ux_nl')] = Ux_nl
    params[_p(prefix, 'bx_nl')] = np.zeros((dim_nonlin,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx, dim * 2)
    params[_p(prefix, 'Wc')] = Wc
    Wcx = norm_weight(dimctx, dim)
    params[_p(prefix, 'Wcx')] = Wcx

    # attention: combined -> hidden
    W_comb_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'W_comb_att')] = W_comb_att

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: hidden bias
    b_att = np.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = np.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att

    return params
def param_init_fflayer(params, prefix='ff', nin=None, nout=None, ortho=True, add_bias=True):
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    if add_bias:
        params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
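# A small, hypothetical usage sketch tying the pieces together: build an
# options dict, initialise a feed-forward layer and a GRU, and print the
# resulting parameter shapes. The option values and the choice of which
# variants to call (the last param_init_fflayer and param_init_gru defined
# above) are illustrative assumptions, not taken from any single snippet.
if __name__ == '__main__':
    options = {'dim_proj': 256, 'dim_word': 256}

    params = OrderedDict()
    params = param_init_fflayer(params, prefix='ff_logit', nin=256, nout=10000)
    params = param_init_gru(options, params, prefix='encoder', nin=256, dim=256)

    # expected keys include ff_logit_W, ff_logit_b, encoder_W, encoder_U,
    # encoder_b, encoder_Wx, encoder_Ux, encoder_bx
    for name, value in params.items():
        print(name, getattr(value, 'shape', value))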