def __init__(self, name, embedding, sent_encoder, classifier):
    """Compose a named sentence-classification model from its three stages."""
    super().__init__()
    # Attach the sub-modules; registration presumably happens through the
    # nn.Module base class -- TODO confirm what the base class is.
    self.name = name
    self.embedding = embedding
    self.sent_encoder = sent_encoder
    self.classifier = classifier
    # One-shot parameter initialisation after all sub-modules are attached.
    param_init(self)
def __init__(self, n_in, n_out, prefix='logist', drop_rate=0.5):
    """Logistic/softmax output layer: weight matrix W0 plus bias b."""
    self.n_in = n_in
    self.n_out = n_out
    # W0: (n_in, n_out), b: (n_out,) -- created through the project's
    # param_init factory and name-tagged with the layer prefix.
    self.W0 = param_init().param((n_in, n_out), name=_p(prefix, 'W0'))
    self.b = param_init().param((n_out,), name=_p(prefix, 'b'))
    self.params = [self.W0, self.b]
    self.drop_rate = drop_rate
def __init__(self, n_in, n_out, nonlinearity=nonlinearities.rectify):
    """Fully connected layer computing nonlinearity(x . W + b)."""
    # Uniformly initialised weights and a constant-initialised bias.
    self.W = param_init().uniform((n_in, n_out))
    self.b = param_init().constant((n_out,))
    self.params = [self.W, self.b]
    # Passing nonlinearity=None means "linear layer" (identity activation).
    self.nonlinearity = (
        nonlinearities.identity if nonlinearity is None else nonlinearity)
def __init__(self, s_in, t_in, prefix='Attention', **kwargs):
    """Additive attention: alignment matrix Wa and scoring vector v."""
    self.params = []
    self.s_in = s_in
    self.t_in = t_in
    # The alignment space is as wide as the target-side input.
    self.align_size = t_in
    self.prefix = prefix
    self.Wa = param_init().param((self.t_in, self.align_size),
                                 name=_p(prefix, 'Wa'))
    self.v = param_init().param((self.align_size,), name=_p(prefix, 'v'))
    self.params += [self.Wa, self.v]
def _init_params(self):
    """Create fused recurrent parameters: four gate blocks stacked in one
    matrix (init_type='mfunc', m=4)."""
    prefix = self.prefix
    in_to_hid = (self.emb_size, self.n_hids)
    hid_to_hid = (self.n_hids, self.n_hids)
    # W: input->hidden, U: hidden->hidden; each holds all 4 gate blocks.
    self.W = param_init().param(in_to_hid, init_type='mfunc', m=4,
                                name=_p(prefix, 'W'))
    self.U = param_init().param(hid_to_hid, init_type='mfunc', m=4,
                                name=_p(prefix, 'U'))
    # One bias slot per gate, hence n_hids * 4.
    self.b = param_init().param((self.n_hids * 4,), name=_p(prefix, 'b'))
    self.params += [self.W, self.U, self.b]
def __init__(self, s_in, t_in, prefix='Attention', **kwargs):
    """Additive attention over encoder states.

    s_in -- source-side feature width; t_in -- target-side feature width,
    which also fixes the alignment-space width (n_hids -> trg_nhids).
    """
    self.params = []
    self.s_in = s_in
    self.t_in = t_in
    self.align_size = t_in
    self.prefix = prefix
    # Wa projects target states into the alignment space; v scores them.
    self.Wa = param_init().param((self.t_in, self.align_size),
                                 name=_p(prefix, 'Wa'))
    self.v = param_init().param((self.align_size,), name=_p(prefix, 'v'))
    self.params += [self.Wa, self.v]
def __init__(self, n_in, lr_out, prefix='logist', **kwargs):
    """Output softmax layer mapping decoder states to vocabulary logits.

    n_in   -- width of the decoder's merged output (e.g. 512).
    lr_out -- target vocabulary size, i.e. the number of output classes.
    Optional kwargs: dropout (default 0.5), alpha (0.0), use_mv (0).
    """
    self.n_in = n_in
    self.lr_out = lr_out
    # W0: (n_in, lr_out); b: (lr_out,).
    self.W0 = param_init().param((n_in, lr_out), name=_p(prefix, 'W0'))
    # Bias initialised around log(1/V) so the initial softmax output is
    # close to the uniform distribution over the vocabulary.
    self.b = param_init().param((lr_out,), name=_p(prefix, 'b'),
                                scale=numpy.log(1. / lr_out))
    self.params = [self.W0, self.b]
    self.drop_rate = kwargs.pop('dropout', 0.5)
    self.alpha = kwargs.pop('alpha', 0.0)
    self.use_mv = kwargs.pop('use_mv', 0)
def __init__(self, embsize, vocab_size, prefix='Lookup_table'):
    """Word-embedding lookup table.

    Creates a single shared matrix W of shape (vocab_size, embsize),
    named via _p (e.g. 'Lookup_table_embed').
    """
    self.W = param_init().param((vocab_size, embsize),
                                name=_p(prefix, 'embed'))
    self.params = [self.W]
    self.vocab_size = vocab_size
    self.embsize = embsize
def build_cnn_model(num_vocab, dim_word, dim_fc, windows, dim_feature,
                    dropout_emb=0.0, dropout_fc=0.0, embedding_type=None,
                    vectors=None, freeze_emb=True, device=None):
    """Assemble a TextCNN: embedding -> parallel Conv2d windows -> classifier.

    Each window size w gets one Conv2d whose kernel spans the full word
    dimension; the pooled feature maps are concatenated before the
    fully connected classifier.
    """
    embedding = get_embedding(num_vocab, dim_word, vectors, freeze_emb,
                              embedding_type, dropout_emb)
    convs = [nn.Conv2d(1, dim_feature, (w, dim_word)) for w in windows]
    cnns = nn.ModuleList(convs)
    # Hidden width after concatenating one feature map per window.
    classifier = get_classifier(len(cnns) * dim_feature, dim_fc, dropout_fc)
    model = CNNModel(embedding, cnns, classifier)
    param_init(model)
    return model.to(device)
def interact(self, facts_rep, questions_rep):
    # Fuse question and fact representations: project both into a shared
    # tanh space, then for every question take an element-wise max over
    # all (projected) fact rows via theano.scan.
    # NOTE(review): parameters are created on every call to interact();
    # presumably this is invoked once at graph-build time -- confirm.
    self.W_f = param_init().orth((self.n_hids, self.n_hids))
    self.W_q = param_init().orth((self.n_hids, self.n_hids))
    self.b_f = param_init().constant((self.n_hids,))
    self.b_q = param_init().constant((self.n_hids,))
    self.params += [self.W_f, self.W_q, self.b_f, self.b_q]
    questions_rep = T.tanh(theano.dot(questions_rep, self.W_q) + self.b_q)
    facts_rep = T.tanh(theano.dot(facts_rep, self.W_f) + self.b_f)

    def _one_step(question_rep, facts_rep):
        # Pad a single question vector to a row so it broadcasts against
        # the matrix of fact representations.
        if question_rep.ndim == 1:
            question_rep = T.shape_padleft(question_rep, n_ones=1)
        inter_rep = (question_rep + facts_rep).max(axis=0)
        return inter_rep

    # One scan step per question; facts_rep is passed as a non-sequence.
    inter_reps, updates = theano.scan(_one_step,
                                      sequences=questions_rep,
                                      outputs_info=None,
                                      non_sequences=facts_rep
                                      )
    return inter_reps
def merge_out(self, state_below, mask_below, context=None):
    """Run the recurrent layer, then merge input, hidden (and context)
    through a maxout projection.

    Returns the pairwise-max merged tensor, masked by mask_below.
    """
    hiddens = self.apply(state_below, mask_below, context=context)
    if context is None:
        msize = self.n_in + self.n_hids
        osize = self.n_hids
        combine = T.concatenate([state_below, hiddens], axis=2)
    else:
        msize = self.n_in + self.n_hids + self.c_hids
        osize = self.n_hids
        # Tile the fixed context along the time axis before concatenating.
        n_times = state_below.shape[0]
        m_context = repeat_x(context, n_times)
        combine = T.concatenate([state_below, hiddens, m_context], axis=2)
    # NOTE(review): parameters are created at call time; presumably
    # merge_out is invoked once while building the graph -- confirm.
    self.W_m = param_init().uniform((msize, osize*2))
    self.b_m = param_init().constant((osize*2,))
    self.params += [self.W_m, self.b_m]
    merge_out = theano.dot(combine, self.W_m) + self.b_m
    # Maxout: project to 2*osize, then take the max of each adjacent pair.
    # BUGFIX: use floor division (//) so the reshape dimension stays an
    # integer -- plain '/' is true (float) division on Python 3 / current
    # Theano, which breaks the symbolic reshape.
    merge_max = merge_out.reshape((merge_out.shape[0],
                                   merge_out.shape[1],
                                   merge_out.shape[2] // 2,
                                   2), ndim=4).max(axis=3)
    return merge_max * mask_below[:, :, None]
def __init__(self, input, n_in, n_out):
    """Softmax classification layer: builds p(y|x) and argmax predictions
    symbolically from the given input tensor."""
    # Uniformly initialised weights; constant-initialised (zero) bias.
    self.W = param_init().uniform((n_in, n_out))
    self.b = param_init().constant((n_out,))
    # Symbolic class scores.
    energy = theano.dot(input, self.W) + self.b
    if energy.ndim == 3:
        # Manual, numerically stable softmax over the last (class) axis,
        # since T.nnet.softmax only handles matrices.
        shifted = T.exp(energy - T.max(energy, 2, keepdims=True))
        pmf = shifted / shifted.sum(2, keepdims=True)
    else:
        pmf = T.nnet.softmax(energy)
    self.p_y_given_x = pmf
    # Predicted class = index of the largest class probability.
    self.y_pred = T.argmax(self.p_y_given_x, axis=-1)
    self.params = [self.W, self.b]
def _init_params2(self):
    """Second-layer GRU parameters plus the context projection (Ws, bs)."""
    tag = lambda name: _p(self.prefix, name)
    n_hids = self.n_hids
    hh = (n_hids, n_hids)
    # Orthogonally initialised hidden-to-hidden matrices for the update,
    # reset and candidate gates.
    self.W_hz2 = param_init().param(hh, 'orth', name=tag('W_hz2'))
    self.W_hr2 = param_init().param(hh, 'orth', name=tag('W_hr2'))
    self.W_hh2 = param_init().param(hh, 'orth', name=tag('W_hh2'))
    self.b_z2 = param_init().param((n_hids,), name=tag('b_z2'))
    self.b_r2 = param_init().param((n_hids,), name=tag('b_r2'))
    self.b_h2 = param_init().param((n_hids,), name=tag('b_h2'))
    # Projection from the context (encoder) space into the hidden space.
    self.Ws = param_init().param((self.c_hids, self.n_hids), name=tag('Ws'))
    self.bs = param_init().param((self.n_hids,), name=tag('bs'))
    self.params += [self.W_hz2, self.W_hr2, self.W_hh2,
                    self.b_z2, self.b_r2, self.b_h2,
                    self.Ws, self.bs]
def _init_params2(self):
    # Second-layer GRU parameters plus the context projection (Ws, bs).
    f = lambda name: _p(self.prefix, name)
    n_hids = self.n_hids
    size_hh = (n_hids, n_hids)
    # Hidden-to-hidden gate matrices use orthogonal initialisation.
    self.W_hz2 = param_init().param(size_hh, 'orth', name=f('W_hz2'))
    self.W_hr2 = param_init().param(size_hh, 'orth', name=f('W_hr2'))
    self.W_hh2 = param_init().param(size_hh, 'orth', name=f('W_hh2'))
    self.b_z2 = param_init().param((n_hids, ), name=f('b_z2'))
    self.b_r2 = param_init().param((n_hids, ), name=f('b_r2'))
    self.b_h2 = param_init().param((n_hids, ), name=f('b_h2'))
    # Projection from the context (encoder) space into the hidden space.
    self.Ws = param_init().param((self.c_hids, self.n_hids), name=f('Ws'))
    self.bs = param_init().param((self.n_hids, ), name=f('bs'))
    self.params += [
        self.W_hz2, self.W_hr2, self.W_hh2, self.b_z2, self.b_r2,
        self.b_h2, self.Ws, self.bs
    ]
    # NOTE(review): the trailing ''' below appears to open a commented-out
    # region that continues past this chunk -- confirm against the full file.
    '''
def _init_params(self):
    """GRU parameters: input->hidden and hidden->hidden projections for
    the update (z), reset (r) and candidate (h) gates, plus optional
    context projections when self.with_contex is set."""
    xh = (self.n_in, self.n_hids)
    hh = (self.n_hids, self.n_hids)
    # Input-to-hidden weights (uniform init).
    self.W_xz = param_init().uniform(xh)
    self.W_xr = param_init().uniform(xh)
    self.W_xh = param_init().uniform(xh)
    # Hidden-to-hidden weights (orthogonal init).
    self.W_hz = param_init().orth(hh)
    self.W_hr = param_init().orth(hh)
    self.W_hh = param_init().orth(hh)
    # Gate biases (constant init).
    self.b_z = param_init().constant((self.n_hids,))
    self.b_r = param_init().constant((self.n_hids,))
    self.b_h = param_init().constant((self.n_hids,))
    self.params = [self.W_xz, self.W_xr, self.W_xh,
                   self.W_hz, self.W_hr, self.W_hh,
                   self.b_z, self.b_r, self.b_h]
    if self.with_contex:
        # Context-to-hidden projections and the initial-state projection.
        ch = (self.c_hids, self.n_hids)
        self.W_cz = param_init().uniform(ch)
        self.W_cr = param_init().uniform(ch)
        self.W_ch = param_init().uniform(ch)
        self.W_c_init = param_init().uniform(ch)
        self.params = self.params + [self.W_cz, self.W_cr,
                                     self.W_ch, self.W_c_init]
def _init_params(self):
    """Create GRU parameters; optionally context projections
    (self.with_contex) and the merge projection (self.merge), which is
    doubled in width when self.max_out enables the maxout pairing."""
    tag = lambda name: _p(self.prefix, name)
    n_in, n_hids = self.n_in, self.n_hids
    xh = (n_in, n_hids)
    hh = (n_hids, n_hids)
    # Input-to-hidden weights.
    self.W_xz = param_init().param(xh, name=tag('W_xz'))
    self.W_xr = param_init().param(xh, name=tag('W_xr'))
    self.W_xh = param_init().param(xh, name=tag('W_xh'))
    # Hidden-to-hidden weights (orthogonal init).
    self.W_hz = param_init().param(hh, 'orth', name=tag('W_hz'))
    self.W_hr = param_init().param(hh, 'orth', name=tag('W_hr'))
    self.W_hh = param_init().param(hh, 'orth', name=tag('W_hh'))
    # Gate biases.
    self.b_z = param_init().param((n_hids,), name=tag('b_z'))
    self.b_r = param_init().param((n_hids,), name=tag('b_r'))
    self.b_h = param_init().param((n_hids,), name=tag('b_h'))
    self.params = [self.W_xz, self.W_xr, self.W_xh,
                   self.W_hz, self.W_hr, self.W_hh,
                   self.b_z, self.b_r, self.b_h]
    if self.with_contex:
        # Context projections for each gate plus the initial-state map.
        ch = (self.c_hids, self.n_hids)
        self.W_cz = param_init().param(ch, name=tag('W_cz'))
        self.W_cr = param_init().param(ch, name=tag('W_cr'))
        self.W_ch = param_init().param(ch, name=tag('W_ch'))
        self.W_c_init = param_init().param(ch, name=tag('W_c_init'))
        # NOTE(review): b_init is created but not added to self.params,
        # matching the original code -- confirm whether that is intended.
        self.b_init = param_init().param((self.n_hids,), name=tag('b_init'))
        self.params = self.params + [self.W_cz, self.W_cr,
                                     self.W_ch, self.W_c_init]
        msize = self.n_in + self.n_hids + self.c_hids
    else:
        msize = self.n_in + self.n_hids
    if self.merge:
        osize = self.n_out
        # Maxout needs twice the width; pairs are maxed at merge time.
        width = osize * 2 if self.max_out else osize
        self.W_m = param_init().param((msize, width),
                                      name=_p(self.prefix, 'W_m'))
        self.b_m = param_init().param((width,),
                                      name=_p(self.prefix, 'b_m'))
        self.params += [self.W_m, self.b_m]
def __init__(self, embsize, vocab_size, prefix='Lookup_table'):
    """Word-embedding lookup table of shape (vocab_size, embsize)."""
    # Single shared parameter matrix, name-tagged via _p.
    self.W = param_init().param((vocab_size, embsize),
                                name=_p(prefix, 'embed'))
    self.params = [self.W]
    self.vocab_size = vocab_size
    self.embsize = embsize
def __init__(self, embsize, vocab_size):
    """Word-embedding lookup table with uniformly initialised weights."""
    self.W = param_init().uniform((vocab_size, embsize))
    self.params = [self.W]
    self.vocab_size = vocab_size
    self.embsize = embsize
def _init_params(self): f = lambda name: _p(self.prefix, name ) # return 'GRU_' + parameters name n_in = self.n_in n_hids = self.n_hids size_xh = (n_in, n_hids) size_hh = (n_hids, n_hids) # following three are parameters matrix from input layer to hidden layer: # generate numpy.ndarray by normal distribution self.W_xz = param_init().param(size_xh, name=f('W_xz')) self.W_xr = param_init().param(size_xh, name=f('W_xr')) self.W_xh = param_init().param(size_xh, name=f('W_xh')) # following three are parameters matrix from hidden layer to hidden layer: # generate numpy.ndarray by standard normal distribution with qr # factorization self.W_hz = param_init().param(size_hh, 'orth', name=f('W_hz')) self.W_hr = param_init().param(size_hh, 'orth', name=f('W_hr')) self.W_hh = param_init().param(size_hh, 'orth', name=f('W_hh')) # following three are bias vector of hidden layer: generate by normal distribution self.b_z = param_init().param((n_hids, ), name=f('b_z')) self.b_r = param_init().param((n_hids, ), name=f('b_r')) self.b_h = param_init().param((n_hids, ), name=f('b_h')) # just put all this parameters matrix (numpy.ndarray) into a list self.params = [ self.W_xz, self.W_xr, self.W_xh, self.W_hz, self.W_hr, self.W_hh, self.b_z, self.b_r, self.b_h ] if self.with_contex: # default False size_ch = (self.c_hids, self.n_hids) # (src_nhids*2, trg_nhids) # following there are parameters matrix from context hidden layer to hidden layer size_ch_ini = (self.c_hids, self.n_hids) self.W_cz = param_init().param(size_ch, name=f('W_cz')) self.W_cr = param_init().param(size_ch, name=f('W_cr')) self.W_ch = param_init().param(size_ch, name=f('W_ch')) self.W_c_init = param_init().param(size_ch_ini, name=f('W_c_init')) self.b_init = param_init().param((self.n_hids, ), name=f('b_init')) self.params = self.params + [ self.W_cz, self.W_cr, self.W_ch, self.W_c_init ] # just put several matrix together msize = self.n_in + self.n_hids + self.c_hids else: msize = self.n_in + self.n_hids if self.merge: 
# default True osize = self.n_out # default is units number of hidden layer (n_hids == trg_nhids) if self.max_out: # default True, need change here, because it is same w/o max_out self.W_m = param_init().param((msize, osize * 2), name=_p(self.prefix, 'W_m')) self.b_m = param_init().param((osize * 2, ), name=_p(self.prefix, 'b_m')) self.params += [self.W_m, self.b_m] else: self.W_m = param_init().param((msize, osize), name=_p(self.prefix, 'W_m')) self.b_m = param_init().param((osize, ), name=_p(self.prefix, 'b_m')) self.params += [self.W_m, self.b_m] # default False if self.ln: mul_scale = 1.0 add_scale = 0.0 self.g1 = param_init().param((n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_g1')) self.g2 = param_init().param((n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_g2')) self.g3 = param_init().param((n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_g3')) self.g4 = param_init().param((n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_g4')) self.b1 = param_init().param((n_hids, ), scale=add_scale, name=_p(self.prefix, 'ln_b1')) self.b2 = param_init().param((n_hids, ), scale=add_scale, name=_p(self.prefix, 'ln_b2')) self.b3 = param_init().param((n_hids, ), scale=add_scale, name=_p(self.prefix, 'ln_b3')) self.b4 = param_init().param((n_hids, ), scale=add_scale, name=_p(self.prefix, 'ln_b4')) self.params += [ self.g1, self.b1, self.g2, self.b2, self.g3, self.b3, self.g4, self.b4 ] if self.with_contex: self.gcz = param_init().param((self.n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_gcz')) self.bcz = param_init().param((self.n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_bcz')) self.gcr = param_init().param((self.n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_gcr')) self.bcr = param_init().param((self.n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_bcr')) self.gch = param_init().param((self.n_hids, ), scale=mul_scale, name=_p(self.prefix, 'ln_gch')) self.bch = param_init().param((self.n_hids, ), scale=mul_scale, 
name=_p(self.prefix, 'ln_bch'))