def __init__(self, inp_dim, hid_dim=50, initialization='glorot_normal', optimization='adadelta'):
    self.dim = hid_dim
    self.inp_dim = inp_dim
    initializer = nn_utils.get_initialization_function(initialization)
    optimizer = nn_utils.get_optimization_function(optimization)

    # Forming the input layer of the answer module
    q_sent_hid = T.vector("Question root node hidden state")
    ans_sent_hid = T.vector("Answer root node hidden state")
    ans_node_hid = T.vector("Answer word node hidden state")
    ans_parent_hid = T.vector("Answer word's parent hidden state")
    answer = T.scalar("Answer probability")

    # Forming the processing layer
    self.W_q = initializer(shape=(self.inp_dim, self.dim))
    self.W_ans_sent = initializer(shape=(self.inp_dim, self.dim))
    self.W_ans_node = initializer(shape=(self.inp_dim, self.dim))
    self.W_ans_parent = initializer(shape=(self.inp_dim, self.dim))
    self.b_inp = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    self.W_hid = nn_utils.normal_param(std=0.1, shape=(self.dim,))
    self.b_hid = nn_utils.constant_param(value=0.0, shape=())
    self.params = [
        self.W_q, self.W_ans_sent, self.W_ans_node, self.W_ans_parent,
        self.b_inp, self.W_hid, self.b_hid
    ]

    # Forming the output layer
    prediction = self.compute(q_sent_hid, ans_sent_hid, ans_node_hid,
                              ans_parent_hid)

    # Forming the updates and loss layer
    loss = T.nnet.binary_crossentropy(prediction, answer)
    self.updates = optimizer(loss, self.params)

    self.train = theano.function(
        [q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid, answer],
        [],
        updates=self.updates)
    self.predict = theano.function(
        [q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid], prediction)
    self.get_loss = theano.function(
        [q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid, answer], loss)
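# --- Reference sketch (not part of the original source) ---
# A minimal NumPy illustration of a forward pass that is consistent with the
# parameter shapes above. The real computation lives in self.compute(), whose
# body is not shown here, so the exact combination of terms (and the function
# name answer_module_forward) is a hypothetical illustration only.
import numpy as np

def answer_module_forward(q, s, n, p, W_q, W_s, W_n, W_p, b_inp, W_hid, b_hid):
    """Hypothetical scoring: project the four hidden states into a shared
    hidden layer, squash with tanh, then emit a sigmoid answer probability."""
    hidden = np.tanh(q @ W_q + s @ W_s + n @ W_n + p @ W_p + b_inp)
    return 1.0 / (1.0 + np.exp(-(hidden @ W_hid + b_hid)))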
def __init__(self, **kwargs):
    self.dim = kwargs['dim']
    self.word_vector_size = kwargs['word_vector_size']

    self.input_var = T.matrix('input_var')
    self.q_var = T.matrix('question_var')
    self.input_mask_var = T.ivector('input_mask_var')

    # GRU parameters of the input module (reset gate, update gate, candidate state)
    self.W_inp_res_in = nn_utils.normal_param(
        std=0.1, shape=(self.dim, self.word_vector_size))
    self.W_inp_res_hid = nn_utils.normal_param(std=0.1,
                                               shape=(self.dim, self.dim))
    self.b_inp_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

    self.W_inp_upd_in = nn_utils.normal_param(
        std=0.1, shape=(self.dim, self.word_vector_size))
    self.W_inp_upd_hid = nn_utils.normal_param(std=0.1,
                                               shape=(self.dim, self.dim))
    self.b_inp_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

    self.W_inp_hid_in = nn_utils.normal_param(
        std=0.1, shape=(self.dim, self.word_vector_size))
    self.W_inp_hid_hid = nn_utils.normal_param(std=0.1,
                                               shape=(self.dim, self.dim))
    self.b_inp_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

    # Run the GRU over the input word vectors and keep only the hidden states
    # at the positions selected by the input mask (the fact representations).
    inp_c_history, _ = theano.scan(fn=self.input_gru_step,
                                   sequences=self.input_var,
                                   outputs_info=T.zeros_like(self.b_inp_hid))
    self.inp_c = inp_c_history.take(self.input_mask_var, axis=0)

    # Run the same GRU over the question; its final hidden state is q.
    self.q_q, _ = theano.scan(fn=self.input_gru_step,
                              sequences=self.q_var,
                              outputs_info=T.zeros_like(self.b_inp_hid))
    self.q_q = self.q_q[-1]

    self.memory = [self.q_q.copy()]

    super().__init__(**kwargs)

    self.params += [
        self.W_inp_res_in, self.W_inp_res_hid, self.b_inp_res,
        self.W_inp_upd_in, self.W_inp_upd_hid, self.b_inp_upd,
        self.W_inp_hid_in, self.W_inp_hid_hid, self.b_inp_hid
    ]

    updates = lasagne.updates.adadelta(self.loss, self.params)
    input_list = [
        self.input_var, self.q_var, self.answer_var, self.input_mask_var
    ]
    output_list = [self.inp_c, self.q_q]
    self.generate_functions(input_list, output_list)
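# --- Reference sketch (not part of the original source) ---
# One common GRU formulation matching the parameter naming above
# (res = reset gate, upd = update gate, hid = candidate state). This is what
# input_gru_step presumably computes; the interpolation convention in the last
# line (z * candidate + (1 - z) * previous) is an assumption, as some
# implementations swap the two coefficients.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h_prev,
             W_res_in, W_res_hid, b_res,
             W_upd_in, W_upd_hid, b_upd,
             W_hid_in, W_hid_hid, b_hid):
    """x: (word_vector_size,) word vector; h_prev: (dim,) previous hidden state."""
    r = sigmoid(W_res_in @ x + W_res_hid @ h_prev + b_res)            # reset gate
    z = sigmoid(W_upd_in @ x + W_upd_hid @ h_prev + b_upd)            # update gate
    h_tilde = np.tanh(W_hid_in @ x + W_hid_hid @ (r * h_prev) + b_hid)  # candidate
    return z * h_tilde + (1.0 - z) * h_prev                           # new hidden state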
def __init__(self, word_vector_size, dim, dep_tags_size, visualise=False):
    super().__init__(word_vector_size, dim, visualise)

    self.W_dep = nn_utils.normal_param(std=0.1,
                                       shape=(self.dim, dep_tags_size))

    depTags1 = T.lvector('dep_tags1')
    depTags2 = T.lvector('dep_tags2')

    self.hid_state1, _ = theano.scan(
        fn=self.computation_syntactic,
        sequences=[self.sent1, depTags1],
        outputs_info=[T.zeros_like(self.b_inp_hid)])
    self.hid1 = self.hid_state1[-1]

    self.hid_state2, _ = theano.scan(
        fn=self.computation_syntactic,
        sequences=[self.sent2, depTags2],
        outputs_info=[T.zeros_like(self.b_inp_hid)])
    self.hid2 = self.hid_state2[-1]

    self.params.append(self.W_dep)

    self.predict = theano.function([self.sent1, depTags1], self.hid_state1)
    self.generate_function()
    self.get_similarity = theano.function(
        [self.sent1, self.sent2, depTags1, depTags2], [self.score])
    self.train = theano.function(
        [self.sent1, self.sent2, self.similarity_score, depTags1, depTags2],
        [],
        updates=self.updates)
    self.dep_tags = utils.load_dep_tags()
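# --- Usage sketch (not part of the original source) ---
# How the syntactic sentence embedder above might be driven. SentEmbd_syntactic
# is the class name used elsewhere in this project; the sentence-matrix
# orientation (one row per word), the tag-vocabulary size and the SICK-style
# 1-5 gold similarity score are assumptions made only for illustration.
import numpy as np
import SentEmbd

word_vector_size, dim, n_dep_tags = 50, 50, 40
embedder = SentEmbd.SentEmbd_syntactic(word_vector_size, dim, n_dep_tags)

sent1 = np.random.randn(6, word_vector_size)                    # 6 words, one row per word
sent2 = np.random.randn(8, word_vector_size)
tags1 = np.random.randint(0, n_dep_tags, size=6).astype('int64')
tags2 = np.random.randint(0, n_dep_tags, size=8).astype('int64')

embedder.train(sent1, sent2, 4.5, tags1, tags2)                 # 4.5 = assumed gold similarity
score = embedder.get_similarity(sent1, sent2, tags1, tags2)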
def __init__(self, word_vector_size, dim, visualise=False):
    self.visualise = visualise
    self.dim = dim  # Dimensions of the hidden state of the GRU
    self.word_vector_size = word_vector_size

    # GRU parameters: reset gate, update gate and candidate hidden state
    self.W_inp_res_in = nn_utils.normal_param(
        std=0.1, shape=(self.dim, self.word_vector_size))
    self.U_inp_res_hid = nn_utils.normal_param(std=0.1,
                                               shape=(self.dim, self.dim))
    self.b_inp_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))

    self.W_inp_upd_in = nn_utils.normal_param(
        std=0.1, shape=(self.dim, self.word_vector_size))
    self.U_inp_upd_hid = nn_utils.normal_param(std=0.1,
                                               shape=(self.dim, self.dim))
    self.b_inp_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))

    self.W_inp_hid_in = nn_utils.normal_param(
        std=0.1, shape=(self.dim, self.word_vector_size))
    self.U_inp_hid_hid = nn_utils.normal_param(std=0.1,
                                               shape=(self.dim, self.dim))
    self.b_inp_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))

    self.similarity_score = T.dscalar('score')
    self.sent1 = T.dmatrix('sent1')
    self.sent2 = T.dmatrix('sent2')

    self.params = [
        self.W_inp_res_in, self.U_inp_res_hid, self.b_inp_res,
        self.W_inp_upd_in, self.U_inp_upd_hid, self.b_inp_upd,
        self.W_inp_hid_in, self.U_inp_hid_hid, self.b_inp_hid
    ]

    # Placeholders populated later by subclasses / generate_function()
    self.hid1 = None
    self.hid2 = None
    self.hid_state1 = None
    self.hid_state2 = None
    self.train = None
    self.get_similarity = None
    self.updates = None
    self.score = None
    self.predict = None
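# --- Reference sketch (not part of the original source) ---
# How a sentence embedding is presumably obtained from the parameters above:
# scan a GRU step over the word vectors (one row per word) and keep the final
# hidden state. gru_step refers to the illustrative NumPy step sketched earlier
# in this document; the zero initial state mirrors T.zeros_like(self.b_inp_hid).
import numpy as np

def embed_sentence(sentence_matrix, gru_params, dim):
    """sentence_matrix: (n_words, word_vector_size); returns a (dim,) embedding."""
    h = np.zeros(dim)
    for word_vec in sentence_matrix:
        h = gru_step(word_vec, h, *gru_params)  # gru_params = the nine GRU weights/biases
    return h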
def __init__(self, babi_train_raw, babi_test_raw, word2vec, word_vector_size,
             dim, mode, answer_module, input_mask_mode, memory_hops, l2,
             normalize_attention, answer_vec, debug, **kwargs):
    self.vocab = {}
    self.ivocab = {}
    self.debug = debug
    self.word2vec = word2vec
    self.word_vector_size = word_vector_size
    self.dim = dim
    self.mode = mode
    self.answer_module = answer_module
    self.input_mask_mode = input_mask_mode
    self.memory_hops = memory_hops
    self.l2 = l2
    self.normalize_attention = normalize_attention
    self.answer_vec = answer_vec

    if self.mode != 'deploy':
        print("==> not used params in DMN class:", kwargs.keys())

    self.train_input, self.train_q, self.train_answer, self.train_input_mask = \
        self._process_input(babi_train_raw)
    self.test_input, self.test_q, self.test_answer, self.test_input_mask = \
        self._process_input(babi_test_raw)
    self.vocab_size = len(self.vocab)

    if self.debug:
        print('Input:', np.array(self.train_input).shape)
        print('Quest:', np.array(self.train_q).shape)
        print('Answer:', np.array(self.train_answer).shape)
        print('Mask:', np.array(self.train_input_mask))
        sys.exit(0)

    # if self.mode == 'deploy':
    #     self.input_var = T.tensor3('input_var')
    #     self.q_var = T.tensor3('question_var')
    #     self.input_mask_var = T.ivector('input_mask_var')
    # else:
    if self.answer_vec == 'word2vec':
        self.answer_var = T.vector('answer_var')
    else:
        self.answer_var = T.iscalar('answer_var')

    if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
        self.answer_size = self.vocab_size
    elif self.answer_vec == 'word2vec':
        self.answer_size = self.word_vector_size
    else:
        raise Exception("Invalid answer_vec type")

    if self.mode != 'deploy':
        print("==> building input module")

    if self.mode != 'deploy':
        print("==> creating parameters for memory module")
    self.W_mem_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_1 = nn_utils.normal_param(std=0.1, shape=(self.dim, 7 * self.dim + 2))
    self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
    self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

    if self.mode != 'deploy':
        print("==> building episodic memory module (fixed number of steps: %d)"
              % self.memory_hops)
    for iter in range(1, self.memory_hops + 1):
        current_episode = self.new_episode(self.memory[iter - 1])
        self.memory.append(
            self.GRU_update(self.memory[iter - 1], current_episode,
                            self.W_mem_res_in, self.W_mem_res_hid,
                            self.b_mem_res, self.W_mem_upd_in,
                            self.W_mem_upd_hid, self.b_mem_upd,
                            self.W_mem_hid_in, self.W_mem_hid_hid,
                            self.b_mem_hid))
    self.last_mem = self.memory[-1]

    if self.mode != 'deploy':
        print("==> building answer module")
    self.W_a = nn_utils.normal_param(std=0.1, shape=(self.answer_size, self.dim))

    if self.answer_module == 'feedforward':
        self.prediction = nn_utils.softmax(T.dot(self.W_a, self.last_mem))
    elif self.answer_module == 'recurrent':
        self.W_ans_res_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.dim + self.answer_size))
        self.W_ans_res_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.W_ans_upd_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.dim + self.answer_size))
        self.W_ans_upd_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.W_ans_hid_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.dim + self.answer_size))
        self.W_ans_hid_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        def answer_step(prev_a, prev_y):
            a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
                                self.W_ans_res_in, self.W_ans_res_hid,
                                self.b_ans_res, self.W_ans_upd_in,
                                self.W_ans_upd_hid, self.b_ans_upd,
                                self.W_ans_hid_in, self.W_ans_hid_hid,
                                self.b_ans_hid)
            y = T.dot(self.W_a, a)
            if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
                y = nn_utils.softmax(y)
            return [a, y]

        # TODO: add conditional ending
        dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
        results, updates = theano.scan(
            fn=answer_step,
            outputs_info=[self.last_mem, T.zeros_like(dummy)],
            n_steps=1)
        self.prediction = results[1][-1]
    else:
        raise Exception("invalid answer_module")

    if self.mode != 'deploy':
        print("==> collecting all parameters")
    self.params = [
        self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
        self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
        self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
        self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
    ]
    if self.answer_module == 'recurrent':
        self.params = self.params + [
            self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
            self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
            self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid
        ]

    if self.mode != 'deploy':
        print("==> building loss layer and computing updates")
    if debug:
        print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
        print('Answer dim:', self.answer_var.ndim)
    if self.answer_vec == 'word2vec':
        self.loss_ce = nn_utils.cosine_proximity_loss(
            self.prediction.dimshuffle('x', 0), T.stack([self.answer_var]))[0][0]
    else:
        self.loss_ce = T.nnet.categorical_crossentropy(
            self.prediction.dimshuffle('x', 0), T.stack([self.answer_var]))[0]
    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
    else:
        self.loss_l2 = 0
    self.loss = self.loss_ce + self.loss_l2
    if debug:
        print(self.loss.ndim)
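# --- Reference sketch (not part of the original source) ---
# The 7*dim + 2 attention features that new_episode presumably builds for each
# fact c, given the current memory m and the question q, following the gate
# feature set of the Dynamic Memory Network paper (Kumar et al., 2016). The
# exact ordering inside the real new_episode may differ; this only shows why
# W_1 above has 7*dim + 2 input columns.
import numpy as np

def attention_features(c, m, q, W_b):
    z = np.concatenate([
        c, m, q,            # 3 * dim
        c * q, c * m,       # 2 * dim (elementwise products)
        np.abs(c - q),      # dim
        np.abs(c - m),      # dim
        [c @ W_b @ q],      # 1 (bilinear fact-question similarity)
        [c @ W_b @ m],      # 1 (bilinear fact-memory similarity)
    ])
    return z                # shape: (7 * dim + 2,)

dim = 40
c, m, q = (np.random.randn(dim) for _ in range(3))
W_b = np.random.randn(dim, dim)
assert attention_features(c, m, q, W_b).shape == (7 * dim + 2,)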
def __init__(self, babi_train_raw, babi_test_raw, word2vec, word_vector_size,
             dim, mode, answer_module, input_mask_mode, memory_hops, l2,
             normalize_attention, answer_vec, debug, sentEmbdLoadState,
             sentEmbdType="basic", **kwargs):
    self.vocab = {}
    self.ivocab = {}
    self.debug = debug
    self.word2vec = word2vec
    self.word_vector_size = word_vector_size
    self.dim = dim
    self.mode = mode
    self.answer_module = answer_module
    self.input_mask_mode = input_mask_mode
    self.memory_hops = memory_hops
    self.l2 = l2
    self.normalize_attention = normalize_attention
    self.answer_vec = answer_vec
    self.sentEmbdType = sentEmbdType

    if self.mode != 'deploy':
        self.train_input, self.train_q, self.train_answer, self.train_input_mask = \
            self._process_input(babi_train_raw)
        self.test_input, self.test_q, self.test_answer, self.test_input_mask = \
            self._process_input(babi_test_raw)
        self.vocab_size = len(self.vocab)
        print(self.vocab_size)
    elif self.mode == 'deploy':
        self.train_input, self.train_q, self.train_answer, self.train_input_mask = \
            self._process_input(babi_train_raw)
        self.vocab_size = len(self.vocab)
        print(self.vocab_size)
        # print(self.train_input.shape)
        # print(self.train_q.shape)
        # print(self.train_input_mask.shape)

    # Setting up the pre-trained sentence embedder for the question and input module
    if self.mode != 'deploy':
        print("==> Setting up pre-trained Sentence Embedder")
    if self.sentEmbdType == "basic":
        self.sent_embd = SentEmbd.SentEmbd_basic(self.word_vector_size, self.dim)
    else:
        dep_tags = utils.load_dep_tags()
        self.sent_embd = SentEmbd.SentEmbd_syntactic(
            50, self.dim, len(dep_tags))  # TODO: Dependency Tags
    self.sent_embd.load_params(sentEmbdLoadState)

    self.input_var = T.matrix('input_var')
    self.q_var = T.vector('question_var')
    if self.answer_vec == 'word2vec':
        self.answer_var = T.vector('answer_var')
    else:
        self.answer_var = T.iscalar('answer_var')
    self.input_mask_var = T.ivector('input_mask_var')

    if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
        self.answer_size = self.vocab_size
    elif self.answer_vec == 'word2vec':
        self.answer_size = self.word_vector_size
    else:
        raise Exception("Invalid answer_vec type")

    # Setting up the untrained memory module
    if self.mode != 'deploy':
        print("==> Creating parameters for memory module")
    self.W_mem_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_1 = nn_utils.normal_param(std=0.1, shape=(self.dim, 7 * self.dim + 2))
    self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
    self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

    if self.mode != 'deploy':
        print("==> Building episodic memory module (fixed number of steps: %d)"
              % self.memory_hops)
    memory = [self.q_var.copy()]
    for iter in range(1, self.memory_hops + 1):
        current_episode = self.new_episode(memory[iter - 1])
        memory.append(
            self.GRU_update(memory[iter - 1], current_episode,
                            self.W_mem_res_in, self.W_mem_res_hid,
                            self.b_mem_res, self.W_mem_upd_in,
                            self.W_mem_upd_hid, self.b_mem_upd,
                            self.W_mem_hid_in, self.W_mem_hid_hid,
                            self.b_mem_hid))
    last_mem = memory[-1]

    if self.mode != 'deploy':
        print("==> Building answer module")
    self.W_a = nn_utils.normal_param(std=0.1, shape=(self.answer_size, self.dim))
    if self.answer_module == 'feedforward':
        self.prediction = nn_utils.softmax(T.dot(self.W_a, last_mem))
    # elif self.answer_module == 'recurrent':
    #     self.W_ans_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
    #     self.W_ans_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    #     self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    #     self.W_ans_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
    #     self.W_ans_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    #     self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    #     self.W_ans_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
    #     self.W_ans_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    #     self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    #
    #     def answer_step(prev_a, prev_y):
    #         a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
    #                             self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
    #                             self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
    #                             self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid)
    #         y = T.dot(self.W_a, a)
    #         if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
    #             y = nn_utils.softmax(y)
    #         return [a, y]
    #
    #     # TODO: add conditional ending
    #     dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
    #     results, updates = theano.scan(fn=answer_step,
    #                                    outputs_info=[last_mem, T.zeros_like(dummy)],
    #                                    n_steps=1)
    #     self.prediction = results[1][-1]
    else:
        raise Exception("invalid answer_module")

    if self.mode != 'deploy':
        print("==> Collecting all parameters to be trained")
    self.params = [
        self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
        self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
        self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
        self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
    ]
    # if self.answer_module == 'recurrent':
    #     self.params = self.params + [self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
    #                                  self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
    #                                  self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid]

    if self.mode != 'deploy':
        print("==> Building loss layer and computing updates")
    if debug:
        print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
        print('Answer dim:', self.answer_var.ndim)
    if self.answer_vec == 'word2vec':
        self.loss_ce = nn_utils.cosine_proximity_loss(
            self.prediction.dimshuffle('x', 0), T.stack([self.answer_var]))[0][0]
    else:
        self.loss_ce = T.nnet.categorical_crossentropy(
            self.prediction.dimshuffle('x', 0), T.stack([self.answer_var]))[0]
    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
    else:
        self.loss_l2 = 0
    self.loss = self.loss_ce + self.loss_l2
    if debug:
        print(self.loss.ndim)
    # if self.debug:
    #     print(self.loss.eval({self.input_var: self.train_input, self.q_var: self.train_q,
    #                           self.answer_var: self.train_answer,
    #                           self.input_mask_var: self.train_input_mask}))

    updates = lasagne.updates.adadelta(self.loss, self.params)

    if self.mode == 'deploy':
        self.deploy_fn = theano.function(inputs=[self.input_var, self.q_var],
                                         outputs=[self.prediction])
    else:
        if self.mode == 'train':
            print("==> compiling train_fn")
            self.train_fn = theano.function(
                inputs=[self.input_var, self.q_var, self.answer_var],
                outputs=[self.prediction, self.loss],
                updates=updates)

        print("==> compiling test_fn")
        self.test_fn = theano.function(
            inputs=[self.input_var, self.q_var, self.answer_var],
            outputs=[self.prediction, self.loss, self.input_var, self.q_var,
                     last_mem])

        if self.mode == 'train':
            print("==> computing gradients (for debugging)")
            gradient = T.grad(self.loss, self.params)
            self.get_gradient_fn = theano.function(
                inputs=[self.input_var, self.q_var, self.answer_var],
                outputs=gradient)
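# --- Reference sketch (not part of the original source) ---
# What the loss above amounts to in the 'index' / 'one_hot' answer setting:
# categorical cross-entropy of the softmax prediction against the gold word
# index, plus optional L2 weight decay. The 'word2vec' branch instead uses a
# cosine-proximity loss via nn_utils, whose exact sign and scaling are not
# shown in this file, so only the cross-entropy case is illustrated here;
# nn_utils.l2_reg may also include a scaling factor not reflected below.
import numpy as np

def dmn_loss(prediction, answer_index, params, l2):
    """prediction: softmax over the vocabulary; answer_index: gold word id."""
    loss_ce = -np.log(prediction[answer_index])
    loss_l2 = l2 * sum(np.sum(p ** 2) for p in params) if l2 > 0 else 0.0
    return loss_ce + loss_l2

p = np.array([0.1, 0.7, 0.2])                     # toy 3-word vocabulary
print(dmn_loss(p, 1, [np.ones((2, 2))], l2=0.001))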