def forward(self, tree, embs, training=False): """ Child sum tree LSTM forward function :param tree: :param embs: (sentence_length, 1, 300) :param training: :return: """ # add singleton dimension for future call to node_forward # embs = F.torch.unsqueeze(self.emb(inputs),1) loss = Var(torch.zeros(1)) # init zero loss if self.cudaFlag: loss = loss.cuda() for idx in range(tree.num_children): _, child_loss = self.forward(tree.children[idx], embs, training) loss = loss + child_loss child_c, child_h = self.get_child_states(tree) tree.state = self.node_forward(embs[tree.idx - 1], child_c, child_h) if self.output_module != None: output = self.output_module.forward(tree.state[1], training) tree.output = output if training and tree.gold_label != None: target = Var( utils.map_label_to_target_sentiment(tree.gold_label)) if self.cudaFlag: target = target.cuda() loss = loss + self.criterion(output, target) return tree.state, loss
def test(self, dataset):
    subtree_metric = SubtreeMetric()
    self.model.eval()
    self.embedding_model.eval()
    loss = 0
    predictions = torch.zeros(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Testing epoch ' + str(self.epoch)):
        tree, sent, label = dataset[idx]
        input = Var(sent, volatile=True)
        question = Var(self.question.long(), volatile=True)
        target = Var(map_label_to_target_sentiment(
            label, self.args.num_classes, fine_grain=self.args.fine_grain), volatile=True)
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
            question = question.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        question_emb = F.torch.unsqueeze(self.embedding_model(question), 1)
        output, _, _ = self.model(tree, emb, question_emb, training=False)  # size (1, 5)
        err = self.criterion(output, target)
        loss += err.data[0]
        if self.args.num_classes == 3:
            output[:, 1] = -9999  # mask out the middle (neutral) class
        val, pred = torch.max(output, 1)
        pred_cpu = pred.data.cpu()[0][0]
        predictions[idx] = pred_cpu
        correct = pred_cpu == tree.gold_label
        subtree_metric.current_idx = idx
        subtree_metric.count_depth(correct, 0, tree.idx, pred_cpu)
        # predictions[idx] = torch.dot(indices, torch.exp(output.data.cpu()))
    return loss / len(dataset), predictions, subtree_metric
def forward(self, tree, embs, training=False):
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs),1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    if tree.num_children == 0:  # leaf case
        tree.state = self.leaf_module.forward(embs[tree.idx - 1])
    else:
        for idx in range(tree.num_children):
            _, child_loss = self.forward(tree.children[idx], embs, training)
            loss = loss + child_loss
        lc, lh, rc, rh = self.get_child_state(tree)
        tree.state = self.composer.forward(lc, lh, rc, rh)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)
    return tree.state, loss
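# The binary variant above calls self.get_child_state(tree) at internal nodes.
# A minimal sketch of the assumed helper: it simply unpacks the (c, h) states of
# the left and right child (a strictly binary parse tree is assumed here).
def get_child_state(self, tree):
    lc, lh = tree.children[0].state  # left child cell and hidden state
    rc, rh = tree.children[1].state  # right child cell and hidden state
    return lc, lh, rc, rh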
def train(self, dataset):
    self.model.train()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Training epoch ' + str(self.epoch + 1)):
        tree, sent, label = dataset[indices[idx]]
        input = Var(sent)
        target = Var(map_label_to_target_sentiment(
            label, dataset.num_classes, fine_grain=self.args.fine_grain))
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        output = self.model.forward(tree, input, training=True)
        err = self.criterion(output, target)
        loss += err.data[0]
        err.backward()
        k += 1
        if k % self.args.batchsize == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
    self.epoch += 1
    return loss / len(dataset)
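# Usage sketch: how a trainer exposing the train()/test() methods above might be
# driven across epochs. 'trainer', 'train_dataset', 'dev_dataset' and 'num_epochs'
# are illustrative names only and do not appear in the code above.
def run_training(trainer, train_dataset, dev_dataset, num_epochs):
    best_dev_loss = float('inf')
    for _ in range(num_epochs):
        train_loss = trainer.train(train_dataset)       # one shuffled pass over the trees
        dev_loss, dev_predictions = trainer.test(dev_dataset)
        best_dev_loss = min(best_dev_loss, dev_loss)
        print('train loss %.4f | dev loss %.4f' % (train_loss, dev_loss))
    return best_dev_loss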
def forward(self, tree, embs, training=False, metric=None):
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs),1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    for idx in xrange(tree.num_children):
        _, child_loss = self.forward(tree.children[idx], embs, training, metric)
        loss = loss + child_loss
    child_c, child_h = self.get_child_states(tree)
    tree.state = self.node_forward(embs[tree.idx - 1], child_c, child_h)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)
        if not training and metric is not None:
            # if self.args.num_classes == 3:
            #     output[:, 1] = -9999  # no need middle (neutral) value
            val, pred = torch.max(output, 1)
            pred_cpu = pred.data.cpu()[0][0]
            correct = pred_cpu == tree.gold_label
            metric.count_depth(correct, 0, tree.idx, pred_cpu)
    return tree.state, loss
def forward(self, tree, embs, training=False, metric=None):
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs),1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    if tree.num_children == 0:  # leaf case
        tree.state = self.leaf_module.forward(embs[tree.idx - 1])
    else:
        for idx in xrange(tree.num_children):
            _, child_loss = self.forward(tree.children[idx], embs, training, metric)
            loss = loss + child_loss
        lc, lh, rc, rh = self.get_child_state(tree)
        tree.state = self.composer.forward(lc, lh, rc, rh)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)
        if not training and metric is not None:
            val, pred = torch.max(output, 1)
            pred_cpu = pred.data.cpu()[0]
            correct = pred_cpu == tree.gold_label
            metric.count_depth(correct, tree.depth(), tree.idx, pred_cpu)
    return tree.state, loss
def test(self, dataset):
    self.model.eval()
    self.embedding_model.eval()
    loss = 0
    predictions = torch.zeros(len(dataset))
    indices = torch.range(1, dataset.num_classes)
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch)):
        tree, sent, label = dataset[idx]
        input = Var(sent, volatile=True)
        target = Var(map_label_to_target_sentiment(
            label, dataset.num_classes, fine_grain=self.args.fine_grain), volatile=True)
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        output, _ = self.model(tree, emb)  # size (1, 5)
        err = self.criterion(output, target)
        loss += err.data[0]
        output[:, 1] = -9999  # mask out the middle (neutral) class
        val, pred = torch.max(output, 1)
        # predictions[idx] = pred.data.cpu()[0][0]
        predictions[idx] = pred.data.cpu()[0]
        # predictions[idx] = torch.dot(indices, torch.exp(output.data.cpu()))
    return loss / len(dataset), predictions
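# A small helper of the kind one might use to score the predictions returned by
# test() against the gold root labels. 'golds' is an assumed 1-D tensor of gold
# labels, not part of the code above.
def accuracy(predictions, golds):
    correct = (predictions == golds).sum()
    return float(correct) / golds.size(0)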
def test(self, dataset, test_idx=None):
    subtree_metric = SubtreeMetric()
    self.model.eval()
    for emb_model in self.embedding_models:
        emb_model.eval()
    loss = 0
    predictions = torch.zeros(len(dataset))
    indices = xrange(len(dataset))
    if test_idx is not None:
        indices = test_idx
        predictions = torch.zeros(len(indices))
    for i in tqdm(xrange(len(indices)), desc='Testing epoch ' + str(self.epoch)):
        idx = indices[i]
        subtree_metric.current_idx = idx
        tree, sent, label = dataset[idx]
        input = Var(sent, volatile=True)
        # TODO: fix map label to target sentiment
        target = Var(map_label_to_target_sentiment(
            label, self.args.num_classes, fine_grain=self.args.fine_grain), volatile=True)
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        # emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        emb_list = []
        for emb_model in self.embedding_models:
            emb = F.torch.unsqueeze(emb_model(input), 1)
            emb_list.append(emb)
        emb = torch.cat(emb_list, 1)  # (seq, channel, embedding_dim)
        # output, _ = self.model(tree, emb, metric=subtree_metric)  # size (1, 5)
        if self.args.model_name == 'lstm' or self.args.model_name == 'bilstm':
            output, err = self.model(emb, target)
        else:
            output, _ = self.model(tree, emb, metric=subtree_metric)  # size (1, 5)
            err = self.criterion(output, target)
        loss += err.data[0]
        if self.args.num_classes == 3:
            output[:, 1] = -9999  # mask out the middle (neutral) class
        val, pred = torch.max(output, 1)
        pred_cpu = pred.data.cpu()[0]
        predictions[i] = pred_cpu
        # if self.args.model_name == 'lstm' or self.args.model_name == 'bilstm':
        #     correct = pred_cpu == label
        # else:
        #     correct = pred_cpu == tree.gold_label
        # if self.args.model_name == 'lstm' or self.args.model_name == 'bilstm':
        #     subtree_metric.count_depth(correct, 0, tree.idx, pred_cpu)
        # predictions[idx] = torch.dot(indices, torch.exp(output.data.cpu()))
    return loss / len(dataset), predictions, subtree_metric
def forward(self, tree, emb, question_emb, training=False):
    nodes = tree.depth_first_preorder()
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    if self.train_subtrees == -1:
        n_subtree = len(nodes)
    else:
        n_subtree = self.train_subtrees + 1
    discard_subtree = 0  # subtrees discarded because their label is neutral

    if training:
        for i in range(n_subtree):
            # always include the root; otherwise sample a random subtree
            if i == 0:
                node = nodes[0]
            elif self.train_subtrees != -1:
                node = nodes[int(math.ceil(np.random.uniform(0, len(nodes) - 1)))]
            else:
                node = nodes[i]
            lo, hi = node.lo, node.hi
            span_vec = emb[lo - 1:hi]  # [inclusive, exclusive)
            output = self.dmn(span_vec, question_emb)
            if training and node.gold_label is not None:
                target = utils.map_label_to_target_sentiment(node.gold_label, self.num_classes)
                if target is None:
                    discard_subtree += 1
                    continue
                target = Var(target)
                if self.cudaFlag:
                    target = target.cuda()
                loss = loss + self.criterion(output, target)
        n_subtree = n_subtree - discard_subtree
    else:
        output = self.dmn(emb, question_emb)
    return output, loss, n_subtree
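# Sketch of the assumed Tree.depth_first_preorder() helper used above: it returns
# the node itself followed by the preorder traversal of each child's subtree, so
# nodes[0] is always the root. This is an illustration, not the confirmed helper.
def depth_first_preorder(self):
    nodes = [self]
    for idx in range(self.num_children):
        nodes.extend(self.children[idx].depth_first_preorder())
    return nodes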
def train(self, dataset):
    self.model.train()
    self.embedding_model.train()
    self.embedding_model.zero_grad()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    # torch.manual_seed(789)
    indices = torch.randperm(len(dataset))
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1)):
        tree, sent, label = dataset[indices[idx]]
        input = Var(sent)
        target = Var(map_label_to_target_sentiment(
            label, dataset.num_classes, fine_grain=self.args.fine_grain))
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        output, err = self.model.forward(tree, emb, training=True)
        # calculate the error using the given loss function
        if self.args.attention_flag:
            err = self.criterion(output, target)
        # params = self.model.childsumtreelstm.getParameters()
        # params_norm = params.norm()
        err = err / self.args.batchsize  # + 0.5*self.args.reg*params_norm*params_norm  # custom bias
        loss += err.data[0]
        err.backward()
        k += 1
        if k == self.args.batchsize:
            # manual SGD step on the embedding parameters, then the optimizer step
            for f in self.embedding_model.parameters():
                f.data.sub_(f.grad.data * self.args.emblr)
            self.optimizer.step()
            self.embedding_model.zero_grad()
            self.optimizer.zero_grad()
            k = 0
    self.epoch += 1
    return loss / len(dataset)
def train(self, dataset):
    self.model.train()
    for emb_model in self.embedding_models:
        emb_model.train()
        emb_model.zero_grad()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    # torch.manual_seed(789)
    indices = torch.randperm(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Training epoch ' + str(self.epoch + 1)):
        tree, sent, label = dataset[indices[idx]]
        input = Var(sent)
        target = Var(map_label_to_target_sentiment(
            label, self.args.num_classes, fine_grain=self.args.fine_grain))
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        # one embedding channel per embedding model
        emb_list = []
        for emb_model in self.embedding_models:
            emb = F.torch.unsqueeze(emb_model(input), 1)
            emb_list.append(emb)
        emb = torch.cat(emb_list, 1)  # (seq, channel, embedding_dim)
        if self.args.model_name == 'lstm' or self.args.model_name == 'bilstm':
            output, err = self.model(emb, target, training=True)  # sentence-level dataset
            # if self.args.train_subtrees == -1:
            #     n_subtrees = len(tree.depth_first_preorder())
            # else:
            #     n_subtrees = self.args.train_subtrees
            # batch_size = self.args.batchsize * n_subtrees
            batch_size = self.args.batchsize
        else:
            output, err = self.model.forward(tree, emb, training=True)
            batch_size = self.args.batchsize
        err = err / batch_size
        # err = err / self.args.batchsize
        # params = self.model.childsumtreelstm.getParameters()
        # params_norm = params.norm()
        loss += err.data[0]
        err.backward()
        k += 1
        if k == self.args.batchsize:
            if self.args.manually_emb == 1:
                if self.args.embwd == 0:
                    # skip the weight-decay term when it is zero
                    for emb_model in self.embedding_models:
                        for f in emb_model.parameters():
                            f.data.sub_(f.grad.data * self.args.emblr)
                else:
                    for emb_model in self.embedding_models:
                        for f in emb_model.parameters():
                            f.data.sub_(f.grad.data * self.args.emblr
                                        + self.args.emblr * self.args.embwd * f.data)
                            # https://stats.stackexchange.com/questions/29130/difference-between-neural-net-weight-decay-and-learning-rate
            self.optimizer.step()
            for emb_model in self.embedding_models:
                emb_model.zero_grad()
            self.optimizer.zero_grad()
            k = 0
    self.epoch += 1
    return loss / len(dataset)
def train(self, dataset):
    self.model.train()
    self.embedding_model.train()
    self.embedding_model.zero_grad()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    # torch.manual_seed(789)
    indices = torch.randperm(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Training epoch ' + str(self.epoch + 1)):
        tree, sent, label = dataset[indices[idx]]
        input = Var(sent)
        question = Var(self.question.long())
        target = Var(map_label_to_target_sentiment(
            label, self.args.num_classes, fine_grain=self.args.fine_grain))
        if self.args.cuda:
            input = input.cuda()
            question = question.cuda()
            target = target.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        question_emb = F.torch.unsqueeze(self.embedding_model(question), 1)
        output, err, n_subtrees = self.model(tree, emb, question_emb, training=True)
        batch_size = self.args.batchsize * n_subtrees
        if self.args.reg > 0 or self.args.embreg > 0:
            # L2 penalty on the model parameters and on the drift of the
            # embedding matrix away from its initial values
            params = self.model.getParameters()
            params_norm = params.norm()
            l2_model = 0.5 * self.args.reg * params_norm * params_norm
            emb_params = list(self.embedding_model.parameters())[0]
            emb_init = Var(self.emb_params_init, requires_grad=False)
            emb_params_norm = (emb_params - emb_init).norm()
            l2_emb_params = 0.5 * self.args.embreg * emb_params_norm * emb_params_norm
            if l2_emb_params.data[0] > 0:
                err = (err + l2_model + l2_emb_params) / batch_size
            else:
                err = (err + l2_model) / batch_size
        else:
            err = err / batch_size
        # err = err / self.args.batchsize
        # params = self.model.childsumtreelstm.getParameters()
        # params_norm = params.norm()
        loss += err.data[0]
        err.backward()
        k += 1
        if k == self.args.batchsize:
            if self.args.manually_emb == 1:
                if self.args.embwd == 0:
                    # skip the weight-decay term when it is zero
                    for f in self.embedding_model.parameters():
                        f.data.sub_(f.grad.data * self.args.emblr)
                else:
                    for f in self.embedding_model.parameters():
                        f.data.sub_(f.grad.data * self.args.emblr
                                    + self.args.emblr * self.args.embwd * f.data)
                        # https://stats.stackexchange.com/questions/29130/difference-between-neural-net-weight-decay-and-learning-rate
            self.optimizer.step()
            self.embedding_model.zero_grad()
            self.optimizer.zero_grad()
            k = 0
    self.epoch += 1
    return loss / len(dataset)