def test(self, dataset):
    self.model.eval()
    self.embedding_model.eval()
    loss = 0
    predictions = torch.zeros(len(dataset))
    indices = torch.arange(1, dataset.num_classes + 1)
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        tree, sent, label = dataset[idx]
        input = Var(sent, volatile=True)
        target = Var(map_label_to_target_sentiment(
            label, dataset.num_classes, fine_grain=self.args.fine_grain),
            volatile=True)
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        output, _ = self.model(tree, emb)  # size (1, 5)
        err = self.criterion(output, target)
        loss += err.data[0]
        output[:, 1] = -9999  # mask out the middle (neutral) class
        val, pred = torch.max(output, 1)
        predictions[idx] = pred.data.cpu()[0]
        # predictions[idx] = torch.dot(indices, torch.exp(output.data.cpu()))
    return loss / len(dataset), predictions
def train(epoch):
    total_loss = 0
    total_size = 0
    model.train()
    for batch_id, (data, target) in enumerate(train_loader):
        if CUDA_ON:
            data, target = data.cuda(), target.cuda()
        data, target = Var(data), Var(target)
        optimizer.zero_grad()
        output = nn.functional.sigmoid(model(data))
        loss = criterion(output, target)
        total_loss += loss.data[0]
        total_size += data.size(0)
        loss.backward()
        optimizer.step()
        if batch_id % log_interval == 0:
            print('Train Epoch: {} [{:>5d}/{:>5d} ({:>2.0f}%)]\tCurrent loss: {:.6f}'.format(
                epoch, total_size, len(train_loader.dataset),
                100. * batch_id / len(train_loader),
                loss.data[0] / data.size(0)))
    print('Train Epoch: {} DenseNet average loss: {:.6f}'.format(
        epoch, total_loss / total_size))
    return total_loss / total_size
def test_variable_data_attribute_bug(self):
    # Previously, newly created Variable objects would lose their OpenMined-given
    # attributes on the .data python objects they contain whenever the Variable
    # object was returned from a function. This bug was fixed by storing a backup
    # pointer to the .data object (.data_backup) so that the python object doesn't
    # get garbage collected. This test used to error out at the last line (as
    # indicated below).

    def relu(x):
        """Rectified linear activation"""
        return torch.clamp(x, min=0.)

    def linear(x, w):
        """Linear transformation of x by w"""
        return x.mm(w)

    x = Var(torch.FloatTensor([[1, 1], [2, 2]]), requires_grad=True)
    y = Var(torch.FloatTensor([[1, 1], [2, 2]]), requires_grad=True)

    z = linear(x, y)

    # previously we had to do the following to prevent this bug;
    # leaving it here for reference in case the bug returns later.
    # print(z.data.is_pointer)

    # before the bugfix, the following line would error out
    z = relu(z)

    assert True
def forward(self, tree, embs, training=False):
    # Zoneout mask
    self.mask = torch.Tensor(1, self.mem_dim).bernoulli_(
        1 - self.recurrent_dropout_h)
    if self.cuda_flag:
        self.mask = self.mask.cuda()

    loss = Var(torch.zeros(1))  # initialize loss with zero
    if self.cuda_flag:
        loss = loss.cuda()

    for idx in range(tree.num_children):
        _, child_loss = self.forward(tree.children[idx], embs, training)
        loss += child_loss

    child_c, child_h = self.get_children_states(tree)
    tree.state = self.node_forward(embs[tree.idx - 1], child_c, child_h, training)
    output, output_softmax = self.output_module.forward(tree.state[1], training)
    tree.output_softmax = output_softmax
    tree.output = output

    if training and tree.gold_label is not None:
        target = Var(torch.LongTensor([tree.gold_label]))
        if self.cuda_flag:
            target = target.cuda()
        loss = loss + self.criterion(output, target)

    return tree.state, loss
def test(self, dataset):
    self.model.eval()
    loss = 0
    count = 0
    predictions = torch.zeros(len(dataset))
    idxs = []
    indices = torch.arange(1, dataset.num_classes + 1)
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        ltree, lsent, rtree, rsent, label = dataset[idx]
        if ltree is not None and rtree is not None:
            count = count + 1
            linput, rinput = Var(lsent, volatile=True), Var(rsent, volatile=True)
            target = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
            if self.args.cuda:
                linput, rinput = linput.cuda(), rinput.cuda()
                target = target.cuda()
            output = self.model(ltree, linput, rtree, rinput)
            err = self.criterion(output, target)
            loss += err.data[0]
            output = output.data.squeeze().cpu()
            idxs.append(idx)
            predictions[idx] = torch.dot(indices, torch.exp(output))
    print('Sentences processed: %d' % count)
    return loss / len(dataset), predictions, torch.from_numpy(np.asarray(idxs))
def get_train_data(self, sample):
    # number of samples in the batch
    length = len(sample)
    # longest sequence in the batch
    longest_length = max([len(s[1]) for s in sample])
    # (batch_size, seq_length)
    qs = np.zeros((length, longest_length), dtype=np.float32)
    inputs = np.zeros((length, longest_length), dtype=np.long)
    outputs = np.zeros((length, longest_length), dtype=np.long)
    for i, item in enumerate(sample):
        reward, input, output = item
        # decaying reward: q[t] = reward * gamma^(len(input) - 1 - t)
        gamma = np.full((len(input),), self.config.q_gamma)
        gamma[0] = 1
        q = np.flip(gamma.cumprod(), axis=0) * reward
        # save training values, padding the remainder of each row
        qs[i, :len(input)] = q
        inputs[i, :len(input)] = input
        inputs[i, len(input):] = Constants.PAD
        outputs[i, :len(output)] = output
        outputs[i, len(output):] = Constants.PAD
    qs = cudafication(torch.from_numpy(qs), self.config.cuda)
    # (batch_size, seq_length)
    X = Var(cudafication(torch.from_numpy(inputs), self.config.cuda), requires_grad=False)
    idx = Var(cudafication(torch.from_numpy(outputs), self.config.cuda), requires_grad=False)
    return X, idx, qs
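# Hedged sketch (not part of the original code): a standalone check of the decaying-reward
# computation used in get_train_data above. Assuming q_gamma = 0.5 and reward = 1.0 for an
# input of length 4, gamma = [1, 0.5, 0.5, 0.5], its cumprod is [1, 0.5, 0.25, 0.125], and
# flipping gives q = [0.125, 0.25, 0.5, 1.0], i.e. earlier positions are discounted more.
import numpy as np

def decayed_rewards(reward, seq_len, q_gamma=0.5):
    gamma = np.full((seq_len,), q_gamma)
    gamma[0] = 1
    return np.flip(gamma.cumprod(), axis=0) * reward

# decayed_rewards(1.0, 4) -> array([0.125, 0.25, 0.5, 1.0])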
def forward(self, tree, embs, training=False):
    """
    Child-sum tree-LSTM forward function.
    :param tree:
    :param embs: (sentence_length, 1, 300)
    :param training:
    :return:
    """
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs), 1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    for idx in range(tree.num_children):
        _, child_loss = self.forward(tree.children[idx], embs, training)
        loss = loss + child_loss

    child_c, child_h = self.get_child_states(tree)
    tree.state = self.node_forward(embs[tree.idx - 1], child_c, child_h)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)

    return tree.state, loss
def train_epoch(epoch, args, model, dataset, optimizer):
    model.train()
    optimizer.zero_grad()
    indices = torch.randperm(len(dataset))
    batch_size = args.batch_size
    loss, k = 0.0, 0
    for idx in tqdm(range(len(dataset)), desc="Training epoch {}".format(epoch)):
        ltree, lsent, lrel, rtree, rsent, rrel, sim = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        lrel, rrel = Var(lrel), Var(rrel)
        target = Var(map_label_to_target(sim, args.num_classes))
        if args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            lrel, rrel = lrel.cuda(), rrel.cuda()
            target = target.cuda()
        output = model(ltree, linput, lrel, rtree, rinput, rrel)
        err = F.kl_div(output, target)
        loss += err.data[0]
        (err / batch_size).backward()
        k += 1
        if k % batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()
    avg_loss = loss / len(dataset)
    return avg_loss
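# Hedged sketch (not part of the original code): the gradient-accumulation pattern used in
# train_epoch above, reduced to its core. Each example's loss is scaled by 1/batch_size and
# backpropagated immediately; the optimizer only steps once every batch_size examples.
# `model`, `examples`, `compute_loss`, and `optimizer` are placeholder names, not from the repo.
def accumulate_and_step(model, examples, compute_loss, optimizer, batch_size):
    optimizer.zero_grad()
    for k, example in enumerate(examples, start=1):
        loss = compute_loss(model, example)
        (loss / batch_size).backward()  # gradients accumulate in the parameters' .grad
        if k % batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()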
def forward_decode(self, args, input, ntokens):
    """
    :param args:
    :param input: LongTensor [seq_len, batch_sz]
    :param ntokens:
    :return:
        outputs_prob: Var (seq_len, batch_sz, ntokens)
        outputs: LongTensor (seq_len, batch_sz)
        mu, logvar
    """
    seq_len = input.size()[0]
    batch_sz = input.size()[1]

    # emb: (seq_len, batch_sz, hid_dim)
    # hidden: ([2 (nlayers), 10 (batch_sz), 200], [])
    emb, lat, mu, logvar = self.blstm_enc(input)

    outputs_prob = Var(torch.FloatTensor(seq_len, batch_sz, ntokens))
    if args.cuda:
        outputs_prob = outputs_prob.cuda()
    outputs = torch.LongTensor(seq_len, batch_sz)

    # first time step: <sos>
    sos = Var(torch.ones(batch_sz).long())
    unk = Var(torch.ones(batch_sz).long()) * 2
    if args.cuda:
        sos = sos.cuda()
        unk = unk.cuda()

    lat_to_cat = lat[0][0].unsqueeze(0)
    emb_t = self.drop(self.encoder(unk)).unsqueeze(0)
    emb_0 = self.drop(self.encoder(sos)).unsqueeze(0)
    emb_t_comb = torch.cat([emb_t, lat_to_cat], dim=2)
    emb_0_comb = torch.cat([emb_0, lat_to_cat], dim=2)

    hidden = None
    for t in range(seq_len):
        # input: (seq_len, batch, input_size)
        if t == 0:
            emb = emb_0_comb
        else:
            emb = emb_t_comb
        if hidden is None:
            output, hidden = self.rnn(emb, None)
        else:
            output, hidden = self.rnn(emb, hidden)
        output_prob = self.decoder(self.drop(output))
        output_prob = output_prob.squeeze(0)
        outputs_prob[t] = output_prob
        value, ind = torch.topk(output_prob, 1, dim=1)
        outputs[t] = ind.squeeze(1).data
    return outputs_prob, outputs, mu, logvar
def test_5th_snapshot():
    te_set = PosterSet(POSTER_PATH, split, 'test', gen_d=gen_d, augment=False,
                       resize=None, ten_crop=CROP_SIZE)  # , debug=True)
    te_load = DataLoader(te_set, batch_size=64, shuffle=False, num_workers=3, drop_last=True)

    model = MidrangeNetwork(CROP_SIZE, 23)
    state = torch.load(SNAP_PATH + "snap5th.nn")
    model.load_state_dict(state['state_dict'])
    if CUDA_ON:
        model.cuda()
    model.eval()

    loss = 0
    skipped = 0
    for X, y in tqdm(te_load, desc='5th'):
        X, y = Var(X, volatile=True), Var(y)
        bs, ncrops, c, h, w = X.size()
        if CUDA_ON:
            X, y = X.cuda(), y.cuda()
        out = model(X.view(-1, c, h, w))
        out = out.view(bs, ncrops, -1).mean(1)  # average predictions over the ten crops
        for i in range(out.size(0)):
            try:
                loss += accuracy(out.data[i], y.data[i])
            except:
                skipped += 1
    return loss / (len(te_set) - skipped)
def test_3rd_snapshot():
    te_set = PosterSet(POSTER_PATH, split, 'test', gen_d=gen_d, augment=False,
                       resize=None, ten_crop=None)  # , debug=True)
    te_load = DataLoader(te_set, batch_size=64, shuffle=False, num_workers=3, drop_last=True)

    model = SmallerNetwork(INP_SIZE, 23)
    state = torch.load(SNAP_PATH + "snap3rd.nn")
    model.load_state_dict(state['state_dict'])
    if CUDA_ON:
        model.cuda()
    model.eval()

    loss = 0
    skipped = 0
    for X, y in tqdm(te_load, desc='3rd'):
        X, y = Var(X, volatile=True), Var(y)
        if CUDA_ON:
            X, y = X.cuda(), y.cuda()
        out = model(X)
        for i in range(out.size(0)):
            try:
                loss += accuracy(out.data[i], y.data[i])
            except:
                skipped += 1
    return loss / (len(te_set) - skipped)
def forward(self, tree, inputs, hiddn_state_mat_all, hiddn_state_mat):
    # recurse into the children first (leaves have height 2)
    _ = [
        self.forward(tree[idx], inputs, hiddn_state_mat_all, hiddn_state_mat)
        for idx in range(len(tree)) if tree.height() != 2
    ]
    if tree.height() == 2:
        child_c = Var(inputs[0].data.new(1, self.mem_dim).fill_(0.))
        child_h = Var(inputs[0].data.new(1, self.mem_dim).fill_(0.))
        tree.state = self.node_forward(
            inputs[tree.__getattribute__('idx')], child_c, child_h,
            tree.label(), hiddn_state_mat)
        hiddn_state_mat_all.append(tree.state[1])
    else:
        child_c = []
        child_h = []
        for idx in range(len(tree)):
            child_c.append(tree[idx].state[0])
            child_h.append(tree[idx].state[1])
        child_c, child_h = torch.cat(child_c, dim=0), torch.cat(child_h, dim=0)
        tree.state = self.node_forward(None, child_c, child_h, tree.label(), hiddn_state_mat)
        if tree.label() != 'ROOT':
            hiddn_state_mat_all.append(tree.state[1])
    return tree.state[0], tree.state[1], hiddn_state_mat_all
def forward(self, tree, embs, training=False, metric=None):
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs), 1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    for idx in xrange(tree.num_children):
        _, child_loss = self.forward(tree.children[idx], embs, training, metric)
        loss = loss + child_loss

    child_c, child_h = self.get_child_states(tree)
    tree.state = self.node_forward(embs[tree.idx - 1], child_c, child_h)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)
        if not training and metric is not None:
            # if self.args.num_classes == 3:
            #     output[:, 1] = -9999  # no need for the middle (neutral) value
            val, pred = torch.max(output, 1)
            pred_cpu = pred.data.cpu()[0][0]
            correct = pred_cpu == tree.gold_label
            metric.count_depth(correct, 0, tree.idx, pred_cpu)

    return tree.state, loss
def forward(self, tree, embs, training=False, metric=None):
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs), 1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    if tree.num_children == 0:
        # leaf case
        tree.state = self.leaf_module.forward(embs[tree.idx - 1])
    else:
        for idx in xrange(tree.num_children):
            _, child_loss = self.forward(tree.children[idx], embs, training, metric)
            loss = loss + child_loss
        lc, lh, rc, rh = self.get_child_state(tree)
        tree.state = self.composer.forward(lc, lh, rc, rh)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)
        if not training and metric is not None:
            val, pred = torch.max(output, 1)
            pred_cpu = pred.data.cpu()[0]
            correct = pred_cpu == tree.gold_label
            metric.count_depth(correct, tree.depth(), tree.idx, pred_cpu)

    return tree.state, loss
def train(self, dataset, epoch):
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    batch_size = self.config.batch_size
    indices = torch.randperm(len(dataset))
    if self.config.cuda:
        indices = indices.cuda()
    total_batches = math.floor(len(indices) / batch_size) + 1
    batches = list(get_batches(indices, batch_size))
    for i, batch in tqdm(enumerate(batches),
                         desc='Training epoch ' + str(epoch + 1) + '',
                         total=total_batches):
        X, y = dataset.get_batch(batch)
        X, y = Var(X, requires_grad=False), Var(y, requires_grad=False)
        loss = self.model.forward_train(X, y)
        total_loss += loss.item()
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        logging.debug('Batch {}, loss {}'.format(i + 1, loss.item()))
    return total_loss / len(dataset)
def input_vectors(self, data):
    data_idxs = torch.arange(data.size(0)).long()
    data_idxs = data_idxs.cuda() if data.is_cuda else data_idxs

    # hf path: look up indices directly in the embedding table
    # (indices outside the word vocabulary are clamped to 0)
    hf_data = Var(data.clone())
    hf_data[hf_data >= self.word_vocab_size] = 0
    hf_embeddings = self.ivectors(hf_data)

    # lf path: compose embeddings from character spellings
    lf_data = Var(data)
    spelling_data = self.wordidx2spelling(lf_data).data.clone().long()
    spelling = spelling_data[:, :-1]
    lengths = spelling_data[:, -1]
    if self.char_composition == 'RNN':
        lf_embeddings = self.batch_rnn(spelling, lengths)
    elif self.char_composition == 'CNN':
        lf_embeddings = self.batch_cnn(spelling)
    else:
        raise BaseException("unknown char_composition")

    # embeddings = torch.cat([hf_embeddings, lf_embeddings], dim=0)
    # f_idx = torch.cat([hf_data_idxs, lf_data_idxs], dim=0)
    # embeddings[data_idxs, :] = embeddings[f_idx, :]
    embeddings = hf_embeddings + lf_embeddings
    del spelling_data, spelling, lengths, lf_data, lf_embeddings
    del hf_data, data, data_idxs, hf_embeddings
    return embeddings
def validate(self, dataset, epoch):
    self.model.eval()
    total_loss = 0.0
    batch_size = self.config.batch_size
    indices = torch.randperm(len(dataset))
    if self.config.cuda:
        indices = indices.cuda()
    total_batches = math.floor(len(indices) / batch_size) + 1
    batches = list(get_batches(indices, batch_size))
    for i, batch in tqdm(enumerate(batches),
                         desc='Testing epoch ' + str(epoch + 1) + '',
                         total=total_batches):
        X, y = dataset.get_batch(batch)
        X, y = Var(X, requires_grad=False), Var(y, requires_grad=False)
        loss = self.model.forward_train(X, y)
        total_loss += loss.item()
        logging.debug('Validation batch {}, loss {}'.format(i, loss.item()))
    total_loss /= len(dataset)
    return total_loss
def train(self, dataset):
    self.model.train()
    self.embedding_model.train()
    self.embedding_model.zero_grad()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    # torch.manual_seed(789)
    indices = torch.randperm(len(dataset))
    for idx in tqdm(range(len(dataset)),
                    desc='Training epoch ' + str(self.epoch + 1) + ''):
        tree, sent, label = dataset[indices[idx]]
        input = Var(sent)
        target = Var(torch.LongTensor([int(label)]))
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        output, err, _, _ = self.model.forward(tree, emb, training=True)
        # params = self.model.childsumtreelstm.getParameters()
        # params_norm = params.norm()
        err = err / self.args.batchsize  # + 0.5*self.args.reg*params_norm*params_norm  # custom bias
        loss += err.data[0]
        err.backward()
        k += 1
        if k == self.args.batchsize:
            # manual SGD step on the embedding parameters, then optimizer step on the model
            for f in self.embedding_model.parameters():
                f.data.sub_(f.grad.data * self.args.emblr)
            self.optimizer.step()
            self.embedding_model.zero_grad()
            self.optimizer.zero_grad()
            k = 0
    self.epoch += 1
    return loss / len(dataset)
def forward(self, tree, embs, training=False):
    # add singleton dimension for future call to node_forward
    # embs = F.torch.unsqueeze(self.emb(inputs), 1)
    loss = Var(torch.zeros(1))  # init zero loss
    if self.cudaFlag:
        loss = loss.cuda()

    if tree.num_children == 0:
        # leaf case
        tree.state = self.leaf_module.forward(embs[tree.idx - 1])
    else:
        for idx in range(tree.num_children):
            _, child_loss = self.forward(tree.children[idx], embs, training)
            loss = loss + child_loss
        lc, lh, rc, rh = self.get_child_state(tree)
        tree.state = self.composer.forward(lc, lh, rc, rh)

    if self.output_module is not None:
        output = self.output_module.forward(tree.state[1], training)
        tree.output = output
        if training and tree.gold_label is not None:
            target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
            if self.cudaFlag:
                target = target.cuda()
            loss = loss + self.criterion(output, target)

    return tree.state, loss
def test(self, dataset):
    self.model.eval()
    self.embedding_model.eval()
    loss = 0
    accuracies = torch.zeros(len(dataset))
    output_trees = []
    outputs = []
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        tree, sent, dict, label = dataset[idx]
        input = Var(sent, volatile=True)
        target = Var(torch.LongTensor([int(label)]), volatile=True)
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        embeddings = self.embedding_model(input)
        dictionaries = Var(dict)
        if dictionaries is not None:
            inputs = torch.cat((embeddings, dictionaries), 1)
        else:
            inputs = embeddings
        emb = F.torch.unsqueeze(inputs, 1)
        output, _, acc, tree = self.model(tree, emb)
        err = self.criterion(output, target)
        loss += err.data[0]
        accuracies[idx] = acc
        output_trees.append(tree)
        outputs.append(tree.output_softmax.data.numpy())
        # predictions[idx] = torch.dot(indices, torch.exp(output.data.cpu()))
    return loss / len(dataset), accuracies, outputs, output_trees
def get_child_states(self, tree):
    """
    Get c and h of all children.
    :param tree:
    :return: (tuple)
        child_c: (num_children, 1, mem_dim)
        child_h: (num_children, 1, mem_dim)
    """
    # add an extra singleton dimension in the middle
    # because pytorch expects mini-batches
    if tree.num_children == 0:
        child_c = Var(torch.zeros(1, 1, self.mem_dim))
        child_h = Var(torch.zeros(1, 1, self.mem_dim))
        if self.cudaFlag:
            child_c, child_h = child_c.cuda(), child_h.cuda()
    else:
        child_c = Var(torch.Tensor(tree.num_children, 1, self.mem_dim))
        child_h = Var(torch.Tensor(tree.num_children, 1, self.mem_dim))
        if self.cudaFlag:
            child_c, child_h = child_c.cuda(), child_h.cuda()
        for idx in range(tree.num_children):
            child_c[idx] = tree.children[idx].state[0]
            child_h[idx] = tree.children[idx].state[1]
            # child_c[idx], child_h[idx] = tree.children[idx].state
    return child_c, child_h
def forward(self, outp, inputs=None, penalize=True):
    outp = torch.unsqueeze(outp, 0)  # expects input of the form [bsz, len, nhid]
    compressed_embeddings = outp.view(outp.size(1), -1)  # [bsz*len, nhid]
    hbar = self.tanh(self.ws1(self.drop(compressed_embeddings)))  # [bsz*len, attention-unit]
    alphas = self.ws2(hbar).view(1, outp.size(1), -1)  # [bsz, len, hop]
    alphas = torch.transpose(alphas, 1, 2).contiguous()  # [bsz, hop, len]
    if penalize and inputs is not None:
        # mask positions beyond the input length with a large negative value
        top = Var(torch.zeros(inputs.size(0), self.hops))
        bottom = Var(torch.ones(outp.size(1) - inputs.size(0), self.hops))
        total = torch.cat((top, bottom), 0)
        total = torch.unsqueeze(torch.transpose(total, 0, 1), 0)
        penalized_term = torch.unsqueeze(total, 0)
        if self.cudaFlag:
            penalized_term = penalized_term.cuda()
        penalized_alphas = torch.add(alphas, -10000 * penalized_term)  # [bsz, hop, len] + [bsz, hop, len]
    else:
        assert penalize is False and inputs is None
        penalized_alphas = alphas
    alphas = self.softmax(penalized_alphas.view(-1, outp.size(1)))  # [bsz*hop, len]
    alphas = alphas.view(outp.size(0), self.hops, outp.size(1))  # [hop, len]
    M = torch.bmm(alphas, outp)  # [bsz, hop, mem_dim]
    return M, alphas
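# Hedged sketch (not part of the original code): a numpy illustration of the masking trick in
# the attention forward above — subtracting 10000 from the scores of padded positions before
# the softmax drives their attention weights to (numerically) zero.
import numpy as np

scores = np.array([1.0, 2.0, 3.0, 4.0])  # raw attention scores over 4 positions
mask = np.array([0.0, 0.0, 1.0, 1.0])    # 1 marks positions beyond the real input length
masked = scores - 10000 * mask
weights = np.exp(masked) / np.exp(masked).sum()
# weights ≈ [0.2689, 0.7311, 0.0, 0.0]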
def train(self, dataset):
    self.model.train()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        if ltree is not None and rtree is not None:
            linput, rinput = Var(lsent), Var(rsent)
            target = Var(map_label_to_target(label, dataset.num_classes))
            if self.args.cuda:
                linput, rinput = linput.cuda(), rinput.cuda()
                target = target.cuda()
            output = self.model(ltree, linput, rtree, rinput)
            err = self.criterion(output, target)
            loss += err.data[0]
            (err / self.args.batchsize).backward()
            k += 1
            if k % self.args.batchsize == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
    self.epoch += 1
    return loss / len(dataset)
def train(self, dataset):
    self.model.train()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        tree, sent, label = dataset[indices[idx]]
        input = Var(sent)
        target = Var(map_label_to_target_sentiment(
            label, dataset.num_classes, fine_grain=self.args.fine_grain))
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
        output = self.model.forward(tree, input, training=True)
        err = self.criterion(output, target)
        loss += err.data[0]
        err.backward()
        k += 1
        if k % self.args.batchsize == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
    self.epoch += 1
    return loss / len(dataset)
def test_remote_backprop(self):
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    local.verbose = False
    remote = VirtualWorker(id=1, hook=hook, verbose=False)
    local.add_worker(remote)

    x = Var(torch.ones(2, 2), requires_grad=True).send_(remote)
    x2 = Var(torch.ones(2, 2) * 2, requires_grad=True).send_(remote)

    y = x * x2
    y.sum().backward()

    # remote grads should be correct
    assert (remote._objects[x2.id].grad.data == torch.ones(2, 2)).all()
    assert (remote._objects[x.id].grad.data == torch.ones(2, 2) * 2).all()

    assert (y.get().data == torch.ones(2, 2) * 2).all()
    assert (x.get().data == torch.ones(2, 2)).all()
    assert (x2.get().data == torch.ones(2, 2) * 2).all()

    assert (x.grad.data == torch.ones(2, 2) * 2).all()
    assert (x2.grad.data == torch.ones(2, 2)).all()
def train(self, dataset):
    self.model.train()
    self.embedding_model.train()
    self.embedding_model.zero_grad()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label, dataset.num_classes))
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        lemb = torch.unsqueeze(self.embedding_model(linput), 1)
        remb = torch.unsqueeze(self.embedding_model(rinput), 1)
        output = self.model(ltree, lemb, rtree, remb)
        err = self.criterion(output, target)
        loss += err.data[0]
        err.backward()
        k += 1
        if k == self.args.batchsize:
            # manual SGD step on the embedding parameters, then optimizer step on the model
            for f in self.embedding_model.parameters():
                f.data.sub_(f.grad.data * self.args.emblr)
            self.optimizer.step()
            self.embedding_model.zero_grad()
            self.optimizer.zero_grad()
            k = 0
    self.epoch += 1
    return loss / len(dataset)
def test():
    model.eval()
    test_loss = 0
    total_length = 0
    for data, target in test_loader:
        if CUDA_ON:
            data, target = data.cuda(), target.cuda()
        data, target = Var(data, volatile=True), Var(target)
        output = nn.functional.sigmoid(model(data))
        for i in range(output.size(0)):
            try:
                # skip samples with no positive labels or with 12 or more positive labels
                if target.data[i].sum() == 0:
                    continue
                if target.data[i].sum() >= 12:
                    continue
                test_loss += accuracy(output.data[i], target.data[i])
                total_length += 1
            except Exception as e:
                print(e)
                print(target.data[i])
                sys.exit()
    return test_loss / total_length
def test(self, dataset):
    subtree_metric = SubtreeMetric()
    self.model.eval()
    self.embedding_model.eval()
    loss = 0
    predictions = torch.zeros(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        tree, sent, label = dataset[idx]
        input = Var(sent, volatile=True)
        question = Var(self.question.long(), volatile=True)
        target = Var(map_label_to_target_sentiment(
            label, self.args.num_classes, fine_grain=self.args.fine_grain),
            volatile=True)
        if self.args.cuda:
            input = input.cuda()
            target = target.cuda()
            question = question.cuda()
        emb = F.torch.unsqueeze(self.embedding_model(input), 1)
        question_emb = F.torch.unsqueeze(self.embedding_model(question), 1)
        output, _, _ = self.model(tree, emb, question_emb, training=False)  # size (1, 5)
        err = self.criterion(output, target)
        loss += err.data[0]
        if self.args.num_classes == 3:
            output[:, 1] = -9999  # mask out the middle (neutral) class
        val, pred = torch.max(output, 1)
        pred_cpu = pred.data.cpu()[0][0]
        predictions[idx] = pred_cpu
        correct = pred_cpu == tree.gold_label
        subtree_metric.current_idx = idx
        subtree_metric.count_depth(correct, 0, tree.idx, pred_cpu)
        # predictions[idx] = torch.dot(indices, torch.exp(output.data.cpu()))
    return loss / len(dataset), predictions, subtree_metric
def test_var_gradient_keeps_id_during_send_(self):
    # PyTorch has a tendency to delete var.grad python objects
    # and re-initialize them (resulting in new/random ids).
    # We have fixed this bug and recorded how it was fixed,
    # as well as the creation of this unit test, in the following
    # video (roughly 1:50:00 - 2:00:00):
    # https://www.twitch.tv/videos/275838386

    data = Var(torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]))
    target = Var(torch.FloatTensor([[0], [0], [1], [1]]))

    model = Var(torch.zeros(2, 1), requires_grad=True)

    # generates grad objects on model
    pred = data.mm(model)
    loss = ((pred - target) ** 2).sum()
    loss.backward()

    # the grad's true id
    original_data_id = model.data.id + 0
    original_grad_id = model.grad.data.id + 0

    model.send(bob)

    assert model.data.id == original_data_id
    assert model.grad.data.id == original_grad_id
def train(self, dataset):
    self.model.train()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(xrange(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        # target = Var(map_label_to_target(label, dataset.num_classes))
        target = Var(torch.LongTensor([int(label)]))  # volatile=True
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output = self.model(ltree, linput, rtree, rinput, plot_flag=False)
        _, pred = torch.max(output.data, 1)
        # print F.softmax(output), pred
        err = self.criterion(output, target)
        loss += err.data
        err.backward()
        k += 1
        if k % self.args.batchsize == 0:
            self.optimizer.step()       # does the update
            self.optimizer.zero_grad()  # zero the gradient buffers
    self.epoch += 1
    return loss / len(dataset)