def train_one_epoch(model, optimizer, args, train_loader, label, writer, epoch):
    model.train()
    print_freq = 10
    for i, batch in enumerate(train_loader):
        data, index_label = batch[0].cuda(), batch[1].cuda()
        all_logits = model(data, 'train')
        if args.method_type is Method_type.baseline:
            label = index_label
        loss = F.cross_entropy(all_logits, label)
        acc = count_acc(all_logits, label)
        if i % print_freq == print_freq - 1:
            # no KL term is computed in this function (the old 'same_labels'
            # branch referenced an undefined `loss1`), so only CE loss is reported
            print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'.format(
                epoch, i, len(train_loader), loss.item(), acc))
        if writer is not None:
            # log against a global step so the TensorBoard curve stays ordered
            writer.add_scalar('loss', loss.item(), epoch * len(train_loader) + i)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
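# NOTE: `count_acc` is used throughout these snippets but never defined here.
# A minimal sketch of the usual helper, assuming `logits` is [batch, n_class]
# and `label` is [batch] (an assumption, not the repo's own code):
import torch

def count_acc(logits, label):
    # fraction of examples whose argmax prediction matches the label
    pred = torch.argmax(logits, dim=1)
    return (pred == label).float().mean().item()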
def train(model, optimizer, source_loader, args):
    model.train()
    acc_avg = Averager()
    loss_avg = Averager()
    criterion = nn.CrossEntropyLoss()  # create once, not per iteration
    iter_source = iter(source_loader)
    num_iter = len(source_loader)
    for i in range(num_iter):
        sx, sy = next(iter_source)  # Python 3 iterator protocol, not it.next()
        if args.cuda:
            sx, sy = sx.cuda(), sy.cuda()
        _, logits = model(sx)
        loss = criterion(logits, sy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = count_acc(logits, sy)
        acc_avg.add(acc)
        loss_avg.add(loss.item())
    return loss_avg.item(), acc_avg.item()
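# NOTE: `Averager` is assumed to be the standard running-mean helper from
# few-shot learning repos; a minimal sketch consistent with the calls above:
class Averager:
    def __init__(self):
        self.n = 0
        self.v = 0.0

    def add(self, x):
        # incremental mean update
        self.v = (self.v * self.n + x) / (self.n + 1)
        self.n += 1

    def item(self):
        return self.v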
def val(model, args, val_loader, label):
    model.eval()
    vl = Averager()
    va = Averager()
    with torch.no_grad():
        for i, batch in tqdm(enumerate(val_loader, 1), total=len(val_loader)):
            data, index_label = batch[0].cuda(), batch[1]
            logits = model(data, mode='val')
            loss = F.cross_entropy(logits, label)
            acc = count_acc(logits, label)
            vl.add(loss.item())
            va.add(acc)
    return vl.item(), va.item()
def test(model, target_loader, args):
    model.eval()
    acc_avg = Averager()
    with torch.no_grad():
        iter_target = iter(target_loader)
        num_iter = len(target_loader)
        for i in range(num_iter):
            tx, ty = next(iter_target)  # Python 3 iterator protocol
            if torch.cuda.is_available():
                tx, ty = tx.cuda(), ty.cuda()
            _, logits = model(tx)
            acc = count_acc(logits, ty)
            acc_avg.add(acc)
    return acc_avg.item()
def val_epoch(query_data_loader, model, classifier):
    classifier.eval()
    query_clip_embedding = torch.FloatTensor(
        opt.test_way * opt.query * opt.n_val_samples, opt.emb_dim).cuda()
    with torch.no_grad():
        cur_loc = 0
        for i, (data, label) in enumerate(query_data_loader):
            batch_embedding = model(data.cuda())
            cur_batch = batch_embedding.size(0)
            query_clip_embedding[cur_loc:cur_loc + cur_batch] = batch_embedding.squeeze()
            cur_loc += cur_batch
        clip_logits = torch.exp(classifier(query_clip_embedding))
        # average per-clip logits over the n_val_samples clips of each query
        logits = clip_logits.reshape(opt.query * opt.test_way,
                                     opt.n_val_samples, -1).mean(dim=1)
        query_labels = torch.arange(opt.test_way).repeat(opt.query).cuda()
        acc, pred = count_acc(logits, query_labels)
    return acc
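# NOTE: unlike the single-value variant sketched earlier, `count_acc` here is
# unpacked into (acc, pred). A sketch consistent with that call (an assumption):
def count_acc(logits, label):
    pred = torch.argmax(logits, dim=1)
    acc = (pred == label).float().mean().item()
    return acc, pred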
def test(model, label, args, few_shot_params):
    if args.debug:
        n_test = 10
        print_freq = 2
    else:
        n_test = 1000
        print_freq = 100
    test_file = args.dataset_dir + 'test.json'
    test_datamgr = SetDataManager(test_file, args.dataset_dir, args.image_size,
                                  mode='val', n_episode=n_test, **few_shot_params)
    loader = test_datamgr.get_data_loader(aug=False)
    test_acc_record = np.zeros((n_test,))
    warmup_state = torch.load(osp.join(args.checkpoint_dir, 'max_acc' + '.pth'))['params']
    model.load_state_dict(warmup_state, strict=False)
    model.eval()
    ave_acc = Averager()
    with torch.no_grad():
        for i, batch in enumerate(loader, 1):
            data, index_label = batch[0].cuda(), batch[1].cuda()
            logits = model(data, 'test')
            acc = count_acc(logits, label)
            ave_acc.add(acc)
            test_acc_record[i - 1] = acc
            if i % print_freq == 0:
                print('batch {}: {:.2f}({:.2f})'.format(i, ave_acc.item() * 100, acc * 100))
    m, pm = compute_confidence_interval(test_acc_record)
    print('Test Acc {:.4f} + {:.4f}'.format(m, pm))
    acc_str = '%4.2f' % (m * 100)
    with open(args.save_dir + '/result.txt', 'a') as f:
        f.write('%s %s\n' % (acc_str, args.name))
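# NOTE: `compute_confidence_interval` is not defined in these snippets.
# Few-shot papers typically report the mean episode accuracy with a 95%
# interval of 1.96 * std / sqrt(n); a sketch under that assumption:
import numpy as np

def compute_confidence_interval(accs):
    accs = np.asarray(accs)
    m = accs.mean()
    pm = 1.96 * accs.std() / np.sqrt(len(accs))
    return m, pm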
for i, batch in enumerate(loader, 1):
    data, _ = [_.cuda() for _ in batch]
    k = args.way * args.shot
    data_shot, meta_support, data_query = data[:k], data[k:2 * k], data[2 * k:]
    x = model(data_shot)
    x = x.reshape(args.shot, args.way, -1).mean(dim=0)
    p = x
    lam = 0.01
    proto = model(meta_support)
    meta_logits = euclidean_metric(proto, p)
    # F.sigmoid takes no `dim` argument; since these feed soft labels,
    # a per-row softmax over the `way` classes is presumably what was intended
    soft_labels = (F.softmax(meta_logits, dim=1) + lam * s_onehot) / (1 + lam)
    proto = torch.mm(soft_labels.permute((1, 0)), proto)
    logits = euclidean_metric(model(data_query), proto)
    label = torch.arange(args.way).repeat(args.query)
    label = label.type(torch.cuda.LongTensor)
    acc = count_acc(logits, label)
    ave_acc.add(acc)
    print('batch {}: {:.2f}({:.2f})'.format(i, ave_acc.item() * 100, acc * 100))
    x = None; p = None; logits = None
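# NOTE: `euclidean_metric` is assumed to be the negative squared Euclidean
# distance from Prototypical Networks (larger = closer, so it can feed
# cross-entropy directly); a minimal sketch:
def euclidean_metric(a, b):
    # a: [n, d] queries, b: [m, d] prototypes -> [n, m] logits
    n, m = a.shape[0], b.shape[0]
    a = a.unsqueeze(1).expand(n, m, -1)
    b = b.unsqueeze(0).expand(n, m, -1)
    return -((a - b) ** 2).sum(dim=2)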
def get_accuracy(self, support, query, label, model):
    # 5-way 5-shot test
    logits = model.forward_pred(support, query, args.n_way, args.k_shot)
    acc = utils.count_acc(logits, label)
    return acc
label = torch.arange(args.train_way).repeat(args.query)
label = label.type(torch.cuda.LongTensor)
global_new, proto_new = model_reg(
    support_set=torch.cat([global_base[0], global_novel[0]]),
    query_set=proto_final)
logits2 = euclidean_metric(proto_new, global_new)
loss2 = F.cross_entropy(logits2, train_gt)
similarity = F.softmax(logits2, dim=1)  # explicit dim; the implicit default is deprecated
feature = torch.matmul(similarity, torch.cat([global_base[0], global_novel[0]]))
logits = euclidean_metric(model_cnn(data_query), feature)
loss1 = F.cross_entropy(logits, label)
acc1 = count_acc(logits, label)
acc2 = count_acc(similarity, train_gt)
tl1.add(loss1.item())
tl2.add(loss2.item())
ta1.add(acc1)
ta2.add(acc2)
optimizer_gen.zero_grad()
optimizer_cnn.zero_grad()
optimizer_atten.zero_grad()
optimizer_global1.zero_grad()
optimizer_global2.zero_grad()
total_loss = loss1 + loss2
total_loss.backward()
else:
    logits, logits_scaled = euclidean_multiscale(query, proto, B)
    if args.distance == 'euclidean':
        logits, logits_no = euclidean_normalize_multiscale(query, proto, B_no)
    else:
        logits, logits_no = dot_normalize_multiscale(query, proto, B_no)
    # `size_average=True` is deprecated; reduction='mean' is the equivalent
    loss = ((1 - lamb) * F.cross_entropy(logits_scaled, label, reduction='mean')
            + lamb * F.cross_entropy(logits_no, label, reduction='mean'))
    acc = count_acc(logits_scaled, label)
    tl.add(loss.item())
    ta.add(acc)
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    optimizer.step()
    proto = None
    logits = None
    loss = None
tl = tl.item()
ta = ta.item()
def main(args):
    device = torch.device(args.device)
    ensure_path(args.save_path)

    data = Data(args.dataset, args.n_batches, args.train_way, args.test_way,
                args.shot, args.query)
    train_loader = data.train_loader
    val_loader = data.valid_loader

    model = Convnet(x_dim=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    def save_model(name):
        torch.save(model.state_dict(), osp.join(args.save_path, name + '.pth'))

    trlog = dict(args=vars(args), train_loss=[], val_loss=[],
                 train_acc=[], val_acc=[], max_acc=0.0)
    timer = Timer()

    for epoch in range(1, args.max_epoch + 1):
        model.train()
        tl = Averager()
        ta = Averager()
        for i, batch in enumerate(train_loader, 1):
            data, _ = [_.to(device) for _ in batch]
            data = data.reshape(-1, 2, 105, 105)
            p = args.shot * args.train_way
            embedded = model(data)
            embedded_shot, embedded_query = embedded[:p], embedded[p:]
            proto = embedded_shot.reshape(args.shot, args.train_way, -1).mean(dim=0)
            label = torch.arange(args.train_way).repeat(args.query).to(device)
            logits = euclidean_metric(embedded_query, proto)
            loss = F.cross_entropy(logits, label)
            acc = count_acc(logits, label)
            print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'
                  .format(epoch, i, len(train_loader), loss.item(), acc))
            tl.add(loss.item())
            ta.add(acc)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # since PyTorch 1.1, the scheduler steps after the epoch's optimizer updates
        lr_scheduler.step()
        tl = tl.item()
        ta = ta.item()

        model.eval()
        vl = Averager()
        va = Averager()
        with torch.no_grad():  # no gradients needed for validation
            for i, batch in enumerate(val_loader, 1):
                data, _ = [_.to(device) for _ in batch]
                data = data.reshape(-1, 2, 105, 105)
                p = args.shot * args.test_way
                data_shot, data_query = data[:p], data[p:]
                proto = model(data_shot)
                proto = proto.reshape(args.shot, args.test_way, -1).mean(dim=0)
                label = torch.arange(args.test_way).repeat(args.query).to(device)
                logits = euclidean_metric(model(data_query), proto)
                loss = F.cross_entropy(logits, label)
                acc = count_acc(logits, label)
                vl.add(loss.item())
                va.add(acc)
        vl = vl.item()
        va = va.item()
        print('epoch {}, val, loss={:.4f} acc={:.4f}'.format(epoch, vl, va))

        if va > trlog['max_acc']:
            trlog['max_acc'] = va
            save_model('max-acc')
        trlog['train_loss'].append(tl)
        trlog['train_acc'].append(ta)
        trlog['val_loss'].append(vl)
        trlog['val_acc'].append(va)
        torch.save(trlog, osp.join(args.save_path, 'trlog'))
        save_model('epoch-last')
        if epoch % args.save_epoch == 0:
            save_model('epoch-{}'.format(epoch))
        print('ETA:{}/{}'.format(timer.measure(), timer.measure(epoch / args.max_epoch)))
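# NOTE: `Timer` is assumed to be the ETA helper common in prototypical-network
# repos: measure() returns elapsed time, measure(p) extrapolates the total run
# time from the fraction p completed. A sketch under that assumption:
import time

class Timer:
    def __init__(self):
        self.o = time.time()

    def measure(self, p=1):
        # elapsed seconds divided by the completed fraction p
        x = int((time.time() - self.o) / p)
        if x >= 3600:
            return '{:.1f}h'.format(x / 3600)
        if x >= 60:
            return '{}m'.format(round(x / 60))
        return '{}s'.format(x)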
def do_pass(batches, counters, shot, way, query, expressions, train, test,
            id_to_token=None, id_to_tag=None, test_cls=None):
    model, optimizer = expressions
    llog, alog = Averager(), Averager()
    if test:
        output_file = open("./output.txt" + str(test_cls), 'w')
    for i, (batch, counter) in enumerate(zip(batches, counters), 1):
        data_token = [x for _, x, _, _ in batch]
        data_sentence = [sent for sent, _, _, _ in batch]
        data_label = [label for _, _, label, _ in batch]
        p = shot * way
        data_token_shot, data_token_query = data_token[:p], data_token[p:]
        data_sentence_shot, data_sentence_query = data_sentence[:p], data_sentence[p:]
        counter_token, counter_query = counter[:p], counter[p:]
        data_sentence_shot, sentence_shot_lens = pad_sentences(data_sentence_shot, MAX_SENT_LEN)
        data_sentence_query, query_shot_lens = pad_sentences(data_sentence_query, MAX_SENT_LEN)
        proto = model(data_sentence_shot, data_token_shot, sentence_shot_lens)
        proto = proto.reshape(shot, way, -1).mean(dim=0)
        if not train:
            # at evaluation time the queries per class are inferred from the batch
            query = (len(data_token) - p) // way
        # labels must be built in both modes (the original only built them
        # inside the eval branch, leaving `label` undefined during training)
        label = torch.arange(way).repeat(query)
        label = label.type(torch.LongTensor).to(device)
        logits = euclidean_metric(
            model(data_sentence_query, data_token_query, query_shot_lens), proto)
        # overwrite the class-0 column with the model's learned 0-class score
        logits[:, 0] = model.return_0class()
        loss = F.cross_entropy(logits, label)
        acc = count_acc(logits, label, counter_query)
        llog.add(loss.item())
        alog.add(acc)
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if test:
            # write predictions out for inspection
            save_dev_output(output_file, logits, label, data_label,
                            data_sentence_query, data_token_query,
                            query_shot_lens, id_to_token, id_to_tag)
    if test:
        output_file.close()
    return llog, alog
def pre_train(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = ClassifierNet(hid_dim=args.num_filters, z_dim=args.out_dim).to(device)
    optim = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.StepLR(optim, 5000, args.lr_decay)
    iter_counter = 0
    max_val_acc = 0
    model.train()
    while iter_counter <= args.n_iter:
        data_train = Dataset64(imsize=84, data_path=args.data_path)
        loader_train = DataLoader(dataset=data_train, batch_size=args.batch_size,
                                  shuffle=True, num_workers=8, pin_memory=True)
        data_valid = MiniImagenet(mode='val', n_way=args.n_way, k_shot=args.k_shot,
                                  k_query=args.k_query, batchsz=200, imsize=84,
                                  data_path=args.data_path, verbose=True)
        loader_valid = DataLoader(data_valid, batch_size=4, shuffle=True,
                                  num_workers=8, pin_memory=True)
        print_header()
        for step, batch in enumerate(loader_train):
            iter_counter += 1
            if iter_counter > args.n_iter:
                break
            data = batch[0].to(device)
            label = batch[1].to(device)
            logits = model(data)
            loss = criterion(logits, label)
            acc = count_acc(logits, label)
            if iter_counter % 100 == 0:
                print_logs(iter_counter, loss.item(), acc, 0, max_val_acc)
            optim.zero_grad()
            loss.backward()
            optim.step()
            scheduler.step()
            if iter_counter % 300 == 0:
                model.eval()
                va = Averager()
                with torch.no_grad():
                    for i, task_batch in enumerate(loader_valid):
                        support_x = task_batch[0].to(device)
                        query_x = task_batch[1].to(device)
                        query_y = task_batch[2].to(device)
                        for inner_batch_idx in range(support_x.shape[0]):
                            predictions = model.forward_proto(
                                support_x[inner_batch_idx], query_x[inner_batch_idx],
                                args.n_way, args.k_shot)
                            acc = count_acc(predictions, query_y[inner_batch_idx])
                            va.add(acc)
                val_acc = va.item()
                if val_acc > max_val_acc:
                    torch.save(model.state_dict(),
                               './pre_train_file/pre_train-{}_{}.pth'.format(
                                   args.num_filters, args.out_dim))
                    max_val_acc = val_acc
                print_logs(iter_counter, 0, 0, val_acc, max_val_acc, False)
                model.train()
fea_all = fea_all.transpose(0, 1)
fea_all2 = fea_all2.transpose(0, 1)
fea_all = attn_net(fea_all, fea_all, fea_all)
fea_all2 = attn_net(fea_all2, fea_all2, fea_all2)
fea = fea_all.reshape(n_all, -1)
fea2 = fea_all2.reshape(n_all, -1)
if args.mix == 1:
    fea_shot, fea_query = fea[:n_shot], fea2[n_shot:]
else:
    fea_shot, fea_query = fea[:n_shot], fea[n_shot:]
# fea_shot: [25, 640], fea_query: [75, 640]
proto = fea_shot.reshape(args.shot, args.way, -1).mean(dim=0)  # [5, 640]
logits = euclidean_metric(fea_query, proto) / args.temperature  # [75, 5]
fsl_loss = ce_loss(logits, label_fsl_s)
acc = count_acc(logits, label_fsl_s)

# contrastive loss
con_loss = 0
if args.lambda_con > 0:
    similarity_f = nn.CosineSimilarity()
    if args.proj == 1:
        fea = proj_net(fea)
        fea_shot, fea_query = fea[:n_shot], fea[n_shot:]
    fea_query2 = fea2[n_shot:]
    proto = fea_shot.reshape(args.shot, args.way, -1).mean(dim=0)  # [5, 640]
    ind = torch.arange(args.query)
    for index in range(args.way):
        p = proto[index].unsqueeze(0).repeat(args.way * args.query, 1)  # [75, 640]
        s = similarity_f(p, fea_query) / args.T  # [75]
        s_sim = s.reshape(args.query, -1).t()  # [5, 15]
        s2 = similarity_f(p, fea_query2) / args.T
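# NOTE: `attn_net(x, x, x)` is consistent with self-attention where query,
# key, and value are the same sequence; a sketch using nn.MultiheadAttention
# (an assumption about the actual module, which is not shown here):
import torch.nn as nn

class AttnNet(nn.Module):
    def __init__(self, dim, n_heads=8):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, n_heads)

    def forward(self, q, k, v):
        # inputs are [seq_len, batch, dim] after the transpose(0, 1) above
        out, _ = self.attn(q, k, v)
        return out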
print(out_str)
logfile.write(out_str + '\n')
logfile.flush()

model_cnn.eval()
for epoch in range(1, args.max_epoch + 1):
    vl = Averager()
    va = Averager()
    for i, batch in enumerate(val_loader, 1):
        data, lab = [_.cuda() for _ in batch]
        data_shot = data[:, 3:, :]
        proto = model_cnn(data_shot)
        global_set = torch.cat([global_base[0], global_novel[0]])
        logits = euclidean_metric(proto, global_set)
        loss = F.cross_entropy(logits, lab)
        acc = count_acc(logits, lab)
        vl.add(loss.item())
        va.add(acc)
        proto = None; logits = None; loss = None
    vl = vl.item()
    va = va.item()
    # log the epoch index, not the last batch index
    log('both epoch {}, val, loss={:.4f} acc={:.4f}'.format(epoch, vl, va))

    vl = Averager()
    va = Averager()
    for i, batch in enumerate(val_loader2, 1):
        data, lab = [_.cuda() for _ in batch]
def get_loss_accuracy(self, support, query, label, model):
    logits = model.forward_pred(support, query, self.args.n_way, self.args.k_shot)
    loss = F.cross_entropy(logits / self.scale, label).item()
    acc = count_acc(logits, label)
    return loss, acc