def main_attention():
    args = parse_args()
    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)

    # initialize model
    model = {}
    model["G"], model["D"] = get_embedding(vocab, args)
    model["clf"] = get_classifier(model["G"].ebd_dim, args)

    best_path = '../bin/tmp-runs/16116280768954578/18'
    model['G'].load_state_dict(torch.load(best_path + '.G'))
    # model['D'].load_state_dict(torch.load(best_path + '.D'))
    # model['clf'].load_state_dict(torch.load(best_path + '.clf'))

    # if args.pretrain is not None:
    #     model["ebd"] = load_model_state_dict(model["G"], args.pretrain)

    file_path = r'../data/attention_data.json'
    Print_Attention(file_path, vocab, model, args)
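
# The hard-coded `best_path` checkpoint above was presumably saved on a GPU
# machine, so loading it on a CPU-only box would fail. A minimal sketch of a
# more portable loading helper; the name `load_component` and the CPU
# fallback are our assumptions, not part of the original code:
def load_component(module, path, device=None):
    """Load a state dict onto `device`, falling back to CPU without CUDA."""
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    state = torch.load(path, map_location=device)
    module.load_state_dict(state)
    return module

# Usage (hypothetical): load_component(model['G'], best_path + '.G')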
def main():
    args = parse_args()
    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, class_names, vocab = loader.load_dataset(
        args)

    args.id2word = vocab.itos

    # initialize model
    model = {}
    # model["G"] is word-embedding averaging followed by an FC layer
    model["G"] = get_embedding(vocab, args)
    criterion = ContrastiveLoss()
    # model["G2"] = get_embedding_M2(vocab, args)
    # model["clf"] = get_classifier(model["G"].hidden_size * 2, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data;
        # Siamese network with a MAML-style procedure, adapting only the FC layer
        optG = train(train_data, val_data, model, class_names, criterion, args)

    # val_acc, val_std, _ = test(val_data, model, args, args.val_episodes)

    test_acc, test_std = test(test_data, class_names, optG, model, criterion,
                              args, args.test_epochs, True)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)  # fixed: os.mkdirs does not exist

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
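
# `ContrastiveLoss` is instantiated above but not defined in this file. A
# minimal sketch of the standard margin-based contrastive loss (Hadsell et
# al., 2006) it most likely resembles; the margin default and the mean
# reduction are assumptions:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ContrastiveLossSketch(nn.Module):
    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, x1, x2, label):
        # label == 1 for similar pairs, 0 for dissimilar pairs
        dist = F.pairwise_distance(x1, x2)
        pos = label * dist.pow(2)                              # pull similar pairs together
        neg = (1 - label) * F.relu(self.margin - dist).pow(2)  # push dissimilar pairs apart
        return (pos + neg).mean()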
def main():
    # make_print_to_file(path='/results')
    args = parse_args()
    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)

    args.id2word = vocab.itos

    # initialize model
    model = {}
    model["G"], model["D"] = get_embedding(vocab, args)
    model["clf"] = get_classifier(model["G"].ebd_dim, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        train(train_data, val_data, model, args)

    # val_acc, val_std, _ = test(val_data, model, args, args.val_episodes)

    test_acc, test_std, drawn_data = test(test_data, model, args,
                                          args.test_episodes)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)  # fixed: os.mkdirs does not exist

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
def main():
    args = parse_args()
    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)

    # initialize model
    model = {}
    model["ebd"] = ebd.get_embedding(vocab, args)
    model["clf"] = clf.get_classifier(model["ebd"].ebd_dim, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        train_utils.train(train_data, val_data, model, args)

    elif args.mode == "finetune":
        # sample an example from each class during training
        way = args.way
        query = args.query
        shot = args.shot

        args.query = 1
        args.shot = 1
        args.way = args.n_train_class

        train_utils.train(train_data, val_data, model, args)

        # restore the original N-way K-shot setting
        args.shot = shot
        args.query = query
        args.way = way

    # testing on validation data: only when not finetuning.
    # In finetune, we combine all train and val classes and split them into
    # train and validation examples.
    if args.mode != "finetune":
        val_acc, val_std = train_utils.test(val_data, model, args,
                                            args.val_episodes)
    else:
        val_acc, val_std = 0, 0

    test_acc, test_std = train_utils.test(test_data, model, args,
                                          args.test_episodes)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)  # fixed: os.mkdirs does not exist

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            "val_acc": val_acc,
            "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
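
# The result dicts above are pickled with the highest protocol. A minimal
# sketch of reading one back for inspection; `load_result` is a hypothetical
# helper, not part of the repository:
import pickle

def load_result(result_path):
    with open(result_path, "rb") as f:
        result = pickle.load(f)
    print("test_acc: {:.4f} ± {:.4f}".format(result["test_acc"],
                                             result["test_std"]))
    return result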
def train_model(model, args):
    meta_lr = args.meta_lr
    task_lr = args.task_lr
    train_iter = args.train_iter
    val_iter = args.val_iter
    test_iter = args.test_iter
    val_test_task_step = args.val_test_task_step
    train_task_step = args.train_task_step
    val_step = 500
    test_step = 2000
    N = args.N
    K = args.K
    L = args.L

    # load data
    train_data, val_data, test_data, label_dict, vocab = loader.load_dataset(
        args)

    n_way_k_shot = str(N) + '-way-' + str(K) + '-shot'
    n_way_k_shot = 'stable-PROTO-' + n_way_k_shot
    print('Start training ' + n_way_k_shot)

    cuda = torch.cuda.is_available()
    if cuda:
        model = model.cuda()

    data_loader = {}
    data_loader['train'] = loader.get_dataloader(args, train_data, label_dict,
                                                 N, K, L)
    # class_name, support, support_label, query, query_label = next(data_loader['train'])
    data_loader['val'] = loader.get_dataloader(args, val_data, label_dict,
                                               N, K, L)
    data_loader['test'] = loader.get_dataloader(args, test_data, label_dict,
                                                N, K, L)

    # per-module learning rates: a small lr for the encoder, meta_lr elsewhere
    optim_params = [{'params': model.coder.parameters(), 'lr': 5e-5}]
    optim_params.append({'params': model.mlp.fc1.parameters(), 'lr': meta_lr})
    # optim_params.append({'params': model.mlp.fc2.parameters(), 'lr': meta_lr})
    optim_params.append({'params': model.bilstm.parameters(), 'lr': meta_lr})
    optim_params.append({'params': model.linear.parameters(), 'lr': meta_lr})
    meta_optimizer = AdamW(optim_params, lr=1)

    best_acc, best_step, best_test_acc, best_test_step, best_changed = \
        0.0, 0, 0.0, 0, False
    iter_loss, iter_right, iter_sample = 0.0, 0.0, 0.0

    for it in range(train_iter):
        meta_loss, meta_right = 0.0, 0.0
        model.train()
        class_name, support, support_label, query, query_label = next(
            data_loader['train'])
        if cuda:
            support_label, query_label = support_label.cuda(), \
                query_label.cuda()
        loss_q, right_q = train_one_batch(args, class_name, support,
                                          support_label, query, query_label,
                                          model, train_task_step, task_lr, it)
        meta_loss = meta_loss + loss_q
        meta_right = meta_right + right_q

        meta_optimizer.zero_grad()
        meta_loss.backward()
        meta_optimizer.step()

        # accumulate a scalar (not the tensor) so the autograd graph is freed
        iter_loss = iter_loss + meta_loss.item()
        iter_right = iter_right + meta_right
        iter_sample += 1

        if it % val_step == 0:
            iter_loss, iter_right, iter_sample = 0.0, 0.0, 0.0

        if (it + 1) % 100 == 0:
            print('[TRAIN] step: {0:4} | loss: {1:2.6f}, accuracy: {2:3.2f}%'.
                  format(it + 1, iter_loss / iter_sample,
                         100 * iter_right / iter_sample))

        if (it + 1) % val_step == 0:
            acc = test_model(args, cuda, data_loader['val'], model, val_iter,
                             val_test_task_step, task_lr)
            print('[EVAL] | accuracy: {0:2.2f}%'.format(acc * 100))
            if acc > best_acc:
                print('Best checkpoint!')
                best_model = copy.deepcopy(model)
                best_acc, best_step, best_changed = acc, (it + 1), True

        if (it + 1) % test_step == 0 and best_changed:
            best_changed = False
            test_acc = test_model(args, cuda, data_loader['test'], best_model,
                                  test_iter, val_test_task_step, task_lr)
            print('[TEST] | accuracy: {0:2.2f}%'.format(test_acc * 100))
            if test_acc > best_test_acc:
                # torch.save(best_model.state_dict(), n_way_k_shot + '.ckpt')
                best_test_acc, best_test_step = test_acc, best_step
            best_acc = 0.0

    print("\n####################\n")
    print('Finish training model! Best acc: ' + str(best_test_acc) +
          ' at step ' + str(best_test_step))
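
# `train_one_batch` is defined elsewhere; given its `task_lr` and
# `train_task_step` arguments, it presumably runs a MAML-style inner loop on
# the support set before computing the query loss. A minimal, self-contained
# sketch of that pattern; every name here is illustrative, not the
# repository's actual implementation:
import torch
import torch.nn.functional as F

def inner_loop_sketch(model_forward, params, support_x, support_y,
                      query_x, query_y, task_steps, task_lr):
    """Adapt a copy of `params` on the support set, then score the query set."""
    fast = [p.clone() for p in params]
    for _ in range(task_steps):
        loss = F.cross_entropy(model_forward(support_x, fast), support_y)
        # create_graph=True keeps the adaptation differentiable (second-order MAML)
        grads = torch.autograd.grad(loss, fast, create_graph=True)
        fast = [p - task_lr * g for p, g in zip(fast, grads)]
    logits = model_forward(query_x, fast)
    loss_q = F.cross_entropy(logits, query_y)       # backprops through adaptation
    acc_q = (logits.argmax(dim=1) == query_y).float().mean()
    return loss_q, acc_q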
def main():
    args = parse_args()

    # optionally redirect stdout to a local file so the run log is kept
    if args.path != "":
        path = args.path
        sys.stdout = open(path, "w")
        print("test sys.stdout")

    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, class_names, vocab = loader.load_dataset(
        args)

    args.id2word = vocab.itos

    # initialize model
    model = {}
    model["G"] = get_embedding(vocab, args)

    print("-------------------------------------param"
          "----------------------------------------------")
    total = 0  # renamed from `sum`, which shadowed the builtin
    for name, param in model["G"].named_parameters():
        total += param.numel()
        print("{:30s} : {}".format(name, param.shape))
    print("total param num {}".format(total))
    print("-------------------------------------param"
          "----------------------------------------------")

    criterion = ContrastiveLoss()
    # model["G2"] = get_embedding_M2(vocab, args)
    # model["clf"] = get_classifier(model["G"].hidden_size * 2, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        optG = train(train_data, val_data, test_data, model, class_names,
                     criterion, args)

    # val_acc, val_std, _ = test(val_data, model, args, args.val_episodes)

    test_acc, test_std = test(test_data, class_names, optG, model, criterion,
                              args, args.test_epochs, False)
    print(("[TEST] {}, {:s} {:s}{:>7.4f} ± {:>6.4f}, ").format(
        datetime.datetime.now(),
        colored("test ", "cyan"),
        colored("acc:", "blue"),
        test_acc,
        test_std,
        # colored("train stats", "cyan"),
        # colored("G_grad:", "blue"), np.mean(np.array(grad['G'])),
        # colored("clf_grad:", "blue"), np.mean(np.array(grad['clf'])),
    ), flush=True)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)  # fixed: os.mkdirs does not exist

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
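
# Assigning `sys.stdout = open(path, "w")` above silences the console and
# never closes the file. A minimal sketch of a tee-style writer that keeps
# both destinations; the class name is ours, not the repository's:
import sys

class Tee:
    def __init__(self, path, stream=sys.__stdout__):
        self.file = open(path, "w")
        self.stream = stream

    def write(self, data):
        self.stream.write(data)  # still echo to the console
        self.file.write(data)    # and keep a copy on disk

    def flush(self):
        self.stream.flush()
        self.file.flush()

# Usage (hypothetical): sys.stdout = Tee(args.path)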
def main():
    parser = argparse.ArgumentParser(
        description="Calculate Pascal VOC evaluation metrics")
    parser.add_argument("--model-path", '-p', type=str, required=True,
                        help="path to the trained model")
    parser.add_argument('--dataset-style', type=str, required=True,
                        help="style of dataset "
                        "(supported are 'pascal-voc' and 'coco')")
    parser.add_argument('--image-set', type=str, default="test",
                        help='image set (annotation file basename for COCO) '
                        'to use for evaluation')
    parser.add_argument("--dataset", type=str, help="dataset directory path")
    parser.add_argument("--metric", '-m', type=str, default='pascal-voc',
                        help="metric to calculate ('pascal-voc' or 'coco')")
    parser.add_argument("--nms-method", type=str, default="hard")
    parser.add_argument("--iou-threshold", type=float, default=0.5,
                        help="IOU threshold (for Pascal VOC metric)")
    parser.add_argument("--use-2007", action='store_true',
                        help="Use 2007 calculation algorithm "
                        "(for Pascal VOC metric)")
    parser.add_argument('--device', type=str, help='device to use')

    args = parser.parse_args()

    if args.device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = args.device

    if device.startswith("cuda"):
        logging.info("Use CUDA")

    timer = Timer()

    dataset = load_dataset(args.dataset_style, args.dataset, args.image_set)

    arch, model, class_names = load(args.model_path, device=device,
                                    inference=True)
    model.eval()

    if dataset.class_names != class_names:
        print("Dataset classes don't match the classes "
              "the specified model is trained with. "
              "No chance to get valid results, so I give up.")
        sys.exit(-1)

    mean, std = mean_std(args.dataset_style, args.dataset, args.image_set)
    predictor = Predictor(arch, model, device=device, mean=mean, std=std)

    if args.metric == 'pascal-voc':
        logging.info("Calculating Pascal VOC metric...")
        pascal_voc.eval(dataset, predictor,
                        iou_threshold=args.iou_threshold,
                        use_2007_metric=args.use_2007)
    elif args.metric == 'coco':
        logging.info("Calculating COCO metric...")
        coco.eval(dataset, predictor)
    else:
        print("Metric %s is not supported" % args.metric)
        sys.exit(-2)
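
# Example invocation (the script filename `eval.py` is an assumption; the
# flags are the ones defined above):
#
#   python eval.py --model-path output/model.pth \
#                  --dataset-style pascal-voc \
#                  --dataset /path/to/VOC2007 \
#                  --metric pascal-voc --iou-threshold 0.5 --use-2007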
def main():
    parser = argparse.ArgumentParser(
        description='Detection model training utility')
    parser.add_argument('--dataset-style', type=str, required=True,
                        help="style of dataset (supported are "
                        "'pascal-voc', 'coco' and 'widerface')")
    parser.add_argument('--dataset', required=True, help='dataset path')
    parser.add_argument('--train-image-set', type=str, default="train",
                        help='image set (annotation file basename for COCO) '
                        'to use for training')
    parser.add_argument('--val-image-set', type=str, default="val",
                        help='image set (annotation file basename for COCO) '
                        'to use for validation')
    parser.add_argument('--val-dataset', default=None,
                        help='separate validation dataset directory path')
    parser.add_argument('--net-config',
                        help="path to network architecture configuration file "
                        "(take a look into 'preset' directory "
                        "for the reference)")

    # Params for optimizer
    parser.add_argument('--optimizer', default="ranger",
                        help="optimizer to use "
                        "('sgd', 'diffgrad', 'adamw', or 'ranger')")
    parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float,
                        help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float,
                        help='optional momentum for SGD optimizer '
                        '(default is 0.9)')
    parser.add_argument('--weight-decay', default=5e-4, type=float,
                        help='optional weight decay (L2 penalty) '
                        'for SGD optimizer (default is 5e-4)')

    parser.add_argument('--backbone-pretrained', action='store_true')
    parser.add_argument('--backbone-weights',
                        help='pretrained weights for the backbone model')
    parser.add_argument('--freeze-backbone', action='store_true')

    # Scheduler
    parser.add_argument('--scheduler', default="cosine-wr", type=str,
                        help="scheduler for SGD. It can be one of "
                        "'multi-step' and 'cosine-wr'")

    # Params for Scheduler
    parser.add_argument('--milestones', default="70,100", type=str,
                        help="milestones for MultiStepLR")
    parser.add_argument('--t0', default=10, type=int,
                        help='T_0 value for Cosine Annealing Warm Restarts.')
    parser.add_argument('--t-mult', default=2, type=float,
                        help='T_mult value for Cosine Annealing Warm Restarts.')

    # Train params
    parser.add_argument('--batch-size', default=32, type=int,
                        help='batch size')
    parser.add_argument('--num-epochs', default=120, type=int,
                        help='number of epochs to train')
    parser.add_argument('--num-workers', default=4, type=int,
                        help='number of workers used in dataloading')
    parser.add_argument('--val-epochs', default=5, type=int,
                        help='perform validation every this many epochs')
    parser.add_argument('--device', type=str,
                        help='device to use for training')
    parser.add_argument('--checkpoint-path', default='output',
                        help='directory for saving checkpoint models')
    parser.add_argument('--continue-training', '-p',
                        help='continue training session for the previously '
                        'trained model at the specified path')
    parser.add_argument('--last-epoch', default=-1, type=int,
                        help='last epoch to continue training session at '
                        '(default is -1)')
    parser.add_argument('--rand-augment', default="", type=str,
                        help='use RandAugment augmentation pipeline for '
                        'training instead of conventional one with the '
                        'specified `m` and `n` values (e.g. "(9, 3)")')
    parser.add_argument('--skip-train-statistics', default=False,
                        action='store_true',
                        help="don't calculate mean and std values for the "
                        "train dataset and use defaults for ImageNet")
    parser.add_argument('--skip-val-statistics', default=False,
                        action='store_true',
                        help="don't calculate mean and std values for the "
                        "validation dataset and use defaults for ImageNet")

    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    args = parser.parse_args()
    logging.info(args)

    if args.device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = args.device

    if device.startswith("cuda"):
        logging.info("Use CUDA")

    timer = Timer()

    if args.continue_training is not None:
        logging.info("Loading network")
        arch, net, class_names = load(args.continue_training, device=device)
    else:
        arch = get_arch(args.net_config)

    bbox_format = dataset_bbox_format(args.dataset_style)

    if args.skip_train_statistics:
        train_mean = (0.485, 0.456, 0.406)
        train_std = (0.229, 0.224, 0.225)
    else:
        train_mean, train_std = mean_std(args.dataset_style, args.dataset,
                                         args.train_image_set)

    if args.rand_augment == "":
        logging.info("Using conventional augmentation pipeline")
        train_transform = processing.train.Pipeline([arch.image_size] * 2,
                                                    train_mean, train_std,
                                                    bbox_format=bbox_format)
    else:
        m, n = literal_eval(args.rand_augment)
        logging.info("Using RandAugment pipeline with m=%d, n=%d" % (m, n))
        train_transform = processing.randaugment.Pipeline(
            m, n, [arch.image_size] * 2, train_mean, train_std,
            bbox_format=bbox_format)

    if args.val_dataset is not None:
        val_dataset_root = args.val_dataset
    else:
        val_dataset_root = args.dataset

    if args.skip_val_statistics:
        val_mean = (0.485, 0.456, 0.406)
        val_std = (0.229, 0.224, 0.225)
    else:
        val_mean, val_std = mean_std(args.dataset_style, val_dataset_root,
                                     args.val_image_set)

    val_transform = processing.test.Pipeline([arch.image_size] * 2,
                                             val_mean, val_std,
                                             bbox_format=bbox_format)

    logging.info("Loading datasets...")

    dataset = load_dataset(args.dataset_style, args.dataset,
                           args.train_image_set, train_transform)

    num_classes = len(dataset.class_names)

    logging.info("Train dataset size: {}".format(len(dataset)))

    # don't allow the last batch to be of length 1,
    # so our dear BatchNorms don't crash on it
    drop_last = len(dataset) % args.batch_size > 0

    train_loader = DataLoader(dataset, args.batch_size, collate_fn=collate,
                              num_workers=args.num_workers, shuffle=True,
                              drop_last=drop_last)

    val_dataset = load_dataset(args.dataset_style, val_dataset_root,
                               args.val_image_set, val_transform)
    logging.info("Validation dataset size: {}".format(len(val_dataset)))

    val_loader = DataLoader(val_dataset, args.batch_size, collate_fn=collate,
                            num_workers=args.num_workers, shuffle=False,
                            drop_last=drop_last)

    if args.continue_training is None:
        logging.info("Building network")
        # fixed: with action='store_true' the flag is a bool, never None,
        # so `is not None` was always True
        backbone_pretrained = args.backbone_pretrained
        net = arch.build(num_classes, backbone_pretrained, args.batch_size)

        if backbone_pretrained and args.backbone_weights is not None:
            logging.info(f"Load backbone weights from {args.backbone_weights}")
            timer.start("Loading backbone model")
            net.load_backbone_weights(args.backbone_weights)
            logging.info(f'Took {timer.end("Loading backbone model"):.2f}s.')

    if args.freeze_backbone:
        net.freeze_backbone()

    net.to(device)

    last_epoch = args.last_epoch

    criterion = arch.loss(net, device)
    mapper = arch.mapper(net, device)

    optim_kwargs = {"lr": args.lr, "weight_decay": args.weight_decay}

    if args.optimizer == "sgd":
        optim_class = torch.optim.SGD
        optim_kwargs.update({"momentum": args.momentum})
    elif args.optimizer == "adamw":
        optim_class = torch.optim.AdamW
    elif args.optimizer == "diffgrad":
        optim_class = DiffGrad
    else:
        optim_class = Ranger

    if args.continue_training is None:
        optim_params = net.parameters()
    else:
        optim_params = [{"params": net.parameters(), "initial_lr": args.lr}]

    optimizer = optim_class(optim_params, **optim_kwargs)
    logging.info(f"Optimizer parameters used: {optim_kwargs}")

    if args.scheduler == 'multi-step':
        logging.info("Uses MultiStepLR scheduler.")
        milestones = [int(v.strip()) for v in args.milestones.split(",")]
        scheduler = MultiStepLR(optimizer, milestones=milestones,
                                gamma=0.1, last_epoch=last_epoch)
    else:
        logging.info("Uses Cosine annealing warm restarts scheduler.")
        # CosineAnnealingWarmRestarts has a bug with `last_epoch` != -1,
        # so we don't set it
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=args.t0,
                                                T_mult=args.t_mult,
                                                eta_min=1e-5)

    os.makedirs(args.checkpoint_path, exist_ok=True)

    logging.info(f"Start training from epoch {last_epoch + 1}.")

    for epoch in range(last_epoch + 1, last_epoch + args.num_epochs + 1):
        loop(train_loader, net, mapper, criterion, optimizer,
             device=device, epoch=epoch)
        scheduler.step()

        if ((epoch + 1) % args.val_epochs == 0
                or (epoch + 1) == args.num_epochs):
            val_loss = loop(val_loader, net, mapper, criterion,
                            device=device, epoch=epoch)

            filename = f"{arch.name}-Epoch-{epoch}-Loss-{val_loss}.pth"
            model_path = os.path.join(args.checkpoint_path, filename)
            save(arch, net, dataset.class_names, model_path)
            logging.info(f"Saved model {model_path}")
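
# Example invocation (the script filename `train.py` and the preset path are
# assumptions; the flags are the ones defined above):
#
#   python train.py --dataset-style pascal-voc \
#                   --dataset /path/to/VOC2007 \
#                   --net-config preset/example-net.json \
#                   --optimizer ranger --scheduler cosine-wr \
#                   --batch-size 32 --num-epochs 120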