Example #1
def main_attention():

    args = parse_args()

    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)

    # initialize model
    model = {}
    model["G"], model["D"] = get_embedding(vocab, args)
    model["clf"] = get_classifier(model["G"].ebd_dim, args)

    # hardcoded checkpoint prefix; the '.G' suffix below selects the saved
    # weights for model['G']
    best_path = '../bin/tmp-runs/16116280768954578/18'
    model['G'].load_state_dict(torch.load(best_path + '.G'))
    # model['D'].load_state_dict(torch.load(best_path + '.D'))
    # model['clf'].load_state_dict(torch.load(best_path + '.clf'))

    # if args.pretrain is not None:
    #     model["ebd"] = load_model_state_dict(model["G"], args.pretrain)

    file_path = r'../data/attention_data.json'
    Print_Attention(file_path, vocab, model, args)
def main():

    args = parse_args()

    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, class_names, vocab = loader.load_dataset(
        args)

    args.id2word = vocab.itos

    # initialize model
    model = {}
    model["G"] = get_embedding(vocab, args)  # model["G"]: averaged word embeddings + an FC layer

    criterion = ContrastiveLoss()
    # model["G2"] = get_embedding_M2(vocab, args)
    # model["clf"] = get_classifier(model["G"].hidden_size * 2, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        optG = train(train_data, val_data, model, class_names, criterion,
                     args)  # Siamese network with a MAML-style procedure; only the FC layer is updated

    # val_acc, val_std, _ = test(val_data, model, args,
    #                                         args.val_episodes)
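    # NOTE: optG is only assigned when args.mode == "train"; calling test()
    # below in any other mode would raise a NameError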

    test_acc, test_std = test(test_data, class_names, optG, model, criterion,
                              args, args.test_epochs, True)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
Example #3
File: main.py Project: hccngu/MLADA
def main():

    # make_print_to_file(path='/results')

    args = parse_args()

    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)

    args.id2word = vocab.itos

    # initialize model
    model = {}
    model["G"], model["D"] = get_embedding(vocab, args)
    model["clf"] = get_classifier(model["G"].ebd_dim, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        train(train_data, val_data, model, args)

    # val_acc, val_std, _ = test(val_data, model, args,
    #                                         args.val_episodes)

    test_acc, test_std, drawn_data = test(test_data, model, args,
                                          args.test_episodes)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
def main():
    args = parse_args()

    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)

    # initialize model
    model = {}
    model["ebd"] = ebd.get_embedding(vocab, args)
    model["clf"] = clf.get_classifier(model["ebd"].ebd_dim, args)


    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        train_utils.train(train_data, val_data, model, args)

    elif args.mode == "finetune":
        # sample an example from each class during training
        way = args.way
        query = args.query
        shot = args.shot
        args.query = 1
        args.shot = 1
        args.way = args.n_train_class
        train_utils.train(train_data, val_data, model, args)
        # restore the original N-way K-shot setting
        args.shot = shot
        args.query = query
        args.way = way

    # testing on validation data: only for not finetune
    # In finetune, we combine all train and val classes and split it into train
    # and validation examples.
    if args.mode != "finetune":
        val_acc, val_std = train_utils.test(val_data, model, args,
                                            args.val_episodes)
    else:
        val_acc, val_std = 0, 0

    test_acc, test_std = train_utils.test(test_data, model, args,
                                          args.test_episodes)


    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            "val_acc": val_acc,
            "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
Example #5
def train_model(model, args):

    meta_lr = args.meta_lr
    task_lr = args.task_lr
    train_iter = args.train_iter
    val_iter = args.val_iter
    test_iter = args.test_iter
    val_test_task_step = args.val_test_task_step
    train_task_step = args.train_task_step

    val_step = 500
    test_step = 2000
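    # run validation every val_step iterations and, whenever the best validation
    # checkpoint changed, evaluate on the test split every test_step iterations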

    N = args.N
    K = args.K
    L = args.L

    # load data
    train_data, val_data, test_data, label_dict, vocab = loader.load_dataset(
        args)

    n_way_k_shot = str(N) + '-way-' + str(K) + '-shot'
    n_way_k_shot = 'stable-PROTO-' + n_way_k_shot
    print('Start training ' + n_way_k_shot)

    cuda = torch.cuda.is_available()
    if cuda:
        model = model.cuda()

    data_loader = {}
    data_loader['train'] = loader.get_dataloader(args, train_data, label_dict,
                                                 N, K, L)
    # class_name, support, support_label, query, query_label = next(data_loader['train'])
    data_loader['val'] = loader.get_dataloader(args, val_data, label_dict, N,
                                               K, L)
    data_loader['test'] = loader.get_dataloader(args, test_data, label_dict, N,
                                                K, L)

    optim_params = [{'params': model.coder.parameters(), 'lr': 5e-5}]
    optim_params.append({'params': model.mlp.fc1.parameters(), 'lr': meta_lr})
    # optim_params.append({'params': model.mlp.fc2.parameters(), 'lr': meta_lr})
    optim_params.append({'params': model.bilstm.parameters(), 'lr': meta_lr})
    optim_params.append({'params': model.linear.parameters(), 'lr': meta_lr})

    meta_optimizer = AdamW(optim_params, lr=1)
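    # NOTE: lr=1 here is effectively a placeholder; every parameter group in
    # optim_params sets its own learning rate, which overrides this default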

    best_acc, best_step, best_test_acc, best_test_step, best_changed = 0.0, 0, 0.0, 0, False
    iter_loss, iter_right, iter_sample = 0.0, 0.0, 0.0

    for it in range(train_iter):
        meta_loss, meta_right = 0.0, 0.0
        model.train()
        class_name, support, support_label, query, query_label = next(
            data_loader['train'])
        if cuda:
            support_label, query_label = support_label.cuda(
            ), query_label.cuda()

        loss_q, right_q = train_one_batch(args, class_name, support,
                                          support_label, query, query_label,
                                          model, train_task_step, task_lr, it)
        meta_loss = meta_loss + loss_q
        meta_right = meta_right + right_q

        meta_optimizer.zero_grad()
        meta_loss.backward()
        meta_optimizer.step()

        iter_loss = iter_loss + meta_loss
        iter_right = iter_right + meta_right
        iter_sample += 1

        if it % val_step == 0:
            iter_loss, iter_right, iter_sample = 0.0, 0.0, 0.0
        if (it + 1) % 100 == 0:
            print('[TRAIN] step: {0:4} | loss: {1:2.6f}, accuracy: {2:3.2f}%'.
                  format(it + 1, iter_loss / iter_sample,
                         100 * iter_right / iter_sample))
        if (it + 1) % val_step == 0:
            acc = test_model(args, cuda, data_loader['val'], model, val_iter,
                             val_test_task_step, task_lr)
            print('[EVAL] | accuracy: {0:2.2f}%'.format(acc * 100))
            if acc > best_acc:
                print('Best checkpoint!')
                best_model = copy.deepcopy(model)
                best_acc, best_step, best_changed = acc, (it + 1), True

        if (it + 1) % test_step == 0 and best_changed:
            best_changed = False
            test_acc = test_model(args, cuda, data_loader['test'], best_model,
                                  test_iter, val_test_task_step, task_lr)
            print('[TEST] | accuracy: {0:2.2f}%'.format(test_acc * 100))
            if test_acc > best_test_acc:
                #torch.save(best_model.state_dict(),n_way_k_shot+'.ckpt')
                best_test_acc, best_test_step = test_acc, best_step
            best_acc = 0.0

    print("\n####################\n")
    print('Finish training model! Best acc: ' + str(best_test_acc) +
          ' at step ' + str(best_test_step))
Example #6
def main():
    args = parse_args()

    # optionally redirect stdout to a local file so the output is saved
    if args.path != "":
        path = args.path
        sys.stdout = open(path, "w")
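        # from this point on, everything printed goes to that file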
        print("test sys.stdout")

    print_args(args)

    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, class_names, vocab = loader.load_dataset(
        args)

    args.id2word = vocab.itos

    # initialize model
    model = {}
    model["G"] = get_embedding(vocab, args)
    print(
        "-------------------------------------param----------------------------------------------"
    )
    total_params = 0
    for name, param in model["G"].named_parameters():
        num = 1
        for size in param.shape:
            num *= size
        total_params += num
        print("{:30s} : {}".format(name, param.shape))
    print("total param num {}".format(total_params))
    print(
        "-------------------------------------param----------------------------------------------"
    )

    criterion = ContrastiveLoss()
    # model["G2"] = get_embedding_M2(vocab, args)
    # model["clf"] = get_classifier(model["G"].hidden_size * 2, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        optG = train(train_data, val_data, test_data, model, class_names,
                     criterion, args)

    # val_acc, val_std, _ = test(val_data, model, args,
    #                                         args.val_episodes)
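    # NOTE: optG is only assigned when args.mode == "train"; calling test()
    # below in any other mode would raise a NameError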

    test_acc, test_std = test(test_data, class_names, optG, model, criterion,
                              args, args.test_epochs, False)
    print(
        ("[TEST] {}, {:s} {:s}{:>7.4f} ± {:>6.4f}, ").format(
            datetime.datetime.now(),
            colored("test  ", "cyan"),
            colored("acc:", "blue"),
            test_acc,
            test_std,
            # colored("train stats", "cyan"),
            # colored("G_grad:", "blue"), np.mean(np.array(grad['G'])),
            # colored("clf_grad:", "blue"), np.mean(np.array(grad['clf'])),
        ),
        flush=True)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
Example #7
def main():
    parser = argparse.ArgumentParser(
        description="Calculate Pascal VOC evaluation metrics")

    parser.add_argument("--model-path",
                        '-p',
                        type=str,
                        required=True,
                        help="path to the trained model")

    parser.add_argument('--dataset-style',
                        type=str,
                        required=True,
                        help="style of dataset "
                        "(supported are 'pascal-voc' and 'coco')")

    parser.add_argument('--image-set',
                        type=str,
                        default="test",
                        help='image set (annotation file basename for COCO) '
                        'to use for evaluation')

    parser.add_argument("--dataset", type=str, help="dataset directory path")

    parser.add_argument("--metric",
                        '-m',
                        type=str,
                        default='pascal-voc',
                        help="metric to calculate ('pascal-voc' or 'coco')")

    parser.add_argument("--nms-method", type=str, default="hard")

    parser.add_argument("--iou-threshold",
                        type=float,
                        default=0.5,
                        help="IOU threshold (for Pascal VOC metric)")

    parser.add_argument(
        "--use-2007",
        action='store_true',
        help="Use 2007 calculation algorithm (for Pascal VOC metric)")

    parser.add_argument('--device', type=str, help='device to use')

    args = parser.parse_args()

    if args.device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = args.device

    if device.startswith("cuda"):
        logging.info("Use CUDA")

    timer = Timer()

    dataset = load_dataset(args.dataset_style, args.dataset, args.image_set)

    arch, model, class_names = load(args.model_path,
                                    device=device,
                                    inference=True)
    model.eval()

    if dataset.class_names != class_names:
        print("Dataset classes don't match the classes "
              "the specified model is trained with. "
              "No chance to get valid results, so I give up.")
        sys.exit(-1)

    mean, std = mean_std(args.dataset_style, args.dataset, args.image_set)

    predictor = Predictor(arch, model, device=device, mean=mean, std=std)

    if args.metric == 'pascal-voc':
        logging.info("Calculating Pascal VOC metric...")
        pascal_voc.eval(dataset,
                        predictor,
                        iou_threshold=args.iou_threshold,
                        use_2007_metric=args.use_2007)

    elif args.metric == 'coco':
        logging.info("Calculating COCO metric...")
        coco.eval(dataset, predictor)

    else:
        print("Metric %s is not supported" % args.metric)
        sys.exit(-2)
Example #8
def main():
    parser = argparse.ArgumentParser(
        description='Detection model training utility')

    parser.add_argument('--dataset-style',
                        type=str,
                        required=True,
                        help="style of dataset "
                        "(supported are 'pascal-voc', 'coco' and 'widerface')")
    parser.add_argument('--dataset', required=True, help='dataset path')
    parser.add_argument('--train-image-set',
                        type=str,
                        default="train",
                        help='image set (annotation file basename for COCO) '
                        'to use for training')
    parser.add_argument('--val-image-set',
                        type=str,
                        default="val",
                        help='image set (annotation file basename for COCO) '
                        'to use for validation')
    parser.add_argument('--val-dataset',
                        default=None,
                        help='separate validation dataset directory path')

    parser.add_argument(
        '--net-config',
        help="path to network architecture configuration file "
        "(take a look into 'preset' directory for the reference)")

    # Params for optimizer
    parser.add_argument(
        '--optimizer',
        default="ranger",
        help="optimizer to use ('sgd', 'diffgrad', 'adamw', or 'ranger')")
    parser.add_argument('--lr',
                        '--learning-rate',
                        default=1e-3,
                        type=float,
                        help='initial learning rate')
    parser.add_argument(
        '--momentum',
        default=0.9,
        type=float,
        help='optional momentum for SGD optimizer (default is 0.9)')
    parser.add_argument('--weight-decay',
                        default=5e-4,
                        type=float,
                        help='optional weight decay (L2 penalty) '
                        'for SGD optimizer (default is 5e-4)')

    parser.add_argument('--backbone-pretrained', action='store_true')
    parser.add_argument('--backbone-weights',
                        help='pretrained weights for the backbone model')
    parser.add_argument('--freeze-backbone', action='store_true')

    # Scheduler
    parser.add_argument(
        '--scheduler',
        default="cosine-wr",
        type=str,
        help="scheduler for SGD. It can be one of 'multi-step' or 'cosine-wr'")

    # Params for Scheduler
    parser.add_argument('--milestones',
                        default="70,100",
                        type=str,
                        help="milestones for MultiStepLR")
    parser.add_argument('--t0',
                        default=10,
                        type=int,
                        help='T_0 value for Cosine Annealing Warm Restarts.')
    parser.add_argument(
        '--t-mult',
        default=2,
        type=float,
        help='T_mult value for Cosine Annealing Warm Restarts.')

    # Train params
    parser.add_argument('--batch-size',
                        default=32,
                        type=int,
                        help='batch size')
    parser.add_argument('--num-epochs',
                        default=120,
                        type=int,
                        help='number of epochs to train')
    parser.add_argument('--num-workers',
                        default=4,
                        type=int,
                        help='number of workers used in dataloading')
    parser.add_argument('--val-epochs',
                        default=5,
                        type=int,
                        help='perform validation every this many epochs')
    parser.add_argument('--device',
                        type=str,
                        help='device to use for training')

    parser.add_argument('--checkpoint-path',
                        default='output',
                        help='directory for saving checkpoint models')

    parser.add_argument(
        '--continue-training',
        '-p',
        help='continue training session for the previously trained model at '
        'the specified path')
    parser.add_argument(
        '--last-epoch',
        default=-1,
        type=int,
        help='last epoch to continue training session at (default is -1)')
    parser.add_argument(
        '--rand-augment',
        default="",
        type=str,
        help='use RandAugment augmentation pipeline for training instead of '
        'conventional one with the specified `m` and `n` values '
        '(e.g. "(9, 3)") ')

    parser.add_argument(
        '--skip-train-statistics',
        default=False,
        action='store_true',
        help="don't calculate mean and std values for the train dataset "
        "and use defaults for ImageNet")
    parser.add_argument(
        '--skip-val-statistics',
        default=False,
        action='store_true',
        help="don't calculate mean and std values for the validation dataset "
        "and use defaults for ImageNet")

    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    args = parser.parse_args()
    logging.info(args)

    if args.device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = args.device

    if device.startswith("cuda"):
        logging.info("Use CUDA")

    timer = Timer()

    if args.continue_training is not None:
        logging.info("Loading network")
        arch, net, class_names = load(args.continue_training, device=device)
    else:
        arch = get_arch(args.net_config)

    bbox_format = dataset_bbox_format(args.dataset_style)

    if args.skip_train_statistics:
        train_mean = (0.485, 0.456, 0.406)
        train_std = (0.229, 0.224, 0.225)
    else:
        train_mean, train_std = mean_std(args.dataset_style, args.dataset,
                                         args.train_image_set)

    if args.rand_augment == "":
        logging.info("Using conventional augmentation pipeline")
        train_transform = processing.train.Pipeline([arch.image_size] * 2,
                                                    train_mean,
                                                    train_std,
                                                    bbox_format=bbox_format)
    else:
        m, n = literal_eval(args.rand_augment)
        logging.info("Using RandAugment pipeline with m=%d, n=%d" % (m, n))
        train_transform = processing.randaugment.Pipeline(
            m,
            n, [arch.image_size] * 2,
            train_mean,
            train_std,
            bbox_format=bbox_format)

    if args.val_dataset is not None:
        val_dataset_root = args.val_dataset
    else:
        val_dataset_root = args.dataset

    if args.skip_val_statistics:
        val_mean = (0.485, 0.456, 0.406)
        val_std = (0.229, 0.224, 0.225)
    else:
        val_mean, val_std = mean_std(args.dataset_style, val_dataset_root,
                                     args.val_image_set)

    val_transform = processing.test.Pipeline([arch.image_size] * 2,
                                             val_mean,
                                             val_std,
                                             bbox_format=bbox_format)

    logging.info("Loading datasets...")

    dataset = load_dataset(args.dataset_style, args.dataset,
                           args.train_image_set, train_transform)

    num_classes = len(dataset.class_names)

    logging.info("Train dataset size: {}".format(len(dataset)))

    # drop an incomplete last batch so that BatchNorm layers
    # never receive a batch of size 1
    drop_last = len(dataset) % args.batch_size > 0

    train_loader = DataLoader(dataset,
                              args.batch_size,
                              collate_fn=collate,
                              num_workers=args.num_workers,
                              shuffle=True,
                              drop_last=drop_last)

    val_dataset = load_dataset(args.dataset_style, val_dataset_root,
                               args.val_image_set, val_transform)

    logging.info("Validation dataset size: {}".format(len(val_dataset)))

    val_loader = DataLoader(val_dataset,
                            args.batch_size,
                            collate_fn=collate,
                            num_workers=args.num_workers,
                            shuffle=False,
                            drop_last=drop_last)

    if args.continue_training is None:
        logging.info("Building network")
        # --backbone-pretrained is a store_true flag, so use its boolean value directly
        backbone_pretrained = args.backbone_pretrained
        net = arch.build(num_classes, backbone_pretrained, args.batch_size)

        if backbone_pretrained and args.backbone_weights is not None:
            logging.info(f"Load backbone weights from {args.backbone_weights}")
            timer.start("Loading backbone model")
            net.load_backbone_weights(args.backbone_weights)
            logging.info(f'Took {timer.end("Loading backbone model"):.2f}s.')

    if args.freeze_backbone:
        net.freeze_backbone()

    net.to(device)

    last_epoch = args.last_epoch

    criterion = arch.loss(net, device)
    mapper = arch.mapper(net, device)

    optim_kwargs = {"lr": args.lr, "weight_decay": args.weight_decay}

    if args.optimizer == "sgd":
        optim_class = torch.optim.SGD
        optim_kwargs.update({"momentum": args.momentum})
    elif args.optimizer == "adamw":
        optim_class = torch.optim.AdamW
    elif args.optimizer == "diffgrad":
        optim_class = DiffGrad
    else:
        optim_class = Ranger

    if args.continue_training is None:
        optim_params = net.parameters()
    else:
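        # schedulers created with last_epoch != -1 (i.e. when resuming training)
        # expect each parameter group to carry an 'initial_lr' entry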
        optim_params = [{"params": net.parameters(), "initial_lr": args.lr}]

    optimizer = optim_class(optim_params, **optim_kwargs)
    logging.info(f"Optimizer parameters used: {optim_kwargs}")

    if args.scheduler == 'multi-step':
        logging.info("Uses MultiStepLR scheduler.")
        milestones = [int(v.strip()) for v in args.milestones.split(",")]
        scheduler = MultiStepLR(optimizer,
                                milestones=milestones,
                                gamma=0.1,
                                last_epoch=last_epoch)
    else:
        logging.info("Uses Cosine annealing warm restarts scheduler.")
        # CosineAnnealingWarmRestarts has a bug with `last_epoch` != -1,
        # so we don't set it
        scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                T_0=args.t0,
                                                T_mult=args.t_mult,
                                                eta_min=1e-5)

    os.makedirs(args.checkpoint_path, exist_ok=True)

    logging.info(f"Start training from epoch {last_epoch + 1}.")
    for epoch in range(last_epoch + 1, last_epoch + args.num_epochs + 1):
        loop(train_loader,
             net,
             mapper,
             criterion,
             optimizer,
             device=device,
             epoch=epoch)
        scheduler.step()

        if ((epoch + 1) % args.val_epochs == 0
                or (epoch + 1) == args.num_epochs):
            val_loss = loop(val_loader,
                            net,
                            mapper,
                            criterion,
                            device=device,
                            epoch=epoch)

            filename = f"{arch.name}-Epoch-{epoch}-Loss-{val_loss}.pth"
            model_path = os.path.join(args.checkpoint_path, filename)
            save(arch, net, dataset.class_names, model_path)
            logging.info(f"Saved model {model_path}")