def set_model(opt):
    model = models.__dict__['resnet50']()
    criterion = SupConLoss(temperature=opt.temp)

    ckpt_path = 'moco_v1_200ep_pretrain.pth.tar'
    checkpoint = torch.load(ckpt_path, map_location="cpu")
    state_dict = checkpoint["state_dict"]
    for k in list(state_dict.keys()):
        # keep only query-encoder weights and strip their prefix
        if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
            state_dict[k[len("module.encoder_q."):]] = state_dict[k]
        # delete the renamed or unused key
        del state_dict[k]

    # strict=False: the randomly initialized fc head is not part of the MoCo checkpoint
    msg = model.load_state_dict(state_dict, strict=False)

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            # a plain torchvision ResNet has no .encoder attribute, so wrap the whole model
            model = torch.nn.DataParallel(model)
        model = model.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

    return model, criterion
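For reference, SupConLoss (from the SupContrast repository these examples build on) expects L2-normalized features of shape [batch_size, n_views, feature_dim] and optional labels. A minimal sketch with random tensors, assuming an opt namespace that provides opt.temp and that the returned criterion follows that interface:

import torch
import torch.nn.functional as F

model, criterion = set_model(opt)                     # opt must provide opt.temp
device = next(model.parameters()).device

bsz, n_views, dim = 8, 2, 128                         # illustrative shapes only
features = F.normalize(torch.randn(bsz, n_views, dim, device=device), dim=-1)
labels = torch.randint(0, 10, (bsz,), device=device)

loss_supcon = criterion(features, labels)             # supervised contrastive loss
loss_simclr = criterion(features)                     # without labels: SimCLR-style loss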
Example #2
def set_model(opt):
    model = SupConResNet(name=opt.model)
    criterion = SupConLoss(temperature=opt.temp)

    # enable synchronized Batch Normalization
    if opt.syncBN:
        model = apex.parallel.convert_syncbn_model(model)

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            model.encoder = torch.nn.DataParallel(model.encoder)
        model = model.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

    return model, criterion
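A sketch of one contrastive training step with the objects returned above, assuming a CUDA device and a data loader that yields two augmented views per image (as with SupContrast's TwoCropTransform); train_loader and optimizer are assumed to exist:

# hypothetical single step; images is a list [view1, view2], each of shape [bsz, C, H, W]
images, labels = next(iter(train_loader))
images = torch.cat([images[0], images[1]], dim=0).cuda(non_blocking=True)
labels = labels.cuda(non_blocking=True)
bsz = labels.shape[0]

features = model(images)                              # SupConResNet returns normalized projections
f1, f2 = torch.split(features, [bsz, bsz], dim=0)
features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1)   # [bsz, 2, dim]

loss = criterion(features, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()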
Example #3
def set_model(opt):
    model = SupConResNet(name=opt.model)
    classifier = LinearClassifier(name=opt.model, num_classes=opt.n_cls)

    criterions = {
        'SupConLoss': SupConLoss(temperature=opt.temp),
        'CrossEntropyLoss': torch.nn.CrossEntropyLoss()
    }

    # enable synchronized Batch Normalization
    if opt.syncBN:
        model = apex.parallel.convert_syncbn_model(model)

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            model.encoder = torch.nn.DataParallel(model.encoder)
        model = model.cuda()
        classifier = classifier.cuda()
        for name, criterion in criterions.items():
            criterions[name] = criterion.cuda()
        cudnn.benchmark = True

    return model, classifier, criterions
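Returning both criterions suggests a joint objective; one plausible, purely illustrative way to combine them, reusing the two-view stacking from the previous example (images, labels, and bsz as defined there):

# hypothetical joint step: contrastive loss on projections, cross-entropy on the classifier
features = model(images)                                    # [2*bsz, dim] normalized projections
f1, f2 = torch.split(features, [bsz, bsz], dim=0)
supcon_loss = criterions['SupConLoss'](
    torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1), labels)

# classify encoder features of the first view; detach() here is an illustrative choice
logits = classifier(model.encoder(images[:bsz]).detach())
ce_loss = criterions['CrossEntropyLoss'](logits, labels)
total_loss = supcon_loss + ce_loss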
Example #4
def main(opt):
    opt = setup_environment(opt)
    graph = Graph("coco")

    # Dataset
    transform = transforms.Compose([
        MirrorPoses(opt.mirror_probability),
        FlipSequence(opt.flip_probability),
        RandomSelectSequence(opt.sequence_length),
        ShuffleSequence(opt.shuffle),
        PointNoise(std=opt.point_noise_std),
        JointNoise(std=opt.joint_noise_std),
        MultiInput(graph.connect_joint, opt.use_multi_branch),
        ToTensor()
    ])

    dataset_class = dataset_factory(opt.dataset)
    dataset = dataset_class(
        opt.train_data_path,
        train=True,
        sequence_length=opt.sequence_length,
        transform=TwoNoiseTransform(transform),
    )

    dataset_valid = dataset_class(
        opt.valid_data_path,
        sequence_length=opt.sequence_length,
        transform=transforms.Compose([
            SelectSequenceCenter(opt.sequence_length),
            MultiInput(graph.connect_joint, opt.use_multi_branch),
            ToTensor()
        ]),
    )

    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batch_size,
        num_workers=opt.num_workers,
        pin_memory=True,
        shuffle=True,
    )

    val_loader = torch.utils.data.DataLoader(
        dataset_valid,
        batch_size=opt.batch_size_validation,
        num_workers=opt.num_workers,
        pin_memory=True,
    )

    # Model & criterion
    model, model_args = get_model_resgcn(graph, opt)
    criterion = SupConLoss(temperature=opt.temp)

    print("# parameters: ", count_parameters(model))

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, opt.gpus)

    if opt.cuda:
        model.cuda()
        criterion.cuda()

    # Trainer
    optimizer, scheduler, scaler = get_trainer(model, opt, len(train_loader))

    # Load checkpoint or weights
    load_checkpoint(model, optimizer, scheduler, scaler, opt)

    # Tensorboard
    writer = SummaryWriter(log_dir=opt.tb_path)

    sample_input = torch.zeros(opt.batch_size, model_args["num_input"],
                               model_args["num_channel"], opt.sequence_length,
                               graph.num_node).cuda()
    writer.add_graph(model, input_to_model=sample_input)

    best_acc = 0
    loss = 0
    for epoch in range(opt.start_epoch, opt.epochs + 1):
        # train for one epoch
        time1 = time.time()
        loss = train(train_loader, model, criterion, optimizer, scheduler,
                     scaler, epoch, opt)

        time2 = time.time()
        print(f"epoch {epoch}, total time {time2 - time1:.2f}")

        # tensorboard logger
        writer.add_scalar("loss/train", loss, epoch)
        writer.add_scalar("learning_rate", optimizer.param_groups[0]["lr"],
                          epoch)

        # evaluation
        result, accuracy_avg, sub_accuracies, dataframe = evaluate(
            val_loader, model, opt.evaluation_fn, use_flip=True)
        writer.add_text("accuracy/validation", dataframe.to_markdown(), epoch)
        writer.add_scalar("accuracy/validation", accuracy_avg, epoch)
        for key, sub_accuracy in sub_accuracies.items():
            writer.add_scalar(f"accuracy/validation/{key}", sub_accuracy,
                              epoch)

        print(f"epoch {epoch}, avg accuracy {accuracy_avg:.4f}")
        is_best = accuracy_avg > best_acc
        if is_best:
            best_acc = accuracy_avg

        if opt.tune:
            tune.report(accuracy=accuracy_avg)

        if epoch % opt.save_interval == 0 or (
                is_best and epoch > opt.save_best_start * opt.epochs):
            save_file = os.path.join(
                opt.save_folder,
                f"ckpt_epoch_{'best' if is_best else epoch}.pth")
            save_model(model, optimizer, scheduler, scaler, opt, opt.epochs,
                       save_file)

    # save the last model
    save_file = os.path.join(opt.save_folder, "last.pth")
    save_model(model, optimizer, scheduler, scaler, opt, opt.epochs, save_file)

    log_hyperparameter(writer, opt, best_acc, loss)

    print(f"best accuracy: {best_acc*100:.2f}")
Example #5
File: train.py  Project: yyht/SSD
def main():
    parser = argparse.ArgumentParser(description="SSD evaluation")

    parser.add_argument(
        "--results-dir",
        type=str,
        default="/data/data_vvikash/fall20/SSD/trained_models/",
    )  # change this
    parser.add_argument("--exp-name", type=str, default="temp")
    parser.add_argument("--training-mode",
                        type=str,
                        choices=("SimCLR", "SupCon", "SupCE"))

    # model
    parser.add_argument("--arch", type=str, default="resnet50")
    parser.add_argument("--num-classes", type=int, default=10)

    # training
    parser.add_argument("--dataset", type=str, default="cifar10")
    parser.add_argument("--data-dir",
                        type=str,
                        default="/data/data_vvikash/datasets/")
    parser.add_argument("--normalize", action="store_true", default=False)
    parser.add_argument("--batch-size", type=int, default=512)
    parser.add_argument("--size", type=int, default=32)
    parser.add_argument("--epochs", type=int, default=500)
    parser.add_argument("--lr", type=float, default=0.5)
    parser.add_argument("--momentum", type=float, default=0.9)
    parser.add_argument("--weight-decay", type=float, default=1e-4)
    parser.add_argument("--warmup", action="store_true")

    # ssl
    parser.add_argument("--method",
                        type=str,
                        default="SupCon",
                        choices=["SupCon", "SimCLR", "SupCE"])
    parser.add_argument("--temperature", type=float, default=0.5)

    # misc
    parser.add_argument("--print-freq", type=int, default=100)
    parser.add_argument("--save-freq", type=int, default=50)
    parser.add_argument("--ckpt", type=str, help="checkpoint path")
    parser.add_argument("--seed", type=int, default=12345)

    args = parser.parse_args()
    device = "cuda:0"

    if args.batch_size > 256 and not args.warmup:
        warnings.warn("Use warmup training for larger batch-sizes > 256")

    if not os.path.isdir(args.results_dir):
        os.mkdir(args.results_dir)

    # create results dir (for logs, checkpoints, etc.)
    result_main_dir = os.path.join(args.results_dir, args.exp_name)

    if os.path.exists(result_main_dir):
        # count previous experiments with the same name
        n = len(next(os.walk(result_main_dir))[-2])
        result_sub_dir = os.path.join(
            result_main_dir,
            "{}--dataset-{}-arch-{}-lr-{}_epochs-{}".format(
                n + 1, args.dataset, args.arch, args.lr, args.epochs),
        )
    else:
        os.mkdir(result_main_dir)
        result_sub_dir = os.path.join(
            result_main_dir,
            "1--dataset-{}-arch-{}-lr-{}_epochs-{}".format(
                args.dataset, args.arch, args.lr, args.epochs),
        )
    create_subdirs(result_sub_dir)

    # add logger
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    logger = logging.getLogger()
    logger.addHandler(
        logging.FileHandler(os.path.join(result_sub_dir, "setup.log"), "a"))
    logger.info(args)

    # seed cuda
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # Create model
    if args.training_mode in ["SimCLR", "SupCon"]:
        model = SSLResNet(arch=args.arch).to(device)
    elif args.training_mode == "SupCE":
        model = SupResNet(arch=args.arch,
                          num_classes=args.num_classes).to(device)
    else:
        raise ValueError("training mode not supported")

    # load feature extractor on gpu
    model.encoder = torch.nn.DataParallel(model.encoder).to(device)

    # Dataloader
    train_loader, test_loader, _ = data.__dict__[args.dataset](
        args.data_dir,
        mode="ssl" if args.training_mode in ["SimCLR", "SupCon"] else "org",
        normalize=args.normalize,
        size=args.size,
        batch_size=args.batch_size,
    )

    criterion = (
        SupConLoss(temperature=args.temperature).cuda()
        if args.training_mode in ["SimCLR", "SupCon"]
        else nn.CrossEntropyLoss().cuda()
    )
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # select training and validation methods
    trainer = (trainers.ssl if args.training_mode in ["SimCLR", "SupCon"] else
               trainers.supervised)
    val = knn if args.training_mode in ["SimCLR", "SupCon"] else baseeval

    # warmup
    if args.warmup:
        warmup_epochs = 10
        print(f"Warmup training for {warmup_epochs} epochs")
        warmup_lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimizer,
            base_lr=0.01,
            max_lr=args.lr,
            step_size_up=warmup_epochs * len(train_loader),
        )
        for epoch in range(warmup_epochs):
            trainer(
                model,
                device,
                train_loader,
                criterion,
                optimizer,
                warmup_lr_scheduler,
                epoch,
                args,
            )

    best_prec1 = 0

    for p in optimizer.param_groups:
        p["lr"] = args.lr
        p["initial_lr"] = args.lr
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs * len(train_loader), 1e-4)

    for epoch in range(0, args.epochs):
        trainer(model, device, train_loader, criterion, optimizer,
                lr_scheduler, epoch, args)

        prec1, _ = val(model, device, test_loader, criterion, args, epoch)

        # remember best accuracy and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        d = {
            "epoch": epoch + 1,
            "arch": args.arch,
            "state_dict": model.state_dict(),
            "best_prec1": best_prec1,
            "optimizer": optimizer.state_dict(),
        }

        save_checkpoint(
            d,
            is_best,
            os.path.join(result_sub_dir, "checkpoint"),
        )

        if not (epoch + 1) % args.save_freq:
            save_checkpoint(
                d,
                is_best,
                os.path.join(result_sub_dir, "checkpoint"),
                filename=f"checkpoint_{epoch+1}.pth.tar",
            )

        logger.info(
            f"Epoch {epoch}, validation accuracy {prec1}, best_prec {best_prec1}"
        )

        # clone results to latest subdir (sync after every epoch)
        clone_results_to_latest_subdir(
            result_sub_dir, os.path.join(result_main_dir, "latest_exp"))
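Note that CosineAnnealingLR above is constructed with T_max = args.epochs * len(train_loader), which implies the scheduler is stepped once per batch inside the trainer. A minimal sketch of such a loop for the SupCE path (the real loops live in trainers.ssl / trainers.supervised and are not shown here):

def sketch_supervised_epoch(model, device, loader, criterion, optimizer, scheduler):
    # illustrative only: one cross-entropy epoch with a per-batch LR schedule
    model.train()
    for images, targets in loader:
        images, targets = images.to(device), targets.to(device)
        loss = criterion(model(images), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()     # advance the cosine / cyclic schedule every batch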
Example #6
        test_df = pd.read_csv(args.testdata)
    else :
        print("local file reading")
        train_df = pd.read_csv('notebooks/files/train3.csv')
        test_df = pd.read_csv('notebooks/files/test3.csv')

    Num_label = len(train_df.label_id.value_counts())
    print('#label ', Num_label)

    device = torch.device(args.device)
    tokenizer = RobertaTokenizer.from_pretrained("./pretrained", do_lower_case=False)
    model = ContraRobertaNet(
        path="./pretrained", embedding_dim=768, num_class=Num_label
    )

    criterion = SupConLoss(temperature=1)
    model.to(device)
    criterion.to(device)

    train_dataset = PetDataset(train_df)
    train_loader = DataLoader(
        train_dataset, batch_size=args.batchsize, shuffle=True, num_workers=2
    )

    df_dict = {}
    for label in range(Num_label):
        df = train_df[train_df['label_id'] == label]
        df_dict[label] = df

    writer = SummaryWriter(args.logdir)
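df_dict groups the training rows by label; one plausible (hypothetical, not shown in this excerpt) use is sampling extra same-label rows so each mini-batch contains guaranteed positives for SupConLoss:

def sample_same_label(label, k=1):
    # hypothetical helper: draw up to k rows sharing `label` to serve as positives
    df = df_dict[label]
    return df.sample(n=min(k, len(df)))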
Example #7
def train(name, df, VAL_FOLD=0, resume=None):
    dt_string = datetime.now().strftime("%d|%m_%H|%M|%S")
    print("Starting -->", dt_string)

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.makedirs('checkpoint', exist_ok=True)
    run = f"{name}_[{dt_string}]"
    
    wandb.init(project="imanip", config=config_defaults, name=run)
    config = wandb.config


    model = SRM_Classifer(num_classes=1, encoder_checkpoint='weights/pretrain_[31|03_12|16|32].h5')

    # for name_, param in model.named_parameters():
    #     if 'classifier' in name_:
    #         continue
    #     else:
    #         param.requires_grad = False

    print("Parameters : ", sum(p.numel() for p in model.parameters() if p.requires_grad))    
    

    wandb.save('segmentation/merged_net.py')
    wandb.save('dataset.py')


    train_imgaug, train_geo_aug = get_train_transforms()
    transforms_normalize = get_transforms_normalize()
    

    #region ########################-- CREATE DATASET and DATALOADER --########################
    train_dataset = DATASET(
        dataframe=df,
        mode="train",
        val_fold=VAL_FOLD,
        test_fold=TEST_FOLD,
        transforms_normalize=transforms_normalize,
        imgaug_augment=train_imgaug,
        geo_augment=train_geo_aug,
        supcon=True
    )
    train_loader = DataLoader(train_dataset, batch_size=config.train_batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=False)

    valid_dataset = DATASET(
        dataframe=df,
        mode="val",
        val_fold=VAL_FOLD,
        test_fold=TEST_FOLD,
        transforms_normalize=transforms_normalize,
        supcon=True
    )
    valid_loader = DataLoader(valid_dataset, batch_size=config.valid_batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=False)

    test_dataset = DATASET(
        dataframe=df,
        mode="test",
        val_fold=VAL_FOLD,
        test_fold=TEST_FOLD,
        transforms_normalize=transforms_normalize,
        supcon=True
    )
    test_loader = DataLoader(test_dataset, batch_size=config.valid_batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=False)
    #endregion ######################################################################################



    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer,
    #     patience=config.schedule_patience,
    #     mode="min",
    #     factor=config.schedule_factor,
    # )
    criterion = SupConLoss().to(device)
    es = EarlyStopping(patience=20, mode="min")


    model = nn.DataParallel(model).to(device)
    
    # wandb.watch(model, log_freq=50, log='all')

    start_epoch = 0
    if resume is not None:
        checkpoint = torch.load(resume)
        # scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        print("-----------> Resuming <------------")

    for epoch in range(start_epoch, config.epochs):
        print(f"Epoch = {epoch}/{config.epochs-1}")
        print("------------------")

        train_metrics_st1 = train_stage1(model, train_loader, optimizer, criterion, epoch)
        

        print(f"TRAIN_LOSS = {train_metrics_st1['train_loss']}")
        print("New LR", optimizer.param_groups[0]['lr'])

        
        es(
            train_metrics_st1['train_loss'],
            model,
            model_path=os.path.join(OUTPUT_DIR, f"{run}.h5"),
        )
        if es.early_stop:
            print("Early stopping")
            break

        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict' : optimizer.state_dict(),
            # 'scheduler_state_dict': scheduler.state_dict(),
        }
        torch.save(checkpoint, os.path.join('checkpoint', f"{run}.pt"))


    if os.path.exists(os.path.join(OUTPUT_DIR, f"{run}.h5")):
        print(model.load_state_dict(torch.load(os.path.join(OUTPUT_DIR, f"{run}.h5"))))
        print("LOADED FOR TEST")

    # test_metrics = test(model, test_loader, criterion)
    wandb.save(os.path.join(OUTPUT_DIR, f"{run}.h5"))