Example #1
def main():

    # Data loading
    train_dataset = CustomDataset(root=config.root_train,
                                  annFile=config.annFile_train,
                                  transforms=config.train_transforms,
                                  catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_train,
                                annFile=config.annFile_train,
                                transforms=config.val_transforms,
                                catagory=config.CATEGORY_FILTER)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=16,
                              num_workers=4,
                              pin_memory=True,
                              shuffle=False,
                              drop_last=True)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=16,
                            num_workers=4,
                            pin_memory=True,
                            shuffle=False,
                            drop_last=True)

    # Model
    model = YoloV3(num_classes=config.C).to(device=config.DEVICE)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.LEARNING_RATE,
                           weight_decay=config.WEIGHT_DECAY)
    loss_function = YoloLoss()
    scaler = torch.cuda.amp.GradScaler()

    # Miscellaneous
    scaled_anchors = (torch.tensor(config.anchors) * torch.tensor(
        config.Scale).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)).to(
            config.DEVICE)
    #writer = SummaryWriter()
    current_time = time.time()

    print("Train loader length:", len(train_loader))

    # Training loop
    model.train()
    for cycle, (x, y) in enumerate(train_loader):

        print("Current cycle:", cycle)

        delta_time, current_time = time_function(current_time)
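
The snippet above is truncated right after the timing call, before the GradScaler it creates is ever used. As a rough, self-contained sketch (a toy model and loss standing in for the YoloV3/YoloLoss of this example, not the original classes), one mixed-precision step with torch.cuda.amp typically looks like this:

import torch
import torch.nn as nn
import torch.optim as optim

# Toy stand-ins for the example's model and loss (assumptions, not the original classes).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = nn.Linear(8, 3).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_function = nn.MSELoss()
scaler = torch.cuda.amp.GradScaler(enabled=(device == 'cuda'))

x = torch.randn(16, 8, device=device)
y = torch.randn(16, 3, device=device)

optimizer.zero_grad()
with torch.cuda.amp.autocast(enabled=(device == 'cuda')):
    loss = loss_function(model(x), y)
scaler.scale(loss).backward()  # scale the loss so fp16 gradients do not underflow
scaler.step(optimizer)         # unscales gradients, then calls optimizer.step()
scaler.update()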
Example #2
def main_worker(gpu, args):
    torch.cuda.set_device(gpu)
    device = torch.device('cuda')

    basic_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    unlabeled_dataset = CustomDataset(root=args.data,
                                      split='unlabeled',
                                      transform=basic_transforms)
    labeled_dataset = CustomDataset(root=args.data,
                                    split='train',
                                    transform=basic_transforms)

    unlabeled_loader = torch.utils.data.DataLoader(unlabeled_dataset,
                                                   batch_size=args.batch_size,
                                                   num_workers=args.workers)
    labeled_loader = torch.utils.data.DataLoader(labeled_dataset,
                                                 batch_size=args.batch_size,
                                                 num_workers=args.workers)

    # load pre-trained model from checkpoint
    model = ft_model()
    model.load_state_dict(
        torch.load(args.checkpoint_dir / args.checkpoint_file))
    model.eval()
    model.to(device)

    unlabeled_entropy = []
    labeled_entropy = []
    since = time.time()
    steps = 100
    with torch.no_grad():
        for i, batch in enumerate(unlabeled_loader):
            entropy = get_entropy(model, batch, device).tolist()
            unlabeled_entropy.extend(entropy)
            if i % steps == 0:
                print(i, sum(entropy) / len(entropy))
        for i, batch in enumerate(labeled_loader):
            entropy = get_entropy(model, batch, device).tolist()
            labeled_entropy.extend(entropy)
            if i % steps == 0:
                print(i, sum(entropy) / len(entropy))
    return unlabeled_entropy, labeled_entropy
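
get_entropy is not shown on this page; a plausible, self-contained sketch of such a helper (an assumption about its behaviour, not the repository's implementation) computes the Shannon entropy of the softmax distribution for each sample:

import torch
import torch.nn.functional as F

def predictive_entropy(logits: torch.Tensor) -> torch.Tensor:
    # One entropy value per sample; higher means a less confident prediction.
    log_probs = F.log_softmax(logits, dim=1)
    return -(log_probs.exp() * log_probs).sum(dim=1)

logits = torch.tensor([[5.0, 0.0, 0.0],    # confident  -> low entropy
                       [1.0, 1.0, 1.0]])   # uniform    -> high entropy (log 3 ~ 1.10)
print(predictive_entropy(logits))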
Example #3
    def __init__(self, FLAGS):
        self.use_cuda = FLAGS.cuda and torch.cuda.is_available()
        self.gpu = FLAGS.gpu

        self.train = FLAGS.train
        self.batch_size = FLAGS.batch_size
        self.data_path = FLAGS.data_path

        self.learning_rate = FLAGS.learning_rate
        self.beta1 = FLAGS.beta_1
        self.beta2 = FLAGS.beta_2

        self.load_path = FLAGS.load_path

        self.epoch = 0
        self.end_epoch = FLAGS.end_epoch

        self.model = SRModel().double()

        if not self.train:
            if self.use_cuda:
                self.model.load_state_dict(
                    torch.load(os.path.join('checkpoints', self.load_path)))
            else:
                self.model.load_state_dict(
                    torch.load(os.path.join('checkpoints', self.load_path),
                               map_location='cpu'))

        if self.use_cuda:
            torch.cuda.set_device(self.gpu)
            self.model.cuda()

        self.optim = optim.Adam(list(self.model.parameters()),
                                lr=self.learning_rate,
                                betas=(self.beta1, self.beta2))

        date_time = datetime.now().strftime("%b%d_%H-%M-%S")
        self.save_folder = os.path.join('checkpoints', date_time)

        if self.train:
            os.makedirs(self.save_folder, exist_ok=True)

        if self.train:
            self.writer = SummaryWriter()
            settings = ''
            for arg in vars(FLAGS):
                settings += str(arg) + '=' + str(getattr(FLAGS, arg)) + '    '
            self.writer.add_text('Settings', settings)

        print("Loading data...")
        self.train_set = CustomDataset(root=self.data_path, train=True)
        #        self.test_set = CustomDataset(root=self.data_path,train=False)
        self.loader = cycle(
            DataLoader(self.train_set,
                       batch_size=self.batch_size,
                       shuffle=True,
                       num_workers=0,
                       drop_last=True))
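
cycle() turns the DataLoader into an endless batch iterator, so training code can call next(self.loader) instead of nesting an epoch loop; note that itertools.cycle replays the batches cached during its first pass, so the shuffled order of epoch one repeats. A minimal sketch of the pattern with a toy dataset (not the CustomDataset above):

from itertools import cycle
import torch
from torch.utils.data import DataLoader, TensorDataset

data = TensorDataset(torch.arange(10).float().unsqueeze(1))
loader = cycle(DataLoader(data, batch_size=4, shuffle=True, drop_last=True))

for step in range(5):          # more iterations than one epoch contains
    (batch,) = next(loader)    # keeps yielding batches indefinitely
    print(step, batch.flatten().tolist())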
Example #4
def main():

    train_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train, transforms=config.transforms, catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train, transforms=config.transforms, catagory=config.CATEGORY_FILTER)
    train_loader = DataLoader(dataset=train_dataset, batch_size=16, num_workers=4, pin_memory=True, shuffle=False, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=16, num_workers=4, pin_memory=True, shuffle=False, drop_last=True)

    for index, (x, y) in enumerate(train_loader):
        x = draw_y_on_x(x, y)
        grid = torchvision.utils.make_grid(x, nrow=4)
        # Save batch grid as image
        image_dir = "./batch_dir"
        image_dir_exists = os.path.exists(image_dir)
        if not image_dir_exists:
            os.makedirs(image_dir)
        img_name = str(image_dir) + "/batch_" + str(index) + ".png"
        save_image(grid.float() / 255, img_name)

        print(index)
        print(x.shape)
        print(y.shape)

    for index, (x, y) in enumerate(val_loader):
        x = draw_y_on_x(x, y)
        grid = torchvision.utils.make_grid(x, nrow=4)
        # Save batch grid as image
        image_dir = "./batch_dir"
        image_dir_exists = os.path.exists(image_dir)
        if not image_dir_exists:
            os.makedirs(image_dir)
        img_name = str(image_dir) + "/batch_" + str(index) + ".png"
        save_image(grid.float() / 255, img_name)

        print(index)
        print(x.shape)
        print(y.shape)
Example #5
def main():

    # Data loading
    # train_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train, transforms=config.train_transforms, catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_val,
                                annFile=config.annFile_val,
                                transforms=config.val_transforms,
                                catagory=config.CATEGORY_FILTER)
    # train_loader = DataLoader(dataset=train_dataset, batch_size=16, num_workers=2, pin_memory=True, shuffle=True, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=16,
                            num_workers=2,
                            pin_memory=True,
                            shuffle=False,
                            drop_last=True)

    # Model
    model = YoloV3(num_classes=config.C).to(device=config.DEVICE)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.LEARNING_RATE,
                           weight_decay=config.WEIGHT_DECAY)

    # Miscellaneous
    scaled_anchors = (torch.tensor(config.anchors) * torch.tensor(
        config.Scale).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)).to(
            config.DEVICE)

    # Loading previously saved model weights
    load_checkpoint("res50_35k.pth.tar", model, optimizer,
                    config.LEARNING_RATE)

    # Rendering loop
    model.eval()
    for cycle, (x, y) in enumerate(val_loader):
        with torch.no_grad():
            x_gpu = x.to(config.DEVICE)
            yp = model(x_gpu)
            yp = [yp[0].to('cpu'), yp[1].to('cpu'), yp[2].to('cpu')]
        x = denormalize(x) * 255
        draw_y_on_x(x, y)
        draw_yp_on_x(x, yp, probability_threshold=0.5, anchors=config.anchors)
        # Save batch grid as image
        image_dir = "./batch_dir"
        image_dir_exists = os.path.exists(image_dir)
        if not image_dir_exists:
            os.makedirs(image_dir)
        img_name = str(image_dir) + "/batch_" + str(cycle) + ".png"
        save_image(x / 255, img_name)
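
The scaled_anchors expression above broadcasts the per-scale grid sizes onto the anchor table; a small, self-contained illustration with assumed shapes (a typical YOLOv3 layout, not the exact values from config) shows what the unsqueeze/repeat is doing:

import torch

anchors = torch.rand(3, 3, 2)            # 3 scales x 3 anchors x (w, h), relative to the image
scales = torch.tensor([13., 26., 52.])   # output grid size at each prediction scale

scaled = anchors * scales.unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)  # (3,1,1) -> (3,3,2)
print(scaled.shape)  # torch.Size([3, 3, 2]); anchors are now expressed in grid-cell units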
Example #6
    def __init__(self, args):
        self.args = args

        # data
        dataset = CustomDataset(args)
        self.collate_fn = dataset.collate_fn  # For zero-padding

        # For K-fold
        train_size = int(len(dataset) / args.cv_num)
        # Randomly split a dataset into non-overlapping new datasets of given lengths
        self.dataset_list = random_split(
            dataset,
            [train_size for _ in range(args.cv_num - 1)] +
            [len(dataset) - (args.cv_num - 1) * train_size])  # list holding the size of each split; the last fold takes the remainder

        # arguments, loss
        self.vocab_size = len(dataset.vocab)  # size of the dataset vocabulary
        self.pad_idx = dataset.vocab.word2idx['<pad>']  # index of the padding token
        self.embeddings = dataset.pretrained_embeddings
        self.criterion = nn.BCEWithLogitsLoss().to(device)

        # create the checkpoint directory if it does not exist
        os.makedirs(args.ck_path, exist_ok=True)
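
random_split only produces the folds; how dataset_list is consumed afterwards is not shown here. A hedged sketch of the usual K-fold pattern, holding one fold out and concatenating the rest (ConcatDataset is my assumption, not code from this repository):

import torch
from torch.utils.data import TensorDataset, random_split, ConcatDataset, DataLoader

dataset = TensorDataset(torch.arange(10).float())
folds = random_split(dataset, [3, 3, 4])      # e.g. cv_num = 3, last fold takes the remainder

val_fold = 0                                  # hold out fold 0 in this round
val_set = folds[val_fold]
train_set = ConcatDataset([f for i, f in enumerate(folds) if i != val_fold])

train_loader = DataLoader(train_set, batch_size=2, shuffle=True)
val_loader = DataLoader(val_set, batch_size=2)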
Example #7
def main_worker(gpu, args):
    torch.cuda.set_device(gpu)
    device = torch.device('cuda')

    train_transforms = transforms.Compose([
        transforms.RandomResizedCrop(args.image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    validation_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    train_dataset = CustomDataset(root=args.data,
                                  split='train',
                                  transform=train_transforms)
    validation_dataset = CustomDataset(root=args.data,
                                       split='val',
                                       transform=validation_transforms)

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers)
    validation_dataloader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=args.batch_size,
        num_workers=args.workers)

    # load pre-trained model from checkpoint
    model = ft_model(args.pretrained_algo, args.model_name,
                     args.pretrained_dir_file, args.finetuning,
                     args.num_classes)
    model.train()
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=0.9)

    args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
    stats_file = open(args.checkpoint_dir /
                      '{}_stats.txt'.format(args.checkpoint_file[:-4]),
                      'a',
                      buffering=1)
    print(' '.join(sys.argv))
    print(' '.join(sys.argv), file=stats_file)
    best_validation_accuracy = 0
    since = time.time()

    for i in range(args.epochs):
        total_train_loss = 0.0
        total_train_correct = 0.0
        total_validation_loss = 0.0
        total_validation_correct = 0.0

        model.train()
        for batch in train_dataloader:
            loss, correct = get_loss_and_correct(model, batch, criterion,
                                                 device)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            total_train_correct += correct.item()

        model.eval()
        with torch.no_grad():
            for batch in validation_dataloader:
                loss, correct = get_loss_and_correct(model, batch, criterion,
                                                     device)
                total_validation_loss += loss.item()
                total_validation_correct += correct.item()

        mean_train_loss = total_train_loss / len(train_dataset)
        train_accuracy = total_train_correct / len(train_dataset)
        mean_validation_loss = total_validation_loss / len(validation_dataset)
        validation_accuracy = total_validation_correct / len(
            validation_dataset)
        # save the best model
        if validation_accuracy > best_validation_accuracy:
            best_validation_accuracy = validation_accuracy
            torch.save(model.state_dict(),
                       args.checkpoint_dir / args.checkpoint_file)

        time_elapsed = time.time() - since
        print(
            'Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.4f}, Val Acc: {:.4f}, Time: {}'
            .format(i, mean_train_loss, mean_validation_loss, train_accuracy,
                    validation_accuracy, time_elapsed))
        print(
            'Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.4f}, Val Acc: {:.4f}, Time: {}'
            .format(i, mean_train_loss, mean_validation_loss, train_accuracy,
                    validation_accuracy, time_elapsed),
            file=stats_file)
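
get_loss_and_correct is defined elsewhere; judging by how its return values are used (loss.item() and correct.item()), a plausible sketch of it, labelled as an assumption rather than the original helper, would be:

import torch

def get_loss_and_correct(model, batch, criterion, device):
    # Hypothetical reimplementation; the repository's actual helper may differ.
    images, labels = batch
    images, labels = images.to(device), labels.to(device)
    logits = model(images)
    loss = criterion(logits, labels)
    correct = (logits.argmax(dim=1) == labels).sum()
    return loss, correct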
Example #8
def train(img_dir,classes_csv,model_fname=None,resnet_depth=50,epochs=1000,steps=100,train_split=0.8,out_dir ='',out_prefix=''):

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Create the data loaders

    # Get all image fnames in folder
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for folder in img_dir:
        for fname in os.listdir(folder):
            if fname.endswith(".png"):
                img_list.append(os.path.join(folder, fname))

    randomised_list = random.sample(img_list, len(img_list))
    num_train = int(train_split * len(img_list))
    train_imgs, val_imgs = randomised_list[:num_train], randomised_list[num_train:]

    dataset_train = CustomDataset(img_list=train_imgs, class_list=classes_csv, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CustomDataset(img_list=val_imgs, class_list=classes_csv,transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model

    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # retinanet = torch.load(model_fname)

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    if model_fname is not None:
        retinanet.load_state_dict(torch.load(model_fname))

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()

    start_time = time.time()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        print('Epoch: {} | Running loss: {:1.5f} | Elapsed Time: {}'.format(epoch_num, np.mean(loss_hist), (time.time() - start_time) / 60))
        mAP = csv_eval.evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))

        if epoch_num % steps == 0:
            torch.save(retinanet.module, '{}{}_model_{}.pt'.format(out_dir, out_prefix, epoch_num))
            torch.save(retinanet.state_dict(), '{}{}_state_{}.pt'.format(out_dir, out_prefix, epoch_num))

    torch.save(retinanet, out_dir + '{}model_final.pt'.format(out_prefix))
    torch.save(retinanet.state_dict(), out_dir + '{}state_final_.pt'.format(out_prefix))
Example #9
    parser.add_argument("--train", action='store_true', default=False)
    parser.add_argument("--predict", action='store_true', default=False)
    parser.add_argument("--zazu_timer", action='store_true', default=False)
    parser.add_argument("--checkpoint_path",
                        type=str,
                        default='/root/ZazuML/best_checkpoint.pt')
    parser.add_argument("--dataset_path", type=str, default='')
    parser.add_argument("--output_path", type=str, default='')
    args = parser.parse_args()

    with open('configs.json', 'r') as fp:
        configs = json.load(fp)
    logger = init_logging(__name__)

    from dataloader import CustomDataset
    CustomDataset("../data/tiny_coco", 'coco')
    zazu = ZaZu(configs['model_name'], configs['home_path'],
                configs['annotation_type'])
    if args.search:
        zazu.search(configs['search_method'], configs['epochs'],
                    configs['max_trials'], configs['max_instances_at_once'],
                    configs['augmentation_search'])
    if args.train:
        adapter = TrialAdapter(0)
        adapter.load(checkpoint_path=args.checkpoint_path)
        adapter.train()
        print('model checkpoint is saved to: ', adapter.checkpoint_path)
    if args.predict:
        predict(pred_on_path=args.dataset_path,
                output_path=args.output_path,
                checkpoint_path=args.checkpoint_path,
Example #10
aug_transform3 = transforms.Compose([
    transforms.RandomResizedCrop((96, 96),
                                 scale=(0.08, 1.0),
                                 ratio=(0.75, 1.3333333333333333)),
    rnd_color_jitter3,
    rnd_gray,
    transforms.ToTensor(),
])

train_transform = transforms.Compose([
    transforms.ToTensor(),
])

trainset = CustomDataset(root='/dataset',
                         split="train",
                         transform=train_transform)
augset1 = CustomDataset(root='/dataset',
                        split="train",
                        transform=aug_transform1)
augset2 = CustomDataset(root='/dataset',
                        split="train",
                        transform=aug_transform2)
augset3 = CustomDataset(root='/dataset',
                        split="train",
                        transform=aug_transform3)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=1024,
                                          shuffle=True,
                                          num_workers=2)
augloader1 = torch.utils.data.DataLoader(augset1,
Example #11
def main_worker(gpu, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](num_classes=800,
                                       norm_layer=SubBatchNorm2d)

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading freezed checkpoint '{}'".format(args.pretrained))
            states = torch.load(args.pretrained)
            model.load_state_dict(states)

            args.start_epoch = 0

            print("=> checkpoint loaded '{}'".format(args.pretrained))
        else:
            raise ValueError("=> no freezed checkpoint found at '{}'".format(
                args.pretrained))

    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print("=> ERROR: gpu must be assigned")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True

    # Data loading code
    traindir = args.data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(96),  # add crop resize
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    transform_eval = transforms.Compose([
        transforms.Resize(128),  # add resize
        transforms.CenterCrop(96),  # add crop
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = CustomDataset(traindir, 'train', transform_train)
    eval_dataset = CustomDataset(traindir, 'val', transform_eval)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # training code
    for epoch in range(args.start_epoch, args.epochs):

        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if epoch == 0 or (epoch + 1) % args.eval_per_n_epoch == 0:
            accuracy = evaluate(eval_loader, model, args)
            print(f"=> Epoch: {epoch+1}, accuracy: {accuracy:.4f}")
            # remember best acc and save checkpoint
            is_best = accuracy > best_acc1
            best_acc1 = max(accuracy, best_acc1)
            print(f"=> Epoch: {epoch+1}, isBest? : {is_best}")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'accuracy': accuracy,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                save_dir=args.checkpoint_dir,
                epoch=(epoch + 1),
                filename=os.path.join(
                    args.checkpoint_dir,
                    'checkpoint_{:03d}.pth.tar'.format(epoch + 1)))
Example #12
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
        
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass
        
    if args.dist_url == "env://" and args.rank == -1:
        args.rank = int(os.environ["RANK"])
    if args.multiprocessing_distributed:
        # For multiprocessing distributed training, rank needs to be the
        # global rank among all the processes
        args.rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)

    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch == 'resnet50':
        model = Model(resnet50,args,width=1)
    elif args.arch == 'resnet50x2':    
        model = Model(resnet50,args,width=2)
    elif args.arch == 'resnet50x4':    
        model = Model(resnet50,args,width=4)
    else:
        raise NotImplementedError('model not supported {}'.format(args.arch))    
            
    
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)
        # When using a single GPU per process and per
        # DistributedDataParallel, we need to divide the batch size
        # ourselves based on the total number of GPUs we have
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.batch_size_u = int(args.batch_size_u / ngpus_per_node)
        args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) #find_unused_parameters=True
    else:
        model.cuda()
        # DistributedDataParallel will divide and allocate batch_size to all
        # available GPUs if device_ids are not set
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criteria_x = nn.CrossEntropyLoss().cuda(args.gpu)    
    
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True
                               )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True
    
    print("=> preparing dataset")
    # Data loading code         
    
    transform_strong = transforms.Compose([
            transforms.RandomResizedCrop(96, scale=(0.2, 1.)),                
            transforms.RandomApply([
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.1) 
            ], p=0.8),
            transforms.RandomGrayscale(p=0.2),                         
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
        ])
    transform_weak = transforms.Compose([
            transforms.RandomResizedCrop(96, scale=(0.2, 1.)),                                     
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
        ])    
    transform_eval = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
        ])
    
    three_crops_transform = loader.ThreeCropsTransform(transform_weak, transform_strong, transform_strong)

    unlabeled_dataset = CustomDataset(args.data, 'unlabeled', transform=three_crops_transform)

    labeled_dataset = CustomDataset(args.data, 'train', transform=transform_weak)

    #labeled_sampler = torch.utils.data.distributed.DistributedSampler(labeled_dataset)
    labeled_loader = torch.utils.data.DataLoader(
        labeled_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)   
 
    #unlabeled_sampler = torch.utils.data.distributed.DistributedSampler(unlabeled_dataset)
    unlabeled_loader = torch.utils.data.DataLoader(
        unlabeled_dataset, batch_size=int(args.batch_size_u), shuffle=True,
        num_workers=args.workers, pin_memory=True)
    
    val_loader = torch.utils.data.DataLoader(CustomDataset('/content/dataset', 'val', transform_eval),
        batch_size=64, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    
    # create loggers
    if args.gpu==0:
        tb_logger = tensorboard_logger.Logger(logdir=os.path.join(args.exp_dir,'tensorboard'), flush_secs=2)
        logger = setup_default_logging(args)
        logger.info(dict(args._get_kwargs()))        
    else:
        tb_logger = None
        logger = None

    for epoch in range(args.start_epoch, args.epochs):
        if epoch==0:
            args.m = 0.99 # larger update in first epoch
        else:
            args.m = args.moco_m
        # args.lr=0.01
        adjust_learning_rate(optimizer, epoch, args)

        train(labeled_loader, unlabeled_loader, model, criteria_x, optimizer, epoch, args, logger, tb_logger)

        # evaluate on validation set
        acc1 = validate(val_loader, model, args, logger, tb_logger, epoch)

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'args': args,
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer' : optimizer.state_dict()
            },filename='{}/checkpoint_{:04d}.pt'.format(args.exp_dir,epoch))
    
    # evaluate ema model
    acc1 = validate(val_loader, model, args, logger, tb_logger, -1)
Example #13
def main():
	#TODO: Get args
	# python3 train_fixmatch.py --checkpoint-path ./checkpoint_path/model.pth --batch-size 1 --num-epochs 1 --num-steps 1 --train-from-start 1 --dataset-folder ./dataset
	parser = argparse.ArgumentParser()
	parser.add_argument('--checkpoint-path', type=str, default= "./checkpoints/model_barlow_20h.pth.tar")
	parser.add_argument('--batch-size', type=int, default= 512)
	parser.add_argument('--num-epochs', type=int, default= 10)
	parser.add_argument('--num-steps', type=int, default= 1)
	parser.add_argument('--train-from-start', type= int, default= 0)
	parser.add_argument('--dataset-folder', type= str, default= "./dataset")
	parser.add_argument('--learning-rate', type = float, default= 0.01)
	parser.add_argument('--threshold', type = float, default= 0.5)
	parser.add_argument('--mu', type= int, default= 7)
	parser.add_argument('--lambd', type= float, default= 0.005)
	parser.add_argument('--momentum', type= float, default= 0.9)
	parser.add_argument('--weight-decay', type= float, default= 1.5*1e-6)
	parser.add_argument('--warmup-epochs', type= int, default= 2)
	parser.add_argument('--scale-loss', type = float, default= 1.0/32.0)
	parser.add_argument('--wide', type= int, default= 0)
	args = parser.parse_args()

	dataset_folder = args.dataset_folder
	batch_size = args.batch_size
	n_epochs = args.num_epochs
	num_classes = 800
	lambd = args.lambd
	weight_decay = args.weight_decay
	checkpoint_path = args.checkpoint_path
	train_from_start = args.train_from_start
	
	if torch.cuda.is_available():
		device = torch.device("cuda")
	else:
		device = torch.device("cpu")

	unlabeled_train_dataset = CustomDataset(root= dataset_folder, split = "unlabeled", transform = TransformBarlowTwins())
	unlabeled_train_loader = DataLoader(unlabeled_train_dataset, batch_size= batch_size, shuffle= True, num_workers= 4)

	if args.wide == 1:
		model = lightly.models.BarlowTwins(wide_resnet50_2(pretrained= False), num_ftrs= 2048)
	else:
		model = lightly.models.BarlowTwins(resnet18(pretrained= False), num_ftrs= 512)

	optimizer = LARS(model.parameters(), lr=0, weight_decay=weight_decay,
					 weight_decay_filter=exclude_bias_and_norm,
					 lars_adaptation_filter=exclude_bias_and_norm)

	criterion = lightly.loss.BarlowTwinsLoss()

	start_epoch = 0

	model.train()
	losses = Average()

	model = model.to(device)
	criterion = criterion.to(device)

	if train_from_start == 0:
		assert os.path.isfile(checkpoint_path), "Error: no checkpoint directory found!"
		print("Restoring model from checkpoint")
		# args.out = os.path.dirname(args.resume)
		checkpoint = torch.load(checkpoint_path, map_location= device)
		if args.wide == 0:
			model = torch.nn.DataParallel(model)
		# best_acc = checkpoint['best_acc']
		start_epoch = checkpoint['epoch'] - 1
		model.load_state_dict(checkpoint['state_dict'])
		optimizer.load_state_dict(checkpoint['optimizer'])

	if torch.cuda.device_count() > 1:
		print("Let's use", torch.cuda.device_count(), "GPUs!")
		model = torch.nn.DataParallel(model)
		criterion = torch.nn.DataParallel(criterion)

	model = model.to(device)
	criterion = criterion.to(device)

	#TODO
	# scaler = torch.cuda.amp.GradScaler()
	# model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

	for epoch in tqdm(range(start_epoch, n_epochs)):

		# for batch_idx in tqdm(range(n_steps)): ## CHECK

		for batch_idx, batch in enumerate(tqdm(unlabeled_train_loader)):
			y_a = batch[0][0].to(device)
			y_b = batch[0][1].to(device)
			
			z_a, z_b = model(y_a, y_b)
			loss = criterion(z_a, z_b).mean()

			lr = adjust_learning_rate(args, optimizer, unlabeled_train_loader, epoch * len(unlabeled_train_loader) + batch_idx)
			optimizer.zero_grad()

			# scaler.scale(loss).backward()
			# scaler.step(optimizer)
			# scaler.update()
			loss.backward()
			optimizer.step()

			losses.update(loss.item())

			if batch_idx % 25 == 0:
				print(f"Epoch number: {epoch}, loss_avg: {losses.avg}, loss: {loss.item()}, lr: {lr}", flush= True)
		if torch.cuda.device_count() > 1:
			save_checkpoint({
					'epoch': epoch + 1,
					'state_dict': model.module.state_dict(),
					'optimizer': optimizer.state_dict()
				}, checkpoint_path)
		else:
			save_checkpoint({
					'epoch': epoch + 1,
					'state_dict': model.state_dict(),
					'optimizer': optimizer.state_dict()
				}, checkpoint_path)
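
The LARS optimizer above is built with an exclude_bias_and_norm filter that is not shown; in the Barlow Twins reference code this filter simply marks 1-D parameters (biases and normalization weights) to be excluded from adaptation and weight decay. A one-line sketch under that assumption:

def exclude_bias_and_norm(p):
    # Assumed behaviour: exclude 1-D tensors (biases, norm weights) from LARS adaptation and weight decay.
    return p.ndim == 1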
Example #14
import argparse

import torch
import torch.nn as nn
from torchvision import datasets, transforms, models

from dataloader import CustomDataset
from submission import get_model

#parser part
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint-dir', type=str)
args = parser.parse_args()
#train composition operations
train_transform = transforms.Compose([
    transforms.ToTensor(),
])
#trainset and trainloaders
trainset = CustomDataset(root='/dataset',
                         split="train",
                         transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=2)

#net = get_model().cuda()
net = get_model()

criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

print('Start Training')

#training part
Example #15
def main_worker(gpu, args):
    # args.rank += gpu
    #
    # torch.distributed.init_process_group(
    #     backend='nccl', init_method=args.dist_url,
    #     world_size=args.world_size, rank=args.rank)

    # if args.rank == 0:
    args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
    stats_file = open(args.checkpoint_dir /
                      'stats_{}.txt'.format(args.resnet_layers),
                      'a',
                      buffering=1)
    print(' '.join(sys.argv))
    print(' '.join(sys.argv), file=stats_file)

    torch.cuda.set_device(gpu)
    # torch.backends.cudnn.benchmark = True

    model = BarlowTwins(args).cuda(gpu)
    # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
    optimizer = LARS(model.parameters(),
                     lr=0,
                     weight_decay=args.weight_decay,
                     weight_decay_filter=exclude_bias_and_norm,
                     lars_adaptation_filter=exclude_bias_and_norm)

    # automatically resume from checkpoint if it exists
    if (args.checkpoint_dir /
            'checkpoint_{}.pth'.format(args.resnet_layers)).is_file():
        ckpt = torch.load(args.checkpoint_dir /
                          'checkpoint_{}.pth'.format(args.resnet_layers),
                          map_location='cpu')
        start_epoch = ckpt['epoch']
        model.load_state_dict(ckpt['model'])
        optimizer.load_state_dict(ckpt['optimizer'])
    else:
        start_epoch = 0

    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    # assert args.batch_size % args.world_size == 0
    # per_device_batch_size = args.batch_size // args.world_size
    # loader = torch.utils.data.DataLoader(
    #     dataset, batch_size=per_device_batch_size, num_workers=args.workers,
    #     pin_memory=True, sampler=sampler)

    dataset = CustomDataset(root=args.data,
                            split='unlabeled',
                            transform=Transform(args.image_size))
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         num_workers=args.workers)

    start_time = time.time()
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(start_epoch, args.epochs):
        # sampler.set_epoch(epoch)
        for step, ((y1, y2), _) in enumerate(loader,
                                             start=epoch * len(loader)):
            y1 = y1.cuda(gpu, non_blocking=True)
            y2 = y2.cuda(gpu, non_blocking=True)
            lr = adjust_learning_rate(args, optimizer, loader, step)
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                loss = model.forward(y1, y2)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            if step % args.print_freq == 0:
                # torch.distributed.reduce(loss.div_(args.world_size), 0)
                # if args.rank == 0:
                stats = dict(epoch=epoch,
                             step=step,
                             learning_rate=lr,
                             loss=loss.item(),
                             time=int(time.time() - start_time))
                print(json.dumps(stats))
                print(json.dumps(stats), file=stats_file)
        # if args.rank == 0:
        # save checkpoint
        state = dict(epoch=epoch + 1,
                     model=model.state_dict(),
                     optimizer=optimizer.state_dict())
        torch.save(
            state, args.checkpoint_dir /
            'checkpoint_{}.pth'.format(args.resnet_layers))
    # if args.rank == 0:
    # save final model
    torch.save(model.backbone.state_dict(),
               args.checkpoint_dir / 'resnet{}.pth'.format(args.resnet_layers))
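
adjust_learning_rate is referenced but not defined in this snippet; the Barlow Twins reference implementation uses a linear warmup followed by cosine decay over the remaining steps. A hedged sketch along those lines (function and parameter names are mine):

import math

def warmup_cosine_lr(optimizer, base_lr, step, total_steps, warmup_steps):
    # Linear warmup for the first warmup_steps, then cosine decay towards zero.
    if step < warmup_steps:
        lr = base_lr * step / max(1, warmup_steps)
    else:
        progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
        lr = base_lr * 0.5 * (1.0 + math.cos(math.pi * progress))
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr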
Example #16
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torch.optim.rmsprop import RMSprop
import torch.nn as nn
from tqdm import tqdm

import torch
import numpy as np
import sys

configs = Configs()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
DATASET_PATH = r"D:\Code\CV_project\lstm_fast_fcn\dataset.pt"

dataset = CustomDataset(configs)
dataset_size = len(dataset)
indices = list(range(dataset_size))

np.random.seed(0)
np.random.shuffle(indices)

split = int(np.floor(configs.valSplit * dataset_size))
trainIndices, valIndices = indices[split:], indices[:split]
trainLoader = DataLoader(dataset,
                         batch_size=configs.batchSize,
                         num_workers=0,
                         sampler=SubsetRandomSampler(trainIndices))
valLoader = DataLoader(dataset,
                       batch_size=configs.batchSize,
                       num_workers=0,
                       sampler=SubsetRandomSampler(valIndices))
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path',
                        type=str,
                        default="./checkpoints/model_transfer.pth.tar")
    parser.add_argument('--transfer-path',
                        type=str,
                        default="./checkpoints/model_barlow.pth.tar")
    parser.add_argument('--best-path',
                        type=str,
                        default="./checkpoints/model_barlow_best.pth.tar")
    parser.add_argument('--batch-size', type=int, default=10)
    parser.add_argument('--num-epochs', type=int, default=100)
    parser.add_argument('--dataset-folder', type=str, default="./dataset")
    parser.add_argument('--new-dataset-folder', type=str, default="./dataset")
    parser.add_argument('--learning-rate-classifier',
                        type=float,
                        default=0.001)
    parser.add_argument('--learning-rate-model', type=float, default=0.001)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.001)
    parser.add_argument('--fine-tune', type=int, default=0)
    parser.add_argument('--wide', type=int, default=0)
    parser.add_argument('--model-name', type=str, default="moco")
    parser.add_argument('--dropout', type=float, default=0)
    parser.add_argument('--new-data', type=int, default=0)
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    dataset_folder = args.dataset_folder
    batch_size = args.batch_size
    batch_size_val = 256  #5120
    n_epochs = args.num_epochs
    weight_decay = args.weight_decay
    checkpoint_path = args.checkpoint_path

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    torch.backends.cudnn.deterministic = True

    print(f"Training with seed {args.seed}")

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    train_transform, val_transform = get_transforms(
    )  #TODO Get new transforms file

    if args.new_data == 0:
        labeled_train_dataset = CustomDataset(root=args.dataset_folder,
                                              split="train",
                                              transform=train_transform)
    else:
        labeled_train_dataset = CustomDataset(root=args.new_dataset_folder,
                                              split="train_new",
                                              transform=train_transform)
    val_dataset = CustomDataset(root=args.dataset_folder,
                                split="val",
                                transform=val_transform)

    labeled_train_loader = DataLoader(labeled_train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=4)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size_val,
                            shuffle=False,
                            num_workers=4)

    resnet = lightly.models.ResNetGenerator('resnet-18', 1, num_splits=0)
    backbone = torch.nn.Sequential(
        *list(resnet.children())[:-1],
        torch.nn.AdaptiveAvgPool2d(1),
    )

    if args.model_name == "moco":
        model = lightly.models.MoCo(backbone,
                                    num_ftrs=512,
                                    m=0.99,
                                    batch_shuffle=True)
    else:
        if args.wide == 1:
            model = lightly.models.BarlowTwins(
                wide_resnet50_2(pretrained=False), num_ftrs=2048)
        else:
            model = lightly.models.BarlowTwins(resnet18(pretrained=False),
                                               num_ftrs=512)

    checkpoint = torch.load(args.transfer_path, map_location=device)

    # print(checkpoint['state_dict'].keys())
    # print("printed keys")

    # print(model_barlow.state_dict().keys())
    # print("printed model keys")

    # if args.wide == 0:
    # model = torch.nn.DataParallel(model)

    model.load_state_dict(checkpoint['state_dict'])
    # print(model_barlow)
    model = model.backbone  # both the wide and standard variants expose the backbone the same way

    if args.wide == 1:
        classifier = Classifier(ip=2048, dp=args.dropout)
    else:
        classifier = Classifier(ip=512, dp=args.dropout)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model)
        classifier = torch.nn.DataParallel(classifier)

    if not args.fine_tune:
        model.requires_grad_(False)

    model = model.to(device)
    classifier = classifier.to(device)

    param_groups = [
        dict(params=classifier.parameters(), lr=args.learning_rate_classifier)
    ]

    if args.fine_tune:
        param_groups.append(
            dict(params=model.parameters(), lr=args.learning_rate_model))

    optimizer = optim.Adam(param_groups, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs)

    start_epoch = 0
    losses = Average()

    criterion = torch.nn.CrossEntropyLoss().to(device)

    best_val_accuracy = 25.0  #TODO

    for epoch in tqdm(range(start_epoch, n_epochs)):
        if args.fine_tune:
            model.train()
        else:
            model.eval()
        classifier.train()

        for batch_idx, batch in enumerate(tqdm(labeled_train_loader)):
            img = batch[0].to(device)
            labels = batch[1].to(device)

            model_out = model(img)
            if args.model_name == "moco":
                model_out = model_out.squeeze()
                model_out = torch.nn.functional.normalize(model_out, dim=1)
            logits = classifier(model_out)
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            losses.update(loss.item())

            if batch_idx % 25 == 0:
                print(
                    f"Epoch number: {epoch}, loss_avg: {losses.avg}, loss: {loss.item()}, best accuracy: {best_val_accuracy:.2f}",
                    flush=True)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'classifier_state_dict': classifier.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }, checkpoint_path)

        model.eval()
        with torch.no_grad():
            val_loss = 0
            val_size = 0
            total = 0
            correct = 0
            for batch in val_loader:
                model_out = model(batch[0].to(device))
                if args.model_name == "moco":
                    model_out = model_out.squeeze()
                    model_out = torch.nn.functional.normalize(model_out, dim=1)
                logits_val = classifier(model_out)
                labels = batch[1].to(device)

                val_loss += F.cross_entropy(logits_val, labels)
                _, predicted = torch.max(logits_val.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_size += 1
                # break
        print(
            f"Val loss: {val_loss/val_size}, Accuracy: {(100 * correct / total):.2f}%",
            flush=True)

        if 100 * correct / total > best_val_accuracy:
            best_val_accuracy = 100 * correct / total
            best_val_loss = val_loss / val_size
            print(
                f"Saving the best model with {best_val_accuracy:.2f}% accuracy and {best_val_loss:.2f} loss",
                flush=True)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'classifier_state_dict': classifier.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'best_val_accuracy': best_val_accuracy,
                    'best_val_loss': best_val_loss
                }, args.best_path)
Example #18
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("=> Use GPU: {} for training".format(args.gpu))

    print("=> creating model '{}'".format(args.arch))
    model = MoCo(models.__dict__[args.arch], args.moco_dim, args.moco_k,
                 args.moco_m, args.moco_t, args.mlp)
    print(model)

    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        raise NotImplementedError("Only Single GPU is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint  # release GPU memory
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = args.data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(
                96, scale=(0.2, 1.)),  # Add back RandomResizedCrop
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(96, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    train_dataset = CustomDataset(
        traindir, "unlabeled",
        TwoCropsTransform(transforms.Compose(augmentation)))

    if args.small_set:
        print('=> Using 1/10 unlabeled set')
        train_sampler = torch.utils.data.RandomSampler(train_dataset,
                                                       replacement=True,
                                                       num_samples=51200)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=train_sampler)

    else:
        print('=> Using full unlabeled set')
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   drop_last=True)

    print("=> Start Training.")
    for epoch in range(args.start_epoch, args.epochs):

        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if epoch == 0 or (epoch + 1) % args.save_checkpoint_per_epoch == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=os.path.join(
                    args.checkpoint_dir,
                    'checkpoint_{:03d}.pth'.format(epoch + 1)))
Example #19
0
def main():

    # Data loading
    train_dataset = CustomDataset(root=config.root_train,
                                  annFile=config.annFile_train,
                                  transforms=config.train_transforms,
                                  catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_val,
                                annFile=config.annFile_val,
                                transforms=config.val_transforms,
                                catagory=config.CATEGORY_FILTER)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=8,
                              num_workers=2,
                              pin_memory=True,
                              shuffle=True,
                              drop_last=True)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=8,
                            num_workers=2,
                            pin_memory=True,
                            shuffle=False,
                            drop_last=True)

    # Model
    model = YoloV3(num_classes=config.C).to(device=config.DEVICE)
    # from model_external import YOLOv3
    # model = YOLOv3(num_classes=90).to(device=config.DEVICE)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.LEARNING_RATE,
                           weight_decay=config.WEIGHT_DECAY)
    from loss_external import YoloLoss
    loss_function = YoloLoss().to(device=config.DEVICE)

    # Miscellaneous
    scaled_anchors = (torch.tensor(config.anchors) * torch.tensor(
        config.Scale).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)).to(
            config.DEVICE)
    writer = SummaryWriter()
    current_time = time.time()

    # Loading previously saved model weights
    if config.LOAD_MODEL:
        load_checkpoint("cp.pth.tar", model, optimizer, config.LEARNING_RATE)

    print(torch.cuda.memory_summary(device=None, abbreviated=False))

    # Training loop
    train_iter = iter(train_loader)
    for cycle in range(config.CYCLES):

        print("Cycle:", cycle)

        # Draw the next training batch; the original pulled the first val_loader
        # batch every cycle, which left train_loader unused
        try:
            x, y = next(train_iter)
        except StopIteration:
            train_iter = iter(train_loader)
            x, y = next(train_iter)
        x = x.to(config.DEVICE)
        y0, y1, y2 = (y[0].to(config.DEVICE), y[1].to(config.DEVICE),
                      y[2].to(config.DEVICE))
        yp = model(x)
        loss_0 = loss_function(predictions=yp[0],
                               target=y0,
                               anchors=scaled_anchors[0])
        loss_1 = loss_function(predictions=yp[1],
                               target=y1,
                               anchors=scaled_anchors[1])
        loss_2 = loss_function(predictions=yp[2],
                               target=y2,
                               anchors=scaled_anchors[2])
        loss = loss_0 + loss_1 + loss_2
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Run validation
        if cycle % 100 == 0 and cycle != 0:
            model.eval()
            losses = []
            with torch.no_grad():
                x, y = next(iter(val_loader))
                x = x.to(config.DEVICE)
                y0, y1, y2 = (y[0].to(config.DEVICE), y[1].to(config.DEVICE),
                              y[2].to(config.DEVICE))
                yp = model(x)
                loss_0 = loss_function(predictions=yp[0],
                                       target=y0,
                                       anchors=scaled_anchors[0])
                loss_1 = loss_function(predictions=yp[1],
                                       target=y1,
                                       anchors=scaled_anchors[1])
                loss_2 = loss_function(predictions=yp[2],
                                       target=y2,
                                       anchors=scaled_anchors[2])
                loss = loss_0 + loss_1 + loss_2
                losses.append(loss.item())
            avg_val_loss = sum(losses) / len(losses)
            writer.add_scalar("val_loss: ", avg_val_loss, cycle)
            model.train()

        # Alternative validation pass (kept disabled): extract predicted and target boxes
        """
        if cycle % 100 == 0 and cycle != 0:
            model.eval()
            x, y = next(iter(val_loader))
            x = x.float()
            x = x.to(config.DEVICE)
            # y0, y1, y2 = (y[0].to(config.DEVICE), y[1].to(config.DEVICE), y[2].to(config.DEVICE))
            with torch.no_grad():
                yp = model(x)
                # Move predictions to cpu
                yp = [yp[0].to('cpu'), yp[1].to('cpu'), yp[2].to('cpu')]
                # boxes_from_yp(yp) returns all yp bboxes in a batch
                yp_boxes = boxes_from_yp(yp=yp, iou_threshold=config.MAP_IOU_THRESH, threshold=config.CONF_THRESHOLD)
                # boxes_from_y(y) returns all y bboxes in a batch
                y_boxes = boxes_from_y(y=y)
        """

        # Save model
        if cycle % 1000 == 0 and cycle != 0:
            save_checkpoint(model,
                            optimizer,
                            cycle,
                            filename=config.CHECKPOINT_FILE)

        # Rendering loop
        if cycle % 100 == 0 and cycle != 0:
            model.eval()
            x, y = next(iter(val_loader))
            with torch.no_grad():
                x_gpu = x.to(config.DEVICE)
                yp = model(x_gpu)
                yp = [yp[0].to('cpu'), yp[1].to('cpu'), yp[2].to('cpu')]
            x = denormalize(x) * 255
            draw_y_on_x(x, y)
            draw_yp_on_x(x,
                         yp,
                         probability_threshold=0.5,
                         anchors=config.anchors)
            # Save batch grid as image
            image_dir = "./batch_dir"
            image_dir_exists = os.path.exists(image_dir)
            if not image_dir_exists:
                os.makedirs(image_dir)
            img_name = str(image_dir) + "/batch_" + str(cycle) + ".png"
            save_image(x / 255, img_name)
            model.train()

        writer.add_scalar("train_loss: ", loss.item(), cycle)
        delta_time, current_time = time_function(current_time)
        writer.add_scalar("Epoch Duration [s]", delta_time, cycle)
        writer.flush()
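Note: time_function is not defined in this snippet; from its call site it returns the seconds elapsed since the previous timestamp together with a fresh timestamp. A minimal sketch under that assumption:

import time

def time_function(previous_time):
    """Return (seconds elapsed since previous_time, current timestamp)."""
    now = time.time()
    return now - previous_time, now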
Exemplo n.º 20
0

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint-dir', type=str, default='checkpoints/')
parser.add_argument('--model-name', type=str, default='simsiam')
parser.add_argument('--epochs', type=int, default=1)
parser.add_argument('--net_size', type=int, default=50)
parser.add_argument('--temperature', type=float, default=1.0)

args = parser.parse_args()
checkpoint_path = os.path.join(args.checkpoint_dir, args.model_name)

# sys.path.insert(1, args.checkpoint_dir)
# PATH = '/Users/colinwan/Desktop/NYU_MSDS/2572/FinalProject/DL21SP20'
PATH = ''
train_dataset = CustomDataset(root=PATH+'/dataset', split='unlabeled', transform=get_aug(train=True, image_size=96))
BATCH_SIZE = 256 
print(len(train_dataset))

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

model = SimSiam().to(device)
check = os.path.exists(
    os.path.join(checkpoint_path,
        args.model_name+"_encoder_{}.pth".format(args.net_size)))
print(os.path.join(checkpoint_path,
        args.model_name+"_encoder_{}.pth".format(args.net_size)))
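Note: get_aug is imported from elsewhere in this snippet. In SimSiam-style code it builds a SimCLR-like augmentation (and usually also pairs up two views per image); the sketch below shows only the single-view pipeline such a helper is typically built from, using plain torchvision transforms. The exact operations and parameters are assumptions:

from torchvision import transforms

def simclr_style_aug(image_size=96):
    """Sketch of a SimCLR-style augmentation pipeline (parameters assumed)."""
    return transforms.Compose([
        transforms.RandomResizedCrop(image_size, scale=(0.2, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
    ])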
Exemplo n.º 21
0
from dataloader import CustomDataset
from model.seq2seq import Seq2Seq

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
    with_cuda = True
    device_ids = None
    print("------- GPU Working -------")
    print("[Current GPU]:" + str(torch.cuda.get_device_name(0)))
else:
    with_cuda = False
    device_ids = None
    print("------- CPU Working -------")

data_loaded = CustomDataset(path='data/eng-fra.txt')
pad_idx = data_loaded.vocab_stoi['<pad>']

hidden_size = 1000
vocab_len = len(data_loaded.vocab_stoi)
embedding_size = 620
batch_size = 80

train_loader = torchdata.DataLoader(dataset=data_loaded,
                                    collate_fn=data_loaded.custom_collate_fn,
                                    batch_size=batch_size)

trg_max_seq_len = next(
    iter(train_loader))[1].size(1) - 1  # <s> is not included

epochs = 1
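Note: custom_collate_fn is not included in this excerpt. For a seq2seq dataset it typically pads the variable-length source and target token sequences of a batch to a common length using the <pad> index. A stand-alone sketch of that behaviour, assuming each sample is a (src, trg) pair of 1-D token-id tensors:

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_pairs(batch, pad_idx=0):
    """Pad (src, trg) token-id tensors in a batch to rectangular tensors."""
    srcs, trgs = zip(*batch)
    src_batch = pad_sequence(list(srcs), batch_first=True, padding_value=pad_idx)
    trg_batch = pad_sequence(list(trgs), batch_first=True, padding_value=pad_idx)
    return src_batch, trg_batch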
Exemplo n.º 22
0
class fullmodel(nn.Module):
    def __init__(self, num_classes=800):
        super(fullmodel, self).__init__()
        self.pretrain = resnet50()
        self.pretrain.fc = nn.Linear(self.pretrain.fc.in_features, num_classes)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(num_classes, num_classes)

    def forward(self, x):
        x = self.relu(self.pretrain(x))
        outputs = self.linear(x)
        return outputs


unlabeled_dataset = CustomDataset(PATH + '/dataset',
                                  'unlabeled',
                                  transform=transform_unlabled)
unlabeled_trainloader = torch.utils.data.DataLoader(unlabeled_dataset,
                                                    batch_size=BATCH_SIZE * mu,
                                                    shuffle=True,
                                                    num_workers=1)

labeled_dataset = CustomDataset(PATH + '/dataset',
                                'train',
                                transform=transform_labeled)
labeled_trainloader = torch.utils.data.DataLoader(labeled_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=1)

validation_dataset = CustomDataset(PATH + '/dataset', 'val',
Exemplo n.º 23
0
    transforms.ColorJitter(hue=.1, saturation=.1, contrast=.1),
    transforms.RandomRotation(20, resample=Image.BILINEAR),
    #     transforms.GaussianBlur(7, sigma=(0.1, 1.0)),
    transforms.ToTensor(),  # convert PIL to Pytorch Tensor
    normalize,
])

validation_transforms = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    normalize,
])
# path = '/Users/colinwan/Desktop/NYU_MSDS/2572/FinalProject/DL21SP20'
path = ''
train_dataset = CustomDataset(root=path + '/dataset',
                              split='train',
                              transform=train_transforms)
validation_dataset = CustomDataset(root=path + '/dataset',
                                   split='val',
                                   transform=validation_transforms)
BATCH_SIZE = 512

train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=1)
validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                    batch_size=BATCH_SIZE,
                                                    num_workers=1)

# from tqdm.notebook import tqdm
Exemplo n.º 24
0
import os
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models

from dataloader import CustomDataset
from submission import get_model, eval_transform, team_id, team_name, email_address

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint-path', type=str)
args = parser.parse_args()

evalset = CustomDataset(root='/dataset', split="val", transform=eval_transform)
evalloader = torch.utils.data.DataLoader(evalset, batch_size=256, shuffle=False, num_workers=2)

net = get_model()
checkpoint = torch.load(args.checkpoint_path)
net.load_state_dict(checkpoint)
net = net.cuda()

net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in evalloader:
        images, labels = data

        images = images.cuda()
Exemplo n.º 25
0
#################################################################
def addIndexToTrainData(trainset):
    returnList = []
    for iC, (x, y) in enumerate(trainset):
        if iC % 1000 == 0:
            print(">>>>>>>>", iC)
        returnList.append((x, y, iC))
    return returnList


# ==========================================================================
# ==========================================================================
from submission import get_model, eval_transform, team_id, team_name, email_address

trainset = CustomDataset(root='./dataset',
                         split="train",
                         transform=train_transform)
# trainset = addIndexToTrainData(trainset)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=2)

net = get_model().cuda()
net = torch.nn.DataParallel(net)
net = net.cuda()
# trainLabeledImage(net, trainloader)
#
unLabeledSet = CustomDataset(root='./dataset',
                             split="unlabeled",
                             transform=train_transform)
Exemplo n.º 26
0
def main():
    # t_img = cv2.imread(f'{root_path}/imgs/AFW_1051618982_1_0.jpg')

    lr = 0.001

    models = [MobileNetV1]
    model_name = ['mobilev1']
    for init_model in models:
        img_size = [(224, 224, 3), (224, 224, 1), (256, 256, 3), (256, 256, 1)]

        m_name = model_name[0]
        print(f'Train Model {m_name}')
        for i_size in img_size:
            print(f'Train Image size {i_size}')
            dataset = CustomDataset(root_path, anno_name, dataset_type, i_size)
            print(f"Train dataset size {len(dataset)}")
            train_loader = DataLoader(dataset,
                                      batch_size=8,
                                      num_workers=0,
                                      shuffle=True)

            model = init_model(i_size[0], i_size[-1], num_landmark)
            model.to(DEVICE)

            criterion = torch.nn.MSELoss().to(DEVICE)
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, 'min')

            pre_train = False
            model_path = 'E:/models'
            if pre_train:
                saved_state = torch.load(
                    f'{model_path}/f_landmark-tcdcn-52-0.0004.pth')
                model.load_state_dict(saved_state['model_state_dict'])
                optimizer.load_state_dict(saved_state['optimizer_state_dict'])
                init_epoch = saved_state['Epoch']
                min_loss = saved_state['loss']
            else:
                init_epoch = 0
                min_loss = 1

            epochs = 10000
            print(f'min loss : {min_loss}')
            for epoch in range(init_epoch, epochs):
                print(
                    f'{epoch} epoch start! : {datetime.datetime.now().strftime("%Y.%m.%d %H:%M:%S")}'
                )

                loss = train(train_loader, model, criterion, optimizer, DEVICE,
                             scheduler)
                print(f"    Average Loss : {loss:.6f}")

                if min_loss > loss:
                    min_loss = loss
                    state = {
                        'Epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss
                    }
                    model_path = os.path.join(
                        model_save_path, f'f_landmark-{m_name}-{i_size}.pth')
                    torch.save(state, model_path)
                    print(
                        f'Saved model_{m_name} [loss : {loss:.6f}, save_path : {model_path}]\n'
                    )

                if loss < 0.000001:
                    break
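Note: the train() helper called each epoch above is not part of this snippet. From its call site it runs one epoch and returns the average loss; a minimal sketch under that assumption (the batch layout and the scheduler step are assumptions):

def train(loader, model, criterion, optimizer, device, scheduler):
    """One training epoch; returns the average loss over the loader."""
    model.train()
    total_loss = 0.0
    for images, landmarks in loader:  # assumed (input, target) batch layout
        images, landmarks = images.to(device), landmarks.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), landmarks)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / max(len(loader), 1)
    scheduler.step(avg_loss)  # ReduceLROnPlateau is stepped on the monitored metric
    return avg_loss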
Exemplo n.º 27
0
def main():
	#TODO: Get args
	# python3 train_fixmatch.py --checkpoint-path ./checkpoint_path/model.pth --batch-size 1 --num-epochs 1 --num-steps 1 --train-from-start 1 --dataset-folder ./dataset
	parser = argparse.ArgumentParser()
	parser.add_argument('--checkpoint-path', type=str, default= "./checkpoints/model_fm_transfer.pth.tar")
	parser.add_argument('--transfer-path', type=str, default= "./checkpoints/model_transfer.pth.tar")
	parser.add_argument('--best-path', type= str, default= "./checkpoints/model_barlow_best.pth.tar")
	parser.add_argument('--batch-size', type=int, default= 64)
	parser.add_argument('--num-epochs', type=int, default= 10)
	parser.add_argument('--num-steps', type=int, default= 10)
	parser.add_argument('--train-from-start', type= int, default= 1)
	parser.add_argument('--dataset-folder', type= str, default= "./dataset")
	parser.add_argument('--new-dataset-folder', type= str, default= "./dataset")
	parser.add_argument('--learning-rate', type = float, default= 0.01)
	parser.add_argument('--threshold', type = float, default= 0.5)
	parser.add_argument('--mu', type= int, default= 7)
	parser.add_argument('--lambd', type= int, default= 1)
	parser.add_argument('--momentum', type= float, default= 0.9)
	parser.add_argument('--weight-decay', type= float, default= 0.001)
	parser.add_argument('--layers', type= int, default= 18)
	parser.add_argument('--fine-tune', type= int, default= 1)
	parser.add_argument('--new-data', type= int, default= 0)
	args = parser.parse_args()

	dataset_folder = args.dataset_folder
	batch_size_labeled = args.batch_size
	mu = args.mu
	batch_size_unlabeled = mu * args.batch_size
	batch_size_val = 256 #5120
	n_epochs = args.num_epochs
	n_steps = args.num_steps
	num_classes = 800
	threshold = args.threshold
	learning_rate = args.learning_rate
	momentum = args.momentum
	lamd = args.lambd
	tau = 0.95
	weight_decay = args.weight_decay
	checkpoint_path = args.checkpoint_path
	train_from_start = args.train_from_start
	n_layers = args.layers

	if torch.cuda.is_available():
		device = torch.device("cuda")
	else:
		device = torch.device("cpu")

	# print("pwd: ", os.getcwd())
	train_transform, val_transform = get_transforms()

	if args.new_data == 0:
		labeled_train_dataset = CustomDataset(root= args.dataset_folder, split = "train", transform = train_transform)
	else:
		labeled_train_dataset = CustomDataset(root= args.new_dataset_folder, split = "train_new", transform = train_transform)
	# labeled_train_dataset = CustomDataset(root= dataset_folder, split = "train", transform = train_transform)
	unlabeled_train_dataset = CustomDataset(root= dataset_folder, 
											split = "unlabeled", 
											transform = TransformFixMatch(mean = 0, std = 0))#TODO
											
	val_dataset = CustomDataset(root= dataset_folder, split = "val", transform = val_transform)

	labeled_train_loader = DataLoader(labeled_train_dataset, batch_size= batch_size_labeled, shuffle= True, num_workers= 4)
	unlabeled_train_loader = DataLoader(unlabeled_train_dataset, batch_size= batch_size_unlabeled, shuffle= True, num_workers= 4)
	val_loader = DataLoader(val_dataset, batch_size= batch_size_val, shuffle= False, num_workers= 4)



	labeled_iter = iter(labeled_train_loader)
	unlabeled_iter = iter(unlabeled_train_loader)


	model = wide_resnet50_2(pretrained=False, num_classes = 800)
	classifier = Classifier(ip= 2048, dp = 0)
	start_epoch = 0

	checkpoint = torch.load(args.transfer_path, map_location= device)
	model.load_state_dict(checkpoint['model_state_dict'])
	classifier.load_state_dict(checkpoint['classifier_state_dict'])

	param_groups = [dict(params=classifier.parameters(), lr=args.learning_rate)]

	if args.fine_tune:
		param_groups.append(dict(params=model.parameters(), lr=args.learning_rate))

	optimizer = torch.optim.SGD(param_groups, 
								lr = learning_rate,
								momentum= momentum,
								nesterov= True,
								weight_decay= weight_decay)

	scheduler = get_cosine_schedule_with_warmup(optimizer, 0, num_training_steps= n_epochs * n_steps)

	if torch.cuda.device_count() > 1:
		print("Let's use", torch.cuda.device_count(), "GPUs!")
		model = torch.nn.DataParallel(model)
		classifier = torch.nn.DataParallel(classifier)

	if train_from_start == 0:
		assert os.path.isfile(checkpoint_path), "Error: no checkpoint directory found!"
		print("Restoring model from checkpoint")
		# args.out = os.path.dirname(args.resume)
		checkpoint = torch.load(checkpoint_path)
		# best_acc = checkpoint['best_acc']
		start_epoch = checkpoint['epoch'] - 1
		model.load_state_dict(checkpoint['backbone_state_dict'])
		classifier.load_state_dict(checkpoint['classifier_state_dict'])
		optimizer.load_state_dict(checkpoint['optimizer'])
		scheduler.load_state_dict(checkpoint['scheduler'])

	model = model.to(device)
	classifier = classifier.to(device)
	

	model.train()
	losses = Average()
	losses_l = Average()
	losses_u = Average()
	mask_probs = Average()
	best_val_accuracy = 25.0 #TODO

	for epoch in tqdm(range(start_epoch, n_epochs)):
		if args.fine_tune:
			model.train()
			classifier.train()
		else:
			model.eval()
			classifier.train()

		for batch_idx in tqdm(range(n_steps)):
			try:
				img_lab, targets_lab = next(labeled_iter)
			except StopIteration:
				# restart the labeled iterator once it is exhausted
				labeled_iter = iter(labeled_train_loader)
				img_lab, targets_lab = next(labeled_iter)

			try:
				unlab, _ = next(unlabeled_iter)
				img_weak = unlab[0]
				img_strong = unlab[1]
			except StopIteration:
				# restart the unlabeled iterator once it is exhausted
				unlabeled_iter = iter(unlabeled_train_loader)
				unlab, _ = next(unlabeled_iter)
				img_weak = unlab[0]
				img_strong = unlab[1]
			
			img_lab = img_lab.to(device)
			targets_lab = targets_lab.to(device)
			img_weak = img_weak.to(device)
			img_strong = img_strong.to(device)

			img_cat = torch.cat((img_lab, img_weak, img_strong), dim = 0)
			logits_cat = classifier(model(img_cat))
			logits_lab = logits_cat[:batch_size_labeled]
			# print(logits_lab.size())
			logits_unlab = logits_cat[batch_size_labeled:]
			# print(logits_unlab)

			logits_weak, logits_strong = torch.chunk(logits_unlab, chunks= 2, dim = 0)

			pseudo_label = torch.softmax(logits_weak.detach()/tau, dim= 1)
			max_probs, targets_unlab = torch.max(pseudo_label, dim= 1)
			mask = max_probs.ge(threshold).float()
			
			loss_labeled = F.cross_entropy(logits_lab, targets_lab, reduction='mean')

			# print("CE: ", F.cross_entropy(logits_strong, targets_unlab, reduction= 'none').size())

			loss_unlabeled = (F.cross_entropy(logits_strong, targets_unlab, reduction= 'none') * mask).mean()

			# print("Loss labelled, loss unlabelled: ", loss_labeled, loss_unlabeled)

			loss_total = loss_labeled + lamd * loss_unlabeled

			# print("Total loss: ", loss_total)
			# loss_epoch += loss_total
			# loss_lab_epoch += loss_labeled
			# loss_unlab_epoch += loss_unlabeled
			losses.update(loss_total.item())
			losses_l.update(loss_labeled.item())
			losses_u.update(loss_unlabeled.item())
			mask_probs.update(mask.mean().item())

			optimizer.zero_grad()
			loss_total.backward()
			optimizer.step()
			scheduler.step()


			# break
			if batch_idx % 25 == 0:
				print(f"Epoch number: {epoch}, loss: {losses.avg}, loss lab: {losses_l.avg}, loss unlab: {losses_u.avg}, mask: {mask_probs.avg}, loss_here: {loss_total.item()}, best accuracy: {best_val_accuracy:.2f}", flush= True)
			# print(optimizer.param_groups[0]['lr'])
		

		save_checkpoint({
				'epoch': epoch + 1,
				'model_state_dict': model.state_dict(),
				'classifier_state_dict': classifier.state_dict(),
				'optimizer': optimizer.state_dict(),
				'scheduler': scheduler.state_dict(),
			}, checkpoint_path)

		model.eval()
		classifier.eval()
		with torch.no_grad():
			val_loss = 0
			val_size = 0
			total = 0
			correct = 0
			for batch in val_loader:
				logits_val = classifier(model(batch[0].to(device)))
				labels = batch[1].to(device)
				val_loss += F.cross_entropy(logits_val, labels)
				_, predicted = torch.max(logits_val.data, 1)
				total += labels.size(0)
				correct += (predicted == labels).sum().item()
				val_size += 1
				# break
		print(f"Val loss: {val_loss/val_size}, Accuracy: {(100 * correct / total):.2f}%", flush= True)
		if 100 * correct / total > best_val_accuracy:
			best_val_accuracy = 100 * correct / total
			best_val_loss = val_loss/val_size
			print(f"Saving the best model with {best_val_accuracy:.2f}% accuracy and {best_val_loss:.2f} loss", flush= True)
			save_checkpoint({
				'epoch': epoch + 1,
				'model_state_dict': model.state_dict(),
				'classifier_state_dict': classifier.state_dict(),
				'optimizer': optimizer.state_dict(),
				'scheduler': scheduler.state_dict(),
				'best_val_accuracy': best_val_accuracy,
				'best_val_loss': best_val_loss
			}, args.best_path)
		model.train()
		classifier.train()
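Note: the Average helper used for the running loss statistics is not included here; from its usage (update() and .avg) it is a standard running-average meter. A minimal sketch, assuming that interface:

class Average:
    """Running average of a scalar, e.g. a loss, across update() calls."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count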
Exemplo n.º 28
0
def infer(img_dir,classes_csv,model_fname,resnet_depth,score_thresh,out_dir, results_fname):

    # Create dataset
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for img_folder in img_dir:
        for file in os.listdir(img_folder):
            if file.endswith(".png"):
                img_list.append(os.path.join(img_folder, file))

    dataset_val = CustomDataset(img_list=img_list, class_list=classes_csv, transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    print(dataset_val.num_classes())

    # Create the model
    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes())
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes())
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes())
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes())
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes())
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    state_dict = torch.load(model_fname)
    from collections import OrderedDict

    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    # load params
    retinanet.load_state_dict(new_state_dict)

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    results = []

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}, Num objects: {}'.format(time.time() - st, len(scores)))

            idxs = np.where(scores.cpu() > score_thresh)  # move scores off the GPU before NumPy
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0)).astype(np.uint8).copy()

            bboxes = []
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0] / data['scale'][0])
                y1 = int(bbox[1] / data['scale'][0])
                x2 = int(bbox[2] / data['scale'][0])
                y2 = int(bbox[3] / data['scale'][0])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

                score = float(scores[idxs[0][j]])

                bboxes.append([x1, y1, x2, y2, score])

            img_fname = ntpath.basename(data['img_fname'][0])
            results.append([img_fname, bboxes])
    #         fig, ax = plt.subplots(figsize=(12, 12))
    #         ax.imshow(img, interpolation='bilinear')

    with open(out_dir+results_fname,"wb") as output_file:
        pickle.dump(results, output_file)
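Note: the detections written above can be read back with the matching pickle call; a small usage sketch (the file path below is a hypothetical stand-in for out_dir + results_fname):

import pickle

with open("./output/detections.pkl", "rb") as f:  # hypothetical path
    results = pickle.load(f)

# each entry is [image_filename, [[x1, y1, x2, y2, score], ...]]
for img_fname, bboxes in results:
    print(img_fname, len(bboxes), "detections")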
Exemplo n.º 29
0
aug_transform3 = transforms.Compose([
    transforms.RandomResizedCrop((96, 96),
                                 scale=(0.08, 1.0),
                                 ratio=(0.75, 1.3333333333333333)),
    rnd_color_jitter3,
    rnd_gray,
    transforms.ToTensor(),
])

train_transform = transforms.Compose([
    transforms.ToTensor(),
])

trainset = CustomDataset(root='/dataset',
                         split="train",
                         transform=train_transform)
augset1 = CustomDataset(root='/dataset',
                        split="train",
                        transform=aug_transform1)
augset2 = CustomDataset(root='/dataset',
                        split="train",
                        transform=aug_transform2)
augset3 = CustomDataset(root='/dataset',
                        split="train",
                        transform=aug_transform3)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=2)
augloader1 = torch.utils.data.DataLoader(augset1,
Exemplo n.º 30
0
def main_worker(gpu, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](num_classes=800,
                                       norm_layer=SubBatchNorm2d)
    # model = models.__dict__[args.arch](num_classes=800)

    # freeze all layers but the last fc
    for name, param in model.named_parameters():
        if name not in ['fc.weight', 'fc.bias']:
            param.requires_grad = False
    # init the fc layer
    model.fc.weight.data.normal_(mean=0.0, std=0.1)
    model.fc.bias.data.zero_()

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")

            # rename moco pre-trained keys
            state_dict = checkpoint['state_dict']
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith(
                        'encoder_q') and not k.startswith('encoder_q.fc'):
                    # remove prefix
                    state_dict[k[len("encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]

            args.start_epoch = 0
            msg = model.load_state_dict(state_dict, strict=False)
            # print(f"=> loading state_dict: \n{list(state_dict.keys())}")
            # print(f"=> missing state keys: \n{msg.missing_keys}")
            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}

            print("=> loaded pre-trained model '{}'".format(args.pretrained))
        else:
            raise ValueError("=> no pre-trained model found at '{}'".format(
                args.pretrained))

    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print("=> ERROR: gpu must be assigned")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = args.data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(96),  # add crop resize
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    transform_eval = transforms.Compose([
        transforms.Resize(128),  # add resize
        transforms.CenterCrop(96),  # add crop
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = CustomDataset(traindir, 'train', transform_train)
    eval_dataset = CustomDataset(traindir, 'val', transform_eval)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # training code
    for epoch in range(args.start_epoch, args.epochs):

        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if epoch == 0 or (epoch + 1) % args.eval_per_n_epoch == 0:
            accuracy = evaluate(eval_loader, model, args)
            print(f"=> Epoch: {epoch+1}, accuracy: {accuracy:.4f}")
            # remember best acc and save checkpoint
            is_best = accuracy > best_acc1
            best_acc1 = max(accuracy, best_acc1)
            print(f"=> Epoch: {epoch+1}, isBest? : {is_best}")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'accuracy': accuracy,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                save_dir=args.checkpoint_dir,
                epoch=(epoch + 1),
                filename=os.path.join(
                    args.checkpoint_dir,
                    'checkpoint_{:03d}.pth.tar'.format(epoch + 1)))
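Note: the adjust_learning_rate helper called at the start of each epoch is not shown in this snippet. MoCo-style training scripts usually implement it as either a stepwise or a cosine decay of args.lr; a minimal sketch under that assumption (args.cos and args.schedule are assumed fields):

import math

def adjust_learning_rate(optimizer, epoch, args):
    """Decay the learning rate: cosine if args.cos is set, otherwise step decay."""
    lr = args.lr
    if getattr(args, 'cos', False):
        lr *= 0.5 * (1.0 + math.cos(math.pi * epoch / args.epochs))
    else:
        for milestone in getattr(args, 'schedule', []):
            lr *= 0.1 if epoch >= milestone else 1.0
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr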