def __getitem__(self, index):
    """Return one training sample for center-point detection.

    Args:
        index: position in ``self.img_id`` of the image to load.

    Returns:
        Tuple ``(img, img_id, heatmap, regr, offset)`` where ``img`` is a
        normalized CHW array resized to 512x512, ``regr`` holds the object
        width/height maps and ``offset`` the sub-pixel center offsets,
        both shaped (2, 128, 128).
    """
    img_path = self.train_images + '{}.jpg'.format(self.img_id[index])
    if not self.augment_data:  # idiomatic truthiness instead of `== False`
        # Plain path: OpenCV loads BGR directly; just resize.
        img = cv2.resize(cv2.imread(img_path), (512, 512))
    else:
        # Augmented path: PIL loads RGB; jitter + blur, then convert back
        # to BGR so both branches feed the same color order downstream.
        img = Image.open(img_path).resize((512, 512))
        color = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.6, hue=0.4)
        blur = GaussianBlur((3, 3), sigma=(0.1, 2))
        img = blur(color(img))
        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    img = self.normalize(img)
    img = img.transpose([2, 0, 1])  # HWC -> CHW

    # Ground-truth maps built from the per-image label dictionary.
    heatmap, offset_x, offset_y, object_size_x, object_size_y = generate_heatmap_offset(
        self.img_id[index], self.dictionnary_labels_per_image)
    regr = np.zeros((2, 128, 128))
    offset = np.zeros((2, 128, 128))
    regr[0, :, :] = object_size_x
    regr[1, :, :] = object_size_y
    offset[0, :, :] = offset_x
    offset[1, :, :] = offset_y
    return img, self.img_id[index], heatmap, regr, offset
def create_blury_images():
    """Write 20 progressively blurrier copies of ./images/dog2.jpg.

    The Gaussian kernel grows as 3, 5, 7, ... 41 with a fixed sigma of
    2.0; outputs are saved as ./images/dog_blurNNN.png.
    """
    source = Image.open('./images/dog2.jpg')
    for idx in range(20):
        kernel = 3 + 2 * idx  # odd kernel sizes only
        blurred = GaussianBlur(kernel_size=kernel, sigma=2.0)(source)
        blurred.save(f'./images/dog_blur{idx:03d}.png')
def low_res_transform(crop_size, upscale_factor):
    """Build the low-resolution degradation pipeline.

    Blurs the input then downsamples it by ``upscale_factor`` with
    bicubic interpolation, returning a tensor-producing Compose.
    """
    steps = [
        ToPILImage(),
        # Lambda(randomJPEGCompresss),
        GaussianBlur(kernel_size=5, sigma=2),
        # RandomCrop(crop_size, Image.BICUBIC),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC),
        ToTensor(),
    ]
    return Compose(steps)
def __init__(self, images, objects):
    """Store image paths/arrays and their annotations, and build the
    fixed preprocessing pipeline (jitter, blur, 300x300 resize,
    ImageNet normalization)."""
    super(TextDetectionDataset, self).__init__()
    self.images = images
    self.objects = objects
    pipeline = [
        ColorJitter(),
        GaussianBlur(kernel_size=5),
        Resize((300, 300)),
        ToTensor(),
        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
    self.transforms = Compose(pipeline)
def main(args):
    """Train a doctr character-classification model on generated crops.

    Builds val/train ``CharacterGenerator`` datasets, optionally runs
    eval-only or an LR finder, trains with cosine/one-cycle scheduling,
    checkpoints on best validation loss, and optionally logs to W&B,
    pushes to the HF Hub, and exports to ONNX.
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # Load val data generator
    st = time.time()
    val_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.val_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]
        ),
        font_family=fonts,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
          f"{len(val_loader)} batches)")

    batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

    # Load doctr model
    model = classification.__dict__[args.arch](
        pretrained=args.pretrained, num_classes=len(vocab), classes=list(vocab))

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        # Fixed message typo: "targe" -> "target".
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    if args.test_only:
        print("Running evaluation")
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        return

    st = time.time()

    # Load train data generator
    train_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.train_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.9),
                # GaussianNoise
                T.RandomApply(Grayscale(3), 0.1),
                ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
                T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)), 0.3),
                RandomRotation(15, interpolation=InterpolationMode.BILINEAR),
            ]
        ),
        font_family=fonts,
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
          f"{len(train_loader)} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, list(map(vocab.__getitem__, target)))
        return

    # Optimizer
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader), eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr, args.epochs * len(train_loader))

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="character-classification",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "vocab": args.vocab,
                "scheduler": args.sched,
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb)
        # Validation loop at the end of each epoch
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        # W&B
        if args.wb:
            wandb.log(
                {
                    "val_loss": val_loss,
                    "acc": acc,
                }
            )

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="classification", run_config=args)

    if args.export_onnx:
        print("Exporting model to ONNX...")
        dummy_batch = next(iter(val_loader))
        dummy_input = dummy_batch[0].cuda() if torch.cuda.is_available() else dummy_batch[0]
        model_path = export_model_to_onnx(model, exp_name, dummy_input)
        print(f"Exported model saved in {model_path}")
def main(args):
    """Train a doctr object-detection model on the DocArtefacts dataset.

    Loads val/train splits, optionally runs eval-only or an LR finder,
    trains SGD + StepLR with F1-based checkpointing, and optionally logs
    metrics to W&B.
    """
    print(args)

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    st = time.time()
    val_set = DocArtefacts(
        train=False,
        download=True,
        img_transforms=T.Resize((args.input_size, args.input_size)),
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{len(val_loader)} batches)")

    # Load doctr model
    model = obj_detection.__dict__[args.arch](pretrained=args.pretrained, num_classes=5)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    metric = DetectionMetric(iou_thresh=0.5)

    if args.test_only:
        print("Running evaluation")
        recall, precision, mean_iou = evaluate(model, val_loader, metric, amp=args.amp)
        print(
            f"Recall: {recall:.2%} | Precision: {precision:.2%} |IoU: {mean_iou:.2%}"
        )
        return

    st = time.time()
    # Load train data generators
    train_set = DocArtefacts(
        train=True,
        download=True,
        img_transforms=Compose([
            T.Resize((args.input_size, args.input_size)),
            T.RandomApply(T.GaussianNoise(0., 0.25), p=0.5),
            ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
            T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)), .3),
        ]),
        sample_transforms=T.RandomHorizontalFlip(p=0.5),
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{len(train_loader)} batches)")

    if args.show_samples:
        images, targets = next(iter(train_loader))
        targets = convert_to_abs_coords(targets, images.shape)
        plot_samples(images, targets, train_set.CLASSES)
        return

    # Backbone freezing
    if args.freeze_backbone:
        for p in model.backbone.parameters():
            # BUG FIX: was `p.reguires_grad_(False)` (typo), which raised
            # AttributeError whenever --freeze-backbone was passed.
            p.requires_grad_(False)

    # Optimizer
    optimizer = optim.SGD([p for p in model.parameters() if p.requires_grad],
                          lr=args.lr, weight_decay=args.weight_decay)
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(name=exp_name, project="object-detection", config={
            "learning_rate": args.lr,
            "epochs": args.epochs,
            "weight_decay": args.weight_decay,
            "batch_size": args.batch_size,
            "architecture": args.arch,
            "input_size": args.input_size,
            "optimizer": "sgd",
            "framework": "pytorch",
            "scheduler": "step",
            "pretrained": args.pretrained,
            "amp": args.amp,
        })

    mb = master_bar(range(args.epochs))
    max_score = 0.

    for epoch in mb:
        fit_one_epoch(model, train_loader, optimizer, scheduler, mb, amp=args.amp)
        # Validation loop at the end of each epoch
        recall, precision, mean_iou = evaluate(model, val_loader, metric, amp=args.amp)
        # Harmonic mean of precision/recall; guarded against divide-by-zero.
        f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.

        if f1_score > max_score:
            print(
                f"Validation metric increased {max_score:.6} --> {f1_score:.6}: saving state..."
            )
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            max_score = f1_score
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "Undefined metric value, caused by empty GTs or predictions"
        else:
            log_msg += f"Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%}"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                'recall': recall,
                'precision': precision,
                'mean_iou': mean_iou,
            })

    if args.wb:
        run.finish()
def main(args):
    """BYOL-style self-supervised pretraining of a Vision Transformer.

    Two random augmentations of each image are fed through an online
    network (+ predictor) and an EMA "target" network; the symmetric
    cosine loss pulls the two views' embeddings together. Trains with
    AMP + OneCycleLR and saves the online weights every epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True

    # ImageNet stats for now
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    transform = Compose([
        ToImageMode("RGB"),
        Resize(1024),
        RandomCrop(224),
        RandomHorizontalFlip(p=0.5),
        RandomApply([
            ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2)
        ], p=0.8),
        RandomApply([GaussianBlur((3, 3), (1.5, 1.5))], p=0.1),
        RandomGrayscale(p=0.2),
        ToTensor(),
        Normalize(mean=mean, std=std),
        ToPatches(16)
    ])

    # Applying the same transform twice will give us
    # different transformations on the same image,
    # because the transformation's rng state changes.
    dataset = ImageDirectory(args.dataset, transform, transform)

    # TODO: hard coded for now, works on my 2x Titan RTX machine
    loader = DataLoader(dataset, batch_size=144, num_workers=40,
                        shuffle=True, pin_memory=True, drop_last=True)

    # We will chop off the final layer anyway,
    # therefore num_classes doesn't matter here.
    online = VisionTransformer(num_classes=1, C=3, H=224, W=224, P=16)
    target = VisionTransformer(num_classes=1, C=3, H=224, W=224, P=16)

    # Projection heads for both networks
    #online.final = mlp(768, 4096, 256)
    #target.final = mlp(768, 4096, 256)
    online.final = nn.Identity()
    target.final = nn.Identity()

    # Target network does not learn on its own.
    # Gets average of online network's weights.
    online.train()
    target.eval()

    for param in target.parameters():
        param.requires_grad = False

    # EMA update with decay 0.99 — `update` is defined elsewhere in the
    # project; presumably copies a weighted average of online weights
    # into target (verify against its definition).
    def update_target():
        update(target, online, 0.99)

    # In addition to projection heads,
    # The online network has predictor.
    #predictor = mlp(256, 4096, 256)
    predictor = mlp(768, 4096, 768)

    # Move everything to devices
    online = online.to(device)
    online = nn.DataParallel(online)

    predictor = predictor.to(device)
    predictor = nn.DataParallel(predictor)

    target = target.to(device)
    target = nn.DataParallel(target)

    # Negative cosine similarity rewritten as 2 - 2*cos(x, y); minimum
    # (0) when the normalized embeddings coincide.
    def criterion(x, y):
        x = nn.functional.normalize(x, dim=-1)
        y = nn.functional.normalize(y, dim=-1)
        return 2 - 2 * (x * y).sum(dim=-1)

    # Online and predictor learns, target gets assigned moving average of online network's weights.
    lr = 0.1
    epochs = 15

    optimizer = torch.optim.SGD(list(online.parameters()) + list(predictor.parameters()),
                                lr=lr, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=lr, steps_per_epoch=len(loader), epochs=epochs)
    scaler = torch.cuda.amp.GradScaler()

    step = 0
    running = 0

    for epoch in range(epochs):
        progress = tqdm(loader, desc=f"Epoch {epoch+1}", unit="batch")

        for inputs1, inputs2 in progress:
            assert inputs1.size() == inputs2.size()

            # Overlap data transfers to gpus, pinned memory
            inputs1 = inputs1.to(device, non_blocking=True)
            inputs2 = inputs2.to(device, non_blocking=True)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                # Target network is in eval mode and does not
                # require grads, forward no grad ctx to be sure
                with torch.no_grad():
                    labels1 = target(inputs1).detach()
                    labels2 = target(inputs2).detach()

                outputs1 = predictor(online(inputs1))
                outputs2 = predictor(online(inputs2))

                # Symmetrize the loss, both transformations
                # go through both networks, one at a time
                loss = criterion(outputs1, labels2) + criterion(
                    outputs2, labels1)
                loss = loss.mean()

            scaler.scale(loss).backward()

            # Transformers need their nails clipped
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(online.parameters(), 1)
            nn.utils.clip_grad_norm_(predictor.parameters(), 1)

            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            # After training the online network, we transfer
            # a weighted average of the weights to the target
            update_target()

            # NOTE(review): `running` accumulates loss weighted by batch
            # size but is divided by 100 (the step interval), so the
            # printed value is not a true per-sample or per-step mean; it
            # also prints after the very first batch (step 0). Confirm
            # whether this is intentional.
            running += loss.item() * inputs1.size(0)

            if step % 100 == 0:
                progress.write(f"loss: {running / 100}")
                running = 0

            step += 1

        torch.save(online.state_dict(), f"vt-{epoch + 1:03d}.pth")
def _build_transform_train(cfg, choices, expected_size, normalize):
    """Assemble the training-time transform pipeline from ``choices``.

    Each recognized choice appends its transform (announced on stdout);
    ToTensor is always inserted before the tensor-space options
    (cutout / normalize / noise / instance-norm). Returns a Compose.
    NOTE(review): the resize banner prints ``expected_size`` but the
    Resize itself uses ``cfg.INPUT.SIZE`` — confirm these always agree.
    """
    print('Building transform_train')
    pipeline = []

    # Mandatory resize first.
    print('+ resize to {}'.format(expected_size))
    pipeline.append(Resize(cfg.INPUT.SIZE))

    if 'random_flip' in choices:
        print('+ random flip')
        pipeline.append(RandomHorizontalFlip())

    if 'random_translation' in choices:
        print('+ random translation')
        pipeline.append(
            Random2DTranslation(cfg.INPUT.SIZE[0], cfg.INPUT.SIZE[1]))

    if 'random_crop' in choices:
        crop_padding = cfg.INPUT.CROP_PADDING
        print('+ random crop (padding = {})'.format(crop_padding))
        pipeline.append(RandomCrop(cfg.INPUT.SIZE, padding=crop_padding))

    if 'random_resized_crop' in choices:
        print('+ random resized crop')
        pipeline.append(RandomResizedCrop(cfg.INPUT.SIZE))

    if 'center_crop' in choices:
        print('+ center crop (on 1.125x enlarged input)')
        enlarged_size = [int(x * 1.125) for x in cfg.INPUT.SIZE]
        pipeline.append(Resize(enlarged_size))
        pipeline.append(CenterCrop(cfg.INPUT.SIZE))

    # AutoAugment policies.
    if 'imagenet_policy' in choices:
        print('+ imagenet policy')
        pipeline.append(ImageNetPolicy())

    if 'cifar10_policy' in choices:
        print('+ cifar10 policy')
        pipeline.append(CIFAR10Policy())

    if 'svhn_policy' in choices:
        print('+ svhn policy')
        pipeline.append(SVHNPolicy())

    # RandAugment variants.
    if 'randaugment' in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        m_ = cfg.INPUT.RANDAUGMENT_M
        print('+ randaugment (n={}, m={})'.format(n_, m_))
        pipeline.append(RandAugment(n_, m_))

    if 'randaugment_fixmatch' in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        print('+ randaugment_fixmatch (n={})'.format(n_))
        pipeline.append(RandAugmentFixMatch(n_))

    if 'randaugment2' in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        print('+ randaugment2 (n={})'.format(n_))
        pipeline.append(RandAugment2(n_))

    # Photometric augmentations.
    if 'colorjitter' in choices:
        print('+ color jitter')
        pipeline.append(
            ColorJitter(brightness=cfg.INPUT.COLORJITTER_B,
                        contrast=cfg.INPUT.COLORJITTER_C,
                        saturation=cfg.INPUT.COLORJITTER_S,
                        hue=cfg.INPUT.COLORJITTER_H))

    if 'randomgrayscale' in choices:
        print('+ random gray scale')
        pipeline.append(RandomGrayscale(p=cfg.INPUT.RGS_P))

    if 'gaussian_blur' in choices:
        print(f'+ gaussian blur (kernel={cfg.INPUT.GB_K})')
        pipeline.append(
            RandomApply([GaussianBlur(cfg.INPUT.GB_K)], p=cfg.INPUT.GB_P))

    # Everything below operates on tensors.
    print('+ to torch tensor of range [0, 1]')
    pipeline.append(ToTensor())

    if 'cutout' in choices:
        cutout_n = cfg.INPUT.CUTOUT_N
        cutout_len = cfg.INPUT.CUTOUT_LEN
        print('+ cutout (n_holes={}, length={})'.format(cutout_n, cutout_len))
        pipeline.append(Cutout(cutout_n, cutout_len))

    if 'normalize' in choices:
        print('+ normalization (mean={}, '
              'std={})'.format(cfg.INPUT.PIXEL_MEAN, cfg.INPUT.PIXEL_STD))
        pipeline.append(normalize)

    if 'gaussian_noise' in choices:
        print('+ gaussian noise (mean={}, std={})'.format(
            cfg.INPUT.GN_MEAN, cfg.INPUT.GN_STD))
        pipeline.append(GaussianNoise(cfg.INPUT.GN_MEAN, cfg.INPUT.GN_STD))

    if 'instance_norm' in choices:
        print('+ instance normalization')
        pipeline.append(InstanceNormalization())

    return Compose(pipeline)