def train(model_name, index_dir, train_config_fp, resume_from_checkpoint, pretrained_model_fp):
    """Build the data module, model and trainer, then run the training loop.

    Args:
        model_name: Name of the model to build; validated with
            ``check_model_name`` and then passed to ``gen_model``.
        index_dir: Directory handed to ``OcrDataModule`` as ``index_dir``.
        train_config_fp: Path to a JSON training config. The keys read here
            are ``vocab_fp``, ``img_folder``, ``batch_size``, ``num_workers``
            and ``pin_memory``; the whole dict is also given to ``PlTrainer``.
        resume_from_checkpoint: Forwarded unchanged to ``trainer.fit``.
        pretrained_model_fp: If not ``None``, parameters are loaded from this
            file into the freshly built model before training starts.
    """
    check_model_name(model_name)

    # Augmentations applied to training samples. The commented-out entries are
    # alternatives that are currently disabled; they are kept for reference.
    train_transform = T.Compose([
        RandomStretchAug(min_ratio=0.5, max_ratio=1.5),
        # RandomCrop((8, 10)),
        T.RandomInvert(p=0.2),
        T.RandomApply([T.RandomRotation(degrees=1)], p=0.4),
        # T.RandomAutocontrast(p=0.05),
        # T.RandomPosterize(bits=4, p=0.3),
        # T.RandomAdjustSharpness(sharpness_factor=0.5, p=0.3),
        # T.RandomEqualize(p=0.3),
        # T.RandomApply([T.GaussianBlur(kernel_size=3)], p=0.5),
        NormalizeAug(),
        # RandomPaddingAug(p=0.5, max_pad_len=72),
    ])
    # Validation samples are only normalized, never augmented.
    val_transform = NormalizeAug()

    # Read the config inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(train_config_fp) as config_file:
        train_config = json.load(config_file)

    data_mod = OcrDataModule(
        index_dir=index_dir,
        vocab_fp=train_config['vocab_fp'],
        img_folder=train_config['img_folder'],
        train_transforms=train_transform,
        val_transforms=val_transform,
        batch_size=train_config['batch_size'],
        num_workers=train_config['num_workers'],
        pin_memory=train_config['pin_memory'],
    )

    # Debug aid (disabled): dump a few transformed samples for visual checks.
    # train_ds = data_mod.train
    # for i in range(min(100, len(train_ds))):
    #     visualize_example(train_transform(train_ds[i][0]), 'debugs/train-1-%d' % i)
    #     visualize_example(train_transform(train_ds[i][0]), 'debugs/train-2-%d' % i)
    #     visualize_example(train_transform(train_ds[i][0]), 'debugs/train-3-%d' % i)
    # val_ds = data_mod.val
    # for i in range(min(10, len(val_ds))):
    #     visualize_example(val_transform(val_ds[i][0]), 'debugs/val-1-%d' % i)
    #     visualize_example(val_transform(val_ds[i][0]), 'debugs/val-2-%d' % i)
    #     visualize_example(val_transform(val_ds[i][0]), 'debugs/val-2-%d' % i)
    # return

    trainer = PlTrainer(train_config, ckpt_fn=['cnocr', 'v%s' % MODEL_VERSION, model_name])
    model = gen_model(model_name, data_mod.vocab)
    logger.info(model)

    if pretrained_model_fp is not None:
        load_model_params(model, pretrained_model_fp)

    trainer.fit(model, datamodule=data_mod, resume_from_checkpoint=resume_from_checkpoint)
# ~~~~~~~~~~~~~~~~~
# :class:`~torchvision.transforms.RandomResizedCrop` (functional counterpart:
# :func:`~torchvision.transforms.functional.resized_crop`) picks a random
# region of the image, crops it, and then resizes that crop to the requested
# size. Four independent samples are drawn from the same input image.
resize_cropper = T.RandomResizedCrop(size=(32, 32))
resized_crops = list(map(resize_cropper, [orig_img] * 4))
plot(resized_crops)

####################################
# RandomInvert
# ~~~~~~~~~~~~
# :class:`~torchvision.transforms.RandomInvert` (functional counterpart:
# :func:`~torchvision.transforms.functional.invert`) inverts the colors of
# the input image at random.
inverter = T.RandomInvert()
invertered_imgs = list(map(inverter, [orig_img] * 4))
plot(invertered_imgs)

####################################
# RandomPosterize
# ~~~~~~~~~~~~~~~
# :class:`~torchvision.transforms.RandomPosterize` (functional counterpart:
# :func:`~torchvision.transforms.functional.posterize`) posterizes the image
# at random by reducing the number of bits kept for each color channel.
posterizer = T.RandomPosterize(bits=2)
posterized_imgs = list(map(posterizer, [orig_img] * 4))
plot(posterized_imgs)

####################################
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(1, os.path.dirname(os.path.abspath(__file__))) EXAMPLE_DIR = Path(__file__).parent.parent / 'docs/examples' INDEX_DIR = Path(__file__).parent.parent / 'data/test' IMAGE_DIR = Path(__file__).parent.parent / 'data/images' from cnocr import gen_model from cnocr.data_utils.aug import NormalizeAug from cnocr.dataset import OcrDataModule from cnocr.trainer import PlTrainer train_transform = transforms.Compose( [ transforms.RandomInvert(p=0.5), transforms.RandomErasing(p=0.05), transforms.RandomRotation(degrees=2), transforms.RandomAutocontrast(p=0.05), NormalizeAug(), ] ) val_transform = NormalizeAug() def test_trainer(): data_mod = OcrDataModule( index_dir=INDEX_DIR, vocab_fp=EXAMPLE_DIR / 'label_cn.txt', img_folder=IMAGE_DIR, train_transforms=train_transform,
def data_augmentation(ToTensor=False, Resize=None, Contrast=None, Equalize=None, HFlip=None, Invert=None,
                      VFlip=None, Rotation=None, Grayscale=None, Perspective=None, Erasing=None, Crop=None):
    """Assemble a ``transforms.Compose`` pipeline from the enabled options.

    Originally written 2021/03/23 by Mr.w.

    Every option left at its default is skipped. Enabled transforms are always
    appended in the order the parameters are listed below, regardless of the
    order in which keyword arguments are passed.

    Args:
        ToTensor: When truthy, start the pipeline with ``ToTensor()``. Note
            that converting to a tensor puts the channel axis first.
        Resize: Target size for ``Resize``, e.g. ``(500, 500)``.
        Contrast: Probability (0-1) of applying auto-contrast.
        Equalize: Probability (0-1) of equalizing the image histogram.
        HFlip: Probability (0-1) of a horizontal flip.
        Invert: Probability (0-1) of inverting the image colors.
        VFlip: Probability (0-1) of a vertical flip.
        Rotation: Rotation range in degrees, either a single number such as
            ``90`` or a pair such as ``[-90, 90]``.
        Grayscale: Probability (0-1) of converting to grayscale.
        Perspective: Probability (0-1) of a random perspective distortion
            (distortion scale fixed at 0.5).
        Erasing: Probability (0-1) of random erasing.
            NOTE(review): torchvision documents RandomErasing as a
            tensor-only transform, so this presumably needs ``ToTensor=True``
            or tensor inputs -- confirm against callers.
        Crop: Output size for ``RandomCrop``, e.g. ``(500, 500)``.

    Returns:
        A ``transforms.Compose`` wrapping the selected transforms (possibly
        empty when nothing is enabled).
    """
    pipeline = []

    if ToTensor:
        pipeline.append(transforms.ToTensor())
    # ``is not None`` (rather than ``!= None``) keeps the checks well defined
    # even for array-like arguments whose ``!=`` is element-wise.
    if Resize is not None:
        pipeline.append(transforms.Resize(Resize))
    if Contrast is not None:
        pipeline.append(transforms.RandomAutocontrast(p=Contrast))
    if Equalize is not None:
        pipeline.append(transforms.RandomEqualize(p=Equalize))
    if HFlip is not None:
        pipeline.append(transforms.RandomHorizontalFlip(p=HFlip))
    if Invert is not None:
        pipeline.append(transforms.RandomInvert(p=Invert))
    if VFlip is not None:
        pipeline.append(transforms.RandomVerticalFlip(p=VFlip))
    if Rotation is not None:
        # The ``resample`` keyword is intentionally not passed: it was
        # deprecated in favour of ``interpolation`` and later removed from
        # torchvision, where passing it raises ``TypeError``. Omitting it
        # keeps the library default on every version.
        pipeline.append(
            transforms.RandomRotation(Rotation, expand=False, center=None, fill=0))
    if Grayscale is not None:
        pipeline.append(transforms.RandomGrayscale(p=Grayscale))
    if Perspective is not None:
        pipeline.append(
            transforms.RandomPerspective(distortion_scale=0.5, p=Perspective, fill=0))
    if Erasing is not None:
        pipeline.append(
            transforms.RandomErasing(p=Erasing, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False))
    if Crop is not None:
        pipeline.append(
            transforms.RandomCrop(Crop, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'))

    return transforms.Compose(pipeline)