def get_dataset(options):
    # Build the evaluation dataloader for the chosen dataset
    if options.dataset == 'miniImageNet':
        from torchmeta.datasets import MiniImagenet
        mean_pix = [x / 255 for x in [129.37731888, 124.10583864, 112.47758569]]
        std_pix = [x / 255 for x in [68.20947949, 65.43124043, 70.45866994]]
        # ResNet18 expects larger inputs, so resize to 224 first
        if options.network == 'ResNet18':
            transform = Compose([
                Resize(224),
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ])
        else:
            transform = Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ])
        dataset_test = MiniImagenet(
            "data",
            num_classes_per_task=options.way,
            transform=transform,
            target_transform=Categorical(num_classes=options.way),
            meta_val=True,
            download=False)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    elif options.dataset == 'tieredImageNet':
        from torchmeta.datasets import TieredImagenet
        mean_pix = [x / 255 for x in [129.37731888, 124.10583864, 112.47758569]]
        std_pix = [x / 255 for x in [68.20947949, 65.43124043, 70.45866994]]
        dataset_test = TieredImagenet(
            "data",
            num_classes_per_task=options.way,
            transform=Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ]),
            target_transform=Categorical(num_classes=options.way),
            meta_test=True,
            download=True)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    elif options.dataset == 'CIFAR_FS':
        from torchmeta.datasets import CIFARFS
        mean_pix = [x / 255.0 for x in [129.37731888, 124.10583864, 112.47758569]]
        std_pix = [x / 255.0 for x in [68.20947949, 65.43124043, 70.45866994]]
        dataset_test = CIFARFS(
            "data",
            num_classes_per_task=options.way,
            transform=Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ]),
            target_transform=Categorical(num_classes=options.way),
            meta_test=True,
            download=True)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    elif options.dataset == 'FC100':
        from torchmeta.datasets import FC100
        mean_pix = [x / 255.0 for x in [129.37731888, 124.10583864, 112.47758569]]
        std_pix = [x / 255.0 for x in [68.20947949, 65.43124043, 70.45866994]]
        dataset_test = FC100(
            "data",
            num_classes_per_task=options.way,
            transform=Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ]),
            target_transform=Categorical(num_classes=options.way),
            meta_test=True,
            download=True)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    else:
        raise ValueError(f"Cannot recognize the dataset type: {options.dataset}")
    return dataloader_test
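# A minimal consumption sketch for the loader returned above (the episode
# budget of 600 is a hypothetical choice). With torchmeta's ClassSplitter,
# each batch is a dict holding a support split ("train") and a query split ("test").
dataloader_test = get_dataset(options)
for i, batch in enumerate(dataloader_test):
    support_inputs, support_targets = batch["train"]  # [1, way * shot, C, H, W]
    query_inputs, query_targets = batch["test"]       # [1, way * query, C, H, W]
    # ... run one episodic evaluation step here ...
    if i + 1 >= 600:
        break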
from common.dataset import get_test_data_loader SEED = 12345 DEBUG = True OUTPUT_PATH = "output" dataset_path = Path( "/home/fast_storage/imaterialist-challenge-furniture-2018/") SAMPLE_SUBMISSION_PATH = dataset_path / "sample_submission_randomlabel.csv" TEST_TRANSFORMS = [ RandomResizedCrop(350, scale=(0.7, 1.0), interpolation=3), RandomVerticalFlip(p=0.5), RandomHorizontalFlip(p=0.5), ToTensor(), Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ] N_CLASSES = 128 BATCH_SIZE = 24 NUM_WORKERS = 15 TEST_LOADER = get_test_data_loader(dataset_path=dataset_path / "test", test_data_transform=TEST_TRANSFORMS, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, cuda=True) MODEL = ( Path(OUTPUT_PATH) / "training_FurnitureInceptionV4_350_20180427_1512" / "model_FurnitureInceptionV4_350_13_val_loss=0.5497886.pth").as_posix()
df = main_df df['class_id'] = df['target'].map(class_map) df['is_manip'] = 0 df = df[df['target'].notnull()] df['to_rotate'] = 0 return df return None train_transform = Compose([ albu_trans.RandomCrop(target_size), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) val_transform = Compose([ albu_trans.CenterCrop(target_size), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) def add_args(parser): arg = parser.add_argument arg('--root', default='runs/debug', help='checkpoint root') arg('--batch-size', type=int, default=4) arg('--n-epochs', type=int, default=30) arg('--lr', type=float, default=0.0001)
EPOCH = 20
BATCH_SIZE = 64
LR = 0.001
MODEL = "senet"
GPU_NUMS = 0
MODEL_LIST = [
    {
        "name": "alexnet",
        "model": alexnet,
        "pretrained": False,
        "transform": Compose([Resize(256),
                              RandomCrop(224),
                              RandomHorizontalFlip(),
                              ToTensor(),
                              Normalize([0.485, 0.456, 0.406],
                                        [0.229, 0.224, 0.225])])
    },
    {
        "name": "vgg16",
        "model": vgg16,
        "pretrained": False,
        "transform": ToTensor()
    },
    {
        "name": "vgg19",
        "model": vgg19,
        "pretrained": False,
        "transform": ToTensor()
    },
    {
def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu"): if name not in _MODELS: raise RuntimeError( f"Model {name} not found; available models = {available_models()}") model_path = _download(_MODELS[name]) model = torch.jit.load(model_path, map_location=device).eval() n_px = model.input_resolution.item() # patch the device names device_holder = torch.jit.trace( lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) device_node = [ n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n) ][-1] def patch_device(module): graphs = [module.graph] if hasattr(module, "graph") else [] if hasattr(module, "forward1"): graphs.append(module.forward1.graph) for graph in graphs: for node in graph.findAllNodes("prim::Constant"): if "value" in node.attributeNames() and str( node["value"]).startswith("cuda"): node.copyAttributes(device_node) model.apply(patch_device) patch_device(model.encode_image) patch_device(model.encode_text) # patch dtype to float32 on CPU if device == "cpu": float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] float_node = float_input.node() def patch_float(module): graphs = [module.graph] if hasattr(module, "graph") else [] if hasattr(module, "forward1"): graphs.append(module.forward1.graph) for graph in graphs: for node in graph.findAllNodes("aten::to"): inputs = list(node.inputs()) for i in [ 1, 2 ]: # dtype can be the second or third argument to aten::to() if inputs[i].node()["value"] == 5: inputs[i].node().copyAttributes(float_node) model.apply(patch_float) patch_float(model.encode_image) patch_float(model.encode_text) model.float() transform = Compose([ Resize(n_px, interpolation=Image.BICUBIC), CenterCrop(n_px), lambda image: image.convert("RGB"), ToTensor(), Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), ]) return model, transform
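# A hedged usage sketch for load(); "ViT-B/32" stands in for any key of
# _MODELS (see available_models()), and the image path is a placeholder.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = load("ViT-B/32", device=device)
image = preprocess(Image.open("example.jpg")).unsqueeze(0).to(device)  # [1, 3, n_px, n_px]
with torch.no_grad():
    image_features = model.encode_image(image)  # one embedding per image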
def main(args): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): print("Using CUDA, benchmarking implementations", file=sys.stderr) torch.backends.cudnn.benchmark = True # ImageNet statistics (because we use pre-trained model) mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] transform = Compose([ ToImageMode("RGB"), Resize(256), CenterCrop(224), ToTensor(), Normalize(mean=mean, std=std) ]) dataset = ImageDirectory(root=args.dataset, transform=transform) dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=args.num_workers) # Binary classifier on top of resnet50 model = resnet50() model.fc = nn.Linear(model.fc.in_features, 2) model = model.to(device) model = nn.DataParallel(model) # Restore trained weights weights = torch.load(str(args.model), map_location=device) model.load_state_dict(weights) model.eval() results = [] with torch.no_grad(): for inputs, paths in tqdm(dataloader, desc="infer", unit="batch", ascii=True): inputs = inputs.to(device) outputs = model(inputs) _, preds = torch.max(outputs, dim=1) preds = preds.data.cpu().numpy() probs = nn.functional.softmax(outputs, dim=1) probs = probs.data.cpu().numpy() for path, pred, prob in zip(paths, preds, probs): result = { "class": pred.item(), "probability": round(prob.max().item(), 3), "path": Path(path).name } results.append(result) JsonIO.save(args.results, results)
def main(args): model = load_config(args.model) dataset = load_config(args.dataset) cuda = model["common"]["cuda"] device = torch.device("cuda" if cuda else "cpu") def map_location(storage, _): return storage.cuda() if cuda else storage.cpu() if cuda and not torch.cuda.is_available(): sys.exit("Error: CUDA requested but not available") num_classes = len(dataset["common"]["classes"]) # https://github.com/pytorch/pytorch/issues/7178 chkpt = torch.load(args.checkpoint, map_location=map_location) net = UNet(num_classes).to(device) net = nn.DataParallel(net) if cuda: torch.backends.cudnn.benchmark = True net.load_state_dict(chkpt) net.eval() mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] transform = Compose([ConvertImageMode(mode="RGB"), ImageToTensor(), Normalize(mean=mean, std=std)]) directory = BufferedSlippyMapDirectory(args.tiles, transform=transform, size=args.tile_size, overlap=args.overlap) loader = DataLoader(directory, batch_size=args.batch_size) # don't track tensors with autograd during prediction with torch.no_grad(): for images, tiles in tqdm(loader, desc="Eval", unit="batch", ascii=True): images = images.to(device) outputs = net(images) # manually compute segmentation mask class probabilities per pixel probs = nn.functional.softmax(outputs, dim=1).data.cpu().numpy() for tile, prob in zip(tiles, probs): x, y, z = list(map(int, tile)) # we predicted on buffered tiles; now get back probs for original image prob = directory.unbuffer(prob) # Quantize the floating point probabilities in [0,1] to [0,255] and store # a single-channel `.png` file with a continuous color palette attached. assert prob.shape[0] == 2, "single channel requires binary model" assert np.allclose(np.sum(prob, axis=0), 1.), "single channel requires probabilities to sum up to one" foreground = prob[1:, :, :] anchors = np.linspace(0, 1, 256) quantized = np.digitize(foreground, anchors).astype(np.uint8) palette = continuous_palette_for_color("pink", 256) out = Image.fromarray(quantized.squeeze(), mode="P") out.putpalette(palette) os.makedirs(os.path.join(args.probs, str(z), str(x)), exist_ok=True) path = os.path.join(args.probs, str(z), str(x), str(y) + ".png") out.save(path, optimize=True)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())
print("Using", device)

input_size = 256
frames_per_video = int(sys.argv[2])

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize_transform = Normalize(mean, std)

facedet = BlazeFace().to(device)
facedet.load_weights("./helpers/blazeface.pth")
facedet.load_anchors("./helpers/anchors.npy")
_ = facedet.train(False)

video_reader = VideoReader()
video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn, facedet)

# Load and initialize models
models = []
weights = []
stack_models = []
transform_train = transforms.Compose([ # RandomApply([cnx_aug_thin_characters()], p=0.2), # RandomApply([cnx_aug_bold_characters()], p=0.4), # cnd_aug_randomResizePadding(imgH, imgW, min_scale, max_scale, fill=fill_color), cnd_aug_resizePadding(imgW, imgH, fill=fill_color), RandomApply([cnd_aug_add_line()], p=0.3), RandomApply([cnx_aug_blur()], p=0.3), ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), RandomApply([RandomAffine(shear=(-20, 20), translate=(0.0, 0.05), degrees=0, # degrees=2, # scale=(0.8, 1), fillcolor=fill_color)], p=0.3) , ToTensor() , Normalize(mean, std) ]) transform_test = transforms.Compose([ # cnd_aug_randomResizePadding(imgH, imgW, min_scale, max_scale, fill=fill_color, train=False), cnd_aug_resizePadding(imgW, imgH, fill=fill_color, train=False), ToTensor(), Normalize(mean, std) ]) parser = argparse.ArgumentParser() parser.add_argument('--root', default=data_dir, help='path to root folder') parser.add_argument('--train', default=train_file, help='path to train set') parser.add_argument('--val', default=val_file, help='path to val set') parser.add_argument('--workers', type=int, help='number of data loading workers', default=workers) parser.add_argument('--batch_size', type=int, default=batch_size, help='input batch size')
def imagenet(root: str,
             img_size: int = 224,
             batch_size: int = 32,
             augment: bool = True,
             workers: int = 6,
             splits: Union[str, Tuple[str]] = ('train', 'val'),
             tiny: bool = False,
             pin_memory: bool = True,
             use_cache: bool = False,
             pre_cache: bool = False) -> Union[DataLoader, List[DataLoader]]:
    """Data loader for the ImageNet dataset.

    Args:
        root: The root directory where the image data is stored. Must contain a
            `train` and `val` directory with training and validation data,
            respectively. If `tiny` is set to True, it must contain a `tiny`
            directory.
        img_size: The size of the image.
        batch_size: The batch size.
        augment: Whether to use data augmentation techniques.
        workers: The number of worker processes to use when loading the data
            from disk.
        splits: Which splits of the data to return. Possible values are
            `train`, `val` and `test`.
        tiny: Whether to use the `Tiny ImageNet dataset
            <https://tiny-imagenet.herokuapp.com/>`_ instead of the full-size
            data. If True, `root` must contain a `tiny` directory with `train`
            and `val` directories inside.
        pin_memory: Whether to use PyTorch's `pin_memory` mechanism.
        use_cache: Whether to cache data in a `Cache` object.
        pre_cache: Whether to run caching before the first epoch.

    Returns:
        A single data loader, or a list of data loaders for the chosen splits.
    """
    if tiny:
        root = os.path.join(root, 'tiny')
    train_dir = os.path.join(root, 'train')
    test_dir = os.path.join(root, 'val')
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    val_transform_list = list()
    if not tiny:
        val_transform_list.append(Resize(int(img_size * 8 / 7)))
        val_transform_list.append(CenterCrop(img_size))
    val_transform_list.append(ToTensor())
    val_transform_list.append(normalize)
    val_transform = Compose(val_transform_list)

    train_transform_list = list()
    if tiny:
        train_transform_list.append(RandomCrop(img_size, padding=8))
    else:
        train_transform_list.append(RandomResizedCrop(img_size))
    train_transform_list.append(RandomHorizontalFlip())
    train_transform_list.append(ToTensor())
    train_transform_list.append(normalize)
    train_transform = Compose(train_transform_list)

    loader_list = list()
    if 'train' in splits:
        train_set = ImageFolder(train_dir,
                                train_transform if augment else val_transform)
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                                  num_workers=workers, pin_memory=pin_memory)
        loader_list.append(train_loader)

    if 'val' in splits or 'test' in splits:
        val_test_set = ImageFolder(test_dir, val_transform)
        val_set, test_set = torch.utils.data.random_split(val_test_set, [25000, 25000])
        if 'test' in splits:
            if use_cache:
                test_set = Cashed(test_set, img_size, channels=3)
            test_loader = DataLoader(test_set, batch_size=batch_size,
                                     num_workers=workers, pin_memory=pin_memory)
            if use_cache and pre_cache:
                print("Caching")
                for _ in tqdm(test_loader):
                    pass
                test_loader.dataset.set_use_cache(True)
                # test_loader.dataset.pin_memory()
            loader_list.append(test_loader)
        if 'val' in splits:
            if use_cache:
                val_set = Cashed(val_set, img_size, channels=3)
            val_loader = DataLoader(val_set, batch_size=batch_size,
                                    num_workers=workers, pin_memory=pin_memory)
            if use_cache and pre_cache:
                print("Caching")
                for _ in tqdm(val_loader):
                    pass
                val_loader.dataset.set_use_cache(True)
                # val_loader.dataset.pin_memory()
            loader_list.append(val_loader)

    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
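# A minimal usage sketch for imagenet(); the dataset path is hypothetical.
# Loaders come back in the order requested via `splits`.
train_loader, val_loader = imagenet('/data/imagenet', img_size=224,
                                    batch_size=64, splits=('train', 'val'))
images, labels = next(iter(train_loader))  # images: [64, 3, 224, 224]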
def gtsrb(root: str,
          img_size: int = 32,
          batch_size: int = 32,
          workers: int = 6,
          splits: Union[str, Tuple[str]] = ('train', 'val'),
          pin_memory: bool = True) -> Union[LoaderTypes, Cashed, LoaderLists]:
    """Data loader for the `German Traffic Sign Recognition Benchmark
    <http://benchmark.ini.rub.de/?section=gtsrb&subsection=news>`_.

    Args:
        root: The root directory where the image data is stored. Must contain a
            `train`, `val` and `test` directory with training, validation and
            test data respectively.
        img_size: The size of the image.
        batch_size: The batch size.
        workers: The number of worker processes to use when loading the data
            from disk.
        splits: Which splits of the data to return. Possible values are
            `train`, `val` and `test`.
        pin_memory: Whether to use PyTorch's `pin_memory` mechanism.

    Returns:
        A single data loader, or a list of data loaders for the chosen splits.
    """
    train_dir = os.path.join(root, 'train')
    val_dir = os.path.join(root, 'val')
    test_dir = os.path.join(root, 'test')
    normalize = Normalize([0.34038433, 0.3119956, 0.32119358],
                          [0.05087305, 0.05426421, 0.05859348])
    if img_size > 32:
        val_transform = Compose([Resize(int(img_size * 8 / 7)),
                                 CenterCrop(img_size), ToTensor(), normalize])
        train_transform = Compose([RandomResizedCrop(img_size),
                                   RandomAffine(degrees=15, translate=(0.1, 0.1),
                                                shear=10),
                                   ColorJitter(brightness=0.1, contrast=0.1,
                                               saturation=0.1, hue=0.1),
                                   ToTensor(), normalize])
    else:
        val_transform = Compose([Resize(img_size + 10), CenterCrop(img_size),
                                 ToTensor(), normalize])
        train_transform = Compose([RandomCrop(img_size, padding=4),
                                   RandomAffine(degrees=15, translate=(0.1, 0.1),
                                                scale=(0.9, 1.1), shear=10),
                                   ColorJitter(brightness=0.1, contrast=0.1,
                                               saturation=0.1, hue=0.1),
                                   ToTensor(), normalize])

    loader_list = list()
    if 'train' in splits:
        train_set = ImageFolder(train_dir, train_transform)
        # Balance the classes with a weighted sampler
        weights = list()
        for c in range(43):
            dir_name = f"000{c}" if c > 9 else f"0000{c}"
            weights.append(len(os.listdir(os.path.join(train_dir, dir_name))[:-1]))
        weights = 1 / np.array(weights)
        weights = np.array([weights[t] for t in train_set.targets])
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            torch.from_numpy(weights).double(), len(weights))
        train_loader = DataLoader(train_set, batch_size=batch_size, sampler=sampler,
                                  num_workers=workers, pin_memory=pin_memory)
        loader_list.append(train_loader)
    if 'val' in splits:
        val_set = ImageFolder(val_dir, val_transform)
        if img_size > 32:
            val_set = Cashed(val_set, img_size, channels=3)
            val_loader = DataLoader(val_set, batch_size=batch_size,
                                    num_workers=workers, pin_memory=pin_memory)
            for _ in val_loader:
                pass
            val_loader.dataset.set_use_cache(True)
            val_loader.dataset.pin_memory()
            loader_list.append(val_loader)
        else:
            val_set = Memory(val_set, img_size=img_size, channels=3)
            for _ in val_set:
                pass
            val_set.set_use_cache(True)
            val_set.pin_memory()
            loader_list.append(val_set)
    if 'test' in splits:
        test_set = ImageFolder(test_dir, val_transform)
        test_set = Memory(test_set, img_size=img_size, channels=3)
        for _ in test_set:
            pass
        test_set.set_use_cache(True)
        test_set.pin_memory()
        loader_list.append(test_set)
    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))  # Bx128x1x1
        self.fc_layer = nn.Linear(128, 10)  # Bx128 -> Bx10

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)  # Bx128x1x1 -> Bx128 (keeps the batch dim even when B == 1)
        x = self.fc_layer(x)
        return x


transforms = Compose([ToTensor(),                         # -> [0, 1]
                      Normalize(mean=[0.5], std=[0.5])])  # -> [-1, 1]
dataset = MNIST(root='.', download=True, transform=transforms, train=True)
data_loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

model = CNN()
criterion = nn.CrossEntropyLoss()  # loss function
optim = torch.optim.Adam(model.parameters(), lr=0.01)

list_loss = list()
for epoch in range(10):
    for input, label in tqdm(data_loader):
        results = model(input)
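        # A hedged sketch of the rest of this training step (the chunk ends
        # mid-loop): a standard cross-entropy step against integer labels.
        loss = criterion(results, label)
        optim.zero_grad()   # clear gradients from the previous step
        loss.backward()     # backpropagate
        optim.step()        # update parameters
        list_loss.append(loss.detach().item())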
def __init__(self, file_path, imageSize): self.df = pd.read_csv(file_path, delim_whitespace=True, header=None) self.transform = Compose([Resize(imageSize), RandomCrop(imageSize), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
def __init__(self, data_dir, imageSize): self.data_files = os.listdir(data_dir) self.transform = Compose([Resize(imageSize), RandomCrop(imageSize), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) self.data_files.sort() self.root = data_dir self.imageSize = imageSize
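# A hedged sketch of the matching __len__/__getitem__ for the dataset above,
# assuming each entry in self.data_files is a PIL-readable image file:
def __len__(self):
    return len(self.data_files)

def __getitem__(self, index):
    path = os.path.join(self.root, self.data_files[index])
    img = Image.open(path).convert('RGB')
    return self.transform(img)  # Resize -> RandomCrop -> ToTensor -> Normalize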
tmpstr = tmpstr + ')' return tmpstr LEARNING_RATE = 0.01 NEPOCHS = 30 BATCH_SIZE = 32 HIDDEN_SIZE = 32 NUM_LAYERS = 1 inputs = torch.from_numpy(np.abs(data)).type(FloatTensor) targets = Variable(torch.from_numpy(targets_onehot).type(LongTensor)) mean = [torch.mean(inputs)] # [-0.5329] std = [torch.std(inputs)] # [1302.4]# torch.std(inputs) norm = Normalize(mean, std) inputs = norm(inputs.permute(2, 0, 1)).permute(1, 2, 0) data_shape = inputs.shape # inputs.size() rand_seed = 1729 train, val, test = get_split_ind(rand_seed, data_shape, split_frac) train_data = inputs[train, :, :] val_data = inputs[val, :, :] test_data = inputs[test, :, :] train_label = targets[train, :] val_label = targets[val, :] test_label = targets[test, :] # variable->np error as variable stores history of the object and np has no provision # variable.data-> tensor->.numpy() gives array this can only be done on CPU so use .cpu() train_data_shape = train_data.size() # print np.histogram(train_label.data.cpu().numpy(), class_num)[0], np.histogram(test_label.data.cpu().numpy(), class_num)[0], np.unique(train_label.data.cpu().numpy())
def create_split_loaders(batch_size, p_val=0.1, p_test=0.2, shuffle=True,
                         show_sample=False, extras=None):
    """Creates the DataLoader objects for the training, validation, and test sets.

    Params:
    -------
    - batch_size: (int) mini-batch size to load at a time
    - p_val: (float) fraction of the dataset to use for validation
    - p_test: (float) fraction of the dataset to split off for testing
    - shuffle: (bool) whether to shuffle the dataset before splitting
    - show_sample: (bool) plot a mini-example as a grid of the dataset
    - extras: (dict) if CUDA/GPU computing is supported, contains:
        - num_workers: (int) number of subprocesses to use while loading the dataset
        - pin_memory: (bool) for use with CUDA; copy tensors into pinned memory
          (set to True if using a GPU)
      Otherwise, extras is None or an empty dict.

    Returns:
    --------
    - train_loader: (DataLoader) the iterator for the training set
    - val_loader: (DataLoader) the iterator for the validation set
    - test_loader: (DataLoader) the iterator for the test set
    """
    tf = Compose([
        Resize((416, 416)),
        # transforms.RandomHorizontalFlip(),
        # transforms.RandomVerticalFlip(),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    dataset = PascalVOC2012Dataset(mode='trainval', transforms=tf)

    # Dimensions and indices of training set
    dataset_size = len(dataset)
    all_indices = list(range(dataset_size))

    # Shuffle dataset before dividing into training & test sets
    if shuffle:
        np.random.seed(15)
        np.random.shuffle(all_indices)

    # Create the validation split from the full dataset
    val_split = int(np.floor(p_val * dataset_size))
    train_ind, val_ind = all_indices[val_split:], all_indices[:val_split]

    # Separate a test split from the training dataset
    test_split = int(np.floor(p_test * len(train_ind)))
    train_ind, test_ind = train_ind[test_split:], train_ind[:test_split]

    # Use a SubsetRandomSampler as the iterator for each subset
    sample_train = SubsetRandomSampler(train_ind)
    sample_test = SubsetRandomSampler(test_ind)
    sample_val = SubsetRandomSampler(val_ind)

    num_workers = 32
    pin_memory = False
    # If CUDA is available
    if extras:
        num_workers = extras["num_workers"]
        pin_memory = extras["pin_memory"]

    # Define the training, test, & validation DataLoaders
    train_loader = DataLoader(dataset, batch_size=batch_size,
                              sampler=sample_train, num_workers=num_workers,
                              pin_memory=pin_memory)
    # num_workers is deliberately 0 for the test loader
    test_loader = DataLoader(dataset, batch_size=batch_size,
                             sampler=sample_test, num_workers=0,
                             pin_memory=pin_memory)
    val_loader = DataLoader(dataset, batch_size=batch_size,
                            sampler=sample_val, num_workers=num_workers,
                            pin_memory=pin_memory)

    # Return the training, validation, test DataLoader objects
    return train_loader, val_loader, test_loader
def perception(self, fake_imgs, real_imgs): normalizer = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) fake_imgs = normalizer(fake_imgs) real_imgs = normalizer(real_imgs) return F.l1_loss(self.loss_network(fake_imgs), self.loss_network(real_imgs))
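# A hedged construction sketch for the loss network used above; a frozen
# VGG16 feature extractor is a common choice for perceptual losses, but this
# is an assumption: the chunk does not show how self.loss_network is built.
from torchvision.models import vgg16

loss_network = vgg16(pretrained=True).features[:16].eval()
for p in loss_network.parameters():
    p.requires_grad = False  # keep the perceptual-loss network frozen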
def precompute_image_feat(mode="train"): batchsize = 3 path = os.path.join("/media/datas1/dataset/clevr/CLEVR_v1.0/images/", mode) path_to_feat = "/media/datas2/precomputed/clevr_res101_np" path_to_feat = os.path.join(path_to_feat, mode) if not os.path.exists(path_to_feat): os.mkdir(path_to_feat) model = models.resnet101(pretrained=True) modules = list(model.children())[:-3] model = nn.Sequential(*modules) model = model.eval() if USE_CUDA: model = model.cuda() mean = torch.FloatTensor([0.485, 0.456, 0.406]) std = torch.FloatTensor([0.229, 0.224, 0.225]) transform = Compose( [Resize([224, 224]), ToTensor(), Normalize(mean=mean, std=std)]) path_list = [] img_list = [] for img_path in tqdm.tqdm(glob.glob(path + "/*.png")): # Compute path and name of the image img_name_raw = os.path.basename(img_path) img_name_raw_wo_ext, ext = os.path.splitext(img_name_raw) feat_path = os.path.join(path_to_feat, img_name_raw_wo_ext + ".npy") # if os.path.exists(feat_path): # continue # Load and transform image img = Image.open(img_path, mode="r") img = img.convert('RGB') img = transform(img).unsqueeze(0) # Store in list images and his path path_list.append(feat_path) img_list.append(img) assert len(path_list) == len(img_list) # if you reach batch limit, compute forward pass and store if len(path_list) == batchsize: batch = torch.cat(img_list, dim=0) batch = Variable(batch.type(FloatTensor), volatile=True) feats = model.forward(batch).data.cpu() # save each image at the right path for num_in_batch, img_save_path in enumerate(path_list): feat = feats[num_in_batch].numpy() np.save(img_save_path, feat) # clean after mess to redo path_list = [] img_list = [] # Because you can't exactly fall on good size of batches if len(path_list) > 0: assert len(path_list) == len(img_list) batch = torch.cat(img_list, dim=0) batch = Variable(batch.type(FloatTensor), volatile=True) feats = model.forward(batch).data.cpu() # save each image at the right path for num_in_batch, img_save_path in enumerate(path_list): feat = feats[num_in_batch].numpy() np.save(img_save_path, feat)
import torch import torchvision from torchvision.transforms import ToTensor, Normalize, Compose from torchvision.datasets import MNIST torch.manual_seed(42) mnist = MNIST(root='../MNIST_Logistic_Regression/data/', train=True, download=True, transform=Compose( [ToTensor(), Normalize(mean=(0.5, ), std=(0.5, ))])) def denorm(x): out = (x + 1) / 2 return out.clamp(0, 1) from torch.utils.data import DataLoader batch_size = 100 data_loader = DataLoader(mnist, batch_size, shuffle=True) # Device configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print(device) image_size = 784 hidden_size = 256
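# Quick sanity check that denorm() inverts the Normalize((0.5,), (0.5,)) step:
# ToTensor maps pixels to [0, 1], Normalize to [-1, 1], denorm back to [0, 1].
img, _ = mnist[0]
restored = denorm(img)
assert 0.0 <= restored.min() and restored.max() <= 1.0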
def normalize(stats: Tuple = IMAGENET_STATS) -> Normalize: return Normalize(*stats)
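# A hedged sketch of the assumed IMAGENET_STATS constant (the standard
# ImageNet mean/std pair; its actual definition is not shown in this chunk):
IMAGENET_STATS = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

tfm = normalize()             # Normalize(mean=[0.485, ...], std=[0.229, ...])
x = torch.rand(3, 224, 224)
x_normed = tfm(x)             # per-channel (x - mean) / std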
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, CustomTrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
    )

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Load pretrained model and feature extractor
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config_kwargs = {
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "use_auth_token": True if model_args.use_auth_token else None,
    }
    if model_args.config_name:
        config = ViTMAEConfig.from_pretrained(model_args.config_name, **config_kwargs)
    elif model_args.model_name_or_path:
        config = ViTMAEConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    else:
        config = ViTMAEConfig()
        logger.warning("You are instantiating a new config instance from scratch.")
        if model_args.config_overrides is not None:
            logger.info(f"Overriding config: {model_args.config_overrides}")
            config.update_from_string(model_args.config_overrides)
            logger.info(f"New config: {config}")

    # adapt config
    config.update({
        "mask_ratio": model_args.mask_ratio,
        "norm_pix_loss": model_args.norm_pix_loss,
    })

    # create feature extractor
    if model_args.feature_extractor_name:
        feature_extractor = ViTFeatureExtractor.from_pretrained(
            model_args.feature_extractor_name, **config_kwargs)
    elif model_args.model_name_or_path:
        feature_extractor = ViTFeatureExtractor.from_pretrained(
            model_args.model_name_or_path, **config_kwargs)
    else:
        feature_extractor = ViTFeatureExtractor()

    # create model
    if model_args.model_name_or_path:
        model = ViTMAEForPreTraining.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            revision=model_args.model_revision,
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        logger.info("Training new model from scratch")
        model = ViTMAEForPreTraining(config)

    if training_args.do_train:
        column_names = ds["train"].column_names
    else:
        column_names = ds["validation"].column_names

    if data_args.image_column_name is not None:
        image_column_name = data_args.image_column_name
    elif "image" in column_names:
        image_column_name = "image"
    elif "img" in column_names:
        image_column_name = "img"
    else:
        image_column_name = column_names[0]

    # transformations as done in original MAE paper
    # source: https://github.com/facebookresearch/mae/blob/main/main_pretrain.py
    transforms = Compose([
        Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
        RandomResizedCrop(feature_extractor.size, scale=(0.2, 1.0),
                          interpolation=InterpolationMode.BICUBIC),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
    ])

    def preprocess_images(examples):
        """Preprocess a batch of images by applying transforms."""
        examples["pixel_values"] = [
            transforms(image) for image in examples[image_column_name]
        ]
        return examples

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(
                range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(preprocess_images)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (ds["validation"].shuffle(
                seed=training_args.seed).select(range(data_args.max_eval_samples)))
        # Set the validation transforms
        ds["validation"].set_transform(preprocess_images)

    # Compute absolute learning rate
    total_train_batch_size = (training_args.train_batch_size *
                              training_args.gradient_accumulation_steps *
                              training_args.world_size)
    if training_args.base_learning_rate is not None:
        training_args.learning_rate = (training_args.base_learning_rate *
                                       total_train_batch_size / 256)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "tasks": "masked-auto-encoding",
        "dataset": data_args.dataset_name,
        "tags": ["masked-auto-encoding"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
def __init__(self, obj_detect_weights_path, tracktor_config_path,
             obj_detect_config_path):
    super(DeepMOT, self).__init__("DeepMOT")

    normalize_mean = [0.485, 0.456, 0.406]
    normalize_std = [0.229, 0.224, 0.225]
    self.transforms = Compose(
        [ToTensor(), Normalize(normalize_mean, normalize_std)])

    with open(tracktor_config_path) as config_file:
        tracktor = yaml.safe_load(config_file)["tracktor"]

    # set all seeds
    torch.manual_seed(tracktor["seed"])
    torch.cuda.manual_seed(tracktor["seed"])
    np.random.seed(tracktor["seed"])
    torch.backends.cudnn.deterministic = True

    ##########################
    # Initialize the modules #
    ##########################

    # object detection
    if tracktor["network"].startswith("fpn"):
        # FPN
        from src.tracktor.fpn import FPN
        from src.fpn.fpn.model.utils import config

        config.cfg.TRAIN.USE_FLIPPED = False
        config.cfg.CUDA = True

        checkpoint = torch.load(obj_detect_weights_path)

        if "pooling_mode" in checkpoint.keys():
            config.cfg.POOLING_MODE = checkpoint["pooling_mode"]
        else:
            config.cfg.POOLING_MODE = "align"

        set_cfgs = ["ANCHOR_SCALES", "[4, 8, 16, 32]",
                    "ANCHOR_RATIOS", "[0.5,1,2]"]
        config.cfg_from_file(obj_detect_config_path)
        config.cfg_from_list(set_cfgs)

        if "fpn_1_12.pth" in obj_detect_weights_path:
            classes = ("__background__", "aeroplane", "bicycle", "bird",
                       "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                       "diningtable", "dog", "horse", "motorbike", "person",
                       "pottedplant", "sheep", "sofa", "train", "tvmonitor")
        else:
            classes = ("__background__", "pedestrian")

        obj_detect = FPN(classes, 101, pretrained=False)
        obj_detect.create_architecture()

        if "model" in checkpoint.keys():
            model_dict = obj_detect.state_dict()
            model_dict.update(checkpoint["model"])
            obj_detect.load_state_dict(model_dict)
            # obj_detect.load_state_dict(checkpoint['model'])
        else:
            # pick the reid branch
            model_dict = obj_detect.state_dict()
            model_dict.update(checkpoint)
            obj_detect.load_state_dict(model_dict)
    else:
        raise NotImplementedError(
            f"Object detector type not known: {tracktor['network']}")

    obj_detect.eval()
    obj_detect.cuda()

    # tracktor
    self.tracker = Tracker(obj_detect, None, tracktor["tracker"])
wide = Wide(np.unique(X_wide).shape[0], 1) deeptabular = TabMlp( mlp_hidden_dims=[32, 16], mlp_dropout=[0.5, 0.5], column_idx={k: v for v, k in enumerate(colnames)}, embed_input=embed_input, continuous_cols=colnames[-5:], ) deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0) deepimage = DeepImage(pretrained=True) # transforms mean = [0.406, 0.456, 0.485] # BGR std = [0.225, 0.224, 0.229] # BGR transforms1 = [ToTensor, Normalize(mean=mean, std=std)] transforms2 = [Normalize(mean=mean, std=std)] deephead_ds = nn.Sequential(nn.Linear(16, 8), nn.Linear(8, 4)) deephead_dt = nn.Sequential(nn.Linear(64, 8), nn.Linear(8, 4)) deephead_di = nn.Sequential(nn.Linear(512, 8), nn.Linear(8, 4)) # ############################################################################# # Test that runs many possible scenarios of data inputs I can think off. # Surely users will input something unexpected # ############################################################################# @pytest.mark.parametrize( "X_wide, X_tab, X_text, X_img, X_train, X_val, target, val_split, transforms", [
            if not buffer:
                break

            output.write(buffer)
            loop.update(len(buffer))

    if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256:
        raise RuntimeError(
            "Model has been downloaded but the SHA256 checksum does not match")

    return download_target


normalize_image = Normalize((0.48145466, 0.4578275, 0.40821073),
                            (0.26862954, 0.26130258, 0.27577711))


def load(device=("cuda" if torch.cuda.is_available() else "cpu")):
    model_path = _download(MODEL_PATH)
    model = torch.jit.load(model_path, map_location=device).eval()
    n_px = model.input_resolution.item()

    transform = Compose([
        Resize(n_px, interpolation=Image.BICUBIC),
        CenterCrop(n_px),
        lambda image: image.convert("RGB"),
        ToTensor(),
        normalize_image,
    ])
except BaseException as e:
    print(
        "Visdom experienced an exception while running: {}\n"
        "The demo displays up-to-date functionality with the GitHub version, "
        "which may not yet be pushed to pip. Please upgrade using "
        "`pip install -e .` or `easy_install .`\n"
        "If this does not resolve the problem, please open an issue on "
        "our GitHub.".format(repr(e)))

#===============================================================================
# Model Setup
#===============================================================================
cuda_predicate = torch.cuda.is_available()
criterion = nn.CrossEntropyLoss()
transformation = Compose([ToTensor(), Normalize(mean=[0.2916], std=[0.2589])])

if cuda_predicate:
    print("USING GPU")
    model = PETmodel().cuda()
else:
    print("USING CPU")
    model = PETmodel()

optimizer = optim.Adam(model.parameters(), lr=0.01)

test_data = PETDataset(
    'D:/ML_FPGA_compton_PET/image_compton_small_module_1mm_pitch',
    transform=transformation,
    test=True,
def get_train_eval_loaders(path, batch_size=256):
    """Setup the dataflow:
    - load CIFAR100 train and test datasets
    - setup train/test image transforms: training images are padded, randomly
      cropped, randomly flipped horizontally, and augmented with random
      erasing (a cutout-style augmentation)
    - each mini-batch contains 256 examples by default
    - setup train/test data loaders

    Returns:
        train_loader, test_loader, eval_train_loader
    """
    train_transform = Compose([
        Pad(4),
        RandomCrop(32),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        RandomErasing(),
    ])

    test_transform = Compose([
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = CIFAR100(root=path, train=True,
                             transform=train_transform, download=True)
    test_dataset = CIFAR100(root=path, train=False,
                            transform=test_transform, download=False)

    train_eval_indices = [
        random.randint(0, len(train_dataset) - 1)
        for i in range(len(test_dataset))
    ]
    train_eval_dataset = Subset(train_dataset, train_eval_indices)

    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              num_workers=12, shuffle=True,
                              drop_last=True, pin_memory=True)

    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             num_workers=12, shuffle=False,
                             drop_last=False, pin_memory=True)

    eval_train_loader = DataLoader(train_eval_dataset, batch_size=batch_size,
                                   num_workers=12, shuffle=False,
                                   drop_last=False, pin_memory=True)

    return train_loader, test_loader, eval_train_loader
parser_train.add_argument('--exp', default='default') return parser.parse_args() NUM_CHANNELS = 3 NUM_CLASSES = 2 color_transform = Colorize() image_transform = ToPILImage() input_transform = Compose([ Scale(256), CenterCrop(256), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose( [Scale(256), CenterCrop(256), ToLabel(), Relabel(255, 1)]) def get_model(): Net = FCN8 model = Net(NUM_CLASSES, './vgg_16.pth') return model def train(opt, model, use_cuda): model.train()
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0') DIR_ANALYSIS = './SGAN/checkpoints/Analysis' DIR_IMAGE = './SGAN/checkpoints/Image/Training' DIR_MODEL = './SGAN/checkpoints/Model' EPOCHS = 25 ITER_DISPLAY = 100 ITER_REPORT = 10 LATENT_DIM = 100 LR = 2e-4 N_D_STEP = 1 os.makedirs(DIR_ANALYSIS, exist_ok=True) os.makedirs(DIR_IMAGE, exist_ok=True) os.makedirs(DIR_MODEL, exist_ok=True) transforms = Compose([ToTensor(), Normalize(mean=[0.5], std=[0.5])]) dataset = MNIST(root='./datasets', train=True, transform=transforms, download=True) data_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0) D = Discriminator(cnn=CNN).apply(weights_init).to(DEVICE) G = Generator(cnn=CNN).apply(weights_init).to(DEVICE) print(D, G) CELoss = nn.CrossEntropyLoss() BCELoss = nn.BCELoss()
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
    # information sent is the one passed as arguments along with your Python/PyTorch versions.
    send_example_telemetry("run_mim", model_args, data_args)

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Create config
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config_kwargs = {
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "use_auth_token": True if model_args.use_auth_token else None,
    }
    if model_args.config_name_or_path:
        config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs)
    elif model_args.model_name_or_path:
        config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    else:
        config = CONFIG_MAPPING[model_args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")
        if model_args.config_overrides is not None:
            logger.info(f"Overriding config: {model_args.config_overrides}")
            config.update_from_string(model_args.config_overrides)
            logger.info(f"New config: {config}")

    # make sure the decoder_type is "simmim" (only relevant for BEiT)
    if hasattr(config, "decoder_type"):
        config.decoder_type = "simmim"

    # adapt config
    model_args.image_size = model_args.image_size if model_args.image_size is not None else config.image_size
    model_args.patch_size = model_args.patch_size if model_args.patch_size is not None else config.patch_size
    model_args.encoder_stride = (
        model_args.encoder_stride if model_args.encoder_stride is not None else config.encoder_stride
    )

    config.update(
        {
            "image_size": model_args.image_size,
            "patch_size": model_args.patch_size,
            "encoder_stride": model_args.encoder_stride,
        }
    )

    # create feature extractor
    if model_args.feature_extractor_name:
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.feature_extractor_name, **config_kwargs)
    elif model_args.model_name_or_path:
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    else:
        FEATURE_EXTRACTOR_TYPES = {
            conf.model_type: feature_extractor_class
            for conf, feature_extractor_class in FEATURE_EXTRACTOR_MAPPING.items()
        }
        feature_extractor = FEATURE_EXTRACTOR_TYPES[model_args.model_type]()

    # create model
    if model_args.model_name_or_path:
        model = AutoModelForMaskedImageModeling.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            revision=model_args.model_revision,
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        logger.info("Training new model from scratch")
        model = AutoModelForMaskedImageModeling.from_config(config)

    if training_args.do_train:
        column_names = ds["train"].column_names
    else:
        column_names = ds["validation"].column_names

    if data_args.image_column_name is not None:
        image_column_name = data_args.image_column_name
    elif "image" in column_names:
        image_column_name = "image"
    elif "img" in column_names:
        image_column_name = "img"
    else:
        image_column_name = column_names[0]

    # transformations as done in original SimMIM paper
    # source: https://github.com/microsoft/SimMIM/blob/main/data/data_simmim.py
    transforms = Compose(
        [
            Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
            RandomResizedCrop(model_args.image_size, scale=(0.67, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
            RandomHorizontalFlip(),
            ToTensor(),
            Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
        ]
    )

    # create mask generator
    mask_generator = MaskGenerator(
        input_size=model_args.image_size,
        mask_patch_size=data_args.mask_patch_size,
        model_patch_size=model_args.patch_size,
        mask_ratio=data_args.mask_ratio,
    )

    def preprocess_images(examples):
        """Preprocess a batch of images by applying transforms + creating a
        corresponding mask, indicating which patches to mask."""
        examples["pixel_values"] = [transforms(image) for image in examples[image_column_name]]
        examples["mask"] = [mask_generator() for i in range(len(examples[image_column_name]))]
        return examples

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(preprocess_images)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (
                ds["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
            )
        # Set the validation transforms
        ds["validation"].set_transform(preprocess_images)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "masked-image-modeling",
        "dataset": data_args.dataset_name,
        "tags": ["masked-image-modeling"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
def __init__(self, path): self.transform = ToTensor() self.norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) self.imgs = [imread(x) for x in glob(os.path.join(path,'*.png'))]