def __init__(self, step_size=4, discrete=True, glimpse_size=(128, 128), out_size=(224, 224)):
    super(marlod, self).__init__()
    self.step_size = step_size
    self.discrete = discrete
    self.glimpse_size = glimpse_size  # (x, y)
    self.out_size = out_size  # (x, y)
    data = CocoDetection(
        root='/home/ok18/Datasets/COCO/train2017/',
        annFile='/home/ok18/Datasets/COCO/annotations/instances_train2017.json',
        # transform=transform
        target_transform=target_trans)
    self.img, self.target = None, None
    self.locs = [[0, 0], [0, 0]]
    self.IoU_region = 0.
    self.IoU_agent_1 = 0.
    self.IoU_agent_2 = 0.
    self.rews = np.asarray([0., 0.])
    # Restrict the dataset to images that contain the 'cat' category.
    catIds = data.coco.getCatIds(catNms=['cat'])
    imgIds = data.coco.getImgIds(catIds=catIds)
    data.ids = imgIds
    self.batch_iterator = iter(data)
    self.data = data
def main():
    train_dataset = CocoDetection(
        root="/datasets/coco/train2017",
        annFile="/datasets/coco/annotations/instances_train2017.json",
    )
    print_once(f"Training dataset has {len(train_dataset)} examples")

    eval_dataset = CocoDetection(
        root="/datasets/coco/val2017",
        annFile="/datasets/coco/annotations/instances_val2017.json",
    )
    print_once(f"Eval dataset has {len(eval_dataset)} examples")
def create_coco_dataloader(coco_dir, batch_size, num_workers=1):
    train_data = CocoDetection(
        root=os.path.join(coco_dir, 'images', 'train2017'),
        annFile=os.path.join(coco_dir, 'annotations/instances_train2017.json'))
    val_data = CocoDetection(
        root=os.path.join(coco_dir, 'images', 'val2017'),
        annFile=os.path.join(coco_dir, 'annotations/instances_val2017.json'))
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers)
    return train_loader, val_loader
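Note that CocoDetection yields (PIL.Image, list-of-annotation-dicts) pairs, which the default DataLoader collate cannot stack into tensors, so loaders like the ones above typically need a custom collate_fn. A minimal sketch under that assumption (the helper name is illustrative, not from the snippet):

def detection_collate(batch):
    # Keep images and per-image target lists as parallel lists instead of
    # trying to stack variable-length annotations into a single tensor.
    images, targets = zip(*batch)
    return list(images), list(targets)

# e.g. DataLoader(train_data, batch_size=batch_size, collate_fn=detection_collate)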
def __init__(self, images_dir, annotations_file, transforms, batch_size):
    CocoDetection.__init__(self, root=images_dir, annFile=annotations_file,
                           transforms=transforms)
    self._annotated_classnames = {
        k: v['name'] for k, v in self.coco.cats.items()
    }
    # Map the sparse COCO category ids to a contiguous 0-based index.
    self._annotated_to_detected_class_idx = {
        k: i for i, (k, v) in enumerate(self.coco.cats.items())
    }
    self.batch_size = batch_size
    self.class_names = [v['name'] for k, v in self.coco.cats.items()]
def make_classify_dataset(base_dir, save_dir, crop_size):
    """Crop one patch per annotation from a COCO-style detection dataset and
    save it under save_dir/<split>/<class_name>/ for classification."""
    base_dir = Path(base_dir)
    assert base_dir.is_dir()
    save_dir = Path(save_dir)
    assert save_dir.is_dir()
    class_dict = {1: 'glass', 2: 'metal', 3: 'plastic'}
    for name in ('valid', 'train'):
        ds = CocoDetection(root=str(base_dir / name),
                           annFile=str(base_dir / f'{name}.json'),
                           transform=None)
        for img, labels in ds:
            for label in labels:
                x, y, w, h = label['bbox']
                cx, cy = x + w // 2, y + h // 2
                crop = centered_crop(img, cx, cy, crop_size, crop_size)
                # Get class name for directory structure
                class_name = class_dict[label['category_id']]
                # Use annotation id for image name
                save_path = save_dir / name / class_name
                save_path.mkdir(parents=True, exist_ok=True)
                crop.save(save_path / f'{label["image_id"]}_{label["id"]}.jpg')
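The centered_crop helper used above is not defined in the snippet. A plausible sketch with PIL, assuming the helper simply crops a w-by-h window centered at (cx, cy):

def centered_crop(img, cx, cy, w, h):
    # Hypothetical reconstruction of the undefined helper above: crop a
    # w-by-h box centered on (cx, cy); PIL zero-pads out-of-bounds regions.
    left, top = int(cx - w // 2), int(cy - h // 2)
    return img.crop((left, top, left + w, top + h))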
def get_dataset(datalake_path):
    coco_val_2017_root_path = os.path.join(datalake_path, "coco_val_2017",
                                           "val2017")
    coco_val_2017_anno_path = os.path.join(datalake_path, "coco_val_2017",
                                           "annotations",
                                           "instances_val2017.json")
    dataset = CocoDetection(root=coco_val_2017_root_path,
                            annFile=coco_val_2017_anno_path)
    return dataset
def test_classnames_from_coco():
    dataset = CocoDetection(root=COCO_IMAGES_DIR, annFile=COCO_ANNOTATIONS_FILE)
    for k, v in dataset.coco.cats.items():
        print(k, v)
    classnames = {k: v['name'] for k, v in dataset.coco.cats.items()}
    print(classnames)
    assert classnames[22] == 'elephant'
    assert classnames[3] == 'car'
def __init__(self, img_path, ann_path, sup_path, transform=None):
    self.coco = CocoDetection(img_path, ann_path)
    self.transform = transform
    self.support_by_cat = dict()
    for cat_id in os.listdir(sup_path):
        cat_folder = os.path.join(sup_path, cat_id)
        files = [
            os.path.join(cat_folder, filename)
            for filename in os.listdir(cat_folder)
        ]
        self.support_by_cat[int(cat_id)] = files
def get_coco(img_dir_path, ann_file_path, transforms, annotated_only, is_segment):
    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64,
                20, 63, 7, 72]
    if is_segment:
        t = [FilterAndRemapCocoCategories(CAT_LIST, remap=True),
             ConvertCocoPolysToMask4Seg()]
    else:
        t = [ConvertCocoPolysToMask4Detect()]
    if transforms is not None:
        t.append(transforms)
    transforms = Compose(t)
    if is_segment:
        dataset = CocoDetection(img_dir_path, os.path.expanduser(ann_file_path),
                                transforms=transforms)
    else:
        dataset = CustomCocoDetection(img_dir_path, ann_file_path,
                                      transforms=transforms)
    if annotated_only:
        dataset = remove_images_without_annotations(dataset)
    return dataset
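The remove_images_without_annotations helper is referenced above without a definition. A hedged sketch of one way to implement it, assuming the dataset exposes the usual ids list and coco index that torchvision's CocoDetection provides:

def remove_images_without_annotations(dataset):
    # Keep only image ids that have at least one annotation in the COCO index.
    dataset.ids = [
        img_id for img_id in dataset.ids
        if len(dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
    ]
    return dataset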
def COCO(dataset_dir, grayscale, patch_size):
    train_transform = trans.Compose([
        trans.RandomCrop(patch_size),
        trans.RandomHorizontalFlip(),
        trans.RandomVerticalFlip(),
        trans.RandomGrayscale(),
        trans.ToTensor(),
    ])
    test_transform = trans.Compose([
        trans.CenterCrop(patch_size),
        trans.ToTensor(),
    ])
    num_classes = 182
    train_set = CocoDetection(
        root=os.path.join(dataset_dir, 'train2014'),
        annFile=os.path.join(dataset_dir, 'annotations_trainval2014',
                             'instances_train2014.json'),
        transform=train_transform)
    val_set = CocoDetection(
        root=os.path.join(dataset_dir, 'val2014'),
        annFile=os.path.join(dataset_dir, 'annotations_trainval2014',
                             'instances_val2014.json'),
        transform=test_transform)
    return train_set, val_set, num_classes
def generate_support(ann_path, img_path, min_area=5):
    dataset = CocoDetection(img_path, ann_path)
    for X, y_list in tqdm.tqdm(dataset):
        X = np.array(X)
        for y in y_list:
            x1, y1, dx, dy = map(int, y['bbox'])
            if dx * dy < min_area:
                continue
            inst = X[y1:y1 + dy, x1:x1 + dx]
            cat_id, inst_id = y['category_id'], y['id']
            cat_dir = f'{img_path}_supp/{cat_id}'
            os.makedirs(cat_dir, exist_ok=True)
            inst_path = os.path.join(cat_dir, str(inst_id) + '.jpg')
            imsave(inst_path, inst)
def __init__(self, images_folder_path, annotation_json, image_transform,
             input_image_size=(600, 600)):
    """Constructor for the PascalVOCDetection class.

    Given the images folder path, a coco-like annotation file for Pascal VOC
    and an image size, it automatically pads input images and parametrizes
    the associated groundtruth boxes and classes. Since all images are padded
    to the same size, this gives us a chance to batch input images and the
    associated parametrized groundtruth.

    Parameters
    ----------
    images_folder_path : string
        Global or relative path to the Pascal VOC images folder.
    annotation_json : string
        Global or relative path to the coco-like Pascal annotation file
        (see above for download link).
    image_transform : torchvision.transforms object for preprocessing
        Stack of preprocessing functions that are run on the input image.
    input_image_size : tuple of ints
        Size of all images that are being delivered -- we pad all of them
        to the same size (see above).
    """
    self.input_size = input_image_size
    self.images_folder_path = images_folder_path
    self.annotation_json = annotation_json
    self.image_transform = image_transform
    self.anchor_box_manager = AnchorBoxesManager(
        input_image_size=input_image_size)
    self.pascal_cocolike_db = CocoDetection(annFile=annotation_json,
                                            root=images_folder_path)
def _initTraining(self, learningRate, dataset, useDatabase):
    # Dataset is DeepFashion2
    print("Initiating training of DescriptionExtractor")
    print("Loading DeepFashion2")
    from torchvision import transforms
    from torchvision.datasets import CocoDetection

    self.annFile = topDir + '/annotations/deepfashion2_{}.json'.format(dataset)
    self.cocoImgPath = topDir + '/data/DeepFashion2/{}'.format(dataset)
    self.useDatabase = useDatabase
    self.dataset = CocoDetection(
        self.cocoImgPath, self.annFile,
        transform=transforms.Compose([
            transforms.ToTensor(),
            # Convert the CHW float tensor to an HWC uint8 numpy array in
            # BGR channel order.
            transforms.Lambda(lambda x: x.permute(1, 2, 0)),
            transforms.Lambda(lambda x: (x * 255).byte().numpy()),
            transforms.Lambda(lambda x: x[:, :, ::-1])
        ]))

    # Init LMDB_helper
    if useDatabase:
        self.lmdb = LMDBHelper("a")
        self.lmdb.verbose = False

    self.denseposeExtractor = DensePoseWrapper()
    self.sanitizer = Sanitizer()
    self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")
    self.uvMapper = UVMapper()

    # PyTorch things
    self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                      lr=learningRate, amsgrad=True)
    self.lossFunction = torch.nn.BCEWithLogitsLoss()
from pathlib import Path
from torch.utils.data.dataloader import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torch import optim

if __name__ == '__main__':
    # Load COCO-pretrained weights, then replace the box predictor with a
    # 2-class head.
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2)
    model = model.to("cuda")

    root = Path(
        "/home/vstupakov/DATA/gen3_projects/dataset #1/FasterRCNNv2/training/cocolike_dataset"
    )
    dataset_train = CocoDetection(
        root=str(root / "train"),
        annFile=str(root / "annotations/instanses_train.json"))
    loader = DataLoader(dataset_train, batch_size=4, num_workers=4,
                        pin_memory=True)
    optimizer = optim.Adam(model.parameters(), lr=1e-5)

    epochs = 10
    for epoch in range(epochs):
        model.train()
        for i, (data, target) in enumerate(loader):
            data, target = data.to("cuda"), target.to("cuda")
            optimizer.zero_grad()
            out = model(data)
train_root = os.path.join(args.data_root, 'coco/images/train2017')
val_root = os.path.join(args.data_root, 'coco/images/val2017')
test_root = os.path.join(args.data_root, 'coco/images/test2017')
train_annFile = os.path.join(
    args.data_root,
    'coco/annotations/annotations_trainval2017/captions_train2017.json')
val_annFile = os.path.join(
    args.data_root,
    'coco/annotations/annotations_trainval2017/captions_val2017.json')
test_annFile = os.path.join(
    args.data_root,
    'coco/annotations/image_info_test2017/image_info_test2017.json')

train_data = CocoDetection(root=train_root, annFile=train_annFile,
                           transform=train_transform)
train_loader = Data.DataLoader(dataset=train_data,
                               batch_size=args.batch_size,
                               shuffle=True,
                               pin_memory=True,
                               num_workers=args.num_workers)
val_data = CocoDetection(root=val_root, annFile=val_annFile,
                         transform=val_transform)
val_loader = Data.DataLoader(dataset=val_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=args.num_workers)
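The annFiles above point at caption annotations, which CocoDetection will load as generic target dicts. If caption strings are what is wanted, torchvision also provides CocoCaptions with the same constructor; a hedged alternative reusing the paths above (not part of the original snippet):

from torchvision.datasets import CocoCaptions

# Returns (image, [caption, caption, ...]) pairs instead of annotation dicts.
train_captions = CocoCaptions(root=train_root, annFile=train_annFile,
                              transform=train_transform)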
from torchvision.datasets import CocoDetection
from torchvision.transforms.functional import to_tensor, to_pil_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from matplotlib import pyplot as plt, patches


def prepare_image(img):
    return to_tensor(img)


dataset = CocoDetection(
    root="C:/Users/emila/val2017",
    annFile="C:/Users/emila/annotations/instances_val2017.json",
    transform=prepare_image)

model = fasterrcnn_resnet50_fpn(pretrained=True, progress=True)

image = dataset[0][0]
print(dataset[0])

model.eval()
res = model([image])

fig, ax = plt.subplots(1)
ax.imshow(to_pil_image(image))
for box, cat_id in zip(res[0]["boxes"], res[0]["labels"]):
    x1, y1, x2, y2 = box
    w, h = x2 - x1, y2 - y1
    rect = patches.Rectangle((x1, y1), w, h, fill=False, edgecolor='r')
    category = dataset.coco.loadCats(cat_id.numpy().tolist())[0]["name"]
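The snippet stops before each Rectangle is attached to the axes, and detection outputs also carry a "scores" field that is commonly used to thin the boxes first. A hedged continuation (the 0.5 threshold is an arbitrary choice, not from the source):

# Keep only reasonably confident detections before drawing (assumed threshold).
keep = res[0]["scores"] > 0.5
for box in res[0]["boxes"][keep].detach():
    x1, y1, x2, y2 = box.tolist()
    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   fill=False, edgecolor='r'))
plt.show()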
def __init__(self, path_to_data_dir: str, mode: Base.Mode,
             image_min_side: float, image_max_side: float):
    super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

    path_to_coco_dir = os.path.join(self._path_to_data_dir, 'COCO')
    path_to_annotations_dir = os.path.join(path_to_coco_dir, 'annotations')
    path_to_caches_dir = os.path.join('caches', 'coco2017-person',
                                      f'{self._mode.value}')
    path_to_image_ids_pickle = os.path.join(path_to_caches_dir, 'image-ids.pkl')
    path_to_image_id_dict_pickle = os.path.join(path_to_caches_dir,
                                                'image-id-dict.pkl')

    if self._mode == COCO2017Person.Mode.TRAIN:
        path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'train2017')
        path_to_annotation = os.path.join(path_to_annotations_dir,
                                          'instances_train2017.json')
    elif self._mode == COCO2017Person.Mode.EVAL:
        path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'val2017')
        path_to_annotation = os.path.join(path_to_annotations_dir,
                                          'instances_val2017.json')
    else:
        raise ValueError('invalid mode')

    coco_dataset = CocoDetection(root=path_to_jpeg_images_dir,
                                 annFile=path_to_annotation)

    if os.path.exists(path_to_image_ids_pickle) and os.path.exists(path_to_image_id_dict_pickle):
        print('loading cache files...')
        with open(path_to_image_ids_pickle, 'rb') as f:
            self._image_ids = pickle.load(f)
        with open(path_to_image_id_dict_pickle, 'rb') as f:
            self._image_id_to_annotation_dict = pickle.load(f)
    else:
        print('generating cache files...')
        os.makedirs(path_to_caches_dir, exist_ok=True)
        self._image_id_to_annotation_dict: Dict[str, COCO2017Person.Annotation] = {}
        for idx, (image, annotation) in enumerate(tqdm(coco_dataset)):
            if len(annotation) > 0:
                image_id = str(annotation[0]['image_id'])  # all image_id in annotation are the same
                annotation = COCO2017Person.Annotation(
                    filename=os.path.join(path_to_jpeg_images_dir,
                                          '{:012d}.jpg'.format(int(image_id))),
                    objects=[COCO2017Person.Annotation.Object(
                        bbox=BBox(  # `ann['bbox']` is in the format [left, top, width, height]
                            left=ann['bbox'][0],
                            top=ann['bbox'][1],
                            right=ann['bbox'][0] + ann['bbox'][2],
                            bottom=ann['bbox'][1] + ann['bbox'][3]
                        ),
                        label=ann['category_id'])
                        for ann in annotation]
                )
                # filtering label should refer to original `COCO2017` dataset
                annotation.objects = [
                    obj for obj in annotation.objects
                    if obj.label in [COCO2017.CATEGORY_TO_LABEL_DICT['person']]
                ]
                if len(annotation.objects) > 0:
                    self._image_id_to_annotation_dict[image_id] = annotation
        self._image_ids = list(self._image_id_to_annotation_dict.keys())
        with open(path_to_image_ids_pickle, 'wb') as f:
            pickle.dump(self._image_ids, f)
        with open(path_to_image_id_dict_pickle, 'wb') as f:
            pickle.dump(self._image_id_to_annotation_dict, f)
from torchvision.datasets import CocoDetection
import torch
import numpy as np
from torch.utils.data import Dataset
import torchvision.transforms as T
from PIL import Image
from detection.utils import xywh2xyxy
import os

train_data_dir = '/home/jrlees/datasets/COCO/images/train2017/'
train_anno_dir = '/home/jrlees/datasets/COCO/annotations/instances_train2017.json'
val_data_dir = '/home/jrlees/datasets/COCO/images/val2017/'
val_anno_dir = '/home/jrlees/datasets/COCO/annotations/instances_val2017.json'

data_train = CocoDetection(root=train_data_dir, annFile=train_anno_dir,
                           transform=T.Compose([T.RandomHorizontalFlip(0.5),
                                                T.Resize((640, 640)),
                                                T.ToTensor()]))
data_val = CocoDetection(root=val_data_dir, annFile=val_anno_dir,
                         transform=T.Compose([T.Resize((640, 640)),
                                              T.ToTensor()]))


class CocoDataset(Dataset):
    def __init__(self, image_set='train'):
        # This usually initializes things such as file locations and the
        # resize dimensions.
        if image_set == 'train':
            self.data = data_train
            self.data_dir = train_data_dir
        else:
            self.data = data_val
            self.data_dir = val_data_dir

    def __getitem__(self, item):
        # Images are usually read in index order here.
def __init__(self, path_to_data_dir: str, mode: Base.Mode,
             image_min_side: float, image_max_side: float):
    super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

    path_to_coco_dir = os.path.join(self._path_to_data_dir, 'coco')
    path_to_annotations_dir = os.path.join(path_to_coco_dir, 'annotations')
    path_to_caches_dir = os.path.join('caches', 'coco2017', f'{self._mode.value}')
    path_to_image_ids_pickle = os.path.join(path_to_caches_dir, 'image-ids.pkl')
    path_to_image_id_dict_pickle = os.path.join(path_to_caches_dir,
                                                'image-id-dict.pkl')
    path_to_image_ratios_pickle = os.path.join(path_to_caches_dir,
                                               'image-ratios.pkl')

    if self._mode == COCO2017.Mode.TRAIN:
        path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'images/train2017')
        path_to_annotation = os.path.join(path_to_annotations_dir,
                                          'instances_train2017.json')
    elif self._mode == COCO2017.Mode.EVAL:
        path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'images/val2017')
        path_to_annotation = os.path.join(path_to_annotations_dir,
                                          'instances_val2017.json')
    else:
        raise ValueError('invalid mode')

    coco_dataset = CocoDetection(root=path_to_jpeg_images_dir,
                                 annFile=path_to_annotation)

    if os.path.exists(path_to_image_ids_pickle) and os.path.exists(path_to_image_id_dict_pickle):
        print('loading cache files...')
        with open(path_to_image_ids_pickle, 'rb') as f:
            self._image_ids = pickle.load(f)
        with open(path_to_image_id_dict_pickle, 'rb') as f:
            self._image_id_to_annotation_dict = pickle.load(f)
        with open(path_to_image_ratios_pickle, 'rb') as f:
            self._image_ratios = pickle.load(f)
    else:
        print('generating cache files...')
        os.makedirs(path_to_caches_dir, exist_ok=True)
        self._image_ids: List[str] = []
        self._image_id_to_annotation_dict: Dict[str, COCO2017.Annotation] = {}
        self._image_ratios = []
        for idx, (image, annotation) in enumerate(tqdm(coco_dataset)):
            if len(annotation) > 0:
                image_id = str(annotation[0]['image_id'])  # all image_id in annotation are the same
                self._image_ids.append(image_id)
                self._image_id_to_annotation_dict[image_id] = COCO2017.Annotation(
                    filename=os.path.join(path_to_jpeg_images_dir,
                                          '{:012d}.jpg'.format(int(image_id))),
                    objects=[
                        COCO2017.Annotation.Object(
                            bbox=BBox(  # `ann['bbox']` is in the format [left, top, width, height]
                                left=ann['bbox'][0],
                                top=ann['bbox'][1],
                                right=ann['bbox'][0] + ann['bbox'][2],
                                bottom=ann['bbox'][1] + ann['bbox'][3]),
                            label=ann['category_id'])
                        for ann in annotation
                    ])
                ratio = float(image.width / image.height)
                self._image_ratios.append(ratio)
        with open(path_to_image_ids_pickle, 'wb') as f:
            pickle.dump(self._image_ids, f)
        with open(path_to_image_id_dict_pickle, 'wb') as f:
            pickle.dump(self._image_id_to_annotation_dict, f)
        with open(path_to_image_ratios_pickle, 'wb') as f:
            pickle.dump(self._image_ratios, f)
def __init__(self, net, annDir='/Data/Coco/', batch_size=1,
             train_transforms=None, val_transforms=None,
             th_count=mu.cpu_count()):
    if train_transforms is None:
        train_transforms = augmentation.PairCompose([
            augmentation.RandomResizeTransform(),
            augmentation.RandomHorizontalFlipTransform(),
            augmentation.RandomCropTransform((416, 416)),
            augmentation.RandomNoiseTransform(),
            augmentation.RandomColorJitterTransform(),
            augmentation.RandomBlurTransform(),
            augmentation.RandomJPEGcompression(95),
            augmentation.TargetTransform(prior_box_sizes=net.prior_box_sizes,
                                         classes=net.classes,
                                         ratios=net.ratios,
                                         strides=net.strides),
            augmentation.OutputTransform()
        ])
    if val_transforms is None:
        val_transforms = augmentation.PairCompose([
            augmentation.PaddTransform(pad_size=2**net.depth),
            augmentation.TargetTransform(prior_box_sizes=net.prior_box_sizes,
                                         classes=net.classes,
                                         ratios=net.ratios,
                                         strides=net.strides),
            augmentation.OutputTransform()
        ])
    self.target_to_box_transform = output_transform.TargetTransformToBoxes(
        prior_box_sizes=net.prior_box_sizes,
        classes=net.classes,
        ratios=net.ratios,
        strides=net.strides)

    train_dir = os.path.join(annDir, 'train2017')
    train_ann_file = os.path.join(
        annDir, 'annotations_trainval2017/annotations/instances_train2017.json')
    validation_dir = os.path.join(annDir, 'val2017')
    validation_ann_file = os.path.join(
        annDir, 'annotations_trainval2017/annotations/instances_val2017.json')

    self.trainset = CocoDetection(root=train_dir, annFile=train_ann_file,
                                  transforms=train_transforms)
    self.validationset = CocoDetection(root=validation_dir,
                                       annFile=validation_ann_file,
                                       transforms=val_transforms)

    # With a single worker thread, use only a small subset of each dataset.
    if th_count == 1:
        self.trainset.ids = self.trainset.ids[:100]
        self.validationset.ids = self.validationset.ids[:10]

    self.trainloader = torch.utils.data.DataLoader(self.trainset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=th_count,
                                                   pin_memory=True)
    self.validationloader = torch.utils.data.DataLoader(self.validationset,
                                                        batch_size=1,
                                                        shuffle=False,
                                                        num_workers=th_count,
                                                        pin_memory=True)
    self.trainloader.cats = net.classes
    self.validationloader.cats = net.classes
import torch
from torch import nn
from torchvision.datasets import CocoDetection
import cv2
import matplotlib.pyplot as plt
import numpy as np

testset = CocoDetection(
    "/media/sinclair/datasets/COCO/val2017",
    "/media/sinclair/datasets/COCO/annotations/instances_val2017.json")


def vis_coco_instance(image, label):
    """
    :param image: pil image
    :param label: COCO's format, a list of dictionaries containing
        segmentation, class, and bbox for every object.
    """
    image = np.array(image)
    for i in label:
        [x, y, w, h] = i['bbox']
        cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)),
                      (255, 0, 0), 5)
    plt.imshow(image)
    plt.show()


# Example usage: visualize instance 534 from the validation set.
vis_coco_instance(*testset[534])


def build_yolo_input_output(image, label, grid_width, anchor_box_ratios,
                            class_mapping):
    """
    Takes in a COCO instance and returns two tensors: the specified input and
    the target output. They're based on the anchor boxes as well.
    :param class_mapping: a list where class_mapping[coco_class] = our_class
    """
def __init__(self, root: str, name: str, transforms: Optional[Compose] = None):
    data_path = root + '/' + name
    anns = root + '/annotations/instances_' + name + '.json'
    self._orig_coco = CocoDetection(data_path, anns)
    self._transforms = transforms
from pycocotools import mask
from pycocotools.coco import COCO
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import patches
from tqdm import tqdm
import os
from PIL import Image
from torchvision.datasets import CocoDetection

cc = COCO('data/MSCOCO/annotations/instances_train2017.json')
cats = {c['id']: c['name'] for c in cc.loadCats(cc.getCatIds())}
print(cats)

for split in ['train', 'val']:
    coco = CocoDetection(
        'data/MSCOCO/images/{}2017'.format(split),
        'data/MSCOCO/annotations/instances_{}2017.json'.format(split))
    dest = 'data/MSCOCO/imageclassification/{}/'.format(split)
    if not os.path.exists(dest):
        os.mkdir(dest)
    ii = 0
    for x, y in tqdm(coco):
        w, h = x.size
        x = np.array(x)
        for _y in y:
            cat = _y['category_id']
            rle = mask.frPyObjects(_y['segmentation'], h, w)
            mm = mask.toBbox(rle)
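frPyObjects returns one RLE per polygon part, and toBbox gives an [x, y, w, h] box per RLE. When a single binary mask per object is wanted instead, pycocotools can merge and decode the parts; a hedged sketch reusing the loop variables above (the variable names introduced here are assumptions):

# Merge multi-part polygon RLEs into one and decode to an HxW uint8 mask.
rles = mask.frPyObjects(_y['segmentation'], h, w)
merged = mask.merge(rles)
binary_mask = mask.decode(merged)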
class Coco2017Dataset(torch.utils.data.Dataset):
    def __init__(self, root: str, data_type='train', num_class: int = 91,
                 **kwargs: dict):
        self.data_transforms = get_transforms()
        file_name = 'train2017' if data_type == 'train' else 'val2017'
        self.dataset = CocoDetection(
            root=osp.join(root, file_name),
            annFile=osp.join(root, f'annotations/instances_{file_name}.json'),
            # transform=data_transforms[data_type],
        )
        """
        logger.info(f'Dataset DRY Run: {len(dataset)}')
        self.dataset = []
        for d in tqdm.tqdm(dataset):
            if len(d[1]) == 0:
                continue
            self.dataset.append(d)
        """
        self.data_type = data_type
        self.num_class = num_class
        np.random.seed(256)

    def __getitem__(self, index: int, use_cropped: bool = True):
        if use_cropped:
            return self.get_cropped_instance(index)
        image, targets = self.dataset[index]
        labels = torch.zeros(self.num_class, dtype=torch.int64)
        for target in targets:
            cat_id = target['category_id']
            labels[cat_id] = cat_id
        return image, labels

    def get_cropped_instance(self, index: int):
        # Resample until an image with at least one annotation is found.
        while True:
            image, targets = self.dataset[index]
            if len(targets) > 0:
                break
            index = np.random.randint(0, len(self.dataset))
        selected = np.random.randint(0, len(targets))
        target = targets[selected]
        category_id = target['category_id']
        bbox = np.array(target['bbox'], dtype=np.intp)
        center = np.average([bbox[:2], bbox[:2] + bbox[2:]], axis=0)
        # Randomly enlarge the crop up to twice the bbox size.
        new_size = np.array((
            np.random.randint(bbox[2], 2 * np.maximum(bbox[2], 1)),
            np.random.randint(bbox[3], 2 * np.maximum(bbox[3], 1)),
        ))
        x1, y1 = np.maximum(center - new_size / 2, [0, 0]).astype(np.intp)
        x2, y2 = np.minimum(center + new_size / 2, image.size).astype(np.intp)
        image = image.crop([x1 + 1, y1 + 1, x2 - 1, y2 - 1])
        return self.data_transforms[self.data_type](image), category_id

    def __len__(self):
        return len(self.dataset)
def train(max_iter, device="cpu"):
    """Train the network.

    Args:
        max_iter: The maximum number of training iterations.
        device: The device to train on.
    """
    wandb.init(project="detector_baseline")

    # Init model
    detector = Detector().to(device)
    wandb.watch(detector)

    dataset = CocoDetection(
        root="./dd2419_coco/training",
        annFile="./dd2419_coco/annotations/training.json",
        transforms=detector.input_transform,
    )
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)

    # training params
    max_iterations = wandb.config.max_iterations = max_iter
    learning_rate = wandb.config.learning_rate = 1e-4
    weight_reg = wandb.config.weight_reg = 1
    weight_noobj = wandb.config.weight_noobj = 1

    # run name (to easily identify model later)
    time_string = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    run_name = wandb.config.run_name = "saved_models/det_{}".format(time_string)

    # init optimizer
    optimizer = torch.optim.Adam(detector.parameters(), lr=learning_rate)

    # load test images
    # these will be evaluated in regular intervals
    test_images = []
    show_test_images = False
    directory = "./test_images"
    if not os.path.exists(directory):
        os.makedirs(directory)
    for file_name in os.listdir(directory):
        if file_name.endswith(".jpg"):
            file_path = os.path.join(directory, file_name)
            test_image = Image.open(file_path)
            test_images.append(TF.to_tensor(test_image))
    if test_images:
        test_images = torch.stack(test_images)
        test_images = test_images.to(device)
        show_test_images = True

    print("Training started...")

    current_iteration = 1
    while current_iteration <= max_iterations:
        for img_batch, target_batch in dataloader:
            img_batch = img_batch.to(device)
            target_batch = target_batch.to(device)

            # run network
            out = detector(img_batch)

            # positive / negative indices
            # (this could be passed from input_transform to avoid recomputation)
            pos_indices = torch.nonzero(target_batch[:, 4, :, :] == 1, as_tuple=True)
            neg_indices = torch.nonzero(target_batch[:, 4, :, :] == 0, as_tuple=True)

            # compute loss
            reg_mse = nn.functional.mse_loss(
                out[pos_indices[0], 0:4, pos_indices[1], pos_indices[2]],
                target_batch[pos_indices[0], 0:4, pos_indices[1], pos_indices[2]],
            )
            pos_mse = nn.functional.mse_loss(
                out[pos_indices[0], 4, pos_indices[1], pos_indices[2]],
                target_batch[pos_indices[0], 4, pos_indices[1], pos_indices[2]],
            )
            neg_mse = nn.functional.mse_loss(
                out[neg_indices[0], 4, neg_indices[1], neg_indices[2]],
                target_batch[neg_indices[0], 4, neg_indices[1], neg_indices[2]],
            )
            loss = pos_mse + weight_reg * reg_mse + weight_noobj * neg_mse

            # optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            wandb.log(
                {
                    "total loss": loss.item(),
                    "loss pos": pos_mse.item(),
                    "loss neg": neg_mse.item(),
                    "loss reg": reg_mse.item(),
                },
                step=current_iteration,
            )

            print(
                "\rIteration: {}, loss: {}".format(current_iteration, loss.item()),
                end="",
            )

            # generate visualization every N iterations
            if current_iteration % 250 == 0 and show_test_images:
                with torch.no_grad():
                    out = detector(test_images).cpu()
                    bbs = detector.decode_output(out, 0.5)

                    for i, test_image in enumerate(test_images):
                        figure, ax = plt.subplots(1)
                        plt.imshow(test_image.cpu().permute(1, 2, 0))
                        plt.imshow(
                            out[i, 4, :, :],
                            interpolation="nearest",
                            extent=(0, 640, 480, 0),
                            alpha=0.7,
                        )

                        # add bounding boxes
                        utils.add_bounding_boxes(ax, bbs[i])

                        wandb.log(
                            {"test_img_{i}".format(i=i): figure},
                            step=current_iteration,
                        )
                        plt.close()

            current_iteration += 1
            if current_iteration > max_iterations:
                break

    print("\nTraining completed (max iterations reached)")
    model_path = "{}.pt".format(run_name)
    utils.save_model(detector, model_path)
    if device == "cpu":
        wandb.save(model_path)
    print("Model weights saved at {}".format(model_path))
def __init__(self, root, annFile, transforms=None):
    self.coco = CocoDetection(root, annFile)
    self.transforms = transforms
    self.length = len(self.coco)
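The wrapper above stores the dataset and transforms but the snippet omits the indexing method. A plausible companion sketch, assuming pair-style transforms as hinted by the transforms name (not from the source):

def __getitem__(self, index):
    # Hypothetical sketch: delegate to the wrapped dataset and apply the
    # stored pair transform, if any.
    image, target = self.coco[index]
    if self.transforms is not None:
        image, target = self.transforms(image, target)
    return image, target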
import torch
from torch import nn
from torchvision.datasets import CocoDetection

# datasets
trainset = CocoDetection(
    "/media/sinclair/datasets/COCO/train2017",
    "/media/sinclair/datasets/COCO/annotations/instances_train2017.json")
testset = CocoDetection(
    "/media/sinclair/datasets/COCO/val2017",
    "/media/sinclair/datasets/COCO/annotations/instances_val2017.json")

# dataloaders
image = trainset[20][0]
image.show()
width, height = image.size
label = trainset[20][1]  # in pixel image coordinates, xywh
print(width, height)
for obj in label:
    # print(obj)
    print(obj['bbox'])
    print(obj['category_id'])

# model

# optimizer

# training loop
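The category_id printed above is a raw COCO id; the dataset's pycocotools handle can map it to a readable name. A small illustrative variant of the loop above:

for obj in label:
    # coco.cats maps id -> {'id': ..., 'name': ..., 'supercategory': ...}
    name = trainset.coco.cats[obj['category_id']]['name']
    print(obj['bbox'], name)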
def __len__(self):
    return CocoDetection.__len__(self)
def __init__(self, path_to_jpeg_images_dir: str, path_to_annotations: str,
             mode: Base.Mode):
    path_to_data_dir = os.path.dirname(path_to_jpeg_images_dir)
    super().__init__(path_to_data_dir, mode)

    path_to_caches_dir = os.path.join('caches', 'tiny-person',
                                      f'{self._mode.value}')
    path_to_image_ids_pickle = os.path.join(path_to_caches_dir, 'image-ids.pkl')
    path_to_image_id_dict_pickle = os.path.join(path_to_caches_dir,
                                                'image-id-dict.pkl')
    path_to_image_ratios_pickle = os.path.join(path_to_caches_dir,
                                               'image-ratios.pkl')

    if self._mode == TinyPerson.Mode.TRAIN:
        pass
    elif self._mode == TinyPerson.Mode.EVAL:
        pass
    else:
        raise ValueError('invalid mode')

    coco_dataset = CocoDetection(root=path_to_jpeg_images_dir,
                                 annFile=path_to_annotations)

    if os.path.exists(path_to_image_ids_pickle) and os.path.exists(path_to_image_id_dict_pickle):
        print('loading cache files...')
        with open(path_to_image_ids_pickle, 'rb') as f:
            self._image_ids = pickle.load(f)
        with open(path_to_image_id_dict_pickle, 'rb') as f:
            self._image_id_to_annotation_dict = pickle.load(f)
        with open(path_to_image_ratios_pickle, 'rb') as f:
            self._image_ratios = pickle.load(f)
    else:
        print('generating cache files...')
        os.makedirs(path_to_caches_dir, exist_ok=True)
        self._image_ids: List[int] = []
        self._image_id_to_annotation_dict: Dict[str, TinyPerson.Annotation] = {}
        self._image_ratios = []
        self._class_names = [
            coco_dataset.coco.cats[i + 1]['name']
            for i in range(len(coco_dataset.coco.cats))
        ]
        for idx, (image, annotation) in enumerate(tqdm(coco_dataset)):
            if len(annotation) > 0:
                image_id = annotation[0]['image_id']  # all image_id in annotation are the same
                self._image_ids.append(image_id)
                img_info = coco_dataset.coco.loadImgs(image_id)[0]
                self._image_id_to_annotation_dict[image_id] = TinyPerson.Annotation(
                    filename=os.path.join(path_to_jpeg_images_dir,
                                          img_info['file_name']),
                    objects=[
                        TinyPerson.Annotation.Object(
                            bbox=BBox(  # `ann['bbox']` is in the format [left, top, width, height]
                                left=ann['bbox'][0],
                                top=ann['bbox'][1],
                                right=ann['bbox'][0] + ann['bbox'][2],
                                bottom=ann['bbox'][1] + ann['bbox'][3]),
                            label=ann['category_id'])
                        for ann in annotation
                    ],
                    corner=BBox(left=img_info['corner'][0],
                                top=img_info['corner'][1],
                                right=img_info['corner'][2],
                                bottom=img_info['corner'][3]))
                ratio = float(image.width / image.height)
                self._image_ratios.append(ratio)
        with open(path_to_image_ids_pickle, 'wb') as f:
            pickle.dump(self._image_ids, f)
        with open(path_to_image_id_dict_pickle, 'wb') as f:
            pickle.dump(self._image_id_to_annotation_dict, f)
        with open(path_to_image_ratios_pickle, 'wb') as f:
            pickle.dump(self._image_ratios, f)