Example No. 1
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns: entry dict with the transformed image, its size, the
            ground-truth boxes and classes, and bookkeeping fields
            (scale, index, image_id, flipped, fn)
        """
        img_id = self.ids[index]
        path = self.coco.loadImgs(img_id)[0]['file_name']
        image_unpadded = Image.open(os.path.join(self.root,
                                                 path)).convert('RGB')
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)
        gt_classes = np.array([self.id_to_ind[x['category_id']] for x in anns],
                              dtype=np.int64)

        if np.any(gt_classes >= len(self.ind_to_classes)):
            raise ValueError("OH NO {}".format(index))

        if len(anns) == 0:
            raise ValueError("Annotations should not be empty for index {}".format(index))

        # COCO boxes are stored as [x, y, w, h]
        gt_boxes = np.array([x['bbox'] for x in anns], dtype=np.float32)

        if np.any(gt_boxes[:, [0, 1]] < 0):
            raise ValueError("GT boxes have negative x/y coordinates")
        if np.any(gt_boxes[:, [2, 3]] < 0):
            raise ValueError("GT boxes have negative width/height")
        # Convert [x, y, w, h] to [x1, y1, x2, y2]
        gt_boxes[:, [2, 3]] += gt_boxes[:, [0, 1]]

        # Rescale so that the boxes are at BOX_SCALE
        if self.is_train:
            image_unpadded, gt_boxes = random_crop(
                image_unpadded,
                gt_boxes * BOX_SCALE / max(image_unpadded.size),
                BOX_SCALE,
                round_boxes=False,
            )
        else:
            # At test time the boxes are not cropped, only rescaled to BOX_SCALE.
            gt_boxes = gt_boxes * BOX_SCALE / max(image_unpadded.size)
        w, h = image_unpadded.size
        box_scale_factor = BOX_SCALE / max(w, h)

        # During training, flip the image horizontally with probability 0.5
        flipped = self.is_train and np.random.random() > 0.5
        if flipped:
            scaled_w = int(box_scale_factor * float(w))
            image_unpadded = image_unpadded.transpose(Image.FLIP_LEFT_RIGHT)
            # Mirror the box x-coordinates around the scaled image width
            gt_boxes[:, [0, 2]] = scaled_w - gt_boxes[:, [2, 0]]

        # Final (height, width, scale) after resizing the longer side to IM_SCALE
        img_scale_factor = IM_SCALE / max(w, h)
        if h > w:
            im_size = (IM_SCALE, int(w * img_scale_factor), img_scale_factor)
        elif h < w:
            im_size = (int(h * img_scale_factor), IM_SCALE, img_scale_factor)
        else:
            im_size = (IM_SCALE, IM_SCALE, img_scale_factor)

        entry = {
            'img': self.transform_pipeline(image_unpadded),
            'img_size': im_size,
            'gt_boxes': gt_boxes,
            'gt_classes': gt_classes,
            'scale': IM_SCALE / BOX_SCALE,
            'index': index,
            'image_id': img_id,
            'flipped': flipped,
            'fn': path,
        }

        return entry
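
The box handling above can be exercised on its own: COCO stores boxes as [x, y, w, h], the snippet converts them to [x1, y1, x2, y2] corners, rescales them so that the longer image side maps to BOX_SCALE, and mirrors the x-coordinates when the image is flipped horizontally. The sketch below reproduces those steps for the test-time path (no random crop) in plain NumPy; the function name prepare_boxes and the value 1024 for BOX_SCALE are illustrative assumptions, not values taken from the snippet.

import numpy as np

BOX_SCALE = 1024  # assumed value; the snippet imports this constant from its own config


def prepare_boxes(coco_boxes, img_w, img_h, flip=False):
    """Mimic the box handling in __getitem__: xywh -> xyxy, rescale, optional flip."""
    boxes = np.asarray(coco_boxes, dtype=np.float32)
    # COCO stores [x, y, w, h]; convert to [x1, y1, x2, y2]
    boxes[:, [2, 3]] += boxes[:, [0, 1]]
    # Rescale so the longer image side maps to BOX_SCALE
    scale = BOX_SCALE / max(img_w, img_h)
    boxes = boxes * scale
    if flip:
        scaled_w = int(scale * float(img_w))
        # Mirror x-coordinates, swapping x1/x2 so that x1 <= x2 still holds
        boxes[:, [0, 2]] = scaled_w - boxes[:, [2, 0]]
    return boxes


# A 640x480 image with one 100x50 box at (10, 20)
print(prepare_boxes([[10, 20, 100, 50]], img_w=640, img_h=480))
print(prepare_boxes([[10, 20, 100, 50]], img_w=640, img_h=480, flip=True))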
Example No. 2
    def __getitem__(self, index):
        """
    Get the pixels of an image, and a random synthetic scene graph for that
    image constructed on-the-fly from its COCO object annotations. We assume
    that the image will have height H, width W, C channels; there will be O
    object annotations, each of which will have both a bounding box and a
    segmentation mask of shape (M, M). There will be T triples in the scene
    graph.

    Returns a tuple of:
    - image: FloatTensor of shape (C, H, W)
    - objs: LongTensor of shape (O,)
    - boxes: FloatTensor of shape (O, 4) giving boxes for objects in
      (x0, y0, x1, y1) format, in a [0, 1] coordinate system
    - masks: LongTensor of shape (O, M, M) giving segmentation masks for
      objects, where 0 is background and 1 is object.
    - triples: LongTensor of shape (T, 3) where triples[t] = [i, p, j]
      means that (objs[i], p, objs[j]) is a triple.
    """
        image_id = self.ids[index]

        filename = self.image_id_to_filename[image_id]
        image_path = os.path.join(self.image_dir, filename)
        with open(image_path, 'rb') as f:
            with PIL.Image.open(f) as image:
                image_unpadded = image.convert('RGB')

        objs, boxes = [], []
        for object_data in self.image_id_to_objects[image_id]:
            objs.append(object_data['category_id'])
            # COCO boxes are stored as [x, y, w, h]; keep pixel coordinates and
            # convert to [x0, y0, x1, y1] corners.
            x, y, w, h = object_data['bbox']
            boxes.append([x, y, x + w, y + h])

        # The original loader also built per-object segmentation masks, appended a
        # dummy __image__ object, and converted everything to torch tensors here;
        # this version only keeps the object classes and boxes as numpy arrays.
        gt_classes = np.array(objs, dtype=np.int64)
        gt_boxes = np.array(boxes, dtype=np.float32)
        # The remainder of the original loader (mask stacking, object centres, and
        # geometric relationship triples: surrounding / inside / left of / right of /
        # above / below, plus __in_image__ relations) is likewise unused; only the
        # classes and boxes above are carried forward.

        # Rescale so that the boxes are at BOX_SCALE
        if self.is_train:
            image_unpadded, gt_boxes = random_crop(
                image_unpadded,
                gt_boxes * BOX_SCALE / max(image_unpadded.size),
                BOX_SCALE,
                round_boxes=False,
            )
        else:
            # At test time the boxes are not cropped, only rescaled to BOX_SCALE.
            gt_boxes = gt_boxes * BOX_SCALE / max(image_unpadded.size)
        w, h = image_unpadded.size
        box_scale_factor = BOX_SCALE / max(w, h)

        # During training, flip the image horizontally with probability 0.5
        flipped = self.is_train and np.random.random() > 0.5
        if flipped:
            scaled_w = int(box_scale_factor * float(w))
            image_unpadded = image_unpadded.transpose(
                PIL.Image.FLIP_LEFT_RIGHT)
            # Mirror the box x-coordinates around the scaled image width
            gt_boxes[:, [0, 2]] = scaled_w - gt_boxes[:, [2, 0]]

        # Final (height, width, scale) after resizing the longer side to IM_SCALE
        img_scale_factor = IM_SCALE / max(w, h)
        if h > w:
            im_size = (IM_SCALE, int(w * img_scale_factor), img_scale_factor)
        elif h < w:
            im_size = (int(h * img_scale_factor), IM_SCALE, img_scale_factor)
        else:
            im_size = (IM_SCALE, IM_SCALE, img_scale_factor)

        entry = {
            'img': self.transform_pipeline(image_unpadded),
            'img_size': im_size,
            'gt_boxes': gt_boxes,
            'gt_classes': gt_classes,
            'scale': IM_SCALE / BOX_SCALE,
            'index': index,
            'image_id': image_id,
            'flipped': flipped,
            'fn': image_path,
        }

        return entry
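
Since each entry carries a variable number of ground-truth boxes, a dataset exposing either __getitem__ above would normally be wrapped in a torch DataLoader with a collate function that stacks the image tensors and keeps the per-image arrays as lists. The sketch below is one minimal, hypothetical way to do that, assuming transform_pipeline yields tensors of a common size (otherwise the images would need padding before stacking); the names coco_collate and CocoDetection are placeholders, not taken from the snippets.

import torch


def coco_collate(entries):
    """Stack the image tensors; keep the variable-length per-image fields as lists."""
    batch = {'img': torch.stack([e['img'] for e in entries], dim=0)}
    for key in ('img_size', 'gt_boxes', 'gt_classes', 'scale',
                'index', 'image_id', 'flipped', 'fn'):
        batch[key] = [e[key] for e in entries]
    return batch


# Hypothetical usage; CocoDetection stands for whichever class owns __getitem__ above.
# loader = torch.utils.data.DataLoader(CocoDetection(...), batch_size=4,
#                                      shuffle=True, collate_fn=coco_collate)
# for batch in loader:
#     images = batch['img']        # (B, C, H, W), assuming a fixed transform size
#     boxes = batch['gt_boxes']    # list of (O_i, 4) float32 arrays at BOX_SCALE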