def attention(self, frame, objects, index):
    # image augmentation helper function
    def augment(aug, image):
        return aug(image=image)['image']

    # convert box float vals to ints for cropping faces
    box_ints = objects[index]['box'].astype(int)
    box_half_width = (box_ints[2] - box_ints[0]) // 2
    box_mid_width = (box_ints[0] + box_ints[2]) // 2

    # crop only bounding box
    crop_left = albu.Crop((box_mid_width - box_half_width), box_ints[1],
                          box_mid_width, box_ints[3])
    crop_right = albu.Crop(box_mid_width, box_ints[1],
                           (box_mid_width + box_half_width), box_ints[3])
    cropped_l = augment(crop_left, frame)
    cropped_r = augment(crop_right, frame)

    # increase brightness and decrease contrast for lessened lighting effect
    lighting_aug = albu.RandomBrightnessContrast(brightness_limit=(0.3, 0.299),
                                                 contrast_limit=(-0.2, -0.199),
                                                 p=1)
    cropped_l = augment(lighting_aug, cropped_l)
    cropped_r = augment(lighting_aug, cropped_r)

    # flip left side of cropped image horizontally
    flip_aug = albu.HorizontalFlip(p=1)
    flipped_l = augment(flip_aug, cropped_l)

    # convert to tensor and flatten
    cropped_r = torch.flatten(transforms.ToTensor()(cropped_r))
    flipped_l = torch.flatten(transforms.ToTensor()(flipped_l))

    # compare right side of face with flipped left side of face
    cos = torch.nn.CosineSimilarity(dim=0)
    output = cos(cropped_r, flipped_l)
    if output < 0.985:
        attn = 0
    else:
        attn = 1

    # create dictionary of attention flags for each ID
    self.attn_age += 1
    self.attn_dict.setdefault(objects[index]['id'], []).append(attn)
    self.attn_age_dict.update({objects[index]['id']: self.attn_age})
    for v in self.attn_dict.values():
        if len(v) > self.error_time:  # history of X frames per ID
            v.pop(0)  # remove oldest attention flag for ID
        if len(v) > 2:  # record mode of last X frames
            objects[index].update(
                {'forward_gaze': max(set(v), key=v.count)})
        else:  # record current flag
            objects[index].update({'forward_gaze': attn})
    for k, v in self.attn_age_dict.items():
        if (self.attn_age - v) >= 10:
            self.attn_dict.pop(k, None)
    if self.attn_age % 30 == 0:
        self.attn_age_dict.clear()
def test_crop_keypoints():
    image = np.random.randint(0, 256, (100, 100), np.uint8)
    keypoints = [(50, 50, 0, 0)]

    aug = A.Crop(0, 0, 80, 80, p=1)
    result = aug(image=image, keypoints=keypoints)
    assert result["keypoints"] == keypoints

    aug = A.Crop(50, 50, 100, 100, p=1)
    result = aug(image=image, keypoints=keypoints)
    assert result["keypoints"] == [(0, 0, 0, 0)]
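A hedged companion check (illustrative, not part of the original test file): the same Crop window also reduces the image array itself to the cropped size, so an image-shape assertion can sit alongside the keypoint assertions above.

def test_crop_image_shape():
    # illustrative sketch; assumes the same numpy / albumentations imports as above
    image = np.random.randint(0, 256, (100, 100), np.uint8)
    aug = A.Crop(0, 0, 80, 80, p=1)
    result = aug(image=image)
    assert result["image"].shape == (80, 80)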
def get_train_transforms(bb, format=det_config.IMAGE_FORMAT):
    return A.Compose(
        [A.Crop(*bb),
         A.Resize(height=512, width=512, p=1),
         ToTensorV2(p=1.0)],
        p=1.0,
        bbox_params=A.BboxParams(format=format, label_fields=['labels']))
def true_center_by_face(image: torch.Tensor, landmarks: torch.Tensor):
    image, landmarks = np.transpose(image.numpy(), (1, 2, 0)), landmarks.numpy()
    # y_center = int(landmarks[36][0] + landmarks[45][0]) // 2
    # x_center = int(landmarks[:,1].mean().item())
    y, x = landmarks[:, 0], landmarks[:, 1]
    keypoints_landmarks = [x, y, 0, 1]

    # H, W, C = image.shape
    # W_max = min(x_center, W - x_center)
    # H_max = min(y_center, H - y_center)
    # radius = min(W_max, H_max)
    # y_max, y_min = min(H, y_center + H//2), max(0, y_center - H//2)
    # x_max, x_min = min(W, x_center + W//2), max(0, x_center - W//2)

    H, W, C = image.shape
    H09 = int(H * 0.9)
    rh = max(int(270 * H09 / W), 270)
    rw = max(int(270 * W / H09), 270)

    transforms = albumentations.Compose([
        albumentations.Crop(x_min=0, y_min=0, x_max=W, y_max=H09),
        albumentations.Resize(rh, rw),
        albumentations.CenterCrop(256, 256),
        # albumentations.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    data_dict = transforms(image=image, keypoints=[keypoints_landmarks])
    image_new = torch.tensor(np.transpose(data_dict['image'], (2, 0, 1)))
    kp_x, kp_y = data_dict['keypoints'][0][0], data_dict['keypoints'][0][1]
    keypoints_new = torch.cat(
        [torch.tensor(kp_x)[..., None],
         torch.tensor(kp_y)[..., None]], dim=1)
    return image_new, keypoints_new
def crop_aug(self, img, boxes, visualise=False):
    index = random.randint(0, len(boxes) - 1)
    window = deepcopy(boxes[index])
    window[:2] -= 404
    window[2:4] += 404
    self.check_bboxes(img, [window])
    window = list(map(int, window))
    aug = A.Compose([
        A.Crop(x_min=window[0],
               x_max=window[2],
               y_min=window[1],
               y_max=window[3],
               p=1.0),
        A.Resize(808, 808, p=1.0),
        A.RandomCrop(608, 608, p=1.0)
    ],
                    p=1.0,
                    bbox_params=A.BboxParams(
                        format='pascal_voc',
                        min_area=0,
                        min_visibility=0,
                    ))
    sample = aug(image=img, bboxes=boxes)
    res_img, res_boxes = sample['image'], sample['bboxes']
    if visualise:
        for box in res_boxes:
            box = list(map(int, box))
            cv2.rectangle(res_img, (box[0], box[1]), (box[2], box[3]),
                          (255, 255, 0), 1)
        cv2.imwrite('temp.jpg', res_img)
    return res_img, res_boxes
def crop_image_train_new(self, img_path, annotations, crop_h, crop_w,
                         json_name, im_fold):
    # import pdb
    # pdb.set_trace()
    save_path = osp.dirname(img_path)
    json_path = osp.join(save_path, json_name)
    save_path = osp.join(save_path, im_fold)
    if not osp.exists(save_path):
        os.makedirs(save_path)
    imgs = os.listdir(img_path)
    # crop = A.Compose(
    #     [A.RandomCrop(crop_h, crop_w)],
    #     bbox_params=A.BboxParams(format='coco')
    # )
    # image = cv2.imread(osp.join(img_path, imgs[0]))
    new_annotations = {}
    id = 0
    for im in tqdm(imgs):
        path = osp.join(img_path, im)
        fname, _ = osp.splitext(im)
        image = cv2.imread(path)
        # print(annotations[im])
        bboxs = [[box[1][0], box[1][1], box[1][2], box[1][3], box[0]]
                 for box in annotations[im]]
        H, W, C = image.shape
        # for i,bbox in enumerate(bboxs):
        for x in range(0, W - crop_w, 200):
            for y in range(0, H - crop_h, 200):
                x1 = int(x)
                x2 = int(x + crop_w)
                y1 = int(y)
                y2 = int(y + crop_h)
                # x1 = x1 if x1>0 else 0
                # x2 = x2 if x2<W else W
                # y1 = y1 if y1>0 else 0
                # y2 = y2 if y2<H else H
                crop = A.Compose([A.Crop(x1, y1, x2, y2)],
                                 bbox_params=A.BboxParams(format='coco'))
                # import pdb
                # pdb.set_trace()
                transformed = crop(image=image, bboxes=bboxs)
                new_image = transformed['image']
                new_bbox = transformed['bboxes']
                # import pdb
                # pdb.set_trace()
                # pixle_value = new_image.sum()
                # print(pixle_value)
                if new_bbox:
                    new_name = fname + '_' + str(id) + '.png'
                    new_box = [[box[4], box[:4]] for box in new_bbox]
                    new_path = osp.join(save_path, new_name)
                    new_im_info = [id, crop_h, crop_w]
                    all_info = [new_box, new_im_info]
                    new_annotations[new_name] = all_info
                    cv2.imwrite(new_path, new_image)
                    id += 1
                else:
                    continue
    self.convert2coco(new_annotations, json_path)
def __data_pipline(self, img, ldmarks):
    # Convert RGB to BGR
    transform = None
    if self.mode == 'train':
        transform = A.Compose(
            [
                A.Resize(height=self.output_size[0],
                         width=self.output_size[1],
                         p=1),  # /8--->(356, 536)
                A.Crop(x_min=40,
                       y_min=0,
                       x_max=self.output_size[1] - 76,
                       y_max=self.output_size[0],
                       p=1),
                # A.CLAHE(p=1),
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.ToFloat(p=1),  # (0 ~ 1)
                # A.Normalize(max_pixel_value=1, p=1)
            ],
            keypoint_params=A.KeypointParams(format='xy'))
    elif self.mode == 'test':
        # import random
        # random.seed(2020)
        transform = A.Compose(
            [
                A.Resize(height=self.output_size[0],
                         width=self.output_size[1],
                         p=1),  # /8--->(356, 536)
                A.Crop(x_min=40,
                       y_min=0,
                       x_max=self.output_size[1] - 76,
                       y_max=self.output_size[0],
                       p=1),  # (356, 460)
                # A.CLAHE(p=1),
                A.ToFloat(p=1),  # (0 ~ 1)
                # A.Normalize(max_pixel_value=1, p=1)
            ],
            keypoint_params=A.KeypointParams(format='xy'))
    transformed = transform(image=img, keypoints=ldmarks)
    return transformed
def forward(self, x: List[torch.Tensor]) -> List[List[ClassifiedBBox]]:
    results = []
    bx_raw = self.detector(x)
    for i in range(len(x)):
        x_i = x[i].cpu()
        bx_raw_i = {k: v.cpu() for k, v in bx_raw[i].items()}
        bboxes = convert_bboxes(boxes=bx_raw_i['boxes'],
                                labels=bx_raw_i['labels'],
                                scores=bx_raw_i['scores'])
        bboxes = filter_bboxes(bboxes,
                               min_score=self.min_score,
                               categories=self.airplane_categories)
        bboxes = non_maximum_suppression(bboxes, self.nms_threshold,
                                         self.nms_ranking,
                                         self.nms_suppression)
        bboxes = bboxes[:self.top_bboxes]
        image = self.tensor_to_image(x_i)
        w, h = image.size
        image_rect = Rectangle(pt_from=(0, 0), pt_to=(w - 1, h - 1))
        classified_bboxes = []
        if len(bboxes) > 0:
            cropped_tensors = []
            for bbox in bboxes:
                bbox = bbox.expand(self.expand_coeff)
                bbox = bbox.intersection(image_rect)
                crop = AlbumentationsTransform(
                    albu.Crop(x_min=bbox.x0,
                              y_min=bbox.y0,
                              x_max=bbox.x1,
                              y_max=bbox.y1,
                              always_apply=True))
                cropped_image = crop(image)
                cropped_tensor = self.classifier_transform(cropped_image)
                cropped_tensors.append(cropped_tensor)
            crops_i = torch.stack(cropped_tensors).to(x[i])
            z_i = self.classifier(crops_i)
            probs_i_hat = torch.exp(log_softmax(z_i, dim=1)).cpu().numpy()
            for j in range(len(bboxes)):
                classified_bboxes.append(
                    ClassifiedBBox(frame=bboxes[j],
                                   classes=dict(
                                       zip(self.classifier.target_classes,
                                           probs_i_hat[j].tolist()))))
        results.append(classified_bboxes)
    return results
def cropIfNeed(self, img):
    img = np.array(img)
    height, width, _ = img.shape
    cropped_height, cropped_width, _ = img.shape
    if cropped_height % 32 != 0:
        cropped_height = (cropped_height // 32) * 32
    if cropped_width % 32 != 0:
        cropped_width = (cropped_width // 32) * 32
    crop = albu.Crop(x_min=0,
                     y_min=0,
                     x_max=cropped_width,
                     y_max=cropped_height)
    img = crop(image=img)["image"]
    return img, crop
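As a hedged alternative sketch (not part of the original class): if discarding the bottom and right pixels is undesirable, albumentations' PadIfNeeded can pad the image up to the next multiple of 32 instead of cropping down. The helper name pad_to_multiple_of_32 is hypothetical.

import albumentations as albu
import numpy as np

def pad_to_multiple_of_32(img):
    # illustrative: pad up to the next multiple of 32 instead of cropping down
    img = np.array(img)
    height, width, _ = img.shape
    pad = albu.PadIfNeeded(min_height=((height + 31) // 32) * 32,
                           min_width=((width + 31) // 32) * 32,
                           border_mode=0,  # constant border
                           value=0)
    return pad(image=img)["image"]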
def crop_by_bboxes(image_dir_in,
                   image_dir_out,
                   bboxes_path: str,
                   expand_coeff: float = 0.25,
                   min_score: float = 0.7,
                   nms_threshold: float = 0.5,
                   nms_ranking: str = 'score_sqrt_area',
                   nms_suppression: str = 'overlap',
                   show_progress: bool = True):
    pathlib.Path(image_dir_out).mkdir(parents=True, exist_ok=False)
    paths = list(os.listdir(image_dir_in))
    with LMDBDict(bboxes_path) as lmdb_dict:
        bboxes_db = BoundingBoxesLMDBDict(lmdb_dict)
        with tqdm(total=len(paths),
                  disable=not show_progress,
                  file=sys.stdout) as progress_bar:
            progress_bar.set_description(
                f'Cropping images {image_dir_in} => {image_dir_out}')
            for path in paths:
                img = Image.open(os.path.join(image_dir_in, path))
                w, h = img.size
                img_rect = Rectangle(pt_from=(0, 0), pt_to=(w - 1, h - 1))
                bboxes = bboxes_db[path]
                bboxes = filter_bboxes(bboxes, min_score=min_score)
                bboxes = non_maximum_suppression(bboxes, nms_threshold,
                                                 nms_ranking, nms_suppression)
                if len(bboxes) > 0:
                    bbox = bboxes[0]
                    bbox = bbox.expand(expand_coeff)
                    bbox = bbox.intersection(img_rect)
                    transform = albu.Crop(x_min=bbox.x0,
                                          y_min=bbox.y0,
                                          x_max=bbox.x1,
                                          y_max=bbox.y1,
                                          always_apply=True)
                else:
                    transform = albu_ext.Identity()
                augmented = transform(image=np.array(img))
                img_aug = Image.fromarray(augmented['image'])
                img_aug.save(os.path.join(image_dir_out, path))
                progress_bar.update()
def crop_image_train(self, img_path, annotations, crop_h, crop_w, json_name,
                     im_fold):
    # import pdb
    # pdb.set_trace()
    save_path = osp.dirname(img_path)
    json_path = osp.join(save_path, json_name)
    save_path = osp.join(save_path, im_fold)
    if not osp.exists(save_path):
        os.makedirs(save_path)
    imgs = os.listdir(img_path)
    # crop = A.Compose(
    #     [A.RandomCrop(crop_h, crop_w)],
    #     bbox_params=A.BboxParams(format='coco')
    # )
    # image = cv2.imread(osp.join(img_path, imgs[0]))
    new_annotations = {}
    id = 0
    for im in tqdm(imgs):
        path = osp.join(img_path, im)
        fname, _ = osp.splitext(im)
        image = cv2.imread(path)
        # print(annotations[im])
        bboxs = [[box[1][0], box[1][1], box[1][2], box[1][3], box[0]]
                 for box in annotations[im]]
        H, W, C = image.shape
        for i, bbox in enumerate(bboxs):
            center_point = [bbox[0] + bbox[2] // 2, bbox[1] + bbox[3] // 2]
            x1 = int(center_point[0] - crop_h // 2)
            x2 = int(center_point[0] + crop_h // 2)
            y1 = int(center_point[1] - crop_w // 2)
            y2 = int(center_point[1] + crop_w // 2)
            x1 = x1 if x1 > 0 else 0
            x2 = x2 if x2 < W else W
            y1 = y1 if y1 > 0 else 0
            y2 = y2 if y2 < H else H
            crop = A.Compose([A.Crop(x1, y1, x2, y2)],
                             bbox_params=A.BboxParams(format='coco'))
            # import pdb
            # pdb.set_trace()
            transformed = crop(image=image, bboxes=bboxs)
            new_image = transformed['image']
            new_bbox = transformed['bboxes']
            new_name = fname + '_' + str(i) + '.png'
            new_box = [[box[4], box[:4]] for box in new_bbox]
            new_path = osp.join(save_path, new_name)
            new_im_info = [id, crop_h, crop_w]
            all_info = [new_box, new_im_info]
            new_annotations[new_name] = all_info
            cv2.imwrite(new_path, new_image)
            id += 1
    self.convert2coco(new_annotations, json_path)
def test(resize_size=256, crop_size=224):
    start_center = int(round((resize_size - crop_size - 1) / 2))
    return albumentations.Compose([
        albumentations.Resize(resize_size, resize_size),
        albumentations.Crop(
            start_center,
            start_center,
            start_center + crop_size,
            start_center + crop_size,
        ),
        albumentations.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])
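For comparison, a minimal sketch (an assumption, not taken from the repository above): the manually computed crop window is close to what albumentations' built-in CenterCrop produces, which makes the intent of the pipeline more explicit.

def test_center_crop(resize_size=256, crop_size=224):
    # illustrative equivalent using CenterCrop; assumes the same imports as above
    return albumentations.Compose([
        albumentations.Resize(resize_size, resize_size),
        albumentations.CenterCrop(crop_size, crop_size),
        albumentations.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])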
def __getitem__(self, index):
    """Return a data point and its metadata information.

    Parameters:
        index - - a random integer for data indexing

    Returns a dictionary that contains A, B, A_paths and B_paths
        A (tensor) - - an image in the input domain
        B (tensor) - - its corresponding image in the target domain
        A_paths (str) - - image paths
        B_paths (str) - - image paths (same as A_paths)
    """
    # read a image given a random integer index
    AB_path = self.AB_paths[index]
    AB = Image.open(AB_path).convert('RGB')
    # split AB image into A and B
    w, h = AB.size
    w2 = int(w / 2)
    A = AB.crop((0, 0, w2, h))
    B = AB.crop((w2, 0, w, h))

    # apply the same transform to both A and B
    # transform_params = get_params(self.opt, A.size)
    transformOutput = Al.Compose([
        Al.Crop(x_max=256, y_max=256),
        Al.RandomRotate90(),
        Al.Flip(),
    ])
    transformInput = Al.Compose([
        Al.Blur(blur_limit=3),
        Al.Equalize(always_apply=True),
        Al.RandomBrightnessContrast(),
        transformOutput,
    ])
    random.seed(42)
    A = transformInput(image=np.array(A))['image']
    B = transformOutput(image=np.array(B))['image']

    # A_transform = get_transform(self.opt, transform_params, grayscale=(self.input_nc == 1))
    # B_transform = get_transform(self.opt, transform_params, grayscale=(self.output_nc == 1))
    # A = A_transform(A)
    # B = B_transform(B)

    return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
def detect(data_path, model):  # change
    th = (200, 300, 1100, 900)
    transform_A = A.Compose([
        A.Crop(x_min=th[0],
               y_min=th[1],
               x_max=th[2],
               y_max=th[3],
               always_apply=True)
    ])
    data_frame = pd.DataFrame(columns=['image_name', 'x1', 'y1', 'x2', 'y2'])
    k = 0
    l = 0
    for img_nm in os.listdir(data_path):
        k += 1
        image = data_path + img_nm
        image = io.imread(image)
        image = transform_A(image=image)['image']
        # print(image)
        image_for_model = FT.to_tensor(image)
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        image_for_model = FT.normalize(image_for_model, mean=mean, std=std)
        image_for_model = image_for_model.to(device)
        preds = model([image_for_model])[0]
        nms_preds = torchvision.ops.nms(preds['boxes'],
                                        preds['scores'],
                                        iou_threshold=0.2)
        result = {
            'boxes': preds['boxes'][nms_preds].to('cpu').tolist(),
            'labels': preds['labels'][nms_preds].to('cpu').tolist()
        }
        for bbox, lbl in zip(result['boxes'], result['labels']):
            if lbl == 2:
                l += 1
                df = pd.Series({
                    'image_name': img_nm,
                    'x1': bbox[0] + th[0],
                    'y1': bbox[1] + th[1],
                    'x2': bbox[2] + th[0],
                    'y2': bbox[3] + th[1]
                })
                data_frame = data_frame.append(df, ignore_index=True)
    data_frame.to_csv('bboxes.csv')
def test_augment(image, eigenvectors, eigenvalues, pixel_avg, stdev):
    """
    Create ten different images per input image, all the different
    augmentations specified on the paper for test time.
    """
    height, width = image.shape[0], image.shape[1]
    crop_width = min(224, width)
    crop_height = min(224, height)
    crops = [albumentations.CenterCrop(crop_height, crop_width)]
    flipper = albumentations.HorizontalFlip()
    for corner in ['ur', 'lr', 'll', 'ul']:
        if corner[0] == 'u':
            y_max = height
            y_min = height - crop_height
        else:
            y_max = crop_height
            y_min = 0
        if corner[1] == 'r':
            x_max = width
            x_min = width - crop_width
        else:
            x_max = crop_width
            x_min = 0
        crops.append(albumentations.Crop(x_min, y_min, x_max, y_max))
    output_images = []
    for cropper in crops:
        for is_reflected in [True, False]:
            augmentations = [cropper, flipper] if is_reflected else [cropper]
            augmenter = albumentations.Compose(augmentations)
            output_image = ((augmenter(image=image)["image"] - pixel_avg) /
                            255) / stdev
            output_images.append(ImagenetSequence.pad_image(output_image))
    return output_images
def expression(self, frame, objects, index):
    # image augmentation helper function
    def augment(aug, image):
        return aug(image=image)['image']

    # convert box float vals to ints for cropping faces
    box_ints = objects[index]['box'].astype(int)

    # crop bounding box, resize to 48x48, convert to 1-channel tensor
    crop_aug = albu.Crop(box_ints[0], box_ints[1], box_ints[2], box_ints[3])
    cropped = augment(crop_aug, frame)
    size_aug = albu.Resize(48, 48)
    cropped = augment(size_aug, cropped)
    pil = transforms.ToPILImage()(cropped)
    grayed = transforms.Grayscale()(pil)
    input_tens = transforms.ToTensor()(grayed)

    classes = [
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprised', 'Neutral'
    ]

    # define and load network
    net = model.Model(num_classes=len(classes))
    checkpoint = torch.load(
        '../pytorchfer/trained/private_model_291_60.t7',
        map_location=torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"))
    net.load_state_dict(checkpoint['net'])
    net.eval()

    # backprop net - order of predicted classes
    img = torch.stack([input_tens])
    bp = BackPropagation(model=net)
    prob, emo = bp.forward(img)
    gcam = GradCAM(model=net)
    _ = gcam.forward(img)
    gbp = GuidedBackPropagation(model=net)
    _ = gbp.forward(img)

    # most probable class
    actual_emotion = emo[:, 0]
    gbp.backward(ids=actual_emotion.reshape(1, 1))
    gcam.backward(ids=actual_emotion.reshape(1, 1))
    print(classes[actual_emotion.data], (prob.data[:, 0] * 100))
def _configure_slicing(self):
    """Configure the transformation for the slicing of images."""
    slice_index = self.index % self.parent.num_slices_per_image

    slice_index_x = slice_index // self.parent.num_slices_per_axis
    slice_index_y = slice_index % self.parent.num_slices_per_axis

    if self.parent.num_slices_per_image > 1:
        if self.parent.initial_cropping_rectangle is None:
            image_size = image_size_hwc(self.image)
        else:
            r = self.parent.initial_cropping_rectangle
            image_size = (r[3] - r[1], r[2] - r[0])

        num_surplus_pixels_y, num_surplus_pixels_x = [
            num_pixels % self.parent.num_slices_per_axis
            for num_pixels in image_size
        ]

        if num_surplus_pixels_y or num_surplus_pixels_x:
            warnings.warn(
                f"Cannot slice image evenly. Discarding pixels "
                f"(x: {num_surplus_pixels_x}; y: {num_surplus_pixels_y}).")

        slice_size_y, slice_size_x = [
            num_pixels // self.parent.num_slices_per_axis
            for num_pixels in image_size
        ]

        x_min = slice_index_x * slice_size_x
        x_max = x_min + slice_size_x
        y_min = slice_index_y * slice_size_y
        y_max = y_min + slice_size_y

        slice_rectangle = [x_min, y_min, x_max, y_max]

        self.transforms.append(albumentations.Crop(*slice_rectangle))
        self.image_name += f"_slice{slice_index}"

    self.slice_index = slice_index
    self.slice_index_x = slice_index_x
    self.slice_index_y = slice_index_y
def bbox_aug(self, img, bbox, h, w):
    resized_h = int(h * 0.8)
    resized_w = int(w * 0.8)
    xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]
    first = [max(xmax - resized_w, 0), max(ymax - resized_h, 0)]
    second = [min(xmin + resized_w, w) - resized_w,
              min(ymin + resized_h, h) - resized_h]
    if first[0] > second[0] or first[1] > second[1]:
        tf = A.Compose(
            [
                A.HorizontalFlip(p=0.5),
                A.Resize(self.data_cfg.DATA.GLOBAL_SIZE[0],
                         self.data_cfg.DATA.GLOBAL_SIZE[1]),
                AP.transforms.ToTensor(normalize={
                    'mean': [0.485, 0.456, 0.406],
                    'std': [0.229, 0.224, 0.225]
                })
            ],
            bbox_params=A.BboxParams(format='coco',
                                     label_fields=['class_labels']),
        )(image=img, bboxes=[bbox], class_labels=[0])
        # print(tf['bboxes'])
        return tf['image'], tf['bboxes'][0]
        # return img, bbox
    x = random.randint(first[0], second[0])
    y = random.randint(first[1], second[1])
    # print(bbox)
    tf = A.Compose(
        [
            A.Crop(x_min=x,
                   y_min=y,
                   x_max=x + resized_w,
                   y_max=y + resized_h,
                   p=0.5),
            A.HorizontalFlip(p=0.5),
            A.Resize(self.data_cfg.DATA.GLOBAL_SIZE[0],
                     self.data_cfg.DATA.GLOBAL_SIZE[1]),
            AP.transforms.ToTensor(normalize={
                'mean': [0.485, 0.456, 0.406],
                'std': [0.229, 0.224, 0.225]
            })
        ],
        bbox_params=A.BboxParams(format='coco',
                                 label_fields=['class_labels']),
    )(image=img, bboxes=[bbox], class_labels=[0])
    # print(tf['bboxes'])
    return tf['image'], tf['bboxes'][0]
def __init__(self, names, image_dir, point_dir, transform, cache_size,
             patch_size, inter_dist):
    # store filenames
    self.image_names = [os.path.join(image_dir, f + '.jpg') for f in names]
    self.point_names = [os.path.join(point_dir, f + '.txt') for f in names]
    self.transform = transform
    self.random_crop = A.Compose(
        [A.RandomCrop(*patch_size)],
        keypoint_params=A.KeypointParams(format='xy'))
    self.det_crop = A.Compose(
        [A.Crop()],
        keypoint_params=A.KeypointParams(format='xy'))
    self.patch_size = patch_size
    self.center = (patch_size[1] // 2, patch_size[0] // 2)
    self.inter_dist = inter_dist
    self._cache_patch = []
    self._cache_points = []
    self._cache_size = cache_size
    self.start_index = 0

    # fill cache
    self.update_n_samples = 20
    for i in range(
        (cache_size + self.update_n_samples - 1) // self.update_n_samples):
        self.update_cache()
def expression(self, frame, objects, index):
    # apply TenCrop to image and convert each crop ToTensor
    transform_test = transforms.Compose([
        transforms.TenCrop(self.cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    # albumentations helper function
    def augment(aug, image):
        return aug(image=image)['image']

    # face bounding box coordinates as type: integer
    box_ints = objects[index]['box'].astype(int)

    # crop bounding box from frame, resize to 48x48, convert to Tensor
    crop_aug = albu.Crop(box_ints[0], box_ints[1], box_ints[2], box_ints[3])
    img = augment(crop_aug, frame)
    size_aug = albu.Resize(48, 48)
    img = augment(size_aug, img)
    img = transforms.ToPILImage()(img)

    # apply TenCrop to Resized + Grayscaled faces and convert ToTensor inputs
    inputs = transform_test(img)

    class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise',
                   'Neutral']

    net = VGG('VGG19')
    checkpoint = torch.load(model_path,  # path to FER model
                            map_location=torch.device(
                                "cuda" if torch.cuda.is_available() else "cpu"))
    net.load_state_dict(checkpoint['net'])
    net.eval()

    ncrops, c, h, w = np.shape(inputs)
    inputs = inputs.view(-1, c, h, w)
    outputs = net(inputs)
    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops
    scores = F.softmax(outputs_avg, dim=0)
    max_score, predicted = torch.max(scores.data, 0)
    print("{0}: {1}% confidence".format(class_names[int(predicted)],
                                        (float(max_score) * 100)))
def get_transform_valid_another_crop_512():
    return alb.Compose([alb.Crop(12, 42, 500, 460, p=1.0)])
def get_transform_another_crop_512():
    return alb.Compose([
        alb.Crop(12, 42, 500, 460, p=1.0),
        alb.HorizontalFlip(p=0.5),
        alb.VerticalFlip(p=0.5),
    ])
def _get_val_item_offline(self, index):
    img_path = self.imgs[index]
    # print('val image path: '+ img_path)
    bboxes_with_cls_id = np.array(self.truth.get(img_path), dtype=np.float)
    img_old = cv2.imread(os.path.join(self.cfg.data_dir, img_path))
    img = self.load_image_pil(os.path.join(self.cfg.data_dir, img_path))
    w, h, h_cut = 2432, 1368, 1216

    # 0. Dict for all augmentations
    bbox_params_ = {
        'format': 'pascal_voc',
        'min_area': 0,
        'min_visibility': 0,
    }

    # 1. Resize the image, and crop out upper part of it
    resize_and_crop = A.Compose([
        A.Resize(height=h, width=w, p=1.0),
        A.Crop(x_min=0, x_max=w, y_min=h - h_cut, y_max=1368, p=1.0)
    ],
                                p=1.0,
                                bbox_params=A.BboxParams(**bbox_params_))

    # 2. Divide into 8 squares
    assert (w % 4) == 0
    assert (h % 2) == 0
    sides_x = [int(w * (j / 4)) for j in range(5)]
    sides_y = [int(h_cut * (j / 2)) for j in range(3)]
    crop_list = []
    for i in range(4):
        for j in range(2):
            x_min, x_max = sides_x[i], sides_x[i + 1]
            y_min, y_max = sides_y[j], sides_y[j + 1]
            crop_list.append(
                A.Compose(
                    [
                        A.Crop(x_min=x_min,
                               x_max=x_max,
                               y_min=y_min,
                               y_max=y_max,
                               p=1.0),
                        # ToTensorV2(p=1.0)
                    ],
                    p=1.0,
                    bbox_params=A.BboxParams(**bbox_params_)))

    # 3. Apply augs
    temp = resize_and_crop(**{
        'image': img,
        'bboxes': bboxes_with_cls_id,
    })
    images_list, boxes_list = [], []
    for crop in crop_list:
        sample = crop(**{
            'image': temp['image'],
            'bboxes': temp['bboxes'],
        })
        images_list.append(sample['image'])
        boxes_list.append(sample['bboxes'])
    # images_combined = torch.stack(images_list, dim=0)
    for i in range(len(boxes_list)):
        boxes_list[i] = tuple(
            map(lambda x: torch.tensor(x, dtype=torch.float), boxes_list[i]))
    boxes_combined = pad_annots(boxes_list)

    targets_list = []
    for box in boxes_combined:
        num_objs = len(box)
        target = {}
        # boxes to coco format
        boxes = box[..., :4]
        boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]  # box width, box height
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
        target['labels'] = torch.as_tensor(
            bboxes_with_cls_id[..., -1].flatten(), dtype=torch.int64)
        # target['image_id'] = torch.tensor([get_image_id(img_path)])
        target['area'] = (target['boxes'][:, 3]) * (target['boxes'][:, 2])
        target['iscrowd'] = torch.zeros((num_objs, ), dtype=torch.int64)
        targets_list.append(target)
    return images_list, targets_list
def _get_val_item_offline_wgisd(self, index):
    img_path = self.imgs[index]
    # print('val image path: '+ img_path)
    bboxes_with_cls_id = np.array(self.truth.get(img_path), dtype=np.float)
    img = Image.open(img_path)
    img = np.array(img)
    w, h, h_cut = 4256, 2432, 2432

    # 0. Dict for all augmentations
    bbox_params_ = {
        'format': 'pascal_voc',
        'min_area': 0,
        'min_visibility': 0,
    }

    # 1. Resize the image, and crop out upper part of it
    resize_and_crop = A.Compose(
        [
            A.Resize(height=h, width=w, p=1.0),
            # A.Crop(x_min=0, x_max=w, y_min=h - h_cut, y_max=h, p=1.0)
        ],
        p=1.0,
        bbox_params=A.BboxParams(**bbox_params_))

    # 2. Divide into 8 squares
    assert (w % 7) == 0
    assert (h % 4) == 0
    sides_x = [int(w * (j / 7)) for j in range(8)]
    sides_y = [int(h_cut * (j / 4)) for j in range(5)]
    crop_list = []
    for i in range(7):
        for j in range(4):
            x_min, x_max = sides_x[i], sides_x[i + 1]
            y_min, y_max = sides_y[j], sides_y[j + 1]
            crop_list.append(
                A.Compose(
                    [
                        A.Crop(x_min=x_min,
                               x_max=x_max,
                               y_min=y_min,
                               y_max=y_max,
                               p=1.0),
                        # ToTensorV2(p=1.0)
                    ],
                    p=1.0,
                    bbox_params=A.BboxParams(**bbox_params_)))

    # 3. Apply augs
    try:
        temp = resize_and_crop(**{
            'image': img,
            'bboxes': bboxes_with_cls_id,
        })
    except:
        return None, None
    images_list, boxes_list = [], []
    for crop in crop_list:
        sample = crop(**{
            'image': temp['image'],
            'bboxes': temp['bboxes'],
        })
        images_list.append(sample['image'])
        boxes_list.append(sample['bboxes'])
    # images_combined = torch.stack(images_list, dim=0)
    for i in range(len(boxes_list)):
        boxes_list[i] = tuple(
            map(lambda x: torch.tensor(x, dtype=torch.float), boxes_list[i]))
    # boxes_combined = pad_annots(boxes_list)

    targets_list = []
    for box in boxes_list:
        num_objs = len(box)
        target = {}
        # boxes to coco format
        target['num_objs'] = num_objs
        targets_list.append(target)
    return images_list, targets_list
def __init__( self, base_dir="deeplite_torch_zoo/data/VOC/", split="train_aug", num_classes=21, affine_augmenter=None, image_augmenter=None, target_size=(512, 512), net_type="unet", ignore_index=255, debug=False, ): self.debug = debug #########To support subclasses###################### self.classes = cfg.DATA["CLASSES"] self.all_classes = ["BACKGROUND"] + cfg.DATA["ALLCLASSES"] if num_classes == 2: self.classes = cfg.DATA["CLASSES_1"] elif num_classes == 3: self.classes = cfg.DATA["CLASSES_2"] self.classes = ["BACKGROUND"] + self.classes self.num_classes = len(self.classes) self.class_to_id = dict(zip(self.classes, range(self.num_classes))) self.id_to_class = {v: k for k, v in self.class_to_id.items()} self.class_to_id_all = dict( zip(self.all_classes, range(len(self.all_classes)))) self.map_selected_ids_to_all = { k: self.class_to_id_all[v] for k, v in self.id_to_class.items() } self.map_all_ids_to_selected = { v: k for k, v in self.map_selected_ids_to_all.items() } #########To support subclasses###################### self.base_dir = Path(base_dir) / Path("VOCdevkit/VOC2012") self.net_type = net_type self.ignore_index = ignore_index self.split = split valid_ids = self.base_dir / "ImageSets" / "Segmentation" / "val.txt" with open(valid_ids, "r") as f: valid_ids = f.readlines() if self.split == "valid": lbl_dir = "SegmentationClass" img_ids = valid_ids else: valid_set = set([valid_id.strip() for valid_id in valid_ids]) lbl_dir = "SegmentationClassAug" if "aug" in split else "SegmentationClass" all_set = set([ p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir() ]) img_ids = list(all_set - valid_set) img_ids = [_id for _id in img_ids if self._not_empty(_id, split)] self.img_paths = [ (self.base_dir / "JPEGImages" / "{}.jpg".format(img_id.strip())) for img_id in img_ids ] self.lbl_paths = [ (self.base_dir / lbl_dir / "{}.png".format(img_id.strip())) for img_id in img_ids ] # Resize if isinstance(target_size, str): target_size = eval(target_size) if "train" in self.split: if self.net_type == "deeplab": target_size = (target_size[0] + 1, target_size[1] + 1) self.resizer = albu.Compose([ albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0), PadIfNeededRightBottom( min_height=target_size[0], min_width=target_size[1], value=0, ignore_index=self.ignore_index, p=1.0, ), albu.RandomCrop(height=target_size[0], width=target_size[1], p=1.0), ]) else: # self.resizer = None self.resizer = albu.Compose([ PadIfNeededRightBottom( min_height=target_size[0], min_width=target_size[1], value=0, ignore_index=self.ignore_index, p=1.0, ), albu.Crop(x_min=0, x_max=target_size[1], y_min=0, y_max=target_size[0]), ]) # Augment if "train" in self.split: self.affine_augmenter = affine_augmenter self.image_augmenter = image_augmenter else: self.affine_augmenter = None self.image_augmenter = None
def _configure_initial_cropping(self):
    """Configure the transform for the initial image cropping."""
    if self.parent.initial_cropping_rectangle is not None:
        self.transforms.append(
            albumentations.Crop(*self.parent.initial_cropping_rectangle))
def __init__(self,
             base_dir='../data/pascal_voc_2012/VOCdevkit/VOC2012',
             split='train_aug',
             affine_augmenter=None,
             image_augmenter=None,
             target_size=(512, 512),
             net_type='unet',
             ignore_index=255,
             debug=False):
    self.debug = debug
    self.base_dir = Path(base_dir)
    assert net_type in ['unet', 'deeplab']
    self.net_type = net_type
    self.ignore_index = ignore_index
    self.split = split

    valid_ids = self.base_dir / 'ImageSets' / 'Segmentation' / 'val.txt'
    with open(valid_ids, 'r') as f:
        valid_ids = f.readlines()
    if self.split == 'valid':
        lbl_dir = 'SegmentationClass'
        img_ids = valid_ids
    else:
        valid_set = set([valid_id.strip() for valid_id in valid_ids])
        lbl_dir = 'SegmentationClassAug' if 'aug' in split else 'SegmentationClass'
        all_set = set([
            p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir()
        ])
        img_ids = list(all_set - valid_set)
    self.img_paths = [
        (self.base_dir / 'JPEGImages' / f'{img_id.strip()}.jpg')
        for img_id in img_ids
    ]
    self.lbl_paths = [(self.base_dir / lbl_dir / f'{img_id.strip()}.png')
                      for img_id in img_ids]

    # Resize
    if isinstance(target_size, str):
        target_size = eval(target_size)
    if 'train' in self.split:
        if self.net_type == 'deeplab':
            target_size = (target_size[0] + 1, target_size[1] + 1)
        self.resizer = albu.Compose([
            albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
            PadIfNeededRightBottom(min_height=target_size[0],
                                   min_width=target_size[1],
                                   value=0,
                                   ignore_index=self.ignore_index,
                                   p=1.0),
            albu.RandomCrop(height=target_size[0],
                            width=target_size[1],
                            p=1.0)
        ])
    else:
        # self.resizer = None
        self.resizer = albu.Compose([
            PadIfNeededRightBottom(min_height=target_size[0],
                                   min_width=target_size[1],
                                   value=0,
                                   ignore_index=self.ignore_index,
                                   p=1.0),
            albu.Crop(x_min=0,
                      x_max=target_size[1],
                      y_min=0,
                      y_max=target_size[0])
        ])

    # Augment
    if 'train' in self.split:
        self.affine_augmenter = affine_augmenter
        self.image_augmenter = image_augmenter
    else:
        self.affine_augmenter = None
        self.image_augmenter = None
def __init__(self, imageInfo, opt, split):
    print("=> THe input CHANNELs are BGR not others.")
    opt.numClasses = 21
    self.inputSize = (375, 500)
    self.input_dim = 3
    self.imageInfo = imageInfo[split]
    self.opt = opt
    self.split = split
    self.dir = imageInfo['basedir']
    self.ignore_index = 255
    self.class_names = np.array([
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'potted plant', 'sheep', 'sofa', 'train',
        'tv/monitor',
    ])
    self.mean_bgr = np.array([0.485, 0.456, 0.406])
    self.mean_rgb = np.array([0.406, 0.456, 0.485])
    self.var = np.array([0.229, 0.224, 0.225])
    self.target_size = (self.inputSize[0] + 1, self.inputSize[1] + 1)

    if split == 'train':
        self.resizor = albu.Compose([
            albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
            albu.PadIfNeeded(min_height=self.target_size[0],
                             min_width=self.target_size[1],
                             value=0,
                             p=1.0),
            albu.RandomCrop(height=self.target_size[0],
                            width=self.target_size[1],
                            p=1.0)
        ])
    else:
        self.resizor = albu.Compose([
            albu.PadIfNeeded(min_height=self.target_size[0],
                             min_width=self.target_size[1],
                             value=0,
                             p=1.0),
            albu.Crop(x_min=0,
                      x_max=self.target_size[1],
                      y_min=0,
                      y_max=self.target_size[0])
        ])

    if 'train' in self.split:
        self.image_augmenter = albu.Compose([
            albu.RandomBrightnessContrast(),
            albu.HorizontalFlip(p=0.5),
            albu.OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5),
        ])
        # self.affine_augmenter = albu.Compose([albu])
    else:
        self.image_augmenter = None
def __init__(self,
             base_dir='../data/deepglobe_as_pascalvoc/VOCdevkit/VOC2012',
             split='train',
             affine_augmenter=None,
             image_augmenter=None,
             target_size=(512, 512),
             net_type='unet',
             ignore_index=255,
             debug=False):
    self.debug = debug
    self.base_dir = Path(base_dir)
    assert net_type in ['unet', 'deeplab']
    self.net_type = net_type
    self.ignore_index = ignore_index
    self.split = split

    ######################################
    # This will change :                 #
    ######################################
    # Generate randomized valid split
    valid_ids = []
    valid_ids_dir = self.base_dir / 'ClassifiedTiles' / 'AgricultureLand' / 'val.txt'
    with open(valid_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        valid_ids = valid_ids + temp_ids[:69]
    valid_ids_dir = self.base_dir / 'ClassifiedTiles' / 'BarrenLand' / 'val.txt'
    with open(valid_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        valid_ids = valid_ids + temp_ids[:69]
    valid_ids_dir = self.base_dir / 'ClassifiedTiles' / 'Forest' / 'val.txt'
    with open(valid_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        valid_ids = valid_ids + temp_ids[:69]
    valid_ids_dir = self.base_dir / 'ClassifiedTiles' / 'RangeLand' / 'val.txt'
    with open(valid_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        valid_ids = valid_ids + temp_ids[:69]
    valid_ids_dir = self.base_dir / 'ClassifiedTiles' / 'UrbanLand' / 'val.txt'
    with open(valid_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        valid_ids = valid_ids + temp_ids[:69]
    valid_ids_dir = self.base_dir / 'ClassifiedTiles' / 'Water' / 'val.txt'
    with open(valid_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        valid_ids = valid_ids + temp_ids[:69]

    # Generate randomized train split
    train_ids = []
    train_ids_dir = self.base_dir / 'ClassifiedTiles' / 'AgricultureLand' / 'train.txt'
    with open(train_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        train_ids = train_ids + temp_ids[:278]
    train_ids_dir = self.base_dir / 'ClassifiedTiles' / 'BarrenLand' / 'train.txt'
    with open(train_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        train_ids = train_ids + temp_ids[:278]
    train_ids_dir = self.base_dir / 'ClassifiedTiles' / 'Forest' / 'train.txt'
    with open(train_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        train_ids = train_ids + temp_ids[:278]
    train_ids_dir = self.base_dir / 'ClassifiedTiles' / 'RangeLand' / 'train.txt'
    with open(train_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        train_ids = train_ids + temp_ids[:278]
    train_ids_dir = self.base_dir / 'ClassifiedTiles' / 'UrbanLand' / 'train.txt'
    with open(train_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        train_ids = train_ids + temp_ids[:278]
    train_ids_dir = self.base_dir / 'ClassifiedTiles' / 'Water' / 'train.txt'
    with open(train_ids_dir, 'r') as f:
        temp_ids = f.readlines()
        random.shuffle(temp_ids)
        train_ids = train_ids + temp_ids[:278]

    lbl_dir = 'SegmentationClass'
    if self.split == 'valid':
        img_ids = valid_ids
    elif self.split == 'train':
        img_ids = train_ids
    else:
        valid_set = set([valid_id.strip() for valid_id in valid_ids])
        lbl_dir = 'SegmentationClassAug' if 'aug' in split else 'SegmentationClass'
        all_set = set([
            p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir()
        ])
        img_ids = list(all_set - valid_set)
    self.img_paths = [
        (self.base_dir / 'JPEGImages' / f'{img_id.strip()}.jpg')
        for img_id in img_ids
    ]
    self.lbl_paths = [(self.base_dir / lbl_dir / f'{img_id.strip()}.png')
                      for img_id in img_ids]

    # Resize
    if isinstance(target_size, str):
        target_size = eval(target_size)
    if 'train' in self.split:
        if self.net_type == 'deeplab':
            target_size = (target_size[0] + 1, target_size[1] + 1)
        self.resizer = albu.Compose([
            albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
            PadIfNeededRightBottom(min_height=target_size[0],
                                   min_width=target_size[1],
                                   value=0,
                                   ignore_index=self.ignore_index,
                                   p=1.0),
            albu.RandomCrop(height=target_size[0],
                            width=target_size[1],
                            p=1.0)
        ])
    else:
        # self.resizer = None
        self.resizer = albu.Compose([
            PadIfNeededRightBottom(min_height=target_size[0],
                                   min_width=target_size[1],
                                   value=0,
                                   ignore_index=self.ignore_index,
                                   p=1.0),
            albu.Crop(x_min=0,
                      x_max=target_size[1],
                      y_min=0,
                      y_max=target_size[0])
        ])

    # Augment
    if 'train' in self.split:
        self.affine_augmenter = affine_augmenter
        self.image_augmenter = image_augmenter
    else:
        self.affine_augmenter = None
        self.image_augmenter = None