def preprocess(self, im, allobj=None):
    """
    Prepare an image for the network, optionally augmenting it for training.

    Args:
        im: either an ``np.ndarray`` image (used as-is) or anything
            ``cv2.imread`` accepts, typically a file path.
        allobj: optional list of parsed annotation objects. When given,
            the call is treated as training mode: the image goes through
            ``imcv2_affine_trans`` and ``imcv2_recolor``, and every object
            in ``allobj`` is updated IN PLACE to follow the transform.

    Returns:
        Whatever ``self.resize_input`` returns for the (possibly
        augmented) image.
    """
    # isinstance (not an exact type comparison) so that ndarray subclasses
    # are treated as images instead of being handed to cv2.imread.
    if not isinstance(im, np.ndarray):
        im = cv2.imread(im)

    if allobj is not None:  # in training mode
        im, dims, trans_param = imcv2_affine_trans(im)
        scale, offs, flip = trans_param
        for obj in allobj:
            _fix(obj, dims, scale, offs)
            if flip:
                # Mirror entries 1 and 3 about dims[0] and swap them so the
                # smaller coordinate stays in slot 1.
                # NOTE(review): presumably obj[1]/obj[3] are xmin/xmax and
                # dims[0] is the image width -- confirm against _fix.
                obj[1], obj[3] = dims[0] - obj[3], dims[0] - obj[1]
        im = imcv2_recolor(im)

    return self.resize_input(im)
def preprocess(imPath, allobj=None, size=(448, 448)):
    """
    Load an image from disk and turn it into a network-ready array.

    If an annotation list (``allobj``) is supplied, the call serves the
    training process: the image is passed through ``imcv2_affine_trans``
    and ``imcv2_recolor`` and the annotation objects are modified IN PLACE
    to follow the same transform.

    Args:
        imPath: path handed to ``cv2.imread``.
        allobj: optional list of annotation objects; entries 1..4 of each
            object are treated as box coordinates.
        size: target size passed to ``cv2.resize``; defaults to the
            previously hard-coded ``(448, 448)``.

    Returns:
        The resized image divided by 255 with its last axis reversed
        (``cv2.imread`` yields BGR, so this gives RGB order).
    """
    def fix(obj, dims, scale, offs):
        # Entries 1 and 3 use dims[0]/offs[0]; entries 2 and 4 use
        # dims[1]/offs[1]. Each value is rescaled, shifted, then clamped
        # to the range [0, dim].
        for i in range(1, 5):
            axis = (i + 1) % 2
            obj[i] = int(obj[i] * scale - offs[axis])
            obj[i] = max(min(obj[i], dims[axis]), 0)

    im = cv2.imread(imPath)
    if allobj is not None:  # in training mode
        im, dims, trans_param = imcv2_affine_trans(im)
        scale, offs, flip = trans_param
        for obj in allobj:
            fix(obj, dims, scale, offs)
            if flip:
                # Mirror entries 1 and 3 about dims[0] and swap them so
                # slot 1 keeps the smaller coordinate.
                obj[1], obj[3] = dims[0] - obj[3], dims[0] - obj[1]
        im = imcv2_recolor(im)

    imsz = cv2.resize(im, size)
    imsz = imsz / 255.
    return imsz[:, :, ::-1]
def preprocess(self, im, allobj=None):
    """
    Turn an image into network input, augmenting it when annotations are given.

    Args:
        im: an ``np.ndarray`` image, or a value ``cv2.imread`` can load
            (typically a file path).
        allobj: optional list of annotation objects. When not ``None``
            the image is run through ``imcv2_affine_trans`` and
            ``imcv2_recolor`` and each object is adjusted IN PLACE.

    Returns:
        The result of ``self.resize_input`` applied to the image.
    """
    # Use isinstance so ndarray subclasses are not mistaken for file paths.
    if not isinstance(im, np.ndarray):
        im = cv2.imread(im)

    if allobj is not None:  # in training mode
        im, dims, trans_param = imcv2_affine_trans(im)
        scale, offs, flip = trans_param
        for obj in allobj:
            _fix(obj, dims, scale, offs)
            if flip:
                # Reflect entries 1 and 3 about dims[0], swapping them so
                # their ordering is preserved after the mirror.
                # NOTE(review): presumably these are xmin/xmax -- confirm.
                obj[1], obj[3] = dims[0] - obj[3], dims[0] - obj[1]
        im = imcv2_recolor(im)

    return self.resize_input(im)
def preprocess_train(data):
    """
    Load one training sample, augment it and rescale its boxes.

    Args:
        data: a ``(im_path, blob, inp_size)`` triple. ``blob`` must provide
            ``'boxes'`` and ``'gt_classes'``; ``inp_size`` is ``(w, h)`` or
            ``None`` to skip resizing.

    Returns:
        ``(im, boxes, gt_classes, [], ori_im)`` where ``ori_im`` is a copy
        of the image as read from disk. If no boxes remain after the
        affine transform, the tuple is ``(im, boxes, [], [], ori_im)`` and
        ``im`` is returned before resizing, colour conversion or recolor.
    """
    im_path, blob, inp_size = data
    boxes, gt_classes = blob['boxes'], blob['gt_classes']

    im = cv2.imread(im_path)
    ori_im = np.copy(im)

    im, trans_param = imcv2_affine_trans(im)
    scale, offs, flip = trans_param
    boxes = _offset_boxes(boxes, im.shape, scale, offs, flip)
    if len(boxes) == 0:
        # NOTE(review): this early exit returns the un-resized BGR image --
        # confirm callers expect that.
        return im, boxes, [], [], ori_im

    if inp_size is not None:
        w, h = inp_size
        # Even columns are scaled by the width ratio, odd columns by the
        # height ratio (box = [xmin, ymin, xmax, ymax]).
        boxes[:, 0::2] *= float(w) / im.shape[1]
        boxes[:, 1::2] *= float(h) / im.shape[0]
        im = cv2.resize(im, (w, h))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = imcv2_recolor(im)
    # np.int was only an alias of the builtin int and no longer exists in
    # current NumPy; the builtin gives the same dtype.
    boxes = np.asarray(boxes, dtype=int)

    debug = False
    if debug:
        import matplotlib.pyplot as plt
        for idx, box in enumerate(boxes):
            # box = [xmin, ymin, xmax, ymax] with original pixel scale
            bb = [int(b) for b in box]
            label_id = gt_classes[idx]
            print(label_id, bb)
            # Draw the four edges of the box directly into the image.
            im[bb[1]:bb[3], bb[0], :] = 1.
            im[bb[1]:bb[3], bb[2], :] = 1.
            im[bb[1], bb[0]:bb[2], :] = 1.
            im[bb[3], bb[0]:bb[2], :] = 1.
        plt.imshow(im)
        plt.show()

    return im, boxes, gt_classes, [], ori_im
def __getitem__(self, index):
    """
    Load the image and label file for ``index`` (wrapping modulo ``self.size``).

    The image is read with OpenCV, converted to RGB, resized to 1230x366,
    recolored with ``imcv2_recolor`` and returned as a channel-first
    ``torch.FloatTensor``. The label file path is derived from the image
    path by replacing ``.png`` with ``.txt`` and ``image_2`` with
    ``label_2``. Only lines whose first field is in
    ``self.cfg.label_names`` are kept.

    Returns:
        ``(img, gt_boxes, gt_classes, ori_im)``: the tensor, an array of
        integer boxes taken from fields 4..7 of each kept label line, an
        array of class indices, and a uint8 copy of the resized RGB image
        taken before recoloring.
    """
    img_file = self.img_path[index % self.size]

    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (1230, 366))
    ori_im = img.copy().astype(np.uint8)
    img = imcv2_recolor(img)
    img = np.moveaxis(img, 2, 0)  # HWC -> CHW
    img = torch.from_numpy(img).type(torch.FloatTensor)

    lbl_file = img_file.replace('.png', '.txt').replace('image_2', 'label_2')
    with open(lbl_file) as f:
        content = [line.strip() for line in f.readlines()]

    gt_boxes = []
    gt_classes = []
    for entry in content:
        fields = entry.split(' ')
        if fields[0] not in self.cfg.label_names:
            continue
        # Build a real list: on Python 3 a bare map() is a lazy iterator
        # and np.array() would store the iterator object instead of the
        # four coordinates.
        # NOTE(review): the boxes are not rescaled to the 1230x366 image --
        # confirm that is intended.
        gt_boxes.append([int(float(v)) for v in fields[4:8]])
        gt_classes.append(self.cfg.label_names.index(fields[0]))

    gt_boxes = np.array(gt_boxes)
    gt_classes = np.array(gt_classes)

    return img, gt_boxes, gt_classes, ori_im
def preprocess_train(data):
    """
    Read, augment and rescale a single training image with its boxes.

    Args:
        data: ``(im_path, blob, inp_size)``. ``blob`` supplies ``'boxes'``
            and ``'gt_classes'``; ``inp_size`` is ``(w, h)`` or ``None``
            to keep the augmented image size.

    Returns:
        ``(im, boxes, gt_classes, [], ori_im)``; ``ori_im`` is a copy of
        the image as loaded. When ``_offset_boxes`` yields an array of
        shape ``(0,)`` the function returns ``(im, boxes, [], [], ori_im)``
        right after the affine transform, skipping the remaining steps.
    """
    im_path, blob, inp_size = data
    boxes, gt_classes = blob['boxes'], blob['gt_classes']

    im = cv2.imread(im_path)
    ori_im = np.copy(im)

    im, trans_param = imcv2_affine_trans(im)
    scale, offs, flip = trans_param
    boxes = _offset_boxes(boxes, im.shape, scale, offs, flip)
    if boxes.shape == (0,):
        return im, boxes, [], [], ori_im

    if inp_size is not None:
        w, h = inp_size
        # Columns 0 and 2 follow the width ratio, columns 1 and 3 the
        # height ratio.
        boxes[:, 0::2] *= float(w) / im.shape[1]
        boxes[:, 1::2] *= float(h) / im.shape[0]
        im = cv2.resize(im, (w, h))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = imcv2_recolor(im)
    # The np.int alias has been removed from NumPy; builtin int is the
    # dtype it used to resolve to.
    boxes = np.asarray(boxes, dtype=int)

    return im, boxes, gt_classes, [], ori_im
def affine_transform(img, boxes, net_inp_size):
    """
    Apply the random affine augmentation to an RGB image and its boxes.

    Args:
        img: RGB image; converted to a uint8 array before processing.
        boxes: array of boxes, one ``[xmin, ymin, xmax, ymax]`` row each.
        net_inp_size: ``(w, h)`` of the network input.

    Returns:
        ``(im, boxes)``: the augmented, resized, recolored RGB image and
        the integer boxes clipped to ``[0, w - 1]`` / ``[0, h - 1]``.
        Returns ``None`` when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return

    im = np.asarray(img, dtype=np.uint8)
    w, h = net_inp_size

    # imcv2_affine_trans is fed BGR here; converted back to RGB below.
    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    im, trans_param = imcv2_affine_trans(im)
    scale, offs, flip = trans_param
    boxes = offset_boxes(boxes, im.shape, scale, offs, flip)

    # Rescale to the network input size, then clamp inside the image.
    boxes[:, 0::2] *= float(w) / im.shape[1]
    boxes[:, 1::2] *= float(h) / im.shape[0]
    np.clip(boxes[:, 0::2], 0, w - 1, out=boxes[:, 0::2])
    np.clip(boxes[:, 1::2], 0, h - 1, out=boxes[:, 1::2])

    im = cv2.resize(im, (w, h))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = imcv2_recolor(im)
    # np.int no longer exists in current NumPy; builtin int is equivalent.
    boxes = np.asarray(boxes, dtype=int)

    debug = False
    if debug:
        import matplotlib.pyplot as plt
        for idx, box in enumerate(boxes):
            # box = [xmin, ymin, xmax, ymax] with original pixel scale
            bb = [int(b) for b in box]
            # Paint the four box edges into the image.
            im[bb[1]:bb[3], bb[0], :] = 1.
            im[bb[1]:bb[3], bb[2], :] = 1.
            im[bb[1], bb[0]:bb[2], :] = 1.
            im[bb[3], bb[0]:bb[2], :] = 1.
        plt.imshow(im)
        plt.show()

    # im (pixels range 0~1)
    # boxes (pos range 0~max_img_size)
    return im, boxes
def transform(im):
    """Recolor ``im`` with ``imcv2_recolor`` and resize the result to 416x416."""
    recolored = imcv2_recolor(im)
    return cv2.resize(recolored, (416, 416))
def preprocess(self, im, allobj=None):
    """
    Load an image (array, file or video frame) and prepare it for the net.

    ``im`` may be:
      * an ``np.ndarray``, used directly;
      * a video reference ``"<file>:<frame>"`` (when
        ``slicer.isVideofile(im)`` is true), optionally followed by a
        tile suffix;
      * any other value, passed to ``cv2.imread``.

    A non-array ``im`` containing ``'@'`` carries a tile description
    ``"@<row>_<col>_<height>_<width>_<win>"``: the image is resized to
    ``width x height`` and a ``win x win`` window starting at
    ``(row, col)`` is cut out.

    If ``allobj`` is given (training mode) the image is transformed with
    ``imcv2_affine_trans`` and ``imcv2_recolor`` and the annotation
    objects are updated IN PLACE.

    Returns:
        The image resized according to ``self.meta['inp_size']``, divided
        by 255 and with its last axis reversed, or ``None`` if no image
        could be loaded.
    """
    print('Image: ', im)

    from_array = isinstance(im, np.ndarray)
    if from_array:
        image = im
    elif slicer.isVideofile(im):
        filename, frame_num = im.split(':')
        if '@' in frame_num:
            # Drop the tile suffix; it is handled separately below.
            frame_num = frame_num.split('@')[0]
        image = slicer.getFrameFromVideo(filename, int(frame_num))
        if isinstance(image, np.ndarray):
            # NOTE(review): the original author was unsure whether this
            # conversion is necessary -- confirm.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        image = cv2.imread(im)

    # Bail out before any further OpenCV call touches a failed load
    # (previously this check came after the image had already been used).
    if not isinstance(image, np.ndarray):
        return None

    # Frame slicing support: tile info is hard-coded in the file name.
    if not from_array and '@' in im:
        temp = im.split('@')[1].split('_')
        win_size = int(temp[4])
        position = [int(temp[0]), int(temp[1])]
        size = [int(temp[2]), int(temp[3])]
        image = cv2.resize(image, (size[1], size[0]))
        image = image[position[0]: position[0] + win_size,
                      position[1]: position[1] + win_size]

    if allobj is not None:  # in training mode
        image, dims, trans_param = imcv2_affine_trans(image)
        scale, offs, flip = trans_param
        for obj in allobj:
            _fix(obj, dims, scale, offs)
            if flip:
                # Reflect entries 1 and 3 about dims[0] and swap them.
                obj[1], obj[3] = dims[0] - obj[3], dims[0] - obj[1]
        image = imcv2_recolor(image)

    h, w, _ = self.meta['inp_size']
    # NOTE(review): cv2.resize expects (width, height); (h, w) is only
    # equivalent for square inputs -- confirm inp_size ordering.
    imsz = cv2.resize(image, (h, w))
    imsz = imsz / 255.
    return imsz[:, :, ::-1]
def transform(im):
    """
    Recolor ``im``, resize it to 416x416 and return it channel-first with
    the channel order reversed.
    """
    resized = cv2.resize(imcv2_recolor(im), (416, 416))
    channel_first = resized.transpose((2, 0, 1))  # HWC -> CHW
    return channel_first[(2, 1, 0), :, :]
weight_decay=cfg.weight_decay) #batch_per_epoch = imdb.batch_per_epoch train_loss = 0 bbox_loss, iou_loss, cls_loss = 0., 0., 0. cnt = 0 t = Timer() step_cnt = 0 size_index = cfg.size_index for i_batch, sample_batched in enumerate(dataloader): t.tic() # batch #batch = imdb.next_batch(size_index) im = sample_batched['images'].numpy() im = imcv2_recolor(im) im = np.resize(im, (1, cfg.multi_scale_inp_size[size_index][0], cfg.multi_scale_inp_size[size_index][1], 3)) gt_boxes = sample_batched['gt_boxes'] gt_classes = sample_batched['gt_classes'] dontcare = sample_batched['dontcare'] #orgin_im = sample_batched['origin_im'] # forward im_data = net_utils.np_to_variable(im, use_cuda=cfg.use_cuda, volatile=False).permute(0, 3, 1, 2) model(im_data, gt_boxes, gt_classes, dontcare, size_index)