def _get_image_blob(roidb, scale_inds):
    """Assemble the network input blob for a minibatch of roidb entries.

    Every entry's image is read with ``imread``, expanded to three channels
    when it is 2-D, reversed along the channel axis (RGB -> BGR), optionally
    mirrored left-to-right, and finally handed to ``prep_im_for_blob`` at the
    scale selected for that entry.

    Args:
        roidb: sequence of entries; each supplies ``entry['image']`` (passed
            to ``imread``) and a truthy/falsy ``entry['flipped']``.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)`` where ``blob`` is the result of
        ``im_list_to_blob`` over the prepared images and ``im_scales`` is the
        list of scale values returned by ``prep_im_for_blob``, one per entry.
    """
    prepared = []
    scales = []
    for idx, entry in enumerate(roidb):
        img = imread(entry['image'])

        # A 2-D array has no channel axis; replicate it into three channels.
        if len(img.shape) == 2:
            img = np.stack((img, img, img), axis=2)

        # imread yields RGB while the rest of the pipeline was written for
        # cv2's BGR ordering, so reverse the channel axis.
        img = img[:, :, ::-1]

        if roidb[idx]['flipped']:
            # Mirror along the width axis.
            img = img[:, ::-1, :]

        size_wanted = cfg.TRAIN.SCALES[scale_inds[idx]]
        img, factor = prep_im_for_blob(
            img, cfg.PIXEL_MEANS, size_wanted, cfg.TRAIN.MAX_SIZE)

        scales.append(factor)
        prepared.append(img)

    # Pack every prepared image into one blob.
    return im_list_to_blob(prepared), scales
def _get_image_blob(im):
    """Convert one image into a fixed-size square network input.

    The image is copied to float32, has ``cfg.PIXEL_MEANS`` subtracted, and is
    resized to ``cfg.TEST.SIZE`` x ``cfg.TEST.SIZE`` with bilinear
    interpolation. The aspect ratio is not preserved, so separate width and
    height scale factors are reported.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob: result of ``im_list_to_blob`` for the single resized image
        im_scale_factors (ndarray): ``[width_scale, height_scale]``, each
            being ``cfg.TEST.SIZE`` divided by the corresponding original
            dimension
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS
    dims = centered.shape

    side = cfg.TEST.SIZE
    # shape is (rows, cols, ...): index 1 is the width, index 0 the height.
    scale_w = float(side) / float(dims[1])
    scale_h = float(side) / float(dims[0])

    resized = cv2.resize(centered, (side, side),
                         interpolation=cv2.INTER_LINEAR)

    # Wrap the single resized image into a blob.
    blob = im_list_to_blob([resized])

    return blob, np.array([scale_w, scale_h])
def _get_image_blob(im):
    """Convert an image into a multi-scale network input.

    The image is copied to float32 and has ``cfg.PIXEL_MEANS`` subtracted.
    For each entry of ``cfg.TEST.SCALES`` it is then resized uniformly so that
    its shorter side matches the target, unless that would push the longer
    side (after rounding) beyond ``cfg.TEST.MAX_SIZE``; in that case the
    longer side is scaled to ``cfg.TEST.MAX_SIZE`` instead.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob: result of ``im_list_to_blob`` over the resized images
        im_scale_factors (ndarray): the scale applied for each target size,
            relative to ``im``
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    spatial = centered.shape[0:2]
    short_side = np.min(spatial)
    long_side = np.max(spatial)

    pyramid = []
    factors = []
    for wanted in cfg.TEST.SCALES:
        factor = float(wanted) / float(short_side)
        # Cap the scale so the longer side stays within MAX_SIZE.
        exceeds_cap = np.round(factor * long_side) > cfg.TEST.MAX_SIZE
        if exceeds_cap:
            factor = float(cfg.TEST.MAX_SIZE) / float(long_side)

        level = cv2.resize(centered, None, None, fx=factor, fy=factor,
                           interpolation=cv2.INTER_LINEAR)
        factors.append(factor)
        pyramid.append(level)

    # Pack all pyramid levels into one blob.
    return im_list_to_blob(pyramid), np.array(factors)
def _get_image_blob(roidb, scale_inds):
    """Build the input blob from the roidb images at the requested scales.

    Args:
        roidb: sequence of entries, each with an ``'image'`` value for
            ``imread`` and a ``'flipped'`` flag.
        scale_inds: indices into ``cfg.TRAIN.SCALES``, one per roidb entry.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of per-image scale values produced by ``prep_im_for_blob``.
    """
    # NOTE(review): the original author annotated the batch size as 1 here;
    # the code itself handles any length.
    batch_images = []
    batch_scales = []

    for pos, record in enumerate(roidb):
        pixels = imread(record['image'])

        if len(pixels.shape) == 2:
            # No channel axis: add one and replicate it three times.
            pixels = np.repeat(pixels[:, :, np.newaxis], 3, axis=2)

        # Reverse the channel axis (RGB -> BGR), because the downstream code
        # was originally written around cv2-loaded images.
        pixels = pixels[:, :, ::-1]

        if roidb[pos]['flipped']:
            # Flip the image horizontally.
            pixels = pixels[:, ::-1, :]

        chosen_size = cfg.TRAIN.SCALES[scale_inds[pos]]
        # Original author's note: the returned scale is
        # target_size / float(im_size_min), i.e. the factor taking the
        # image's shorter side to the training size — confirm against
        # prep_im_for_blob.
        pixels, ratio = prep_im_for_blob(
            pixels, cfg.PIXEL_MEANS, chosen_size, cfg.TRAIN.MAX_SIZE)

        batch_scales.append(ratio)
        batch_images.append(pixels)

    # Create a blob to hold the input images.
    packed = im_list_to_blob(batch_images)

    # Original author's note: blob layout is [1, w, h, c] — TODO confirm
    # against im_list_to_blob. The scales are the per-image resize factors.
    return packed, batch_scales
def get_evaluate_batch(self, im_path, index):
    """Build the blobs dictionary for evaluating a single image.

    The image at ``im_path`` is read with ``imread``, expanded to three
    channels when it is 2-D, reversed along the channel axis (RGB -> BGR),
    and prepared with ``prep_im_for_blob`` using the first entry of
    ``cfg.TRAIN.SCALES`` and ``cfg.TRAIN.MAX_SIZE``.

    Args:
        im_path: value passed to ``imread`` to load the image.
        index: identifier stored unchanged under ``'img_id'``.

    Returns:
        dict with keys:
            ``'data'``: output of ``im_list_to_blob`` for the prepared image;
            ``'gt_boxes'``: empty float32 array of shape (0, 5), laid out as
                (x1, y1, x2, y2, cls);
            ``'im_info'``: float32 array ``[[rows, cols, scale]]`` taken from
                the prepared image and its scale;
            ``'img_id'``: ``index``.
    """
    img = imread(im_path)

    # A 2-D array has no channel axis; replicate it into three channels.
    if len(img.shape) == 2:
        img = np.stack((img, img, img), axis=2)

    # imread yields RGB; the pipeline was written for cv2's BGR ordering.
    img = img[:, :, ::-1]

    size_wanted = cfg.TRAIN.SCALES[0]
    img, factor = prep_im_for_blob(
        img, cfg.PIXEL_MEANS, size_wanted, cfg.TRAIN.MAX_SIZE)

    return {
        'data': im_list_to_blob([img]),
        # No ground truth at evaluation time: (x1, y1, x2, y2, cls) rows.
        'gt_boxes': np.empty((0, 5), dtype=np.float32),
        'im_info': np.array(
            [[img.shape[0], img.shape[1], factor]], dtype=np.float32),
        'img_id': index,
    }
def _get_image_blob(im):
    """Convert one image into the input expected by the network.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob: result of ``im_list_to_blob`` over the rescaled copies of the
            image (one per entry of ``cfg.TEST.SCALES``)
        im_scale_factors (ndarray): the scale applied for each target size,
            relative to ``im``
    """
    shifted = im.astype(np.float32, copy=True)
    # Subtract the configured per-pixel means so the input is centered the
    # same way as during training.
    shifted -= cfg.PIXEL_MEANS

    rows_cols = shifted.shape[0:2]
    smaller_dim = np.min(rows_cols)
    larger_dim = np.max(rows_cols)

    scaled_images = []
    scale_values = []

    for goal in cfg.TEST.SCALES:
        # Scale the shorter side to the target, but let the limit on the
        # longer side take priority. For example, if the target is 600 and
        # MAX_SIZE is 1000 (values quoted by the original author):
        #   375x500x3 input -> 600x800x3 output
        #   375x800x3 input -> 469x1000x3 output
        ratio = float(goal) / float(smaller_dim)

        # Prevent the biggest axis from being more than MAX_SIZE
        # (np.round rounds to the nearest value before comparing).
        if np.round(ratio * larger_dim) > cfg.TEST.MAX_SIZE:
            ratio = float(cfg.TEST.MAX_SIZE) / float(larger_dim)

        # cv2.resize arguments: source, destination, output size, scale
        # along width, scale along height, interpolation method.
        rescaled = cv2.resize(shifted, None, None, fx=ratio, fy=ratio,
                              interpolation=cv2.INTER_LINEAR)

        scale_values.append(ratio)
        scaled_images.append(rescaled)

    # Create a blob to hold the input images.
    packed = im_list_to_blob(scaled_images)

    return packed, np.array(scale_values)