def preprocess(img, bbox_labels, mode, settings):
    """Augment (train mode only), resize and normalise one image.

    Args:
        img: PIL image; ``img.size`` and ``img.resize`` are used.
        bbox_labels: Sequence of mutable per-box records. Entries 1 and 3
            of each record are the left/right x coordinates that get
            reflected around 1 when the image is mirrored; the rest of the
            layout is defined by the ``image_util`` helpers.
        mode: ``'train'`` enables distortion, expansion, random cropping
            and random horizontal mirroring. Any other value only resizes
            and normalises.
        settings: Configuration object providing ``_apply_distort``,
            ``_apply_expand``, ``resize_w``, ``resize_h`` and ``img_mean``.

    Returns:
        Tuple ``(img, sampled_labels)``. ``img`` is a float32 array with
        the channel axis first and the channel order reversed, with
        ``settings.img_mean`` subtracted and the result multiplied by
        0.007843. ``sampled_labels`` are the box records that belong to
        the returned image.

    Note:
        Mirroring rewrites the x coordinates of the returned label records
        in place; when no crop happened these may be the caller's records.
    """
    img_width, img_height = img.size
    sampled_labels = bbox_labels
    if mode == 'train':
        if settings._apply_distort:
            img = image_util.distort_image(img, settings)
        if settings._apply_expand:
            img, bbox_labels, img_width, img_height = image_util.expand_image(
                img, bbox_labels, img_width, img_height, settings)
            # Keep the fallback labels in step with the expanded image:
            # if no crop is sampled below, the expanded image must not be
            # returned together with the pre-expansion labels.
            sampled_labels = bbox_labels

        # Hard-coded sampler configurations, one argument tuple each.
        sampler_args = (
            (1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0),
        )
        batch_sampler = [image_util.sampler(*args) for args in sampler_args]
        sampled_bbox = image_util.generate_batch_samples(
            batch_sampler, bbox_labels)

        img = np.array(img)
        if len(sampled_bbox) > 0:
            # Pick one of the candidate crops uniformly at random.
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            img, sampled_labels = image_util.crop_image(
                img, bbox_labels, sampled_bbox[idx], img_width, img_height)
        img = Image.fromarray(img)

    # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS alias.
    img = img.resize((settings.resize_w, settings.resize_h), Image.LANCZOS)
    img = np.array(img)

    # Random horizontal flip with probability 1/2 (train mode only).
    if mode == 'train' and int(np.random.uniform(0, 2)) == 1:
        img = img[:, ::-1, :]
        for label in sampled_labels:
            # Reflect and swap the left/right x coordinates.
            label[1], label[3] = 1 - label[3], 1 - label[1]

    # HWC to CHW
    if len(img.shape) == 3:
        img = np.transpose(img, (2, 0, 1))
    # Reverse the channel order (RGB to BGR).
    img = img[[2, 1, 0], :, :]
    img = img.astype('float32')
    img -= settings.img_mean
    img = img * 0.007843
    return img, sampled_labels
def reader():
    """Yield preprocessed samples for every entry listed in ``file_list``.

    Uses ``file_list``, ``shuffle``, ``mode`` and ``settings`` from the
    enclosing scope.

    Each non-blank line of ``file_list`` is ``"<image> <annotation>"`` in
    ``'train'``/``'test'`` mode and ``"<image>"`` in ``'infer'`` mode, with
    paths relative to ``settings.data_dir``. Annotations are XML files with
    ``object`` elements carrying ``name``, ``difficult`` and ``bndbox``
    (``xmin``/``ymin``/``xmax``/``ymax``) children.

    Yields:
        ``(image, labels)`` in ``'train'``/``'test'`` mode and ``image`` in
        ``'infer'`` mode. ``image`` is a flattened float32 array (axes are
        reordered to channel-first when the image has three dimensions)
        with ``settings.img_mean`` subtracted. Each label record is
        ``[label, xmin, ymin, xmax, ymax, difficult]`` with coordinates
        divided by the original image width/height. Training samples that
        end up with no labels are skipped.
    """
    with open(file_list) as flist:
        # Blank lines (for example a trailing newline) name no sample and
        # would otherwise break the two-field unpacking below.
        lines = [line.strip() for line in flist if line.strip()]
    if shuffle:
        # Shuffle the sample order.
        random.shuffle(lines)

    for line in lines:
        if mode == 'train' or mode == 'test':
            # Image path and the matching annotation file path.
            img_path, label_path = line.split()
            img_path = os.path.join(settings.data_dir, img_path)
            label_path = os.path.join(settings.data_dir, label_path)
        elif mode == 'infer':
            # Only an image path is listed.
            img_path = os.path.join(settings.data_dir, line)
            # NOTE(review): the original formatting was lost; this debug
            # print is assumed to belong to the infer branch - confirm.
            print(img_path)

        img = Image.open(img_path)
        # Original image size, used to normalise the box coordinates.
        img_width, img_height = img.size
        img = np.array(img)

        # Record layout: label | xmin | ymin | xmax | ymax | difficult
        if mode == 'train' or mode == 'test':
            bbox_labels = []
            root = xml.etree.ElementTree.parse(label_path).getroot()
            for obj in root.findall('object'):
                # NOTE(review): the original comment says labels "start
                # from 1" - presumably index 0 of settings.label_list is a
                # background class; confirm.
                label = float(
                    settings.label_list.index(obj.find('name').text))
                bbox = obj.find('bndbox')
                difficult = float(obj.find('difficult').text)
                # Store the corners as fractions of the image size.
                bbox_labels.append([
                    label,
                    float(bbox.find('xmin').text) / img_width,
                    float(bbox.find('ymin').text) / img_height,
                    float(bbox.find('xmax').text) / img_width,
                    float(bbox.find('ymax').text) / img_height,
                    difficult,
                ])

            sample_labels = bbox_labels
            if mode == 'train':
                # Hard-coded sampler configurations, one tuple each.
                sampler_args = (
                    (1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0),
                    (1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0),
                    (1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0),
                    (1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0),
                    (1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0),
                    (1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0),
                    (1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0),
                )
                batch_sampler = [
                    image_util.sampler(*args) for args in sampler_args
                ]
                # Candidate crop boxes generated from the samplers.
                sampled_bbox = image_util.generate_batch_samples(
                    batch_sampler, bbox_labels)
                if len(sampled_bbox) > 0:
                    # Pick one candidate crop uniformly at random.
                    idx = int(random.uniform(0, len(sampled_bbox)))
                    img, sample_labels = image_util.crop_image(
                        img, bbox_labels, sampled_bbox[idx], img_width,
                        img_height)

        img = Image.fromarray(img)
        # Resize to the network input size. Image.LANCZOS is the same
        # filter as the removed Image.ANTIALIAS alias.
        img = img.resize((settings.resize_w, settings.resize_h),
                         Image.LANCZOS)
        img = np.array(img)

        # Random horizontal flip with probability 1/2 (train mode only).
        if mode == 'train' and int(random.uniform(0, 2)) == 1:
            # Omitting the trailing axis keeps this valid for 2-D
            # (single-channel) arrays as well as H x W x C arrays.
            img = img[:, ::-1]
            for label in sample_labels:
                # Reflect and swap xmin / xmax.
                label[1], label[3] = 1 - label[3], 1 - label[1]

        # HWC to CHW when a channel axis is present.
        if len(img.shape) == 3:
            img = np.transpose(img, (2, 0, 1))
        img = img.astype('float32')
        img -= settings.img_mean
        img = img.flatten()

        if mode == 'train' or mode == 'test':
            if mode == 'train' and len(sample_labels) == 0:
                # A training sample without boxes is dropped.
                continue
            yield img, sample_labels
        elif mode == 'infer':
            yield img
def preprocess(img, bbox_labels, mode, settings, image_path):
    """Augment (train mode only), resize and normalise one image.

    Args:
        img: PIL image; ``img.size`` and ``img.resize`` are used.
        bbox_labels: Sequence of mutable per-box records. Entries 1 and 3
            of each record are the left/right x coordinates that get
            reflected around 1 when the image is mirrored; the rest of the
            layout is defined by the ``image_util`` helpers.
        mode: ``'train'`` enables distortion, expansion, random cropping
            and random horizontal mirroring. The resize (with a randomly
            chosen interpolation filter) and the normalisation run in
            every mode.
        settings: Configuration object providing ``apply_distort``,
            ``apply_expand``, ``data_anchor_sampling_prob``,
            ``resize_width``, ``resize_height``, ``min_face_size``,
            ``img_mean`` and ``scale``.
        image_path: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(img, sampled_labels)``. ``img`` is the float32 result of
        ``to_chw_bgr`` with ``settings.img_mean`` subtracted and multiplied
        by ``settings.scale``. ``sampled_labels`` are the box records that
        belong to the returned image.

    Note:
        Mirroring rewrites the x coordinates of the returned label records
        in place; when no crop happened these may be the caller's records.
    """
    img_width, img_height = img.size
    sampled_labels = bbox_labels
    if mode == 'train':
        if settings.apply_distort:
            img = image_util.distort_image(img, settings)
        if settings.apply_expand:
            img, bbox_labels, img_width, img_height = image_util.expand_image(
                img, bbox_labels, img_width, img_height, settings)
            # Keep the fallback labels in step with the expanded image:
            # if no crop is sampled below, the expanded image must not be
            # returned together with the pre-expansion labels.
            sampled_labels = bbox_labels

        # Fixed seeds make runs reproducible for continuous evaluation.
        if 'ce_mode' in os.environ:
            random.seed(0)
            np.random.seed(0)

        prob = np.random.uniform(0., 1.)
        if prob > settings.data_anchor_sampling_prob:
            # Random-scale sampling against a fixed set of scales.
            scale_array = np.array([16, 32, 64, 128, 256, 512])
            batch_sampler = [
                image_util.sampler(1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2,
                                   0.0, True)
            ]
            sampled_bbox = image_util.generate_batch_random_samples(
                batch_sampler, bbox_labels, img_width, img_height,
                scale_array, settings.resize_width, settings.resize_height)
            img = np.array(img)
            if len(sampled_bbox) > 0:
                # Pick one candidate crop uniformly at random.
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                img, sampled_labels = image_util.crop_image_sampling(
                    img, bbox_labels, sampled_bbox[idx], img_width,
                    img_height, settings.resize_width,
                    settings.resize_height, settings.min_face_size)
            # Cast to uint8 before handing the array back to PIL.
            img = img.astype('uint8')
            img = Image.fromarray(img)
        else:
            # Hard-coded sampler configurations, one argument tuple each.
            sampler_args = (
                (1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
            )
            batch_sampler = [
                image_util.sampler(*args) for args in sampler_args
            ]
            sampled_bbox = image_util.generate_batch_samples(
                batch_sampler, bbox_labels, img_width, img_height)
            img = np.array(img)
            if len(sampled_bbox) > 0:
                # Pick one candidate crop uniformly at random.
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                img, sampled_labels = image_util.crop_image(
                    img, bbox_labels, sampled_bbox[idx], img_width,
                    img_height, settings.resize_width,
                    settings.resize_height, settings.min_face_size)
            img = Image.fromarray(img)

    # Resize with an interpolation filter chosen uniformly at random.
    interp_mode = [
        Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC,
        Image.LANCZOS
    ]
    interp_indx = np.random.randint(0, len(interp_mode))
    img = img.resize((settings.resize_width, settings.resize_height),
                     resample=interp_mode[interp_indx])
    img = np.array(img)

    # Random horizontal flip with probability 1/2 (train mode only).
    if mode == 'train' and int(np.random.uniform(0, 2)) == 1:
        img = img[:, ::-1, :]
        for label in sampled_labels:
            # Reflect and swap the left/right x coordinates.
            label[1], label[3] = 1 - label[3], 1 - label[1]

    img = to_chw_bgr(img)
    img = img.astype('float32')
    img -= settings.img_mean
    img = img * settings.scale
    return img, sampled_labels
def reader():
    """Yield preprocessed samples for every entry listed in ``file_list``.

    Uses ``file_list``, ``shuffle``, ``mode`` and ``settings`` from the
    enclosing scope, plus the module-level ``ORIGINAL_WIDTH``,
    ``ORIGINAL_HEIGH`` and ``get_batch_sampler``.

    Each non-blank line of ``file_list`` is ``"<image> <annotation>"`` in
    ``"train"``/``"test"`` mode; in ``"infer"`` mode only the first field
    is used. Images are read from ``<data_dir>/resized_images`` and
    annotations from ``<data_dir>/annotations``. The first line of an
    annotation file is a JSON list of objects with ``text``, ``posX``,
    ``posY``, ``width`` and ``height`` keys.

    Yields:
        ``(image, labels)`` in ``"train"``/``"test"`` mode and ``image`` in
        ``"infer"`` mode. ``image`` is a flattened, channel-first float32
        array with ``settings.img_mean`` subtracted. Each label record is
        ``[label, xmin, ymin, xmax, ymax, difficult]`` with coordinates
        divided by ``ORIGINAL_WIDTH`` / ``ORIGINAL_HEIGH`` and ``difficult``
        fixed to 1. Samples whose annotation list is empty are skipped, as
        are training samples left without labels after cropping.
    """
    with open(file_list, "r") as flist:
        # Blank lines (for example a trailing newline) name no sample and
        # would otherwise break the field unpacking below.
        lines = [line for line in flist if line.strip()]
    if shuffle:
        random.shuffle(lines)

    # Float divisors keep the normalisation a true division even when the
    # annotation values and the constants are all integers (Python 2).
    full_width = float(ORIGINAL_WIDTH)
    full_height = float(ORIGINAL_HEIGH)

    for line in lines:
        if mode == "train" or mode == "test":
            img_path, label_path = line.split()
            img_path = os.path.join(settings.data_dir, "resized_images",
                                    img_path)
            label_path = os.path.join(settings.data_dir, "annotations",
                                      label_path)
        elif mode == "infer":
            img_path = os.path.join(settings.data_dir, "resized_images",
                                    line.strip().split()[0])

        img = Image.open(img_path)
        img_width, img_height = img.size
        img = np.array(img)

        if mode == "train" or mode == "test":
            # Close the annotation file promptly instead of leaking it.
            with open(label_path, "r") as label_file:
                labels = json.loads(label_file.readline())
            if len(labels) == 0:
                continue

            bbox_labels = []
            for lbl in labels:
                bbox_labels.append([
                    settings.label_list[lbl["text"]],  # label
                    lbl["posX"] / full_width,  # xmin
                    lbl["posY"] / full_height,  # ymin
                    (lbl["posX"] + lbl["width"]) / full_width,  # xmax
                    (lbl["posY"] + lbl["height"]) / full_height,  # ymax
                    1.,  # difficult
                ])

            sample_labels = bbox_labels
            if mode == "train":
                sampled_bbox = image_util.generate_batch_samples(
                    get_batch_sampler(), bbox_labels, img_width, img_height)
                if len(sampled_bbox) > 0:
                    # Pick one candidate crop uniformly at random.
                    idx = int(random.uniform(0, len(sampled_bbox)))
                    img, sample_labels = image_util.crop_image(
                        img, bbox_labels, sampled_bbox[idx], img_width,
                        img_height)

        img = Image.fromarray(img)
        # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS.
        img = img.resize((settings.resize_w, settings.resize_h),
                         Image.LANCZOS)
        img = np.array(img)

        # Random horizontal flip with probability 1/2 (train mode only).
        if mode == "train" and int(random.uniform(0, 2)) == 1:
            img = img[:, ::-1, :]
            for label in sample_labels:
                # Reflect and swap xmin / xmax.
                label[1], label[3] = 1 - label[3], 1 - label[1]

        img = np.transpose(img, [2, 0, 1])  # channel first
        img = img.astype("float32")
        img -= settings.img_mean
        img = img.flatten()

        if mode == "train" or mode == "test":
            if mode == "train" and len(sample_labels) == 0:
                # A training sample without boxes is dropped.
                continue
            yield img, sample_labels
        elif mode == "infer":
            yield img