def convert_line_to_text_dataset(anno_filename, label_out_dir, show=True):
    """Split a line-based annotation file into one label text file per image.

    Every record read from ``anno_filename`` provides an image id, its boxes
    and the matching labels. For each record a ``<image stem>.txt`` file is
    written into ``label_out_dir`` whose rows are ``[label] + box``.

    :param anno_filename: annotation file parsed by
        ``file_processing.read_lines_image_labels``
    :param label_out_dir: directory receiving the generated text files
    :param show: accepted for interface compatibility; not used in this function
    :return: None
    """
    file_processing.create_dir(label_out_dir)
    records = file_processing.read_lines_image_labels(anno_filename)
    ext_len = len('jpg')
    for image_id, box, label in records:
        # Replace the trailing three-character extension with "txt".
        txt_name = image_id[:-ext_len] + "txt"
        rows = []
        for cls, rect in zip(label, box):
            rows.append([cls] + rect)
        out_path = os.path.join(label_out_dir, txt_name)
        file_processing.write_data(out_path, rows, mode='w')
def create_embedding(model_path, emb_face_dir):
    """Build a face-embedding database from the images under ``emb_face_dir``.

    The embeddings are stored in ``face.h5`` (dataset ``X_train``) and the list
    of image paths they were computed from is written to ``name.txt``; both
    files are created relative to the current working directory.

    :param model_path: path handed to ``face_recognition.facenetEmbedding``
    :param emb_face_dir: directory scanned for ``jpg`` images
    :return: None
    """
    face_net = face_recognition.facenetEmbedding(model_path)
    # Only the file list is used below; the per-file labels are ignored.
    image_list, _names_list = file_processing.gen_files_labels(emb_face_dir, postfix='jpg')
    images = image_processing.get_images(image_list, 160, 160, whiten=True)
    compare_emb = face_net.get_embedding(images)
    # Context manager guarantees the HDF5 handle is closed even if the
    # dataset assignment raises.
    with h5py.File('face.h5', 'w') as h5file:
        h5file['X_train'] = compare_emb
    file_processing.write_data('name.txt', image_list, model='w')
def convert_facebody_to_textdataset(image_list, annotations_dir, label_out_dir, class_names, coordinatesType,
                                    show=False):
    '''
    Convert face/body JSON annotations into one label text file per image.

    label data format:
    SSD  = [label_id,x,y,w,h]
    YOLO = [label_id,x_center/img_width ,y_center/img_height ,width/img_width ,height/img_height]
    MMDET= [img_width,img_height,label_id,x,y,w,h]
    :param image_list: list of image paths
    :param annotations_dir: directory holding the ``<image stem>.json`` annotation files
    :param label_out_dir: output directory for the label files (created if missing)
    :param class_names: class names forwarded to ``face_body.get_annotation``
    :param coordinatesType: coordinate format: SSD, YOLO or MMDET
    :param show: when true, display every converted image with its rects and class names
    :return: list of image stems for which a label file was written
    '''
    if not os.path.exists(label_out_dir):
        os.makedirs(label_out_dir)
    name_id_list = []
    last_index = len(image_list) - 1
    for i, image_path in enumerate(image_list):
        # splitext handles any extension length (".jpeg", ".png", ...), not
        # just four-character suffixes such as ".jpg".
        name_id = os.path.splitext(os.path.basename(image_path))[0]
        annotations_file = os.path.join(annotations_dir, name_id + '.json')
        if not os.path.exists(image_path):
            print("no image_dict:{}".format(image_path))
            continue
        if not os.path.exists(annotations_file):
            print("no annotations:{}".format(annotations_file))
            continue
        out_file = os.path.join(label_out_dir, name_id + ".txt")
        image = image_processing.read_image(image_path)
        rects, class_name, class_id = face_body.get_annotation(
            annotations_file, class_names, image.shape, coordinatesType)
        if len(rects) == 0 or len(class_name) == 0 or len(class_id) == 0:
            print("no class in annotations:{}".format(annotations_file))
            continue
        content_list = [[c] + r for c, r in zip(class_id, rects)]
        name_id_list.append(name_id)
        file_processing.write_data(out_file, content_list, mode='w')
        if show:
            # Reuse the image loaded above instead of reading it from disk again.
            image_processing.show_image_rects_text("image_dict", image, rects, class_name)
        # Progress is only reported for images that were actually converted.
        if i % 10 == 0 or i == last_index:
            print("processing image_dict:{}/{}".format(i, last_index))
    return name_id_list
def save_label_set(filename, label_name_set):
    '''
    Save the label names.

    The names are collected in iteration order and handed to
    ``file_processing.write_data``; no numeric index is stored alongside them.
    :param filename: destination file
    :param label_name_set: iterable of label names
    :return: None
    '''
    names = list(label_name_set)
    file_processing.write_data(filename, names, model='w')
def create_embedding(model_path, emb_face_dir, out_emb_path, out_filename):
    '''
    Generate the embedding database; the embeddings are the face features.

    The embeddings are saved with ``np.save`` to ``out_emb_path`` and the
    labels returned by ``gen_files_labels`` are written to ``out_filename``.
    Relies on the module-level ``resize_height`` / ``resize_width`` values.
    '''
    print('#3rd--FaceNet人脸库')
    embedder = face_recognition.facenetEmbedding(model_path)
    face_files, face_names = file_processing.gen_files_labels(emb_face_dir, postfix='jpg')
    face_batch = image_processing.get_images(face_files, resize_height, resize_width, whiten=True)
    np.save(out_emb_path, embedder.get_embedding(face_batch))
    file_processing.write_data(out_filename, face_names, model='w')
def convert_annotation_list(annotations_list, image_dir, label_out_dir, class_names, image_type='.jpg', show=True):
    '''
    Convert a list of XML annotation files into one label text file each.

    :param annotations_list: list of annotation file paths
    :param image_dir: directory containing the images
    :param label_out_dir: output directory for the label files (created if missing)
    :param class_names: class names forwarded to ``pascal_voc.get_annotation``
    :param image_type: image extension, e.g. .jpg or .png
    :param show: when true, display every converted image with its rects and class names
    :return: list of annotation stems for which a label file was written
    '''
    if not os.path.exists(label_out_dir):
        os.makedirs(label_out_dir)
    total = len(annotations_list)
    converted = []
    for index, xml_path in enumerate(annotations_list):
        stem = os.path.basename(xml_path)[:-len(".xml")]
        image_path = os.path.join(image_dir, stem + image_type)
        # Skip entries whose image or annotation file is missing.
        if not os.path.exists(image_path):
            print("no image:{}".format(image_path))
            continue
        if not os.path.exists(xml_path):
            print("no annotations:{}".format(xml_path))
            continue
        label_path = os.path.join(label_out_dir, stem + ".txt")
        rects, class_name, class_id = pascal_voc.get_annotation(xml_path, class_names)
        rows = []
        for cls, rect in zip(class_id, rects):
            rows.append([cls] + rect)
        converted.append(stem)
        file_processing.write_data(label_path, rows, mode='w')
        if show:
            picture = image_processing.read_image(image_path)
            image_processing.show_image_rects_text("image", picture, rects, class_name)
        is_last = index == total - 1
        if index % 100 == 0 or is_last:
            print("processing {}/{}".format(index + 1, total))
    return converted
def create_face_embedding(model_path, dataset_path, out_emb_path, out_filename):
    '''
    Compute face embeddings for a dataset and save them with their labels.

    :param model_path: faceNet model path
    :param dataset_path: face database path, one folder per class
    :param out_emb_path: output path for the embeddings
    :param out_filename: output file for the labels matching the embeddings one-to-one
    :return: None
    '''
    image_paths, person_names = file_processing.gen_files_labels(dataset_path, postfix='jpg')
    face_features, label_list = get_face_embedding(model_path, image_paths, person_names)
    print("label_list:{}".format(label_list))
    label_count = len(label_list)
    print("have {} label".format(label_count))
    np.save(out_emb_path, np.asarray(face_features))
    file_processing.write_data(out_filename, label_list, model='w')
def create_dataset(out_dir, nums, filename, char_set, captcha_height, captcha_width, captcha_size):
    '''
    Generate captcha samples.

    Each accepted sample is saved as ``<index>_<text>.jpg`` in ``out_dir`` and a
    line ``<image name> <encoded labels...>`` is appended to ``filename``.
    :param out_dir: directory where the dataset images are saved (created if missing)
    :param nums: number of samples to generate
    :param filename: txt file the sample records are appended to
    :param char_set: character set
    :param captcha_height: captcha height
    :param captcha_width: captcha width
    :param captcha_size: captcha size
    :return: None
    '''
    if not os.path.exists(out_dir):
        # makedirs also creates missing parent directories.
        os.makedirs(out_dir)
    # Validate against the requested size rather than unrelated module globals.
    expected_shape = (captcha_height, captcha_width, 3)
    i = 0
    while i < nums:
        text, image = gen_captcha_text_and_image(char_set=char_set,
                                                 captcha_height=captcha_height,
                                                 captcha_width=captcha_width,
                                                 captcha_size=captcha_size)
        # A generated captcha does not always have the requested shape;
        # discard it and retry without advancing the counter.
        if image.shape != expected_shape:
            continue
        if i == 0:
            image_processing.cv_show_image(text, image)  # display the first captcha
        image_name = str(i) + "_" + text + ".jpg"
        image_path = out_dir + "/" + image_name
        print(image_path)
        image_processing.save_image(image_path, image, toUINT8=False)
        label_list = file_processing.label_encode(list(text), char_set)
        content = ' '.join(str(item) for item in [image_name] + label_list)
        file_processing.write_data(filename, [content], model='a')
        i += 1
def create_face_embedding_for_bzl(model_path, dataset_path, out_emb_path, out_filename):
    '''
    Compute face embeddings for images whose label is encoded in the file name.

    :param model_path: faceNet model path
    :param dataset_path: face database path; images are named like
        <label>_XXX_XXX.jpg, where the part before the first "_" is the label
    :param out_emb_path: output path for the embeddings
    :param out_filename: output file for the labels matching the embeddings one-to-one
    :return: None
    '''
    image_list = file_processing.get_images_list(dataset_path, postfix=['*.jpg', '*.png'])
    # The label is everything before the first underscore of the base name.
    names_list = [os.path.basename(path).split('_')[0] for path in image_list]
    face_features, label_list = get_face_embedding(model_path, image_list, names_list)
    print("label_list:{}".format(label_list))
    label_count = len(label_list)
    print("have {} label".format(label_count))
    np.save(out_emb_path, np.asarray(face_features))
    file_processing.write_data(out_filename, label_list, mode='w')
def create_embedding(model_path, emb_face_dir, out_emb_path, out_filename):
    '''
    Generate the embedding database; the embeddings are the face features.

    Relies on the module-level ``resize_height`` / ``resize_width`` values.
    :param model_path: path handed to ``face_recognition.facenetEmbedding``
    :param emb_face_dir: directory scanned for ``jpg`` face images
    :param out_emb_path: file the embeddings are saved to with ``np.save``
    :param out_filename: file the image paths are written to
    :return: None
    '''
    embedder = face_recognition.facenetEmbedding(model_path)
    face_files, face_names = file_processing.gen_files_labels(emb_face_dir, postfix='jpg')
    face_batch = image_processing.get_images(face_files, resize_height, resize_width, whiten=True)
    np.save(out_emb_path, embedder.get_embedding(face_batch))
    # Either the file list or the name list could serve as the face labels.
    # The file list is saved here because, when testing, it makes it easy to
    # see which image a detected face resembles.
    file_processing.write_data(out_filename, face_files, model='w')
def save_pair_data(filename, content_list):
    """Write ``content_list`` to ``filename`` via ``file_processing.write_data``.

    The call passes ``mode='w'``.

    :param filename: destination file
    :param content_list: data forwarded unchanged to ``write_data``
    """
    file_processing.write_data(filename, content_list, mode='w')
else: if select_nums > num_pair_issame_1: raise Exception( "pair_nums({}) must be less than num_pair_issame_1({})".format( select_nums, num_pair_issame_1)) np.random.seed(100) index_0 = np.random.permutation(num_pair_issame_0)[:select_nums] # 打乱后的行号 index_1 = np.random.permutation(num_pair_issame_1)[:select_nums] # 打乱后的行号 pair_issame_0 = pair_issame_0[index_0, :] # 获取打乱后的训练数据 pair_issame_1 = pair_issame_1[index_1, :] # 获取打乱后的训练数据 pair_issame = np.concatenate([pair_issame_0, pair_issame_1], axis=0) print("pair_issame_0 nums:{}".format(len(pair_issame_0))) print("pair_issame_1 nums:{}".format(len(pair_issame_1))) # image_list1 = pair_issame[:, 0] # image_list2 = pair_issame[:, 1] # issame_list = pair_issame[:, 2] print("have {} pairs".format(len(pair_issame))) return pair_issame if __name__ == "__main__": # NVR VAL faceDataset dataset = '/media/dm/dm2/XMC/FaceDataset/X4/X4_Face20_Crop/' # dataset = '/media/dm/dm1/FaceDataset/X4/DMAI_Alig/' # lexue image_dir = dataset + "trainval" pair_filename = dataset + "x4_pair_data.txt" pair_issame = create_pair_data(image_dir, pair_num=0) file_processing.write_data(pair_filename, pair_issame, mode='w')
def convert_voc_to_textdataset_for_annotation(annotations_list, image_dir, label_out_dir, class_names,
                                              coordinatesType, image_type='.jpg', labelType="class_id",
                                              show=True):
    '''
    Convert VOC XML annotations into one label text file per annotation.

    coordinatesType:
    SSD  = [label_id,x,y,w,h]
    xywh = [label_id,x,y,w,h]
    xyxy = [label_id,xmin,ymin,xmax,ymax]
    xxyy = [label_id,xmin,xmax,ymin,ymax]
    YOLO = [label_id,x_center/img_width ,y_center/img_height ,width/img_width ,height/img_height]
    MMDET= [img_width,img_height,label_id,x,y,w,h]
    :param annotations_list: list of annotation file paths
    :param image_dir: directory containing the images
    :param label_out_dir: output directory for the label files (created if missing)
    :param class_names: class names forwarded to ``pascal_voc.get_annotation``
    :param coordinatesType: coordinate format, see above
    :param image_type: image extension, e.g. .jpg or .png
    :param labelType: "class_name" or "class_id"; selects the first column of each row
    :param show: when true, display every converted image with its rects and class names
    :return: list of annotation stems for which a label file was written
    :raises ValueError: if ``labelType`` is not "class_name" or "class_id"
    '''
    # Fail fast with a clear message; previously an unknown labelType left
    # ``label`` unbound and crashed with UnboundLocalError inside the loop.
    if labelType not in ("class_name", "class_id"):
        raise ValueError(
            "labelType must be 'class_name' or 'class_id', got {!r}".format(labelType))
    if not os.path.exists(label_out_dir):
        os.makedirs(label_out_dir)
    name_id_list = []
    last_index = len(annotations_list) - 1
    for i, annotations_file in enumerate(annotations_list):
        name_id = os.path.basename(annotations_file)[:-len(".xml")]
        image_name = name_id + image_type
        image_path = os.path.join(image_dir, image_name)
        if not os.path.exists(image_path):
            print("no image_dict:{}".format(image_path))
            continue
        if not os.path.exists(annotations_file):
            print("no annotations:{}".format(annotations_file))
            continue
        out_file = os.path.join(label_out_dir, name_id + ".txt")
        rects, class_name, class_id = pascal_voc.get_annotation(
            annotations_file, class_names, coordinatesType=coordinatesType)
        # Annotations without any rect produce no label file.
        if not rects:
            continue
        label = class_name if labelType == "class_name" else class_id
        content_list = [[c] + r for c, r in zip(label, rects)]
        name_id_list.append(name_id)
        file_processing.write_data(out_file, content_list, mode='w')
        if show:
            image = image_processing.read_image(image_path)
            image_processing.show_image_rects_text("image_dict", image, rects, class_name)
        # Progress is only reported for annotations that were actually converted.
        if i % 10 == 0 or i == last_index:
            print("processing image_dict:{}/{}".format(i, last_index))
    return name_id_list