def split_dataset(dataset, min_nrof_images_per_class, nrof_test_images_per_class):
    """Split each sufficiently large class into a test part and a train part.

    Args:
        dataset: iterable of class objects exposing ``name`` and
            ``image_paths``.
        min_nrof_images_per_class: classes with fewer images than this are
            left out of both result sets.
        nrof_test_images_per_class: number of images (taken after shuffling)
            that go to the test set; the remaining images of the class go to
            the train set.

    Returns:
        A ``(train_set, test_set, num_classes)`` tuple. Both sets are lists of
        ``facenet.ImageClass`` and ``num_classes`` is the number of classes
        that were kept.

    Note:
        ``np.random.shuffle`` reorders the ``image_paths`` of every kept
        class in place.
    """
    train_set, test_set = [], []

    for entry in dataset:
        image_paths = entry.image_paths
        # Skip classes that do not have enough images.
        if len(image_paths) < min_nrof_images_per_class:
            continue

        np.random.shuffle(image_paths)
        held_out = image_paths[:nrof_test_images_per_class]
        test_set.append(facenet.ImageClass(entry.name, held_out))
        remainder = image_paths[nrof_test_images_per_class:]
        train_set.append(facenet.ImageClass(entry.name, remainder))

    num_classes = len(test_set)
    print('Classes (Number of Faces): %d' % num_classes)
    return train_set, test_set, num_classes
def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class):
    """Split each sufficiently large class into train and test parts.

    Debug information (the two numeric arguments and the sizes of the
    resulting sets) is printed along the way.

    Args:
        dataset: iterable of class objects exposing ``name`` and
            ``image_paths``.
        min_nrof_images_per_class: classes with fewer images than this are
            left out of both result sets.
        nrof_train_images_per_class: number of images (taken after
            shuffling) that go to the train set; the rest go to the test set.

    Returns:
        A ``(train_set, test_set)`` tuple of ``facenet.ImageClass`` lists.

    Note:
        ``np.random.shuffle`` reorders the ``image_paths`` of every kept
        class in place.
    """
    print("Inside split_dataset function call!")
    print(min_nrof_images_per_class)
    print(nrof_train_images_per_class)

    train_set = []
    test_set = []
    cut = nrof_train_images_per_class

    for image_class in dataset:
        shuffled = image_class.image_paths
        # Skip classes that do not have enough images.
        if len(shuffled) < min_nrof_images_per_class:
            continue

        np.random.shuffle(shuffled)
        # New ImageClass objects: the first `cut` paths train, the rest test.
        train_set.append(facenet.ImageClass(image_class.name, shuffled[:cut]))
        test_set.append(facenet.ImageClass(image_class.name, shuffled[cut:]))

    print("Len of training set: ", len(train_set))
    print("Len of testing set: ", len(test_set))
    return train_set, test_set
def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class):
    """Split each sufficiently large class into train and test parts.

    Args:
        dataset: iterable of class objects exposing ``name`` and
            ``image_paths``.
        min_nrof_images_per_class: classes with fewer images than this are
            left out of both result sets.
        nrof_train_images_per_class: number of images (taken after
            shuffling) that go to the train set; the rest go to the test set.

    Returns:
        A ``(train_set, test_set)`` tuple of ``facenet.ImageClass`` lists.

    Note:
        ``np.random.shuffle`` reorders the ``image_paths`` of every kept
        class in place.
    """
    train_set, test_set = [], []

    # Lazily filter out the classes that are too small; the generator keeps
    # the size check and the shuffle interleaved per class.
    big_enough = (
        candidate for candidate in dataset
        if len(candidate.image_paths) >= min_nrof_images_per_class
    )
    for candidate in big_enough:
        files = candidate.image_paths
        np.random.shuffle(files)
        head = files[:nrof_train_images_per_class]
        train_set.append(facenet.ImageClass(candidate.name, head))
        tail = files[nrof_train_images_per_class:]
        test_set.append(facenet.ImageClass(candidate.name, tail))

    return train_set, test_set
def Remove_duplicate_names(args, dataset):
    """Compare *dataset* against the class names stored in a feature file.

    The pickle file at ``args.features_filename`` is expected to contain a
    3-tuple ``(embeddings, labels, class_names)``. Every class in *dataset*
    whose name (with ``_`` replaced by a space) already appears in the stored
    class names is counted as a duplicate, and a summary is printed.

    Args:
        args: object with a ``features_filename`` attribute (``~`` is
            expanded).
        dataset: iterable of class objects exposing ``name`` and
            ``image_paths``.

    Returns:
        ``(stak_host, dataset)``. ``stak_host`` is the loaded
        ``(embeddings, labels, class_names)`` tuple when the stored class
        list is non-empty, otherwise an empty list. ``dataset`` is the
        argument, returned unchanged.
    """
    stak_host = []
    dataset_n = []      # classes whose names are not in the feature file
    name_deleted = []   # display names of the duplicated classes
    image_del = 0       # number of images belonging to duplicated classes

    features_filename = os.path.expanduser(args.features_filename)
    print("load_features from file :'%s'" % args.features_filename)
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only point this at feature files from a trusted source.
    with open(features_filename, 'rb') as infile:
        (emb_old, labels_old, class_old) = pickle.load(infile)

    if len(class_old) > 0:
        for cls in dataset:
            name = cls.name.replace('_', ' ')
            if name in class_old:
                name_deleted.append(name)
                image_del += len(cls.image_paths)
            else:
                dataset_n.append(facenet.ImageClass(cls.name, cls.image_paths))

        if image_del > 0:
            # Both values must be passed as ONE tuple: `fmt % a, b` applies
            # `%` to `a` alone and raises TypeError for a two-slot format.
            print(
                "About (%s) files of data were deleted for about '%s' people whose names already existd"
                % (image_del, len(set(name_deleted))))
        else:
            print("No one has been deleted(%s)" % len(name_deleted))
        stak_host = emb_old, labels_old, class_old

    # NOTE(review): the filtered list `dataset_n` is built but the original,
    # unfiltered `dataset` is what gets returned. That looks unintended given
    # the function name, but it is kept as-is so callers see no change --
    # confirm the intent before switching the return value.
    return stak_host, dataset
def get_unsupervised_dataset(path):
    """Collect, per domain directory, one flat list of image paths.

    Every sub-directory of *path* except ``"id+camera"`` is treated as a
    domain. For a domain, the paths returned by ``facenet.get_image_paths``
    for the domain directory itself are filtered to ``.jpg``/``.png`` files,
    and then the paths returned for each of its sub-directories (in sorted
    order) are appended without filtering.

    Args:
        path: root directory; ``~`` is expanded.

    Returns:
        dict mapping domain name to a ``facenet.ImageClass`` that is named
        after the domain and holds all collected image paths.
    """
    allowed_ext = ('.jpg', '.png')
    root = os.path.expanduser(path)
    domain_unsupervised_dataset = {}

    domains = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )

    for domain_name in domains:
        if domain_name == "id+camera":
            continue

        domain_dir = os.path.join(root, domain_name)

        # Images lying directly in the domain directory: keep only the
        # accepted extensions (filtered in place, order preserved).
        image_paths = facenet.get_image_paths(domain_dir)
        image_paths[:] = [
            candidate for candidate in image_paths
            if os.path.splitext(os.path.basename(candidate))[1] in allowed_ext
        ]

        # Images inside the domain's sub-directories are added as returned.
        class_dirs = sorted(
            entry for entry in os.listdir(domain_dir)
            if os.path.isdir(os.path.join(domain_dir, entry))
        )
        for class_name in class_dirs:
            image_paths += facenet.get_image_paths(
                os.path.join(domain_dir, class_name))

        domain_unsupervised_dataset[domain_name] = facenet.ImageClass(
            domain_name, image_paths)

    return domain_unsupervised_dataset
def get_dataset(path, has_class_directories=True):
    """Build a single-class dataset from the ``nil`` sub-directory of *path*.

    Args:
        path: root directory; ``~`` and ``~user`` are expanded to the user's
            home directory.
        has_class_directories: accepted but not used by this function.

    Returns:
        A one-element list holding a ``facenet.ImageClass`` named ``"nil"``
        with the paths returned by ``facenet.get_image_paths`` for
        ``<path>/nil``.
    """
    # Expand "~" and "~user" in the path to the user's home directory.
    root = os.path.expanduser(path)
    nil_images = facenet.get_image_paths(os.path.join(root, "nil"))
    return [facenet.ImageClass("nil", nil_images)]
def get_supervised_dataset_single(path, nrof_data_augmentation):
    """Load one labelled dataset: one class per sub-directory of *path*.

    Args:
        path: root directory whose sub-directories are the classes; ``~`` is
            expanded.
        nrof_data_augmentation: forwarded unchanged as the second argument of
            ``facenet.get_image_paths``.

    Returns:
        List of ``facenet.ImageClass`` objects, ordered by class
        (directory) name.
    """
    root = os.path.expanduser(path)
    class_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )

    dataset = []
    for class_name in class_names:
        class_dir = os.path.join(root, class_name)
        class_images = facenet.get_image_paths(class_dir, nrof_data_augmentation)
        dataset.append(facenet.ImageClass(class_name, class_images))
    return dataset
def get_dataset(path, has_class_directories=True):
    """Load a two-level directory tree as a dict of datasets.

    Layout expected below *path*: ``<group>/<class>/...``. Every first-level
    directory becomes a key, and every second-level directory becomes one
    ``facenet.ImageClass`` in that key's list.

    Args:
        path: root directory; ``~`` is expanded.
        has_class_directories: accepted but not used by this function.

    Returns:
        dict mapping each first-level directory name to a list of
        ``facenet.ImageClass`` objects (sorted by class name). Groups are
        inserted in sorted order.
    """
    def _sorted_subdirs(parent):
        # Names of the direct sub-directories of `parent`, sorted.
        return sorted(
            entry for entry in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, entry))
        )

    root = os.path.expanduser(path)
    datadict = {}
    for group in _sorted_subdirs(root):
        group_dir = os.path.join(root, group)
        members = []
        for class_name in _sorted_subdirs(group_dir):
            class_images = facenet.get_image_paths(
                os.path.join(group_dir, class_name))
            members.append(facenet.ImageClass(class_name, class_images))
        datadict[group] = members
    return datadict
def get_dataset(paths):
    """Load video-folder datasets from one or more root directories.

    Layout expected below each root: ``<class>/<video>/<image files>``.
    Every ``<class>/<video>`` directory becomes one ``facenet.ImageClass``
    named ``os.path.join(class, video)`` holding at most the first 100
    entries that ``os.listdir`` reports for that video directory.

    Args:
        paths: one or more root directories separated by ``':'``; ``~`` is
            expanded in each.

    Returns:
        List of ``facenet.ImageClass`` objects. Entries follow ``os.listdir``
        order (nothing is sorted).
    """
    max_images_per_video = 100
    dataset = []

    for root in paths.split(':'):
        root = os.path.expanduser(root)
        class_entries = os.listdir(root)
        print('Number of classes:', len(class_entries))

        for class_entry in class_entries:
            class_dir = os.path.join(root, class_entry)
            if not os.path.isdir(class_dir):
                continue

            for video_entry in os.listdir(class_dir):
                video_dir = os.path.join(class_dir, video_entry)
                if not os.path.isdir(video_dir):
                    continue

                frames = os.listdir(video_dir)[:max_images_per_video]
                frame_paths = [os.path.join(video_dir, frame) for frame in frames]
                label = os.path.join(class_entry, video_entry)
                dataset.append(facenet.ImageClass(label, frame_paths))

    return dataset
def get_supervised_dataset_multiple(path, nrof_data_augmentation):
    """Load one labelled dataset per domain directory found below *path*.

    Layout expected below *path*: ``<domain>/<class>/...``. Classes that
    carry the same name in different domains are kept as separate entries;
    nothing is merged across domains.

    Args:
        path: root directory; ``~`` is expanded.
        nrof_data_augmentation: forwarded unchanged as the second argument of
            ``facenet.get_image_paths``.

    Returns:
        dict mapping domain name to a list of ``facenet.ImageClass`` objects
        (sorted by class name). Domains without any class directory are
        omitted.
    """
    root = os.path.expanduser(path)
    domain_names = [
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    ]
    domain_names.sort()

    domain_supervised_dataset = {}
    for domain_name in domain_names:
        domain_dir = os.path.join(root, domain_name)
        class_names = [
            entry for entry in os.listdir(domain_dir)
            if os.path.isdir(os.path.join(domain_dir, entry))
        ]
        class_names.sort()

        domain_classes = []
        for class_name in class_names:
            class_images = facenet.get_image_paths(
                os.path.join(domain_dir, class_name), nrof_data_augmentation)
            domain_classes.append(facenet.ImageClass(class_name, class_images))

        # Only domains that actually contain classes are reported.
        if domain_classes:
            domain_supervised_dataset[domain_name] = domain_classes

    return domain_supervised_dataset
def train():
    """Interactively train an SVM face classifier and save it to disk.

    All settings are asked for on the console; the function takes no
    parameters. Steps, in order:

    1. Prompt for the image directory, GPU fraction, model folder, batch
       size, image size, classifier file name, whether to split the dataset,
       the training percentage (only when splitting) and the minimum number
       of images per class.
    2. Load the dataset with ``facenet.get_dataset`` and, when splitting,
       shuffle every class and divide it into train and test parts.
    3. Compute embeddings with ``get_embeddings``, fit a linear ``SVC`` and
       pickle ``(model, class_names)`` to the classifier file.
    4. When splitting, run ``test`` on the held-out part; otherwise ask
       whether to run ``test`` and call ``sys.exit()`` if the answer is not
       ``y``.
    """
    path = input("\nEnter the path to the face images directory inside which multiple user folders are present or press ENTER if the default created output folder is present in this code directory only: ")
    if path == "":
        path = 'output'

    # NOTE(review): the answer is read but never used below; the conversion
    # to a float (default 0.8) was disabled in the original code.
    gpu_fraction = input("\nEnter the gpu memory fraction u want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()

    model_suffix = "20180402-114759/20180402-114759.pb"
    model = input("\nEnter the FOLDER PATH inside which 20180402-114759 FOLDER is present. Press ENTER stating that the FOLDER 20180402-114759 is present in this code directory itself: ").rstrip()
    model = model_suffix if model == "" else model + "/" + model_suffix

    ask = input("\nEnter the batch size of images to process at once OR press ENTER for default 90: ").strip()
    batch_size = int(ask) if ask != "" else 90

    ask = input("\nEnter the width_size of face images OR press ENTER for default 160: ").strip()
    image_size = int(ask) if ask != "" else 160

    classifier_filename = input("Enter the output SVM classifier filename OR press ENTER for default name= classifier: ")
    if classifier_filename == "":
        classifier_filename = 'classifier'
    classifier_filename = os.path.expanduser(classifier_filename + '.pkl')

    do_split = input("\nPress Y if you want to split the dataset for Training and Testing: ").strip().lower() == 'y'

    # The training percentage is only asked for when a split was requested.
    percentage = 70
    if do_split:
        ask = input("\nEnter the percentage of training dataset for splitting OR press ENTER for default 70: ").strip()
        if ask != "":
            percentage = float(ask)

    ask = input("\nEnter the minimum number of images that much be present for a single user to include him for classification. Press ENTER for default value 0: ")
    min_nrof_images_per_class = int(ask) if ask != "" else 0

    dataset = facenet.get_dataset(path)

    train_set = []
    test_set = []
    if do_split:
        for cls in dataset:
            paths = cls.image_paths
            # Drop classes with fewer than min_nrof_images_per_class images.
            if len(paths) < min_nrof_images_per_class:
                continue
            np.random.shuffle(paths)
            # Number of images of this class that go to the training part.
            no_train_images = int(percentage * len(paths) * 0.01)
            train_set.append(facenet.ImageClass(cls.name, paths[:no_train_images]))
            test_set.append(facenet.ImageClass(cls.name, paths[no_train_images:]))

    # The original carried (disabled) assertions here requiring at least one
    # image per class in the train/test sets, or in the whole dataset when
    # not splitting. No such check is performed.

    # Without a split the classifier is trained on the whole dataset.
    training_classes = train_set if do_split else dataset
    paths_train, labels_train = facenet.get_image_paths_and_labels(training_classes)
    if do_split:
        paths_test, labels_test = facenet.get_image_paths_and_labels(test_set)
        print('\nNumber of classes: %d' % len(train_set))
        print('\nNumber of images in TRAIN set: %d' % len(paths_train))
        print('\nNumber of images in TEST set: %d' % len(paths_test))
    else:
        print('\nNumber of classes: %d' % len(dataset))
        print('\nNumber of images: %d' % len(paths_train))

    # Embeddings of the training images.
    emb_array = get_embeddings(model, paths_train, batch_size, image_size)

    print('\nTraining classifier')
    model_svc = SVC(kernel='linear', probability=True)
    model_svc.fit(emb_array, labels_train)

    # Human-readable class names, in the same order as the training classes.
    class_names = [cls.name.replace('_', ' ') for cls in training_classes]

    with open(classifier_filename, 'wb') as outfile:
        pickle.dump((model_svc, class_names), outfile)
    print('\nSaved classifier model to file: "%s"' % classifier_filename)

    if do_split:
        # Evaluate on the held-out part of every class.
        emb_array = get_embeddings(model, paths_test, batch_size, image_size)
        test(classifier_filename, emb_array, labels_test, model, batch_size, image_size)
        return

    # NOTE(review): `test` is called with six arguments above and with none
    # here -- presumably all of its parameters have defaults; verify.
    ask = input("Press y if you want to run the TEST on whole dataset or press ENTER to exit: ").strip().lower()
    if ask != 'y':
        sys.exit()
    test()
def main(args):
    """Compare identity accuracy on all images vs. single-face images.

    The dataset under ``args.input_dir`` is loaded, every image is run
    through the MTCNN face detector, and only images with exactly one
    detected face are kept in a filtered copy of the dataset. The alignment
    success rate and the results of ``test_identity`` on the full and the
    filtered dataset are printed.

    Args:
        args: object providing ``input_dir``, ``gpu_memory_fraction`` and
            ``random_order``; it is also passed on to ``test_identity``.

    NOTE(review): if the dataset contains no images at all, the alignment
    ratio below divides by zero and raises ``ZeroDivisionError``.
    """
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        session_config = tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False)
        sess = tf.Session(config=session_config)
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detection steps
    factor = 0.709               # scale factor

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    filtered_dct = {}

    if args.random_order:
        random.shuffle(dataset)

    for cls in dataset:
        kept_paths = []
        filtered_dct[cls.name] = kept_paths

        for image_path in cls.image_paths:
            nrof_images_total += 1

            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue

            if img.ndim < 2:
                # Counted as an alignment error.
                print('Unable to align "%s"' % image_path)
                continue
            if img.ndim == 2:
                img = facenet.to_rgb(img)
            img = img[:, :, 0:3]

            bounding_boxes, _ = align.detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)

            # Accept only images with exactly one detected face; anything
            # else is counted as an alignment error.
            if bounding_boxes.shape[0] != 1:
                print('Unable to align "%s"' % image_path)
                continue

            nrof_successfully_aligned += 1
            kept_paths.append(image_path)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)

    # From here on the filtered dataset only holds images with one face.
    filtered_dataset = []
    for class_name, image_paths in filtered_dct.items():
        filtered_dataset.append(facenet.ImageClass(class_name, image_paths))

    accuracy_aligned = 1. * nrof_successfully_aligned / nrof_images_total
    accuracy_full = test_identity(args, dataset)
    accuracy_filtered = test_identity(args, filtered_dataset)

    print()
    # Alignment accuracy
    print('Aligned accuracy: %.3f' % accuracy_aligned)
    # Accuracy for the full dataset
    print('Identity accuracy for full test set: %.3f' % accuracy_full)
    # Accuracy for the filtered dataset
    print('Identity accuracy for successful aligned images: %.3f' % accuracy_filtered)