Example #1
def check_dataset(dataset):
    for cls in dataset:
        assert len(cls.image_paths) > 0, \
            'There must be at least one image for each class in the dataset'
    paths, labels = facenet.get_image_paths_and_labels(dataset)
    print(paths)
    print(labels)
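These snippets all rely on the same contract: facenet.get_image_paths_and_labels flattens a dataset (a list of per-class objects with name and image_paths attributes) into one list of paths plus a parallel list of integer labels, each label being the class's index. A minimal, self-contained sketch of that behavior (the real facenet implementation may differ in detail):

class ImageClass:
    def __init__(self, name, image_paths):
        self.name = name
        self.image_paths = image_paths

def get_image_paths_and_labels(dataset):
    # One integer label per image: the index of its class in `dataset`.
    paths, labels = [], []
    for i, cls in enumerate(dataset):
        paths += cls.image_paths
        labels += [i] * len(cls.image_paths)
    return paths, labels

dataset = [ImageClass('alice', ['a1.png', 'a2.png']),
           ImageClass('bob', ['b1.png'])]
print(get_image_paths_and_labels(dataset))
# (['a1.png', 'a2.png', 'b1.png'], [0, 0, 1])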
Example #2
    def process_data(self):
        dataset = facenet.get_dataset(self.data_dir)
        train_set, val_set = facenet.split_dataset(dataset, self.valid_ratio,
                                                   self.min_images_per_class,
                                                   self.mode)
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        val_image_list, val_label_list = facenet.get_image_paths_and_labels(
            val_set)

        image_list = self.preprocess_image(image_list)
        val_image_list = self.preprocess_image(val_image_list)

        # zip() returns a one-shot iterator in Python 3; materialize it as a list
        train_data = list(zip(image_list, label_list))
        val_data = list(zip(val_image_list, val_label_list))
        train_rdd = self.sc.parallelize(train_data)
        val_rdd = self.sc.parallelize(val_data)
        train_df = train_rdd.toDF(DATAINDEX)
        val_df = val_rdd.toDF(DATAINDEX)
        return train_df, val_df
    def train(self, dataset_folder, model_name):
        dataset = facenet.get_dataset(dataset_folder)

        # Check that there is at least one training image per class:
        # for cls in dataset:
        #     assert len(cls.image_paths) > 0, \
        #         'There must be at least one image for each class in the dataset'

        paths, labels = facenet.get_image_paths_and_labels(dataset)

        print('Number of classes: %d' % len(dataset))
        print('Number of images: %d' % len(paths))

        print("Calculating embeddings for new data")
        data_emb = self.calculate_embeddings(paths, False)

        class_names = [cls.name.replace('_', ' ') for cls in dataset]

        # Saving classifier model
        with open(model_name, 'wb') as outfile:
            pickle.dump((data_emb, class_names, labels), outfile)
        print('Saved classifier model to file "%s"' % model_name)
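For context, a consumer of the file that train() writes above would unpickle the same three-element tuple; 'classifier.pkl' here is a placeholder name, not part of the original example:

import pickle

# Hypothetical loader for the (data_emb, class_names, labels) tuple
# pickled by train() above; the filename is a placeholder.
with open('classifier.pkl', 'rb') as infile:
    data_emb, class_names, labels = pickle.load(infile)
print('%d classes, %d embeddings' % (len(class_names), len(labels)))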
Example #4
    def fit(self):
        self.model_status = 'TRAIN'
        self.data_dir = My_align_dataset_mtcnn(0, 160, 32, True,
                                               0.25).output_dir

        with tf.Graph().as_default():
            with tf.Session() as sess:
                np.random.seed(seed=self.seed)
                if self.use_split_dataset:
                    dataset_tmp = facenet.get_dataset(self.data_dir)
                    train_set, test_set = self.split_dataset(
                        dataset_tmp, self.min_nrof_images_per_class,
                        self.nrof_train_images_per_class)
                    if self.model_status == 'TRAIN':
                        dataset = train_set
                    elif self.model_status == 'CLASSIFY':
                        dataset = test_set
                else:
                    dataset = facenet.get_dataset(self.data_dir)

                # Check that there is at least one training image per class
                for cls in dataset:
                    assert len(cls.image_paths) > 0, \
                        'There must be at least one image for each class in the dataset'

                self.paths, self.labels = facenet.get_image_paths_and_labels(
                    dataset)
                print("paths", self.paths)
                print("labels", self.labels)
                print('Number of classes: %d' % len(dataset))
                print('Number of images: %d' % len(self.paths))

                # Load the model
                print('Loading feature extraction model', self.model)
                facenet.load_model(self.model)  # load_model returns None

                # Get input and output tensors
                images_placeholder = tf.get_default_graph().get_tensor_by_name(
                    "input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name(
                    "embeddings:0")
                phase_train_placeholder = tf.get_default_graph(
                ).get_tensor_by_name("phase_train:0")
                embedding_size = embeddings.get_shape()[1]

                # Run forward pass to calculate embeddings
                print('Calculating features for images')
                nrof_images = len(self.paths)
                nrof_batches_per_epoch = int(
                    math.ceil(1.0 * nrof_images / self.batch_size))
                print(nrof_images, self.batch_size, nrof_batches_per_epoch,
                      embedding_size)
                self.emb_array = np.zeros((nrof_images, embedding_size))
                for i in range(nrof_batches_per_epoch):
                    start_index = i * self.batch_size
                    end_index = min((i + 1) * self.batch_size, nrof_images)
                    paths_batch = self.paths[start_index:end_index]
                    images = facenet.load_data(paths_batch, False, False,
                                               self.image_size)
                    feed_dict = {
                        images_placeholder: images,
                        phase_train_placeholder: False
                    }
                    self.emb_array[start_index:end_index, :] = sess.run(
                        embeddings, feed_dict=feed_dict)
                print("shape", self.emb_array.shape)

                self.classifier_filename_exp = os.path.expanduser(
                    self.classifier_filename)
                # Train classifier
                print('Training classifier')
                self.classify_model = SVC(kernel='linear', probability=True)
                self.classify_model.fit(self.emb_array, self.labels)

                # Create a list of class names
                self.class_names = [
                    cls.name.replace('_', ' ') for cls in dataset
                ]
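The batched forward pass in fit() is a generic pattern. A standalone sketch, with embed_fn standing in for the sess.run(embeddings, ...) call (an assumption, not facenet API):

import math
import numpy as np

def embed_in_batches(paths, embed_fn, batch_size, embedding_size):
    # Fill the output array one batch at a time, clamping the last batch.
    emb = np.zeros((len(paths), embedding_size))
    nrof_batches = int(math.ceil(len(paths) / float(batch_size)))
    for i in range(nrof_batches):
        start = i * batch_size
        end = min((i + 1) * batch_size, len(paths))
        emb[start:end, :] = embed_fn(paths[start:end])
    return emb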
Example #5
def main(args):

    network = importlib.import_module(args.model_def)
    image_size = (args.image_size, args.image_size)

    subdir = datetime.strftime(datetime.now(), "%Y%m%d-%H%M%S")
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    stat_file_name = os.path.join(log_dir, "stat.h5")

    # Write arguments to a text file
    fc.write_arguments_to_file(args, os.path.join(log_dir, "arguments.txt"))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    fc.store_revision_info(src_path, log_dir, " ".join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    dataset = fc.get_dataset(args.data_dir)
    if args.filter_filename:
        dataset = filter_dataset(
            dataset,
            os.path.expanduser(args.filter_filename),
            args.filter_percentile,
            args.filter_min_nrof_images_per_class,
        )

    if args.validation_set_split_ratio > 0.0:
        train_set, val_set = fc.split_dataset(
            dataset, args.validation_set_split_ratio,
            args.min_nrof_val_images_per_class, "SPLIT_IMAGES")
    else:
        train_set, val_set = dataset, []

    nrof_classes = len(train_set)

    print("Model directory: %s" % model_dir)
    print("Log directory: %s" % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print("Pre-trained model: %s" % pretrained_model)

    if args.lfw_dir:
        print("LFW directory: %s" % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = fc.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, "The training set should not be empty"

        val_image_list, val_label_list = fc.get_image_paths_and_labels(val_set)

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, "index_dequeue")

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name="learning_rate")
        batch_size_placeholder = tf.placeholder(tf.int32, name="batch_size")
        phase_train_placeholder = tf.placeholder(tf.bool, name="phase_train")
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name="image_paths")
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name="labels")
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name="control")

        nrof_preprocess_threads = 4
        input_queue = data_flow_ops.FIFOQueue(
            capacity=2000000,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None,
        )
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name="enqueue_op")
        image_batch, label_batch = fc.create_input_pipeline(
            input_queue, image_size, nrof_preprocess_threads,
            batch_size_placeholder)

        image_batch = tf.identity(image_batch, "image_batch")
        image_batch = tf.identity(image_batch, "input")
        label_batch = tf.identity(label_batch, "label_batch")

        print("Number of classes in training set: %d" % nrof_classes)
        print("Number of examples in training set: %d" % len(image_list))

        print("Number of classes in validation set: %d" % len(val_set))
        print("Number of examples in validation set: %d" % len(val_image_list))

        print("Building training graph")

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay,
        )
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope="Logits",
            reuse=False,
        )

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name="embeddings")

        # Norm for the prelogits
        eps = 1e-4
        prelogits_norm = tf.reduce_mean(
            tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p,
                    axis=1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_norm * args.prelogits_norm_loss_factor)

        # Add center loss
        prelogits_center_loss, _ = fc.center_loss(prelogits, label_batch,
                                                  args.center_loss_alfa,
                                                  nrof_classes)
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True,
        )
        tf.summary.scalar("learning_rate", learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name="cross_entropy_per_example")
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name="cross_entropy")
        tf.add_to_collection("losses", cross_entropy_mean)

        correct_prediction = tf.cast(
            tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)),
            tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name="total_loss")

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = fc.train(
            total_loss,
            global_step,
            args.optimizer,
            learning_rate,
            args.moving_average_decay,
            tf.global_variables(),
            args.log_histograms,
        )

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print("Restoring pretrained model: %s" % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print("Running training")
            nrof_steps = args.max_nrof_epochs * args.epoch_size
            nrof_val_samples = int(
                math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)
            )  # Validate every validate_every_n_epochs as well as in the last epoch
            stat = {
                "loss": np.zeros((nrof_steps,), np.float32),
                "center_loss": np.zeros((nrof_steps,), np.float32),
                "reg_loss": np.zeros((nrof_steps,), np.float32),
                "xent_loss": np.zeros((nrof_steps,), np.float32),
                "prelogits_norm": np.zeros((nrof_steps,), np.float32),
                "accuracy": np.zeros((nrof_steps,), np.float32),
                "val_loss": np.zeros((nrof_val_samples,), np.float32),
                "val_xent_loss": np.zeros((nrof_val_samples,), np.float32),
                "val_accuracy": np.zeros((nrof_val_samples,), np.float32),
                "lfw_accuracy": np.zeros((args.max_nrof_epochs,), np.float32),
                "lfw_valrate": np.zeros((args.max_nrof_epochs,), np.float32),
                "learning_rate": np.zeros((args.max_nrof_epochs,), np.float32),
                "time_train": np.zeros((args.max_nrof_epochs,), np.float32),
                "time_validate": np.zeros((args.max_nrof_epochs,), np.float32),
                "time_evaluate": np.zeros((args.max_nrof_epochs,), np.float32),
                "prelogits_hist": np.zeros((args.max_nrof_epochs, 1000), np.float32),
            }
            for epoch in range(1, args.max_nrof_epochs + 1):
                step = sess.run(global_step, feed_dict=None)
                # Train for one epoch
                t = time.time()
                cont = train(
                    args,
                    sess,
                    epoch,
                    image_list,
                    label_list,
                    index_dequeue_op,
                    enqueue_op,
                    image_paths_placeholder,
                    labels_placeholder,
                    learning_rate_placeholder,
                    phase_train_placeholder,
                    batch_size_placeholder,
                    control_placeholder,
                    global_step,
                    total_loss,
                    train_op,
                    summary_op,
                    summary_writer,
                    regularization_losses,
                    args.learning_rate_schedule_file,
                    stat,
                    cross_entropy_mean,
                    accuracy,
                    learning_rate,
                    prelogits,
                    prelogits_center_loss,
                    args.random_rotate,
                    args.random_crop,
                    args.random_flip,
                    prelogits_norm,
                    args.prelogits_hist_max,
                    args.use_fixed_image_standardization,
                )
                stat["time_train"][epoch - 1] = time.time() - t

                if not cont:
                    break

                t = time.time()
                if len(val_image_list) > 0 and (
                    (epoch - 1) % args.validate_every_n_epochs
                        == args.validate_every_n_epochs - 1
                        or epoch == args.max_nrof_epochs):
                    validate(
                        args,
                        sess,
                        epoch,
                        val_image_list,
                        val_label_list,
                        enqueue_op,
                        image_paths_placeholder,
                        labels_placeholder,
                        control_placeholder,
                        phase_train_placeholder,
                        batch_size_placeholder,
                        stat,
                        total_loss,
                        regularization_losses,
                        cross_entropy_mean,
                        accuracy,
                        args.validate_every_n_epochs,
                        args.use_fixed_image_standardization,
                    )
                stat["time_validate"][epoch - 1] = time.time() - t

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, epoch)

                # Evaluate on LFW
                t = time.time()
                if args.lfw_dir:
                    evaluate(
                        sess,
                        enqueue_op,
                        image_paths_placeholder,
                        labels_placeholder,
                        phase_train_placeholder,
                        batch_size_placeholder,
                        control_placeholder,
                        embeddings,
                        label_batch,
                        lfw_paths,
                        actual_issame,
                        args.lfw_batch_size,
                        args.lfw_nrof_folds,
                        log_dir,
                        step,
                        summary_writer,
                        stat,
                        epoch,
                        args.lfw_distance_metric,
                        args.lfw_subtract_mean,
                        args.lfw_use_flipped_images,
                        args.use_fixed_image_standardization,
                    )
                stat["time_evaluate"][epoch - 1] = time.time() - t

                print("Saving statistics")
                with h5py.File(stat_file_name, "w") as f:
                    for key, value in stat.items():
                        f.create_dataset(key, data=value)

    return model_dir
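To inspect the statistics that main() writes each epoch, the stat.h5 file can be read back with h5py; a minimal sketch:

import h5py

# Read every dataset in stat.h5 back into a dict of numpy arrays.
with h5py.File('stat.h5', 'r') as f:
    stat = {key: f[key][...] for key in f.keys()}
print(sorted(stat.keys()))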
Example #6
def main(args):

    with tf.Graph().as_default():

        with tf.Session() as sess:

            np.random.seed(seed=args.seed)

            if args.use_split_dataset:
                dataset_tmp = facenet.get_dataset(args.data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, args.min_nrof_images_per_class,
                    args.nrof_train_images_per_class)
                if args.mode == 'TRAIN':
                    dataset = train_set
                elif args.mode == 'CLASSIFY':
                    dataset = test_set
            else:
                dataset = facenet.get_dataset(args.data_dir)

            # Check that there is at least one training image per class
            for cls in dataset:
                assert len(cls.image_paths) > 0, \
                    'There must be at least one image for each class in the dataset'

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))

            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(args.model)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / args.batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * args.batch_size
                end_index = min((i + 1) * args.batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           args.image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(
                args.classifier_filename)

            if args.mode == 'TRAIN':
                # Train classifier
                print('Training classifier')
                model = GaussianNB()
                #model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)

                # Create a list of class names
                class_names = [cls.name.replace('_', ' ') for cls in dataset]

                # Saving classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                print('Saved classifier model to file "%s"' %
                      classifier_filename_exp)

            elif args.mode == 'CLASSIFY':
                # Classify images
                print('Testing classifier')
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)

                print('Loaded classifier model from file "%s"' %
                      classifier_filename_exp)

                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]

                for i in range(len(best_class_indices)):
                    print('%4d  %s: %.3f' %
                          (i, class_names[best_class_indices[i]],
                           best_class_probabilities[i]))

                accuracy = np.mean(np.equal(best_class_indices, labels))
                print('Accuracy: %.3f' % accuracy)
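The CLASSIFY branch's predict_proba/argmax selection works with any scikit-learn probabilistic classifier; a toy, self-contained illustration:

import numpy as np
from sklearn.naive_bayes import GaussianNB

# Toy data: 20 four-dimensional points, two classes of 10 each.
X = np.random.RandomState(0).randn(20, 4)
y = np.repeat([0, 1], 10)
model = GaussianNB().fit(X, y)

probs = model.predict_proba(X)
best_class_indices = np.argmax(probs, axis=1)
best_class_probabilities = probs[np.arange(len(best_class_indices)),
                                 best_class_indices]
print('Accuracy: %.3f' % np.mean(best_class_indices == y))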
Example #7
def classify(use_split_dataset, mode, data_dir, min_nrof_images_per_class,
             nrof_train_images_per_class, model, classifier_filename,
             batch_size, image_size):

    seed = 666
    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=seed)

            if use_split_dataset:
                dataset_tmp = facenet.get_dataset(data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, min_nrof_images_per_class,
                    nrof_train_images_per_class)
                if mode == 'TRAIN':
                    dataset = train_set
                elif mode == 'CLASSIFY':
                    dataset = test_set
            else:
                dataset = facenet.get_dataset(data_dir)

            # Check that there is at least one training image per class
            for cls in dataset:
                assert len(cls.image_paths) > 0, \
                    'There must be at least one image for each class in the dataset'

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            # print('Number of classes: %d' % len(dataset))
            # print('Number of images: %d' % len(paths))

            # Load the model
            # print('Loading feature extraction model')
            facenet.load_model(model)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(classifier_filename)

            res = []

            # print('Testing classifier')
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            # print('Loaded classifier model from file "%s"' % classifier_filename_exp)

            predictions = model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]

            for i in range(len(best_class_indices)):
                print('%4d  %s: %.3f' % (i, class_names[best_class_indices[i]],
                                         best_class_probabilities[i]))
                res.append((i, class_names[best_class_indices[i]],
                            best_class_probabilities[i]))

            accuracy = np.mean(np.equal(best_class_indices, labels))
            print('Accuracy: %.3f' % accuracy)

            return res
Example #8
def train(use_split_dataset, mode, data_dir, min_nrof_images_per_class,
          nrof_train_images_per_class, model, classifier_filename, batch_size,
          image_size):

    seed = 666

    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=seed)

            if use_split_dataset:
                dataset_tmp = facenet.get_dataset(data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, min_nrof_images_per_class,
                    nrof_train_images_per_class)
                if mode == 'TRAIN':
                    dataset = train_set
                elif mode == 'CLASSIFY':
                    dataset = test_set
            else:
                dataset = facenet.get_dataset(data_dir)

            # Check that there is at least one training image per class
            for cls in dataset:
                assert len(cls.image_paths) > 0, \
                    'There must be at least one image for each class in the dataset'

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            # print('Number of classes: %d' % len(dataset))
            # print('Number of images: %d' % len(paths))

            # Load the model
            # print('Loading feature extraction model')
            facenet.load_model(model)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(classifier_filename)

            print('Training classifier')
            model = SVC(kernel='linear', probability=True)
            model.fit(emb_array, labels)

            # Create a list of class names
            class_names = [cls.name.replace('_', ' ') for cls in dataset]

            # Saving classifier model
            with open(classifier_filename_exp, 'wb') as outfile:
                pickle.dump((model, class_names), outfile)
            print('Saved classifier model to file "%s"' %
                  classifier_filename_exp)
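A hypothetical invocation of train() above; every path and number here is a placeholder chosen for illustration, not from the original:

train(use_split_dataset=False,
      mode='TRAIN',
      data_dir='~/datasets/faces_aligned',            # placeholder path
      min_nrof_images_per_class=20,
      nrof_train_images_per_class=10,
      model='20180402-114759.pb',                     # frozen facenet graph
      classifier_filename='~/models/classifier.pkl',  # placeholder path
      batch_size=90,
      image_size=160)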
Example #9
import sys
import math
import os
import pickle

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.svm import SVC
from sklearn.cluster import KMeans

import facenet
with tf.Graph().as_default():
    with tf.Session() as sess:
        np.random.seed(seed=666)
        os.system("python facenet/src/align/align_dataset_mtcnn.py " +
                  sys.argv[1] +
                  " aligned --image_size 160 --margin 32 --random_order")
        dataset = facenet.get_dataset("aligned")
        paths, labels = facenet.get_image_paths_and_labels(dataset)
        facenet.load_model('20180402-114759.pb')
        images_placeholder = tf.get_default_graph().get_tensor_by_name(
            "input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
            "phase_train:0")
        embedding_size = embeddings.get_shape()[1]
        images = facenet.load_data(paths, False, False, 150)
        emb_array_1 = np.zeros((len(labels), embedding_size))
        # Run the forward pass in chunks of at most 500 images
        i = 0
        while i < len(images):
            j = min(i + 500, len(images))
            feed_dict = {
                images_placeholder: images[i:j],
                phase_train_placeholder: False
            }
            emb_array_1[i:j, :] = sess.run(embeddings, feed_dict=feed_dict)
            i = j
Example #10
def main(args):

    network = importlib.import_module(args.model_def)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set,
                                   os.path.expanduser(args.filter_filename),
                                   args.filter_percentile,
                                   args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    # Removed lfw dir. Using custom dataset
    # if args.lfw_dir:
    #     print('LFW directory: %s' % args.lfw_dir)
    #     # Read the file containing the pairs used for testing
    #     pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
    #     # Get the paths for the corresponding images
    #     lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image],
                                       tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES,
                prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                # saver.restore(sess, pretrained_model)
                facenet.load_model(pretrained_model)

            # Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # # Evaluate on LFW
                # if args.lfw_dir:
                #     evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder,
                #         embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer)
    return model_dir
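Both training scripts use the same staircase learning-rate schedule; with staircase=True, tf.train.exponential_decay reduces to the closed form below (a sketch for intuition, not TF code):

def decayed_learning_rate(base_lr, global_step, decay_steps, decay_factor):
    # staircase=True: the exponent only increments every `decay_steps` steps.
    return base_lr * decay_factor ** (global_step // decay_steps)

# e.g. base 0.1, decaying by 0.9 every 1000 steps:
print(decayed_learning_rate(0.1, 2500, 1000, 0.9))  # 0.1 * 0.9**2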
Example #11
def embeding(model_dir='20170512-110547',
             data_dir='database_aligned',
             is_aligned=True,
             image_size=160,
             margin=44,
             gpu_memory_fraction=1.0,
             image_batch=1000,
             embeddings_name='embeddings.npy',
             labels_name='labels.npy',
             labels_strings_name='label_strings.npy',
             return_image_list=False):
    train_set = facenet.get_dataset(data_dir)
    image_list, label_list = facenet.get_image_paths_and_labels(train_set)
    # fetch the classes (labels as strings) exactly as get_dataset does
    path_exp = os.path.expanduser(data_dir)
    classes = [
        path for path in os.listdir(path_exp)
        if os.path.isdir(os.path.join(path_exp, path))
    ]
    # get_dataset sorts its class list, so sort here too to keep the
    # label indices aligned with the class names
    classes.sort()
    # the label strings are the class directory names
    label_strings = classes

    with tf.Graph().as_default():

        with tf.Session() as sess:

            # Load model
            facenet.load_model(model_dir)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")  # noqa: E501
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")  # noqa: E501
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")

            # Run forward pass to calculate embeddings
            nrof_images = len(image_list)
            print('Number of images: ', nrof_images)
            batch_size = image_batch
            if nrof_images % batch_size == 0:
                nrof_batches = nrof_images // batch_size
            else:
                nrof_batches = (nrof_images // batch_size) + 1
            print('Number of batches: ', nrof_batches)
            embedding_size = embeddings.get_shape()[1]
            emb_array = np.zeros((nrof_images, embedding_size))
            start_time = time.time()

            for i in range(nrof_batches):
                if i == nrof_batches - 1:
                    n = nrof_images
                else:
                    n = i * batch_size + batch_size
                # Get images for the batch
                if is_aligned is True:
                    images = facenet.load_data(image_list[i * batch_size:n],
                                               False, False, image_size)
                else:
                    images = load_and_align_data(image_list[i * batch_size:n],
                                                 image_size, margin,
                                                 gpu_memory_fraction)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                # Use the facenet model to calculate embeddings
                embed = sess.run(embeddings, feed_dict=feed_dict)
                emb_array[i * batch_size:n, :] = embed
                print('Completed batch', i + 1, 'of', nrof_batches)

            run_time = time.time() - start_time
            print('Run time: ', run_time)

            # export embeddings and labels
            label_list = np.array(label_list)

            np.save(embeddings_name, emb_array)
            if emb_array.size > 0:
                labels_final = label_list - np.min(label_list)
                np.save(labels_name, labels_final)
                label_strings = np.array(label_strings)
                np.save(labels_strings_name, label_strings[labels_final])
                np.save('image_list.npy', image_list)
            if return_image_list:
                np.save('validation_image_list.npy', image_list)
                return image_list, emb_array
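The arrays that embeding() saves can be loaded back with numpy; the filenames below are its defaults:

import numpy as np

emb_array = np.load('embeddings.npy')
labels = np.load('labels.npy')
label_strings = np.load('label_strings.npy')
print(emb_array.shape, labels.shape, label_strings[:5])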
Example #12
    def like_or_dislike_users(self, users):
        # Automatically like or dislike users based on the model previously
        # trained on your historical preferences.

        # facenet settings from export_embeddings....
        data_dir = 'temp_images_aligned'
        embeddings_name = 'temp_embeddings.npy'
        # labels_name = 'temp_labels.npy'
        # labels_strings_name = 'temp_label_strings.npy'
        is_aligned = True
        image_size = 160
        margin = 44
        gpu_memory_fraction = 1.0
        image_batch = 1000
        prev_user = None
        for user in users:
            clean_temp_images()
            urls = user.get_photos(width='640')
            image_list = download_url_photos(urls, user.id, is_temp=True)
            # align the database
            tindetheus_align.main(input_dir='temp_images',
                                  output_dir='temp_images_aligned')
            # export the embeddings from the aligned database

            train_set = facenet.get_dataset(data_dir)
            image_list_temp, label_list = facenet.get_image_paths_and_labels(
                train_set)  # noqa: E501

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")  # noqa: E501
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")  # noqa: E501
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")  # noqa: E501

            # Run forward pass to calculate embeddings
            nrof_images = len(image_list_temp)
            print('Number of images: ', nrof_images)
            batch_size = image_batch
            if nrof_images % batch_size == 0:
                nrof_batches = nrof_images // batch_size
            else:
                nrof_batches = (nrof_images // batch_size) + 1
            print('Number of batches: ', nrof_batches)
            embedding_size = embeddings.get_shape()[1]
            emb_array = np.zeros((nrof_images, embedding_size))
            start_time = time.time()

            for i in range(nrof_batches):
                if i == nrof_batches - 1:
                    n = nrof_images
                else:
                    n = i * batch_size + batch_size
                # Get images for the batch
                if is_aligned is True:
                    images = facenet.load_data(
                        image_list_temp[i * batch_size:n],  # noqa: E501
                        False,
                        False,
                        image_size)
                else:
                    images = load_and_align_data(
                        image_list_temp[i * batch_size:n],  # noqa: E501
                        image_size,
                        margin,
                        gpu_memory_fraction)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                # Use the facenet model to calculate embeddings
                embed = self.sess.run(embeddings, feed_dict=feed_dict)
                emb_array[i * batch_size:n, :] = embed
                print('Completed batch', i + 1, 'of', nrof_batches)

            run_time = time.time() - start_time
            print('Run time: ', run_time)

            # export embeddings and labels
            label_list = np.array(label_list)

            np.save(embeddings_name, emb_array)

            if emb_array.size > 0:
                # calculate the n average embedding per profiles
                X = calc_avg_emb_temp(emb_array)
                # evaluate on the model
                yhat = self.model.predict(X)

                if yhat[0] == 1:
                    didILike = 'Like'
                    # check to see if this is the same user as before
                    if prev_user == user.id:
                        clean_temp_images_aligned()
                        print('\n\n You have already liked this user!!! \n \n')
                        print('This typically means you have used all of your'
                              ' free likes. Exiting program!!! \n\n')
                        self.likes_left = -1
                        return
                    else:
                        prev_user = user.id
                else:
                    didILike = 'Dislike'
            else:
                # there were no faces in this profile
                didILike = 'Dislike'
            print('**************************************************')
            print(user.name, user.age, didILike)
            print('**************************************************')

            dbase_names = move_images_temp(image_list, user.id)

            if didILike == 'Like':
                print(user.like())
                self.likes_left -= 1
            else:
                print(user.dislike())
            userList = [
                user.id, user.name, user.age, user.bio, user.distance_km,
                user.jobs, user.schools,
                user.get_photos(width='640'), dbase_names, didILike
            ]
            self.al_database.append(userList)
            np.save('al_database.npy', self.al_database)
            clean_temp_images_aligned()