Example #1
    def models_list(self):
        """List the generated models"""
        models_path = os.path.join(path.get_project_root(), "models")
        triplet_files = glob(os.path.join(models_path, "*.pth"))
        print_text = "\n - " + "\n - ".join(triplet_files)

        print(f"Models list: {print_text}")
Example #2
def _triplets(dataset: Raw, n_triplets: int, process_id: int):
    # Set a different random state for each process.
    random_state = np.random.RandomState(seed=None)

    classes = list({annotation["class_id"] for _, annotation in dataset})
    triplet_list = []

    progress_bar = tqdm(range(int(n_triplets)))

    for _ in progress_bar:
        pos_class = neg_class = None
        pos_images = None
        positive = negative = None

        while pos_class is None or len(pos_images) < 2 or isinstance(pos_images, str):
            pos_class = random_state.choice(classes)
            pos_images = dataset.get_images(class_id=pos_class)

        while neg_class is None or neg_class == pos_class:
            neg_class = random_state.choice(classes)

        anchor = random_state.choice(pos_images)
        while positive is None or anchor == positive:
            positive = random_state.choice(pos_images)

        while negative is None or negative in pos_images:
            negative = dataset.get_images(n_random=1)

        aligned_images = transform.align_images(anchor, positive, negative)

        if len(aligned_images) == 3:
            image_paths = [
                path.change_data_category(image_path, "processed")
                for image_path in [anchor, positive, negative]
            ]

            triplet_list.append([*image_paths, pos_class, pos_class, neg_class])

            for aligned_image, image_path in zip(aligned_images, image_paths):
                # NOTE: images are saved aligned but also post-processed, therefore we
                # don't need to transform them on load using the method
                # facenet_pytorch.fixed_image_standardization.
                abs_image_path = os.path.join(path.get_project_root(), image_path)
                os.makedirs(os.path.dirname(abs_image_path), exist_ok=True)
                # Scale pixel values to [0, 1] before saving.
                normalized_image = aligned_image / 255.0
                save_image(normalized_image, abs_image_path)

    # Update the triplet count, since some triplets could have been discarded
    # (e.g. images that could not be aligned).
    n_triplets = len(triplet_list)

    temp_path = path.change_data_category(dataset.get_path(), "interim")
    os.makedirs(temp_path, exist_ok=True)
    np.save(
        os.path.join(temp_path, f"triplets_{n_triplets}_{process_id}.npy"), triplet_list
    )

    return triplet_list
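
Each row appended by _triplets has six columns: the three aligned image paths, then the anchor, positive, and negative class ids (the anchor and positive share a class). A minimal sketch of reading one of the interim files back, using a hypothetical file name:

import numpy as np

# Hypothetical file name; _triplets writes triplets_{n_triplets}_{process_id}.npy.
triplets = np.load("triplets_1000_0.npy")

for row in triplets[:3]:
    anchor_path, positive_path, negative_path, anchor_class, _, neg_class = row
    print(anchor_path, positive_path, negative_path, anchor_class, neg_class)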
Example #3
    def triplets_list(self):
        """List the generated triplets"""
        triplet_path = os.path.join(path.get_project_root(), "data",
                                    "processed", "triplets")
        triplet_files = glob(os.path.join(triplet_path, "*.npy"))
        print_text = "\n - " + "\n - ".join(triplet_files)

        print(f"Triplets list: {print_text}")
Example #4
def get_lfw_dataset() -> processed.FolderDataset:
    """
    Get the dataset containing all the aligned images of LFW.

    :return: lfw dataset.
    """
    dataset_path = os.path.join(path.get_project_root(), "data", "processed", "lfw")

    return processed.FolderDataset(dataset_path)
Example #5
def get_vggface2_dataset(shuffle=True) -> torch.utils.data.Dataset:
    """
    Get the dataset containing all the aligned images of VGGFace2.

    :return: vggface2 dataset.
    """
    dataset_path = os.path.join(
        path.get_project_root(), "data", "processed", "vggface2", "train"
    )

    return processed.FolderDataset(dataset_path, transform=True, shuffle=shuffle)
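
Both factories return a processed.FolderDataset, so they plug directly into a standard PyTorch DataLoader. A minimal usage sketch (batch size and worker count are arbitrary, and it assumes the dataset yields image/label pairs, which these examples do not show):

from torch.utils.data import DataLoader

lfw = get_lfw_dataset()
vggface2 = get_vggface2_dataset(shuffle=True)

loader = DataLoader(vggface2, batch_size=32, num_workers=4)
images, labels = next(iter(loader))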
Example #6
def triplets(dataset: Raw, n_triplets: int, n_processes: int):
    """Generate a set of triplets. It saves an aligned copy of each image in a triplet in
    `data/processed` and a file `.npy` containing the list of triplets.

    :param dataset: dataset to use to generate triplets
    :param n_triplets: number of triplets to generate
    :param n_processes: number of processes to use
    """
    # NOTE: code is inspired by https://github.com/tamerthamoqa/facenet-pytorch-vggface2
    triplet_list = []

    print(f"Generating {n_triplets} triplets using {n_processes} processes...")

    triplet_residual = n_triplets % n_processes
    n_triplets_per_process = (n_triplets - triplet_residual) // n_processes

    processes = []
    for i in range(n_processes):
        processes.append(
            multiprocessing.Process(
                target=_triplets, args=(dataset, n_triplets_per_process, i)
            )
        )

    for process in processes:
        process.start()

    for process in processes:
        process.join()

    # Generate the residual triplets in the main process.
    if triplet_residual:
        _triplets(dataset, triplet_residual, n_processes + 1)

    temp_path = path.change_data_category(dataset.get_path(), "interim")
    numpy_files = glob(os.path.join(temp_path, "*.npy"))

    for numpy_file in numpy_files:
        triplet_list.extend(np.load(numpy_file))
        os.remove(numpy_file)

    # Update the total triplet count, since some triplets could have been
    # discarded (e.g. images that could not be aligned).
    n_triplets = len(triplet_list)

    save_path = os.path.join(path.get_project_root(), "data", "processed", "triplets")
    os.makedirs(save_path, exist_ok=True)

    n_files = str(len(glob(os.path.join(save_path, "*.*")))).zfill(2)
    basename = f"{n_files}_{dataset.get_name()}_{n_triplets}.npy"
    filename = os.path.join(save_path, basename)

    print(f"Saved triplets to {filename}")
    np.save(filename, triplet_list)
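
A hypothetical driver for the function above; raw.get_vggface2() is an assumed factory returning the Raw dataset, not something shown in these examples:

import multiprocessing

if __name__ == "__main__":
    dataset = raw.get_vggface2()  # assumption: a helper returning a Raw dataset
    triplets(dataset, n_triplets=10_000, n_processes=multiprocessing.cpu_count())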
Example #7
    def model_test(self):
        """Test a model's performance on a dataset"""
        model = None

        if self.options["--teacher"]:
            model = nn.teacher()
        elif self.options["--student"] is not None:
            model_path = os.path.join(path.get_project_root(), "models")
            model_files = glob(os.path.join(model_path, "*.pth"))

            model_file_id = str(self.options["--student"]).zfill(2)
            model_name = weights_path = None

            for file in model_files:
                if os.path.basename(file).startswith(model_file_id):
                    weights_path = file
                    # Strip the "NN_" prefix and the "_EEE" epoch suffix to
                    # recover the model name.
                    model_name = os.path.splitext(
                        os.path.basename(file))[0][3:-4]
                    break

            if model_name == "mobilenet_v3_small":
                model = nn.mobilenet_v3(weights=weights_path, mode="small")
            elif model_name == "mobilenet_v3_large":
                model = nn.mobilenet_v3(weights=weights_path, mode="large")

        if model is None:
            raise ValueError(
                f"{self.options['--student']} is an invalid file id")

        dataset, dataset_size = self._get_dataset(self.options["--set"])

        print(f"Testing model {type(model).__name__}.")
        print(f"Evaluating {self.options['--measure']} accuracy.")
        print(f"Test set composed of {dataset_size} images.")
        print(f'Using a batch size of {self.options["--batch"]}.')
        print(f'Using {self.options["--workers"]} workers.')

        functions.test(
            model,
            dataset,
            self.options["--measure"],
            batch_size=self.options["--batch"],
            num_workers=self.options["--workers"],
        )
Example #8
    def model_distill(self):
        """Distill a model with the knowledge of a teacher"""
        student = None
        if self.options["<model_name>"] == "mobilenet_v3_large":
            student = nn.mobilenet_v3(classify=True, mode="large")
        elif self.options["<model_name>"] == "mobilenet_v3_small":
            student = nn.mobilenet_v3(classify=True, mode="small")

        if student is None:
            raise ValueError(
                f"{self.options['<model_name>']} is an invalid model name")

        train_set, train_size = self._get_dataset(self.options["--train-set"])
        test_set, test_size = self._get_dataset(self.options["--test-set"])
        datasets = {"train": train_set, "test": test_set}

        print(f"Distilling model {type(student).__name__}.")
        print(f"Train set composed of {train_size} images.")
        print(f"Test set composed of {test_size} images.")
        print(f'Training for {self.options["--epochs"]} epochs.')
        print(
            f'Distillation temperature set to {self.options["--temperature"]}.'
        )
        print(f'Training with {self.options["--lr"]} learning rate.')
        if not self.options["--no-lr-scheduler"]:
            print(f"Using MultiStep learning rate.")
        print(f'Using a batch size of {self.options["--batch"]}.')
        print(f'Using {self.options["--workers"]} workers.')

        student = functions.distill(
            student,
            datasets,
            temperature=self.options["--temperature"],
            batch_size=self.options["--batch"],
            epochs=self.options["--epochs"],
            lr=self.options["--lr"],
            num_workers=self.options["--workers"],
            no_lr_scheduler="--no-lr-scheduler" in self.options,
        )

        save_path = os.path.join(path.get_project_root(), "models")
        n_files = str(len(glob(os.path.join(save_path, "*.pth")))).zfill(2)
        epochs_to_string = str(self.options["--epochs"]).zfill(3)
        basename = f"{n_files}_{self.options['<model_name>']}_{epochs_to_string}.pth"
        filename = os.path.join(save_path, basename)
        torch.save(student.state_dict(), filename)
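
To reuse a distilled checkpoint, the saved state dict can be loaded back into a freshly built student. A sketch assuming the nn.mobilenet_v3 factory used above and a hypothetical checkpoint name:

import torch

student = nn.mobilenet_v3(classify=True, mode="small")
state_dict = torch.load(
    "models/00_mobilenet_v3_small_100.pth",  # hypothetical file name
    map_location="cpu",
)
student.load_state_dict(state_dict)
student.eval()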
Example #9
def get_vggface2_classes(split) -> List[str]:
    """
    Get the classes of a split of the VGGFace2 dataset.

    :param split: split of the dataset, either `train` or `test`
    :return: dataset classes
    """

    assert split in ["test", "train"]

    classes = [
        os.path.basename(folder)
        for folder in glob(
            os.path.join(
                path.get_project_root(), "data", "processed", "vggface2", split, "*"
            )
        )
    ]

    return classes
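
Usage is straightforward; for example, counting the identities in each split:

for split in ("train", "test"):
    classes = get_vggface2_classes(split)
    print(f"{split}: {len(classes)} identities")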