示例#1
0
    def __init__(self, subset, sub_datasets, **kwargs):
        """
        Loads the codes stored for one sub-dataset and keeps them in self.data

        The file to read is derived from filename_pattern as
        <name>_<subset>.<extension>. After loading, the 'labels' entry of the
        data is replaced by the result of
        LabelMatrixManager.get_1d_array_from_2d_matrix.

        Args:
            subset          (str): sub-dataset to load; it must be a member of
                                   sub_datasets.SUB_DATASETS
            sub_datasets (object): class exposing the sub-dataset names through
                                   its SUB_DATASETS attribute

        Kwargs:
            filename_pattern (str): filename with .json extension that was used to
                                    create the codes when calling the
                                    create_datasets_for_LC_KSVD method (required)
            code_type (CodeType): code type forwarded to load_codes. See the
                                  constants.constants.CodeType class definition
            transform (torchvision.transforms.Compose): transforms to be applied;
                                  stored as self.transform, None when omitted
            original_shape (list, tuple): two-element shape of the original
                                  image/data (required). For a 1D vector use
                                  (1, length)

        Raises:
            AssertionError: if subset is unknown, filename_pattern is not a str or
                            original_shape is not a list/tuple of length 2
        """
        assert subset in sub_datasets.SUB_DATASETS
        self.subset = subset

        pattern = kwargs.get('filename_pattern')
        assert isinstance(pattern, str)

        shape = kwargs.get('original_shape')
        self.original_shape = shape
        assert isinstance(shape, (list, tuple))
        assert len(shape) == 2

        self.transform = kwargs.get('transform')

        # Turn the generic pattern into this sub-dataset's own file name
        base, ext = get_filename_and_extension(clean_json_filename(pattern))
        codes_file = '{}_{}.{}'.format(base, subset, ext)

        self.data = load_codes(codes_file, type_=kwargs.get('code_type'))
        labels_2d = self.data['labels']
        self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
            labels_2d)
示例#2
0
    def format_all_for_LC_KSVD(self,
                               cnn_codes_labels,
                               save=False,
                               filename=''):
        """
        Formats the CNN codes and labels of every sub-dataset for LC-KSVD

        Each name in self.SUB_DATASETS is handed to self.format_for_LC_KSVD
        together with its entry from cnn_codes_labels (unpacked as positional
        arguments), the save flag and a per-sub-dataset file name of the form
        <filename>_<sub_dataset>.json. Per the original documentation, when
        save is True the files are written at settings.CNN_CODES_FOLDER.

        Args:
            cnn_codes_labels (dict): Dictionary returned by the get_all_CNN_codes
                                     method, keyed by sub-dataset name
            save             (bool): Whether or not to save the result
            filename          (str): filename with .json extension

        Returns:
            dict mapping each sub-dataset name to whatever format_for_LC_KSVD
            returns for it; originally documented as
            {'sub_dataset_1': [cnn codes list of lists, labels list], ...}

        Raises:
            AssertionError: if cnn_codes_labels is not a dict or save is not a bool
        """
        assert isinstance(cnn_codes_labels, dict)
        assert isinstance(save, bool)

        stem, suffix = get_filename_and_extension(clean_json_filename(filename))

        def target_for(sub_dataset):
            # File name used for a single sub-dataset
            return '{}_{}.{}'.format(stem, sub_dataset, suffix)

        print("Formatting and saving sub-datasets CNN codes for LC-KSVD")
        return {
            sub_dataset: self.format_for_LC_KSVD(
                sub_dataset,
                *cnn_codes_labels[sub_dataset],
                save,
                target_for(sub_dataset))
            for sub_dataset in tqdm(self.SUB_DATASETS)
        }
示例#3
0
    def __init__(self, subset, sub_datasets, **kwargs):
        """
        Loads the codes stored for one sub-dataset and keeps them in self.data

        The file to read is derived from filename_pattern as
        <name>_<subset>.<extension>. After loading, the 'labels' entry of the
        data is replaced by the result of
        LabelMatrixManager.get_1d_array_from_2d_matrix.

        Args:
            subset          (str): sub-dataset to load; it must be a member of
                                   sub_datasets.SUB_DATASETS
            sub_datasets (object): object exposing the valid sub-dataset names
                                   through its SUB_DATASETS attribute

        Kwargs:
            filename_pattern (str): filename with .json extension that was used to
                                    create the codes when calling the
                                    create_datasets_for_LC_KSVD method (required)
            code_type   (CodeType): code type forwarded to load_codes. See the
                                    constants.constants.CodeType class definition

        Raises:
            AssertionError: if subset is unknown or filename_pattern is not a str
        """
        assert subset in sub_datasets.SUB_DATASETS
        self.subset = subset

        pattern = kwargs.get('filename_pattern')
        assert isinstance(pattern, str)

        # Turn the generic pattern into this sub-dataset's own file name
        stem, suffix = get_filename_and_extension(clean_json_filename(pattern))
        subset_file = '{}_{}.{}'.format(stem, subset, suffix)

        self.data = load_codes(subset_file, type_=kwargs.get('code_type'))
        raw_labels = self.data['labels']
        self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
            raw_labels)
示例#4
0
    def select_patches(self):
        """
        Selects the patches and removes the original subdataset labels files

        For every (split file, split folder) pair taken from self.split_files
        and self.folder_names, the files found in
        settings.OUTPUT_FOLDER/<split folder> are grouped per source image
        using the '<image name>_' filename prefix. self.num_patches of them are
        chosen at random for each image and all its other patches are deleted
        from disk. Once a folder has been processed, its
        settings.LABELS_FILENAME file is removed as well.

        Raises:
            ValueError: raised by random.sample when an image has fewer than
                        self.num_patches patches
        """

        print("Selecting minipatches from subdatastes")
        for split_filename, split_folder_name in tqdm(
                tuple(zip(self.split_files, self.folder_names))):
            split_folder = os.path.join(settings.OUTPUT_FOLDER,
                                        split_folder_name)
            patches = os.listdir(split_folder)

            for filename, _ in self.read_split_file(split_filename,
                                                    self.split_files):
                name, _ = get_filename_and_extension(filename)
                prefix = '{}_'.format(name)
                # random.sample requires a sequence: passing a set was
                # deprecated in Python 3.9 and raises TypeError since 3.11.
                # Sorting also makes the selection reproducible under a seed.
                file_patches = sorted(
                    patch for patch in patches if patch.startswith(prefix))
                chosen_patches = set(
                    random.sample(file_patches, k=self.num_patches))

                for patch in file_patches:
                    if patch not in chosen_patches:
                        os.remove(os.path.join(split_folder, patch))

            # removing old labels file
            os.remove(os.path.join(split_folder, settings.LABELS_FILENAME))
示例#5
0
    def create_datasets_for_LC_KSVD(self, filename):
        """
        Creates one JSON codes file per sub-dataset inside self.codes_folder

        self.codes_folder is first passed to clean_create_folder. Then, for each
        entry of self.sub_datasets, a {'codes': [], 'labels': []} dictionary is
        handed to self.process_data and self.format_for_LC_KSVD and finally
        dumped as JSON to <name>_<sub_dataset>.<extension>, where name and
        extension come from the cleaned filename.

        Args:
            filename (str): filename with .json extension

        Usage:
            model.create_datasets_for_LC_KSVD('my_dataset.json')
        """
        clean_create_folder(self.codes_folder)
        stem, suffix = get_filename_and_extension(clean_json_filename(filename))

        print("Formatting and saving sub-datasets codes for LC-KSVD")
        for sub_dataset in self.sub_datasets:
            print("Processing image's batches from sub-dataset: {}".format(
                sub_dataset))
            target_name = '{}_{}.{}'.format(stem, sub_dataset, suffix)

            # The same dictionary is handed to both helpers and then dumped
            payload = dict(codes=[], labels=[])
            self.process_data(sub_dataset, payload)
            self.format_for_LC_KSVD(payload)

            target_path = os.path.join(self.codes_folder, target_name)
            with open(target_path, 'w') as handle:
                json.dump(payload, handle)