Example #1
    def __init__(self, subset, sub_datasets, **kwargs):
        """
        Loads the subdataset

        Args:
           subset          (str): name of the sub-dataset to load
           sub_datasets (object): class containing the sub-dataset names

        Kwargs:
            filename_pattern (str): filename with .json extension used to create the codes
                                    when calling the create_datasets_for_LC_KSVD method.
            code_type (CodeType): Code type used. See the constants.constants.CodeType class definition
            transform (torchvision.transforms.Compose): transforms to be applied
            original_shape (list, tuple): shape of the original image/data. If it was a 1D vector,
                                          then just set it to (1, length)
        """
        assert subset in sub_datasets.SUB_DATASETS
        self.subset = subset
        filename_pattern = kwargs.get('filename_pattern')
        assert isinstance(filename_pattern, str)
        self.original_shape = kwargs.get('original_shape')
        assert isinstance(self.original_shape, (list, tuple))
        assert len(self.original_shape) == 2

        code_type = kwargs.get('code_type')
        self.transform = kwargs.get('transform', None)
        cleaned_filename = clean_json_filename(filename_pattern)
        name, extension = get_filename_and_extension(cleaned_filename)
        file_name = '{}_{}.{}'.format(name, subset, extension)
        self.data = load_codes(file_name, type_=code_type)
        self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
            self.data['labels'])
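
A minimal usage sketch for a constructor like this one. The CodesDataset and SubDatasets names, the 'train' subset and the CodeType member are assumptions made for illustration; only the keyword arguments come from the snippet above.

    from torchvision import transforms

    # Hypothetical names: CodesDataset is the class defining the __init__ above,
    # SubDatasets is the container holding the SUB_DATASETS names.
    train_dataset = CodesDataset(
        subset='train',                      # assumed to appear in SubDatasets.SUB_DATASETS
        sub_datasets=SubDatasets,
        filename_pattern='my_dataset.json',  # same filename used by create_datasets_for_LC_KSVD
        code_type=CodeType.RAW,              # assumed member of constants.constants.CodeType
        transform=transforms.Compose([transforms.ToTensor()]),
        original_shape=(1, 4096),            # codes were flattened 1D vectors of length 4096
    )
    print(len(train_dataset.data['labels']))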
Example #2
    def format_all_for_LC_KSVD(self,
                               cnn_codes_labels,
                               save=False,
                               filename=''):
        """
        Returns a dictionary containing the cnn_codes and labels of each sub-dataset,
        properly formatted to be used by the LC-KSVD algorithm. Optionally, it
        saves the dictionary split into several files named
        <filename>_<sub_dataset>.json at settings.CNN_CODES_FOLDER

        Args:
            cnn_codes_labels (dict): Dictionary returned by the get_all_CNN_codes method
            save             (bool): Whether or not to save the result
            filename          (str): filename with .json extension

        Returns:
            {'sub_dataset_1': {'codes': [[...], ...], 'labels': [[...], ...]}, ...}
        """
        assert isinstance(cnn_codes_labels, dict)
        assert isinstance(save, bool)

        cleaned_filename = clean_json_filename(filename)
        name, extension = get_filename_and_extension(cleaned_filename)

        formatted_data = dict()

        print("Formatting and saving sub-datasets CNN codes for LC-KSVD")
        for sub_dataset in tqdm(self.SUB_DATASETS):
            new_name = '{}_{}.{}'.format(name, sub_dataset, extension)
            formatted_data[sub_dataset] = self.format_for_LC_KSVD(
                sub_dataset, *cnn_codes_labels[sub_dataset], save, new_name)

        return formatted_data
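
A hedged usage sketch for the method above, assuming model is an instance of the class that defines both get_all_CNN_codes and format_all_for_LC_KSVD, and that 'train' is one of its SUB_DATASETS:

    # Hypothetical usage (the `model` object and the 'train' key are assumptions)
    cnn_codes_labels = model.get_all_CNN_codes()   # {'<sub_dataset>': (cnn_codes, labels), ...}
    formatted = model.format_all_for_LC_KSVD(
        cnn_codes_labels,
        save=True,
        filename='my_dataset.json',                # writes my_dataset_<sub_dataset>.json files
    )
    train_codes = formatted['train']['codes']      # list of lists, ready for LC-KSVD
    train_labels = formatted['train']['labels']    # one-hot label matrix as a list of lists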
Example #3
    def __init__(self, subset, sub_datasets, **kwargs):
        """
        Loads the subdataset

        Args:
           subset          (str): name of the sub-dataset to load
           sub_datasets (object): class containing the sub-dataset names

        Kwargs:
            filename_pattern (str): filename with .json extension used to create the codes
                                    when calling the create_datasets_for_LC_KSVD method.
            code_type   (CodeType): Code type used. See the constants.constants.CodeType class definition
        """
        assert subset in sub_datasets.SUB_DATASETS
        self.subset = subset
        filename_pattern = kwargs.get('filename_pattern')
        assert isinstance(filename_pattern, str)

        code_type = kwargs.get('code_type')
        cleaned_filename = clean_json_filename(filename_pattern)
        name, extension = get_filename_and_extension(cleaned_filename)
        file_name = '{}_{}.{}'.format(name, subset, extension)
        self.data = load_codes(file_name, type_=code_type)
        self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
            self.data['labels'])
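
Loading the 'test' sub-dataset and wrapping it in a standard DataLoader would look roughly like the sketch below. The CodesDataset/SubDatasets names and the CodeType member are assumptions, and it presumes the full class also implements __len__ and __getitem__ (not shown in this snippet).

    from torch.utils.data import DataLoader

    # Hypothetical names; only the keyword arguments come from the snippet above
    test_dataset = CodesDataset(
        subset='test',                       # assumed to appear in SubDatasets.SUB_DATASETS
        sub_datasets=SubDatasets,
        filename_pattern='my_dataset.json',
        code_type=CodeType.RAW,              # assumed member of constants.constants.CodeType
    )
    # Requires __len__/__getitem__ on the dataset class (defined outside this snippet)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)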
Example #4
    def format_for_LC_KSVD(self,
                           sub_dataset,
                           cnn_codes,
                           labels,
                           save=False,
                           filename=''):
        """
        Returns a dictionary with cnn_codes and labels for the sub_dataset chosen. Optionally,
        it saves the dictionary in the file <filename>_<sub_dataset>.json at
        settings.CNN_CODES_FOLDER

        Args:
            sub_dataset        (str): Any value from self.SUB_DATASETS
            cnn_codes (torch.Tensor): Tensor with all CNN codes.
            labels    (torch.Tensor): Tensor with all labels.
            save              (bool): Whether or not to save the result
            filename           (str): Filename with .json extension

        Returns:
            {'codes': [[...], ...], 'labels': [[...], ...]}

        """
        assert sub_dataset in self.SUB_DATASETS
        assert isinstance(cnn_codes, torch.Tensor)
        assert isinstance(labels, torch.Tensor)
        assert isinstance(save, bool)

        cleaned_filename = clean_json_filename(filename)

        # Squeeze out singleton dimensions, transpose so each column is one
        # sample, and move the codes to the CPU as a numpy array
        formatted_cnn_codes = cnn_codes.squeeze().T.cpu().numpy()
        # TODO: review if it's necessary to use float
        formatted_labels = np.zeros((len(Label.CHOICES), labels.shape[0]),
                                    dtype=float)

        for index, label_item in enumerate(Label.CHOICES):
            formatted_labels[index, labels == label_item.id] = 1

        # Workaround to serialize numpy arrays as JSON
        formatted_data = {
            'codes': formatted_cnn_codes.tolist(),
            'labels': formatted_labels.tolist()
        }

        if save:
            if not os.path.isdir(settings.CNN_CODES_FOLDER):
                os.makedirs(settings.CNN_CODES_FOLDER)

            with open(
                    os.path.join(settings.CNN_CODES_FOLDER, cleaned_filename),
                    'w') as file_:
                json.dump(formatted_data, file_)

        return formatted_data
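
The core of the method above is the one-hot label matrix built in the loop. Below is a standalone, runnable sketch of that construction, with Label.CHOICES replaced by a plain list of class ids (an assumption made for illustration):

    import numpy as np
    import torch

    labels = torch.tensor([0, 2, 1, 0])   # one class id per sample
    class_ids = [0, 1, 2]                 # stands in for [item.id for item in Label.CHOICES]

    # One row per class, one column per sample
    label_matrix = np.zeros((len(class_ids), labels.shape[0]), dtype=float)
    for row, class_id in enumerate(class_ids):
        label_matrix[row, (labels == class_id).numpy()] = 1

    print(label_matrix)
    # [[1. 0. 0. 1.]
    #  [0. 0. 1. 0.]
    #  [0. 1. 0. 0.]]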
Example #5
    def create_datasets_for_LC_KSVD(self, filename):
        """
        Args:
            filename (str): filename with .json extension

        Usage:
            model.create_datasets_for_LC_KSVD('my_dataset.json')
        """
        clean_create_folder(self.codes_folder)
        cleaned_filename = clean_json_filename(filename)
        name, extension = get_filename_and_extension(cleaned_filename)

        print("Formatting and saving sub-datasets codes for LC-KSVD")
        for dataset in self.sub_datasets:
            print("Processing image's batches from sub-dataset: {}".format(
                dataset))
            new_name = '{}_{}.{}'.format(name, dataset, extension)
            formatted_data = {'codes': [], 'labels': []}
            self.process_data(dataset, formatted_data)
            self.format_for_LC_KSVD(formatted_data)

            with open(os.path.join(self.codes_folder, new_name), 'w') as file_:
                json.dump(formatted_data, file_)
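
A hedged end-to-end sketch, assuming model is an instance of the class that defines create_datasets_for_LC_KSVD; the attribute names codes_folder and sub_datasets come from the snippet above, while the inspection loop is only illustrative:

    import json
    import os

    model.create_datasets_for_LC_KSVD('my_dataset.json')

    # One JSON file per sub-dataset: <codes_folder>/my_dataset_<sub_dataset>.json
    for dataset in model.sub_datasets:
        path = os.path.join(model.codes_folder, 'my_dataset_{}.json'.format(dataset))
        with open(path) as file_:
            data = json.load(file_)
        print(dataset, len(data['codes']), len(data['labels']))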