def __init__(self, subset, sub_datasets, **kwargs):
    """
    Loads the subdataset

    Args:
        subset          (str): sub dataset
        sub_datasets (object): class containing the subdatasets names

    Kwargs:
        filename_pattern (str): filename with .json extension used to create the codes
                                when calling the create_datasets_for_LC_KSVD method.
        code_type   (CodeType): code type used. See constants.constants.CodeType class
                                definition
        transform (torchvision.transforms.Compose): transforms to be applied
        original_shape (list, tuple): shape of the original image/data. If it was a 1D
                                vector, then just set it to (1, length)
    """
    assert subset in sub_datasets.SUB_DATASETS
    self.subset = subset
    filename_pattern = kwargs.get('filename_pattern')
    assert isinstance(filename_pattern, str)
    self.original_shape = kwargs.get('original_shape')
    assert isinstance(self.original_shape, (list, tuple))
    assert len(self.original_shape) == 2
    code_type = kwargs.get('code_type')
    self.transform = kwargs.get('transform', None)
    cleaned_filename = clean_json_filename(filename_pattern)
    name, extension = get_filename_and_extension(cleaned_filename)
    file_name = '{}_{}.{}'.format(name, subset, extension)
    self.data = load_codes(file_name, type_=code_type)
    # labels come back as a 2D matrix; collapse to a 1D array of labels
    self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
        self.data['labels'])
def format_all_for_LC_KSVD(self, cnn_codes_labels, save=False, filename=''):
    """
    Formats every sub-dataset's CNN codes and labels for the LC-KSVD algorithm.

    Returns a dictionary mapping each sub-dataset to its formatted
    [cnn codes list of lists, labels list]. Optionally saves the dictionary
    split into several files named <filename>_<sub_dataset>.json at
    settings.CNN_CODES_FOLDER.

    Args:
        cnn_codes_labels (dict): dictionary returned by the get_all_CNN_codes method
        save            (bool): whether or not to save the result
        filename         (str): filename with .json extension

    Returns:
        {'sub_dataset_1': [cnn codes list of lists, labels list], ...}
    """
    assert isinstance(cnn_codes_labels, dict)
    assert isinstance(save, bool)

    base_name, ext = get_filename_and_extension(clean_json_filename(filename))
    formatted_data = {}

    print("Formatting and saving sub-datasets CNN codes for LC-KSVD")
    for sub_dataset in tqdm(self.SUB_DATASETS):
        sub_filename = '{}_{}.{}'.format(base_name, sub_dataset, ext)
        formatted_data[sub_dataset] = self.format_for_LC_KSVD(
            sub_dataset, *cnn_codes_labels[sub_dataset], save, sub_filename)

    return formatted_data
def __init__(self, subset, sub_datasets, **kwargs):
    """
    Loads the subdataset

    Args:
        subset          (str): sub dataset
        sub_datasets (object): class containing the subdatasets names

    Kwargs:
        filename_pattern (str): filename with .json extension used to create the codes
                                when calling the create_datasets_for_LC_KSVD method.
        code_type   (CodeType): code type used. See constants.constants.CodeType class
                                definition
    """
    assert subset in sub_datasets.SUB_DATASETS

    self.subset = subset
    pattern = kwargs.get('filename_pattern')
    assert isinstance(pattern, str)
    code_type = kwargs.get('code_type')

    base_name, ext = get_filename_and_extension(clean_json_filename(pattern))
    self.data = load_codes('{}_{}.{}'.format(base_name, subset, ext),
                           type_=code_type)
    # labels come back as a 2D matrix; collapse to a 1D array of labels
    self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
        self.data['labels'])
def select_patches(self):
    """
    Selects self.num_patches patches per source image, deletes the
    non-chosen patch files, and removes the original subdataset labels
    file from each split folder.
    """
    print("Selecting minipatches from subdatastes")
    for split_filename, split_folder_name in tqdm(
            tuple(zip(self.split_files, self.folder_names))):
        folder_path = os.path.join(settings.OUTPUT_FOLDER, split_folder_name)
        patches = os.listdir(folder_path)

        for filename, _ in self.read_split_file(split_filename, self.split_files):
            name, _ = get_filename_and_extension(filename)
            prefix = '{}_'.format(name)
            file_patches = {patch for patch in patches if patch.startswith(prefix)}
            # random.sample requires a sequence: passing a set is deprecated
            # since Python 3.9 and raises TypeError on 3.11+. Sorting first
            # also makes the sampled population order deterministic.
            chosen_patches = set(
                random.sample(sorted(file_patches), k=self.num_patches))
            patches_to_delete = file_patches.difference(chosen_patches)

            for patch in patches_to_delete:
                os.remove(os.path.join(folder_path, patch))

        # removing old labels file
        os.remove(os.path.join(folder_path, settings.LABELS_FILENAME))
def create_datasets_for_LC_KSVD(self, filename):
    """
    Formats each sub-dataset's codes for LC-KSVD and writes the result to
    <name>_<sub_dataset>.<extension> JSON files inside self.codes_folder.

    Args:
        filename (str): filename with .json extension

    Usage:
        model.create_datasets_for_LC_KSVD('my_dataset.json')
    """
    clean_create_folder(self.codes_folder)
    base_name, ext = get_filename_and_extension(clean_json_filename(filename))

    print("Formatting and saving sub-datasets codes for LC-KSVD")
    for dataset in self.sub_datasets:
        print("Processing image's batches from sub-dataset: {}".format(
            dataset))
        formatted_data = {'codes': [], 'labels': []}
        self.process_data(dataset, formatted_data)
        self.format_for_LC_KSVD(formatted_data)

        target_path = os.path.join(
            self.codes_folder, '{}_{}.{}'.format(base_name, dataset, ext))
        with open(target_path, 'w') as file_:
            json.dump(formatted_data, file_)