def __init__(self, subset, sub_datasets, **kwargs):
    """
    Loads the subdataset

    Args:
        subset            (str): sub dataset name
        sub_datasets   (object): class containing the subdatasets names

    Kwargs:
        filename_pattern  (str): filename with .json extension used to create the codes
                                 when calling the create_datasets_for_LC_KSVD method.
        code_type    (CodeType): Code type used. See constants.constants.CodeType class definition
        transform (torchvision.transforms.Compose): transforms to be applied
        original_shape (list, tuple): shape of the original image/data. If it was a 1D vector,
                                 then just set it to (1, length)
    """
    assert subset in sub_datasets.SUB_DATASETS
    self.subset = subset
    filename_pattern = kwargs.get('filename_pattern')
    assert isinstance(filename_pattern, str)
    self.original_shape = kwargs.get('original_shape')
    # original_shape must be a 2-element (rows, cols) pair
    assert isinstance(self.original_shape, (list, tuple))
    assert len(self.original_shape) == 2
    code_type = kwargs.get('code_type')
    self.transform = kwargs.get('transform', None)
    cleaned_filename = clean_json_filename(filename_pattern)
    name, extension = get_filename_and_extension(cleaned_filename)
    # Codes were saved per-subset as <name>_<subset>.<ext>
    file_name = '{}_{}.{}'.format(name, subset, extension)
    self.data = load_codes(file_name, type_=code_type)
    # Collapse the stored 2-D label matrix into a 1-D label array
    self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
        self.data['labels'])
def format_all_for_LC_KSVD(self, cnn_codes_labels, save=False, filename=''):
    """
    Returns a dictionary containing all the cnn_codes and labels for each
    sub-dataset created, properly formatted to be used by the LC-KSVD algorithm.
    Optionally, it saves the dictionary split into several files with the
    format <filename>_<sub_dataset>.json at settings.CNN_CODES_FOLDER

    Args:
        cnn_codes_labels (dict): Dictionary returned by the get_all_CNN_codes method
        save             (bool): Whether or not save the result
        filename          (str): filename with .json extension

    Returns:
        {'sub_dataset_1': [cnn codes list of lists, labels list], ...}
    """
    assert isinstance(cnn_codes_labels, dict)
    assert isinstance(save, bool)

    base_name, ext = get_filename_and_extension(clean_json_filename(filename))

    print("Formatting and saving sub-datasets CNN codes for LC-KSVD")

    formatted = dict()
    for subset in tqdm(self.SUB_DATASETS):
        # Each sub-dataset gets its own target file: <name>_<subset>.<ext>
        target_name = '{}_{}.{}'.format(base_name, subset, ext)
        formatted[subset] = self.format_for_LC_KSVD(
            subset, *cnn_codes_labels[subset], save, target_name)

    return formatted
def __init__(self, subset, sub_datasets, **kwargs):
    """
    Loads the subdataset

    Args:
        subset           (str): sub dataset name
        sub_datasets  (object): class containing the subdatasets names

    Kwargs:
        filename_pattern (str): filename with .json extension used to create the codes
                                when calling the create_datasets_for_LC_KSVD method.
        code_type   (CodeType): Code type used. See constants.constants.CodeType class definition
    """
    assert subset in sub_datasets.SUB_DATASETS
    self.subset = subset
    filename_pattern = kwargs.get('filename_pattern')
    assert isinstance(filename_pattern, str)
    code_type = kwargs.get('code_type')
    cleaned_filename = clean_json_filename(filename_pattern)
    name, extension = get_filename_and_extension(cleaned_filename)
    # Codes were saved per-subset as <name>_<subset>.<ext>
    file_name = '{}_{}.{}'.format(name, subset, extension)
    self.data = load_codes(file_name, type_=code_type)
    # Collapse the stored 2-D label matrix into a 1-D label array
    self.data['labels'] = LabelMatrixManager.get_1d_array_from_2d_matrix(
        self.data['labels'])
def format_for_LC_KSVD(self, sub_dataset, cnn_codes, labels, save=False, filename=''):
    """
    Returns a dictionary with cnn_codes and labels for the sub_dataset chosen.
    Optionally, it saves the dictionary in the file <filename>_<sub_dataset>.json
    at settings.CNN_CODES_FOLDER

    Args:
        sub_dataset        (str): Any value from self.SUB_DATASETS
        cnn_codes (torch.Tensor): Tensor with all cnn codes.
        labels    (torch.Tensor): Tensor with all labels.
        save              (bool): Whether or not save the result
        filename           (str): Filename with .json extension

    Returns:
        {'codes': codes list of lists, 'labels': labels list}
    """
    assert sub_dataset in self.SUB_DATASETS
    assert isinstance(cnn_codes, torch.Tensor)
    assert isinstance(labels, torch.Tensor)
    assert isinstance(save, bool)

    cleaned_filename = clean_json_filename(filename)

    # Transposed so samples become columns; presumably codes arrive as
    # (samples, features) — TODO confirm against the CNN-code producer
    formatted_cnn_codes = cnn_codes.squeeze().T.cpu().numpy()

    # One-hot label matrix: one row per class, one column per sample
    # TODO: review if it's necessary to use float
    formatted_labels = np.zeros((len(Label.CHOICES), labels.shape[0]), dtype=float)

    for index, label_item in enumerate(Label.CHOICES):
        formatted_labels[index, labels == label_item.id] = 1

    # Workaround to serialize numpy arrays as JSON
    formatted_data = {
        'codes': formatted_cnn_codes.tolist(),
        'labels': formatted_labels.tolist()
    }

    if save:
        # exist_ok avoids the check-then-create race of isdir + makedirs
        os.makedirs(settings.CNN_CODES_FOLDER, exist_ok=True)

        with open(
                os.path.join(settings.CNN_CODES_FOLDER, cleaned_filename), 'w') as file_:
            json.dump(formatted_data, file_)

    return formatted_data
def create_datasets_for_LC_KSVD(self, filename):
    """
    Creates and saves the codes of every sub-dataset, formatted for LC-KSVD,
    as one JSON file per sub-dataset inside self.codes_folder.

    Args:
        filename (str): filename with .json extension

    Usage:
        model.create_datasets_for_LC_KSVD('my_dataset.json')
    """
    clean_create_folder(self.codes_folder)
    base_name, ext = get_filename_and_extension(clean_json_filename(filename))

    print("Formatting and saving sub-datasets codes for LC-KSVD")
    for sub_dataset in self.sub_datasets:
        print("Processing image's batches from sub-dataset: {}".format(
            sub_dataset))
        # Accumulate codes/labels in place, then reshape them for LC-KSVD
        collected = {'codes': [], 'labels': []}
        self.process_data(sub_dataset, collected)
        self.format_for_LC_KSVD(collected)
        target = '{}_{}.{}'.format(base_name, sub_dataset, ext)
        with open(os.path.join(self.codes_folder, target), 'w') as file_:
            json.dump(collected, file_)