def __init__(self, data: str, split: str, extension: str, length: Optional[int] = None):
    """Collect the sample names of one split and build the image pipeline.

    Args:
        data: Dataset name; used as the sub-directory under both
            ``wsl_data_dir`` and ``wsl_csv_dir``.
        split: Split name; ``<split>.csv`` is read and its ``Id`` column
            supplies the sample names.
        extension: Fallback file extension, used only when ``data`` has no
            entry in ``known_extensions``.
        length: When not ``None``, keep a random sample (without
            replacement) of at most this many names.
    """
    self.datapath = wsl_data_dir / data
    self.data = data

    # A registered extension for this dataset wins over the caller's value.
    self.extension = known_extensions[data] if data in known_extensions else extension

    split_csv = wsl_csv_dir / data / f'{split}.csv'
    names = pd.read_csv(split_csv).Id.tolist()
    if length is not None:
        # Never ask random.sample for more items than are available.
        sample_size = min(len(names), length)
        names = random.sample(names, sample_size)
    self.names = names

    self.new_size = (224, 224)
    pipeline = [
        Resize(self.new_size),
        RepeatChannel(repeats=3),
        CastToType(dtype=np.float32),
        ToTensor(),
    ]
    self.image_transforms = Compose(pipeline)
def __init__(self, data: str, split: str, extension: str, classes: int, column: str,
             debug: bool = False):
    """Load the annotation table for one split and build the image pipeline.

    Args:
        data: Dataset name; used as the sub-directory under both
            ``wsl_data_dir`` and ``wsl_csv_dir``.
        split: Split name; ``<split>.csv`` lists the ``Id`` values that
            belong to this split.
        extension: Fallback file extension, used only when ``data`` has no
            entry in ``known_extensions``.
        classes: Stored unchanged on the instance as ``self.classes``.
        column: Name of the ``info.csv`` column that is parsed with
            ``literal_eval`` alongside the ``box`` column.
        debug: When true, keep only the first 100 names.
    """
    self.datapath = wsl_data_dir / data
    self.data = data
    self.classes = classes
    self.column = column

    # A registered extension for this dataset wins over the caller's value.
    self.extension = known_extensions[data] if data in known_extensions else extension

    # `column` and 'box' are stored as Python literals in the CSV.
    df = pd.read_csv(
        wsl_csv_dir / data / 'info.csv',
        converters={column: literal_eval, 'box': literal_eval},
    )
    # NOTE: self.df keeps the full table, not just the rows of this split.
    self.df = df

    Ids = pd.read_csv(wsl_csv_dir / data / f'{split}.csv').Id.tolist()
    df = df[df.Id.isin(Ids)]

    # Largest number of rows that share a single Id within this split
    # (presumably one row per box -- confirm against info.csv).
    self.max_boxes = df['Id'].value_counts().max()

    # De-duplicate while keeping first-occurrence order. A plain set() would
    # order string Ids by their per-process hash, making the debug subset and
    # the index -> sample mapping differ from run to run.
    self.names = list(dict.fromkeys(df.Id.to_list()))
    if debug:
        self.names = self.names[:100]

    # Resize((224, 224)) is disabled for this loader -- presumably so that
    # box coordinates stay in the original image space; confirm before
    # re-enabling.
    self.image_transforms = Compose([
        RepeatChannel(repeats=3),
        CastToType(dtype=np.float32),
        ToTensor(),
    ])
def __init__(self, data: str, split: str, extension: str, classes: int, column: str,
             variable_type: str, augmentation: bool = False, debug: bool = False):
    """Load names and labels for one split and build the image pipeline.

    Args:
        data: Dataset name; used as the sub-directory under both
            ``wsl_data_dir`` and ``wsl_csv_dir``.
        split: Split name; ``<split>.csv`` lists the ``Id`` values that
            belong to this split.
        extension: Fallback file extension, used only when ``data`` has no
            entry in ``known_extensions``.
        classes: Number of label entries per sample. With ``classes == 1``
            each label is wrapped in a one-element list; otherwise each label
            is expected to be a dict whose values become the label vector.
        column: Name of the ``info.csv`` column holding the labels.
        variable_type: ``'categorical'`` leaves labels untouched; any other
            value converts them to lists and computes ``self.pos_weight``.
        augmentation: When true, ``self.augmentation`` is an ``Affine``
            transform; otherwise it is ``None``.
        debug: When true, keep only the first 100 names and labels.
    """
    if classes != 1:
        print('Note: Ensure all labels are of a single type.')

    self.datapath = wsl_data_dir / data
    self.data = data
    self.classes = classes

    # A registered extension for this dataset wins over the caller's value.
    self.extension = known_extensions[data] if data in known_extensions else extension

    # `column` and 'box' are stored as Python literals in the CSV.
    df = pd.read_csv(
        wsl_csv_dir / data / 'info.csv',
        converters={column: literal_eval, 'box': literal_eval},
    )
    # NOTE: self.df keeps the full table (duplicates and other splits included).
    self.df = df

    # One row per Id, then restrict to the requested split.
    df = df.drop_duplicates(subset='Id', keep='first', ignore_index=True)
    Ids = pd.read_csv(wsl_csv_dir / data / f'{split}.csv').Id.tolist()
    df = df[df.Id.isin(Ids)]

    self.names = df.Id.to_list()
    self.labels = df[column].tolist()
    self.variable_type = variable_type

    if debug:
        self.names = self.names[:100]
        self.labels = self.labels[:100]

    self.new_size = (224, 224)
    self.image_transforms = Compose([
        Resize(self.new_size),
        RepeatChannel(repeats=3),
        CastToType(dtype=np.float32),
        ToTensor(),
    ])

    # Either an Affine transform or None -- never the raw boolean flag.
    if augmentation:
        self.augmentation = Affine(rotate_params=np.pi / 6,
                                   scale_params=(1.2, 1.2),
                                   translate_params=(50, 50),
                                   padding_mode='zeros')
    else:
        self.augmentation = None

    if self.variable_type != 'categorical':
        if classes == 1:
            self.labels = [[x] for x in self.labels]
        else:
            self.class_names = self.labels[0].keys()
            print('\nClass List: ', self.class_names)
            self.labels = [list(x.values()) for x in self.labels]

        # only matters for balanced case for binary variable type
        # Per label column: (count - sum) / sum, i.e. the negatives-to-
        # positives ratio for 0/1 labels. A column whose sum is zero (easy to
        # hit with the 100-sample debug slice) would divide by zero, so it
        # gets the neutral weight 1.0 instead.
        self.pos_weight = []
        for col in zip(*self.labels):
            positives = sum(col)
            if positives == 0:
                self.pos_weight.append(1.0)
            else:
                self.pos_weight.append(round((len(col) - positives) / positives, 2))
def __init__(self, data: str, split: str, extension: str, classes: int, column: str,
             regression: bool, debug: bool = False):
    """Load names and labels for one split and build the image pipeline.

    Args:
        data: Dataset name; used as the sub-directory under both
            ``wsl_data_dir`` and ``wsl_csv_dir``.
        split: Split name; ``<split>.csv`` lists the ``Id`` values that
            belong to this split.
        extension: Fallback file extension, used only when ``data`` has no
            entry in ``known_extensions``.
        classes: Number of label entries per sample. Must be 1 when
            ``regression`` is true.
        column: Name of the ``info.csv`` column holding the labels.
        regression: When true, labels are rescaled with the column's min and
            max; otherwise they are converted to per-sample lists.
        debug: When true, keep only the first 100 names and labels.

    Exits the process with status 1 (after printing a message) when
    ``regression`` is requested together with ``classes != 1``.
    """
    if regression and classes != 1:
        print('Support for multi-class regression is not available.')
        sys.exit(1)

    self.datapath = wsl_data_dir / data
    self.data = data
    self.classes = classes

    # A registered extension for this dataset wins over the caller's value.
    self.extension = known_extensions[data] if data in known_extensions else extension

    # `column` and 'box' are stored as Python literals in the CSV.
    df = pd.read_csv(
        wsl_csv_dir / data / 'info.csv',
        converters={column: literal_eval, 'box': literal_eval},
    )
    # NOTE: self.df keeps the full table (duplicates and other splits included).
    self.df = df

    # One row per Id, then restrict to the requested split.
    df = df.drop_duplicates(subset='Id', keep='first', ignore_index=True)
    Ids = pd.read_csv(wsl_csv_dir / data / f'{split}.csv').Id.tolist()
    df = df[df.Id.isin(Ids)]

    self.names = df.Id.to_list()
    self.labels = df[column].tolist()

    if debug:
        self.names = self.names[:100]
        self.labels = self.labels[:100]

    self.image_transforms = Compose([
        Resize((224, 224)),
        RepeatChannel(repeats=3),
        CastToType(dtype=np.float32),
        ToTensor(),
    ])

    if regression:
        # Min/max come from the whole split, even when debug trims the labels.
        self.lmax = df[column].max()
        self.lmin = df[column].min()
        # NOTE(review): this divides by lmax, not by (lmax - lmin), so the
        # result is not a min-max scaling to [0, 1]. Left unchanged because
        # code elsewhere may invert exactly this formula -- confirm.
        self.labels = [[round((x - self.lmin) / self.lmax, 2)] for x in self.labels]
    elif classes == 1:
        self.labels = [[x] for x in self.labels]
    else:
        self.class_names = self.labels[0].keys()
        self.labels = [list(x.values()) for x in self.labels]

    # Per label column: (count - sum) / sum, i.e. the negatives-to-positives
    # ratio for 0/1 labels. Every branch above leaves labels as a list of
    # lists, so this is computed unconditionally and the attribute always
    # exists. A column whose sum is zero would divide by zero, so it gets the
    # neutral weight 1.0 instead.
    self.pos_weight = []
    for col in zip(*self.labels):
        positives = sum(col)
        if positives == 0:
            self.pos_weight.append(1.0)
        else:
            self.pos_weight.append(round((len(col) - positives) / positives, 2))