示例#1
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 length: Optional[int] = None):
        """Collect the image names of one split and build the preprocessing.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ``<split>.csv`` must provide an ``Id`` column.
            extension: File extension to use when ``data`` has no entry in
                ``known_extensions``.
            length: If given, keep a random sample of at most this many
                names; ``None`` keeps every name.
        """
        self.data = data
        self.datapath = wsl_data_dir / data

        # A registered extension takes precedence over the caller's argument.
        self.extension = known_extensions.get(data, extension)

        split_csv = wsl_csv_dir / data / f'{split}.csv'
        names = pd.read_csv(split_csv).Id.tolist()
        if length is not None:
            # Never request more items than exist; random.sample would raise.
            sample_size = min(len(names), length)
            names = random.sample(names, sample_size)
        self.names = names

        self.new_size = (224, 224)
        pipeline = [
            Resize(self.new_size),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor(),
        ]
        self.image_transforms = Compose(pipeline)
示例#2
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 classes: int,
                 column: str,
                 debug: bool = False):
        """Load the annotation table and the image names of one split.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ``<split>.csv`` must provide an ``Id`` column.
            extension: File extension to use when ``data`` has no entry in
                ``known_extensions``.
            classes: Stored unchanged as ``self.classes``.
            column: Column of ``info.csv`` that is parsed with
                ``literal_eval`` (alongside the ``box`` column); stored as
                ``self.column``.
            debug: If True, keep only the first 100 names.
        """
        self.datapath = wsl_data_dir / data
        self.data = data
        self.classes = classes
        self.column = column

        # A registered extension takes precedence over the caller's argument.
        self.extension = known_extensions.get(data, extension)

        df = pd.read_csv(wsl_csv_dir / data / 'info.csv',
                         converters={
                             column: literal_eval,
                             'box': literal_eval
                         })
        # The unfiltered table (all splits) stays available as ``self.df``.
        self.df = df
        split_ids = pd.read_csv(wsl_csv_dir / data /
                                f'{split}.csv').Id.tolist()
        df = df[df.Id.isin(split_ids)]
        # Largest number of rows that share a single Id within this split.
        self.max_boxes = df['Id'].value_counts().max()
        # De-duplicate while keeping first-appearance order. Going through a
        # plain ``set`` would leave the order arbitrary (and, for string Ids,
        # different from run to run), which made the ``debug`` subset below
        # non-reproducible.
        self.names = list(dict.fromkeys(df.Id.to_list()))
        if debug:
            self.names = self.names[:100]

        # No Resize step is applied by this pipeline (it is left disabled,
        # unlike the 224x224 resize used by similar dataset constructors).
        self.image_transforms = Compose([
            # Resize((224, 224)),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor()
        ])
示例#3
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 classes: int,
                 column: str,
                 variable_type: str,
                 augmentation: bool = False,
                 debug: bool = False):
        """Load names and labels of one split and build the transforms.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ``<split>.csv`` must provide an ``Id`` column.
            extension: File extension to use when ``data`` has no entry in
                ``known_extensions``.
            classes: When 1, every label is wrapped in a one-element list;
                otherwise every label is expected to be a dict whose keys
                become ``self.class_names``. Only applied when
                ``variable_type`` is not ``'categorical'``.
            column: Column of ``info.csv`` holding the labels; parsed with
                ``literal_eval``.
            variable_type: ``'categorical'`` leaves the labels as read and
                skips the ``pos_weight`` computation.
            augmentation: If True, ``self.augmentation`` is an ``Affine``
                transform; otherwise it is ``None``.
            debug: If True, keep only the first 100 names and labels.
        """
        if classes != 1:
            print('Note: Ensure all labels are of a single type.')

        self.datapath = wsl_data_dir / data
        self.data = data
        self.classes = classes

        # A registered extension takes precedence over the caller's argument.
        self.extension = known_extensions.get(data, extension)

        df = pd.read_csv(wsl_csv_dir / data / 'info.csv',
                         converters={
                             column: literal_eval,
                             'box': literal_eval
                         })
        # The unfiltered table (all rows, all splits) stays on ``self.df``.
        self.df = df
        # One row per image: only the first row of each Id supplies the label.
        df = df.drop_duplicates(subset='Id', keep='first', ignore_index=True)
        split_ids = pd.read_csv(wsl_csv_dir / data /
                                f'{split}.csv').Id.tolist()
        df = df[df.Id.isin(split_ids)]

        self.names = df.Id.to_list()
        self.labels = df[column].tolist()
        self.variable_type = variable_type

        if debug:
            self.names = self.names[:100]
            self.labels = self.labels[:100]

        self.new_size = (224, 224)
        self.image_transforms = Compose([
            Resize(self.new_size),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor()
        ])

        if augmentation:
            self.augmentation = Affine(rotate_params=np.pi / 6,
                                       scale_params=(1.2, 1.2),
                                       translate_params=(50, 50),
                                       padding_mode='zeros')
        else:
            self.augmentation = None

        if self.variable_type != 'categorical':
            if classes == 1:
                self.labels = [[x] for x in self.labels]
            else:
                self.class_names = self.labels[0].keys()
                print('\nClass List: ', self.class_names)
                self.labels = [list(x.values()) for x in self.labels]

            # only matters for balanced case for binary variable type
            # Per label column: (#negatives / #positives), rounded to 2 places.
            self.pos_weight = []
            for col in zip(*self.labels):
                positives = sum(col)
                if positives:
                    weight = round((len(col) - positives) / positives, 2)
                else:
                    # A column whose labels sum to zero (e.g. a rare class
                    # missing from the 100-sample debug subset) used to raise
                    # ZeroDivisionError here; fall back to a neutral weight.
                    weight = 1.0
                self.pos_weight.append(weight)
示例#4
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 classes: int,
                 column: str,
                 regression: bool,
                 debug: bool = False):
        """Load names and labels of one split and build the transforms.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ``<split>.csv`` must provide an ``Id`` column.
            extension: File extension to use when ``data`` has no entry in
                ``known_extensions``.
            classes: When 1, every label is wrapped in a one-element list;
                otherwise (classification only) every label is expected to
                be a dict whose keys become ``self.class_names``.
            column: Column of ``info.csv`` holding the labels; parsed with
                ``literal_eval``.
            regression: If True, labels are rescaled using the column's
                min/max and no ``pos_weight`` is computed. Combining it with
                ``classes != 1`` prints a message and exits the process.
            debug: If True, keep only the first 100 names and labels.
        """
        if regression and classes != 1:
            print('Support for multi-class regression is not available.')
            sys.exit(1)

        self.datapath = wsl_data_dir / data
        self.data = data
        self.classes = classes

        # A registered extension takes precedence over the caller's argument.
        self.extension = known_extensions.get(data, extension)

        df = pd.read_csv(wsl_csv_dir / data / 'info.csv',
                         converters={
                             column: literal_eval,
                             'box': literal_eval
                         })
        # The unfiltered table (all rows, all splits) stays on ``self.df``.
        self.df = df
        # One row per image: only the first row of each Id supplies the label.
        df = df.drop_duplicates(subset='Id', keep='first', ignore_index=True)
        split_ids = pd.read_csv(wsl_csv_dir / data /
                                f'{split}.csv').Id.tolist()
        df = df[df.Id.isin(split_ids)]

        self.names = df.Id.to_list()
        self.labels = df[column].tolist()

        if debug:
            self.names = self.names[:100]
            self.labels = self.labels[:100]

        self.image_transforms = Compose([
            Resize((224, 224)),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor()
        ])

        if regression:
            # Min/max come from the whole split, not from the debug subset.
            self.lmax = df[column].max()
            self.lmin = df[column].min()
            # NOTE(review): this divides by lmax rather than (lmax - lmin),
            # so it is a true min-max scaling only when lmin == 0 and is
            # undefined when lmax == 0. Left unchanged because code outside
            # this block may invert the mapping with the same formula --
            # confirm before altering.
            self.labels = [[round((x - self.lmin) / self.lmax, 2)]
                           for x in self.labels]
        else:
            if classes == 1:
                self.labels = [[x] for x in self.labels]
            else:
                self.class_names = self.labels[0].keys()
                self.labels = [list(x.values()) for x in self.labels]

            # Per label column: (#negatives / #positives), rounded to 2 places.
            self.pos_weight = []
            for col in zip(*self.labels):
                positives = sum(col)
                if positives:
                    weight = round((len(col) - positives) / positives, 2)
                else:
                    # A column without any positive label (e.g. a rare class
                    # missing from the 100-sample debug subset) used to raise
                    # ZeroDivisionError here; fall back to a neutral weight.
                    weight = 1.0
                self.pos_weight.append(weight)