def __init__(self, class_type, image_dir, label_filepath, split, label2id=None, randomize=False):
    """Read the label CSV, split the image folders and wrap each split.

    Args:
        class_type: Stored as-is on ``self.class_type``.
        image_dir: Forwarded to ``__read_folders``.
        label_filepath: Path handed to ``csv_utils.read`` to load the labels.
        split: Forwarded to ``__read_folders``.
        label2id: Label-to-id mapping passed to every ``CustomDataset``.
        randomize: Forwarded to ``__read_folders``.

    Raises:
        AssertionError: If the combined size of the three datasets differs
            from the combined size of the three metadata collections.
    """
    self.class_type = class_type
    self.label2id = label2id
    self.__labels_csv = csv_utils.read(label_filepath)

    folders = self.__read_folders(image_dir, split, randomize)
    self.__metadata_train, self.__metadata_dev, self.__metadata_test = folders

    self.train = CustomDataset(self.__metadata_train, self.label2id, self.__labels_csv)
    self.dev = CustomDataset(self.__metadata_dev, self.label2id, self.__labels_csv)
    self.test = CustomDataset(self.__metadata_test, self.label2id, self.__labels_csv)

    pairs = (
        (self.train, self.__metadata_train),
        (self.dev, self.__metadata_dev),
        (self.test, self.__metadata_test),
    )

    # Report dataset size followed by metadata size for train, dev, test.
    for dataset, metadata in pairs:
        print(len(dataset))
        print(len(metadata))

    # Sanity check: wrapping must not change the total number of samples.
    assert sum(len(dataset) for dataset, _ in pairs) == \
        sum(len(metadata) for _, metadata in pairs)
def gen_label(self, path, pos_label="mitosis", neg_label="no-mitosis"):
    """Return the label of a tile based on the annotation CSV of its image.

    The tile path is assumed to look like
    ``<tiles_dir>/<img_id>/<x>-<y>-<r>.png``; for example
    ``<root>/tiles/12/04/0-600-90.png`` yields ``img_id = "12/04"``,
    ``x = 0`` and ``y = 600``. Annotations are read from
    ``<label_dir>/<img_id>.csv`` (e.g. ``<root>/labels/12/04.csv``).

    Args:
        path: Path of the tile image.
        pos_label: Value returned when an annotated point falls in the tile.
        neg_label: Value returned otherwise.

    Returns:
        ``pos_label`` if any annotation ``(a, b)`` satisfies
        ``x < a <= x + tile_size`` and ``y < b <= y + tile_size``;
        ``neg_label`` if none does, if the label file does not exist, or if
        the tile name cannot be parsed into integer coordinates (the path
        is printed in that case).
    """
    img_id = os.path.dirname(path).replace(self.tiles_dir + '/', '')
    tile_name = os.path.basename(os.path.splitext(path)[0])

    try:
        x_str, y_str, _ = tile_name.split('-')
        x, y = int(x_str), int(y_str)
    except ValueError:
        # Without coordinates the tile cannot be matched to any annotation.
        # Previously a bare ``except`` fell through with x/y unbound, which
        # crashed later only when a label file happened to exist.
        print(path)
        return neg_label

    lbl_path = os.path.join(self.label_dir, img_id + ".csv")
    if not os.path.isfile(lbl_path):
        return neg_label

    # TODO verify row columns for label file
    rows = csv_utils.read(lbl_path, is_headers=False)
    # e.g. points = [(70, 1782)]
    points = [(int(row[0]), int(row[1])) for row in rows]

    x_max = x + self.tile_size
    y_max = y + self.tile_size
    if any(x < px <= x_max and y < py <= y_max for px, py in points):
        return pos_label
    return neg_label
def __init__(self, train_dir, test_dir, train_save_dir, test_save_dir, label_2_id, mu, std,
             tile_size=299, stride=150, dev_per=20, order=None, randomize=True,
             transform=None, redo_preprocessing=False):
    """Store the configuration, preprocess both image folders and build the dataset.

    Args:
        train_dir: Training folder; must contain ``labels.csv``.
        test_dir: Test folder; must contain ``labels.csv``.
        train_save_dir: Passed to ``__preprocess_images`` with ``train_dir``.
        test_save_dir: Passed to ``__preprocess_images`` with ``test_dir``.
        label_2_id: Label-to-id dict, e.g. ``[benign:0, normal:1 ...]``.
        mu: Stored on ``self.mu``.
        std: Stored on ``self.std``.
        tile_size: Stored on ``self.tile_size``.
        stride: Stored on ``self.stride``.
        dev_per: Forwarded to ``__create_dataset``.
        order: Sequence of preprocessing step names (keys of
            ``self.dispatch``); defaults to ``['normal_stain', 'tile']``.
        randomize: Stored on ``self.randomize``.
        transform: When equal to ``'normalize'``, ``self.transform`` is set
            to the private ``__normalize`` method.
        redo_preprocessing: Forwarded to ``__preprocess_images``.
    """
    self.dir_status_file = ".dir_status"
    self.train_dir = train_dir
    self.test_dir = test_dir
    self.tile_size = tile_size
    self.stride = stride
    self.randomize = randomize
    self.mu = mu
    self.std = std

    # NOTE(review): self.transform is only assigned for 'normalize';
    # confirm that users of this attribute guard against its absence.
    if transform == 'normalize':
        self.transform = self.__normalize

    # dict [benign:0, normal:1 ...]
    self.label_2_id = label_2_id

    # get labels ids from csv file
    self.train_lbls = csv_utils.read(os.path.join(train_dir, "labels.csv"))
    self.test_lbls = csv_utils.read(os.path.join(test_dir, "labels.csv"))

    # Maps a preprocessing step name to the method implementing it.
    self.dispatch = {
        'tile': self.__tile,
        'normal_stain': self.__normal_stain,
    }

    steps = ['normal_stain', 'tile'] if order is None else order

    for src_dir, save_dir in ((train_dir, train_save_dir), (test_dir, test_save_dir)):
        self.__preprocess_images(src_dir, save_dir, steps, redo_preprocessing)

    self.__create_dataset(train_save_dir, test_save_dir, dev_per)
def get_label(self, path, x, y):
    """Return the label for the tile at ``(x, y)`` of the image at ``path``.

    The image id is ``path`` without its extension and with every
    occurrence of ``self.srcdir + '/'`` removed; annotations are read from
    ``<label_dir>/<img_id>.csv`` (e.g. ``<root>/labels/12/04.csv``).

    Args:
        path: Path of the source image.
        x: First tile coordinate (anything ``int()`` accepts).
        y: Second tile coordinate (anything ``int()`` accepts).

    Returns:
        ``self.pos_label`` if any annotation ``(a, b)`` satisfies
        ``x < a <= x + tile_size`` and ``y < b <= y + tile_size``,
        otherwise ``self.neg_label``.
    """
    stem, _ext = os.path.splitext(path)
    img_id = stem.replace(self.srcdir + '/', '')
    lbl_path = os.path.join(self.label_dir, img_id + ".csv")

    rows = []
    if os.path.isfile(lbl_path):
        # TODO verify row columns for label file
        rows = csv_utils.read(lbl_path, is_headers=False)

    # e.g. points = [(70, 1782)]
    points = [(int(row[0]), int(row[1])) for row in rows]

    for px, py in points:
        # Coordinates are converted lazily, only when there is a point to test.
        if not (int(x) < px <= int(x) + self.tile_size):
            continue
        if int(y) < py <= int(y) + self.tile_size:
            return self.pos_label
    return self.neg_label
def __init__(self, class_type, image_dir, label_filepath, split, label2id=None, transform=None,
             filter_model=None, filter_percent=100):
    """Load the labels, settle the label-to-id mapping and build the splits.

    Args:
        class_type: Stored on ``self.class_type`` and passed to
            ``__all_labels`` when the mapping has to be derived.
        image_dir: Forwarded to ``gen_triples``.
        label_filepath: Path handed to ``csv_utils.read``.
        split: Forwarded to ``gen_triples``.
        label2id: Mapping to use as-is; when falsy (``None`` or empty) a
            mapping is built by numbering the distinct labels from
            ``__all_labels`` in first-seen order.
        transform: Forwarded to ``gen_triples``.
        filter_model: Forwarded to ``gen_triples``.
        filter_percent: Forwarded to ``gen_triples``.
    """
    self.class_type = class_type
    self.__labels_csv = csv_utils.read(label_filepath)

    if not label2id:
        distinct = OrderedSet(self.__all_labels(self.class_type))
        self.label2id = {label: idx for idx, label in enumerate(distinct)}
    else:
        self.label2id = label2id
    print(self.label2id)

    self.no_labels = len(self.label2id)
    self.train, self.dev, self.test = self.gen_triples(
        image_dir, split, transform, filter_model, filter_percent)