def _create_bags(self, bagging_strategy):
    """Group the frames of every eye into bags of at most self.num_frames.

    :param bagging_strategy: 'random' shuffles the frame order before
        bagging; any other value keeps frames sorted by frame id
        (constructor default is 'snippet')
    :return: list of bag dicts with keys 'frames', 'label', 'name'
        (plus 'shortname' only for eyes that fit into a single bag)
    """
    bags = []
    for eye, value in self.occurrences.items():
        # Label for this eye: first label row whose image name contains
        # the eye id.
        bag_label = self.labels_df[self.labels_df.image.str.contains(
            eye)].iloc[0].level
        prefix = 'pos' if bag_label == 1 else 'neg'
        # All frames on disk belonging to this eye, ordered by frame id.
        eye_frames = sorted([
            f for f in os.listdir(join(self.root_dir, prefix))
            if get_video_desc(f)['eye_id'] == eye
        ], key=lambda n: get_video_desc(n)['frame_id'])
        if bagging_strategy == 'random':
            random.shuffle(eye_frames)
        if value <= self.num_frames:
            # Everything fits into one bag.
            bags.append({
                'frames': eye_frames,
                'label': bag_label,
                'name': f'{eye}_{0}',
                'shortname': eye
            })
        else:
            # Split into consecutive chunks of num_frames frames.
            # NOTE(review): chunked bags carry no 'shortname' key, unlike
            # the single-bag case above — confirm downstream handles this.
            for i, start_idx in enumerate(
                    range(0, len(eye_frames), self.num_frames)):
                bags.append({
                    'frames': eye_frames[start_idx:start_idx + self.num_frames],
                    'label': bag_label,
                    'name': f'{eye}_{i}'
                })
    return bags
def refine_dataframe(df, files):
    """Expand eye-level labels into one row per matching file.

    For every row of *df*, each entry of *files* sharing the row's eye id
    gets a new row with a binarised label (1 if level > 0 else 0).

    :param df: DataFrame whose first column holds image/eye names and
        whose second column holds the DR level
    :param files: list of candidate file names
    :return: new DataFrame with the same columns as *df*
    """
    refined_rows = []
    for row in df.itertuples():
        video_desc = get_video_desc(row[1])
        level = row[2]
        matching_files = [
            f for f in files
            if get_video_desc(f)['eye_id'] == video_desc['eye_id']
        ]
        # Collect plain dicts and build the frame once at the end:
        # DataFrame.append was removed in pandas 2.0 and was O(n^2).
        refined_rows.extend({
            'image': match,
            'level': 1 if level > 0 else 0
        } for match in matching_files)
    return pd.DataFrame(refined_rows, columns=df.columns)
def __getitem__(self, idx):
    """Load one labelled image sample.

    :param idx: integer (or tensor) index into the label frame
    :return: dict with keys 'image', 'label', 'eye_id', 'name'
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()
    severity = self.labels_df.iloc[idx, 1]
    if self.use_prefix:
        # Class subfolders: positives under 'pos', negatives under 'neg'.
        prefix = 'pos' if severity == 1 else 'neg'
    else:
        prefix = ''
    img_name = os.path.join(self.root_dir, prefix,
                            self.labels_df.iloc[idx, 0] + self.file_type)
    img = cv2.imread(img_name)
    assert img is not None, f'Image {img_name} has to exist'
    # image = image[:,:,[2, 1, 0]]
    sample = {
        'image': img,
        'label': severity,
        'eye_id': get_video_desc(self.labels_df.iloc[idx, 0],
                                 only_eye=True)['eye_id'],
        'name': self.labels_df.iloc[idx, 0]
    }
    if self.transform:
        # BGR (OpenCV) -> RGB before the torchvision transform.
        sample['image'] = img[:, :, [2, 1, 0]]
        sample['image'] = self.transform(sample['image'])
    if self.augs:
        # NOTE(review): the albumentations path receives the BGR image
        # directly, without the channel swap above — confirm intended.
        # (transform and augs are mutually exclusive per __init__.)
        sample['image'] = self.augs(image=img)['image']
    return sample
def _create_bags(self):
    """Group the frames of every eye into bags of at most max_bag_size.

    Frames are shuffled before bagging; eyes with more frames than
    max_bag_size are split into several consecutive chunks.

    :return: list of bag dicts with keys 'frames', 'label', 'name'
        (plus 'shortname' for single-bag eyes)
    """
    bags = []
    for eye, count in self.occurrences.items():
        # Ground-truth label comes from the first label row whose image
        # name contains this eye id.
        label = self.labels_df[self.labels_df.image.str.contains(
            eye)].iloc[0].level
        prefix = 'pos' if label == 1 else 'neg'
        frames = [
            name for name in os.listdir(join(self.root_dir, prefix))
            if get_video_desc(name)['eye_id'] == eye
        ]
        # Shuffle by drawing a full-length sample (leaves the source
        # list untouched).
        frames = random.sample(frames, len(frames))
        if count <= self.max_bag_size:
            bags.append({
                'frames': frames,
                'label': label,
                'name': f'{eye}_{0}',
                'shortname': eye
            })
            continue
        for chunk_no, offset in enumerate(
                range(0, len(frames), self.max_bag_size)):
            bags.append({
                'frames': frames[offset:offset + self.max_bag_size],
                'label': label,
                'name': f'{eye}_{chunk_no}'
            })
    # print('Created bags: ', len(bags), self.occurrences)
    return bags
def __getitem__(self, idx):
    """Load a raw frame and its processed counterpart as one sample.

    The processed image is read from a sibling folder named
    ``root_dir + suffix``; both images are augmented independently and
    concatenated along the channel dimension.

    :param idx: integer index into the label frame (tensors rejected)
    :return: dict with keys 'image', 'label', 'eye_id'
    """
    assert not torch.is_tensor(idx)
    severity = self.labels_df.iloc[idx, 1]
    if self.use_prefix:
        prefix = 'pos' if severity == 1 else 'neg'
    else:
        prefix = ''
    img_name = os.path.join(self.root_dir, prefix,
                            self.labels_df.iloc[idx, 0] + self.file_type)
    processed_name = os.path.join(
        self.root_dir + self.suffix, prefix,
        self.labels_df.iloc[idx, 0] + self.file_type)
    img = cv2.imread(img_name)
    processed_img = cv2.imread(processed_name)
    # NOTE(review): no None check after imread — a missing file only
    # surfaces later inside the augmentation / cat call.
    sample = {
        'image': None,
        'label': severity,
        'eye_id': get_video_desc(self.labels_df.iloc[idx, 0],
                                 only_eye=True)['eye_id']
    }
    if self.augs:
        img = self.augs(image=img)['image']
        processed_img = self.augs(image=processed_img)['image']
    # Stack raw and processed image along the channel axis.
    # NOTE(review): torch.cat assumes the augmentations produced tensors
    # (e.g. ToTensorV2) — without augs this receives numpy arrays; confirm.
    sample['image'] = torch.cat([img, processed_img])
    return sample
def blow_up_df(file_list, df, eye_id, eye_level):
    """Append one label row per distinct snippet id found in *file_list*.

    Rows are named ``<eye_id>_<i:02d>`` for i = 0..n-1 where n is the
    number of unique snippet ids; each row carries *eye_level*.

    :param file_list: file names whose snippet ids are counted
    :param df: DataFrame to extend (returned as a new frame, not mutated)
    :param eye_id: id prefix for the generated image names
    :param eye_level: label assigned to every generated row
    :return: DataFrame with the generated rows appended
    """
    # Only the NUMBER of distinct snippet ids matters, not their values.
    num_snippets = len({get_video_desc(f)['snippet_id'] for f in file_list})
    # DataFrame.append was removed in pandas 2.0 and was O(n^2);
    # build the new rows once and concat.
    new_rows = [{
        'image': f'{eye_id}_{i:02d}',
        'level': eye_level
    } for i in range(num_snippets)]
    if not new_rows:
        return df
    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
def expand_df_with_snippet_info(label_train, label_val, output_path):
    """Expand per-eye train/val labels to one row per snippet on disk.

    For every eye in the train and val label files, the matching snippet
    files are counted and ``labels_<set>_refined.csv`` is written with one
    row per snippet. Splintered videos (files whose eye id is longer than
    the usual 5 characters) additionally get their own expanded rows.

    :param label_train: file name of the train label csv inside output_path
    :param label_val: file name of the val label csv inside output_path
    :param output_path: folder containing the label files and the
        train/val image folders with 'pos'/'neg' subfolders
    """
    for set_str in ['train', 'val']:
        df = pd.read_csv(join(
            output_path, label_train)) if set_str == 'train' else pd.read_csv(
                join(output_path, label_val))
        df_refined = pd.DataFrame(columns=df.columns)
        files = {
            'pos': os.listdir(join(output_path, set_str, 'pos')),
            'neg': os.listdir(join(output_path, set_str, 'neg'))
        }
        splinters = []  # eye ids detected as splintered videos
        for i, row in tqdm(df.iterrows(), total=len(df),
                           desc='Refining labels'):
            cur_eye_id = row['image']
            prefix = 'pos' if row['level'] == 1 else 'neg'
            # All files of this eye, highest snippet id first.
            corres_files = sorted(
                [f for f in files[prefix] if cur_eye_id in f],
                reverse=True,
                key=lambda name: get_video_desc(name)['snippet_id'])
            splintered_video = check_splinter(corres_files)
            if splintered_video:
                splinters.append(cur_eye_id)
                # Splinter files carry an extra trailing character in
                # their eye id; expand them under that extended id.
                corres_files_splinter = [
                    f for f in corres_files
                    if len(get_video_desc(f)['eye_id']) > 5
                ]
                df_refined = blow_up_df(
                    corres_files_splinter, df_refined,
                    f'{cur_eye_id}{get_video_desc(corres_files_splinter[0])["eye_id"][-1]}',
                    row['level'])
            df_refined = blow_up_df(corres_files, df_refined, cur_eye_id,
                                    row['level'])
        df_refined.to_csv(join(output_path, f'labels_{set_str}_refined.csv'),
                          index=False)
        # NOTE(review): summary is printed per dataset split; 'splinters'
        # is reset at the top of each split iteration.
        print(
            'These splintered videos were detected and included in the expanded labels.csv:'
        )
        [print('Splinter: ', spi) for spi in splinters]
def get_weight(self, idx):
    """Sampling weight for sample *idx*.

    Negatives are scaled down by the balance ratio; the weight is
    optionally divided by the per-eye frame count and boosted when the
    weak-prediction column flags the frame.

    :param idx: integer (or tensor) index into the label frame
    :return: float sampling weight
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()
    severity = self.labels_df.iloc[idx, 1]
    weight = 1.0 if severity != 0 else self.ratio
    if self.occur_balance:
        # Normalise by how many frames this eye contributes overall.
        eye = get_video_desc(self.labels_df.iloc[idx, 0])['eye_id']
        weight /= self.grade_count[eye]
    boost_active = (self.boost > 1.0 and severity == 1
                    and self.labels_df.iloc[idx, 2] == 1)
    if boost_active:
        weight *= (1. + self.labels_df.iloc[idx, 3])
    return weight
def run(input_path, labels_path, dataset, mode):
    """
    Takes a CSV file that only holds eye-ids. For every frame corresponding to
    these frames a new entry with the same DR grading will be created.
    :param input_path: Absolute path to input folder
    :param labels_path: Absolute path to label file that will be refined
    :param dataset: Dataset name (train/val/...)
    :param mode: Determines operation mode (snippets / frames)
    :return:
    """
    df = pd.read_csv(labels_path)
    df[COL_CLASS] = df[COL_CLASS].astype(int)
    files = {
        'pos': os.listdir(join(input_path, dataset, 'pos')),
        'neg': os.listdir(join(input_path, dataset, 'neg'))
    }
    refined_rows = []
    for i, row in tqdm(df.iterrows(), total=len(df)):
        video_desc = get_video_desc(row['image'])
        level = row['level']
        prefix = 'pos' if row['level'] > 0 else 'neg'
        # In 'frames' mode every frame of the eye matches; otherwise the
        # snippet id has to match as well.
        matching_files = [
            f for f in files[prefix]
            if get_video_desc(f)['eye_id'] == video_desc['eye_id'] and (
                mode == 'frames' or get_video_desc(f)['snippet_id'] ==
                video_desc['snippet_id'])
        ]
        # Collect plain dicts and build the frame once at the end:
        # DataFrame.append was removed in pandas 2.0 and was O(n^2).
        refined_rows.extend({
            'image': file,
            'level': 1 if level > 0 else 0
        } for file in matching_files)
    df_refined = pd.DataFrame(refined_rows, columns=df.columns)
    df_refined.to_csv(join(input_path, f'labels_{dataset}_frames.csv'),
                      index=False)
def run(labels_path, min_conf=80):
    """Drop label rows whose video confidence is below *min_conf*.

    The label file is rewritten in place.

    :param labels_path: path to the label csv (read and overwritten)
    :param min_conf: minimum confidence (inclusive) a row must reach
    """
    df = pd.read_csv(labels_path)
    low_conf_rows = [
        i for i, row in tqdm(df.iterrows())
        if int(get_video_desc(row['image'])['confidence']) < min_conf
    ]
    print(f'Dropping {len(low_conf_rows)} rows')
    # Fresh read_csv yields a RangeIndex, so labels == positions here.
    df.drop(df.index[low_conf_rows], inplace=True)
    df.to_csv(labels_path, index=False)
def __getitem__(self, idx):
    """Sample num_frames random frames from the eye of snippet *idx*.

    :param idx: integer index into the label frame (tensors rejected)
    :return: dict with 'frames' (stacked tensor if augmented, else numpy
        array), 'label' and 'name'
    """
    assert not torch.is_tensor(idx)
    image_idx = idx
    severity = self.labels_df.iloc[image_idx, 1]
    prefix = 'pos' if severity == 1 else 'neg'
    snip_name = self.labels_df.iloc[image_idx, 0]
    video_desc = get_video_desc(snip_name)
    # Every frame on disk that belongs to the same eye as this snippet.
    video_all_frames = [
        f for f in os.listdir(os.path.join(self.root_dir, prefix))
        if video_desc['eye_id'] == get_video_desc(f)['eye_id']
    ]
    # if len(frame_index) - 1 < video_index:
    #     print('Problem with video ', video_name, video_index)
    # frame_names = sorted([f for f in files if video_desc['snippet_id'] == get_video_desc(f)['snippet_id']], key=lambda n: get_video_desc(n)['frame_id'])
    # Sampling WITH replacement: indices may repeat.
    selection = np.random.randint(
        0, len(video_all_frames), self.num_frames)  # Generate random indices
    selected_frames = [video_all_frames[idx] for idx in selection]
    sample = {
        'frames': [],
        'label': severity,
        # First 5 chars of the eye id — strips any splinter suffix.
        'name': video_desc['eye_id'][:5]
    }
    for name in selected_frames:
        img = cv2.imread(os.path.join(self.root_dir, prefix, name))
        img = self.augs(image=img)['image'] if self.augs else img
        sample['frames'].append(img)
    # Augmentations produce tensors -> torch.stack; raw arrays -> np.stack.
    sample['frames'] = torch.stack(
        sample['frames']) if self.augs else np.stack(sample['frames'])
    return sample
def __init__(self, csv_file, root_dir, augmentations=None, balance_ratio=1.0, max_bag_size=100):
    """Multiple-instance dataset grouping the frames of one eye into bags.

    :param csv_file: path to csv file with labels
    :param root_dir: path to folder with 'pos'/'neg' image subfolders
    :param augmentations: albumentations pipeline applied per frame
    :param balance_ratio: weight adjustment for unbalanced classes
    :param max_bag_size: maximum number of frames per bag
    """
    self.labels_df = pd.read_csv(csv_file)
    self.root_dir = root_dir
    self.augs = augmentations
    self.ratio = balance_ratio
    self.max_bag_size = max_bag_size
    # Number of labelled frames per eye id.
    self.occurrences = {}
    for record in self.labels_df.itertuples():
        eye_id = nn_utils.get_video_desc(record.image)['eye_id']
        self.occurrences[eye_id] = self.occurrences.get(eye_id, 0) + 1
    self.bags = self._create_bags()
def run(input_path):
    """
    Create statistics for dataset
    :param input_path: Absolute path to the label file of a dataset
    :return:
    """
    df = pd.read_csv(input_path)
    # Count labelled frames per eye id.
    counts = {}
    for record in df.itertuples():
        eye_id = nn_utils.get_video_desc(record.image)['eye_id']
        counts[eye_id] = counts.get(eye_id, 0) + 1
    name_arr = np.array(list(counts.values()))
    print(
        f'Dataset stats:\n Mean> {name_arr.mean()},\n Standard Deviaton> {name_arr.std()},\n Median> {np.median(name_arr)},\n Histogram(5)> {np.histogram(name_arr, bins=5)}, \n Histogram(10)> {np.histogram(name_arr, bins=10)}'
    )
def __init__(self, csv_file, root_dir, num_frames, file_type='.png', balance_ratio=1.0, augmentations=None, bagging_strategy='snippet'):
    """Bagged dataset that groups a fixed number of frames per eye.

    :param csv_file: path to csv file with labels
    :param root_dir: path to folder with 'pos'/'neg' image subfolders
    :param num_frames: number of frames per bag
    :param file_type: file ending of images (e.g '.png')
    :param balance_ratio: weight adjustment for unbalanced classes
    :param augmentations: albumentations pipeline applied per frame
    :param bagging_strategy: frame ordering before bagging
        ('random' or 'snippet')
    """
    self.labels_df = pd.read_csv(csv_file)
    self.root_dir = root_dir
    self.file_type = file_type
    self.augs = augmentations
    self.ratio = balance_ratio
    self.num_frames = num_frames
    self.bag_strategy = bagging_strategy
    # Number of labelled frames per eye id.
    self.occurrences = {}
    for record in self.labels_df.itertuples():
        eye_id = nn_utils.get_video_desc(record.image)['eye_id']
        self.occurrences[eye_id] = self.occurrences.get(eye_id, 0) + 1
    self.bags = self._create_bags(bagging_strategy)
def __init__(self, csv_file, root_dir, file_type='.png', balance_ratio=1.0, transform=None, augmentations=None, use_prefix=False, boost_frames=1.0, occur_balance=False):
    """ Retina Dataset for normal single frame data samples

    :param csv_file: path to csv file with labels
    :param root_dir: path to folder with sample images
    :param file_type: file ending of images (e.g '.jpg')
    :param balance_ratio: adjust sample weight in case of unbalanced classes
    :param transform: pytorch data augmentation
    :param augmentations: albumentation data augmentation
    :param use_prefix: data folder contains subfolders for classes (pos / neg)
    :param boost_frames: boost frames if a third weak prediction column is available
    :param occur_balance: weight samples inversely to their eye's frame count
    """
    self.labels_df = pd.read_csv(csv_file)
    # Per-eye frame counts, only needed for occurrence balancing in
    # get_weight.
    self.grade_count = Counter(
        get_video_desc(name)['eye_id']
        for name in self.labels_df['image'].tolist()
    ) if occur_balance else None
    self.root_dir = root_dir
    self.file_type = file_type
    self.transform = transform
    self.augs = augmentations
    self.ratio = balance_ratio
    self.use_prefix = use_prefix
    self.boost = boost_frames
    self.occur_balance = occur_balance
    # Validate the configuration eagerly. Raise instead of assert so the
    # checks also hold under `python -O`.
    if transform is not None and augmentations is not None:
        raise ValueError('Provide either transform or augmentations, not both')
    if boost_frames < 1.0:
        raise ValueError('boost_frames must be >= 1.0')
    if boost_frames > 1.0 and len(self.labels_df.columns) <= 2:
        raise ValueError(
            'boost_frames > 1.0 requires a weak prediction column')
def check_splinter(file_list):
    """Detect whether *file_list* contains a splintered video.

    A splinter is marked by an eye id longer than the usual 5 characters.

    :param file_list: file names to inspect
    :return: True if any splinter marker is found, else False (the
        original returned None implicitly, which is equivalent in
        boolean context)
    """
    return any(len(get_video_desc(f)['eye_id']) > 5 for f in file_list)