def __init__(self, path):
    """Index segmentation result images found under ``<path>/Segmentation``.

    Each entry of that directory whose name splits on ``_`` into exactly
    three non-empty parts ``<prefix>_<subset>_<mark>``, with ``mark`` being
    ``cls`` or ``inst``, is scanned for ``.png`` files. For every such
    directory a ``Subset`` is registered in ``self._subsets`` and a mapping
    of item name to image path is stored in
    ``self._annotations[subset][mark]``.

    Args:
        path: Root directory of the results.
    """
    super().__init__(path)

    subsets = {}
    annotations = defaultdict(dict)

    task_dir = osp.join(path, 'Segmentation')
    if not osp.isdir(task_dir):
        # NOTE(review): returns before self._subsets / self._annotations are
        # assigned; presumably the base class initializes them - confirm.
        return

    for ann_dir in os.listdir(task_dir):
        # Build a real list: filter() is a lazy iterator on Python 3 and
        # does not support len().
        ann_parts = [part for part in ann_dir.strip().split('_') if part]
        # Three names are unpacked below, so exactly three parts are needed.
        if len(ann_parts) != 3:
            continue
        _, subset_name, mark = ann_parts
        if mark not in ('cls', 'inst'):
            continue

        item_dir = osp.join(task_dir, ann_dir)
        item_names = dir_items(item_dir, '.png', truncate_ext=True)
        # dir_items yields extension-less names, so the path is rebuilt
        # from the name itself.
        items = {
            name: osp.join(item_dir, name + '.png') for name in item_names
        }

        subset = VocResultsExtractor.Subset(subset_name, self)
        subset.items = list(items)

        subsets[subset_name] = subset
        annotations[subset_name][mark] = items

    self._subsets = subsets
    self._annotations = dict(annotations)
def _parse_txt_ann(self, path, subsets, annotations, task):
    """Read the text result files of ``task`` into ``subsets`` / ``annotations``.

    The task description (``dir``, ``ext``, ``mark``) is looked up in
    ``self._SUPPORTED_TASKS``. Only files named
    ``<prefix>_<mark>_<subset>_<label>`` whose ``mark`` equals the task's
    mark are read. Each non-blank line contributes one tuple
    ``(label_id, *remaining_fields)`` to the item named by its first field.

    Args:
        path: Root directory of the results.
        subsets: Dict updated in place with ``subset_name -> Subset``.
        annotations: Dict updated in place with
            ``subset_name -> {item: [tuples]}``.
        task: Key into ``self._SUPPORTED_TASKS``.
    """
    task_desc = self._SUPPORTED_TASKS[task]
    task_dir = osp.join(path, task_desc['dir'])
    ann_ext = task_desc['ext']
    if not osp.isdir(task_dir):
        return

    ann_files = dir_items(task_dir, ann_ext, truncate_ext=True)
    for ann_file in ann_files:
        # Build a real list: filter() is a lazy iterator on Python 3 and
        # does not support len().
        ann_parts = [part for part in ann_file.strip().split('_') if part]
        if len(ann_parts) != 4:
            continue
        _, mark, subset_name, label = ann_parts
        if mark != task_desc['mark']:
            continue

        label_id = VocLabel[label].value
        anns = defaultdict(list)
        with open(osp.join(task_dir, ann_file + ann_ext), 'r') as f:
            for line in f:
                line_parts = line.split()
                if not line_parts:
                    # Blank line: there is no item name to index by.
                    continue
                item = line_parts[0]
                anns[item].append((label_id, *line_parts[1:]))

        subset = VocResultsExtractor.Subset(subset_name, self)
        subset.items = list(anns)

        subsets[subset_name] = subset
        # NOTE(review): a later file for the same subset replaces the entry
        # written by an earlier one - confirm this is intended.
        annotations[subset_name] = dict(anns)
def _find_subsets(path):
    """Map each subset name to its ``.json`` annotation file.

    Args:
        path: Dataset root; annotations are looked up in its
            ``DatumaroPath.ANNOTATIONS_DIR`` sub-directory.

    Returns:
        Dict of ``name -> <annotations dir>/<name>.json`` for every
        ``.json`` entry reported by ``dir_items``.

    Raises:
        Exception: If the annotations directory does not exist.
    """
    anno_dir = osp.join(path, DatumaroPath.ANNOTATIONS_DIR)
    if not osp.isdir(anno_dir):
        raise Exception('Datumaro dataset not found at "%s"' % path)

    subset_files = {}
    for subset_name in dir_items(anno_dir, '.json', truncate_ext=True):
        subset_files[subset_name] = osp.join(anno_dir, subset_name + '.json')
    return subset_files
def _load_det_annotations(self):
    """Load the raw text of every ``.xml`` annotation file.

    Files are read from ``<self._path>/<VocPath.ANNOTATIONS_DIR>``. The
    result, keyed by file name without extension, is stored in
    ``self._annotations[VocTask.detection]``. The XML is not parsed here.
    """
    anno_dir = osp.join(self._path, VocPath.ANNOTATIONS_DIR)

    raw_by_item = {}
    for stem in dir_items(anno_dir, '.xml', truncate_ext=True):
        xml_path = osp.join(anno_dir, stem + '.xml')
        with open(xml_path, 'r') as xml_file:
            raw_by_item[stem] = xml_file.read()

    self._annotations[VocTask.detection] = raw_by_item
def _load_det_annotations(self):
    """Load the raw text of every ``.xml`` annotation file, keyed by image name.

    Files are read from ``<self._path>/<VocPath.ANNOTATIONS_DIR>``. Each
    file is parsed only to obtain the text of its ``filename`` element;
    that text, with its extension removed, is the key under which the
    unparsed XML text is stored in ``self._annotations[VocTask.detection]``.
    """
    anno_dir = osp.join(self._path, VocPath.ANNOTATIONS_DIR)

    raw_by_item = {}
    for stem in dir_items(anno_dir, '.xml', truncate_ext=True):
        with open(osp.join(anno_dir, stem + '.xml'), 'r') as xml_file:
            xml_text = xml_file.read()

        # The key comes from the XML content, not from the file name.
        image_name = ET.fromstring(xml_text).find('filename').text
        item_id, _ = osp.splitext(image_name)
        raw_by_item[item_id] = xml_text

    self._annotations[VocTask.detection] = raw_by_item
def _load_subsets(self, subsets_dir):
    """Build a ``Subset`` for every plain subset list in ``subsets_dir``.

    A subset list is a ``.txt`` file whose name contains no ``_``. The
    first whitespace-separated field of each non-blank line is taken as an
    item name.

    Args:
        subsets_dir: Directory holding the ``<subset>.txt`` files.

    Returns:
        Dict of ``subset_name -> Subset`` with ``Subset.items`` filled in.
    """
    dir_files = dir_items(subsets_dir, '.txt', truncate_ext=True)

    subsets = {}
    for subset_name in dir_files:
        if '_' in subset_name:
            continue

        subset = __class__.Subset(subset_name, self)
        with open(osp.join(subsets_dir, subset_name + '.txt'), 'r') as f:
            # Blank lines split into an empty list; skip them instead of
            # failing on fields[0].
            subset.items = [
                fields[0]
                for fields in (line.split() for line in f)
                if fields
            ]
        subsets[subset_name] = subset
    return subsets
def __init__(self, path):
    """Index person-layout result files found under ``path``.

    The task description (``dir``, ``ext``, ``mark``) for
    ``VocTask.person_layout`` is taken from ``self._SUPPORTED_TASKS``. Only
    files named ``<prefix>_<mark>_<subset>_<suffix>`` whose ``mark`` equals
    the task's mark are parsed as XML. Every ``layout`` element yields
    ``[object_id, confidence, parts]`` where ``parts`` is a list of
    ``(label_id, [x, y, width, height])``.

    Args:
        path: Root directory of the results.
    """
    super().__init__(path)

    subsets = {}
    annotations = defaultdict(dict)

    task = VocTask.person_layout
    task_desc = self._SUPPORTED_TASKS[task]
    task_dir = osp.join(path, task_desc['dir'])
    if not osp.isdir(task_dir):
        # NOTE(review): returns before self._subsets / self._annotations are
        # assigned; presumably the base class initializes them - confirm.
        return

    ann_ext = task_desc['ext']
    ann_files = dir_items(task_dir, ann_ext, truncate_ext=True)

    for ann_file in ann_files:
        # Build a real list: filter() is a lazy iterator on Python 3 and
        # does not support len().
        ann_parts = [part for part in ann_file.strip().split('_') if part]
        if len(ann_parts) != 4:
            continue
        _, mark, subset_name, _ = ann_parts
        if mark != task_desc['mark']:
            continue

        layouts = {}
        root = ET.parse(osp.join(task_dir, ann_file + ann_ext))
        root_elem = root.getroot()
        for layout_elem in root_elem.findall('layout'):
            item = layout_elem.find('image').text
            obj_id = int(layout_elem.find('object').text)
            conf = float(layout_elem.find('confidence').text)

            parts = []
            for part_elem in layout_elem.findall('part'):
                label_id = VocBodyPart[part_elem.find('class').text].value

                bbox_elem = part_elem.find('bndbox')
                xmin = float(bbox_elem.find('xmin').text)
                xmax = float(bbox_elem.find('xmax').text)
                ymin = float(bbox_elem.find('ymin').text)
                ymax = float(bbox_elem.find('ymax').text)
                # Corner coordinates are converted to x, y, width, height.
                bbox = [xmin, ymin, xmax - xmin, ymax - ymin]
                parts.append((label_id, bbox))

            # NOTE(review): a second layout for the same image replaces the
            # first - confirm that one layout per image is expected.
            layouts[item] = [obj_id, conf, parts]

        subset = VocResultsExtractor.Subset(subset_name, self)
        subset.items = list(layouts)

        subsets[subset_name] = subset
        annotations[subset_name] = layouts

    self._subsets = subsets
    self._annotations = dict(annotations)
def _load_cls_annotations(self, subsets_dir, subset_names):
    """Collect the labels marked present for each item.

    Reads every ``<label>_<subset>.txt`` file in ``subsets_dir`` whose
    ``<subset>`` part (the text after the last ``_``) is in
    ``subset_names``. Each non-blank line must hold two fields,
    ``item present``; when ``present`` is ``'1'`` the label id is appended
    to that item's list. The result is stored in
    ``self._annotations[VocTask.classification]``.

    Args:
        subsets_dir: Directory holding the label list files.
        subset_names: Container of subset names to accept.
    """
    dir_files = dir_items(subsets_dir, '.txt', truncate_ext=True)

    label_annotations = defaultdict(list)
    for ann_file in dir_files:
        # Split at the last underscore: "<label>_<subset>".
        label, sep, subset_name = ann_file.rpartition('_')
        if not sep or subset_name not in subset_names:
            continue

        label_id = VocLabel[label].value
        with open(osp.join(subsets_dir, ann_file + '.txt'), 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines carry no data and must not break the
                    # two-field unpacking below.
                    continue
                item, present = fields
                if present == '1':
                    label_annotations[item].append(label_id)

    self._annotations[VocTask.classification] = dict(label_annotations)
def _load_annotations(self):
    """Collect the labels marked present for each item of this subset.

    Looks in the directory containing ``self._path`` for ``.txt`` entries
    whose name ends with ``_<basename of self._path>``. The part of the
    name before that suffix is the label, resolved with
    ``self._get_label_id``. Each non-blank line must hold two fields,
    ``item present``; when ``present`` is ``'1'`` the label id is appended
    to that item's list.

    Returns:
        Dict of ``item -> [label_id, ...]``.
    """
    annotations = defaultdict(list)
    task_dir = osp.dirname(self._path)
    suffix = '_' + osp.basename(self._path)

    for ann_filename in dir_items(task_dir, '.txt'):
        if not ann_filename.endswith(suffix):
            continue

        # Strip exactly the matched suffix; cutting at the last '_' would
        # give a wrong label when the subset file name contains '_'.
        label = ann_filename[:-len(suffix)]
        label_id = self._get_label_id(label)

        with open(osp.join(task_dir, ann_filename)) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines carry no data and must not break the
                    # two-field unpacking below.
                    continue
                item, present = fields
                if present == '1':
                    annotations[item].append(label_id)

    return dict(annotations)
def _load_cls_annotations(self, subsets_dir, subset_names):
    """Collect the labels marked present for each item.

    Falsy entries of ``subset_names`` are replaced by
    ``DEFAULT_SUBSET_NAME`` to obtain the accepted file-name suffixes.
    Every ``<label>_<subset>.txt`` file in ``subsets_dir`` whose
    ``<subset>`` part (the text after the last ``_``) is accepted is read.
    Each non-blank line must hold two fields, ``item present``; when
    ``present`` is ``'1'`` the label id from ``self._get_label_id`` is
    appended to that item's list. The result is stored in
    ``self._annotations[VocTask.classification]``.

    Args:
        subsets_dir: Directory holding the label list files.
        subset_names: Iterable of subset names; falsy names stand for the
            default subset.
    """
    subset_file_names = {
        name if name else DEFAULT_SUBSET_NAME for name in subset_names
    }
    dir_files = dir_items(subsets_dir, '.txt', truncate_ext=True)

    label_annotations = defaultdict(list)
    for ann_filename in dir_files:
        # Split at the last underscore: "<label>_<subset>".
        label, sep, subset_file_name = ann_filename.rpartition('_')
        if not sep or subset_file_name not in subset_file_names:
            continue

        label_id = self._get_label_id(label)
        with open(osp.join(subsets_dir, ann_filename + '.txt'), 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines carry no data and must not break the
                    # two-field unpacking below.
                    continue
                item, present = fields
                if present == '1':
                    label_annotations[item].append(label_id)

    self._annotations[VocTask.classification] = dict(label_annotations)
def _load_subsets(self, subsets_dir):
    """Build a ``Subset`` for every plain subset list in ``subsets_dir``.

    A subset list is a ``.txt`` file whose name contains no ``_``. The file
    named ``DEFAULT_SUBSET_NAME`` is registered under the subset name
    ``None``. The first whitespace-separated field of each non-blank line
    is taken as an item name.

    Args:
        subsets_dir: Directory holding the ``<subset>.txt`` files.

    Returns:
        Dict of ``subset_name -> Subset`` with ``Subset.items`` filled in.
    """
    dir_files = dir_items(subsets_dir, '.txt', truncate_ext=True)

    subsets = {}
    for subset_file_name in dir_files:
        if '_' in subset_file_name:
            continue

        if subset_file_name == DEFAULT_SUBSET_NAME:
            subset_name = None
        else:
            subset_name = subset_file_name

        subset = __class__.Subset(subset_name, self)
        with open(osp.join(subsets_dir, subset_file_name + '.txt'), 'r') as f:
            # Test for emptiness before indexing: a blank line splits into
            # an empty list, so indexing first would raise IndexError.
            subset.items = [
                fields[0]
                for fields in (line.split() for line in f)
                if fields
            ]

        subsets[subset_name] = subset
    return subsets