def details(self):
    """Return a detailed comparison table of all dictionaries.

    Every key found in any dictionary becomes a row and every dictionary
    in ``self.dicts`` becomes a column; each cell holds that dictionary's
    value for the key, or NaN when the dictionary lacks the key.
    The result can be saved to Excel for detailed filtering and analysis.

    >>> dc = DictCmper({'d1': {'a': 1, 'b': 2}, 'd2': {'b': 3, 'e': 5}, 'd3': {'d': 4}})
    >>> dc.details()
        d1   d2   d3
    a  1.0  NaN  NaN
    b  2.0  3.0  NaN
    e  NaN  5.0  NaN
    d  NaN  NaN  4.0

    :return: a pandas DataFrame indexed by key, one column per dictionary
    """
    dict_list = list(self.dicts.values())

    # 1 Collect every key. A set cannot keep element order, so the keys of
    #   a merged dictionary are used instead (this also avoids installing
    #   the third-party orderedset package).
    all_keys = DictTool.or_(*dict_list).keys()

    # 2 One row per key, one cell per dictionary; NaN marks a missing key.
    rows = [[d.get(key, np.nan) for d in dict_list] for key in all_keys]

    # 3 Convert to a DataFrame labelled by dictionary name and by key.
    table = pd.DataFrame.from_records(rows, columns=self.dicts.keys())
    table.index = all_keys
    return table
def to_labelme_cls(self, root, *, bbox=True, seg=False, info=False):
    """Convert the ground-truth annotations into a labelme dataset.

    :param root: root directory of the images
    :param bbox: when true, add one shape per annotation whose label is the
        JSON dump of the annotation (extended with ``category_name``) and
        whose points come from ``xywh2ltrb(ann['bbox'])``
    :param seg: when true, also add one gray shape for every item of
        ``ann['segmentation']``
    :param info: when true, show the tqdm progress bar
    :return: a LabelmeDataset keyed by the json path relative to root; its
        extdata stores the categories plus matching anomalies:
        ``not_finds`` (file names with no matching file under root) and
        ``multimatch`` (file names that matched several files)
    """
    root = Dir(root)
    data = {}
    cat_names = {cat['id']: cat['name'] for cat in self.gt_dict['categories']}

    # 1 Preparation: build the file-name index over everything under root
    path_groups = PathGroups.groupby(root.select_files('**/*'))

    # 2 Walk over the images and generate the labelme data
    not_finds = set()  # images listed in coco but not found under root
    multimatch = {}  # coco images that match several files under root
    progress = tqdm(self.group_gt(reserve_empty=True), disable=not info)
    for img, anns in progress:
        # 2.1 File matching
        file_name = img['file_name']
        candidates = path_groups.find_files(file_name)
        if not candidates:  # no matching image: skip it
            not_finds.add(file_name)
            continue
        if len(candidates) > 1:
            multimatch[file_name] = candidates
        imfile = candidates[0]  # the first match is used in every case

        # 2.2 Content conversion
        lmdict = LabelmeDict.gen_data(imfile)
        shapes = lmdict['shapes']

        # The image-level record is stored as a shape of its own
        image_meta = DictTool.or_(img, {'xltype': 'image'})
        image_label = json.dumps(image_meta, ensure_ascii=False)
        shapes.append(LabelmeDict.gen_shape(image_label, [[-10, 0], [-5, 0]]))

        for ann in anns:
            if bbox:
                # Rebinding ``ann`` is deliberate: the seg branch below
                # reads the extended annotation, as the original did.
                ann = DictTool.or_(
                    ann, {'category_name': cat_names[ann['category_id']]})
                box_label = json.dumps(ann, ensure_ascii=False)
                shapes.append(
                    LabelmeDict.gen_shape(box_label, xywh2ltrb(ann['bbox'])))

            if seg:
                # Show the segmentation as well (in gray)
                for polygon in ann['segmentation']:
                    seg_label = json.dumps(
                        {'box_id': ann['id'],
                         'xltype': 'seg',
                         'shape_color': [191, 191, 191]},
                        ensure_ascii=False)
                    shapes.append(LabelmeDict.gen_shape(seg_label, polygon))

        json_file = imfile.with_suffix('.json')
        data[json_file.relpath(root)] = lmdict

    extdata = {'categories': self.gt_dict['categories'],
               'not_finds': not_finds,
               'multimatch': Groups(multimatch)}
    return LabelmeDataset(root, data, extdata=extdata)