# NOTE: this function yields inside a try/finally, so it is meant to be used as
# a context manager; the @contextlib.contextmanager decorator makes `with`
# usage work.
@contextlib.contextmanager
def _register_toy_dataset(
    dataset_name, image_generator, num_images, num_classes=-1, num_keypoints=0
):
    json_dataset, meta_data = create_toy_dataset(
        image_generator,
        num_images=num_images,
        num_classes=num_classes,
        num_keypoints=num_keypoints,
    )

    with make_temp_directory("detectron2go_tmp_dataset") as tmp_dir:
        json_file = os.path.join(tmp_dir, "{}.json".format(dataset_name))
        with open(json_file, "w") as f:
            json.dump(json_dataset, f)

        split_dict = {
            IM_DIR: image_generator.get_image_dir(),
            ANN_FN: json_file,
            "meta_data": meta_data,
        }
        register_dataset_split(dataset_name, split_dict)

        try:
            yield
        finally:
            # unregister so the name can be reused by later callers
            DatasetCatalog.remove(dataset_name)
            MetadataCatalog.remove(dataset_name)
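# Usage sketch for the helper above, assuming the @contextlib.contextmanager
# decorator as noted and an `image_generator` object (hypothetical here) that
# provides get_image_dir():
def _example_toy_dataset_usage(image_generator):
    with _register_toy_dataset("toy_ds", image_generator, num_images=3):
        dicts = DatasetCatalog.get("toy_ds")  # loads the registered split
        assert "toy_ds" in MetadataCatalog.list()
    # on exit the name is unregistered and can be reused
    assert "toy_ds" not in DatasetCatalog.list()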
def register_catalog(self):
    """
    Adhoc COCO (json) datasets assume the derived dataset can be created by
    changing only the json file. Two sources are currently supported:

    1) The dataset was registered using the standard COCO registration
       functions in D2, or `register_dataset_split` from D2Go; in this case
       `json_file` from the metadata is used to access the json file.
    2) The load func in DatasetCatalog is an instance of CallFuncWithJsonFile,
       which gives direct access to the json_file.

    In both cases, the metadata stays the same except for `name` and
    potentially `json_file`.
    """
    logger.info("Register {} from {}".format(self.new_ds_name, self.src_ds_name))
    metadata = MetadataCatalog.get(self.src_ds_name)

    load_func = DatasetCatalog[self.src_ds_name]
    src_json_file = (
        load_func.json_file
        if isinstance(load_func, CallFuncWithJsonFile)
        else metadata.json_file
    )

    # TODO cache ?
    with PathManager.open(src_json_file) as f:
        json_dict = json.load(f)
    assert "images" in json_dict, "Only support COCO-style json!"
    json_dict = self.new_json_dict(json_dict)
    self.tmp_dir = tempfile.mkdtemp(prefix="detectron2go_tmp_datasets")
    tmp_file = os.path.join(self.tmp_dir, "{}.json".format(self.new_ds_name))
    with open(tmp_file, "w") as f:
        json.dump(json_dict, f)

    # re-register DatasetCatalog
    if isinstance(load_func, CallFuncWithJsonFile):
        new_func = CallFuncWithJsonFile(func=load_func.func, json_file=tmp_file)
        DatasetCatalog.register(self.new_ds_name, new_func)
    else:
        # NOTE: only supports COCODataset as DS_TYPE since we cannot reconstruct
        # the split_dict
        register_dataset_split(
            self.new_ds_name,
            split_dict={ANN_FN: tmp_file, IM_DIR: metadata.image_root},
        )

    # re-register MetadataCatalog
    metadata_dict = metadata.as_dict()
    metadata_dict["name"] = self.new_ds_name
    if "json_file" in metadata_dict:
        metadata_dict["json_file"] = tmp_file
    MetadataCatalog.remove(self.new_ds_name)
    MetadataCatalog.get(self.new_ds_name).set(**metadata_dict)
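# A minimal subclass sketch of the adhoc pattern described in the docstring:
# derive a new dataset by rewriting the json dict through the `new_json_dict`
# hook. The base-class name and its constructor signature are assumptions
# inferred from the attributes used in register_catalog (src_ds_name /
# new_ds_name); the crowd filter itself is hypothetical.
class COCOWithoutCrowdAnnotations(AdhocCOCODataset):
    def __init__(self, src_ds_name):
        super().__init__(
            src_ds_name=src_ds_name,
            new_ds_name="{}_no_crowd".format(src_ds_name),
        )

    def new_json_dict(self, json_dict):
        # drop crowd annotations; everything else passes through unchanged
        json_dict["annotations"] = [
            ann for ann in json_dict["annotations"] if not ann.get("iscrowd", 0)
        ]
        return json_dict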
def experiment():
    result_df = pd.DataFrame(columns=['w20', 'size', 'AP', 'iter'])
    # astype() returns a new DataFrame, so the result must be assigned back
    result_df = result_df.astype({'w20': 'bool', 'size': 'int32', 'AP': 'float32'})
    date = datetime.date.today()
    time = datetime.datetime.now()
    agg = None  # guard for the finally block in case no trial completes
    try:
        for trial_id in range(10**6):
            sample_batch = generate_train_sets(True)
            sample_batch.extend(generate_train_sets(False))
            for sample in sample_batch:
                print("---------------------------------------\n",
                      'RUNNING EXPERIMENT WITH', sample[0], sample[1],
                      'TRIAL_ID IS', trial_id,
                      "\n---------------------------------------------")
                # cfg = TOY_cfg_and_register(trial_id, sample)
                cfg = initialize_cfg_and_register(trial_id, sample)
                trainer = Trainer(augmentations, cfg)
                trainer.resume_or_load(resume=False)
                try:
                    trainer.train()
                except hooks.StopFakeExc:
                    # training stopped early by the hook; use the AP recorded at stop
                    ap, iteration = trainer.info_at_stop
                else:
                    # EventStorage.latest() maps each metric to a (value, iteration) pair
                    ap, iteration = trainer.storage.latest()['segm/AP']
                result = {'w20': sample[1], 'size': str(sample[0]),
                          'AP': ap, 'iter': iteration}
                # DataFrame.append() is removed in pandas 2.x; concat a one-row frame
                result_df = pd.concat([result_df, pd.DataFrame([result])],
                                      ignore_index=True)
                with open(f'{base_output_dir}/results', 'a+') as f:
                    json.dump(result, f)
                    f.write(os.linesep)
                agg = result_df.groupby(['w20', 'size']).agg({'AP': ['mean', 'std']})

                # GPU memory stats in MB
                t = torch.cuda.get_device_properties(0).total_memory // (10**6)
                r = torch.cuda.memory_reserved(0) // (10**6)
                a = torch.cuda.memory_allocated(0) // (10**6)
                free = r - a  # free inside reserved

                # unregister so the next trial can register under the same name
                DatasetCatalog.remove(cfg.DATASETS.TRAIN[0])
                MetadataCatalog.remove(cfg.DATASETS.TRAIN[0])

                print("---------------------------------------\n", agg,
                      "\n---------------------------------------------")
                titles = ['TOTAL', 'RESERVED', 'ALLOCATED', 'FREE INSIDE RESERVED']
                vals = [t, r, a, free]
                print("\n".join(f'{title}:\t{val}' for title, val in zip(titles, vals)))
    except Exception as e:
        print(e)
    finally:
        time_info_str = "-".join(
            str(x) for x in [date.year, date.month, date.day, time.hour, time.minute]
        )
        result_df.to_csv(f'{base_output_dir}/results_pd-{time_info_str}.csv')
        if agg is not None:
            agg.to_csv(f'{base_output_dir}/agg_pd-{time_info_str}.csv')
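# The loop above assumes the previous trial's dataset is always registered; a
# guarded variant (hypothetical helper, not part of the original script) avoids
# KeyError when a trial fails before registration:
def safe_unregister(ds_name):
    if ds_name in DatasetCatalog.list():
        DatasetCatalog.remove(ds_name)
    if ds_name in MetadataCatalog.list():
        MetadataCatalog.remove(ds_name)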
def register_dataset(config, annot_df, images_df, annot):
    """
    Register train/test datasets for the configured fold via detectron2's
    register_coco_instances.
    """
    fold = config.general["fold"]
    train_dataset_name = f"my_dataset_train_{fold}"
    test_dataset_name = f"my_dataset_test_{fold}"
    train_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_train_{fold}.json")
    test_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_test_{fold}.json")

    # split annotations into train/test by fold
    train_annot_df = annot_df[annot_df["folds"] != fold]
    test_annot_df = annot_df[annot_df["folds"] == fold]
    train_annot_df = train_annot_df.drop(["normal_category", "normal_category_id"], axis=1)
    test_annot_df = test_annot_df.drop(["normal_category", "normal_category_id"], axis=1)

    # keep only images referenced by the corresponding annotations
    train_images_df = images_df[images_df["id"].isin(train_annot_df["image_id"].unique())]
    test_images_df = images_df[images_df["id"].isin(test_annot_df["image_id"].unique())]

    train_annot = annot.copy()
    test_annot = annot.copy()
    train_annot["annotations"] = train_annot_df.reset_index(drop=True).to_dict("records")
    train_annot["images"] = train_images_df.reset_index(drop=True).to_dict("records")
    test_annot["annotations"] = test_annot_df.reset_index(drop=True).to_dict("records")
    test_annot["images"] = test_images_df.reset_index(drop=True).to_dict("records")

    # use context managers so the file handles are closed after writing
    with open(train_dataset_file, "w") as f:
        json.dump(train_annot, f)
    with open(test_dataset_file, "w") as f:
        json.dump(test_annot, f)

    # remove stale registrations before re-registering under the same names
    if train_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(train_dataset_name)
        MetadataCatalog.remove(train_dataset_name)
    if test_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(test_dataset_name)
        MetadataCatalog.remove(test_dataset_name)

    register_coco_instances(train_dataset_name, {}, train_dataset_file,
                            os.path.join(DATASET_PATH, "data"))
    register_coco_instances(test_dataset_name, {}, test_dataset_file,
                            os.path.join(DATASET_PATH, "data"))
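# Usage sketch: register the splits for the configured fold, then fetch them
# back through the catalogs. The dataset names follow the f-strings used above;
# the arguments are assumed to match the dataframes expected by the function.
def _example_register_dataset(config, annot_df, images_df, annot):
    register_dataset(config, annot_df, images_df, annot)
    fold = config.general["fold"]
    train_dicts = DatasetCatalog.get(f"my_dataset_train_{fold}")
    test_metadata = MetadataCatalog.get(f"my_dataset_test_{fold}")
    print(len(train_dicts), test_metadata.thing_classes)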
def preview_augmentation(self, src, outdir: str = "./preview_augmentation", n_output: int = 100):
    """
    Rebuild the COCO json first and preview from that (simpler than filtering
    in place).
    Params::
        src: str, List[str], index, List[index]
    """
    outdir = correct_dirpath(outdir)
    coco = CocoManager()
    coco.add_json(self.coco_json_path)
    # filter by src
    if isinstance(src, str):
        coco.df_json = coco.df_json.loc[coco.df_json["images_file_name"] == src]
    elif isinstance(src, int):
        coco.df_json = coco.df_json.iloc[src:src + 1]
    elif isinstance(src, (list, tuple)):
        if isinstance(src[0], str):
            coco.df_json = coco.df_json.loc[coco.df_json["images_file_name"].isin(src)]
        elif isinstance(src[0], int):
            coco.df_json = coco.df_json.iloc[src, :]
    else:
        raise TypeError(f"unsupported src type: {type(src)}")
    coco.save(self.coco_json_path + ".cocomanager.json")
    # reload from the rebuilt coco json
    self.coco_json_path = self.coco_json_path + ".cocomanager.json"
    DatasetCatalog.remove(self.dataset_name)   # the key must be removed before re-registering
    MetadataCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
    self.__register_coco_instances(self.dataset_name, self.coco_json_path, self.image_root)
    super().__init__(self.cfg)
    makedirs(outdir, exist_ok=True, remake=True)
    count = 0
    for i, x in enumerate(self.data_loader):
        # x holds one batch (e.g. 2 samples), so loop over it
        for j, data in enumerate(x):
            if j > 0:
                continue
            # copy gt_*** to pred_*** so the Visualizer output matches the predictor's
            img = self.img_conv_dataloader(data)
            ins = data["instances"].to("cpu")
            if ins.has("gt_boxes"):
                ins.set("pred_boxes", ins.gt_boxes)
            if ins.has("gt_classes"):
                ins.set("pred_classes", ins.gt_classes)
            if ins.has("gt_keypoints"):
                ins.set("pred_keypoints", ins.gt_keypoints)
            if ins.has("gt_masks"):
                # gt_masks are polygons [x1, y1, x2, y2, ...]; convert them to
                # boolean masks [False, True, True, ...] as pred_masks expects
                segs = ins.get("gt_masks").polygons
                list_ndf = []
                for seg_a_class in segs:
                    ndf = convert_seg_point_to_bool(img.shape[0], img.shape[1], seg_a_class)
                    list_ndf.append(ndf)
                ndf = np.concatenate([[ndfwk] for ndfwk in list_ndf], axis=0)
                ins.set("pred_masks", torch.from_numpy(ndf))  # convert to Tensor
            data["instances"] = ins
            img = self.draw_annoetation(img, data)
            cv2.imwrite(outdir + "preview_augmentation." + str(i) + "." + str(j) + ".png", img)
            count += 1
        # check here rather than inside the inner loop: a break there would
        # only leave the inner loop and never stop the preview early
        if count > n_output:
            break
    DatasetCatalog.remove(self.dataset_name)   # remove again so the original json can be re-registered
    MetadataCatalog.remove(self.dataset_name)
    self.coco_json_path = self.coco_json_path_org
    self.__register_coco_instances(self.dataset_name, self.coco_json_path, self.image_root)
    super().__init__(self.cfg)
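# convert_seg_point_to_bool above is a project-local helper; detectron2 ships
# an equivalent utility that rasterizes COCO polygons into a boolean bitmask,
# which could serve the same purpose:
from detectron2.structures.masks import polygons_to_bitmask

def _polygons_to_bool_mask(polygons, height, width):
    # polygons: list of [x1, y1, x2, y2, ...] arrays for one instance
    return polygons_to_bitmask(polygons, height, width)  # np.ndarray of bool, (H, W)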
def extended_coco_load(json_file, image_root, dataset_name=None, loaded_json=None):
    """
    Load a json file with COCO's annotation format.
    Currently only supports instance segmentation annotations.

    Args:
        json_file (str): full path to the json file in COCO annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., "coco", "cityscapes").
            If provided, this function will also put "thing_classes" into the
            metadata associated with this dataset.
        loaded_json (str): optional loaded json content, used in InMemoryCOCO to
            avoid loading from json_file again.

    Returns:
        list[dict]: a list of dicts in "Detectron2 Dataset" format. (See DATASETS.md)

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
        2. When `dataset_name == 'coco'`, this function will translate COCO's
           incontiguous category ids to contiguous ids in [0, 80).
    """
    json_file = _cache_json_file(json_file)

    if loaded_json is None:
        coco_api = COCO(json_file)
    else:
        coco_api = InMemoryCOCO(loaded_json)

    # Get filtered classes
    all_cat_ids = coco_api.getCatIds()
    all_cats = coco_api.loadCats(all_cat_ids)

    # Setup classes to use for creating id map
    classes_to_use = [c["name"] for c in sorted(all_cats, key=lambda x: x["id"])]

    # Setup id map
    id_map = {}
    for cat_id, cat in zip(all_cat_ids, all_cats):
        if cat["name"] in classes_to_use:
            id_map[cat_id] = classes_to_use.index(cat["name"])

    # Register dataset in metadata catalog
    if dataset_name is not None:
        # overwrite attrs
        meta_dict = MetadataCatalog.get(dataset_name).as_dict()
        meta_dict["thing_classes"] = classes_to_use
        meta_dict["thing_dataset_id_to_contiguous_id"] = id_map
        # update MetadataCatalog (cannot change inplace, has to remove)
        MetadataCatalog.remove(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta_dict)
        # assert the change
        assert MetadataCatalog.get(dataset_name).thing_classes == classes_to_use

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    imgs = coco_api.loadImgs(img_ids)
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    logger.info("Loaded {} images from {}".format(len(imgs), json_file))

    # Return the coco converted to record list
    return convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name)
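# Minimal usage sketch for extended_coco_load; the paths and dataset name are
# hypothetical:
def _example_extended_coco_load():
    dicts = extended_coco_load(
        "datasets/my_dataset/annotations.json",
        "datasets/my_dataset/images",
        dataset_name="my_dataset",
    )
    # each record follows the Detectron2 dataset format
    print(len(dicts), sorted(dicts[0].keys()))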
def tearDown(self):
    # Need to remove injected datasets
    injected_datasets = set(DatasetCatalog) - self._builtin_datasets
    for ds in injected_datasets:
        DatasetCatalog.remove(ds)
        MetadataCatalog.remove(ds)
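# Counterpart setUp sketch: snapshot the datasets registered before the test
# runs so tearDown can compute what the test injected (assumes this lives in a
# unittest.TestCase):
def setUp(self):
    self._builtin_datasets = set(DatasetCatalog)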