def read_voxel(voxelfile):
    """
    Reads a voxel grid from a .mat file and converts it to a set of
    centered, scale-normalized vertices.
    """
    with PathManager.open(voxelfile, "rb") as f:
        voxel = sio.loadmat(f)["voxel"]
    voxel = np.rot90(voxel, k=3, axes=(1, 2))
    verts = np.argwhere(voxel > 0).astype(np.float32, copy=False)

    # centering and normalization
    min_x = np.min(verts[:, 0])
    max_x = np.max(verts[:, 0])
    min_y = np.min(verts[:, 1])
    max_y = np.max(verts[:, 1])
    min_z = np.min(verts[:, 2])
    max_z = np.max(verts[:, 2])
    verts[:, 0] = verts[:, 0] - (max_x + min_x) / 2
    verts[:, 1] = verts[:, 1] - (max_y + min_y) / 2
    verts[:, 2] = verts[:, 2] - (max_z + min_z) / 2
    scale = np.sqrt(np.max(np.sum(verts ** 2, axis=1))) * 2
    verts /= scale

    verts = torch.tensor(verts, dtype=torch.float32)
    return verts
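# Minimal usage sketch for read_voxel; the .mat path below is a made-up
# example. After centering, vertices are shifted to the bounding-box midpoint
# and divided by twice the maximum radius, so every vertex ends up within
# distance 0.5 of the origin.
verts = read_voxel("datasets/pix3d/voxels/chair/voxel_0001.mat")  # hypothetical path
assert float(verts.norm(dim=1).max()) <= 0.5 + 1e-6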
def new_import(name, globals=None, locals=None, fromlist=(), level=0):
    if (
        # Only deal with relative imports inside config files
        level != 0
        and globals is not None
        and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME)
    ):
        cur_file = find_relative_file(globals["__file__"], name, level)
        _validate_py_syntax(cur_file)
        spec = importlib.machinery.ModuleSpec(
            _random_package_name(cur_file), None, origin=cur_file
        )
        module = importlib.util.module_from_spec(spec)
        module.__file__ = cur_file
        with PathManager.open(cur_file) as f:
            content = f.read()
        exec(compile(content, cur_file, "exec"), module.__dict__)
        for name in fromlist:  # turn imported dict into DictConfig automatically
            val = _cast_to_config(module.__dict__[name])
            module.__dict__[name] = val
        return module
    return old_import(name, globals, locals, fromlist=fromlist, level=level)
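# A hedged sketch of how an import hook like new_import is typically installed:
# save the builtin importer (the `old_import` that new_import falls back to),
# swap in the hook while config files execute, then restore the original. The
# context-manager name here is illustrative, not part of the source above.
import builtins
from contextlib import contextmanager

old_import = builtins.__import__  # the fallback referenced by new_import

@contextmanager
def _patch_import():
    builtins.__import__ = new_import
    try:
        yield
    finally:
        builtins.__import__ = old_import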
def evaluate(self, img_ids=None):
    """
    Args:
        img_ids: a list of image IDs to evaluate on. Defaults to None for the whole dataset.
    """
    if self._distributed:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    if len(predictions) == 0:
        self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(predictions, f)

    self._results = OrderedDict()
    if "proposals" in predictions[0]:
        self._eval_box_proposals(predictions)
    if "instances" in predictions[0]:
        self._eval_predictions(set(self._tasks), predictions, img_ids=img_ids)
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
def prepare_for_launch(args):
    """
    Load config, figure out working directory, create runner.
        - when args.config_file is empty, the returned cfg will be the default one
        - the returned output_dir is always non-empty; args.output_dir has higher
          priority than cfg.OUTPUT_DIR.
    """
    print(args)
    runner = create_runner(args.runner)
    cfg = runner.get_default_cfg()
    if args.config_file:
        with PathManager.open(reroute_config_path(args.config_file), "r") as f:
            print("Loaded config file {}:\n{}".format(args.config_file, f.read()))
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
    else:
        cfg = create_cfg_from_cli_args(args, default_cfg=cfg)
    cfg.freeze()

    assert args.output_dir or args.config_file
    output_dir = args.output_dir or cfg.OUTPUT_DIR
    return cfg, output_dir, runner
def dump_trained_model_configs(
    output_dir: str, trained_cfgs: Dict[str, CfgNode]
) -> Dict[str, str]:
    """Writes trained model config files to output_dir.

    Args:
        output_dir: output file directory.
        trained_cfgs: map from model name to the config of trained model.

    Returns:
        A map of model name to model config path.
    """
    trained_model_configs = {}
    trained_model_config_dir = os.path.join(output_dir, "trained_model_configs")
    PathManager.mkdirs(trained_model_config_dir)
    for name, trained_cfg in trained_cfgs.items():
        config_file = os.path.join(trained_model_config_dir, "{}.yaml".format(name))
        trained_model_configs[name] = config_file
        if comm.is_main_process():
            logger.info("Dump trained config file: {}".format(config_file))
            with PathManager.open(config_file, "w") as f:
                f.write(trained_cfg.dump())
    return trained_model_configs
def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname,
                                [ovthresh], [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections.
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations.
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name.
    [ovthresh]: Overlap threshold (default = 0.5).
    [use_07_metric]: Whether to use VOC07's 11-point AP computation
        (default False).
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name

    # first load gt
    # read list of images
    with PathManager.open(imagesetfile, "r") as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    # load annots
    recs = {}
    for imagename in imagenames:
        recs[imagename] = parse_rec(annopath.format(imagename))

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj["name"] == classname]
        bbox = np.array([x["bbox"] for x in R])
        difficult = np.array([x["difficult"] for x in R]).astype(bool)
        # difficult = np.array([False for x in R]).astype(bool)  # treat all "difficult" as GT
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, "r") as f:
        lines = f.readlines()

    splitlines = [x.strip().split(" ") for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R["bbox"].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
            ih = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = iw * ih

            # union
            uni = (
                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
                - inters
            )

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
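# Hedged usage sketch for voc_eval; the path templates below are illustrative.
# detpath needs a "{}" slot for the class name, annopath one for the image name.
rec, prec, ap = voc_eval(
    detpath="results/det_test_{}.txt",        # hypothetical detections template
    annopath="VOC2007/Annotations/{}.xml",    # hypothetical annotations template
    imagesetfile="VOC2007/ImageSets/Main/test.txt",
    classname="aeroplane",
    ovthresh=0.5,
    use_07_metric=True,
)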
def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList:
    """
    Traverses all keyframes of a video file. Returns a list of keyframe
    timestamps. Timestamps are counts in timebase units.

    Args:
        video_fpath (str): Video file path
        video_stream_idx (int): Video stream index (default: 0)
    Returns:
        List[int]: list of keyframe timestamps (timestamp is a count in timebase units)
    """
    try:
        with PathManager.open(video_fpath, "rb") as io:
            container = av.open(io, mode="r")
            stream = container.streams.video[video_stream_idx]
            keyframes = []
            pts = -1
            # Note: even though we request forward seeks for keyframes, sometimes
            # a keyframe in backwards direction is returned. We introduce tolerance
            # as a max count of ignored backward seeks
            tolerance_backward_seeks = 2
            while True:
                try:
                    container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
                except av.AVError as e:
                    # the exception occurs when the video length is exceeded,
                    # we then return whatever data we've already collected
                    logger = logging.getLogger(__name__)
                    logger.debug(
                        f"List keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}"
                    )
                    return keyframes
                except OSError as e:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"List keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}"
                    )
                    return []
                packet = next(container.demux(video=video_stream_idx))
                if packet.pts is not None and packet.pts <= pts:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Video file {video_fpath}, stream {video_stream_idx}: "
                        f"bad seek for packet {pts + 1} (got packet {packet.pts}), "
                        f"tolerance {tolerance_backward_seeks}."
                    )
                    tolerance_backward_seeks -= 1
                    if tolerance_backward_seeks == 0:
                        return []
                    pts += 1
                    continue
                tolerance_backward_seeks = 2
                pts = packet.pts
                if pts is None:
                    return keyframes
                if packet.is_keyframe:
                    keyframes.append(pts)
            return keyframes
    except OSError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"List keyframes: Error opening video file container {video_fpath}, "
            f"OS error: {e}"
        )
    except RuntimeError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"List keyframes: Error opening video file container {video_fpath}, "
            f"Runtime error: {e}"
        )
    return []
def _eval_predictions(self, predictions, img_ids=None):
    """
    Evaluate predictions. Fill self._results with the metrics of the tasks.
    """
    self._logger.info("Preparing results for COCO format ...")
    coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
    tasks = self._tasks or self._tasks_from_predictions(coco_results)

    # unmap the category ids for COCO
    if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
        dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
        all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
        num_classes = len(all_contiguous_ids)
        assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1

        reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()}
        for result in coco_results:
            category_id = result["category_id"]
            assert category_id < num_classes, (
                f"A prediction has class={category_id}, "
                f"but the dataset only has {num_classes} classes and "
                f"predicted class id should be in [0, {num_classes - 1}]."
            )
            result["category_id"] = reverse_id_mapping[category_id]

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "coco_instances_results.json")
        self._logger.info("Saving results to {}".format(file_path))
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(coco_results))
            f.flush()

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info(
        "Evaluating predictions with {} COCO API...".format(
            "unofficial" if self._use_fast_impl else "official"
        )
    )
    for task in sorted(tasks):
        coco_eval = (
            _evaluate_predictions_on_coco(
                self._coco_api,
                coco_results,
                task,
                kpt_oks_sigmas=self._kpt_oks_sigmas,
                use_fast_impl=self._use_fast_impl,
                img_ids=img_ids,
            )
            if len(coco_results) > 0
            else None  # cocoapi does not handle empty results very well
        )

        res = self._derive_coco_results(
            coco_eval, task, class_names=self._metadata.get("thing_classes")
        )
        self._results[task] = res
def evaluate(self):
    """
    Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):

    * Mean intersection-over-union averaged across classes (mIoU)
    * Frequency Weighted IoU (fwIoU)
    * Mean pixel accuracy averaged across classes (mACC)
    * Pixel Accuracy (pACC)
    """
    if self._distributed:
        synchronize()
        conf_matrix_list = all_gather(self._conf_matrix)
        self._predictions = all_gather(self._predictions)
        self._predictions = list(itertools.chain(*self._predictions))
        if not is_main_process():
            return

        self._conf_matrix = np.zeros_like(self._conf_matrix)
        for conf_matrix in conf_matrix_list:
            self._conf_matrix += conf_matrix

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "sem_seg_predictions.json")
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(self._predictions))

    acc = np.full(self._num_classes, np.nan, dtype=float)
    iou = np.full(self._num_classes, np.nan, dtype=float)
    tp = self._conf_matrix.diagonal()[:-1].astype(float)
    pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
    class_weights = pos_gt / np.sum(pos_gt)
    pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
    acc_valid = pos_gt > 0
    acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
    iou_valid = (pos_gt + pos_pred) > 0
    union = pos_gt + pos_pred - tp
    iou[acc_valid] = tp[acc_valid] / union[acc_valid]
    macc = np.sum(acc[acc_valid]) / np.sum(acc_valid)
    miou = np.sum(iou[acc_valid]) / np.sum(iou_valid)
    fiou = np.sum(iou[acc_valid] * class_weights[acc_valid])
    pacc = np.sum(tp) / np.sum(pos_gt)

    res = {}
    res["mIoU"] = 100 * miou
    res["fwIoU"] = 100 * fiou
    for i, name in enumerate(self._class_names):
        res["IoU-{}".format(name)] = 100 * iou[i]
    res["mACC"] = 100 * macc
    res["pACC"] = 100 * pacc
    for i, name in enumerate(self._class_names):
        res["ACC-{}".format(name)] = 100 * acc[i]

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(res, f)
    results = OrderedDict({"sem_seg": res})
    self._logger.info(results)
    return results
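# A toy sanity check of the IoU bookkeeping above, using a hypothetical
# 2-class confusion matrix with the last row/column reserved for ignored
# pixels, as in the evaluator. Per the code, ground-truth counts are column
# sums (axis=0) and prediction counts are row sums (axis=1).
import numpy as np

cm = np.array([[3, 1, 0],
               [1, 4, 0],
               [0, 0, 0]], dtype=float)
tp = cm.diagonal()[:-1]              # [3, 4]
pos_gt = cm[:-1, :-1].sum(axis=0)    # [4, 5]
pos_pred = cm[:-1, :-1].sum(axis=1)  # [4, 5]
iou = tp / (pos_gt + pos_pred - tp)  # [3/5, 4/6]
print(iou.mean())                    # mIoU for the toy matrix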
def load_pix2pix_json(
    json_path,
    input_folder,
    gt_folder,
    mask_folder,
    real_json_path=None,
    real_folder=None,
    max_num=1e10,
):
    """
    Args:
        json_path (str): the directory where the json file exists which saves
            the filenames and labels.
        input_folder (str): the directory for the input/source images
        gt_folder (str): the directory for the ground_truth/target images
        mask_folder (str): the directory for the masks

    Returns:
        list[dict]: a list of dicts
    """
    real_filenames = {}
    if real_json_path is not None:
        with PathManager.open(real_json_path, "r") as f:
            real_filenames = json.load(f)

    data = []
    with PathManager.open(json_path, "r") as f:
        filenames = json.load(f)
        in_len = len(filenames)
        real_len = len(real_filenames)
        total_len = min(max(in_len, real_len), max_num)
        real_keys = [*real_filenames.keys()]
        in_keys = [*filenames.keys()]
        cnt = 0
        # for fname in filenames.keys():
        while cnt < total_len:
            fname = in_keys[cnt % in_len]
            input_label = filenames[fname]
            if isinstance(input_label, (tuple, list)):
                assert len(input_label) == 2, (
                    "Save (real_name, label) as the value of the json dict for resampling"
                )
                fname, input_label = input_label
            f = {
                "file_name": fname,
                "input_folder": input_folder,
                "gt_folder": gt_folder,
                "mask_folder": mask_folder,
                "input_label": input_label,
                "real_folder": real_folder,
            }
            if real_len > 0:
                real_fname = real_keys[cnt % real_len]
                f["real_file_name"] = real_fname
            data.append(f)
            cnt += 1
            # 5000 is the general number of images used to calculate FID in GANs
            # if max_num > 0 and len(data) == max_num:
            #     logger.info("Reached maximum number of test data: {} ".format(len(data)))
            #     return data

    logger.info("Total number of data dicts: {} ".format(len(data)))
    return data
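# A hedged sketch of the two json layouts load_pix2pix_json accepts; file
# names and labels here are made up. A value may be a plain label, or a
# (real_name, label) pair used for resampling:
#
#   {"0001.png": 3, "0002.png": 7}
#   {"0001.png": ["real_0001.png", 3]}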
def _open(self, path, mode="r", **kwargs):
    return PathManager.open(self._get_local_path(path), mode, **kwargs)
def _load_image(self, id: int) -> Image.Image:
    path = self.coco.loadImgs(id)[0]["file_name"]
    with PathManager.open(os.path.join(self.root, path), "rb") as f:
        image = Image.open(f).convert("RGB")
    return image
def dump_flops_info(model, inputs, output_dir, use_eval_mode=True):
    """
    Dump flops information about the model, using the given model inputs.
    Information is dumped to output_dir using various flop counting tools in
    different formats. Only a simple table is printed to the terminal.

    Args:
        inputs: a tuple of positional arguments used to call model with.
        use_eval_mode: turn the model into eval mode for flop counting. Otherwise,
            will use the original mode. It's recommended to use eval mode, because
            training mode typically follows a different codepath.
    """
    if not comm.is_main_process():
        return
    logger.info("Evaluating model's number of parameters and FLOPs")

    try:
        model = copy.deepcopy(model)
    except Exception:
        logger.info("Failed to deepcopy the model and skip FlopsEstimation.")
        return

    # delete other forward_pre_hooks so they are not simultaneously called
    # (iterate over a copy of the keys since the dict is mutated in the loop)
    for k in list(model._forward_pre_hooks):
        del model._forward_pre_hooks[k]

    if use_eval_mode:
        model.eval()
    inputs = copy.deepcopy(inputs)

    # 1. using mobile_cv flop counter
    try:
        fest = flops_utils.FlopsEstimation(model)
        with fest.enable():
            model(*inputs)
            fest.add_flops_info()
            model_str = str(model)
        output_file = os.path.join(output_dir, "flops_str_mobilecv.txt")
        with PathManager.open(output_file, "w") as f:
            f.write(model_str)
        logger.info(f"Flops info written to {output_file}")
    except Exception:
        logger.exception("Failed to estimate flops using mobile_cv's FlopsEstimation")

    # 2. using d2/fvcore's flop counter
    output_file = os.path.join(output_dir, "flops_str_fvcore.txt")
    try:
        flops = FlopCountAnalysis(model, inputs)

        # 2.1: dump as model str
        model_str = flop_count_str(flops)
        with PathManager.open(output_file, "w") as f:
            f.write(model_str)
        logger.info(f"Flops info written to {output_file}")

        # 2.2: dump as table
        flops_table = flop_count_table(flops, max_depth=10)
        output_file = os.path.join(output_dir, "flops_table_fvcore.txt")
        with PathManager.open(output_file, "w") as f:
            f.write(flops_table)
        logger.info(f"Flops table (full version) written to {output_file}")

        # 2.3: print a table with a shallow depth
        flops_table = flop_count_table(flops, max_depth=3)
        logger.info("Flops table:\n" + flops_table)
    except Exception:
        with PathManager.open(output_file, "w") as f:
            traceback.print_exc(file=f)
        logger.warning(
            "Failed to estimate flops using detectron2's FlopCountAnalysis. "
            f"Error written to {output_file}."
        )
        flops = float("nan")
    return flops
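# Hedged usage sketch for dump_flops_info: take one batch from a data loader
# and pass it as the positional-args tuple (variable names are illustrative).
batch = next(iter(data_loader))
flops = dump_flops_info(model, (batch,), output_dir="./flops_dump")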
parser.add_argument("--input", required=True, help="JSON file produced by the model") parser.add_argument("--output", required=True, help="output directory") parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") args = parser.parse_args() logger = setup_logger() with PathManager.open(args.input, "r") as f: predictions = json.load(f) pred_by_image = defaultdict(list) for p in predictions: pred_by_image[p["image_id"]].append(p) dicts = list(DatasetCatalog.get(args.dataset)) metadata = MetadataCatalog.get(args.dataset) if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): def dataset_id_map(ds_id): return metadata.thing_dataset_id_to_contiguous_id[ds_id] elif "lvis" in args.dataset: # LVIS results are in the same format as COCO results, but have a different
def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/coco/train2017".
        gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017".
        json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json".

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
    """

    def _convert_category_id(segment_info, meta):
        if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]:
            segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
                segment_info["category_id"]
            ]
            segment_info["isthing"] = True
        else:
            segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][
                segment_info["category_id"]
            ]
            segment_info["isthing"] = False
        return segment_info

    with PathManager.open(json_file) as f:
        json_info = json.load(f)

    ret = []
    if "fake" not in gt_dir:
        for ann in json_info["annotations"]:
            image_id = int(ann["image_id"])
            # TODO: currently we assume image and label have the same filename but
            # different extension, and images have extension ".jpg" for COCO. Need
            # to make image extension a user-provided argument if we extend this
            # function to support other COCO-like datasets.
            image_file = os.path.join(
                image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg"
            )
            assert PathManager.isfile(image_file), image_file
            label_file = os.path.join(gt_dir, ann["file_name"])
            assert PathManager.isfile(label_file), label_file
            segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]]
            ret.append(
                {
                    "file_name": image_file,
                    "image_id": image_id,
                    "pan_seg_file_name": label_file,
                    "segments_info": segments_info,
                }
            )
    else:
        print("Assuming fake labels dataset.")
        for img in json_info["images"]:
            image_id = int(img["id"])
            # TODO: same filename/extension assumptions as in the branch above.
            image_file = os.path.join(
                image_dir, os.path.splitext(img["file_name"])[0] + ".jpg"
            )
            assert PathManager.isfile(image_file), image_file
            label_file = os.path.join(gt_dir, img["file_name"].replace(".jpg", ".png"))
            assert PathManager.isfile(label_file), label_file
            segments_info = []  # _convert_category_id(x, meta) for x in ann["segments_info"]]
            ret.append(
                {
                    "file_name": image_file,
                    "image_id": image_id,
                    "pan_seg_file_name": label_file,
                    "segments_info": segments_info,
                }
            )

    assert len(ret), f"No images found in {image_dir}!"
    assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"]
    return ret
def evaluate_for_pix3d(
    predictions,
    dataset,
    metadata,
    filter_iou,
    mesh_models=None,
    iou_thresh=0.5,
    mask_thresh=0.5,
    device=None,
    vis_preds=False,
):
    from PIL import Image

    if device is None:
        device = torch.device("cpu")
    F1_TARGET = "F1@0.300000"
    # classes
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {
        v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    # initialize tensors to record box & mask AP, number of gt positives
    box_apscores, box_aplabels = {}, {}
    mask_apscores, mask_aplabels = {}, {}
    mesh_apscores, mesh_aplabels = {}, {}
    npos = {}
    for cat_id in cat_ids:
        box_apscores[cat_id] = [torch.tensor([], dtype=torch.float32, device=device)]
        box_aplabels[cat_id] = [torch.tensor([], dtype=torch.uint8, device=device)]
        mask_apscores[cat_id] = [torch.tensor([], dtype=torch.float32, device=device)]
        mask_aplabels[cat_id] = [torch.tensor([], dtype=torch.uint8, device=device)]
        mesh_apscores[cat_id] = [torch.tensor([], dtype=torch.float32, device=device)]
        mesh_aplabels[cat_id] = [torch.tensor([], dtype=torch.uint8, device=device)]
        npos[cat_id] = 0.0
    box_covered = []
    mask_covered = []
    mesh_covered = []

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore.
        image_file_name = dataset.loadImgs([gt_ann["image_id"]])[0]["file_name"]
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue
        npos[gt_label] += 1.0

    for prediction in predictions:
        original_id = prediction["image_id"]
        image_width = dataset.loadImgs([original_id])[0]["width"]
        image_height = dataset.loadImgs([original_id])[0]["height"]
        image_size = [image_height, image_width]
        image_file_name = dataset.loadImgs([original_id])[0]["file_name"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore.
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue

        if "instances" not in prediction:
            continue

        num_img_preds = len(prediction["instances"])
        if num_img_preds == 0:
            continue

        # predictions
        scores = prediction["instances"].scores
        boxes = prediction["instances"].pred_boxes.to(device)
        labels = prediction["instances"].pred_classes
        masks_rles = prediction["instances"].pred_masks_rle
        if hasattr(prediction["instances"], "pred_meshes"):
            meshes = prediction["instances"].pred_meshes  # predicted meshes
            verts = [mesh[0] for mesh in meshes]
            faces = [mesh[1] for mesh in meshes]
            meshes = Meshes(verts=verts, faces=faces).to(device)
        else:
            meshes = ico_sphere(4, device)
            meshes = meshes.extend(num_img_preds).to(device)
        if hasattr(prediction["instances"], "pred_dz"):
            pred_dz = prediction["instances"].pred_dz
            heights = boxes.tensor[:, 3] - boxes.tensor[:, 1]
            # NOTE see appendix for derivation of pred dz
            pred_dz = pred_dz[:, 0] * heights.cpu()
        else:
            raise ValueError("Z range of box not predicted")

        assert prediction["instances"].image_size[0] == image_height
        assert prediction["instances"].image_size[1] == image_width

        # ground truth
        # annotations corresponding to original_id (aka coco image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        assert len(gt_ann_ids) == 1  # note that pix3d has one annotation per image
        gt_anns = dataset.loadAnns(gt_ann_ids)[0]
        assert gt_anns["image_id"] == original_id

        # get original ground truth mask, box, label & mesh
        maskfile = os.path.join(metadata.image_root, gt_anns["segmentation"])
        with PathManager.open(maskfile, "rb") as f:
            gt_mask = torch.tensor(np.asarray(Image.open(f), dtype=np.float32) / 255.0)
        assert gt_mask.shape[0] == image_height and gt_mask.shape[1] == image_width

        gt_mask = (gt_mask > 0).to(dtype=torch.uint8)  # binarize mask
        gt_mask_rle = [mask_util.encode(np.array(gt_mask[:, :, None], order="F"))[0]]
        gt_box = np.array(gt_anns["bbox"]).reshape(-1, 4)  # xywh from coco
        gt_box = BoxMode.convert(gt_box, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_label = gt_anns["category_id"]
        faux_gt_targets = Boxes(torch.tensor(gt_box, dtype=torch.float32, device=device))

        # load gt mesh and extrinsics/intrinsics
        gt_R = torch.tensor(gt_anns["rot_mat"]).to(device)
        gt_t = torch.tensor(gt_anns["trans_mat"]).to(device)
        gt_K = torch.tensor(gt_anns["K"]).to(device)
        if mesh_models is not None:
            modeltype = gt_anns["model"]
            gt_verts, gt_faces = (
                mesh_models[modeltype][0].clone(),
                mesh_models[modeltype][1].clone(),
            )
            gt_verts = gt_verts.to(device)
            gt_faces = gt_faces.to(device)
        else:
            # load from disc
            raise NotImplementedError
        gt_verts = shape_utils.transform_verts(gt_verts, gt_R, gt_t)
        gt_zrange = torch.stack([gt_verts[:, 2].min(), gt_verts[:, 2].max()])
        gt_mesh = Meshes(verts=[gt_verts], faces=[gt_faces])

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        valid_pred_ids = boxiou > filter_iou

        # mask iou
        miou = mask_util.iou(masks_rles, gt_mask_rle, [0])

        # # gt zrange (zrange stores min_z and max_z)
        # # zranges = torch.stack([gt_zrange] * len(meshes), dim=0)

        # predicted zrange (= pred_dz)
        assert hasattr(prediction["instances"], "pred_dz")
        # It's impossible to predict the center location in Z (=tc)
        # from the image. See appendix for more.
        tc = (gt_zrange[1] + gt_zrange[0]) / 2.0
        # Given a center location (tc) and a focal_length,
        # pred_dz = pred_dz * box_h * tc / focal_length
        # See appendix for more.
        zranges = torch.stack(
            [
                torch.stack(
                    [
                        tc - tc * pred_dz[i] / 2.0 / gt_K[0],
                        tc + tc * pred_dz[i] / 2.0 / gt_K[0],
                    ]
                )
                for i in range(len(meshes))
            ],
            dim=0,
        )

        gt_Ks = gt_K.view(1, 3).expand(len(meshes), 3)
        meshes = transform_meshes_to_camera_coord_system(
            meshes, boxes.tensor, zranges, gt_Ks, image_size
        )

        if vis_preds:
            vis_utils.visualize_predictions(
                original_id,
                image_file_name,
                scores,
                labels,
                boxes.tensor,
                masks_rles,
                meshes,
                metadata,
                "/tmp/output",
            )

        shape_metrics = compare_meshes(meshes, gt_mesh, reduce=False)

        # sort predictions in descending order
        scores_sorted, idx_sorted = torch.sort(scores, descending=True)

        for pred_id in range(num_img_preds):
            # remember we only evaluate the preds that have overlap more than
            # iou_filter with the ground truth prediction
            if valid_pred_ids[idx_sorted[pred_id], 0] == 0:
                continue
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[idx_sorted[pred_id]].item()]
            pred_miou = miou[idx_sorted[pred_id]].item()
            pred_biou = boxiou[idx_sorted[pred_id]].item()
            pred_score = scores[idx_sorted[pred_id]].view(1).to(device)
            # note that metrics returns f1 in % (=x100)
            pred_f1 = shape_metrics[F1_TARGET][idx_sorted[pred_id]].item() / 100.0

            # mask
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if (
                (pred_label == gt_label)
                and (pred_miou > iou_thresh)
                and (original_id not in mask_covered)
            ):
                tpfp[0] = 1
                mask_covered.append(original_id)
            mask_apscores[pred_label].append(pred_score)
            mask_aplabels[pred_label].append(tpfp)

            # box
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if (
                (pred_label == gt_label)
                and (pred_biou > iou_thresh)
                and (original_id not in box_covered)
            ):
                tpfp[0] = 1
                box_covered.append(original_id)
            box_apscores[pred_label].append(pred_score)
            box_aplabels[pred_label].append(tpfp)

            # mesh
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if (
                (pred_label == gt_label)
                and (pred_f1 > iou_thresh)
                and (original_id not in mesh_covered)
            ):
                tpfp[0] = 1
                mesh_covered.append(original_id)
            mesh_apscores[pred_label].append(pred_score)
            mesh_aplabels[pred_label].append(tpfp)

    # check things for eval
    # assert npos.sum() == len(dataset.dataset["annotations"])

    # convert to tensors
    pix3d_metrics = {}
    boxap, maskap, meshap = 0.0, 0.0, 0.0
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        cat_box_ap = VOCap.compute_ap(
            torch.cat(box_apscores[cat_id]), torch.cat(box_aplabels[cat_id]), npos[cat_id]
        )
        boxap += cat_box_ap
        pix3d_metrics["box_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_box_ap

        cat_mask_ap = VOCap.compute_ap(
            torch.cat(mask_apscores[cat_id]), torch.cat(mask_aplabels[cat_id]), npos[cat_id]
        )
        maskap += cat_mask_ap
        pix3d_metrics["mask_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_mask_ap

        cat_mesh_ap = VOCap.compute_ap(
            torch.cat(mesh_apscores[cat_id]), torch.cat(mesh_aplabels[cat_id]), npos[cat_id]
        )
        meshap += cat_mesh_ap
        pix3d_metrics["mesh_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_mesh_ap

    pix3d_metrics["box_ap@%.1f" % iou_thresh] = boxap / valid
    pix3d_metrics["mask_ap@%.1f" % iou_thresh] = maskap / valid
    pix3d_metrics["mesh_ap@%.1f" % iou_thresh] = meshap / valid

    # print test ground truth
    vis_utils.print_instances_class_histogram(
        [npos[cat_id] for cat_id in cat_ids],  # number of instances
        [dataset.loadCats([cat_id])[0]["name"] for cat_id in cat_ids],  # class names
        pix3d_metrics,
    )

    return pix3d_metrics
def dump_cfg(cfg, path):
    if comm.is_main_process():
        with PathManager.open(path, "w") as f:
            f.write(cfg.dump())
        logger.info("Full config saved to {}".format(path))
def _eval_predictions(self, predictions, img_ids=None):
    """
    Evaluate predictions. Fill self._results with the metrics of the tasks.
    """
    self._logger.info("Preparing results for COCO format ...")
    # list[dict], len = num_val_images
    coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
    # "bbox" for detection
    tasks = self._tasks or self._tasks_from_predictions(coco_results)

    ### 2021.3.1
    # unmap the category ids for COCO
    # this mapping is set up in __init__
    if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
        dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
        all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
        num_classes = len(all_contiguous_ids)
        assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1

        # invert the id mapping: map contiguous category ids back to the
        # original (possibly non-contiguous) category ids
        reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()}
        for result in coco_results:
            category_id = result["category_id"]
            assert category_id < num_classes, (
                f"A prediction has class={category_id}, "
                f"but the dataset only has {num_classes} classes and "
                f"predicted class id should be in [0, {num_classes - 1}]."
            )
            result["category_id"] = reverse_id_mapping[category_id]

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "coco_instances_results.json")
        self._logger.info("Saving results to {}".format(file_path))
        # at this point coco_results is a
        # list[dict: {"image_id":, "category_id":, "bbox":, "score":}]
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(coco_results))
            f.flush()

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info(
        "Evaluating predictions with {} COCO API...".format(
            "unofficial" if self._use_fast_impl else "official"
        )
    )
    # there may be multiple tasks: bbox, segmentation, keypoints
    for task in sorted(tasks):
        coco_eval = (
            # evaluate predictions against the coco_api;
            # returns a COCOeval object
            _evaluate_predictions_on_coco(
                self._coco_api,
                coco_results,
                task,
                kpt_oks_sigmas=self._kpt_oks_sigmas,
                use_fast_impl=self._use_fast_impl,
                img_ids=img_ids,  # img_ids is None
            )
            if len(coco_results) > 0
            else None  # cocoapi does not handle empty results very well
        )
        """
        About the COCOeval object:
        self.eval = {
            'params': p,  # stores the parameter configuration
            'counts': [T, R, K, A, M],  # numbers of iouThrs, recThrs, classes, areaRng, maxDets
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,  # [T, R, K, A, M]
            'recall': recall,        # [T, K, A, M]
            'scores': scores,        # [T, R, K, A, M]
        }
        self.stats: list[12]  # stores summary statistics
        AP (default): iou=[0.5, 1), recall=[0, 1), num_class=80, area='all', maxdet=100
            0: AP(all)  1: AP(iou>=0.5)  2: AP(iou>=0.75)
            3: AP(area='small')  4: AP(area='medium')  5: AP(area='large')
        Recall (default): iou=[0.5, 1), num_class=80, area='all', maxdet=100
            6: AR(maxdet=1)  7: AR(maxdet=10)  8: AR(maxdet=100)
            9: AR(area='small')  10: AR(area='medium')  11: AR(area='large')
        """
        res = self._derive_coco_results(
            coco_eval, task, class_names=self._metadata.get("thing_classes")
        )
        self._results[task] = res
def load_mesh_auxiliary_data(fpath: str) -> Optional[torch.Tensor]:
    fpath_local = PathManager.get_local_path(fpath)
    with PathManager.open(fpath_local, "rb") as hFile:
        return torch.as_tensor(pickle.load(hFile), dtype=torch.float)
if args.type == "instance": dicts = load_cityscapes_instances(args.image_dir, args.gt_dir, from_json=True, to_polygons=True) logger.info("Done loading {} samples.".format(len(dicts))) thing_classes = [ k.name for k in labels if k.hasInstances and not k.ignoreInEval ] meta = Metadata().set(thing_classes=thing_classes) else: dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) logger.info("Done loading {} samples.".format(len(dicts))) stuff_names = [k.name for k in labels if k.trainId != 255] stuff_colors = [k.color for k in labels if k.trainId != 255] meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors) for d in dicts: img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) visualizer = Visualizer(img, metadata=meta) vis = visualizer.draw_dataset_dict(d) # cv2.imshow("a", vis.get_image()[:, :, ::-1]) # cv2.waitKey() fpath = os.path.join(dirname, os.path.basename(d["file_name"])) vis.save(fpath)
def load_mesh_geodists(geodists_fpath: str) -> Optional[torch.Tensor]:
    geodists_fpath_local = PathManager.get_local_path(geodists_fpath, timeout_sec=600)
    with PathManager.open(geodists_fpath_local, "rb") as hFile:
        return torch.as_tensor(pickle.load(hFile), dtype=torch.float)
def _cityscapes_files_to_dict(files, from_json, to_polygons):
    """
    Parse cityscapes annotation files to an instance segmentation dataset dict.

    Args:
        files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        A dict in Detectron2 Dataset format.
    """
    from cityscapesscripts.helpers.labels import id2label, name2label

    image_file, instance_id_file, _, json_file = files

    annos = []

    if from_json:
        from shapely.geometry import MultiPolygon, Polygon

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        }

        # `polygons_union` contains the union of all valid polygons.
        polygons_union = Polygon()

        # CityscapesScripts draw the polygons in sequential order
        # and each polygon *overwrites* existing ones. See
        # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
        # We use reverse order, and each polygon *avoids* early ones.
        # This will resolve the polygon overlaps in the same way as CityscapesScripts.
        for obj in jsonobj["objects"][::-1]:
            if "deleted" in obj:  # cityscapes data format specific
                continue
            label_name = obj["label"]

            try:
                label = name2label[label_name]
            except KeyError:
                if label_name.endswith("group"):  # crowd area
                    label = name2label[label_name[: -len("group")]]
                else:
                    raise
            if label.id < 0:  # cityscapes data format
                continue

            # Cityscapes's raw annotations use integer coordinates
            # Therefore +0.5 here
            poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
            # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
            # polygons for evaluation. This function operates in integer space
            # and draws each pixel whose center falls into the polygon.
            # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
            # We therefore dilate the input polygon by 0.5 as our input.
            poly = Polygon(poly_coord).buffer(0.5, resolution=4)

            if not label.hasInstances or label.ignoreInEval:
                # even if we won't store the polygon it still contributes to overlaps resolution
                polygons_union = polygons_union.union(poly)
                continue

            # Take non-overlapping part of the polygon
            poly_wo_overlaps = poly.difference(polygons_union)
            if poly_wo_overlaps.is_empty:
                continue
            polygons_union = polygons_union.union(poly)

            anno = {}
            anno["iscrowd"] = label_name.endswith("group")
            anno["category_id"] = label.id

            if isinstance(poly_wo_overlaps, Polygon):
                poly_list = [poly_wo_overlaps]
            elif isinstance(poly_wo_overlaps, MultiPolygon):
                poly_list = poly_wo_overlaps.geoms
            else:
                raise NotImplementedError(
                    "Unknown geometric structure {}".format(poly_wo_overlaps)
                )

            poly_coord = []
            for poly_el in poly_list:
                # COCO API can work only with exterior boundaries now, hence we store only them.
                # TODO: store both exterior and interior boundaries once other parts of the
                # codebase support holes in polygons.
                poly_coord.append(list(chain(*poly_el.exterior.coords)))
            anno["segmentation"] = poly_coord
            (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds

            anno["bbox"] = (xmin, ymin, xmax, ymax)
            anno["bbox_mode"] = BoxMode.XYXY_ABS

            annos.append(anno)
    else:
        # See also the official annotation parsing scripts at
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa
        with PathManager.open(instance_id_file, "rb") as f:
            inst_image = np.asarray(Image.open(f), order="F")
        # ids < 24 are stuff labels (filtering them first is about 5% faster)
        flattened_ids = np.unique(inst_image[inst_image >= 24])

        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": inst_image.shape[0],
            "width": inst_image.shape[1],
        }

        for instance_id in flattened_ids:
            # For non-crowd annotations, instance_id // 1000 is the label_id
            # Crowd annotations have <1000 instance ids
            label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
            label = id2label[label_id]
            if not label.hasInstances or label.ignoreInEval:
                continue

            anno = {}
            anno["iscrowd"] = instance_id < 1000
            anno["category_id"] = label.id

            mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")

            inds = np.nonzero(mask)
            ymin, ymax = inds[0].min(), inds[0].max()
            xmin, xmax = inds[1].min(), inds[1].max()
            anno["bbox"] = (xmin, ymin, xmax, ymax)
            if xmax <= xmin or ymax <= ymin:
                continue
            anno["bbox_mode"] = BoxMode.XYXY_ABS
            if to_polygons:
                # This conversion comes from D4809743 and D5171122,
                # when Mask-RCNN was first developed.
                contours = cv2.findContours(
                    mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
                )[-2]
                polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
                # opencv can produce invalid polygons
                if len(polygons) == 0:
                    continue
                anno["segmentation"] = polygons
            else:
                anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
            annos.append(anno)
    ret["annotations"] = annos
    return ret
def _open_cfg(cls, filename):
    return PathManager.open(filename, "r")
def draw_dataset_dict(self, dic):
    """
    Draw annotations/segmentations in Detectron2 Dataset format.

    Args:
        dic (dict): annotation/segmentation data of one image, in Detectron2
            Dataset format.

    Returns:
        output (VisImage): image object with visualizations.
    """
    annos = dic.get("annotations", None)
    if annos:
        if "segmentation" in annos[0]:
            masks = [x["segmentation"] for x in annos]
        else:
            masks = None
        if "keypoints" in annos[0]:
            keypts = [x["keypoints"] for x in annos]
            keypts = np.array(keypts).reshape(len(annos), -1, 3)
        else:
            keypts = None

        boxes = [
            BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
            if len(x["bbox"]) == 4
            else x["bbox"]
            for x in annos
        ]

        colors = None
        category_ids = [x["category_id"] for x in annos]
        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
            "thing_colors"
        ):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
                for c in category_ids
            ]
        names = self.metadata.get("thing_classes", None)
        labels = _create_text_labels(
            category_ids,
            scores=None,
            class_names=names,
            is_crowd=[x.get("iscrowd", 0) for x in annos],
        )
        self.overlay_instances(
            labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors
        )

    sem_seg = dic.get("sem_seg", None)
    if sem_seg is None and "sem_seg_file_name" in dic:
        with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
            sem_seg = Image.open(f)
            sem_seg = np.asarray(sem_seg, dtype="uint8")
    if sem_seg is not None:
        self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)

    pan_seg = dic.get("pan_seg", None)
    if pan_seg is None and "pan_seg_file_name" in dic:
        with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
            pan_seg = Image.open(f)
            pan_seg = np.asarray(pan_seg)
            from panopticapi.utils import rgb2id

            pan_seg = rgb2id(pan_seg)
    if pan_seg is not None:
        segments_info = dic["segments_info"]
        pan_seg = torch.tensor(pan_seg)
        self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.5)
    return self.output
def default_export_predictor(cfg, pytorch_model, predictor_type, output_dir, data_loader):
    # The default implementation acts based on the PredictorExportConfig returned by
    # calling "prepare_for_export". It'll export all sub models in standard way
    # according to the "predictor_type".
    assert hasattr(pytorch_model, "prepare_for_export"), pytorch_model
    inputs = next(iter(data_loader))
    export_config = pytorch_model.prepare_for_export(cfg, inputs, predictor_type)
    model_inputs = (
        export_config.data_generator(inputs)
        if export_config.data_generator is not None
        else (inputs,)
    )

    predictor_path = os.path.join(output_dir, predictor_type)
    PathManager.mkdirs(predictor_path)

    predictor_init_kwargs = {
        "preprocess_info": export_config.preprocess_info,
        "postprocess_info": export_config.postprocess_info,
        "run_func_info": export_config.run_func_info,
    }

    if isinstance(export_config.model, dict):
        models_info = {}
        for name, model in export_config.model.items():
            save_path = os.path.join(predictor_path, name)
            model_info = _export_single_model(
                predictor_path=predictor_path,
                model=model,
                input_args=model_inputs[name] if model_inputs is not None else None,
                save_path=save_path,
                model_export_method=(
                    predictor_type
                    if export_config.model_export_method is None
                    else export_config.model_export_method[name]
                ),
                model_export_kwargs=(
                    {}
                    if export_config.model_export_kwargs is None
                    else export_config.model_export_kwargs[name]
                ),
            )
            models_info[name] = model_info
        predictor_init_kwargs["models"] = models_info
    else:
        # for a single model, exported files are put under `predictor_path`
        # together with predictor_info.json
        save_path = predictor_path
        model_info = _export_single_model(
            predictor_path=predictor_path,
            model=export_config.model,
            input_args=model_inputs,
            save_path=save_path,
            model_export_method=export_config.model_export_method or predictor_type,
            model_export_kwargs=export_config.model_export_kwargs or {},
        )
        predictor_init_kwargs["model"] = model_info

    # assemble predictor
    predictor_info = PredictorInfo(**predictor_init_kwargs)
    with PathManager.open(os.path.join(predictor_path, "predictor_info.json"), "w") as f:
        json.dump(predictor_info.to_dict(), f, indent=4)

    return predictor_path
def _cached_log_stream(filename):
    io = PathManager.open(filename, "a")
    atexit.register(io.close)
    return io
def read_keyframes(
    video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0
) -> FrameList:  # pyre-ignore[11]
    """
    Reads keyframe data from a video file.

    Args:
        video_fpath (str): Video file path
        keyframes (List[int]): List of keyframe timestamps (as counts in
            timebase units to be used in container seek operations)
        video_stream_idx (int): Video stream index (default: 0)
    Returns:
        List[Frame]: list of frames that correspond to the specified timestamps
    """
    try:
        with PathManager.open(video_fpath, "rb") as io:
            container = av.open(io)
            stream = container.streams.video[video_stream_idx]
            frames = []
            for pts in keyframes:
                try:
                    container.seek(pts, any_frame=False, stream=stream)
                    frame = next(container.decode(video=0))
                    frames.append(frame)
                except av.AVError as e:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Read keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}, AV error: {e}"
                    )
                    container.close()
                    return frames
                except OSError as e:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Read keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}, OS error: {e}"
                    )
                    container.close()
                    return frames
                except StopIteration:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Read keyframes: Error decoding frame from {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}"
                    )
                    container.close()
                    return frames

            container.close()
            return frames
    except OSError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}"
        )
    except RuntimeError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}"
        )
    return []
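# Hedged usage sketch combining the two video helpers above: enumerate keyframe
# timestamps, then decode just those frames (the file path is illustrative).
keyframes = list_keyframes("video.mp4")
frames = read_keyframes("video.mp4", keyframes)
images = [f.to_rgb().to_ndarray() for f in frames]  # PyAV frames -> numpy arrays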
def main(args):
    # utils.init_distributed_mode(args)
    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessors = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("number of params:", n_parameters)

    param_dicts = [
        {
            "params": [
                p
                for n, p in model_without_ddp.named_parameters()
                if "backbone" not in n and p.requires_grad
            ]
        },
        {
            "params": [
                p
                for n, p in model_without_ddp.named_parameters()
                if "backbone" in n and p.requires_grad
            ],
            "lr": args.lr_backbone,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)

    dataset_train = build_dataset(image_set="train", args=args)
    dataset_val = build_dataset(image_set="val", args=args)

    if args.distributed:
        sampler_train = DistributedSampler(dataset_train)
        sampler_val = DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True
    )

    data_loader_train = DataLoader(
        dataset_train,
        batch_sampler=batch_sampler_train,
        collate_fn=utils.collate_fn,
        num_workers=args.num_workers,
    )
    data_loader_val = DataLoader(
        dataset_val,
        args.batch_size,
        sampler=sampler_val,
        drop_last=False,
        collate_fn=utils.collate_fn,
        num_workers=args.num_workers,
    )

    if args.dataset_file == "coco_panoptic":
        # We also evaluate AP during panoptic training, on original coco DS
        coco_val = datasets.coco.build("val", args)
        base_ds = get_coco_api_from_dataset(coco_val)
    else:
        base_ds = get_coco_api_from_dataset(dataset_val)

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location="cpu")
        model_without_ddp.detr.load_state_dict(checkpoint["model"])

    if args.resume:
        if args.resume.startswith("https"):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location="cpu", check_hash=True
            )
        else:
            checkpoint = torch.load(args.resume, map_location="cpu")
        model_without_ddp.load_state_dict(checkpoint["model"])
        if (
            not args.eval
            and "optimizer" in checkpoint
            and "lr_scheduler" in checkpoint
            and "epoch" in checkpoint
        ):
            optimizer.load_state_dict(checkpoint["optimizer"])
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
            args.start_epoch = checkpoint["epoch"] + 1

    if args.eval:
        test_stats, coco_evaluator = evaluate(
            model,
            criterion,
            postprocessors,
            data_loader_val,
            base_ds,
            device,
            args.output_dir,
        )
        if args.output_dir:
            with PathManager.open(os.path.join(args.output_dir, "eval.pth"), "wb") as f:
                utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, f)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model,
            criterion,
            data_loader_train,
            optimizer,
            device,
            epoch,
            args.clip_max_norm,
        )
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = []  # os.path.join(args.output_dir, 'checkpoint.pth')]
            # extra checkpoint before LR drop and every 10 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 10 == 0:
                checkpoint_paths.append(
                    os.path.join(args.output_dir, f"checkpoint{epoch:04}.pth")
                )
            for checkpoint_path in checkpoint_paths:
                with PathManager.open(checkpoint_path, "wb") as f:
                    if args.gpu == 0 and args.machine_rank == 0:
                        utils.save_on_master(
                            {
                                "model": model_without_ddp.state_dict(),
                                "optimizer": optimizer.state_dict(),
                                "lr_scheduler": lr_scheduler.state_dict(),
                                "epoch": epoch,
                                "args": args,
                            },
                            f,
                        )

        test_stats, coco_evaluator = evaluate(
            model,
            criterion,
            postprocessors,
            data_loader_val,
            base_ds,
            device,
            args.output_dir,
        )

        log_stats = {
            **{f"train_{k}": v for k, v in train_stats.items()},
            **{f"test_{k}": v for k, v in test_stats.items()},
            "epoch": epoch,
            "n_parameters": n_parameters,
        }

        if args.output_dir and utils.is_main_process():
            with PathManager.open(os.path.join(args.output_dir, "log.txt"), "w") as f:
                f.write(json.dumps(log_stats) + "\n")

            # for evaluation logs
            if coco_evaluator is not None:
                PathManager.mkdirs(os.path.join(args.output_dir, "eval"))
                if "bbox" in coco_evaluator.coco_eval:
                    filenames = ["latest.pth"]
                    if epoch % 50 == 0:
                        filenames.append(f"{epoch:03}.pth")
                    for name in filenames:
                        with PathManager.open(
                            os.path.join(args.output_dir, "eval", name), "wb"
                        ) as f:
                            torch.save(coco_evaluator.coco_eval["bbox"].eval, f)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
def dump_torchscript_IR(model, dir):
    """
    Dump IR of a TracedModule/ScriptModule at various levels. Useful for debugging.

    Args:
        model (TracedModule or ScriptModule): traced or scripted module
        dir (str): output directory to dump files.
    """
    PathManager.mkdirs(dir)

    def _get_script_mod(mod):
        if isinstance(mod, torch.jit.TracedModule):
            return mod._actual_script_module
        return mod

    # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code
    with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f:

        def get_code(mod):
            # Try a few ways to get code using private attributes.
            try:
                # This contains more information than just `mod.code`
                return _get_script_mod(mod)._c.code
            except AttributeError:
                pass
            try:
                return mod.code
            except AttributeError:
                return None

        def dump_code(prefix, mod):
            code = get_code(mod)
            name = prefix or "root model"
            if code is None:
                f.write(f"Could not find code for {name} (type={mod.original_name})\n")
                f.write("\n")
            else:
                f.write(f"\nCode for {name}, type={mod.original_name}:\n")
                f.write(code)
                f.write("\n")
                f.write("-" * 80)

            for name, m in mod.named_children():
                dump_code(prefix + "." + name, m)

        dump_code("", model)

    # Recursively dump IR of all modules
    with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f:
        try:
            f.write(_get_script_mod(model)._c.dump_to_str(True, False, False))
        except AttributeError:
            pass

    # Dump IR of the entire graph (all submodules inlined)
    with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f:
        f.write(str(model.inlined_graph))

    # Dump the model structure in pytorch style
    with PathManager.open(os.path.join(dir, "model.txt"), "w") as f:
        f.write(str(model))
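# Hedged usage sketch for dump_torchscript_IR: trace a model first, then dump
# its IR to a directory (variable names and the path are illustrative).
ts_model = torch.jit.trace(model, example_inputs)
dump_torchscript_IR(ts_model, "./ts_ir_dump")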
def evaluate(self):
    comm.synchronize()

    self._predictions = comm.gather(self._predictions)
    self._predictions = list(itertools.chain(*self._predictions))
    if not comm.is_main_process():
        return

    # PanopticApi requires local files
    gt_json = PathManager.get_local_path(self._metadata.panoptic_json)
    gt_folder = PathManager.get_local_path(self._metadata.panoptic_root)

    with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir:
        logger.info("Writing all panoptic predictions to {} ...".format(pred_dir))
        for p in self._predictions:
            with open(os.path.join(pred_dir, p["file_name"]), "wb") as f:
                f.write(p.pop("png_string"))

        with open(gt_json, "r") as f:
            json_data = json.load(f)
        json_data["annotations"] = self._predictions

        with PathManager.open(self._predictions_json, "w") as f:
            f.write(json.dumps(json_data))

        from panopticapi.evaluation import pq_compute
        import string
        import random
        import shutil

        rand_str = lambda n: "".join(
            [random.choice(string.ascii_lowercase) for i in range(n)]
        )
        results_dir = None
        # while results_dir is None or os.path.exists(results_dir):
        #     results_dir = '/BS/ahmed_projects/work/data/panoptic_eval/' + rand_str(10)
        # logger.info(
        #     "Writing all panoptic predictions for future use to {} ...\n"
        #     "These files can be used to get results from evaluation server. "
        #     "Tested on COCO and Cityscapes.".format(results_dir))
        # shutil.copytree(os.path.dirname(self._predictions_json), results_dir)
        # png_path = os.path.join(
        #     results_dir, os.path.splitext(os.path.basename(self._predictions_json))[0])
        # shutil.copytree(pred_dir, png_path)

        with contextlib.redirect_stdout(io.StringIO()):
            pq_res, pq_per_image_res = pq_compute(
                gt_json,
                PathManager.get_local_path(self._predictions_json),
                gt_folder=gt_folder,
                pred_folder=pred_dir,
            )

    res = {}
    res["PQ"] = 100 * pq_res["All"]["pq"]
    res["SQ"] = 100 * pq_res["All"]["sq"]
    res["RQ"] = 100 * pq_res["All"]["rq"]
    res["PQ_th"] = 100 * pq_res["Things"]["pq"]
    res["SQ_th"] = 100 * pq_res["Things"]["sq"]
    res["RQ_th"] = 100 * pq_res["Things"]["rq"]
    res["PQ_st"] = 100 * pq_res["Stuff"]["pq"]
    res["SQ_st"] = 100 * pq_res["Stuff"]["sq"]
    res["RQ_st"] = 100 * pq_res["Stuff"]["rq"]

    results = OrderedDict({"panoptic_seg": res})

    # Convert class ids to names:
    per_class = pq_res["per_class"]
    new_per_class = {}
    for label in per_class.keys():
        isthing = label in self._metadata.thing_dataset_id_to_contiguous_id.keys()
        if isthing:
            class_name = self._metadata.thing_classes[
                self._metadata.thing_dataset_id_to_contiguous_id[label]
            ]
        else:
            class_name = self._metadata.stuff_classes[
                self._metadata.stuff_dataset_id_to_contiguous_id[label]
            ]
        new_per_class[class_name] = per_class[label]
    pq_res["per_class"] = new_per_class

    _print_panoptic_results(pq_res)
    _print_panoptic_results_per_image(pq_per_image_res)

    return results