def test_net_on_dataset(
    weights_file, dataset_name, proposal_file, output_dir, multi_gpu=False,
    gpu_id=0
):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    timer = Timer()
    timer.tic()
    if not multi_gpu:
        # Single-process path: run the whole dataset on one GPU.
        all_boxes, all_segms, all_keyps = test_net(
            weights_file, dataset_name, proposal_file, output_dir,
            gpu_id=gpu_id
        )
    else:
        # Fan the image range out across GPUs via subprocesses.
        image_count = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, image_count, output_dir
        )
    timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(timer.average_time))
    return task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, output_dir
    )
def generate_rpn_on_dataset(
    weights_file, dataset_name, _proposal_file_ignored, output_dir,
    multi_gpu=False, gpu_id=0
):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    timer = Timer()
    timer.tic()
    if multi_gpu:
        image_count = len(dataset.get_roidb())
        _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset(
            weights_file, dataset_name, _proposal_file_ignored, image_count,
            output_dir
        )
    else:
        # Single-GPU path processes the entire dataset range by default.
        _boxes, _scores, _ids, rpn_file = generate_rpn_on_range(
            weights_file, dataset_name, _proposal_file_ignored, output_dir,
            gpu_id=gpu_id
        )
    timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(timer.average_time))
    return evaluate_proposal_file(dataset, rpn_file, output_dir)
def test_net_on_dataset(
    weights_file, dataset_name, proposal_file, output_dir, multi_gpu=False,
    gpu_id=0
):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    timer = Timer()
    timer.tic()
    if multi_gpu:
        detections = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file,
            len(dataset.get_roidb()), output_dir
        )
    else:
        detections = test_net(
            weights_file, dataset_name, proposal_file, output_dir,
            gpu_id=gpu_id
        )
    # This variant also carries body UV results alongside boxes/segms/keyps.
    all_boxes, all_segms, all_keyps, all_bodys = detections
    timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(timer.average_time))
    return task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, all_bodys, output_dir
    )
def generate_rpn_on_dataset(weights_file, dataset_name,
                            _proposal_file_ignored, output_dir,
                            multi_gpu=False, gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    rpn_timer = Timer()
    rpn_timer.tic()
    if not multi_gpu:
        # Processes entire dataset range by default
        _boxes, _scores, _ids, rpn_file = generate_rpn_on_range(
            weights_file, dataset_name, _proposal_file_ignored, output_dir,
            gpu_id=gpu_id)
    else:
        _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset(
            weights_file, dataset_name, _proposal_file_ignored,
            len(dataset.get_roidb()), output_dir)
    rpn_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        rpn_timer.average_time))
    return evaluate_proposal_file(dataset, rpn_file, output_dir)
def test_cls_net_on_dataset(weights_file, dataset_name, proposal_file,
                            output_dir, multi_gpu=False, gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    timer = Timer()
    timer.tic()
    if multi_gpu:
        acc = multi_gpu_test_cls_net_on_dataset(
            len(dataset.get_roidb()), output_dir)
    else:
        acc = test_cls_net(weights_file, dataset_name, proposal_file,
                           output_dir, gpu_id=gpu_id)
    timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(timer.average_time))
    logger.info(
        'Classification Accuracy on TEST data is: {:.2f}%'.format(acc * 100))
    return {"Accuracy": acc}
def val_multiscale(opt, model, val_loader, criterion, epoch, stats,
                   bestModelPerf, optimizer):
    """Run one multiscale validation pass and report whether it is the best.

    Iterates val_loader under torch.no_grad(), forwarding each batch through
    `model`, scoring with `criterion`, and accumulating a running loss.  If a
    requeue signal arrives mid-pass, a checkpoint is saved and the process
    exits.  Returns the result of checkIsBest (presumably best-model flag /
    updated best metric -- confirm against its definition).
    """
    global SIGNAL_RECEIVED
    from detectron.utils.timer import Timer
    t = Timer()
    model.eval()
    totalValLoss, ctValIt = resetValProgressMultiscale(opt, val_loader, stats)
    # Reporting period for updateValProgress; `/ 1` leaves len unchanged,
    # so this is effectively max(5, len(val_loader)).
    rtl_period = max(5, int(len(val_loader) / 1))
    t.tic()
    # NOTE(review): triple .dataset unwrapping assumes a specific nesting of
    # dataset wrappers around the COCO/Cityscapes dataset -- verify if the
    # loader construction changes.
    coco_cityscapes_dataset = val_loader.data_source.dataset.dataset.dataset
    json_classes = coco_cityscapes_dataset.classes
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            # Get and prepare data
            inputs, targets, seqIDs = data
            inputs, targets = prepareMultiscaleForForwardOnGpu(
                inputs, targets, **{
                    'gpu_id': opt['gpu_id'],
                    'nb_scales': opt['nb_scales']
                })
            targets = reshapeMultiscaleTargetsForCriterion(
                targets, opt['n_target_frames'], opt['nb_features'],
                opt['nb_scales'])
            # Evaluation
            ffpnlevels = 1 if opt['train_single_level'] else opt['FfpnLevels']
            outputs = format_variable_length_multiscale_sequence(
                model(inputs), ffpnlevels, opt['n_target_frames'],
                opt['nb_scales'])
            loss, loss_terms = criterion(outputs, targets)
            # Update progress
            totalValLoss, ctValIt = updateValProgress(totalValLoss, ctValIt,
                                                      loss.item(), loss_terms,
                                                      stats, epoch, i,
                                                      rtl_period)
            # toc/tic pair times each full iteration (data prep + forward).
            t.toc()
            t.tic()
            if SIGNAL_RECEIVED:
                # Preemption/requeue signal: checkpoint and exit cleanly so
                # the job scheduler can restart from this state.
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'iter': 0,
                        'opt_path': os.path.join(opt['logs'], 'params.pkl'),
                        'state_dict': model.state_dict(),
                        'best_prec1': bestModelPerf,
                        'optimizer': optimizer.state_dict(),
                    }, False, savedir=opt['save'])
                logger.info(
                    'Saved checkpoint before exiting peacefully for job requeuing'
                )
                exit(0)
            # Free GPU tensors eagerly to keep validation memory flat.
            del loss, inputs, outputs, targets, loss_terms
            # opt['it'] caps the number of validation iterations.
            if i >= (opt['it'] - 1):
                break
    logger.info('Validation iteration average duration : %f' % t.average_time)
    return checkIsBest(totalValLoss, ctValIt, bestModelPerf=bestModelPerf)
def loader_loop(roi_data_loader):
    """Benchmark minibatch loading: time 100 get_next_minibatch() calls."""
    timer = Timer()
    num_iters = 100
    for it in range(num_iters):
        timer.tic()
        roi_data_loader.get_next_minibatch()
        timer.toc()
        print('{:d}/{:d}: Average get_next_minibatch time: {:.3f}s'.format(
            it + 1, num_iters, timer.average_time))
def loader_loop(roi_data_loader):
    """Time repeated minibatch fetches and print the running average."""
    load_timer = Timer()
    total = 100
    done = 0
    while done < total:
        load_timer.tic()
        roi_data_loader.get_next_minibatch()
        load_timer.toc()
        done += 1
        print('{:d}/{:d}: Average get_next_minibatch time: {:.3f}s'.format(
            done, total, load_timer.average_time))
def test_net_on_dataset(weights_file, dataset_name, proposal_file, output_dir,
                        multi_gpu=False, gpu_id=0):
    """Run inference on a dataset.

    Beyond standard evaluation, computes ROC/FROC curves, AUC and AFROC
    scores, writes plots and raw curve arrays to output_dir, and merges the
    scores into the results dict.  Returns (results, auc_score, afroc_score).
    """
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    # Placeholder passed to the plot/save helpers; only the single-GPU
    # test_net path replaces it with a real model handle.
    model = ''
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        # NOTE(review): this branch leaves `model` as '' -- the plotting and
        # saving calls below presumably tolerate that; confirm.
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, num_images, output_dir)
    else:
        all_boxes, all_segms, all_keyps, model = test_net(weights_file,
                                                          dataset_name,
                                                          proposal_file,
                                                          output_dir,
                                                          gpu_id=gpu_id)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, output_dir)
    # Curves computed at the configured IoU threshold.
    roc_data = metrics.calculate_roc(all_boxes, dataset, cfg.TEST.IOU)
    froc_data = metrics.calculate_froc(all_boxes, dataset, cfg.TEST.IOU)
    auc_score = {
        dataset.name: {
            u'box': {
                u'AUC': auc(roc_data[0], roc_data[1])
            }
        }
    }
    # AFROC: area under the FROC-derived curve via trapezoidal integration.
    afroc_score = np.trapz(froc_data[0], froc_data[2])
    afroc = {dataset.name: {u'box': {u'AFROC': afroc_score}}}
    print('Afroc score: {:.4f}'.format(afroc_score))
    plot.plot_roc(roc_data, auc_score[dataset.name][u'box'][u'AUC'], dataset,
                  model, output_dir)
    plot.plot_froc(froc_data, dataset, model, output_dir)
    plot.plot_afroc(froc_data, dataset, model, output_dir)
    save.np_save(np.stack(roc_data), 'roc', dataset, model, output_dir)
    save.np_save(np.stack(froc_data), 'froc', dataset, model, output_dir)
    # Fold AUC/AFROC into the per-dataset box results.
    results[dataset_name][u'box'].update(auc_score[dataset.name][u'box'])
    results[dataset_name][u'box'].update(afroc[dataset.name][u'box'])
    return results, auc_score, afroc_score
def test_net_on_dataset(weights_file, dataset_name, proposal_file, output_dir,
                        multi_gpu=False, gpu_id=0):
    """Run inference on a dataset.

    Short-circuits on cached artifacts in output_dir: a previously written
    bbox results json is re-evaluated directly, and a previously pickled
    detections.pkl is reused instead of rerunning the network.
    """
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    ################################################################
    import pickle
    # Fast path 1: re-evaluate an existing COCO-format results json.
    res_file = os.path.join(output_dir,
                            'bbox_' + dataset_name + '_results.json')
    print("res_file = {}==========================".format(res_file))
    if os.path.exists(res_file):
        import detectron.datasets.json_dataset_evaluator as json_dataset_evaluator
        print("res_file = {} exists! Loading res_file".format(res_file))
        coco_eval = json_dataset_evaluator._do_detection_eval(
            dataset, res_file, output_dir)
        box_results = task_evaluation._coco_eval_to_box_results(coco_eval)
        results = OrderedDict([(dataset.name, box_results)])
        return results
    ################################################################
    # Fast path 2: reuse previously pickled raw detections.
    det_name = "detections.pkl"
    det_file = os.path.join(output_dir, det_name)
    print("det_file = {}==========================".format(det_file))
    if os.path.exists(det_file):
        print("{} exists! Loading detection results".format(det_file))
        # Fix: open the pickle in binary mode and close the handle.  The
        # original `pickle.load(open(det_file))` used text mode (which
        # corrupts/breaks binary pickles on Python 3) and leaked the file.
        with open(det_file, 'rb') as f:
            res = pickle.load(f)
        all_boxes = res['all_boxes']
        all_segms = res['all_segms']
        all_keyps = res['all_keyps']
    ################################################################
    elif multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, num_images, output_dir)
    else:
        all_boxes, all_segms, all_keyps = test_net(
            weights_file, dataset_name, proposal_file, output_dir,
            gpu_id=gpu_id)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, output_dir)
    return results
def test_net_on_dataset(weights_file,
                        dataset_name,
                        proposal_file,
                        output_dir,
                        multi_gpu=False,
                        gpu_id=0,
                        subset_pointer=None):
    """Run inference on a dataset.

    Dataset names prefixed 'live_' skip JsonDataset construction and skip
    evaluation (returns None after inference).  `subset_pointer`, when given,
    restricts inference/evaluation to a subset and is consumed (pruned) by
    this call so subsequent datasets see the remainder.
    """
    # 'live_' datasets have no annotations; `dataset` is only bound for the
    # evaluable case (the later uses are guarded by the same prefix check).
    if dataset_name[:5] != 'live_':
        dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        # NOTE(review): the multi-GPU path does not forward subset_pointer --
        # presumably subsetting is single-GPU only; confirm.
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, num_images, output_dir)
    else:
        all_boxes, all_segms, all_keyps = test_net(
            weights_file,
            dataset_name,
            proposal_file,
            output_dir,
            gpu_id=gpu_id,
            subset_pointer=subset_pointer)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    # Optionally remap COCO category detections onto the VOC label set.
    if cfg.TEST.COCO_TO_VOC:
        all_boxes = coco_detects_to_voc(all_boxes)
    if dataset_name[:5] == 'live_':
        return None
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, output_dir,
                                           subset_pointer=subset_pointer)
    if subset_pointer is not None:
        # prune the subset for the following datasets:
        subset_pointer.subset = subset_pointer.subset[len(dataset.get_roidb()
                                                          ):]
        print('remains', len(subset_pointer.subset)
              )  # should have 0 remains for the last set, voc_2012_train.
    return results
def generate_proposals_on_roidb(
    model,
    roidb,
    start_ind=None,
    end_ind=None,
    total_num_images=None,
    gpu_id=0,
):
    """Generate RPN proposals on all images in an imdb.

    Returns three parallel lists over `roidb`: proposal boxes, proposal
    scores, and image ids.  start_ind/end_ind/total_num_images only affect
    progress logging for sharded (multi-process) runs; when start_ind is
    None this call is assumed to cover the whole roidb.
    """
    _t = Timer()
    num_images = len(roidb)
    roidb_boxes = [[] for _ in range(num_images)]
    roidb_scores = [[] for _ in range(num_images)]
    roidb_ids = [[] for _ in range(num_images)]
    if start_ind is None:
        # Unsharded run: log as range [1, num_images] of num_images.
        start_ind = 0
        end_ind = num_images
        total_num_images = num_images
    for i in range(num_images):
        roidb_ids[i] = roidb[i]['id']
        im = cv2.imread(roidb[i]['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            _t.tic()
            # This variant also passes the image path to im_proposals
            # (unlike the sibling 2-argument version in this file).
            roidb_boxes[i], roidb_scores[i] = im_proposals(
                model, im, roidb[i]['image'])
            _t.toc()
        if i % 10 == 0:
            ave_time = _t.average_time
            eta_seconds = ave_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                ('rpn_generate: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s (eta: {})').format(start_ind + 1, end_ind,
                                                       total_num_images,
                                                       start_ind + i + 1,
                                                       start_ind + num_images,
                                                       ave_time, eta))
    return roidb_boxes, roidb_scores, roidb_ids
def generate_proposals_on_roidb(
    model,
    roidb,
    start_ind=None,
    end_ind=None,
    total_num_images=None,
    gpu_id=0,
):
    """Generate RPN proposals on all images in an imdb."""
    timer = Timer()
    total = len(roidb)
    boxes_out = [[] for _ in range(total)]
    scores_out = [[] for _ in range(total)]
    ids_out = [[] for _ in range(total)]
    if start_ind is None:
        # Unsharded run: report progress over the whole roidb.
        start_ind, end_ind, total_num_images = 0, total, total
    for idx, db_entry in enumerate(roidb):
        ids_out[idx] = db_entry['id']
        im = cv2.imread(db_entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            timer.tic()
            boxes_out[idx], scores_out[idx] = im_proposals(model, im)
            timer.toc()
        if idx % 10 == 0:
            ave_time = timer.average_time
            eta_seconds = ave_time * (total - idx - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                ('rpn_generate: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s (eta: {})').format(
                     start_ind + 1, end_ind, total_num_images,
                     start_ind + idx + 1, start_ind + total, ave_time, eta))
    return boxes_out, scores_out, ids_out
def convert(json_file, output_dir):
    """Filter coco_2017_test detections down to the test-dev image subset.

    Reads a detection results json, keeps only entries whose image_id belongs
    to coco_2017_test-dev, and writes the filtered list to output_dir under
    the input basename with a '_test-dev' suffix.
    """
    print('Reading: {}'.format(json_file))
    with open(json_file, 'r') as fid:
        dt = json.load(fid)
    print('done!')
    test_image_info = get_ann_fn('coco_2017_test')
    with open(test_image_info, 'r') as fid:
        info_test = json.load(fid)
    image_test = info_test['images']
    image_test_id = [i['id'] for i in image_test]
    print('{} has {} images'.format(test_image_info, len(image_test_id)))
    test_dev_image_info = get_ann_fn('coco_2017_test-dev')
    with open(test_dev_image_info, 'r') as fid:
        info_testdev = json.load(fid)
    image_testdev = info_testdev['images']
    image_testdev_id = [i['id'] for i in image_testdev]
    print('{} has {} images'.format(test_dev_image_info,
                                    len(image_testdev_id)))
    dt_testdev = []
    print('Filtering test-dev from test...')
    t = Timer()
    t.tic()
    # Fix: membership test against a set is O(1); the original tested
    # against the ~20k-element id list per detection, making the loop
    # O(len(dt) * len(image_testdev_id)).
    testdev_id_set = set(image_testdev_id)
    for i in range(len(dt)):
        if i % 1000 == 0:
            print('{}/{}'.format(i, len(dt)))
        if dt[i]['image_id'] in testdev_id_set:
            dt_testdev.append(dt[i])
    print('Done filtering ({:2}s)!'.format(t.toc()))
    filename, file_extension = os.path.splitext(os.path.basename(json_file))
    filename = filename + '_test-dev'
    filename = os.path.join(output_dir, filename + file_extension)
    with open(filename, 'w') as fid:
        info_test = json.dump(dt_testdev, fid)
    print('Done writing: {}!'.format(filename))
def convert(json_file, output_dir):
    """Filter coco_2017_test detections down to the test-dev image subset.

    Reads a detection results json, keeps only entries whose image_id belongs
    to coco_2017_test-dev, and writes the filtered list to output_dir under
    the input basename with a '_test-dev' suffix.
    """
    print('Reading: {}'.format(json_file))
    with open(json_file, 'r') as fid:
        dt = json.load(fid)
    print('done!')
    test_image_info = DATASETS['coco_2017_test'][ANN_FN]
    with open(test_image_info, 'r') as fid:
        info_test = json.load(fid)
    image_test = info_test['images']
    image_test_id = [i['id'] for i in image_test]
    print('{} has {} images'.format(test_image_info, len(image_test_id)))
    test_dev_image_info = DATASETS['coco_2017_test-dev'][ANN_FN]
    with open(test_dev_image_info, 'r') as fid:
        info_testdev = json.load(fid)
    image_testdev = info_testdev['images']
    image_testdev_id = [i['id'] for i in image_testdev]
    print('{} has {} images'.format(test_dev_image_info,
                                    len(image_testdev_id)))
    dt_testdev = []
    print('Filtering test-dev from test...')
    t = Timer()
    t.tic()
    # Fix: membership test against a set is O(1); the original tested
    # against the id list per detection, making the loop
    # O(len(dt) * len(image_testdev_id)).
    testdev_id_set = set(image_testdev_id)
    for i in range(len(dt)):
        if i % 1000 == 0:
            print('{}/{}'.format(i, len(dt)))
        if dt[i]['image_id'] in testdev_id_set:
            dt_testdev.append(dt[i])
    print('Done filtering ({:2}s)!'.format(t.toc()))
    filename, file_extension = os.path.splitext(os.path.basename(json_file))
    filename = filename + '_test-dev'
    filename = os.path.join(output_dir, filename + file_extension)
    with open(filename, 'w') as fid:
        info_test = json.dump(dt_testdev, fid)
    print('Done writing: {}!'.format(filename))
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        # Median-smoothed series, one per model loss/metric blob name.
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        # Latest raw per-iteration value for each loss/metric.
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        # Start timing the current SGD iteration.
        self.iter_timer.tic()

    def IterToc(self):
        # Stop timing; returns this iteration's wall time (not the average).
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        # Losses are summed across GPUs; metrics are averaged.
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        # Track minibatch queue depth as a data-loading health signal.
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        # Log every LOG_PERIOD iterations and on the final iteration.
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        # ETA extrapolated from the average iteration time.
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(
                np.round(self.smoothed_mb_qsize.GetMedianValue())
            ),
            # Peak GPU memory in MB.
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
class XMLDataset(object):
    """A class representing a xml dataset.

    Pascal-VOC-style dataset backed by per-image XML annotation files,
    exposing a roidb interface compatible with the JsonDataset pipeline.
    """

    def __init__(self, name):
        # `name` must be a key of the module-level _DATASETS registry.
        assert name in _DATASETS.keys(), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(_DATASETS[name][_IM_DIR]), \
            'Image directory \'{}\' not found'.format(_DATASETS[name][_IM_DIR])
        assert os.path.exists(_DATASETS[name][_ANN_FN]), \
            'Annotation file \'{}\' not found'.format(_DATASETS[name][_ANN_FN])
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = _DATASETS[name][_IM_DIR]
        self.image_prefix = ('' if _IM_PREFIX not in _DATASETS[name] else
                             _DATASETS[name][_IM_PREFIX])
        self.debug_timer = Timer()
        # Set up dataset classes.
        # NOTE(review): if the name matches none of these substrings,
        # dummy_dataset is unbound and the next line raises NameError.
        if 'traffic' in self.name:
            dummy_dataset = dummy_datasets.get_traffic_dataset()
        elif 'bupi' in self.name:
            dummy_dataset = dummy_datasets.get_cloth_dataset()
        elif 'steel' in self.name:
            dummy_dataset = dummy_datasets.get_steel_dataset()
        elif 'hanzi' in self.name:
            dummy_dataset = dummy_datasets.get_hanzi_dataset()
        categories = dummy_dataset.classes.values()
        category_ids = range(len(categories))
        logger.info('categories\t{}'.format(categories))
        self.category_to_id_map = dict(zip(categories, category_ids))
        self.classes = categories
        self.num_classes = len(self.classes)
        # Keypoints are unsupported in this dataset class.
        self.keypoints = None
        # self._init_keypoints()
        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
        # load_image_info(self)

    def load_image_set_index(self):
        """Read the image-index list file (one image id per line)."""
        image_set_index_file = os.path.join(_DATASETS[self.name][_ANN_FN])
        assert os.path.exists(
            image_set_index_file), 'Path does not exist: {}'.format(
                image_set_index_file)
        logger.info('image_set_index_file \t{}'.format(image_set_index_file))
        with open(image_set_index_file) as f:
            image_set_index = [x.strip() for x in f.readlines()]
        logger.info('number of images\t{}'.format(len(image_set_index)))
        return image_set_index

    def image_path_from_index(self, index):
        """Map an image index to its .jpg path under the image directory."""
        image_path = os.path.join(_DATASETS[self.name][_IM_DIR],
                                  index + '.jpg')
        assert os.path.exists(image_path), 'Path does not exist: {}'.format(
            image_path)
        return image_path

    def get_roidb(self,
                  gt=False,
                  proposal_file=None,
                  min_proposal_size=2,
                  proposal_limit=-1,
                  crowd_filter_thresh=0):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions

        The built roidb is cached to disk and reused on subsequent calls.
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        cache_path = cfg.TRAIN.DATASET_CACHE_PATH
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        cache_file = os.path.join(cache_path, self.name + '.pkl')
        if os.path.isfile(cache_file):
            # NOTE(review): cache hit ignores the gt/proposal arguments --
            # a cache built with different options is returned as-is.
            roidb = pickle.load(open(cache_file, 'rb'))
            logger.info('load roidb from %s' % cache_file)
            return roidb
        image_ids = self.load_image_set_index()
        roidb = [{'index': index} for index in image_ids]
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug('_add_gt_annotations took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(roidb, proposal_file,
                                          min_proposal_size, proposal_limit,
                                          crowd_filter_thresh)
            logger.debug('_add_proposals_from_file took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        _add_class_assignments(roidb)
        with open(cache_file, 'wb') as fw:
            pickle.dump(roidb, fw)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        entry['image'] = self.image_path_from_index(entry['index'])
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32))
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the
        # index in the list of rois that satisfy
        # np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty((0, 3, self.num_keypoints),
                                             dtype=np.int32)
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def load_objs_from_index(self, entry):
        """Parse the entry's XML annotation file into a list of object dicts.

        Side effect: sets entry['width'] / entry['height'] from the XML.
        """
        import xml.etree.ElementTree as ET
        index = entry['index']
        tree = ET.parse(
            os.path.join(_DATASETS[self.name][_ANN_DIR], index + '.xml'))
        size = tree.find('size')
        entry['width'] = int(size.find('width').text)
        entry['height'] = int(size.find('height').text)
        objs = tree.findall('object')
        valid_objs = []
        for obj in objs:
            cls_name = obj.find('name').text.lower()
            # Skip objects whose class is not in the configured class list.
            if cls_name not in self.classes:
                logger.info('obj class {} not in classname list {}'.format(
                    cls_name, self.classes))
                continue
            tmp_obj = dict()
            tmp_obj['cls_name'] = cls_name
            bndbox = obj.find('bndbox')
            # Assumes bndbox children are ordered x1, y1, x2, y2 in the XML.
            x1, y1, x2, y2 = map(int, [xx.text for xx in bndbox])
            tmp_obj['bbox'] = [x1, y1, x2, y2]
            segms = obj.find('segmentation')
            if segms == None or len(segms) == 0:
                polygons = None
                area = 0
            else:
                polygons = get_polygon_from_obj(segms)
                area = get_segmentation_area(segms)
            tmp_obj['area'] = area
            tmp_obj['segmentation'] = polygons
            tmp_obj['iscrowd'] = False
            valid_objs.append(tmp_obj)
        return valid_objs

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        objs = self.load_objs_from_index(entry)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            x1, y1, x2, y2 = obj['bbox']
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-degenerate boxes after clipping.
            if x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)
        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)
        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            # Class index -1 marks 'ignore' objects.
            if obj['cls_name'] == 'ignore':
                cls = -1
            else:
                cls = self._class_to_ind[obj['cls_name']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = 0
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints, axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints

    def _add_proposals_from_file(self, roidb, proposal_file,
                                 min_proposal_size, top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'r') as f:
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        # Keypoint annotations are not supported for XML datasets.
        raise NotImplementedError

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
class TrainingStats(object):
    """Track vital training statistics.

    Optionally mirrors logged scalars to a tensorboard writer.
    """

    def __init__(self, model, tensorflow_board=None):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        # Optional tensorboard writer; None disables tb logging.
        self.tblogger = tensorflow_board
        # Bookkeeping-only keys that are not worth plotting as scalars.
        self.tb_ignored_keys = ['iter', 'eta', 'mb_qsize', 'mem', 'time']

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        # Returns this iteration's wall time (not the running average).
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        # Losses are summed across GPUs; metrics are averaged.
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)
            if self.tblogger:
                self.tb_log_stats(stats, cur_iter)

    def tb_log_stats(self, stats, cur_iter):
        """Log the tracked statistics to tensorboard"""
        for k in stats:
            if k not in self.tb_ignored_keys:
                v = stats[k]
                if isinstance(v, dict):
                    # Recurse into nested stat dicts.
                    self.tb_log_stats(v, cur_iter)
                else:
                    self.tblogger.write_scalars({k: v}, cur_iter)

    def GetStats(self, cur_iter, lr):
        # ETA extrapolated from the average iteration time.
        eta_seconds = self.iter_timer.average_time * (cfg.SOLVER.MAX_ITER -
                                                      cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(iter=cur_iter,
                     lr=float(lr),
                     time=self.iter_timer.average_time,
                     loss=self.smoothed_total_loss.GetMedianValue(),
                     eta=eta,
                     mb_qsize=int(
                         np.round(self.smoothed_mb_qsize.GetMedianValue())),
                     # Peak GPU memory in MB.
                     mem=int(np.ceil(mem_usage / 1024 / 1024)))
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
class TrainingStats(object):
    """Track vital training statistics.

    Epoch-aware variant: window/log periods are scaled down by the GPU count,
    and iteration budgets are computed from the data loader's
    iterations-per-epoch.
    """

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering).
        # Scaled by 1/NUM_GPUS so wall-clock smoothing stays comparable.
        self.WIN_SZ = int(20 / cfg.NUM_GPUS)
        # Output logging period in SGD iterations
        self.LOG_PERIOD = int(20 / cfg.NUM_GPUS)
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        # Start timing the current SGD iteration
        self.iter_timer.tic()

    def IterToc(self):
        # Stop timing and return the elapsed time for this iteration
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                # Losses are summed across GPUs; metrics are averaged
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        # NOTE(review): cfg.SOLVER.MAX_ITER is treated as an epoch count here
        # (multiplied by iterations-per-epoch) — confirm against the solver cfg.
        num_iter_per_epoch = self.model.roi_data_loader.get_num_iter_per_epoch()
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER * num_iter_per_epoch - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        """Assemble the current (smoothed) stats into a flat dict for logging."""
        num_iter_per_epoch = self.model.roi_data_loader.get_num_iter_per_epoch()
        # ETA extrapolated from the mean iteration time over the full schedule
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER * num_iter_per_epoch - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        # Peak GPU memory across the GPUs in use, reported in MB below
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetAverageValue(),
            eta=eta,
            mb_qsize=int(
                np.round(self.smoothed_mb_qsize.GetAverageValue())
            ),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetAverageValue()
        return stats
class JsonDataset(object):
    """A class representing a COCO json dataset.

    This variant can additionally replace the ground-truth roidb with
    pseudo ground-truth boxes loaded from cached detection results
    (cfg.USE_PSEUDO), for weakly-supervised training.
    """

    def __init__(self, name):
        assert dataset_catalog.contains(name), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(dataset_catalog.get_im_dir(name)), \
            'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
            'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = dataset_catalog.get_im_dir(name)
        self.image_prefix = dataset_catalog.get_im_prefix(name)
        self.COCO = COCO(dataset_catalog.get_ann_fn(name))
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        # Class 0 is reserved for the background
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        # COCO category ids are sparse; map them to contiguous 1..K ids
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.COCO.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()
        logger.info(self.classes)
        logger.info(self.json_category_id_to_contiguous_id)
        logger.info(self.contiguous_category_id_to_json_id)

    def get_roidb(self,
                  gt=False,
                  proposal_file=None,
                  min_proposal_size=20,
                  proposal_limit=-1,
                  crowd_filter_thresh=0):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug('_add_gt_annotations took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
            if cfg.USE_PSEUDO and 'test' not in self.name:
                # Build a fresh roidb populated with pseudo labels (derived
                # from cached detections), using the real GT only to decide
                # which classes are present per image; then swap it in.
                pgt_roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
                for entry in pgt_roidb:
                    self._prep_roidb_entry(entry)
                self._add_pseudo_gt_annotations(pgt_roidb, roidb)
                roidb = pgt_roidb
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(roidb, proposal_file,
                                          min_proposal_size, proposal_limit,
                                          crowd_filter_thresh)
            logger.debug('_add_proposals_from_file took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        _add_class_assignments(roidb)
        # roidb = _filter_no_class(self.name, roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(self.image_directory,
                               self.image_prefix + entry['file_name'])
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(
            im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders (filled in by _add_gt_annotations /
        # _add_proposals_from_file)
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32))
        # NOTE(review): np.bool is removed in NumPy >= 1.24; fine on the
        # pinned legacy NumPy this codebase targets.
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the
        # index in the list of rois that satisfy
        # np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty((0, 3, self.num_keypoints),
                                             dtype=np.int32)
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded
            if segm_utils.is_poly(obj['segmentation']):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)
        # Allocate per-object arrays with dtypes matching the placeholders
        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)
        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints, axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints

    def _add_pseudo_gt_annotations(self, roidb, gt_roidb):
        """
        Return the database of pseudo ground-truth regions of interest
        from detect result.
        This function loads/saves from/to a cache file to speed up future
        calls.
        """
        # gt_roidb = copy.deepcopy(roidb)
        # for entry in roidb:
        #     self._add_gt_annotations(entry)
        assert 'train' in self.name or 'val' in self.name, \
            'Only trainval dataset has pseudo gt.'
        # detection.pkl is 0-based indices
        # the VOC result file is 1-based indices
        cache_files = cfg.PSEUDO_PATH
        # NOTE(review): basestring is Python 2 only.
        if isinstance(cache_files, basestring):
            cache_files = (cache_files, )
        all_dets = None
        # Merge cached per-class detections from every file whose path
        # mentions this dataset's name
        for cache_file in cache_files:
            if self.name not in cache_file:
                continue
            assert os.path.exists(cache_file), cache_file
            # with open(cache_file, 'rb') as fid:
            #     res = cPickle.load(fid)
            res = load_object(cache_file)
            print('{} pseudo gt roidb loaded from {}'.format(
                self.name, cache_file))
            if all_dets is None:
                all_dets = res['all_boxes']
            else:
                for i in range(len(all_dets)):
                    all_dets[i].extend(res['all_boxes'][i])
        assert len(all_dets[1]) == len(roidb), len(all_dets[1])
        # Detections may or may not include a background class row
        if len(all_dets) == self.num_classes:
            cls_offset = 0
        elif len(all_dets) + 1 == self.num_classes:
            cls_offset = -1
        else:
            raise Exception('Unknown mode.')
        # Detections scoring >= threshold become pseudo GT boxes; if none
        # pass for a present class, the single highest-scoring box is used.
        threshold = 1.0
        for im_i, entry in enumerate(roidb):
            if im_i % 1000 == 0:
                print('{:d} / {:d}'.format(im_i + 1, len(roidb)))
            # First pass: count how many pseudo boxes this image will get
            num_valid_objs = 0
            if len(gt_roidb[im_i]['gt_classes']) == 0:
                print(gt_roidb[im_i])
            if len(gt_roidb[im_i]['is_crowd']) == sum(
                    gt_roidb[im_i]['is_crowd']):
                print(gt_roidb[im_i])
            # when cfg.WSL = False, background class is in.
            # detection.pkl only has 20 classes
            # fast_rcnn need 21 classes
            for cls in range(1, self.num_classes):
                # TODO(YH): we need threshold the pseudo label
                # filter the pseudo label
                # self._gt_class has 21 classes
                # if self._gt_classes[ix][cls] == 0:
                if cls not in gt_roidb[im_i]['gt_classes']:
                    continue
                dets = all_dets[cls + cls_offset][im_i]
                if dets.shape[0] <= 0:
                    continue
                # TODO(YH): keep only one box
                # if dets.shape[0] > 0:
                #     num_valid_objs += 1
                max_score = 0
                num_valid_objs_cls = 0
                for i in range(dets.shape[0]):
                    det = dets[i]
                    score = det[4]
                    if score > max_score:
                        max_score = score
                    if score < threshold:
                        continue
                    num_valid_objs += 1
                    num_valid_objs_cls += 1
                if num_valid_objs_cls == 0:
                    if max_score > 0:
                        num_valid_objs += 1
            boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
            # obn_scores = np.zeros((num_valid_objs, 1),
            #                       dtype=entry['obn_scores'].dtype)
            gt_classes = np.zeros((num_valid_objs),
                                  dtype=entry['gt_classes'].dtype)
            gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                                   dtype=entry['gt_overlaps'].dtype)
            seg_areas = np.zeros((num_valid_objs),
                                 dtype=entry['seg_areas'].dtype)
            is_crowd = np.zeros((num_valid_objs),
                                dtype=entry['is_crowd'].dtype)
            box_to_gt_ind_map = np.zeros(
                (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype)
            # Second pass: fill the arrays with the same selection logic
            obj_i = 0
            valid_segms = []
            for cls in range(1, self.num_classes):
                # filter the pseudo label
                # self._gt_class has 21 classes
                # if self._gt_classes[ix][cls] == 0:
                if cls not in gt_roidb[im_i]['gt_classes']:
                    continue
                dets = all_dets[cls + cls_offset][im_i]
                if dets.shape[0] <= 0:
                    continue
                max_score = 0
                max_score_bb = []
                num_valid_objs_cls = 0
                for i in range(dets.shape[0]):
                    det = dets[i]
                    x1 = det[0]
                    y1 = det[1]
                    x2 = det[2]
                    y2 = det[3]
                    score = det[4]
                    if score > max_score:
                        max_score = score
                        max_score_bb = [x1, y1, x2, y2]
                    if score < threshold:
                        continue
                    assert x1 >= 0
                    assert y1 >= 0
                    assert x2 >= x1
                    assert y2 >= y1
                    assert x2 < entry['width']
                    assert y2 < entry['height']
                    boxes[obj_i, :] = [x1, y1, x2, y2]
                    gt_classes[obj_i] = cls
                    seg_areas[obj_i] = (x2 - x1 + 1) * (y2 - y1 + 1)
                    is_crowd[obj_i] = 0
                    box_to_gt_ind_map[obj_i] = obj_i
                    gt_overlaps[obj_i, cls] = 1.0
                    valid_segms.append([])
                    obj_i += 1
                    num_valid_objs_cls += 1
                if num_valid_objs_cls == 0:
                    # Fall back to the best-scoring box for this class.
                    # NOTE(review): the counting pass only reserved a slot
                    # when max_score > 0; if all scores are <= 0 this unpack
                    # of an empty max_score_bb would raise — confirm scores
                    # are always positive in the cached detections.
                    x1, y1, x2, y2 = max_score_bb[:]
                    assert x1 >= 0
                    assert y1 >= 0
                    assert x2 >= x1
                    assert y2 >= y1
                    assert x2 < entry['width']
                    assert y2 < entry['height']
                    boxes[obj_i, :] = [x1, y1, x2, y2]
                    gt_classes[obj_i] = cls
                    seg_areas[obj_i] = (x2 - x1 + 1) * (y2 - y1 + 1)
                    is_crowd[obj_i] = 0
                    box_to_gt_ind_map[obj_i] = obj_i
                    gt_overlaps[obj_i, cls] = 1.0
                    valid_segms.append([])
                    obj_i += 1
            assert obj_i == num_valid_objs
            # Show Pseudo GT boxes
            if True:
                # if False:
                import cv2
                im = cv2.imread(entry['image'])
                for obj_i in range(num_valid_objs):
                    cv2.rectangle(im, (boxes[obj_i][0], boxes[obj_i][1]),
                                  (boxes[obj_i][2], boxes[obj_i][3]),
                                  (255, 0, 0), 5)
                save_dir = os.path.join(cfg.OUTPUT_DIR, 'pgt')
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                save_path = os.path.join(save_dir, str(im_i) + '.png')
                # print(save_path)
                cv2.imwrite(save_path, im)
                # cv2.imshow('im', im)
                # cv2.waitKey()
            entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
            # entry['obn_scores'] = np.append(entry['obn_scores'],
            #                                 obn_scores, axis=0)
            entry['segms'].extend(valid_segms)
            # To match the original implementation:
            # entry['boxes'] = np.append(
            #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
            entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
            entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
            entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                             gt_overlaps, axis=0)
            entry['gt_overlaps'] = scipy.sparse.csr_matrix(
                entry['gt_overlaps'])
            entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
            entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                                   box_to_gt_ind_map)

    def _add_proposals_from_file(self, roidb, proposal_file,
                                 min_proposal_size, top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        proposals = load_object(proposal_file)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _remove_proposals_not_in_roidb(proposals, roidb, id_field)
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information."""
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return
        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            # Left/right pairs used when horizontally flipping images
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'
            }

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        # NOTE(review): '/' here is integer division only under Python 2;
        # the equality assert still holds when the length is divisible by 3.
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
class TrainingStats(object):
    """Track vital training statistics.

    Memory-profiling variant: can additionally record the peak size of
    gradient and shared blobs in the Caffe2 workspace (see GetMem).
    """

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering).
        # Scaled by 1/NUM_GPUS so wall-clock smoothing stays comparable.
        self.WIN_SZ = int(1280 / cfg.NUM_GPUS)
        # Output logging period in SGD iterations
        self.LOG_PERIOD = int(1280 / cfg.NUM_GPUS)
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        # Blob-size tracking toggle: the dict assignment below is immediately
        # overwritten, so profiling is OFF by default. NOTE(review): the first
        # assignment is dead code; restore `self.mem = dict()` (and remove the
        # None) to enable GetMem profiling.
        self.mem = dict()
        self.mem = None

    def IterTic(self):
        # Start timing the current SGD iteration
        self.iter_timer.tic()

    def IterToc(self):
        # Stop timing and return the elapsed time for this iteration
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            # Unlike the other variants, losses are averaged (not summed)
            # across GPUs here as well
            self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())
        if self.mem is not None:
            self.GetMem()

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)
            if self.mem is not None:
                # Dump tracked blob sizes in ascending order
                mem_sorted = sorted(self.mem.items(), key=lambda d: d[1])
                print(mem_sorted)

    def GetStats(self, cur_iter, lr):
        """Assemble the current (smoothed) stats into a flat dict for logging."""
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        # Peak GPU memory across the GPUs in use, reported in MB below
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(iter=cur_iter,
                     lr=float(lr),
                     time=self.iter_timer.average_time,
                     loss=self.smoothed_total_loss.GetAverageValue(),
                     eta=eta,
                     mb_qsize=int(
                         np.round(self.smoothed_mb_qsize.GetAverageValue())),
                     mem=int(np.ceil(mem_usage / 1024 / 1024)))
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetAverageValue()
        return stats

    def is_grad(self, b):
        """Return True if blob name b denotes a gradient blob."""
        name = str(b)
        return name.endswith("_grad")

    def is_shared(self, b):
        """Return True if blob name b denotes a shared (memonger) blob."""
        name = str(b)
        return name.endswith("_shared")

    def GetMem(self):
        """Record the peak element count of every grad/shared output blob."""
        for op_idx in range(len(self.model.net._net.op)):
            op = self.model.net._net.op[op_idx]
            for b in list(op.output):
                # Only gradient and shared blobs are tracked
                if self.is_grad(b):
                    pass
                elif self.is_shared(b):
                    pass
                else:
                    continue
                blob = workspace.FetchBlob(str(b))
                if b not in self.mem.keys():
                    self.mem[str(b)] = 0
                # Keep the maximum size seen so far for each blob
                self.mem[str(b)] = max(self.mem[str(b)], blob.size)
def test_net_on_dataset(weights_file,
                        dataset_name,
                        proposal_file,
                        output_dir,
                        multi_gpu=False,
                        gpu_id=0):
    """Run inference on a dataset and evaluate all enabled tasks.

    Args:
        weights_file: path to the trained model weights.
        dataset_name: registered dataset to run on.
        proposal_file: optional precomputed proposals file (or None).
        output_dir: directory where detections/results are written.
        multi_gpu: if True, shard the dataset across all GPUs.
        gpu_id: GPU to use for single-GPU inference.

    Returns:
        The results dict produced by task_evaluation.evaluate_all.
    """
    # Debug switch: when True, skip inference and reload detections that
    # were cached by a previous run (see tmp_path below).
    load_from_tmp = False
    dataset = JsonDataset(dataset_name)
    if not load_from_tmp:
        test_timer = Timer()
        test_timer.tic()
        if multi_gpu:
            num_images = len(dataset.get_roidb())
            all_boxes, all_segms, all_keyps, all_personmasks, all_parss, all_bodys = \
                multi_gpu_test_net_on_dataset(
                    weights_file, dataset_name, proposal_file, num_images,
                    output_dir
                )
        else:
            all_boxes, all_segms, all_keyps, all_personmasks, all_parss, all_bodys = test_net(
                weights_file, dataset_name, proposal_file, output_dir,
                gpu_id=gpu_id)
        test_timer.toc()
        logger.info('Total inference time: {:.3f}s'.format(
            test_timer.average_time))
    else:
        # TODO(review): hard-coded debug path; consider promoting to a cfg
        # option instead of editing source to switch datasets.
        tmp_path = '/coco/results/detectron-output_mulres_intersup_mulsaclesup_lowfeat23_int05/test/dense_coco_2014_minival/generalized_rcnn/detections.pkl'
        print('detections results from: ', tmp_path)
        # Bug fix: pickles must be read in binary mode ('rb'); the context
        # manager also guarantees the file handle is closed.
        with open(tmp_path, 'rb') as tmp_file:
            tmp_pkl = pickle.load(tmp_file)
        all_boxes = tmp_pkl['all_boxes']
        all_segms = tmp_pkl['all_segms']
        all_keyps = tmp_pkl['all_keyps']
        # Older caches may predate the person-mask output
        if 'all_personmasks' not in tmp_pkl:
            all_personmasks = None
        else:
            all_personmasks = tmp_pkl['all_personmasks']
        all_parss = tmp_pkl['all_parss']
        all_bodys = tmp_pkl['all_bodys']
    if cfg.VIS:
        vis_wholedataset(
            dataset_name,
            proposal_file,
            output_dir,
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            all_personmasks=all_personmasks,
            all_parss=all_parss,
            all_bodys=all_bodys,
            img_name=['COCO_val2014_000000464089.jpg'],
            show_box=False,
        )
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, all_personmasks,
                                           all_parss, all_bodys, output_dir)
    return results
class JsonDataset(object):
    """A class representing a COCO json dataset.

    DensePose variant: roidb entries additionally carry DensePose UV
    annotations (dp_x/dp_y/dp_I/dp_U/dp_V/dp_masks) when present.
    """

    def __init__(self, name):
        assert dataset_catalog.contains(name), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(dataset_catalog.get_im_dir(name)), \
            'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
            'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = dataset_catalog.get_im_dir(name)
        self.image_prefix = dataset_catalog.get_im_prefix(name)
        self.COCO = COCO(dataset_catalog.get_ann_fn(name))
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        # Class 0 is reserved for the background
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        # COCO category ids are sparse; map them to contiguous 1..K ids
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.COCO.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

    def get_roidb(
        self,
        gt=False,
        proposal_file=None,
        min_proposal_size=2,
        proposal_limit=-1,
        crowd_filter_thresh=0
    ):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug(
                '_add_gt_annotations took {:.3f}s'.
                format(self.debug_timer.toc(average=False))
            )
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(
                roidb, proposal_file, min_proposal_size, proposal_limit,
                crowd_filter_thresh
            )
            logger.debug(
                '_add_proposals_from_file took {:.3f}s'.
                format(self.debug_timer.toc(average=False))
            )
        _add_class_assignments(roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(
            self.image_directory, self.image_prefix + entry['file_name']
        )
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        entry['has_body_uv'] = False
        # Empty placeholders (filled in by _add_gt_annotations /
        # _add_proposals_from_file)
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        # densepose entries
        entry['dp_x'] = []
        entry['dp_y'] = []
        entry['dp_I'] = []
        entry['dp_U'] = []
        entry['dp_V'] = []
        entry['dp_masks'] = []
        #
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32)
        )
        # NOTE(review): np.bool is removed in NumPy >= 1.24; fine on the
        # pinned legacy NumPy this codebase targets.
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the
        # index in the list of rois that satisfy
        # np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty(
                (0, 3, self.num_keypoints), dtype=np.int32
            )
        if cfg.MODEL.BODY_UV_ON:
            # Per-box flag: True when the box has no DensePose annotation
            entry['ignore_UV_body'] = np.empty((0), dtype=np.bool)
            # entry['Box_image_links_body'] = []
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        #### per-object DensePose fields, parallel to valid_objs
        valid_dp_x = []
        valid_dp_y = []
        valid_dp_I = []
        valid_dp_U = []
        valid_dp_V = []
        valid_dp_masks = []
        ####
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
                ### keep DensePose lists aligned with valid_objs: empty
                ### lists mark objects without UV annotations
                if 'dp_x' in obj.keys():
                    valid_dp_x.append(obj['dp_x'])
                    valid_dp_y.append(obj['dp_y'])
                    valid_dp_I.append(obj['dp_I'])
                    valid_dp_U.append(obj['dp_U'])
                    valid_dp_V.append(obj['dp_V'])
                    valid_dp_masks.append(obj['dp_masks'])
                else:
                    valid_dp_x.append([])
                    valid_dp_y.append([])
                    valid_dp_I.append([])
                    valid_dp_U.append([])
                    valid_dp_V.append([])
                    valid_dp_masks.append([])
                ###
        num_valid_objs = len(valid_objs)
        ## Allocate per-object arrays with dtypes matching the placeholders
        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )
        if cfg.MODEL.BODY_UV_ON:
            ignore_UV_body = np.zeros((num_valid_objs))
            #Box_image_body = [None]*num_valid_objs
        im_has_visible_keypoints = False
        im_has_any_body_uv = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if cfg.MODEL.BODY_UV_ON:
                # Boxes without DensePose data are flagged so the UV head
                # can skip them
                if 'dp_x' in obj:
                    ignore_UV_body[ix] = False
                    im_has_any_body_uv = True
                else:
                    ignore_UV_body[ix] = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        entry['dp_x'].extend(valid_dp_x)
        entry['dp_y'].extend(valid_dp_y)
        entry['dp_I'].extend(valid_dp_I)
        entry['dp_U'].extend(valid_dp_U)
        entry['dp_V'].extend(valid_dp_V)
        entry['dp_masks'].extend(valid_dp_masks)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints
        if cfg.MODEL.BODY_UV_ON:
            entry['ignore_UV_body'] = np.append(entry['ignore_UV_body'],
                                                ignore_UV_body)
            #entry['Box_image_links_body'].extend(Box_image_body)
            entry['has_body_uv'] = im_has_any_body_uv

    def _add_proposals_from_file(
        self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
    ):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        # NOTE(review): pickles should be opened in binary mode ('rb');
        # text mode only works under Python 2 — confirm target runtime.
        with open(proposal_file, 'r') as f:
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(
                boxes, entry['height'], entry['width']
            )
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information."""
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return
        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            # Left/right pairs used when horizontally flipping images
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'}

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        # NOTE(review): '/' here is integer division only under Python 2;
        # the equality assert still holds when the length is divisible by 3.
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
class JsonDataset(object):
    """A dataset wrapper for LIP/ATR human-parsing data.

    Despite the historical name, this variant does not read a COCO json file:
    image ids come from a plain-text id list (DATASETS[name][IM_IDS]) and
    ground truth is derived from per-pixel label PNGs (DATASETS[name][ANN_FN]).
    Category tables are hard-coded for the ATR (18-class) and LIP (20-class)
    label sets, optionally collapsed left/right via cfg.Ignore_left.
    """

    def __init__(self, name):
        """Validate paths for dataset `name` and build the category tables."""
        assert name in DATASETS.keys(), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(DATASETS[name][IM_DIR]), \
            'Image directory \'{}\' not found'.format(DATASETS[name][IM_DIR])
        if 'train' in name:
            # Label PNGs are only required for training splits.
            assert os.path.exists(DATASETS[name][ANN_FN]), \
                'Annotation file \'{}\' not found'.format(DATASETS[name][ANN_FN])
        assert os.path.exists(DATASETS[name][IM_IDS]), \
            'im_ids file \'{}\' not found'.format(DATASETS[name][IM_IDS])
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.dataset = name.split('_')[-1]  # 'train' or 'val'
        self.image_directory = DATASETS[name][IM_DIR]
        self.image_prefix = (
            '' if IM_PREFIX not in DATASETS[name]
            else DATASETS[name][IM_PREFIX]
        )
        self.debug_timer = Timer()
        # Set up dataset classes: fixed label tables for ATR and LIP.
        if 'ATR' in self.name:
            category_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                            15, 16, 17]
            categories = ['background', 'hat', 'hair', 'sunglasses',
                          'upperclothes', 'skirt', 'pants', 'dress', 'belt',
                          'leftShoes', 'right-shoe', 'face', 'left-leg',
                          'right-leg', 'left-arm', 'right-arm', 'bag', 'scarf']
        else:
            category_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                            15, 16, 17, 18, 19]
            categories = ['background', 'hat', 'hair', 'glove', 'sunglasses',
                          'upperclothes', 'dress', 'coat', 'socks', 'pants',
                          'jumpsuits', 'scarf', 'skirt', 'face', 'leftArm',
                          'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
                          'rightShoe']
        if cfg.Ignore_left:
            # Collapse left/right pairs into a single class and record the
            # raw-label -> collapsed-label mapping used by load_from_seg().
            if 'ATR' in self.name:
                categories = ['background', 'hat', 'hair', 'sunglasses',
                              'upperclothes', 'skirt', 'pants', 'dress',
                              'belt', 'shoe', 'face', 'leg', 'arm', 'bag',
                              'scarf']
                category_ids = range(len(categories))
                # Identity for labels 0-9, then merge 10/11 (shoes),
                # 12/13 (legs), 14/15 (arms); bag/scarf shift down.
                self.category_id_to_Ignore_left_id = {
                    v: i for i, v in enumerate(range(18))
                }
                self.category_id_to_Ignore_left_id[10] = 9
                self.category_id_to_Ignore_left_id[11] = 10
                self.category_id_to_Ignore_left_id[12] = 11
                self.category_id_to_Ignore_left_id[13] = 11
                self.category_id_to_Ignore_left_id[14] = 12
                self.category_id_to_Ignore_left_id[15] = 12
                self.category_id_to_Ignore_left_id[16] = 13
                self.category_id_to_Ignore_left_id[17] = 14
            else:
                categories = ['background', 'hat', 'hair', 'glove',
                              'sunglasses', 'upperclothes', 'dress', 'coat',
                              'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
                              'face', 'Arm', 'Leg', 'Shoe']
                category_ids = range(len(categories))
                # Identity for labels 0-14, then merge 15/16 (arms... legs)
                # and 18/19 (shoes) into single classes.
                self.category_id_to_Ignore_left_id = {
                    v: i for i, v in enumerate(range(20))
                }
                self.category_id_to_Ignore_left_id[15] = 14
                self.category_id_to_Ignore_left_id[16] = 15
                self.category_id_to_Ignore_left_id[17] = 15
                self.category_id_to_Ignore_left_id[18] = 16
                self.category_id_to_Ignore_left_id[19] = 16
        self.category_to_id_map = dict(zip(categories, category_ids))
        self.classes = categories
        self.num_classes = len(self.classes)
        logger.info('classes: {}'.format(self.classes))
        logger.info('num_classes: {}'.format(self.num_classes))
        # Label ids are already contiguous here, so both maps are identities;
        # kept for interface parity with the COCO-backed JsonDataset.
        self.json_category_id_to_contiguous_id = {
            v: i for i, v in enumerate(category_ids)
        }
        self.contiguous_category_id_to_json_id = {
            v: k for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

    def get_roidb(
        self,
        gt=False,
        proposal_file=None,
        min_proposal_size=2,
        proposal_limit=-1,
        crowd_filter_thresh=0
    ):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        roidb = self._load_lip()  # load data when train or test
        if gt:
            # Include gt object annotations derived from the label PNGs.
            self.debug_timer.tic()
            self.load_lip_annotations(roidb)
            logger.debug(
                'load_lip_annotations took {:.3f}s'.
                format(self.debug_timer.toc(average=False))
            )
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(
                roidb, proposal_file, min_proposal_size, proposal_limit,
                crowd_filter_thresh
            )
            logger.debug(
                '_add_proposals_from_file took {:.3f}s'.
                format(self.debug_timer.toc(average=False))
            )
        _add_class_assignments(roidb)
        return roidb

    def _load_lip(self):
        """Build skeleton roidb entries from the LIP/ATR image-id list.

        Reads image ids from the id-list file, drops ids listed in
        train_mistake_id.txt for LIP training splits, and fills each entry
        with empty gt placeholders (populated later by load_lip_annotations).
        Opens every image with cv2 to record its height/width.
        """
        imglist_file = DATASETS[self.name][IM_IDS]
        assert os.path.exists(imglist_file), \
            'path does not exist: {}'.format(imglist_file)
        imgids_list = []
        with open(imglist_file) as f:
            for line in f.readlines():
                if len(line) > 1:  # skip blank lines
                    imgids_list.append(line.strip())
        # Ids with known-bad labels are excluded from LIP training.
        if 'LIP_train' in self.name:
            mistakelist_file = os.path.join(
                os.path.dirname(imglist_file), 'train_mistake_id.txt')
            assert os.path.exists(mistakelist_file), \
                'path does not exist: {}'.format(mistakelist_file)
            im_mistake_ids = []
            with open(mistakelist_file) as f:
                for line in f.readlines():
                    if len(line) > 1:
                        im_mistake_ids.append(line.strip())
        roidb = []
        for i in range(len(imgids_list)):
            if 'LIP_train' in self.name:
                if imgids_list[i] in im_mistake_ids:
                    continue
            roi_entry = dict()
            roi_entry['dataset'] = self
            roi_entry['id'] = imgids_list[i]
            roi_entry['image'] = os.path.join(
                DATASETS[self.name][IM_DIR], imgids_list[i] + '.jpg')
            # BUG FIX: the failure message used roi_entry['images'] (a key
            # that is never set), so a missing image raised KeyError instead
            # of the intended AssertionError.
            assert os.path.exists(roi_entry['image']), \
                'image path does not exist: {}'.format(roi_entry['image'])
            img = cv2.imread(roi_entry['image'])
            size = img.shape
            roi_entry['height'] = size[0]
            roi_entry['width'] = size[1]
            roi_entry['flipped'] = False
            roi_entry['has_visible_keypoints'] = False
            # Empty placeholders, same schema as the COCO-backed roidb.
            roi_entry['boxes'] = np.empty((0, 4), dtype=np.float32)
            roi_entry['gt_classes'] = np.empty((0), dtype=np.int32)
            roi_entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
            roi_entry['gt_overlaps'] = scipy.sparse.csr_matrix(
                np.empty((0, self.num_classes), dtype=np.float32)
            )
            roi_entry['is_crowd'] = np.empty((0), dtype=np.bool)
            roi_entry['seg_areas'] = np.empty((0), dtype=np.float32)
            roidb.append(roi_entry)
        return roidb

    def load_lip_annotations(self, roidb):
        """Populate gt fields of each roidb entry from its label PNG.

        For each entry, load_from_seg() derives per-class boxes from the
        per-pixel labels; entries with no boxes get zero-length gt arrays.
        Mutates `roidb` in place.
        """
        for i in range(len(roidb)):
            roi_entry = roidb[i]
            # Known-bad annotation; keep its empty placeholders.
            if roi_entry['id'] in ['27969_199668']:
                continue
            boxes, gt_classes, ins_id, label_path, gt_overlaps = \
                self.load_from_seg(roi_entry['id'])
            if boxes.size == 0:
                total_num_objs = 0
                boxes = np.zeros((total_num_objs, 4), dtype=np.uint16)
                gt_overlaps = np.zeros(
                    (total_num_objs, self.num_classes), dtype=np.float32)
                gt_classes = np.zeros((total_num_objs, ), dtype=np.int32)
            roi_entry['boxes'] = boxes
            roi_entry['gt_classes'] = gt_classes
            roi_entry['box_to_gt_ind_map'] = ins_id
            roi_entry['ins_seg'] = label_path  # full path of label png
            roi_entry['gt_overlaps'] = gt_overlaps
            roi_entry['gt_overlaps'] = scipy.sparse.csr_matrix(
                roi_entry['gt_overlaps'])
            roi_entry['is_crowd'] = np.zeros((boxes.shape[0]), dtype=np.bool)
            roi_entry['seg_areas'] = np.zeros(
                (boxes.shape[0]), dtype=np.float32)
            # Constant dummy area; downstream only needs a positive value.
            roi_entry['seg_areas'][:] = 50

    def load_from_seg(self, seg_gt_id):
        """Derive gt boxes from the label PNG for image `seg_gt_id`.

        For every non-background class present in the PNG, emit the tight
        bounding box of its pixels (skipping degenerate <=1px-wide regions).
        Gloves/socks (LIP classes 3 and 8) are split into per-instance boxes
        via _get_socks_glove(). Returns (boxes, gt_classes,
        box_to_gt_ind_map, label_path, gt_overlaps).
        """
        seg_gt = os.path.join(DATASETS[self.name][ANN_FN], seg_gt_id + '.png')
        assert os.path.exists(seg_gt), 'path does not exist: {}'.format(seg_gt)
        im = Image.open(seg_gt)
        pixel = list(im.getdata())
        # PIL size is (width, height); reshape to (rows, cols).
        pixel = np.array(pixel).reshape([im.size[1], im.size[0]])
        gt_classes = []
        boxes = []
        box_to_gt_ind_map = []
        gt_overlaps = []
        ins_id = 0
        for c in range(1, self.num_classes):
            px = np.where(pixel == c)
            if len(px[0]) == 0:
                continue
            x_min = np.min(px[1])
            y_min = np.min(px[0])
            x_max = np.max(px[1])
            y_max = np.max(px[0])
            if x_max - x_min <= 1 or y_max - y_min <= 1:
                continue
            if cfg.Ignore_left:
                # Remap raw label to the collapsed left/right class id.
                c = self.category_id_to_Ignore_left_id[c]
            if (c == 3 or c == 8) and 'LIP' in cfg.TRAIN.DATASETS[0]:
                # has gloves or socks: split into per-instance boxes
                box, gt_class, box_to_gt_ind, gt_overlap, ins_id = \
                    _get_socks_glove(pixel, c, ins_id, self.num_classes)
                for i in range(len(box)):
                    boxes.append(box[i])
                    gt_classes.append(gt_class[i])
                    box_to_gt_ind_map.append(box_to_gt_ind[i])
                    gt_overlaps.append(gt_overlap[i])
            else:
                gt_classes.append(c)
                boxes.append([x_min, y_min, x_max, y_max])
                box_to_gt_ind_map.append(ins_id)
                ins_id += 1
                overlaps = np.zeros(self.num_classes)
                overlaps[c] = 1
                gt_overlaps.append(overlaps)
        return np.asarray(boxes, dtype=np.float32), \
            np.asarray(gt_classes, dtype=np.int32), \
            np.asarray(box_to_gt_ind_map, dtype=np.int32), \
            seg_gt, \
            np.asarray(gt_overlaps)

    def _add_proposals_from_file(
        self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
    ):
        """Add proposals from a proposals file to an roidb.

        Boxes are clipped to the image, de-duplicated, size-filtered, and
        truncated to `top_k` (if > 0) before being merged into the roidb.
        """
        logger.info('Loading proposals from: {}'.format(proposal_file))
        # NOTE(review): text-mode 'r' + pickle.load is Python-2 only; Python 3
        # requires 'rb'.
        with open(proposal_file, 'r') as f:
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(
                boxes, entry['height'], entry['width']
            )
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information.

        All keypoint attributes stay None/0 unless a 'person' category with a
        'keypoints' list exists (it does not for LIP/ATR parsing classes).
        """
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return
        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            # Left/right pairs swapped under horizontal image flips.
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'}

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints as a (3, num_keypoints) int32 array
        of rows (x, y, visibility), or None if `obj` has no keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
class TrainingStats(object):
    """Track vital training statistics.

    This variant logs every iteration (WIN_SZ = LOG_PERIOD = 1, i.e. no real
    smoothing) and optionally reports domain-adaptation (DA/PADA) statistics
    read off the model.
    """

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering).
        # A window of 1 makes the "smoothed" values equal the raw values.
        self.WIN_SZ = 1
        # Output logging period in SGD iterations (1 = log every iteration)
        self.LOG_PERIOD = 1
        # One smoother per tracked loss/metric blob name.
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        # Latest raw value per blob name.
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        # Start timing the current iteration.
        self.iter_timer.tic()

    def IterToc(self):
        # Stop timing; returns the current iteration's wall time.
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        # Losses are summed over GPUs; metrics are averaged.
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        # Track how full the minibatch queue is (loader health indicator).
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        """Assemble the stats dict for `cur_iter` at learning rate `lr`.

        Includes timing/ETA, GPU memory high-water mark, total loss, and —
        when enabled in cfg — DA fade-in weight and PADA diagnostics.
        """
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(np.round(self.smoothed_mb_qsize.GetMedianValue())),
            mem=int(np.ceil(mem_usage / 1024 / 1024)),  # bytes -> MiB
        )
        if cfg.TRAIN.DA_FADE_IN:
            stats['da_weight'] = self.model.da_fade_in.get_weight()
        if cfg.TRAIN.PADA:
            stats['avg_pada_weight'] = \
                self.model.class_weight_db.get_avg_pada_weight()
            # NOTE(review): halved — presumably because detections are counted
            # over two domains/passes; confirm against class_weight_db.
            stats['total_detects'] = \
                self.model.class_weight_db.total_sum_softmax.sum() / 2
            stats['KL_div'] = self.model.class_weight_db.get_KL_to_init()
            stats['accuracy_fg'] = self.model.class_weight_db.fg_acc.get()
            stats['acc_fg_weighted'] = \
                self.model.class_weight_db.weighted_fg_acc.get()
            target_dist = self.model.class_weight_db.get_dist()
            print('target_dist: {}'.format(list(target_dist)))
            class_weights = self.model.class_weight_db.class_weights
            print('class_weights: {}'.format(list(class_weights)))
            # NOTE(review): on Python 3, dict.values() is a view and
            # np.array(...) over it will not produce a string array the same
            # way as on Python 2 — confirm interpreter version.
            classes = np.array(
                dummy_datasets.get_coco_dataset().classes.values(), dtype=str)
            # Print the top-5 classes ordered by target dist, then by weight.
            for dist in [target_dist, class_weights]:
                order = np.argsort(dist)[::-1]
                o_target_dist = target_dist[order]
                o_classes = classes[order]
                cwo = class_weights[order]
                print("dist tops: ", end='')
                for prob, w, c in list(zip(o_target_dist, cwo, o_classes))[:5]:
                    print("{}:{:.3f} ({:.3f})".format(c, prob, w), end='; ')
                print()
            print()
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
def train_multiscale(opt, model, train_loader, criterion, optimizer, epoch,
                     stats, best_prec1, start_iter=0):
    """Run one training epoch of the multiscale model.

    Iterates `train_loader` starting at `start_iter` (to resume from a
    checkpoint), performs forward/backward/step, updates running-loss
    progress, and — if the requeue signal was received — saves a checkpoint
    and exits the process. Stops early after opt['it'] iterations.

    Args:
        opt: options dict (keys used here: 'save', 'gpu_id', 'nb_scales',
            'n_target_frames', 'nb_features', 'train_single_level',
            'FfpnLevels', 'logs', 'it').
        model, train_loader, criterion, optimizer: standard training objects.
        epoch: 0-based epoch index (logged as epoch + 1).
        stats: progress-tracking object passed to the progress helpers.
        best_prec1: best metric so far, stored in the checkpoint.
        start_iter: first iteration to actually process (earlier ones are
            skipped).
    """
    global SIGNAL_RECEIVED
    from detectron.utils.timer import Timer
    t = Timer()
    model.train()
    runningTrainLoss = resetTrainProgressMultiscale(opt, train_loader, stats)
    # NOTE(review): `/ 1` makes this just len(train_loader); possibly a
    # leftover divisor knob — confirm intent.
    rtl_period = max(5, int(len(train_loader) / 1))
    logger.info(
        '-------------------------- Training epoch #%d --------------------------'
        % (epoch + 1))
    t.tic()
    # Set the module-level variables consumed by signal_handler on requeue.
    global RESUME_PATH, NUM_GPUS
    RESUME_PATH = opt['save']
    NUM_GPUS = opt[
        'gpu_id'] + 1  # relies on the assumption that the model uses the last GPU
    for i, data in enumerate(train_loader):
        # Skip the iterations included in the checkpoint
        if i < start_iter:
            continue
        # Get and prepare data
        inputs, targets, _ = data
        inputs, targets = prepareMultiscaleForForwardOnGpu(
            inputs, targets, **{
                'gpu_id': opt['gpu_id'],
                'nb_scales': opt['nb_scales']
            })
        targets = reshapeMultiscaleTargetsForCriterion(targets,
                                                       opt['n_target_frames'],
                                                       opt['nb_features'],
                                                       opt['nb_scales'])
        # Optimization
        optimizer.zero_grad()
        # Single-level training collapses the FPN level dimension to 1.
        ffpnlevels = 1 if opt['train_single_level'] else opt['FfpnLevels']
        outputs = format_variable_length_multiscale_sequence(
            model(inputs), ffpnlevels, opt['n_target_frames'],
            opt['nb_scales'])
        loss, loss_terms = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Update progress
        runningTrainLoss = updateTrainProgress(opt, runningTrainLoss,
                                               loss.item(), loss_terms, stats,
                                               i, rtl_period, epoch)
        if SIGNAL_RECEIVED:
            # Requeue signal: checkpoint at the NEXT iteration and exit.
            save_checkpoint(
                {
                    'epoch': epoch,
                    'iter': i + 1,
                    'opt_path': os.path.join(opt['logs'], 'params.pkl'),
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                },
                False,
                savedir=opt['save'])
            logger.info(
                'Saved checkpoint before exiting peacefully for job requeuing')
            exit(0)
        # Free references so GPU memory can be reclaimed before the next step.
        del loss, inputs, outputs, targets, loss_terms
        t.toc()
        t.tic()
        # Hard iteration cap for this epoch.
        if i >= (opt['it'] - 1):
            break
    print('Training iteration average duration : %f' % t.average_time)
class JsonDataset(object):
    """A class representing a COCO json dataset.

    This variant resolves paths via dataset_catalog and reads classes,
    images, and annotations from the COCO API.
    """

    def __init__(self, name):
        assert dataset_catalog.contains(name), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(dataset_catalog.get_im_dir(name)), \
            'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
            'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = dataset_catalog.get_im_dir(name)
        self.image_prefix = dataset_catalog.get_im_prefix(name)
        self.COCO = COCO(dataset_catalog.get_ann_fn(name))
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        # Class 0 is reserved for background; COCO category ids become 1..K.
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        # COCO category ids are sparse; map them to contiguous 1-based ids.
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.COCO.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

    def get_roidb(self,
                  gt=False,
                  proposal_file=None,
                  min_proposal_size=2,
                  proposal_limit=-1,
                  crowd_filter_thresh=0):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        # Deep copy so roidb mutations don't leak into the COCO object.
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug('_add_gt_annotations took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(roidb, proposal_file,
                                          min_proposal_size, proposal_limit,
                                          crowd_filter_thresh)
            logger.debug('_add_proposals_from_file took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        _add_class_assignments(roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(self.image_directory,
                               self.image_prefix + entry['file_name'])
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(
            im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders (filled in by _add_gt_annotations /
        # _add_proposals_from_file)
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32))
        # NOTE(review): np.bool is a deprecated alias (removed in NumPy 1.24);
        # plain `bool` is the modern spelling.
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the
        # index in the list of rois that satisfy
        # np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty((0, 3, self.num_keypoints),
                                             dtype=np.int32)
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)
        # Allocate gt arrays with dtypes matching the entry placeholders.
        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)
        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        # Densify to append, then re-sparsify the overlaps matrix.
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints,
                                              axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints

    def _add_proposals_from_file(self, roidb, proposal_file,
                                 min_proposal_size, top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        # NOTE(review): text-mode 'r' + pickle.load is Python-2 only;
        # Python 3 requires 'rb' — confirm the targeted interpreter.
        with open(proposal_file, 'r') as f:
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information.

        All keypoint attributes stay None/0 unless a 'person' category with a
        'keypoints' list exists in the annotations.
        """
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return
        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            # Left/right pairs swapped under horizontal image flips.
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'
            }

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints as a (3, num_keypoints) int32 array
        of rows (x, y, visibility), or None if `obj` has no keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        # NOTE(review): on Python 3 `/ 3` yields a float; `// 3` would be
        # exact (the equality assert still holds for well-formed data).
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
class TrainingStats(object):
    """Track vital training statistics.

    This variant smooths over a 20-iteration window, mirrors every tracked
    value to a summary `writer` (add_scalar), and can persist the raw and
    smoothed series to JSON via SaveTrainingStates().
    """

    def __init__(self, model, writer):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        # One smoother per tracked loss/metric blob name.
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        # Latest raw value per blob name.
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        self.writer = writer

    def IterTic(self):
        # Start timing the current iteration.
        self.iter_timer.tic()

    def IterToc(self):
        # Stop timing; returns the current iteration's wall time.
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self, i_iter=0):
        """Update tracked iteration statistics.

        Args:
            i_iter: global step used as the x-axis for writer.add_scalar.
        """
        # Losses are summed over GPUs; metrics are averaged.
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
            # Mirror every raw value to the summary writer.
            self.writer.add_scalar(k, self.losses_and_metrics[k], i_iter)
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        # Minibatch-queue size tracking is disabled in this variant
        # (roi_data_loader is not consulted here).

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        """Assemble the stats dict (timing/ETA, memory, smoothed losses)."""
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            # mb_qsize reporting disabled together with its tracking above.
            mem=int(np.ceil(mem_usage / 1024 / 1024))  # bytes -> MiB
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats

    def SaveTrainingStates(self, save_file):
        """Persist tracked series to JSON.

        Writes the raw per-key series (plus 'total_loss') to `save_file` and
        the smoothed series to `save_file` with '.json' replaced by
        '_smooth.json'.
        """
        save_state = {
            k: v.series
            for k, v in self.smoothed_losses_and_metrics.items()
        }
        save_smooth_state = {
            k: v.smooth_series
            for k, v in self.smoothed_losses_and_metrics.items()
        }
        save_state['total_loss'] = self.smoothed_total_loss.series
        # BUG FIX: the smooth file previously received the RAW total-loss
        # series (`.series`); it must carry the smoothed one.
        save_smooth_state['total_loss'] = self.smoothed_total_loss.smooth_series
        with open(save_file, 'w') as f:
            json.dump(save_state, f)
        # save smooth state
        save_smooth_file = save_file.replace('.json', '_smooth.json')
        with open(save_smooth_file, 'w') as f:
            json.dump(save_smooth_state, f)