def do_evaluate(pred_config, output_file):
    num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(num_tower))).get_predictors()

    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_tower)
            for k in range(num_tower)]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DatasetRegistry.get(dataset).eval_inference_results(all_results, output)
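# Usage sketch (not part of the original code): do_evaluate expects a tensorpack
# PredictConfig. Assuming a detection model object `MODEL` that exposes
# get_inference_tensor_names(), as in tensorpack's FasterRCNN example, and a
# checkpoint path `load_path`, wiring it up could look roughly like this:
def _example_run_evaluation(MODEL, load_path):
    from tensorpack.predict import PredictConfig
    from tensorpack.tfutils import SmartInit

    predcfg = PredictConfig(
        model=MODEL,
        session_init=SmartInit(load_path),  # restores the checkpoint to evaluate
        input_names=MODEL.get_inference_tensor_names()[0],
        output_names=MODEL.get_inference_tensor_names()[1])
    # Writes one "<prefix>-<dataset>" result file per dataset in cfg.DATA.VAL.
    do_evaluate(predcfg, output_file="./eval-output")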
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard,
                 (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs)

    # no filter for training
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]], ["file_name", "image_id"])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
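# Usage sketch (an assumption, not in the original code): the returned dataflow yields
# [image, image_id] pairs, so a single-shard evaluation loop could look like this
# (the dataset name is hypothetical and must already be registered):
def _example_iterate_eval_dataflow(dataset_name="coco_val2017"):
    df = get_eval_dataflow(dataset_name, shard=0, num_shards=1)
    df.reset_state()
    for img, img_id in df:
        # run inference on `img` here and collect results keyed by `img_id`
        pass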
def _eval(self):
    logdir = self._output_dir
    if cfg.TRAINER == 'replicated':
        all_results = multithread_predict_dataflow(self.dataflows, self.predictors)
    else:
        filenames = [
            os.path.join(
                logdir, 'outputs{}-part{}.json'.format(self.global_step, rank))
            for rank in range(hvd.local_size())
        ]

        if self._horovod_run_eval:
            local_results = predict_dataflow(self.dataflow, self.predictor)
            fname = filenames[hvd.local_rank()]
            with open(fname, 'w') as f:
                json.dump(local_results, f)
        self.barrier.eval()
        if hvd.rank() > 0:
            return
        all_results = []
        for fname in filenames:
            with open(fname, 'r') as f:
                obj = json.load(f)
            all_results.extend(obj)
            os.unlink(fname)

    scores = DatasetRegistry.get(
        self._eval_dataset).eval_inference_results(all_results)
    for k, v in scores.items():
        self.trainer.monitors.put_scalar(self._eval_dataset + '-' + k, v)
def get_pascal_voc_train_dataflow(batch_size=1):
    from dataset import register_pascal_voc

    # register_coco(os.path.expanduser("/media/ubuntu/Working/common_data/coco"))
    register_pascal_voc(os.path.expanduser("/media/ubuntu/Working/voc2012/VOC2012/"))

    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(
        DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    ds = DataFromList(np.arange(len(roidbs)), shuffle=True)
    ds.reset_state()
    ds = AspectGroupingDataFlow(
        roidbs, ds, group_ids, batch_size=batch_size, drop_uneven=True).__iter__()
    preprocess = TrainingDataPreprocessor()

    while True:
        batch_roidbs = next(ds)
        yield preprocess(batch_roidbs)
def get_plain_train_dataflow(batch_size=2):
    # no aspect ratio grouping
    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(
        DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 20
    ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds.reset_state()
    dataiter = ds.__iter__()
    return dataiter
def get_train_dataflow(batch_size=2):
    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(
        DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    ds = AspectGroupingDataFlow(roidbs, group_ids, batch_size=batch_size, drop_uneven=True)
    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 10
    # ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds.reset_state()
    # to get an infinite dataflow
    ds = RepeatedData(ds, num=-1)
    dataiter = ds.__iter__()
    return dataiter
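# `_quantize` is referenced by the aspect-grouping dataflows above but is not shown in
# these snippets. A minimal sketch of what such a helper typically does (bucket each
# aspect ratio by the given bin edges, in the spirit of maskrcnn-benchmark's
# GroupedBatchSampler); treat it as an assumption, not the original implementation:
def _quantize(x, bins):
    import bisect
    bins = sorted(bins)
    # group id = number of bin edges <= the value; with bins=[1] this yields
    # 0 for "wide" images (ratio < 1) and 1 for "tall" images (ratio >= 1).
    return [bisect.bisect_right(bins, v) for v in x]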
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),
    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)
    gt_boxes: (N, 4)
    gt_labels: (N,)
    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == "horovod":
            # One dataflow for each process, therefore no need for a large buffer.
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                     buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
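# Quick smoke test (a sketch, assuming the config has been set up and the training
# datasets registered): pull one datapoint and inspect the fields documented in the
# docstring above.
def _example_inspect_one_datapoint():
    df = get_train_dataflow()
    df.reset_state()
    dp = next(iter(df))
    print(dp["image"].shape, dp["gt_boxes"].shape, dp["gt_labels"].shape)
    for k in dp:
        if k.startswith("anchor_"):
            print(k, dp[k].shape)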
def get_eval_dataflow(name, is_aws, is_gcs, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (
        shard * img_per_shard,
        (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs,
    )

    # no filter for training
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]], ["file_name", "image_id"])

    if is_aws:
        s3 = boto3.resource("s3")
    elif is_gcs:
        c = storage.Client.create_anonymous_client()
        bucket = c.get_bucket("determined-ai-coco-dataset")

    def f(fname):
        if is_aws:
            s3_object = s3.meta.client.get_object(
                Bucket="determined-ai-coco-dataset", Key=fname)
            im = cv2.imdecode(
                np.asarray(bytearray(s3_object["Body"].read()), dtype=np.uint8),
                cv2.IMREAD_COLOR,
            )
        elif is_gcs:
            blob = bucket.blob(fname)
            s = download_gcs_blob_with_backoff(blob)
            im = cv2.imdecode(np.asarray(bytearray(s), dtype=np.uint8), cv2.IMREAD_COLOR)
        else:
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
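# `download_gcs_blob_with_backoff` is referenced above but not defined in this snippet.
# A minimal sketch (an assumption, not the original helper) of a blob download with
# exponential backoff on transient failures:
def download_gcs_blob_with_backoff(blob, n_retries=5, base_delay=1.0):
    import time
    for attempt in range(n_retries):
        try:
            return blob.download_as_string()
        except Exception:
            if attempt == n_retries - 1:
                raise
            # wait 1s, 2s, 4s, ... before retrying
            time.sleep(base_delay * (2 ** attempt))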
def do_sanity_check(pred_func, output_dir='/root/dentalpoc/logs/xxxxx',
                    font_rs=10, thickness_rs=10):
    # num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    # graph_funcs = MultiTowerOfflinePredictor(
    #     pred_config, list(range(num_tower))).get_predictors()
    os.makedirs(output_dir, exist_ok=True)
    for dataset in cfg.DATA.VAL:
        logger.info("sanity checking {} ...".format(dataset))
        # dataflows = [
        #     get_eval_dataflow(dataset, shard=k, num_shards=num_tower, add_gt=True)
        #     for k in range(num_tower)]
        # all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        coco_format_detection = DatasetRegistry.get(dataset)
        coco_object = coco_format_detection.coco
        for _im_id, _img_dic in list(coco_object.imgs.items())[1:]:
            _img_path = _img_dic['path']
            _img_seg_polygons = coco_object.imgToAnns[_im_id]
            detection_ground_truths = [
                DetectionResult(
                    box=convert_box_mode_xywh_2_xyxy(x['bbox']),
                    score=1.0,
                    class_id=x['category_id'],
                    mask=coco_object.annToMask(x))
                for x in _img_seg_polygons
            ]
            _predict_with_gt(pred_func=pred_func,
                             input_file=_img_path,
                             ground_truths=detection_ground_truths,
                             output_dir=output_dir,
                             font_rs=font_rs,
                             thickness_rs=thickness_rs)
def eval_one_dataset(dataset_name, output_filename):
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    import cv2
    from collections import namedtuple
    from dataset import DatasetRegistry
    from myaug_lib import short_side_resize_image

    DetectionResult = namedtuple('DetectionResult', ['box', 'score', 'class_id', 'mask'])

    register_coco(os.path.expanduser(cfg.DATA.BASEDIR))
    roidbs = DatasetRegistry.get(dataset_name).inference_roidbs()

    images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='images')
    with tf.variable_scope('resnet50'):
        final_boxes, final_scores, final_labels, final_inds = \
            model.model_fpn(images, is_training=False, data_format='channels_last', mode='test')

    init_op = tf.group(
        [tf.global_variables_initializer(), tf.local_variables_initializer()])

    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = False
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    sess.run(init_op)

    checkpoint_path = cfg.TRAIN.LOG_DIR + COMMON_POSTFIX
    # restorer = tf.train.Saver()
    # restorer.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
    variable_averages = tf.train.ExponentialMovingAverage(
        decay=cfg.TRAIN.MOVING_AVERAGE_DECAY)
    variable_to_restore = variable_averages.variables_to_restore()
    restorer = tf.train.Saver(variable_to_restore)
    restorer.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

    all_results = []
    start = time.time()
    for idx, roidb in enumerate(roidbs):
        fname, img_id = roidb["file_name"], roidb["image_id"]
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        im = im.astype("float32")
        h, w = im.shape[:2]

        # resize the short side
        resized_im = short_side_resize_image(im)
        # normalize: BGR --> RGB, scale to [0, 1], then subtract mean and divide by std
        resized_im = resized_im[:, :, [2, 1, 0]]  # BGR --> RGB
        resized_im /= 255.0
        resized_im -= np.asarray(cfg.PREPROC.PIXEL_MEAN)
        resized_im /= np.asarray(cfg.PREPROC.PIXEL_STD)

        resized_h, resized_w = resized_im.shape[:2]
        scale = np.sqrt(resized_h * 1.0 / h * resized_w / w)

        # pad so that both sides are divisible by the FPN resolution requirement
        mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
        max_height = int(np.ceil(float(resized_h) / mult) * mult)
        max_width = int(np.ceil(float(resized_w) / mult) * mult)
        resized_im1 = np.zeros((max_height, max_width, 3), dtype=np.float32)
        resized_im1[:resized_h, :resized_w, :] = resized_im

        # profile the graph execution for a few iterations
        if 1510 <= idx <= 1520:
            from tensorflow.python.client import timeline
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            boxes, scores, labels = sess.run(
                [final_boxes, final_scores, final_labels],
                feed_dict={images: resized_im1[np.newaxis]},
                options=options,
                run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('{}/timeline_Inference_step{}.json'.format(checkpoint_path, idx), 'w') as fp:
                fp.write(chrome_trace)
        else:
            boxes, scores, labels = sess.run(
                [final_boxes, final_scores, final_labels],
                feed_dict={images: resized_im1[np.newaxis]})

        # Some slow numpy postprocessing:
        boxes = boxes / scale
        # boxes are already clipped inside the graph, but after the floating point scaling,
        # this may not be true any more.
        boxes = boxes.reshape([-1, 4])
        boxes[:, [0, 1]] = np.maximum(boxes[:, [0, 1]], 0)
        boxes[:, 2] = np.minimum(boxes[:, 2], w - 1)
        boxes[:, 3] = np.minimum(boxes[:, 3], h - 1)
        if idx < 5:
            print(boxes, scores, labels)

        # if masks:
        #     full_masks = [_paste_mask(box, mask, orig_shape)
        #                   for box, mask in zip(boxes, masks[0])]
        #     masks = full_masks
        # else:
        #     # fill with none
        #     masks = [None] * len(boxes)

        # postprocessing for FCOS
        # ################# per-class NMS ##################
        # boxes_after_nms = []
        # for c in range(1, 81):
        #     inds = np.where(labels == c)
        #     if len(inds) > 0:
        #         boxes_keep = np.concatenate([boxes[inds], scores[inds].reshape(-1, 1),
        #                                      labels[inds].reshape(-1, 1)], axis=1)
        #         # NMS within this class
        #         keep = nms(boxes_keep[:, 0:5], thresh=cfg.FCOS.NMS_THRESH)
        #         boxes_keep = boxes_keep[keep]
        #         # filter out low-scoring boxes
        #         # keep = np.where(boxes_keep[:, 4] > 0.1)  # the threshold should be chosen per class
        #         # boxes_keep = boxes_keep[keep]
        #         boxes_after_nms.append(boxes_keep)
        # boxes_after_nms = np.concatenate(boxes_after_nms, axis=0)  # [x1,y1,x2,y2,score,label]
        boxes_after_nms = np.concatenate(
            [boxes, scores.reshape(-1, 1), labels.reshape(-1, 1)], axis=1)

        # ################# cap the number of detections per image ##################
        number_of_detections = len(boxes_after_nms)
        if number_of_detections > cfg.FRCNN.TEST.RESULTS_PER_IM > 0:
            scores_sorted = np.sort(boxes_after_nms[:, 4])
            image_thresh = scores_sorted[number_of_detections - cfg.FRCNN.TEST.RESULTS_PER_IM + 1]
            keep = np.where(boxes_after_nms[:, 4] >= image_thresh)[0]
            boxes_after_nms = boxes_after_nms[keep]

        # ################# cross-class NMS ##################
        # keep = nms_across_class(boxes_after_nms, thresh=0.5)
        # boxes_after_nms = boxes_after_nms[keep]

        boxes = boxes_after_nms[:, 0:4]
        scores = boxes_after_nms[:, 4]
        labels = boxes_after_nms[:, 5].astype(np.int32)
        masks = [None] * len(boxes)

        for r in [DetectionResult(*args)
                  for args in zip(boxes, scores, labels.tolist(), masks)]:
            res = {
                'image_id': img_id,
                'category_id': int(r.class_id),
                'bbox': [round(float(x), 4) for x in r.box],
                'score': round(float(r.score), 4),
            }
            all_results.append(res)

        if idx % 1000 == 0:
            print(idx, (time.time() - start) / 1000)
            start = time.time()

    DatasetRegistry.get(dataset_name).eval_inference_results(
        all_results, output_filename)
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),
    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)
    gt_boxes: (N, 4)
    gt_labels: (N,)
    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], \
            roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]

        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}

        # Add rpn data to dataflow:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)

        return ret

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            # One dataflow for each process, therefore no need for a large buffer.
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds