def __init__(self, config):
    """Build the prediction graph described by *config* and load weights.

    Creates the dataset/model pair, builds preprocessing + prediction ops
    in a fresh ``tf.Graph``, restores the latest checkpoint found under
    ``config.train.job_dir`` (falling back to plain variable initialization
    when no job dir is configured — only useful for testing), and records
    the tensors to evaluate per prediction in ``self.fetches``.

    Raises:
        ValueError: if a job dir is configured but holds no checkpoint,
            or if ``config.model.type`` is neither 'ssd' nor 'fasterrcnn'.
    """
    # Default to "no class names". Previously this attribute was only
    # assigned inside the `if config.dataset.dir:` branch, so a config
    # without a dataset dir left it unset and any later access raised
    # AttributeError.
    self.class_labels = None
    if config.dataset.dir:
        # Gets the names of the classes, when a classes.json is present.
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        if tf.gfile.Exists(classes_file):
            self.class_labels = json.load(tf.gfile.GFile(classes_file))

    # Don't use data augmentation in predictions
    config.dataset.data_augmentation = None

    dataset_class = get_dataset(config.dataset.type)
    model_class = get_model(config.model.type)
    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    tf_config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    tf_config.gpu_options.allow_growth = True
    self.session = tf.Session(config=tf_config, graph=graph)

    with graph.as_default():
        self.image_placeholder = tf.placeholder(
            tf.float32, (None, None, 3)
        )
        image_tf, _, process_meta = dataset.preprocess(
            self.image_placeholder
        )
        pred_dict = model(image_tf)

        # Restore checkpoint
        if config.train.job_dir:
            job_dir = config.train.job_dir
            if config.train.run_name:
                job_dir = os.path.join(job_dir, config.train.run_name)
            ckpt = tf.train.get_checkpoint_state(job_dir)
            if not ckpt or not ckpt.all_model_checkpoint_paths:
                raise ValueError('Could not find checkpoint in {}.'.format(
                    job_dir
                ))
            # Use the most recent checkpoint recorded in the state file.
            ckpt = ckpt.all_model_checkpoint_paths[-1]
            saver = tf.train.Saver(sharded=True, allow_empty=True)
            saver.restore(self.session, ckpt)
            tf.logging.info('Loaded checkpoint.')
        else:
            # A prediction without checkpoint is just used for testing
            tf.logging.warning(
                'Could not load checkpoint. Using initialized model.')
            init_op = tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            )
            self.session.run(init_op)

        # Pick the output tensors according to the model type.
        if config.model.type == 'ssd':
            cls_prediction = pred_dict['classification_prediction']
            objects_tf = cls_prediction['objects']
            objects_labels_tf = cls_prediction['labels']
            objects_labels_prob_tf = cls_prediction['probs']
        elif config.model.type == 'fasterrcnn':
            if config.model.network.get('with_rcnn', False):
                cls_prediction = pred_dict['classification_prediction']
                objects_tf = cls_prediction['objects']
                objects_labels_tf = cls_prediction['labels']
                objects_labels_prob_tf = cls_prediction['probs']
            else:
                rpn_prediction = pred_dict['rpn_prediction']
                objects_tf = rpn_prediction['proposals']
                objects_labels_prob_tf = rpn_prediction['scores']
                # All labels without RCNN are zero
                objects_labels_tf = tf.zeros(
                    tf.shape(objects_labels_prob_tf), dtype=tf.int32
                )
        else:
            raise ValueError(
                "Model type '{}' not supported".format(config.model.type)
            )

        self.fetches = {
            'objects': objects_tf,
            'labels': objects_labels_tf,
            'probs': objects_labels_prob_tf,
            'scale_factor': process_meta['scale_factor']
        }

        # If in debug mode, return the full prediction dictionary.
        if config.train.debug:
            self.fetches['_debug'] = pred_dict
def eval(
    dataset_split,
    config_files,
    watch,
    from_global_step,
    override_params,
    files_per_class,
    max_detections,
):
    """Evaluate models using dataset.

    Builds the evaluation graph for `dataset_split`, then repeatedly loads
    checkpoints from the run directory and runs `evaluate_once` for each.
    When `watch` is True, keeps polling for new checkpoints until the
    process is interrupted; otherwise only the latest checkpoint is
    evaluated and the function returns.

    NOTE(review): the function name shadows the builtin `eval`; kept as-is
    since it is presumably a registered CLI entry point — confirm before
    renaming.

    Args:
        dataset_split: Name of the dataset split to evaluate.
        config_files: Config file paths merged over the model's base config.
        watch: Whether to keep watching the run dir for new checkpoints.
        from_global_step: Only evaluate checkpoints newer than this step.
        override_params: Extra config overrides passed to `get_config`.
        files_per_class: Forwarded to `evaluate_once` (visualization
            sampling).
        max_detections: Cap on detections kept per image (overrides config).

    Raises:
        KeyError: if `model.type`, `job_dir` or `run_name` is not set.
        ValueError: on unsupported model type, or on a missing checkpoint
            when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError("model.type should be set on the custom config.")

    if not config.train.job_dir:
        raise KeyError("`job_dir` should be set.")
    if not config.train.run_name:
        raise KeyError("`run_name` should be set.")

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == "debug"

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Attempt to get class names, if available.
    classes_file = os.path.join(config.dataset.dir, "classes.json")
    if tf.gfile.Exists(classes_file):
        class_labels = json.load(tf.gfile.GFile(classes_file))
    else:
        class_labels = None

    if config.model.type == "fasterrcnn":
        # Override max detections with specified value.
        if config.model.network.with_rcnn:
            config.model.rcnn.proposals.total_max_detections = max_detections
        else:
            config.model.rpn.proposals.post_nms_top_n = max_detections
        # Also overwrite `min_prob_threshold` in order to use all detections.
        # NOTE(review): this touches `rcnn.proposals` even in the RPN-only
        # branch above — presumably the rcnn sub-config always exists in the
        # base config; confirm.
        config.model.rcnn.proposals.min_prob_threshold = 0.0
    elif config.model.type == "ssd":
        config.model.proposals.total_max_detections = max_detections
        config.model.proposals.min_prob_threshold = 0.0
    else:
        raise ValueError("Model type '{}' not supported".format(
            config.model.type))

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup.
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training.
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset["image"]
    train_objects = train_dataset["bboxes"]
    train_filename = train_dataset["filename"]

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.type == "ssd" or config.model.network.with_rcnn:
        pred = prediction_dict["classification_prediction"]
        pred_objects = pred["objects"]
        pred_objects_classes = pred["labels"]
        pred_objects_scores = pred["probs"]
    else:
        # Force the num_classes to 1.
        config.model.network.num_classes = 1

        pred = prediction_dict["rpn_prediction"]
        pred_objects = pred["proposals"]
        pred_objects_scores = pred["scores"]
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros((tf.shape(pred_objects_scores)[0], ),
                                        dtype=tf.int32)

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections="metrics",
            updates_collections="metric_ops",
        )
        full_loss_name = "{}_losses/{}".format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection("metric_ops")

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        "init_op": init_op,
        "metric_ops": metric_ops,
        "pred_objects": pred_objects,
        "pred_objects_classes": pred_objects_classes,
        "pred_objects_scores": pred_objects_scores,
        "train_objects": train_objects,
        "losses": losses,
        "prediction_dict": prediction_dict,
        "filename": train_filename,
        "train_image": train_image,
    }

    metrics_scope = "{}_metrics".format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(run_dir, last_global_step,
                                          last_only=not watch)
        except ValueError as e:
            if not watch:
                tf.logging.error("Missing checkpoint.")
                raise e

            tf.logging.warning(
                "Missing checkpoint; Checking again in a moment")
            time.sleep(5)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                "Evaluating global_step {} using checkpoint '{}'".format(
                    checkpoint["global_step"], checkpoint["file"]))
            try:
                start = time.time()
                evaluate_once(
                    config,
                    writer,
                    saver,
                    ops,
                    checkpoint,
                    class_labels=class_labels,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize,
                )
                last_global_step = checkpoint["global_step"]
                tf.logging.info("Evaluated in {:.2f}s".format(time.time() -
                                                              start))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                tf.logging.info("Checkpoint {} is not ready yet. "
                                "Checking again in a moment.".format(
                                    checkpoint["file"]))
                time.sleep(5)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a moment and check for new checkpoints.
        tf.logging.info("All checkpoints evaluated; sleeping for a moment")
        time.sleep(5)
def evaluate(dataset_split, config_files, job_dir, watch,
             from_global_step, override_params, files_per_class):
    """Evaluate models using dataset.

    Builds the evaluation graph for `dataset_split`, then loops loading
    checkpoints (via `get_checkpoints(config, ...)`) and running
    `evaluate_once` on each. With `watch` set, polls for new checkpoints
    every minute until interrupted; otherwise returns after evaluating the
    available ones.

    Args:
        dataset_split: Name of the dataset split to evaluate.
        config_files: Config file paths merged over the model's base config.
        job_dir: Overrides `config.train.job_dir` when truthy.
        watch: Whether to keep watching for new checkpoints.
        from_global_step: Only evaluate checkpoints newer than this step.
        override_params: Extra config overrides passed to `get_config`.
        files_per_class: Forwarded to `evaluate_once` (visualization
            sampling).

    Raises:
        KeyError: if `model.type` is not set in the config.
        ValueError: on a missing checkpoint when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    # CLI-provided job_dir takes precedence over the config's value.
    config.train.job_dir = job_dir or config.train.job_dir

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(
        train_image, train_objects
    )

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(config.train.job_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(config, last_global_step)
        except ValueError as e:
            if not watch:
                tf.logging.error('Missing checkpoint.')
                raise e

            tf.logging.warning(
                'Missing checkpoint; Checking again in a minute')
            time.sleep(60)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                'Evaluating global_step {} using checkpoint \'{}\''.format(
                    checkpoint['global_step'], checkpoint['file']
                )
            )
            try:
                start = time.time()
                evaluate_once(
                    config, writer, saver, ops, checkpoint,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize
                )
                last_global_step = checkpoint['global_step']
                tf.logging.info('Evaluated in {:.2f}s'.format(
                    time.time() - start
                ))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                tf.logging.info(
                    'Checkpoint {} is not ready yet. '
                    'Checking again in a minute.'.format(
                        checkpoint['file']
                    )
                )
                time.sleep(60)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a minute and check for new checkpoints.
        tf.logging.info('All checkpoints evaluated; sleeping for a minute')
        time.sleep(60)
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None, debug=False):
    """Detect nuclei in one slide tile with a Faster R-CNN model.

    Loads the tile at `tile_position` from `slide_path`, Reinhard-normalizes
    its color, deconvolves the two configured stains, runs the detection
    network on the fused stain image, and converts the detections into
    annotation and analysis structures.

    Args:
        slide_path: Path to a whole-slide image readable by `large_image`.
        tile_position: Tile position index passed to `getSingleTile`.
        args: CLI argument namespace (deconvolution method, stain names,
            detection thresholds, annotation options, ...).
        it_kwargs: Extra keyword arguments for the tile iterator.
        src_mu_lab: Optional source LAB means for Reinhard normalization.
        src_sigma_lab: Optional source LAB standard deviations.
        debug: Not used in this function body; kept for interface
            compatibility with callers.

    Returns:
        dict: timings, ROI shape, detected objects, and the annotation /
        analysis lists produced by `cli_utils`.

    Raises:
        ValueError: if `args.deconv_method` is neither 'ruifrok' nor
            'macenko'.
    """
    # =========================================================================
    # ======================= Tile Loading ====================================
    # =========================================================================
    print('\n>> Loading Tile ... \n')

    csv_dict = {}

    csv_dict['PreparationTime'] = []
    csv_dict['ColorDeconvTime'] = []
    csv_dict['TotalTileLoadingTime'] = []

    csv_dict['CKPTLoadingTime'] = []
    # NOTE(review): 'ModelInfernceTime' is misspelled, but it is a runtime
    # key consumers may read — kept verbatim on purpose.
    csv_dict['ModelInfernceTime'] = []
    csv_dict['DetectionTime'] = []

    csv_dict['ROIShape'] = []
    csv_dict['ObjectsDict'] = []
    csv_dict['NumObjects'] = []

    csv_dict['AnnotationWritingTime'] = []

    csv_dict['AnnotationDict'] = []
    csv_dict['AnalysisDict'] = []

    start_time = time.time()
    total_tileloading_start_time = time.time()

    ts = large_image.getTileSource(slide_path)
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)
    # Drop any alpha channel; keep RGB only.
    im_tile = tile_info['tile'][:, :, :3]
    csv_dict['ROIShape'] = im_tile.shape[:2]

    prep_time = time.time() - start_time
    csv_dict['PreparationTime'] = round(prep_time, 3)

    # =========================================================================
    # =================Img Normalization & Color Deconv========================
    # =========================================================================
    print('\n>> Color Deconvolving ... \n')
    start_time = time.time()

    im_nmzd = htk_cnorm.reinhard(
        im_tile,
        REFERENCE_MU_LAB,
        REFERENCE_STD_LAB,
        src_mu=src_mu_lab,
        src_sigma=src_sigma_lab
    )

    # perform color deconvolution
    if args.deconv_method == 'ruifrok':

        w = cli_utils.get_stain_matrix(args)
        # `np.float` (an alias of the builtin `float`) was removed in
        # NumPy 1.24 — use `float` directly; the resulting dtype is the same.
        im_stains = htk_cdeconv.color_deconvolution(
            im_nmzd, w).Stains.astype(float)[:, :, :2]

    elif args.deconv_method == 'macenko':

        w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_tile, 255)
        im_stains = htk_cdeconv.color_deconvolution(
            im_tile, w_est, 255).Stains.astype(float)
        ch1 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_1], w_est)
        ch2 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_2], w_est)
        im_stains = im_stains[:, :, [ch1, ch2]]

    else:

        raise ValueError('Invalid deconvolution method parameter.')

    # =========================================================================
    # ====================== Fuse the stain1 & stain2 pix======================
    # =========================================================================

    # compute nuclear foreground mask
    im_fgnd_mask_stain_1 = im_stains[
        :, :, 0] < threshold_yen(im_stains[:, :, 0])
    im_fgnd_mask_stain_2 = im_stains[
        :, :, 1] < threshold_yen(im_stains[:, :, 1])
    # NOTE(review): this mask is currently unused below — likely meant for
    # the TODO'd border-deletion step; confirm before removing.
    im_fgnd_seg_mask = im_fgnd_mask_stain_1 | im_fgnd_mask_stain_2

    # Fuse stains: per-pixel minimum across the two stain channels.
    im_nuc_det_input = np.squeeze(np.min(im_stains[:, :, :2], axis=2))
    print('---> Fusing 2 Stains')
    deconv_time = time.time() - start_time
    csv_dict['ColorDeconvTime'] = round(deconv_time, 3)

    # =========================================================================
    # ================= Nuclei Detection Deep Learning Block ==================
    # =========================================================================

    total_tileloading_time = time.time() - total_tileloading_start_time
    csv_dict['TotalTileLoadingTime'] = round(total_tileloading_time, 3)

    start_time = time.time()

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = args.max_det
    config.model.rcnn.proposals.min_prob_threshold = args.min_prob

    # The network expects a 3-channel image; replicate the fused stain map.
    im_nuc_det_input = np.stack((im_nuc_det_input,) * 3, axis=-1)

    tf.reset_default_graph()

    dataset_class = get_dataset('object_detection')
    model_class = get_model('fasterrcnn')

    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    session = tf.Session(graph=graph)

    with graph.as_default():
        image_placeholder = tf.placeholder(
            tf.float32, (None, None, 3), name='Input_Placeholder'
        )
        pred_dict = model(image_placeholder)

        ckpt_loading_start_time = time.time()
        saver = tf.train.Saver(sharded=True, allow_empty=True)
        saver.restore(session, CKPT_DIR)
        tf.logging.info('Loaded checkpoint.')
        ckpt_loading_time = time.time() - ckpt_loading_start_time
        csv_dict['CKPTLoadingTime'] = round(ckpt_loading_time, 3)

        inference_start_time = time.time()

        cls_prediction = pred_dict['classification_prediction']
        objects_tf = cls_prediction['objects']
        objects_labels_tf = cls_prediction['labels']
        objects_labels_prob_tf = cls_prediction['probs']

        fetches = {
            'objects': objects_tf,
            'labels': objects_labels_tf,
            'probs': objects_labels_prob_tf,
        }

        fetched = session.run(fetches, feed_dict={
            image_placeholder: np.array(im_nuc_det_input)
        })

        inference_time = time.time() - inference_start_time
        csv_dict['ModelInfernceTime'] = round(inference_time, 3)

        objects = fetched['objects']
        labels = fetched['labels'].tolist()
        probs = fetched['probs'].tolist()

        # Cast to int to consistently return the same type in Python 2 and 3
        objects = [
            [int(round(coord)) for coord in obj]
            for obj in objects.tolist()
        ]

        # Highest-confidence detections first.
        predictions = sorted([
            {
                'bbox': obj,
                'label': label,
                'prob': round(prob, 4),
            } for obj, label, prob in zip(objects, labels, probs)
        ], key=lambda x: x['prob'], reverse=True)

    print('\n>> Finishing Detection ... \n')
    print('***** Number of Detected Cells ****** : ', len(predictions))
    detection_time = time.time() - start_time
    csv_dict['DetectionTime'] = round(detection_time, 3)
    csv_dict['NumObjects'] = len(predictions)
    csv_dict['ObjectsDict'] = predictions

    # =========================================================================
    # ======================= TODO: Implement border deletion =================
    # =========================================================================

    # =========================================================================
    # ======================= Write Annotations ===============================
    # =========================================================================

    start_time = time.time()

    objects_df = pd.DataFrame(objects)
    formatted_annot_list,\
        formatter_analysis_list = cli_utils.convert_preds_to_utilformat(
            objects_df, probs, args.ignore_border_nuclei,
            im_tile_size=args.analysis_tile_size)

    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        formatted_annot_list, tile_info, args.nuclei_annotation_format)
    csv_dict['AnnotationDict'] = nuclei_annot_list

    csv_dict['AnalysisDict'] = formatter_analysis_list

    # NOTE(review): unused local — kept for parity with the original.
    num_nuclei = len(nuclei_annot_list)

    anot_time = time.time() - start_time
    csv_dict['AnnotationWritingTime'] = round(anot_time, 3)

    return csv_dict
def get_prediction(image, config, total=None, session=None,
                   fetches=None, image_tensor=None, class_labels=None,
                   return_tf_vars=False):
    """
    Gets the prediction given by the model `model_type` of the image
    `image`.

    If a checkpoint exists in the job's directory, load it. The names of
    the classes will be obtained from the dataset directory.

    Either all of `session`, `fetches` and `image_tensor` are given (to
    reuse a graph built by a previous call) or none of them are (a new
    graph is built and the checkpoint restored).

    Returns a dictionary with the objects, their labels and probabilities,
    the inference time and the scale factor. Also if the `return_tf_vars`
    is True, returns the image tensor, the entire prediction of the model
    and the session.
    """
    if session is None and fetches is None and image_tensor is None:
        # First call: build the graph from scratch.
        # Don't use data augmentation in predictions
        config.dataset.data_augmentation = None

        dataset_class = get_dataset(config.dataset.type)
        model_class = get_model(config.model.type)
        dataset = dataset_class(config)
        model = model_class(config)

        graph = tf.Graph()
        session = tf.Session(graph=graph)

        with graph.as_default():
            # Variable-sized RGB input; preprocess rescales it and reports
            # the applied scale factor via `process_meta`.
            image_tensor = tf.placeholder(tf.float32, (None, None, 3))
            image_tf, _, process_meta = dataset.preprocess(image_tensor)
            pred_dict = model(image_tf)

            # Restore checkpoint
            if config.train.job_dir:
                job_dir = config.train.job_dir
                if config.train.run_name:
                    job_dir = os.path.join(job_dir, config.train.run_name)
                ckpt = tf.train.get_checkpoint_state(job_dir)
                if not ckpt or not ckpt.all_model_checkpoint_paths:
                    raise ValueError(
                        'Could not find checkpoint in {}.'.format(job_dir))
                # Most recent checkpoint recorded in the state file.
                ckpt = ckpt.all_model_checkpoint_paths[-1]
                saver = tf.train.Saver(sharded=True, allow_empty=True)
                saver.restore(session, ckpt)
                tf.logging.info('Loaded checkpoint.')
            else:
                # A prediction without checkpoint is just used for testing
                tf.logging.warning(
                    'Could not load checkpoint. Using initialized model.')
                init_op = tf.group(tf.global_variables_initializer(),
                                   tf.local_variables_initializer())
                session.run(init_op)

            if config.model.network.with_rcnn:
                cls_prediction = pred_dict['classification_prediction']
                objects_tf = cls_prediction['objects']
                objects_labels_tf = cls_prediction['labels']
                objects_labels_prob_tf = cls_prediction['probs']
            else:
                rpn_prediction = pred_dict['rpn_prediction']
                objects_tf = rpn_prediction['proposals']
                objects_labels_prob_tf = rpn_prediction['scores']
                # All labels without RCNN are zero
                objects_labels_tf = tf.zeros(tf.shape(objects_labels_prob_tf),
                                             dtype=tf.int32)

            fetches = {
                'objects': objects_tf,
                'labels': objects_labels_tf,
                'probs': objects_labels_prob_tf,
                'scale_factor': process_meta['scale_factor']
            }

            # If in debug mode, return the full prediction dictionary.
            if config.train.debug:
                fetches['_debug'] = pred_dict

    elif session is None or fetches is None or image_tensor is None:
        # Partial reuse arguments are an error: all three or none.
        raise ValueError(
            'Either all `session`, `fetches` and `image_tensor` are None, '
            'or neither of them are.')

    start_time = time.time()
    fetched = session.run(fetches, feed_dict={image_tensor: np.array(image)})
    end_time = time.time()

    objects = fetched['objects']
    objects_labels = fetched['labels']
    objects_labels_prob = fetched['probs']
    scale_factor = fetched['scale_factor']

    objects_labels = objects_labels.tolist()
    if class_labels is not None:
        # Map numeric label ids to human-readable class names.
        objects_labels = [class_labels[obj] for obj in objects_labels]

    # Scale objects to original image dimensions
    objects /= scale_factor

    objects = objects.tolist()
    objects_labels_prob = objects_labels_prob.tolist()

    if total is not None:
        # Keep only the first `total` detections.
        objects = objects[:total]
        objects_labels = objects_labels[:total]
        objects_labels_prob = objects_labels_prob[:total]

    res = {
        'objects': objects,
        'objects_labels': objects_labels,
        'objects_labels_prob': objects_labels_prob,
        'inference_time': end_time - start_time,
    }

    if return_tf_vars:
        # Expose the graph handles so callers can reuse them next call.
        res['image_tensor'] = image_tensor
        res['fetches'] = fetches
        res['session'] = session

    return res
def eval(dataset_split, config_files, watch, from_global_step,
         override_params, files_per_class, iou_threshold, min_probability):
    """Evaluate models using dataset.

    Builds the evaluation graph for `dataset_split`, then loops loading
    checkpoints from the run directory and running `evaluate_once` on each,
    using the given IoU threshold and minimum detection probability. With
    `watch` set, polls for new checkpoints until interrupted.

    NOTE(review): the function name shadows the builtin `eval`; kept as-is
    since it is presumably a registered CLI entry point — confirm before
    renaming.

    Args:
        dataset_split: Name of the dataset split to evaluate.
        config_files: Config file paths merged over the model's base config.
        watch: Whether to keep watching the run dir for new checkpoints.
        from_global_step: Only evaluate checkpoints newer than this step.
        override_params: Extra config overrides passed to `get_config`.
        files_per_class: Forwarded to `evaluate_once` (visualization
            sampling).
        iou_threshold: IoU threshold forwarded to `evaluate_once`.
        min_probability: Minimum detection probability; also written into
            the proposals config.

    Raises:
        KeyError: if `model.type`, `job_dir` or `run_name` is not set.
        ValueError: on a missing checkpoint when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    if not config.train.job_dir:
        raise KeyError('`job_dir` should be set.')
    if not config.train.run_name:
        raise KeyError('`run_name` should be set.')

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Apply the probability cutoff to whichever proposal stage produces the
    # final detections.
    if config.model.network.with_rcnn:
        config.model.rcnn.proposals.min_prob_threshold = min_probability
    else:
        config.model.rpn.proposals.min_prob_threshold = min_probability

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(
                run_dir, last_global_step, last_only=not watch
            )
        except ValueError as e:
            if not watch:
                tf.logging.error('Missing checkpoint.')
                raise e

            tf.logging.warning(
                'Missing checkpoint; Checking again in a moment')
            time.sleep(5)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                'Evaluating global_step {} using checkpoint \'{}\''.format(
                    checkpoint['global_step'], checkpoint['file']
                )
            )
            try:
                start = time.time()
                evaluate_once(
                    config, writer, saver, ops, checkpoint,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize,
                    iou_threshold=iou_threshold,
                    min_probability=min_probability
                )
                last_global_step = checkpoint['global_step']
                tf.logging.info('Evaluated in {:.2f}s'.format(
                    time.time() - start
                ))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                tf.logging.info(
                    'Checkpoint {} is not ready yet. '
                    'Checking again in a moment.'.format(
                        checkpoint['file']
                    )
                )
                time.sleep(5)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a moment and check for new checkpoints.
        tf.logging.info('All checkpoints evaluated; sleeping for a moment')
        time.sleep(5)