def __init__(self, config):
    """Build the prediction graph described by *config* and load weights.

    Creates the dataset/model pair, builds preprocessing + prediction ops
    in a fresh ``tf.Graph``, restores the latest checkpoint found under
    ``config.train.job_dir`` (falling back to plain variable initialization
    when no job dir is configured — only useful for testing), and records
    the tensors to evaluate per prediction in ``self.fetches``.

    Raises:
        ValueError: if a job dir is configured but holds no checkpoint,
            or if ``config.model.type`` is neither 'ssd' nor 'fasterrcnn'.
    """
    # Default to "no class names". Previously this attribute was only
    # assigned inside the `if config.dataset.dir:` branch, so a config
    # without a dataset dir left it unset and any later access raised
    # AttributeError.
    self.class_labels = None
    if config.dataset.dir:
        # Gets the names of the classes, when a classes.json is present.
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        if tf.gfile.Exists(classes_file):
            self.class_labels = json.load(tf.gfile.GFile(classes_file))

    # Don't use data augmentation in predictions
    config.dataset.data_augmentation = None

    dataset_class = get_dataset(config.dataset.type)
    model_class = get_model(config.model.type)
    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    tf_config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    tf_config.gpu_options.allow_growth = True
    self.session = tf.Session(config=tf_config, graph=graph)

    with graph.as_default():
        self.image_placeholder = tf.placeholder(
            tf.float32, (None, None, 3)
        )
        image_tf, _, process_meta = dataset.preprocess(
            self.image_placeholder
        )
        pred_dict = model(image_tf)

        # Restore checkpoint
        if config.train.job_dir:
            job_dir = config.train.job_dir
            if config.train.run_name:
                job_dir = os.path.join(job_dir, config.train.run_name)
            ckpt = tf.train.get_checkpoint_state(job_dir)
            if not ckpt or not ckpt.all_model_checkpoint_paths:
                raise ValueError('Could not find checkpoint in {}.'.format(
                    job_dir
                ))
            # Use the most recent checkpoint recorded in the state file.
            ckpt = ckpt.all_model_checkpoint_paths[-1]
            saver = tf.train.Saver(sharded=True, allow_empty=True)
            saver.restore(self.session, ckpt)
            tf.logging.info('Loaded checkpoint.')
        else:
            # A prediction without checkpoint is just used for testing
            tf.logging.warning(
                'Could not load checkpoint. Using initialized model.')
            init_op = tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            )
            self.session.run(init_op)

        # Pick the output tensors according to the model type.
        if config.model.type == 'ssd':
            cls_prediction = pred_dict['classification_prediction']
            objects_tf = cls_prediction['objects']
            objects_labels_tf = cls_prediction['labels']
            objects_labels_prob_tf = cls_prediction['probs']
        elif config.model.type == 'fasterrcnn':
            if config.model.network.get('with_rcnn', False):
                cls_prediction = pred_dict['classification_prediction']
                objects_tf = cls_prediction['objects']
                objects_labels_tf = cls_prediction['labels']
                objects_labels_prob_tf = cls_prediction['probs']
            else:
                rpn_prediction = pred_dict['rpn_prediction']
                objects_tf = rpn_prediction['proposals']
                objects_labels_prob_tf = rpn_prediction['scores']
                # All labels without RCNN are zero
                objects_labels_tf = tf.zeros(
                    tf.shape(objects_labels_prob_tf), dtype=tf.int32
                )
        else:
            raise ValueError(
                "Model type '{}' not supported".format(config.model.type)
            )

        self.fetches = {
            'objects': objects_tf,
            'labels': objects_labels_tf,
            'probs': objects_labels_prob_tf,
            'scale_factor': process_meta['scale_factor']
        }

        # If in debug mode, return the full prediction dictionary.
        if config.train.debug:
            self.fetches['_debug'] = pred_dict
def eval(
    dataset_split,
    config_files,
    watch,
    from_global_step,
    override_params,
    files_per_class,
    max_detections,
):
    """Evaluate models using dataset.

    Builds the evaluation graph for `dataset_split`, then repeatedly loads
    checkpoints from the run directory and runs `evaluate_once` for each.
    When `watch` is True, keeps polling for new checkpoints until the
    process is interrupted; otherwise only the latest checkpoint is
    evaluated and the function returns.

    NOTE(review): the function name shadows the builtin `eval`; kept as-is
    since it is presumably a registered CLI entry point — confirm before
    renaming.

    Args:
        dataset_split: Name of the dataset split to evaluate.
        config_files: Config file paths merged over the model's base config.
        watch: Whether to keep watching the run dir for new checkpoints.
        from_global_step: Only evaluate checkpoints newer than this step.
        override_params: Extra config overrides passed to `get_config`.
        files_per_class: Forwarded to `evaluate_once` (visualization
            sampling).
        max_detections: Cap on detections kept per image (overrides config).

    Raises:
        KeyError: if `model.type`, `job_dir` or `run_name` is not set.
        ValueError: on unsupported model type, or on a missing checkpoint
            when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError("model.type should be set on the custom config.")

    if not config.train.job_dir:
        raise KeyError("`job_dir` should be set.")
    if not config.train.run_name:
        raise KeyError("`run_name` should be set.")

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == "debug"

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Attempt to get class names, if available.
    classes_file = os.path.join(config.dataset.dir, "classes.json")
    if tf.gfile.Exists(classes_file):
        class_labels = json.load(tf.gfile.GFile(classes_file))
    else:
        class_labels = None

    if config.model.type == "fasterrcnn":
        # Override max detections with specified value.
        if config.model.network.with_rcnn:
            config.model.rcnn.proposals.total_max_detections = max_detections
        else:
            config.model.rpn.proposals.post_nms_top_n = max_detections
        # Also overwrite `min_prob_threshold` in order to use all detections.
        # NOTE(review): this touches `rcnn.proposals` even in the RPN-only
        # branch above — presumably the rcnn sub-config always exists in the
        # base config; confirm.
        config.model.rcnn.proposals.min_prob_threshold = 0.0
    elif config.model.type == "ssd":
        config.model.proposals.total_max_detections = max_detections
        config.model.proposals.min_prob_threshold = 0.0
    else:
        raise ValueError("Model type '{}' not supported".format(
            config.model.type))

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup.
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training.
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset["image"]
    train_objects = train_dataset["bboxes"]
    train_filename = train_dataset["filename"]

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.type == "ssd" or config.model.network.with_rcnn:
        pred = prediction_dict["classification_prediction"]
        pred_objects = pred["objects"]
        pred_objects_classes = pred["labels"]
        pred_objects_scores = pred["probs"]
    else:
        # Force the num_classes to 1.
        config.model.network.num_classes = 1

        pred = prediction_dict["rpn_prediction"]
        pred_objects = pred["proposals"]
        pred_objects_scores = pred["scores"]
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros((tf.shape(pred_objects_scores)[0], ),
                                        dtype=tf.int32)

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections="metrics",
            updates_collections="metric_ops",
        )
        full_loss_name = "{}_losses/{}".format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection("metric_ops")

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        "init_op": init_op,
        "metric_ops": metric_ops,
        "pred_objects": pred_objects,
        "pred_objects_classes": pred_objects_classes,
        "pred_objects_scores": pred_objects_scores,
        "train_objects": train_objects,
        "losses": losses,
        "prediction_dict": prediction_dict,
        "filename": train_filename,
        "train_image": train_image,
    }

    metrics_scope = "{}_metrics".format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(run_dir, last_global_step,
                                          last_only=not watch)
        except ValueError as e:
            if not watch:
                tf.logging.error("Missing checkpoint.")
                raise e

            tf.logging.warning(
                "Missing checkpoint; Checking again in a moment")
            time.sleep(5)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                "Evaluating global_step {} using checkpoint '{}'".format(
                    checkpoint["global_step"], checkpoint["file"]))
            try:
                start = time.time()
                evaluate_once(
                    config,
                    writer,
                    saver,
                    ops,
                    checkpoint,
                    class_labels=class_labels,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize,
                )
                last_global_step = checkpoint["global_step"]
                tf.logging.info("Evaluated in {:.2f}s".format(time.time() -
                                                              start))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                tf.logging.info("Checkpoint {} is not ready yet. "
                                "Checking again in a moment.".format(
                                    checkpoint["file"]))
                time.sleep(5)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a moment and check for new checkpoints.
        tf.logging.info("All checkpoints evaluated; sleeping for a moment")
        time.sleep(5)
def evaluate(dataset_split, config_files, job_dir, watch,
             from_global_step, override_params, files_per_class):
    """Evaluate models using dataset.

    Builds the evaluation graph for `dataset_split`, then loops loading
    checkpoints (via `get_checkpoints(config, ...)`) and running
    `evaluate_once` on each. With `watch` set, polls for new checkpoints
    every minute until interrupted; otherwise returns after evaluating the
    available ones.

    Args:
        dataset_split: Name of the dataset split to evaluate.
        config_files: Config file paths merged over the model's base config.
        job_dir: Overrides `config.train.job_dir` when truthy.
        watch: Whether to keep watching for new checkpoints.
        from_global_step: Only evaluate checkpoints newer than this step.
        override_params: Extra config overrides passed to `get_config`.
        files_per_class: Forwarded to `evaluate_once` (visualization
            sampling).

    Raises:
        KeyError: if `model.type` is not set in the config.
        ValueError: on a missing checkpoint when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    # CLI-provided job_dir takes precedence over the config's value.
    config.train.job_dir = job_dir or config.train.job_dir

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(
        train_image, train_objects
    )

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(config.train.job_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(config, last_global_step)
        except ValueError as e:
            if not watch:
                tf.logging.error('Missing checkpoint.')
                raise e

            tf.logging.warning(
                'Missing checkpoint; Checking again in a minute')
            time.sleep(60)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                'Evaluating global_step {} using checkpoint \'{}\''.format(
                    checkpoint['global_step'], checkpoint['file']
                )
            )
            try:
                start = time.time()
                evaluate_once(
                    config, writer, saver, ops, checkpoint,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize
                )
                last_global_step = checkpoint['global_step']
                tf.logging.info('Evaluated in {:.2f}s'.format(
                    time.time() - start
                ))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                tf.logging.info(
                    'Checkpoint {} is not ready yet. '
                    'Checking again in a minute.'.format(
                        checkpoint['file']
                    )
                )
                time.sleep(60)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a minute and check for new checkpoints.
        tf.logging.info('All checkpoints evaluated; sleeping for a minute')
        time.sleep(60)
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None, debug=False):
    """Detect nuclei in one slide tile with a Faster R-CNN model.

    Loads the tile at `tile_position` from `slide_path`, Reinhard-normalizes
    its color, deconvolves the two configured stains, runs the detection
    network on the fused stain image, and converts the detections into
    annotation and analysis structures.

    Args:
        slide_path: Path to a whole-slide image readable by `large_image`.
        tile_position: Tile position index passed to `getSingleTile`.
        args: CLI argument namespace (deconvolution method, stain names,
            detection thresholds, annotation options, ...).
        it_kwargs: Extra keyword arguments for the tile iterator.
        src_mu_lab: Optional source LAB means for Reinhard normalization.
        src_sigma_lab: Optional source LAB standard deviations.
        debug: Not used in this function body; kept for interface
            compatibility with callers.

    Returns:
        dict: timings, ROI shape, detected objects, and the annotation /
        analysis lists produced by `cli_utils`.

    Raises:
        ValueError: if `args.deconv_method` is neither 'ruifrok' nor
            'macenko'.
    """
    # =========================================================================
    # ======================= Tile Loading ====================================
    # =========================================================================
    print('\n>> Loading Tile ... \n')

    csv_dict = {}

    csv_dict['PreparationTime'] = []
    csv_dict['ColorDeconvTime'] = []
    csv_dict['TotalTileLoadingTime'] = []

    csv_dict['CKPTLoadingTime'] = []
    # NOTE(review): 'ModelInfernceTime' is misspelled, but it is a runtime
    # key consumers may read — kept verbatim on purpose.
    csv_dict['ModelInfernceTime'] = []
    csv_dict['DetectionTime'] = []

    csv_dict['ROIShape'] = []
    csv_dict['ObjectsDict'] = []
    csv_dict['NumObjects'] = []

    csv_dict['AnnotationWritingTime'] = []

    csv_dict['AnnotationDict'] = []
    csv_dict['AnalysisDict'] = []

    start_time = time.time()
    total_tileloading_start_time = time.time()

    ts = large_image.getTileSource(slide_path)
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)
    # Drop any alpha channel; keep RGB only.
    im_tile = tile_info['tile'][:, :, :3]
    csv_dict['ROIShape'] = im_tile.shape[:2]

    prep_time = time.time() - start_time
    csv_dict['PreparationTime'] = round(prep_time, 3)

    # =========================================================================
    # =================Img Normalization & Color Deconv========================
    # =========================================================================
    print('\n>> Color Deconvolving ... \n')
    start_time = time.time()

    im_nmzd = htk_cnorm.reinhard(
        im_tile,
        REFERENCE_MU_LAB,
        REFERENCE_STD_LAB,
        src_mu=src_mu_lab,
        src_sigma=src_sigma_lab
    )

    # perform color deconvolution
    if args.deconv_method == 'ruifrok':

        w = cli_utils.get_stain_matrix(args)
        # `np.float` (an alias of the builtin `float`) was removed in
        # NumPy 1.24 — use `float` directly; the resulting dtype is the same.
        im_stains = htk_cdeconv.color_deconvolution(
            im_nmzd, w).Stains.astype(float)[:, :, :2]

    elif args.deconv_method == 'macenko':

        w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_tile, 255)
        im_stains = htk_cdeconv.color_deconvolution(
            im_tile, w_est, 255).Stains.astype(float)
        ch1 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_1], w_est)
        ch2 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_2], w_est)
        im_stains = im_stains[:, :, [ch1, ch2]]

    else:

        raise ValueError('Invalid deconvolution method parameter.')

    # =========================================================================
    # ====================== Fuse the stain1 & stain2 pix======================
    # =========================================================================

    # compute nuclear foreground mask
    im_fgnd_mask_stain_1 = im_stains[
        :, :, 0] < threshold_yen(im_stains[:, :, 0])
    im_fgnd_mask_stain_2 = im_stains[
        :, :, 1] < threshold_yen(im_stains[:, :, 1])
    # NOTE(review): this mask is currently unused below — likely meant for
    # the TODO'd border-deletion step; confirm before removing.
    im_fgnd_seg_mask = im_fgnd_mask_stain_1 | im_fgnd_mask_stain_2

    # Fuse stains: per-pixel minimum across the two stain channels.
    im_nuc_det_input = np.squeeze(np.min(im_stains[:, :, :2], axis=2))
    print('---> Fusing 2 Stains')
    deconv_time = time.time() - start_time
    csv_dict['ColorDeconvTime'] = round(deconv_time, 3)

    # =========================================================================
    # ================= Nuclei Detection Deep Learning Block ==================
    # =========================================================================

    total_tileloading_time = time.time() - total_tileloading_start_time
    csv_dict['TotalTileLoadingTime'] = round(total_tileloading_time, 3)

    start_time = time.time()

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = args.max_det
    config.model.rcnn.proposals.min_prob_threshold = args.min_prob

    # The network expects a 3-channel image; replicate the fused stain map.
    im_nuc_det_input = np.stack((im_nuc_det_input,) * 3, axis=-1)

    tf.reset_default_graph()

    dataset_class = get_dataset('object_detection')
    model_class = get_model('fasterrcnn')

    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    session = tf.Session(graph=graph)

    with graph.as_default():
        image_placeholder = tf.placeholder(
            tf.float32, (None, None, 3), name='Input_Placeholder'
        )
        pred_dict = model(image_placeholder)

        ckpt_loading_start_time = time.time()
        saver = tf.train.Saver(sharded=True, allow_empty=True)
        saver.restore(session, CKPT_DIR)
        tf.logging.info('Loaded checkpoint.')
        ckpt_loading_time = time.time() - ckpt_loading_start_time
        csv_dict['CKPTLoadingTime'] = round(ckpt_loading_time, 3)

        inference_start_time = time.time()

        cls_prediction = pred_dict['classification_prediction']
        objects_tf = cls_prediction['objects']
        objects_labels_tf = cls_prediction['labels']
        objects_labels_prob_tf = cls_prediction['probs']

        fetches = {
            'objects': objects_tf,
            'labels': objects_labels_tf,
            'probs': objects_labels_prob_tf,
        }

        fetched = session.run(fetches, feed_dict={
            image_placeholder: np.array(im_nuc_det_input)
        })

        inference_time = time.time() - inference_start_time
        csv_dict['ModelInfernceTime'] = round(inference_time, 3)

        objects = fetched['objects']
        labels = fetched['labels'].tolist()
        probs = fetched['probs'].tolist()

        # Cast to int to consistently return the same type in Python 2 and 3
        objects = [
            [int(round(coord)) for coord in obj]
            for obj in objects.tolist()
        ]

        # Highest-confidence detections first.
        predictions = sorted([
            {
                'bbox': obj,
                'label': label,
                'prob': round(prob, 4),
            } for obj, label, prob in zip(objects, labels, probs)
        ], key=lambda x: x['prob'], reverse=True)

    print('\n>> Finishing Detection ... \n')
    print('***** Number of Detected Cells ****** : ', len(predictions))
    detection_time = time.time() - start_time
    csv_dict['DetectionTime'] = round(detection_time, 3)
    csv_dict['NumObjects'] = len(predictions)
    csv_dict['ObjectsDict'] = predictions

    # =========================================================================
    # ======================= TODO: Implement border deletion =================
    # =========================================================================

    # =========================================================================
    # ======================= Write Annotations ===============================
    # =========================================================================

    start_time = time.time()

    objects_df = pd.DataFrame(objects)
    formatted_annot_list,\
        formatter_analysis_list = cli_utils.convert_preds_to_utilformat(
            objects_df, probs, args.ignore_border_nuclei,
            im_tile_size=args.analysis_tile_size)

    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        formatted_annot_list, tile_info, args.nuclei_annotation_format)
    csv_dict['AnnotationDict'] = nuclei_annot_list

    csv_dict['AnalysisDict'] = formatter_analysis_list

    # NOTE(review): unused local — kept for parity with the original.
    num_nuclei = len(nuclei_annot_list)

    anot_time = time.time() - start_time
    csv_dict['AnnotationWritingTime'] = round(anot_time, 3)

    return csv_dict
def get_prediction(image, config, total=None, session=None,
                   fetches=None, image_tensor=None, class_labels=None,
                   return_tf_vars=False):
    """
    Gets the prediction given by the model `model_type` of the image
    `image`.

    If a checkpoint exists in the job's directory, load it. The names of
    the classes will be obtained from the dataset directory.

    Either all of `session`, `fetches` and `image_tensor` are given (to
    reuse a graph built by a previous call) or none of them are (a new
    graph is built and the checkpoint restored).

    Returns a dictionary with the objects, their labels and probabilities,
    the inference time and the scale factor. Also if the `return_tf_vars`
    is True, returns the image tensor, the entire prediction of the model
    and the session.
    """
    if session is None and fetches is None and image_tensor is None:
        # First call: build the graph from scratch.
        # Don't use data augmentation in predictions
        config.dataset.data_augmentation = None

        dataset_class = get_dataset(config.dataset.type)
        model_class = get_model(config.model.type)
        dataset = dataset_class(config)
        model = model_class(config)

        graph = tf.Graph()
        session = tf.Session(graph=graph)

        with graph.as_default():
            # Variable-sized RGB input; preprocess rescales it and reports
            # the applied scale factor via `process_meta`.
            image_tensor = tf.placeholder(tf.float32, (None, None, 3))
            image_tf, _, process_meta = dataset.preprocess(image_tensor)
            pred_dict = model(image_tf)

            # Restore checkpoint
            if config.train.job_dir:
                job_dir = config.train.job_dir
                if config.train.run_name:
                    job_dir = os.path.join(job_dir, config.train.run_name)
                ckpt = tf.train.get_checkpoint_state(job_dir)
                if not ckpt or not ckpt.all_model_checkpoint_paths:
                    raise ValueError(
                        'Could not find checkpoint in {}.'.format(job_dir))
                # Most recent checkpoint recorded in the state file.
                ckpt = ckpt.all_model_checkpoint_paths[-1]
                saver = tf.train.Saver(sharded=True, allow_empty=True)
                saver.restore(session, ckpt)
                tf.logging.info('Loaded checkpoint.')
            else:
                # A prediction without checkpoint is just used for testing
                tf.logging.warning(
                    'Could not load checkpoint. Using initialized model.')
                init_op = tf.group(tf.global_variables_initializer(),
                                   tf.local_variables_initializer())
                session.run(init_op)

            if config.model.network.with_rcnn:
                cls_prediction = pred_dict['classification_prediction']
                objects_tf = cls_prediction['objects']
                objects_labels_tf = cls_prediction['labels']
                objects_labels_prob_tf = cls_prediction['probs']
            else:
                rpn_prediction = pred_dict['rpn_prediction']
                objects_tf = rpn_prediction['proposals']
                objects_labels_prob_tf = rpn_prediction['scores']
                # All labels without RCNN are zero
                objects_labels_tf = tf.zeros(tf.shape(objects_labels_prob_tf),
                                             dtype=tf.int32)

            fetches = {
                'objects': objects_tf,
                'labels': objects_labels_tf,
                'probs': objects_labels_prob_tf,
                'scale_factor': process_meta['scale_factor']
            }

            # If in debug mode, return the full prediction dictionary.
            if config.train.debug:
                fetches['_debug'] = pred_dict

    elif session is None or fetches is None or image_tensor is None:
        # Partial reuse arguments are an error: all three or none.
        raise ValueError(
            'Either all `session`, `fetches` and `image_tensor` are None, '
            'or neither of them are.')

    start_time = time.time()
    fetched = session.run(fetches, feed_dict={image_tensor: np.array(image)})
    end_time = time.time()

    objects = fetched['objects']
    objects_labels = fetched['labels']
    objects_labels_prob = fetched['probs']
    scale_factor = fetched['scale_factor']

    objects_labels = objects_labels.tolist()
    if class_labels is not None:
        # Map numeric label ids to human-readable class names.
        objects_labels = [class_labels[obj] for obj in objects_labels]

    # Scale objects to original image dimensions
    objects /= scale_factor

    objects = objects.tolist()
    objects_labels_prob = objects_labels_prob.tolist()

    if total is not None:
        # Keep only the first `total` detections.
        objects = objects[:total]
        objects_labels = objects_labels[:total]
        objects_labels_prob = objects_labels_prob[:total]

    res = {
        'objects': objects,
        'objects_labels': objects_labels,
        'objects_labels_prob': objects_labels_prob,
        'inference_time': end_time - start_time,
    }

    if return_tf_vars:
        # Expose the graph handles so callers can reuse them next call.
        res['image_tensor'] = image_tensor
        res['fetches'] = fetches
        res['session'] = session

    return res
def eval(dataset_split, config_files, watch, from_global_step,
         override_params, files_per_class, iou_threshold, min_probability):
    """Evaluate models using dataset.

    Builds the evaluation graph for `dataset_split`, then loops loading
    checkpoints from the run directory and running `evaluate_once` on each,
    using the given IoU threshold and minimum detection probability. With
    `watch` set, polls for new checkpoints until interrupted.

    NOTE(review): the function name shadows the builtin `eval`; kept as-is
    since it is presumably a registered CLI entry point — confirm before
    renaming.

    Args:
        dataset_split: Name of the dataset split to evaluate.
        config_files: Config file paths merged over the model's base config.
        watch: Whether to keep watching the run dir for new checkpoints.
        from_global_step: Only evaluate checkpoints newer than this step.
        override_params: Extra config overrides passed to `get_config`.
        files_per_class: Forwarded to `evaluate_once` (visualization
            sampling).
        iou_threshold: IoU threshold forwarded to `evaluate_once`.
        min_probability: Minimum detection probability; also written into
            the proposals config.

    Raises:
        KeyError: if `model.type`, `job_dir` or `run_name` is not set.
        ValueError: on a missing checkpoint when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    if not config.train.job_dir:
        raise KeyError('`job_dir` should be set.')
    if not config.train.run_name:
        raise KeyError('`run_name` should be set.')

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Apply the probability cutoff to whichever proposal stage produces the
    # final detections.
    if config.model.network.with_rcnn:
        config.model.rcnn.proposals.min_prob_threshold = min_probability
    else:
        config.model.rpn.proposals.min_prob_threshold = min_probability

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(
                run_dir, last_global_step, last_only=not watch
            )
        except ValueError as e:
            if not watch:
                tf.logging.error('Missing checkpoint.')
                raise e

            tf.logging.warning(
                'Missing checkpoint; Checking again in a moment')
            time.sleep(5)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                'Evaluating global_step {} using checkpoint \'{}\''.format(
                    checkpoint['global_step'], checkpoint['file']
                )
            )
            try:
                start = time.time()
                evaluate_once(
                    config, writer, saver, ops, checkpoint,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize,
                    iou_threshold=iou_threshold,
                    min_probability=min_probability
                )
                last_global_step = checkpoint['global_step']
                tf.logging.info('Evaluated in {:.2f}s'.format(
                    time.time() - start
                ))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                tf.logging.info(
                    'Checkpoint {} is not ready yet. '
                    'Checking again in a moment.'.format(
                        checkpoint['file']
                    )
                )
                time.sleep(5)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a moment and check for new checkpoints.
        tf.logging.info('All checkpoints evaluated; sleeping for a moment')
        time.sleep(5)