def __init__(
        self,
        dataset_loader: str,
        dataset_path: str,
        postures_generator: Optional[Generator] = None,
        video_name: Optional[str] = None,
        **kwargs
    ):
        resize_options = ResizeOptions(**kwargs)
        dataset = load_dataset(dataset_loader, dataset_path, resize_options=resize_options, **kwargs)

        if postures_generator is None:
            postures_generator = PosturesModel().generate()
        if video_name is None:
            video_name = dataset.video_names[0]

        features = dataset.features_dataset[video_name]
        self.skeletons = features.skeletons
        self.measurements = features.measurements

        self.output_image_shape = dataset.image_shape

        self.synthetic_dataset = SyntheticDataset(
            frame_preprocessing=dataset.frame_preprocessing,
            output_image_shape=self.output_image_shape,
            enable_random_augmentations=False,
        )
        skel_is_not_nan = ~np.any(np.isnan(self.skeletons), axis=(1, 2))
        self.labelled_indexes = np.where(skel_is_not_nan)[0]
        if len(self.labelled_indexes) == 0:
            raise ValueError("No template frames found in the dataset, can't generate synthetic images.")
        self.frames_dataset = dataset.frames_dataset
        self.video_name = video_name
        self.postures_generator = postures_generator
Example #2
def __init__(self, dataset_loader: str, dataset_path: str, video_name=None, **kwargs):
        resize_options = ResizeOptions(**kwargs)
        dataset = load_dataset(dataset_loader, dataset_path, resize_options=resize_options)

        self.video_name = video_name if video_name is not None else dataset.video_names[0]
        self.real_dataset = RealDataset(
            frame_preprocessing=dataset.frame_preprocessing,
            output_image_shape=dataset.image_shape,
        )
        self.frames_dataset = dataset.frames_dataset
Example #3
def visualize(dataset_path: str, **kwargs):
    """
    Export prediction results as videos with a centerline overlay on top of the original images

    :param dataset_path: Root path of the dataset containing videos of worms
    """
    args = _parse_arguments(dataset_path, kwargs)

    results_dir = os.path.join(args.experiment_dir, default_paths.RESULTS_DIR)
    config = load_config(args.config)

    dataset = load_dataset(config.dataset_loader,
                           dataset_path,
                           selected_video_names=args.video_names)

    visualizer = _Visualizer(
        dataset=dataset,
        draw_original=args.draw_original,
        temp_dir=args.temp_dir,
        results_dir=results_dir,
    )

    for video_name in dataset.video_names:

        results_file = os.path.join(results_dir, video_name, args.results_file)
        if not os.path.exists(results_file):
            logger.error(
                f"No results file to analyze, file not found: '{results_file}'"
            )
            continue

        with h5py.File(results_file, "r") as f:
            if args.group_name not in f:
                logger.error(
                    f"Field: '{args.group_name}' not found in file: '{results_file}', "
                    f"can't visualize results.")
                continue

            group = f[args.group_name]
            scores = group["scores"][:]
            skeletons = group["skeletons"][:]

        visualizer.export_to_images(video_name=video_name,
                                    results_scores=scores,
                                    results_skel=skeletons)

    # cleanup
    shutil.rmtree(args.temp_dir)
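
A hedged usage sketch of calling visualize on the definitions above; the dataset path is hypothetical and the keyword names mirror the args fields used in the function body, but the exact kwargs accepted depend on _parse_arguments:

# Hypothetical call: overlay the predicted centerlines of one video on its frames.
visualize(
    "/data/worm_dataset",                        # hypothetical dataset root
    experiment_dir="/experiments/worm_dataset",  # where RESULTS_DIR lives (assumed kwarg)
    video_names=["video_0001"],                  # restrict to selected videos (assumed kwarg)
    draw_original=True,                          # also draw the original frame (assumed kwarg)
)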
Example #4
def calibrate(dataset_loader: str, dataset_path: str, **kwargs):
    """
    Calculate the image score for a certain number of labelled frames in the dataset;
    this helps choose the image similarity threshold to use when predicting all frames in the dataset.

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worms
    """
    _log_parameters(logger.info, {
        "dataset_loader": dataset_loader,
        "dataset_path": dataset_path
    })
    args = _parse_arguments(kwargs)

    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

    dataset_name = get_dataset_name(dataset_path)
    experiment_dir = os.path.join(args.work_dir, dataset_name)
    calibration_results_dir = os.path.join(
        experiment_dir, default_paths.CALIBRATION_RESULTS_DIR)
    os.makedirs(calibration_results_dir, exist_ok=True)

    dataset = load_dataset(
        dataset_loader,
        dataset_path,
        selected_video_names=args.video_names,
        **vars(args),
    )

    calibrator = _Calibrator(
        dataset=dataset,
        results_dir=calibration_results_dir,
        image_shape=dataset.image_shape,
        num_samples=args.num_samples,
        theta_dims=args.theta_dims,
    )

    writer = _ImagesAndScoresWriter() if kwargs["save_images"] else _ScoresWriter()

    for video_name in dataset.video_names:
        results_file = calibrator(video_name=video_name, writer=writer)
        yield video_name, results_file
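
calibrate is a generator that yields one (video_name, results_file) pair per video, so a caller iterates it to drive the calibration. A hedged sketch follows; "tierpsy" and save_images come from the code above, the other keyword names and paths are assumptions:

# Hypothetical call; save_images is read directly from kwargs in the function above.
for video_name, results_file in calibrate(
        "tierpsy",                 # dataset loader named in the docstring above
        "/data/worm_dataset",      # hypothetical dataset root
        save_images=False,
        num_samples=500,           # assumed kwarg forwarded via _parse_arguments
        random_seed=0,             # assumed kwarg
):
    print(f"calibration scores for {video_name} written to {results_file}")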
Example #5
def save_results(dataset_loader, dataset_path, results_root_dir):
    """
    Collect the best-scoring unaligned predictions for each video, keep only the frames
    present in the original (non-resampled) timestamps, and save the concatenated
    scores and theta values to disk.
    """
    dataset = load_dataset(dataset_loader, dataset_path)

    all_scores = []
    all_theta = []

    for video_name in sorted(os.listdir(results_root_dir)):
        results_file = os.path.join(results_root_dir, video_name, "results.h5")

        features = dataset.features_dataset[video_name]
        timestamp = features.timestamp

        with h5py.File(results_file, "r") as f:
            scores = f["unaligned"]["scores"][:]
            thetas = f["unaligned"]["theta"][:]
            # pick the better of the two head/tail orientations for each frame
            best_indexes = np.argmax(scores, axis=1)
            results_scores = scores[np.arange(scores.shape[0]), best_indexes]
            results_theta = thetas[np.arange(thetas.shape[0]), best_indexes]

        non_resampled_scores = []
        non_resampled_theta = []
        # keep only the frames that exist in the original (non-resampled) timestamps
        for cur_time, (score, theta) in enumerate(zip(results_scores, results_theta)):
            frame_index = np.where(timestamp == cur_time)[0]
            if len(frame_index) == 0:
                continue
            non_resampled_scores.append(score)
            non_resampled_theta.append(theta)

        all_scores.append(non_resampled_scores)
        all_theta.append(non_resampled_theta)

        print(video_name, len(non_resampled_scores))

    all_scores = np.concatenate(all_scores)
    all_theta = np.concatenate(all_theta)
    print(len(all_scores))

    np.savetxt("all_scores.txt", all_scores)
    np.save("all_theta.npy", all_theta)
Example #6
def evaluate(dataset_path: str, **kwargs):
    """
    Evaluate a trained model by predicting synthetic data and recording the image similarity

    :param dataset_path: Root path of the dataset containing videos of worms
    """
    args = _parse_arguments(dataset_path, kwargs)

    mp.set_start_method("spawn", force=True)

    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

    results_dir = os.path.join(args.experiment_dir, "evaluation")
    os.makedirs(results_dir, exist_ok=True)

    config = load_config(args.config)
    eigenworms_matrix = load_eigenworms_matrix(args.eigenworms_matrix_path)

    dataset = load_dataset(
        dataset_loader=config.dataset_loader,
        dataset_path=dataset_path,
        selected_video_names=args.video_names,
        resize_options=ResizeOptions(resize_factor=config.resize_factor),
        **{WORM_IS_LIGHTER: config.worm_is_lighter},
    )

    pkl_filenames = _generate_synthetic_data(
        dataset,
        args.num_process,
        args.num_samples,
        args.postures_generation,
        args.temp_dir,
        args.random_seed,
    )

    keras_model = tf.keras.models.load_model(args.model_path, compile=False)

    tf_dataset = tf.data.Dataset.from_generator(
        partial(_eval_data_gen, pkl_filenames),
        tf.float32,
        tf.TensorShape(dataset.image_shape + (1, )),
    ).batch(args.batch_size)

    network_predictions = keras_model.predict(tf_dataset)[:args.num_samples]
    shuffled_results = ShuffledResults(random_theta=network_predictions)

    ResultsScoring(
        frame_preprocessing=dataset.frame_preprocessing,
        num_process=args.num_process,
        temp_dir=args.temp_dir,
        image_shape=dataset.image_shape,
    )(
        results=shuffled_results,
        scoring_data_manager=_ScoringDataManager(pkl_filenames),
    )
    # Keep the maximum score between the two head/tail options for this evaluation
    image_scores = np.max(shuffled_results.scores, axis=1)

    # Now calculate the angle error and mode error
    angle_error = []
    modes_error = []
    theta_predictions = []
    _, theta_labels = _load_templates(pkl_filenames)
    for theta_label, theta_results in zip(theta_labels,
                                          shuffled_results.theta):
        dists = [
            angle_distance(theta_result, theta_label)
            for theta_result in theta_results
        ]
        closest_index = int(np.argmin(dists))
        closest_theta = theta_results[closest_index]
        theta_predictions.append(closest_theta)
        angle_error.append(dists[closest_index])
        if eigenworms_matrix is not None:
            modes_label = theta_to_modes(theta_label, eigenworms_matrix)
            modes_prediction = theta_to_modes(closest_theta, eigenworms_matrix)
            mode_error = np.abs(modes_label - modes_prediction)
            modes_error.append(mode_error)

    np.savetxt(os.path.join(results_dir, "image_score.txt"), image_scores)
    np.savetxt(os.path.join(results_dir, "angle_error.txt"), angle_error)
    np.savetxt(os.path.join(results_dir, "theta_labels.txt"), theta_labels)
    np.savetxt(os.path.join(results_dir, "theta_predictions.txt"),
               theta_predictions)
    if eigenworms_matrix is not None:
        np.savetxt(os.path.join(results_dir, "modes_error.txt"), modes_error)

    logger.info(
        f"Evaluated model with synthetic data,"
        f" average image similarity: {np.mean(image_scores):.4f},"
        f" average angle error (degrees): {np.rad2deg(np.mean(angle_error)):.2f}"
    )
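
A hedged usage sketch for evaluate; the paths are hypothetical and the keyword names mirror the args fields used above (model_path, num_samples, eigenworms_matrix_path), but the accepted kwargs ultimately depend on _parse_arguments:

# Hypothetical call: score the trained model against freshly generated synthetic frames.
evaluate(
    "/data/worm_dataset",
    model_path="/experiments/worm_dataset/models/best_model.hdf5",  # hypothetical model file
    num_samples=1000,                 # number of synthetic frames to evaluate (assumed kwarg)
    eigenworms_matrix_path=None,      # skip the modes error when no eigenworms matrix is given
)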
Example #7
def post_process(dataset_path: str, **kwargs):
    """
    Process the raw network results with interpolation and smoothing

    :param dataset_path: Root path of the dataset containing videos of worms
    """
    args = _parse_arguments(dataset_path, kwargs)

    results_root_dir = os.path.join(args.experiment_dir,
                                    default_paths.RESULTS_DIR)

    eigenworms_matrix = load_eigenworms_matrix(args.eigenworms_matrix_path)

    config = load_config(args.config)

    dataset = load_dataset(config.dataset_loader, dataset_path)

    spline_interpolation = _SplineInterpolation()

    results_files = list(
        sorted(glob.glob(os.path.join(results_root_dir, "*",
                                      RESULTS_FILENAME))))
    if len(results_files) == 0:
        raise FileNotFoundError("No results file to analyze was found")

    for results_file in results_files:
        video_name = os.path.basename(os.path.dirname(results_file))

        with h5py.File(results_file, "r") as results_f:

            try:
                results_raw = BaseResults(
                    theta=results_f["resolved"]["theta"][:],
                    skeletons=results_f["resolved"]["skeletons"][:],
                    scores=results_f["resolved"]["scores"][:],
                )
            except Exception:
                logger.error(f"Couldn't read results in file {results_file}.")
                continue

            results_orig = OriginalResults(
                theta=results_f["original"]["theta"][:],
                skeletons=results_f["original"]["skeletons"][:])

            features = dataset.features_dataset[video_name]

            missing_values = np.any(np.isnan(results_raw.theta), axis=1)
            if missing_values.sum() == len(results_raw.theta):
                logger.warning(
                    f"No valid result was found, stopping postprocessing for {video_name}"
                )
                continue

            segments_boundaries = _get_valid_segments(
                is_valid_series=~missing_values,
                max_gap_size=args.max_gap_size,
                min_segment_size=args.min_segment_size,
            )
            # interpolate and smooth in angles space
            thetas_interp = spline_interpolation.interpolate_tseries(
                results_raw.theta, segments_boundaries, args.std_fraction)
            results_interp = _calculate_skeleton(thetas_interp, args, dataset,
                                                 video_name)

            thetas_smooth = _smooth_tseries(
                thetas_interp,
                args.smoothing_window,
                args.poly_order,
                segments_boundaries,
            )
            results_smooth = _calculate_skeleton(thetas_smooth, args, dataset,
                                                 video_name)

            flipped = False

            if features.ventral_side == "clockwise":
                results_orig.theta = _dorsal_ventral_flip_theta(
                    results_orig.theta)
                results_raw.theta = _dorsal_ventral_flip_theta(
                    results_raw.theta)
                results_interp.theta = _dorsal_ventral_flip_theta(
                    results_interp.theta)
                results_smooth.theta = _dorsal_ventral_flip_theta(
                    results_smooth.theta)
                flipped = True

            if eigenworms_matrix is not None:
                setattr(
                    results_orig, "modes",
                    _thetas_to_modes(results_orig.theta, eigenworms_matrix))
                setattr(results_raw, "modes",
                        _thetas_to_modes(results_raw.theta, eigenworms_matrix))
                setattr(
                    results_interp, "modes",
                    _thetas_to_modes(results_interp.theta, eigenworms_matrix))
                setattr(
                    results_smooth, "modes",
                    _thetas_to_modes(results_smooth.theta, eigenworms_matrix))

        # save results
        results_saver = ResultsSaver(
            temp_dir=args.temp_dir,
            results_root_dir=results_root_dir,
            results_filename=POSTPROCESSED_RESULTS_FILENAME)

        metadata = {
            "max_gap_size": args.max_gap_size,
            "min_segment_size": args.min_segment_size,
            "smoothing_window": args.smoothing_window,
            "poly_order": args.poly_order,
            "std_fraction": args.std_fraction,
            "dorsal_ventral_flip": flipped,
        }

        results_saver.save(
            results={
                "orig": results_orig,
                "raw": results_raw,
                "interp": results_interp,
                "smooth": results_smooth
            },
            metadata=metadata,
            video_name=video_name,
        )
        logger.info(
            f"Post-processed worm: {video_name} {'(flipped dorsal-ventral)' if flipped else ''}"
        )
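
A hedged usage sketch for post_process; the paths are hypothetical and the keyword names mirror the args fields used above (experiment_dir, max_gap_size, min_segment_size, smoothing_window, poly_order, std_fraction), subject to what _parse_arguments actually accepts:

# Hypothetical call: interpolate and smooth the raw predictions of every video with results.
post_process(
    "/data/worm_dataset",
    experiment_dir="/experiments/worm_dataset",  # assumed kwarg, must contain RESULTS_DIR
    max_gap_size=3,                              # assumed kwarg: longest gap (frames) to interpolate
    smoothing_window=7,                          # assumed kwarg: window passed to _smooth_tseries
)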
Example #8
def predict(dataset_path: str, **kwargs):
    """
    Use a trained model to predict the worm centerlines for the videos in a dataset

    :param dataset_path: Root path of the dataset containing videos of worms
    """
    args = _parse_arguments(dataset_path, kwargs)

    mp.set_start_method("spawn", force=True)

    if args.random_seed is not None:
        os.environ["TF_DETERMINISTIC_OPS"] = "1"
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)
        tf.random.set_seed(args.random_seed)

    results_root_dir = os.path.join(args.experiment_dir,
                                    default_paths.RESULTS_DIR)
    os.makedirs(results_root_dir, exist_ok=True)

    config = load_config(args.config)

    dataset = load_dataset(
        dataset_loader=config.dataset_loader,
        dataset_path=dataset_path,
        selected_video_names=args.video_names,
        resize_options=ResizeOptions(resize_factor=config.resize_factor),
    )

    keras_model = tf.keras.models.load_model(args.model_path, compile=False)

    results_saver = ResultsSaver(temp_dir=args.temp_dir,
                                 results_root_dir=results_root_dir,
                                 results_filename=RESULTS_FILENAME)

    tf_dataset_maker = _make_tf_dataset(
        data_generator=PredictDataGenerator(
            dataset=dataset,
            num_process=args.num_process,
            temp_dir=args.temp_dir,
            image_shape=config.image_shape,
            batch_size=args.batch_size,
        ),
        batch_size=args.batch_size,
        image_shape=config.image_shape,
    )

    results_scoring = ResultsScoring(
        frame_preprocessing=dataset.frame_preprocessing,
        num_process=args.num_process,
        temp_dir=args.temp_dir,
        image_shape=config.image_shape,
    )
    predictor = _Predictor(results_scoring=results_scoring,
                           keras_model=keras_model)

    for video_name in dataset.video_names:
        logger.info(f'Processing video: "{video_name}"')
        features = dataset.features_dataset[video_name]

        template_indexes = features.labelled_indexes
        if len(template_indexes) == 0:
            logger.error(
                f"Can't calculate image metric, there is no labelled frame in the video to use as a template, "
                f"stopping analysis for {video_name}.")
            continue

        original_results, shuffled_results = predictor(
            input_frames=tf_dataset_maker(video_name),
            num_frames=dataset.num_frames(video_name),
            features=features,
            scoring_data_manager=ScoringDataManager(
                video_name=video_name,
                frames_dataset=dataset.frames_dataset,
                features=features,
            ),
        )

        results = {"original": original_results, "unaligned": shuffled_results}
        if _can_resolve_results(
                shuffled_results,
                video_name=video_name,
                score_threshold=args.score_threshold,
        ):
            final_results = resolve_head_tail(
                shuffled_results=shuffled_results,
                original_results=original_results,
                frame_rate=features.frame_rate,
                score_threshold=args.score_threshold,
            )
            results["resolved"] = final_results
            _apply_resize_factor(results["resolved"], config.resize_factor)

        _apply_resize_factor(results["unaligned"], config.resize_factor)

        results_saver.save(results=results, video_name=video_name)

    # cleanup
    shutil.rmtree(args.temp_dir)
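
A hedged usage sketch for predict; the paths are hypothetical and the keyword names mirror the args fields used above (model_path, score_threshold, num_process, batch_size), subject to what _parse_arguments actually accepts:

# Hypothetical call: predict centerlines for every video in the dataset.
predict(
    "/data/worm_dataset",
    model_path="/experiments/worm_dataset/models/best_model.hdf5",  # hypothetical model file
    score_threshold=0.7,   # assumed kwarg: minimum image score needed to resolve head/tail
    num_process=4,         # assumed kwarg: worker processes for ResultsScoring
)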
Example #9
def generate(dataset_loader: str, dataset_path: str, **kwargs):
    """
    Generate synthetic images (training data) and processed real images (evaluation data)
    and save them to TFRecord files using multiprocessing

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worms
    """
    _log_parameters(logger.info, {
        "dataset_loader": dataset_loader,
        "dataset_path": dataset_path
    })
    args = _parse_arguments(kwargs)

    mp.set_start_method("spawn", force=True)

    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

    # setup folders
    if not os.path.exists(args.work_dir):
        os.mkdir(args.work_dir)
    experiment_dir = os.path.join(args.work_dir,
                                  get_dataset_name(dataset_path))
    os.makedirs(experiment_dir, exist_ok=True)
    tfrecords_dataset_root = os.path.join(experiment_dir,
                                          default_paths.TRAINING_DATA_DIR)
    if os.path.exists(tfrecords_dataset_root):
        shutil.rmtree(tfrecords_dataset_root)

    dataset = load_dataset(
        dataset_loader=dataset_loader,
        dataset_path=dataset_path,
        resize_options=args.resize_options,
        selected_video_names=args.video_names,
    )

    start = time.time()
    synthetic_data_generator = SyntheticDataGenerator(
        num_process=args.num_process,
        temp_dir=args.temp_dir,
        dataset=dataset,
        postures_generation_fn=args.postures_generation,
        enable_random_augmentations=True,
        writer=TfrecordLabeledDataWriter,
        random_seed=args.random_seed,
    )
    gen = synthetic_data_generator.generate(
        num_samples=args.num_train_samples,
        file_pattern=os.path.join(args.temp_dir, SYNTH_TRAIN_DATASET_NAMES),
    )
    for progress in gen:
        yield progress
    yield 1.0

    theta_dims = len(next(args.postures_generation()))
    num_eval_samples = eval_data_generator.generate(
        dataset=dataset,
        num_samples=args.num_eval_samples,
        theta_dims=theta_dims,
        file_pattern=os.path.join(args.temp_dir, REAL_EVAL_DATASET_NAMES),
    )

    shutil.copytree(args.temp_dir, tfrecords_dataset_root)
    save_config(
        ExperimentConfig(
            dataset_loader=dataset_loader,
            image_shape=dataset.image_shape,
            theta_dimensions=theta_dims,
            num_train_samples=args.num_train_samples,
            num_eval_samples=num_eval_samples,
            resize_factor=args.resize_options.resize_factor,
            video_names=dataset.video_names,
        ),
        os.path.join(experiment_dir, CONFIG_FILENAME),
    )

    end = time.time()
    logger.info(f"Done generating training data in : {end - start:.1f}s")