def __init__(
    self,
    dataset_loader: str,
    dataset_path: str,
    postures_generator: Optional[Generator] = None,
    video_name: Optional[str] = None,
    **kwargs
):
    """
    Load a dataset and prepare everything needed to generate synthetic worm
    images from the template frames of one of its videos.

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worm
    :param postures_generator: Optional generator of worm postures;
        defaults to ``PosturesModel().generate()``
    :param video_name: Which video of the dataset to use as the source of
        template frames; defaults to the first video of the dataset
    :param kwargs: Extra options, forwarded to both ``ResizeOptions`` and
        ``load_dataset``
    :raises ValueError: if the chosen video contains no frame with a fully
        labelled skeleton to use as a template
    """
    resize_options = ResizeOptions(**kwargs)
    dataset = load_dataset(dataset_loader, dataset_path, resize_options=resize_options, **kwargs)

    if postures_generator is None:
        postures_generator = PosturesModel().generate()
    if video_name is None:
        video_name = dataset.video_names[0]

    features = dataset.features_dataset[video_name]
    self.skeletons = features.skeletons
    self.measurements = features.measurements
    self.output_image_shape = dataset.image_shape

    # Random augmentations are disabled here — presumably this instance is for
    # deterministic generation rather than training data; confirm against callers.
    self.synthetic_dataset = SyntheticDataset(
        frame_preprocessing=dataset.frame_preprocessing,
        output_image_shape=self.output_image_shape,
        enable_random_augmentations=False,
    )

    # A frame is usable as a template only if its skeleton has no NaN anywhere.
    # The axis=(1, 2) reduction assumes skeletons is a 3-D array indexed by
    # frame on axis 0 — TODO confirm exact layout against the features dataset.
    skel_is_not_nan = ~np.any(np.isnan(self.skeletons), axis=(1, 2))
    self.labelled_indexes = np.where(skel_is_not_nan)[0]
    if len(self.labelled_indexes) == 0:
        raise ValueError("No template frames found in the dataset, can't generate synthetic images.")

    self.frames_dataset = dataset.frames_dataset
    self.video_name = video_name
    self.postures_generator = postures_generator
def __init__(self, dataset_loader: str, dataset_path: str, video_name=None, **kwargs):
    """
    Load a dataset and set up a RealDataset for processing its real frames.

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worm
    :param video_name: Which video to work on; defaults to the first video
        of the dataset
    :param kwargs: Extra options forwarded to ResizeOptions
    """
    # NOTE(review): kwargs only feed ResizeOptions here; they are not forwarded
    # to load_dataset (the synthetic counterpart forwards them) — confirm intended.
    dataset = load_dataset(
        dataset_loader, dataset_path, resize_options=ResizeOptions(**kwargs)
    )

    if video_name is None:
        video_name = dataset.video_names[0]
    self.video_name = video_name

    self.real_dataset = RealDataset(
        frame_preprocessing=dataset.frame_preprocessing,
        output_image_shape=dataset.image_shape,
    )
    self.frames_dataset = dataset.frames_dataset
def visualize(dataset_path: str, **kwargs):
    """
    Export prediction results as videos with a centerline overlay on top of the original images

    :param dataset_path: Root path of the dataset containing videos of worm
    """
    args = _parse_arguments(dataset_path, kwargs)

    results_dir = os.path.join(args.experiment_dir, default_paths.RESULTS_DIR)
    config = load_config(args.config)
    dataset = load_dataset(config.dataset_loader, dataset_path, selected_video_names=args.video_names)
    visualizer = _Visualizer(
        dataset=dataset,
        draw_original=args.draw_original,
        temp_dir=args.temp_dir,
        results_dir=results_dir,
    )

    for video_name in dataset.video_names:
        results_file = os.path.join(results_dir, video_name, args.results_file)

        # Missing results for one video should not abort the whole export.
        if not os.path.exists(results_file):
            logger.error(f"No results file to analyze, file not found: '{results_file}'")
            continue

        with h5py.File(results_file, "r") as f:
            if args.group_name not in f:
                logger.error(
                    f"Field: '{args.group_name}' not found in file: '{results_file}', "
                    f"can't visualize results.")
                continue
            # Materialize the datasets with [:] so nothing lazy outlives the file.
            scores = f[args.group_name]["scores"][:]
            skeletons = f[args.group_name]["skeletons"][:]
            visualizer.export_to_images(video_name=video_name, results_scores=scores, results_skel=skeletons)

    # cleanup
    shutil.rmtree(args.temp_dir)
def calibrate(dataset_loader: str, dataset_path: str, **kwargs):
    """
    Calculate the image score for a certain number of labelled frames in the dataset,
    this will give an indication on choosing the image similarity threshold when
    predicting all frames in the dataset.

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worm
    :return: generator yielding ``(video_name, results_file)`` per video
    """
    _log_parameters(logger.info, {
        "dataset_loader": dataset_loader,
        "dataset_path": dataset_path
    })
    args = _parse_arguments(kwargs)

    # Seed both RNGs so the sampled calibration frames are reproducible.
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

    dataset_name = get_dataset_name(dataset_path)
    experiment_dir = os.path.join(args.work_dir, dataset_name)
    calibration_results_dir = os.path.join(
        experiment_dir, default_paths.CALIBRATION_RESULTS_DIR)
    os.makedirs(calibration_results_dir, exist_ok=True)

    dataset = load_dataset(
        dataset_loader,
        dataset_path,
        selected_video_names=args.video_names,
        **vars(args),
    )
    calibrator = _Calibrator(
        dataset=dataset,
        results_dir=calibration_results_dir,
        image_shape=dataset.image_shape,
        num_samples=args.num_samples,
        theta_dims=args.theta_dims,
    )

    # Fix: kwargs["save_images"] raised KeyError when the option was omitted,
    # while every other option goes through the defaulted args. A missing
    # "save_images" now means scores only.
    writer = _ImagesAndScoresWriter() if kwargs.get("save_images") else _ScoresWriter()

    for video_name in dataset.video_names:
        results_file = calibrator(video_name=video_name, writer=writer)
        yield video_name, results_file
def save_results(dataset_loader, dataset_path, results_root_dir,
                 scores_filename="all_scores.txt", theta_filename="all_theta.npy"):
    """
    Aggregate the best "unaligned" scores and theta angles from every video's
    results file into two files covering the whole dataset.

    For each frame, the head/tail option with the highest score is kept; frames
    whose time index has no matching entry in the video's timestamp feature
    (resampled-only frames) are dropped.

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worm
    :param results_root_dir: Directory containing one subdirectory per video,
        each with a "results.h5" file
    :param scores_filename: Output text file for the concatenated scores
        (default preserves the previous hard-coded "all_scores.txt")
    :param theta_filename: Output .npy file for the concatenated thetas
        (default preserves the previous hard-coded "all_theta.npy")
    """
    dataset = load_dataset(dataset_loader, dataset_path)

    all_scores = []
    all_theta = []
    for video_name in sorted(os.listdir(results_root_dir)):
        results_file = os.path.join(results_root_dir, video_name, "results.h5")
        features = dataset.features_dataset[video_name]
        timestamp = features.timestamp
        with h5py.File(results_file, "r") as f:
            scores = f["unaligned"]["scores"][:]
            thetas = f["unaligned"]["theta"][:]

        # Per frame, keep the head/tail candidate with the best score.
        # (renamed from the misleading "max_scores": these are argmax indexes)
        best_indexes = np.argmax(scores, axis=1)
        results_scores = scores[np.arange(scores.shape[0]), best_indexes]
        results_theta = thetas[np.arange(thetas.shape[0]), best_indexes]

        non_resampled_scores = []
        non_resampled_theta = []
        for cur_time, (score, theta) in enumerate(zip(results_scores, results_theta)):
            # Only keep frames whose time index exists in the original timestamps.
            frame_index = np.where(timestamp == cur_time)[0]
            if len(frame_index) == 0:
                continue
            non_resampled_scores.append(score)
            non_resampled_theta.append(theta)

        all_scores.append(non_resampled_scores)
        all_theta.append(non_resampled_theta)
        print(video_name, len(non_resampled_scores))

    all_scores = np.concatenate(all_scores)
    all_theta = np.concatenate(all_theta)
    print(len(all_scores))
    np.savetxt(scores_filename, all_scores)
    np.save(theta_filename, all_theta)
def evaluate(dataset_path: str, **kwargs):
    """
    Evaluate a trained model by predicting synthetic data and recording the image similarity

    Writes image_score.txt, angle_error.txt, theta_labels.txt,
    theta_predictions.txt (and modes_error.txt when an eigenworms matrix is
    given) into <experiment_dir>/evaluation.

    :param dataset_path: Root path of the dataset containing videos of worm
    """
    args = _parse_arguments(dataset_path, kwargs)

    # Spawn start method + seeded RNGs: reproducible multiprocess generation.
    mp.set_start_method("spawn", force=True)
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

    results_dir = os.path.join(args.experiment_dir, "evaluation")
    os.makedirs(results_dir, exist_ok=True)

    config = load_config(args.config)
    eigenworms_matrix = load_eigenworms_matrix(args.eigenworms_matrix_path)

    dataset = load_dataset(
        dataset_loader=config.dataset_loader,
        dataset_path=dataset_path,
        selected_video_names=args.video_names,
        resize_options=ResizeOptions(resize_factor=config.resize_factor),
        **{WORM_IS_LIGHTER: config.worm_is_lighter},
    )

    # Synthetic evaluation samples are produced by worker processes and stored
    # in pickle files under temp_dir.
    pkl_filenames = _generate_synthetic_data(
        dataset,
        args.num_process,
        args.num_samples,
        args.postures_generation,
        args.temp_dir,
        args.random_seed,
    )

    keras_model = tf.keras.models.load_model(args.model_path, compile=False)
    tf_dataset = tf.data.Dataset.from_generator(
        partial(_eval_data_gen, pkl_filenames),
        tf.float32,
        tf.TensorShape(dataset.image_shape + (1, )),
    ).batch(args.batch_size)

    # Truncate to num_samples — presumably the generator/batching can yield a
    # few extra items; confirm against _eval_data_gen.
    network_predictions = keras_model.predict(tf_dataset)[:args.num_samples]
    shuffled_results = ShuffledResults(random_theta=network_predictions)

    # Score every prediction against the synthetic ground-truth images.
    ResultsScoring(
        frame_preprocessing=dataset.frame_preprocessing,
        num_process=args.num_process,
        temp_dir=args.temp_dir,
        image_shape=dataset.image_shape,
    )(
        results=shuffled_results,
        scoring_data_manager=_ScoringDataManager(pkl_filenames),
    )
    # Keep the maximum score between the two head/tail options for this evaluation
    image_scores = np.max(shuffled_results.scores, axis=1)

    # Now calculate the angle error and mode error
    angle_error = []
    modes_error = []
    theta_predictions = []
    _, theta_labels = _load_templates(pkl_filenames)
    for theta_label, theta_results in zip(theta_labels, shuffled_results.theta):
        # Evaluate against the head/tail candidate closest to the label.
        dists = [
            angle_distance(theta_result, theta_label)
            for theta_result in theta_results
        ]
        closest_index = int(np.argmin(dists))
        closest_theta = theta_results[closest_index]
        theta_predictions.append(closest_theta)
        angle_error.append(dists[closest_index])
        if eigenworms_matrix is not None:
            # Per-mode absolute error in eigenworm space.
            modes_label = theta_to_modes(theta_label, eigenworms_matrix)
            modes_prediction = theta_to_modes(closest_theta, eigenworms_matrix)
            mode_error = np.abs(modes_label - modes_prediction)
            modes_error.append(mode_error)

    np.savetxt(os.path.join(results_dir, "image_score.txt"), image_scores)
    np.savetxt(os.path.join(results_dir, "angle_error.txt"), angle_error)
    np.savetxt(os.path.join(results_dir, "theta_labels.txt"), theta_labels)
    np.savetxt(os.path.join(results_dir, "theta_predictions.txt"), theta_predictions)
    if eigenworms_matrix is not None:
        np.savetxt(os.path.join(results_dir, "modes_error.txt"), modes_error)

    logger.info(
        f"Evaluated model with synthetic data,"
        f" average image similarity: {np.mean(image_scores):.4f},"
        f" average angle error (degrees): {np.rad2deg(np.mean(angle_error)):.2f}"
    )
def post_process(dataset_path: str, **kwargs):
    """
    Process the raw network results with interpolation and smoothing

    For each video's results file, reads the "resolved" and "original" groups,
    interpolates gaps in the theta time series, smooths them, optionally
    flips dorsal-ventral and projects onto eigenworm modes, then saves
    "orig"/"raw"/"interp"/"smooth" results plus the processing metadata.

    :param dataset_path: Root path of the dataset containing videos of worm
    :raises FileNotFoundError: if no results file exists under the results dir
    """
    args = _parse_arguments(dataset_path, kwargs)
    results_root_dir = os.path.join(args.experiment_dir, default_paths.RESULTS_DIR)
    eigenworms_matrix = load_eigenworms_matrix(args.eigenworms_matrix_path)
    config = load_config(args.config)
    dataset = load_dataset(config.dataset_loader, dataset_path)
    spline_interpolation = _SplineInterpolation()

    results_files = list(
        sorted(glob.glob(os.path.join(results_root_dir, "*", RESULTS_FILENAME))))
    if len(results_files) == 0:
        raise FileNotFoundError("No results file to analyze was found")

    for results_file in results_files:
        # The parent directory name is the video name (results layout:
        # <results_root_dir>/<video_name>/<RESULTS_FILENAME>).
        video_name = os.path.basename(os.path.dirname(results_file))
        with h5py.File(results_file, "r") as results_f:
            # NOTE(review): broad except — any read failure (e.g. a missing
            # "resolved" group) skips this video rather than aborting the run.
            try:
                results_raw = BaseResults(
                    theta=results_f["resolved"]["theta"][:],
                    skeletons=results_f["resolved"]["skeletons"][:],
                    scores=results_f["resolved"]["scores"][:],
                )
            except Exception:
                logger.error(f"Couldn't read results in file {results_file}.")
                continue
            results_orig = OriginalResults(
                theta=results_f["original"]["theta"][:],
                skeletons=results_f["original"]["skeletons"][:])

        features = dataset.features_dataset[video_name]

        # A frame counts as missing when any of its theta components is NaN.
        missing_values = np.any(np.isnan(results_raw.theta), axis=1)
        if missing_values.sum() == len(results_raw.theta):
            logger.warning(
                f"No valid result was found, stopping postprocessing for {video_name}"
            )
            continue

        # Split the valid frames into contiguous segments, bridging gaps up to
        # max_gap_size and dropping segments shorter than min_segment_size.
        segments_boundaries = _get_valid_segments(
            is_valid_series=~missing_values,
            max_gap_size=args.max_gap_size,
            min_segment_size=args.min_segment_size,
        )

        # interpolate and smooth in angles space
        thetas_interp = spline_interpolation.interpolate_tseries(
            results_raw.theta, segments_boundaries, args.std_fraction)
        results_interp = _calculate_skeleton(thetas_interp, args, dataset, video_name)
        thetas_smooth = _smooth_tseries(
            thetas_interp,
            args.smoothing_window,
            args.poly_order,
            segments_boundaries,
        )
        results_smooth = _calculate_skeleton(thetas_smooth, args, dataset, video_name)

        # Normalize orientation: flip all theta series when the video's ventral
        # side is "clockwise", and record the flip in the saved metadata.
        flipped = False
        if features.ventral_side == "clockwise":
            results_orig.theta = _dorsal_ventral_flip_theta(
                results_orig.theta)
            results_raw.theta = _dorsal_ventral_flip_theta(
                results_raw.theta)
            results_interp.theta = _dorsal_ventral_flip_theta(
                results_interp.theta)
            results_smooth.theta = _dorsal_ventral_flip_theta(
                results_smooth.theta)
            flipped = True

        # Optionally attach eigenworm-mode projections to each results object.
        if eigenworms_matrix is not None:
            setattr(
                results_orig, "modes",
                _thetas_to_modes(results_orig.theta, eigenworms_matrix))
            setattr(results_raw, "modes",
                    _thetas_to_modes(results_raw.theta, eigenworms_matrix))
            setattr(
                results_interp, "modes",
                _thetas_to_modes(results_interp.theta, eigenworms_matrix))
            setattr(
                results_smooth, "modes",
                _thetas_to_modes(results_smooth.theta, eigenworms_matrix))

        # save results
        results_saver = ResultsSaver(
            temp_dir=args.temp_dir,
            results_root_dir=results_root_dir,
            results_filename=POSTPROCESSED_RESULTS_FILENAME)
        metadata = {
            "max_gap_size": args.max_gap_size,
            "min_segment_size": args.min_segment_size,
            "smoothing_window": args.smoothing_window,
            "poly_order": args.poly_order,
            "std_fraction": args.std_fraction,
            "dorsal_ventral_flip": flipped,
        }
        results_saver.save(
            results={
                "orig": results_orig,
                "raw": results_raw,
                "interp": results_interp,
                "smooth": results_smooth
            },
            metadata=metadata,
            video_name=video_name,
        )
        logger.info(
            f"Post-processed worm: {video_name} {'(flipped dorsal-ventral)' if flipped else ''}"
        )
def predict(dataset_path: str, **kwargs):
    """
    Use a trained model to predict the centerlines of worm for videos in a dataset

    Saves per-video results ("original", "unaligned", and — when head/tail can
    be resolved — "resolved") via ResultsSaver, then removes the temp dir.

    :param dataset_path: Root path of the dataset containing videos of worm
    """
    args = _parse_arguments(dataset_path, kwargs)
    mp.set_start_method("spawn", force=True)

    # Optional determinism: seed Python, NumPy and TF, and force deterministic
    # TF ops, only when a seed was provided.
    if args.random_seed is not None:
        os.environ["TF_DETERMINISTIC_OPS"] = "1"
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)
        tf.random.set_seed(args.random_seed)

    results_root_dir = os.path.join(args.experiment_dir, default_paths.RESULTS_DIR)
    os.makedirs(results_root_dir, exist_ok=True)

    config = load_config(args.config)
    dataset = load_dataset(
        dataset_loader=config.dataset_loader,
        dataset_path=dataset_path,
        selected_video_names=args.video_names,
        resize_options=ResizeOptions(resize_factor=config.resize_factor),
    )

    keras_model = tf.keras.models.load_model(args.model_path, compile=False)
    results_saver = ResultsSaver(
        temp_dir=args.temp_dir,
        results_root_dir=results_root_dir,
        results_filename=RESULTS_FILENAME)

    # Factory producing a tf.data input pipeline per video.
    tf_dataset_maker = _make_tf_dataset(
        data_generator=PredictDataGenerator(
            dataset=dataset,
            num_process=args.num_process,
            temp_dir=args.temp_dir,
            image_shape=config.image_shape,
            batch_size=args.batch_size,
        ),
        batch_size=args.batch_size,
        image_shape=config.image_shape,
    )
    results_scoring = ResultsScoring(
        frame_preprocessing=dataset.frame_preprocessing,
        num_process=args.num_process,
        temp_dir=args.temp_dir,
        image_shape=config.image_shape,
    )
    predictor = _Predictor(results_scoring=results_scoring, keras_model=keras_model)

    for video_name in dataset.video_names:
        logger.info(f'Processing video: "{video_name}"')
        features = dataset.features_dataset[video_name]

        # Scoring needs at least one labelled frame to use as a template.
        template_indexes = features.labelled_indexes
        if len(template_indexes) == 0:
            logger.error(
                f"Can't calculate image metric, there is no labelled frame in the video to use as a template, "
                f"stopping analysis for {video_name}.")
            continue

        original_results, shuffled_results = predictor(
            input_frames=tf_dataset_maker(video_name),
            num_frames=dataset.num_frames(video_name),
            features=features,
            scoring_data_manager=ScoringDataManager(
                video_name=video_name,
                frames_dataset=dataset.frames_dataset,
                features=features,
            ),
        )
        results = {"original": original_results, "unaligned": shuffled_results}

        # Only add "resolved" results when the scores are good enough to
        # disambiguate head from tail.
        if _can_resolve_results(
                shuffled_results,
                video_name=video_name,
                score_threshold=args.score_threshold,
        ):
            final_results = resolve_head_tail(
                shuffled_results=shuffled_results,
                original_results=original_results,
                frame_rate=features.frame_rate,
                score_threshold=args.score_threshold,
            )
            results["resolved"] = final_results
            _apply_resize_factor(results["resolved"], config.resize_factor)

        # NOTE(review): the resize factor is applied to "unaligned" and
        # "resolved" but not to "original" — presumably intentional; confirm.
        _apply_resize_factor(results["unaligned"], config.resize_factor)

        results_saver.save(results=results, video_name=video_name)

    # cleanup
    shutil.rmtree(args.temp_dir)
def generate(dataset_loader: str, dataset_path: str, **kwargs):
    """
    Generate synthetic images (training data) and processed real images (evaluation data)
    and save them to TFrecord files using multiprocessing

    This is a generator: it yields progress values while the synthetic training
    data is written, then yields 1.0, then produces the evaluation data and
    saves the experiment config.

    :param dataset_loader: Name of the dataset loader, for example "tierpsy"
    :param dataset_path: Root path of the dataset containing videos of worm
    """
    _log_parameters(logger.info, {
        "dataset_loader": dataset_loader,
        "dataset_path": dataset_path
    })
    args = _parse_arguments(kwargs)
    mp.set_start_method("spawn", force=True)
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

    # setup folders
    if not os.path.exists(args.work_dir):
        os.mkdir(args.work_dir)
    experiment_dir = os.path.join(args.work_dir, get_dataset_name(dataset_path))
    os.makedirs(experiment_dir, exist_ok=True)

    # A previous training-data directory is wiped: each run regenerates the
    # TFrecords from scratch.
    tfrecords_dataset_root = os.path.join(experiment_dir,
                                          default_paths.TRAINING_DATA_DIR)
    if os.path.exists(tfrecords_dataset_root):
        shutil.rmtree(tfrecords_dataset_root)

    dataset = load_dataset(
        dataset_loader=dataset_loader,
        dataset_path=dataset_path,
        resize_options=args.resize_options,
        selected_video_names=args.video_names,
    )

    start = time.time()
    synthetic_data_generator = SyntheticDataGenerator(
        num_process=args.num_process,
        temp_dir=args.temp_dir,
        dataset=dataset,
        postures_generation_fn=args.postures_generation,
        enable_random_augmentations=True,
        writer=TfrecordLabeledDataWriter,
        random_seed=args.random_seed,
    )
    gen = synthetic_data_generator.generate(
        num_samples=args.num_train_samples,
        file_pattern=os.path.join(args.temp_dir, SYNTH_TRAIN_DATASET_NAMES),
    )
    # Forward progress to the caller, then signal completion with 1.0.
    for progress in gen:
        yield progress
    yield 1.0

    # Infer the theta dimensionality from one sample posture.
    theta_dims = len(next(args.postures_generation()))
    num_eval_samples = eval_data_generator.generate(
        dataset=dataset,
        num_samples=args.num_eval_samples,
        theta_dims=theta_dims,
        file_pattern=os.path.join(args.temp_dir, REAL_EVAL_DATASET_NAMES),
    )

    # Data is written to temp_dir first and copied into the experiment dir
    # only once generation finished.
    shutil.copytree(args.temp_dir, tfrecords_dataset_root)

    save_config(
        ExperimentConfig(
            dataset_loader=dataset_loader,
            image_shape=dataset.image_shape,
            theta_dimensions=theta_dims,
            num_train_samples=args.num_train_samples,
            num_eval_samples=num_eval_samples,
            resize_factor=args.resize_options.resize_factor,
            video_names=dataset.video_names,
        ),
        os.path.join(experiment_dir, CONFIG_FILENAME),
    )
    end = time.time()
    logger.info(f"Done generating training data in : {end - start:.1f}s")