def test_readonly_exception(self, tmp_path):
        """
        Creating a subdirectory inside a read-only directory should raise

        The parent directory is restricted to read + execute (octal 555)
        before the attempt, and EnvironmentError is the expected result
        """
        # Store the original folder permissions
        test_dir_chmod = tmp_path.stat().st_mode

        # Create a new subdir and make it read + execute
        # The mode must be an octal literal, decimal 555 is octal 1053
        test_dir = create_subdirectory(tmp_path, SUB_DIR)
        test_dir.chmod(0o555)

        try:
            with pytest.raises(EnvironmentError):
                create_subdirectory(test_dir, SUB_DIR)
        finally:
            # Restore original folder permissions
            test_dir.chmod(test_dir_chmod)
    def test_ioerror(self, tmp_path):
        """
        Saving a CSV file inside a read-only directory should raise IOError

        The target directory is restricted to read + execute (octal 555)
        before the attempt to write
        """
        # Store the original folder permissions
        test_dir_chmod = tmp_path.stat().st_mode

        # Create a new subdir and make it read + execute
        # The mode must be an octal literal, decimal 555 is octal 1053
        test_dir = create_subdirectory(tmp_path, SUB_DIR)
        test_dir.chmod(0o555)

        try:
            with pytest.raises(IOError):
                save_to_csv("", "", test_dir.joinpath("test_csv"))
        finally:
            # Restore original folder permissions
            test_dir.chmod(test_dir_chmod)
    def test_write_exception(self, tmp_path):
        """
        Moving a file into a read-only directory should raise

        The destination directory is restricted to read + execute (octal 555)
        before the move, and EnvironmentError is the expected result
        """
        temp_file = create_temp_file(tmp_path)
        # Store the original folder permissions
        test_dir_chmod = tmp_path.stat().st_mode

        # Create a new subdir and make it read + execute
        # The mode must be an octal literal, decimal 555 is octal 1053
        test_dir = create_subdirectory(tmp_path, SUB_DIR)
        test_dir.chmod(0o555)

        try:
            with pytest.raises(EnvironmentError):
                move_to_subdirectory([temp_file], test_dir)
        finally:
            # Restore original folder permissions
            test_dir.chmod(test_dir_chmod)
Exemplo n.º 4
0
def get_plate_directory(parent_path, row, col, create_dir = True):
    """
    Build the directory path that belongs to a single plate

    The directory name has the form "row_<row>_col_<col>" and is placed
    directly below the parent path. The directory is only created on disk
    when requested, otherwise the path is returned without touching the disk

    :param parent_path: a path object
    :param row: a lattice co-ordinate row
    :param col: a lattice co-ordinate column
    :param create_dir: specify if the directory should be created
    :returns: a path object for the specified plate
    """
    directory_name = f"row_{row!s}_col_{col!s}"

    # Guard clause: the caller only wants to know the path
    if not create_dir:
        return parent_path.joinpath(directory_name)

    return file_access.create_subdirectory(parent_path, directory_name)
Exemplo n.º 5
0
def image_file_to_timepoints(
    image_file: ndarray,
    plates: PlateCollection,
    plate_noise_masks: Dict[int, ndarray],
    plot_path: Path = None
) -> Dict[int, List[Colony.Timepoint]]:
    """
    Segment every plate in an image and collect the resulting Timepoints

    The image is sliced into one image per plate. Each plate image is
    converted to grayscale, segmented, and the segmented image is used to
    build Timepoint objects, which are grouped by plate number

    NOTE(review): image_file is annotated as ndarray, but the code reads its
    image, timestamp and timestamp_elapsed attributes

    :param image_file: an ImageFile object
    :param plates: a PlateCollection of Plate instances
    :param plate_noise_masks: a dict of plate images to use as noise masks
    :param plot_path: a Path directory to save the segmented image plot
    :returns: a Dict of lists, each containing Timepoint objects
    """
    from collections import defaultdict
    from skimage.color import rgb2gray

    timepoints_by_plate = defaultdict(list)

    # One image per plate, keyed by plate number
    sliced_images = plates.slice_plate_image(image_file.image)

    for plate_id, plate_image in sliced_images.items():
        gray_image = rgb2gray(plate_image)

        # Segment the plate, the entry for this plate is replaced by the segmented image
        segmented_image = segment_image(
            gray_image,
            plate_mask = gray_image > 0,
            plate_noise_mask = plate_noise_masks[plate_id],
            area_min = 1.5
        )
        sliced_images[plate_id] = segmented_image

        # Build the Timepoint objects found on this plate
        found_timepoints = timepoints_from_image(
            segmented_image,
            image_file.timestamp_elapsed,
            image = plate_image
        )
        timepoints_by_plate[plate_id].extend(found_timepoints)

        # Plotting is optional
        if plot_path is None:
            continue
        plate_plot_path = file_access.create_subdirectory(plot_path, f"plate{plate_id}")
        plots.plot_plate_segmented(gray_image, segmented_image, image_file.timestamp, plate_plot_path)

    return timepoints_by_plate
Exemplo n.º 6
0
def main():
    """
    Command line entry point for a ColonyScanalyser analysis run

    Parses the command line arguments, then either loads previously cached
    plate data or processes the images found in the working directory to
    locate plates and colonies. The results are saved as cached data and CSV
    files. Plots are only generated when the original images were processed

    Always finishes by calling sys.exit

    :raises ValueError: if no working directory path is supplied
    :raises EnvironmentError: if the working directory can not be found
    :raises IOError: if no images are found, or if timestamps can not be
        read from all image filenames
    """
    parser = argparse.ArgumentParser(
        description = "An image analysis tool for measuring microorganism colony growth",
        formatter_class = argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("path", type = str,
                        help = "Image files location", default = None)
    parser.add_argument("-dpi", "--dots_per_inch", type = int, default = 300,
                        help = "The image DPI (dots per inch) setting")
    parser.add_argument("-mp", "--multiprocessing", type = strtobool, default = True,
                        help = "Enables use of more CPU cores, faster but more resource intensive")
    parser.add_argument("-p", "--plots", type = int, default = 1,
                        help = "The detail level of plot images to store on disk")
    parser.add_argument("--plate_edge_cut", type = int, default = 5,
                        help = "The exclusion area from the plate edge, as a percentage of the plate diameter")
    parser.add_argument("--plate_labels", type = str, nargs = "*", default = list(),
                        help = "A list of labels to identify each plate. Plates are ordered from top left, in rows. Example usage: --plate_labels plate1 plate2")
    parser.add_argument("--plate_lattice", type = int, nargs = 2, default = (3, 2),
                        metavar = ("ROW", "COL"),
                        help = "The row and column co-ordinate layout of plates. Example usage: --plate_lattice 3 3")
    parser.add_argument("--plate_size", type = int, default = 90,
                        help = "The plate diameter, in millimetres")
    parser.add_argument("--use_cached_data", type = strtobool, default = False,
                        help = "Allow use of previously calculated data")
    parser.add_argument("-v", "--verbose", type = int, default = 1,
                        help = "Information output level")

    args = parser.parse_args()
    BASE_PATH = args.path
    PLOTS = args.plots
    PLATE_LABELS = {plate_id: label for plate_id, label in enumerate(args.plate_labels, start = 1)}
    PLATE_LATTICE = tuple(args.plate_lattice)
    PLATE_SIZE = int(imaging.mm_to_pixels(args.plate_size, dots_per_inch = args.dots_per_inch))
    PLATE_EDGE_CUT = int(round(PLATE_SIZE * (args.plate_edge_cut / 100)))
    USE_CACHED = args.use_cached_data
    VERBOSE = args.verbose
    POOL_MAX = 1
    if args.multiprocessing:
        # Leave one processor free when more than one is available
        POOL_MAX = cpu_count() - 1 if cpu_count() > 1 else 1

    if VERBOSE >= 1:
        print("Starting ColonyScanalyser analysis")
    if VERBOSE >= 2 and POOL_MAX > 1:
        print(f"Multiprocessing enabled, utilising {POOL_MAX} of {cpu_count()} processors")

    # Resolve working directory
    if BASE_PATH is None:
        raise ValueError("A path to a working directory must be supplied")
    else:
        BASE_PATH = Path(args.path).resolve()
    if not BASE_PATH.exists():
        raise EnvironmentError(f"The supplied folder path could not be found: {BASE_PATH}")
    if VERBOSE >= 1:
        print(f"Working directory: {BASE_PATH}")

    # Check if processed image data is already stored and can be loaded
    segmented_image_data_filename = "cached_data"
    plates = None
    # image_files stays None when cached data is used, which disables plotting below
    image_files = None
    if USE_CACHED:
        if VERBOSE >= 1:
            print("Attempting to load cached data")
        plates = file_access.load_file(
            BASE_PATH.joinpath("data", segmented_image_data_filename),
            file_access.CompressionMethod.LZMA,
            pickle = True
        )
        # Check that segmented image data has been loaded for all plates
        # Also that data is not from an older format (< v0.4.0)
        # The validity of the data must not depend on the verbosity level
        if (
            plates is not None
            and plates.count == PlateCollection.coordinate_to_index(PLATE_LATTICE)
            and isinstance(plates.items[0], Plate)
        ):
            if VERBOSE >= 1:
                print("Successfully loaded cached data")
        else:
            if VERBOSE >= 1:
                print("Unable to load cached data, starting image processing")
            plates = None

    # plates is only set at this point if valid cached data was loaded
    if plates is None:
        # Find images in working directory
        image_formats = ["tif", "tiff", "png"]
        image_paths = file_access.get_files_by_type(BASE_PATH, image_formats)

        # Store images as ImageFile objects
        # Timestamps are automatically read from filenames
        image_files = ImageFileCollection()
        for image_path in image_paths:
            image_files.add(
                file_path = image_path,
                timestamp = None,
                timestamp_initial = None,
                cache_image = False
            )

        # Check if images have been loaded and timestamps could be read
        if image_files.count > 0:
            if VERBOSE >= 1:
                print(f"{image_files.count} images found")
        else:
            # The f prefix must be on the fragment that contains the placeholder
            raise IOError(
                "No images could be found in the supplied folder path."
                f" Images are expected in these formats: {image_formats}"
            )
        if image_files.count != len(image_files.timestamps):
            raise IOError(
                "Unable to load timestamps from all image filenames."
                " Please check that images have a filename with YYYYMMDD_HHMM timestamps"
            )

        # Set initial timestamp
        image_files.timestamps_initial = image_files.timestamps[0]

        # Process images to Timepoint data objects
        plate_timepoints = defaultdict(list)

        if VERBOSE >= 1:
            print("Preprocessing images to locate plates")

        # Load the first image to get plate coordinates and mask
        with image_files.items[0] as image_file:
            # Only find centers using first image. Assume plates do not move
            if VERBOSE >= 2:
                print(f"Locating plate centres in image: {image_file.file_path}")

            # Create new Plate instances to store the information
            plates = PlateCollection.from_image(
                shape = PLATE_LATTICE,
                image = image_file.image_gray,
                diameter = PLATE_SIZE,
                search_radius = PLATE_SIZE // 20,
                edge_cut = PLATE_EDGE_CUT,
                labels = PLATE_LABELS
            )

            if not plates.count > 0:
                print(f"Unable to locate plates in image: {image_file.file_path}")
                print("Processing unable to continue")
                sys.exit()

            if VERBOSE >= 3:
                for plate in plates.items:
                    print(f"Plate {plate.id} center: {plate.center}")

            # Use the first plate image as a noise mask
            plate_noise_masks = plates.slice_plate_image(image_file.image_gray)

        if VERBOSE >= 1:
            print("Processing colony data from all images")

        # Thin wrapper to display a progress bar
        def progress_update(result, progress):
            utilities.progress_bar(progress, message = "Processing images")

        processes = list()
        with Pool(processes = POOL_MAX) as pool:
            for i, image_file in enumerate(image_files.items):
                # Allow args to be passed to callback function
                callback_function = partial(progress_update, progress = ((i + 1) / image_files.count) * 100)

                # Create processes
                processes.append(pool.apply_async(
                    image_file_to_timepoints,
                    args = (image_file, plates, plate_noise_masks),
                    kwds = {"plot_path" : None},
                    callback = callback_function
                ))

            # Consolidate the results to a single dict
            for process in processes:
                result = process.get()
                for plate_id, timepoints in result.items():
                    plate_timepoints[plate_id].extend(timepoints)

        # Clear objects to free up memory
        processes = None
        plate_noise_masks = None

        if VERBOSE >= 1:
            print("Calculating colony properties")

        # Allowed deviation in the time between data points of a colony
        # This is the same for every plate, so it is calculated once
        timestamp_diff_std = diff(image_files.timestamps_elapsed_seconds[1:]).std()
        timestamp_diff_std += 20

        # Group Timepoints by centres and create Colony objects
        for plate_id, timepoints in plate_timepoints.items():
            # If no objects are found on this plate, move on to the next plate
            if not timepoints:
                continue

            plate = plates.get_item(plate_id)
            plate.items = colonies_from_timepoints(timepoints, distance_tolerance = 2)
            if VERBOSE >= 3:
                print(f"{plate.count} objects located on plate {plate.id}, before filtering")

            # Filter colonies to remove noise, background objects and merged colonies
            plate.items = list(filter(lambda colony:
                # Remove objects that do not have sufficient data points
                len(colony.timepoints) > 5 and
                # No colonies should be visible at the start of the experiment
                colony.time_of_appearance.total_seconds() > 0 and
                # Remove objects with large gaps in the data
                diff([t.timestamp.total_seconds() for t in colony.timepoints[1:]]).std() < timestamp_diff_std and
                # Remove object that do not show growth, these are not colonies
                colony.timepoint_last.area > 4 * colony.timepoint_first.area and
                # Objects that appear with a large initial area are either merged colonies or noise
                colony.timepoint_first.area < 10,
                plate.items
            ))

            if VERBOSE >= 1:
                print(f"{plate.count} colonies identified on plate {plate.id}")

        if not any(plate.count for plate in plates.items):
            if VERBOSE >= 1:
                print("Unable to locate any colonies in the images provided")
                print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")
            sys.exit()

    # Store pickled data to allow quick re-use
    save_path = file_access.create_subdirectory(BASE_PATH, "data")
    save_path = save_path.joinpath(segmented_image_data_filename)
    save_status = file_access.save_file(save_path, plates, file_access.CompressionMethod.LZMA)
    if VERBOSE >= 1:
        if save_status:
            print(f"Cached data saved to {save_path}")
        else:
            print(f"An error occurred and cached data could not be written to disk at {save_path}")

    # Store colony data in CSV format
    if VERBOSE >= 1:
        print("Saving data to CSV")

    save_path = BASE_PATH.joinpath("data")
    for plate in plates.items:
        # Save data for all colonies on one plate
        plate.colonies_to_csv(save_path)

        # Save data for each colony on a plate
        plate.colonies_timepoints_to_csv(save_path)

    # Save summarised data for all plates
    plates.plates_to_csv(save_path)

    # Only generate plots when working with original images
    # Can't guarantee that the original images and full list of time points
    # will be available when using cached data
    if image_files is not None:
        save_path = file_access.create_subdirectory(BASE_PATH, "plots")
        if PLOTS >= 1:
            if VERBOSE >= 1:
                print("Saving plots")
            # Summary plots for all plates
            plots.plot_growth_curve(plates.items, save_path)
            plots.plot_appearance_frequency(plates.items, save_path, timestamps = image_files.timestamps_elapsed)
            plots.plot_appearance_frequency(plates.items, save_path, timestamps = image_files.timestamps_elapsed, bar = True)
            plots.plot_doubling_map(plates.items, save_path)
            plots.plot_colony_map(image_files.items[-1].image, plates.items, save_path)

            for plate in plates.items:
                if VERBOSE >= 2:
                    print(f"Saving plots for plate #{plate.id}")
                save_path_plate = file_access.create_subdirectory(save_path, file_access.file_safe_name([f"plate{plate.id}", plate.name]))
                # Plot colony growth curves, ID map and time of appearance for each plate
                plots.plot_growth_curve([plate], save_path_plate)
                plots.plot_appearance_frequency([plate], save_path_plate, timestamps = image_files.timestamps_elapsed)
                plots.plot_appearance_frequency([plate], save_path_plate, timestamps = image_files.timestamps_elapsed, bar = True)

        if PLOTS >= 4:
            # Plot individual plate images as an animation
            if VERBOSE >= 1:
                print("Saving plate image animations. This may take several minutes")

            # Original size images
            plots.plot_plate_images_animation(
                plates,
                image_files,
                save_path,
                fps = 8,
                pool_max = POOL_MAX,
                image_size_maximum = (800, 800)
            )
            # Smaller images
            plots.plot_plate_images_animation(
                plates,
                image_files,
                save_path,
                fps = 8,
                pool_max = POOL_MAX,
                image_size = (250, 250),
                image_name = "plate_image_animation_small"
            )

    else:
        if VERBOSE >= 1:
            print("Unable to generate plots from cached data. Run analysis on original images to generate plot images")

    if VERBOSE >= 1:
        print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")

    sys.exit()
 def test_return_existing(self, tmp_path, result):
     """
     Creating a subdirectory again should give the same path as `result`

     NOTE(review): presumably `result` already created the same subdirectory, confirm against the fixture
     """
     # Repeat the creation to check that an existing subdir will not cause issues
     second_call = create_subdirectory(tmp_path, SUB_DIR)

     assert result == second_call
 def result(self, tmp_path):
     """
     Yield the path returned by creating SUB_DIR inside tmp_path

     NOTE(review): presumably registered as a pytest fixture, the decorator is not visible here
     """
     created_path = create_subdirectory(tmp_path, SUB_DIR)
     yield created_path
Exemplo n.º 9
0
def main():
    """
    Command line entry point for a ColonyScanalyser analysis run

    Parses the command line arguments and finds the images in the working
    directory. Colony data is either loaded from previously saved data or
    calculated by processing the images. The results are saved as cached
    data, CSV files and plots

    Always finishes by calling sys.exit

    :raises ValueError: if no working directory path is supplied
    :raises EnvironmentError: if the working directory can not be found
    :raises IOError: if no images are found, or if timestamps can not be
        read from all image filenames
    """
    parser = argparse.ArgumentParser(
        description = "An image analysis tool for measuring microorganism colony growth",
        formatter_class = argparse.ArgumentDefaultsHelpFormatter
        )
    parser.add_argument("path", type = str,
                        help = "Image files location", default = None)
    parser.add_argument("-v", "--verbose", type = int, default = 1,
                        help = "Information output level")
    parser.add_argument("-dpi", "--dots_per_inch", type = int, default = 2540,
                        help = "The image DPI (dots per inch) setting")
    parser.add_argument("--plate_size", type = int, default = 100,
                        help = "The plate diameter, in millimetres")
    parser.add_argument("--plate_lattice", type = int, nargs = 2, default = (3, 2),
                        metavar = ("ROW", "COL"),
                        help = "The row and column co-ordinate layout of plates. Example usage: --plate_lattice 3 3")
    parser.add_argument("--save_plots", type = int, default = 1,
                        help = "The detail level of plot images to store on disk")
    parser.add_argument("--use_saved", type = strtobool, default = True,
                        help = "Allow or prevent use of previously calculated data")
    parser.add_argument("-mp", "--multiprocessing", type = strtobool, default = True,
                        help = "Enables use of more CPU cores, faster but more resource intensive")

    args = parser.parse_args()
    BASE_PATH = args.path
    VERBOSE = args.verbose
    PLATE_SIZE = imaging.mm_to_pixels(args.plate_size, dots_per_inch = args.dots_per_inch)
    PLATE_LATTICE = tuple(args.plate_lattice)
    SAVE_PLOTS = args.save_plots
    USE_SAVED = args.use_saved
    POOL_MAX = 1
    if args.multiprocessing:
        POOL_MAX = cpu_count()

    if VERBOSE >= 1:
        print("Starting ColonyScanalyser analysis")

    # Resolve working directory
    if BASE_PATH is None:
        raise ValueError("A path to a working directory must be supplied")
    else:
        BASE_PATH = Path(args.path).resolve()
    if not BASE_PATH.exists():
        raise EnvironmentError(f"The supplied folder path could not be found: {BASE_PATH}")
    if VERBOSE >= 1:
        print(f"Working directory: {BASE_PATH}")

    # Find images in working directory
    image_formats = ["tif", "tiff", "png"]
    image_files = file_access.get_files_by_type(BASE_PATH, image_formats)

    # Check if images have been loaded
    if len(image_files) > 0:
        if VERBOSE >= 1:
            print(f"{len(image_files)} images found")
    else:
        # The f prefix must be on the fragment that contains the placeholder
        raise IOError(
            "No images could be found in the supplied folder path."
            f" Images are expected in these formats: {image_formats}"
            )

    # Get date and time information from filenames
    time_points = get_image_timestamps(image_files)
    time_points_elapsed = get_image_timestamps(image_files, elapsed_minutes = True)
    # Both timestamp lists need an entry for every image
    if len(time_points) != len(image_files) or len(time_points_elapsed) != len(image_files):
        raise IOError(
            "Unable to load timestamps from all image filenames."
            " Please check that images have a filename with YYYYMMDD_HHMM timestamps"
            )

    # Check if processed image data is already stored and can be loaded
    segmented_image_data_filename = "processed_data"
    plate_colonies = None
    if USE_SAVED:
        if VERBOSE >= 1:
            print("Attempting to load cached data")
        plate_colonies = file_access.load_file(
            BASE_PATH.joinpath("data", segmented_image_data_filename),
            file_access.CompressionMethod.LZMA,
            pickle = True
            )
        # Check that segmented image data has been loaded for all plates
        # The validity of the data must not depend on the verbosity level
        if (plate_colonies is not None and
            len(plate_colonies) == utilities.coordinate_to_index_number(PLATE_LATTICE)):
            if VERBOSE >= 1:
                print("Successfully loaded cached data")
        else:
            if VERBOSE >= 1:
                print("Unable to load cached data, starting image processing")
            plate_colonies = None

    # Process images to Timepoint data objects
    # plate_colonies is only set at this point if valid cached data was loaded
    if plate_colonies is None:
        plate_timepoints = defaultdict(list)

        if VERBOSE >= 1:
            print("Preprocessing images to locate plates")

        # Load the first image to get plate coordinate and mask
        # A path is not used as a context manager here: that usage is
        # deprecated in pathlib and no longer available from Python 3.13
        image_file = image_files[0]
        img = imread(str(image_file), as_gray = True)

        # Only find centers using first image. Assume plates do not move
        if VERBOSE >= 2:
            print(f"Locating plate centres in image: {image_file}")
        plate_coordinates = imaging.get_image_circles(
            img,
            int(PLATE_SIZE / 2),
            circle_count = utilities.coordinate_to_index_number(PLATE_LATTICE),
            search_radius = 50
            )
        if VERBOSE >= 3:
            for k, center in enumerate(plate_coordinates, start = 1):
                print(f"Plate {k} center: {center[0]}")

        # Split image into individual plates
        # Use the first plate images as a noise mask
        plate_images_mask = get_plate_images(img, plate_coordinates, edge_cut = 60)

        if VERBOSE >= 1:
            print("Processing colony data from all images")

        # Thin wrapper to display a progress bar
        def progress_update(result, progress):
            utilities.progress_bar(progress, message = "Processing images")

        processes = list()
        with Pool(processes = POOL_MAX) as pool:
            for i, image_file in enumerate(image_files):
                # Allow args to be passed to callback function
                callback_function = partial(progress_update, progress = ((i + 1) / len(image_files)) * 100)

                # Create processes
                processes.append(pool.apply_async(
                    image_file_to_timepoints,
                    args = (image_file, plate_coordinates, plate_images_mask, time_points[i], time_points_elapsed[i]),
                    kwds = {"plot_path" : None},
                    callback = callback_function
                    ))

            # Consolidate the results to a single dict
            for process in processes:
                result = process.get()
                for plate_id, timepoints in result.items():
                    plate_timepoints[plate_id].extend(timepoints)

        # Clear objects to free up memory
        processes = None
        plate_images_mask = None
        img = None

        if VERBOSE >= 1:
            print("Calculating colony properties")

        # Group Timepoints by centres and create Colony objects
        plate_colonies = dict()
        for plate_id, timepoints in plate_timepoints.items():
            plate_colonies[plate_id] = {colony.id : colony for colony in colonies_from_timepoints(timepoints)}

            # Filter colonies to remove noise, background objects and merged colonies
            plate_colonies[plate_id] = dict(filter(lambda item:
                # Remove objects that do not have sufficient data points, usually just noise
                len(item[1].timepoints) > len(time_points) * 0.2 and
                # Remove object that do not show growth, these are not colonies
                item[1].growth_rate > 1 and
                # Colonies that appear with a large initial area are most likely merged colonies, not new colonies
                item[1].timepoint_first.area < 50,
                plate_colonies[plate_id].items()
                ))

            if VERBOSE >= 1:
                print(f"Colony data stored for {len(plate_colonies[plate_id])} colonies on plate {plate_id}")

        if not any(len(plate) for plate in plate_colonies.values()):
            if VERBOSE >= 1:
                print("Unable to locate any colonies in the images provided")
                print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")
            sys.exit()

    # Store pickled data to allow quick re-use
    save_path = file_access.create_subdirectory(BASE_PATH, "data")
    save_path = save_path.joinpath(segmented_image_data_filename)
    save_status = file_access.save_file(save_path, plate_colonies, file_access.CompressionMethod.LZMA)
    if VERBOSE >= 1:
        if save_status:
            print(f"Cached data saved to {save_path}")
        else:
            print(f"An error occurred and cached data could not be written to disk at {save_path}")

    # Store colony data in CSV format
    if VERBOSE >= 1:
        print("Saving data to CSV")

    # The CSV column headers are the same for every plate
    colony_headers = [
        "Colony ID",
        "Time of appearance",
        "Time of appearance (elapsed minutes)",
        "Center point averaged (row, column)",
        "Growth rate average",
        "Growth rate",
        "Doubling time average (minutes)",
        "Doubling times (minutes)",
        "First detection (elapsed minutes)",
        "First center point (row, column)",
        "First area (pixels)",
        "First diameter (pixels)",
        "Final detection (elapsed minutes)",
        "Final center point (row, column)",
        "Final area (pixels)",
        "Final diameter (pixels)"
        ]
    timepoint_headers = [
        "Colony ID",
        "Date/Time",
        "Elapsed time (minutes)",
        "Area (pixels)",
        "Center (row, column)",
        "Diameter (pixels)",
        "Perimeter (pixels)"
        ]

    save_path = BASE_PATH.joinpath("data")
    for plate_id, plate in plate_colonies.items():
        # Save data for all colonies on one plate
        file_access.save_to_csv(
            plate.values(),
            colony_headers,
            save_path.joinpath(f"plate{plate_id}_colonies")
            )

        # Save data for each colony on a plate
        colony_timepoints = list()
        for colony_id, colony in plate.items():
            for timepoint in colony.timepoints.values():
                # Unpack timepoint properties to a flat list
                colony_timepoints.append([colony_id, *timepoint])

        file_access.save_to_csv(
            colony_timepoints,
            timepoint_headers,
            save_path.joinpath(f"plate{plate_id}_colony_timepoints")
            )

    if VERBOSE >= 1:
        print("Saving plots")

    # Plot colony growth curves and time of appearance for the plate
    if SAVE_PLOTS >= 2:
        for plate_id, plate in plate_colonies.items():
            row, col = utilities.index_number_to_coordinate(plate_id, PLATE_LATTICE)
            save_path = get_plate_directory(BASE_PATH.joinpath("plots"), row, col, create_dir = True)
            plate_item = {plate_id : plate}
            plots.plot_growth_curve(plate_item, time_points_elapsed, save_path)
            plots.plot_appearance_frequency(plate_item, time_points_elapsed, save_path)
            plots.plot_appearance_frequency(plate_item, time_points_elapsed, save_path, bar = True)

    # Plot colony growth curves for all plates
    if SAVE_PLOTS >= 1:
        save_path = file_access.create_subdirectory(BASE_PATH, "plots")
        plots.plot_growth_curve(plate_colonies, time_points_elapsed, save_path)
        plots.plot_appearance_frequency(plate_colonies, time_points_elapsed, save_path)
        plots.plot_appearance_frequency(plate_colonies, time_points_elapsed, save_path, bar = True)
        plots.plot_doubling_map(plate_colonies, time_points_elapsed, save_path)

    if VERBOSE >= 1:
        print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")

    sys.exit()
Exemplo n.º 10
0
def main():
    """
    Run the ColonyScanalyser command line analysis pipeline

    Parses the command line arguments, then either loads previously cached
    plate data or processes the images found in the working directory
    (optional alignment, plate location, colony detection and grouping).
    The resulting data is cached to disk and exported as CSV. Plots and
    animations are only produced when the images were processed in this run

    The function always ends by calling sys.exit()

    :raises ValueError: if no working directory path was supplied
    :raises EnvironmentError: if the supplied working directory does not exist
    """
    parser = argparse_init(
        description=
        "An image analysis tool for measuring microorganism colony growth",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage="%(prog)s '/image/file/path/' [OPTIONS]")

    # Retrieve and parse arguments
    args = parser.parse_args()
    BASE_PATH = args.path
    ANIMATION = args.animation
    IMAGE_ALIGN_STRATEGY = AlignStrategy[args.image_align]
    IMAGE_ALIGN_TOLERANCE = args.image_align_tolerance
    IMAGE_FORMATS = args.image_formats
    PLOTS = not args.no_plots
    # Plate labels are keyed by plate ID, counting from one
    PLATE_LABELS = dict(enumerate(args.plate_labels, start=1))
    PLATE_LATTICE = tuple(args.plate_lattice)
    PLATE_SIZE = int(
        imaging.mm_to_pixels(args.plate_size,
                             dots_per_inch=args.dots_per_inch))
    # The edge cut argument is a percentage of the plate size
    PLATE_EDGE_CUT = int(round(PLATE_SIZE * (args.plate_edge_cut / 100)))
    SILENT = args.silent
    USE_CACHED = args.use_cached_data
    VERBOSE = args.verbose
    # Leave one processor free when more than one is available
    POOL_MAX = 1
    if not args.single_process:
        POOL_MAX = max(cpu_count() - 1, 1)

    if not SILENT:
        print("Starting ColonyScanalyser analysis")
    if VERBOSE and POOL_MAX > 1:
        print(
            f"Multiprocessing enabled, utilising {POOL_MAX} of {cpu_count()} processors"
        )

    # Resolve working directory
    if BASE_PATH is None:
        raise ValueError("A path to a working directory must be supplied")
    BASE_PATH = Path(args.path).resolve()
    if not BASE_PATH.exists():
        raise EnvironmentError(
            f"The supplied folder path could not be found: {BASE_PATH}")
    if not SILENT:
        print(f"Working directory: {BASE_PATH}")

    # image_files stays None when cached data is used, which later disables
    # plot and animation output
    image_files = None

    # Check if processed image data is already stored and can be loaded
    plates = None
    if USE_CACHED:
        if not SILENT:
            print("Attempting to load cached data")
        # NOTE: the cache is loaded with pickle=True. Unpickling can execute
        # arbitrary code, so only use cached data from a trusted directory
        plates = file_access.load_file(
            BASE_PATH.joinpath(config.DATA_DIR, config.CACHED_DATA_FILE_NAME),
            file_access.CompressionMethod.LZMA,
            pickle=True)
        # Check that segmented image data has been loaded for all plates
        # Also that data is not from an older format (< v0.4.0)
        # The verbosity level must not take part in this decision, otherwise
        # a valid cache is thrown away whenever verbose output is disabled
        cache_is_valid = (
            plates is not None
            and plates.count
            == PlateCollection.coordinate_to_index(PLATE_LATTICE)
            and isinstance(plates.items[0], Plate))
        if cache_is_valid:
            if not SILENT:
                print("Successfully loaded cached data")
        else:
            if not SILENT:
                print("Unable to load cached data, starting image processing")
            plates = None

    # plates is only set at this point if a valid cache was loaded
    if plates is None:
        # Find images in working directory. Raises IOError if images not loaded correctly
        image_files = ImageFileCollection.from_path(BASE_PATH,
                                                    IMAGE_FORMATS,
                                                    cache_images=False)
        if not SILENT:
            print(f"{image_files.count} images found")

        # Verify image alignment
        if IMAGE_ALIGN_STRATEGY != AlignStrategy.none:
            if not SILENT:
                print(
                    f"Verifying image alignment with '{IMAGE_ALIGN_STRATEGY.name}' strategy. This process will take some time"
                )

            # Initialise the model and determine which images need alignment
            align_model, image_files_align = calculate_transformation_strategy(
                image_files.items,
                IMAGE_ALIGN_STRATEGY,
                tolerance=IMAGE_ALIGN_TOLERANCE)

            # Apply image alignment according to selected strategy
            align_total = len(image_files_align)
            if align_total > 0:
                if not SILENT:
                    print(
                        f"{align_total} of {image_files.count} images require alignment"
                    )

                with Pool(processes=POOL_MAX) as pool:
                    results = []
                    # Result order is not preserved by imap_unordered
                    job = pool.imap_unordered(func=partial(
                        apply_align_transform, align_model=align_model),
                                              iterable=image_files_align,
                                              chunksize=2)
                    # Store results and update progress bar
                    for i, result in enumerate(job, start=1):
                        results.append(result)
                        if not SILENT:
                            utilities.progress_bar(
                                (i / align_total) * 100,
                                message="Correcting image alignment")

                    image_files.update(results)

        if not SILENT:
            print("Preprocessing images to locate plates")

        # Load the first image to get plate coordinates and mask
        with image_files.items[0] as image_file:
            # Only find centers using first image. Assume plates do not move
            if VERBOSE:
                print(
                    f"Locating plate centres in image: {image_file.file_path}"
                )

            # Create new Plate instances to store the information
            plates = PlateCollection.from_image(
                shape=PLATE_LATTICE,
                image=image_file.image_gray,
                diameter=PLATE_SIZE,
                search_radius=PLATE_SIZE // 20,
                edge_cut=PLATE_EDGE_CUT,
                labels=PLATE_LABELS)

            if not plates.count > 0:
                if not SILENT:
                    print(
                        f"Unable to locate plates in image: {image_file.file_path}"
                    )
                    print("Processing unable to continue")
                sys.exit()

            if VERBOSE:
                for plate in plates.items:
                    print(f"Plate {plate.id} center: {plate.center}")

            # Use the first plate image as a noise mask
            plate_noise_masks = plates.slice_plate_image(image_file.image_gray)

        if not SILENT:
            print("Processing colony data from all images")

        # Process images to Timepoints
        with Pool(processes=POOL_MAX) as pool:
            results = []
            # imap yields results in the same order as the input images
            job = pool.imap(func=partial(image_file_to_timepoints,
                                         plates=plates,
                                         plate_noise_masks=plate_noise_masks),
                            iterable=image_files.items,
                            chunksize=2)
            # Store results and update progress bar
            for i, result in enumerate(job, start=1):
                results.append(result)
                if not SILENT:
                    utilities.progress_bar((i / image_files.count) * 100,
                                           message="Processing images")
            plate_timepoints = utilities.dicts_merge(results)

        if not SILENT:
            print("Calculating colony properties")

        # Calculate deviation in timestamps (i.e. likelihood of missing data)
        timestamp_diff_std = diff(
            image_files.timestamps_elapsed_seconds[1:]).std()
        timestamp_diff_std += config.COLONY_TIMESTAMP_DIFF_MAX

        # Group and consolidate Timepoints into Colony instances
        plates = plates_colonies_from_timepoints(plates, plate_timepoints,
                                                 config.COLONY_DISTANCE_MAX,
                                                 timestamp_diff_std, POOL_MAX)

        if not any(plate.count for plate in plates.items):
            if not SILENT:
                print("Unable to locate any colonies in the images provided")
                print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")
            sys.exit()
        elif not SILENT:
            for plate in plates.items:
                print(f"{plate.count} colonies identified on plate {plate.id}")

    # Store pickled data to allow quick re-use
    save_path = file_access.create_subdirectory(BASE_PATH, config.DATA_DIR)
    save_path = save_path.joinpath(config.CACHED_DATA_FILE_NAME)
    save_status = file_access.save_file(save_path, plates,
                                        file_access.CompressionMethod.LZMA)
    if not SILENT:
        if save_status:
            print(f"Cached data saved to {save_path}")
        else:
            print(
                f"An error occurred and cached data could not be written to disk at {save_path}"
            )

    # Store colony data in CSV format
    if not SILENT:
        print("Saving data to CSV")

    save_path = BASE_PATH.joinpath(config.DATA_DIR)
    for plate in plates.items:
        # Save data for all colonies on one plate
        plate.colonies_to_csv(save_path)
        # Save data for each colony on a plate
        plate.colonies_timepoints_to_csv(save_path)

    # Save summarised data for all plates
    plates.plates_to_csv(save_path)

    # Only generate plots when working with original images
    # Can't guarantee that the original images and full list of time points
    # will be available when using cached data
    if image_files is not None:
        if PLOTS or ANIMATION:
            save_path = file_access.create_subdirectory(
                BASE_PATH, config.PLOTS_DIR)
        if PLOTS:
            if not SILENT:
                print("Saving plots")
            # Summary plots for all plates
            plots.plot_growth_curve(plates.items, save_path)
            plots.plot_appearance_frequency(
                plates.items,
                save_path,
                timestamps=image_files.timestamps_elapsed)
            plots.plot_appearance_frequency(
                plates.items,
                save_path,
                timestamps=image_files.timestamps_elapsed,
                bar=True)
            plots.plot_doubling_map(plates.items, save_path)
            plots.plot_colony_map(image_files.items[-1].image, plates.items,
                                  save_path)

            for plate in plates.items:
                if VERBOSE:
                    print(f"Saving plots for plate #{plate.id}")
                # Each plate gets its own subdirectory below the plots folder
                save_path_plate = file_access.create_subdirectory(
                    save_path,
                    file_access.file_safe_name(
                        [f"plate{plate.id}", plate.name]))
                # Plot colony growth curves, ID map and time of appearance for each plate
                plots.plot_growth_curve([plate], save_path_plate)
                plots.plot_appearance_frequency(
                    [plate],
                    save_path_plate,
                    timestamps=image_files.timestamps_elapsed)
                plots.plot_appearance_frequency(
                    [plate],
                    save_path_plate,
                    timestamps=image_files.timestamps_elapsed,
                    bar=True)

        if ANIMATION:
            # Plot individual plate images as an animation
            if not SILENT:
                print(
                    "Saving plate image animations. This may take several minutes"
                )

            # Original size images
            plots.plot_plate_images_animation(plates,
                                              image_files,
                                              save_path,
                                              fps=8,
                                              pool_max=POOL_MAX,
                                              image_size_maximum=(800, 800))
            # Smaller images
            plots.plot_plate_images_animation(
                plates,
                image_files,
                save_path,
                fps=8,
                pool_max=POOL_MAX,
                image_size=(250, 250),
                image_name="plate_image_animation_small")

    else:
        if not SILENT:
            print(
                "Unable to generate plots or animations from cached data. Run analysis on original images to generate plot images"
            )

    if not SILENT:
        print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")

    sys.exit()