示例#1
0
def check_column_for_alignment(cluster_strategy, rotation_adjustment, channel, snr, sequencing_chip, um_per_pixel, fia,
                               end_tiles, column, possible_tile_keys, h5_filename):
    """Try to align images from one grid column and record the tiles they hit.

    Opens ``h5_filename``, picks a handful of rows in ``column`` (the center row
    and its two neighbours when the grid has more than two rows, otherwise every
    row) and runs ``process_alignment_image`` on each image in turn. As soon as
    one image reports hitting tiles, ``end_tiles[h5_filename]`` is set to the
    tuple ``(list of hitting tile keys, image.column)`` and the search stops.

    If any requested image is missing, a warning is logged and the function
    returns immediately without writing to ``end_tiles``.

    Returns None; the result is communicated through ``end_tiles``.
    """
    base_name = os.path.splitext(h5_filename)[0]
    with h5py.File(h5_filename) as h5:
        grid = GridImages(h5, channel)
        # we assume odd numbers of rows, and good enough for now
        if grid.height > 2:
            # Floor division keeps the row index an integer on Python 3 as well;
            # plain "/" would produce a float there.
            center_row = grid.height // 2
            rows_to_check = (center_row, center_row + 1, center_row - 1)
        else:
            # just one or two rows, might as well try them all
            rows_to_check = tuple(range(grid.height))
        for row in rows_to_check:
            image = grid.get(row, column)
            if image is None:
                log.warn("Could not find an image for %s Row %d Column %d" % (base_name, row, column))
                return
            log.debug("Aligning %s Row %d Column %d against PhiX" % (base_name, row, column))
            # Each attempt works on its own copy of the current aligner object.
            fia = process_alignment_image(cluster_strategy, rotation_adjustment, snr, sequencing_chip, base_name, um_per_pixel, image, possible_tile_keys, deepcopy(fia))
            if fia.hitting_tiles:
                log.debug("%s aligned to at least one tile!" % image.index)
                # because of the way we iterate through the images, if we find one that aligns,
                # we can just stop because that gives us the outermost column of images and the
                # outermost FastQ tile
                end_tiles[h5_filename] = [tile.key for tile in fia.hitting_tiles], image.column
                break
    # Drop the last aligner object and force a collection before returning.
    del fia
    gc.collect()
示例#2
0
def iterate_all_images(h5_filenames, end_tiles, channel, path_info):
    """Yield one work item for every image that still has to be aligned.

    For each HDF5 file, ``end_tiles[h5_filename]`` supplies
    ``(min_column, max_column, tile_map)``. Columns in
    ``range(min_column, max_column)`` and all grid rows are visited; missing
    images and images whose results already exist on disk are skipped.

    Each yielded item is the tuple
    ``(row, column, channel, h5_filename, tile_map[image.column], base_name)``,
    which carries enough context for an independent worker to load the image
    and write its output in the right place.
    """
    for h5_filename in h5_filenames:
        base_name = os.path.splitext(h5_filename)[0]
        with h5py.File(h5_filename) as h5:
            grid = GridImages(h5, channel)
            first_column, stop_column, tile_map = end_tiles[h5_filename]
            for column in range(first_column, stop_column):
                for row in range(grid._height):
                    image = grid.get(row, column)
                    if image is None:
                        continue

                    # Both result files live in <results_directory>/<base_name>/.
                    stats_path = os.path.join(path_info.results_directory,
                                              base_name,
                                              '{}_stats.txt'.format(image.index))
                    alignment_path = os.path.join(path_info.results_directory,
                                                  base_name,
                                                  '{}_all_read_rcs.txt'.format(image.index))

                    # An image counts as done only when its stats are complete
                    # and the read file exists.
                    if alignment_is_complete(stats_path) and os.path.exists(alignment_path):
                        log.debug("Image already aligned/checkpointed: {}/{}".format(h5_filename, image.index))
                        continue

                    possible_tiles = tile_map[image.column]
                    yield row, column, channel, h5_filename, possible_tiles, base_name
示例#3
0
def count_images(h5_filenames, channel):
    """Return the total ``len(GridImages(...))`` for ``channel`` over all files.

    Each file is opened read-only and closed again before the next one is
    opened. An empty ``h5_filenames`` gives 0.
    """
    def images_in(path):
        # Size of the image grid stored for this channel in one file.
        with h5py.File(path, 'r') as handle:
            return len(GridImages(handle, channel))

    return sum(images_in(path) for path in h5_filenames)
示例#4
0
文件: align.py 项目: fbain79/champ
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames,
                  alignment_channel, snr, metadata, sequencing_chip, fia):
    """Locate the left and right end tiles and build the end-tile mapping.

    Cluster strategies are tried in order. For each one, a process pool with one
    worker per HDF5 file runs ``find_bounds`` over the grid columns of the first
    file, first left-to-right and then right-to-left, using a partially applied
    ``check_column_for_alignment`` as the per-column checker. The loop stops at
    the first strategy for which both searches return a non-empty result.

    If neither side produced any end tiles, ``error.fail`` is called. Otherwise
    the defaults from ``decide_default_tiles_and_columns`` are combined with the
    per-file results by ``build_end_tiles``, whose return value is returned.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            try:
                log.info("Checking Columns")
                base_column_checker = functools.partial(
                    check_column_for_alignment, cluster_strategy,
                    rotation_adjustment, alignment_channel, snr,
                    sequencing_chip, metadata['microns_per_pixel'], fia)
                log.info(
                    "------------------Searching Left End Tile-------------------")
                left_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                grid.columns, sequencing_chip.left_side_tiles,
                                sequencing_chip.cluster_size))
                log.info("Left End Tiles: %s " % left_end_tiles)
                log.info(
                    "******************Searching Right End Tile******************")
                # NOTE(review): the right-hand search passes miseq_tiles while the
                # left-hand search passes left_side_tiles -- confirm this
                # asymmetry is intended.
                right_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                reversed(grid.columns),
                                sequencing_chip.miseq_tiles,
                                sequencing_chip.cluster_size))
                log.info("Right End Tiles:  %s" % right_end_tiles)
            finally:
                # Always release the worker processes, even if a search raised.
                pool.close()
                pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail(
            "End tiles could not be found! Try adjusting the rotation or look at the raw images."
        )
        # Only reached if error.fail returns. Both values must be supplied as a
        # tuple; formatting with a single argument raises TypeError.
        log.info("Left End Tiles: %s and Right End Tiles: %s" %
                 (left_end_tiles, right_end_tiles))
    default_left_tile, default_left_column, default_left_correlation = decide_default_tiles_and_columns(
        left_end_tiles)
    default_right_tile, default_right_column, default_right_correlation = decide_default_tiles_and_columns(
        right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, default_left_correlation,
                                right_end_tiles, default_right_tile,
                                default_left_column, default_right_column,
                                default_right_correlation)
    return end_tiles
示例#5
0
def create_fits_files(h5_base_name):
    """Write every image in ``<h5_base_name>.h5`` out as a FITS file.

    Iterates over all top-level keys of the HDF5 file (treated as channels) and
    over every image of each channel's ``GridImages``. Each image is written to
    ``<h5_base_name>/<image.index>.fits``, overwriting any existing file. The
    HDF5 file is closed when the function finishes, including on error.
    """
    h5_filename = h5_base_name + ".h5"
    log.info("Creating fits files for %s" % h5_filename)
    # The context manager guarantees the HDF5 handle is released.
    with h5py.File(h5_filename) as h5:
        for channel in h5.keys():
            grid = GridImages(h5, channel)
            for image in grid:
                fits_path = '%s.fits' % os.path.join(h5_base_name, image.index)
                # Source Extractor can handle at most 32-bit values, so we have to cast down from our 64-bit images or
                # else it will throw an error. We clip to ensure there's no overflow, although this seems improbable
                # given that most cameras are 16 bit
                clipped_image = np.clip(image, 0, 2**32 - 1).astype(np.uint32)
                hdu = fits.PrimaryHDU(clipped_image)
                # NOTE(review): newer astropy releases spell this keyword
                # "overwrite" -- confirm against the pinned astropy version.
                hdu.writeto(fits_path, clobber=True)
    log.info("Done creating fits files for %s" % h5_base_name)
示例#6
0
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames,
                  alignment_channel, snr, metadata, sequencing_chip, fia,
                  floor_alignment):
    """Locate the left and right end tiles and build the end-tile mapping.

    Cluster strategies are tried in order. For each one, a process pool with one
    worker per HDF5 file runs ``find_bounds`` over the grid columns of the first
    file, left-to-right against ``sequencing_chip.left_side_tiles`` and
    right-to-left against ``sequencing_chip.right_side_tiles``. The per-column
    checker is ``check_column_for_alignment`` with the shared arguments bound,
    including ``floor_alignment`` converted to 1 or 0. The loop stops at the
    first strategy for which both searches return a non-empty result.

    If neither side produced any end tiles, ``error.fail`` is called. Otherwise
    the defaults from ``decide_default_tiles_and_columns`` are combined with the
    per-file results by ``build_end_tiles``, whose return value is returned.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            try:
                # The checker receives the flag as an integer, 1 or 0.
                floor = 1 if floor_alignment else 0
                base_column_checker = functools.partial(
                    check_column_for_alignment, cluster_strategy,
                    rotation_adjustment, alignment_channel, snr,
                    sequencing_chip, metadata['microns_per_pixel'], fia, floor)
                left_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                grid.columns, sequencing_chip.left_side_tiles))
                right_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                reversed(grid.columns),
                                sequencing_chip.right_side_tiles))
            finally:
                # Always release the worker processes, even if a search raised.
                pool.close()
                pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail(
            "End tiles could not be found! Try adjusting the rotation or look at the raw images."
        )
    default_left_tile, default_left_column = decide_default_tiles_and_columns(
        left_end_tiles)
    default_right_tile, default_right_column = decide_default_tiles_and_columns(
        right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, right_end_tiles,
                                default_right_tile, default_left_column,
                                default_right_column)
    return end_tiles
示例#7
0
def otsu_cluster_func(h5_base_name):
    """Find clusters in every image of ``<h5_base_name>.h5`` by Otsu thresholding.

    For each image of each top-level key (treated as a channel) the image is
    thresholded with ``threshold_otsu``, the mask is cleaned with a binary
    opening followed by a binary closing, connected regions are labelled, and
    the centers of mass for labels ``0..num_labels`` are passed to
    ``write_cluster_locations`` together with the output path
    ``<h5_base_name>/<image.index>.clusters.otsu``. The HDF5 file is closed when
    the function finishes, including on error.
    """
    h5_filename = h5_base_name + ".h5"
    log.info("Finding clusters for %s" % h5_filename)
    # The context manager guarantees the HDF5 handle is released.
    with h5py.File(h5_filename) as h5:
        for channel in h5.keys():
            grid = GridImages(h5, channel)
            for image in grid:
                out_filepath = os.path.join(h5_base_name,
                                            image.index + '.clusters.otsu')
                threshold = threshold_otsu(image)
                mask_pixels = (image > threshold)
                mask = ndimage.binary_closing(ndimage.binary_opening(mask_pixels))
                label_image, num_labels = ndimage.label(mask)
                log.debug("Found %d clusters in %s/%s" %
                          (num_labels, h5_base_name, image.index))
                # The index range starts at 0, so the unlabelled region is
                # included alongside the num_labels labelled ones.
                center_of_masses = ndimage.center_of_mass(image, label_image,
                                                          range(num_labels + 1))
                write_cluster_locations(center_of_masses, out_filepath)
示例#8
0
def load_image(h5_filename, channel, row, column):
    """Return ``GridImages(...).get(row, column)`` for ``channel`` in ``h5_filename``.

    The HDF5 file is opened only for the duration of the lookup and is closed
    before the result is handed back to the caller.
    """
    with h5py.File(h5_filename) as h5_file:
        image = GridImages(h5_file, channel).get(row, column)
    return image