def check_column_for_alignment(cluster_strategy, rotation_adjustment, channel, snr,
                               sequencing_chip, um_per_pixel, fia, end_tiles, column,
                               possible_tile_keys, h5_filename):
    """Try to align images from one grid column and record the first one that hits a tile.

    For grids taller than two rows, the center row and its two neighbours are tried
    (center first); for shorter grids every row is tried. As soon as an image aligns to
    at least one tile, the result is stored in ``end_tiles`` and the search stops. If an
    image is missing for any row that is tried, a warning is logged and the function
    returns immediately without recording anything.

    Args:
        cluster_strategy: Passed through unchanged to ``process_alignment_image``.
        rotation_adjustment: Passed through unchanged to ``process_alignment_image``.
        channel: Channel used to build the ``GridImages`` view of the HDF5 file.
        snr: Passed through unchanged to ``process_alignment_image``.
        sequencing_chip: Passed through unchanged to ``process_alignment_image``.
        um_per_pixel: Passed through unchanged to ``process_alignment_image``.
        fia: Alignment object; a deep copy is handed to ``process_alignment_image`` for
            each image, and the returned object replaces it for the next attempt.
        end_tiles: Mutable mapping updated in place. On success,
            ``end_tiles[h5_filename]`` is set to the tuple
            ``(list of hitting tile keys, image.column)``.
        column: Grid column whose images are checked.
        possible_tile_keys: Passed through unchanged to ``process_alignment_image``.
        h5_filename: Path of the HDF5 file to open; its name without the extension is
            used as the base name in log messages and for the alignment call.

    Returns:
        None. The outcome is reported only through ``end_tiles``.
    """
    base_name = os.path.splitext(h5_filename)[0]
    with h5py.File(h5_filename) as h5:
        grid = GridImages(h5, channel)
        # we assume odd numbers of rows, and good enough for now
        if grid.height > 2:
            # Floor division keeps the row index an integer on Python 3 as well;
            # true division would produce fractional rows such as 1.5.
            center_row = grid.height // 2
            rows_to_check = (center_row, center_row + 1, center_row - 1)
        else:
            # just one or two rows, might as well try them all
            rows_to_check = tuple(range(grid.height))

        for row in rows_to_check:
            image = grid.get(row, column)
            if image is None:
                log.warning("Could not find an image for %s Row %d Column %d" % (base_name, row, column))
                return
            log.debug("Aligning %s Row %d Column %d against PhiX" % (base_name, row, column))
            # Each attempt works on a deep copy so the object passed in is not mutated
            # by the alignment call itself.
            fia = process_alignment_image(cluster_strategy, rotation_adjustment, snr,
                                          sequencing_chip, base_name, um_per_pixel, image,
                                          possible_tile_keys, deepcopy(fia))
            if fia.hitting_tiles:
                log.debug("%s aligned to at least one tile!" % image.index)
                # because of the way we iterate through the images, if we find one that aligns,
                # we can just stop because that gives us the outermost column of images and the
                # outermost FastQ tile
                end_tiles[h5_filename] = [tile.key for tile in fia.hitting_tiles], image.column
                break
    # Drop the last alignment object and force a collection pass before returning.
    del fia
    gc.collect()
def iterate_all_images(h5_filenames, end_tiles, channel, path_info):
    """Yield a work item for every grid image that has not been aligned yet.

    We need an iterator over all images to feed the parallel processes. Since each image
    is processed independently and in no particular order, we need to return information
    in addition to the image itself that allows files to be written in the correct place.

    An image is skipped when the grid has no image at that position, or when
    ``alignment_is_complete`` accepts its stats file and its ``*_all_read_rcs.txt`` file
    already exists in the results directory.

    Note that this is a generator: each HDF5 file stays open while the generator is
    suspended between the images of that file.

    Args:
        h5_filenames: Iterable of HDF5 file paths to walk, in order.
        end_tiles: Mapping from each HDF5 filename to a
            ``(min_column, max_column, tile_map)`` triple.
        channel: Channel used to build the ``GridImages`` view of each file; also
            echoed back in every yielded item.
        path_info: Object whose ``results_directory`` attribute is the root under which
            per-file result directories live.

    Yields:
        tuple: ``(row, column, channel, h5_filename, tile_map[image.column], base_name)``
        where ``base_name`` is ``h5_filename`` without its extension.
    """
    for h5_filename in h5_filenames:
        base_name = os.path.splitext(h5_filename)[0]
        # The per-file results directory does not depend on the image, so build it once.
        results_dir = os.path.join(path_info.results_directory, base_name)
        with h5py.File(h5_filename) as h5:
            grid = GridImages(h5, channel)
            min_column, max_column, tile_map = end_tiles[h5_filename]
            # NOTE(review): range() excludes max_column itself; confirm that the upper
            # bound stored in end_tiles is meant to be exclusive.
            for column in range(min_column, max_column):
                # Use the public ``height`` accessor, as check_column_for_alignment does,
                # instead of reaching into the private ``_height`` attribute.
                for row in range(grid.height):
                    image = grid.get(row, column)
                    if image is None:
                        continue
                    stats_path = os.path.join(results_dir, '{}_stats.txt'.format(image.index))
                    alignment_path = os.path.join(
                        results_dir, '{}_all_read_rcs.txt'.format(image.index))
                    already_aligned = (alignment_is_complete(stats_path)
                                       and os.path.exists(alignment_path))
                    if already_aligned:
                        log.debug("Image already aligned/checkpointed: {}/{}".format(
                            h5_filename, image.index))
                        continue
                    yield row, column, channel, h5_filename, tile_map[image.column], base_name
def load_image(h5_filename, channel, row, column):
    """Return the image at ``(row, column)`` of the given channel's grid in an HDF5 file.

    The file is opened only for the duration of the lookup and is closed again before
    the result reaches the caller.

    Args:
        h5_filename: Path of the HDF5 file to open.
        channel: Channel used to build the ``GridImages`` view of the file.
        row: Grid row of the requested image.
        column: Grid column of the requested image.

    Returns:
        Whatever ``GridImages.get(row, column)`` returns for that position.
    """
    with h5py.File(h5_filename) as h5_file:
        return GridImages(h5_file, channel).get(row, column)