def check_column_for_alignment(cluster_strategy, rotation_adjustment, channel, snr,
                               sequencing_chip, um_per_pixel, fia, end_tiles, column,
                               possible_tile_keys, h5_filename):
    """Attempt to align images from one grid column against PhiX tiles.

    Checks the center row (and its two neighbors) of the given column; on the
    first successful alignment, records the hitting tile keys and the image
    column in ``end_tiles[h5_filename]`` and stops.

    :param fia: a FastqImageAligner template; a deepcopy is made per image so
        the shared template is never mutated.
    :param end_tiles: dict mutated in place — this is the function's output.
    """
    base_name = os.path.splitext(h5_filename)[0]
    # Read-only: nothing in this function writes to the HDF5 file, and older
    # h5py versions default to append mode.
    with h5py.File(h5_filename, 'r') as h5:
        grid = GridImages(h5, channel)
        # We assume odd numbers of rows; checking the center row and its two
        # neighbors is good enough for now.
        if grid.height > 2:
            # Floor division: a row index must be an int (plain / yields a
            # float under Python 3, which would break grid.get()).
            center_row = grid.height // 2
            rows_to_check = (center_row, center_row + 1, center_row - 1)
        else:
            # just one or two rows, might as well try them all
            rows_to_check = tuple(range(grid.height))
        for row in rows_to_check:
            image = grid.get(row, column)
            if image is None:
                log.warn("Could not find an image for %s Row %d Column %d"
                         % (base_name, row, column))
                return
            log.debug("Aligning %s Row %d Column %d against PhiX"
                      % (base_name, row, column))
            fia = process_alignment_image(cluster_strategy, rotation_adjustment, snr,
                                          sequencing_chip, base_name, um_per_pixel,
                                          image, possible_tile_keys, deepcopy(fia))
            if fia.hitting_tiles:
                log.debug("%s aligned to at least one tile!" % image.index)
                # because of the way we iterate through the images, if we find one
                # that aligns, we can just stop because that gives us the outermost
                # column of images and the outermost FastQ tile
                end_tiles[h5_filename] = [tile.key for tile in fia.hitting_tiles], image.column
                break
    # The aligner can hold large cluster/image data; drop it eagerly since this
    # runs inside a long-lived worker process.
    del fia
    gc.collect()
def iterate_all_images(h5_filenames, end_tiles, channel, path_info):
    """Yield work items for every image that still needs alignment.

    We need an iterator over all images to feed the parallel processes. Since
    each image is processed independently and in no particular order, we return
    information in addition to the image itself that allows files to be written
    in the correct place and such.

    Images whose stats file is complete and whose rcs file exists are treated
    as checkpointed and skipped.

    :param end_tiles: maps h5 filename -> (min_column, max_column, tile_map).
    :yields: (row, column, channel, h5_filename, possible_tile_keys, base_name)
    """
    for h5_filename in h5_filenames:
        base_name = os.path.splitext(h5_filename)[0]
        # Read-only access; older h5py versions default to append mode.
        with h5py.File(h5_filename, 'r') as h5:
            grid = GridImages(h5, channel)
            min_column, max_column, tile_map = end_tiles[h5_filename]
            for column in range(min_column, max_column):
                for row in range(grid.height):  # public attr, consistent with check_column_for_alignment
                    image = grid.get(row, column)
                    if image is None:
                        continue
                    stats_path = os.path.join(path_info.results_directory,
                                              base_name,
                                              '{}_stats.txt'.format(image.index))
                    alignment_path = os.path.join(path_info.results_directory,
                                                  base_name,
                                                  '{}_all_read_rcs.txt'.format(image.index))
                    already_aligned = (alignment_is_complete(stats_path)
                                       and os.path.exists(alignment_path))
                    if already_aligned:
                        log.debug("Image already aligned/checkpointed: {}/{}".format(
                            h5_filename, image.index))
                        continue
                    yield row, column, channel, h5_filename, tile_map[image.column], base_name
def count_images(h5_filenames, channel):
    """Return the total number of images for *channel* across all HDF5 files."""
    total = 0
    for filename in h5_filenames:
        with h5py.File(filename, 'r') as h5:
            total += len(GridImages(h5, channel))
    return total
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames,
                  alignment_channel, snr, metadata, sequencing_chip, fia):
    """Determine which FastQ tiles the left and right edges of each field of
    view correspond to.

    Tries each cluster strategy in order until both a left and a right end
    tile are found, searching columns from the outside in with a pool of
    worker processes (one per h5 file).

    :returns: the end-tile mapping produced by ``build_end_tiles``.
    :raises: via ``error.fail`` when no end tiles can be found at all.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            log.info("Checking Columns")
            base_column_checker = functools.partial(
                check_column_for_alignment, cluster_strategy, rotation_adjustment,
                alignment_channel, snr, sequencing_chip,
                metadata['microns_per_pixel'], fia)
            log.info("Left Tiles: %s Right Tiles: %s"
                     % (sequencing_chip.left_side_tiles, sequencing_chip.right_side_tiles))
            log.info("------------------Searching Left End Tile-------------------")
            left_end_tiles = dict(
                find_bounds(pool, h5_filenames, base_column_checker, grid.columns,
                            sequencing_chip.left_side_tiles,
                            sequencing_chip.cluster_size))
            log.info("Left End Tiles: %s " % left_end_tiles)
            log.info("******************Searching Right End Tile******************")
            # NOTE(review): the right-side search uses miseq_tiles while the log
            # line above reports right_side_tiles — confirm this is intentional.
            right_end_tiles = dict(
                find_bounds(pool, h5_filenames, base_column_checker,
                            reversed(grid.columns), sequencing_chip.miseq_tiles,
                            sequencing_chip.cluster_size))
            log.info("Right End Tiles: %s" % right_end_tiles)
            pool.close()
            pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail("End tiles could not be found! Try adjusting the rotation or look at the raw images.")
    # Bug fix: the original applied % to only the first dict and passed the
    # second dict as an extra positional argument to log.info.
    log.info("Left End Tiles: %s and Right End Tiles: %s"
             % (left_end_tiles, right_end_tiles))
    default_left_tile, default_left_column, default_left_correlation = \
        decide_default_tiles_and_columns(left_end_tiles)
    default_right_tile, default_right_column, default_right_correlation = \
        decide_default_tiles_and_columns(right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, default_left_correlation,
                                right_end_tiles, default_right_tile,
                                default_left_column, default_right_column,
                                default_right_correlation)
    return end_tiles
def create_fits_files(h5_base_name):
    """Convert every image in ``<h5_base_name>.h5`` to a FITS file.

    Writes one ``<image.index>.fits`` file per image into the directory named
    after the h5 file, overwriting any existing file.
    """
    h5_filename = h5_base_name + ".h5"
    log.info("Creating fits files for %s" % h5_filename)
    # Bug fix: the file handle was previously opened and never closed (resource
    # leak). Open read-only inside a context manager instead.
    with h5py.File(h5_filename, 'r') as h5:
        for channel in h5.keys():
            grid = GridImages(h5, channel)
            for image in grid:
                fits_path = '%s.fits' % os.path.join(h5_base_name, image.index)
                # Source Extractor can handle at most 32-bit values, so we have
                # to cast down from our 64-bit images or else it will throw an
                # error. We clip to ensure there's no overflow, although this
                # seems improbable given that most cameras are 16 bit
                clipped_image = np.clip(image, 0, 2 ** 32 - 1).astype(np.uint32)
                hdu = fits.PrimaryHDU(clipped_image)
                # NOTE(review): `clobber` is removed in astropy >= 3.0 in favor
                # of `overwrite` — kept as-is to match the pinned dependency.
                hdu.writeto(fits_path, clobber=True)
    log.info("Done creating fits files for %s" % h5_base_name)
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames,
                  alignment_channel, snr, metadata, sequencing_chip, fia,
                  floor_alignment):
    """Determine the left and right end tiles, with optional floor alignment.

    Variant of ``get_end_tiles`` that threads a ``floor_alignment`` flag
    (passed to the column checker as 1/0) through to the alignment code.

    NOTE(review): this redefines ``get_end_tiles`` declared earlier in this
    module with a different signature — the later definition wins at import
    time. Confirm one of the two is meant to be removed or renamed.

    :returns: the end-tile mapping produced by ``build_end_tiles``.
    :raises: via ``error.fail`` when no end tiles can be found at all.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            # The checker expects an int flag rather than a bool.
            floor = 1 if floor_alignment else 0
            base_column_checker = functools.partial(
                check_column_for_alignment, cluster_strategy, rotation_adjustment,
                alignment_channel, snr, sequencing_chip,
                metadata['microns_per_pixel'], fia, floor)
            left_end_tiles = dict(
                find_bounds(pool, h5_filenames, base_column_checker, grid.columns,
                            sequencing_chip.left_side_tiles))
            right_end_tiles = dict(
                find_bounds(pool, h5_filenames, base_column_checker,
                            reversed(grid.columns),
                            sequencing_chip.right_side_tiles))
            pool.close()
            pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail("End tiles could not be found! Try adjusting the rotation or look at the raw images.")
    default_left_tile, default_left_column = decide_default_tiles_and_columns(
        left_end_tiles)
    default_right_tile, default_right_column = decide_default_tiles_and_columns(
        right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, right_end_tiles,
                                default_right_tile, default_left_column,
                                default_right_column)
    return end_tiles
def otsu_cluster_func(h5_base_name):
    """Find clusters in every image of ``<h5_base_name>.h5`` via Otsu thresholding.

    For each image: threshold with Otsu's method, clean the mask with a binary
    opening then closing, label connected components, and write the components'
    centers of mass to ``<image.index>.clusters.otsu``.
    """
    h5_filename = h5_base_name + ".h5"
    log.info("Finding clusters for %s" % h5_filename)
    # Bug fix: the file handle was previously opened and never closed (resource
    # leak). Open read-only inside a context manager instead.
    with h5py.File(h5_filename, 'r') as h5:
        for channel in h5.keys():
            grid = GridImages(h5, channel)
            for image in grid:
                out_filepath = os.path.join(h5_base_name, image.index + '.clusters.otsu')
                threshold = threshold_otsu(image)
                mask_pixels = (image > threshold)
                # Opening removes speckle noise, closing fills small holes.
                mask = ndimage.binary_closing(ndimage.binary_opening(mask_pixels))
                label_image, num_labels = ndimage.label(mask)
                log.debug("Found %d clusters in %s/%s"
                          % (num_labels, h5_base_name, image.index))
                # Label 0 is background; range(num_labels + 1) includes it, as
                # in the original implementation.
                center_of_masses = ndimage.center_of_mass(
                    image, label_image, range(num_labels + 1))
                write_cluster_locations(center_of_masses, out_filepath)
def load_image(h5_filename, channel, row, column):
    """Load a single image from an HDF5 file, or None if it doesn't exist.

    Opens the file read-only: the legacy h5py default mode ('a') would create
    an empty file if *h5_filename* were missing instead of failing loudly.
    """
    with h5py.File(h5_filename, 'r') as h5:
        grid = GridImages(h5, channel)
        return grid.get(row, column)