def normalize_scores(self, verbose=True):
    """Normalize raw scores by each image's pixel-intensity mode.

    The normalizing constant for each image is

        Z = mode(pixel values) / median(modes of all images in the same
                                        h5_fpath and channel)

    where the mode is taken over the central 200x200 pixel window of the
    image, ignoring pixels at or above 95% of the image's intensity range
    (treated as saturated).

    Rebuilds ``self.scores`` and ``self.normalizing_constants``; both are
    accessed as ``[h5_fpath][channel][pos_tup]``, and each normalized score
    is the corresponding raw score divided by Z.

    Args:
        verbose: when true, print the basename of each file as it is
            processed, followed by a blank line.
    """

    def get_mode_in_im(im):
        """Return the mode of the unsaturated pixels in the center of ``im``."""
        w = 200
        # Integer division: slice bounds must be ints, and ``w / 2`` is a
        # float on Python 3.
        hw = w // 2
        rmid, cmid = im.shape[0] // 2, im.shape[1] // 2
        vmin, vmax = im.min(), im.max()
        # remove saturation
        pct95 = vmin + 0.95 * (vmax - vmin)
        # Clamp the lower bounds so that an image smaller than the window
        # does not produce negative indices that wrap around.
        window = im[max(rmid - hw, 0):rmid + hw,
                    max(cmid - hw, 0):cmid + hw]
        vals = [v for v in window.flatten() if v < pct95]
        return misc.get_mode(vals)

    self.scores = {
        h5_fpath: {
            channel: {}
            for channel in hdf5tools.load_channel_names(h5_fpath)
        }
        for h5_fpath in self.h5_fpaths
    }
    self.normalizing_constants = {
        h5_fpath: {
            channel: {}
            for channel in hdf5tools.load_channel_names(h5_fpath)
        }
        for h5_fpath in self.h5_fpaths
    }

    for h5_fpath in self.h5_fpaths:
        if verbose:
            print(F"Basename of h5_fpath: {os.path.basename(h5_fpath)}")
        for channel in self.scores[h5_fpath]:
            raw_channel_scores = self.raw_scores[h5_fpath][channel]
            if not raw_channel_scores:
                # No images in this channel: nothing to normalize, and the
                # median of an empty list is undefined.
                continue

            # Open the file once per channel, read-only, instead of
            # reopening it for every image.
            mode_given_pos_tup = {}
            with h5py.File(h5_fpath, 'r') as f:
                for pos_tup in raw_channel_scores:
                    pos_key = hdf5tools.get_image_key(*pos_tup)
                    im = np.array(f[channel][pos_key])
                    mode_given_pos_tup[pos_tup] = get_mode_in_im(im)

            # ``dict.values()`` is a view on Python 3; NumPy needs a real
            # sequence to compute the median.
            median_of_modes = np.median(list(mode_given_pos_tup.values()))
            for pos_tup, mode in mode_given_pos_tup.items():
                Z = mode / float(median_of_modes)
                self.normalizing_constants[h5_fpath][channel][pos_tup] = Z
                im_scores = raw_channel_scores[pos_tup]
                self.scores[h5_fpath][channel][pos_tup] = {
                    read_name: im_scores[read_name] / Z
                    for read_name in self.get_read_names_in_image(
                        h5_fpath, channel, pos_tup)
                }
        if verbose:
            # A bare ``print`` is a no-op expression on Python 3; call it so
            # the blank separator line is actually emitted.
            print()
def normalize_scores_by_ref_read_names(self, ref_read_names_given_channel, verbose=True):
    """Normalize raw scores against the scores of reference reads.

    For each image the normalizing constant is

        Z = median(raw scores of the reference reads in the image) / 100

    and every raw score in that image is divided by Z. Rebuilds
    ``self.scores`` and ``self.normalizing_constants``, both accessed as
    ``[h5_fpath][channel][pos_tup]``.

    A warning is printed for every image that contains fewer than 10
    reference reads.

    Args:
        ref_read_names_given_channel: mapping indexed by channel whose
            values are combined via ``&`` with the read names found in
            each image.
        verbose: accepted for signature compatibility; this method does not
            read it.
    """

    def empty_tables():
        # One empty dict per channel of every file.
        return {
            h5_fpath: {
                channel: {}
                for channel in hdf5tools.load_channel_names(h5_fpath)
            }
            for h5_fpath in self.h5_fpaths
        }

    self.scores = empty_tables()
    self.normalizing_constants = empty_tables()

    for h5_fpath in self.h5_fpaths:
        log.debug(os.path.basename(h5_fpath))
        scores_for_file = self.scores[h5_fpath]
        constants_for_file = self.normalizing_constants[h5_fpath]

        for channel in scores_for_file.keys():
            ref_read_names = ref_read_names_given_channel[channel]
            raw_for_channel = self.raw_scores[h5_fpath][channel]

            for pos_tup in raw_for_channel.keys():
                names_in_image = self.get_read_names_in_image(
                    h5_fpath, channel, pos_tup)
                ref_read_names_in_image = names_in_image & ref_read_names
                if len(ref_read_names_in_image) < 10:
                    print(
                        F"Warning: 10 > {len(ref_read_names_in_image)} reference reads in im_idx {h5_fpath}, {channel}, {pos_tup}"
                    )

                im_scores = raw_for_channel[pos_tup]
                ref_scores = [
                    im_scores[read_name]
                    for read_name in ref_read_names_in_image
                ]
                Z = np.median(ref_scores) / 100.0
                constants_for_file[channel][pos_tup] = Z

                normalized = {}
                for read_name in self.get_read_names_in_image(
                        h5_fpath, channel, pos_tup):
                    normalized[read_name] = im_scores[read_name] / Z
                scores_for_file[channel][pos_tup] = normalized
def __init__(self, h5_fpaths):
    """Store the file paths and create empty score tables.

    ``scores`` is a dict accessed as:

        scores[h5_fpath][channel][pos_tup][read_name]

    One empty dict is created per channel name reported by
    ``hdf5tools.load_channel_names`` for each file. ``self.scores`` starts
    out as the very same object as ``self.raw_scores``.
    """
    self.h5_fpaths = h5_fpaths

    empty_tables = {}
    for h5_fpath in h5_fpaths:
        tables_for_file = {}
        for channel in hdf5tools.load_channel_names(h5_fpath):
            tables_for_file[channel] = {}
        empty_tables[h5_fpath] = tables_for_file

    self.raw_scores = empty_tables
    # Intentionally the same dict object, not a copy.
    self.scores = self.raw_scores
def build_score_given_read_name_given_channel(self):
    """Flatten per-image scores into one read-name -> score dict per channel.

    Builds ``self.score_given_read_name_in_channel``, accessed as
    ``[h5_fpath][channel][read_name]``, by merging the per-image dicts of
    ``self.scores[h5_fpath][channel][pos_tup]``. When a read name occurs in
    more than one image of a channel, the image visited last wins.

    Prints each file path as it is processed.
    """
    self.score_given_read_name_in_channel = {
        h5_fpath: {
            channel: {}
            for channel in hdf5tools.load_channel_names(h5_fpath)
        }
        for h5_fpath in self.h5_fpaths
    }
    for h5_fpath in self.h5_fpaths:
        print(F"h5_fpath: {h5_fpath}")
        for channel, scores_given_pos_tup in self.scores[h5_fpath].items():
            score_given_read_name = self.score_given_read_name_in_channel[
                h5_fpath][channel]
            for im_scores in scores_given_pos_tup.values():
                # Later images overwrite earlier ones for repeated read names.
                score_given_read_name.update(im_scores)