Пример #1
0
    def normalize_scores(self, verbose=True):
        """Normalize raw scores by the background mode of each image.

        The normalizing constant for each image is

            Z = mode(pixel values) / median(modes of all images in the same
                                            h5_fpath and channel)

        where the mode is taken over the unsaturated pixels of a central
        window of the image. Each read's normalized score is its raw score
        divided by Z.

        Rebinds ``self.scores`` and ``self.normalizing_constants``; both are
        indexed as ``[h5_fpath][channel][pos_tup]``. ``self.raw_scores`` is
        only read.

        Args:
            verbose: If true, print the basename of each file before it is
                processed and a blank line after it.
        """
        def get_mode_in_im(im):
            # Mode of the unsaturated pixels in a central window of at most
            # w x w pixels.
            w = 200
            # Floor division: slice bounds must be integers in Python 3.
            hw = w // 2
            rmid, cmid = im.shape[0] // 2, im.shape[1] // 2
            vmin, vmax = im.min(), im.max()
            # remove saturation
            pct95 = vmin + 0.95 * (vmax - vmin)
            # Clamp the window start at 0 so that images smaller than the
            # window do not produce negative (wrap-around) slice bounds.
            window = im[max(rmid - hw, 0):rmid + hw,
                        max(cmid - hw, 0):cmid + hw]
            vals = [v for v in window.flatten() if v < pct95]
            return misc.get_mode(vals)

        self.scores = {
            h5_fpath: {
                channel: {}
                for channel in hdf5tools.load_channel_names(h5_fpath)
            }
            for h5_fpath in self.h5_fpaths
        }
        self.normalizing_constants = {
            h5_fpath: {
                channel: {}
                for channel in hdf5tools.load_channel_names(h5_fpath)
            }
            for h5_fpath in self.h5_fpaths
        }
        for h5_fpath in self.h5_fpaths:
            if verbose:
                print(F"Basename of h5_fpath: {os.path.basename(h5_fpath)}")
            # Open the file once, explicitly read-only, instead of reopening
            # it for every image.
            with h5py.File(h5_fpath, "r") as f:
                for channel in self.scores[h5_fpath]:
                    raw_channel_scores = self.raw_scores[h5_fpath][channel]
                    mode_given_pos_tup = {}
                    for pos_tup in raw_channel_scores:
                        pos_key = hdf5tools.get_image_key(*pos_tup)
                        im = np.array(f[channel][pos_key])
                        mode_given_pos_tup[pos_tup] = get_mode_in_im(im)

                    if not mode_given_pos_tup:
                        # No images in this channel: nothing to normalize,
                        # and the median of an empty list is undefined.
                        continue

                    # dict views are not sequences; materialize for numpy.
                    median_of_modes = float(
                        np.median(list(mode_given_pos_tup.values())))
                    for pos_tup, im_mode in mode_given_pos_tup.items():
                        Z = im_mode / median_of_modes
                        self.normalizing_constants[h5_fpath][channel][
                            pos_tup] = Z
                        im_scores = raw_channel_scores[pos_tup]
                        self.scores[h5_fpath][channel][pos_tup] = {
                            read_name: im_scores[read_name] / Z
                            for read_name in self.get_read_names_in_image(
                                h5_fpath, channel, pos_tup)
                        }
            if verbose:
                # Blank line between files (was a Python 2 bare ``print``).
                print()
Пример #2
0
    def normalize_scores_by_ref_read_names(self,
                                           ref_read_names_given_channel,
                                           verbose=True):
        """Normalize raw scores against a set of reference reads.

        For every image the normalizing constant is

            Z = median(raw scores of the reference reads in the image) / 100

        and each read's normalized score is its raw score divided by Z.

        Rebinds ``self.scores`` and ``self.normalizing_constants``, both
        indexed as ``[h5_fpath][channel][pos_tup]``. A warning is printed for
        any image containing fewer than 10 reference reads.

        Args:
            ref_read_names_given_channel: Mapping from channel to the
                collection of reference read names for that channel; it is
                intersected (``&``) with the read names found in each image.
            verbose: Accepted but not used by this method.
        """
        def blank_table():
            # One empty dict per channel, per file.
            table = {}
            for fpath in self.h5_fpaths:
                table[fpath] = {
                    name: {}
                    for name in hdf5tools.load_channel_names(fpath)
                }
            return table

        self.scores = blank_table()
        self.normalizing_constants = blank_table()

        for h5_fpath in self.h5_fpaths:
            log.debug(os.path.basename(h5_fpath))
            for channel in self.scores[h5_fpath]:
                channel_refs = ref_read_names_given_channel[channel]
                raw_given_pos_tup = self.raw_scores[h5_fpath][channel]
                for pos_tup in raw_given_pos_tup:
                    names_here = self.get_read_names_in_image(
                        h5_fpath, channel, pos_tup)
                    ref_read_names_in_image = names_here & channel_refs
                    if len(ref_read_names_in_image) < 10:
                        print(
                            F"Warning: 10 > {len(ref_read_names_in_image)} reference reads in im_idx {h5_fpath}, {channel}, {pos_tup}"
                        )

                    ref_scores = []
                    for ref_name in ref_read_names_in_image:
                        ref_scores.append(
                            self.raw_scores[h5_fpath][channel][pos_tup]
                            [ref_name])
                    med = np.median(ref_scores)

                    Z = med / 100.0
                    self.normalizing_constants[h5_fpath][channel][pos_tup] = Z

                    im_scores = self.raw_scores[h5_fpath][channel][pos_tup]
                    normalized = {}
                    for read_name in self.get_read_names_in_image(
                            h5_fpath, channel, pos_tup):
                        normalized[read_name] = im_scores[read_name] / Z
                    self.scores[h5_fpath][channel][pos_tup] = normalized
Пример #3
0
    def __init__(self, h5_fpaths):
        """Store h5_fpaths and create the empty score tables.

        Scores are kept in nested dicts accessed as:

            scores[h5_fpath][channel][pos_tup][read_name]

        ``raw_scores`` starts with one empty dict per channel of each file.
        ``scores`` initially refers to the very same object as
        ``raw_scores``.
        """
        self.h5_fpaths = h5_fpaths

        raw_scores = {}
        for h5_fpath in h5_fpaths:
            channel_names = hdf5tools.load_channel_names(h5_fpath)
            raw_scores[h5_fpath] = {channel: {} for channel in channel_names}

        self.raw_scores = raw_scores
        # Same object, not a copy.
        self.scores = self.raw_scores
Пример #4
0
 def build_score_given_read_name_given_channel(self):
     """Collapse per-image scores into one dict per file and channel.

     Populates ``self.score_given_read_name_in_channel``, accessed as

         score_given_read_name_in_channel[h5_fpath][channel][read_name]

     from ``self.scores[h5_fpath][channel][pos_tup][read_name]``. If a read
     name occurs in several images of the same channel, the score from the
     image iterated last is the one kept.

     Prints each h5_fpath as it is processed.
     """
     self.score_given_read_name_in_channel = {
         h5_fpath: {
             channel: {}
             for channel in hdf5tools.load_channel_names(h5_fpath)
         }
         for h5_fpath in self.h5_fpaths
     }
     for h5_fpath in self.h5_fpaths:
         print(F"h5_fpath: {h5_fpath}")
         for channel, scores_given_pos_tup in self.scores[h5_fpath].items():
             score_given_read_name = self.score_given_read_name_in_channel[
                 h5_fpath][channel]
             for im_scores in scores_given_pos_tup.values():
                 # Later images overwrite earlier ones for repeated names.
                 score_given_read_name.update(im_scores)