Example No. 1
    def conf_mat(
        self,
        true_set_size=None,
        pred_set_size=None,
        mask=None,
    ):
        """
        Build a confusion matrix from the call bag.

        If the set_size parameters are not given it
        will determine those sizes by asking the prep_result.
        """
        true = self.df["true_pep_iz"].values
        pred = self.df["pred_pep_iz"].values

        # Compute true_set_size and pred_set_size if they are not specified
        if true_set_size is None:
            true_set_size = self._prep_result.n_peps

        if pred_set_size is None:
            pred_set_size = self._prep_result.n_peps

        n_rows = len(self.df)
        if mask is not None:
            if isinstance(mask, pd.Series):
                mask = mask.values
            check.array_t(mask, shape=(n_rows, ), dtype=np.bool_)
            pred = np.copy(pred)
            pred[~mask] = 0

        return ConfMat.from_true_pred(true, pred, true_set_size, pred_set_size)
Example No. 2
def filter_im(im, low_inflection, low_sharpness, high_inflection,
              high_sharpness):
    """
    Use a band-pass filter to remove background and "bloom" which is
    the light that scatters from foreground to background.

    Note: A low_inflection of -10 effectively removes the low-pass filter
    and a high_inflection of +10 effectively removes the high-pass filter

    Values of sharpness = 50.0 are usually fine.

    Returns:
         Filtered image
    """

    # These numbers were hand-tuned to Abbe (512x512), might be wrong for other
    # sizes/instruments, and will need to be derived and/or calibrated.
    im = im.astype(np.float64)
    check.array_t(im, ndim=2, is_square=True, dtype=np.float64)
    low_cut = imops.generate_center_weighted_tanh(im.shape[0],
                                                  inflection=low_inflection,
                                                  sharpness=low_sharpness)
    high_cut = 1 - imops.generate_center_weighted_tanh(
        im.shape[0], inflection=high_inflection, sharpness=high_sharpness)
    filtered_im = imops.fft_filter_with_mask(im, mask=low_cut * high_cut)

    # The filters do not necessarily create a zero-centered background,
    # so remove the median to pull the background to zero.
    filtered_im -= np.median(filtered_im)

    return filtered_im
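Below is a minimal, self-contained sketch of the band-pass idea described in the docstring. It assumes only numpy; the _radial_tanh helper is a hypothetical stand-in for imops.generate_center_weighted_tanh, not its real implementation.

import numpy as np

def _radial_tanh(mea, inflection, sharpness):
    # Hypothetical stand-in: ~1.0 near the center of frequency space,
    # falling smoothly to ~0.0 beyond the inflection radius.
    yy, xx = np.mgrid[0:mea, 0:mea]
    r = np.sqrt((yy - mea / 2) ** 2 + (xx - mea / 2) ** 2) / (mea / 2)
    return 0.5 * (1.0 - np.tanh(sharpness * (r - inflection)))

def bandpass_sketch(im, low_inflection, low_sharpness, high_inflection, high_sharpness):
    low_cut = _radial_tanh(im.shape[0], low_inflection, low_sharpness)
    high_cut = 1.0 - _radial_tanh(im.shape[0], high_inflection, high_sharpness)
    spectrum = np.fft.fftshift(np.fft.fft2(im))
    filtered = np.real(np.fft.ifft2(np.fft.ifftshift(spectrum * low_cut * high_cut)))
    return filtered - np.median(filtered)   # pull the background toward zero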
Example No. 3
def _pixel_to_subpixel_one_im(im, peak_dim, locs):
    """
    This is a subtle calculation.

    locs is given as an *integer* position (only has pixel accuracy).
    We then extract out a sub-image using an *integer* half width.
    peak_dim is typically odd. Suppose it is (11, 11);
    that makes half_peak_mea_i = 11 // 2 = 5.

    Suppose that a peak is at (17.5, 17.5).

    Suppose that peak was found at (integer) location (17, 17),
    which is within 1 pixel of its center, as expected.

    We extract the sub-image at (17 - 5, 17 - 5) = (12:23, 12:23)

    The center-of-mass calculation should return (5.5, 5.5) because that is
    relative to the extracted sub-image.

    We wish to return (17.5, 17.5), which is the lower-left corner of the
    sub-image (17 - 5 = 12) plus the COM found within it.
    """
    check.array_t(locs, dtype=int)
    assert peak_dim[0] == peak_dim[1]
    half_peak_mea_i = peak_dim[0] // 2
    lower_left_locs = locs - half_peak_mea_i
    com_per_loc = np.zeros(locs.shape)
    for loc_i, loc in enumerate(lower_left_locs):
        peak_im = imops.crop(im, off=YX(loc), dim=peak_dim, center=False)
        com_per_loc[loc_i] = imops.com(peak_im**2)
    return lower_left_locs + com_per_loc
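A quick numerical check of the arithmetic in the docstring, using plain numpy; the com here is a simple stand-in for imops.com, not the real implementation.

import numpy as np

def com(im):
    # Center of mass of a 2D array, in (y, x) order
    total = im.sum()
    yy, xx = np.indices(im.shape)
    return np.array([(yy * im).sum() / total, (xx * im).sum() / total])

im = np.zeros((40, 40))
im[17:19, 17:19] = 1.0        # a 2x2 blob whose true center is (17.5, 17.5)
loc = np.array([17, 17])      # integer detection, within 1 pixel of the center
half = 11 // 2                # half_peak_mea_i = 5
sub = im[loc[0] - half:loc[0] - half + 11, loc[1] - half:loc[1] - half + 11]
print((loc - half) + com(sub ** 2))   # -> [17.5 17.5]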
Example No. 4
 def channel_aln_offsets(self, ch_aln):
     """
     TODO: This probably should move to Peaks so that it conforms
           to similar pattern of channel_scale_factor()
     """
     check.array_t(ch_aln, shape=(self.n_channels, 2))
     self.ch_aln = ch_aln
Example No. 5
    def multichannel_dyt_random_choice(self, dyts, probs):
        """
        dyts: (n_options, n_channels, n_cycles)
            [
                [[1, 1, 1],  [0, 0, 0]],  # On in ch 0
                [[0, 0, 0],  [1, 1, 1]],  # On in ch 1
                [[1, 1, 1],  [1, 1, 1]],  # Shared
            ],

        probs: (n_options)
            [0.80, 0.18, 0.02],  # Choices

        """

        dyts = np.array(dyts)
        check.array_t(dyts, ndim=3)
        _n_options, _n_channels, _n_cycles = dyts.shape

        assert _n_channels == self.n_channels
        assert _n_cycles == self.n_cycles

        probs = np.array(probs)
        check.array_t(probs, ndim=1)
        assert probs.shape[0] == _n_options

        self.counts = np.zeros((self.n_peaks, self.n_cycles, self.n_channels))
        self.dyt_iz = np.random.choice(_n_options, size=self.n_peaks, p=probs)
        for ch_i in range(self.n_channels):
            self.counts[:, :, ch_i] = dyts[self.dyt_iz, ch_i]

        return self
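A standalone sketch of the sampling logic, using the docstring's example inputs; n_peaks is an arbitrary illustrative value.

import numpy as np

dyts = np.array([
    [[1, 1, 1], [0, 0, 0]],   # On in ch 0
    [[0, 0, 0], [1, 1, 1]],   # On in ch 1
    [[1, 1, 1], [1, 1, 1]],   # Shared
])
probs = np.array([0.80, 0.18, 0.02])

n_options, n_channels, n_cycles = dyts.shape
n_peaks = 5
dyt_iz = np.random.choice(n_options, size=n_peaks, p=probs)
counts = np.zeros((n_peaks, n_cycles, n_channels))
for ch_i in range(n_channels):
    counts[:, :, ch_i] = dyts[dyt_iz, ch_i]   # (n_peaks, n_cycles) slice per channel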
Example No. 6
 def scale_by_abundance(self, abundance):
     """
     DHW 9/28/2020 - I profiled the check.array_t and the assert and in practice the impact appears minimal (<1ms in my test case)
     """
     check.array_t(abundance, shape=(self.shape[1],))
     assert np.all((abundance >= 1.0) | (abundance == 0.0))
     return (self * abundance).astype(int)
Example No. 7
def _do_predict(classifier, X):
    """
    The Scikit Learn Random Classifier has a predict() and a predict_proba()
    functions. But the expensive part is the predict_proba().
    Oddly, the predict() calls predict_proba() so I was previously doing
    the expensive part twice. Since I only want a single call per row
    I need to re-implement the predict() which
    is nothing more than taking the best score on each row.

    I also extract both the winner (best score) and the runnerup
    under the theory that the distance between these might be informative.

    Note, this must be a module-level function so that it can pickle.
    """

    check.array_t(X, ndim=2)

    classifier.n_jobs = 1

    # all_scores is the score for each row of X against EVERY classification class
    all_scores = classifier.predict_proba(X)

    # Sort by score along each row so that we can get the winner and runner up
    sorted_iz = np.argsort(all_scores, axis=1)
    winner_iz = sorted_iz[:, -1]
    runnerup_iz = sorted_iz[:, -2]

    winner_scores = all_scores[np.arange(all_scores.shape[0]), winner_iz]
    runnerup_scores = all_scores[np.arange(all_scores.shape[0]), runnerup_iz]

    winner_y = classifier.classes_.take(winner_iz, axis=0)
    runnerup_y = classifier.classes_.take(runnerup_iz, axis=0)

    return winner_y, winner_scores, runnerup_y, runnerup_scores
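A minimal sketch of the winner / runner-up extraction on a hand-made score matrix; classes_ stands in for classifier.classes_.

import numpy as np

all_scores = np.array([
    [0.1, 0.7, 0.2],
    [0.5, 0.4, 0.1],
])
classes_ = np.array([10, 20, 30])

sorted_iz = np.argsort(all_scores, axis=1)
winner_iz, runnerup_iz = sorted_iz[:, -1], sorted_iz[:, -2]
rows = np.arange(all_scores.shape[0])
print(classes_.take(winner_iz), all_scores[rows, winner_iz])       # [20 10] [0.7 0.5]
print(classes_.take(runnerup_iz), all_scores[rows, runnerup_iz])   # [30 20] [0.2 0.4]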
Example No. 8
    def classify(self, X, progress=None):
        check.array_t(X, ndim=2)

        n_rows = X.shape[0]

        if n_rows < 100:
            winner_y, winner_scores, runnerup_y, runnerup_scores = _do_predict(
                classifier=self.classifier, X=X)
        else:
            n_work_orders = n_rows // 100

            with zap.Context(progress=progress, trap_exceptions=False):
                results = zap.work_orders([
                    Munch(classifier=self.classifier, X=X, fn=_do_predict)
                    for X in np.array_split(X, n_work_orders, axis=0)
                ])
            winner_y = utils.listi(results, 0)
            winner_scores = utils.listi(results, 1)
            runnerup_y = utils.listi(results, 2)
            runnerup_scores = utils.listi(results, 3)

            winner_y = np.concatenate(winner_y)
            winner_scores = np.concatenate(winner_scores)
            runnerup_y = np.concatenate(runnerup_y)
            runnerup_scores = np.concatenate(runnerup_scores)

        return winner_y, winner_scores, runnerup_y, runnerup_scores
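The chunk-then-stitch pattern used above, sketched with a plain list comprehension instead of the zap work-order machinery; _predict_chunk is a made-up stand-in for _do_predict.

import numpy as np

def _predict_chunk(X_chunk):
    # Stand-in: per-row winner index and winner score
    return X_chunk.argmax(axis=1), X_chunk.max(axis=1)

X = np.random.rand(250, 4)
chunks = np.array_split(X, max(1, X.shape[0] // 100), axis=0)
results = [_predict_chunk(c) for c in chunks]
winner_y = np.concatenate([r[0] for r in results])
winner_scores = np.concatenate([r[1] for r in results])
assert winner_y.shape == (250,)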
Example No. 9
def radiometry_cy_ims(cy_ims, locs, reg_psf_samples, peak_mea):
    """
    Compute radiometry on the stack of cycle images for one field on one channel

    Returns:
        output_radmat: ndarray(n_peaks, n_cycles, (sig, noi, bg_med, bg_std))
    """
    with context(cy_ims=cy_ims,
                 locs=locs,
                 reg_psf_samples=reg_psf_samples,
                 peak_mea=peak_mea) as ctx:
        check.array_t(locs, ndim=2, dtype=np.float64)
        n_peaks = locs.shape[0]
        if n_peaks > 0:
            batches = zap.make_batch_slices(n_rows=locs.shape[0],
                                            _batch_size=100)
            with zap.Context(trap_exceptions=False, mode="thread"):
                zap.work_orders([
                    dict(
                        fn=_do_radiometry_field_stack_peak_batch,
                        ctx=ctx,
                        peak_start_i=batch[0],
                        peak_stop_i=batch[1],
                    ) for batch in batches
                ])

    return ctx._out_radiometry
Example No. 10
    def start(self):
        rf_v2_params = RFV2Params(**self.config.parameters)

        radmat = None
        if self.inputs.get("sim_v2"):
            sim_result = SimV2Result.load_from_folder(self.inputs.sim_v2)
            radmat = sim_result.flat_test_radmat()
        elif self.inputs.get("sigproc_v2"):
            sigproc_v2_result = SigprocV2Result.load_from_folder(self.inputs.sigproc_v2)
            radmat = sigproc_v2_result.sig(flat_chcy=True)

        check.array_t(radmat, ndim=2)

        rf_train_v2_result = RFTrainV2Result.load_from_folder(self.inputs.rf_train_v2)

        rf_v2_result = rf_classify(
            rf_v2_params, rf_train_v2_result, radmat, progress=self.progress,
        )

        if self.inputs.get("sim_v2"):
            # Stuff the true value into the results to simplify downstream processing
            sim_result = SimV2Result.load_from_folder(self.inputs.sim_v2)
            rf_v2_result.true_pep_iz = sim_result.test_true_pep_iz

        rf_v2_result.save()
Example No. 11
    def estimate(self, samples):
        """
        samples is an array spot radiometries of 3 columns: (y, x, val)
        """

        check.array_t(samples, ndim=2)
        assert samples.shape[1] == 3

        from scipy.optimize import curve_fit

        ys = samples[:, 0]
        xs = samples[:, 1]
        vals = samples[:, 2]
        cen = self.hyper_im_mea / 2

        def _fit_wrapper(*args, **kwargs):
            return self.fit_func(self.hyper_im_mea, *args, **kwargs)

        # I had previously seeded the initial_falloff parameter with 0.4 and found that
        # it failed to converge in some cases, for example, jim/jhm2021_06_17_01_tetraspec_3channel.
        # It seems happier to start at 0.0 so I'm setting it there for a while
        # but I would not be shocked if that causes it to fail in some other case.
        # That said, the jim/jhm2021_06_17_01_tetraspec_3channel is not even a single-count
        # experiment and is only being used in a self-calibration mode so it should
        # definitely not be setting the standard for this.
        initial_falloff = 0.0
        popt, pcov = curve_fit(
            _fit_wrapper,
            (xs, ys),
            vals,
            p0=(cen, cen, initial_falloff, np.nanmean(vals)),
        )
        self.cen_x, self.cen_y, self.falloff, _ = popt
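A minimal curve_fit sketch in the same shape as the call above, with a made-up radial falloff model standing in for self.fit_func (an assumption, not the real model).

import numpy as np
from scipy.optimize import curve_fit

def falloff_model(xy, cen_x, cen_y, falloff, amp):
    x, y = xy
    r2 = (x - cen_x) ** 2 + (y - cen_y) ** 2
    return amp * (1.0 - falloff * r2 / (256.0 ** 2))

xs = np.random.uniform(0, 512, 500)
ys = np.random.uniform(0, 512, 500)
vals = falloff_model((xs, ys), 256.0, 256.0, 0.3, 1000.0)   # synthetic, noiseless samples

popt, pcov = curve_fit(falloff_model, (xs, ys), vals, p0=(256.0, 256.0, 0.0, np.nanmean(vals)))
cen_x, cen_y, falloff, amp = popt   # recovers roughly (256, 256, 0.3, 1000)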
Example No. 12
def _step_2_create_inverse_variances(dt_mat, channel_i_to_vpd):
    """
    Using the Variance Per Dye find the inverse for each row of dyemat.
    This deals with zero-dyes by assigning the half the variance of the 1-count dye.
    vpd stands for variance per dye. Our models indicate that the standard deviation
    goes up roughly linearly with the number of dyes.
    The later code (_do_nn_and_gmm) needs the inverse variance, so we square the standard deviation to obtain the
    variance and take the inverse.
    Arguments:
        dt_mat is the unique dyetracks

    Returns:
        ndarray(n_rows, n_channels * n_cycles): inverse variance for each row (flatten)
    """

    check.array_t(dt_mat, ndim=3)
    check.array_t(channel_i_to_vpd, ndim=1)
    # Variances of zero will cause div by zeros so all zeros
    # are set to 0.5 which is chosen arbitrarily because it is > 0 and < 1.
    dt_mat = dt_mat.astype(float)
    dt_mat[dt_mat == 0] = 0.5
    vpd_broadcast = channel_i_to_vpd[None, :, None]
    spd = np.sqrt(vpd_broadcast)
    return 1.0 / np.square(
        spd * dt_mat)  # Scale by the per-channel standard deviation per dye
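The same inverse-variance construction, spelled out on a tiny hand-made example (illustrative numbers only).

import numpy as np

dt_mat = np.array([               # (n_rows=2, n_channels=2, n_cycles=3) dyetracks
    [[0, 1, 2], [1, 1, 0]],
    [[2, 2, 1], [0, 0, 0]],
], dtype=float)
channel_i_to_vpd = np.array([0.10, 0.25])        # variance per dye, per channel

dt_mat[dt_mat == 0] = 0.5                        # avoid divide-by-zero on dark counts
spd = np.sqrt(channel_i_to_vpd)[None, :, None]   # std-dev per dye, broadcast over rows and cycles
inv_var = 1.0 / np.square(spd * dt_mat)          # (n_rows, n_channels, n_cycles)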
Example No. 13
 def set(self, ch_aln):
     check.array_t(ch_aln, ndim=2)
     assert ch_aln.shape[1] == 2
     assert np.all(
         ch_aln[0, :] == 0.0
     )  # Everything is calibrated relative to channel 0
     self.ch_aln = ch_aln
     return self
Example No. 14
def _mask_anomalies_im(im, den_threshold=300):
    """
    Operates on pre-balanced images.
    The den_threshold of 300 was found empirically on Val data

    Sets anomalies to nan
    """
    import skimage.transform  # Defer slow imports
    import cv2

    check.array_t(im, is_square=True)

    # SLICE into squares using numpy-foo by reshaping the image into a
    # four-dimensional array so that np.mean can be taken over the inner dimensions.
    sub_mea = 4  # Size of the sub-sample region
    im_mea, _ = im.shape

    squares = im.reshape(im_mea // sub_mea, sub_mea, im_mea // sub_mea,
                         sub_mea)
    # At this point, squares is 4-dimensional: (n_blocks_y, sub_mea, n_blocks_x, sub_mea)
    # But we want the small dims next to each other for simplicity so swap the inner axes
    squares = squares.swapaxes(1, 2)
    # Now squares is (n_blocks_y, n_blocks_x, sub_mea, sub_mea)

    # Take the mean over the last two axes to get one value per block
    squares = np.mean(squares, axis=(2, 3))

    bad_mask = (squares > den_threshold).astype(float)

    # EXPAND the bad areas by erosion and dilation.
    # Erosion gets rid of the single-pixel hits and dilation expands the bad areas
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.erode(bad_mask, kernel, iterations=1)
    mask = cv2.dilate(mask, kernel, iterations=3)

    scale = im.shape[0] // mask.shape[0]

    full_size_mask = skimage.transform.rescale(
        mask,
        scale=scale,
        multichannel=False,
        mode="constant",
        anti_aliasing=False).astype(bool)

    # FIND rect contours of bad areas
    contours, hierarchy = cv2.findContours(full_size_mask.astype("uint8"),
                                           cv2.RETR_LIST,
                                           cv2.CHAIN_APPROX_SIMPLE)
    bad_rects = [cv2.boundingRect(cnt) for cnt in contours]
    im = im.copy()
    for rect in bad_rects:
        imops.fill(im,
                   loc=XY(rect[0], rect[1]),
                   dim=WH(rect[2], rect[3]),
                   val=np.nan)

    return im
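The reshape/swapaxes trick used above for block means, shown in isolation on a small array (values are arbitrary).

import numpy as np

im = np.arange(8 * 8, dtype=float).reshape(8, 8)
sub_mea = 4
blocks = im.reshape(8 // sub_mea, sub_mea, 8 // sub_mea, sub_mea)   # (2, 4, 2, 4)
blocks = blocks.swapaxes(1, 2)                                      # (2, 2, 4, 4)
block_means = blocks.mean(axis=(2, 3))                              # one mean per 4x4 block
assert block_means.shape == (2, 2)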
Example No. 15
def scale_im(im, scale):
    """Scale an image up or down"""
    check.array_t(im, ndim=2, dtype=float)
    rows, cols = im.shape
    M = np.array([[scale, 0.0, 0.0], [0.0, scale, 0.0]])
    return cv2.warpAffine(im,
                          M,
                          dsize=(int(scale * cols), int(scale * rows)),
                          flags=cv2.INTER_CUBIC)
Example No. 16
def locs_to_region(locs, n_divs, im_dim):
    """
    Convert a matrix of locs in (y, x) columns into
    the regional coords of im_dim divided in n_divs.
    """
    check.array_t(locs, shape=(None, 2))
    reg_locs = np.floor(n_divs * locs / im_dim).astype(int)
    assert np.all((0 <= reg_locs) & (reg_locs < n_divs))
    return reg_locs
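A worked example of the regional binning above (illustrative values).

import numpy as np

locs = np.array([[10.0, 500.0], [256.0, 0.0]])   # (y, x) in a 512-pixel image
n_divs, im_dim = 4, 512
reg_locs = np.floor(n_divs * locs / im_dim).astype(int)
print(reg_locs)   # [[0 3]
                  #  [2 0]]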
Example No. 17
def _step_2_create_neighbors_lookup(dyemat, true_pep_iz):
    """
    The dyemat may have many duplicate rows, each from some number of peps.

    These duplicate rows are consolidated so that each coordinate in dyemat space
    is given a unique "dye_i".

    Returns:
        dt_mat: The unique (sorted) rows of dyemat
        dyetracks_df: DF(dye_i, weight).
            Where weight is the sum of all rows that pointed to this dyetrack
        dt_pep_sources_df: DF(dye_i, pep_i, n_rows)
            Records how many times each peptide generated dye_i where count > 0.
        flann: A fast Approximate Nearest Neighbors lookup using PYFLANN.
    """
    check.array_t(dyemat, ndim=3)

    dt_mat, true_dt_iz, dt_counts = np.unique(dyemat,
                                              return_inverse=True,
                                              return_counts=True,
                                              axis=0)

    prune_rare = False
    if prune_rare:
        keep_mask = dt_counts > 0
        keep_mask[0] = True

        dt_mat = dt_mat[keep_mask]
        dt_counts = dt_counts[keep_mask]

        n_old = len(keep_mask)
        n_new = keep_mask.sum()
        orig = np.arange(n_old)
        new_to_old = orig[keep_mask]
        old_to_new = np.zeros((n_old,), dtype=int)
        new_i = np.arange(n_new)
        old_to_new[new_to_old] = new_i
        true_dt_iz = old_to_new[true_dt_iz]

    _, n_channels, n_cycles = dt_mat.shape

    # PREPEND a zero element that represents the nul dyetrack
    dt_mat = np.vstack((np.zeros((1, n_channels, n_cycles)), dt_mat))
    dt_counts = np.concatenate(([0], dt_counts))
    true_dt_iz += 1

    dyetracks_df = (pd.DataFrame(dict(weight=dt_counts)).reset_index().rename(
        columns=dict(index="dye_i")))

    dt_pep_sources_df = (pd.DataFrame(dict(
        dye_i=true_dt_iz, pep_i=true_pep_iz)).groupby(
            ["dye_i", "pep_i"]).size().to_frame("n_rows").reset_index())

    flann = _create_flann(dt_mat)

    return dt_mat, dyetracks_df, dt_pep_sources_df, flann
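How the duplicate-row consolidation behaves on a tiny dyemat: two peptides generating overlapping dyetracks (plain numpy/pandas sketch).

import numpy as np
import pandas as pd

dyemat = np.array([          # (n_rows=4, n_channels=1, n_cycles=2)
    [[1, 0]],
    [[1, 1]],
    [[1, 0]],
    [[1, 1]],
])
true_pep_iz = np.array([1, 1, 2, 2])

dt_mat, true_dt_iz, dt_counts = np.unique(dyemat, return_inverse=True, return_counts=True, axis=0)
sources = (pd.DataFrame(dict(dye_i=np.ravel(true_dt_iz), pep_i=true_pep_iz))
           .groupby(["dye_i", "pep_i"]).size().to_frame("n_rows").reset_index())
print(dt_counts)   # [2 2] -- each unique dyetrack was seen twice
print(sources)     # one row per (dye_i, pep_i) pair with its n_rows count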
Example No. 18
def sub_pixel_shift(im, offset):
    """
    Shift with offset in y, x array form.
    A positive x will shift right. A positive y will shift up.
    """
    check.array_t(im, ndim=2, dtype=float)
    rows, cols = im.shape
    M = np.array([[1.0, 0.0, offset[1]], [0.0, 1.0, offset[0]]])
    # Note the reversal of the dimensions
    return cv2.warpAffine(im, M, dsize=(cols, rows), flags=cv2.INTER_CUBIC)
Example No. 19
def _dyemat_sim(sim_v2_params, pcbs, n_samples, progress=None):
    """
    Run via the C fast_sim module a dyemat sim.

    Arguments:
        sim_v2_params: SimV2Params
        pcbs: This is an encoding of flus. See SimV2Params.pcbs()
            Each peptide has a row per amino-acid and either a
            channel number or a np.nan to indicate a label at that
            position, plus a p_bright for that aa.
            n_samples: number of samples to try ...
                BUT NOT NEC. THE NUMBER RETURNED! -- because
                all-dark samples are not returned.
                See "Dealing with dark-rows" above

    Returns:
        dyemat: ndarray(n_uniq_dyetracks, n_channels, n_cycle)
        dyepep: ndarray(dye_i, pep_i, count)
        pep_recalls: ndarray(n_peps)
    """

    check.t(sim_v2_params, SimV2Params)
    check.array_t(pcbs, shape=(None, 3), dtype=float)

    # TODO:  Refactor to use priors correctly
    #        The following is assuming that all dyes have the same p_bleach

    dyemat, dyepeps, pep_recalls = sim_v2_fast.sim(
        pcbs,
        n_samples,
        sim_v2_params.n_channels,
        len(sim_v2_params.labels),
        sim_v2_params.cycles_array(),
        # TODO: Needs to be per-channel and sampled correctly
        sim_v2_params.channel__priors().set_index("ch_i"
                                                  ).iloc[0].p_bleach.sample(),
        # TODO: The following two need to be sampled correctly
        sim_v2_params.priors.get_mle("p_detach"),
        sim_v2_params.priors.get_mle("p_edman_failure"),
        sim_v2_params.allow_edman_cterm,
        n_threads=get_cpu_limit(),
        rng_seed=sim_v2_params.random_seed,
        progress=progress,
    )

    # lex sort dyemats and then remap
    n_rows, n_cols = dyemat.shape
    lex_cols = tuple(dyemat[:, n_cols - i - 1] for i in range(n_cols))
    sort_args = np.lexsort(lex_cols)
    lut = np.zeros((n_rows, ), dtype=int)
    lut[sort_args] = np.arange(n_rows, dtype=int)
    dyepeps[:, 0] = lut[dyepeps[:, 0]]

    return dyemat[sort_args], dyepeps, pep_recalls
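The lexsort-and-remap step above, isolated on tiny made-up mats.

import numpy as np

dyemat = np.array([[2, 0], [0, 1], [1, 1]])            # flattened dyetracks, unsorted
dyepeps = np.array([[0, 7, 5], [1, 7, 3], [2, 9, 2]])  # (dye_i, pep_i, count)

n_rows, n_cols = dyemat.shape
lex_cols = tuple(dyemat[:, n_cols - i - 1] for i in range(n_cols))
sort_args = np.lexsort(lex_cols)          # row order after lexicographic sort
lut = np.zeros((n_rows,), dtype=int)
lut[sort_args] = np.arange(n_rows)        # old row index -> new row index
dyepeps[:, 0] = lut[dyepeps[:, 0]]        # re-point dyepeps at the sorted rows
dyemat = dyemat[sort_args]                # now [[0, 1], [1, 1], [2, 0]]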
Example No. 20
 def it_finds():
     _, pred, score, _ = _do_nn_and_gmm(x,
                                        dyerow,
                                        dt_mat,
                                        np.ones_like(dt_mat),
                                        dt_weights,
                                        flann,
                                        use_gmm=False)
     check.array_t(pred, shape=(1, ))
     check.array_t(score, shape=(1, ))
     assert pred.tolist() == [3]
     assert score[0] > 0.9
Example No. 21
    def from_psf_ims(cls, im_mea, psf_ims):
        """
        Fit to a Gaussian for one-channel
        """
        check.array_t(psf_ims, ndim=4)
        divs_y, divs_x, peak_mea_h, peak_mea_w = psf_ims.shape
        assert divs_y == divs_x
        assert peak_mea_h == peak_mea_w
        reg_psf = cls(im_mea, peak_mea_h, divs_y)
        reg_psf.estimate(psf_ims)

        return reg_psf
Example No. 22
def align(im_stack, return_shifted_ims=False, bounds=None):
    """
    Align the image stack (1 pixel accuracy) relative to the first frame in the stack
    Arguments:
        im_stack (3 dimensions)
        return_shifted_ims:
            If True, also return the shifted images truncated to the common
            region of interest
        bounds: If not None limit the search space

    Returns:
        list of YX tuples
        shifted_ims (optional)
    """
    check.array_t(im_stack, ndim=3, dtype=np.float64)
    n_cycles, mea_h, mea_w = im_stack.shape
    assert mea_h == mea_w

    offsets = [YX(0, 0)]
    primary = im_stack[0]
    for im in im_stack[1:]:

        # TODO: This could be optimized by using fft instead of
        #       cv2.filter2D() which would avoid the fft of the
        #       unchanging primary.
        conv = convolve(src=primary, kernel=im)

        # conv is now zero-centered; that is, the peak is
        # an offset relative to the center of the image.

        if bounds is not None:
            edge_fill(conv, (mea_w - 2 * bounds) // 2, val=0)

        peak = YX(np.unravel_index(conv.argmax(), conv.shape))
        center = HW(conv.shape) // 2
        offsets += [center - peak]

    if return_shifted_ims:
        raw_dim = im_stack.shape[-2:]
        roi = intersection_roi_from_aln_offsets(offsets, raw_dim)
        roi_dim = (roi[0].stop - roi[0].start, roi[1].stop - roi[1].start)

        pixel_aligned_cy_ims = np.zeros((n_cycles, mea_h, mea_w))
        for cy_i, offset in zip(range(n_cycles), offsets):
            shifted_im = shift(im_stack[cy_i], offset * -1)
            pixel_aligned_cy_ims[cy_i, 0:roi_dim[0],
                                 0:roi_dim[1]] = shifted_im[roi[0], roi[1]]
        return np.array(offsets), pixel_aligned_cy_ims

    else:
        return np.array(offsets)
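A pixel-accuracy alignment sketch in the spirit of the loop above, using scipy's cross-correlation in place of imops.convolve (the assumption is that the correlation peak, measured from the image center, gives the integer offset between frames).

import numpy as np
from scipy.signal import correlate

primary = np.zeros((64, 64))
primary[30:34, 30:34] = 1.0
shifted = np.roll(np.roll(primary, 3, axis=0), -2, axis=1)   # apply a (+3, -2) offset

conv = correlate(primary, shifted, mode="same")
peak = np.array(np.unravel_index(conv.argmax(), conv.shape))
center = np.array(conv.shape) // 2
print(center - peak)   # -> [ 3 -2], the offset that was applied to primary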
Example No. 23
def synth_image(im, peak_mea, locs, amps, std_xs, std_ys):
    """
    Generate a synthetic image using the Gaussians in the parallel arrays
    and accumulate into im
    """
    lib = load_lib()

    n_locs = int(len(locs))

    check.array_t(amps, shape=(n_locs,))
    check.array_t(std_xs, shape=(n_locs,))
    check.array_t(std_ys, shape=(n_locs,))
    params = np.zeros((n_locs, Gauss2FitParams.N_FIT_PARAMS))
    params[:, Gauss2FitParams.AMP] = amps
    params[:, Gauss2FitParams.CENTER_Y] = locs[:, 0]
    params[:, Gauss2FitParams.CENTER_X] = locs[:, 1]
    params[:, Gauss2FitParams.SIGMA_X] = std_xs
    params[:, Gauss2FitParams.SIGMA_Y] = std_ys

    check.array_t(im, ndim=2)
    params = np.ascontiguousarray(params, dtype=np.float64)
    im = np.ascontiguousarray(im, dtype=np.float64)

    im_h, im_w = im.shape
    error = lib.synth_image(im, im_w, im_h, peak_mea, n_locs, params)
    if error is not None:
        raise CException(error)
Example No. 24
def _step_4_gmm_classify(
    radmat,
    dyemat,
    dt_mat,
    dt_inv_var_mat,
    dt_weights,
    flann,
    n_neighbors,
    dt_score_mode,
    dt_filter_threshold,
    dt_score_metric,
    dt_score_bias,
    penalty_coefs,
    rare_penalty,
    radius,
    progress,
):
    """
    The dyemat is passed so that we can get the true_dt_iz for debugging
    """
    check.array_t(radmat, ndim=3)
    true_dt_iz, pred_dt_iz, scores, vdists = zap.arrays(
        _do_nn_and_gmm,
        dict(unit_radrow=radmat, dyerow=dyemat),
        dt_mat=dt_mat,
        dt_inv_var_mat=dt_inv_var_mat,
        dt_weights=dt_weights,
        flann=flann,
        n_neighbors=n_neighbors,
        dt_score_mode=dt_score_mode,
        dt_filter_threshold=dt_filter_threshold,
        dt_score_metric=dt_score_metric,
        dt_score_bias=dt_score_bias,
        penalty_coefs=penalty_coefs,
        rare_penalty=rare_penalty,
        radius=radius,
        _progress=progress,
        _stack=True,
    )

    # I use the dt counts as a weighting factor on the PDFs
    # which means that the scores can be > 1.0.
    # To ensure that all rows get an equal treatment in
    # normalization I simply divide them through by the
    # max value to put them into 0-1 range.

    scores = scores.flatten()
    scores /= np.max(scores)

    return true_dt_iz.flatten(), pred_dt_iz.flatten(), scores, vdists
Example No. 25
def _step_1_create_neighbors_lookup_singleprocess(dyemat, output_dt_mat):
    """
    The dyemat may have many duplicate rows, each from some number of peps.

    These duplicate rows are consolidated so that each coordinate in dyemat space
    is given a unique "dye_i".

    The unique (sorted) dyetracks are written to output_dt_mat which is expected
    to be large enough to hold them.

    Returns:
        dyetracks_df: DF(dye_i, weight).
            Where weight is the sum of all rows that pointed to this dyetrack
        dt_pep_sources_df: DF(dye_i, pep_i, n_rows)
            Records how many times each peptide generated dye_i where count > 0.
        flann: A fast Approximate Nearest Neighbors lookup using PYFLANN.
        n_dts: Number of actual unique dts
    """
    check.array_t(dyemat,
                  ndim=4)  # (n_peps, n_samples, n_channels, n_cycles): uint8

    n_peps, n_samples, n_channels, n_cycles = dyemat.shape
    true_pep_iz = np.repeat(np.arange(n_peps), n_samples)

    # Example usage of unique
    # b = np.array([1, 4, 3, 2, 1, 2])
    # p = np.unique(b, return_inverse=True, return_counts=True, )
    # p == (array([1, 2, 3, 4]), array([0, 3, 2, 1, 0, 1]), array([2, 2, 1, 1]))
    _dyemat = dyemat.reshape(
        (dyemat.shape[0] * dyemat.shape[1], dyemat.shape[2] * dyemat.shape[3]))
    dt_mat, true_dt_iz, dt_counts = np.unique(_dyemat,
                                              return_inverse=True,
                                              return_counts=True,
                                              axis=0)

    dt_mat = dt_mat.reshape((dt_mat.shape[0], n_channels, n_cycles))
    n_dts, n_channels, n_cycles = dt_mat.shape
    output_dt_mat[0:n_dts] = dt_mat

    # Check that the nul row exists and is the first element
    if not np.all(dt_mat[0] == 0):
        raise ValueError("No null row was included in the dyemat")

    flann = _create_flann(dt_mat)

    dyetracks_df, dt_pep_sources_df, dye_to_best_pep_df = _setup_pep_source_dfs(
        true_dt_iz, true_pep_iz, dt_counts)
    return dyetracks_df, dt_pep_sources_df, dye_to_best_pep_df, flann, n_dts
Example No. 26
def mat_lessflat(mat, dim1=None, dim2=None):
    """
    To unflatten you must know either dim1 or dim2

    Example, suppose mat is (2, 6)

        m = mat_lessflat(mat, dim2=3)
        assert m.shape == (2, 2, 3)
    """
    check.array_t(mat, ndim=2)
    check.affirm(dim1 is not None or dim2 is not None)
    if dim1 is None:
        dim1 = mat.shape[1] // dim2
    if dim2 is None:
        dim2 = mat.shape[1] // dim1
    return mat.reshape(mat.shape[0], dim1, dim2)
Example No. 27
def do_field_cycle(ch_ims: np.ndarray, field_i: int, cycle_i: int,
                   reg_psf_samples, peak_mea):
    lib = load_lib()

    check.array_t(ch_ims, ndim=3, is_square=True)

    with context(ch_ims=ch_ims,
                 reg_psf_samples=reg_psf_samples,
                 peak_mea=peak_mea,
                 field_i=field_i) as ctx:

        error = lib.do_field_cycle(ctx, cycle_i)
        if error is not None:
            raise CException(error)

    return ctx._out_align, ctx._out_locs, ctx._out_radiometry
Example No. 28
def psf_fields_one_channel(ims_import_result,
                           sigproc_v2_params,
                           field_iz,
                           channel_i,
                           progress=None) -> priors.RegPSFPrior:
    """
    Build up a regional PSF for one channel on the RAW images.

    Implemented in a parallel zap over every field and then combine the
    fields into a single RegPSF which stores: (divs, divs, peak_mea, peak_mea)
    """

    if ims_import_result.n_fields == 0:
        return None

    with zap.Context(progress=progress):
        region_to_psf_per_field = zap.arrays(
            _do_psf_one_field_one_channel,
            dict(field_i=field_iz),
            _stack=True,
            peak_mea=sigproc_v2_params.peak_mea,
            divs=sigproc_v2_params.divs,
            bandpass_kwargs=dict(
                low_inflection=sigproc_v2_params.low_inflection,
                low_sharpness=sigproc_v2_params.low_sharpness,
                high_inflection=sigproc_v2_params.high_inflection,
                high_sharpness=sigproc_v2_params.high_sharpness,
            ),
            ims_import_result=ims_import_result,
            channel_i=channel_i,
            n_cycles_limit=sigproc_v2_params.n_cycles_limit,
        )

    # SUM over fields
    psf_ims = np.sum(region_to_psf_per_field, axis=0)
    psf_ims = psf_normalize(psf_ims)

    # At this point psf_ims is a pixel image of the PSF at each reg div.
    # ie, 4 dimensional: (divs_y, divs_x, n_pixels_h, n_pixels_w)
    # Now we convert it to Gaussian Parameters by fitting so we don't have
    # to store the pixels anymore: just the 3 critical shape parameters:
    # sigma_x, sigma_y, and rho.
    # Use one frame of ims_import_result to sort out dimensions
    im = ims_import_result.ims[0, 0, 0]
    check.array_t(im, is_square=True)
    reg_psf = priors.RegPSFPrior.from_psf_ims(im.shape[-1], psf_ims)
    return reg_psf
Example No. 29
    def estimate(self, psf_ims):
        check.array_t(psf_ims, ndim=4)
        n_divs_y, n_divs_x, peak_mea_h, peak_mea_w = psf_ims.shape
        assert n_divs_y == n_divs_x and self.hyper_n_divs == n_divs_y
        assert peak_mea_h == peak_mea_w and self.hyper_peak_mea == peak_mea_h

        self.sigma_x = np.zeros((self.hyper_n_divs, self.hyper_n_divs))
        self.sigma_y = np.zeros((self.hyper_n_divs, self.hyper_n_divs))
        self.rho = np.zeros((self.hyper_n_divs, self.hyper_n_divs))
        for y in range(n_divs_y):
            for x in range(n_divs_x):
                im = psf_ims[y, x]
                if np.sum(im) > 0:
                    fit_params, _ = imops.fit_gauss2(im)
                    self.sigma_x[y, x] = fit_params[Gauss2Params.SIGMA_X]
                    self.sigma_y[y, x] = fit_params[Gauss2Params.SIGMA_Y]
                    self.rho[y, x] = fit_params[Gauss2Params.RHO]
Example No. 30
def _any_identical_non_zero_rows(a, b):
    """
    Checks if two mats a and b are identical in ANY non-zero rows.
    """
    check.array_t(a, ndim=2)
    check.array_t(b, ndim=2)

    arg_sample = stats.arg_subsample(a, 100)
    a = a[arg_sample]
    b = b[arg_sample]

    zero_rows = np.all(a == 0, axis=1)
    a = a[~zero_rows]
    b = b[~zero_rows]

    if a.shape[0] > 0:
        return np.any(a == b)