Exemplo n.º 1
0
def test_maha():
    """Check maha_dist_sq against squared Mahalanobis distances from R.

    The centre passed in is the per-column mean and the scatter matrix is the
    result of cov() over the three columns. Reference R session:

        > x1 <- round(rnorm(10,3), 3)
        > x2 <- round(x1 + rnorm(10), 3)
        > x3 <- round(x2 + runif(10), 3)
        > x1
        [1] 3.853 2.401 2.253 3.067 1.887 3.293 3.995 2.559 2.785 2.228
        > x2
        [1] 4.294 1.915 1.315 4.641 1.611 2.838 3.696 1.337 2.853 2.434
        > x3
        [1] 4.785 2.352 2.023 4.978 2.329 3.101 4.494 2.204 3.468 3.075
        > obs <- cbind(x1, x2, x3)
        > S <- var(obs)
        > S
                 x1        x2       x3
        x1 0.5020374 0.6667232 0.633355
        x2 0.6667232 1.4434718 1.326026
        x3 0.6333550 1.3260262 1.248315
        > mahalanobis(obs, c(mean(x1), mean(x2), mean(x3)), S)
        [1] 2.1838336 1.9673401 1.3335029 4.9191627 2.1246818 5.3297995 4.9022487
        [8] 2.5335913 0.1952562 1.5105832
    """
    from libtbx.test_utils import approx_equal
    from scitbx.array_family import flex

    from dials.algorithms.statistics.fast_mcd import cov, maha_dist_sq

    # One inner tuple per variable (x1, x2, x3), ten observations each.
    raw_columns = (
        (3.853, 2.401, 2.253, 3.067, 1.887, 3.293, 3.995, 2.559, 2.785, 2.228),
        (4.294, 1.915, 1.315, 4.641, 1.611, 2.838, 3.696, 1.337, 2.853, 2.434),
        (4.785, 2.352, 2.023, 4.978, 2.329, 3.101, 4.494, 2.204, 3.468, 3.075),
    )
    columns = [flex.double(values) for values in raw_columns]
    centre = [flex.mean(column) for column in columns]
    scatter = cov(*columns)

    # Output of R's mahalanobis() for the same data.
    expected = [
        2.1838336, 1.9673401, 1.3335029, 4.9191627, 2.1246818,
        5.3297995, 4.9022487, 2.5335913, 0.1952562, 1.5105832,
    ]

    distances = maha_dist_sq(columns, centre, scatter)
    assert approx_equal(list(distances), expected)
Exemplo n.º 2
0
def test_maha():
  """Check maha_dist_sq against squared Mahalanobis distances computed in R.

  Prints "OK" once the comparison with the R reference values has passed.
  """

  # Want implementation of Mahalanobis distance to match this R session:

  #> x1 <- round(rnorm(10,3), 3)
  #> x2 <- round(x1 + rnorm(10), 3)
  #> x3 <- round(x2 + runif(10), 3)
  #> x1
  # [1] 3.853 2.401 2.253 3.067 1.887 3.293 3.995 2.559 2.785 2.228
  #> x2
  # [1] 4.294 1.915 1.315 4.641 1.611 2.838 3.696 1.337 2.853 2.434
  #> x3
  # [1] 4.785 2.352 2.023 4.978 2.329 3.101 4.494 2.204 3.468 3.075
  #> obs <- cbind(x1, x2, x3)
  #> S <- var(obs)
  #> S
  #          x1        x2       x3
  #x1 0.5020374 0.6667232 0.633355
  #x2 0.6667232 1.4434718 1.326026
  #x3 0.6333550 1.3260262 1.248315
  #> mahalanobis(obs, c(mean(x1), mean(x2), mean(x3)), S)
  # [1] 2.1838336 1.9673401 1.3335029 4.9191627 2.1246818 5.3297995 4.9022487
  # [8] 2.5335913 0.1952562 1.5105832

  from scitbx.array_family import flex
  from dials.algorithms.statistics.fast_mcd import maha_dist_sq, cov

  # Same observations as in the R session, one flex array per variable.
  x1 = flex.double((3.853, 2.401, 2.253, 3.067, 1.887, 3.293, 3.995, 2.559, 2.785, 2.228))
  x2 = flex.double((4.294, 1.915, 1.315, 4.641, 1.611, 2.838, 3.696, 1.337, 2.853, 2.434))
  x3 = flex.double((4.785, 2.352, 2.023, 4.978, 2.329, 3.101, 4.494, 2.204, 3.468, 3.075))
  cols = [x1, x2, x3]

  # Centre on the per-column means and use the covariance of the columns,
  # mirroring the arguments given to mahalanobis() in R.
  center = [flex.mean(e) for e in cols]
  covmat = cov(x1, x2, x3)

  maha = maha_dist_sq(cols, center, covmat)

  from libtbx.test_utils import approx_equal
  R_result = [2.1838336, 1.9673401, 1.3335029, 4.9191627, 2.1246818,
              5.3297995, 4.9022487, 2.5335913, 0.1952562, 1.5105832]
  assert approx_equal(list(maha), R_result)

  # Parenthesised single-argument form: valid on Python 3 and prints the same
  # text on Python 2, unlike the bare print statement.
  print("OK")
Exemplo n.º 3
0
    def _detect_outliers(self, cols):
        """Flag observations whose squared Mahalanobis distance is too large.

        A FastMCD estimator is built over ``cols`` with the settings stored on
        this object; its corrected location and scatter estimates are used to
        compute a squared Mahalanobis distance per observation.

        Args:
            cols: the data columns handed to FastMCD and maha_dist_sq.

        Returns:
            The result of comparing the squared distances with
            ``self._mahasq_cutoff`` (greater-than), i.e. the outlier flags.
        """
        mcd_settings = dict(
            alpha=self._alpha,
            max_n_groups=self._max_n_groups,
            min_group_size=self._min_group_size,
            n_trials=self._n_trials,
            k1=self._k1,
            k2=self._k2,
            k3=self._k3,
        )

        # Corrected location (centre) and scatter from the MCD estimator
        centre, scatter = FastMCD(cols, **mcd_settings).get_corrected_T_and_S()

        # Anything beyond the squared-distance cutoff counts as an outlier
        return maha_dist_sq(cols, centre, scatter) > self._mahasq_cutoff
Exemplo n.º 4
0
  def _detect_outliers(self, cols):
    """Flag observations whose squared Mahalanobis distance is too large.

    A FastMCD estimator is built over cols using the settings stored on this
    object. Its corrected location and scatter estimates are then used to
    compute the squared Mahalanobis distance of every observation.

    Args:
      cols: the data columns handed to FastMCD and maha_dist_sq.

    Returns:
      The result of d2s > self._mahasq_cutoff, i.e. the outlier flags
      (presumably a flex.bool -- confirm against maha_dist_sq).
    """

    # No pre-allocated result array is needed: the comparison below creates
    # the flags in one go.
    fast_mcd = FastMCD(cols,
                       alpha = self._alpha,
                       max_n_groups = self._max_n_groups,
                       min_group_size = self._min_group_size,
                       n_trials = self._n_trials,
                       k1 = self._k1,
                       k2 = self._k2,
                       k3 = self._k3)

    # get location and MCD scatter estimate
    T, S = fast_mcd.get_corrected_T_and_S()

    # get squared Mahalanobis distances
    d2s = maha_dist_sq(cols, T, S)

    # compare to the threshold
    return d2s > self._mahasq_cutoff
Exemplo n.º 5
0
    def _filter_reflections_based_on_centroid_distance(self):
        """
        Filter out reflections observed too far from their predicted position.

        A reflection is kept only when both of these hold:

        * the squared Mahalanobis distance of its (X, Y) pixel residual, taken
          with respect to the FastMCD location/scatter estimate, is below the
          chi-squared quantile for
          ``self.params.refinement.outlier_probability``;
        * the length of its (X, Y) residual is below
          ``self.params.refinement.max_separation``.

        ``self.reflections`` is replaced by the selected subset and a summary
        is written to the logger. The epsilon residual (length of the "s2"
        vector minus 1/wavelength of the first experiment's beam) is only
        reported; it takes no part in the rejection.

        Raises:
            RuntimeError: if fewer than
                ``self.params.refinement.min_n_reflections`` reflections
                remain after filtering.
        """

        def largest(values):
            # Maximum absolute value of a residual array
            return flex.max(flex.abs(values))

        def rms(values):
            # Square root of the mean squared residual
            return sqrt(flex.sum(values**2) / len(values))

        table = self.reflections

        # Observed minus predicted centroid, in pixels
        obs_x, obs_y, _ = table["xyzobs.px.value"].parts()
        calc_x, calc_y, _ = table["xyzcal.px"].parts()
        dx = obs_x - calc_x
        dy = obs_y - calc_y

        # Epsilon residual: |s2| - 1/wavelength
        inv_wavelength = 1.0 / self.experiments[0].beam.get_wavelength()
        sx, sy, sz = table["s2"].parts()
        d_eps = flex.sqrt(sx**2 + sy**2 + sz**2) - inv_wavelength

        # Robust location/scatter of the 2D pixel residuals, then the squared
        # Mahalanobis distance of every reflection from that estimate
        xy_residuals = (dx, dy)
        location, scatter = FastMCD(xy_residuals).get_corrected_T_and_S()
        dist_sq = maha_dist_sq(xy_residuals, location, scatter)

        # Cutoff from the chi-squared quantile (first argument 2 is presumably
        # the degrees of freedom for the two residual components)
        settings = self.params.refinement
        cutoff = chisq_quantile(2, settings.outlier_probability)

        # Apply both criteria and keep the survivors
        not_outlier = dist_sq < cutoff
        close_enough = flex.sqrt(dx**2 + dy**2) < settings.max_separation
        self.reflections = table.select(not_outlier & close_enough)

        # Report what happened (residual statistics cover all reflections that
        # went in, not only the ones kept)
        rule = "-" * 80
        logger.info(rule)
        logger.info("Centroid outlier rejection")
        logger.info(" Using MCD algorithm with probability = %f" %
                    settings.outlier_probability)
        logger.info(" Max X residual: %f" % largest(dx))
        logger.info(" Max Y residual: %f" % largest(dy))
        logger.info(" Max E residual: %f" % largest(d_eps))
        logger.info(" Mean X RMSD: %f" % rms(dx))
        logger.info(" Mean Y RMSD: %f" % rms(dy))
        logger.info(" Mean E RMSD: %f" % rms(d_eps))
        logger.info(" MCD location estimate: %.4f, %.4f" % tuple(location))
        logger.info(
            " MCD scatter estimate:\n      %.7f, %.7f,\n      %.7f, %.7f"
            % tuple(scatter))
        logger.info(" Number of outliers: %d" % not_outlier.count(False))
        logger.info(
            " Number of reflections with residual > %0.2f pixels: %d" %
            (settings.max_separation, close_enough.count(False)))
        n_kept = len(self.reflections)
        logger.info(" Number of reflections selection for refinement: %d" %
                    n_kept)
        logger.info(rule)

        # Refuse to continue with too little data
        n_required = settings.min_n_reflections
        if n_kept < n_required:
            raise RuntimeError(
                "Too few reflections to perform refinement: got %d, expected %d"
                % (n_kept, n_required))
Exemplo n.º 6
0
def _filter_reflections_based_on_centroid_distance(
    reflection_table,
    experiment,
    outlier_probability=0.975,
    max_separation=2,
):
    """
    Filter out reflections observed too far from their predicted position.

    A reflection is kept only when the squared Mahalanobis distance of its
    (X, Y) pixel residual, relative to the FastMCD location/scatter estimate,
    is below the chi-squared quantile for ``outlier_probability`` AND the
    length of that residual is below ``max_separation``. A summary is written
    to the logger. The epsilon residual (length of the "s2" vector minus
    1/wavelength of the experiment's beam) is only reported; it takes no part
    in the rejection.

    Args:
        reflection_table: table providing the "xyzobs.px.value", "xyzcal.px"
            and "s2" columns.
        experiment: object whose ``beam.get_wavelength()`` gives the wavelength.
        outlier_probability: probability passed to ``chisq_quantile``.
        max_separation: upper limit on the (X, Y) residual length.

    Returns:
        The selected subset of ``reflection_table``.
    """

    def largest(values):
        # Maximum absolute value of a residual array
        return flex.max(flex.abs(values))

    def rms(values):
        # Square root of the mean squared residual
        return sqrt(flex.sum(values**2) / len(values))

    # Observed minus predicted centroid, in pixels
    obs_x, obs_y, _ = reflection_table["xyzobs.px.value"].parts()
    calc_x, calc_y, _ = reflection_table["xyzcal.px"].parts()
    dx = obs_x - calc_x
    dy = obs_y - calc_y

    # Epsilon residual: |s2| - 1/wavelength
    inv_wavelength = 1.0 / experiment.beam.get_wavelength()
    sx, sy, sz = reflection_table["s2"].parts()
    d_eps = flex.sqrt(sx**2 + sy**2 + sz**2) - inv_wavelength

    # Robust location/scatter of the 2D pixel residuals, then the squared
    # Mahalanobis distance of every reflection from that estimate
    xy_residuals = (dx, dy)
    location, scatter = FastMCD(xy_residuals).get_corrected_T_and_S()
    dist_sq = maha_dist_sq(xy_residuals, location, scatter)

    # Cutoff from the chi-squared quantile (first argument 2 is presumably the
    # degrees of freedom for the two residual components)
    cutoff = chisq_quantile(2, outlier_probability)

    # Apply both criteria and keep the survivors
    not_outlier = dist_sq < cutoff
    close_enough = flex.sqrt(dx**2 + dy**2) < max_separation
    kept = reflection_table.select(not_outlier & close_enough)
    n_refl = kept.size()

    # Report what happened (residual statistics cover all reflections that
    # went in, not only the ones kept)
    rule = "-" * 80
    logger.info(rule)
    logger.info("Centroid outlier rejection")
    logger.info(
        f" Using MCD algorithm with probability = {outlier_probability}")
    logger.info(" Max X residual: %f" % largest(dx))
    logger.info(" Max Y residual: %f" % largest(dy))
    logger.info(" Max E residual: %f" % largest(d_eps))
    logger.info(" Mean X RMSD: %f" % rms(dx))
    logger.info(" Mean Y RMSD: %f" % rms(dy))
    logger.info(" Mean E RMSD: %f" % rms(d_eps))
    logger.info(" MCD location estimate: %.4f, %.4f" % tuple(location))
    logger.info(
        " MCD scatter estimate:\n    %.7f, %.7f,\n    %.7f, %.7f"
        % tuple(scatter))
    logger.info(" Number of outliers: %d" % not_outlier.count(False))
    logger.info(" Number of reflections with residual > %0.2f pixels: %d" %
                (max_separation, close_enough.count(False)))
    logger.info(f"Number of reflections selection for refinement: {n_refl}")
    logger.info(rule)

    return kept