Пример #1
0
  def scale(self,other,plot=False):
    """Return the scale factor between ``self.f_model_real`` and ``other``.

    Miller indices common to both arrays are matched, the matched subsets are
    handed to ``xfel.cxi.cxi_cc.correlation`` and the resulting slope, offset,
    correlation coefficient and count are printed on one line.

    Args:
      other: object exposing ``indices()``, ``data()`` and ``select()``
        (presumably a cctbx miller array -- confirm against callers).
      plot: if true, show two matplotlib scatter plots of log10 of the
        matched data: one before the correlation is computed and one with
        ``other``'s data multiplied by the slope.  Each ``plt.show()`` call
        blocks until the window is closed.  Defaults to False, which matches
        the previous hard-coded behaviour.

    Returns:
      The slope returned by ``correlation``.
    """
    from cctbx import miller
    matches = miller.match_indices(self.f_model_real.indices(),other.indices())
    # Fetch the pair list once; column 0 indexes self, column 1 indexes other.
    pairs = matches.pairs()
    sel0 = flex.size_t([p[0] for p in pairs])
    sel1 = flex.size_t([p[1] for p in pairs])

    if plot:
      from matplotlib import pyplot as plt
      # The matched data values are only needed for plotting.
      val0 = self.f_model_real.data().select(sel0)
      val1 = other.data().select(sel1)
      plt.plot([-1,4],[-1,4],"g-")
      plt.plot(flex.log10(val0),flex.log10(val1),"r.")
      plt.show()

    from xfel.cxi.cxi_cc import correlation
    slope,offset,corr,N = correlation(
      self = self.f_model_real.select(sel0),
      other = other.select(sel1))
    # Single pre-formatted argument: same output under Python 2 and Python 3.
    print("%s %s %s %s" % (slope,offset,corr,N))
    if plot:
      plt.plot([-1,4],[-1,4],"g-")
      plt.plot(flex.log10(val0),flex.log10(slope * val1),"r,")
      plt.show()
    return slope
Пример #2
0
    def scale(self, other, plot=False):
        """Return the scale factor between ``self.f_model_real`` and ``other``.

        Miller indices common to both arrays are matched, the matched subsets
        are handed to ``xfel.cxi.cxi_cc.correlation`` and the resulting slope,
        offset, correlation coefficient and count are printed on one line.

        Args:
            other: object exposing ``indices()``, ``data()`` and ``select()``
                (presumably a cctbx miller array -- confirm against callers).
            plot: if true, show two matplotlib scatter plots of log10 of the
                matched data: one before the correlation is computed and one
                with ``other``'s data multiplied by the slope.  Each
                ``plt.show()`` call blocks until the window is closed.
                Defaults to False, which matches the previous hard-coded
                behaviour.

        Returns:
            The slope returned by ``correlation``.
        """
        from cctbx import miller
        matches = miller.match_indices(self.f_model_real.indices(),
                                       other.indices())
        # Fetch the pair list once; column 0 indexes self, column 1 other.
        pairs = matches.pairs()
        sel0 = flex.size_t([p[0] for p in pairs])
        sel1 = flex.size_t([p[1] for p in pairs])

        if plot:
            from matplotlib import pyplot as plt
            # The matched data values are only needed for plotting.
            val0 = self.f_model_real.data().select(sel0)
            val1 = other.data().select(sel1)
            plt.plot([-1, 4], [-1, 4], "g-")
            plt.plot(flex.log10(val0), flex.log10(val1), "r.")
            plt.show()

        from xfel.cxi.cxi_cc import correlation
        slope, offset, corr, N = correlation(
            self=self.f_model_real.select(sel0), other=other.select(sel1))
        # Single pre-formatted argument: same output under Python 2 and 3.
        print("%s %s %s %s" % (slope, offset, corr, N))
        if plot:
            plt.plot([-1, 4], [-1, 4], "g-")
            plt.plot(flex.log10(val0), flex.log10(slope * val1), "r,")
            plt.show()
        return slope
Пример #3
0
def i_over_sig_i_vs_i_plot(intensities, sigmas):
    """Plot unscaled I / sigma_adjusted vs unscaled I.

    Only entries where both the intensity and the sigma are strictly positive
    are used.  The x values are log10(I), the y values are I / sigma, and the
    two are binned into a 200 x 200 two-dimensional histogram.  Bins with no
    counts are reported as NaN instead of 0.

    Args:
        intensities: flex array of intensities (must support comparison,
            ``select`` and ``as_numpy_array``).
        sigmas: flex array of the matching sigmas, same length as
            ``intensities``.

    Returns:
        A dict with the single key ``"i_over_sig_i_vs_i"`` whose value holds
        the heatmap ``"data"``, the ``"layout"`` and a ``"help"`` text
        (the structure looks like a plotly figure description -- confirm
        against the consumer).
    """
    sel = (intensities > 0) & (sigmas > 0)
    intensities = intensities.select(sel)
    sigmas = sigmas.select(sel)
    x = flex.log10(intensities)
    y = intensities / sigmas

    counts, xedges, yedges = np.histogram2d(x.as_numpy_array(),
                                            y.as_numpy_array(),
                                            bins=(200, 200))
    # Blank out empty bins.  np.nan is used because the np.NAN alias was
    # removed in NumPy 2.0.
    z = np.where(counts != 0, counts, np.nan)

    return {
        "i_over_sig_i_vs_i": {
            "data": [{
                "x": xedges.tolist(),
                "y": yedges.tolist(),
                # histogram2d indexes its result as [x, y]; transpose so that
                # rows correspond to y for the heatmap.
                "z": z.transpose().tolist(),
                "type": "heatmap",
                "name": "Isigma distribution",
                "colorbar": {
                    "title": "Number of reflections",
                    "titleside": "right",
                },
                "colorscale": "Jet",
            }],
            "layout": {
                "title": u"I/σ(I) vs I",
                "xaxis": {
                    "title": "log I"
                },
                "yaxis": {
                    "title": u"I/σ(I)"
                },
            },
            "help":
            u"""\
This plot shows the distribution of I/σ(I) as a function of I, which can
give indication of the errors within the dataset. The I/σ(I) asymptotic
limit can be seen at the plateau in the top-right of the plot, if the measured
data are strong enough.

[1] Diederichs, K. (2010). Acta Cryst. D, 66(6), 733-740.
https://doi.org/10.1107/S0907444910014836
""",
        }
    }
Пример #4
0
    def __init__(self,
                 reflections,
                 step_size=45,
                 tolerance=1.5,
                 max_height_fraction=0.25,
                 percentile=None,
                 histogram_binning='linear',
                 nn_per_bin=5):
        """Estimate a maximum cell length from nearest-neighbour distances.

        The reflections are split by imageset, by phi wedge of ``step_size``
        degrees and by the ``entering`` flag.  Within each group every
        reciprocal-lattice point is paired with its nearest neighbour, and
        the reciprocal of that separation is collected as a direct-space
        distance.  The distances are histogrammed and ``self.max_cell`` is
        derived from them.

        Args:
            reflections: table providing the ``'rlp'``, ``'imageset_id'`` and
                ``'xyzobs.mm.value'`` columns and, optionally, ``'entering'``
                (all reflections are treated as entering when it is absent).
            step_size: width in degrees of each phi wedge.
            tolerance: multiplier applied to the estimate to obtain
                ``self.max_cell``.
            max_height_fraction: when ``percentile`` is None, the estimate is
                the upper edge of the last histogram bin whose relative
                frequency exceeds this fraction of the highest bin.
            percentile: when not None, the estimate is instead the distance
                found at index ``int((1 - percentile) * N)`` of the distances
                sorted in descending order, and the top-1% outlier rejection
                is skipped.
            histogram_binning: ``'log'`` histograms log10 of the distances;
                any other value histograms the distances directly.
            nn_per_bin: target number of neighbours per histogram bin.

        Raises:
            AssertionError: if no more than 10 neighbour distances were
                collected.

        Sets ``tolerance``, ``NNBIN``, ``histogram_binning``, ``slot_start``,
        ``slot_end``, ``slot_width``, ``relative_frequency``, ``max_cell``,
        ``reciprocal_lattice_vectors``, ``d_spacings``, ``direct`` and
        ``histogram`` on the instance.
        """
        self.tolerance = tolerance  # Margin of error for max unit cell estimate
        from scitbx.array_family import flex
        NEAR = 10  # more than this many distances are required
        self.NNBIN = nn_per_bin  # target number of neighbors per histogram bin
        self.histogram_binning = histogram_binning

        direct = flex.double()

        if 'entering' in reflections:
            entering_flags = reflections['entering']
        else:
            entering_flags = flex.bool(reflections.size(), True)
        rs_vectors = reflections['rlp']
        # Third component of the mm/rad centroid, converted radians -> degrees.
        phi_deg = reflections['xyzobs.mm.value'].parts()[2] * (180 / math.pi)

        d_spacings = flex.double()
        # nearest neighbor analysis
        from annlib_ext import AnnAdaptor
        for imageset_id in range(flex.max(reflections['imageset_id']) + 1):
            sel_imageset = reflections['imageset_id'] == imageset_id
            if sel_imageset.count(True) == 0:
                continue
            phi_min = flex.min(phi_deg.select(sel_imageset))
            phi_max = flex.max(phi_deg.select(sel_imageset))
            d_phi = phi_max - phi_min
            n_steps = max(int(math.ceil(d_phi / step_size)), 1)

            for n in range(n_steps):
                # Half-open wedge [phi_min + n*step, phi_min + (n+1)*step).
                sel_step = sel_imageset & (phi_deg >=
                                           (phi_min + n * step_size)) & (
                                               phi_deg <
                                               (phi_min + (n + 1) * step_size))

                for entering in (True, False):
                    sel_entering = sel_step & (entering_flags == entering)
                    if sel_entering.count(True) == 0:
                        continue

                    group_vectors = rs_vectors.select(sel_entering)
                    query = flex.double()
                    query.extend(group_vectors.as_double())

                    if query.size() == 0:
                        continue

                    IS_adapt = AnnAdaptor(data=query, dim=3, k=1)
                    IS_adapt.query(query)

                    # The square root implies the adaptor reports squared
                    # distances -- TODO confirm against annlib_ext.
                    direct.extend(1 / flex.sqrt(IS_adapt.distances))
                    # Use the same subset as `direct` so that both arrays stay
                    # element-for-element aligned; taking the norms of every
                    # reflection here would desynchronise them.
                    d_spacings.extend(1 / group_vectors.norms())

        assert len(direct) > NEAR, (
            "Too few spots (%d) for nearest neighbour analysis." % len(direct))

        # Sort both arrays by ascending direct-space distance.
        perm = flex.sort_permutation(direct)
        direct = direct.select(perm)
        d_spacings = d_spacings.select(perm)

        # eliminate nonsensical direct space distances
        sel = direct > 1
        direct = direct.select(sel)
        d_spacings = d_spacings.select(sel)

        if percentile is None:
            # reject top 1% of longest distances to hopefully get rid of any outliers
            n = int(math.floor(0.99 * len(direct)))
            direct = direct[:n]
            d_spacings = d_spacings[:n]

        # determine the most probable nearest neighbor distance (direct space)
        if self.histogram_binning == 'log':
            hst = flex.histogram(flex.log10(direct),
                                 n_slots=int(len(direct) / self.NNBIN))
        else:
            hst = flex.histogram(direct, n_slots=int(len(direct) / self.NNBIN))
        centers = hst.slot_centers()
        half_width = 0.5 * hst.slot_width()
        if self.histogram_binning == 'log':
            # Convert the log10 bin edges back to distances; the resulting
            # bins have unequal widths, so slot_width is an array here.
            self.slot_start = flex.double(
                [10**(s - half_width) for s in centers])
            self.slot_end = flex.double(
                [10**(s + half_width) for s in centers])
            self.slot_width = self.slot_end - self.slot_start
        else:
            self.slot_start = centers - half_width
            self.slot_end = centers + half_width
            self.slot_width = hst.slot_width()
        # Counts divided by bin width, so unequal log bins are comparable.
        self.relative_frequency = hst.slots().as_double() / self.slot_width
        highest_bin_height = flex.max(self.relative_frequency)

        if False:  # to print out the histogramming analysis
            smin, smax = flex.min(direct), flex.max(direct)
            stats = flex.mean_and_variance(direct)
            import sys
            out = sys.stdout
            out.write("     range:     %6.2f - %.2f\n" % (smin, smax))
            out.write("     mean:      %6.2f +/- %6.2f on N = %d\n" % (
                stats.mean(), stats.unweighted_sample_standard_deviation(),
                direct.size()))
            hst.show(f=out, prefix="    ", format_cutoffs="%6.2f")
            out.write("\n")

        if percentile is not None:
            # determine the nth-percentile direct-space distance
            perm = flex.sort_permutation(direct, reverse=True)
            self.max_cell = self.tolerance * direct[perm[int(
                (1 - percentile) * len(direct))]]

        else:
            # choose a max cell based on bins above a given fraction of the highest bin height
            # given multiple
            isel = (self.relative_frequency.as_double() >
                    (max_height_fraction * highest_bin_height)).iselection()
            self.max_cell = (self.tolerance *
                             self.slot_end[int(flex.max(isel.as_double()))])

        self.reciprocal_lattice_vectors = rs_vectors
        self.d_spacings = d_spacings
        self.direct = direct
        self.histogram = hst
Пример #5
0
  def __init__(self, reflections, step_size=45, tolerance=1.5,
               max_height_fraction=0.25, percentile=0.05,
               histogram_binning='linear'):
    """Estimate a maximum cell length from nearest-neighbour distances.

    The reflections are split by imageset, by phi wedge of ``step_size``
    degrees and by the ``entering`` flag.  Within each group every
    reciprocal-lattice point is paired with its nearest neighbour, and the
    reciprocal of that separation is collected as a direct-space distance.
    The distances are histogrammed and ``self.max_cell`` is taken from the
    upper edge of the last bin whose relative frequency exceeds
    ``max_height_fraction`` of the highest bin, multiplied by ``tolerance``.

    Args:
      reflections: table providing the ``'rlp'``, ``'imageset_id'`` and
        ``'xyzobs.mm.value'`` columns and, optionally, ``'entering'`` (all
        reflections are treated as entering when it is absent).
      step_size: width in degrees of each phi wedge.
      tolerance: multiplier applied to obtain ``self.max_cell``.
      max_height_fraction: fraction of the highest bin used as threshold.
      percentile: fraction used to pick ``self.percentile`` from the
        distances sorted in descending order.
      histogram_binning: ``'log'`` histograms log10 of the distances; any
        other value histograms the distances directly.

    Raises:
      AssertionError: if no more than 10 neighbour distances were collected.

    Sets ``tolerance``, ``NNBIN``, ``histogram_binning``, ``slot_start``,
    ``slot_end``, ``slot_width``, ``relative_frequency``, ``max_cell``,
    ``percentile``, ``reciprocal_lattice_vectors``, ``d_spacings``,
    ``direct`` and ``histogram`` on the instance.
    """
    self.tolerance = tolerance # Margin of error for max unit cell estimate
    from scitbx.array_family import flex
    NEAR = 10 # more than this many distances are required
    self.NNBIN = 5 # target number of neighbors per histogram bin
    self.histogram_binning = histogram_binning

    direct = flex.double()

    if 'entering' in reflections:
      entering_flags = reflections['entering']
    else:
      entering_flags = flex.bool(reflections.size(), True)
    rs_vectors = reflections['rlp']
    # Third component of the mm/rad centroid, converted radians -> degrees.
    phi_deg = reflections['xyzobs.mm.value'].parts()[2] * (180/math.pi)

    d_spacings = flex.double()
    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor
    for imageset_id in range(flex.max(reflections['imageset_id'])+1):
      sel_imageset = reflections['imageset_id'] == imageset_id
      if sel_imageset.count(True) == 0:
        continue
      phi_min = flex.min(phi_deg.select(sel_imageset))
      phi_max = flex.max(phi_deg.select(sel_imageset))
      d_phi = phi_max - phi_min
      n_steps = max(int(math.ceil(d_phi / step_size)), 1)

      for n in range(n_steps):
        # Build each wedge from the imageset selection.  Narrowing a single
        # selection with `&=` would leave every wedge after the first empty.
        sel_step = sel_imageset & (
          phi_deg >= (phi_min+n*step_size)) & (
          phi_deg < (phi_min+(n+1)*step_size))

        for entering in (True, False):
          # Likewise derive each pass from the wedge selection, so that the
          # entering=False pass is not intersected with the entering=True one.
          sel_entering = sel_step & (entering_flags == entering)
          if sel_entering.count(True) == 0:
            continue

          group_vectors = rs_vectors.select(sel_entering)
          query = flex.double()
          query.extend(group_vectors.as_double())

          if query.size() == 0:
            continue

          IS_adapt = AnnAdaptor(data=query,dim=3,k=1)
          IS_adapt.query(query)

          # The square root implies the adaptor reports squared distances
          # -- TODO confirm against annlib_ext.
          direct.extend(1/flex.sqrt(IS_adapt.distances))
          # Use the same subset as `direct` so that both arrays stay
          # element-for-element aligned.
          d_spacings.extend(1/group_vectors.norms())

    assert len(direct)>NEAR, (
      "Too few spots (%d) for nearest neighbour analysis." %len(direct))

    # Sort both arrays by ascending direct-space distance.
    perm = flex.sort_permutation(direct)
    direct = direct.select(perm)
    d_spacings = d_spacings.select(perm)

    # reject top 1% of longest distances to hopefully get rid of any outliers
    n = int(math.floor(0.99*len(direct)))
    direct = direct[:n]
    d_spacings = d_spacings[:n]

    # determine the most probable nearest neighbor distance (direct space)
    if self.histogram_binning == 'log':
      hst = flex.histogram(
        flex.log10(direct), n_slots=int(len(direct)/self.NNBIN))
    else:
      hst = flex.histogram(direct, n_slots=int(len(direct)/self.NNBIN))
    centers = hst.slot_centers()
    half_width = 0.5 * hst.slot_width()
    if self.histogram_binning == 'log':
      # Convert the log10 bin edges back to distances; the resulting bins
      # have unequal widths, so slot_width is an array here.
      self.slot_start = flex.double([10**s for s in centers - half_width])
      self.slot_end = flex.double([10**s for s in centers + half_width])
      self.slot_width = self.slot_end - self.slot_start
    else:
      self.slot_start = centers - half_width
      self.slot_end = centers + half_width
      self.slot_width = hst.slot_width()
    # Counts divided by bin width, so unequal log bins are comparable.
    self.relative_frequency = hst.slots().as_double()/self.slot_width
    highest_bin_height = flex.max(self.relative_frequency)

    if False:  # to print out the histogramming analysis
      smin, smax = flex.min(direct), flex.max(direct)
      stats = flex.mean_and_variance(direct)
      import sys
      out = sys.stdout
      out.write("     range:     %6.2f - %.2f\n" % (smin, smax))
      out.write("     mean:      %6.2f +/- %6.2f on N = %d\n" % (
        stats.mean(), stats.unweighted_sample_standard_deviation(), direct.size()))
      hst.show(f=out, prefix="    ", format_cutoffs="%6.2f")
      out.write("\n")

    # choose a max cell based on bins above a given fraction of the highest bin height
    # given multiple
    isel = (self.relative_frequency.as_double() > (
      max_height_fraction * highest_bin_height)).iselection()
    self.max_cell = (
      self.tolerance * self.slot_end[int(flex.max(isel.as_double()))])

    # Pick the distance at index int(percentile * N) of the descending order.
    # NOTE(review): the original comment called this the "5th-percentile"
    # distance, but with a descending sort this is the value exceeded by
    # roughly `percentile` of the distances -- confirm the intended meaning.
    perm = flex.sort_permutation(direct, reverse=True)
    self.percentile = direct[perm[int(percentile * len(direct))]]

    self.reciprocal_lattice_vectors = rs_vectors
    self.d_spacings = d_spacings
    self.direct = direct
    self.histogram = hst