Example #1
 def __init__(self, *args, **kwargs):
     LBFGSsolver.__init__(self, *args, **kwargs)
     if self.IAprm_truth is not None:
         self.IAprm_truth = flex.log(self.IAprm_truth)
         self.IBprm_truth = flex.log(self.IBprm_truth)
     IAx = flex.log(self.x[:self.Nhkl])
     IBx = flex.log(self.x[self.Nhkl:2 * self.Nhkl])
     Gx = self.x[2 * self.Nhkl:]
     self.x = IAx.concatenate(IBx)
     self.x = self.x.concatenate(Gx)
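
A note on the pattern above (also used in Examples #15 and #34 below): refining
log-intensities rather than raw intensities keeps the parameters positive under
unconstrained LBFGS steps. A minimal sketch of the round trip, assuming scitbx
is installed:

from scitbx.array_family import flex

I = flex.double([10.0, 100.0])
x = flex.log(I)        # refine in log space; x is unconstrained
I_back = flex.exp(x)   # exp maps any refined x back to a positive intensity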
Example #2
    def relative_entropy(self, other_pofx):
        this_one = self.base(self.x_int)
        that_one = other_pofx.base(self.x_int)
        this_one_log = flex.log(this_one + 1e-12)
        that_one_log = flex.log(that_one + 1e-12)
        this_that = this_one * (this_one_log - that_one_log)
        this_that = this_that * self.w_int
        this_that = flex.sum(this_that)

        that_this = that_one * (-this_one_log + that_one_log)
        that_this = that_this * self.w_int
        that_this = flex.sum(that_this)
        return that_this + this_that
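
relative_entropy above accumulates the symmetrized Kullback-Leibler divergence
(Jeffreys divergence) of two densities evaluated on a quadrature grid, weighted
by w_int. Since p*(log p - log q) + q*(log q - log p) = (p - q)*(log p - log q),
the same quantity can be computed in one pass; a hypothetical standalone
equivalent:

from scitbx.array_family import flex

def symmetrized_kl(p, q, w, eps=1e-12):
    # D_KL(p||q) + D_KL(q||p), integrated with quadrature weights w
    lp = flex.log(p + eps)
    lq = flex.log(q + eps)
    return flex.sum(w * (p - q) * (lp - lq))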
Example #3
def plot_centroid_weights_histograms(reflections, n_slots=50):
    from matplotlib import pyplot
    from scitbx.array_family import flex

    variances = flex.vec3_double([r.centroid_variance for r in reflections])
    vx, vy, vz = variances.parts()
    idx = (vx > 0) & (vy > 0) & (vz > 0)
    vx = vx.select(idx)
    vy = vy.select(idx)
    vz = vz.select(idx)
    wx = 1 / vx
    wy = 1 / vy
    wz = 1 / vz
    wx = flex.log(wx)
    wy = flex.log(wy)
    wz = flex.log(wz)
    hx = flex.histogram(wx, n_slots=n_slots)
    hy = flex.histogram(wy, n_slots=n_slots)
    hz = flex.histogram(wz, n_slots=n_slots)
    fig = pyplot.figure()

    idx2 = flex.max_index(wx)
    idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
    print(reflections[idx3])
    # Early return (debugging aid in the original): the plotting code below is unreachable.
    return

    # outliers = reflections.select(wx > 50)
    # for refl in outliers:
    # print refl

    for i, h in enumerate([hx, hy, hz]):
        ax = fig.add_subplot(311 + i)

        slots = h.slots().as_double()
        bins, data = hist_outline(h)
        log_scale = True
        if log_scale:
            data.set_selected(
                data == 0, 0.1
            )  # otherwise lines don't get drawn when we have some empty bins
            ax.set_yscale("log")
        ax.plot(bins, data, "-k", linewidth=2)
        # pyplot.suptitle(title)
        data_min = min(
            [slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
        data_max = max(
            [slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
        ax.set_xlim(data_min, data_max + h.slot_width())
    pyplot.show()
Example #4
def print_scaling_model_error_summary(experiments):
    """Get a summary of the error distribution of the models."""
    models = [e.scaling_model.to_dict() for e in experiments]
    first_model = models[0]
    component = first_model["configuration_parameters"]["corrections"][0]
    msg = ""
    if "est_standard_devs" in first_model[component]:
        p_sigmas = flex.double()
        for model in models:
            for component in model["configuration_parameters"]["corrections"]:
                if "est_standard_devs" in model[component]:
                    params = flex.double(model[component]["parameters"])
                    sigmas = flex.double(model[component]["est_standard_devs"])
                    null_value = flex.double(
                        len(params), model[component]["null_parameter_value"])
                    p_sigmas.extend(flex.abs(params - null_value) / sigmas)
        log_p_sigmas = flex.log(p_sigmas)
        # 0.69315 ~ log(2): flag parameters where |param - null| < 2 * sigma
        frac_high_uncertainty = (log_p_sigmas <
                                 0.69315).count(True) / len(log_p_sigmas)
        if frac_high_uncertainty > 0.5:
            msg = (
                "Warning: Over half ({:.2f}%) of model parameters have signficant\n"
                "uncertainty (sigma/abs(parameter) > 0.5), which could indicate a\n"
                "poorly-determined scaling problem or overparameterisation.\n"
            ).format(frac_high_uncertainty * 100)
        else:
            msg = ("{:.2f}% of model parameters have signficant uncertainty\n"
                   "(sigma/abs(parameter) > 0.5)\n").format(
                       frac_high_uncertainty * 100)
    return msg
Example #5
def estimate_wilson_b_factor(miller_array, low_res_cutoff=4.0):

    miller_array = miller_array.resolution_filter(
        d_max=low_res_cutoff).as_intensity_array()
    # print(miller_array.data().as_numpy_array())
    # print(miller_array.data())

    # Setup binner and extract radial averages
    binner = miller_array.setup_binner(auto_binning=True)
    binned = miller_array.wilson_plot(use_binning=True)
    #
    # print(binned.data[1:-1])
    # print(type(binned.data[1:-1]))
    # print(type(binned.data[1]))
    # print(binned.data[1:-1])
    # print([type(x) for x in binned.data[1:-1]])
    # print(type(1.0))
    binned_data = [
        float(x) if isinstance(x, float) else 1.0 for x in binned.data[1:-1]
    ]
    # print(flex.double(binned_data))

    # Convert to scale
    y_values = flex.log(flex.double(binned_data))
    x_values = flex.pow2(binner.bin_centers(1))
    # Check all values are valid
    # mask = flex.bool((True - numpy.isnan(list(y_values)) - numpy.isnan(list(x_values))).tolist())
    mask = flex.bool((~(numpy.isnan(list(y_values))
                        | numpy.isnan(list(x_values)))).tolist())

    # Perform scaling
    scl = LinearScaling(x_values=x_values.select(mask),
                        ref_values=y_values.select(mask))

    return -0.5 * scl.optimised_values[1]
Example #6
    def __call__(self, sigma_m):
        '''Calculate the fraction of observed intensity for each observation.

    Params:
        sigma_m The mosaicity

    Returns:
        A list of log intensity fractions

    '''
        from math import sqrt
        from scitbx.array_family import flex
        import scitbx.math

        # Tiny value
        TINY = 1e-10
        assert (sigma_m > TINY)

        # Calculate the two components to the fraction
        a = scitbx.math.erf(self.e1 / sigma_m)
        b = scitbx.math.erf(self.e2 / sigma_m)

        # Calculate the fraction of observed reflection intensity
        R = (a - b) / 2.0

        # Set any points <= 0 to 1e-10 (otherwise will get a floating
        # point error in log calculation below).
        assert (R.all_ge(0))
        mask = R < TINY
        assert (mask.count(True) < len(mask))
        R.set_selected(mask, TINY)

        # Return the logarithm of r
        return flex.log(R)
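
The clamp to TINY above is what keeps flex.log finite for reflections whose
predicted fraction is essentially zero; the same guard appears in Examples #11,
#13 and #21. In miniature (hypothetical, standalone):

from scitbx.array_family import flex

R = flex.double([0.5, 0.0])
R.set_selected(R < 1e-10, 1e-10)
print(list(flex.log(R)))  # second entry is log(1e-10), not a floating point error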
Example #7
 def compute_functional_and_gradients(self):
     self.a = self.x
     f = 0.
     g = flex.double(self.n)
     vector_T = flex.double(
         len(self.SP), self.x[0]
     ) + self.SP * self.x[1] + self.FP * self.x[2] + 0.5 * (
         self.SS * self.x[3] + self.SF * self.x[4] + self.FF * self.x[5])
     vector_lambda = vector_T / self.gain
     if (vector_lambda <= 0).count(True) > 0:
         raise RuntimeError("raising exception to avoid log(value<=0)")
     f = flex.sum(vector_lambda - (self.KI * flex.log(vector_lambda)))
     inner_paren = flex.double(len(self.SP), 1.) - (self.KI / vector_lambda)
     g_list = [
         flex.sum(deriv * inner_paren) for deriv in [
             flex.double(len(self.SP), 1.), self.SP, self.FP, self.SS,
             self.SF, self.FF
         ]
     ]
     #self.print_step("LBFGS stp",f)
     g_list[3] = 0.
     g_list[4] = 0.
     g_list[5] = 0.  # turn off the 2nd-order Taylor term
     g = flex.double(g_list) / self.gain
     return f, g
Example #8
 def compute_rg_from_data(self,q,i):
   q_sq = q*q
   ln_i = flex.log( i )
   cc_obj = flex.linear_regression( q_sq, ln_i )
   rg2 = -cc_obj.slope()*3.0
   lni = cc_obj.y_intercept()
   return rg2, lni
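
This is a Guinier fit: for small q, ln I(q) ~ ln I(0) - (Rg**2 / 3) * q**2, so
the slope of ln I against q**2 gives Rg**2 = -3 * slope, which is what the
function returns (note it returns Rg squared, not Rg). A hypothetical
round-trip check on synthetic data:

from scitbx.array_family import flex

rg, i0 = 20.0, 1000.0
q = flex.double([0.001 * i for i in range(1, 101)])
i_obs = i0 * flex.exp(-(rg * rg / 3.0) * q * q)
fit = flex.linear_regression(q * q, flex.log(i_obs))
assert abs(-3.0 * fit.slope() - rg * rg) < 1e-6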
Example #9
 def score_by_rmsd_xy(self, reverse=False):
     # smaller rmsds = better
     rmsd_x, rmsd_y, rmsd_z = flex.vec3_double(
         s.rmsds for s in self.all_solutions).parts()
     rmsd_xy = flex.sqrt(flex.pow2(rmsd_x) + flex.pow2(rmsd_y))
     score = flex.log(rmsd_xy) / math.log(2)
     return self.rmsd_weight * (score - flex.min(score))
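
flex.log(rmsd_xy) / math.log(2) is the change-of-base identity for a base-2
logarithm (the same idiom appears in Examples #28 and #31), e.g.:

import math
from scitbx.array_family import flex

values = flex.double([1.0, 2.0, 8.0])
print(list(flex.log(values) / math.log(2)))  # [0.0, 1.0, 3.0]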
Example #10
def Hn(m):
  m_ = m
  sc = math.log(m_.size())
  s = m_>0
  m_ = m_.select(s.iselection())
  m_ = m_/flex.sum(m_)
  return -flex.sum(m_*flex.log(m_))/sc
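
Hn computes a Shannon entropy normalized by its maximum value log(N), so the
result lies in [0, 1]: 1 for a uniform map, approaching 0 for a map
concentrated in a single point. A hypothetical quick check (reusing Hn and its
module-level math/flex imports):

uniform = flex.double(100, 1.0)
print(Hn(uniform))   # 1.0

peaked = flex.double(100, 1e-9)
peaked[0] = 1.0
print(Hn(peaked))    # close to 0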
Example #11
File: calculator.py Project: dials/dials
  def __call__(self, sigma_m):
    '''Calculate the fraction of observed intensity for each observation.

    Params:
        sigma_m The mosaicity

    Returns:
        A list of log intensity fractions

    '''
    from math import sqrt
    from scitbx.array_family import flex
    import scitbx.math

    # Tiny value
    TINY = 1e-10
    assert(sigma_m > TINY)

    # Calculate the two components to the fraction
    a = scitbx.math.erf(self.e1 / sigma_m)
    b = scitbx.math.erf(self.e2 / sigma_m)

    # Calculate the fraction of observed reflection intensity
    R = (a - b) / 2.0

    # Set any points <= 0 to 1e-10 (otherwise will get a floating
    # point error in log calculation below).
    assert(R.all_ge(0))
    mask = R < TINY
    assert(mask.count(True) < len(mask))
    R.set_selected(mask, TINY)

    # Return the logarithm of r
    return flex.log(R)
Example #12
def another_example(np=41,nt=5):
  x = flex.double( range(np) )/(np-1)
  y = 0.99*flex.exp(-x*x*0.5)
  y = -flex.log(1.0/y-1)
  w = y*y/1.0
  d = (flex.random_double(np)-0.5)*w
  y_obs = y+d

  y = 1.0/( 1.0 + flex.exp(-y) )

  fit_w = chebyshev_lsq_fit.chebyshev_lsq_fit(nt,
                                              x,
                                              y_obs,
                                              w )
  fit_w_f = chebyshev_polynome(
    nt, fit_w.low_limit, fit_w.high_limit, fit_w.coefs)


  fit_nw = chebyshev_lsq_fit.chebyshev_lsq_fit(nt,
                                              x,
                                              y_obs)
  fit_nw_f = chebyshev_polynome(
    nt, fit_nw.low_limit, fit_nw.high_limit, fit_nw.coefs)
  print()
  print("Coefficients from weighted lsq")
  print(list(fit_w.coefs))
  print("Coefficients from non-weighted lsq")
  print(list(fit_nw.coefs))
  assert flex.max( flex.abs(fit_nw.coefs-fit_w.coefs) ) > 0
Example #13
        def target(self, log_sigma):
            ''' The target for minimization. '''
            from math import sqrt, exp, pi, log
            from scitbx.array_family import flex
            import scitbx.math

            sigma_m = exp(log_sigma[0])

            # Tiny value
            TINY = 1e-10
            assert (sigma_m > TINY)

            # Calculate the two components to the fraction
            a = scitbx.math.erf(self.e1 / sigma_m)
            b = scitbx.math.erf(self.e2 / sigma_m)
            n = self.n
            K = self.K

            # Calculate the fraction of observed reflection intensity
            zi = (a - b) / 2.0

            # Set any points <= 0 to 1e-10 (otherwise will get a floating
            # point error in log calculation below).
            assert (zi.all_ge(0))
            mask = zi < TINY
            assert (mask.count(True) < len(mask))
            zi.set_selected(mask, TINY)

            # Compute the likelihood
            #
            # The likelihood here is a result of the sum of two log likelihood
            # functions:
            #
            # The first is the same as the one in Kabsch2010 as applied to the
            # reflection as a whole. This results in the term log(Z)
            #
            # The second is the likelihood for each reflection modelled as a Poisson
            # distribution with shape given by sigma M. This gives sum(ci log(zi)) -
            # sum(ci)*log(sum(zi))
            #
            # If the reflection is recorded on 1 frame, the second component is zero
            # and so the likelihood is dominated by the first term which can be seen
            # as a prior for sigma, which accounts for which reflections were actually
            # recorded.
            #
            L = 0
            for j, (i0,
                    i1) in enumerate(zip(self.indices[:-1], self.indices[1:])):
                selection = flex.size_t(range(i0, i1))
                zj = zi.select(selection)
                nj = n.select(selection)
                kj = K[j]
                Z = flex.sum(zj)
                #L += flex.sum(nj * flex.log(zj)) - kj * Z
                #L += flex.sum(nj * flex.log(zj)) - kj * log(Z)
                L += flex.sum(nj * flex.log(zj)) - kj * log(Z) + log(Z)
            logger.debug("Sigma M: %f, log(L): %f" % (sigma_m * 180 / pi, L))

            # Return the logarithm of r
            return -L
Example #14
def plot_centroid_weights_histograms(reflections, n_slots=50):
  from matplotlib import pyplot
  from scitbx.array_family import flex
  variances = flex.vec3_double([r.centroid_variance for r in reflections])
  vx, vy, vz = variances.parts()
  idx = (vx > 0) & (vy > 0) & (vz > 0)
  vx = vx.select(idx)
  vy = vy.select(idx)
  vz = vz.select(idx)
  wx = 1/vx
  wy = 1/vy
  wz = 1/vz
  wx = flex.log(wx)
  wy = flex.log(wy)
  wz = flex.log(wz)
  hx = flex.histogram(wx, n_slots=n_slots)
  hy = flex.histogram(wy, n_slots=n_slots)
  hz = flex.histogram(wz, n_slots=n_slots)
  fig = pyplot.figure()

  idx2 = flex.max_index(wx)
  idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
  print(reflections[idx3])
  # Early return (debugging aid in the original): the plotting code below is unreachable.
  return

  #outliers = reflections.select(wx > 50)
  #for refl in outliers:
    #print refl

  for i, h in enumerate([hx, hy, hz]):
    ax = fig.add_subplot(311+i)

    slots = h.slots().as_double()
    bins, data = hist_outline(h)
    log_scale = True
    if log_scale:
      data.set_selected(data == 0, 0.1) # otherwise lines don't get drawn when we have some empty bins
      ax.set_yscale("log")
    ax.plot(bins, data, '-k', linewidth=2)
    #pyplot.suptitle(title)
    data_min = min([slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
    data_max = max([slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
    ax.set_xlim(data_min, data_max+h.slot_width())
  pyplot.show()
Example #15
    def __init__(self, use_curvatures=True, *args, **kwargs):
        LBFGSsolver.__init__(self, *args, **kwargs)
        if self.IAprm_truth is not None:
            self.IAprm_truth = flex.log(self.IAprm_truth)
            self.IBprm_truth = flex.log(self.IBprm_truth)
            #self.Gprm_truth = flex.log(self.Gprm_truth)

        IAx = flex.log(self.x[:self.Nhkl])
        IBx = flex.log(self.x[self.Nhkl:2 * self.Nhkl])
        Gx = self.x[2 * self.Nhkl:]
        #Gx = flex.log(self.x[2*self.Nhkl:])
        self.x = IAx.concatenate(IBx)
        self.x = self.x.concatenate(Gx)

        if use_curvatures:
            self.minimizer = lbfgs_with_curvatures_mix_in.__init__(
                self,
                min_iterations=0,
                max_iterations=None,
                use_curvatures=True)
Example #16
 def get_z_scores(self, scale, b_value):
   i_scaled = flex.exp( self.calc_d_star_sq*b_value )*self.mean_calc*scale
   sel = ((self.mean_obs > 0) & (i_scaled > 0)) .iselection()
   ratio  = self.mean_obs.select(sel) / i_scaled.select(sel)
   mean = self.curve( self.calc_d_star_sq ).select(sel)
   assert ratio.all_gt(0) # FIXME need to filter first!
   ratio = flex.log(ratio)
   var = self.std(self.calc_d_star_sq).select(sel)
   d_star_sq = self.calc_d_star_sq.select(sel)
   assert var.all_ne(0)
   z = flex.abs(ratio-mean)/var
   z_ = flex.double(self.mean_obs.size(), -1)
   z_.set_selected(sel, z)
   return z_
Example #17
 def get_z_scores(self, scale, b_value):
     i_scaled = flex.exp(
         self.calc_d_star_sq * b_value) * self.mean_calc * scale
     sel = ((self.mean_obs > 0) & (i_scaled > 0)).iselection()
     ratio = self.mean_obs.select(sel) / i_scaled.select(sel)
     mean = self.curve(self.calc_d_star_sq).select(sel)
     assert ratio.all_gt(0)  # FIXME need to filter first!
     ratio = flex.log(ratio)
     var = self.std(self.calc_d_star_sq).select(sel)
     d_star_sq = self.calc_d_star_sq.select(sel)
     assert var.all_ne(0)
     z = flex.abs(ratio - mean) / var
     z_ = flex.double(self.mean_obs.size(), -1)
     z_.set_selected(sel, z)
     return z_
Example #18
 def compute_functional_and_gradients(self):
   self.a = self.x
   f = 0.
   g = flex.double(self.n)
   vector_T = flex.double(len(self.SP),self.x[0]) + self.SP*self.x[1] + self.FP*self.x[2] + 0.5*(
          self.SS*self.x[3] + self.SF*self.x[4] + self.FF*self.x[5])
   vector_lambda = vector_T/self.gain
   f = flex.sum(vector_lambda - (self.KI * flex.log(vector_lambda)))
   inner_paren = flex.double(len(self.SP),1.) - (self.KI/vector_lambda)
   g_list = [flex.sum( deriv * inner_paren ) for deriv in
              [flex.double(len(self.SP),1.), self.SP, self.FP, self.SS, self.SF, self.FF]]
   #self.print_step("LBFGS stp",f)
   g_list[3]=0.; g_list[4]=0.; g_list[5]=0. # turn off the 2nd-order Taylor term
   g = flex.double(g_list)/self.gain
   return f,g
Example #19
 def summary(self):
     i_scaled = flex.exp( self.calc_d_star_sq*self.b_value ) * \
                 self.mean_calc * self.scale
     sel = (self.mean_obs > 0).iselection()
     ratio = flex.log(i_scaled.select(sel) / self.mean_obs.select(sel))
     ratio_ = flex.double(self.mean_obs.size(), 0)
     ratio_.set_selected(sel, ratio)
     curves = [
         self.calc_d_star_sq,
         -ratio_,  # observed
         self.curve(self.calc_d_star_sq),  # expected
         self.get_z_scores(self.scale, self.b_value)
     ]
     return summary(all_curves=curves,
                    level=self.level,
                    all_bad_z_scores=self.all_bad_z_scores)
Example #20
 def summary (self) :
   i_scaled = flex.exp( self.calc_d_star_sq*self.b_value ) * \
               self.mean_calc * self.scale
   sel = (self.mean_obs > 0).iselection()
   ratio  = flex.log(i_scaled.select(sel) / self.mean_obs.select(sel))
   ratio_ = flex.double(self.mean_obs.size(), 0)
   ratio_.set_selected(sel, ratio)
   curves = [
     self.calc_d_star_sq,
     -ratio_, # observed
     self.curve( self.calc_d_star_sq ), # expected
     self.get_z_scores(self.scale, self.b_value)
   ]
   return summary(
     all_curves=curves,
     level=self.level,
     all_bad_z_scores=self.all_bad_z_scores)
Example #21
  def __call__(self, sigma_m):
    '''Calculate the fraction of observed intensity for each observation.

    Params:
        sigma_m The mosaicity

    Returns:
        A list of fractions of length n

    '''
    from math import sqrt, erf
    from scitbx.array_family import flex
    import numpy

    # Tiny value
    TINY = 1e-10

    # Ensure value for sigma_m is valid
    if sigma_m < TINY:
      raise ValueError('sigma_m must be > 0')

    # Oscillation range / 2
    dphi2 = self.dphi / 2

    # Calculate the denominator to the fraction
    den =  sqrt(2) * sigma_m / flex.abs(self.zeta)

    # Calculate the two components to the fraction
    a = flex.double([erf(x) for x in (self.tau + dphi2) / den])
    b = flex.double([erf(x) for x in (self.tau - dphi2) / den])

    # Calculate the fraction of observed reflection intensity
    R = (a - b) / 2.0

    # Set any points <= 0 to 1e-10 (otherwise will get a floating
    # point error in log calculation below).
    bad_index = numpy.where(R.as_numpy_array() < TINY)[0]
    for i in bad_index:
      R[int(i)] = TINY

    # Return the logarithm of r
    return flex.log(R)
Example #22
def another_example(np=41, nt=5):
    x = flex.double(range(np)) / (np - 1)
    y = 0.99 * flex.exp(-x * x * 0.5)
    y = -flex.log(1.0 / y - 1)
    w = y * y / 1.0
    d = (flex.random_double(np) - 0.5) * w
    y_obs = y + d

    y = 1.0 / (1.0 + flex.exp(-y))

    fit_w = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, x, y_obs, w)
    fit_w_f = chebyshev_polynome(nt, fit_w.low_limit, fit_w.high_limit,
                                 fit_w.coefs)

    fit_nw = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, x, y_obs)
    fit_nw_f = chebyshev_polynome(nt, fit_nw.low_limit, fit_nw.high_limit,
                                  fit_nw.coefs)
    print()
    print("Coefficients from weighted lsq")
    print(list(fit_w.coefs))
    print("Coefficients from non-weighted lsq")
    print(list(fit_nw.coefs))
    assert flex.max(flex.abs(fit_nw.coefs - fit_w.coefs)) > 0
Example #23
def pseudo_normalized_abs_delta_i(N=100):
  x = flex.random_double(size=N)
  x = -0.5*flex.log( 1.0-x )
  return(x)
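
The transform above is inverse-transform sampling: for U ~ Uniform(0, 1),
-(1/lam) * log(1 - U) follows an exponential distribution with rate lam (here
lam = 2, mean 0.5). A hypothetical sanity check:

from scitbx.array_family import flex

x = pseudo_normalized_abs_delta_i(N=100000)
print(flex.mean(x))  # expect roughly 0.5, the mean of Exp(rate=2)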
Example #24
def normal_variate(mu=0.0,sigma=1.0,N=100):
  "Normal variate via Box-Muller transform"
  U1 = flex.random_double(size=N)
  U2 = flex.random_double(size=N)
  return flex.sqrt(-2.0*flex.log(U1))*flex.cos(2.0*math.pi*U2)*sigma+mu
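
A hypothetical sanity check of the Box-Muller sampler above: for large N the
sample mean and standard deviation should approach mu and sigma.

import math
from scitbx.array_family import flex

samples = normal_variate(mu=5.0, sigma=2.0, N=100000)
mean = flex.mean(samples)
sd = math.sqrt(flex.mean(flex.pow2(samples - mean)))
print(mean, sd)  # expect roughly 5.0 and 2.0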
Example #25
def normal_variate(mu=0.0, sigma=1.0, N=100):
    "Normal variate via Box-Muller transform"
    U1 = flex.random_double(size=N)
    U2 = flex.random_double(size=N)
    return flex.sqrt(-2.0 * flex.log(U1)) * flex.cos(
        2.0 * math.pi * U2) * sigma + mu
Example #26
def pseudo_normalized_abs_delta_i(N=100):
    x = flex.random_double(size=N)
    x = -0.5 * flex.log(1.0 - x)
    return (x)
Example #27
def run_cc(params, reindexing_op, output):
    uniform, selected_uniform, have_iso_ref = load_cc_data(
        params, reindexing_op, output)
    NBIN = params.output.n_bins

    if have_iso_ref:
        slope, offset, corr_iso, N_iso = correlation(selected_uniform[1],
                                                     selected_uniform[0],
                                                     params.include_negatives)
        print >> output, "C.C. iso is %.1f%% on %d indices" % (100 * corr_iso,
                                                               N_iso)

    slope, offset, corr_int, N_int = correlation(selected_uniform[2],
                                                 selected_uniform[3],
                                                 params.include_negatives)
    print >> output, "C.C. int is %.1f%% on %d indices" % (100. * corr_int,
                                                           N_int)

    if have_iso_ref:
        binned_cc_ref, binned_cc_ref_N = binned_correlation(
            selected_uniform[1], selected_uniform[0], params.include_negatives)
        #binned_cc_ref.show(f=output)

        ref_scale = scale_factor(selected_uniform[1],
                                 selected_uniform[0],
                                 weights=flex.pow(selected_uniform[1].sigmas(),
                                                  -2),
                                 use_binning=True)
        #ref_scale.show(f=output)

        ref_riso = r1_factor(selected_uniform[1],
                             selected_uniform[0],
                             scale_factor=ref_scale,
                             use_binning=True)
        #ref_riso.show(f=output)

        ref_scale_all = scale_factor(selected_uniform[1],
                                     selected_uniform[0],
                                     weights=flex.pow(
                                         selected_uniform[1].sigmas(), -2))

        ref_riso_all = r1_factor(selected_uniform[1],
                                 selected_uniform[0],
                                 scale_factor=ref_scale_all)

    binned_cc_int, binned_cc_int_N = binned_correlation(
        selected_uniform[2], selected_uniform[3], params.include_negatives)
    #binned_cc_int.show(f=output)

    oe_scale = scale_factor(
        selected_uniform[2],
        selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) +
        flex.pow(selected_uniform[3].sigmas(), -2),
        use_binning=True)
    #oe_scale.show(f=output)

    oe_rint = r1_factor(selected_uniform[2],
                        selected_uniform[3],
                        scale_factor=oe_scale,
                        use_binning=True)
    #oe_rint.show(f=output)

    oe_rsplit = r_split(selected_uniform[2],
                        selected_uniform[3],
                        use_binning=True)

    oe_scale_all = scale_factor(
        selected_uniform[2],
        selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) +
        flex.pow(selected_uniform[3].sigmas(), -2),
    )

    oe_rint_all = r1_factor(selected_uniform[2],
                            selected_uniform[3],
                            scale_factor=oe_scale_all)
    oe_rsplit_all = r_split(selected_uniform[2], selected_uniform[3])
    if have_iso_ref:
        print("R factors Riso = %.1f%%, Rint = %.1f%%" %
              (100. * ref_riso_all, 100. * oe_rint_all), file=output)
    else:
        print("R factor Rint = %.1f%%" % (100. * oe_rint_all), file=output)

    split_sigma_data = split_sigma_test(selected_uniform[2],
                                        selected_uniform[3],
                                        scale=oe_scale,
                                        use_binning=True,
                                        show_plot=False)
    split_sigma_data_all = split_sigma_test(selected_uniform[2],
                                            selected_uniform[3],
                                            scale=oe_scale_all,
                                            use_binning=False,
                                            show_plot=False)

    print(file=output)
    if reindexing_op == "h,k,l":
        print("Table of Scaling Results:", file=output)
    else:
        print("Table of Scaling Results Reindexing as %s:" % reindexing_op,
              file=output)

    from libtbx import table_utils
    table_header = [
        "", "", "", "CC", " N", "CC", " N", "R", "R", "R", "Scale", "Scale",
        "SpSig"
    ]
    table_header2 = [
        "Bin", "Resolution Range", "Completeness", "int", "int", "iso", "iso",
        "int", "split", "iso", "int", "iso", "Test"
    ]
    table_data = []
    table_data.append(table_header)
    table_data.append(table_header2)

    items = binned_cc_int.binner.range_used()

    # XXX Make it clear what the completeness here actually is!
    cumulative_counts_given = 0
    cumulative_counts_complete = 0
    for bin in items:
        table_row = []
        table_row.append("%3d" % bin)
        table_row.append("%-13s" %
                         binned_cc_int.binner.bin_legend(i_bin=bin,
                                                         show_bin_number=False,
                                                         show_bin_range=False,
                                                         show_d_range=True,
                                                         show_counts=False))
        table_row.append("%13s" %
                         binned_cc_int.binner.bin_legend(i_bin=bin,
                                                         show_bin_number=False,
                                                         show_bin_range=False,
                                                         show_d_range=False,
                                                         show_counts=True))
        cumulative_counts_given += binned_cc_int.binner._counts_given[bin]
        cumulative_counts_complete += binned_cc_int.binner._counts_complete[
            bin]
        table_row.append("%.1f%%" % (100. * binned_cc_int.data[bin]))
        table_row.append("%7d" % (binned_cc_int_N.data[bin]))

        if have_iso_ref and binned_cc_ref.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * binned_cc_ref.data[bin]))
        else:
            table_row.append("--")

        if have_iso_ref and binned_cc_ref_N.data[bin] is not None:
            table_row.append("%6d" % (binned_cc_ref_N.data[bin]))
        else:
            table_row.append("--")

        if oe_rint.data[bin] is not None:
            table_row.append("%.1f%%" % (100. * oe_rint.data[bin]))
        else:
            table_row.append("--")

        if oe_rsplit.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * oe_rsplit.data[bin]))
        else:
            table_row.append("--")

        if have_iso_ref and ref_riso.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * ref_riso.data[bin]))
        else:
            table_row.append("--")

        if oe_scale.data[bin] is not None:
            table_row.append("%.3f" % oe_scale.data[bin])
        else:
            table_row.append("--")

        if have_iso_ref and ref_scale.data[bin] is not None:
            table_row.append("%.3f" % ref_scale.data[bin])
        else:
            table_row.append("--")

        if split_sigma_data.data[bin] is not None:
            table_row.append("%.4f" % split_sigma_data.data[bin])
        else:
            table_row.append("--")

        table_data.append(table_row)
    table_data.append([""] * len(table_header))

    table_row = [
        format_value("%3s", "All"),
        format_value("%-13s", "                 "),
        format_value(
            "%13s",
            "[%d/%d]" % (cumulative_counts_given, cumulative_counts_complete)),
        format_value("%.1f%%", 100 * corr_int),
        format_value("%7d", N_int)
    ]

    if have_iso_ref:
        table_row.extend(
            (format_value("%.1f%%",
                          100 * corr_iso), format_value("%6d", N_iso)))
    else:
        table_row.extend(("--", "--"))

    table_row.extend((format_value("%.1f%%", 100 * oe_rint_all),
                      format_value("%.1f%%", 100 * oe_rsplit_all)))
    if have_iso_ref:
        table_row.append(format_value("%.1f%%", 100 * ref_riso_all))
    else:
        table_row.append("--")

    table_row.append(format_value("%.3f", oe_scale_all))
    if have_iso_ref:
        table_row.append(format_value("%.3f", ref_scale_all))
    else:
        table_row.append("--")

    if split_sigma_data_all is not None:
        table_row.append("%.1f" % split_sigma_data_all)
    else:
        table_row.append("--")

    table_data.append(table_row)

    print(file=output)
    print(table_utils.format(table_data,
                             has_header=2,
                             justify='center',
                             delim=" "), file=output)
    print("""CCint is the CC-1/2 defined by Diederichs; correlation between odd/even images.
  Similarly, Scale int and R int are the scaling factor and scaling R factor between odd/even images.
  "iso" columns compare the whole XFEL dataset to the isomorphous reference.""", file=output)

    print("""Niso: result vs. reference common set""", end=" ", file=output)
    if params.include_negatives:
        print("""including negative merged intensities (set by phil parameter).""",
              file=output)
    elif params.scaling.log_cutoff is None:
        print(file=output)
    else:
        print("""with intensities < %7.2g filtered out (controlled by
    scaling.log_cutoff phil parameter set to %5.1f)""" % (math.exp(
            params.scaling.log_cutoff), params.scaling.log_cutoff),
              file=output)

    if have_iso_ref:
        assert N_iso == flex.sum(
            flex.double([x for x in binned_cc_ref_N.data if x is not None]))
    assert N_int == flex.sum(
        flex.double([x for x in binned_cc_int_N.data if x is not None]))

    if params.scaling.show_plots:
        from matplotlib import pyplot as plt
        plt.plot(flex.log(selected_uniform[-2].data()),
                 flex.log(selected_uniform[-1].data()), 'r.')
        plt.show()
        if have_iso_ref:
            plt.plot(flex.log(selected_uniform[0].data()),
                     flex.log(selected_uniform[1].data()), 'r.')
            plt.show()
    print(file=output)
Example #28
 def score_by_volume(self, reverse=False):
     # smaller volume = better
     volumes = flex.double(s.crystal.get_unit_cell().volume()
                           for s in self.all_solutions)
     score = flex.log(volumes) / math.log(2)
     return self.volume_weight * (score - flex.min(score))
Example #29
def exercise_gaussian_fit():

  # test fitting of a gaussian
  def do_gaussian_fit(scale, mu, sigma):
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start)/1000
    x = flex.double(frange(start, stop, step))
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    fit = curve_fitting.single_gaussian_fit(x, y)
    assert approx_equal(fit.a, scale, 1e-4)
    assert approx_equal(fit.b, mu, eps=1e-4)
    assert approx_equal(fit.c, sigma, eps=1e-4)

  for i in range(10):
    scale = random.random() * 1000
    sigma = (random.random() + 0.0001) * 10
    mu = (-1)**random.randint(0,1) * random.random() * 1000
    functor = curve_fitting.gaussian(scale, mu, sigma)
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start)/1000
    x = flex.double(frange(start, stop, step))
    fd_grads = finite_differences(functor, x)
    assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
    do_gaussian_fit(scale, mu, sigma)

  # if we take the log of a gaussian we can fit a parabola
  scale = 123
  mu = 3.2
  sigma = 0.1
  x = flex.double(frange(2, 4, 0.01))
  y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
  # need to be careful to only use values of y > 0
  eps = 1e-15
  x = flex.double([x[i] for i in range(x.size()) if y[i] > eps])
  y = flex.double([y[i] for i in range(y.size()) if y[i] > eps])
  fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
  c, b, a = fit.params
  assert approx_equal(mu, -b/(2*a))
  assert approx_equal(sigma*sigma, -1/(2*a))
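  # Why these identities hold: log y = log(scale) - (x - mu)**2 / (2 * sigma**2)
  # is the parabola c + b*x + a*x**2 with a = -1/(2 * sigma**2) and
  # b = mu / sigma**2, hence mu = -b/(2*a) and sigma**2 = -1/(2*a).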

  # test multiple gaussian fits
  gaussians = [curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
               curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)]
  x = flex.double(frange(0, 10, 0.1))
  y = flex.double(x.size())
  for i in range(len(gaussians)):
    g = gaussians[i]
    scale, mu, sigma = g.a, g.b, g.c
    y += g(x)

  starting_gaussians = [
    curve_fitting.gaussian(1, 4, 1),
    curve_fitting.gaussian(1, 5, 1)]
  fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
  for g1, g2 in zip(gaussians, fit.gaussians):
    assert approx_equal(g1.a, g2.a, eps=1e-4)
    assert approx_equal(g1.b, g2.b, eps=1e-4)
    assert approx_equal(g1.c, g2.c, eps=1e-4)

  # use example of 5-gaussian fit from here:
  # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
  gaussians = [curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
               curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
               curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
               curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
               curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)]

  x = flex.double(frange(0, 80, 0.1))
  y = flex.double(x.size())
  for i in range(len(gaussians)):
    g = gaussians[i]
    scale, mu, sigma = g.a, g.b, g.c
    y += g(x)

  termination_params = scitbx.lbfgs.termination_parameters(
    min_iterations=500)
  starting_gaussians = [curve_fitting.gaussian(1, 21, 2.1),
                        curve_fitting.gaussian(1, 30, 2.8),
                        curve_fitting.gaussian(1, 40, 2.2),
                        curve_fitting.gaussian(1, 51, 1.2),
                        curve_fitting.gaussian(1, 60, 2.3)]
  fit = curve_fitting.gaussian_fit(
    x, y, starting_gaussians, termination_params=termination_params)
  y_calc = fit.compute_y_calc()
  assert approx_equal(y, y_calc, eps=1e-2)

  have_cma_es = libtbx.env.has_module("cma_es")
  if have_cma_es:
    fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=5e-2)
Example #30
def Hw(m):
  s = m>0
  m_ = m
  m_ = m_.select(s.iselection())
  return -flex.sum(m_*flex.log(m_))
Example #31
 def score_by_fraction_indexed(self, reverse=False):
     # more indexed reflections = better
     fraction_indexed = flex.double(s.fraction_indexed
                                    for s in self.all_solutions)
     score = flex.log(fraction_indexed) / math.log(2)
     return self.n_indexed_weight * (-score + flex.max(score))
Example #32
    def calculate_scaling(self,
                          miller_array,
                          convergence_crit_perc=0.01,
                          convergence_reject_perc=97.5,
                          max_iter=20):
        """Calculate the scaling between two arrays"""

        assert convergence_reject_perc > 90.0

        # Convert to intensities and extract d_star_sq
        new_miller = miller_array.as_intensity_array()
        new_kernel = self._kernel_normalisation(miller_array=new_miller)
        # Calculate new range of d_star_sq
        d_star_sq_min, d_star_sq_max = self._common_d_star_sq_range(
            d_star_sq=new_kernel.d_star_sq_array)

        # Create interpolator for the two arrays (new and reference)
        interpolator = scale_curves.curve_interpolator(d_star_sq_min,
                                                       d_star_sq_max,
                                                       self._npoints)

        # Interpolate the two curves (use full range of the two array)
        new_itpl_d_star_sq, new_itpl_mean_I, dummy, dummy = interpolator.interpolate(
            x_array=new_kernel.d_star_sq_array,
            y_array=new_kernel.mean_I_array)
        ref_itpl_d_star_sq, ref_itpl_mean_I, dummy, dummy = interpolator.interpolate(
            x_array=self.ref_kernel.d_star_sq_array,
            y_array=self.ref_kernel.mean_I_array)

        # Initialise convergence loop - begin by scaling over all points
        selection = flex.bool(self._npoints, True)
        # Set initial scale factor to small value
        curr_b = 1e-6
        # Percent change between iterations - convergence when delta < convergence_crit_perc
        n_iter = 0
        # Report in case of error
        report = Report('Scaling log:', verbose=False)
        while n_iter < max_iter:
            report('---')
            report('ITER: ' + str(n_iter))

            if selection.all_eq(False):
                print("Selection now empty, breaking")
                break

            # Run optimisation on the linear scaling
            lsc = ExponentialScaling(x_values=interpolator.target_x,
                                     ref_values=ref_itpl_mean_I,
                                     scl_values=new_itpl_mean_I,
                                     weights=selection.as_double())
            # Calculate scaling B-factor
            lsc.scaling_b_factor = -0.5 * list(lsc.optimised_values)[0]
            # Break if fitted to 0
            if approx_equal_relatively(0.0, lsc.scaling_b_factor, 1e-6):
                report('Scaling is approximately 0.0 - stopping')
                break
            # Calculate percentage change
            report('Curr/New: ' + str(curr_b) + '\t' +
                   str(lsc.scaling_b_factor))
            delta = abs((curr_b - lsc.scaling_b_factor) / curr_b)
            report('Delta: ' + str(delta))
            if delta < convergence_crit_perc:
                report('Scaling has converged to within tolerance - stopping')
                break
            # Update selection
            report('Curr Selection Size: ' + str(sum(selection)))
            ref_diffs = flex.log(lsc.ref_values) - flex.log(lsc.out_values)
            #abs_diffs = flex.abs(ref_diffs)
            sel_diffs = ref_diffs.select(selection)
            rej_val_high = numpy.percentile(sel_diffs, convergence_reject_perc)
            rej_val_low = numpy.percentile(sel_diffs,
                                           100.0 - convergence_reject_perc)
            report('Percentile: ' + str(convergence_reject_perc) + '\t<' +
                   str(rej_val_low) + '\t>' + str(rej_val_high))
            selection.set_selected(ref_diffs > rej_val_high, False)
            selection.set_selected(ref_diffs < rej_val_low, False)

            report('New Selection Size: ' + str(sum(selection)))
            # Update loop params
            curr_b = lsc.scaling_b_factor
            n_iter += 1

        lsc.unscaled_ln_rmsd = (flex.log(lsc.ref_values) - flex.log(
            lsc.scl_values)).norm() / (lsc.ref_values.size()**0.5)
        lsc.scaled_ln_rmsd = (flex.log(lsc.ref_values) - flex.log(
            lsc.out_values)).norm() / (lsc.ref_values.size()**0.5)

        lsc.unscaled_ln_dev = flex.sum(
            flex.abs(flex.log(lsc.ref_values) - flex.log(lsc.scl_values)))
        lsc.scaled_ln_dev = flex.sum(
            flex.abs(flex.log(lsc.ref_values) - flex.log(lsc.out_values)))

        return lsc
Example #33
def run_cc(params, reindexing_op, output):
    uniform, selected_uniform, have_iso_ref = load_cc_data(params, reindexing_op, output)
    NBIN = params.output.n_bins

    if have_iso_ref:
        slope, offset, corr_iso, N_iso = correlation(selected_uniform[1], selected_uniform[0], params.include_negatives)
        print >> output, "C.C. iso is %.1f%% on %d indices" % (100 * corr_iso, N_iso)

    slope, offset, corr_int, N_int = correlation(selected_uniform[2], selected_uniform[3], params.include_negatives)
    print >> output, "C.C. int is %.1f%% on %d indices" % (100.0 * corr_int, N_int)

    if have_iso_ref:
        binned_cc_ref, binned_cc_ref_N = binned_correlation(
            selected_uniform[1], selected_uniform[0], params.include_negatives
        )
        # binned_cc_ref.show(f=output)

        ref_scale = scale_factor(
            selected_uniform[1],
            selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2),
            use_binning=True,
        )
        # ref_scale.show(f=output)

        ref_riso = r1_factor(selected_uniform[1], selected_uniform[0], scale_factor=ref_scale, use_binning=True)
        # ref_riso.show(f=output)

        ref_scale_all = scale_factor(
            selected_uniform[1], selected_uniform[0], weights=flex.pow(selected_uniform[1].sigmas(), -2)
        )

        ref_riso_all = r1_factor(selected_uniform[1], selected_uniform[0], scale_factor=ref_scale_all)

    binned_cc_int, binned_cc_int_N = binned_correlation(
        selected_uniform[2], selected_uniform[3], params.include_negatives
    )
    # binned_cc_int.show(f=output)

    oe_scale = scale_factor(
        selected_uniform[2],
        selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) + flex.pow(selected_uniform[3].sigmas(), -2),
        use_binning=True,
    )
    # oe_scale.show(f=output)

    oe_rint = r1_factor(selected_uniform[2], selected_uniform[3], scale_factor=oe_scale, use_binning=True)
    # oe_rint.show(f=output)

    oe_rsplit = r_split(selected_uniform[2], selected_uniform[3], use_binning=True)

    oe_scale_all = scale_factor(
        selected_uniform[2],
        selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) + flex.pow(selected_uniform[3].sigmas(), -2),
    )

    oe_rint_all = r1_factor(selected_uniform[2], selected_uniform[3], scale_factor=oe_scale_all)
    oe_rsplit_all = r_split(selected_uniform[2], selected_uniform[3])
    if have_iso_ref:
        print("R factors Riso = %.1f%%, Rint = %.1f%%" % (100.0 * ref_riso_all, 100.0 * oe_rint_all), file=output)
    else:
        print("R factor Rint = %.1f%%" % (100.0 * oe_rint_all), file=output)

    split_sigma_data = split_sigma_test(
        selected_uniform[2], selected_uniform[3], scale=oe_scale, use_binning=True, show_plot=False
    )
    split_sigma_data_all = split_sigma_test(
        selected_uniform[2], selected_uniform[3], scale=oe_scale_all, use_binning=False, show_plot=False
    )

    print(file=output)
    if reindexing_op == "h,k,l":
        print("Table of Scaling Results:", file=output)
    else:
        print("Table of Scaling Results Reindexing as %s:" % reindexing_op, file=output)

    from libtbx import table_utils

    table_header = ["", "", "", "CC", " N", "CC", " N", "R", "R", "R", "Scale", "Scale", "SpSig"]
    table_header2 = [
        "Bin",
        "Resolution Range",
        "Completeness",
        "int",
        "int",
        "iso",
        "iso",
        "int",
        "split",
        "iso",
        "int",
        "iso",
        "Test",
    ]
    table_data = []
    table_data.append(table_header)
    table_data.append(table_header2)

    items = binned_cc_int.binner.range_used()

    # XXX Make it clear what the completeness here actually is!
    cumulative_counts_given = 0
    cumulative_counts_complete = 0
    for bin in items:
        table_row = []
        table_row.append("%3d" % bin)
        table_row.append(
            "%-13s"
            % binned_cc_int.binner.bin_legend(
                i_bin=bin, show_bin_number=False, show_bin_range=False, show_d_range=True, show_counts=False
            )
        )
        table_row.append(
            "%13s"
            % binned_cc_int.binner.bin_legend(
                i_bin=bin, show_bin_number=False, show_bin_range=False, show_d_range=False, show_counts=True
            )
        )
        cumulative_counts_given += binned_cc_int.binner._counts_given[bin]
        cumulative_counts_complete += binned_cc_int.binner._counts_complete[bin]
        table_row.append("%.1f%%" % (100.0 * binned_cc_int.data[bin]))
        table_row.append("%7d" % (binned_cc_int_N.data[bin]))

        if have_iso_ref and binned_cc_ref.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * binned_cc_ref.data[bin]))
        else:
            table_row.append("--")

        if have_iso_ref and binned_cc_ref_N.data[bin] is not None:
            table_row.append("%6d" % (binned_cc_ref_N.data[bin]))
        else:
            table_row.append("--")

        if oe_rint.data[bin] is not None:
            table_row.append("%.1f%%" % (100.0 * oe_rint.data[bin]))
        else:
            table_row.append("--")

        if oe_rsplit.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * oe_rsplit.data[bin]))
        else:
            table_row.append("--")

        if have_iso_ref and ref_riso.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * ref_riso.data[bin]))
        else:
            table_row.append("--")

        if oe_scale.data[bin] is not None:
            table_row.append("%.3f" % oe_scale.data[bin])
        else:
            table_row.append("--")

        if have_iso_ref and ref_scale.data[bin] is not None:
            table_row.append("%.3f" % ref_scale.data[bin])
        else:
            table_row.append("--")

        if split_sigma_data.data[bin] is not None:
            table_row.append("%.4f" % split_sigma_data.data[bin])
        else:
            table_row.append("--")

        table_data.append(table_row)
    table_data.append([""] * len(table_header))

    table_row = [
        format_value("%3s", "All"),
        format_value("%-13s", "                 "),
        format_value("%13s", "[%d/%d]" % (cumulative_counts_given, cumulative_counts_complete)),
        format_value("%.1f%%", 100 * corr_int),
        format_value("%7d", N_int),
    ]

    if have_iso_ref:
        table_row.extend((format_value("%.1f%%", 100 * corr_iso), format_value("%6d", N_iso)))
    else:
        table_row.extend(("--", "--"))

    table_row.extend((format_value("%.1f%%", 100 * oe_rint_all), format_value("%.1f%%", 100 * oe_rsplit_all)))
    if have_iso_ref:
        table_row.append(format_value("%.1f%%", 100 * ref_riso_all))
    else:
        table_row.append("--")

    table_row.append(format_value("%.3f", oe_scale_all))
    if have_iso_ref:
        table_row.append(format_value("%.3f", ref_scale_all))
    else:
        table_row.append("--")

    if split_sigma_data_all is not None:
        table_row.append("%.1f" % split_sigma_data_all)
    else:
        table_row.append("--")

    table_data.append(table_row)

    print(file=output)
    print(table_utils.format(table_data, has_header=2, justify="center", delim=" "),
          file=output)
    print("""CCint is the CC-1/2 defined by Diederichs; correlation between odd/even images.
  Similarly, Scale int and R int are the scaling factor and scaling R factor between odd/even images.
  "iso" columns compare the whole XFEL dataset to the isomorphous reference.""", file=output)

    print("""Niso: result vs. reference common set""", end=" ", file=output)
    if params.include_negatives:
        print("""including negative merged intensities (set by phil parameter).""", file=output)
    elif params.scaling.log_cutoff is None:
        print(file=output)
    else:
        print("""with intensities < %7.2g filtered out (controlled by
    scaling.log_cutoff phil parameter set to %5.1f)""" % (
            math.exp(params.scaling.log_cutoff),
            params.scaling.log_cutoff,
        ), file=output)

    if have_iso_ref:
        assert N_iso == flex.sum(flex.double([x for x in binned_cc_ref_N.data if x is not None]))
    assert N_int == flex.sum(flex.double([x for x in binned_cc_int_N.data if x is not None]))

    if params.scaling.show_plots:
        from matplotlib import pyplot as plt

        plt.plot(flex.log(selected_uniform[-2].data()), flex.log(selected_uniform[-1].data()), "r.")
        plt.show()
        if have_iso_ref:
            plt.plot(flex.log(selected_uniform[0].data()), flex.log(selected_uniform[1].data()), "r.")
            plt.show()
    print(file=output)
Example #34
    def __init__(self,
                 conj_grad=True,
                 weights=None,
                 plot_truth=False,
                 plot=False,
                 sovlerization_maximus=True,
                 *args,
                 **kwargs):
        solvers.LBFGSsolver.__init__(self, *args,
                                     **kwargs)  # NOTE: do it with lbfgs=False
        # ^ brings in Yobs, LA, LB, PA, PB, Nhkl, Ns, Nmeas,   Aidx, Gidx

        # correct because working with logs
        if self.IAprm_truth is not None:
            self.IAprm_truth = flex.log(self.IAprm_truth)
            self.IBprm_truth = flex.log(self.IBprm_truth)
            self.Gprm_truth = self.Gprm_truth
            self.x_truth = (self.IAprm_truth.concatenate(
                self.IBprm_truth)).concatenate(self.Gprm_truth)

        self.x_init = flex.double(np.ascontiguousarray(
            self.guess["IAprm"])).concatenate(
                flex.double(np.ascontiguousarray(
                    self.guess["IBprm"]))).concatenate(
                        flex.double(np.ascontiguousarray(self.guess["Gprm"])))
        assert (len(self.x_init) == self.Nhkl * 2 + self.Ns)

        IAx = flex.log(self.x_init[:self.Nhkl])
        IBx = flex.log(self.x_init[self.Nhkl:2 * self.Nhkl])
        Gx = self.x_init[2 * self.Nhkl:]

        self.x_init = IAx.concatenate(IBx)
        self.x_init = self.x_init.concatenate(Gx)

        self.counter = 0

        # set dummy weights for now
        if weights is None:
            self.Wobs = flex.double(np.ones(len(self.Yobs)))
        else:
            self.Wobs = weights

        if plot_truth:
            try:
                truth = self.x_truth
            except AttributeError as error:
                print(error)
                truth = None
        else:
            truth = None

        self.helper = eigen_helper(initial_estimates=self.x_init,
                                   Nhkl=self.Nhkl,
                                   plot=plot,
                                   truth=truth)
        self.helper.eigen_wrapper.conj_grad = conj_grad
        self.helper.set_basic_data(self.Yobs, self.Wobs, self.Aidx, self.Gidx,
                                   self.PA, self.PB, self.LA, self.LB,
                                   self.Nhkl, self.Ns)

        self.helper.restart()

        if sovlerization_maximus:
            try:
                _ = normal_eqns_solving.levenberg_marquardt_iterations_encapsulated_eqns(
                    non_linear_ls=self.helper,
                    n_max_iterations=10000,
                    track_all=True,
                    step_threshold=0.0001)
            except (KeyboardInterrupt, AssertionError):
                pass
            print "End of minimization: Converged", self.helper.counter, "cycles"
            print self.helper.get_eigen_summary()
            print "Converged functional: ", self.helper.functional_basic(
                self.helper.x)
Example #35
def exercise_gaussian_fit():

    # test fitting of a gaussian
    def do_gaussian_fit(scale, mu, sigma):
        start = mu - 6 * sigma
        stop = mu + 6 * sigma
        step = (stop - start) / 1000
        x = flex.double(frange(start, stop, step))
        y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
        fit = curve_fitting.single_gaussian_fit(x, y)
        assert approx_equal(fit.a, scale, 1e-4)
        assert approx_equal(fit.b, mu, eps=1e-4)
        assert approx_equal(fit.c, sigma, eps=1e-4)

    for i in range(10):
        scale = random.random() * 1000
        sigma = (random.random() + 0.0001) * 10
        mu = (-1)**random.randint(0, 1) * random.random() * 1000
        functor = curve_fitting.gaussian(scale, mu, sigma)
        start = mu - 6 * sigma
        stop = mu + 6 * sigma
        step = (stop - start) / 1000
        x = flex.double(frange(start, stop, step))
        fd_grads = finite_differences(functor, x)
        assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
        do_gaussian_fit(scale, mu, sigma)

    # if we take the log of a gaussian we can fit a parabola
    scale = 123
    mu = 3.2
    sigma = 0.1
    x = flex.double(frange(2, 4, 0.01))
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    # need to be careful to only use values of y > 0
    eps = 1e-15
    x = flex.double([x[i] for i in range(x.size()) if y[i] > eps])
    y = flex.double([y[i] for i in range(y.size()) if y[i] > eps])
    fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
    c, b, a = fit.params
    assert approx_equal(mu, -b / (2 * a))
    assert approx_equal(sigma * sigma, -1 / (2 * a))
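    # Why these identities hold: log y = log(scale) - (x - mu)**2 / (2 * sigma**2)
    # is the parabola c + b*x + a*x**2 with a = -1/(2 * sigma**2) and
    # b = mu / sigma**2, hence mu = -b/(2*a) and sigma**2 = -1/(2*a).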

    # test multiple gaussian fits
    gaussians = [
        curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
        curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)
    ]
    x = flex.double(frange(0, 10, 0.1))
    y = flex.double(x.size())
    for i in range(len(gaussians)):
        g = gaussians[i]
        scale, mu, sigma = g.a, g.b, g.c
        y += g(x)

    starting_gaussians = [
        curve_fitting.gaussian(1, 4, 1),
        curve_fitting.gaussian(1, 5, 1)
    ]
    fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
    for g1, g2 in zip(gaussians, fit.gaussians):
        assert approx_equal(g1.a, g2.a, eps=1e-4)
        assert approx_equal(g1.b, g2.b, eps=1e-4)
        assert approx_equal(g1.c, g2.c, eps=1e-4)

    # use example of 5-gaussian fit from here:
    # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
    gaussians = [
        curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
        curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
        curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
        curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
        curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)
    ]

    x = flex.double(frange(0, 80, 0.1))
    y = flex.double(x.size())
    for i in range(len(gaussians)):
        g = gaussians[i]
        scale, mu, sigma = g.a, g.b, g.c
        y += g(x)

    termination_params = scitbx.lbfgs.termination_parameters(
        min_iterations=500)
    starting_gaussians = [
        curve_fitting.gaussian(1, 21, 2.1),
        curve_fitting.gaussian(1, 30, 2.8),
        curve_fitting.gaussian(1, 40, 2.2),
        curve_fitting.gaussian(1, 51, 1.2),
        curve_fitting.gaussian(1, 60, 2.3)
    ]
    fit = curve_fitting.gaussian_fit(x,
                                     y,
                                     starting_gaussians,
                                     termination_params=termination_params)
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=1e-2)

    have_cma_es = libtbx.env.has_module("cma_es")
    if have_cma_es:
        fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
        y_calc = fit.compute_y_calc()
        assert approx_equal(y, y_calc, eps=5e-2)