def scale_data(indices, iobs, scale_ref, parameter, calc_cc):
    """Determine a scale (and optionally B-factor) of iobs against scale_ref.

    Parameters:
      indices: miller indices of the observations (flex array)
      iobs: observed intensities matching `indices`
      scale_ref: reference miller array to scale against
      parameter: "k" for linear scale only, "kb" for scale + B-factor
      calc_cc: if True, also compute the linear correlation coefficient
               on the common reflections

    Returns:
      (k, b, cc) — b and/or cc are NaN when not determined.

    Raises:
      ValueError: if `parameter` is neither "k" nor "kb".
    """
    k, b, cc = 1, float("nan"), float("nan")

    # Sort observations by index so the common-index search can run.
    sortp = yamtbx_utils_ext.sort_permutation_fast_less(indices)
    indices = indices.select(sortp)
    iobs = iobs.select(sortp)

    # Matching reflections present in both the reference and the observations.
    sel0, sel1 = yamtbx_utils_ext.my_common_indices(scale_ref.indices(), indices)
    iobs_c = iobs.select(sel1)
    ref_c = scale_ref.data().select(sel0)

    # Too few common reflections to determine a reliable scale.
    # (sel0/sel1 are paired, so the two sizes are equal; `or` is the
    # defensively correct form of the original `and`.)
    if iobs_c.size() < 10 or ref_c.size() < 10:
        return k, b, cc

    if parameter == "k":
        # Linear least-squares scale: k = sum(ref*obs) / sum(obs^2); b stays NaN.
        k = flex.sum(ref_c*iobs_c) / flex.sum(flex.pow2(iobs_c))
    elif parameter == "kb":
        from yamtbx.dataproc.scale_data import kBdecider
        kbd = kBdecider(scale_ref,
                        miller.array(scale_ref.customized_copy(indices=indices), data=iobs))
        k, b = kbd.run()
    else:
        # Was a string exception (`raise "..."`) — invalid; raise a real one.
        raise ValueError("Unknown parameter: %s" % parameter)

    if calc_cc:
        corr = flex.linear_correlation(ref_c, iobs_c)
        if corr.is_well_defined(): cc = corr.coefficient()

    return k, b, cc
Exemplo n.º 2
0
        #if "hkl" in mtzfile:
        #    Is[-1][1] = miller.array(miller_set=Is[-1][1], data= Is[-1][1].data() * flex.exp(4.8*Is[-1][1].d_star_sq().data()))

    # Take common sets
    Is = commonalize(Is) ####

    # Decide scale
    if not params.noscale:
        for i in xrange(1, len(Is)):
            I = Is[i][1].resolution_filter(d_max=params.scale.dmax, d_min=params.scale.dmin)
            I0 = Is[0][1].resolution_filter(d_max=params.scale.dmax, d_min=params.scale.dmin)
            I, I0 = I.common_sets(I0, assert_is_similar_symmetry=False)

            if params.scale.bscale:
                Is[i][2] = kBdecider(I0, I).run()
                print "Scale for", Is[i][0], "is", Is[i][2]
            else:
                scale = flex.sum(I0.data()*I.data()) / flex.sum(flex.pow2(I.data()))
                Is[i][2] = scale, 0
                print "Scale for", Is[i][0], "is", scale

    print Is[0][1].data().size()
    # Prepare plot data
    for_plot = OrderedDict() # {name: [mean, ...], ..}
    binner = Is[0][1].setup_binner(n_bins=params.nbins)#reflections_per_bin=50)
    for i_bin in binner.range_used():
        for name, I, (scale, b) in Is:
            dmax, dmin = binner.bin_d_range(i_bin)
            Isel = I.resolution_filter(d_max=dmax, d_min=dmin)
Exemplo n.º 3
0
def run(params, mtzfiles):
    arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)

    if params.take_common:
        arrays = commonalize(arrays)

    maxlen_f = max(map(lambda x: len(x[0]), arrays))

    ref_f_obs = arrays[0][1]

    scales = []
    for f, f_obs, f_model, flag in arrays:
        if ref_f_obs == f_obs: k, B = 1., 0
        else: k, B = kBdecider(ref_f_obs, f_obs).run()

        scales.append((k, B))

    if params.reference != "first":
        if params.reference == "bmin": # scale to strongest
            kref, bref = max(scales, key=lambda x:x[1])
        elif params.reference == "bmax": # scale to most weak
            kref, bref = min(scales, key=lambda x:x[1])
        elif params.reference == "bmed": # scale to most weak
            perm = range(len(scales))
            perm.sort(key=lambda i:scales[i][1])
            kref, bref = scales[perm[len(perm)//2]]
        else:
            raise "Never reaches here"

        print "# Set K=%.2f B=%.2f as reference" % (kref,bref)
        scales = map(lambda x: (x[0]/kref, x[1]-bref), scales) # not bref-x[1], because negated later

    print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
    for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
        d_star_sq = f_obs.d_star_sq().data()
        scale = k * flex.exp(-B*d_star_sq)
        
        # Normalized
        #f_obs.setup_binner(auto_binning=True)
        #f_model.setup_binner(auto_binning=True)
        #e_obs, e_model = map(lambda x:x.quasi_normalize_structure_factors(), (f_obs, f_model))
        e_obs = absolute_scaling.kernel_normalisation(f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None), auto_kernel=True)
        e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
        e_model = absolute_scaling.kernel_normalisation(f_model.customized_copy(data=f_model.data()*scale, sigmas=None), auto_kernel=True)
        e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()

        f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
        f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())

        e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
        e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())

        r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
        r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))

        cc_work_E = calc_cc(e_obs_w, e_model_w, False)
        cc_free_E = calc_cc(e_obs_t, e_model_t, False)
        #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
        #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)

        se = calc_sigmaa(f_obs, f_model, flag)
        sigmaa = flex.mean(se.sigmaa().data())
        fom = flex.mean(se.fom().data())

        print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)
def run(params, xfiles):
    # read reference
    arrays = iotbx.file_reader.any_file(
        params.reference.file).file_server.miller_arrays
    arrays = filter(lambda ar: ar.is_xray_data_array(), arrays)
    if params.reference.label is not None:
        arrays = filter(
            lambda ar: ar.info().label_string() == params.reference.label,
            arrays)

    if len(arrays) != 1:
        print "Can't decide data to use in reference file:", params.reference.file
        print "Choose label"
        for ar in arrays:
            print ar.info().label_string()
        return

    refdata = arrays[0].as_intensity_array()
    refdata = refdata.resolution_filter(d_max=params.reference.d_max,
                                        d_min=params.reference.d_min)

    print "file n.common k b cc.org cc.mean cc.scaled a b c al be ga"

    for xf in xfiles:
        print "# Reading", xf
        try:
            xfile = DenzoXfile(xf)
        except:
            traceback.print_exc()
            continue
        a = xfile.miller_array(anomalous_flag=refdata.anomalous_flag())
        a = a.select(a.sigmas() > 0)
        a = a.resolution_filter(d_min=params.d_min, d_max=params.d_max)
        if params.sigma_cutoff is not None:
            a = a.select(a.data() / a.sigmas() >= params.sigma_cutoff)

        a = a.merge_equivalents(use_internal_variance=False).array()

        tmp, a = refdata.common_sets(a, assert_is_similar_symmetry=False)
        n_common = tmp.size()

        if n_common == 0:
            print "# No useful reflection in this file. skip."
            continue

        corr = flex.linear_correlation(tmp.data(), a.data())
        cc_org = corr.coefficient() if corr.is_well_defined() else float("nan")

        # Calc CC in resolution bin and average
        tmp.setup_binner(auto_binning=True)
        cc_bins = []
        for i_bin in tmp.binner().range_used():
            sel = tmp.binner().selection(i_bin)
            corr = flex.linear_correlation(
                tmp.select(sel).data(),
                a.select(sel).data())
            if not corr.is_well_defined(): continue
            cc_bins.append(corr.coefficient())

        cc_mean = sum(cc_bins) / float(
            len(cc_bins)) if len(cc_bins) > 0 else float("nan")

        # Determine scale and B
        k, b = kBdecider(tmp, a).run()

        bfac = flex.exp(-b * a.d_star_sq().data()) if b != 0 else 1.
        corr = flex.linear_correlation(tmp.data(), a.data() * k * bfac)
        cc_scaled = corr.coefficient() if corr.is_well_defined() else float(
            "nan")

        print "%s %5d %.3e %.3e %.4f %.4f %.4f" % (xf, n_common, k, b, cc_org,
                                                   cc_mean, cc_scaled),
        print("%.3f " * 6) % a.unit_cell().parameters()

        if params.show_plot:
            import pylab
            from matplotlib.ticker import FuncFormatter
            s3_formatter = lambda x, pos: "inf" if x == 0 else "%.2f" % (x**(
                -1 / 3))

            fig, ax1 = pylab.plt.subplots()

            plot_x = map(lambda i: tmp.binner().bin_d_range(i)[1]**(-3),
                         tmp.binner().range_used())

            #for name, ar in (("reference", tmp), ("data", a)):
            vals = map(
                lambda i: flex.mean(tmp.data().select(tmp.binner().selection(i)
                                                      )),
                tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="reference")

            scale = flex.sum(tmp.data() * a.data()) / flex.sum(
                flex.pow2(a.data()))
            print "Linear-scale=", scale
            vals = map(
                lambda i: scale * flex.mean(a.data().select(tmp.binner().
                                                            selection(i))),
                tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data")
            vals = map(
                lambda i: flex.mean(
                    (a.data() * k * bfac).select(tmp.binner().selection(i))),
                tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled")
            """
            from mmtbx.scaling import absolute_scaling, relative_scaling
            ls_scaling = relative_scaling.ls_rel_scale_driver(tmp, tmp.customized_copy(data=a.data(),sigmas=a.sigmas()), use_intensities=True, scale_weight=True, use_weights=True)
            ls_scaling.show()
            vals = map(lambda i: flex.mean(ls_scaling.derivative.resolution_filter(*tmp.binner().bin_d_range(i)).data()), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled2")
            """

            pylab.legend()
            pylab.xlabel('resolution (d^-3)')
            pylab.ylabel('<I>')
            pylab.setp(pylab.gca().get_legend().get_texts(), fontsize="small")
            pylab.title('Scaled with B-factors (%.2f)' % b)

            pylab.gca().xaxis.set_major_formatter(FuncFormatter(s3_formatter))

            ax2 = ax1.twinx()
            ax2.plot(plot_x, cc_bins, "black")
            ax2.set_ylabel('CC')
            pylab.show()
Exemplo n.º 5
0
def run(params, xfiles):
    # read reference
    arrays = iotbx.file_reader.any_file(params.reference.file).file_server.miller_arrays
    arrays = filter(lambda ar: ar.is_xray_data_array(), arrays)
    if params.reference.label is not None:
        arrays = filter(lambda ar: ar.info().label_string() == params.reference.label, arrays)

    if len(arrays) != 1:
        print "Can't decide data to use in reference file:", params.reference.file
        print "Choose label"
        for ar in arrays: print ar.info().label_string()
        return

    refdata = arrays[0].as_intensity_array()
    refdata = refdata.resolution_filter(d_max=params.reference.d_max, d_min=params.reference.d_min)

    print "file n.common k b cc.org cc.mean cc.scaled a b c al be ga"
    
    for xf in xfiles:
        print "# Reading", xf
        try:
            xfile = DenzoXfile(xf)
        except:
            traceback.print_exc()
            continue
        a = xfile.miller_array(anomalous_flag=refdata.anomalous_flag())
        a = a.select(a.sigmas() > 0)
        a = a.resolution_filter(d_min=params.d_min, d_max=params.d_max)
        if params.sigma_cutoff is not None:
            a = a.select(a.data()/a.sigmas() >= params.sigma_cutoff)

        a = a.merge_equivalents(use_internal_variance=False).array()

        tmp, a = refdata.common_sets(a, assert_is_similar_symmetry=False)
        n_common = tmp.size()

        if n_common == 0:
            print "# No useful reflection in this file. skip."
            continue

        corr = flex.linear_correlation(tmp.data(), a.data())
        cc_org = corr.coefficient() if corr.is_well_defined() else float("nan")

        # Calc CC in resolution bin and average
        tmp.setup_binner(auto_binning=True)
        cc_bins = []
        for i_bin in tmp.binner().range_used():
            sel = tmp.binner().selection(i_bin)
            corr = flex.linear_correlation(tmp.select(sel).data(), a.select(sel).data())
            if not corr.is_well_defined(): continue
            cc_bins.append(corr.coefficient())

        cc_mean = sum(cc_bins) / float(len(cc_bins)) if len(cc_bins) > 0 else float("nan")
            
        # Determine scale and B
        k, b = kBdecider(tmp, a).run()

        bfac = flex.exp(-b * a.d_star_sq().data()) if b != 0 else 1.
        corr = flex.linear_correlation(tmp.data(), a.data() * k*bfac)
        cc_scaled = corr.coefficient() if corr.is_well_defined() else float("nan")

        print "%s %5d %.3e %.3e %.4f %.4f %.4f" % (xf, n_common, k, b, cc_org, cc_mean, cc_scaled),
        print ("%.3f "*6)%a.unit_cell().parameters()

        if params.show_plot:
            import pylab
            from matplotlib.ticker import FuncFormatter
            s3_formatter = lambda x,pos: "inf" if x == 0 else "%.2f" % (x**(-1/3))

            fig, ax1 = pylab.plt.subplots()

            plot_x = map(lambda i: tmp.binner().bin_d_range(i)[1]**(-3), tmp.binner().range_used())

            #for name, ar in (("reference", tmp), ("data", a)):
            vals = map(lambda i: flex.mean(tmp.data().select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="reference")

            scale = flex.sum(tmp.data()*a.data()) / flex.sum(flex.pow2(a.data()))
            print "Linear-scale=", scale
            vals = map(lambda i: scale*flex.mean(a.data().select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data")
            vals = map(lambda i: flex.mean((a.data()*k*bfac).select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled")

            """
            from mmtbx.scaling import absolute_scaling, relative_scaling
            ls_scaling = relative_scaling.ls_rel_scale_driver(tmp, tmp.customized_copy(data=a.data(),sigmas=a.sigmas()), use_intensities=True, scale_weight=True, use_weights=True)
            ls_scaling.show()
            vals = map(lambda i: flex.mean(ls_scaling.derivative.resolution_filter(*tmp.binner().bin_d_range(i)).data()), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled2")
            """
            
            pylab.legend()
            pylab.xlabel('resolution (d^-3)')
            pylab.ylabel('<I>')
            pylab.setp(pylab.gca().get_legend().get_texts(), fontsize="small")
            pylab.title('Scaled with B-factors (%.2f)' % b)

            pylab.gca().xaxis.set_major_formatter(FuncFormatter(s3_formatter))

            ax2 = ax1.twinx()
            ax2.plot(plot_x, cc_bins, "black")
            ax2.set_ylabel('CC')
            pylab.show()
Exemplo n.º 6
0
            x[1] = x[1].customized_copy(crystal_symmetry=Is[0][1])

    # Take common sets
    Is = commonalize(Is)  ####

    # Decide scale
    if not params.noscale:
        for i in xrange(1, len(Is)):
            I = Is[i][1].resolution_filter(d_max=params.scale.dmax,
                                           d_min=params.scale.dmin)
            I0 = Is[0][1].resolution_filter(d_max=params.scale.dmax,
                                            d_min=params.scale.dmin)
            I, I0 = I.common_sets(I0, assert_is_similar_symmetry=False)

            if params.scale.bscale:
                Is[i][2] = kBdecider(I0, I).run()
                print "Scale for", Is[i][0], "is", Is[i][2]
            else:
                scale = flex.sum(I0.data() * I.data()) / flex.sum(
                    flex.pow2(I.data()))
                Is[i][2] = scale, 0
                print "Scale for", Is[i][0], "is", scale

    print Is[0][1].data().size()
    # Prepare plot data
    for_plot = OrderedDict()  # {name: [mean, ...], ..}
    binner = Is[0][1].setup_binner(
        n_bins=params.nbins)  #reflections_per_bin=50)
    for i_bin in binner.range_used():
        for name, I, (scale, b) in Is:
            dmax, dmin = binner.bin_d_range(i_bin)