Example #1
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    # Check parameters
    if params.program == "xscale":
        if (params.xscale.frames_per_batch,
                params.xscale.degrees_per_batch).count(None) == 0:
            print "ERROR! You can't specify both of xscale.frames_per_batch and xscale.degrees_per_batch"
            return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log",
                 open(os.path.join(params.workdir, "multi_merge.log"), "w"),
                 atexit_send_to=None)
    out.register("stdout", sys.stdout)
    out.write("kamo.multi_merge started at %s\n\n" %
              time.strftime("%Y-%m-%d %H:%M:%S"))
    time_started = time.time()

    print >> out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out,
                                                             prefix=" ")
    print >> out, ""

    # XXX Does not work when clustering is used.
    html_report = multi_merging.html_report.HtmlReportMulti(
        os.path.abspath(params.workdir))
    try:
        html_report.add_params(params, master_params_str)
    except:
        print >> out, traceback.format_exc()

    xds_ascii_files = util.read_path_list(params.lstin,
                                          only_exists=True,
                                          as_abspath=True,
                                          err_out=out)

    if not xds_ascii_files:
        print >> out, "ERROR! Cannot find (existing) files in %s." % params.lstin
        return

    if len(xds_ascii_files) < 2:
        print >> out, "ERROR! Only one file in %s." % params.lstin
        print >> out, "       Give at least two files for merging."
        return

    cells = collections.OrderedDict()
    laues = {}  # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >> out, "Error in reading %s" % xac
            print >> out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(
            False).info()
        laues.setdefault(str(laue), {}).setdefault(
            symm.space_group_info().type().number(), []).append(xac)
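    # The resulting structure groups files first by Laue class and then by
    # space-group number, e.g. (hypothetical values):
    #   laues == {"P 1 2/m 1": {3: ["a.HKL"], 4: ["b.HKL", "c.HKL"]}}
    # More than one top-level key means the inputs disagree on Laue symmetry.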

    if len(laues) > 1:
        print >> out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >> out, " SPACE_GROUP_NUMBER= %d (%d data)" % (
                    sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >> out, "  %s" % f
                print >> out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(
            space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >> out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (
                params.space_group, laues.keys()[0])
            return

        sg_refset = space_group.info().as_reference_setting().group()
        if space_group != sg_refset:
            print >> out, "Sorry! currently space group in non-reference setting is not supported."
            print >> out, "(You requested %s, which is different from reference setting: %s)" % (
                space_group.info(), sg_refset.info())
            return
    else:
        tmp = sgtbx.space_group_info(
            laues.values()[0].keys()
            [0]).group().build_derived_reflection_intensity_group(True)
        print >> out, "Space group for merging:", tmp.info()

    test_flag_will_be_transferred = False

    if params.reference.data is not None:
        params.reference.data = os.path.abspath(params.reference.data)
        print >> out, "Reading reference data file: %s" % params.reference.data

        tmp = iotbx.file_reader.any_file(params.reference.data,
                                         force_type="hkl",
                                         raise_sorry_if_errors=True)
        if params.reference.copy_test_flag:
            from yamtbx.dataproc.command_line import copy_free_R_flag
            if None in copy_free_R_flag.get_flag_array(
                    tmp.file_server.miller_arrays, log_out=out):
                print >> out, " Warning: no test flag found in reference file (%s)" % params.reference.data
            else:
                test_flag_will_be_transferred = True
                print >> out, " test flag will be transferred"

        if space_group is not None:
            if space_group != tmp.file_server.miller_arrays[0].space_group():
                print >> out, " ERROR! space_group=(%s) and that of reference.data (%s) do not match." % (
                    space_group.info(),
                    tmp.file_server.miller_arrays[0].space_group_info())
                return
        else:
            space_group = tmp.file_server.miller_arrays[0].space_group()
            print >> out, " space group for merging: %s" % space_group.info()

    if params.add_test_flag:
        if test_flag_will_be_transferred:
            print >> out, "Warning: add_test_flag=True was set, but the flag will be transferred from the reference file given."
        else:
            from cctbx import r_free_utils

            med_cell = numpy.median(cells.values(), axis=0)
            d_min = max(
                params.d_min - 0.2, 1.0
            ) if params.d_min is not None else 1.5  # to prevent infinite set
            sg = space_group
            if not sg:
                sg = sgtbx.space_group_info(
                    laues.values()[0].keys()
                    [0]).group().build_derived_reflection_intensity_group(True)
            tmp = miller.build_set(crystal.symmetry(tuple(med_cell),
                                                    space_group=sg),
                                   False,
                                   d_min=d_min,
                                   d_max=None)
            print >> out, "Generating test set using the reference symmetry:"
            crystal.symmetry.show_summary(tmp, out, " ")
            tmp = tmp.generate_r_free_flags(fraction=0.05,
                                            max_free=None,
                                            lattice_symmetry_max_delta=5.0,
                                            use_lattice_symmetry=True,
                                            n_shells=20)
            tmp.show_r_free_flags_info(out=out, prefix=" ")
            tmp = tmp.customized_copy(
                data=r_free_utils.export_r_free_flags_for_ccp4(
                    flags=tmp.data(), test_flag_value=True))
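            # Note (assumption): export_r_free_flags_for_ccp4() converts the
            # boolean flags into CCP4-style integer flags, with the test set
            # given the conventional CCP4 flag value of 0.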

            mtz_object = tmp.as_mtz_dataset(
                column_root_label="FreeR_flag").mtz_object()
            test_flag_mtz = os.path.abspath(
                os.path.join(params.workdir, "test_flag.mtz"))
            mtz_object.write(file_name=test_flag_mtz)

            # Override the parameters
            params.reference.copy_test_flag = True
            params.reference.data = test_flag_mtz

    try:
        html_report.add_cells_and_files(cells, laues.keys()[0])
    except:
        print >> out, traceback.format_exc()

    data_for_merge = []
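    # Each entry of data_for_merge is a tuple:
    #   (workdir, xds_ascii_files_for_cluster, LCV, aLCV, cluster_height);
    # LCV/aLCV are NaN when BLEND statistics are not available.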
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >> out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >> out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir,
                                             d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(
            out=open(summary_out, "w"))
        print >> out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl,
                              clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl,
                              clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun,
                              clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun,
                              clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV,
                              clusters)

        if params.max_clusters is not None and len(
                clusters) > params.max_clusters:
            print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (
                params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >> out, "With specified conditions, following %d clusters will be merged:" % len(
                clusters)
        else:
            print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >> out, "Please change criteria of completeness or redundancy"
            print >> out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >> out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters:  # process largest first
            print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (
                clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir,
                                                "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x - 1],
                                       IDs), LCV, aLCV, clh))
        print >> out
        try:
            html_report.add_clutering_result(clusters, "blend")
        except:
            print >> out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(
            ccc_wdir,
            xds_ascii_files,
            d_min=params.cc_clustering.d_min
            if params.cc_clustering.d_min is not None else params.d_min,
            min_ios=params.cc_clustering.min_ios)
        print >> out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(
            nproc=params.cc_clustering.nproc,
            b_scale=params.cc_clustering.b_scale,
            use_normalized=params.cc_clustering.use_normalized,
            cluster_method=params.cc_clustering.method,
            distance_eqn=params.cc_clustering.cc_to_distance,
            min_common_refs=params.cc_clustering.min_common_refs,
            html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min,
                                                    out=open(summary_out, "w"))
        print >> out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl,
                              clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl,
                              clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun,
                              clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(
                lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(
                lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(
                clusters) > params.max_clusters:
            print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (
                params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >> out, "With specified conditions, following %d clusters will be merged:" % len(
                clusters)
        else:
            print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >> out, "Please change criteria of completeness or redundancy"
            print >> out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >> out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, ccmean, ccmin in clusters:  # process largest first
            print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f CCmean=% .4f CCmin=% .4f" % (
                clno, len(IDs), clh, cmpl, redun, acmpl, aredun, ccmean, ccmin)
            data_for_merge.append((os.path.join(params.workdir,
                                                "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x - 1],
                                       IDs), float("nan"), float("nan"), clh))
        print >> out

        try:
            html_report.add_clutering_result(clusters, "cc_clustering")
        except:
            print >> out, traceback.format_exc()

    else:
        data_for_merge.append((os.path.join(params.workdir,
                                            "all_data"), xds_ascii_files,
                               float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"),
                       "w")
    ofs_summary.write(
        "# d_min= %.3f A\n" %
        (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write(
        "     cluster    ClH  LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso.bst Aniso.wst dmin.est\n"
    )

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files,
                          stats):
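        # Assumed layout of each stats array: index 0 = overall, 1 = innermost
        # resolution shell, 2 = outermost shell (matching the plain/.in/.ou
        # columns of the header written above).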
        tmps = "%12s %6.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %9.2f %9.2f %.2f\n"
        ofs_summary.write(tmps % (
            os.path.relpath(workdir, params.workdir),
            clh,
            LCV,
            aLCV,
            cycle,
            len(xds_files),
            num_files,
            stats["cmpl"][0],
            stats["redundancy"][0],
            stats["i_over_sigma"][0],
            stats["r_meas"][0],
            stats["cc_half"][0],
            stats["cmpl"][2],
            stats["redundancy"][2],
            stats["i_over_sigma"][2],
            stats["r_meas"][2],
            stats["cc_half"][2],
            stats["cmpl"][1],
            stats["redundancy"][1],
            stats["i_over_sigma"][1],
            stats["r_meas"][1],
            stats["cc_half"][1],
            stats["sig_ano"][1],
            stats["cc_ano"][1],
            stats["xtriage_log"].wilson_b,
            #stats["xtriage_log"].anisotropy,
            stats["aniso"]["d_min_best"],
            stats["aniso"]["d_min_worst"],
            stats["dmin_est"],
        ))
        ofs_summary.flush()

    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells,
                         space_group),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
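            # The worker script below unpickles args.pkl, runs
            # merge_datasets() inside the cluster directory, and pickles the
            # return value to result.pkl, which is read back after
            # batchjobs.wait_all().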
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir,
                                                        "result.pkl")))
            except:
                print >> out, "Error in unpickling result in %s" % workdir
                print >> out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" %
                                  os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files,
                                  num_files, stats)

            # Last lcv & alcv
            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv,
                                             xds_files, results[-1][2],
                                             results[-1][3])
            except:
                print >> out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >> out, "Merging %s..." % os.path.relpath(
                workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells,
                                     space_group)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" %
                                  os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files,
                                  num_files, stats)

            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv,
                                             xds_files, results[-1][2],
                                             results[-1][3])
            except:
                print >> out, traceback.format_exc()

    try:
        html_report.write_html()
    except:
        print >> out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")

    out.write("\nNormal exit at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    out.write("Total wall-clock time: %.2f sec.\n" %
              (time.time() - time_started))

    return
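
# Hypothetical usage sketch (not part of the original example): params is the
# PHIL-extracted parameter object, so a minimal driver might look like
#   working_params = libtbx.phil.parse(master_params_str).extract()
#   working_params.lstin = "formerge.lst"   # list of XDS_ASCII.HKL paths
#   working_params.workdir = "merge_run"
#   run(working_params)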
Example #2
def xds_sequence(root, params):
    print
    print os.path.relpath(root, params.topdir)

    init_lp = os.path.join(root, "INIT.LP")
    xparm = os.path.join(root, "XPARM.XDS")
    gxparm = os.path.join(root, "GXPARM.XDS")
    defpix_lp = os.path.join(root, "DEFPIX.LP")
    correct_lp = os.path.join(root, "CORRECT.LP")
    integrate_hkl = os.path.join(root, "INTEGRATE.HKL")
    xac_hkl = os.path.join(root, "XDS_ASCII.HKL")
    integrate_lp = os.path.join(root, "INTEGRATE.LP")
    spot_xds = os.path.join(root, "SPOT.XDS")
    xdsinp = os.path.join(root, "XDS.INP")

    assert os.path.isfile(xdsinp)
    if params.cell_prior.force: assert params.cell_prior.check

    xdsinp_dict = dict(get_xdsinp_keyword(xdsinp))

    if params.cell_prior.sgnum > 0:
        xs_prior = crystal.symmetry(params.cell_prior.cell,
                                    params.cell_prior.sgnum)
    else:
        xs_prior = None

    decilog = multi_out()
    decilog.register("log",
                     open(os.path.join(root, "decision.log"), "a"),
                     atexit_send_to=None)
    try:
        print >> decilog, "xds_sequence started at %s in %s\n" % (
            time.strftime("%Y-%m-%d %H:%M:%S"), root)

        if not kamo_test_installation.tst_xds():
            print >> decilog, "XDS is not installed or expired!!"
            return

        if params.show_progress:
            decilog.register("stdout", sys.stdout)

        if params.mode == "initial" and params.resume and os.path.isfile(
                correct_lp):
            print >> decilog, " Already processed."
            return

        if params.mode == "recycle" and not os.path.isfile(gxparm):
            print >> decilog, "GXPARM.XDS not found. Cannot do recycle."
            return

        if params.fast_delphi and (params.nproc is None or params.nproc > 1):
            delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc)
            print >> decilog, " Setting delphi to ", delphi
            modify_xdsinp(xdsinp, inp_params=[
                ("DELPHI", str(delphi)),
            ])

        if params.nproc is not None and params.nproc > 1:
            modify_xdsinp(xdsinp,
                          inp_params=[
                              ("MAXIMUM_NUMBER_OF_PROCESSORS",
                               str(params.nproc)),
                          ])

        if params.mode == "initial":
            modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT")])
            run_xds(wdir=root, show_progress=params.show_progress)
            initlp = InitLp(init_lp)
            first_bad = initlp.check_bad_first_frames()
            if first_bad:
                print >> decilog, " first frames look bad (too weak) exposure:", first_bad
                new_data_range = map(
                    int,
                    dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split())
                new_data_range[0] = first_bad[-1] + 1
                print >> decilog, " changing DATA_RANGE= to", new_data_range
                modify_xdsinp(xdsinp,
                              inp_params=[("JOB", "INIT"),
                                          ("DATA_RANGE",
                                           "%d %d" % tuple(new_data_range))])
                for f in xds_files.generated_by_INIT:
                    util.rotate_file(os.path.join(root, f), copy=False)
                run_xds(wdir=root, show_progress=params.show_progress)

            # Peak search
            modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")])
            run_xds(wdir=root, show_progress=params.show_progress)
            if params.auto_frame_exclude_spot_based:
                sx = idxreflp.SpotXds(spot_xds)
                sx.set_xdsinp(xdsinp)
                spots = filter(lambda x: 5 < x[-1] < 30,
                               sx.collected_spots())  # low-res (5 A)
                frame_numbers = numpy.array(map(lambda x: int(x[2]) + 1,
                                                spots))
                data_range = map(
                    int,
                    dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split())
                # XXX This assumes SPOT_RANGE equals DATA_RANGE. Is this guaranteed?
                h = numpy.histogram(frame_numbers,
                                    bins=numpy.arange(data_range[0],
                                                      data_range[1] + 2,
                                                      step=1))
                q14 = numpy.percentile(h[0], [25, 75])
                iqr = q14[1] - q14[0]
                cutoff = max(
                    h[0][h[0] <= iqr * 1.5 + q14[1]]) / 5  # magic number
                print >> decilog, "DEBUG:: IQR= %.2f, Q1/4= %s, cutoff= %.2f" % (
                    iqr, q14, cutoff)
                cut_frames = h[1][h[0] < cutoff]
                keep_frames = h[1][h[0] >= cutoff]
                print >> decilog, "DEBUG:: keep_frames=", keep_frames
                print >> decilog, "DEBUG::  cut_frames=", cut_frames

                if len(cut_frames) > 0:
                    cut_ranges = [
                        [cut_frames[0], cut_frames[0]],
                    ]
                    for fn in cut_frames:
                        if fn - cut_ranges[-1][1] <= 1: cut_ranges[-1][1] = fn
                        else: cut_ranges.append([fn, fn])
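                    # Consecutive excluded frames are merged into contiguous
                    # [first, last] ranges so that each range becomes one
                    # EXCLUDE_DATA_RANGE= line below.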

                    # Edit XDS.INP
                    cut_inp_str = "".join(
                        map(
                            lambda x: "EXCLUDE_DATA_RANGE= %6d %6d\n" % tuple(
                                x), cut_ranges))
                    open(xdsinp, "a").write("\n" + cut_inp_str)

                    # Edit SPOT.XDS
                    shutil.copyfile(spot_xds, spot_xds + ".org")
                    sx.write(open(spot_xds, "w"),
                             frame_selection=set(keep_frames))

            # Indexing
            if params.cell_prior.method == "use_first":
                modify_xdsinp(xdsinp,
                              inp_params=[
                                  ("JOB", "IDXREF"),
                                  ("UNIT_CELL_CONSTANTS", " ".join(
                                      map(lambda x: "%.3f" % x,
                                          params.cell_prior.cell))),
                                  ("SPACE_GROUP_NUMBER",
                                   "%d" % params.cell_prior.sgnum),
                              ])
            else:
                modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF")])

            run_xds(wdir=root, show_progress=params.show_progress)
            print >> decilog, ""  # TODO indexing stats like indexed percentage here.

            if params.tryhard:
                try_indexing_hard(root,
                                  params.show_progress,
                                  decilog,
                                  known_sgnum=params.cell_prior.sgnum,
                                  known_cell=params.cell_prior.cell,
                                  tol_length=params.cell_prior.tol_length,
                                  tol_angle=params.cell_prior.tol_angle)

            if not os.path.isfile(xparm):
                print >> decilog, " Indexing failed."
                return

            if params.cell_prior.sgnum > 0:
                # Check anyway
                xsxds = XPARM(xparm).crystal_symmetry()
                cosets = reindex.reindexing_operators(
                    xs_prior, xsxds, params.cell_prior.tol_length,
                    params.cell_prior.tol_angle)
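                # Assumption from usage here: double_cosets is None when no
                # reindexing operator relates the indexed cell to the prior
                # cell within the given tolerances, i.e. the cells are
                # incompatible.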
                if cosets.double_cosets is None:
                    if params.cell_prior.check:
                        print >> decilog, " Incompatible cell. Indexing failed."
                        return
                    else:
                        print >> decilog, " Warning: Incompatible cell."

                elif params.cell_prior.method == "symm_constraint_only":
                    cell = xsxds.unit_cell().change_basis(
                        cosets.combined_cb_ops()[0])
                    print >> decilog, " Trying symmetry-constrained cell parameter:", cell
                    modify_xdsinp(xdsinp,
                                  inp_params=[
                                      ("JOB", "IDXREF"),
                                      ("UNIT_CELL_CONSTANTS", " ".join(
                                          map(lambda x: "%.3f" % x,
                                              cell.parameters()))),
                                      ("SPACE_GROUP_NUMBER",
                                       "%d" % params.cell_prior.sgnum),
                                  ])
                    for f in xds_files.generated_by_IDXREF:
                        util.rotate_file(os.path.join(root, f),
                                         copy=(f == "SPOT.XDS"))

                    run_xds(wdir=root, show_progress=params.show_progress)

                    if not os.path.isfile(xparm):
                        print >> decilog, " Indexing failed."
                        return

                    # Check again
                    xsxds = XPARM(xparm).crystal_symmetry()
                    if not xsxds.unit_cell().is_similar_to(
                            xs_prior.unit_cell(), params.cell_prior.tol_length,
                            params.cell_prior.tol_angle):
                        print >> decilog, "  Resulted in different cell. Indexing failed."
                        return

        elif params.mode == "recycle":
            print >> decilog, " Start recycle. original ISa= %.2f" % correctlp.get_ISa(
                correct_lp, check_valid=True)
            for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS",
                                                         "plot_integrate.log"):
                util.rotate_file(os.path.join(root, f), copy=True)
            shutil.copyfile(gxparm + ".1", xparm)
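            # The refined geometry (GXPARM.XDS, backed up as *.1 by the
            # rotation above) becomes the new XPARM.XDS, so integration
            # restarts from the previously refined parameters.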
        else:
            raise "Unknown mode (%s)" % params.mode

        # To Integration
        modify_xdsinp(xdsinp,
                      inp_params=[("JOB", "DEFPIX INTEGRATE"),
                                  ("INCLUDE_RESOLUTION_RANGE", "50 0")])
        run_xds(wdir=root, show_progress=params.show_progress)
        if os.path.isfile(integrate_lp):
            xds_plot_integrate.run(integrate_lp,
                                   os.path.join(root, "plot_integrate.log"))
        if not os.path.isfile(integrate_hkl):
            print >> decilog, " Integration failed."
            return

        # Make _noscale.HKL if needed
        if params.no_scaling:
            bk_prefix = make_backup(("XDS.INP", ), wdir=root, quiet=True)
            xparm_obj = XPARM(xparm)
            modify_xdsinp(xdsinp,
                          inp_params=[
                              ("JOB", "CORRECT"),
                              ("CORRECTIONS", ""),
                              ("NBATCH", "1"),
                              ("MINIMUM_I/SIGMA", "50"),
                              ("REFINE(CORRECT)", ""),
                              ("UNIT_CELL_CONSTANTS", " ".join(
                                  map(lambda x: "%.3f" % x,
                                      xparm_obj.unit_cell))),
                              ("SPACE_GROUP_NUMBER",
                               "%d" % xparm_obj.spacegroup),
                          ])
            print >> decilog, " running CORRECT without empirical scaling"
            run_xds(wdir=root, show_progress=params.show_progress)
            for f in xds_files.generated_by_CORRECT + ("XDS.INP", ):
                ff = os.path.join(root, f)
                if not os.path.isfile(ff): continue
                if ff.endswith(".cbf"):
                    os.remove(ff)
                else:
                    os.rename(ff, ff + "_noscale")

            revert_files(("XDS.INP", ), bk_prefix, wdir=root, quiet=True)

        # Run pointless
        pointless_integrate = {}
        if params.use_pointless:
            worker = Pointless()
            pointless_integrate = worker.run_for_symm(
                xdsin=integrate_hkl,
                logout=os.path.join(root, "pointless_integrate.log"))
            if "symm" in pointless_integrate:
                symm = pointless_integrate["symm"]
                print >> decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info(
                )
                if xs_prior:
                    if xtal.is_same_space_group_ignoring_enantiomorph(
                            symm.space_group(), xs_prior.space_group()):
                        print >> decilog, " which is consistent with the given symmetry."
                    elif xtal.is_same_laue_symmetry(symm.space_group(),
                                                    xs_prior.space_group()):
                        print >> decilog, " which has Laue symmetry consistent with the given symmetry."
                    else:
                        print >> decilog, " which is inconsistent with the given symmetry."

                sgnum = symm.space_group_info().type().number()
                cell = " ".join(
                    map(lambda x: "%.2f" % x,
                        symm.unit_cell().parameters()))
                modify_xdsinp(xdsinp,
                              inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                          ("UNIT_CELL_CONSTANTS", cell)])
            else:
                print >> decilog, " pointless failed."

        flag_do_not_change_symm = False

        if xs_prior and params.cell_prior.force:
            modify_xdsinp(xdsinp,
                          inp_params=[("UNIT_CELL_CONSTANTS", " ".join(
                              map(lambda x: "%.3f" % x,
                                  params.cell_prior.cell))),
                                      ("SPACE_GROUP_NUMBER",
                                       "%d" % params.cell_prior.sgnum)])
            flag_do_not_change_symm = True
        elif params.cell_prior.method == "correct_only":
            xsxds = XPARM(xparm).crystal_symmetry()
            cosets = reindex.reindexing_operators(xs_prior, xsxds,
                                                  params.cell_prior.tol_length,
                                                  params.cell_prior.tol_angle)
            if cosets.double_cosets is not None:
                cell = xsxds.unit_cell().change_basis(
                    cosets.combined_cb_ops()[0])
                print >> decilog, " Using given symmetry in CORRECT with symmetry constraints:", cell
                modify_xdsinp(xdsinp,
                              inp_params=[
                                  ("UNIT_CELL_CONSTANTS", " ".join(
                                      map(lambda x: "%.3f" % x,
                                          cell.parameters()))),
                                  ("SPACE_GROUP_NUMBER",
                                   "%d" % params.cell_prior.sgnum),
                              ])
                flag_do_not_change_symm = True
            else:
                print >> decilog, " Tried to use given symmetry in CORRECT, but cell in integration is incompatible."

        # Do Scaling
        modify_xdsinp(xdsinp, inp_params=[
            ("JOB", "CORRECT"),
        ])

        run_xds(wdir=root, show_progress=params.show_progress)

        if not os.path.isfile(xac_hkl):
            print >> decilog, " CORRECT failed."
            return

        if not os.path.isfile(gxparm):
            print >> decilog, " Refinement in CORRECT failed."

        print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(
            correct_lp, check_valid=True)

        ret = calc_merging_stats(xac_hkl)
        if params.cut_resolution:
            if ret is not None and ret[0] is not None:
                d_min = ret[0]
                modify_xdsinp(xdsinp,
                              inp_params=[("JOB", "CORRECT"),
                                          ("INCLUDE_RESOLUTION_RANGE",
                                           "50 %.2f" % d_min)])
                print >> decilog, " Re-scale at %.2f A" % d_min
                os.rename(os.path.join(root, "CORRECT.LP"),
                          os.path.join(root, "CORRECT_fullres.LP"))
                os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL"))
                run_xds(wdir=root, show_progress=params.show_progress)
                print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(
                    correct_lp, check_valid=True)
                print >> decilog, " (Original files are saved as *_fullres.*)"
            else:
                print >> decilog, "error: Can't decide resolution."

        last_ISa = correctlp.get_ISa(correct_lp, check_valid=True)

        # Run pointless and (if result is different from INTEGRATE) re-scale.
        if params.use_pointless:
            worker = Pointless()
            pointless_correct = worker.run_for_symm(
                xdsin=xac_hkl,
                logout=os.path.join(root, "pointless_correct.log"))
            pointless_best_symm = None

            if "symm" in pointless_correct:
                symm = pointless_correct["symm"]
                need_rescale = False

                if pointless_integrate.get("symm"):
                    symm_by_integrate = pointless_integrate["symm"]

                    if not xtal.is_same_laue_symmetry(
                            symm_by_integrate.space_group(),
                            symm.space_group()):
                        print >> decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % (
                            symm.space_group_info(),
                            symm_by_integrate.space_group_info())
                        prob_integrate = pointless_integrate.get(
                            "laue_prob", float("nan"))
                        prob_correct = pointless_correct.get(
                            "laue_prob", float("nan"))

                        print >> decilog, " Prob(%s |INTEGRATE), Prob(%s |CORRECT) = %.4f, %.4f." % (
                            symm_by_integrate.space_group_info(),
                            symm.space_group_info(), prob_integrate,
                            prob_correct)
                        if prob_correct > prob_integrate:
                            need_rescale = True
                            pointless_best_symm = symm
                        else:
                            pointless_best_symm = symm_by_integrate
                else:
                    need_rescale = True
                    pointless_best_symm = symm
                    print >> decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info(
                    )
                    if xs_prior:
                        if xtal.is_same_space_group_ignoring_enantiomorph(
                                symm.space_group(), xs_prior.space_group()):
                            print >> decilog, " which is consistent with the given symmetry."
                        elif xtal.is_same_laue_symmetry(
                                symm.space_group(), xs_prior.space_group()):
                            print >> decilog, " which has Laue symmetry consistent with the given symmetry."
                        else:
                            print >> decilog, " which is inconsistent with the given symmetry."

                if need_rescale and not flag_do_not_change_symm:
                    sgnum = symm.space_group_info().type().number()
                    cell = " ".join(
                        map(lambda x: "%.2f" % x,
                            symm.unit_cell().parameters()))
                    modify_xdsinp(xdsinp,
                                  inp_params=[
                                      ("JOB", "CORRECT"),
                                      ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                      ("UNIT_CELL_CONSTANTS", cell),
                                      ("INCLUDE_RESOLUTION_RANGE", "50 0")
                                  ])

                    run_xds(wdir=root, show_progress=params.show_progress)

                    ret = calc_merging_stats(xac_hkl)

                    if params.cut_resolution:
                        if ret is not None and ret[0] is not None:
                            d_min = ret[0]
                            modify_xdsinp(xdsinp,
                                          inp_params=[
                                              ("JOB", "CORRECT"),
                                              ("INCLUDE_RESOLUTION_RANGE",
                                               "50 %.2f" % d_min)
                                          ])
                            print >> decilog, " Re-scale at %.2f A" % d_min
                            os.rename(os.path.join(root, "CORRECT.LP"),
                                      os.path.join(root, "CORRECT_fullres.LP"))
                            os.rename(
                                xac_hkl,
                                os.path.join(root, "XDS_ASCII_fullres.HKL"))
                            run_xds(wdir=root,
                                    show_progress=params.show_progress)
                            print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(
                                correct_lp, check_valid=True)
                            print >> decilog, " (Original files are saved as *_fullres.*)"
                        else:
                            print >> decilog, "error: Can't decide resolution."
                            for f in ("CORRECT_fullres.LP",
                                      "XDS_ASCII_fullres.HKL"):
                                if os.path.isfile(os.path.join(root, f)):
                                    print >> decilog, "removing", f
                                    os.remove(os.path.join(root, f))

                    ISa = correctlp.get_ISa(correct_lp, check_valid=True)

                    if ISa >= last_ISa or last_ISa != last_ISa:  # if improved or last_ISa is nan
                        print >> decilog, "ISa improved= %.2f" % ISa
                    else:
                        print >> decilog, "ISa got worse= %.2f" % ISa

            if pointless_best_symm:
                xac_symm = XDS_ASCII(xac_hkl, read_data=False).symm
                if not xtal.is_same_space_group_ignoring_enantiomorph(
                        xac_symm.space_group(),
                        pointless_best_symm.space_group()):
                    if xtal.is_same_laue_symmetry(
                            xac_symm.space_group(),
                            pointless_best_symm.space_group()):
                        tmp = "same Laue symmetry"
                    else:
                        tmp = "different Laue symmetry"
                    print >> decilog, "WARNING: symmetry in scaling is different from Pointless result (%s)." % tmp

        run_xdsstat(wdir=root)
        print
        if params.make_report: html_report.make_individual_report(root, root)
    except:
        print >> decilog, traceback.format_exc()
    finally:
        print >> decilog, "\nxds_sequence finished at %s" % time.strftime(
            "%Y-%m-%d %H:%M:%S")
        decilog.close()
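
# Hypothetical usage sketch (not part of the original example): root is a
# processing directory that already contains XDS.INP, e.g.
#   xds_sequence("/data/proc/sample1/run1", params)
# with params.mode set to "initial" for a fresh run, or "recycle" to redo
# integration starting from the refined GXPARM.XDS.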
Example #3
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >>out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used.
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try: html_report.add_params(params, master_params_str)
    except: print >>out, traceback.format_exc()

    xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin))
    xds_ascii_files = filter(lambda x: x!="" and os.path.isfile(x), xds_ascii_files)
    xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files)

    cells = collections.OrderedDict()
    laues = {} # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            try:
                symm = any_reflection_file(xac).as_miller_arrays()[0].crystal_symmetry()
            except:
                print >>out, "Error in reading %s" % xac
                print >>out, traceback.format_exc()
                return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue),{}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]: print >>out, "  %s" % f
                print >>out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >>out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (params.space_group, laues.keys()[0])
            return
    else:
        tmp = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
        print >>out, "Space group for merging:", tmp.info()
            
    try: html_report.add_cells_and_files(cells, laues.keys()[0])
    except: print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)            
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)            
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV,clh))
        print >>out
        try: html_report.add_clutering_result(clusters, "blend")
        except: print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)            
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)            
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"),float("nan"),clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "cc_clustering")
        except: print >>out, traceback.format_exc()
        
    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH   LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso  \n")

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0],
                                  stats["redundancy"][0],
                                  stats["i_over_sigma"][0],
                                  stats["r_meas"][0],
                                  stats["cc_half"][0],
                                  stats["cmpl"][2],
                                  stats["redundancy"][2],
                                  stats["i_over_sigma"][2],
                                  stats["r_meas"][2],
                                  stats["cc_half"][2],
                                  stats["cmpl"][1],
                                  stats["redundancy"][1],
                                  stats["i_over_sigma"][1],
                                  stats["r_meas"][1],
                                  stats["cc_half"][1],
                                  stats["sig_ano"][1],
                                  stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  stats["xtriage_log"].anisotropy,
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group, batchjobs), open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ofs = open("result.pkl","w"); \
ret = merge_datasets(*args); \
pickle.dump(ret, ofs); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            # Last lcv & alcv
            try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, space_group, batchjobs)
            
            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()

    try: html_report.write_html()
    except: print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")
    return
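
The parallel branch above fans each cluster out to an independent Python
process, passing arguments in via args.pkl and collecting the return value of
merge_datasets via result.pkl. A minimal, self-contained sketch of that
round-trip, with the batchjob machinery replaced by a plain subprocess (the
scratch path and the summing child code are illustrative stand-ins, not part
of yamtbx):

import os, pickle, subprocess, sys

workdir = "/tmp/pickle_job_demo"  # illustrative scratch directory
if not os.path.isdir(workdir):
    os.makedirs(workdir)

# Parent: serialize the arguments the child job will need.
pickle.dump((2, 3), open(os.path.join(workdir, "args.pkl"), "wb"), -1)

# Child: a fresh interpreter reloads args.pkl, does the work (a sum standing
# in for merge_datasets) and pickles its result for the parent to collect.
child = ("import pickle; "
         "args = pickle.load(open('args.pkl', 'rb')); "
         "pickle.dump(sum(args), open('result.pkl', 'wb'))")
subprocess.check_call([sys.executable, "-c", child], cwd=workdir)

# Parent: unpickle the result, guarding against a failed job as run() does.
try:
    result = pickle.load(open(os.path.join(workdir, "result.pkl"), "rb"))
except Exception:
    result = None
print "collected:", result  # -> collected: 5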
Example No. 4
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal()
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >>out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used.
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try: html_report.add_params(params, master_params_str)
    except: print >>out, traceback.format_exc()

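    # Read the input list: strip trailing "#" comments and blank lines,
    # keep only files that exist, then absolutize the surviving paths.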
    xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin))
    xds_ascii_files = filter(lambda x: x!="" and os.path.isfile(x), xds_ascii_files)
    xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files)

    cells = collections.OrderedDict()
    laues = {} # for check
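    # laues maps Laue group symbol -> {space-group number: [files]};
    # more than one key below means the input mixes symmetries and is rejected.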
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >>out, "Error in reading %s" % xac
            print >>out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue),{}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]: print >>out, "  %s" % f
                print >>out, ""
        return
            
    try: html_report.add_cells_and_files(cells, laues.keys()[0])
    except: print >>out, traceback.format_exc()

    data_for_merge = []
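    # data_for_merge collects (workdir, files, LCV, aLCV, cluster height) tuples
    # that the merging loops below consume.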
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

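        # Judging from the unpacking below, each BLEND cluster is
        # (clno, IDs, height, cmpl, redun, acmpl, aredun, LCV, aLCV); apply the user thresholds.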
        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)            
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)            
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV, clh))
        print >>out
        try: html_report.add_clutering_result(clusters, "blend")
        except: print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

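        # CC clusters carry no LCV/aLCV columns: (clno, IDs, height, cmpl, redun, acmpl, aredun),
        # hence the float("nan") placeholders further down.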
        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)            
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)            
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"),float("nan"),clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "cc_clustering")
        except: print >>out, traceback.format_exc()
        
    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH   LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso  \n")

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0],
                                  stats["redundancy"][0],
                                  stats["i_over_sigma"][0],
                                  stats["r_meas"][0],
                                  stats["cc_half"][0],
                                  stats["cmpl"][2],
                                  stats["redundancy"][2],
                                  stats["i_over_sigma"][2],
                                  stats["r_meas"][2],
                                  stats["cc_half"][2],
                                  stats["cmpl"][1],
                                  stats["redundancy"][1],
                                  stats["i_over_sigma"][1],
                                  stats["r_meas"][1],
                                  stats["cc_half"][1],
                                  stats["sig_ano"][1],
                                  stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  stats["xtriage_log"].anisotropy,
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, batchjobs), open(os.path.join(workdir, "args.pkl"), "w"), -1)
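            # Same pickle round-trip as in the first example, but the argument tuple
            # omits space_group and, without an explicit cd, the script presumably
            # relies on the batch engine starting the job in workdir.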
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % sys.executable)
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))
            for cycle, wd, num_files, stats in results:
                write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, batchjobs)
            
            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()

    try: html_report.write_html()
    except: print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")
    return
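
Where the first example reads the input list through util.read_path_list, this
version inlines the same logic as a map/filter chain. A standalone sketch of
such a reader (this read_path_list is an illustration, not the yamtbx
implementation):

import os

def read_path_list(lstin, only_exists=True, as_abspath=True):
    # One path per line; "#" starts a comment, blank lines are skipped.
    paths = []
    for line in open(lstin):
        line = (line[:line.index("#")] if "#" in line else line).strip()
        if not line:
            continue
        if only_exists and not os.path.isfile(line):
            continue
        paths.append(os.path.abspath(line) if as_abspath else line)
    return paths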