Example #1
def _calc_cchalf_by_removing_worker_2(wdir, tmpdir, iex, stat_bin):
    assert stat_bin in ("total", "outer")
    newlp = os.path.join(tmpdir, "XSCALE.LP")
    newinp = os.path.join(tmpdir, "XSCALE.INP")
    table = xscalelp.read_stats_table(newlp)
    if table is None:
        shutil.rmtree(tmpdir)
        return iex, float("nan"), -1

    assert table["dmin"][-1] is None # None for total
    i_stat = -1 if stat_bin == "total" else -2
    cchalf_exi = table["cc_half"][i_stat]
    nuniq = table["nuniq"][i_stat]

    # Back up .INP and .LP, then remove the directory.
    os.rename(newinp, os.path.join(wdir, "XSCALE.INP.ex%.3d"%iex))
    os.rename(newlp, os.path.join(wdir, "XSCALE.LP.ex%.3d"%iex))
    shutil.rmtree(tmpdir)

    return iex, cchalf_exi, nuniq
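
A note on usage: the worker above only parses results; the xscale job that writes XSCALE.LP/XSCALE.INP into each tmpdir must have run beforehand. Below is a minimal, hypothetical driver (not from the source) that fans such workers out with multiprocessing and sorts the removals by the CC1/2 they yield:

# Hypothetical driver sketch, assuming each reject_test_### tmpdir already
# holds the XSCALE.LP/XSCALE.INP of one leave-one-out xscale run.
import multiprocessing

def collect_cchalf_results(wdir, tmpdirs, stat_bin="total"):
    pool = multiprocessing.Pool()
    jobs = [pool.apply_async(_calc_cchalf_by_removing_worker_2,
                             (wdir, tmpdir, iex, stat_bin))
            for iex, tmpdir in enumerate(tmpdirs)]
    pool.close()
    pool.join()
    results = [j.get() for j in jobs]               # (iex, cc1/2, nuniq) per removal
    results = [r for r in results if r[1] == r[1]]  # drop failed runs (NaN CC1/2)
    results.sort(key=lambda x: -x[1])               # highest CC1/2 after removal first
    return results
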
Example #2
def merge_datasets(params, workdir, xds_files, cells, space_group):
    if not os.path.exists(workdir): os.makedirs(workdir)
    out = open(os.path.join(workdir, "merge.log"), "w")

    if params.program == "xscale":
        cycles = multi_merging.xscale.XscaleCycles(
            workdir,
            anomalous_flag=params.anomalous,
            d_min=params.d_min,
            d_max=params.d_max,
            reject_method=params.reject_method,
            reject_params=params.rejection,
            xscale_params=params.xscale,
            res_params=params.resolution,
            reference_file=params.reference_file,
            space_group=space_group,
            ref_mtz=params.reference.data
            if params.reference.copy_test_flag else None,
            out=out,
            nproc=params.nproc,
            batch_params=params.batch)

        unused_files, reasons = cycles.run_cycles(xds_files)
        used_files = set(xds_files).difference(set(unused_files))

        print >> out
        print >> out, " SUMMARY "
        print >> out, "========================"
        for i, files in enumerate((used_files, unused_files)):
            print >> out, "\n%6s %4d files:\n" % (
                ("Used", "Unused")[i], len(files))
            if len(files) == 0:
                continue

            maxlen_f = max(
                map(lambda f: len(os.path.relpath(f, params.workdir)), files))

            for f in files:
                cell = cells[f]
                merge_log = os.path.join(os.path.dirname(f),
                                         "merging_stats.log")
                try:
                    lines = open(merge_log).readlines()
                    resn = float(
                        filter(lambda x: x.startswith("Resolution:"),
                               lines)[0].split()[-1])
                    cmpl = float(
                        filter(lambda x: x.startswith("Completeness:"),
                               lines)[0].split()[-1].replace("%", ""))
                except:
                    resn = float("nan")
                    cmpl = float("nan")

                if i == 1:  # print reason
                    print >> out, "%-15s" % reasons.get(f, "unknown"),
                print >> out, ("%-" + str(maxlen_f) + "s") % os.path.relpath(
                    f, params.workdir), cell,
                #print >>out, "ISa=%5.1f" % correctlp.get_ISa(os.path.join(os.path.dirname(f), "CORRECT.LP")),
                print >> out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)

        ret = []
        tkvals = lambda x: (x[-1], x[0], x[-2])  # overall, inner, outer

        for i in xrange(1, cycles.get_last_cycle_number() + 1):
            wd = os.path.join(workdir, "run_%.2d" % i)
            xscale_lp = os.path.join(wd, "XSCALE.LP")
            table = xscalelp.read_stats_table(xscale_lp)
            num_files = len(xscalelp.get_read_data(xscale_lp))
            xtriage_logfile = os.path.join(wd, "ccp4", "logfile.log")
            aniso = xds_aniso_analysis.parse_logfile(
                os.path.join(wd, "aniso.log"))
            cellinfo = cycles.cell_info_at_cycles[i]
            ret.append([
                i, wd, num_files,
                dict(cmpl=tkvals(table["cmpl"]),
                     redundancy=tkvals(table["redundancy"]),
                     i_over_sigma=tkvals(table["i_over_sigma"]),
                     r_meas=tkvals(table["r_meas"]),
                     cc_half=tkvals(table["cc_half"]),
                     sig_ano=tkvals(table["sig_ano"]),
                     cc_ano=tkvals(table["cc_ano"]),
                     drange=tkvals(table["d_range"]),
                     lp=xscale_lp,
                     xtriage_log=xtriage.XtriageLogfile(xtriage_logfile),
                     aniso=aniso,
                     lcv=cellinfo[1],
                     alcv=cellinfo[2],
                     dmin_est=cycles.dmin_est_at_cycles.get(i, float("nan")))
            ])

        xscale_lp = os.path.join(cycles.current_working_dir(), "XSCALE.LP")
        print >> out, "\nFinal statistics:\n"
        print >> out, xscalelp.snip_stats_table(xscale_lp)

        return ret

    elif params.program == "aimless":
        worker = Pointless()
        print >> out, "\nRunning pointless"
        runinfo = worker.run_copy(hklout="pointless.mtz",
                                  wdir=workdir,
                                  xdsin=xds_files,
                                  logout=os.path.join(workdir,
                                                      "pointless.log"),
                                  tolerance=30)

        # Table of file name -> Batch range
        assert len(xds_files) == len(runinfo)
        batch_info = collections.OrderedDict(
            map(lambda x: (x[0], (x[1][1:3])), zip(xds_files, runinfo)))

        cycles = multi_merging.aimless.AimlessCycles(
            workdir,
            anomalous_flag=params.anomalous,
            d_min=params.d_min,
            d_max=params.d_max,
            reject_method=params.reject_method,
            cc_cutoff=params.rejection.lpstats.pwcc.abs_cutoff,
            delta_cchalf_bin=params.rejection.delta_cchalf.bin,
            mtzin=os.path.join(workdir, "pointless.mtz"),
            batch_info=batch_info,
            out=out,
            nproc=params.nproc,
            nproc_each=params.batch.nproc_each,
            batchjobs=None)  # FIXME batchjobs
        unused_files, reasons = cycles.run_cycles(xds_files)
        used_files = set(xds_files).difference(set(unused_files))

        print >> out
        print >> out, " SUMMARY "
        print >> out, "========================"
        for i, files in enumerate((used_files, unused_files)):
            print >> out, "\n%6s %4d files:\n" % (
                ("Used", "Unused")[i], len(files))
            if len(files) == 0:
                continue

            maxlen_f = max(
                map(lambda f: len(os.path.relpath(f, params.workdir)), files))

            for f in files:
                cell = cells[f]
                merge_log = os.path.join(os.path.dirname(f),
                                         "merging_stats.log")
                try:
                    lines = open(merge_log).readlines()
                    resn = float(
                        filter(lambda x: x.startswith("Resolution:"),
                               lines)[0].split()[-1])
                    cmpl = float(
                        filter(lambda x: x.startswith("Completeness:"),
                               lines)[0].split()[-1].replace("%", ""))
                except:
                    resn = float("nan")
                    cmpl = float("nan")

                if i == 1:  # print reason
                    print >> out, "%-15s" % reasons.get(f, "unknown"),
                print >> out, ("%-" + str(maxlen_f) + "s") % os.path.relpath(
                    f, params.workdir), cell,
                print >> out, "ISa=%5.1f" % correctlp.get_ISa(
                    os.path.join(os.path.dirname(f), "CORRECT.LP")),
                print >> out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)

        aimless_log = os.path.join(cycles.current_working_dir(), "aimless.log")
        print >> out, "\nFinal statistics:\n"
        print >> out, aimless.snip_summary(aimless_log)

        # Write summary
        table = aimless.read_summary(aimless_log)

        tkvals = lambda x: (x[0], x[1], x[2])  # overall, inner, outer
        return [
            [
                cycles.get_last_cycle_number(),
                cycles.current_working_dir(),
                len(used_files),
                dict(cmpl=tkvals(table["cmpl"]),
                     redundancy=tkvals(table["redundancy"]),
                     i_over_sigma=tkvals(table["i_over_sigma"]),
                     r_meas=tkvals(table["r_meas"]),
                     cc_half=tkvals(table["cc_half"]),
                     sig_ano=(float("nan"), ) * 3,
                     cc_ano=tkvals(table["cc_ano"]))
            ],
        ]

        #print >>out, "\nRunning aimless"
        #aimless.run_aimless(mtzin="pointless.mtz",
        #                    wdir=workdir,
        #                    anomalous=params.anomalous, d_min=params.d_min, prefix=None)

    else:
        print >> out, "Unknown program:", params.program
        return []
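
A note on the tkvals lambda above: it relies on the row order of xscalelp.read_stats_table, which (as the assertions in Example #1 show) lists resolution shells first and the overall/total row last, so x[-1] is the overall value, x[0] the inner shell and x[-2] the outer shell. A toy illustration with an assumed four-row column:

cmpl_column = [99.1, 98.5, 90.2, 97.7]   # inner..outer shells, then total (assumed layout)
tkvals = lambda x: (x[-1], x[0], x[-2])  # -> (overall, inner, outer)
print(tkvals(cmpl_column))               # (97.7, 99.1, 90.2)
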
Example #3
    def run_cycle(self, xds_ascii_files, reference_idx=None):
        if len(xds_ascii_files) == 0:
            print >>self.out, "Error: no files given."
            return

        xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
        xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

        # Get averaged cell for scaling
        sg, cell, lcv, alcv = self.average_cells(xds_ascii_files)
        self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv, alcv)
        
        # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
        inp_out = open(xscale_inp, "w")
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x:len(x))
            refstr = "*" if i==reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr,tmp))
            if len(self.xscale_params.corrections) != 3:
                inp_out.write("  CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
            if self.xscale_params.frames_per_batch is not None:
                frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
                nframes = frame_range[1] - frame_range[0]
                nbatch = int(numpy.ceil(nframes / self.xscale_params.frames_per_batch))
                print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
                inp_out.write("  NBATCH= %d\n" % nbatch)

        inp_out.close()

        print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
        try:
            xscale.run_xscale(xscale_inp, cbf_to_dat=True,
                              use_tmpdir_if_available=self.xscale_params.use_tmpdir_if_available)
        except:
            print >>self.out, traceback.format_exc()

        xscale_log = open(xscale_lp).read()
        if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
            print >>self.out, "DEBUG:: Need to choose files."

            # Since the XDS version of March 1, 2015, XSCALE reports which dataset has no common
            # reflections, but it does not print the correlation table. Sometimes only one dataset
            # is left; should we build the table ourselves? Older versions just print the table and stop.
            if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
                G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
                #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
                cliques = [c for c in nx.find_cliques(G)]
                cliques.sort(key=lambda x:len(x))
                if self._counter == 1:
                    max_clique = cliques[-1]
                else:
                    idx_prevfile = 1 if self.reference_file else 0
                    max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1] # xscale.hkl must be included!

                if self.reference_file:
                    max_clique = [0,] + filter(lambda x: x!=0, max_clique)

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                try_later = map(lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes()))

                print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique),
                                                                                                      len(try_later))
                print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(G.nodes()))
                for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

                assert len(try_later) <= 0 # Never the case with newer xscale!! (if it happens, check_remove_list() should be modified to skip_num += 1)
                if len(try_later) > 0:
                    print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                    next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                    if self.reference_file: next_files = [self.reference_file,] + next_files
                    self.workdir = self.request_next_workdir()
                    self.run_cycle(next_files)
                    return
            else:
                bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
                print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                # XXX Actually, not all datasets need to be thrown away.. some of them are useful..
                for i in bad_idxes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], 
                                   filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))

            return
        elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
            print >>self.out, "DEBUG:: Need to discard useless data."
            unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
            if len(unuseful_data) == 0:
                print >>self.out, "I don't know how to fix it.."
                return
            remove_idxes = map(lambda x: x[0]-1, unuseful_data)
            remove_idxes = self.check_remove_list(remove_idxes)
            keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
            for i in remove_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "useless"

            for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
            return
        elif "INACCURATE SCALING FACTORS." in xscale_log:
            # Actually I don't know how to fix this.. (bug?) but it's worth proceeding (discarding bad data may solve the problem).
            print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
        elif "!!! ERROR !!!" in xscale_log:
            print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
            return

        # Re-scale by changing reference
        rescale_for = None
        if len(self.reject_method) == 0:
            rescale_for = self.reference_choice # may be None
        elif reference_idx is None:
            rescale_for = "bmed"
        
        if rescale_for is not None and len(xds_ascii_files) > 1:
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling with %s" % rescale_for
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(xds_ascii_files, reference_idx=ref_num)

        if len(self.reject_method) == 0:
            return

        # Remove bad data
        remove_idxes = []
        remove_reasons = {}

        if self.reject_method[0] == "framecc":
            print >>self.out, "Rejections based on frame CC"
            from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

            # list of [frame, n_all, n_common, cc] in the same order
            framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                                   output_dir=self.workdir,
                                                   nproc=self.nproc).values()
            if self.reject_params.framecc.method == "tukey":
                ccs = numpy.array(map(lambda x: x[3], reduce(lambda x,y:x+y,framecc)))
                ccs = ccs[ccs==ccs] # Remove nan
                q25, q75 = numpy.percentile(ccs, [25, 75])
                cc_cutoff  = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
                print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
            else:
                cc_cutoff = self.reject_params.framecc.abs_cutoff
                print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

            for i, cclist in enumerate(framecc):
                useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
                if len(useframes) == 0:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                f = xds_ascii_files[i]
                xac = XDS_ASCII(f)
                if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                    continue # All useful frames.

                sel = xac.iframe == useframes[0]
                for x in useframes[1:]: sel |= xac.iframe == x
                if sum(sel) < 10: # XXX should also consider I/sigma
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str,useframes)),
                                                                               min(xac.iframe), max(xac.iframe),
                                                                               f)

                newf = self.request_file_modify(f)
                xac.write_selected(sel, newf)

            self.reject_method.pop(0) # Perform only once

        elif self.reject_method[0] == "lpstats":
            if "bfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
                Bs = numpy.array(map(lambda x:x[1], xscalelp.get_k_b(xscale_lp)))
                q25, q75 = numpy.percentile(Bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(Bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_B")
                        count += 1

                print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.b" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
                bs = numpy.array(map(lambda x:x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.b")
                        count += 1

                print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.ab" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
                vals = numpy.array(map(lambda x:x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, ab in enumerate(vals):
                    if ab < lowlim or ab > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.ab")
                        count += 1

                print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "rfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
                rstats = xscalelp.get_rfactors_for_each(xscale_lp)
                vals = numpy.array(map(lambda x:rstats[x][-1][1], rstats)) # Read total R-factor
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, v in enumerate(vals):
                    if v < lowlim or v > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_R")
                        count += 1

                print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "pairwise_cc" in self.reject_params.lpstats.stats:
                corrs = xscalelp.get_pairwise_correlations(xscale_lp)
                if self.reject_params.lpstats.pwcc.method == "tukey":
                    q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                    iqr = q75 - q25
                    lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                    print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
                else:
                    lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                    print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

                bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
                idx_bad = {}
                for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                    idx_bad[i] = idx_bad.get(i, 0) + 1
                    idx_bad[j] = idx_bad.get(j, 0) + 1

                idx_bad = idx_bad.items()
                idx_bad.sort(key=lambda x:x[1])
                count = 0
                for idx, badcount in reversed(idx_bad):
                    remove_idxes.append(idx-1)
                    remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                    bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                    if len(bad_corrs) == 0: break
                    fun_key = lambda x: x[3]
                    print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx, 
                                                                                                   min(bad_corrs,key=fun_key)[3],
                                                                                                   max(bad_corrs,key=fun_key)[3],
                                                                                                   len(bad_corrs))
                    count += 1
                print >>self.out, " %4d pairwise CC outliers removed" % count

            self.reject_method.pop(0) # Perform only once
        elif self.reject_method[0] == "delta_cc1/2":
            print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
            table = xscalelp.read_stats_table(xscale_lp)
            i_stat = -1 if self.delta_cchalf_bin == "total" else -2
            prev_cchalf = table["cc_half"][i_stat]
            prev_nuniq = table["nuniq"][i_stat]
            # file_name->idx table
            remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

            # For consistent resolution limit
            inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
            count = 0
            for i in xrange(len(xds_ascii_files)-1): # if only one file, cannot proceed.
                tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

                cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                             inpfiles=remaining_files.keys(),
                                                             stat_bin=self.delta_cchalf_bin,
                                                             nproc=self.nproc,
                                                             nproc_each=self.nproc_each,
                                                             batchjobs=self.batchjobs)

                rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
                rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]
                
                # Decision making by CC1/2
                print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org, 
                                                                                          cc_i, nuniq_i,
                                                                                          prev_cchalf, prev_nuniq)
                if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
                print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

                prev_cchalf, prev_nuniq = cc_i, nuniq_i
                remove_idxes.append(rem_idx_in_org)
                remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
                del remaining_files[remaining_files.keys()[rem_idx]] # remove file from table
                count += 1

            print >>self.out, " %4d removed by DeltaCC1/2 method" % count

            if self.next_delta_cchalf_bin != []:
                self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            else:
                self.reject_method.pop(0)
        else:
            print >>self.out, "ERROR:: Unsupported reject_method (%s)" % reject_method

        # Remove duplicates
        remove_idxes = list(set(remove_idxes))
        remove_idxes = self.check_remove_list(remove_idxes)
        if len(remove_idxes) > 0:
            print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
            for i in sorted(remove_idxes): 
                print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

        # Next run
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        if len(self.reject_method) > 0 or len(remove_idxes) > 0:
            self.workdir = self.request_next_workdir()
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        elif self.reference_choice is not None and len(keep_idxes) > 1:
            # Just re-scale with B reference
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling2 with %s" % self.reference_choice
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
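
All of the lpstats branches above apply the same Tukey fence: a value is flagged when it falls outside [Q1 - c*IQR, Q3 + c*IQR]. A self-contained numpy sketch of that rule (illustration only, not part of the source):

import numpy

def tukey_outlier_indices(values, iqr_coeff=1.5):
    # Tukey's fences: anything outside [Q1 - c*IQR, Q3 + c*IQR] is an outlier
    vals = numpy.asarray(values, dtype=float)
    q25, q75 = numpy.percentile(vals, [25, 75])
    iqr = q75 - q25
    lowlim, highlim = q25 - iqr_coeff * iqr, q75 + iqr_coeff * iqr
    return [i for i, v in enumerate(vals) if v < lowlim or v > highlim]

print(tukey_outlier_indices([10.2, 11.0, 9.8, 10.5, 55.0]))  # [4]
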
Example #4
def calc_delta_cchalf(prev_lp, tmpdir, with_sigma=False, precalc_cchalf_all=None):
    """
    Obsolete function. Maybe useful when starting with the last XSCALE.LP...
    """

    assert not with_sigma # Not supported now

    if not os.path.exists(tmpdir): os.makedirs(tmpdir)

    newinp = os.path.join(tmpdir, "XSCALE.INP")
    newlp = os.path.join(tmpdir, "XSCALE.LP")

    rel_org = os.path.relpath(os.path.dirname(prev_lp), tmpdir)

    if precalc_cchalf_all is None:
        # read CC1/2(all) from cwd
        orgtable = xscalelp.read_stats_table(prev_lp)
        assert orgtable["dmin"][-1] is None # None for total
        cchalf_all = orgtable["cc_half"][-1]
    else:
        cchalf_all = precalc_cchalf_all

    datout = open(os.path.join(tmpdir, "delta_cchalf.dat"), "w")
    datout.write("# CC1/2(all)= %.4f\n" % cchalf_all)
    datout.write("idx exfile cc1/2 delta_cc1/2\n")

    # Read inp and extract input files.
    # XXX What if reference file is included???
    orgkwds = xscalelp.read_control_cards(prev_lp)
    inpfiles = map(lambda x:x[1],
                   filter(lambda y: y[0]=="INPUT_FILE", orgkwds))
                          
    # XXX Need to take care of the XSCALE-specific .INP conventions - order matters!!

    delta_cchalf = []

    for iex in xrange(len(inpfiles)):
        print "Doing", iex
        files = inpfiles[:iex] + inpfiles[iex+1:]
        ofs = open(newinp, "w")
        for k, v in orgkwds:
            if k not in ("INPUT_FILE", "INCLUDE_RESOLUTION_RANGE"):
                ofs.write("%s= %s\n" % (k,v))

        for f in files:
            if not os.path.isabs(f): f = os.path.join(rel_org, f)
            ofs.write("INPUT_FILE= %s\n" % f)
        ofs.close()

        util.call(xscale_comm, wdir=tmpdir)
        table = xscalelp.read_stats_table(newlp)
        assert table["dmin"][-1] is None # None for total
        cchalf_exi = table["cc_half"][-1]
        delta_cchalf.append((iex, cchalf_exi - cchalf_all))

        os.rename(newinp, newinp+".ex%.3d"%iex)
        os.rename(newlp, newlp+".ex%.3d"%iex)

        datout.write("%3d %s %.4f %.4f\n" % (iex, inpfiles[iex], cchalf_exi, cchalf_exi-cchalf_all))

    delta_cchalf.sort(key=lambda x: -x[1])
    print
    print "# Sorted table"
    for idx, dch in delta_cchalf:
        print "%3d %-.4f %s" % (idx, dch, inpfiles[idx])

    return delta_cchalf, cchalf_all
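
The core of the delta-CC1/2 procedure above is a plain leave-one-out loop: recompute the statistic with dataset i excluded and record the change. A toy version with the mean standing in for CC1/2 (illustration only, not part of the source):

def leave_one_out_deltas(values, stat=lambda v: sum(v) / float(len(v))):
    # delta[i] > 0 means removing item i improves the statistic
    full = stat(values)
    return [(i, stat(values[:i] + values[i+1:]) - full)
            for i in range(len(values))]

deltas = sorted(leave_one_out_deltas([0.95, 0.96, 0.40, 0.97]), key=lambda x: -x[1])
print(deltas[0])  # (2, ~0.14): dropping the 0.40 dataset helps most
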
Example #5
def merge_datasets(params, workdir, xds_files, cells, batchjobs):
    if not os.path.exists(workdir): os.makedirs(workdir)
    out = open(os.path.join(workdir, "merge.log"), "w")

    if params.program == "xscale":
        cycles = multi_merging.xscale.XscaleCycles(workdir, 
                                                   anomalous_flag=params.anomalous,
                                                   d_min=params.d_min, d_max=params.d_max, 
                                                   reject_method=params.reject_method,
                                                   reject_params=params.rejection,
                                                   xscale_params=params.xscale,
                                                   reference_file=params.reference_file,
                                                   out=out, nproc=params.nproc,
                                                   nproc_each=params.batch.nproc_each,
                                                   batchjobs=batchjobs if "deltacchalf" in params.batch.par_run else None)
        unused_files, reasons = cycles.run_cycles(xds_files)
        used_files = set(xds_files).difference(set(unused_files))

        print >>out
        print >>out, " SUMMARY "
        print >>out, "========================"
        for i, files in enumerate((used_files, unused_files)):
            print >>out, "\n%6s %4d files:\n" % (("Used", "Unused")[i], len(files))
            if len(files) == 0:
                continue

            maxlen_f = max(map(lambda f: len(os.path.relpath(f, params.workdir)), files))

            for f in files:
                cell = cells[f]
                merge_log = os.path.join(os.path.dirname(f), "merging_stats.log")
                try:
                    lines = open(merge_log).readlines()
                    resn = float(filter(lambda x:x.startswith("Resolution:"), lines)[0].split()[-1])
                    cmpl = float(filter(lambda x:x.startswith("Completeness:"), lines)[0].split()[-1].replace("%",""))
                except:
                    resn = float("nan")
                    cmpl = float("nan")

                if i == 1: # print reason
                    print >>out, "%-15s"%reasons.get(f, "unknown"),
                print >>out, ("%-"+str(maxlen_f)+"s")%os.path.relpath(f, params.workdir), cell,
                #print >>out, "ISa=%5.1f" % correctlp.get_ISa(os.path.join(os.path.dirname(f), "CORRECT.LP")),
                print >>out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)

        ret = []
        tkvals = lambda x: (x[-1], x[0], x[-2]) # overall, inner, outer

        for i in xrange(1, cycles.get_last_cycle_number()+1):
            wd = os.path.join(workdir, "run_%.2d"%i)
            xscale_lp = os.path.join(wd, "XSCALE.LP")
            table = xscalelp.read_stats_table(xscale_lp)
            num_files = len(xscalelp.get_read_data(xscale_lp))
            xtriage_logfile = os.path.join(wd, "ccp4", "logfile.log")
            ret.append([i, wd, num_files,
                        dict(cmpl=tkvals(table["cmpl"]),
                             redundancy=tkvals(table["redundancy"]),
                             i_over_sigma=tkvals(table["i_over_sigma"]),
                             r_meas=tkvals(table["r_meas"]),
                             cc_half=tkvals(table["cc_half"]),
                             sig_ano=tkvals(table["sig_ano"]),
                             cc_ano=tkvals(table["cc_ano"]),
                             drange=tkvals(table["d_range"]),
                             lp=xscale_lp,
                             xtriage_log=xtriage.XtriageLogfile(xtriage_logfile))
                        ])

        xscale_lp = os.path.join(cycles.current_working_dir(), "XSCALE.LP")
        print >>out, "\nFinal statistics:\n"
        print >>out, xscalelp.snip_stats_table(xscale_lp)

        return ret

    elif params.program == "aimless":
        worker = Pointless()
        print >>out, "\nRunning pointless"
        runinfo = worker.run_copy(hklout="pointless.mtz", wdir=workdir,
                                  xdsin=xds_files,
                                  logout=os.path.join(workdir, "pointless.log"),
                                  tolerance=30)

        # Table of file name -> Batch range
        assert len(xds_files) == len(runinfo)
        batch_info = collections.OrderedDict(map(lambda x: (x[0], (x[1][1:3])), zip(xds_files, runinfo)))

        cycles = multi_merging.aimless.AimlessCycles(workdir, 
                                                     anomalous_flag=params.anomalous,
                                                     d_min=params.d_min, d_max=params.d_max, 
                                                     reject_method=params.reject_method,
                                                     cc_cutoff=params.rejection.lpstats.pwcc.abs_cutoff,
                                                     delta_cchalf_bin=params.rejection.delta_cchalf.bin,
                                                     mtzin=os.path.join(workdir, "pointless.mtz"),
                                                     batch_info=batch_info,
                                                     out=out, nproc=params.nproc,
                                                     nproc_each=params.batch.nproc_each,
                                                     batchjobs=batchjobs if "deltacchalf" in params.batch.par_run else None)
        unused_files, reasons = cycles.run_cycles(xds_files)
        used_files = set(xds_files).difference(set(unused_files))

        print >>out
        print >>out, " SUMMARY "
        print >>out, "========================"
        for i, files in enumerate((used_files, unused_files)):
            print >>out, "\n%6s %4d files:\n" % (("Used", "Unused")[i], len(files))
            if len(files) == 0:
                continue

            maxlen_f = max(map(lambda f: len(os.path.relpath(f, params.workdir)), files))

            for f in files:
                cell = cells[f]
                merge_log = os.path.join(os.path.dirname(f), "merging_stats.log")
                try:
                    lines = open(merge_log).readlines()
                    resn = float(filter(lambda x:x.startswith("Resolution:"), lines)[0].split()[-1])
                    cmpl = float(filter(lambda x:x.startswith("Completeness:"), lines)[0].split()[-1].replace("%",""))
                except:
                    resn = float("nan")
                    cmpl = float("nan")

                if i == 1: # print reason
                    print >>out, "%-15s"%reasons.get(f, "unknown"),
                print >>out, ("%-"+str(maxlen_f)+"s")%os.path.relpath(f, params.workdir), cell,
                print >>out, "ISa=%5.1f" % correctlp.get_ISa(os.path.join(os.path.dirname(f), "CORRECT.LP")),
                print >>out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)

        aimless_log = os.path.join(cycles.current_working_dir(), "aimless.log")
        print >>out, "\nFinal statistics:\n"
        print >>out, aimless.snip_summary(aimless_log)

        # Write summary
        table = aimless.read_summary(aimless_log)

        tkvals = lambda x: (x[0], x[1], x[2]) # overall, inner, outer
        return [[cycles.get_last_cycle_number(), cycles.current_working_dir(), len(used_files),
                dict(cmpl=tkvals(table["cmpl"]),
                     redundancy=tkvals(table["redundancy"]),
                     i_over_sigma=tkvals(table["i_over_sigma"]),
                     r_meas=tkvals(table["r_meas"]),
                     cc_half=tkvals(table["cc_half"]),
                     sig_ano=(float("nan"),)*3,
                     cc_ano=tkvals(table["cc_ano"]))], ]

        #print >>out, "\nRunning aimless"
        #aimless.run_aimless(mtzin="pointless.mtz",
        #                    wdir=workdir,
        #                    anomalous=params.anomalous, d_min=params.d_min, prefix=None)

    else:
        print >>out, "Unknown program:", params.program
        return []
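
A note on the batch_info construction in the aimless branch: it zips the input files with pointless' runinfo and keeps elements 1:3 of each runinfo row as the batch range. Assuming each row looks like (dataset_id, batch_start, batch_end, ...), which is an assumption here, a small illustration:

import collections

xds_files = ["ds1/XDS_ASCII.HKL", "ds2/XDS_ASCII.HKL"]
runinfo = [(1, 1, 180, None), (2, 181, 360, None)]  # assumed row layout
batch_info = collections.OrderedDict(
    map(lambda x: (x[0], x[1][1:3]), zip(xds_files, runinfo)))
print(batch_info)  # OrderedDict([('ds1/XDS_ASCII.HKL', (1, 180)), ('ds2/XDS_ASCII.HKL', (181, 360))])
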
Example #6
    def run_cycle(self, xds_ascii_files, reference_idx=None):
        if len(xds_ascii_files) == 0:
            print >>self.out, "Error: no files given."
            return

        xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
        xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

        # Get averaged cell for scaling
        sg, cell = self.average_cells(xds_ascii_files)
        
        # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
        inp_out = open(xscale_inp, "w")
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x:len(x))
            refstr = "*" if i==reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr,tmp))
            if len(self.xscale_params.corrections) != 3:
                inp_out.write("  CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
            if self.xscale_params.frames_per_batch is not None:
                frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
                nframes = frame_range[1] - frame_range[0]
                nbatch = int(numpy.ceil(nframes / self.xscale_params.frames_per_batch))
                print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
                inp_out.write("  NBATCH= %d\n" % nbatch)

        inp_out.close()

        print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
        xscale.run_xscale(xscale_inp)
        #util.call(xscale_comm, wdir=self.workdir)

        cbfouts = glob.glob(os.path.join(self.workdir, "*.cbf"))
        if len(cbfouts) > 0:
            # This doesn't affect anything, so I don't want the program to stop if this fails
            try:
                xscalelp.cbf_to_dat(xscale_lp)
                for f in cbfouts: os.remove(f)
            except:
                print >>self.out, traceback.format_exc()

        xscale_log = open(xscale_lp).read()
        if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
            print >>self.out, "DEBUG:: Need to choose files."

            # Since the XDS version of March 1, 2015, XSCALE reports which dataset has no common
            # reflections, but it does not print the correlation table. Sometimes only one dataset
            # is left; should we build the table ourselves? Older versions just print the table and stop.
            if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
                G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
                #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
                cliques = [c for c in nx.find_cliques(G)]
                cliques.sort(key=lambda x:len(x))
                if self._counter == 1:
                    max_clique = cliques[-1]
                else:
                    idx_prevfile = 1 if self.reference_file else 0
                    max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1] # xscale.hkl must be included!

                if self.reference_file:
                    max_clique = [0,] + filter(lambda x: x!=0, max_clique)

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                try_later = map(lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes()))

                print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique),
                                                                                                      len(try_later))
                print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(G.nodes()))
                for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

                assert len(try_later) <= 0 # Never the case with newer xscale!! (if it happens, check_remove_list() should be modified to skip_num += 1)
                if len(try_later) > 0:
                    print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                    next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                    if self.reference_file: next_files = [self.reference_file,] + next_files
                    self.workdir = self.request_next_workdir()
                    self.run_cycle(next_files)
                    return
            else:
                bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
                print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                # XXX Actually, not all datasets need to be thrown away.. some of them are useful..
                for i in bad_idxes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], 
                                   filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))

            return
        elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
            print >>self.out, "DEBUG:: Need to discard useless data."
            unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
            if len(unuseful_data) == 0:
                print >>self.out, "I don't know how to fix it.."
                return
            remove_idxes = map(lambda x: x[0]-1, unuseful_data)
            remove_idxes = self.check_remove_list(remove_idxes)
            keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
            for i in remove_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "useless"

            for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
            return
        elif "INACCURATE SCALING FACTORS." in xscale_log:
            # Actually I don't know how to fix this.. (bug?) but it's worth proceeding (discarding bad data may solve the problem).
            print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
        elif "!!! ERROR !!!" in xscale_log:
            print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
            return

        # Re-scale by changing reference
        rescale_for = None
        if len(self.reject_method) == 0:
            rescale_for = self.reference_choice # may be None
        elif reference_idx is None:
            rescale_for = "bmed"
        
        if rescale_for is not None and len(xds_ascii_files) > 1:
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling with %s" % rescale_for
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(xds_ascii_files, reference_idx=ref_num)

        if len(self.reject_method) == 0:
            return

        # Remove bad data
        remove_idxes = []
        remove_reasons = {}

        if self.reject_method[0] == "framecc":
            print >>self.out, "Rejections based on frame CC"
            from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

            # list of [frame, n_all, n_common, cc] in the same order
            framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                                   output_dir=self.workdir,
                                                   nproc=self.nproc).values()
            if self.reject_params.framecc.method == "tukey":
                ccs = numpy.array(map(lambda x: x[3], reduce(lambda x,y:x+y,framecc)))
                ccs = ccs[ccs==ccs] # Remove nan (NaN CCs would make the percentiles NaN)
                q25, q75 = numpy.percentile(ccs, [25, 75])
                cc_cutoff  = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
                print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
            else:
                cc_cutoff = self.reject_params.framecc.abs_cutoff
                print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

            for i, cclist in enumerate(framecc):
                useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
                if len(useframes) == 0:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                f = xds_ascii_files[i]
                xac = XDS_ASCII(f)
                if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                    continue # All useful frames.

                sel = xac.iframe == useframes[0]
                for x in useframes[1:]: sel |= xac.iframe == x
                if sum(sel) < 10: # XXX should also consider I/sigma
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str,useframes)),
                                                                               min(xac.iframe), max(xac.iframe),
                                                                               f)

                newf = self.request_file_modify(f)
                xac.write_selected(sel, newf)

            self.reject_method.pop(0) # Perform only once

        elif self.reject_method[0] == "lpstats":
            if "bfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
                Bs = numpy.array(map(lambda x:x[1], xscalelp.get_k_b(xscale_lp)))
                q25, q75 = numpy.percentile(Bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(Bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_B")
                        count += 1

                print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.b" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
                bs = numpy.array(map(lambda x:x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.b")
                        count += 1

                print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.ab" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
                vals = numpy.array(map(lambda x:x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, ab in enumerate(vals):
                    if ab < lowlim or ab > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.ab")
                        count += 1

                print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "rfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
                rstats = xscalelp.get_rfactors_for_each(xscale_lp)
                vals = numpy.array(map(lambda x:rstats[x][-1][1], rstats)) # Read total R-factor
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, v in enumerate(vals):
                    if v < lowlim or v > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_R")
                        count += 1

                print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "pairwise_cc" in self.reject_params.lpstats.stats:
                corrs = xscalelp.get_pairwise_correlations(xscale_lp)
                if self.reject_params.lpstats.pwcc.method == "tukey":
                    q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                    iqr = q75 - q25
                    lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                    print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
                else:
                    lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                    print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

                bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
                idx_bad = {}
                for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                    idx_bad[i] = idx_bad.get(i, 0) + 1
                    idx_bad[j] = idx_bad.get(j, 0) + 1

                idx_bad = idx_bad.items()
                idx_bad.sort(key=lambda x:x[1])
                count = 0
                for idx, badcount in reversed(idx_bad):
                    remove_idxes.append(idx-1)
                    remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                    bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                    if len(bad_corrs) == 0: break
                    fun_key = lambda x: x[3]
                    print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx, 
                                                                                                   min(bad_corrs,key=fun_key)[3],
                                                                                                   max(bad_corrs,key=fun_key)[3],
                                                                                                   len(bad_corrs))
                    count += 1
                print >>self.out, " %4d pairwise CC outliers removed" % count

            self.reject_method.pop(0) # Perform only once
        elif self.reject_method[0] == "delta_cc1/2":
            print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
            table = xscalelp.read_stats_table(xscale_lp)
            i_stat = -1 if self.delta_cchalf_bin == "total" else -2
            prev_cchalf = table["cc_half"][i_stat]
            prev_nuniq = table["nuniq"][i_stat]
            # file_name->idx table
            remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

            # For consistent resolution limit
            inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
            count = 0
            for i in xrange(len(xds_ascii_files)-1): # if only one file, cannot proceed.
                tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

                cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                             inpfiles=remaining_files.keys(),
                                                             stat_bin=self.delta_cchalf_bin,
                                                             nproc=self.nproc,
                                                             nproc_each=self.nproc_each,
                                                             batchjobs=self.batchjobs)

                rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
                rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]
                
                # Decision making by CC1/2
                print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org, 
                                                                                          cc_i, nuniq_i,
                                                                                          prev_cchalf, prev_nuniq)
                if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
                print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

                prev_cchalf, prev_nuniq = cc_i, nuniq_i
                remove_idxes.append(rem_idx_in_org)
                remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
                del remaining_files[remaining_files.keys()[rem_idx]] # remove file from table
                count += 1

            print >>self.out, " %4d removed by DeltaCC1/2 method" % count

            if self.next_delta_cchalf_bin != []:
                self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            else:
                self.reject_method.pop(0)
        else:
            print >>self.out, "ERROR:: Unsupported reject_method (%s)" % reject_method

        # Remove duplicates
        remove_idxes = list(set(remove_idxes))
        remove_idxes = self.check_remove_list(remove_idxes)
        if len(remove_idxes) > 0:
            print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
            for i in sorted(remove_idxes): 
                print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

        # Next run
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        if len(self.reject_method) > 0 or len(remove_idxes) > 0:
            self.workdir = self.request_next_workdir()
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        elif self.reference_choice is not None and len(keep_idxes) > 1:
            # Just re-scale with B reference
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling2 with %s" % self.reference_choice
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
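
Finally, the pairwise_cc branch removes datasets greedily: count how many low-CC pairs each dataset participates in, drop the worst offender, and repeat until no bad pair remains. A standalone sketch of that idea (the original sorts the counts once rather than recounting on each pass):

def greedy_remove_bad_pairs(bad_pairs):
    # bad_pairs: (i, j) dataset-index pairs whose pairwise CC fell below the cutoff
    removed = []
    while bad_pairs:
        counts = {}
        for i, j in bad_pairs:
            counts[i] = counts.get(i, 0) + 1
            counts[j] = counts.get(j, 0) + 1
        worst = max(counts, key=counts.get)  # dataset in the most bad pairs
        removed.append(worst)
        bad_pairs = [p for p in bad_pairs if worst not in p]
    return removed

print(greedy_remove_bad_pairs([(1, 2), (1, 3), (2, 3), (4, 5)]))  # e.g. [1, 2, 4]
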