def _calc_cchalf_by_removing_worker_2(wdir, tmpdir, iex, stat_bin):
    """
    Collect the result of one finished XSCALE trial run and clean up after it.

    wdir: directory that receives the backed-up XSCALE.INP/XSCALE.LP
    tmpdir: directory of the trial run; always removed before returning normally
    iex: integer tag of the trial, used in the backup suffix and echoed back
    stat_bin: "total" or "outer"; selects which row of the statistics table is read

    Returns (iex, cc_half, nuniq). When the statistics table cannot be read,
    (iex, nan, -1) is returned and nothing is backed up.
    """
    assert stat_bin in ("total", "outer")

    lp_path = os.path.join(tmpdir, "XSCALE.LP")
    inp_path = os.path.join(tmpdir, "XSCALE.INP")

    stats = xscalelp.read_stats_table(lp_path)
    if stats is None:
        # No table available; report a dummy result.
        shutil.rmtree(tmpdir)
        return iex, float("nan"), -1

    # dmin is None for the "total" row, which is expected to be the last one.
    assert stats["dmin"][-1] is None

    # Last row is "total"; the row just before it is read for "outer".
    if stat_bin == "total":
        row = -1
    else:
        row = -2
    cchalf = stats["cc_half"][row]
    n_unique = stats["nuniq"][row]

    # Back up .INP and .LP, and then remove the directory.
    os.rename(inp_path, os.path.join(wdir, "XSCALE.INP.ex%.3d" % iex))
    os.rename(lp_path, os.path.join(wdir, "XSCALE.LP.ex%.3d" % iex))
    shutil.rmtree(tmpdir)

    return iex, cchalf, n_unique
def _write_merge_summary(out, params, cells, used_files, unused_files, reasons, show_isa):
    """
    Write the "Used"/"Unused" per-file summary table to out.

    out: writable file object (the merge log)
    params: parameter object; only params.workdir is read (paths are printed relative to it)
    cells: mapping file name -> unit cell, printed as is
    used_files, unused_files: collections of file names
    reasons: mapping file name -> rejection reason, printed for unused files
    show_isa: when true, ISa read from CORRECT.LP next to each file is also printed
    """
    print >>out
    print >>out, " SUMMARY "
    print >>out, "========================"
    for i, files in enumerate((used_files, unused_files)):
        print >>out, "\n%6s %4d files:\n" % (("Used", "Unused")[i], len(files))
        if len(files) == 0:
            continue

        maxlen_f = max([len(os.path.relpath(f, params.workdir)) for f in files])

        for f in files:
            cell = cells[f]
            merge_log = os.path.join(os.path.dirname(f), "merging_stats.log")
            try:
                with open(merge_log) as ifs:
                    lines = ifs.readlines()
                resn = float([l for l in lines if l.startswith("Resolution:")][0].split()[-1])
                cmpl = float([l for l in lines if l.startswith("Completeness:")][0].split()[-1].replace("%", ""))
            except Exception:
                # Best effort: a missing or unparsable log only blanks both numbers.
                resn = float("nan")
                cmpl = float("nan")

            if i == 1:  # print reason
                print >>out, "%-15s" % reasons.get(f, "unknown"),
            print >>out, ("%-" + str(maxlen_f) + "s") % os.path.relpath(f, params.workdir), cell,
            if show_isa:
                print >>out, "ISa=%5.1f" % correctlp.get_ISa(os.path.join(os.path.dirname(f), "CORRECT.LP")),
            print >>out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)


def merge_datasets(params, workdir, xds_files, cells, space_group):
    """
    Merge xds_files with the program named by params.program ("xscale" or "aimless").

    A log is written to merge.log in workdir (created when missing).

    params: parameter object (program, resolution limits, rejection settings, ...)
    workdir: directory where merging is performed
    xds_files: list of input file names
    cells: mapping file name -> unit cell, only used for the summary table
    space_group: passed to XscaleCycles (not used for aimless)

    Returns a list of [cycle number, working directory, number of files, statistics dict];
    one entry per cycle for xscale, a single entry for aimless, and an empty
    list when params.program is unknown.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    # with-block: the log is flushed and closed on every exit path (it used to be left open).
    with open(os.path.join(workdir, "merge.log"), "w") as out:
        if params.program == "xscale":
            cycles = multi_merging.xscale.XscaleCycles(
                workdir,
                anomalous_flag=params.anomalous,
                d_min=params.d_min,
                d_max=params.d_max,
                reject_method=params.reject_method,
                reject_params=params.rejection,
                xscale_params=params.xscale,
                res_params=params.resolution,
                reference_file=params.reference_file,
                space_group=space_group,
                ref_mtz=params.reference.data if params.reference.copy_test_flag else None,
                out=out,
                nproc=params.nproc,
                batch_params=params.batch)

            unused_files, reasons = cycles.run_cycles(xds_files)
            used_files = set(xds_files).difference(set(unused_files))
            _write_merge_summary(out, params, cells, used_files, unused_files, reasons,
                                 show_isa=False)

            ret = []
            tkvals = lambda x: (x[-1], x[0], x[-2])  # overall, inner, outer

            for i in range(1, cycles.get_last_cycle_number() + 1):
                wd = os.path.join(workdir, "run_%.2d" % i)
                xscale_lp = os.path.join(wd, "XSCALE.LP")
                table = xscalelp.read_stats_table(xscale_lp)
                num_files = len(xscalelp.get_read_data(xscale_lp))
                xtriage_logfile = os.path.join(wd, "ccp4", "logfile.log")
                aniso = xds_aniso_analysis.parse_logfile(os.path.join(wd, "aniso.log"))
                cellinfo = cycles.cell_info_at_cycles[i]
                ret.append([i, wd, num_files,
                            dict(cmpl=tkvals(table["cmpl"]),
                                 redundancy=tkvals(table["redundancy"]),
                                 i_over_sigma=tkvals(table["i_over_sigma"]),
                                 r_meas=tkvals(table["r_meas"]),
                                 cc_half=tkvals(table["cc_half"]),
                                 sig_ano=tkvals(table["sig_ano"]),
                                 cc_ano=tkvals(table["cc_ano"]),
                                 drange=tkvals(table["d_range"]),
                                 lp=xscale_lp,
                                 xtriage_log=xtriage.XtriageLogfile(xtriage_logfile),
                                 aniso=aniso,
                                 lcv=cellinfo[1],
                                 alcv=cellinfo[2],
                                 dmin_est=cycles.dmin_est_at_cycles.get(i, float("nan")))
                            ])

            xscale_lp = os.path.join(cycles.current_working_dir(), "XSCALE.LP")
            print >>out, "\nFinal statistics:\n"
            print >>out, xscalelp.snip_stats_table(xscale_lp)

            return ret

        elif params.program == "aimless":
            worker = Pointless()
            print >>out, "\nRunning pointless"
            runinfo = worker.run_copy(hklout="pointless.mtz", wdir=workdir,
                                      xdsin=xds_files,
                                      logout=os.path.join(workdir, "pointless.log"),
                                      tolerance=30)

            # Table of file name -> Batch range
            assert len(xds_files) == len(runinfo)
            batch_info = collections.OrderedDict(
                [(f, info[1:3]) for f, info in zip(xds_files, runinfo)])

            cycles = multi_merging.aimless.AimlessCycles(
                workdir,
                anomalous_flag=params.anomalous,
                d_min=params.d_min,
                d_max=params.d_max,
                reject_method=params.reject_method,
                cc_cutoff=params.rejection.lpstats.pwcc.abs_cutoff,
                delta_cchalf_bin=params.rejection.delta_cchalf.bin,
                mtzin=os.path.join(workdir, "pointless.mtz"),
                batch_info=batch_info,
                out=out,
                nproc=params.nproc,
                nproc_each=params.batch.nproc_each,
                batchjobs=None)  # FIXME batchjobs

            unused_files, reasons = cycles.run_cycles(xds_files)
            used_files = set(xds_files).difference(set(unused_files))
            _write_merge_summary(out, params, cells, used_files, unused_files, reasons,
                                 show_isa=True)

            aimless_log = os.path.join(cycles.current_working_dir(), "aimless.log")
            print >>out, "\nFinal statistics:\n"
            print >>out, aimless.snip_summary(aimless_log)

            # Write summary
            table = aimless.read_summary(aimless_log)

            tkvals = lambda x: (x[0], x[1], x[2])  # overall, inner, outer
            return [[cycles.get_last_cycle_number(), cycles.current_working_dir(), len(used_files),
                     dict(cmpl=tkvals(table["cmpl"]),
                          redundancy=tkvals(table["redundancy"]),
                          i_over_sigma=tkvals(table["i_over_sigma"]),
                          r_meas=tkvals(table["r_meas"]),
                          cc_half=tkvals(table["cc_half"]),
                          sig_ano=(float("nan"),) * 3,
                          cc_ano=tkvals(table["cc_ano"]))],
                    ]

        else:
            print >>out, "Unknown program:", params.program
            return []
def run_cycle(self, xds_ascii_files, reference_idx=None):
    """
    Run one XSCALE cycle on xds_ascii_files and recurse while work is left.

    Steps:
      1. write XSCALE.INP into self.workdir and run xscale
      2. when XSCALE.LP reports a known error, register the offending files in
         self.removed_files/self.removed_reason and re-run without them
      3. optionally re-run with a different scaling reference
      4. apply the first entry of self.reject_method and re-run with the kept
         files in the directory given by self.request_next_workdir()

    xds_ascii_files: list of input file names; indices reported by XSCALE.LP
                     readers are mapped back onto this list
    reference_idx: index in xds_ascii_files of the file flagged with "*" on its
                   INPUT_FILE= line; None flags no file

    Returns None; progress is written to self.out.
    """
    if len(xds_ascii_files) == 0:
        print >>self.out, "Error: no files given."
        return

    def rotate_inp_and_lp():
        # Move the current XSCALE.INP/XSCALE.LP aside before re-running in the same directory.
        for name in "XSCALE.INP", "XSCALE.LP":
            util.rotate_file(os.path.join(self.workdir, name))

    xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
    xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

    # Get averaged cell for scaling
    sg, cell, lcv, alcv = self.average_cells(xds_ascii_files)
    self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv, alcv)

    # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
    with open(xscale_inp, "w") as inp_out:
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            # Use whichever of the relative and the given path is shorter.
            tmp = min(os.path.relpath(f, self.workdir), f, key=len)
            refstr = "*" if i == reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
            if len(self.xscale_params.corrections) != 3:
                inp_out.write(" CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
            if self.xscale_params.frames_per_batch is not None:
                frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
                nframes = frame_range[1] - frame_range[0]
                # float() forces true division: with two Python 2 integers "/" floors, which
                # turned numpy.ceil() into a no-op and could give NBATCH= 0.
                nbatch = max(1, int(numpy.ceil(float(nframes) / self.xscale_params.frames_per_batch)))
                print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
                inp_out.write(" NBATCH= %d\n" % nbatch)

    print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
    try:
        xscale.run_xscale(xscale_inp, cbf_to_dat=True,
                          use_tmpdir_if_available=self.xscale_params.use_tmpdir_if_available)
    except Exception:
        # Deliberately best effort: log the failure and let the XSCALE.LP checks below decide.
        print >>self.out, traceback.format_exc()

    with open(xscale_lp) as lp_in:
        xscale_log = lp_in.read()

    if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
        print >>self.out, "DEBUG:: Need to choose files."

        # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections.
        # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves?
        # Older versions just print correlation table and stop.
        if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
            G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
            nodes = list(G.nodes())
            cliques = sorted(nx.find_cliques(G), key=len)
            if self._counter == 1:
                max_clique = cliques[-1]
            else:
                idx_prevfile = 1 if self.reference_file else 0
                # xscale.hkl must be included!
                max_clique = [c for c in cliques if idx_prevfile in c][-1]

            if self.reference_file:
                max_clique = [0] + [x for x in max_clique if x != 0]

            rotate_inp_and_lp()

            try_later = [xds_ascii_files[i] for i in nodes if i not in max_clique]

            print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique), len(try_later))
            print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files) - len(nodes))
            for i in range(len(xds_ascii_files)):
                if i not in nodes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle([xds_ascii_files[i] for i in max_clique])

            assert len(try_later) <= 0 # Never be the case with newer xscale!! (if the case, check_remove_list() should be modified to skip_num+=1
            if len(try_later) > 0:
                print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                if self.reference_file:
                    next_files = [self.reference_file] + next_files
                self.workdir = self.request_next_workdir()
                self.run_cycle(next_files)
                return
        else:
            bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
            print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

            rotate_inp_and_lp()

            # XXX Actually, not all datasets need to be thrown.. some of them are useful..
            for i in bad_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle([xds_ascii_files[i] for i in range(len(xds_ascii_files))
                            if i not in bad_idxes])

        return
    elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
        print >>self.out, "DEBUG:: Need to discard useless data."
        # Only the last entry read by XSCALE is discarded.
        unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
        if len(unuseful_data) == 0:
            print >>self.out, "I don't know how to fix it.."
            return
        remove_idxes = [x[0] - 1 for x in unuseful_data]  # first field is 1-based
        remove_idxes = self.check_remove_list(remove_idxes)
        keep_idxes = [i for i in range(len(xds_ascii_files)) if i not in remove_idxes]
        for i in remove_idxes:
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = "useless"

        rotate_inp_and_lp()
        self.run_cycle([xds_ascii_files[i] for i in keep_idxes])
        return
    elif "INACCURATE SCALING FACTORS." in xscale_log:
        # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
        print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
    elif "!!! ERROR !!!" in xscale_log:
        print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
        return

    # Re-scale by changing reference
    rescale_for = None
    if len(self.reject_method) == 0:
        rescale_for = self.reference_choice # may be None
    elif reference_idx is None:
        rescale_for = "bmed"

    if rescale_for is not None and len(xds_ascii_files) > 1:
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling with %s" % rescale_for
            rotate_inp_and_lp()
            # NOTE(review): there is no return after this call, so the rejection stage
            # below also runs in this frame once the nested call comes back.
            self.run_cycle(xds_ascii_files, reference_idx=ref_num)

    if len(self.reject_method) == 0:
        return

    # Remove bad data
    remove_idxes = []
    remove_reasons = {}

    if self.reject_method[0] == "framecc":
        print >>self.out, "Rejections based on frame CC"
        from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

        # list of [frame, n_all, n_common, cc] in the same order
        framecc = list(xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                                    output_dir=self.workdir,
                                                    nproc=self.nproc).values())
        if self.reject_params.framecc.method == "tukey":
            ccs = numpy.array([x[3] for cclist in framecc for x in cclist])
            ccs = ccs[ccs == ccs] # Remove nan
            q25, q75 = numpy.percentile(ccs, [25, 75])
            cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
            print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
        else:
            cc_cutoff = self.reject_params.framecc.abs_cutoff
            print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

        for i, cclist in enumerate(framecc):
            useframes = [x[0] for x in cclist if x[3] > cc_cutoff]
            if len(useframes) == 0:
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            f = xds_ascii_files[i]
            xac = XDS_ASCII(f)
            if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                continue # All useful frames.

            sel = xac.iframe == useframes[0]
            for x in useframes[1:]:
                sel |= xac.iframe == x
            if sum(sel) < 10: # XXX care I/sigma
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str, useframes)),
                                                                         min(xac.iframe), max(xac.iframe),
                                                                         f)
            newf = self.request_file_modify(f)
            xac.write_selected(sel, newf)

        self.reject_method.pop(0) # Perform only once

    elif self.reject_method[0] == "lpstats":
        def reject_iqr_outliers(label, reason, read_values):
            # Flag every index whose value lies outside [q25 - c*IQR, q75 + c*IQR].
            # read_values is called after the header line is printed.
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on %s outliers (%.2f*IQR)" % (label, iqrc)
            vals = numpy.array(read_values())
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, v in enumerate(vals):
                if v < lowlim or v > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append(reason)
                    count += 1
            print >>self.out, " %4d %s outliers (<%.2f, >%.2f) removed" % (count, label, lowlim, highlim)

        def read_total_rfactors():
            rstats = xscalelp.get_rfactors_for_each(xscale_lp)
            return [rstats[k][-1][1] for k in rstats] # Read total R-factor

        lpstats = self.reject_params.lpstats

        if "bfactor" in lpstats.stats:
            reject_iqr_outliers("B-factor", "bad_B",
                                lambda: [x[1] for x in xscalelp.get_k_b(xscale_lp)])

        if "em.b" in lpstats.stats:
            reject_iqr_outliers("error model b", "bad_em.b",
                                lambda: [x[1] for x in xscalelp.get_ISa(xscale_lp)])

        if "em.ab" in lpstats.stats:
            reject_iqr_outliers("error model a*b", "bad_em.ab",
                                lambda: [x[0]*x[1] for x in xscalelp.get_ISa(xscale_lp)])

        if "rfactor" in lpstats.stats:
            reject_iqr_outliers("R-factor", "bad_R", read_total_rfactors)

        if "pairwise_cc" in lpstats.stats:
            corrs = xscalelp.get_pairwise_correlations(xscale_lp)
            if lpstats.pwcc.method == "tukey":
                q25, q75 = numpy.percentile([x[3] for x in corrs], [25, 75])
                iqr = q75 - q25
                lowlim = q25 - lpstats.pwcc.iqr_coeff * iqr
                print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
            else:
                lowlim = lpstats.pwcc.abs_cutoff
                print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

            bad_corrs = [x for x in corrs if x[3] < lowlim]
            idx_bad = {}
            for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                idx_bad[i] = idx_bad.get(i, 0) + 1
                idx_bad[j] = idx_bad.get(j, 0) + 1

            # Visit datasets starting from the one involved in most bad pairs.
            idx_bad = sorted(idx_bad.items(), key=lambda x: x[1])
            count = 0
            fun_key = lambda x: x[3]
            for idx, badcount in reversed(idx_bad):
                remove_idxes.append(idx-1)
                remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                # Counted right after registration so the removal that empties
                # bad_corrs (and leaves the loop) is included in the total.
                count += 1
                bad_corrs = [x for x in bad_corrs if idx not in x[:2]]
                if len(bad_corrs) == 0:
                    break
                print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx,
                                                                                              min(bad_corrs, key=fun_key)[3],
                                                                                              max(bad_corrs, key=fun_key)[3],
                                                                                              len(bad_corrs))
            print >>self.out, " %4d pairwise CC outliers removed" % count

        self.reject_method.pop(0) # Perform only once

    elif self.reject_method[0] == "delta_cc1/2":
        print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = xscalelp.read_stats_table(xscale_lp)
        i_stat = -1 if self.delta_cchalf_bin == "total" else -2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name->idx table
        remaining_files = collections.OrderedDict([(f, i) for i, f in enumerate(xds_ascii_files)])

        # For consistent resolution limit
        inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
        count = 0
        for i in range(len(xds_ascii_files)-1): # if only one file, cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)
            remaining_names = list(remaining_files.keys())

            cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                         inpfiles=remaining_names,
                                                         stat_bin=self.delta_cchalf_bin,
                                                         nproc=self.nproc,
                                                         nproc_each=self.nproc_each,
                                                         batchjobs=self.batchjobs)

            rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
            rem_idx_in_org = remaining_files[remaining_names[rem_idx]]

            # Decision making by CC1/2
            print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org,
                                                                                    cc_i, nuniq_i,
                                                                                    prev_cchalf, prev_nuniq)
            if cc_i*nuniq_i <= prev_cchalf*prev_nuniq:
                break
            print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
            del remaining_files[remaining_names[rem_idx]] # remove file from table
            count += 1

        print >>self.out, " %4d removed by DeltaCC1/2 method" % count

        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
        else:
            self.reject_method.pop(0)
    else:
        # The message used to reference an undefined name (NameError). The unsupported
        # entry is dropped so that the next cycle does not meet it again and recurse forever.
        unsupported = self.reject_method.pop(0)
        print >>self.out, "ERROR:: Unsupported reject_method (%s)" % unsupported

    # Remove duplicates
    remove_idxes = list(set(remove_idxes))
    remove_idxes = self.check_remove_list(remove_idxes)

    if len(remove_idxes) > 0:
        print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
        for i in sorted(remove_idxes):
            print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

    # Next run
    keep_idxes = [i for i in range(len(xds_ascii_files)) if i not in remove_idxes]
    if len(self.reject_method) > 0 or len(remove_idxes) > 0:
        self.workdir = self.request_next_workdir()
        self.run_cycle([xds_ascii_files[i] for i in keep_idxes])
    elif self.reference_choice is not None and len(keep_idxes) > 1:
        # Just re-scale with B reference
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling2 with %s" % self.reference_choice
            rotate_inp_and_lp()
            self.run_cycle([xds_ascii_files[i] for i in keep_idxes], reference_idx=ref_num)
def calc_delta_cchalf(prev_lp, tmpdir, with_sigma=False, precalc_cchalf_all=None):
    """
    Obsolete function. Maybe useful when starting with the last XSCALE.LP...

    Re-runs XSCALE once per input file, leaving that file out each time, and
    reports how CC1/2 of the total row changes relative to the run with all files.

    prev_lp: XSCALE.LP of the run with all files; its control cards (and, unless
             precalc_cchalf_all is given, its CC1/2) are read
    tmpdir: directory where the trial runs are made (created when missing);
            delta_cchalf.dat and the per-trial XSCALE.INP/LP backups are left there
    with_sigma: must be false (not supported now)
    precalc_cchalf_all: CC1/2 of the run with all files; read from prev_lp when None

    Returns (delta_cchalf, cchalf_all); delta_cchalf is a list of
    (index of excluded file, CC1/2 without it - cchalf_all), largest gain first.
    """
    assert not with_sigma # Not supported now

    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    newinp = os.path.join(tmpdir, "XSCALE.INP")
    newlp = os.path.join(tmpdir, "XSCALE.LP")
    # Relative input paths in prev_lp are re-based onto tmpdir through this prefix.
    rel_org = os.path.relpath(os.path.dirname(prev_lp), tmpdir)

    if precalc_cchalf_all is None:
        # read CC1/2(all) from cwd
        orgtable = xscalelp.read_stats_table(prev_lp)
        assert orgtable["dmin"][-1] is None # None for total
        cchalf_all = orgtable["cc_half"][-1]
    else:
        cchalf_all = precalc_cchalf_all

    delta_cchalf = []

    # with-block: the table is flushed and closed even when a trial run fails
    # (the handle used to be left open).
    with open(os.path.join(tmpdir, "delta_cchalf.dat"), "w") as datout:
        datout.write("# CC1/2(all)= %.4f\n" % cchalf_all)
        datout.write("idx exfile cc1/2 delta_cc1/2\n")

        # Read inp and extract input files.
        # XXX What if reference file is included???
        orgkwds = xscalelp.read_control_cards(prev_lp)
        inpfiles = [kv[1] for kv in orgkwds if kv[0] == "INPUT_FILE"]

        # XXX Need to take care of xscale specific inp manner - order matters!!

        for iex in range(len(inpfiles)):
            print("Doing %s" % iex)
            files = inpfiles[:iex] + inpfiles[iex+1:]
            with open(newinp, "w") as ofs:
                for k, v in orgkwds:
                    if k not in ("INPUT_FILE", "INCLUDE_RESOLUTION_RANGE"):
                        ofs.write("%s= %s\n" % (k, v))

                for f in files:
                    if not os.path.isabs(f):
                        f = os.path.join(rel_org, f)
                    ofs.write("INPUT_FILE= %s\n" % f)

            util.call(xscale_comm, wdir=tmpdir)

            table = xscalelp.read_stats_table(newlp)
            assert table["dmin"][-1] is None # None for total
            cchalf_exi = table["cc_half"][-1]
            delta_cchalf.append((iex, cchalf_exi - cchalf_all))

            # Keep the input/log of every trial under a per-trial suffix.
            os.rename(newinp, newinp + ".ex%.3d" % iex)
            os.rename(newlp, newlp + ".ex%.3d" % iex)

            datout.write("%3d %s %.4f %.4f\n" % (iex, inpfiles[iex], cchalf_exi, cchalf_exi - cchalf_all))

    delta_cchalf.sort(key=lambda x: -x[1])
    print("")
    print("# Sorted table")
    for idx, dch in delta_cchalf:
        print("%3d %-.4f %s" % (idx, dch, inpfiles[idx]))

    return delta_cchalf, cchalf_all
def _write_merge_summary(out, params, cells, used_files, unused_files, reasons, show_isa):
    """
    Write the "Used"/"Unused" per-file summary table to out.

    out: writable file object (the merge log)
    params: parameter object; only params.workdir is read (paths are printed relative to it)
    cells: mapping file name -> unit cell, printed as is
    used_files, unused_files: collections of file names
    reasons: mapping file name -> rejection reason, printed for unused files
    show_isa: when true, ISa read from CORRECT.LP next to each file is also printed
    """
    print >>out
    print >>out, " SUMMARY "
    print >>out, "========================"
    for i, files in enumerate((used_files, unused_files)):
        print >>out, "\n%6s %4d files:\n" % (("Used", "Unused")[i], len(files))
        if len(files) == 0:
            continue

        maxlen_f = max([len(os.path.relpath(f, params.workdir)) for f in files])

        for f in files:
            cell = cells[f]
            merge_log = os.path.join(os.path.dirname(f), "merging_stats.log")
            try:
                with open(merge_log) as ifs:
                    lines = ifs.readlines()
                resn = float([l for l in lines if l.startswith("Resolution:")][0].split()[-1])
                cmpl = float([l for l in lines if l.startswith("Completeness:")][0].split()[-1].replace("%", ""))
            except Exception:
                # Best effort: a missing or unparsable log only blanks both numbers.
                resn = float("nan")
                cmpl = float("nan")

            if i == 1:  # print reason
                print >>out, "%-15s" % reasons.get(f, "unknown"),
            print >>out, ("%-" + str(maxlen_f) + "s") % os.path.relpath(f, params.workdir), cell,
            if show_isa:
                print >>out, "ISa=%5.1f" % correctlp.get_ISa(os.path.join(os.path.dirname(f), "CORRECT.LP")),
            print >>out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)


def merge_datasets(params, workdir, xds_files, cells, batchjobs):
    """
    Merge xds_files with the program named by params.program ("xscale" or "aimless").

    A log is written to merge.log in workdir (created when missing).

    params: parameter object (program, resolution limits, rejection settings, ...)
    workdir: directory where merging is performed
    xds_files: list of input file names
    cells: mapping file name -> unit cell, only used for the summary table
    batchjobs: handed to the cycle object only when "deltacchalf" is listed in
               params.batch.par_run; otherwise None is passed

    Returns a list of [cycle number, working directory, number of files, statistics dict];
    one entry per cycle for xscale, a single entry for aimless, and an empty
    list when params.program is unknown.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    # with-block: the log is flushed and closed on every exit path (it used to be left open).
    with open(os.path.join(workdir, "merge.log"), "w") as out:
        if params.program == "xscale":
            cycles = multi_merging.xscale.XscaleCycles(
                workdir,
                anomalous_flag=params.anomalous,
                d_min=params.d_min,
                d_max=params.d_max,
                reject_method=params.reject_method,
                reject_params=params.rejection,
                xscale_params=params.xscale,
                reference_file=params.reference_file,
                out=out,
                nproc=params.nproc,
                nproc_each=params.batch.nproc_each,
                batchjobs=batchjobs if "deltacchalf" in params.batch.par_run else None)

            unused_files, reasons = cycles.run_cycles(xds_files)
            used_files = set(xds_files).difference(set(unused_files))
            _write_merge_summary(out, params, cells, used_files, unused_files, reasons,
                                 show_isa=False)

            ret = []
            tkvals = lambda x: (x[-1], x[0], x[-2])  # overall, inner, outer

            for i in range(1, cycles.get_last_cycle_number() + 1):
                wd = os.path.join(workdir, "run_%.2d" % i)
                xscale_lp = os.path.join(wd, "XSCALE.LP")
                table = xscalelp.read_stats_table(xscale_lp)
                num_files = len(xscalelp.get_read_data(xscale_lp))
                xtriage_logfile = os.path.join(wd, "ccp4", "logfile.log")
                ret.append([i, wd, num_files,
                            dict(cmpl=tkvals(table["cmpl"]),
                                 redundancy=tkvals(table["redundancy"]),
                                 i_over_sigma=tkvals(table["i_over_sigma"]),
                                 r_meas=tkvals(table["r_meas"]),
                                 cc_half=tkvals(table["cc_half"]),
                                 sig_ano=tkvals(table["sig_ano"]),
                                 cc_ano=tkvals(table["cc_ano"]),
                                 drange=tkvals(table["d_range"]),
                                 lp=xscale_lp,
                                 xtriage_log=xtriage.XtriageLogfile(xtriage_logfile))
                            ])

            xscale_lp = os.path.join(cycles.current_working_dir(), "XSCALE.LP")
            print >>out, "\nFinal statistics:\n"
            print >>out, xscalelp.snip_stats_table(xscale_lp)

            return ret

        elif params.program == "aimless":
            worker = Pointless()
            print >>out, "\nRunning pointless"
            runinfo = worker.run_copy(hklout="pointless.mtz", wdir=workdir,
                                      xdsin=xds_files,
                                      logout=os.path.join(workdir, "pointless.log"),
                                      tolerance=30)

            # Table of file name -> Batch range
            assert len(xds_files) == len(runinfo)
            batch_info = collections.OrderedDict(
                [(f, info[1:3]) for f, info in zip(xds_files, runinfo)])

            cycles = multi_merging.aimless.AimlessCycles(
                workdir,
                anomalous_flag=params.anomalous,
                d_min=params.d_min,
                d_max=params.d_max,
                reject_method=params.reject_method,
                cc_cutoff=params.rejection.lpstats.pwcc.abs_cutoff,
                delta_cchalf_bin=params.rejection.delta_cchalf.bin,
                mtzin=os.path.join(workdir, "pointless.mtz"),
                batch_info=batch_info,
                out=out,
                nproc=params.nproc,
                nproc_each=params.batch.nproc_each,
                batchjobs=batchjobs if "deltacchalf" in params.batch.par_run else None)

            unused_files, reasons = cycles.run_cycles(xds_files)
            used_files = set(xds_files).difference(set(unused_files))
            _write_merge_summary(out, params, cells, used_files, unused_files, reasons,
                                 show_isa=True)

            aimless_log = os.path.join(cycles.current_working_dir(), "aimless.log")
            print >>out, "\nFinal statistics:\n"
            print >>out, aimless.snip_summary(aimless_log)

            # Write summary
            table = aimless.read_summary(aimless_log)

            tkvals = lambda x: (x[0], x[1], x[2])  # overall, inner, outer
            return [[cycles.get_last_cycle_number(), cycles.current_working_dir(), len(used_files),
                     dict(cmpl=tkvals(table["cmpl"]),
                          redundancy=tkvals(table["redundancy"]),
                          i_over_sigma=tkvals(table["i_over_sigma"]),
                          r_meas=tkvals(table["r_meas"]),
                          cc_half=tkvals(table["cc_half"]),
                          sig_ano=(float("nan"),) * 3,
                          cc_ano=tkvals(table["cc_ano"]))],
                    ]

        else:
            print >>out, "Unknown program:", params.program
            return []
def run_cycle(self, xds_ascii_files, reference_idx=None):
    """Run XSCALE on the given files, then recurse until nothing is left to fix or reject.

    One call does the following, in order:

    1. Write XSCALE.INP into ``self.workdir`` and run XSCALE.
    2. Scan XSCALE.LP for known error messages.  Datasets blamed for an
       error are recorded in ``self.removed_files`` / ``self.removed_reason``
       and the job is re-run without them.
    3. Optionally re-run with a reference dataset chosen from B-factors.
    4. Apply the first entry of ``self.reject_method`` ("framecc",
       "lpstats" or "delta_cc1/2"), record rejected datasets, and recurse
       in a fresh working directory obtained from
       ``self.request_next_workdir()``.

    Args:
        xds_ascii_files: list of input file paths.  Indices into this list
            are what the rejection steps collect.
        reference_idx: index into ``xds_ascii_files`` of the file marked
            with ``*`` on its INPUT_FILE= line; None marks no file.

    Returns:
        None.  Results are left in ``self.workdir``, ``self.removed_files``,
        ``self.removed_reason`` and the log written to ``self.out``.
    """
    if len(xds_ascii_files) == 0:
        print >>self.out, "Error: no files given."
        return

    xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
    xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

    # Get averaged cell for scaling
    sg, cell = self.average_cells(xds_ascii_files)

    # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
    # 'with' guarantees XSCALE.INP is flushed and closed even if reading a
    # frame range below raises.
    with open(xscale_inp, "w") as inp_out:
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            # Use whichever spelling of the path is shorter (relative or as given).
            tmp = min(os.path.relpath(f, self.workdir), f, key=len)
            refstr = "*" if i == reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
            if len(self.xscale_params.corrections) != 3:
                inp_out.write(" CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
            if self.xscale_params.frames_per_batch is not None:
                frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
                # NOTE(review): if get_frame_range() is inclusive this is one
                # frame short -- confirm against XDS_ASCII.
                nframes = frame_range[1] - frame_range[0]
                # float() forces true division; with two ints Python 2 would
                # floor the quotient before ceil() ever saw it.
                nbatch = int(numpy.ceil(float(nframes) / self.xscale_params.frames_per_batch))
                print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
                inp_out.write(" NBATCH= %d\n" % nbatch)

    print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
    xscale.run_xscale(xscale_inp)

    cbfouts = glob.glob(os.path.join(self.workdir, "*.cbf"))
    if len(cbfouts) > 0:
        # This doesn't affect anything, so I don't want program to stop if this failed
        try:
            xscalelp.cbf_to_dat(xscale_lp)
            for f in cbfouts:
                os.remove(f)
        except Exception:
            # Best effort only: log and carry on (but let Ctrl-C through).
            print >>self.out, traceback.format_exc()

    with open(xscale_lp) as lp_in:
        xscale_log = lp_in.read()

    if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
        print >>self.out, "DEBUG:: Need to choose files."

        # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections.
        # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves?
        # Older versions just print correlation table and stop.
        if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
            G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
            nodes = G.nodes()
            cliques = [c for c in nx.find_cliques(G)]
            cliques.sort(key=len)
            if self._counter == 1:
                max_clique = cliques[-1]
            else:
                idx_prevfile = 1 if self.reference_file else 0
                # xscale.hkl must be included!
                max_clique = [c for c in cliques if idx_prevfile in c][-1]

            if self.reference_file:
                max_clique = [0] + [n for n in max_clique if n != 0]

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            try_later = [xds_ascii_files[n] for n in nodes if n not in max_clique]

            print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique), len(try_later))
            print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(nodes))
            for i in xrange(len(xds_ascii_files)):
                if i not in nodes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle([xds_ascii_files[n] for n in max_clique])

            assert len(try_later) <= 0 # Never be the case with newer xscale!! (if the case, check_remove_list() should be modified to skip_num+=1
            if len(try_later) > 0:
                print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                if self.reference_file:
                    next_files = [self.reference_file] + next_files
                self.workdir = self.request_next_workdir()
                self.run_cycle(next_files)
                return
        else:
            bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
            print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            # XXX Actually, not all datasets need to be thrown.. some of them are useful..
            for i in bad_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle([xds_ascii_files[n] for n in xrange(len(xds_ascii_files))
                            if n not in bad_idxes])

        # The recursive call(s) above did all remaining work for this branch.
        return
    elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
        print >>self.out, "DEBUG:: Need to discard useless data."
        # Only the last dataset XSCALE reported reading is blamed.
        unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]]
        if len(unuseful_data) == 0:
            # Cannot trigger with the one-element list built above; kept as a guard.
            print >>self.out, "I don't know how to fix it.."
            return
        # get_read_data() indices are shifted down by one to index xds_ascii_files.
        remove_idxes = [d[0]-1 for d in unuseful_data]
        remove_idxes = self.check_remove_list(remove_idxes)
        keep_idxes = [n for n in xrange(len(xds_ascii_files)) if n not in remove_idxes]
        for i in remove_idxes:
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = "useless"

        for f in "XSCALE.INP", "XSCALE.LP":
            util.rotate_file(os.path.join(self.workdir, f))
        self.run_cycle([xds_ascii_files[n] for n in keep_idxes])
        return
    elif "INACCURATE SCALING FACTORS." in xscale_log:
        # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
        print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
    elif "!!! ERROR !!!" in xscale_log:
        print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
        return

    # Re-scale by changing reference
    rescale_for = None
    if len(self.reject_method) == 0:
        rescale_for = self.reference_choice # may be None
    elif reference_idx is None:
        rescale_for = "bmed"

    if rescale_for is not None and len(xds_ascii_files) > 1:
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling with %s" % rescale_for
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            # NOTE(review): execution continues below after this recursive
            # call returns -- confirm that is intended.
            self.run_cycle(xds_ascii_files, reference_idx=ref_num)

    if len(self.reject_method) == 0:
        return

    # Remove bad data
    remove_idxes = []    # indices into xds_ascii_files to drop
    remove_reasons = {}  # index -> list of reason tags

    if self.reject_method[0] == "framecc":
        print >>self.out, "Rejections based on frame CC"
        from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

        # list of [frame, n_all, n_common, cc] in the same order
        framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                               output_dir=self.workdir,
                                               nproc=self.nproc).values()
        if self.reject_params.framecc.method == "tukey":
            # Pool the CC of every frame of every dataset.
            ccs = numpy.array([row[3] for per_file in framecc for row in per_file])
            q25, q75 = numpy.percentile(ccs, [25, 75])
            cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
            print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
        else:
            cc_cutoff = self.reject_params.framecc.abs_cutoff
            print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

        for i, cclist in enumerate(framecc):
            useframes = [row[0] for row in cclist if row[3] > cc_cutoff]
            if len(useframes) == 0:
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            f = xds_ascii_files[i]
            xac = XDS_ASCII(f)
            if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                continue # All useful frames.

            # Select reflections that belong to any accepted frame.
            sel = xac.iframe == useframes[0]
            for x in useframes[1:]:
                sel |= xac.iframe == x
            if sum(sel) < 10: # XXX care I/sigma
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str,useframes)),
                                                                          min(xac.iframe), max(xac.iframe),
                                                                          f)

            newf = self.request_file_modify(f)
            xac.write_selected(sel, newf)

        self.reject_method.pop(0) # Perform only once

    elif self.reject_method[0] == "lpstats":
        stats = self.reject_params.lpstats.stats

        def reject_iqr_outliers(label, reason, read_values):
            # Flag every dataset whose statistic lies outside
            # [q25 - c*IQR, q75 + c*IQR]; read_values() is called after the
            # header is logged and returns one value per dataset.
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on %s outliers (%.2f*IQR)" % (label, iqrc)
            values = numpy.array(read_values())
            q25, q75 = numpy.percentile(values, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for n, v in enumerate(values):
                if v < lowlim or v > highlim:
                    remove_idxes.append(n)
                    remove_reasons.setdefault(n, []).append(reason)
                    count += 1
            print >>self.out, " %4d %s outliers (<%.2f, >%.2f) removed" % (count, label, lowlim, highlim)

        def total_rfactors():
            # Last row of each dataset's table is read as its total R-factor.
            rstats = xscalelp.get_rfactors_for_each(xscale_lp)
            return [rstats[key][-1][1] for key in rstats]

        if "bfactor" in stats:
            reject_iqr_outliers("B-factor", "bad_B",
                                lambda: [kb[1] for kb in xscalelp.get_k_b(xscale_lp)])

        if "em.b" in stats:
            reject_iqr_outliers("error model b", "bad_em.b",
                                lambda: [ab[1] for ab in xscalelp.get_ISa(xscale_lp)])

        if "em.ab" in stats:
            reject_iqr_outliers("error model a*b", "bad_em.ab",
                                lambda: [ab[0]*ab[1] for ab in xscalelp.get_ISa(xscale_lp)])

        if "rfactor" in stats:
            reject_iqr_outliers("R-factor", "bad_R", total_rfactors)

        if "pairwise_cc" in stats:
            corrs = xscalelp.get_pairwise_correlations(xscale_lp)
            if self.reject_params.lpstats.pwcc.method == "tukey":
                q25, q75 = numpy.percentile([c[3] for c in corrs], [25, 75])
                iqr = q75 - q25
                lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
            else:
                lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

            bad_corrs = [c for c in corrs if c[3] < lowlim]
            # How many bad pairs each dataset takes part in.
            idx_bad = {}
            for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                idx_bad[i] = idx_bad.get(i, 0) + 1
                idx_bad[j] = idx_bad.get(j, 0) + 1

            idx_bad = idx_bad.items()
            idx_bad.sort(key=lambda x: x[1])
            count = 0
            # Drop the most-implicated dataset first, until no bad pair is left.
            for idx, badcount in reversed(idx_bad):
                # The pair table's dataset numbers are shifted down by one
                # to index xds_ascii_files.
                remove_idxes.append(idx-1)
                remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                # Count at the point of removal so the dataset that empties
                # the bad-pair list is included in the total.
                count += 1
                bad_corrs = [c for c in bad_corrs if idx not in c[:2]]
                if len(bad_corrs) == 0:
                    break
                remaining_ccs = [c[3] for c in bad_corrs]
                print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx,
                                                                                               min(remaining_ccs),
                                                                                               max(remaining_ccs),
                                                                                               len(bad_corrs))
            print >>self.out, " %4d pairwise CC outliers removed" % count

        self.reject_method.pop(0) # Perform only once

    elif self.reject_method[0] == "delta_cc1/2":
        print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = xscalelp.read_stats_table(xscale_lp)
        # Last row is the total; the one before it is used for the other bin.
        i_stat = -1 if self.delta_cchalf_bin == "total" else -2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name->idx table
        remaining_files = collections.OrderedDict((name, n) for n, name in enumerate(xds_ascii_files))

        # For consistent resolution limit
        inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
        count = 0
        for i in xrange(len(xds_ascii_files)-1): # if only one file, cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

            cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                         inpfiles=remaining_files.keys(),
                                                         stat_bin=self.delta_cchalf_bin,
                                                         nproc=self.nproc,
                                                         nproc_each=self.nproc_each,
                                                         batchjobs=self.batchjobs)

            rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
            # rem_idx counts within the files still remaining; map it back
            # to the index in xds_ascii_files.
            rem_file = remaining_files.keys()[rem_idx]
            rem_idx_in_org = remaining_files[rem_file]

            # Decision making by CC1/2
            print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org,
                                                                                     cc_i, nuniq_i,
                                                                                     prev_cchalf, prev_nuniq)
            if cc_i*nuniq_i <= prev_cchalf*prev_nuniq:
                break
            print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
            del remaining_files[rem_file] # remove file from table
            count += 1

        print >>self.out, " %4d removed by DeltaCC1/2 method" % count

        # Repeat this method for the next queued bin before moving on.
        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
        else:
            self.reject_method.pop(0)
    else:
        print >>self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method[0]
        # Drop the unknown entry; otherwise the recursion below would keep
        # retrying it forever.
        self.reject_method.pop(0)

    # Remove duplicates
    remove_idxes = list(set(remove_idxes))
    remove_idxes = self.check_remove_list(remove_idxes)
    if len(remove_idxes) > 0:
        print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
        for i in sorted(remove_idxes):
            print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

    # Next run
    keep_idxes = [n for n in xrange(len(xds_ascii_files)) if n not in remove_idxes]
    if len(self.reject_method) > 0 or len(remove_idxes) > 0:
        self.workdir = self.request_next_workdir()
        self.run_cycle([xds_ascii_files[n] for n in keep_idxes])
    elif self.reference_choice is not None and len(keep_idxes) > 1:
        # Just re-scale with B reference
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling2 with %s" % self.reference_choice
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle([xds_ascii_files[n] for n in keep_idxes], reference_idx=ref_num)