def run(lstin):
    data = []

    for l in open(lstin):
        xdsasc = l.strip()
        xa = XDS_ASCII(xdsasc, sys.stdout, i_only=True)
        ma = miller.array(miller_set=xa.as_miller_set(anomalous_flag=False),
                          data=xa.iobs)
        data.append((xdsasc, ma))

    print "index filename"
    for i, d in enumerate(data):
        print i, d[0]

    print "i j n.i n.j n.common cc"
    for i in xrange(len(data)-1):
        for j in xrange(i+1, len(data)):
            di, dj = data[i][1].common_sets(data[j][1], assert_is_similar_symmetry=False)
            print i, j, data[i][1].data().size(), data[j][1].data().size(),
            if len(di.data()) == 0:
                print 0, "nan"
            else:
                corr = flex.linear_correlation(di.data(), dj.data())
                assert corr.is_well_defined()
                cc = corr.coefficient()
                print len(di.data()), cc
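# Hedged usage sketch (not part of the original script): run() above expects a
# plain-text list file with one XDS_ASCII.HKL path per line; the __main__
# wiring below is an assumption about how the script is invoked.
if __name__ == "__main__":
    import sys
    run(sys.argv[1])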
def calc_merging_stats(xac_file, cut_resolution=True):
    import iotbx.merging_statistics
    from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII

    wdir = os.path.dirname(xac_file)
    pklout = os.path.join(wdir, "merging_stats.pkl")
    logout = open(os.path.join(wdir, "merging_stats.log"), "w")

    print >>logout, xac_file
    print >>logout, ""
    print >>logout, "Estimate cutoff"
    print >>logout, "================"

    obj = XDS_ASCII(xac_file, i_only=True)
    i_obs = obj.i_obs()
    d_min = None
    if i_obs.size() < 10: return

    try:
        cutoffs = resolution_cutoff.estimate_crude_resolution_cutoffs(i_obs=i_obs)
        cutoffs.show(out=logout)

        if cutoffs.cc_one_half_cut != float("inf") and cut_resolution:
            d_min = cutoffs.cc_one_half_cut
    except Sorry, e:
        print >>logout, e.message
def __init__(self, xac_files, d_min=3, min_ios=3, nproc=1, max_delta=3, log_out=null_out()):
    self.xac_files = xac_files
    self.log_out = log_out
    self.nproc = nproc
    self.arrays = []
    self.max_delta = max_delta
    self.best_operators = None

    print >>self.log_out, "Reading"
    for i, f in enumerate(self.xac_files):
        print >>self.log_out, "%4d %s" % (i, f)
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=d_min)
        if min_ios is not None: a = a.select(a.data()/a.sigmas() >= min_ios)
        a = a.as_non_anomalous_array().merge_equivalents(use_internal_variance=False).array()
        self.arrays.append(a)

    print >>self.log_out, ""
def get_xac_info(xac, get_nframes=False):
    ret = {}
    for l in open(xac):
        if l.startswith("!FORMAT=XDS_ASCII"): # !FORMAT=XDS_ASCII MERGE=FALSE FRIEDEL'S_LAW=FALSE
            ret["friedels_law"] = l[l.rindex("=")+1:].strip()
        if l.startswith("!INCLUDE_RESOLUTION_RANGE="):
            ret["resol_range"] = l[l.index("=")+1:].strip()
        elif l.startswith("!SPACE_GROUP_NUMBER="):
            ret["spgr_num"] = l[l.index("=")+1:].strip()
        elif l.startswith("!UNIT_CELL_CONSTANTS="):
            ret["cell"] = l[l.index("=")+1:].strip()
        elif l.startswith("!END_OF_HEADER"):
            break

    if not "resol_range" in ret:
        d_max_min = XDS_ASCII(xac, i_only=True).as_miller_set().d_max_min()
        ret["resol_range"] = "%.3f %.3f" % d_max_min

    if get_nframes:
        frame_range = XDS_ASCII(xac, read_data=False).get_frame_range()
        ret["nframes"] = frame_range[1] - frame_range[0]

    return ret
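# Minimal usage sketch for get_xac_info(); the file name is a placeholder and
# the commented values are illustrative only:
info = get_xac_info("XDS_ASCII.HKL", get_nframes=True)
print info.get("spgr_num"), info.get("cell")        # e.g. "19", "50.00 60.00 70.00 90.00 90.00 90.00"
print info.get("resol_range"), info.get("nframes")  # e.g. "50.000 1.800", 360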
def modify_xds_ascii_files(self, suffix="_reidx", cells_dat_out=None):
    #ofs_lst = open("for_merge_new.lst", "w")
    if cells_dat_out: cells_dat_out.write("file a b c al be ga\n")
    new_files = []
    print >>self.log_out, "Writing reindexed files.."
    for i, (f, op) in enumerate(zip(self.xac_files, self.best_operators)):
        xac = XDS_ASCII(f, read_data=False)
        if op.is_identity_op():
            new_files.append(f)
            if cells_dat_out:
                cell = xac.symm.unit_cell().parameters()
                cells_dat_out.write(f+" "+" ".join(map(lambda x: "%7.3f"%x, cell))+"\n")
            continue

        newf = f.replace(".HKL", suffix+".HKL") if ".HKL" in f else os.path.splitext(f)[0]+suffix+".HKL"
        print >>self.log_out, "%4d %s" % (i, newf)

        cell_tr = xac.write_reindexed(op, newf,
                                      space_group=self.arrays[0].crystal_symmetry().space_group())
        #ofs_lst.write(newf+"\n")
        new_files.append(newf)
        if cells_dat_out:
            cells_dat_out.write(newf+" "+" ".join(map(lambda x: "%7.3f"%x, cell_tr.parameters()))+"\n")

    return new_files
def reindex_with_specified_symm(topdir, reference_symm, dirs, out):
    print >>out
    print >>out, "Re-index to specified symmetry:"
    reference_symm.show_summary(out, " ")
    print >>out
    print >>out

    cells = {} # cell and file
    sgnum_laue = reference_symm.space_group().build_derived_reflection_intensity_group(False).type().number()

    for wd in dirs:
        print >>out, "%s:" % os.path.relpath(wd, topdir),

        # Find appropriate data
        xac_file = util.return_first_found_file(("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
                                                 "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                                 "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
                                                wd=wd)
        if xac_file is None:
            print >>out, "Can't find XDS_ASCII file in %s" % wd
            continue

        if xac_file.endswith(".org"):
            xac_file_org, xac_file = xac_file, xac_file[:-4]
        else:
            xac_file_org = xac_file+".org"

        if not os.path.isfile(xac_file_org):
            os.rename(xac_file, xac_file_org)

        xac = XDS_ASCII(xac_file_org, read_data=False)
        print >>out, "%s %s (%s)" % (os.path.basename(xac_file), xac.symm.space_group_info(),
                                     ",".join(map(lambda x: "%.2f"%x, xac.symm.unit_cell().parameters())))

        if xac.symm.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue:
            if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1, 10):
                print >>out, " Already scaled with specified symmetry"
                os.rename(xac_file_org, xac_file) # rename back
                cells[wd] = (numpy.array(xac.symm.unit_cell().parameters()), xac_file)
                continue

        cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20)

        if len(cosets.combined_cb_ops()) == 0:
            print >>out, "Can't find operator:"
            xac.symm.show_summary(out, " ")
            reference_symm.show_summary(out, " ")
            continue

        newcell = xac.write_reindexed(op=cosets.combined_cb_ops()[0],
                                      space_group=reference_symm.space_group(),
                                      hklout=xac_file)
        cells[wd] = (numpy.array(newcell.parameters()), xac_file)

        newcell = " ".join(map(lambda x: "%.3f"%x, newcell.parameters()))
        print >>out, " Reindexed to transformed cell: %s with %s" % (newcell, cosets.combined_cb_ops()[0].as_hkl())

    return cells
def run(hklin, hklin_merged=None, cone_angle=20., n_bins=10, anomalous=None, do_fit=True, log_out=null_out()):
    if 1:
        xac = XDS_ASCII(hklin, i_only=True)
        xac.remove_rejected()
        i_obs = xac.i_obs()
    #else:
    #    import iotbx.mtz
    #    i_obs = filter(lambda x: "SIGI" in x.info().label_string(),
    #                   iotbx.mtz.object(hklin).as_miller_arrays(merge_equivalents=False))[0]

    print >>log_out, "Unmerged intensity read from", hklin
    i_obs.show_summary(log_out, prefix=" ")
    print >>log_out, ""

    if anomalous is not None and i_obs.anomalous_flag() != anomalous:
        print >>log_out, "Changing anomalous flag based on user's input"
        i_obs = i_obs.customized_copy(anomalous_flag=anomalous)

    if hklin_merged is not None:
        f = iotbx.file_reader.any_file(hklin_merged)
        array_merged = f.file_server.get_xray_data(file_name=None,
                                                   labels=None,
                                                   ignore_all_zeros=True,
                                                   parameter_scope="",
                                                   prefer_anomalous=False,
                                                   prefer_amplitudes=False)
        print >>log_out, "Merged intensity read from", hklin_merged
        array_merged.show_summary(log_out, prefix=" ")
    else:
        array_merged = i_obs.merge_equivalents(use_internal_variance=False).array()
        print >>log_out, "Merged intensity calculated"

    print >>log_out, ""

    bad_data = array_merged.select(array_merged.data() < -3*array_merged.sigmas()) # FIXME What if already omitted..
    i_obs = i_obs.delete_indices(other=bad_data)
    array_merged = array_merged.select(array_merged.sigmas() > 0)

    if anomalous is not None and not anomalous and array_merged.anomalous_flag():
        print >>log_out, "Converting to non-anomalous data..\n"
        array_merged = array_merged.average_bijvoet_mates()

    return make_aniso_stats_table(i_obs, array_merged, cone_angle, n_bins, do_fit, log_out)
def run_cycles(self, xds_ascii_files):
    self.all_data_root = os.path.dirname(os.path.commonprefix(xds_ascii_files))
    self.removed_files = []
    self.removed_reason = {}
    print >>self.out, "********************* START FUNCTION ***********************"

    if self.reference_file:
        self.run_cycle([self.reference_file,]+xds_ascii_files)
    else:
        self.run_cycle(xds_ascii_files)

    if self.res_params.estimate:
        #self.cut_resolution(self.get_last_cycle_number())
        for run_i in xrange(1, self.get_last_cycle_number()+1):
            try:
                self.estimate_resolution(run_i)
            except:
                print >>self.out, traceback.format_exc() # Don't want to stop the program.

    for wd in glob.glob(os.path.join(self.workdir_org, "run_*")):
        if os.path.exists(os.path.join(wd, "ccp4")): continue
        xscale_hkl = os.path.abspath(os.path.join(wd, "xscale.hkl"))
        sg = None # Use user-specified one. Otherwise follow pointless.

        try:
            sg = XDS_ASCII(xscale_hkl, read_data=False).symm.space_group()
            laue_symm_str = str(sg.build_derived_reflection_intensity_group(False).info())
            worker = Pointless()
            result = worker.run_for_symm(xdsin=xscale_hkl,
                                         logout=os.path.join(wd, "pointless.log"),
                                         choose_laue=laue_symm_str, xdsin_to_p1=True)
            if "symm" in result:
                print >>self.out, "Pointless suggestion (forcing %s symmetry):" % laue_symm_str
                result["symm"].show_summary(self.out, " ")
                sg = str(result["symm"].space_group_info())
            else:
                print >>self.out, "Pointless failed."
        except:
            print >>self.out, traceback.format_exc() # Don't want to stop the program.

        if self.space_group is not None:
            sg = str(self.space_group.info())

        try:
            xds2mtz.xds2mtz(xds_file=xscale_hkl,
                            dir_name=os.path.join(wd, "ccp4"),
                            run_xtriage=True, run_ctruncate=True,
                            with_multiplicity=True,
                            space_group=sg,
                            flag_source=self.ref_mtz)
        except:
            print >>self.out, traceback.format_exc() # Don't want to stop the program.

    return self.removed_files, self.removed_reason
def read_xac_files(xac_files, d_min=None, d_max=None, min_ios=None):
    arrays = collections.OrderedDict()

    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=d_min, d_max=d_max)
        a = a.as_non_anomalous_array().merge_equivalents(use_internal_variance=False).array()
        if min_ios is not None: a = a.select(a.data()/a.sigmas() >= min_ios)
        arrays[f] = a

    return arrays
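# Sketch combining read_xac_files() with the pairwise-CC pattern used in the
# first snippet of this collection (common_sets + flex.linear_correlation);
# the two input paths are placeholders:
from cctbx.array_family import flex

arrays = read_xac_files(["a/XDS_ASCII.HKL", "b/XDS_ASCII.HKL"], d_min=3.)
(f1, a1), (f2, a2) = arrays.items()
a1, a2 = a1.common_sets(a2, assert_is_similar_symmetry=False)
corr = flex.linear_correlation(a1.data(), a2.data())
if corr.is_well_defined():
    print "%s vs %s: cc= %.4f (n= %d)" % (f1, f2, corr.coefficient(), a1.size())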
def read_xac_files(self, from_p1=False):
    op_to_p1 = None
    if from_p1:
        """
        This option is currently for multi_determine_symmetry.
        Do not use this for ambiguity resolution!
        op_to_p1 is not considered when writing new HKL files.
        """
        self.log_out.write("\nAveraging symmetry of all inputs..\n")
        cells = []
        sgs = []
        for f in self.xac_files:
            xac = XDS_ASCII(f, read_data=False)
            cells.append(xac.symm.unit_cell().parameters())
            sgs.append(xac.symm.space_group())
        assert len(set(sgs)) < 2
        avg_symm = crystal.symmetry(list(numpy.median(cells, axis=0)), space_group=sgs[0])
        op_to_p1 = avg_symm.change_of_basis_op_to_niggli_cell()
        self.log_out.write(" Averaged symmetry: %s (%s)\n" % (format_unit_cell(avg_symm.unit_cell()), sgs[0].info()))
        self.log_out.write(" Operator to Niggli cell: %s\n" % op_to_p1.as_hkl())
        self.log_out.write(" Niggli cell: %s\n" % format_unit_cell(avg_symm.unit_cell().change_basis(op_to_p1)))

    print >>self.log_out, "\nReading"
    cells = []
    bad_files, good_files = [], []
    for i, f in enumerate(self.xac_files):
        print >>self.log_out, "%4d %s" % (i, f)
        xac = XDS_ASCII(f, i_only=True)
        self.log_out.write(" d_range: %6.2f - %5.2f" % xac.i_obs().resolution_range())
        self.log_out.write(" n_ref=%6d" % xac.i_obs().size())
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=self.d_min)
        if self.min_ios is not None: a = a.select(a.data()/a.sigmas() >= self.min_ios)
        self.log_out.write(" n_ref_filtered=%6d" % a.size())
        if from_p1:
            a = a.change_basis(op_to_p1).customized_copy(space_group_info=sgtbx.space_group_info("P1"))
        a = a.as_non_anomalous_array().merge_equivalents(use_internal_variance=False).array()
        self.log_out.write(" n_ref_merged=%6d\n" % a.size())
        if a.size() < 2:
            self.log_out.write(" !! WARNING !! number of reflections is dangerously small!!\n")
            bad_files.append(f)
        else:
            self.arrays.append(a)
            cells.append(a.unit_cell().parameters())
            good_files.append(f)

    if bad_files:
        self.xac_files = good_files
        self.bad_files = bad_files

    assert len(self.xac_files) == len(self.arrays) == len(cells)
    print >>self.log_out, ""
    self._representative_xs = crystal.symmetry(list(numpy.median(cells, axis=0)),
                                               space_group_info=self.arrays[0].space_group_info())
def read_strong_i_from_xds_ascii(xds_ascii_in):
    tmp = XDS_ASCII(xds_ascii_in, i_only=True).i_obs(anomalous_flag=False)
    sel = tmp.sigmas() > 0
    sel &= tmp.data()/tmp.sigmas() > 2
    sel &= tmp.d_spacings() > 3
    if sel.count(True) < 10:
        return None
    tmp = tmp.select(sel)
    merge = tmp.merge_equivalents(use_internal_variance=False)
    return merge.array()
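# Usage sketch: a quick "strong reflections" array (I/sigma > 2, d > 3 A, merged).
# Returns None when fewer than 10 observations survive the filters; the file
# name below is a placeholder.
strong = read_strong_i_from_xds_ascii("XDS_ASCII.HKL")
if strong is not None:
    print "n_strong_unique=", strong.size()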
def modify_xds_ascii_files(self, suffix="_reidx"):
    #ofs_lst = open("for_merge_new.lst", "w")
    new_files = []
    print >>self.log_out, "Writing reindexed files.."
    for i, (f, op) in enumerate(zip(self.xac_files, self.best_operators)):
        if op.is_identity_op():
            #ofs_lst.write(f+"\n")
            new_files.append(f)
            continue

        newf = f.replace(".HKL", suffix+".HKL") if ".HKL" in f else os.path.splitext(f)[0]+suffix+".HKL"
        print >>self.log_out, "%4d %s" % (i, newf)

        xac = XDS_ASCII(f, read_data=False)
        xac.write_reindexed(op, newf)
        #ofs_lst.write(newf+"\n")
        new_files.append(newf)

    return new_files
def est_resol(xscale_hkl, res_params, plt_out):
    # Note: this reads like a nested helper in its original context; `self.out`
    # is presumably captured from the enclosing method's scope.
    iobs = XDS_ASCII(xscale_hkl, i_only=True).i_obs()
    est = estimate_resolution_based_on_cc_half(iobs, res_params.cc_one_half_min,
                                               res_params.cc_half_tol, res_params.n_bins,
                                               log_out=self.out)
    est.show_plot(False, plt_out)
    if None not in (est.d_min, est.cc_at_d_min):
        self.out.write("Best resolution cutoff= %.2f A @CC1/2= %.4f\n" % (est.d_min, est.cc_at_d_min))
    else:
        self.out.write("Can't decide resolution cutoff. No reflections??\n")
    return est.d_min
def estimate_resolution(self, cycle_number):
    print >>self.out, "**** Determining resolution cutoff in run_%.2d ****" % cycle_number
    last_wd = os.path.join(self.workdir_org, "run_%.2d" % cycle_number)
    xscale_hkl = os.path.abspath(os.path.join(last_wd, "xscale.hkl"))

    i_obs = XDS_ASCII(xscale_hkl, i_only=True).i_obs()
    d_min_est, _ = initial_estimate_byfit_cchalf(i_obs, cc_half_min=self.res_params.cc_one_half_min,
                                                 anomalous_flag=False, log_out=self.out)
    self.out.write("Estimated resolution cutoff= %.2f A @CC1/2= %.4f\n" % (d_min_est, self.res_params.cc_one_half_min))
    self.dmin_est_at_cycles[cycle_number] = d_min_est
def get_p1cell_and_symm(self, xdsdir):
    dials_hkl = os.path.join(xdsdir, "DIALS.HKL")
    xac_file = util.return_first_found_file(("XDS_ASCII.HKL", "XDS_ASCII.HKL.org",
                                             "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                             "XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale"),
                                            wd=xdsdir)
    p1cell, xs = None, None

    if xac_file:
        correct_lp = util.return_first_found_file(("CORRECT.LP_noscale", "CORRECT.LP"), wd=xdsdir)
        if not correct_lp:
            print "CORRECT.LP not found in %s" % xdsdir
            return None, None
        p1cell = correctlp.get_P1_cell(correct_lp, force_obtuse_angle=True)
        try:
            xac = XDS_ASCII(xac_file, read_data=False)
        except:
            print "Invalid XDS_ASCII format:", xac_file
            return None, None
        xs = xac.symm
    elif os.path.isfile(dials_hkl): # DIALS
        xs = run_dials_auto.get_most_possible_symmetry(xdsdir)
        if xs is None:
            print "Cannot get crystal symmetry:", xdsdir
            return None, None

        p1cell = list(xs.niggli_cell().unit_cell().parameters())
        # force obtuse angle
        tmp = map(lambda x: (x[0]+3, abs(90.-x[1])), enumerate(p1cell[3:])) # Index and difference from 90 deg
        tmp.sort(key=lambda x: x[1], reverse=True)
        if p1cell[tmp[0][0]] < 90:
            tmp = map(lambda x: (x[0]+3, 90.-x[1]), enumerate(p1cell[3:])) # Index and 90-val.
            tmp.sort(key=lambda x: x[1], reverse=True)
            for i, v in tmp[:2]: p1cell[i] = 180.-p1cell[i]

        p1cell = uctbx.unit_cell(p1cell)

    return p1cell, xs
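# Worked example of the "force obtuse angle" block above, as a pure-python
# trace (no cctbx objects needed; made-up cell). Starting from P1 angles
# (60, 95, 88), the two angles farthest from 90 deg get flipped to 180-x:
p1cell = [40.0, 50.0, 60.0, 60.0, 95.0, 88.0]
tmp = map(lambda x: (x[0]+3, abs(90.-x[1])), enumerate(p1cell[3:]))
tmp.sort(key=lambda x: x[1], reverse=True)
if p1cell[tmp[0][0]] < 90:
    tmp = map(lambda x: (x[0]+3, 90.-x[1]), enumerate(p1cell[3:]))
    tmp.sort(key=lambda x: x[1], reverse=True)
    for i, v in tmp[:2]: p1cell[i] = 180.-p1cell[i]
print p1cell # -> [40.0, 50.0, 60.0, 120.0, 95.0, 92.0]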
def run(xscale_inp):
    inp_dir = os.path.dirname(xscale_inp)

    files = map(lambda y: y[1].replace("*", ""),
                filter(lambda x: x[0] == "INPUT_FILE", get_xdsinp_keyword(xscale_inp)))
    files = map(lambda x: os.path.join(inp_dir, x) if not os.path.isabs(x) else x, files)

    symms = map(lambda x: XDS_ASCII(x, read_data=False).symm, files)
    cells = numpy.array(map(lambda x: x.unit_cell().parameters(), symms))
    sgs = map(lambda x: str(x.space_group_info()), symms)
    laues = map(lambda x: str(x.space_group().build_derived_reflection_intensity_group(False).info()), symms)

    median_cell = map(lambda i: numpy.median(cells[:,i]), xrange(6))
    mean_cell = map(lambda i: cells[:,i].mean(), xrange(6))
    cell_sd = map(lambda i: numpy.std(cells[:,i]), xrange(6))

    print "%4d files loaded" % len(files)
    print "Space groups:", ", ".join(map(lambda x: "%s (%d files)" % (x, sgs.count(x)), set(sgs)))
    print " Laue groups:", ", ".join(map(lambda x: "%s (%d files)" % (x, laues.count(x)), set(laues)))
    print " Median cell:", " ".join(map(lambda x: "%7.3f" % x, median_cell))
    print "   Mean cell:", " ".join(map(lambda x: "%7.3f" % x, mean_cell))
    print "          SD:", " ".join(map(lambda x: "%7.1e" % x, cell_sd))

    # for BLEND $CCP4/share/blend/R/blend0.R
    # names(macropar) <- c("cn","a","b","c","alpha","beta","gamma","mosa","ctoddist","wlength")
    ofs = open("forR_macropar.dat", "w")
    for i, cell in enumerate(cells):
        print >>ofs, "%4d" % (i+1),
        print >>ofs, " ".join(map(lambda x: "%7.3f" % x, cell)),
        print >>ofs, " 0 0 0"
    ofs.close()
    shutil.copyfile("forR_macropar.dat", "forR_macropar.dat.bak")

    print
    print "Run BLEND?"
    print "Rscript $CCP4/share/blend/R/blend0.R"
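# Illustrative row of the forR_macropar.dat written above (made-up cell
# values). Columns follow blend0.R's macropar names quoted in the comment:
# cn, a, b, c, alpha, beta, gamma, then mosa/ctoddist/wlength written here
# as 0 placeholders:
#    1  78.840  78.840  38.290  90.000  90.000  90.000  0 0 0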
def decide_resolution(summarydat, params, log_out):
    best = choose_best_result(summarydat, log_out)
    if best is None:
        log_out.write("No data for deciding resolution cutoff.\n")
        return None

    log_out.write("Using %s for deciding resolution cutoff.\n" % best)
    iobs = XDS_ASCII(best, i_only=True).i_obs() # Result with max CC1/2
    est = estimate_resolution_based_on_cc_half(iobs, params.cc_one_half_min,
                                               params.cc_half_tol, params.n_bins,
                                               log_out=log_out)
    if None not in (est.d_min, est.cc_at_d_min):
        log_out.write("Best resolution cutoff= %.2f A @CC1/2= %.4f\n" % (est.d_min, est.cc_at_d_min))
    else:
        log_out.write("Can't decide resolution cutoff. No reflections??\n")

    return est.d_min
def get_most_possible_symmetry(workdir):
    # Try the pointless log, then the DIALS experiments json, then the
    # DIALS.HKL header; implicitly returns None if none of them works.
    try:
        pointless_log = os.path.join(workdir, "pointless.log")
        xs = pointless.parse_pointless_output_for_symm(open(pointless_log).read()).get("symm")
        if xs is not None: return xs
    except:
        pass

    try:
        xs = get_crystal_symmetry_from_json(os.path.join(workdir, "integrated_experiments.json"))
        if xs is not None: return xs
    except:
        pass

    try:
        xac = XDS_ASCII(os.path.join(workdir, "DIALS.HKL"), read_data=False)
        return xac.symm
    except:
        pass
add_dataset(name="dataset", wavelength=0)
mtz_dataset.add_miller_array(miller_array=i_obs.select(~remove_sel), column_root_label="ICUT")
mtz_dataset.add_miller_array(miller_array=i_obs.select(remove_sel), column_root_label="IREMOVED")
if f_obs is not None:
    mtz_dataset.add_miller_array(miller_array=f_obs.select(~remove_sel), column_root_label="FCUT")
    mtz_dataset.add_miller_array(miller_array=f_obs.select(remove_sel), column_root_label="FREMOVED")
mtz_dataset.mtz_object().write(file_name=params.hklout)

if params.xds_ascii is not None:
    # XXX Need to check unit cell compatibility
    from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII
    from cctbx import miller
    xa = XDS_ASCII(params.xds_ascii, sys.stdout)
    miller.map_to_asu(xa.symm.space_group_info().type(), False, xa.indices)
    removed_indices = i_obs.indices().select(remove_sel)
    out = open("removed_positions.dat", "w")
    for hkl, x, y, z, i, sigi in zip(xa.indices, xa.xd, xa.yd, xa.zd, xa.iobs, xa.sigma_iobs):
        if sigi <= 0:
            print "sigi<=0", x, y, z, i, sigi
            continue
        if hkl in removed_indices:
            print >>out, x, y, z, i, sigi

if params.hklref is not None:
    #from eval_Rfree_factors_with_common_reflections import get_flag
    from cctbx.array_family import flex
    calc_r = lambda f_obs, f_model: flex.sum(flex.abs(f_obs.data()-f_model.data()))/flex.sum(f_obs.data())
def run_cycle(self, xds_ascii_files, reference_idx=None):
    if len(xds_ascii_files) == 0:
        print >>self.out, "Error: no files given."
        return

    xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
    xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

    # Get averaged cell for scaling
    sg, cell, lcv, alcv = self.average_cells(xds_ascii_files)
    self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv, alcv)

    # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
    inp_out = open(xscale_inp, "w")
    inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
    inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
    inp_out.write(self.xscale_inp_head)

    for i, xds_ascii in enumerate(xds_ascii_files):
        f = self.altfile.get(xds_ascii, xds_ascii)
        tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x: len(x))
        refstr = "*" if i == reference_idx else " "
        inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
        if len(self.xscale_params.corrections) != 3:
            inp_out.write("  CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
        if self.xscale_params.frames_per_batch is not None:
            frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
            nframes = frame_range[1] - frame_range[0]
            nbatch = int(numpy.ceil(nframes / self.xscale_params.frames_per_batch))
            print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
            inp_out.write("  NBATCH= %d\n" % nbatch)

    inp_out.close()

    print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
    try:
        xscale.run_xscale(xscale_inp, cbf_to_dat=True,
                          use_tmpdir_if_available=self.xscale_params.use_tmpdir_if_available)
    except:
        print >>self.out, traceback.format_exc()

    xscale_log = open(xscale_lp).read()
    if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
        print >>self.out, "DEBUG:: Need to choose files."

        # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections.
        # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves?
        # Older versions just print correlation table and stop.
        if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
            G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
            #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
            cliques = [c for c in nx.find_cliques(G)]
            cliques.sort(key=lambda x: len(x))

            if self._counter == 1:
                max_clique = cliques[-1]
            else:
                idx_prevfile = 1 if self.reference_file else 0
                max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1] # xscale.hkl must be included!

            if self.reference_file:
                max_clique = [0,] + filter(lambda x: x != 0, max_clique)

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            try_later = map(lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes()))

            print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique), len(try_later))
            print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(G.nodes()))
            for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))
            assert len(try_later) <= 0 # Never be the case with newer xscale!! (if the case, check_remove_list() should be modified to skip_num+=1
            if len(try_later) > 0:
                print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                if self.reference_file:
                    next_files = [self.reference_file,] + next_files
                self.workdir = self.request_next_workdir()
                self.run_cycle(next_files)
            return
        else:
            bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
            print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            # XXX Actually, not all datasets need to be thrown.. some of them are useful..
            for i in bad_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i],
                               filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))

        return
    elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
        print >>self.out, "DEBUG:: Need to discard useless data."
        unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
        if len(unuseful_data) == 0:
            print >>self.out, "I don't know how to fix it.."
            return
        remove_idxes = map(lambda x: x[0]-1, unuseful_data)
        remove_idxes = self.check_remove_list(remove_idxes)
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        for i in remove_idxes:
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = "useless"

        for f in "XSCALE.INP", "XSCALE.LP":
            util.rotate_file(os.path.join(self.workdir, f))
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        return
    elif "INACCURATE SCALING FACTORS." in xscale_log:
        # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
        print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
    elif "!!! ERROR !!!" in xscale_log:
        print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
        return

    # Re-scale by changing reference
    rescale_for = None
    if len(self.reject_method) == 0:
        rescale_for = self.reference_choice # may be None
    elif reference_idx is None:
        rescale_for = "bmed"

    if rescale_for is not None and len(xds_ascii_files) > 1:
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling with %s" % rescale_for
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(xds_ascii_files, reference_idx=ref_num)

    if len(self.reject_method) == 0:
        return

    # Remove bad data
    remove_idxes = []
    remove_reasons = {}

    if self.reject_method[0] == "framecc":
        print >>self.out, "Rejections based on frame CC"
        from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

        # list of [frame, n_all, n_common, cc] in the same order
        framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                               output_dir=self.workdir,
                                               nproc=self.nproc).values()
        if self.reject_params.framecc.method == "tukey":
            ccs = numpy.array(map(lambda x: x[3], reduce(lambda x, y: x+y, framecc)))
            ccs = ccs[ccs == ccs] # Remove nan
            q25, q75 = numpy.percentile(ccs, [25, 75])
            cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
            print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
        else:
            cc_cutoff = self.reject_params.framecc.abs_cutoff
            print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

        for i, cclist in enumerate(framecc):
            useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
            if len(useframes) == 0:
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            f = xds_ascii_files[i]
            xac = XDS_ASCII(f)
            if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                continue # All useful frames.

            sel = xac.iframe == useframes[0]
            for x in useframes[1:]:
                sel |= xac.iframe == x

            if sum(sel) < 10: # XXX care I/sigma
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str, useframes)),
                                                                           min(xac.iframe), max(xac.iframe), f)

            newf = self.request_file_modify(f)
            xac.write_selected(sel, newf)

        self.reject_method.pop(0) # Perform only once
    elif self.reject_method[0] == "lpstats":
        if "bfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
            Bs = numpy.array(map(lambda x: x[1], xscalelp.get_k_b(xscale_lp)))
            q25, q75 = numpy.percentile(Bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(Bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_B")
                    count += 1

            print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.b" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
            bs = numpy.array(map(lambda x: x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.b")
                    count += 1

            print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.ab" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
            vals = numpy.array(map(lambda x: x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, ab in enumerate(vals):
                if ab < lowlim or ab > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.ab")
                    count += 1

            print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "rfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
            rstats = xscalelp.get_rfactors_for_each(xscale_lp)
            vals = numpy.array(map(lambda x: rstats[x][-1][1], rstats)) # Read total R-factor
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, v in enumerate(vals):
                if v < lowlim or v > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_R")
                    count += 1

            print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "pairwise_cc" in self.reject_params.lpstats.stats:
            corrs = xscalelp.get_pairwise_correlations(xscale_lp)
            if self.reject_params.lpstats.pwcc.method == "tukey":
                q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                iqr = q75 - q25
                lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
            else:
                lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

            bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
            idx_bad = {}
            for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                idx_bad[i] = idx_bad.get(i, 0) + 1
                idx_bad[j] = idx_bad.get(j, 0) + 1

            idx_bad = idx_bad.items()
            idx_bad.sort(key=lambda x: x[1])
            count = 0
            for idx, badcount in reversed(idx_bad):
                remove_idxes.append(idx-1)
                remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                if len(bad_corrs) == 0: break
                fun_key = lambda x: x[3]
                print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx,
                                                                                               min(bad_corrs, key=fun_key)[3],
                                                                                               max(bad_corrs, key=fun_key)[3],
                                                                                               len(bad_corrs))
                count += 1

            print >>self.out, " %4d pairwise CC outliers removed" % count

        self.reject_method.pop(0) # Perform only once
    elif self.reject_method[0] == "delta_cc1/2":
        print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = xscalelp.read_stats_table(xscale_lp)
        i_stat = -1 if self.delta_cchalf_bin == "total" else -2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name->idx table
        remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

        # For consistent resolution limit
        inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
        count = 0
        for i in xrange(len(xds_ascii_files)-1): # if only one file, cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

            cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                         inpfiles=remaining_files.keys(),
                                                         stat_bin=self.delta_cchalf_bin,
                                                         nproc=self.nproc,
                                                         nproc_each=self.nproc_each,
                                                         batchjobs=self.batchjobs)

            rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
            rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]

            # Decision making by CC1/2
            print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org,
                                                                                      cc_i, nuniq_i,
                                                                                      prev_cchalf, prev_nuniq)
            if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
            print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
            del remaining_files[remaining_files.keys()[rem_idx]] # remove file from table
            count += 1

        print >>self.out, " %4d removed by DeltaCC1/2 method" % count

        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
        else:
            self.reject_method.pop(0)
    else:
        print >>self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method[0]

    # Remove duplicates
    remove_idxes = list(set(remove_idxes))
    remove_idxes = self.check_remove_list(remove_idxes)
    if len(remove_idxes) > 0:
        print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
        for i in sorted(remove_idxes):
            print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

    # Next run
    keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
    if len(self.reject_method) > 0 or len(remove_idxes) > 0:
        self.workdir = self.request_next_workdir()
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
    elif self.reference_choice is not None and len(keep_idxes) > 1:
        # Just re-scale with B reference
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling2 with %s" % self.reference_choice
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise Exception("Unknown batch engine: %s" % params.batch.engine)

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >>out, "Parameters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try:
        html_report.add_params(params, master_params_str)
    except:
        print >>out, traceback.format_exc()

    xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin))
    xds_ascii_files = filter(lambda x: x != "" and os.path.isfile(x), xds_ascii_files)
    xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files)

    cells = collections.OrderedDict()
    laues = {} # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            try:
                symm = any_reflection_file(xac).as_miller_arrays()[0].crystal_symmetry()
            except:
                print >>out, "Error in reading %s" % xac
                print >>out, traceback.format_exc()
                return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue), {}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >>out, "  %s" % f
                print >>out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >>out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (params.space_group, laues.keys()[0])
            return
    else:
        tmp = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
        print >>out, "Space group for merging:", tmp.info()

    try:
        html_report.add_cells_and_files(cells, laues.keys()[0])
    except:
        print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV, clh))
        print >>out
        try:
            html_report.add_clutering_result(clusters, "blend")
        except:
            print >>out, traceback.format_exc()
    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"
        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"), float("nan"), clh))
        print >>out
        try:
            html_report.add_clutering_result(clusters, "cc_clustering")
        except:
            print >>out, traceback.format_exc()
    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write(" cluster ClH LCV aLCV run ds.all ds.used Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso \n")

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0], stats["redundancy"][0],
                                  stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0],
                                  stats["cmpl"][2], stats["redundancy"][2],
                                  stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2],
                                  stats["cmpl"][1], stats["redundancy"][1],
                                  stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1],
                                  stats["sig_ano"][1], stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b, stats["xtriage_log"].anisotropy,
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group, batchjobs),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ofs = open("result.pkl","w"); \
ret = merge_datasets(*args); \
pickle.dump(ret, ofs); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)

        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))
                lcv, alcv = float("nan"), float("nan")

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            # Last lcv & alcv
            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except:
                print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, space_group, batchjobs)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except:
                print >>out, traceback.format_exc()

    try:
        html_report.write_html()
    except:
        print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")
    return
def xds2shelx(xds_file, dir_name, prefix=None, dmin=None, dmax=None, force_anomalous=False,
              space_group=None, flag_source=None, add_flag=False):
    if prefix is None:
        prefix = os.path.splitext(os.path.basename(xds_file))[0]
    hklout = prefix + ".hkl"

    # if output file already exists, exit.
    if os.path.isfile(os.path.join(dir_name, hklout)):
        raise Exception(os.path.join(dir_name, hklout), "already exists.")

    # read header
    xac = XDS_ASCII(xds_file, read_data=False)
    wavelength = xac.wavelength
    if xac.wavelength is None and xac.input_files:
        wavelength = float(xac.input_files.values()[0][1])
    if wavelength is None:
        wavelength = 1.0
    anom_flag = xac.anomalous
    if force_anomalous:
        anom_flag = True

    sginfo_org = xac.symm.space_group_info()

    if space_group:
        sginfo = sgtbx.space_group_info(space_group)
    else:
        sginfo = sginfo_org

    sg = sginfo.group()

    # make output directory
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)

    logout = open(os.path.join(dir_name, "xds2shelx.log"), "w")
    print >>logout, "xds2shelx.py running in %s" % os.getcwd()
    print >>logout, "output directory: %s" % dir_name
    print >>logout, "original file: %s" % xds_file
    print >>logout, "flag_source: %s" % flag_source
    print >>logout, "space group: %s (original=%s, requested space_group=%s)" % (sginfo, sginfo_org, space_group)
    if sginfo_org.group().build_derived_reflection_intensity_group(False) != sg.build_derived_reflection_intensity_group(False):
        print >>logout, " WARNING!! specified space group is incompatible with original file (%s)." % sginfo_org
    print >>logout, "anomalous: %s (original=%s force_anomalous=%s)" % (anom_flag, xac.anomalous, force_anomalous)
    print >>logout, ""
    logout.flush()

    if sg.is_centric() and not sg.is_origin_centric():
        print >>logout, "Error: in shelx, the origin must lie on a center of symmetry."
        logout.flush()
        return

    ##
    if not os.path.exists(os.path.join(dir_name, "original")):
        os.symlink(xds_file, os.path.join(dir_name, "original"))

    ##
    # prepare XDSCONV.INP and run
    #
    with open(os.path.join(dir_name, "XDSCONV.INP"), "w") as ofs:
        ofs.write("OUTPUT_FILE=%s SHELX\n" % hklout)
        ofs.write("INPUT_FILE=original\n")
        ofs.write("MERGE= FALSE\n")
        ofs.write("FRIEDEL'S_LAW= %s\n" % ("FALSE" if anom_flag else "TRUE"))
        if None not in (dmin, dmax):
            ofs.write("INCLUDE_RESOLUTION_RANGE= %s %s\n" % (dmax, dmin))

    call(cmd="xdsconv",
         wdir=dir_name,
         expects_in=["original"],
         expects_out=[hklout],
         stdout=logout)

    cell_str = xtal.format_unit_cell(xac.symm.unit_cell(), lfmt="%8.4f", afmt="%7.3f")
    with open(os.path.join(dir_name, "%s.ins" % prefix), "w") as ofs:
        ofs.write("CELL %.4f %s\n" % (wavelength, cell_str))
        ofs.write("ZERR 1 0 0 0 0 0 0\n")
        ofs.write("LATT %s\n" % xtal.shelx_latt(sg))
        for iop in range(1, sg.n_smx()):
            ofs.write("SYMM %s\n" % sg(iop).as_xyz(decimal=True, t_first=True, symbol_letters="XYZ"))
        ofs.write("SFAC C N O S\n")
        ofs.write("UNIT 6 6 6 6\n")
        ofs.write("FIND 10\n") # TODO more intelligent
        ofs.write("NTRY 1000\n")
        ofs.write("HKLF 4\n")
        ofs.write("END\n")
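# Illustrative shape of the .ins written above (cell and wavelength values
# made up; LATT and any SYMM lines depend on the actual space group):
#   CELL 1.0000  50.0000 60.0000 70.0000  90.000  90.000  90.000
#   ZERR 1 0 0 0 0 0 0
#   LATT ...
#   SFAC C N O S
#   UNIT 6 6 6 6
#   FIND 10
#   NTRY 1000
#   HKLF 4
#   END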
print "altered:", idxes ksb.assign_operators([debug_op, sgtbx.change_of_basis_op("h,k,l")]) print "right?:", [ i for i, x in enumerate(ksb.best_operators) if not x.is_identity_op() ] #ksb.debug_write_mtz() #ksb.modify_xds_ascii_files() quit() arrays = [] for f in xac_files: print "Reading", f xac = XDS_ASCII(f, i_only=True) xac.remove_rejected() a = xac.i_obs().resolution_filter(d_min=3) a = a.merge_equivalents(use_internal_variance=False).array() arrays.append(a) symm = arrays[0].crystal_symmetry() cosets = reindex.reindexing_operators(symm, symm) reidx_ops = cosets.combined_cb_ops() reidx_ops.sort(key=lambda x: not x.is_identity_op()) print " Possible reindex operators:", map(lambda x: str(x.as_hkl()), reidx_ops) determined = set([ 0, ])
def reindex_with_specified_symm_worker(wd, wdr, topdir, log_out, reference_symm, sgnum_laue, prep_dials_files=False):
    """
    wd: directory where XDS file exists
    wdr: wd to return; a directory where transformed file should be saved.
         If wd!=wdr, files in wd/ are unchanged during procedure. Multiprocessing is unsafe when wd==wdr.
    """
    out = StringIO()
    print >>out, "%s:" % os.path.relpath(wd, topdir),

    # Find appropriate data
    xac_file = util.return_first_found_file(("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
                                             "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                             "XDS_ASCII.HKL.org", "XDS_ASCII.HKL",
                                             "DIALS.HKL.org", "DIALS.HKL"),
                                            wd=wd)
    if xac_file is None:
        print >>out, "Can't find XDS_ASCII file in %s" % wd
        log_out.write(out.getvalue())
        log_out.flush()
        return (wdr, None)

    if xac_file.endswith(".org"):
        xac_file_out = xac_file[:-4]
    else:
        xac_file_out = xac_file

    xac = XDS_ASCII(xac_file, read_data=False)
    print >>out, "%s %s (%s)" % (os.path.basename(xac_file), xac.symm.space_group_info(),
                                 ",".join(map(lambda x: "%.2f"%x, xac.symm.unit_cell().parameters())))

    if xac.symm.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue:
        if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1, 10): # XXX Check unit cell consistency!!
            print >>out, " Already scaled with specified symmetry"
            log_out.write(out.getvalue())
            log_out.flush()

            if wd != wdr: shutil.copy2(xac_file, wdr)

            if prep_dials_files and "DIALS.HKL" not in xac_file:
                prepare_dials_files(wd, out, moveto=wdr)

            return (wdr, (numpy.array(xac.symm.unit_cell().parameters()),
                          os.path.join(wdr, os.path.basename(xac_file))))

    cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20) # XXX ISN'T THIS TOO LARGE?

    if len(cosets.combined_cb_ops()) == 0:
        print >>out, "Can't find operator:"
        xac.symm.show_summary(out, " ")
        reference_symm.show_summary(out, " ")
        log_out.write(out.getvalue())
        log_out.flush()
        return (wdr, None)

    if wd == wdr:
        dest = tempfile.mkdtemp(prefix="multiprep", dir=wd)
    else:
        dest = wdr

    hklout = os.path.join(dest, os.path.basename(xac_file_out))

    newcell = xac.write_reindexed(op=cosets.combined_cb_ops()[0],
                                  space_group=reference_symm.space_group(),
                                  hklout=hklout)

    if "DIALS.HKL" in os.path.basename(xac_file):
        outstr = 'output.experiments="%sreindexed_experiments.json" ' % os.path.join(dest, "")
        outstr += 'output.reflections="%sreindexed_reflections.pickle" ' % os.path.join(dest, "")
        for f in ("experiments.json", "indexed.pickle"):
            if not os.path.isfile(os.path.join(os.path.dirname(xac_file), f)):
                continue
            util.call('dials.reindex %s change_of_basis_op=%s space_group="%s" %s' % (f,
                                                                                      cosets.combined_cb_ops()[0].as_abc(),
                                                                                      reference_symm.space_group_info(),
                                                                                      outstr),
                      wdir=os.path.dirname(xac_file))
    elif prep_dials_files:
        prepare_dials_files(wd, out,
                            space_group=reference_symm.space_group(),
                            reindex_op=cosets.combined_cb_ops()[0],
                            moveto=dest)

    newcell_str = " ".join(map(lambda x: "%.3f"%x, newcell.parameters()))
    print >>out, " Reindexed to transformed cell: %s with %s" % (newcell_str, cosets.combined_cb_ops()[0].as_hkl())
    log_out.write(out.getvalue())
    log_out.flush()

    if wd == wdr:
        for f in glob.glob(os.path.join(dest, "*")):
            f_in_wd = os.path.join(wd, os.path.basename(f))
            if os.path.exists(f_in_wd) and not os.path.exists(f_in_wd+".org"):
                os.rename(f_in_wd, f_in_wd+".org")
            os.rename(f, f_in_wd)

        shutil.rmtree(dest)
        ret = (numpy.array(newcell.parameters()),
               os.path.join(wd, os.path.basename(xac_file_out)))
    else:
        ret = (numpy.array(newcell.parameters()), hklout)

    return (wdr, ret)
def run(params, log_out):
    xa = XDS_ASCII(params.xds_ascii, log_out)
    rejected_array = miller.array(miller_set=miller.set(crystal_symmetry=xa.symm,
                                                        indices=xa.indices,
                                                        anomalous_flag=False),
                                  data=xa.sigma_iobs < 0)

    # Read ZCAL, not ZOBS, because ZOBS (and XOBS, YOBS) can be zero (in case unobserved).
    xa_zd = miller.array(miller_set=miller.set(crystal_symmetry=xa.symm,
                                               indices=xa.indices,
                                               anomalous_flag=False),
                         data=xa.zd)

    integ_data = integrate_hkl_as_flex.reader(params.integrate_hkl, ["MAXC", "ZCAL"]).arrays()
    maxc_array, integ_zcal = integ_data["MAXC"], integ_data["ZCAL"]

    assert integ_zcal.unit_cell().is_similar_to(xa_zd.unit_cell()) # two sets of indices should be comparable.

    overload_flags = maxc_array.customized_copy(data=maxc_array.data() == params.overload)
    print "Overloaded observations in INTEGRATE.HKL:", overload_flags.data().count(True)
    print "Rejected (sigma<0) observations in XDS_ASCII.HKL:", rejected_array.data().count(True)

    # common_sets() does not work correctly for unmerged data!
    rejected_zd = xa_zd.select(rejected_array.data())
    #reject_indices = flex.bool([False for i in xrange(overload_flags.size())])

    print "making indices..........."
    import yamtbx_utils_ext
    integ_zcal = integ_zcal.sort(by_value="packed_indices") # Must be sorted before C++ function below!!
    reject_indices = yamtbx_utils_ext.make_selection_for_xds_unmerged(rejected_zd.indices(),
                                                                      rejected_zd.data(),
                                                                      integ_zcal.indices(),
                                                                      integ_zcal.data(),
                                                                      3.)
    """
    # This loop is too slow!
    for i in xrange(rejected_zd.size()):
        sel = integ_zcal.indices() == rejected_zd.indices()[i]
        sel &= (integ_zcal.data() - rejected_zd.data()[i]) < 3
        reject_indices.set_selected(sel, True)
        print i, rejected_zd.size(), sel.count(True)
    """
    """
    # This loop is also too slow!
    for j in xrange(integ_zcal.size()): # j: INTEGRATE.HKL
        if rejected_zd.indices()[i] != integ_zcal.indices()[j]: continue
        if abs(rejected_zd.data()[i] - integ_zcal.data()[j]) < 3: # within 3 frames.. OK?
            reject_indices[j] = True
    """

    print "Found rejected observations in INTEGRATE.HKL:", reject_indices.count(True)
    overload_flags.data().set_selected(reject_indices, False) # Set 'Un-overloaded'
    print "Remaining overloaded observations:", overload_flags.data().count(True)

    overload_flags_partial = overload_flags.map_to_asu().merge_equivalents(incompatible_flags_replacement=True).array()
    overload_flags_all = overload_flags.map_to_asu().merge_equivalents(incompatible_flags_replacement=False).array()

    mtz_object = iotbx.mtz.object(params.hklin). \
        add_crystal("crystal", "project", overload_flags_all.unit_cell()). \
        add_dataset(name="dataset", wavelength=0). \
        add_miller_array(miller_array=overload_flags_all, column_root_label="SATURATED_ALL"). \
        add_miller_array(miller_array=overload_flags_partial, column_root_label="SATURATED_PART"). \
        mtz_object()

    mtz_object.write(file_name=params.hklout)
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    # Check parameters
    if params.program == "xscale":
        if (params.xscale.frames_per_batch, params.xscale.degrees_per_batch).count(None) == 0:
            print "ERROR! You can't specify both xscale.frames_per_batch and xscale.degrees_per_batch"
            return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise RuntimeError("Unknown batch engine: %s" % params.batch.engine)  # raising a bare string is an error in modern Python

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)
    out.write("kamo.multi_merge started at %s\n\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    time_started = time.time()

    print >>out, "Parameters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try: html_report.add_params(params, master_params_str)
    except: print >>out, traceback.format_exc()

    xds_ascii_files = util.read_path_list(params.lstin, only_exists=True, as_abspath=True, err_out=out)

    if not xds_ascii_files:
        print >>out, "ERROR! Cannot find (existing) files in %s." % params.lstin
        return

    if len(xds_ascii_files) < 2:
        print >>out, "ERROR! Only one file in %s." % params.lstin
        print >>out, "       Give at least two files for merging."
        return

    cells = collections.OrderedDict()
    laues = {}  # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >>out, "Error in reading %s" % xac
            print >>out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue), {}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >>out, "  %s" % f
                print >>out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >>out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (params.space_group, laues.keys()[0])
            return

        sg_refset = space_group.info().as_reference_setting().group()
        if space_group != sg_refset:
            print >>out, "Sorry! A space group in a non-reference setting is currently not supported."
            print >>out, "(You requested %s, which is different from the reference setting: %s)" % (space_group.info(), sg_refset.info())
            return
    else:
        tmp = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
        print >>out, "Space group for merging:", tmp.info()

    test_flag_will_be_transferred = False

    if params.reference.data is not None:
        params.reference.data = os.path.abspath(params.reference.data)
        print >>out, "Reading reference data file: %s" % params.reference.data
        tmp = iotbx.file_reader.any_file(params.reference.data, force_type="hkl", raise_sorry_if_errors=True)
        if params.reference.copy_test_flag:
            from yamtbx.dataproc.command_line import copy_free_R_flag
            if None in copy_free_R_flag.get_flag_array(tmp.file_server.miller_arrays, log_out=out):
                print >>out, " Warning: no test flag found in reference file (%s)" % params.reference.data
            else:
                test_flag_will_be_transferred = True
                print >>out, " test flag will be transferred"

        if space_group is not None:
            if space_group != tmp.file_server.miller_arrays[0].space_group():
                print >>out, " ERROR! space_group=(%s) and that of reference.data (%s) do not match." % (space_group.info(), tmp.file_server.miller_arrays[0].space_group_info())
                return
        else:
            space_group = tmp.file_server.miller_arrays[0].space_group()
            print >>out, " space group for merging: %s" % space_group.info()

    if params.add_test_flag:
        if test_flag_will_be_transferred:
            print >>out, "Warning: add_test_flag=True was set, but the flag will be transferred from the reference file given."
        else:
            from cctbx import r_free_utils

            med_cell = numpy.median(cells.values(), axis=0)
            d_min = max(params.d_min - 0.2, 1.0) if params.d_min is not None else 1.5  # to prevent an infinite set
            sg = space_group
            if not sg:
                sg = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
            tmp = miller.build_set(crystal.symmetry(tuple(med_cell), space_group=sg),
                                   False, d_min=d_min, d_max=None)
            print >>out, "Generating test set using the reference symmetry:"
            crystal.symmetry.show_summary(tmp, out, " ")
            tmp = tmp.generate_r_free_flags(fraction=0.05, max_free=None,
                                            lattice_symmetry_max_delta=5.0,
                                            use_lattice_symmetry=True, n_shells=20)
            tmp.show_r_free_flags_info(out=out, prefix=" ")
            tmp = tmp.customized_copy(data=r_free_utils.export_r_free_flags_for_ccp4(flags=tmp.data(),
                                                                                     test_flag_value=True))

            mtz_object = tmp.as_mtz_dataset(column_root_label="FreeR_flag").mtz_object()
            test_flag_mtz = os.path.abspath(os.path.join(params.workdir, "test_flag.mtz"))
            mtz_object.write(file_name=test_flag_mtz)

            # Override the parameters
            params.reference.copy_test_flag = True
            params.reference.data = test_flag_mtz

    try: html_report.add_cells_and_files(cells, laues.keys()[0])
    except: print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only the first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >>out, "With the specified conditions, the following %d clusters will be merged:" % len(clusters)
        else:
            print >>out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >>out, "Please change the criteria of completeness or redundancy."
            print >>out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >>out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters:  # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV, clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "blend")
        except: print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  cluster_method=params.cc_clustering.method,
                                  distance_eqn=params.cc_clustering.cc_to_distance,
                                  min_common_refs=params.cc_clustering.min_common_refs,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only the first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >>out, "With the specified conditions, the following %d clusters will be merged:" % len(clusters)
        else:
            print >>out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >>out, "Please change the criteria of completeness or redundancy."
            print >>out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >>out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, ccmean, ccmin in clusters:  # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f CCmean=% .4f CCmin=% .4f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, ccmean, ccmin)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"), float("nan"), clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "cc_clustering")
        except: print >>out, traceback.format_exc()

    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH   LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso.bst Aniso.wst dmin.est\n")
    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %6.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %9.2f %9.2f %.2f\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0], stats["redundancy"][0],
                                  stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0],
                                  stats["cmpl"][2], stats["redundancy"][2],
                                  stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2],
                                  stats["cmpl"][1], stats["redundancy"][1],
                                  stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1],
                                  stats["sig_ano"][1], stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  #stats["xtriage_log"].anisotropy,
                                  stats["aniso"]["d_min_best"], stats["aniso"]["d_min_worst"],
                                  stats["dmin_est"],
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)

        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            # Last lcv & alcv
            try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, space_group)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()

    try: html_report.write_html()
    except: print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")

    out.write("\nNormal exit at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    out.write("Total wall-clock time: %.2f sec.\n" % (time.time() - time_started))

    return
for i in idxes[:len(ksb.arrays)//2]:
    ksb.arrays[i] = ksb.arrays[i].customized_copy(indices=debug_op.apply(ksb.arrays[i].indices()))

print "altered:", idxes

ksb.assign_operators([debug_op, sgtbx.change_of_basis_op("h,k,l")])
print "right?:", [i for i, x in enumerate(ksb.best_operators) if not x.is_identity_op()]
#ksb.debug_write_mtz()
#ksb.modify_xds_ascii_files()
quit()

arrays = []
for f in xac_files:
    print "Reading", f
    xac = XDS_ASCII(f, i_only=True)
    xac.remove_rejected()
    a = xac.i_obs().resolution_filter(d_min=3)
    a = a.merge_equivalents(use_internal_variance=False).array()
    arrays.append(a)

symm = arrays[0].crystal_symmetry()
cosets = reindex.reindexing_operators(symm, symm)
reidx_ops = cosets.combined_cb_ops()
reidx_ops.sort(key=lambda x: not x.is_identity_op())
print " Possible reindex operators:", map(lambda x: str(x.as_hkl()), reidx_ops)

determined = set([0,])
old_ops = map(lambda x: 0, xrange(len(arrays)))

for ncycle in xrange(100):  # max cycle
def rescale_with_specified_symm(topdir, dirs, symms, out, sgnum=None, reference_symm=None):
    assert (sgnum, reference_symm).count(None) == 1

    if sgnum is not None:
        sgnum_laue = sgtbx.space_group_info(sgnum).group().build_derived_reflection_intensity_group(False).type().number()

        matches = filter(lambda x: x.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue,
                         symms)
        matched_cells = numpy.array(map(lambda x: x.unit_cell().parameters(), matches))
        median_cell = map(lambda x: numpy.median(matched_cells[:, x]), xrange(6))

        reference_symm = crystal.symmetry(median_cell, sgnum)
    else:
        sgnum = reference_symm.space_group_info().type().number()
        sgnum_laue = reference_symm.space_group().build_derived_reflection_intensity_group(False).type().number()

    print >>out
    print >>out, "Re-scaling with specified symmetry:", reference_symm.space_group_info().symbol_and_number()
    print >>out, " reference cell:", reference_symm.unit_cell()
    print >>out
    print >>out
    cells = {}  # cell and file

    for sym, wd in zip(symms, dirs):
        print >>out, os.path.relpath(wd, topdir),

        # Find appropriate data
        xac_file = util.return_first_found_file(("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
                                                 "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                                 "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
                                                wd=wd)
        if xac_file is None:
            print >>out, "Can't find XDS_ASCII file in %s" % wd
            continue

        xac = XDS_ASCII(xac_file, read_data=False)
        print >>out, "%s %s (%s)" % (os.path.basename(xac_file), xac.symm.space_group_info(),
                                     ",".join(map(lambda x: "%.2f" % x, xac.symm.unit_cell().parameters())))

        if xac.symm.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue:
            if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1, 10):
                print >>out, "  Already scaled with specified symmetry"
                cells[wd] = (numpy.array(xac.symm.unit_cell().parameters()), xac_file)
                continue

        xdsinp = os.path.join(wd, "XDS.INP")
        cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20)

        if len(cosets.combined_cb_ops()) == 0:
            print >>out, "Can't find operator:"
            sym.show_summary(out, " ")
            reference_symm.show_summary(out, " ")
            continue

        newcell = reference_symm.space_group().average_unit_cell(xac.symm.change_basis(cosets.combined_cb_ops()[0]).unit_cell())
        newcell = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
        print >>out, "Scaling with transformed cell:", newcell

        #for f in xds_files.generated_by_CORRECT:
        #    util.rotate_file(os.path.join(wd, f))
        bk_prefix = make_backup(xds_files.generated_by_CORRECT, wdir=wd, quiet=True)

        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                          ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                          ("UNIT_CELL_CONSTANTS", newcell),
                                          ("INCLUDE_RESOLUTION_RANGE", "50 0"),
                                          ("CORRECTIONS", ""),
                                          ("NBATCH", "1"),
                                          ("MINIMUM_I/SIGMA", None),  # use default
                                          ("REFINE(CORRECT)", None),  # use default
                                          ])
        run_xds(wd)
        for f in ("XDS.INP", "CORRECT.LP", "XDS_ASCII.HKL", "GXPARM.XDS"):
            if os.path.exists(os.path.join(wd, f)):
                shutil.copyfile(os.path.join(wd, f), os.path.join(wd, f + "_rescale"))

        revert_files(xds_files.generated_by_CORRECT, bk_prefix, wdir=wd, quiet=True)

        new_xac = os.path.join(wd, "XDS_ASCII.HKL_rescale")
        new_gxparm = os.path.join(wd, "GXPARM.XDS_rescale")
        if os.path.isfile(new_xac) and os.path.isfile(new_gxparm):
            cells[wd] = (XPARM(new_gxparm).unit_cell, new_xac)
            print "OK:", cells[wd][0]
        else:
            print >>out, "Error: rescaling failed (Can't find XDS_ASCII.HKL)"
            continue

    return cells, reference_symm
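# --- Illustrative sketch (not the yamtbx implementation): the essential job of
# a modify_xdsinp()-style helper as it is used above, assuming the simple
# "KEYWORD= value" layout with one keyword per line (real XDS.INP files do not
# guarantee this). A (key, None) pair means "drop the keyword so XDS falls
# back to its default", matching the call sites above.
def modify_xdsinp_sketch(xdsinp, inp_params):
    new_vals = dict((k, v) for k, v in inp_params if v is not None)
    drop = set(k for k, v in inp_params)  # all listed keywords lose their old value
    lines = []
    for l in open(xdsinp):
        key = l.split("=")[0].strip()
        if key in drop:
            continue  # old setting removed; re-added below if a new value was given
        lines.append(l)
    for k, v in new_vals.items():
        lines.append("%s= %s\n" % (k, v))
    open(xdsinp, "w").writelines(lines)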
def run_cycle(self, xds_ascii_files, reference_idx=None):
    if len(xds_ascii_files) == 0:
        print >>self.out, "Error: no files given."
        return

    xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
    xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

    # Get averaged cell for scaling
    sg, cell = self.average_cells(xds_ascii_files)

    # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
    inp_out = open(xscale_inp, "w")
    inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
    inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
    inp_out.write(self.xscale_inp_head)

    for i, xds_ascii in enumerate(xds_ascii_files):
        f = self.altfile.get(xds_ascii, xds_ascii)
        tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x: len(x))
        refstr = "*" if i == reference_idx else " "
        inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
        if len(self.xscale_params.corrections) != 3:
            inp_out.write("  CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
        if self.xscale_params.frames_per_batch is not None:
            frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
            nframes = frame_range[1] - frame_range[0]
            nbatch = int(numpy.ceil(nframes / self.xscale_params.frames_per_batch))
            print >>self.out, "frame range of %s is %d,%d; setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
            inp_out.write("  NBATCH= %d\n" % nbatch)

    inp_out.close()

    print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
    xscale.run_xscale(xscale_inp)
    #util.call(xscale_comm, wdir=self.workdir)

    cbfouts = glob.glob(os.path.join(self.workdir, "*.cbf"))
    if len(cbfouts) > 0:
        # This doesn't affect anything, so the program should not stop even if this fails
        try:
            xscalelp.cbf_to_dat(xscale_lp)
            for f in cbfouts: os.remove(f)
        except:
            print >>self.out, traceback.format_exc()

    xscale_log = open(xscale_lp).read()
    if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
        print >>self.out, "DEBUG:: Need to choose files."

        # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections,
        # ..but does not print the table. Sometimes only one dataset is left. Should we make the table ourselves?
        # Older versions just print the correlation table and stop.
        if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
            G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
            #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
            cliques = [c for c in nx.find_cliques(G)]
            cliques.sort(key=lambda x: len(x))

            if self._counter == 1:
                max_clique = cliques[-1]
            else:
                idx_prevfile = 1 if self.reference_file else 0
                max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1]  # xscale.hkl must be included!

            if self.reference_file:
                max_clique = [0,] + filter(lambda x: x != 0, max_clique)

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            try_later = map(lambda i: xds_ascii_files[i],
                            filter(lambda x: x not in max_clique, G.nodes()))

            print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique), len(try_later))
            print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files) - len(G.nodes()))
            for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))
            assert len(try_later) <= 0  # Never the case with newer xscale!! (if it were, check_remove_list() should be modified to skip_num += 1)
            if len(try_later) > 0:
                print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                if self.reference_file:
                    next_files = [self.reference_file,] + next_files
                self.workdir = self.request_next_workdir()
                self.run_cycle(next_files)
            return
        else:
            bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
            print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            # XXX Actually, not all datasets need to be thrown away.. some of them are useful..
            for i in bad_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i],
                               filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))
            return
    elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
        print >>self.out, "DEBUG:: Need to discard useless data."
        unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]]  #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
        if len(unuseful_data) == 0:
            print >>self.out, "I don't know how to fix it.."
            return
        remove_idxes = map(lambda x: x[0]-1, unuseful_data)
        remove_idxes = self.check_remove_list(remove_idxes)
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        for i in remove_idxes:
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = "useless"

        for f in "XSCALE.INP", "XSCALE.LP":
            util.rotate_file(os.path.join(self.workdir, f))
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        return
    elif "INACCURATE SCALING FACTORS." in xscale_log:
        # Actually I don't know how to fix this.. (bug?) but it is worth proceeding (discarding bad data may solve the problem).
        print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
    elif "!!! ERROR !!!" in xscale_log:
        print >>self.out, "Unknown error! Please check XSCALE.LP and fix the program."
        return

    # Re-scale by changing the reference
    rescale_for = None
    if len(self.reject_method) == 0:
        rescale_for = self.reference_choice  # may be None
    elif reference_idx is None:
        rescale_for = "bmed"

    if rescale_for is not None and len(xds_ascii_files) > 1:
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling with %s" % rescale_for
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(xds_ascii_files, reference_idx=ref_num)

    if len(self.reject_method) == 0:
        return

    # Remove bad data
    remove_idxes = []
    remove_reasons = {}

    if self.reject_method[0] == "framecc":
        print >>self.out, "Rejections based on frame CC"
        from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

        # list of [frame, n_all, n_common, cc] in the same order
        framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                               output_dir=self.workdir,
                                               nproc=self.nproc).values()
        if self.reject_params.framecc.method == "tukey":
            ccs = numpy.array(map(lambda x: x[3], reduce(lambda x, y: x+y, framecc)))
            q25, q75 = numpy.percentile(ccs, [25, 75])
            cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
            print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
        else:
            cc_cutoff = self.reject_params.framecc.abs_cutoff
            print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

        for i, cclist in enumerate(framecc):
            useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
            if len(useframes) == 0:
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            f = xds_ascii_files[i]
            xac = XDS_ASCII(f)
            if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                continue  # All frames useful.

            sel = xac.iframe == useframes[0]
            for x in useframes[1:]:
                sel |= xac.iframe == x
            if sum(sel) < 10:  # XXX should also consider I/sigma
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str, useframes)),
                                                                           min(xac.iframe), max(xac.iframe), f)
            newf = self.request_file_modify(f)
            xac.write_selected(sel, newf)

        self.reject_method.pop(0)  # Perform only once
    elif self.reject_method[0] == "lpstats":
        if "bfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
            Bs = numpy.array(map(lambda x: x[1], xscalelp.get_k_b(xscale_lp)))
            q25, q75 = numpy.percentile(Bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(Bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_B")
                    count += 1
            print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.b" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
            bs = numpy.array(map(lambda x: x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.b")
                    count += 1
            print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.ab" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
            vals = numpy.array(map(lambda x: x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, ab in enumerate(vals):
                if ab < lowlim or ab > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.ab")
                    count += 1
            print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "rfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
            rstats = xscalelp.get_rfactors_for_each(xscale_lp)
            vals = numpy.array(map(lambda x: rstats[x][-1][1], rstats))  # Read total R-factor
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, v in enumerate(vals):
                if v < lowlim or v > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_R")
                    count += 1
            print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "pairwise_cc" in self.reject_params.lpstats.stats:
            corrs = xscalelp.get_pairwise_correlations(xscale_lp)
            if self.reject_params.lpstats.pwcc.method == "tukey":
                q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                iqr = q75 - q25
                lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
            else:
                lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

            bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
            idx_bad = {}
            for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                idx_bad[i] = idx_bad.get(i, 0) + 1
                idx_bad[j] = idx_bad.get(j, 0) + 1

            idx_bad = idx_bad.items()
            idx_bad.sort(key=lambda x: x[1])
            count = 0
            for idx, badcount in reversed(idx_bad):
                remove_idxes.append(idx-1)
                remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                if len(bad_corrs) == 0: break
                fun_key = lambda x: x[3]
                print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx,
                                                                                               min(bad_corrs, key=fun_key)[3],
                                                                                               max(bad_corrs, key=fun_key)[3],
                                                                                               len(bad_corrs))
                count += 1
            print >>self.out, " %4d pairwise CC outliers removed" % count

        self.reject_method.pop(0)  # Perform only once
    elif self.reject_method[0] == "delta_cc1/2":
        print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = xscalelp.read_stats_table(xscale_lp)
        i_stat = -1 if self.delta_cchalf_bin == "total" else -2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name -> idx table
        remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

        # For a consistent resolution limit
        inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
        count = 0
        for i in xrange(len(xds_ascii_files)-1):  # with only one file, we cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

            cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                         inpfiles=remaining_files.keys(),
                                                         stat_bin=self.delta_cchalf_bin,
                                                         nproc=self.nproc,
                                                         nproc_each=self.nproc_each,
                                                         batchjobs=self.batchjobs)

            rem_idx, cc_i, nuniq_i = cchalf_list[0]  # First (largest) is the worst one to remove.
            rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]

            # Decision making by CC1/2
            print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org,
                                                                                      cc_i, nuniq_i,
                                                                                      prev_cchalf, prev_nuniq)
            if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
            print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i - prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
            del remaining_files[remaining_files.keys()[rem_idx]]  # remove file from table
            count += 1

        print >>self.out, " %4d removed by DeltaCC1/2 method" % count

        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
        else:
            self.reject_method.pop(0)
    else:
        print >>self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method

    # Remove duplicates
    remove_idxes = list(set(remove_idxes))
    remove_idxes = self.check_remove_list(remove_idxes)
    if len(remove_idxes) > 0:
        print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
        for i in sorted(remove_idxes):
            print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

    # Next run
    keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
    if len(self.reject_method) > 0 or len(remove_idxes) > 0:
        self.workdir = self.request_next_workdir()
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
    elif self.reference_choice is not None and len(keep_idxes) > 1:
        # Just re-scale with B reference
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling2 with %s" % self.reference_choice
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
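# --- Illustrative sketch (not from the original source): the Tukey-fence rule
# applied repeatedly above (B-factor, error model b and a*b, R-factor, pairwise
# CC), factored into one small numpy helper. Values outside
# [Q1 - c*IQR, Q3 + c*IQR] are treated as outliers.
import numpy

def tukey_outlier_indices(values, iqr_coeff=1.5):
    values = numpy.asarray(values, dtype=float)
    q25, q75 = numpy.percentile(values, [25, 75])
    iqr = q75 - q25
    lowlim, highlim = q25 - iqr_coeff * iqr, q75 + iqr_coeff * iqr
    return [i for i, v in enumerate(values) if v < lowlim or v > highlim]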
def run(params, out=sys.stdout):
    cm = CellGraph(tol_length=params.tol_length, tol_angle=params.tol_angle)

    if not params.xdsdir and params.topdir:
        params.xdsdir = map(lambda x: x[0],
                            filter(lambda x: any(map(lambda y: y.startswith("XDS_ASCII.HKL"), x[2])) or "DIALS.HKL" in x[2],
                                   os.walk(params.topdir)))

    for i, xdsdir in enumerate(params.xdsdir):
        cm.add_proc_result(i, xdsdir)

    cm.group_xds_results(out)
    ret = cm.grouped_dirs

    if len(ret) == 0:
        return cm

    print >>out
    print >>out, "About the largest group:"
    for idx, wd in enumerate(ret[0]):
        xac_hkl = os.path.join(wd, "XDS_ASCII.HKL")
        correct_lp = os.path.join(wd, "CORRECT.LP")
        print >>out, "%.3d %s" % (idx, os.path.relpath(wd, params.topdir) if params.topdir is not None else wd),
        if not os.path.isfile(xac_hkl):
            print >>out, "Unsuccessful"
            continue

        sg = XDS_ASCII(xac_hkl, read_data=False).symm.space_group_info()
        clp = correctlp.CorrectLp(correct_lp)
        if "all" in clp.table:
            cmpl = clp.table["all"]["cmpl"][-1]
        else:
            cmpl = float("nan")
        ISa = clp.a_b_ISa[-1]
        print >>out, "%10s ISa=%5.2f Cmpl=%5.1f " % (sg, ISa, cmpl)

    if params.do_pointless:
        worker = pointless.Pointless()
        files = map(lambda x: os.path.join(x, "INTEGRATE.HKL"), ret[0])
        #print files
        files = filter(lambda x: os.path.isfile(x), files)
        print >>out, "\nRunning pointless for the largest member."
        result = worker.run_for_symm(xdsin=files, logout="pointless.log", tolerance=10, d_min=5)
        if "symm" in result:
            print >>out, " pointless suggested", result["symm"].space_group_info()

    if 0:  # disabled debug plotting (G is not defined in this scope)
        import pylab
        pos = nx.spring_layout(G)
        #pos = nx.spectral_layout(G)
        #pos = nx.circular_layout(G)
        #nx.draw_networkx_nodes(G, pos, node_size = 100, nodelist=others, node_color = 'w')
        nx.draw_networkx_nodes(G, pos, node_size=100, node_color='w')
        nx.draw_networkx_edges(G, pos, width=1)
        nx.draw_networkx_labels(G, pos, font_size=12, font_family='sans-serif', font_color='r')
        pylab.xticks([])
        pylab.yticks([])
        pylab.savefig("network.png")
        pylab.show()

    return cm
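# --- Illustrative sketch (not from the original source): the os.walk() filter
# above, unrolled for readability. It collects every directory that directly
# contains a file starting with "XDS_ASCII.HKL" or a "DIALS.HKL" file.
import os

def find_processed_dirs(topdir):
    found = []
    for root, dirnames, filenames in os.walk(topdir):
        if any(f.startswith("XDS_ASCII.HKL") for f in filenames) or "DIALS.HKL" in filenames:
            found.append(root)
    return found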
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal()
    else:
        raise RuntimeError("Unknown batch engine: %s" % params.batch.engine)  # raising a bare string is an error in modern Python

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >>out, "Parameters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try: html_report.add_params(params, master_params_str)
    except: print >>out, traceback.format_exc()

    xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin))
    xds_ascii_files = filter(lambda x: x != "" and os.path.isfile(x), xds_ascii_files)
    xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files)

    cells = collections.OrderedDict()
    laues = {}  # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >>out, "Error in reading %s" % xac
            print >>out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue), {}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >>out, "  %s" % f
                print >>out, ""
        return

    try: html_report.add_cells_and_files(cells, laues.keys()[0])
    except: print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        print >>out, "With the specified conditions, the following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters:  # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV, clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "blend")
        except: print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        print >>out, "With the specified conditions, the following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters:  # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"), float("nan"), clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "cc_clustering")
        except: print >>out, traceback.format_exc()

    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH   LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso  \n")
    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0], stats["redundancy"][0],
                                  stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0],
                                  stats["cmpl"][2], stats["redundancy"][2],
                                  stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2],
                                  stats["cmpl"][1], stats["redundancy"][1],
                                  stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1],
                                  stats["sig_ano"][1], stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  stats["xtriage_log"].anisotropy,
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, batchjobs),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % sys.executable)
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)

        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, batchjobs)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()

    try: html_report.write_html()
    except: print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")
    return
def rescale_with_specified_symm_worker(sym_wd_wdr, topdir, log_out, reference_symm, sgnum, sgnum_laue, prep_dials_files=False):
    # XXX Unsafe if multiple processes run this function for the same target directory at the same time
    sym, wd, wdr = sym_wd_wdr
    out = StringIO()
    print >>out, os.path.relpath(wd, topdir),

    # Find appropriate data # XXX does not work for DIALS data!!
    xac_file = util.return_first_found_file(("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
                                             "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                             "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
                                            wd=wd)
    if xac_file is None:
        print >>out, "Can't find XDS_ASCII file in %s" % wd
        log_out.write(out.getvalue())
        log_out.flush()
        return (wd, None)

    xac = XDS_ASCII(xac_file, read_data=False)
    print >>out, "%s %s (%s)" % (os.path.basename(xac_file), xac.symm.space_group_info(),
                                 ",".join(map(lambda x: "%.2f" % x, xac.symm.unit_cell().parameters())))

    if xac.symm.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue:
        if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1, 10):
            print >>out, "  Already scaled with specified symmetry"
            log_out.write(out.getvalue())
            log_out.flush()

            if wd != wdr: shutil.copy2(xac_file, wdr)

            if prep_dials_files:
                prepare_dials_files(wd, out, moveto=wdr)

            return (wdr, (numpy.array(xac.symm.unit_cell().parameters()),
                          os.path.join(wdr, os.path.basename(xac_file))))

    xdsinp = os.path.join(wd, "XDS.INP")
    cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20)

    if len(cosets.combined_cb_ops()) == 0:
        print >>out, "Can't find operator:"
        sym.show_summary(out, " ")
        reference_symm.show_summary(out, " ")
        log_out.write(out.getvalue())
        log_out.flush()
        return (wdr, None)

    newcell = reference_symm.space_group().average_unit_cell(xac.symm.change_basis(cosets.combined_cb_ops()[0]).unit_cell())
    newcell = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
    print >>out, "Scaling with transformed cell:", newcell

    #for f in xds_files.generated_by_CORRECT:
    #    util.rotate_file(os.path.join(wd, f))
    bk_prefix = make_backup(xds_files.generated_by_CORRECT, wdir=wd, quiet=True)

    modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                      ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                      ("UNIT_CELL_CONSTANTS", newcell),
                                      ("INCLUDE_RESOLUTION_RANGE", "50 0"),
                                      ("CORRECTIONS", ""),
                                      ("NBATCH", "1"),
                                      ("MINIMUM_I/SIGMA", None),  # use default
                                      ("REFINE(CORRECT)", None),  # use default
                                      ])
    run_xds(wd)
    for f in ("XDS.INP", "CORRECT.LP", "XDS_ASCII.HKL", "GXPARM.XDS"):
        if os.path.exists(os.path.join(wd, f)):
            shutil.copyfile(os.path.join(wd, f), os.path.join(wdr, f + "_rescale"))

    revert_files(xds_files.generated_by_CORRECT, bk_prefix, wdir=wd, quiet=True)

    new_xac = os.path.join(wdr, "XDS_ASCII.HKL_rescale")

    if prep_dials_files:
        prepare_dials_files(wd, out,
                            space_group=reference_symm.space_group(),
                            reindex_op=cosets.combined_cb_ops()[0],
                            moveto=wdr)

    ret = None
    if os.path.isfile(new_xac):
        ret = (XDS_ASCII(new_xac, read_data=False).symm.unit_cell().parameters(), new_xac)
        print >>out, " OK:", ret[0]
    else:
        print >>out, "Error: rescaling failed (Can't find XDS_ASCII.HKL)"

    log_out.write(out.getvalue())  # flush the accumulated log, as the early-return branches do
    log_out.flush()
    return (wd, ret)
def xds_sequence(root, params): print print os.path.relpath(root, params.topdir) init_lp = os.path.join(root, "INIT.LP") xparm = os.path.join(root, "XPARM.XDS") gxparm = os.path.join(root, "GXPARM.XDS") defpix_lp = os.path.join(root, "DEFPIX.LP") correct_lp = os.path.join(root, "CORRECT.LP") integrate_hkl = os.path.join(root, "INTEGRATE.HKL") xac_hkl = os.path.join(root, "XDS_ASCII.HKL") integrate_lp = os.path.join(root, "INTEGRATE.LP") spot_xds = os.path.join(root, "SPOT.XDS") xdsinp = os.path.join(root, "XDS.INP") assert os.path.isfile(xdsinp) if params.cell_prior.force: assert params.cell_prior.check xdsinp_dict = dict(get_xdsinp_keyword(xdsinp)) if params.cell_prior.sgnum > 0: xs_prior = crystal.symmetry(params.cell_prior.cell, params.cell_prior.sgnum) else: xs_prior = None decilog = multi_out() decilog.register("log", open(os.path.join(root, "decision.log"), "a"), atexit_send_to=None) try: print >> decilog, "xds_sequence started at %s in %s\n" % ( time.strftime("%Y-%m-%d %H:%M:%S"), root) if not kamo_test_installation.tst_xds(): print >> decilog, "XDS is not installed or expired!!" return if params.show_progress: decilog.register("stdout", sys.stdout) if params.mode == "initial" and params.resume and os.path.isfile( correct_lp): print >> decilog, " Already processed." return if params.mode == "recycle" and not os.path.isfile(gxparm): print >> decilog, "GXPARM.XDS not found. Cannot do recycle." return if params.fast_delphi and (params.nproc is None or params.nproc > 1): delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc) print >> decilog, " Setting delphi to ", delphi modify_xdsinp(xdsinp, inp_params=[ ("DELPHI", str(delphi)), ]) if params.nproc is not None and params.nproc > 1: modify_xdsinp(xdsinp, inp_params=[ ("MAXIMUM_NUMBER_OF_PROCESSORS", str(params.nproc)), ]) if params.mode == "initial": modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT")]) run_xds(wdir=root, show_progress=params.show_progress) initlp = InitLp(init_lp) first_bad = initlp.check_bad_first_frames() if first_bad: print >> decilog, " first frames look bad (too weak) exposure:", first_bad new_data_range = map( int, dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split()) new_data_range[0] = first_bad[-1] + 1 print >> decilog, " changing DATA_RANGE= to", new_data_range modify_xdsinp(xdsinp, inp_params=[("JOB", "INIT"), ("DATA_RANGE", "%d %d" % tuple(new_data_range))]) for f in xds_files.generated_by_INIT: util.rotate_file(os.path.join(root, f), copy=False) run_xds(wdir=root, show_progress=params.show_progress) # Peak search modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")]) run_xds(wdir=root, show_progress=params.show_progress) if params.auto_frame_exclude_spot_based: sx = idxreflp.SpotXds(spot_xds) sx.set_xdsinp(xdsinp) spots = filter(lambda x: 5 < x[-1] < 30, sx.collected_spots()) # low-res (5 A) frame_numbers = numpy.array(map(lambda x: int(x[2]) + 1, spots)) data_range = map( int, dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split()) # XXX this assumes SPOT_RANGE equals to DATA_RANGE. Is this guaranteed? 
h = numpy.histogram(frame_numbers, bins=numpy.arange(data_range[0], data_range[1] + 2, step=1)) q14 = numpy.percentile(h[0], [25, 75]) iqr = q14[1] - q14[0] cutoff = max( h[0][h[0] <= iqr * 1.5 + q14[1]]) / 5 # magic number print >> decilog, "DEBUG:: IQR= %.2f, Q1/4= %s, cutoff= %.2f" % ( iqr, q14, cutoff) cut_frames = h[1][h[0] < cutoff] keep_frames = h[1][h[0] >= cutoff] print >> decilog, "DEBUG:: keep_frames=", keep_frames print >> decilog, "DEBUG:: cut_frames=", cut_frames if len(cut_frames) > 0: cut_ranges = [ [cut_frames[0], cut_frames[0]], ] for fn in cut_frames: if fn - cut_ranges[-1][1] <= 1: cut_ranges[-1][1] = fn else: cut_ranges.append([fn, fn]) # Edit XDS.INP cut_inp_str = "".join( map( lambda x: "EXCLUDE_DATA_RANGE= %6d %6d\n" % tuple( x), cut_ranges)) open(xdsinp, "a").write("\n" + cut_inp_str) # Edit SPOT.XDS shutil.copyfile(spot_xds, spot_xds + ".org") sx.write(open(spot_xds, "w"), frame_selection=set(keep_frames)) # Indexing if params.cell_prior.method == "use_first": modify_xdsinp(xdsinp, inp_params=[ ("JOB", "IDXREF"), ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, params.cell_prior.cell))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum), ]) else: modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF")]) run_xds(wdir=root, show_progress=params.show_progress) print >> decilog, "" # TODO indexing stats like indexed percentage here. if params.tryhard: try_indexing_hard(root, params.show_progress, decilog, known_sgnum=params.cell_prior.sgnum, known_cell=params.cell_prior.cell, tol_length=params.cell_prior.tol_length, tol_angle=params.cell_prior.tol_angle) if not os.path.isfile(xparm): print >> decilog, " Indexing failed." return if params.cell_prior.sgnum > 0: # Check anyway xsxds = XPARM(xparm).crystal_symmetry() cosets = reindex.reindexing_operators( xs_prior, xsxds, params.cell_prior.tol_length, params.cell_prior.tol_angle) if cosets.double_cosets is None: if params.cell_prior.check: print >> decilog, " Incompatible cell. Indexing failed." return else: print >> decilog, " Warning: Incompatible cell." elif params.cell_prior.method == "symm_constraint_only": cell = xsxds.unit_cell().change_basis( cosets.combined_cb_ops()[0]) print >> decilog, " Trying symmetry-constrained cell parameter:", cell modify_xdsinp(xdsinp, inp_params=[ ("JOB", "IDXREF"), ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, cell.parameters()))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum), ]) for f in xds_files.generated_by_IDXREF: util.rotate_file(os.path.join(root, f), copy=(f == "SPOT.XDS")) run_xds(wdir=root, show_progress=params.show_progress) if not os.path.isfile(xparm): print >> decilog, " Indexing failed." return # Check again xsxds = XPARM(xparm).crystal_symmetry() if not xsxds.unit_cell().is_similar_to( xs_prior.unit_cell(), params.cell_prior.tol_length, params.cell_prior.tol_angle): print >> decilog, " Resulted in different cell. Indexing failed." return elif params.mode == "recycle": print >> decilog, " Start recycle. 
original ISa= %.2f" % correctlp.get_ISa( correct_lp, check_valid=True) for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS", "plot_integrate.log"): util.rotate_file(os.path.join(root, f), copy=True) shutil.copyfile(gxparm + ".1", xparm) else: raise "Unknown mode (%s)" % params.mode # To Integration modify_xdsinp(xdsinp, inp_params=[("JOB", "DEFPIX INTEGRATE"), ("INCLUDE_RESOLUTION_RANGE", "50 0")]) run_xds(wdir=root, show_progress=params.show_progress) if os.path.isfile(integrate_lp): xds_plot_integrate.run(integrate_lp, os.path.join(root, "plot_integrate.log")) if not os.path.isfile(integrate_hkl): print >> decilog, " Integration failed." return # Make _noscale.HKL if needed if params.no_scaling: bk_prefix = make_backup(("XDS.INP", ), wdir=root, quiet=True) xparm_obj = XPARM(xparm) modify_xdsinp(xdsinp, inp_params=[ ("JOB", "CORRECT"), ("CORRECTIONS", ""), ("NBATCH", "1"), ("MINIMUM_I/SIGMA", "50"), ("REFINE(CORRECT)", ""), ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, xparm_obj.unit_cell))), ("SPACE_GROUP_NUMBER", "%d" % xparm_obj.spacegroup), ]) print >> decilog, " running CORRECT without empirical scaling" run_xds(wdir=root, show_progress=params.show_progress) for f in xds_files.generated_by_CORRECT + ("XDS.INP", ): ff = os.path.join(root, f) if not os.path.isfile(ff): continue if ff.endswith(".cbf"): os.remove(ff) else: os.rename(ff, ff + "_noscale") revert_files(("XDS.INP", ), bk_prefix, wdir=root, quiet=True) # Run pointless pointless_integrate = {} if params.use_pointless: worker = Pointless() pointless_integrate = worker.run_for_symm( xdsin=integrate_hkl, logout=os.path.join(root, "pointless_integrate.log")) if "symm" in pointless_integrate: symm = pointless_integrate["symm"] print >> decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info( ) if xs_prior: if xtal.is_same_space_group_ignoring_enantiomorph( symm.space_group(), xs_prior.space_group()): print >> decilog, " which is consistent with given symmetry." elif xtal.is_same_laue_symmetry(symm.space_group(), xs_prior.space_group()): print >> decilog, " which has consistent Laue symmetry with given symmetry." else: print >> decilog, " which is inconsistent with given symmetry." sgnum = symm.space_group_info().type().number() cell = " ".join( map(lambda x: "%.2f" % x, symm.unit_cell().parameters())) modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum), ("UNIT_CELL_CONSTANTS", cell)]) else: print >> decilog, " pointless failed." flag_do_not_change_symm = False if xs_prior and params.cell_prior.force: modify_xdsinp(xdsinp, inp_params=[("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, params.cell_prior.cell))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum)]) flag_do_not_change_symm = True elif params.cell_prior.method == "correct_only": xsxds = XPARM(xparm).crystal_symmetry() cosets = reindex.reindexing_operators(xs_prior, xsxds, params.cell_prior.tol_length, params.cell_prior.tol_angle) if cosets.double_cosets is not None: cell = xsxds.unit_cell().change_basis( cosets.combined_cb_ops()[0]) print >> decilog, " Using given symmetry in CORRECT with symmetry constraints:", cell modify_xdsinp(xdsinp, inp_params=[ ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, cell.parameters()))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum), ]) flag_do_not_change_symm = True else: print >> decilog, " Tried to use given symmetry in CORRECT, but cell in integration is incompatible." 
        # Do Scaling
        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT")])
        run_xds(wdir=root, show_progress=params.show_progress)

        if not os.path.isfile(xac_hkl):
            print >> decilog, " CORRECT failed."
            return

        if not os.path.isfile(gxparm):
            print >> decilog, " Refinement in CORRECT failed."

        print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)

        ret = calc_merging_stats(xac_hkl)

        if params.cut_resolution:
            if ret is not None and ret[0] is not None:
                d_min = ret[0]
                modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                  ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
                print >> decilog, " Re-scale at %.2f A" % d_min
                os.rename(os.path.join(root, "CORRECT.LP"),
                          os.path.join(root, "CORRECT_fullres.LP"))
                os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL"))
                run_xds(wdir=root, show_progress=params.show_progress)
                print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
                print >> decilog, " (Original files are saved as *_fullres.*)"
            else:
                print >> decilog, "error: Can't decide resolution."

        last_ISa = correctlp.get_ISa(correct_lp, check_valid=True)

        # Run pointless and (if the result is different from INTEGRATE) re-scale.
        if params.use_pointless:
            worker = Pointless()
            pointless_correct = worker.run_for_symm(xdsin=xac_hkl,
                                                    logout=os.path.join(root, "pointless_correct.log"))
            pointless_best_symm = None
            if "symm" in pointless_correct:
                symm = pointless_correct["symm"]
                need_rescale = False

                if pointless_integrate.get("symm"):
                    symm_by_integrate = pointless_integrate["symm"]
                    if not xtal.is_same_laue_symmetry(symm_by_integrate.space_group(), symm.space_group()):
                        print >> decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % (symm.space_group_info(), symm_by_integrate.space_group_info())
                        prob_integrate = pointless_integrate.get("laue_prob", float("nan"))
                        prob_correct = pointless_correct.get("laue_prob", float("nan"))
                        print >> decilog, " Prob(%s |INTEGRATE), Prob(%s |CORRECT) = %.4f, %.4f." % (symm_by_integrate.space_group_info(), symm.space_group_info(), prob_integrate, prob_correct)
                        if prob_correct > prob_integrate:
                            need_rescale = True
                            pointless_best_symm = symm
                        else:
                            pointless_best_symm = symm_by_integrate
                else:
                    need_rescale = True
                    pointless_best_symm = symm
                    print >> decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info()
                    if xs_prior:
                        if xtal.is_same_space_group_ignoring_enantiomorph(symm.space_group(), xs_prior.space_group()):
                            print >> decilog, " which is consistent with given symmetry."
                        elif xtal.is_same_laue_symmetry(symm.space_group(), xs_prior.space_group()):
                            print >> decilog, " which has consistent Laue symmetry with given symmetry."
                        else:
                            print >> decilog, " which is inconsistent with given symmetry."
                if need_rescale and not flag_do_not_change_symm:
                    sgnum = symm.space_group_info().type().number()
                    cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
                    modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                      ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                                      ("UNIT_CELL_CONSTANTS", cell),
                                                      ("INCLUDE_RESOLUTION_RANGE", "50 0")])
                    run_xds(wdir=root, show_progress=params.show_progress)

                    ret = calc_merging_stats(xac_hkl)

                    if params.cut_resolution:
                        if ret is not None and ret[0] is not None:
                            d_min = ret[0]
                            modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                              ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
                            print >> decilog, " Re-scale at %.2f A" % d_min
                            os.rename(os.path.join(root, "CORRECT.LP"),
                                      os.path.join(root, "CORRECT_fullres.LP"))
                            os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL"))
                            run_xds(wdir=root, show_progress=params.show_progress)
                            print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
                            print >> decilog, " (Original files are saved as *_fullres.*)"
                        else:
                            print >> decilog, "error: Can't decide resolution."
                            # Clean up leftovers from the previous full-resolution run
                            for f in ("CORRECT_fullres.LP", "XDS_ASCII_fullres.HKL"):
                                if os.path.isfile(os.path.join(root, f)):
                                    print >> decilog, "removing", f
                                    os.remove(os.path.join(root, f))

                    ISa = correctlp.get_ISa(correct_lp, check_valid=True)

                    if ISa >= last_ISa or last_ISa != last_ISa:  # improved, or last_ISa is nan
                        print >> decilog, "ISa improved= %.2f" % ISa
                    else:
                        print >> decilog, "ISa got worse= %.2f" % ISa

            if pointless_best_symm:
                xac_symm = XDS_ASCII(xac_hkl, read_data=False).symm
                if not xtal.is_same_space_group_ignoring_enantiomorph(xac_symm.space_group(),
                                                                      pointless_best_symm.space_group()):
                    if xtal.is_same_laue_symmetry(xac_symm.space_group(), pointless_best_symm.space_group()):
                        tmp = "same Laue symmetry"
                    else:
                        tmp = "different Laue symmetry"
                    print >> decilog, "WARNING: symmetry in scaling is different from Pointless result (%s)." % tmp

        run_xdsstat(wdir=root)
        print
        if params.make_report:
            html_report.make_individual_report(root, root)
    except:
        print >> decilog, traceback.format_exc()
    finally:
        print >> decilog, "\nxds_sequence finished at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
        decilog.close()
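# --- Example (illustration only, not part of this module) --------------------
# A self-contained sketch of the IQR-based bad-frame rejection used in the
# sequence above: frames whose spot count falls below 1/5 of the largest
# non-outlier count (Tukey's upper fence, Q3 + 1.5*IQR) are excluded.
# The spot counts below are synthetic and the function name is hypothetical.
def example_frame_rejection():
    import numpy

    spot_counts = numpy.array([120, 115, 118, 3, 119, 117, 2, 121, 116, 118])
    q1, q3 = numpy.percentile(spot_counts, [25, 75])
    iqr = q3 - q1
    cutoff = max(spot_counts[spot_counts <= q3 + 1.5 * iqr]) / 5  # same magic number as above
    cut_frames = numpy.where(spot_counts < cutoff)[0] + 1   # frame numbers are 1-based
    keep_frames = numpy.where(spot_counts >= cutoff)[0] + 1
    print "cutoff=", cutoff
    print "cut frames=", list(cut_frames)   # -> [4, 7]
    print "keep frames=", list(keep_frames)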
def reindex_with_specified_symm(topdir, reference_symm, dirs, out):
    print >> out
    print >> out, "Re-index to specified symmetry:"
    reference_symm.show_summary(out, " ")
    print >> out
    print >> out

    cells = {}  # cell and file
    sgnum_laue = reference_symm.space_group().build_derived_reflection_intensity_group(False).type().number()

    for wd in dirs:
        print >> out, "%s:" % os.path.relpath(wd, topdir),

        # Find appropriate data
        xac_file = util.return_first_found_file(("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
                                                 "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                                 "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
                                                wd=wd)
        if xac_file is None:
            print >> out, "Can't find XDS_ASCII file in %s" % wd
            continue

        if xac_file.endswith(".org"):
            xac_file_org, xac_file = xac_file, xac_file[:-4]
        else:
            xac_file_org = xac_file + ".org"

        if not os.path.isfile(xac_file_org):
            os.rename(xac_file, xac_file_org)

        xac = XDS_ASCII(xac_file_org, read_data=False)
        print >> out, "%s %s (%s)" % (os.path.basename(xac_file),
                                      xac.symm.space_group_info(),
                                      ",".join(map(lambda x: "%.2f" % x, xac.symm.unit_cell().parameters())))

        if xac.symm.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue:
            if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1, 10):
                print >> out, " Already scaled with specified symmetry"
                os.rename(xac_file_org, xac_file)  # rename back
                cells[wd] = (numpy.array(xac.symm.unit_cell().parameters()), xac_file)
                continue

        cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20)
        if len(cosets.combined_cb_ops()) == 0:
            print >> out, "Can't find operator:"
            xac.symm.show_summary(out, " ")
            reference_symm.show_summary(out, " ")
            continue

        newcell = xac.write_reindexed(op=cosets.combined_cb_ops()[0],
                                      space_group=reference_symm.space_group(),
                                      hklout=xac_file)
        cells[wd] = (numpy.array(newcell.parameters()), xac_file)
        newcell = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
        print >> out, " Reindexed to transformed cell: %s with %s" % (newcell, cosets.combined_cb_ops()[0].as_hkl())

    return cells
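# --- Example (illustration only, not part of this module) --------------------
# reindex_with_specified_symm above relies on reindex.reindexing_operators to
# find change-of-basis operators mapping each dataset onto the reference
# symmetry. A minimal sketch of that call, assuming the reindex module is
# cctbx.crystal.reindex as used in the code above; the cell is made up. In a
# polar tetragonal group such as P4 the indexing is ambiguous, so more than
# one candidate operator is expected.
def example_reindexing_operators():
    from cctbx import crystal
    from cctbx.crystal import reindex

    symm = crystal.symmetry(unit_cell=(79., 79., 38., 90., 90., 90.),
                            space_group_symbol="P4")
    cosets = reindex.reindexing_operators(symm, symm, 0.2, 20)
    if cosets.double_cosets is None:
        print "Incompatible cells"
    else:
        for op in cosets.combined_cb_ops():
            print "candidate operator:", op.as_hkl()  # e.g. h,k,l and an alternative indexing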
def run(lstin, params):
    xac_files = read_path_list(lstin)
    common0 = len(os.path.commonprefix(xac_files))
    arrays = []

    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=params.d_min, d_max=params.d_max)
        a = a.merge_equivalents(use_internal_variance=False).array()
        if params.min_ios is not None:  # guard against unset cutoff, as elsewhere in this module
            a = a.select(a.data() / a.sigmas() >= params.min_ios)
        arrays.append(a)

    # Prepare all pairs (i, j) with i < j
    args = []
    for i in xrange(len(arrays) - 1):
        for j in xrange(i + 1, len(arrays)):
            args.append((i, j))

    # Calc all CCs
    worker = lambda x: calc_cc(arrays[x[0]], arrays[x[1]])
    results = easy_mp.pool_map(fixed_func=worker,
                               args=args,
                               processes=params.nproc)

    # Make matrix (lower triangle only)
    mat = numpy.zeros(shape=(len(arrays), len(arrays)))
    for (i, j), (cc, nref) in zip(args, results):
        print j, i, cc  # progress/debug output
        mat[j, i] = cc

    open("%s.names" % params.prefix, "w").write("\n".join(map(lambda x: os.path.dirname(x[common0:]), xac_files)))
    open("%s.matrix" % params.prefix, "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

    ofs = open("%s.dat" % params.prefix, "w")
    ofs.write("i j cc nref\n")
    for (i, j), (cc, nref) in zip(args, results):
        ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

    open("%s_ana.R" % params.prefix, "w").write("""\
treeToList2 <- function(htree) {  # stolen from $CCP4/share/blend/R/blend0.R
  groups <- list()
  itree <- dim(htree$merge)[1]
  for (i in 1:itree) {
    il <- htree$merge[i,1]
    ir <- htree$merge[i,2]
    if (il < 0) lab1 <- htree$labels[-il]
    if (ir < 0) lab2 <- htree$labels[-ir]
    if (il > 0) lab1 <- groups[[il]]
    if (ir > 0) lab2 <- groups[[ir]]
    lab <- c(lab1, lab2)
    lab <- as.integer(lab)
    groups <- c(groups, list(lab))
  }
  return(groups)
}

cc <- scan("%s.matrix")
md <- matrix(1-cc, ncol=%d, byrow=TRUE)
labs <- read.table("%s.names")
filenames <- read.table("%s")$V1
rownames(md) <- labs$V1
# "ward" was renamed "ward.D" in R >= 3.1.0; hclust accepts it with a warning.
hc <- hclust(as.dist(md), method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()

hc$labels <- 1:nrow(md)
groups <- treeToList2(hc)
cat("ClNumber Nds Clheight\\n", file="./CLUSTERS.txt")
for (i in 1:length(groups)) {
  sorted_groups <- sort(groups[[i]])
  linea <- paste(sprintf(" %%03d %%3d %%7.3f\\n", i, length(groups[[i]]), hc$height[i]), sep="")
  cat(linea, file="./CLUSTERS.txt", append=TRUE)
  write.table(filenames[sorted_groups], sprintf("cluster%%.3d.lst", i),
              quote=FALSE, row.names=FALSE, col.names=FALSE)
}
q(save="yes")
""" % (params.prefix, len(arrays), params.prefix, lstin))

    print "R --vanilla < %s_ana.R" % params.prefix
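# --- Example (illustration only, not part of this module) --------------------
# run() above assumes a calc_cc(a1, a2) helper that returns (cc, nref) for two
# miller arrays. A minimal sketch of such a helper, mirroring the pairwise-CC
# loop used earlier in this file; calc_cc_sketch is a hypothetical name.
def calc_cc_sketch(a1, a2):
    from cctbx.array_family import flex

    # Restrict both arrays to their common reflections, then correlate intensities
    d1, d2 = a1.common_sets(a2, assert_is_similar_symmetry=False)
    if d1.data().size() == 0:
        return float("nan"), 0
    corr = flex.linear_correlation(d1.data(), d2.data())
    if not corr.is_well_defined():
        return float("nan"), d1.data().size()
    return corr.coefficient(), d1.data().size()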