def run(file_names):
    """Report differences between two ITVC section 6.1 tables.

    file_names must hold exactly two file names; each is read with
    itvc_section61_io.read_table6111. For every label found in either
    table:

    * if both tables have an entry, atomic_number and method must agree
      and the common leading part of table_y is handed to
      show_differences_if_any;
    * within each table, an entry whose element differs from its
      atomic_symbol is compared with the entry of that atomic_symbol over
      the last six table values.
    """
    assert len(file_names) == 2
    tabs = [itvc_section61_io.read_table6111(name) for name in file_names]
    # A dict serves as the collection of unique labels.
    label_dict = {}
    for tab in tabs:
        label_dict.update((label, 1) for label in tab.elements)
    all_labels = label_dict.keys()
    for label in all_labels:
        e0 = tabs[0].entries.get(label, None)
        e1 = tabs[1].entries.get(label, None)
        if e0 is not None and e1 is not None:
            assert e0.atomic_number == e1.atomic_number, \
                (label, e0.atomic_number, e1.atomic_number)
            assert e0.method == e1.method, \
                (label, e0.method, e1.method)
            # Only the part covered by both tables can be compared.
            min_size = min(e0.table_y.size(), e1.table_y.size())
            show_differences_if_any(
                label=label,
                y0=e0.table_y[:min_size],
                y1=e1.table_y[:min_size],
                stols=international_tables_stols[:min_size])
        for tab in tabs:
            ei = tab.entries.get(label, None)
            if ei is None or ei.element == ei.atomic_symbol:
                continue
            ee = tab.entries.get(ei.atomic_symbol, None)
            if ee is None:
                continue
            assert ee.table_y.size() == 62
            assert ei.table_y.size() == 62
            show_differences_if_any(
                label=label,
                y0=ee.table_y[-6:],
                y1=ei.table_y[-6:],
                stols=international_tables_stols[-6:])
def run(file_names):
    """Compare the two tables named in file_names entry by entry.

    Both files are read with itvc_section61_io.read_table6111. Labels are
    collected from the elements of both tables. Entries present in both
    tables are checked for equal atomic_number and method, then their
    table_y values (truncated to the shorter table) are passed to
    show_differences_if_any. In addition, inside each table, an entry whose
    element is not its atomic_symbol is compared with the atomic_symbol
    entry over the final six values of the 62-value tables.
    """
    assert len(file_names) == 2
    tabs = []
    for name in file_names:
        tabs.append(itvc_section61_io.read_table6111(name))

    # Dict keys act as the set of labels seen in either table.
    all_labels = {}
    for tab in tabs:
        for label in tab.elements:
            all_labels.setdefault(label, 1)

    for label in all_labels:
        first = tabs[0].entries.get(label, None)
        second = tabs[1].entries.get(label, None)
        if not (first is None or second is None):
            assert first.atomic_number == second.atomic_number, \
                (label, first.atomic_number, second.atomic_number)
            assert first.method == second.method, \
                (label, first.method, second.method)
            n_common = min(first.table_y.size(), second.table_y.size())
            show_differences_if_any(
                label=label,
                y0=first.table_y[:n_common],
                y1=second.table_y[:n_common],
                stols=international_tables_stols[:n_common])
        for tab in tabs:
            ion = tab.entries.get(label, None)
            if ion is not None and ion.element != ion.atomic_symbol:
                atom = tab.entries.get(ion.atomic_symbol, None)
                if atom is not None:
                    assert atom.table_y.size() == 62
                    assert ion.table_y.size() == 62
                    show_differences_if_any(
                        label=label,
                        y0=atom.table_y[-6:],
                        y1=ion.table_y[-6:],
                        stols=international_tables_stols[-6:])
def run(args, cutoff, high_resolution_only,
        plots_dir="itvc_kissel_plots", verbose=0):
    """Compare Kissel tables with the ITVC table values and print a report.

    args[0] is read with itvc_section61_io.read_table6111; every other
    entry of args is read with kissel_io.read_table (element "Es" is
    skipped). For each Kissel table a plot file "<element>.xy" is written
    into plots_dir, the Kissel values are linearly interpolated onto the
    selected ITVC stol points, and a comparison table is printed.

    Parameters:
      args: list of file names, ITVC table first.
      cutoff: ITVC points with stol <= cutoff + 1.e-6 are compared; Kissel
        points with x <= cutoff + 1 are plotted.
      high_resolution_only: if true, additionally require
        stol > 2 + 1.e-6 for the ITVC points.
      plots_dir: directory for the .xy files; it is not created here.
      verbose: accepted but not used in this function.

    The print calls are written so that they behave the same under
    Python 2 and Python 3.
    """
    itab = itvc_section61_io.read_table6111(args[0])
    itab_x = cctbx.eltbx.gaussian_fit.international_tables_stols
    isel = itab_x <= cutoff + 1.e-6
    if high_resolution_only:
        isel &= itab_x > 2 + 1.e-6
    itab_x = itab_x.select(isel)
    for file_name in args[1:]:
        ktab = kissel_io.read_table(file_name)
        if ktab.element == "Es":
            continue
        sel = ktab.x <= cutoff + 1
        ktab_x = ktab.x.select(sel)
        ktab_y = ktab.y.select(sel)
        itab_entry = itab.entries[ktab.element]
        itab_y = itab_entry.table_y.select(isel)
        itab_sigmas = itab_entry.table_sigmas.select(isel)
        # The with-block closes the plot file even if write_plot raises.
        with open(os.path.join(plots_dir, ktab.element + ".xy"), "w") as f:
            cctbx.eltbx.gaussian_fit.write_plot(f, ktab_x, ktab_y)
            cctbx.eltbx.gaussian_fit.write_plot(f, itab_x, itab_y)
        ktab_y_i = flex.linear_interpolation(ktab.x, ktab.y, itab_x)
        ktab_sigmas_i = flex.linear_interpolation(ktab.x, ktab.sigmas, itab_x)
        # ky is used as a divisor below.
        assert ktab_y_i.all_gt(0)
        # The table is buffered so that the "Element:" line, which needs the
        # maxima, can be printed before it.
        rows = ["stol kissel itvc delta sig_itvc rel_sig rel_del tol_del"]
        max_delta = 0
        max_tol_del = 0
        for x, ky, ksig, iy, isig in zip(
                itab_x, ktab_y_i, ktab_sigmas_i, itab_y, itab_sigmas):
            if iy > 0:
                ie = "%7.4f" % abs(isig / iy)
            else:
                ie = " ******"
            delta = iy - ky
            rel_del = abs(delta) / ky
            # Part of the difference not covered by the two sigmas.
            tol_del = max(0, abs(delta) - ksig - isig) / ky
            rows.append("%4.2f %7.4f %7.4f %7.4f %8.5f %-s %7.4f %7.4f" % (
                x, ky, iy, delta, isig, ie, rel_del, tol_del))
            max_delta = max(max_delta, abs(delta))
            max_tol_del = max(max_tol_del, tol_del)
        print("Element: %s max_delta=%.4f, max_tol_del=%.4f" % (
            ktab.element, max_delta, max_tol_del))
        sys.stdout.write("\n".join(rows) + "\n")
        print("")
def run(args, cutoff, high_resolution_only,
        plots_dir="itvc_kissel_plots", verbose=0):
    """Print a per-element comparison of Kissel and ITVC table values.

    The first entry of args names the ITVC table (read with
    itvc_section61_io.read_table6111); the remaining entries name Kissel
    tables (read with kissel_io.read_table). Tables for element "Es" are
    skipped. For each remaining table, "<element>.xy" is written to
    plots_dir and a table of differences at the selected ITVC stol points
    is printed to stdout.

    Parameters:
      args: list of file names, ITVC table first.
      cutoff: stol limit; ITVC points up to cutoff + 1.e-6 are compared and
        Kissel points up to cutoff + 1 are plotted.
      high_resolution_only: if true, only ITVC points above 2 + 1.e-6 are
        compared.
      plots_dir: existing directory for the plot files.
      verbose: accepted but not used in this function.

    Output is produced with single-argument print calls and a list buffer,
    so the function runs unchanged on Python 2 and Python 3.
    """
    header = "stol kissel itvc delta sig_itvc rel_sig rel_del tol_del"
    row_format = "%4.2f %7.4f %7.4f %7.4f %8.5f %-s %7.4f %7.4f"
    itab = itvc_section61_io.read_table6111(args[0])
    itab_x = cctbx.eltbx.gaussian_fit.international_tables_stols
    isel = itab_x <= cutoff + 1.e-6
    if high_resolution_only:
        isel &= itab_x > 2 + 1.e-6
    itab_x = itab_x.select(isel)
    for file_name in args[1:]:
        ktab = kissel_io.read_table(file_name)
        if ktab.element == "Es":
            continue
        sel = ktab.x <= cutoff + 1
        ktab_x = ktab.x.select(sel)
        ktab_y = ktab.y.select(sel)
        itab_entry = itab.entries[ktab.element]
        itab_y = itab_entry.table_y.select(isel)
        itab_sigmas = itab_entry.table_sigmas.select(isel)
        plot_path = os.path.join(plots_dir, ktab.element + ".xy")
        # Context manager guarantees the file is closed on errors too.
        with open(plot_path, "w") as plot_file:
            cctbx.eltbx.gaussian_fit.write_plot(plot_file, ktab_x, ktab_y)
            cctbx.eltbx.gaussian_fit.write_plot(plot_file, itab_x, itab_y)
        ktab_y_i = flex.linear_interpolation(ktab.x, ktab.y, itab_x)
        ktab_sigmas_i = flex.linear_interpolation(ktab.x, ktab.sigmas, itab_x)
        # The interpolated values are divisors in the loop below.
        assert ktab_y_i.all_gt(0)
        report = [header]
        max_delta = 0
        max_tol_del = 0
        for x, ky, ksig, iy, isig in zip(
                itab_x, ktab_y_i, ktab_sigmas_i, itab_y, itab_sigmas):
            ie = "%7.4f" % abs(isig / iy) if iy > 0 else " ******"
            delta = iy - ky
            rel_del = abs(delta) / ky
            # Excess of |delta| over the sum of both sigmas, relative to ky.
            tol_del = max(0, abs(delta) - ksig - isig) / ky
            report.append(row_format % (
                x, ky, iy, delta, isig, ie, rel_del, tol_del))
            max_delta = max(max_delta, abs(delta))
            max_tol_del = max(max_tol_del, tol_del)
        # Summary line first, then the buffered table.
        print("Element: %s max_delta=%.4f, max_tol_del=%.4f" % (
            ktab.element, max_delta, max_tol_del))
        sys.stdout.write("\n".join(report) + "\n")
        print("")
def run(gaussian_fit_pickle_file_names, itvc_file_name, kissel_dir):
    """Print a report on pickled Gaussian fits and their largest errors.

    The fits are loaded with read_pickled_fits. For every label returned by
    expected_labels(kissel_dir) each fit of the group is shown and, when
    reference data is available, re-evaluated against it:

    * against the ITVC table (itvc_file_name given and label != "O2-"), or
    * otherwise against the Kissel table found in kissel_dir.

    The maximum significant relative error of every re-evaluated fit is
    collected, and a summary sorted by decreasing error is printed; fits
    with an error above 0.01 are listed point by point.

    Parameters:
      gaussian_fit_pickle_file_names: passed to read_pickled_fits.
      itvc_file_name: ITVC table file name, or None.
      kissel_dir: directory with Kissel tables, or None.

    All output goes through single-argument print calls so that the
    function works under both Python 2 and Python 3.
    """
    # NOTE(review): the original indentation of this function was lost;
    # the placement of the blank-line prints is reconstructed -- confirm.
    itvc_tab = None
    if itvc_file_name is not None:
        itvc_tab = itvc_section61_io.read_table6111(itvc_file_name)
    fits = read_pickled_fits(gaussian_fit_pickle_file_names)
    for k, v in fits.parameters.items():
        print("# %s: %s" % (k, v))
    print("")
    max_errors = flex.double()
    labeled_fits = []
    n_processed = 0
    for label in expected_labels(kissel_dir):
        try:
            fit_group = fits.all[label]
        except Exception:
            print("# Warning: Missing scattering_type: %s" % (label,))
            continue
        print("scattering_type: %s" % (label,))
        prev_fit = None
        for fit in fit_group:
            if prev_fit is not None:
                # NOTE(review): the message says "decreasing" while the test
                # is fit.stol > prev_fit.stol; kept as in the original.
                if fit.stol > prev_fit.stol:
                    print("# Warning: decreasing stol")
                elif (fit.stol == prev_fit.stol
                      and fit.max_error < prev_fit.max_error):
                    print("# Warning: same stol but previous has larger error")
            prev_fit = fit
            fit.sort().show()
            gaussian_fit = None
            if itvc_tab is not None and label != "O2-":
                entry = itvc_tab.entries[label]
                sel = international_tables_stols <= fit.stol + 1.e-6
                gaussian_fit = scitbx.math.gaussian.fit(
                    international_tables_stols.select(sel),
                    entry.table_y.select(sel),
                    entry.table_sigmas.select(sel),
                    fit)
            elif kissel_dir is not None:
                file_name = os.path.join(
                    kissel_dir,
                    "%02d_%s_rf" % (
                        tiny_pse.table(label).atomic_number(), label))
                tab = kissel_io.read_table(file_name)
                sel = tab.itvc_sampling_selection() & (
                    tab.x <= fit.stol + 1.e-6)
                gaussian_fit = scitbx.math.gaussian.fit(
                    tab.x.select(sel),
                    tab.y.select(sel),
                    tab.sigmas.select(sel),
                    fit)
            if gaussian_fit is not None:
                max_errors.append(
                    flex.max(gaussian_fit.significant_relative_errors()))
                labeled_fits.append(labeled_fit(label, gaussian_fit))
        n_processed += 1
        print("")
    if n_processed != len(fits.all):
        print("# Warning: %d fits were not processed." % (
            len(fits.all) - n_processed))
        print("")
    if max_errors.size() > 0:
        print("Summary:")
        perm = flex.sort_permutation(data=max_errors, reverse=True)
        max_errors = max_errors.select(perm)
        labeled_fits = flex.select(labeled_fits, perm)
        for me, lf in zip(max_errors, labeled_fits):
            print("%s n_terms=%d max_error: %.4f" % (
                lf.label, lf.gaussian_fit.n_terms(), me))
            if me > 0.01:
                fit = lf.gaussian_fit
                rel_errors = fit.significant_relative_errors()
                for s, y, a, r in zip(
                        fit.table_x(), fit.table_y(),
                        fit.fitted_values(), rel_errors):
                    comment = ""
                    if r > 0.01:
                        comment = " large error"
                    print("%4.2f %7.4f %7.4f %7.4f %7.4f%s" % (
                        s, y, a, a - y, r, comment))
                print("")
        print("")
def run(file_name, args, cutoff, params,
        zig_zag=False, six_term=False, full_fits=None,
        plots_dir="itvc_fits_plots", verbose=0):
    """Fit Gaussians to the tabulated data and pickle the results.

    The table is read from file_name with
    itvc_section61_io.read_table6111. The elements of the table plus the
    extra labels "O2-" and "SDS" are processed in turn; for each one a
    "<identifier>_fits.pickle" file is written with easy_pickle.dump.

    Parameters:
      file_name: table file.
      args: optional leading "chunk_n,chunk_i" entry followed by element
        names; an empty element list means all elements. With chunking,
        only every chunk_n-th selected element (offset chunk_i) is
        processed.
      cutoff: data with stol <= cutoff + 1.e-6 enter the main fit.
      params: stored under "fit_parameters" and passed to the fit
        procedure.
      zig_zag, six_term, full_fits: select the fit procedure (zig_zag_fits,
        decremental_fits when full_fits is given, incremental_fits, or
        fit_with_golay_starts for six_term).
      plots_dir: plot directory; reset to None with a message if it does
        not exist (only checked when neither six_term nor zig_zag is set).
      verbose: forwarded to the incremental/decremental procedures.
    """
    def fit_table(x, y, sigmas):
        # Every reference fit uses xray_scattering.gaussian(0, False) as
        # the final argument.
        return scitbx.math.gaussian.fit(
            x, y, sigmas, xray_scattering.gaussian(0, False))

    tab = itvc_section61_io.read_table6111(file_name)
    chunk_n, chunk_i = 1, 0
    if len(args) > 0:
        chunk_fields = args[0].split(",")
        if len(chunk_fields) == 2:
            chunk_n, chunk_i = [int(field) for field in chunk_fields]
            args = args[1:]
    if not (six_term or zig_zag):
        if not os.path.isdir(plots_dir):
            print("No plots because target directory does not exist"
                  " (mkdir %s)." % plots_dir)
            plots_dir = None
        if chunk_n > 1:
            assert plots_dir is not None
    stols_more = cctbx.eltbx.gaussian_fit.international_tables_stols
    stols = stols_more.select(stols_more <= cutoff + 1.e-6)
    i_chunk = 0
    for element in tab.elements + ["O2-", "SDS"]:
        if len(args) > 0 and element not in args:
            continue
        # The counter advances for every selected element, whether or not
        # the element belongs to this chunk.
        in_this_chunk = (i_chunk % chunk_n == chunk_i)
        i_chunk += 1
        if not in_this_chunk:
            continue
        results = {"fit_parameters": params}
        if element == "SDS":
            wrk_lbl = element
            from cctbx.eltbx.development.hydrogen_plots import fit_input
            fi = fit_input()
            keep = fi.stols <= cutoff + 1.e-6
            null_fit = fit_table(
                fi.stols.select(keep),
                fi.data.select(keep),
                fi.sigmas.select(keep))
            null_fit_more = fit_table(fi.stols, fi.data, fi.sigmas)
        else:
            wrk_lbl = xray_scattering.wk1995(element, True)
            if element == "O2-":
                # "O2-" data comes from the rez_rez_grant tables.
                rrg_stols_more = rez_rez_grant.table_2_stol
                rrg_stols = rrg_stols_more.select(
                    rrg_stols_more <= cutoff + 1.e-6)
                null_fit = fit_table(
                    rrg_stols,
                    rez_rez_grant.table_2_o2minus[:rrg_stols.size()],
                    rez_rez_grant.table_2_sigmas[:rrg_stols.size()])
                null_fit_more = fit_table(
                    rrg_stols_more,
                    rez_rez_grant.table_2_o2minus[:rrg_stols_more.size()],
                    rez_rez_grant.table_2_sigmas[:rrg_stols_more.size()])
            else:
                entry = tab.entries[element]
                null_fit = fit_table(
                    stols,
                    entry.table_y[:stols.size()],
                    entry.table_sigmas[:stols.size()])
                null_fit_more = fit_table(
                    stols_more,
                    entry.table_y[:stols_more.size()],
                    entry.table_sigmas[:stols_more.size()])
        if zig_zag:
            fit_result = cctbx.eltbx.gaussian_fit.zig_zag_fits(
                label=wrk_lbl,
                null_fit=null_fit,
                null_fit_more=null_fit_more,
                params=params)
        elif full_fits is not None:
            assert len(full_fits.all[wrk_lbl]) == 1
            fit_result = cctbx.eltbx.gaussian_fit.decremental_fits(
                label=wrk_lbl,
                null_fit=null_fit,
                full_fit=full_fits.all[wrk_lbl][0],
                params=params,
                plots_dir=plots_dir,
                verbose=verbose)
        elif six_term:
            best_min = scitbx.math.gaussian_fit.fit_with_golay_starts(
                label=wrk_lbl,
                null_fit=null_fit,
                null_fit_more=null_fit_more,
                params=params)
            g = best_min.final_gaussian_fit
            fit_result = [
                xray_scattering.fitted_gaussian(
                    stol=g.table_x()[-1], gaussian_sum=g)
            ]
        else:
            fit_result = cctbx.eltbx.gaussian_fit.incremental_fits(
                label=wrk_lbl,
                null_fit=null_fit,
                params=params,
                plots_dir=plots_dir,
                verbose=verbose)
        results[wrk_lbl] = fit_result
        sys.stdout.flush()
        easy_pickle.dump("%s_fits.pickle" % identifier(wrk_lbl), results)
def run(file_name, args, cutoff, params,
        zig_zag=False, six_term=False, full_fits=None,
        plots_dir="itvc_fits_plots", verbose=0):
    """Run Gaussian fits for the table entries and pickle each result.

    The table is read with itvc_section61_io.read_table6111(file_name).
    The table's elements and the two extra labels "O2-" and "SDS" are
    visited; for every processed element one "<identifier>_fits.pickle"
    file is written with easy_pickle.dump.

    Parameters:
      file_name: table file.
      args: list of element names to process (empty: all). A leading
        entry of the form "chunk_n,chunk_i" restricts processing to every
        chunk_n-th selected element with offset chunk_i.
      cutoff: data with stol <= cutoff + 1.e-6 are used for null_fit.
      params: stored as results["fit_parameters"] and passed on.
      zig_zag: use zig_zag_fits.
      six_term: use fit_with_golay_starts (when zig_zag is false and
        full_fits is None).
      full_fits: if given, use decremental_fits starting from
        full_fits.all[label][0].
      plots_dir: plot directory; when it does not exist a message is
        printed and plotting is disabled (only checked when neither
        six_term nor zig_zag is set).
      verbose: forwarded to incremental_fits / decremental_fits.

    The message is emitted with a single-argument print call, which is
    valid on both Python 2 and Python 3.
    """
    tab = itvc_section61_io.read_table6111(file_name)
    chunk_n = 1
    chunk_i = 0
    if len(args) > 0 and len(args[0].split(",")) == 2:
        chunk_n, chunk_i = [int(i) for i in args[0].split(",")]
        args = args[1:]
    if not six_term and not zig_zag:
        if not os.path.isdir(plots_dir):
            print(
                "No plots because target directory does not exist (mkdir %s)."
                % plots_dir)
            plots_dir = None
        if chunk_n > 1:
            assert plots_dir is not None
    stols_more = cctbx.eltbx.gaussian_fit.international_tables_stols
    sel = stols_more <= cutoff + 1.e-6
    stols = stols_more.select(sel)
    i_chunk = 0
    for element in tab.elements + ["O2-", "SDS"]:
        if len(args) > 0 and element not in args:
            continue
        # i_chunk counts selected elements; only those matching chunk_i
        # modulo chunk_n are processed in this run.
        flag = i_chunk % chunk_n == chunk_i
        i_chunk += 1
        if not flag:
            continue
        results = {}
        results["fit_parameters"] = params
        if element == "SDS":
            wrk_lbl = element
            from cctbx.eltbx.development.hydrogen_plots import fit_input
            fi = fit_input()
            sel = fi.stols <= cutoff + 1.e-6
            null_fit = scitbx.math.gaussian.fit(
                fi.stols.select(sel),
                fi.data.select(sel),
                fi.sigmas.select(sel),
                xray_scattering.gaussian(0, False))
            null_fit_more = scitbx.math.gaussian.fit(
                fi.stols,
                fi.data,
                fi.sigmas,
                xray_scattering.gaussian(0, False))
        else:
            wrk_lbl = xray_scattering.wk1995(element, True)
            if element != "O2-":
                entry = tab.entries[element]
                null_fit = scitbx.math.gaussian.fit(
                    stols,
                    entry.table_y[:stols.size()],
                    entry.table_sigmas[:stols.size()],
                    xray_scattering.gaussian(0, False))
                null_fit_more = scitbx.math.gaussian.fit(
                    stols_more,
                    entry.table_y[:stols_more.size()],
                    entry.table_sigmas[:stols_more.size()],
                    xray_scattering.gaussian(0, False))
            else:
                # "O2-" is taken from the rez_rez_grant tables instead.
                rrg_stols_more = rez_rez_grant.table_2_stol
                sel = rrg_stols_more <= cutoff + 1.e-6
                rrg_stols = rrg_stols_more.select(sel)
                null_fit = scitbx.math.gaussian.fit(
                    rrg_stols,
                    rez_rez_grant.table_2_o2minus[:rrg_stols.size()],
                    rez_rez_grant.table_2_sigmas[:rrg_stols.size()],
                    xray_scattering.gaussian(0, False))
                null_fit_more = scitbx.math.gaussian.fit(
                    rrg_stols_more,
                    rez_rez_grant.table_2_o2minus[:rrg_stols_more.size()],
                    rez_rez_grant.table_2_sigmas[:rrg_stols_more.size()],
                    xray_scattering.gaussian(0, False))
        if zig_zag:
            results[wrk_lbl] = cctbx.eltbx.gaussian_fit.zig_zag_fits(
                label=wrk_lbl,
                null_fit=null_fit,
                null_fit_more=null_fit_more,
                params=params)
        elif full_fits is not None:
            assert len(full_fits.all[wrk_lbl]) == 1
            results[wrk_lbl] = cctbx.eltbx.gaussian_fit.decremental_fits(
                label=wrk_lbl,
                null_fit=null_fit,
                full_fit=full_fits.all[wrk_lbl][0],
                params=params,
                plots_dir=plots_dir,
                verbose=verbose)
        elif not six_term:
            results[wrk_lbl] = cctbx.eltbx.gaussian_fit.incremental_fits(
                label=wrk_lbl,
                null_fit=null_fit,
                params=params,
                plots_dir=plots_dir,
                verbose=verbose)
        else:
            best_min = scitbx.math.gaussian_fit.fit_with_golay_starts(
                label=wrk_lbl,
                null_fit=null_fit,
                null_fit_more=null_fit_more,
                params=params)
            g = best_min.final_gaussian_fit
            results[wrk_lbl] = [xray_scattering.fitted_gaussian(
                stol=g.table_x()[-1], gaussian_sum=g)]
        sys.stdout.flush()
        pickle_file_name = "%s_fits.pickle" % identifier(wrk_lbl)
        easy_pickle.dump(pickle_file_name, results)
def run(file_name, table_of_gaussians, cutoff,
        low_resolution_only=False, high_resolution_only=False,
        significant_errors_only=False, plots_dir=None,
        quiet=0, verbose=0):
    """Evaluate tabulated Gaussians against the table values and report.

    The table is read with itvc_section61_io.read_table6111(file_name).
    For every element of the table, table_of_gaussians(element, 1).fetch()
    is evaluated against the table values; the elements are then listed in
    order of decreasing maximum significant relative error, with up to
    three of the largest errors each.

    Parameters:
      file_name: table file.
      table_of_gaussians: callable, invoked as table_of_gaussians(element, 1).
      cutoff: only stol <= cutoff + 1.e-6 is used.
      low_resolution_only: restrict to stol <= 2 (56 points expected).
      high_resolution_only: restrict to stol > 2 (6 points expected).
        Mutually exclusive with low_resolution_only.
      significant_errors_only: do not list errors below 0.01.
      plots_dir: if given and an existing directory, plots are written via
        cctbx.eltbx.gaussian_fit.write_plots; otherwise plotting is off.
      quiet: if true, table values are not appended to the listed errors.
      verbose: list every element and echo the plot files.

    Raises:
      RuntimeError: if the table values of an element are not sorted in
        decreasing order.
    """
    assert not (low_resolution_only and high_resolution_only)
    tab = itvc_section61_io.read_table6111(file_name)
    for wk in xray_scattering.wk1995_iterator():
        label = wk.label()
        if label not in tab.entries:
            print("Warning: missing scatterer:", label)
    stols = cctbx.eltbx.gaussian_fit.international_tables_stols
    sel = stols <= cutoff + 1.e-6
    stols = stols.select(sel)
    if low_resolution_only:
        sel = stols <= 2
        stols = stols.select(sel)
        assert stols.size() == 56
    elif high_resolution_only:
        sel = stols > 2
        stols = stols.select(sel)
        assert stols.size() == 6
    range_62 = flex.size_t(range(62))
    labels = flex.std_string()
    errors = []
    correlations = flex.double()
    max_errors = flex.double()
    cmp_plots = flex.std_string()
    for element in tab.elements:
        entry = tab.entries[element]
        wk = table_of_gaussians(element, 1)
        assert entry.table_y.size() == 62
        # A descending sort must leave the values in place.
        if not flex.sort_permutation(
                data=entry.table_y, reverse=True).all_eq(range_62):
            print("Increasing: %s (%d)" % (element, entry.atomic_number))
            prev_y = entry.table_y[0]
            for y in entry.table_y:
                if y > prev_y:
                    print("higher:", y, "before:", prev_y)
                prev_y = y
            # NOTE(review): the check above detects values that increase;
            # the message text is kept as in the original.
            raise RuntimeError("Data values are not increasing.")
        if low_resolution_only:
            gaussian_fit = scitbx.math.gaussian.fit(
                stols, entry.table_y[:-6], entry.table_sigmas[:-6],
                wk.fetch())
        elif high_resolution_only:
            gaussian_fit = scitbx.math.gaussian.fit(
                stols, entry.table_y[-6:], entry.table_sigmas[-6:],
                wk.fetch())
        elif (entry.element != entry.atomic_symbol
              and entry.table_y[-6:].all_eq(0)):
            # The last six values of this entry are all zero: substitute
            # the values of the entry for its atomic symbol.
            # NOTE(review): append() receives a whole slice here; confirm
            # that the flex array type accepts this.
            atom_entry = tab.entries[entry.atomic_symbol]
            patched_table_y = entry.table_y[:-6]
            patched_table_y.append(atom_entry.table_y[-6:])
            patched_table_sigmas = entry.table_sigmas[:-6]
            patched_table_sigmas.append(atom_entry.table_sigmas[-6:])
            gaussian_fit = scitbx.math.gaussian.fit(
                stols, patched_table_y, patched_table_sigmas, wk.fetch())
        else:
            gaussian_fit = scitbx.math.gaussian.fit(
                stols,
                entry.table_y[:stols.size()],
                entry.table_sigmas[:stols.size()],
                wk.fetch())
        labels.append(element)
        errors.append(gaussian_fit.significant_relative_errors())
        max_errors.append(flex.max(errors[-1]))
        correlations.append(
            flex.linear_correlation(
                gaussian_fit.table_y(),
                gaussian_fit.fitted_values()).coefficient())
        if plots_dir is not None:
            if not os.path.isdir(plots_dir):
                print("No plots because the directory %s does not exist."
                      % plots_dir)
                plots_dir = None
            else:
                cmp_plots.append(
                    cctbx.eltbx.gaussian_fit.write_plots(
                        plots_dir=plots_dir,
                        label=element,
                        gaussian_fit=gaussian_fit))
    perm = flex.sort_permutation(data=max_errors, reverse=True)
    labels = labels.select(perm)
    errors = flex.select(errors, perm)
    correlations = correlations.select(perm)
    if plots_dir is None:
        cmp_plots = [None] * len(labels)
    else:
        cmp_plots = cmp_plots.select(perm)
    for l, e, cc, p in zip(labels, errors, correlations, cmp_plots):
        entry = tab.entries[l]
        y = entry.table_y
        # Indices of the three largest errors of this element.
        worst = flex.sort_permutation(data=e, reverse=True)[:3]
        high = []
        for i in worst:
            if significant_errors_only and e[i] < 0.01:
                break
            s = stols[i]
            a = ""
            if not quiet and s < 2.1:
                a = "@%.3f" % y[i]
            high.append("%7.4f %4.2f%s" % (e[i], s, a))
            if high_resolution_only:
                break
        if verbose or len(high) > 0:
            print("Element %-5s(%2d) cc=%.4f:" % (l, entry.atomic_number, cc),
                  ", ".join(high))
        if verbose and p is not None:
            print(p)
            # Close the plot file deterministically after echoing it.
            with open(p) as plot_file:
                sys.stdout.write(plot_file.read())
            print()
def run(gaussian_fit_pickle_file_names, itvc_file_name, kissel_dir):
    """Show pickled Gaussian fits and summarize their maximum errors.

    Fits are loaded with read_pickled_fits(gaussian_fit_pickle_file_names).
    For each label from expected_labels(kissel_dir) the fits of that label
    are shown and, where reference data exists, evaluated against it: the
    ITVC table when itvc_file_name is given (except for label "O2-"),
    otherwise the Kissel table "<Z>_<label>_rf" in kissel_dir. A summary
    sorted by decreasing maximum significant relative error follows;
    entries above 0.01 are expanded into a point-by-point table.

    Parameters:
      gaussian_fit_pickle_file_names: passed to read_pickled_fits.
      itvc_file_name: ITVC table file name, or None.
      kissel_dir: directory containing Kissel tables, or None.

    Printing uses single-argument print calls, which behave identically
    on Python 2 and Python 3.
    """
    # NOTE(review): the source had lost its indentation; where the
    # blank-line prints belong is reconstructed -- confirm.
    itvc_tab = None
    if itvc_file_name is not None:
        itvc_tab = itvc_section61_io.read_table6111(itvc_file_name)
    fits = read_pickled_fits(gaussian_fit_pickle_file_names)
    for name, value in fits.parameters.items():
        print("# %s: %s" % (name, value))
    print("")
    max_errors = flex.double()
    labeled_fits = []
    n_processed = 0
    for label in expected_labels(kissel_dir):
        try:
            fit_group = fits.all[label]
        except Exception:
            print("# Warning: Missing scattering_type: %s" % (label,))
        else:
            print("scattering_type: %s" % (label,))
            prev_fit = None
            for fit in fit_group:
                if prev_fit is not None:
                    # NOTE(review): warning text and comparison direction
                    # look inconsistent; kept exactly as in the original.
                    if fit.stol > prev_fit.stol:
                        print("# Warning: decreasing stol")
                    elif fit.stol == prev_fit.stol:
                        if fit.max_error < prev_fit.max_error:
                            print("# Warning: same stol but previous has"
                                  " larger error")
                prev_fit = fit
                fit.sort().show()
                gaussian_fit = None
                if itvc_tab is not None and label != "O2-":
                    entry = itvc_tab.entries[label]
                    sel = international_tables_stols <= fit.stol + 1.e-6
                    gaussian_fit = scitbx.math.gaussian.fit(
                        international_tables_stols.select(sel),
                        entry.table_y.select(sel),
                        entry.table_sigmas.select(sel),
                        fit)
                elif kissel_dir is not None:
                    atomic_number = tiny_pse.table(label).atomic_number()
                    file_name = os.path.join(
                        kissel_dir, "%02d_%s_rf" % (atomic_number, label))
                    tab = kissel_io.read_table(file_name)
                    sel = tab.itvc_sampling_selection() & (
                        tab.x <= fit.stol + 1.e-6)
                    gaussian_fit = scitbx.math.gaussian.fit(
                        tab.x.select(sel),
                        tab.y.select(sel),
                        tab.sigmas.select(sel),
                        fit)
                if gaussian_fit is not None:
                    max_errors.append(
                        flex.max(gaussian_fit.significant_relative_errors()))
                    labeled_fits.append(labeled_fit(label, gaussian_fit))
            n_processed += 1
            print("")
    if n_processed != len(fits.all):
        print("# Warning: %d fits were not processed." % (
            len(fits.all) - n_processed))
        print("")
    if max_errors.size() > 0:
        print("Summary:")
        perm = flex.sort_permutation(data=max_errors, reverse=True)
        max_errors = max_errors.select(perm)
        labeled_fits = flex.select(labeled_fits, perm)
        for me, lf in zip(max_errors, labeled_fits):
            print("%s n_terms=%d max_error: %.4f" % (
                lf.label, lf.gaussian_fit.n_terms(), me))
            if me > 0.01:
                fit = lf.gaussian_fit
                rel_errors = fit.significant_relative_errors()
                for s, y, a, r in zip(
                        fit.table_x(), fit.table_y(),
                        fit.fitted_values(), rel_errors):
                    comment = " large error" if r > 0.01 else ""
                    print("%4.2f %7.4f %7.4f %7.4f %7.4f%s" % (
                        s, y, a, a - y, r, comment))
                print("")
        print("")
def main():
    """Select the best fits from two fit collections and pickle them.

    Command line arguments: the ITVC table, the six-term fits pickle, and
    two further fit pickles. For every label from
    expected_labels(kissel_dir=None) and every number of terms, the fit
    with the larger stol is kept (pick_nicest_fit decides when the stol
    values are equal). The single six-term fit is then appended to each
    group, fits that do not improve on their predecessor are skipped, and
    the result is written to "best_fits.pickle".

    Changes relative to the previous version:
      * If a label is present in only one of the two collections, the
        available group is used. Previously the missing (None) group was
        stored, so the available fits were discarded.
      * The n_terms keys are ordered with sorted(), and output uses
        single-argument print calls, so the function also runs on Python 3.
    """
    # NOTE(review): the original indentation was lost; the placement of the
    # blank-line prints is reconstructed -- confirm.
    parser = OptionParser(
        usage="usage: python %prog [options]"
        + " itvc_table all_fits_six_terms.pickle"
        + " all_fits_decremental.pickle all_fits_incremental.pickle")
    (options, args) = parser.parse_args()
    if len(args) != 4:
        parser.print_help()
        return
    itvc_tab = itvc_section61_io.read_table6111(args[0])
    six_term_fits = easy_pickle.load(args[1])
    fits = []
    for file_name in args[2:]:
        fits.append(easy_pickle.load(file_name))
        for label, fit_group in fits[-1].all.items():
            for fit in fit_group:
                reset_max_error(itvc_tab.entries[label], fit)
    best_fits = {}
    n_less = 0
    n_greater = 0
    n_equal = 0
    # Per-n_terms counters, indexed by n_terms (0..9).
    n_less_list = [0] * 10
    n_greater_list = [0] * 10
    n_equal_list = [0] * 10
    for label in expected_labels(kissel_dir=None):
        fit_group_0 = fits[0].all.get(label, None)
        fit_group_1 = fits[1].all.get(label, None)
        if fit_group_0 is None or fit_group_1 is None:
            # Keep whichever group exists; None only if both are missing.
            if fit_group_0 is None:
                best_fits[label] = fit_group_1
            else:
                best_fits[label] = fit_group_0
            continue
        best_group = []
        all_n_terms = {}
        n_terms_dicts = []
        for fit_group in [fit_group_0, fit_group_1]:
            n_terms_dict = {}
            for fit in fit_group:
                n_terms_dict[fit.n_terms()] = fit
            n_terms_dicts.append(n_terms_dict)
            all_n_terms.update(n_terms_dict)
        for n_terms in sorted(all_n_terms):
            fit_0 = n_terms_dicts[0].get(n_terms, None)
            fit_1 = n_terms_dicts[1].get(n_terms, None)
            if fit_0 is None:
                best_group.append(fit_1)
                continue
            if fit_1 is None:
                best_group.append(fit_0)
                continue
            if fit_0.stol < fit_1.stol:
                best_group.append(fit_1)
                status = "less"
                n_less += 1
                n_less_list[n_terms] += 1
            elif fit_0.stol > fit_1.stol:
                best_group.append(fit_0)
                status = "greater"
                n_greater += 1
                n_greater_list[n_terms] += 1
            else:
                best_group.append(pick_nicest_fit(fit_0, fit_1))
                status = "equal"
                n_equal += 1
                n_equal_list[n_terms] += 1
            print("%-4s n_terms=%d %4.2f %4.2f %s" % (
                label, n_terms, fit_0.stol, fit_1.stol, status))
        best_fits[label] = best_group
    print("n_less: %d" % n_less)
    print("n_greater: %d" % n_greater)
    print("n_equal: %d" % n_equal)
    print("total: %d" % (n_less + n_greater + n_equal))
    # Separate loop names keep the totals above intact.
    for n_terms, (c_less, c_greater, c_equal) in enumerate(
            zip(n_less_list, n_greater_list, n_equal_list)):
        if c_less == 0 and c_greater == 0 and c_equal == 0:
            continue
        print("n_terms: %d" % n_terms)
        print(" n_less: %d" % c_less)
        print(" n_greater: %d" % c_greater)
        print(" n_equal: %d" % c_equal)
        print(" total: %d" % (c_less + c_greater + c_equal))
    print("")
    for label in expected_labels(kissel_dir=None):
        if best_fits[label] is None:
            print("# Warning: Missing scattering_type: %s" % (label,))
    print("")
    print("Best fits:")
    print("")
    for label in expected_labels(kissel_dir=None):
        fit_group = best_fits[label]
        if fit_group is None:
            continue
        print("scattering_type: %s" % (label,))
        assert len(six_term_fits.all[label]) == 1
        assert six_term_fits.all[label][0].n_terms() == 6
        fit_group.append(six_term_fits.all[label][0])
        reset_max_error(itvc_tab.entries[label], fit_group[-1])
        trimmed_fit_group = []
        prev_fit = None
        for fit in fit_group:
            # Keep a fit only if it reaches a larger stol than the last
            # kept fit, or the same stol with a smaller max_error.
            if (prev_fit is None
                    or fit.stol > prev_fit.stol
                    or (fit.stol == prev_fit.stol
                        and fit.max_error < prev_fit.max_error)):
                trimmed_fit_group.append(fit)
                fit.show()
                prev_fit = fit
            else:
                print(
                    "# skipped: %s, n_terms: %d, stol: %.2f, max_error: %.4f"
                    % (label, fit.n_terms(), fit.stol, fit.max_error))
        best_fits[label] = trimmed_fit_group
        print("")
    easy_pickle.dump("best_fits.pickle", best_fits)
def run(
    file_name,
    table_of_gaussians,
    cutoff,
    low_resolution_only=False,
    high_resolution_only=False,
    significant_errors_only=False,
    plots_dir=None,
    quiet=0,
    verbose=0,
):
    """Compare tabulated Gaussians with the table values and print a report.

    The table is read with itvc_section61_io.read_table6111(file_name).
    Each element's table values are compared with
    table_of_gaussians(element, 1).fetch(); elements are then reported in
    order of decreasing maximum significant relative error, each with up
    to three of its largest errors.

    Parameters:
      file_name: table file.
      table_of_gaussians: callable, invoked as table_of_gaussians(element, 1).
      cutoff: only stol <= cutoff + 1.0e-6 is used.
      low_resolution_only: restrict to stol <= 2 (56 points expected).
      high_resolution_only: restrict to stol > 2 (6 points expected).
        Must not be combined with low_resolution_only.
      significant_errors_only: do not list errors below 0.01.
      plots_dir: if given and an existing directory, plots are written via
        cctbx.eltbx.gaussian_fit.write_plots; otherwise plotting is off.
      quiet: if true, table values are not appended to the listed errors.
      verbose: list every element and echo the plot files.

    Raises:
      RuntimeError: if the table values of an element are not sorted in
        decreasing order.

    Output uses single-argument print calls and range() instead of
    xrange(), so the function runs on both Python 2 and Python 3.
    """
    assert not (low_resolution_only and high_resolution_only)
    tab = itvc_section61_io.read_table6111(file_name)
    for wk in xray_scattering.wk1995_iterator():
        label = wk.label()
        if label not in tab.entries:
            print("Warning: missing scatterer: %s" % (label,))
    stols = cctbx.eltbx.gaussian_fit.international_tables_stols
    sel = stols <= cutoff + 1.0e-6
    stols = stols.select(sel)
    if low_resolution_only:
        sel = stols <= 2
        stols = stols.select(sel)
        assert stols.size() == 56
    elif high_resolution_only:
        sel = stols > 2
        stols = stols.select(sel)
        assert stols.size() == 6
    range_62 = flex.size_t(range(62))
    labels = flex.std_string()
    errors = []
    correlations = flex.double()
    max_errors = flex.double()
    cmp_plots = flex.std_string()
    for element in tab.elements:
        entry = tab.entries[element]
        wk = table_of_gaussians(element, 1)
        assert entry.table_y.size() == 62
        # A descending sort must leave the values in place.
        if not flex.sort_permutation(
            data=entry.table_y, reverse=True
        ).all_eq(range_62):
            print("Increasing: %s (%d)" % (element, entry.atomic_number))
            prev_y = entry.table_y[0]
            for y in entry.table_y:
                if y > prev_y:
                    print("higher: %s before: %s" % (y, prev_y))
                prev_y = y
            # NOTE(review): the check above detects values that increase;
            # the message text is kept as in the original.
            raise RuntimeError("Data values are not increasing.")
        if low_resolution_only:
            gaussian_fit = scitbx.math.gaussian.fit(
                stols, entry.table_y[:-6], entry.table_sigmas[:-6],
                wk.fetch()
            )
        elif high_resolution_only:
            gaussian_fit = scitbx.math.gaussian.fit(
                stols, entry.table_y[-6:], entry.table_sigmas[-6:],
                wk.fetch()
            )
        elif (entry.element != entry.atomic_symbol
              and entry.table_y[-6:].all_eq(0)):
            # The last six values of this entry are all zero: use the
            # values of the entry for its atomic symbol instead.
            # NOTE(review): append() receives a whole slice here; confirm
            # that the flex array type accepts this.
            atom_entry = tab.entries[entry.atomic_symbol]
            patched_table_y = entry.table_y[:-6]
            patched_table_y.append(atom_entry.table_y[-6:])
            patched_table_sigmas = entry.table_sigmas[:-6]
            patched_table_sigmas.append(atom_entry.table_sigmas[-6:])
            gaussian_fit = scitbx.math.gaussian.fit(
                stols, patched_table_y, patched_table_sigmas, wk.fetch()
            )
        else:
            gaussian_fit = scitbx.math.gaussian.fit(
                stols,
                entry.table_y[: stols.size()],
                entry.table_sigmas[: stols.size()],
                wk.fetch(),
            )
        labels.append(element)
        errors.append(gaussian_fit.significant_relative_errors())
        max_errors.append(flex.max(errors[-1]))
        correlations.append(
            flex.linear_correlation(
                gaussian_fit.table_y(), gaussian_fit.fitted_values()
            ).coefficient()
        )
        if plots_dir is not None:
            if not os.path.isdir(plots_dir):
                print(
                    "No plots because the directory %s does not exist."
                    % plots_dir
                )
                plots_dir = None
            else:
                cmp_plots.append(
                    cctbx.eltbx.gaussian_fit.write_plots(
                        plots_dir=plots_dir,
                        label=element,
                        gaussian_fit=gaussian_fit,
                    )
                )
    perm = flex.sort_permutation(data=max_errors, reverse=True)
    labels = labels.select(perm)
    errors = flex.select(errors, perm)
    correlations = correlations.select(perm)
    if plots_dir is None:
        cmp_plots = [None] * len(labels)
    else:
        cmp_plots = cmp_plots.select(perm)
    for l, e, cc, p in zip(labels, errors, correlations, cmp_plots):
        entry = tab.entries[l]
        y = entry.table_y
        # Indices of the three largest errors of this element.
        worst = flex.sort_permutation(data=e, reverse=True)[:3]
        high = []
        for i in worst:
            if significant_errors_only and e[i] < 0.01:
                break
            s = stols[i]
            a = ""
            if not quiet and s < 2.1:
                a = "@%.3f" % y[i]
            high.append("%7.4f %4.2f%s" % (e[i], s, a))
            if high_resolution_only:
                break
        if verbose or len(high) > 0:
            print("Element %-5s(%2d) cc=%.4f: %s" % (
                l, entry.atomic_number, cc, ", ".join(high)))
        if verbose and p is not None:
            print(p)
            # Close the plot file deterministically after echoing it.
            with open(p) as plot_file:
                sys.stdout.write(plot_file.read())
            print("")
def main():
    """Merge two pickled fit collections into "best_fits.pickle".

    Expects four command line arguments: the ITVC table, the six-term fits
    pickle, and two more fit pickles. The max_error of every loaded fit is
    reset against the ITVC table entry. For each expected label the two
    collections are compared per number of terms and the fit with the
    larger stol wins (pick_nicest_fit is used when they are equal).
    Statistics on which collection won are printed. Finally the six-term
    fit is appended to each group, fits that do not improve on the
    previously kept one are dropped, and the trimmed groups are pickled.

    Changes relative to the previous version:
      * A label found in only one collection now keeps that collection's
        group. Previously the branches were swapped and None was stored.
      * The key list is sorted with sorted() instead of keys() followed by
        sort(), and single-argument print calls are used, so the code
        also runs on Python 3.
    """
    # NOTE(review): indentation of the original was lost; the position of
    # the blank-line prints is reconstructed -- confirm.
    usage = ("usage: python %prog [options]"
             + " itvc_table all_fits_six_terms.pickle"
             + " all_fits_decremental.pickle all_fits_incremental.pickle")
    parser = OptionParser(usage=usage)
    (options, args) = parser.parse_args()
    if len(args) != 4:
        parser.print_help()
        return
    itvc_tab = itvc_section61_io.read_table6111(args[0])
    six_term_fits = easy_pickle.load(args[1])
    fits = []
    for file_name in args[2:]:
        loaded = easy_pickle.load(file_name)
        fits.append(loaded)
        for label, fit_group in loaded.all.items():
            for fit in fit_group:
                reset_max_error(itvc_tab.entries[label], fit)
    best_fits = {}
    n_less = 0
    n_greater = 0
    n_equal = 0
    # Counters per number of terms; the index is n_terms (0..9).
    n_less_list = [0] * 10
    n_greater_list = [0] * 10
    n_equal_list = [0] * 10
    for label in expected_labels(kissel_dir=None):
        fit_group_0 = fits[0].all.get(label, None)
        fit_group_1 = fits[1].all.get(label, None)
        if fit_group_0 is None:
            # Use the other collection; this stays None if both lack it.
            best_fits[label] = fit_group_1
            continue
        if fit_group_1 is None:
            best_fits[label] = fit_group_0
            continue
        # One {n_terms: fit} mapping per collection.
        n_terms_dicts = []
        for fit_group in (fit_group_0, fit_group_1):
            n_terms_dict = {}
            for fit in fit_group:
                n_terms_dict[fit.n_terms()] = fit
            n_terms_dicts.append(n_terms_dict)
        all_n_terms = sorted(set(n_terms_dicts[0]) | set(n_terms_dicts[1]))
        best_group = []
        for n_terms in all_n_terms:
            fit_0 = n_terms_dicts[0].get(n_terms, None)
            fit_1 = n_terms_dicts[1].get(n_terms, None)
            if fit_0 is None:
                best_group.append(fit_1)
                continue
            if fit_1 is None:
                best_group.append(fit_0)
                continue
            if fit_0.stol < fit_1.stol:
                best_group.append(fit_1)
                status = "less"
                n_less += 1
                n_less_list[n_terms] += 1
            elif fit_0.stol > fit_1.stol:
                best_group.append(fit_0)
                status = "greater"
                n_greater += 1
                n_greater_list[n_terms] += 1
            else:
                best_group.append(pick_nicest_fit(fit_0, fit_1))
                status = "equal"
                n_equal += 1
                n_equal_list[n_terms] += 1
            print("%-4s n_terms=%d %4.2f %4.2f %s" % (
                label, n_terms, fit_0.stol, fit_1.stol, status))
        best_fits[label] = best_group
    print("n_less: %d" % n_less)
    print("n_greater: %d" % n_greater)
    print("n_equal: %d" % n_equal)
    print("total: %d" % (n_less + n_greater + n_equal))
    # Distinct loop names so the totals above are not overwritten.
    per_terms = zip(n_less_list, n_greater_list, n_equal_list)
    for n_terms, (k_less, k_greater, k_equal) in enumerate(per_terms):
        if k_less == 0 and k_greater == 0 and k_equal == 0:
            continue
        print("n_terms: %d" % n_terms)
        print(" n_less: %d" % k_less)
        print(" n_greater: %d" % k_greater)
        print(" n_equal: %d" % k_equal)
        print(" total: %d" % (k_less + k_greater + k_equal))
    print("")
    for label in expected_labels(kissel_dir=None):
        if best_fits[label] is None:
            print("# Warning: Missing scattering_type: %s" % (label,))
    print("")
    print("Best fits:")
    print("")
    for label in expected_labels(kissel_dir=None):
        fit_group = best_fits[label]
        if fit_group is None:
            continue
        print("scattering_type: %s" % (label,))
        assert len(six_term_fits.all[label]) == 1
        assert six_term_fits.all[label][0].n_terms() == 6
        fit_group.append(six_term_fits.all[label][0])
        reset_max_error(itvc_tab.entries[label], fit_group[-1])
        trimmed_fit_group = []
        prev_fit = None
        for fit in fit_group:
            # A fit is kept when it reaches a larger stol than the last
            # kept fit, or the same stol with a smaller max_error.
            improves = (
                prev_fit is None
                or fit.stol > prev_fit.stol
                or (fit.stol == prev_fit.stol
                    and fit.max_error < prev_fit.max_error))
            if improves:
                trimmed_fit_group.append(fit)
                fit.show()
                prev_fit = fit
            else:
                print(
                    "# skipped: %s, n_terms: %d, stol: %.2f, max_error: %.4f"
                    % (label, fit.n_terms(), fit.stol, fit.max_error))
        best_fits[label] = trimmed_fit_group
        print("")
    easy_pickle.dump("best_fits.pickle", best_fits)