def analyze_distances(self, params, pdb_hierarchy=None, log=sys.stdout):
    assert 0  # Not used anywhere?
    atoms = None
    if params.verbose:
        assert pdb_hierarchy is not None
        atoms = pdb_hierarchy.atoms()
    remove_outliers = params.secondary_structure.protein.remove_outliers
    atoms = pdb_hierarchy.atoms()
    hist = flex.histogram(self.bond_lengths, 10)
    print(" Distribution of hydrogen bond lengths without filtering:",
          file=log)
    hist.show(f=log, prefix="  ", format_cutoffs="%.4f")
    print("", file=log)
    if not remove_outliers:
        return False
    for i, distance in enumerate(self.bond_lengths):
        if distance > distance_max:  # NOTE: distance_max is not defined in this scope
            self.flag_use_bond[i] = False
            if params.verbose:
                print("Excluding H-bond with length %.3fA" % distance,
                      file=log)
                i_seq, j_seq = self.bonds[i]
                print("  %s" % atoms[i_seq].fetch_labels().id_str(), file=log)
                print("  %s" % atoms[j_seq].fetch_labels().id_str(), file=log)
    print(" After filtering: %d bonds remaining." %
          self.flag_use_bond.count(True), file=log)
    print(" Distribution of hydrogen bond lengths after applying cutoff:",
          file=log)
    hist = flex.histogram(self.bond_lengths.select(self.flag_use_bond), 10)
    hist.show(f=log, prefix="  ", format_cutoffs="%.4f")
    print("", file=log)
    return True

def process_image(process):
    import sys
    last_update = start = timeit.default_timer()
    i = process
    if use_python_counter:
        local_hist = Counter()
    else:
        local_hist = flex.histogram(flex.double(), data_min=0.0,
                                    data_max=histmax, n_slots=histbins)
    max_images = image_count // nproc
    if process >= image_count % nproc:
        max_images += 1
    while i < image_count:
        data = read_cbf_image(image_list[i])
        if not use_python_counter:
            data = flex.histogram(data.as_double().as_1d(), data_min=0.0,
                                  data_max=histmax, n_slots=histbins)
        local_hist.update(data)
        i = i + nproc
        if process == 0:
            if timeit.default_timer() > (last_update + 3):
                last_update = timeit.default_timer()
                if sys.stdout.isatty():
                    sys.stdout.write('\033[A')
                print('Processed %d%% (%d seconds remain)        ' %
                      (100 * i // image_count,
                       round((image_count - i) * (last_update - start) /
                             (i + 1))))
    return local_hist

def get_map_histograms(data, n_slots=20, data_1=None, data_2=None):
    h0, h1, h2 = None, None, None
    data_min = None
    hmhcc = None
    if (data_1 is None):
        h0 = flex.histogram(data=data.as_1d(), n_slots=n_slots)
    else:
        data_min = min(flex.min(data_1), flex.min(data_2))
        data_max = max(flex.max(data_1), flex.max(data_2))
        h0 = flex.histogram(data=data.as_1d(), n_slots=n_slots)
        h1 = flex.histogram(data=data_1.as_1d(), data_min=data_min,
                            data_max=data_max, n_slots=n_slots)
        h2 = flex.histogram(data=data_2.as_1d(), data_min=data_min,
                            data_max=data_max, n_slots=n_slots)
        hmhcc = flex.linear_correlation(
            x=h1.slots().as_double(),
            y=h2.slots().as_double()).coefficient()
    return group_args(h_map=h0, h_half_map_1=h1, h_half_map_2=h2,
                      _data_min=data_min, half_map_histogram_cc=hmhcc)

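# Usage sketch for the function above (an assumption, not part of the original
# corpus): synthetic random values stand in for real map grids, and group_args
# is assumed to come from libtbx, as elsewhere in cctbx.
import random
from libtbx import group_args
from scitbx.array_family import flex

full_map = flex.double([random.gauss(0.0, 1.0) for _ in range(1000)])
half_1 = flex.double([random.gauss(0.0, 1.0) for _ in range(1000)])
half_2 = flex.double([random.gauss(0.0, 1.0) for _ in range(1000)])
result = get_map_histograms(full_map, n_slots=20, data_1=half_1, data_2=half_2)
print(result.half_map_histogram_cc)  # correlation of the two half-map histograms
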
def process_file(file_object, n_slots, data_min, data_max, format_cutoffs):
    data = flex.double()
    for line in file_object.read().splitlines():
        data.append(float(line))
    print("total number of data points:", data.size())
    if (data_min is None):
        data_min = flex.min(data)
    if (data_max is None):
        data_max = flex.max(data)
    flex.histogram(
        data=data, n_slots=n_slots, data_min=data_min,
        data_max=data_max).show(format_cutoffs=format_cutoffs)

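# Usage sketch (assumed, not from the original): io.StringIO stands in for an
# open file with one numeric value per line; flex is expected to be imported
# at module scope, as the snippet above assumes.
import io

process_file(io.StringIO("1.0\n2.0\n2.5\n4.0\n"), n_slots=4,
             data_min=None, data_max=None, format_cutoffs="%.2f")
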
def plot_centroid_weights_histograms(reflections, n_slots=50):
    from matplotlib import pyplot
    from scitbx.array_family import flex

    variances = flex.vec3_double([r.centroid_variance for r in reflections])
    vx, vy, vz = variances.parts()
    idx = (vx > 0).__and__(vy > 0).__and__(vz > 0)
    vx = vx.select(idx)
    vy = vy.select(idx)
    vz = vz.select(idx)
    wx = 1 / vx
    wy = 1 / vy
    wz = 1 / vz
    wx = flex.log(wx)
    wy = flex.log(wy)
    wz = flex.log(wz)
    hx = flex.histogram(wx, n_slots=n_slots)
    hy = flex.histogram(wy, n_slots=n_slots)
    hz = flex.histogram(wz, n_slots=n_slots)
    fig = pyplot.figure()
    idx2 = flex.max_index(wx)
    idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
    print(reflections[idx3])
    return  # NOTE: this early return leaves the plotting code below unreachable
    # outliers = reflections.select(wx > 50)
    # for refl in outliers:
    #     print(refl)
    for i, h in enumerate([hx, hy, hz]):
        ax = fig.add_subplot(311 + i)
        slots = h.slots().as_double()
        bins, data = hist_outline(h)
        log_scale = True
        if log_scale:
            # otherwise lines don't get drawn when we have some empty bins
            data.set_selected(data == 0, 0.1)
            ax.set_yscale("log")
        ax.plot(bins, data, "-k", linewidth=2)
        # pyplot.suptitle(title)
        data_min = min(slot.low_cutoff for slot in h.slot_infos()
                       if slot.n > 0)
        data_max = max(slot.low_cutoff for slot in h.slot_infos()
                       if slot.n > 0)
        ax.set_xlim(data_min, data_max + h.slot_width())
    pyplot.show()

def build_hist():
    from scitbx.array_family import flex

    if len(sys.argv) == 2 and sys.argv[1].endswith('.json'):
        from dxtbx import datablock
        db = datablock.DataBlockFactory.from_json_file(sys.argv[1])[0]
        image_list = db.extract_imagesets()[0].paths()
    else:
        image_list = sys.argv[1:]
    image_count = len(image_list)

    # Faster, yet still less than ideal and wasting a lot of resources.
    limit = get_overload(image_list[0])
    binfactor = 5  # register up to 500% counts
    histmax = (limit * binfactor) + 0.0
    histbins = (limit * binfactor) + 1
    hist = flex.histogram(flex.double(), data_min=0.0, data_max=histmax,
                          n_slots=histbins)

    print("Processing %d images" % image_count)
    start = timeit.default_timer()
    last_update = start

    # image_maxima = [None] * image_count
    for i in range(image_count):
        data = read_cbf_image(image_list[i])
        tmp_hist = flex.histogram(data.as_double().as_1d(), data_min=0.0,
                                  data_max=histmax, n_slots=histbins)
        # image_max = histmax
        # for b in reversed(tmp_hist.slots()):
        #     if b != 0:
        #         image_maxima[i] = int(image_max)
        #         break
        #     image_max -= 1
        hist.update(tmp_hist)
        if timeit.default_timer() > (last_update + 3):
            last_update = timeit.default_timer()
            if sys.stdout.isatty():
                sys.stdout.write('\033[A')
            print('Processed %d of %d images (%d seconds remain)        ' %
                  (i + 1, image_count,
                   round((image_count - i) * (last_update - start) /
                         (i + 1))))

    # print(image_maxima)
    results = {
        'scale_factor': 1 / limit,
        'bin_count': histbins,
        'bins': list(hist.slots()),
        'image_files': image_list,
    }
    print("Writing results to overload.json")
    with open('overload.json', 'w') as fh:
        json.dump(results, fh)

def plot_number_of_crystals(experiments):
    image_to_expts = {}
    for expt in experiments:
        img = expt.imageset.get_image_identifier(0)
        image_to_expts.setdefault(img, [])
        image_to_expts[img].append(expt)
    n_crystals_per_image = flex.int(
        len(expts) for expts in image_to_expts.values())
    nmax = flex.max(n_crystals_per_image)
    hist = flex.histogram(n_crystals_per_image.as_double(), 0, nmax,
                          n_slots=nmax)
    # hist.show()
    from matplotlib import pyplot as plt

    plt.style.use("ggplot")
    plt.bar(
        hist.slot_centers(),
        hist.slots(),
        align="center",
        width=hist.slot_width(),
        zorder=10,
        color="black",
        edgecolor=None,
    )
    plt.savefig("n_crystals_hist.png")
    plt.clf()

def filter_histogram_of_key_value(database_dict, key, max_reject_fraction,
                                  edge_tolerance_small=1.e-4, n_slots=3):
    values = database_dict[key]
    new_values = convert_to_numeric(values=values)
    # print(flex.min(new_values), flex.max(new_values))
    size = new_values.size()
    # print(size)
    if (size == 0):
        return
    while True:
        values = database_dict[key]
        new_values = convert_to_numeric(values=values)
        selection = flex.bool(new_values.size(), True)
        histogram = flex.histogram(data=new_values, n_slots=n_slots)
        l = histogram.data_min()
        for i, s in enumerate(histogram.slots()):
            r = histogram.data_min() + histogram.slot_width() * (i + 1)
            r = r + edge_tolerance_small
            l = max(0, l - edge_tolerance_small)
            # print("%8.4f %8.4f %d" % (l, r, s))
            if (s < size * max_reject_fraction):
                selection &= ~((new_values >= l) & (new_values <= r))
            l = r
        leave, remove = selection.count(True), selection.count(False)
        # print(leave, remove)
        if (remove == 0):
            break
        if (size - leave > int(size * max_reject_fraction)):
            break
        database_dict = select_dict(database_dict=database_dict,
                                    selection=selection)
    return database_dict

def show(self, sites_cart, n_slots_difference_histogram=6, out=None, prefix=""): if (out is None): out = sys.stdout selection_strings = self.group.selection_strings for i_op,pair,mx,rms in zip( count(1), self.group.selection_pairs, self.matrices, self.rms): print >> out, prefix + "NCS operator %d:" % i_op print >> out, prefix + " Reference selection:", \ show_string(selection_strings[0]) print >> out, prefix + " Other selection:", \ show_string(selection_strings[i_op]) print >> out, prefix + " Number of atom pairs:", len(pair[0]) print >> out, mx.r.mathematica_form( label="Rotation", format="%.6g", one_row_per_line=True, prefix=prefix+" ") print >> out, mx.t.mathematica_form( label="Translation", format="%.6g", prefix=prefix+" ") x = sites_cart.select(pair[0]) y = mx * sites_cart.select(pair[1]) d_sq = (x-y).dot() if (n_slots_difference_histogram is not None): print >> out, prefix + " Histogram of differences:" diff_histogram = flex.histogram( data=flex.sqrt(d_sq), n_slots=n_slots_difference_histogram) diff_histogram.show( f=out, prefix=prefix+" ", format_cutoffs="%8.6f") print >> out, \ prefix + " RMS difference with respect to the reference: %8.6f" %(rms)
def get_mean_statistic_for_resolution(d_min, stat_type, range=0.2, out=None):
    if (out is None):
        out = sys.stdout
    from scitbx.array_family import flex

    pkl_file = libtbx.env.find_in_repositories(
        relative_path="chem_data/polygon_data/all_mvd.pickle",
        test=os.path.isfile)
    db = easy_pickle.load(pkl_file)
    all_d_min = db['high_resolution']
    stat_values = db[stat_type]
    values_for_range = flex.double()
    for (d_, v_) in zip(all_d_min, stat_values):
        try:
            d = float(d_)
            v = float(v_)
        except ValueError:
            continue
        else:
            if (d > (d_min - range)) and (d < (d_min + range)):
                values_for_range.append(v)
    h = flex.histogram(values_for_range, n_slots=10)
    print(" %s for d_min = %.3f - %.3f A" %
          (stat_names[stat_type], d_min - range, d_min + range), file=out)
    min = flex.min(values_for_range)  # NOTE: shadows the builtin min
    max = flex.max(values_for_range)  # NOTE: shadows the builtin max
    mean = flex.mean(values_for_range)
    print(" count: %d" % values_for_range.size(), file=out)
    print(" min: %.2f" % min, file=out)
    print(" max: %.2f" % max, file=out)
    print(" mean: %.2f" % mean, file=out)
    print(" histogram of values:", file=out)
    h.show(prefix=" ")
    return mean

def get_map_histograms(data, n_slots=20, data_1=None, data_2=None):
    h0, h1, h2 = None, None, None
    data_min = None
    if (data_1 is None):
        h0 = flex.histogram(data=data.as_1d(), n_slots=n_slots)
    else:
        data_min = min(flex.min(data), flex.min(data_1), flex.min(data_2))
        data_max = max(flex.max(data), flex.max(data_1), flex.max(data_2))
        h0 = flex.histogram(data=data.as_1d(), data_min=data_min,
                            data_max=data_max, n_slots=n_slots)
        h1 = flex.histogram(data=data_1.as_1d(), data_min=data_min,
                            data_max=data_max, n_slots=n_slots)
        h2 = flex.histogram(data=data_2.as_1d(), data_min=data_min,
                            data_max=data_max, n_slots=n_slots)
    return group_args(h_map=h0, h_half_map_1=h1, h_half_map_2=h2,
                      _data_min=data_min)

def run():
    m = any_reflection_file(sys.argv[1]).file_content()
    sg = m.space_group()
    mi = m.extract_miller_indices()
    mas = m.as_miller_arrays()
    absent = flex.bool([sg.is_sys_absent(indx) for indx in mi])
    intensities = None
    for ma in mas:
        if ma.is_xray_intensity_array():
            intensities = ma
            break
    assert intensities, "intensity data not found in %s" % sys.argv[1]
    print("Removing %d absent reflections" % absent.count(True))
    intensities = intensities.select(~absent)
    i_over_sig = intensities.data() / intensities.sigmas()
    hist = flex.histogram(i_over_sig, n_slots=50)
    print("I/sig(I) N")
    for centre, value in zip(hist.slot_centers(), hist.slots()):
        print("%5.1f %d" % (centre, value))

def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
    prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
    if prf_sel.count(True) > 0:
        reflections = reflections.select(prf_sel)
        intensities = reflections["intensity.prf.value"]
        variances = reflections["intensity.prf.variance"]
    else:
        sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
        reflections = reflections.select(sum_sel)
        intensities = reflections["intensity.sum.value"]
        variances = reflections["intensity.sum.variance"]
    i_sigi = intensities / flex.sqrt(variances)
    xyz_px = reflections["xyzobs.px.value"]
    x_px, y_px, z_px = xyz_px.parts()
    phi = scan.get_angle_from_array_index(z_px)
    osc = scan.get_oscillation()[1]
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc
    phi_min = flex.min(phi)
    phi_max = flex.max(phi)
    n_steps = iceil((phi_max - phi_min) / phi_step)
    hist = flex.histogram(z_px, n_slots=n_steps)
    mean_i_sigi = flex.double()
    for i, slot_info in enumerate(hist.slot_infos()):
        sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
        if sel.count(True) == 0:
            mean_i_sigi.append(0)
        else:
            mean_i_sigi.append(flex.mean(i_sigi.select(sel)))
    fractional_mean_i_sigi = mean_i_sigi / flex.max(mean_i_sigi)
    potential_blank_sel = mean_i_sigi <= (fractional_loss *
                                          flex.max(mean_i_sigi))
    xmin, xmax = zip(*[(slot_info.low_cutoff, slot_info.high_cutoff)
                       for slot_info in hist.slot_infos()])
    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(mean_i_sigi),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {
            "xaxis": {"title": "z observed (images)"},
            "yaxis": {"title": "Number of reflections"},
            "bargap": 0,
        },
    }
    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions
    return d

def blank_counts_analysis(reflections, scan, phi_step, fractional_loss): if not len(reflections): raise Sorry("Input contains no reflections") xyz_px = reflections["xyzobs.px.value"] x_px, y_px, z_px = xyz_px.parts() phi = scan.get_angle_from_array_index(z_px) osc = scan.get_oscillation()[1] n_images_per_step = iceil(phi_step / osc) phi_step = n_images_per_step * osc array_range = scan.get_array_range() phi_min = scan.get_angle_from_array_index(array_range[0]) phi_max = scan.get_angle_from_array_index(array_range[1]) assert phi_min <= flex.min(phi) assert phi_max >= flex.max(phi) n_steps = int(round((phi_max - phi_min) / phi_step)) hist = flex.histogram( z_px, data_min=array_range[0], data_max=array_range[1], n_slots=n_steps ) logger.debug("Histogram:") logger.debug(hist.as_str()) counts = hist.slots() fractional_counts = counts.as_double() / flex.max(counts) potential_blank_sel = fractional_counts <= fractional_loss xmin, xmax = zip( *[ (slot_info.low_cutoff, slot_info.high_cutoff) for slot_info in hist.slot_infos() ] ) d = { "data": [ { "x": list(hist.slot_centers()), "y": list(hist.slots()), "xlow": xmin, "xhigh": xmax, "blank": list(potential_blank_sel), "type": "bar", "name": "blank_counts_analysis", } ], "layout": { "xaxis": {"title": "z observed (images)"}, "yaxis": {"title": "Number of reflections"}, "bargap": 0, }, } blank_regions = blank_regions_from_sel(d["data"][0]) d["blank_regions"] = blank_regions return d
def plot_uc_vs_detector_distance(
    uc_params,
    panel_distances,
    outliers,
    steps_per_angstrom=20,
    filename="uc_vs_distance.png",
):
    from matplotlib import pyplot as plt

    plt.style.use("ggplot")
    f = plt.figure(figsize=(12, 8))
    ax1 = plt.subplot2grid((2, 3), (0, 0))
    ax2 = plt.subplot2grid((2, 3), (0, 1), sharey=ax1)
    ax3 = plt.subplot2grid((2, 3), (0, 2), sharey=ax1)
    ax4 = plt.subplot2grid((2, 3), (1, 0), colspan=3)
    a, b, c = uc_params[:3]

    def hist2d(p1, p2, ax):
        nbins = 100
        H, xedges, yedges = np.histogram2d(p1, p2, bins=nbins)
        H = np.rot90(H)
        H = np.flipud(H)
        Hmasked = np.ma.masked_where(H == 0, H)
        ax.pcolormesh(xedges, yedges, Hmasked)

    hist2d(a, panel_distances[0], ax1)
    hist2d(b, panel_distances[0], ax2)
    hist2d(c, panel_distances[0], ax3)

    mmm = flex.min_max_mean_double(panel_distances[0])
    steps_per_mm = 20
    Amin = math.floor(mmm.min * steps_per_mm) / steps_per_mm
    Amax = math.floor(mmm.max * steps_per_mm) / steps_per_mm
    n_slots = max(1, int((Amax - Amin) * steps_per_mm))
    if Amin == Amax:
        eps = 0.05
        Amin -= eps
        Amax += eps
    hist = flex.histogram(panel_distances[0], Amin, Amax, n_slots=n_slots)
    ax4.bar(
        hist.slot_centers(),
        hist.slots(),
        align="center",
        width=hist.slot_width(),
        zorder=10,
        color="red",
        edgecolor=None,
        linewidth=0,
    )

    ax1.set_ylabel("Detector distance (mm)")
    ax1.set_xlabel(r"a ($\AA$)")
    ax2.set_xlabel(r"b ($\AA$)")
    ax3.set_xlabel(r"c ($\AA$)")
    ax4.set_xlabel("Detector distance (mm)")
    plt.tight_layout()  # must run before savefig to have any effect
    f.savefig(filename)
    f.clf()

def __init__(self, map_1, map_2, sites_cart, unit_cell, radius, n_slots):
    assert_same_gridding(map_1, map_2)
    self.ccs = from_map_map_atoms_per_atom(
        map_1=map_1, map_2=map_2, sites_cart=sites_cart,
        unit_cell=unit_cell, radius=radius)
    self.hist = flex.histogram(data=self.ccs, n_slots=n_slots)

def normal_probability_plot(self, data, rankits_sel=None, plot=False):
    """Use normal probability analysis to determine if a set of data is
    normally distributed. See
    https://en.wikipedia.org/wiki/Normal_probability_plot. Rankits are
    computed in the same way as qqnorm does in R.

    @param data flex array
    @param rankits_sel only use the rankits in a certain range. Useful for
        outlier rejection. Should be a tuple such as (-0.5, 0.5).
    @param plot whether to show the normal probability plot
    """
    from scitbx.math import distributions
    import numpy as np

    norm = distributions.normal_distribution()
    n = len(data)
    if n <= 10:
        a = 3 / 8
    else:
        a = 0.5
    sorted_data = flex.sorted(data)
    rankits = flex.double(
        [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)])
    if rankits_sel is None:
        corr, slope, offset = self.get_overall_correlation_flex(
            sorted_data, rankits)
    else:
        sel = (rankits >= rankits_sel[0]) & (rankits <= rankits_sel[1])
        corr, slope, offset = self.get_overall_correlation_flex(
            sorted_data.select(sel), rankits.select(sel))
    if plot:
        from matplotlib import pyplot as plt

        f = plt.figure(0)
        lim = -5, 5
        x = np.linspace(lim[0], lim[1], 100)  # 100 linearly spaced numbers
        y = slope * x + offset
        plt.plot(sorted_data, rankits, '-')
        # plt.plot(x, y)
        plt.title("CC: %.3f Slope: %.3f Offset: %.3f" %
                  (corr, slope, offset))
        plt.xlabel("Sorted data")
        plt.ylabel("Rankits")
        plt.xlim(lim)
        plt.ylim(lim)
        plt.axes().set_aspect('equal')

        f = plt.figure(1)
        h = flex.histogram(sorted_data, n_slots=100, data_min=lim[0],
                           data_max=lim[1])
        stats = flex.mean_and_variance(sorted_data)
        plt.plot(h.slot_centers().as_numpy_array(),
                 h.slots().as_numpy_array(), '-')
        plt.xlim(lim)
        plt.xlabel("Sorted data")
        plt.ylabel("Count")
        plt.title("Normalized data mean: %.3f +/- %.3f" %
                  (stats.mean(),
                   stats.unweighted_sample_standard_deviation()))
        if (self.scaler.params.raw_data.error_models.sdfac_refine
                .plot_refinement_steps):
            plt.ion()
            plt.pause(0.05)
    return corr, slope, offset

def show_histogram(data=None, n_slots=None, data_min=None, data_max=None,
                   log=None):
    from cctbx.array_family import flex

    hm = flex.histogram(data=data, n_slots=n_slots, data_min=data_min,
                        data_max=data_max)
    lc_1 = hm.data_min()
    s_1 = enumerate(hm.slots())
    for (i_1, n_1) in s_1:
        hc_1 = hm.data_min() + hm.slot_width() * (i_1 + 1)
        print("%10.4f - %-10.4f : %d" % (lc_1, hc_1, n_1), file=log)
        lc_1 = hc_1

def show_histogram(data, n_slots):
    hm = flex.histogram(data=data, n_slots=n_slots)
    lc_1 = hm.data_min()
    s_1 = enumerate(hm.slots())
    tmp = None
    for (i_1, n_1) in s_1:
        hc_1 = hm.data_min() + hm.slot_width() * (i_1 + 1)
        # print("%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1))
        lc_1 = hc_1
        if (tmp is None):
            tmp = hc_1
    return tmp

def __init__(self, rs_vectors, percentile=0.05):
    from scitbx.array_family import flex

    NEAR = 10
    self.NNBIN = 5  # target number of neighbors per histogram bin

    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor

    query = flex.double()
    for spot in rs_vectors:  # spots, in reciprocal space xyz
        query.append(spot[0])
        query.append(spot[1])
        query.append(spot[2])
    # Can't do nearest neighbor with too few spots
    assert len(rs_vectors) > NEAR

    IS_adapt = AnnAdaptor(data=query, dim=3, k=1)
    IS_adapt.query(query)

    direct = flex.double()
    for i in range(len(rs_vectors)):
        direct.append(1.0 / math.sqrt(IS_adapt.distances[i]))

    # determine the most probable nearest neighbor distance (direct space)
    hst = flex.histogram(direct, n_slots=int(len(rs_vectors) / self.NNBIN))
    centers = hst.slot_centers()
    islot = hst.slots()
    highest_bin_height = flex.max(islot)
    most_probable_neighbor = centers[list(islot).index(highest_bin_height)]

    if False:  # to print out the histogramming analysis
        smin, smax = flex.min(direct), flex.max(direct)
        stats = flex.mean_and_variance(direct)
        import sys
        out = sys.stdout
        print(" range: %6.2f - %.2f" % (smin, smax), file=out)
        print(" mean: %6.2f +/- %6.2f on N = %d" %
              (stats.mean(), stats.unweighted_sample_standard_deviation(),
               direct.size()), file=out)
        hst.show(f=out, prefix=" ", format_cutoffs="%6.2f")
        print("", file=out)

    # determine the 5th-percentile direct-space distance
    perm = flex.sort_permutation(direct, reverse=True)
    percentile = direct[perm[int(percentile * len(rs_vectors))]]

    MAXTOL = 1.5  # Margin of error for max unit cell estimate
    self.max_cell = max(MAXTOL * most_probable_neighbor, MAXTOL * percentile)

    if False:
        self.plot(direct)

def multihist(unmerged_mtz):
    from iotbx.reflection_file_reader import any_reflection_file

    reader = any_reflection_file(unmerged_mtz)
    assert reader.file_type() == "ccp4_mtz"
    mtz_object = reader.file_content()
    arrays = reader.as_miller_arrays(merge_equivalents=False)
    for ma in arrays:
        if ma.info().labels == ["I", "SIGI"]:
            intensities = ma
        elif ma.info().labels == ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]:
            intensities = ma
    indices = mtz_object.extract_original_index_miller_indices()
    intensities = intensities.customized_copy(indices=indices,
                                              info=intensities.info())
    intensities = intensities.resolution_filter(d_min=2.0)
    merging = intensities.merge_equivalents()
    multiplicities = merging.redundancies().complete_array(new_data_value=0)
    mult_acentric = multiplicities.select_acentric().data()
    mult_centric = multiplicities.select_centric().data()
    from scitbx.array_family import flex

    max_mult = max(flex.max(mult_acentric), flex.max(mult_centric))
    hist_acentric = flex.histogram(mult_acentric.as_double(), data_min=0,
                                   data_max=max_mult, n_slots=max_mult)
    hist_centric = flex.histogram(mult_centric.as_double(), data_min=0,
                                  data_max=max_mult, n_slots=max_mult)
    for s, a, c in zip(hist_acentric.slot_centers(), hist_acentric.slots(),
                       hist_centric.slots()):
        print(s, a, c)

def show_histogram(data, n_slots, data_min, data_max, log=sys.stdout):
    from cctbx.array_family import flex

    h_data = flex.double()
    hm = flex.histogram(
        data=data, n_slots=n_slots, data_min=data_min, data_max=data_max)
    lc_1 = hm.data_min()
    s_1 = enumerate(hm.slots())
    for (i_1, n_1) in s_1:
        hc_1 = hm.data_min() + hm.slot_width() * (i_1 + 1)
        # print("%10.5f - %-10.5f : %d" % (lc_1, hc_1, n_1), file=log)
        # print("%10.2f : %d" % ((lc_1 + hc_1) / 2, n_1), file=log)
        print("%10.2f : %10.4f" % ((lc_1 + hc_1) / 2,
                                   n_1 * 100. / data.size()), file=log)
        lc_1 = hc_1
    return h_data

def blank_counts_analysis(reflections, scan, phi_step, fractional_loss): if not len(reflections): raise Sorry("Input contains no reflections") xyz_px = reflections["xyzobs.px.value"] x_px, y_px, z_px = xyz_px.parts() phi = scan.get_angle_from_array_index(z_px) osc = scan.get_oscillation()[1] n_images_per_step = iceil(phi_step / osc) phi_step = n_images_per_step * osc array_range = scan.get_array_range() phi_min = scan.get_angle_from_array_index(array_range[0]) phi_max = scan.get_angle_from_array_index(array_range[1]) assert phi_min <= flex.min(phi) assert phi_max >= flex.max(phi) n_steps = iceil((phi_max - phi_min) / phi_step) hist = flex.histogram(z_px, n_slots=n_steps) counts = hist.slots() fractional_counts = counts.as_double() / flex.max(counts) potential_blank_sel = fractional_counts <= fractional_loss xmin, xmax = zip(*[(slot_info.low_cutoff, slot_info.high_cutoff) for slot_info in hist.slot_infos()]) d = { "data": [ { "x": list(hist.slot_centers()), "y": list(hist.slots()), "xlow": xmin, "xhigh": xmax, "blank": list(potential_blank_sel), "type": "bar", "name": "blank_counts_analysis", } ], "layout": {"xaxis": {"title": "z observed (images)"}, "yaxis": {"title": "Number of reflections"}, "bargap": 0}, } blank_regions = blank_regions_from_sel(d["data"][0]) d["blank_regions"] = blank_regions return d
def compare_data_with_model(cif_file, mtz_file):
    from iotbx import cif, mtz
    from scitbx.array_family import flex
    import math
    import random

    # read model, compute Fc, square to F^2
    model = cif.reader(file_path=cif_file).build_crystal_structures()["1"]
    ic = (
        model.structure_factors(anomalous_flag=True, d_min=0.55,
                                algorithm="direct")
        .f_calc()
        .as_intensity_array()
    )

    # read experimental measurements
    m = mtz.object(mtz_file)
    mad = m.as_miller_arrays_dict(merge_equivalents=False)
    idata = mad[("HKL_base", "HKL_base", "I")].as_anomalous_array()
    match = idata.match_indices(ic)

    # pair up, extract to vanilla arrays for easier handling
    pairs = match.pairs()
    icalc = flex.double()
    iobs = flex.double()
    sobs = flex.double()
    for p in pairs:
        iobs.append(idata.data()[p[0]])
        sobs.append(idata.sigmas()[p[0]])
        icalc.append(ic.data()[p[1]])

    # estimate conversion scale - apply to F^2
    icalc *= flex.sum(iobs) / flex.sum(icalc)
    d = (iobs - icalc) / sobs
    dh = flex.histogram(d, data_min=-6, data_max=6, n_slots=120)

    # mean and standard deviation
    m = flex.sum(d) / d.size()
    s = math.sqrt(flex.sum(d * d) / d.size() - m * m)
    print(m, s)

def plot_rij_histogram(rij_matrix, key="cosym_rij_histogram"): """Plot a histogram of the rij values. Args: plot_name (str): The file name to save the plot to. If this is not defined then the plot is displayed in interactive mode. """ rij = rij_matrix.as_1d() rij = rij.select(rij != 0) hist = flex.histogram( rij, data_min=min(-1, flex.min(rij)), data_max=max(1, flex.max(rij)), n_slots=100, ) d = { key: { "data": [{ "x": list(hist.slot_centers()), "y": list(hist.slots()), "type": "bar", "name": "Rij histogram", }], "layout": { "title": "Distribution of values in the Rij matrix", "xaxis": { "title": "r<sub>ij</sub>" }, "yaxis": { "title": "Frequency" }, "bargap": 0, }, "help": """\ A histogram of the values of the Rij matrix of pairwise correlation coefficients. A unimodal distribution of values may suggest that no indexing ambiguity is evident, whereas a bimodal distribution can be indicative of the presence of an indexing ambiguity. """, } } return d
def recalculate(self):
    from scitbx.array_family import flex

    measurable_only = self.meas_box.GetValue()
    obs_type = self.obs_type.GetStringSelection()
    n_bins = self.n_bins.GetValue()
    d_ano_rel = self._array.bijvoet_ratios(obs_type=obs_type,
                                           measurable_only=measurable_only)
    hist = flex.histogram(d_ano_rel, n_slots=n_bins)
    hist.show(f=sys.stdout)
    if (obs_type == "intensities"):
        x_label = "D_anom(I[hkl]) / I_mean[hkl]"
    else:
        x_label = "D_anom(F[hkl]) / F_mean[hkl]"
    self.show_histogram(data=list(d_ano_rel), n_bins=n_bins,
                        x_label=x_label, y_label="# hkl",
                        title="Bijvoet ratios for %s" % self._array_name,
                        log_scale=self.log_box.GetValue())

def event(self, evt, env):
    """The event() function is called for every L1Accept transition. Once
    self.nshots shots are accumulated, this function turns into a nop.

    @param evt Event data object, a configure object
    @param env Environment object
    """
    super(pixel_histograms, self).event(evt, env)
    if (evt.get("skip_event")):
        return
    if self.sigma_scaling:
        flex_cspad_img = self.cspad_img.as_double()
        flex_cspad_img_sel = flex_cspad_img.as_1d().select(
            self.dark_mask.as_1d())
        flex_dark_stddev = self.dark_stddev.select(
            self.dark_mask.as_1d()).as_double()
        assert flex_dark_stddev.count(0) == 0
        flex_cspad_img_sel /= flex_dark_stddev
        flex_cspad_img.as_1d().set_selected(
            self.dark_mask.as_1d().iselection(), flex_cspad_img_sel)
        self.cspad_img = flex_cspad_img.iround()
    pixels = self.cspad_img.deep_copy()
    dimensions = pixels.all()
    if self.roi is None:
        self.roi = (0, dimensions[1], 0, dimensions[0])
    for i in range(self.roi[2], self.roi[3]):
        for j in range(self.roi[0], self.roi[1]):
            if (i, j) not in self.histograms:
                self.histograms[(i, j)] = flex.histogram(
                    flex.double(), self.hist_min, self.hist_max,
                    self.n_slots)
            self.histograms[(i, j)].update(pixels[i, j])
    self.nmemb += 1
    if 0 and math.log(self.nmemb, 2) % 1 == 0:
        self.endjob(env)
        print(self.nmemb)

def show_histogram(data, n_slots, smooth=True):
    triplets = []
    histogram = flex.histogram(data=data, n_slots=n_slots)
    l = histogram.data_min()
    for i, s in enumerate(histogram.slots()):
        r = histogram.data_min() + histogram.slot_width() * (i + 1)
        triplets.append([l, r, s])
        print("%8.4f %8.4f %d" % (l, r, s))
        l = r
    if (smooth):
        print("... smooth histogram")
        triplets_smooth = []
        for i, t in enumerate(triplets):
            values = flex.double()
            for j in [-1, 0, 1]:
                if (i + j >= 0 and i + j < len(triplets)):
                    values.append(float(triplets[i + j][2]))
            triplets_smooth.append((t[0], t[1], flex.mean(values)))
        for t in triplets_smooth:
            print("%8.4f %8.4f %d" % (t[0], t[1], int("%.0f" % t[2])))
    return histogram

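# Hypothetical driver for the smoothing variant above (not from the
# original): histogram 1000 Gaussian deviates and print raw and smoothed
# slot counts.
import random
from scitbx.array_family import flex

values = flex.double([random.gauss(0.0, 1.0) for _ in range(1000)])
show_histogram(values, n_slots=12, smooth=True)
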
def centroidify(width, shift, count):
    # Assumed import locations: variate and normal_distribution are taken
    # from scitbx.random, flex from scitbx.array_family.
    from scitbx.array_family import flex
    from scitbx.random import variate, normal_distribution

    g = variate(normal_distribution(mean=shift, sigma=width))
    values = flex.double([next(g) for c in range(count)])
    hist = flex.histogram(data=values, n_slots=20, data_min=-10, data_max=10)
    true_mean = flex.sum(values) / values.size()
    true_variance = sum(
        [(v - true_mean)**2 for v in values]) / (values.size() - 1)
    total = 1.0 * flex.sum(hist.slots())
    hist_mean = sum([c * v for c, v in zip(hist.slot_centers(),
                                           hist.slots())]) / total
    # equation 6
    hist_var = sum([(v / total)**2 * (1.0 / 12.0) for v in hist.slots()])
    # print input settings
    print("%8.5f %4.1f %4d" % (width**2 / count, shift, count), end=" ")
    # true variance / mean of distribution
    print("%6.3f %8.5f" % (true_mean, true_variance / values.size()),
          end=" ")
    # putative values of same derived from histogram
    print("%6.3f %8.5f" % (hist_mean, hist_var))

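# Possible driver loop for centroidify (an assumption, mirroring the column
# layout printed above): scan a few sample sizes at fixed width and shift.
for count in (10, 100, 1000, 10000):
    centroidify(width=1.0, shift=0.0, count=count)
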
def __init__(self, cmd_list, nprocs=1, out=sys.stdout, log=None,
             verbosity=DEFAULT_VERBOSITY, max_time=180):
    if (log is None):
        log = null_out()
    self.out = multi_out()
    self.log = log
    self.out.register("stdout", out)
    self.out.register("log", log)
    self.verbosity = verbosity
    self.quiet = (verbosity == 0)
    self.results = []
    self.pool = None

    # Filter cmd list for duplicates.
    self.cmd_list = []
    for cmd in cmd_list:
        if (not cmd in self.cmd_list):
            self.cmd_list.append(cmd)
        else:
            print("Test %s repeated, skipping" % cmd, file=self.out)

    # Set number of processors.
    if (nprocs is Auto):
        nprocs = cpu_count()
    nprocs = min(nprocs, len(self.cmd_list))

    # Starting summary.
    if (self.verbosity > 0):
        print("Running %d tests on %s processors:" %
              (len(self.cmd_list), nprocs), file=self.out)
        for cmd in self.cmd_list:
            print(" %s" % cmd, file=self.out)
        print("", file=self.out)

    # Either run tests in parallel or run parallel tests, but
    # can't run parallel tests in parallel (cctbx#95)
    os.environ['OPENBLAS_NUM_THREADS'] = "1"

    t_start = time.time()
    if nprocs > 1:
        # Run the tests with multiprocessing pool.
        self.pool = Pool(processes=nprocs)
        for command in self.cmd_list:
            self.pool.apply_async(run_command, [command, verbosity, out],
                                  callback=self.save_result)
        try:
            self.pool.close()
        except KeyboardInterrupt:
            print("Caught KeyboardInterrupt, terminating", file=self.out)
            self.pool.terminate()
        finally:
            try:
                self.pool.join()
            except KeyboardInterrupt:
                pass
    else:
        # Run tests serially.
        for command in self.cmd_list:
            rc = run_command(command, verbosity=verbosity, out=out)
            if self.save_result(rc) == False:
                break

    # Print ending summary.
    t_end = time.time()
    print("=" * 80, file=self.out)
    print("", file=self.out)
    print("Tests finished. Elapsed time: %.2fs" % (t_end - t_start),
          file=self.out)
    print("", file=self.out)

    # Process results for errors and warnings.
    extra_stderr = len(
        [result for result in self.results if result.stderr_lines])
    longjobs = [result for result in self.results
                if result.wall_time > max_time]
    warnings = [result for result in self.results
                if result.alert_status == Status.WARNING]
    failures = [result for result in self.results
                if result.alert_status == Status.FAIL]
    self.finished = len(self.results)
    self.failure = len(failures)
    self.warning = len(warnings)

    # Try writing the XML result file
    write_JUnit_XML(self.results, "output.xml")

    # Run time distribution.
    if (libtbx.env.has_module("scitbx")):
        from scitbx.array_family import flex
        print("Distribution of test runtimes:", file=self.out)
        hist = flex.histogram(
            flex.double([result.wall_time for result in self.results]),
            n_slots=10)
        hist.show(f=self.out, prefix=" ", format_cutoffs="%.1fs")
        print("", file=self.out)

    # Long job warning.
    if longjobs:
        print("", file=self.out)
        print("Warning: the following jobs took at least %d seconds:" %
              max_time, file=self.out)
        for result in sorted(longjobs, key=lambda result: result.wall_time):
            print(" %s: %.1fs" % (result.command, result.wall_time),
                  file=self.out)
    else:
        # Just print 5 worst offenders to encourage developers to check
        # them out
        print("", file=self.out)
        print("Warning: the following are 5 longest jobs:", file=self.out)
        for result in sorted(self.results,
                             key=lambda result: -result.wall_time)[:5]:
            print(" %s: %.1fs" % (result.command, result.wall_time),
                  file=self.out)
    print("Please try to reduce overall runtime - consider splitting up "
          "these tests.", file=self.out)
    print("", file=self.out)

    # Failures.
    if failures:
        print("", file=self.out)
        print("Error: the following jobs returned non-zero exit codes or "
              "suspicious stderr output:", file=self.out)
        print("", file=self.out)
        for result in warnings:
            self.display_result(result, alert=Status.WARNING, out=self.out,
                                log_return=self.out, log_stderr=self.out)
        for result in failures:
            self.display_result(result, alert=Status.FAIL, out=self.out,
                                log_return=self.out, log_stderr=self.out)
        print("", file=self.out)
        print("Please verify these tests manually.", file=self.out)
        print("", file=self.out)

    # Summary
    print("Summary:", file=self.out)
    print(" Tests run :", self.finished, file=self.out)
    print(" Failures :", self.failure, file=self.out)
    print(" Warnings (possible failures) :", self.warning, file=self.out)
    print(" Stderr output (discouraged) :", extra_stderr, file=self.out)
    if (self.finished != len(self.cmd_list)):
        print("*" * 80, file=self.out)
        print(" WARNING: NOT ALL TESTS FINISHED!", file=self.out)
        print("*" * 80, file=self.out)

class run_command_list(object):
    def __init__(self, cmd_list, nprocs=1, out=sys.stdout, log=None,
                 verbosity=DEFAULT_VERBOSITY, max_time=180):
        if (log is None):
            log = null_out()
        self.out = multi_out()
        self.log = log
        self.out.register("stdout", out)
        self.out.register("log", log)
        self.verbosity = verbosity
        self.quiet = (verbosity == 0)
        self.results = []

        # Filter cmd list for duplicates.
        self.cmd_list = []
        for cmd in cmd_list:
            if (not cmd in self.cmd_list):
                self.cmd_list.append(cmd)
            else:
                print("Test %s repeated, skipping" % cmd, file=self.out)

        # Set number of processors.
        if (nprocs is Auto):
            nprocs = cpu_count()
        nprocs = min(nprocs, len(self.cmd_list))

        # Starting summary.
        if (self.verbosity > 0):
            print("Running %d tests on %s processors:" %
                  (len(self.cmd_list), nprocs), file=self.out)
            for cmd in self.cmd_list:
                print(" %s" % cmd, file=self.out)
            print("", file=self.out)

        t_start = time.time()
        if nprocs > 1:
            # Run the tests with multiprocessing pool.
            pool = Pool(processes=nprocs)
            for command in self.cmd_list:
                pool.apply_async(run_command, [command, verbosity, out],
                                 callback=self.save_result)
            try:
                pool.close()
            except KeyboardInterrupt:
                print("Caught KeyboardInterrupt, terminating",
                      file=self.out)
                pool.terminate()
            finally:
                pool.join()
        else:
            # Run tests serially.
            for command in self.cmd_list:
                rc = run_command(command, verbosity=verbosity, out=out)
                self.save_result(rc)

        # Print ending summary.
        t_end = time.time()
        print("=" * 80, file=self.out)
        print("", file=self.out)
        print("Tests finished. Elapsed time: %.2fs" % (t_end - t_start),
              file=self.out)
        print("", file=self.out)
        test_cases = []

        # Process results for errors and warnings.
        extra_stderr = len(
            [result for result in self.results if result.stderr_lines])
        longjobs = [result for result in self.results
                    if result.wall_time > max_time]
        warnings = [result for result in self.results
                    if self.check_alert(result) == 1]
        failures = [result for result in self.results
                    if self.check_alert(result) == 2]
        self.finished = len(self.results)
        self.failure = len(failures)
        self.warning = len(warnings)

        # Output JUnit XML if possible
        try:
            from junit_xml import TestSuite, TestCase
            import re

            def decode_string(string):
                # Py3 adaptation of the original Py2 unicode handling:
                # replace non-ASCII characters with XML character references.
                return string.encode('ascii',
                                     'xmlcharrefreplace').decode('ascii')

            for result in self.results:
                test_name = reconstruct_test_name(result.command)
                plain_stdout = [decode_string(s)
                                for s in result.stdout_lines]
                plain_stderr = [decode_string(s)
                                for s in result.stderr_lines]
                output = '\n'.join(plain_stdout + plain_stderr)
                tc = TestCase(classname=test_name[0],
                              name=test_name[1],
                              elapsed_sec=result.wall_time,
                              stdout='\n'.join(plain_stdout),
                              stderr='\n'.join(plain_stderr))
                if result.return_code == 0:
                    # Identify skipped tests
                    if re.search('skip', output, re.IGNORECASE):
                        # find first line including word 'skip' and use it
                        # as message
                        skipline = re.search(
                            '^((.*)skip(.*))$', output,
                            re.IGNORECASE | re.MULTILINE).group(1)
                        tc.add_skipped_info(skipline)
                else:
                    # Test failed. Extract error message and stack trace
                    # if possible.
                    error_message = 'exit code %d' % result.return_code
                    error_output = '\n'.join(plain_stderr)
                    if plain_stderr:
                        error_message = plain_stderr[-1]
                    if len(plain_stderr) > 20:
                        error_output = '\n'.join(plain_stderr[-20:])
                    tc.add_failure_info(message=error_message,
                                        output=error_output)
                test_cases.append(tc)
            ts = TestSuite("libtbx.run_tests_parallel",
                           test_cases=test_cases)
            with open('output.xml', 'w') as f:
                # Py3 adaptation: write the XML string directly.
                f.write(TestSuite.to_xml_string([ts], prettyprint=True))
        except ImportError:
            pass

        # Run time distribution.
        if (libtbx.env.has_module("scitbx")):
            from scitbx.array_family import flex
            print("Distribution of test runtimes:", file=self.out)
            hist = flex.histogram(
                flex.double([result.wall_time
                             for result in self.results]),
                n_slots=10)
            hist.show(f=self.out, prefix=" ", format_cutoffs="%.1fs")
            print("", file=self.out)

        # Long job warning.
        if longjobs:
            print("", file=self.out)
            print("Warning: the following jobs took at least %d seconds:" %
                  max_time, file=self.out)
            for result in sorted(longjobs,
                                 key=lambda result: result.wall_time):
                print(" %s: %.1fs" % (result.command, result.wall_time),
                      file=self.out)
            print("Please try to reduce overall runtime - consider "
                  "splitting up these tests.", file=self.out)

        # Failures.
        if failures:
            print("", file=self.out)
            print("Error: the following jobs returned non-zero exit codes "
                  "or suspicious stderr output:", file=self.out)
            print("", file=self.out)
            for result in warnings:
                self.display_result(result, alert=1, out=self.out,
                                    log_return=self.out,
                                    log_stderr=self.out)
            for result in failures:
                self.display_result(result, alert=2, out=self.out,
                                    log_return=self.out,
                                    log_stderr=self.out)
            print("", file=self.out)
            print("Please verify these tests manually.", file=self.out)
            print("", file=self.out)

        # Summary
        print("Summary:", file=self.out)
        print(" Tests run :", self.finished, file=self.out)
        print(" Failures :", self.failure, file=self.out)
        print(" Warnings (possible failures) :", self.warning,
              file=self.out)
        print(" Stderr output (discouraged) :", extra_stderr,
              file=self.out)
        if (self.finished != len(self.cmd_list)):
            print("*" * 80, file=self.out)
            print(" WARNING: NOT ALL TESTS FINISHED!", file=self.out)
            print("*" * 80, file=self.out)

def del_anom_normal_plot(intensities, strong_cutoff=0.0):
    """Make a normal probability plot of the normalised anomalous differences."""
    diff_array = intensities.anomalous_differences()
    if not diff_array.data().size():
        return {}
    delta = diff_array.data() / diff_array.sigmas()

    n = delta.size()
    y = np.sort(flumpy.to_numpy(delta))
    d = 0.5 / n
    v = np.linspace(start=d, stop=1.0 - d, endpoint=True, num=n)
    x = norm.ppf(v)

    H, xedges, yedges = np.histogram2d(x, y, bins=(200, 200))
    nonzeros = np.nonzero(H)
    z = np.empty(H.shape)
    z[:] = np.NAN
    z[nonzeros] = H[nonzeros]

    # also make a histogram
    histy = flex.histogram(flumpy.from_numpy(y), n_slots=100)
    # make a gaussian for reference also
    n = y.size
    width = histy.slot_centers()[1] - histy.slot_centers()[0]
    gaussian = []
    from math import exp, pi

    for x in histy.slot_centers():
        gaussian.append(n * width * exp(-(x**2) / 2.0) / ((2.0 * pi) ** 0.5))

    title = "Normal probability plot of anomalous differences"
    plotname = "normal_distribution_plot"
    if strong_cutoff > 0.0:
        title += f" (d > {strong_cutoff:.2f})"
        plotname += "_lowres"
    else:
        title += " (all data)"
        plotname += "_highres"
    return {
        plotname: {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Viridis",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": title,
                "xaxis": {
                    "anchor": "y",
                    "title": "expected delta",
                    "range": [-4, 4],
                },
                "yaxis": {
                    "anchor": "x",
                    "title": "observed delta",
                    "range": [-5, 5],
                },
            },
            "help": """\
This plot shows the normalised anomalous differences, sorted in order and
plotted against the expected order based on a normal distribution model.
A true normal distribution of deviations would give the straight line
indicated.

[1] P. L. Howell and G. D. Smith, J. Appl. Cryst. (1992). 25, 81-86
https://doi.org/10.1107/S0021889891010385
[2] P. Evans, Acta Cryst. (2006). D62, 72-82
https://doi.org/10.1107/S0907444905036693
""",
        }
    }

def eval_logs(file_names, out=None): if (out is None): out = sys.stdout from scitbx.array_family import flex from libtbx.str_utils import format_value min_secs_epoch = None max_secs_epoch = None n_refinements_initialized = 0 gaps = flex.double() infos = flex.std_string() n_stale = 0 n_unfinished = 0 n_exception = 0 n_traceback = 0 n_abort = 0 seconds = [] space_groups_by_cod_id = {} for file_name in file_names: have_time_end = False cod_id = None n_scatt = None iso = None file_str = open(file_name).read() if (file_str.find(chr(0)) >= 0): n_stale += 1 continue for line in file_str.splitlines(): if (line.startswith("cod_id: ")): cod_id = line[10:] iso = None elif (line.startswith("Space group: ")): assert cod_id is not None space_group = line.split(None, 2)[2] tabulated = space_groups_by_cod_id.setdefault(cod_id, space_group) assert tabulated == space_group elif (line.startswith("Number of scatterers: ")): assert cod_id is not None n_scatt = int(line.split(": ",1)[1]) elif (line.startswith("Number of refinable parameters: ")): assert cod_id is not None n_refinements_initialized += 1 elif (line.startswith("iso cc, r1: ")): assert cod_id is not None assert iso is None iso = line.split(": ",1)[1] elif ( line.startswith("dev cc, r1: ") or line.startswith("ls_simple cc, r1: ") or line.startswith("ls_lm cc, r1: ") or line.startswith("shelxl_fm cc, r1: ") or line.startswith("shelxl_cg cc, r1: ") or line.startswith("shelx76 cc, r1: ")): assert iso is not None ref = line.split(": ",1)[1] gap = float(ref.split()[1]) - float(iso.split()[1]) gaps.append(gap) infos.append(" : ".join([ cod_id, iso, ref, "%.3f" % gap, str(n_scatt), space_groups_by_cod_id[cod_id]])) cod_id = None n_scatt = None iso = None else: def get_secs_epoch(): return float(line.split()[-2][1:]) if (line.find("EXCEPTION") >= 0): n_exception += 1 if (line.startswith("Traceback")): n_traceback += 1 if (line.find("Abort") >= 0): n_abort += 1 if (line.startswith("wall clock time: ")): if (line.endswith(" seconds")): secs = float(line.split()[-2]) else: _, fld = line.split("(", 1) assert fld.endswith(" seconds total)") secs = float(fld.split()[0]) seconds.append(secs) elif (line.startswith("TIME BEGIN cod_refine: ")): s = get_secs_epoch() if (min_secs_epoch is None or s < min_secs_epoch): min_secs_epoch = s elif (line.startswith("TIME END cod_refine: ")): s = get_secs_epoch() if (max_secs_epoch is None or s > max_secs_epoch): max_secs_epoch = s have_time_end = True if (not have_time_end): n_unfinished += 1 perm = flex.sort_permutation(gaps) gaps = gaps.select(perm) n_missing = n_refinements_initialized - gaps.size() print >> out, "Number of results: %d (%d missing)" % (gaps.size(), n_missing) assert n_missing >= 0 print >> out, "Stale, Unfinished, Exceptions, Tracebacks, Abort:", \ n_stale, n_unfinished, n_exception, n_traceback, n_abort if (n_exception + n_abort < n_missing): print "WARNING: more missing results than expected." 
  if (len(seconds) != 0):
    if (min_secs_epoch is not None and max_secs_epoch is not None):
      g = max_secs_epoch - min_secs_epoch
    else:
      g = None
    print >> out, "min, max, global seconds: %.2f %.2f %s" % (
      min(seconds), max(seconds), format_value("%.2f", g))
  print >> out
  def stats(f):
    n = f.count(True)
    # 100. forces float division under Python 2
    return "%6d = %5.2f %%" % (n, 100. * n / max(1, n_refinements_initialized))
  print >> out, "gaps below -0.05:", stats(gaps < -0.05)
  print >> out, "gaps below -0.01:", stats(gaps < -0.01)
  print >> out, "gaps below 0.01:", stats(gaps < 0.01)
  print >> out, "gaps above 0.01:", stats(gaps > 0.01)
  print >> out, "gaps above 0.05:", stats(gaps > 0.05)
  print >> out
  print >> out, "Histogram of gaps:"
  flex.histogram(gaps, n_slots=10).show(f=out)
  print >> out
  infos = infos.select(perm)
  for info in infos:
    print >> out, info
  return (len(file_names), n_unfinished, min_secs_epoch, max_secs_epoch)
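# Illustrative sketch (not part of the original source): the gap summary above
# is boolean selections plus a flex.histogram over the signed gaps. Assuming a
# working scitbx install:
import sys
from scitbx.array_family import flex

gaps = flex.double([-0.08, -0.02, 0.004, 0.02, 0.07, 0.11])
for label, sel in [("below -0.05", gaps < -0.05), ("above 0.05", gaps > 0.05)]:
    n = sel.count(True)
    print("gaps %s: %d = %.2f %%" % (label, n, 100.0 * n / gaps.size()))
flex.histogram(gaps, n_slots=10).show(f=sys.stdout, prefix="  ")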
def __init__(self, reflections, step_size=45, tolerance=1.5,
             max_height_fraction=0.25, percentile=0.05,
             histogram_binning='linear'):
  self.tolerance = tolerance # Margin of error for max unit cell estimate
  import math
  from scitbx.array_family import flex
  NEAR = 10
  self.NNBIN = 5 # target number of neighbors per histogram bin
  self.histogram_binning = histogram_binning

  direct = flex.double()
  if 'entering' in reflections:
    entering_flags = reflections['entering']
  else:
    entering_flags = flex.bool(reflections.size(), True)
  rs_vectors = reflections['rlp']
  phi_deg = reflections['xyzobs.mm.value'].parts()[2] * (180/math.pi)

  d_spacings = flex.double()
  # nearest neighbor analysis
  from annlib_ext import AnnAdaptor
  for imageset_id in range(flex.max(reflections['imageset_id'])+1):
    # Use a separate selection per level; the original in-place "sel &= ..."
    # progressively narrowed the mask across loop iterations.
    sel_imageset = reflections['imageset_id'] == imageset_id
    if sel_imageset.count(True) == 0:
      continue
    phi_min = flex.min(phi_deg.select(sel_imageset))
    phi_max = flex.max(phi_deg.select(sel_imageset))
    d_phi = phi_max - phi_min
    n_steps = max(int(math.ceil(d_phi / step_size)), 1)

    for n in range(n_steps):
      sel_step = (sel_imageset
                  & (phi_deg >= (phi_min + n*step_size))
                  & (phi_deg < (phi_min + (n+1)*step_size)))

      for entering in (True, False):
        sel_entering = sel_step & (entering_flags == entering)
        if sel_entering.count(True) == 0:
          continue
        query = flex.double()
        query.extend(rs_vectors.select(sel_entering).as_double())
        if query.size() == 0:
          continue
        IS_adapt = AnnAdaptor(data=query, dim=3, k=1)
        IS_adapt.query(query)
        direct.extend(1/flex.sqrt(IS_adapt.distances))
        d_spacings.extend(1/rs_vectors.select(sel_entering).norms())

  assert len(direct) > NEAR, (
    "Too few spots (%d) for nearest neighbour analysis." % len(direct))

  perm = flex.sort_permutation(direct)
  direct = direct.select(perm)
  d_spacings = d_spacings.select(perm)

  # reject top 1% of longest distances to hopefully get rid of any outliers
  n = int(math.floor(0.99*len(direct)))
  direct = direct[:n]
  d_spacings = d_spacings[:n]

  # determine the most probable nearest neighbor distance (direct space)
  if self.histogram_binning == 'log':
    hst = flex.histogram(
      flex.log10(direct), n_slots=int(len(direct)/self.NNBIN))
  else:
    hst = flex.histogram(direct, n_slots=int(len(direct)/self.NNBIN))
  centers = hst.slot_centers()
  if self.histogram_binning == 'log':
    self.slot_start = flex.double(
      [10**s for s in hst.slot_centers() - 0.5 * hst.slot_width()])
    self.slot_end = flex.double(
      [10**s for s in hst.slot_centers() + 0.5 * hst.slot_width()])
    self.slot_width = self.slot_end - self.slot_start
  else:
    self.slot_start = hst.slot_centers() - 0.5 * hst.slot_width()
    self.slot_end = hst.slot_centers() + 0.5 * hst.slot_width()
    self.slot_width = hst.slot_width()
  self.relative_frequency = hst.slots().as_double() / self.slot_width
  highest_bin_height = flex.max(self.relative_frequency)

  if False:
    # to print out the histogramming analysis
    smin, smax = flex.min(direct), flex.max(direct)
    stats = flex.mean_and_variance(direct)
    import sys
    out = sys.stdout
    print >> out, " range: %6.2f - %.2f" % (smin, smax)
    print >> out, " mean: %6.2f +/- %6.2f on N = %d" % (
      stats.mean(), stats.unweighted_sample_standard_deviation(), direct.size())
    hst.show(f=out, prefix=" ", format_cutoffs="%6.2f")
    print >> out, ""

  # choose a max cell based on bins above a given fraction of the highest
  # bin height
  isel = (self.relative_frequency.as_double() > (
    max_height_fraction * highest_bin_height)).iselection()
  self.max_cell = (
    self.tolerance * self.slot_end[int(flex.max(isel.as_double()))])

  # determine the direct-space distance at the given top percentile
  perm = flex.sort_permutation(direct, reverse=True)
  self.percentile = direct[perm[int(percentile * len(direct))]]
  self.reciprocal_lattice_vectors = rs_vectors
  self.d_spacings = d_spacings
  self.direct = direct
  self.histogram = hst
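# Illustrative sketch (not part of the original source): the max_cell rule
# above keeps the last histogram bin whose height exceeds a fraction of the
# tallest bin, then scales its upper edge by a tolerance. The same idea in
# plain numpy, with the defaults above (0.25, 1.5) as stand-ins:
import numpy as np

rng = np.random.default_rng(1)
nn_distances = rng.gamma(shape=5.0, scale=10.0, size=2000)  # stand-in for nearest-neighbour distances
heights, edges = np.histogram(nn_distances, bins=nn_distances.size // 5)
keep = np.nonzero(heights > 0.25 * heights.max())[0]        # max_height_fraction = 0.25
max_cell = 1.5 * edges[keep.max() + 1]                      # tolerance = 1.5, upper bin edge
print("estimated max cell: %.1f A" % max_cell)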
def common_mode(self, img, stddev, mask):
  """The common_mode() function returns the mode of the image stored in
  the array pointed to by @p img.  @p mask must be such that the
  @p stddev at the selected pixels is greater than zero.

  @param img    2D integer array of the image
  @param stddev 2D integer array of the standard deviation of each
                pixel in @p img
  @param mask   2D Boolean array, @c True if the pixel is to be
                included, @c False otherwise
  @return       Mode of the image, as a real number
  """

  # Flatten the image and take out inactive pixels XXX because we
  # cannot take means and medians of 2D arrays?
  img_1d = img.as_1d().select(mask.as_1d()).as_double()
  assert img_1d.size() > 0

  if (self.common_mode_correction == "mean"):
    # The common mode is approximated by the mean of the pixels with
    # signal-to-noise ratio less than a given threshold.  XXX Breaks
    # if the selection is empty!
    THRESHOLD_SNR = 2
    img_snr = img_1d / stddev.as_double().as_1d().select(mask.as_1d())
    return (flex.mean(img_1d.select(img_snr < THRESHOLD_SNR)))

  elif (self.common_mode_correction == "median"):
    return (flex.median(img_1d))

  # Identify the common-mode correction as the peak of the histogram
  # of pixel values (the "standard" common-mode correction, as
  # previously implemented in this class).
  hist_min = -40
  hist_max = 40
  n_slots = 100
  hist = flex.histogram(img_1d, hist_min, hist_max, n_slots=n_slots)
  slots = hist.slots()
  i = flex.max_index(slots)
  common_mode = list(hist.slot_infos())[i].center()
  if (self.common_mode_correction == "mode"):
    return (common_mode)

  # Determine the common-mode correction from the peak of a single
  # Gaussian function fitted to the histogram.
  from scitbx.math.curve_fitting import single_gaussian_fit
  x = hist.slot_centers()
  y = slots.as_double()
  fit = single_gaussian_fit(x, y)
  scale, mu, sigma = fit.a, fit.b, fit.c
  self.logger.debug("fitted gaussian: mu=%.3f, sigma=%.3f" % (mu, sigma))
  mode = common_mode
  common_mode = mu
  if abs(mode - common_mode) > 1000:
    common_mode = mode # XXX
  self.logger.debug("delta common mode corrections: %.3f" % (mode - common_mode))

  if 0 and abs(mode - common_mode) > 0:
    #if 0 and skew > 0.5:
    # view histogram and fitted gaussian
    from matplotlib import pyplot
    x_all = x
    # was section_img, which is undefined in this scope
    n, bins, patches = pyplot.hist(img_1d.as_numpy_array(), bins=n_slots,
                                   range=(hist_min, hist_max))
    y_all = scale * flex.exp(-flex.pow2(x_all - mu) / (2 * sigma**2))
    scale = slots[flex.max_index(slots)]
    y_all *= scale / flex.max(y_all)
    pyplot.plot(x_all, y_all)
    pyplot.show()

  return (common_mode)
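# Illustrative sketch (not part of the original source): the "mode" branch
# above takes the center of the most populated histogram slot near zero ADU.
# The same idea in plain numpy, on synthetic dark-subtracted pixel values:
import numpy as np

rng = np.random.default_rng(0)
pixels = rng.normal(loc=-3.2, scale=4.0, size=100000)  # stand-in for masked pixel values
heights, edges = np.histogram(pixels, bins=100, range=(-40, 40))
i = heights.argmax()
common_mode = 0.5 * (edges[i] + edges[i + 1])          # center of the fullest slot
print("common mode offset: %.2f ADU" % common_mode)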
def run(args):
  from dials.util.options import OptionParser
  from dials.util.options import flatten_experiments
  from dials.util.options import flatten_reflections
  import libtbx.load_env

  usage = "%s [options] datablock.json" % (
    libtbx.env.dispatcher_name)

  parser = OptionParser(
    usage=usage,
    phil=phil_scope,
    read_experiments=True,
    read_reflections=True,
    check_format=True,
    epilog=help_message)

  params, options = parser.parse_args(show_diff_phil=True)
  experiments = flatten_experiments(params.input.experiments)
  reflections = flatten_reflections(params.input.reflections)

  if len(experiments) == 0 or len(reflections) == 0:
    parser.print_help()
    exit(0)

  imagesets = experiments.imagesets()
  reflections = reflections[0]

  shadowed = filter_shadowed_reflections(experiments, reflections)
  # 100. forces float division under Python 2
  print "# shadowed reflections: %i/%i (%.2f%%)" % (
    shadowed.count(True), shadowed.size(),
    100. * shadowed.count(True) / shadowed.size())

  expt = experiments[0]
  x, y, z = reflections['xyzcal.px'].parts()
  z_ = z * expt.scan.get_oscillation()[1]
  zmin, zmax = expt.scan.get_oscillation_range()

  hist_scan_angle = flex.histogram(z_.select(shadowed), n_slots=int(zmax-zmin))
  #hist_scan_angle.show()

  uc = experiments[0].crystal.get_unit_cell()
  d_spacings = uc.d(reflections['miller_index'])
  ds2 = uc.d_star_sq(reflections['miller_index'])
  hist_res = flex.histogram(
    ds2.select(shadowed), flex.min(ds2), flex.max(ds2), n_slots=20)
  #hist_res.show()

  import matplotlib
  matplotlib.use('Agg')
  from matplotlib import pyplot as plt
  plt.hist2d(z_.select(shadowed).as_numpy_array(),
             ds2.select(shadowed).as_numpy_array(), bins=(40, 40),
             range=((flex.min(z_), flex.max(z_)), (flex.min(ds2), flex.max(ds2))))
  yticks_dsq = flex.double(plt.yticks()[0])
  from cctbx import uctbx
  yticks_d = uctbx.d_star_sq_as_d(yticks_dsq)
  plt.axes().set_yticklabels(['%.2f' % y for y in yticks_d])
  plt.xlabel('Scan angle (degrees)')
  plt.ylabel('Resolution (A)')  # tick labels are d-spacings, so Angstrom, not A^-1
  cbar = plt.colorbar()
  cbar.set_label('# shadowed reflections')
  plt.savefig('n_shadowed_hist2d.png')
  plt.clf()

  plt.scatter(hist_scan_angle.slot_centers().as_numpy_array(),
              hist_scan_angle.slots().as_numpy_array())
  plt.xlabel('Scan angle (degrees)')
  plt.ylabel('# shadowed reflections')
  plt.savefig("n_shadowed_vs_scan_angle.png")
  plt.clf()

  plt.scatter(hist_res.slot_centers().as_numpy_array(),
              hist_res.slots().as_numpy_array())
  plt.xlabel('d_star_sq')
  plt.savefig("n_shadowed_vs_resolution.png")
  plt.clf()
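# Illustrative sketch (not part of the original source): the resolution axis
# above relies on cctbx's converters between d-spacing and d*^2 = 1/d^2;
# assuming a working cctbx install with uctbx.d_as_d_star_sq available:
from cctbx import uctbx
from scitbx.array_family import flex

d = flex.double([10.0, 4.0, 2.0, 1.5])
d_star_sq = uctbx.d_as_d_star_sq(d)
print(list(uctbx.d_star_sq_as_d(d_star_sq)))  # round-trips to the original d values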
# Fragment from inside a per-file comparison loop: pkl (a miller array),
# pkl_file, i, reference1, references, ccs (dict of lists), sgtype and
# compare() are defined earlier in the enclosing script.
  if False:
    # have an I/sigma selection of > 1
    i_over_sigi = pkl.data() / pkl.sigmas()
    pkl = pkl.select(i_over_sigi > 1)
  c, n = compare(reference1, pkl, sgtype)
  print '%5d %s %6.3f %5d' % (i, pkl_file.strip(), c, n),
  ccs[0].append(c)
  for j, reference in enumerate(references):
    c, n = compare(reference, pkl, sgtype)
    print '%6.3f %5d' % (c, n),
    ccs[j + 1].append(c)
  print ''

# (after the loop)
from matplotlib import pyplot
for j in sorted(ccs):
  pyplot.scatter(range(len(ccs[j])), ccs[j])
pyplot.savefig('ccs.png')
pyplot.close()

from scitbx.array_family import flex
for j in sorted(ccs):
  hist = flex.histogram(flex.double(ccs[j]), n_slots=50)
  pyplot.plot(hist.slot_centers(), hist.slots())
  pyplot.savefig('cc_hist_%d.png' % j)
  pyplot.close()
def convert_to_histogram(data, n_slots) :
  histogram = flex.histogram(data=data, n_slots=n_slots)
  return histogram
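# Illustrative usage sketch (not part of the original source; assumes a
# working scitbx install):
import sys
from scitbx.array_family import flex

hist = convert_to_histogram(data=flex.double([1, 2, 2, 3, 5, 8]), n_slots=4)
hist.show(f=sys.stdout, prefix="  ", format_cutoffs="%.1f")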
def set_ensemble_b_factors_to_xyz_displacement(
    pdb_hierarchy,
    include_hydrogens=False,
    include_waters=False,
    use_c_alpha_values=False,
    method="rmsf",
    selection=None,
    substitute_b_value=-1.0,
    logarithmic=False,
    log=None):
  """
  Given an ensemble (multi-MODEL PDB hierarchy), calculate the deviation
  between copies of each atom (defined here as either the root-mean-square
  fluctuation, or the radius of the minimum covering sphere) and set the
  isotropic B-factors to this value.
  """
  import math
  from libtbx.utils import null_out
  if log is None:
    log = null_out()
  assert method in ["rmsf", "mcs"]
  from scitbx.math import minimum_covering_sphere
  from scitbx.array_family import flex
  pdb_atoms = pdb_hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  xyz_by_atom = {}
  def get_key(atom):
    labels = atom.fetch_labels()
    return (labels.chain_id, labels.resid(), labels.altloc, atom.name)
  def get_c_alpha(atom):
    if (atom.name.strip() == "CA") and (atom.element.strip() == "C"):
      return atom
    for other in atom.parent().atoms():
      if (other.name.strip() == "CA") and (other.element.strip() == "C"):
        return other
    return None
  for model in pdb_hierarchy.models():
    for atom in model.atoms():
      if selection is not None:
        if not selection[atom.i_seq]:
          continue
      elif (not include_hydrogens) and (atom.element.strip() in ["H", "D"]):
        continue
      elif (not include_waters) and (atom.parent().resname in ["HOH"]):
        continue
      if (use_c_alpha_values) and (atom.name.strip() != "CA"):
        continue
      atom_key = get_key(atom)
      if atom_key in xyz_by_atom:
        xyz_by_atom[atom_key].append(atom.xyz)
      else:
        xyz_by_atom[atom_key] = flex.vec3_double([atom.xyz])
  dev_by_atom = {}
  for atom_key, xyz in xyz_by_atom.iteritems():
    if method == "mcs":
      mcs = minimum_covering_sphere(points=xyz, epsilon=0.1)
      radius = mcs.radius()
      if logarithmic:
        radius = math.log(radius + 1.0)
      dev_by_atom[atom_key] = radius
    else:
      mean_array = flex.vec3_double(xyz.size(), xyz.mean())
      rmsf = xyz.rms_difference(mean_array)
      dev_by_atom[atom_key] = rmsf
  all_dev = flex.double(dev_by_atom.values())
  if method == "mcs":
    print >> log, "Distribution of sphere radii:"
  else:
    print >> log, "Distribution of root-mean-square fluctuation values:"
  flex.histogram(all_dev, n_slots=20).show(f=log, prefix="  ",
    format_cutoffs="%.2f")
  for model in pdb_hierarchy.models():
    for atom in model.atoms():
      if use_c_alpha_values:
        c_alpha = get_c_alpha(atom)
        if c_alpha is None:
          atom.b = substitute_b_value
        else:
          atom_key = get_key(c_alpha)
          atom.b = dev_by_atom.get(atom_key, substitute_b_value)
      else:
        atom_key = get_key(atom)
        atom.b = dev_by_atom.get(atom_key, substitute_b_value)
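# Illustrative sketch (not part of the original source): the "rmsf" branch
# measures, per atom, the RMS distance of its ensemble copies from their mean
# position, using the same flex calls as above:
from scitbx.array_family import flex

copies = flex.vec3_double([(0.0, 0.0, 0.0), (0.2, 0.0, 0.0), (-0.2, 0.0, 0.0)])
mean_array = flex.vec3_double(copies.size(), copies.mean())
print("RMSF = %.3f A" % copies.rms_difference(mean_array))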
def __init__ (self, cmd_list, nprocs=1, out=sys.stdout, log=None,
    quiet=False, output_junit_xml=False) :
  if (log is None) : log = null_out()
  self.out = multi_out()
  self.log = log
  self.out.register("stdout", out)
  self.out.register("log", log)
  self.quiet = quiet
  self.cmd_list = []
  for cmd in cmd_list :
    if (not cmd in self.cmd_list) :
      self.cmd_list.append(cmd)
    else :
      print >> self.out, " test %s repeated, skipping" % cmd
  nprocs = min(nprocs, len(self.cmd_list))
  print >> self.out, "\n Starting command list"
  print >> self.out, " NProcs :", nprocs
  print >> self.out, " Cmds :", len(self.cmd_list)
  t_start = time.time()
  if nprocs > 1:
    pool = Pool(processes=nprocs)
  self.results = []
  for command in self.cmd_list:
    if nprocs > 1:
      pool.apply_async(
        run_command,
        [command, (not quiet), out],
        callback=self.save_result)
    else:
      rc = run_command(command, verbose=(not quiet), out=out)
      self.save_result(rc)
  if nprocs > 1:
    try :
      try :
        pool.close()
      except KeyboardInterrupt :
        print >> self.out, "Caught KeyboardInterrupt, terminating"
        pool.terminate()
    finally :
      pool.join()
    print >> self.out, '\nProcesses have joined : %d\n' % len(self.results)
  t_end = time.time()
  print >> self.out, ""
  print >> self.out, "Elapsed time: %.2fs" % (t_end - t_start)
  print >> self.out, ""
  finished = 0
  warning = 0
  extra_stderr = 0
  failure = 0
  failures = []
  long_jobs = []
  long_runtimes = []
  runtimes = []
  if output_junit_xml:
    from junit_xml import TestSuite, TestCase
    test_cases = []
  for result in self.results :
    finished += 1
    runtimes.append(result.wall_time)
    if (result.return_code != 0) :
      failure += 1
      failures.append(result)
    else :
      if (len(result.error_lines) != 0) :
        warning += 1
        failures.append(result)
      if (len(result.stderr_lines) != 0):
        extra_stderr += 1
    if (result.wall_time > max_time) :
      long_jobs.append(result.command)
      long_runtimes.append(result.wall_time)
    if output_junit_xml:
      tc = TestCase(name=result.command,
        classname=result.command,
        elapsed_sec=result.wall_time,
        stdout='\n'.join(result.stdout_lines),
        stderr='\n'.join(result.stderr_lines))
      if result.return_code != 0:
        tc.add_failure_info(message='exit code %d' % result.return_code)
      #if len(result.stderr_lines):
      #  tc.add_error_info(output='\n'.join(result.stderr_lines))
      test_cases.append(tc)
  if output_junit_xml:
    ts = TestSuite("libtbx.run_tests_parallel", test_cases=test_cases)
    with open('output.xml', 'wb') as f:
      print >> f, TestSuite.to_xml_string([ts], prettyprint=True)
  if (libtbx.env.has_module("scitbx")) :
    from scitbx.array_family import flex
    print >> self.out, "Distribution of test runtimes:"
    hist = flex.histogram(flex.double(runtimes), n_slots=20)
    hist.show(f=self.out, prefix=" ", format_cutoffs="%.1fs")
    print >> self.out, ""
  if (len(long_jobs) > 0) :
    print >> self.out, ""
    print >> self.out, "WARNING: the following jobs took at least %d seconds:" % \
      max_time
    jobs_and_timings = list(zip(long_jobs, long_runtimes))
    jobs_and_timings.sort(lambda x, y: cmp(x[1], y[1]))
    for cmd, runtime in jobs_and_timings :
      print >> self.out, " " + cmd + " : %.1fs" % runtime
    print >> self.out, \
      "Please try to reduce overall runtime - consider splitting up these tests."
  if (len(failures) > 0) :
    print >> self.out, ""
    print >> self.out, \
      "ERROR: the following jobs returned non-zero exit codes or suspicious stderr output:"
    for result in failures :
      print >> self.out, ""
      print >> self.out, result.command + " (exit code %d):" % result.return_code
      for line in result.stderr_lines :
        print >> self.out, " " + line
      for line in result.error_lines :
        print >> self.out, " " + line
    print >> self.out, ""
    print >> self.out, "Please verify these tests manually."
    print >> self.out, ""
  print >> self.out, "Summary:"
  print >> self.out, " Tests run :", finished
  print >> self.out, " Failures :", failure
  print >> self.out, " Warnings (possible failures) :", warning
  print >> self.out, " Stderr output (discouraged) :", extra_stderr
  if (finished != len(self.cmd_list)) :
    print >> self.out, "*" * 80
    print >> self.out, " WARNING: NOT ALL TESTS FINISHED!"
    print >> self.out, "*" * 80
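# Illustrative sketch (not part of the original source): the runtime report in
# the test runners above and below is just a flex.histogram over wall-clock
# times. Assuming a working scitbx install:
import sys
from scitbx.array_family import flex

runtimes = flex.double([0.3, 1.2, 2.4, 2.5, 48.0, 120.7])
hist = flex.histogram(runtimes, n_slots=10)
hist.show(f=sys.stdout, prefix="  ", format_cutoffs="%.1fs")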
def run(self, flags, sweep=None, observations=None, **kwargs):
  obs_x, obs_y = observations.centroids().px_position_xy().parts()

  import numpy as np
  H, xedges, yedges = np.histogram2d(
    obs_x.as_numpy_array(), obs_y.as_numpy_array(), bins=self.nbins)

  from scitbx.array_family import flex
  H_flex = flex.double(H.flatten().astype(np.float64))
  n_slots = min(int(flex.max(H_flex)), 30)
  hist = flex.histogram(H_flex, n_slots=n_slots)

  # Build the normalised cumulative histogram of grid-cell occupancies.
  slots = hist.slots()
  cumulative_hist = flex.long(len(slots))
  for i in range(len(slots)):
    cumulative_hist[i] = slots[i]
    if i > 0:
      cumulative_hist[i] += cumulative_hist[i-1]
  cumulative_hist = cumulative_hist.as_double() / flex.max(
    cumulative_hist.as_double())

  # The cutoff is where the cumulative slope stays below the threshold
  # for two consecutive bins.
  cutoff = None
  gradients = flex.double()
  for i in range(len(slots)-1):
    x1 = cumulative_hist[i]
    x2 = cumulative_hist[i+1]
    g = (x2 - x1) / hist.slot_width()
    gradients.append(g)
    if (cutoff is None and i > 0 and
        g < self.gradient_cutoff and gradients[i-1] < self.gradient_cutoff):
      cutoff = hist.slot_centers()[i-1] - 0.5 * hist.slot_width()

  # Guard against the degenerate case where no cutoff was found; the
  # original compared H against None here.
  if cutoff is not None:
    H_flex = flex.double(np.ascontiguousarray(H))
    isel = (H_flex > cutoff).iselection()  # computed but not used below
    sel = np.column_stack(np.where(H > cutoff))
    for (ix, iy) in sel:
      flags.set_selected(
        ((obs_x > xedges[ix]) & (obs_x < xedges[ix+1]) &
         (obs_y > yedges[iy]) & (obs_y < yedges[iy+1])), False)

  if 0:
    from matplotlib import pyplot
    fig, ax1 = pyplot.subplots()
    extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
    plot1 = ax1.imshow(H, extent=extent, interpolation="nearest")
    pyplot.xlim((0, pyplot.xlim()[1]))
    pyplot.ylim((0, pyplot.ylim()[1]))
    pyplot.gca().invert_yaxis()
    cbar1 = pyplot.colorbar(plot1)
    pyplot.axes().set_aspect('equal')
    pyplot.show()

    fig, ax1 = pyplot.subplots()
    ax2 = ax1.twinx()
    ax1.scatter(hist.slot_centers() - 0.5 * hist.slot_width(), cumulative_hist)
    ax1.set_ylim(0, 1)
    ax2.plot(hist.slot_centers()[:-1] - 0.5 * hist.slot_width(), gradients)
    ymin, ymax = pyplot.ylim()
    pyplot.vlines(cutoff, ymin, ymax, color='r')
    pyplot.show()

    H2 = H.copy()
    if cutoff is not None:
      H2[np.where(H2 >= cutoff)] = 0
    fig, ax1 = pyplot.subplots()
    plot1 = ax1.pcolormesh(xedges, yedges, H2)
    pyplot.xlim((0, pyplot.xlim()[1]))
    pyplot.ylim((0, pyplot.ylim()[1]))
    pyplot.gca().invert_yaxis()
    cbar1 = pyplot.colorbar(plot1)
    pyplot.axes().set_aspect('equal')
    pyplot.show()

  return flags
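# Illustrative sketch (not part of the original source): the cutoff above is
# where the slope of the normalised cumulative histogram stays below a
# threshold for two consecutive bins, i.e. where adding denser grid cells
# stops gaining many counts. In plain numpy, with 0.01 standing in for
# self.gradient_cutoff:
import numpy as np

rng = np.random.default_rng(3)
counts = np.concatenate([rng.poisson(4, 900), np.full(10, 80)])  # a few unusually dense cells
heights, edges = np.histogram(counts, bins=30)
cum = np.cumsum(heights) / heights.sum()
width = edges[1] - edges[0]
slopes = np.diff(cum) / width
flat = np.where((slopes[:-1] < 0.01) & (slopes[1:] < 0.01))[0]
cutoff = edges[flat[0] + 1] if flat.size else None
print("density cutoff:", cutoff)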
def __init__ (self, cmd_list, nprocs=1, out=sys.stdout, log=None,
    verbosity=DEFAULT_VERBOSITY, output_junit_xml=False) :
  if (log is None) : log = null_out()
  self.out = multi_out()
  self.log = log
  self.out.register("stdout", out)
  self.out.register("log", log)
  self.verbosity = verbosity
  self.quiet = (verbosity == 0)
  self.results = []

  # Filter cmd list for duplicates.
  self.cmd_list = []
  for cmd in cmd_list :
    if (not cmd in self.cmd_list) :
      self.cmd_list.append(cmd)
    else :
      print >> self.out, "Test %s repeated, skipping" % cmd

  # Set number of processors.
  if (nprocs is Auto) :
    nprocs = cpu_count()
  nprocs = min(nprocs, len(self.cmd_list))

  # Starting summary.
  if (self.verbosity > 0) :
    print >> self.out, "Running %d tests on %s processors:" % (
      len(self.cmd_list), nprocs)
    for cmd in self.cmd_list:
      print >> self.out, " %s" % cmd
    print >> self.out, ""

  t_start = time.time()
  if nprocs > 1:
    # Run the tests with multiprocessing pool.
    pool = Pool(processes=nprocs)
    for command in self.cmd_list:
      pool.apply_async(
        run_command,
        [command, verbosity, out],
        callback=self.save_result)
    try:
      pool.close()
    except KeyboardInterrupt:
      print >> self.out, "Caught KeyboardInterrupt, terminating"
      pool.terminate()
    finally:
      pool.join()
  else:
    # Run tests serially.
    for command in self.cmd_list:
      rc = run_command(command, verbosity=verbosity, out=out)
      self.save_result(rc)

  # Print ending summary.
  t_end = time.time()
  print >> self.out, "=" * 80
  print >> self.out, ""
  print >> self.out, "Tests finished. Elapsed time: %.2fs" % (t_end - t_start)
  print >> self.out, ""
  extra_stderr = 0
  test_cases = []

  # Process results for errors and warnings.
  extra_stderr = len(
    [result for result in self.results if result.stderr_lines])
  longjobs = [result for result in self.results
    if result.wall_time > MAX_TIME]
  warnings = [result for result in self.results
    if self.check_alert(result) == 1]
  failures = [result for result in self.results
    if self.check_alert(result) == 2]
  self.finished = len(self.results)
  self.failure = len(failures)
  self.warning = len(warnings)

  # Output JUnit XML
  if output_junit_xml:
    from junit_xml import TestSuite, TestCase
    for result in self.results:
      tc = TestCase(name=result.command,
        classname=result.command,
        elapsed_sec=result.wall_time,
        stdout='\n'.join(result.stdout_lines),
        stderr='\n'.join(result.stderr_lines))
      if result.return_code != 0:
        tc.add_failure_info(message='exit code %d' % result.return_code)
      #if len(result.stderr_lines):
      #  tc.add_error_info(output='\n'.join(result.stderr_lines))
      test_cases.append(tc)
    ts = TestSuite("libtbx.run_tests_parallel", test_cases=test_cases)
    with open('output.xml', 'wb') as f:
      print >> f, TestSuite.to_xml_string([ts], prettyprint=True)

  # Run time distribution.
  if (libtbx.env.has_module("scitbx")) :
    from scitbx.array_family import flex
    print >> self.out, "Distribution of test runtimes:"
    hist = flex.histogram(
      flex.double([result.wall_time for result in self.results]), n_slots=10)
    hist.show(f=self.out, prefix=" ", format_cutoffs="%.1fs")
    print >> self.out, ""

  # Long job warning.
  if longjobs:
    print >> self.out, ""
    print >> self.out, \
      "Warning: the following jobs took at least %d seconds:" % MAX_TIME
    for result in sorted(longjobs, key=lambda result: result.wall_time):
      print >> self.out, " %s: %.1fs" % (result.command, result.wall_time)
    print >> self.out, \
      "Please try to reduce overall runtime - consider splitting up these tests."

  # Failures.
  if failures:
    print >> self.out, ""
    print >> self.out, \
      "Error: the following jobs returned non-zero exit codes or suspicious stderr output:"
    print >> self.out, ""
    for result in warnings:
      self.display_result(result, alert=1, out=self.out,
        log_return=self.out, log_stderr=self.out)
    for result in failures:
      self.display_result(result, alert=2, out=self.out,
        log_return=self.out, log_stderr=self.out)
    print >> self.out, ""
    print >> self.out, "Please verify these tests manually."
    print >> self.out, ""

  # Summary
  print >> self.out, "Summary:"
  print >> self.out, " Tests run :", self.finished
  print >> self.out, " Failures :", self.failure
  print >> self.out, " Warnings (possible failures) :", self.warning
  print >> self.out, " Stderr output (discouraged) :", extra_stderr
  if (self.finished != len(self.cmd_list)) :
    print >> self.out, "*" * 80
    print >> self.out, " WARNING: NOT ALL TESTS FINISHED!"
    print >> self.out, "*" * 80