Пример #1
0
def analyze_distances(self, params, pdb_hierarchy=None, log=sys.stdout):
    """Report hydrogen-bond lengths and optionally disable over-long bonds.

    A histogram of ``self.bond_lengths`` is written to ``log``.  When
    ``params.secondary_structure.protein.remove_outliers`` is true, every
    bond longer than ``distance_max`` gets its ``self.flag_use_bond`` entry
    set to False and a second histogram of the remaining bonds is written.

    Parameters:
      params: parameter object; ``verbose`` and
        ``secondary_structure.protein.remove_outliers`` are read.
      pdb_hierarchy: required only when ``params.verbose`` is true; its
        atoms are used to label the two ends of each excluded bond.
      log: file-like object that receives the report.

    Returns:
      False when outlier removal is switched off, True after filtering.

    NOTE(review): ``distance_max`` is not defined inside this function;
    presumably it is expected at module level -- confirm before re-enabling.
    """
    # The function is currently disabled: this assertion always fails.
    assert 0  # Not used anywhere?
    atoms = None
    if params.verbose:
        assert pdb_hierarchy is not None
        atoms = pdb_hierarchy.atoms()
    # The atoms are fetched only in verbose mode (their sole consumer).  An
    # unconditional second fetch used to follow here and failed whenever
    # pdb_hierarchy was left at its default of None.
    remove_outliers = params.secondary_structure.protein.remove_outliers
    hist = flex.histogram(self.bond_lengths, 10)
    print >> log, "  Distribution of hydrogen bond lengths without filtering:"
    hist.show(f=log, prefix="    ", format_cutoffs="%.4f")
    print >> log, ""
    if not remove_outliers:
        return False
    for i, distance in enumerate(self.bond_lengths):
        if distance > distance_max:
            self.flag_use_bond[i] = False
            if params.verbose:
                print >> log, "Excluding H-bond with length %.3fA" % distance
                i_seq, j_seq = self.bonds[i]
                print >> log, "  %s" % atoms[i_seq].fetch_labels().id_str()
                print >> log, "  %s" % atoms[j_seq].fetch_labels().id_str()
    print >> log, "  After filtering: %d bonds remaining." % \
      self.flag_use_bond.count(True)
    print >> log, "  Distribution of hydrogen bond lengths after applying cutoff:"
    hist = flex.histogram(self.bond_lengths.select(self.flag_use_bond), 10)
    hist.show(f=log, prefix="    ", format_cutoffs="%.4f")
    print >> log, ""
    return True
Пример #2
0
def analyze_distances (self, params, pdb_hierarchy=None, log=sys.stdout) :
  """Report hydrogen-bond lengths and optionally disable over-long bonds.

  A histogram of ``self.bond_lengths`` is written to ``log``.  When
  ``params.secondary_structure.protein.remove_outliers`` is true, every
  bond longer than ``distance_max`` gets its ``self.flag_use_bond`` entry
  set to False and a second histogram of the remaining bonds is written.

  Parameters:
    params: parameter object; ``verbose`` and
      ``secondary_structure.protein.remove_outliers`` are read.
    pdb_hierarchy: required only when ``params.verbose`` is true; its
      atoms are used to label the two ends of each excluded bond.
    log: file-like object that receives the report.

  Returns:
    False when outlier removal is switched off, True after filtering.

  NOTE(review): ``distance_max`` is not defined inside this function;
  presumably it is expected at module level -- confirm before re-enabling.
  """
  # The function is currently disabled: this assertion always fails.
  assert 0 # Not used anywhere?
  atoms = None
  if params.verbose :
    assert pdb_hierarchy is not None
    atoms = pdb_hierarchy.atoms()
  # The atoms are fetched only in verbose mode (their sole consumer).  An
  # unconditional second fetch used to follow here and failed whenever
  # pdb_hierarchy was left at its default of None.
  remove_outliers = params.secondary_structure.protein.remove_outliers
  hist = flex.histogram(self.bond_lengths, 10)
  print >> log, "  Distribution of hydrogen bond lengths without filtering:"
  hist.show(f=log, prefix="    ", format_cutoffs="%.4f")
  print >> log, ""
  if not remove_outliers :
    return False
  for i, distance in enumerate(self.bond_lengths) :
    if distance > distance_max :
      self.flag_use_bond[i] = False
      if params.verbose :
        print >> log, "Excluding H-bond with length %.3fA" % distance
        i_seq, j_seq = self.bonds[i]
        print >> log, "  %s" % atoms[i_seq].fetch_labels().id_str()
        print >> log, "  %s" % atoms[j_seq].fetch_labels().id_str()
  print >> log, "  After filtering: %d bonds remaining." % \
    self.flag_use_bond.count(True)
  print >> log, "  Distribution of hydrogen bond lengths after applying cutoff:"
  hist = flex.histogram(self.bond_lengths.select(self.flag_use_bond), 10)
  hist.show(f=log, prefix="    ", format_cutoffs="%.4f")
  print >> log, ""
  return True
Пример #3
0
  def process_image(process):
    """Histogram every nproc-th image, starting at index ``process``.

    Relies on names from the enclosing scope: ``use_python_counter``,
    ``histmax``, ``histbins``, ``image_count``, ``nproc``, ``image_list``
    and ``read_cbf_image``.

    Parameters:
      process: worker number; also the index of the first image handled.

    Returns:
      The accumulated histogram: a ``Counter`` when ``use_python_counter``
      is set, otherwise a ``flex.histogram``.
    """
    import sys
    last_update = start = timeit.default_timer()

    i = process
    if use_python_counter:
      local_hist = Counter()
    else:
      local_hist = flex.histogram(flex.double(), data_min=0.0, data_max=histmax, n_slots=histbins)

    while i < image_count:
      data = read_cbf_image(image_list[i])
      if not use_python_counter:
        data = flex.histogram(data.as_double().as_1d(), data_min=0.0, data_max=histmax, n_slots=histbins)
      local_hist.update(data)
      i = i + nproc
      # Only worker 0 reports progress, at most once every 3 seconds.
      if process == 0:
        now = timeit.default_timer()
        if now > (last_update + 3):
          last_update = now
          if sys.stdout.isatty():
            # Move the cursor up one line so the progress line is overwritten.
            sys.stdout.write('\033[A')
          # After the final increment i can overshoot image_count; clamp it
          # so the report never exceeds 100% or shows negative seconds.
          done = min(i, image_count)
          print('Processed %d%% (%d seconds remain)    ' % (
            100 * done // image_count,
            round((image_count - done) * (last_update - start) / (done + 1))))
    return local_hist
Пример #4
0
def get_map_histograms(data, n_slots=20, data_1=None, data_2=None):
    """Build histograms for a map and, optionally, its two half maps.

    Parameters:
      data: the full map; histogrammed over its own value range.
      n_slots: number of histogram slots used for every histogram.
      data_1, data_2: optional half maps.  When ``data_1`` is given, both
        half maps are histogrammed over the value range they share.

    Returns:
      ``group_args`` with ``h_map``, ``h_half_map_1``, ``h_half_map_2``,
      ``_data_min`` (lower bound shared by the half maps) and
      ``half_map_histogram_cc`` (linear correlation coefficient between the
      slot counts of the two half-map histograms).  The half-map entries
      are None when ``data_1`` is None.
    """
    if data_1 is None:
        return group_args(
            h_map=flex.histogram(data=data.as_1d(), n_slots=n_slots),
            h_half_map_1=None,
            h_half_map_2=None,
            _data_min=None,
            half_map_histogram_cc=None)

    # Both half maps share one value range so their slots line up.
    shared_min = min(flex.min(data_1), flex.min(data_2))
    shared_max = max(flex.max(data_1), flex.max(data_2))

    def half_map_histogram(half_map):
        return flex.histogram(data=half_map.as_1d(),
                              data_min=shared_min,
                              data_max=shared_max,
                              n_slots=n_slots)

    full_histogram = flex.histogram(data=data.as_1d(), n_slots=n_slots)
    first_half = half_map_histogram(data_1)
    second_half = half_map_histogram(data_2)
    correlation = flex.linear_correlation(
        x=first_half.slots().as_double(),
        y=second_half.slots().as_double())
    return group_args(h_map=full_histogram,
                      h_half_map_1=first_half,
                      h_half_map_2=second_half,
                      _data_min=shared_min,
                      half_map_histogram_cc=correlation.coefficient())
Пример #5
0
    def process_image(process):
        """Histogram every nproc-th image, starting at index ``process``.

        Relies on names from the enclosing scope: ``use_python_counter``,
        ``histmax``, ``histbins``, ``image_count``, ``nproc``,
        ``image_list`` and ``read_cbf_image``.

        Parameters:
          process: worker number; also the index of the first image handled.

        Returns:
          The accumulated histogram: a ``Counter`` when
          ``use_python_counter`` is set, otherwise a ``flex.histogram``.
        """
        last_update = start = timeit.default_timer()

        i = process
        if use_python_counter:
            local_hist = Counter()
        else:
            local_hist = flex.histogram(flex.double(),
                                        data_min=0.0,
                                        data_max=histmax,
                                        n_slots=histbins)

        while i < image_count:
            data = read_cbf_image(image_list[i])
            if not use_python_counter:
                data = flex.histogram(data.as_double().as_1d(),
                                      data_min=0.0,
                                      data_max=histmax,
                                      n_slots=histbins)
            local_hist.update(data)
            i = i + nproc
            # Only worker 0 reports progress, at most once every 3 seconds.
            if process == 0:
                now = timeit.default_timer()
                if now > (last_update + 3):
                    last_update = now
                    if sys.stdout.isatty():
                        # Move the cursor up one line so the progress line
                        # is overwritten.
                        sys.stdout.write('\033[A')
                    # After the final increment i can overshoot image_count;
                    # clamp it so the report never exceeds 100% or shows
                    # negative seconds.
                    done = min(i, image_count)
                    print('Processed %d%% (%d seconds remain)    ' % (
                        100 * done // image_count,
                        round((image_count - done) * (last_update - start) /
                              (done + 1))))
        return local_hist
Пример #6
0
def process_file(file_object, n_slots, data_min, data_max, format_cutoffs):
  data = flex.double()
  for line in file_object.read().splitlines():
    data.append(float(line))
  print "total number of data points:", data.size()
  if (data_min is None): data_min = flex.min(data)
  if (data_max is None): data_max = flex.max(data)
  flex.histogram(
    data=data, n_slots=n_slots, data_min=data_min, data_max=data_max).show(
      format_cutoffs=format_cutoffs)
Пример #7
0
def process_file(file_object, n_slots, data_min, data_max, format_cutoffs):
  """Read one float per line from ``file_object`` and show a histogram.

  Parameters:
    file_object: readable object; each line must parse as a float.
    n_slots: number of histogram slots.
    data_min, data_max: histogram range; a None bound is replaced by the
      minimum / maximum of the values read.
    format_cutoffs: passed through to the histogram's ``show``.
  """
  values = flex.double()
  for text in file_object.read().splitlines():
    values.append(float(text))
  print("total number of data points:", values.size())
  lower = flex.min(values) if data_min is None else data_min
  upper = flex.max(values) if data_max is None else data_max
  histogram = flex.histogram(
    data=values, n_slots=n_slots, data_min=lower, data_max=upper)
  histogram.show(format_cutoffs=format_cutoffs)
Пример #8
0
def plot_centroid_weights_histograms(reflections, n_slots=50):
    """Histogram log(1/variance) of the reflection centroids per axis.

    As written, the function prints the reflection with the largest
    log-weight along the first axis and then returns; the plotting code
    after the bare ``return`` is never executed.

    Parameters:
      reflections: sequence of objects exposing ``centroid_variance``.
      n_slots: number of slots for each of the three histograms.
    """
    from matplotlib import pyplot
    from scitbx.array_family import flex

    variances = flex.vec3_double([r.centroid_variance for r in reflections])
    vx, vy, vz = variances.parts()
    # Keep only reflections whose three variance components are all positive,
    # so that 1/v and log(1/v) below are defined.
    idx = (vx > 0).__and__(vy > 0).__and__(vz > 0)
    vx = vx.select(idx)
    vy = vy.select(idx)
    vz = vz.select(idx)
    # Weights are the reciprocal variances ...
    wx = 1 / vx
    wy = 1 / vy
    wz = 1 / vz
    # ... and the histograms are built from their logarithms.
    wx = flex.log(wx)
    wy = flex.log(wy)
    wz = flex.log(wz)
    hx = flex.histogram(wx, n_slots=n_slots)
    hy = flex.histogram(wy, n_slots=n_slots)
    hz = flex.histogram(wz, n_slots=n_slots)
    fig = pyplot.figure()

    # Position of the largest first-axis log-weight within the filtered data,
    # mapped back to an index into the unfiltered ``reflections``.
    idx2 = flex.max_index(wx)
    idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
    print(reflections[idx3])
    # NOTE(review): this bare return makes everything below unreachable; it
    # looks like a debugging leftover -- confirm whether the plots are wanted.
    return

    # outliers = reflections.select(wx > 50)
    # for refl in outliers:
    # print refl

    # One subplot per axis, stacked vertically (subplot codes 311, 312, 313).
    for i, h in enumerate([hx, hy, hz]):
        ax = fig.add_subplot(311 + i)

        # NOTE(review): ``slots`` is never used afterwards.
        slots = h.slots().as_double()
        # hist_outline is defined outside this block.
        bins, data = hist_outline(h)
        log_scale = True
        if log_scale:
            data.set_selected(
                data == 0, 0.1
            )  # otherwise lines don't get drawn when we have some empty bins
            ax.set_yscale("log")
        ax.plot(bins, data, "-k", linewidth=2)
        # pyplot.suptitle(title)
        # Limit the x range to the occupied slots; both bounds are taken from
        # low_cutoff, so one slot width is added to reach the upper edge.
        data_min = min(
            [slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
        data_max = max(
            [slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
        ax.set_xlim(data_min, data_max + h.slot_width())
    pyplot.show()
Пример #9
0
def build_hist():
  """Accumulate a pixel-value histogram over a set of images.

  The image paths come from the command line: either a single ``.json``
  file, from which the paths of the first imageset of the first datablock
  are taken, or the image paths themselves.  The histogram covers 0 up to
  five times the overload limit of the first image.  The result is written
  to 'overload.json' in the current directory.
  """
  from scitbx.array_family import flex

  if len(sys.argv) == 2 and sys.argv[1].endswith('.json'):
    from dxtbx import datablock
    db = datablock.DataBlockFactory.from_json_file(sys.argv[1])[0]
    image_list = db.extract_imagesets()[0].paths()
  else:
    image_list = sys.argv[1:]
  image_count = len(image_list)

  # Faster, yet still less than ideal and wasting a lot of resources.
  limit = get_overload(image_list[0])
  binfactor = 5 # register up to 500% counts
  histmax = (limit * binfactor) + 0.0
  histbins = (limit * binfactor) + 1
  hist = flex.histogram(flex.double(), data_min=0.0, data_max=histmax, n_slots=histbins)

  # Single-argument print(...) behaves the same under Python 2 and 3.
  print("Processing %d images" % image_count)
  start = timeit.default_timer()
  last_update = start

  for i in range(image_count):
    data = read_cbf_image(image_list[i])
    tmp_hist = flex.histogram(data.as_double().as_1d(), data_min=0.0, data_max=histmax, n_slots=histbins)
    hist.update(tmp_hist)
    # Report progress at most once every 3 seconds.
    if timeit.default_timer() > (last_update + 3):
      last_update = timeit.default_timer()
      if sys.stdout.isatty():
        # Move the cursor up one line so the progress line is overwritten.
        sys.stdout.write('\033[A')
      images_done = i + 1
      # The estimate is based on the images still to do, i.e. excluding the
      # one that has just been processed.
      print('Processed %d of %d images (%d seconds remain)    ' % (
        images_done, image_count,
        round((image_count - images_done) * (last_update - start) / images_done)))
  # 1.0 / limit: with an integer limit, 1 / limit is 0 under Python 2.
  results = { 'scale_factor': 1.0 / limit,
              'bin_count': histbins,
              'bins': list(hist.slots()),
              'image_files': image_list }

  print("Writing results to overload.json")
  with open('overload.json', 'w') as fh:
    json.dump(results, fh)
Пример #10
0
def plot_number_of_crystals(experiments):
    """Plot how many experiments (crystals) share each image.

    Experiments are grouped by ``imageset.get_image_identifier(0)``; the
    group sizes are histogrammed and drawn as a bar chart that is saved to
    'n_crystals_hist.png'.

    Parameters:
      experiments: iterable of experiments exposing ``imageset``.
    """
    experiments_by_image = {}
    for experiment in experiments:
        image_id = experiment.imageset.get_image_identifier(0)
        experiments_by_image.setdefault(image_id, []).append(experiment)

    crystal_counts = flex.int(
        len(group) for group in experiments_by_image.values())
    largest_count = flex.max(crystal_counts)
    # Histogram from 0 to the largest group size, with that many slots.
    histogram = flex.histogram(
        crystal_counts.as_double(), 0, largest_count, n_slots=largest_count)
    from matplotlib import pyplot as plt

    plt.style.use("ggplot")
    bar_style = {
        "align": "center",
        "width": histogram.slot_width(),
        "zorder": 10,
        "color": "black",
        "edgecolor": None,
    }
    plt.bar(histogram.slot_centers(), histogram.slots(), **bar_style)
    plt.savefig("n_crystals_hist.png")
    plt.clf()
Пример #11
0
def filter_histogram_of_key_value(database_dict,
                                  key,
                                  max_reject_fraction,
                                  edge_tolerance_small=1.e-4,
                                  n_slots=3):
    """Repeatedly drop entries that fall into sparsely populated slots.

    The values under ``key`` are histogrammed into ``n_slots`` slots.  Every
    slot holding fewer than ``size * max_reject_fraction`` values (``size``
    being the initial number of values) is marked for removal, using slot
    edges widened by ``edge_tolerance_small``.  Filtering repeats until a
    pass removes nothing or would remove more than
    ``int(size * max_reject_fraction)`` entries.

    Returns:
      The filtered dictionary, or None when ``key`` holds no values.
    """
    size = convert_to_numeric(values=database_dict[key]).size()
    if size == 0:
        return
    sparse_threshold = size * max_reject_fraction
    while True:
        numeric = convert_to_numeric(values=database_dict[key])
        keep = flex.bool(numeric.size(), True)
        histogram = flex.histogram(data=numeric, n_slots=n_slots)
        left_edge = histogram.data_min()
        for slot_index, population in enumerate(histogram.slots()):
            right_edge = (histogram.data_min() +
                          histogram.slot_width() * (slot_index + 1) +
                          edge_tolerance_small)
            # The widened left edge is never allowed to go below zero.
            left_edge = max(0, left_edge - edge_tolerance_small)
            if population < sparse_threshold:
                in_slot = (numeric >= left_edge) & (numeric <= right_edge)
                keep &= ~in_slot
            left_edge = right_edge
        n_kept = keep.count(True)
        n_dropped = keep.count(False)
        # Stop when nothing is rejected, or when applying this pass would
        # reject more than the allowed share of the initial data.
        if n_dropped == 0 or size - n_kept > int(size * max_reject_fraction):
            break
        database_dict = select_dict(database_dict=database_dict,
                                    selection=keep)
    return database_dict
Пример #12
0
 def show(self,
       sites_cart,
       n_slots_difference_histogram=6,
       out=None,
       prefix=""):
   """Print a per-operator summary of this NCS group.

   For each operator the two selections, the number of atom pairs, the
   rotation and translation, optionally a histogram of the distances
   between the reference sites and the mapped sites, and the stored RMS
   difference are printed.

   Parameters:
     sites_cart: Cartesian sites indexed by the stored selection pairs.
     n_slots_difference_histogram: slot count for the distance histogram;
       None suppresses the histogram.
     out: output stream; defaults to sys.stdout.
     prefix: string prepended to the printed lines.
   """
   if out is None:
     out = sys.stdout
   labels = self.group.selection_strings
   nested_prefix = prefix + "  "
   want_histogram = n_slots_difference_histogram is not None
   operators = zip(self.group.selection_pairs, self.matrices, self.rms)
   # Operators are numbered from 1; labels[0] is the reference selection.
   for op_number, (atom_pair, operator, rms_value) in enumerate(operators, 1):
     print >> out, prefix + "NCS operator %d:" % op_number
     print >> out, prefix + "  Reference selection:", \
       show_string(labels[0])
     print >> out, prefix + "      Other selection:", \
       show_string(labels[op_number])
     print >> out, prefix + "  Number of atom pairs:", len(atom_pair[0])
     rotation_text = operator.r.mathematica_form(
       label="Rotation", format="%.6g", one_row_per_line=True,
       prefix=nested_prefix)
     print >> out, rotation_text
     translation_text = operator.t.mathematica_form(
       label="Translation", format="%.6g", prefix=nested_prefix)
     print >> out, translation_text
     reference_sites = sites_cart.select(atom_pair[0])
     mapped_sites = operator * sites_cart.select(atom_pair[1])
     squared_distances = (reference_sites - mapped_sites).dot()
     if want_histogram:
       print >> out, prefix + "  Histogram of differences:"
       flex.histogram(
         data=flex.sqrt(squared_distances),
         n_slots=n_slots_difference_histogram).show(
           f=out, prefix=prefix + "    ", format_cutoffs="%8.6f")
     print >> out, \
       prefix + "  RMS difference with respect to the reference: %8.6f" % (rms_value)
Пример #13
0
def filter_histogram_of_key_value(database_dict, key, max_reject_fraction,
                                  edge_tolerance_small = 1.e-4, n_slots = 3):
  """Repeatedly drop entries that fall into sparsely populated slots.

  The values under ``key`` are histogrammed into ``n_slots`` slots.  Every
  slot holding fewer than ``size * max_reject_fraction`` values (``size``
  being the initial number of values) is marked for removal, using slot
  edges widened by ``edge_tolerance_small``.  Filtering repeats until a
  pass removes nothing or would remove more than
  ``int(size * max_reject_fraction)`` entries.

  Returns:
    The filtered dictionary, or None when ``key`` holds no values.
  """
  size = convert_to_numeric(values = database_dict[key]).size()
  if size == 0:
    return
  sparse_threshold = size * max_reject_fraction
  while True:
    numeric = convert_to_numeric(values = database_dict[key])
    keep = flex.bool(numeric.size(), True)
    histogram = flex.histogram(data = numeric, n_slots = n_slots)
    left_edge = histogram.data_min()
    for slot_index, population in enumerate(histogram.slots()):
      right_edge = (histogram.data_min() +
                    histogram.slot_width() * (slot_index+1) +
                    edge_tolerance_small)
      # The widened left edge is never allowed to go below zero.
      left_edge = max(0, left_edge-edge_tolerance_small)
      if population < sparse_threshold:
        in_slot = (numeric >= left_edge) & (numeric <= right_edge)
        keep &= ~in_slot
      left_edge = right_edge
    n_kept = keep.count(True)
    n_dropped = keep.count(False)
    # Stop when nothing is rejected, or when applying this pass would
    # reject more than the allowed share of the initial data.
    if n_dropped == 0 or size - n_kept > int(size * max_reject_fraction):
      break
    database_dict = select_dict(database_dict = database_dict,
                                selection     = keep)
  return database_dict
Пример #14
0
 def show(self,
          sites_cart,
          n_slots_difference_histogram=6,
          out=None,
          prefix=""):
     """Print a per-operator summary of this NCS group.

     For each operator the two selections, the number of atom pairs, the
     rotation and translation, optionally a histogram of the distances
     between the reference sites and the mapped sites, and the stored RMS
     difference are printed.

     Parameters:
       sites_cart: Cartesian sites indexed by the stored selection pairs.
       n_slots_difference_histogram: slot count for the distance
         histogram; None suppresses the histogram.
       out: output stream; defaults to sys.stdout.
       prefix: string prepended to the printed lines.
     """
     if out is None:
         out = sys.stdout
     labels = self.group.selection_strings
     nested_prefix = prefix + "  "
     want_histogram = n_slots_difference_histogram is not None
     operators = zip(self.group.selection_pairs, self.matrices, self.rms)
     # Operators are numbered from 1; labels[0] is the reference selection.
     for op_number, (atom_pair, operator, rms_value) in enumerate(
             operators, 1):
         print >> out, prefix + "NCS operator %d:" % op_number
         print >> out, prefix + "  Reference selection:", \
           show_string(labels[0])
         print >> out, prefix + "      Other selection:", \
           show_string(labels[op_number])
         print >> out, prefix + "  Number of atom pairs:", len(atom_pair[0])
         rotation_text = operator.r.mathematica_form(
             label="Rotation",
             format="%.6g",
             one_row_per_line=True,
             prefix=nested_prefix)
         print >> out, rotation_text
         translation_text = operator.t.mathematica_form(
             label="Translation", format="%.6g", prefix=nested_prefix)
         print >> out, translation_text
         reference_sites = sites_cart.select(atom_pair[0])
         mapped_sites = operator * sites_cart.select(atom_pair[1])
         squared_distances = (reference_sites - mapped_sites).dot()
         if want_histogram:
             print >> out, prefix + "  Histogram of differences:"
             flex.histogram(
                 data=flex.sqrt(squared_distances),
                 n_slots=n_slots_difference_histogram).show(
                     f=out, prefix=prefix + "    ", format_cutoffs="%8.6f")
         print >> out, \
           prefix + "  RMS difference with respect to the reference: %8.6f" % (rms_value)
def get_mean_statistic_for_resolution (d_min, stat_type, range=0.2, out=None) :
  """Summarise a database statistic for entries near a given resolution.

  Loads "chem_data/polygon_data/all_mvd.pickle", keeps the ``stat_type``
  values of all entries whose 'high_resolution' lies strictly between
  ``d_min - range`` and ``d_min + range``, and prints count, min, max,
  mean and a 10-slot histogram of those values.

  Parameters:
    d_min: centre of the resolution window.
    stat_type: key of the statistic in the database (also looked up in
      ``stat_names`` for the heading).
    range: half-width of the resolution window.
    out: output stream; defaults to sys.stdout.

  Returns:
    The mean of the selected values.
  """
  if (out is None) :
    out = sys.stdout
  from scitbx.array_family import flex
  pkl_file = libtbx.env.find_in_repositories(
    relative_path = "chem_data/polygon_data/all_mvd.pickle",
    test = os.path.isfile)
  db = easy_pickle.load(pkl_file)
  all_d_min = db['high_resolution']
  stat_values = db[stat_type]
  d_low = d_min - range
  d_high = d_min + range
  values_for_range = flex.double()
  for (d_, v_) in zip(all_d_min, stat_values) :
    # Entries that cannot be converted to numbers are skipped.
    try :
      d = float(d_)
      v = float(v_)
    except ValueError :
      continue
    if d_low < d < d_high :
      values_for_range.append(v)
  h = flex.histogram(values_for_range, n_slots=10)
  print >> out, "  %s for d_min = %.3f - %.3f A" % (stat_names[stat_type],
    d_low, d_high)
  # Local names chosen so the builtins min() and max() are not shadowed.
  value_min = flex.min(values_for_range)
  value_max = flex.max(values_for_range)
  mean = flex.mean(values_for_range)
  print >> out, "    count: %d" % values_for_range.size()
  print >> out, "    min: %.2f" % value_min
  print >> out, "    max: %.2f" % value_max
  print >> out, "    mean: %.2f" % mean
  print >> out, "    histogram of values:"
  # Send the histogram to the same stream as the rest of the report.
  h.show(f=out, prefix="      ")
  return mean
Пример #16
0
def get_mean_statistic_for_resolution(d_min, stat_type, range=0.2, out=None):
    """Summarise a database statistic for entries near a given resolution.

    Loads "chem_data/polygon_data/all_mvd.pickle", keeps the ``stat_type``
    values of all entries whose 'high_resolution' lies strictly between
    ``d_min - range`` and ``d_min + range``, and prints count, min, max,
    mean and a 10-slot histogram of those values.

    Parameters:
      d_min: centre of the resolution window.
      stat_type: key of the statistic in the database (also looked up in
        ``stat_names`` for the heading).
      range: half-width of the resolution window.
      out: output stream; defaults to sys.stdout.

    Returns:
      The mean of the selected values.
    """
    if (out is None):
        out = sys.stdout
    from scitbx.array_family import flex
    pkl_file = libtbx.env.find_in_repositories(
        relative_path="chem_data/polygon_data/all_mvd.pickle",
        test=os.path.isfile)
    db = easy_pickle.load(pkl_file)
    all_d_min = db['high_resolution']
    stat_values = db[stat_type]
    d_low = d_min - range
    d_high = d_min + range
    values_for_range = flex.double()
    for (d_, v_) in zip(all_d_min, stat_values):
        # Entries that cannot be converted to numbers are skipped.
        try:
            d = float(d_)
            v = float(v_)
        except ValueError:
            continue
        if d_low < d < d_high:
            values_for_range.append(v)
    h = flex.histogram(values_for_range, n_slots=10)
    print >> out, "  %s for d_min = %.3f - %.3f A" % (
        stat_names[stat_type], d_low, d_high)
    # Local names chosen so the builtins min() and max() are not shadowed.
    value_min = flex.min(values_for_range)
    value_max = flex.max(values_for_range)
    mean = flex.mean(values_for_range)
    print >> out, "    count: %d" % values_for_range.size()
    print >> out, "    min: %.2f" % value_min
    print >> out, "    max: %.2f" % value_max
    print >> out, "    mean: %.2f" % mean
    print >> out, "    histogram of values:"
    # Send the histogram to the same stream as the rest of the report.
    h.show(f=out, prefix="      ")
    return mean
Пример #17
0
def get_map_histograms(data, n_slots=20, data_1=None, data_2=None):
  """Build histograms for a map and, optionally, its two half maps.

  Without half maps only the full map is histogrammed, over its own value
  range.  With half maps, all three histograms use one common range: the
  overall minimum and maximum of the three maps.

  Returns:
    ``group_args`` with ``h_map``, ``h_half_map_1``, ``h_half_map_2`` and
    ``_data_min`` (the common lower bound, or None without half maps).
  """
  if data_1 is None:
    return group_args(
      h_map        = flex.histogram(data = data.as_1d(), n_slots = n_slots),
      h_half_map_1 = None,
      h_half_map_2 = None,
      _data_min    = None)
  maps = (data, data_1, data_2)
  common_min = min(flex.min(m) for m in maps)
  common_max = max(flex.max(m) for m in maps)
  full, half_1, half_2 = [
    flex.histogram(data = m.as_1d(), data_min = common_min,
      data_max = common_max, n_slots = n_slots)
    for m in maps]
  return group_args(h_map = full, h_half_map_1 = half_1,
    h_half_map_2 = half_2, _data_min = common_min)
Пример #18
0
def run():
    """Print a 50-slot histogram of I/sig(I) for a reflection file.

    The file named by ``sys.argv[1]`` is read, the first X-ray intensity
    array in it is selected, systematically absent reflections are removed
    and the histogram of intensity over sigma is printed as
    "slot centre, count" rows.
    """
    reflection_file = any_reflection_file(sys.argv[1]).file_content()
    space_group = reflection_file.space_group()
    indices = reflection_file.extract_miller_indices()
    arrays = reflection_file.as_miller_arrays()

    absent = flex.bool([space_group.is_sys_absent(hkl) for hkl in indices])

    # First array holding X-ray intensities, or None if there is none.
    intensities = next(
        (array for array in arrays if array.is_xray_intensity_array()), None)

    assert intensities, "intensity data not found in %s" % sys.argv[1]

    print("Removing %d absent reflections" % absent.count(True))
    intensities = intensities.select(~absent)

    hist = flex.histogram(
        intensities.data() / intensities.sigmas(), n_slots=50)

    print("I/sig(I)  N")

    for row in zip(hist.slot_centers(), hist.slots()):
        print("%5.1f %d" % row)
Пример #19
0
def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
    """Find image ranges with a low mean I/sigma(I) of integrated spots.

    Profile-fitted intensities are used when any reflection carries the
    ``integrated_prf`` flag, otherwise summation intensities.  The observed
    z positions are binned into steps of roughly ``phi_step`` and the mean
    I/sigma(I) per bin is computed.  Bins whose mean is at most
    ``fractional_loss`` times the largest bin mean are marked as blank.

    Parameters:
      reflections: reflection table with intensity, variance and
        "xyzobs.px.value" columns.
      scan: scan object providing the oscillation and the image-to-angle
        conversion.
      phi_step: requested bin width; it is rounded up to a whole number of
        images.
      fractional_loss: threshold relative to the largest bin mean.

    Returns:
      A dict with plot "data" and "layout" plus "blank_regions" as computed
      by ``blank_regions_from_sel``.
    """
    prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
    if prf_sel.count(True) > 0:
        reflections = reflections.select(prf_sel)
        intensities = reflections["intensity.prf.value"]
        variances = reflections["intensity.prf.variance"]
    else:
        sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
        reflections = reflections.select(sum_sel)
        intensities = reflections["intensity.sum.value"]
        variances = reflections["intensity.sum.variance"]

    i_sigi = intensities / flex.sqrt(variances)

    # Only the z (image) coordinate of the observed centroid is needed.
    z_px = reflections["xyzobs.px.value"].parts()[2]
    phi = scan.get_angle_from_array_index(z_px)

    osc = scan.get_oscillation()[1]
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc

    phi_min = flex.min(phi)
    phi_max = flex.max(phi)
    # At least one slot: with a zero slot count there would be no slots to
    # unpack into xmin / xmax below.
    n_steps = max(1, iceil((phi_max - phi_min) / phi_step))

    hist = flex.histogram(z_px, n_slots=n_steps)
    slot_infos = list(hist.slot_infos())

    mean_i_sigi = flex.double()
    for slot_info in slot_infos:
        sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
        if sel.count(True) == 0:
            # An empty slot contributes a mean of zero.
            mean_i_sigi.append(0)
        else:
            mean_i_sigi.append(flex.mean(i_sigi.select(sel)))

    potential_blank_sel = mean_i_sigi <= (fractional_loss * flex.max(mean_i_sigi))

    xmin, xmax = zip(*[(slot_info.low_cutoff, slot_info.high_cutoff) for slot_info in slot_infos])

    # NOTE(review): the series name and the y-axis title below talk about
    # reflection counts although y holds mean I/sigma(I); left unchanged in
    # case consumers rely on these strings.
    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(mean_i_sigi),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {"xaxis": {"title": "z observed (images)"}, "yaxis": {"title": "Number of reflections"}, "bargap": 0},
    }

    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions

    return d
Пример #20
0
def blank_counts_analysis(reflections, scan, phi_step, fractional_loss):
    """Find image ranges that contain unusually few reflections.

    The observed z positions are binned over the scan's array range in
    steps of roughly ``phi_step``.  Bins whose count, relative to the
    fullest bin, is at most ``fractional_loss`` are marked as blank.

    Parameters:
      reflections: reflection table with an "xyzobs.px.value" column.
      scan: scan object providing the oscillation, the array range and the
        image-to-angle conversion.
      phi_step: requested bin width; it is rounded up to a whole number of
        images.
      fractional_loss: threshold on the count relative to the fullest bin.

    Returns:
      A dict with plot "data" and "layout" plus "blank_regions" as computed
      by ``blank_regions_from_sel``.

    Raises:
      Sorry: if ``reflections`` is empty.
    """
    if not len(reflections):
        raise Sorry("Input contains no reflections")

    # Only the z (image) coordinate of the observed centroid is needed.
    z_px = reflections["xyzobs.px.value"].parts()[2]
    phi = scan.get_angle_from_array_index(z_px)

    osc = scan.get_oscillation()[1]
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc

    array_range = scan.get_array_range()
    phi_min = scan.get_angle_from_array_index(array_range[0])
    phi_max = scan.get_angle_from_array_index(array_range[1])
    assert phi_min <= flex.min(phi)
    assert phi_max >= flex.max(phi)
    # A scan shorter than half a step rounds to zero; use at least one slot,
    # otherwise there would be no slots to unpack into xmin / xmax below.
    n_steps = max(1, int(round((phi_max - phi_min) / phi_step)))
    hist = flex.histogram(
        z_px, data_min=array_range[0], data_max=array_range[1], n_slots=n_steps
    )
    logger.debug("Histogram:")
    logger.debug(hist.as_str())

    counts = hist.slots()
    fractional_counts = counts.as_double() / flex.max(counts)

    potential_blank_sel = fractional_counts <= fractional_loss

    xmin, xmax = zip(
        *[
            (slot_info.low_cutoff, slot_info.high_cutoff)
            for slot_info in hist.slot_infos()
        ]
    )

    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(hist.slots()),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {
            "xaxis": {"title": "z observed (images)"},
            "yaxis": {"title": "Number of reflections"},
            "bargap": 0,
        },
    }

    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions

    return d
Пример #21
0
def plot_uc_vs_detector_distance(
    uc_params,
    panel_distances,
    outliers,
    steps_per_angstrom=20,
    filename="uc_vs_distance.png",
):
    """Plot unit cell lengths against detector distance and save the figure.

    The top row holds three 2D histograms (a, b and c against the first
    entry of ``panel_distances``); the bottom row holds a 1D histogram of
    those distances.

    Parameters:
      uc_params: sequence whose first three entries are the a, b and c
        values.
      panel_distances: sequence of distance arrays; only the first entry
        is used.
      outliers: not used by this function.
      steps_per_angstrom: not used by this function; the distance
        histogram uses a fixed 20 steps per mm.
      filename: path the figure is saved to.
    """
    from matplotlib import pyplot as plt

    plt.style.use("ggplot")
    f = plt.figure(figsize=(12, 8))
    ax1 = plt.subplot2grid((2, 3), (0, 0))
    ax2 = plt.subplot2grid((2, 3), (0, 1), sharey=ax1)
    ax3 = plt.subplot2grid((2, 3), (0, 2), sharey=ax1)
    ax4 = plt.subplot2grid((2, 3), (1, 0), colspan=3)
    a, b, c = uc_params[:3]

    def hist2d(p1, p2, ax):
        # 100x100 2D histogram drawn as a colour mesh; empty bins are masked.
        nbins = 100
        H, xedges, yedges = np.histogram2d(p1, p2, bins=nbins)
        H = np.rot90(H)
        H = np.flipud(H)
        Hmasked = np.ma.masked_where(H == 0, H)
        ax.pcolormesh(xedges, yedges, Hmasked)

    hist2d(a, panel_distances[0], ax1)
    hist2d(b, panel_distances[0], ax2)
    hist2d(c, panel_distances[0], ax3)

    mmm = flex.min_max_mean_double(panel_distances[0])
    steps_per_mm = 20
    # NOTE(review): both bounds are rounded down to the step grid, so values
    # above Amax fall outside the histogram range -- confirm this is wanted.
    Amin = math.floor(mmm.min * steps_per_mm) / steps_per_mm
    Amax = math.floor(mmm.max * steps_per_mm) / steps_per_mm
    n_slots = max(1, int((Amax - Amin) * steps_per_mm))
    if Amin == Amax:
        # Degenerate range: widen it so the single slot has a finite width.
        eps = 0.05
        Amin -= eps
        Amax += eps
    hist = flex.histogram(panel_distances[0], Amin, Amax, n_slots=n_slots)
    ax4.bar(
        hist.slot_centers(),
        hist.slots(),
        align="center",
        width=hist.slot_width(),
        zorder=10,
        color="red",
        edgecolor=None,
        linewidth=0,
    )

    ax1.set_ylabel("Detector distance (mm)")
    ax1.set_xlabel(r"a ($\AA$)")
    ax2.set_xlabel(r"b ($\AA$)")
    ax3.set_xlabel(r"c ($\AA$)")
    ax4.set_xlabel("Detector distance (mm)")

    # The layout has to be adjusted before saving to affect the saved file.
    plt.tight_layout()
    f.savefig(filename)
    f.clf()
Пример #22
0
 def __init__(self, map_1, map_2, sites_cart, unit_cell, radius, n_slots):
     """Compute per-atom map correlations and their histogram.

     ``map_1`` and ``map_2`` must pass ``assert_same_gridding``.  The
     per-atom values are stored as ``self.ccs`` and their ``n_slots``-slot
     histogram as ``self.hist``.
     """
     assert_same_gridding(map_1, map_2)
     per_atom_cc = from_map_map_atoms_per_atom(
         map_1=map_1,
         map_2=map_2,
         sites_cart=sites_cart,
         unit_cell=unit_cell,
         radius=radius)
     self.ccs = per_atom_cc
     self.hist = flex.histogram(data=per_atom_cc, n_slots=n_slots)
Пример #23
0
 def __init__(self, map_1, map_2, sites_cart, unit_cell, radius, n_slots):
   """Compute per-atom map correlations and their histogram.

   ``map_1`` and ``map_2`` must pass ``assert_same_gridding``.  The
   per-atom values are stored as ``self.ccs`` and their ``n_slots``-slot
   histogram as ``self.hist``.
   """
   assert_same_gridding(map_1, map_2)
   per_atom_cc = from_map_map_atoms_per_atom(
     map_1 = map_1, map_2 = map_2, sites_cart = sites_cart,
     unit_cell = unit_cell, radius = radius)
   self.ccs = per_atom_cc
   self.hist = flex.histogram(data = per_atom_cc, n_slots = n_slots)
Пример #24
0
def plot_centroid_weights_histograms(reflections, n_slots=50):
  """Histogram log(1/variance) of the reflection centroids per axis.

  As written, the function prints the reflection with the largest
  log-weight along the first axis and then returns; the plotting code
  after the bare ``return`` is never executed.

  Parameters:
    reflections: sequence of objects exposing ``centroid_variance``.
    n_slots: number of slots for each of the three histograms.
  """
  from matplotlib import pyplot
  from scitbx.array_family import flex
  variances = flex.vec3_double([r.centroid_variance for r in reflections])
  vx, vy, vz = variances.parts()
  # Keep only reflections whose three variance components are all positive,
  # so that 1/v and log(1/v) below are defined.
  idx = (vx > 0).__and__(vy > 0).__and__(vz > 0)
  vx = vx.select(idx)
  vy = vy.select(idx)
  vz = vz.select(idx)
  # Weights are the reciprocal variances ...
  wx = 1/vx
  wy = 1/vy
  wz = 1/vz
  # ... and the histograms are built from their logarithms.
  wx = flex.log(wx)
  wy = flex.log(wy)
  wz = flex.log(wz)
  hx = flex.histogram(wx, n_slots=n_slots)
  hy = flex.histogram(wy, n_slots=n_slots)
  hz = flex.histogram(wz, n_slots=n_slots)
  fig = pyplot.figure()

  # Position of the largest first-axis log-weight within the filtered data,
  # mapped back to an index into the unfiltered ``reflections``.
  idx2 = flex.max_index(wx)
  idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
  print reflections[idx3]
  # NOTE(review): this bare return makes everything below unreachable; it
  # looks like a debugging leftover -- confirm whether the plots are wanted.
  return

  #outliers = reflections.select(wx > 50)
  #for refl in outliers:
    #print refl

  # One subplot per axis, stacked vertically (subplot codes 311, 312, 313).
  for i, h in enumerate([hx, hy, hz]):
    ax = fig.add_subplot(311+i)

    # NOTE(review): ``slots`` is never used afterwards.
    slots = h.slots().as_double()
    # hist_outline is defined outside this block.
    bins, data = hist_outline(h)
    log_scale = True
    if log_scale:
      data.set_selected(data == 0, 0.1) # otherwise lines don't get drawn when we have some empty bins
      ax.set_yscale("log")
    ax.plot(bins, data, '-k', linewidth=2)
    #pyplot.suptitle(title)
    # Limit the x range to the occupied slots; both bounds are taken from
    # low_cutoff, so one slot width is added to reach the upper edge.
    data_min = min([slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
    data_max = max([slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
    ax.set_xlim(data_min, data_max+h.slot_width())
  pyplot.show()
Пример #25
0
  def normal_probability_plot(self, data, rankits_sel=None, plot=False):
    """ Use normal probability analysis to determine if a set of data is normally distributed
    See https://en.wikipedia.org/wiki/Normal_probability_plot.
    Rankits are computed in the same way as qqnorm does in R.
    @param data flex array
    @param rankits_sel only use the rankits in a certain range. Useful for outlier rejection. Should be
    a tuple such as (-0.5,0.5).
    @param plot whether to show the normal probability plot
    @return tuple (corr, slope, offset) as returned by get_overall_correlation_flex for the
    sorted data against the rankits
    """
    from scitbx.math import distributions
    import numpy as np
    norm = distributions.normal_distribution()

    n = len(data)
    if n <= 10:
      # Written with a float: 3/8 is 0 under Python 2 integer division,
      # which would silently change the rankit formula below.
      a = 3.0 / 8
    else:
      a = 0.5

    sorted_data = flex.sorted(data)
    rankits = flex.double([norm.quantile((i+1-a)/(n+1-(2*a))) for i in range(n)])

    if rankits_sel is None:
      corr, slope, offset = self.get_overall_correlation_flex(sorted_data, rankits)
    else:
      # Restrict the fit to rankits inside the requested closed interval.
      sel = (rankits >= rankits_sel[0]) & (rankits <= rankits_sel[1])
      corr, slope, offset = self.get_overall_correlation_flex(sorted_data.select(sel), rankits.select(sel))

    if plot:
      from matplotlib import pyplot as plt
      f = plt.figure(0)
      lim = -5, 5
      # Fitted line; only drawn if the commented-out plot call is enabled.
      x = np.linspace(lim[0],lim[1],100) # 100 linearly spaced numbers
      y = slope * x + offset
      plt.plot(sorted_data, rankits, '-')
      #plt.plot(x,y)
      plt.title("CC: %.3f Slope: %.3f Offset: %.3f"%(corr, slope, offset))
      plt.xlabel("Sorted data")
      plt.ylabel("Rankits")
      plt.xlim(lim); plt.ylim(lim)
      plt.axes().set_aspect('equal')

      # Second figure: histogram of the data over the same range.
      f = plt.figure(1)
      h = flex.histogram(sorted_data, n_slots=100, data_min = lim[0], data_max = lim[1])
      stats = flex.mean_and_variance(sorted_data)
      plt.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
      plt.xlim(lim)
      plt.xlabel("Sorted data")
      plt.ylabel("Count")
      plt.title("Normalized data mean: %.3f +/- %.3f"%(stats.mean(), stats.unweighted_sample_standard_deviation()))

      if self.scaler.params.raw_data.error_models.sdfac_refine.plot_refinement_steps:
        plt.ion()
        plt.pause(0.05)

    return corr, slope, offset
Пример #26
0
def show_histogram(data=None, n_slots=None, data_min=None, data_max=None,
                   log=None):
  """Print one line per histogram slot: lower edge, upper edge and count.

  A flex.histogram is built from ``data`` with ``n_slots`` slots between
  ``data_min`` and ``data_max``. Each slot is written with the Python 2
  ``print >> log`` statement using the format "%10.4f - %-10.4f : %d".
  Nothing is returned.
  """
  from cctbx.array_family import flex
  histogram = flex.histogram(
    data=data, n_slots=n_slots, data_min=data_min, data_max=data_max)
  lower = histogram.data_min()
  for slot_index, count in enumerate(histogram.slots()):
    # The upper edge is always derived from the histogram origin, not
    # accumulated from the previous edge.
    upper = histogram.data_min() + histogram.slot_width() * (slot_index + 1)
    print >> log, "%10.4f - %-10.4f : %d" % (lower, upper, count)
    lower = upper
Пример #27
0
def blank_counts_analysis(reflections, scan, phi_step, fractional_loss):
    """Histogram observed reflections along the scan and flag sparse slots.

    The z component of ``reflections['xyzobs.px.value']`` is binned into
    slots of roughly ``phi_step`` (rounded up with ``iceil`` to a whole
    number of images). A slot is marked as potentially blank when its count,
    relative to the fullest slot, is at most ``fractional_loss``.

    Returns a dict with a bar-chart style 'data' entry, a 'layout' entry and
    'blank_regions' (the result of ``blank_regions_from_sel`` on the data
    entry). Raises Sorry when ``reflections`` is empty.
    """
    if len(reflections) == 0:
        raise Sorry('Input contains no reflections')

    _, _, z_px = reflections['xyzobs.px.value'].parts()
    phi = scan.get_angle_from_array_index(z_px)

    # Snap the requested step to a whole number of images.
    oscillation_width = scan.get_oscillation()[1]
    phi_step = iceil(phi_step / oscillation_width) * oscillation_width

    image_range = scan.get_array_range()
    phi_min = scan.get_angle_from_array_index(image_range[0])
    phi_max = scan.get_angle_from_array_index(image_range[1])
    assert phi_min <= flex.min(phi)
    assert phi_max >= flex.max(phi)

    hist = flex.histogram(
        z_px, n_slots=iceil((phi_max - phi_min) / phi_step))

    slot_counts = hist.slots()
    potential_blank_sel = (
        slot_counts.as_double() / flex.max(slot_counts)) <= fractional_loss

    xmin, xmax = zip(*((info.low_cutoff, info.high_cutoff)
                       for info in hist.slot_infos()))

    trace = {
        'x': list(hist.slot_centers()),
        'y': list(hist.slots()),
        'xlow': xmin,
        'xhigh': xmax,
        'blank': list(potential_blank_sel),
        'type': 'bar',
        'name': 'blank_counts_analysis'
    }
    layout = {
        'xaxis': {
            'title': 'z observed (images)'
        },
        'yaxis': {
            'title': 'Number of reflections'
        },
        'bargap': 0,
    }

    d = {'data': [trace], 'layout': layout}
    d['blank_regions'] = blank_regions_from_sel(trace)
    return d
Пример #28
0
def show_histogram(data, n_slots):
  """Return the upper cutoff of the first slot of a histogram of ``data``.

  The histogram is built with ``flex.histogram(data=data, n_slots=n_slots)``.
  The result is ``data_min + slot_width``, or None when the histogram has no
  slots. Nothing is printed.

  The previous implementation walked every slot and recomputed every edge
  only to keep the first one; the value is now computed directly.
  """
  hm = flex.histogram(data = data, n_slots = n_slots)
  if hm.slots().size() == 0:
    return None
  return hm.data_min() + hm.slot_width()
Пример #29
0
def show_histogram(data=None, n_slots=None, data_min=None, data_max=None,
                   log=None):
  """Write a text table of histogram slots to ``log``.

  Builds a flex.histogram over ``data`` (``n_slots`` slots spanning
  ``data_min`` .. ``data_max``) and emits, for each slot, its lower cutoff,
  upper cutoff and occupancy via the Python 2 ``print >> log`` statement.
  The function returns None.
  """
  from cctbx.array_family import flex
  hist = flex.histogram(data=data,
                        n_slots=n_slots,
                        data_min=data_min,
                        data_max=data_max)
  low_edge = hist.data_min()
  # Counting from 1 gives the multiplier for the slot's upper edge directly.
  for position, occupancy in enumerate(hist.slots(), 1):
    high_edge = hist.data_min() + hist.slot_width() * position
    print >> log, "%10.4f - %-10.4f : %d" % (low_edge, high_edge, occupancy)
    low_edge = high_edge
Пример #30
0
    def __init__(self, rs_vectors, percentile=0.05):
        """Estimate ``self.max_cell`` from nearest-neighbour spot spacings.

        ``rs_vectors`` holds spots as reciprocal-space xyz triples. For every
        spot the nearest-neighbour distance reported by AnnAdaptor is turned
        into a direct-space length as 1/sqrt(distance). ``self.max_cell`` is
        1.5 times the larger of (a) the centre of the most populated
        histogram bin of those lengths and (b) the length found at fractional
        rank ``percentile`` when the lengths are sorted in descending order.

        Asserts that more than 10 spots are supplied.
        """
        from scitbx.array_family import flex
        NEAR = 10
        self.NNBIN = 5  # target number of neighbors per histogram bin

        # nearest neighbor analysis
        from annlib_ext import AnnAdaptor
        flat_coordinates = flex.double()
        for spot in rs_vectors:  # spots, in reciprocal space xyz
            for axis in (0, 1, 2):
                flat_coordinates.append(spot[axis])

        n_spots = len(rs_vectors)
        # Can't do nearest neighbor with too few spots
        assert n_spots > NEAR

        ann = AnnAdaptor(data=flat_coordinates, dim=3, k=1)
        ann.query(flat_coordinates)

        # NOTE(review): taking sqrt suggests ann.distances are squared
        # distances -- confirm against the AnnAdaptor documentation.
        direct = flex.double(
            [1.0 / math.sqrt(ann.distances[i]) for i in range(n_spots)])

        # determine the most probable nearest neighbor distance (direct space)
        hst = flex.histogram(direct, n_slots=int(n_spots / self.NNBIN))
        slot_centers = hst.slot_centers()
        slot_counts = hst.slots()
        tallest = flex.max(slot_counts)
        most_probable_neighbor = slot_centers[list(slot_counts).index(tallest)]

        if False:  # to print out the histogramming analysis
            smin, smax = flex.min(direct), flex.max(direct)
            stats = flex.mean_and_variance(direct)
            import sys
            out = sys.stdout
            print("     range:     %6.2f - %.2f" % (smin, smax), file=out)
            print("     mean:      %6.2f +/- %6.2f on N = %d" %
                  (stats.mean(), stats.unweighted_sample_standard_deviation(),
                   direct.size()),
                  file=out)
            hst.show(f=out, prefix="    ", format_cutoffs="%6.2f")
            print("", file=out)

        # determine the 5th-percentile direct-space distance
        descending = flex.sort_permutation(direct, reverse=True)
        cutoff_rank = int(percentile * n_spots)
        percentile_distance = direct[descending[cutoff_rank]]

        MAXTOL = 1.5  # Margin of error for max unit cell estimate
        self.max_cell = max(MAXTOL * most_probable_neighbor,
                            MAXTOL * percentile_distance)

        if False:
            self.plot(direct)
Пример #31
0
def multihist(unmerged_mtz):
    """Print multiplicity histograms for acentric and centric reflections.

    Reads an unmerged MTZ file, takes the intensity array labelled either
    ["I", "SIGI"] or ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"] (the last match
    wins), re-indexes it with the original Miller indices, keeps data to
    d_min=2.0, merges equivalents and histograms the resulting redundancies
    separately for acentric and centric reflections. One line is printed per
    slot: slot centre, acentric count, centric count.

    Args:
      unmerged_mtz: path of the unmerged MTZ file.

    Raises:
      AssertionError: if the file is not recognised as "ccp4_mtz".
      ValueError: if no array with one of the expected label sets is found
        (previously this surfaced as an UnboundLocalError).
    """
    from iotbx.reflection_file_reader import any_reflection_file
    from scitbx.array_family import flex

    reader = any_reflection_file(unmerged_mtz)
    assert reader.file_type() == "ccp4_mtz"
    mtz_object = reader.file_content()
    arrays = reader.as_miller_arrays(merge_equivalents=False)

    accepted_labels = (
        ["I", "SIGI"],
        ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"],
    )
    intensities = None
    for ma in arrays:
        # No break: as before, the last matching array is the one used.
        if ma.info().labels in accepted_labels:
            intensities = ma
    if intensities is None:
        raise ValueError(
            "No intensity array with labels I/SIGI or "
            "I(+)/SIGI(+)/I(-)/SIGI(-) found in %s" % unmerged_mtz)

    indices = mtz_object.extract_original_index_miller_indices()
    intensities = intensities.customized_copy(indices=indices,
                                              info=intensities.info())

    intensities = intensities.resolution_filter(d_min=2.0)

    merging = intensities.merge_equivalents()
    # Unobserved reflections are added with multiplicity 0.
    multiplicities = merging.redundancies().complete_array(new_data_value=0)
    mult_acentric = multiplicities.select_acentric().data()
    mult_centric = multiplicities.select_centric().data()

    max_mult = max(flex.max(mult_acentric), flex.max(mult_centric))

    # Both histograms share the same range and slot count so that their
    # slots line up in the zip below.
    hist_acentric = flex.histogram(mult_acentric.as_double(),
                                   data_min=0,
                                   data_max=max_mult,
                                   n_slots=max_mult)
    hist_centric = flex.histogram(mult_centric.as_double(),
                                  data_min=0,
                                  data_max=max_mult,
                                  n_slots=max_mult)
    for s, a, c in zip(hist_acentric.slot_centers(), hist_acentric.slots(),
                       hist_centric.slots()):
        print(s, a, c)
Пример #32
0
  def __init__(self, rs_vectors, percentile=0.05):
    """Derive ``self.max_cell`` from nearest-neighbour distances of spots.

    Each entry of ``rs_vectors`` is a reciprocal-space xyz triple. The
    nearest-neighbour distance reported by AnnAdaptor for every spot is
    converted to a direct-space length (1/sqrt(distance)). The estimate
    stored in ``self.max_cell`` is 1.5 times the larger of the most populated
    histogram bin centre and the length at fractional rank ``percentile`` of
    the descending-sorted lengths. More than 10 spots are required
    (assertion).
    """
    from scitbx.array_family import flex
    NEAR = 10
    self.NNBIN = 5 # target number of neighbors per histogram bin

    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor
    reciprocal_xyz = flex.double()
    for vector in rs_vectors: # spots, in reciprocal space xyz
      for component in (vector[0], vector[1], vector[2]):
        reciprocal_xyz.append(component)

    spot_count = len(rs_vectors)
    assert spot_count > NEAR # Can't do nearest neighbor with too few spots

    neighbor_search = AnnAdaptor(data=reciprocal_xyz, dim=3, k=1)
    neighbor_search.query(reciprocal_xyz)

    direct = flex.double()
    for spot_index in xrange(spot_count):
      reported = neighbor_search.distances[spot_index]
      direct.append(1.0 / math.sqrt(reported))

    # determine the most probable nearest neighbor distance (direct space)
    histogram = flex.histogram(direct, n_slots=int(spot_count / self.NNBIN))
    bin_centers = histogram.slot_centers()
    bin_counts = histogram.slots()
    peak_bin = list(bin_counts).index(flex.max(bin_counts))
    most_probable_neighbor = bin_centers[peak_bin]

    if False:  # to print out the histogramming analysis
      smin, smax = flex.min(direct), flex.max(direct)
      stats = flex.mean_and_variance(direct)
      import sys
      out = sys.stdout
      print >> out, "     range:     %6.2f - %.2f" % (smin, smax)
      print >> out, "     mean:      %6.2f +/- %6.2f on N = %d" % (
        stats.mean(), stats.unweighted_sample_standard_deviation(), direct.size())
      histogram.show(f=out, prefix="    ", format_cutoffs="%6.2f")
      print >> out, ""

    # determine the 5th-percentile direct-space distance
    order = flex.sort_permutation(direct, reverse=True)
    ranked_distance = direct[order[int(percentile * spot_count)]]

    MAXTOL = 1.5 # Margin of error for max unit cell estimate
    self.max_cell = max(MAXTOL * most_probable_neighbor,
                        MAXTOL * ranked_distance)

    if False:
      self.plot(direct)
Пример #33
0
def show_histogram(data, n_slots, data_min, data_max, log=sys.stdout):
  """Print slot centres and the percentage of ``data`` falling in each slot.

  A flex.histogram with ``n_slots`` slots between ``data_min`` and
  ``data_max`` is built from ``data``. For every slot one line
  "%10.2f : %10.4f" (slot centre, count as percent of ``data.size()``) is
  written to ``log``.

  Returns the flex.double created at the start of the function; nothing is
  appended to it here, so it is returned empty.
  """
  from cctbx.array_family import flex
  h_data = flex.double()
  histogram = flex.histogram(
    data=data, n_slots=n_slots, data_min=data_min, data_max=data_max)
  lower = histogram.data_min()
  for index, count in enumerate(histogram.slots()):
    upper = histogram.data_min() + histogram.slot_width() * (index + 1)
    center = (lower + upper) / 2
    percent = count * 100. / data.size()
    print ("%10.2f : %10.4f" % (center, percent), file=log)
    lower = upper
  return h_data
Пример #34
0
def blank_counts_analysis(reflections, scan, phi_step, fractional_loss):
    """Bin reflections by observed z and mark slots with few reflections.

    The observed z coordinates (from ``reflections["xyzobs.px.value"]``) are
    histogrammed over the scan, using a step of ``phi_step`` rounded up via
    ``iceil`` to a whole number of images. Slots whose count divided by the
    largest slot count is no greater than ``fractional_loss`` are flagged in
    the "blank" list.

    Returns a dict with keys "data", "layout" and "blank_regions"; the last
    is whatever ``blank_regions_from_sel`` returns for the data entry.
    Raises Sorry if ``reflections`` is empty.
    """
    if len(reflections) == 0:
        raise Sorry("Input contains no reflections")

    _x_px, _y_px, z_px = reflections["xyzobs.px.value"].parts()
    phi = scan.get_angle_from_array_index(z_px)

    # Use a step that is an integral number of images.
    osc = scan.get_oscillation()[1]
    images_per_step = iceil(phi_step / osc)
    phi_step = images_per_step * osc

    bounds = scan.get_array_range()
    phi_min = scan.get_angle_from_array_index(bounds[0])
    phi_max = scan.get_angle_from_array_index(bounds[1])
    assert phi_min <= flex.min(phi)
    assert phi_max >= flex.max(phi)

    hist = flex.histogram(z_px, n_slots=iceil((phi_max - phi_min) / phi_step))

    counts = hist.slots()
    potential_blank_sel = counts.as_double() / flex.max(counts) <= fractional_loss

    cutoffs = [(info.low_cutoff, info.high_cutoff) for info in hist.slot_infos()]
    xmin, xmax = zip(*cutoffs)

    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(hist.slots()),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {
            "xaxis": {"title": "z observed (images)"},
            "yaxis": {"title": "Number of reflections"},
            "bargap": 0,
        },
    }
    d["blank_regions"] = blank_regions_from_sel(d["data"][0])
    return d
Пример #35
0
def compare_data_with_model(cif_file, mtz_file):
    """Print mean and standard deviation of (Iobs - Icalc) / sigma(Iobs).

    Calculated intensities come from the crystal structure stored under key
    "1" in ``cif_file`` (direct summation, anomalous, d_min=0.55). Observed
    intensities are the ("HKL_base", "HKL_base", "I") array of ``mtz_file``,
    unmerged and treated as anomalous. Observations are paired with
    calculated values by Miller index, the calculated values are put on the
    observed scale with a single factor sum(Iobs)/sum(Icalc), and the mean
    and standard deviation of the normalised differences are printed.

    Args:
      cif_file: path of the CIF file holding the model.
      mtz_file: path of the MTZ file holding the measurements.
    """
    from iotbx import cif, mtz
    from scitbx.array_family import flex
    import math

    # read model, compute Fc, square to F^2
    model = cif.reader(file_path=cif_file).build_crystal_structures()["1"]
    ic = (
        model.structure_factors(anomalous_flag=True, d_min=0.55, algorithm="direct")
        .f_calc()
        .as_intensity_array()
    )

    # read experimental measurements
    mtz_object = mtz.object(mtz_file)
    mad = mtz_object.as_miller_arrays_dict(merge_equivalents=False)
    idata = mad[("HKL_base", "HKL_base", "I")].as_anomalous_array()
    match = idata.match_indices(ic)

    # Fetch the underlying arrays once rather than once per matched pair.
    obs_values = idata.data()
    obs_sigmas = idata.sigmas()
    calc_values = ic.data()

    # pair up, extract to vanilla arrays for easier handling
    icalc = flex.double()
    iobs = flex.double()
    sobs = flex.double()

    for p in match.pairs():
        iobs.append(obs_values[p[0]])
        sobs.append(obs_sigmas[p[0]])
        icalc.append(calc_values[p[1]])

    # estimate conversion scale - apply F^2
    icalc *= flex.sum(iobs) / flex.sum(icalc)

    d = (iobs - icalc) / sobs

    mean = flex.sum(d) / d.size()
    sigma = math.sqrt(flex.sum(d * d) / d.size() - mean * mean)

    # mean and standard deviation -
    print(mean, sigma)
Пример #36
0
def plot_rij_histogram(rij_matrix, key="cosym_rij_histogram"):
    """Build a bar-chart description of the non-zero rij values.

    Args:
      rij_matrix: matrix of rij values; it is flattened with ``as_1d()`` and
        entries equal to 0 are dropped before histogramming.
      key (str): the single top-level key of the returned dict.

    Returns:
      dict: ``{key: {"data": [...], "layout": {...}, "help": ...}}`` where the
      data entry holds slot centres ("x") and slot counts ("y") of a 100-slot
      histogram whose range covers at least [-1, 1].
    """
    nonzero_rij = rij_matrix.as_1d()
    nonzero_rij = nonzero_rij.select(nonzero_rij != 0)

    # Widen the range beyond [-1, 1] only if the data require it.
    lower_bound = min(-1, flex.min(nonzero_rij))
    upper_bound = max(1, flex.max(nonzero_rij))
    hist = flex.histogram(
        nonzero_rij,
        data_min=lower_bound,
        data_max=upper_bound,
        n_slots=100,
    )

    bars = {
        "x": list(hist.slot_centers()),
        "y": list(hist.slots()),
        "type": "bar",
        "name": "Rij histogram",
    }
    layout = {
        "title": "Distribution of values in the Rij matrix",
        "xaxis": {
            "title": "r<sub>ij</sub>"
        },
        "yaxis": {
            "title": "Frequency"
        },
        "bargap": 0,
    }
    help_text = """\
A histogram of the values of the Rij matrix of pairwise correlation coefficients. A
unimodal distribution of values may suggest that no indexing ambiguity is evident,
whereas a bimodal distribution can be indicative of the presence of an indexing
ambiguity.
"""
    return {key: {"data": [bars], "layout": layout, "help": help_text}}
Пример #37
0
 def recalculate(self):
     """Recompute Bijvoet ratios from the current controls and redraw.

     Reads the "measurable only" checkbox, the observation type and the bin
     count from the widgets, computes ``self._array.bijvoet_ratios(...)``,
     prints a flex.histogram of the ratios to sys.stdout and forwards the
     ratios to ``self.show_histogram`` with axis labels that depend on the
     observation type.
     """
     from scitbx.array_family import flex
     measurable_only = self.meas_box.GetValue()
     obs_type = self.obs_type.GetStringSelection()
     n_bins = self.n_bins.GetValue()

     ratios = self._array.bijvoet_ratios(obs_type=obs_type,
                                         measurable_only=measurable_only)
     flex.histogram(ratios, n_slots=n_bins).show(f=sys.stdout)

     x_label = ("D_anom(I[hkl]) / I_mean[hkl]"
                if obs_type == "intensities" else
                "D_anom(F[hkl]) / F_mean[hkl]")
     plot_arguments = dict(
         data=list(ratios),
         n_bins=n_bins,
         x_label=x_label,
         y_label="# hkl",
         title="Bijvoet ratios for %s" % self._array_name,
         log_scale=self.log_box.GetValue(),
     )
     self.show_histogram(**plot_arguments)
Пример #38
0
    def event(self, evt, env):
        """Accumulate the current image into per-pixel histograms.

        Called for every L1Accept transition. After the parent class has
        processed the event, each pixel inside ``self.roi`` is added to its
        own histogram in ``self.histograms`` and ``self.nmemb`` is
        incremented and printed.

        NOTE(review): the original docstring states that this function
        becomes a no-op once ``self.nshots`` shots are accumulated; that
        logic is not visible in this method -- confirm it lives in the
        parent class.

        @param evt Event data object, a configure object
        @param env Environment object
        """

        # Let the parent class process the event first.
        super(pixel_histograms, self).event(evt, env)

        # Nothing to accumulate for events flagged as skipped.
        if (evt.get("skip_event")):
            return

        if self.sigma_scaling:
            # Express the masked pixels in units of the dark standard
            # deviation, then round the image back to integers.
            flex_cspad_img = self.cspad_img.as_double()
            flex_cspad_img_sel = flex_cspad_img.as_1d().select(
                self.dark_mask.as_1d())
            flex_dark_stddev = self.dark_stddev.select(
                self.dark_mask.as_1d()).as_double()
            # Guard against division by zero below.
            assert flex_dark_stddev.count(0) == 0
            flex_cspad_img_sel /= flex_dark_stddev
            # NOTE(review): the write-back relies on as_1d() sharing storage
            # with flex_cspad_img -- TODO confirm.
            flex_cspad_img.as_1d().set_selected(
                self.dark_mask.as_1d().iselection(), flex_cspad_img_sel)
            self.cspad_img = flex_cspad_img.iround()

        pixels = self.cspad_img.deep_copy()
        dimensions = pixels.all()
        # Default region of interest: the whole image. roi[0:2] bounds the
        # second index (j) and roi[2:4] bounds the first index (i).
        if self.roi is None:
            self.roi = (0, dimensions[1], 0, dimensions[0])

        for i in range(self.roi[2], self.roi[3]):
            for j in range(self.roi[0], self.roi[1]):
                # Histograms are created lazily, one per pixel, starting
                # empty.
                if (i, j) not in self.histograms:
                    self.histograms[(i, j)] = flex.histogram(
                        flex.double(), self.hist_min, self.hist_max,
                        self.n_slots)
                self.histograms[(i, j)].update(pixels[i, j])

        self.nmemb += 1
        # Disabled ("0 and"): would call endjob whenever nmemb is a power
        # of two.
        if 0 and math.log(self.nmemb, 2) % 1 == 0:
            self.endjob(env)
        # Python 2 print statement: reports the running event count.
        print self.nmemb
Пример #39
0
def show_histogram(data, n_slots, smooth=True):
    triplets = []
    histogram = flex.histogram(data=data, n_slots=n_slots)
    l = histogram.data_min()
    for i, s in enumerate(histogram.slots()):
        r = histogram.data_min() + histogram.slot_width() * (i + 1)
        triplets.append([l, r, s])
        print "%8.4f %8.4f %d" % (l, r, s)
        l = r
    if (smooth):
        print "... smooth histogram"
        triplets_smooth = []
        for i, t in enumerate(triplets):
            values = flex.double()
            for j in [-1, 0, 1]:
                if (i + j >= 0 and i + j < len(triplets)):
                    values.append(float(triplets[i + j][2]))
            triplets_smooth.append((t[0], t[1], flex.mean(values)))
        for t in triplets_smooth:
            print "%8.4f %8.4f %d" % (t[0], t[1], int("%.0f" % t[2]))
    return histogram
 def recalculate (self) :
   """Refresh the Bijvoet-ratio histogram from the current widget values.

   The observation type, bin count and "measurable only" flag are read from
   the controls; the ratios returned by ``self._array.bijvoet_ratios`` are
   summarised on sys.stdout with flex.histogram and then passed to
   ``self.show_histogram`` together with labels and the log-scale setting.
   """
   from scitbx.array_family import flex
   measurable_only = self.meas_box.GetValue()
   obs_type = self.obs_type.GetStringSelection()
   n_bins = self.n_bins.GetValue()
   bijvoet = self._array.bijvoet_ratios(
     obs_type=obs_type, measurable_only=measurable_only)
   summary = flex.histogram(bijvoet, n_slots=n_bins)
   summary.show(f=sys.stdout)
   # Anything other than "intensities" gets the amplitude label.
   if (obs_type != "intensities") :
     x_label = "D_anom(F[hkl]) / F_mean[hkl]"
   else :
     x_label = "D_anom(I[hkl]) / I_mean[hkl]"
   values = list(bijvoet)
   title = "Bijvoet ratios for %s" % self._array_name
   self.show_histogram(
     data=values,
     n_bins=n_bins,
     x_label=x_label,
     y_label="# hkl",
     title=title,
     log_scale=self.log_box.GetValue())
Пример #41
0
def show_histogram(data, n_slots, smooth = True):
  triplets = []
  histogram = flex.histogram(data = data, n_slots = n_slots)
  l = histogram.data_min()
  for i, s in enumerate(histogram.slots()):
    r = histogram.data_min() + histogram.slot_width() * (i+1)
    triplets.append( [l, r, s] )
    print "%8.4f %8.4f %d" % (l, r, s)
    l = r
  if(smooth):
    print "... smooth histogram"
    triplets_smooth = []
    for i, t in enumerate(triplets):
      values = flex.double()
      for j in [-1,0,1]:
        if(i+j >=0 and i+j < len(triplets)):
          values.append(float(triplets[i+j][2]))
      triplets_smooth.append((t[0],t[1],flex.mean(values)))
    for t in triplets_smooth:
      print "%8.4f %8.4f %d" % (t[0], t[1], int("%.0f"%t[2]))
  return histogram
Пример #42
0
  def event(self, evt, env):
    """Accumulate the current image into per-pixel histograms.

    Called for every L1Accept transition. After the parent class has
    processed the event, each pixel inside ``self.roi`` is added to its own
    histogram in ``self.histograms`` and ``self.nmemb`` is incremented and
    printed.

    NOTE(review): the original docstring states that this function becomes a
    no-op once ``self.nshots`` shots are accumulated; that logic is not
    visible in this method -- confirm it lives in the parent class.

    @param evt Event data object, a configure object
    @param env Environment object
    """

    # Let the parent class process the event first.
    super(pixel_histograms, self).event(evt, env)

    # Nothing to accumulate for events flagged as skipped.
    if (evt.get("skip_event")):
      return

    if self.sigma_scaling:
      # Express the masked pixels in units of the dark standard deviation,
      # then round the image back to integers.
      flex_cspad_img = self.cspad_img.as_double()
      flex_cspad_img_sel = flex_cspad_img.as_1d().select(self.dark_mask.as_1d())
      flex_dark_stddev = self.dark_stddev.select(self.dark_mask.as_1d()).as_double()
      # Guard against division by zero below.
      assert flex_dark_stddev.count(0) == 0
      flex_cspad_img_sel /= flex_dark_stddev
      # NOTE(review): the write-back relies on as_1d() sharing storage with
      # flex_cspad_img -- TODO confirm.
      flex_cspad_img.as_1d().set_selected(self.dark_mask.as_1d().iselection(), flex_cspad_img_sel)
      self.cspad_img = flex_cspad_img.iround()

    pixels = self.cspad_img.deep_copy()
    dimensions = pixels.all()
    # Default region of interest: the whole image. roi[0:2] bounds the second
    # index (j) and roi[2:4] bounds the first index (i).
    if self.roi is None:
      self.roi = (0, dimensions[1], 0, dimensions[0])

    for i in range(self.roi[2], self.roi[3]):
      for j in range(self.roi[0], self.roi[1]):
        # Histograms are created lazily, one per pixel, starting empty.
        if (i,j) not in self.histograms:
          self.histograms[(i,j)] = flex.histogram(flex.double(), self.hist_min, self.hist_max, self.n_slots)
        self.histograms[(i,j)].update(pixels[i,j])

    self.nmemb += 1
    # Disabled ("0 and"): would call endjob whenever nmemb is a power of two.
    if 0 and math.log(self.nmemb, 2) % 1 == 0:
      self.endjob(env)
    # Python 2 print statement: reports the running event count.
    print self.nmemb
Пример #43
0
def plot_rij_histogram(rij_matrix, key="cosym_rij_histogram"):
    """Describe a histogram of the non-zero rij values as a plot dict.

    Args:
      rij_matrix: matrix of rij values; flattened with ``as_1d()`` and
        stripped of entries equal to 0 before binning.
      key (str): name under which the plot description is returned.

    Returns:
      dict: ``{key: {"data": [...], "layout": {...}}}`` with the slot centres
      and counts of a 100-slot histogram spanning at least [-1, 1].
    """
    values = rij_matrix.as_1d()
    values = values.select(values != 0)

    # The range is [-1, 1] unless the data extend beyond it.
    range_low = min(-1, flex.min(values))
    range_high = max(1, flex.max(values))
    hist = flex.histogram(
        values, data_min=range_low, data_max=range_high, n_slots=100)

    trace = {
        "x": list(hist.slot_centers()),
        "y": list(hist.slots()),
        "type": "bar",
        "name": "Rij histogram",
    }
    axes_and_title = {
        "title": "Distribution of values in the Rij matrix",
        "xaxis": {
            "title": "r<sub>ij</sub>"
        },
        "yaxis": {
            "title": "Frequency"
        },
        "bargap": 0,
    }

    d = {}
    d[key] = {"data": [trace], "layout": axes_and_title}
    return d
Пример #44
0
def centroidify(width, shift, count):
    """Compare sample statistics with the same quantities from a histogram.

    Draws ``count`` values from a normal distribution with mean ``shift`` and
    sigma ``width``, bins them into 20 slots over [-10, 10] and prints one
    line containing: width**2/count, shift, count; the sample mean and
    sample variance divided by the number of values; the histogram-weighted
    mean and the histogram-derived variance term.
    """
    generator = variate(normal_distribution(mean=shift, sigma=width))
    values = flex.double([next(generator) for _ in range(count)])
    hist = flex.histogram(data=values, n_slots=20, data_min=-10, data_max=10)

    n_values = values.size()
    true_mean = flex.sum(values) / n_values
    squared_deviations = [(v - true_mean)**2 for v in values]
    true_variance = sum(squared_deviations) / (n_values - 1)

    slot_counts = hist.slots()
    total = 1.0 * flex.sum(slot_counts)

    weighted_centers = [
        center * occupancy
        for center, occupancy in zip(hist.slot_centers(), slot_counts)
    ]
    hist_mean = sum(weighted_centers) / total

    # Referred to as "equation 6" in the original comment.
    hist_var = sum(
        [(occupancy / total)**2 * (1.0 / 12.0) for occupancy in slot_counts])

    fields = [
        # input settings
        "%8.5f %4.1f %4d" % (width**2 / count, shift, count),
        # true variance / mean of distribution
        "%6.3f %8.5f" % (true_mean, true_variance / n_values),
        # putative values of same derived from histogram
        "%6.3f %8.5f" % (hist_mean, hist_var),
    ]
    print(" ".join(fields))
Пример #45
0
    def __init__(self,
                 cmd_list,
                 nprocs=1,
                 out=sys.stdout,
                 log=None,
                 verbosity=DEFAULT_VERBOSITY,
                 max_time=180):
        """Run every command in ``cmd_list`` and print a report.

        All work happens in the constructor: duplicates are dropped, the
        commands are run (through a multiprocessing Pool when more than one
        processor is used, otherwise serially), results are collected via
        ``self.save_result`` and a summary is written to ``self.out``
        (Python 2 print statements).

        Parameters
        ----------
        cmd_list : commands to run; repeated entries are skipped.
        nprocs : number of processes; ``Auto`` selects ``cpu_count()``. The
          value is capped at the number of unique commands.
        out : stream registered as "stdout" on the combined output.
        log : stream registered as "log"; ``null_out()`` when None.
        verbosity : passed to ``run_command``; 0 sets ``self.quiet``.
        max_time : wall-time threshold (seconds, per the warning message)
          above which a job is listed as long.

        Attributes set include ``results``, ``finished``, ``failure`` and
        ``warning``. A JUnit XML file "output.xml" is written as a side
        effect.
        """
        if (log is None): log = null_out()
        # Everything printed to self.out goes to both "stdout" and "log".
        self.out = multi_out()
        self.log = log
        self.out.register("stdout", out)
        self.out.register("log", log)
        self.verbosity = verbosity
        self.quiet = (verbosity == 0)
        self.results = []
        self.pool = None

        # Filter cmd list for duplicates.
        self.cmd_list = []
        for cmd in cmd_list:
            if (not cmd in self.cmd_list):
                self.cmd_list.append(cmd)
            else:
                print >> self.out, "Test %s repeated, skipping" % cmd

        # Set number of processors.
        if (nprocs is Auto):
            nprocs = cpu_count()
        # Never use more processes than there are commands.
        nprocs = min(nprocs, len(self.cmd_list))

        # Starting summary.
        if (self.verbosity > 0):
            print >> self.out, "Running %d tests on %s processors:" % (len(
                self.cmd_list), nprocs)
            for cmd in self.cmd_list:
                print >> self.out, "  %s" % cmd
            print >> self.out, ""

        # Either run tests in parallel or run parallel tests, but
        # can't run parallel tests in parallel (cctbx#95)
        os.environ['OPENBLAS_NUM_THREADS'] = "1"

        t_start = time.time()
        if nprocs > 1:
            # Run the tests with multiprocessing pool.
            self.pool = Pool(processes=nprocs)
            for command in self.cmd_list:
                # Results arrive through the save_result callback.
                self.pool.apply_async(run_command, [command, verbosity, out],
                                      callback=self.save_result)
            try:
                self.pool.close()
            except KeyboardInterrupt:
                print >> self.out, "Caught KeyboardInterrupt, terminating"
                self.pool.terminate()
            finally:
                # A KeyboardInterrupt raised while joining is ignored.
                try:
                    self.pool.join()
                except KeyboardInterrupt:
                    pass
        else:
            # Run tests serially.
            for command in self.cmd_list:
                rc = run_command(command, verbosity=verbosity, out=out)
                # save_result returning False stops the remaining tests.
                if self.save_result(rc) == False:
                    break

        # Print ending summary.
        t_end = time.time()
        print >> self.out, "=" * 80
        print >> self.out, ""
        print >> self.out, "Tests finished. Elapsed time: %.2fs" % (t_end -
                                                                    t_start)
        print >> self.out, ""

        # Process results for errors and warnings.
        extra_stderr = len(
            [result for result in self.results if result.stderr_lines])
        longjobs = [
            result for result in self.results if result.wall_time > max_time
        ]
        warnings = [
            result for result in self.results
            if result.alert_status == Status.WARNING
        ]
        failures = [
            result for result in self.results
            if result.alert_status == Status.FAIL
        ]
        self.finished = len(self.results)
        self.failure = len(failures)
        self.warning = len(warnings)

        # Try writing the XML result file
        write_JUnit_XML(self.results, "output.xml")

        # Run time distribution.
        # Only shown when the scitbx module is available.
        if (libtbx.env.has_module("scitbx")):
            from scitbx.array_family import flex
            print >> self.out, "Distribution of test runtimes:"
            hist = flex.histogram(flex.double(
                [result.wall_time for result in self.results]),
                                  n_slots=10)
            hist.show(f=self.out, prefix="  ", format_cutoffs="%.1fs")
            print >> self.out, ""

        # Long job warning.
        if longjobs:
            print >> self.out, ""
            print >> self.out, "Warning: the following jobs took at least %d seconds:" % max_time
            # Listed in order of increasing wall time.
            for result in sorted(longjobs,
                                 key=lambda result: result.wall_time):
                print >> self.out, "  %s: %.1fs" % (result.command,
                                                    result.wall_time)
        else:
            # Just print 5 worst offenders to encourage developers to check them out
            print >> self.out, ""
            print >> self.out, "Warning: the following are 5 longest jobs:"
            for result in sorted(self.results,
                                 key=lambda result: -result.wall_time)[:5]:
                print >> self.out, "  %s: %.1fs" % (result.command,
                                                    result.wall_time)
        # Printed in both cases above.
        print >> self.out, "Please try to reduce overall runtime - consider splitting up these tests."
        print >> self.out, ""

        # Failures.
        # Warning results are displayed here too, but only when at least one
        # failure exists.
        if failures:
            print >> self.out, ""
            print >> self.out, "Error: the following jobs returned non-zero exit codes or suspicious stderr output:"
            print >> self.out, ""
            for result in warnings:
                self.display_result(result,
                                    alert=Status.WARNING,
                                    out=self.out,
                                    log_return=self.out,
                                    log_stderr=self.out)
            for result in failures:
                self.display_result(result,
                                    alert=Status.FAIL,
                                    out=self.out,
                                    log_return=self.out,
                                    log_stderr=self.out)
            print >> self.out, ""
            print >> self.out, "Please verify these tests manually."
            print >> self.out, ""

        # Summary
        print >> self.out, "Summary:"
        print >> self.out, "  Tests run                    :", self.finished
        print >> self.out, "  Failures                     :", self.failure
        print >> self.out, "  Warnings (possible failures) :", self.warning
        print >> self.out, "  Stderr output (discouraged)  :", extra_stderr
        # Fewer results than commands means some tests never reported back.
        if (self.finished != len(self.cmd_list)):
            print >> self.out, "*" * 80
            print >> self.out, "  WARNING: NOT ALL TESTS FINISHED!"
            print >> self.out, "*" * 80
Пример #46
0
# Runs a list of test commands (serially or through a multiprocessing Pool)
# and prints a report. All of the work happens in the constructor; after
# construction the instance carries the collected results and the counters
# `finished`, `failure` and `warning`.
#
# NOTE(review): `save_result`, `check_alert` and `display_result` are used
# below but are not defined in this view of the class - confirm they exist
# on the full class.
class run_command_list(object):
    # Parameters:
    #   cmd_list  - commands to run; duplicates are reported and skipped.
    #   nprocs    - number of worker processes; `Auto` means cpu_count().
    #               Capped at the number of distinct commands.
    #   out       - stream registered as "stdout" on the combined output;
    #               also handed directly to run_command().
    #   log       - stream registered as "log"; defaults to null_out().
    #   verbosity - forwarded to run_command(); 0 sets self.quiet.
    #   max_time  - wall-time threshold used for the long-job warning
    #               (the message below reports it in seconds).
    def __init__(self,
                 cmd_list,
                 nprocs=1,
                 out=sys.stdout,
                 log=None,
                 verbosity=DEFAULT_VERBOSITY,
                 max_time=180):
        if (log is None): log = null_out()
        # Everything printed to self.out goes to both `out` and `log`.
        self.out = multi_out()
        self.log = log
        self.out.register("stdout", out)
        self.out.register("log", log)
        self.verbosity = verbosity
        self.quiet = (verbosity == 0)
        self.results = []

        # Filter cmd list for duplicates.
        self.cmd_list = []
        for cmd in cmd_list:
            if (not cmd in self.cmd_list):
                self.cmd_list.append(cmd)
            else:
                print >> self.out, "Test %s repeated, skipping" % cmd

        # Set number of processors.
        if (nprocs is Auto):
            nprocs = cpu_count()
        # Never start more workers than there are commands to run.
        nprocs = min(nprocs, len(self.cmd_list))

        # Starting summary.
        if (self.verbosity > 0):
            print >> self.out, "Running %d tests on %s processors:" % (len(
                self.cmd_list), nprocs)
            for cmd in self.cmd_list:
                print >> self.out, "  %s" % cmd
            print >> self.out, ""

        t_start = time.time()
        if nprocs > 1:
            # Run the tests with multiprocessing pool.
            pool = Pool(processes=nprocs)
            for command in self.cmd_list:
                # Results are delivered through the save_result callback;
                # presumably it appends to self.results (not visible here).
                pool.apply_async(run_command, [command, verbosity, out],
                                 callback=self.save_result)
            try:
                pool.close()
            except KeyboardInterrupt:
                print >> self.out, "Caught KeyboardInterrupt, terminating"
                pool.terminate()
            finally:
                # join() runs whether or not the pool was terminated.
                pool.join()
        else:
            # Run tests serially.
            for command in self.cmd_list:
                rc = run_command(command, verbosity=verbosity, out=out)
                self.save_result(rc)

        # Print ending summary.
        t_end = time.time()
        print >> self.out, "=" * 80
        print >> self.out, ""
        print >> self.out, "Tests finished. Elapsed time: %.2fs" % (t_end -
                                                                    t_start)
        print >> self.out, ""
        test_cases = []
        # Process results for errors and warnings.
        # Number of results that produced any stderr output at all.
        extra_stderr = len(
            [result for result in self.results if result.stderr_lines])
        # Jobs whose wall time is strictly greater than max_time.
        longjobs = [
            result for result in self.results if result.wall_time > max_time
        ]
        # check_alert() is compared against 1 for warnings and 2 for failures.
        warnings = [
            result for result in self.results if self.check_alert(result) == 1
        ]
        failures = [
            result for result in self.results if self.check_alert(result) == 2
        ]
        self.finished = len(self.results)
        self.failure = len(failures)
        self.warning = len(warnings)

        # Output JUnit XML if possible
        # (skipped silently when the junit_xml package cannot be imported).
        try:
            from junit_xml import TestSuite, TestCase
            import re

            # Turn a line of output into plain ASCII, replacing anything
            # else by XML character references; undecodable bytes are
            # dropped in the fallback path.
            def decode_string(string):
                try:
                    return string.encode('ascii', 'xmlcharrefreplace')
                except Exception:  # intentional
                    return unicode(string, errors='ignore').encode(
                        'ascii', 'xmlcharrefreplace')

            for result in self.results:
                # test_name is indexed as (classname, name) below.
                test_name = reconstruct_test_name(result.command)
                plain_stdout = map(decode_string, result.stdout_lines)
                plain_stderr = map(decode_string, result.stderr_lines)
                output = '\n'.join(plain_stdout + plain_stderr)
                tc = TestCase(classname=test_name[0],
                              name=test_name[1],
                              elapsed_sec=result.wall_time,
                              stdout='\n'.join(plain_stdout),
                              stderr='\n'.join(plain_stderr))
                if result.return_code == 0:
                    # Identify skipped tests
                    # (any case-insensitive occurrence of 'skip' in the
                    # combined stdout/stderr output counts).
                    if re.search('skip', output, re.IGNORECASE):
                        # find first line including word 'skip' and use it as message
                        skipline = re.search('^((.*)skip(.*))$', output,
                                             re.IGNORECASE
                                             | re.MULTILINE).group(1)
                        tc.add_skipped_info(skipline)
                else:
                    # Test failed. Extract error message and stack trace if possible
                    # Default message is the exit code; when stderr is not
                    # empty its last line becomes the message and at most
                    # the last 20 lines are attached as output.
                    error_message = 'exit code %d' % result.return_code
                    error_output = '\n'.join(plain_stderr)
                    if plain_stderr:
                        error_message = plain_stderr[-1]
                        if len(plain_stderr) > 20:
                            error_output = '\n'.join(plain_stderr[-20:])
                    tc.add_failure_info(message=error_message,
                                        output=error_output)
                test_cases.append(tc)
            ts = TestSuite("libtbx.run_tests_parallel", test_cases=test_cases)
            # The report is written to 'output.xml' relative to the
            # current working directory.
            with open('output.xml', 'wb') as f:
                print >> f, TestSuite.to_xml_string([ts], prettyprint=True)
        except ImportError, e:
            pass

        # Run time distribution.
        # Only printed when the scitbx module is available.
        if (libtbx.env.has_module("scitbx")):
            from scitbx.array_family import flex
            print >> self.out, "Distribution of test runtimes:"
            hist = flex.histogram(flex.double(
                [result.wall_time for result in self.results]),
                                  n_slots=10)
            hist.show(f=self.out, prefix="  ", format_cutoffs="%.1fs")
            print >> self.out, ""

        # Long job warning.
        if longjobs:
            print >> self.out, ""
            print >> self.out, "Warning: the following jobs took at least %d seconds:" % max_time
            # Listed in order of increasing wall time.
            for result in sorted(longjobs,
                                 key=lambda result: result.wall_time):
                print >> self.out, "  %s: %.1fs" % (result.command,
                                                    result.wall_time)
            print >> self.out, "Please try to reduce overall runtime - consider splitting up these tests."

        # Failures.
        # NOTE: warnings are only displayed when there is at least one
        # failure, because both loops live inside `if failures:`.
        if failures:
            print >> self.out, ""
            print >> self.out, "Error: the following jobs returned non-zero exit codes or suspicious stderr output:"
            print >> self.out, ""
            for result in warnings:
                self.display_result(result,
                                    alert=1,
                                    out=self.out,
                                    log_return=self.out,
                                    log_stderr=self.out)
            for result in failures:
                self.display_result(result,
                                    alert=2,
                                    out=self.out,
                                    log_return=self.out,
                                    log_stderr=self.out)
            print >> self.out, ""
            print >> self.out, "Please verify these tests manually."
            print >> self.out, ""

        # Summary
        print >> self.out, "Summary:"
        print >> self.out, "  Tests run                    :", self.finished
        print >> self.out, "  Failures                     :", self.failure
        print >> self.out, "  Warnings (possible failures) :", self.warning
        print >> self.out, "  Stderr output (discouraged)  :", extra_stderr
        # Fewer results than commands means some job never reported back.
        if (self.finished != len(self.cmd_list)):
            print >> self.out, "*" * 80
            print >> self.out, "  WARNING: NOT ALL TESTS FINISHED!"
            print >> self.out, "*" * 80
Пример #47
0
    def del_anom_normal_plot(intensities, strong_cutoff=0.0):
        """Make a normal probability plot of the normalised anomalous differences.

        The anomalous differences of ``intensities`` are divided by their
        sigmas, sorted, and binned in a 200 x 200 two-dimensional histogram
        against the quantiles expected for a standard normal distribution.

        Args:
            intensities: object providing ``anomalous_differences()``, whose
                result exposes ``data()`` and ``sigmas()``.
            strong_cutoff: only used for labelling. A value > 0 adds
                "(d > cutoff)" to the title and the suffix "_lowres" to the
                plot name; otherwise the plot is labelled "(all data)" /
                "_highres". No filtering is done here.

        Returns:
            A dict with a single entry (keyed by the plot name) holding the
            "data", "layout" and "help" of the plot, or an empty dict when
            there are no anomalous differences.
        """
        diff_array = intensities.anomalous_differences()
        if not diff_array.data().size():
            return {}
        delta = diff_array.data() / diff_array.sigmas()

        n = delta.size()
        y = np.sort(flumpy.to_numpy(delta))
        # Expected quantiles: n probabilities evenly spaced at (i + 0.5) / n.
        d = 0.5 / n
        v = np.linspace(start=d, stop=1.0 - d, endpoint=True, num=n)
        x = norm.ppf(v)

        H, xedges, yedges = np.histogram2d(x, y, bins=(200, 200))
        # Empty bins become NaN so that the heatmap does not paint them.
        # np.nan is used because the np.NAN alias no longer exists in
        # NumPy >= 2.0.
        z = np.where(H != 0, H, np.nan)

        title = "Normal probability plot of anomalous differences"
        plotname = "normal_distribution_plot"
        if strong_cutoff > 0.0:
            title += f" (d > {strong_cutoff:.2f})"
            plotname += "_lowres"
        else:
            title += " (all data)"
            plotname += "_highres"
        return {
            plotname: {
                "data": [
                    {
                        "x": xedges.tolist(),
                        "y": yedges.tolist(),
                        "z": z.transpose().tolist(),
                        "type": "heatmap",
                        "name": "normalised deviations",
                        "colorbar": {
                            "title": "Number of reflections",
                            "titleside": "right",
                        },
                        "colorscale": "Viridis",
                    },
                    {
                        # Reference line y = x expected for normal deviations.
                        "x": [-5, 5],
                        "y": [-5, 5],
                        "type": "scatter",
                        "mode": "lines",
                        "name": "z = m",
                        "color": "rgb(0,0,0)",
                    },
                ],
                "layout": {
                    "title": title,
                    "xaxis": {
                        "anchor": "y",
                        "title": "expected delta",
                        "range": [-4, 4],
                    },
                    "yaxis": {
                        "anchor": "x",
                        "title": "observed delta",
                        "range": [-5, 5],
                    },
                },
                "help": """\
    This plot shows the normalised anomalous differences, sorted in order and
    plotted against the expected order based on a normal distribution model.
    A true normal distribution of deviations would give the straight line indicated.

    [1] P. L. Howell and G. D. Smith, J. Appl. Cryst. (1992). 25, 81-86
    https://doi.org/10.1107/S0021889891010385
    [2] P. Evans, Acta Cryst. (2006). D62, 72-82
    https://doi.org/10.1107/S0907444905036693
    """,
            }
        }
Пример #48
0
def eval_logs(file_names, out=None):
  """Summarise a set of refinement log files and print statistics to `out`.

  Each file is scanned line by line. For every result line (one of the
  "<method>    cc, r1: " prefixes below) that follows an "iso" line, the
  difference between the second field of the result and the second field of
  the "iso" values is recorded as a "gap". Files containing a NUL character
  are counted as stale and skipped; files without a "TIME END cod_refine: "
  line are counted as unfinished.

  Parameters:
    file_names - sequence of log file paths (its length is returned).
    out        - output stream; defaults to sys.stdout.

  Returns the tuple
    (len(file_names), n_unfinished, min_secs_epoch, max_secs_epoch)
  where the last two are the smallest "TIME BEGIN" and the largest
  "TIME END" time stamps found (None if there were none).

  Raises AssertionError if the log lines appear in an unexpected order or
  if more results than initialised refinements are found.
  """
  if (out is None): out = sys.stdout
  from scitbx.array_family import flex
  from libtbx.str_utils import format_value
  # Prefixes of the lines that report the result of one refinement method.
  refinement_prefixes = (
    "dev          cc, r1: ",
    "ls_simple    cc, r1: ",
    "ls_lm        cc, r1: ",
    "shelxl_fm    cc, r1: ",
    "shelxl_cg    cc, r1: ",
    "shelx76      cc, r1: ")
  def get_secs_epoch(line):
    # The time stamp is the second-to-last field, minus its first character.
    return float(line.split()[-2][1:])
  min_secs_epoch = None
  max_secs_epoch = None
  n_refinements_initialized = 0
  gaps = flex.double()
  infos = flex.std_string()
  n_stale = 0
  n_unfinished = 0
  n_exception = 0
  n_traceback = 0
  n_abort = 0
  seconds = []
  space_groups_by_cod_id = {}
  for file_name in file_names:
    have_time_end = False
    cod_id = None
    n_scatt = None
    iso = None
    # Close the file deterministically instead of relying on garbage
    # collection.
    with open(file_name) as f:
      file_str = f.read()
    if (file_str.find(chr(0)) >= 0):
      n_stale += 1
      continue
    for line in file_str.splitlines():
      if (line.startswith("cod_id: ")):
        # NOTE(review): the prefix tested is 8 characters long but the
        # slice starts at 10 - presumably matches the log format; confirm.
        cod_id = line[10:]
        iso = None
      elif (line.startswith("Space group: ")):
        assert cod_id is not None
        space_group = line.split(None, 2)[2]
        # The same cod_id must always be reported with the same space group.
        tabulated = space_groups_by_cod_id.setdefault(cod_id, space_group)
        assert tabulated == space_group
      elif (line.startswith("Number of scatterers: ")):
        assert cod_id is not None
        n_scatt = int(line.split(": ",1)[1])
      elif (line.startswith("Number of refinable parameters: ")):
        assert cod_id is not None
        n_refinements_initialized += 1
      elif (line.startswith("iso          cc, r1: ")):
        assert cod_id is not None
        assert iso is None
        iso = line.split(": ",1)[1]
      elif (line.startswith(refinement_prefixes)):
        assert iso is not None
        ref = line.split(": ",1)[1]
        gap = float(ref.split()[1]) - float(iso.split()[1])
        gaps.append(gap)
        infos.append(" : ".join([
          cod_id, iso, ref, "%.3f" % gap, str(n_scatt),
          space_groups_by_cod_id[cod_id]]))
        # One result consumes the current record.
        cod_id = None
        n_scatt = None
        iso = None
      else:
        if (line.find("EXCEPTION") >= 0):
          n_exception += 1
        if (line.startswith("Traceback")):
          n_traceback += 1
        if (line.find("Abort") >= 0):
          n_abort += 1
        if (line.startswith("wall clock time: ")):
          if (line.endswith(" seconds")):
            secs = float(line.split()[-2])
          else:
            _, fld = line.split("(", 1)
            assert fld.endswith(" seconds total)")
            secs = float(fld.split()[0])
          seconds.append(secs)
        elif (line.startswith("TIME BEGIN cod_refine: ")):
          s = get_secs_epoch(line)
          if (min_secs_epoch is None or s < min_secs_epoch): min_secs_epoch = s
        elif (line.startswith("TIME END cod_refine: ")):
          s = get_secs_epoch(line)
          if (max_secs_epoch is None or s > max_secs_epoch): max_secs_epoch = s
          have_time_end = True
    if (not have_time_end):
      n_unfinished += 1
  perm = flex.sort_permutation(gaps)
  gaps = gaps.select(perm)
  n_missing = n_refinements_initialized - gaps.size()
  print >> out, "Number of results: %d (%d missing)" % (gaps.size(), n_missing)
  assert n_missing >= 0
  print >> out, "Stale, Unfinished, Exceptions, Tracebacks, Abort:", \
    n_stale, n_unfinished, n_exception, n_traceback, n_abort
  if (n_exception + n_abort < n_missing):
    # Goes to `out` like the rest of the report (it used to bypass it).
    print >> out, "WARNING: more missing results than expected."
  if (len(seconds) != 0):
    if (min_secs_epoch is not None and max_secs_epoch is not None):
      g = max_secs_epoch - min_secs_epoch
    else:
      g = None
    print >> out, "min, max, global seconds: %.2f %.2f %s" % (
      min(seconds), max(seconds), format_value("%.2f", g))
  print >> out
  def stats(f):
    n = f.count(True)
    # 100.0 forces true division; integer division would always print
    # ".00" for the percentage under Python 2.
    return "%6d = %5.2f %%" % (
      n, 100.0 * n / max(1,n_refinements_initialized))
  print >> out, "gaps below -0.05:", stats(gaps < -0.05)
  print >> out, "gaps below -0.01:", stats(gaps < -0.01)
  print >> out, "gaps below  0.01:", stats(gaps <  0.01)
  print >> out, "gaps above  0.01:", stats(gaps >  0.01)
  print >> out, "gaps above  0.05:", stats(gaps >  0.05)
  print >> out
  print >> out, "Histogram of gaps:"
  flex.histogram(gaps, n_slots=10).show(f=out)
  print >> out
  # Print the per-result details in the same (sorted by gap) order.
  infos = infos.select(perm)
  for info in infos:
    print >> out, info
  return (len(file_names), n_unfinished, min_secs_epoch, max_secs_epoch)
Пример #49
0
  def __init__(self, reflections, step_size=45, tolerance=1.5,
               max_height_fraction=0.25, percentile=0.05,
               histogram_binning='linear'):
    """Nearest-neighbour analysis of reciprocal-lattice points.

    For every imageset, the reflections are split into blocks of
    `step_size` in phi and, within each block, into entering and exiting
    reflections. For each group the nearest-neighbour distance of every
    reciprocal-lattice point is computed and its inverse (a direct-space
    distance) is accumulated. The distances are histogrammed and a maximum
    cell estimate is derived from the histogram.

    Parameters:
      reflections         - table with columns 'rlp', 'xyzobs.mm.value',
                            'imageset_id' and optionally 'entering'
                            (all reflections count as entering if absent).
      step_size           - width of a phi block, in the units of phi_deg
                            (presumably degrees; phi is scaled by 180/pi).
      tolerance           - factor applied to the upper edge of the selected
                            histogram bin to give `max_cell`.
      max_height_fraction - bins higher than this fraction of the highest
                            bin are considered when choosing `max_cell`.
      percentile          - fraction of the longest distances skipped when
                            picking `self.percentile`.
      histogram_binning   - 'log' to histogram log10 of the distances,
                            anything else for linear binning.

    Sets: tolerance, NNBIN, histogram_binning, slot_start, slot_end,
    slot_width, relative_frequency, max_cell, percentile,
    reciprocal_lattice_vectors, d_spacings, direct, histogram.

    Raises AssertionError if 10 or fewer distances were collected.
    """
    self.tolerance = tolerance # Margin of error for max unit cell estimate
    from scitbx.array_family import flex
    NEAR = 10
    self.NNBIN = 5 # target number of neighbors per histogram bin
    self.histogram_binning = histogram_binning

    direct = flex.double()

    if 'entering' in reflections:
      entering_flags = reflections['entering']
    else:
      entering_flags = flex.bool(reflections.size(), True)
    rs_vectors = reflections['rlp']
    phi_deg = reflections['xyzobs.mm.value'].parts()[2] * (180/math.pi)

    d_spacings = flex.double()
    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor
    for imageset_id in range(flex.max(reflections['imageset_id'])+1):
      sel_imageset = reflections['imageset_id'] == imageset_id
      if sel_imageset.count(True) == 0:
        continue
      phi_min = flex.min(phi_deg.select(sel_imageset))
      phi_max = flex.max(phi_deg.select(sel_imageset))
      d_phi = phi_max - phi_min
      n_steps = max(int(math.ceil(d_phi / step_size)), 1)

      for n in range(n_steps):
        # Build a fresh selection for every phi block. Narrowing the
        # imageset selection in place would leave every block after the
        # first one empty.
        sel_step = (
          sel_imageset
          & (phi_deg >= (phi_min+n*step_size))
          & (phi_deg < (phi_min+(n+1)*step_size)))

        for entering in (True, False):
          # Likewise, derive the entering/exiting selection from the block
          # selection without modifying it.
          sel_entering = sel_step & (entering_flags == entering)
          if sel_entering.count(True) == 0:
            continue

          selected_vectors = rs_vectors.select(sel_entering)
          query = flex.double()
          query.extend(selected_vectors.as_double())

          if query.size() == 0:
            continue

          IS_adapt = AnnAdaptor(data=query,dim=3,k=1)
          IS_adapt.query(query)

          direct.extend(1/flex.sqrt(IS_adapt.distances))
          # Keep d_spacings parallel to direct: one entry per selected
          # vector, since both are permuted and truncated together below.
          d_spacings.extend(1/selected_vectors.norms())

    assert len(direct)>NEAR, (
      "Too few spots (%d) for nearest neighbour analysis." %len(direct))

    perm = flex.sort_permutation(direct)
    direct = direct.select(perm)
    d_spacings = d_spacings.select(perm)

    # reject top 1% of longest distances to hopefully get rid of any outliers
    n = int(math.floor(0.99*len(direct)))
    direct = direct[:n]
    d_spacings = d_spacings[:n]

    # determine the most probable nearest neighbor distance (direct space)
    if self.histogram_binning == 'log':
      hst = flex.histogram(
        flex.log10(direct), n_slots=int(len(direct)/self.NNBIN))
    else:
      hst = flex.histogram(direct, n_slots=int(len(direct)/self.NNBIN))
    if self.histogram_binning == 'log':
      # Convert the slot boundaries back from log10 to distances; the
      # slots then have unequal widths.
      self.slot_start = flex.double(
        [10**s for s in hst.slot_centers() - 0.5 * hst.slot_width()])
      self.slot_end = flex.double(
        [10**s for s in hst.slot_centers() + 0.5 * hst.slot_width()])
      self.slot_width = self.slot_end - self.slot_start
    else:
      self.slot_start = hst.slot_centers() - 0.5 * hst.slot_width()
      self.slot_end = hst.slot_centers() + 0.5 * hst.slot_width()
      self.slot_width = hst.slot_width()
    # Counts per unit width, so that bins of different width are comparable.
    self.relative_frequency = hst.slots().as_double()/self.slot_width
    highest_bin_height = flex.max(self.relative_frequency)

    if False:  # to print out the histogramming analysis
      smin, smax = flex.min(direct), flex.max(direct)
      stats = flex.mean_and_variance(direct)
      import sys
      out = sys.stdout
      print >> out, "     range:     %6.2f - %.2f" % (smin, smax)
      print >> out, "     mean:      %6.2f +/- %6.2f on N = %d" % (
        stats.mean(), stats.unweighted_sample_standard_deviation(), direct.size())
      hst.show(f=out, prefix="    ", format_cutoffs="%6.2f")
      print >> out, ""

    # choose a max cell based on bins above a given fraction of the highest bin height
    # given multiple
    isel = (self.relative_frequency.as_double() > (
      max_height_fraction * highest_bin_height)).iselection()
    # The upper edge of the last bin above the threshold, times tolerance.
    self.max_cell = (
      self.tolerance * self.slot_end[int(flex.max(isel.as_double()))])

    # determine the direct-space distance below the top `percentile`
    # fraction of the longest distances (5% by default)
    perm = flex.sort_permutation(direct, reverse=True)
    self.percentile = direct[perm[int(percentile * len(direct))]]

    self.reciprocal_lattice_vectors = rs_vectors
    self.d_spacings = d_spacings
    self.direct = direct
    self.histogram = hst
Пример #50
0
  def common_mode(self, img, stddev, mask):
    """The common_mode() function returns the mode of image stored in
    the array pointed to by @p img.  @p mask must be such that the @p
    stddev at the selected pixels is greater than zero.

    The estimator is chosen by @c self.common_mode_correction: "mean"
    (mean of the pixels with signal-to-noise ratio below 2), "median",
    "mode" (centre of the most populated slot of a 100-slot histogram
    over [-40, 40]), or anything else for the centre of a single
    Gaussian fitted to that histogram.

    @param img    2D integer array of the image
    @param stddev 2D integer array of the standard deviation of each
                  pixel in @p img
    @param mask   2D Boolean array, @c True if the pixel is to be
                  included, @c False otherwise
    @return       Mode of the image, as a real number
    """

    # Flatten the image and take out inactive pixels XXX because we
    # cannot take means and medians of 2D arrays?
    img_1d = img.as_1d().select(mask.as_1d()).as_double()
    assert img_1d.size() > 0

    if (self.common_mode_correction == "mean"):
      # The common mode is approximated by the mean of the pixels with
      # signal-to-noise ratio less than a given threshold.  XXX Breaks
      # if the selection is empty!
      THRESHOLD_SNR = 2
      img_snr = img_1d / stddev.as_double().as_1d().select(mask.as_1d())
      return (flex.mean(img_1d.select(img_snr < THRESHOLD_SNR)))

    elif (self.common_mode_correction == "median"):
      return (flex.median(img_1d))

    # Identify the common-mode correction as the peak histogram of the
    # histogram of pixel values (the "standard" common-mode correction, as
    # previously implemented in this class).
    hist_min = -40
    hist_max = 40
    n_slots = 100

    hist = flex.histogram(img_1d, hist_min, hist_max, n_slots=n_slots)
    slots = hist.slots()
    i = flex.max_index(slots)
    common_mode = list(hist.slot_infos())[i].center()

    if (self.common_mode_correction == "mode"):
      return (common_mode)

    # Determine the common-mode correction from the peak of a single
    # Gaussian function fitted to the histogram.
    from scitbx.math.curve_fitting import single_gaussian_fit
    x = hist.slot_centers()
    y = slots.as_double()
    fit = single_gaussian_fit(x, y)
    mu, sigma = fit.b, fit.c
    self.logger.debug("fitted gaussian: mu=%.3f, sigma=%.3f" %(mu, sigma))
    mode = common_mode
    common_mode = mu
    # Fall back to the histogram peak if the fitted centre is more than
    # 1000 away from it.
    if abs(mode-common_mode) > 1000: common_mode = mode # XXX
    self.logger.debug("delta common mode corrections: %.3f" %(mode-common_mode))

    return (common_mode)
Пример #51
0
def run(args):
  """Report and plot the reflections flagged as shadowed.

  Prints the number and percentage of shadowed reflections and writes three
  PNG files to the current directory: n_shadowed_hist2d.png,
  n_shadowed_vs_scan_angle.png and n_shadowed_vs_resolution.png.

  NOTE(review): `args` is not forwarded to the option parser, which
  presumably falls back to the process command line - confirm.

  Exits with status 0 after printing the help if no experiments or no
  reflections were supplied.
  """

  import sys
  from dials.util.options import OptionParser
  from dials.util.options import flatten_experiments
  from dials.util.options import flatten_reflections
  import libtbx.load_env

  usage = "%s [options] datablock.json" %(
    libtbx.env.dispatcher_name)

  parser = OptionParser(
    usage=usage,
    phil=phil_scope,
    read_experiments=True,
    read_reflections=True,
    check_format=True,
    epilog=help_message)

  params, options = parser.parse_args(show_diff_phil=True)
  experiments = flatten_experiments(params.input.experiments)
  reflections = flatten_reflections(params.input.reflections)

  if len(experiments) == 0 or len(reflections) == 0:
    parser.print_help()
    sys.exit(0)

  reflections = reflections[0]
  shadowed = filter_shadowed_reflections(experiments, reflections)

  n_shadowed = shadowed.count(True)
  n_total = shadowed.size()
  # 100.0 forces true division (integer division would print 0.00% under
  # Python 2); an empty table gives 0% instead of a ZeroDivisionError.
  percent_shadowed = 100.0 * n_shadowed / n_total if n_total else 0.0
  print("# shadowed reflections: %i/%i (%.2f%%)" %(
    n_shadowed, n_total, percent_shadowed))

  expt = experiments[0]
  x,y,z = reflections['xyzcal.px'].parts()
  # Scale the calculated z coordinate by the second oscillation value of
  # the scan (presumably image number -> angle; confirm).
  z_ = z * expt.scan.get_oscillation()[1]
  zmin, zmax = expt.scan.get_oscillation_range()

  # One slot per unit of scan range, but never fewer than one slot.
  hist_scan_angle = flex.histogram(
    z_.select(shadowed), n_slots=max(1, int(zmax-zmin)))
  #hist_scan_angle.show()

  uc = expt.crystal.get_unit_cell()
  ds2 = uc.d_star_sq(reflections['miller_index'])

  hist_res = flex.histogram(ds2.select(shadowed), flex.min(ds2), flex.max(ds2), n_slots=20, )
  #hist_res.show()

  import matplotlib
  matplotlib.use('Agg')
  from matplotlib import pyplot as plt

  plt.hist2d(z_.select(shadowed).as_numpy_array(),
             ds2.select(shadowed).as_numpy_array(), bins=(40,40),
             range=((flex.min(z_), flex.max(z_)),(flex.min(ds2), flex.max(ds2))))
  yticks_dsq = flex.double(plt.yticks()[0])
  from cctbx import uctbx
  yticks_d = uctbx.d_star_sq_as_d(yticks_dsq)
  # Relabel the axes that hold the histogram; gca() returns the current
  # axes, whereas axes() may create a new one on recent matplotlib.
  plt.gca().set_yticklabels(['%.2f' %y for y in yticks_d])
  plt.xlabel('Scan angle (degrees)')
  # NOTE(review): the tick labels are d values converted from d_star_sq,
  # so the unit in this label looks inconsistent - confirm.
  plt.ylabel('Resolution (A^-1)')
  cbar = plt.colorbar()
  cbar.set_label('# shadowed reflections')
  plt.savefig('n_shadowed_hist2d.png')
  plt.clf()

  plt.scatter(hist_scan_angle.slot_centers().as_numpy_array(), hist_scan_angle.slots().as_numpy_array())
  plt.xlabel('Scan angle (degrees)')
  plt.ylabel('# shadowed reflections')
  plt.savefig("n_shadowed_vs_scan_angle.png")
  plt.clf()

  plt.scatter(hist_res.slot_centers().as_numpy_array(), hist_res.slots().as_numpy_array())
  plt.xlabel('d_star_sq')
  plt.savefig("n_shadowed_vs_resolution.png")
  plt.clf()
        if False:
            # have an I/sigma selection of > 1

            i_over_sigi = pkl.data() / pkl.sigmas()
            pkl = pkl.select(i_over_sigi > 1)
        
        c, n = compare(reference1, pkl, sgtype)
        print '%5d %s %6.3f %5d' % (i, pkl_file.strip(), c, n),
        ccs[0].append(c)
        for j, reference in enumerate(references):
            c, n = compare(reference, pkl, sgtype)
            print '%6.3f %5d' % (c, n),
            ccs[j + 1].append(c)
        print ''

    from matplotlib import pyplot
    
    for j in sorted(ccs):
        pyplot.scatter(range(len(ccs[j])), ccs[j])
    pyplot.savefig('ccs.png')
    pyplot.close()

    from scitbx.array_family import flex
    
    for j in sorted(ccs):
        hist = flex.histogram(flex.double(ccs[j]), n_slots=50)
        pyplot.plot(hist.slot_centers(), hist.slots())
        pyplot.savefig('cc_hist_%d.png' % j)
        pyplot.close()

Пример #53
0
def convert_to_histogram(data, n_slots):
  """Return a flex.histogram of `data` built with `n_slots` slots."""
  return flex.histogram(data=data, n_slots=n_slots)
Пример #54
0
def set_ensemble_b_factors_to_xyz_displacement(
    pdb_hierarchy,
    include_hydrogens=False,
    include_waters=False,
    use_c_alpha_values=False,
    method="rmsf",
    selection=None,
    substitute_b_value=-1.0,
    logarithmic=False,
    log=None,
):
    """
    Replace the isotropic B-factor of every atom in an ensemble (multi-MODEL
    PDB hierarchy) by a measure of how far the copies of that atom are spread
    across the models.

    Copies of an atom are matched by (chain id, resid, altloc, atom name).
    The hierarchy is modified in place; nothing is returned.

    Parameters:
      pdb_hierarchy : hierarchy whose models are compared; atom.b is
        overwritten
      include_hydrogens : if false, atoms with element H or D do not
        contribute (ignored when a selection is given)
      include_waters : if false, atoms whose parent has resname HOH do not
        contribute (ignored when a selection is given)
      use_c_alpha_values : only atoms named CA contribute, and every atom
        receives the value of the C-alpha found in its parent group
      method : "rmsf" (root-mean-square fluctuation about the mean position)
        or "mcs" (radius of the minimum covering sphere)
      selection : optional per-atom flags indexed by i_seq; atoms whose flag
        is false do not contribute
      substitute_b_value : B-factor given to atoms with no computed value
      logarithmic : for method "mcs" only, store log(radius + 1) instead of
        the radius
      log : stream receiving the histogram of values (null_out() if None)
    """
    if log is None:
        log = null_out()
    assert method in ["rmsf", "mcs"]
    from scitbx.math import minimum_covering_sphere
    from scitbx.array_family import flex

    all_atoms = pdb_hierarchy.atoms()
    all_atoms.reset_i_seq()

    def atom_id(atom):
        # Identifier shared by the copies of one atom in different models.
        labels = atom.fetch_labels()
        return (labels.chain_id, labels.resid(), labels.altloc, atom.name)

    def is_c_alpha(atom):
        return (atom.name.strip() == "CA") and (atom.element.strip() == "C")

    def find_c_alpha(atom):
        # The atom itself if it is a C-alpha, otherwise the first C-alpha
        # among the atoms of its parent, otherwise None.
        if is_c_alpha(atom):
            return atom
        for sibling in atom.parent().atoms():
            if is_c_alpha(sibling):
                return sibling
        return None

    def deviation(sites):
        # Spread of one atom's positions according to the chosen method.
        if method == "mcs":
            radius = minimum_covering_sphere(points=sites, epsilon=0.1).radius()
            if logarithmic:
                return math.log(radius + 1.0)
            return radius
        centroid = flex.vec3_double(sites.size(), sites.mean())
        return sites.rms_difference(centroid)

    # Pass 1: gather the coordinates of every contributing atom, per identity.
    sites_by_id = {}
    for model in pdb_hierarchy.models():
        for atom in model.atoms():
            # An explicit selection replaces the hydrogen and water filters.
            if selection is not None:
                excluded = not selection[atom.i_seq]
            elif (not include_hydrogens) and (atom.element.strip() in ["H", "D"]):
                excluded = True
            else:
                excluded = (not include_waters) and (
                    atom.parent().resname in ["HOH"])
            if excluded or (use_c_alpha_values and atom.name.strip() != "CA"):
                continue
            key = atom_id(atom)
            sites = sites_by_id.get(key)
            if sites is None:
                sites_by_id[key] = flex.vec3_double([atom.xyz])
            else:
                sites.append(atom.xyz)

    # Pass 2: one deviation value per atom identity.
    dev_by_id = {}
    for key, sites in sites_by_id.iteritems():
        dev_by_id[key] = deviation(sites)

    all_dev = flex.double(dev_by_id.values())
    if method != "mcs":
        print >> log, "Distribution of root-mean-square fluctuation values:"
    else:
        print >> log, "Distribution of sphere radii:"
    flex.histogram(all_dev, n_slots=20).show(
        f=log, prefix="  ", format_cutoffs="%.2f")

    # Pass 3: write the values back as B-factors.
    for model in pdb_hierarchy.models():
        for atom in model.atoms():
            source = find_c_alpha(atom) if use_c_alpha_values else atom
            if source is None:
                atom.b = substitute_b_value
            else:
                atom.b = dev_by_id.get(atom_id(source), substitute_b_value)
Пример #55
0
  def __init__ (self,
                cmd_list,
                nprocs=1,
                out=sys.stdout,
                log=None,
                quiet=False,
                output_junit_xml=False) :
    """
    Run each distinct command of cmd_list and print a report.

    Commands are run one after another in this process, or submitted to a
    Pool of nprocs processes when nprocs exceeds one; every outcome is handed
    to self.save_result.  Afterwards the runtime distribution, the slow jobs,
    the failing jobs and a summary are printed to self.out.

    Parameters:
      cmd_list : commands to run; repeated entries are reported and skipped
      nprocs : upper bound on the number of processes (capped at the number
        of distinct commands)
      out : stream registered as "stdout" on self.out, also passed to
        run_command
      log : stream registered as "log" on self.out (null_out() if None)
      quiet : run_command is made verbose unless this is true
      output_junit_xml : if true, additionally write the results to
        output.xml through the junit_xml package
    """
    if (log is None) :
      log = null_out()
    self.out = multi_out()
    self.log = log
    self.out.register("stdout", out)
    self.out.register("log", log)
    self.quiet = quiet

    # Keep the first occurrence of each command, in the order given.
    self.cmd_list = []
    for cmd in cmd_list :
      if (cmd in self.cmd_list) :
        print >> self.out, "  test %s repeated, skipping" % cmd
        continue
      self.cmd_list.append(cmd)

    nprocs = min(nprocs, len(self.cmd_list))
    print >> self.out, "\n  Starting command list"
    print >> self.out, "    NProcs :",nprocs
    print >> self.out, "    Cmds   :",len(self.cmd_list)
    t_start = time.time()
    in_parallel = (nprocs > 1)
    if in_parallel :
      pool = Pool(processes=nprocs)
    # Presumably filled by self.save_result - its definition is not shown here.
    self.results = []
    if in_parallel :
      for command in self.cmd_list :
        pool.apply_async(
          run_command,
          [command, (not quiet), out],
          callback=self.save_result)
      try :
        pool.close()
      except KeyboardInterrupt :
        print >> self.out, "Caught KeyboardInterrupt, terminating"
        pool.terminate()
      finally :
        pool.join()
      print >> self.out, '\nProcesses have joined : %d\n' % len(self.results)
    else :
      for command in self.cmd_list :
        self.save_result(run_command(command, verbose=(not quiet), out=out))
    t_end = time.time()
    print >> self.out, ""
    print >> self.out, "Elapsed time: %.2fs" %(t_end-t_start)
    print >> self.out, ""

    # Tally the outcomes.
    n_finished = 0
    n_warning = 0
    n_extra_stderr = 0
    n_failure = 0
    problems = []    # failed jobs and jobs with error lines, in result order
    slow_jobs = []   # (command, wall time) of jobs slower than max_time
    runtimes = []
    if output_junit_xml :
      from junit_xml import TestSuite, TestCase
      test_cases = []
    for result in self.results :
      n_finished += 1
      runtimes.append(result.wall_time)
      if (result.return_code == 0) :
        # A clean exit with error lines only counts as a warning.
        if (len(result.error_lines) > 0) :
          n_warning += 1
          problems.append(result)
        if (len(result.stderr_lines) > 0) :
          n_extra_stderr += 1
      else :
        n_failure += 1
        problems.append(result)
      if (result.wall_time > max_time) :
        slow_jobs.append((result.command, result.wall_time))
      if output_junit_xml :
        tc = TestCase(name=result.command,
                      classname=result.command,
                      elapsed_sec=result.wall_time,
                      stdout='\n'.join(result.stdout_lines),
                      stderr='\n'.join(result.stderr_lines))
        if result.return_code != 0 :
          tc.add_failure_info(message='exit code %d' %result.return_code)
        test_cases.append(tc)

    if output_junit_xml :
      ts = TestSuite("libtbx.run_tests_parallel", test_cases=test_cases)
      with open('output.xml', 'wb') as f :
        print >> f, TestSuite.to_xml_string([ts], prettyprint=True)

    # The histogram needs scitbx, which may not be configured.
    if (libtbx.env.has_module("scitbx")) :
      from scitbx.array_family import flex
      print >> self.out, "Distribution of test runtimes:"
      hist = flex.histogram(flex.double(runtimes), n_slots=20)
      hist.show(f=self.out, prefix="  ", format_cutoffs="%.1fs")
      print >> self.out, ""
    if slow_jobs :
      print >> self.out, ""
      print >> self.out, "WARNING: the following jobs took at least %d seconds:" % \
        max_time
      # Fastest first; the sort is stable, so ties keep their result order.
      slow_jobs.sort(key=lambda job: job[1])
      for cmd, runtime in slow_jobs :
        print >> self.out, "  " + cmd + " : %.1fs" % runtime
      print >> self.out, "Please try to reduce overall runtime - consider splitting up these tests."
    if problems :
      print >> self.out, ""
      print >> self.out, "ERROR: the following jobs returned non-zero exit codes or suspicious stderr output:"
      for result in problems :
        print >> self.out, ""
        print >> self.out, result.command + "(exit code %d):" % result.return_code
        for line in result.stderr_lines :
          print >> self.out, "  " + line
        for line in result.error_lines :
          print >> self.out, "  " + line
        print >> self.out, ""
      print >> self.out, "Please verify these tests manually."
      print >> self.out, ""
    print >> self.out, "Summary:"
    print >> self.out, "  Tests run                    :",n_finished
    print >> self.out, "  Failures                     :",n_failure
    print >> self.out, "  Warnings (possible failures) :",n_warning
    print >> self.out, "  Stderr output (discouraged)  :",n_extra_stderr
    if (n_finished != len(self.cmd_list)) :
      print >> self.out, "*" * 80
      print >> self.out, "  WARNING: NOT ALL TESTS FINISHED!"
      print >> self.out, "*" * 80
Пример #56
0
  def run(self, flags, sweep=None, observations=None, **kwargs):
    """
    Unset the flags of observations that fall in unusually crowded regions
    of the detector.

    The centroid pixel positions are binned into a 2D histogram with
    self.nbins bins.  A 1D histogram of the per-bin counts is accumulated
    and normalised, and its gradient is followed from the low-count end:
    the cutoff is the lower edge of the first slot from which two
    consecutive gradients are below self.gradient_cutoff.  Every observation
    lying strictly inside a 2D bin whose count exceeds the cutoff gets its
    flag set to False.

    Parameters:
      flags : per-observation flags, modified in place through set_selected
      sweep : unused here
      observations : object providing centroids().px_position_xy()
      kwargs : unused here

    Returns the flags object.  If no cutoff can be determined the flags are
    returned unchanged.
    """
    import numpy as np
    from scitbx.array_family import flex

    obs_x, obs_y = observations.centroids().px_position_xy().parts()

    H, xedges, yedges = np.histogram2d(
      obs_x.as_numpy_array(), obs_y.as_numpy_array(),bins=self.nbins)

    # Distribution of the per-bin counts, using at most 30 slots.
    H_flex = flex.double(H.flatten().astype(np.float64))
    n_slots = min(int(flex.max(H_flex)), 30)
    hist = flex.histogram(H_flex, n_slots=n_slots)

    slots = hist.slots()
    slot_width = hist.slot_width()
    slot_centers = hist.slot_centers()

    # Running total of the slot populations, scaled so the last entry is 1.
    cumulative_hist = flex.long(len(slots))
    running_total = 0
    for i, count in enumerate(slots):
      running_total += count
      cumulative_hist[i] = running_total
    cumulative_hist = cumulative_hist.as_double()
    cumulative_hist = cumulative_hist/flex.max(cumulative_hist)

    # Only the first qualifying slot defines the cutoff; cutoff stays None
    # when there is no such slot (e.g. the histogram has a single slot).
    cutoff = None
    gradients = flex.double()
    for i in range(len(slots)-1):
      g = (cumulative_hist[i+1] - cumulative_hist[i])/slot_width
      gradients.append(g)
      if (cutoff is None and i > 0 and
          g < self.gradient_cutoff and gradients[i-1] < self.gradient_cutoff):
        cutoff = slot_centers[i-1]-0.5*slot_width

    # Comparing the counts against None is not meaningful, so without a
    # cutoff nothing is rejected.
    if cutoff is not None:
      sel = np.column_stack(np.where(H > cutoff))
      for (ix, iy) in sel:
        flags.set_selected(
          ((obs_x > xedges[ix]) & (obs_x < xedges[ix+1]) &
           (obs_y > yedges[iy]) & (obs_y < yedges[iy+1])), False)

    # Diagnostic plots, disabled by default; change the condition to inspect
    # the histograms interactively.
    if 0:
      from matplotlib import pyplot
      fig, ax1 = pyplot.subplots()
      extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
      plot1 = ax1.imshow(H, extent=extent, interpolation="nearest")
      pyplot.xlim((0, pyplot.xlim()[1]))
      pyplot.ylim((0, pyplot.ylim()[1]))
      pyplot.gca().invert_yaxis()
      cbar1 = pyplot.colorbar(plot1)
      pyplot.axes().set_aspect('equal')
      pyplot.show()

      fig, ax1 = pyplot.subplots()
      ax2 = ax1.twinx()
      ax1.scatter(hist.slot_centers()-0.5*hist.slot_width(), cumulative_hist)
      ax1.set_ylim(0, 1)
      ax2.plot(hist.slot_centers()[:-1]-0.5*hist.slot_width(), gradients)
      ymin, ymax = pyplot.ylim()
      if cutoff is not None:
        pyplot.vlines(cutoff, ymin, ymax, color='r')
      pyplot.show()

      H2 = H.copy()
      if cutoff is not None:
        H2[np.where(H2 >= cutoff)] = 0
      fig, ax1 = pyplot.subplots()
      plot1 = ax1.pcolormesh(xedges, yedges, H2)
      pyplot.xlim((0, pyplot.xlim()[1]))
      pyplot.ylim((0, pyplot.ylim()[1]))
      pyplot.gca().invert_yaxis()
      cbar1 = pyplot.colorbar(plot1)
      pyplot.axes().set_aspect('equal')
      pyplot.show()

    return flags
Пример #57
0
  def __init__ (self,
                cmd_list,
                nprocs=1,
                out=sys.stdout,
                log=None,
                verbosity=DEFAULT_VERBOSITY,
                output_junit_xml=False) :
    """
    Run each distinct command of cmd_list and print a report.

    Commands are submitted to a Pool of nprocs processes when nprocs exceeds
    one, otherwise they are run one after another in this process; every
    outcome is handed to self.save_result.  Afterwards the runtime
    distribution, the slow jobs, the failing jobs and a summary are printed
    to self.out, and the counts are kept in self.finished, self.failure and
    self.warning.

    Parameters:
      cmd_list : commands to run; repeated entries are reported and skipped
      nprocs : number of processes, or Auto to use cpu_count(); capped at
        the number of distinct commands
      out : stream registered as "stdout" on self.out, also passed to
        run_command
      log : stream registered as "log" on self.out (null_out() if None)
      verbosity : passed to run_command; 0 sets self.quiet and suppresses
        the starting summary
      output_junit_xml : if true, additionally write the results to
        output.xml through the junit_xml package
    """
    if (log is None) :
      log = null_out()
    self.out = multi_out()
    self.log = log
    self.out.register("stdout", out)
    self.out.register("log", log)
    self.verbosity = verbosity
    self.quiet = (verbosity == 0)
    self.results = []

    # Keep the first occurrence of each command, in the order given.
    self.cmd_list = []
    for cmd in cmd_list :
      if (cmd in self.cmd_list) :
        print >> self.out, "Test %s repeated, skipping"%cmd
        continue
      self.cmd_list.append(cmd)

    # Never start more processes than there are tests.
    if (nprocs is Auto) :
      nprocs = cpu_count()
    n_tests = len(self.cmd_list)
    nprocs = min(nprocs, n_tests)

    # Announce what is about to run.
    if (self.verbosity > 0) :
      print >> self.out, "Running %d tests on %s processors:"%(n_tests, nprocs)
      for cmd in self.cmd_list :
        print >> self.out, "  %s"%cmd
      print >> self.out, ""

    t_start = time.time()
    if nprocs > 1 :
      # Parallel: results arrive through the save_result callback.
      pool = Pool(processes=nprocs)
      for command in self.cmd_list :
        pool.apply_async(
          run_command,
          [command, verbosity, out],
          callback=self.save_result)
      try :
        pool.close()
      except KeyboardInterrupt :
        print >> self.out, "Caught KeyboardInterrupt, terminating"
        pool.terminate()
      finally :
        pool.join()
    else :
      # Serial: run each command here and record its result directly.
      for command in self.cmd_list :
        self.save_result(run_command(command, verbosity=verbosity, out=out))

    # Closing banner.
    t_end = time.time()
    print >> self.out, "="*80
    print >> self.out, ""
    print >> self.out, "Tests finished. Elapsed time: %.2fs" %(t_end-t_start)
    print >> self.out, ""

    # Classify the results; check_alert yields 1 for a warning, 2 for a failure.
    extra_stderr = sum(1 for res in self.results if res.stderr_lines)
    longjobs = [res for res in self.results if res.wall_time > MAX_TIME]
    warnings = [res for res in self.results if self.check_alert(res) == 1]
    failures = [res for res in self.results if self.check_alert(res) == 2]
    self.finished = len(self.results)
    self.failure = len(failures)
    self.warning = len(warnings)

    # Optional JUnit XML report.
    if output_junit_xml :
      from junit_xml import TestSuite, TestCase
      test_cases = []
      for res in self.results :
        tc = TestCase(name=res.command,
                      classname=res.command,
                      elapsed_sec=res.wall_time,
                      stdout='\n'.join(res.stdout_lines),
                      stderr='\n'.join(res.stderr_lines))
        if res.return_code != 0 :
          tc.add_failure_info(message='exit code %d' %res.return_code)
        test_cases.append(tc)
      ts = TestSuite("libtbx.run_tests_parallel", test_cases=test_cases)
      with open('output.xml', 'wb') as f :
        print >> f, TestSuite.to_xml_string([ts], prettyprint=True)

    # Runtime histogram; needs scitbx, which may not be configured.
    if (libtbx.env.has_module("scitbx")) :
      from scitbx.array_family import flex
      print >> self.out, "Distribution of test runtimes:"
      wall_times = flex.double([res.wall_time for res in self.results])
      hist = flex.histogram(wall_times, n_slots=10)
      hist.show(f=self.out, prefix="  ", format_cutoffs="%.1fs")
      print >> self.out, ""

    # Slow jobs, fastest first.
    if longjobs :
      print >> self.out, ""
      print >> self.out, "Warning: the following jobs took at least %d seconds:"%MAX_TIME
      for res in sorted(longjobs, key=lambda job: job.wall_time) :
        print >> self.out, "  %s: %.1fs"%(res.command, res.wall_time)
      print >> self.out, "Please try to reduce overall runtime - consider splitting up these tests."

    # NOTE(review): warnings are only displayed when there is at least one
    # failure - confirm that this is intended.
    if failures :
      print >> self.out, ""
      print >> self.out, "Error: the following jobs returned non-zero exit codes or suspicious stderr output:"
      print >> self.out, ""
      for res in warnings :
        self.display_result(res, alert=1, out=self.out, log_return=self.out, log_stderr=self.out)
      for res in failures :
        self.display_result(res, alert=2, out=self.out, log_return=self.out, log_stderr=self.out)
      print >> self.out, ""
      print >> self.out, "Please verify these tests manually."
      print >> self.out, ""

    # Final counts.
    print >> self.out, "Summary:"
    print >> self.out, "  Tests run                    :",self.finished
    print >> self.out, "  Failures                     :",self.failure
    print >> self.out, "  Warnings (possible failures) :",self.warning
    print >> self.out, "  Stderr output (discouraged)  :",extra_stderr
    if (self.finished != len(self.cmd_list)) :
      print >> self.out, "*" * 80
      print >> self.out, "  WARNING: NOT ALL TESTS FINISHED!"
      print >> self.out, "*" * 80