def as_miller_array(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   if (base_array_info is None):
     base_array_info = miller.array_info(
       source_type="scalepack_no_merge_original_index")
   crystal_symmetry_from_file = self.crystal_symmetry()
   crystal_symmetry = crystal_symmetry_from_file.join_symmetry(
     other_symmetry=crystal_symmetry,
     force=force_symmetry)
   result = miller.array(
     miller_set=self.unmerged_miller_set(
       crystal_symmetry=crystal_symmetry,
       force_symmetry=True),
     data=self.i_obs,
     sigmas=self.sigmas)
   if (merge_equivalents):
     result = result.merge_equivalents().array()
   return (result
     .set_info(base_array_info.customized_copy(
       labels=["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"],
       merged=merge_equivalents,
       crystal_symmetry_from_file=crystal_symmetry_from_file))
     .set_observation_type_xray_intensity())
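
For orientation, here is a hedged sketch of how such readers are usually reached from a file on disk (the file name is illustrative; any_reflection_file dispatches to the appropriate format-specific reader and collects its miller arrays):

from iotbx.reflection_file_reader import any_reflection_file
hkl_in = any_reflection_file("unmerged.sca")  # hypothetical unmerged scalepack file
for ma in hkl_in.as_miller_arrays(merge_equivalents=True):
  print(ma.info().label_string())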
Example No. 2

 def load_reflections_file (self, file_name, **kwds) :
   if (isinstance(file_name, unicode)) :
     file_name = str(file_name)
   if (file_name != "") :
     from iotbx.reflection_file_reader import any_reflection_file
     from cctbx import miller
     from scitbx.array_family import flex
     try :
       hkl_file = any_reflection_file(file_name)
     except Exception, e :
       raise Sorry(str(e))
     arrays = hkl_file.as_miller_arrays(merge_equivalents=True)
     f_obs = f_model = None
     for array in arrays :
       labels = array.info().label_string()
       if labels.startswith("F-obs-filtered") :
         f_obs = array
       elif labels.startswith("F-model") :
         f_model = array
     if (f_obs is None) or (f_model is None) :
       raise Sorry("This does not appear to be a phenix.refine output "+
         "file.  The MTZ file should contain data arrays for the filtered "+
         "amplitudes (F-obs) and F-model.")
     f_delta = f_obs.customized_copy(sigmas=None,
       data=flex.abs(f_obs.data()-abs(f_model).data())).set_info(
         miller.array_info(labels=["abs(F_obs - F_model)"]))
     self.set_miller_array(f_delta)
Example No. 3

 def as_miller_arrays(self, data_block_name=None,
                      crystal_symmetry=None,
                      force_symmetry=False,
                      merge_equivalents=True,
                      base_array_info=None):
   if base_array_info is None:
     base_array_info = miller.array_info(
       source=self.file_path, source_type="cif")
   if data_block_name is not None:
     arrays = self.build_miller_arrays(
       data_block_name=data_block_name,
       base_array_info=base_array_info).values()
   else:
     arrays = flat_list([
       arrays.values() for arrays in
       self.build_miller_arrays(base_array_info=base_array_info).values()])
   other_symmetry=crystal_symmetry
   for i, array in enumerate(arrays):
     if crystal_symmetry is not None:
       crystal_symmetry_from_file = array.crystal_symmetry()
       crystal_symmetry = crystal_symmetry_from_file.join_symmetry(
         other_symmetry=other_symmetry,
         force=force_symmetry)
       arrays[i] = array.customized_copy(crystal_symmetry=crystal_symmetry)
       arrays[i].set_info(array.info())
   return arrays
Example No. 4

 def as_miller_array(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="scalepack_merge")
   crystal_symmetry_from_file = self.crystal_symmetry()
   if (self.anomalous):
     labels = ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]
   else:
     labels = ["I", "SIGI"]
   return (miller.array(
     miller_set=miller.set(
       crystal_symmetry=crystal_symmetry_from_file.join_symmetry(
         other_symmetry=crystal_symmetry,
         force=force_symmetry),
       indices=self.miller_indices,
       anomalous_flag=self.anomalous),
     data=self.i_obs,
     sigmas=self.sigmas)
     .set_info(base_array_info.customized_copy(
       labels=labels,
       crystal_symmetry_from_file=crystal_symmetry_from_file))
     .set_observation_type_xray_intensity())
Example No. 5

 def as_miller_arrays(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   from cctbx import miller
   from cctbx import crystal
   if (crystal_symmetry is None):
     crystal_symmetry = crystal.symmetry()
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="shelx_hklf")
   miller_set = miller.set(
     crystal_symmetry=crystal_symmetry,
     indices=self.indices()).auto_anomalous()
   miller_arrays = []
   obs = (miller.array(
     miller_set=miller_set,
     data=self.data(),
     sigmas=self.sigmas())
     .set_info(base_array_info.customized_copy(labels=["obs", "sigmas"])))
   miller_arrays.append(obs)
   if (self.alphas() is not None):
     miller_arrays.append(miller.array(
       miller_set=miller_set,
       data=self.alphas())
       .set_info(base_array_info.customized_copy(labels=["alphas"])))
   return miller_arrays
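
The same construction pattern recurs in most of these readers; a condensed, hedged sketch with made-up symmetry, indices, and data values:

from cctbx import crystal, miller
from cctbx.array_family import flex
symm_from_file = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90),
                                  space_group_symbol="P1")
# merge in caller-supplied symmetry, as the readers above do (None here)
symm = symm_from_file.join_symmetry(other_symmetry=None, force=False)
miller_set = miller.set(
  crystal_symmetry=symm,
  indices=flex.miller_index([(1, 0, 0), (0, 1, 0)])).auto_anomalous()
obs = (miller.array(miller_set=miller_set,
                    data=flex.double([5.0, 7.5]),
                    sigmas=flex.double([0.5, 0.8]))
  .set_info(miller.array_info(source_type="example", labels=["obs", "sigmas"]))
  .set_observation_type_xray_amplitude())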
Example No. 6

 def get_resolve_map (self) :
   map_coeffs, null_value = self.get_ccp4_maps()
   if (map_coeffs is None) :
     map_coeffs = self._convert_amplitudes_and_phases(f_label="FP",
       phi_label="PHIM", fom_label="FOMM", weighted=True)
   from cctbx.miller import array_info
   info = array_info(labels=["FWT","PHWT"])
   return map_coeffs.set_info(info)
Example No. 7

    def i_obs(self, anomalous_flag=None):
        assert "IOBS" in self.data
        assert "SIGMA" in self.data

        array_info = miller.array_info(source_type="xds_integrate")#, wavelength=)
        return miller.array(miller_set=miller.set(crystal_symmetry=self.crystal_symmetry(),
                                                  indices=self.hkl,
                                                  anomalous_flag=anomalous_flag),
                            data=self.data["IOBS"],
                            sigmas=self.data["SIGMA"]).set_info(array_info).set_observation_type_xray_intensity()
Example No. 8

def exercise_wavelength():
    miller_set = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90), space_group_symbol="P1").miller_set(
        indices=flex.miller_index([(1, 2, 3), (4, 5, 6)]), anomalous_flag=False
    )
    data = flex.double([1, 2])
    info = miller.array_info(wavelength=0.9792)
    miller_array = miller_set.array(data=data).set_info(info)
    mtz_dataset = miller_array.as_mtz_dataset(column_root_label="F")
    mtz_dataset.mtz_object().write("tst_iotbx_mtz_wavelength.mtz")
    mtz_object = mtz.object(file_name="tst_iotbx_mtz_wavelength.mtz")
    miller_array = mtz_object.as_miller_arrays()[0]
    assert approx_equal(miller_array.info().wavelength, 0.9792)
Example No. 9
File: t2.py  Project: youdar/work

def run(sf_file_name,pdb_file_name):
  # check if files exist
  if not isfile(sf_file_name): raise Sorry('{} is not a file'.format(sf_file_name))
  if not isfile(pdb_file_name): raise Sorry('{} is not a file'.format(pdb_file_name))
  # start processing file
  cs = crystal_symmetry_from_any.extract_from(pdb_file_name)
  base_array_info = miller.array_info(crystal_symmetry_from_file=cs)
  all_miller_arrays = cif.reader(file_path=sf_file_name).build_miller_arrays(base_array_info=base_array_info)
  #
  for (data_name, miller_arrays) in all_miller_arrays.iteritems():
    print data_name
    for ma in miller_arrays.values():
      print get_label(ma),ma

  print 'wait here'
Example No. 10

 def __init__(self, input, n_bins, lattice_symmetry_max_delta, completeness_as_non_anomalous=None):
     self.completeness_as_non_anomalous = completeness_as_non_anomalous
     self.input = input.eliminate_sys_absent(integral_only=True, log=sys.stdout)
     self.lattice_symmetry_max_delta = lattice_symmetry_max_delta
     if not self.input.is_unique_set_under_symmetry():
         print "Merging symmetry-equivalent reflections:"
         merged = self.input.merge_equivalents()
         merged.show_summary(prefix="  ")
         print
         self.input = merged.array()
         del merged
         if input.info() is not None:
             self.input.set_info(input.info().customized_copy(merged=True))
         else:
             self.input.set_info(miller.array_info(merged=True))
     self.input.show_comprehensive_summary()
     print
     self.input.setup_binner(n_bins=n_bins)
     self.resolution_range = self.input.resolution_range()
     self.change_of_basis_op_to_minimum_cell = self.input.change_of_basis_op_to_minimum_cell()
     self.observations = (
         self.input.change_basis(cb_op=self.change_of_basis_op_to_minimum_cell).expand_to_p1().map_to_asu()
     )
     if self.input.anomalous_flag():
         self.anom_diffs = (
             abs(self.input.anomalous_differences())
             .change_basis(cb_op=self.change_of_basis_op_to_minimum_cell)
             .expand_to_p1()
             .map_to_asu()
         )
     else:
         self.anom_diffs = None
     self.minimum_cell_symmetry = crystal.symmetry.change_basis(
         self.input, cb_op=self.change_of_basis_op_to_minimum_cell
     )
     self.intensity_symmetry = self.minimum_cell_symmetry.reflection_intensity_symmetry(
         anomalous_flag=self.input.anomalous_flag()
     )
     self.lattice_group = sgtbx.lattice_symmetry.group(
         self.minimum_cell_symmetry.unit_cell(), max_delta=self.lattice_symmetry_max_delta
     )
     self.lattice_group.expand_inv(sgtbx.tr_vec((0, 0, 0)))
     self.lattice_group.make_tidy()
     self.lattice_symmetry = crystal.symmetry(
         unit_cell=self.minimum_cell_symmetry.unit_cell(),
         space_group_info=sgtbx.space_group_info(group=self.lattice_group),
         assert_is_compatible_unit_cell=False,
     )
Example No. 11

 def batch_as_miller_array (self,
       crystal_symmetry=None,
       force_symmetry=False,
       base_array_info=None) :
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="xds_ascii")
   crystal_symmetry_from_file = self.crystal_symmetry()
   return miller.array(
     miller_set=self.miller_set(
         crystal_symmetry=crystal_symmetry,
         force_symmetry=force_symmetry),
     data=self.zd).set_info(
       base_array_info.customized_copy(
         labels=["ZD"],
         crystal_symmetry_from_file=crystal_symmetry_from_file,
         wavelength=self.wavelength))
Example No. 12

 def as_miller_arrays(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None,
       include_unmerged_data=False,
       ):
   assert not include_unmerged_data, "Unmerged data not supported in MTZ"
   other_symmetry = crystal_symmetry
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="ccp4_mtz")
   result = []
   for crystal in self.crystals():
     try :
       unit_cell = crystal.unit_cell()
     except ValueError, e :
       raise Sorry(str(e))
     crystal_symmetry_from_file = cctbx.crystal.symmetry(
       unit_cell=unit_cell,
       space_group_info=self.space_group_info(),
       raise_sorry_if_incompatible_unit_cell=True)
     crystal_symmetry = crystal_symmetry_from_file.join_symmetry(
       other_symmetry=other_symmetry,
       force=force_symmetry)
     for dataset in crystal.datasets():
       base_dataset_info = base_array_info.customized_copy(
         wavelength=dataset.wavelength())
       column_groups = self.group_columns(
         crystal_symmetry_from_file=crystal_symmetry_from_file,
         crystal_symmetry=crystal_symmetry,
         base_array_info=base_dataset_info,
         dataset=dataset)
       for column_group in column_groups:
         if (merge_equivalents
             and isinstance(column_group.data(), flex.double)
             and isinstance(column_group.sigmas(), flex.double)
             and column_group.sigmas().size() != 0
             and flex.min(column_group.sigmas()) > 0):
           merged_column_group = column_group.merge_equivalents().array()
           if (merged_column_group.indices().size()
               != column_group.indices().size()):
             merged_column_group.set_info(
               column_group.info().customized_copy(merged=True))
             column_group = merged_column_group
          result.append(column_group)
    return result
 def as_miller_arrays(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   if (crystal_symmetry is None):
     crystal_symmetry = crystal.symmetry()
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="cns_index_fobs_sigma")
   miller_set = miller.set(
     crystal_symmetry=crystal_symmetry,
     indices=self.indices()).auto_anomalous()
   return [miller.array(
     miller_set=miller_set,
     data=self.data(),
     sigmas=self.sigmas())
     .set_info(base_array_info.customized_copy(labels=self._names))
     .set_observation_type_xray_amplitude()]
 def batch_as_miller_array (self,
       crystal_symmetry=None,
       force_symmetry=False,
       base_array_info=None) :
   if (base_array_info is None):
     base_array_info = miller.array_info(
       source_type="scalepack_no_merge_original_index")
   crystal_symmetry_from_file = self.crystal_symmetry()
   crystal_symmetry = crystal_symmetry_from_file.join_symmetry(
     other_symmetry=crystal_symmetry,
     force=force_symmetry)
   return miller.array(
     miller_set=self.unmerged_miller_set(
       crystal_symmetry=crystal_symmetry,
       force_symmetry=True),
     data=self.batch_numbers).set_info(
       base_array_info.customized_copy(
         labels=["BATCH"],
         crystal_symmetry_from_file=crystal_symmetry_from_file))
Example No. 15

 def as_miller_arrays(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   crystal_symmetry = self.crystal_symmetry(
     crystal_symmetry=crystal_symmetry,
     force_symmetry=force_symmetry)
   if (crystal_symmetry is None):
     crystal_symmetry = crystal.symmetry(
       unit_cell=None,
       space_group_info=None)
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="cns_reflection_file")
   result = []
   done = set()
   for group_index in xrange(len(self.groups)):
     names, miller_indices, hl = self.join_hl_group(group_index)
     result.append(self._as_miller_array(
       crystal_symmetry, miller_indices, hl).set_info(
         base_array_info.customized_copy(labels=names)))
     for name in names:
       done.add(name)
   real_arrays = {}
   for rso in self.reciprocal_space_objects.values():
     if (rso.name in done): continue
     if (not rso.is_real()): continue
     real_arrays[rso.name.lower()] = rso
   for obs,sigma,obs_type in group_obs_sigma(real_arrays):
     result.append(self._as_miller_array(
       crystal_symmetry, obs.indices,
       obs.real_data(), sigma.real_data(), obs_type).set_info(
         base_array_info.customized_copy(labels=[obs.name, sigma.name])))
     done.add(obs.name)
     done.add(sigma.name)
   for rso in self.reciprocal_space_objects.values():
     if (rso.name in done): continue
     result.append(self._as_miller_array(
       crystal_symmetry, rso.indices, rso.data).set_info(
         base_array_info.customized_copy(labels=[rso.name])))
     done.add(rso.name)
   return result
Example No. 16

 def as_miller_arrays(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   if (crystal_symmetry is None):
     crystal_symmetry = crystal.symmetry()
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="solve_fpfm")
   miller_set = miller.set(
     crystal_symmetry=crystal_symmetry,
     indices=self.indices(),
     anomalous_flag=True)
   return [miller.array(
     miller_set=miller_set,
     data=self.data(),
     sigmas=self.sigmas())
     .set_info(base_array_info.customized_copy(
       labels=["fpfm", "sigma_fpfm"]))
     .set_observation_type_xray_amplitude()]
Example No. 17

 def as_miller_array(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="xds_ascii")
   crystal_symmetry_from_file = self.crystal_symmetry()
   array = (miller.array(
     miller_set=self.miller_set(
       crystal_symmetry=crystal_symmetry,
       force_symmetry=force_symmetry),
     data=self.iobs,
     sigmas=self.sigma_iobs)
     .set_info(base_array_info.customized_copy(
       labels=["iobs", "sigma_iobs"],
       crystal_symmetry_from_file=crystal_symmetry_from_file,
       wavelength=self.wavelength))
     .set_observation_type_xray_intensity())
   if (merge_equivalents) :
     info = array.info()
     info.merged = True
     array = array.merge_equivalents().array().set_info(info)
   return array
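
A brief, hedged illustration of the merge step used above, applied to a small synthetic unmerged intensity array (symmetry and values are made up for illustration):

from cctbx import crystal, miller
from cctbx.array_family import flex
symm = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90), space_group_symbol="P1")
indices = flex.miller_index([(1, 2, 3), (-1, -2, -3), (1, 2, 3)])  # Friedel/symmetry mates
unmerged = miller.array(
  miller.set(symm, indices, anomalous_flag=False),
  data=flex.double([10.0, 11.0, 9.5]),
  sigmas=flex.double([1.0, 1.1, 0.9])).set_observation_type_xray_intensity()
merged = unmerged.merge_equivalents()
merged.show_summary(prefix="  ")
merged_array = merged.array().set_info(
  miller.array_info(labels=["I", "SIGI"], merged=True))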
Example No. 18

 def as_miller_arrays(self,
       crystal_symmetry=None,
       force_symmetry=False,
       merge_equivalents=True,
       base_array_info=None):
   crystal_symmetry_from_file = self.crystal_symmetry()
   crystal_symmetry = crystal_symmetry_from_file.join_symmetry(
     other_symmetry=crystal_symmetry,
     force=force_symmetry)
   if (base_array_info is None):
     base_array_info = miller.array_info(source_type="dtrek_reflnlist")
   miller_arrays = []
   sigmas=self.column_dict["fSigmaI"]
   miller_arrays.append(miller.array(
     miller_set=miller.set(
       crystal_symmetry=crystal_symmetry,
       indices=self.miller_indices,
       anomalous_flag=False),
     data=self.column_dict["fIntensity"],
     sigmas=sigmas)
     .select(sigmas > 0)
     .set_info(base_array_info.customized_copy(
       labels=["Intensity", "SigmaI"],
       crystal_symmetry_from_file=crystal_symmetry_from_file))
     .set_observation_type_xray_intensity())
   if ("fIntensity+" in self.column_dict):
     assert "fSigmaI+" in self.column_dict
     assert "fIntensity-" in self.column_dict
     assert "fSigmaI-" in self.column_dict
     if (crystal_symmetry.space_group_info() is None):
       space_group = sgtbx.space_group() # P 1
     else:
       space_group = crystal_symmetry.space_group()
     ac = dtrek_ext.anomalous_combined(
       space_group,
       self.miller_indices,
       self.column_dict["fIntensity+"],
       self.column_dict["fSigmaI+"],
       self.column_dict["fIntensity-"],
       self.column_dict["fSigmaI-"])
     miller_arrays.append(miller.array(
       miller_set=miller.set(
         crystal_symmetry=crystal_symmetry,
         indices=ac.miller_indices(),
         anomalous_flag=True),
       data=ac.data(),
       sigmas=ac.sigmas())
       .set_info(base_array_info.customized_copy(
         labels=["Intensity+-", "SigmaI+-"],
         crystal_symmetry_from_file=crystal_symmetry_from_file))
       .set_observation_type_xray_intensity())
   for column_name in self.column_names:
     if (column_name in ("nH", "nK", "nL",
                         "fSigmaI", "fIntensity",
                         "fIntensity+", "fSigmaI+",
                         "fIntensity-", "fSigmaI-")):
       continue
     miller_arrays.append(miller.array(
       miller_set=miller.set(
         crystal_symmetry=crystal_symmetry,
         indices=self.miller_indices,
         anomalous_flag=False),
       data=self.column_dict[column_name])
       .set_info(base_array_info.customized_copy(
         labels=[column_name[1:]],
         crystal_symmetry_from_file=crystal_symmetry_from_file)))
   return miller_arrays
Example No. 19

 def i_obs(self, anomalous_flag=None):
     array_info = miller.array_info(source_type="xds_ascii")#, wavelength=)
     return miller.array(self.as_miller_set(anomalous_flag),
                         data=self.iobs, sigmas=self.sigma_iobs).set_info(array_info).set_observation_type_xray_intensity()
Example No. 20

def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
  import iotbx.cif
  from cctbx import miller
  base_array_info = miller.array_info(
    crystal_symmetry_from_file=crystal_symmetry)
  all_miller_arrays = iotbx.cif.reader(file_path=file_name).build_miller_arrays(
    base_array_info=base_array_info)
  if (len(all_miller_arrays) == 0) :
    raise Sorry("No data arrays were found in this CIF file.  Please make "+
      "sure that the file contains reflection data, rather than the refined "+
      "model.")
  column_labels = set()
  if (extend_flags) :
    map_to_asu = True
  def get_label(miller_array):
    label = None
    for l in miller_array.info().labels:
      if ('_meas' in l) :
        if miller_array.is_xray_amplitude_array():
          label = "FOBS"
        elif miller_array.is_xray_intensity_array():
          label = "IOBS"
        elif l.endswith(".phase_meas") :
          label = "PHIM"
        break
      elif ("_calc" in l) :
        if miller_array.is_xray_amplitude_array():
          label = "FC"
        elif miller_array.is_xray_intensity_array():
          label = "ICALC"
        elif l.endswith(".F_calc") :
          label = "FC"
        elif l.endswith(".phase_calc") :
          label = "PHIC"
        break
      elif miller_array.anomalous_flag() :
        if miller_array.is_xray_amplitude_array():
          label = "F"
        elif miller_array.is_xray_intensity_array():
          label = "I"
        break
      elif 'status' in l or '_free' in l:
        label = output_r_free_label
        break
      elif miller_array.is_hendrickson_lattman_array():
        label = "HL"
      elif (miller_array.is_complex_array()) :
        if (l.endswith("DELFWT")) :
          label = "DELFWT"
          break
        elif (l.endswith("FWT")) :
          label = "FWT"
          break
      elif (miller_array.is_real_array()) :
        if ("pdbx_anom_difference" in l) :
          label = "DANO"
          break
        elif (l.endswith(".fom")) :
          label = "FOM"
          break
    return label

  for (data_name, miller_arrays) in all_miller_arrays.iteritems():
    for ma in miller_arrays.values():
      other_symmetry = crystal_symmetry
      try:
        crystal_symmetry = other_symmetry.join_symmetry(
          other_symmetry=ma.crystal_symmetry(),
          force=True)
      except AssertionError, e:
        str_e = str(e)
        from cStringIO import StringIO
        s = StringIO()
        if "Space group is incompatible with unit cell parameters." in str_e:
          other_symmetry.show_summary(f=s)
          ma.crystal_symmetry().show_summary(f=s)
          str_e += "\n%s" %(s.getvalue())
          raise Sorry(str_e)
        else:
          raise
def exercise_miller_arrays_as_cif_block():
  from iotbx.cif import reader
  cif_model = reader(input_string=cif_miller_array,
                     builder=cif.builders.cif_model_builder()).model()
  ma_builder = cif.builders.miller_array_builder(cif_model['global'])
  ma1 = ma_builder.arrays()['_refln_F_squared_meas']
  mas_as_cif_block = cif.miller_arrays_as_cif_block(
    ma1, array_type='meas')
  mas_as_cif_block.add_miller_array(
    ma1.array(data=flex.complex_double([1-1j]*ma1.size())), array_type='calc')
  mas_as_cif_block.add_miller_array(
    ma1.array(data=flex.complex_double([1-2j]*ma1.size())), column_names=[
      '_refln_A_calc', '_refln_B_calc'])
  for key in ('_refln_F_squared_meas', '_refln_F_squared_sigma',
              '_refln_F_calc', '_refln_phase_calc',
              '_refln_A_calc', '_refln_B_calc'):
    assert key in mas_as_cif_block.cif_block.keys()
  #
  mas_as_cif_block = cif.miller_arrays_as_cif_block(
    ma1, array_type='meas', format="mmcif")
  mas_as_cif_block.add_miller_array(
    ma1.array(data=flex.complex_double([1-1j]*ma1.size())), array_type='calc')
  for key in ('_refln.F_squared_meas', '_refln.F_squared_sigma',
              '_refln.F_calc', '_refln.phase_calc',
              '_space_group_symop.operation_xyz',
              '_cell.length_a', '_refln.index_h'):
    assert key in mas_as_cif_block.cif_block.keys()
  #
  mas_as_cif_block = cif.miller_arrays_as_cif_block(
    ma1, column_names=['_diffrn_refln_intensity_net',
                       '_diffrn_refln_intensity_sigma'],
         miller_index_prefix='_diffrn_refln')
  mas_as_cif_block.add_miller_array(
    ma1.array(data=flex.std_string(ma1.size(), 'om')),
    column_name='_diffrn_refln_intensity_u')
  for key in ('_diffrn_refln_intensity_net', '_diffrn_refln_intensity_sigma',
              '_diffrn_refln_intensity_u'):
    assert key in mas_as_cif_block.cif_block.keys()
  #
  try: reader(input_string=cif_global)
  except CifParserError, e: pass
  else: raise Exception_expected
  cif_model = reader(input_string=cif_global, strict=False).model()
  assert not show_diff(str(cif_model), """\
data_1
_c                                3
_d                                4
""")
  # exercise adding miller arrays with non-matching indices
  cs = crystal.symmetry(unit_cell=uctbx.unit_cell((10, 10, 10, 90, 90, 90)),
                        space_group_info=sgtbx.space_group_info(symbol="P1"))
  mi = flex.miller_index(((1,0,0), (1,2,3), (2,3,4)))
  ms1 = miller.set(cs, mi)
  ma1 = miller.array(ms1, data=flex.double((1,2,3)))
  mas_as_cif_block = cif.miller_arrays_as_cif_block(
    ma1, column_name="_refln.F_meas_au")
  ms2 = miller.set(cs, mi[:2])
  ma2 = miller.array(ms2, data=flex.complex_double([1-2j]*ms2.size()))
  mas_as_cif_block.add_miller_array(
    ma2, column_names=("_refln.F_calc_au", "_refln.phase_calc"))
  ms3 = miller.set(cs, flex.miller_index(((1,0,0), (5,6,7), (2,3,4))))
  ma3 = miller.array(ms3, data=flex.double((4,5,6)))
  mas_as_cif_block.add_miller_array(ma3, column_name="_refln.F_squared_meas")
  ms4 = miller.set(cs, flex.miller_index(((1,2,3), (5,6,7), (1,1,1), (1,0,0), (2,3,4))))
  ma4 = ms4.d_spacings()
  mas_as_cif_block.add_miller_array(ma4, column_name="_refln.d_spacing")
  # extract arrays from cif block and make sure we get back what we started with
  arrays = cif.builders.miller_array_builder(mas_as_cif_block.cif_block).arrays()
  recycled_arrays = (arrays['_refln.F_meas_au'],
                     arrays['_refln.F_calc_au'],
                     arrays['_refln.F_squared_meas'],
                     arrays['_refln.d_spacing'])
  for orig, recycled in zip((ma1, ma2, ma3, ma4), recycled_arrays):
    assert orig.size() == recycled.size()
    recycled = recycled.customized_copy(anomalous_flag=orig.anomalous_flag())
    orig, recycled = orig.common_sets(recycled)
    assert orig.indices().all_eq(recycled.indices())
    assert approx_equal(orig.data(), recycled.data(), eps=1e-5)
  #
  cif_model = reader(input_string=r3adrsf,
                     builder=cif.builders.cif_model_builder()).model()
  cs = cif.builders.crystal_symmetry_builder(cif_model["r3adrsf"]).crystal_symmetry

  ma_builder = cif.builders.miller_array_builder(
    cif_model['r3adrAsf'],
    base_array_info=miller.array_info(crystal_symmetry_from_file=cs))
  miller_arrays = ma_builder.arrays().values()
  assert len(miller_arrays) == 4
  mas_as_cif_block = cif.miller_arrays_as_cif_block(
    miller_arrays[0].map_to_asu(), column_names=miller_arrays[0].info().labels)
  for array in miller_arrays[1:]:
    labels = array.info().labels
    if len(labels) > 1 :
      for label in labels :
        if label.startswith("wavelength_id") :
          labels.remove(label)
    mas_as_cif_block.add_miller_array(
      array=array.map_to_asu(), column_names=array.info().labels)
  s = StringIO()
  print >> s, mas_as_cif_block.refln_loop
  assert not show_diff(s.getvalue(), """\
loop_
  _refln_index_h
  _refln_index_k
  _refln_index_l
  _refln.crystal_id
  _refln.scale_group_code
  _refln.wavelength_id
  _refln.pdbx_I_plus
  _refln.pdbx_I_plus_sigma
  _refln.pdbx_I_minus
  _refln.pdbx_I_minus_sigma
  -87  5  46  1  1  3   40.2  40.4    6.7  63.9
  -87  5  45  1  1  3   47.8  29.7   35.1  30.5
  -87  5  44  1  1  3   18.1  33.2    0.5  34.6
  -87  5  43  1  1  3    6.1  45.4   12.9  51.6
  -87  5  42  1  1  3   -6.6  45.6  -15.5  55.8
  -87  7  37  1  1  3    6.3  43.4      ?     ?
  -87  7  36  1  1  3  -67.2  55.4      ?     ?
  -88  2  44  1  1  3      0    -1     35  38.5
  -88  2  43  1  1  3      0    -1   57.4  41.5
  -88  4  45  1  1  3     -1  46.1   -9.1  45.6
  -88  4  44  1  1  3  -19.8  49.2    0.3  34.7
  -88  6  44  1  1  3   -1.8  34.8      ?     ?

""")
Example No. 22

    def __init__(self, cif_block, base_array_info=None, wavelengths=None):
        crystal_symmetry_builder.__init__(self, cif_block)
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
        self._arrays = OrderedDict()
        if (wavelengths is None):
            wavelengths = {}
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in refln_loop.iteritems():
                # need to get these arrays first
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None:
                        continue
                    counts = data.counts()
                    if key.endswith('wavelength_id'):
                        wavelength_ids = counts.keys()
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = counts.keys()
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = counts.keys()
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = counts.keys()
            for label, value in sorted(refln_loop.items()):
                for w_id in wavelength_ids:
                    for crys_id in crystal_ids:
                        for scale_group in scale_groups:
                            if 'index_' in label: continue
                            key = label
                            labels = [label]
                            wavelength = None
                            if (key.endswith('wavelength_id')
                                    or key.endswith('crystal_id')
                                    or key.endswith('scale_group_code')):
                                w_id = None
                                crys_id = None
                                scale_group = None
                            key_suffix = ''
                            if w_id is not None:
                                key_suffix += '_%i' % w_id
                                labels.insert(0, "wavelength_id=%i" % w_id)
                                wavelength = wavelengths.get(w_id, None)
                            if crys_id is not None:
                                key_suffix += '_%i' % crys_id
                                labels.insert(0, "crystal_id=%i" % crys_id)
                            if scale_group is not None:
                                key_suffix += '_%i' % scale_group
                                labels.insert(
                                    0, "scale_group_code=%i" % scale_group)
                            key += key_suffix
                            sigmas = None
                            if key in self._arrays: continue
                            array = self.flex_std_string_as_miller_array(
                                value,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            if array is None: continue
                            if '_sigma' in key:
                                sigmas_label = label
                                key = None
                                for suffix in ('', '_meas', '_calc'):
                                    if sigmas_label.replace(
                                            '_sigma', suffix) in refln_loop:
                                        key = sigmas_label.replace(
                                            '_sigma', suffix) + key_suffix
                                        break
                                if key is None:
                                    key = sigmas_label + key_suffix
                                elif key in self._arrays and self._arrays[
                                        key].sigmas() is None:
                                    sigmas = array
                                    array = self._arrays[key]
                                    if (not check_array_sizes(
                                            array, sigmas, key, sigmas_label)):
                                        continue
                                    sigmas = as_flex_double(
                                        sigmas, sigmas_label)
                                    array.set_sigmas(sigmas.data())
                                    info = array.info()
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels +
                                            [sigmas_label],
                                            wavelength=wavelength))
                                    continue
                            elif 'PHWT' in key:
                                phwt_label = label
                                fwt_label = label.replace('PHWT', 'FWT')
                                if fwt_label not in refln_loop: continue
                                phwt_array = array
                                if fwt_label in self._arrays:
                                    array = self._arrays[fwt_label]
                                    if (not check_array_sizes(
                                            array, phwt_array, fwt_label,
                                            phwt_label)):
                                        continue
                                    phases = as_flex_double(
                                        phwt_array, phwt_label)
                                    info = array.info()
                                    array = array.phase_transfer(phases,
                                                                 deg=True)
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels + [phwt_label]))
                                    self._arrays[fwt_label] = array
                                    continue
                            elif 'HL_' in key:
                                hl_letter = key[key.find('HL_') + 3]
                                hl_key = 'HL_' + hl_letter
                                key = key.replace(hl_key, 'HL_A')
                                if key in self._arrays:
                                    continue  # this array is already dealt with
                                hl_labels = [
                                    label.replace(hl_key, 'HL_' + letter)
                                    for letter in 'ABCD'
                                ]
                                hl_keys = [
                                    key.replace(hl_key, 'HL_' + letter)
                                    for letter in 'ABCD'
                                ]
                                hl_values = [
                                    cif_block.get(hl_key)
                                    for hl_key in hl_labels
                                ]
                                if hl_values.count(None) == 0:
                                    selection = self.get_selection(
                                        hl_values[0],
                                        wavelength_id=w_id,
                                        crystal_id=crys_id,
                                        scale_group_code=scale_group)
                                    hl_values = [
                                        as_double_or_none_if_all_question_marks(
                                            hl.select(selection),
                                            column_name=lab)
                                        for hl, lab in zip(
                                            hl_values, hl_labels)
                                    ]
                                    array = miller.array(
                                        miller.set(
                                            self.crystal_symmetry,
                                            self.indices.select(
                                                selection)).auto_anomalous(),
                                        flex.hendrickson_lattman(*hl_values))
                                    labels = labels[:-1] + hl_labels
                            elif '.B_' in key or '_B_' in key:
                                if '.B_' in key:
                                    key, key_b = key.replace('.B_', '.A_'), key
                                    label, label_b = label.replace(
                                        '.B_', '.A_'), label
                                elif '_B_' in key:
                                    key, key_b = key.replace('_B', '_A'), key
                                    label, label_b = label.replace('_B',
                                                                   '_A'), label
                                if key in refln_loop and key_b in refln_loop:
                                    b_part = array.data()
                                    if key in self._arrays:
                                        info = self._arrays[key].info()
                                        a_part = self._arrays[key].data()
                                        self._arrays[key] = self._arrays[
                                            key].array(
                                                data=flex.complex_double(
                                                    a_part, b_part))
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels + [key_b]))
                                        continue
                            elif ('phase_' in key and not "_meas" in key
                                  and self.crystal_symmetry.space_group()
                                  is not None):
                                alt_key1 = label.replace('phase_', 'F_')
                                alt_key2 = alt_key1 + '_au'
                                if alt_key1 in refln_loop:
                                    phase_key = label
                                    key = alt_key1 + key_suffix
                                elif alt_key2 in refln_loop:
                                    phase_key = label
                                    key = alt_key2 + key_suffix
                                else:
                                    phase_key = None
                                if phase_key is not None:
                                    phases = array.data()
                                    if key in self._arrays:
                                        array = self._arrays[key]
                                        array = as_flex_double(array, key)
                                        if (not check_array_sizes(
                                                array, phases, key,
                                                phase_key)):
                                            continue
                                        info = self._arrays[key].info()
                                        self._arrays[
                                            key] = array.phase_transfer(
                                                phases, deg=True)
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels +
                                                [phase_key]))
                                    else:
                                        array = self.flex_std_string_as_miller_array(
                                            refln_loop[label],
                                            wavelength_id=w_id,
                                            crystal_id=crys_id,
                                            scale_group_code=scale_group)
                                        if (not check_array_sizes(
                                                array, phases, key,
                                                phase_key)):
                                            continue
                                        array.phase_transfer(phases, deg=True)
                                        labels = labels + [label, phase_key]
                            if base_array_info.labels is not None:
                                labels = base_array_info.labels + labels

                            def rstrip_substrings(string, substrings):
                                for substr in substrings:
                                    if substr == '': continue
                                    if string.endswith(substr):
                                        string = string[:-len(substr)]
                                return string

                            # determine observation type
                            stripped_key = rstrip_substrings(
                                key, [
                                    key_suffix, '_au', '_meas', '_calc',
                                    '_plus', '_minus'
                                ])
                            if (stripped_key.endswith('F_squared')
                                    or stripped_key.endswith('intensity')
                                    or stripped_key.endswith('.I')
                                    or stripped_key.endswith('_I')) and (
                                        array.is_real_array()
                                        or array.is_integer_array()):
                                array.set_observation_type_xray_intensity()
                            elif (stripped_key.endswith('F')
                                  and (array.is_real_array()
                                       or array.is_integer_array())):
                                array.set_observation_type_xray_amplitude()
                            if (array.is_xray_amplitude_array()
                                    or array.is_xray_intensity_array()):
                                # e.g. merge_equivalents treats integer arrays differently, so must
                                # convert integer observation arrays here to be safe
                                if isinstance(array.data(), flex.int):
                                    array = array.customized_copy(
                                        data=array.data().as_double())
                            array.set_info(
                                base_array_info.customized_copy(labels=labels))
                            if (array.is_xray_amplitude_array()
                                    or array.is_xray_intensity_array()):
                                info = array.info()
                                array.set_info(
                                    info.customized_copy(
                                        wavelength=wavelength))
                            self._arrays.setdefault(key, array)
        for key, array in self._arrays.copy().iteritems():
            if (key.endswith('_minus') or '_minus_' in key
                    or key.endswith('_plus') or '_plus_' in key):
                if '_minus' in key:
                    minus_key = key
                    plus_key = key.replace('_minus', '_plus')
                elif '_plus' in key:
                    plus_key = key
                    minus_key = key.replace('_plus', '_minus')
                if plus_key in self._arrays and minus_key in self._arrays:
                    plus_array = self._arrays.pop(plus_key)
                    minus_array = self._arrays.pop(minus_key)
                    minus_array = minus_array.customized_copy(
                        indices=-minus_array.indices()).set_info(
                            minus_array.info())
                    array = plus_array.concatenate(
                        minus_array, assert_is_similar_symmetry=False)
                    array = array.customized_copy(anomalous_flag=True)
                    array.set_info(
                        minus_array.info().customized_copy(labels=list(
                            OrderedSet(plus_array.info().labels +
                                       minus_array.info().labels))))
                    array.set_observation_type(plus_array.observation_type())
                    self._arrays.setdefault(key, array)

        if len(self._arrays) == 0:
            raise CifBuilderError("No reflection data present in cif block")
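
A hedged usage sketch of reaching this builder through the public reader (the file name is illustrative):

import iotbx.cif
from cctbx import miller
arrays_by_block = iotbx.cif.reader(file_path="reflections.cif").build_miller_arrays(
  base_array_info=miller.array_info(source_type="cif"))
for block_name, arrays in arrays_by_block.items():
  for key, ma in arrays.items():
    print(block_name, key, ma.info().label_string())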
Example No. 23

def run(args):

    parser = OptionParser(
        #usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        #epilog=help_message
    )

    params, options, args = parser.parse_args(show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity,
               info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        assert len(experiments) == len(reflections)

        from cctbx import crystal, miller
        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())

            # filtering of intensities similar to that done in export_mtz
            # FIXME this function should be renamed/moved elsewhere
            from dials.util.export_mtz import _apply_data_filters
            refl = _apply_data_filters(refl,
                                       ignore_profile_fitting=False,
                                       filter_ice_rings=False,
                                       min_isigi=-5,
                                       include_partials=False,
                                       keep_partials=False,
                                       scale_partials=True)

            assert 'intensity.sum.value' in refl
            sel = refl.get_flags(refl.flags.integrated_sum)
            data = refl['intensity.sum.value']
            variances = refl['intensity.sum.variance']
            if 'intensity.prf.value' in refl:
                prf_sel = refl.get_flags(refl.flags.integrated_prf)
                data.set_selected(prf_sel, refl['intensity.prf.value'])
                variances.set_selected(prf_sel, refl['intensity.prf.variance'])
                sel |= prf_sel
            refl = refl.select(sel)
            data = data.select(sel)
            variances = variances.select(sel)

            if 'lp' in refl and 'qe' in refl:
                lp = refl['lp']
                qe = refl['qe']
                assert qe.all_gt(0)
                scale = lp / qe
                data *= scale
                variances *= (flex.pow2(scale))
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry,
                                    miller_indices,
                                    anomalous_flag=True)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source='DIALS', source_type='pickle'))
            datasets_input.append(intensities)

    files = args
    for file_name in files:
        reader = any_reflection_file(file_name)
        assert reader.file_type() == 'ccp4_mtz'

        as_miller_arrays = reader.as_miller_arrays(merge_equivalents=False)
        intensities_prf = [
            ma for ma in as_miller_arrays
            if ma.info().labels == ['IPR', 'SIGIPR']
        ]
        intensities_sum = [
            ma for ma in as_miller_arrays if ma.info().labels == ['I', 'SIGI']
        ]
        if len(intensities_prf):
            intensities = intensities_prf[0]
        else:
            assert len(intensities_sum), 'No intensities found in input file.'
            intensities = intensities_sum[0]
        batches = [
            ma for ma in as_miller_arrays if ma.info().labels == ['BATCH']
        ]
        if len(batches):
            batches = batches[0]
        else:
            batches = None
        mtz_object = reader.file_content()
        intensities = intensities.customized_copy(
            anomalous_flag=True,
            indices=mtz_object.extract_original_index_miller_indices(
            )).set_info(intensities.info())

        intensities.set_observation_type_xray_intensity()
        if params.batch is not None:
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        datasets_input.append(intensities)

    datasets = datasets_input
    assert len(datasets) == 1
    result = determine_space_group(
        datasets[0],
        normalisation=params.normalisation,
        d_min=params.d_min,
        min_i_mean_over_sigma_mean=params.min_i_mean_over_sigma_mean)

    if (len(experiments) and len(reflections)
            and params.output.reflections is not None
            and params.output.experiments is not None):
        from dxtbx.serialize import dump
        from rstbx.symmetry.constraints import parameter_reduction
        reindexed_experiments = copy.deepcopy(experiments)
        reindexed_reflections = copy.deepcopy(reflections[0])
        cb_op_inp_best = result.best_solution.subgroup[
            'cb_op_inp_best'] * result.cb_op_inp_min
        best_subsym = result.best_solution.subgroup['best_subsym']
        for expt in reindexed_experiments:
            expt.crystal = expt.crystal.change_basis(cb_op_inp_best)
            expt.crystal.set_space_group(
                best_subsym.space_group().build_derived_acentric_group())
            S = parameter_reduction.symmetrize_reduce_enlarge(
                expt.crystal.get_space_group())
            S.set_orientation(expt.crystal.get_B())
            S.symmetrize()
            expt.crystal.set_B(S.orientation.reciprocal_matrix())
            reindexed_reflections['miller_index'] = cb_op_inp_best.apply(
                reindexed_reflections['miller_index'])
        logger.info('Saving reindexed experiments to %s' %
                    params.output.experiments)
        dump.experiment_list(reindexed_experiments, params.output.experiments)
        logger.info('Saving reindexed reflections to %s' %
                    params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        cb_op_inp_best = result.best_solution.subgroup[
            'cb_op_inp_best'] * result.cb_op_inp_min
        best_subsym = result.best_solution.subgroup['best_subsym']
        space_group = best_subsym.space_group().build_derived_acentric_group()
        for file_name in files:
            basename = os.path.basename(file_name)
            out_name = os.path.splitext(
                basename)[0] + params.output.suffix + ".mtz"
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'
            mtz_object = reader.file_content()
            if not cb_op_inp_best.is_identity_op():
                mtz_object.change_basis_in_place(cb_op_inp_best)
            mtz_object.set_space_group_info(space_group.info())
            for crystal in mtz_object.crystals():
                crystal.set_unit_cell_parameters(
                    best_subsym.unit_cell().parameters())
            mtz_object.write(out_name)
            logger.info('Saving reindexed reflections to %s' % out_name)
Example No. 24

  def __init__(self, cif_block, base_array_info=None, wavelengths=None):
    crystal_symmetry_builder.__init__(self, cif_block)
    if base_array_info is not None:
      self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
        other_symmetry=base_array_info.crystal_symmetry_from_file,
      force=True)
    self._arrays = OrderedDict()
    if (wavelengths is None) :
      wavelengths = {}
    if base_array_info is None:
      base_array_info = miller.array_info(source_type="cif")
    refln_containing_loops = self.get_miller_indices_containing_loops()
    for self.indices, refln_loop in refln_containing_loops:
      self.wavelength_id_array = None
      self.crystal_id_array = None
      self.scale_group_array = None
      wavelength_ids = [None]
      crystal_ids = [None]
      scale_groups = [None]
      for key, value in refln_loop.iteritems():
        # need to get these arrays first
        if (key.endswith('wavelength_id') or
            key.endswith('crystal_id') or
            key.endswith('scale_group_code')):
          data = as_int_or_none_if_all_question_marks(value, column_name=key)
          if data is None:
            continue
          counts = data.counts()
          if key.endswith('wavelength_id'):
            wavelength_ids = counts.keys()
          if len(counts) == 1: continue
          array = miller.array(
            miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data)
          if key.endswith('wavelength_id'):
            self.wavelength_id_array = array
            wavelength_ids = counts.keys()
          elif key.endswith('crystal_id'):
            self.crystal_id_array = array
            crystal_ids = counts.keys()
          elif key.endswith('scale_group_code'):
            self.scale_group_array = array
            scale_groups = counts.keys()
      for label, value in sorted(refln_loop.items()):
        for w_id in wavelength_ids:
          for crys_id in crystal_ids:
            for scale_group in scale_groups:
              if 'index_' in label: continue
              key = label
              labels = [label]
              wavelength = None
              if (key.endswith('wavelength_id') or
                    key.endswith('crystal_id') or
                    key.endswith('scale_group_code')):
                w_id = None
                crys_id = None
                scale_group = None
              key_suffix = ''
              if w_id is not None:
                key_suffix += '_%i' %w_id
                labels.insert(0, "wavelength_id=%i" %w_id)
                wavelength = wavelengths.get(w_id, None)
              if crys_id is not None:
                key_suffix += '_%i' %crys_id
                labels.insert(0, "crystal_id=%i" %crys_id)
              if scale_group is not None:
                key_suffix += '_%i' %scale_group
                labels.insert(0, "scale_group_code=%i" %scale_group)
              key += key_suffix
              sigmas = None
              if key in self._arrays: continue
              array = self.flex_std_string_as_miller_array(
                value, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              if array is None: continue
              if '_sigma' in key:
                sigmas_label = label
                key = None
                for suffix in ('', '_meas', '_calc'):
                  if sigmas_label.replace('_sigma', suffix) in refln_loop:
                    key = sigmas_label.replace('_sigma', suffix) + key_suffix
                    break
                if key is None:
                  key = sigmas_label + key_suffix
                elif key in self._arrays and self._arrays[key].sigmas() is None:
                  sigmas = array
                  array = self._arrays[key]
                  check_array_sizes(array, sigmas, key, sigmas_label)
                  sigmas = as_flex_double(sigmas, sigmas_label)
                  array.set_sigmas(sigmas.data())
                  info = array.info()
                  array.set_info(
                    info.customized_copy(labels=info.labels+[sigmas_label],
                      wavelength=wavelength))
                  continue
              elif 'PHWT' in key:
                phwt_label = label
                fwt_label = label.replace('PHWT', 'FWT')
                if fwt_label not in refln_loop: continue
                phwt_array = array
                if fwt_label in self._arrays:
                  array = self._arrays[fwt_label]
                  check_array_sizes(array, phwt_array, fwt_label, phwt_label)
                  phases = as_flex_double(phwt_array, phwt_label)
                  info = array.info()
                  array = array.phase_transfer(phases, deg=True)
                  array.set_info(
                    info.customized_copy(labels=info.labels+[phwt_label]))
                  self._arrays[fwt_label] = array
                  continue
              elif 'HL_' in key:
                hl_letter = key[key.find('HL_')+3]
                hl_key = 'HL_' + hl_letter
                key = key.replace(hl_key, 'HL_A')
                if key in self._arrays:
                  continue # this array is already dealt with
                hl_labels = [label.replace(hl_key, 'HL_'+letter) for letter in 'ABCD']
                hl_keys = [key.replace(hl_key, 'HL_'+letter) for letter in 'ABCD']
                hl_values = [cif_block.get(hl_key) for hl_key in hl_labels]
                if hl_values.count(None) == 0:
                  selection = self.get_selection(
                    hl_values[0], wavelength_id=w_id,
                    crystal_id=crys_id, scale_group_code=scale_group)
                  hl_values = [as_double_or_none_if_all_question_marks(
                    hl.select(selection), column_name=lab)
                               for hl, lab in zip(hl_values, hl_labels)]
                  array = miller.array(miller.set(
                    self.crystal_symmetry, self.indices.select(selection)
                    ).auto_anomalous(), flex.hendrickson_lattman(*hl_values))
                  labels = labels[:-1]+hl_labels
              elif '.B_' in key or '_B_' in key:
                if '.B_' in key:
                  key, key_b = key.replace('.B_', '.A_'), key
                  label, label_b = label.replace('.B_', '.A_'), label
                elif '_B_' in key:
                  key, key_b = key.replace('_B', '_A'), key
                  label, label_b = label.replace('_B', '_A'), label
                if key in refln_loop and key_b in refln_loop:
                  b_part = array.data()
                  if key in self._arrays:
                    info = self._arrays[key].info()
                    a_part = self._arrays[key].data()
                    self._arrays[key] = self._arrays[key].array(
                      data=flex.complex_double(a_part, b_part))
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[key_b]))
                    continue
              elif ('phase_' in key and not "_meas" in key and
                    self.crystal_symmetry.space_group() is not None):
                alt_key1 = label.replace('phase_', 'F_')
                alt_key2 = alt_key1 + '_au'
                if alt_key1 in refln_loop:
                  phase_key = label
                  key = alt_key1+key_suffix
                elif alt_key2 in refln_loop:
                  phase_key = label
                  key = alt_key2+key_suffix
                else: phase_key = None
                if phase_key is not None:
                  phases = array.data()
                  if key in self._arrays:
                    array = self._arrays[key]
                    array = as_flex_double(array, key)
                    check_array_sizes(array, phases, key, phase_key)
                    info = self._arrays[key].info()
                    self._arrays[key] = array.phase_transfer(phases, deg=True)
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[phase_key]))
                  else:
                    array = self.flex_std_string_as_miller_array(
                      refln_loop[label], wavelength_id=w_id, crystal_id=crys_id,
                      scale_group_code=scale_group)
                    check_array_sizes(array, phases, key, phase_key)
                    array = array.phase_transfer(phases, deg=True)
                    labels = labels+[label, phase_key]
              if base_array_info.labels is not None:
                labels = base_array_info.labels + labels
              def rstrip_substrings(string, substrings):
                for substr in substrings:
                  if substr == '': continue
                  if string.endswith(substr):
                    string = string[:-len(substr)]
                return string
              # determine observation type
              stripped_key = rstrip_substrings(
                key, [key_suffix, '_au', '_meas', '_calc', '_plus', '_minus'])
              if (stripped_key.endswith('F_squared') or
                  stripped_key.endswith('intensity') or
                  stripped_key.endswith('.I') or
                  stripped_key.endswith('_I')) and (
                    array.is_real_array() or array.is_integer_array()):
                array.set_observation_type_xray_intensity()
              elif (stripped_key.endswith('F') and (
                array.is_real_array() or array.is_integer_array())):
                array.set_observation_type_xray_amplitude()
              if (array.is_xray_amplitude_array() or
                  array.is_xray_intensity_array()):
                # e.g. merge_equivalents treats integer arrays differently, so must
                # convert integer observation arrays here to be safe
                if isinstance(array.data(), flex.int):
                  array = array.customized_copy(data=array.data().as_double())
              array.set_info(base_array_info.customized_copy(labels=labels))
              if (array.is_xray_amplitude_array() or
                  array.is_xray_intensity_array()):
                info = array.info()
                array.set_info(info.customized_copy(wavelength=wavelength))
              self._arrays.setdefault(key, array)
    for key, array in self._arrays.copy().iteritems():
      if (   key.endswith('_minus') or '_minus_' in key
          or key.endswith('_plus') or '_plus_' in key):
        if '_minus' in key:
          minus_key = key
          plus_key = key.replace('_minus', '_plus')
        elif '_plus' in key:
          plus_key = key
          minus_key = key.replace('_plus', '_minus')
        if plus_key in self._arrays and minus_key in self._arrays:
          plus_array = self._arrays.pop(plus_key)
          minus_array = self._arrays.pop(minus_key)
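          # Negating the indices of the minus array maps (h,k,l) onto the Friedel
          # mates (-h,-k,-l), so the plus and minus observations can then be
          # concatenated into a single anomalous array.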
          minus_array = minus_array.customized_copy(
            indices=-minus_array.indices()).set_info(minus_array.info())
          array = plus_array.concatenate(
            minus_array, assert_is_similar_symmetry=False)
          array = array.customized_copy(anomalous_flag=True)
          array.set_info(minus_array.info().customized_copy(
            labels=list(
              OrderedSet(plus_array.info().labels+minus_array.info().labels))))
          array.set_observation_type(plus_array.observation_type())
          self._arrays.setdefault(key, array)

    if len(self._arrays) == 0:
      raise CifBuilderError("No reflection data present in cif block")
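
A minimal usage sketch for the builder above: in practice it is reached through
iotbx.cif.reader (as in the extract() example later in this listing) rather than
instantiated directly. The file name below is a hypothetical placeholder.

import iotbx.cif
from cctbx import miller

base_info = miller.array_info(source_type="cif")
arrays_per_block = iotbx.cif.reader(file_path="example.cif").build_miller_arrays(
  base_array_info=base_info)
for block_name, arrays in arrays_per_block.items():
  for key, array in arrays.items():
    print(array.info().label_string())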
Exemplo n.º 25
 def __init__(self, cif_block, base_array_info=None, wavelengths=None):
     crystal_symmetry_builder.__init__(self, cif_block)
     self._arrays = OrderedDict()
     self._origarrays = OrderedDict()  # used for presenting raw data tables in HKLviewer
     basearraylabels = []
     if base_array_info is not None:
         self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
             other_symmetry=base_array_info.crystal_symmetry_from_file,
             force=True)
         if base_array_info.labels:
             basearraylabels = base_array_info.labels
     if (wavelengths is None):
         wavelengths = {}
     if base_array_info is None:
         base_array_info = miller.array_info(source_type="cif")
     refln_containing_loops = self.get_miller_indices_containing_loops()
     for self.indices, refln_loop in refln_containing_loops:
         self.wavelength_id_array = None
         self.crystal_id_array = None
         self.scale_group_array = None
         wavelength_ids = [None]
         crystal_ids = [None]
         scale_groups = [None]
         for key, value in six.iteritems(refln_loop):
             # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other
             # columns in self.get_selection() used by self.flex_std_string_as_miller_array()
             if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                     or key.endswith('scale_group_code')):
                 data = as_int_or_none_if_all_question_marks(
                     value, column_name=key)
                 if data is None:
                     continue
                 counts = data.counts()
                 if key.endswith('wavelength_id'):
                     wavelength_ids = list(counts.keys())
                 if len(counts) == 1: continue
                 array = miller.array(
                     miller.set(self.crystal_symmetry,
                                self.indices).auto_anomalous(), data)
                 if key.endswith('wavelength_id'):
                     self.wavelength_id_array = array
                     wavelength_ids = list(counts.keys())
                 elif key.endswith('crystal_id'):
                     self.crystal_id_array = array
                     crystal_ids = list(counts.keys())
                 elif key.endswith('scale_group_code'):
                     self.scale_group_array = array
                     scale_groups = list(counts.keys())
         labelsuffix = []
         wavelbl = []
         cryslbl = []
         scalegrplbl = []
         self._origarrays["HKLs"] = self.indices
         alllabels = list(sorted(refln_loop.keys()))
         remaininglabls = alllabels[:]  # deep copy the list
         # Parse labels matching cif column conventions
         # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html
         # and extract groups of labels or just single columns.
         # Groups corresponds to the map coefficients, phase and amplitudes,
         # amplitudes or intensities with sigmas and hendrickson-lattman columns.
         phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(
             remaininglabls)
         mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(
             remaininglabls)
         HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
         data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(
             remaininglabls)
         for w_id in wavelength_ids:
             for crys_id in crystal_ids:
                 for scale_group in scale_groups:
                      # If the reflection data contain more than one crystal, wavelength or scale group,
                      # append their id(s) as a suffix to the data labels computed below. This avoids
                      # ambiguity, but is skipped when not needed so the labels stay human readable.
                     if (len(wavelength_ids) > 1
                             or len(wavelengths) > 1) and w_id is not None:
                         wavelbl = ["wavelength_id=%i" % w_id]
                     if len(crystal_ids) > 1 and crys_id is not None:
                         cryslbl = ["crystal_id=%i" % crys_id]
                     if len(scale_groups) > 1 and scale_group is not None:
                         scalegrplbl = ["scale_group_code=%i" % scale_group]
                     labelsuffix = scalegrplbl + cryslbl + wavelbl
                     jlablsufx = ""
                     if len(labelsuffix):
                         jlablsufx = "," + ",".join(labelsuffix)
                     for mapcoefflabl in mapcoefflabls:
                         A_array = refln_loop[mapcoefflabl[0]]
                         B_array = refln_loop[mapcoefflabl[1]]
                         # deselect any ? marks in the two arrays, assuming both A and B have the same ? marks
                         selection = self.get_selection(
                             A_array,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         A_array = A_array.select(selection)
                         B_array = B_array.select(selection)
                         # form the miller array with map coefficients
                         data = flex.complex_double(flex.double(A_array),
                                                    flex.double(B_array))
                         millarr = miller.array(
                             miller.set(self.crystal_symmetry,
                                        self.indices.select(
                                            selection)).auto_anomalous(),
                             data)
                         # millarr will be None for column data not matching w_id,crys_id,scale_group values
                         if millarr is None: continue
                         labl = basearraylabels + mapcoefflabl + labelsuffix
                         millarr.set_info(
                             base_array_info.customized_copy(
                                 labels=labl,
                                 wavelength=wavelengths.get(w_id, None)))
                         self._arrays[mapcoefflabl[0] + jlablsufx] = millarr
                     for phaseamplabl in phaseamplabls:
                         amplitudestrarray = refln_loop[phaseamplabl[0]]
                         phasestrarray = refln_loop[phaseamplabl[1]]
                         millarr = self.flex_std_string_as_miller_array(
                             amplitudestrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         phasesmillarr = self.flex_std_string_as_miller_array(
                             phasestrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         # millarr will be None for column data not matching w_id,crys_id,scale_group values
                         if millarr is None or phasesmillarr is None:
                             continue
                         phases = as_flex_double(phasesmillarr,
                                                 phaseamplabl[1])
                         millarr = millarr.phase_transfer(phases, deg=True)
                         labl = basearraylabels + phaseamplabl + labelsuffix
                         millarr.set_info(
                             base_array_info.customized_copy(
                                 labels=labl,
                                 wavelength=wavelengths.get(w_id, None)))
                         self._arrays[phaseamplabl[0] + jlablsufx] = millarr
                     for datlabl, siglabl, otype in data_sig_obstype_labls:
                         datastrarray = refln_loop[datlabl]
                         millarr = self.flex_std_string_as_miller_array(
                             datastrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         # millarr will be None for column data not matching w_id,crys_id,scale_group values
                         if millarr is None: continue
                         millarr = as_flex_double(millarr, datlabl)
                         datsiglabl = [datlabl]
                         if siglabl:
                             sigmasstrarray = refln_loop[siglabl]
                             sigmas = self.flex_std_string_as_miller_array(
                                 sigmasstrarray,
                                 wavelength_id=w_id,
                                 crystal_id=crys_id,
                                 scale_group_code=scale_group)
                             sigmas = as_flex_double(sigmas, siglabl)
                             millarr.set_sigmas(sigmas.data())
                             datsiglabl = [datlabl, siglabl]
                         datsiglabl = basearraylabels + datsiglabl + labelsuffix
                         millarr.set_info(
                             base_array_info.customized_copy(
                                 labels=datsiglabl,
                                 wavelength=wavelengths.get(w_id, None)))
                         if otype is not None:
                             millarr.set_observation_type(otype)
                         self._arrays[datlabl + jlablsufx] = millarr
                     for hl_labels in HLcoefflabls:
                         hl_values = [
                             cif_block.get(hl_key) for hl_key in hl_labels
                         ]
                         if hl_values.count(None) == 0:
                             selection = self.get_selection(
                                 hl_values[0],
                                 wavelength_id=w_id,
                                 crystal_id=crys_id,
                                 scale_group_code=scale_group)
                             hl_values = [
                                 as_double_or_none_if_all_question_marks(
                                     hl.select(selection), column_name=lab)
                                 for hl, lab in zip(hl_values, hl_labels)
                             ]
                             # hl_values will be None for column data not matching w_id,crys_id,scale_group values
                             if hl_values == [None, None, None, None]:
                                 continue
                             millarr = miller.array(
                                 miller.set(
                                     self.crystal_symmetry,
                                     self.indices.select(
                                         selection)).auto_anomalous(),
                                 flex.hendrickson_lattman(*hl_values))
                             hlabels = basearraylabels + hl_labels + labelsuffix
                             millarr.set_info(
                                 base_array_info.customized_copy(
                                     labels=hlabels,
                                     wavelength=wavelengths.get(w_id,
                                                                None)))
                             self._arrays[hl_labels[0] +
                                          jlablsufx] = millarr
                     # pick up remaining columns if any that weren't identified above
                     for label in alllabels:
                         if "index_" in label:
                             continue
                         datastrarray = refln_loop[label]
                         if label in remaininglabls:
                             labels = basearraylabels + [label
                                                         ] + labelsuffix
                             lablsufx = jlablsufx
                             millarr = self.flex_std_string_as_miller_array(
                                 datastrarray,
                                 wavelength_id=w_id,
                                 crystal_id=crys_id,
                                 scale_group_code=scale_group)
                             # millarr will be None for column data not matching w_id,crys_id,scale_group values
                             if (label.endswith(
                                     'wavelength_id'
                             ) or label.endswith(
                                     'crystal_id'
                             ) or  # get full array if any of these labels, not just subsets
                                     label.endswith('scale_group_code')):
                                 millarr = self.flex_std_string_as_miller_array(
                                     datastrarray,
                                     wavelength_id=None,
                                     crystal_id=None,
                                     scale_group_code=None)
                                 lablsufx = ""
                                 labels = basearraylabels + [label]
                             if millarr is None: continue
                             otype = self.guess_observationtype(label)
                             if otype is not None:
                                 millarr.set_observation_type(otype)
                             millarr.set_info(
                                 base_array_info.customized_copy(
                                     labels=labels,
                                     wavelength=wavelengths.get(w_id,
                                                                None)))
                             self._arrays[label + lablsufx] = millarr
                         origarr = self.flex_std_string_as_miller_array(
                             datastrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         newlabel = label.replace("_refln.", "")
                         newlabel2 = newlabel.replace("_refln_", "")
                         if origarr:  # want only genuine miller arrays
                             self._origarrays[newlabel2 +
                                              jlablsufx] = origarr.data()
     # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays
     # i.e. both friedel mates in the same array
     for key, array in six.iteritems(self._arrays.copy()):
         plus_key = ""
         if '_minus' in key:
             minus_key = key
             plus_key = key.replace('_minus', '_plus')
         elif '-' in key:
             minus_key = key
             plus_key = key.replace('-', '+')
         elif '_plus' in key:
             plus_key = key
             minus_key = key.replace('_plus', '_minus')
         elif '+' in key:
             plus_key = key
             minus_key = key.replace('+', '-')
         if plus_key in self._arrays and minus_key in self._arrays:
             plus_array = self._arrays.pop(plus_key)
             minus_array = self._arrays.pop(minus_key)
             minus_array = minus_array.customized_copy(
                 indices=-minus_array.indices()).set_info(
                     minus_array.info())
             array = plus_array.concatenate(
                 minus_array, assert_is_similar_symmetry=False)
             array = array.customized_copy(anomalous_flag=True)
             array.set_info(minus_array.info().customized_copy(labels=list(
                 OrderedSet(plus_array.info().labels +
                            minus_array.info().labels))))
             array.set_observation_type(plus_array.observation_type())
             self._arrays.setdefault(key, array)
     if len(self._arrays) == 0:
         raise CifBuilderError("No reflection data present in cif block")
     # Sort the ordered dictionary to resemble the order of columns in the cif file
     # This is to avoid any F_meas arrays accidentally being put adjacent to
     # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these
     # arrays may unintentionally be combined into a reconstructed anomalous amplitude
     # array when saving as an mtz file due to a problem in the iotbx/mtz module.
     # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html
     arrlstord = []
     arrlst = list(self._arrays)
     for arr in arrlst:
         for i, k in enumerate(refln_loop.keys()):
             if arr.split(",")[0] == k:
                 arrlstord.append((arr, i))
     # arrlstord must have the same keys as in the self._arrays dictionary
     assert sorted(arrlst) == sorted([e[0] for e in arrlstord])
     sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1])
     self._ordarrays = OrderedDict()
     for sortkey, i in sortarrlst:
         self._ordarrays.setdefault(sortkey, self._arrays[sortkey])
     self._arrays = self._ordarrays
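
The builder above keeps the processed miller arrays in self._arrays and the raw
column data in self._origarrays (used for table display in the HKLviewer). A
hypothetical inspection sketch, assuming builder is an instance constructed from
a cif block:

for key, ma in builder._arrays.items():
    print(key + ": " + ma.info().label_string())
for label, column in builder._origarrays.items():
    print(label + ": " + str(len(column)))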
Exemplo n.º 26
 def get_solve_map(self):
   map_coeffs = self._convert_amplitudes_and_phases(f_label="FP",
     phi_label="PHIB", fom_label="FOM", weighted=True)
   from cctbx.miller import array_info
   info = array_info(labels=["FWT","PHWT"])
   return map_coeffs.set_info(info)
Exemplo n.º 27
def list_6_as_miller_arrays(file_name):
  """ Read the file of given name and return a pair of miller arrays
  (F_obs^2, F_cal) """
  # potentially iotbx.cif could be used here
  fcf = iter(open(file_name))
  space_group = sgtbx.space_group()
  unit_cell_params = {}
  indices = flex.miller_index()
  f_obs_squares = flex.double()
  sigma_f_obs_squares = flex.double()
  f_calc_amplitudes = flex.double()
  f_calc_phases = flex.double()
  for li in fcf:
    if li.startswith('loop_'):
      for li in fcf:
        li = li.strip()
        if li == '_symmetry_equiv_pos_as_xyz':
          for li in fcf:
            li = li.strip()
            if not li: break
            space_group.expand_smx(li[1:-1])
        else:
          for i in xrange(6): fcf.next()
          for li in fcf:
            items = li.split()
            if not items: break
            h,k,l, fo, sig_fo, fc, phase = items
            indices.append((int(h), int(k), int(l)))
            f_obs_squares.append(float(fo))
            sigma_f_obs_squares.append(float(sig_fo))
            f_calc_amplitudes.append(float(fc))
            f_calc_phases.append(float(phase))
        if not li: break
    elif li.startswith('_cell'):
      lbl, value = li.split()
      unit_cell_params[lbl] = float(value)

  unit_cell = uctbx.unit_cell(
    [ unit_cell_params[p]
      for p in ( "_cell_length_a","_cell_length_b","_cell_length_c",
                 "_cell_angle_alpha","_cell_angle_beta","_cell_angle_gamma" )
    ])
  crystal_symmetry = crystal.symmetry(
    unit_cell=unit_cell,
    space_group=space_group)
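  # the .fcf tabulates phases in degrees; convert to radians before assembling
  # the complex F_calc values below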
  f_calc_phases *= pi/180
  f_calc = flex.complex_double(
    reals=f_calc_amplitudes * flex.cos(f_calc_phases),
    imags=f_calc_amplitudes * flex.sin(f_calc_phases) )
  miller_set = miller.set(
      crystal_symmetry=crystal_symmetry,
      indices=indices).auto_anomalous()
  f_obs_squares = miller.array(
    miller_set=miller_set,
    data=f_obs_squares,
    sigmas=sigma_f_obs_squares)
  f_obs_squares.set_observation_type_xray_intensity()
  f_obs_squares.set_info(miller.array_info(
    source=file_name,
    labels=["F_squared_meas", "F_squared_sigma"]))
  f_calc = miller.array(
    miller_set=miller_set,
    data=f_calc)
  f_calc.set_info(miller.array_info(
    source=file_name,
    labels=["F_calc"]))
  return f_obs_squares, f_calc
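
A hedged usage sketch for the reader above; the .fcf file name is an assumption
chosen only for illustration.

f_obs_sq, f_calc = list_6_as_miller_arrays("refinement.fcf")
f_obs_sq.show_summary()
print(f_calc.is_complex_array())  # True: F_calc carries amplitude and phase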
Exemplo n.º 28
def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
    import iotbx.cif
    from cctbx import miller
    base_array_info = miller.array_info(
        crystal_symmetry_from_file=crystal_symmetry)
    all_miller_arrays = iotbx.cif.reader(
        file_path=file_name).build_miller_arrays(
            base_array_info=base_array_info)
    if (len(all_miller_arrays) == 0):
        raise Sorry(
            "No data arrays were found in this CIF file.  Please make " +
            "sure that the file contains reflection data, rather than the refined "
            + "model.")
    column_labels = set()
    if (extend_flags):
        map_to_asu = True

    def get_label(miller_array):
        label = None
        for l in miller_array.info().labels:
            if ('_meas' in l):
                if miller_array.is_xray_amplitude_array():
                    label = "FOBS"
                elif miller_array.is_xray_intensity_array():
                    label = "IOBS"
                elif l.endswith(".phase_meas"):
                    label = "PHIM"
                break
            elif ("_calc" in l):
                if miller_array.is_xray_amplitude_array():
                    label = "FC"
                elif miller_array.is_xray_intensity_array():
                    label = "ICALC"
                elif l.endswith(".F_calc"):
                    label = "FC"
                elif l.endswith(".phase_calc"):
                    label = "PHIC"
                break
            elif miller_array.anomalous_flag():
                if miller_array.is_xray_amplitude_array():
                    label = "F"
                elif miller_array.is_xray_intensity_array():
                    label = "I"
                break
            elif 'status' in l or '_free' in l:
                label = output_r_free_label
                break
            elif miller_array.is_hendrickson_lattman_array():
                label = "HL"
            elif (miller_array.is_complex_array()):
                if (l.endswith("DELFWT")):
                    label = "DELFWT"
                    break
                elif (l.endswith("FWT")):
                    label = "FWT"
                    break
            elif (miller_array.is_real_array()):
                if ("pdbx_anom_difference" in l):
                    label = "DANO"
                    break
                elif (l.endswith(".fom")):
                    label = "FOM"
                    break
        return label

    for (data_name, miller_arrays) in all_miller_arrays.iteritems():
        for ma in miller_arrays.values():
            other_symmetry = crystal_symmetry
            try:
                crystal_symmetry = other_symmetry.join_symmetry(
                    other_symmetry=ma.crystal_symmetry(), force=True)
            except AssertionError, e:
                str_e = str(e)
                from cStringIO import StringIO
                s = StringIO()
                if "Space group is incompatible with unit cell parameters." in str_e:
                    other_symmetry.show_summary(f=s)
                    ma.crystal_symmetry().show_summary(f=s)
                    str_e += "\n%s" % (s.getvalue())
                    raise Sorry(str_e)
                else:
                    raise
Exemplo n.º 29
def run(args):
    import libtbx
    from libtbx import easy_pickle
    from dials.util import log
    from dials.util.options import OptionParser

    parser = OptionParser(
        #usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        #epilog=help_message
    )

    params, options, args = parser.parse_args(show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity,
               info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    if params.save_plot and not params.animate:
        import matplotlib
        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        matplotlib.use('Agg')  # use a non-interactive backend

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        if len(experiments) > len(reflections):
            flattened_reflections = []
            for refl in reflections:
                for i in range(0, flex.max(refl['id']) + 1):
                    sel = refl['id'] == i
                    flattened_reflections.append(refl.select(sel))
            reflections = flattened_reflections

        assert len(experiments) == len(reflections)

        i_refl = 0
        for i_expt in enumerate(experiments):
            refl = reflections[i_refl]

        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())
            if 0 and 'intensity.prf.value' in refl:
                sel = refl.get_flags(refl.flags.integrated_prf)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.prf.value']
                variances = refl['intensity.prf.variance']
            else:
                assert 'intensity.sum.value' in refl
                sel = refl.get_flags(refl.flags.integrated_sum)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.sum.value']
                variances = refl['intensity.sum.variance']
            # FIXME probably need to do some filtering of intensities similar to that
            # done in export_mtz
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry,
                                    miller_indices,
                                    anomalous_flag=False)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source='DIALS', source_type='pickle'))
            datasets_input.append(intensities)

    files = args

    for file_name in files:

        try:
            data = easy_pickle.load(file_name)
            intensities = data['observations'][0]
            intensities.set_info(
                miller.array_info(source=file_name, source_type='pickle'))
            intensities = intensities.customized_copy(
                anomalous_flag=False).set_info(intensities.info())
            batches = None
        except Exception:
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'

            as_miller_arrays = reader.as_miller_arrays(merge_equivalents=False)
            intensities = [
                ma for ma in as_miller_arrays
                if ma.info().labels == ['I', 'SIGI']
            ][0]
            batches = [
                ma for ma in as_miller_arrays if ma.info().labels == ['BATCH']
            ]
            if len(batches):
                batches = batches[0]
            else:
                batches = None
            mtz_object = reader.file_content()
            intensities = intensities.customized_copy(
                anomalous_flag=False,
                indices=mtz_object.extract_original_index_miller_indices(
                )).set_info(intensities.info())

        intensities.set_observation_type_xray_intensity()
        datasets_input.append(intensities)

    if len(datasets_input) == 0:
        raise Sorry('No valid reflection files provided on command line')

    datasets = []
    for intensities in datasets_input:

        if params.batch is not None:
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        if params.min_i_mean_over_sigma_mean is not None and (
                params.d_min is libtbx.Auto or params.d_min is not None):
            from xia2.Modules import Resolutionizer
            rparams = Resolutionizer.phil_defaults.extract().resolutionizer
            rparams.nbins = 20
            resolutionizer = Resolutionizer.resolutionizer(
                intensities, None, rparams)
            i_mean_over_sigma_mean = 4
            d_min = resolutionizer.resolution_i_mean_over_sigma_mean(
                i_mean_over_sigma_mean)
            if params.d_min is libtbx.Auto:
                intensities = intensities.resolution_filter(
                    d_min=d_min).set_info(intensities.info())
                if params.verbose:
                    logger.info('Selecting reflections with d > %.2f' % d_min)
            elif d_min > params.d_min:
                logger.info('Rejecting dataset %s as d_min too low (%.2f)' %
                            (file_name, d_min))
                continue
            else:
                logger.info('Estimated d_min for %s: %.2f' %
                            (file_name, d_min))
        elif params.d_min not in (None, libtbx.Auto):
            intensities = intensities.resolution_filter(
                d_min=params.d_min).set_info(intensities.info())

        if params.normalisation == 'kernel':
            from mmtbx.scaling import absolute_scaling
            normalisation = absolute_scaling.kernel_normalisation(
                intensities, auto_kernel=True)
            intensities = normalisation.normalised_miller.deep_copy()

        cb_op_to_primitive = intensities.change_of_basis_op_to_primitive_setting(
        )
        intensities = intensities.change_basis(cb_op_to_primitive)
        if params.mode == 'full' or params.space_group is not None:
            if params.space_group is not None:
                space_group_info = params.space_group.primitive_setting()
                if not space_group_info.group().is_compatible_unit_cell(
                        intensities.unit_cell()):
                    logger.info(
                        'Skipping data set - incompatible space group and unit cell: %s, %s'
                        % (space_group_info, intensities.unit_cell()))
                    continue
            else:
                space_group_info = sgtbx.space_group_info('P1')
            intensities = intensities.customized_copy(
                space_group_info=space_group_info)

        datasets.append(intensities)

    crystal_symmetries = [d.crystal_symmetry().niggli_cell() for d in datasets]
    lattice_ids = range(len(datasets))
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    threshold = 1000
    if params.save_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure("Andrews-Bernstein distance dendogram",
                         figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(params.unit_cell_clustering.threshold,
                                 log=params.unit_cell_clustering.log,
                                 write_file_lists=False,
                                 schnell=False,
                                 doplot=params.save_plot,
                                 ax=ax)
    if params.save_plot:
        plt.tight_layout()
        plt.savefig('%scluster_unit_cell.png' % params.plot_prefix)
        plt.close(fig)
    logger.info(unit_cell_info(clusters))
    largest_cluster = None
    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if largest_cluster_lattice_ids is None:
            largest_cluster_lattice_ids = cluster_lattice_ids
        elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
            largest_cluster_lattice_ids = cluster_lattice_ids

    dataset_selection = largest_cluster_lattice_ids
    if len(dataset_selection) < len(datasets):
        logger.info('Selecting subset of data for cosym analysis: %s' %
                    str(dataset_selection))
        datasets = [datasets[i] for i in dataset_selection]

    # per-dataset change of basis operator to ensure all consistent
    change_of_basis_ops = []
    for i, dataset in enumerate(datasets):
        metric_subgroups = sgtbx.lattice_symmetry.metric_subgroups(dataset,
                                                                   max_delta=5)
        subgroup = metric_subgroups.result_groups[0]
        cb_op_inp_best = subgroup['cb_op_inp_best']
        datasets[i] = dataset.change_basis(cb_op_inp_best)
        change_of_basis_ops.append(cb_op_inp_best)

    cb_op_ref_min = datasets[0].change_of_basis_op_to_niggli_cell()
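    # cb_op_ref_min maps onto the Niggli setting of the first dataset, used here as
    # the common reference; it is applied to every dataset below and folded into the
    # accumulated change-of-basis operators.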
    for i, dataset in enumerate(datasets):
        if params.space_group is None:
            datasets[i] = dataset.change_basis(cb_op_ref_min).customized_copy(
                space_group_info=sgtbx.space_group_info('P1'))
        else:
            datasets[i] = dataset.change_basis(cb_op_ref_min)
            datasets[i] = datasets[i].customized_copy(
                crystal_symmetry=crystal.symmetry(
                    unit_cell=datasets[i].unit_cell(),
                    space_group_info=params.space_group.primitive_setting(),
                    assert_is_compatible_unit_cell=False))
        datasets[i] = datasets[i].merge_equivalents().array()
        change_of_basis_ops[i] = cb_op_ref_min * change_of_basis_ops[i]

    result = analyse_datasets(datasets, params)

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in result.reindexing_ops.iterkeys():
        if 0 in result.reindexing_ops[dataset_id]:
            cb_op = result.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, [])
            reindexing_ops[cb_op].append(dataset_id)
        if dataset_id in result.space_groups:
            space_groups.setdefault(result.space_groups[dataset_id], [])
            space_groups[result.space_groups[dataset_id]].append(dataset_id)

    logger.info('Space groups:')
    for sg, datasets in space_groups.iteritems():
        logger.info(str(sg.info().reference_setting()))
        logger.info(datasets)

    logger.info('Reindexing operators:')
    for cb_op, datasets in reindexing_ops.iteritems():
        logger.info(cb_op)
        logger.info(datasets)

    if (len(experiments) and len(reflections)
            and params.output.reflections is not None
            and params.output.experiments is not None):
        import copy
        from dxtbx.model import ExperimentList
        from dxtbx.serialize import dump
        reindexed_experiments = ExperimentList()
        reindexed_reflections = flex.reflection_table()
        expt_id = 0
        for cb_op, dataset_ids in reindexing_ops.iteritems():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                expt = experiments[dataset_selection[dataset_id]]
                refl = reflections[dataset_selection[dataset_id]]
                reindexed_expt = copy.deepcopy(expt)
                refl_reindexed = copy.deepcopy(refl)
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                reindexed_expt.crystal = reindexed_expt.crystal.change_basis(
                    cb_op_this)
                refl_reindexed['miller_index'] = cb_op_this.apply(
                    refl_reindexed['miller_index'])
                reindexed_experiments.append(reindexed_expt)
                refl_reindexed['id'] = flex.int(refl_reindexed.size(), expt_id)
                reindexed_reflections.extend(refl_reindexed)
                expt_id += 1

        logger.info('Saving reindexed experiments to %s' %
                    params.output.experiments)
        dump.experiment_list(reindexed_experiments, params.output.experiments)
        logger.info('Saving reindexed reflections to %s' %
                    params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        for cb_op, dataset_ids in reindexing_ops.iteritems():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                file_name = files[dataset_selection[dataset_id]]
                basename = os.path.basename(file_name)
                out_name = os.path.splitext(
                    basename)[0] + params.output.suffix + '_' + str(
                        dataset_selection[dataset_id]) + ".mtz"
                reader = any_reflection_file(file_name)
                assert reader.file_type() == 'ccp4_mtz'
                mtz_object = reader.file_content()
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                if not cb_op_this.is_identity_op():
                    logger.info('reindexing %s (%s)' %
                                (file_name, cb_op_this.as_xyz()))
                    mtz_object.change_basis_in_place(cb_op_this)
                mtz_object.write(out_name)
Exemplo n.º 30
  def as_miller_arrays(self,
                       crystal_symmetry=None,
                       force_symmetry=False,
                       merge_equivalents=True,
                       base_array_info=None,
                       assume_shelx_observation_type_is=None,
                       enforce_positive_sigmas=False,
     ):
    """
    Convert the contents of the reflection file into a list of
    :py:class:`cctbx.miller.array` objects, each of which may contain multiple
    columns of data from the underlying file.  By default this will
    automatically merge redundant observations to obtain a unique set under
    symmetry.

    :param crystal_symmetry: :py:class:`cctbx.crystal.symmetry` object
      (defaults to using internally specified symmetry, if any)
    :param force_symmetry: TODO
    :param merge_equivalents: merge redundant observations (default=True)
    :param base_array_info: :py:class:`cctbx.miller.array_info` object
      containing basic information to be propagated to the arrays
    :param assume_shelx_observation_type_is: if specified, instead of raising
      an exception if the SHELX file type is not known from the file name plus
      data type tag, the function will force the specified data type.
    """
    assert (assume_shelx_observation_type_is in
            [None, "amplitudes", "intensities"])
    if (self._file_type is None):
      return []
    info_source = self._file_name
    if (info_source.startswith("./") or info_source.startswith(".\\")):
      info_source = info_source[2:]
    if (base_array_info is None):
      base_array_info = miller.array_info(
        source=info_source,
        source_type=self._file_type)
    if (self._file_type == "cctbx.miller.array"):
      result = []
      for miller_array in self._file_content:
        info = miller_array.info()
        if (info is None or not isinstance(info, miller.array_info)):
          info = base_array_info
        info.source = info_source
        info.crystal_symmetry_from_file = crystal.symmetry(
          unit_cell=miller_array.unit_cell(),
          space_group_info=miller_array.space_group_info(),
          raise_sorry_if_incompatible_unit_cell=True)
        result.append(miller_array.customized_copy(
          crystal_symmetry=miller_array.join_symmetry(
            other_symmetry=crystal_symmetry,
            force=force_symmetry,
            raise_sorry_if_incompatible_unit_cell=True))
              .set_info(info)
              .set_observation_type(miller_array.observation_type()))
      return result
    if ((   crystal_symmetry is None
         or crystal_symmetry.unit_cell() is None)
        and self._observation_type == 'hklf+ins/res'
        ):
        name, ext = os.path.splitext(self._file_name)
        if ext != '.hkl': # it may be compressed: name.hkl.gz
          name, ext = os.path.splitext(name)
        for shelx_file_name in ('%s.ins' % name, '%s.res' % name):
          try:
            shelx_file = open(shelx_file_name)
            break
          except IOError:
            continue
        else:
          raise Sorry("Can't open files %s.ins or %s.res"
                      "required by the option hklf+ins/res" % ((name,)*2))
        crystal_symmetry = crystal_symmetry_from_ins.extract_from(
          file=shelx_file)
        shelx_file.seek(0)
        remaining = shelx_file.read()
        shelx_file.close()
        m = re.search("^HKLF\s*(\d)", remaining, re.X|re.M|re.S)
        if m is None:
          raise Sorry("%s does not contain the mandatory HKLF instruction"
                      % shelx_file.name)
        if m.group(1) == "4":
          self._observation_type = "intensities"
        elif m.group(1) == "3":
          self._observation_type = "amplitudes"
        else:
          raise Sorry("HKLF %s not supported" % m.group(1))
    result = self._file_content.as_miller_arrays(
      crystal_symmetry=crystal_symmetry,
      force_symmetry=force_symmetry,
      merge_equivalents=merge_equivalents,
      base_array_info=base_array_info,
      )
    if (self.file_type() == "shelx_hklf"):
      if ((self._observation_type == "intensities") or
          (assume_shelx_observation_type_is == "intensities")) :
        result[0].set_info(result[0].info().customized_copy(
          labels=["Iobs", "SigIobs"]))
        result[0].set_observation_type_xray_intensity()
      elif ((self._observation_type == "amplitudes") or
            (assume_shelx_observation_type_is == "amplitudes")) :
        result[0].set_info(result[0].info().customized_copy(
          labels=["Fobs", "SigFobs"]))
        result[0].set_observation_type_xray_amplitude()
      else:
        raise Sorry("Unresolved amplitude/intensity ambiguity: %s\n"
          "  SHELX reflection files may contain amplitudes or intensities.\n"
          "  Please append   =amplitudes\n"
          "             or   =hklf3\n"
          "             or   =intensities\n"
          "             or   =hklf4\n"
          "  to the file name argument or parameter to resolve the"
          " ambiguity.\n"
          "  If a corresponding .ins file is available, look for the"
          " HKLF codeword.\n"
          "  Alternatively, run the phenix.reflection_statistics"
          " command twice,\n"
          "  once with =amplitudes and once with =intensities. Inspect"
          " the <I^2>/(<I>)^2\n"
          "  statistics. For acentric structures the values should"
          " fluctuate around\n"
          "  2.0, for centric structures around 3.0. If the statistics"
          " are not conclusive\n"
          "  it will be best to recover the original reflection data, such"
          " as SCALEPACK,\n"
          "  SCALA MTZ, XDS, or d*TREK files." % self._file_name)
    # discard reflections where sigma <= 0
    # XXX note that this will happen after data merging, so for unmerged data
    # it is better to specify merge_equivalents=False!
    if (enforce_positive_sigmas) :
      result_ = []
      for array in result :
        result_.append(array.enforce_positive_sigmas())
      result = result_
    return result
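
A minimal, hypothetical call of the method above through iotbx's reflection file
reader; the file name and keyword choices are assumptions picked to exercise the
SHELX ambiguity handling and the optional sigma filtering described in the code.

from iotbx.reflection_file_reader import any_reflection_file

hkl_in = any_reflection_file("data.hkl")
arrays = hkl_in.as_miller_arrays(
  merge_equivalents=False,
  assume_shelx_observation_type_is="intensities",
  enforce_positive_sigmas=True)
for ma in arrays:
  print(ma.info().label_string())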
Exemplo n.º 31
def filtered_arrays_from_experiments_reflections(
    experiments,
    reflections,
    outlier_rejection_after_filter=False,
    partiality_threshold=0.99,
    return_batches=False,
):
    """Create a list of filtered arrays from experiments and reflections.

    A partiality threshold can be set, and if outlier_rejection_after_filter
    is True, and intensity.scale values are not present, then a round of
    outlier rejection will take place.

    Raises:
        ValueError: if no datasets remain after filtering.
    """
    miller_arrays = []
    ids_to_del = []

    if return_batches:
        assert all(expt.scan is not None for expt in experiments)
        batch_offsets = [expt.scan.get_batch_offset() for expt in experiments]
        reflections = assign_batches_to_reflections(reflections, batch_offsets)
        batch_arrays = []

    for idx, (expt, refl) in enumerate(zip(experiments, reflections)):
        crystal_symmetry = crystal.symmetry(
            unit_cell=expt.crystal.get_unit_cell(),
            space_group=expt.crystal.get_space_group(),
        )

        # want to use scale intensities if present, else sum + prf (if available)
        if "intensity.scale.value" in refl:
            intensity_choice = ["scale"]
            intensity_to_use = "intensity.scale"
        else:
            assert "intensity.sum.value" in refl
            intensity_to_use = "intensity.sum"
            intensity_choice = ["sum"]
            if "intensity.prf.value" in refl:
                intensity_choice.append("profile")
                intensity_to_use = "intensity.prf"

        try:
            logger.info("Filtering reflections for dataset %s" % idx)
            refl = filter_reflection_table(
                refl,
                intensity_choice,
                min_isigi=-5,
                filter_ice_rings=False,
                combine_partials=True,
                partiality_threshold=partiality_threshold,
            )
        except ValueError:
            logger.info(
                "Dataset %s removed as no reflections left after filtering",
                idx)
            ids_to_del.append(idx)
        else:
            # If scale was chosen, filter_reflection_table will have returned scale
            # intensities or raised ValueError. If prf or sum was chosen, prf columns
            # may have been removed, in which case we fall back to sum below.
            try:
                refl["intensity"] = refl[intensity_to_use + ".value"]
                refl["variance"] = refl[intensity_to_use + ".variance"]
            except KeyError:  # catch case where prf were removed.
                refl["intensity"] = refl["intensity.sum.value"]
                refl["variance"] = refl["intensity.sum.variance"]
            if outlier_rejection_after_filter and intensity_to_use != "intensity.scale":
                refl = reject_outliers(refl, expt, method="simple", zmax=12.0)
                refl = refl.select(
                    ~refl.get_flags(refl.flags.outlier_in_scaling))

            miller_set = miller.set(crystal_symmetry,
                                    refl["miller_index"],
                                    anomalous_flag=False)
            intensities = miller_set.array(data=refl["intensity"],
                                           sigmas=flex.sqrt(refl["variance"]))
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source="DIALS", source_type="pickle"))
            miller_arrays.append(intensities)
            if return_batches:
                batch_arrays.append(
                    miller_set.array(data=refl["batch"]).set_info(
                        intensities.info()))

    if not miller_arrays:
        raise ValueError(
            """No datasets remain after pre-filtering. Please check input data.
The datasets may not contain any full reflections; the command line
option partiality_threshold can be lowered to include partials.""")

    for id_ in ids_to_del[::-1]:
        del experiments[id_]
        del reflections[id_]

    if return_batches:
        return miller_arrays, batch_arrays
    return miller_arrays
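The selection logic above prefers scaled intensities when present, then profile-fitted values, and finally summation intensities if the profile columns were dropped during filtering. A dependency-free sketch of that preference order, modelling a reflection table as a plain dict of column names (pick_intensity_columns is an illustrative name, not a DIALS function):

# Hedged sketch of the scale > profile > sum preference used above.
def pick_intensity_columns(refl):
    if "intensity.scale.value" in refl:
        return "intensity.scale"
    if "intensity.prf.value" in refl:
        return "intensity.prf"
    if "intensity.sum.value" in refl:
        return "intensity.sum"
    raise KeyError("no intensity columns found")

# example: profile data present, so intensity.prf wins over intensity.sum
print(pick_intensity_columns({"intensity.sum.value": [1.0],
                              "intensity.prf.value": [1.1]}))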
Example No. 32
class intensities_scaler(object):
    """
  Author      : Uervirojnangkoorn, M.
  Created     : 7/13/2014
  Merge equivalent reflections and report intensity and refinement statistics.
  """
    def __init__(self):
        """
    Constructor
    """
        self.CONST_SE_MIN_WEIGHT = 0.17
        self.CONST_SE_MAX_WEIGHT = 1.0
        self.CONST_SIG_I_FACTOR = 1.5

    def write_stat_pickle(self, iparams, stat_dict):
        fname = iparams.run_no + '/pickle.stat'
        if os.path.isfile(fname):
            pickle_stat = pickle.load(open(fname, "rb"))
            for key in stat_dict.keys():
                if key in pickle_stat.keys():
                    pickle_stat[key].append(stat_dict[key][0])
                else:
                    pickle_stat[key] = stat_dict[key]
            pickle.dump(pickle_stat, open(fname, "wb"))
        else:
            pickle.dump(stat_dict, open(fname, "wb"))

    def read_stat_pickle(self, iparams):
        fname = iparams.run_no + '/pickle.stat'
        if os.path.isfile(fname):
            pickle_stat = pickle.load(open(fname, "rb"))
            for key in pickle_stat.keys():
                data = pickle_stat[key]
                print "key:", key, " size:", len(data)
                for d in data:
                    print d

    def calc_avg_I_cpp(self, prep_output, iparams, avg_mode):
        group_no, group_id_list, miller_index, miller_indices_ori, I, sigI, G, B, p_set, rs_set, wavelength_set, sin_theta_over_lambda_sq, SE, uc_mean, wavelength_mean, pickle_filename_set, txt_out = prep_output
        from prime import Average_Mode, averaging_engine
        if avg_mode == 'average': avg_mode_cpp = Average_Mode.Average
        elif avg_mode == 'weighted': avg_mode_cpp = Average_Mode.Weighted
        elif avg_mode == 'final': avg_mode_cpp = Average_Mode.Final
        else: raise Sorry("Bad averaging mode selected: %s" % avg_mode)
        sigma_max = iparams.sigma_rejection
        engine = averaging_engine(group_no, group_id_list, miller_index,
                                  miller_indices_ori, I, sigI, G, B, p_set,
                                  rs_set, wavelength_set,
                                  sin_theta_over_lambda_sq, SE,
                                  pickle_filename_set)
        engine.avg_mode = avg_mode_cpp
        engine.sigma_max = sigma_max
        engine.flag_volume_correction = iparams.flag_volume_correction
        engine.n_rejection_cycle = iparams.n_rejection_cycle
        engine.flag_output_verbose = iparams.flag_output_verbose
        results = engine.calc_avg_I()
        mdh = merge_data_handler(
            results.miller_index, results.I_avg, results.sigI_avg,
            (results.r_meas_top, results.r_meas_btm, results.multiplicity),
            (results.I_avg_even, results.I_avg_odd, results.I_avg_even_h,
             results.I_avg_odd_h, results.I_avg_even_k, results.I_avg_odd_k,
             results.I_avg_even_l, results.I_avg_odd_l), uc_mean,
            wavelength_mean)
        return mdh, results.txt_obs_out, results.txt_reject_out

    def calc_mean_unit_cell(self, results):
        uc_array = [
            list(pres.uc_params) for pres in results if pres is not None
        ]
        return np.mean(uc_array, 0), np.median(uc_array,
                                               0), np.std(uc_array, 0)

    def calc_mean_postref_parameters(self, results):
        params_array = [[pres.G, pres.B, pres.ry, pres.rz, pres.re, pres.r0, \
            pres.voigt_nu, pres.rotx, pres.roty, pres.R_final, pres.R_xy_final, pres.SE] \
            for pres in results if (pres is not None and not math.isnan(pres.G) and not math.isnan(pres.B) \
            and not math.isnan(pres.ry) and not math.isnan(pres.rz) and not math.isnan(pres.re) and not math.isnan(pres.r0) \
            and not math.isnan(pres.voigt_nu) and not math.isnan(pres.rotx) and not math.isnan(pres.roty) \
            and not math.isnan(pres.R_final) and not math.isnan(pres.R_xy_final) and not math.isnan(pres.SE))]
        return np.mean(params_array, 0), np.median(params_array,
                                                   0), np.std(params_array, 0)

    def prepare_output(self, results, iparams, avg_mode):
        if avg_mode == 'average':
            cc_thres = 0
        else:
            cc_thres = iparams.frame_accept_min_cc
        std_filter = iparams.sigma_rejection
        if iparams.flag_weak_anomalous:
            if avg_mode == 'final':
                target_anomalous_flag = iparams.target_anomalous_flag
            else:
                target_anomalous_flag = False
        else:
            target_anomalous_flag = iparams.target_anomalous_flag
        pr_params_mean, pr_params_med, pr_params_std = self.calc_mean_postref_parameters(
            results)
        G_mean, B_mean, ry_mean, rz_mean, re_mean, r0_mean, voigt_nu_mean, rotx_mean, roty_mean, R_mean, R_xy_mean, SE_mean = pr_params_mean
        G_med, B_med, ry_med, rz_med, re_med, r0_med, voigt_nu_med, rotx_med, roty_med, R_med, R_xy_med, SE_med = pr_params_med
        G_std, B_std, ry_std, rz_std, re_std, r0_std, voigt_nu_std, rotx_std, roty_std, R_std, R_xy_std, SE_std = pr_params_std
        #prepare data for merging
        miller_indices_all = flex.miller_index()
        miller_indices_ori_all = flex.miller_index()
        I_all = flex.double()
        sigI_all = flex.double()
        G_all = flex.double()
        B_all = flex.double()
        p_all = flex.double()
        rx_all = flex.double()
        rs_all = flex.double()
        rh_all = flex.double()
        SE_all = flex.double()
        sin_sq_all = flex.double()
        wavelength_all = flex.double()
        detector_distance_set = flex.double()
        R_init_all = flex.double()
        R_final_all = flex.double()
        R_xy_init_all = flex.double()
        R_xy_final_all = flex.double()
        pickle_filename_all = flex.std_string()
        filtered_results = []
        cn_good_frame, cn_bad_frame_SE, cn_bad_frame_uc, cn_bad_frame_cc, cn_bad_frame_G, cn_bad_frame_re = (
            0, 0, 0, 0, 0, 0)
        crystal_orientation_dict = {}
        for pres in results:
            if pres is not None:
                pickle_filepath = pres.pickle_filename.split('/')
                img_filename = pickle_filepath[len(pickle_filepath) - 1]
                flag_pres_ok = True
                #check SE, CC, UC, G, B, gamma_e
                if math.isnan(pres.G):
                    flag_pres_ok = False
                if math.isnan(pres.SE) or np.isinf(pres.SE):
                    flag_pres_ok = False
                if flag_pres_ok and SE_std > 0:
                    if abs(pres.SE - SE_med) / SE_std > std_filter:
                        flag_pres_ok = False
                        cn_bad_frame_SE += 1
                if flag_pres_ok and pres.CC_final < cc_thres:
                    flag_pres_ok = False
                    cn_bad_frame_cc += 1
                if flag_pres_ok:
                    if G_std > 0:
                        if abs(pres.G - G_med) / G_std > std_filter:
                            flag_pres_ok = False
                            cn_bad_frame_G += 1
                if flag_pres_ok:
                    if re_std > 0:
                        if abs(pres.re - re_med) / re_std > std_filter:
                            flag_pres_ok = False
                            cn_bad_frame_re += 1
                if flag_pres_ok and not good_unit_cell(
                        pres.uc_params, iparams, iparams.merge.uc_tolerance):
                    flag_pres_ok = False
                    cn_bad_frame_uc += 1
                data_size = pres.observations.size()
                if flag_pres_ok:
                    cn_good_frame += 1
                    filtered_results.append(pres)
                    R_init_all.append(pres.R_init)
                    R_final_all.append(pres.R_final)
                    R_xy_init_all.append(pres.R_xy_init)
                    R_xy_final_all.append(pres.R_xy_final)
                    miller_indices_all.extend(pres.observations.indices())
                    miller_indices_ori_all.extend(
                        pres.observations_original.indices())
                    I_all.extend(pres.observations.data())
                    sigI_all.extend(pres.observations.sigmas())
                    G_all.extend(flex.double([pres.G] * data_size))
                    B_all.extend(flex.double([pres.B] * data_size))
                    p_all.extend(pres.partiality)
                    rs_all.extend(pres.rs_set)
                    rh_all.extend(pres.rh_set)
                    sin_sq_all.extend(
                        pres.observations.two_theta(wavelength=pres.wavelength)
                        .sin_theta_over_lambda_sq().data())
                    SE_all.extend(flex.double([pres.SE] * data_size))
                    wavelength_all.extend(
                        flex.double([pres.wavelength] * data_size))
                    detector_distance_set.append(pres.detector_distance_mm)
                    pickle_filename_all.extend(
                        flex.std_string([pres.pickle_filename] * data_size))
                    crystal_orientation_dict[
                        pres.pickle_filename] = pres.crystal_orientation
        #plot stats
        self.plot_stats(filtered_results, iparams)
        #write out updated crystal orientation as a pickle file
        if not iparams.flag_hush:
            pickle.dump(crystal_orientation_dict,
                        open(iparams.run_no + '/' + "crystal.o", "wb"),
                        pickle.HIGHEST_PROTOCOL)
        #calculate average unit cell
        uc_mean, uc_med, uc_std = self.calc_mean_unit_cell(filtered_results)
        unit_cell_mean = unit_cell(tuple(uc_mean))
        #recalculate stats for pr parameters
        pr_params_mean, pr_params_med, pr_params_std = self.calc_mean_postref_parameters(
            filtered_results)
        G_mean, B_mean, ry_mean, rz_mean, re_mean, r0_mean, voigt_nu_mean, rotx_mean, roty_mean, R_mean, R_xy_mean, SE_mean = pr_params_mean
        G_med, B_med, ry_med, rz_med, re_med, r0_med, voigt_nu_med, rotx_med, roty_med, R_med, R_xy_med, SE_med = pr_params_med
        G_std, B_std, ry_std, rz_std, re_std, r0_std, voigt_nu_std, rotx_std, roty_std, R_std, R_xy_std, SE_std = pr_params_std
        #from all observations merge them
        crystal_symmetry = crystal.symmetry(
            unit_cell=tuple(uc_mean),
            space_group_symbol=iparams.target_space_group)
        miller_set_all = miller.set(crystal_symmetry=crystal_symmetry,
                                    indices=miller_indices_all,
                                    anomalous_flag=target_anomalous_flag)
        miller_array_all = miller_set_all.array(
            data=I_all, sigmas=sigI_all).set_observation_type_xray_intensity()
        #sort reflections according to asymmetric-unit symmetry hkl
        perm = miller_array_all.sort_permutation(by_value="packed_indices")
        miller_indices_all_sort = miller_array_all.indices().select(perm)
        miller_indices_ori_all_sort = miller_indices_ori_all.select(perm)
        I_obs_all_sort = miller_array_all.data().select(perm)
        sigI_obs_all_sort = miller_array_all.sigmas().select(perm)
        G_all_sort = G_all.select(perm)
        B_all_sort = B_all.select(perm)
        p_all_sort = p_all.select(perm)
        rs_all_sort = rs_all.select(perm)
        wavelength_all_sort = wavelength_all.select(perm)
        sin_sq_all_sort = sin_sq_all.select(perm)
        SE_all_sort = SE_all.select(perm)
        pickle_filename_all_sort = pickle_filename_all.select(perm)
        miller_array_uniq = miller_array_all.merge_equivalents().array(
        ).complete_array(d_min=iparams.merge.d_min, d_max=iparams.merge.d_max)
        matches_uniq = miller.match_multi_indices(
            miller_indices_unique=miller_array_uniq.indices(),
            miller_indices=miller_indices_all_sort)
        pair_0 = flex.int([pair[0] for pair in matches_uniq.pairs()])
        pair_1 = flex.int([pair[1] for pair in matches_uniq.pairs()])
        group_id_list = flex.int(
            [pair_0[pair_1[i]] for i in range(len(matches_uniq.pairs()))])
        tally = Counter()
        for elem in group_id_list:
            tally[elem] += 1
        cn_group = len(tally)
        #prepare txt out stat
        txt_out = 'Summary of refinement and merging\n'
        txt_out += ' No. good frames:          %12.0f\n' % (cn_good_frame)
        txt_out += ' No. bad cc frames:        %12.0f\n' % (cn_bad_frame_cc)
        txt_out += ' No. bad G frames:         %12.0f\n' % (cn_bad_frame_G)
        txt_out += ' No. bad unit cell frames: %12.0f\n' % (cn_bad_frame_uc)
        txt_out += ' No. bad gamma_e frames:   %12.0f\n' % (cn_bad_frame_re)
        txt_out += ' No. bad SE:               %12.0f\n' % (cn_bad_frame_SE)
        txt_out += ' No. observations:         %12.0f\n' % (
            len(I_obs_all_sort))
        txt_out += 'Mean target value (BEFORE: Mean Median (Std.))\n'
        txt_out += ' post-refinement:          %12.2f %12.2f (%9.2f)\n' % (
            np.mean(R_init_all), np.median(R_init_all), np.std(R_init_all))
        txt_out += ' (x,y) restraints:         %12.2f %12.2f (%9.2f)\n' % (
            np.mean(R_xy_init_all), np.median(R_xy_init_all),
            np.std(R_xy_init_all))
        txt_out += 'Mean target value (AFTER: Mean Median (Std.))\n'
        txt_out += ' post-refinement:          %12.2f %12.2f (%9.2f)\n' % (
            np.mean(R_final_all), np.median(R_final_all), np.std(R_final_all))
        txt_out += ' (x,y) restraints:         %12.2f %12.2f (%9.2f)\n' % (
            np.mean(R_xy_final_all), np.median(R_xy_final_all),
            np.std(R_xy_final_all))
        txt_out += ' SE:                       %12.2f %12.2f (%9.2f)\n' % (
            SE_mean, SE_med, SE_std)
        txt_out += ' G:                        %12.3e %12.3e (%9.2e)\n' % (
            G_mean, G_med, G_std)
        txt_out += ' B:                        %12.2f %12.2f (%9.2f)\n' % (
            B_mean, B_med, B_std)
        txt_out += ' Rot.x:                    %12.2f %12.2f (%9.2f)\n' % (
            rotx_mean * 180 / math.pi, rotx_med * 180 / math.pi,
            rotx_std * 180 / math.pi)
        txt_out += ' Rot.y:                    %12.2f %12.2f (%9.2f)\n' % (
            roty_mean * 180 / math.pi, roty_med * 180 / math.pi,
            roty_std * 180 / math.pi)
        txt_out += ' gamma_y:                  %12.5f %12.5f (%9.5f)\n' % (
            ry_mean, ry_med, ry_std)
        txt_out += ' gamma_z:                  %12.5f %12.5f (%9.5f)\n' % (
            rz_mean, rz_med, rz_std)
        txt_out += ' gamma_0:                  %12.5f %12.5f (%9.5f)\n' % (
            r0_mean, r0_med, r0_std)
        txt_out += ' gamma_e:                  %12.5f %12.5f (%9.5f)\n' % (
            re_mean, re_med, re_std)
        txt_out += ' voigt_nu:                 %12.5f %12.5f (%9.5f)\n' % (
            voigt_nu_mean, voigt_nu_med, voigt_nu_std)
        txt_out += ' unit cell\n'
        txt_out += '   a:                      %12.2f %12.2f (%9.2f)\n' % (
            uc_mean[0], uc_med[0], uc_std[0])
        txt_out += '   b:                      %12.2f %12.2f (%9.2f)\n' % (
            uc_mean[1], uc_med[1], uc_std[1])
        txt_out += '   c:                      %12.2f %12.2f (%9.2f)\n' % (
            uc_mean[2], uc_med[2], uc_std[2])
        txt_out += '   alpha:                  %12.2f %12.2f (%9.2f)\n' % (
            uc_mean[3], uc_med[3], uc_std[3])
        txt_out += '   beta:                   %12.2f %12.2f (%9.2f)\n' % (
            uc_mean[4], uc_med[4], uc_std[4])
        txt_out += '   gamma:                  %12.2f %12.2f (%9.2f)\n' % (
            uc_mean[5], uc_med[5], uc_std[5])
        txt_out += 'Parameters from integration (not refined)\n'
        txt_out += '  Wavelength:              %12.5f %12.5f (%9.5f)\n' % (
            np.mean(wavelength_all), np.median(wavelength_all),
            np.std(wavelength_all))
        txt_out += '  Detector distance:       %12.5f %12.5f (%9.5f)\n' % (
            np.mean(detector_distance_set), np.median(detector_distance_set),
            np.std(detector_distance_set))
        txt_out += '* (standard deviation)\n'
        #write out stat. pickle
        if not iparams.flag_hush:
            stat_dict = {"n_frames_good": [cn_good_frame], \
                         "n_frames_bad_cc": [cn_bad_frame_cc], \
                         "n_frames_bad_G": [cn_bad_frame_G], \
                         "n_frames_bad_uc": [cn_bad_frame_uc], \
                         "n_frames_bad_gamma_e": [cn_bad_frame_re], \
                         "n_frames_bad_SE": [cn_bad_frame_SE], \
                         "n_observations": [len(I_obs_all_sort)], \
                         "R_start": [np.mean(R_init_all)], \
                         "R_end": [np.mean(R_final_all)], \
                         "R_xy_start": [np.mean(R_xy_init_all)], \
                         "R_xy_end": [np.mean(R_xy_final_all)], \
                         "mean_gamma_y": [ry_mean], \
                         "std_gamma_y": [ry_std], \
                         "mean_gamma_z": [rz_mean], \
                         "std_gamma_z": [rz_std], \
                         "mean_gamma_0": [r0_mean], \
                         "std_gamma_0": [r0_std], \
                         "mean_gamma_e": [re_mean], \
                         "std_gamma_e": [re_std], \
                         "mean_voigt_nu": [voigt_nu_mean], \
                         "std_voigt_nu": [voigt_nu_std], \
                         "mean_a": [uc_mean[0]], \
                         "std_a": [uc_std[0]], \
                         "mean_b": [uc_mean[1]], \
                         "std_b": [uc_std[1]], \
                         "mean_c": [uc_mean[2]], \
                         "std_c": [uc_std[2]], \
                         "mean_alpha": [uc_mean[3]], \
                         "std_alpha": [uc_std[3]], \
                         "mean_beta": [uc_mean[4]], \
                         "std_beta": [uc_std[4]], \
                         "mean_gamma": [uc_mean[5]], \
                         "std_gamma": [uc_std[5]]}
            self.write_stat_pickle(iparams, stat_dict)
        return cn_group, group_id_list, miller_indices_all_sort, miller_indices_ori_all_sort, \
               I_obs_all_sort, sigI_obs_all_sort,G_all_sort, B_all_sort, \
               p_all_sort, rs_all_sort, wavelength_all_sort, sin_sq_all_sort, SE_all_sort, uc_mean, \
               np.mean(wavelength_all), pickle_filename_all_sort, txt_out

    def write_output(self, mdh, iparams, output_mtz_file_prefix, avg_mode):
        if iparams.flag_weak_anomalous:
            if avg_mode == 'final':
                target_anomalous_flag = iparams.target_anomalous_flag
            else:
                target_anomalous_flag = False
        else:
            target_anomalous_flag = iparams.target_anomalous_flag
        uc_mean = mdh.uc_mean
        wavelength_mean = mdh.wavelength_mean
        #output mtz file and report binning stat
        miller_set_merge = crystal.symmetry(
            unit_cell=unit_cell(tuple(uc_mean)),
            space_group_symbol=iparams.target_space_group).build_miller_set(
                anomalous_flag=target_anomalous_flag,
                d_min=iparams.merge.d_min)
        mdh.generate_miller_array_from_miller_set(miller_set_merge,
                                                  target_anomalous_flag)
        miller_array_complete = miller_set_merge.array()
        fake_data = flex.double([1.0] * len(miller_array_complete.indices()))
        miller_array_template_asu = miller_array_complete.customized_copy(data=fake_data, \
                  sigmas=fake_data).resolution_filter(d_min=iparams.merge.d_min, \
                  d_max=iparams.merge.d_max)
        n_refl_all = mdh.get_size()
        #do another resolution filter here
        i_sel_res = mdh.miller_array_merge.resolution_filter_selection(
            d_min=iparams.merge.d_min, d_max=iparams.merge.d_max)
        mdh.reduce_by_selection(i_sel_res)
        n_refl_out_resolutions = n_refl_all - mdh.get_size()
        #remove outliers
        sequences = flex.int(range(mdh.get_size()))
        good_sequences = []
        for i_rejection in range(iparams.n_rejection_cycle):
            binner_merge = mdh.miller_array_merge.setup_binner(n_bins=200)
            for i_bin in range(1, 201):
                i_binner = (binner_merge.bin_indices() == i_bin)
                I_obs_bin = mdh.miller_array_merge.data().select(i_binner)
                sequences_bin = sequences.select(i_binner)
                if len(I_obs_bin) > 0:
                    I_obs_bin = mdh.miller_array_merge.data().select(i_binner)
                    try:
                        i_filter = flex.abs(
                            (I_obs_bin - np.median(I_obs_bin)) /
                            np.std(I_obs_bin)) < 10
                    except Exception, e:
                        print "Warning: outlier rejection by bins failed because of floating point."
                        print e
                        i_filter = flex.bool([True] * len(I_obs_bin))
                    good_sequences.extend(list(sequences_bin.select(i_filter)))
        mdh.reduce_by_selection(flex.size_t(good_sequences))
        n_refl_outliers = n_refl_all - n_refl_out_resolutions - mdh.get_size()
        #get iso if given.
        mxh = mx_handler()
        flag_hklisoin_found, miller_array_iso = mxh.get_miller_array_from_reflection_file(
            iparams.hklisoin)
        #write output files
        if output_mtz_file_prefix != '':
            #write as mtz file
            miller_array_merge_unique = mdh.miller_array_merge.merge_equivalents(
            ).array()
            info = miller.array_info(wavelength=wavelength_mean)
            miller_array_merge_unique.set_info(info)
            mtz_dataset_merge = miller_array_merge_unique.as_mtz_dataset(
                column_root_label="IOBS")
            mtz_dataset_merge.mtz_object().write(
                file_name=output_mtz_file_prefix + '_merge.mtz')
            #write as cns file
            f_cns = open(output_mtz_file_prefix + '_merge.hkl', 'w')
            miller_array_merge_unique.export_as_cns_hkl(file_object=f_cns)
            f_cns.close()
        if iparams.flag_hush:
            cc12, n_refl_cc12 = mdh.get_cc12()
            cciso, n_refl_cciso = mdh.get_cciso(miller_array_iso)
            cc_anom_acentric, n_refl_anom_acentric = mdh.get_cc_anom()
            txt_out = 'Warning: flag_hush is set to True. Continuing without writing merging statistics tables.\n'
            txt_out += 'Bin Resolution Range     Completeness      <N_obs> |Rmerge  Rsplit   CC1/2   N_ind |CCiso   N_ind|CCanoma  N_ind| <I/sigI>   <I>    <sigI>    <I**2>\n'
            txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
            txt_out += '        TOTAL        %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f %6.0f %8.2f %10.1f %8.1f %6.2f\n' \
              %((mdh.get_size()/miller_array_template_asu.size())*100, \
                  mdh.get_size(), miller_array_template_asu.size(),\
                  mdh.get_multiplicity(), mdh.get_r_meas()*100, mdh.get_r_split()*100, \
                  cc12*100, n_refl_cc12, cciso*100, n_refl_cciso, \
                  cc_anom_acentric, n_refl_anom_acentric, \
                  mdh.get_mean_IoversigI(), mdh.get_mean_I(), mdh.get_mean_sigI(), mdh.get_second_moment())
        else:
            #calculate isotropic B-factor
            try:
                mxh = mx_handler()
                asu_contents = mxh.get_asu_contents(iparams.n_residues)
                observations_as_f = mdh.miller_array_merge.as_amplitude_array()
                observations_as_f.setup_binner(auto_binning=True)
                wp = statistics.wilson_plot(observations_as_f,
                                            asu_contents,
                                            e_statistics=True)
                B_merged = wp.wilson_b
            except Exception, e:
                B_merged = 0
                print "Warning: b-factor calculation in mod_util failed. Reset b-factor to 0"
                print e
            #report binning stats
            txt_out = '\n'
            txt_out += 'Isotropic B-factor:  %7.2f\n' % (B_merged)
            txt_out += 'No. of reflections\n'
            txt_out += ' all:                %7.0f\n' % (n_refl_all)
            txt_out += ' outside resolution: %7.0f\n' % (
                n_refl_out_resolutions)
            txt_out += ' outliers:           %7.0f\n' % (n_refl_outliers)
            txt_out += ' total left:         %7.0f\n' % (mdh.get_size())
            txt_out += 'Summary for ' + output_mtz_file_prefix + '_merge.mtz\n'
            txt_out += 'Bin Resolution Range     Completeness      <N_obs> |Rmerge  Rsplit   CC1/2   N_ind |CCiso   N_ind|CCanoma  N_ind| <I/sigI>   <I>    <sigI>    <I**2>\n'
            txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
            #for stat pickle
            sp_res, sp_complete, sp_n_obs, sp_cc12, sp_cc12_anom, sp_rmerge, sp_i_o_sigi, sp_isqr = (
                [], [], [], [], [], [], [], [])
            #binning
            binner_template_asu = miller_array_template_asu.setup_binner(
                n_bins=iparams.n_bins)
            binner_template_asu_indices = binner_template_asu.bin_indices()
            #for stats on axis cones
            mdh_astar = deepcopy(mdh)
            mdh_bstar = deepcopy(mdh)
            mdh_cstar = deepcopy(mdh)
            mdh_astar.reduce_to_cone_on_axis((1, 0, 0),
                                             iparams.percent_cone_fraction)
            mdh_bstar.reduce_to_cone_on_axis((0, 1, 0),
                                             iparams.percent_cone_fraction)
            mdh_cstar.reduce_to_cone_on_axis((0, 0, 1),
                                             iparams.percent_cone_fraction)
            #prepare text out for axis cones
            txt_out_cone = 'Summary of CC1/2 on three crystal axes\n'
            txt_out_cone += 'Bin Resolution Range           CC1/2                      <I>                          N_refl           \n'
            txt_out_cone += '                        a*      b*      c*  |      a*        b*       c*    |    a*      b*     c*      \n'
            txt_out_cone += '---------------------------------------------------------------------------------------------------------\n'
            for i in range(1, iparams.n_bins + 1):
                i_binner = (binner_template_asu_indices == i)
                miller_indices_template_bin = miller_array_template_asu.indices(
                ).select(i_binner)
                #for all reflections
                mdh_bin = deepcopy(mdh)
                mdh_bin.reduce_by_miller_index(miller_indices_template_bin)
                cc12, n_refl_cc12 = mdh_bin.get_cc12()
                cciso, n_refl_cciso = mdh_bin.get_cciso(miller_array_iso)
                cc_anom_acentric, n_refl_anom_acentric = mdh_bin.get_cc_anom()
                completeness = (mdh_bin.get_size() /
                                len(miller_indices_template_bin)) * 100
                multiplicity = mdh_bin.get_multiplicity()
                txt_out += '%02d %7.2f - %7.2f %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f %6.0f %8.2f %10.1f %8.1f %6.2f\n' \
                    %(i, binner_template_asu.bin_d_range(i)[0], binner_template_asu.bin_d_range(i)[1], \
                    completeness, \
                    mdh_bin.get_size(), len(miller_indices_template_bin),\
                    multiplicity, mdh_bin.get_r_meas()*100, mdh_bin.get_r_split()*100, \
                    cc12*100, n_refl_cc12, cciso*100, n_refl_cciso, \
                    cc_anom_acentric, n_refl_anom_acentric, \
                    mdh_bin.get_mean_IoversigI(), mdh_bin.get_mean_I(), mdh_bin.get_mean_sigI(), mdh_bin.get_second_moment())
                #for reflections on cones
                mdh_astar_bin = deepcopy(mdh_astar)
                mdh_astar_bin.reduce_by_miller_index(
                    miller_indices_template_bin)
                cc12_astar, n_refl_cc12_astar = mdh_astar_bin.get_cc12()
                mdh_bstar_bin = deepcopy(mdh_bstar)
                mdh_bstar_bin.reduce_by_miller_index(
                    miller_indices_template_bin)
                cc12_bstar, n_refl_cc12_bstar = mdh_bstar_bin.get_cc12()
                mdh_cstar_bin = deepcopy(mdh_cstar)
                mdh_cstar_bin.reduce_by_miller_index(
                    miller_indices_template_bin)
                cc12_cstar, n_refl_cc12_cstar = mdh_cstar_bin.get_cc12()
                txt_out_cone += '%02d %7.2f - %7.2f %7.2f %7.2f %7.2f %10.1f %10.1f %10.1f %6.0f %6.0f %6.0f\n' \
                    %(i, binner_template_asu.bin_d_range(i)[0], binner_template_asu.bin_d_range(i)[1], \
                    cc12_astar*100, cc12_bstar*100, cc12_cstar*100, \
                    mdh_astar_bin.get_mean_I(), mdh_bstar_bin.get_mean_I(), mdh_cstar_bin.get_mean_I(), \
                    n_refl_cc12_astar, n_refl_cc12_bstar, n_refl_cc12_cstar)
                #for stat pickle
                sp_res.append(binner_template_asu.bin_d_range(i)[1])
                sp_complete.append(completeness)
                sp_n_obs.append(multiplicity)
                sp_cc12.append(cc12)
                sp_cc12_anom.append(cc_anom_acentric)
                sp_rmerge.append(mdh_bin.get_r_meas() * 100)
                sp_i_o_sigi.append(mdh_bin.get_mean_IoversigI())
                sp_isqr.append(mdh_bin.get_second_moment())
            #txt out total for all reflections
            cc12, n_refl_cc12 = mdh.get_cc12()
            cciso, n_refl_cciso = mdh.get_cciso(miller_array_iso)
            cc_anom_acentric, n_refl_anom_acentric = mdh.get_cc_anom()
            txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
            txt_out += '        TOTAL        %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f %6.0f %8.2f %10.1f %8.1f %6.2f\n' \
            %((mdh.get_size()/miller_array_template_asu.size())*100, \
                mdh.get_size(), miller_array_template_asu.size(),\
                mdh.get_multiplicity(), mdh.get_r_meas()*100, mdh.get_r_split()*100, \
                cc12*100, n_refl_cc12, cciso*100, n_refl_cciso, \
                cc_anom_acentric, n_refl_anom_acentric, \
                mdh.get_mean_IoversigI(), mdh.get_mean_I(), mdh.get_mean_sigI(), mdh.get_second_moment())
            txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
            txt_out += '\n'
            #txt out total for reflections on cones
            cc12_astar, n_refl_cc12_astar = mdh_astar.get_cc12()
            cc12_bstar, n_refl_cc12_bstar = mdh_bstar.get_cc12()
            cc12_cstar, n_refl_cc12_cstar = mdh_cstar.get_cc12()
            txt_out_cone += '----------------------------------------------------------------------------------------------------------\n'
            txt_out_cone += '       total         %7.2f %7.2f %7.2f %10.1f %10.1f %10.1f %6.0f %6.0f %6.0f\n' \
                  %(cc12_astar*100, cc12_bstar*100, cc12_cstar*100, \
                  mdh_astar.get_mean_I(), mdh_bstar.get_mean_I(), mdh_cstar.get_mean_I(), \
                  n_refl_cc12_astar, n_refl_cc12_bstar, n_refl_cc12_cstar)
            txt_out_cone += '----------------------------------------------------------------------------------------------------------\n'
            txt_out_cone += '\n'
            #save data for stat. pickle in stat_dict
            stat_dict = {"binned_resolution": [sp_res], \
            "binned_completeness": [sp_complete], \
            "binned_n_obs": [sp_n_obs], \
            "binned_cc12": [sp_cc12], \
            "binned_cc12_anom": [sp_cc12_anom], \
            "binned_rmerge": [sp_rmerge], \
            "binned_i_o_sigi": [sp_i_o_sigi], \
            "binned_isqr": [sp_isqr], \
            "total_res_max": [mdh.miller_array_merge.d_max_min()[0]], \
            "total_res_min": [mdh.miller_array_merge.d_max_min()[1]], \
            "total_completeness": [(mdh.get_size()/miller_array_template_asu.size())*100], \
            "total_n_obs": [mdh.get_multiplicity()], \
            "total_cc12": [mdh.get_cc12()[0]*100], \
            "total_rmerge": [mdh.get_r_meas()*100], \
            "total_i_o_sigi": [mdh.get_mean_IoversigI()], \
            "space_group_info": [mdh.miller_array_merge.space_group_info()], \
            }
            self.write_stat_pickle(iparams, stat_dict)
            txt_out += txt_out_cone
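Several of the frame-rejection tests in prepare_output above compare a per-frame quantity (SE, G, gamma_e) against the median over all frames and drop anything further away than sigma_rejection standard deviations. A small numpy sketch of that filter (reject_by_deviation is an illustrative helper, not part of prime):

# Hedged sketch of the |value - median| / std frame filter used above.
import numpy as np

def reject_by_deviation(values, n_std=2.0):
    values = np.asarray(values, dtype=float)
    med, std = np.median(values), np.std(values)
    if std <= 0:
        return np.ones(values.size, dtype=bool)  # nothing to reject
    return np.abs(values - med) / std <= n_std

# the last value deviates by more than 2 sigma from the median -> False
print(reject_by_deviation([1.0, 1.1, 0.9, 1.05, 12.0]))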
Example No. 33
    for orig, recycled in zip((ma1, ma2, ma3, ma4), recycled_arrays):
        assert orig.size() == recycled.size()
        recycled = recycled.customized_copy(
            anomalous_flag=orig.anomalous_flag())
        orig, recycled = orig.common_sets(recycled)
        assert orig.indices().all_eq(recycled.indices())
        assert approx_equal(orig.data(), recycled.data(), eps=1e-5)
    #
    cif_model = reader(input_string=r3adrsf,
                       builder=cif.builders.cif_model_builder()).model()
    cs = cif.builders.crystal_symmetry_builder(
        cif_model["r3adrsf"]).crystal_symmetry

    ma_builder = cif.builders.miller_array_builder(
        cif_model['r3adrAsf'],
        base_array_info=miller.array_info(crystal_symmetry_from_file=cs))
    miller_arrays = ma_builder.arrays().values()
    assert len(miller_arrays) == 4
    mas_as_cif_block = cif.miller_arrays_as_cif_block(
        miller_arrays[0].map_to_asu(),
        column_names=miller_arrays[0].info().labels,
        format="corecif")
    for array in miller_arrays[1:]:
        labels = array.info().labels
        if len(labels) > 1:
            # iterate over a copy so that removing items does not skip elements
            for label in list(labels):
                if label.startswith("wavelength_id"):
                    labels.remove(label)
        mas_as_cif_block.add_miller_array(array=array.map_to_asu(),
                                          column_names=array.info().labels)
    s = StringIO()
Example No. 34
def scaled_data_as_miller_array(reflection_table_list,
                                experiments,
                                best_unit_cell=None,
                                anomalous_flag=False):
    """Get a scaled miller array from an experiment and reflection table."""
    if len(reflection_table_list) > 1:
        joint_table = flex.reflection_table()
        for reflection_table in reflection_table_list:
            # better to just create many miller arrays and join them?
            refl_for_joint_table = flex.reflection_table()
            for col in [
                    "miller_index",
                    "intensity.scale.value",
                    "inverse_scale_factor",
                    "intensity.scale.variance",
            ]:
                refl_for_joint_table[col] = reflection_table[col]
            good_refl_sel = ~reflection_table.get_flags(
                reflection_table.flags.bad_for_scaling, all=False)
            refl_for_joint_table = refl_for_joint_table.select(good_refl_sel)
            joint_table.extend(refl_for_joint_table)
    else:
        reflection_table = reflection_table_list[0]
        good_refl_sel = ~reflection_table.get_flags(
            reflection_table.flags.bad_for_scaling, all=False)
        joint_table = reflection_table.select(good_refl_sel)
    # Filter out negative scale factors to avoid merging statistics errors.
    # These are not removed from the output data, as it is likely one would
    # want to do further analysis e.g. delta cc1/2 and rescaling, to exclude
    # certain data and get better scale factors for all reflections.
    pos_scales = joint_table["inverse_scale_factor"] > 0
    if pos_scales.count(False) > 0:
        logger.info(
            """There are %s reflections with non-positive scale factors which
will not be used for calculating merging statistics""",
            pos_scales.count(False),
        )
        joint_table = joint_table.select(pos_scales)

    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiments)
    miller_set = miller.set(
        crystal_symmetry=crystal.symmetry(
            unit_cell=best_unit_cell,
            space_group=experiments[0].crystal.get_space_group(),
            assert_is_compatible_unit_cell=False,
        ),
        indices=joint_table["miller_index"],
        anomalous_flag=anomalous_flag,
    )
    i_obs = miller.array(
        miller_set,
        data=joint_table["intensity.scale.value"] /
        joint_table["inverse_scale_factor"],
    )
    i_obs.set_observation_type_xray_intensity()
    i_obs.set_sigmas((joint_table["intensity.scale.variance"]**0.5) /
                     joint_table["inverse_scale_factor"])
    i_obs.set_info(
        miller.array_info(source="DIALS", source_type="reflection_tables"))
    return i_obs
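The conversion above applies the refined inverse scale factors directly: scaled I = I / g and scaled sigma = sqrt(variance) / g. A short self-contained sketch of that arithmetic with flex arrays (apply_inverse_scales is an illustrative name):

# Hedged sketch of dividing intensities and sigmas by inverse scale factors.
from scitbx.array_family import flex

def apply_inverse_scales(intensities, variances, inverse_scales):
    scaled_i = intensities / inverse_scales
    scaled_sig = flex.sqrt(variances) / inverse_scales
    return scaled_i, scaled_sig

i, sig = apply_inverse_scales(flex.double([10.0, 20.0]),
                              flex.double([4.0, 9.0]),
                              flex.double([0.5, 2.0]))
print(list(i), list(sig))  # [20.0, 10.0] [4.0, 1.5]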
Example No. 35
 def write_output(self, mdh, iparams, output_mtz_file_prefix, avg_mode):
     if iparams.flag_weak_anomalous:
         if avg_mode == 'final':
             target_anomalous_flag = iparams.target_anomalous_flag
         else:
             target_anomalous_flag = False
     else:
         target_anomalous_flag = iparams.target_anomalous_flag
     uc_mean = mdh.uc_mean
     wavelength_mean = mdh.wavelength_mean
     #output mtz file and report binning stat
     miller_set_merge = crystal.symmetry(
         unit_cell=unit_cell(tuple(uc_mean)),
         space_group_symbol=iparams.target_space_group).build_miller_set(
             anomalous_flag=target_anomalous_flag,
             d_min=iparams.merge.d_min)
     mdh.generate_miller_array_from_miller_set(miller_set_merge,
                                               target_anomalous_flag)
     miller_array_complete = miller_set_merge.array()
     fake_data = flex.double([1.0] * len(miller_array_complete.indices()))
     miller_array_template_asu = miller_array_complete.customized_copy(data=fake_data, \
               sigmas=fake_data).resolution_filter(d_min=iparams.merge.d_min, \
               d_max=iparams.merge.d_max)
     n_refl_all = mdh.get_size()
     #do another resolution filter here
     i_sel_res = mdh.miller_array_merge.resolution_filter_selection(
         d_min=iparams.merge.d_min, d_max=iparams.merge.d_max)
     mdh.reduce_by_selection(i_sel_res)
     n_refl_out_resolutions = n_refl_all - mdh.get_size()
     #remove outliers
     sequences = flex.int(range(mdh.get_size()))
     good_sequences = []
     for i_rejection in range(iparams.n_rejection_cycle):
         binner_merge = mdh.miller_array_merge.setup_binner(n_bins=200)
         for i_bin in range(1, 201):
             i_binner = (binner_merge.bin_indices() == i_bin)
             I_obs_bin = mdh.miller_array_merge.data().select(i_binner)
             sequences_bin = sequences.select(i_binner)
             if len(I_obs_bin) > 0:
                 I_obs_bin = mdh.miller_array_merge.data().select(i_binner)
                 try:
                     i_filter = flex.abs(
                         (I_obs_bin - np.median(I_obs_bin)) /
                         np.std(I_obs_bin)) < 10
                 except Exception as e:
                     print "Warning: outlier rejection by bins failed because of floating point."
                     print e
                     i_filter = flex.bool([True] * len(I_obs_bin))
                 good_sequences.extend(list(sequences_bin.select(i_filter)))
     mdh.reduce_by_selection(flex.size_t(good_sequences))
     n_refl_outliers = n_refl_all - n_refl_out_resolutions - mdh.get_size()
     #get iso if given.
     mxh = mx_handler()
     flag_hklisoin_found, miller_array_iso = mxh.get_miller_array_from_reflection_file(
         iparams.hklisoin)
     #write output files
     if output_mtz_file_prefix != '':
         #write as mtz file
         miller_array_merge_unique = mdh.miller_array_merge.merge_equivalents(
         ).array()
         info = miller.array_info(wavelength=wavelength_mean)
         miller_array_merge_unique.set_info(info)
         mtz_dataset_merge = miller_array_merge_unique.as_mtz_dataset(
             column_root_label="IOBS")
         mtz_dataset_merge.mtz_object().write(
             file_name=output_mtz_file_prefix + '_merge.mtz')
         #write as cns file
         f_cns = open(output_mtz_file_prefix + '_merge.hkl', 'w')
         miller_array_merge_unique.export_as_cns_hkl(file_object=f_cns)
         f_cns.close()
     #calculate merging stat table
     if True:
         #calculate isotropic B-factor
         try:
             mxh = mx_handler()
             asu_contents = mxh.get_asu_contents(iparams.n_residues)
             observations_as_f = mdh.miller_array_merge.as_amplitude_array()
             observations_as_f.setup_binner(auto_binning=True)
             wp = statistics.wilson_plot(observations_as_f,
                                         asu_contents,
                                         e_statistics=True)
             B_merged = wp.wilson_b
         except Exception as e:
             B_merged = 0
             print "Warning: b-factor calculation in mod_util failed. Reset b-factor to 0"
             print e
         #report binning stats
         txt_out = '\n'
         txt_out += 'Isotropic B-factor:  %7.2f\n' % (B_merged)
         txt_out += 'No. of reflections\n'
         txt_out += ' all:                %7.0f\n' % (n_refl_all)
         txt_out += ' outside resolution: %7.0f\n' % (
             n_refl_out_resolutions)
         txt_out += ' outliers:           %7.0f\n' % (n_refl_outliers)
         txt_out += ' total left:         %7.0f\n' % (mdh.get_size())
         txt_out += 'Summary for ' + output_mtz_file_prefix + '_merge.mtz\n'
         txt_out += 'Bin Resolution Range     Completeness      <N_obs> |Rmerge  Rsplit   CC1/2   N_ind |CCiso   N_ind|CCanoma  N_ind| <I/sigI>   <I>    <sigI>    <I**2>\n'
         txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
         #for stat pickle
         sp_res, sp_complete, sp_n_obs, sp_cc12, sp_cc12_anom, sp_rmerge, sp_i_o_sigi, sp_isqr = (
             [], [], [], [], [], [], [], [])
         #binning
         binner_template_asu = miller_array_template_asu.setup_binner(
             n_bins=iparams.n_bins)
         binner_template_asu_indices = binner_template_asu.bin_indices()
         #for stats on axis cones
         mdh_astar = deepcopy(mdh)
         mdh_bstar = deepcopy(mdh)
         mdh_cstar = deepcopy(mdh)
         mdh_astar.reduce_to_cone_on_axis((1, 0, 0),
                                          iparams.percent_cone_fraction)
         mdh_bstar.reduce_to_cone_on_axis((0, 1, 0),
                                          iparams.percent_cone_fraction)
         mdh_cstar.reduce_to_cone_on_axis((0, 0, 1),
                                          iparams.percent_cone_fraction)
         #prepare text out for axis cones
         txt_out_cone = 'Summary of CC1/2 on three crystal axes\n'
         txt_out_cone += 'Bin Resolution Range           CC1/2                      <I>                          N_refl           \n'
         txt_out_cone += '                        a*      b*      c*  |      a*        b*       c*    |    a*      b*     c*      \n'
         txt_out_cone += '---------------------------------------------------------------------------------------------------------\n'
         for i in range(1, iparams.n_bins + 1):
             i_binner = (binner_template_asu_indices == i)
             miller_indices_template_bin = miller_array_template_asu.indices(
             ).select(i_binner)
             #for all reflections
             mdh_bin = deepcopy(mdh)
             mdh_bin.reduce_by_miller_index(miller_indices_template_bin)
             cc12, n_refl_cc12 = mdh_bin.get_cc12()
             cciso, n_refl_cciso = mdh_bin.get_cciso(miller_array_iso)
             cc_anom_acentric, n_refl_anom_acentric = mdh_bin.get_cc_anom()
             completeness = (mdh_bin.get_size() /
                             len(miller_indices_template_bin)) * 100
             multiplicity = mdh_bin.get_multiplicity()
             txt_out += '%02d %7.2f - %7.2f %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f %6.0f %8.2f %10.1f %8.1f %6.2f\n' \
                 %(i, binner_template_asu.bin_d_range(i)[0], binner_template_asu.bin_d_range(i)[1], \
                 completeness, \
                 mdh_bin.get_size(), len(miller_indices_template_bin),\
                 multiplicity, mdh_bin.get_r_meas()*100, mdh_bin.get_r_split()*100, \
                 cc12*100, n_refl_cc12, cciso*100, n_refl_cciso, \
                 cc_anom_acentric, n_refl_anom_acentric, \
                 mdh_bin.get_mean_IoversigI(), mdh_bin.get_mean_I(), mdh_bin.get_mean_sigI(), mdh_bin.get_second_moment())
             #for reflections on cones
             mdh_astar_bin = deepcopy(mdh_astar)
             mdh_astar_bin.reduce_by_miller_index(
                 miller_indices_template_bin)
             cc12_astar, n_refl_cc12_astar = mdh_astar_bin.get_cc12()
             mdh_bstar_bin = deepcopy(mdh_bstar)
             mdh_bstar_bin.reduce_by_miller_index(
                 miller_indices_template_bin)
             cc12_bstar, n_refl_cc12_bstar = mdh_bstar_bin.get_cc12()
             mdh_cstar_bin = deepcopy(mdh_cstar)
             mdh_cstar_bin.reduce_by_miller_index(
                 miller_indices_template_bin)
             cc12_cstar, n_refl_cc12_cstar = mdh_cstar_bin.get_cc12()
             txt_out_cone += '%02d %7.2f - %7.2f %7.2f %7.2f %7.2f %10.1f %10.1f %10.1f %6.0f %6.0f %6.0f\n' \
                 %(i, binner_template_asu.bin_d_range(i)[0], binner_template_asu.bin_d_range(i)[1], \
                 cc12_astar*100, cc12_bstar*100, cc12_cstar*100, \
                 mdh_astar_bin.get_mean_I(), mdh_bstar_bin.get_mean_I(), mdh_cstar_bin.get_mean_I(), \
                 n_refl_cc12_astar, n_refl_cc12_bstar, n_refl_cc12_cstar)
             #for stat pickle
             sp_res.append(binner_template_asu.bin_d_range(i)[1])
             sp_complete.append(completeness)
             sp_n_obs.append(multiplicity)
             sp_cc12.append(cc12)
             sp_cc12_anom.append(cc_anom_acentric)
             sp_rmerge.append(mdh_bin.get_r_meas() * 100)
             sp_i_o_sigi.append(mdh_bin.get_mean_IoversigI())
              sp_isqr.append(mdh_bin.get_second_moment())
         #txt out total for all reflections
         cc12, n_refl_cc12 = mdh.get_cc12()
         cciso, n_refl_cciso = mdh.get_cciso(miller_array_iso)
         cc_anom_acentric, n_refl_anom_acentric = mdh.get_cc_anom()
         txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
         txt_out += '        TOTAL        %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f %6.0f %8.2f %10.1f %8.1f %6.2f\n' \
         %((mdh.get_size()/miller_array_template_asu.size())*100, \
             mdh.get_size(), miller_array_template_asu.size(),\
             mdh.get_multiplicity(), mdh.get_r_meas()*100, mdh.get_r_split()*100, \
             cc12*100, n_refl_cc12, cciso*100, n_refl_cciso, \
             cc_anom_acentric, n_refl_anom_acentric, \
             mdh.get_mean_IoversigI(), mdh.get_mean_I(), mdh.get_mean_sigI(), mdh.get_second_moment())
         txt_out += '--------------------------------------------------------------------------------------------------------------------------------------------------\n'
         txt_out += '\n'
         #txt out total for reflections on cones
         cc12_astar, n_refl_cc12_astar = mdh_astar.get_cc12()
         cc12_bstar, n_refl_cc12_bstar = mdh_bstar.get_cc12()
         cc12_cstar, n_refl_cc12_cstar = mdh_cstar.get_cc12()
         txt_out_cone += '----------------------------------------------------------------------------------------------------------\n'
         txt_out_cone += '       total         %7.2f %7.2f %7.2f %10.1f %10.1f %10.1f %6.0f %6.0f %6.0f\n' \
               %(cc12_astar*100, cc12_bstar*100, cc12_cstar*100, \
               mdh_astar.get_mean_I(), mdh_bstar.get_mean_I(), mdh_cstar.get_mean_I(), \
               n_refl_cc12_astar, n_refl_cc12_bstar, n_refl_cc12_cstar)
         txt_out_cone += '----------------------------------------------------------------------------------------------------------\n'
         txt_out_cone += '\n'
         txt_out_table1 = "Table1 (" + avg_mode + ")\n"
         txt_out_table1 += "  Space group: " + str(
             mdh.miller_array_merge.space_group_info()) + "\n"
         txt_out_table1 += "  Cell dimensions: %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f\n" % tuple(
             mdh.uc_mean)
         txt_out_table1 += "  Resolution (A): %6.2f - %6.2f (%6.2f - %6.2f)\n" % (
             mdh.miller_array_merge.d_max_min()[0],
             mdh.miller_array_merge.d_max_min()[1], sp_res[-2], sp_res[-1])
         txt_out_table1 += "  Rmerge: %6.2f (%6.2f)\n" % (
             mdh.get_r_meas() * 100, sp_rmerge[-1])
         txt_out_table1 += "  CC1/2: %6.2f (%6.2f)\n" % (mdh.get_cc12()[0] *
                                                         100, sp_cc12[-1])
         txt_out_table1 += "  I/sigI: %6.2f (%6.2f)\n" % (
             mdh.get_mean_IoversigI(), sp_i_o_sigi[-1])
         txt_out_table1 += "  Completeness (%%): %6.2f (%6.2f)\n" % (
             (mdh.get_size() / miller_array_template_asu.size()) * 100,
             sp_complete[-1])
         txt_out_table1 += "  Redundancy: %6.2f (%6.2f)\n" % (
             mdh.get_multiplicity(), sp_n_obs[-1])
         #save data for stat. pickle in stat_dict
         if not iparams.flag_hush:
             stat_dict = {"binned_resolution": [sp_res], \
                 "binned_completeness": [sp_complete], \
                 "binned_n_obs": [sp_n_obs], \
                 "binned_cc12": [sp_cc12], \
                 "binned_cc12_anom": [sp_cc12_anom], \
                 "binned_rmerge": [sp_rmerge], \
                 "binned_i_o_sigi": [sp_i_o_sigi], \
                 "binned_isqr": [sp_isqr], \
                 "total_res_max": [mdh.miller_array_merge.d_max_min()[0]], \
                 "total_res_min": [mdh.miller_array_merge.d_max_min()[1]], \
                 "total_completeness": [(mdh.get_size()/miller_array_template_asu.size())*100], \
                 "total_n_obs": [mdh.get_multiplicity()], \
                 "total_cc12": [mdh.get_cc12()[0]*100], \
                 "total_rmerge": [mdh.get_r_meas()*100], \
                 "total_i_o_sigi": [mdh.get_mean_IoversigI()], \
                 "space_group_info": [mdh.miller_array_merge.space_group_info()], \
                 }
             self.write_stat_pickle(iparams, stat_dict)
         txt_out += txt_out_cone + txt_out_table1
     return mdh, txt_out
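The merging tables above lean heavily on CC1/2, which both versions of write_output obtain from mdh.get_cc12(). A common way to compute it is as the Pearson correlation between even- and odd-numbered half-set averages; a hedged numpy sketch of that idea (cc_half is an illustrative helper, not the prime implementation):

# Hedged sketch: CC1/2 as the correlation of half-set averages.
import numpy as np

def cc_half(i_even, i_odd):
    i_even = np.asarray(i_even, dtype=float)
    i_odd = np.asarray(i_odd, dtype=float)
    sel = ~(np.isnan(i_even) | np.isnan(i_odd))
    return np.corrcoef(i_even[sel], i_odd[sel])[0, 1], int(sel.sum())

cc, n_refl = cc_half([10.0, 5.0, 2.0, 8.0], [9.5, 5.5, 2.5, 7.0])
print(cc, n_refl)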
Example No. 36
    def write(self, experiments, reflections):
        """
        Write the experiments and reflections to file
        """

        # if mmcif filename is auto, then choose scaled.cif or integrated.cif
        if self.params.mmcif.hklout in (None, Auto, "auto"):
            if ("intensity.scale.value"
                    in reflections) and ("intensity.scale.variance"
                                         in reflections):
                filename = "scaled.cif"
                logger.info(
                    "Data appears to be scaled, setting mmcif.hklout = 'scaled_unmerged.cif'"
                )
            else:
                filename = "integrated.cif"
                logger.info(
                    "Data appears to be unscaled, setting mmcif.hklout = 'integrated.cif'"
                )

        # Select reflections
        selection = reflections.get_flags(reflections.flags.integrated,
                                          all=True)
        reflections = reflections.select(selection)

        # Filter out bad variances and other issues, but don't filter on ice rings
        # or alter partialities.

        ### Assumes you want to apply the lp and dqe corrections to sum and prf
        ### Do we want to combine partials?
        reflections = filter_reflection_table(
            reflections,
            self.params.intensity,
            combine_partials=False,
            partiality_threshold=0.0,
            d_min=self.params.mtz.d_min,
        )

        # Get the cif block
        cif_block = iotbx.cif.model.block()

        # Audit trail
        dials_version = dials.util.version.dials_version()
        cif_block["_audit.creation_method"] = dials_version
        cif_block["_audit.creation_date"] = datetime.date.today().isoformat()
        cif_block["_computing.data_reduction"] = (
            "%s (Winter, G. et al., 2018)" % dials_version)
        cif_block[
            "_publ.section_references"] = "Winter, G. et al. (2018) Acta Cryst. D74, 85-97."

        # Hard coding X-ray
        cif_block["_pdbx_diffrn_data_section.id"] = "dials"
        cif_block["_pdbx_diffrn_data_section.type_scattering"] = "x-ray"
        cif_block["_pdbx_diffrn_data_section.type_merged"] = "false"
        cif_block["_pdbx_diffrn_data_section.type_scaled"] = str(
            "scale" in self.params.intensity).lower()

        # FIXME finish metadata addition - detector and source details needed
        # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/index.html

        ## Add source information;
        # _diffrn_source.pdbx_wavelength_list = (list of wavelengths)
        # _diffrn_source.source = (general class of source e.g. synchrotron)
        # _diffrn_source.type = (specific beamline or instrument e.g DIAMOND BEAMLINE I04)

        wls = []
        epochs = []
        for exp in experiments:
            wls.append(round(exp.beam.get_wavelength(), 5))
            epochs.append(exp.scan.get_epochs()[0])
        unique_wls = set(wls)
        cif_block["_diffrn_source.pdbx_wavelength_list"] = ", ".join(
            str(w) for w in unique_wls)

        ## Add detector information;
        # _diffrn_detector.detector  = (general class e.g. PIXEL, PLATE etc)
        # _diffrn_detector.pdbx_collection_date = (Date of collection yyyy-mm-dd)
        # _diffrn_detector.type = (full name of detector e.g. DECTRIS PILATUS3 2M)
        # One date is required, so if multiple just use the first date.
        min_epoch = min(epochs)
        date_str = time.strftime("%Y-%m-%d", time.gmtime(min_epoch))
        cif_block["_diffrn_detector.pdbx_collection_date"] = date_str

        # Write the crystal information
        cif_loop = iotbx.cif.model.loop(header=(
            "_pdbx_diffrn_unmerged_cell.ordinal",
            "_pdbx_diffrn_unmerged_cell.crystal_id",
            "_pdbx_diffrn_unmerged_cell.wavelength",
            "_pdbx_diffrn_unmerged_cell.cell_length_a",
            "_pdbx_diffrn_unmerged_cell.cell_length_b",
            "_pdbx_diffrn_unmerged_cell.cell_length_c",
            "_pdbx_diffrn_unmerged_cell.cell_angle_alpha",
            "_pdbx_diffrn_unmerged_cell.cell_angle_beta",
            "_pdbx_diffrn_unmerged_cell.cell_angle_gamma",
            "_pdbx_diffrn_unmerged_cell.Bravais_lattice",
        ))
        crystals = experiments.crystals()
        crystal_to_id = {crystal: i + 1 for i, crystal in enumerate(crystals)}
        for i, exp in enumerate(experiments):
            crystal = exp.crystal
            crystal_id = crystal_to_id[crystal]
            wavelength = exp.beam.get_wavelength()
            a, b, c, alpha, beta, gamma = crystal.get_unit_cell().parameters()
            latt_type = str(
                bravais_types.bravais_lattice(group=crystal.get_space_group()))
            cif_loop.add_row((i + 1, crystal_id, wavelength, a, b, c, alpha,
                              beta, gamma, latt_type))
        cif_block.add_loop(cif_loop)

        # Write the scan information
        cif_loop = iotbx.cif.model.loop(header=(
            "_pdbx_diffrn_scan.scan_id",
            "_pdbx_diffrn_scan.crystal_id",
            "_pdbx_diffrn_scan.image_id_begin",
            "_pdbx_diffrn_scan.image_id_end",
            "_pdbx_diffrn_scan.scan_angle_begin",
            "_pdbx_diffrn_scan.scan_angle_end",
        ))
        for i, exp in enumerate(experiments):
            scan = exp.scan
            crystal_id = crystal_to_id[exp.crystal]
            image_range = scan.get_image_range()
            osc_range = scan.get_oscillation_range(deg=True)
            cif_loop.add_row((
                i + 1,
                crystal_id,
                image_range[0],
                image_range[1],
                osc_range[0],
                osc_range[1],
            ))
        cif_block.add_loop(cif_loop)

        # Make a dict of unit_cell parameters
        unit_cell_parameters = {}
        if crystal.num_scan_points > 1:
            for i in range(crystal.num_scan_points):
                a, b, c, alpha, beta, gamma = crystal.get_unit_cell_at_scan_point(
                    i).parameters()
                unit_cell_parameters[i] = (a, b, c, alpha, beta, gamma)
        else:
            unit_cell_parameters[0] = (a, b, c, alpha, beta, gamma)

        ### _pdbx_diffrn_image_proc has been removed from the dictionary extension.
        ### Keeping this section commented out as it may be added back in some
        ### form in future
        #
        # Write the image data
        # scan = experiments[0].scan
        # z0 = scan.get_image_range()[0]
        #
        # cif_loop = iotbx.cif.model.loop(
        #  header=("_pdbx_diffrn_image_proc.image_id",
        #          "_pdbx_diffrn_image_proc.crystal_id",
        #          "_pdbx_diffrn_image_proc.image_number",
        #          "_pdbx_diffrn_image_proc.phi_value",
        #          "_pdbx_diffrn_image_proc.wavelength",
        #          "_pdbx_diffrn_image_proc.cell_length_a",
        #          "_pdbx_diffrn_image_proc.cell_length_b",
        #          "_pdbx_diffrn_image_proc.cell_length_c",
        #          "_pdbx_diffrn_image_proc.cell_angle_alpha",
        #          "_pdbx_diffrn_image_proc.cell_angle_beta",
        #          "_pdbx_diffrn_image_proc.cell_angle_gamma"))
        # for i in range(len(scan)):
        #  z = z0 + i
        #  if crystal.num_scan_points > 1:
        #    a, b, c, alpha, beta, gamma = unit_cell_parameters[i]
        #  else:
        #    a, b, c, alpha, beta, gamma = unit_cell_parameters[0]
        #  # phi is the angle at the image centre
        #  phi = scan.get_angle_from_image_index(z + 0.5, deg=True)
        #  cif_loop.add_row((i+1, 1, z, phi, wavelength,
        #                    a, b, c, alpha, beta, gamma))
        # cif_block.add_loop(cif_loop)

        # Write reflection data
        # Required columns
        header = (
            "_pdbx_diffrn_unmerged_refln.reflection_id",
            "_pdbx_diffrn_unmerged_refln.scan_id",
            "_pdbx_diffrn_unmerged_refln.image_id_begin",
            "_pdbx_diffrn_unmerged_refln.image_id_end",
            "_pdbx_diffrn_unmerged_refln.index_h",
            "_pdbx_diffrn_unmerged_refln.index_k",
            "_pdbx_diffrn_unmerged_refln.index_l",
        )

        headernames = {
            "scales": "_pdbx_diffrn_unmerged_refln.scale_value",
            "intensity.scale.value":
            "_pdbx_diffrn_unmerged_refln.intensity_meas",
            "intensity.scale.sigma":
            "_pdbx_diffrn_unmerged_refln.intensity_sigma",
            "intensity.sum.value": "_pdbx_diffrn_unmerged_refln.intensity_sum",
            "intensity.sum.sigma":
            "_pdbx_diffrn_unmerged_refln.intensity_sum_sigma",
            "intensity.prf.value": "_pdbx_diffrn_unmerged_refln.intensity_prf",
            "intensity.prf.sigma":
            "_pdbx_diffrn_unmerged_refln.intensity_prf_sigma",
            "angle": "_pdbx_diffrn_unmerged_refln.scan_angle_reflection",
            "partiality": "_pdbx_diffrn_unmerged_refln.partiality",
        }

        variables_present = []
        if "scale" in self.params.intensity:
            reflections["scales"] = 1.0 / reflections["inverse_scale_factor"]
            reflections["intensity.scale.sigma"] = flex.sqrt(
                reflections["intensity.scale.variance"])
            variables_present.extend(
                ["scales", "intensity.scale.value", "intensity.scale.sigma"])
        if "sum" in self.params.intensity:
            reflections["intensity.sum.sigma"] = flex.sqrt(
                reflections["intensity.sum.variance"])
            variables_present.extend(
                ["intensity.sum.value", "intensity.sum.sigma"])
        if "profile" in self.params.intensity:
            reflections["intensity.prf.sigma"] = flex.sqrt(
                reflections["intensity.prf.variance"])
            variables_present.extend(
                ["intensity.prf.value", "intensity.prf.sigma"])

        # Should always exist
        reflections["angle"] = reflections["xyzcal.mm"].parts()[2] * RAD2DEG
        variables_present.extend(["angle"])

        if "partiality" in reflections:
            variables_present.extend(["partiality"])

        for name in variables_present:
            if name in reflections:
                header += (headernames[name], )

        if "scale" in self.params.intensity:
            # Write dataset_statistics - first make a miller array
            crystal_symmetry = cctbxcrystal.symmetry(
                space_group=experiments[0].crystal.get_space_group(),
                unit_cell=experiments[0].crystal.get_unit_cell(),
            )
            miller_set = miller.set(
                crystal_symmetry=crystal_symmetry,
                indices=reflections["miller_index"],
                anomalous_flag=False,
            )
            i_obs = miller.array(miller_set,
                                 data=reflections["intensity.scale.value"])
            i_obs.set_observation_type_xray_intensity()
            i_obs.set_sigmas(reflections["intensity.scale.sigma"])
            i_obs.set_info(
                miller.array_info(source="DIALS",
                                  source_type="reflection_tables"))

            result = dataset_statistics(
                i_obs=i_obs,
                crystal_symmetry=crystal_symmetry,
                use_internal_variance=False,
                eliminate_sys_absent=False,
            )

            cif_block.update(result.as_cif_block())

        cif_loop = iotbx.cif.model.loop(header=header)

        for i, r in enumerate(reflections.rows()):
            refl_id = i + 1
            scan_id = r["id"] + 1
            _, _, _, _, z0, z1 = r["bbox"]
            h, k, l = r["miller_index"]
            variable_values = tuple((r[name]) for name in variables_present)
            cif_loop.add_row((refl_id, scan_id, z0, z1, h, k, l) +
                             variable_values)
        cif_block.add_loop(cif_loop)

        # Add the block
        self._cif["dials"] = cif_block

        # Print to file
        with open(filename, "w") as fh:
            self._cif.show(out=fh)

        # Log
        logger.info("Wrote reflections to %s" % filename)
Exemplo n.º 37
def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            all_miller_arrays=None,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
    import iotbx.cif
    from cctbx import miller
    if all_miller_arrays is None:
        base_array_info = miller.array_info(
            crystal_symmetry_from_file=crystal_symmetry)
        all_miller_arrays = iotbx.cif.reader(
            file_path=file_name).build_miller_arrays(
                base_array_info=base_array_info)
    if (len(all_miller_arrays) == 0):
        raise Sorry(
            "No data arrays were found in this CIF file.  Please make sure "
            "that the file contains reflection data, rather than the refined "
            "model.")
    column_labels = set()
    if (extend_flags):
        map_to_asu = True
    # TODO: is all_miller_arrays a dict? If not change back
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            other_symmetry = crystal_symmetry
            try:
                crystal_symmetry = other_symmetry.join_symmetry(
                    other_symmetry=ma.crystal_symmetry(), force=True)
            except AssertionError as e:
                str_e = str(e)
                from six.moves import cStringIO as StringIO
                s = StringIO()
                if "Space group is incompatible with unit cell parameters." in str_e:
                    other_symmetry.show_summary(f=s)
                    ma.crystal_symmetry().show_summary(f=s)
                    str_e += "\n%s" % (s.getvalue())
                    raise Sorry(str_e)
                else:
                    raise
    if (crystal_symmetry.unit_cell() is None
            or crystal_symmetry.space_group_info() is None):
        raise Sorry(
            "Crystal symmetry is not defined. Please use the --symmetry option."
        )
    mtz_object = iotbx.mtz.object() \
      .set_title(title="phenix.cif_as_mtz") \
      .set_space_group_info(space_group_info=crystal_symmetry.space_group_info())
    unit_cell = crystal_symmetry.unit_cell()
    mtz_crystals = {}
    mtz_object.set_hkl_base(unit_cell=unit_cell)
    from iotbx.reflection_file_utils import cif_status_flags_as_int_r_free_flags
    # generate list of all reflections (for checking R-free flags)
    from iotbx.reflection_file_utils import make_joined_set
    all_arrays = []
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            all_arrays.append(ma)
    complete_set = make_joined_set(all_arrays)
    if return_as_miller_arrays:
        miller_array_list = []
    current_i = -1
    uc = None
    for i, (data_name,
            miller_arrays) in enumerate(six.iteritems(all_miller_arrays)):
        for ma in miller_arrays.values():
            #ma = ma.customized_copy(
            #  crystal_symmetry=crystal_symmetry).set_info(ma.info())
            if ma._space_group_info is None:
                ma._space_group_info = crystal_symmetry.space_group_info()
            labels = ma.info().labels
            label = get_label(miller_array=ma,
                              output_r_free_label=output_r_free_label)
            if label is None:
                print("Can't determine output label for %s - skipping." % \
                  ma.info().label_string(), file=log)
                continue
            elif label.startswith(output_r_free_label):
                ma, _ = cif_status_flags_as_int_r_free_flags(
                    ma, test_flag_value="f")
                if isinstance(ma.data(), flex.double):
                    data_int = ma.data().iround()
                    assert data_int.as_double().all_eq(ma.data())
                    ma = ma.customized_copy(data=data_int).set_info(ma.info())
            elif (
                (ma.is_xray_amplitude_array() or ma.is_xray_intensity_array())
                    and isinstance(ma.data(), flex.int)):
                ma = ma.customized_copy(data=ma.data().as_double()).set_info(
                    ma.info())
            crys_id = 0
            for l in labels:
                if 'crystal_id' in l:
                    crys_id = int(l.split('=')[-1])
                    break
            if crys_id > 0 and crystal_id is None:
                label += "%i" % crys_id
            if crystal_id is not None and crys_id > 0 and crys_id != crystal_id:
                continue

            # use the symmetry given on the command line if the array's unit cell is None
            if ma.unit_cell() is not None:
                unit_cell = ma.unit_cell()

            if (crys_id not in mtz_crystals
                    or (i > current_i and unit_cell is not None
                        and uc is not None
                        and unit_cell.parameters() != uc.parameters())):
                # Ensure new mtz crystals are created if miller_array objects have different unit cells
                # Can happen if there are more datasets in the same cif file, like MAD datasets
                uc = unit_cell
                current_i = i
                # Use unique project and crystal names so that MtzGet() in cmtzlib.c picks up individual unit cells
                mtz_crystals[crys_id] = (mtz_object.add_crystal(
                    name="crystal_%i" % i,
                    project_name="project_%i" % i,
                    unit_cell=uc), {})
            crystal, datasets = mtz_crystals[crys_id]
            w_id = 0
            for l in labels:
                if 'wavelength_id' in l:
                    w_id = int(l.split('=')[-1])
                    break
            if wavelength_id is not None and w_id > 0 and w_id != wavelength_id:
                continue
            if w_id > 1 and wavelength_id is None:
                if (label in column_labels):
                    label += "%i" % w_id
                #print "label is", label
            if w_id not in datasets:
                wavelength = ma.info().wavelength
                if (wavelength is None):
                    wavelength = 0
                datasets[w_id] = crystal.add_dataset(name="dataset",
                                                     wavelength=wavelength)
            dataset = datasets[w_id]
            # if all sigmas for an array are set to zero either raise an error, or set sigmas to None
            if ma.sigmas() is not None and (ma.sigmas()
                                            == 0).count(False) == 0:
                if ignore_bad_sigmas:
                    print("Warning: bad sigmas, setting sigmas to None.",
                          file=log)
                    ma.set_sigmas(None)
                else:
                    raise Sorry("""Bad sigmas: all sigmas are equal to zero.
  Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file."""
                                )
            if not ma.is_unique_set_under_symmetry():
                if merge_non_unique_under_symmetry:
                    print("Warning: merging non-unique data", file=log)
                    if (label.startswith(output_r_free_label)
                            and incompatible_flags_to_work_set):
                        merging = ma.merge_equivalents(
                            incompatible_flags_replacement=0)
                        if merging.n_incompatible_flags > 0:
                            print("Warning: %i reflections were placed in the working set " \
                                  "because of incompatible flags between equivalents." %(
                                    merging.n_incompatible_flags), file=log)
                    else:
                        try:
                            merging = ma.merge_equivalents()
                        except Sorry as e:
                            if ("merge_equivalents_exact: incompatible"
                                    in str(e)):
                                raise Sorry(
                                    str(e) + " for %s" % ma.info().labels[-1] +
                                    "\n" +
                                    "Add --incompatible_flags_to_work_set to command line "
                                    "arguments to place incompatible flags to working set."
                                )
                                raise
                    ma = merging.array().customized_copy(
                        crystal_symmetry=ma).set_info(ma.info())
                elif return_as_miller_arrays:  # allow non-unique set
                    pass
                else:
                    n_all = ma.indices().size()
                    sel_unique = ma.unique_under_symmetry_selection()
                    sel_dup = ~flex.bool(n_all, sel_unique)
                    n_duplicate = sel_dup.count(True)
                    n_uus = sel_unique.size()
                    msg = (
                      "Miller indices not unique under symmetry: " + file_name +
                      " (%d redundant indices out of %d).\n" % (n_all - n_uus, n_all) +
                      "Add --merge to command arguments to force merging data.")
                    if (show_details_if_error):
                        print(msg)
                        ma.show_comprehensive_summary(prefix="  ")
                        ma.map_to_asu().sort().show_array(prefix="  ")
                    raise Sorry(msg)
            if (map_to_asu):
                ma = ma.map_to_asu().set_info(ma.info())
            if (remove_systematic_absences):
                ma = ma.remove_systematic_absences()
            if (label.startswith(output_r_free_label)
                    and complete_set is not None):
                n_missing = len(complete_set.lone_set(other=ma).indices())
                if (n_missing > 0):
                    if (extend_flags):
                        from cctbx import r_free_utils
                        # determine flag values
                        fvals = list(set(ma.data()))
                        print("fvals", fvals)
                        fval = None
                        if (len(fvals) == 1):
                            fval = fvals[0]
                        elif (len(fvals) == 2):
                            f1 = (ma.data()
                                  == fvals[0]).count(True) / ma.data().size()
                            f2 = (ma.data()
                                  == fvals[1]).count(True) / ma.data().size()
                            if (f1 < f2): fval = fvals[0]
                            else: fval = fvals[1]
                        elif (len(fvals) == 0):
                            fval = None
                        else:
                            fval = 0
                            if (fval not in fvals):
                                raise Sorry(
                                    "Cannot determine free-R flag value.")
                        #
                        if (fval is not None):
                            ma = r_free_utils.extend_flags(
                                r_free_flags=ma,
                                test_flag_value=fval,
                                array_label=label,
                                complete_set=complete_set,
                                preserve_input_values=True,
                                allow_uniform_flags=True,
                                log=sys.stdout)
                        else:
                            ma = None
                    else:
                        libtbx.warn(
                            ("%d reflections do not have R-free flags in the "
                             "array '%s' - this may cause problems if you try "
                             "to use the MTZ file for refinement or map "
                             "calculation.  We recommend that you extend the "
                             "flags to cover all reflections (--extend_flags "
                             "on the command line).") % (n_missing, label))
            # Get rid of fake (0,0,0) reflection in some CIFs
            if (ma is not None):
                ma = ma.select_indices(
                    indices=flex.miller_index(((0, 0, 0), )),
                    negate=True).set_info(ma.info())

            if return_as_miller_arrays:
                miller_array_list.append(ma)
                continue  # don't make a dataset

            dec = None
            if ("FWT" in label):
                dec = iotbx.mtz.ccp4_label_decorator()
            column_types = None
            if ("PHI" in label or "PHWT" in label) and (ma.is_real_array()):
                column_types = "P"
            elif (label.startswith("DANO") and ma.is_real_array()):
                if (ma.sigmas() is not None):
                    column_types = "DQ"
                else:
                    column_types = "D"
            label_base = label
            i = 1
            while label in column_labels:
                label = label_base + "-%i" % (i)
                i += 1
            if (ma is not None):
                column_labels.add(label)
                if ("FWT-1" in label): dec = None
                dataset.add_miller_array(ma,
                                         column_root_label=label,
                                         label_decorator=dec,
                                         column_types=column_types)
    if return_as_miller_arrays:
        return miller_array_list
    else:
        return mtz_object
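
A hedged usage sketch for the extract() function above, converting reflection data from a CIF file into an MTZ object. The file names and unit-cell parameters are placeholders, and the sketch assumes the module-level imports used inside extract() (flex, six, Sorry, iotbx.mtz, ...) are available.

import sys
from cctbx import crystal

# Placeholder symmetry for illustration; normally taken from the data or command line.
symm = crystal.symmetry(
    unit_cell=(78.1, 78.1, 37.0, 90, 90, 90),
    space_group_symbol="P43212")

mtz_object = extract(
    file_name="reflections.cif",      # hypothetical input file
    crystal_symmetry=symm,
    wavelength_id=None,
    crystal_id=None,
    show_details_if_error=True,
    output_r_free_label="R-free-flags",
    merge_non_unique_under_symmetry=True,
    map_to_asu=False,
    remove_systematic_absences=False,
    log=sys.stdout)
mtz_object.write(file_name="reflections.mtz")  # hypothetical output file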
Exemplo n.º 38
def exercise_miller_arrays_as_cif_block():
    from iotbx.cif import reader
    cif_model = reader(input_string=cif_miller_array,
                       builder=cif.builders.cif_model_builder()).model()
    ma_builder = cif.builders.miller_array_builder(cif_model['global'])
    ma1 = ma_builder.arrays()['_refln_F_squared_meas']
    mas_as_cif_block = cif.miller_arrays_as_cif_block(ma1,
                                                      array_type='meas',
                                                      format="corecif")
    mas_as_cif_block.add_miller_array(
        ma1.array(data=flex.complex_double([1 - 1j] * ma1.size())),
        array_type='calc')
    mas_as_cif_block.add_miller_array(
        ma1.array(data=flex.complex_double([1 - 2j] * ma1.size())),
        column_names=['_refln_A_calc', '_refln_B_calc'])
    for key in ('_refln_F_squared_meas', '_refln_F_squared_sigma',
                '_refln_F_calc', '_refln_phase_calc', '_refln_A_calc',
                '_refln_B_calc'):
        assert (key in mas_as_cif_block.cif_block.keys()), key
    #
    mas_as_cif_block = cif.miller_arrays_as_cif_block(ma1,
                                                      array_type='meas',
                                                      format="mmcif")
    mas_as_cif_block.add_miller_array(
        ma1.array(data=flex.complex_double([1 - 1j] * ma1.size())),
        array_type='calc')
    for key in ('_refln.F_squared_meas', '_refln.F_squared_sigma',
                '_refln.F_calc', '_refln.phase_calc',
                '_space_group_symop.operation_xyz', '_cell.length_a',
                '_refln.index_h'):
        assert key in mas_as_cif_block.cif_block.keys()
    #
    mas_as_cif_block = cif.miller_arrays_as_cif_block(
        ma1,
        column_names=[
            '_diffrn_refln_intensity_net', '_diffrn_refln_intensity_sigma'
        ],
        miller_index_prefix='_diffrn_refln')
    mas_as_cif_block.add_miller_array(
        ma1.array(data=flex.std_string(ma1.size(), 'om')),
        column_name='_diffrn_refln_intensity_u')
    for key in ('_diffrn_refln_intensity_net', '_diffrn_refln_intensity_sigma',
                '_diffrn_refln_intensity_u'):
        assert key in list(mas_as_cif_block.cif_block.keys())
    #
    try:
        reader(input_string=cif_global)
    except CifParserError as e:
        pass
    else:
        raise Exception_expected
    cif_model = reader(input_string=cif_global, strict=False).model()
    assert not show_diff(
        str(cif_model), """\
data_1
_c                                3
_d                                4
""")
    # exercise adding miller arrays with non-matching indices
    cs = crystal.symmetry(unit_cell=uctbx.unit_cell((10, 10, 10, 90, 90, 90)),
                          space_group_info=sgtbx.space_group_info(symbol="P1"))
    mi = flex.miller_index(((1, 0, 0), (1, 2, 3), (2, 3, 4)))
    ms1 = miller.set(cs, mi)
    ma1 = miller.array(ms1, data=flex.double((1, 2, 3)))
    mas_as_cif_block = cif.miller_arrays_as_cif_block(
        ma1, column_name="_refln.F_meas_au")
    ms2 = miller.set(cs, mi[:2])
    ma2 = miller.array(ms2, data=flex.complex_double([1 - 2j] * ms2.size()))
    mas_as_cif_block.add_miller_array(ma2,
                                      column_names=("_refln.F_calc_au",
                                                    "_refln.phase_calc")),
    ms3 = miller.set(cs, flex.miller_index(((1, 0, 0), (5, 6, 7), (2, 3, 4))))
    ma3 = miller.array(ms3, data=flex.double((4, 5, 6)))
    mas_as_cif_block.add_miller_array(ma3, column_name="_refln.F_squared_meas")
    ms4 = miller.set(
        cs,
        flex.miller_index(
            ((1, 2, 3), (5, 6, 7), (1, 1, 1), (1, 0, 0), (2, 3, 4))))
    ma4 = ms4.d_spacings()
    mas_as_cif_block.add_miller_array(ma4, column_name="_refln.d_spacing")
    # extract arrays from cif block and make sure we get back what we started with
    arrays = cif.builders.miller_array_builder(
        mas_as_cif_block.cif_block).arrays()
    recycled_arrays = (arrays['_refln.F_meas_au'], arrays['_refln.F_calc_au'],
                       arrays['_refln.F_squared_meas'],
                       arrays['_refln.d_spacing'])
    for orig, recycled in zip((ma1, ma2, ma3, ma4), recycled_arrays):
        assert orig.size() == recycled.size()
        recycled = recycled.customized_copy(
            anomalous_flag=orig.anomalous_flag())
        orig, recycled = orig.common_sets(recycled)
        assert orig.indices().all_eq(recycled.indices())
        assert approx_equal(orig.data(), recycled.data(), eps=1e-5)
    #
    cif_model = reader(input_string=r3adrsf,
                       builder=cif.builders.cif_model_builder()).model()
    cs = cif.builders.crystal_symmetry_builder(
        cif_model["r3adrsf"]).crystal_symmetry

    ma_builder = cif.builders.miller_array_builder(
        cif_model['r3adrAsf'],
        base_array_info=miller.array_info(crystal_symmetry_from_file=cs))
    miller_arrays = list(ma_builder.arrays().values())
    assert len(miller_arrays) == 4
    mas_as_cif_block = cif.miller_arrays_as_cif_block(
        miller_arrays[0].map_to_asu(),
        column_names=miller_arrays[0].info().labels,
        format="corecif")
    for array in miller_arrays[1:]:
        labels = array.info().labels
        if len(labels) > 1:
            for label in labels:
                if label.startswith("wavelength_id"):
                    labels.remove(label)
        mas_as_cif_block.add_miller_array(array=array.map_to_asu(),
                                          column_names=array.info().labels)
    s = StringIO()
    print(mas_as_cif_block.refln_loop, file=s)
    assert not show_diff(
        s.getvalue(), """\
loop_
  _refln_index_h
  _refln_index_k
  _refln_index_l
  _refln.crystal_id
  _refln.scale_group_code
  _refln.wavelength_id
  _refln.pdbx_I_plus
  _refln.pdbx_I_plus_sigma
  _refln.pdbx_I_minus
  _refln.pdbx_I_minus_sigma
  -87  5  46  1  1  3   40.2  40.4    6.7  63.9
  -87  5  45  1  1  3   47.8  29.7   35.1  30.5
  -87  5  44  1  1  3   18.1  33.2    0.5  34.6
  -87  5  43  1  1  3    6.1  45.4   12.9  51.6
  -87  5  42  1  1  3   -6.6  45.6  -15.5  55.8
  -87  7  37  1  1  3    6.3  43.4      ?     ?
  -87  7  36  1  1  3  -67.2  55.4      ?     ?
  -88  2  44  1  1  3      0    -1     35  38.5
  -88  2  43  1  1  3      0    -1   57.4  41.5
  -88  4  45  1  1  3     -1  46.1   -9.1  45.6
  -88  4  44  1  1  3  -19.8  49.2    0.3  34.7
  -88  6  44  1  1  3   -1.8  34.8      ?     ?

""")
Exemplo n.º 39
  def _extract_flags(self, data_description = "R-free flags"):
    r_free_flags, test_flag_value = None, None
    params = self.parameters.r_free_flags
    # Extract
    if(not self.parameters.r_free_flags.generate):
      try:
        r_free_flags, test_flag_value = \
          self.reflection_file_server.get_r_free_flags(
            file_name                = params.file_name,
            label                    = params.label,
            test_flag_value          = params.test_flag_value,
            disable_suitability_test = params.disable_suitability_test,
            parameter_scope          = "")
      except reflection_file_utils.Sorry_No_array_of_the_required_type as e:
        if(self.parameters.r_free_flags.generate is not None):
          if(not self.keep_going):
            self.err.append(explain_how_to_generate_array_of_r_free_flags())
            self.err.append("Please try again.")
          return None
        r_free_flags, test_flag_value = None, None
      else:
        params.file_name       = r_free_flags.info().source
        params.label           = r_free_flags.info().label_string()
        params.test_flag_value = test_flag_value
        msg = miller_array_symmetry_safety_check(
          miller_array        = r_free_flags,
          data_description    = data_description,
          working_point_group = self.working_point_group)
        if(msg is not None and not self.keep_going):
          self.err.append(msg)
        info = r_free_flags.info()
        try:
          processed = r_free_flags.regularize()
        except RuntimeError as e:
          self.err.append("Bad free-r flags:\n %s"%str(e))
          return None
        if (self.force_non_anomalous):
          processed = processed.average_bijvoet_mates()
        r_free_flags = processed.set_info(info)
    # Generate or stop
    if(r_free_flags is None):
      if ((params.fraction is None) or
          (params.lattice_symmetry_max_delta is None) or
          (params.use_lattice_symmetry is None)):
        msg = """
No R-free flags are available, but one or more parameters required to generate
new flags is undefined.
"""
        self.err.append(msg)
        return None
      print("Generating a new array of R-free flags.", file=self.log)
      print(file=self.log)
      r_free_flags = self.f_obs.generate_r_free_flags(
        fraction                   = params.fraction,
        max_free                   = params.max_free,
        lattice_symmetry_max_delta = params.lattice_symmetry_max_delta,
        use_lattice_symmetry       = params.use_lattice_symmetry,
        use_dataman_shells         = params.use_dataman_shells,
        n_shells                   = params.n_shells
        ).set_info(miller.array_info(labels = ["R-free-flags"]))
      params.label           = r_free_flags.info().label_string()
      params.test_flag_value = 1
    # check if anomalous pairs are sound
    if(r_free_flags is not None):
      r_free_flags.deep_copy().as_non_anomalous_array()
    # make sure flags match anomalous flag of data
    if(self.raw_data.anomalous_flag() and not r_free_flags.anomalous_flag()):
      info = r_free_flags.info()
      observation_type = r_free_flags.observation_type()
      r_free_flags = r_free_flags.generate_bijvoet_mates()
      r_free_flags.set_observation_type(observation_type)
      r_free_flags.set_info(info)
    return r_free_flags
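
A small, self-contained sketch of the flag-generation path taken above when no flags are found, applied to a synthetic amplitude array; the unit cell, space group, resolution limit and fraction are placeholder values, and the keyword names mirror the generate_r_free_flags() call in the method.

from cctbx import crystal, miller
from cctbx.array_family import flex

# Synthetic data set for illustration only.
cs = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90),
                      space_group_symbol="P1")
ms = miller.build_set(crystal_symmetry=cs, anomalous_flag=False, d_min=2.0)
f_obs = miller.array(ms, data=flex.double(ms.size(), 1.0))

r_free_flags = f_obs.generate_r_free_flags(
    fraction=0.05,
    max_free=2000,
    lattice_symmetry_max_delta=5.0,
    use_lattice_symmetry=True,
    use_dataman_shells=False,
    n_shells=20).set_info(miller.array_info(labels=["R-free-flags"]))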
Exemplo n.º 40
    def make_cif_block(self, experiments, reflections):
        """Write the data to a cif block"""
        # Select reflections
        selection = reflections.get_flags(reflections.flags.integrated,
                                          all=True)
        reflections = reflections.select(selection)

        # Filter out bad variances and other issues, but don't filter on ice rings
        # or alter partialities.

        # Assumes you want to apply the lp and dqe corrections to sum and prf
        # Do we want to combine partials?
        reflections = filter_reflection_table(
            reflections,
            self.params.intensity,
            combine_partials=False,
            partiality_threshold=0.0,
            d_min=self.params.mtz.d_min,
        )

        # Get the cif block
        cif_block = iotbx.cif.model.block()

        # Audit trail
        dials_version = dials.util.version.dials_version()
        cif_block["_audit.revision_id"] = 1
        cif_block["_audit.creation_method"] = dials_version
        cif_block["_audit.creation_date"] = datetime.date.today().isoformat()
        cif_block["_entry.id"] = "DIALS"
        # add software loop
        mmcif_software_header = (
            "_software.pdbx_ordinal",
            "_software.citation_id",
            "_software.name",  # as defined at [1]
            "_software.version",
            "_software.type",
            "_software.classification",
            "_software.description",
        )

        mmcif_citations_header = (
            "_citation.id",
            "_citation.journal_abbrev",
            "_citation.journal_volume",
            "_citation.journal_issue",
            "_citation.page_first",
            "_citation.page_last",
            "_citation.year",
            "_citation.title",
        )

        software_loop = iotbx.cif.model.loop(header=mmcif_software_header)
        citations_loop = iotbx.cif.model.loop(header=mmcif_citations_header)

        software_loop.add_row((
            1,
            1,
            "DIALS",
            dials_version,
            "package",
            "data processing",
            "Data processing and integration within the DIALS software package",
        ))
        citations_loop.add_row((
            1,
            "Acta Cryst. D",
            74,
            2,
            85,
            97,
            2018,
            "DIALS: implementation and evaluation of a new integration package",
        ))
        if "scale" in self.params.intensity:
            software_loop.add_row((
                2,
                2,
                "DIALS",
                dials_version,
                "program",
                "data scaling",
                "Data scaling and merging within the DIALS software package",
            ))
            citations_loop.add_row((
                2,
                "Acta Cryst. D",
                76,
                4,
                385,
                399,
                2020,
                "Scaling diffraction data in the DIALS software package: algorithms and new approaches for multi-crystal scaling",
            ))
        cif_block.add_loop(software_loop)
        cif_block.add_loop(citations_loop)

        # Hard coding X-ray
        if self.params.mmcif.pdb_version == "v5_next":
            cif_block["_pdbx_diffrn_data_section.id"] = "dials"
            cif_block["_pdbx_diffrn_data_section.type_scattering"] = "x-ray"
            cif_block["_pdbx_diffrn_data_section.type_merged"] = "false"
            cif_block["_pdbx_diffrn_data_section.type_scaled"] = str(
                "scale" in self.params.intensity).lower()

        # FIXME finish metadata addition - detector and source details needed
        # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/index.html

        # Add source information;
        # _diffrn_source.pdbx_wavelength_list = (list of wavelengths)
        # _diffrn_source.source = (general class of source e.g. synchrotron)
        # _diffrn_source.type = (specific beamline or instrument e.g DIAMOND BEAMLINE I04)

        wls = []
        epochs = []
        for exp in experiments:
            wls.append(round(exp.beam.get_wavelength(), 5))
            epochs.append(exp.scan.get_epochs()[0])
        unique_wls = set(wls)
        cif_block["_exptl_crystal.id"] = 1  # links to crystal_id
        cif_block["_diffrn.id"] = 1  # links to diffrn_id
        cif_block["_diffrn.crystal_id"] = 1
        cif_block["_diffrn_source.diffrn_id"] = 1
        cif_block["_diffrn_source.pdbx_wavelength_list"] = ", ".join(
            str(w) for w in unique_wls)

        # Add detector information;
        # _diffrn_detector.detector  = (general class e.g. PIXEL, PLATE etc)
        # _diffrn_detector.pdbx_collection_date = (Date of collection yyyy-mm-dd)
        # _diffrn_detector.type = (full name of detector e.g. DECTRIS PILATUS3 2M)
        # One date is required, so if multiple just use the first date.
        min_epoch = min(epochs)
        date_str = time.strftime("%Y-%m-%d", time.gmtime(min_epoch))
        cif_block["_diffrn_detector.diffrn_id"] = 1
        cif_block["_diffrn_detector.pdbx_collection_date"] = date_str

        # Write reflection data
        # Required columns
        header = (
            "_pdbx_diffrn_unmerged_refln.reflection_id",
            "_pdbx_diffrn_unmerged_refln.scan_id",
            "_pdbx_diffrn_unmerged_refln.image_id_begin",
            "_pdbx_diffrn_unmerged_refln.image_id_end",
            "_pdbx_diffrn_unmerged_refln.index_h",
            "_pdbx_diffrn_unmerged_refln.index_k",
            "_pdbx_diffrn_unmerged_refln.index_l",
        )

        extra_items = {
            "scales": ("_pdbx_diffrn_unmerged_refln.scale_value", "%5.3f"),
            "intensity.scale.value": (
                "_pdbx_diffrn_unmerged_refln.intensity_meas",
                "%8.3f",
            ),
            "intensity.scale.sigma": (
                "_pdbx_diffrn_unmerged_refln.intensity_sigma",
                "%8.3f",
            ),
            "intensity.sum.value": (
                "_pdbx_diffrn_unmerged_refln.intensity_sum",
                "%8.3f",
            ),
            "intensity.sum.sigma": (
                "_pdbx_diffrn_unmerged_refln.intensity_sum_sigma",
                "%8.3f",
            ),
            "intensity.prf.value": (
                "_pdbx_diffrn_unmerged_refln.intensity_prf",
                "%8.3f",
            ),
            "intensity.prf.sigma": (
                "_pdbx_diffrn_unmerged_refln.intensity_prf_sigma",
                "%8.3f",
            ),
            "angle":
            ("_pdbx_diffrn_unmerged_refln.scan_angle_reflection", "%7.4f"),
            "partiality": ("_pdbx_diffrn_unmerged_refln.partiality", "%7.4f"),
        }

        variables_present = []
        if "scale" in self.params.intensity:
            reflections["scales"] = 1.0 / reflections["inverse_scale_factor"]
            reflections["intensity.scale.sigma"] = flex.sqrt(
                reflections["intensity.scale.variance"])
            variables_present.extend(
                ["scales", "intensity.scale.value", "intensity.scale.sigma"])
        if "sum" in self.params.intensity:
            reflections["intensity.sum.sigma"] = flex.sqrt(
                reflections["intensity.sum.variance"])
            variables_present.extend(
                ["intensity.sum.value", "intensity.sum.sigma"])
        if "profile" in self.params.intensity:
            reflections["intensity.prf.sigma"] = flex.sqrt(
                reflections["intensity.prf.variance"])
            variables_present.extend(
                ["intensity.prf.value", "intensity.prf.sigma"])

        # Should always exist
        reflections["angle"] = reflections["xyzcal.mm"].parts()[2] * RAD2DEG
        variables_present.extend(["angle"])

        if "partiality" in reflections:
            variables_present.extend(["partiality"])

        for name in variables_present:
            if name in reflections:
                header += (extra_items[name][0], )
                self._fmt += " " + extra_items[name][1]

        if "scale" in self.params.intensity:
            # Write dataset_statistics - first make a miller array
            crystal_symmetry = cctbxcrystal.symmetry(
                space_group=experiments[0].crystal.get_space_group(),
                unit_cell=experiments[0].crystal.get_unit_cell(),
            )
            miller_set = miller.set(
                crystal_symmetry=crystal_symmetry,
                indices=reflections["miller_index"],
                anomalous_flag=False,
            )
            i_obs = miller.array(miller_set,
                                 data=reflections["intensity.scale.value"])
            i_obs.set_observation_type_xray_intensity()
            i_obs.set_sigmas(reflections["intensity.scale.sigma"])
            i_obs.set_info(
                miller.array_info(source="DIALS",
                                  source_type="reflection_tables"))

            result = dataset_statistics(
                i_obs=i_obs,
                crystal_symmetry=crystal_symmetry,
                use_internal_variance=False,
                eliminate_sys_absent=False,
                assert_is_not_unique_set_under_symmetry=False,
            )
            merged_block = iotbx.cif.model.block()
            merged_block["_reflns.pdbx_ordinal"] = 1
            merged_block["_reflns.pdbx_diffrn_id"] = 1
            merged_block["_reflns.entry_id"] = "DIALS"
            merged_data = result.as_cif_block()
            merged_block.update(merged_data)
            cif_block.update(merged_block)

        # Write the crystal information
        # if v5, that's all so return
        if self.params.mmcif.pdb_version == "v5":
            return cif_block
        # continue if v5_next
        cif_loop = iotbx.cif.model.loop(header=(
            "_pdbx_diffrn_unmerged_cell.ordinal",
            "_pdbx_diffrn_unmerged_cell.crystal_id",
            "_pdbx_diffrn_unmerged_cell.wavelength",
            "_pdbx_diffrn_unmerged_cell.cell_length_a",
            "_pdbx_diffrn_unmerged_cell.cell_length_b",
            "_pdbx_diffrn_unmerged_cell.cell_length_c",
            "_pdbx_diffrn_unmerged_cell.cell_angle_alpha",
            "_pdbx_diffrn_unmerged_cell.cell_angle_beta",
            "_pdbx_diffrn_unmerged_cell.cell_angle_gamma",
            "_pdbx_diffrn_unmerged_cell.Bravais_lattice",
        ))
        crystals = experiments.crystals()
        crystal_to_id = {crystal: i + 1 for i, crystal in enumerate(crystals)}
        for i, exp in enumerate(experiments):
            crystal = exp.crystal
            crystal_id = crystal_to_id[crystal]
            wavelength = exp.beam.get_wavelength()
            a, b, c, alpha, beta, gamma = crystal.get_unit_cell().parameters()
            latt_type = str(
                bravais_types.bravais_lattice(group=crystal.get_space_group()))
            cif_loop.add_row((i + 1, crystal_id, wavelength, a, b, c, alpha,
                              beta, gamma, latt_type))
        cif_block.add_loop(cif_loop)

        # Write the scan information
        cif_loop = iotbx.cif.model.loop(header=(
            "_pdbx_diffrn_scan.scan_id",
            "_pdbx_diffrn_scan.crystal_id",
            "_pdbx_diffrn_scan.image_id_begin",
            "_pdbx_diffrn_scan.image_id_end",
            "_pdbx_diffrn_scan.scan_angle_begin",
            "_pdbx_diffrn_scan.scan_angle_end",
        ))

        expid_to_scan_id = {
            exp.identifier: i + 1
            for i, exp in enumerate(experiments)
        }

        for i, exp in enumerate(experiments):
            scan = exp.scan
            crystal_id = crystal_to_id[exp.crystal]
            image_range = scan.get_image_range()
            osc_range = scan.get_oscillation_range(deg=True)
            cif_loop.add_row((
                i + 1,
                crystal_id,
                image_range[0],
                image_range[1],
                osc_range[0],
                osc_range[1],
            ))
        cif_block.add_loop(cif_loop)

        _, _, _, _, z0, z1 = reflections["bbox"].parts()
        h, k, l = [
            hkl.iround()
            for hkl in reflections["miller_index"].as_vec3_double().parts()
        ]
        # make scan id consistent with header as defined above
        scan_id = flex.int(reflections.size(), 0)
        for id_ in reflections.experiment_identifiers().keys():
            expid = reflections.experiment_identifiers()[id_]
            sel = reflections["id"] == id_
            scan_id.set_selected(sel, expid_to_scan_id[expid])

        loop_values = [
            flex.size_t_range(1,
                              len(reflections) + 1),
            scan_id,
            z0,
            z1,
            h,
            k,
            l,
        ] + [reflections[name] for name in variables_present]
        cif_loop = iotbx.cif.model.loop(data=dict(zip(header, loop_values)))
        cif_block.add_loop(cif_loop)

        return cif_block
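
A condensed sketch of the loop-from-columns form used at the end of make_cif_block, where iotbx.cif.model.loop is constructed directly from whole flex columns instead of row by row; the tag names and values are illustrative assumptions.

import iotbx.cif.model
from scitbx.array_family import flex

# Three short example columns of equal length (placeholder values).
header = (
    "_example_refln.id",
    "_example_refln.index_h",
    "_example_refln.intensity",
)
columns = [
    flex.size_t_range(1, 4),        # reflection ids 1, 2, 3
    flex.int([1, 2, 3]),
    flex.double([10.5, 7.2, 3.3]),
]
loop = iotbx.cif.model.loop(data=dict(zip(header, columns)))

block = iotbx.cif.model.block()
block.add_loop(loop)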
Exemplo n.º 41
 def get_solve_map (self) :
   map_coeffs = self._convert_amplitudes_and_phases(f_label="FP",
     phi_label="PHIB", fom_label="FOM", weighted=True)
   from cctbx.miller import array_info
   info = array_info(labels=["FWT","PHWT"])
   return map_coeffs.set_info(info)
Exemplo n.º 42
 def extract_flags(self, data_description="R-free flags"):
     r_free_flags, test_flag_value = None, None
     params = self.parameters.r_free_flags
     # Extract
     if (not self.parameters.r_free_flags.generate):
         try:
             r_free_flags, test_flag_value = \
               self.reflection_file_server.get_r_free_flags(
                 file_name                = params.file_name,
                 label                    = params.label,
                 test_flag_value          = params.test_flag_value,
                 disable_suitability_test = params.disable_suitability_test,
                 parameter_scope          = self.flags_parameter_scope)
         except reflection_file_utils.Sorry_No_array_of_the_required_type as e:
             if (self.parameters.r_free_flags.generate is not None):
                 explain_how_to_generate_array_of_r_free_flags(
                     log=self.log,
                     flags_parameter_scope=self.flags_parameter_scope)
                 if (self.keep_going): return None
                 raise Sorry("Please try again.")
             r_free_flags, test_flag_value = None, None
         else:
             params.file_name = r_free_flags.info().source
             params.label = r_free_flags.info().label_string()
             params.test_flag_value = test_flag_value
             if ([self.working_point_group,
                  self.symmetry_safety_check].count(None) == 0):
                 miller_array_symmetry_safety_check(
                     miller_array=r_free_flags,
                     data_description=data_description,
                     working_point_group=self.working_point_group,
                     symmetry_safety_check=self.symmetry_safety_check,
                     log=self.log)
             info = r_free_flags.info()
             try:
                 processed = r_free_flags.regularize()
             except RuntimeError as e:
                 raise Sorry("Bad free-r flags:\n %s" % str(e))
             if (self.force_non_anomalous):
                 processed = processed.average_bijvoet_mates()
             r_free_flags = processed.set_info(info)
     # Generate or stop
     if (r_free_flags is None):
         if ((params.fraction is None)
                 or (params.lattice_symmetry_max_delta is None)
                 or (params.use_lattice_symmetry is None)):
             raise Sorry(
                 "No R-free flags are available, but one or more " +
                 "parameters required to generate new flags is undefined.")
         print("Generating a new array of R-free flags.", file=self.log)
         print(file=self.log)
          libtbx.call_back(
              message="warn",
              data=("PHENIX will generate a new array of R-free flags.  Please "
                    "check to make sure that the input data do not already "
                    "contain an R-free set; if one is present, you should "
                    "cancel this job and disable generation of new flags.  If "
                    "the program you are running outputs an MTZ file, you "
                    "should be sure to use that file in all future "
                    "refinements."))
         r_free_flags = self.f_obs.generate_r_free_flags(
             fraction=params.fraction,
             max_free=params.max_free,
             lattice_symmetry_max_delta=params.lattice_symmetry_max_delta,
             use_lattice_symmetry=params.use_lattice_symmetry,
             use_dataman_shells=params.use_dataman_shells,
             n_shells=params.n_shells).set_info(
                 miller.array_info(labels=["R-free-flags"]))
         params.label = r_free_flags.info().label_string()
         params.test_flag_value = 1
     # check if anomalous pairs are sound
     if (r_free_flags is not None):
         r_free_flags.deep_copy().as_non_anomalous_array()
     # make sure flags match anomalous flag of data
     if (self.raw_data.anomalous_flag()
             and not r_free_flags.anomalous_flag()):
         info = r_free_flags.info()
         observation_type = r_free_flags.observation_type()
         r_free_flags = r_free_flags.generate_bijvoet_mates()
         r_free_flags.set_observation_type(observation_type)
         r_free_flags.set_info(info)
     return r_free_flags
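
A hedged helper sketch combining the two anomalous-handling steps seen in extract_flags (collapsing flags with average_bijvoet_mates() when the data are non-anomalous, expanding them with generate_bijvoet_mates() when the data are anomalous); the function name is an illustrative assumption, not part of the original code.

def match_flags_to_data(r_free_flags, data):
    # Sketch only: reconcile the anomalous state of free-R flags with the data array.
    info = r_free_flags.info()
    if r_free_flags.anomalous_flag() and not data.anomalous_flag():
        # Data are non-anomalous: collapse the flags to match.
        r_free_flags = r_free_flags.average_bijvoet_mates()
    elif data.anomalous_flag() and not r_free_flags.anomalous_flag():
        # Data are anomalous: expand the flags to Bijvoet mates, as done above.
        observation_type = r_free_flags.observation_type()
        r_free_flags = r_free_flags.generate_bijvoet_mates()
        r_free_flags.set_observation_type(observation_type)
    return r_free_flags.set_info(info)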