Exemplo n.º 1
0
class cctbx_data_structures_from_cif(object):
    """Extract crystal structures and miller arrays from the blocks of a CIF.

    After construction, ``xray_structures`` and ``miller_arrays`` are ordered
    mappings keyed by CIF data block name.
    """

    def __init__(self,
                 file_object=None,
                 file_path=None,
                 cif_model=None,
                 data_structure_builder=None,
                 data_block_name=None,
                 base_array_info=None,
                 **kwds):
        """Read (or accept) a CIF model and run the builders on its blocks.

        file_object / file_path: handed to ``reader`` when no ``cif_model``
          is supplied; ``file_path`` is also used in the error message for
          an unknown block name.
        cif_model: an already parsed model; must not be combined with
          ``file_object``.
        data_structure_builder: restrict processing to one of
          ``builders.miller_array_builder`` or
          ``builders.crystal_structure_builder``; both are run when None.
        data_block_name: when given, only the block with this name is used.
        base_array_info: when given, a copy labelled with the block name is
          passed on to the miller array builder.
        kwds: accepted but not used by the code in this constructor.

        Raises Sorry if the model contains no data block, and RuntimeError
        if ``data_block_name`` is not present in the model.
        """
        assert file_object is None or cif_model is None
        if data_structure_builder is not None:
            assert data_structure_builder in (
                builders.miller_array_builder,
                builders.crystal_structure_builder)
            active_builders = (data_structure_builder, )
        else:
            active_builders = (builders.miller_array_builder,
                               builders.crystal_structure_builder)

        self.xray_structures = OrderedDict()
        self.miller_arrays = OrderedDict()
        if cif_model is None:
            cif_reader = reader(file_path=file_path, file_object=file_object)
            cif_model = cif_reader.model()
        if len(cif_model) == 0:
            raise Sorry("No data block found in CIF")
        if data_block_name is not None and data_block_name not in cif_model:
            if file_path is not None:
                msg = 'Unknown CIF data block name "%s" in file: "%s"' % (
                    data_block_name, file_path)
            else:
                msg = 'Unknown CIF data block name: "%s"' % data_block_name
            raise RuntimeError(msg)
        errors = []  # not referenced again within this constructor
        # Wavelengths carry over from one block to the next until a later
        # block provides its own.
        wavelengths = {}
        for block_name, cif_block in cif_model.items():
            if data_block_name is not None and block_name != data_block_name:
                continue
            for builder_cls in active_builders:
                if builder_cls == builders.crystal_structure_builder:
                    has_atom_sites = ('_atom_site_fract_x' in cif_block
                                      or '_atom_site_Cartn_x' in cif_block)
                    if has_atom_sites:
                        structure = builder_cls(cif_block).structure
                        self.xray_structures.setdefault(block_name, structure)
                elif builder_cls == builders.miller_array_builder:
                    found_wavelengths = builders.get_wavelengths(cif_block)
                    if found_wavelengths is not None:
                        wavelengths = found_wavelengths
                    if base_array_info is not None:
                        base_array_info = base_array_info.customized_copy(
                            labels=[block_name])
                    has_reflections = ('_refln_index_h' in cif_block
                                       or '_refln.index_h' in cif_block
                                       or '_diffrn_refln' in cif_block)
                    if not has_reflections:
                        continue
                    arrays = builder_cls(
                        cif_block,
                        base_array_info=base_array_info,
                        wavelengths=wavelengths).arrays()
                    self.miller_arrays.setdefault(block_name, arrays)
Exemplo n.º 2
0
def bravais_lattice_to_space_groups(chiral_only=True):
  """Group space groups number 1 to 230 by their Bravais lattice.

  Returns an OrderedDict mapping ``str(bravais_lattice)`` to the list of
  space group objects sharing that lattice, in order of increasing space
  group number. With ``chiral_only`` true (the default) only groups for
  which ``is_chiral()`` holds are included.
  """
  from cctbx import sgtbx
  from cctbx.sgtbx import bravais_types
  from libtbx.containers import OrderedDict
  groups_by_lattice = OrderedDict()
  for number in range(1, 231):
    group = sgtbx.space_group_info(number=number).group()
    if chiral_only and not group.is_chiral():
      continue
    lattice_name = str(bravais_types.bravais_lattice(group=group))
    groups_by_lattice.setdefault(lattice_name, []).append(group)
  return groups_by_lattice
Exemplo n.º 3
0
def bravais_lattice_to_space_groups(chiral_only=True):
  """Group space groups number 1 to 230 by their Bravais lattice.

  Returns an OrderedDict mapping ``str(bravais_lattice)`` to the list of
  space group objects sharing that lattice, in order of increasing space
  group number. With ``chiral_only`` true (the default) only groups for
  which ``is_chiral()`` holds are included.
  """
  from cctbx import sgtbx
  from cctbx.sgtbx import bravais_types
  from libtbx.containers import OrderedDict
  groups_by_lattice = OrderedDict()
  for number in range(1, 231):
    group = sgtbx.space_group_info(number=number).group()
    if chiral_only and not group.is_chiral():
      continue
    lattice_name = str(bravais_types.bravais_lattice(group=group))
    groups_by_lattice.setdefault(lattice_name, []).append(group)
  return groups_by_lattice
Exemplo n.º 4
0
class cctbx_data_structures_from_cif(object):
  """Extract crystal structures and miller arrays from the blocks of a CIF.

  After construction, ``xray_structures`` and ``miller_arrays`` are ordered
  mappings keyed by CIF data block name.
  """

  def __init__(self,
               file_object=None,
               file_path=None,
               cif_model=None,
               data_structure_builder=None,
               data_block_name=None,
               base_array_info=None,
               **kwds):
    """Read (or accept) a CIF model and run the builders on its blocks.

    file_object / file_path: handed to ``reader`` when no ``cif_model`` is
      supplied; ``file_path`` is also used in the error message for an
      unknown block name.
    cif_model: an already parsed model; must not be combined with
      ``file_object``.
    data_structure_builder: restrict processing to one of
      ``builders.miller_array_builder`` or
      ``builders.crystal_structure_builder``; both are run when None.
    data_block_name: when given, only the block with this name is used.
    base_array_info: when given, a copy labelled with the block name is
      passed on to the miller array builder.
    kwds: accepted but not used by the code in this constructor.

    Raises Sorry if the model contains no data block, and RuntimeError if
    ``data_block_name`` is not present in the model.
    """
    assert file_object is None or cif_model is None
    if data_structure_builder is not None:
      assert data_structure_builder in (
        builders.miller_array_builder, builders.crystal_structure_builder)
      active_builders = (data_structure_builder,)
    else:
      active_builders = (
        builders.miller_array_builder, builders.crystal_structure_builder)

    self.xray_structures = OrderedDict()
    self.miller_arrays = OrderedDict()
    if cif_model is None:
      cif_reader = reader(file_path=file_path, file_object=file_object)
      cif_model = cif_reader.model()
    if len(cif_model) == 0:
      raise Sorry("No data block found in CIF")
    if data_block_name is not None and data_block_name not in cif_model:
      if file_path is not None:
        msg = 'Unknown CIF data block name "%s" in file: "%s"' % (
          data_block_name, file_path)
      else:
        msg = 'Unknown CIF data block name: "%s"' % data_block_name
      raise RuntimeError(msg)
    errors = []  # not referenced again within this constructor
    # Wavelengths carry over from one block to the next until a later block
    # provides its own.
    wavelengths = {}
    for block_name, cif_block in cif_model.items():
      if data_block_name is not None and block_name != data_block_name:
        continue
      for builder_cls in active_builders:
        if builder_cls == builders.crystal_structure_builder:
          has_atom_sites = ('_atom_site_fract_x' in cif_block
                            or '_atom_site_Cartn_x' in cif_block)
          if has_atom_sites:
            structure = builder_cls(cif_block).structure
            self.xray_structures.setdefault(block_name, structure)
        elif builder_cls == builders.miller_array_builder:
          found_wavelengths = builders.get_wavelengths(cif_block)
          if found_wavelengths is not None:
            wavelengths = found_wavelengths
          if base_array_info is not None:
            base_array_info = base_array_info.customized_copy(
              labels=[block_name])
          has_reflections = ('_refln_index_h' in cif_block
                             or '_refln.index_h' in cif_block
                             or '_diffrn_refln' in cif_block)
          if not has_reflections:
            continue
          arrays = builder_cls(
            cif_block, base_array_info=base_array_info,
            wavelengths=wavelengths).arrays()
          self.miller_arrays.setdefault(block_name, arrays)
Exemplo n.º 5
0
class miller_array_builder(crystal_symmetry_builder):
  """Build miller arrays from the reflection loops of a CIF block.

  Every loop that carries Miller indices is scanned column by column and the
  resulting arrays are collected in ``self._arrays``. Keys are the CIF data
  names, extended with ``_<id>`` suffixes for the wavelength id, crystal id
  and scale group code where those apply.
  """

  # Data name -> observation type. Not referenced by the methods visible in
  # this block.
  observation_types = {
    '_refln_F_squared': xray.intensity(),
    '_refln_intensity': xray.intensity(),
    '_refln_F': xray.amplitude(),
    '_refln_A': None,
  }

  def __init__(self, cif_block, base_array_info=None, wavelengths=None):
    """Populate ``self._arrays`` from ``cif_block``.

    cif_block: the CIF block to read; passed to the base class constructor
      and queried directly for Hendrickson-Lattman columns.
    base_array_info: optional array info. When given, its
      ``crystal_symmetry_from_file`` is joined (``force=True``) into the
      symmetry found by the base class and its labels are prepended to the
      labels of every array; otherwise ``miller.array_info(source_type="cif")``
      is used.
    wavelengths: optional mapping from wavelength id to wavelength, queried
      with ``.get(w_id)``; an empty dict is used when None.

    Raises CifBuilderError if no array could be built.
    """
    crystal_symmetry_builder.__init__(self, cif_block)
    if base_array_info is not None:
      self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
        other_symmetry=base_array_info.crystal_symmetry_from_file,
      force=True)
    self._arrays = OrderedDict()
    if (wavelengths is None) :
      wavelengths = {}
    if base_array_info is None:
      base_array_info = miller.array_info(source_type="cif")

    def rstrip_substrings(string, substrings):
      # Strip each suffix in turn (in the given order) if it is present at
      # the end of what remains of the string.
      for substr in substrings:
        if substr == '': continue
        if string.endswith(substr):
          string = string[:-len(substr)]
      return string

    refln_containing_loops = self.get_miller_indices_containing_loops()
    for self.indices, refln_loop in refln_containing_loops:
      self.wavelength_id_array = None
      self.crystal_id_array = None
      self.scale_group_array = None
      wavelength_ids = [None]
      crystal_ids = [None]
      scale_groups = [None]
      for key, value in refln_loop.items():
        # need to get these arrays first
        if (key.endswith('wavelength_id') or
            key.endswith('crystal_id') or
            key.endswith('scale_group_code')):
          data = as_int_or_none_if_all_question_marks(value, column_name=key)
          if data is None:
            continue
          counts = data.counts()
          # A wavelength id is recorded even when it is the only one in the
          # loop, so that the wavelength lookup below still happens.
          if key.endswith('wavelength_id'):
            wavelength_ids = counts.keys()
          if len(counts) == 1: continue
          array = miller.array(
            miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data)
          if key.endswith('wavelength_id'):
            self.wavelength_id_array = array
            wavelength_ids = counts.keys()
          elif key.endswith('crystal_id'):
            self.crystal_id_array = array
            crystal_ids = counts.keys()
          elif key.endswith('scale_group_code'):
            self.scale_group_array = array
            scale_groups = counts.keys()
      for label, value in sorted(refln_loop.items()):
        for w_id in wavelength_ids:
          for crys_id in crystal_ids:
            for scale_group in scale_groups:
              if 'index_' in label: continue
              key = label
              labels = [label]
              wavelength = None
              # The id columns themselves are not split by id.
              if (key.endswith('wavelength_id') or
                    key.endswith('crystal_id') or
                    key.endswith('scale_group_code')):
                w_id = None
                crys_id = None
                scale_group = None
              key_suffix = ''
              if w_id is not None:
                key_suffix += '_%i' %w_id
                labels.insert(0, "wavelength_id=%i" %w_id)
                wavelength = wavelengths.get(w_id, None)
              if crys_id is not None:
                key_suffix += '_%i' %crys_id
                labels.insert(0, "crystal_id=%i" %crys_id)
              if scale_group is not None:
                key_suffix += '_%i' %scale_group
                labels.insert(0, "scale_group_code=%i" %scale_group)
              key += key_suffix
              sigmas = None
              if key in self._arrays: continue
              array = self.flex_std_string_as_miller_array(
                value, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              if array is None: continue
              if '_sigma' in key:
                # Attach sigmas to the matching data array if that array has
                # already been stored and has no sigmas yet.
                sigmas_label = label
                key = None
                for suffix in ('', '_meas', '_calc'):
                  if sigmas_label.replace('_sigma', suffix) in refln_loop:
                    key = sigmas_label.replace('_sigma', suffix) + key_suffix
                    break
                if key is None:
                  key = sigmas_label + key_suffix
                elif key in self._arrays and self._arrays[key].sigmas() is None:
                  sigmas = array
                  array = self._arrays[key]
                  check_array_sizes(array, sigmas, key, sigmas_label)
                  sigmas = as_flex_double(sigmas, sigmas_label)
                  array.set_sigmas(sigmas.data())
                  info = array.info()
                  array.set_info(
                    info.customized_copy(labels=info.labels+[sigmas_label],
                      wavelength=wavelength))
                  continue
              elif 'PHWT' in key:
                # Combine a PHWT phase column with its stored FWT partner.
                phwt_label = label
                fwt_label = label.replace('PHWT', 'FWT')
                if fwt_label not in refln_loop: continue
                phwt_array = array
                if fwt_label in self._arrays:
                  array = self._arrays[fwt_label]
                  check_array_sizes(array, phwt_array, fwt_label, phwt_label)
                  phases = as_flex_double(phwt_array, phwt_label)
                  info = array.info()
                  array = array.phase_transfer(phases, deg=True)
                  array.set_info(
                    info.customized_copy(labels=info.labels+[phwt_label]))
                  self._arrays[fwt_label] = array
                  continue
              elif 'HL_' in key:
                # All four Hendrickson-Lattman columns are stored as a single
                # array under the key of the A coefficient.
                hl_letter = key[key.find('HL_')+3]
                hl_key = 'HL_' + hl_letter
                key = key.replace(hl_key, 'HL_A')
                if key in self._arrays:
                  continue # this array is already dealt with
                hl_labels = [label.replace(hl_key, 'HL_'+letter) for letter in 'ABCD']
                hl_values = [cif_block.get(hl_key) for hl_key in hl_labels]
                if hl_values.count(None) == 0:
                  selection = self.get_selection(
                    hl_values[0], wavelength_id=w_id,
                    crystal_id=crys_id, scale_group_code=scale_group)
                  hl_values = [as_double_or_none_if_all_question_marks(
                    hl.select(selection), column_name=lab)
                               for hl, lab in zip(hl_values, hl_labels)]
                  array = miller.array(miller.set(
                    self.crystal_symmetry, self.indices.select(selection)
                    ).auto_anomalous(), flex.hendrickson_lattman(*hl_values))
                  labels = labels[:-1]+hl_labels
              elif '.B_' in key or '_B_' in key:
                # Merge the B column into the already stored A column as a
                # complex array.
                if '.B_' in key:
                  key, key_b = key.replace('.B_', '.A_'), key
                  label, label_b = label.replace('.B_', '.A_'), label
                elif '_B_' in key:
                  key, key_b = key.replace('_B', '_A'), key
                  label, label_b = label.replace('_B', '_A'), label
                if key in refln_loop and key_b in refln_loop:
                  b_part = array.data()
                  if key in self._arrays:
                    info = self._arrays[key].info()
                    a_part = self._arrays[key].data()
                    self._arrays[key] = self._arrays[key].array(
                      data=flex.complex_double(a_part, b_part))
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[key_b]))
                    continue
              elif ('phase_' in key and not "_meas" in key and
                    self.crystal_symmetry.space_group() is not None):
                # Pair a phase column with the F_ (or F_..._au) column of the
                # same name, when the loop has one.
                alt_key1 = label.replace('phase_', 'F_')
                alt_key2 = alt_key1 + '_au'
                if alt_key1 in refln_loop:
                  phase_key = label
                  key = alt_key1+key_suffix
                elif alt_key2 in refln_loop:
                  phase_key = label
                  key = alt_key2+key_suffix
                else: phase_key = None
                if phase_key is not None:
                  phases = array.data()
                  if key in self._arrays:
                    array = self._arrays[key]
                    array = as_flex_double(array, key)
                    check_array_sizes(array, phases, key, phase_key)
                    info = self._arrays[key].info()
                    self._arrays[key] = array.phase_transfer(phases, deg=True)
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[phase_key]))
                  else:
                    array = self.flex_std_string_as_miller_array(
                      refln_loop[label], wavelength_id=w_id, crystal_id=crys_id,
                      scale_group_code=scale_group)
                    check_array_sizes(array, phases, key, phase_key)
                    # NOTE(review): the result of phase_transfer is discarded
                    # here, unlike in the branch above - confirm whether that
                    # is intended.
                    array.phase_transfer(phases, deg=True)
                    labels = labels+[label, phase_key]
              if base_array_info.labels is not None:
                labels = base_array_info.labels + labels
              # determine observation type
              stripped_key = rstrip_substrings(
                key, [key_suffix, '_au', '_meas', '_calc', '_plus', '_minus'])
              if (stripped_key.endswith('F_squared') or
                  stripped_key.endswith('intensity') or
                  stripped_key.endswith('.I') or
                  stripped_key.endswith('_I')) and (
                    array.is_real_array() or array.is_integer_array()):
                array.set_observation_type_xray_intensity()
              elif (stripped_key.endswith('F') and (
                array.is_real_array() or array.is_integer_array())):
                array.set_observation_type_xray_amplitude()
              if (array.is_xray_intensity_array() or
                  array.is_xray_amplitude_array()):
                # e.g. merge_equivalents treats integer arrays differently, so must
                # convert integer observation arrays here to be safe
                if isinstance(array.data(), flex.int):
                  array = array.customized_copy(data=array.data().as_double())
              array.set_info(base_array_info.customized_copy(labels=labels))
              # Record the wavelength on observation arrays (intensities and
              # amplitudes) only.
              if (array.is_xray_intensity_array() or
                  array.is_xray_amplitude_array()):
                info = array.info()
                array.set_info(info.customized_copy(wavelength=wavelength))
              self._arrays.setdefault(key, array)
    # Merge matching *_plus / *_minus arrays into one anomalous array. The
    # loop runs over a copy because entries are popped from self._arrays.
    for key, array in self._arrays.copy().items():
      if (   key.endswith('_minus') or '_minus_' in key
          or key.endswith('_plus') or '_plus_' in key):
        if '_minus' in key:
          minus_key = key
          plus_key = key.replace('_minus', '_plus')
        elif '_plus' in key:
          plus_key = key
          minus_key = key.replace('_plus', '_minus')
        if plus_key in self._arrays and minus_key in self._arrays:
          plus_array = self._arrays.pop(plus_key)
          minus_array = self._arrays.pop(minus_key)
          # The minus array is stored with negated indices before the two
          # arrays are concatenated.
          minus_array = minus_array.customized_copy(
            indices=-minus_array.indices()).set_info(minus_array.info())
          array = plus_array.concatenate(
            minus_array, assert_is_similar_symmetry=False)
          array = array.customized_copy(anomalous_flag=True)
          array.set_info(minus_array.info().customized_copy(
            labels=list(
              OrderedSet(plus_array.info().labels+minus_array.info().labels))))
          array.set_observation_type(plus_array.observation_type())
          self._arrays.setdefault(key, array)

    if len(self._arrays) == 0:
      raise CifBuilderError("No reflection data present in cif block")

  def get_miller_indices_containing_loops(self):
    """Return ``(indices, loop)`` pairs for loops that hold Miller indices.

    For each loop in ``self.cif_block.loops`` the first key containing
    ``'index_h'`` determines the h, k and l columns; these are converted to
    ``flex.int`` and combined into a ``flex.miller_index``. Loops without
    such a key are skipped.

    Raises CifBuilderError if the k or l column is missing, or if a column
    cannot be converted to integers.
    """
    loops = []
    for loop in self.cif_block.loops.values():
      for key in loop.keys():
        if 'index_h' not in key: continue
        hkl_str = [loop.get(key.replace('index_h', 'index_%s' %i)) for i in 'hkl']
        if hkl_str.count(None) > 0:
          raise CifBuilderError(
            "Miller indices missing from current CIF block (%s)"
            %key.replace('index_h', 'index_%s' %'hkl'[hkl_str.index(None)]))
        hkl_int = []
        for i,h_str in enumerate(hkl_str):
          try:
            h_int = flex.int(h_str)
          except ValueError as e:
            raise CifBuilderError(
              "Invalid item for Miller index %s: %s" % ("HKL"[i], str(e)))
          hkl_int.append(h_int)
        indices = flex.miller_index(*hkl_int)
        loops.append((indices, loop))
        # Only the first index_h key of a loop is used.
        break
    return loops
Exemplo n.º 6
0
class miller_array_builder(crystal_symmetry_builder):

    observation_types = {
        '_refln_F_squared': xray.intensity(),
        '_refln_intensity': xray.intensity(),
        '_refln_F': xray.amplitude(),
        '_refln_A': None,
    }

    def __init__(self, cif_block, base_array_info=None):
        crystal_symmetry_builder.__init__(self, cif_block)
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
        self._arrays = OrderedDict()
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in refln_loop.iteritems():
                # need to get these arrays first
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None: continue
                    counts = data.counts()
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = counts.keys()
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = counts.keys()
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = counts.keys()
            for label, value in sorted(refln_loop.items()):
                for w_id in wavelength_ids:
                    for crys_id in crystal_ids:
                        for scale_group in scale_groups:
                            if 'index_' in label: continue
                            key = label
                            labels = [label]
                            if (key.endswith('wavelength_id')
                                    or key.endswith('crystal_id')
                                    or key.endswith('scale_group_code')):
                                w_id = None
                                crys_id = None
                                scale_group = None
                            key_suffix = ''
                            if w_id is not None:
                                key_suffix += '_%i' % w_id
                                labels.insert(0, "wavelength_id=%i" % w_id)
                            if crys_id is not None:
                                key_suffix += '_%i' % crys_id
                                labels.insert(0, "crystal_id=%i" % crys_id)
                            if scale_group is not None:
                                key_suffix += '_%i' % scale_group
                                labels.insert(
                                    0, "scale_group_code=%i" % scale_group)
                            key += key_suffix
                            sigmas = None
                            if key in self._arrays: continue
                            array = self.flex_std_string_as_miller_array(
                                value,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            if array is None: continue
                            if '_sigma' in key:
                                sigmas_label = label
                                key = None
                                for suffix in ('', '_meas', '_calc'):
                                    if sigmas_label.replace(
                                            '_sigma', suffix) in refln_loop:
                                        key = sigmas_label.replace(
                                            '_sigma', suffix) + key_suffix
                                        break
                                if key is None:
                                    key = sigmas_label + key_suffix
                                elif key in self._arrays and self._arrays[
                                        key].sigmas() is None:
                                    sigmas = array
                                    array = self._arrays[key]
                                    check_array_sizes(array, sigmas, key,
                                                      sigmas_label)
                                    sigmas = as_flex_double(
                                        sigmas, sigmas_label)
                                    array.set_sigmas(sigmas.data())
                                    info = array.info()
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels +
                                            [sigmas_label]))
                                    continue
                            elif 'PHWT' in key:
                                phwt_label = label
                                fwt_label = label.replace('PHWT', 'FWT')
                                if fwt_label not in refln_loop: continue
                                phwt_array = array
                                if fwt_label in self._arrays:
                                    array = self._arrays[fwt_label]
                                    check_array_sizes(array, phwt_array,
                                                      fwt_label, phwt_label)
                                    phases = as_flex_double(
                                        phwt_array, phwt_label)
                                    info = array.info()
                                    array = array.phase_transfer(phases,
                                                                 deg=True)
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels + [phwt_label]))
                                    self._arrays[fwt_label] = array
                                    continue
                            elif 'HL_' in key:
                                hl_letter = key[key.find('HL_') + 3]
                                hl_key = 'HL_' + hl_letter
                                key = key.replace(hl_key, 'HL_A')
                                if key in self._arrays:
                                    continue  # this array is already dealt with
                                hl_labels = [
                                    label.replace(hl_key, 'HL_' + letter)
                                    for letter in 'ABCD'
                                ]
                                hl_keys = [
                                    key.replace(hl_key, 'HL_' + letter)
                                    for letter in 'ABCD'
                                ]
                                hl_values = [
                                    cif_block.get(hl_key)
                                    for hl_key in hl_labels
                                ]
                                if hl_values.count(None) == 0:
                                    selection = self.get_selection(
                                        hl_values[0],
                                        wavelength_id=w_id,
                                        crystal_id=crys_id,
                                        scale_group_code=scale_group)
                                    hl_values = [
                                        as_double_or_none_if_all_question_marks(
                                            hl.select(selection),
                                            column_name=lab)
                                        for hl, lab in zip(
                                            hl_values, hl_labels)
                                    ]
                                    array = miller.array(
                                        miller.set(
                                            self.crystal_symmetry,
                                            self.indices.select(
                                                selection)).auto_anomalous(),
                                        flex.hendrickson_lattman(*hl_values))
                                    labels = labels[:-1] + hl_labels
                            elif '.B_' in key or '_B_' in key:
                                if '.B_' in key:
                                    key, key_b = key.replace('.B_', '.A_'), key
                                    label, label_b = label.replace(
                                        '.B_', '.A_'), label
                                elif '_B_' in key:
                                    key, key_b = key.replace('_B', '_A'), key
                                    label, label_b = label.replace('_B',
                                                                   '_A'), label
                                if key in refln_loop and key_b in refln_loop:
                                    b_part = array.data()
                                    if key in self._arrays:
                                        info = self._arrays[key].info()
                                        a_part = self._arrays[key].data()
                                        self._arrays[key] = self._arrays[
                                            key].array(
                                                data=flex.complex_double(
                                                    a_part, b_part))
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels + [key_b]))
                                        continue
                            elif ('phase_' in key and not key.endswith('_meas')
                                  and self.crystal_symmetry.space_group()
                                  is not None):
                                alt_key1 = label.replace('phase_', 'F_')
                                alt_key2 = alt_key1 + '_au'
                                if alt_key1 in refln_loop:
                                    phase_key = label
                                    key = alt_key1 + key_suffix
                                elif alt_key2 in refln_loop:
                                    phase_key = label
                                    key = alt_key2 + key_suffix
                                else:
                                    phase_key = None
                                if phase_key is not None:
                                    phases = array.data()
                                    if key in self._arrays:
                                        array = self._arrays[key]
                                        array = as_flex_double(array, key)
                                        check_array_sizes(
                                            array, phases, key, phase_key)
                                        info = self._arrays[key].info()
                                        self._arrays[
                                            key] = array.phase_transfer(
                                                phases, deg=True)
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels +
                                                [phase_key]))
                                    else:
                                        array = self.flex_std_string_as_miller_array(
                                            refln_loop[label],
                                            wavelength_id=w_id,
                                            crystal_id=crys_id,
                                            scale_group_code=scale_group)
                                        check_array_sizes(
                                            array, phases, key, phase_key)
                                        array.phase_transfer(phases, deg=True)
                                        labels = labels + [label, phase_key]
                            if base_array_info.labels is not None:
                                labels = base_array_info.labels + labels

                            def rstrip_substrings(string, substrings):
                                for substr in substrings:
                                    if substr == '': continue
                                    if string.endswith(substr):
                                        string = string[:-len(substr)]
                                return string

                            # determine observation type
                            stripped_key = rstrip_substrings(
                                key, [
                                    key_suffix, '_au', '_meas', '_calc',
                                    '_plus', '_minus'
                                ])
                            if (stripped_key.endswith('F_squared')
                                    or stripped_key.endswith('intensity')
                                    or stripped_key.endswith('.I')
                                    or stripped_key.endswith('_I')) and (
                                        array.is_real_array()
                                        or array.is_integer_array()):
                                array.set_observation_type_xray_intensity()
                            elif (stripped_key.endswith('F')
                                  and (array.is_real_array()
                                       or array.is_integer_array())):
                                array.set_observation_type_xray_amplitude()
                            if (array.is_xray_amplitude_array()
                                    or array.is_xray_amplitude_array()):
                                # e.g. merge_equivalents treats integer arrays differently, so must
                                # convert integer observation arrays here to be safe
                                if isinstance(array.data(), flex.int):
                                    array = array.customized_copy(
                                        data=array.data().as_double())
                            array.set_info(
                                base_array_info.customized_copy(labels=labels))
                            self._arrays.setdefault(key, array)
        for key, array in self._arrays.copy().iteritems():
            if (key.endswith('_minus') or '_minus_' in key
                    or key.endswith('_plus') or '_plus_' in key):
                if '_minus' in key:
                    minus_key = key
                    plus_key = key.replace('_minus', '_plus')
                elif '_plus' in key:
                    plus_key = key
                    minus_key = key.replace('_plus', '_minus')
                if plus_key in self._arrays and minus_key in self._arrays:
                    plus_array = self._arrays.pop(plus_key)
                    minus_array = self._arrays.pop(minus_key)
                    minus_array = minus_array.customized_copy(
                        indices=-minus_array.indices()).set_info(
                            minus_array.info())
                    array = plus_array.concatenate(
                        minus_array, assert_is_similar_symmetry=False)
                    array = array.customized_copy(anomalous_flag=True)
                    array.set_info(
                        minus_array.info().customized_copy(labels=list(
                            OrderedSet(plus_array.info().labels +
                                       minus_array.info().labels))))
                    array.set_observation_type(plus_array.observation_type())
                    self._arrays.setdefault(key, array)

        if len(self._arrays) == 0:
            raise CifBuilderError("No reflection data present in cif block")

    def get_miller_indices_containing_loops(self):
        """Collect the loops of the CIF block that carry Miller indices.

        A loop qualifies when one of its keys contains ``index_h``; the
        companion ``index_k`` and ``index_l`` keys are derived from that key
        by substitution.  Only the first qualifying key of each loop is
        used.

        Returns:
            A list of ``(indices, loop)`` tuples, where ``indices`` is the
            ``flex.miller_index`` built from the three index columns and
            ``loop`` is the loop they were read from.

        Raises:
            CifBuilderError: if the h, k or l column is missing from a loop
                that has an ``index_h`` key, or if ``flex.int`` rejects a
                column with a ``ValueError``.
        """
        loops = []
        for loop in self.cif_block.loops.values():
            for key in loop.keys():
                if 'index_h' not in key: continue
                hkl_str = [
                    loop.get(key.replace('index_h', 'index_%s' % i))
                    for i in 'hkl'
                ]
                if hkl_str.count(None) > 0:
                    # Report the name of the first index column that is absent.
                    raise CifBuilderError(
                        "Miller indices missing from current CIF block (%s)" %
                        key.replace('index_h',
                                    'index_%s' % 'hkl'[hkl_str.index(None)]))
                hkl_int = []
                for i, h_str in enumerate(hkl_str):
                    try:
                        h_int = flex.int(h_str)
                    # "except X as e" is accepted by Python 2.6+ and Python 3,
                    # unlike the comma form.
                    except ValueError as e:
                        raise CifBuilderError(
                            "Invalid item for Miller index %s: %s" %
                            ("HKL"[i], str(e)))
                    hkl_int.append(h_int)
                indices = flex.miller_index(*hkl_int)
                loops.append((indices, loop))
                # One set of indices per loop is enough; move to the next loop.
                break
        return loops
Exemplo n.º 7
0
class align_crystal(object):
  """Search for goniometer angles that align crystal vectors with lab axes.

  For every requested pair of crystal vectors and every symmetry operation
  of the crystal's space group, the constructor asks
  ``rotation_decomposition.solve_r3_rotation_for_angles_given_axes`` for the
  goniometer angles realising the alignment.  Two attributes hold the result:

  * ``all_solutions`` -- ``{(v1, v2): {smx: [solution, ...]}}``
  * ``unique_solutions`` -- ``{rounded angles: OrderedSet of (v1, v2)}``,
    where the key is ``solution[1:]`` rounded to three decimals.
  """

  # Display names for the three basis vectors, keyed by their element tuples.
  vector_names = {
    a.elems: 'a',
    b.elems: 'b',
    c.elems: 'c',
  }

  def __init__(self, experiment, vectors, frame='reciprocal', mode='main'):
    """Compute all alignment solutions for ``vectors``.

    Args:
      experiment: object providing ``beam``, ``goniometer``, ``scan`` and
        ``crystal`` models.
      vectors: iterable of ``(v1, v2)`` pairs of crystal vectors.
      frame: ``'reciprocal'`` (default) or ``'direct'``; with ``'direct'``
        the B matrix is replaced by its inverse transpose.
      mode: ``'cusp'`` selects l1 = rotation axis x s0 and l2 = rotation
        axis; any other value (default ``'main'``) places l1 along the
        rotation axis.

    Raises:
      Sorry: if the goniometer is not a 3-axis ``MultiAxisGoniometer``.
    """
    from libtbx.utils import Sorry
    self.experiment = experiment
    self.vectors = vectors
    self.frame = frame
    self.mode = mode

    gonio = experiment.goniometer

    self.s0 = matrix.col(self.experiment.beam.get_s0())
    self.rotation_axis = matrix.col(gonio.get_rotation_axis())

    from dxtbx.model import MultiAxisGoniometer
    if not isinstance(gonio, MultiAxisGoniometer):
      raise Sorry('Only MultiAxisGoniometer models supported')
    axes = gonio.get_axes()
    if len(axes) != 3:
      raise Sorry('Only 3-axis goniometers supported')
    e1, e2, e3 = (matrix.col(e) for e in reversed(axes))

    fixed_rotation = matrix.sqr(gonio.get_fixed_rotation())
    setting_rotation = matrix.sqr(gonio.get_setting_rotation())
    rotation_axis = matrix.col(gonio.get_rotation_axis_datum())
    rotation_matrix = rotation_axis.axis_and_angle_as_r3_rotation_matrix(
      experiment.scan.get_oscillation()[0], deg=True)

    # Goniometer datum setting [D] at which the orientation was determined.
    # The setting matrix [U] will vary with the datum setting according to
    # [U] = [D] [U0].
    # XXX In DIALS recorded U is equivalent to U0 - D is applied to U inside
    # prediction, so D is not applied below.  It does not depend on the loop
    # variables, hence it is evaluated once here.
    D = (setting_rotation * rotation_matrix * fixed_rotation).inverse()

    from dials.algorithms.refinement import rotation_decomposition
    from rstbx.cftbx.coordinate_frame_helpers import align_reference_frame

    results = OrderedDict()

    # from https://github.com/legrandp/xdsme/blob/master/XOalign/XOalign.py#L427
    #  referential_permutations sign permutations for four permutations of
    #        parallel/antiparallel (rotation axis & beam)
    #    y1 // e1, y2 // beamVector;  y1 anti// e1, y2 // beamVector
    #    y1 // e1, y2 anti// beamVector;  y1 anti// e1, y2 anti// beamVector

    ex = matrix.col((1, 0, 0))
    ey = matrix.col((0, 1, 0))
    ez = matrix.col((0, 0, 1))

    referential_permutations = ([ ex,  ey,  ez],
                                [-ex, -ey,  ez],
                                [ ex, -ey, -ez],
                                [-ex,  ey, -ez])

    for (v1_, v2_) in self.vectors:
      results[(v1_, v2_)] = OrderedDict()
      space_group = self.experiment.crystal.get_space_group()
      for smx in list(space_group.smx()):
        results[(v1_, v2_)][smx] = []
        # Work on a copy so the experiment's crystal model is left untouched.
        crystal = copy.deepcopy(self.experiment.crystal)
        cb_op = sgtbx.change_of_basis_op(smx)
        crystal = crystal.change_basis(cb_op)

        U = matrix.sqr(crystal.get_U())
        U0 = U

        B = matrix.sqr(crystal.get_B())

        if self.frame == 'direct':
          B = B.inverse().transpose()

        v1_0 = U0 * B * v1_
        v2_0 = U0 * B * v2_

        #c  (b) The laboratory frame vectors l1 & l2 are normally specified with the
        #c MODE command: MODE MAIN (the default) sets l1 (along which v1 will be
        #c placed) along the principle goniostat axis e1 (Omega), and l2 along
        #c the beam s0. This allows rotation for instance around a principle axis.
        #c The other mode is MODE CUSP, which puts l1 (v1) perpendicular to the
        #c beam (s0) and the e1 (Omega) axis, and l2 (v2) in the plane containing
        #c l1 & e1 (ie l1 = e1 x s0, l2 = e1).

        if self.mode == 'cusp':
          l1 = self.rotation_axis.cross(self.s0)
          l2 = self.rotation_axis
        else:
          l1 = self.rotation_axis.normalize()
          l3 = l1.cross(self.s0).normalize()
          l2 = l1.cross(l3)

        for perm in referential_permutations:
          S = matrix.sqr(perm[0].elems + perm[1].elems + perm[2].elems)
          R = align_reference_frame(v1_0, S * l1, v2_0, S * l2)

          solutions = rotation_decomposition.solve_r3_rotation_for_angles_given_axes(
            R, e1, e2, e3, return_both_solutions=True, deg=True)

          if solutions is None:
            continue

          results[(v1_, v2_)][smx].extend(solutions)

    self.all_solutions = results

    # Group the vector pairs by the angles they lead to; the first element of
    # each solution is not part of the key.
    self.unique_solutions = OrderedDict()
    for (v1, v2), result in results.items():
      for solutions in result.values():
        for solution in solutions:
          k = tuple(round(a, 3) for a in solution[1:])
          self.unique_solutions.setdefault(k, OrderedSet())
          self.unique_solutions[k].add((v1, v2))

  def _vector_as_str(self, v):
    """Return 'a'/'b'/'c' (with '*' in the reciprocal frame) or ``str`` of the elements."""
    v = v.elems
    if v in self.vector_names:
      vstr = self.vector_names[v]
      if self.frame == 'reciprocal':
        vstr += '*'
    else:
      vstr = str(v)
    return vstr

  def show(self):
    """Print the axis-angle tables from ``info()`` and the table of unique solutions."""
    from libtbx import table_utils
    self.info()

    rows = []
    names = self.experiment.goniometer.get_names()

    space_group = self.experiment.crystal.get_space_group()
    reciprocal = self.frame == 'reciprocal'
    for angles, vector_pairs in self.unique_solutions.items():
      # Only the first vector pair of each angle setting is described.
      v1, v2 = list(vector_pairs)[0]
      rows.append((
        describe(v1, space_group, reciprocal=reciprocal),
        describe(v2, space_group, reciprocal=reciprocal),
        '% 7.3f' %angles[0], '% 7.3f' %angles[1],
      ))
    rows = [('Primary axis', 'Secondary axis', names[1], names[0])] + \
           sorted(rows)
    print('Independent solutions:')
    print(table_utils.format(rows=rows, has_header=True))

  def as_json(self, filename=None):
    """Serialise the unique solutions and the goniometer model as JSON.

    Args:
      filename: when given, the JSON is written to this path and ``None`` is
        returned; otherwise the JSON text is returned as a string.
    """
    names = self.experiment.goniometer.get_names()
    solutions = []
    space_group = self.experiment.crystal.get_space_group()
    for angles, solns in self.unique_solutions.items():
      solutions.append({
        'primary_axis': [self._vector_as_str(v1) for v1, v2 in solns],
        'secondary_axis': [self._vector_as_str(v2) for v1, v2 in solns],
        'primary_axis_type': [axis_type(v1, space_group) for v1, v2 in solns],
        'secondary_axis_type': [axis_type(v2, space_group) for v1, v2 in solns],
        names[1]: angles[0],
        names[0]: angles[1]
      })
    d = {'solutions': solutions,
         'goniometer': self.experiment.goniometer.to_dict()}
    import json
    if filename is not None:
      # The with-block closes the file deterministically; json.dump emits
      # text, so the file is opened in text mode.
      with open(filename, 'w') as json_file:
        json.dump(d, json_file, indent=2)
      return None
    else:
      return json.dumps(d, indent=2)

  def info(self):
    """Print the angles between the cell axes and the experimental axes.

    Two tables are printed, one for the reciprocal cell axes and one for the
    direct cell axes, each followed by a blank line.  Every table lists the
    angle to the goniometer axes 0 and 2 and to the beam vector ``s0``.
    """
    from libtbx import table_utils

    U = matrix.sqr(self.experiment.crystal.get_U())
    B = matrix.sqr(self.experiment.crystal.get_B())

    a_star_ = U * B * a_star
    b_star_ = U * B * b_star
    c_star_ = U * B * c_star

    Binvt = B.inverse().transpose()

    a_ = U * Binvt * a
    b_ = U * Binvt * b
    c_ = U * Binvt * c

    names = self.experiment.goniometer.get_names()
    axes = self.experiment.goniometer.get_axes()

    def angle_rows(header, cell_axes):
      # One row per experimental direction, one column per cell axis.
      references = (
        (names[0], matrix.col(axes[0])),
        ('Beam', self.s0),
        (names[2], matrix.col(axes[2])),
      )
      rows = [header]
      for label, reference in references:
        rows.append([label] + [
          '%.3f' %smallest_angle(axis.angle(reference, deg=True))
          for axis in cell_axes])
      return rows

    rows = angle_rows(['Experimental axis', 'a*', 'b*', 'c*'],
                      (a_star_, b_star_, c_star_))
    print('Angles between reciprocal cell axes and principal experimental axes:')
    print(table_utils.format(rows=rows, has_header=True))
    print('')

    rows = angle_rows(['Experimental axis', 'a', 'b', 'c'], (a_, b_, c_))
    print('Angles between unit cell axes and principal experimental axes:')
    print(table_utils.format(rows=rows, has_header=True))
    print('')
Exemplo n.º 8
0
  def __init__(self, pdb_hierarchy,
               sequences,
               alignment_params=None,
               crystal_symmetry=None,
               coordinate_precision=5,
               occupancy_precision=3,
               b_iso_precision=5,
               u_aniso_precision=5):
    """Build the CIF block and add entity/sequence information to it.

    After the base-class initialisation the chains of ``pdb_hierarchy`` are
    aligned against ``sequences`` with ``mmtbx.validation.sequence``.  From
    that alignment the ``_entity``, ``_entity_poly`` and ``_entity_poly_seq``
    loops are created, and ``_atom_site.label_seq_id`` and
    ``_atom_site.label_entity_id`` are filled in.

    Args:
      pdb_hierarchy: hierarchy whose chains are aligned and written out.
      sequences: sequences handed to the sequence validation.
      alignment_params: passed to the validation as ``params``.
      crystal_symmetry, coordinate_precision, occupancy_precision,
      b_iso_precision, u_aniso_precision: forwarded unchanged to
        ``pdb_hierarchy_as_cif_block.__init__``.
    """

    pdb_hierarchy_as_cif_block.__init__(
      self, pdb_hierarchy, crystal_symmetry=crystal_symmetry,
    coordinate_precision=coordinate_precision,
    occupancy_precision=occupancy_precision,
    b_iso_precision=b_iso_precision,
    u_aniso_precision=u_aniso_precision)

    import mmtbx.validation.sequence
    from iotbx.pdb import amino_acid_codes
    validation = mmtbx.validation.sequence.validation(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      params=alignment_params,
      extract_residue_groups=True,
      log=null_out(), # silence output
    )

    entity_loop = iotbx.cif.model.loop(header=(
      '_entity.id',
      '_entity.type',
      #'_entity.src_method',
      #'_entity.pdbx_description',
      '_entity.formula_weight',
      '_entity.pdbx_number_of_molecules',
      #'_entity.details',
      #'_entity.pdbx_mutation',
      #'_entity.pdbx_fragment',
      #'_entity.pdbx_ec'
    ))

    entity_poly_loop = iotbx.cif.model.loop(header=(
      '_entity_poly.entity_id',
      '_entity_poly.type',
      '_entity_poly.nstd_chirality',
      '_entity_poly.nstd_linkage',
      '_entity_poly.nstd_monomer',
      '_entity_poly.pdbx_seq_one_letter_code',
      '_entity_poly.pdbx_seq_one_letter_code_can',
      '_entity_poly.pdbx_strand_id',
      '_entity_poly.type_details'
    ))

    entity_poly_seq_loop = iotbx.cif.model.loop(header=(
      '_entity_poly_seq.entity_id',
      '_entity_poly_seq.num',
      '_entity_poly_seq.mon_id',
      '_entity_poly_seq.hetero',
    ))

    sequence_counts = OrderedDict()
    sequence_to_chain_ids = {}
    entity_id = 0
    sequence_to_entity_id = {}
    chain_id_to_entity_id = {}
    sequence_to_chains = {}
    residue_group_to_seq_num_mapping = {}
    aligned_pdb_chains = OrderedSet()
    non_polymer_counts = dict_with_default_0()
    non_polymer_resname_to_entity_id = OrderedDict()

    # Pass 1: one entity per distinct aligned sequence, plus a sequence
    # number for every residue group that took part in an alignment.
    for chain in validation.chains:
      sequence = chain.alignment.b
      if sequence not in sequence_to_entity_id:
        entity_id += 1
        sequence_to_entity_id[sequence] = entity_id
      sequence_counts.setdefault(sequence, 0)
      sequence_counts[sequence] += 1
      sequence_to_chain_ids.setdefault(sequence, [])
      sequence_to_chain_ids[sequence].append(chain.chain_id)
      sequence_to_chains.setdefault(sequence, [])
      sequence_to_chains[sequence].append(chain)
      chain_id_to_entity_id[chain.chain_id] = sequence_to_entity_id[sequence]
      aligned_pdb_chains.add(chain.residue_groups[0].parent())

      assert len(chain.residue_groups) + chain.n_missing_start + chain.n_missing_end == len(sequence)
      # Pad with None so positions line up with the aligned sequence.
      residue_groups = [None] * chain.n_missing_start + chain.residue_groups + [None] * chain.n_missing_end
      seq_num = 0
      for i, residue_group in enumerate(residue_groups):
        if residue_group is None and chain.alignment.b[i] == '-':
          # a deletion
          continue
        seq_num += 1
        if residue_group is not None:
          residue_group_to_seq_num_mapping[
            residue_group] = seq_num

    # Computed once, after every aligned chain is known.  Defining it here
    # also keeps the name bound when validation.chains is empty.
    unaligned_pdb_chains = OrderedSet(pdb_hierarchy.chains()) - aligned_pdb_chains

    # Pass 2: every residue name found in an unaligned chain becomes its own
    # (non-polymer) entity.
    for pdb_chain in unaligned_pdb_chains:
      for residue_group in pdb_chain.residue_groups():
        for resname in residue_group.unique_resnames():
          if resname not in non_polymer_resname_to_entity_id:
            entity_id += 1
            non_polymer_resname_to_entity_id[resname] = entity_id
          non_polymer_counts[resname] += 1

    # Pass 3: emit the polymer rows of the three entity loops.
    for sequence in sequence_counts:
      entity_poly_seq_num = 0
      entity_id = sequence_to_entity_id[sequence]

      entity_loop.add_row((
        entity_id,
        'polymer', #polymer/non-polymer/macrolide/water
        #'?', #src_method
        #'?', # pdbx_description
        '?', # formula_weight
        len(sequence_to_chains[sequence]), # pdbx_number_of_molecules
        #'?', # details
        #'?', # pdbx_mutation
        #'?', # pdbx_fragment
        #'?' # pdbx_ec
      ))

      # The definition of the cif item _entity_poly.pdbx_seq_one_letter_code
      # says that modifications and non-standard amino acids should be encoded
      # as 'X', however in practice the PDB seem to encode them as the three-letter
      # code in parentheses.
      pdbx_seq_one_letter_code = []
      pdbx_seq_one_letter_code_can = []

      # The first chain with this sequence stands in for the whole entity.
      chain = sequence_to_chains[sequence][0]
      is_protein = chain.chain_type == mmtbx.validation.sequence.PROTEIN

      for i, one_letter_code in enumerate(sequence):

        #Data items in the ENTITY_POLY_SEQ category specify the sequence
        #of monomers in a polymer. Allowance is made for the possibility
        #of microheterogeneity in a sample by allowing a given sequence
        #number to be correlated with more than one monomer ID. The
        #corresponding ATOM_SITE entries should reflect this
        #heterogeneity.

        monomer_id = None
        if i >= chain.n_missing_start and i < (len(sequence) - chain.n_missing_end):
          monomer_id = chain.resnames[i-chain.n_missing_start]

        if monomer_id is None and one_letter_code == '-': continue

        pdbx_seq_one_letter_code_can.append(one_letter_code)

        if monomer_id is None:
          if is_protein:
            monomer_id = amino_acid_codes.three_letter_given_one_letter.get(
              one_letter_code, "UNK") # XXX
          else:
            monomer_id = one_letter_code
        else:
          if is_protein:
            one_letter_code = amino_acid_codes.one_letter_given_three_letter.get(
              monomer_id, "(%s)" %monomer_id)

        pdbx_seq_one_letter_code.append(one_letter_code)

        entity_poly_seq_num += 1

        entity_poly_seq_loop.add_row((
          entity_id,
          entity_poly_seq_num,
          monomer_id,
          'no', #XXX
        ))

      entity_poly_type = '?'
      entity_nstd_chirality = 'n'
      # we should probably determine the chirality more correctly by examining
      # the chirality of the backbone chain rather than relying on the residue
      # names to be correct
      if is_protein:
        n_d_peptides = 0
        n_l_peptides = 0
        n_achiral_peptides = 0
        n_unknown = 0
        for resname in chain.resnames:
          if resname == "GLY":
            n_achiral_peptides += 1
          elif resname in iotbx.pdb.common_residue_names_amino_acid:
            n_l_peptides += 1
          elif resname in amino_acid_codes.three_letter_l_given_three_letter_d:
            n_d_peptides += 1
          else:
            n_unknown += 1
        n_total = n_d_peptides + n_l_peptides + n_achiral_peptides + n_unknown
        # Explicit float division: the fractions must not be truncated to 0/1
        # by integer division.  With no residues the type stays '?'.
        if n_total > 0:
          if float(n_l_peptides + n_achiral_peptides)/n_total > 0.5:
            entity_poly_type = 'polypeptide(L)'
            if n_d_peptides > 0:
              entity_nstd_chirality = 'y'
          elif float(n_d_peptides + n_achiral_peptides)/n_total > 0.5:
            entity_poly_type = 'polypeptide(D)'
            if n_l_peptides > 0:
              entity_nstd_chirality = 'y'
      elif chain.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID:
        n_dna = 0
        n_rna = 0
        n_unknown = 0
        for resname in chain.resnames:
          if resname is not None and resname.strip().upper() in (
            'AD', 'CD', 'GD', 'TD', 'DA', 'DC', 'DG', 'DT'):
            n_dna += 1
          # NOTE(review): this list has 'T' but no 'U' -- confirm whether
          # uracil residues are meant to be counted as RNA here.
          elif resname is not None and resname.strip().upper() in (
            'A', 'C', 'G', 'T', '+A', '+C', '+G', '+T'):
            n_rna += 1
          else:
            n_unknown += 1
        n_total = n_dna + n_rna + n_unknown
        if n_total > 0:
          if float(n_dna)/n_total > 0.5 and n_rna == 0:
            entity_poly_type = 'polydeoxyribonucleotide'
          elif float(n_rna)/n_total > 0.5 and n_dna == 0:
            entity_poly_type = 'polyribonucleotide'
          elif float(n_rna + n_dna)/n_total > 0.5:
            entity_poly_type = 'polydeoxyribonucleotide/polyribonucleotide hybrid'

      entity_poly_loop.add_row((
        entity_id,
        entity_poly_type,
        entity_nstd_chirality,
        'no',
        'no',
        wrap_always("".join(pdbx_seq_one_letter_code), width=80).strip(),
        wrap_always("".join(pdbx_seq_one_letter_code_can), width=80).strip(),
        ','.join(sequence_to_chain_ids[sequence]),
        '?'
      ))

    # Pass 4: the non-polymer (and water) rows of the entity loop.
    for resname, entity_id in non_polymer_resname_to_entity_id.items():
      entity_type = "non-polymer"
      if resname == "HOH":
        entity_type = "water" # XXX
      entity_loop.add_row((
        entity_id,
        entity_type, #polymer/non-polymer/macrolide/water
        #'?', #src_method
        #'?', # pdbx_description
        '?', # formula_weight
        non_polymer_counts[resname], # pdbx_number_of_molecules
        #'?', # details
        #'?', # pdbx_mutation
        #'?', # pdbx_fragment
        #'?' # pdbx_ec
      ))

    self.cif_block.add_loop(entity_loop)
    self.cif_block.add_loop(entity_poly_loop)
    self.cif_block.add_loop(entity_poly_seq_loop)
    self.cif_block.update(pdb_hierarchy.as_cif_block())

    label_entity_id = self.cif_block['_atom_site.label_entity_id']
    auth_seq_id = self.cif_block['_atom_site.auth_seq_id']
    ins_code = self.cif_block['_atom_site.pdbx_PDB_ins_code']
    auth_asym_id = self.cif_block['_atom_site.auth_asym_id']
    label_seq_id = flex.std_string(auth_seq_id.size(), '.')
    # Compare against a copy in which '?' is blank, so it can be matched
    # with the stripped icode of a residue group.
    ins_code = ins_code.deep_copy()
    ins_code.set_selected(ins_code == '?', '')
    for residue_group, seq_num in residue_group_to_seq_num_mapping.items():
      sel = ((auth_asym_id == residue_group.parent().id) &
             (ins_code == residue_group.icode.strip()) &
             (auth_seq_id == residue_group.resseq.strip()))
      label_seq_id.set_selected(sel, str(seq_num))
      label_entity_id.set_selected(
        sel, str(chain_id_to_entity_id[residue_group.parent().id]))

    for pdb_chain in unaligned_pdb_chains:
      for residue_group in pdb_chain.residue_groups():
        sel = ((auth_asym_id == residue_group.parent().id) &
               (ins_code == residue_group.icode.strip()) &
               (auth_seq_id == residue_group.resseq.strip()))
        label_entity_id.set_selected(
          sel, str(non_polymer_resname_to_entity_id[residue_group.unique_resnames()[0]]))

    self.cif_block['_atom_site.label_seq_id'] = label_seq_id

    # reorder the loops
    atom_site_loop = self.cif_block['_atom_site']
    atom_site_aniso_loop = self.cif_block.get('_atom_site_anisotrop')
    del self.cif_block['_atom_site']
    self.cif_block.add_loop(atom_site_loop)
    if atom_site_aniso_loop is not None:
      del self.cif_block['_atom_site_anisotrop']
      self.cif_block.add_loop(atom_site_aniso_loop)
Exemplo n.º 9
0
class miller_array_builder(crystal_symmetry_builder):
    # Changes to this class should pass regression tests:
    # cctbx_project\mmtbx\regression\tst_cif_as_mtz_wavelengths.py
    # cctbx_project\iotbx\cif\tests\tst_lex_parse_build.py
    # phenix_regression\cif_as_mtz\tst_cif_as_mtz.py

    observation_types = {
        # known types of column data to be tagged as either amplitudes or intensities as per
        # https://www.iucr.org/__data/iucr/cifdic_html/2/cif_mm.dic/index.html
        '_refln.F_squared': xray.intensity(),
        '_refln_F_squared': xray.intensity(),
        '_refln.intensity': xray.intensity(),
        '_refln.I(+)': xray.intensity(),
        '_refln.I(-)': xray.intensity(),
        '_refln.F_calc': xray.amplitude(),
        '_refln.F_meas': xray.amplitude(),
        '_refln.FP': xray.amplitude(),
        '_refln.F-obs': xray.amplitude(),
        '_refln.Fobs': xray.amplitude(),
        '_refln.F-calc': xray.amplitude(),
        '_refln.Fcalc': xray.amplitude(),
        '_refln.pdbx_F_': xray.amplitude(),
        '_refln.pdbx_I_': xray.intensity(),
        '_refln.pdbx_anom_difference': xray.amplitude(),
    }

    def guess_observationtype(self, labl):
        """Return the observation type registered for the prefix of ``labl``.

        The entries of ``self.observation_types`` are tried in the
        dictionary's iteration order and the value of the first key that
        ``labl`` starts with is returned; ``None`` when no key matches.
        """
        candidates = (
            obstype
            for prefix, obstype in self.observation_types.items()
            if labl.startswith(prefix)
        )
        return next(candidates, None)

    def __init__(self, cif_block, base_array_info=None, wavelengths=None):
        crystal_symmetry_builder.__init__(self, cif_block)
        self._arrays = OrderedDict()
        self._origarrays = OrderedDict(
        )  # used for presenting raw data tables in HKLviewer
        basearraylabels = []
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
            if base_array_info.labels:
                basearraylabels = base_array_info.labels
        if (wavelengths is None):
            wavelengths = {}
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in six.iteritems(refln_loop):
                # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other
                # columns in self.get_selection() used by self.flex_std_string_as_miller_array()
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None:
                        continue
                    counts = data.counts()
                    if key.endswith('wavelength_id'):
                        wavelength_ids = list(counts.keys())
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = list(counts.keys())
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = list(counts.keys())
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = list(counts.keys())
            labelsuffix = []
            wavelbl = []
            cryslbl = []
            scalegrplbl = []
            self._origarrays["HKLs"] = self.indices
            alllabels = list(sorted(refln_loop.keys()))
            remaininglabls = alllabels[:]  # deep copy the list
            # Parse labels matching cif column conventions
            # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html
            # and extract groups of labels or just single columns.
            # Groups corresponds to the map coefficients, phase and amplitudes,
            # amplitudes or intensities with sigmas and hendrickson-lattman columns.
            phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(
                remaininglabls)
            mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(
                remaininglabls)
            HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
            data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(
                remaininglabls)
            for w_id in wavelength_ids:
                for crys_id in crystal_ids:
                    for scale_group in scale_groups:
                        # If reflection data files contain more than one crystal, wavelength or scalegroup
                        # then add their id(s) as a suffix to data labels computed below. Needed for avoiding
                        # ambuguity but avoid when not needed to make labels more human readable!
                        if (len(wavelength_ids) > 1
                                or len(wavelengths) > 1) and w_id is not None:
                            wavelbl = ["wavelength_id=%i" % w_id]
                        if len(crystal_ids) > 1 and crys_id is not None:
                            cryslbl = ["crystal_id=%i" % crys_id]
                        if len(scale_groups) > 1 and scale_group is not None:
                            scalegrplbl = ["scale_group_code=%i" % scale_group]
                        labelsuffix = scalegrplbl + cryslbl + wavelbl
                        jlablsufx = ""
                        if len(labelsuffix):
                            jlablsufx = "," + ",".join(labelsuffix)
                        for mapcoefflabl in mapcoefflabls:
                            A_array = refln_loop[mapcoefflabl[0]]
                            B_array = refln_loop[mapcoefflabl[1]]
                            # deselect any ? marks in the two arrays, assuming both A and B have the same ? marks
                            selection = self.get_selection(
                                A_array,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            A_array = A_array.select(selection)
                            B_array = B_array.select(selection)
                            # form the miller array with map coefficients
                            data = flex.complex_double(flex.double(A_array),
                                                       flex.double(B_array))
                            millarr = miller.array(
                                miller.set(self.crystal_symmetry,
                                           self.indices.select(
                                               selection)).auto_anomalous(),
                                data)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None: continue
                            labl = basearraylabels + mapcoefflabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=labl,
                                    wavelength=wavelengths.get(w_id, None)))
                            self._arrays[mapcoefflabl[0] + jlablsufx] = millarr
                        for phaseamplabl in phaseamplabls:
                            amplitudestrarray = refln_loop[phaseamplabl[0]]
                            phasestrarray = refln_loop[phaseamplabl[1]]
                            millarr = self.flex_std_string_as_miller_array(
                                amplitudestrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            phasesmillarr = self.flex_std_string_as_miller_array(
                                phasestrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None or phasesmillarr is None:
                                continue
                            phases = as_flex_double(phasesmillarr,
                                                    phaseamplabl[1])
                            millarr = millarr.phase_transfer(phases, deg=True)
                            labl = basearraylabels + phaseamplabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=labl,
                                    wavelength=wavelengths.get(w_id, None)))
                            self._arrays[phaseamplabl[0] + jlablsufx] = millarr
                        for datlabl, siglabl, otype in data_sig_obstype_labls:
                            datastrarray = refln_loop[datlabl]
                            millarr = self.flex_std_string_as_miller_array(
                                datastrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None: continue
                            millarr = as_flex_double(millarr, datlabl)
                            datsiglabl = [datlabl]
                            if siglabl:
                                sigmasstrarray = refln_loop[siglabl]
                                sigmas = self.flex_std_string_as_miller_array(
                                    sigmasstrarray,
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                sigmas = as_flex_double(sigmas, siglabl)
                                millarr.set_sigmas(sigmas.data())
                                datsiglabl = [datlabl, siglabl]
                            datsiglabl = basearraylabels + datsiglabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=datsiglabl,
                                    wavelength=wavelengths.get(w_id, None)))
                            if otype is not None:
                                millarr.set_observation_type(otype)
                            self._arrays[datlabl + jlablsufx] = millarr
                        for hl_labels in HLcoefflabls:
                            hl_values = [
                                cif_block.get(hl_key) for hl_key in hl_labels
                            ]
                            if hl_values.count(None) == 0:
                                selection = self.get_selection(
                                    hl_values[0],
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                hl_values = [
                                    as_double_or_none_if_all_question_marks(
                                        hl.select(selection), column_name=lab)
                                    for hl, lab in zip(hl_values, hl_labels)
                                ]
                                # hl_values will be None for column data not matching w_id,crys_id,scale_group values
                                if hl_values == [None, None, None, None]:
                                    continue
                                millarr = miller.array(
                                    miller.set(
                                        self.crystal_symmetry,
                                        self.indices.select(
                                            selection)).auto_anomalous(),
                                    flex.hendrickson_lattman(*hl_values))
                                hlabels = basearraylabels + hl_labels + labelsuffix
                                millarr.set_info(
                                    base_array_info.customized_copy(
                                        labels=hlabels,
                                        wavelength=wavelengths.get(w_id,
                                                                   None)))
                                self._arrays[hl_labels[0] +
                                             jlablsufx] = millarr
                        # pick up remaining columns if any that weren't identified above
                        for label in alllabels:
                            if "index_" in label:
                                continue
                            datastrarray = refln_loop[label]
                            if label in remaininglabls:
                                labels = basearraylabels + [label
                                                            ] + labelsuffix
                                lablsufx = jlablsufx
                                millarr = self.flex_std_string_as_miller_array(
                                    datastrarray,
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                # millarr will be None for column data not matching w_id,crys_id,scale_group values
                                if (label.endswith(
                                        'wavelength_id'
                                ) or label.endswith(
                                        'crystal_id'
                                ) or  # get full array if any of these labels, not just subsets
                                        label.endswith('scale_group_code')):
                                    millarr = self.flex_std_string_as_miller_array(
                                        datastrarray,
                                        wavelength_id=None,
                                        crystal_id=None,
                                        scale_group_code=None)
                                    lablsufx = ""
                                    labels = basearraylabels + [label]
                                if millarr is None: continue
                                otype = self.guess_observationtype(label)
                                if otype is not None:
                                    millarr.set_observation_type(otype)
                                millarr.set_info(
                                    base_array_info.customized_copy(
                                        labels=labels,
                                        wavelength=wavelengths.get(w_id,
                                                                   None)))
                                self._arrays[label + lablsufx] = millarr
                            origarr = self.flex_std_string_as_miller_array(
                                datastrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            newlabel = label.replace("_refln.", "")
                            newlabel2 = newlabel.replace("_refln_", "")
                            if origarr:  # want only genuine miller arrays
                                self._origarrays[newlabel2 +
                                                 jlablsufx] = origarr.data()
        # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays
        # i.e. both friedel mates in the same array
        for key, array in six.iteritems(self._arrays.copy()):
            plus_key = ""
            if '_minus' in key:
                minus_key = key
                plus_key = key.replace('_minus', '_plus')
            elif '-' in key:
                minus_key = key
                plus_key = key.replace('-', '+')
            elif '_plus' in key:
                plus_key = key
                minus_key = key.replace('_plus', '_minus')
            elif '+' in key:
                plus_key = key
                minus_key = key.replace('+', '-')
            if plus_key in self._arrays and minus_key in self._arrays:
                plus_array = self._arrays.pop(plus_key)
                minus_array = self._arrays.pop(minus_key)
                minus_array = minus_array.customized_copy(
                    indices=-minus_array.indices()).set_info(
                        minus_array.info())
                array = plus_array.concatenate(
                    minus_array, assert_is_similar_symmetry=False)
                array = array.customized_copy(anomalous_flag=True)
                array.set_info(minus_array.info().customized_copy(labels=list(
                    OrderedSet(plus_array.info().labels +
                               minus_array.info().labels))))
                array.set_observation_type(plus_array.observation_type())
                self._arrays.setdefault(key, array)
        if len(self._arrays) == 0:
            raise CifBuilderError("No reflection data present in cif block")
        # Sort the ordered dictionary to resemble the order of columns in the cif file
        # This is to avoid any F_meas arrays accidentally being put adjacent to
        # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these
        # arrays may unintentionally be combined into a reconstructed anomalous amplitude
        # array when saving as an mtz file due to a problem in the iotbx/mtz module.
        # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html
        arrlstord = []
        arrlst = list(self._arrays)
        for arr in arrlst:
            for i, k in enumerate(refln_loop.keys()):
                if arr.split(",")[0] == k:
                    arrlstord.append((arr, i))
        # arrlstord must have the same keys as in the self._arrays dictionary
        assert sorted(arrlst) == sorted([e[0] for e in arrlstord])
        sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1])
        self._ordarrays = OrderedDict()
        for sortkey, i in sortarrlst:
            self._ordarrays.setdefault(sortkey, self._arrays[sortkey])
        self._arrays = self._ordarrays

    def get_HL_labels(self, keys):
        """Group Hendrickson-Lattman coefficient labels into quadruplets.

        Hendrickson-Lattman labels could look like 'HLAM', 'HLBM', 'HLCM',
        'HLDM' or like 'HLanomA', 'HLanomB', 'HLanomC', 'HLanomD'. A regular
        expression splits each label around its A/B/C/D letter; labels that
        share the same text before and after that letter form one group.

        Args:
            keys: iterable of cif column labels.

        Returns:
            A tuple (HLquads, remainingkeys). HLquads is a list of 4-element
            label lists, one for every group that has exactly four members,
            ordered by first appearance of the group in keys. remainingkeys
            holds, in input order, the keys not used in any quadruplet.
        """
        lstkeys = list(keys)  # cast into list if not a list
        alllabels = " ".join(lstkeys)
        allmatches = re.findall(r"(\S*(HL(\S*)[abcdABCD](\S*)))", alllabels)
        # Bucket the matched labels by (text before, text after) the A/B/C/D
        # letter. The tags are tracked in a list rather than a set so that the
        # order of the returned quadruplets does not depend on string hashing.
        tag_order = []
        labels_by_tag = {}
        for hm in allmatches:
            tag = (hm[2], hm[3])
            if tag not in labels_by_tag:
                labels_by_tag[tag] = []
                tag_order.append(tag)
            labels_by_tag[tag].append(hm[0])
        HLquads = []
        usedkeys = set()
        for tag in tag_order:
            hllist = labels_by_tag[tag]
            # only complete A, B, C, D sets are accepted
            if len(hllist) == 4:
                HLquads.append(list(hllist))
                usedkeys.update(hllist)
        remainingkeys = [e for e in lstkeys if e not in usedkeys]
        return HLquads, remainingkeys

    def get_mapcoefficient_labels(self, keys):
        """Pair the A and B columns of map coefficients.

        Extracts map coefficient labels from a list of cif column labels,
        e.g. ( _refln.A_calc_au _refln.B_calc_au ) , ( _refln.A_calc _refln.B_calc )

        Args:
            keys: iterable of cif column labels.

        Returns:
            A tuple (mapcoefflabels, remainingkeys). mapcoefflabels is a list
            of [A label, B label] pairs; remainingkeys holds, in input order,
            the keys that are not part of any pair.
        """
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # shallow copy of the list
        mapcoefflabels = []
        # Examine one label at a time, anchored at its start. Searching the
        # space-joined label string with a leading \s* captured the separating
        # blank for every A label but the first one, so the derived B label
        # never equalled a real key and those pairs were silently missed.
        A_pattern = re.compile(r"(_refln[\._])A_(\S*)$")
        A_to_B = []
        for key in lstkeys:
            m = A_pattern.match(key)
            if m is not None:
                A_to_B.append((key, m.group(1) + "B_" + m.group(2)))
        for label in lstkeys:
            for Alabel, Blabel in A_to_B:
                if Blabel != label:
                    continue
                # skip columns that were already consumed by another pair
                if Alabel in remainingkeys and label in remainingkeys:
                    mapcoefflabels.append([Alabel, label])
                    remainingkeys.remove(Alabel)
                    remainingkeys.remove(label)
        return mapcoefflabels, remainingkeys

    def get_phase_amplitude_labels(self, keys):
        """Pair amplitude columns with their phase columns.

        Extracts phase and amplitude labels from a list of cif column labels,
        e.g. ( _refln.F_calc _refln.phase_calc ) , ( _refln.FC_ALL _refln.PHIC_ALL ),
        ( _refln.FWT _refln.PHWT )

        Four naming patterns are tried in turn (PH..., PHI..., PH...WT and
        ...phase...). Every pass after the first only searches the labels left
        unpaired by the earlier passes for phase columns.

        Args:
            keys: iterable of cif column labels.

        Returns:
            A tuple (phase_amplitudelabels, remainingkeys).
            phase_amplitudelabels is a list of [amplitude label, phase label]
            pairs; remainingkeys holds, in input order, the unpaired keys.
        """
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # shallow copy of the list
        phase_amplitudelabels = []

        def pair_up(amplitudelabel, phaselabel):
            # Every pass walks the complete key list, so a column consumed by
            # an earlier match can come up again. Skip it (and never pair a
            # column with itself) rather than let list.remove() raise
            # ValueError.
            if (amplitudelabel == phaselabel
                    or amplitudelabel not in remainingkeys
                    or phaselabel not in remainingkeys):
                return
            phase_amplitudelabels.append([amplitudelabel, phaselabel])
            remainingkeys.remove(amplitudelabel)
            remainingkeys.remove(phaselabel)

        alllabels = " ".join(lstkeys)
        PHmatches = re.findall(
            r"((\S*PH)([^I]\S*))", alllabels
        )  # [('_refln.PHWT', '_refln.PH', 'WT'), ('_refln.PHDELWT', '_refln.PH', 'DELWT')]
        for label in lstkeys:
            for m in PHmatches:
                PFlabel = m[1].replace("PH", "F") + m[2]
                Flabel = m[1].replace("PH", "") + m[2]
                if Flabel == label or PFlabel == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        PHImatches = re.findall(
            r"((\S*PHI)(\S*))", alllabels
        )  # [('_refln.PHIC', '_refln.PHI', 'C'), ('_refln.PHIC_ALL', '_refln.PHI', 'C_ALL')]
        for label in lstkeys:
            for m in PHImatches:
                PFlabel = m[1].replace("PHI", "F") + m[2]
                Flabel = m[1].replace("PHI", "") + m[2]
                if Flabel == label or PFlabel == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        PHDELmatches = re.findall(
            r"(((\S*)PH)([^I]\S*(WT)))", alllabels
        )  # [('_refln.PHDELWT', '_refln.PH', '_refln.', 'DELWT', 'WT')]
        for label in lstkeys:
            for m in PHDELmatches:
                Flabel = m[2] + m[3].replace("WT", "FWT")
                if Flabel == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        phase_matches = re.findall(
            r"((\S*[\._])phase(\S*))",
            alllabels)  # [('_refln.phase_calc', '_refln.', '_calc')]
        for label in lstkeys:
            for m in phase_matches:
                Flabl = m[1] + m[2]
                Flabel = m[1] + "F" + m[2]
                Faulabel = Flabel + "_au"
                # substring test, in case of _refln.F_calc_au and _refln.phase_calc
                if not (Flabl in label or Flabel in label
                        or Faulabel in label):
                    continue
                if label not in remainingkeys or m[0] not in remainingkeys:
                    continue
                if ((Flabel + "_sigma_au") in remainingkeys
                        or (Flabel + "_sigma") in remainingkeys):
                    continue  # give priority to F_meas, F_meas_sigma or F_meas_au, F_meas_sigma_au
                pair_up(label, m[0])
        return phase_amplitudelabels, remainingkeys

    def get_FSigF_ISigI_labels(self, keys):
        """Pair amplitude or intensity columns with their sigma columns.

        Extracts amplitude, sigma or intensity, sigma labels from a list of
        cif column labels,
        e.g. ( _refln.F_meas_sigma_au _refln.F_meas), ( _refln.intensity_sigma _refln.intensity ) ,
        ( _refln.pdbx_I_plus_sigma _refln.pdbx_I_plus )

        Args:
            keys: iterable of cif column labels.

        Returns:
            A tuple (labelpairs, remainingkeys). Each entry of labelpairs is
            [data label, sigma label or None, observation type], where the
            observation type is whatever self.guess_observationtype() returns
            for the data label. remainingkeys holds, in input order, the keys
            that were not consumed.
        """
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # shallow copy of the list
        labelpairs = []

        def pair_up(datalabel, sigmalabel):
            # Every pass walks the complete key list, so a column consumed by
            # an earlier match can come up again. Skip it rather than let
            # list.remove() raise ValueError.
            if (datalabel not in remainingkeys
                    or sigmalabel not in remainingkeys):
                return
            labelpairs.append(
                [datalabel, sigmalabel,
                 self.guess_observationtype(datalabel)])
            remainingkeys.remove(datalabel)
            remainingkeys.remove(sigmalabel)

        alllabels = " ".join(lstkeys)
        sigma_matches = re.findall(
            r"((\S*[\._])SIG(\S*))",
            alllabels)  # catch label pairs like F(+),SIGF(+)
        for label in lstkeys:
            for m in sigma_matches:
                if m[1] + m[2] == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        sigma_matches = re.findall(
            r"((\S*)_sigma(_*\S*))", alllabels
        )  # [('_refln.F_meas_sigma_au', '_refln.F_meas', '_au'), ('_refln.intensity_sigma', '_refln.intensity', ''), ('_refln.pdbx_I_plus_sigma', '_refln.pdbx_I_plus', '')]
        for label in lstkeys:
            for m in sigma_matches:
                if m[1] + m[2] == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        # catch generic meas and sigma labels
        anymeas_matches = (re.findall(r"((\S*)_meas(\S*))", alllabels) +
                           re.findall(r"((\S*)_calc(\S*))", alllabels))
        anysigma_matches = re.findall(r"((\S*)_sigma(\S*))", alllabels)
        for mmatch in anymeas_matches:
            for smatch in anysigma_matches:
                if mmatch[1] != smatch[1] or mmatch[2] != smatch[2]:
                    continue
                if mmatch[0] not in remainingkeys:
                    continue  # data column was already consumed
                remainingkeys.remove(mmatch[0])
                # The sigma column may be gone already, in case of say
                # F_squared_calc, F_squared_meas, F_squared_sigma all being
                # present; the data column is then recorded without sigmas.
                sigmalabel = None
                if smatch[0] in remainingkeys:
                    sigmalabel = smatch[0]
                    remainingkeys.remove(sigmalabel)
                labelpairs.append([
                    mmatch[0], sigmalabel,
                    self.guess_observationtype(mmatch[0])
                ])
        return labelpairs, remainingkeys

    def get_miller_indices_containing_loops(self):
        """Collect the loops of self.cif_block that carry Miller indices.

        For each loop, the first key containing 'index_h' identifies the h
        column; the k and l column names are derived from it by substitution.

        Returns:
            A list of (indices, loop) tuples, where indices is the
            flex.miller_index built from the three index columns.

        Raises:
            CifBuilderError: if one of the h, k, l columns is absent from the
                loop, or if a column cannot be converted to integers.
        """
        found = []
        for loop in self.cif_block.loops.values():
            # locate the h column; only the first candidate is considered
            h_key = None
            for candidate in loop.keys():
                if 'index_h' in candidate:
                    h_key = candidate
                    break
            if h_key is None:
                continue
            axis_keys = [
                h_key.replace('index_h', 'index_%s' % axis) for axis in 'hkl'
            ]
            columns = [loop.get(axis_key) for axis_key in axis_keys]
            if columns.count(None) > 0:
                raise CifBuilderError(
                    "Miller indices missing from current CIF block (%s)" %
                    axis_keys[columns.index(None)])
            as_ints = []
            for axis_name, column in zip("HKL", columns):
                try:
                    converted = flex.int(column)
                except ValueError as e:
                    raise CifBuilderError(
                        "Invalid item for Miller index %s: %s" %
                        (axis_name, str(e)))
                as_ints.append(converted)
            found.append((flex.miller_index(*as_ints), loop))
        return found

    def get_selection(self,
                      value,
                      wavelength_id=None,
                      crystal_id=None,
                      scale_group_code=None):
        """Build the selection of the entries of value that hold data.

        Entries equal to '.' or '?' are deselected. For each of wavelength_id,
        crystal_id and scale_group_code that is given, and whose matching id
        array on self is not None, the selection is narrowed further to the
        rows where that array's data equal the given id.

        Returns:
            The selection obtained from the element-wise comparisons on value.
        """
        placeholders = (value == '.') | (value == '?')
        selection = ~placeholders
        id_filters = (
            (self.wavelength_id_array, wavelength_id),
            (self.crystal_id_array, crystal_id),
            (self.scale_group_array, scale_group_code),
        )
        for id_array, wanted_id in id_filters:
            if id_array is None or wanted_id is None:
                continue
            selection &= (id_array.data() == wanted_id)
        return selection

    def flex_std_string_as_miller_array(self,
                                        value,
                                        wavelength_id=None,
                                        crystal_id=None,
                                        scale_group_code=None):
        """Turn a column of cif strings into a miller array.

        Only the data and indices matching the wavelength_id, crystal_id and
        scale_group_code submitted are used, or the full array if these are
        None. The selected strings are converted with flex.int first and with
        flex.double if that raises ValueError. If both conversions fail, the
        complete unselected column, including '.' and '?' entries, is used
        together with all indices.

        Returns:
            A miller.array, or None if the resulting data array is empty.
        """
        selection = self.get_selection(value,
                                       wavelength_id=wavelength_id,
                                       crystal_id=crystal_id,
                                       scale_group_code=scale_group_code)
        data = value.select(selection)
        for convert in (flex.int, flex.double):
            try:
                data = convert(data)
                indices = self.indices.select(selection)
            except ValueError:
                continue
            break
        else:
            # if flex.std_string return all values including '.' and '?'
            data = value
            indices = self.indices
        if data.size() == 0:
            return None
        miller_set = miller.set(self.crystal_symmetry,
                                indices).auto_anomalous()
        return miller.array(miller_set, data)

    def arrays(self):
        """Return the dictionary of miller arrays held in self._arrays."""
        return self._arrays

    def origarrays(self):
        """Return the dictionary of raw column data found in the cif file.

        The values are the data of arrays produced by
        flex_std_string_as_miller_array(), i.e. cast into flex.int or
        flex.double arrays, or just string arrays as a fall back.
        """
        return self._origarrays