示例#1
0
 def align_chain(self, i):
   """
   Align chain number i against each entry of self.sequences and choose one.

   A candidate replaces the current choice when its sequence identity
   (computed with 'X' characters skipped) is strictly greater than the best
   seen so far, which starts at self.minimum_identity.  When the identity is
   approximately equal to the current best (as judged by approx_equal_core),
   the candidate wins only if its match range is no wider AND its sequence
   is strictly shorter than the current choice.

   Returns a tuple (alignment, sequence name, sequence index); all three are
   None when no candidate was accepted.
   """
   import mmtbx.alignment
   chain = self.chains[i]
   chosen = (None, None, None)
   top_identity = self.minimum_identity
   narrowest = sys.maxsize
   shortest = sys.maxsize
   for seq_index, candidate in enumerate(self.sequences):
     aligned = mmtbx.alignment.align(
       seq_a=chain.sequence,
       seq_b=candidate.sequence).extract_alignment()
     identity = aligned.calculate_sequence_identity(skip_chars=['X'])
     # distance between the first and the last 'm' code of the alignment
     codes = aligned.match_codes
     span = codes.rfind('m') - codes.find('m')
     n_residues = len(candidate.sequence)
     accept = identity > top_identity
     if not accept:
       # tie on identity: fall back to the width / length criteria
       accept = (
         approx_equal_core(identity, top_identity, 1.e-6, 1.e10, None, "")
         and span <= narrowest
         and n_residues < shortest)
     if accept:
       top_identity = identity
       narrowest = span
       shortest = n_residues
       chosen = (aligned, candidate.name, seq_index)
   return chosen
示例#2
0
def find_similar_chains (pdb_hierarchy,
    sequence,
    source_info=None,
    first_chain_only=False,
    remove_alt_confs=True,
    reset_chain_id=None,
    min_identity=0.95,
    min_identity_epsilon=0.02,
    log=null_out()) :
  """
  Extract the protein chains whose sequence is close enough to a target.

  Every protein chain of the hierarchy's only model is aligned against
  `sequence`; a chain is kept when its fractional identity (ignoring 'X')
  is at least min_identity - min_identity_epsilon.  Each kept chain is copied
  into a fresh root/model/chain hierarchy and wrapped in a related_chain.

  Parameters
  ----------
  pdb_hierarchy : hierarchy providing only_model()
  sequence : target sequence handed to the aligner as seq_b
  source_info : passed through unchanged to related_chain
  first_chain_only : stop after the first chain that passes the cutoff
  remove_alt_confs : when true, copy only the first atom group of each
    residue group, and only if its altloc is blank or 'A'
  reset_chain_id : if not None, ID given to the copied chain (the
    related_chain record still carries the original chain ID)
  min_identity, min_identity_epsilon : identity cutoff and its tolerance
  log : accepted but not used inside this function

  Returns
  -------
  list of related_chain objects, in chain order
  """
  import mmtbx.alignment
  import iotbx.pdb.hierarchy
  matches = []
  for chain in pdb_hierarchy.only_model().chains() :
    if (not chain.is_protein()) :
      continue
    padded_seq = chain.as_padded_sequence(
      pad=True,
      substitute_unknown='X',
      pad_at_start=False)
    pairwise = mmtbx.alignment.align(seq_a=padded_seq, seq_b=sequence)
    identity = pairwise.extract_alignment().calculate_sequence_identity(
      skip_chars=['X'])
    if (not (identity >= min_identity - min_identity_epsilon)) :
      continue
    # assemble an empty root -> model -> chain scaffold for the copy
    new_root = iotbx.pdb.hierarchy.root()
    new_model = iotbx.pdb.hierarchy.model()
    new_root.append_model(new_model)
    if (reset_chain_id is None) :
      output_id = chain.id
    else :
      output_id = reset_chain_id
    copied_chain = iotbx.pdb.hierarchy.chain(id=output_id)
    new_model.append_chain(copied_chain)
    for residue_group in chain.residue_groups() :
      copied_rg = iotbx.pdb.hierarchy.residue_group(
        resseq=residue_group.resseq,
        icode=residue_group.icode)
      for position, atom_group in enumerate(residue_group.atom_groups()) :
        if (remove_alt_confs) :
          # keep only a leading atom group with altloc '' or 'A'
          is_primary = ((position == 0) and
                        (atom_group.altloc.strip() in ['','A']))
          if (not is_primary) :
            continue
        copied_rg.append_atom_group(atom_group.detached_copy())
      # the residue group is appended even if no atom group was retained
      copied_chain.append_residue_group(copied_rg)
    # round-trip through an xray structure so the copied atoms receive the
    # ADPs of the isotropic-converted scatterers
    xray_structure = new_root.extract_xray_structure()
    xray_structure.convert_to_isotropic()
    new_root.atoms().set_adps_from_scatterers(
      xray_structure.scatterers(),
      xray_structure.unit_cell())
    matches.append(related_chain(
      source_info=source_info,
      chain_id=chain.id,
      pdb_hierarchy=new_root,
      identity=identity))
    if (first_chain_only) :
      break
  return matches
示例#3
0
def find_similar_chains(pdb_hierarchy,
    sequence,
    source_info=None,
    first_chain_only=False,
    remove_alt_confs=True,
    reset_chain_id=None,
    min_identity=0.95,
    min_identity_epsilon=0.02,
    log=null_out()):
  """
  Return related_chain objects for the protein chains similar to `sequence`.

  Each protein chain in the hierarchy's only model is aligned to the target
  sequence.  Chains whose fractional identity (with 'X' skipped) reaches
  min_identity - min_identity_epsilon are copied, residue group by residue
  group, into a brand-new hierarchy which is stored in the result.

  Arguments:
    pdb_hierarchy: hierarchy object exposing only_model()
    sequence: target sequence (second input of the alignment)
    source_info: forwarded as-is to related_chain
    first_chain_only: if true, return after the first accepted chain
    remove_alt_confs: if true, only the first atom group of a residue group
      is copied, and only when its altloc is '' or 'A'
    reset_chain_id: optional replacement ID for the copied chain; the
      related_chain entry keeps the ID of the source chain
    min_identity: nominal identity threshold
    min_identity_epsilon: slack subtracted from the threshold
    log: not referenced by this function

  Returns:
    list of related_chain, one per accepted chain
  """
  import mmtbx.alignment
  import iotbx.pdb.hierarchy

  def _should_copy(index, atom_group):
    # Without alt-conf removal everything is copied; otherwise only a
    # leading atom group whose altloc is blank or 'A'.
    if not remove_alt_confs:
      return True
    return (index == 0) and (atom_group.altloc.strip() in ['','A'])

  related = []
  for chain in pdb_hierarchy.only_model().chains():
    if not chain.is_protein():
      continue
    seq_with_gaps = chain.as_padded_sequence(
      pad=True, substitute_unknown='X', pad_at_start=False)
    aligned = mmtbx.alignment.align(
      seq_a=seq_with_gaps, seq_b=sequence).extract_alignment()
    identity = aligned.calculate_sequence_identity(skip_chars=['X'])
    if not (identity >= min_identity - min_identity_epsilon):
      continue
    # new, independent hierarchy holding just this chain
    out_root = iotbx.pdb.hierarchy.root()
    out_model = iotbx.pdb.hierarchy.model()
    out_root.append_model(out_model)
    out_chain_id = chain.id
    if reset_chain_id is not None:
      out_chain_id = reset_chain_id
    out_chain = iotbx.pdb.hierarchy.chain(id=out_chain_id)
    out_model.append_chain(out_chain)
    for source_rg in chain.residue_groups():
      source_ags = source_rg.atom_groups()
      out_rg = iotbx.pdb.hierarchy.residue_group(
        resseq=source_rg.resseq, icode=source_rg.icode)
      for index, atom_group in enumerate(source_ags):
        if _should_copy(index, atom_group):
          out_rg.append_atom_group(atom_group.detached_copy())
      # appended unconditionally, even when every atom group was skipped
      out_chain.append_residue_group(out_rg)
    # push the ADPs of the isotropic-converted scatterers back onto the atoms
    xrs = out_root.extract_xray_structure()
    xrs.convert_to_isotropic()
    out_atoms = out_root.atoms()
    out_atoms.set_adps_from_scatterers(xrs.scatterers(), xrs.unit_cell())
    entry = related_chain(
      source_info=source_info,
      chain_id=chain.id,
      pdb_hierarchy=out_root,
      identity=identity)
    related.append(entry)
    if first_chain_only:
      break
  return related
示例#4
0
 def align_chain (self, i) :
   """
   Align chain number i against each entry of self.sequences and return the
   best match.

   A candidate is accepted when its sequence identity (computed with 'X'
   characters skipped) is strictly greater than the best seen so far, which
   starts at self.minimum_identity.  If the identity is exactly equal to the
   current best, the candidate is accepted only when the range spanned by its
   matches is strictly narrower.

   Returns a tuple (alignment, sequence name, sequence index); all three are
   None when no candidate was accepted.
   """
   import mmtbx.alignment
   chain = self.chains[i]
   best_alignment = None
   best_sequence = None
   best_seq_id = None
   best_identity = self.minimum_identity
   # sys.maxsize instead of sys.maxint: the latter does not exist on
   # Python 3, while sys.maxsize is available on Python 2.6+ and 3.x
   best_width = sys.maxsize
   for i_seq, seq_object in enumerate(self.sequences) :
     alignment = mmtbx.alignment.align(
       seq_a=chain.sequence,
       seq_b=seq_object.sequence).extract_alignment()
     identity = alignment.calculate_sequence_identity(skip_chars=['X'])
     # if the identities of two alignments are equal, then we prefer the
     # alignment that has the narrowest range for the match
     # (width = distance between the first and the last 'm' match code)
     width = alignment.match_codes.rfind('m') - alignment.match_codes.find('m')
     # NOTE: the tie test is an exact float comparison
     if ((identity > best_identity) or
         (identity == best_identity and width < best_width)):
       best_identity = identity
       best_alignment = alignment
       best_sequence = seq_object.name
       best_seq_id = i_seq
       best_width = width
   return best_alignment, best_sequence, best_seq_id