Exemplo n.º 1
0
def side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager):
  """
  Works with poly_gly truncated hierarchy.
  Also used in fix_rama_outliers.
  """
  resname = current_reference_ag.resname.upper()
  c = one_three.get(resname, None)

  # seems to work with unusual residues...
  # if c is None:
  #   msg = "Only standard protein residues are currently supported.\n"
  #   msg += "The residue %s (chain %s, resid %s) chain is not standard." % (
  #       resname,
  #       current_reference_ag.parent().parent().id,
  #       current_reference_ag.parent().resid())
  #   raise Sorry(msg)
  ag_to_place.resname = three_one.get(c,resname)
  if c == 'G':
    return

  # align residue from ideal_res_dict to just placed ALA (ag_to_place)
  # or from pdb_hierarchy_template
  fixed_sites = flex.vec3_double()
  moving_sites = flex.vec3_double()
  reper_atoms = ["C","CA", "N"]
  for (ag, arr) in [(ag_to_place, fixed_sites),
                    (current_reference_ag, moving_sites)]:
    for a in ag.atoms():
      if a.name.strip() in reper_atoms:
        arr.append(a.xyz)
  assert len(fixed_sites) == 3
  if len(moving_sites) < 3:
    error_msg = "C, CA or N atoms are absent in secondary structure element." +\
        "\nPlease add them to the model and try again."
    raise Sorry(error_msg)
  assert len(moving_sites) == 3
  lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                            other_sites = moving_sites)
  ideal_correct_ag = current_reference_ag.detached_copy()
  ideal_correct_ag.atoms().set_xyz(
      lsq_fit_obj.r.elems*ideal_correct_ag.atoms().extract_xyz()+\
      lsq_fit_obj.t.elems)
  ideal_correct_ag.atoms().set_xyz(
      rotamer_manager.nearest_rotamer_sites_cart(ideal_correct_ag))
  if len(ideal_correct_ag.atoms()) > 4:
    ag_to_place.pre_allocate_atoms(number_of_additional_atoms=\
                                                len(ideal_correct_ag.atoms())-4)
    for a in ideal_correct_ag.atoms():
      if a.name.strip() not in ["N","CA","C","O"]:
        at = a.detached_copy()
        at.uij_erase()
        ag_to_place.append_atom(atom=at)
  else:
    # This means something wrong with input model, e.g. only 3 atoms in
    # the residue and they happened to be N, CA, C
    pass
Exemplo n.º 2
0
def side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager):
  """
  Works with poly_gly truncated hierarchy.
  Also used in fix_rama_outliers.
  """
  resname = current_reference_ag.resname.upper()
  c = one_three.get(resname, None)
  if c is None:
    msg = "Only standard protein residues are currently supported.\n"
    msg += "The residue %s (chain %s, resid %s) chain is not standard." % (
        resname,
        current_reference_ag.parent().parent().id,
        current_reference_ag.parent().resid())
    raise Sorry(msg)
  ag_to_place.resname = three_one[c]
  if c == 'G':
    return

  # align residue from ideal_res_dict to just placed ALA (ag_to_place)
  # or from pdb_hierarchy_template
  fixed_sites = flex.vec3_double()
  moving_sites = flex.vec3_double()
  reper_atoms = ["C","CA", "N"]
  for (ag, arr) in [(ag_to_place, fixed_sites),
                    (current_reference_ag, moving_sites)]:
    for a in ag.atoms():
      if a.name.strip() in reper_atoms:
        arr.append(a.xyz)
  assert len(fixed_sites) == 3
  if len(moving_sites) < 3:
    error_msg = "C, CA or N atoms are absent in secondary structure element." +\
        "\nPlease add them to the model and try again."
    raise Sorry(error_msg)
  assert len(moving_sites) == 3
  lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                            other_sites = moving_sites)
  ideal_correct_ag = current_reference_ag.detached_copy()
  ideal_correct_ag.atoms().set_xyz(
      lsq_fit_obj.r.elems*ideal_correct_ag.atoms().extract_xyz()+\
      lsq_fit_obj.t.elems)
  ideal_correct_ag.atoms().set_xyz(
      rotamer_manager.nearest_rotamer_sites_cart(ideal_correct_ag))
  if len(ideal_correct_ag.atoms()) > 4:
    ag_to_place.pre_allocate_atoms(number_of_additional_atoms=\
                                                len(ideal_correct_ag.atoms())-4)
    for a in ideal_correct_ag.atoms():
      if a.name.strip() not in ["N","CA","C","O"]:
        at = a.detached_copy()
        at.uij_erase()
        ag_to_place.append_atom(atom=at)
  else:
    # This means something wrong with input model, e.g. only 3 atoms in
    # the residue and they happened to be N, CA, C
    pass
Exemplo n.º 3
0
  def sequence_as_cif_block(self, custom_residues=None):
    """
    Export sequence information as mmCIF block
    Version 5.0 of mmCIF/PDBx dictionary

    Parameters
    ----------
    custom_residues: list of str
      List of custom 3-letter residues to keep in pdbx_one_letter_sequence
      The 3-letter residue must exist in the model. If None, the value
      from self.custom_residues is used.

    Returns
    -------
    cif_block: iotbx.cif.model.block
    """

    if custom_residues is None:
      custom_residues = self.custom_residues

    dna = set(['DA', 'DT', 'DC', 'DG', 'DI'])
    rna = set(['A', 'U', 'C', 'G'])
    rna_to_dna = {'A': 'DA', 'U': 'DT', 'T': 'DT', 'C': 'DC', 'G': 'DG',
                  'I': 'DI'}
    modified_dna = set()
    modified_rna = set()
    for key in modified_rna_dna_names.lookup.keys():
      value = modified_rna_dna_names.lookup[key]
      if value in dna:
        modified_dna.add(key)
      elif value in rna:
        modified_rna.add(key)

    # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity.html
    entity_loop = iotbx.cif.model.loop(header=(
      '_entity.id',
      '_entity.pdbx_description'
    ))

    # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly.html
    entity_poly_loop = iotbx.cif.model.loop(header=(
      '_entity_poly.entity_id',
      '_entity_poly.nstd_linkage',
      '_entity_poly.nstd_monomer',
      '_entity_poly.pdbx_seq_one_letter_code',
      '_entity_poly.pdbx_seq_one_letter_code_can',
      '_entity_poly.pdbx_strand_id',
      '_entity_poly.pdbx_target_identifier',
      '_entity_poly.type',
    ))

    # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html
    entity_poly_seq_loop = iotbx.cif.model.loop(header=(
      '_entity_poly_seq.entity_id',
      '_entity_poly_seq.num',
      '_entity_poly_seq.mon_id',
      '_entity_poly_seq.hetero',
    ))

    # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/struct_ref.html
    struct_ref_loop = iotbx.cif.model.loop(header=(
      '_struct_ref.id',
      '_struct_ref.db_code',
      '_struct_ref.db_name',
      '_struct_ref.entity_id',
      '_struct_ref.pdbx_align_begin',
      '_struct_ref.pdbx_db_accession',
      '_struct_ref.pdbx_db_isoform',
      '_struct_ref.pdbx_seq_one_letter_code',
    ))

    # http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/struct_ref_seq.html
    struct_ref_seq_loop = iotbx.cif.model.loop(header=(
      '_struct_ref_seq.align_id',
      '_struct_ref_seq.db_align_beg',
      '_struct_ref_seq.db_align_end',
      '_struct_ref_seq.pdbx_PDB_id_code',
      '_struct_ref_seq.pdbx_auth_seq_align_beg',
      '_struct_ref_seq.pdbx_auth_seq_align_end',
      '_struct_ref_seq.pdbx_db_accession',
      '_struct_ref_seq.pdbx_db_align_beg_ins_code',
      '_struct_ref_seq.pdbx_db_align_end_ins_code',
      '_struct_ref_seq.pdbx_seq_align_beg_ins_code',
      '_struct_ref_seq.pdbx_seq_align_end_ins_code',
      '_struct_ref_seq.pdbx_strand_id',
      '_struct_ref_seq.ref_id',
      '_struct_ref_seq.seq_align_beg',
      '_struct_ref_seq.seq_align_end',
    ))

    entity_id = 0
    # entity_poly
    sequence_to_entity_id = dict()
    nstd_linkage = dict()
    nstd_monomer = dict()
    seq_one_letter_code = dict()
    seq_one_letter_code_can = dict()
    strand_id = dict()
    target_identifier = dict()
    sequence_type = dict()
    # entity_poly_seq
    num = dict()
    mon_id = dict()
    hetero = dict()
    # struct_ref (work in progress)
    chain_id = dict()
    db_code = '?'
    db_name = '?'
    align_begin = '?'
    db_accession = '?'
    db_isoform = '?'
    # struct_ref_seq (work in progress)
    db_align_beg = '?'
    db_align_end = '?'
    PDB_id_code = '?'
    align_beg_ins_code = '?'
    align_end_ins_code = '?'

    for i_chain, chain in enumerate(self.chains):
      seq_can = chain.alignment.b
      # entity_id
      if seq_can not in sequence_to_entity_id:
        entity_id += 1
        sequence_to_entity_id[seq_can] = entity_id
      else:
        # subsequent matches just add strand_id
        entity_id = sequence_to_entity_id[seq_can]
        strand_id[entity_id].append(chain.chain_id)
        continue

      # entity_poly items
      # nstd_linkage (work in progress)
      if entity_id not in nstd_linkage:
        nstd_linkage[entity_id] = 'no'
      # nstd_monomer
      if entity_id not in nstd_monomer:
        nstd_monomer[entity_id] = 'no'
      # pdbx_seq_one_letter_code
      if entity_id not in seq_one_letter_code:
        seq_one_letter_code[entity_id] = list()
      # type (work in progress)
      if entity_id not in sequence_type:
        sequence_type[entity_id] = '?'
      has_protein = False
      has_rna = False
      has_dna = False
      has_sugar = False
      is_d = False
      # chain.alignment.a is the model
      # chain.alignment.b is the sequence
      for i_a, i_b in zip(chain.alignment.i_seqs_a, chain.alignment.i_seqs_b):
        # sequence does not have residue in model
        if i_b is None:
          continue
        # model does not have residue in sequence
        if i_a is None or chain.resnames[i_a] is None:
          letter = seq_can[i_b]
        else:
          resname = chain.resnames[i_a].strip()
          # check for modified residues
          if (resname in modified_aa_names.lookup or
              resname in modified_rna_dna_names.lookup or
              resname in custom_residues):
            letter = '({resname})'.format(resname=resname)
            nstd_monomer[entity_id] = 'yes'
          elif resname in three_letter_l_given_three_letter_d:
            letter = '({resname})'.format(resname=resname)
            nstd_monomer[entity_id] = 'yes'
          # check for nucleic acid
          elif resname in dna:
            letter = '({resname})'.format(resname=resname)
          elif resname in rna:
            letter = resname
          # regular protein
          else:
            letter = one_letter_given_three_letter.get(resname)
            if letter is None:
              letter = 'X'

          # check for protein
          if (resname in one_letter_given_three_letter or
              resname in modified_aa_names.lookup):
            has_protein = True
          # check for DNA
          # hybrid protein/DNA/RNA chains are not allowed
          if resname in dna or resname in modified_dna:
            has_dna = True
            has_protein = False
          # check for RNA
          # does not handle hybrid DNA/RNA chains
          if resname in rna or resname in modified_rna:
            has_rna = True
            has_dna = False
            has_protein = False
          # check chirality
          # hybrid D/L handed chains are not allowed
          if resname in three_letter_l_given_three_letter_d:
            is_d = True
        # pdbx_seq_one_letter_code
        seq_one_letter_code[entity_id].append(letter)

      # pdbx_seq_one_letter_code_can
      seq_one_letter_code_can[entity_id] = seq_can.replace('-', '')
      # strand_id
      if entity_id not in strand_id:
        strand_id[entity_id] = list()
      strand_id[entity_id].append(chain.chain_id)
      # target_identifier (work in progress)
      if entity_id not in target_identifier:
        target_identifier[entity_id] = '?'
      # type
      #   polypeptide(L)
      #   polypeptide(D)
      #   polydeoxyribonucleotide,
      #   polyribonucleotide
      # missing
      #   cyclic-psuedo-peptide
      #   other
      #   peptide nucleic acid
      #   polydeoxyribonucleotide/polyribonucleotide
      #   polysaccharide(D)
      #   polysaccahride(L)
      if has_protein:
        choice = 'polypeptide'
        if is_d:
          choice += '(D)'
        else:
          choice += '(L)'
      if has_dna:
        choice = 'polydeoxyribonucleotide'
      if has_rna:
        choice = 'polyribonucleotide'
      sequence_type[entity_id] = choice

      # entity_poly_seq items
      if entity_id not in mon_id:
        mon_id[entity_id] = list()
      if entity_id not in num:
        num[entity_id] = list()
      if entity_id not in hetero:
        hetero[entity_id] = list()

      # struct_ref items
      if entity_id not in chain_id:
        chain_id[entity_id] = i_chain + 1

      for i_a, i_b in zip(chain.alignment.i_seqs_a, chain.alignment.i_seqs_b):
        # sequence does not have residue in model
        if i_b is None:
          continue
        seq_resname = None
        if has_protein:
          seq_resname = three_letter_given_one_letter.get(seq_can[i_b])
        if has_dna:
          seq_resname = rna_to_dna.get(seq_can[i_b])
        if has_rna:
          seq_resname = seq_can[i_b]
        if seq_resname is None:
          seq_resname = 'UNK'
        # model does not have residue in sequence
        if i_a is None or chain.resnames[i_a] is None:
          resname = seq_resname
        else:
          resname = chain.resnames[i_a]
        mon_id[entity_id].append(resname.strip())
        if len(num[entity_id]) == 0:
          num[entity_id].append(1)
        else:
          num[entity_id].append(num[entity_id][-1] + 1)
        hetero[entity_id].append('no')

    # build loops
    ids = list(sequence_to_entity_id.values())
    ids.sort()
    align_id = 1
    for entity_id in ids:
      # construct entity_poly loop
      if len(strand_id[entity_id]) == 1:
        chains = strand_id[entity_id][0]
      else:
        chains = strand_id[entity_id]
        #chains.sort()
        chains = ','.join(chains)
      entity_poly_loop.add_row((
        entity_id,
        nstd_linkage[entity_id],
        nstd_monomer[entity_id],
        ';' + ''.join(seq_one_letter_code[entity_id]) + '\n;',
        ';' + seq_one_letter_code_can[entity_id] + '\n;',
        chains,
        target_identifier[entity_id],
        sequence_type[entity_id]
      ))

      # construct entity loop
      entity_loop.add_row((
        entity_id,
        'Chains: ' + chains
      ))

      # construct entity_poly_seq loop
      chain_length = len(mon_id[entity_id])
      for i in range(chain_length):
        entity_poly_seq_loop.add_row((
          entity_id,
          num[entity_id][i],
          mon_id[entity_id][i],
          hetero[entity_id][i]
        ))

      # construct struct_ref loop
      struct_ref_loop.add_row((
        chain_id[entity_id],
        db_code,
        db_name,
        entity_id,
        align_begin,
        db_accession,
        db_isoform,
        ';' + seq_one_letter_code_can[entity_id] + '\n;'
      ))

      # construct struct_ref_seq loop
      for chain in strand_id[entity_id]:
        struct_ref_seq_loop.add_row((
          align_id,
          db_align_beg,
          db_align_end,
          PDB_id_code,
          '1',
          len(seq_one_letter_code_can[entity_id]) - 1,
          db_accession,
          align_beg_ins_code,
          align_end_ins_code,
          align_beg_ins_code,
          align_end_ins_code,
          chain,
          chain_id[entity_id],
          '1',
          len(seq_one_letter_code_can[entity_id]) - 1
        ))

    # construct block
    cif_block = iotbx.cif.model.block()
    cif_block.add_loop(entity_loop)
    cif_block.add_loop(entity_poly_loop)
    cif_block.add_loop(entity_poly_seq_loop)
    cif_block.add_loop(struct_ref_loop)
    cif_block.add_loop(struct_ref_seq_loop)

    return cif_block