示例#1
0
  def __init__(self,
      pdb_hierarchy,
      nontrans_only=False,
      out=sys.stdout,
      quiet=True):
    """
    Measure the omega dihedral between consecutive residues of
    pdb_hierarchy and build one omega_result per measured atom group.

    Parameters:
      pdb_hierarchy: object providing atoms(), models() and, per chain,
        residue_groups(); presumably an iotbx.pdb hierarchy (TODO confirm).
      nontrans_only: when true, only cis/twisted results are appended to
        self.results.  The counters are updated for every measured omega
        regardless of this flag.
      out: not referenced in this method.
      quiet: not referenced in this method.

    Attributes created here: residue_count, omega_count, _outlier_i_seqs.
    self.n_outliers and self.results are updated but not created here;
    presumably validation.__init__ initializes them (TODO confirm).
    """
    validation.__init__(self)
    self.residue_count = [0, 0]
    # residue_count is indexed by [OMEGA_GENERAL, OMEGA_PRO]
    self.omega_count = [[0,0,0], [0,0,0]]
    # omega_count is indexed first by [OMEGA_GENERAL, OMEGA_PRO], then by
    # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED]

    from mmtbx.validation import utils
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    # All-zero i_seqs presumably mean the atoms were never numbered; the
    # assert at the end of the loop requires outlier i_seqs not all zero.
    if all_i_seqs.all_eq(0):
      pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)

    # NOTE(review): prev_resid, next_rezes and next_resseq are assigned here
    # and again per chain, but are never read anywhere in this method.
    prev_rezes, next_rezes = None, None
    prev_resid = None
    cur_resseq = None
    next_resseq = None
    for model in pdb_hierarchy.models():
      for chain in model.chains():
        # Reset the carried-over state so that the first residue of a chain
        # never pairs with the last residue of the previous chain.
        prev_rezes, next_rezes = None, None
        prev_resid = None
        cur_resseq = None
        next_resseq = None
        if use_segids:
          chain_id = utils.get_segid_as_chainid(chain=chain)
        else:
          chain_id = chain.id
        residues = list(chain.residue_groups())
        for i, residue_group in enumerate(residues):
          # The reason I pass lists of atom_groups to get_phi and get_psi is to
          # deal with the particular issue where some residues have an A alt
          # conf that needs some atoms from a "" alt conf to get calculated
          # correctly.  See 1jxt.pdb for examples.  This way I can search both
          # the alt conf atoms and the "" atoms if necessary.
          # NOTE(review): get_phi/get_psi are not called in this method; the
          # comment above was presumably carried over from ramalyze and is
          # meant to apply to get_omega here.
          prev_atom_list, next_atom_list, atom_list = None, None, None
          # cur_resseq is None only for the first residue of a chain, so
          # prev_rezes stays None there.  Otherwise the residue handled by the
          # previous iteration (even one that hit a `continue` below) becomes
          # the "previous" residue.
          if cur_resseq is not None:
            prev_rezes = rezes
            # NOTE(review): this local prev_resseq is never read afterwards.
            prev_resseq = cur_resseq
          # rezes is used below as a mapping keyed by altloc (.get/.keys).
          rezes = construct_residues(residues[i])
          cur_resseq = residue_group.resseq_as_int()
          cur_icode = residue_group.icode.strip()
          if (i > 0):
            #check for insertion codes
            # Same resseq as the previous residue group: skip only when both
            # insertion codes are blank; otherwise treat as consecutive.
            if (cur_resseq == residues[i-1].resseq_as_int()) :
              if (cur_icode == '') and (residues[i-1].icode.strip() == '') :
                continue
            # Different resseq that is not previous+1: numbering gap, skip.
            elif (cur_resseq != (residues[i-1].resseq_as_int())+1):
              continue
          for atom_group in residue_group.atom_groups():
            alt_conf = atom_group.altloc
            if rezes is not None:
              atom_list = rezes.get(alt_conf)
            if prev_rezes is not None:
              prev_atom_list = prev_rezes.get(alt_conf)
              # The previous residue has no entry for this altloc: fall back
              # to the entry under its first altloc key in sorted order.
              if (prev_atom_list is None):
                prev_keys = sorted(prev_rezes.keys())
                prev_atom_list = prev_rezes.get(prev_keys[0])
            omega=get_omega(prev_atom_list, atom_list)
            highest_mc_b = get_highest_mc_b(prev_atom_list, atom_list)
            # Atom groups for which get_omega returns None are skipped
            # entirely: no counters are touched and no result is built.
            if omega is not None:
              resname = atom_group.resname[0:3]
              coords = get_center(atom_group)
              if resname == "PRO":
                res_type = OMEGA_PRO
              else:
                res_type = OMEGA_GENERAL
              self.residue_count[res_type] += 1
              omega_type = find_omega_type(omega)
              is_nontrans = False
              # Both cis and twisted omegas count as outliers.
              if omega_type == OMEGALYZE_CIS or omega_type == OMEGALYZE_TWISTED:
                self.n_outliers += 1
                is_nontrans = True
              self.omega_count[res_type][omega_type] += 1
              # Slots are [prev CA, prev C, this N, this CA]; they are only
              # filled in for non-trans results.
              markup_atoms = [None, None, None, None] #for kinemage markup
              if is_nontrans:
                for a in prev_atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "CA":
                    markup_atoms[0] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "C":
                    markup_atoms[1] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                for a in atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "N":
                    markup_atoms[2] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "CA":
                    markup_atoms[3] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                #------------
              #prevres=residues[i-1]
              #find prev res identities for printing
              prev_alts = []
              prev_resnames = {}
              for ag in residues[i-1].atom_groups():
                prev_alts.append(ag.altloc)
                prev_resnames[ag.altloc] = ag.resname
              # Report the previous residue under the same altloc if it has
              # one; otherwise under its second listed altloc when there are
              # several, else under its only altloc.
              # NOTE(review): this differs from the sorted-first-key fallback
              # used for prev_atom_list above -- confirm this is intended.
              if alt_conf in prev_alts:
                prev_altloc = alt_conf
              else:
                if len(prev_alts) > 1:
                  prev_altloc = prev_alts[1]
                else:
                  prev_altloc = prev_alts[0]
              prev_resname = prev_resnames[prev_altloc]
              #done finding prev res identities
              # segid is always passed as None; when use_segids is true the
              # segid has already been folded into chain_id above.
              result = omega_result(
                chain_id=chain_id,
                resseq=residue_group.resseq,
                icode=residue_group.icode,
                resname=atom_group.resname,
                altloc=atom_group.altloc,
                prev_resseq=residues[i-1].resseq,
                prev_icode=residues[i-1].icode,
                prev_resname=prev_resname,
                prev_altloc=prev_altloc,
                segid=None,
                omega=omega,
                omega_type=omega_type,
                res_type=res_type,
                is_nontrans=is_nontrans,
                outlier=is_nontrans,
                highest_mc_b=highest_mc_b,
                xyz=coords,
                markup_atoms=markup_atoms)
              if is_nontrans or not nontrans_only: #(not nontrans_only or is_nontrans)
                self.results.append(result)
              # Outlier i_seqs cover every atom of the current atom group.
              if is_nontrans:
                i_seqs = atom_group.atoms().extract_i_seq()
                assert (not i_seqs.all_eq(0)) #This assert copied from ramalyze
                self._outlier_i_seqs.extend(i_seqs)
示例#2
0
  def __init__(self, pdb_hierarchy,
      data_version="8000",
      outliers_only=False,
      show_errors=False,
      out=sys.stdout,
      quiet=False):
    """
    Score the side-chain chi angles of every atom group in pdb_hierarchy.

    Parameters:
      pdb_hierarchy: object providing models() -> chains() ->
        residue_groups() -> atom_groups(); presumably an iotbx.pdb
        hierarchy (TODO confirm).
      data_version: only "8000" is accepted; any other value raises
        ValueError.
      outliers_only: when true, only results whose is_outlier() is true are
        stored in self.results.
      show_errors: forwarded to SidechainAngles; when true, atom groups whose
        chi measurement raises AttributeError are reported on `out` and
        stored as incomplete results.
      out: stream receiving the "missing some sidechain atoms" messages.
      quiet: not referenced in this method.

    Raises:
      ValueError: if data_version is not "8000".
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    from mmtbx.rotamer.sidechain_angles import SidechainAngles
    from mmtbx.rotamer import rotamer_eval
    from mmtbx.rotamer.rotamer_eval import RotamerID
    from mmtbx.validation import utils
    self.data_version = data_version
    if self.data_version != "8000":
      raise ValueError(
        "data_version given to RotamerEval not recognized (%s)." % data_version)
    self.outlier_threshold = OUTLIER_THRESHOLD
    chi_measurer = SidechainAngles(show_errors)
    scorer = rotamer_eval.RotamerEval(data_version=data_version)
    namer = rotamer_eval.RotamerID() # loads in the rotamer names
    segids_replace_chain_ids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
      for chain in model.chains():
        if segids_replace_chain_ids:
          chain_id = utils.get_segid_as_chainid(chain=chain)
        else:
          chain_id = chain.id
        for rg in chain.residue_groups():
          sidechains_by_altloc = construct_complete_sidechain(rg)
          for atom_group in rg.atom_groups():
            center = get_center(atom_group)
            resname = atom_group.resname
            occ = get_occupancy(atom_group)
            # Keyword arguments shared by the "incomplete" and the scored
            # result; scoring details are added further down.
            kwargs = dict(
              chain_id=chain_id,
              resseq=rg.resseq,
              icode=rg.icode,
              altloc=atom_group.altloc,
              resname=resname,
              xyz=center,
              occupancy=occ)
            atom_dict = sidechains_by_altloc.get(atom_group.altloc)
            # The return value is not used below; the call is retained as is.
            get_residue_key(atom_group=atom_group)
            try:
              chis = chi_measurer.measureChiAngles(atom_group, atom_dict)
            except AttributeError:
              if show_errors:
                kwargs['incomplete'] = True
                incomplete = rotamer(**kwargs)
                print >> out, '%s is missing some sidechain atoms' % \
                  incomplete.id_str()
                self.results.append(incomplete)
              continue
            # Nothing to score without a complete set of chi angles.
            if chis is None or None in chis:
              continue
            lookup_name = resname.lower().strip()
            if lookup_name == 'mse':
              # MSE is evaluated with the MET distribution.
              lookup_name = 'met'
            value = scorer.evaluate(lookup_name, chis)
            if value is None:
              continue
            self.n_total += 1
            kwargs['score'] = value * 100
            stripped_name = resname.strip()
            wrap_chis = namer.wrap_chis(stripped_name, chis, symmetry=False)
            # The symmetry-wrapped copy is computed but not used afterwards.
            sym_chis = namer.wrap_sym(resname.strip(), wrap_chis[:])
            evaluation = self.evaluateScore(value)
            kwargs['evaluation'] = evaluation
            if evaluation == "OUTLIER":
              kwargs['outlier'] = True
              kwargs['rotamer_name'] = evaluation
            else:
              kwargs['outlier'] = False
              rotamer_name = namer.identify(resname, wrap_chis)
              if rotamer_name == '':
                # An empty name means no rotamer class matched.
                rotamer_name = "UNCLASSIFIED"
              kwargs['rotamer_name'] = rotamer_name
            # Pad to four chi slots so every result has the same layout.
            for _ in range(4 - len(wrap_chis)):
              wrap_chis.append(None)
            kwargs['chi_angles'] = wrap_chis
            scored = rotamer(**kwargs)
            keep = scored.is_outlier() or (not outliers_only)
            if keep:
              self.results.append(scored)
    outlier_stats = self.get_outliers_count_and_fraction()
    out_count, out_fraction = outlier_stats
    self.out_percent = out_fraction * 100.0
示例#3
0
  def __init__(self,
      pdb_hierarchy,
      nontrans_only=False,
      out=sys.stdout,
      quiet=True):
    """
    Measure the omega dihedral between consecutive residues of
    pdb_hierarchy and build one omega_result per measured atom group.

    Parameters:
      pdb_hierarchy: object providing atoms(), models() and, per chain,
        residue_groups(); presumably an iotbx.pdb hierarchy (TODO confirm).
      nontrans_only: when true, only cis/twisted results are appended to
        self.results.  The counters are updated for every measured omega
        regardless of this flag.
      out: not referenced in this method.
      quiet: not referenced in this method.

    Attributes created here: residue_count, omega_count, _outlier_i_seqs.
    self.n_outliers and self.results are updated but not created here;
    presumably validation.__init__ initializes them (TODO confirm).
    """
    validation.__init__(self)
    self.residue_count = [0, 0]
    # residue_count is indexed by [OMEGA_GENERAL, OMEGA_PRO]
    self.omega_count = [[0,0,0], [0,0,0]]
    # omega_count is indexed first by [OMEGA_GENERAL, OMEGA_PRO], then by
    # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED]

    from mmtbx.validation import utils
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    # All-zero i_seqs presumably mean the atoms were never numbered; the
    # assert at the end of the loop requires outlier i_seqs not all zero.
    if all_i_seqs.all_eq(0):
      pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)

    # NOTE(review): prev_resid, next_rezes and next_resseq are assigned here
    # and again per chain, but are never read anywhere in this method.
    prev_rezes, next_rezes = None, None
    prev_resid = None
    cur_resseq = None
    next_resseq = None
    for model in pdb_hierarchy.models():
      for chain in model.chains():
        # Reset the carried-over state so that the first residue of a chain
        # never pairs with the last residue of the previous chain.
        prev_rezes, next_rezes = None, None
        prev_resid = None
        cur_resseq = None
        next_resseq = None
        if use_segids:
          chain_id = utils.get_segid_as_chainid(chain=chain)
        else:
          chain_id = chain.id
        residues = list(chain.residue_groups())
        for i, residue_group in enumerate(residues):
          # The reason I pass lists of atom_groups to get_phi and get_psi is to
          # deal with the particular issue where some residues have an A alt
          # conf that needs some atoms from a "" alt conf to get calculated
          # correctly.  See 1jxt.pdb for examples.  This way I can search both
          # the alt conf atoms and the "" atoms if necessary.
          # NOTE(review): get_phi/get_psi are not called in this method; the
          # comment above was presumably carried over from ramalyze and is
          # meant to apply to get_omega here.
          prev_atom_list, next_atom_list, atom_list = None, None, None
          # cur_resseq is None only for the first residue of a chain, so
          # prev_rezes stays None there.  Otherwise the residue handled by the
          # previous iteration (even one that hit a `continue` below) becomes
          # the "previous" residue.
          if cur_resseq is not None:
            prev_rezes = rezes
            # NOTE(review): this local prev_resseq is never read afterwards.
            prev_resseq = cur_resseq
          # rezes is used below as a mapping keyed by altloc (.get/.keys).
          rezes = construct_residues(residues[i])
          cur_resseq = residue_group.resseq_as_int()
          cur_icode = residue_group.icode.strip()
          if (i > 0):
            #check for insertion codes
            # Same resseq as the previous residue group: skip only when both
            # insertion codes are blank; otherwise treat as consecutive.
            if (cur_resseq == residues[i-1].resseq_as_int()) :
              if (cur_icode == '') and (residues[i-1].icode.strip() == '') :
                continue
            # Different resseq that is not previous+1: numbering gap, skip.
            elif (cur_resseq != (residues[i-1].resseq_as_int())+1):
              continue
          for atom_group in residue_group.atom_groups():
            alt_conf = atom_group.altloc
            if rezes is not None:
              atom_list = rezes.get(alt_conf)
            if prev_rezes is not None:
              prev_atom_list = prev_rezes.get(alt_conf)
              # The previous residue has no entry for this altloc: fall back
              # to the entry under its first altloc key in sorted order.
              if (prev_atom_list is None):
                prev_keys = sorted(prev_rezes.keys())
                prev_atom_list = prev_rezes.get(prev_keys[0])
            omega=get_omega(prev_atom_list, atom_list)
            highest_mc_b = get_highest_mc_b(prev_atom_list, atom_list)
            # Atom groups for which get_omega returns None are skipped
            # entirely: no counters are touched and no result is built.
            if omega is not None:
              resname = atom_group.resname[0:3]
              coords = get_center(atom_group)
              if resname == "PRO":
                res_type = OMEGA_PRO
              else:
                res_type = OMEGA_GENERAL
              self.residue_count[res_type] += 1
              omega_type = find_omega_type(omega)
              is_nontrans = False
              # Both cis and twisted omegas count as outliers.
              if omega_type == OMEGALYZE_CIS or omega_type == OMEGALYZE_TWISTED:
                self.n_outliers += 1
                is_nontrans = True
              self.omega_count[res_type][omega_type] += 1
              # Slots are [prev CA, prev C, this N, this CA]; they are only
              # filled in for non-trans results.
              markup_atoms = [None, None, None, None] #for kinemage markup
              if is_nontrans:
                for a in prev_atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "CA":
                    markup_atoms[0] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "C":
                    markup_atoms[1] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                for a in atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "N":
                    markup_atoms[2] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "CA":
                    markup_atoms[3] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                #------------
              #prevres=residues[i-1]
              #find prev res identities for printing
              prev_alts = []
              prev_resnames = {}
              for ag in residues[i-1].atom_groups():
                prev_alts.append(ag.altloc)
                prev_resnames[ag.altloc] = ag.resname
              # Report the previous residue under the same altloc if it has
              # one; otherwise under its second listed altloc when there are
              # several, else under its only altloc.
              # NOTE(review): this differs from the sorted-first-key fallback
              # used for prev_atom_list above -- confirm this is intended.
              if alt_conf in prev_alts:
                prev_altloc = alt_conf
              else:
                if len(prev_alts) > 1:
                  prev_altloc = prev_alts[1]
                else:
                  prev_altloc = prev_alts[0]
              prev_resname = prev_resnames[prev_altloc]
              #done finding prev res identities
              # segid is always passed as None; when use_segids is true the
              # segid has already been folded into chain_id above.
              result = omega_result(
                chain_id=chain_id,
                resseq=residue_group.resseq,
                icode=residue_group.icode,
                resname=atom_group.resname,
                altloc=atom_group.altloc,
                prev_resseq=residues[i-1].resseq,
                prev_icode=residues[i-1].icode,
                prev_resname=prev_resname,
                prev_altloc=prev_altloc,
                segid=None,
                omega=omega,
                omega_type=omega_type,
                res_type=res_type,
                is_nontrans=is_nontrans,
                outlier=is_nontrans,
                highest_mc_b=highest_mc_b,
                xyz=coords,
                markup_atoms=markup_atoms)
              if is_nontrans or not nontrans_only: #(not nontrans_only or is_nontrans)
                self.results.append(result)
              # Outlier i_seqs cover every atom of the current atom group.
              if is_nontrans:
                i_seqs = atom_group.atoms().extract_i_seq()
                assert (not i_seqs.all_eq(0)) #This assert copied from ramalyze
                self._outlier_i_seqs.extend(i_seqs)
示例#4
0
    def __init__(self,
                 pdb_hierarchy,
                 nontrans_only=False,
                 out=sys.stdout,
                 quiet=True):
        """
        Measure the omega dihedral of every two-residue protein fragment in
        pdb_hierarchy and store the results sorted by model id and id_str().

        Parameters:
          pdb_hierarchy: passed to generate_protein_fragments(); presumably
            an iotbx.pdb hierarchy (TODO confirm).
          nontrans_only: when true, only non-trans results are appended to
            self.results.  Counters are updated for every measured omega
            regardless of this flag.
          out: not referenced in this method.
          quiet: not referenced in this method.

        Attributes created here: residue_count, omega_count, _outlier_i_seqs.
        self.n_outliers and self.results are updated but not created here;
        presumably validation.__init__ initializes them (TODO confirm).
        """
        validation.__init__(self)
        # Indexed by [OMEGA_GENERAL, OMEGA_PRO].
        self.residue_count = [0, 0]
        # Indexed first by [OMEGA_GENERAL, OMEGA_PRO], then by
        # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED].
        self.omega_count = [[0, 0, 0], [0, 0, 0]]

        from mmtbx.validation import utils
        from scitbx.array_family import flex
        self._outlier_i_seqs = flex.size_t()
        pdb_atoms = pdb_hierarchy.atoms()
        all_i_seqs = pdb_atoms.extract_i_seq()
        # The assert further down requires outlier i_seqs that are not all 0.
        if all_i_seqs.all_eq(0):
            pdb_atoms.reset_i_seq()
        use_segids = utils.use_segids_in_place_of_chainids(
            hierarchy=pdb_hierarchy)

        first_conf_altloc = None
        prev_chain_id = None
        for twores in generate_protein_fragments(
                pdb_hierarchy,
                length=2,
                geometry=None,
                include_non_standard_peptides=True):
            # The second residue is the relevant one for id-ing cis-Pro.
            main_residue = twores[1]
            conf_altloc = get_conformer_altloc(twores)
            prevres_altloc, mainres_altloc = get_local_omega_altlocs(twores)
            # The default altloc '' evaluates False, so this is '' only when
            # neither residue carries an altloc.
            twores_altloc = prevres_altloc or mainres_altloc

            chain = main_residue.parent().parent()
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id

            if chain_id != prev_chain_id:
                # New chain: its first conformer becomes the reference altloc.
                first_conf_altloc = conf_altloc
                prev_chain_id = chain_id
            if (conf_altloc != first_conf_altloc) and twores_altloc == '':
                # Skip non-alternate residues unless this is the first time
                # through a chain.
                continue

            # omega_atoms is [CA1, C1, N2, CA2], with None for missing atoms.
            omega_atoms = get_omega_atoms(twores)
            if None in omega_atoms:
                continue
            omega = get_omega(omega_atoms)
            if omega is None:
                continue

            omega_type = find_omega_type(omega)
            is_nontrans = (omega_type != OMEGALYZE_TRANS)
            if is_nontrans:
                self.n_outliers += 1
            if main_residue.resname == "PRO":
                res_type = OMEGA_PRO
            else:
                res_type = OMEGA_GENERAL
            self.residue_count[res_type] += 1
            self.omega_count[res_type][omega_type] += 1
            highest_mc_b = get_highest_mc_b(twores[0].atoms(),
                                            twores[1].atoms())
            coords = get_center(main_residue)
            markup_atoms = [
                kin_atom(omega_atom.parent().id_str(), omega_atom.xyz)
                for omega_atom in omega_atoms
            ]

            result = omega_result(
                model_id=twores[0].parent().parent().parent().id,
                chain_id=chain_id,
                resseq=main_residue.resseq,
                icode=main_residue.icode,
                resname=main_residue.resname,
                altloc=mainres_altloc,
                prev_resseq=twores[0].resseq,
                prev_icode=twores[0].icode,
                prev_resname=twores[0].resname,
                prev_altloc=prevres_altloc,
                segid=None,
                omega=omega,
                omega_type=omega_type,
                res_type=res_type,
                is_nontrans=is_nontrans,
                outlier=is_nontrans,
                highest_mc_b=highest_mc_b,
                xyz=coords,
                markup_atoms=markup_atoms)

            if is_nontrans or not nontrans_only:
                self.results.append(result)
            if is_nontrans:
                i_seqs = main_residue.atoms().extract_i_seq()
                # This assert was copied from ramalyze.
                assert (not i_seqs.all_eq(0))
                self._outlier_i_seqs.extend(i_seqs)

        # Sort once, after every fragment has been processed.  list.sort is
        # stable, so the final order is the same as re-sorting after each
        # append, without the repeated full sorts.
        self.results.sort(key=lambda x: x.model_id + ':' + x.id_str())
示例#5
0
 def __init__(self,
              pdb_hierarchy,
              outliers_only=False,
              out=sys.stdout,
              collect_ideal=False,
              quiet=False):
     """
     Compute the C-beta deviation of every non-GLY residue in pdb_hierarchy.

     Parameters:
       pdb_hierarchy: object providing models() -> chains() ->
         residue_groups() -> conformers() -> residues(); presumably an
         iotbx.pdb hierarchy (TODO confirm).
       outliers_only: when false, every measured residue is stored in
         self.results; otherwise only those with deviation >= 0.25.
       out: not referenced in this method.
       collect_ideal: when true, the ideal C-beta position of each stored
         result is kept in self.beta_ideal under the result's id_str().
       quiet: not referenced in this method.

     self.stats counts every measured residue (n_results) and accumulates
     C-beta occupancies over all measured residues (n_weighted_results) and
     over outliers (n_weighted_outliers).  self.n_outliers and self.results
     are updated but not created here; presumably validation.__init__
     initializes them (TODO confirm).
     """
     validation.__init__(self)
     self._outlier_i_seqs = flex.size_t()
     self.beta_ideal = {}
     # Atom names are matched exactly, including the padding spaces.
     relevant_atom_names = frozenset((" CA ", " N  ", " C  ", " CB "))
     self.stats = group_args(n_results=0,
                             n_weighted_results=0,
                             n_weighted_outliers=0)
     from mmtbx.validation import utils
     use_segids = utils.use_segids_in_place_of_chainids(
         hierarchy=pdb_hierarchy)
     for model in pdb_hierarchy.models():
         for chain in model.chains():
             if use_segids:
                 chain_id = utils.get_segid_as_chainid(chain=chain)
             else:
                 chain_id = chain.id
             for rg in chain.residue_groups():
                 for i_cf, cf in enumerate(rg.conformers()):
                     is_first = (i_cf == 0)
                     for residue in cf.residues():
                         if residue.resname == "GLY":
                             continue
                         is_alt_conf = False
                         relevant_atoms = {}
                         for pdb_atom in residue.atoms():
                             if pdb_atom.name not in relevant_atom_names:
                                 continue
                             relevant_atoms[pdb_atom.name] = pdb_atom
                             if len(pdb_atom.parent().altloc) != 0:
                                 is_alt_conf = True
                         # Later conformers are only evaluated when one of
                         # the four atoms actually carries an altloc.
                         if not (is_first or is_alt_conf):
                             continue
                         # All of CA, N, C and CB are required.
                         if len(relevant_atoms) != 4:
                             continue
                         ideal = calculate_ideal_and_deviation(
                             relevant_atoms=relevant_atoms,
                             resname=residue.resname)
                         dev = ideal.deviation
                         dihedralNABB = ideal.dihedral
                         betaxyz = ideal.ideal
                         if dev is None:
                             continue
                         resCB = relevant_atoms[" CB "]
                         self.stats.n_results += 1
                         self.stats.n_weighted_results += resCB.occ
                         is_outlier = (dev >= 0.25)
                         # `== False` is kept deliberately so that non-bool
                         # values such as None are treated as before.
                         if not (is_outlier or outliers_only == False):
                             continue
                         if is_outlier:
                             self.n_outliers += 1
                             self.stats.n_weighted_outliers += resCB.occ
                             # Record the C-beta atom itself, not whichever
                             # atom the scan above happened to end on.
                             self._outlier_i_seqs.append(resCB.i_seq)
                         if is_alt_conf:
                             altchar = cf.altloc
                         else:
                             altchar = " "
                         result = cbeta(chain_id=chain_id,
                                        resname=residue.resname,
                                        resseq=residue.resseq,
                                        icode=residue.icode,
                                        altloc=altchar,
                                        xyz=resCB.xyz,
                                        occupancy=resCB.occ,
                                        deviation=dev,
                                        dihedral_NABB=dihedralNABB,
                                        ideal_xyz=betaxyz,
                                        outlier=is_outlier)
                         self.results.append(result)
                         if collect_ideal:
                             self.beta_ideal[result.id_str()] = betaxyz
示例#6
0
def run(args):
    """
    Command-line driver: process one model file and print bond/angle
    outliers, bond/angle kinemage markup, or RNA backbone dihedrals.

    Parameters:
      args: command-line arguments, parsed against get_master_phil() with
        bare file names assigned to mp_geo.pdb.

    Output goes to mp_geo.out_file when given (appended to in kinemage
    mode, overwritten otherwise), else to sys.stdout.  sys.stdout is never
    closed; an opened output file is closed even if processing fails.

    Raises:
      AssertionError: unless exactly one model file was supplied.

    TODO (from the original author): add test cases to
    cctbx_project/mmtbx/validation/regression/tst_mp_geo.py with just a
    .pdb, without arguments, etc.
    """
    master_phil = get_master_phil()
    import iotbx.phil
    input_objects = iotbx.phil.process_command_line_with_files(
        args=args, master_phil=master_phil, pdb_file_def="mp_geo.pdb")
    work_params = input_objects.work.extract()
    assert len(work_params.mp_geo.pdb) == 1, "Need a model file to run"
    file_name = work_params.mp_geo.pdb[0]
    out_file = work_params.mp_geo.out_file
    do_bonds_and_angles = work_params.mp_geo.bonds_and_angles
    do_kinemage = work_params.mp_geo.kinemage
    do_rna_backbone = work_params.mp_geo.rna_backbone
    outliers_only = work_params.mp_geo.outliers_only
    use_cdl = work_params.mp_geo.cdl
    log = StringIO()
    basename = os.path.basename(file_name)

    # `out` stays None only when an out_file was given but no output mode is
    # enabled; nothing is written in that case.
    out = None
    if out_file is None:
        import sys
        out = sys.stdout
    elif do_bonds_and_angles:
        out = open(out_file, 'w')
    elif do_kinemage:
        # Kinemage markup is appended to an existing file.
        out = open(out_file, 'a')
    elif do_rna_backbone:
        out = open(out_file, 'w')

    try:
        restraints_loading_flags = {"use_neutron_distances": False}
        from mmtbx.validation import utils
        params = pdb_interpretation.master_params.extract()
        params.restraints_library.cdl = use_cdl
        params.clash_guard.nonbonded_distance_threshold = None
        processed_pdb_file = pdb_interpretation.process(
            params=params,
            mon_lib_srv=server.server(),
            ener_lib=server.ener_lib(),
            file_name=file_name,
            strict_conflict_handling=True,
            restraints_loading_flags=restraints_loading_flags,
            force_symmetry=True,
            substitute_non_crystallographic_unit_cell_if_necessary=True,
            log=log)
        grm = processed_pdb_file.geometry_restraints_manager()
        pdb_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
        use_segids = utils.use_segids_in_place_of_chainids(
            hierarchy=pdb_hierarchy)

        if do_bonds_and_angles or do_kinemage:
            rc = get_bond_and_angle_outliers(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=processed_pdb_file.xray_structure(),
                geometry_restraints_manager=grm,
                use_segids=use_segids,
                outliers_only=outliers_only)

            # Classify each chain id of the first model.  Once an id is
            # known to be NA or PROTEIN, later chains with the same id do
            # not change it.
            chain_types = {}
            for chain in pdb_hierarchy.models()[0].chains():
                if use_segids:
                    chain_id = utils.get_segid_as_chainid(chain=chain)
                else:
                    chain_id = chain.id
                main_conf = chain.conformers()[0]
                if chain_types.get(chain_id) in ["NA", "PROTEIN"]:
                    continue
                if main_conf.is_na():
                    chain_types[chain_id] = "NA"
                elif main_conf.is_protein():
                    chain_types[chain_id] = "PROTEIN"
                else:
                    chain_types[chain_id] = "UNK"

            def _sort_key(result):
                # Order by residue number, then altloc, then atom names.
                return (result.atoms_info[0].resseq,
                        get_altloc(atoms_info=result.atoms_info),
                        get_atoms_str(atoms_info=result.atoms_info))

            # One row per result, bonds first and then angles; columns are
            # chain:number:ins:alt:type:measure:value:sigmas:class
            outliers = []
            for results in (rc.bonds.results, rc.angles.results):
                for result in sorted(results, key=_sort_key):
                    atom_info = result.atoms_info[0]
                    atoms_str = get_atoms_str(atoms_info=result.atoms_info)
                    altloc = get_altloc(atoms_info=result.atoms_info)
                    outliers.append([
                        atom_info.chain_id, atom_info.resseq,
                        atom_info.icode, altloc, atom_info.resname,
                        atoms_str, result.model, result.score,
                        chain_types[atom_info.chain_id]
                    ])

            if do_bonds_and_angles:
                for outlier in outliers:
                    print >> out, "%s:%2s:%s:%s:%s:%s:%s:%.3f:%.3f:%s" % (
                        basename, outlier[0], outlier[1], outlier[2],
                        outlier[3], outlier[4], outlier[5], outlier[6],
                        outlier[7], outlier[8])
            elif do_kinemage:
                print >> out, rc.bonds.kinemage_header
                for result in rc.bonds.results:
                    print >> out, result.as_kinemage()
                print >> out, rc.angles.kinemage_header
                for result in rc.angles.results:
                    print >> out, result.as_kinemage()
        elif do_rna_backbone:
            rna_bb = utils.get_rna_backbone_dihedrals(processed_pdb_file)
            print >> out, rna_bb
    finally:
        # Close only a file opened here; sys.stdout belongs to the caller.
        if out_file is not None and out is not None:
            out.close()
示例#7
0
 def __init__(self, pdb_hierarchy,
     outliers_only=False,
     out=sys.stdout,
     collect_ideal=False,
     quiet=False):
   """
   Compute the C-beta deviation of every non-GLY residue in pdb_hierarchy.

   Parameters:
     pdb_hierarchy: object providing models() -> chains() ->
       residue_groups() -> conformers() -> residues(); presumably an
       iotbx.pdb hierarchy (TODO confirm).
     outliers_only: when false, every measured residue is stored in
       self.results; otherwise only those with deviation >= 0.25.
     out: not referenced in this method.
     collect_ideal: when true, the ideal C-beta position of each stored
       result is kept in self.beta_ideal under the result's id_str().
     quiet: not referenced in this method.

   self.n_outliers and self.results are updated but not created here;
   presumably validation.__init__ initializes them (TODO confirm).
   """
   validation.__init__(self)
   self._outlier_i_seqs = flex.size_t()
   self.beta_ideal = {}
   # Atom names are matched exactly, including the padding spaces.
   relevant_atom_names = frozenset((" CA ", " N  ", " C  ", " CB "))
   from mmtbx.validation import utils
   use_segids = utils.use_segids_in_place_of_chainids(
     hierarchy=pdb_hierarchy)
   for model in pdb_hierarchy.models():
     for chain in model.chains():
       if use_segids:
         chain_id = utils.get_segid_as_chainid(chain=chain)
       else:
         chain_id = chain.id
       for rg in chain.residue_groups():
         for i_cf, cf in enumerate(rg.conformers()):
           is_first = (i_cf == 0)
           for residue in cf.residues():
             if residue.resname == "GLY":
               continue
             is_alt_conf = False
             relevant_atoms = {}
             for pdb_atom in residue.atoms():
               if pdb_atom.name not in relevant_atom_names:
                 continue
               relevant_atoms[pdb_atom.name] = pdb_atom
               if len(pdb_atom.parent().altloc) != 0:
                 is_alt_conf = True
             # Later conformers are only evaluated when one of the four
             # atoms actually carries an altloc.
             if not (is_first or is_alt_conf):
               continue
             # All of CA, N, C and CB are required.
             if len(relevant_atoms) != 4:
               continue
             ideal = calculate_ideal_and_deviation(
               relevant_atoms=relevant_atoms,
               resname=residue.resname)
             dev = ideal.deviation
             dihedralNABB = ideal.dihedral
             betaxyz = ideal.ideal
             if dev is None:
               continue
             is_outlier = (dev >= 0.25)
             # `== False` is kept deliberately so that non-bool values such
             # as None are treated as before.
             if not (is_outlier or outliers_only == False):
               continue
             resCB = relevant_atoms[" CB "]
             if is_outlier:
               self.n_outliers += 1
               # Record the C-beta atom itself, not whichever atom the scan
               # above happened to end on.
               self._outlier_i_seqs.append(resCB.i_seq)
             if is_alt_conf:
               altchar = cf.altloc
             else:
               altchar = " "
             result = cbeta(
               chain_id=chain_id,
               resname=residue.resname,
               resseq=residue.resseq,
               icode=residue.icode,
               altloc=altchar,
               xyz=resCB.xyz,
               occupancy=resCB.occ,
               deviation=dev,
               dihedral_NABB=dihedralNABB,
               ideal_xyz=betaxyz,
               outlier=is_outlier)
             self.results.append(result)
             if collect_ideal:
               self.beta_ideal[result.id_str()] = betaxyz
示例#8
0
   def __init__(self,
                pdb_hierarchy,
                outliers_only=False,
                out=sys.stdout,
                collect_ideal=False,
                apply_phi_psi_correction=False,
                display_phi_psi_correction=False,
                quiet=False):
       validation.__init__(self)
       self._outlier_i_seqs = flex.size_t()
       self.beta_ideal = {}
       output_list = []
       self.stats = group_args(n_results=0,
                               n_weighted_results=0,
                               n_weighted_outliers=0)
       if apply_phi_psi_correction:
           phi_psi_angles = get_phi_psi_dict(pdb_hierarchy)
           new_outliers = 0
           outliers_removed = 0
           total_residues = 0
       from mmtbx.validation import utils
       use_segids = utils.use_segids_in_place_of_chainids(
           hierarchy=pdb_hierarchy)
       for model in pdb_hierarchy.models():
           for chain in model.chains():
               if use_segids:
                   chain_id = utils.get_segid_as_chainid(chain=chain)
               else:
                   chain_id = chain.id
               for rg in chain.residue_groups():
                   for i_cf, cf in enumerate(rg.conformers()):
                       for i_residue, residue in enumerate(cf.residues()):
                           if (residue.resname == "GLY"):
                               continue
                           is_first = (i_cf == 0)
                           is_alt_conf = False
                           relevant_atoms = {}
                           for atom in residue.atoms():
                               if (atom.name in relevant_atom_names):
                                   relevant_atoms[atom.name] = atom
                                   if (len(atom.parent().altloc) != 0):
                                       is_alt_conf = True
                           if ((is_first or is_alt_conf)
                                   and len(relevant_atoms) == 4):
                               result = calculate_ideal_and_deviation(
                                   relevant_atoms=relevant_atoms,
                                   resname=residue.resname)
                               dev = result.deviation
                               dihedralNABB = result.dihedral
                               betaxyz = result.ideal
                               if (dev is None): continue
                               resCB = relevant_atoms[" CB "]
                               self.stats.n_results += 1
                               self.stats.n_weighted_results += resCB.occ
                               if (is_alt_conf):
                                   altchar = cf.altloc
                               else:
                                   altchar = " "
                               if apply_phi_psi_correction:
                                   total_residues += 1
                                   id_str = '|%s:%s|' % (residue.id_str(),
                                                         altchar)
                                   phi_psi = phi_psi_angles.get(id_str, None)
                                   if phi_psi:
                                       rc = cbd_utils.get_phi_psi_correction(
                                           result,
                                           residue,
                                           phi_psi,
                                           display_phi_psi_correction=
                                           display_phi_psi_correction,
                                       )
                                       if rc:
                                           dev, dihedralNABB, start, finish = rc
                                           if start and not finish:
                                               outliers_removed += 1
                                           elif not start and finish:
                                               new_outliers += 1
                               if (dev >= 0.25 or outliers_only == False):
                                   if (dev >= 0.25):
                                       self.n_outliers += 1
                                       self.stats.n_weighted_outliers += resCB.occ
                                       self._outlier_i_seqs.append(atom.i_seq)
                                   res = residue.resname.lower()
                                   sub = chain.id
                                   if (len(sub) == 1):
                                       sub = " " + sub
                                   result = cbeta(chain_id=chain_id,
                                                  resname=residue.resname,
                                                  resseq=residue.resseq,
                                                  icode=residue.icode,
                                                  altloc=altchar,
                                                  xyz=resCB.xyz,
                                                  occupancy=resCB.occ,
                                                  deviation=dev,
                                                  dihedral_NABB=dihedralNABB,
                                                  ideal_xyz=betaxyz,
                                                  outlier=(dev >= 0.25))
                                   self.results.append(result)
                                   key = result.id_str()
                                   if (collect_ideal):
                                       self.beta_ideal[key] = betaxyz
           if apply_phi_psi_correction:
               print('''
 Outliers removed : %5d
 New outliers     : %5d
 Num. of outliers : %5d
 Num. of residues : %5d
 ''' % (
                   outliers_removed,
                   new_outliers,
                   self.n_outliers,
                   total_residues,
               ))
示例#9
0
    def __init__(
        self, pdb_hierarchy, data_version="8000", outliers_only=False, show_errors=False, out=sys.stdout, quiet=False
    ):
        """
        Evaluate the side-chain rotamer of every atom group in a hierarchy.

        For each atom group the chi angles are measured with SidechainAngles
        and scored with RotamerEval.  Atom groups with no chi angles, with an
        undetermined chi angle, or with no score are skipped.  Scored atom
        groups increment self.n_total and are classified by
        self.evaluateScore; a rotamer result is appended to self.results for
        every outlier and, unless outliers_only is set, for every other
        scored atom group as well.

        Parameters:
          pdb_hierarchy: object exposing models() -> chains() ->
            residue_groups() -> atom_groups().
          data_version: rotamer data set to use; only "8000" is accepted.
          outliers_only: when true, only outliers are stored in self.results.
          show_errors: forwarded to SidechainAngles; when true, atom groups
            whose chi measurement raises AttributeError are reported on `out`
            and stored as results flagged incomplete.
          out: file-like object that receives the missing-atom messages.
          quiet: accepted for interface compatibility; not used here.

        Raises:
          ValueError: if data_version is not "8000".
        """
        validation.__init__(self)
        self.n_allowed = 0
        self.n_favored = 0
        from mmtbx.rotamer.sidechain_angles import SidechainAngles
        from mmtbx.rotamer import rotamer_eval
        from mmtbx.validation import utils

        self.data_version = data_version
        # Only the "8000" data version is recognised (a commented-out "500"
        # variant in earlier code used a threshold of 0.01).
        if self.data_version != "8000":
            raise ValueError("data_version given to RotamerEval not recognized (%s)." % data_version)
        self.outlier_threshold = 0.003
        sidechain_angles = SidechainAngles(show_errors)
        rotamer_evaluator = rotamer_eval.RotamerEval(data_version=data_version)
        rotamer_id = rotamer_eval.RotamerID()  # loads in the rotamer names
        use_segids = utils.use_segids_in_place_of_chainids(hierarchy=pdb_hierarchy)
        for model in pdb_hierarchy.models():
            for chain in model.chains():
                if use_segids:
                    chain_id = utils.get_segid_as_chainid(chain=chain)
                else:
                    chain_id = chain.id
                for rg in chain.residue_groups():
                    all_dict = construct_complete_sidechain(rg)
                    for atom_group in rg.atom_groups():
                        resname = atom_group.resname
                        kwargs = {
                            "chain_id": chain_id,
                            "resseq": rg.resseq,
                            "icode": rg.icode,
                            "altloc": atom_group.altloc,
                            "resname": resname,
                            "xyz": get_center(atom_group),
                            "occupancy": get_occupancy(atom_group),
                        }
                        atom_dict = all_dict.get(atom_group.altloc)
                        # NOTE(review): the key is computed but never used in
                        # this method; the call is kept in case the helper
                        # matters elsewhere -- confirm and drop.
                        res_key = get_residue_key(atom_group=atom_group)
                        try:
                            chis = sidechain_angles.measureChiAngles(atom_group, atom_dict)
                        except AttributeError:
                            if show_errors:
                                kwargs["incomplete"] = True
                                result = rotamer(**kwargs)
                                # out.write works on Python 2 and 3 alike; the
                                # former `print >> out` chevron raises
                                # TypeError on Python 3.
                                out.write("%s is missing some sidechain atoms\n" % result.id_str())
                                self.results.append(result)
                            continue
                        if chis is None or None in chis:
                            continue
                        cur_res = resname.lower().strip()
                        if cur_res == "mse":
                            # MSE is scored against the MET distributions.
                            cur_res = "met"
                        value = rotamer_evaluator.evaluate(cur_res, chis)
                        if value is None:
                            continue
                        self.n_total += 1
                        kwargs["score"] = value * 100
                        wrap_chis = rotamer_id.wrap_chis(resname.strip(), chis, symmetry=False)
                        # NOTE(review): the symmetry-wrapped copy is computed
                        # but never read afterwards; kept so that any error
                        # wrap_sym raises still surfaces -- confirm and drop.
                        sym_chis = rotamer_id.wrap_sym(resname.strip(), wrap_chis[:])
                        evaluation = self.evaluateScore(value)
                        kwargs["evaluation"] = evaluation
                        is_outlier = evaluation == "OUTLIER"
                        kwargs["outlier"] = is_outlier
                        if is_outlier:
                            kwargs["rotamer_name"] = evaluation
                        else:
                            # An empty name means the rotamer is unclassified.
                            name = rotamer_id.identify(resname, wrap_chis)
                            kwargs["rotamer_name"] = "UNCLASSIFIED" if name == "" else name
                        # Results always carry four chi slots.
                        while len(wrap_chis) < 4:
                            wrap_chis.append(None)
                        kwargs["chi_angles"] = wrap_chis
                        result = rotamer(**kwargs)
                        if result.is_outlier() or not outliers_only:
                            self.results.append(result)
        _, out_percent = self.get_outliers_count_and_fraction()
        self.out_percent = out_percent * 100.0
示例#10
0
 def __init__ (self,
     pdb_hierarchy,
     outliers_only=False,
     show_errors=False,
     out=sys.stdout,
     quiet=False) :
   """
   Evaluate the phi/psi backbone angles of every residue in a hierarchy.

   Each residue group is paired with its immediate neighbours in the same
   chain.  Residues whose neighbour on either side is not sequence-adjacent
   (a residue-number gap, or a repeated residue number with blank insertion
   codes on both) are skipped.  For every atom group where both phi and psi
   can be computed, the residue is assigned one of the RAMA_* categories,
   scored with RamachandranEval and classified by self.evaluateScore.
   Results are appended to self.results (only outliers when outliers_only is
   set) and the i_seqs of every outlier atom group are added to
   self._outlier_i_seqs.

   Parameters:
     pdb_hierarchy: object exposing atoms() and models() -> chains() ->
       residue_groups() -> atom_groups().  If every atom i_seq is 0 the
       i_seqs are reset first.
     outliers_only: when true, only outliers are stored in self.results.
     show_errors: accepted for interface compatibility; not used here.
     out: accepted for interface compatibility; not used here.
     quiet: accepted for interface compatibility; not used here.
   """
   validation.__init__(self)
   self.n_allowed = 0
   self.n_favored = 0
   self.n_type = [ 0 ] * 6
   from mmtbx.validation import utils
   import mmtbx.rotamer
   from mmtbx.rotamer import ramachandran_eval
   from scitbx.array_family import flex
   self._outlier_i_seqs = flex.size_t()
   pdb_atoms = pdb_hierarchy.atoms()
   all_i_seqs = pdb_atoms.extract_i_seq()
   if (all_i_seqs.all_eq(0)) :
     pdb_atoms.reset_i_seq()
   use_segids = utils.use_segids_in_place_of_chainids(
     hierarchy=pdb_hierarchy)
   r = ramachandran_eval.RamachandranEval()
   for model in pdb_hierarchy.models():
     for chain in model.chains():
       # Neighbour state must not leak across chains: without this reset the
       # first residue of a chain would take its "previous residue" from the
       # end of the preceding chain.
       prev_rezes, next_rezes = None, None
       cur_resseq = None
       if use_segids:
         chain_id = utils.get_segid_as_chainid(chain=chain)
       else:
         chain_id = chain.id
       residues = list(chain.residue_groups())
       for i, residue_group in enumerate(residues):
         # The reason I pass lists of atom_groups to get_phi and get_psi is to
         # deal with the particular issue where some residues have an A alt
         # conf that needs some atoms from a "" alt conf to get calculated
         # correctly.  See 1jxt.pdb for examples.  This way I can search both
         # the alt conf atoms and the "" atoms if necessary.
         prev_atom_list, next_atom_list, atom_list = None, None, None
         if cur_resseq is not None:
           # Shift the window even for residues that end up skipped below, so
           # the next iteration still sees this residue as its predecessor.
           prev_rezes = rezes
         rezes = construct_complete_residues(residue_group)
         cur_resseq = residue_group.resseq_as_int()
         cur_icode = residue_group.icode.strip()
         if (i > 0):
           #check for insertion codes
           before = residues[i-1]
           before_resseq = before.resseq_as_int()
           if (cur_resseq == before_resseq) :
             if (cur_icode == '') and (before.icode.strip() == '') :
               continue
           elif (cur_resseq != before_resseq + 1):
             continue
         if (i < len(residues)-1):
           #find next residue
           after = residues[i+1]
           after_resseq = after.resseq_as_int()
           if (cur_resseq == after_resseq) :
             if (cur_icode == '') and (after.icode.strip() == '') :
               continue
           elif (cur_resseq != after_resseq - 1):
             continue
           next_rezes = construct_complete_residues(after)
         else:
           next_rezes = None
         for atom_group in residue_group.atom_groups():
           alt_conf = atom_group.altloc
           if rezes is not None:
             atom_list = rezes.get(alt_conf)
           # For the neighbours, fall back to the first altloc (in sorted
           # order) when the matching altloc is absent.
           if prev_rezes is not None:
             prev_atom_list = prev_rezes.get(alt_conf)
             if (prev_atom_list is None):
               prev_keys = sorted(prev_rezes.keys())
               prev_atom_list = prev_rezes.get(prev_keys[0])
           if next_rezes is not None:
             next_atom_list = next_rezes.get(alt_conf)
             if (next_atom_list is None):
               next_keys = sorted(next_rezes.keys())
               next_atom_list = next_rezes.get(next_keys[0])
           phi = get_phi(prev_atom_list, atom_list)
           psi = get_psi(atom_list, next_atom_list)
           coords = get_center(atom_group)
           if (phi is None or psi is None):
             continue
           self.n_total += 1
           resname3 = atom_group.resname[0:3]
           res_type = RAMA_GENERAL
           if (resname3 == "GLY"):
             res_type = RAMA_GLYCINE
           elif (resname3 == "PRO"):
             if is_cis_peptide(prev_atom_list, atom_list):
               res_type = RAMA_CISPRO
             else:
               res_type = RAMA_TRANSPRO
           elif (isPrePro(residues, i)):
             res_type = RAMA_PREPRO
           elif (resname3 == "ILE" or resname3 == "VAL"):
             res_type = RAMA_ILE_VAL
           self.n_type[res_type] += 1
           value = r.evaluate(res_types[res_type], [phi, psi])
           ramaType = self.evaluateScore(res_type, value)
           is_outlier = ramaType == RAMALYZE_OUTLIER
           c_alphas = None
           # XXX only save kinemage data for outliers
           if is_outlier :
             c_alphas = []
             for atoms in [prev_atom_list, atom_list, next_atom_list] :
               for a in atoms :
                 if (a.name.strip() == "CA") :
                   a_ = atom(pdb_atom=a)
                   c_alphas.append(c_alpha(
                     id_str=a_.atom_group_id_str(),
                     xyz=a_.xyz))
             assert (len(c_alphas) == 3)
           result = ramachandran(
             chain_id=chain_id,
             resseq=residue_group.resseq,
             icode=residue_group.icode,
             resname=atom_group.resname,
             altloc=atom_group.altloc,
             segid=None, # XXX ???
             phi=phi,
             psi=psi,
             rama_type=ramaType,
             res_type=res_type,
             score=value*100,
             outlier=is_outlier,
             xyz=coords,
             c_alphas=c_alphas)
           if (not outliers_only or is_outlier) :
             self.results.append(result)
           if is_outlier :
             i_seqs = atom_group.atoms().extract_i_seq()
             assert (not i_seqs.all_eq(0))
             self._outlier_i_seqs.extend(i_seqs)
   _, out_percent = self.get_outliers_count_and_fraction()
   _, fav_percent = self.get_favored_count_and_fraction()
   self.out_percent = out_percent * 100.0
   self.fav_percent = fav_percent * 100.0