Пример #1
0
def run():
  """Rebuild the rota8000- and rama8000- pickle files of the rotarama library.

  When rotamer_eval.find_rotarama_data_dir(optional=True) returns None a
  notice is printed and nothing else happens.  Otherwise
  rebuild_pickle_files() is called once for the "rota8000-" prefix and once
  for the "rama8000-" prefix, the working directory recorded on entry is
  restored, and format_cpu_times() is printed.
  """
  from libtbx.utils import format_cpu_times
  from mmtbx.rotamer import rotamer_eval
  from mmtbx.rotamer import ramachandran_eval

  initial_current_working_directory = os.getcwd()
  rotamer_data_dir = rotamer_eval.find_rotarama_data_dir(optional=True)
  if rotamer_data_dir is None:
    print('  Rebuilding rotarama library skipped. Needs rotamer library.')
    return
  try:
    target_db = rotamer_eval.open_rotarama_dlite(
      rotarama_data_dir=rotamer_data_dir)
    # Only the "8000" tables are rebuilt here.
    rebuild_pickle_files(data_dir=rotamer_data_dir,
      file_prefix="rota8000-",
      target_db=target_db,
      amino_acids=rotamer_eval.aminoAcids)
    # The Ramachandran directory comes from the same lookup function as
    # rotamer_data_dir; the database handle is reopened for it while
    # data_dir keeps pointing at rotamer_data_dir.
    ramachandran_data_dir = rotamer_eval.find_rotarama_data_dir()
    target_db = rotamer_eval.open_rotarama_dlite(
      rotarama_data_dir=ramachandran_data_dir)
    rebuild_pickle_files(data_dir=rotamer_data_dir,
      file_prefix="rama8000-",
      target_db=target_db,
      amino_acids=ramachandran_eval.aminoAcids_8000)
  finally:
    # Put the caller back where it started even if a rebuild step raises
    # (presumably rebuild_pickle_files changes directory -- TODO confirm).
    os.chdir(initial_current_working_directory)
  print(format_cpu_times())
def run():
  """Rebuild the rota8000- and rama8000- pickle files of the rotarama library.

  When rotamer_eval.find_rotarama_data_dir(optional=True) returns None a
  notice is printed and nothing else happens.  Otherwise
  rebuild_pickle_files() is called once for the "rota8000-" prefix and once
  for the "rama8000-" prefix, the working directory recorded on entry is
  restored, and format_cpu_times() is printed.
  """
  initial_current_working_directory = os.getcwd()
  rotamer_data_dir = rotamer_eval.find_rotarama_data_dir(optional=True)
  if rotamer_data_dir is None:
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('  Rebuilding rotarama library skipped. Needs rotamer library.')
    return
  try:
    target_db = rotamer_eval.open_rotarama_dlite(
      rotarama_data_dir=rotamer_data_dir)
    # Only the "8000" tables are rebuilt here.
    rebuild_pickle_files(data_dir=rotamer_data_dir,
      file_prefix="rota8000-",
      target_db=target_db,
      amino_acids=rotamer_eval.aminoAcids)
    # The Ramachandran directory comes from the same lookup function as
    # rotamer_data_dir; the database handle is reopened for it while
    # data_dir keeps pointing at rotamer_data_dir.
    ramachandran_data_dir = rotamer_eval.find_rotarama_data_dir()
    target_db = rotamer_eval.open_rotarama_dlite(
      rotarama_data_dir=ramachandran_data_dir)
    rebuild_pickle_files(data_dir=rotamer_data_dir,
      file_prefix="rama8000-",
      target_db=target_db,
      amino_acids=ramachandran_eval.aminoAcids_8000)
  finally:
    # Put the caller back where it started even if a rebuild step raises
    # (presumably rebuild_pickle_files changes directory -- TODO confirm).
    os.chdir(initial_current_working_directory)
  print(format_cpu_times())
Пример #3
0
def get_rotarama_data(residue_type=None,
                      pos_type=None,
                      db="rama",
                      convert_to_numpy_array=False):
    """Load one "<db>8000-<name>.pickle" table from the rotarama data dir.

    residue_type -- residue name used to build the file name; "phe" and
        "tyr" (any case) are both mapped to "phetyr".
    pos_type -- optional position type; when given, the file name component
        is looked up in ramachandran_eval.aminoAcids_8000 and residue_type
        is ignored.  The legacy names "proline" and "prepro" are accepted.
    db -- "rama" or "rota"; selects the file prefix and the exporter.
    convert_to_numpy_array -- when true, the loaded object is passed through
        export_ramachandran_distribution / export_rotamer_distribution
        (presumably producing a numpy array, per the flag name).

    Returns the unpickled object, or the exporter's result.
    Raises AssertionError on an unknown pos_type or db, or when neither
    residue_type nor pos_type is supplied.
    """
    from mmtbx.rotamer import ramachandran_eval
    from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
    # Older callers may still pass the short position-type names.
    if pos_type == "proline":
        pos_type = "trans-proline"
    elif pos_type == "prepro":
        pos_type = "pre-proline"
    known_pos_types = ("general", "cis-proline", "trans-proline", "glycine",
                       "isoleucine or valine", "pre-proline", None)
    assert pos_type in known_pos_types
    assert db in ("rama", "rota")
    assert not (residue_type is None and pos_type is None)
    if pos_type is not None:
        residue_type = ramachandran_eval.aminoAcids_8000[pos_type]
    if residue_type.lower() in ("phe", "tyr"):
        residue_type = "phetyr"
    assert residue_type is not None
    data_dir = find_rotarama_data_dir()
    # Only the rotamer file names are forced to lower case.
    name_part = residue_type if db == "rama" else residue_type.lower()
    pickle_name = "%s8000-%s.pickle" % (db, name_part)
    ndt = easy_pickle.load(os.path.join(data_dir, pickle_name))
    if not convert_to_numpy_array:
        return ndt
    if db == "rama":
        return export_ramachandran_distribution(ndt)
    return export_rotamer_distribution(ndt)
 def __init__(self):
   """Populate self.aaTables with one table per aminoAcids_8000 entry.

   Tables still reachable through the weak references stored in
   RamachandranEval.aaTables are reused; the others are loaded from the
   "rama8000-*.pickle" files in the rotarama data directory and registered
   back in the class-level dict as weak references.

   Raises Sorry when a pickle file is missing, or when the database reports
   that it needs an update and no "NO_UPDATE" file exists in the directory.
   """
   shared_tables = RamachandranEval.aaTables
   # Calling a weak reference gives a strong reference (None once it is dead).
   self.aaTables = {aa: ref() for aa, ref in shared_tables.items()}
   data_dir = find_rotarama_data_dir()
   target_db = open_rotarama_dlite(rotarama_data_dir=data_dir)
   updates_blocked = os.path.exists(os.path.join(data_dir, "NO_UPDATE"))
   for aa, aafile in aminoAcids_8000.items():
     if self.aaTables.get(aa) is not None:
       continue  # a live table was recovered above
     stem = "rama8000-" + aafile
     pickle_file = stem + ".pickle"
     pair_info = target_db.pair_info(
       source_path=stem + ".data",
       target_path=pickle_file,
       path_prefix=data_dir)
     stale = pair_info.needs_update and not updates_blocked
     if stale or not os.path.exists(os.path.join(data_dir, pickle_file)):
       raise Sorry(
         "chem_data/rotarama_data/*.pickle files are missing or out of date.\n"
         "  Please run\n"
         "    mmtbx.rebuild_rotarama_cache\n"
         "  to resolve this problem.\n")
     table_path = os.path.join(data_dir, pair_info.target.path)
     ndt = easy_pickle.load(file_name=table_path)
     self.aaTables[aa] = ndt
     shared_tables[aa] = weakref.ref(ndt)
Пример #5
0
def get_rotarama_data(residue_type=None, pos_type=None, db="rama",
                      convert_to_numpy_array=False):
  """Load one "<db>8000-<name>.pickle" table from the rotarama data dir.

  residue_type -- residue name used to build the file name; "phe" and "tyr"
      (any case) are both mapped to "phetyr".
  pos_type -- optional position type; when given, the file name component is
      looked up in ramachandran_eval.aminoAcids_8000 and residue_type is
      ignored.  The legacy names "proline" and "prepro" are accepted.
  db -- "rama" or "rota"; selects the file prefix and the exporter.
  convert_to_numpy_array -- when true, the loaded object is passed through
      export_ramachandran_distribution / export_rotamer_distribution
      (presumably producing a numpy array, per the flag name).

  Returns the unpickled object, or the exporter's result.
  Raises AssertionError on an unknown pos_type or db, or when neither
  residue_type nor pos_type is supplied.
  """
  from mmtbx.rotamer import ramachandran_eval
  from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
  # backwards compatibility: translate the two legacy spellings
  for old_name, new_name in (("proline", "trans-proline"),
                             ("prepro", "pre-proline")):
    if pos_type == old_name:
      pos_type = new_name
      break
  assert pos_type in ["general", "cis-proline", "trans-proline", "glycine",
                      "isoleucine or valine", "pre-proline", None]
  assert db in ["rama", "rota"]
  assert (residue_type is not None) or (pos_type is not None)
  if pos_type is not None:
    residue_type = ramachandran_eval.aminoAcids_8000[pos_type]
  if residue_type.lower() in ["phe", "tyr"]:
    residue_type = "phetyr"
  assert residue_type is not None
  rama_data_dir = find_rotarama_data_dir()
  # Rotamer file names are lower-cased; Ramachandran names are used as-is.
  if db != "rama":
    file_tag = residue_type.lower()
  else:
    file_tag = residue_type
  pkl_path = os.path.join(rama_data_dir, "%s8000-%s.pickle" % (db, file_tag))
  ndt = easy_pickle.load(pkl_path)
  if convert_to_numpy_array:
    exporter = (export_ramachandran_distribution if db == "rama"
                else export_rotamer_distribution)
    return exporter(ndt)
  return ndt
 def __init__(self):
     """Populate self.aaTables with one table per aminoAcids_8000 entry.

     Tables still reachable through the weak references stored in
     RamachandranEval.aaTables are reused; the others are loaded from the
     "rama8000-*.pickle" files in the rotarama data directory and registered
     back in the class-level dict as weak references.

     Raises Sorry when a pickle file is missing, or when the database
     reports that it needs an update and no "NO_UPDATE" file exists in the
     directory.
     """
     class_cache = RamachandranEval.aaTables
     # Turn the cached weak references into strong ones (None if collected).
     self.aaTables = dict(
         (aa, ndt_ref()) for aa, ndt_ref in class_cache.items())
     rama_data_dir = find_rotarama_data_dir()
     target_db = open_rotarama_dlite(rotarama_data_dir=rama_data_dir)
     no_update = os.path.exists(os.path.join(rama_data_dir, "NO_UPDATE"))
     # Only entries without a live table have to be read from disk.
     to_load = [(aa, aafile) for aa, aafile in aminoAcids_8000.items()
                if self.aaTables.get(aa) is None]
     for aa, aafile in to_load:
         data_file = "rama8000-%s.data" % aafile
         pickle_file = "rama8000-%s.pickle" % aafile
         pair_info = target_db.pair_info(source_path=data_file,
                                         target_path=pickle_file,
                                         path_prefix=rama_data_dir)
         out_of_date = pair_info.needs_update and not no_update
         if not out_of_date:
             pickle_path = os.path.join(rama_data_dir, pickle_file)
             out_of_date = not os.path.exists(pickle_path)
         if out_of_date:
             raise Sorry(
                 "chem_data/rotarama_data/*.pickle files are missing or out of date.\n"
                 "  Please run\n"
                 "    mmtbx.rebuild_rotarama_cache\n"
                 "  to resolve this problem.\n")
         ndt = easy_pickle.load(
             file_name=os.path.join(rama_data_dir, pair_info.target.path))
         self.aaTables[aa] = ndt
         class_cache[aa] = weakref.ref(ndt)
Пример #7
0
def exercise_rotalyze():
    """Regression-check rotalyze.rotalyze output on jcm.pdb and pdb1jxt.ent.

    Both inputs are looked up under phenix_regression/pdb via
    libtbx.env.find_in_repositories.  The function prints a notice and
    returns early when an input file is not found or when
    find_rotarama_data_dir(optional=True) returns None.  All checks are plain
    asserts on the text written by show_old_output().
    """
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/jcm.pdb", test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_rotalyze(): input pdb (jcm.pdb) not available")
        return
    if (find_rotarama_data_dir(optional=True) is None):
        print("Skipping exercise_rotalyze(): rotarama_data directory not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    pdb_io = pdb.input(file_name=regression_pdb)
    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue()
    assert output.count("OUTLIER") == 246, output.count("OUTLIER")
    assert output.count(":") == 984, output.count(":")
    output_lines = output.splitlines()
    assert len(output_lines) == 123
    for lines in output_lines:
        # Columns 12-14 hold the start of the third ':'-separated field in
        # the expected lines below; every outlier must be at or below 1.0.
        assert float(lines[12:15]) <= 1.0

    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    # Run the same checks on the live object and on a pickle round-trip.
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        for outlier in r.results:
            assert (len(outlier.xyz) == 3)
        output = out.getvalue()
        assert output.count("OUTLIER") == 246
        assert output.count(":") == 5144, output.count(":")
        assert output.count("p") == 120
        assert output.count("m") == 324
        assert output.count("t") == 486
        output_lines = output.splitlines()
        assert len(output_lines) == 643
        line_indices = [0, 1, 2, 42, 43, 168, 169, 450, 587, 394, 641, 642]
        # Expected lines for the top8000 version of the rotamer tables.
        line_values = [
            " A  14  MET:1.00:1.3:29.2:173.3:287.9::Allowed:ptm",
            " A  15  SER:1.00:0.1:229.0::::OUTLIER:OUTLIER",
            " A  16  SER:1.00:3.0:277.9::::Favored:m",
            " A  58  ASN:1.00:1.0:252.4:343.6:::Allowed:m-40",
            " A  59  ILE:1.00:0.5:84.2:186.7:::Allowed:pt",
            " A 202  GLU:1.00:0.0:272.7:65.9:287.8::OUTLIER:OUTLIER",
            " A 203  ILE:1.00:1.0:292.9:199.6:::Allowed:mt",
            " B 154  THR:1.00:0.0:356.0::::OUTLIER:OUTLIER",
            " B 316  TYR:1.00:4.1:153.7:68.6:::Favored:t80",
            " B  86  ASP:1.00:0.4:321.4:145.1:::Allowed:m-30",
            " B 377  GLU:1.00:15.0:311.7:166.2:160.1::Favored:mt-10",
            " B 378  THR:1.00:17.0:309.4::::Favored:m",
        ]
        for idx, val in zip(line_indices, line_values):
            assert (output_lines[idx] == val), (idx, output_lines[idx])

    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent", test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_rotalyze(): input pdb (pdb1jxt.ent) not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    pdb_io = pdb.input(file_name=regression_pdb)
    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue().strip()
    # No rotamer outliers are expected for this structure.
    assert output == ""

    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert not show_diff(
            output, """\
 A   1  THR:1.00:95.4:299.5::::Favored:m
 A   2 ATHR:0.67:49.5:56.1::::Favored:p
 A   2 BTHR:0.33:90.4:298.1::::Favored:m
 A   3  CYS:1.00:12.9:310.5::::Favored:m
 A   4  CYS:1.00:91.6:293.1::::Favored:m
 A   5  PRO:1.00:78.8:30.2:319.7:33.8::Favored:Cg_endo
 A   6  SER:1.00:90.1:68.4::::Favored:p
 A   7 AILE:0.45:49.6:290.8:178.2:::Favored:mt
 A   7 BILE:0.55:6.5:284.4:298.4:::Favored:mm
 A   8 AVAL:0.50:1.1:156.7::::Allowed:t
 A   8 BVAL:0.30:5.1:71.3::::Favored:p
 A   8 CVAL:0.20:69.8:172.1::::Favored:t
 A  10 AARG:0.65:24.7:176.8:66.5:63.9:180.0:Favored:tpp-160
 A  10 BARG:0.35:17.5:176.8:72.8:66.4:171.9:Favored:tpp-160
 A  11  SER:1.00:51.6:300.9::::Favored:m
 A  12 AASN:0.50:93.9:286.1:343.8:::Favored:m-40
 A  12 BASN:0.50:98.9:288.4:337.6:::Favored:m-40
 A  13 APHE:0.65:45.1:187.2:276.4:::Favored:t80
 A  13 BPHE:0.35:86.1:179.6:263.1:::Favored:t80
 A  14  ASN:1.00:95.2:289.6:333.0:::Favored:m-40
 A  15  VAL:1.00:42.3:168.2::::Favored:t
 A  16  CYS:1.00:40.8:176.5::::Favored:t
 A  17  ARG:1.00:21.4:289.7:282.8:288.6:158.7:Favored:mmm160
 A  18  LEU:1.00:65.0:287.2:173.3:::Favored:mt
 A  19  PRO:1.00:43.6:24.4:324.8:31.6::Favored:Cg_endo
 A  21  THR:1.00:5.7:314.0::::Favored:m
 A  22 APRO:0.55:87.5:333.5:34.0:333.8::Favored:Cg_exo
 A  23 AGLU:0.50:86.9:290.9:187.1:341.8::Favored:mt-10
 A  23 BGLU:0.50:91.7:292.0:183.8:339.2::Favored:mt-10
 A  25 ALEU:0.50:95.7:294.4:173.6:::Favored:mt
 A  26  CYS:1.00:83.0:295.0::::Favored:m
 A  28  THR:1.00:29.6:52.9::::Favored:p
 A  29 ATYR:0.65:18.5:161.8:67.8:::Favored:t80
 A  29 BTYR:0.35:0.4:191.3:322.7:::Allowed:t80
 A  30 ATHR:0.70:60.8:57.4::::Favored:p
 A  30 BTHR:0.30:6.6:78.1::::Favored:p
 A  32  CYS:1.00:61.4:301.7::::Favored:m
 A  33  ILE:1.00:36.6:66.5:173.4:::Favored:pt
 A  34 AILE:0.70:60.9:303.6:167.6:::Favored:mt
 A  34 BILE:0.30:31.4:308.5:296.8:::Favored:mm
 A  35  ILE:1.00:45.6:62.4:170.0:::Favored:pt
 A  36  PRO:1.00:36.2:22.5:330.5:24.8::Favored:Cg_endo
 A  39 ATHR:0.70:14.0:311.0::::Favored:m
 A  39 BTHR:0.30:13.1:288.8::::Favored:m
 A  40  CYS:1.00:81.4:294.4::::Favored:m
 A  41  PRO:1.00:35.4:34.4:317.5:33.1::Favored:Cg_endo
 A  43 AASP:0.75:24.8:56.5:340.3:::Favored:p0
 A  43 BASP:0.25:43.2:59.6:349.3:::Favored:p0
 A  44  TYR:1.00:85.3:290.9:85.1:::Favored:m-80
 A  46  ASN:1.00:38.7:301.6:117.9:::Favored:m110
""")
Пример #8
0
def exercise_ramalyze():
    """Regression-check ramalyze.ramalyze on two files and an inline excerpt.

    Exercise 1 uses phenix_regression/pdb/jcm.pdb, Exercise 2 uses
    phenix_regression/pdb/pdb1jxt.ent and Exercise 3 an inline PDB string.
    The function prints a notice and returns early when an input file is not
    found or when find_rotarama_data_dir(optional=True) returns None.  All
    checks are plain asserts.
    """
    from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/jcm.pdb", test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_ramalyze(): input pdb (jcm.pdb) not available")
        return
    if (find_rotarama_data_dir(optional=True) is None):
        print("Skipping exercise_ramalyze(): rotarama_data directory not available")
        return
    from iotbx import file_reader
    # Exercise 1
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    pdb_io = pdb.input(file_name=regression_pdb)
    hierarchy.atoms().reset_i_seq()
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out)
    output = out.getvalue()
    assert output.count("OUTLIER") == 100
    assert output.count("Favored") == 0
    assert output.count("Allowed") == 0
    assert output.count("General") == 64
    assert output.count("Glycine") == 6
    assert output.count("Trans-proline") == 1
    assert output.count("Cis-proline") == 0
    assert output.count("Pre-proline") == 4
    assert output.count("Isoleucine or valine") == 25
    assert (len(r.outlier_selection()) == 494)
    # The atom groups reached through the outlier selection must be exactly
    # the ones reported in r.results.
    atoms = hierarchy.atoms()
    outlier_ids = set()
    for i_seq in r.outlier_selection():
        outlier_ids.add(atoms[i_seq].parent().id_str())
    outliers1 = sorted([o.atom_group_id_str() for o in r.results])
    outliers2 = sorted(outlier_ids)
    assert (outliers1 == outliers2)

    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    # Run the same checks on the live object and on a pickle round-trip.
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        for outlier in r.results:
            assert (len(outlier.xyz) == 3)
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert output.count("OUTLIER") == 100
        assert output.count("Favored") == 463
        assert output.count("Allowed") == 162
        assert output.count("General") == 514
        assert output.count("Glycine") == 39
        assert output.count("Trans-proline") == 23
        assert output.count("Cis-proline") == 0
        assert output.count("Pre-proline") == 21
        assert output.count("Isoleucine or valine") == 128
        numtotal = r.get_phi_psi_residues_count()
        assert r.get_outliers_count_and_fraction() == (100, 100. / numtotal)
        assert r.get_allowed_count_and_fraction() == (162, 162. / numtotal)
        assert r.get_favored_count_and_fraction() == (463, 463. / numtotal)
        assert r.get_general_count_and_fraction() == (514, 514. / numtotal)
        assert r.get_gly_count_and_fraction() == (39, 39. / numtotal)
        assert r.get_trans_pro_count_and_fraction() == (23, 23. / numtotal)
        assert r.get_cis_pro_count_and_fraction() == (0, 0. / numtotal)
        assert r.get_prepro_count_and_fraction() == (21, 21. / numtotal)
        assert r.get_ileval_count_and_fraction() == (128, 128. / numtotal)
        # An earlier form of this check was 75+154+494; the reason for that
        # arithmetic is unclear.
        assert numtotal == 725
        output_lines = output.splitlines()
        assert len(output_lines) == 725
        selected_lines = [output_lines[x] for x in [
            0, 1, 168, 169, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724
        ]]
        assert not show_diff(
            "\n".join(selected_lines), """\
 A  15  SER:35.07:-83.26:131.88:Favored:General
 A  16  SER:0.74:-111.53:71.36:Allowed:General
 A 191  ASP:2.66:-42.39:121.87:Favored:Pre-proline
 A 192  PRO:0.31:-39.12:-31.84:Allowed:Trans-proline
 B 368  LYS:56.44:-62.97:-53.28:Favored:General
 B 369  GLU:8.89:-44.36:-45.50:Favored:General
 B 370  LYS:40.00:-50.00:-39.06:Favored:General
 B 371  VAL:68.24:-60.38:-51.85:Favored:Isoleucine or valine
 B 372  LEU:0.02:-61.13:-170.23:OUTLIER:General
 B 373  ARG:0.02:60.09:-80.26:OUTLIER:General
 B 374  ALA:0.13:-37.21:-36.12:Allowed:General
 B 375  LEU:11.84:-89.81:-41.45:Favored:General
 B 376  ASN:84.33:-58.30:-41.39:Favored:General
 B 377  GLU:30.88:-56.79:-21.74:Favored:General""")
        assert (len(r.outlier_selection()) == 494)

    # Exercise 2
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent", test=os.path.isfile)
    # The lookup can come back as None (it is checked for jcm.pdb above), so
    # skip instead of handing None to the file reader.
    if (regression_pdb is None):
        print("Skipping exercise_ramalyze(): input pdb (pdb1jxt.ent) not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    hierarchy.atoms().reset_i_seq()
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out)
    output = out.getvalue()
    assert output.count("Favored") == 0
    assert output.count("Allowed") == 0
    assert output.count("OUTLIER") == 0
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert output.count("Favored") == 50
        assert output.count("Allowed") == 1
        assert output.count("OUTLIER") == 0
        assert output.count("General") == 29
        assert output.count("Glycine") == 4
        assert output.count("Trans-proline") == 5
        assert output.count("Cis-proline") == 0
        assert output.count("Pre-proline") == 5
        assert output.count("Isoleucine or valine") == 8
        numtotal = r.get_phi_psi_residues_count()
        assert r.get_outliers_count_and_fraction() == (0, 0. / numtotal)
        assert r.get_allowed_count_and_fraction() == (1, 1. / numtotal)
        assert r.get_favored_count_and_fraction() == (43, 43. / numtotal)
        assert r.get_general_count_and_fraction() == (25, 25. / numtotal)
        assert r.get_gly_count_and_fraction() == (4, 4. / numtotal)
        assert r.get_trans_pro_count_and_fraction() == (5, 5. / numtotal)
        assert r.get_cis_pro_count_and_fraction() == (0, 0. / numtotal)
        assert r.get_prepro_count_and_fraction() == (5, 5. / numtotal)
        assert r.get_ileval_count_and_fraction() == (5, 5. / numtotal)
        output_lines = output.splitlines()
        assert len(output_lines) == 51
        selected_lines = [output_lines[x]
                          for x in [0, 1, 5, 6, 7, 8, 9, 47, 48, 49, 50]]
        assert not show_diff(
            "\n".join(selected_lines), """\
 A   2 ATHR:33.85:-106.92:144.23:Favored:General
 A   3 ACYS:47.07:-132.54:137.26:Favored:General
 A   7 AILE:98.76:-61.91:-44.35:Favored:Isoleucine or valine
 A   7 BILE:61.50:-56.21:-51.56:Favored:Isoleucine or valine
 A   8 AVAL:23.11:-50.35:-49.64:Favored:Isoleucine or valine
 A   8 BVAL:12.01:-83.20:-12.14:Favored:Isoleucine or valine
 A   8 CVAL:73.11:-61.22:-36.49:Favored:Isoleucine or valine
 A  43 AASP:51.81:-94.64:5.45:Favored:General
 A  43 BASP:56.98:-88.69:-0.12:Favored:General
 A  44  TYR:1.76:-133.10:58.75:Allowed:General
 A  45  ALA:57.37:-86.61:-8.57:Favored:General""")

    # Exercise 3: 2plx excerpt (unusual icode usage)
    import iotbx.pdb.hierarchy
    pdb_io = iotbx.pdb.hierarchy.input(pdb_string="""\
ATOM   1468  N   GLY A 219       3.721  21.322  10.752  1.00 14.12           N
ATOM   1469  CA  GLY A 219       3.586  21.486  12.188  1.00 14.85           C
ATOM   1470  C   GLY A 219       4.462  20.538  12.995  1.00 15.63           C
ATOM   1471  O   GLY A 219       5.513  20.090  12.512  1.00 14.55           O
ATOM   1472  N   CYS A 220       4.036  20.213  14.235  1.00 15.02           N
ATOM   1473  CA  CYS A 220       4.776  19.228  15.068  1.00 15.56           C
ATOM   1474  C   CYS A 220       3.773  18.322  15.741  1.00 14.69           C
ATOM   1475  O   CYS A 220       2.799  18.828  16.338  1.00 15.54           O
ATOM   1476  CB  CYS A 220       5.620  19.906  16.174  1.00 15.72           C
ATOM   1477  SG  CYS A 220       6.762  21.133  15.448  1.00 15.45           S
ATOM   1478  N   ALA A 221A      4.054  17.017  15.707  1.00 14.77           N
ATOM   1479  CA  ALA A 221A      3.274  16.015  16.507  1.00 14.01           C
ATOM   1480  C   ALA A 221A      1.774  15.992  16.099  1.00 14.50           C
ATOM   1481  O   ALA A 221A      0.875  15.575  16.881  1.00 14.46           O
ATOM   1482  CB  ALA A 221A      3.440  16.318  17.935  1.00 12.28           C
ATOM   1483  N   GLN A 221       1.523  16.390  14.848  1.00 14.52           N
ATOM   1484  CA  GLN A 221       0.159  16.391  14.325  1.00 15.19           C
ATOM   1485  C   GLN A 221      -0.229  15.044  13.717  1.00 14.43           C
ATOM   1486  O   GLN A 221       0.641  14.280  13.307  1.00 16.88           O
ATOM   1487  CB  GLN A 221       0.002  17.491  13.272  1.00 16.41           C
ATOM   1488  CG  GLN A 221       0.253  18.906  13.805  1.00 16.52           C
ATOM   1489  CD  GLN A 221      -0.640  19.181  14.995  1.00 17.87           C
ATOM   1490  OE1 GLN A 221      -1.857  19.399  14.826  1.00 13.54           O
ATOM   1491  NE2 GLN A 221      -0.050  19.149  16.228  1.00 16.18           N
ATOM   1492  N   LYS A 222      -1.537  14.773  13.694  1.00 14.34           N
ATOM   1493  CA  LYS A 222      -2.053  13.536  13.125  1.00 15.07           C
ATOM   1494  C   LYS A 222      -1.679  13.455  11.655  1.00 14.88           C
ATOM   1495  O   LYS A 222      -1.856  14.424  10.883  1.00 14.32           O
""")
    r = ramalyze.ramalyze(pdb_hierarchy=pdb_io.hierarchy, outliers_only=False)
    assert (len(r.results) == 3)
Пример #9
0
def exercise_ramalyze():
  """Regression-check ramalyze.ramalyze on two files and an inline excerpt.

  Exercise 1 uses phenix_regression/pdb/jcm.pdb, Exercise 2 uses
  phenix_regression/pdb/pdb1jxt.ent and Exercise 3 an inline PDB string.
  The function prints a notice and returns early when an input file is not
  found or when find_rotarama_data_dir(optional=True) returns None.  All
  checks are plain asserts.
  """
  from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
  regression_pdb = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/jcm.pdb",
    test=os.path.isfile)
  if (regression_pdb is None):
    print("Skipping exercise_ramalyze(): input pdb (jcm.pdb) not available")
    return
  if (find_rotarama_data_dir(optional=True) is None):
    print("Skipping exercise_ramalyze(): rotarama_data directory not available")
    return
  from iotbx import file_reader
  # Exercise 1
  pdb_in = file_reader.any_file(file_name=regression_pdb)
  hierarchy = pdb_in.file_object.hierarchy
  pdb_io = pdb.input(file_name=regression_pdb)
  hierarchy.atoms().reset_i_seq()
  r = ramalyze.ramalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=True)
  out = StringIO()
  r.show_old_output(out=out)
  output = out.getvalue()
  assert output.count("OUTLIER") == 100
  assert output.count("Favored") == 0
  assert output.count("Allowed") == 0
  assert output.count("General") == 64
  assert output.count("Glycine") == 6
  assert output.count("Trans-proline") == 1
  assert output.count("Cis-proline") == 0
  assert output.count("Pre-proline") == 4
  assert output.count("Isoleucine or valine") == 25
  assert (len(r.outlier_selection()) == 788)
  # The atom groups reached through the outlier selection must be exactly
  # the ones reported in r.results.
  atoms = hierarchy.atoms()
  outlier_ids = set()
  for i_seq in r.outlier_selection():
    outlier_ids.add(atoms[i_seq].parent().id_str())
  outliers1 = sorted([o.atom_group_id_str() for o in r.results])
  outliers2 = sorted(outlier_ids)
  assert (outliers1 == outliers2)

  r = ramalyze.ramalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=False)
  # Run the same checks on the live object and on a pickle round-trip.
  for unpickle in [False, True]:
    if unpickle:
      r = loads(dumps(r))
    for outlier in r.results:
      assert (len(outlier.xyz) == 3)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue()
    assert output.count("OUTLIER") == 100
    assert output.count("Favored") == 461
    assert output.count("Allowed") == 162
    assert output.count("General") == 513
    assert output.count("Glycine") == 39
    assert output.count("Trans-proline") == 23
    assert output.count("Cis-proline") == 0
    assert output.count("Pre-proline") == 21
    assert output.count("Isoleucine or valine") == 127
    numtotal = r.get_phi_psi_residues_count()
    assert r.get_outliers_count_and_fraction()  == (100, 100./numtotal)
    assert r.get_allowed_count_and_fraction()   == (162, 162./numtotal)
    assert r.get_favored_count_and_fraction()   == (461, 461./numtotal)
    assert r.get_general_count_and_fraction()   == (513, 513./numtotal)
    assert r.get_gly_count_and_fraction()       == (39, 39./numtotal)
    assert r.get_trans_pro_count_and_fraction() == (23, 23./numtotal)
    assert r.get_cis_pro_count_and_fraction()   == (0, 0./numtotal)
    assert r.get_prepro_count_and_fraction()    == (21, 21./numtotal)
    assert r.get_ileval_count_and_fraction()    == (127, 127./numtotal)
    assert numtotal == 75+154+494
    output_lines = output.splitlines()
    assert len(output_lines) == 723
    selected_lines = [output_lines[x] for x in
      [0, 1, 168, 169, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722]]
    assert not show_diff("\n".join(selected_lines), """\
 A  15  SER:35.07:-83.26:131.88:Favored:General
 A  16  SER:0.74:-111.53:71.36:Allowed:General
 A 191  ASP:2.66:-42.39:121.87:Favored:Pre-proline
 A 192  PRO:0.31:-39.12:-31.84:Allowed:Trans-proline
 B 368  LYS:56.44:-62.97:-53.28:Favored:General
 B 369  GLU:8.89:-44.36:-45.50:Favored:General
 B 370  LYS:40.00:-50.00:-39.06:Favored:General
 B 371  VAL:68.24:-60.38:-51.85:Favored:Isoleucine or valine
 B 372  LEU:0.02:-61.13:-170.23:OUTLIER:General
 B 373  ARG:0.02:60.09:-80.26:OUTLIER:General
 B 374  ALA:0.13:-37.21:-36.12:Allowed:General
 B 375  LEU:11.84:-89.81:-41.45:Favored:General
 B 376  ASN:84.33:-58.30:-41.39:Favored:General
 B 377  GLU:30.88:-56.79:-21.74:Favored:General""")
    assert (len(r.outlier_selection()) == 788)

  # Exercise 2
  regression_pdb = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/pdb1jxt.ent",
    test=os.path.isfile)
  # The lookup can come back as None (it is checked for jcm.pdb above), so
  # skip instead of handing None to the file reader.
  if (regression_pdb is None):
    print("Skipping exercise_ramalyze(): input pdb (pdb1jxt.ent) not available")
    return
  pdb_in = file_reader.any_file(file_name=regression_pdb)
  hierarchy = pdb_in.file_object.hierarchy
  hierarchy.atoms().reset_i_seq()
  r = ramalyze.ramalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=True)
  out = StringIO()
  r.show_old_output(out=out)
  output = out.getvalue()
  assert output.count("Favored") == 0
  assert output.count("Allowed") == 0
  assert output.count("OUTLIER") == 0
  r = ramalyze.ramalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=False)
  for unpickle in [False, True]:
    if unpickle:
      r = loads(dumps(r))
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue()
    assert output.count("Favored") == 47
    assert output.count("Allowed") == 1
    assert output.count("OUTLIER") == 0
    assert output.count("General") == 27
    assert output.count("Glycine") == 4
    assert output.count("Trans-proline") == 4
    assert output.count("Cis-proline") == 0
    assert output.count("Pre-proline") == 5
    assert output.count("Isoleucine or valine") == 8
    numtotal = r.get_phi_psi_residues_count()
    assert r.get_outliers_count_and_fraction()  == (0, 0./numtotal)
    assert r.get_allowed_count_and_fraction()   == (1, 1./numtotal)
    assert r.get_favored_count_and_fraction()   == (47, 47./numtotal)
    assert r.get_general_count_and_fraction()   == (27, 27./numtotal)
    assert r.get_gly_count_and_fraction()       == (4, 4./numtotal)
    assert r.get_trans_pro_count_and_fraction() == (4, 4./numtotal)
    assert r.get_cis_pro_count_and_fraction()   == (0, 0./numtotal)
    assert r.get_prepro_count_and_fraction()    == (5, 5./numtotal)
    assert r.get_ileval_count_and_fraction()    == (8, 8./numtotal)
    output_lines = output.splitlines()
    assert len(output_lines) == 48
    selected_lines = [output_lines[x] for x in
      [0, 1, 6, 7, 8, 9, 10, 44, 45, 46, 47]]
    assert not show_diff("\n".join(selected_lines), """\
 A   2 ATHR:33.85:-106.92:144.23:Favored:General
 A   2 BTHR:37.07:-97.44:137.00:Favored:General
 A   7 AILE:98.76:-61.91:-44.35:Favored:Isoleucine or valine
 A   7 BILE:61.50:-56.21:-51.56:Favored:Isoleucine or valine
 A   8 AVAL:23.11:-50.35:-49.64:Favored:Isoleucine or valine
 A   8 BVAL:12.01:-83.20:-12.14:Favored:Isoleucine or valine
 A   8 CVAL:73.11:-61.22:-36.49:Favored:Isoleucine or valine
 A  43 AASP:51.81:-94.64:5.45:Favored:General
 A  43 BASP:56.98:-88.69:-0.12:Favored:General
 A  44  TYR:1.76:-133.10:58.75:Allowed:General
 A  45  ALA:57.37:-86.61:-8.57:Favored:General""")

  # Exercise 3: 2plx excerpt (unusual icode usage)
  import iotbx.pdb.hierarchy
  pdb_io = iotbx.pdb.hierarchy.input(pdb_string="""\
ATOM   1468  N   GLY A 219       3.721  21.322  10.752  1.00 14.12           N
ATOM   1469  CA  GLY A 219       3.586  21.486  12.188  1.00 14.85           C
ATOM   1470  C   GLY A 219       4.462  20.538  12.995  1.00 15.63           C
ATOM   1471  O   GLY A 219       5.513  20.090  12.512  1.00 14.55           O
ATOM   1472  N   CYS A 220       4.036  20.213  14.235  1.00 15.02           N
ATOM   1473  CA  CYS A 220       4.776  19.228  15.068  1.00 15.56           C
ATOM   1474  C   CYS A 220       3.773  18.322  15.741  1.00 14.69           C
ATOM   1475  O   CYS A 220       2.799  18.828  16.338  1.00 15.54           O
ATOM   1476  CB  CYS A 220       5.620  19.906  16.174  1.00 15.72           C
ATOM   1477  SG  CYS A 220       6.762  21.133  15.448  1.00 15.45           S
ATOM   1478  N   ALA A 221A      4.054  17.017  15.707  1.00 14.77           N
ATOM   1479  CA  ALA A 221A      3.274  16.015  16.507  1.00 14.01           C
ATOM   1480  C   ALA A 221A      1.774  15.992  16.099  1.00 14.50           C
ATOM   1481  O   ALA A 221A      0.875  15.575  16.881  1.00 14.46           O
ATOM   1482  CB  ALA A 221A      3.440  16.318  17.935  1.00 12.28           C
ATOM   1483  N   GLN A 221       1.523  16.390  14.848  1.00 14.52           N
ATOM   1484  CA  GLN A 221       0.159  16.391  14.325  1.00 15.19           C
ATOM   1485  C   GLN A 221      -0.229  15.044  13.717  1.00 14.43           C
ATOM   1486  O   GLN A 221       0.641  14.280  13.307  1.00 16.88           O
ATOM   1487  CB  GLN A 221       0.002  17.491  13.272  1.00 16.41           C
ATOM   1488  CG  GLN A 221       0.253  18.906  13.805  1.00 16.52           C
ATOM   1489  CD  GLN A 221      -0.640  19.181  14.995  1.00 17.87           C
ATOM   1490  OE1 GLN A 221      -1.857  19.399  14.826  1.00 13.54           O
ATOM   1491  NE2 GLN A 221      -0.050  19.149  16.228  1.00 16.18           N
ATOM   1492  N   LYS A 222      -1.537  14.773  13.694  1.00 14.34           N
ATOM   1493  CA  LYS A 222      -2.053  13.536  13.125  1.00 15.07           C
ATOM   1494  C   LYS A 222      -1.679  13.455  11.655  1.00 14.88           C
ATOM   1495  O   LYS A 222      -1.856  14.424  10.883  1.00 14.32           O
""")
  r = ramalyze.ramalyze(
    pdb_hierarchy=pdb_io.hierarchy,
    outliers_only=False)
  assert (len(r.results) == 3)
Пример #10
0
def exercise_rotalyze():
  """Regression test for rotalyze on two reference structures.

  Locates jcm.pdb and pdb1jxt.ent through libtbx.env.find_in_repositories,
  runs rotalyze.rotalyze on each hierarchy (outliers only, then all
  residues) and compares the text written by show_old_output() against
  stored expectations.  The all-residue checks are repeated on a
  loads(dumps(...)) copy of the result object.

  Returns early, after printing a message, when an input file or the
  rotarama data directory cannot be found.
  """
  regression_pdb = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/jcm.pdb",
    test=os.path.isfile)
  # Single-argument print(...) behaves the same as a print statement on
  # Python 2 and as the print function on Python 3.
  if regression_pdb is None:
    print("Skipping exercise_rotalyze(): input pdb (jcm.pdb) not available")
    return
  if find_rotarama_data_dir(optional=True) is None:
    print("Skipping exercise_rotalyze(): rotarama_data directory not available")
    return
  pdb_in = file_reader.any_file(file_name=regression_pdb)
  hierarchy = pdb_in.file_object.hierarchy
  # NOTE(review): pdb_io is not read anywhere in this function; kept so the
  # sequence of calls is unchanged -- confirm whether it can be dropped.
  pdb_io = pdb.input(file_name=regression_pdb)

  # --- jcm.pdb, outliers only ---------------------------------------------
  r = rotalyze.rotalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=True)
  out = StringIO()
  r.show_old_output(out=out, verbose=False)
  output = out.getvalue()
  # 123 lines are expected; the counts below correspond to "OUTLIER"
  # appearing twice and ":" appearing eight times on every line.
  assert output.count("OUTLIER") == 246, output.count("OUTLIER")
  assert output.count(":") == 984, output.count(":")
  output_lines = output.splitlines()
  assert len(output_lines) == 123
  for line in output_lines:
    # Columns 12-14 are the first three characters of the field that
    # follows the residue name (e.g. "1.0" in "...SER:1.00:...").
    assert float(line[12:15]) <= 1.0

  # --- jcm.pdb, all residues (original and pickle round-trip) -------------
  r = rotalyze.rotalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=False)
  # Spot-checked output lines; indices and values are paired positionally.
  line_indices = [0,1,2,42,43,168,169,450,587,394,641,642]
  # Expected values for the top8000 rotamer library.
  line_values = [
   " A  14  MET:1.00:1.3:29.2:173.3:287.9::Allowed:ptm",
   " A  15  SER:1.00:0.1:229.0::::OUTLIER:OUTLIER",
   " A  16  SER:1.00:3.0:277.9::::Favored:m",
   " A  58  ASN:1.00:1.0:252.4:343.6:::Allowed:m-40",
   " A  59  ILE:1.00:0.5:84.2:186.7:::Allowed:pt",
   " A 202  GLU:1.00:0.0:272.7:65.9:287.8::OUTLIER:OUTLIER",
   " A 203  ILE:1.00:1.0:292.9:199.6:::Allowed:mt",
   " B 154  THR:1.00:0.0:356.0::::OUTLIER:OUTLIER",
   " B 316  TYR:1.00:4.1:153.7:68.6:::Favored:t80",
   " B  86  ASP:1.00:0.4:321.4:145.1:::Allowed:m-30",
   " B 377  GLU:1.00:15.0:311.7:166.2:160.1::Favored:mt-10",
   " B 378  THR:1.00:17.0:309.4::::Favored:m",
  ]
  for unpickle in [False, True]:
    if unpickle:
      # Second pass: the same checks must hold after serialisation.
      r = loads(dumps(r))
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    for result in r.results:
      assert len(result.xyz) == 3
    output = out.getvalue()
    assert output.count("OUTLIER") == 246
    assert output.count(":") == 5144, output.count(":")
    assert output.count("p") == 120
    assert output.count("m") == 324
    assert output.count("t") == 486
    output_lines = output.splitlines()
    assert len(output_lines) == 643
    for idx, val in zip(line_indices, line_values):
      assert output_lines[idx] == val, (idx, output_lines[idx])

  # --- pdb1jxt.ent ----------------------------------------------------------
  regression_pdb = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/pdb1jxt.ent",
    test=os.path.isfile)
  if regression_pdb is None:
    print("Skipping exercise_rotalyze(): input pdb (pdb1jxt.ent) not available")
    return
  pdb_in = file_reader.any_file(file_name=regression_pdb)
  hierarchy = pdb_in.file_object.hierarchy
  # NOTE(review): unused here as well; see the note on the first pdb_io.
  pdb_io = pdb.input(file_name=regression_pdb)

  # Outliers only: no output is expected for this structure.
  r = rotalyze.rotalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=True)
  out = StringIO()
  r.show_old_output(out=out, verbose=False)
  output = out.getvalue().strip()
  assert output == ""

  # All residues: the complete listing must match, before and after a
  # pickle round-trip.
  r = rotalyze.rotalyze(
    pdb_hierarchy=hierarchy,
    outliers_only=False)
  for unpickle in [False, True]:
    if unpickle:
      r = loads(dumps(r))
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue()
    assert not show_diff(output,"""\
 A   1  THR:1.00:95.4:299.5::::Favored:m
 A   2 ATHR:0.67:49.5:56.1::::Favored:p
 A   2 BTHR:0.33:90.4:298.1::::Favored:m
 A   3  CYS:1.00:12.9:310.5::::Favored:m
 A   4  CYS:1.00:91.6:293.1::::Favored:m
 A   5  PRO:1.00:78.8:30.2:319.7:33.8::Favored:Cg_endo
 A   6  SER:1.00:90.1:68.4::::Favored:p
 A   7 AILE:0.45:49.6:290.8:178.2:::Favored:mt
 A   7 BILE:0.55:6.5:284.4:298.4:::Favored:mm
 A   8 AVAL:0.50:1.1:156.7::::Allowed:t
 A   8 BVAL:0.30:5.1:71.3::::Favored:p
 A   8 CVAL:0.20:69.8:172.1::::Favored:t
 A  10 AARG:0.65:24.7:176.8:66.5:63.9:180.0:Favored:tpp-160
 A  10 BARG:0.35:17.5:176.8:72.8:66.4:171.9:Favored:tpp-160
 A  11  SER:1.00:51.6:300.9::::Favored:m
 A  12 AASN:0.50:93.9:286.1:343.8:::Favored:m-40
 A  12 BASN:0.50:98.9:288.4:337.6:::Favored:m-40
 A  13 APHE:0.65:45.1:187.2:276.4:::Favored:t80
 A  13 BPHE:0.35:86.1:179.6:263.1:::Favored:t80
 A  14  ASN:1.00:95.2:289.6:333.0:::Favored:m-40
 A  15  VAL:1.00:42.3:168.2::::Favored:t
 A  16  CYS:1.00:40.8:176.5::::Favored:t
 A  17  ARG:1.00:21.4:289.7:282.8:288.6:158.7:Favored:mmm160
 A  18  LEU:1.00:65.0:287.2:173.3:::Favored:mt
 A  19  PRO:1.00:43.6:24.4:324.8:31.6::Favored:Cg_endo
 A  21  THR:1.00:5.7:314.0::::Favored:m
 A  22 APRO:0.55:87.5:333.5:34.0:333.8::Favored:Cg_exo
 A  23 AGLU:0.50:86.9:290.9:187.1:341.8::Favored:mt-10
 A  23 BGLU:0.50:91.7:292.0:183.8:339.2::Favored:mt-10
 A  25 ALEU:0.50:95.7:294.4:173.6:::Favored:mt
 A  26  CYS:1.00:83.0:295.0::::Favored:m
 A  28  THR:1.00:29.6:52.9::::Favored:p
 A  29 ATYR:0.65:18.5:161.8:67.8:::Favored:t80
 A  29 BTYR:0.35:0.4:191.3:322.7:::Allowed:t80
 A  30 ATHR:0.70:60.8:57.4::::Favored:p
 A  30 BTHR:0.30:6.6:78.1::::Favored:p
 A  32  CYS:1.00:61.4:301.7::::Favored:m
 A  33  ILE:1.00:36.6:66.5:173.4:::Favored:pt
 A  34 AILE:0.70:60.9:303.6:167.6:::Favored:mt
 A  34 BILE:0.30:31.4:308.5:296.8:::Favored:mm
 A  35  ILE:1.00:45.6:62.4:170.0:::Favored:pt
 A  36  PRO:1.00:36.2:22.5:330.5:24.8::Favored:Cg_endo
 A  39 ATHR:0.70:14.0:311.0::::Favored:m
 A  39 BTHR:0.30:13.1:288.8::::Favored:m
 A  40  CYS:1.00:81.4:294.4::::Favored:m
 A  41  PRO:1.00:35.4:34.4:317.5:33.1::Favored:Cg_endo
 A  43 AASP:0.75:24.8:56.5:340.3:::Favored:p0
 A  43 BASP:0.25:43.2:59.6:349.3:::Favored:p0
 A  44  TYR:1.00:85.3:290.9:85.1:::Favored:m-80
 A  46  ASN:1.00:38.7:301.6:117.9:::Favored:m110
""")