Exemplo n.º 1
0
  def test_load_bonds_from_pdb(self):
    """
    TestPDB: Verifies that bonds can be loaded from PDB.
    """
    pdb = PDB()
    # Test that we can load CO2
    carbon_atom = Atom(element="C")
    oxygen_atom_1 = Atom(element="O")
    oxygen_atom_2 = Atom(element="O")

    pdb.add_new_atom(carbon_atom)
    pdb.add_new_atom(oxygen_atom_1)
    pdb.add_new_atom(oxygen_atom_2)
    lines = [
      "CONECT    1    2    3                                                 "
      "CONECT    2                                                           "
      "CONECT    3                                                           "
    ]
    with tempfile.NamedTemporaryFile() as temp:
      temp.write("\n".join(lines))
      temp.flush()
      pdb.load_bonds_from_pdb(temp.name)
    assert len(carbon_atom.indices_of_atoms_connecting) == 2
    assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
    assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0
    def test_load_bonds_from_pdb(self):
        """
    TestPDB: Verifies that bonds can be loaded from PDB.
    """
        pdb = PDB()
        # Test that we can load CO2
        carbon_atom = Atom(element="C")
        oxygen_atom_1 = Atom(element="O")
        oxygen_atom_2 = Atom(element="O")

        pdb.add_new_atom(carbon_atom)
        pdb.add_new_atom(oxygen_atom_1)
        pdb.add_new_atom(oxygen_atom_2)
        lines = [
            "CONECT    1    2    3                                                 "
            "CONECT    2                                                           "
            "CONECT    3                                                           "
        ]
        with tempfile.NamedTemporaryFile() as temp:
            temp.write("\n".join(lines))
            temp.flush()
            pdb.load_bonds_from_pdb(temp.name)
        assert len(carbon_atom.indices_of_atoms_connecting) == 2
        assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
        assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0
Exemplo n.º 3
0
class TestPDB(unittest.TestCase):
  """"
  Test PDB class.
  """

  def setUp(self):
    """
    Instantiate a dummy PDB file.
    """
    self.temp_dir = tempfile.mkdtemp()
    self.pdb = PDB()

    _, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
        dir=self.temp_dir)

    self.prgr_pdb = PDB()
    prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
    prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
    self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

    self._1r5y_protein = PDB()
    _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
    _1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt")
    self._1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt)

    self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]



  def tearDown(self):
    """
    Delete temporary directory.
    """
    shutil.rmtree(self.temp_dir)

  def test_add_new_atom(self):
    """
    TestPDB: Verifies that new atoms can be added.
    """
    # Verify that no atoms are present when we start.
    assert len(self.pdb.all_atoms.keys()) == 0
    empty_atom = Atom()
    self.pdb.add_new_atom(empty_atom)
    # Verify that we now have one atom
    assert len(self.pdb.all_atoms.keys()) == 1

  def test_get_residues(self):
    """
    TestPDB: Tests that all residues in PDB are identified.
    """
    residues = self.prgr_pdb.get_residues()
    # prgr.pdb has 280 unique residues
    assert len(residues.keys()) == 280
    prgr_residues = ["LEU", "ILE", "ASN", "LEU", "LEU", "MET", "SER",
        "ILE", "GLU", "PRO", "ASP", "VAL", "ILE", "TYR", "ALA", "GLY", "HIS",
        "ASP", "THR", "SER", "SER", "SER", "LEU", "LEU", "THR", "SER", "LEU",
        "ASN", "GLN", "LEU", "GLY", "GLU", "ARG", "GLN", "LEU", "LEU", "SER",
        "VAL", "VAL", "LYS", "TRP", "SER", "LYS", "SER", "LEU", "PRO", "GLY",
        "PHE", "ARG", "LEU", "HIS", "ILE", "ASP", "ASP", "GLN", "ILE", "THR",
        "LEU", "ILE", "GLN", "TYR", "SER", "TRP", "MET", "SER", "LEU", "MET",
        "VAL", "PHE", "GLY", "LEU", "GLY", "TRP", "ARG", "SER", "TYR", "LYS",
        "HIS", "VAL", "SER", "GLY", "GLN", "MET", "LEU", "TYR", "PHE", "ALA",
        "PRO", "ASP", "LEU", "ILE", "LEU", "ASN", "GLU", "GLN", "ARG", "MET",
        "LYS", "GLU", "PHE", "TYR", "SER", "LEU", "CYS", "LEU", "THR", "MET",
        "TRP", "GLN", "ILE", "PRO", "GLN", "GLU", "PHE", "VAL", "LYS", "LEU",
        "GLN", "VAL", "SER", "GLN", "GLU", "GLU", "PHE", "LEU", "CYS", "MET",
        "LYS", "VAL", "LEU", "LEU", "LEU", "LEU", "ASN", "THR", "ILE", "PRO",
        "LEU", "GLU", "GLY", "LEU", "PHE", "MET", "ARG", "TYR", "ILE", "GLU",
        "LEU", "ALA", "ILE", "ARG", "ARG", "PHE", "TYR", "GLN", "LEU", "THR",
        "LYS", "LEU", "LEU", "ASP", "ASN", "LEU", "HIS", "ASP", "LEU", "VAL",
        "LYS", "GLN", "LEU", "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR",
        "PHE", "ILE", "GLN", "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU",
        "PHE", "PRO", "GLU", "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA",
        "ALA", "GLN", "LEU", "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET",
        "VAL", "LYS", "PRO", "LEU", "LEU", "PHE", "HIS", "LYS", "ASN", "LEU",
        "ASP", "ASP", "ILE", "THR", "LEU", "ILE", "GLN", "TYR", "SER", "TRP",
        "MET", "THR", "ILE", "PRO", "LEU", "GLU", "GLY", "LEU", "ARG", "VAL",
        "LYS", "GLN", "LEU", "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR",
        "PHE", "ILE", "GLN", "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU",
        "PHE", "PRO", "GLU", "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA",
        "ALA", "GLN", "LEU", "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET",
        "VAL", "LYS", "PRO"]
    # Recall the keys have format RESNAME_RESNUMBER_CHAIN
    resnames = [reskey.split("_")[0].strip() for reskey in residues]
    resnames.sort()
    prgr_residues.sort()
    assert resnames == prgr_residues
    # prgr.pdb has 2749 unique atoms.
    atom_count = 0
    for (_, atom_indices) in residues.iteritems():
      atom_count += len(atom_indices)
    print atom_count
    assert atom_count == 2788

  def test_get_lysine_charges(self):
    """
    TestPDB: Test that lysine charges are identified correctly.
    """
    res_list = self.prgr_pdb.get_residues()
    lysine_charges = self.prgr_pdb.get_lysine_charges(res_list)
    # prgr has 14 lysines.
    print len(lysine_charges)
    assert len(lysine_charges) == 14
    for charge in lysine_charges:
      # Lysine should be posistively charged
      assert charge.positive

  def test_get_arginine_charges(self):
    """
    TestPDB: Test that arginine charges are identified correctly.
    """
    res_list = self.prgr_pdb.get_residues()
    arginine_charges = self.prgr_pdb.get_arginine_charges(res_list)
    # prgr has 10 arginines
    assert len(arginine_charges) == 10
    for charge in arginine_charges:
      # The guanidium in arginine should be positively charged.
      assert charge.positive

  def test_get_histidine_charges(self):
    """
    TestPDB: Test that histidine charges are identified correctly.
    """
    res_list = self.prgr_pdb.get_residues()
    histidine_charges = self.prgr_pdb.get_histidine_charges(res_list)
    # prgr has 7 arginines
    assert len(histidine_charges) == 7
    for charge in histidine_charges:
      # The nitrogens pick up positive charges
      assert charge.positive

  def test_get_glutamic_acid_charges(self):
    """
    TestPDB: Test that glutamic acid charges are identified correctly.
    """
    res_list = self.prgr_pdb.get_residues()
    glutamic_acid_charges = self.prgr_pdb.get_glutamic_acid_charges(res_list)
    assert len(glutamic_acid_charges) == 16
    for charge in glutamic_acid_charges:
      # The carboxyls get deprotonated.
      assert not charge.positive

  def test_get_aspartic_acid_charges(self):
    """
    TestPDB: Test that aspartic acid charges are identified correctly.
    """
    res_list = self.prgr_pdb.get_residues()
    aspartic_acid_charges = self.prgr_pdb.get_aspartic_acid_charges(res_list)
    assert len(aspartic_acid_charges) == 9
    for charge in aspartic_acid_charges:
      # The carboxyls get deprotonated
      assert not charge.positive

  def test_assign_ligand_aromatics(self):
    """
    TestPDB: Test that non-protein aromatic rings are assigned correctly.
    """
    ### 3ao4 comes from PDBBind-CN and contains some cruft in the PDB file:
    ### atoms without residues labelled. This triggered some problems with
    ### non-protein aromatics complaining.
    # TODO(rbharath): Add a stub here.
    _3ao4_protein = PDB()
    _3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb")
    _3ao4_protein_pdbqt = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt")
    _3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt)

  def test_remove_redundant_rings(self):
    """
    TestPDB: Test that redundant rings are removed.
    """
    # Recall that each ring is represented as a list of atom indices.
    # Test that rings of length 0 are removed
    assert remove_redundant_rings([[]]) == []
    # Set that supersets are removed
    assert (remove_redundant_rings([[1, 2, 3], [1, 3, 4, 5], [1, 2, 3, 4, 5]])
        == [[1, 2, 3], [1, 3, 4, 5]])
    # Ensure that duplicate rings are handled correctly (that is, only one
    # copy of a duplicate ring should remain)
    assert remove_redundant_rings([[1, 2, 3], [1, 3, 2]]) == [[1, 2, 3]]

  def test_assign_protein_aromatics(self):
    """
    TestPDB: Test that aromatic rings are assigned correctly.
    """
    for name, protein in self.proteins:
      # The proteins should have aromatic rings assigned already by
      # load_from_files()
      print "Processing aromatics for %s" % name
      for aromatic in protein.aromatic_rings:
        assert aromatic is not None

  def test_get_phenylalanine_aromatics(self):
    """
    TestPDB: Test that phenylalanine aromatic rings are retrieved.
    """
    res_list = self.prgr_pdb.get_residues()
    phenylalanine_aromatics = (
        self.prgr_pdb.get_phenylalanine_aromatics(res_list))

    # prgr has 13 phenylalanines, each of which has 1 aromatic ring.
    assert len(phenylalanine_aromatics) == 13
    for aromatic in phenylalanine_aromatics:
      # The aromatic rings in phenylalanine have 6 elements each
      assert len(aromatic.indices) == 6

  def test_get_tyrosine_aromatics(self):
    """
    TestPDB: Test that tyrosine aromatic rings are retrieved.
    """
    # prgr has 10 tyrosines, each of which has 1 aromatic ring.
    res_list = self.prgr_pdb.get_residues()
    tyrosine_aromatics = self.prgr_pdb.get_tyrosine_aromatics(res_list)
    assert len(tyrosine_aromatics) == 10
    for aromatic in tyrosine_aromatics:
      # The aromatic rings in tyrosine have 6 elements each
      assert len(aromatic.indices) == 6

  def test_get_histidine_aromatics(self):
    """
    TestPDB: Test that histidine aromatic rings are retrieved.
    """
    res_list = self.prgr_pdb.get_residues()
    histidine_aromatics = self.prgr_pdb.get_histidine_aromatics(res_list)
    # prgr has 7 histidines, each of which has 1 aromatic ring.
    assert len(histidine_aromatics) == 7
    for aromatic in histidine_aromatics:
      # The aromatic rings in histidine have 6 elements each
      print len(aromatic.indices)
      assert len(aromatic.indices) == 5

  def test_get_tryptophan_aromatics(self):
    """
    TestPDB: Test that tryptophan aromatic rings are retrieved.
    """
    res_list = self.prgr_pdb.get_residues()
    tryptophan_aromatics = self.prgr_pdb.get_tryptophan_aromatics(res_list)
    # prgr has 5 tryptophans, each of which has 2 aromatic ring.
    print len(tryptophan_aromatics)
    assert len(tryptophan_aromatics) == 10 
    num_five_rings, num_six_rings = 0, 0
    for aromatic in tryptophan_aromatics:
      # One aromatic ring in tryptophan hahas 6 elements each,
      # while the other has 5 elements.
      if len(aromatic.indices) == 6:
        num_six_rings += 1
      elif len(aromatic.indices) == 5:
        num_five_rings += 1
    assert num_six_rings == 5
    assert num_five_rings == 5

  def test_connected_atoms(self):
    """
    TestPDB: Verifies that connected atom retrieval works.
    """
    # Verify that no atoms are present when we start.
    assert len(self.pdb.all_atoms.keys()) == 0
    carbon_atom = Atom(element="C")
    oxygen_atom = Atom(element="O")
    hydrogen_atom = Atom(element="H")

    self.pdb.add_new_atom(carbon_atom)
    self.pdb.add_new_atom(oxygen_atom)
    self.pdb.add_new_atom(hydrogen_atom)

    # We want a carboxyl, so C connects O and H
    carbon_atom.indices_of_atoms_connecting = [2, 3]
    oxygen_atom.indices_of_atoms_connecting = [1]
    hydrogen_atom.indices_of_atoms_connecting = [1]

    connected_oxygens = self.pdb.connected_atoms(1, "O")
    assert len(connected_oxygens) == 1

    connected_hydrogens = self.pdb.connected_atoms(1, "H")
    assert len(connected_hydrogens) == 1

  def test_load_bonds_from_pdb(self):
    """
    TestPDB: Verifies that bonds can be loaded from PDB.
    """
    pdb = PDB()
    # Test that we can load CO2
    carbon_atom = Atom(element="C")
    oxygen_atom_1 = Atom(element="O")
    oxygen_atom_2 = Atom(element="O")

    pdb.add_new_atom(carbon_atom)
    pdb.add_new_atom(oxygen_atom_1)
    pdb.add_new_atom(oxygen_atom_2)
    lines = [
      "CONECT    1    2    3                                                 "
      "CONECT    2                                                           "
      "CONECT    3                                                           "
    ]
    with tempfile.NamedTemporaryFile() as temp:
      temp.write("\n".join(lines))
      temp.flush()
      pdb.load_bonds_from_pdb(temp.name)
    assert len(carbon_atom.indices_of_atoms_connecting) == 2
    assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
    assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0


  def test_connected_heavy_atoms(self):
    """
    TestPDB: Verifies retrieval of connected heavy atoms.
    """
    # Verify that no atoms are present when we start.
    assert len(self.pdb.all_atoms.keys()) == 0
    carbon_atom = Atom(element="C")
    oxygen_atom = Atom(element="O")
    hydrogen_atom = Atom(element="H")

    self.pdb.add_new_atom(carbon_atom)
    self.pdb.add_new_atom(oxygen_atom)
    self.pdb.add_new_atom(hydrogen_atom)

    # We want a carboxyl, so C connects O and H
    carbon_atom.indices_of_atoms_connecting = [2, 3]
    oxygen_atom.indices_of_atoms_connecting = [1]
    hydrogen_atom.indices_of_atoms_connecting = [1]

    connected_heavy_atoms = self.pdb.connected_heavy_atoms(1)
    assert len(connected_heavy_atoms) == 1
    assert connected_heavy_atoms[0] == 2

  def test_assign_non_protein_charges(self):
    """
    TestPDB: Verify that charges are properly added to ligands.
    """
    # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
    # There should be 3 charged groups, two positive, one negative
    ammonium_sulfate_pdb = PDB()
    ammonium_sulfate_pdb_path = os.path.join(data_dir(),
        "ammonium_sulfate_hyd.pdb")
    ammonium_sulfate_pdbqt_path = os.path.join(data_dir(),
        "ammonium_sulfate_hyd.pdbqt")
    # Notice that load automatically identifies non-protein charges.
    ammonium_sulfate_pdb.load_from_files(
        ammonium_sulfate_pdb_path, ammonium_sulfate_pdbqt_path)
    assert len(ammonium_sulfate_pdb.charges) == 3
    num_pos, num_neg = 0, 0
    for charge in ammonium_sulfate_pdb.charges:
      if charge.positive:
        num_pos += 1
      else:
        num_neg += 1
    assert num_pos == 2
    assert num_neg == 1

  def test_metallic_charges(self):
    """
    TestPDB: Verify that non-protein charges are assigned properly.
    """
    # Test metallic ion charge.
    magnesium_pdb = PDB()
    magnesium_atom = Atom(element="MG",
        coordinates=Point(coords=np.array([0,0,0])))
    magnesium_pdb.add_new_non_protein_atom(magnesium_atom)
    metallic_charges = magnesium_pdb.identify_metallic_charges()
    assert len(metallic_charges) == 1

  def test_nitrogen_charges(self):
    """
    TestPDB: Verify that nitrogen groups are charged correctly.
    """
    # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
    # The labeling should pick up 2 charged nitrogen groups for two
    # ammoniums.
    ammonium_sulfate_pdb = PDB()
    ammonium_sulfate_pdb_path = os.path.join(data_dir(),
        "ammonium_sulfate_hyd.pdb")
    ammonium_sulfate_pdbqt_path = os.path.join(data_dir(),
        "ammonium_sulfate_hyd.pdbqt")
    ammonium_sulfate_pdb.load_from_files(
        ammonium_sulfate_pdb_path, ammonium_sulfate_pdbqt_path)
    nitrogen_charges = ammonium_sulfate_pdb.identify_nitrogen_charges()
    assert len(nitrogen_charges) == 2
    assert nitrogen_charges[0].positive  # Should be positive
    assert nitrogen_charges[1].positive  # Should be positive

    # Test pyrrolidine (CH2)4NH. The nitrogen here should be sp3
    # hybridized, so is likely to pick up an extra proton to its nitrogen
    # at physiological pH.
    pyrrolidine_pdb = PDB()
    pyrrolidine_pdb_path = os.path.join(data_dir(),
        "pyrrolidine_hyd.pdb")
    pyrrolidine_pdbqt_path = os.path.join(data_dir(),
        "pyrrolidine_hyd.pdbqt")
    pyrrolidine_pdb.load_from_files(pyrrolidine_pdb_path,
        pyrrolidine_pdbqt_path)
    nitrogen_charges = pyrrolidine_pdb.identify_nitrogen_charges()
    assert len(nitrogen_charges) == 1
    assert nitrogen_charges[0].positive  # Should be positive

  def test_carbon_charges(self):
    """
    TestPDB: Verify that carbon groups are charged correctly.
    """
    # Guanidine is positively charged at physiological pH
    guanidine_pdb = PDB()
    guanidine_pdb_path = os.path.join(data_dir(),
        "guanidine_hyd.pdb")
    guanidine_pdbqt_path = os.path.join(data_dir(),
        "guanidine_hyd.pdbqt")
    guanidine_pdb.load_from_files(
        guanidine_pdb_path, guanidine_pdbqt_path)
    carbon_charges = guanidine_pdb.identify_carbon_charges()
    assert len(carbon_charges) == 1
    assert carbon_charges[0].positive  # Should be positive

    # sulfaguanidine contains a guanidine group that is likely to be
    # positively protonated at physiological pH
    sulfaguanidine_pdb = PDB()
    sulfaguanidine_pdb_path = os.path.join(data_dir(),
        "sulfaguanidine_hyd.pdb")
    sulfaguanidine_pdbqt_path = os.path.join(data_dir(),
        "sulfaguanidine_hyd.pdbqt")
    sulfaguanidine_pdb.load_from_files(
        sulfaguanidine_pdb_path, sulfaguanidine_pdbqt_path)
    carbon_charges = sulfaguanidine_pdb.identify_carbon_charges()
    assert len(carbon_charges) == 1
    assert carbon_charges[0].positive  # Should be positive

    # Formic acid is a carboxylic acid, which should be negatively charged.
    formic_acid_pdb = PDB()
    formic_acid_pdb_path = os.path.join(data_dir(),
        "formic_acid_hyd.pdb")
    formic_acid_pdbqt_path = os.path.join(data_dir(),
        "formic_acid_hyd.pdbqt")
    formic_acid_pdb.load_from_files(
        formic_acid_pdb_path, formic_acid_pdbqt_path)
    carbon_charges = formic_acid_pdb.identify_carbon_charges()
    assert len(carbon_charges) == 1
    assert not carbon_charges[0].positive  # Should be negatively charged.

  def test_phosphorus_charges(self):
    """
    TestPDB: Verify that Phosphorus groups are charged correctly.
    """
    # CID82671 contains a phosphate between two aromatic groups.
    phosphate_pdb = PDB()
    phosphate_pdb_path = os.path.join(data_dir(),
      "82671_hyd.pdb")
    phosphate_pdbqt_path = os.path.join(data_dir(),
      "82671_hyd.pdb")
    phosphate_pdb.load_from_files(
        phosphate_pdb_path, phosphate_pdbqt_path)
    phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
    assert len(phosphorus_charges) == 1
    assert not phosphorus_charges[0].positive  # Should be negatively charged.


  def test_sulfur_charges(self):
    """
    TestPDB: Verify that sulfur groups are charged correctly.
    """
    triflic_acid_pdb = PDB()
    triflic_acid_pdb_path = os.path.join(data_dir(),
      "triflic_acid_hyd.pdb")
    triflic_acid_pdbqt_path = os.path.join(data_dir(),
      "triflic_acid_hyd.pdbqt")
    triflic_acid_pdb.load_from_files(
      triflic_acid_pdb_path,
      triflic_acid_pdbqt_path)
    sulfur_charges = (
        triflic_acid_pdb.identify_sulfur_charges())
    assert len(sulfur_charges) == 1
    assert not sulfur_charges[0].positive  # Should be negatively charged.


  def test_ligand_assign_aromatics(self):
    """
    TestPDB: Verify that aromatic rings in ligands are identified.
    """
    benzene_pdb = PDB()
    benzene_pdb_path = os.path.join(data_dir(), "benzene_hyd.pdb")
    benzene_pdbqt_path = os.path.join(data_dir(), "benzene_hyd.pdbqt")
    benzene_pdb.load_from_files(benzene_pdb_path, benzene_pdbqt_path)

    # A benzene should have exactly one aromatic ring.
    print benzene_pdb.aromatic_rings
    assert len(benzene_pdb.aromatic_rings) == 1
    # The first 6 atoms in the benzene pdb form the aromatic ring.
    assert (set(benzene_pdb.aromatic_rings[0].indices)
         == set([1,2,3,4,5,6]))

  def test_assign_secondary_structure(self):
    """
    TestPDB: Verify that secondary structure is assigned meaningfully.
    """
    # TODO(rbharath): This test is just a stub. Add a more realistic test
    # that checks that nontrivial secondary structure is computed correctly
    # here.
    self.prgr_pdb.assign_secondary_structure()
    

  def test_get_structure_dict(self):
    """
    TestPDB: Verify that dict with rudimentary structure labels is generated.

    TODO(rbharath): This is just a stub. Add some nontrivial tests here.
    """
    structures = self.prgr_pdb.get_structure_dict()
    print structures
    print len(structures)
class TestPDB(unittest.TestCase):
    """"
  Test PDB class.
  """
    def setUp(self):
        """
    Instantiate a dummy PDB file.
    """
        self.temp_dir = tempfile.mkdtemp()
        self.pdb = PDB()

        _, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
                                                dir=self.temp_dir)

        self.prgr_pdb = PDB()
        prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
        prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
        self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

        self._1r5y_protein = PDB()
        _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
        _1r5y_protein_pdbqt = os.path.join(data_dir(),
                                           "1r5y_protein_hyd.pdbqt")
        self._1r5y_protein.load_from_files(_1r5y_protein_pdb,
                                           _1r5y_protein_pdbqt)

        self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]

    def tearDown(self):
        """
    Delete temporary directory.
    """
        shutil.rmtree(self.temp_dir)

    def test_add_new_atom(self):
        """
    TestPDB: Verifies that new atoms can be added.
    """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        empty_atom = Atom()
        self.pdb.add_new_atom(empty_atom)
        # Verify that we now have one atom
        assert len(self.pdb.all_atoms.keys()) == 1

    def test_get_residues(self):
        """
    TestPDB: Tests that all residues in PDB are identified.
    """
        residues = self.prgr_pdb.get_residues()
        # prgr.pdb has 280 unique residues
        assert len(residues.keys()) == 280
        prgr_residues = [
            "LEU", "ILE", "ASN", "LEU", "LEU", "MET", "SER", "ILE", "GLU",
            "PRO", "ASP", "VAL", "ILE", "TYR", "ALA", "GLY", "HIS", "ASP",
            "THR", "SER", "SER", "SER", "LEU", "LEU", "THR", "SER", "LEU",
            "ASN", "GLN", "LEU", "GLY", "GLU", "ARG", "GLN", "LEU", "LEU",
            "SER", "VAL", "VAL", "LYS", "TRP", "SER", "LYS", "SER", "LEU",
            "PRO", "GLY", "PHE", "ARG", "LEU", "HIS", "ILE", "ASP", "ASP",
            "GLN", "ILE", "THR", "LEU", "ILE", "GLN", "TYR", "SER", "TRP",
            "MET", "SER", "LEU", "MET", "VAL", "PHE", "GLY", "LEU", "GLY",
            "TRP", "ARG", "SER", "TYR", "LYS", "HIS", "VAL", "SER", "GLY",
            "GLN", "MET", "LEU", "TYR", "PHE", "ALA", "PRO", "ASP", "LEU",
            "ILE", "LEU", "ASN", "GLU", "GLN", "ARG", "MET", "LYS", "GLU",
            "PHE", "TYR", "SER", "LEU", "CYS", "LEU", "THR", "MET", "TRP",
            "GLN", "ILE", "PRO", "GLN", "GLU", "PHE", "VAL", "LYS", "LEU",
            "GLN", "VAL", "SER", "GLN", "GLU", "GLU", "PHE", "LEU", "CYS",
            "MET", "LYS", "VAL", "LEU", "LEU", "LEU", "LEU", "ASN", "THR",
            "ILE", "PRO", "LEU", "GLU", "GLY", "LEU", "PHE", "MET", "ARG",
            "TYR", "ILE", "GLU", "LEU", "ALA", "ILE", "ARG", "ARG", "PHE",
            "TYR", "GLN", "LEU", "THR", "LYS", "LEU", "LEU", "ASP", "ASN",
            "LEU", "HIS", "ASP", "LEU", "VAL", "LYS", "GLN", "LEU", "HIS",
            "LEU", "TYR", "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN",
            "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO",
            "GLU", "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA",
            "GLN", "LEU", "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET",
            "VAL", "LYS", "PRO", "LEU", "LEU", "PHE", "HIS", "LYS", "ASN",
            "LEU", "ASP", "ASP", "ILE", "THR", "LEU", "ILE", "GLN", "TYR",
            "SER", "TRP", "MET", "THR", "ILE", "PRO", "LEU", "GLU", "GLY",
            "LEU", "ARG", "VAL", "LYS", "GLN", "LEU", "HIS", "LEU", "TYR",
            "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN", "SER", "ARG",
            "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO", "GLU", "MET",
            "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA", "GLN", "LEU",
            "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET", "VAL", "LYS",
            "PRO"
        ]
        # Recall the keys have format RESNAME_RESNUMBER_CHAIN
        resnames = [reskey.split("_")[0].strip() for reskey in residues]
        resnames.sort()
        prgr_residues.sort()
        assert resnames == prgr_residues
        # prgr.pdb has 2749 unique atoms.
        atom_count = 0
        for (_, atom_indices) in residues.iteritems():
            atom_count += len(atom_indices)
        print atom_count
        assert atom_count == 2788

    def test_get_lysine_charges(self):
        """
    TestPDB: Test that lysine charges are identified correctly.
    """
        res_list = self.prgr_pdb.get_residues()
        lysine_charges = self.prgr_pdb.get_lysine_charges(res_list)
        # prgr has 14 lysines.
        print len(lysine_charges)
        assert len(lysine_charges) == 14
        for charge in lysine_charges:
            # Lysine should be posistively charged
            assert charge.positive

    def test_get_arginine_charges(self):
        """
    TestPDB: Test that arginine charges are identified correctly.
    """
        res_list = self.prgr_pdb.get_residues()
        arginine_charges = self.prgr_pdb.get_arginine_charges(res_list)
        # prgr has 10 arginines
        assert len(arginine_charges) == 10
        for charge in arginine_charges:
            # The guanidium in arginine should be positively charged.
            assert charge.positive

    def test_get_histidine_charges(self):
        """
    TestPDB: Test that histidine charges are identified correctly.
    """
        res_list = self.prgr_pdb.get_residues()
        histidine_charges = self.prgr_pdb.get_histidine_charges(res_list)
        # prgr has 7 arginines
        assert len(histidine_charges) == 7
        for charge in histidine_charges:
            # The nitrogens pick up positive charges
            assert charge.positive

    def test_get_glutamic_acid_charges(self):
        """
    TestPDB: Test that glutamic acid charges are identified correctly.
    """
        res_list = self.prgr_pdb.get_residues()
        glutamic_acid_charges = self.prgr_pdb.get_glutamic_acid_charges(
            res_list)
        assert len(glutamic_acid_charges) == 16
        for charge in glutamic_acid_charges:
            # The carboxyls get deprotonated.
            assert not charge.positive

    def test_get_aspartic_acid_charges(self):
        """
    TestPDB: Test that aspartic acid charges are identified correctly.
    """
        res_list = self.prgr_pdb.get_residues()
        aspartic_acid_charges = self.prgr_pdb.get_aspartic_acid_charges(
            res_list)
        assert len(aspartic_acid_charges) == 9
        for charge in aspartic_acid_charges:
            # The carboxyls get deprotonated
            assert not charge.positive

    def test_assign_ligand_aromatics(self):
        """
    TestPDB: Test that non-protein aromatic rings are assigned correctly.
    """
        ### 3ao4 comes from PDBBind-CN and contains some cruft in the PDB file:
        ### atoms without residues labelled. This triggered some problems with
        ### non-protein aromatics complaining.
        # TODO(rbharath): Add a stub here.
        _3ao4_protein = PDB()
        _3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb")
        _3ao4_protein_pdbqt = os.path.join(data_dir(),
                                           "3ao4_protein_hyd.pdbqt")
        _3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt)

    def test_remove_redundant_rings(self):
        """
    TestPDB: Test that redundant rings are removed.
    """
        # Recall that each ring is represented as a list of atom indices.
        # Test that rings of length 0 are removed
        assert remove_redundant_rings([[]]) == []
        # Set that supersets are removed
        assert (remove_redundant_rings([[1, 2, 3], [1, 3, 4, 5],
                                        [1, 2, 3, 4, 5]]) == [[1, 2, 3],
                                                              [1, 3, 4, 5]])
        # Ensure that duplicate rings are handled correctly (that is, only one
        # copy of a duplicate ring should remain)
        assert remove_redundant_rings([[1, 2, 3], [1, 3, 2]]) == [[1, 2, 3]]

    def test_assign_protein_aromatics(self):
        """
    TestPDB: Test that aromatic rings are assigned correctly.
    """
        for name, protein in self.proteins:
            # The proteins should have aromatic rings assigned already by
            # load_from_files()
            print "Processing aromatics for %s" % name
            for aromatic in protein.aromatic_rings:
                assert aromatic is not None

    def test_get_phenylalanine_aromatics(self):
        """
    TestPDB: Test that phenylalanine aromatic rings are retrieved.
    """
        res_list = self.prgr_pdb.get_residues()
        phenylalanine_aromatics = (
            self.prgr_pdb.get_phenylalanine_aromatics(res_list))

        # prgr has 13 phenylalanines, each of which has 1 aromatic ring.
        assert len(phenylalanine_aromatics) == 13
        for aromatic in phenylalanine_aromatics:
            # The aromatic rings in phenylalanine have 6 elements each
            assert len(aromatic.indices) == 6

    def test_get_tyrosine_aromatics(self):
        """
    TestPDB: Test that tyrosine aromatic rings are retrieved.
    """
        # prgr has 10 tyrosines, each of which has 1 aromatic ring.
        res_list = self.prgr_pdb.get_residues()
        tyrosine_aromatics = self.prgr_pdb.get_tyrosine_aromatics(res_list)
        assert len(tyrosine_aromatics) == 10
        for aromatic in tyrosine_aromatics:
            # The aromatic rings in tyrosine have 6 elements each
            assert len(aromatic.indices) == 6

    def test_get_histidine_aromatics(self):
        """
    TestPDB: Test that histidine aromatic rings are retrieved.
    """
        res_list = self.prgr_pdb.get_residues()
        histidine_aromatics = self.prgr_pdb.get_histidine_aromatics(res_list)
        # prgr has 7 histidines, each of which has 1 aromatic ring.
        assert len(histidine_aromatics) == 7
        for aromatic in histidine_aromatics:
            # The aromatic rings in histidine have 6 elements each
            print len(aromatic.indices)
            assert len(aromatic.indices) == 5

    def test_get_tryptophan_aromatics(self):
        """
    TestPDB: Test that tryptophan aromatic rings are retrieved.
    """
        res_list = self.prgr_pdb.get_residues()
        tryptophan_aromatics = self.prgr_pdb.get_tryptophan_aromatics(res_list)
        # prgr has 5 tryptophans, each of which has 2 aromatic ring.
        print len(tryptophan_aromatics)
        assert len(tryptophan_aromatics) == 10
        num_five_rings, num_six_rings = 0, 0
        for aromatic in tryptophan_aromatics:
            # One aromatic ring in tryptophan hahas 6 elements each,
            # while the other has 5 elements.
            if len(aromatic.indices) == 6:
                num_six_rings += 1
            elif len(aromatic.indices) == 5:
                num_five_rings += 1
        assert num_six_rings == 5
        assert num_five_rings == 5

    def test_connected_atoms(self):
        """
    TestPDB: Verifies that connected atom retrieval works.
    """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        carbon_atom = Atom(element="C")
        oxygen_atom = Atom(element="O")
        hydrogen_atom = Atom(element="H")

        self.pdb.add_new_atom(carbon_atom)
        self.pdb.add_new_atom(oxygen_atom)
        self.pdb.add_new_atom(hydrogen_atom)

        # We want a carboxyl, so C connects O and H
        carbon_atom.indices_of_atoms_connecting = [2, 3]
        oxygen_atom.indices_of_atoms_connecting = [1]
        hydrogen_atom.indices_of_atoms_connecting = [1]

        connected_oxygens = self.pdb.connected_atoms(1, "O")
        assert len(connected_oxygens) == 1

        connected_hydrogens = self.pdb.connected_atoms(1, "H")
        assert len(connected_hydrogens) == 1

    def test_load_bonds_from_pdb(self):
        """
    TestPDB: Verifies that bonds can be loaded from PDB.
    """
        pdb = PDB()
        # Test that we can load CO2
        carbon_atom = Atom(element="C")
        oxygen_atom_1 = Atom(element="O")
        oxygen_atom_2 = Atom(element="O")

        pdb.add_new_atom(carbon_atom)
        pdb.add_new_atom(oxygen_atom_1)
        pdb.add_new_atom(oxygen_atom_2)
        lines = [
            "CONECT    1    2    3                                                 "
            "CONECT    2                                                           "
            "CONECT    3                                                           "
        ]
        with tempfile.NamedTemporaryFile() as temp:
            temp.write("\n".join(lines))
            temp.flush()
            pdb.load_bonds_from_pdb(temp.name)
        assert len(carbon_atom.indices_of_atoms_connecting) == 2
        assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
        assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0

    def test_connected_heavy_atoms(self):
        """
    TestPDB: Verifies retrieval of connected heavy atoms.
    """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        carbon_atom = Atom(element="C")
        oxygen_atom = Atom(element="O")
        hydrogen_atom = Atom(element="H")

        self.pdb.add_new_atom(carbon_atom)
        self.pdb.add_new_atom(oxygen_atom)
        self.pdb.add_new_atom(hydrogen_atom)

        # We want a carboxyl, so C connects O and H
        carbon_atom.indices_of_atoms_connecting = [2, 3]
        oxygen_atom.indices_of_atoms_connecting = [1]
        hydrogen_atom.indices_of_atoms_connecting = [1]

        connected_heavy_atoms = self.pdb.connected_heavy_atoms(1)
        assert len(connected_heavy_atoms) == 1
        assert connected_heavy_atoms[0] == 2

    def test_assign_non_protein_charges(self):
        """
    TestPDB: Verify that charges are properly added to ligands.
    """
        # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
        # There should be 3 charged groups, two positive, one negative
        ammonium_sulfate_pdb = PDB()
        ammonium_sulfate_pdb_path = os.path.join(data_dir(),
                                                 "ammonium_sulfate_hyd.pdb")
        ammonium_sulfate_pdbqt_path = os.path.join(
            data_dir(), "ammonium_sulfate_hyd.pdbqt")
        # Notice that load automatically identifies non-protein charges.
        ammonium_sulfate_pdb.load_from_files(ammonium_sulfate_pdb_path,
                                             ammonium_sulfate_pdbqt_path)
        assert len(ammonium_sulfate_pdb.charges) == 3
        num_pos, num_neg = 0, 0
        for charge in ammonium_sulfate_pdb.charges:
            if charge.positive:
                num_pos += 1
            else:
                num_neg += 1
        assert num_pos == 2
        assert num_neg == 1

    def test_metallic_charges(self):
        """
    TestPDB: Verify that non-protein charges are assigned properly.
    """
        # Test metallic ion charge.
        magnesium_pdb = PDB()
        magnesium_atom = Atom(element="MG",
                              coordinates=Point(coords=np.array([0, 0, 0])))
        magnesium_pdb.add_new_non_protein_atom(magnesium_atom)
        metallic_charges = magnesium_pdb.identify_metallic_charges()
        assert len(metallic_charges) == 1

    def test_nitrogen_charges(self):
        """
    TestPDB: Verify that nitrogen groups are charged correctly.
    """
        # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
        # The labeling should pick up 2 charged nitrogen groups for two
        # ammoniums.
        ammonium_sulfate_pdb = PDB()
        ammonium_sulfate_pdb_path = os.path.join(data_dir(),
                                                 "ammonium_sulfate_hyd.pdb")
        ammonium_sulfate_pdbqt_path = os.path.join(
            data_dir(), "ammonium_sulfate_hyd.pdbqt")
        ammonium_sulfate_pdb.load_from_files(ammonium_sulfate_pdb_path,
                                             ammonium_sulfate_pdbqt_path)
        nitrogen_charges = ammonium_sulfate_pdb.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 2
        assert nitrogen_charges[0].positive  # Should be positive
        assert nitrogen_charges[1].positive  # Should be positive

        # Test pyrrolidine (CH2)4NH. The nitrogen here should be sp3
        # hybridized, so is likely to pick up an extra proton to its nitrogen
        # at physiological pH.
        pyrrolidine_pdb = PDB()
        pyrrolidine_pdb_path = os.path.join(data_dir(), "pyrrolidine_hyd.pdb")
        pyrrolidine_pdbqt_path = os.path.join(data_dir(),
                                              "pyrrolidine_hyd.pdbqt")
        pyrrolidine_pdb.load_from_files(pyrrolidine_pdb_path,
                                        pyrrolidine_pdbqt_path)
        nitrogen_charges = pyrrolidine_pdb.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 1
        assert nitrogen_charges[0].positive  # Should be positive

    def test_carbon_charges(self):
        """
    TestPDB: Verify that carbon groups are charged correctly.
    """
        # Guanidine is positively charged at physiological pH
        guanidine_pdb = PDB()
        guanidine_pdb_path = os.path.join(data_dir(), "guanidine_hyd.pdb")
        guanidine_pdbqt_path = os.path.join(data_dir(), "guanidine_hyd.pdbqt")
        guanidine_pdb.load_from_files(guanidine_pdb_path, guanidine_pdbqt_path)
        carbon_charges = guanidine_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive  # Should be positive

        # sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH
        sulfaguanidine_pdb = PDB()
        sulfaguanidine_pdb_path = os.path.join(data_dir(),
                                               "sulfaguanidine_hyd.pdb")
        sulfaguanidine_pdbqt_path = os.path.join(data_dir(),
                                                 "sulfaguanidine_hyd.pdbqt")
        sulfaguanidine_pdb.load_from_files(sulfaguanidine_pdb_path,
                                           sulfaguanidine_pdbqt_path)
        carbon_charges = sulfaguanidine_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive  # Should be positive

        # Formic acid is a carboxylic acid, which should be negatively charged.
        formic_acid_pdb = PDB()
        formic_acid_pdb_path = os.path.join(data_dir(), "formic_acid_hyd.pdb")
        formic_acid_pdbqt_path = os.path.join(data_dir(),
                                              "formic_acid_hyd.pdbqt")
        formic_acid_pdb.load_from_files(formic_acid_pdb_path,
                                        formic_acid_pdbqt_path)
        carbon_charges = formic_acid_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert not carbon_charges[0].positive  # Should be negatively charged.

    def test_phosphorus_charges(self):
        """
    TestPDB: Verify that Phosphorus groups are charged correctly.
    """
        # CID82671 contains a phosphate between two aromatic groups.
        phosphate_pdb = PDB()
        phosphate_pdb_path = os.path.join(data_dir(), "82671_hyd.pdb")
        phosphate_pdbqt_path = os.path.join(data_dir(), "82671_hyd.pdb")
        phosphate_pdb.load_from_files(phosphate_pdb_path, phosphate_pdbqt_path)
        phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
        assert len(phosphorus_charges) == 1
        assert not phosphorus_charges[
            0].positive  # Should be negatively charged.

    def test_sulfur_charges(self):
        """
    TestPDB: Verify that sulfur groups are charged correctly.
    """
        triflic_acid_pdb = PDB()
        triflic_acid_pdb_path = os.path.join(data_dir(),
                                             "triflic_acid_hyd.pdb")
        triflic_acid_pdbqt_path = os.path.join(data_dir(),
                                               "triflic_acid_hyd.pdbqt")
        triflic_acid_pdb.load_from_files(triflic_acid_pdb_path,
                                         triflic_acid_pdbqt_path)
        sulfur_charges = (triflic_acid_pdb.identify_sulfur_charges())
        assert len(sulfur_charges) == 1
        assert not sulfur_charges[0].positive  # Should be negatively charged.

    def test_ligand_assign_aromatics(self):
        """
    TestPDB: Verify that aromatic rings in ligands are identified.
    """
        benzene_pdb = PDB()
        benzene_pdb_path = os.path.join(data_dir(), "benzene_hyd.pdb")
        benzene_pdbqt_path = os.path.join(data_dir(), "benzene_hyd.pdbqt")
        benzene_pdb.load_from_files(benzene_pdb_path, benzene_pdbqt_path)

        # A benzene should have exactly one aromatic ring.
        print benzene_pdb.aromatic_rings
        assert len(benzene_pdb.aromatic_rings) == 1
        # The first 6 atoms in the benzene pdb form the aromatic ring.
        assert (set(benzene_pdb.aromatic_rings[0].indices) == set(
            [1, 2, 3, 4, 5, 6]))

    def test_assign_secondary_structure(self):
        """
    TestPDB: Verify that secondary structure is assigned meaningfully.
    """
        # TODO(rbharath): This test is just a stub. Add a more realistic test
        # that checks that nontrivial secondary structure is computed correctly
        # here.
        self.prgr_pdb.assign_secondary_structure()

    def test_get_structure_dict(self):
        """
    TestPDB: Verify that dict with rudimentary structure labels is generated.

    TODO(rbharath): This is just a stub. Add some nontrivial tests here.
    """
        structures = self.prgr_pdb.get_structure_dict()
        print structures
        print len(structures)