def test_records():
    """Write only the HETATM records with ``to_pdb`` and compare the output.

    The text written to ``OUTFILE`` must equal the reference ``hetatm``.
    """
    pdb = PandasPDB()
    pdb.read_pdb(TESTDATA_FILENAME)
    pdb.to_pdb(path=OUTFILE, records=["HETATM"])

    with open(OUTFILE, "r") as handle:
        written = handle.read()

    # The file is removed before asserting, so a failed comparison does not
    # leave it on disk.
    os.remove(OUTFILE)
    assert written == hetatm
def test_anisou():
    """Test writing ANISOU entries.

    Reads ``TESTDATA_FILENAME2``, writes it back to ``OUTFILE`` with
    ``records=None`` and checks that the written text equals ``four_eiy``.
    """
    # Local import: the original block never referenced ``os`` at runtime.
    import os

    ppdb = PandasPDB()
    ppdb.read_pdb(TESTDATA_FILENAME2)
    try:
        ppdb.to_pdb(path=OUTFILE, records=None)
        with open(OUTFILE, "r") as f:
            f1 = f.read()
    finally:
        # The cleanup used to be commented out, so every run left OUTFILE
        # behind; remove it whether or not writing/reading succeeded.
        if os.path.exists(OUTFILE):
            os.remove(OUTFILE)
    assert f1 == four_eiy
def test_defaults():
    """Round-trip a PDB file through ``to_pdb`` with ``records=None``.

    The text written to ``OUTFILE`` must equal the text of the input file
    ``TESTDATA_FILENAME``.
    """
    ppdb = PandasPDB()
    ppdb.read_pdb(TESTDATA_FILENAME)
    try:
        ppdb.to_pdb(path=OUTFILE, records=None)
        with open(TESTDATA_FILENAME, "r") as f:
            expected = f.read()
        with open(OUTFILE, "r") as f:
            written = f.read()
    finally:
        # Previously the removal came after the assert, so a failed
        # comparison left OUTFILE on disk.
        if os.path.exists(OUTFILE):
            os.remove(OUTFILE)
    assert expected == written
def test_fetch_pdb():
    """Test fetch_pdb.

    Network dependent: when ``_fetch_pdb`` raises ``HTTPError`` or returns
    nothing, the checks are skipped.
    """
    ppdb = PandasPDB()
    # Stays None when the download fails, so the guard below can be evaluated
    # (the name used to be unbound after an HTTPError).
    txt = None
    try:
        txt = ppdb._fetch_pdb('3eiy')
    except HTTPError:
        # Deliberate best effort: an unreachable server skips the test.
        pass

    if txt:  # skip if PDB down
        # This comparison used to be a bare expression whose result was
        # discarded; it is now actually asserted.
        assert txt[:100] == three_eiy[:100]
        ppdb.fetch_pdb('3eiy')
        assert ppdb.pdb_text == txt
def test_fetch_pdb():
    """Test fetch_pdb, including the response for an invalid PDB code.

    Network dependent: when ``_fetch_pdb`` raises ``HTTPError`` or returns
    nothing for '3eiy', all checks are skipped.
    """
    ppdb = PandasPDB()
    # Stays None when the download fails, so the guard below can be evaluated
    # (the name used to be unbound after an HTTPError).
    txt = None
    try:
        txt = ppdb._fetch_pdb('3eiy')
    except HTTPError:
        # Deliberate best effort: an unreachable server skips the test.
        pass

    if txt:  # skip if PDB down
        # This comparison used to be a bare expression whose result was
        # discarded; it is now actually asserted.
        assert txt[:100] == three_eiy[:100]
        ppdb.fetch_pdb('3eiy')
        assert ppdb.pdb_text == txt

        # The invalid-code lookup also needs the server, so it is kept under
        # the same guard.
        txt = ppdb._fetch_pdb('3ey')
        err = "We're sorry, but the requested file is not available"
        assert err in txt
def _gen_anisou(self):
    '''
    Generate information to mimic the ANISOU part of a pdb file.

    The records are read from the 'ANISOU' section of ./test.pdb and stored
    under the 'ANISOU' key of the self.df dict.

    Originally marked as not implemented because it was not needed as of
    the then-current version (1/31/2017).

    Returns:
        None
    '''
    # Store under the 'ANISOU' key, the same way _gen_others stores 'OTHERS'.
    # The previous code rebound self.df itself to this single DataFrame,
    # discarding every other section held in the dict.
    self.df['ANISOU'] = PandasPDB().read_pdb('./test.pdb').df['ANISOU']
def test__construct_df():
    """Check the DataFrames that ``_construct_df`` builds from ``three_eiy``.

    Verifies the section keys, the column sets of each section, and the
    content of the first ATOM row.
    """
    frames = PandasPDB()._construct_df(three_eiy.splitlines())

    assert set(frames.keys()) == {'OTHERS', 'ATOM', 'ANISOU', 'HETATM'}
    for section in ('ATOM', 'HETATM'):
        assert set(frames[section].columns) == set(ATOM_DF_COLUMNS)
    assert set(frames['ANISOU'].columns) == set(ANISOU_DF_COLUMNS)

    column_names = [
        'record_name', 'atom_number', 'blank_1', 'atom_name', 'alt_loc',
        'residue_name', 'blank_2', 'chain_id', 'residue_number',
        'insertion', 'blank_3', 'x_coord', 'y_coord', 'z_coord',
        'occupancy', 'b_factor', 'blank_4', 'segment_id',
        'element_symbol', 'charge', 'line_idx',
    ]
    first_row_values = [
        'ATOM', 1, '', 'N', '',
        'SER', '', 'A', 2,
        '', '', 2.527, 54.656, -1.667,
        1.0, 52.73, '', '',
        'N', None, 609,
    ]
    expected_row = pd.Series(np.array(first_row_values), index=column_names)
    assert expected_row.equals(frames['ATOM'].loc[0, :])
def _gen_others(self):
    '''
    Essentially the 'meta information' at the top of pdb files.

    Adds the 'OTHERS' information to the self.df dict.

    Returns:
        None
    '''
    # Load the headers from the test.pdb file, which is a PDB with some
    # things changed to make it look obviously wrong.
    template = PandasPDB().read_pdb('./test.pdb')
    self.df['OTHERS'] = template.df['OTHERS']
def test__construct_df():
    """Verify sections, columns and the first ATOM row from ``_construct_df``."""
    parser = PandasPDB()
    sections = parser._construct_df(three_eiy.splitlines())

    assert set(sections.keys()) == {'OTHERS', 'ATOM', 'ANISOU', 'HETATM'}

    atom_columns = set(ATOM_DF_COLUMNS)
    assert set(sections['ATOM'].columns) == atom_columns
    assert set(sections['HETATM'].columns) == atom_columns
    assert set(sections['ANISOU'].columns) == set(ANISOU_DF_COLUMNS)

    # (column, expected value) pairs for the first ATOM record.
    first_atom = [
        ('record_name', 'ATOM'),
        ('atom_number', 1),
        ('blank_1', ''),
        ('atom_name', 'N'),
        ('alt_loc', ''),
        ('residue_name', 'SER'),
        ('blank_2', ''),
        ('chain_id', 'A'),
        ('residue_number', 2),
        ('insertion', ''),
        ('blank_3', ''),
        ('x_coord', 2.527),
        ('y_coord', 54.656),
        ('z_coord', -1.667),
        ('occupancy', 1.0),
        ('b_factor', 52.73),
        ('blank_4', ''),
        ('segment_id', ''),
        ('element_symbol', 'N'),
        ('charge', None),
        ('line_idx', 609),
    ]
    names = [name for name, _ in first_atom]
    values = [value for _, value in first_atom]

    exp = pd.Series(np.array(values), index=names)
    assert exp.equals(sections['ATOM'].loc[0, :])
def test_get_df():
    """Check the shape of the frame returned by ``get`` for several selections."""
    ppdb = PandasPDB()
    ppdb.read_pdb(TESTDATA_FILENAME)

    # (selection, extra keyword arguments, expected shape), in call order.
    cases = (
        ('c-alpha', {}, (174, 21)),
        ('hydrogen', {'invert': True}, (1330, 21)),
        ('hydrogen', {}, (0, 21)),
        ('main chain', {}, (696, 21)),
        ('heavy', {}, (1330, 21)),
        ('carbon', {}, (473, 21)),
    )
    for selection, options, expected in cases:
        shape = ppdb.get(selection, **options).shape
        assert shape == expected, shape
def test_anisou():
    """Test writing ANISOU entries.

    Reads ``TESTDATA_FILENAME2``, writes it back to ``OUTFILE`` with
    ``records=None`` and checks that the written text equals ``four_eiy``.
    """
    # Local import: the original block never referenced ``os`` at runtime.
    import os

    ppdb = PandasPDB()
    ppdb.read_pdb(TESTDATA_FILENAME2)
    try:
        ppdb.to_pdb(path=OUTFILE, records=None)
        with open(OUTFILE, 'r') as f:
            f1 = f.read()
    finally:
        # The cleanup used to be commented out, so every run left OUTFILE
        # behind; remove it whether or not writing/reading succeeded.
        if os.path.exists(OUTFILE):
            os.remove(OUTFILE)
    assert f1 == four_eiy
def test_records():
    """Write only the HETATM records with ``to_pdb`` and compare the output.

    The text written to ``OUTFILE`` must equal the reference ``hetatm``.
    """
    structure = PandasPDB()
    structure.read_pdb(TESTDATA_FILENAME)
    structure.to_pdb(path=OUTFILE, records=['HETATM'])

    with open(OUTFILE, 'r') as out:
        contents = out.read()

    # Remove the file first so a failing comparison leaves nothing behind.
    os.remove(OUTFILE)
    assert contents == hetatm
def test_defaults():
    """Round-trip a PDB file through ``to_pdb`` with ``records=None``.

    The text written to ``OUTFILE`` must equal the text of the input file
    ``TESTDATA_FILENAME``.
    """
    ppdb = PandasPDB()
    ppdb.read_pdb(TESTDATA_FILENAME)
    try:
        ppdb.to_pdb(path=OUTFILE, records=None)
        with open(TESTDATA_FILENAME, 'r') as f:
            expected = f.read()
        with open(OUTFILE, 'r') as f:
            written = f.read()
    finally:
        # Previously the removal came after the assert, so a failed
        # comparison left OUTFILE on disk.
        if os.path.exists(OUTFILE):
            os.remove(OUTFILE)
    assert expected == written
def check_structure_exists(name):
    """Return whether ``name`` can be fetched and has ATOM and HETATM rows.

    ``PandasPDB().fetch_pdb(name)`` is attempted up to three times; the first
    truthy result is used.

    Args:
        name: identifier passed to ``fetch_pdb``.

    Returns:
        False if no attempt produced an object with a ``df`` attribute, or if
        its 'HETATM' or 'ATOM' entry is empty; True otherwise.

    Raises:
        ValueError: if ``name`` is empty.
    """
    if not name:
        raise ValueError("Empty name, cannot check if structure is valid")

    pro = None
    for _ in range(3):
        try:
            pro = PandasPDB().fetch_pdb(name)
        except Exception:
            # Best-effort retry. ``Exception`` (rather than a bare except)
            # lets KeyboardInterrupt and SystemExit propagate instead of
            # being silently retried.
            continue
        else:
            if pro:
                break

    if not hasattr(pro, 'df'):
        return False

    if len(pro.df['HETATM']) == 0 or len(pro.df["ATOM"]) == 0:
        return False

    # no errors; this structure is probably fine?
    return True
def test_get_exceptions():
    """Call ``get`` with the selection string ``'main-chai'``.

    NOTE(review): nothing is asserted here; presumably an expected-exception
    decorator applies to this test outside this block -- confirm.
    """
    structure = PandasPDB()
    structure.read_pdb(TESTDATA_FILENAME)
    structure.get('main-chai')
def test_get_all():
    """Smoke-test ``get`` for a few selections; return values are not checked."""
    structure = PandasPDB()
    structure.read_pdb(TESTDATA_FILENAME)
    for selection in ('c-alpha', 'hydrogen', 'main chain'):
        structure.get(selection)
def test__read_pdb():
    """Check that private ``_read_pdb`` returns the reference text ``three_eiy``."""
    reader = PandasPDB()
    raw_text = reader._read_pdb(TESTDATA_FILENAME)
    print(raw_text)
    assert raw_text == three_eiy
def test_anisou_input_handling():
    """Read ``TESTDATA_FILENAME2`` and check ``pdb_text`` and ``code``."""
    structure = PandasPDB()
    structure.read_pdb(TESTDATA_FILENAME2)

    assert structure.pdb_text == four_eiy

    pdb_code = structure.code
    assert pdb_code == '4eiy', pdb_code
def test_ligand():
    """Check ``rmsd`` of the two ligand HETATM frames with ``s='hydrogen'``, ``invert=True``."""
    first_ligand = pl1.df['HETATM']
    second_ligand = pl2.df['HETATM']
    value = PandasPDB.rmsd(first_ligand, second_ligand,
                           s='hydrogen', invert=True)
    assert value == 2.6444, value
def test_read_pdb():
    """Read ``TESTDATA_FILENAME`` with public ``read_pdb``; check text and code."""
    structure = PandasPDB()
    structure.read_pdb(TESTDATA_FILENAME)

    assert structure.pdb_text == three_eiy

    pdb_code = structure.code
    assert pdb_code == '3eiy', pdb_code
def test_ligand_default():
    """Check ``rmsd`` of the two ligand HETATM frames with ``s=None``."""
    first_ligand = pl1.df['HETATM']
    second_ligand = pl2.df['HETATM']
    value = PandasPDB.rmsd(first_ligand, second_ligand, s=None)
    assert value == 2.6444, value
def test_protein():
    """Check ``rmsd`` of the two protein ATOM frames with ``s='c-alpha'``."""
    atoms_a = p1t48.df['ATOM']
    atoms_b = p1t49.df['ATOM']
    value = PandasPDB.rmsd(atoms_a, atoms_b, s='c-alpha', invert=False)
    assert value == 0.4785, value
def test__read_pdb_gz():
    """Check that private ``_read_pdb`` returns ``three_eiy`` for the gzip test file."""
    reader = PandasPDB()
    raw_text = reader._read_pdb(TESTDATA_FILENAME_GZ)
    assert raw_text == three_eiy
# Author: Sebastian Raschka <*****@*****.**>
# License: BSD 3 clause
# Project Website: http://rasbt.github.io/biopandas/
# Code Repository: https://github.com/rasbt/biopandas

from biopandas.pdb import PandasPDB
import os
import numpy as np
import pandas as pd
from nose.tools import raises


TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), 'data',
                                 '3eiy_stripped_no_ele.pdb')

# Shared fixture: the test file is parsed once at import time.
ppdb = PandasPDB()
ppdb.read_pdb(TESTDATA_FILENAME)


def test_impute_hetatm():
    """Impute elements for HETATM only; ATOM symbols must stay empty."""
    imputed = ppdb.impute_element(sections=['HETATM'])

    hetatm_symbols = imputed['HETATM']['element_symbol']
    assert hetatm_symbols[1] == 'N'
    assert hetatm_symbols[10] == 'O'

    atom_symbols = imputed['ATOM']['element_symbol']
    assert atom_symbols[1] == ''
    assert atom_symbols[10] == ''


def test_impute_atom():
    """Impute elements for ATOM and check two of the resulting symbols."""
    imputed = ppdb.impute_element(sections=['ATOM'])

    atom_symbols = imputed['ATOM']['element_symbol']
    assert atom_symbols[1] == 'C'
    assert atom_symbols[10] == 'C'
# Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas from biopandas.pdb import PandasPDB import os import numpy as np import pandas as pd from nose.tools import raises TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), 'data', '1t48_995.pdb') TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), 'data', '1t49_995.pdb') TESTDATA_lig1 = os.path.join(os.path.dirname(__file__), 'data', 'lig_conf_1.pdb') TESTDATA_lig2 = os.path.join(os.path.dirname(__file__), 'data', 'lig_conf_2.pdb') p1t48 = PandasPDB() p1t48.read_pdb(TESTDATA_1t48) p1t49 = PandasPDB() p1t49.read_pdb(TESTDATA_1t49) pl1 = PandasPDB() pl1.read_pdb(TESTDATA_lig1) pl2 = PandasPDB() pl2.read_pdb(TESTDATA_lig2) def test_equal(): r = PandasPDB.rmsd(p1t48.df['ATOM'], p1t48.df['ATOM'], s=None) assert r == 0.000, r @raises(AttributeError) def test_wrong_arg():
def test_wrong_arg():
    """Call ``rmsd`` with the selection string ``s='bla'``.

    NOTE(review): nothing is asserted here; presumably an expected-exception
    decorator applies to this test outside this block -- confirm.
    """
    trimmed_atoms = p1t48.df['ATOM'].loc[1:, :]
    all_atoms = p1t48.df['ATOM']
    PandasPDB.rmsd(trimmed_atoms, all_atoms, s='bla')
def test_incompatible():
    """Call ``rmsd`` on an ATOM frame and a ``.loc[1:, :]`` slice of it.

    NOTE(review): nothing is asserted here; presumably an expected-exception
    decorator applies to this test outside this block -- confirm.
    """
    trimmed_atoms = p1t48.df['ATOM'].loc[1:, :]
    all_atoms = p1t48.df['ATOM']
    PandasPDB.rmsd(trimmed_atoms, all_atoms, s=None)
def test_invalid_query():
    """Call ``rmsd`` with the selection string ``s='bla'``.

    NOTE(review): nothing is asserted here; presumably an expected-exception
    decorator applies to this test outside this block -- confirm.
    """
    sliced = p1t48.df['ATOM'].loc[1:, :]
    full = p1t48.df['ATOM']
    PandasPDB.rmsd(sliced, full, s='bla')
def test_equal():
    """Check that ``rmsd`` of an ATOM frame against itself is 0.000."""
    atoms = p1t48.df['ATOM']
    value = PandasPDB.rmsd(atoms, p1t48.df['ATOM'], s=None)
    assert value == 0.000, value