def _from_pdb_text(cls, pdb_text, verbose=False):
    """
    Get structural data from pdb text as DataFrame.

    Parameters
    ----------
    pdb_text : str
        Pdb file content from KLIFS database.
    verbose : bool
        Show only default columns (False) or additionally input-format specific
        columns (True).

    Returns
    -------
    pandas.DataFrame
        Structural data (ATOM and HETATM entries), formatted by
        ``cls._format_dataframe``.

    Raises
    ------
    ValueError
        If the formatted DataFrame contains no rows.
    """

    # Set pdb columns (index, name, dtype) as DataFrame
    pdb_columns = pd.DataFrame.from_dict(PDB_COLUMNS, orient="index", columns=["name", "dtype"])

    # Use biopandas to parse the pdb format and return DataFrames
    # TODO in the future: BioPandas: wait for pdb equivalent of
    # PandasMol2.read_mol2_from_list
    ppdb = PandasPdb()
    pdb_dict = ppdb._construct_df(pdb_text.splitlines(True))

    # Concatenate ATOM and HETATM entries
    pdb_df = pd.concat([pdb_dict["ATOM"], pdb_dict["HETATM"]]).reset_index(drop=True)

    # Select only columns of interest (by position) and rename columns
    pdb_df = pdb_df.iloc[:, pdb_columns.index.to_list()]
    pdb_df.columns = pdb_columns["name"].to_list()

    # Merge residue ID and insertion code.
    # Column-wise concatenation instead of a row-wise ``apply``: a row-wise
    # ``apply`` on a frame without rows yields a DataFrame (not a Series), so the
    # assignment could fail before the emptiness check below is reached.
    pdb_df["residue.id"] = pdb_df["residue.id"].astype(str) + pdb_df["residue.insertion"]

    # Format DataFrame
    pdb_df = cls._format_dataframe(pdb_df, verbose)

    if len(pdb_df) == 0:
        raise ValueError(
            "No structural data could be loaded. Is the input text in pdb format?"
        )

    return pdb_df
def test__construct_df():
    """Check the DataFrames that ``PandasPdb._construct_df`` builds from PDB lines."""
    frames = PandasPdb()._construct_df(three_eiy.splitlines())

    # One DataFrame per record section
    assert set(frames.keys()) == {'OTHERS', 'ATOM', 'ANISOU', 'HETATM'}

    # Each section must expose exactly the expected columns
    expected_columns = (
        ('ATOM', ATOM_DF_COLUMNS),
        ('HETATM', ATOM_DF_COLUMNS),
        ('ANISOU', ANISOU_DF_COLUMNS),
    )
    for section, columns in expected_columns:
        assert set(frames[section].columns) == set(columns)

    # Expected content of the first ATOM row, field by field
    first_atom = {
        'record_name': 'ATOM',
        'atom_number': 1,
        'blank_1': '',
        'atom_name': 'N',
        'alt_loc': '',
        'residue_name': 'SER',
        'blank_2': '',
        'chain_id': 'A',
        'residue_number': 2,
        'insertion': '',
        'blank_3': '',
        'x_coord': 2.527,
        'y_coord': 54.656,
        'z_coord': -1.667,
        'occupancy': 1.0,
        'b_factor': 52.73,
        'blank_4': '',
        'segment_id': '',
        'element_symbol': 'N',
        'charge': None,
        'line_idx': 609,
    }
    exp = pd.Series(np.array(list(first_atom.values())), index=list(first_atom))

    assert exp.equals(frames['ATOM'].loc[0, :])
def _pdb_text_to_dataframe(pdb_text):
    """
    Parse pdb text into structural data and print the resulting keys.

    Parameters
    ----------
    pdb_text : str
        Pdb file content from KLIFS database.

    Returns
    -------
    dict of pandas.DataFrame
        Structural data as returned by ``PandasPdb._construct_df``.
    """

    # Keep line endings when splitting, then let biopandas build the DataFrames
    pdb_lines = pdb_text.splitlines(True)
    pdb_dict = PandasPdb()._construct_df(pdb_lines)

    print(f'Structural data keys: {pdb_dict.keys()}')

    return pdb_dict