Exemplo n.º 1
0
    def _from_pdb_text(cls, pdb_text, verbose=False):
        """
        Get structural data from pdb text as DataFrame.

        Parameters
        ----------
        pdb_text : str
            Pdb file content from KLIFS database.
        verbose : bool
            Show only default columns (False) or additionally input-format specific columns (True).

        Returns
        -------
        pandas.DataFrame
            Structural data (ATOM and HETATM records), formatted by `_format_dataframe`.

        Raises
        ------
        ValueError
            If the formatted DataFrame contains no rows.
        """

        # Set pdb columns (index, name, dtype) as DataFrame; the index holds the
        # positional column indices used for the `iloc` selection below.
        pdb_columns = pd.DataFrame.from_dict(
            PDB_COLUMNS, orient="index", columns=["name", "dtype"]
        )

        # Use biopandas to parse the pdb format and return DataFrames
        # TODO in the future: BioPandas: wait for pdb equivalent of PandasMol2.read_mol2_from_list
        ppdb = PandasPdb()
        pdb_dict = ppdb._construct_df(pdb_text.splitlines(True))

        # Concatenate ATOM and HETATM entries
        pdb_df = pd.concat([pdb_dict["ATOM"], pdb_dict["HETATM"]]).reset_index(drop=True)

        # Select only columns of interest and rename columns.
        # Copy explicitly so the column assignments below act on an independent frame.
        pdb_df = pdb_df.iloc[:, pdb_columns.index.to_list()].copy()
        pdb_df.columns = pdb_columns["name"].to_list()

        # Merge residue ID and insertion code.
        # Vectorized on purpose: a row-wise `apply(axis=1)` does not return a Series
        # for a frame without rows, so the assignment could fail before the
        # emptiness check below is ever reached (and it is slow for per-atom data).
        pdb_df["residue.id"] = pdb_df["residue.id"].astype(str) + pdb_df["residue.insertion"]

        # Format DataFrame
        pdb_df = cls._format_dataframe(pdb_df, verbose)

        if len(pdb_df) == 0:
            raise ValueError(
                "No structural data could be loaded. Is the input text in pdb format?"
            )

        return pdb_df
Exemplo n.º 2
0
def test__construct_df():
    """Check the record DataFrames built by ``PandasPdb._construct_df``.

    Verifies the set of record keys, the column sets of the ATOM, HETATM and
    ANISOU frames, and the exact content of the first ATOM row.
    """
    parser = PandasPdb()
    dfs = parser._construct_df(three_eiy.splitlines())

    # All four record sections must be present.
    assert set(dfs.keys()) == {'OTHERS', 'ATOM', 'ANISOU', 'HETATM'}

    # Each record frame exposes exactly the expected columns.
    for record, expected_columns in (
        ('ATOM', ATOM_DF_COLUMNS),
        ('HETATM', ATOM_DF_COLUMNS),
        ('ANISOU', ANISOU_DF_COLUMNS),
    ):
        assert set(dfs[record].columns) == set(expected_columns)

    # Expected first ATOM row as (column, value) pairs, in column order.
    first_atom = (
        ('record_name', 'ATOM'),
        ('atom_number', 1),
        ('blank_1', ''),
        ('atom_name', 'N'),
        ('alt_loc', ''),
        ('residue_name', 'SER'),
        ('blank_2', ''),
        ('chain_id', 'A'),
        ('residue_number', 2),
        ('insertion', ''),
        ('blank_3', ''),
        ('x_coord', 2.527),
        ('y_coord', 54.656),
        ('z_coord', -1.667),
        ('occupancy', 1.0),
        ('b_factor', 52.73),
        ('blank_4', ''),
        ('segment_id', ''),
        ('element_symbol', 'N'),
        ('charge', None),
        ('line_idx', 609),
    )
    columns, values = zip(*first_atom)
    # Object dtype keeps every value as its original Python object.
    exp = pd.Series(np.array(values, dtype=object), index=list(columns))

    assert exp.equals(dfs['ATOM'].loc[0, :])
Exemplo n.º 3
0
def _pdb_text_to_dataframe(pdb_text):
    """
    Parse pdb text into per-record DataFrames using biopandas.

    Parameters
    ----------
    pdb_text : str
       Pdb file content from KLIFS database.

    Returns
    -------
    dict of pandas.DataFrame
        Structural data, as returned by ``PandasPdb._construct_df``.
    """

    # Keep line endings when splitting the text into lines for the parser.
    pdb_lines = pdb_text.splitlines(keepends=True)
    records = PandasPdb()._construct_df(pdb_lines)

    # Report which record sections were parsed.
    print(f'Structural data keys: {records.keys()}')

    return records