def klifs_kinase_from_uniprot_id(uniprot_id: str) -> pd.Series:
    """
    Retrieve KLIFS kinase details about the kinase matching the given UniProt ID.

    Parameters
    ----------
    uniprot_id: str
        UniProt identifier.

    Returns
    -------
    kinase: pd.Series
        KLIFS kinase details (the single matching row of the KLIFS kinase table).

    Raises
    ------
    ValueError:
        No KLIFS kinase found for UniProt ID.
    ValueError:
        Multiple KLIFS kinases found for UniProt ID.
    """
    # Function-scope import: opencadd is only imported when this function runs
    from opencadd.databases.klifs import setup_remote

    remote = setup_remote()

    # Fetch the details of all kinases listed by KLIFS, then filter by UniProt ID locally
    kinase_ids = remote.kinases.all_kinases()["kinase.klifs_id"]
    kinases = remote.kinases.by_kinase_klifs_id(list(kinase_ids))
    matches = kinases[kinases["kinase.uniprot"] == uniprot_id]

    # Exactly one match is required
    n_matches = len(matches)
    if n_matches == 0:
        raise ValueError("No KLIFS kinase found for UniProt ID.")
    if n_matches > 1:
        raise ValueError("Multiple KLIFS kinases found for UniProt ID.")

    # Selecting a single row by position yields a Series, not a DataFrame
    return matches.iloc[0]
def from_structure_klifs_id(
    cls, structure_klifs_id, subpockets=None, extension="pdb", klifs_session=None
):
    """
    Build a KLIFS pocket for a structure KLIFS ID, including the KLIFS regions and
    subpockets.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID.
    subpockets : dict
        Dictionary with the following keys and values (passed on unchanged to
        `add_klifs_subpockets`):
        "anchor_residue.klifs_id" : list of int
            List of anchor residues (KLIFS residue IDs) whose centroid defines the subpocket
            center.
        "subpocket.name" : str
            Subpocket name.
        "subpocket.color" : str
            Subpocket color.
    extension : str
        Structure protein data file format. Defaults to PDB format.
    klifs_session : opencadd.databases.klifs.session.Session or None
        Remote or local KLIFS session. If None, a remote session is initialized.

    Returns
    -------
    opencadd.structure.pocket.PocketKlifs
        KLIFS pocket object.
    """

    # Fall back to a new remote session if the caller did not pass a usable one
    session = klifs_session if klifs_session else setup_remote()

    # Sessions with a truthy `_client` are queried without the file extension;
    # all other sessions receive it (presumably remote vs. local - confirm)
    query_kwargs = {} if session._client else {"extension": extension}
    residues = session.pockets.by_structure_klifs_id(structure_klifs_id, **query_kwargs)

    # Coordinates of the full complex as text in the requested format
    complex_text = session.coordinates.to_text(
        structure_klifs_id, entity="complex", extension=extension
    )

    klifs_pocket = cls.from_text(
        complex_text,
        extension,
        residues["residue.id"].to_list(),
        residues["residue.klifs_id"].to_list(),
        structure_klifs_id,
    )

    # Annotate the pocket with KLIFS regions first, then with the subpockets
    klifs_pocket = klifs_pocket.add_klifs_regions(klifs_pocket, residues)
    return klifs_pocket.add_klifs_subpockets(klifs_pocket, residues, subpockets)
def _to_kinase_annotation(distance_matrix, outputfile):
    """
    Write a kinase annotation table (kinase name, family, group) for FigTree.

    Parameters
    ----------
    distance_matrix : pandas.DataFrame
        Distance matrix on which clustering is based; its column labels are used as
        kinase names.
    outputfile : str or pathlib.Path
        Path to kinase annotation file in FigTree format (written tab-separated).
    """

    outputfile = Path(outputfile)
    logger.info(f"Writing resulting kinase annotation to {outputfile}")

    # Kinase names are taken from the matrix column labels
    names = distance_matrix.columns.to_list()

    # Look the kinases up in KLIFS via a remote session
    session = klifs.setup_remote()
    details = session.kinases.by_kinase_name(names)

    # Keep human entries only, reduced to the annotation columns
    is_human = details["species.klifs"] == "Human"
    human_details = details[is_human]
    annotation = human_details[["kinase.klifs_name", "kinase.family", "kinase.group"]]

    # Tab-separated output without the DataFrame index
    annotation.to_csv(outputfile, sep="\t", index=False)
def plot_number_of_kinases_per_kinase_group(structures, remote=None):
    """
    Draw a horizontal bar plot with the number of kinases per kinase group.

    Parameters
    ----------
    structures : pandas.DataFrame
        Structures DataFrame from opencadd.databases.klifs module; the column
        "kinase.klifs_id" is read.
    remote : None or opencadd.databases.klifs.session.Session
        Remote KLIFS session. If None, generate new remote session.

    Returns
    -------
    matplotlib.pyplot.axis
        Plot axis.
    """

    ids = structures["kinase.klifs_id"].to_list()

    # Use the given session, or open a new remote one
    session = setup_remote() if remote is None else remote

    # Kinase details for the kinase KLIFS IDs found in the structures table
    kinases = session.kinases.by_kinase_klifs_id(ids)

    # Group sizes in ascending order
    group_sizes = kinases.groupby("kinase.group").size()
    group_sizes = group_sizes.sort_values()

    return group_sizes.plot(
        kind="barh",
        figsize=(4, 3),
        title="Number of kinases per kinase group",
        xlabel="Kinase group",
    )
def from_structure_klifs_ids(cls, structure_klifs_ids, klifs_session=None, n_cores=1):
    """
    Calculate fingerprints for one or more KLIFS structures (by structure KLIFS IDs).

    Parameters
    ----------
    structure_klifs_ids : list of int
        Input structure KLIFS IDs (output fingerprints may contain fewer IDs because
        fingerprints that come back as None are dropped).
    klifs_session : opencadd.databases.klifs.session.Session or None
        Local or remote KLIFS session. If None, a remote session is set up.
    n_cores : int or None
        Number of cores to be used for fingerprint generation as defined by the user
        (passed through `set_n_cores`).

    Returns
    -------
    kissim.encoding.fingerprint_generator
        Fingerprint generator object containing fingerprints.
    """

    logger.info("GENERATE FINGERPRINTS")
    logger.info(f"Number of input structures: {len(structure_klifs_ids)}")

    start_time = datetime.datetime.now()
    logger.info(f"Fingerprint generation started at: {start_time}")

    # Without a session from the caller, work against remote KLIFS
    if klifs_session is None:
        klifs_session = setup_remote()

    # Resolve the user-defined core count
    n_cores = set_n_cores(n_cores)

    # Create the generator and attach its inputs
    generator = cls()
    generator.structure_klifs_ids = structure_klifs_ids
    generator.klifs_session = klifs_session

    # Index fingerprints by structure KLIFS ID, skipping empty (None) fingerprints
    fingerprints_by_id = {}
    for fingerprint in generator._get_fingerprint_list(n_cores):
        if fingerprint is None:
            continue
        fingerprints_by_id[fingerprint.structure_klifs_id] = fingerprint
    generator.data = fingerprints_by_id

    generator.data_normalized = generator._normalize_fingerprints()

    logger.info(f"Number of output fingerprints: {len(generator.data)}")

    end_time = datetime.datetime.now()
    logger.info(f"Runtime: {end_time - start_time}")

    return generator
def _get_klifs_residue_colors(remote=None):
    """
    Read the KLIFS residue colors from the pocket of an example structure
    (structure KLIFS ID 12347).

    Parameters
    ----------
    remote : None or opencadd.databases.klifs.session.Session
        Remote KLIFS session. If None, generate new remote session.

    Returns
    -------
    list of str
        Values of the "residue.klifs_color" column of the example pocket.
    """

    # Use the given session, or open a new remote one
    session = setup_remote() if remote is None else remote

    example_pocket = session.pockets.by_structure_klifs_id(12347)
    return example_pocket["residue.klifs_color"].to_list()
def _setup_klifs_session(local_klifs_download_path=None):
    """
    Create a local or a remote KLIFS session, depending on whether a path is given.

    Parameters
    ----------
    local_klifs_download_path : str or None
        If path to local KLIFS download is given, set up local KLIFS session.
        If None (or any other falsy value) is given, set up remote KLIFS session.

    Returns
    -------
    klifs_session : opencadd.databases.klifs.session.Session
        Local or remote KLIFS session.
    """

    # No path given: work against remote KLIFS
    if not local_klifs_download_path:
        return setup_remote()

    return setup_local(local_klifs_download_path)
def from_structure_klifs_id(cls, structure_klifs_id, klifs_session=None):
    """
    Collect the KLIFS data for a structure KLIFS ID.

    Parameters
    ----------
    structure_klifs_id : int
        KLIFS structure ID.
    klifs_session : opencadd.databases.klifs.session.Session or None
        Local or remote KLIFS session. If None, a remote session is set up.

    Returns
    -------
    kissim.io.KlifsToKissimData or None
        KLIFS data. None if the structure KLIFS ID does not exist or, for a local
        session, if the required structural files do not exist.
    """

    instance = cls()
    instance.structure_klifs_id = structure_klifs_id

    # Without a session from the caller, work against remote KLIFS
    instance.klifs_session = setup_remote() if klifs_session is None else klifs_session

    # Give up early on unknown structure KLIFS IDs
    if not instance._structure_klifs_id_exists():
        return None

    # Local sessions (those with a database attached) additionally need the complex
    # and pocket structural files to be present
    has_database = instance.klifs_session._database is not None
    if has_database and not instance._local_session_files_exist():
        return None

    text, extension = instance._get_text_and_extension()
    instance.text = text
    instance.extension = extension

    residue_ids, residue_ixs = instance._get_pocket_residue_ids_and_ixs()
    instance.residue_ids = residue_ids
    instance.residue_ixs = residue_ixs

    instance.kinase_name = instance._get_kinase_name()

    return instance
def from_structure_klifs_id(cls, structure_klifs_id, subpockets=None):
    """
    Get a KLIFS pocket (remotely by a structure KLIFS ID) that defines the KLIFS regions and
    subpockets.

    The complex structure is downloaded as PDB file into the current working directory.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID.
    subpockets : pandas.DataFrame or None
        Subpockets (row) with the following details (columns):
        "anchor_residue.klifs_ids" : list of int
            List of anchor residues (KLIFS residue IDs) whose centroid defines the subpocket
            center.
        "subpocket.name" : str
            Subpocket name.
        "subpocket.color" : str
            Subpocket color.
        The input DataFrame is not modified. If None, no subpockets are added.

    Returns
    -------
    opencadd.structure.pocket.KlifsPocket
        KLIFS pocket object.
    """

    # Set up remote KLIFS session
    remote = setup_remote()

    # Get pocket and coordinates for a structure (by a structure KLIFS ID)
    pocket = remote.pockets.by_structure_klifs_id(structure_klifs_id)
    filepath = remote.coordinates.to_pdb(structure_klifs_id, ".", entity="complex")

    pocket_3d = cls.from_file(
        filepath,
        pocket["residue.id"].to_list(),
        "example kinase",
        pocket["residue.klifs_id"].to_list(),
    )

    # Add regions: one region per (KLIFS region ID, KLIFS color) combination
    for (region, color), group in pocket.groupby(
        ["residue.klifs_region_id", "residue.klifs_color"]
    ):
        pocket_3d.add_region(
            region,
            group["residue.id"].to_list(),
            color,
            group["residue.klifs_region_id"].to_list(),
        )

    if subpockets is not None:
        # Work on a copy so that the helper column added below does not leak into the
        # caller's DataFrame
        subpockets = subpockets.copy()

        # Map residue KLIFS IDs > residue ID
        subpockets["anchor_residue.ids"] = subpockets["anchor_residue.klifs_ids"].apply(
            lambda x: pocket[pocket["residue.klifs_id"].isin(x)]["residue.id"].to_list()
        )

        # Add subpockets
        for _, subpocket in subpockets.iterrows():
            pocket_3d.add_subpocket(
                subpocket["subpocket.name"],
                subpocket["anchor_residue.ids"],
                subpocket["subpocket.color"],
                subpocket["anchor_residue.klifs_ids"],
            )

    return pocket_3d
import pytest import numpy as np import pandas as pd from opencadd.databases.klifs import setup_local, setup_remote from kissim.utils import enter_temp_directory from kissim.encoding import Fingerprint, FingerprintNormalized, FingerprintGenerator from kissim.schema import ( FEATURE_NAMES_PHYSICOCHEMICAL, FEATURE_NAMES_PHYSICOCHEMICAL_DICT, FEATURE_NAMES_DISTANCES_AND_MOMENTS, ) PATH_TEST_DATA = Path(__name__).parent / "kissim" / "tests" / "data" REMOTE = setup_remote() LOCAL = setup_local(PATH_TEST_DATA / "KLIFS_download") class TestFingerprintGenerator: """ Test common functionalities in the PocketBioPython and PocketDataFrame classes. """ @pytest.mark.parametrize( "structure_klifs_ids, klifs_session, n_cores, fingerprints_values_array_sum", [ ([110, 118], REMOTE, 1, 10152.4256), ([110, 118], REMOTE, 2, 10152.4256), ([110, 118], LOCAL, 1, 10152.4256), ([110, 118], LOCAL, 2, 10152.4256), ([110, 118], None, None, 10152.4256),
Draw, Descriptors, Lipinski, PandasTools, rdFingerprintGenerator, QED, ) from rdkit.Chem.Draw import IPythonConsole from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem.PropertyMol import PropertyMol from rdkit.ML.Cluster import Butina import seaborn as sns from opencadd.databases.klifs import setup_remote KLIFS_SESSION = setup_remote() RDLogger.DisableLog("rdApp.*") SUBPOCKET_COLORS = { "AP": "purple", "FP": "forestgreen", "SE": "c", "GA": "tab:orange", "B1": "tab:blue", "B2": "darkslateblue", "X": "grey", } def read_fragment_library(path_to_lib):
def from_structure_klifs_id(
    cls, structure_klifs_id, subpockets=None, extension="pdb", klifs_session=None
):
    """
    Build a KLIFS pocket for a structure KLIFS ID, including the KLIFS regions and
    (optionally) subpockets.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID.
    subpockets : dict or None
        Dictionary (converted to a pandas.DataFrame) with the following keys and values:
        "anchor_residue.klifs_ids" : list of int
            List of anchor residues (KLIFS residue IDs) whose centroid defines the subpocket
            center.
        "subpocket.name" : str
            Subpocket name.
        "subpocket.color" : str
            Subpocket color.
        If None, no subpockets are added.
    extension : str
        Structure protein data file format. Defaults to PDB format.
    klifs_session : opencadd.databases.klifs.session.Session or None
        Remote or local KLIFS session. If None, a remote session is initialized.

    Returns
    -------
    opencadd.structure.pocket.KlifsPocket
        KLIFS pocket object.
    """

    # Fall back to a new remote session if the caller did not pass a usable one
    session = klifs_session if klifs_session else setup_remote()

    # Sessions with a truthy `_client` are queried without the file extension;
    # all other sessions receive it (presumably remote vs. local - confirm)
    if session._client:
        residues = session.pockets.by_structure_klifs_id(structure_klifs_id)
    else:
        residues = session.pockets.by_structure_klifs_id(
            structure_klifs_id, extension=extension
        )

    # Coordinates of the full complex as text in the requested format
    complex_text = session.coordinates.to_text(
        structure_klifs_id, entity="complex", extension=extension
    )

    klifs_pocket = cls.from_text(
        complex_text,
        extension,
        residues["residue.id"].to_list(),
        residues["residue.klifs_id"].to_list(),
        structure_klifs_id,
    )

    # One region per (KLIFS region, KLIFS color) combination
    for (region_name, region_color), members in residues.groupby(
        ["residue.klifs_region", "residue.klifs_color"]
    ):
        klifs_pocket.add_region(
            name=region_name,
            residue_ixs=members["residue.klifs_id"].to_list(),
            color=region_color,
        )

    # Nothing more to do without subpocket definitions
    if subpockets is None:
        return klifs_pocket

    def _to_residue_ids(klifs_ids):
        """Translate residue KLIFS IDs into the residue IDs of this structure."""
        selected = residues[residues["residue.klifs_id"].isin(klifs_ids)]
        return selected["residue.id"].to_list()

    subpocket_table = pd.DataFrame(subpockets)
    # Mapped residue IDs are stored in the table; `add_subpocket` below is still fed
    # with the KLIFS IDs
    subpocket_table["anchor_residue.ids"] = subpocket_table[
        "anchor_residue.klifs_ids"
    ].apply(_to_residue_ids)

    for _, row in subpocket_table.iterrows():
        klifs_pocket.add_subpocket(
            name=row["subpocket.name"],
            anchor_residue_ixs=row["anchor_residue.klifs_ids"],
            color=row["subpocket.color"],
        )

    return klifs_pocket