def fetch_all_by_trgm_sim(self, smiles, *expr, **kwargs):
    """
    Fetch the fragments whose isomeric SMILES is similar to the given
    SMILES string according to trigram similarity (similar to LINGO).

    Parameters
    ----------
    smiles : str
        Query SMILES string.
    *expr : BinaryExpressions, optional
        SQLAlchemy BinaryExpressions that will be used to filter the query.
    threshold : float, default=0.6
        Similarity threshold that is set for the search.
    limit : int, optional
        Maximum number of hits that will be returned. No limit is applied
        if it is omitted (or falsy).

    Returns
    -------
    resultset : list
        Rows of this adaptor's query extended by two labelled columns,
        ``similarity`` (the calculated trigram similarity) and
        ``sim_thresh`` (the threshold reported by ``show_limit()``).
        Rows are ordered by trigram distance to the query SMILES.

    Queried Entities
    ----------------
    Fragment

    Examples
    --------
    >>> FragmentAdaptor().fetch_all_by_trgm_sim('Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C')
    [(<Fragment(STI)>, 0.883721), (<Fragment(NIL)>, 0.73913),
     (<Fragment(PRC)>, 0.738095), (<Fragment(406)>, 0.666667),
     (<Fragment(J07)>, 0.604167), (<Fragment(AD5)>, 0.6),
     (<Fragment(AAX)>, 0.6), (<Fragment(VX6)>, 0.6)]

    Requires
    --------
    .. important:: `pg_trgm <http://www.postgresql.org/docs/current/static/pgtrgm.html>`_ PostgreSQL extension.
    """
    db = Session()
    cutoff = kwargs.get('threshold', 0.6)

    # The similarity threshold has to be set before the filter below is
    # evaluated, otherwise a previously set value would be used.
    set_limit = text("SELECT set_limit(:threshold)")
    set_limit = set_limit.execution_options(autocommit=True)
    db.execute(set_limit.params(threshold=cutoff))

    # Extra columns that are returned next to every fragment.
    extra_columns = (
        func.similarity(Fragment.ism, smiles).label('similarity'),
        func.show_limit().label('sim_thresh'),
    )

    hits = self.query.add_columns(*extra_columns)
    hits = hits.filter(and_(Fragment.like(smiles), *expr))

    # KNN-GIST: order by the distance operator.
    hits = hits.order_by(Fragment.ism.op('<->')(smiles))

    max_hits = kwargs.get('limit')
    if max_hits:
        hits = hits.limit(max_hits)

    return hits.all()
def abstracts(self):
    """
    Fetch the citation rows (journal article abstracts) that are linked to
    this structure through its PubMed cross-references.

    Returns
    -------
    rows : list
        All rows of the ``citations`` table whose ``pubmed_id`` equals a
        'PubMed' cross-reference of this structure.
    """
    db = Session()

    # A citation belongs to this structure if a PubMed cross-reference of
    # entity type 'Structure' points at it; the xref value is cast to an
    # integer so that it can be compared with the PubMed id column.
    conditions = (
        citations.c.pubmed_id == cast(XRef.xref, Integer),
        XRef.source == 'PubMed',
        XRef.entity_type == 'Structure',
        XRef.entity_id == self.structure_id,
    )
    statement = select([citations], and_(*conditions))

    rows = db.execute(statement)
    return rows.fetchall()
def disordered_regions(self, *expr):
    """
    Fetch the disordered regions that are recorded for this chain (if any).

    Parameters
    ----------
    *expr : BinaryExpressions, optional
        Additional SQLAlchemy BinaryExpressions that are AND-ed to the
        chain criteria.

    Returns
    -------
    rows : list
        All matching rows of the ``disordered_regions`` table.
    """
    db = Session()

    # Regions are matched on the PDB code of the parent structure and on
    # the chain identifier of this chain in the asymmetric unit.
    same_pdb = disordered_regions.c.pdb == self.Biomolecule.Structure.pdb
    same_chain = disordered_regions.c.pdb_chain_id == self.pdb_chain_asu_id

    statement = select([disordered_regions],
                       and_(same_pdb, same_chain, *expr))

    return db.execute(statement).fetchall()
def fetch_all_by_sim_oe(self, smiles, *expr, **kwargs):
    """
    Build the query for all Chemical Components that are similar to the
    given SMILES string, using OpenEye chemical fingerprints.

    Parameters
    ----------
    smiles : str
        The query molecule in SMILES format.
    *expr : BinaryExpressions, optional
        SQLAlchemy BinaryExpressions that will be used to filter the query.
    threshold : float, optional
        The similarity threshold that will be set for the chosen metric
        before searching. If it is omitted (``None``) the limit that is
        currently in effect is left untouched. An explicit ``0`` is honoured.
    metric : {'tanimoto','dice','manhattan','cosine','euclidean'}, default='tanimoto'
        Similarity metric to be used.
    fp : {'circular','maccs166','path','tree'}, default='circular'
        OpenEye fingerprint type to be used for similarity searching.

    Queried Entities
    ----------------
    ChemComp, ChemCompOEFP

    Returns
    -------
    query : Query
        Unevaluated query yielding rows in the form (ChemComp, similarity),
        ordered by the distance operator of the chosen metric. No row limit
        is applied by this method (a ``limit`` keyword is accepted but not
        used here); the caller can limit or evaluate the returned query.

    Raises
    ------
    ValueError
        If ``fp`` or ``metric`` is not one of the supported values. Both
        are checked before the session-level similarity limit is changed.

    Requires
    --------
    .. important:: OpenEye cartridge.
    """
    threshold = kwargs.get('threshold')
    metric = kwargs.get('metric', 'tanimoto')
    fp = kwargs.get('fp', 'circular')

    # Supported fingerprint types. The cartridge function is named
    # make_<type>_fp and the ChemCompOEFP column <type>_fp.
    fingerprints = ('circular', 'maccs166', 'path', 'tree')

    # Supported metrics and their KNN-GIST distance operators. The cartridge
    # functions are named <metric> and <metric>_is_above_limit.
    operators = {
        'tanimoto': '<%%>',  # % has to be escaped
        'dice': '<#>',
        'manhattan': '<~>',
        'cosine': '<@>',
        'euclidean': '<->',
    }

    # Validate the arguments first so that an invalid call does not change
    # the similarity limit of the database session as a side effect.
    if fp not in fingerprints:
        raise ValueError(
            "cannot create fingerprint: type {0} does not exist.".format(fp))

    if metric not in operators:
        raise ValueError(
            "{} is not a valid similarity metric.".format(metric))

    session = Session()

    # Set the similarity threshold for the selected metric. Compare against
    # None: a threshold of 0 is a valid request and must not be skipped.
    if threshold is not None:
        statement = text(
            "SELECT openeye.set_oefp_similarity_limit(:threshold, :metric)")
        session.execute(
            statement.params(threshold=threshold, metric=metric))

    # Fingerprint of the query molecule and the column it is compared with.
    query = getattr(func.openeye, 'make_{0}_fp'.format(fp))(smiles)
    target = getattr(ChemCompOEFP, '{0}_fp'.format(fp))

    # Compile similarity metric and the corresponding GIST index / KNN-GIST.
    similarity = getattr(func.openeye, metric)(query, target)
    index = getattr(
        func.openeye, '{0}_is_above_limit'.format(metric))(target, query)
    orderby = target.op(
        'OPERATOR(openeye.{0})'.format(operators[metric]))(query)

    query = ChemComp.query.add_column(similarity)
    query = query.join('OEFP').filter(and_(index, *expr))
    query = query.order_by(orderby)

    return query
def fetch_all_by_sim(self, smi, *expr, **kwargs):
    """
    Returns all fragments that match the given SMILES string with at least
    the given similarity threshold using chemical fingerprints.

    Parameters
    ----------
    smi : str
        The query molecule in SMILES format.
    *expr : BinaryExpressions, optional
        SQLAlchemy BinaryExpressions that will be used to filter the query.
    threshold : float, default=0.5
        The similarity threshold that will be used for searching.
    metric : {'tanimoto','dice'}, default='tanimoto'
        Similarity metric to be used.
    fp : {'circular','atompair','torsion','maccs','layered','avalon'}, default='circular'
        RDKit fingerprint type to be used for similarity searching.
    limit : int, optional
        Maximum number of hits that will be returned. No limit is applied
        if it is omitted (or falsy).

    Queried Entities
    ----------------
    Fragments, FragmentRDFP

    Returns
    -------
    hits : list
        Rows of this adaptor's query extended by two labelled columns,
        ``similarity`` and ``sim_thresh`` (the threshold in effect),
        ordered by descending similarity.

    Raises
    ------
    RuntimeError
        If ``fp`` is not a supported fingerprint type.
    ValueError
        If ``metric`` is not a supported similarity metric.

    Requires
    --------
    .. important:: `RDKit <http://www.rdkit.org>`_ PostgreSQL cartridge.
    """
    threshold = kwargs.get('threshold', 0.5)
    metric = kwargs.get('metric', 'tanimoto')
    fp = kwargs.get('fp', 'circular')

    # fingerprint type -> (cartridge function, extra arguments of that
    # function, FragmentRDFP column holding the precalculated fingerprint)
    fingerprints = {
        'circular': ('morganbv_fp', (2,), 'circular_fp'),
        'torsion': ('torsionbv_fp', (), 'torsion_fp'),
        'atompair': ('atompairbv_fp', (), 'atompair_fp'),
        'maccs': ('maccs_fp', (), 'maccs_fp'),
        'layered': ('layered_fp', (), 'layered_fp'),
        'avalon': ('avalon_fp', (), 'avalon_fp'),
    }

    # Validate the arguments before a session is opened or any setting of
    # the database session is changed.
    if fp not in fingerprints:
        msg = "The fingerprint type [{0}] does not exist.".format(fp)
        raise RuntimeError(msg)

    # An unknown metric used to fall through both branches below and fail
    # later with an UnboundLocalError; reject it explicitly instead.
    if metric not in ('tanimoto', 'dice'):
        raise ValueError(
            "{0} is not a valid similarity metric.".format(metric))

    session = Session()

    fp_function, fp_args, fp_column = fingerprints[fp]
    query = getattr(func.rdkit, fp_function)(smi, *fp_args).label('queryfp')
    target = getattr(FragmentRDFP, fp_column)

    # set the similarity threshold for the index
    if metric == 'tanimoto':
        session.execute(
            text("SET rdkit.tanimoto_threshold=:threshold").execution_options(
                autocommit=True).params(threshold=threshold))

        sim_thresh = func.current_setting(
            'rdkit.tanimoto_threshold').label('sim_thresh')
        similarity = func.rdkit.tanimoto_sml(query, target).label('similarity')
        index = func.rdkit.tanimoto_sml_op(query, target)

    else:  # 'dice', guaranteed by the validation above
        session.execute(
            text("SET rdkit.dice_threshold=:threshold").execution_options(
                autocommit=True).params(threshold=threshold))

        sim_thresh = func.current_setting(
            'rdkit.dice_threshold').label('sim_thresh')
        similarity = func.rdkit.dice_sml(query, target).label('similarity')
        index = func.rdkit.dice_sml_op(query, target)

    query = self.query.add_columns(similarity, sim_thresh)
    query = query.join('RDFP').filter(and_(index, *expr))
    query = query.order_by('similarity DESC')

    if kwargs.get('limit'):
        query = query.limit(kwargs['limit'])

    results = query.all()

    return results