def get_statements_from_hashes(statement_hashes, preassembled=True, db=None,
                               **kwargs):
    """Look up statement objects from a collection of statement hashes.

    WARNING: This function will be removed in the future. Please look to
    indra_db.client.readonly.query and
    indra_db.client.principal.raw_statements for alternatives.

    Parameters
    ----------
    statement_hashes
        The hashes to search for; handed unchanged to `HasHash`.
    preassembled : bool
        Must be truthy. Any falsy value causes a DeprecationWarning to be
        raised as an exception. Default is True.
    db
        The handle passed to `get_statements`. When None, the handle
        returned by `get_ro('primary')` is used.
    **kwargs
        Only `with_evidence` is consulted: if it is exactly False the
        evidence limit is set to 0, otherwise no limit is applied. All other
        keyword arguments are ignored.

    Returns
    -------
    The value of `statements()` called on the query result.

    Raises
    ------
    DeprecationWarning
        If `preassembled` is falsy.
    """
    deprecation_note = (
        'This module is being taken out of service, as the tools '
        'have become deprecated. Moreover, the service has been '
        're-implemented to use newer tools as best as possible, '
        'but some results may be subtly different.'
    )
    warnings.warn(deprecation_note, DeprecationWarning)

    # The default handle is resolved before the `preassembled` check, so a
    # rejected call with db=None still asks for the readonly handle first.
    if db is None:
        db = get_ro('primary')

    if not preassembled:
        raise DeprecationWarning(
            "This functionality is not longer supported. "
            "indra_db.client.principal.raw_statements "
            "has more functional features to search for "
            "raw statements."
        )

    # Only an explicit `with_evidence=False` suppresses evidence; a missing
    # key or any other value leaves the limit unset.
    suppress_evidence = kwargs.get('with_evidence') is False
    ev_lim = 0 if suppress_evidence else None

    hash_query = HasHash(statement_hashes)
    return hash_query.get_statements(db, ev_limit=ev_lim).statements()
def get_ro_source_info():
    """Collect source metadata for each source named by the readonly DB.

    The source names come from `get_source_names()` on the handle returned
    by `get_ro('primary')`. Each name is looked up in INDRA's `SOURCE_INFO`,
    after translating the abbreviated names listed below.

    Returns
    -------
    dict
        Keyed by the source name as reported by the database. Each value is
        a dict holding the `SOURCE_INFO` entry for that source plus an 'id'
        key (which the `SOURCE_INFO` entry overrides if it also defines
        'id'). For 'eidos' the 'domain' value is forced to 'biology'.

    Raises
    ------
    KeyError
        If a source name (after translation) is missing from `SOURCE_INFO`.
    """
    from indra.sources import SOURCE_INFO
    from indra_db import get_ro

    # Source names that are spelled differently as SOURCE_INFO keys.
    lookup_aliases = {
        'vhn': 'virhostnet',
        'bel_lc': 'bel',
        'pe': 'phosphoelm',
        'psp': 'phosphosite',
    }

    ro = get_ro('primary')
    ro_srcs: set = ro.get_source_names()

    sources = {}
    for src_id in ro_srcs:
        lookup_id = lookup_aliases.get(src_id, src_id)
        src_info = {'id': src_id}
        src_info.update(SOURCE_INFO[lookup_id])
        if src_id == 'eidos':
            src_info['domain'] = 'biology'
        sources[src_id] = src_info
    return sources
def get_mesh_ref_counts(mesh_terms, require_all=False, ro=None):
    """Get the reference counts by mesh term for each statement hash.

    The readonly tables `MeshConceptRefCounts` (for "C" IDs) and
    `MeshTermRefCounts` (for "D" IDs) are queried for the given mesh IDs, and
    the rows are merged into one dictionary keyed by hash.

    Parameters
    ----------
    mesh_terms : list
        A list of mesh ID strings, each a "C" or "D" followed by digits,
        e.g. "D000#####".
    require_all : Optional[bool]
        If True, only keep hashes that have an entry for every one of the
        given mesh IDs, not just some of them. Default is False.
    ro : Optional[DatabaseManager]
        A database manager handle. If None, the handle returned by
        `get_ro('primary')` is used.

    Returns
    -------
    dict
        Keyed by hash. Each value is a dict mapping the matching mesh ID
        strings to their `ref_count`, plus a 'total' entry.

    Raises
    ------
    ValueError
        If any mesh term does not begin with "C" or "D".
    """
    # Fall back on the default readonly database.
    if ro is None:
        ro = get_ro('primary')

    # Reject anything that is not a "C" or "D" mesh ID.
    if not all(term.startswith(('D', 'C')) for term in mesh_terms):
        raise ValueError("All mesh terms must begin with C or D.")

    result = {}
    tables_by_prefix = (('C', ro.MeshConceptRefCounts),
                        ('D', ro.MeshTermRefCounts))
    for prefix, table in tables_by_prefix:
        # The tables store the numeric part of the ID, so map each number
        # back to the original string for labelling the output.
        mesh_num_map = {}
        for term in mesh_terms:
            if term.startswith(prefix):
                mesh_num_map[int(term[1:])] = term
        if not mesh_num_map:
            continue
        num_ids = len(mesh_num_map)

        # Aggregate the mesh numbers and ref counts per (hash, pmid_count).
        num_agg = func.array_agg(table.mesh_num)
        count_agg = func.array_agg(table.ref_count)
        q = ro.session.query(table.mk_hash, num_agg.label('nums'),
                             count_agg.label('ref_counts'), table.pmid_count)
        if num_ids == 1:
            (only_num,) = mesh_num_map
            q = q.filter(table.mesh_num == only_num)
        else:
            q = q.filter(table.mesh_num.in_(mesh_num_map.keys()))
        q = q.group_by(table.mk_hash, table.pmid_count)

        # Within this table, require_all means the aggregated array has one
        # element per requested ID.
        if require_all:
            q = q.having(func.cardinality(num_agg) == num_ids)

        # Fold the rows into the result.
        for mk_hash, row_nums, row_counts, pmid_count in q.all():
            count_dict = {}
            for mesh_num, ref_count in zip(row_nums, row_counts):
                count_dict[mesh_num_map[mesh_num]] = ref_count

            # NOTE(review): the first table to report a hash seeds 'total'
            # with pmid_count, while a later table adds the sum of its ref
            # counts -- confirm this asymmetry is intended.
            if mk_hash in result:
                result[mk_hash].update(count_dict)
                result[mk_hash]['total'] += sum(row_counts)
            else:
                result[mk_hash] = count_dict
                result[mk_hash]['total'] = pmid_count

    # The per-table HAVING clause cannot see the other table, so drop any
    # hash that does not have every requested ID (plus the 'total' key).
    if require_all:
        expected_size = len(set(mesh_terms)) + 1
        for mk_hash in list(result):
            if len(result[mk_hash]) != expected_size:
                del result[mk_hash]
    return result
def get_statements_by_gene_role_type(agent_id=None, agent_ns='HGNC-SYMBOL',
                                     role=None, stmt_type=None, count=1000,
                                     db=None, do_stmt_count=False,
                                     preassembled=True, fix_refs=True,
                                     with_evidence=True, with_support=False,
                                     essentials_only=False):
    """Get statements from the DB by stmt type, agent, and/or agent role.

    WARNING: This function will be removed in the future. Please look to
    indra_db.client.readonly.query and
    indra_db.client.principal.raw_statements for alternatives.

    Parameters
    ----------
    agent_id : str
        String representing the identifier of the agent from the given
        namespace. Note: if the agent namespace argument, `agent_ns`, is set
        to 'HGNC-SYMBOL', this function will treat `agent_id` as an HGNC gene
        symbol and perform an internal lookup of the corresponding HGNC ID.
    agent_ns : str
        Namespace for the identifier given in `agent_id`. Default is
        'HGNC-SYMBOL'.
    role : str
        String corresponding to the role of the agent in the statement.
        Options are 'SUBJECT', 'OBJECT', or 'OTHER' (in the case of
        `Complex`, `SelfModification`, and `ActiveForm` Statements). Only
        used together with `agent_id`.
    stmt_type : str
        Name of the Statement class.
    count : int
        (DEPRECATED) Number of statements to retrieve in each batch. Ignored.
    db : :py:class:`DatabaseManager`
        Optionally specify a database manager that attaches to something
        besides the primary database, for example a local database instance.
        If None, the handle returned by `get_ro('primary')` is used.
    do_stmt_count : bool
        (DEPRECATED) Whether or not to perform an initial statement counting
        step to give more meaningful progress messages. Ignored.
    preassembled : bool
        (DEPRECATED) Must be truthy; raw statement lookup is no longer
        available through this function. Default is True.
    fix_refs : bool
        (DEPRECATED) Formerly controlled whether the `pmid` field of each
        Evidence was filled in from the database. Ignored.
    with_evidence : bool
        Choose whether or not to populate the evidence list attribute of the
        Statements. If falsy, the evidence limit is set to 0.
    with_support : bool
        (DEPRECATED) Must be falsy; populating the supports and supported_by
        lists is not supported. Default is False.
    essentials_only : bool
        (DEPRECATED) Must be falsy; retrieving only statement metadata is no
        longer supported. Default is False.

    Returns
    -------
    list
        The Statements from the database corresponding to the query, or an
        empty list if `agent_ns` is 'HGNC-SYMBOL' and `agent_id` is not a
        gene symbol the HGNC client can resolve.

    Raises
    ------
    DeprecationWarning
        If `preassembled` is falsy, or if `essentials_only` or `with_support`
        is truthy.
    ValueError
        If none of `agent_id`, `role`, or `stmt_type` is given, or if the
        query built from `agent_id` and `stmt_type` is not a `QueryCore`.
    """
    warnings.warn(('This module is being taken out of service, as the tools '
                   'have become deprecated. Moreover, the service has been '
                   're-implemented to use newer tools as best as possible, '
                   'but some results may be subtly different.'),
                  DeprecationWarning)
    if db is None:
        db = get_ro('primary')

    if not preassembled:
        raise DeprecationWarning("This functionality is not longer supported. "
                                 "indra_db.client.principal.raw_statements "
                                 "has more functional features to search for "
                                 "raw statements.")

    if not (agent_id or role or stmt_type):
        raise ValueError('At least one of agent_id, role, or stmt_type '
                         'must be specified.')

    # Translate a gene symbol into its HGNC ID before building the query.
    if agent_id and agent_ns == 'HGNC-SYMBOL':
        hgnc_symbol = agent_id
        agent_id = hgnc_client.get_hgnc_id(hgnc_symbol)
        if not agent_id:
            # Pass the symbol as an argument so formatting is deferred to
            # the logging framework.
            logger.warning('Invalid gene name: %s', hgnc_symbol)
            return []
        agent_ns = 'HGNC'

    # Start from an empty query and add whichever constraints were given.
    # `role` on its own adds no constraint.
    query = EmptyQuery()
    if agent_id:
        query &= HasAgent(agent_id, agent_ns, role)
    if stmt_type:
        query &= HasType([stmt_type])

    if not isinstance(query, QueryCore):
        raise ValueError("Either agent_id or stmt_type must be given.")

    if essentials_only:
        # The original message was missing the space after "available in",
        # which ran the module path into the preceding word.
        raise DeprecationWarning("This functionality is no longer supported. "
                                 "Similar features are available in "
                                 "indra_db.client.readonly.query, especially "
                                 "the `get_interactions` methods.")

    # A limit of 0 suppresses evidence; None leaves it unlimited.
    ev_lim = None if with_evidence else 0

    if with_support:
        raise DeprecationWarning("This feature is not supported at this "
                                 "time, and was never truly supported.")

    result = query.get_statements(db, ev_limit=ev_lim)
    return result.statements()
def get_statements_by_paper(id_list, id_type='pmid', db=None,
                            preassembled=True):
    """Get the statements associated with a list of paper ids.

    WARNING: This function will be removed in the future. Please look to
    indra_db.client.readonly.query and
    indra_db.client.principal.raw_statements for alternatives.

    Parameters
    ----------
    id_list : list or set
        A list of ints or strs that are ids of papers of type `id_type`.
    id_type : str
        The type of id used (default is pmid). Options include pmid, pmcid,
        doi, pii, url, or manuscript_id. Note that pmid is generally the
        best means of getting a paper.
    db : :py:class:`DatabaseManager`
        Optionally specify a database manager that attaches to something
        besides the primary database, for example a local database instance.
        If falsy, the handle returned by `get_ro('primary')` is used.
    preassembled : bool
        Must be truthy; raw statement lookup is no longer available through
        this function. Default is True.

    Returns
    -------
    stmt_dict : dict
        A dict of lists of Statements keyed by the `id_type` value found in
        each evidence's `text_refs`. Papers that yielded no statements are
        not included. There may be ids which were not present in `id_list`,
        and there may be a key None for evidence whose refs have no id of
        type `id_type`.

    Raises
    ------
    DeprecationWarning
        If `preassembled` is falsy.
    """
    deprecation_note = (
        'This module is being taken out of service, as the tools '
        'have become deprecated. Moreover, the service has been '
        're-implemented to use newer tools as best as possible, '
        'but some results may be subtly different.'
    )
    warnings.warn(deprecation_note, DeprecationWarning)

    # Unlike the db default below, this check runs before any handle lookup.
    if not preassembled:
        raise DeprecationWarning(
            "This functionality is not longer supported. "
            "indra_db.client.principal.raw_statements "
            "has more functional features to search for "
            "raw statements by paper."
        )

    if not db:
        db = get_ro('primary')

    paper_refs = [(id_type, paper_id) for paper_id in id_list]
    result = FromPapers(paper_refs).get_statements(db)

    # File each statement under the paper id of every one of its evidences.
    # A statement is appended once per evidence, so it can appear under
    # several papers and more than once under the same paper.
    stmts_by_paper = {}
    for stmt in result.statements():
        for ev in stmt.evidence:
            paper_key = ev.text_refs.get(id_type)
            stmts_by_paper.setdefault(paper_key, []).append(stmt)
    return stmts_by_paper