def default_string_query(self, q, options):
    """
    Build the default search for a free-text query string.

    The first rule that matches ``q`` decides the query type:

    1. genomic interval: ``q`` contains ``chr<chrom>:<start>-<end>``
       (start/end may contain commas), optionally prefixed with an
       assembly such as ``hg19.`` or ``mm9.``
    2. query_string: ``q`` is wrapped in double quotes or contains one
       of ``:``, ``~``, `` AND ``, `` OR ``, ``NOT ``
    3. wildcard: ``q`` contains ``*`` or ``?``
    4. dismax: everything else

    The resulting search is always passed through
    ``self._extra_query_options`` before it is returned.
    """
    interval_match = re.search(
        r'chr(?P<chrom>\w+):(?P<gstart>[0-9,]+)-(?P<gend>[0-9,]+)', q)
    query_string_markers = (':', '~', ' AND ', ' OR ', 'NOT ')
    # Non-default assemblies: hg19 for human (default is hg38),
    # mm9 for mouse (default is mm10).
    assembly_prefixes = (('hg19.', 'hg19'), ('mm9.', 'mm9'))

    if interval_match:
        # keys: chrom, gstart, gend
        interval_args = interval_match.groupdict()
        for prefix, assembly in assembly_prefixes:
            if q.startswith(prefix):
                interval_args['assembly'] = assembly
        search = AsyncSearch().from_dict(interval(**interval_args))
    elif (q.startswith('"') and q.endswith('"')) \
            or any(marker in q for marker in query_string_markers):
        search = AsyncSearch().query(
            "query_string",
            query=q,
            default_operator="AND",
            auto_generate_phrase_queries=True)
    elif '*' in q or '?' in q:
        search = AsyncSearch().from_dict(wildcard(q))
    else:
        search = AsyncSearch().from_dict(dismax(q))

    return self._extra_query_options(search, options)
def _build_string_query(self, q, options):
    """
    q + options -> query object

    options:
        userquery

    An empty ``q`` produces a ``match_none`` query (same empty q
    behavior as that of ES). Otherwise a registered user query named
    by ``options.userquery`` takes precedence over
    ``self.default_string_query``. A registered user filter of the
    same name, if any, is applied to whichever search was built.
    """
    assert isinstance(q, str)
    base = AsyncSearch()
    userquery = options.userquery or ''

    if not q:
        search = base.query("match_none")
    elif self.user_query.has_query(userquery):
        search = base.query(self.user_query.get_query(userquery, q=q))
    else:
        # customization here
        search = self.default_string_query(q, options)

    if self.user_query.has_filter(userquery):
        search = search.filter(self.user_query.get_filter(userquery))

    return search
def default_string_query(self, q, options):
    """
    Build the default search for a query string.

    ``__all__`` yields an unconstrained query, ``__any__`` (only when
    ``self.allow_random_query`` is truthy) a randomly scored one.
    Anything else becomes a ``dis_max`` over several exact, prefix and
    query_string clauses, wrapped in a ``function_score`` that
    re-weights documents by namespace, prefix and parent class.

    Every returned search has ``rest_total_hits_as_int=True`` set.
    """
    if q == '__all__':
        search = AsyncSearch().query()
    elif q == '__any__' and self.allow_random_query:
        search = AsyncSearch().query('function_score', random_score={})
    else:
        # elasticsearch default
        candidates = [
            {"term": {"_id": {"value": q, "boost": 15.0}}},
            {"term": {"label.raw": {"value": q, "boost": 10.0}}},
            {"term": {"_meta.username": {"value": q}}},
            # for dataset
            {"term": {"name": {"value": q}}},
            {"match": {"parent_classes": {"query": q}}},
            {"prefix": {"label": {"value": q}}},
            {"query_string": {"query": q}},
        ]
        weights = [
            {"filter": {"term": {"namespace": "schema"}}, "weight": 0.5},
            {"filter": {"term": {"prefix.raw": "schema"}}, "weight": 0.5},
            {"filter": {"match": {"parent_classes": "bts:BiologicalEntity"}},
             "weight": 1.5},
        ]
        body = {
            "query": {
                "function_score": {
                    "query": {"dis_max": {"queries": candidates}},
                    "functions": weights,
                }
            }
        }
        search = AsyncSearch().update_from_dict(body)

    return search.params(rest_total_hits_as_int=True)
def default_string_query(self, q, options):
    """
    Build a ``query_string`` search for ``q`` over a fixed field list.

    ``name`` is boosted by 4 and ``interventions.name`` by 3;
    ``description`` and ``all`` are searched without a boost.
    ``options`` is not used.
    """
    fields = ["name^4", "interventions.name^3", "description", "all"]
    body = {"query": {"query_string": {"query": q, "fields": fields}}}
    return AsyncSearch().update_from_dict(body)
async def execute_pipeline(self, *args, **kwargs):
    """
    Run the graph query described by ``self.args_json`` and finish the
    request with the transformed result.

    When the graph query can be reversed, the reversed form is OR-ed
    with the original so that both directions are sent together.

    Raises:
        BadRequest: when a ``GraphObjectError`` is raised while
            building or running the query.
        HTTPError: for any other exception raised in that process.
    """
    try:
        graph_query = GraphQuery.from_dict(self.args_json)
        es_query = self._to_es_query(graph_query)

        if graph_query.can_reverse():
            graph_query.reverse()
            # it's sent in one query so that parameters
            # like size is still meaningful
            es_query = es_query | self._to_es_query(graph_query)

        search = AsyncSearch().query(es_query)
        raw_result = await self.pipeline.execute(search, dotdict())
        res = self.pipeline.transform(raw_result, dotdict())
        # TODO additional transformation, like double reversal in result.
    except GraphObjectError as exc:
        raise BadRequest(reason=str(exc))
    except Exception as exc:
        # NOTE(review): the message is passed as the first positional
        # argument; confirm this HTTPError class expects a message
        # there rather than a status code.
        raise HTTPError(str(exc))
    else:
        self.finish(res)
def default_match_query(self, q, scopes, options):
    """
    Override this to customize default match query.
    By default it implements a multi_match query.

    ``q`` may be a single str/int/float, matched (as a string) against
    the fields in ``scopes``, or a list of terms paired one-to-one with
    a list of scopes, in which case all per-term conditions are AND-ed.

    Raises:
        TypeError: ``q`` is of an unsupported type, or ``q`` is a list
            but ``scopes`` is not.
        ValueError: ``q`` and ``scopes`` are lists of different length.
    """
    def _multi_match(term, fields):
        # one "all terms must match" condition over the given fields
        return Q('multi_match', query=term, operator="and",
                 fields=fields, lenient=True)

    if isinstance(q, list):
        if not isinstance(scopes, list):
            raise TypeError(scopes)
        if len(q) != len(scopes):
            raise ValueError(q)
        # combine conditions
        query = Q()
        for term, fields in zip(q, scopes):
            query = query & _multi_match(term, fields)
    elif isinstance(q, (str, int, float)):
        query = _multi_match(str(q), scopes)
    else:
        # invalid
        raise TypeError(q)

    return AsyncSearch().query(query)
def default_string_query(self, q, options):
    """
    Override this to customize default string query.
    By default it implements a query string query.

    Two special values are recognised first: ``__all__`` gives an
    unconstrained query, and ``__any__`` gives a randomly scored query
    when ``self.allow_random_query`` is truthy.
    """
    base = AsyncSearch()
    if q == '__all__':
        return base.query()
    if q == '__any__' and self.allow_random_query:
        return base.query('function_score', random_score={})
    # elasticsearch default
    return base.query("query_string", query=str(q))
def _extra_query_options(self, search, options):
    """
    Wrap the query in a species-weighted ``function_score`` and apply
    the species filters requested in ``options``.

    Only ``search.query`` is carried over; the returned object is a
    new ``AsyncSearch``.

    options:
        species: list of taxid strings, or containing ``'all'`` to
            disable species filtering
        aggs, species_facet_filter: when both are set, a post filter
            on ``taxid`` is added

    Raises:
        BadRequest: a species value is not a string, or is not made up
            of decimal digits only.
    """
    search = AsyncSearch().query(
        "function_score",
        query=search.query,
        functions=[
            {"filter": {"term": {"taxid": 9606}}, "weight": "1.55"},  # human
            {"filter": {"term": {"taxid": 10090}}, "weight": "1.3"},  # mouse
            {"filter": {"term": {"taxid": 10116}}, "weight": "1.1"},  # rat
        ],
        score_mode="first")

    species = options.species
    if species and 'all' not in species:
        if not all(isinstance(taxid, str) for taxid in species):
            raise BadRequest(reason="species must be strings or integer strings.")
        # str.isdecimal() instead of str.isnumeric(): the latter also
        # accepts characters such as '½' or '²', which are not valid
        # integer taxids and would only fail later in the backend.
        if not all(taxid.isdecimal() for taxid in species):
            raise BadRequest(reason="cannot map some species to taxids.")
        search = search.filter('terms', taxid=species)

    if options.aggs and options.species_facet_filter:
        search = search.post_filter('terms', taxid=options.species_facet_filter)

    return search
def build_string_query(self, q, options):
    """
    q + options -> query object

    options:
        userquery

    Precedence: a registered user query named by ``options.userquery``,
    then the special values ``__all__`` (unconstrained) and ``__any__``
    (random scoring, only if ``self.allow_random_query`` is truthy),
    then ``self.default_string_query``. A registered user filter of the
    same name, if any, is applied to whichever search was built.
    """
    assert isinstance(q, str)
    base = AsyncSearch()
    userquery = options.userquery or ''

    if self.user_query.has_query(userquery):
        search = base.query(self.user_query.get_query(userquery, q=q))
    elif q == '__all__':
        search = base.query()
    elif q == '__any__' and self.allow_random_query:
        search = base.query('function_score', random_score={})
    else:
        # customization here
        search = self.default_string_query(q, options)

    if self.user_query.has_filter(userquery):
        search = search.filter(self.user_query.get_filter(userquery))

    return search
def default_string_query(self, q, options):
    """
    Override this to customize default string query.
    By default it implements a query string query.

    Sketch for supporting extra query types in an override::

        if q == 'case_1':
            return case_1(q)
        elif q == 'case_2':
            return case_2(q)

        return default_case(q)
    """
    search = AsyncSearch()
    return search.query("query_string", query=str(q))
def build_lineage_query(_id, options):
    """
    Build a search matching documents whose ``lineage`` matches ``_id``.

    When ``options.has_gene`` is truthy, a ``has_gene`` match on that
    value is added as well. The search requests up to 10000 hits and
    restricts ``_source`` to ``_id``.
    """
    max_taxid_count = 10000

    search = AsyncSearch().query('match', lineage=_id)
    if options.has_gene:
        search = search.query('match', has_gene=options.has_gene)

    return search.params(size=max_taxid_count, _source='_id')
def default_string_query(self, q, options):
    """
    Build a ``dis_max`` search over two ``query_string`` clauses for
    ``q``: one restricted to ``name`` (boost 6) and ``description``
    (boost 3), and one with no field restriction.

    ``options`` is not used.
    """
    boosted_fields = {
        "query_string": {
            "query": q,
            "fields": ["name^6", "description^3"],
        }
    }
    unrestricted = {"query_string": {"query": q}}
    body = {"query": {"dis_max": {"queries": [boosted_fields, unrestricted]}}}
    return AsyncSearch().from_dict(body)
def default_string_query(self, q, options):
    """
    Build an interval search when ``q`` parses as a genomic interval,
    otherwise a plain ``query_string`` search.

    For an interval, documents are filtered by chromosome and by
    overlap with the requested range: ``<assembly>.start <= gend`` and
    ``<assembly>.end >= gstart``. The assembly is ``hg38`` only when
    ``options.assembly == 'hg38'``; any other value selects ``hg19``.
    Any query text that accompanies the interval is applied as a
    ``query_string`` query.
    """
    parsed = self._parse_interval_query(q)
    if not parsed:
        # default query
        return AsyncSearch().query("query_string", query=q)

    # interval query
    search = AsyncSearch()
    if parsed['query']:
        search = search.query("query_string", query=parsed['query'])
    search = search.filter('match', chrom=parsed['chr'])

    assembly = 'hg38' if options.assembly == 'hg38' else 'hg19'
    start_field = assembly + ".start"
    end_field = assembly + ".end"
    search = search.filter('range', **{start_field: {"lte": parsed['gend']}})
    search = search.filter('range', **{end_field: {"gte": parsed['gstart']}})
    return search
def _extra_query_options(self, search, options):
    """
    Wrap the query in a weighted ``function_score`` and apply the
    filters requested in ``options``.

    Only ``search.query`` is carried over; the returned object is a
    new ``AsyncSearch``.

    options:
        entrezonly: keep only documents with an ``entrezgene`` field
        ensemblonly: keep only documents with an ``ensembl.gene`` field
        missing: fields that must not exist
        exists: fields that must exist
        species: list of taxid strings, or containing ``'all'`` to
            disable species filtering
        aggs, species_facet_filter: when both are set, a post filter
            on ``taxid`` is added

    Raises:
        BadRequest: a species value is not a string, or is not made up
            of decimal digits only.
    """
    search = AsyncSearch().query(
        "function_score",
        query=search.query,
        functions=[
            # downgrade
            {"filter": {"term": {"name": "pseudogene"}}, "weight": "0.5"},
            {"filter": {"term": {"taxid": 9606}}, "weight": "1.55"},
            {"filter": {"term": {"taxid": 10090}}, "weight": "1.3"},
            {"filter": {"term": {"taxid": 10116}}, "weight": "1.1"},
        ],
        score_mode="first")

    if options.entrezonly:
        search = search.filter('exists', field="entrezgene")
    if options.ensemblonly:
        search = search.filter('exists', field="ensembl.gene")

    if options.missing:
        for field in options.missing:
            search = search.exclude('exists', field=field)
    if options.exists:
        for field in options.exists:
            search = search.filter('exists', field=field)

    species = options.species
    # 'all' means: do not apply any species filter
    if species and 'all' not in species:
        if not all(isinstance(taxid, str) for taxid in species):
            raise BadRequest(reason="species must be strings or integer strings.")
        # str.isdecimal() instead of str.isnumeric(): the latter also
        # accepts characters such as '½' or '²', which are not valid
        # integer taxids and would only fail later in the backend.
        if not all(taxid.isdecimal() for taxid in species):
            raise BadRequest(reason="cannot map some species to taxids.")
        # filter by taxid numeric strings
        search = search.filter('terms', taxid=species)

    if options.aggs and options.species_facet_filter:
        search = search.post_filter('terms', taxid=options.species_facet_filter)

    return search
def default_string_query(self, q, options):
    """
    Build the default search for a query string.

    ``q`` is stripped of surrounding whitespace, then the first rule
    that matches decides the query:

    1. ``__all__``: unconstrained query
    2. ``__any__`` (if ``self.allow_random_query``): random scoring
    3. contains ``:``, `` AND `` or `` OR ``: raw ``query_string``
    4. wrapped in double quotes: term search, a ``dis_max`` over exact
       ``_id`` / ``_meta.slug`` terms plus AND-ed title and
       query_string clauses
    5. otherwise: simple text search, a ``dis_max`` over exact terms,
       title match, query_string and trailing-wildcard clauses

    All results get ``rest_total_hits_as_int=True``, exclude ``_raw``
    from the source (including ``options._source``), and are filtered
    by ``options.authors`` / ``options.tags`` when those are set.
    """
    q = q.strip()
    clauses = None

    if q == '__all__':
        search = AsyncSearch().query()
    elif q == '__any__' and self.allow_random_query:
        search = AsyncSearch().query('function_score', random_score={})
    elif ":" in q or " AND " in q or " OR " in q:
        # elasticsearch query string syntax
        search = AsyncSearch().query('query_string', query=q)
    elif q.startswith('"') and q.endswith('"'):
        # term search
        unquoted = q.strip('"')
        clauses = [
            {"term": {"_id": {"value": unquoted, "boost": 5}}},
            {"term": {"_meta.slug": {"value": unquoted, "boost": 5}}},
            {"match": {"info.title": {
                "query": q, "boost": 1.5, "operator": "AND"}}},
            # base score
            {"query_string": {"query": q, "default_operator": "AND"}},
        ]
    else:
        # simple text search
        prefix_pattern = q + "*"
        clauses = [
            {"term": {"_id": {"value": q, "boost": 5}}},
            {"term": {"_meta.slug": {"value": q, "boost": 5}}},
            {"match": {"info.title": {"query": q, "boost": 1.5}}},
            {"term": {"servers.url": {"value": q, "boost": 1.1}}},
            # base score
            {"query_string": {"query": q}},
            {"wildcard": {"info.title": {
                "value": prefix_pattern, "boost": 0.8}}},
            {"wildcard": {"info.description": {
                "value": prefix_pattern, "boost": 0.5}}},
        ]

    if clauses is not None:
        body = {"query": {"dis_max": {"queries": clauses}}}
        search = AsyncSearch().update_from_dict(body)

    search = search.params(rest_total_hits_as_int=True)
    search = search.source(exclude=['_raw'], include=options._source)

    if options.authors:
        # e.g. '"Chunlei Wu"'
        search = search.filter('terms', info__contact__name__raw=options.authors)
    if options.tags:
        # e.g. '"chemical", "drug"'
        search = search.filter('terms', tags__name__raw=options.tags)

    return search
def _extra_query_options(self, search, options):
    """
    Wrap the query in a weighted ``function_score`` and apply the
    filters requested in ``options``.

    Only ``search.query`` is carried over; the returned object is a
    new ``AsyncSearch``.

    options:
        entrezonly: keep only documents with an ``entrezgene`` field
        ensemblonly: keep only documents with an ``ensembl.gene`` field
        missing: fields that must not exist
        exists: fields that must exist
        species: taxids to filter on, unless it contains ``'all'``
        aggs, species_facet_filter: when both are set, a post filter
            on ``taxid`` is added
    """
    weighting = [
        # downgrade
        {"filter": {"term": {"name": "pseudogene"}}, "weight": "0.5"},
        {"filter": {"term": {"taxid": 9606}}, "weight": "1.55"},
        {"filter": {"term": {"taxid": 10090}}, "weight": "1.3"},
        {"filter": {"term": {"taxid": 10116}}, "weight": "1.1"},
    ]
    search = AsyncSearch().query(
        "function_score",
        query=search.query,
        functions=weighting,
        score_mode="first")

    if options.entrezonly:
        search = search.filter('exists', field="entrezgene")
    if options.ensemblonly:
        search = search.filter('exists', field="ensembl.gene")

    for field in options.missing or ():
        search = search.exclude('exists', field=field)
    for field in options.exists or ():
        search = search.filter('exists', field=field)

    if options.species and 'all' not in options.species:
        # TODO
        search = search.filter('terms', taxid=options.species)

    if options.aggs and options.species_facet_filter:
        search = search.post_filter('terms', taxid=options.species_facet_filter)

    return search