示例#1
0
    def default_string_query(self, q, options):
        """Build the search object for a plain string query.

        * ``'__all__'`` -> a query created with no arguments.
        * ``'__any__'`` -> a ``function_score`` query with ``random_score``,
          only when ``self.allow_random_query`` is truthy.
        * anything else -> a ``dis_max`` over id / label / name / class
          clauses, wrapped in a ``function_score`` with per-namespace weights.

        ``options`` is accepted for interface compatibility; it is not read.
        The returned search always has ``rest_total_hits_as_int=True`` set.
        """
        if q == '__all__':
            search = AsyncSearch().query()

        elif q == '__any__' and self.allow_random_query:
            search = AsyncSearch().query('function_score', random_score={})

        else:  # elasticsearch default
            # Alternative ways the text may match a document; dis_max is
            # given all of them as its candidate queries.
            candidates = [
                {"term": {"_id": {"value": q, "boost": 15.0}}},
                {"term": {"label.raw": {"value": q, "boost": 10.0}}},
                {"term": {"_meta.username": {"value": q}}},  # for dataset
                {"term": {"name": {"value": q}}},
                {"match": {"parent_classes": {"query": q}}},
                {"prefix": {"label": {"value": q}}},
                {"query_string": {"query": q}},
            ]
            # Weight adjustments applied to hits matching each filter.
            weightings = [
                {"filter": {"term": {"namespace": "schema"}}, "weight": 0.5},
                {"filter": {"term": {"prefix.raw": "schema"}}, "weight": 0.5},
                {"filter": {"match": {"parent_classes": "bts:BiologicalEntity"}},
                 "weight": 1.5},
            ]
            body = {
                "query": {
                    "function_score": {
                        "query": {"dis_max": {"queries": candidates}},
                        "functions": weightings,
                    }
                }
            }
            search = AsyncSearch().update_from_dict(body)

        return search.params(rest_total_hits_as_int=True)
示例#2
0
    def default_string_query(self, q, options):
        """Choose a query form from the shape of ``q`` and build the search.

        Checked in this order:

        1. genomic interval (``chr<chrom>:<start>-<end>`` anywhere in ``q``),
           optionally prefixed with ``hg19.`` or ``mm9.`` to pick an assembly;
        2. ``query_string`` syntax (fully quoted, or containing one of
           ``:``, ``~``, `` AND ``, `` OR ``, ``NOT ``);
        3. wildcard (contains ``*`` or ``?``);
        4. the default dismax query.

        The resulting search is passed through ``self._extra_query_options``
        together with ``options`` before being returned.
        """
        interval_match = re.search(
            r'chr(?P<chrom>\w+):(?P<gstart>[0-9,]+)-(?P<gend>[0-9,]+)', q)
        is_quoted = q.startswith('"') and q.endswith('"')

        if interval_match:  # (chr, gstart, gend)
            coords = interval_match.groupdict()
            # A leading "hg19." / "mm9." selects that assembly explicitly
            # (the defaults are hg38 for human and mm10 for mouse).
            for legacy_assembly in ('hg19', 'mm9'):
                if q.startswith(legacy_assembly + '.'):
                    coords['assembly'] = legacy_assembly
            search = AsyncSearch().from_dict(interval(**coords))

        elif is_quoted or any(
                token in q for token in (':', '~', ' AND ', ' OR ', 'NOT ')):
            search = AsyncSearch().query(
                "query_string",
                query=q,
                default_operator="AND",
                auto_generate_phrase_queries=True,
            )

        elif '*' in q or '?' in q:
            search = AsyncSearch().from_dict(wildcard(q))

        else:
            search = AsyncSearch().from_dict(dismax(q))

        return self._extra_query_options(search, options)
示例#3
0
    async def execute_pipeline(self, *args, **kwargs):
        """Run the graph query described by ``self.args_json`` and finish the request.

        The request body is parsed into a ``GraphQuery`` and converted to an
        Elasticsearch query. When the graph query reports it can be reversed,
        the reversed form is converted as well and OR-ed with the forward one.
        The combined query is executed and transformed through
        ``self.pipeline`` and the result is passed to ``self.finish``.

        ``*args`` / ``**kwargs`` are accepted but not used.

        Raises:
            BadRequest: when a ``GraphObjectError`` is raised while building
                or running the query; the original error is chained as the
                cause.
            HTTPError: for any other exception; the original error is chained
                as the cause.
        """
        try:
            graph_query = GraphQuery.from_dict(self.args_json)
            es_query = self._to_es_query(graph_query)

            if graph_query.can_reverse():
                # reverse() is called for its effect on graph_query (its
                # return value is ignored); the same object is converted again.
                graph_query.reverse()
                es_query_rev = self._to_es_query(graph_query)
                es_query = es_query | es_query_rev

            # it's sent in one query so that parameters like size is still meaningful
            _query = AsyncSearch().query(es_query)
            _res = await self.pipeline.execute(_query, dotdict())
            res = self.pipeline.transform(_res, dotdict())

            # TODO additional transformation, like double reversal in result.

        except GraphObjectError as exc:
            # Chain explicitly so the original error shows as the direct cause.
            raise BadRequest(reason=str(exc)) from exc

        except Exception as exc:
            # NOTE(review): the message is passed as the first positional
            # argument; if HTTPError is tornado.web.HTTPError that slot is
            # status_code - confirm against this file's imports.
            raise HTTPError(str(exc)) from exc

        self.finish(res)
示例#4
0
    def default_match_query(self, q, scopes, options):
        """Build the default match search (override to customize).

        A scalar ``q`` (``str``, ``int`` or ``float``) becomes one
        ``multi_match`` query over ``scopes``, with ``q`` converted to a
        string.  A list ``q`` is paired element-wise with the list ``scopes``
        and the resulting ``multi_match`` queries are AND-ed together.

        ``options`` is accepted for interface compatibility; it is not read.

        Raises:
            TypeError: ``q`` is neither a scalar nor a list (raised with
                ``q``), or ``q`` is a list but ``scopes`` is not (raised with
                ``scopes``).
            ValueError: ``q`` and ``scopes`` are lists of different lengths
                (raised with ``q``).
        """
        if isinstance(q, (str, int, float)):
            single = Q('multi_match',
                       query=str(q),
                       operator="and",
                       fields=scopes,
                       lenient=True)
            return AsyncSearch().query(single)

        if not isinstance(q, list):  # invalid
            raise TypeError(q)
        if not isinstance(scopes, list):
            raise TypeError(scopes)
        if len(q) != len(scopes):
            raise ValueError(q)

        combined = Q()  # starting point for AND-ing the conditions
        for term, term_scopes in zip(q, scopes):
            condition = Q('multi_match',
                          query=term,
                          operator="and",
                          fields=term_scopes,
                          lenient=True)
            combined = combined & condition

        return AsyncSearch().query(combined)
示例#5
0
    def build_string_query(self, q, options):
        """Turn ``q`` plus ``options`` into a search object.

        Options read here:
            userquery -- name looked up in ``self.user_query``

        Resolution order: a registered user query of that name, then the
        special values ``'__all__'`` and ``'__any__'`` (the latter only when
        ``self.allow_random_query`` is truthy), then
        ``self.default_string_query``.  A registered user filter of the same
        name, if any, is applied on top of whichever search was built.
        """
        assert isinstance(q, str)
        name = options.userquery or ''

        if self.user_query.has_query(name):
            custom = self.user_query.get_query(name, q=q)
            search = AsyncSearch().query(custom)

        elif q == '__all__':
            search = AsyncSearch().query()

        elif q == '__any__' and self.allow_random_query:
            search = AsyncSearch().query('function_score', random_score={})

        else:  # customization here
            search = self.default_string_query(q, options)

        if self.user_query.has_filter(name):
            search = search.filter(self.user_query.get_filter(name))

        return search
示例#6
0
    def _build_string_query(self, q, options):
        """Turn ``q`` plus ``options`` into a search object.

        Options read here:
            userquery -- name looked up in ``self.user_query``

        An empty ``q`` yields a ``match_none`` query.  Otherwise a registered
        user query of that name is used when one exists, falling back to
        ``self.default_string_query``.  A registered user filter of the same
        name, if any, is applied on top of whichever search was built.
        """
        assert isinstance(q, str)
        name = options.userquery or ''

        if not q:  # same empty q behavior as that of ES.
            search = AsyncSearch().query("match_none")

        elif self.user_query.has_query(name):
            custom = self.user_query.get_query(name, q=q)
            search = AsyncSearch().query(custom)

        else:  # customization here
            search = self.default_string_query(q, options)

        if self.user_query.has_filter(name):
            search = search.filter(self.user_query.get_filter(name))

        return search
示例#7
0
    def _extra_query_options(self, search, options):
        """Wrap the query in species weighting and apply species/source filters.

        Only ``search.query`` is carried into the new search object.  It is
        wrapped in a ``function_score`` (``score_mode="first"``) that weights
        taxid 9606, 10090 and 10116.

        Options read here:
            species, species_facet_filter, source, source_facet_filter, aggs

        A species/source list containing ``'all'`` adds no filter.  The
        facet post-filters are only considered when the corresponding
        ``species`` / ``source`` option is set.

        Raises:
            BadRequest: a species entry is not a string or not numeric, or a
                source entry is not a string.
        """
        weighted_taxids = (
            (9606, "1.55"),   # human
            (10090, "1.3"),   # mouse
            (10116, "1.1"),   # rat
        )
        functions = [
            {"filter": {"term": {"taxid": taxid}}, "weight": weight}
            for taxid, weight in weighted_taxids
        ]
        search = AsyncSearch().query(
            "function_score",
            query=search.query,
            functions=functions,
            score_mode="first")

        species = options.species
        if species:
            if 'all' not in species:
                if not all(isinstance(item, str) for item in species):
                    raise BadRequest(
                        reason="species must be strings or integer strings.")
                if not all(item.isnumeric() for item in species):
                    raise BadRequest(
                        reason="cannot map some species to taxids.")
                search = search.filter('terms', taxid=species)
            if options.aggs and options.species_facet_filter:
                search = search.post_filter(
                    'terms', taxid=options.species_facet_filter)

        sources = options.source
        if sources:
            if 'all' not in sources:
                if not all(isinstance(item, str) for item in sources):
                    raise BadRequest(reason="source must be strings.")
                search = search.filter('terms', source=sources)
            if options.aggs and options.source_facet_filter:
                search = search.post_filter(
                    'terms', source=options.source_facet_filter)

        return search
示例#8
0
    def build_lineage_query(_id, options):
        """Build a search for documents whose ``lineage`` matches ``_id``.

        When ``options.has_gene`` is truthy an additional ``match`` on
        ``has_gene`` is added.  The search requests at most 10000 hits and
        sets the ``_source`` parameter to ``'_id'``.
        """
        max_taxid_count = 10000

        search = AsyncSearch().query('match', lineage=_id)
        if options.has_gene:
            search = search.query('match', has_gene=options.has_gene)

        return search.params(size=max_taxid_count, _source='_id')
示例#9
0
    def default_string_query(self, q, options):
        """Build either an interval search or a plain ``query_string`` search.

        ``self._parse_interval_query(q)`` decides which: a falsy result means
        ``q`` is used as a ``query_string`` query.  Otherwise the parsed
        ``chr`` / ``gstart`` / ``gend`` values become filters selecting
        documents whose ``<assembly>.start``..``<assembly>.end`` range
        overlaps the requested interval, optionally combined with the parsed
        ``query`` text.  The assembly is ``hg38`` only when
        ``options.assembly == 'hg38'``; every other value means ``hg19``.
        """
        parsed = self._parse_interval_query(q)
        if not parsed:  # default query
            return AsyncSearch().query("query_string", query=q)

        # interval query
        search = AsyncSearch()
        if parsed['query']:
            search = search.query("query_string", query=parsed['query'])
        search = search.filter('match', chrom=parsed['chr'])

        assembly = 'hg38' if options.assembly == 'hg38' else 'hg19'
        # Overlap test: document start <= requested end ...
        starts_before_end = {assembly + ".start": {"lte": parsed['gend']}}
        search = search.filter('range', **starts_before_end)
        # ... and document end >= requested start.
        ends_after_start = {assembly + ".end": {"gte": parsed['gstart']}}
        search = search.filter('range', **ends_after_start)

        return search
示例#10
0
    def default_string_query(self, q, options):
        """Build a ``query_string`` search over a fixed, boosted field list.

        ``q`` is searched in ``name`` (boost 4), ``interventions.name``
        (boost 3), ``description`` and ``all``.  ``options`` is accepted for
        interface compatibility; it is not read.
        """
        boosted_fields = [
            "name^4", "interventions.name^3", "description", "all"
        ]
        body = {"query": {"query_string": {"query": q,
                                           "fields": boosted_fields}}}
        return AsyncSearch().update_from_dict(body)
示例#11
0
    def default_string_query(self, q, options):
        """Build the default string search (override to customize).

        The default implementation is a ``query_string`` query on
        ``str(q)``; ``options`` is not read.

        An override that supports extra query types can dispatch on ``q``
        before falling back to the default, for example::

            if q == 'case_1':
                return case_1(q)
            elif q == 'case_2':
                return case_2(q)

            return default_case(q)
        """
        text = str(q)
        return AsyncSearch().query("query_string", query=text)
示例#12
0
    def default_string_query(self, q, options):
        """Build the default string search (override to customize).

        * ``'__all__'`` -> a query created with no arguments.
        * ``'__any__'`` -> a ``function_score`` query with ``random_score``,
          only when ``self.allow_random_query`` is truthy.
        * anything else -> a ``query_string`` query on ``str(q)``.

        ``options`` is accepted for interface compatibility; it is not read.
        """
        if q == '__all__':
            return AsyncSearch().query()

        if q == '__any__' and self.allow_random_query:
            return AsyncSearch().query('function_score', random_score={})

        # elasticsearch default
        return AsyncSearch().query("query_string", query=str(q))
示例#13
0
    def default_string_query(self, q, options):
        """Build a ``dis_max`` search over two ``query_string`` clauses.

        One clause restricts ``q`` to ``name`` (boost 6) and ``description``
        (boost 3); the other runs ``q`` with no field list.  ``options`` is
        accepted for interface compatibility; it is not read.
        """
        boosted_clause = {
            "query_string": {
                "query": q,
                "fields": ["name^6", "description^3"],
            }
        }
        plain_clause = {"query_string": {"query": q}}

        body = {"query": {"dis_max": {"queries": [boosted_clause,
                                                  plain_clause]}}}
        return AsyncSearch().from_dict(body)
示例#14
0
    def _extra_query_options(self, search, options):
        """Wrap the query in score weighting and apply the option filters.

        Only ``search.query`` is carried into the new search object.  It is
        wrapped in a ``function_score`` (``score_mode="first"``) that
        down-weights documents whose ``name`` term is ``pseudogene`` and
        weights taxid 9606, 10090 and 10116.

        Options read here:
            entrezonly, ensemblonly, missing, exists, species, aggs,
            species_facet_filter

        Raises:
            BadRequest: a species entry is not a string or not numeric.
        """
        functions = [
            {"filter": {"term": {"name": "pseudogene"}}, "weight": "0.5"},  # downgrade
        ]
        functions.extend(
            {"filter": {"term": {"taxid": taxid}}, "weight": weight}
            for taxid, weight in ((9606, "1.55"), (10090, "1.3"), (10116, "1.1"))
        )
        search = AsyncSearch().query(
            "function_score",
            query=search.query,
            functions=functions,
            score_mode="first")

        if options.entrezonly:
            search = search.filter('exists', field="entrezgene")
        if options.ensemblonly:
            search = search.filter('exists', field="ensembl.gene")

        # A falsy option contributes no fields to iterate over.
        for absent_field in options.missing or ():
            search = search.exclude('exists', field=absent_field)
        for present_field in options.exists or ():
            search = search.filter('exists', field=present_field)

        species = options.species
        if species and 'all' not in species:  # 'all' applies no filter
            if not all(isinstance(item, str) for item in species):
                raise BadRequest(reason="species must be strings or integer strings.")
            if not all(item.isnumeric() for item in species):
                raise BadRequest(reason="cannot map some species to taxids.")
            # filter by taxid numeric strings
            search = search.filter('terms', taxid=species)

        if options.aggs and options.species_facet_filter:
            search = search.post_filter('terms', taxid=options.species_facet_filter)

        return search
示例#15
0
    def default_string_query(self, q, options):
        """Build the search for a string query, then apply common options.

        After stripping surrounding whitespace, ``q`` is handled as follows
        (first match wins):

        * ``'__all__'`` -> a query created with no arguments;
        * ``'__any__'`` -> random-score query, only when
          ``self.allow_random_query`` is truthy;
        * contains ``:``, `` AND `` or `` OR `` -> ``query_string`` syntax;
        * wrapped in double quotes -> ``dis_max`` of term lookups on the
          unquoted text plus AND-operator text clauses on the quoted text;
        * anything else -> ``dis_max`` of term, match, ``query_string`` and
          trailing-wildcard clauses.

        Every result gets ``rest_total_hits_as_int=True``, a source selection
        that excludes ``_raw`` and includes ``options._source``, and optional
        ``terms`` filters from ``options.authors`` and ``options.tags``.
        """
        q = q.strip()

        if q == '__all__':
            search = AsyncSearch().query()

        elif q == '__any__' and self.allow_random_query:
            search = AsyncSearch().query('function_score', random_score={})

        # elasticsearch query string syntax
        elif ":" in q or " AND " in q or " OR " in q:
            search = AsyncSearch().query('query_string', query=q)

        # term search
        elif q.startswith('"') and q.endswith('"'):
            unquoted = q.strip('"')
            clauses = [
                {"term": {"_id": {"value": unquoted, "boost": 5}}},
                {"term": {"_meta.slug": {"value": unquoted, "boost": 5}}},
                {"match": {"info.title": {"query": q,
                                          "boost": 1.5,
                                          "operator": "AND"}}},
                # base score
                {"query_string": {"query": q, "default_operator": "AND"}},
            ]
            body = {"query": {"dis_max": {"queries": clauses}}}
            search = AsyncSearch().update_from_dict(body)

        else:  # simple text search
            prefix_pattern = q + "*"
            clauses = [
                {"term": {"_id": {"value": q, "boost": 5}}},
                {"term": {"_meta.slug": {"value": q, "boost": 5}}},
                {"match": {"info.title": {"query": q, "boost": 1.5}}},
                {"term": {"servers.url": {"value": q, "boost": 1.1}}},
                # base score
                {"query_string": {"query": q}},
                # wildcard clauses carry boosts below 1
                {"wildcard": {"info.title": {"value": prefix_pattern,
                                             "boost": 0.8}}},
                {"wildcard": {"info.description": {"value": prefix_pattern,
                                                   "boost": 0.5}}},
            ]
            body = {"query": {"dis_max": {"queries": clauses}}}
            search = AsyncSearch().update_from_dict(body)

        search = search.params(rest_total_hits_as_int=True)
        search = search.source(exclude=['_raw'], include=options._source)

        authors = options.authors  # '"Chunlei Wu"'
        if authors:
            search = search.filter('terms', info__contact__name__raw=authors)

        tags = options.tags  # '"chemical", "drug"'
        if tags:
            search = search.filter('terms', tags__name__raw=tags)

        return search
示例#16
0
    def _extra_query_options(self, search, options):
        """Wrap the query in score weighting and apply the option filters.

        Only ``search.query`` is carried into the new search object.  It is
        wrapped in a ``function_score`` (``score_mode="first"``) that
        down-weights documents whose ``name`` term is ``pseudogene`` and
        weights taxid 9606, 10090 and 10116.

        Options read here:
            entrezonly, ensemblonly, missing, exists, species, aggs,
            species_facet_filter

        The species values are passed to the ``terms`` filter as given; no
        validation is performed here.
        """
        functions = [
            {"filter": {"term": {"name": "pseudogene"}}, "weight": "0.5"},  # downgrade
        ]
        functions.extend(
            {"filter": {"term": {"taxid": taxid}}, "weight": weight}
            for taxid, weight in ((9606, "1.55"), (10090, "1.3"), (10116, "1.1"))
        )
        search = AsyncSearch().query(
            "function_score",
            query=search.query,
            functions=functions,
            score_mode="first")

        if options.entrezonly:
            search = search.filter('exists', field="entrezgene")
        if options.ensemblonly:
            search = search.filter('exists', field="ensembl.gene")

        # A falsy option contributes no fields to iterate over.
        for absent_field in options.missing or ():
            search = search.exclude('exists', field=absent_field)
        for present_field in options.exists or ():
            search = search.filter('exists', field=present_field)

        species = options.species
        if species and 'all' not in species:  # TODO
            search = search.filter('terms', taxid=species)

        if options.aggs and options.species_facet_filter:
            search = search.post_filter(
                'terms', taxid=options.species_facet_filter)

        return search