示例#1
0
    def default_string_query(self, q, options):
        """
        Build the search used for a plain query string.

        '__all__' maps to an argument-less ``query()`` call and '__any__'
        (only when ``self.allow_random_query`` is truthy) to a
        ``function_score`` query with an empty ``random_score``.  Any other
        string is searched through a ``dis_max`` of term, match, prefix and
        query-string clauses wrapped in a ``function_score`` with per-filter
        weights.

        ``options`` is not read here.  The returned search always has the
        ``rest_total_hits_as_int=True`` request parameter set.
        """
        if q == '__all__':
            result = AsyncSearch().query()

        elif q == '__any__' and self.allow_random_query:
            result = AsyncSearch().query('function_score', random_score={})

        else:  # elasticsearch default
            # Alternative ways the query string may match a document.
            candidates = [
                {"term": {"_id": {"value": q, "boost": 15.0}}},
                {"term": {"label.raw": {"value": q, "boost": 10.0}}},
                {"term": {"_meta.username": {"value": q}}},  # for dataset
                {"term": {"name": {"value": q}}},
                {"match": {"parent_classes": {"query": q}}},
                {"prefix": {"label": {"value": q}}},
                {"query_string": {"query": q}},
            ]
            # Weights attached to documents matching each filter.
            weightings = [
                {"filter": {"term": {"namespace": "schema"}},
                 "weight": 0.5},
                {"filter": {"term": {"prefix.raw": "schema"}},
                 "weight": 0.5},
                {"filter": {"match": {"parent_classes": "bts:BiologicalEntity"}},
                 "weight": 1.5},
            ]
            body = {
                "query": {
                    "function_score": {
                        "query": {"dis_max": {"queries": candidates}},
                        "functions": weightings,
                    }
                }
            }
            result = AsyncSearch().update_from_dict(body)

        return result.params(rest_total_hits_as_int=True)
示例#2
0
    def build_string_query(self, q, options):
        """ Turn a query string and its options into a search object.

            options:
                userquery -- name checked against self.user_query for a
                             registered query and/or filter (may be empty)

            A registered user query takes precedence over the special
            '__all__' / '__any__' values and over default_string_query().
            A registered user filter, if any, is applied on top of
            whichever query was chosen.
        """
        assert isinstance(q, str)
        result = AsyncSearch()
        name = options.userquery or ''

        if self.user_query.has_query(name):
            result = result.query(self.user_query.get_query(name, q=q))

        elif q == '__all__':
            result = result.query()

        elif q == '__any__' and self.allow_random_query:
            result = result.query('function_score', random_score={})

        else:  # customization here
            result = self.default_string_query(q, options)

        # The filter lookup is independent of how the query was built.
        if self.user_query.has_filter(name):
            result = result.filter(self.user_query.get_filter(name))

        return result
示例#3
0
    def _build_string_query(self, q, options):
        """ Turn a query string and its options into a search object.

            options:
                userquery -- name checked against self.user_query for a
                             registered query and/or filter (may be empty)

            An empty q produces a "match_none" query; otherwise a
            registered user query is used when available, falling back to
            default_string_query().  A registered user filter, if any, is
            applied on top of whichever query was chosen.
        """
        assert isinstance(q, str)
        result = AsyncSearch()
        name = options.userquery or ''

        if not q:  # same empty q behavior as that of ES.
            result = result.query("match_none")

        elif self.user_query.has_query(name):
            result = result.query(self.user_query.get_query(name, q=q))

        else:  # customization here
            result = self.default_string_query(q, options)

        # The filter lookup is independent of how the query was built.
        if self.user_query.has_filter(name):
            result = result.filter(self.user_query.get_filter(name))

        return result
示例#4
0
    def build_lineage_query(_id, options):
        """
        Build a search for documents whose ``lineage`` matches ``_id``.

        When ``options.has_gene`` is truthy an additional ``has_gene`` match
        query is added.  The request is sent with ``size=10000`` and
        ``_source='_id'`` as parameters.
        """
        max_taxid_count = 10000

        lineage_search = AsyncSearch().query('match', lineage=_id)
        if options.has_gene:
            lineage_search = lineage_search.query(
                'match', has_gene=options.has_gene)

        return (
            lineage_search
            .params(size=max_taxid_count)
            .params(_source='_id')
        )
示例#5
0
    def default_string_query(self, q, options):
        """
        Default handling of a query string; override to customize.

        '__all__' maps to an argument-less ``query()`` call, '__any__' (only
        when ``self.allow_random_query`` is truthy) to a ``function_score``
        query with an empty ``random_score``, and anything else to a
        ``query_string`` query on ``str(q)``.  ``options`` is not read here.
        """
        base = AsyncSearch()

        if q == '__all__':
            return base.query()

        if q == '__any__' and self.allow_random_query:
            return base.query('function_score', random_score={})

        # elasticsearch default
        return base.query("query_string", query=str(q))
示例#6
0
    def default_string_query(self, q, options):
        """
        Pick a query type for ``q`` and build the corresponding search.

        Checked in this order:
          1. '__all__'                      -> argument-less ``query()``
          2. '__any__' (if allowed)         -> random ``function_score``
          3. contains ``chr<c>:<start>-<end>`` -> ``interval(...)`` query,
             with assembly 'hg19' / 'mm9' when ``q`` starts with that prefix
          4. fully quoted, or contains one of ':', '~', ' AND ', ' OR ',
             'NOT '                         -> ``query_string`` query
          5. contains '*' or '?'            -> ``wildcard(q)`` query
          6. otherwise                      -> ``dismax(q)`` query

        The result is passed through ``self._extra_query_options`` together
        with ``options`` before being returned.
        """
        result = AsyncSearch()

        # genomic interval query
        interval_hit = re.search(
            r'chr(?P<chrom>\w+):(?P<gstart>[0-9,]+)-(?P<gend>[0-9,]+)', q)

        if q == '__all__':
            result = result.query()

        elif q == '__any__' and self.allow_random_query:
            result = result.query('function_score', random_score={})

        elif interval_hit:  # (chr, gstart, gend)
            fields = interval_hit.groupdict()
            # support hg19 for human (default is hg38)
            # support mm9 for mouse (default is mm10)
            for assembly in ('hg19', 'mm9'):
                if q.startswith(assembly + '.'):
                    fields['assembly'] = assembly
            result = AsyncSearch().from_dict(interval(**fields))

        # query_string query
        elif (q.startswith('"') and q.endswith('"')) or any(
                token in q for token in (':', '~', ' AND ', ' OR ', 'NOT ')):
            result = AsyncSearch().query(
                "query_string",
                query=q,
                default_operator="AND",
                auto_generate_phrase_queries=True,
            )

        # wildcard query
        elif '*' in q or '?' in q:
            result = AsyncSearch().from_dict(wildcard(q))

        else:  # default query
            result = AsyncSearch().from_dict(dismax(q))

        return self._extra_query_options(result, options)
示例#7
0
    def default_string_query(self, q, options):
        """
        Build either an interval search or a plain ``query_string`` search.

        ``self._parse_interval_query(q)`` decides which: a falsy result
        means ``q`` is sent as a ``query_string`` query.  Otherwise the
        parsed ``chr`` / ``gstart`` / ``gend`` values become filters
        (``start <= gend`` and ``end >= gstart`` on the chosen assembly),
        and a non-empty parsed ``query`` is added as a ``query_string``
        query.  The assembly is 'hg38' only when ``options.assembly`` equals
        'hg38'; any other value selects 'hg19'.
        """
        parsed = self._parse_interval_query(q)

        if not parsed:  # default query
            return AsyncSearch().query("query_string", query=q)

        # interval query
        result = AsyncSearch()
        if parsed['query']:
            result = result.query("query_string", query=parsed['query'])
        result = result.filter('match', chrom=parsed['chr'])

        assembly = 'hg38' if options.assembly == 'hg38' else 'hg19'

        # (field suffix, range operator, key of the parsed bound)
        bounds = (
            (".start", "lte", 'gend'),
            (".end", "gte", 'gstart'),
        )
        for suffix, operator, key in bounds:
            result = result.filter(
                'range', **{assembly + suffix: {operator: parsed[key]}})

        return result
示例#8
0
    def default_string_query(self, q, options):
        """
        Build the search for a (whitespace-stripped) query string.

        Checked in this order:
          1. '__all__'                        -> argument-less ``query()``
          2. '__any__' (if allowed)           -> random ``function_score``
          3. contains ':', ' AND ' or ' OR '  -> ``query_string`` query
          4. starts and ends with '"'         -> "term search" ``dis_max``
          5. otherwise                        -> "simple text" ``dis_max``

        Afterwards, for every branch: ``rest_total_hits_as_int=True`` is
        set, ``_raw`` is excluded from the source while ``options._source``
        is included, and ``options.authors`` / ``options.tags`` (when
        truthy) are added as ``terms`` filters.
        """
        result = AsyncSearch()
        q = q.strip()

        def valued(kind, field, value, boost):
            # One ``{kind: {field: {"value": ..., "boost": ...}}}`` clause.
            return {kind: {field: {"value": value, "boost": boost}}}

        def dis_max_search(clauses):
            # Fresh search whose body is a dis_max over the given clauses.
            body = {"query": {"dis_max": {"queries": clauses}}}
            return AsyncSearch().update_from_dict(body)

        if q == '__all__':
            result = result.query()

        elif q == '__any__' and self.allow_random_query:
            result = result.query('function_score', random_score={})

        # elasticsearch query string syntax
        elif ":" in q or " AND " in q or " OR " in q:
            result = result.query('query_string', query=q)

        # term search
        elif q.startswith('"') and q.endswith('"'):
            # Term clauses use the unquoted text; match and query_string
            # clauses receive q with its quotes.
            unquoted = q.strip('"')
            result = dis_max_search([
                valued("term", "_id", unquoted, 5),
                valued("term", "_meta.slug", unquoted, 5),
                {"match": {"info.title": {
                    "query": q,
                    "boost": 1.5,
                    "operator": "AND",
                }}},
                {"query_string": {
                    "query": q,
                    "default_operator": "AND",
                }},  # base score
            ])

        else:  # simple text search
            prefix_pattern = q + "*"
            result = dis_max_search([
                valued("term", "_id", q, 5),
                valued("term", "_meta.slug", q, 5),
                {"match": {"info.title": {"query": q, "boost": 1.5}}},
                valued("term", "servers.url", q, 1.1),
                # ---------------------------------------------
                {"query_string": {"query": q}},  # base score
                # ---------------------------------------------
                valued("wildcard", "info.title", prefix_pattern, 0.8),
                valued("wildcard", "info.description", prefix_pattern, 0.5),
            ])

        result = result.params(rest_total_hits_as_int=True)
        result = result.source(exclude=['_raw'], include=options._source)

        if options.authors:  # '"Chunlei Wu"'
            result = result.filter(
                'terms', info__contact__name__raw=options.authors)

        if options.tags:  # '"chemical", "drug"'
            result = result.filter('terms', tags__name__raw=options.tags)

        return result