Пример #1
0
    def build(self, q, options):
        '''
        Build a query according to q and options.
        This is the public method called by API handlers.

        Options:

            q: string query or queries
            scopes: fields to query q(s)

                _source: fields to return
                size: maximum number of hits to return
                from: starting index of result list to return
                sort: customized sort keys for result list
                explain: include es scoring information
                userquery: customized function to interpret q
                regexs: substitution groups to infer scopes

            aggs: customized aggregation string
            facet_size: maximum number of agg results

            * additional es keywords are passed through
              for example: 'explain', 'version' ...

        Returns:
            Whatever ``self._build(q, options)`` returns.

        Raises:
            BadRequest: when ``self._build`` raises TypeError or ValueError.
                The error kind is passed as ``reason`` and the exception
                message as ``details``.
        '''
        try:
            return self._build(q, options)
        except TypeError as exc:
            # Report the message under the same ``details`` key as the
            # ValueError branch so both error kinds share one shape.
            raise BadRequest(reason='TypeError', details=str(exc)) from exc
        except ValueError as exc:
            raise BadRequest(reason='ValueError', details=str(exc)) from exc
Пример #2
0
    def _extra_query_options(self, search, options):
        """
        Re-wrap the query of ``search`` in a taxid-weighted function_score
        query, then apply the species and source filters found in options.

        Raises BadRequest when species or source values are not strings,
        or when a species value is not a numeric string.
        """
        # (taxid, weight) pairs; score_mode="first" is passed below, so the
        # first matching entry should win -- see the Elasticsearch
        # function_score documentation.
        boosts = (
            (9606, "1.55"),  # human
            (10090, "1.3"),  # mouse
            (10116, "1.1"),  # rat
        )
        functions = [
            {"filter": {"term": {"taxid": taxid}}, "weight": weight}
            for taxid, weight in boosts
        ]
        search = AsyncSearch().query(
            "function_score",
            query=search.query,
            functions=functions,
            score_mode="first")

        species = options.species
        if species:
            if 'all' not in species:  # 'all' means no species filter
                if not all(isinstance(item, str) for item in species):
                    raise BadRequest(
                        reason="species must be strings or integer strings.")
                if not all(item.isnumeric() for item in species):
                    raise BadRequest(
                        reason="cannot map some species to taxids.")
                search = search.filter('terms', taxid=species)
            # the facet post-filter is only considered when species is given
            if options.aggs and options.species_facet_filter:
                search = search.post_filter(
                    'terms', taxid=options.species_facet_filter)

        sources = options.source
        if sources:
            if 'all' not in sources:  # 'all' means no source filter
                if not all(isinstance(item, str) for item in sources):
                    raise BadRequest(reason="source must be strings.")
                search = search.filter('terms', source=sources)
            # the facet post-filter is only considered when source is given
            if options.aggs and options.source_facet_filter:
                search = search.post_filter(
                    'terms', source=options.source_facet_filter)

        return search
Пример #3
0
 def build(self, q, options):
     '''
     Build the corresponding query.

     Returns whatever ``self._build(q, options)`` returns.

     Raises:
         BadRequest: when ``self._build`` raises TypeError or ValueError.
             The error kind is passed as ``reason`` and the exception
             message as ``details``.
     '''
     try:
         return self._build(q, options)
     except TypeError as exc:
         # Report the message under the same ``details`` key as the
         # ValueError branch so both error kinds share one shape.
         raise BadRequest(reason='TypeError', details=str(exc)) from exc
     except ValueError as exc:
         raise BadRequest(reason='ValueError', details=str(exc)) from exc
Пример #4
0
    async def execute(self, query, options):
        '''
        Execute the corresponding query and return its result.
        May override to add more. Handle uncaught exceptions.

        Options:
            Required: either an es-dsl query object or scroll_id
            Optional:
                fetch_all: also return a scroll_id for this query (default: false)
                biothing_type: which type's corresponding indices to query (default in config.py)

        Returns:
            The scroll response when ``options.scroll_id`` is set;
            otherwise the query response converted with ``to_dict()``
            (a list of dicts when the response is a list); ``{}`` when
            there is neither a scroll_id nor a query.

        Raises:
            HTTPError: 503 on connection failures, 500 when the index
                is not found.
            BadRequest: invalid or stale scroll_id, or a request error.
            EndRequest: no more scroll results, or a search phase
                execution failure.
        '''
        if options.scroll_id:
            try:
                res = await self.client.scroll(
                    scroll_id=options.scroll_id,
                    scroll=self.scroll_time)
            except ConnectionError:
                raise HTTPError(503)
            except (NotFoundError, RequestError, TransportError):
                raise BadRequest(reason="Invalid or stale scroll_id.")
            else:
                if not res['hits']['hits']:
                    raise EndRequest(reason="No more results to return.")
                return res

        if query:
            biothing_type = options.get('biothing_type', None) or self.default_type
            query = query.index(self.indices.get(biothing_type, self.default_index))

            if options.get('fetch_all', False):
                # request a scroll context and use the scroll page size
                query = query.params(scroll=self.scroll_time)
                query = query.extra(size=self.scroll_size)
            try:
                res = await query.using(self.client).execute()
            except (ConnectionError, ConnectionTimeout):
                raise HTTPError(503)
            except RequestError as exc:
                raise BadRequest(_es_error=exc)
            except TransportError as exc:
                if exc.error == 'search_phase_execution_exception':
                    raise EndRequest(500, reason=exc.info)
                elif exc.error == 'index_not_found_exception':
                    raise HTTPError(500, reason=exc.error)
                elif exc.status_code == 'N/A':
                    raise HTTPError(503)
                else:  # unexpected
                    raise
            else:  # format to {} or [{}...]
                if isinstance(res, list):
                    return [res_.to_dict() for res_ in res]
                return res.to_dict()

        # Nothing to run. This is already a coroutine function, so return
        # the empty result itself; returning ``asyncio.sleep(0, {})`` would
        # hand the awaiting caller an un-awaited coroutine object.
        return {}
Пример #5
0
    def build(self, q, options):
        """
        Turn q and options into a search object.
        Public entry point used by the API handlers.

        Options:

            q: string query or queries
            scopes: fields to query q(s)

                _source: fields to return
                size: maximum number of hits to return
                from: starting index of result list to return
                sort: customized sort keys for result list
                explain: include es scoring information
                userquery: customized function to interpret q
                regexs: substitution groups to infer scopes

            aggs: customized aggregation string
            facet_size: maximum number of agg results

            * additional es keywords are passed through
              for example: 'explain', 'version' ...

        Raises BadRequest on TypeError, ValueError or IllegalOperation.
        """
        try:
            # scoped terms use the match builder, bare terms the string builder
            if options.scopes is None:
                builder = self._build_string_query
            else:
                builder = self._build_match_query

            if not isinstance(q, list):  # str, int ...
                single = builder(str(q), options)
                # pass through es query options. (from, size ...)
                return self._apply_extras(single, options)

            if not q:  # es cannot execute empty multisearch
                raise ValueError("No search terms.")

            multi = AsyncMultiSearch()
            for term in q:
                part = builder(term, options)
                part = self._apply_extras(part, options)
                multi = multi.add(part)
            return multi

        except (TypeError, ValueError) as exc:
            raise BadRequest(reason=type(exc).__name__, details=str(exc))
        except IllegalOperation as exc:
            raise BadRequest(reason=str(exc))  # ex. sorting by -_score
Пример #6
0
    async def put(self, _id):
        """
        Update the slug of, or refresh, the document identified by _id.

        Request bodies:
            {"slug": "new_slug"}   add/update the URL slug
            {"slug": ""}           remove the URL slug
            {}                     refresh the document

        Raises HTTPError 404 when the document is not found and 403 when
        the current user is not its owner; BadRequest for a reserved slug
        or when saving the slug fails.
        """
        try:
            document = SmartAPI.get(_id)
        except NotFoundError:
            raise HTTPError(404)

        if document.username != self.current_user['login']:
            raise HTTPError(403)

        slug = self.args.slug

        if slug is None:  # no slug argument: refresh
            fetched = await download_async(document.url, raise_error=False)
            code = document.refresh(fetched)
            document.save()

            try:
                status = document.webdoc.STATUS(code).name.lower()
            except ValueError:
                status = 'nofile'  # keep the original copy

            self.finish({
                'success': code in (200, 299),
                'status': status,
                'code': code
            })
            return

        if slug in {'api'}:  # reserved
            raise BadRequest(details='slug is reserved')

        try:  # an empty slug clears the existing one
            document.slug = slug or None
            document.save()
        except (ControllerError, ValueError) as err:
            raise BadRequest(details=str(err)) from err

        self.finish({'success': True})
Пример #7
0
    async def execute_pipeline(self, *args, **kwargs):
        """
        Run the graph query built from ``self.args_json`` and finish the
        request with the transformed result.

        When ``graph_query.can_reverse()`` is true, the reversed form is
        converted as well and the two ES queries are combined with ``|``.

        Raises:
            BadRequest: the graph query raised GraphObjectError.
            HTTPError: 500 for any other unexpected failure. HTTPErrors
                raised inside the pipeline propagate unchanged.
        """
        try:

            graph_query = GraphQuery.from_dict(self.args_json)
            es_query = self._to_es_query(graph_query)

            if graph_query.can_reverse():
                graph_query.reverse()
                es_query_rev = self._to_es_query(graph_query)
                es_query = es_query | es_query_rev

            # it's sent in one query so that parameters like size is still meaningful
            _query = AsyncSearch().query(es_query)
            _res = await self.pipeline.execute(_query, dotdict())
            res = self.pipeline.transform(_res, dotdict())

            # TODO additional transformation, like double reversal in result.

        except GraphObjectError as exc:
            raise BadRequest(reason=str(exc)) from exc

        except HTTPError:
            # Already carries a deliberate status code; do not rewrap it.
            raise

        except Exception as exc:
            # The first positional argument of HTTPError is the status code,
            # so the message goes in the log_message slot.
            # NOTE(review): assumes HTTPError is tornado.web.HTTPError -- confirm.
            raise HTTPError(500, str(exc)) from exc

        self.finish(res)
Пример #8
0
    async def get(self):
        """
        Download the document at ``self.args.url`` and validate it.

        Raises BadRequest when the request carries a body.
        """
        body = self.request.body
        if body:
            raise BadRequest(details="GET takes no request body.")

        url = self.args.url
        document = await self.download(url)
        self.validate(document)
Пример #9
0
    async def download(self, url):
        """
        Download url and return the raw content of the fetched file.

        Raises:
            BadRequest: the download raised DownloadError; the original
                error is kept as the cause.
        """
        try:
            file = await download_async(url)
        except DownloadError as err:
            raise BadRequest(details=str(err)) from err
        else:  # other file info irrelevant for validation
            return file.raw
Пример #10
0
    async def post(self):
        """
        Add an API document from the URL given in the request arguments.

        Raises HTTPError 409 when the URL is already registered, and
        BadRequest when downloading, validating or saving fails. With the
        dryrun argument set, finishes after validation without saving.
        """
        url = self.args.url

        if SmartAPI.find(url, "url"):
            raise HTTPError(409)

        try:
            fetched = await download_async(url)
        except DownloadError as err:
            raise BadRequest(details=str(err)) from err

        try:
            document = SmartAPI(url)
            document.raw = fetched.raw
            document.validate()
        except (ControllerError, AssertionError) as err:
            raise BadRequest(details=str(err)) from err

        if self.args.dryrun:
            message = f"[Dryrun] Valid {document.version} Metadata"
            raise Finish({'success': True, 'details': message})

        try:
            document.username = self.current_user['login']
            document.refresh(fetched)  # populate webdoc meta
            new_id = document.save()
        except ControllerError as err:
            raise BadRequest(details=str(err)) from err

        # reached only when saving succeeded
        self.finish({'success': True, '_id': new_id})
        await self._notify(document)
Пример #11
0
    def _extra_query_options(self, search, options):
        """
        Re-wrap the query of ``search`` in a weighted function_score query
        and apply the existence and species filters found in options.

        Raises BadRequest when species values are not numeric strings.
        """
        functions = [
            {"filter": {"term": {"name": "pseudogene"}}, "weight": "0.5"},  # downgrade
        ]
        functions.extend(
            {"filter": {"term": {"taxid": taxid}}, "weight": weight}
            for taxid, weight in ((9606, "1.55"), (10090, "1.3"), (10116, "1.1"))
        )
        search = AsyncSearch().query(
            "function_score",
            query=search.query,
            functions=functions,
            score_mode="first")

        if options.entrezonly:
            search = search.filter('exists', field="entrezgene")
        if options.ensemblonly:
            search = search.filter('exists', field="ensembl.gene")

        # a falsy missing/exists option contributes no filters
        for name in options.missing or ():
            search = search.exclude('exists', field=name)
        for name in options.exists or ():
            search = search.filter('exists', field=name)

        species = options.species
        if species and 'all' not in species:  # 'all': do not apply any filters
            if not all(isinstance(item, str) for item in species):
                raise BadRequest(reason="species must be strings or integer strings.")
            if not all(item.isnumeric() for item in species):
                raise BadRequest(reason="cannot map some species to taxids.")
            # filter by taxid numeric strings
            search = search.filter('terms', taxid=species)

        if options.aggs and options.species_facet_filter:
            search = search.post_filter('terms', taxid=options.species_facet_filter)

        return search
Пример #12
0
    def validate(self, raw):
        """
        Validate raw SmartAPI metadata and finish the request on success.

        A validation-only SmartAPI object is created, given ``raw`` and
        validated; nothing is saved here.

        Raises:
            BadRequest: validation raised ControllerError or
                AssertionError; the original error is kept as the cause.
        """
        try:
            smartapi = SmartAPI(SmartAPI.VALIDATION_ONLY)
            smartapi.raw = raw
            smartapi.validate()

        except (ControllerError, AssertionError) as err:
            raise BadRequest(details=str(err)) from err
        else:
            self.finish({
                'success':
                True,
                'details':
                f'valid SmartAPI ({smartapi.version}) metadata.'
            })
Пример #13
0
    def delete(self, _id):
        """
        Delete the API document identified by _id.

        Raises HTTPError 404 when the document is not found, 403 when the
        current user is not its owner, and BadRequest when deletion fails.
        """
        try:
            document = SmartAPI.get(_id)
        except NotFoundError:
            raise HTTPError(404)

        owner = document.username
        if owner != self.current_user['login']:
            raise HTTPError(403)

        try:
            deleted_id = document.delete()
        except ControllerError as err:
            raise BadRequest(details=str(err)) from err
        else:
            self.finish({'success': True, '_id': deleted_id})