def build(self, q, options):
    """
    Build a query according to q and options.
    This is the public method called by API handlers.

    The work is delegated to ``self._build``; a TypeError or ValueError
    raised there is reported to the client as a BadRequest.

    Options:
        q: string query or queries
        scopes: fields to query q(s)
        _source: fields to return
        size: maximum number of hits to return
        from: starting index of result list to return
        sort: customized sort keys for result list
        explain: include es scoring information
        userquery: customized function to interpret q
        regexs: substitution groups to infer scopes
        aggs: customized aggregation string
        facet_size: maximum number of agg results

        * additional es keywords are passed through
          for example: 'explain', 'version' ...
    """
    try:  # TODO clarify
        built = self._build(q, options)
    except TypeError as err:
        # NOTE(review): the message is sent under 'value' here but under
        # 'details' in the ValueError branch -- confirm this is intended.
        raise BadRequest(reason='TypeError', value=str(err))
    except ValueError as err:
        raise BadRequest(reason='ValueError', details=str(err))
    return built
def _extra_query_options(self, search, options):
    """
    Wrap the query of ``search`` in a function_score query and apply
    the species / source restrictions requested in ``options``.

    Reads options.species, options.source, options.aggs,
    options.species_facet_filter and options.source_facet_filter.

    Returns the new search object.
    Raises BadRequest when species or source values are not acceptable.
    """
    # per-taxid weights, combined with score_mode="first"
    boosts = [
        {"filter": {"term": {"taxid": 9606}}, "weight": "1.55"},   # human
        {"filter": {"term": {"taxid": 10090}}, "weight": "1.3"},   # mouse
        {"filter": {"term": {"taxid": 10116}}, "weight": "1.1"},   # rat
    ]
    search = AsyncSearch().query(
        "function_score",
        query=search.query,
        functions=boosts,
        score_mode="first")

    species = options.species
    if species and 'all' not in species:  # 'all' means no species filter
        if not all(isinstance(taxid, str) for taxid in species):
            raise BadRequest(
                reason="species must be strings or integer strings.")
        if not all(taxid.isnumeric() for taxid in species):
            raise BadRequest(reason="cannot map some species to taxids.")
        search = search.filter('terms', taxid=species)

    # facet filters are post filters and only apply together with aggs
    if options.aggs and options.species_facet_filter:
        search = search.post_filter(
            'terms', taxid=options.species_facet_filter)

    sources = options.source
    if sources and 'all' not in sources:  # 'all' means no source filter
        if not all(isinstance(src, str) for src in sources):
            raise BadRequest(reason="source must be strings.")
        search = search.filter('terms', source=sources)

    if options.aggs and options.source_facet_filter:
        search = search.post_filter(
            'terms', source=options.source_facet_filter)

    return search
def build(self, q, options):
    """
    Build the corresponding query.

    Delegates to ``self._build(q, options)`` and returns its result.
    A TypeError or ValueError raised by it is re-raised as BadRequest.
    """
    try:  # TODO clarify
        result = self._build(q, options)
    except TypeError as error:
        # NOTE(review): 'value' is used here, 'details' below -- confirm.
        raise BadRequest(reason='TypeError', value=str(error))
    except ValueError as error:
        raise BadRequest(reason='ValueError', details=str(error))
    else:
        return result
async def execute(self, query, options):
    """
    Execute the corresponding query.
    May override to add more. Handle uncaught exceptions.

    Options:
        Required: either an es-dsl query object or scroll_id
        Optional:
            fetch_all: also return a scroll_id for this query (default: false)
            biothing_type: which type's corresponding indices to query
                           (default in config.py)

    Returns:
        * the scroll response as returned by ``self.client.scroll`` when
          options.scroll_id is set,
        * ``res.to_dict()`` (or a list of those when the result is a list)
          when a query is given,
        * an empty dict when neither a scroll_id nor a query is provided.

    Raises:
        HTTPError(503): connection problems talking to the backend.
        HTTPError(500): index_not_found_exception.
        BadRequest: invalid/stale scroll_id or a request error.
        EndRequest: no more scroll results, or a
            search_phase_execution_exception.
    """
    if options.scroll_id:
        try:
            res = await self.client.scroll(
                scroll_id=options.scroll_id,
                scroll=self.scroll_time)
        except ConnectionError as exc:
            raise HTTPError(503) from exc
        except (NotFoundError, RequestError, TransportError) as exc:
            raise BadRequest(reason="Invalid or stale scroll_id.") from exc
        else:
            if not res['hits']['hits']:
                raise EndRequest(reason="No more results to return.")
            return res

    if query:
        biothing_type = options.get('biothing_type', None) or self.default_type
        query = query.index(self.indices.get(biothing_type, self.default_index))

        if options.get('fetch_all', False):
            query = query.params(scroll=self.scroll_time)
            query = query.extra(size=self.scroll_size)
        try:
            res = await query.using(self.client).execute()
        except (ConnectionError, ConnectionTimeout) as exc:
            raise HTTPError(503) from exc
        except RequestError as exc:
            raise BadRequest(_es_error=exc) from exc
        except TransportError as exc:
            if exc.error == 'search_phase_execution_exception':
                raise EndRequest(500, reason=exc.info) from exc
            elif exc.error == 'index_not_found_exception':
                raise HTTPError(500, reason=exc.error) from exc
            elif exc.status_code == 'N/A':
                raise HTTPError(503) from exc
            else:  # unexpected
                raise
        else:  # format to {} or [{}...]
            if isinstance(res, list):
                return [res_.to_dict() for res_ in res]
            return res.to_dict()

    # Nothing to run. This is a coroutine function, so the value returned
    # here is exactly what the awaiting caller receives; returning
    # asyncio.sleep(0, {}) would hand back an un-awaited coroutine object
    # instead of the empty result.
    return {}
def build(self, q, options):
    """
    Build a query according to q and options.
    This is the public method called by API handlers.

    A list ``q`` produces a multi-search with one sub-search per item;
    anything else is converted to str and produces a single search.
    When options.scopes is set the match-query builder is used,
    otherwise the string-query builder.

    Options:
        q: string query or queries
        scopes: fields to query q(s)
        _source: fields to return
        size: maximum number of hits to return
        from: starting index of result list to return
        sort: customized sort keys for result list
        explain: include es scoring information
        userquery: customized function to interpret q
        regexs: substitution groups to infer scopes
        aggs: customized aggregation string
        facet_size: maximum number of agg results

        * additional es keywords are passed through
          for example: 'explain', 'version' ...

    Raises:
        BadRequest: on TypeError / ValueError (including an empty list
            of queries) or IllegalOperation while building.
    """
    try:
        # dispatch 'val' vs 'key:val' to corresponding functions.
        make_search = (
            self._build_string_query  # no scopes, only q
            if options.scopes is None
            else self._build_match_query
        )

        if not isinstance(q, list):  # str, int ...
            single = make_search(str(q), options)
            # pass through es query options. (from, size ...)
            search = self._apply_extras(single, options)
        elif q:
            search = AsyncMultiSearch()
            for term in q:
                search = search.add(
                    self._apply_extras(make_search(term, options), options))
        else:
            # es cannot execute empty multisearch
            raise ValueError("No search terms.")

    except (TypeError, ValueError) as err:
        raise BadRequest(reason=type(err).__name__, details=str(err))
    except IllegalOperation as err:
        raise BadRequest(reason=str(err))  # ex. sorting by -_score

    return search
async def put(self, _id):
    """
    Update or refresh the API document identified by ``_id``.

    Add/Update the URL slug:
        PUT {"slug": "new_slug"}
    Remove a URL slug:
        PUT {"slug": "" }
    Refresh a document:
        PUT {}

    Responds 404 when the document does not exist and 403 when it is
    owned by a different user than the current one.
    """
    try:
        doc = SmartAPI.get(_id)
    except NotFoundError:
        raise HTTPError(404)

    if doc.username != self.current_user['login']:
        raise HTTPError(403)

    slug = self.args.slug

    if slug is None:
        # no slug argument at all: refresh the document from its url
        fetched = await download_async(doc.url, raise_error=False)
        code = doc.refresh(fetched)
        doc.save()
        try:
            status = doc.webdoc.STATUS(code).name.lower()
        except ValueError:
            status = 'nofile'  # keep the original copy
        self.finish({
            'success': code in (200, 299),
            'status': status,
            'code': code
        })
        return

    if slug in {'api'}:  # reserved
        raise BadRequest(details='slug is reserved')

    try:
        # an empty slug removes the existing one
        doc.slug = slug or None
        doc.save()
    except (ControllerError, ValueError) as err:
        raise BadRequest(details=str(err)) from err

    self.finish({'success': True})
async def execute_pipeline(self, *args, **kwargs):
    """
    Run the graph query found in ``self.args_json`` and finish the
    request with the transformed result.

    The query is translated with ``self._to_es_query``. When the graph
    query can be reversed, the reversed translation is OR-ed with the
    original so both directions are sent as one search.

    Raises:
        BadRequest: the request body is not a valid graph query
            (GraphObjectError).
        HTTPError(500): any other unexpected failure; the original
            message is recorded as the error's log message.
    """
    try:
        graph_query = GraphQuery.from_dict(self.args_json)
        es_query = self._to_es_query(graph_query)

        if graph_query.can_reverse():
            graph_query.reverse()
            es_query_rev = self._to_es_query(graph_query)
            es_query = es_query | es_query_rev

        # it's sent in one query so that parameters like size is still meaningful
        _query = AsyncSearch().query(es_query)
        _res = await self.pipeline.execute(_query, dotdict())
        res = self.pipeline.transform(_res, dotdict())

        # TODO additional transformation, like double reversal in result.

    except GraphObjectError as exc:
        raise BadRequest(reason=str(exc)) from exc
    except HTTPError:
        # an HTTPError raised on purpose further down keeps its own
        # status code instead of being re-wrapped below
        raise
    except Exception as exc:
        # The first positional argument of HTTPError is the status code
        # (as in the other HTTPError(...) calls in this code base), so the
        # message must not be passed there.
        # NOTE(review): assumes the tornado-style signature
        # HTTPError(status_code, log_message) -- confirm.
        raise HTTPError(500, str(exc)) from exc

    self.finish(res)
async def get(self):
    """
    Download the document at ``self.args.url`` and validate it.

    Raises BadRequest when the request carries a body.
    """
    body = self.request.body
    if body:
        raise BadRequest(details="GET takes no request body.")

    document = await self.download(self.args.url)
    self.validate(document)
async def download(self, url):
    """
    Fetch ``url`` with download_async and return the ``raw`` attribute
    of the result.

    Raises BadRequest when the download fails with DownloadError.
    """
    try:
        fetched = await download_async(url)
    except DownloadError as error:
        raise BadRequest(details=str(error))

    # other file info irrelevant for validation
    return fetched.raw
async def post(self):
    """
    Add an API document

    Steps: reject a url that is already registered (409), download it,
    validate it, stop early on a dry run, then save it under the
    current user and send a notification.
    """
    url = self.args.url

    if SmartAPI.find(url, "url"):
        raise HTTPError(409)

    try:
        downloaded = await download_async(self.args.url)
    except DownloadError as err:
        raise BadRequest(details=str(err)) from err

    try:
        smartapi = SmartAPI(self.args.url)
        smartapi.raw = downloaded.raw
        smartapi.validate()
    except (ControllerError, AssertionError) as err:
        raise BadRequest(details=str(err)) from err

    if self.args.dryrun:
        # validation passed; answer without saving anything
        dryrun_reply = {
            'success': True,
            'details': f"[Dryrun] Valid {smartapi.version} Metadata"
        }
        raise Finish(dryrun_reply)

    try:
        smartapi.username = self.current_user['login']
        smartapi.refresh(downloaded)  # populate webdoc meta
        saved_id = smartapi.save()
    except ControllerError as err:
        raise BadRequest(details=str(err)) from err

    self.finish({'success': True, '_id': saved_id})
    await self._notify(smartapi)
def _extra_query_options(self, search, options):
    """
    Wrap the query of ``search`` in a function_score query and apply
    the extra filters requested in ``options``.

    Reads options.entrezonly, options.ensemblonly, options.missing,
    options.exists, options.species, options.aggs and
    options.species_facet_filter.

    Returns the new search object.
    Raises BadRequest when species values are not numeric strings.
    """
    weights = [
        {"filter": {"term": {"name": "pseudogene"}}, "weight": "0.5"},  # downgrade
        {"filter": {"term": {"taxid": 9606}}, "weight": "1.55"},
        {"filter": {"term": {"taxid": 10090}}, "weight": "1.3"},
        {"filter": {"term": {"taxid": 10116}}, "weight": "1.1"},
    ]
    search = AsyncSearch().query(
        "function_score",
        query=search.query,
        functions=weights,
        score_mode="first")

    if options.entrezonly:
        search = search.filter('exists', field="entrezgene")
    if options.ensemblonly:
        search = search.filter('exists', field="ensembl.gene")

    # fields that must be absent / present
    for name in options.missing or ():
        search = search.exclude('exists', field=name)
    for name in options.exists or ():
        search = search.filter('exists', field=name)

    species = options.species
    if species and 'all' not in species:  # 'all': do not apply any filters
        if not all(isinstance(taxid, str) for taxid in species):
            raise BadRequest(reason="species must be strings or integer strings.")
        if not all(taxid.isnumeric() for taxid in species):
            raise BadRequest(reason="cannot map some species to taxids.")
        # filter by taxid numeric strings
        search = search.filter('terms', taxid=species)

    if options.aggs and options.species_facet_filter:
        search = search.post_filter('terms', taxid=options.species_facet_filter)

    return search
def validate(self, raw):
    """
    Validate ``raw`` as SmartAPI metadata and finish the request with
    a success message that includes the detected version.

    Raises BadRequest when validation fails with ControllerError or
    AssertionError.
    """
    try:
        # validation-only instance
        smartapi = SmartAPI(SmartAPI.VALIDATION_ONLY)
        smartapi.raw = raw
        smartapi.validate()
    except (ControllerError, AssertionError) as error:
        raise BadRequest(details=str(error))

    reply = {
        'success': True,
        'details': f'valid SmartAPI ({smartapi.version}) metadata.'
    }
    self.finish(reply)
def delete(self, _id):
    """
    Delete API

    Responds 404 when the document does not exist and 403 when it is
    owned by a different user than the current one. On success the
    request is finished with the id returned by the delete call.
    """
    try:
        doc = SmartAPI.get(_id)
    except NotFoundError:
        raise HTTPError(404)

    if doc.username != self.current_user['login']:
        raise HTTPError(403)

    try:
        removed_id = doc.delete()
    except ControllerError as error:
        raise BadRequest(details=str(error)) from error

    self.finish({'success': True, '_id': removed_id})