def before_search(self, search_params):
    """Filter (and optionally rank) the dataset search by the requested bbox.

    When ``ext_bbox`` is present in the search extras, either:

    * ``sort=spatial desc`` — rank datasets by spatial overlap using
      ``bbox_query_ordered`` and stash the per-package rankings in
      ``extras['ext_spatial']`` for ``after_search`` to finish sorting
      and paging; or
    * otherwise — restrict the Solr query to the ids of the datasets
      whose extents intersect the bbox.

    :raises SearchError: on an invalid bbox, or when spatial ranking is
        mixed with ``q``/``fq``.
    """
    if 'extras' in search_params and 'ext_bbox' in search_params['extras'] \
            and search_params['extras']['ext_bbox']:
        bbox = validate_bbox(search_params['extras']['ext_bbox'])
        if not bbox:
            raise SearchError('Wrong bounding box provided')

        if search_params.get('sort') == 'spatial desc':
            if search_params.get('q') or search_params.get('fq'):
                raise SearchError('Spatial ranking cannot be mixed with other search parameters')
            # ...because it is too inefficient to use SOLR to filter
            # results and return the entire set to this class and
            # after_search do the sorting and paging.
            extents = bbox_query_ordered(bbox)
            are_no_results = not extents
            search_params['extras']['ext_rows'] = search_params.get('rows', 50)
            search_params['extras']['ext_start'] = search_params.get('start', 0)
            # this SOLR query needs to return no actual results since
            # they are in the wrong order anyway. We just need this SOLR
            # query to get the count and facet counts.
            rows = 0
            search_params['sort'] = None  # SOLR should not sort.
            # Store the rankings of the results for this page, so for
            # after_search to construct the correctly sorted results
            rows = search_params['extras']['ext_rows'] = search_params.get('rows', 50)
            start = search_params['extras']['ext_start'] = search_params.get('start', 0)
            search_params['extras']['ext_spatial'] = [
                (extent.package_id, extent.spatial_ranking)
                for extent in extents[start:start + rows]]
        else:
            extents = bbox_query(bbox)
            are_no_results = extents.count() == 0

        if are_no_results:
            # We don't need to perform the search
            search_params['abort_search'] = True
        else:
            # We'll perform the existing search but also filtering by the ids
            # of datasets within the bbox
            bbox_query_ids = [extent.package_id for extent in extents]
            q = search_params.get('q', '').strip() or '""'
            # FIX: `q` is always truthy here because of the `or '""'`
            # fallback, so the previous `if q` condition always emitted a
            # broken `"" AND (...)` prefix when no free-text query was
            # given.  Skip the prefix for the `'""'` placeholder, matching
            # the other postgis search helper in this file.
            new_q = '%s AND ' % q if q and q != '""' else ''
            new_q += '(%s)' % ' OR '.join(['id:%s' % id for id in bbox_query_ids])
            search_params['q'] = new_q
    return search_params
def before_search(self, search_params):
    """Restrict the search to datasets intersecting a requested bbox.

    When ``ext_bbox`` is present in the search extras, validate it and
    either abort the search (no intersecting extents) or AND an
    ``id:...`` filter onto the free-text query.

    :raises SearchError: if the bounding box fails validation.
    """
    has_bbox = ('extras' in search_params
                and 'ext_bbox' in search_params['extras']
                and search_params['extras']['ext_bbox'])
    if not has_bbox:
        return search_params

    bbox = validate_bbox(search_params['extras']['ext_bbox'])
    if not bbox:
        raise SearchError('Wrong bounding box provided')

    matching = bbox_query(bbox)
    if matching.count() == 0:
        # Nothing can match, so skip running the search at all.
        search_params['abort_search'] = True
        return search_params

    # Keep the existing query but AND it with the ids of the datasets
    # whose extents fall inside the bbox.
    id_filters = ['id:%s' % extent.package_id for extent in matching]
    original_q = search_params.get('q', '')
    prefix = '%s AND ' % original_q if original_q else ''
    search_params['q'] = prefix + '(%s)' % ' OR '.join(id_filters)
    return search_params
def before_search(self, search_params):
    """Normalise a requested bbox and hand it to the configured backend.

    Validates ``ext_bbox`` from the search extras, shifts the eastings
    into range by whole revolutions, then delegates parameter building
    to the helper matching ``self.search_backend``.

    :raises SearchError: if the bounding box fails validation.
    """
    from ckanext.spatial.lib import validate_bbox
    from ckan.lib.search import SearchError

    extras = search_params.get('extras', None)
    if not (extras and extras.get('ext_bbox', None)):
        return search_params

    bbox = validate_bbox(extras['ext_bbox'])
    if not bbox:
        raise SearchError('Wrong bounding box provided')

    # Adjust easting values: shift the whole box east/west by full
    # revolutions until minx sits within [-180, 180].
    while bbox['minx'] < -180:
        bbox['minx'] += 360
        bbox['maxx'] += 360
    while bbox['minx'] > 180:
        bbox['minx'] -= 360
        bbox['maxx'] -= 360

    backend = self.search_backend
    if backend == 'solr':
        search_params = self._params_for_solr_search(bbox, search_params)
    elif backend == 'solr-spatial-field':
        search_params = self._params_for_solr_spatial_field_search(
            bbox, search_params)
    elif backend == 'postgis':
        search_params = self._params_for_postgis_search(bbox, search_params)
    return search_params
def edc_package_update(context, input_data_dict):
    '''
    Find a package, from the given object_name, and update it with the
    given fields.
    1) Call __package_search to find the package
    2) Check the results (success == true), (count==1)
    3) Modify the data
    4) Call get_action(package_update) to update the package
    '''
    from ckan.lib.search import SearchError

    # first, do the search
    q = 'object_name:' + input_data_dict.get("object_name")
    fq = ''
    offset = 0
    # Ask for two rows so a duplicate object_name is detectable.
    limit = 2
    sort = 'metadata_modified desc'
    try:
        data_dict = {
            'q': q,
            'fq': fq,
            'start': offset,
            'rows': limit,
            'sort': sort
        }
        # Use package_search to filter the list
        query = get_action('package_search')(context, data_dict)
    # FIX: `except SearchError, se:` is Python-2-only syntax; `as` is
    # valid on Python 2.6+ and Python 3.
    except SearchError as se:
        log.error('Search error : %s', str(se))
        raise SearchError(str(se))
def before_search(self, search_params):
    """Restrict search results to the requested temporal range.

    Reads ``ext_timerange_start``/``ext_timerange_end`` from the search
    extras and, when a window is given, appends a ``temporal_extent``
    range filter to the ``fq`` of a deep copy of the params (the
    original dict is left untouched).

    :raises SearchError: if a provided time bound cannot be parsed.
    """
    extras = search_params.get('extras')
    if not extras:
        # FIX: the original fell through with no return here, handing
        # ``None`` back to the search plugin chain.
        return search_params

    begin_time = extras.get('ext_timerange_start')
    end_time = extras.get('ext_timerange_end')
    # if both begin and end time are none, no search window was provided
    if begin_time is None and end_time is None:
        return search_params

    try:
        log.debug(begin_time)
        convert_begin = convert_date(begin_time)
        log.debug(convert_begin)
        log.debug(end_time)
        convert_end = convert_date(end_time)
        log.debug(convert_end)
    except pendulum.parsing.exceptions.ParserError:
        log.exception("Error while parsing begin/end time")
        raise SearchError("Cannot parse provided time")

    log.debug(search_params)
    # Deep-copy only once we know we will modify the params (the
    # original copied unconditionally, even on the early-exit paths).
    search_params_modified = copy.deepcopy(search_params)
    # fq should be defined in query params, but just in case, use .get
    # defaulting to empty string
    fq_contents = search_params.get('fq', '')
    fq_modified = ("{} +temporal_extent:[{} TO {}]".format(
        fq_contents, convert_begin, convert_end))
    search_params_modified['fq'] = fq_modified
    log.debug(search_params_modified)
    return search_params_modified
def _params_for_postgis_search(self, bbox, search_params):
    """Adjust the search params to filter/rank by bbox using PostGIS.

    Two modes:

    * spatial ranking (``sort=spatial desc`` plus the
      ``ckanext.spatial.use_postgis_sorting`` config option): order the
      extents in PostGIS via ``bbox_query_ordered`` and stash the
      per-package rankings in ``extras['ext_spatial']`` so that
      ``after_search`` can rebuild the correctly sorted, paged results.
    * plain filtering: restrict the query to the ids of the datasets
      whose extents intersect the bbox (``bbox_query``).

    :raises SearchError: when spatial ranking is mixed with ``q``/``fq``.
    """
    from ckanext.spatial.lib import bbox_query, bbox_query_ordered
    from ckan.lib.search import SearchError

    # Note: This will be deprecated at some point in favour of the
    # Solr 4 spatial sorting capabilities
    if search_params.get('sort') == 'spatial desc' and \
       tk.asbool(config.get('ckanext.spatial.use_postgis_sorting', 'False')):
        if search_params['q'] or search_params['fq']:
            raise SearchError(
                'Spatial ranking cannot be mixed with other search parameters'
            )
        # ...because it is too inefficient to use SOLR to filter
        # results and return the entire set to this class and
        # after_search do the sorting and paging.
        extents = bbox_query_ordered(bbox)
        are_no_results = not extents
        search_params['extras']['ext_rows'] = search_params['rows']
        search_params['extras']['ext_start'] = search_params['start']
        # this SOLR query needs to return no actual results since
        # they are in the wrong order anyway. We just need this SOLR
        # query to get the count and facet counts.
        rows = 0
        # NOTE(review): the `rows = 0` above is a local that is
        # immediately overwritten below and never reaches SOLR —
        # presumably `search_params['rows'] = 0` was intended; confirm
        # against after_search before changing.
        search_params['sort'] = None  # SOLR should not sort.
        # Store the rankings of the results for this page, so for
        # after_search to construct the correctly sorted results
        rows = search_params['extras']['ext_rows'] = search_params['rows']
        start = search_params['extras']['ext_start'] = search_params[
            'start']
        search_params['extras']['ext_spatial'] = [
            (extent.package_id, extent.spatial_ranking) \
            for extent in extents[start:start+rows]]
    else:
        extents = bbox_query(bbox)
        are_no_results = extents.count() == 0

    if are_no_results:
        # We don't need to perform the search
        search_params['abort_search'] = True
    else:
        # We'll perform the existing search but also filtering by the ids
        # of datasets within the bbox
        bbox_query_ids = [extent.package_id for extent in extents]
        q = search_params.get('q', '').strip() or '""'
        # Note: `"" AND` query doesn't work in github ci
        new_q = '%s AND ' % q if q and q != '""' else ''
        new_q += '(%s)' % ' OR '.join(
            ['id:%s' % id for id in bbox_query_ids])
        search_params['q'] = new_q
    return search_params
def resource_search(context, data_dict):
    """Search Resource rows whose fields match the given terms.

    ``data_dict['fields']`` maps a resource column name to a term (or
    whitespace-separated terms); each term is matched with ILIKE.
    Supports optional ``order_by``, ``offset`` and ``limit`` keys.

    :raises SearchError: if a field name is not a Resource column.
    :returns: dict with ``count`` (pre-paging total) and ``results``.
    """
    model = context['model']
    session = context['session']

    fields = data_dict['fields']
    order_by = data_dict.get('order_by')
    offset = data_dict.get('offset')
    limit = data_dict.get('limit')

    # TODO: should we check for user authentication first?
    query = model.Session.query(model.Resource)
    resource_fields = model.Resource.get_columns()

    for field, terms in fields.items():
        # A bare string is treated as a whitespace-separated term list.
        if isinstance(terms, basestring):
            terms = terms.split()
        if field not in resource_fields:
            raise SearchError('Field "%s" not recognised in Resource search.' % field)
        for term in terms:
            model_attr = getattr(model.Resource, field)
            if field == 'hash':
                # Hashes are matched by prefix only.
                query = query.filter(model_attr.ilike(unicode(term) + '%'))
            elif field in model.Resource.get_extra_columns():
                # Extra fields live in the serialised `extras` column,
                # so match the `"key": "value"` fragment textually.
                model_attr = getattr(model.Resource, 'extras')
                like = or_(
                    model_attr.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
                    model_attr.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term)))
                query = query.filter(like)
            else:
                query = query.filter(model_attr.ilike('%' + unicode(term) + '%'))

    # Unknown order_by values are silently ignored.
    if order_by is not None and hasattr(model.Resource, order_by):
        query = query.order_by(getattr(model.Resource, order_by))

    count = query.count()
    query = query.offset(offset).limit(limit)

    results = []
    for row in query:
        # order_by rank adds a column, yielding (DomainObject, rank)
        # tuples instead of plain rows.
        if isinstance(row, tuple) and isinstance(row[0], model.DomainObject):
            results.append(row[0])
        else:
            results.append(row)
    return {'count': count, 'results': results}
def before_search(self, search_params):
    """Route a bbox-filtered search to the configured spatial backend.

    Validates ``ext_bbox`` from the search extras and delegates
    parameter building to the helper matching ``self.search_backend``;
    params pass through untouched when no bbox is requested.

    :raises SearchError: if the bounding box fails validation.
    """
    extras = search_params.get('extras', None)
    if not (extras and extras.get('ext_bbox', None)):
        return search_params

    bbox = validate_bbox(extras['ext_bbox'])
    if not bbox:
        raise SearchError('Wrong bounding box provided')

    backend = self.search_backend
    if backend == 'solr':
        return self._params_for_solr_search(bbox, search_params)
    if backend == 'solr-spatial-field':
        return self._params_for_solr_spatial_field_search(bbox, search_params)
    if backend == 'postgis':
        return self._params_for_postgis_search(bbox, search_params)
    # Unrecognised backend: leave the params unchanged.
    return search_params
def edc_package_update_bcgw(context, input_data_dict):
    '''
    Find a package, from the given object_name, and update it with the
    given fields.
    1) Call __package_search to find the package
    2) Check the results (success == true), (count==1)
    3) Modify the data
    4) Call get_action(package_update) to update the package
    '''
    from ckan.lib.search import SearchError

    # Fixed unicode characters decoding problem: round-trip through JSON
    # to force cp1252 decoding of the incoming dict.
    # NOTE(review): `json.loads(..., encoding=...)` is Python-2-only
    # (removed in Python 3.9) — revisit on any Python 3 migration.
    import json
    input_dict_str = json.dumps(input_data_dict, ensure_ascii=False)
    input_data_dict = json.loads(input_dict_str, encoding="cp1252")

    update = {}

    # first, do the search
    q = 'object_name:' + input_data_dict.get("object_name")
    fq = ''
    offset = 0
    # Ask for two rows so a duplicate object_name is detectable.
    limit = 2
    sort = 'metadata_modified desc'
    try:
        data_dict = {
            'q': q,
            'fq': fq,
            'start': offset,
            'rows': limit,
            'sort': sort
        }
        # Use package_search to filter the list
        query = get_action('package_search')(context, data_dict)
    # FIX: `except SearchError, se:` is Python-2-only syntax; `as` is
    # valid on Python 2.6+ and Python 3.
    except SearchError as se:
        log.error('Search error : %s', str(se))
        raise SearchError(str(se))
def reverse_apicontroller_action(status, response):
    """
    Make an API call look like a direct action call by reversing
    the exception -> HTTP response translation that ApiController.action
    does
    """
    # Default to "no error details" for unparseable / non-dict bodies.
    err = {}
    try:
        parsed = json.loads(response)
        if parsed.get('success'):
            return parsed
        if hasattr(parsed, 'get'):
            err = parsed.get('error', {})
    except ValueError:
        pass

    etype = err.get('__type')
    # Drop the exception-class prefix ("SomeError: message") if present.
    emessage = err.get('message', '').split(': ', 1)[-1]

    # I refuse to eval(emessage), even if it would be more correct
    if etype == 'Search Query Error':
        raise SearchQueryError(emessage)
    if etype == 'Search Error':
        raise SearchError(emessage)
    if etype == 'Search Index Error':
        raise SearchIndexError(emessage)
    if etype == 'Parameter Error':
        raise ParameterError(emessage)
    if etype == 'Validation Error':
        raise ValidationError(err)
    if etype == 'Not Found Error':
        raise NotFound(emessage)
    if etype == 'Authorization Error':
        raise NotAuthorized()
    # don't recognize the error
    raise CKANAPIError(response, status)
def before_view(self, pkg_dict):
    """
    Extend the group controller to show resource information

    The resource information will come from elastic search
    """
    # use q as query string
    q = c.q = request.params.get(
        'q', default=None)  # unicode format (decoded from utf8)
    page = c.page = request.params.get('page', default=None)

    # TODO: put this as a parameter
    rows = 20
    if page is None:
        start = 0
    else:
        # Start with the first element in this page
        start = ((int(page) * rows) - rows)

    # format q to send to elastic search
    # FIX: the original used `q is '*:*'`, an identity comparison against
    # a string literal whose result depends on CPython interning; use
    # equality instead.
    if q is None or q == '*:*':
        query = {
            "sort": {
                "data": {
                    "order": "desc"
                },
            },
            "query": {
                "match_all": {}
            },
            "size": rows,
            "from": start
        }
    else:
        query = {
            "sort": {
                "data": {
                    "order": "desc"
                },
            },
            "query": {
                "query_string": {
                    "query": q,
                    "default_operator": "AND"
                }
            },
            "size": rows,
            "from": start
        }

    # Now send query to elastic search
    self._load_elastic_config()
    client = DataStoreClient(urlparse.urljoin(self.url, pkg_dict['name']))
    headers = dict()
    headers['Authorization'] = self.user.get('apikey')
    #req = urllib2.Request(webstore_request_url, post_data, headers)
    client._headers = headers

    # do not fail on search errors
    # FIX: narrowed the bare `except:` — it also swallowed
    # KeyboardInterrupt/SystemExit; the error is still logged and
    # re-raised as SearchError.
    try:
        response = client.query(query)
    except Exception:
        # there's an error in search params
        import traceback
        response = dict()
        errmsg = 'Error searching query string \n %s \n Message\n%s' % (
            query, traceback.format_exc())
        log.error(errmsg)
        raise SearchError(errmsg)

    # Now we have to parse the result back to package dict
    hits = response.get('hits')
    resources = list()
    if hits is not None:
        for res in hits['hits']:
            # Store it in extras
            resources.append(res['_source'])
        # Add a new field on pkg_dict
        pkg_dict['elastic_resources'] = resources
        pkg_dict['elastic_hits'] = hits.get('total')
    else:
        # Add a new field on pkg_dict
        pkg_dict['elastic_resources'] = dict()
        pkg_dict['elastic_hits'] = 0
    return pkg_dict