def find_entity(self, source_type, type, name, **filters):
    """
    Find the single non-deleted entity matching the given filters.

    GET /api/v3/entities?query=((sourceType:<source_type>)AND(type:<type>)AND(originalName:<name>))
    http://cloudera.github.io/navigator/apidocs/v3/path__v3_entities.html

    :param source_type: value matched against the ``sourceType`` field.
    :param type: value matched against the ``type`` field.
    :param name: value matched against the ``originalName`` field.
    :param filters: extra ``field=value`` pairs ANDed into the query; on a key
      collision they override the defaults built here.
    :return: the first element of the response when it holds exactly one element.
    :raises NavigatorApiException: when the response is empty, holds more than
      one element, or the REST call raised a RestException.
    """
    try:
        # NOTE(review): assumes self.__params is a tuple, so `+=` below builds a
        # new object instead of mutating shared state -- confirm.
        params = self.__params

        query_filters = {
            'sourceType': source_type,
            'type': type,
            'originalName': name,
            'deleted': 'false',
        }

        # Look the cluster name up once and reuse it.
        cluster_name = get_navigator_hue_server_name()
        if cluster_name:
            query_filters['clusterName'] = cluster_name

        query_filters.update(filters)

        filter_query = 'AND'.join('(%s:%s)' % (key, value) for key, value in query_filters.items())

        params += (
            ('query', filter_query),
            ('offset', 0),
            ('limit', 2),  # We are looking for single entity, so limit to 2 to check for multiple results
        )

        response = self._root.get('entities', headers=self.__headers, params=params)

        if not response:
            raise NavigatorApiException('Could not find entity with query filters: %s' % str(query_filters))
        elif len(response) > 1:
            raise NavigatorApiException('Found more than 1 entity with query filters: %s' % str(query_filters))

        return response[0]
    except RestException as e:  # `as` form is valid on Python 2.6+ and Python 3
        msg = 'Failed to find entity: %s' % str(e)
        LOG.exception(msg)
        raise NavigatorApiException(msg)
def get_cluster_source_ids(api):
    '''
    Return the query prefix restricting a search to the sourceIds of the configured cluster.

    ClusterName is handled by getting the list of sourceIds of a Cluster. We can't filter
    directly on a clusterName.

    The prefix is read from the cache under CLUSTER_SOURCE_IDS_CACHE_KEY; on a miss it is
    rebuilt from api.get_cluster_source_ids() and stored again. It is an empty string when
    no server name is configured.
    '''
    cached = cache.get(CLUSTER_SOURCE_IDS_CACHE_KEY)
    if cached is not None:
        return cached

    source_filter = ''

    if get_navigator_hue_server_name():
        sources = api.get_cluster_source_ids()
        LOG.info('Navigator cluster source ids: %s' % (sources, ))

        if not sources:
            # 0 means always false
            source_filter = 'sourceId:0 AND'
        else:
            clauses = []
            for source in sources:
                # Sometimes sourceId seems to be missing
                clauses.append('sourceId:%s' % (source.get('sourceId') or source.get('identity')))
            source_filter = '(' + ' OR '.join(clauses) + ') AND '

    cache.set(CLUSTER_SOURCE_IDS_CACHE_KEY, source_filter, 60 * 60 * 12)  # 1/2 Day
    return source_filter
def get_cluster_source_ids(self):
    """
    GET 'entities' filtered on clusterName:"<configured server name>", with limit 200.

    The request parameters are logged at INFO level before the call; the raw response of
    the GET is returned unchanged.
    """
    cluster_query = 'clusterName:"%s"' % get_navigator_hue_server_name()
    params = (('query', cluster_query), ('limit', 200))

    LOG.info(params)

    return self._root.get('entities', headers=self.__headers, params=params)
def get_cluster_source_ids(api):
    '''
    Return the query prefix restricting a search to the sourceIds of the configured cluster.

    ClusterName is handled by getting the list of sourceIds of a Cluster. We can't filter
    directly on a clusterName.

    The prefix is computed only while the module global CLUSTER_SOURCE_IDS is None and is
    stored there afterwards. It is an empty string when no server name is configured.

    :param api: object exposing get_cluster_source_ids(), returning an iterable of dicts.
    :return: '' or a string ending in 'AND' / 'AND ' meant to be prepended to a query.
    '''
    global CLUSTER_SOURCE_IDS

    if CLUSTER_SOURCE_IDS is None:
        # Build into a local first: if the API call raises, the global stays None and the
        # next call retries instead of caching an empty (unfiltered) prefix.
        cluster_source_ids = ''

        if get_navigator_hue_server_name():
            # Single remote call; its result is reused below.
            sources = api.get_cluster_source_ids()
            if sources:
                # Fall back to 'identity' when 'sourceId' is missing rather than raising KeyError.
                source_ids = ['sourceId:%s' % (_id.get('sourceId') or _id.get('identity')) for _id in sources]
                cluster_source_ids = '(' + ' OR '.join(source_ids) + ') AND '
            else:
                # 0 means always false
                cluster_source_ids = 'sourceId:0 AND'

        CLUSTER_SOURCE_IDS = cluster_source_ids

    return CLUSTER_SOURCE_IDS
def test_search_entities(self):
    """
    Check the query string produced by search_entities() for three inputs: a plain term,
    a type filter, and a type filter containing special characters that must be escaped.

    When a server name is configured the expected query is wrapped in a sourceId filter.
    """
    if get_navigator_hue_server_name():
        cluster_filter = '(sourceId:1 OR sourceId:2) AND (%s)'
    else:
        cluster_filter = '%s'

    # (query_s, expected query before the cluster wrapper is applied)
    cases = (
        (
            'cases',
            '(((originalName:cases*^3)OR(originalDescription:cases*^1)OR(name:cases*^10)OR(description:cases*^3)OR(tags:cases*^5))AND((originalName:[* TO *])OR(originalDescription:[* TO *])OR(name:[* TO *])OR(description:[* TO *])OR(tags:[* TO *]))) AND (*) AND ((type:TABLE)OR(type:VIEW)) AND (sourceType:HIVE OR sourceType:IMPALA)',
        ),
        (
            'type:FIELD',
            '* AND ((type:FIELD*)) AND ((type:TABLE)OR(type:VIEW)OR(type:DATABASE)OR(type:PARTITION)OR(type:FIELD)) AND (sourceType:HIVE OR sourceType:IMPALA)',
        ),
        (
            'type:{}()[]*',
            '* AND ((type:\\{\\}\\(\\)\\[\\]*)) AND ((type:TABLE)OR(type:VIEW)OR(type:DATABASE)OR(type:PARTITION)OR(type:FIELD)) AND (sourceType:HIVE OR sourceType:IMPALA)',
        ),
    )

    for query_s, expected in cases:
        # NOTE(review): [0][1] is presumably the value of the first request parameter
        # returned by the test's API stub -- verify against the fixture.
        actual = self.api.search_entities(query_s=query_s, sources=['hive'])[0][1]
        assert_equal(cluster_filter % expected, actual)
def get_cluster_source_ids(api):
    '''
    Return the query prefix restricting a search to the sourceIds of the configured cluster.

    ClusterName is handled by getting the list of sourceIds of a Cluster. We can't filter
    directly on a clusterName.

    The prefix is computed only while the module global CLUSTER_SOURCE_IDS is None and is
    stored there afterwards. It is an empty string when no server name is configured.

    :param api: object exposing get_cluster_source_ids(), returning an iterable of dicts.
    :return: '' or a string ending in 'AND' / 'AND ' meant to be prepended to a query.
    '''
    global CLUSTER_SOURCE_IDS

    if CLUSTER_SOURCE_IDS is None:
        # Build into a local first: if the API call raises, the global stays None and the
        # next call retries instead of caching an empty (unfiltered) prefix.
        cluster_source_ids = ''

        if get_navigator_hue_server_name():
            # Single remote call; its result is reused below.
            sources = api.get_cluster_source_ids()
            if sources:
                # Sometimes sourceId seems to be missing.
                # The parentheses matter: `%` binds tighter than `or`, so without them the
                # 'identity' fallback is never reached and 'sourceId:None' is produced.
                source_ids = ['sourceId:%s' % (_id.get('sourceId') or _id.get('identity')) for _id in sources]
                cluster_source_ids = '(' + ' OR '.join(source_ids) + ') AND '
            else:
                # 0 means always false
                cluster_source_ids = 'sourceId:0 AND'

        CLUSTER_SOURCE_IDS = cluster_source_ids

    return CLUSTER_SOURCE_IDS
def search_entities(self, query_s, limit=100, offset=0, **filters):
    """
    Solr edismax query parser syntax.

    :param query_s: a query string of search terms (e.g. - sales quarterly);
      Currently the search will perform an OR boolean search for all terms (split on whitespace), against a whitelist
      of search_fields. Terms of the form ``name:value`` become field filters instead.
    :param limit: maximum number of results kept after self._secure_results() has filtered the response.
    :param offset: value sent as the ``offset`` request parameter.
    :param filters: only the ``sources`` key is read here (defaults to an empty list).
    :return: list of at most ``limit`` results.
    :raises NavigatorApiException: when the REST call raises a RestException.
    """
    search_fields = ('originalName', 'originalDescription', 'name', 'description', 'tags')

    sources = filters.get('sources', [])
    default_entity_types, entity_types = self._get_types_from_sources(sources)

    try:
        params = self.__params

        query_clauses = []
        user_filters = []
        source_type_filter = []

        for term in query_s.strip().split():
            if ':' not in term:
                query_clauses.append('OR'.join(['(%s:*%s*)' % (field, term) for field in search_fields]))
            else:
                # Split on the first colon only, so a value that itself contains ':' no
                # longer raises ValueError on unpacking.
                name, val = term.split(':', 1)
                if val:
                    if name == 'type':
                        term = '%s:%s' % (name, val.upper().strip('*'))
                        default_entity_types = entity_types  # Make sure type value still makes sense for the source
                    user_filters.append(term + '*')  # Manual filter allowed e.g. type:VIE* ca

        filter_query = '*'
        if query_clauses:
            filter_query = 'OR'.join(['(%s)' % clause for clause in query_clauses])

        user_filter_clause = 'OR '.join(['(%s)' % f for f in user_filters]) or '*'
        source_filter_clause = 'OR'.join(['(%s:%s)' % ('type', entity_type) for entity_type in default_entity_types])

        if 's3' in sources:
            source_type_filter.append('sourceType:s3')

        filter_query = '%s AND (%s) AND (%s)' % (filter_query, user_filter_clause, source_filter_clause)
        if source_type_filter:
            filter_query += ' AND (%s)' % 'OR '.join(source_type_filter)

        cluster_name = get_navigator_hue_server_name()
        if cluster_name:
            # NOTE(review): no space before 'AND' here, unlike the clauses above; the
            # preceding text always ends with ')' -- confirm the server accepts it.
            filter_query += 'AND clusterName:%s' % cluster_name

        params += (
            ('query', filter_query),
            ('offset', offset),
            ('limit', NAVIGATOR.FETCH_SIZE_SEARCH.get()),
        )

        LOG.info(params)
        response = self._root.get('entities', headers=self.__headers, params=params)

        response = list(islice(self._secure_results(response), limit))  # Apply Sentry perms

        return response
    except RestException:  # exception object was unused; this form is valid on Python 2 and 3
        msg = 'Failed to search for entities with search query: %s' % query_s
        LOG.exception(msg)
        raise NavigatorApiException(msg)
def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None,
                                facetRanges=None, filterQueries=None, firstClassEntitiesOnly=None, sources=None):
    """
    POST a search to the 'interactive/entities' endpoint and return its response.

    :param query_s: whitespace-separated search terms; ``name:value`` terms are moved into the filter queries,
      the remaining terms form the ``query`` field ('*' when none are left). May be None.
    :param limit: maximum number of results kept after self._secure_results() has filtered the response.
    :param offset: value sent as the ``offset`` URL parameter.
    :param facetFields: sent as ``facetFields`` (an empty list when not given).
    :param facetPrefix: sent as ``facetPrefix`` when truthy.
    :param facetRanges: sent as ``facetRanges`` when truthy.
    :param filterQueries: initial filter queries; the caller's sequence is copied, not modified.
    :param firstClassEntitiesOnly: sent as ``firstClassEntitiesOnly`` when truthy.
    :param sources: source names used to choose the default entity-type filter.
    :return: the response, with response['results'] replaced by at most ``limit`` filtered results.
    :raises NavigatorApiException: when the REST call raises a RestException.
    """
    # Bound before the try block so the error handler can always serialise it.
    body = {}

    try:
        pagination = {
            'offset': offset,
            'limit': NAVIGATOR.FETCH_SIZE_SEARCH_INTERACTIVE.get(),
        }

        entity_types = []
        fq_type = []

        # Work on a copy so the list passed in by the caller is never mutated.
        filterQueries = list(filterQueries) if filterQueries is not None else []

        if sources:
            default_entity_types, entity_types = self._get_types_from_sources(sources)

            if 'hive' in sources or 'impala' in sources:
                fq_type = default_entity_types
            elif 'hdfs' in sources:
                fq_type = entity_types
            elif 's3' in sources:
                fq_type = default_entity_types
                filterQueries.append('sourceType:s3')

            # query_s defaults to None, so guard before calling .strip() on it.
            if query_s and query_s.strip().endswith('type:*'):  # To list all available types
                fq_type = entity_types

        search_terms = query_s.strip().split() if query_s else []
        query = []
        for term in search_terms:
            if ':' not in term:
                query.append(term)
            else:
                # Split on the first colon only, so a value that itself contains ':' no
                # longer raises ValueError on unpacking.
                name, val = term.split(':', 1)
                if val:  # Allow to type non default types, e.g for SQL: type:FIEL*
                    if name == 'type':  # Make sure type value still makes sense for the source
                        term = '%s:%s' % (name, val.upper())
                        fq_type = entity_types
                    filterQueries.append(term)

        body['query'] = ' '.join(query) or '*'

        if fq_type:
            filterQueries += ['{!tag=type} %s' % ' OR '.join(['type:%s' % fq for fq in fq_type])]

        cluster_name = get_navigator_hue_server_name()
        if cluster_name:
            filterQueries.append('clusterName:%s' % cluster_name)

        body['facetFields'] = facetFields or []  # Currently mandatory in API
        if facetPrefix:
            body['facetPrefix'] = facetPrefix
        if facetRanges:
            body['facetRanges'] = facetRanges
        if filterQueries:
            body['filterQueries'] = filterQueries
        if firstClassEntitiesOnly:
            body['firstClassEntitiesOnly'] = firstClassEntitiesOnly

        data = json.dumps(body)
        LOG.info(data)
        response = self._root.post(
            'interactive/entities?limit=%(limit)s&offset=%(offset)s' % pagination,
            data=data,
            contenttype=_JSON_CONTENT_TYPE,
            clear_cookies=True
        )

        response['results'] = list(islice(self._secure_results(response['results']), limit))  # Apply Sentry perms

        return response
    except RestException:
        msg = 'Failed to search for entities with search query %s' % json.dumps(body)
        LOG.exception(msg)
        raise NavigatorApiException(msg)