def test_find_tf_w_tf_or(self):
    """
    Tests the find method when fieldsets joined by 'OR' are combined
    with a timeframe.
    """
    or_query = EngineQuery(self.fieldsets, 'OR')

    # The OR'ed field query and the timeframe fieldsets are AND'ed together.
    combined = EngineQuery([or_query] + self.timeframe, 'AND')
    found = self.engine.find(combined)

    matches = found['results']
    total = found['count']
    self.assertEqual(total, 2)
    self.assertEqual(len(matches), 2)
def test_find_pagination_sort(self):
    """
    Tests that find results are sorted and split into pages.
    """
    by_age_then_name = Sorter([
        SortParam(
            field_name='user.age',
            field_type='IntegerField',
            order='DESC',
        ),
        SortParam(
            field_name='user.screen_name',
            field_type='CharField',
            order='ASC',
        ),
    ])
    or_query = EngineQuery(self.fieldsets, 'OR')

    # (page number, docs expected on that page, first screen name on page)
    expectations = [
        (1, 2, 'jack'),
        (2, 1, 'john'),
    ]
    for page_number, docs_on_page, first_screen_name in expectations:
        found = self.engine.find(
            query=or_query,
            sorter=by_age_then_name,
            page=page_number,
            page_size=2,
        )
        page_docs = found['results']
        total = found['count']

        # The total count covers every match, not just the current page.
        self.assertEqual(total, 3)
        self.assertEqual(len(page_docs), docs_on_page)
        self.assertEqual(
            page_docs[0]['user']['screen_name'], first_screen_name)
def __init__(self, query, distillery, page=1, page_size=DEFAULT_PAGE_SIZE):
    """Initialize the search results for a single Distillery.

    Parameters
    ----------
    query : query.search.search_query.SearchQuery
        Search query the fieldsets are derived from.

    distillery : Distillery
        Distillery that is searched.

    page : optional
        Page forwarded to the parent initializer and to the
        Distillery's find method. Defaults to 1.

    page_size : optional
        Page size forwarded to the parent initializer and to the
        Distillery's find method. Defaults to DEFAULT_PAGE_SIZE.

    """
    super(DistillerySearchResults, self).__init__(
        self.VIEW_NAME, query, page, page_size)

    # Defaults that stay in place when there is nothing to search for.
    self.results = []
    self.distillery = distillery
    self.fieldsets = []
    self.engine_query = None

    self.fieldsets = self._get_fieldsets(distillery, query)

    if self.fieldsets:
        self.engine_query = EngineQuery(
            subqueries=self.fieldsets, joiner='OR')
        found = self.distillery.find(
            self.engine_query, page=page, page_size=page_size)

        # NOTE(review): self.count is presumably initialized by the
        # parent initializer -- confirm against the base class.
        if found and found['count']:
            self.count += found['count']
            self.results = found['results']
def _get_field_engine_query(distillery, field_parameters):
    """Build an EngineQuery from field parameters related to a Distillery.

    Parameters
    ----------
    distillery : Distillery

    field_parameters : list of query.search.field_search_parameter.FieldSearchParameter

    Returns
    -------
    EngineQuery or None
        The fieldsets of the related parameters joined by 'AND', or
        None if there are no parameters or none of them are related
        to the Distillery.

    """
    if not field_parameters:
        return None

    related_fieldsets = []
    for parameter in field_parameters:
        if parameter.is_related_to_distillery(distillery):
            related_fieldsets.append(parameter.create_fieldset())

    if related_fieldsets:
        return EngineQuery(subqueries=related_fieldsets, joiner='AND')
    return None
def _create_keyword_engine_query(text_fields, keyword):
    """Build a regex search for one keyword across a set of text fields.

    Parameters
    ----------
    text_fields : list of DataField

    keyword: str

    Returns
    -------
    EngineQuery or None
        One 'regex' fieldset per text field joined by 'OR', or None
        if there are no text fields.

    """
    regex_fieldsets = []
    for text_field in text_fields:
        regex_fieldsets.append(
            QueryFieldset(
                field_name=text_field.field_name,
                field_type=text_field.field_type,
                operator='regex',
                value=keyword,
            )
        )

    if regex_fieldsets:
        return EngineQuery(subqueries=regex_fieldsets, joiner='OR')
    return None
def _get_keyword_engine_query(distillery, keywords):
    """Build an EngineQuery matching every keyword in a Distillery's text fields.

    Parameters
    ----------
    distillery : Distillery
        Distillery to get the text fields from.

    keywords : list of str
        Keywords to search for in the text fields.

    Returns
    -------
    EngineQuery or None
        The per-keyword queries joined by 'AND', or None if there are
        no keywords or the Distillery has no text fields.

    """
    # The Distillery is only consulted when there is something to search for.
    text_fields = distillery.get_text_fields() if keywords else None
    if not text_fields:
        return None

    candidates = (
        DistillerySearchResults._create_keyword_engine_query(
            text_fields, keyword)
        for keyword in keywords
    )
    per_keyword_queries = [
        candidate for candidate in candidates if candidate
    ]
    return EngineQuery(subqueries=per_keyword_queries, joiner='AND')
def _create_timeframe_query(self, data):
    """
    Takes a dictionary of data that was distilled by the Context's
    primary_distillery. If the Context has a time interval, returns
    an |EngineQuery| that restricts the related date field to values
    after the start time and up to and including the end time,
    joined by 'AND'. A bound without a value is left out. Returns
    None if the Context has no time interval or neither bound has a
    value.
    """
    if not self._has_time_interval():
        return None

    # (operator, value) pairs; both times are computed up front.
    bounds = [
        ('gt', self._get_start_time(data)),
        ('lte', self._get_end_time(data)),
    ]
    fieldsets = [
        QueryFieldset(
            field_name=self._related_date_field,
            field_type='DateTimeField',
            operator=operator,
            value=moment,
        )
        for operator, moment in bounds
        if moment
    ]

    if fieldsets:
        return EngineQuery(fieldsets, 'AND')
    return None
def test_within_multiple_polygons(self):
    """
    Tests the find method using a 'within' filter whose value is a
    feature collection containing more than one polygon.
    """
    feature_collection = {
        'type': 'FeatureCollection',
        'features': [self.polygon1, self.polygon2],
    }
    within_filter = QueryFieldset(
        field_name='location',
        field_type='PointField',
        operator='within',
        value=json.dumps(feature_collection),
    )

    found = self.engine.find(EngineQuery([within_filter], 'AND'))
    total = found['count']
    matches = found['results']
    allowed_names = ['john', 'jane']

    self.assertEqual(total, 2)
    # Result order is not asserted; each doc only has to be an expected one.
    for position in (0, 1):
        screen_name = self._get_doc(matches, position)['user']['screen_name']
        self.assertIn(screen_name, allowed_names)
def test_or(self):
    """
    Tests the find method when the query terms are joined by 'OR'.
    """
    found = self.engine.find(EngineQuery(self.fieldsets, 'OR'))
    total = found['count']
    self.assertEqual(total, 3)
def test_find_tf_end_time_or(self):
    """
    Tests the find method when fieldsets joined by 'OR' are combined
    with a single '_saved_date' bound.
    """
    or_query = EngineQuery(self.fieldsets, 'OR')

    # Keep only docs saved an hour or more after the reference time.
    saved_date_bound = QueryFieldset(
        field_name='_saved_date',
        field_type='DateTimeField',
        operator='gte',
        value=self.time + timedelta(hours=1),
    )
    combined = EngineQuery([or_query, saved_date_bound], 'AND')

    found = self.engine.find(combined)
    matches = found['results']
    total = found['count']
    self.assertEqual(total, 1)
    self.assertEqual(len(matches), 1)
def test_and(self):
    """
    Tests the find method when the query terms are joined by 'AND'.
    """
    found = self.engine.find(EngineQuery(self.fieldsets, 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'jack')
def _get_query(self, data, keyword):
    """
    Takes a dictionary of data and a keyword. Builds the filter,
    keyword, and timeframe queries and returns an |EngineQuery| that
    joins the non-empty ones with 'AND'. Returns None if all three
    are empty.
    """
    candidates = (
        self._create_filter_query(data),
        self._create_keyword_query(keyword),
        self._create_timeframe_query(data),
    )
    # Drop the queries that could not be built.
    present = [candidate for candidate in candidates if candidate]
    if not present:
        return None
    return EngineQuery(present, 'AND')
def _create_filter_query(self, data):
    """
    Takes a dictionary of data that was distilled by the Context's
    primary_distillery. Returns an |EngineQuery| made of one fieldset
    per ContextFilter, joined using the Context's filter_logic.
    Returns None if the Context has no ContextFilters.
    """
    fieldsets = []
    for context_filter in self.filters.all():
        fieldsets.append(context_filter.create_fieldset(data))

    if not fieldsets:
        return None
    return EngineQuery(fieldsets, self.filter_logic)
def test_find_tf_no_tf_or(self):
    """
    Tests the find method with fieldsets joined by 'OR' and no
    timeframe.
    """
    or_query = EngineQuery(self.fieldsets, 'OR')
    found = self.engine.find(or_query)

    matches = found['results']
    total = found['count']
    self.assertEqual(total, 3)
    self.assertEqual(len(matches), 3)
def test_find_fields(self):
    """
    Tests that the find method only returns fields defined in the
    Engine's schema.
    """
    # The inserted doc has two fields ('content.tags' and 'user.email')
    # that are left out of the schema assigned below.
    inserted_id = self.engine.insert({
        'content': {
            'text': 'I like cats and dogs.',
            'tags': ['cats', 'dogs'],
        },
        'user': {
            'screen_name': 'Jill',
            'email': '*****@*****.**',
        },
    })

    text_field = DataField(
        field_name='content.text',
        field_type='TextField',
        target_type='Keyword',
    )
    screen_name_field = DataField(
        field_name='user.screen_name',
        field_type='CharField',
        target_type='Account',
    )
    self.engine.schema = [text_field, screen_name_field]

    screen_name_filter = QueryFieldset(
        field_name='user.screen_name',
        field_type='CharField',
        operator='eq',
        value='Jill',
    )
    actual = self.engine.find(EngineQuery([screen_name_filter], 'AND'))

    expected_doc = {
        '_id': inserted_id,
        'content': {'text': 'I like cats and dogs.'},
        'user': {'screen_name': 'Jill'},
    }
    expected = {'count': 1, 'results': [expected_doc]}
    self.assertEqual(actual, expected)
def _get_query(self, date_field):
    """
    Takes the name of a date field and returns an |EngineQuery| for
    documents whose date in that field is later than the time
    returned by _get_query_start_time().
    """
    newer_than_start = QueryFieldset(
        field_name=date_field,
        field_type='DateTimeField',
        operator='gt',
        value=self._get_query_start_time(),
    )
    return EngineQuery([newer_than_start])
def _get_engine_query(distillery, query, before=None, after=None):
    """Build the EngineQuery of field and keyword searches for a Distillery.

    Parameters
    ----------
    distillery : Distillery

    query: query.search.search_query.SearchQuery

    before: datetime.datetime or None
        If given, adds an 'lte' bound on the searchable date field.

    after: datetime.datetime or None
        If given, adds a 'gte' bound on the searchable date field.

    Returns
    -------
    EngineQuery or None
        The field query, keyword query, and any date bounds joined by
        'AND', or None if neither a field query nor a keyword query
        could be built.

    """
    field_query = DistillerySearchResults._get_field_engine_query(
        distillery, query.field_parameters)
    keyword_query = DistillerySearchResults._get_keyword_engine_query(
        distillery, query.keywords)

    subqueries = [
        candidate for candidate in (field_query, keyword_query) if candidate
    ]
    if not subqueries:
        return None

    # The date field is only looked up when a bound was actually requested.
    date_field = None
    if before or after:
        date_field = distillery.get_searchable_date_field()

    if date_field:
        for operator, moment in (('lte', before), ('gte', after)):
            if moment:
                subqueries.append(
                    QueryFieldset(
                        field_name=date_field,
                        field_type='DateTimeField',
                        operator=operator,
                        value=moment.isoformat(),
                    )
                )

    return EngineQuery(subqueries=subqueries, joiner='AND')
def test_find_tf_no_tf_and(self):
    """
    Tests the find method with fieldsets joined by 'AND' and no
    timeframe.
    """
    and_query = EngineQuery(self.fieldsets, 'AND')
    found = self.engine.find(and_query)

    matches = found['results']
    total = found['count']
    self.assertEqual(total, 1)
    self.assertEqual(len(matches), 1)

    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'jack')
def _get_results(query):
    """
    Takes a query dictionary with 'fieldsets', 'joiner', and
    'collections' keys. Runs the resulting |EngineQuery| against each
    collection and returns the matching documents as a single list.
    """
    engine_query = EngineQuery(
        subqueries=[
            QueryFieldset(**fieldset_kwargs)
            for fieldset_kwargs in query['fieldsets']
        ],
        joiner=query['joiner'],
    )

    matching_docs = []
    for collection in query['collections']:
        found = collection.find(engine_query)
        matching_docs += found['results']
    return matching_docs
def _create_keyword_query(self, keyword):
    """
    Takes a keyword and returns an |EngineQuery| that applies a
    'regex' search for the keyword to each text field, joined by
    'OR'. Returns None if the keyword is empty or there are no text
    fields.
    """
    if not keyword:
        return None

    regex_fieldsets = [
        QueryFieldset(
            field_name=text_field.field_name,
            field_type=text_field.field_type,
            operator='regex',
            value=keyword,
        )
        for text_field in self._get_text_fields()
    ]

    if regex_fieldsets:
        return EngineQuery(regex_fieldsets, 'OR')
    return None
def test_lte(self):
    """
    Tests the find method for an 'lte' (less than or equal to) filter.
    """
    fieldsets = [
        QueryFieldset(
            field_name='user.age',
            field_type='IntegerField',
            operator='lte',
            value=30,
        )
    ]
    query = EngineQuery(fieldsets, 'AND')
    results = self.engine.find(query)

    # Only the number of matches is verified here, so the result
    # documents are not read.
    count = results['count']
    self.assertEqual(count, 3)
def test_regex_unmatched_quote(self):
    """
    Tests the find method for a 'regex' filter whose value contains
    an unmatched quotation mark.
    """
    unmatched_quote_filter = QueryFieldset(
        field_name='user.screen_name',
        field_type='CharField',
        operator='regex',
        value='"john',
    )
    found = self.engine.find(EngineQuery([unmatched_quote_filter], 'AND'))
    total = found['count']
    self.assertEqual(total, 0)
def test_eq_text(self):
    """
    Tests the find method for an 'eq' (equals) filter on a TextField.
    """
    text_filter = QueryFieldset(
        field_name='content.text',
        field_type='TextField',
        operator='eq',
        value='I like dogs.',
    )
    found = self.engine.find(EngineQuery([text_filter], 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'jane')
def test_eq_email(self):
    """
    Tests the find method for an 'eq' (equals) filter on an
    EmailField.
    """
    email_filter = QueryFieldset(
        field_name='user.email',
        field_type='EmailField',
        operator='eq',
        value='*****@*****.**',
    )
    found = self.engine.find(EngineQuery([email_filter], 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'jane')
def test_gte(self):
    """
    Tests the find method for a 'gte' (greater than or equal to)
    filter.
    """
    fieldsets = [
        QueryFieldset(
            field_name='user.age',
            field_type='IntegerField',
            operator='gte',
            value=20,
        )
    ]
    query = EngineQuery(fieldsets, 'AND')
    results = self.engine.find(query)

    # Only the number of matches is verified here. The unused list of
    # expected names was dropped: it named two users while three
    # matches are asserted, and it was never checked.
    count = results['count']
    self.assertEqual(count, 3)
def test_find(self, mock_engine):
    """
    Tests that the find method of the Collection class passes the
    query to its engine and returns what the engine returns.
    """
    collection = Collection()
    query = EngineQuery(
        [
            QueryFieldset(
                field_name='text',
                field_type='CharField',
                operator='regex',
                value='test',
            )
        ],
        'AND',
    )

    # Replace the engine's find so the call arguments can be inspected.
    collection.engine.find = Mock(return_value=[self.doc])
    returned_docs = collection.find(query)

    collection.engine.find.assert_called_once_with(
        query, None, 1, PAGE_SIZE)
    self.assertEqual(returned_docs, [self.doc])
def test_lt(self):
    """
    Tests the find method for an 'lt' (less than) filter.
    """
    age_filter = QueryFieldset(
        field_name='user.age',
        field_type='IntegerField',
        operator='lt',
        value=30,
    )
    found = self.engine.find(EngineQuery([age_filter], 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'john')
def test_not_in(self):
    """
    Tests the find method for a 'not:in' filter.
    """
    tag_filter = QueryFieldset(
        field_name='content.tags',
        field_type='CharField',
        operator='not:in',
        value='cats',
    )
    found = self.engine.find(EngineQuery([tag_filter], 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'jane')
def test_not_regex_with_fragment(self):
    """
    Tests the find method for a 'not:regex' filter whose value is a
    word fragment.
    """
    fragment_filter = QueryFieldset(
        field_name='user.screen_name',
        field_type='CharField',
        operator='not:regex',
        value='ja',
    )
    found = self.engine.find(EngineQuery([fragment_filter], 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'john')
def test_not_missing(self):
    """
    Tests the find method for a 'not:missing' filter.
    """
    last_login_filter = QueryFieldset(
        field_name='user.last_login',
        field_type='DateTimeField',
        operator='not:missing',
        value='',
    )
    found = self.engine.find(EngineQuery([last_login_filter], 'AND'))
    total = found['count']
    matches = found['results']

    self.assertEqual(total, 1)
    only_match = self._get_doc(matches, 0)
    self.assertEqual(only_match['user']['screen_name'], 'john')