def __getitem__(self, key):
    """Return a cursor over the bills selected by ``key``.

    ``key`` is either an integer index or a slice without a step.  It is
    turned into a half-open window ``[start, stop)`` that is applied as
    ``from``/``size`` on the Elasticsearch query when ``self.es_search``
    is set, and as ``skip``/``limit`` on the MongoDB query otherwise.

    For slices, a missing (or zero) ``start`` becomes ``0`` and a missing
    (or zero) ``stop`` becomes ``len(self)``.

    Raises:
        KeyError: if a slice with a step is supplied.
        TypeError: if ``key`` is neither an ``int`` nor a ``slice``.
    """
    if isinstance(key, slice):
        # Validate the step first so a rejected slice never needs len(self).
        if key.step:
            raise KeyError('step of %s is not permitted' % key.step)
        start = key.start or 0
        stop = key.stop or len(self)
    elif isinstance(key, int):
        start = key
        stop = key + 1
    else:
        # Without this branch ``stop`` would be unbound further down and
        # the caller would get a confusing NameError-style failure.
        raise TypeError('indices must be integers or slices, not %s'
                        % type(key).__name__)

    size = stop - start

    if self.es_search:
        # Copy the top-level dict so paging keys are not written back onto
        # the search definition stored on the instance.
        search = dict(self.es_search)
        search['sort'] = [{self.sort: 'desc'}, 'bill_id']
        search['from'] = start
        search['size'] = size
        es_result = elasticsearch.search(search, index='billy',
                                         doc_type='bills')
        # Elasticsearch picks the page; MongoDB supplies the documents.
        hit_ids = [hit['_id'] for hit in es_result['hits']['hits']]
        _mongo_query = {'_id': {'$in': hit_ids}}
        return db.bills.find(_mongo_query, fields=self.fields).sort([
            (self.sort, pymongo.DESCENDING),
            ('bill_id', pymongo.ASCENDING),
        ])

    return db.bills.find(self.mongo_query, fields=self.fields).sort([
        (self.sort, pymongo.DESCENDING),
    ]).skip(start).limit(size)
def __getitem__(self, key):
    """Fetch the bills addressed by an integer index or a plain slice.

    The key is converted to an offset (``start``) and an exclusive end
    (``stop``).  When ``self.es_search`` is truthy the page is selected by
    Elasticsearch (``from``/``size``) and the matching documents are then
    loaded from MongoDB by ``_id``; otherwise ``self.mongo_query`` is run
    directly with ``skip``/``limit``.

    Slice bounds that are ``None`` or ``0`` fall back to ``0`` for the
    start and ``len(self)`` for the stop.

    Raises:
        KeyError: if the slice carries a step.
        TypeError: if ``key`` is not an ``int`` or a ``slice``.
    """
    if isinstance(key, int):
        start, stop = key, key + 1
    elif isinstance(key, slice):
        # Check the step before anything else so len(self) is only
        # evaluated for slices that will actually be served.
        if key.step:
            raise KeyError('step of %s is not permitted' % key.step)
        start = key.start or 0
        stop = key.stop or len(self)
    else:
        # Previously an unsupported key fell through with ``stop`` never
        # assigned; fail early with the conventional exception instead.
        raise TypeError('indices must be integers or slices, not %s'
                        % type(key).__name__)

    if not self.es_search:
        cursor = db.bills.find(self.mongo_query, fields=self.fields)
        cursor = cursor.sort([(self.sort, pymongo.DESCENDING)])
        return cursor.skip(start).limit(stop - start)

    # Work on a shallow copy: the paging/sort keys set below must not
    # leak into the dict kept on the instance.
    search = dict(self.es_search)
    search['sort'] = [{self.sort: 'desc'}, 'bill_id']
    search['from'] = start
    search['size'] = stop - start
    es_result = elasticsearch.search(search, index='billy', doc_type='bills')

    _mongo_query = {
        '_id': {'$in': [r['_id'] for r in es_result['hits']['hits']]},
    }
    ordering = [(self.sort, pymongo.DESCENDING),
                ('bill_id', pymongo.ASCENDING)]
    return db.bills.find(_mongo_query, fields=self.fields).sort(ordering)
def search(query=None, abbr=None, chamber=None, subjects=None,
           bill_id=None, bill_id__in=None, search_window=None,
           updated_since=None, sponsor_id=None, bill_fields=None,
           status=None, type_=None, session=None):
    """Translate search criteria into a bills query and return its cursor.

    Args:
        query: free-text query.  With ``settings.ENABLE_ELASTICSEARCH`` on
            it is first tried as a bill id (when it contains a digit) and
            then run as an Elasticsearch ``query_string`` over ``text`` and
            ``title``; otherwise it is used as a case-insensitive regex on
            ``title``.
        abbr: value matched against ``settings.LEVEL_FIELD``.
        chamber, subjects, bill_id: copied into the filter when not None.
            NOTE(review): the Elasticsearch path reads
            ``subjects['$all']``, so it presumably expects a
            ``{'$all': [...]}`` dict -- confirm against callers.
        bill_id__in: accepted but not used by this function.
        search_window: ``'session'``, ``'term'``, ``'session:<name>'``,
            ``'term:<name>'`` or ``'all'``.
        updated_since: value handed to ``parse_param_dt``; bills with
            ``updated_at`` greater than or equal to the result match.
        sponsor_id: matched against ``sponsors.leg_id``.
        bill_fields: passed as ``fields`` to ``db.bills.find``.
        status: dict of extra filter clauses merged into the filter.
        type_: matched against ``type``.
        session: matched against ``session`` (overrides a session set via
            ``search_window``).

    Returns:
        The cursor returned by ``db.bills.find``.

    Raises:
        ValueError: for an unrecognised ``search_window`` or when
            ``parse_param_dt`` rejects ``updated_since``.
    """
    _filter = {}
    for key, value in [(settings.LEVEL_FIELD, abbr),
                       ('chamber', chamber),
                       ('subjects', subjects),
                       ('bill_id', bill_id)]:
        if value is not None:
            _filter[key] = value

    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            # maxsplit=1 keeps names that themselves contain the prefix.
            _filter['session'] = search_window.split('session:', 1)[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:', 1)[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    if updated_since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')

    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id

    if status:
        # Status is slightly different: it's a dict like--
        # {'action_dates.signed': {'$ne': None}}
        # Keys are dotted paths, so merge the mapping directly instead of
        # expanding it into keyword arguments.
        _filter.update(status)

    if type_:
        _filter['type'] = type_

    if session:
        _filter['session'] = session

    # process full-text query
    if query and settings.ENABLE_ELASTICSEARCH:
        # block spammers, possibly move to a BANNED_SEARCH_LIST setting
        if '<a href' in query:
            return db.bills.find({settings.LEVEL_FIELD: None})

        # if query contains a digit, try it as a bill id first
        if re.search(r'\d', query):
            _id_filter = dict(_filter)
            normalized_id = fix_bill_id(query).upper()
            if re.search(r'\D', query):
                _id_filter['bill_id'] = normalized_id
            else:
                # entirely numeric: match it as a regex against bill_id
                _id_filter['bill_id'] = {'$regex': normalized_id}

            # check for a result; fall through to full text when empty
            result = db.bills.find(_id_filter, fields=bill_fields)
            if result.count():
                return result

        es_query = {"query_string": {"fields": ["text", "title"],
                                     "default_operator": "AND",
                                     "query": query}}
        es_search = pyes.Search(es_query, fields=[])

        # take terms from mongo query: each popped key is filtered in
        # Elasticsearch instead of MongoDB
        es_terms = []
        if settings.LEVEL_FIELD in _filter:
            es_terms.append(pyes.TermFilter(
                settings.LEVEL_FIELD, _filter.pop(settings.LEVEL_FIELD)))
        if 'session' in _filter:
            es_terms.append(pyes.TermFilter('session',
                                            _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(pyes.TermFilter('chamber',
                                            _filter.pop('chamber')))
        if 'subjects' in _filter:
            es_terms.append(pyes.TermFilter(
                'subjects', _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(pyes.TermFilter(
                'sponsors', _filter.pop('sponsors.leg_id')))

        # add terms
        if es_terms:
            es_search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(es_search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        _filter['versions.doc_id'] = {'$in': doc_ids}
    elif query:
        _filter['title'] = {'$regex': query, '$options': 'i'}

    # return query
    return db.bills.find(_filter, fields=bill_fields)
def search(query=None, abbr=None, chamber=None, subjects=None,
           bill_id=None, bill_id__in=None, search_window=None,
           updated_since=None, sponsor_id=None, bill_fields=None,
           status=None, type_=None, session=None):
    """Search bills by structured criteria and an optional text query.

    A MongoDB filter is assembled from the keyword arguments.  When
    ``query`` is given and ``settings.ENABLE_ELASTICSEARCH`` is on, a query
    containing a digit is first looked up as a bill id; if that finds
    nothing (or the query has no digit) the text is run through
    Elasticsearch and the resulting document ids constrain the MongoDB
    filter via ``versions.doc_id``.  Without Elasticsearch the query is a
    case-insensitive regex on ``title``.

    Args:
        query: free-text query string.
        abbr: value matched against ``settings.LEVEL_FIELD``.
        chamber, subjects, bill_id: copied into the filter when not None.
            NOTE(review): the Elasticsearch path indexes
            ``subjects['$all']``; presumably callers pass a
            ``{'$all': [...]}`` dict -- confirm.
        bill_id__in: accepted but not used by this function.
        search_window: ``'session'``, ``'term'``, ``'session:<name>'``,
            ``'term:<name>'`` or ``'all'``.
        updated_since: parsed with ``parse_param_dt`` and applied as a
            ``$gte`` bound on ``updated_at``.
        sponsor_id: matched against ``sponsors.leg_id``.
        bill_fields: field selection passed to ``db.bills.find``.
        status: dict of extra filter clauses merged into the filter.
        type_: matched against ``type``.
        session: matched against ``session``; applied after
            ``search_window`` and therefore wins over it.

    Returns:
        The cursor returned by ``db.bills.find``.

    Raises:
        ValueError: for an unrecognised ``search_window`` or when
            ``parse_param_dt`` rejects ``updated_since``.
    """
    _filter = {}
    simple_criteria = [
        (settings.LEVEL_FIELD, abbr),
        ('chamber', chamber),
        ('subjects', subjects),
        ('bill_id', bill_id),
    ]
    for key, value in simple_criteria:
        if value is not None:
            _filter[key] = value

    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            # Split once so a name containing the prefix is kept whole.
            _filter['session'] = search_window.split('session:', 1)[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:', 1)[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    if updated_since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')

    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id

    if status:
        # Status is slightly different: it's a dict like--
        # {'action_dates.signed': {'$ne': None}}
        # Merge the mapping itself; its keys are dotted field paths, not
        # keyword-argument names.
        _filter.update(status)

    if type_:
        _filter['type'] = type_

    if session:
        _filter['session'] = session

    # process full-text query
    if query and settings.ENABLE_ELASTICSEARCH:
        # block spammers, possibly move to a BANNED_SEARCH_LIST setting
        if '<a href' in query:
            return db.bills.find({settings.LEVEL_FIELD: None})

        if re.search(r'\d', query):
            _id_filter = dict(_filter)
            _id_filter['bill_id'] = fix_bill_id(query).upper()
            result = db.bills.find(_id_filter, fields=bill_fields)
            # A cursor object is truthy even when it matches nothing, so
            # ask for the count; otherwise any query containing a digit
            # would return here and never reach the full-text search.
            if result.count():
                return result

        es_query = {
            "query_string": {
                "fields": ["text", "title"],
                "default_operator": "AND",
                "query": query,
            }
        }
        es_search = pyes.Search(es_query, fields=[])

        # take terms from mongo query: whatever is popped here is filtered
        # by Elasticsearch and no longer sent to MongoDB
        es_terms = []
        if settings.LEVEL_FIELD in _filter:
            es_terms.append(
                pyes.TermFilter(settings.LEVEL_FIELD,
                                _filter.pop(settings.LEVEL_FIELD)))
        if 'session' in _filter:
            es_terms.append(
                pyes.TermFilter('session', _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(
                pyes.TermFilter('chamber', _filter.pop('chamber')))
        if 'subjects' in _filter:
            es_terms.append(
                pyes.TermFilter('subjects',
                                _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(
                pyes.TermFilter('sponsors',
                                _filter.pop('sponsors.leg_id')))

        # add terms
        if es_terms:
            es_search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(es_search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        _filter['versions.doc_id'] = {'$in': doc_ids}
    elif query:
        _filter['title'] = {'$regex': query, '$options': 'i'}

    # return query
    return db.bills.find(_filter, bill_fields)