def refresh_all_default_indices():
    """Refresh every default index alias, one index type at a time."""
    for idx_type in ALL_INDEX_TYPES:
        refresh_index(get_default_alias(idx_type))
def document_needs_updating(enrollment):
    """
    Get the document from elasticsearch and see if it matches what's in the database

    Args:
        enrollment (ProgramEnrollment): A program enrollment

    Returns:
        bool: True if the document needs to be updated via reindex
    """
    alias = get_default_alias(PRIVATE_ENROLLMENT_INDEX_TYPE)
    conn = get_conn()
    try:
        es_document = conn.get(index=alias, id=enrollment.id)
    except NotFoundError:
        # Not indexed at all yet, so it definitely needs indexing
        return True

    serialized = serialize_program_enrolled_user(enrollment)
    del serialized['_id']
    indexed_source = es_document['_source']
    if indexed_source == serialized:
        return False

    # Round-trip through JSON to turn OrderedDicts into plain dicts before diffing
    plain_serialized = json.loads(json.dumps(serialized))
    patch = make_patch(indexed_source, plain_serialized).patch
    log.info(
        "Difference found for enrollment %s: %s",
        enrollment,
        json.dumps(patch, indent=" "),
    )
    return True
def _search_percolate_queries(program_enrollment):
    """
    Find all PercolateQuery ids whose queries match a user document

    Args:
        program_enrollment (ProgramEnrollment): A ProgramEnrollment

    Returns:
        list of int: A list of PercolateQuery ids
    """
    conn = get_conn()
    index = get_default_alias(PERCOLATE_INDEX_TYPE)
    doc = serialize_program_enrolled_user(program_enrollment)
    if not doc:
        return []
    # The percolator search doesn't need _id, and leaving it in
    # causes a dynamic mapping failure, so strip it out
    del doc['_id']
    query = {"query": {"percolate": {"field": "query", "document": doc}}}
    result = conn.search(index, GLOBAL_DOC_TYPE, body=query)
    shard_failures = result.get('_shards', {}).get('failures', [])
    if shard_failures:
        raise PercolateException("Failed to percolate: {}".format(shard_failures))
    return [int(hit['_id']) for hit in result['hits']['hits']]
def document_needs_updating(enrollment):
    """
    Get the document from elasticsearch and see if it matches what's in the database

    Args:
        enrollment (ProgramEnrollment): A program enrollment

    Returns:
        bool: True if the document needs to be updated via reindex
    """
    index = get_default_alias(PRIVATE_ENROLLMENT_INDEX_TYPE)
    conn = get_conn()
    try:
        document = conn.get(
            index=index,
            doc_type=GLOBAL_DOC_TYPE,
            id=enrollment.id,
        )
    except NotFoundError:
        # Nothing indexed for this enrollment yet
        return True
    serialized = serialize_program_enrolled_user(enrollment)
    del serialized['_id']
    source = document['_source']
    if serialized == source:
        return False
    # Serialize and re-parse to convert OrderedDict to dict before diffing
    as_plain_dict = json.loads(json.dumps(serialized))
    diff = make_patch(source, as_plain_dict).patch
    log.info(
        "Difference found for enrollment %s: %s",
        enrollment,
        json.dumps(diff, indent=" "),
    )
    return True
def test_create_search_obj_metadata(self, is_advance_search_capable, expected_index_type):
    """
    Test that Search objects are created with proper metadata
    """
    searching_user = self.user if is_advance_search_capable else self.learner
    params = {'size': 50}
    with patch('search.api.Search.update_from_dict', autospec=True) as update_mock:
        search_obj = create_search_obj(
            searching_user,
            search_param_dict=params,
        )
        assert search_obj._index == [get_default_alias(expected_index_type)]  # pylint: disable=protected-access
        assert update_mock.call_count == 2
        positional_args = update_mock.call_args[0]
        assert isinstance(positional_args[0], Search)
        assert positional_args[1] == params
def create_search_obj(user, search_param_dict=None, filter_on_email_optin=False):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        Search: elasticsearch_dsl Search object
    """
    staff_program_ids = get_advance_searchable_program_ids(user)
    is_advance_search_capable = bool(staff_program_ids)
    if is_advance_search_capable:
        index_type = PRIVATE_ENROLLMENT_INDEX_TYPE
    else:
        index_type = PUBLIC_ENROLLMENT_INDEX_TYPE
    search_obj = Search(index=get_default_alias(index_type))
    # Apply caller-supplied params first; server-side filters below take precedence
    if search_param_dict is not None:
        search_obj.update_from_dict(search_param_dict)

    if not is_advance_search_capable:
        # Learners can't search for other learners with privacy set to private
        search_obj = search_obj.filter(
            ~Q('term', **{'profile.account_privacy': Profile.PRIVATE})  # pylint: disable=invalid-unary-operand-type
        )
    # Limit results to one of the programs the user is staff on
    search_obj = search_obj.filter(
        create_program_limit_query(
            user,
            staff_program_ids,
            filter_on_email_optin=filter_on_email_optin,
        )
    )
    # Filter so that only filled_out profiles are seen
    search_obj = search_obj.filter(Q('term', **{'profile.filled_out': True}))
    # Force size to be the one we set on the server
    update_dict = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
    if search_param_dict is not None and search_param_dict.get('from') is not None:
        update_dict['from'] = search_param_dict['from']
    search_obj.update_from_dict(update_dict)
    return search_obj
def create_search_obj(user, search_param_dict=None, filter_on_email_optin=False):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        Search: elasticsearch_dsl Search object
    """
    searchable_program_ids = get_advance_searchable_program_ids(user)
    can_advance_search = bool(searchable_program_ids)
    chosen_index_type = (
        PRIVATE_ENROLLMENT_INDEX_TYPE if can_advance_search else PUBLIC_ENROLLMENT_INDEX_TYPE
    )
    search_obj = Search(index=get_default_alias(chosen_index_type))
    # Caller params go in first so the server-side filters below win any conflict
    if search_param_dict is not None:
        search_obj.update_from_dict(search_param_dict)

    if not can_advance_search:
        # Learners can't search for other learners with privacy set to private
        private_filter = ~Q('term', **{'profile.account_privacy': Profile.PRIVATE})  # pylint: disable=invalid-unary-operand-type
        search_obj = search_obj.filter(private_filter)
    # Limit results to one of the programs the user is staff on
    program_filter = create_program_limit_query(
        user,
        searchable_program_ids,
        filter_on_email_optin=filter_on_email_optin,
    )
    search_obj = search_obj.filter(program_filter)
    # Filter so that only filled_out profiles are seen
    search_obj = search_obj.filter(Q('term', **{'profile.filled_out': True}))
    # Force size to be the one we set on the server
    forced_params = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
    if search_param_dict is not None and search_param_dict.get('from') is not None:
        forced_params['from'] = search_param_dict['from']
    search_obj.update_from_dict(forced_params)
    return search_obj
def test_get_aliases(self, is_reindex, index_type, expected_indices):
    """
    We should choose the correct alias and doc type given the circumstances
    """
    conn = get_conn(verify=False)
    default_alias = make_alias_name(index_type, is_reindexing=False)
    backing_index = make_backing_index_name()
    # Skip the mapping because it's invalid for 2.x schema, and we don't need it here
    clear_and_create_index(backing_index, index_type=index_type, skip_mapping=True)
    conn.indices.put_alias(index=backing_index, name=default_alias)
    if is_reindex:
        reindexing_alias = make_alias_name(index_type, is_reindexing=True)
        conn.indices.put_alias(index=backing_index, name=reindexing_alias)
    aliases = get_aliases(index_type)
    assert aliases == list(expected_indices)
    assert aliases[0] == get_default_alias(index_type)
def _search_percolate_queries(program_enrollment):
    """
    Find all PercolateQuery ids whose queries match a user document

    Args:
        program_enrollment (ProgramEnrollment): A ProgramEnrollment

    Returns:
        list of int: A list of PercolateQuery ids
    """
    conn = get_conn()
    index = get_default_alias(PERCOLATE_INDEX_TYPE)
    serialized_doc = serialize_program_enrolled_user(program_enrollment)
    if not serialized_doc:
        return []
    # The percolator doesn't need _id, and leaving it in causes
    # a dynamic mapping failure, so it has to go
    del serialized_doc['_id']
    result = conn.percolate(index, GLOBAL_DOC_TYPE, body={"doc": serialized_doc})
    shard_failures = result.get('_shards', {}).get('failures', [])
    if shard_failures:
        raise PercolateException("Failed to percolate: {}".format(shard_failures))
    return [int(match['_id']) for match in result['matches']]
def get_mappings(self, index_type):
    """Gets mapping data"""
    alias = get_default_alias(index_type)
    refresh_index(alias)
    mappings_by_index = self.conn.indices.get_mapping(index=alias)
    # The response is keyed by backing index name; take the first (only) entry
    return list(mappings_by_index.values())[0]['mappings']
def get_percolate_query(self, _id):
    """Get percolate query"""
    percolate_alias = get_default_alias(PERCOLATE_INDEX_TYPE)
    return self.conn.get(id=_id, index=percolate_alias)
def search(self, index_type):
    """Gets full index data from the _search endpoint"""
    alias = get_default_alias(index_type)
    refresh_index(alias)
    response = self.conn.search(index=alias)
    return response['hits']
def get_default_backing_index(self, index_type):
    """Get the default backing index"""
    alias = get_default_alias(index_type)
    backing_indices = self.conn.indices.get_alias(name=alias)
    # The alias response maps backing index names to alias info; take the first name
    return list(backing_indices.keys())[0]
def get_percolate_query(self, _id):
    """Get percolate query"""
    return self.conn.get(
        id=_id,
        doc_type=GLOBAL_DOC_TYPE,
        index=get_default_alias(PERCOLATE_INDEX_TYPE),
    )