def test_search_from_url(self):
    """ ensure that we get the error back when the backend fails """
    searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)

    # Two documents under the courseware doc type plus one under another type.
    winter_doc = {"id": "FAKE_ID_1", "content": {"text": "Little Darling, it's been a long long lonely winter"}}
    year_doc = {"id": "FAKE_ID_2", "content": {"text": "Little Darling, it's been a year since sun been gone"}}
    searcher.index("courseware_content", [winter_doc])
    searcher.index("courseware_content", [year_doc])
    searcher.index("test_doc", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}])

    # The search view should surface the backend failure as a 5xx response
    # carrying a user-facing error message.
    code, results = post_request({"search_string": "sun"})
    self.assertGreater(code, 499)
    self.assertEqual(results["error"], 'An error occurred when searching for "sun"')

    # Searching the engine directly should raise from the failing backend.
    with self.assertRaises(StandardError):
        searcher.search(query_string="test search")
def _display_name_filter(self, library, display_name):
    """
    Filters library children by display name.

    NOTE(review): the search-backend branch returns LibraryUsageLocator
    objects while the fallback branch returns the raw child keys from
    ``library.children`` - confirm callers accept both shapes.
    """
    search_engine = SearchEngine.get_search_engine(index="library_index")
    if search_engine:
        log.warn("search engine found")
        # Restrict the query to this library; the display_name match is done
        # in Python below rather than in the filter clause.
        filter_clause = {
            "library": unicode(normalize_key_for_search(library.location.library_key)),
            # "content_type": CapaDescriptor.INDEX_CONTENT_TYPE,
            # "display_name": display_name
        }
        search_result = search_engine.search(field_dictionary=filter_clause)
        new_results = search_result.get('results', [])
        results = []
        for r in new_results:
            # presumably deep_search extracts the nested display_name from the
            # result document - confirm against its definition.
            v = self.deep_search(["display_name"], r)
            if v['display_name'] == display_name:
                results.append(r)
        return [LibraryUsageLocator.from_string(item['data']['id']) for item in results]
    else:
        log.warn("search engine NOT found")
        #return [key for key in library.children if self._filter_child_name(key, display_name)]
        results = []
        for r in library.children:
            # Fallback: load each child from the modulestore and read all of
            # its fields as JSON to find the display_name.
            p = self.store.get_item(r, 1)
            v = {}
            for field in p.fields.values():
                v[field.name] = field.read_json(p)
            # v = p.get_explicitly_set_fields_by_scope(Scope.settings)
            if v.get('display_name') == display_name:
                results.append(r)
        return results
def engine(cls):
    """ Return course team search engine (if feature is enabled). """
    try:
        search_engine = SearchEngine.get_search_engine(index=cls.INDEX_NAME)
    except ConnectionError as error:
        # Translate the elasticsearch client's exception into the
        # domain-specific error callers are expected to catch.
        logging.error('Error connecting to elasticsearch: %s', error)
        raise ElasticSearchConnectionError
    return search_engine
def handle(self, *args, **options):
    """
    By convention set by Django developers, this method actually executes command's actions.
    So, there could be no better docstring than emphasize this once again.
    """
    course_ids = options['course_ids']
    all_option = options['all']
    setup_option = options['setup']
    index_all_courses_option = all_option or setup_option

    # Exactly one of "explicit course ids" or "--all/--setup" must be supplied.
    if (not len(course_ids) and not index_all_courses_option) or \
            (len(course_ids) and index_all_courses_option):
        raise CommandError("reindex_course requires one or more <course_id>s OR the --all or --setup flags.")

    store = modulestore()

    if index_all_courses_option:
        index_name = CoursewareSearchIndexer.INDEX_NAME
        doc_type = CoursewareSearchIndexer.DOCUMENT_TYPE
        if setup_option:
            try:
                # try getting the ElasticSearch engine
                searcher = SearchEngine.get_search_engine(index_name)
            except exceptions.ElasticsearchException as exc:
                logging.exception(u'Search Engine error - %s', exc)
                return

            index_exists = searcher._es.indices.exists(index=index_name)  # pylint: disable=protected-access
            doc_type_exists = searcher._es.indices.exists_type(  # pylint: disable=protected-access
                index=index_name,
                doc_type=doc_type
            )

            index_mapping = searcher._es.indices.get_mapping(  # pylint: disable=protected-access
                index=index_name,
                doc_type=doc_type
            ) if index_exists and doc_type_exists else {}

            # Index and mapping already present: setup has nothing to do.
            if index_exists and index_mapping:
                return

        # if reindexing is done during devstack setup step, don't prompt the user
        if setup_option or query_yes_no(self.CONFIRMATION_PROMPT, default="no"):
            # in case of --setup or --all, get the list of course keys from all courses
            # that are stored in the modulestore
            course_keys = [course.id for course in modulestore().get_courses()]
        else:
            return
    else:
        # in case course keys are provided as arguments
        course_keys = map(self._parse_course_key, course_ids)

    for course_key in course_keys:
        CoursewareSearchIndexer.do_course_reindex(store, course_key)
def test_task_indexing_course(self):
    """ Making sure that the receiver correctly fires off the task when invoked by signal """
    searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
    course_filter = {"course": unicode(self.course.id)}

    # Nothing should be indexed for this course before the signal fires.
    before = searcher.search(field_dictionary=course_filter)
    self.assertEqual(before["total"], 0)

    listen_for_course_publish(self, self.course.id)

    # Note that this test will only succeed if celery is working in inline mode
    after = searcher.search(field_dictionary=course_filter)
    self.assertEqual(after["total"], 3)
def setUp(self):
    """ Set up tests. """
    super(ReindexCourseTeamTest, self).setUp()
    # Three teams in the same course, plus the engine that indexes them.
    for number in (1, 2, 3):
        team = CourseTeamFactory(course_id=COURSE_KEY1, team_id='team%d' % number)
        setattr(self, 'team%d' % number, team)
    self.search_engine = SearchEngine.get_search_engine(index='index_course_team')
def delete_course_task(user_id, course_key_string):
    """
    Delete the course identified by `course_key_string` on behalf of the user
    identified by `user_id`, then purge its documents from the courseware and
    course-discovery search indexes (when a search engine is configured).
    """
    profile = UserProfile.objects.get(pk=user_id)
    user = User.objects.get(pk=profile.user_id)
    course_key = CourseKey.from_string(course_key_string)
    delete_course_and_groups(course_key, user.id)
    searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
    # `is not None` per PEP 8; was `!= None`. Reuse the already-parsed
    # course_key instead of re-parsing the string a second time.
    if searcher is not None:
        CoursewareSearchIndexer.remove_deleted_items(searcher, course_key, [])
        searcher.remove(CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE, [course_key_string])
def test_task_library_update(self):
    """ Making sure that the receiver correctly fires off the task when invoked by signal """
    searcher = SearchEngine.get_search_engine(LibrarySearchIndexer.INDEX_NAME)
    library_search_key = unicode(normalize_key_for_search(self.library.location.library_key))
    library_filter = {"library": library_search_key}

    # The library should have no indexed content before the signal fires.
    before = searcher.search(field_dictionary=library_filter)
    self.assertEqual(before["total"], 0)

    listen_for_library_update(self, self.library.location.library_key)

    # Note that this test will only succeed if celery is working in inline mode
    after = searcher.search(field_dictionary=library_filter)
    self.assertEqual(after["total"], 2)
def remove_deleted_items(cls, structure_key):
    """ Remove item from Course About Search_index """
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured; nothing to clean up.
        return
    matches = searcher.search(
        doc_type=cls.DISCOVERY_DOCUMENT_TYPE,
        field_dictionary=cls._get_location_info(structure_key)
    )
    ids_to_remove = []
    for match in matches["results"]:
        ids_to_remove.append(match["data"]["id"])
    searcher.remove(cls.DISCOVERY_DOCUMENT_TYPE, ids_to_remove)
def _problem_type_filter(self, library, capa_type):
    """ Filters library children by capa type"""
    search_engine = SearchEngine.get_search_engine(index="library_index")
    if not search_engine:
        # No search backend available: walk the children directly instead.
        return [key for key in library.children if self._filter_child(key, capa_type)]
    filter_clause = {
        "library": unicode(normalize_key_for_search(library.location.library_key)),
        "content_type": CapaDescriptor.INDEX_CONTENT_TYPE,
        "problem_types": capa_type,
    }
    hits = search_engine.search(field_dictionary=filter_clause).get('results', [])
    return [LibraryUsageLocator.from_string(hit['data']['id']) for hit in hits]
def delete_temp_user_task(request, user_id):
    """
    Delete every course and library accessible to the (temporary) user
    identified by `user_id`, removing deleted courses from the courseware
    and course-discovery search indexes along the way.
    """
    profile = UserProfile.objects.get(pk=user_id)
    user = User.objects.get(pk=profile.user_id)
    courses = [format_course_for_view(c) for c in get_courses_accessible_to_user(request, user)[0]]
    libraries = [format_library_for_view(lib, user) for lib in accessible_libraries_list(user)]
    for course in courses:
        # BUG FIX: the original referenced an undefined `course_key_string`
        # inside this loop (NameError); derive it from the course dict.
        course_key_string = course["course_key"]
        course_key = CourseKey.from_string(course_key_string)
        delete_course_and_groups(course_key, user.id)
        searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
        if searcher is not None:  # `is not None` per PEP 8; was `!= None`
            CoursewareSearchIndexer.remove_deleted_items(searcher, course_key, [])
            searcher.remove(CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE, [course_key_string])
    for library in libraries:
        library_key = CourseKey.from_string(library['library_key'])
        delete_course_and_groups(library_key, user.id)
def searcher(self):
    """ Centralized call to getting the search engine for the test """
    engine = SearchEngine.get_search_engine(self.INDEX_NAME)
    return engine
def index_about_information(cls, modulestore, course):
    """
    Add the given course to the course discovery index

    Arguments:
    modulestore - modulestore object to use for operations
    course - course object from which to take properties, locate about information
    """
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured in this environment - nothing to do.
        return
    course_id = unicode(course.id)
    course_info = {
        'id': course_id,
        'course': course_id,
        'content': {},
        'image_url': course_image_url(course),
    }

    # load data for all of the 'about' modules for this course into a dictionary
    about_dictionary = {
        item.location.block_id: item.data
        for item in modulestore.get_items(course.id, qualifiers={"category": "about"})
    }

    about_context = {
        "course": course,
        "about_dictionary": about_dictionary,
    }

    for about_information in cls.ABOUT_INFORMATION_TO_INCLUDE:
        # Broad exception handler so that a single bad property does not scupper the collection of others
        try:
            section_content = about_information.get_value(**about_context)
        except:  # pylint: disable=bare-except
            section_content = None
            log.warning(
                "Course discovery could not collect property %s for course %s",
                about_information.property_name,
                course_id,
                exc_info=True,
            )
        if section_content:
            if about_information.index_flags & AboutInfo.ANALYSE:
                # ANALYSE-flagged values go into the searchable 'content'
                # dict, stripped of HTML when they arrive as strings.
                analyse_content = section_content
                if isinstance(section_content, basestring):
                    analyse_content = strip_html_content_to_text(section_content)
                course_info['content'][about_information.property_name] = analyse_content
            if about_information.index_flags & AboutInfo.PROPERTY:
                # PROPERTY-flagged values are stored as top-level fields,
                # unmodified.
                course_info[about_information.property_name] = section_content

    # Broad exception handler to protect around and report problems with indexing
    try:
        searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
    except:  # pylint: disable=bare-except
        log.exception(
            "Course discovery indexing error encountered, course discovery index may be out of date %s",
            course_id,
        )
        raise

    log.debug("Successfully added %s course to the course discovery index", course_id)
def index_about_information(cls, modulestore, course):
    """
    Add the given course to the course discovery index

    Arguments:
    modulestore - modulestore object to use for operations
    course - course object from which to take properties, locate about information
    """
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured in this environment - nothing to do.
        return
    course_id = unicode(course.id)
    course_info = {
        'id': course_id,
        'course': course_id,
        'content': {},
        'image_url': course_image_url(course),
    }

    # load data for all of the 'about' modules for this course into a dictionary
    about_dictionary = {
        item.location.name: item.data
        for item in modulestore.get_items(course.id, qualifiers={"category": "about"})
    }

    about_context = {
        "course": course,
        "about_dictionary": about_dictionary,
    }

    for about_information in cls.ABOUT_INFORMATION_TO_INCLUDE:
        # Broad exception handler so that a single bad property does not scupper the collection of others
        try:
            section_content = about_information.get_value(**about_context)
        except:  # pylint: disable=bare-except
            section_content = None
            log.warning(
                "Course discovery could not collect property %s for course %s",
                about_information.property_name,
                course_id,
                exc_info=True,
            )
        if section_content:
            if about_information.index_flags & AboutInfo.ANALYSE:
                # ANALYSE-flagged values go into the searchable 'content'
                # dict, stripped of HTML when they arrive as strings.
                analyse_content = section_content
                if isinstance(section_content, basestring):
                    analyse_content = strip_html_content_to_text(section_content)
                course_info['content'][about_information.property_name] = analyse_content
                if about_information.property_name == "more_info":
                    # NOTE(review): "more_info" is additionally promoted to a
                    # top-level field using the HTML-stripped value - confirm
                    # this special case is intentional.
                    course_info[about_information.property_name] = analyse_content
            if about_information.index_flags & AboutInfo.PROPERTY:
                # PROPERTY-flagged values are stored as top-level fields,
                # unmodified.
                course_info[about_information.property_name] = section_content

    # Broad exception handler to protect around and report problems with indexing
    try:
        searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
    except:  # pylint: disable=bare-except
        log.exception(
            "Course discovery indexing error encountered, course discovery index may be out of date %s",
            course_id,
        )
        raise

    log.debug(
        "Successfully added %s course to the course discovery index",
        course_id
    )
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
    """
    Process course for indexing

    Arguments:
    modulestore - modulestore object to use for operations

    structure_key (CourseKey|LibraryKey) - course or library identifier

    triggered_at (datetime) - provides time at which indexing was triggered;
        useful for index updates - only things changed recently from that date
        (within REINDEX_AGE above ^^) will have their index updated, others skip
        updating their index but are still walked through in order to identify
        which items may need to be removed from the index
        If None, then a full reindex takes place

    Returns:
    Number of items that have been added to the index
    """
    error_list = []
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do (implicitly returns None).
        return

    structure_key = cls.normalize_structure_key(structure_key)
    location_info = cls._get_location_info(structure_key)

    # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
    indexed_count = {
        "count": 0
    }

    # indexed_items is a list of all the items that we wish to remain in the
    # index, whether or not we are planning to actually update their index.
    # This is used in order to build a query to remove those items not in this
    # list - those are ready to be destroyed
    indexed_items = set()

    # items_index is a list of all the items index dictionaries.
    # it is used to collect all indexes and index them using bulk API,
    # instead of per item index API call.
    items_index = []

    def get_item_location(item):
        """ Gets the version agnostic item location """
        return item.location.version_agnostic().replace(branch=None)

    def prepare_item_index(item, skip_index=False, groups_usage_info=None):
        """
        Add this item to the items_index and indexed_items list

        Arguments:
        item - item to add to index, its children will be processed recursively

        skip_index - simply walk the children in the tree, the content change is
            older than the REINDEX_AGE window and would have been already indexed.
            This should really only be passed from the recursive child calls when
            this method has determined that it is safe to do so

        Returns:
        item_content_groups - content groups assigned to indexed item
        """
        is_indexable = hasattr(item, "index_dictionary")
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it's not indexable and it does not have children, then ignore
        if not item_index_dictionary and not item.has_children:
            return

        item_content_groups = None
        if item.category == "split_test":
            # Map each split_test child (and that child's own children) to the
            # user-partition group it serves, so content group info can be
            # attached to those items below.
            split_partition = item.get_selected_partition()
            for split_test_child in item.get_children():
                if split_partition:
                    for group in split_partition.groups:
                        group_id = unicode(group.id)
                        child_location = item.group_id_to_child.get(group_id, None)
                        if child_location == split_test_child.location:
                            groups_usage_info.update({
                                unicode(get_item_location(split_test_child)): [group_id],
                            })
                            for component in split_test_child.get_children():
                                groups_usage_info.update({
                                    unicode(get_item_location(component)): [group_id]
                                })

        if groups_usage_info:
            item_location = get_item_location(item)
            item_content_groups = groups_usage_info.get(unicode(item_location), None)

        item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
        indexed_items.add(item_id)
        if item.has_children:
            # determine if it's okay to skip adding the children herein based upon how recently any may have changed
            skip_child_index = skip_index or \
                (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
            children_groups_usage = []
            for child_item in item.get_children():
                if modulestore.has_published_version(child_item):
                    children_groups_usage.append(
                        prepare_item_index(
                            child_item,
                            skip_index=skip_child_index,
                            groups_usage_info=groups_usage_info
                        )
                    )
            # A child with no group assignment (None) means this item cannot
            # be restricted to a single content group either.
            if None in children_groups_usage:
                item_content_groups = None

        if skip_index or not item_index_dictionary:
            return

        item_index = {}
        # if it has something to add to the index, then add it
        try:
            item_index.update(location_info)
            item_index.update(item_index_dictionary)
            item_index['id'] = item_id
            if item.start:
                item_index['start_date'] = item.start
            item_index['content_groups'] = item_content_groups if item_content_groups else None
            item_index.update(cls.supplemental_fields(item))
            items_index.append(item_index)
            indexed_count["count"] += 1
            return item_content_groups
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not fail on one item of many
            log.warning('Could not index item: %s - %r', item.location, err)
            error_list.append(_('Could not index item: {}').format(item.location))

    try:
        with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
            structure = cls._fetch_top_level(modulestore, structure_key)
            groups_usage_info = cls.fetch_group_usage(modulestore, structure)

            # First perform any additional indexing from the structure object
            cls.supplemental_index_information(modulestore, structure)

            # Now index the content
            for item in structure.get_children():
                prepare_item_index(item, groups_usage_info=groups_usage_info)
            # Bulk-index everything collected, then drop index entries for
            # items no longer present in the structure.
            searcher.index(cls.DOCUMENT_TYPE, items_index)
            cls.remove_deleted_items(searcher, structure_key, indexed_items)
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %r",
            structure_key,
            err
        )
        error_list.append(_('General indexing error occurred'))

    if error_list:
        raise SearchIndexingError('Error(s) present during indexing', error_list)

    return indexed_count["count"]
def add_to_search_index(modulestore, location, delete=False, raise_on_error=False):
    """ Add to courseware search index from given location and its children """
    error_list = []
    # NOTE(review): this counts one per top-level index call, not one per
    # item actually indexed - confirm that is what callers expect.
    indexed_count = 0
    # TODO - inline for now, need to move this out to a celery task
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do.
        return

    if isinstance(location, CourseLocator):
        course_key = location
    else:
        course_key = location.course_key
    location_info = {
        "course": unicode(course_key),
    }

    def _fetch_item(item_location):
        """ Fetch the item from the modulestore location, log if not found, but continue """
        try:
            if isinstance(item_location, CourseLocator):
                item = modulestore.get_course(item_location)
            else:
                item = modulestore.get_item(
                    item_location,
                    revision=ModuleStoreEnum.RevisionOption.published_only)
        except ItemNotFoundError:
            log.warning('Cannot find: %s', item_location)
            return None
        return item

    def index_item_location(item_location, current_start_date):
        """ add this item to the search index """
        item = _fetch_item(item_location)
        if not item:
            return
        is_indexable = hasattr(item, "index_dictionary")
        # if it's not indexable and it does not have children, then ignore
        if not is_indexable and not item.has_children:
            return
        # if it has a defined start, then apply it and to it's children
        if item.start and (not current_start_date or item.start > current_start_date):
            current_start_date = item.start
        if item.has_children:
            for child_loc in item.children:
                index_item_location(child_loc, current_start_date)
        item_index = {}
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it has something to add to the index, then add it
        if item_index_dictionary:
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = unicode(item.scope_ids.usage_id)
                if current_start_date:
                    item_index['start_date'] = current_start_date
                searcher.index(DOCUMENT_TYPE, item_index)
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %s', item_location, unicode(err))
                error_list.append(
                    _('Could not index item: {}').format(item_location))

    def remove_index_item_location(item_location):
        """ remove this item from the search index """
        item = _fetch_item(item_location)
        if item:
            # Remove children first, then the item itself.
            if item.has_children:
                for child_loc in item.children:
                    remove_index_item_location(child_loc)
            searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id))

    try:
        if delete:
            remove_index_item_location(location)
        else:
            index_item_location(location, None)
            indexed_count += 1
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %s",
            course_key,
            unicode(err))
        error_list.append(_('General indexing error occurred'))

    if raise_on_error and error_list:
        raise SearchIndexingError(_('Error(s) present during indexing'), error_list)

    return indexed_count
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
    """
    Process course for indexing

    Arguments:
    modulestore - modulestore object to use for operations

    structure_key (CourseKey|LibraryKey) - course or library identifier

    triggered_at (datetime) - provides time at which indexing was triggered;
        useful for index updates - only things changed recently from that date
        (within REINDEX_AGE above ^^) will have their index updated, others skip
        updating their index but are still walked through in order to identify
        which items may need to be removed from the index
        If None, then a full reindex takes place

    Returns:
    Number of items that have been added to the index
    """
    error_list = []
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do (implicitly returns None).
        return

    structure_key = cls.normalize_structure_key(structure_key)
    location_info = cls._get_location_info(structure_key)

    # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
    indexed_count = {"count": 0}

    # indexed_items is a list of all the items that we wish to remain in the
    # index, whether or not we are planning to actually update their index.
    # This is used in order to build a query to remove those items not in this
    # list - those are ready to be destroyed
    indexed_items = set()

    # items_index is a list of all the items index dictionaries.
    # it is used to collect all indexes and index them using bulk API,
    # instead of per item index API call.
    items_index = []

    def get_item_location(item):
        """ Gets the version agnostic item location """
        return item.location.version_agnostic().replace(branch=None)

    def prepare_item_index(item, skip_index=False, groups_usage_info=None):
        """
        Add this item to the items_index and indexed_items list

        Arguments:
        item - item to add to index, its children will be processed recursively

        skip_index - simply walk the children in the tree, the content change is
            older than the REINDEX_AGE window and would have been already indexed.
            This should really only be passed from the recursive child calls when
            this method has determined that it is safe to do so

        Returns:
        item_content_groups - content groups assigned to indexed item
        """
        is_indexable = hasattr(item, "index_dictionary")
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it's not indexable and it does not have children, then ignore
        if not item_index_dictionary and not item.has_children:
            return

        item_content_groups = None
        if item.category == "split_test":
            # Map each split_test child (and that child's own children) to the
            # user-partition group it serves, so content group info can be
            # attached to those items below.
            split_partition = item.get_selected_partition()
            for split_test_child in item.get_children():
                if split_partition:
                    for group in split_partition.groups:
                        group_id = unicode(group.id)
                        child_location = item.group_id_to_child.get(group_id, None)
                        if child_location == split_test_child.location:
                            groups_usage_info.update({
                                unicode(get_item_location(split_test_child)): [group_id],
                            })
                            for component in split_test_child.get_children():
                                groups_usage_info.update({
                                    unicode(get_item_location(component)): [group_id]
                                })

        if groups_usage_info:
            item_location = get_item_location(item)
            item_content_groups = groups_usage_info.get(unicode(item_location), None)

        item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
        indexed_items.add(item_id)
        if item.has_children:
            # determine if it's okay to skip adding the children herein based upon how recently any may have changed
            skip_child_index = skip_index or \
                (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
            children_groups_usage = []
            for child_item in item.get_children():
                if modulestore.has_published_version(child_item):
                    children_groups_usage.append(
                        prepare_item_index(
                            child_item,
                            skip_index=skip_child_index,
                            groups_usage_info=groups_usage_info))
            # A child with no group assignment (None) means this item cannot
            # be restricted to a single content group either.
            if None in children_groups_usage:
                item_content_groups = None

        if skip_index or not item_index_dictionary:
            return

        item_index = {}
        # if it has something to add to the index, then add it
        try:
            item_index.update(location_info)
            item_index.update(item_index_dictionary)
            item_index['id'] = item_id
            if item.start:
                item_index['start_date'] = item.start
            item_index['content_groups'] = item_content_groups if item_content_groups else None
            item_index.update(cls.supplemental_fields(item))
            items_index.append(item_index)
            indexed_count["count"] += 1
            return item_content_groups
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not fail on one item of many
            log.warning('Could not index item: %s - %r', item.location, err)
            error_list.append(
                _('Could not index item: {}').format(item.location))

    try:
        with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
            structure = cls._fetch_top_level(modulestore, structure_key)
            groups_usage_info = cls.fetch_group_usage(modulestore, structure)

            # First perform any additional indexing from the structure object
            cls.supplemental_index_information(modulestore, structure)

            # Now index the content
            for item in structure.get_children():
                prepare_item_index(item, groups_usage_info=groups_usage_info)
            # Bulk-index everything collected, then drop index entries for
            # items no longer present in the structure.
            searcher.index(cls.DOCUMENT_TYPE, items_index)
            cls.remove_deleted_items(searcher, structure_key, indexed_items)
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %r",
            structure_key,
            err)
        error_list.append(_('General indexing error occurred'))

    if error_list:
        raise SearchIndexingError('Error(s) present during indexing', error_list)

    return indexed_count["count"]
def test_search_from_url(self):
    """ ensure that we get the error back when the backend fails """
    searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)
    document = {"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}
    # The failing backend should raise as soon as we attempt to index.
    with self.assertRaises(StandardError):
        searcher.index("courseware_content", [document])
def add_to_search_index(modulestore, location, delete=False, raise_on_error=False):
    """ Add to courseware search index from given location and its children """
    error_list = []
    # TODO - inline for now, need to move this out to a celery task
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do.
        return

    if isinstance(location, CourseLocator):
        course_key = location
    else:
        course_key = location.course_key
    location_info = {
        "course": unicode(course_key),
    }

    def _fetch_item(item_location):
        """ Fetch the item from the modulestore location, log if not found, but continue """
        try:
            if isinstance(item_location, CourseLocator):
                item = modulestore.get_course(item_location)
            else:
                item = modulestore.get_item(item_location, revision=ModuleStoreEnum.RevisionOption.published_only)
        except ItemNotFoundError:
            log.warning('Cannot find: %s', item_location)
            return None
        return item

    def index_item_location(item_location, current_start_date):
        """ add this item to the search index """
        item = _fetch_item(item_location)
        if not item:
            return
        is_indexable = hasattr(item, "index_dictionary")
        # if it's not indexable and it does not have children, then ignore
        if not is_indexable and not item.has_children:
            return
        # if it has a defined start, then apply it and to it's children
        if item.start and (not current_start_date or item.start > current_start_date):
            current_start_date = item.start
        if item.has_children:
            for child_loc in item.children:
                index_item_location(child_loc, current_start_date)
        item_index = {}
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it has something to add to the index, then add it
        if item_index_dictionary:
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = unicode(item.scope_ids.usage_id)
                if current_start_date:
                    item_index['start_date'] = current_start_date
                searcher.index(DOCUMENT_TYPE, item_index)
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %s', item_location, unicode(err))
                error_list.append(_('Could not index item: {}').format(item_location))

    def remove_index_item_location(item_location):
        """ remove this item from the search index """
        item = _fetch_item(item_location)
        if item:
            # Remove children first, then the item itself.
            if item.has_children:
                for child_loc in item.children:
                    remove_index_item_location(child_loc)
            searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id))

    try:
        if delete:
            remove_index_item_location(location)
        else:
            index_item_location(location, None)
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %s",
            course_key,
            unicode(err)
        )
        error_list.append(_('General indexing error occurred'))

    if raise_on_error and error_list:
        raise SearchIndexingError(_('Error(s) present during indexing'), error_list)
def searcher(self):
    """ cached instance of search engine """
    cached = self._searcher
    if cached is None:
        # First access: create the engine and remember it for later calls.
        cached = SearchEngine.get_search_engine(TEST_INDEX_NAME)
        self._searcher = cached
    return cached
def index_course(cls, modulestore, course_key, triggered_at=None, reindex_age=REINDEX_AGE):
    """
    Process course for indexing

    Arguments:
    course_key (CourseKey) - course identifier

    triggered_at (datetime) - provides time at which indexing was triggered;
        useful for index updates - only things changed recently from that date
        (within REINDEX_AGE above ^^) will have their index updated, others skip
        updating their index but are still walked through in order to identify
        which items may need to be removed from the index
        If None, then a full reindex takes place

    Returns:
    Number of items that have been added to the index
    """
    error_list = []
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do (implicitly returns None).
        return

    location_info = {
        "course": unicode(course_key),
    }

    # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
    indexed_count = {
        "count": 0
    }

    # indexed_items is a list of all the items that we wish to remain in the
    # index, whether or not we are planning to actually update their index.
    # This is used in order to build a query to remove those items not in this
    # list - those are ready to be destroyed
    indexed_items = set()

    def index_item(item, skip_index=False):
        """
        Add this item to the search index and indexed_items list

        Arguments:
        item - item to add to index, its children will be processed recursively

        skip_index - simply walk the children in the tree, the content change is
            older than the REINDEX_AGE window and would have been already indexed.
            This should really only be passed from the recursive child calls when
            this method has determined that it is safe to do so
        """
        is_indexable = hasattr(item, "index_dictionary")
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it's not indexable and it does not have children, then ignore
        if not item_index_dictionary and not item.has_children:
            return

        item_id = unicode(item.scope_ids.usage_id)
        indexed_items.add(item_id)
        if item.has_children:
            # determine if it's okay to skip adding the children herein based upon how recently any may have changed
            skip_child_index = skip_index or \
                (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
            for child_item in item.get_children():
                index_item(child_item, skip_index=skip_child_index)

        if skip_index or not item_index_dictionary:
            return

        item_index = {}
        # if it has something to add to the index, then add it
        try:
            item_index.update(location_info)
            item_index.update(item_index_dictionary)
            item_index['id'] = item_id
            if item.start:
                item_index['start_date'] = item.start
            searcher.index(DOCUMENT_TYPE, item_index)
            indexed_count["count"] += 1
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not fail on one item of many
            log.warning('Could not index item: %s - %r', item.location, err)
            error_list.append(_('Could not index item: {}').format(item.location))

    def remove_deleted_items():
        """
        remove any item that is present in the search index that is not present in
        updated list of indexed items
        as we find items we can shorten the set of items to keep
        """
        response = searcher.search(
            doc_type=DOCUMENT_TYPE,
            field_dictionary={"course": unicode(course_key)},
            exclude_ids=indexed_items
        )
        result_ids = [result["data"]["id"] for result in response["results"]]
        for result_id in result_ids:
            searcher.remove(DOCUMENT_TYPE, result_id)

    try:
        with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
            course = modulestore.get_course(course_key, depth=None)
            for item in course.get_children():
                index_item(item)
            remove_deleted_items()
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %r",
            course_key,
            err
        )
        error_list.append(_('General indexing error occurred'))

    if error_list:
        raise SearchIndexingError('Error(s) present during indexing', error_list)

    return indexed_count["count"]
def engine(cls):
    """ Return course team search engine (if feature is enabled). """
    if not cls.search_is_enabled():
        # Feature disabled: return None, matching the implicit default.
        return None
    return SearchEngine.get_search_engine(index=cls.INDEX_NAME)
def mock_perform(cls, filter_terms, text_search):  # pylint: disable=no-member
    """Run the query straight against the search engine (test stand-in)."""
    search_engine = SearchEngine.get_search_engine(cls.INDEX_NAME)
    return search_engine.search(
        field_dictionary=filter_terms,
        query_string=text_search,
        size=MAX_SIZE,
    )