def test_search_from_url(self):
    """ ensure that we get the error back when the backend fails """
    searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)

    # Two documents under the courseware doc type plus one under another type.
    winter_doc = {"id": "FAKE_ID_1", "content": {"text": "Little Darling, it's been a long long lonely winter"}}
    year_doc = {"id": "FAKE_ID_2", "content": {"text": "Little Darling, it's been a year since sun been gone"}}
    searcher.index("courseware_content", [winter_doc])
    searcher.index("courseware_content", [year_doc])
    searcher.index("test_doc", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}])

    # The search view should surface the backend failure as a 5xx response
    # carrying a user-facing error message.
    code, results = post_request({"search_string": "sun"})
    self.assertGreater(code, 499)
    self.assertEqual(results["error"], 'An error occurred when searching for "sun"')

    # Searching the engine directly should raise from the failing backend.
    with self.assertRaises(StandardError):
        searcher.search(query_string="test search")
def _display_name_filter(self, library, display_name):
    """
    Filters library children by display name.

    NOTE(review): the search-backend branch returns LibraryUsageLocator
    objects while the fallback branch returns the raw child keys from
    ``library.children`` - confirm callers accept both shapes.
    """
    search_engine = SearchEngine.get_search_engine(index="library_index")
    if search_engine:
        log.warn("search engine found")
        # Restrict the query to this library; the display_name match is done
        # in Python below rather than in the filter clause.
        filter_clause = {
            "library": unicode(normalize_key_for_search(library.location.library_key)),
            # "content_type": CapaDescriptor.INDEX_CONTENT_TYPE,
            # "display_name": display_name
        }
        search_result = search_engine.search(field_dictionary=filter_clause)
        new_results = search_result.get('results', [])
        results = []
        for r in new_results:
            # presumably deep_search extracts the nested display_name from the
            # result document - confirm against its definition.
            v = self.deep_search(["display_name"], r)
            if v['display_name'] == display_name:
                results.append(r)
        return [LibraryUsageLocator.from_string(item['data']['id']) for item in results]
    else:
        log.warn("search engine NOT found")
        #return [key for key in library.children if self._filter_child_name(key, display_name)]
        results = []
        for r in library.children:
            # Fallback: load each child from the modulestore and read all of
            # its fields as JSON to find the display_name.
            p = self.store.get_item(r, 1)
            v = {}
            for field in p.fields.values():
                v[field.name] = field.read_json(p)
            # v = p.get_explicitly_set_fields_by_scope(Scope.settings)
            if v.get('display_name') == display_name:
                results.append(r)
        return results
def engine(cls):
    """ Return course team search engine (if feature is enabled). """
    try:
        search_engine = SearchEngine.get_search_engine(index=cls.INDEX_NAME)
    except ConnectionError as error:
        # Translate the elasticsearch client's exception into the
        # domain-specific error callers are expected to catch.
        logging.error('Error connecting to elasticsearch: %s', error)
        raise ElasticSearchConnectionError
    return search_engine
def handle(self, *args, **options):
    """
    By convention set by Django developers, this method actually executes command's actions.
    So, there could be no better docstring than emphasize this once again.
    """
    course_ids = options['course_ids']
    all_option = options['all']
    setup_option = options['setup']
    index_all_courses_option = all_option or setup_option

    # Exactly one of "explicit course ids" or "--all/--setup" must be supplied.
    if (not len(course_ids) and not index_all_courses_option) or \
            (len(course_ids) and index_all_courses_option):
        raise CommandError("reindex_course requires one or more <course_id>s OR the --all or --setup flags.")

    store = modulestore()

    if index_all_courses_option:
        index_name = CoursewareSearchIndexer.INDEX_NAME
        doc_type = CoursewareSearchIndexer.DOCUMENT_TYPE
        if setup_option:
            try:
                # try getting the ElasticSearch engine
                searcher = SearchEngine.get_search_engine(index_name)
            except exceptions.ElasticsearchException as exc:
                logging.exception(u'Search Engine error - %s', exc)
                return

            index_exists = searcher._es.indices.exists(index=index_name)  # pylint: disable=protected-access
            doc_type_exists = searcher._es.indices.exists_type(  # pylint: disable=protected-access
                index=index_name,
                doc_type=doc_type
            )

            index_mapping = searcher._es.indices.get_mapping(  # pylint: disable=protected-access
                index=index_name,
                doc_type=doc_type
            ) if index_exists and doc_type_exists else {}

            # Index and mapping already present: setup has nothing to do.
            if index_exists and index_mapping:
                return

        # if reindexing is done during devstack setup step, don't prompt the user
        if setup_option or query_yes_no(self.CONFIRMATION_PROMPT, default="no"):
            # in case of --setup or --all, get the list of course keys from all courses
            # that are stored in the modulestore
            course_keys = [course.id for course in modulestore().get_courses()]
        else:
            return
    else:
        # in case course keys are provided as arguments
        course_keys = map(self._parse_course_key, course_ids)

    for course_key in course_keys:
        CoursewareSearchIndexer.do_course_reindex(store, course_key)
def test_task_indexing_course(self):
    """ Making sure that the receiver correctly fires off the task when invoked by signal """
    searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
    course_filter = {"course": unicode(self.course.id)}

    # Nothing should be indexed for this course before the signal fires.
    before = searcher.search(field_dictionary=course_filter)
    self.assertEqual(before["total"], 0)

    listen_for_course_publish(self, self.course.id)

    # Note that this test will only succeed if celery is working in inline mode
    after = searcher.search(field_dictionary=course_filter)
    self.assertEqual(after["total"], 3)
def setUp(self):
    """ Set up tests. """
    super(ReindexCourseTeamTest, self).setUp()
    # Three teams in the same course, plus the engine that indexes them.
    for number in (1, 2, 3):
        team = CourseTeamFactory(course_id=COURSE_KEY1, team_id='team%d' % number)
        setattr(self, 'team%d' % number, team)
    self.search_engine = SearchEngine.get_search_engine(index='index_course_team')
def delete_course_task(user_id, course_key_string):
    """
    Delete the course identified by `course_key_string` on behalf of the user
    identified by `user_id`, then purge its documents from the courseware and
    course-discovery search indexes (when a search engine is configured).
    """
    profile = UserProfile.objects.get(pk=user_id)
    user = User.objects.get(pk=profile.user_id)
    course_key = CourseKey.from_string(course_key_string)
    delete_course_and_groups(course_key, user.id)
    searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
    # `is not None` per PEP 8; was `!= None`. Reuse the already-parsed
    # course_key instead of re-parsing the string a second time.
    if searcher is not None:
        CoursewareSearchIndexer.remove_deleted_items(searcher, course_key, [])
        searcher.remove(CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE, [course_key_string])
def test_task_library_update(self):
    """ Making sure that the receiver correctly fires off the task when invoked by signal """
    searcher = SearchEngine.get_search_engine(LibrarySearchIndexer.INDEX_NAME)
    library_search_key = unicode(normalize_key_for_search(self.library.location.library_key))
    library_filter = {"library": library_search_key}

    # The library should have no indexed content before the signal fires.
    before = searcher.search(field_dictionary=library_filter)
    self.assertEqual(before["total"], 0)

    listen_for_library_update(self, self.library.location.library_key)

    # Note that this test will only succeed if celery is working in inline mode
    after = searcher.search(field_dictionary=library_filter)
    self.assertEqual(after["total"], 2)
def remove_deleted_items(cls, structure_key):
    """ Remove item from Course About Search_index """
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured; nothing to clean up.
        return
    matches = searcher.search(
        doc_type=cls.DISCOVERY_DOCUMENT_TYPE,
        field_dictionary=cls._get_location_info(structure_key)
    )
    ids_to_remove = []
    for match in matches["results"]:
        ids_to_remove.append(match["data"]["id"])
    searcher.remove(cls.DISCOVERY_DOCUMENT_TYPE, ids_to_remove)
def _problem_type_filter(self, library, capa_type):
    """ Filters library children by capa type"""
    search_engine = SearchEngine.get_search_engine(index="library_index")
    if not search_engine:
        # No search backend available: walk the children directly instead.
        return [key for key in library.children if self._filter_child(key, capa_type)]
    filter_clause = {
        "library": unicode(normalize_key_for_search(library.location.library_key)),
        "content_type": CapaDescriptor.INDEX_CONTENT_TYPE,
        "problem_types": capa_type,
    }
    hits = search_engine.search(field_dictionary=filter_clause).get('results', [])
    return [LibraryUsageLocator.from_string(hit['data']['id']) for hit in hits]
def delete_temp_user_task(request, user_id):
    """
    Delete every course and library accessible to the (temporary) user
    identified by `user_id`, removing deleted courses from the courseware
    and course-discovery search indexes along the way.
    """
    profile = UserProfile.objects.get(pk=user_id)
    user = User.objects.get(pk=profile.user_id)
    courses = [format_course_for_view(c) for c in get_courses_accessible_to_user(request, user)[0]]
    libraries = [format_library_for_view(lib, user) for lib in accessible_libraries_list(user)]
    for course in courses:
        # BUG FIX: the original referenced an undefined `course_key_string`
        # inside this loop (NameError); derive it from the course dict.
        course_key_string = course["course_key"]
        course_key = CourseKey.from_string(course_key_string)
        delete_course_and_groups(course_key, user.id)
        searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
        if searcher is not None:  # `is not None` per PEP 8; was `!= None`
            CoursewareSearchIndexer.remove_deleted_items(searcher, course_key, [])
            searcher.remove(CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE, [course_key_string])
    for library in libraries:
        library_key = CourseKey.from_string(library['library_key'])
        delete_course_and_groups(library_key, user.id)
def searcher(self):
    """ Centralized call to getting the search engine for the test """
    engine = SearchEngine.get_search_engine(self.INDEX_NAME)
    return engine
def index_about_information(cls, modulestore, course):
    """
    Add the given course to the course discovery index

    Arguments:
    modulestore - modulestore object to use for operations
    course - course object from which to take properties, locate about information
    """
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured in this environment - nothing to do.
        return
    course_id = unicode(course.id)
    course_info = {
        'id': course_id,
        'course': course_id,
        'content': {},
        'image_url': course_image_url(course),
    }

    # load data for all of the 'about' modules for this course into a dictionary
    about_dictionary = {
        item.location.block_id: item.data
        for item in modulestore.get_items(course.id, qualifiers={"category": "about"})
    }

    about_context = {
        "course": course,
        "about_dictionary": about_dictionary,
    }

    for about_information in cls.ABOUT_INFORMATION_TO_INCLUDE:
        # Broad exception handler so that a single bad property does not scupper the collection of others
        try:
            section_content = about_information.get_value(**about_context)
        except:  # pylint: disable=bare-except
            section_content = None
            log.warning(
                "Course discovery could not collect property %s for course %s",
                about_information.property_name,
                course_id,
                exc_info=True,
            )
        if section_content:
            if about_information.index_flags & AboutInfo.ANALYSE:
                # ANALYSE-flagged values go into the searchable 'content'
                # dict, stripped of HTML when they arrive as strings.
                analyse_content = section_content
                if isinstance(section_content, basestring):
                    analyse_content = strip_html_content_to_text(section_content)
                course_info['content'][about_information.property_name] = analyse_content
            if about_information.index_flags & AboutInfo.PROPERTY:
                # PROPERTY-flagged values are stored as top-level fields,
                # unmodified.
                course_info[about_information.property_name] = section_content

    # Broad exception handler to protect around and report problems with indexing
    try:
        searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
    except:  # pylint: disable=bare-except
        log.exception(
            "Course discovery indexing error encountered, course discovery index may be out of date %s",
            course_id,
        )
        raise

    log.debug("Successfully added %s course to the course discovery index", course_id)
def index_about_information(cls, modulestore, course):
    """
    Add the given course to the course discovery index

    Arguments:
    modulestore - modulestore object to use for operations
    course - course object from which to take properties, locate about information
    """
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured in this environment - nothing to do.
        return
    course_id = unicode(course.id)
    course_info = {
        'id': course_id,
        'course': course_id,
        'content': {},
        'image_url': course_image_url(course),
    }

    # load data for all of the 'about' modules for this course into a dictionary
    about_dictionary = {
        item.location.name: item.data
        for item in modulestore.get_items(course.id, qualifiers={"category": "about"})
    }

    about_context = {
        "course": course,
        "about_dictionary": about_dictionary,
    }

    for about_information in cls.ABOUT_INFORMATION_TO_INCLUDE:
        # Broad exception handler so that a single bad property does not scupper the collection of others
        try:
            section_content = about_information.get_value(**about_context)
        except:  # pylint: disable=bare-except
            section_content = None
            log.warning(
                "Course discovery could not collect property %s for course %s",
                about_information.property_name,
                course_id,
                exc_info=True,
            )
        if section_content:
            if about_information.index_flags & AboutInfo.ANALYSE:
                # ANALYSE-flagged values go into the searchable 'content'
                # dict, stripped of HTML when they arrive as strings.
                analyse_content = section_content
                if isinstance(section_content, basestring):
                    analyse_content = strip_html_content_to_text(section_content)
                course_info['content'][about_information.property_name] = analyse_content
                if about_information.property_name == "more_info":
                    # NOTE(review): "more_info" is additionally promoted to a
                    # top-level field using the HTML-stripped value - confirm
                    # this special case is intentional.
                    course_info[about_information.property_name] = analyse_content
            if about_information.index_flags & AboutInfo.PROPERTY:
                # PROPERTY-flagged values are stored as top-level fields,
                # unmodified.
                course_info[about_information.property_name] = section_content

    # Broad exception handler to protect around and report problems with indexing
    try:
        searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
    except:  # pylint: disable=bare-except
        log.exception(
            "Course discovery indexing error encountered, course discovery index may be out of date %s",
            course_id,
        )
        raise

    log.debug(
        "Successfully added %s course to the course discovery index",
        course_id
    )
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
    """
    Process course for indexing

    Arguments:
    modulestore - modulestore object to use for operations

    structure_key (CourseKey|LibraryKey) - course or library identifier

    triggered_at (datetime) - provides time at which indexing was triggered;
        useful for index updates - only things changed recently from that date
        (within REINDEX_AGE above ^^) will have their index updated, others skip
        updating their index but are still walked through in order to identify
        which items may need to be removed from the index
        If None, then a full reindex takes place

    Returns:
    Number of items that have been added to the index
    """
    error_list = []
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do (implicitly returns None).
        return

    structure_key = cls.normalize_structure_key(structure_key)
    location_info = cls._get_location_info(structure_key)

    # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
    indexed_count = {
        "count": 0
    }

    # indexed_items is a list of all the items that we wish to remain in the
    # index, whether or not we are planning to actually update their index.
    # This is used in order to build a query to remove those items not in this
    # list - those are ready to be destroyed
    indexed_items = set()

    # items_index is a list of all the items index dictionaries.
    # it is used to collect all indexes and index them using bulk API,
    # instead of per item index API call.
    items_index = []

    def get_item_location(item):
        """ Gets the version agnostic item location """
        return item.location.version_agnostic().replace(branch=None)

    def prepare_item_index(item, skip_index=False, groups_usage_info=None):
        """
        Add this item to the items_index and indexed_items list

        Arguments:
        item - item to add to index, its children will be processed recursively

        skip_index - simply walk the children in the tree, the content change is
            older than the REINDEX_AGE window and would have been already indexed.
            This should really only be passed from the recursive child calls when
            this method has determined that it is safe to do so

        Returns:
        item_content_groups - content groups assigned to indexed item
        """
        is_indexable = hasattr(item, "index_dictionary")
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it's not indexable and it does not have children, then ignore
        if not item_index_dictionary and not item.has_children:
            return

        item_content_groups = None
        if item.category == "split_test":
            # Map each split_test child (and that child's own children) to the
            # user-partition group it serves, so content group info can be
            # attached to those items below.
            split_partition = item.get_selected_partition()
            for split_test_child in item.get_children():
                if split_partition:
                    for group in split_partition.groups:
                        group_id = unicode(group.id)
                        child_location = item.group_id_to_child.get(group_id, None)
                        if child_location == split_test_child.location:
                            groups_usage_info.update({
                                unicode(get_item_location(split_test_child)): [group_id],
                            })
                            for component in split_test_child.get_children():
                                groups_usage_info.update({
                                    unicode(get_item_location(component)): [group_id]
                                })

        if groups_usage_info:
            item_location = get_item_location(item)
            item_content_groups = groups_usage_info.get(unicode(item_location), None)

        item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
        indexed_items.add(item_id)
        if item.has_children:
            # determine if it's okay to skip adding the children herein based upon how recently any may have changed
            skip_child_index = skip_index or \
                (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
            children_groups_usage = []
            for child_item in item.get_children():
                if modulestore.has_published_version(child_item):
                    children_groups_usage.append(
                        prepare_item_index(
                            child_item,
                            skip_index=skip_child_index,
                            groups_usage_info=groups_usage_info
                        )
                    )
            # A child with no group assignment (None) means this item cannot
            # be restricted to a single content group either.
            if None in children_groups_usage:
                item_content_groups = None

        if skip_index or not item_index_dictionary:
            return

        item_index = {}
        # if it has something to add to the index, then add it
        try:
            item_index.update(location_info)
            item_index.update(item_index_dictionary)
            item_index['id'] = item_id
            if item.start:
                item_index['start_date'] = item.start
            item_index['content_groups'] = item_content_groups if item_content_groups else None
            item_index.update(cls.supplemental_fields(item))
            items_index.append(item_index)
            indexed_count["count"] += 1
            return item_content_groups
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not fail on one item of many
            log.warning('Could not index item: %s - %r', item.location, err)
            error_list.append(_('Could not index item: {}').format(item.location))

    try:
        with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
            structure = cls._fetch_top_level(modulestore, structure_key)
            groups_usage_info = cls.fetch_group_usage(modulestore, structure)

            # First perform any additional indexing from the structure object
            cls.supplemental_index_information(modulestore, structure)

            # Now index the content
            for item in structure.get_children():
                prepare_item_index(item, groups_usage_info=groups_usage_info)
            # Bulk-index everything collected, then drop index entries for
            # items no longer present in the structure.
            searcher.index(cls.DOCUMENT_TYPE, items_index)
            cls.remove_deleted_items(searcher, structure_key, indexed_items)
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %r",
            structure_key,
            err
        )
        error_list.append(_('General indexing error occurred'))

    if error_list:
        raise SearchIndexingError('Error(s) present during indexing', error_list)

    return indexed_count["count"]
def add_to_search_index(modulestore, location, delete=False, raise_on_error=False):
    """ Add to courseware search index from given location and its children """
    error_list = []
    # NOTE(review): this counts one per top-level index call, not one per
    # item actually indexed - confirm that is what callers expect.
    indexed_count = 0
    # TODO - inline for now, need to move this out to a celery task
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do.
        return

    if isinstance(location, CourseLocator):
        course_key = location
    else:
        course_key = location.course_key
    location_info = {
        "course": unicode(course_key),
    }

    def _fetch_item(item_location):
        """ Fetch the item from the modulestore location, log if not found, but continue """
        try:
            if isinstance(item_location, CourseLocator):
                item = modulestore.get_course(item_location)
            else:
                item = modulestore.get_item(
                    item_location,
                    revision=ModuleStoreEnum.RevisionOption.published_only)
        except ItemNotFoundError:
            log.warning('Cannot find: %s', item_location)
            return None
        return item

    def index_item_location(item_location, current_start_date):
        """ add this item to the search index """
        item = _fetch_item(item_location)
        if not item:
            return
        is_indexable = hasattr(item, "index_dictionary")
        # if it's not indexable and it does not have children, then ignore
        if not is_indexable and not item.has_children:
            return
        # if it has a defined start, then apply it and to it's children
        if item.start and (not current_start_date or item.start > current_start_date):
            current_start_date = item.start
        if item.has_children:
            for child_loc in item.children:
                index_item_location(child_loc, current_start_date)
        item_index = {}
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it has something to add to the index, then add it
        if item_index_dictionary:
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = unicode(item.scope_ids.usage_id)
                if current_start_date:
                    item_index['start_date'] = current_start_date
                searcher.index(DOCUMENT_TYPE, item_index)
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %s', item_location, unicode(err))
                error_list.append(
                    _('Could not index item: {}').format(item_location))

    def remove_index_item_location(item_location):
        """ remove this item from the search index """
        item = _fetch_item(item_location)
        if item:
            # Remove children first, then the item itself.
            if item.has_children:
                for child_loc in item.children:
                    remove_index_item_location(child_loc)
            searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id))

    try:
        if delete:
            remove_index_item_location(location)
        else:
            index_item_location(location, None)
            indexed_count += 1
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %s",
            course_key,
            unicode(err))
        error_list.append(_('General indexing error occurred'))

    if raise_on_error and error_list:
        raise SearchIndexingError(_('Error(s) present during indexing'), error_list)

    return indexed_count
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
    """
    Process course for indexing

    Arguments:
    modulestore - modulestore object to use for operations

    structure_key (CourseKey|LibraryKey) - course or library identifier

    triggered_at (datetime) - provides time at which indexing was triggered;
        useful for index updates - only things changed recently from that date
        (within REINDEX_AGE above ^^) will have their index updated, others skip
        updating their index but are still walked through in order to identify
        which items may need to be removed from the index
        If None, then a full reindex takes place

    Returns:
    Number of items that have been added to the index
    """
    error_list = []
    searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do (implicitly returns None).
        return

    structure_key = cls.normalize_structure_key(structure_key)
    location_info = cls._get_location_info(structure_key)

    # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
    indexed_count = {"count": 0}

    # indexed_items is a list of all the items that we wish to remain in the
    # index, whether or not we are planning to actually update their index.
    # This is used in order to build a query to remove those items not in this
    # list - those are ready to be destroyed
    indexed_items = set()

    # items_index is a list of all the items index dictionaries.
    # it is used to collect all indexes and index them using bulk API,
    # instead of per item index API call.
    items_index = []

    def get_item_location(item):
        """ Gets the version agnostic item location """
        return item.location.version_agnostic().replace(branch=None)

    def prepare_item_index(item, skip_index=False, groups_usage_info=None):
        """
        Add this item to the items_index and indexed_items list

        Arguments:
        item - item to add to index, its children will be processed recursively

        skip_index - simply walk the children in the tree, the content change is
            older than the REINDEX_AGE window and would have been already indexed.
            This should really only be passed from the recursive child calls when
            this method has determined that it is safe to do so

        Returns:
        item_content_groups - content groups assigned to indexed item
        """
        is_indexable = hasattr(item, "index_dictionary")
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it's not indexable and it does not have children, then ignore
        if not item_index_dictionary and not item.has_children:
            return

        item_content_groups = None
        if item.category == "split_test":
            # Map each split_test child (and that child's own children) to the
            # user-partition group it serves, so content group info can be
            # attached to those items below.
            split_partition = item.get_selected_partition()
            for split_test_child in item.get_children():
                if split_partition:
                    for group in split_partition.groups:
                        group_id = unicode(group.id)
                        child_location = item.group_id_to_child.get(group_id, None)
                        if child_location == split_test_child.location:
                            groups_usage_info.update({
                                unicode(get_item_location(split_test_child)): [group_id],
                            })
                            for component in split_test_child.get_children():
                                groups_usage_info.update({
                                    unicode(get_item_location(component)): [group_id]
                                })

        if groups_usage_info:
            item_location = get_item_location(item)
            item_content_groups = groups_usage_info.get(unicode(item_location), None)

        item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
        indexed_items.add(item_id)
        if item.has_children:
            # determine if it's okay to skip adding the children herein based upon how recently any may have changed
            skip_child_index = skip_index or \
                (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
            children_groups_usage = []
            for child_item in item.get_children():
                if modulestore.has_published_version(child_item):
                    children_groups_usage.append(
                        prepare_item_index(
                            child_item,
                            skip_index=skip_child_index,
                            groups_usage_info=groups_usage_info))
            # A child with no group assignment (None) means this item cannot
            # be restricted to a single content group either.
            if None in children_groups_usage:
                item_content_groups = None

        if skip_index or not item_index_dictionary:
            return

        item_index = {}
        # if it has something to add to the index, then add it
        try:
            item_index.update(location_info)
            item_index.update(item_index_dictionary)
            item_index['id'] = item_id
            if item.start:
                item_index['start_date'] = item.start
            item_index['content_groups'] = item_content_groups if item_content_groups else None
            item_index.update(cls.supplemental_fields(item))
            items_index.append(item_index)
            indexed_count["count"] += 1
            return item_content_groups
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not fail on one item of many
            log.warning('Could not index item: %s - %r', item.location, err)
            error_list.append(
                _('Could not index item: {}').format(item.location))

    try:
        with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
            structure = cls._fetch_top_level(modulestore, structure_key)
            groups_usage_info = cls.fetch_group_usage(modulestore, structure)

            # First perform any additional indexing from the structure object
            cls.supplemental_index_information(modulestore, structure)

            # Now index the content
            for item in structure.get_children():
                prepare_item_index(item, groups_usage_info=groups_usage_info)
            # Bulk-index everything collected, then drop index entries for
            # items no longer present in the structure.
            searcher.index(cls.DOCUMENT_TYPE, items_index)
            cls.remove_deleted_items(searcher, structure_key, indexed_items)
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %r",
            structure_key,
            err)
        error_list.append(_('General indexing error occurred'))

    if error_list:
        raise SearchIndexingError('Error(s) present during indexing', error_list)

    return indexed_count["count"]
def test_search_from_url(self):
    """ ensure that we get the error back when the backend fails """
    searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)
    document = {"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}
    # The failing backend should raise as soon as we attempt to index.
    with self.assertRaises(StandardError):
        searcher.index("courseware_content", [document])
def add_to_search_index(modulestore, location, delete=False, raise_on_error=False):
    """ Add to courseware search index from given location and its children """
    error_list = []
    # TODO - inline for now, need to move this out to a celery task
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do.
        return

    if isinstance(location, CourseLocator):
        course_key = location
    else:
        course_key = location.course_key
    location_info = {
        "course": unicode(course_key),
    }

    def _fetch_item(item_location):
        """ Fetch the item from the modulestore location, log if not found, but continue """
        try:
            if isinstance(item_location, CourseLocator):
                item = modulestore.get_course(item_location)
            else:
                item = modulestore.get_item(item_location, revision=ModuleStoreEnum.RevisionOption.published_only)
        except ItemNotFoundError:
            log.warning('Cannot find: %s', item_location)
            return None
        return item

    def index_item_location(item_location, current_start_date):
        """ add this item to the search index """
        item = _fetch_item(item_location)
        if not item:
            return
        is_indexable = hasattr(item, "index_dictionary")
        # if it's not indexable and it does not have children, then ignore
        if not is_indexable and not item.has_children:
            return
        # if it has a defined start, then apply it and to it's children
        if item.start and (not current_start_date or item.start > current_start_date):
            current_start_date = item.start
        if item.has_children:
            for child_loc in item.children:
                index_item_location(child_loc, current_start_date)
        item_index = {}
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it has something to add to the index, then add it
        if item_index_dictionary:
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = unicode(item.scope_ids.usage_id)
                if current_start_date:
                    item_index['start_date'] = current_start_date
                searcher.index(DOCUMENT_TYPE, item_index)
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %s', item_location, unicode(err))
                error_list.append(_('Could not index item: {}').format(item_location))

    def remove_index_item_location(item_location):
        """ remove this item from the search index """
        item = _fetch_item(item_location)
        if item:
            # Remove children first, then the item itself.
            if item.has_children:
                for child_loc in item.children:
                    remove_index_item_location(child_loc)
            searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id))

    try:
        if delete:
            remove_index_item_location(location)
        else:
            index_item_location(location, None)
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %s",
            course_key,
            unicode(err)
        )
        error_list.append(_('General indexing error occurred'))

    if raise_on_error and error_list:
        raise SearchIndexingError(_('Error(s) present during indexing'), error_list)
def searcher(self):
    """ cached instance of search engine """
    cached = self._searcher
    if cached is None:
        # First access: create the engine and remember it for later calls.
        cached = SearchEngine.get_search_engine(TEST_INDEX_NAME)
        self._searcher = cached
    return cached
def index_course(cls, modulestore, course_key, triggered_at=None, reindex_age=REINDEX_AGE):
    """
    Process course for indexing

    Arguments:
    course_key (CourseKey) - course identifier

    triggered_at (datetime) - provides time at which indexing was triggered;
        useful for index updates - only things changed recently from that date
        (within REINDEX_AGE above ^^) will have their index updated, others skip
        updating their index but are still walked through in order to identify
        which items may need to be removed from the index
        If None, then a full reindex takes place

    Returns:
    Number of items that have been added to the index
    """
    error_list = []
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured - nothing to do (implicitly returns None).
        return

    location_info = {
        "course": unicode(course_key),
    }

    # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
    indexed_count = {
        "count": 0
    }

    # indexed_items is a list of all the items that we wish to remain in the
    # index, whether or not we are planning to actually update their index.
    # This is used in order to build a query to remove those items not in this
    # list - those are ready to be destroyed
    indexed_items = set()

    def index_item(item, skip_index=False):
        """
        Add this item to the search index and indexed_items list

        Arguments:
        item - item to add to index, its children will be processed recursively

        skip_index - simply walk the children in the tree, the content change is
            older than the REINDEX_AGE window and would have been already indexed.
            This should really only be passed from the recursive child calls when
            this method has determined that it is safe to do so
        """
        is_indexable = hasattr(item, "index_dictionary")
        item_index_dictionary = item.index_dictionary() if is_indexable else None
        # if it's not indexable and it does not have children, then ignore
        if not item_index_dictionary and not item.has_children:
            return

        item_id = unicode(item.scope_ids.usage_id)
        indexed_items.add(item_id)
        if item.has_children:
            # determine if it's okay to skip adding the children herein based upon how recently any may have changed
            skip_child_index = skip_index or \
                (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
            for child_item in item.get_children():
                index_item(child_item, skip_index=skip_child_index)

        if skip_index or not item_index_dictionary:
            return

        item_index = {}
        # if it has something to add to the index, then add it
        try:
            item_index.update(location_info)
            item_index.update(item_index_dictionary)
            item_index['id'] = item_id
            if item.start:
                item_index['start_date'] = item.start
            searcher.index(DOCUMENT_TYPE, item_index)
            indexed_count["count"] += 1
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not fail on one item of many
            log.warning('Could not index item: %s - %r', item.location, err)
            error_list.append(_('Could not index item: {}').format(item.location))

    def remove_deleted_items():
        """
        remove any item that is present in the search index that is not present in
        updated list of indexed items
        as we find items we can shorten the set of items to keep
        """
        response = searcher.search(
            doc_type=DOCUMENT_TYPE,
            field_dictionary={"course": unicode(course_key)},
            exclude_ids=indexed_items
        )
        result_ids = [result["data"]["id"] for result in response["results"]]
        for result_id in result_ids:
            searcher.remove(DOCUMENT_TYPE, result_id)

    try:
        with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
            course = modulestore.get_course(course_key, depth=None)
            for item in course.get_children():
                index_item(item)
            remove_deleted_items()
    except Exception as err:  # pylint: disable=broad-except
        # broad exception so that index operation does not prevent the rest of the application from working
        log.exception(
            "Indexing error encountered, courseware index may be out of date %s - %r",
            course_key,
            err
        )
        error_list.append(_('General indexing error occurred'))

    if error_list:
        raise SearchIndexingError('Error(s) present during indexing', error_list)

    return indexed_count["count"]
def engine(cls):
    """ Return course team search engine (if feature is enabled). """
    if not cls.search_is_enabled():
        # Feature disabled: return None, matching the implicit default.
        return None
    return SearchEngine.get_search_engine(index=cls.INDEX_NAME)
def mock_perform(cls, filter_terms, text_search):  # pylint: disable=no-member
    """Run the query straight against the search engine (test stand-in)."""
    search_engine = SearchEngine.get_search_engine(cls.INDEX_NAME)
    return search_engine.search(
        field_dictionary=filter_terms,
        query_string=text_search,
        size=MAX_SIZE,
    )