def prepare_es():
    """
    Reset Elasticsearch and rebuild the indices, the courses mapping and the stored
    scripts, so that the actual tests only have to manage indexing and searches.
    """
    indices = ES_INDICES_CLIENT

    # Start from a clean slate: drop every index that may still exist
    indices.delete(index="_all")

    # Recreate the indices used to test the ES features. Each index is closed while
    # the analysis settings are applied and reopened right after.
    index_names = (
        "richie_categories",
        "richie_courses",
        "richie_persons",
        "richie_organizations",
    )
    for name in index_names:
        indices.create(index=name)
        indices.close(index=name)
        indices.put_settings(index=name, body=ANALYSIS_SETTINGS)
        indices.open(index=name)

    # Only the courses index gets a mapping here: the default one from the Indexer
    indices.put_mapping(index="richie_courses", body=CoursesIndexer.mapping)

    # Register the stored scripts, each one under its own name
    for script_id in ("score", "state_field"):
        ES_CLIENT.put_script(id=script_id, body=CoursesIndexer.scripts[script_id])
def prepare_index(self, courses):
    """
    Not a test.
    This method is doing the heavy lifting for the tests in this class: it creates the
    filter pages, then rebuilds the "test_courses" Elasticsearch index and fills it
    with the given courses so that individual tests only have to run their query.
    """
    index_name = "test_courses"

    self.create_filter_pages()

    es_indices = IndicesClient(client=ES_CLIENT)

    # Clean slate: remove every existing index before creating ours
    es_indices.delete(index="_all")
    es_indices.create(index=index_name)

    # The index is closed while the analysis settings are applied, then reopened
    es_indices.close(index=index_name)
    es_indices.put_settings(index=index_name, body=ANALYSIS_SETTINGS)
    es_indices.open(index=index_name)

    # Use the default courses mapping from the Indexer
    es_indices.put_mapping(
        index=index_name, doc_type="course", body=CoursesIndexer.mapping
    )

    # Add the sorting script
    ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])

    def to_action(course):
        """Wrap a course in a bulk "create" action; keys of the course win on conflict."""
        metadata = {
            "_id": course["id"],
            "_index": index_name,
            "_op_type": "create",
            "_type": "course",
        }
        return {**metadata, **course}

    # Actually insert our courses in the index and make them searchable
    bulk(
        actions=[to_action(course) for course in courses],
        chunk_size=500,
        client=ES_CLIENT,
    )
    es_indices.refresh()
def execute_query(self, courses, querystring="", **extra):
    """
    Not a test.
    Rebuild the Elasticsearch courses index from the given courses, call the courses
    autocomplete API with the querystring (extra keyword arguments are forwarded to
    the test client) and return the decoded JSON response.
    """
    es_indices = IndicesClient(client=ES_CLIENT)

    # Clean slate: remove every existing index before creating ours
    es_indices.delete(index="_all")
    es_indices.create(index=COURSES_INDEX)

    # The index needs to be closed before we set an analyzer
    es_indices.close(index=COURSES_INDEX)
    es_indices.put_settings(index=COURSES_INDEX, body=ANALYSIS_SETTINGS)
    es_indices.open(index=COURSES_INDEX)

    # Use the default courses mapping from the Indexer
    es_indices.put_mapping(
        index=COURSES_INDEX, doc_type="course", body=CoursesIndexer.mapping
    )

    # Register the stored scripts, each one under its own name
    for script_id in ("score", "state_field"):
        ES_CLIENT.put_script(id=script_id, body=CoursesIndexer.scripts[script_id])

    def to_action(course):
        """
        Wrap a course in a bulk "create" action, completed with default field values.
        Keys defined on the course itself take precedence over the defaults.
        """
        defaults = {
            "_id": course["id"],
            "_index": COURSES_INDEX,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {"en": "en/url", "fr": "fr/url"},
            "categories": ["1", "2", "3"],
            "cover_image": {"en": "en/image", "fr": "fr/image"},
            "is_meta": False,
            "logo": {"en": "/en/some/img.png", "fr": "/fr/some/img.png"},
            "nb_children": 0,
            "organizations": ["11", "12", "13"],
        }
        return {**defaults, **course}

    # Actually insert our courses in the index and make them searchable
    bulk(
        actions=[to_action(course) for course in courses],
        chunk_size=500,
        client=ES_CLIENT,
    )
    es_indices.refresh()

    response = self.client.get(
        f"/api/v1.0/courses/autocomplete/?{querystring:s}", **extra
    )
    self.assertEqual(response.status_code, 200)

    return json.loads(response.content)
def execute_query(self, querystring=""):
    """
    Not a test.
    This method is doing the heavy lifting for the tests in this class: it creates and
    fills the index with our courses so we can run our queries and check our facet
    counts. It then executes the query and returns the decoded result from the API.
    """
    index_name = "test_courses"

    # Create the subject category page. This is necessary to link the subjects of
    # our courses with the "subjects" filter.
    # As it is the only page we create, we expect it to have the path "0001"
    CategoryFactory(page_reverse_id="subjects", should_publish=True)

    es_indices = IndicesClient(client=ES_CLIENT)

    # Clean slate: remove every existing index before creating ours
    es_indices.delete(index="_all")
    es_indices.create(index=index_name)

    # The index is closed while the analysis settings are applied, then reopened
    es_indices.close(index=index_name)
    es_indices.put_settings(index=index_name, body=ANALYSIS_SETTINGS)
    es_indices.open(index=index_name)

    # Use the default courses mapping from the Indexer
    es_indices.put_mapping(
        index=index_name, doc_type="course", body=CoursesIndexer.mapping
    )

    # Add the sorting script
    ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])

    # Build one bulk "create" action per course
    actions = []
    for course in COURSES:
        # Defaults come first so that keys defined on the course override them
        action = {
            "_id": course["id"],
            "_index": index_name,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {"en": "url"},
            "cover_image": {"en": "image"},
            "title": {"en": "title"},
            **course,
        }
        # Replace the course runs of the course: only their languages are kept and
        # all their dates are set to the current time
        action["course_runs"] = [
            {
                "languages": course_run["languages"],
                "start": arrow.utcnow().datetime,
                "end": arrow.utcnow().datetime,
                "enrollment_start": arrow.utcnow().datetime,
                "enrollment_end": arrow.utcnow().datetime,
            }
            for course_run in course["course_runs"]
        ]
        actions.append(action)

    # Actually insert our courses in the index and make them searchable
    bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
    es_indices.refresh()

    response = self.client.get(f"/api/v1.0/courses/?{querystring:s}")
    self.assertEqual(response.status_code, 200)

    return json.loads(response.content)
def prepare_index(self, courses, organizations=None):
    """
    Not a test.
    This method is doing the heavy lifting for the tests in this class: preparing the
    Elasticsearch indices (courses and organizations, plus empty categories and persons
    indices) so that individual tests just have to execute the query.
    """
    if not organizations:
        organizations = []

    self.create_filter_pages()

    indices = ES_INDICES_CLIENT

    def create_analyzed_index(name, mapping):
        """Create an index, apply the analysis settings while it is closed, then map it."""
        indices.create(index=name)
        indices.close(index=name)
        indices.put_settings(index=name, body=ANALYSIS_SETTINGS)
        indices.open(index=name)
        indices.put_mapping(index=name, body=mapping)

    # Clean slate: remove every existing index before creating ours
    indices.delete(index="_all")

    # Index for our organizations, with the default organizations mapping from the Indexer
    create_analyzed_index("richie_organizations", OrganizationsIndexer.mapping)

    # Set up empty indices for categories & persons. They need to exist to avoid errors
    # but we do not use results from them in our tests.
    for empty_index in ("richie_categories", "richie_persons"):
        indices.create(index=empty_index)

    # Index we'll use to test the ES features, with the default courses mapping
    create_analyzed_index("test_courses", CoursesIndexer.mapping)

    # Register the stored scripts, each one under its own name
    for script_id in ("score", "state_field"):
        ES_CLIENT.put_script(id=script_id, body=CoursesIndexer.scripts[script_id])

    # Prepare actions to insert our organizations and courses in their indices
    organization_actions = [
        OrganizationsIndexer.get_es_document_for_organization(
            organization.public_extension
        )
        for organization in organizations
    ]
    course_actions = [
        {
            **{
                "_id": course["id"],
                "_index": "test_courses",
                "_op_type": "create",
            },
            # Keys defined on the course take precedence over the metadata above
            **course,
        }
        for course in courses
    ]

    bulk_compat(
        actions=organization_actions + course_actions,
        chunk_size=500,
        client=ES_CLIENT,
    )
    indices.refresh()
def execute_query(self, querystring="", suite=None):
    """
    Not a test.
    This method is doing the heavy lifting for the tests in this class:
    - generate a set of 4 courses associated to our "interesting" course runs (randomly,
      unless an explicit suite of course runs is passed),
    - prepare the Elasticsearch index,
    - execute the query.

    Returns the courses definition along with the decoded response from the API.
    """
    index_name = "test_courses"

    # Unless a suite is imposed by the caller, shuffle our course runs
    # For example: ["H", "D", "C", "F", "B", "A", "G", "E"]
    if not suite:
        suite = random.sample(list(COURSE_RUNS), len(COURSE_RUNS))

    # Assume 4 courses and associate 2 consecutive course runs of the suite to each
    # > [[0, ["H", "D"]], [1, ["C", "F"]], [2, ["B", "A"]], [3, ["G", "E"]]]
    courses_definition = []
    for course_index in range(4):
        first = 2 * course_index
        courses_definition.append([course_index, suite[first : first + 2]])  # noqa

    es_indices = IndicesClient(client=ES_CLIENT)

    # Clean slate: remove every existing index before creating ours
    es_indices.delete(index="_all")
    es_indices.create(index=index_name)

    # The index is closed while the analysis settings are applied, then reopened
    es_indices.close(index=index_name)
    es_indices.put_settings(index=index_name, body=ANALYSIS_SETTINGS)
    es_indices.open(index=index_name)

    # Use the default courses mapping from the Indexer
    es_indices.put_mapping(
        index=index_name, doc_type="course", body=CoursesIndexer.mapping
    )

    # Add the sorting script
    ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])

    now = arrow.utcnow()

    def time_since_end(course_run):
        """Sort key: the later a course run ends, the smaller the key."""
        return now - course_run["end"]

    # Build one bulk "create" action per course
    actions = []
    for course_id, course_run_ids in courses_definition:
        # Defaults come first so that keys defined on the course override them
        action = {
            "_id": course_id,
            "_index": index_name,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {"en": "url"},
            "cover_image": {"en": "cover_image.jpg"},
            "duration": {"en": "N/A"},
            "effort": {"en": "N/A"},
            "icon": {"en": "icon.jpg"},
            "title": {"en": "title"},
            **COURSES[course_id],
        }
        # The sorting algorithm assumes that course runs are sorted by decreasing
        # end date in order to limit the number of iterations on courses with a
        # lot of archived course runs.
        action["course_runs"] = sorted(
            (COURSE_RUNS[course_run_id] for course_run_id in course_run_ids),
            key=time_since_end,
        )
        actions.append(action)

    # Actually insert our courses in the index and make them searchable
    bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
    es_indices.refresh()

    response = self.client.get(f"/api/v1.0/courses/?{querystring:s}")
    self.assertEqual(response.status_code, 200)

    return courses_definition, json.loads(response.content)
def execute_query(self, querystring="", **extra):
    """
    Not a test.
    Prepare the ElasticSearch index with a fixed set of 3 courses, call the courses
    autocomplete API with the querystring (extra keyword arguments are forwarded to
    the test client) and return the courses along with the decoded JSON response.
    """

    def completion(english, french):
        """Build the "complete" field of a course from its two source strings."""
        return {
            "en": slice_string_for_completion(english),
            "fr": slice_string_for_completion(french),
        }

    courses = [
        {
            "complete": completion(
                "Artificial intelligence for mushroom picking",
                "Intelligence artificielle pour la cueillette de chàmpiñons",
            ),
            "course_runs": [],
            "id": "24",
            "path": "001000",
            "title": {
                "en": "Artificial intelligence for mushroom picking",
                "fr": "Intelligence artificielle pour la cueillette de chàmpiñons",
            },
        },
        {
            # NOTE: the French completion source ("Protéger") and the French title
            # ("Prôtéger") differ on purpose; both are kept exactly as they are.
            "complete": completion(
                "Kung-fu moves for cloud infrastructure security",
                "Protéger ses serveurs par la pratique des arts martiaux",
            ),
            "course_runs": [],
            "id": "33",
            "path": "001001",
            "title": {
                "en": "Kung-fu moves for cloud infrastructure security",
                "fr": "Prôtéger ses serveurs par la pratique des arts martiaux",
            },
        },
        {
            "complete": completion(
                "Securing funding through token sales",
                "Lever des fonds par des ICO",
            ),
            "course_runs": [],
            "id": "51",
            "path": "001002",
            "title": {
                "en": "Securing funding through token sales",
                "fr": "Lever des fonds par des ICO",
            },
        },
    ]

    es_indices = IndicesClient(client=ES_CLIENT)

    # Clean slate: remove every existing index before creating ours
    es_indices.delete(index="_all")
    es_indices.create(index=COURSES_INDEX)

    # The index needs to be closed before we set an analyzer
    es_indices.close(index=COURSES_INDEX)
    es_indices.put_settings(index=COURSES_INDEX, body=ANALYSIS_SETTINGS)
    es_indices.open(index=COURSES_INDEX)

    # Use the default courses mapping from the Indexer
    es_indices.put_mapping(
        index=COURSES_INDEX, doc_type="course", body=CoursesIndexer.mapping
    )

    # Add the sorting script
    ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])

    def to_action(course):
        """
        Wrap a course in a bulk "create" action, completed with default field values.
        Keys defined on the course itself take precedence over the defaults.
        """
        defaults = {
            "_id": course["id"],
            "_index": COURSES_INDEX,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {"en": "en/url", "fr": "fr/url"},
            "categories": ["1", "2", "3"],
            "cover_image": {"en": "en/image", "fr": "fr/image"},
            "is_meta": False,
            "logo": {"en": "/en/some/img.png", "fr": "/fr/some/img.png"},
            "nb_children": 0,
            "organizations": ["11", "12", "13"],
        }
        return {**defaults, **course}

    # Actually insert our courses in the index and make them searchable
    bulk(
        actions=[to_action(course) for course in courses],
        chunk_size=500,
        client=ES_CLIENT,
    )
    es_indices.refresh()

    response = self.client.get(
        f"/api/v1.0/courses/autocomplete/?{querystring:s}", **extra
    )
    self.assertEqual(response.status_code, 200)

    return courses, json.loads(response.content)