Пример #1
0
 def test_movie_search_basic(self):
     """Check that a plain two-term query returns the expected movie.

     Calls ``ranked_movie_search`` with the terms 'luke' and 'father' and all
     other parameters set to empty strings, prints the elapsed time, and
     asserts that "tt0080684" is contained in the returned results.
     """
     query_params = {'query': ['luke', 'father'], 'movie_title': '', 'year': '', 'actor': ''}
     # perf_counter() is monotonic and high resolution; time.time() follows the
     # wall clock and may jump, which can distort (or negate) a short interval.
     start = time.perf_counter()
     results = ranked_movie_search(query_params, 20)
     elapsed = time.perf_counter() - start
     print("Basic {:.4f} s".format(elapsed))
     self.assertIn("tt0080684", results)  # Star Wars V should definitely be within Top 20 results
Пример #2
0
 def test_movie_search_advanced(self):
     """Check that restricting the year to 1980 ranks the expected movie first.

     Calls ``ranked_movie_search`` with the terms 'luke' and 'father' and the
     year parameter '1980-1980', prints the elapsed time, and asserts that
     "tt0080684" is the first element of the returned results.
     """
     query_params = {'query': ['luke', 'father'], 'movie_title': '', 'year': '1980-1980', 'actor': ''}
     # perf_counter() is monotonic and high resolution; time.time() follows the
     # wall clock and may jump, which can distort (or negate) a short interval.
     start = time.perf_counter()
     results = ranked_movie_search(query_params, 100)
     elapsed = time.perf_counter() - start
     print("Advanced match {:.4f} s".format(elapsed))
     # Fail with a readable assertion instead of an IndexError when nothing is returned.
     self.assertGreater(len(results), 0, "ranked_movie_search returned no results")
     self.assertEqual("tt0080684", results[0])  # Star Wars V should definitely be Top 1 result in year 1980
Пример #3
0
 def test_movie_search_advanced_categories(self):
     """Check that a category restriction changes which movies are returned.

     Calls ``ranked_movie_search`` with the terms 'luke' and 'father' and the
     categories 'History,Biography', prints the elapsed time, and asserts that
     "tt0080684" is absent while "tt0112573" is among the first ten results.
     """
     # NOTE(review): unlike the other tests, no 'movie_title'/'year'/'actor' keys
     # are passed here -- presumably ranked_movie_search treats them as optional; confirm.
     query_params = {'query': ['luke', 'father'], 'categories': 'History,Biography'}
     # perf_counter() is monotonic and high resolution; time.time() follows the
     # wall clock and may jump, which can distort (or negate) a short interval.
     start = time.perf_counter()
     results = ranked_movie_search(query_params, 100)
     elapsed = time.perf_counter() - start
     print("Advanced categories match {:.4f} s".format(elapsed))
     self.assertNotIn("tt0080684", results)  # Star Wars V should not be among history movies
     self.assertIn("tt0112573", results[:10])  # Braveheart, however, should be among top results
Пример #4
0
def movie_search():
    """Return ranked movie results for the JSON query of the current request.

    The request's JSON payload doubles as the cache key. On a cache hit the
    cached response is returned with a freshly measured 'query_time'.
    Otherwise the payload is preprocessed, the ranked movie ids are fetched
    from the database, the results are optionally filtered by keywords, and
    the response is stored in the cache before being returned.

    Input:
        JSON body of the current request; after preprocessing it must
        provide the keys 'query' and 'keywords'.
    Output:
        dict with the keys
        'movies': cleaned query results,
        'category_list': result of find_categories for the fetched movies
            (computed before keyword filtering),
        'query_time': elapsed seconds; on a cache miss this is measured
            before keyword filtering and result cleaning.
    """
    number_results = 500
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments the way a time.time() difference can.
    t0 = time.perf_counter()

    # Read the payload once and reuse it as cache key for lookup and store.
    # NOTE(review): presumably this is Flask's request; get_json() may return
    # None for non-JSON requests depending on configuration, in which case
    # .copy() below raises -- confirm the route only receives JSON bodies.
    payload = request.get_json()

    cached = cache.get(payload, which_cache=MOVIES_CACHE)
    if cached:
        # Return a shallow copy so the object held by the cache is not mutated.
        return {**cached, 'query_time': time.perf_counter() - t0}

    # Preprocess a copy so the payload used as cache key keeps its original top-level keys.
    query_params = preprocess_query_params(payload.copy())
    if not query_params['query']:
        # no query or the query consists only of stop words. Abort...
        return {
            'movies': [],
            'category_list': [],
            'query_time': time.perf_counter() - t0,
        }

    movie_id_results = ranked_movie_search(query_params, number_results)
    movies = db.get_movies_by_list_of_ids(movie_id_results)
    for movie in movies:
        if movie is not None:
            movie['movie_id'] = movie['_id']  # both movie_id and _id can be used

    # Collect the categories of all fetched movies (before keyword filtering).
    category_list = find_categories(movies)
    t1 = time.perf_counter()
    print(f"Query took {t1-t0} s to process")

    if len(query_params['keywords']) > 0:
        movies = filtering_keywords(movies, query_params['keywords'])

    output = {
        'movies': clean_results(movies),
        'category_list': category_list,
        'query_time': t1 - t0,
    }
    cache.store(payload, output, which_cache=MOVIES_CACHE)
    return output