def whoosh_search(user_id, query_terms):
    """Search the Whoosh index for titles, boosted by the user's artist profile.

    The query is ``AndMaybe(required, optional)``:

    * required -- an ``And`` of one ``Term('title', ...)`` per
      whitespace-separated token of ``query_terms``;
    * optional -- an ``Or`` of one ``Term('artist_id', ...)`` per entry of
      the user's artist profile, each boosted by that entry's score.

    See https://pythonhosted.org/Whoosh/api/query.html#whoosh.query.AndMaybe

    Args:
        user_id: Forwarded to ``get_user_artists_profile``; the result is
            iterated as ``(artist_id, artist_score)`` pairs.
        query_terms: Search terms as a single whitespace-separated string.

    Returns:
        dict: ``{'items': [...], 'runtime': ...}`` where ``items`` holds
        ``hit.fields()`` for at most 10 hits and ``runtime`` is
        ``results.runtime``.
    """
    user_artists_profile = get_user_artists_profile(user_id)

    # Earlier query-parser based variant, kept for reference:
    # q = qp.parse(
    #     "(" + query + " ANDMAYBE ((" + ") OR (".join([
    #         (" artist_id:" + str(artist_id) + "^" + str(1 + artist_score))
    #         for (artist_id, artist_score) in user_artists_profile]) + ")))")

    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing blanks. split(' ') would emit '' tokens for inputs
    # such as "foo  bar" or "foo ", each of which became a mandatory
    # Term('title', '') clause in the And below.
    title_terms = [Term('title', token) for token in query_terms.split()]
    artist_terms = [
        Term('artist_id', artist_id, boost=artist_score)
        for (artist_id, artist_score) in user_artists_profile
    ]
    q = AndMaybe(And(title_terms), Or(artist_terms))

    # Hits are read while the searcher is still open.
    with ix.searcher() as searcher:
        results = searcher.search(q, limit=10)
        return {
            'items': [hit.fields() for hit in results],
            'runtime': results.runtime,
        }
def whoosh_search(user_id, query_terms):
    """Run a personalised title search against the Whoosh index ``ix``.

    All whitespace-separated tokens of ``query_terms`` are required to match
    the ``title`` field. Terms on ``artist_id`` built from the user's artist
    profile form the optional side of an ``AndMaybe`` query, each carrying
    the profile score as its ``boost``.

    See https://pythonhosted.org/Whoosh/api/query.html#whoosh.query.AndMaybe

    Args:
        user_id: Passed to ``get_user_artists_profile``, whose result is
            unpacked as ``(artist_id, artist_score)`` pairs.
        query_terms: One string containing the search terms separated by
            whitespace.

    Returns:
        dict: ``'items'`` is the list of ``hit.fields()`` for up to 10 hits;
        ``'runtime'`` is ``results.runtime``.
    """
    user_artists_profile = get_user_artists_profile(user_id)

    # Previous query-string implementation, kept for reference:
    # q = qp.parse(
    #     "(" + query + " ANDMAYBE ((" + ") OR (".join([
    #         (" artist_id:" + str(artist_id) + "^" + str(1 + artist_score))
    #         for (artist_id, artist_score) in user_artists_profile]) + ")))")

    # Tokenise with split() rather than split(' '): the latter yields
    # empty-string tokens for repeated, leading or trailing spaces, and each
    # of those turned into a required Term('title', '') clause.
    required = And([Term('title', word) for word in query_terms.split()])
    optional = Or([
        Term('artist_id', artist_id, boost=artist_score)
        for (artist_id, artist_score) in user_artists_profile
    ])
    q = AndMaybe(required, optional)

    with ix.searcher() as searcher:
        results = searcher.search(q, limit=10)
        # Materialise the hits before the searcher is closed.
        items = [hit.fields() for hit in results]
        runtime = results.runtime
    return {'items': items, 'runtime': runtime}
def elasticsearch_search(user_id, query_terms, ix_name="songs"):
    """Run a personalised title search against an Elasticsearch index.

    Builds a ``bool`` query whose ``must`` clause matches ``query_terms``
    against ``title`` with the ``and`` operator, and whose ``should`` clauses
    match ``artist_id`` for each entry of the user's artist profile, using
    the profile score as the clause ``boost``.

    Args:
        user_id: Passed to ``get_user_artists_profile``, whose result is
            iterated as ``(artist_id, artist_score)`` pairs.
        query_terms: Search string for the ``title`` field.
        ix_name: Name of the index to search (default ``"songs"``).

    Returns:
        The value returned by ``Elasticsearch.search``, unmodified.
    """
    es = Elasticsearch()
    user_artists_profile = get_user_artists_profile(user_id)

    # Notes carried over from the ES guide
    # (https://www.elastic.co/guide/en/elasticsearch/guide/current/bool-query.html):
    # the bool query computes _score by adding the _score of every matching
    # must/should clause and dividing by the total number of must and should
    # clauses. must clauses all have to match; should clauses are optional
    # unless there is no must clause, in which case at least one has to match.
    #
    # Idea: slop could be interesting, as the order of terms seems important
    # in this context, e.g.
    #   "span_near": {"title": {"query": query_terms, "slop": 50}}
    title_clause = {
        "match": {
            "title": {
                "query": query_terms,
                "operator": "and"
            }
        }
    }
    artist_clauses = [
        {
            "match": {
                "artist_id": {
                    "query": str(artist_id),
                    "boost": artist_score
                }
            }
        }
        for artist_id, artist_score in user_artists_profile
    ]

    query = {
        "query": {
            "bool": {
                "must": title_clause,
                "should": artist_clauses
            }
        },
        # Alternative kept for reference: a rescore phase should be slightly
        # more performant once ES cache management is right (though incorrect
        # unless the window size is large enough).
        # https://www.elastic.co/guide/en/elasticsearch/reference/2.1/search-request-rescore.html
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#score-functions
        # "rescore": {
        #     "window_size": 100,
        #     "query": {
        #         "rescore_query": {
        #             "function_score": {
        #                 "functions": [
        #                     {
        #                         "filter": {"term": {"artist_id": str(artist_id)}},
        #                         "script_score": {"script": "_score + " + str(artist_score)}
        #                     }
        #                     for artist_id, artist_score in user_artists_profile
        #                 ],
        #                 "score_mode": "first",
        #                 "boost_mode": "replace"
        #             }
        #         }
        #     }
        # }
    }
    return es.search(index=ix_name, body=query)
def elasticsearch_search(user_id, query_terms, ix_name="songs"):
    """Query Elasticsearch for titles, favouring artists from the user's profile.

    The request body is a ``bool`` query:

    * ``must``   -- a ``match`` on ``title`` for ``query_terms`` with
      ``"operator": "and"``;
    * ``should`` -- one ``match`` on ``artist_id`` per
      ``(artist_id, artist_score)`` pair of the user's artist profile,
      boosted by ``artist_score``.

    Args:
        user_id: Forwarded to ``get_user_artists_profile``.
        query_terms: Text to match against the ``title`` field.
        ix_name: Index to search; defaults to ``"songs"``.

    Returns:
        Whatever ``es.search(index=ix_name, body=...)`` returns.
    """
    es = Elasticsearch()
    user_artists_profile = get_user_artists_profile(user_id)

    # One optional, boosted clause per artist in the profile.
    should_clauses = []
    for artist_id, artist_score in user_artists_profile:
        artist_match = {"query": str(artist_id), "boost": artist_score}
        should_clauses.append({"match": {"artist_id": artist_match}})

    # Possible refinement: slop could be interesting, since the order of the
    # terms seems important in this context:
    #   "span_near": {"title": {"query": query_terms, "slop": 50}}
    must_clause = {"match": {"title": {"query": query_terms, "operator": "and"}}}

    # Scoring notes, per
    # https://www.elastic.co/guide/en/elasticsearch/guide/current/bool-query.html:
    # _score is the sum of the _score of all matching must and should clauses
    # divided by the total number of must and should clauses. Every must
    # clause has to match. By default no should clause is required to match,
    # except when there are no must clauses: then at least one should clause
    # must match.
    bool_query = {"must": must_clause, "should": should_clauses}
    body = {"query": {"bool": bool_query}}

    # Not enabled: a rescore-based variant, expected to be slightly more
    # performant once ES cache management is right (though incorrect unless
    # the window size is large enough). See
    # https://www.elastic.co/guide/en/elasticsearch/reference/2.1/search-request-rescore.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#score-functions
    # body["rescore"] would be:
    # {
    #     "window_size": 100,
    #     "query": {
    #         "rescore_query": {
    #             "function_score": {
    #                 "functions": [
    #                     {
    #                         "filter": {"term": {"artist_id": str(artist_id)}},
    #                         "script_score": {"script": "_score + " + str(artist_score)}
    #                     }
    #                     for artist_id, artist_score in user_artists_profile
    #                 ],
    #                 "score_mode": "first",
    #                 "boost_mode": "replace"
    #             }
    #         }
    #     }
    # }
    return es.search(index=ix_name, body=body)