def delete_subject(subject):
    """Delete the indexed documents whose ``subject`` field equals *subject*.

    The index name is read from the ``elasticsearch_index_name`` config
    entry and is passed both as the index and as the document type. The
    request body carries ``'conflicts': 'proceed'`` next to the term query.
    """
    es_index = utils.get_config()['elasticsearch_index_name']
    subject_filter = {'term': {'subject': subject}}
    request = {'query': subject_filter, 'conflicts': 'proceed'}
    utils.get_es().delete_by_query(
        index=es_index,
        doc_type=es_index,
        body=request,
    )
def search_es(es_query):
    """Search the configured index for *es_query* and return the raw response.

    The text is matched with a fuzzy ``multi_match`` (``or`` operator) over a
    fixed list of fields, with ``displayId`` boosted by 3. The resulting score
    is then rescaled by a script that multiplies it by the natural log of
    ``pagerank + 1``. The request always asks for hits 0..10000.
    """
    searched_fields = [
        'subject',
        'displayId^3',
        'version',
        'name',
        'description',
        'type',
        'keywords',
    ]
    text_match = {
        'multi_match': {
            'query': es_query,
            'fields': searched_fields,
            'operator': 'or',
            'fuzziness': 'AUTO',
        }
    }
    pagerank_scaling = {
        'script': {
            # Math.log is the natural logarithm.
            'source': "_score * Math.log(doc['pagerank'].value + 1)"
        }
    }
    request = {
        'query': {
            'function_score': {
                'query': text_match,
                'script_score': pagerank_scaling,
            }
        },
        'from': 0,
        'size': 10000,
    }
    return utils.get_es().search(
        index=utils.get_config()['elasticsearch_index_name'],
        body=request,
    )
def create_parts_index(index_name):
    """Create the index *index_name*, deleting any existing one first.

    The new index gets a mapping, keyed by the index name itself, that
    declares ``subject`` and ``graph`` as ``keyword`` fields. Progress is
    reported through ``utils.log``.
    """
    already_present = utils.get_es().indices.exists(index_name)
    if already_present:
        utils.log('Index already exists -> deleting')
        utils.get_es().indices.delete(index=index_name)

    keyword_properties = {
        field: {'type': 'keyword'} for field in ('subject', 'graph')
    }
    index_definition = {
        'mappings': {
            index_name: {
                'properties': keyword_properties,
            }
        }
    }

    utils.get_es().indices.create(index=index_name, body=index_definition)
    utils.log('Index created')
def bulk_index_parts(parts_response, index_name):
    """Index every item of *parts_response* into *index_name* in one bulk call.

    Each item becomes one bulk action whose ``_id`` is the item's position
    in *parts_response* and whose ``_index`` and ``_type`` are both
    *index_name*. The outcome is reported through ``utils.log``.

    Args:
        parts_response: Iterable of documents to index, in order.
        index_name: Target index; also used as the document type.
    """
    actions = [
        {
            '_index': index_name,
            '_type': index_name,
            '_id': position,
            '_source': part,
        }
        for position, part in enumerate(parts_response)
    ]

    utils.log('Bulk indexing')
    stats = helpers.bulk(utils.get_es(), actions)

    # Second element of the result holds the reported errors.
    errors = stats[1]
    if not errors:
        utils.log('Bulk indexing complete')
    else:
        # The error entries are not plain strings (elasticsearch-py documents
        # them as per-action error items), so each one is stringified before
        # joining; joining them directly would raise TypeError.
        utils.log(
            '[Error] Error_messages: '
            + '\n'.join(str(error) for error in errors)
        )
def empty_search_es(offset, limit, allowed_graphs):
    """Return a page of documents restricted to *allowed_graphs*.

    No search text is involved: documents are selected only by their
    ``graph`` field (a ``term`` query when exactly one graph is given, a
    ``terms`` query otherwise). The score is rescaled by the natural log of
    ``pagerank + 1``.

    Args:
        offset: Sent as the request's ``from`` value.
        limit: Sent as the request's ``size`` value.
        allowed_graphs: Sequence of graph identifiers to filter on.
    """
    graph_filter = (
        {'term': {'graph': allowed_graphs[0]}}
        if len(allowed_graphs) == 1
        else {'terms': {'graph': allowed_graphs}}
    )
    pagerank_scaling = {
        'script': {
            # Math.log is the natural logarithm.
            'source': "_score * Math.log(doc['pagerank'].value + 1)"
        }
    }
    request = {
        'query': {
            'function_score': {
                'query': graph_filter,
                'script_score': pagerank_scaling,
            }
        },
        'from': offset,
        'size': limit,
    }
    return utils.get_es().search(
        index=utils.get_config()['elasticsearch_index_name'],
        body=request,
    )
def build_query_index(cls, type, verbose=False, explain=True, method='like_text'):
    """Build the ``QueryIndex`` preconfigured for the dataset named by *type*.

    Recognised values of *type* are ``'tmdb'``, ``'imdb'``, ``'movielens'``
    and ``'mirflickr'``; any other value yields ``None``. *verbose*,
    *explain* and *method* are forwarded unchanged as keyword arguments.

    NOTE(review): the first parameter is named ``cls``, so this is presumably
    a classmethod, but no decorator or enclosing class is visible here.
    """
    es = utils.get_es()

    # For each dataset pick the positional arguments that follow the client
    # and the dataset-specific keyword arguments. The search fields stay
    # keyword for tmdb/imdb and positional for movielens/mirflickr.
    if type == 'tmdb':
        positional = ('tmdb', 'movies')
        options = {
            'search_fields': [
                'alternative_titles.titles.title',
                'credits.cast.character',
                'credits.cast.name',
                'credits.crew.name',
                'genres.name',
                'keywords.keywords.name',
                'original_title',
                'overview',
                'production_companies.name',
                'production_countries.name',
                'tagline',
                'title',
            ],
            'more_like_this_params': {
                'fields': [
                    'keywords.keywords.name.terms',
                    'title',
                    'overview',
                    'tagline',
                    'genres.name.terms',
                    'credits.cast.name.terms',
                    'credits.crew.name.terms',
                    'alternative_titles.titles.title',
                    # Not currently used:
                    # 'credits.cast.character.terms',
                    # 'original_title',
                    # 'production_companies.name.terms',
                    # 'production_countries.name.terms',
                ]
            },
        }
    elif type == 'imdb':
        positional = ('imdb', 'movies')
        options = {
            'search_fields': [
                'title',
                'also_known_as',
                'plot',
                'actors',
                'writers',
                'directors',
                'genres',
                'plot_keywords',
                'tagline',
            ],
            'more_like_this_params': {
                'fields': ['plot_keywords.terms']
            },
            'facets': [
                ('Plot Keywords', 'plot_keywords.terms'),
                ('Actors', 'actors.terms'),
                ('Directors', 'directors.terms'),
                ('Genres', 'genres.terms'),
            ],
        }
    elif type == 'movielens':
        positional = ('movielens', 'movies', ['title', 'likes'])
        options = {
            'more_like_this_params': {
                'fields': ['likes']
            },
        }
    elif type == 'mirflickr':
        positional = ('mirflickr', 'images', ['_all'])
        options = {
            'more_like_this_params': {
                'fields': [
                    'tags_raw.raw',
                    'annotations.potential.raw',
                    'annotations.relevant.raw',
                    'features.colors',
                    # Not currently used:
                    # 'features.colors_with_counts'
                ]
            },
        }
    else:
        # Unknown dataset name: nothing is built.
        return None

    options.update(verbose=verbose, explain=explain, method=method)
    return QueryIndex(es, *positional, **options)
def index_part(part):
    """Index *part*, first removing documents that share its subject.

    ``delete_subject`` is called with ``part['subject']`` before the new
    document is written. The index name comes from the
    ``elasticsearch_index_name`` config entry and doubles as the document
    type.
    """
    delete_subject(part['subject'])

    es_index = utils.get_config()['elasticsearch_index_name']
    utils.get_es().index(
        index=es_index,
        doc_type=es_index,
        body=part,
    )