Пример #1
0
def delete_subject(subject):
    """Delete every indexed document whose ``subject`` term equals *subject*.

    The index name is read from the ``elasticsearch_index_name`` config entry
    and is passed both as the index and as the document type.
    """
    target = utils.get_config()['elasticsearch_index_name']

    # NOTE(review): 'conflicts': 'proceed' is sent inside the request body;
    # presumably it keeps version conflicts from aborting the deletion --
    # confirm against the Elasticsearch version in use.
    request = {
        'query': {
            'term': {'subject': subject},
        },
        'conflicts': 'proceed',
    }

    es_client = utils.get_es()
    es_client.delete_by_query(index=target, doc_type=target, body=request)
Пример #2
0
def search_es(es_query):
    """Run a fuzzy multi-field text search and return the raw search response.

    *es_query* is matched (OR operator, automatic fuzziness) against several
    fields, with ``displayId`` boosted by a factor of 3. Each hit's score is
    then multiplied by the natural log of ``pagerank + 1``. Up to 10000 hits
    are requested, starting at offset 0.
    """
    searched_fields = [
        'subject',
        'displayId^3',
        'version',
        'name',
        'description',
        'type',
        'keywords',
    ]

    text_match = {
        'multi_match': {
            'query': es_query,
            'fields': searched_fields,
            'operator': 'or',
            'fuzziness': 'AUTO',
        }
    }

    # Math.log is a natural log
    pagerank_boost = {
        'script': {
            'source': "_score * Math.log(doc['pagerank'].value + 1)"
        }
    }

    request = {
        'query': {
            'function_score': {
                'query': text_match,
                'script_score': pagerank_boost,
            }
        },
        'from': 0,
        'size': 10000,
    }

    es_client = utils.get_es()
    target = utils.get_config()['elasticsearch_index_name']
    return es_client.search(index=target, body=request)
Пример #3
0
def create_parts_index(index_name):
    """(Re)create the index *index_name* with keyword ``subject``/``graph`` fields.

    If an index with that name already exists it is deleted first, so any
    documents it held are discarded. The mapping type is named after the
    index itself.
    """
    already_there = utils.get_es().indices.exists(index_name)
    if already_there:
        utils.log('Index already exists -> deleting')
        utils.get_es().indices.delete(index=index_name)

    # Both fields are mapped as `keyword`.
    keyword_properties = {
        field: {'type': 'keyword'} for field in ('subject', 'graph')
    }
    index_definition = {
        'mappings': {
            index_name: {'properties': keyword_properties},
        }
    }

    utils.get_es().indices.create(index=index_name, body=index_definition)
    utils.log('Index created')
Пример #4
0
def bulk_index_parts(parts_response, index_name):
    """Bulk-index every item of *parts_response* into *index_name*.

    Each item becomes one document whose ``_id`` is its position in
    *parts_response*; *index_name* is used both as the index and as the
    document type. The outcome is reported through ``utils.log``.

    Args:
        parts_response: iterable of document sources to index.
        index_name: name of the target index (also used as ``_type``).
    """
    actions = [
        {
            '_index': index_name,
            '_type': index_name,
            '_id': position,
            '_source': part,
        }
        for position, part in enumerate(parts_response)
    ]

    utils.log('Bulk indexing')
    stats = helpers.bulk(utils.get_es(), actions)
    errors = stats[1]
    if not errors:
        utils.log('Bulk indexing complete')
    else:
        # Error entries are not guaranteed to be strings (the elasticsearch-py
        # bulk helper reports them as dicts), so stringify each one before
        # joining; joining them directly would raise TypeError.
        messages = '\n'.join(str(error) for error in errors)
        utils.log('[Error] Error_messages: ' + messages)
Пример #5
0
def empty_search_es(offset, limit, allowed_graphs):
    """Return one page of documents restricted to *allowed_graphs*.

    No text query is applied: documents are filtered on their ``graph``
    field only, and each score is multiplied by the natural log of
    ``pagerank + 1``. *offset* and *limit* are sent as the ``from`` and
    ``size`` of the request. The raw search response is returned.
    """
    # Exactly one graph -> `term` query; any other count -> `terms` query.
    graph_filter = (
        {'term': {'graph': allowed_graphs[0]}}
        if len(allowed_graphs) == 1
        else {'terms': {'graph': allowed_graphs}}
    )

    # Math.log is a natural log
    pagerank_boost = {
        'script': {
            'source': "_score * Math.log(doc['pagerank'].value + 1)"
        }
    }

    request = {
        'query': {
            'function_score': {
                'query': graph_filter,
                'script_score': pagerank_boost,
            }
        },
        'from': offset,
        'size': limit,
    }

    es_client = utils.get_es()
    target = utils.get_config()['elasticsearch_index_name']
    return es_client.search(index=target, body=request)
Пример #6
0
 def build_query_index(cls, type, verbose=False, explain=True, method='like_text'):
     """Build the QueryIndex preconfigured for one of the known datasets.

     *type* selects the dataset: 'tmdb', 'imdb', 'movielens' or 'mirflickr'.
     Any other value falls through and yields None. *verbose*, *explain*
     and *method* are forwarded unchanged to QueryIndex.
     """
     es = utils.get_es()
     # Options passed identically to every QueryIndex built below.
     shared_options = {'verbose': verbose, 'explain': explain, 'method': method}

     if type == 'tmdb':
         tmdb_search_fields = [
             'alternative_titles.titles.title',
             'credits.cast.character',
             'credits.cast.name',
             'credits.crew.name',
             'genres.name',
             'keywords.keywords.name',
             'original_title',
             'overview',
             'production_companies.name',
             'production_countries.name',
             'tagline',
             'title',
         ]
         # Currently left out of the more-like-this fields:
         #   'credits.cast.character.terms', 'original_title',
         #   'production_companies.name.terms',
         #   'production_countries.name.terms'
         tmdb_mlt_params = {
             'fields': [
                 'keywords.keywords.name.terms',
                 'title',
                 'overview',
                 'tagline',
                 'genres.name.terms',
                 'credits.cast.name.terms',
                 'credits.crew.name.terms',
                 'alternative_titles.titles.title',
             ]
         }
         return QueryIndex(
             es,
             'tmdb',
             'movies',
             search_fields=tmdb_search_fields,
             more_like_this_params=tmdb_mlt_params,
             **shared_options
         )

     if type == 'imdb':
         imdb_search_fields = [
             'title',
             'also_known_as',
             'plot',
             'actors',
             'writers',
             'directors',
             'genres',
             'plot_keywords',
             'tagline',
         ]
         imdb_mlt_params = {'fields': ['plot_keywords.terms']}
         # (label, field) pairs handed to QueryIndex as facets.
         imdb_facets = [
             ('Plot Keywords', 'plot_keywords.terms'),
             ('Actors', 'actors.terms'),
             ('Directors', 'directors.terms'),
             ('Genres', 'genres.terms'),
         ]
         return QueryIndex(
             es,
             'imdb',
             'movies',
             search_fields=imdb_search_fields,
             more_like_this_params=imdb_mlt_params,
             facets=imdb_facets,
             **shared_options
         )

     if type == 'movielens':
         # The field list is passed positionally here (fourth argument).
         return QueryIndex(
             es,
             'movielens',
             'movies',
             ['title', 'likes'],
             more_like_this_params={'fields': ['likes']},
             **shared_options
         )

     if type == 'mirflickr':
         # 'features.colors_with_counts' is currently left out of the
         # more-like-this fields.
         mirflickr_mlt_params = {
             'fields': [
                 'tags_raw.raw',
                 'annotations.potential.raw',
                 'annotations.relevant.raw',
                 'features.colors',
             ]
         }
         # The field list is passed positionally here (fourth argument).
         return QueryIndex(
             es,
             'mirflickr',
             'images',
             ['_all'],
             more_like_this_params=mirflickr_mlt_params,
             **shared_options
         )

     # Unknown dataset type: nothing to build.
     return None
Пример #7
0
def index_part(part):
    """Index *part*, replacing any documents that share its subject.

    Existing documents with the same ``part['subject']`` are deleted first;
    then *part* is indexed into the index named by the
    ``elasticsearch_index_name`` config entry (also used as the doc type).
    """
    # Delete before indexing so the new document is not removed with the old.
    delete_subject(part['subject'])

    target = utils.get_config()['elasticsearch_index_name']
    es_client = utils.get_es()
    es_client.index(index=target, doc_type=target, body=part)