Exemplo n.º 1
0
def index_collection(collection):
    """Write a collection's metadata and statistics to the search index.

    A collection whose ``deleted_at`` is set is not indexed; it is handed to
    ``delete_collection`` instead and the result of that call is returned.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    # Plain attributes that are copied onto the document unchanged.
    copied_fields = (
        'foreign_id',
        'created_at',
        'updated_at',
        'label',
        'summary',
        'category',
        'countries',
        'languages',
        'managed',
        'roles',
    )
    data = {name: getattr(collection, name) for name in copied_fields}

    # The creator is optional; only embed it when one is attached.
    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name
        }

    # Merge in whatever get_collection_stats() reports for this collection.
    stats = get_collection_stats(collection.id)
    data.update(stats)

    es.index(
        index=collection_index(),
        doc_type=collection_type(),
        id=collection.id,
        body=data,
    )
Exemplo n.º 2
0
def upgrade_search():
    """Create each search index if needed and apply its mapping.

    For the collection, entity and record indexes in turn: issue a create
    request (passing 404 and 400 in the client's ``ignore`` list), put the
    mapping for doc type ``'doc'``, open the index and refresh it.
    """
    index_mappings = [
        (collection_index(), COLLECTION_MAPPING),
        (entity_index(), ENTITY_MAPPING),
        (record_index(), RECORD_MAPPING),
    ]
    for name, mapping in index_mappings:
        log.info("Creating index: %s", name)
        indices = es.indices
        indices.create(name, ignore=[404, 400])
        indices.put_mapping(index=name, doc_type='doc', body=mapping)
        indices.open(index=name, ignore=[400, 404])
        indices.refresh(index=name)
Exemplo n.º 3
0
    def setUp(self):
        """Reset the search indexes and the database before a test.

        The first call (detected by the absence of the
        ``TestCase._global_test_state`` marker) runs ``delete_index()`` and
        ``upgrade_search()``. Later calls instead delete all documents from
        the collection, entity and record indexes with a ``match_all`` query.
        Every call then rebuilds the database and the system roles.
        """
        if hasattr(TestCase, '_global_test_state'):
            # Indexes were set up by an earlier test: just empty them.
            everything = {'query': {'match_all': {}}}
            targets = [collection_index(), entity_index(), record_index()]
            es.delete_by_query(
                index=targets,
                body=everything,
                refresh=True,
                conflicts='proceed',
            )
        else:
            # Mark the one-off index setup as done before performing it.
            TestCase._global_test_state = True
            delete_index()
            upgrade_search()

        destroy_db()
        db.create_all()
        create_system_roles()
Exemplo n.º 4
0
def all_indexes():
    """Return the collection, entity and record indexes as one list."""
    names = [
        collection_index(),
        entity_index(),
        record_index(),
    ]
    return names
Exemplo n.º 5
0
def index_collection(collection):
    """Index a collection together with statistics about its entities.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` and that call's result is returned. Otherwise the
    document is built from the collection's attributes, enriched with an
    entity count, per-schema counts and (when the collection declares none)
    countries/languages taken from an aggregation over its entities, and
    written to the collection index.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'summary': collection.summary,
        'category': collection.category,
        'countries': collection.countries,
        'languages': collection.languages,
        'managed': collection.managed,
        'roles': collection.roles,
        'schemata': {},
    }

    # Values that feed the combined 'text' field.
    texts = [
        collection.label,
        collection.foreign_id,
        collection.summary,
        collection.category,
    ]

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name
        }
        texts.append(creator.name)

    def terms_agg(field, size):
        # Terms aggregation over `field`, limited to `size` buckets.
        return {'terms': {'field': field, 'size': size}}

    # Compute some statistics on the content of the collection. No hits are
    # requested ('size': 0); only the total and the aggregations are read.
    query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [
                    {'term': {'collection_id': collection.id}},
                    {'term': {'schemata': Entity.THING}},
                ]
            }
        },
        'aggs': {
            'schema': terms_agg('schema', 1000),
            'countries': terms_agg('countries', 500),
            'languages': terms_agg('languages', 100),
        }
    }
    result = es.search(index=entities_index(), body=query)
    aggregations = result.get('aggregations')
    data['count'] = result['hits']['total']

    # Expose the number of entities per schema.
    schema_buckets = aggregations['schema']['buckets']
    data['schemata'].update(
        {bucket['key']: bucket['doc_count'] for bucket in schema_buckets}
    )

    # If no countries or languages are given, use the aggregated values.
    for field in ('countries', 'languages'):
        if not data.get(field):
            buckets = aggregations[field]['buckets']
            data[field] = [bucket['key'] for bucket in buckets]

    # Index each text alongside its match_form() variant.
    match_forms = [match_form(text) for text in texts]
    data['text'] = index_form(texts + match_forms)

    es.index(
        index=collection_index(),
        doc_type='doc',
        id=collection.id,
        body=data,
    )
Exemplo n.º 6
0
def flush_index():
    """Refresh the collection, entity and record indexes, in that order.

    Run this to apply all pending indexing changes.
    """
    # Resolve each index name right before its refresh, as separate calls.
    for get_index in (collection_index, entity_index, record_index):
        es.indices.refresh(index=get_index())
Exemplo n.º 7
0
def delete_index():
    """Delete the collection, entity and record indexes, in that order.

    Status codes 404 and 400 are passed in the client's ``ignore`` list for
    each delete request.
    """
    for get_index in (collection_index, entity_index, record_index):
        es.indices.delete(get_index(), ignore=[404, 400])
Exemplo n.º 8
0
def index_collection(collection):
    """Index a collection together with statistics about its entities.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` and that call's result is returned. Otherwise the
    document is built from the collection's attributes, enriched with an
    entity count, per-schema counts and normalised countries/languages, and
    handed to ``index_doc``; the result of ``index_doc`` is returned.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': collection.category,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'roles': collection.roles,
        'schemata': {},
    }
    # Every string-valued field collected so far feeds the 'text' field.
    texts = [
        value for value in data.values()
        if isinstance(value, six.string_types)
    ]

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name
        }
        texts.append(creator.name)

    def terms_agg(field, size):
        # Terms aggregation over `field`, limited to `size` buckets.
        return {'terms': {'field': field, 'size': size}}

    # Compute some statistics on the content of the collection. No hits are
    # requested ('size': 0); only the total and the aggregations are read.
    query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [
                    {'term': {'collection_id': collection.id}},
                    {'term': {'schemata': Entity.THING}},
                ]
            }
        },
        'aggs': {
            'schema': terms_agg('schema', 1000),
            'countries': terms_agg('countries', 500),
            'languages': terms_agg('languages', 100),
        }
    }
    result = es.search(index=entities_index(), body=query)
    aggregations = result.get('aggregations')
    data['count'] = result['hits']['total']

    # Expose the number of entities per schema.
    schema_buckets = aggregations['schema']['buckets']
    data['schemata'].update(
        {bucket['key']: bucket['doc_count'] for bucket in schema_buckets}
    )

    def aggregated_keys(name):
        # Keys of the buckets returned for the aggregation called `name`.
        return [bucket['key'] for bucket in aggregations[name]['buckets']]

    # If no countries or languages are given, use the aggregated values.
    countries = collection.countries
    if not countries:
        countries = aggregated_keys('countries')
    data['countries'] = exactitude.countries.normalize_set(countries)

    languages = collection.languages
    if not languages:
        languages = aggregated_keys('languages')
    data['languages'] = exactitude.languages.normalize_set(languages)

    # Build the normalised variants as a complete list first, so the list
    # being read is never the one being grown.
    ascii_forms = [normalize(text, ascii=True) for text in texts]
    data['text'] = index_form(texts + ascii_forms)
    return index_doc(collection_index(), collection.id, data)
Exemplo n.º 9
0
def clear_index():
    """Delete all documents from the collection, entity and record indexes.

    Issues a single ``match_all`` delete-by-query across the three indexes
    with ``refresh=True``.
    """
    targets = [collection_index(), entity_index(), record_index()]
    match_everything = {'query': {'match_all': {}}}
    es.delete_by_query(index=targets, body=match_everything, refresh=True)