Example #1
    def test_model_merge(self):
        merged = model.merge({'schema': 'Person'},
                             {'schema': 'Company'})
        assert merged['schema'] == 'LegalEntity'

        merged = model.merge({}, {'id': 'banana'})
        assert merged['id'] == 'banana'
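The test above pins down two behaviours of model.merge: when the two inputs carry conflicting schemata, the result falls back to a shared ancestor schema ('LegalEntity' for 'Person' and 'Company'), and keys present on only one side are carried over. A minimal sketch of those semantics; merge_sketch and the common_ancestor parameter are hypothetical illustrations, not the project's API:

def merge_sketch(left, right, common_ancestor='LegalEntity'):
    # Start from the left-hand entity and fold in the right-hand one.
    merged = dict(left)
    for key, value in right.items():
        if key == 'schema' and merged.get('schema') not in (None, value):
            # Conflicting schemata resolve to a shared ancestor schema.
            merged['schema'] = common_ancestor
        elif merged.get(key) is None:
            merged[key] = value
    return merged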
Example #2
File: entities.py Project: arezola/aleph
def _index_updates(collection, entities):
    """Look up existing index documents and generate an updated form.

    This is necessary to make the index accumulative, i.e. if an entity or link
    gets indexed twice with different field values, it'll add up the different
    field values into a single record. This is to avoid overwriting the
    document and losing field values. An alternative solution would be to
    implement this in Groovy on the ES.
    """
    common = {
        'collection_id': collection.id,
        'bulk': True,
        'roles': collection.roles,
        'updated_at': datetime.utcnow()
    }
    if not entities:
        return

    query = {
        'query': {
            'ids': {
                'values': list(entities.keys())
            }
        },
        '_source': ['schema', 'properties', 'created_at']
    }
    result = search_safe(index=entity_index(), body=query)
    # Merge each incoming entity with the version already stored in the
    # index, preserving the original creation timestamp.
    for doc in result.get('hits', {}).get('hits', []):
        entity_id = doc['_id']
        entity = entities.get(entity_id)
        existing = doc.get('_source')
        combined = model.merge(existing, entity)
        combined['created_at'] = existing.get('created_at')
        entities[entity_id] = combined

    # Emit one bulk index action per merged entity.
    for doc_id, entity in entities.items():
        entity.pop('id', None)
        entity.update(common)
        schema = model.get(entity.get('schema'))
        entity = finalize_index(entity, schema, [])
        yield {
            '_id': doc_id,
            '_index': entity_index(),
            '_type': 'doc',
            '_source': entity
        }
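The accumulation step described in the docstring amounts to union-merging the stored document's properties with the incoming ones, which model.merge handles internally. A minimal sketch of that idea, assuming properties are lists of values keyed by property name; merge_properties is a hypothetical helper, not aleph's API:

def merge_properties(existing, incoming):
    # Union the multi-valued properties of two entity versions, so that
    # re-indexing never drops values seen in an earlier pass.
    combined = {name: list(values) for name, values in existing.items()}
    for name, values in incoming.items():
        seen = combined.setdefault(name, [])
        combined[name] = seen + [v for v in values if v not in seen]
    return combined

# merge_properties({'name': ['ACME Inc.']},
#                  {'name': ['ACME'], 'country': ['us']})
# -> {'name': ['ACME Inc.', 'ACME'], 'country': ['us']}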
Example #3
def _index_updates(collection, entities):
    """Look up existing index documents and generate an updated form.

    This is necessary to make the index accumulative, i.e. if an entity or link
    gets indexed twice with different field values, it'll add up the different
    field values into a single record. This is to avoid overwriting the
    document and losing field values. An alternative solution would be to
    implement this in Groovy on the ES.
    """
    common = {
        'collection_id': collection.id,
        '$bulk': True,
        'roles': collection.roles,
        'updated_at': datetime.utcnow()
    }
    if not entities:
        return

    result = es.mget(index=es_index,
                     doc_type=TYPE_ENTITY,
                     body={'ids': list(entities.keys())},
                     _source=['schema', 'properties', 'created_at'])
    # Merge each incoming entity with its stored version, skipping IDs
    # that are not yet in the index.
    for doc in result.get('docs', []):
        if not doc.get('found', False):
            continue
        entity_id = doc['_id']
        entity = entities.get(entity_id)
        existing = doc.get('_source')
        combined = model.merge(existing, entity)
        combined['created_at'] = existing.get('created_at')
        entities[entity_id] = combined

    # Emit one bulk index action per merged entity.
    for doc_id, entity in entities.items():
        entity.pop('id', None)
        entity.pop('data', None)
        entity.update(common)
        if 'created_at' not in entity:
            entity['created_at'] = entity.get('updated_at')
        schema = model.get(entity.get('schema'))
        entity = finalize_index(entity, schema)
        yield {
            '_id': doc_id,
            '_type': TYPE_ENTITY,
            '_index': str(es_index),
            '_source': entity
        }
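Either version of _index_updates is a generator of Elasticsearch bulk actions, so it only takes effect once drained by a bulk helper. A minimal usage sketch with the elasticsearch-py bulk helper; the client setup here is an assumption, since aleph wires up its own client:

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

es = Elasticsearch()  # assumption: default local cluster

def index_entities(collection, entities):
    # Stream the generated actions into the bulk API in chunks.
    actions = _index_updates(collection, entities)
    return bulk(es, actions, chunk_size=1000, raise_on_error=False)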