def merge(self, other):
    """Merge another entity into this one, then delete the other.

    Combines the schema, foreign IDs, data and timestamps of *other*
    into ``self``, keeps the other entity's name as an alias, re-points
    any alerts at the surviving entity, and finally deletes *other*.

    :param other: the entity to fold into this one.
    :raises ValueError: if *other* is this same entity, or belongs to a
        different collection.
    """
    if self.id == other.id:
        raise ValueError("Cannot merge an entity with itself.")
    if self.collection_id != other.collection_id:
        raise ValueError(
            "Cannot merge entities from different collections.")  # noqa
    self.schema = model.precise_schema(self.schema, other.schema)
    # BUG FIX: previously this combined self.foreign_ids with itself,
    # silently dropping the merged-in entity's foreign IDs.
    self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
    self.created_at = min((self.created_at, other.created_at))
    self.updated_at = datetime.utcnow()
    data = merge_data(self.data, other.data)
    if self.name != other.name:
        # Keep the other entity's name discoverable as an alias.
        data = merge_data(data, {'alias': [other.name]})
    self.data = data

    # update alerts: re-point them at the surviving entity
    from aleph.model.alert import Alert
    q = db.session.query(Alert).filter(Alert.entity_id == other.id)
    q.update({Alert.entity_id: self.id})

    # delete source entities
    other.delete()
    db.session.add(self)
    db.session.commit()
    # NOTE(review): refreshing the deleted entity presumably re-syncs
    # its in-memory state (soft delete) — confirm against Entity.delete.
    db.session.refresh(other)
def test_merge_list(self):
    """Merging two dicts with list values unions both lists."""
    first = {'lst': ['a', 'b', 'c']}
    second = {'lst': ['c', 'd', 'e']}
    merged = merge_data(first, second)
    for expected in ('a', 'c', 'e'):
        assert expected in merged['lst'], merged
def bulk_load_query(collection_id, query):
    """Run a mapping query and bulk-index the resulting entities."""
    collection = Collection.by_id(collection_id)
    if collection is None:
        log.warning("Collection does not exist: %s", collection_id)
        return
    mapping = model.make_mapping(query, key_prefix=collection.foreign_id)
    entities = {}
    total = 0
    for record_idx, record in enumerate(mapping.source.records, 1):
        for entity in mapping.map(record).values():
            entity_id = entity.get('id')
            if entity_id is None:
                continue
            # Tabular sources often emit the same entity several times in
            # close succession (e.g. one row per director of a single
            # company), so fold repeats together before indexing.
            previous = entities.get(entity_id, {})
            entities[entity_id] = merge_data(entity, previous)
            total += 1
        if record_idx % 1000 == 0:
            log.info("[%s] Loaded %s records, %s entities...",
                     collection.foreign_id, record_idx, total)
        if len(entities) >= BULK_PAGE:
            # Flush a full page of entities to the index.
            index_bulk(collection, entities, chunk_size=BULK_PAGE)
            entities = {}
    if entities:
        index_bulk(collection, entities, chunk_size=BULK_PAGE)
    # Update collection stats
    index_collection(collection)
def test_merge_value(self):
    """On a scalar key conflict, the second argument's value wins."""
    merged = merge_data({'foo': 'bar'}, {'foo': 'quux'})
    assert merged['foo'] == 'quux', merged
def update(entity_id):
    """Update an entity; with the 'merge' flag, combine the submitted
    properties with the entity's existing data."""
    entity = get_db_entity(entity_id, request.authz.WRITE)
    data = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        data['properties'] = merge_data(data.get('properties'), entity.data)
    entity.update(data)
    db.session.commit()
    result = update_entity(entity, sync=get_flag('sync', True))
    return EntitySerializer.jsonify(result)
def merge(self, left, right):
    """Merge two entities and return a combined version."""
    combined_properties = merge_data(left.get('properties'),
                                     right.get('properties'))
    combined_schema = self.precise_schema(left.get('schema'),
                                          right.get('schema'))
    result = {
        'id': left.get('id', right.get('id')),
        'schema': combined_schema,
        'properties': combined_properties,
    }
    return result
def test_merge_different(self):
    """Disjoint keys from both arguments survive the merge."""
    merged = merge_data({'foo': 'quux'}, {'bar': 'quux'})
    assert merged['foo'] == 'quux', merged
    assert merged['bar'] == 'quux', merged
def update(id):
    """Update an entity; ?merge=true folds new properties into the old data.

    Note: the parameter name `id` shadows the builtin, but it is fixed by
    the URL route, so it must stay.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    data = parse_request(schema=EntityUpdateSchema)
    merge_requested = as_bool(request.args.get('merge'))
    if merge_requested:
        data['properties'] = merge_data(data.get('properties'), entity.data)
    entity.update(data)
    db.session.commit()
    update_entity(entity)
    update_collection(entity.collection)
    return view(entity.id)
def update(entity_id):
    """Update an entity, tagging the request with its collection id."""
    entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    payload = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    result = update_entity(entity, sync=get_flag('sync', True))
    return EntitySerializer.jsonify(result)
def update(id):
    """Update an entity; ?merge=true combines payload data with stored data
    instead of replacing it."""
    _, entity = get_entity(id, request.authz.WRITE)
    data = parse_request(schema=EntitySchema)
    if as_bool(request.args.get('merge')):
        incoming = data.get('data') or {}
        existing = entity.data or {}
        data['data'] = merge_data(incoming, existing)
    entity.update(data)
    db.session.commit()
    update_entity(entity)
    update_collection(entity.collection)
    return view(entity.id)
def update(id):
    """Update an entity and return its combined serialization."""
    entity = get_db_entity(id, request.authz.WRITE)
    payload = parse_request(EntityUpdateSchema)
    # Read the sync flag up front, matching the original call order.
    sync_requested = get_flag('sync')
    if get_flag('merge'):
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    result = update_entity(entity, sync=sync_requested)
    return serialize_data(result, CombinedSchema)
def update(entity_id):
    """
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    data = parse_request('EntityUpdate')
    if get_flag('merge'):
        data['properties'] = merge_data(data.get('properties'), entity.data)
    entity.update(data)
    db.session.commit()
    update_entity(entity, sync=get_flag('sync', True))
    # Re-read from the index so the response reflects the final state.
    indexed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(indexed)
def finalize_index(proxy, context, texts):
    """Apply final denormalisations to the index."""
    # Collect free text from all property values except structured types.
    skip_types = ('entity', 'date', 'url', 'country', 'language')
    for prop, value in proxy.itervalues():
        if prop.type.name not in skip_types:
            texts.append(value)
    data = merge_data(context, proxy.to_full_dict())
    data['name'] = proxy.caption
    data['text'] = index_form(texts)
    # Derive de-duplicated fingerprints from the entity's names.
    fps = set()
    for name in data.get('names', []):
        fp = fingerprints.generate(name)
        if fp is not None:
            fps.add(fp)
    data['fingerprints'] = list(fps)
    if not data.get('created_at'):
        data['created_at'] = data.get('updated_at')
    # The id is carried separately by the index, not in the document body.
    data.pop('id', None)
    return clean_dict(data)
def test_merge_objects(self):
    """Nested dict values are merged recursively, keeping keys from both."""
    merged = merge_data({'data': {'nested': True}},
                        {'data': {'banana': 'hello'}})
    assert merged['data']['nested'], merged
    assert merged['data']['banana'], merged