def merge(self, other):
    """Fold ``other`` into this entity and persist the result.

    This entity takes over the combined schema, foreign ids, data and the
    earlier ``created_at`` of the two entities. Alerts that referenced
    ``other`` are re-pointed at this entity, ``other.delete()`` is called
    and the session is committed.

    Raises:
        ValueError: if ``other`` has the same id as this entity, or if the
            two entities have different ``collection_id`` values.
    """
    if self.id == other.id:
        raise ValueError("Cannot merge an entity with itself.")
    if self.collection_id != other.collection_id:
        raise ValueError(
            "Cannot merge entities from different collections.")

    self.schema = model.precise_schema(self.schema, other.schema)
    # Combine both entities' foreign ids. The previous code passed
    # self.foreign_ids twice, which dropped other's ids on merge.
    self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
    self.created_at = min((self.created_at, other.created_at))
    self.updated_at = datetime.utcnow()

    merged = merge_data(self.data, other.data)
    if self.name != other.name:
        # Keep the other entity's name around as an alias.
        merged = merge_data(merged, {'alias': [other.name]})
    self.data = merged

    # update alerts
    # NOTE(review): imported locally, presumably to avoid a circular
    # import between the model modules - confirm before hoisting.
    from aleph.model.alert import Alert
    q = db.session.query(Alert).filter(Alert.entity_id == other.id)
    q.update({Alert.entity_id: self.id})

    # delete source entities
    other.delete()
    db.session.add(self)
    db.session.commit()
    db.session.refresh(other)
def test_model_precise_schema(self):
    """Check the schema chosen by ``model.precise_schema`` for schema pairs."""
    # ((left, right), expected result) - evaluated in this order.
    expectations = (
        (('Thing', 'Thing'), 'Thing'),
        (('Thing', 'Person'), 'Person'),
        (('Person', 'Thing'), 'Person'),
        (('Person', 'Company'), 'LegalEntity'),
    )
    for (left, right), expected in expectations:
        assert model.precise_schema(left, right) == expected

    # This pairing is expected to be rejected with InvalidData.
    with assert_raises(InvalidData):
        model.precise_schema('Person', 'Directorship')
def compare(left, right):
    """Score how likely it is that two entity dicts describe the same thing.

    Returns 0 straight away when the right entity's schema is not among the
    left schema's ``matchable_schemata``. Otherwise the fingerprint score
    (weighted by ``FP_WEIGHT``) and the per-property scores (weighted via
    ``MATCH_WEIGHTS``) are added up, clamped to the range 0.0-1.0 and
    scaled by 0.9, so the largest value returned is 0.9.
    """
    schema_a = model.get(left.get('schema'))
    schema_b = model.get(right.get('schema'))
    matchable = list(schema_a.matchable_schemata)
    if right_schema_missing(schema_b, matchable):
        return 0

    common = model.precise_schema(schema_a, schema_b)
    total = compare_fingerprints(left, right) * FP_WEIGHT
    props_a = left.get('properties', {})
    props_b = right.get('properties', {})

    for key, prop in common.properties.items():
        factor = MATCH_WEIGHTS.get(prop.type, 0)
        if factor == 0:
            # Property types without a weight do not contribute.
            continue
        values_a = props_a.get(key)
        values_b = props_b.get(key)
        total = total + prop.type.compare_sets(values_a, values_b) * factor

    capped = min(1.0, total)
    floored = max(0.0, capped)
    return floored * 0.9


def right_schema_missing(schema, matchable):
    """Tell whether ``schema`` is absent from the ``matchable`` list."""
    return schema not in matchable
def test_model_precise_schema(self):
    """Check the schema chosen by ``model.precise_schema`` for schema pairs."""
    # (left, right, expected result) - evaluated in this order.
    cases = [
        ('Thing', 'Thing', 'Thing'),
        ('Thing', 'Person', 'Person'),
        ('Person', 'Thing', 'Person'),
        ('Person', 'Company', 'LegalEntity'),
    ]
    for left, right, expected in cases:
        assert model.precise_schema(left, right) == expected