def reconcile_op(query):
    """Reconcile operation for a single query.

    Builds a placeholder entity from the query's name, type and property
    constraints, asks ``similar_entities`` for candidates and converts each
    candidate into a reconciliation match record.

    :param query: mapping that may contain ``query`` (the name to match),
        ``limit``, ``type`` and ``properties`` (a list of mappings with
        ``pid`` and ``v`` keys).
    :returns: dict with ``result`` (list of match dicts) and ``num`` (the
        number of matches).
    """
    state = QueryState({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)

    name = query.get('query', '')
    entity = {
        'id': 'fake',
        'names': [name],
        'fingerprints': [make_fingerprint(name)],
        'schemata': ensure_list(query.get('type'))
    }

    # ``properties`` may be missing or explicitly null in the request.
    for prop in query.get('properties') or []:
        entity[prop.get('pid')] = ensure_list(prop.get('v'))

    suggested = similar_entities(entity, state)
    # Tolerate a response without a ``results`` key.
    results = suggested.get('results') or []

    # The type list does not depend on the individual result, so fetch it
    # once instead of once per candidate (and not at all without results).
    freebase_types = list(get_freebase_types()) if results else []

    matches = []
    for ent in results:
        schema = ent.get('schema')
        types = [t for t in freebase_types if schema == t['id']]
        # A candidate without a score is treated as scoring zero.
        score = ent.get('score') or 0
        matches.append({
            'id': ent.get('id'),
            'name': ent.get('name'),
            'type': types,
            # Scale the raw score by ten and cap it at 100.
            'score': min(100, score * 10),
            'uri': entity_link(ent.get('id')),
            # Flag candidates whose name equals the query string exactly.
            'match': ent.get('name') == name
        })

    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {
        'result': matches,
        'num': len(matches)
    }
def fingerprint(self, values):
    """Return the fingerprints of ``values``, dropping any that are None.

    Each value is passed through ``make_fingerprint``; results that come
    back as ``None`` are left out of the returned list.
    """
    # TODO: this should not be a property thing, so that fp's can include
    # dates etc.
    kept = []
    for item in values:
        fp = make_fingerprint(item)
        if fp is not None:
            kept.append(fp)
    return kept
def compute_key(self, record):
    """Derive a SHA-1 hex key for ``record`` from the configured key fields.

    The hash is seeded with the UTF-8 encoded dataset name, then updated
    with each key field's value, converted via ``make_fingerprint`` when
    ``self.key_fingerprint`` is set and via ``string_value`` otherwise.
    Fields whose converted value is ``None`` are skipped.

    Returns the hex digest, or ``None`` when no key field contributed a
    value.
    """
    hasher = sha1(self.query.dataset.name.encode('utf-8'))
    contributed = False
    for field in self.keys:
        raw = record.get(field)
        text = (make_fingerprint(raw) if self.key_fingerprint
                else string_value(raw))
        if text is not None:
            hasher.update(text.encode('utf-8'))
            contributed = True
    return hasher.hexdigest() if contributed else None
def normalize_value(self, value):
    """Normalize ``value`` by returning its ``make_fingerprint`` result."""
    normalized = make_fingerprint(value)
    return normalized