示例#1
0
def dedup(dataset):
    """Print one candidate duplicate for each of the first 20 values.

    Looks up the dataset called ``dataset``, runs ``match`` on each value's
    text and prints the first match that is not the value itself, followed
    by the total elapsed time in milliseconds.
    """
    ds = Dataset.by_name(dataset)
    from time import time
    from nomenklatura.matching import match
    begin = time()
    for value in Value.all(ds).limit(20):
        # Matches are 3-item sequences; the second item is compared against
        # value.id to drop the hit that is the value itself. A list
        # comprehension (rather than filter + tuple-parameter lambda) keeps
        # the result indexable and the syntax valid on Python 2 and 3.
        others = [m for m in match(value.value, ds) if m[1] != value.id]
        if not others:
            # Nothing but the value itself matched: report that instead of
            # raising IndexError on others[0].
            print([value.value, '=?', None, None])
            continue
        print([value.value, '=?', others[0][1], others[0][2]])
    print("Time: %.2fms" % ((time() - begin) * 1000))
示例#2
0
def dedup(dataset):
    """Print one candidate duplicate for each of the first 20 values.

    Looks up the dataset called ``dataset``, runs ``match`` on each value's
    text and prints the first match that is not the value itself, followed
    by the total elapsed time in milliseconds.
    """
    ds = Dataset.by_name(dataset)
    from time import time
    from nomenklatura.matching import match
    begin = time()
    for value in Value.all(ds).limit(20):
        # Keep only matches whose second item differs from value.id, i.e.
        # drop the self-match. Built as a real list so it can be indexed on
        # Python 3 as well (filter() returns an iterator there).
        remaining = [hit for hit in match(value.value, ds)
                     if hit[1] != value.id]
        if remaining:
            first = remaining[0]
            print([value.value, '=?', first[1], first[2]])
        else:
            # No match besides the value itself; avoid IndexError on [0].
            print([value.value, '=?', None, None])
    print("Time: %.2fms" % ((time() - begin) * 1000))
示例#3
0
def reconcile_op(dataset, query):
    """Run one reconciliation query and return the matching entities.

    ``query`` is read via ``.get`` for the keys ``limit``, ``properties``,
    ``type`` and ``query``. When ``dataset`` is ``None`` it is resolved from
    the query's ``type`` through ``type_to_dataset``.

    Returns a dict with:
      * ``result`` -- one dict per candidate that passed every property
        filter (name, score, type, id, uri, match);
      * ``num`` -- the number of candidates considered, counted before the
        property filters are applied.
    """
    # A missing (None) or non-numeric limit falls back to 5; any other
    # value is clamped to the range 1..100.
    try:
        limit = max(1, min(100, int(query.get('limit'))))
    except (ValueError, TypeError):
        limit = 5

    filters = [(p.get('p'), p.get('v')) for p in query.get('properties', [])]

    if dataset is None:
        dataset = type_to_dataset(query.get('type', ''))

    results = match(query.get('query', ''), dataset)[:limit]
    # Each result is a (candidate, entity_id, score) triple; collect the ids
    # as a real list so the call behaves the same on Python 2 and 3.
    entities = Entity.id_map(dataset, [row[1] for row in results])

    matches = []
    for _candidate, entity_id, score in results:
        entity = entities[entity_id]

        # Decide per entity. The previous module-wide ``skip`` flag was never
        # reset, so one filtered-out entity suppressed all later candidates.
        if any(entity.data.get(key) != fv for key, fv in filters):
            continue

        entity_path = url_for('entity.view',
                              dataset=dataset.name,
                              entity=entity.id)
        entity_uri = url_for('entity.view',
                             dataset=dataset.name,
                             entity=entity.id,
                             _external=True)
        matches.append({
            'name': entity.name,
            'score': score,
            'type': [{
                'id': '/' + dataset.name,
                'name': dataset.label,
            }],
            'id': entity_path,
            'uri': entity_uri,
            # Only a score of exactly 100 is flagged as a definite match.
            'match': score == 100,
        })
    return {'result': matches, 'num': len(results)}
示例#4
0
def reconcile_op(dataset, query):
    """Run one reconciliation query and return the matching entities.

    ``query`` is read via ``.get`` for the keys ``limit``, ``properties``,
    ``type`` and ``query``. When ``dataset`` is ``None`` it is resolved from
    the query's ``type`` through ``type_to_dataset``.

    Returns a dict with:
      * ``result`` -- one dict per candidate that passed every property
        filter (name, score, type, id, uri, match);
      * ``num`` -- the number of candidates considered, counted before the
        property filters are applied.
    """
    # A missing (None) or non-numeric limit falls back to 5; any other
    # value is clamped to the range 1..100.
    try:
        limit = max(1, min(100, int(query.get('limit'))))
    except (ValueError, TypeError):
        limit = 5

    filters = [(p.get('p'), p.get('v')) for p in query.get('properties', [])]

    if dataset is None:
        dataset = type_to_dataset(query.get('type', ''))

    results = match(query.get('query', ''), dataset)[:limit]
    # Results are (candidate, entity_id, score) triples. A list comprehension
    # replaces the tuple-parameter lambda, which is a syntax error on
    # Python 3, and always hands id_map a list.
    entity_ids = [row[1] for row in results]
    entities = Entity.id_map(dataset, entity_ids)

    matches = []
    for _candidate, entity_id, score in results:
        entity = entities[entity_id]

        # The filter verdict is computed fresh for every entity; a shared
        # flag set once outside the loop would stay True after the first
        # rejection and drop every later candidate.
        rejected = any(entity.data.get(key) != fv for key, fv in filters)
        if rejected:
            continue

        entity_path = url_for('entity.view', dataset=dataset.name,
                              entity=entity.id)
        entity_uri = url_for('entity.view', dataset=dataset.name,
                             entity=entity.id, _external=True)
        matches.append({
            'name': entity.name,
            'score': score,
            'type': [{
                'id': '/' + dataset.name,
                'name': dataset.label,
            }],
            'id': entity_path,
            'uri': entity_uri,
            # Only a score of exactly 100 is flagged as a definite match.
            'match': score == 100,
        })
    return {
        'result': matches,
        'num': len(results),
    }