示例#1
0
def test_unsupported_shape(datasets_n):
    """Expect NotImplementedError from greedy_solve for this candidate shape.

    The candidates triple holds one similarity value and, in each of its
    second and third components, ``datasets_n`` one-element unsigned arrays.
    """
    similarities = array('d', [.5])
    second_component = tuple(array('I', [3]) for _ in range(datasets_n))
    third_component = tuple(array('I', [2]) for _ in range(datasets_n))
    candidates = (similarities, second_component, third_component)

    with pytest.raises(NotImplementedError):
        greedy_solve(candidates)
示例#2
0
def test_greedy_threeparty():
    """Solve several hand-written candidate lists and compare the groups.

    Each scenario is a list of ``(similarity, (record, record))`` entries
    together with the groups handed to ``_compare_matching`` as the expected
    result. Records are written as two-element tuples.
    NOTE(review): presumably (dataset index, record index) -- confirm.
    """
    scenarios = (
        (
            [(.9, ((1, 0), (2, 0))), (.8, ((0, 0), (1, 1))),
             (.8, ((0, 0), (2, 1))), (.8, ((1, 1), (2, 1))),
             (.7, ((0, 0), (1, 0))), (.7, ((0, 0), (2, 0)))],
            [{(0, 0), (1, 1), (2, 1)}, {(1, 0), (2, 0)}],
        ),
        (
            [(.8, ((0, 0), (1, 0))), (.8, ((0, 1), (2, 1))),
             (.8, ((1, 1), (2, 1))), (.7, ((0, 0), (2, 0))),
             (.7, ((0, 1), (1, 1)))],
            [{(0, 0), (1, 0)}, {(0, 1), (1, 1), (2, 1)}],
        ),
        (
            [(1., ((0, 0), (1, 0))), (1., ((0, 0), (2, 0))),
             (1., ((2, 0), (2, 1)))],
            [{(0, 0), (1, 0)}, {(2, 0), (2, 1)}],
        ),
        (
            [(1., ((0, 0), (1, 0))), (1., ((2, 0), (3, 0))),
             (1., ((2, 0), (4, 0))), (1., ((3, 0), (4, 0))),
             (1., ((0, 0), (2, 0))), (1., ((0, 0), (3, 0))),
             (1., ((0, 0), (4, 0))), (1., ((1, 0), (2, 0))),
             (1., ((1, 0), (3, 0))), (1., ((1, 0), (4, 0)))],
            [{(0, 0), (1, 0), (2, 0), (3, 0), (4, 0)}],
        ),
    )

    # Scenarios run in the listed order; the first mismatch stops the test.
    for candidate_pairs, expected_groups in scenarios:
        solved = greedy_solve(_zip_candidates(candidate_pairs))
        _compare_matching(solved, expected_groups)
示例#3
0
def test_inconsistent_dataset_number():
    """Expect ValueError when the two index tuples differ in length.

    The second component of the candidates holds two arrays while the third
    holds three.
    """
    similarities = array('d', [.5])
    two_arrays = (array('I', [3]), array('I', [4]))
    three_arrays = (array('I', [2]), array('I', [6]), array('I', [7]))
    candidates = (similarities, two_arrays, three_arrays)

    with pytest.raises(ValueError):
        greedy_solve(candidates)
示例#4
0
def test_greedy_twoparty():
    """Solve small hand-written candidate lists and compare the groups.

    Each scenario is a list of ``(similarity, (record, record))`` entries
    together with the groups handed to ``_compare_matching`` as the expected
    result. One scenario is the empty candidate list, expected to give no
    groups.
    """
    scenarios = (
        # A single candidate pair.
        (
            [(.8, ((0, 0), (1, 0)))],
            [{(0, 0), (1, 0)}],
        ),
        # Two candidates sharing the record (1, 0).
        (
            [(.8, ((0, 0), (1, 0))), (.7, ((0, 1), (1, 0)))],
            [{(0, 0), (1, 0)}],
        ),
        # No candidates at all.
        (
            [],
            [],
        ),
        # Every combination of two records on each side.
        (
            [(.8, ((0, 0), (1, 0))), (.7, ((0, 0), (1, 1))),
             (.7, ((0, 1), (1, 0))), (.6, ((0, 1), (1, 1)))],
            [{(0, 0), (1, 0)}, {(0, 1), (1, 1)}],
        ),
    )

    for candidate_pairs, expected_groups in scenarios:
        solved = greedy_solve(_zip_candidates(candidate_pairs))
        _compare_matching(solved, expected_groups)
示例#5
0
def test_greedy_fourparty():
    """Solve one hand-written candidate list and expect a single group.

    The six candidates cover every pairing of the records (0, 0), (1, 0),
    (2, 0) and (3, 0); the expected result is one group with all four.
    """
    candidate_pairs = [
        (.9, ((0, 0), (1, 0))),
        (.9, ((2, 0), (3, 0))),
        (.7, ((0, 0), (2, 0))),
        (.7, ((1, 0), (3, 0))),
        (.7, ((0, 0), (3, 0))),
        (.7, ((1, 0), (2, 0))),
    ]
    expected_groups = [{(0, 0), (1, 0), (2, 0), (3, 0)}]

    solved = greedy_solve(_zip_candidates(candidate_pairs))
    _compare_matching(solved, expected_groups)
示例#6
0
def test_probabilistic_nonprobabilistic_match_ndedup(candidate_pairs):
    """Compare probabilistic_greedy_solve against greedy_solve.

    With ``merge_threshold=1`` and ``deduplicated=False`` the probabilistic
    solver is expected to return the same groups as ``greedy_solve`` for the
    same candidates. Neither the order of the groups nor the order of records
    within a group is compared.
    """
    def as_unordered(groups):
        # Turn the groups into a set of sets so ordering does not matter.
        return frozenset(frozenset(group) for group in groups)

    candidates = _zip_candidates(candidate_pairs)

    # The probabilistic solver runs first, then the plain greedy one.
    probabilistic_groups = probabilistic_greedy_solve(
        candidates, merge_threshold=1, deduplicated=False)
    greedy_groups = greedy_solve(candidates)

    assert as_unordered(probabilistic_groups) == as_unordered(greedy_groups)
示例#7
0
def test_greedy_2p(candidate_pairs):
    """Check properties of the greedy_solve result for ``candidate_pairs``.

    ``candidate_pairs`` is read as ``(similarity, (i, j))`` entries. The test
    asserts that:

    * no group in the solution has more than two records;
    * every candidate pair has at least one of its records in some group;
    * every candidate pair is either a group of the solution, or one of its
      records sits in a group whose similarity is at least as high.
    """
    solution = greedy_solve(_zip_candidates(candidate_pairs))
    assert not any(len(group) > 2 for group in solution)

    # Look up each group's similarity; the key is the sorted record tuple.
    similarity_of = {pair: sim for sim, pair in candidate_pairs}
    matches = {}
    for group in solution:
        key = tuple(sorted(group))
        matches[key] = similarity_of[key]

    # Every record that could have a match does have a match
    matched = {record for group in solution for record in group}
    assert not any(i not in matched and j not in matched
                   for _, (i, j) in candidate_pairs)

    # Every pair is taken unless either of the candidates have a better match
    match_similarities = {}
    for records, sim in matches.items():
        for record in records:
            match_similarities[record] = sim

    unmatched = float('-inf')
    for sim, (i, j) in candidate_pairs:
        if (i, j) in matches:
            continue
        assert (match_similarities.get(i, unmatched) >= sim
                or match_similarities.get(j, unmatched) >= sim)
示例#8
0
def test_greedy_np(candidate_pairs):
    """Check properties of the greedy_solve result for ``candidate_pairs``.

    ``candidate_pairs`` is read as ``(similarity, (i, j))`` entries. The test
    asserts that no record of a candidate pair occurs in more than one group,
    and that for any two groups (counting unmatched records as singleton
    groups) at least one cross pair of records is not a candidate pair.
    """
    candidates = _zip_candidates(candidate_pairs)
    known_pairs = {pair for _, pair in candidate_pairs}
    known_records = {record for pair in known_pairs for record in pair}

    groups = list(greedy_solve(candidates))
    occurrences = Counter(record for group in groups for record in group)

    # Every record is in at most one group
    assert not any(occurrences[i] > 1 or occurrences[j] > 1
                   for _, (i, j) in candidate_pairs)

    # Include singleton groups
    leftovers = known_records - occurrences.keys()
    every_group = groups + [[record] for record in leftovers]

    # All groups that can be merged have been merged.
    for left, right in itertools.combinations(every_group, 2):
        cross_pairs = itertools.product(left, right)
        assert not all(tuple(sorted(cross)) in known_pairs
                       for cross in cross_pairs)
示例#9
0
def test_inconsistent_entry_number():
    """Expect ValueError when candidate arrays disagree in length.

    Each case is a ``(similarities, (a, b), (c, d))`` triple in which at
    least one array holds a different number of entries than the others.
    """
    def doubles(*values):
        # Array of C doubles holding the given values.
        return array('d', list(values))

    def uints(*values):
        # Array of C unsigned ints holding the given values.
        return array('I', list(values))

    cases = (
        # Two similarities, one entry everywhere else.
        (doubles(.5, .3), (uints(3), uints(4)), (uints(2), uints(6))),
        # Extra entries in the second component.
        (doubles(.5), (uints(3, 3), uints(4)), (uints(2), uints(6))),
        (doubles(.5), (uints(3, 3), uints(4, 6)), (uints(2), uints(6))),
        (doubles(.5), (uints(3), uints(4, 6)), (uints(2), uints(6))),
        # Extra entries in the third component.
        (doubles(.5), (uints(3), uints(4)), (uints(2), uints(6, 3))),
        (doubles(.5), (uints(3), uints(4)), (uints(2, 1), uints(6, 3))),
        (doubles(.5), (uints(3), uints(4)), (uints(2, 1), uints(6))),
    )

    for candidates in cases:
        with pytest.raises(ValueError):
            greedy_solve(candidates)