def test_groups_correctness(requests):
    """Compare the service's 'groups' result with a locally computed one.

    The reference grouping is produced with anonlink directly (candidate
    pair generation followed by the greedy solver) and is taken to be
    correct. The same filters are then packed, uploaded as a binary
    'groups' project, linked at ``THRESHOLD``, and the returned groups must
    match the reference exactly when both are viewed as sets of groups.
    """
    # NOTE: pickle.load must only ever be fed trusted data; DATA_PATH is
    # presumably a fixture file prepared for this test -- confirm.
    with open(DATA_PATH, 'rb') as data_file:
        filters = pickle.load(data_file)

    # Reference answer, computed locally with anonlink.
    expected_groups = anonlink.solving.greedy_solve(
        anonlink.candidate_generation.find_candidate_pairs(
            filters,
            anonlink.similarities.dice_coefficient_accelerated,
            THRESHOLD,
        )
    )

    # Every filter in every dataset must be as long as the very first one.
    filter_size = len(filters[0][0])
    for dataset in filters:
        for filter_ in dataset:
            assert len(filter_) == filter_size

    # One concatenated binary payload per dataset.
    packed_filters = []
    for dataset in filters:
        packed_filters.append(
            b''.join(binary_pack_for_upload(dataset, filter_size)))

    project_data, _ = create_project_upload_data(
        requests,
        packed_filters,
        result_type='groups',
        binary=True,
        hash_size=DATA_HASH_SIZE,
    )
    try:
        run = post_run(requests, project_data, threshold=THRESHOLD)
        run_result = get_run_result(requests, project_data, run)
        result_groups = run_result['groups']
    finally:
        # Release the project whether or not the run succeeded.
        delete_project(requests, project_data)

    # Order of groups and of members within a group is irrelevant, so
    # compare as sets of frozensets.
    actual = {
        frozenset(tuple(member) for member in group)
        for group in result_groups
    }
    expected = {frozenset(group) for group in expected_groups}
    assert actual == expected
def test_delete_project_after_creating_run_with_clks(
        requests, result_type_number_parties):
    """Delete a project that already has a run, then check it is unreachable.

    After ``delete_project`` has been called, asking for the project
    description is expected to raise ``AssertionError``.
    """
    created_project, _run_id = _create_data_linkage_run(
        requests, result_type_number_parties)

    delete_project(requests, created_project)

    # The description lookup must now fail.
    with pytest.raises(AssertionError):
        get_project_description(requests, created_project)
def project(request, requests, result_type_number_parties):
    """Yield a freshly created project without data, then delete it.

    ``result_type_number_parties`` is unpacked into the result type and the
    number of parties used to create the project. Once the consumer resumes
    this generator, the project is deleted again.
    """
    result_type, number_parties = result_type_number_parties
    new_project = create_project_no_data(
        requests,
        result_type=result_type,
        number_parties=number_parties,
    )

    yield new_project

    # Teardown: release the project resource.
    delete_project(requests, new_project)
def a_blocking_project(request, requests):
    """Yield a two-party 'groups' project with blocking enabled.

    The project is created without data, with ``uses_blocking=True``. Once
    the consumer resumes this generator, the project is deleted again.
    """
    # A 2 party project with blocking enabled (uses_blocking=True).
    project = create_project_no_data(
        requests,
        result_type="groups",
        number_parties=2,
        uses_blocking=True
    )
    yield project
    # Release project resource
    delete_project(requests, project)
def test_similarity_scores(requests, the_truth):
    """Check the service's similarity scores against the expected ones.

    A 'similarity_scores' project is created from ``the_truth['clks_a']``
    and ``the_truth['clks_b']`` and run at ``the_truth['threshold']``. Every
    pair in ``the_truth['similarity_scores']`` must appear in the result
    with exactly the same score; the result may contain additional pairs.

    The project is deleted in a ``finally`` clause so that a failing run or
    result fetch does not leave the project behind.
    """
    project_data, _ = create_project_upload_data(
        requests,
        (the_truth['clks_a'], the_truth['clks_b']),
        result_type='similarity_scores')
    try:
        run = post_run(requests, project_data, threshold=the_truth['threshold'])
        result = get_run_result(requests, project_data, run, timeout=60)
    finally:
        # Always release the project, even if the run or the fetch failed.
        delete_project(requests, project_data)

    true_scores = the_truth['similarity_scores']
    # Each result row is (a, b, score), where a and b are two-element
    # entries. The entries are sorted and only their second element is kept,
    # giving a key that does not depend on the order of a and b.
    # NOTE(review): the first element is presumably the dataset index --
    # confirm against the API specification.
    result_scores = {
        tuple(index for _, index in sorted([a, b])): score
        for a, b, score in result['similarity_scores']
    }

    # Anonlink is more strict on enforcing the k parameter. Hence the
    # subset.
    assert true_scores.keys() <= result_scores.keys()

    for pair in true_scores:
        assert true_scores[pair] == result_scores[pair]
def test_delete_project_types(requests, project):
    """Call ``delete_project`` on the supplied ``project``.

    The test passes as long as that call does not raise.
    """
    delete_project(requests, project)
def groups_project(request, requests):
    """Yield a 'groups' project built from ``request.param``, then delete it.

    ``request.param`` must unpack into exactly four values:
    ``(size, overlap, encoding_size, uses_blocking)``.
    """
    size, overlap, encoding_size, uses_blocking = request.param
    created = create_project_response(
        requests,
        size,
        overlap,
        'groups',
        encoding_size,
        uses_blocking,
    )

    yield created

    # Teardown: remove the project once the consumer is done with it.
    delete_project(requests, created)
def permutations_project(request, requests):
    """Yield a 'permutations' project built from ``request.param``, then delete it.

    ``request.param`` must unpack into exactly three values:
    ``(size, overlap, encoding_size)``.
    """
    size, overlap, encoding_size = request.param
    created = create_project_response(
        requests,
        size,
        overlap,
        'permutations',
        encoding_size,
    )

    yield created

    # Teardown: remove the project once the consumer is done with it.
    delete_project(requests, created)
def similarity_scores_project(request, requests):
    """Yield a 'similarity_scores' project built from ``request.param``, then delete it.

    ``request.param`` must unpack into exactly three values:
    ``(size, overlap, encoding_size)``.
    """
    size, overlap, encoding_size = request.param
    created = create_project_response(
        requests,
        size,
        overlap,
        'similarity_scores',
        encoding_size,
    )

    yield created

    # Teardown: remove the project once the consumer is done with it.
    delete_project(requests, created)