def testGetBFBit(self):
    # Verifies rappor.get_bf_bit against two hand-computed bit indices for
    # the word "abc" in cohort 0, using the bloom filter size of the
    # fixture stored in self.typical_instance.
    cohort = 0
    input_word = "abc"
    ti = self.typical_instance

    # hash_no = 0.  Reference digest recorded when the test was written:
    #   "\x13O\x0b\xa0\xcc\xc5\x89\x01oI\x85\xc8\xc3P\xfe\xa7 H\xb0m"
    # The expected bit is
    #   (ord(expected_hash[0]) + ord(expected_hash[1])*256) % 16
    # which is 3 for this digest (presumably ti.num_bloombits is 16 --
    # confirm against the fixture).
    hash_no = 0
    expected_output = 3
    actual = rappor.get_bf_bit(input_word, cohort, hash_no, ti.num_bloombits)
    # assertEqual rather than the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual(expected_output, actual)

    # hash_no = 1.  Reference digest:
    #   "\xb6\xcc\x7f\xee@\x95\xb0\xdb\xf5\xf1z\xc7\xdaPM\xd4\xd6u\xed3"
    # The same formula gives 6.
    hash_no = 1
    expected_output = 6
    actual = rappor.get_bf_bit(input_word, cohort, hash_no, ti.num_bloombits)
    self.assertEqual(expected_output, actual)
def HashCandidates(params, stdin, stdout):
    """Write one CSV row per input line: the word, then its bloom filter bits.

    Args:
      params: object providing num_bloombits, num_cohorts and num_hashes.
      stdin: iterable of text lines; each line is stripped of surrounding
        whitespace and the result is used as the candidate word.
      stdout: file-like object handed to csv.writer.

    Each row starts with the word, followed by one integer per
    (cohort, hash_no) pair, cohorts in the outer position.  The integer is
    cohort * num_bloombits + (bit index returned by rappor.get_bf_bit + 1),
    i.e. the bit index is shifted by one before the cohort offset is added.
    """
    bits_per_cohort = params.num_bloombits
    writer = csv.writer(stdout)

    for raw_line in stdin:
        candidate = raw_line.strip()
        mapped_bits = [
            cohort_idx * bits_per_cohort
            + (rappor.get_bf_bit(candidate, cohort_idx, hash_idx,
                                 bits_per_cohort) + 1)
            for cohort_idx in xrange(params.num_cohorts)
            for hash_idx in xrange(params.num_hashes)
        ]
        writer.writerow([candidate] + mapped_bits)
def print_map(all_words, params, mapfile):
    """Write the bloom filter bit map for every word in all_words.

    Args:
      all_words: iterable of words (strings) to map.
      params: object providing num_bloombits, num_cohorts and num_hashes.
      mapfile: object with a write() method that receives the output.

    One line is written per word: the word itself, then for every
    (cohort, hash_no) pair a comma followed by
    cohort * num_bloombits + (rappor.get_bf_bit(...) + 1), and finally a
    newline.  Per the original author's note, this map of distributions is
    required by the R analysis tool.
    """
    num_bits = params.num_bloombits

    for candidate in all_words:
        mapfile.write(candidate)
        for cohort_idx in xrange(params.num_cohorts):
            # First position of this cohort's slice in the flattened map.
            cohort_offset = cohort_idx * num_bits
            for hash_idx in xrange(params.num_hashes):
                # Bit indices are shifted by one before the offset is added.
                shifted_bit = rappor.get_bf_bit(
                    candidate, cohort_idx, hash_idx, num_bits) + 1
                mapfile.write(",%s" % (cohort_offset + shifted_bit,))
        mapfile.write("\n")