def test_max_words_per_user_validation(self):
  """`max_words_per_user` must be a positive int or `None`."""
  for bad_value in (0, -1):
    with self.assertRaisesRegex(ValueError, 'max_words_per_user'):
      iblt_tff.build_iblt_computation(max_words_per_user=bad_value)
  # Positive ints and None are accepted without raising.
  for ok_value in (1, None):
    iblt_tff.build_iblt_computation(max_words_per_user=ok_value)
def test_max_heavy_hitters_validation(self):
  """`max_heavy_hitters` must be a positive int or `None`."""
  for bad_value in (0, -1):
    with self.assertRaisesRegex(ValueError, 'max_heavy_hitters'):
      iblt_tff.build_iblt_computation(max_heavy_hitters=bad_value)
  # Positive ints and None are accepted without raising.
  for ok_value in (1, None):
    iblt_tff.build_iblt_computation(max_heavy_hitters=ok_value)
def test_default_construction(self):
  """A default-constructed computation has the documented type signature."""
  iblt_computation = iblt_tff.build_iblt_computation()
  self.assertIsInstance(iblt_computation, computation_base.Computation)

  # Build the expected signature from named pieces rather than one
  # deeply nested expression, for readability.
  string_vector = computation_types.TensorType(shape=[None], dtype=tf.string)
  int64_vector = computation_types.TensorType(shape=[None], dtype=tf.int64)
  expected_parameter = computation_types.at_clients(
      computation_types.SequenceType(string_vector))
  expected_result = computation_types.at_server(
      iblt_tff.ServerOutput(
          clients=tf.int32,
          heavy_hitters=string_vector,
          heavy_hitters_unique_counts=int64_vector,
          heavy_hitters_counts=int64_vector,
          num_not_decoded=tf.int64,
          round_timestamp=tf.int64,
      ))
  type_test_utils.assert_types_identical(
      iblt_computation.type_signature,
      computation_types.FunctionType(
          parameter=expected_parameter, result=expected_result))
def _execute_computation(
    data: List[List[str]],
    *,
    batch_size: int = 1,
    capacity: int = 1000,
    max_string_length: int = 10,
    repetitions: int = 3,
    seed: int = 0,
    max_heavy_hitters: Optional[int] = None,
    max_words_per_user: Optional[int] = None,
    k_anonymity: int = 1,
    secure_sum_bitwidth: Optional[int] = None,
    multi_contribution: bool = True,
    string_postprocessor: Optional[Callable[[tf.Tensor], tf.Tensor]] = None,
    # NOTE(review): the original annotation was `Dict[str, tf.Tensor]`, but the
    # function returns a 3-tuple; the string annotation below avoids requiring
    # `Tuple` in the module's `typing` import list.
) -> 'Tuple[Dict[str, Tuple[tf.Tensor, tf.Tensor]], tf.Tensor, tf.Tensor]':
  """Executes one round of IBLT computation over the given datasets.

  Args:
    data: A reference to all ClientData on device.
    batch_size: The number of elements in each batch of the dataset. Defaults
      to `1`, means the input dataset is processed by
      `tf.data.Dataset.batch(1)`.
    capacity: Capacity of the underlying IBLT. Defaults to `1000`.
    max_string_length: Maximum length (in bytes) of an item in the IBLT.
      Multi-byte characters in the string will be truncated on byte (not
      character) boundaries. Defaults to `10`.
    repetitions: The number of repetitions in IBLT data structure (must be
      >= 3). Defaults to `3`.
    seed: An integer seed for hash functions. Defaults to `0`.
    max_heavy_hitters: The maximum number of items to return. If the decoded
      results have more than this number of items, will order decreasingly by
      the estimated counts and return the top max_heavy_hitters items. Default
      max_heavy_hitters == `None`, which means to return all the heavy hitters
      in the result.
    max_words_per_user: If set, bounds the number of contributions any user
      can make to the total counts in the iblt. If not `None`, must be a
      positive integer. Defaults to `None`.
    k_anonymity: Sets the number of users required for an element's count to
      be visible. Defaults to `1`.
    secure_sum_bitwidth: The bitwidth used for secure sum. The default value
      is `None`, which disables secure sum. If not `None`, must be in the
      range `[1,62]`. See `tff.federated_secure_sum_bitwidth`.
    multi_contribution: Whether each client is allowed to contribute multiple
      counts or only a count of one for each unique word. Defaults to `True`.
    string_postprocessor: A callable function that is run after strings are
      decoded from the IBLT in order to postprocess them. It should accept a
      single string tensor and output a single string tensor of the same
      shape. If `None`, no postprocessing is done.

  Returns:
    A 3-tuple of:
      * a dict mapping each decoded heavy-hitter string to its
        `(unique_count, count)` pair,
      * the `num_not_decoded` value from the server output,
      * the `round_timestamp` value from the server output.
  """
  one_round_computation = iblt_tff.build_iblt_computation(
      capacity=capacity,
      max_string_length=max_string_length,
      repetitions=repetitions,
      seed=seed,
      max_heavy_hitters=max_heavy_hitters,
      max_words_per_user=max_words_per_user,
      k_anonymity=k_anonymity,
      secure_sum_bitwidth=secure_sum_bitwidth,
      batch_size=batch_size,
      multi_contribution=multi_contribution,
      string_postprocessor=string_postprocessor)
  datasets = _iblt_test_data_sampler(data, batch_size)

  output = one_round_computation(datasets)

  heavy_hitters = output.heavy_hitters
  heavy_hitters_counts = output.heavy_hitters_counts
  heavy_hitters_unique_counts = output.heavy_hitters_unique_counts
  # Decoded byte strings become Python strings; undecodable bytes are dropped.
  heavy_hitters = [word.decode('utf-8', 'ignore') for word in heavy_hitters]

  iteration_results = dict(
      zip(heavy_hitters,
          zip(heavy_hitters_unique_counts, heavy_hitters_counts)))
  # `iteration_results` is already a fresh dict; no defensive copy needed.
  return iteration_results, output.num_not_decoded, output.round_timestamp
def test_multi_contribution_validation(self):
  """Both boolean settings of `multi_contribution` are accepted."""
  for flag in (True, False):
    iblt_tff.build_iblt_computation(multi_contribution=flag)
def test_batch_size_validation(self):
  """`batch_size` must be a positive int."""
  for bad_value in (0, -1):
    with self.assertRaisesRegex(ValueError, 'batch_size'):
      iblt_tff.build_iblt_computation(batch_size=bad_value)
  # The smallest valid batch size is accepted without raising.
  iblt_tff.build_iblt_computation(batch_size=1)
def test_secure_sum_bitwidth_validation(self):
  """`secure_sum_bitwidth` must be `None` or an int in `[1, 62]`."""
  for bad_bitwidth in (-1, 0, 63, 64):
    with self.assertRaisesRegex(ValueError, 'secure_sum_bitwidth'):
      iblt_tff.build_iblt_computation(secure_sum_bitwidth=bad_bitwidth)
  # None (disabled) and the boundary values of the valid range are accepted.
  for ok_bitwidth in (None, 1, 62):
    iblt_tff.build_iblt_computation(secure_sum_bitwidth=ok_bitwidth)
def test_k_anonymity_validation(self):
  """`k_anonymity` must be a positive int."""
  for bad_value in (0, -1):
    with self.assertRaisesRegex(ValueError, 'k_anonymity'):
      iblt_tff.build_iblt_computation(k_anonymity=bad_value)
  # The smallest valid value is accepted without raising.
  iblt_tff.build_iblt_computation(k_anonymity=1)
def test_repetitions_validation(self):
  """`repetitions` must be an int >= 3."""
  for bad_value in (0, 2):
    with self.assertRaisesRegex(ValueError, 'repetitions'):
      iblt_tff.build_iblt_computation(repetitions=bad_value)
  # The minimum supported repetition count is accepted without raising.
  iblt_tff.build_iblt_computation(repetitions=3)
def test_max_string_length_validation(self):
  """`max_string_length` must be a positive int."""
  for bad_value in (0, -1):
    with self.assertRaisesRegex(ValueError, 'max_string_length'):
      iblt_tff.build_iblt_computation(max_string_length=bad_value)
  # The smallest valid length is accepted without raising.
  iblt_tff.build_iblt_computation(max_string_length=1)