def best_attack(self, a1, a2) -> MlAttack:
    """
    Runs the optimizer to construct an attack for given input pair a1, a2.

    A classifier is trained on the pair first. Afterwards, outputs of the
    mechanism on a2 are sampled chunk by chunk, scored by the classifier,
    sorted in descending order and passed to DPSniper._find_threshold
    together with the target count self.config.c * (number of scores).

    Args:
        a1: 1d array representing the first input
        a2: 1d array representing the second input

    Returns:
        The constructed MlAttack
    """
    log.debug("Searching best attack for mechanism %s, classifier %s...",
              type(self.mechanism).__name__,
              type(self.classifier_factory).__name__)

    classifier = self._train_classifier(a1, a2)

    def score_part(part_size):
        # Scores one part, batch by batch, and returns the scores of all
        # its batches joined into a single array.
        batch_scores = []
        for batch_size in split_by_batch_size(
                part_size, self.config.prediction_batch_size):
            # generate samples from a2
            samples = self.mechanism.m(a2, batch_size)
            if len(samples.shape) == 1:
                # a 1d result becomes a column of shape (n_samples, 1)
                samples = np.atleast_2d(samples).T
            # compute Pr[a1 | M(a1) = samples]
            batch_scores.append(classifier.predict_probabilities(samples))
        return np.concatenate(batch_scores)

    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements each timer covers.
    with time_measure("time_determine_threshold"):
        log.debug("Determining threshold...")

        # TODO: maybe parallelize this loop?
        part_scores = []
        for part_size in split_into_parts(self.config.n,
                                          self.config.n_processes):
            part_scores.append(score_part(part_size))
        probabilities = np.concatenate(part_scores)

        # Sorting the reversed view in ascending order leaves the
        # underlying array sorted in descending order, in-place.
        probabilities[::-1].sort()
        assert (probabilities.shape[0] == self.config.n)

        # find optimal threshold
        log.debug("Finding optimal threshold...")
        with time_measure("time_dp_distinguisher_find_threshold"):
            target_count = self.config.c * probabilities.shape[0]
            thresh, q = DPSniper._find_threshold(probabilities, target_count)
        log.debug("Selected t = %f, q = %f", thresh, q)

    return MlAttack(classifier, thresh, q)
def _probe(self, eps) -> Tuple:
    """
    Runs counterexample detection for eps once per postprocessed algorithm
    and keeps the attack with the smallest p-value.

    Args:
        eps: the epsilon value to check

    Returns:
        a tuple (p_value, (a1, a2, event, postprocessing, a0))
        a0 is the reference input to be used for HammingDistance postprocessing.
        If self.all_postprocessed_algs is empty, p_value is 1.0 and the
        inner tuple is None.
    """
    log.info("checking eps = %f", eps)
    self._nof_probes += 1

    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements the timer covers.
    with time_measure("statdp_time_one_probe"):
        min_p_value = 1.0
        min_attack = None
        for pps in self.all_postprocessed_algs:
            log.info("trying postprocessing %s...", str(pps))

            # The algorithm under test is handed over through the shared
            # kwargs dict; the entry is only meant to live for this call.
            self.default_kwargs['alg'] = pps
            try:
                result = detect_counterexample(
                    compose_postprocessing,
                    eps,
                    num_input=self.num_input,
                    default_kwargs=self.default_kwargs,
                    sensitivity=self.sensitivity,
                    detect_iterations=self.detect_iterations,
                    quiet=True)
            finally:
                # Remove the temporary entry even when detection raises, so
                # a failed probe cannot leak 'alg' into the shared kwargs.
                del self.default_kwargs['alg']

            # only the first entry of the detection result is used
            (_, p_value, d1, d2, kwargs, event) = result[0]

            # the first candidate is always accepted; later ones only if
            # their p-value is strictly smaller
            if min_attack is None or p_value < min_p_value:
                min_p_value = p_value
                min_attack = (d1, d2, event, pps.postprocessing, kwargs['_d1'])

        log.info("p_value = %f", min_p_value)
        log.info("event = %s", min_attack)
        log.data("statdp_intermediate_probe",
                 {"eps": eps, "p_value": min_p_value})
    return min_p_value, min_attack
def run_statdp(name: str, algorithm, pp_config: PostprocessingConfig,
               num_input: tuple, sensitivity, default_kwargs):
    """
    Runs the StatDP binary search for one algorithm and verifies the result.

    The binary search yields a preliminary eps together with the inputs,
    event and postprocessing of the attack. That attack is then re-evaluated
    with config.n_final samples, and the verified eps and its lower
    confidence bound are logged as "statdp_result". Any exception is logged
    and not propagated.

    Args:
        name: label used as log context and in the error message
        algorithm: the algorithm to test
        pp_config: postprocessing configuration passed to the binary search
        num_input: passed through to the binary search
        sensitivity: passed through to the binary search
        default_kwargs: keyword arguments forwarded to the algorithm
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements each timer covers.
    with log_context(name):
        try:
            log.info("Running StatDP binary search...")
            with time_measure("statdp_time"):
                search = BinarySearch(algorithm, num_input, sensitivity,
                                      n_samples_detector, default_kwargs,
                                      pp_config)
                found = search.find(p_value, precision)
                eps, a1, a2, event, postprocessing, a0 = found
                log.info(
                    "StatDP result: [eps=%f, a1=%s, a2=%s, event=%s, postprocessing=%s, a0=%s]",
                    eps, a1, a2, str(event), str(postprocessing), a0)

            log.info("Verifying epsilon using %d samples...", config.n_final)
            with time_measure("statdp_verification_time"):
                attack = StatDPAttack(event, postprocessing)
                if postprocessing.requires_noisefree_reference:
                    # evaluate the algorithm on a0 with the zero-noise prng
                    # to obtain the reference the attack needs
                    reference_output = algorithm(the_zero_noise_prng, a0,
                                                 **default_kwargs)
                    attack.set_noisefree_reference(reference_output)

                probability_estimator = StatDPPrEstimator(
                    algorithm,
                    config.n_final,
                    config,
                    use_parallel_executor=True,
                    **default_kwargs)
                eps_estimator = EpsEstimator(probability_estimator,
                                             allow_swap=True)
                eps_verified, eps_lcb = eps_estimator.compute_eps_estimate(
                    a1, a2, attack)

                log.info("Verified eps=%f (lcb=%f)", eps_verified, eps_lcb)
                summary = {
                    "eps": eps_verified,
                    "eps_lcb": eps_lcb,
                    "eps_preliminary": eps,
                    "a1": a1,
                    "a2": a2,
                    "event": event,
                    "postprocessing": str(postprocessing),
                }
                log.data("statdp_result", summary)
        except Exception:
            # failures are recorded with the traceback instead of being
            # raised to the caller
            log.error("Exception while running StatDP on %s", name,
                      exc_info=True)
def _one_input_pair(task):
    """
    Handles a single input pair: selects the best attack, estimates eps for
    it and stores the resulting witness.

    Args:
        task: a triple (optimizer, a1, a2)

    Returns:
        the value returned by the witness's to_tmp_file()
    """
    optimizer, a1, a2 = task
    eps_estimator = EpsEstimator(optimizer.pr_estimator)

    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements each timer covers.
    log.debug("selecting attack...")
    with time_measure("time_dp_distinguisher"):
        attack = optimizer.attack_optimizer.best_attack(a1, a2)
    log.debug("best attack: %s", attack)

    witness = DDWitness(a1, a2, attack)

    log.debug("computing estimate for eps...")
    with time_measure("time_estimate_eps"):
        witness.compute_eps_using_estimator(eps_estimator)
    log.debug("current eps: %s", witness.eps)
    log.data("eps_for_sample", witness.eps)

    log.debug("storing result...")
    stored_at = witness.to_tmp_file()
    log.debug("done!")
    return stored_at
def run(self):
    """
    Runs a DD-search driven by a DPSniper attack optimizer, calls
    compute_eps_high_precision on the resulting witness and logs its
    inputs, attack, eps and lower bound.
    """
    log.info("using configuration %s", self.config)
    sniper = DPSniper(self.mechanism, self.classifier_factory, self.config)

    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements each timer covers.
    with time_measure("time_dd_search"):
        log.debug("running dd-search...")
        search = DDSearch(self.mechanism, sniper, self.input_pair_sampler,
                          self.config)
        witness = search.run()
    log.debug("finished dd-search, preliminary eps=%f", witness.eps)

    with time_measure("time_final_estimate_eps"):
        log.debug("computing final eps estimate...")
        witness.compute_eps_high_precision(self.mechanism, self.config)

    log.info("done!")
    # human-readable summary, one line per field
    summary = (
        ("> a1 = {}", witness.a1),
        ("> a2 = {}", witness.a2),
        ("> attack = {}", witness.attack),
        ("> eps = {}", witness.eps),
        ("> eps lcb = {}", witness.lower_bound),
    )
    for template, value in summary:
        log.info(template.format(value))

    # the same final numbers as data entries
    log.data("eps", witness.eps)
    log.data("eps_lcb", witness.lower_bound)
def run(self) -> DDWitness:
    """
    Runs the optimizer and returns the result.

    Returns:
        the greatest of the intermediate results according to their ">"
        comparison; on ties the earliest one is kept
    """
    # compute intermediate results (approximate eps)
    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements the timer covers.
    with time_measure("time_dp_distinguisher_all_inputs"):
        candidates = self._compute_results_for_all_inputs()

    # max() compares with ">" and keeps the first maximal element; with no
    # candidates it yields None, so the to_json() call below fails
    winner = max(candidates, default=None)

    log.data('best_result', winner.to_json())
    return winner
def _train_classifier(self, a1, a2) -> StableClassifier:
    """
    Trains the classifier for inputs a1, a2.

    A new classifier is obtained from self.classifier_factory and trained
    on a stream of data batches whose sizes come from splitting
    self.config.n_train by self.config.training_batch_size.

    Args:
        a1: the first input
        a2: the second input

    Returns:
        the trained classifier
    """
    def training_batches():
        # generator: nothing below runs until the classifier iterates it
        batch_sizes = split_by_batch_size(self.config.n_train,
                                          self.config.training_batch_size)
        for batch_size in batch_sizes:
            yield self._generate_data_batch(a1, a2, batch_size)

    log.debug("Creating classifier...")
    model = self.classifier_factory.create()

    # NOTE(review): block nesting was reconstructed from a source whose
    # line breaks were lost; confirm which statements the timer covers.
    log.debug("Training classifier...")
    with time_measure("time_dp_distinguisher_train"):
        model.train(training_batches())
    log.debug("Done training")

    return model