def _windsorized(self, epsilon, lower_bounds, higher_bounds, output):
    """
    Produce a winsorized mean estimate and a noise term for ``output``.

    If the first element of ``output`` is itself iterable (as judged by
    ``isiterable``), every element is handled recursively with an equal
    share ``epsilon / len(output)`` of the budget and the matching entry
    of ``lower_bounds`` / ``higher_bounds``; two parallel lists
    ``(estimates, noises)`` are returned.

    Otherwise ``output`` is treated as a single dimension:

    1. The 0.25 and 0.75 percentiles are estimated through
       ``dpalgos.estimate_percentile``, each with ``epsilon / 4``.
    2. A range ``[lower, upper]`` is centred on the midpoint of the two
       estimates, with half-width
       ``4 * len(output) ** (1/3 + 0.1) * |q75 - q25|``.
    3. The values and that range are passed to
       ``self._sanitize_multidim`` and the mean of the values is taken.

    Returns a tuple ``(mean_estimate, noise)``. The noise comes from
    ``dpalgos.gen_noise`` and is returned separately; it is not added
    to the mean here.
    """
    if isiterable(output[0]):
        # Multi-dimensional case: split the budget evenly and recurse.
        share = epsilon / len(output)
        per_dimension = [
            self._windsorized(share, lower_bounds[position],
                              higher_bounds[position], column)
            for position, column in enumerate(output)
        ]
        estimates = [mean for mean, _ in per_dimension]
        noises = [term for _, term in per_dimension]
        return estimates, noises

    values = list(output)
    spread_factor = len(output) ** (1.0 / 3 + 0.1)

    # Crude location/scale from the two quartile estimates.
    quartile_budget = epsilon / 4
    first_quartile = dpalgos.estimate_percentile(
        0.25, values, quartile_budget, lower_bounds, higher_bounds)
    third_quartile = dpalgos.estimate_percentile(
        0.75, values, quartile_budget, lower_bounds, higher_bounds)
    centre = float(first_quartile + third_quartile) / 2
    half_width = 4 * spread_factor * abs(third_quartile - first_quartile)
    upper = centre + half_width
    lower = centre - half_width

    # Winsorized mean over [lower, upper].
    # NOTE(review): the return value of _sanitize_multidim is ignored, so
    # this presumably clamps `values` in place -- confirm.
    self._sanitize_multidim(values,
                            [lower] * len(values),
                            [upper] * len(values))
    count = len(values)
    mean_estimate = float(sum(values)) / count

    scale = (self.sensitivity_factor * float(abs(upper - lower))
             / (2 * epsilon * count))
    return mean_estimate, dpalgos.gen_noise(scale)
def _windsorized(self, epsilon, lower_bounds, higher_bounds, output):
    """
    Produce a winsorized mean estimate and a noise term for ``output``.

    If the first element of ``output`` is itself iterable (as judged by
    ``isiterable``), every element is handled recursively with an equal
    share ``epsilon / len(output)`` of the budget and the matching entry
    of ``lower_bounds`` / ``higher_bounds``; two parallel lists
    ``(estimates, noises)`` are returned.

    Otherwise ``output`` is treated as a single dimension:

    1. The 0.25 and 0.75 percentiles are estimated through
       ``dpalgos.estimate_percentile``, each with ``epsilon / 4``.
    2. A range ``[lower, upper]`` is centred on the midpoint of the two
       estimates, with half-width
       ``4 * len(output) ** (1/3 + 0.1) * |q75 - q25|``.
    3. The values and that range are passed to
       ``self._sanitize_multidim`` and the mean of the values is taken.

    Returns a tuple ``(mean_estimate, noise)``. The noise comes from
    ``dpalgos.gen_noise`` and is returned separately; it is not added
    to the mean here.
    """
    if isiterable(output[0]):
        # Multi-dimensional case: split the budget evenly and recurse.
        share = epsilon / len(output)
        per_dimension = [
            self._windsorized(share, lower_bounds[position],
                              higher_bounds[position], column)
            for position, column in enumerate(output)
        ]
        estimates = [mean for mean, _ in per_dimension]
        noises = [term for _, term in per_dimension]
        return estimates, noises

    values = list(output)
    spread_factor = len(output) ** (1.0 / 3 + 0.1)

    # Crude location/scale from the two quartile estimates.
    quartile_budget = epsilon / 4
    first_quartile = dpalgos.estimate_percentile(
        0.25, values, quartile_budget, lower_bounds, higher_bounds)
    third_quartile = dpalgos.estimate_percentile(
        0.75, values, quartile_budget, lower_bounds, higher_bounds)
    centre = float(first_quartile + third_quartile) / 2
    half_width = 4 * spread_factor * abs(third_quartile - first_quartile)
    upper = centre + half_width
    lower = centre - half_width

    # Winsorized mean over [lower, upper].
    # NOTE(review): the return value of _sanitize_multidim is ignored, so
    # this presumably clamps `values` in place -- confirm.
    self._sanitize_multidim(values,
                            [lower] * len(values),
                            [upper] * len(values))
    count = len(values)
    mean_estimate = float(sum(values)) / count

    scale = (self.sensitivity_factor * float(abs(upper - lower))
             / (2 * epsilon * count))
    return mean_estimate, dpalgos.gen_noise(scale)
def _get_data_bounds(self, records, epsilon):
    """
    Generate the output bounds for the given data set for a pre defined
    computation.

    ``records`` is transposed with ``zip(*records)`` so that each entry
    of the transpose holds one column. A histogram of the columns is
    built with ``dpalgos.histogram`` and given to the compute driver,
    which answers with a (lower, higher) percentile pair per column.
    For each column flagged in ``self.data_driver.sensitiveness`` both
    percentiles are estimated with ``dpalgos.estimate_percentile``,
    using a budget of ``epsilon / (3 * number_of_columns)`` per call
    and the column's ``min_bounds`` / ``max_bounds`` entry. Columns not
    flagged as sensitive contribute ``0`` for both percentiles.

    Returns whatever ``compute_driver.get_output_bounds`` produces from
    the two lists of percentile estimates.
    """
    compute_driver = self.compute_driver_class()
    min_vals = self.data_driver.min_bounds
    max_vals = self.data_driver.max_bounds
    sensitive = self.data_driver.sensitiveness

    # Find the first and third quartile of the distribution in a
    # differentially private manner.
    # The transpose must be a real list: it is measured with len(),
    # indexed, and traversed more than once below. On Python 3 a bare
    # zip() is a one-shot iterator and len() on it raises TypeError.
    records_transpose = list(zip(*records))
    hist = dpalgos.histogram(records_transpose, sensitive, epsilon)

    logger.debug("Ask compute driver what percentile to calculate")
    percentile_values = compute_driver.get_percentiles(hist)

    logger.debug("Estimating percentiles")
    lower_percentiles = []
    higher_percentiles = []
    for index, column in enumerate(records_transpose):
        if not sensitive[index]:
            # Non-sensitive columns get placeholder zero estimates.
            lower_percentiles.append(0)
            higher_percentiles.append(0)
            continue

        # Computed only when needed so an input with no sensitive
        # columns never performs the division.
        budget = epsilon / (3 * len(records_transpose))
        lp = dpalgos.estimate_percentile(
            percentile_values[index][0], column, budget,
            min_vals[index], max_vals[index])
        hp = dpalgos.estimate_percentile(
            percentile_values[index][1], column, budget,
            min_vals[index], max_vals[index])
        lower_percentiles.append(lp)
        higher_percentiles.append(hp)
    logger.debug("Finished percentile estimation")

    logger.debug("Output bound estimation in progress")
    # Use the ComputeDriver's bound generator to generate the
    # output bounds
    return compute_driver.get_output_bounds(lower_percentiles,
                                            higher_percentiles)
def _get_data_bounds(self, records, epsilon):
    """
    Generate the output bounds for the given data set for a pre defined
    computation.

    ``records`` is transposed with ``zip(*records)`` so that each entry
    of the transpose holds one column. A histogram of the columns is
    built with ``dpalgos.histogram`` and given to the compute driver,
    which answers with a (lower, higher) percentile pair per column.
    For each column flagged in ``self.data_driver.sensitiveness`` both
    percentiles are estimated with ``dpalgos.estimate_percentile``,
    using a budget of ``epsilon / (3 * number_of_columns)`` per call
    and the column's ``min_bounds`` / ``max_bounds`` entry. Columns not
    flagged as sensitive contribute ``0`` for both percentiles.

    Returns whatever ``compute_driver.get_output_bounds`` produces from
    the two lists of percentile estimates.
    """
    compute_driver = self.compute_driver_class()
    min_vals = self.data_driver.min_bounds
    max_vals = self.data_driver.max_bounds
    sensitive = self.data_driver.sensitiveness

    # Find the first and third quartile of the distribution in a
    # differentially private manner.
    # The transpose must be a real list: it is measured with len(),
    # indexed, and traversed more than once below. On Python 3 a bare
    # zip() is a one-shot iterator and len() on it raises TypeError.
    records_transpose = list(zip(*records))
    hist = dpalgos.histogram(records_transpose, sensitive, epsilon)

    logger.debug("Ask compute driver what percentile to calculate")
    percentile_values = compute_driver.get_percentiles(hist)

    logger.debug("Estimating percentiles")
    lower_percentiles = []
    higher_percentiles = []
    for index, column in enumerate(records_transpose):
        if not sensitive[index]:
            # Non-sensitive columns get placeholder zero estimates.
            lower_percentiles.append(0)
            higher_percentiles.append(0)
            continue

        # Computed only when needed so an input with no sensitive
        # columns never performs the division.
        budget = epsilon / (3 * len(records_transpose))
        lp = dpalgos.estimate_percentile(
            percentile_values[index][0], column, budget,
            min_vals[index], max_vals[index])
        hp = dpalgos.estimate_percentile(
            percentile_values[index][1], column, budget,
            min_vals[index], max_vals[index])
        lower_percentiles.append(lp)
        higher_percentiles.append(hp)
    logger.debug("Finished percentile estimation")

    logger.debug("Output bound estimation in progress")
    # Use the ComputeDriver's bound generator to generate the
    # output bounds
    return compute_driver.get_output_bounds(lower_percentiles,
                                            higher_percentiles)