def makeDPNode(self, geounit_node: GeounitNode) -> GeounitNode:
    """
    Attach noisy DP query answers and optimization metadata to one geounit node.

    The node passed in is modified in place (no copy is made) and, unless
    the query-ordering object hands back a different node from
    makeOptQueries(), the same object is returned.

    NOTE (carried over from the original documentation): this is invoked
    inside the mapper, i.e. once per GeounitNode on the workers, even though
    the .map() call itself is issued from self.noisyAnswers() on the master.

    Inputs:
        geounit_node: a node carrying "raw" data; it must expose geocode,
            geolevel, getDenseRaw() and getDenseRawHousing()
    Outputs:
        the node with dp_queries, unit_dp_queries, rounder_queries and
        query_ordering set
    """
    geocode = geounit_node.geocode
    geolevel = geounit_node.geolevel
    logging.info(json.dumps({'geocode': geocode, 'geolevel': geolevel}))

    # Value returned by getNodePLB() is handed straight to nodeDPQueries().
    node_plb = self.getNodePLB(geounit_node)
    raw_hist: np.ndarray = geounit_node.getDenseRaw()
    housing_hist: np.ndarray = geounit_node.getDenseRawHousing()

    # nodeDPQueries() must return exactly two items: the measurements for the
    # main histogram and those for the unit (housing) histogram.
    main_answers, unit_answers = self.nodeDPQueries(
        node_plb, raw_hist, housing_hist, geolevel)

    # Alias, not a copy: the measurements are written onto the input node.
    node = geounit_node
    node.dp_queries = main_answers
    node.unit_dp_queries = unit_answers

    ordering = self.optimization_query_ordering
    if ordering.est_and_qadd_queries:
        print("multi_pass_try")
        # make estimation queries
        node = ordering.makeOptQueries(node)

    # Rounder queries are looked up per geolevel; with no rounder names
    # configured the node gets an empty dict.
    rounder_names = ordering.rounder_query_names
    node.rounder_queries = (
        self.setup.schema_obj.getQueries(list(rounder_names[geolevel]))
        if rounder_names
        else {}
    )
    node.query_ordering = ordering.query_ordering[geolevel]

    return node
def makeDPNode(self, geounit_node: GeounitNode, tot_budget=None, dp_queries=True, small_cell_query=False) -> GeounitNode:
    """
    This function takes a GeounitNode with "raw" data and generates
    noisy DP query answers depending on the specifications in the
    config object. The node is modified in place and returned.

    NOTE: This function is called inside the mapper (see above), so it is
    executed for every GeounitNode, on the workers. This may be confusing,
    because the .map() function is called from self.noisyAnswers() (above),
    which is run on the Master node.

    Inputs:
        geounit_node: a Node object with "raw" data
        tot_budget: in minimal schema runs (there are two) the total budget
            differs, using only a part; None means self.total_budget
        dp_queries: boolean indicating whether dpqueries are present in
            measurement set, based on config; when False no preset DP
            queries are made and the detailed query gets the whole budget
        small_cell_query: boolean indicating whether a
            public-historical-data smallCellQuery is present, based on config
    Outputs:
        dp_geounit_node: a Node object with selected DP measurements
    Raises:
        AssertionError: always, when small_cell_query is truthy (that path
            is deliberately blocked after the query answer is printed)
    """
    logging.info(json.dumps({'geocode': geounit_node.geocode, 'geolevel': geounit_node.geolevel}))

    # For minimal schema, a part of budget is used in phase 1 and
    # the rest in phase 2, so this function is called with those
    # as tot_budget
    if tot_budget is None:
        tot_budget = self.total_budget

    # For minimal schema phase 1, no DP preset queries are made, so the argument should be set to False
    # Also, no budget is to be spent on the other queries than the detailed one in phase 1
    if dp_queries:
        # From here on dp_queries holds the engine-level setting, which
        # decides below whether nodeDPQueries() is called at all.
        dp_queries = self.dp_queries
        detailed_prop: float = self.detailed_prop
    else:
        detailed_prop = 1.0

    # For bottom-up (for example), no proportioning over geolevels
    if self.geolevel_prop_budgets is None:
        geolevel_prop = 1.0
    else:
        # index relative to the top level
        index = self.levels_reversed.index(geounit_node.geolevel)
        geolevel_prop = self.geolevel_prop_budgets[index]

    node_hist: np.ndarray = geounit_node.getDenseRaw()
    dp_budget: float = tot_budget * geolevel_prop

    # Alias, not a copy: measurements are written onto the input node.
    dp_geounit_node = geounit_node
    dp_geounit_node.dp_queries = self.nodeDPQueries(dp_budget, node_hist) if dp_queries else {}
    dp_geounit_node.dp = self.makeDPQuery(
        hist=node_hist,
        query=querybase.QueryFactory.makeTabularGroupQuery(array_dims=node_hist.shape),
        epsilon=detailed_prop * dp_budget,
    )

    if small_cell_query:
        smallCellName = C.SMALLCELLBASENAME + f"_geocode{geounit_node.geocode}"
        multiindices = das_utils.loadJSONFile(self.small_cell_basepath + f"geocode{geounit_node.geocode}.json")
        dp_geounit_node.smallCellQuery = querybase.QueryFactory.makeInefficientCountQuery(
            array_dims=node_hist.shape, multiindices=multiindices, name=smallCellName)
        print(
            f"In geocode {geounit_node.geocode}, smallCellQuery has answer: {dp_geounit_node.smallCellQuery.answer(node_hist)}"
        )
        # Explicit raise instead of `assert True == False`: assert statements
        # are removed under `python -O`, which would silently let this
        # blocked path return a node. Exception type and message are unchanged.
        raise AssertionError("Thou shallt not pass")

    return dp_geounit_node