示例#1
0
    def makeDPNode(self, geounit_node: GeounitNode) -> GeounitNode:
        """
        Turn a GeounitNode holding "raw" data into one that carries the noisy
        DP query answers requested by the config object.

        NOTE: This runs inside the mapper, i.e. once for every GeounitNode, on
        the workers -- even though the .map() call itself is issued from
        self.noisyAnswers(), which runs on the Master node.

        The input node is not copied: the results are stored as attributes on
        the very object that was passed in. Only
        self.optimization_query_ordering.makeOptQueries() may hand back another
        object, in which case that object receives the remaining attributes and
        is the one returned.

        Inputs:
            geounit_node: a Node object with "raw" data
        Outputs:
            the Node object with dp_queries, unit_dp_queries, rounder_queries
            and query_ordering set
        """
        node_info = {
            'geocode': geounit_node.geocode,
            'geolevel': geounit_node.geolevel,
        }
        logging.info(json.dumps(node_info))

        # Value returned by getNodePLB() is passed straight to nodeDPQueries().
        node_plb = self.getNodePLB(geounit_node)

        raw_hist: np.ndarray = geounit_node.getDenseRaw()
        raw_unit_hist: np.ndarray = geounit_node.getDenseRawHousing()

        # Alias, not a copy: the measurements end up on the caller's node.
        noisy_node = geounit_node
        main_answers, unit_answers = self.nodeDPQueries(
            node_plb, raw_hist, raw_unit_hist, geounit_node.geolevel)
        noisy_node.dp_queries = main_answers
        noisy_node.unit_dp_queries = unit_answers

        ordering = self.optimization_query_ordering
        if ordering.est_and_qadd_queries:
            print("multi_pass_try")
            # make estimation queries
            noisy_node = ordering.makeOptQueries(noisy_node)

        # Rounder queries are looked up by the geolevel of the input node;
        # without configured rounder query names the node gets an empty dict.
        rounder_names = ordering.rounder_query_names
        if rounder_names:
            noisy_node.rounder_queries = self.setup.schema_obj.getQueries(
                list(rounder_names[geounit_node.geolevel]))
        else:
            noisy_node.rounder_queries = {}

        noisy_node.query_ordering = ordering.query_ordering[
            geounit_node.geolevel]
        return noisy_node
    def makeDPNode(self,
                   geounit_node: GeounitNode,
                   tot_budget=None,
                   dp_queries=True,
                   small_cell_query=False) -> GeounitNode:
        """
        This function takes a GeounitNode with "raw" data and generates
        noisy DP query answers depending on the specifications in the
        config object.

        NOTE: This function is called inside the mapper (see above),
        so it is executed for every GeounitNode, on the workers.

        This may be confusing, because the .map() function is called from
        self.noisyAnswers() (above), which is run on the Master node.

        The input node is modified in place (no copy is made) and the same
        object is returned.

        Inputs:
            geounit_node: a Node object with "raw" data
            tot_budget: in minimal schema runs (there are two) the total budget differs, using only a part;
                        None means self.total_budget is used
            dp_queries: boolean indicating whether dpqueries are present in measurement set, based on config;
                        when falsy, no preset DP queries are made and the detailed query gets the whole node budget
            small_cell_query: boolean indicating whether a public-historical-data smallCellQuery is present, based on config
        Outputs:
            dp_geounit_node: a Node object with selected DP measurements
                             (attributes dp_queries and dp are set)
        Raises:
            AssertionError: always, when small_cell_query is truthy -- that
                            branch is deliberately blocked after building and
                            printing the query.

        """
        logging.info(
            json.dumps({
                'geocode': geounit_node.geocode,
                'geolevel': geounit_node.geolevel
            }))

        # For minimal schema, a part of budget is used in phase 1 and
        # the rest in phase 2, so this function is called with those
        # as tot_budget
        if tot_budget is None:
            tot_budget = self.total_budget

        # For minimal schema phase 1, no DP preset queries are made, so the argument should be set to False
        # Also, no budget is to be spent on the other queries than the detailed one in phase 1
        if dp_queries:
            # From here on dp_queries holds the configured self.dp_queries,
            # whose truthiness decides below whether nodeDPQueries() is called.
            dp_queries = self.dp_queries
            detailed_prop: float = self.detailed_prop
        else:
            detailed_prop = 1.0

        # For bottom-up (for example), no proportioning over geolevels
        if self.geolevel_prop_budgets is None:
            geolevel_prop = 1.0
        else:
            # index relative to the top level
            index = self.levels_reversed.index(geounit_node.geolevel)
            geolevel_prop = self.geolevel_prop_budgets[index]

        node_hist: np.ndarray = geounit_node.getDenseRaw()
        dp_budget: float = tot_budget * geolevel_prop

        # Alias, not a copy: measurements are attached to the caller's node.
        dp_geounit_node = geounit_node
        dp_geounit_node.dp_queries = self.nodeDPQueries(
            dp_budget, node_hist) if dp_queries else {}
        # Detailed query over the full histogram shape, using the detailed
        # share of this node's budget.
        dp_geounit_node.dp = self.makeDPQuery(
            hist=node_hist,
            query=querybase.QueryFactory.makeTabularGroupQuery(
                array_dims=node_hist.shape),
            epsilon=detailed_prop * dp_budget)
        if small_cell_query:
            smallCellName = C.SMALLCELLBASENAME + f"_geocode{geounit_node.geocode}"
            multiindices = das_utils.loadJSONFile(
                self.small_cell_basepath +
                f"geocode{geounit_node.geocode}.json")
            dp_geounit_node.smallCellQuery = querybase.QueryFactory.makeInefficientCountQuery(
                array_dims=node_hist.shape,
                multiindices=multiindices,
                name=smallCellName)
            print(
                f"In geocode {geounit_node.geocode}, smallCellQuery has answer: {dp_geounit_node.smallCellQuery.answer(node_hist)}"
            )
            # Deliberate hard stop: the small-cell path must not be used.
            # Raised explicitly rather than via an assert statement, because
            # asserts are stripped under `python -O` and the guard would
            # silently disappear.
            raise AssertionError("Thou shallt not pass")

        return dp_geounit_node