Example #1
def getGurobiEnvironment(config, retries=10):
    """ Create a new license environment

    Input:
        config: config file.

    Output:
        environment object

    Notes:
        if config["ENVIRONMENT"] is "GAM" it uses the free license.

    """
    clogging.setup(syslog=True)
    logging.info("Creating environment...")
    os.environ[GRB_LICENSE_FILE] = os.path.expandvars(
        config[GUROBI][GUROBI_LIC])
    cluster = config.get(ENVIRONMENT, CLUSTER_OPTION, fallback=CENSUS_CLUSTER)
    env = None
    rand_wait_base = np.random.uniform(1, 3)
    attempts = 0
    success = False
    while (not success) and attempts < retries:
        try:
            if cluster == GAM_CLUSTER:
                env = gb.Env()
            else:
                logfile = os.path.expandvars(
                    config[GUROBI][GUROBI_LOGFILE_NAME])
                env1 = config[ENVIRONMENT][GRB_ISV_NAME]
                env2 = config[ENVIRONMENT][GRB_APP_NAME]
                env3 = int(config[ENVIRONMENT][GRB_ENV3])
                env4 = config[ENVIRONMENT][GRB_ENV4].strip()
                env = gb.Env.OtherEnv(logfile, env1, env2, env3, env4)
            success = True
        except gb.GurobiError as err:
            attempts += 1
            if attempts == retries:
                raise err
            rand_wait = 1.3**(attempts - 1) * rand_wait_base
            time.sleep(rand_wait / 1000)
    if cluster == GAM_CLUSTER:
        logging.debug("gurobi environment creation succeeded on attempt %s",
                      attempts)
    else:
        logging.debug(
            "Successfully connected to Gurobi token server on attempt %s",
            attempts)
    return env
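A minimal usage sketch for the function above, assuming a ConfigParser-style config containing the sections and options the function reads; the config path, section contents, and the model name are illustrative only:

import configparser
import gurobipy as gb

config = configparser.ConfigParser()
config.read("das_config.ini")                  # hypothetical config file with the gurobi/environment sections
env = getGurobiEnvironment(config, retries=5)  # retries on GurobiError with exponential backoff
model = gb.Model("example", env=env)           # subsequent models are built against the acquired environment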
Example #2
def geoimp_wrapper_root(*,
                        config,
                        parent_shape,
                        root_node: GeounitNode,
                        optimizers,
                        min_schema=None,
                        keep_debug_info=False):
    """
    This function performs the Post-Processing Step from the Root Geonode (e.g. US or a State) to the Root Geonode level (root-to-root).
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        root_node: a GeounitNode object referring to the top/root node of the geographical tree (e.g. US, US+PR or a single state for state-size runs)
        optimizers: which L2, Rounder and SequentialOptimizer to use
        min_schema: backup feasibility schema for the optimizer
        keep_debug_info: keep dp_queries and syn_unrounded in the optimized node; delete them if False

    Output:
        root_node: a GeounitNode object referring to the top/root node of the geographical tree (e.g. US, US+PR or a single state for state-size runs)
    """

    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO)
    # t_start = time.time()
    parent_hist = [None] * len(parent_shape)

    children = [root_node.unzipNoisy()]

    children, backup_solve_status = makeInputsAndRunOptimizer(
        children,
        config,
        min_schema,
        parent_hist,
        parent_shape,
        "root_to_root",
        optimizers,
        keep_debug_info=keep_debug_info)

    return children[0]
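Per the docstring, this wrapper is invoked from engine_utils.py:topdown inside a Spark map. A rough, hypothetical sketch of that call pattern (root_rdd and the surrounding variables are illustrative, not the engine's actual names):

optimized_root = root_rdd.map(
    lambda node: geoimp_wrapper_root(config=config,
                                     parent_shape=parent_shape,
                                     root_node=node,
                                     optimizers=optimizers,
                                     min_schema=min_schema)).collect()[0]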
def geoimp_wrapper(*, config, parent_child_node, accum, min_schema=None):
    """
    This function performs the Post-Processing Step for a generic parent to the Child geography.
    It is called from topdown_engine.py:topdown in a Spark map operation. 
    It runs on the CORE and TASK nodes, not on the MASTER.
    So there is no das object!
    
    Inputs:
        config: configuration object
        parent_child_node: a (k,v) RDD with key being a geocode and
            value being a tuple of GeounitNode objects containing one parent and multiple children
        accum: spark accumulator object which tracks the number of solves that use the backup solve

    Output:
        children: a list of Node objects for each of the children, after post-processing
    """

    # Make sure that the logger is set up on all the nodes
    clogging.setup(level=logging.INFO,
                   syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))
    parent: GeounitNode
    children: List[GeounitNode]
    parent, children = findParentChildNodes(parent_child_node)

    n_children = len(children)

    #######
    # under certain circumstances we can skip the gurobi optimization
    #######
    #
    # Only 1 child

    if n_children == 1:
        children[0].syn = parent.syn
        return children

    if parent.syn.sum() == 0:
        for child in children:
            child.syn = sparse.multiSparse(np.zeros(parent.syn.shape))
        return children

    #########
    # resume code for gurobi optimization
    ########
    # stack the dp arrays on top of one another, if only 1 child just expand the axis

    if parent.dp:
        if n_children > 1:
            noisy_child = np.stack(
                [asDense(child.dp.DPanswer) for child in children], axis=-1)
        else:
            noisy_child = np.expand_dims(asDense(children[0].dp.DPanswer),
                                         axis=len(
                                             children[0].dp.DPanswer.shape))
    else:
        noisy_child = None

    noisy_child_weight = 1. / children[0].dp.Var if parent.dp else None

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done more neatly
    dp_queries_comb = stackNodeProperties(children,
                                          lambda node: node.dp_queries,
                                          cons_dpq.StackedDPquery,
                                          lambda name: name != C.DETAILED)
    query_weights = map(
        lambda sdpq: 1. / sdpq.Var, dp_queries_comb
    )  # We can get actual variance for each query if we want
    constraints_comb = stackNodeProperties(children, lambda node: node.cons,
                                           cons_dpq.StackedConstraint)
    parent_hist = parent.getDenseSyn()
    parent_geocode = parent.geocode

    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None,
        config=config,
        parent=parent_hist,
        parent_shape=parent_hist.shape,
        NoisyChild=noisy_child,
        childGeoLen=n_children,
        DPqueries=dp_queries_comb,
        constraints=constraints_comb,
        NoisyChild_weight=noisy_child_weight,
        query_weights=query_weights,
        identifier=parent_geocode,
        min_schema=min_schema,
        stat_node=children[0])

    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # slice off the combined child solution to make separate arrays for each child
    int_answer_list = np_utils.sliceArray(int_answer)
    l2_answer_list = np_utils.sliceArray(l2_answer)

    # check constraints
    for i, child in enumerate(children):
        child.syn = int_answer_list[i]
        constraintsCheck(child)

    # make sparse arrays
    for i, child in enumerate(children):
        child.syn = sparse.multiSparse(int_answer_list[i])
        child.syn_unrounded = sparse.multiSparse(l2_answer_list[i])

    if backup_solve_status is True:
        accum += 1

    return children
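The docstring says this wrapper runs as a Spark map over (parent, children) groups, with a Spark accumulator counting how many solves fell back to the backup solve. A rough, hypothetical call pattern (flatMap is used here because the function returns a list of children; variable names are illustrative):

backup_accum = spark.sparkContext.accumulator(0)   # incremented inside geoimp_wrapper on backup solves
children_rdd = parent_child_rdd.flatMap(
    lambda pc: geoimp_wrapper(config=config,
                              parent_child_node=pc,
                              accum=backup_accum,
                              min_schema=min_schema))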
def geoimp_wrapper_nat(*,
                       config,
                       parent_shape,
                       nat_node: GeounitNode,
                       min_schema=None):
    """
    This function performs the Post-Processing Step of National to National level.
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        nat_node: a GeounitNode object referring to entire nation

    Output:
        nat_node: a GeounitNode object referring to entire nation
    """

    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO,
                   syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))
    # t_start = time.time()
    parent_hist = None

    noisy_child = np.expand_dims(
        asDense(nat_node.dp.DPanswer), axis=len(
            nat_node.dp.DPanswer.shape)) if nat_node.dp else None
    noisy_child_weight = 1. / nat_node.dp.Var if nat_node.dp else None
    parent_geocode = "nat_to_nat"

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done more neatly
    dp_queries_comb = stackNodeProperties([
        nat_node,
    ], lambda node: node.dp_queries, cons_dpq.StackedDPquery,
                                          lambda name: name != C.DETAILED)
    query_weights = map(
        lambda sdpq: 1. / sdpq.Var, dp_queries_comb
    )  # We can get actual variance for each query if we want
    constraints_comb = stackNodeProperties([
        nat_node,
    ], lambda node: node.cons, cons_dpq.StackedConstraint)

    # Create an L2PlusRounderWithBackup object
    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None,
        parent=parent_hist,
        parent_shape=parent_shape,
        NoisyChild=noisy_child,
        childGeoLen=1,
        config=config,
        DPqueries=dp_queries_comb,
        constraints=constraints_comb,
        NoisyChild_weight=noisy_child_weight,
        query_weights=query_weights,
        identifier="nat_to_nat",
        min_schema=min_schema,
        stat_node=nat_node)

    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # get rid of extra dimension
    int_answer = int_answer.squeeze()
    l2_answer = l2_answer.squeeze()

    nat_node.syn = int_answer
    constraintsCheck(nat_node, parent_geocode)

    nat_node.syn = sparse.multiSparse(int_answer)
    nat_node.syn_unrounded = sparse.multiSparse(l2_answer)
    return nat_node
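The expand_dims/squeeze pair in the function above is bookkeeping for the single-child case: the optimizer expects a trailing child axis, so appending an axis of length 1 before the solve and squeezing it away afterwards round-trips the histogram shape. A small illustration with a placeholder array:

import numpy as np
h = np.zeros((2, 3))                              # placeholder for a dense DP answer
stacked = np.expand_dims(h, axis=len(h.shape))    # shape (2, 3, 1): a single child along the last axis
assert stacked.squeeze().shape == h.shape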
Example #5
def geoimp_wrapper(*,
                   config,
                   parent_child_node,
                   accum,
                   optimizers,
                   min_schema=None,
                   keep_debug_info=False,
                   aian=False):
    """
    This function performs the Post-Processing Step for a generic parent to the Child geography.
    It is called from topdown_engine.py:topdown in a Spark map operation.
    It runs on the CORE and TASK nodes, not on the MASTER.
    So there is no das object!

    Inputs:
        config: configuration object
        parent_child_node: a (k,v) RDD with key being a geocode and
            value being a tuple of GeounitNode objects containing one parent and multiple children
        accum: spark accumulator object which tracks the number of solves that use the backup solve
        optimizers: which L2, Rounder and SequentialOptimizer to use,
        min_schema: backup feasibility schema for the optimizer
        keep_debug_info: keep dp_queries and syn_unrounded in the optimized nodes; delete them if False
        aian: if it's AIAN spine, it will keep state total invariance on US -> ({aian_parts_of_states} + {non_aian_parts_of_states}) optimization

    Output:
        children: a list of Node objects for each of the children, after post-processing
    """

    # Make sure that the logger is set up on all the nodes
    clogging.setup(level=logging.INFO)

    parent, children = findParentChildNodes(parent_child_node)
    parent_hist = parent.getDenseSyn(), parent.getDenseSynHousing()
    parent_geocode = parent.geocode
    parent_shape = tuple(h.shape for h in parent_hist)

    #######
    # under certain circumstances we can skip the gurobi optimization
    #######

    # Only 1 child
    if len(children) == 1:
        children = [children[0].copyParentSyn(parent, keep_debug_info)]
        return constraintsCheck(children, parent_geocode)

    # If the parent is empty (NOTE: all histograms should be empty. Also, the sum check obviously works only if values are non-negative)
    if parent.histsAreEmpty():
        children = [
            child.copyParentSyn(parent, keep_debug_info, zerosyn=True)
            for child in children
        ]
        print(f"parent geocode {parent_geocode} is empty")
        return constraintsCheck(children, parent_geocode)

    #########
    # resume code for gurobi optimization
    ########

    children = [child.unzipNoisy() for child in children]

    children, backup_solve_status = makeInputsAndRunOptimizer(
        children,
        config,
        min_schema,
        parent_hist,
        parent_shape,
        parent_geocode,
        optimizers,
        keep_debug_info=keep_debug_info,
        aian=aian)

    if backup_solve_status is True:
        accum += 1

    return children
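Unlike the earlier geoimp_wrapper, this variant carries two parent histograms (the outputs of getDenseSyn and getDenseSynHousing), so parent_hist is a 2-tuple and parent_shape a tuple of shapes. A small illustration of that packing, with placeholder arrays standing in for the real histograms:

import numpy as np
person_hist = np.zeros((2, 2, 3))    # placeholder for parent.getDenseSyn()
housing_hist = np.zeros((4, 5))      # placeholder for parent.getDenseSynHousing()
parent_hist = person_hist, housing_hist
parent_shape = tuple(h.shape for h in parent_hist)   # ((2, 2, 3), (4, 5))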
    def getGurobiEnvironment(self, retries=C.GUROBI_LICENSE_MAX_RETRIES):
        """ Create a new license environment
            IMPORTANT: HAS TO BE A NEW ENVIRONMENT, DO NOT TRY TO RETURN ONE ALREADY STORED IN A PYTHON OBJECT
        Input:
            retries: maximum number of attempts to acquire a license

        Output:
            environment object

        Notes:
            1. if config["ENVIRONMENT"] is "GAM" or if ISV_NAME is not set, create an environment using the public
               gb.Env() API, which typically uses the academic license.
            2. If a license cannot be obtained, implements retries with random backoff.

        """

        # This appears to be the first function called in the python environment on each worker node.
        # Be sure the environment is properly set up.

        if self.gurobi_path and self.gurobi_path not in sys.path:
            sys.path.insert(0, self.gurobi_path)
            import gurobipy as gb

        # Syslog does not require the datetime because it is added automatically by the protocol,
        # but syslog does not include the year, so we add it manually.
        #
        # NOTE: yarn may not be running on the CORE and TASK nodes
        # when the bootstrap is run, so attempts to set the MASTER_IP
        # on the core nodes sometimes failed. We avoid this now by passing the MASTER_IP
        # in the configuration environment

        clogging.setup(level=logging.INFO,
                       syslog=True,
                       syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP),
                       syslog_format=clogging.YEAR + " " +
                       clogging.SYSLOG_FORMAT)

        # THIS BELOW SHOULD NOT BE DONE, THERE'S A REASON FOR RE-CREATING THE ENVIRONMENT
        # # If we already have a grb_env, just return it.
        # if self.grb_env is not None:
        #     return self.grb_env

        os.environ[C.GRB_LICENSE_FILE] = self.getconfig(C.GUROBI_LIC)

        import gurobipy as gb

        # Get environment variables
        cluster = self.getconfig(C.CLUSTER_OPTION,
                                 section=C.ENVIRONMENT,
                                 default=C.CENSUS_CLUSTER)
        logfile = self.getconfig(C.GUROBI_LOGFILE_NAME)
        isv_name = self.getconfig(C.GRB_ISV_NAME,
                                  section=C.ENVIRONMENT,
                                  default='')
        app_name = self.getconfig(C.GRB_APP_NAME,
                                  section=C.ENVIRONMENT,
                                  default='')

        # env = None
        for attempt in range(1, retries):
            try:
                if (cluster == C.GAM_CLUSTER) or (isv_name == ''):
                    # Use academic license
                    env = gb.Env(logfile)
                else:
                    # Use commercial license
                    env3 = self.getint(C.GRB_ENV3, section=C.ENVIRONMENT)
                    env4 = self.getconfig(C.GRB_ENV4,
                                          section=C.ENVIRONMENT).strip()
                    env = gb.Env.OtherEnv(logfile, isv_name, app_name, env3,
                                          env4)
                    logging.info("Acquired gurobi license on attempt %s",
                                 attempt)
                # We got the environment, so break and return it
                return env
            except gb.GurobiError as err:
                # If the environment is not obtained, wait some random time and try again if attempt number is still within range
                rand_wait = (
                    C.GUROBI_LICENSE_RETRY_EXPONENTIAL_BASE**(attempt - 1) +
                    np.random.uniform(0, C.GUROBI_LICENSE_RETRY_JITTER))
                logging.info(
                    "Failed to acquire gurobi license on attempt %s; waiting %s",
                    attempt, rand_wait)
                logging.info("(Gurobi error %s)", str(err))
                time.sleep(rand_wait)

        # Attempt loop is over; we ran out of attempts, so raise an error
        raise RuntimeError(
            "Could not acquire Gurobi license, see logfile for more info")