def distribution_cost(
    distribution: Distribution,
    computation_graph: ComputationGraph,
    agentsdef: Iterable[AgentDef],
    computation_memory: Callable[[ComputationNode], float],
    communication_load: Callable[[ComputationNode, str], float],
) -> tuple:
    """
    Compute the cost of a distribution as a weighted sum of communication
    and hosting costs.

    The communication cost is accumulated over every pair of computations
    sharing a link: for each pair, the route cost between the two hosting
    agents is multiplied by the message load between the two computations.
    The hosting cost is the sum, over all computations of the graph, of the
    cost of hosting the computation on the agent it is assigned to.

    Parameters
    ----------
    distribution: Distribution
        The distribution to evaluate; queried with ``agent_for`` to find the
        agent hosting each computation.
    computation_graph: ComputationGraph
        The graph whose ``links`` and ``nodes`` are walked.
    agentsdef: Iterable[AgentDef]
        Agents definitions, used to build the route and hosting cost
        functions.
    computation_memory: Callable[[ComputationNode], float]
        Not used by this cost function.
    communication_load: Callable[[ComputationNode, str], float]
        Used to build the message load function.

    Returns
    -------
    tuple
        ``(cost, comm, hosting)`` where
        ``cost = RATIO_HOST_COMM * comm + (1 - RATIO_HOST_COMM) * hosting``.
    """
    route = route_fonc(agentsdef)
    msg_load = msg_load_func(computation_graph, communication_load)
    hosting_cost = hosting_cost_func(agentsdef)

    comm = 0
    for link in computation_graph.links:
        # As we support hypergraph, we may have more than 2 ends to a link
        for c1, c2 in combinations(link.nodes, 2):
            a1 = distribution.agent_for(c1)
            a2 = distribution.agent_for(c2)
            comm += route(a1, a2) * msg_load(c1, c2)

    hosting = 0
    for computation in computation_graph.nodes:
        agent = distribution.agent_for(computation.name)
        hosting += hosting_cost(agent, computation.name)

    cost = RATIO_HOST_COMM * comm + (1 - RATIO_HOST_COMM) * hosting
    return cost, comm, hosting
def test_dist_2(self):
    """A distribution built from a mapping reports hosted computations
    per agent and the hosting agent per computation."""
    dist = Distribution({'a1': ['v1', 'v2'], 'a2': ['v3']})

    # Number of computations hosted on each agent.
    for agent, expected_count in (('a1', 2), ('a2', 1)):
        self.assertEqual(len(dist.computations_hosted(agent)), expected_count)

    # Both computations given for a1 are reported as hosted on it.
    for computation in ('v1', 'v2'):
        self.assertIn(computation, dist.computations_hosted('a1'))

    # Reverse lookup: computation -> agent.
    for computation, agent in (('v1', 'a1'), ('v2', 'a1'), ('v3', 'a2')):
        self.assertEqual(dist.agent_for(computation), agent)
def test_host_on_new_agent(self):
    """Hosting a computation on an agent unknown to the distribution
    registers both the agent and the computation."""
    initial_mapping = {'a1': ['v1', 'v2'], 'a2': ['v3']}
    dist = Distribution(initial_mapping)
    new_agent, new_computation = 'a3', 'v4'

    dist.host_on_agent(new_agent, [new_computation])

    self.assertEqual(dist.agent_for(new_computation), new_agent)
    self.assertIn(new_agent, dist.agents)
    self.assertIn(new_computation, dist.computations_hosted(new_agent))
def distribution_cost(
    distribution: Distribution,
    computation_graph: ComputationGraph,
    agentsdef: Iterable[AgentDef],
    computation_memory: Callable[[ComputationNode], float],
    communication_load: Callable[[ComputationNode, str], float],
) -> tuple:
    """
    Compute the cost for a distribution.

    In this model, the cost only includes the communication costs based on
    message size: for every pair of computations sharing a link and hosted
    on two different agents, the value returned by ``communication_load`` is
    added to the total. Pairs hosted on the same agent cost nothing.
    No route cost and no hosting cost are taken into account.

    Parameters
    ----------
    distribution: Distribution
        The distribution to evaluate; queried with ``agent_for`` to find the
        agent hosting each computation.
    computation_graph: ComputationGraph
        The graph whose ``links`` are walked; ``computation(name)`` is used
        to fetch the node passed to ``communication_load``.
    agentsdef: Iterable[AgentDef]
        Not used by this cost model.
    computation_memory: Callable[[ComputationNode], float]
        Not used by this cost model.
    communication_load: Callable[[ComputationNode, str], float]
        Called with the node of the first computation of a pair and the name
        of the second one; returns the cost of that edge.

    Returns
    -------
    tuple
        ``(cost, comm, hosting)``. As only communication matters here, the
        first two items are both the communication cost and the third one is
        always ``0``.
    """
    comm = 0
    for link in computation_graph.links:
        # As we support hypergraph, we may have more than 2 ends to a link
        for c1, c2 in combinations(link.nodes, 2):
            if distribution.agent_for(c1) == distribution.agent_for(c2):
                logger.debug(
                    "On same agent, no edge cost between %s and %s", c1, c2)
                continue
            edge_cost = communication_load(
                computation_graph.computation(c1), c2)
            logger.debug(
                "edge cost between %s and %s : %s", c1, c2, edge_cost)
            comm += edge_cost

    # This distribution model only takes communication cost into account:
    # the global cost is the communication cost and the hosting cost is 0.
    return comm, comm, 0
def distribution_cost(
    distribution: Distribution,
    computation_graph: ComputationsFactorGraph,
    agentsdef: Iterable[AgentDef],
    computation_memory: Callable[[ComputationNode], float],
    communication_load: Callable[[ComputationNode, str], float],
) -> tuple:
    """
    Compute the cost of the distribution.

    Only takes communication costs into account (no hosting nor route
    costs): for every pair of computations sharing a link and hosted on two
    different agents, the value returned by ``communication_load`` is added
    to the total. Pairs hosted on the same agent cost nothing.

    Parameters
    ----------
    distribution: Distribution
        The distribution to evaluate; queried with ``agent_for`` to find the
        agent hosting each computation.
    computation_graph: ComputationsFactorGraph
        The graph whose ``links`` are walked; ``computation(name)`` is used
        to fetch the node passed to ``communication_load``.
    agentsdef: Iterable[AgentDef]
        Not used by this cost model.
    computation_memory: Callable[[ComputationNode], float]
        Not used by this cost model.
    communication_load: Callable[[ComputationNode, str], float]
        Called with the node of the first computation of a pair and the name
        of the second one; returns the cost of that edge.

    Returns
    -------
    tuple
        ``(cost, comm, hosting)``. As only communication matters here, the
        first two items are both the communication cost and the third one is
        always ``0``.
    """
    comm = 0
    for link in computation_graph.links:
        # As we support hypergraph, we may have more than 2 ends to a link
        for c1, c2 in combinations(link.nodes, 2):
            if distribution.agent_for(c1) == distribution.agent_for(c2):
                logger.debug(
                    "On same agent, no edge cost between %s and %s", c1, c2)
                continue
            edge_cost = communication_load(
                computation_graph.computation(c1), c2)
            logger.debug(
                "edge cost between %s and %s : %s", c1, c2, edge_cost)
            comm += edge_cost

    # This distribution model only takes communication cost into account:
    # the global cost is the communication cost and the hosting cost is 0.
    return comm, comm, 0
def fg_secp_ilp(
    cg: ComputationsFactorGraph,
    agents: List[AgentDef],
    already_assigned: Distribution,
    computation_memory: Callable[[ComputationNode], float],
    communication_load: Callable[[ComputationNode, str], float],
) -> Distribution:
    """
    Build and solve the ILP placing the computations of a factor graph.

    Computations already present in ``already_assigned`` keep their agent;
    a binary program is built for the remaining variable and factor
    computations and solved with GLPK.

    Parameters
    ----------
    cg: ComputationsFactorGraph
        The factor graph to distribute.
    agents: List[AgentDef]
        The agents that can host computations.
    already_assigned: Distribution
        Pre-assigned computations. This object is completed in place with
        the solver's result and is the object returned.
    computation_memory: Callable[[ComputationNode], float]
        Memory footprint function, forwarded to
        ``secp_computation_memory_in_cg``.
    communication_load: Callable[[ComputationNode, str], float]
        Communication load function, forwarded to the objective function.

    Returns
    -------
    Distribution
        ``already_assigned``, extended with the computed placement.

    Raises
    ------
    ImpossibleDistributionException
        When the solver does not report an optimal solution.
    """
    var_nodes = [
        node for node in cg.nodes if node.type == "VariableComputation"
    ]
    fac_nodes = [
        node for node in cg.nodes if node.type == "FactorComputation"
    ]

    agents = list(agents)
    agents_names = [agt.name for agt in agents]

    # Computations that already have an agent are left out of the model.
    vars_to_host = [
        node.name
        for node in var_nodes
        if not already_assigned.has_computation(node.name)
    ]
    facs_to_host = [
        node.name
        for node in fac_nodes
        if not already_assigned.has_computation(node.name)
    ]

    # Decision variables:
    #  xs[(i, k)]          : 1 iff variable x_i is hosted on agent a_k
    #  fs[(j, k)]          : 1 iff factor f_j is hosted on agent a_k
    #  alphas[((i, j), k)] : 1 iff x_i and f_j are both hosted on a_k
    xs = _build_xs_binvar(vars_to_host, agents_names)
    fs = _build_fs_binvar(facs_to_host, agents_names)
    alphas = _build_alphaijk_binvars(cg, agents_names)
    logger.debug(f"alpha_ijk {alphas}")

    # Minimization problem; the objective is the total communication cost.
    pb = LpProblem("distribution", LpMinimize)
    objective = secp_dist_objective_function(
        cg, communication_load, alphas, agents_names
    )
    pb += objective, "Communication costs"

    # Every free variable computation is hosted on exactly one agent.
    for var_name in vars_to_host:
        hosted_once = (
            lpSum([xs[(var_name, agt)] for agt in agents_names]) == 1
        )
        pb += hosted_once, f"var {var_name} is hosted"

    # Every free factor computation is hosted on exactly one agent.
    for fac_name in facs_to_host:
        hosted_once = (
            lpSum([fs[(fac_name, agt)] for agt in agents_names]) == 1
        )
        pb += hosted_once, f"factor {fac_name} is hosted"

    # Each agent must host at least one computation; the constraint is only
    # needed for agents that do not host anything yet.
    empty_agents = [
        agt
        for agt in agents_names
        if not already_assigned.computations_hosted(agt)
    ]
    for agt in empty_agents:
        hosted_vars = lpSum([xs[(i, agt)] for i in vars_to_host])
        hosted_facs = lpSum([fs[(j, agt)] for j in facs_to_host])
        pb += hosted_vars + hosted_facs >= 1, f"atleastone {agt}"

    # Memory capacity of each agent, reduced by the footprint of the
    # computations it already hosts.
    for agent_def in agents:
        capacity = agent_def.capacity - sum(
            [
                secp_computation_memory_in_cg(c, cg, computation_memory)
                for c in already_assigned.computations_hosted(agent_def.name)
            ]
        )
        vars_footprint = lpSum(
            [
                secp_computation_memory_in_cg(i, cg, computation_memory)
                * xs[(i, agent_def.name)]
                for i in vars_to_host
            ]
        )
        facs_footprint = lpSum(
            [
                secp_computation_memory_in_cg(j, cg, computation_memory)
                * fs[(j, agent_def.name)]
                for j in facs_to_host
            ]
        )
        pb += (
            vars_footprint + facs_footprint <= capacity,
            f"memory {agent_def.name}",
        )

    # Linearization constraints tying alpha_ijk to x_i^k and f_j^k.
    for link in cg.links:
        var_name, fac_name = link.variable_node, link.factor_node
        var_is_free = var_name in vars_to_host
        fac_is_free = fac_name in facs_to_host

        for agt in agents_names:
            alpha = alphas[((var_name, fac_name), agt)]

            if var_is_free and fac_is_free:
                # Both ends are free: alpha = x AND f.
                x_var = xs[(var_name, agt)]
                f_var = fs[(fac_name, agt)]
                pb += alpha <= x_var, f"lin1 {var_name}{fac_name}{agt}"
                pb += alpha <= f_var, f"lin2 {var_name}{fac_name}{agt}"
                pb += (
                    alpha >= x_var + f_var - 1,
                    f"lin3 {var_name}{fac_name}{agt}",
                )
            elif var_is_free:
                # Variable is free, factor is already hosted.
                if already_assigned.agent_for(fac_name) == agt:
                    pb += alpha == xs[(var_name, agt)]
                else:
                    pb += alpha == 0
            elif fac_is_free:
                # Factor is free, variable is already hosted (from hints).
                if already_assigned.agent_for(var_name) == agt:
                    pb += alpha == fs[(fac_name, agt)]
                else:
                    pb += alpha == 0
            else:
                # Both ends are already hosted: alpha is a constant.
                both_here = (
                    already_assigned.agent_for(var_name) == agt
                    and already_assigned.agent_for(fac_name) == agt
                )
                if both_here:
                    pb += alpha == 1
                else:
                    pb += alpha == 0

    # Solve the program with GLPK.
    solver = GLPK_CMD(keepFiles=0, msg=False, options=["--pcost"])
    status = pb.solve(solver)

    if status != LpStatusOptimal:
        raise ImpossibleDistributionException(
            "No possible optimal distribution "
        )

    logger.debug("GLPK cost : %s", pulp.value(pb.objective))

    # Complete the pre-assigned distribution with the solver's placement.
    comp_dist = already_assigned
    for agt in agents_names:
        placed_vars = [
            name
            for name, host in xs
            if host == agt and pulp.value(xs[(name, host)]) == 1
        ]
        comp_dist.host_on_agent(agt, placed_vars)

        placed_facs = [
            name
            for name, host in fs
            if host == agt and pulp.value(fs[(name, host)]) == 1
        ]
        comp_dist.host_on_agent(agt, placed_facs)

    return comp_dist
def cg_secp_ilp(
    cg: ComputationConstraintsHyperGraph,
    agents: List[AgentDef],
    already_assigned: Distribution,
    computation_memory: Callable[[ComputationNode], float],
    communication_load: Callable[[ComputationNode, str], float],
    timeout=600,  # Max 10 min
) -> Distribution:
    """
    Build and solve the ILP placing the computations of a constraints
    hyper-graph.

    Computations already present in ``already_assigned`` keep their agent;
    a binary program is built for the remaining computations and solved with
    GLPK, within the given time budget.

    Parameters
    ----------
    cg: ComputationConstraintsHyperGraph
        The computation graph to distribute.
    agents: List[AgentDef]
        The agents that can host computations.
    already_assigned: Distribution
        Pre-assigned computations. This object is completed in place with
        the solver's result and is the object returned.
    computation_memory: Callable[[ComputationNode], float]
        Memory footprint function, forwarded to
        ``secp_computation_memory_in_cg``.
    communication_load: Callable[[ComputationNode, str], float]
        Communication load function, forwarded to the objective function.
    timeout: number or None
        Overall time budget in seconds, including the time spent building
        the problem. ``None`` means that no time limit is given to the
        solver.

    Returns
    -------
    Distribution
        ``already_assigned``, extended with the computed placement.

    Raises
    ------
    ImpossibleDistributionException
        When the time budget is exhausted before the solver can be started,
        or when the solver does not report an optimal solution.
    """
    start_t = time.time()

    agents = list(agents)
    agents_names = [a.name for a in agents]

    # Only keep computations for which we actually need to find an agent.
    comps_to_host = [
        c for c in cg.node_names() if not already_assigned.has_computation(c)
    ]
    # Set view of the same names, for constant-time membership tests in the
    # linearization loop below (the list is kept for ordered iteration).
    free_comps = set(comps_to_host)

    # x_i^k : binary variable indicating if computation x_i is hosted on
    # agent a_k.
    xs = _build_cs_binvar(comps_to_host, agents_names)
    # alpha_ijk : binary variable indicating if x_i and x_j are both on a_k.
    alphas = _build_alphaijk_binvars(cg, agents_names)
    logger.debug("alpha_ijk %s", alphas)

    # LP problem with objective function (total communication cost).
    pb = LpProblem("distribution", LpMinimize)
    pb += (
        _objective_function(cg, communication_load, alphas, agents_names),
        "Communication costs",
    )

    # Constraints.
    # All computations must be hosted, on exactly one agent:
    for i in comps_to_host:
        pb += (
            lpSum([xs[(i, k)] for k in agents_names]) == 1,
            f"var {i} is hosted",
        )

    # Each agent must host at least one computation:
    # We only need this constraint for agents that do not already host a
    # computation:
    empty_agents = [
        a for a in agents_names if not already_assigned.computations_hosted(a)
    ]
    for k in empty_agents:
        pb += (
            lpSum([xs[(i, k)] for i in comps_to_host]) >= 1,
            f"atleastone {k}",
        )

    # Memory capacity constraint for agents
    for a in agents:
        # Decrease capacity for already hosted computations
        capacity = a.capacity - sum(
            secp_computation_memory_in_cg(c, cg, computation_memory)
            for c in already_assigned.computations_hosted(a.name)
        )
        pb += (
            lpSum([
                secp_computation_memory_in_cg(i, cg, computation_memory)
                * xs[(i, a.name)]
                for i in comps_to_host
            ]) <= capacity,
            f"memory {a.name}",
        )

    # Linearization constraints for alpha_ijk.
    for (i, j), k in alphas:
        alpha = alphas[((i, j), k)]
        i_is_free = i in free_comps
        j_is_free = j in free_comps

        if i_is_free and j_is_free:
            # Both computations are free: alpha = x_i^k AND x_j^k.
            pb += alpha <= xs[(i, k)], f"lin1 {i}{j}{k}"
            pb += alpha <= xs[(j, k)], f"lin2 {i}{j}{k}"
            pb += alpha >= xs[(i, k)] + xs[(j, k)] - 1, f"lin3 {i}{j}{k}"
        elif i_is_free:
            # i is free, j is already hosted
            if already_assigned.agent_for(j) == k:
                pb += alpha == xs[(i, k)]
            else:
                pb += alpha == 0
        elif j_is_free:
            # j is free, i is already hosted (from hints)
            if already_assigned.agent_for(i) == k:
                pb += alpha == xs[(j, k)]
            else:
                pb += alpha == 0
        else:
            # i and j are both already hosted
            if (already_assigned.agent_for(i) == k
                    and already_assigned.agent_for(j) == k):
                pb += alpha == 1
            else:
                pb += alpha == 0

    solver_options = ["--pcost"]
    if timeout is not None:
        # The time limit given to the solver must be reduced by the time
        # already spent building the problem (with a 2 seconds margin).
        remaining_time = round(timeout - (time.time() - start_t)) - 2
        if remaining_time <= 0:
            # glpsol rejects a negative time limit and stops immediately on
            # 0: fail explicitly instead of starting the solver.
            raise ImpossibleDistributionException(
                "No possible optimal distribution : timeout reached before "
                "starting the solver"
            )
        solver_options += ["--tmlim", str(remaining_time)]

    # Now solve our LP
    status = pb.solve(
        GLPK_CMD(keepFiles=0, msg=False, options=solver_options))

    if status != LpStatusOptimal:
        raise ImpossibleDistributionException(
            "No possible optimal distribution ")

    logger.debug("GLPK cost : %s", pulp.value(pb.objective))

    # Complete the pre-assigned distribution with the solver's placement.
    comp_dist = already_assigned
    for k in agents_names:
        agt_vars = [
            i for i, ka in xs if ka == k and pulp.value(xs[(i, ka)]) == 1
        ]
        comp_dist.host_on_agent(k, agt_vars)

    return comp_dist
def factor_graph_lp_model(cg: ComputationsFactorGraph,
                          agents: List[AgentDef],
                          must_host: Dict[str, List],
                          computation_memory=None,
                          communication_load=None):
    """
    Build and solve the ILP distributing a factor graph on a set of agents.

    To distribute we need:
    * com : the communication cost of an edge between a var and a fact
    * mem_var : the memory footprint of a variable computation
    * mem_fac : the memory footprint of a factor computation

    These function depends on the algorithm.

    Here
    * mem_var and mem_fac are given by the computation_memory method.
    * com is given by the communication_load method.

    :param cg: the factor graph to distribute
    :param agents: the agents that can host computations
    :param must_host: mapping from agent name to the list of computations
        that must be hosted on this agent. Agents that are missing from this
        mapping are considered as having no pre-assigned computation.
    :param computation_memory: memory footprint function, forwarded to
        ``_computation_memory_in_cg``
    :param communication_load: communication load function, forwarded to
        the objective function
    :return: a Distribution containing the computations from ``must_host``
        and the placement found by the solver
    :raise ImpossibleDistributionException: when the solver does not report
        an optimal solution
    """
    variables = [n for n in cg.nodes if n.type == 'VariableComputation']
    factors = [n for n in cg.nodes if n.type == 'FactorComputation']

    agents = list(agents)
    agents_names = [a.name for a in agents]

    fixed_dist = Distribution(must_host)

    # Only keep computations for which we actually need to find an agent.
    vars_to_host = [
        v.name for v in variables if not fixed_dist.has_computation(v.name)
    ]
    facs_to_host = [
        f.name for f in factors if not fixed_dist.has_computation(f.name)
    ]
    # Set views of the same names, for constant-time membership tests in the
    # linearization loop below (the lists are kept for ordered iteration).
    free_vars = set(vars_to_host)
    free_facs = set(facs_to_host)

    # x_i^k : binary variable indicating if var x_i is hosted on agent a_k.
    xs = _build_xs_binvar(vars_to_host, agents_names)
    # f_j^k : binary variable indicating if factor f_j is hosted on agent a_k.
    fs = _build_fs_binvar(facs_to_host, agents_names)
    # alpha_ijk : binary variable indicating if x_i and f_j are both on a_k.
    alphas = _build_alphaijk_binvars(cg, agents_names)

    # LP problem with objective function (total communication cost).
    pb = LpProblem('distribution', LpMinimize)
    pb += _objective_function(cg, communication_load, alphas,
                              agents_names), 'Communication costs'

    # Constraints.
    # All variable computations must be hosted:
    for i in vars_to_host:
        pb += lpSum([xs[(i, k)] for k in agents_names]) == 1, \
            'var {} is hosted'.format(i)

    # All factor computations must be hosted:
    for j in facs_to_host:
        pb += lpSum([fs[(j, k)] for k in agents_names]) == 1, \
            'factor {} is hosted'.format(j)

    # Each agent must host at least one computation:
    # We only need this constraint for agents that do not already host a
    # computation. An agent missing from must_host hosts nothing yet.
    empty_agents = [a for a in agents_names if not must_host.get(a)]
    for k in empty_agents:
        pb += lpSum([xs[(i, k)] for i in vars_to_host]) + \
            lpSum([fs[(j, k)] for j in facs_to_host]) >= 1, \
            'atleastone {}'.format(k)

    # Memory capacity constraint for agents
    for a in agents:
        # Decrease capacity for already hosted computations
        capacity = a.capacity - \
            sum(_computation_memory_in_cg(c, cg, computation_memory)
                for c in must_host.get(a.name, []))

        pb += lpSum([_computation_memory_in_cg(i, cg, computation_memory)
                     * xs[(i, a.name)] for i in vars_to_host]) \
            + lpSum([_computation_memory_in_cg(j, cg, computation_memory)
                     * fs[(j, a.name)] for j in facs_to_host]) <= capacity, \
            'memory {}'.format(a.name)

    # Linearization constraints for alpha_ijk.
    for link in cg.links:
        i, j = link.variable_node, link.factor_node
        # These do not depend on the agent: test them once per link.
        var_is_free = i in free_vars
        fac_is_free = j in free_facs

        for k in agents_names:
            alpha = alphas[((i, j), k)]

            if var_is_free and fac_is_free:
                # Both ends are free: alpha = x_i^k AND f_j^k.
                pb += alpha <= xs[(i, k)], 'lin1 {}{}{}'.format(i, j, k)
                pb += alpha <= fs[(j, k)], 'lin2 {}{}{}'.format(i, j, k)
                pb += alpha >= xs[(i, k)] + fs[(j, k)] - 1, \
                    'lin3 {}{}{}'.format(i, j, k)

            elif var_is_free:
                # Var is free, factor is already hosted
                if fixed_dist.agent_for(j) == k:
                    pb += alpha == xs[(i, k)]
                else:
                    pb += alpha == 0

            elif fac_is_free:
                # Factor is free, var is already hosted (from hints)
                if fixed_dist.agent_for(i) == k:
                    pb += alpha == fs[(j, k)]
                else:
                    pb += alpha == 0

            else:
                # i and j are both already hosted
                if fixed_dist.agent_for(i) == k \
                        and fixed_dist.agent_for(j) == k:
                    pb += alpha == 1
                else:
                    pb += alpha == 0

    # Now solve our LP
    status = pb.solve(GLPK_CMD(keepFiles=0, msg=False, options=['--pcost']))

    if status != LpStatusOptimal:
        raise ImpossibleDistributionException("No possible optimal"
                                              " distribution ")

    logger.debug('GLPK cost : %s', value(pb.objective))

    # Complete the fixed distribution with the solver's placement.
    comp_dist = fixed_dist
    for k in agents_names:
        agt_vars = [
            i for i, ka in xs if ka == k and value(xs[(i, ka)]) == 1
        ]
        comp_dist.host_on_agent(k, agt_vars)

        agt_rels = [
            j for j, ka in fs if ka == k and value(fs[(j, ka)]) == 1
        ]
        comp_dist.host_on_agent(k, agt_rels)

    return comp_dist