def _tree(adj, attr, n_regions, solver, metric):
    """
    Solve the p-regions problem with the Tree model.

    The model implements equations (3) to (12) in Duque et al. (2011):
    "The p-Regions Problem".

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Refer to the corresponding argument in :func:`_flow`.
    attr : :class:`numpy.ndarray`
        Refer to the corresponding argument in :func:`_flow`.
    n_regions : int
        Refer to the corresponding argument in :func:`_flow`.
    solver : str
        Refer to the corresponding argument in :func:`_flow`.
    metric : function
        Refer to the corresponding argument in :func:`_flow`.

    Returns
    -------
    result : :class:`numpy.ndarray`
        Refer to the return value in :func:`_flow`.
    """
    print("running TREE algorithm")  # TODO: rm
    prob = LpProblem("Tree", LpMinimize)

    # Parameters of the optimization problem
    n_areas = attr.shape[0]
    n_attrs = attr.shape[1]
    # number of links in a forest with n_areas nodes and n_regions trees
    n_links = n_areas - n_regions
    I = list(range(n_areas))  # index for areas
    II = [(i, j) for i in I for j in I]
    II_upper_triangle = [(i, j) for i, j in II if i < j]
    # Only the upper triangle of the dissimilarity matrix is read by the
    # objective function, so the metric is not evaluated for other pairs.
    d = {
        (i, j): metric(
            attr[i].reshape(n_attrs, 1),  # reshaping to...
            attr[j].reshape(n_attrs, 1))  # ...avoid warnings
        for i, j in II_upper_triangle
    }

    # Decision variables
    t = LpVariable.dicts("t", II, lowBound=0, upBound=1, cat=LpInteger)
    x = LpVariable.dicts("x", II, lowBound=0, upBound=1, cat=LpInteger)
    u = LpVariable.dicts("u", I, lowBound=0, cat=LpInteger)

    # Objective function
    # (3) in Duque et al. (2011): "The p-Regions Problem"
    prob += lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

    # Constraints
    # (4) in Duque et al. (2011): "The p-Regions Problem"
    lhs = lpSum(x[i, j] for i in I for j in neighbors(adj, i))
    prob += lhs == n_links
    # (5) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        prob += lpSum(x[i, j] for j in neighbors(adj, i)) <= 1
    # (6) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for j in I:
            for m in I:
                if i != j and i != m and j != m:
                    prob += t[i, j] + t[i, m] - t[j, m] <= 1
    # (7) in Duque et al. (2011): "The p-Regions Problem"
    for i, j in II:
        prob += t[i, j] - t[j, i] == 0
    # (8) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for j in neighbors(adj, i):
            prob += x[i, j] <= t[i, j]
    # (9) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for j in neighbors(adj, i):
            prob += u[i] - u[j] + n_links * x[i, j] \
                    + (n_links - 2) * x[j, i] \
                    <= n_links - 1
    # (10) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        prob += u[i] <= n_links
        prob += u[i] >= 1
    # (11) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition
    # (12) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition

    # Solve the optimization problem
    solver = get_solver_instance(solver)
    prob.solve(solver)

    # build a list of regions like [[0, 1, 2, 5], [3, 4, 6, 7, 8]]
    idx_copy = set(I)
    regions = [[] for _ in range(n_regions)]
    for region in regions:
        area = idx_copy.pop()
        region.append(area)
        for other_area in idx_copy:
            value = t[area, other_area].varValue
            # A solver may report a binary variable as e.g. 0.9999999, so
            # test against 0.5 instead of comparing for equality with 1.
            if value is not None and value > 0.5:
                region.append(other_area)
        idx_copy.difference_update(region)
    result = array_from_region_list(regions)
    return result
def _flow(adj, attr, n_regions, solver, metric):
    """
    Solve the p-regions problem with the Flow model.

    The model implements equations (20) to (31) in Duque et al. (2011):
    "The p-Regions Problem".

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        See the corresponding argument in
        :meth:`PRegionsExact.fit_from_scipy_sparse_matrix`.
    attr : :class:`numpy.ndarray`
        See the corresponding argument in
        :meth:`PRegionsExact.fit_from_scipy_sparse_matrix`.
    n_regions : int
        See the corresponding argument in
        :meth:`PRegionsExact.fit_from_scipy_sparse_matrix`.
    solver : str
        See the corresponding argument in
        :meth:`PRegionsExact.fit_from_scipy_sparse_matrix`.
    metric : function
        A function fulfilling the 4 conditions described in the docstring
        of :func:`region.util.get_metric_function`.

    Returns
    -------
    result : :class:`numpy.ndarray`
        A one-dimensional array containing each area's region label.
    """
    print("running FLOW algorithm")  # TODO: rm
    prob = LpProblem("Flow", LpMinimize)

    # Parameters of the optimization problem
    n_areas = adj.shape[0]
    n_attrs = attr.shape[1]
    # upper bound on the flow along a single link
    max_flow = n_areas - n_regions
    I = list(range(n_areas))  # index for areas
    II_upper_triangle = [(i, j) for i in I for j in I if i < j]
    K = range(n_regions)  # index for regions
    d = {
        (i, j): metric(
            attr[i].reshape(n_attrs, 1),  # reshaping to...
            attr[j].reshape(n_attrs, 1))  # ...avoid warnings
        for i, j in II_upper_triangle
    }

    # Decision variables
    t = LpVariable.dicts("t", II_upper_triangle,
                         lowBound=0, upBound=1, cat=LpInteger)
    f = LpVariable.dicts(  # The amount of flow (non-negative integer)
        "f",               # from area i to j in region k.
        ((i, j, k) for i in I for j in neighbors(adj, i) for k in K),
        lowBound=0, cat=LpInteger)
    y = LpVariable.dicts(  # 1 if area i is assigned to region k. 0 otherwise.
        "y", ((i, k) for i in I for k in K),
        lowBound=0, upBound=1, cat=LpInteger)
    w = LpVariable.dicts(  # 1 if area i is chosen as a sink. 0 otherwise.
        "w", ((i, k) for i in I for k in K),
        lowBound=0, upBound=1, cat=LpInteger)

    # Objective function
    # (20) in Duque et al. (2011): "The p-Regions Problem"
    prob += lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

    # Constraints
    # (21) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        prob += lpSum(y[i, k] for k in K) == 1
    # (22) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for k in K:
            prob += w[i, k] <= y[i, k]
    # (23) in Duque et al. (2011): "The p-Regions Problem"
    for k in K:
        prob += lpSum(w[i, k] for i in I) == 1
    # (24) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for j in neighbors(adj, i):
            for k in K:
                prob += f[i, j, k] <= y[i, k] * max_flow
    # (25) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for j in neighbors(adj, i):
            for k in K:
                prob += f[i, j, k] <= y[j, k] * max_flow
    # (26) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for k in K:
            lhs = lpSum(f[i, j, k] - f[j, i, k] for j in neighbors(adj, i))
            prob += lhs >= y[i, k] - max_flow * w[i, k]
    # (27) in Duque et al. (2011): "The p-Regions Problem"
    for i, j in II_upper_triangle:
        for k in K:
            prob += t[i, j] >= y[i, k] + y[j, k] - 1
    # (28) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition
    # (29) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition
    # (30) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition
    # (31) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition

    # Solve the optimization problem
    solver = get_solver_instance(solver)
    prob.solve(solver)

    # Translate the assignment variables into one region label per area.
    result = np.zeros(n_areas)
    for i in I:
        for k in K:
            value = y[i, k].varValue
            # A solver may report a binary variable as e.g. 0.9999999, so
            # test against 0.5 instead of comparing for equality with 1.
            if value is not None and value > 0.5:
                result[i] = k
    return result
def _order(adj, attr, n_regions, solver, metric):
    """
    Solve the p-regions problem with the Order model.

    The model implements equations (13) to (19) in Duque et al. (2011):
    "The p-Regions Problem".

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Refer to the corresponding argument in :func:`_flow`.
    attr : :class:`numpy.ndarray`
        Refer to the corresponding argument in :func:`_flow`.
    n_regions : int
        Refer to the corresponding argument in :func:`_flow`.
    solver : str
        Refer to the corresponding argument in :func:`_flow`.
    metric : function
        Refer to the corresponding argument in :func:`_flow`.

    Returns
    -------
    result : :class:`numpy.ndarray`
        Refer to the return value in :func:`_flow`.
    """
    print("running ORDER algorithm")  # TODO: rm
    prob = LpProblem("Order", LpMinimize)

    # Parameters of the optimization problem
    n_areas = attr.shape[0]
    n_attrs = attr.shape[1]
    I = list(range(n_areas))  # index for areas
    II_upper_triangle = [(i, j) for i in I for j in I if i < j]
    K = range(n_regions)  # index for regions
    O = range(n_areas - n_regions)  # index for orders
    d = {
        (i, j): metric(
            attr[i].reshape(n_attrs, 1),  # reshaping to...
            attr[j].reshape(n_attrs, 1))  # ...avoid warnings
        for i, j in II_upper_triangle
    }

    # Decision variables
    t = LpVariable.dicts("t", II_upper_triangle,
                         lowBound=0, upBound=1, cat=LpInteger)
    x = LpVariable.dicts("x",
                         ((i, k, o) for i in I for k in K for o in O),
                         lowBound=0, upBound=1, cat=LpInteger)

    # Objective function
    # (13) in Duque et al. (2011): "The p-Regions Problem"
    prob += lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

    # Constraints
    # (14) in Duque et al. (2011): "The p-Regions Problem"
    for k in K:
        prob += lpSum(x[i, k, 0] for i in I) == 1
    # (15) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        prob += lpSum(x[i, k, o] for k in K for o in O) == 1
    # (16) in Duque et al. (2011): "The p-Regions Problem"
    for i in I:
        for k in K:
            for o in range(1, len(O)):
                prob += x[i, k, o] <= \
                        lpSum(x[j, k, o - 1] for j in neighbors(adj, i))
    # (17) in Duque et al. (2011): "The p-Regions Problem"
    for i, j in II_upper_triangle:
        for k in K:
            summ = lpSum(x[i, k, o] + x[j, k, o] for o in O) - 1
            prob += t[i, j] >= summ
    # (18) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition
    # (19) in Duque et al. (2011): "The p-Regions Problem"
    # already in LpVariable-definition

    # Solve the optimization problem
    solver = get_solver_instance(solver)
    prob.solve(solver)

    # Translate the assignment variables into one region label per area.
    result = np.zeros(n_areas)
    for i in I:
        for k in K:
            for o in O:
                value = x[i, k, o].varValue
                # A solver may report a binary variable as e.g. 0.9999999,
                # so test against 0.5 instead of comparing with == 1.
                if value is not None and value > 0.5:
                    result[i] = k
    return result
def fit_from_scipy_sparse_matrix(self, adj, attr, spatially_extensive_attr,
                                 threshold, solver="cbc",
                                 metric="euclidean"):
    """
    Solve the max-p-regions problem as MIP as described in [DAR2012]_.

    The resulting region labels are assigned to the instance's
    :attr:`labels_` attribute. The metric function and the solver
    instance that were used are stored in the :attr:`metric` and
    :attr:`solver` attributes.

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix representing the areas' contiguity relation.
    attr : :class:`numpy.ndarray`
        Array (number of areas x number of attributes) of areas'
        attributes relevant to clustering.
    spatially_extensive_attr : :class:`numpy.ndarray`
        Array (number of areas x number of attributes) of areas'
        attributes relevant to ensuring the threshold condition.
    threshold : numbers.Real or :class:`numpy.ndarray`
        The lower bound for a region's sum of spatially extensive
        attributes. The argument's type is numbers.Real if there is only
        one spatially extensive attribute per area, otherwise it is a
        one-dimensional array with as many entries as there are spatially
        extensive attributes per area. Other indexable sequences (e.g. a
        list) are read entry by entry like an array.
    solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
        The solver to use. Unless the default solver is used, the user has
        to make sure that the specified solver is installed.

        * "cbc" - the Cbc (Coin-or branch and cut) solver
        * "cplex" - the CPLEX solver
        * "glpk" - the GLPK (GNU Linear Programming Kit) solver
        * "gurobi" - the Gurobi Optimizer

    metric : str or function, default: "euclidean"
        See the `metric` argument in
        :func:`region.util.get_metric_function`.
    """
    # Imported locally because the ``collections.Iterable`` alias was
    # removed in Python 3.10; the ABC lives in ``collections.abc``.
    from collections.abc import Iterable

    self.metric = get_metric_function(metric)
    check_solver(solver)

    prob = LpProblem("Max-p-Regions", LpMinimize)

    # Parameters of the optimization problem
    n_areas = adj.shape[0]
    I = list(range(n_areas))  # index for areas
    II_upper_triangle = [(i, j) for i in I for j in I if i < j]
    # index of potential regions, called k in [DAR2012]_:
    K = range(n_areas)
    # index of contiguity order, called c in [DAR2012]_:
    O = range(n_areas)
    d = {(i, j): self.metric(attr[i].reshape(1, -1),
                             attr[j].reshape(1, -1))
         for i, j in II_upper_triangle}
    total_dissimilarity = sum(d[i, j] for i, j in II_upper_triangle)
    if total_dissimilarity > 0:
        h = 1 + floor(log10(total_dissimilarity))
    else:
        # log10 is undefined for 0 (a single area or identical attributes).
        # The heterogeneity term is then always 0, so any weight on the
        # number of regions works.
        h = 0

    # Decision variables
    t = LpVariable.dicts("t", II_upper_triangle,
                         lowBound=0, upBound=1, cat=LpInteger)
    x = LpVariable.dicts("x",
                         ((i, k, o) for i in I for k in K for o in O),
                         lowBound=0, upBound=1, cat=LpInteger)

    # Objective function
    # (1) in Duque et al. (2012): "The Max-p-Regions Problem"
    prob += -10**h * lpSum(x[i, k, 0] for k in K for i in I) \
        + lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

    # Constraints
    # (2) in Duque et al. (2012): "The Max-p-Regions Problem"
    for k in K:
        prob += lpSum(x[i, k, 0] for i in I) <= 1
    # (3) in Duque et al. (2012): "The Max-p-Regions Problem"
    for i in I:
        prob += lpSum(x[i, k, o] for k in K for o in O) == 1
    # (4) in Duque et al. (2012): "The Max-p-Regions Problem"
    for i in I:
        for k in K:
            for o in range(1, len(O)):
                prob += x[i, k, o] <= lpSum(x[j, k, o - 1]
                                            for j in neighbors(adj, i))
    # (5) in Duque et al. (2012): "The Max-p-Regions Problem"
    first_attr = spatially_extensive_attr[I[0]]
    if isinstance(first_attr, numbers.Real):
        # one spatially extensive attribute per area
        for k in K:
            lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i]
                        for i in I for o in O)
            prob += lhs >= threshold * lpSum(x[i, k, 0] for i in I)
    elif isinstance(first_attr, Iterable):
        # several spatially extensive attributes per area
        for el in range(len(first_attr)):
            if isinstance(threshold, numbers.Real):
                threshold_el = threshold
            else:
                # One bound per attribute. Sequences other than ndarray
                # are indexed too, so the constraint is never silently
                # skipped.
                threshold_el = threshold[el]
            for k in K:
                lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i][el]
                            for i in I for o in O)
                rhs = threshold_el * lpSum(x[i, k, 0] for i in I)
                prob += lhs >= rhs
    # (6) in Duque et al. (2012): "The Max-p-Regions Problem"
    for i, j in II_upper_triangle:
        for k in K:
            prob += t[i, j] >= \
                lpSum(x[i, k, o] + x[j, k, o] for o in O) - 1
    # (7) in Duque et al. (2012): "The Max-p-Regions Problem"
    # already in LpVariable-definition
    # (8) in Duque et al. (2012): "The Max-p-Regions Problem"
    # already in LpVariable-definition

    # additional constraint for speedup (p. 405 in [DAR2012]_)
    for o in O:
        prob += x[I[0], K[0], o] == (1 if o == 0 else 0)

    # Solve the optimization problem
    solver = get_solver_instance(solver)
    print("start solving with", solver)
    prob.solve(solver)
    print("solved")

    # Translate the assignment variables into one region label per area.
    result = np.zeros(n_areas)
    for i in I:
        for k in K:
            for o in O:
                value = x[i, k, o].varValue
                # A solver may report a binary variable as e.g. 0.9999999,
                # so test against 0.5 instead of comparing with == 1.
                if value is not None and value > 0.5:
                    result[i] = k
    self.labels_ = result
    self.solver = solver