def extended_condorcet_simple(rankings):
    """Find candidate pairs that are ordered the same way in every ranking.

    For each unordered pair produced by ``combs(range(n), 2)`` the column
    positions of both candidates are looked up in ``rankings``. If one
    candidate sits at a smaller column index than the other in all rows,
    the pair is recorded as (earlier candidate, later candidate).

    Args:
        rankings: 2-D array; ``rankings.shape[1]`` is taken as the number of
            candidates. Assumes candidates are labelled 0..N-1 and that each
            row contains every candidate once -- TODO confirm with callers.

    Returns:
        A ``scipy.sparse.coo_matrix`` holding 1.0 at (row=earlier candidate,
        col=later candidate) for every unanimous pair, or ``None`` when no
        such pair exists. No explicit shape is passed, so scipy infers the
        dimensions from the largest recorded indices.
    """
    # assumes: cands -> 0,N-1
    n_cands = rankings.shape[1]

    earlier, later = [], []
    for first, second in combs(range(n_cands), 2):
        first_pos = np.where(rankings == first)[1]
        second_pos = np.where(rankings == second)[1]

        if np.all(first_pos < second_pos):
            ahead, behind = first, second
        elif np.all(second_pos < first_pos):
            ahead, behind = second, first
        else:
            # Neither candidate is unanimously preferred: nothing to record.
            continue

        earlier.append(ahead)
        later.append(behind)

    if not earlier:
        return None
    return sp.coo_matrix((np.ones(len(earlier)), (earlier, later)))
def combine_variables(X):
    """Hash combinations of columns of ``X`` into new integer features.

    For every subset size from 1 to 3 and every index set produced by
    ``combs(7, size)``, each row of ``X`` restricted to those columns is
    converted to a tuple, hashed, and reduced modulo 800000.

    Args:
        X: 2-D array indexed as ``X[:, columns]``. Assumes ``combs`` yields
            index sequences valid for the columns of ``X`` -- TODO confirm.

    Returns:
        Array with one column per index set; entries are the hashed values.
    """
    hashed_features = [
        [hash(tuple(row)) % 800000 for row in X[:, columns]]
        for size in range(1, 4)
        for columns in combs(7, size)
    ]
    return np.array(hashed_features).T
def yield_extreme_pts(p, m_bar):
    """Lazily yield the polynomial values of each extreme point.

    One ``chordal_prod(N=p)`` object is created per zero-index set produced
    by ``utils.combs(n=p, k=p - m - 1)`` with ``m = 2 * m_bar + 1``; its
    polynomial is configured with those zero indices and the result of
    ``get_polyn_vals()`` is yielded.

    Each point is yielded as soon as it is computed instead of being
    collected into a list first, so callers that iterate the generator
    directly never hold all extreme points in memory at once. The sequence
    of yielded values is the same as before.

    Args:
        p: passed as ``N`` to ``chordal_prod`` and as ``n`` to ``utils.combs``.
        m_bar: determines the size of each zero-index set via
            ``k = p - (2 * m_bar + 1) - 1``.

    Yields:
        Whatever ``chordal_prod.get_polyn_vals()`` returns for each index set.
    """
    m = 2 * m_bar + 1
    for zero_loc in utils.combs(n=p, k=p - m - 1):
        cp = chordal_prod(N=p)
        cp.set_polyn(zero_indices=zero_loc)
        yield cp.get_polyn_vals()
def sample(self):
    """Draw one random graph by keeping each candidate edge independently.

    A ``basic_graph`` is created from ``self.nr_vertices`` and
    ``self.directed``. Every item produced by
    ``utils.combs(self.nr_vertices, 2)`` is kept as an edge when a fresh
    uniform draw from [0, 1) is at most ``self.edge_probability``.

    Returns:
        The graph object after ``set_edges`` has been called with the set of
        sampled edges (each stored as a tuple).
    """
    graph = basic_graph(nr_vertices=self.nr_vertices, directed=self.directed)

    # One uniform draw per candidate pair, in the order combs yields them.
    sampled_edges = {
        tuple(pair)
        for pair in utils.combs(self.nr_vertices, 2)
        if np.random.uniform(low=0.0, high=1.0) <= self.edge_probability
    }

    graph.set_edges(edge_container=sampled_edges)
    return graph
def combine_variables(X, k):
    """Create hashed features from combinations (tuples) of variables.

    Combination sizes run from 1 (single variables) up to ``k``. For every
    index set produced by ``combs(7, size)``, each row of ``X`` restricted to
    those columns is converted to a tuple and hashed.

    Args:
        X: 2-D array indexed as ``X[:, columns]``. Assumes ``combs`` yields
            index sequences valid for the columns of ``X`` -- TODO confirm.
        k: largest combination size to generate.

    Returns:
        Array with one column per index set; entries are the hash values.
    """
    # Combinations of features, sizes 1..k, in increasing size order.
    hashed_features = [
        [hash(tuple(row)) for row in X[:, columns]]
        for size in range(1, k + 1)
        for columns in combs(7, size)
    ]
    return np.array(hashed_features).T
def cal_s_in_max_asc_prob(p, m_bar):
    """Compute, per sparsity level, the fraction of supports that pass the check.

    For each sparsity ``s`` in ``range(1, p)`` every support set from
    ``utils.combs(n=p, k=s)`` is tested against all extreme points. A support
    fails as soon as, for some extreme point, the summed norms of its entries
    on the support are greater than or equal to the summed norms of the
    entries off the support.

    The scan stops right after the first sparsity level whose success rate is
    0.0 has been recorded. The next level used to be evaluated in full and
    then discarded before stopping; the returned dictionary is unchanged,
    only that wasted pass (and its progress line) is gone.

    Args:
        p: number of entries indexed in each extreme point.
        m_bar: forwarded to ``yield_extreme_pts``.

    Returns:
        Dict mapping every ``s`` in ``range(1, p)`` to a dict. Levels that were
        evaluated carry a ``'success_rate'`` entry; levels after the first
        zero rate stay empty.

    Raises:
        Exception: if ``scipy.special.comb(p, 2 * m_bar + 2)`` is at least
            5000, as a guard against enumerating too many extreme points.
    """
    sparsity_range = range(1, p)  ## ATTENTION HERE!!
    all_probs = {s: {} for s in sparsity_range}

    if scipy.special.comb(p, 2 * m_bar + 2) >= 5000:
        raise Exception(
            'cal_s_in_max_asc_prob : Probably there are too many extreme points. '
            'Comment out this warning to continue.'
        )

    # Materialised once because it is re-scanned for every support set.
    ext_pts = list(yield_extreme_pts(p=p, m_bar=m_bar))

    for s in sparsity_range:
        print('Working with sparsity {}...'.format(s))
        fail = 0.0
        success = 0.0
        for supp_set in utils.combs(n=p, k=s):
            fail_flag = False
            for extp in ext_pts:
                supp_set_sum = sum(
                    [np.linalg.norm(extp[k]) for k in supp_set])
                supp_set_comp_sum = sum([
                    np.linalg.norm(extp[k]) for k in range(p)
                    if k not in supp_set
                ])
                if supp_set_sum >= supp_set_comp_sum:
                    fail_flag = True
                    break
            if fail_flag:
                fail += 1.0
            else:
                success += 1.0

        success_rate = success / (success + fail)
        all_probs[s]['success_rate'] = success_rate

        # Once a level has no succeeding support, later levels are skipped.
        if success_rate == 0.0:
            break

    return all_probs
def cal_max_asc(p, m_bar, memory_efficient=False):
    """Classify every support set of every sparsity as succeeding or failing.

    For each sparsity ``s`` in ``range(1, p + 1)`` every support set from
    ``utils.combs(n=p, k=s)`` is tested against all extreme points. A support
    fails as soon as, for some extreme point, the summed norms of its entries
    on the support are greater than or equal to the summed norms of the
    entries off the support.

    Args:
        p: number of entries indexed in each extreme point.
        m_bar: forwarded to ``yield_extreme_pts``.
        memory_efficient: when False the extreme points are computed once and
            kept in a list. When True they are regenerated from
            ``yield_extreme_pts`` for every support set, so no list of all
            extreme points is kept here. A generator can only be consumed
            once, so sharing one across support sets would leave later
            supports checked against few or no extreme points.

    Returns:
        Dict mapping each ``s`` to ``{'type': bool, 'idx_sets': list}``. The
        shorter of the two lists is stored: ``type`` is True when
        ``idx_sets`` holds the succeeding supports (also used on a tie) and
        False when it holds the failing ones.
    """
    sparsity_range = range(1, p + 1)
    max_asc = {s: {} for s in sparsity_range}

    if memory_efficient:
        cached_ext_pts = None
    else:
        cached_ext_pts = list(yield_extreme_pts(p=p, m_bar=m_bar))

    for s in sparsity_range:
        fail = []
        success = []
        for supp_set in utils.combs(n=p, k=s):
            # Every support set must see the full sequence of extreme points.
            if memory_efficient:
                ext_pts = yield_extreme_pts(p=p, m_bar=m_bar)
            else:
                ext_pts = cached_ext_pts

            fail_flag = False
            for extp in ext_pts:
                supp_set_sum = sum(
                    [np.linalg.norm(extp[k]) for k in supp_set])
                supp_set_comp_sum = sum([
                    np.linalg.norm(extp[k]) for k in range(p)
                    if k not in supp_set
                ])
                if supp_set_sum >= supp_set_comp_sum:
                    fail_flag = True
                    break
            if fail_flag:
                fail.append(supp_set)
            else:
                success.append(supp_set)

        # Store whichever list is shorter and flag which one it is.
        if len(success) <= len(fail):
            max_asc[s]['type'] = True
            max_asc[s]['idx_sets'] = success
        else:
            max_asc[s]['type'] = False
            max_asc[s]['idx_sets'] = fail

    return max_asc
def facets(self):
    """Yield ``self.join_groups(choice)`` for every group choice.

    The choices are the items produced by ``combs(self.nr_groups, self.s)``,
    visited in the order ``combs`` returns them.
    """
    for group_choice in combs(self.nr_groups, self.s):
        yield self.join_groups(group_choice)
def solve_ilp(self):
    """Solve the problem exactly using a MIP/ILP approach.

    Solver used: CoinOR CBC (model built with ``CyClpSimplex`` and converted
    via ``getCbcModel``). The incidence matrix ``self.Q`` holds the complete
    information needed for the optimisation: its flattened entries are the
    objective coefficients of the n*n integer variables ``x``.

    Constraints added to the model:
      * every variable lies in [0, 1];
      * for each unordered pair (a, b): ``x[a, b] + x[b, a] == 1``;
      * for each ordered triple (a, b, c):
        ``x[a, b] + x[b, c] + x[c, a] >= 1``;
      * if ``self.condorcet_red`` is set and ``extended_condorcet_simple``
        returned a matrix, each of its non-zero entries (i, j) fixes
        ``x[j, i] == 1`` and ``x[i, j] == 0``.

    Reads ``self.verbose``, ``self.condorcet_red``, ``self.votes_arr`` and
    ``self.Q``. Stores the objective value in ``self.obj_sol`` and the column
    indices of the rounded solution, sorted by descending column sum, in
    ``self.aggr_rank``. Returns nothing.
    """
    if self.verbose:
        print('Solve: build model')

    if self.condorcet_red:
        condorcet_red_mat = extended_condorcet_simple(self.votes_arr)

    n = self.Q.shape[0]
    x_n = n * n

    model = CyClpSimplex()  # MODEL
    x = model.addVariable('x', x_n, isInt=True)  # VARS

    model.objective = self.Q.ravel()  # OBJ

    # x_ab = boolean (already int; need to constrain to [0,1])
    model += sp.eye(x_n) * x >= np.zeros(x_n)
    model += sp.eye(x_n) * x <= np.ones(x_n)

    # constraints for every pair
    start_time = time()
    n_pairwise_constr = n * (n - 1) // 2
    if self.verbose:
        print(' # pairwise constr: ', n_pairwise_constr)

    # Somewhat bloated just to get some vectorization / speed !
    combs_ = combs(range(n), 2)

    inds_a = np.ravel_multi_index(combs_.T, (n, n))
    inds_b = np.ravel_multi_index(combs_.T[::-1], (n, n))

    row_inds = np.tile(np.arange(n_pairwise_constr), 2)
    col_inds = np.hstack((inds_a, inds_b))

    pairwise_constraints = sp.coo_matrix(
        (np.ones(n_pairwise_constr * 2), (row_inds, col_inds)),
        shape=(n_pairwise_constr, n * n))
    end_time = time()
    if self.verbose:
        print(" Took {:.{prec}f} secs".format(end_time - start_time, prec=3))

    # and for every cycle of length 3
    start_time = time()
    n_triangle_constrs = n * (n - 1) * (n - 2)
    if self.verbose:
        print(' # triangle constr: ', n_triangle_constrs)

    # Somewhat bloated just to get some vectorization / speed !
    perms_ = perms(range(n), 3)

    inds_a = np.ravel_multi_index(perms_.T[(0, 1), :], (n, n))
    inds_b = np.ravel_multi_index(perms_.T[(1, 2), :], (n, n))
    inds_c = np.ravel_multi_index(perms_.T[(2, 0), :], (n, n))

    row_inds = np.tile(np.arange(n_triangle_constrs), 3)
    col_inds = np.hstack((inds_a, inds_b, inds_c))

    triangle_constraints = sp.coo_matrix(
        (np.ones(n_triangle_constrs * 3), (row_inds, col_inds)),
        shape=(n_triangle_constrs, n * n))
    end_time = time()
    if self.verbose:
        print(" Took {:.{prec}f} secs".format(end_time - start_time, prec=3))

    model += pairwise_constraints * x == np.ones(n_pairwise_constr)
    model += triangle_constraints * x >= np.ones(n_triangle_constrs)

    # Identity test: `!= None` would be routed through the sparse matrix's
    # own comparison operator instead of simply checking for a missing matrix.
    if self.condorcet_red and condorcet_red_mat is not None:
        I, J, _ = sp.find(condorcet_red_mat)
        indices_pos = np.ravel_multi_index([J, I], (n, n))
        indices_neg = np.ravel_multi_index([I, J], (n, n))
        nnz = len(indices_pos)

        if self.verbose:
            print(
                ' Extended Condorcet reductions: {} * 2 relations fixed'.
                format(nnz))

        # One equality row per fixed variable: first the ones, then the zeros.
        lhs = sp.coo_matrix(
            (np.ones(nnz * 2),
             (np.arange(nnz * 2), np.hstack((indices_pos, indices_neg)))),
            shape=(nnz * 2, n * n))
        rhs = np.hstack(
            (np.ones(len(indices_pos)), np.zeros(len(indices_neg))))
        model += lhs * x == rhs

    cbcModel = model.getCbcModel()  # Clp -> Cbc model / LP -> MIP
    cbcModel.logLevel = self.verbose

    if self.verbose:
        print('Solve: run MIP\n')
    start_time = time()
    cbcModel.solve()  # -> "Call CbcMain. Solve the problem
    #                      "using the same parameters used
    #                      "by CbcSolver."
    # This deviates from cylp's docs which are sparse!
    # -> preprocessing will be used and is very important!
    end_time = time()
    if self.verbose:
        print(" CoinOR CBC used {:.{prec}f} secs".format(end_time - start_time, prec=3))

    x_sol = cbcModel.primalVariableSolution['x']
    self.obj_sol = cbcModel.objectiveValue
    x = np.array(x_sol).reshape((n, n)).round().astype(int)
    self.aggr_rank = np.argsort(x.sum(axis=0))[::-1]
def LSH(data, signatureMatrix, nrOfRows, nrofBands):
    """Perform LSH on the given dataset.

    The signature matrix is split into ``nrofBands`` consecutive bands of
    ``nrOfRows`` rows each. Within a band, users whose band slice is
    identical fall into the same bucket. For every pair of users sharing a
    bucket that has not been accepted before, the fraction of equal entries
    over the whole signature columns is computed; if it is at least 0.45 the
    actual similarity is obtained from ``jaccardIndex``, and pairs scoring at
    least 0.5 are recorded and passed to ``writePair``.

    Args:
        data: 2-D array whose first column is fed to ``np.unique`` and
            ``np.bincount``; presumably non-negative integer user ids with
            rows grouped by user -- TODO confirm.
        signatureMatrix: 2-D array indexed as ``[signature_row, user]``.
        nrOfRows: number of signature rows per band.
        nrofBands: number of bands to process.

    Returns:
        A view of the accepted ``(user1, user2)`` pairs (the keys of the
        internal dictionary).
    """
    # Initializations
    nrofUsers = len(np.unique(data[:, 0]))
    cumSums = np.cumsum(np.bincount(data[:, 0]))
    # Cumulative counts with a leading zero, handed on to jaccardIndex.
    indices = np.hstack((np.zeros(1), cumSums)).astype(int)
    row = 0
    pairsList = {}
    beginLSH = time.time()

    for band in range(nrofBands):
        # Make two dictionaries. 'buckets' is the full dictionary, whereas
        # 'nonEmptyBuckets' is a subset containing only the keys that have
        # more than one value. We loop on the latter for efficiency. It is a
        # dict rather than a set so buckets are visited in insertion order.
        buckets = {}
        nonEmptyBuckets = {}
        print(" Exploring Band : ", band, "...")
        for user in range(nrofUsers):
            bucketIds = tuple(signatureMatrix[row:row + nrOfRows, user])
            if bucketIds in buckets:
                buckets[bucketIds].append(user)
                nonEmptyBuckets[bucketIds] = 1
            else:
                buckets[bucketIds] = [user]
        row += nrOfRows

        # Loop on the small dictionary. Make a temp matrix to store the
        # user columns of the pairs in each bucket and map the new indices.
        # Again, this avoids looping on the whole signature matrix.
        for bucket in nonEmptyBuckets:
            members = buckets[bucket]
            tempMat = signatureMatrix[:, members]
            tempInd = range(len(members))
            for pair in combs(tempInd):
                (i, j) = pair
                # Get back the initial user indices for this pair.
                (user1, user2) = (members[i], members[j])

                # Skip pairs that were already accepted. A membership test
                # is used because unseen pairs are the common case, and
                # raising KeyError for each of them is needlessly slow.
                if (user1, user2) in pairsList:
                    continue

                # Use the sub-matrix of the signature matrix defined above.
                # Note that the threshold is set to 0.45 so as to get more
                # pairs, given that time allows for that.
                if (np.mean(tempMat[:, i] == tempMat[:, j])) >= 0.45:
                    # Compute the actual Jaccard similarity.
                    jaccSim = jaccardIndex(data, indices, user1, user2)
                    if jaccSim >= 0.5:
                        # Write the pair if similarity is above the
                        # required threshold.
                        pairsList[(user1, user2)] = ""
                        writePair(user1, user2)

    totalTimee = np.round((time.time() - beginLSH) / 60, 2)
    print(totalTimee, " Minutes elapsed for LSH in total : ")
    return (pairsList.keys())
def facets(self):
    """Yield every item produced by ``combs(self.sig_dim, self.s)``, in order."""
    for index_set in combs(self.sig_dim, self.s):
        yield index_set