Example #1
def extended_condorcet_simple(rankings):
    # Assumes candidates are labeled 0..n-1; rankings[v, pos] is the
    # candidate that voter v puts at position pos.
    n = rankings.shape[1]
    pairs = combs(range(n), 2)

    condorcet_rows, condorcet_cols = [], []

    for cand, other_cand in pairs:
        cand_pos = np.where(rankings == cand)[1]
        other_pos = np.where(rankings == other_cand)[1]

        # If every voter ranks one candidate above the other, fix that
        # relation (extended Condorcet reduction).
        if np.all(cand_pos < other_pos):
            condorcet_rows.append(cand)
            condorcet_cols.append(other_cand)
        elif np.all(other_pos < cand_pos):
            condorcet_rows.append(other_cand)
            condorcet_cols.append(cand)

    if condorcet_rows:
        mat = sp.coo_matrix(
            (np.ones(len(condorcet_rows)), (condorcet_rows, condorcet_cols)),
            shape=(n, n))
    else:
        mat = None
    return mat
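
Note: every example on this page assumes `import numpy as np` (and, where sparse matrices appear, `import scipy.sparse as sp`) plus a `combs` helper whose definition is never shown; some examples call it as `utils.combs(n=..., k=...)`. A minimal reconstruction that fits all of the call sites here, assuming it wraps itertools.combinations into an integer array (Example #9 transposes the result, so a plain list of tuples would not work):

import itertools
import numpy as np

def combs(n, k=2):
    # Hypothetical helper: all k-combinations of n as an (m, k) int array.
    # `n` may be an int (meaning range(n)) or an iterable; the default
    # k=2 matches the single-argument call in Example #10.
    items = range(n) if isinstance(n, int) else n
    return np.array(list(itertools.combinations(items, k)))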
Example #2
def combine_variables(X):
    # Hash every combination of up to 3 of the 7 feature columns into one
    # of 800000 buckets (note: the column count 7 is hardcoded and must
    # match X.shape[1]).
    X_plus = []
    for j in range(1, 4):
        for i in combs(7, j):
            X_plus.append([hash(tuple(v)) % 800000 for v in X[:, i]])

    return np.array(X_plus).T
Example #3
def yield_extreme_pts(p, m_bar):
    # Yield the extreme points lazily instead of materializing them all
    # first, so callers that iterate once (see Example #7's
    # memory_efficient path) actually save memory.
    m = 2 * m_bar + 1
    for zero_loc in utils.combs(n=p, k=p - m - 1):
        cp = chordal_prod(N=p)
        cp.set_polyn(zero_indices=zero_loc)
        yield cp.get_polyn_vals()
Example #4
    def sample(self):
        # Sample an Erdos-Renyi G(n, p) graph: each possible edge is
        # included independently with probability self.edge_probability.
        gr = basic_graph(nr_vertices=self.nr_vertices, directed=self.directed)
        edge_set = set()
        for c in utils.combs(self.nr_vertices, 2):
            if np.random.uniform(low=0.0, high=1.0) <= self.edge_probability:
                edge_set.add(tuple(c))

        gr.set_edges(edge_container=edge_set)

        return gr
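
For reference, the same G(n, p) edge-sampling idea as a self-contained sketch, without the project-specific basic_graph and utils dependencies (the names below are illustrative, not from the original code):

import itertools
import random

def sample_gnp_edges(nr_vertices, edge_probability):
    # Include each of the C(n, 2) possible edges independently with
    # probability edge_probability (an Erdos-Renyi G(n, p) sample).
    return {edge
            for edge in itertools.combinations(range(nr_vertices), 2)
            if random.random() <= edge_probability}

edges = sample_gnp_edges(nr_vertices=10, edge_probability=0.3)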
Example #5
def combine_variables(X, k):
    """
    Create combinations (tuples) of variables and return them,
    from size 1 (singles) up to size k.
    """
    X_plus = []

    # Create combinations of features, up to size k (note: the column
    # count 7 is hardcoded and must match X.shape[1]).
    for j in range(1, k + 1):
        for i in combs(7, j):
            X_plus.append([hash(tuple(v)) for v in X[:, i]])

    return np.array(X_plus).T
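
A quick shape check (using the hypothetical combs reconstruction above): with 7 input columns and k=2, the output has C(7,1) + C(7,2) = 7 + 21 = 28 hashed columns.

import numpy as np

X = np.arange(21).reshape(3, 7)  # 3 rows, 7 feature columns
X_combined = combine_variables(X, k=2)
print(X_combined.shape)          # (3, 28): one hashed column per combination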
Example #6
def cal_s_in_max_asc_prob(p, m_bar):
    sparsity_range = range(1, p)  # NOTE: s only runs over 1..p-1 here
    all_probs = {s: {} for s in sparsity_range}

    if scipy.special.comb(p, 2 * m_bar + 2) >= 5000:
        raise Exception(
            'cal_s_in_max_asc_prob: probably too many extreme points. '
            'Comment out this guard to continue.')

    ext_pts = list(yield_extreme_pts(p=p, m_bar=m_bar))

    for s in sparsity_range:
        print('Working with sparsity {}...'.format(s))
        fail = 0.0
        success = 0.0
        for supp_set in utils.combs(n=p, k=s):
            # A support set fails if, for some extreme point, the l2-mass
            # on the support reaches the mass on its complement.
            fail_flag = False
            for extp in ext_pts:
                supp_set_sum = sum(np.linalg.norm(extp[k]) for k in supp_set)
                supp_set_comp_sum = sum(np.linalg.norm(extp[k])
                                        for k in range(p) if k not in supp_set)
                if supp_set_sum >= supp_set_comp_sum:
                    fail_flag = True
                    break
            if fail_flag:
                fail += 1.0
            else:
                success += 1.0

        all_probs[s]['success_rate'] = success / (success + fail)
        # If every size-s support fails, every superset fails too, so the
        # rate stays zero for larger s; stop early instead of recomputing.
        if all_probs[s]['success_rate'] == 0.0:
            break

    return all_probs
Example #7
def cal_max_asc(p, m_bar, memory_efficient=False):
    sparsity_range = range(1, p + 1)
    max_asc = {s: {} for s in sparsity_range}

    if not memory_efficient:
        ext_pts = list(yield_extreme_pts(p=p, m_bar=m_bar))

    for s in sparsity_range:
        fail = []
        success = []
        for supp_set in utils.combs(n=p, k=s):
            if memory_efficient:
                # Generators are single-use, so regenerate the extreme
                # points for every support set (trades time for memory).
                ext_pts = yield_extreme_pts(p=p, m_bar=m_bar)
            fail_flag = False
            for extp in ext_pts:
                supp_set_sum = sum(np.linalg.norm(extp[k]) for k in supp_set)
                supp_set_comp_sum = sum(np.linalg.norm(extp[k])
                                        for k in range(p) if k not in supp_set)
                if supp_set_sum >= supp_set_comp_sum:
                    fail_flag = True
                    break
            if fail_flag:
                fail.append(supp_set)
            else:
                success.append(supp_set)

        # Store whichever side (successes or failures) is smaller.
        if len(success) <= len(fail):
            max_asc[s]['type'] = True
            max_asc[s]['idx_sets'] = success
        else:
            max_asc[s]['type'] = False
            max_asc[s]['idx_sets'] = fail

    return max_asc
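
Examples #6 and #7 repeat the same inner test verbatim. A small refactor sketch (not part of the original code) that makes the criterion explicit: a support set fails against an extreme point when the l2-mass on the support reaches the mass on its complement.

import numpy as np

def support_set_fails(extp, supp_set, p):
    # True iff sum_{k in S} ||extp[k]|| >= sum_{k not in S} ||extp[k]||.
    supp_sum = sum(np.linalg.norm(extp[k]) for k in supp_set)
    comp_sum = sum(np.linalg.norm(extp[k])
                   for k in range(p) if k not in supp_set)
    return supp_sum >= comp_sum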
Example #8
    def facets(self):
        # Each facet is the join of one s-subset of the groups.
        yield from (self.join_groups(i) for i in combs(self.nr_groups, self.s))
Example #9
    def solve_ilp(self):
        """ Solves problem exactly using MIP/ILP approach
            Used solver: CoinOR CBC
            Incidence-matrix Q holds complete information needed for opt-process
        """
        if self.verbose:
            print('Solve: build model')

        if self.condorcet_red:
            condorcet_red_mat = extended_condorcet_simple(self.votes_arr)

        n = self.Q.shape[0]
        x_n = n * n

        model = CyClpSimplex()  # MODEL
        x = model.addVariable('x', x_n, isInt=True)  # VARS

        model.objective = self.Q.ravel()  # OBJ

        # x_ab = boolean (already int; need to constrain to [0,1])
        model += sp.eye(x_n) * x >= np.zeros(x_n)
        model += sp.eye(x_n) * x <= np.ones(x_n)

        # constraints for every pair
        start_time = time()
        n_pairwise_constr = n * (n - 1) // 2
        if self.verbose:
            print('  # pairwise constr: ', n_pairwise_constr)

        # Somewhat bloated, just to get some vectorization/speed!
        combs_ = combs(range(n), 2)

        inds_a = np.ravel_multi_index(combs_.T, (n, n))
        inds_b = np.ravel_multi_index(combs_.T[::-1], (n, n))

        row_inds = np.tile(np.arange(n_pairwise_constr), 2)
        col_inds = np.hstack((inds_a, inds_b))

        pairwise_constraints = sp.coo_matrix(
            (np.ones(n_pairwise_constr * 2), (row_inds, col_inds)),
            shape=(n_pairwise_constr, n * n))
        end_time = time()
        if self.verbose:
            print("    Took {:.{prec}f} secs".format(end_time - start_time,
                                                     prec=3))

        # and for every cycle of length 3
        start_time = time()
        n_triangle_constrs = n * (n - 1) * (n - 2)
        if self.verbose:
            print('  # triangle constr: ', n_triangle_constrs)

        # Somewhat bloated, just to get some vectorization/speed!
        perms_ = perms(range(n), 3)

        inds_a = np.ravel_multi_index(perms_.T[(0, 1), :], (n, n))
        inds_b = np.ravel_multi_index(perms_.T[(1, 2), :], (n, n))
        inds_c = np.ravel_multi_index(perms_.T[(2, 0), :], (n, n))

        row_inds = np.tile(np.arange(n_triangle_constrs), 3)
        col_inds = np.hstack((inds_a, inds_b, inds_c))

        triangle_constraints = sp.coo_matrix(
            (np.ones(n_triangle_constrs * 3), (row_inds, col_inds)),
            shape=(n_triangle_constrs, n * n))
        end_time = time()
        if self.verbose:
            print("    Took {:.{prec}f} secs".format(end_time - start_time,
                                                     prec=3))

        model += pairwise_constraints * x == np.ones(n_pairwise_constr)
        model += triangle_constraints * x >= np.ones(n_triangle_constrs)

        if self.condorcet_red and condorcet_red_mat is not None:
            I, J, V = sp.find(condorcet_red_mat)
            indices_pos = np.ravel_multi_index([J, I], (n, n))
            indices_neg = np.ravel_multi_index([I, J], (n, n))
            nnz = len(indices_pos)

            if self.verbose:
                print(
                    '  Extended Condorcet reductions: {} * 2 relations fixed'.
                    format(nnz))

            lhs = sp.coo_matrix(
                (np.ones(nnz * 2),
                 (np.arange(nnz * 2), np.hstack((indices_pos, indices_neg)))),
                shape=(nnz * 2, n * n))
            rhs = np.hstack(
                (np.ones(len(indices_pos)), np.zeros(len(indices_neg))))
            model += lhs * x == rhs

        cbcModel = model.getCbcModel()  # Clp -> Cbc model / LP -> MIP
        cbcModel.logLevel = self.verbose

        if self.verbose:
            print('Solve: run MIP\n')
        start_time = time()
        # "Call CbcMain. Solve the problem using the same parameters used
        # by CbcSolver." This deviates from cylp's docs, which are sparse;
        # preprocessing will be used here and is very important!
        status = cbcModel.solve()
        end_time = time()
        if self.verbose:
            print("  CoinOR CBC used {:.{prec}f} secs".format(end_time -
                                                              start_time,
                                                              prec=3))

        x_sol = cbcModel.primalVariableSolution['x']
        self.obj_sol = cbcModel.objectiveValue
        x_mat = np.array(x_sol).reshape((n, n)).round().astype(int)
        self.aggr_rank = np.argsort(x_mat.sum(axis=0))[::-1]
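
Example #9 also assumes a perms helper mirroring combs. A sketch, again hypothetical, assuming it returns all k-permutations as an (m, k) int array so that perms_.T feeds straight into np.ravel_multi_index:

import itertools
import numpy as np

def perms(iterable, k):
    # All k-permutations of `iterable` as an (m, k) int array; for k=3
    # this yields n * (n - 1) * (n - 2) rows, matching n_triangle_constrs.
    return np.array(list(itertools.permutations(iterable, k)))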
Example #10
def LSH(data, signatureMatrix, nrOfRows, nrofBands):
    """Perform LSH on the given dataset."""

    # Initializations: `indices` holds each user's row-slice boundaries in
    # `data` (assumed sorted by user id), used later for Jaccard lookups.

    nrofUsers = len(np.unique(data[:, 0]))
    cumSums = np.cumsum(np.bincount(data[:, 0]))
    indices = np.hstack((np.zeros(1), cumSums)).astype(int)
    row = 0
    pairsList = {}

    beginLSH = time.time()
    for band in range(nrofBands):

        # Make two dictionaries. 'buckets' is the full dictionary, whereas
        # 'nonEmptyBuckets' is the subset of keys holding more than one
        # user. We loop over the latter for efficiency.

        buckets = {}
        nonEmptyBuckets = {}

        print(" Exploring Band : ", band, "...")
        for user in range(nrofUsers):
            bucketIds = tuple(signatureMatrix[row:row + nrOfRows, user])
            if bucketIds in buckets:
                buckets[bucketIds].append(user)
                nonEmptyBuckets[bucketIds] = 1
            else:
                buckets[bucketIds] = [user]
        row += nrOfRows

        # Loop over the small dictionary. Build a temp matrix holding the
        # user columns of each bucket and map to local indices. Again, this
        # avoids looping over the whole signature matrix.

        for bucket in nonEmptyBuckets.keys():
            tempMat = signatureMatrix[:, buckets[bucket]]
            tempInd = range(len(buckets[bucket]))
            pairs = combs(tempInd)

            for pair in pairs:
                (i, j) = pair

                # Map back to the original user indices and skip any pair
                # that was already recorded in an earlier band.
                (user1, user2) = (buckets[bucket][i], buckets[bucket][j])
                if (user1, user2) in pairsList:
                    continue

                # Use the sub-matrix of the signature matrix defined above.
                # The threshold is set to 0.45 so as to get more candidate
                # pairs, given that time allows for that.
                if np.mean(tempMat[:, i] == tempMat[:, j]) >= 0.45:

                    # Compute the actual Jaccard similarity on the raw data.
                    jaccSim = jaccardIndex(data, indices, user1, user2)

                    if jaccSim >= 0.5:
                        # Record the pair: its similarity is above the
                        # required threshold.
                        pairsList[(user1, user2)] = ""
                        writePair(user1, user2)

    totalTime = np.round((time.time() - beginLSH) / 60, 2)
    print(totalTime, "minutes elapsed for LSH in total")
    return pairsList.keys()
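
jaccardIndex and writePair are not shown in this example. A plausible sketch of the former, assuming data is a (user, item) array sorted by user id so that `indices` gives each user's row slice (hypothetical reconstruction):

import numpy as np

def jaccardIndex(data, indices, user1, user2):
    # Jaccard similarity of the two users' item sets, sliced out of the
    # (user, item) array via the cumulative offsets in `indices`.
    items1 = set(data[indices[user1]:indices[user1 + 1], 1])
    items2 = set(data[indices[user2]:indices[user2 + 1], 1])
    return len(items1 & items2) / len(items1 | items2)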
Example #11
    def facets(self):
        # One facet per s-subset of the sig_dim coordinates.
        yield from combs(self.sig_dim, self.s)