示例#1
0
class SparseRecovery(object):
    """Stream the entries of ``A @ x`` for a random sparse ``x`` into a sketch.

    ``A`` is an ``n x d`` matrix of standard-normal draws and ``x`` is a
    ``d x 1`` column vector that is regenerated on every call to ``run``.
    """

    def __init__(self, n, d, rand_count):
        """Draw the random matrix and create the sketch.

        Args:
            n: Number of rows of ``A``.
            d: Number of columns of ``A`` and length of ``x``.
            rand_count: How many random positions of ``x`` are assigned in
                ``run``. Positions are drawn with replacement, so fewer
                distinct entries may end up set.
        """
        # The matrix is drawn before the sketch is constructed, so the
        # global random stream is consumed in a fixed order.
        self.A = np.random.randn(n, d)
        self.rand_count = rand_count
        self.d = d
        # presumably (rows, columns) of the sketch table -- confirm against
        # the CountSketch definition.
        self.countsketch = CountSketch(3, 10000)

    def run(self):
        """Build a random sparse ``x``, sketch ``A @ x`` and print the results.

        Side effects: sets ``self.x``, ``self.non_zero_values``,
        ``self.non_zero_x`` and ``self.non_zero_y``, feeds ``n * d`` updates
        into ``self.countsketch``, and prints diagnostics to stdout.
        """
        print("A {}".format(self.A))

        signal = np.zeros((self.d, 1))
        self.x = signal
        for _ in range(self.rand_count):
            # Draw the position first, then the magnitude, so the random
            # numbers are consumed in the same order on every run.
            slot = np.random.randint(self.d)
            signal[slot] = np.random.rand() * np.random.randint(10000)

        # Only strictly positive entries are reported as "non zero".
        positive = signal > 0
        self.non_zero_values = signal[positive]
        self.non_zero_x, self.non_zero_y = np.where(positive)

        sketch = self.countsketch
        for col in range(len(signal)):
            # Column `col` of A scaled by x[col]: its contribution to A @ x.
            contribution = self.A[:, col] * signal[col]
            for row, amount in enumerate(contribution):
                sketch.update(row, amount)

        print("non zero values {}".format(self.non_zero_values))
        print("non zero x {}".format(self.non_zero_x))
        print("printing heap")

        # NOTE(review): the sketch is keyed by row index (0..n-1) above but
        # queried here for 0..d-1 -- confirm this is intended when n != d.
        estimates = np.array(
            [sketch.query(idx) for idx in range(len(signal))])
        # Indices of the `rand_count` largest estimates, largest first.
        print(estimates.argsort()[-self.rand_count:][::-1])
示例#2
0
def repetitions():
    """Run one trial comparing two sketches on a signed power-law stream.

    Fills the first ``nodes`` slots of the module-level
    ``power_law_distribution`` with fresh ``power_law`` draws, rounds every
    entry to an int, attaches a random sign, and feeds the stream to a
    ``ComplementaryCountMinSketch(4, 25)`` and a ``CountSketch(5, 50)``.
    The mean squared estimation error of each sketch is appended to the
    module-level ``ccms_losses`` and ``cs_losses`` lists. Returns ``None``.
    """
    for node in range(nodes):
        power_law_distribution[node] = power_law(
            k_min, k_max, np.random.uniform(0, 1), gamma)

    signs = [1, -1]
    magnitudes = [int(round(draw)) for draw in power_law_distribution]
    signed_stream = [random.choice(signs) * size for size in magnitudes]

    # Exact occurrence count of every signed value in the stream.
    true_counts = Counter(signed_stream)
    ordered_pairs = sorted(true_counts.items())

    ccms = ComplementaryCountMinSketch(4, 25)
    # top frequent items comparison
    cs = CountSketch(5, 50)

    for signed in signed_stream:
        if signed <= 0:
            # Non-positive entries are recorded against their absolute
            # value with an explicit -1 second argument.
            ccms.update(abs(signed), -1)
            cs.update(abs(signed), -1)
        else:
            ccms.update(signed)
            cs.update(signed)

    keys = list(set(pair[0] for pair in ordered_pairs))

    # NOTE(review): `keys` still holds the signed values, while the sketches
    # above were only ever updated with non-negative keys -- confirm that
    # querying negative keys is intended.
    ccms_loss = 0
    cs_loss = 0
    for key in keys:
        ccms_estimate = ccms.query(key)
        cs_estimate = cs.query(key)
        # Occurrences of `key` minus occurrences of its negation.
        net_count = true_counts[key] - true_counts[-key]
        ccms_loss += (net_count - ccms_estimate) ** 2
        cs_loss += (net_count - cs_estimate) ** 2

    distinct = len(keys)
    ccms_losses.append(ccms_loss / distinct)
    cs_losses.append(cs_loss / distinct)