def __init__(self, num_features, top_k_size, learning_rate):
    self.D = num_features
    self.w = np.zeros(self.D)  # float weights; an int array would truncate gradient updates
    self.b = 0
    self.learning_rate = learning_rate
    self.cms = CustomCountSketch(3, (1 << 18) - 1)
    self.top_k = TopK(top_k_size)
    self.loss_val = 0
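
All of these examples call into three project-local types that are not shown on this page: CustomCountSketch, TopK, and Node. The stubs below are a hypothetical reconstruction from the call sites alone (update returning the recovered estimate, push/get_value_for_key on a fixed-capacity map); the project's real implementations will differ.

import random

class Node:
    """Key/value pair pushed into the top-k structure (assumed interface)."""
    def __init__(self, key, value):
        self.key = key
        self.value = value

class CustomCountSketch:
    """Count sketch with d hash rows and w buckets (assumed interface)."""
    def __init__(self, d, w):
        self.d = d
        self.w = w
        self.table = [[0.0] * w for _ in range(d)]
        self.seeds = [(random.randrange(1, 1 << 30), random.randrange(1 << 30))
                      for _ in range(d)]

    def update(self, key, delta):
        # Add delta into each row under a signed hash, then return the
        # median of the signed estimates as the recovered running value.
        estimates = []
        for row, (a, b) in enumerate(self.seeds):
            idx = (a * key + b) % self.w
            sign = 1 if ((a * key + b) >> 16) & 1 else -1
            self.table[row][idx] += sign * delta
            estimates.append(sign * self.table[row][idx])
        estimates.sort()
        return estimates[len(estimates) // 2]

class TopK:
    """Fixed-capacity map from feature index to its weight estimate."""
    def __init__(self, k):
        self.k = k
        self.items = {}

    def push(self, node):
        self.items[node.key] = node.value
        if len(self.items) > self.k:
            # Evict the smallest-magnitude entry to respect the budget.
            smallest = min(self.items, key=lambda key: abs(self.items[key]))
            del self.items[smallest]

    def get_value_for_key(self, key):
        return self.items.get(key, 0.0)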
Example #2
import math

import numpy as np

# CustomCountSketch, TopK and Node are project-local types; see the
# reconstructed stubs above.
class LogisticRegression(object):
    def __init__(self, num_features):
        self.D = num_features
        self.learning_rate = 0.5
        # self.cms = CustomCountSketch(3, int(np.log(self.D) ** 2 / 3))
        self.cms = CustomCountSketch(3, (1 << 18) - 1)
        self.top_k = TopK((1 << 14) - 1)

    def sigmoid(self, x):
        # Numerically stable: np.exp is only ever called on a non-positive argument.
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        # Binary cross-entropy for label y in {0, 1} and predicted probability p.
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # w[i] * x[i], with w[i] read from the top-k heap (0 if absent)
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            logit += val
        if max_logit - min_logit == 0:
            # Degenerate range: fall back to the raw logit.
            max_logit = 1
            min_logit = 0
        # Min-max normalize the logit into [0, 1] before the sigmoid.
        normalized_logit = (logit - min_logit) / (max_logit - min_logit)
        print("normalized logit {}".format(normalized_logit))
        sigm_val = self.sigmoid(normalized_logit)
        # Clamp away from 0 and 1 so the cross-entropy loss stays finite.
        sigm_val = min(max(sigm_val, 1e-5), 1.0 - 1e-5)
        print("label {} sigmoid {}".format(label, sigm_val))
        loss = self.loss(y=label, p=sigm_val)
        gradient = (label - sigm_val)
        if gradient != 0:
            for i in range(len(feature_pos)):
                # Push the gradient step through the count sketch; the sketch
                # returns its current estimate of the accumulated weight,
                # which then competes for a slot in the top-k heap.
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def predict(self, feature_pos, feature_val):
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        # Threshold the probability at 0.5 for a hard 0/1 prediction.
        a = self.sigmoid(logit)
        if a > 0.5:
            return 1
        else:
            return 0
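
A minimal driver for the class above, assuming the stub types sketched earlier and a toy sparse dataset given as (positions, values, label) triples; the names and data here are illustrative only:

rows = [
    ([0, 3, 7], [1.0, 0.5, 2.0], 1),
    ([1, 3, 9], [0.8, 1.5, 0.2], 0),
]

model = LogisticRegression(num_features=10)
for _ in range(5):  # a few passes over the toy data
    for positions, values, label in rows:
        model.train_with_sketch(positions, values, label)
print(model.predict([0, 3, 7], [1.0, 0.5, 2.0]))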
Example #3
def __init__(self, num_features):
    self.D = num_features
    self.learning_rate = 0.5
    # self.cms = CustomCountSketch(3, int(np.log(self.D) ** 2 / 3))
    self.cms = CustomCountSketch(3, (1 << 18) - 1)
    self.top_k = TopK((1 << 14) - 1)
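
The parentheses in these sizes are load-bearing: in Python the shift operator binds more loosely than subtraction, so dropping them silently changes the value.

print((1 << 18) - 1)  # 262143 sketch buckets per hash row
print((1 << 14) - 1)  # 16383 top-k slots
print(1 << 14 - 1)    # 8192, parsed as 1 << (14 - 1)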
Example #4
import math

import numpy as np

class LogisticRegression(object):
    def __init__(self, num_features, top_k_size):
        self.D = num_features
        self.w = np.zeros(self.D)  # float weights; an int array would truncate gradient updates
        self.b = 0
        self.learning_rate = 5e-1
        self.cms = CustomCountSketch(3, (1 << 18) - 1)
        # self.cms = CustomCountMinSketch(2, (1 << 15) - 1)
        self.top_k = TopK(top_k_size)
        self.loss_val = 0

    def sigmoid(self, x):
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # multiplying w[i] with x[i], where w[i] is read from the top-k heap
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            # accumulating wTx
            logit += val
        if max_logit - min_logit == 0:
            # Degenerate range: fall back to the raw logit.
            max_logit = 1
            min_logit = 0
        # Min-max normalize the logit into [0, 1] before the sigmoid.
        normalized_logit = (logit - min_logit) / (max_logit - min_logit)
        sigm_val = self.sigmoid(normalized_logit)
        # Clamp away from 0 and 1 so the cross-entropy loss stays finite.
        sigm_val = min(max(sigm_val, 1e-5), 1.0 - 1e-5)
        gradient = (label - sigm_val)
        loss = self.loss(y=label, p=sigm_val)
        self.loss_val += loss
        if gradient != 0:
            for i in range(len(feature_pos)):
                # Push the gradient step through the count sketch and let the
                # recovered estimate compete for a top-k slot.
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def negative_log_likelihood(self, y, x):
        # NLL for a label y in {-1, +1} and raw logit x: log(1 + exp(-y * x)).
        return math.log(1 + math.exp(-y * x))

    def predict(self, feature_pos, feature_val):
        # Unlike the variant above, this returns the raw probability.
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        return self.sigmoid(logit)
Example #5
def __init__(self, num_features):
    self.learning_rate = 5e-1
    self.cms = CustomCountSketch(3, (1 << 18) - 1)
    self.top_k = TopK(num_features)  # budget large enough to keep every feature
    self.loss_val = 0