def test_gradient(self):
        """logistic_regression -- gradient

        Fits coefficients with ``gradient.maximize_stochastic`` on one part of
        the split data, classifies the remaining part with a 0.5 cut-off on
        ``logistic_regression.logistic``, and asserts the resulting precision
        and recall after rounding each to two decimal places.
        """
        # Feature rows are [1, experience, salary]: a literal 1 followed by
        # the first two columns of every record in self.data.
        features = [[1] + record[:2] for record in self.data]

        # The label is the "paid account" column (index 2 of every record).
        labels = [record[2] for record in self.data]

        scaled_features = munge.rescale(features)

        # Seeded right before the split; presumably this keeps the split and
        # the stochastic fit reproducible so the exact expected values below
        # hold -- confirm against ml.train_test_split / maximize_stochastic.
        random.seed(0)
        split = ml.train_test_split(scaled_features, labels, 0.33)
        x_train, x_test, y_train, y_test = split

        initial_beta = [1, 1, 1]
        fit = gradient.maximize_stochastic
        beta_hat = fit(logistic_regression.logistic_log_likelihood_i,
                       logistic_regression.logistic_log_gradient_i,
                       x_train,
                       y_train,
                       initial_beta)

        # Confusion-matrix tallies over the evaluation pairs.
        tp = 0
        fp = 0
        tn = 0
        fn = 0

        for sample, label in zip(x_test, y_test):
            score = logistic_regression.logistic(algebra.dot(beta_hat, sample))
            flagged = score >= 0.5

            if label == 1:
                if flagged:
                    tp += 1
                else:
                    fn += 1
            elif flagged:
                fp += 1
            else:
                tn += 1

        template = "true_pos={0}; false_neg={1}, false_pos={2}; true_neg={3}"
        logging.debug(template.format(tp, fn, fp, tn))

        # float() keeps the division fractional even where "/" on two ints
        # would truncate.
        precision = tp / float(tp + fp)
        recall = tp / float(tp + fn)

        self.assertEqual(0.93, round(precision, 2))
        self.assertEqual(0.82, round(recall, 2))
示例#2
0
    def test_gradient(self):
        """logistic_regression -- gradient

        Trains coefficients through ``gradient.maximize_stochastic`` and
        checks the precision and recall (each rounded to two decimals) that
        those coefficients achieve on the evaluation part of the split.
        """
        # Inputs are [1, experience, salary]; targets are "paid account".
        inputs = [[1] + entry[:2] for entry in self.data]
        targets = [entry[2] for entry in self.data]

        normalized = munge.rescale(inputs)

        # NOTE(review): the seed is set immediately before the split,
        # presumably so the exact assertions below are repeatable -- verify.
        random.seed(0)
        x_train, x_test, y_train, y_test = ml.train_test_split(normalized,
                                                               targets,
                                                               0.33)

        start = [1, 1, 1]
        beta_hat = gradient.maximize_stochastic(
            logistic_regression.logistic_log_likelihood_i,
            logistic_regression.logistic_log_gradient_i,
            x_train,
            y_train,
            start)

        # One (is_positive, predicted_positive) pair per evaluation sample; a
        # sample is predicted positive when its logistic score is >= 0.5.
        outcomes = [
            (y_i == 1,
             logistic_regression.logistic(algebra.dot(beta_hat, x_i)) >= 0.5)
            for x_i, y_i in zip(x_test, y_test)
        ]

        true_positives = sum(1 for actual, guess in outcomes
                             if actual and guess)
        false_negatives = sum(1 for actual, guess in outcomes
                              if actual and not guess)
        false_positives = sum(1 for actual, guess in outcomes
                              if not actual and guess)
        true_negatives = sum(1 for actual, guess in outcomes
                             if not actual and not guess)

        summary = "true_pos={0}; false_neg={1}, false_pos={2}; true_neg={3}"
        logging.debug(summary.format(true_positives,
                                     false_negatives,
                                     false_positives,
                                     true_negatives))

        predicted_positive_total = float(true_positives + false_positives)
        actual_positive_total = float(true_positives + false_negatives)
        precision = true_positives / predicted_positive_total
        recall = true_positives / actual_positive_total

        self.assertEqual(0.93, round(precision, 2))
        self.assertEqual(0.82, round(recall, 2))
示例#3
0
 def test_dot(self):
     """algebra -- dot

     Asserts that ``algebra.dot`` applied to the fixture attributes
     ``self.a`` and ``self.b`` equals exactly 28221.0.
     """
     self.assertEqual(28221.0, algebra.dot(self.a, self.b))
示例#4
0
 def test_dot(self):
     """algebra -- dot

     Checks the value ``algebra.dot`` returns for the ``self.a`` and
     ``self.b`` fixtures against the expected 28221.0.
     """
     expected = 28221.0
     actual = algebra.dot(self.a, self.b)
     self.assertEqual(expected, actual)