Example #1
0
    def get_b0b1(self, x, y):
        """
        Fit a simple linear regression line to the points (x, y).

        In simple linear regression, b1 is the slope of the regression
        line and b0 is the intercept of the regression line with the
        y-axis, so the fitted line is: y-hat = b0 + b1*x

        :param x: a list of x values for a series of points
        :param y: a list of y values for a series of points
        :return: b0, b1
        :raises ZeroDivisionError: if all x values are equal
            (zero variance in x), since the slope denominator is then 0
        """
        x_bar = mean(x)
        y_bar = mean(y)

        # Deviations of each coordinate from its mean.
        x_minus_xbar = [i - x_bar for i in x]
        y_minus_ybar = [j - y_bar for j in y]

        # Fix: the original named these two sums backwards — the
        # cross-product sum (the slope's NUMERATOR) was stored in
        # "b1_column_denom" and the squared x deviations (the
        # DENOMINATOR) in "b1_column_num". The arithmetic was correct;
        # only the names were swapped.
        numerator = sum(i * j for i, j in zip(x_minus_xbar, y_minus_ybar))
        denominator = sum(i ** 2 for i in x_minus_xbar)

        b1 = numerator / denominator
        b0 = y_bar - (b1 * x_bar)

        return b0, b1
    def get_b0b1(self, x, y):
        """
        Compute the coefficients of the simple linear regression line
        y-hat = b0 + b1*x for the point series (x, y).

        b1 is the slope of the line; b0 is its intercept with the
        y-axis.

        :param x: a list of x values for a series of points
        :param y: a list of y values for a series of points
        :return: b0, b1
        """
        mean_x = mean(x)
        mean_y = mean(y)

        # Center both series on their means.
        dx = [xi - mean_x for xi in x]
        dy = [yi - mean_y for yi in y]

        # Slope = sum of cross-products / sum of squared x deviations.
        slope = sum(a * b for a, b in zip(dx, dy)) / sum(a ** 2 for a in dx)
        intercept = mean_y - slope * mean_x

        return intercept, slope
Example #3
0
    def r_squared(self, x, y):
        """
        Compute R-squared (the coefficient of determination) for the
        regression line fitted to the points (x, y).

        R-squared is a statistical measure of how close the data is to
        the fitted regression line. It is always between 0 and 1, and
        the higher the value, the better the model fits the data.

        :param x: a list of x values for a series of points
        :param y: a list of y values for a series of points
        :return: r_squared
        """
        mean_y = mean(y)

        # Total sum of squares: spread of the observed y around its mean.
        total_ss = sum((yi - mean_y) ** 2 for yi in y)

        # Explained sum of squares: spread of the fitted values around
        # the same mean.
        intercept, slope = self.get_b0b1(x, y)
        explained_ss = sum((intercept + slope * xi - mean_y) ** 2 for xi in x)

        return explained_ss / total_ss
    def r_squared(self, x, y):
        """
        Return the coefficient of determination (R-squared) of the
        simple linear regression fitted to the points (x, y).

        R-squared measures how close the data lies to the fitted
        regression line; it ranges from 0 to 1 and higher values mean
        a better fit.

        :param x: a list of x values for a series of points
        :param y: a list of y values for a series of points
        :return: r_squared
        """
        avg_y = mean(y)

        # Total variation of y around its mean.
        ss_total = 0
        for actual in y:
            ss_total += (actual - avg_y) ** 2

        # Variation of the fitted values around the same mean.
        b0, b1 = self.get_b0b1(x, y)
        ss_model = 0
        for value in x:
            predicted = b0 + b1 * value
            ss_model += (predicted - avg_y) ** 2

        return ss_model / ss_total
Example #5
0
def scale(data_matrix):
    """
    Return the per-column summary statistics of data_matrix.

    :param data_matrix: matrix whose columns are to be summarized
    :return: (means, stdevs) — two parallel lists holding the mean and
        the standard deviation of each column
    """
    # Only the column count is needed here; discard the row count
    # (the original bound it to an unused local).
    _, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [standard_deviation(get_column(data_matrix, j))
              for j in range(num_cols)]
    return means, stdevs
Example #6
0
def scale(data_matrix):
    """Return the mean and the standard deviation of each column of
    data_matrix, as a pair of parallel lists (means, stdevs)."""
    num_rows, num_cols = shape(data_matrix)
    # All column means first, then all column standard deviations,
    # preserving the original order of get_column calls.
    means = []
    for col_index in range(num_cols):
        means.append(mean(get_column(data_matrix, col_index)))
    stdevs = []
    for col_index in range(num_cols):
        stdevs.append(standard_deviation(get_column(data_matrix, col_index)))
    return means, stdevs
Example #7
0
def de_mean(x):
    """Translate x by subtracting its mean, so the result has mean 0."""
    centre = mean(x)
    return [value - centre for value in x]