Exemplo n.º 1
0
 def _update_parameters(self):
     """Recompute the cached statistics from ``self.members``.

     Stores the member count in ``self.n``, sets both the working and the
     ``orig_`` copies of the mean and standard deviation, and prints a
     one-line summary of the three values.
     """
     samples = self.members
     self.n = len(samples)

     mean = StatTool.calc_mean(samples)
     self.mean = mean
     self.orig_mean = mean

     sd = StatTool.calc_sd(samples, self.is_population)
     self.sd = sd
     self.orig_sd = sd

     summary = "Got %d samples, mean: %.3f, sd: %.3f" % (
         self.n, self.mean, self.sd)
     print(summary)
Exemplo n.º 2
0
    def simple_decision(cls, r, n, alphas):
        """Print the null-hypothesis conclusions for a Pearson r at each alpha.

        ``r`` is converted to a t-statistic with ``n - 2`` degrees of freedom
        and its two-tailed probability is looked up once. For every value in
        ``alphas`` the conclusion drawn from the confidence interval and the
        conclusion drawn from the p-value are printed.
        """
        dof = n - 2
        t_stat = (r * math.sqrt(dof)) / math.sqrt(1 - r**2)
        prob = StatTool.probability_for_t(t_stat, StatTool.TWO_TAILED_TEST, dof)

        for level in alphas:
            interval = StatTool.pearson_r_confidence_interval(r, level, n)
            verdict_by_ci = cls.spell_conclusion_by_ci(interval)
            verdict_by_p = cls.spell_conclusion_by_p(prob, level)

            print("For alpha=%.4f:" % level)
            print(" - %s" % verdict_by_ci)
            print(" - %s" % verdict_by_p)
            print("")
Exemplo n.º 3
0
 def _fix_samples(self):
     """Adjust the sample statistics when neither sample is a population.

     When both samples carry member lists, sample-1's mean and sd are
     replaced by those of the element-wise differences (sample-1 minus
     sample-0) and sample-0 is reset to mean 0.0 with no sd. Otherwise,
     when both original sds are known, only sample-1's sd is replaced by
     ``standard_deviation_difference()``. Nothing changes if either sample
     is a population.
     """
     first = self.samp0
     second = self.samp1

     # Equivalent to "not first.is_population and not second.is_population".
     if first.is_population or second.is_population:
         return

     if first.members and second.members:
         # Indexing sample-0 by sample-1's positions keeps the IndexError
         # when sample-0 has fewer members than sample-1.
         diffs = [value - first.members[idx]
                  for idx, value in enumerate(second.members)]
         first.mean = 0.0
         first.sd = None
         first.notes = "mean and sd have been reset"
         second.mean = StatTool.calc_mean(diffs)
         second.sd = StatTool.calc_sd(diffs, second.is_population)
         second.notes = "mean and sd are difference from sample-0"
         return

     if first.orig_sd is not None and second.orig_sd is not None:
         second.sd = self.standard_deviation_difference()
         second.notes = "sd is difference from sample-0"
Exemplo n.º 4
0
 def margin_of_error(self):
     """Return the margin of error, i.e. half the width of the confidence
     interval (CI).

     It is the distance from μ, in either direction and in the same unit as
     the original sample, that still lies inside the CI. It is computed as
     the two-tailed z-critical value for ``self.alpha`` multiplied by
     ``self.SEM()``.

     Example: with μ = $50, alpha = 5% and a standard error of $15, the 95%
     confidence interval runs from $50 - (1.96 * $15) to $50 + (1.96 * $15),
     so the margin of error is (1.96 * $15).
     """
     z_two_tailed = StatTool.z_critical_value(self.alpha,
                                              StatTool.TWO_TAILED_TEST)
     standard_error = self.SEM()
     return z_two_tailed * standard_error
Exemplo n.º 5
0
    def simple_calculation(cls, k, n, f_ratio, alphas):
        """Print the F-ratio decision for every alpha in ``alphas``.

        The degrees of freedom are ``k - 1`` (printed as df_n) and
        ``k * n - k`` (printed as df_d). For each alpha the F-critical value
        is looked up and H0 is reported as rejected when ``f_ratio`` is at
        least that value. A negative ``f_ratio`` only prints a notice.
        """
        if f_ratio < 0:
            print("Cannot make decision because F is negative")
            return

        numer_df = k - 1
        denom_df = (k * n) - k
        print("df_n:        %d" % numer_df)
        print("df_d:        %d" % denom_df)
        print("F-ratio:    % .3f" % f_ratio)

        for level in alphas:
            threshold = StatTool.f_critical_value(level, numer_df, denom_df)
            if f_ratio >= threshold:
                verdict = "reject H0"
            else:
                verdict = "accept H0"
            print("alpha %.3f: critical: %.3f conclusion: %s" %
                  (level, threshold, verdict))
Exemplo n.º 6
0
 def _calc(self):
     """Return the q value scaled by the root of the averaged msw / n terms.

     The q value is looked up for ``self.alpha`` and the two degrees of
     freedom; the within-group mean squares is divided by each group's
     ``n``, those terms are averaged over the number of groups, and the
     square root of that average is multiplied by the q value.
     """
     q_critical = StatTool.q_value(self.alpha, self.df_n(), self.df_d())
     within = self.mean_squares_within()
     group_count = len(self.groups)
     summed_terms = sum(within / group.n for group in self.groups)
     return q_critical * math.sqrt(summed_terms / group_count)
Exemplo n.º 7
0
 def _calc(self):
     """Return the probability of this test's score under the F distribution,
     using ``self.df_n()`` and ``self.df_d()`` as degrees of freedom.
     """
     f_score = self.score()
     df_n = self.df_n()
     df_d = self.df_d()
     return StatTool.probability_for_f(f_score, df_n, df_d)
Exemplo n.º 8
0
 def _calc(self):
     """Return the F-critical value for ``self.alpha`` with the between-
     and within-group degrees of freedom.
     """
     between = self.df_between()
     within = self.df_within()
     return StatTool.f_critical_value(self.alpha, between, within)
Exemplo n.º 9
0
 def p_value(self):
     """Return the probability (as a proportion) of this result's
     t-statistic, for the test direction ``self.dir`` and ``self.df()``
     degrees of freedom.
     """
     t_value = self.t_statistics()
     direction = self.dir
     dof = self.df()
     return StatTool.probability_for_t(t_value, direction, dof)
Exemplo n.º 10
0
 def critical(self):
     """Return the t-critical value for the configured alpha, direction and
     degrees of freedom.

     The value may be negative!
     """
     direction = self.dir
     dof = self.df()
     return StatTool.t_critical_value(self.alpha, direction, dof)
Exemplo n.º 11
0
 def p_value(self):
     """Return the probability (as a proportion) of this result's z-score
     for the test direction ``self.dir``.
     """
     z_score = self.score()
     direction = self.dir
     return StatTool.probability_for_z(z_score, direction)
Exemplo n.º 12
0
 def critical(self):
     """Return the z-critical value for the configured alpha and direction.
     """
     significance = self.alpha
     direction = self.dir
     return StatTool.z_critical_value(significance, direction)
Exemplo n.º 13
0
 def spell_type_of_test(self):
     """Return the spelled-out directionality (``self.dir``) of this t-test.
     """
     direction = self.dir
     return StatTool.spell_directionality(direction)
Exemplo n.º 14
0
    def print_correlation(self, grp0, grp1):
        """Print the correlation and linear-regression summary for two groups.

        Computes Pearson's r between ``grp0.members`` (x) and ``grp1.members``
        (y), converts it to a t-statistic with ``grp0.n - 2`` degrees of
        freedom, and prints r, r^2, the confidence interval for r at
        ``self.alpha``, the two-tailed p-value, both textual conclusions, and
        the regression slope, intercept and standard error of the estimate.

        Args:
            grp0: Group supplying the x values; its ``title``, ``members``,
                ``n``, ``mean`` and ``sd`` are read.
            grp1: Group supplying the y values; its ``title``, ``members``,
                ``mean`` and ``sd`` are read.

        Note:
            The r-to-t conversion divides by ``sqrt(1 - r**2)`` and the
            standard error of the estimate divides by ``n - 2``; neither
            division is guarded here.
        """
        print("%s --> %s correlation" % (grp0.title, grp1.title))
        print('-' * 70)

        # r, also called Pearson's r, is the correlation coefficient; it
        # quantifies the relationship measured on the sample:
        #
        #        cov(x, y)
        #   r = -----------
        #         Sx * Sy
        r, _ = StatTool.pearson_r(grp0.members, grp1.members)

        # r squared (r^2) is the coefficient of determination: the % of
        # variation in y explained by variation in x.
        r_squared = r**2

        # Degrees of freedom: we subtract one for each of the two samples.
        df = grp0.n - 2

        # Convert r to t
        t = (r * math.sqrt(df)) / math.sqrt(1 - r**2)

        # Calculate the (two-tailed) probability for t
        p = StatTool.probability_for_t(t, StatTool.TWO_TAILED_TEST, df)

        # If ρ (rho) is the true correlation for the population, CI is the
        # confidence interval for ρ, meaning the range of likely values for
        # the population correlation coefficient ρ.
        ci = StatTool.pearson_r_confidence_interval(r, self.alpha, grp0.n)

        conclusion1 = self.spell_conclusion_by_ci(ci)
        conclusion2 = self.spell_conclusion_by_p(p, self.alpha)

        # Slope for linear regression.
        slope = r * grp1.sd / grp0.sd

        # The regression line always goes through the mean.
        intercept = grp1.mean - slope * grp0.mean

        def calc_se_est(x, y, slope, intercept):
            # Calculate standard error of the estimate for the line
            #  y = slope * x + intercept
            ss = 0.0
            n = len(x)
            # y is indexed by x's positions so that a y shorter than x still
            # raises IndexError instead of being silently truncated.
            for i, x_i in enumerate(x):
                ss += (y[i] - (slope * x_i + intercept))**2
            return math.sqrt(ss / float(n - 2))

        # Standard error of the estimate measures the accuracy of our
        # regression line compared to the actual data.
        se_est = calc_se_est(grp0.members, grp1.members, slope, intercept)

        print("DF                       : % d" % df)
        print("Pearson r                : % .3f" % r)
        print("r^2 (coef of determ.)    : % .3f (%.2f%%)" %
              (r_squared, r_squared * 100.0))
        print("Confidence interval      : % .3f - %.3f" % ci)
        print("t-statistic              : % .3f" % t)
        print("P-value                  : % .5f" % p)
        print("Conclusion               : - %s" % conclusion1)
        print("                           - %s" % conclusion2)
        print("Linear regression:")
        print("Slope                    : % .3f" % slope)
        print("Intercept                : % .3f" % intercept)
        print("Standard err. of estimate: % .3f" % se_est)
        print("")