Exemplo n.º 1
0
 def _add_point_estimate_ci(self, df: DataFrame):
     """Attach a two-sided z confidence interval for the point estimate.

     The standard error is ``sqrt(VARIANCE / denominator)``, where the
     denominator column is the one named by ``self._denominator``. The
     significance level handed to ``_zconfint_generic`` is
     ``1 - self._interval_size``.

     The ``CI_LOWER`` and ``CI_UPPER`` columns are written into ``df`` in
     place, and the same frame is returned.
     """
     std_err = np.sqrt(df[VARIANCE] / df[self._denominator])
     significance = 1 - self._interval_size
     lower, upper = _zconfint_generic(
         mean=df[POINT_ESTIMATE],
         std_mean=std_err,
         alpha=significance,
         alternative=TWO_SIDED,
     )
     df[CI_LOWER] = lower
     df[CI_UPPER] = upper
     return df
Exemplo n.º 2
0
    def conf_int(self, value=None, alpha=0.05, alternative="two-sided"):
        """
        Confidence interval for the probability that sample 1 is larger.

        The interval is computed for the shifted probability

            P(x1 > x2) + 0.5 * P(x1 = x2) - value

        Parameters
        ----------
        value : float, optional
            Shift subtracted from the estimated probability. ``None``
            (the default) is treated as 0. For example, ``value=0.5``
            centers the confidence interval at zero.
        alpha : float
            Significance level for the confidence interval; the coverage
            is ``1 - alpha``.
        alternative : str
            The alternative hypothesis, H1, one of

               * 'two-sided' : H1: ``prob - value`` not equal to 0.
               * 'larger' :   H1: ``prob - value > 0``
               * 'smaller' :  H1: ``prob - value < 0``

        Returns
        -------
        lower : float or ndarray
            Lower confidence limit. This is -inf for the one-sided
            alternative "smaller".
        upper : float or ndarray
            Upper confidence limit. This is inf for the one-sided
            alternative "larger".

        Notes
        -----
        The normal distribution is used only when ``self.use_t`` is exactly
        ``False``; any other value selects the t distribution with
        ``self.df`` degrees of freedom.
        """
        shift = 0 if value is None else value
        centered = self.prob1 - shift
        std_err = np.sqrt(self.var / self.nobs)

        # Identity check on purpose: only a literal False selects the z interval.
        if self.use_t is not False:
            return _tconfint_generic(centered, std_err, self.df, alpha,
                                     alternative)
        return _zconfint_generic(centered, std_err, alpha, alternative)
Exemplo n.º 3
0
def get_peruser_diff_zstat(df,
                  nobs_name,
                  mean_name,
                  std_name,
                  test_group_column='test_group',
                  test='TEST',
                  control='CONTROL',
                  alpha=0.05,
                  alternative='two-sided'):
    """Z-test and confidence interval for the test-vs-control mean difference.

    Similar to
    https://www.statsmodels.org/dev/generated/statsmodels.stats.weightstats.CompareMeans.zconfint_diff.html#statsmodels.stats.weightstats.CompareMeans.zconfint_diff

    The standard error of the difference assumes unequal variances:
    ``sqrt(std_test**2 / (n_test - 1) + std_control**2 / (n_control - 1))``.

    Example (illustrative)::

        pre = s1.build_and_run_sql(grouping=['test_group', 'user_id'],
                                   sql_template=TEMPLATE_USER_STATS)
        get_peruser_diff_zstat(pre, 'units', 'buyers mean', 'buyers std')

    Parameters
    ----------
    df : DataFrame
        Summary table that is indexed by ``test_group_column`` inside this
        function. Assumes one row per group label -- TODO confirm with
        callers.
    nobs_name : str
        Column holding the number of observations per group.
    mean_name : str
        Column holding the per-group mean.
    std_name : str
        Column holding the per-group standard deviation.
    test_group_column : str
        Column whose values identify the groups.
    test : str
        Label of the test group row.
    control : str
        Label of the control group row.
    alpha : float
        Significance level for the confidence interval, coverage is
        ``1-alpha``.
    alternative : string
        Alternative hypothesis, H1, of the test that corresponds to the
        confidence interval. One of:
        'two-sided': H1: difference in means not equal to value (default)
        'larger' :   H1: difference in means larger than value
        'smaller' :  H1: difference in means smaller than value

    Returns
    -------
    test_mean, control_mean, relative_diff, relative_confint, zstat, pvalue
        ``relative_diff`` is ``(test_mean - control_mean) / control_mean``
        and ``relative_confint`` is the confidence interval of the absolute
        difference divided by ``control_mean``.
    """
    by_group = df.set_index(test_group_column)
    treated = by_group.loc[test, :]
    baseline = by_group.loc[control, :]

    treated_mean = treated[mean_name]
    baseline_mean = baseline[mean_name]
    diff = treated_mean - baseline_mean

    # Unpooled (unequal-variance) standard error of the difference.
    treated_term = treated[std_name]**2 / (treated[nobs_name] - 1)
    baseline_term = baseline[std_name]**2 / (baseline[nobs_name] - 1)
    std_diff = np.sqrt(treated_term + baseline_term)

    confint = wstats._zconfint_generic(diff, std_diff, alpha=alpha,
                                       alternative=alternative)
    zstat, pvalue = wstats._zstat_generic2(diff, std_diff, alternative)

    # NOTE(review): the interval is rescaled by the control mean as-is; if
    # that mean can be negative the bounds would presumably come out in
    # reversed order -- confirm with callers.
    return (treated_mean, baseline_mean, diff / baseline_mean,
            confint / baseline_mean, zstat, pvalue)
Exemplo n.º 4
0
 def _ci(self, row, alpha_column: str) -> Tuple[float, float]:
     """Return the z confidence interval for a single row.

     The interval is built around ``row[DIFFERENCE]`` with standard error
     ``row[STD_ERR]``. The significance level is read from the column named
     by ``alpha_column``, and the alternative from ``row[PREFERENCE]``.
     """
     center = row[DIFFERENCE]
     std_err = row[STD_ERR]
     significance = row[alpha_column]
     direction = row[PREFERENCE]
     return _zconfint_generic(mean=center,
                              std_mean=std_err,
                              alpha=significance,
                              alternative=direction)