def test__delta__not_providing_data_fails(self):
    """Verify that ``statx.delta`` rejects a missing sample.

    A ``ValueError`` is expected both when ``None`` is supplied as the
    second positional argument and when it is supplied as the first.
    """
    # Second sample missing.
    self.assertRaises(ValueError, statx.delta,
                      self.samples.temperature, None)
    # First sample missing.
    self.assertRaises(ValueError, statx.delta,
                      None, self.samples.temperature)
def test__delta__nan_handling(self):
    """Check that NaN entries do not count towards the sample sizes."""
    # Untouched fixtures: both sides are expected to report 1000 samples.
    full = statx.delta(self.rand_s1, self.rand_s2)
    self.assertEqual(full.treatment_statistics.sample_size, 1000)
    self.assertEqual(full.control_statistics.sample_size, 1000)

    # Work on a copy so the shared fixture is not modified, then blank
    # out everything from position 90 onwards.
    with_nans = self.rand_s1.copy()
    with_nans[90:] = np.nan

    partial = statx.delta(with_nans, self.rand_s2)
    self.assertAlmostEqual(partial.delta, -0.1, places=1)
    self.assertEqual(partial.treatment_statistics.sample_size, 90)
    self.assertEqual(partial.control_statistics.sample_size, 1000)
def test__delta__nan_handling(self):
    """NaN values in the first sample must be excluded from its size."""
    complete = statx.delta(self.rand_s1, self.rand_s2)
    # Reference run on the unmodified fixtures.
    self.assertEqual(complete.treatment_statistics.sample_size, 1000)
    self.assertEqual(complete.control_statistics.sample_size, 1000)

    # Copy first: the fixture itself must stay free of NaNs.
    sparse_s1 = self.rand_s1.copy()
    sparse_s1[90:] = np.nan
    sparse = statx.delta(sparse_s1, self.rand_s2)

    # Only the first 90 entries of the first sample remain usable.
    self.assertAlmostEqual(sparse.delta, -0.1, places=1)
    self.assertEqual(sparse.treatment_statistics.sample_size, 90)
    self.assertEqual(sparse.control_statistics.sample_size, 1000)
def _delta_all_variants(metric_df, baseline_variant, assume_normal=True,
                        percentiles=None, min_observations=20,
                        nruns=10000, relative=False, weighted=False):
    """Applies delta to all variants, given a metric and a baseline variant.

    metric_df has 4 columns: entity, variant, metric, reference_kpi

    Args:
        metric_df: data frame laid out as described above. Columns are
            addressed by position, except for the grouping step which
            uses the column *name* ``'variant'``.
        baseline_variant: value in the variant column (position 1) that
            marks the baseline rows.
        assume_normal: forwarded unchanged to ``statx.delta``.
        percentiles: forwarded to ``statx.delta``. ``None`` (the default)
            stands for ``[2.5, 97.5]``; a fresh list is built per call so
            no list object is shared between invocations.
        min_observations: forwarded unchanged to ``statx.delta``.
        nruns: forwarded unchanged to ``statx.delta``.
        relative: forwarded unchanged to ``statx.delta``.
        weighted: if true, weights derived from the fourth column are
            passed to ``statx.delta`` as ``x_weights`` / ``y_weights``.

    Returns:
        The per-variant frames produced by
        ``delta_to_dataframe_all_variants``, combined through
        ``groupby('variant').apply`` and unstacked on index level 0.
    """
    if percentiles is None:
        percentiles = [2.5, 97.5]

    is_baseline = metric_df.iloc[:, 1] == baseline_variant
    baseline_metric = metric_df.iloc[:, 2][is_baseline]
    baseline_weights = metric_df.iloc[:, 3][is_baseline]

    def rescale(weights):
        # ASSUMPTIONS:
        # - reference KPI is never NaN (such that sum works the same as
        #   np.nansum)
        # - whenever the reference KPI is 0, it means the derived KPI is
        #   NaN, and therefore should not be counted (only works for ratio)
        return weights / sum(weights) * sum(weights != 0)

    def do_delta(f):
        delta_kwargs = dict(x=f.iloc[:, 2],
                            y=baseline_metric,
                            assume_normal=assume_normal,
                            percentiles=percentiles,
                            min_observations=min_observations,
                            nruns=nruns,
                            relative=relative)
        if weighted:
            delta_kwargs['x_weights'] = rescale(f.iloc[:, 3])
            delta_kwargs['y_weights'] = rescale(baseline_weights)
        return delta_to_dataframe_all_variants(
            f.columns[2], *statx.delta(**delta_kwargs))

    # Actual calculation
    # NOTE(review): grouping is by the column named 'variant', while the
    # baseline mask uses position 1; both agree only if column 1 carries
    # that name.
    return metric_df.groupby('variant').apply(do_delta).unstack(0)
def test__delta__nan_handling(self):
    """Check that NaN entries do not count towards the sample sizes."""
    # Reference run on the unmodified fixtures.
    full = statx.delta(self.rand_s1, self.rand_s2)
    self.assertEqual(full['treatment_sample_size'], 1000)
    self.assertEqual(full['control_sample_size'], 1000)

    # Blank out everything from position 90 onwards in a private copy.
    with_nans = self.rand_s1.copy()
    with_nans[90:] = np.nan

    partial = statx.delta(with_nans, self.rand_s2)
    self.assertAlmostEqual(partial['delta'], -0.1, places=1)
    self.assertEqual(partial['treatment_sample_size'], 90)
    self.assertEqual(partial['control_sample_size'], 1000)
def test__delta__nan_handling(self):
    """Check that NaN entries do not count towards the sample sizes.

    In this revision the result of ``statx.delta`` is consumed as a
    sequence: positions 2 and 3 are compared against the sample sizes,
    and the second call is unpacked into exactly six values.
    """
    baseline = statx.delta(self.rand_s1, self.rand_s2)
    self.assertEqual(baseline[2], 1000)
    self.assertEqual(baseline[3], 1000)

    # Blank out everything from position 90 onwards in a private copy.
    with_nans = self.rand_s1.copy()
    with_nans[90:] = np.nan

    partial = statx.delta(with_nans, self.rand_s2)
    # Six-way unpack kept on purpose: it also checks the result length.
    uplift, _pctiles, size_x, size_y, _mean_x, _mean_y = partial
    self.assertAlmostEqual(uplift, -0.1, places=1)
    self.assertEqual(size_x, 90)
    self.assertEqual(size_y, 1000)
def test__delta__nan_handling(self):
    """Check that NaN entries do not count towards ``n_x`` / ``n_y``."""
    # Reference run on the unmodified fixtures.
    full = statx.delta(self.rand_s1, self.rand_s2)
    self.assertEqual(full['n_x'], 1000)
    self.assertEqual(full['n_y'], 1000)

    # Blank out everything from position 90 onwards in a private copy.
    with_nans = self.rand_s1.copy()
    with_nans[90:] = np.nan

    partial = statx.delta(with_nans, self.rand_s2)
    self.assertAlmostEqual(partial['delta'], -0.1, places=1)
    self.assertEqual(partial['n_x'], 90)
    self.assertEqual(partial['n_y'], 1000)
def do_delta(f, bin_name):
    """Compute the delta of every variant in one bin against the baseline.

    Reads ``variants``, ``assume_normal``, ``percentiles``,
    ``min_observations``, ``nruns`` and ``relative`` from the enclosing
    scope.

    Args:
        f: data frame with the rows of a single bin. Column 0 is compared
            against variant labels, column 2 holds the metric values, and
            a column named ``'variant'`` supplies the labels to iterate.
        bin_name: label of the bin; stored in the ``_tmp_bin_`` index
            level of the result.

    Returns:
        The per-variant frames from ``delta_to_dataframe_all_variants``
        stacked on top of each other, each extended by the index levels
        ``variant`` and ``_tmp_bin_``. An empty ``DataFrame`` is returned
        when ``f`` contains no variants.
    """
    level_order = ['variant', '_tmp_bin_', 'metric', 'subgroup_metric',
                   'subgroup', 'statistic', 'pctile']

    # find the corresponding bin in the baseline variant
    # NOTE(review): the baseline is taken to be variants[1] -- confirm
    # against the enclosing function.
    baseline_metric = f.iloc[:, 2][f.iloc[:, 0] == variants[1]]

    frames = []
    for v in f['variant'].unique():
        v_metric = f.iloc[:, 2][f.iloc[:, 0] == v]
        df = delta_to_dataframe_all_variants(
            f.columns[2],
            *statx.delta(x=v_metric,
                         y=baseline_metric,
                         assume_normal=assume_normal,
                         percentiles=percentiles,
                         min_observations=min_observations,
                         nruns=nruns,
                         relative=relative))

        # add new index levels for variant and binning
        df['_tmp_bin_'] = bin_name
        df['variant'] = v
        df.set_index(['variant', '_tmp_bin_'], append=True, inplace=True)
        frames.append(df.reorder_levels(level_order))

    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated frame on every iteration; concatenate once instead.
    # pd.concat raises on an empty list, so keep the old empty result.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
def test__delta__computation_assumed_normal(self):
    """Compare ``delta()`` under the normal assumption with pinned values.

    The temperature sample is split by ``gender`` (1 versus 2) and the
    2.5 / 97.5 percentiles are requested explicitly.
    """
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1],
                      temperature[gender == 2],
                      percentiles=[2.5, 97.5],
                      assume_normal=True)

    # Point estimate of the difference.
    self.assertAlmostEqual(res['delta'], -0.28923076923075541)

    # Fetch both interval bounds before asserting on either of them.
    interval = res['confidence_interval']
    lower = find_list_of_dicts_element(interval, 'percentile', 2.5, 'value')
    upper = find_list_of_dicts_element(interval, 'percentile', 97.5, 'value')
    self.assertAlmostEqual(lower, -0.53770569567692295)
    self.assertAlmostEqual(upper, -0.040755842784587965)

    # Both groups are expected to contain 65 observations.
    self.assertEqual(res['treatment_sample_size'], 65)
    self.assertEqual(res['control_sample_size'], 65)
def _delta_all_variants(metric_df, baseline_variant, assume_normal=True,
                        percentiles=None, min_observations=20,
                        nruns=10000, relative=False, weighted=False):
    """Applies delta to all variants, given a metric and a baseline variant.

    metric_df has 4 columns: entity, variant, metric, reference_kpi

    Args:
        metric_df: data frame laid out as described above. Columns are
            addressed by position, except for the grouping step which
            uses the column *name* ``'variant'``.
        baseline_variant: value in the variant column (position 1) that
            marks the baseline rows.
        assume_normal: forwarded unchanged to ``statx.delta``.
        percentiles: forwarded to ``statx.delta``. ``None`` (the default)
            stands for ``[2.5, 97.5]``; a fresh list is built per call so
            no list object is shared between invocations.
        min_observations: forwarded unchanged to ``statx.delta``.
        nruns: forwarded unchanged to ``statx.delta``.
        relative: forwarded unchanged to ``statx.delta``.
        weighted: if true, weights derived from the fourth column are
            passed to ``statx.delta`` as ``x_weights`` / ``y_weights``.

    Returns:
        The per-variant frames produced by
        ``delta_to_dataframe_all_variants``, combined through
        ``groupby('variant').apply`` and unstacked on index level 0.
    """
    if percentiles is None:
        percentiles = [2.5, 97.5]

    is_baseline = metric_df.iloc[:, 1] == baseline_variant
    baseline_metric = metric_df.iloc[:, 2][is_baseline]
    baseline_weights = metric_df.iloc[:, 3][is_baseline]

    def rescale(weights):
        # Divide by the total and multiply by the number of rows. The
        # builtin sum is kept on purpose: unlike Series.sum it does not
        # skip NaN.
        return weights / sum(weights) * len(weights)

    def do_delta(f):
        delta_kwargs = dict(x=f.iloc[:, 2],
                            y=baseline_metric,
                            assume_normal=assume_normal,
                            percentiles=percentiles,
                            min_observations=min_observations,
                            nruns=nruns,
                            relative=relative)
        if weighted:
            delta_kwargs['x_weights'] = rescale(f.iloc[:, 3])
            delta_kwargs['y_weights'] = rescale(baseline_weights)
        return delta_to_dataframe_all_variants(
            f.columns[2], *statx.delta(**delta_kwargs))

    # Actual calculation
    # NOTE(review): grouping is by the column named 'variant', while the
    # baseline mask uses position 1; both agree only if column 1 carries
    # that name.
    return metric_df.groupby('variant').apply(do_delta).unstack(0)
def test__delta__2percentiles_no_tests(self):
    """Check the interval bounds of ``delta()`` when called with defaults.

    No ``percentiles`` or test-count argument is passed; the result is
    expected to expose entries for the 2.5 and 97.5 percentiles.
    """
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1], temperature[gender == 2])

    # Fetch both bounds before asserting on either of them.
    interval = res.confidence_interval
    lower = find_value_by_key_with_condition(
        interval, 'percentile', 2.5, 'value')
    upper = find_value_by_key_with_condition(
        interval, 'percentile', 97.5, 'value')

    self.assertAlmostEqual(lower, -0.53963938185557114)
    self.assertAlmostEqual(upper, -0.038822156605939739)
def test__delta__2percentiles_no_tests(self):
    """Check the interval bounds of ``delta()`` when called with defaults.

    No ``percentiles`` or test-count argument is passed; the result is
    expected to expose entries for the 2.5 and 97.5 percentiles.
    """
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1], temperature[gender == 2])

    # Fetch both bounds before asserting on either of them.
    interval = res.confidence_interval
    lower = find_value_by_key_with_condition(
        interval, 'percentile', 2.5, 'value')
    upper = find_value_by_key_with_condition(
        interval, 'percentile', 97.5, 'value')

    self.assertAlmostEqual(lower, -0.53770569567692295)
    self.assertAlmostEqual(upper, -0.040755842784587965)
def test__delta__2percentiles_no_tests(self):
    """Default call of ``delta()`` must yield the pinned 2.5/97.5 bounds."""
    group_1 = self.samples.temperature[self.samples.gender == 1]
    group_2 = self.samples.temperature[self.samples.gender == 2]
    outcome = statx.delta(group_1, group_2)

    # Look up both percentile entries first, then compare them.
    bound_lo = find_value_by_key_with_condition(
        outcome.confidence_interval, 'percentile', 2.5, 'value')
    bound_hi = find_value_by_key_with_condition(
        outcome.confidence_interval, 'percentile', 97.5, 'value')

    self.assertAlmostEqual(bound_lo, -0.53963938185557114)
    self.assertAlmostEqual(bound_hi, -0.038822156605939739)
def do_delta_numerical(df):
    """Run ``statx.delta`` on column 2 of ``df`` and tabulate the result.

    Reads ``baseline_metric``, ``assume_normal``, ``percentiles``,
    ``min_observations``, ``nruns`` and ``relative`` from the enclosing
    scope.

    Args:
        df: data frame whose third column (position 2) holds the metric
            values; the column label is reported as the metric name.

    Returns:
        Whatever ``feature_check_to_dataframe`` builds from the delta,
        its percentiles, and the size and mean of the ``x`` sample.
    """
    delta_result = statx.delta(x=df.iloc[:, 2],
                               y=baseline_metric,
                               assume_normal=assume_normal,
                               percentiles=percentiles,
                               min_observations=min_observations,
                               nruns=nruns,
                               relative=relative)
    # The size and mean of the baseline sample are not reported.
    mu, ci, ss_x, _ss_y, mean_x, _mean_y = delta_result
    return feature_check_to_dataframe(metric=df.columns[2],
                                      samplesize_variant=ss_x,
                                      mu=mu,
                                      pctiles=ci,
                                      mu_variant=mean_x)
def test__delta__2percentiles_25_tests(self):
    """Check the interval of ``delta()`` when ``num_tests=25`` is passed.

    Percentiles 2.5 and 97.5 are requested, but the result is queried at
    0.1 and 99.9.
    """
    # NOTE(review): 0.1 equals 2.5 / 25, so this looks like a per-test
    # division of the tail probability -- confirm against statx.delta.
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1],
                      temperature[gender == 2],
                      percentiles=[2.5, 97.5],
                      assume_normal=True,
                      num_tests=25)

    # Fetch both corrected bounds before asserting on either of them.
    interval = res['confidence_interval']
    corrected_lower = find_list_of_dicts_element(
        interval, 'percentile', 0.1, 'value')
    corrected_upper = find_list_of_dicts_element(
        interval, 'percentile', 99.9, 'value')

    self.assertAlmostEqual(corrected_lower, -0.68544085117601006)
    self.assertAlmostEqual(corrected_upper, 0.1069793127145035)
def test__delta__2percentiles_no_tests(self):
    """Check the interval of ``delta()`` when ``num_tests=1`` is passed.

    With a single test the requested percentiles 2.5 and 97.5 are
    expected to appear unchanged in the result.
    """
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1],
                      temperature[gender == 2],
                      percentiles=[2.5, 97.5],
                      assume_normal=True,
                      num_tests=1)

    # Fetch both bounds before asserting on either of them.
    interval = res['confidence_interval']
    lower = find_list_of_dicts_element(interval, 'percentile', 2.5, 'value')
    upper = find_list_of_dicts_element(interval, 'percentile', 97.5, 'value')

    self.assertAlmostEqual(lower, -0.53770569567692295)
    self.assertAlmostEqual(upper, -0.040755842784587965)
def test__delta__computation_not_assumed_normal(self):
    """Compare the result of ``delta()`` with pinned reference values."""
    # NOTE(review): despite the test name, the call below passes
    # assume_normal=True, so the non-normal code path is not exercised
    # here. The expected values are tied to that setting.
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1],
                      temperature[gender == 2],
                      percentiles=[2.5, 97.5],
                      assume_normal=True)

    # Point estimate of the difference.
    self.assertAlmostEqual(res['delta'], -0.28923076923075541)
    # Lower and upper interval bounds, keyed by percentile.
    self.assertAlmostEqual(res['interval'][2.5], -0.53770569567692295)
    self.assertAlmostEqual(res['interval'][97.5], -0.040755842784587965)
    # Both groups are expected to contain 65 observations.
    self.assertEqual(res['n_x'], 65)
    self.assertEqual(res['n_y'], 65)
def test__delta__computation_assumed_normal(self):
    """Compare ``delta()`` under the normal assumption with pinned values.

    In this revision the result is consumed as a sequence: position 0 is
    the delta, position 1 maps percentiles to bounds, and positions 2 and
    3 are the two sample sizes.
    """
    temperature = self.samples.temperature
    gender = self.samples.gender
    outcome = statx.delta(temperature[gender == 1],
                          temperature[gender == 2],
                          percentiles=[2.5, 97.5],
                          assume_normal=True)

    # Point estimate of the difference.
    self.assertAlmostEqual(outcome[0], -0.28923076923075541)
    # Lower and upper interval bounds, keyed by percentile.
    self.assertAlmostEqual(outcome[1][2.5], -0.53770569567692295)
    self.assertAlmostEqual(outcome[1][97.5], -0.040755842784587965)
    # Both groups are expected to contain 65 observations.
    self.assertEqual(outcome[2], 65)
    self.assertEqual(outcome[3], 65)
def test__delta__computation_not_assumed_normal(self):
    """Compare the result of ``delta()`` with pinned reference values."""
    # NOTE(review): despite the test name, the call below passes
    # assume_normal=True, so the non-normal code path is not exercised
    # here. The expected values are tied to that setting.
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1],
                      temperature[gender == 2],
                      assume_normal=True)

    # Point estimate of the difference.
    self.assertAlmostEqual(res.delta, -0.28923076923075541)

    # Fetch both interval bounds before asserting on either of them.
    interval = res.confidence_interval
    lower = find_value_by_key_with_condition(
        interval, 'percentile', 2.5, 'value')
    upper = find_value_by_key_with_condition(
        interval, 'percentile', 97.5, 'value')
    self.assertAlmostEqual(lower, -0.53963938185557114)
    self.assertAlmostEqual(upper, -0.038822156605939739)

    # Both groups are expected to contain 65 observations.
    self.assertEqual(res.treatment_statistics.sample_size, 65)
    self.assertEqual(res.control_statistics.sample_size, 65)
def test__delta__computation_not_assumed_normal(self):
    """Compare the result of ``delta()`` with pinned reference values."""
    # NOTE(review): despite the test name, the call below passes
    # assume_normal=True, so the non-normal code path is not exercised
    # here. The expected values are tied to that setting.
    temperature = self.samples.temperature
    gender = self.samples.gender
    res = statx.delta(temperature[gender == 1],
                      temperature[gender == 2],
                      assume_normal=True)

    # Point estimate of the difference.
    self.assertAlmostEqual(res.delta, -0.28923076923075541)

    # Fetch both interval bounds before asserting on either of them.
    interval = res.confidence_interval
    lower = find_value_by_key_with_condition(
        interval, 'percentile', 2.5, 'value')
    upper = find_value_by_key_with_condition(
        interval, 'percentile', 97.5, 'value')
    self.assertAlmostEqual(lower, -0.53770569567692295)
    self.assertAlmostEqual(upper, -0.040755842784587965)

    # Both groups are expected to contain 65 observations.
    self.assertEqual(res.treatment_statistics.sample_size, 65)
    self.assertEqual(res.control_statistics.sample_size, 65)