def bin_size(orig_exp, ci_method, nib_lim=0.01, max_ci_diff=0.25, min_corcoef=0.3):
    """Find the first bin-size multiple whose aggregated data passes both checks.

    The experiment is re-aggregated with ``times_bin_size`` = 1, 2, 3, ...
    and each candidate is accepted as soon as

    * the ratio returned by ``logit.get_nib_ratio`` is ``<= nib_lim``, and
    * the coefficient returned by ``corrcoeff`` (logged as "spearmanr")
      is ``> min_corcoef``.

    Args:
        orig_exp: Experiment object exposing ``aggregate_bins(times_bin_size=...)``.
        ci_method: Passed straight through to ``logit.ci_for_df``.
        nib_lim: Upper limit (inclusive) for the nib ratio.
        max_ci_diff: Forwarded to ``logit.ci_for_df`` as ``ci_min``.
        min_corcoef: Lower limit (exclusive) for the correlation coefficient.

    Returns:
        The ``bin_size`` attribute of the first aggregated experiment that
        satisfies both criteria.

    Raises:
        AssertionError: If none of the first 100 multiples is acceptable.
    """
    # The original guarded the search with ``assert``, which is removed under
    # ``python -O`` and would leave the loop unbounded.  The bound is now
    # explicit; AssertionError is kept so existing callers see the same type.
    max_multiplier = 100
    for multiplier in range(1, max_multiplier + 1):
        exp = orig_exp.aggregate_bins(times_bin_size=multiplier)
        df = logit.ci_for_df(exp.as_data_frame(), ci_method, ci_min=max_ci_diff)
        ratio_nib = logit.get_nib_ratio(df)
        pvar = corrcoeff(df)
        log.notice('testing bin size %d, nib ratio: %.4f, spearmanr: %.3f'
                   % (multiplier, ratio_nib, pvar))
        if ratio_nib <= nib_lim and pvar > min_corcoef:
            return exp.bin_size
    raise AssertionError("Could not find a suitable bin size.")
def __adjust_bin_size_and_get_df(self, exp):
    """Settle on a bin size, then build and validate the per-bin data frame.

    When ``self.bin_size`` is ``None`` it is filled in from
    ``estimate.bin_size``; otherwise the preset value is used unchanged.
    The experiment is then aggregated to that bin size, passed through
    ``logit.ci_for_df`` and checked against ``self.nib_lim``.

    Args:
        exp: Experiment object exposing ``aggregate_bins(new_bin_size=...)``.

    Returns:
        The data frame produced by ``logit.ci_for_df``.

    Raises:
        AssertionError: If the ratio from ``logit.get_nib_ratio`` is not
            strictly below ``self.nib_lim``.
    """
    if self.bin_size is None:
        self.bin_size = estimate.bin_size(
            exp, self.ci_method,
            max_ci_diff=self.ci_lim, nib_lim=self.nib_lim)
        log.notice('Optimal bin size: %d' % self.bin_size)
    else:
        log.notice('Using preset bin size of: %d' % self.bin_size)

    odf = exp.aggregate_bins(new_bin_size=self.bin_size).as_data_frame()
    df = logit.ci_for_df(odf, self.ci_method, ci_min=self.ci_lim)
    ratio_nib = logit.get_nib_ratio(df)
    # NOTE(review): this message says "Manually specified" but is emitted for
    # auto-estimated bin sizes as well -- confirm that is intended.
    log.notice(
        'Manually specified bin size of %dKB gives %.2f%% informative bins. '
        'The required amount is %.2f%%.'
        % (self.bin_size / 1000, (1 - ratio_nib) * 100, (1 - self.nib_lim) * 100))

    # Raised explicitly rather than via ``assert`` so the check is not
    # stripped under ``python -O``.  The ``not (a < b)`` form keeps the
    # original outcome for NaN ratios (they are rejected).
    if not (ratio_nib < self.nib_lim):
        raise AssertionError(
            'The selected bin size results in less informative bins than what '
            'is specified by the parameter '
            'required_fraction_of_informative_bins. Please try a bigger bin '
            'size or let EDD auto-estimate a bin size for you. Please consult '
            'the EDD manual for more information')
    return df
def __adjust_bin_size_and_get_df(self, exp):
    """Choose the working bin size and return the validated data frame.

    A missing ``self.bin_size`` (``None``) is replaced by the value from
    ``estimate.bin_size``; a preset value is only logged.  ``exp`` is then
    aggregated with ``new_bin_size=self.bin_size``, converted to a data
    frame and run through ``logit.ci_for_df``.

    Args:
        exp: Experiment object exposing ``aggregate_bins(new_bin_size=...)``.

    Returns:
        The data frame produced by ``logit.ci_for_df``.

    Raises:
        AssertionError: If ``logit.get_nib_ratio(df)`` is not strictly less
            than ``self.nib_lim``.
    """
    if self.bin_size is None:
        self.bin_size = estimate.bin_size(
            exp,
            self.ci_method,
            max_ci_diff=self.ci_lim,
            nib_lim=self.nib_lim,
        )
        log.notice('Optimal bin size: %d' % self.bin_size)
    else:
        log.notice('Using preset bin size of: %d' % self.bin_size)

    aggregated = exp.aggregate_bins(new_bin_size=self.bin_size)
    df = logit.ci_for_df(aggregated.as_data_frame(), self.ci_method,
                         ci_min=self.ci_lim)
    ratio_nib = logit.get_nib_ratio(df)

    # NOTE(review): the wording "Manually specified" is logged on the
    # auto-estimated path too -- verify whether that is deliberate.
    informative_pct = (1 - ratio_nib) * 100
    required_pct = (1 - self.nib_lim) * 100
    log.notice(
        'Manually specified bin size of %dKB gives %.2f%% informative bins. '
        'The required amount is %.2f%%.'
        % (self.bin_size / 1000, informative_pct, required_pct))

    # An ``assert`` statement would be dropped under ``python -O``, silently
    # skipping this validation; raise the same exception type explicitly.
    # ``not (a < b)`` (instead of ``a >= b``) still rejects NaN ratios.
    if not (ratio_nib < self.nib_lim):
        raise AssertionError(
            'The selected bin size results in less informative bins than what '
            'is specified by the parameter '
            'required_fraction_of_informative_bins. Please try a bigger bin '
            'size or let EDD auto-estimate a bin size for you. Please consult '
            'the EDD manual for more information')
    return df