def significance_one_vs_any(self) -> Series:
    """
    Return the probability that the response to each question is higher
    than a randomly selected other question.

    :return: Series of probabilities indexed by question name.
    """
    question_names = list(self._item_dict.keys())
    records = []
    for name in question_names:
        # Features for this question vs. the pooled features of all others.
        features_one = self._item_dict[name].make_features()
        features_rest = concat(
            [self._item_dict[other].make_features()
             for other in question_names if other != name],
            axis=0
        )
        # Beta(1, 1) prior on each group's success probability.
        posterior_one = BetaBinomialConjugate(
            alpha=1, beta=1, n=len(features_one), k=features_one.sum()
        ).posterior()
        posterior_rest = BetaBinomialConjugate(
            alpha=1, beta=1, n=len(features_rest), k=features_rest.sum()
        ).posterior()
        records.append({'name': name, 'p': posterior_one > posterior_rest})
    return DataFrame(records).set_index('name')['p']
def significance_one_vs_all(self) -> Series:
    """
    Return the probabilities that a random respondent is more likely to
    answer each one category than all others combined.

    :return: Series of probabilities indexed by category.
    """
    # Count once up front; categories nobody chose are simply absent
    # from value_counts, so default their count to 0.
    counts = self.data.value_counts()
    num_responses = len(self.data.dropna())
    records = []
    for category in self.categories:
        category_count = counts.get(category, 0)
        bb_one = BetaBinomialConjugate(
            alpha=1, beta=1, n=num_responses, k=category_count
        )
        bb_rest = BetaBinomialConjugate(
            alpha=1, beta=1, n=num_responses,
            k=num_responses - category_count
        )
        records.append({
            'category': category,
            'p': bb_one.posterior() > bb_rest.posterior()
        })
    return DataFrame(records).set_index('category')['p']
def __gt__(self, other: 'LikertQuestion') -> float:
    """
    Return the probability that the posterior estimate for the probability
    of max-rating is greater in self than other.

    :param other: The other question to compare against.
    """
    features_self = self.make_features()
    features_other = other.make_features()
    # Uniform Beta(1, 1) priors; compare the two posterior distributions.
    posterior_self = BetaBinomialConjugate(
        alpha=1, beta=1, n=len(features_self), k=features_self.sum()
    ).posterior()
    posterior_other = BetaBinomialConjugate(
        alpha=1, beta=1, n=len(features_other), k=features_other.sum()
    ).posterior()
    return posterior_self > posterior_other
def prob_superior(self, question: CategoricalQuestion,
                  attribute: SingleCategoryAttribute,
                  exp_attr_values: List[str], exp_answers: List[str],
                  ctl_attr_values: List[str],
                  ctl_answers: List[str]) -> BBProbSuperiorResult:
    """
    Calculate the probability that the number of responses from the
    experimental group in `exp_answers` is significantly higher than the
    number of responses from the control group in `ctl_answers`.

    N.B. to assess the effect of respondent attributes, `exp_answers` and
    `ctl_answers` should be identical.

    :param question: The question to consider.
    :param attribute: The attribute to use.
    :param exp_attr_values: The attribute values of the experimental group.
    :param exp_answers: The answers to count in the experimental group.
    :param ctl_attr_values: The attribute values of the control group.
    :param ctl_answers: The answers to count in the control group.
    """
    # find n and k for experimental respondent and answer group
    n_exp = self.count_responses(question=question,
                                 condition_category=attribute,
                                 condition_values=exp_attr_values)
    k_exp = self.count_responses(question=question,
                                 answers=exp_answers,
                                 condition_category=attribute,
                                 condition_values=exp_attr_values)
    # find n and k for control respondent and answer group
    n_ctl = self.count_responses(question=question,
                                 condition_category=attribute,
                                 condition_values=ctl_attr_values)
    k_ctl = self.count_responses(question=question,
                                 answers=ctl_answers,
                                 condition_category=attribute,
                                 condition_values=ctl_attr_values)
    # create beta-binomial distribution for each group.
    # NOTE(review): the success count was previously passed as `m=`, but
    # every other BetaBinomialConjugate construction in this module uses
    # `k=` for the number of successes — `m=` appears to be a stale
    # keyword that would raise a TypeError; standardized on `k=`.
    bb_exp = BetaBinomialConjugate(alpha=1, beta=1, n=n_exp, k=k_exp)
    bb_ctl = BetaBinomialConjugate(alpha=1, beta=1, n=n_ctl, k=k_ctl)
    # calculate probability of superiority of test group
    p_superior = bb_exp > bb_ctl
    return BBProbSuperiorResult(
        p_superior=p_superior,
        experimental_mean=bb_exp.posterior().mean(),
        control_mean=bb_ctl.posterior().mean())
def significance_one_vs_one(self) -> DataFrame:
    """
    Return the probability that a random respondent is more likely to
    answer each category than each other.

    :return: DataFrame of probabilities with `category_1` as the index
             and one column per `category_2`.
    """
    # Hoist the counting out of the pairwise loop: value_counts() and
    # dropna() are each O(n) and were previously recomputed for every
    # (category_1, category_2) pair.
    counts = self._data.value_counts()
    num_responses = len(self._data.dropna())
    results = []
    for category_1, category_2 in product(self._categories,
                                          self._categories):
        # Categories nobody chose are absent from value_counts;
        # default their count to 0.
        category_1_count = counts.get(category_1, 0)
        category_2_count = counts.get(category_2, 0)
        bb_category_1 = BetaBinomialConjugate(
            alpha=1, beta=1, n=num_responses, k=category_1_count)
        bb_category_2 = BetaBinomialConjugate(
            alpha=1, beta=1, n=num_responses, k=category_2_count)
        results.append({
            'category_1': category_1,
            'category_2': category_2,
            'p': bb_category_1.posterior() > bb_category_2.posterior()
        })
    return pivot_table(data=DataFrame(results),
                       index='category_1', columns='category_2',
                       values='p')