def calc_empirical_cdf(subject_answers, num_clusters):
    """Tally the subjects in each cluster and smooth away empty clusters.

    Args:
      subject_answers: array-like holding, for each subject, the index of the
        cluster the subject is in, i.e. the constructed answer the subject is
        endorsing. Lists and tuples are accepted as well as numpy arrays.
      num_clusters: number of possible clusters, since some clusters may be
        empty. Clusters are indexed 0 .. num_clusters - 1.

    Returns:
      The result of passing the per-cluster subject counts through
      jmutils.eliminate_zero_probs. NOTE(review): that helper is presumably
      what adds mass to empty clusters and normalises the counts into
      fractions summing to 1 -- confirm against jmutils; this function itself
      only counts.
    """
    # Coerce to an ndarray so that `== cluster` is an elementwise comparison.
    # A plain Python list compared with an int yields a single False, which
    # would silently make every count zero.
    answers = np.asarray(subject_answers)
    num_occupying = [
        int(np.count_nonzero(answers == cluster))
        for cluster in range(num_clusters)
    ]
    return jmutils.eliminate_zero_probs(num_occupying)
def calc_predictions_cdf(meta_params, distribution_cdf, boundaries):
    """Compute, per subject, the predicted share of subjects in each answer.

    For every subject, the subject's meta distribution is evaluated over each
    answer's boundary interval, and the resulting list is passed through
    jmutils.eliminate_zero_probs (per the original documentation, this is what
    ensures some probability mass in every cluster).

    Args:
      meta_params: list where each element gives the induced parameters for
        one subject's meta distribution; elements 0 and 1 are used.
      distribution_cdf: function called as
        distribution_cdf(lower, upper, param0, param1), taking the points to
        calculate the cdf between and the two parameters of the distribution.
      boundaries: list where each element is a tuple giving the lower and
        upper boundary for an answer.

    Returns:
      list where each sublist corresponds to a subject and gives the fraction
      of subjects expected to fall within each answer.
    """

    def mass_per_answer(subject_params):
        # One cdf evaluation per answer interval for this subject.
        raw_masses = [
            distribution_cdf(
                edges[0], edges[1], subject_params[0], subject_params[1]
            )
            for edges in boundaries
        ]
        return jmutils.eliminate_zero_probs(raw_masses)

    return [mass_per_answer(subject_params) for subject_params in meta_params]