def gaussian_misclassification(this: List[Rational], that: List[Rational]) -> Rational:
    """
    Relative measure of how easily observations of this and that could be
    mistaken for one another. Lower values indicate a lower probability of
    misclassification.

    Each input is turned into a scoring function with Likelihood.to_gaussian
    (presumably a Gaussian log-likelihood fitted to the sample mean and
    standard deviation -- confirm against Likelihood). Observations of this
    are scored under the function built from that, observations of that are
    scored under the function built from this, and the two running totals
    are averaged.

    The observations are paired up with ops.zipLists before scoring, so the
    same number of observations contributes to each total.
    NOTE(review): this relies on ops.zipLists truncating to the shorter
    input -- confirm.

    The result is not deterministic when the inputs differ in length,
    because both inputs are randomly permuted before being paired.
    """
    score_under_this = Likelihood.to_gaussian(this)
    score_under_that = Likelihood.to_gaussian(that)

    # Shuffle both inputs so that an already-sorted list does not decide
    # which observations get paired together.
    shuffled_this = np.random.permutation(this)
    shuffled_that = np.random.permutation(that)
    pairs = ops.zipLists(shuffled_this, shuffled_that)

    def accumulate(totals, pair):
        """
        Add the cross-distribution scores of one (this, that) pair of
        observations to the running (this, that) totals.
        """
        this_total, that_total = totals
        this_obs, that_obs = pair
        new_this_total = this_total + score_under_that(this_obs)
        new_that_total = that_total + score_under_this(that_obs)
        return (new_this_total, new_that_total)

    this_total, that_total = ops.foldSeq(pairs, (0, 0), accumulate)
    return (this_total + that_total) / 2
def byConfidenceInterval(self) -> ConfidenceIntervalResults:
    """
    Classifies individual observations as data, lower outliers, and upper
    outliers.

    The observations are first partitioned around the global mean, and the
    standard deviation of each side is computed separately. The confidence
    interval extends one standard deviation of the upper side above the
    mean and one standard deviation of the lower side below it. Everything
    outside that interval is an outlier.

    Returns a ConfidenceIntervalResults built from, in order: the global
    mean, the upper standard deviation, the lower standard deviation, the
    retained data (upper side followed by lower side), the sorted lower
    outliers, and the sorted upper outliers.
    """
    global_mean: Rational = Moment.mean(self.data)

    # NOTE(review): the unpacking order used throughout this method implies
    # that ops.splitList returns (elements failing the predicate, elements
    # passing it) -- confirm against ops.
    upper, lower = ops.splitList(self.data.data,
                                 lambda obs: obs <= global_mean)

    upper_std_dev: Rational = Moment.std_dev(Vector(upper))
    lower_std_dev: Rational = Moment.std_dev(Vector(lower))

    # Each side is compared against its own spread, so the interval may be
    # asymmetric around the mean.
    upper_outliers, upper_data = ops.splitList(
        upper, lambda obs: obs <= global_mean + upper_std_dev)
    lower_outliers, lower_data = ops.splitList(
        lower, lambda obs: obs >= global_mean - lower_std_dev)

    return ConfidenceIntervalResults(global_mean,
                                     upper_std_dev,
                                     lower_std_dev,
                                     upper_data + lower_data,
                                     Vector(lower_outliers).sort().data,
                                     Vector(upper_outliers).sort().data)
def log_likelihood(data: Sequence[Rational],
                   dist_ll: Callable[[Rational], Rational]) -> Rational:
    """
    Aggregate log-likelihood of a whole data sequence.

    dist_ll is applied to every observation in data and the results are
    added together with ops.foldSeq, starting from 0.
    """
    def add_observation(running_total, observation):
        """Add one observation's log-likelihood to the running total."""
        return running_total + dist_ll(observation)

    return ops.foldSeq(data, 0, add_observation)
def covariance(this: Vector, that: Vector) -> Rational:
    """
    Sample covariance of two equally long vectors.

    The products of the paired deviations from each vector's mean are
    summed and divided by (length - 1).

    Raises:
        ValueError: if the two vectors differ in length.

    Vectors of length 1 make the divisor zero.
    """
    if this.length != that.length:
        raise ValueError("Input vectors must be of equivalent length")

    # Both means are constant for the whole fold, so they are computed once
    # here rather than once per element inside the accumulator.
    this_mean: Rational = Moment.mean(this)
    that_mean: Rational = Moment.mean(that)

    zipped: Iterator[Tuple[Rational, Rational]] = zip(this.data, that.data)
    sum_dev_prods: Rational = ops.foldSeq(
        zipped,
        0,
        lambda total, pair:
            total + (pair[0] - this_mean) * (pair[1] - that_mean))

    return sum_dev_prods / (this.length - 1)
def __add__(self, that: object) -> Optional[object]:
    """
    Element-wise sum of this vector and another Vector.

    Returns NotImplemented when that is not a Vector, None when the two
    vectors hold a different number of elements, and otherwise a new Vector
    built from combining the paired elements with +.
    """
    if not isinstance(that, Vector):
        return NotImplemented

    # A length mismatch is reported with None rather than an exception.
    if len(self.data) != len(that.data):
        return None

    summed: List[Rational] = ops.combineListElems(
        self.data, that.data, lambda left, right: left + right)
    return Vector(summed)
def conditional_prop(self,
                     predicate: Callable[[Rational], bool]) -> Rational:
    """
    Proportion of the observations in self.data that satisfy predicate.

    A (seen, matched) pair of counters is folded over self.data with
    ops.foldSeq, and the result is matched / seen.
    NOTE(review): with no observations the counters presumably both stay
    at 0 and the division fails -- confirm how callers handle empty data.
    """
    def tally(running: Tuple[Rational, Rational],
              observation: Rational) -> Tuple[Rational, Rational]:
        """Count one observation, and count it as matched if it passes."""
        seen, matched = running
        seen_now = seen + 1
        if predicate(observation):
            matched_now = matched + 1
        else:
            matched_now = matched
        return (seen_now, matched_now)

    seen, matched = ops.foldSeq(self.data, (0, 0), tally)
    return matched / seen
def higherOrderMoment(data: Vector, order: int) -> Rational:
    """
    Standardized moment of the given order.

    The deviations from the mean are raised to the power order, summed, and
    divided by (length - 1). That central moment is then divided by the
    standard deviation raised to the same power.

    data of length 1 makes the first divisor zero.
    """
    # The mean is constant for the whole fold, so it is computed once here
    # rather than once per element inside the accumulator.
    mu: Rational = Moment.mean(data)

    sum_exp_dev: Rational = ops.foldSeq(
        data, 0, lambda total, xi: total + (xi - mu)**order)
    central_moment: Rational = sum_exp_dev / (data.length - 1)

    return central_moment / (Moment.std_dev(data)**order)
def variance(data: Vector) -> Rational:
    """
    Sample variance of data.

    The squared deviations from the mean are summed with ops.foldSeq and
    divided by (length - 1).
    """
    center: Rational = Moment.mean(data)

    def add_squared_deviation(running_total, observation):
        """Add one observation's squared deviation to the running total."""
        return running_total + (observation - center)**2

    return ops.foldSeq(data, 0, add_squared_deviation) / (data.length - 1)
def filter(self, predicate: Callable[[Rational], bool]) -> BaseVector:
    """
    Build a new Vector from the elements ops.filterList selects out of
    self.data using predicate (presumably those for which predicate is
    true -- confirm against ops).
    """
    return Vector(ops.filterList(self.data, predicate))
def concat(self, that: BaseVector) -> BaseVector:
    """
    Build a new Vector from ops.concatLists applied to self.data and
    that.data (presumably this vector's elements followed by that's --
    confirm against ops).
    """
    return Vector(ops.concatLists(self.data, that.data))