Пример #1
0
def median_confidence_interval(dx, cutoff=.95):
    """Return a distribution-free confidence interval for the median of ``dx``.

    The bounds are order statistics of the sorted sample.  Their positions
    come from the normal approximation ``n/2 -/+ z * sqrt(n) / 2``, where
    ``z`` is the standard-normal quantile for the two-sided level ``cutoff``.

    Args:
        dx: Iterable of mutually comparable observations.
        cutoff: Confidence level as a decimal strictly between 0 and 1
            (``0.95`` yields a 95 % interval).

    Returns:
        A ``(lower, upper)`` tuple of two elements taken from ``dx``.

    Raises:
        IndexError: If ``dx`` is empty.
        statistics.StatisticsError: If ``(1 + cutoff) / 2`` is not strictly
            between 0 and 1.
    """
    dx = sorted(dx)
    n = len(dx)
    if n == 0:
        raise IndexError(
            "median_confidence_interval() needs at least one data point")

    z = statistics.NormalDist().inv_cdf((1 + cutoff) / 2)
    spread = z * math.sqrt(n)

    lix = round(0.5 * (n - spread))
    uix = round(0.5 * (1 + n + spread))

    # For small samples (or very high confidence levels) the approximation
    # points outside the data: the upper index reaches len(dx) and the lower
    # one can turn negative, which would silently wrap around to the end of
    # the list.  Fall back to the sample extremes in those cases.
    last = n - 1
    lix = min(max(lix, 0), last)
    uix = min(max(uix, 0), last)

    return (dx[lix], dx[uix])
Пример #2
0
 def fit(self, X_train, y_train):
     """Estimate class priors and per-feature Gaussians from training data.

     The last column of ``y_train`` is taken as the label column.  A prior
     (relative frequency) is stored for every distinct label, whereas the
     per-feature normal distributions are fitted only on the rows labelled
     ``0`` (``variable_distr_A1``) and ``1`` (``variable_distr_A2``).

     Args:
         X_train: Feature table, indexed by column name and by boolean mask
             (presumably a pandas DataFrame -- confirm against callers).
         y_train: Label table whose last column holds the class labels.
     """
     label_name = y_train.columns[-1]
     labels = y_train[label_name]
     self.variables = X_train.columns

     # Prior of each class = share of training rows carrying that label.
     self.prior_probabilities = {
         label: len(y_train[labels == label]) / len(y_train)
         for label in labels.unique()
     }

     def gaussians(rows):
         # One normal distribution per feature, from the column's mean and
         # standard deviation within ``rows``.
         return {
             name: statistics.NormalDist(mu=rows[name].mean(),
                                         sigma=rows[name].std())
             for name in X_train.columns
         }

     self.variable_distr_A1 = gaussians(X_train[labels == 0])
     self.variable_distr_A2 = gaussians(X_train[labels == 1])
Пример #3
0
 def _statistics_():
     """Print a short tour of the ``statistics`` module to stdout.

     Shows ``fmean``, ``geometric_mean`` and a few ``NormalDist`` operations
     (fitting from samples, cdf/pdf evaluation, adding two distributions and
     drawing random samples).  The function takes no arguments and returns
     ``None``; the sample line differs between runs because it is random.
     """
     print("Statistics")
     print("--")
     print(statistics.fmean([3.5, 4.0, 5.25]))
     print("--")
     print(round(statistics.geometric_mean([54, 24, 36]), 1))
     print("--")
     temperature_feb = statistics.NormalDist.from_samples(
         [4, 12, -3, 2, 7, 14])
     print(temperature_feb.mean)
     print(temperature_feb.stdev)
     print(temperature_feb.cdf(3))
     # Ratio of the densities at 7 and at 10.
     print(temperature_feb.pdf(7) / temperature_feb.pdf(10))
     # Adding two NormalDist objects yields a new NormalDist.
     el_nino = statistics.NormalDist(4, 2.5)
     temperature_feb += el_nino
     print(temperature_feb)
     # Earlier revisions also built two literal NormalDist objects and a
     # Fahrenheit conversion (temperature_feb * (9 / 5) + 32) here without
     # using the results; those no-op statements were dropped.
     print(temperature_feb.samples(3))
     print("--------")
Пример #4
0
    def terminate(self, measurement: float) -> bool:
        """Record ``measurement`` and report whether the fitted normal has settled.

        Nothing is fitted until more than ten measurements have been
        collected.  After that, every call fits a normal distribution to all
        recorded data and compares it with the previously stored fit.

        Args:
            measurement: New observation, appended to ``self._data``.

        Returns:
            ``True`` when the overlap between the new fit and the stored one
            is at least ``self._threshold``; the stored fit is left untouched
            in that case.  ``False`` otherwise, after the new fit has been
            stored as the reference for the next call.
        """
        self._data.append(measurement)

        # Too little data to fit anything yet.
        if len(self._data) <= 10:
            return False

        mean, spread = stats.norm.fit(self._data)
        latest = statistics.NormalDist(mu=mean, sigma=spread)
        previous = self._last_fit

        if previous and latest.overlap(other=previous) >= self._threshold:
            return True

        self._last_fit = latest
        return False
Пример #5
0
def normal_quantiles(length: int,
                     mu: float = 0.0,
                     sigma: float = 1.0) -> Generator[float, None, None]:
    """Yield ``length`` evenly spaced quantiles of a normal distribution.

    The i-th value (1-based) is the inverse CDF at probability
    ``i / length``.  Because the inverse CDF is undefined at 1, the final
    probability is replaced by ``0.9999999999999999``.

    Args:
        length: Number of quantiles to produce; zero or a negative number
            produces nothing.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Yields:
        The quantile values in increasing order.

    Raises:
        statistics.StatisticsError: If ``sigma`` is negative, or if it is
            zero and at least one quantile is requested.
    """
    norm_dist = statistics.NormalDist(mu=mu, sigma=sigma)
    if length <= 0:
        # Avoids dividing by zero for length == 0.
        return

    highest_probability = 0.9999999999999999
    for step in range(1, length + 1):
        # Derive each probability from its index instead of summing
        # 1 / length repeatedly: accumulated rounding error otherwise decides
        # whether the last probability lands on, just below or above the cap,
        # which moves the final quantile noticeably this deep in the tail.
        probability = min(step / length, highest_probability)
        yield norm_dist.inv_cdf(probability)
Пример #6
0
    def run(self):
        """Grade the first value of the time series against the remaining ones.

        The first column of ``self.time_series()`` is used.  Its element
        ``[0]`` is treated as the value under test and the slice ``[1:]`` as
        the reference sample (presumably newest-first ordering -- confirm
        against ``time_series``).  A normal distribution is built from the
        reference sample's mean and standard deviation and evaluated at the
        value under test.

        Returns:
            ``StrategyResult.RED`` if the cumulative probability exceeds
            0.99, ``StrategyResult.ORANGE`` if it exceeds 0.95, otherwise
            ``StrategyResult.GREEN``.
        """
        frame = self.time_series()
        series = frame[frame.keys()[0]]

        latest = series[0]
        history = series[1:]

        reference = statistics.NormalDist(history.mean(), history.std())
        percentile = reference.cdf(latest)

        if percentile > 0.99:
            return StrategyResult.RED
        if percentile > 0.95:
            return StrategyResult.ORANGE
        return StrategyResult.GREEN
Пример #7
0
    async def daily(self, ctx: MyContext):
        """Get some more experience..."""
        # The docstring above is deliberately left as-is: it may be surfaced
        # to users as help text by the command framework.
        #
        # Grants a once-per-calendar-day experience bonus to players whose
        # prestige is at least 3.  Returns False when the claim is refused
        # and None after a successful claim.
        db_hunter: Player = await get_player(ctx.author, ctx.channel)

        _ = await ctx.get_translate_function()
        if db_hunter.prestige < 3:
            # ``ctx`` has to be handed to the formatter, otherwise the
            # ``{ctx.prefix}`` placeholder in the message cannot be filled in.
            await ctx.send(
                _("❌ Your prestige level is not high enough yet. "
                  "See `{ctx.prefix}prestige info` to learn more.",
                  ctx=ctx))
            return False

        now = datetime.datetime.now()
        if db_hunter.prestige_last_daily.date() == now.date():
            # The claim becomes available again once the calendar date
            # changes, so the waiting time is what is left until the next
            # midnight.  (Subtracting the two equal dates always gave zero.)
            next_reset = datetime.datetime.combine(
                now.date() + datetime.timedelta(days=1), datetime.time.min)
            nextturn = next_reset - now
            await ctx.send(
                _("❌ You already claimed your dailies today. Try again in: `{td}`",
                  td=format_timedelta(nextturn, locale='en_US')))
            return False

        # Draw the reward from a normal distribution centred on half the
        # maximum, with one sixth of the maximum as standard deviation.
        max_experience = 20 * db_hunter.prestige
        distrib = statistics.NormalDist(max_experience / 2, max_experience / 6)
        added_experience = int(distrib.samples(1)[0])

        # Keep the reward inside [5, max_experience + 5].
        added_experience = min(max(5, added_experience), max_experience + 5)

        await db_hunter.edit_experience_with_levelups(ctx, added_experience)
        db_hunter.prestige_last_daily = now
        db_hunter.prestige_dailies += 1

        await db_hunter.save()

        if ctx.author.id == 618209176434507816:
            # This is just a prank for the guy who made me add the Normal Dist,
            # with "a tiny chance for it to become negative"
            # It's not really negative, but heh :)
            # It'll look like so.
            # Only the displayed number is negated; the experience was
            # already credited and saved above.
            added_experience = -added_experience

        await ctx.send(
            _("💰️ You took {exp} experience out of the prestige bank. Come back soon!",
              exp=added_experience))
Пример #8
0
def subcommand_range(args):
    """Print Gaussian-weighted frame averages for every window of seeds.

    For each seed 0..255 the file ``<args.directory>/<seed:03d>.txt`` is read
    if it exists; the integer in the second tab-separated field of a line
    starting with ``FRAMES`` is stored as that seed's frame count (the last
    such line wins).  Then, for every start seed, the ``args.size``
    consecutive seeds (wrapping around modulo 256) are averaged with weights
    taken from a normal density centred on ``args.size / 2`` with
    ``sigma = args.size / 2 / 1.96``.  The results are printed in ascending
    order of the average, scaled by the constant ``655171 / 39375000000``.

    Windows that include a seed without a recorded frame count are left out
    of the listing instead of aborting the whole run.

    Args:
        args: Object providing ``directory`` (path containing the seed
            files) and ``size`` (window length in seeds).
    """
    frames = {}

    for seed in range(256):
        filename = os.path.join(args.directory, f'{seed:03d}.txt')

        if not os.path.exists(filename):
            continue

        with open(filename) as f:
            for line in f:
                if line.startswith('FRAMES'):
                    frames[seed] = int(line.strip().split('\t')[1])

    # The weights depend only on the window size, so compute them once
    # instead of rebuilding the distribution for each of the 256 windows.
    sigma = args.size / 2 / 1.96
    dist = statistics.NormalDist(args.size / 2, sigma)
    weights = [dist.pdf(offset) for offset in range(args.size)]
    divisor = sum(weights)

    range_averages = {}

    for start in range(256):
        seeds = [(start + offset) % 256 for offset in range(args.size)]

        # A window with missing data cannot be averaged meaningfully.
        if any(seed not in frames for seed in seeds):
            continue

        total = sum(weight * frames[seed]
                    for weight, seed in zip(weights, seeds))
        range_averages[start] = total / divisor

    for start, average in sorted(range_averages.items(), key=lambda x: x[1]):
        print(
            f'{start:3d} - {(start + args.size - 1) % 256:3d}: {average * 655171 / 39375000000:12.3f}'
        )
Пример #9
0
def pochisq(x, df=255):
    """
    Return the probability associated with a χ² test value.

    Adapted from: Hill, I. D. and Pike, M. C.  Algorithm 299 Collected
    Algorithms for the CACM 1967 p. 243 Updated for rounding errors based on
    remark in ACM TOMS June 1985, page 185.

    Interpretation, according to http://www.fourmilab.ch/random/:

      The percentage (return value*100) is read as the degree to which the
      tested sequence is suspected of being non-random. Above 99% or below
      1% the sequence is almost certainly not random. Between 99% and 95%,
      or between 1% and 5%, the sequence is suspect. Between 90% and 95%, or
      between 5% and 10%, the sequence is “almost suspect”.

    Arguments:
        x: Obtained χ² value.
        df: Degrees of freedom, defaults to 255 for random bytes.

    Returns:
        The degree to which the sequence tested is suspected of being
        non-random. 1.0 is returned when ``x`` is not positive or ``df`` is
        below 1.

    Raises:
        ValueError: If ``df`` is not an integer.
    """
    # Argument validation.
    if not isinstance(df, int):
        raise ValueError("df must be an integer")
    if x <= 0.0 or df < 1:
        return 1.0

    # Constants
    LOG_SQRT_PI = 0.5723649429247000870717135  # log(√π)
    I_SQRT_PI = 0.5641895835477562869480795  # 1/√π
    BIGX = 20.0

    half_x = 0.5 * x
    is_even = df % 2 == 0
    if df > 1:
        # Needed as the start value for even df and as the series factor below.
        decay = math.exp(-half_x)

    # Start value: exp(-x/2) for even df, twice the normal tail otherwise.
    if is_even:
        prob = decay
    else:
        prob = 2.0 * stat.NormalDist().cdf(-math.sqrt(x))

    if df <= 2:
        return prob

    limit = 0.5 * (df - 1.0)
    index = 1.0 if is_even else 0.5

    if half_x > BIGX:
        # Large argument: accumulate the terms via logarithms.
        log_term = 0 if is_even else LOG_SQRT_PI
        log_half_x = math.log(half_x)
        while index <= limit:
            log_term = math.log(index) + log_term
            prob += math.exp(log_half_x * index - half_x - log_term)
            index += 1.0
        return prob

    # Moderate argument: sum the series directly and scale it once.
    term = 1.0 if is_even else I_SQRT_PI / math.sqrt(half_x)
    series = 0.0
    while index <= limit:
        term = term * half_x / index
        series = series + term
        index += 1.0
    return series * decay + prob
Пример #10
0
def centileFromSDS(z: float) -> float:
    """Convert a standard deviation score (z-score) into a centile.

    The centile is the share of a standard normal population lying at or
    below ``z``, expressed as a percentage (0-100).  The score itself is fed
    to the cumulative distribution function; running a tail probability
    through the CDF a second time does not produce a centile.

    Args:
        z: Standard deviation score; may be negative.

    Returns:
        The centile as a percentage, e.g. ``50.0`` for ``z == 0``.
    """
    return statistics.NormalDist().cdf(z) * 100
Пример #11
0
 def aggregate(self) -> statistics.NormalDist:
     """Summarise ``self._data`` as a normal distribution.

     Returns:
         A ``NormalDist`` whose mean is ``np.mean`` of the data and whose
         standard deviation is ``np.std`` of the data (NumPy's default,
         i.e. the population standard deviation).
     """
     samples = self._data
     centre = np.mean(samples)
     spread = np.std(samples)
     return statistics.NormalDist(mu=centre, sigma=spread)
Пример #12
0
    print(f"❌ Music vs. classics ratio is off: {ratio:.2f} to 1")
print()

# Report section 5: compare the measured play counts of the "music" playlist
# with a normal distribution that has the same mean and spread.
print("5. Music play distribution")
print("--------------------------")
# Load the index; its values are read below through their "playlist" and
# "play_count" keys.
with open("data/index.json") as index_file:
    index_entries = json.load(index_file)

# Play counts of all entries that belong to the "music" playlist.
music_plays = [
    entry["play_count"] for entry in index_entries.values()
    if entry["playlist"] == "music"
]
avg = statistics.mean(music_plays)
# Population standard deviation, reusing the mean computed above.
stdev = statistics.pstdev(music_plays, avg)
# Nine cut points that split the measured play counts into ten groups.
deciles = statistics.quantiles(music_plays, n=10)
# Reference distribution the measured deciles are compared against.
normal_dist = statistics.NormalDist(avg, stdev)
# Measured minus expected cut point, one entry per decile boundary.
diffs = [
    measured - expected
    for measured, expected in zip(deciles, normal_dist.quantiles(n=10))
]

if all(abs(diff) <= 1 for diff in diffs):
    print(f"✅ Normal distribution: {avg:.2f}±{stdev:.2f}")
elif all(abs(diff) <= 3 for diff in diffs):
    print(f"🔶 Almost normal distribution: {avg:.2f}±{stdev:.2f}")
    for i, diff in enumerate(diffs):
        if diff > 1:
            print(
                f"   {i + 1}. decile: {diff:.2f} off (measured {deciles[i]:.2f}, "
                f"expected {deciles[i] - diff:.2f})")
else: