示例#1
0
def coco_stats():
    """
    Plot the probability of at least ``k`` creature hits with coco.

    The number of hits among the cards seen is modelled as a hypergeometric
    variable: a 60 card deck, ``K`` creatures in the deck, 6 cards seen.  One
    curve is drawn for every ``K`` in ``range(15, 30)``, evaluated at
    ``k = 1`` and ``k = 2``.  Nothing is returned; the result is a plot.

    http://stattrek.com/online-calculator/hypergeometric.aspx

    CommandLine:
        python -m mtgmonte.stats --exec-coco_stats --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from mtgmonte.stats import *  # NOQA
        >>> coco_stats()
        >>> ut.show_if_requested()
    """
    import numpy as np
    import plottool as pt
    from scipy.stats import hypergeom

    deck_size = 60  # cards in deck (population size)
    cards_seen = 6  # cards seen by coco (sample size)

    def prob_ge(k, prb):
        # P(X >= k) == P(X > k - 1) because X only takes integer values.
        # The survival function gives this directly instead of the
        # round-about (1 - cdf(k)) + pmf(k).
        return prb.sf(k - 1)

    pt.ensure_pylab_qt4()

    # number of hits you want
    k = np.arange(1, 3)

    # number of creatures in the deck, one plotted curve per value
    K_list = np.arange(15, 30)

    label_list = [str(K_) + " creatures in deck" for K_ in K_list]

    # scipy's frozen hypergeom takes (population, successes, draws)
    ydata_list = [
        prob_ge(k, hypergeom(deck_size, K_, cards_seen)) for K_ in K_list
    ]

    pt.multi_plot(
        k,
        ydata_list,
        label_list=label_list,
        title="probability of at least k hits with coco",
        xlabel="k",
        ylabel="prob",
        num_xticks=len(k),
        use_darkbackground=True,
    )
示例#2
0
def land_stats():
    """
    Plot the expected number of lands drawn per turn for several land counts.

    For each land count ``K`` in ``land_range`` and each turn ``x`` in
    ``range(0, 15)`` the lands drawn are modelled as a hypergeometric variable
    with ``x + 7`` cards drawn from a 60 card deck.  The mean is plotted with
    the standard deviation as spread, and two reference curves ("minimum ok"
    and "maximum ok") are overlaid on the same figure.  Nothing is returned.

    http://stattrek.com/online-calculator/hypergeometric.aspx

    CommandLine:
        python -m mtgmonte.stats --exec-land_stats --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from mtgmonte.stats import *  # NOQA
        >>> land_stats()
        >>> ut.show_if_requested()
    """
    import numpy as np
    import plottool as pt
    from scipy.stats import hypergeom

    pt.ensure_pylab_qt4()

    deck_size = 60
    land_range = (24, 27 + 1)
    # For a 40 card deck use deck_size = 40 and land_range = (15, 18 + 1).

    # Cards drawn by turn 0 -- presumably the opening hand; one more card is
    # added per turn.
    base_draws = 7

    xdata = range(0, 15)  # turn
    land_counts = range(*land_range)

    # Build each frozen distribution once and reuse it for mean and spread.
    dist_rows = [
        [hypergeom(deck_size, K, x + base_draws) for x in xdata]
        for K in land_counts
    ]
    ydata_list = [[dist.expect() for dist in row] for row in dist_rows]
    spread_list = [[dist.std() for dist in row] for row in dist_rows]

    label_list = ["%d lands" % (K,) for K in land_counts]
    pt.multi_plot(
        xdata, ydata_list, spread_list=spread_list, label_list=label_list, num_xticks=15, num_yticks=13, fnum=1
    )

    # Lower reference curve: min(turn, ramp), where the ramp climbs 1..6 and
    # then stays at 6.
    turns = np.array(xdata)
    min_lands_acceptable = np.minimum(turns, [1, 2, 3, 4, 5, 6] + [6] * (len(xdata) - 6))
    max_lands_acceptable = (turns ** 0.9) * 0.5 + 4
    pt.multi_plot(
        xdata,
        [min_lands_acceptable, max_lands_acceptable],
        label_list=["minimum ok", "maximum ok"],
        num_xticks=15,
        num_yticks=13,
        fnum=1,
        marker="o",
    )
示例#3
0
文件: learn.py 项目: dilas12345/local
def ewma():
    """
    Explore how the ``span`` of an exponentially weighted moving average
    controls how quickly the influence of an initial value decays.

    This is an exploratory script rather than a reusable routine.  It

    1. simulates the update ``current = alpha * x + (1 - alpha) * current``
       with ``alpha = 2 / (span + 1)`` over a synthetic 0/1 signal,
    2. builds the decay ``(1 - alpha) ** n`` symbolically and prints the
       number of iterations needed to fall below a threshold,
    3. plots that iteration count against ``span`` for several thresholds,
    4. solves, per threshold, for the span given ``n`` and for ``n`` given
       the span, and fits a line to the span -> n relationship.

    Nothing is returned; output is printed and plotted.

    Notes:
        * Numeric arrays are named ``span_values`` / ``n_values`` so that
          they do not overwrite the sympy symbols ``span`` and ``n`` that
          the later symbolic steps substitute into.
        * ``ut`` is not imported in this function; it is assumed to be a
          module-level ``import utool as ut`` -- TODO confirm.
        * The sympy solving steps were not re-run for this revision; their
          exact return shapes are assumed from the surrounding code.
    """
    import numpy as np
    import plottool as pt
    import scipy.optimize
    import sympy as sym
    import ubelt as ub
    from sympy.solvers.inequalities import solve_univariate_inequality

    pt.qtensure()

    # Investigate the span parameter (numeric values for the simulation)
    sim_span = 20
    sim_alpha = 2 / (sim_span + 1)

    # how long does it take for the estimation to hit 0?
    # (ie, it no longer cares about the initial 1?)
    # about 93 iterations to get to 1e-4
    # about 47 iterations to get to 1e-2
    # about 24 iterations to get to 1e-1
    # 20 iterations goes to .135
    data = ([1] + [0] * 20 + [1] * 40 + [0] * 20 + [1] * 50 + [0] * 20 +
            [1] * 60 + [0] * 20 + [1] * 165 + [0] * 20 + [0])

    # The first sample seeds the average; every later sample is blended in.
    mave = [data[0]]
    for x in data[1:]:
        mave.append((sim_alpha * x) + (1 - sim_alpha) * mave[-1])

    if False:
        pt.figure(fnum=1, doclf=True)
        pt.plot(data)
        pt.plot(mave)

    # Indices where the average has decayed below 0.1 (kept for inspection).
    np.where(np.array(mave) < 1e-1)

    n = sym.symbols('n', integer=True, nonnegative=True, finite=True)
    span = sym.symbols('span', integer=True, nonnegative=True, finite=True)
    thresh = sym.symbols('thresh', real=True, nonnegative=True, finite=True)

    # Generic form of the problem: solve b**a == c for the exponent a.
    a, b, c = sym.symbols('a, b, c', real=True, nonnegative=True, finite=True)
    sym.solve(sym.Eq(b**a, c), a)

    # Decay of an initial 1 when every later input is 0.
    current = 1
    x = 0
    steps = []
    for _ in range(10):
        current = (sim_alpha * x) + (1 - sim_alpha) * current
        steps.append(current)

    # Symbolic decay after n steps for a given span.
    alpha = 2 / (span + 1)
    thresh_expr = (1 - alpha)**n
    n_expr = sym.ceiling(sym.log(thresh) / sym.log(1 - 2 / (span + 1)))

    sym.pprint(sym.simplify(thresh_expr))
    sym.pprint(sym.simplify(n_expr))
    print(sym.latex(sym.simplify(n_expr)))

    def calc_n(span, thresh):
        # 1 - 2 / (span + 1) == (span - 1) / (span + 1)
        return np.log(thresh) / np.log((span - 1) / (span + 1))

    def calc_thresh_val(n, span):
        alpha = 2 / (span + 1)
        return (1 - alpha)**n

    span_values = np.arange(2, 200)
    n_frac = calc_n(span_values, thresh=.5)
    n_values = np.ceil(n_frac)
    calc_thresh_val(n_values, span_values)

    pt.figure(fnum=1, doclf=True)
    # The loop variable is deliberately not called ``thresh`` so it can
    # never replace the sympy symbol of that name.
    ydatas = ut.odict([('thresh=%f' % thresh_,
                        np.ceil(calc_n(span_values, thresh=thresh_)))
                       for thresh_ in [1e-3, .01, .1, .2, .3, .4, .5]])
    pt.multi_plot(
        span_values,
        ydatas,
        xlabel='span',
        ylabel='n iters to acheive thresh',
        marker='',
        # num_xticks=len(span_values),
        fnum=1)
    pt.gca().set_aspect('equal')

    def both_sides(eqn, func):
        return sym.Eq(func(eqn.lhs), func(eqn.rhs))

    eqn = sym.Eq(thresh_expr, thresh)
    solutions = sym.solve(eqn, n)
    if solutions:
        n_expr = solutions[0]

    # Same equation with the log (base 1 - alpha) taken on both sides.
    eqn = both_sides(eqn, lambda x: sym.log(x, (1 - alpha)))

    def eval_expr(span_value, n_value):
        # ``np.float`` was removed from NumPy; the builtin float is the same
        # dtype.
        return np.array(
            [thresh_expr.subs(span, span_value).subs(n, n_) for n_ in n_value],
            dtype=float)

    eval_expr(20, np.arange(20))

    def linear(x, a, b):
        return a * x + b

    def sigmoidal_4pl(x, a, b, c, d):
        return d + (a - d) / (1 + (x / c)**b)

    def exponential(x, a, b, c):
        return a + b * np.exp(-c * x)

    # Determine how to choose span, such that you get to .01 from 1
    # in n timesteps
    thresh_to_span_to_n = []
    thresh_to_n_to_span = []
    for thresh_value in ub.ProgIter([.0001, .001, .01, .1, .2, .3, .4, .5]):
        print('')
        test_vals = sorted([2, 3, 4, 5, 6])
        n_to_span = []
        for n_value in ub.ProgIter(test_vals):
            # In n iterations I want to choose a span that makes the
            # expression go below a threshold
            constraint = thresh_expr.subs(n, n_value) < thresh_value
            solution = solve_univariate_inequality(constraint, span)
            try:
                # Two-sided solution: take the smallest integer span in it.
                lowbound = np.ceil(float(solution.args[0].lhs))
                highbound = np.floor(float(solution.args[1].rhs))
                assert lowbound <= highbound
                span_value = lowbound
            except AttributeError:
                # One-sided solution: only an upper bound is available.
                span_value = np.floor(float(solution.rhs))
            n_to_span.append((n_value, span_value))

        # Given a threshold, find a minimum number of steps
        # that brings you up to that threshold given a span
        test_vals = sorted(set(list(range(2, 1000, 50)) + [2, 3, 4, 5, 6]))
        span_to_n = []
        for span_value in ub.ProgIter(test_vals):
            constraint = thresh_expr.subs(span, span_value) < thresh_value
            solution = solve_univariate_inequality(constraint, n)
            n_value = solution.lhs
            span_to_n.append((span_value, n_value))

        thresh_to_n_to_span.append((thresh_value, n_to_span))
        thresh_to_span_to_n.append((thresh_value, span_to_n))

    thresh_to_params = []
    for thresh_value, span_to_n in thresh_to_span_to_n:
        xdata, ydata = [np.array(_, dtype=float) for _ in zip(*span_to_n)]

        # Initial guess: slope from the mean step between samples, offset
        # from the first sample.
        p0 = (1 / np.diff((ydata - ydata[0])[1:]).mean(), ydata[0])
        func = linear
        popt, pcov = scipy.optimize.curve_fit(func, xdata, ydata, p0)

        if False:
            yhat = func(xdata, *popt)
            pt.figure(fnum=1, doclf=True)
            pt.plot(xdata, ydata, label='measured')
            pt.plot(xdata, yhat, label='predicteed')
            pt.legend()
        thresh_to_params.append((thresh_value, popt))

    # for thresh_value=.01, we get a rough line with slope ~2.302,
    # for thresh_value=.5, we get a line with slope ~34.66

    # if we want to get to 0 in n timesteps, with a thresh_value of
    # choose span=f(thresh_value) * (n + 2))
    # f is some inverse exponential

    # 0.0001, 460.551314197147
    # 0.001, 345.413485647860,
    # 0.01, 230.275657098573,
    # 0.1, 115.137828549287,
    # 0.2, 80.4778885203347,
    # 0.3, 60.2031233261536,
    # 0.4, 45.8179484913827,
    # 0.5, 34.6599400289520

    # Seems to be a 4PL symmetrical sigmoid
    # f(x) = -66500.85 + (66515.88 - -66500.85) / (1 + (x/0.8604672)^0.001503716)

    def f(x):
        return -66500.85 + (66515.88 -
                            -66500.85) / (1 + (x / 0.8604672)**0.001503716)

    # f(.5) * (n - 1)

    # NOTE: the original ended with a call to ``solve_rational_inequalities``,
    # a name that was never imported and whose arguments did not match that
    # function's signature; it could only raise, so it is not reproduced.
示例#4
0
def demo_refresh():
    r"""
    Run simulated oracle reviews against demo data and plot the refresh
    criteria as the reviews progress.

    Candidate edges are reviewed in order of decreasing predicted score.
    After each review the estimated probability that any positive remains
    and the true fraction of positives remaining are recorded; reviewing
    stops once the refresh criteria reports convergence.

    CommandLine:
        python -m ibeis.algo.graph.refresh demo_refresh \
                --num_pccs=40 --size=2 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.graph.refresh import *  # NOQA
        >>> demo_refresh()
        >>> ut.show_if_requested()
    """
    from ibeis.algo.graph import demo
    demokw = ut.argparse_dict({'num_pccs': 50, 'size': 4})
    refreshkw = ut.argparse_funckw(RefreshCriteria)

    # Build the inference object and rank its candidate edges by score,
    # best first.
    infr = demo.demodata_infr(size_std=0, **demokw)
    edges = list(infr.dummy_verif.find_candidate_edges(K=100))
    scores = np.array(infr.dummy_verif.predict_edges(edges))
    descending = scores.argsort()[::-1]
    edges = ut.take(edges, descending)
    scores = scores[descending]

    # ys[i] is the POSTV column value for edge i; y_remainsum[i] is the sum
    # of ys from position i to the end.
    ys = infr.match_state_df(edges)[POSTV].values
    y_remainsum = ys[::-1].cumsum()[::-1]

    # Do oracle reviews and wait to converge
    refresh = RefreshCriteria(**refreshkw)
    xdata, pprob_any, rfrac_any = [], [], []
    for idx, (_, y) in enumerate(zip(edges, ys)):
        refresh.add(y, user_id='user:oracle')
        rfrac_any.append(y_remainsum[idx] / y_remainsum[0])
        pprob_any.append(refresh.prob_any_remain())
        xdata.append(idx + 1)
        if refresh.check():
            break

    ydatas = ut.odict([
        ('Est. probability any remain', pprob_any),
        ('Fraction remaining', rfrac_any),
    ])

    ut.quit_if_noshow()
    import plottool as pt
    pt.qtensure()
    from ibeis.scripts.thesis import TMP_RC
    import matplotlib as mpl
    mpl.rcParams.update(TMP_RC)
    pt.multi_plot(
        xdata,
        ydatas,
        xlabel='# manual reviews',
        rcParams=TMP_RC,
        marker='',
        ylim=(0, 1),
        use_legend=False,
    )

    # Relabel the configuration keys for display on the figure.
    key_labels = {'num_pccs': '#PCC', 'size': 'PCC size'}
    demokw = ut.map_keys(key_labels, demokw)
    thresh = refreshkw.pop('thresh')
    refreshkw['span'] = refreshkw.pop('window')

    demo_lbl = ut.get_cfg_lbl(demokw, sep=' ')[1:]
    pt.relative_text((.02, .58 + .0), demo_lbl, valign='bottom')
    refresh_lbl = ut.get_cfg_lbl(refreshkw, sep=' ')[1:]
    pt.relative_text((.02, .68 + .0), refresh_lbl, valign='bottom')

    pt.gca().legend().get_frame().set_alpha(1.0)

    # Dashed horizontal line at the threshold, spanning all reviews.
    pt.plt.plot([xdata[0], xdata[-1]], [thresh, thresh], 'g--', label='thresh')
示例#5
0
def limited_power_toughness_histogram():
    r"""
    Plot power / toughness histograms for the common and uncommon creatures
    of the "Oath of the Gatewatch" set.

    Cards are fetched page by page through ``mtglib`` (up to 10 pages,
    stopping at the first page that does not hold 100 cards).  Creatures
    whose power or toughness is not an integer are skipped.  Two bar plots
    are produced: separate power and toughness histograms, and a histogram
    of the (power, toughness) pairs.  Nothing is returned.

    ``ut`` is not imported in this function; it is assumed to be a
    module-level ``import utool as ut`` -- TODO confirm.

    CommandLine:
        python -m mtgmonte.stats --exec-limited_power_toughness_histogram --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from mtgmonte.stats import *  # NOQA
        >>> limited_power_toughness_histogram()
        >>> ut.show_if_requested()
    """
    import numpy as np
    import plottool as pt
    from mtgmonte import mtgobjs
    from mtglib.gatherer_request import SearchRequest
    from mtglib.card_extractor import CardExtractor

    request = SearchRequest({"set": "Oath of the Gatewatch"})

    def add_page(url, page):
        # Insert "page=<n>&" right after "Default.aspx?" in the last path
        # component of the url.
        parts = url.split("/")
        prefix = "/".join(parts[:-1])
        query = parts[-1].replace("Default.aspx?", "")
        return prefix + ("/Default.aspx?page=%d&" % (page,)) + query

    card_list = []
    for page in range(0, 10):
        extract = CardExtractor(add_page(request.url, page))
        page_cards = extract.cards

        for card in page_cards:
            card2 = mtgobjs.Card2()
            card2.__dict__.update(card.__dict__)
            card_list.append(card2)

        # A page that does not hold exactly 100 cards is treated as the last.
        if len(page_cards) != 100:
            break

    for c in card_list:
        # NOTE(review): if nice_attrs is a list shared on the class, this
        # in-place += appends to it once per card -- confirm against Card2.
        c.nice_attrs += ["rarity"]

    # Test each card's own types.  The original tested the leaked loop
    # variable ``card2`` (the last card fetched), so the filter kept either
    # every card or none.
    creats = [
        _card2
        for _card2 in card_list
        if "Creature" in _card2.types and _card2.rarity in ["Common", "Uncommon"]
    ]

    powtough = []
    for c in creats:
        try:
            powtough.append((int(c.power), int(c.toughness)))
        except ValueError:
            # power / toughness that does not parse as an integer is skipped
            pass

    pt.ensure_pylab_qt4()

    # Row 0 holds the powers, row 1 the toughnesses.
    scores_list = np.array(list(zip(*powtough)))
    max_score = np.max(scores_list)
    xdata = np.arange(0, max_score + 1)
    # One unit-wide bin per integer value needs one more edge than values;
    # this keeps the counts the same length as xdata and stops the two
    # largest values from sharing the final bin.
    bin_edges = np.arange(0, max_score + 2)
    powhist = np.histogram(scores_list[0], bins=bin_edges)[0]
    toughist = np.histogram(scores_list[1], bins=bin_edges)[0]
    pt.multi_plot(xdata, [powhist, toughist], label_list=["power", "toughness"], kind="bar")

    # Histogram over the (power, toughness) pairs, ordered by pair.
    bothhist = ut.dict_hist(powtough)
    xdata = np.arange(len(bothhist))
    dat = sorted(bothhist.items())
    xticklabels = ut.take_column(dat, 0)
    ydata = ut.take_column(dat, 1)

    pt.multi_plot(xdata, [ydata], xticklabels=xticklabels, kind="bar")