def coco_stats():
    """
    Plot the probability of at least ``k`` creature hits with "coco".

    The number of creatures among the cards seen is modelled as a
    hypergeometric draw from a 60 card deck, looking at 6 cards.  One curve
    is plotted for every creature count from 15 through 29, over
    ``k = 1, 2``.  Nothing is returned; the result is a plot.

    References:
        http://stattrek.com/online-calculator/hypergeometric.aspx

    CommandLine:
        python -m mtgmonte.stats --exec-coco_stats --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from mtgmonte.stats import *  # NOQA
        >>> result = coco_stats()
        >>> print(result)
        >>> ut.show_if_requested()
    """
    import numpy as np
    import plottool as pt
    from scipy.stats import hypergeom

    deck_size = 60  # cards in deck
    cards_seen = 6  # cards seen by coco

    def prob_ge(k, prb):
        # For integer k, P(X >= k) == P(X > k - 1), which is exactly the
        # survival function evaluated at k - 1.  This replaces the former
        # ``(1 - cdf(k)) + pmf(k)`` round trip.
        return prb.sf(k - 1)

    pt.ensure_pylab_qt4()

    k = np.arange(1, 3)  # number of hits wanted
    K_list = np.arange(15, 30)  # candidate numbers of creatures in the deck
    label_list = [str(K_) + " creatures in deck" for K_ in K_list]
    ydata_list = [
        prob_ge(k, hypergeom(deck_size, K_, cards_seen)) for K_ in K_list
    ]

    pt.multi_plot(
        k,
        ydata_list,
        label_list=label_list,
        title="probability of at least k hits with coco",
        xlabel="k",
        ylabel="prob",
        num_xticks=len(k),
        use_darkbackground=True,
    )
def land_stats():
    """
    Plot the expected number of lands seen per turn for several land counts.

    For every land count in ``land_range`` and every turn ``x`` in
    ``0..14`` a hypergeometric distribution over ``x + 7`` cards seen is
    built; its expectation is plotted with its standard deviation as the
    spread.  A second plot on the same figure overlays hand-picked
    "minimum ok" and "maximum ok" curves.  Nothing is returned.

    References:
        http://stattrek.com/online-calculator/hypergeometric.aspx

    CommandLine:
        python -m mtgmonte.stats --exec-land_stats --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from mtgmonte.stats import *  # NOQA
        >>> result = land_stats()
        >>> print(result)
        >>> ut.show_if_requested()
    """
    import numpy as np
    import plottool as pt
    from scipy.stats import hypergeom

    pt.ensure_pylab_qt4()

    deck_size = 60
    land_range = (24, 27 + 1)
    # Alternative configuration kept from the original for a 40 card deck:
    # deck_size = 40
    # land_range = (15, 18 + 1)
    land_counts = list(range(*land_range))

    xdata = range(0, 15)  # turn

    ydata_list = []
    spread_list = []
    for num_lands in land_counts:
        # Build each distribution once and query it for both statistics.
        # NOTE(review): the ``+ 7`` is presumably the opening hand size.
        dists = [hypergeom(deck_size, num_lands, x + 7) for x in xdata]
        ydata_list.append([dist.expect() for dist in dists])
        spread_list.append([dist.std() for dist in dists])

    label_list = ["%d lands" % (num_lands,) for num_lands in land_counts]
    pt.multi_plot(
        xdata,
        ydata_list,
        spread_list=spread_list,
        label_list=label_list,
        num_xticks=15,
        num_yticks=13,
        fnum=1,
    )

    turns = np.array(xdata)
    # Lower bound: ramps 1..6 over the first six turns, then stays at 6,
    # but never exceeds the turn number itself.
    min_lands_acceptable = np.minimum(
        turns, [1, 2, 3, 4, 5, 6] + [6] * (len(xdata) - 6)
    )
    max_lands_acceptable = (turns ** 0.9) * 0.5 + 4
    pt.multi_plot(
        xdata,
        [min_lands_acceptable, max_lands_acceptable],
        label_list=["minimum ok", "maximum ok"],
        num_xticks=15,
        num_yticks=13,
        fnum=1,
        marker="o",
    )
def ewma():
    """
    Scratchpad exploring how fast an exponentially weighted moving average
    forgets its initial value as a function of ``span``.

    With ``alpha = 2 / (span + 1)`` the update used here is
    ``current = alpha * x + (1 - alpha) * current``, so after ``n`` zero
    inputs an initial value of 1 has decayed to ``(1 - alpha) ** n``.  The
    function:

    1. runs the update numerically on a synthetic 0/1 signal,
    2. derives the decay expression and the iteration count symbolically,
    3. plots the iterations needed to reach several thresholds per span,
    4. solves the inequality ``(1 - alpha) ** n < thresh`` for ``span``
       given ``n`` and for ``n`` given ``span``, and
    5. fits a line to the ``span -> n`` relation for every threshold.

    Nothing is returned; results are printed or plotted.

    NOTE(review): relies on a file-level ``ut`` name for ``ut.odict``.
    """
    import plottool as pt
    import ubelt as ub
    import numpy as np
    pt.qtensure()

    # Investigate the span parameter
    init_span = 20
    init_alpha = 2 / (init_span + 1)

    # how long does it take for the estimation to hit 0?
    # (ie, it no longer cares about the initial 1?)
    # about 93 iterations to get to 1e-4
    # about 47 iterations to get to 1e-2
    # about 24 iterations to get to 1e-1
    # 20 iterations goes to .135
    data = ([1] + [0] * 20 + [1] * 40 + [0] * 20 + [1] * 50 + [0] * 20 +
            [1] * 60 + [0] * 20 + [1] * 165 + [0] * 20 + [0])
    mave = []

    iter_ = iter(data)
    current = next(iter_)
    mave += [current]
    for x in iter_:
        current = (init_alpha * x) + (1 - init_alpha) * current
        mave += [current]

    if False:
        pt.figure(fnum=1, doclf=True)
        pt.plot(data)
        pt.plot(mave)

    # Result is discarded; kept from the original exploration.
    np.where(np.array(mave) < 1e-1)

    import sympy as sym

    # The symbols keep these names for the rest of the function.  Numeric
    # sweeps further down use separate ``*_values`` names; previously they
    # overwrote ``n`` and ``span`` with numpy arrays, which broke every
    # later ``solve`` / ``subs`` call.
    n = sym.symbols('n', integer=True, nonnegative=True, finite=True)
    span = sym.symbols('span', integer=True, nonnegative=True, finite=True)
    thresh = sym.symbols('thresh', real=True, nonnegative=True, finite=True)

    a, b, c = sym.symbols('a, b, c', real=True, nonnegative=True, finite=True)
    sym.solve(sym.Eq(b**a, c), a)

    # Decay of an initial 1 under ten zero inputs (numeric alpha).
    current = 1
    x = 0
    steps = []
    for _ in range(10):
        current = (init_alpha * x) + (1 - init_alpha) * current
        steps.append(current)

    alpha = sym.symbols('alpha', real=True, nonnegative=True, finite=True)
    base = sym.symbols('base', real=True, finite=True)
    # Keep the symbol ``alpha`` distinct from its definition in terms of
    # ``span`` (the original rebound ``alpha`` right after creating it).
    alpha_expr = 2 / (span + 1)
    thresh_expr = (1 - alpha_expr)**n
    n_expr = sym.ceiling(sym.log(thresh) / sym.log(1 - 2 / (span + 1)))

    sym.pprint(sym.simplify(thresh_expr))
    sym.pprint(sym.simplify(n_expr))
    print(sym.latex(sym.simplify(n_expr)))

    # def calc_n2(span, thresh):
    #     return np.log(thresh) / np.log(1 - 2 / (span + 1))

    def calc_n(span, thresh):
        # Fractional number of iterations for (1 - alpha) ** n == thresh.
        return np.log(thresh) / np.log((span - 1) / (span + 1))

    def calc_thresh_val(n, span):
        # Value reached after n zero-input iterations with the given span.
        alpha = 2 / (span + 1)
        return (1 - alpha)**n

    span_values = np.arange(2, 200)
    n_frac = calc_n(span_values, thresh=.5)
    n_values = np.ceil(n_frac)
    calc_thresh_val(n_values, span_values)

    pt.figure(fnum=1, doclf=True)
    ydatas = ut.odict([
        ('thresh=%f' % thresh_value,
         np.ceil(calc_n(span_values, thresh=thresh_value)))
        for thresh_value in [1e-3, .01, .1, .2, .3, .4, .5]
    ])
    pt.multi_plot(
        span_values, ydatas,
        xlabel='span',
        ylabel='n iters to acheive thresh',
        marker='',
        # num_xticks=len(span),
        fnum=1)
    pt.gca().set_aspect('equal')

    def both_sides(eqn, func):
        # Apply ``func`` to both sides of a sympy equation.
        return sym.Eq(func(eqn.lhs), func(eqn.rhs))

    eqn = sym.Eq(thresh_expr, thresh)
    # The substitutions only matter if the solution is expressed through
    # ``base`` or ``alpha``; with ``thresh_expr`` written in ``span`` they
    # leave the solution unchanged, as they did originally.
    n_expr = (sym.solve(eqn, n)[0]
              .subs(base, (1 - alpha))
              .subs(alpha, alpha_expr))

    eqn = both_sides(eqn, lambda x: sym.log(x, (1 - alpha_expr)))
    lhs = eqn.lhs  # NOQA

    from sympy.solvers.inequalities import solve_univariate_inequality

    def eval_expr(span_value, n_value):
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is
        # the same dtype.
        return np.array(
            [thresh_expr.subs(span, span_value).subs(n, n_)
             for n_ in n_value], dtype=float)

    eval_expr(20, np.arange(20))

    def linear(x, a, b):
        return a * x + b

    def sigmoidal_4pl(x, a, b, c, d):
        return d + (a - d) / (1 + (x / c)**b)

    def exponential(x, a, b, c):
        return a + b * np.exp(-c * x)

    import scipy.optimize

    # Determine how to choose span, such that you get to .01 from 1
    # in n timesteps
    thresh_to_span_to_n = []
    thresh_to_n_to_span = []
    for thresh_value in ub.ProgIter([.0001, .001, .01, .1, .2, .3, .4, .5]):
        print('')
        test_vals = sorted([2, 3, 4, 5, 6])
        n_to_span = []
        for n_value in ub.ProgIter(test_vals):
            # In n iterations I want to choose a span that the expression go
            # less than a threshold
            constraint = thresh_expr.subs(n, n_value) < thresh_value
            solution = solve_univariate_inequality(constraint, span)
            try:
                lowbound = np.ceil(float(solution.args[0].lhs))
                highbound = np.floor(float(solution.args[1].rhs))
                assert lowbound <= highbound
                span_value = lowbound
            except AttributeError:
                span_value = np.floor(float(solution.rhs))
            n_to_span.append((n_value, span_value))

        # Given a threshold, find a minimum number of steps
        # that brings you up to that threshold given a span
        test_vals = sorted(set(list(range(2, 1000, 50)) + [2, 3, 4, 5, 6]))
        span_to_n = []
        for span_value in ub.ProgIter(test_vals):
            constraint = thresh_expr.subs(span, span_value) < thresh_value
            solution = solve_univariate_inequality(constraint, n)
            n_value = solution.lhs
            span_to_n.append((span_value, n_value))

        thresh_to_n_to_span.append((thresh_value, n_to_span))
        thresh_to_span_to_n.append((thresh_value, span_to_n))

    thresh_to_params = []
    for thresh_value, span_to_n in thresh_to_span_to_n:
        xdata, ydata = [np.array(_, dtype=float) for _ in zip(*span_to_n)]

        p0 = (1 / np.diff((ydata - ydata[0])[1:]).mean(), ydata[0])
        func = linear
        popt, pcov = scipy.optimize.curve_fit(func, xdata, ydata, p0)
        # popt, pcov = scipy.optimize.curve_fit(exponential, xdata, ydata)

        if False:
            yhat = func(xdata, *popt)
            pt.figure(fnum=1, doclf=True)
            pt.plot(xdata, ydata, label='measured')
            pt.plot(xdata, yhat, label='predicteed')
            pt.legend()
        # slope = np.diff(ydata).mean()
        # pt.plot(d)
        thresh_to_params.append((thresh_value, popt))

    # pt.plt.plot(*zip(*thresh_to_slope), 'x-')

    # for thresh_value=.01, we get a rough line with slope ~2.302,
    # for thresh_value=.5, we get a line with slope ~34.66

    # if we want to get to 0 in n timesteps, with a thresh_value of
    # choose span=f(thresh_value) * (n + 2))
    # f is some inverse exponential

    # 0.0001, 460.551314197147
    # 0.001, 345.413485647860,
    # 0.01, 230.275657098573,
    # 0.1, 115.137828549287,
    # 0.2, 80.4778885203347,
    # 0.3, 60.2031233261536,
    # 0.4, 45.8179484913827,
    # 0.5, 34.6599400289520

    # Seems to be 4PL symmetrical sigmoid
    # f(x) = -66500.85 + (66515.88 - -66500.85) / (1 + (x/0.8604672)^0.001503716)

    def f(x):
        return -66500.85 + (66515.88 - -66500.85) / (1 + (x / 0.8604672)**0.001503716)
        # return (10000 * (-6.65 + (13.3015) / (1 + (x/0.86) ** 0.00150)))

    # f(.5) * (n - 1)

    # NOTE(review): ``solve_rational_inequalities`` is never imported in this
    # function, so the call below stays a comment instead of being executed.
    # f(
    # solve_rational_inequalities(thresh_expr < .01, n)
def demo_refresh():
    r"""
    Simulate oracle reviews on demo data and plot the refresh criterion.

    Candidate edges from a demo inference object are ordered by descending
    predicted score and fed one by one, with their true match state, to a
    ``RefreshCriteria`` instance until its ``check()`` fires.  The estimated
    probability that any positive remains is plotted against the true
    fraction of positives remaining, with the threshold drawn as a dashed
    line.

    CommandLine:
        python -m ibeis.algo.graph.refresh demo_refresh \
                --num_pccs=40 --size=2 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.graph.refresh import *  # NOQA
        >>> demo_refresh()
        >>> ut.show_if_requested()
    """
    from ibeis.algo.graph import demo

    demo_cfg = ut.argparse_dict({'num_pccs': 50, 'size': 4})
    refresh_cfg = ut.argparse_funckw(RefreshCriteria)

    # Build the inference object and rank its candidate edges by score,
    # best first.
    infr = demo.demodata_infr(size_std=0, **demo_cfg)
    candidates = list(infr.dummy_verif.find_candidate_edges(K=100))
    scores = np.array(infr.dummy_verif.predict_edges(candidates))
    order = scores.argsort()[::-1]
    candidates = ut.take(candidates, order)
    scores = scores[order]

    is_positive = infr.match_state_df(candidates)[POSTV].values
    # remaining[i] is the number of positives at position i or later.
    remaining = is_positive[::-1].cumsum()[::-1]

    # Feed oracle decisions until the criterion reports convergence.
    criterion = RefreshCriteria(**refresh_cfg)
    review_counts = []
    prob_any_remaining = []
    frac_remaining = []
    pairs = zip(candidates, is_positive)
    for num_reviews, (_, truth) in enumerate(pairs, start=1):
        criterion.add(truth, user_id='user:oracle')
        frac_remaining.append(remaining[num_reviews - 1] / remaining[0])
        prob_any_remaining.append(criterion.prob_any_remain())
        review_counts.append(num_reviews)
        if criterion.check():
            break

    curves = ut.odict([
        ('Est. probability any remain', prob_any_remaining),
        ('Fraction remaining', frac_remaining),
    ])

    ut.quit_if_noshow()
    import plottool as pt
    pt.qtensure()
    from ibeis.scripts.thesis import TMP_RC
    import matplotlib as mpl
    mpl.rcParams.update(TMP_RC)

    pt.multi_plot(
        review_counts,
        curves,
        xlabel='# manual reviews',
        rcParams=TMP_RC,
        marker='',
        ylim=(0, 1),
        use_legend=False,
    )

    # Relabel the configuration for display and annotate the axes with it.
    demo_cfg = ut.map_keys({'num_pccs': '#PCC', 'size': 'PCC size'}, demo_cfg)
    thresh = refresh_cfg.pop('thresh')
    refresh_cfg['span'] = refresh_cfg.pop('window')
    demo_label = ut.get_cfg_lbl(demo_cfg, sep=' ')[1:]
    pt.relative_text((.02, .58 + .0), demo_label, valign='bottom')
    refresh_label = ut.get_cfg_lbl(refresh_cfg, sep=' ')[1:]
    pt.relative_text((.02, .68 + .0), refresh_label, valign='bottom')

    legend = pt.gca().legend()
    legend.get_frame().set_alpha(1.0)

    # Horizontal dashed line at the threshold, spanning the reviewed range.
    first_x, last_x = review_counts[0], review_counts[-1]
    pt.plt.plot([first_x, last_x], [thresh, thresh], 'g--', label='thresh')
def limited_power_toughness_histogram():
    r"""
    Plot power/toughness histograms for common and uncommon creatures.

    Cards from the "Oath of the Gatewatch" set are fetched page by page
    through ``mtglib``, copied into ``mtgobjs.Card2`` objects, and filtered
    to creatures of rarity "Common" or "Uncommon".  Two bar charts are
    drawn: separate power and toughness histograms, and a histogram over
    ``(power, toughness)`` pairs.  Nothing is returned.

    Raises:
        ValueError: if no creature with integer power and toughness is found.

    CommandLine:
        python -m mtgmonte.stats --exec-limited_power_toughness_histogram --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from mtgmonte.stats import *  # NOQA
        >>> result = limited_power_toughness_histogram()
        >>> print(result)
        >>> ut.show_if_requested()
    """
    from mtgmonte import mtgobjs
    from mtglib.gatherer_request import SearchRequest
    from mtglib.card_extractor import CardExtractor
    # from mtglib.card_renderer import CardList

    request = SearchRequest({"set": "Oath of the Gatewatch"})

    def add_page(url, page):
        # Splice a ``page=<n>&`` query parameter into the last URL segment.
        parts = url.split("/")
        part1 = "/".join(parts[:-1])
        part2 = "/Default.aspx?page=%d&" % (page,)
        part3 = parts[-1].replace("Default.aspx?", "")
        return part1 + part2 + part3

    card_list = []
    for page in range(0, 10):
        url = request.url
        extract = CardExtractor(add_page(url, page))
        card_list0 = extract.cards
        for card in card_list0:
            card2 = mtgobjs.Card2()
            card2.__dict__.update(card.__dict__)
            card_list.append(card2)
        # A page with anything other than 100 cards is treated as the last.
        if len(card_list0) != 100:
            break

    # NOTE(review): if ``nice_attrs`` is a class-level list this appends to
    # the shared list once per card -- confirm against Card2.
    for c in card_list:
        c.nice_attrs += ["rarity"]

    # Filter on each card's own types.  The previous code tested ``card2``,
    # the variable leaked from the fetch loop above, so every card was judged
    # by the types of the last card fetched.
    creats = [
        _card2 for _card2 in card_list
        if "Creature" in _card2.types
        and _card2.rarity in ["Common", "Uncommon"]
    ]

    powtough = []
    for c in creats:
        try:
            powtough.append((int(c.power), int(c.toughness)))
        except ValueError:
            # Non-integer power/toughness values are skipped.
            pass

    if not powtough:
        raise ValueError(
            "no creatures with integer power and toughness were found")

    import plottool as pt
    pt.ensure_pylab_qt4()
    import numpy as np

    # Row 0 holds the powers, row 1 the toughnesses.
    scores_list = np.array(list(zip(*powtough)))
    max_score = np.max(scores_list)
    xdata = np.arange(0, max_score + 1)
    # One bin per integer in ``xdata`` requires one more edge than bins.
    # Reusing ``xdata`` as the edges gave one bin too few and merged the two
    # largest values into the closed final bin.
    bin_edges = np.arange(0, max_score + 2)
    powhist = np.histogram(scores_list[0], bins=bin_edges)[0]
    toughist = np.histogram(scores_list[1], bins=bin_edges)[0]
    pt.multi_plot(xdata, [powhist, toughist],
                  label_list=["power", "toughness"], kind="bar")

    # Histogram over distinct (power, toughness) pairs, in sorted order.
    bothhist = ut.dict_hist(powtough)
    xdata = np.arange(len(bothhist))
    dat = sorted(bothhist.items())
    xticklabels = ut.take_column(dat, 0)
    ydata = ut.take_column(dat, 1)
    pt.multi_plot(xdata, [ydata], xticklabels=xticklabels, kind="bar")