示例#1
0
        deck_50_pct[name] = mask.sum() / len(mask)
        print("Deck {0}={2}: {1} matches, {3}%".format(ii, mask.sum(), name,
                                                       mask.sum() / len(mask)))
        deck_ids[name] = ii
        #pd['Archetype'][mask] = name
    #print(deck[-20:])

print(len(deck_50_pct), deck_50_pct)

final = pandas.DataFrame.from_dict(list(deck_50_pct.items()))
final.sort_values(by=1, inplace=True)
print(final.sort_values(by=1))

timestep = 3
week_starts = [
    day for day in daterange(start_date, datetime.date.today(), step=timestep)
]

weekly_summary = pandas.DataFrame(index=week_starts, columns=deck_guess.keys())

dates = pandas.to_datetime(pd.Date.str.decode('ascii'))

for week_start in week_starts:
    week_end = week_start + datetime.timedelta(timestep)
    date_matches = (dates >= week_start) & (dates < week_end)
    for deck in deck_guess:
        deck_matches = (pd.Archetype == deck) & date_matches
        weekly_summary[deck][week_start] = deck_matches.sum(
        ) / date_matches.sum()

weekly_summary.plot(style=[
    pd['Losses'] = [int(losses) for wins, losses, ties in records]
    pd['Ties'] = [int(ties) for wins, losses, ties in records]
    pd['Matches'] = pd['Wins'] + pd['Losses'] + pd['Ties']

    for archetype in set(pd['Archetype']):
        Wins = pd['Wins'][pd['Archetype'] == archetype].sum()
        Losses = pd['Losses'][pd['Archetype'] == archetype].sum()
        Matches = pd['Matches'][pd['Archetype'] == archetype].sum()
        print(
            "{3:30s} Win pct: {4:6.2f} Total wins: {0:3d} losses: {1:3d} matches: {2:3d}"
            .format(Wins, Losses, Matches, archetype, Wins / Matches))

    timestep = 7
    week_starts = [
        day for day in daterange(datetime.date(year=2016, month=10, day=1),
                                 datetime.date.today(),
                                 step=timestep)
    ]

    undefeatedfraction_weekly_summary = pandas.DataFrame(
        index=week_starts, columns=deck_guess.keys())

    dates = pandas.to_datetime([x.decode() for x in pd.Date])
    dates = np.array([
        datetime.date(year=2000 + int(x[6:8]),
                      month=int(x[3:5]),
                      day=int(x[0:2])) for x in pd.Date
    ])

    for week_start in week_starts:
        week_end = week_start + datetime.timedelta(timestep)
示例#3
0
def do_clustering(
    alldecks,
    prefix="MTGTOP8",
    deck_guess=KLD_deck_guess,
    start_date=datetime.date(year=2016, month=10, day=1),
    timestep=7,
    standard_set_savename='data/standard_legal.json',
    forced_legal=[],
    card_to_check=None,
):

    standard_legal = [
        x.lower() for x in get_standard_legal(savename=standard_set_savename,
                                              forced_legal=forced_legal)
    ]
    if card_to_check is not None:
        assert card_to_check in standard_legal

    card_namespace = set(card.lower() for dailies in alldecks.values()
                         for deck in dailies.values()
                         for card in deck['mainboard']
                         if card.lower() in standard_legal)

    lowercase_decks = {
        date: {
            key: {
                board:
                ({(card.lower() if '/' not in card else
                   card.lower().split("/")[0].strip()): deck[board][card]
                  for card in deck[board]}
                 if board not in ('eventid', 'record') else deck[board])
                for board in deck
            }
            for key, deck in results.items()
        }
        for date, results in alldecks.items()
    }

    legaldecks = {
        date: results
        for date, results in lowercase_decks.items()
        if not any(card.lower() not in standard_legal
                   for deck in results.values() for card in deck['mainboard'])
    }
    illegalcards = {
        date: {
            user: ([
                card for card in deck['mainboard']
                if card.lower() not in standard_legal
            ], deck['eventid'])
            for user, deck in results.items() if any([
                card for card in deck['mainboard']
                if card.lower() not in standard_legal
            ])
        }
        for date, results in lowercase_decks.items()
        if any(card.lower() not in standard_legal for deck in results.values()
               for card in deck['mainboard'])
    }

    deckcount = sum(len(x) for x in legaldecks.values())
    print("Found {0} decks".format(deckcount))

    array = np.recarray(deckcount,
                        dtype=([('ID', 'S40'), ('Date', 'S10'),
                                ('EventID', int), ("Archetype", "S25")] +
                               [(name, np.int16) for name in card_namespace]))

    ii = 0
    for date, daily in legaldecks.items():
        for deckname, deck in daily.items():
            array['ID'][ii] = deckname.encode('ascii', errors='replace')
            array['Date'][ii] = date
            array['EventID'][ii] = deck['eventid']

            for card in card_namespace:
                if card in deck['mainboard']:
                    array[card][ii] = int(deck['mainboard'][card]) + 10
                else:
                    array[card][ii] = 0

            ii = ii + 1

    pd = pandas.DataFrame(array)

    # get rid of the id column and date column
    justdata = pd.T[4:]

    # import skfuzzy as fuzz
    # # Set up the loop and plot
    # #fig1, axes1 = pl.subplots(3, 3, figsize=(8, 8))
    # fpcs = []
    #
    #
    # for ncenters in range(2,20):
    #     cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    #         data=justdata, c=ncenters, m=2, error=0.005, maxiter=1000, init=None)
    #
    #     # Store fpc values for later
    #     fpcs.append(fpc)
    #
    # #    # Plot assigned clusters, for each data point in training set
    #     cluster_membership = np.argmax(u, axis=0)
    #
    #     print()
    #     print("Nclusters = {0}".format(ncenters))
    #     print()
    #
    #     for ii in range(ncenters):
    #         mask = cluster_membership==ii
    #         deck = (justdata.T[mask] > 0).sum(axis=0)
    #         deck.sort_values(inplace=True)
    #         if any(k in deck.keys()[-10:] for k in easy_decks):
    #             for k in easy_decks:
    #                 if k in deck.keys()[-10:]:
    #                     name = k
    #             print("Deck {0}={2}: {1} matches, {3}%".format(ii, mask.sum(), name, mask.sum()/len(mask)))
    #         else:
    #             print("Deck {0}: {1} matches, {2}%".format(ii, mask.sum(), mask.sum()/len(mask)))
    #         print(deck[-10:])
    # #    for j in range(ncenters):
    # #        ax.plot(xpts[cluster_membership == j],
    # #                ypts[cluster_membership == j], '.', color=colors[j])
    # #
    # #    # Mark the center of each fuzzy cluster
    # #    for pt in cntr:
    # #        ax.plot(pt[0], pt[1], 'rs')
    # #
    # #    ax.set_title('Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc))
    # #    ax.axis('off')
    # #
    # #fig1.tight_layout()
    #
    # fig2, ax2 = pl.subplots()
    # ax2.plot(range(2,20), fpcs)
    # ax2.set_xlabel("Number of centers")
    # ax2.set_ylabel("Fuzzy partition coefficient")

    distortions = []

    deck_class = {
        'Panharmonicon': ['Panharmonicon'],
        'Metalwork Colossus': ['Metalwork Colossus'],
        'Aetherworks Marvel': ['Aetherworks Marvel'],
        #'BG Delirium Aggro': ['Grim Flayer', ],
        'BG Delirium Control': [
            "Liliana, the Last Hope", 'Grim Flayer', 'Ishkanah, Grafwidow',
            'Grasp of Darkness', 'Vessel of Nascency', 'Noxious Gearhulk',
            'Ruinous Path'
        ],
        'UW Flash': [
            'Reflector Mage', "Smuggler's Copter", "Thraben Inspector",
            "Prairie Stream"
        ],
        'Bux Graveyard':
        ['Haunted Dead', 'Prized Amalgam', 'Voldaren Pariah', 'Cryptbreaker'],
        'RW Vehicle Aggro': [
            "Smuggler's Copter", "Inspiring Vantage", "Pia Nalaar",
            "Toolcraft Exemplar", "Thraben Inspector"
        ],
        'Mardu Vehicle Aggro': [
            "Concealed Courtyard", "Scrapheap Scrounger", "Smuggler's Copter",
            "Inspiring Vantage", "Toolcraft Exemplar", "Thraben Inspector"
        ],
        'Wx Humans': [
            "Thalia's Lieutenant", "Thraben Inspector", "Town Gossipmonger",
            "Expedition Envoy", "Always Watching"
        ],  # has a dwarf?!
        'RG Energy Aggro': [
            'Servant of the Conduit', 'Attune with Aether', 'Longtusk Cub',
            'Voltaic Brawler', 'Bristling Hydra'
        ],
        'Grixis Graveyard Emerge': [
            'Elder Deep-Fiend', "Kozilek's Return", "Prized Amalgam",
            "Cathartic Reunion", "Haunted Dead", "Wretched Gryff"
        ],
        'RG Pummeler': [
            "Electrostatic Pummeler", 'Servant of the Conduit',
            'Attune with Aether', "Blossoming Defense", "Built to Smash"
        ],
        'RB Aggro':
        ['Fiery Temper', 'Bomat Courier', 'Unlicensed Disintegration'],
        'UR Control': [
            'Torrential Gearhulk', 'Glimmer of Genius', 'Harnessed Lightning',
            'Spirebluff Canal', 'Wandering Fumarole'
        ],
        'UW Control': [
            'Torrential Gearhulk', 'Glimmer of Genius', 'Immolating Glare',
            'Blessed Alliance'
        ],
        'UB Control': [
            'Torrential Gearhulk', 'Glimmer of Genius', 'Grasp of Darkness',
            'Liliana, the Last Hope', 'Sunken Hollow'
        ],
    }

    easy_decks = ('Panharmonicon', 'Metalwork Colossus', 'Aetherworks Marvel')

    # +1 for the "others"
    guess_array = np.zeros([len(deck_guess) + 1, justdata.shape[0]])
    for ii, (deckname, deck) in enumerate(deck_guess.items()):
        for card in deck:
            if card.lower() not in pd.columns:
                print(
                    "Deck {0} is not represented in the meta".format(deckname))
                #raise ValueError("Card {0} is not real".format(card))
            guess_array[ii,
                        justdata.T.keys() == card.lower()] = deck[card] + 10

    codebook, distortion = scipy.cluster.vq.kmeans(
        np.array(justdata.T, 'float'), guess_array)
    code, dist = scipy.cluster.vq.vq(np.array(justdata.T, 'int'), codebook)
    #cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    #    data=justdata, c=ncenters, m=2, error=0.005, maxiter=1000, init=None)

    pd['Distortion'] = dist

    # Store fpc values for later
    distortions.append(distortion)

    cluster_membership = code

    deck_50_pct = {}
    deck_counts = {}
    deck_top20s = {}
    deck_ids = {}

    def top20_match(x):
        x = list(x)
        if len(x) == 0:
            return 0
        return np.sum(x) / len(x)

    for ii in range(len(deck_guess) + 1):
        mask = cluster_membership == ii
        deck = (justdata.T[mask] > 0).sum(axis=0)
        deck.sort_values(inplace=True)

        deck_top20s[ii] = deck

        name = None
        card_match_fraction = {
            dk:
            top20_match(k.lower() in
                        [this_key.lower() for this_key in deck.keys()[-20:]]
                        for k in ks)
            for dk, ks in deck_guess.items()
        }
        bestfrac = 0
        for dk, frac in card_match_fraction.items():
            if frac > bestfrac:
                bestfrac = frac
                name = dk
        if bestfrac < 0.7:
            print("### Deck {0} has bad matches {1}".format(ii, bestfrac))
            name = None
        if name is None:
            print("Deck {0}: {1} matches, {2:0.2f}% of total".format(
                ii, mask.sum(),
                mask.sum() / len(mask) * 100))
            deck_ids[ii] = ii
            deck_50_pct[ii] = mask.sum() / len(mask)
            deck_counts[ii] = mask.sum()
            pd.ix[mask, 'Archetype'] = "Other " + str(ii)
        else:
            if name in deck_50_pct:
                print("**********DUPLICATE**********")
                pd.ix[mask, 'Archetype'] = name + str(ii)
            else:
                pd.ix[mask, 'Archetype'] = name
            deck_50_pct[name] = mask.sum() / len(mask)
            deck_counts[name] = mask.sum()
            print("Deck {0}={2}: {1} matches, {3:0.2f}% of total".format(
                ii, mask.sum(), name,
                mask.sum() / len(mask) * 100))
            deck_ids[name] = ii
            #pd['Archetype'][mask] = name
        #print(deck[-20:])

    print(len(deck_50_pct), deck_50_pct)

    final = pandas.DataFrame.from_dict([
        (name, deck_50_pct[name], deck_counts[name]) for name in deck_50_pct
    ])
    final.sort_values(by=1, inplace=True)
    print(final.sort_values(by=1))

    week_starts = [
        day
        for day in daterange(start_date, datetime.date.today(), step=timestep)
    ]

    weekly_summary = pandas.DataFrame(index=week_starts,
                                      columns=deck_guess.keys())

    dates = pandas.to_datetime([x.decode() for x in pd.Date])
    dates = np.array([
        datetime.date(year=2000 + int(x[6:8]),
                      month=int(x[3:5]),
                      day=int(x[0:2])) for x in pd.Date
    ])

    for week_start in week_starts:
        week_end = week_start + datetime.timedelta(timestep)
        date_matches = (dates >= week_start) & (dates < week_end)
        for deck in deck_guess:
            deck_matches = (pd.Archetype == deck) & date_matches
            weekly_summary[deck][week_start] = deck_matches.sum(
            ) / date_matches.sum()

    weekly_summary.plot(style=[
        x + 'o' + y for x, y in zip('rgbcmykrgbcmykrgbcmykrgbcmyk', ['-'] * 7 +
                                    ['--'] * 7 + [':'] * 7 + ['-.'] * 7)
    ],
                        figsize=[24, 20])
    pl.xlabel("First date in week")
    pl.ylabel("Fraction decks in that week")
    pl.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
    pl.savefig("{prefix}_meta.png".format(prefix=prefix), bbox_inches='tight')

    def get_deck(num):
        deck = pd.loc[num]
        return deck[deck != 0]

    return pd, get_deck, deck_50_pct, deck_ids, deck_top20s, deck_counts