def main(args):
    """Recompute game stats for every player listed in ``args.players``.

    Each name is normalised, the ``games`` collection is queried for games
    whose player list contains that normalised name, and the matching
    documents are passed (as a list) to ``calculate_game_stats`` together
    with the ``game_stats`` collection.
    """
    database = utils.get_mongo_database()
    games_collection = database.games
    stats_collection = database.game_stats

    for raw_name in args.players:
        log.debug("Processing top level player name %s", raw_name)
        query = {keys.PLAYERS: norm_name(raw_name)}
        matching_games = list(games_collection.find(query))
        calculate_game_stats(matching_games, stats_collection)
def GET(self):
    """Render the buy-statistics page.

    The overall stats are read from ``db.buys``.  When the query string
    carries a ``player`` parameter, a second stats object is accumulated
    from that player's games only and annotated with effectiveness relative
    to the overall stats; otherwise the per-player summary is ``None``.
    """
    import count_buys

    web.header("Content-Type", "text/html; charset=utf-8")
    params = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))

    db = utils.get_mongo_database()
    stats = count_buys.DeckBuyStats()
    utils.read_object_from_db(stats, db.buys, '')

    player_buy_summary = None
    if 'player' in params:
        targ_name = norm_name(params['player'])

        def match_name(g, name):
            # Only decks whose normalised name is the requested player count.
            return norm_name(name) == targ_name

        raw_games = list(db.games.find({'players': targ_name}))
        games = [game.Game(raw) for raw in raw_games]
        player_buy_summary = count_buys.DeckBuyStats()
        count_buys.accum_buy_stats(games, player_buy_summary, match_name)
        count_buys.add_effectiveness(player_buy_summary, stats)

    renderer = web.template.render('', globals={'round': round})
    return renderer.buy_template(stats, player_buy_summary)
def test_single_game_winner_stats(self):
    """Check per-card buy stats accumulated from ``self.outpost_game``.

    Only decks whose normalised name equals 'moop' are matched while
    accumulating.  Effectiveness is then computed with the same stats object
    used as both subject and baseline, and the per-card frequencies are
    compared against the expected table below.

    Uses ``assertEqual``; the ``assertEquals`` spelling is a deprecated
    alias that no longer exists in recent Python versions.
    """
    stats = count_buys.DeckBuyStats()
    targ_name = 'moop'

    def match_name(g, name):
        return name_merger.norm_name(name) == targ_name

    count_buys.accum_buy_stats([self.outpost_game], stats, match_name)
    count_buys.add_effectiveness(stats, stats)

    # Columns: card, available, any_gained, effect_with, effect_without,
    # effectiveness_gain.  ``None`` means the field is not asserted.
    expected = [
        # Harem bought by both players
        (dominioncards.Harem, 1, 1, 1, 0, 1),
        # Salvager only bought by winner
        (dominioncards.Salvager, 1, 1, 1, 0, 1),
        # Potion only bought by loser
        (dominioncards.Potion, 1, 0, 0, 1, 0),
        # Outpost only bought by loser
        (dominioncards.Outpost, 1, 0, 0, 1, 0),
        # Workshop not bought by either player
        (dominioncards.Workshop, 1, 0, 0, 1, 0),
        # Chapel not present in this game
        (dominioncards.Chapel, 0, 0, 0, 0, None),
    ]

    for card, available, gained, eff_with, eff_without, eff_gain in expected:
        cstats = stats[card]
        self.assertEqual(cstats.available.freq, available)
        self.assertEqual(cstats.any_gained.freq, gained)
        self.assertEqual(cstats.effect_with().freq, eff_with)
        self.assertEqual(cstats.effect_without().freq, eff_without)
        if eff_gain is not None:
            self.assertEqual(cstats.effectiveness_gain.freq, eff_gain)
def parse_vetoes(game_dict, veto_str):
    """Parse the veto declarations found in ``veto_str``.

    Every ``(player, card)`` pair matched by ``VETO_RE`` is converted to an
    entry keyed by the player's position in ``game_dict[PLAYERS]``.

    Args:
        game_dict: game dictionary; ``game_dict[PLAYERS]`` must already hold
            the normalised player names.
        veto_str: text to search for veto declarations.

    Returns:
        A dict mapping the player's index (as a string) to the integer index
        of the vetoed card.  Empty when nothing matches.

    Raises:
        ParsingError: if a ``ValueError`` occurs while handling a veto, for
            example when the player is not in ``game_dict[PLAYERS]``.
    """
    v_dict = {}
    for player, card in VETO_RE.findall(veto_str):
        # Use the player index number (as a string) as the dictionary key,
        # instead of the player's name, because some names contain periods,
        # which are invalid keys for structures stored in MongoDB.
        player = name_merger.norm_name(player)
        try:
            # The card is resolved before the player, matching the
            # evaluation order of the single-statement form.
            card_index = int(capture_cards(card)[0].index)
            player_index = game_dict[PLAYERS].index(player)
        except ValueError as ve:
            raise ParsingError("Failed to handle veto: %s" % ve)
        v_dict[str(player_index)] = card_index
    return v_dict
def GET(self):
    """Return, as JSON text, the decks of every game the player took part in.

    The ``player`` query parameter is required.  Each element of the
    resulting array has a ``game`` entry (the stored ``decks`` value) and an
    ``id`` entry (the document ``_id``).  Values the json module cannot
    encode by itself are delegated to ``json_util.default``.
    """
    web.header("Content-Type", "text/plain; charset=utf-8")
    params = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    requested = params['player']

    games_collection = utils.get_mongo_database().games
    cursor = games_collection.find({'players': norm_name(requested)})

    from pymongo import json_util
    payload = []
    for doc in cursor:
        payload.append({'game': doc['decks'], 'id': doc['_id']})
    return json.dumps(payload, default=json_util.default)
def associate_turns_with_owner(game_dict, turns):
    """Attach every turn to the deck of the player who took it.

    Deck names are normalised in place and each deck receives an empty
    ``TURNS`` list.  Turns are then appended to their owner's list; the
    first time an owner is seen its ``ORDER`` is set to the 1-based position
    of that turn in ``turns``.  The name is removed from each turn because
    it duplicates the name stored on the deck.

    Raises:
        BogusGameError: if the number of decks that received an ``ORDER``
            here differs from the number of decks in the game.
    """
    decks_by_name = {}
    for deck in game_dict[DECKS]:
        normed = name_merger.norm_name(deck[NAME])
        deck[NAME] = normed
        decks_by_name[normed] = deck
        deck[TURNS] = []

    owners_seen = 0
    for position, turn in enumerate(turns, 1):
        owner = decks_by_name[name_merger.norm_name(turn[NAME])]
        owner[TURNS].append(turn)
        if ORDER not in owner:
            owner[ORDER] = position
            owners_seen += 1
        del turn[NAME]

    if owners_seen != len(game_dict[DECKS]):
        raise BogusGameError('Did not find turns for all players')
def GET(self):
    """Serve the requested player's games as a JSON array (text/plain).

    Reads the mandatory ``player`` query parameter, looks up all games whose
    ``PLAYERS`` field contains the normalised name, and emits one object per
    game holding its ``DECKS`` value under ``game`` and its ``_id`` under
    ``id``.
    """
    web.header("Content-Type", "text/plain; charset=utf-8")
    raw_query = web.ctx.env['QUERY_STRING']
    target_player = dict(urlparse.parse_qsl(raw_query))['player']

    db = utils.get_mongo_database()
    wanted = norm_name(target_player)
    matches = db.games.find({PLAYERS: wanted})

    # json_util.default handles the values json cannot encode on its own.
    from pymongo import json_util
    summaries = []
    for record in matches:
        summaries.append({'game': record[DECKS], 'id': record['_id']})
    return json.dumps(summaries, default=json_util.default)
def main(): c = pymongo.Connection() games_collection = c.test.games output_collection = c.test.goals total_checked = 0 goal_check_funcs = [] checker_output = collections.defaultdict(list) for name in globals(): if name.startswith('CheckMatch'): goal = name[len('CheckMatch'):] #FIXME: this is nonobvious checker_output[goal] goal_check_funcs.append((goal, globals()[name])) output_collection.ensure_index('attainers.player') output_collection.ensure_index('goal') scanner = incremental_scanner.IncrementalScanner('goals', c.test) print 'starting with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() for idx, g in enumerate(scanner.scan(games_collection, {})): if idx % 1000 == 0: print idx total_checked += 1 game_val = game.Game(g) for goal_name, goal_checker in goal_check_funcs: output = goal_checker(game_val) if output: for attainer in output: attainer['player'] = name_merger.norm_name( attainer['player']) checker_output[goal_name].append( (game_val.isotropic_url(), output)) mongo_val = {'_id': game_val.get_id(), 'goal': goal_name, 'attainers': output} output_collection.save(mongo_val) print 'ending with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() scanner.save() for goal_name, output in checker_output.iteritems(): print goal_name, len(output)
def get_game_stat_entries(game_val, g_id, date, supply):
    """Build one stats document per player of ``game_val``.

    Returns an empty list for games of dubious quality.  Otherwise each
    entry is keyed by the player's normalised name plus ``g_id`` and records
    the other players' names as they appear in the game, the game date, the
    player's win points, result and turn order, and the supply.
    """
    if game_val.dubious_quality():
        return []

    everyone = game_val.all_player_names()
    entries = []
    for full_name in everyone:
        entry_id = {keys.NAME: norm_name(full_name), 'game_id': g_id}
        others = [other for other in everyone if other != full_name]
        deck = game_val.get_player_deck(full_name)
        entries.append({
            '_id': entry_id,
            keys.PLAYERS: others,
            'game_date': date,
            keys.WIN_POINTS: deck.WinPoints(),
            keys.RESULT: game_val.win_loss_tie(full_name),
            keys.ORDER: deck.TurnOrder(),
            keys.SUPPLY: supply,
        })
    return entries
def get_game_stat_entries(game_val, g_id, date, supply):
    """Return the per-player stat entries for a single game.

    A game flagged by ``dubious_quality()`` yields no entries.  For every
    player the entry holds a compound ``_id`` (normalised name and game id),
    the list of the other players' names, the date, and that player's win
    points, win/loss/tie result, turn order and the shared supply.
    """
    if game_val.dubious_quality():
        return []

    all_p = game_val.all_player_names()

    def build_entry(full_name):
        # One document describing the game from ``full_name``'s side.
        entry = {}
        entry['_id'] = {keys.NAME: norm_name(full_name), 'game_id': g_id}
        entry[keys.PLAYERS] = [p for p in all_p if p != full_name]
        entry['game_date'] = date
        player_deck = game_val.get_player_deck(full_name)
        entry[keys.WIN_POINTS] = player_deck.WinPoints()
        entry[keys.RESULT] = game_val.win_loss_tie(full_name)
        entry[keys.ORDER] = player_deck.TurnOrder()
        entry[keys.SUPPLY] = supply
        return entry

    return [build_entry(full_name) for full_name in all_p]
def main(): c = pymongo.Connection() games_collection = c.test.games output_collection = c.test.goals total_checked = 0 checker_output = collections.defaultdict(list) parser = utils.incremental_max_parser() args = parser.parse_args() scanner = incremental_scanner.IncrementalScanner('goals', c.test) if not args.incremental: scanner.reset() output_collection.remove() output_collection.ensure_index('attainers.player') output_collection.ensure_index('goal') print 'starting with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() for g in utils.progress_meter(scanner.scan(games_collection, {})): total_checked += 1 game_val = game.Game(g) for goal_name, output in all_goals(game_val).items(): for attainer in output: attainer['player'] = name_merger.norm_name( attainer['player']) checker_output[goal_name].append( (game_val.isotropic_url(), output)) mongo_val = {'_id': game_val.get_id(), 'goal': goal_name, 'attainers': output} output_collection.save(mongo_val) print 'ending with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() scanner.save() print_totals(checker_output, total_checked)
def GET(self):
    """Render the "record by opponent" page for the requested player.

    The mandatory ``player`` query parameter selects the player.  Games are
    skipped when they are of dubious quality, when two participants
    normalise to the same name, or when the target does not appear exactly
    once.  Remaining games are grouped by the opponent's normalised name;
    opponents are listed by descending number of games, then by name.  Each
    opponent row shows the most frequently used spelling of the name, three
    counters indexed by the value of ``win_loss_tie`` and one cell per game,
    wrapped every ten games.

    The player name comes from the query string and is HTML-escaped before
    it is placed in the page.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    # NOTE(review): assumes standard_heading() inserts its title verbatim;
    # if it escapes on its own, pass the raw name to it instead.
    safe_player = web.websafe(target_player)

    ret = standard_heading("CouncilRoom.com: Dominion Stats: %s" %
                           safe_player)
    ret += '<form action="/player" method="get">'
    ret += ('<span class="subhead">Record By Opponent for %s</span>' %
            safe_player)
    ret += '<br/><br/>\n\n'
    ret += '<table border=1>'
    ret += '<tr><td>Opponent</td><td>Record</td></tr>'

    db = utils.get_mongo_database()
    norm_target_player = norm_name(target_player)
    games_coll = db.games.find({PLAYERS: norm_target_player})

    # opponent normalised name -> [(opponent name, target name, game)]
    keyed_by_opp = collections.defaultdict(list)
    # opponent normalised name -> {spelling: number of games}
    real_name_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))

    for g in games_coll:
        game_val = game.Game(g)
        if game_val.dubious_quality():
            continue
        all_player_names = game_val.all_player_names()
        norm_names = [norm_name(n) for n in all_player_names]
        if len(set(norm_names)) != len(all_player_names):
            # Two participants collapse to the same normalised name.
            continue
        candidates = [n for n, normed in zip(all_player_names, norm_names)
                      if normed == norm_target_player]
        if len(candidates) != 1:
            continue
        target_player_cur_name = candidates[0]

        for p in game_val.get_player_decks():
            opp_name = p.name()
            if opp_name == target_player_cur_name:
                continue
            other_norm_name = norm_name(opp_name)
            keyed_by_opp[other_norm_name].append(
                (opp_name, target_player_cur_name, game_val))
            real_name_usage[other_norm_name][opp_name] += 1

    keyed_by_opp_list = sorted(keyed_by_opp.items(),
                               key=lambda x: (-len(x[1]), x[0]))

    for opp_norm_name, opp_games in keyed_by_opp_list:
        record = [0, 0, 0]
        for opp_name, tgt_player_curname, g in opp_games:
            record[g.win_loss_tie(tgt_player_curname, opp_name)] += 1
        ret += '<tr>'

        # Get most freq used name for opponent
        #TODO: lambdas can be switched to itemgetters
        opp_cannon_name = max(real_name_usage[opp_norm_name].items(),
                              key=lambda x: x[1])[0]

        # Ten game cells per table row; floor division keeps this an int.
        row_span = (len(opp_games) - 1) // 10 + 1
        ret += '<td rowspan=%d>%s</td>' % (
            row_span, game.PlayerDeck.PlayerLink(opp_cannon_name))
        ret += '<td rowspan=%d>%d-%d-%d</td>' % (row_span, record[0],
                                                 record[1], record[2])
        for idx, (opp_name, tgt_player_curname, g) in enumerate(opp_games):
            if idx % 10 == 0 and idx > 0:
                ret += '</tr><tr>'
            ret += g.short_render_cell_with_perspective(tgt_player_curname,
                                                        opp_name)
        ret += '</tr>\n'
    ret += '</table></body></html>'
    return ret
def associate_game_with_norm_names(game_dict):
    """Store the normalised name of every deck's player under 'players'.

    ``game_dict['players']`` is replaced by a fresh list holding, in deck
    order, the normalised form of each deck's ``'name'``.
    """
    normed_players = []
    game_dict['players'] = normed_players
    for deck in game_dict['decks']:
        normed_players.append(name_merger.norm_name(deck['name']))
def GET(self):
    """Render the profile page for the player named in the query string.

    The page contains the record tables (by game size, by recency, by turn
    order), the goals section, links to card stats and search results, the
    three most recent games and a record-by-opponent table.

    Games are skipped when they are of dubious quality, when two
    participants normalise to the same name, or when the target does not
    appear exactly once.

    The player name and the aliases are HTML-escaped before being placed in
    markup, and the name is URL-quoted where it is used inside a link.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    safe_player = web.websafe(target_player)   # for HTML text
    url_player = web.urlquote(target_player)   # for query-string values

    db = utils.get_mongo_database()
    norm_target_player = norm_name(target_player)
    games_coll = db.games.find({'players': norm_target_player})

    # opponent normalised name -> [(opponent name, target name, game)]
    keyed_by_opp = collections.defaultdict(list)
    # opponent normalised name -> {spelling: number of games}
    real_name_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))
    game_list = []
    aliases = set()

    overall_record = RecordSummary()
    rec_by_game_size = collections.defaultdict(RecordSummary)
    rec_by_date = collections.defaultdict(RecordSummary)
    rec_by_turn_order = collections.defaultdict(RecordSummary)

    date_buckets = (1, 3, 5, 10)
    # One reference date for the whole request.
    today = datetime.datetime.now().date()

    for g in games_coll:
        game_val = game.Game(g)
        if game_val.dubious_quality():
            continue
        all_player_names = game_val.all_player_names()
        norm_names = [norm_name(n) for n in all_player_names]
        if len(set(norm_names)) != len(all_player_names):
            continue
        candidates = [n for n, normed in zip(all_player_names, norm_names)
                      if normed == norm_target_player]
        if len(candidates) != 1:
            continue
        game_list.append(game_val)
        target_player_cur_name = candidates[0]
        aliases.add(target_player_cur_name)

        for p in game_val.get_player_decks():
            deck_name = p.name()
            if deck_name != target_player_cur_name:
                other_norm_name = norm_name(deck_name)
                keyed_by_opp[other_norm_name].append(
                    (deck_name, target_player_cur_name, game_val))
                real_name_usage[other_norm_name][deck_name] += 1
                continue

            # This deck belongs to the target: update the summaries.
            res = game_val.win_loss_tie(deck_name)
            win_points = p.WinPoints()
            overall_record.record_result(res, win_points)
            game_len = len(game_val.get_player_decks())
            rec_by_game_size[game_len].record_result(res, win_points)
            rec_by_turn_order[p.TurnOrder()].record_result(res, win_points)
            for delta in date_buckets:
                # A game counts for a bucket if it is at most ``delta``
                # days old.
                padded = game_val.date() + datetime.timedelta(days=delta)
                if padded.date() >= today:
                    rec_by_date[delta].record_result(res, win_points)

    keyed_by_opp_list = sorted(keyed_by_opp.items(),
                               key=lambda x: (-len(x[1]), x[0]))

    #TODO: a good choice for a template like jinja2
    # NOTE(review): assumes standard_heading() inserts its title verbatim;
    # if it escapes on its own, pass the raw name to it instead.
    ret = standard_heading("CouncilRoom.com: Dominion Stats: %s" %
                           safe_player)
    ret += '<form action="/player" method="get">'
    ret += '<span class="subhead">Profile for %s</span>' % safe_player
    ret += '<span class="search2">'
    ret += (' Search for another player: '
            '<input type="text" name="player" style="width:100px;" /> '
            '<input type="submit" value="View Stats!" /> '
            '</span></form><br><br> ')

    if len(aliases) > 1:
        safe_aliases = [web.websafe(alias) for alias in aliases]
        ret += 'Aliases: ' + ', '.join(safe_aliases) + '\n'

    ret += render_record_table('Record by game size', overall_record,
                               rec_by_game_size,
                               lambda game_size: '%d players' % game_size)
    ret += render_record_table('Recent Record', overall_record,
                               rec_by_date,
                               lambda num_days: 'Last %d days' % num_days)
    ret += render_record_table('Record by turn order', overall_record,
                               rec_by_turn_order,
                               lambda pos: 'Table position %d' % pos)

    ret += '<div style="clear: both;"> </div>'

    ret += goals.MaybeRenderGoals(db, norm_target_player)

    ret += ('<A HREF="/popular_buys?player=%s"><h2>Stats by card</h2>'
            '</A><BR>\n' % url_player)

    ret += '<h2>Most recent games</h2>\n'
    game_list.sort(key=game.Game.get_id, reverse=True)
    qm = query_matcher.QueryMatcher(p1_name=target_player)
    for g in game_list[:3]:
        ret += (query_matcher.GameMatcher(g, qm).display_game_snippet() +
                '<br>')

    ret += ('<A HREF="/search_result?p1_name=%s">(See more)</A>' %
            url_player)

    ret += '<h2>Record by opponent</h2>'
    ret += '<table border=1>'
    ret += '<tr><td>Opponent</td><td>Record</td></tr>'
    for opp_norm_name, opp_games in keyed_by_opp_list:
        record = [0, 0, 0]
        for opp_name, tgt_player_curname, g in opp_games:
            record[g.win_loss_tie(tgt_player_curname, opp_name)] += 1
        ret += '<tr>'

        # Get most freq used name for opponent
        #TODO: lambdas can be switched to itemgetters
        opp_cannon_name = max(real_name_usage[opp_norm_name].items(),
                              key=lambda x: x[1])[0]

        # Ten game cells per table row; floor division keeps this an int.
        row_span = (len(opp_games) - 1) // 10 + 1
        ret += '<td rowspan=%d>%s</td>' % (
            row_span, game.PlayerDeck.PlayerLink(opp_cannon_name))
        ret += '<td rowspan=%d>%d-%d-%d</td>' % (row_span, record[0],
                                                 record[1], record[2])
        for idx, (opp_name, tgt_player_curname, g) in enumerate(opp_games):
            if idx % 10 == 0 and idx > 0:
                ret += '</tr><tr>'
            ret += g.short_render_cell_with_perspective(tgt_player_curname,
                                                        opp_name)
        ret += '</tr>\n'
    ret += '</table></body></html>'
    return ret
def main(): c = pymongo.Connection() games_collection = c.test.games output_collection = c.test.goals stats_collection = c.test.goal_stats total_checked = 0 checker_output = collections.defaultdict(int) parser = utils.incremental_max_parser() parser.add_argument( '--goals', metavar='goal_name', nargs='+', help=('If set, check only the goals specified for all of ' + 'the games that have already been scanned')) args = parser.parse_args() if args.goals: valid_goals = True for goal_name in args.goals: if goal_name not in goal_check_funcs: valid_goals = False print "Unrecognized goal name '%s'" % goal_name if not valid_goals: exit(-1) goals_to_check = args.goals for goal_name in args.goals: stats_collection.save( {'_id': goal_name, 'total': 0} ) scanner = incremental_scanner.IncrementalScanner('subgoals', c.test) scanner.reset() main_scanner = incremental_scanner.IncrementalScanner('goals', c.test) last = main_scanner.get_max_game_id() else: goals_to_check = None scanner = incremental_scanner.IncrementalScanner('goals', c.test) last = None if not args.incremental: scanner.reset() output_collection.remove() output_collection.ensure_index('goals.player') print 'starting with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() for g in utils.progress_meter(scanner.scan(games_collection, {})): total_checked += 1 game_val = game.Game(g) # Get existing goal set (if exists) game_id = game_val.get_id() mongo_val = output_collection.find_one({'_id': game_id}) if mongo_val is None: mongo_val = collections.defaultdict( dict ) mongo_val['_id'] = game_id mongo_val['goals'] = [] # If rechecking, delete old values if goals_to_check is not None: goals = mongo_val['goals'] for ind in range(len(goals) - 1, -1, -1): goal = goals[ind] if goal['goal_name'] in goals_to_check: del goals[ind] # Get new values goals = check_goals(game_val, goals_to_check) # Write new values for goal in goals: name = name_merger.norm_name(goal['player']) goal_name = goal['goal_name'] 
mongo_val['goals'].append(goal) checker_output[goal_name] += 1 mongo_val = dict(mongo_val) output_collection.save(mongo_val) if last and game_id == last: break if args.max_games >= 0 and total_checked >= args.max_games: break print 'ending with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() scanner.save() print_totals(checker_output, total_checked) for goal_name, count in checker_output.items(): stats = stats_collection.find_one( {'_id': goal_name} ) if stats is None: stats = {'_id': goal_name, 'total': 0} stats['total'] += count stats_collection.save( stats )
def associate_game_with_norm_names(game_dict):
    """Fill ``game_dict[PLAYERS]`` with the normalised deck owner names.

    The field is reset to a new list and then receives one normalised name
    per entry of ``game_dict[DECKS]``, in the same order.
    """
    players = game_dict[PLAYERS] = []
    for deck in game_dict[DECKS]:
        raw_name = deck[NAME]
        players.append(name_merger.norm_name(raw_name))
def main():
    """Load new daily leaderboard snapshots into ``leaderboard_history``.

    Files named ``YYYY-MM-DD.html.bz2`` in ``static/leaderboard/`` are
    processed in name order.  Dates listed as bad and dates not later than
    the stored ``last_date`` are skipped.  For each remaining file every
    leaderboard row is parsed and the first entry seen for each normalised
    nickname is pushed onto that player's history.  Processing stops at the
    first file that yields no rows or whose row count differs from the last
    rank seen.  The last successfully processed date is stored at the end.
    """
    filename_pattern = re.compile(
        r'^(?P<date>\d\d\d\d-\d\d-\d\d)\.html\.bz2$')
    leaderboard_pattern = re.compile(
        r'<td>(?P<skill_mean>-?\d+\.\d+) ± '
        r'(?P<skill_error>-?\d+\.\d+)</td><td class=c2>'
        r'(?P<rank>\d+)</td><td class=c>'
        r'(?P<eligible_games_played>\d+)</td><td>'
        r'(?P<nickname>[^<]*) <')

    database = utils.get_mongo_database()
    history_collection = database.leaderboard_history
    scanner_collection = database.scanner

    db_val = scanner_collection.find_one({'_id': 'leaderboard_history'})
    last_date = db_val['last_date'] if db_val else '0000-00-00'

    directory = 'static/leaderboard/'

    bad_leaderboard_dates = utils.get_bad_leaderboard_dates()

    for filename in sorted(os.listdir(directory)):
        name_match = filename_pattern.search(filename)
        if not name_match:
            continue

        date = name_match.group('date')

        if date in bad_leaderboard_dates:
            # don't load data from when the leaderboard was messed up
            log.warning("Skipping %s because the leaderboard was messed up",
                        date)
            continue

        if date <= last_date:
            log.warning("Date %s is less than last date %s", date, last_date)
            continue

        log.info('Processing %s', date)

        # Close the file even if reading or decoding fails.
        file_obj = bz2.BZ2File(directory + filename)
        try:
            content = file_obj.read().decode('utf-8')
        finally:
            file_obj.close()

        nickname_to_entry = {}
        num_matches = 0
        last_rank = -1

        for row in leaderboard_pattern.finditer(content):
            num_matches += 1
            skill_mean = float(row.group('skill_mean'))
            skill_error = float(row.group('skill_error'))
            rank = int(row.group('rank'))
            eligible_games_played = int(row.group('eligible_games_played'))
            nickname = row.group('nickname')

            normed_nickname = name_merger.norm_name(nickname)

            if normed_nickname not in nickname_to_entry:
                nickname_to_entry[normed_nickname] = [
                    date, skill_mean, skill_error, rank,
                    eligible_games_played]
            else:
                log.info('normed nickname %s already exists for %s',
                         normed_nickname, date)

            last_rank = rank

        log.info('%d entries matched', num_matches)

        if num_matches == 0:
            log.error(
                'No entries found, so the regex is probably not doing its job anymore.'
            )
            break

        if num_matches != last_rank:
            log.error(
                'ERROR: # entries does not match last rank, so the regex is probably not doing its job anymore.'
            )
            break

        for nickname, data in nickname_to_entry.items():
            history_collection.update({'_id': nickname},
                                      {'$push': {'history': data}},
                                      upsert=True)

        log.info('%d player histories updated', len(nickname_to_entry))

        last_date = date

    scanner_collection.update({'_id': 'leaderboard_history'},
                              {'$set': {'last_date': last_date}},
                              upsert=True)
def GET(self):
    """Render the profile page for the player named in the query string.

    The page shows the player's level (taken from the most recent
    leaderboard history entry, when one exists), record tables by game size,
    recency and turn order, a per-expansion table, the goals section, links
    to card stats and the record by opponent, the leaderboard history (when
    present) and the three most recent games.

    Games are skipped when they are of dubious quality, when two
    participants normalise to the same name, or when the target does not
    appear exactly once.

    The player name and the aliases are HTML-escaped before being placed in
    markup, and the name is URL-quoted where it is used inside a link.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    safe_player = web.websafe(target_player)   # for HTML text
    url_player = web.urlquote(target_player)   # for query-string values

    db = utils.get_mongo_database()
    norm_target_player = norm_name(target_player)
    games_coll = db.games.find({'players': norm_target_player})

    leaderboard_history_result = db.leaderboard_history.find_one(
        {'_id': norm_target_player})
    leaderboard_history = None
    if leaderboard_history_result:
        leaderboard_history = leaderboard_history_result['history']

    game_list = []
    aliases = set()

    overall_record = RecordSummary()
    rec_by_game_size = collections.defaultdict(RecordSummary)
    rec_by_date = collections.defaultdict(RecordSummary)
    rec_by_turn_order = collections.defaultdict(RecordSummary)

    expansion_dist = collections.defaultdict(float)
    expansion_win_points = collections.defaultdict(float)

    date_buckets = [1, 3, 5, 10]
    # One reference date for the whole request.
    today = datetime.datetime.now().date()

    for g in games_coll:
        game_val = game.Game(g)
        if game_val.dubious_quality():
            continue
        all_player_names = game_val.all_player_names()
        norm_names = [norm_name(n) for n in all_player_names]
        if len(set(norm_names)) != len(all_player_names):
            continue
        candidates = [n for n, normed in zip(all_player_names, norm_names)
                      if normed == norm_target_player]
        if len(candidates) != 1:
            continue
        game_list.append(game_val)
        target_player_cur_name = candidates[0]
        aliases.add(target_player_cur_name)

        pd = game_val.get_player_deck(target_player_cur_name)
        wp = pd.WinPoints()
        res = game_val.win_loss_tie(target_player_cur_name)
        overall_record.record_result(res, wp)
        game_len = len(game_val.get_player_decks())
        rec_by_game_size[game_len].record_result(res, wp)
        rec_by_turn_order[pd.TurnOrder()].record_result(res, wp)
        for delta in date_buckets:
            # A game counts for a bucket if it is at most ``delta`` days old.
            padded = game_val.date() + datetime.timedelta(days=delta)
            if padded.date() >= today:
                rec_by_date[delta].record_result(res, wp)

        for (ex, wt) in game_val.get_expansion_weight().items():
            expansion_dist[ex] += wt
            expansion_win_points[ex] += wt * wp

    #TODO: a good choice for a template like jinja2
    # NOTE(review): assumes standard_heading() inserts its title verbatim;
    # if it escapes on its own, pass the raw name to it instead.
    ret = standard_heading("CouncilRoom.com: Dominion Stats: %s" %
                           safe_player)

    ret += '<form action="/player" method="get">'
    ret += '<span class="subhead">Profile for %s</span>' % safe_player

    leaderboard_history_most_recent = (leaderboard_history[-1]
                                       if leaderboard_history else None)
    if leaderboard_history_most_recent:
        # Level is the second stored value minus the third, floored and
        # never below zero.
        level = (leaderboard_history_most_recent[1] -
                 leaderboard_history_most_recent[2])
        level = int(max(math.floor(level), 0))
        ret += '<span class="level">Level ' + str(level) + '</span>'

    ret += '<span class="search2">'
    ret += (' Search for another player: '
            '<input type="text" name="player" style="width:100px;" /> '
            '<input type="submit" value="View Stats!" /> '
            '</span></form><br><br> ')

    if len(aliases) > 1:
        safe_aliases = [web.websafe(alias) for alias in aliases]
        ret += 'Aliases: ' + ', '.join(safe_aliases) + '\n'

    ret += render_record_table('Record by game size', overall_record,
                               rec_by_game_size,
                               lambda game_size: '%d players' % game_size)
    ret += render_record_table('Recent Record', overall_record,
                               rec_by_date,
                               lambda num_days: 'Last %d days' % num_days)
    ret += render_record_table('Record by turn order', overall_record,
                               rec_by_turn_order,
                               lambda pos: 'Table position %d' % pos)

    ret += '<div style="clear: both;"> </div>'

    ret += '<div class="cardborder yellow"><h3>Expansion Data</h3><table class="stats">'
    ret += '<tr><th>Card Set<th>Avg. Cards<br/> Per Kingdom<th>Weighted<br/> Win Points<th>Favor'

    for (ex, weight) in sorted(expansion_dist.items(),
                               key=operator.itemgetter(1), reverse=True):
        if ex == 'Fan':
            continue
        wp = expansion_win_points[ex] / weight
        average = overall_record.average_win_points()
        ret += '<tr><th>%s</th>' % ex
        ret += '<td>%.2f</td>' % (weight * 10. / len(game_list))
        # Close the cell; an opening tag here would add an empty extra
        # column to every row.
        ret += '<td>%.2f</td>' % wp
        if average > 0:
            ret += '<td>%.2f%%</td>' % ((wp - average) * 100. / average)
        else:
            ret += '<td>0</td>'
    ret += '</table></div>'

    ret += '<div style="clear: both;"> </div>'

    ret += goals.MaybeRenderGoals(db, norm_target_player)

    ret += ('<A HREF="/popular_buys?player=%s"><h2>Stats by card</h2>'
            '</A>\n' % url_player)

    ret += ('<A HREF="/games_by_opponent?player=%s"><h2>Record by opponent'
            '</h2></A>\n' % url_player)

    if leaderboard_history:
        render = web.template.render('')
        ret += str(render.player_page_leaderboard_history_template(
            json.dumps(leaderboard_history)))

    ret += '<h2>Most recent games</h2>\n'
    game_list.sort(key=game.Game.get_id, reverse=True)
    qm = query_matcher.QueryMatcher(p1_name=target_player)
    for g in game_list[:3]:
        ret += (query_matcher.GameMatcher(g, qm).display_game_snippet() +
                '<br>')

    ret += ('<A HREF="/search_result?p1_name=%s">(See more)</A>' %
            url_player)

    ret += '</body></html>'
    return ret
def GET(self):
    """Render the "record by opponent" page for the requested player.

    The mandatory ``player`` query parameter selects the player.  Games are
    skipped when they are of dubious quality, when two participants
    normalise to the same name, or when the target does not appear exactly
    once.  Remaining games are grouped by the opponent's normalised name and
    opponents are listed by descending number of games, then by name.  Each
    opponent row shows the most frequently used spelling of the name, three
    counters indexed by the value of ``win_loss_tie`` and one cell per game,
    wrapped every ten games.

    The player name comes from the query string and is HTML-escaped before
    it is placed in the page.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    # NOTE(review): assumes standard_heading() inserts its title verbatim;
    # if it escapes on its own, pass the raw name to it instead.
    safe_player = web.websafe(target_player)

    ret = standard_heading("CouncilRoom.com: Dominion Stats: %s" %
                           safe_player)
    ret += '<form action="/player" method="get">'
    ret += ('<span class="subhead">Record By Opponent for %s</span>' %
            safe_player)
    ret += '<br/><br/>\n\n'
    ret += '<table border=1>'
    ret += '<tr><td>Opponent</td><td>Record</td></tr>'

    db = utils.get_mongo_database()
    norm_target_player = norm_name(target_player)
    games_coll = db.games.find({'players': norm_target_player})

    # opponent normalised name -> [(opponent name, target name, game)]
    keyed_by_opp = collections.defaultdict(list)
    # opponent normalised name -> {spelling: number of games}
    real_name_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))

    for g in games_coll:
        game_val = game.Game(g)
        if game_val.dubious_quality():
            continue
        all_player_names = game_val.all_player_names()
        norm_names = [norm_name(n) for n in all_player_names]
        if len(set(norm_names)) != len(all_player_names):
            # Two participants collapse to the same normalised name.
            continue
        candidates = [n for n, normed in zip(all_player_names, norm_names)
                      if normed == norm_target_player]
        if len(candidates) != 1:
            continue
        target_player_cur_name = candidates[0]

        for p in game_val.get_player_decks():
            opp_name = p.name()
            if opp_name == target_player_cur_name:
                continue
            other_norm_name = norm_name(opp_name)
            keyed_by_opp[other_norm_name].append(
                (opp_name, target_player_cur_name, game_val))
            real_name_usage[other_norm_name][opp_name] += 1

    keyed_by_opp_list = sorted(keyed_by_opp.items(),
                               key=lambda x: (-len(x[1]), x[0]))

    for opp_norm_name, opp_games in keyed_by_opp_list:
        record = [0, 0, 0]
        for opp_name, tgt_player_curname, g in opp_games:
            record[g.win_loss_tie(tgt_player_curname, opp_name)] += 1
        ret += '<tr>'

        # Get most freq used name for opponent
        #TODO: lambdas can be switched to itemgetters
        opp_cannon_name = max(real_name_usage[opp_norm_name].items(),
                              key=lambda x: x[1])[0]

        # Ten game cells per table row; floor division keeps this an int.
        row_span = (len(opp_games) - 1) // 10 + 1
        ret += '<td rowspan=%d>%s</td>' % (
            row_span, game.PlayerDeck.PlayerLink(opp_cannon_name))
        ret += '<td rowspan=%d>%d-%d-%d</td>' % (row_span, record[0],
                                                 record[1], record[2])
        for idx, (opp_name, tgt_player_curname, g) in enumerate(opp_games):
            if idx % 10 == 0 and idx > 0:
                ret += '</tr><tr>'
            ret += g.short_render_cell_with_perspective(tgt_player_curname,
                                                        opp_name)
        ret += '</tr>\n'
    ret += '</table></body></html>'
    return ret
def main(): filename_pattern = re.compile('^(?P<date>\d\d\d\d-\d\d-\d\d)\.html\.bz2$') leaderboard_pattern = re.compile('<td>(?P<skill_mean>-?\d+\.\d+) ± ' + \ '(?P<skill_error>-?\d+\.\d+)</td><td class=c2>' + \ '(?P<rank>\d+)</td><td class=c>' + \ '(?P<eligible_games_played>\d+)</td><td>' + \ '(?P<nickname>[^<]*) <') conn = pymongo.Connection() database = conn.test history_collection = database.leaderboard_history scanner_collection = database.scanner db_val = scanner_collection.find_one({'_id': 'leaderboard_history'}) last_date = db_val['last_date'] if db_val else '0000-00-00' directory = 'static/leaderboard/' filenames = os.listdir(directory) filenames.sort() for filename in filenames: match = filename_pattern.search(filename) if not match: continue date = match.group('date') if '2011-11-24' <= date and date <= '2011-12-04': # don't load data from when the leaderboard was messed up continue if date <= last_date: continue print date file_obj = bz2.BZ2File(directory + filename) content = file_obj.read().decode('utf-8') file_obj.close() nickname_to_entry = {} num_matches = 0 last_rank = -1 pos = 0 while True: match = leaderboard_pattern.search(content, pos) if not match: break num_matches += 1 skill_mean = float(match.group('skill_mean')) skill_error = float(match.group('skill_error')) rank = int(match.group('rank')) eligible_games_played = int(match.group('eligible_games_played')) nickname = match.group('nickname') normed_nickname = name_merger.norm_name(nickname) if normed_nickname not in nickname_to_entry: nickname_to_entry[normed_nickname] = [date, skill_mean, skill_error, rank, eligible_games_played] else: print 'normed nickname already exists for this day:', normed_nickname last_rank = rank pos = match.end() print num_matches, 'entries matched' if num_matches != last_rank: print 'ERROR: # entries does not match last rank, so the regex is probably not doing its job anymore.' 
break for nickname, data in nickname_to_entry.iteritems(): history_collection.update({'_id': nickname}, {'$push': {'history': data}}, upsert=True) print len(nickname_to_entry), 'player histories updated' print last_date = date scanner_collection.update({'_id': 'leaderboard_history'}, {'$set': {'last_date': last_date}}, upsert=True)
def GET(self):
    """Render the HTML profile page for the player in the query string.

    Reads the ``player`` query parameter, loads every game in ``db.games``
    whose ``players`` list contains the normalized name, and builds:
    overall / by-game-size / recent / by-turn-order records, the three
    most recent games, and a table of results grouped by opponent.

    Games are ignored when they are of dubious quality, when two players
    normalize to the same name, or when the target player cannot be
    identified uniquely.

    Returns the page as a string.  Raises ``KeyError`` when the ``player``
    parameter is missing.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    # The name arrives straight from the query string, so it must be
    # escaped before it is placed into markup or attribute values.
    safe_player = web.htmlquote(target_player)

    db = utils.get_mongo_database()
    games = db.games
    norm_target_player = norm_name(target_player)
    games_coll = games.find({'players': norm_target_player})

    keyed_by_opp = collections.defaultdict(list)
    real_name_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))

    game_list = []
    aliases = set()

    overall_record = RecordSummary()
    rec_by_game_size = collections.defaultdict(RecordSummary)
    rec_by_date = collections.defaultdict(RecordSummary)
    rec_by_turn_order = collections.defaultdict(RecordSummary)

    date_buckets = (1, 3, 5, 10)
    # Loop-invariant: one "today" for the whole request.
    today = datetime.datetime.now().date()

    for g in games_coll:
        game_val = game.Game(g)
        if game_val.dubious_quality():
            continue
        all_player_names = game_val.all_player_names()
        norm_names = map(norm_name, all_player_names)
        # Two players collapsing to one normalized name makes the game
        # ambiguous for per-player bookkeeping.
        if len(set(norm_names)) != len(all_player_names):
            continue
        target_player_cur_name_cand = [
            n for n in all_player_names
            if norm_name(n) == norm_target_player]
        if len(target_player_cur_name_cand) != 1:
            continue
        game_list.append(game_val)
        target_player_cur_name = target_player_cur_name_cand[0]
        aliases.add(target_player_cur_name)

        player_decks = game_val.get_player_decks()
        for p in player_decks:
            if p.name() != target_player_cur_name:
                other_norm_name = norm_name(p.name())
                keyed_by_opp[other_norm_name].append(
                    (p.name(), target_player_cur_name, game_val))
                real_name_usage[other_norm_name][p.name()] += 1
                continue

            res = game_val.win_loss_tie(p.name())
            win_points = p.WinPoints()
            overall_record.record_result(res, win_points)
            rec_by_game_size[len(player_decks)].record_result(
                res, win_points)
            rec_by_turn_order[p.TurnOrder()].record_result(res, win_points)
            for delta in date_buckets:
                # A game counts for the "last N days" bucket when its
                # date pushed forward by N days reaches today.
                padded = game_val.date() + datetime.timedelta(days=delta)
                if padded.date() >= today:
                    rec_by_date[delta].record_result(res, win_points)

    # Most-played opponents first, ties broken by normalized name.
    keyed_by_opp_list = sorted(keyed_by_opp.items(),
                               key=lambda x: (-len(x[1]), x[0]))

    #TODO: a good choice for a template like jinja2
    ret = ('<html><head><title>CouncilRoom.com: Dominion Stats: '
           '%s</title></head>\n' % safe_player)

    ret += '<body><A HREF="/">Back to CouncilRoom.com</A><BR><BR>'

    ret += (""" Search for another player: <form action='/player' method='get'> """
            """<input type="text" name="player" style="width:100px;" /> """
            """<input type="submit" value="Submit" /> </form><hr> """)

    ret += '<h2>CouncilRoom Profile for %s</h2><BR>' % safe_player

    if len(aliases) > 1:
        # Aliases are player-chosen names taken from game data.
        ret += ('Aliases: ' +
                ', '.join(web.htmlquote(alias) for alias in aliases) +
                '<br>\n')

    ret += render_record_table('Record by game size', overall_record,
                               rec_by_game_size,
                               lambda game_size: '%d players' % game_size)
    ret += render_record_table('Recent Record', overall_record,
                               rec_by_date,
                               lambda num_days: 'Last %d days' % num_days)
    ret += render_record_table('Record by turn order', overall_record,
                               rec_by_turn_order,
                               lambda pos: 'Table position %d' % pos)

    ret += '<div style="clear: both;"> </div>'

    ret += goals.MaybeRenderGoals(db, norm_target_player)

    ret += ('<A HREF="/popular_buys?player=%s"><h2>Stats by card</h2></A><BR>\n'
            % safe_player)

    ret += '<h2>Most recent games</h2>\n'
    game_list.sort(key=game.Game.get_id, reverse=True)
    # The matcher needs the raw name, not the HTML-escaped one.
    qm = query_matcher.QueryMatcher(p1_name=target_player)
    for g in game_list[:3]:
        ret += (query_matcher.GameMatcher(g, qm).display_game_snippet() +
                '<br>')

    ret += ('<A HREF="/search_result?p1_name=%s">(See more)</A>'
            % safe_player)

    ret += '<h2>Record by opponent</h2>'
    ret += '<table border=1>'
    ret += '<tr><td>Opponent</td><td>Record</td></tr>'
    for opp_norm_name, opp_games in keyed_by_opp_list:
        # win_loss_tie's result is used directly as an index into this
        # three-slot tally.
        record = [0, 0, 0]
        for opp_name, tgt_player_curname, g in opp_games:
            record[g.win_loss_tie(tgt_player_curname, opp_name)] += 1
        ret += '<tr>'

        # Get most freq used name for opponent
        #TODO: lambdas can be switched to itemgetters
        opp_cannon_name = max(real_name_usage[opp_norm_name].iteritems(),
                              key=lambda x: x[1])[0]

        # Ten game cells per table row; the name/record cells span them all.
        row_span = (len(opp_games) - 1) // 10 + 1
        ret += '<td rowspan=%d>%s</td>' % (
            row_span, game.PlayerDeck.PlayerLink(opp_cannon_name))
        ret += '<td rowspan=%d>%d-%d-%d</td>' % (row_span, record[0],
                                                 record[1], record[2])
        for idx, (opp_name, tgt_player_curname, g) in enumerate(opp_games):
            if idx % 10 == 0 and idx > 0:
                ret += '</tr><tr>'
            ret += g.short_render_cell_with_perspective(tgt_player_curname,
                                                        opp_name)
        ret += '</tr>\n'
    ret += '</table></body></html>'
    return ret
def name_match(self, name):
    """Count the ways ``name`` matches this object's player filters.

    Returns 0, 1 or 2: one point when ``name`` is in ``self.exact_names``
    and one point when its normalized form is in ``self.players_restrict``.
    """
    exact_hit = name in self.exact_names
    normed_hit = name_merger.norm_name(name) in self.players_restrict
    # Adding two booleans yields the integer match count.
    return exact_hit + normed_hit
def GET(self):
    """Render the HTML profile page for the player in the query string.

    Reads the ``player`` query parameter and aggregates that player's
    documents from ``db.game_stats`` into overall / by-game-size / recent /
    by-turn-order records and per-expansion weights.  Also shows the
    player's level from the latest ``db.leaderboard_history`` entry (when
    one exists), the leaderboard history chart, and the three most recent
    games.

    Returns the page as a string.  Raises ``KeyError`` when the ``player``
    parameter is missing.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    # The name arrives straight from the query string, so it must be
    # escaped before it is placed into markup or attribute values.
    safe_player = web.htmlquote(target_player)

    db = utils.get_mongo_database()
    game_stats = db.game_stats
    norm_target_player = norm_name(target_player)
    games_coll = game_stats.find({compkey('_id', NAME): norm_target_player})

    leaderboard_history_result = db.leaderboard_history.find_one(
        {'_id': norm_target_player})
    leaderboard_history = None
    if leaderboard_history_result:
        leaderboard_history = leaderboard_history_result['history']

    game_list = []
    aliases = set()

    overall_record = RecordSummary()
    rec_by_game_size = collections.defaultdict(RecordSummary)
    rec_by_date = collections.defaultdict(RecordSummary)
    rec_by_turn_order = collections.defaultdict(RecordSummary)
    expansion_dist = collections.defaultdict(float)
    expansion_win_points = collections.defaultdict(float)

    date_buckets = [1, 3, 5, 10]
    # Cutoffs are YYYYMMDD strings so they can be compared with the
    # stored 'game_date' values using plain string ordering.
    today = datetime.datetime.now().date()
    cutoffs = {}
    for delta in date_buckets:
        cutoff = today + datetime.timedelta(days=-delta)
        cutoffs[delta] = cutoff.strftime("%Y%m%d")

    # NOTE: This assumes that game IDs can be lexically sorted
    # into temporal order
    for g in games_coll.sort('_id', pymongo.DESCENDING):
        game_list.append(g['_id']['game_id'])
        # TODO: Turn this back. The concept of aliases only comes
        # into play when two different "real" player names both
        # normalize to the same "normalized" player name.  (The name
        # stored with this row is g['_id'][NAME].)
        # aliases.add(target_player_cur_name)

        wp = g[WIN_POINTS]
        res = g[RESULT]
        overall_record.record_result(res, wp)
        # The stored player list is one short of the table size, hence +1.
        game_len = len(g[PLAYERS]) + 1
        rec_by_game_size[game_len].record_result(res, wp)
        rec_by_turn_order[g[ORDER]].record_result(res, wp)
        for delta in date_buckets:
            if g['game_date'] >= cutoffs[delta]:
                rec_by_date[delta].record_result(res, wp)

        supply = [dominioncards.index_to_card(i) for i in g[SUPPLY]]
        for (ex, wt) in dominioncards.get_expansion_weight(supply).items():
            expansion_dist[ex] += wt
            expansion_win_points[ex] += wt * wp

    #TODO: a good choice for a template like jinja2
    # NOTE(review): the title is passed to standard_heading unescaped --
    # confirm that helper escapes its argument.
    ret = standard_heading("CouncilRoom.com: Dominion Stats: %s" % target_player)

    ret += '<form action="/player" method="get">'
    ret += '<span class="subhead">Profile for %s</span>' % safe_player

    leaderboard_history_most_recent = (leaderboard_history[-1]
                                       if leaderboard_history else None)
    if leaderboard_history_most_recent:
        # History entries hold skill mean at index 1 and skill error at
        # index 2; the level is their difference, floored, and never negative.
        level = (leaderboard_history_most_recent[1] -
                 leaderboard_history_most_recent[2])
        level = int(max(math.floor(level), 0))
        ret += '<span class="level">Level ' + str(level) + '</span>'

    ret += '<span class="search2">'
    ret += (' Search for another player: '
            '<input type="text" name="player" style="width:100px;" /> '
            '<input type="submit" value="View Stats!" /> </span></form><br><br> ')

    if len(aliases) > 1:
        ret += ('Aliases: ' +
                ', '.join(web.htmlquote(alias) for alias in aliases) + '\n')

    ret += render_record_table('Record by game size', overall_record,
                               rec_by_game_size,
                               lambda game_size: '%d players' % game_size)
    ret += render_record_table('Recent Record', overall_record,
                               rec_by_date,
                               lambda num_days: 'Last %d days' % num_days)
    ret += render_record_table('Record by turn order', overall_record,
                               rec_by_turn_order,
                               lambda pos: 'Table position %d' % pos)

    ret += '<div style="clear: both;"> </div>'

    ret += '<div class="cardborder yellow"><h3>Expansion Data</h3><table class="stats">'
    ret += '<tr><th>Card Set<th>Avg. Cards<br/> Per Kingdom<th>Weighted<br/> Win Points<th>Favor'

    for (ex, weight) in sorted(expansion_dist.iteritems(),
                               key=operator.itemgetter(1), reverse=True):
        if ex == 'Fan':
            continue
        # Guard the division so an expansion with zero weight cannot
        # take the page down.
        wp = expansion_win_points[ex] / weight if weight else 0.0
        average = overall_record.average_win_points()
        ret += '<tr><th>%s</th>' % ex
        ret += '<td>%.2f</td>' % (weight * 10. / len(game_list))
        # Close the cell: the old '<td>..<td>' opened a stray empty cell
        # and shifted the Favor column.
        ret += '<td>%.2f</td>' % wp
        if average > 0:
            ret += '<td>%.2f%%</td>' % ((wp - average) * 100. / average)
        else:
            ret += '<td>0</td>'
    ret += '</table></div>'

    ret += '<div style="clear: both;"> </div>'

    ret += goals.MaybeRenderGoals(db, norm_target_player)

    ret += ('<A HREF="/popular_buys?player=%s"><h2>Stats by card</h2></A>\n'
            % safe_player)

    ret += ('<A HREF="/games_by_opponent?player=%s"><h2>Record by opponent</h2></A>\n'
            % safe_player)

    if leaderboard_history:
        render = web.template.render('')
        ret += str(render.player_page_leaderboard_history_template(
            json.dumps(leaderboard_history)))

    ret += '<h2>Most recent games</h2>\n'
    # The matcher needs the raw name, not the HTML-escaped one.
    qm = query_matcher.QueryMatcher(p1_name=target_player)
    for g_id in game_list[:3]:
        g = db.games.find_one({'_id': g_id})
        game_val = game.Game(g)
        ret += (query_matcher.GameMatcher(game_val, qm).display_game_snippet() +
                '<br>')

    ret += ('<A HREF="/search_result?p1_name=%s">(See more)</A>'
            % safe_player)

    ret += '</body></html>'
    return ret
def _add_name(self, name):
    """Register ``name`` as a player to match.

    Anything that is not already ``unicode`` is decoded as UTF-8 first.
    The normalized form is appended to ``self.players_restrict`` and the
    decoded name itself to ``self.exact_names``.
    """
    # isinstance (rather than an exact type comparison) keeps unicode
    # subclasses from being pushed through .decode(), which would first
    # encode them implicitly as ASCII and fail on non-ASCII names.
    if not isinstance(name, unicode):
        name = name.decode('utf8')
    self.players_restrict.append(name_merger.norm_name(name))
    self.exact_names.append(name)
def main(): c = pymongo.Connection() games_collection = c.test.games output_collection = c.test.goals stats_collection = c.test.goal_stats total_checked = 0 checker_output = collections.defaultdict(int) parser = utils.incremental_max_parser() parser.add_argument( '--goals', metavar='goal_name', nargs='+', help=('If set, check only the goals specified for all of ' + 'the games that have already been scanned')) args = parser.parse_args() if args.goals: valid_goals = True for goal_name in args.goals: if goal_name not in goal_check_funcs: valid_goals = False print "Unrecognized goal name '%s'" % goal_name if not valid_goals: exit(-1) goals_to_check = args.goals for goal_name in args.goals: stats_collection.save({'_id': goal_name, 'total': 0}) scanner = incremental_scanner.IncrementalScanner('subgoals', c.test) scanner.reset() main_scanner = incremental_scanner.IncrementalScanner('goals', c.test) last = main_scanner.get_max_game_id() else: goals_to_check = None scanner = incremental_scanner.IncrementalScanner('goals', c.test) last = None if not args.incremental: scanner.reset() output_collection.remove() output_collection.ensure_index('goals.player') print 'starting with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() for g in utils.progress_meter(scanner.scan(games_collection, {})): total_checked += 1 game_val = game.Game(g) # Get existing goal set (if exists) game_id = game_val.get_id() mongo_val = output_collection.find_one({'_id': game_id}) if mongo_val is None: mongo_val = collections.defaultdict(dict) mongo_val['_id'] = game_id mongo_val['goals'] = [] # If rechecking, delete old values if goals_to_check is not None: goals = mongo_val['goals'] for ind in range(len(goals) - 1, -1, -1): goal = goals[ind] if goal['goal_name'] in goals_to_check: del goals[ind] # Get new values goals = check_goals(game_val, goals_to_check) # Write new values for goal in goals: name = name_merger.norm_name(goal['player']) goal_name = goal['goal_name'] 
mongo_val['goals'].append(goal) checker_output[goal_name] += 1 mongo_val = dict(mongo_val) output_collection.save(mongo_val) if last and game_id == last: break if args.max_games >= 0 and total_checked >= args.max_games: break print 'ending with id', scanner.get_max_game_id(), 'and num games', \ scanner.get_num_games() scanner.save() print_totals(checker_output, total_checked) for goal_name, count in checker_output.items(): stats = stats_collection.find_one({'_id': goal_name}) if stats is None: stats = {'_id': goal_name, 'total': 0} stats['total'] += count stats_collection.save(stats)
def associate_game_with_norm_names(game_dict):
    """Set ``game_dict["players"]`` to the normalized name of every deck.

    Any existing ``"players"`` value is replaced.  Names are taken from
    the ``"name"`` field of each entry in ``game_dict["decks"]``, in order.
    The dict is modified in place; nothing is returned.
    """
    # Install the fresh list first so it is present even if reading the
    # decks fails part-way through.
    players = game_dict["players"] = []
    players.extend(name_merger.norm_name(deck["name"])
                   for deck in game_dict["decks"])
def GET(self):
    """Render the HTML profile page for the player in the query string.

    Reads the ``player`` query parameter and aggregates that player's
    documents from ``db.game_stats`` into overall / by-game-size / recent /
    by-turn-order records and per-expansion weights.  Also shows the
    player's level from the latest ``db.leaderboard_history`` entry (when
    one exists), the leaderboard history chart, and three recent games.
    When at least three game ids contain ``'.txt'``, the three largest of
    those ids are shown; otherwise the first three ids in descending
    ``_id`` order are shown.

    Returns the page as a string.  Raises ``KeyError`` when the ``player``
    parameter is missing.
    """
    web.header("Content-Type", "text/html; charset=utf-8")
    query_dict = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    target_player = query_dict['player'].decode('utf-8')
    # The name arrives straight from the query string, so it must be
    # escaped before it is placed into markup or attribute values.
    safe_player = web.htmlquote(target_player)

    db = utils.get_mongo_database()
    game_stats = db.game_stats
    norm_target_player = norm_name(target_player)
    games_coll = game_stats.find({compkey('_id', NAME): norm_target_player})

    leaderboard_history_result = db.leaderboard_history.find_one(
        {'_id': norm_target_player})
    leaderboard_history = None
    if leaderboard_history_result:
        leaderboard_history = leaderboard_history_result['history']

    game_list = []
    aliases = set()

    overall_record = RecordSummary()
    rec_by_game_size = collections.defaultdict(RecordSummary)
    rec_by_date = collections.defaultdict(RecordSummary)
    rec_by_turn_order = collections.defaultdict(RecordSummary)
    expansion_dist = collections.defaultdict(float)
    expansion_win_points = collections.defaultdict(float)

    date_buckets = [1, 3, 5, 10]
    # Cutoffs are YYYYMMDD strings so they can be compared with the
    # stored 'game_date' values using plain string ordering.
    today = datetime.datetime.now().date()
    cutoffs = {}
    for delta in date_buckets:
        cutoff = today + datetime.timedelta(days=-delta)
        cutoffs[delta] = cutoff.strftime("%Y%m%d")

    # NOTE: This assumes that game IDs can be lexically sorted
    # into temporal order
    for g in games_coll.sort('_id', pymongo.DESCENDING):
        game_list.append(g['_id']['game_id'])
        # TODO: Turn this back. The concept of aliases only comes
        # into play when two different "real" player names both
        # normalize to the same "normalized" player name.  (The name
        # stored with this row is g['_id'][NAME].)
        # aliases.add(target_player_cur_name)

        wp = g[WIN_POINTS]
        res = g[RESULT]
        overall_record.record_result(res, wp)
        # The stored player list is one short of the table size, hence +1.
        game_len = len(g[PLAYERS]) + 1
        rec_by_game_size[game_len].record_result(res, wp)
        rec_by_turn_order[g[ORDER]].record_result(res, wp)
        for delta in date_buckets:
            if g['game_date'] >= cutoffs[delta]:
                rec_by_date[delta].record_result(res, wp)

        supply = [dominioncards.index_to_card(i) for i in g[SUPPLY]]
        for (ex, wt) in dominioncards.get_expansion_weight(supply).items():
            expansion_dist[ex] += wt
            expansion_win_points[ex] += wt * wp

    #TODO: a good choice for a template like jinja2
    # NOTE(review): the title is passed to standard_heading unescaped --
    # confirm that helper escapes its argument.
    ret = standard_heading("CouncilRoom.com: Dominion Stats: %s" % target_player)

    ret += '<form action="/player" method="get">'
    ret += '<span class="subhead">Profile for %s</span>' % safe_player

    leaderboard_history_most_recent = (leaderboard_history[-1]
                                       if leaderboard_history else None)
    if leaderboard_history_most_recent:
        # History entries hold skill mean at index 1 and skill error at
        # index 2; the level is their difference, floored, and never negative.
        level = (leaderboard_history_most_recent[1] -
                 leaderboard_history_most_recent[2])
        level = int(max(math.floor(level), 0))
        ret += '<span class="level">Level ' + str(level) + '</span>'

    ret += '<span class="search2">'
    ret += (' Search for another player: '
            '<input type="text" name="player" style="width:100px;" /> '
            '<input type="submit" value="View Stats!" /> </span></form><br><br> ')

    if len(aliases) > 1:
        ret += ('Aliases: ' +
                ', '.join(web.htmlquote(alias) for alias in aliases) + '\n')

    ret += render_record_table('Record by game size', overall_record,
                               rec_by_game_size,
                               lambda game_size: '%d players' % game_size)
    ret += render_record_table('Recent Record', overall_record,
                               rec_by_date,
                               lambda num_days: 'Last %d days' % num_days)
    ret += render_record_table('Record by turn order', overall_record,
                               rec_by_turn_order,
                               lambda pos: 'Table position %d' % pos)

    ret += '<div style="clear: both;"> </div>'

    ret += '<div class="cardborder yellow"><h3>Expansion Data</h3><table class="stats">'
    ret += '<tr><th>Card Set<th>Avg. Cards<br/> Per Kingdom<th>Weighted<br/> Win Points<th>Favor'

    for (ex, weight) in sorted(expansion_dist.iteritems(),
                               key=operator.itemgetter(1), reverse=True):
        if ex == 'Fan':
            continue
        # Guard the division so an expansion with zero weight cannot
        # take the page down.
        wp = expansion_win_points[ex] / weight if weight else 0.0
        average = overall_record.average_win_points()
        ret += '<tr><th>%s</th>' % ex
        ret += '<td>%.2f</td>' % (weight * 10. / len(game_list))
        # Close the cell: the old '<td>..<td>' opened a stray empty cell
        # and shifted the Favor column.
        ret += '<td>%.2f</td>' % wp
        if average > 0:
            ret += '<td>%.2f%%</td>' % ((wp - average) * 100. / average)
        else:
            ret += '<td>0</td>'
    ret += '</table></div>'

    ret += '<div style="clear: both;"> </div>'

    ret += goals.MaybeRenderGoals(db, norm_target_player)

    ret += ('<A HREF="/popular_buys?player=%s"><h2>Stats by card</h2></A>\n'
            % safe_player)

    ret += ('<A HREF="/games_by_opponent?player=%s"><h2>Record by opponent</h2></A>\n'
            % safe_player)

    if leaderboard_history:
        render = web.template.render('')
        ret += str(render.player_page_leaderboard_history_template(
            json.dumps(leaderboard_history)))

    ret += '<h2>Most recent games</h2>\n'
    # The matcher needs the raw name, not the HTML-escaped one.
    qm = query_matcher.QueryMatcher(p1_name=target_player)

    # Test each id for the '.txt' marker.  The previous code tested the
    # whole list for an element equal to '.txt', which never matched, so
    # this preference was dead code.
    goko_games = [g_id for g_id in game_list if '.txt' in g_id]
    if len(goko_games) > 2:
        goko_games.sort(reverse=True)
        most_recent = goko_games[:3]
    else:
        most_recent = game_list[:3]

    for g_id in most_recent:
        g = db.games.find_one({'_id': g_id})
        game_val = game.Game(g)
        ret += (query_matcher.GameMatcher(game_val, qm).display_game_snippet() +
                '<br>')

    ret += ('<A HREF="/search_result?p1_name=%s">(See more)</A>'
            % safe_player)

    ret += '</body></html>'
    return ret
def main():
    """Load new daily leaderboard snapshots into ``leaderboard_history``.

    Walks ``static/leaderboard/`` in sorted filename order, considering
    only files named ``YYYY-MM-DD.html.bz2``.  Dates listed by
    ``utils.get_bad_leaderboard_dates()`` and dates at or before the last
    date stored in the ``scanner`` collection are skipped.  Each file is
    scanned with both the "iso" and the "goko" row patterns; every row
    becomes a history entry
    ``[date, skill_mean, skill_error, rank, eligible_games_played]`` that
    is pushed onto the document keyed by nickname.  Goko rows carry no
    error or games-played figure, so both are stored as 0.

    Processing stops at the first file that yields no rows or whose row
    count does not equal the last rank seen.  The last successfully
    loaded date is then written back to the ``scanner`` collection.
    """
    filename_pattern = re.compile(r'^(?P<date>\d\d\d\d-\d\d-\d\d)\.html\.bz2$')
    # NOTE(review): this pattern contains a literal non-ASCII character and
    # is matched against decoded (unicode) page text -- confirm the module
    # declares a source encoding and that the literal matches the page.
    iso_leaderboard_pattern = re.compile(
        r'<td>(?P<skill_mean>-?\d+\.\d+) ± '
        r'(?P<skill_error>-?\d+\.\d+)</td><td class=c2>'
        r'(?P<rank>\d+)</td><td class=c>'
        r'(?P<eligible_games_played>\d+)</td><td>'
        r'(?P<nickname>[^<]*) <')
    goko_leaderboard_pattern = re.compile(
        r'\s+<td class="leaders-table-item table-item-rank">(?P<rank>\d+)</td>\s*\n'
        r'\s*<td class="leaders-table-item table-item-name"><img [^>]*>(?P<nickname>.*)</td>\s*\n'
        r'\s*<td class="leaders-table-item table-item-points">(?P<skill_mean>\d+)</td>')

    database = utils.get_mongo_database()
    history_collection = database.leaderboard_history
    scanner_collection = database.scanner

    db_val = scanner_collection.find_one({'_id': 'leaderboard_history'})
    last_date = db_val['last_date'] if db_val else '0000-00-00'

    directory = 'static/leaderboard/'

    bad_leaderboard_dates = utils.get_bad_leaderboard_dates()

    for filename in sorted(os.listdir(directory)):
        match = filename_pattern.search(filename)
        if not match:
            continue

        date = match.group('date')

        if date in bad_leaderboard_dates:
            # don't load data from when the leaderboard was messed up
            log.warning("Skipping %s because the leaderboard was messed up", date)
            continue

        if date <= last_date:
            log.warning("Date %s is less than last date %s", date, last_date)
            continue

        log.info('Processing %s', date)

        # Close the archive even if reading or decoding fails.
        file_obj = bz2.BZ2File(directory + filename)
        try:
            content = file_obj.read().decode('utf-8')
        finally:
            file_obj.close()

        nickname_to_entry = {}
        num_matches = 0
        last_rank = -1

        def remember(nickname_key, entry):
            # Keep only the first row seen for a nickname on a given day.
            if nickname_key not in nickname_to_entry:
                nickname_to_entry[nickname_key] = entry
            else:
                log.info('normed nickname %s already exists for %s',
                         nickname_key, date)

        for match in iso_leaderboard_pattern.finditer(content):
            num_matches += 1
            rank = int(match.group('rank'))
            remember(name_merger.norm_name(match.group('nickname')),
                     [date,
                      float(match.group('skill_mean')),
                      float(match.group('skill_error')),
                      rank,
                      int(match.group('eligible_games_played'))])
            last_rank = rank

        for match in goko_leaderboard_pattern.finditer(content):
            num_matches += 1
            rank = int(match.group('rank'))
            # NOTE(review): goko nicknames are stored as-is, not passed
            # through name_merger.norm_name like the iso ones -- confirm
            # this is intended, since readers look entries up by normed name.
            remember(match.group('nickname'),
                     [date, float(match.group('skill_mean')), 0, rank, 0])
            last_rank = rank

        log.info('%d entries matched', num_matches)

        if num_matches == 0:
            log.error('No entries found, so the regex is probably not doing its job anymore.')
            break

        # Ranks are expected to run 1..N, so N rows must end on rank N.
        if num_matches != last_rank:
            log.error('ERROR: # entries does not match last rank, so the regex is probably not doing its job anymore.')
            break

        for nickname, data in nickname_to_entry.iteritems():
            history_collection.update({'_id': nickname},
                                      {'$push': {'history': data}},
                                      upsert=True)

        log.info('%d player histories updated', len(nickname_to_entry))

        last_date = date

    scanner_collection.update({'_id': 'leaderboard_history'},
                              {'$set': {'last_date': last_date}},
                              upsert=True)
def main(): filename_pattern = re.compile('^(?P<date>\d\d\d\d-\d\d-\d\d)\.html\.bz2$') leaderboard_pattern = re.compile('<td>(?P<skill_mean>-?\d+\.\d+) ± ' + \ '(?P<skill_error>-?\d+\.\d+)</td><td class=c2>' + \ '(?P<rank>\d+)</td><td class=c>' + \ '(?P<eligible_games_played>\d+)</td><td>' + \ '(?P<nickname>[^<]*) <') conn = pymongo.Connection() database = conn.test history_collection = database.leaderboard_history scanner_collection = database.scanner db_val = scanner_collection.find_one({'_id': 'leaderboard_history'}) last_date = db_val['last_date'] if db_val else '0000-00-00' directory = 'static/leaderboard/' filenames = os.listdir(directory) filenames.sort() for filename in filenames: match = filename_pattern.search(filename) if not match: continue date = match.group('date') if ('2011-11-24' <= date and date <= '2011-12-04' or '2012-06-08' == date): # don't load data from when the leaderboard was messed up continue if date <= last_date: continue print date file_obj = bz2.BZ2File(directory + filename) content = file_obj.read().decode('utf-8') file_obj.close() nickname_to_entry = {} num_matches = 0 last_rank = -1 pos = 0 while True: match = leaderboard_pattern.search(content, pos) if not match: break num_matches += 1 skill_mean = float(match.group('skill_mean')) skill_error = float(match.group('skill_error')) rank = int(match.group('rank')) eligible_games_played = int(match.group('eligible_games_played')) nickname = match.group('nickname') normed_nickname = name_merger.norm_name(nickname) if normed_nickname not in nickname_to_entry: nickname_to_entry[normed_nickname] = [ date, skill_mean, skill_error, rank, eligible_games_played ] else: print 'normed nickname already exists for this day:', normed_nickname last_rank = rank pos = match.end() print num_matches, 'entries matched' if num_matches == 0: print 'ERROR: no entries found, so the regex is probably not doing its job anymore.' 
break if num_matches != last_rank: print 'ERROR: # entries does not match last rank, so the regex is probably not doing its job anymore.' break for nickname, data in nickname_to_entry.iteritems(): history_collection.update({'_id': nickname}, {'$push': { 'history': data }}, upsert=True) print len(nickname_to_entry), 'player histories updated' print last_date = date scanner_collection.update({'_id': 'leaderboard_history'}, {'$set': { 'last_date': last_date }}, upsert=True)