def get_current_win_rate_table(days):
    """Build a summary DataFrame of current win rates covering `days` of data,
    anchored at the newest match start time in the database."""
    engine, _ = db_util.connect_database()

    # Window of interest: [end - days, end], where end is the newest match.
    end = int(db_util.get_max_start_time())
    begin = int(end - days * 24 * 3600)

    stmt = "SELECT * FROM dota_hero_win_rate WHERE time>={0} AND time<={1};".\
        format(begin, end)
    raw = pd.read_sql(stmt, engine)

    # Final column ordering for the returned frame.
    cols = [
        'hero_skill', 'skill', 'hero', 'time_range', 'radiant_win',
        'radiant_total', 'radiant_win_pct', 'dire_win', 'dire_total',
        'dire_win_pct', 'win', 'total', 'win_pct'
    ]

    # Aggregate raw counts over the window, per (hero, skill).
    keep = ['hero', 'skill', 'radiant_win', 'radiant_total', 'dire_win',
            'dire_total']
    df = raw[keep].groupby(["hero", "skill"]).sum()
    df.reset_index(inplace=True)

    # Human-readable hero names plus the legacy combined label field.
    df['hero'] = [meta.HERO_DICT[h] for h in df['hero']]
    df['hero_skill'] = [
        h.upper() + "_" + str(s) for h, s in zip(df['hero'], df['skill'])
    ]

    # Human-readable time range string.
    sbegin = dt.datetime.utcfromtimestamp(begin).isoformat()
    send = dt.datetime.utcfromtimestamp(end).isoformat()
    df['time_range'] = "{0} to {1}".format(sbegin, send)

    # Derived totals and percentages (NaN from zero division -> 0 below).
    df['win'] = df['radiant_win'] + df['dire_win']
    df['total'] = df['radiant_total'] + df['dire_total']
    for side in ('radiant', 'dire'):
        df[side + '_win_pct'] = \
            100.0 * df[side + '_win'] / df[side + '_total']
    df['win_pct'] = 100.0 * df['win'] / df['total']

    return df.fillna(0)[cols]
def main():
    """Main program execution"""
    parser = argparse.ArgumentParser(description='Update table containing '
                                     'record count by hour.')
    parser.add_argument("horizon_days", type=int)
    opts = parser.parse_args()

    engine, session = connect_database()
    rows = fetch_rows(opts.horizon_days, engine)
    print("Records: {}".format(rows.rowcount))

    # Truncate every start time to the top of its hour, then count matches
    # per (hour, skill) bucket.
    columns = {'date_hour': [], 'skill': [], 'match_ids': []}
    for row in rows:
        hour_txt = dt.datetime.fromtimestamp(
            row.start_time).strftime("%Y-%m-%dT%H:00")
        hour_ts = int(
            dt.datetime.strptime(hour_txt, "%Y-%m-%dT%H:%M").timestamp())
        columns['date_hour'].append(hour_ts)
        columns['skill'].append(row.api_skill)
        columns['match_ids'].append(row.match_id)

    summary = pd.DataFrame(columns).groupby(["date_hour", "skill"]).count()
    summary.reset_index(inplace=True)

    # Write to database, overwriting any existing records for these buckets.
    for _, row in summary.iterrows():
        fetch = FetchSummary()
        fetch.date_hour_skill = "{0:10d}_{1}".format(row['date_hour'],
                                                     row['skill'])
        fetch.rec_count = row['match_ids']
        session.merge(fetch)
    session.commit()
def test_win_rate_pick_rate(self):
    """Test code to calculate win rate vs. pick rate tables"""
    win_rate_pick_rate.main(days=1, skill=1)

    engine, _ = db_util.connect_database()
    df_out = pd.read_sql("select * from dota_hero_win_rate", engine)

    # Integrity checks: wins across both sides, balanced side totals, and
    # total match slots.
    totals = df_out.sum()
    self.assertEqual(25, totals['radiant_win'] + totals['dire_win'])
    self.assertEqual(totals['radiant_total'], totals['dire_total'])
    side_totals = df_out[['radiant_total', 'dire_total']].sum(axis=1)
    self.assertEqual(50, sum(side_totals))
def main():
    """Main entry point.

    Seeds MATCH_IDS with already-fetched matches inside INITIAL_HORIZON
    (so they are not refetched), then loops over the requested heroes
    fetching matches at the requested skill level.
    """
    # Parse command line
    heroes, skill = parse_command_line()

    # Database connection
    engine, session = connect_database()

    # Get UTC timestamps spanning INITIAL_HORIZON days ago to today
    start_time = int((dt.datetime.utcnow() -
                      dt.timedelta(days=INITIAL_HORIZON)).timestamp())
    end_time = int(dt.datetime.utcnow().timestamp())

    with engine.connect() as conn:
        stmt = "select start_time, match_id from dota_matches where " \
               "start_time>={} and start_time<={};".format(start_time,
                                                           end_time)
        rows1 = conn.execute(stmt)
        count = 0
        for row in rows1:
            MATCH_IDS[row.match_id] = row.start_time
            count += 1
        print("Records to seed MATCH_IDS 1: {}".format(count))

    # Main loop over heroes. One shared thread pool avoids constant creation
    # and destruction of threads. BUGFIX: the pool is now a context manager
    # so its workers are shut down even if a fetch raises -- the original
    # never called shutdown(), leaking threads on every run.
    with futures.ThreadPoolExecutor(max_workers=int(NUM_THREADS)) as executor:
        counter = 1
        for hero in heroes:
            log.info("---------------------------------------------------------")
            log.info(">>>>>>>> Hero: %s %d/%d Skill: %d <<<<<<<<",
                     meta.HERO_DICT[hero], counter, len(heroes), skill)
            log.info("---------------------------------------------------------")
            fetch_matches(session, hero, skill, executor)
            counter += 1
            session.commit()
def main(days, skill):
    """Main entry point"""
    labels, starts, stops = dotautil.TimeMethods.get_hour_blocks(
        db_util.get_max_start_time(), int(days * 24))

    # Get database connection
    engine, _ = db_util.connect_database()

    query = ("select radiant_win, radiant_heroes, dire_heroes from "
             "dota_matches where start_time>={0} and start_time<={1}"
             " and api_skill={2};")

    # Process one hour block at a time: fetch, parse, persist.
    with engine.connect() as conn:
        for label, t_start, t_stop in zip(labels, starts, stops):
            matches = conn.execute(query.format(t_start, t_stop, skill))
            log.info("Skill level: %d Time: %s Count: %d", skill, label,
                     matches.rowcount)
            df_hero = parse_records(matches)
            write_to_database(df_hero, skill, t_stop)
def setUpClass(cls):
    """Setup database for testing.

    Raises NotImplementedError unless DOTA_DB_URI points at the dota_dev
    database, then creates/upgrades the schema and loads test fixtures.
    """
    # Bail if we're not in development...
    if os.environ['DOTA_DB_URI'].split("/")[-1] != "dota_dev":
        raise NotImplementedError("Must be on development environment!")

    # Create and upgrade database
    db_util.create_database()
    os.system("alembic upgrade head >/dev/null 2>&1")
    cls.engine, cls.session = db_util.connect_database()

    # Populate tables from the fixture file, one statement per line.
    filename = os.path.join("testing", "test_database.txt")
    with open(filename, "r") as filehandle:
        db_txt = filehandle.read()

    # BUGFIX: use a context manager so the connection is released (the
    # original leaked it), and skip blank lines -- executing an empty
    # statement (e.g. from a trailing newline) errors on most drivers.
    with cls.engine.connect() as conn:
        for stmt in db_txt.split("\n"):
            if stmt.strip():
                conn.execute(stmt)
def write_to_database(summary, skill, end_time):
    """Update win rate data in database.

    summary: DataFrame with per-hero radiant/dire win and total counts.
    skill: API skill level the counts belong to.
    end_time: epoch timestamp keying this aggregation window.
    """
    engine, _ = db_util.connect_database()

    # Coerce to integers
    summary = summary.astype('int')

    rows = []
    for _, row in summary.iterrows():
        # Composite primary key: <end_time>_H<hero:03>_S<skill>
        time_hero_skill = "{0}_H{1:03}_S{2}".format(end_time, row['hero'],
                                                    skill)
        rows.append(
            (time_hero_skill, end_time, row['hero'], skill,
             row['radiant_win'], row['radiant_total'], row['dire_win'],
             row['dire_total']))

    stmt = "REPLACE INTO dota_hero_win_rate VALUES (%s, %s, %s, %s, %s, %s, " \
           "%s, %s)"

    # BUGFIX: close the cursor and raw connection (the original leaked
    # both), while preserving the commit-on-success behavior.
    conn = engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(stmt, rows)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
def get_health_summary(days, timezone, hour=True):
    """Returns a Pandas dataframe summarizing number of matches processed
    over an interval of days.

    Defaults to a by-hour view; it can be changed to a daily view through
    use of the optional `hour` argument. (Fixed docstring typo "can we
    chaned".)

    Returns (df_summary, rows): the summary frame indexed by local time
    with columns normal/high/very_high, newest first, and `rows` as an
    iterable of (time, normal, high, very_high) tuples for a summary table.
    """
    # Database connection
    engine, _ = connect_database()

    # Get TZ offsets, do everything relative to current TZ offset
    local_tz = pytz.timezone(timezone)
    utc_offset = local_tz.utcoffset(dt.datetime.now())
    utc_hour = int(utc_offset.total_seconds() / 3600)

    now = dt.datetime.utcnow()
    now = now + utc_offset

    # Create a blank dataframe for the time range of interest, starting
    # with the bucket labels (hourly or daily, newest first).
    if hour:
        now_hour = dt.datetime(now.year, now.month, now.day, now.hour, 0, 0)
        times = [
            isoformat_with_tz(now_hour - dt.timedelta(hours=i), utc_hour)
            for i in range(24 * days)
        ]
    else:
        now_day = dt.datetime(now.year, now.month, now.day, 0, 0, 0)
        times = [
            isoformat_with_tz(now_day - dt.timedelta(days=i), utc_hour)
            for i in range(days)
        ]

    # Blank dataframe, one column per skill level (1/2/3 map to
    # normal/high/very_high below).
    df_blank = pd.DataFrame(index=times,
                            data={
                                1: [0] * len(times),
                                2: [0] * len(times),
                                3: [0] * len(times),
                            })

    # Fetch from database; keys are "<epoch>_<skill>" so a string
    # comparison against "<begin>_0" bounds the time range.
    begin = int((dt.datetime.utcnow() - dt.timedelta(days=days)).timestamp())
    begin = str(begin) + "_0"

    stmt = "select date_hour_skill, rec_count from dota_fetch_summary "
    stmt += "where date_hour_skill>='{}'"
    rows = pd.read_sql_query(stmt.format(begin), engine)

    if len(rows) == 0:
        df_summary = df_blank
    else:
        # Split out time and skills
        rows['time'] = [(int(t.split("_")[0]))
                        for t in rows['date_hour_skill']]
        rows['skill'] = [(int(t.split("_")[1]))
                         for t in rows['date_hour_skill']]

        # Apply UTC offset, then snap to the nearest hour/day bucket
        rows['time_local'] = rows['time'] + utc_hour * 3600
        rows['time_local_rnd'] = [
            dotautil.TimeMethods.get_time_nearest(t, hour=hour)[0]
            for t in rows['time_local']
        ]

        df_summary = rows[["time_local_rnd", "skill", "rec_count"]]
        df_summary = df_summary.groupby(["time_local_rnd", "skill"]).sum()
        df_summary.reset_index(inplace=True)
        df_summary = df_summary.pivot(index='time_local_rnd',
                                      columns='skill',
                                      values='rec_count')

        # Re-index with the same label format used by df_blank so add()
        # aligns the two frames.
        dt2 = [
            dt.datetime.utcfromtimestamp(float(t)) for t in df_summary.index
        ]
        dt3 = [isoformat_with_tz(t, utc_hour) for t in dt2]
        df_summary.index = dt3

        # Add them together
        df_summary = df_blank.add(df_summary, fill_value=0)

    # Rename columns
    df_summary = df_summary[[1, 2, 3]]
    df_summary.columns = ['normal', 'high', 'very_high']
    df_summary = df_summary.sort_index(ascending=False)

    # For summary table
    rows = zip(df_summary.index, df_summary['normal'].values,
               df_summary['high'].values, df_summary['very_high'].values)

    return df_summary, rows