Example #1
def get_current_win_rate_table(days):
    """Sets a summary table for current win rates, spanning `days` worth of
    time"""

    engine, _ = db_util.connect_database()
    end = int(db_util.get_max_start_time())
    begin = int(end - days * 24 * 3600)

    stmt = "SELECT * FROM dota_hero_win_rate WHERE time>={0} AND time<={1};".\
        format(begin, end)
    summary = pd.read_sql(stmt, engine)

    # Target column order for the final dataframe; applied just before
    # returning.
    cols = [
        'hero_skill', 'skill', 'hero', 'time_range', 'radiant_win',
        'radiant_total', 'radiant_win_pct', 'dire_win', 'dire_total',
        'dire_win_pct', 'win', 'total', 'win_pct'
    ]

    summary = summary[[
        'hero', 'skill', 'radiant_win', 'radiant_total', 'dire_win',
        'dire_total'
    ]]
    grpd = summary.groupby(["hero", "skill"]).sum()
    grpd.reset_index(inplace=True)

    # Actual hero names & legacy label field
    grpd['hero'] = [meta.HERO_DICT[a] for a in grpd['hero']]
    grpd['hero_skill'] = [
        a.upper() + "_" + str(b) for a, b in zip(grpd['hero'], grpd['skill'])
    ]

    # Time string
    sbegin = dt.datetime.utcfromtimestamp(begin).isoformat()
    send = dt.datetime.utcfromtimestamp(end).isoformat()
    grpd['time_range'] = "{0} to {1}".format(sbegin, send)

    # Maths
    grpd['win'] = grpd['radiant_win'] + grpd['dire_win']
    grpd['total'] = grpd['radiant_total'] + grpd['dire_total']

    grpd['radiant_win_pct'] = (100.0 * grpd['radiant_win'] /
                               grpd['radiant_total'])
    grpd['dire_win_pct'] = 100.0 * grpd['dire_win'] / grpd['dire_total']
    grpd['win_pct'] = 100.0 * grpd['win'] / grpd['total']
    grpd = grpd.fillna(0)

    grpd = grpd[cols]

    return grpd
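
The heart of this example is the groupby-then-percentage pattern. A minimal
self-contained sketch with toy data (column values are invented; only the
column names mirror the example):

import pandas as pd

# Toy rows: two heroes at one skill level, split across two time buckets.
df = pd.DataFrame({
    'hero': [1, 1, 2, 2],
    'skill': [3, 3, 3, 3],
    'radiant_win': [5, 7, 2, 4],
    'radiant_total': [10, 12, 6, 8],
})

# Collapse the time buckets, then derive the percentage.
grpd = df.groupby(['hero', 'skill'], as_index=False).sum()
grpd['radiant_win_pct'] = 100.0 * grpd['radiant_win'] / grpd['radiant_total']
print(grpd)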
Example #2
def main():
    """Main program execution"""

    parser = argparse.ArgumentParser(description='Update table containing '
                                     'record count by hour.')
    parser.add_argument("horizon_days", type=int)
    opts = parser.parse_args()

    engine, session = connect_database()
    rows = fetch_rows(opts.horizon_days, engine)
    print("Records: {}".format(rows.rowcount))

    times = []
    match_ids = []
    skills = []
    for row in rows:
        # Truncate timestamps to the start of the hour; the collected lists
        # are turned into a dataframe and aggregated on counts below.
        round_time = dt.datetime.fromtimestamp(
            row.start_time).strftime("%Y-%m-%dT%H:00")
        times.append(
            int(
                dt.datetime.strptime(round_time,
                                     "%Y-%m-%dT%H:%M").timestamp()))
        match_ids.append(row.match_id)
        skills.append(row.api_skill)

    df_matches = pd.DataFrame({
        'date_hour': times,
        'skill': skills,
        'match_ids': match_ids
    })
    summary = df_matches.groupby(["date_hour", "skill"]).count()
    summary.reset_index(inplace=True)

    # Write to database, overwriting old records
    for _, row in summary.iterrows():

        fetch = FetchSummary()
        fetch.date_hour_skill = "{0:10d}_{1}".format(row['date_hour'],
                                                     row['skill'])
        fetch.rec_count = row['match_ids']
        session.merge(fetch)
    session.commit()
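
The strftime/strptime round trip above is just a floor-to-the-hour. Assuming
nothing beyond the standard library, the same truncation can be sketched more
directly with datetime.replace:

import datetime as dt

def floor_to_hour(unix_ts):
    """Truncate a Unix timestamp to the start of its (local) hour."""
    rounded = dt.datetime.fromtimestamp(unix_ts).replace(
        minute=0, second=0, microsecond=0)
    return int(rounded.timestamp())

# 12:34:56 floors to 12:00:00 on the same day.
print(floor_to_hour(dt.datetime(2021, 1, 1, 12, 34, 56).timestamp()))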
Example #3
    def test_win_rate_pick_rate(self):
        """Test code to calculate win rate vs. pick rate tables"""

        win_rate_pick_rate.main(days=1, skill=1)

        engine, _ = db_util.connect_database()
        stmt = "select * from dota_hero_win_rate"

        df_out = pd.read_sql(stmt, engine)

        # Integrity checks
        summary = df_out.sum()
        self.assertEqual(
            summary['radiant_win'] + summary['dire_win'],
            25)
        self.assertEqual(summary['radiant_total'], summary['dire_total'])
        self.assertEqual(
            50,
            sum(df_out[['radiant_total', 'dire_total']].sum(axis=1)))
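
The integrity checks boil down to column sums over the result table. The
literals 25 and 50 above are fixture-specific; the same pattern with invented
numbers:

import pandas as pd

df = pd.DataFrame({
    'radiant_win': [10, 15], 'dire_win': [8, 12],
    'radiant_total': [20, 30], 'dire_total': [20, 30],
})

totals = df.sum()
assert totals['radiant_win'] + totals['dire_win'] == 45
assert totals['radiant_total'] == totals['dire_total']
assert df[['radiant_total', 'dire_total']].sum(axis=1).sum() == 100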
Example #4
def main():
    """Main entry point. """

    # Parse command line
    heroes, skill = parse_command_line()

    # Database connection
    engine, session = connect_database()

    # Populate dictionary with matches we already have within
    # INITIAL_HORIZON so they are not refetched.

    # Get UTC timestamps spanning INITIAL_HORIZON days ago to today
    start_time = int((dt.datetime.utcnow() -
                      dt.timedelta(days=INITIAL_HORIZON)).timestamp())
    end_time = int(dt.datetime.utcnow().timestamp())

    count = 0
    with engine.connect() as conn:
        stmt = "select start_time, match_id from dota_matches where " \
               "start_time>={} and start_time<={};".format(start_time, end_time)
        rows1 = conn.execute(stmt)

        # Consume the result set while the connection is still open.
        for row in rows1:
            MATCH_IDS[row.match_id] = row.start_time
            count += 1
    print("Records to seed MATCH_IDS 1: {}".format(count))

    # Main loop over heroes. Create the thread pool now to prevent constant
    # creation and destruction of threads. Also, destroy database connection
    # in between heroes just in case something hangs.
    executor = futures.ThreadPoolExecutor(max_workers=int(NUM_THREADS))
    counter = 1

    for hero in heroes:
        log.info("---------------------------------------------------------")
        log.info(">>>>>>>> Hero: %s %d/%d Skill: %d <<<<<<<<",
                 meta.HERO_DICT[hero], counter, len(heroes), skill)
        log.info("---------------------------------------------------------")
        fetch_matches(session, hero, skill, executor)
        counter += 1
        session.commit()
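
Reusing one pool across the hero loop is the point of creating the executor up
front. A minimal sketch of that pattern (the worker function is a stand-in,
not the project's fetch code):

from concurrent import futures

def fetch(item):
    # Stand-in for the per-hero fetch work.
    return item * 2

# One pool reused across iterations avoids repeated thread setup/teardown.
executor = futures.ThreadPoolExecutor(max_workers=4)
for batch in ([1, 2, 3], [4, 5, 6]):
    print(list(executor.map(fetch, batch)))
executor.shutdown()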
Example #5
def main(days, skill):
    """Main entry point"""

    text, begin, end = dotautil.TimeMethods.get_hour_blocks(
        db_util.get_max_start_time(), int(days * 24))

    # Get database connection
    engine, _ = db_util.connect_database()

    with engine.connect() as conn:
        for ttime, btime, etime in zip(text, begin, end):
            stmt = "select radiant_win, radiant_heroes, dire_heroes from " \
                   "dota_matches where start_time>={0} and start_time<={1}"\
                   " and api_skill={2};".format(btime, etime, skill)
            matches = conn.execute(stmt)

            log.info("Skill level: %d Time: %s Count: %d", skill, ttime,
                     matches.rowcount)

            df_hero = parse_records(matches)
            write_to_database(df_hero, skill, etime)
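
get_hour_blocks comes from the project's dotautil module. A plausible
stand-in, yielding one (label, begin, end) triple per whole hour working
backwards from a reference time (an assumption, not the actual
implementation):

import datetime as dt

def hour_blocks(end_ts, hours):
    """Yield (label, begin, end) for each whole hour preceding end_ts."""
    end_hour = int(end_ts) // 3600 * 3600
    for i in range(hours):
        begin = end_hour - (i + 1) * 3600
        end = end_hour - i * 3600
        yield dt.datetime.utcfromtimestamp(begin).isoformat(), begin, end

for label, btime, etime in hour_blocks(1609459200, 3):
    print(label, btime, etime)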
Example #6
    def setUpClass(cls):
        """Setup database for testing"""

        # Bail if we're not in development...
        if os.environ['DOTA_DB_URI'].split("/")[-1] != "dota_dev":
            raise NotImplementedError("Must be on development environment!")

        # Create and upgrade database
        db_util.create_database()
        os.system("alembic upgrade head >/dev/null 2>&1")

        cls.engine, cls.session = db_util.connect_database()

        # Populate tables
        filename = os.path.join("testing", "test_database.txt")
        with open(filename, "r") as filehandle:
            db_txt = filehandle.read()

        conn = cls.engine.connect()
        for stmt in db_txt.split("\n"):
            if stmt.strip():  # Skip blank lines such as a trailing newline
                conn.execute(stmt)
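
The seed-from-text-file pattern is database-agnostic; the same loop against an
in-memory SQLite database (table and statements invented for illustration):

import sqlite3

conn = sqlite3.connect(":memory:")
seed_sql = "CREATE TABLE t (x INTEGER);\nINSERT INTO t VALUES (1);\n"
for stmt in seed_sql.split("\n"):
    if stmt.strip():
        conn.execute(stmt)
print(conn.execute("SELECT x FROM t").fetchall())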
Example #7
def write_to_database(summary, skill, end_time):
    """Update win rate data in database"""

    rows = []
    engine, _ = db_util.connect_database()

    # Coerce to integers
    summary = summary.astype('int')

    for _, row in summary.iterrows():
        time_hero_skill = "{0}_H{1:03}_S{2}".format(end_time, row['hero'],
                                                    skill)

        rows.append(
            (time_hero_skill, end_time, row['hero'], skill, row['radiant_win'],
             row['radiant_total'], row['dire_win'], row['dire_total']))

    conn = engine.raw_connection()
    cursor = conn.cursor()
    stmt = "REPLACE INTO dota_hero_win_rate VALUES (%s, %s, %s, %s, %s, %s, " \
           "%s, %s)"
    cursor.executemany(stmt, rows)
    conn.commit()
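
REPLACE INTO with executemany is what makes the write idempotent: rerunning
for the same time window overwrites rather than duplicates. The same behaviour
demonstrated with SQLite, which also supports REPLACE INTO but uses ?
placeholders instead of %s:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE win_rate (key TEXT PRIMARY KEY, wins INTEGER)")

# REPLACE INTO overwrites any row that collides on the primary key.
conn.executemany("REPLACE INTO win_rate VALUES (?, ?)",
                 [("100_H001_S1", 5), ("100_H002_S1", 7)])
conn.executemany("REPLACE INTO win_rate VALUES (?, ?)",
                 [("100_H001_S1", 9)])
conn.commit()
print(conn.execute("SELECT * FROM win_rate ORDER BY key").fetchall())
# [('100_H001_S1', 9), ('100_H002_S1', 7)]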
Example #8
def get_health_summary(days, timezone, hour=True):
    """Returns a Pandas dataframe summarizing number of matches processed
    over an interval of days. Defaults to by hour, can we chaned to daily
    view through use of optional `hour` argument`.
    """

    # Database connection
    engine, _ = connect_database()

    # Get TZ offsets, do everything relative to current TZ offset
    local_tz = pytz.timezone(timezone)
    utc_offset = local_tz.utcoffset(dt.datetime.now())
    utc_hour = int(utc_offset.total_seconds() / 3600)

    now = dt.datetime.utcnow()
    now = now + utc_offset

    # Create a blank dataframe for the time range of interest, starting with
    # times.
    if hour:
        now_hour = dt.datetime(now.year, now.month, now.day, now.hour, 0, 0)
        times = [
            isoformat_with_tz(now_hour - dt.timedelta(hours=i), utc_hour)
            for i in range(24 * days)
        ]
    else:
        now_day = dt.datetime(now.year, now.month, now.day, 0, 0, 0)
        times = [
            isoformat_with_tz(now_day - dt.timedelta(days=i), utc_hour)
            for i in range(days)
        ]

    # Blank dataframe, one entry for each skill level...
    df_blank = pd.DataFrame(index=times,
                            data={
                                1: [0] * len(times),
                                2: [0] * len(times),
                                3: [0] * len(times),
                            })

    # Fetch from database
    begin = int((dt.datetime.utcnow() - dt.timedelta(days=days)).timestamp())
    begin = str(begin) + "_0"
    stmt = "select date_hour_skill, rec_count from dota_fetch_summary "
    stmt += "where date_hour_skill>='{}'"
    rows = pd.read_sql_query(stmt.format(begin), engine)

    if len(rows) == 0:
        df_summary = df_blank
    else:
        # Split out time and skills
        rows['time'] = [int(t.split("_")[0])
                        for t in rows['date_hour_skill']]
        rows['skill'] = [int(t.split("_")[1])
                         for t in rows['date_hour_skill']]

        # Apply UTC offset
        rows['time_local'] = rows['time'] + utc_hour * 3600
        rows['time_local_rnd'] = [
            dotautil.TimeMethods.get_time_nearest(t, hour=hour)[0]
            for t in rows['time_local']
        ]

        df_summary = rows[["time_local_rnd", "skill", "rec_count"]]
        df_summary = df_summary.groupby(["time_local_rnd", "skill"]).sum()
        df_summary.reset_index(inplace=True)
        df_summary = df_summary.pivot(index='time_local_rnd',
                                      columns='skill',
                                      values='rec_count')

        dt2 = [
            dt.datetime.utcfromtimestamp(float(t)) for t in df_summary.index
        ]
        dt3 = [isoformat_with_tz(t, utc_hour) for t in dt2]
        df_summary.index = dt3

        # Add them together
        df_summary = df_blank.add(df_summary, fill_value=0)

    # Select the skill columns and rename them
    df_summary = df_summary[[1, 2, 3]]
    df_summary.columns = ['normal', 'high', 'very_high']
    df_summary = df_summary.sort_index(ascending=False)

    # For summary table
    rows = zip(df_summary.index, df_summary['normal'].values,
               df_summary['high'].values, df_summary['very_high'].values)

    return df_summary, rows
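
The df_blank.add(..., fill_value=0) step is what guarantees a row for every
hour even when nothing was fetched in that hour; reduced to toy data:

import pandas as pd

# Blank frame: one row per hour, one column per skill level.
blank = pd.DataFrame(index=['00:00', '01:00', '02:00'],
                     data={1: [0, 0, 0], 2: [0, 0, 0]})
# Fetched counts exist only for one hour.
fetched = pd.DataFrame(index=['01:00'], data={1: [42], 2: [7]})

# .add aligns on the index; fill_value=0 keeps the empty hours as zeros.
print(blank.add(fetched, fill_value=0))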