def get_si2_sg2() -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the cached si2 and sg2 datasets, rebuilding them from st2 if missing."""
    try:
        si2 = pkg.load_dataset('si2')
        sg2 = pkg.load_dataset('sg2')
    except Exception:
        st2 = get_st2()
        si2, sg2 = label.index_games(parse.index_games(st2))
        pkg.save_dataset(si2, 'si2')
        pkg.save_dataset(sg2, 'sg2')
    return si2, sg2
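
# Every get_* helper in this module follows the same cache-or-rebuild pattern:
# try to load a saved dataset, and on a cache miss rebuild it from the previous
# pipeline stage and save the result. A minimal sketch of the pattern
# (build_example is a hypothetical stand-in for the per-dataset pipeline stage):
#
#   def get_example() -> pd.DataFrame:
#       try:
#           df = pkg.load_dataset('example')
#       except Exception:
#           df = build_example()  # hypothetical builder
#           pkg.save_dataset(df, 'example')
#       return df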
def get_setups() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    try:
        db_setups = pkg.load_dataset('db_setups')
        game_setups = pkg.load_dataset('game_setups')
        free_setups = pkg.load_dataset('free_setups')
        null_setups = pkg.load_dataset('null_setups')
    except Exception:
        st2 = games.get_st2()
        si2, _ = games.get_si2_sg2()
        free_setups = si2.query('type == "free"')
        null_setups = si2.query('field_content.isnull()')
        # The 100-character field_content encodes the 10x10 board row by row:
        # 40 squares for red's setup, the 20-square middle zone, and 40 squares
        # for blue's setup (reversed, so both setups read from the owner's
        # perspective).
        game_setups = (si2
            .query('type != "free" & field_content.notnull()')
            .assign(
                setup_str_red=lambda r: r.field_content.str[:40],
                setup_str_blue=lambda r: r.field_content.str[60:].str[::-1],
                dmz=lambda r: r.field_content.str[40:60]
            )
        )
        # The two middle rows are identical for every game.
        assert (game_setups.dmz == 'AA__AA__AAAA__AA__AA').all()
        db_setups = (pd
            .wide_to_long(
                st2.merge(game_setups).drop(columns=['player_red', 'player_blue', 'dmz']),
                stubnames=['setup_str'], i='gid', j='player', sep='_', suffix='(red|blue)'
            )
            .reset_index()
            .assign(
                result=lambda r: np.where(
                    r.player == r.winner, 'win',
                    np.where(r.winner == 'draw', 'draw', 'loss')),
                score=lambda r: np.where(
                    r.result == 'win', 1.0,
                    np.where(r.result == 'draw', 0.5, 0.0)),
                setup_str=lambda r: r.setup_str.apply(strados2.decode_setup),
                setup_obj=lambda r: r.apply(
                    lambda x: Setup(x.setup_str, x.type), axis=1)
            )
            .astype(dtype={
                'result': pd.CategoricalDtype(categories=['win', 'draw', 'loss'])
            })
            .pipe(lambda df: pd.concat(
                [df, pd.get_dummies(df.result, prefix='', prefix_sep='')], axis=1
            ))
            .loc[:, [
                'gid', 'filename', 'period', 'freq', 'ext', 'type', 'player',
                'result', 'win', 'draw', 'loss', 'score', 'ending',
                'num_moves', 'num_turns', 'next_move', 'setup_str', 'setup_obj'
            ]]
            .pipe(label.setups)
            .sort_values(['gid', 'player'])
            .reset_index(drop=True)
        )
        assert all(db_setups.setup_obj.apply(lambda x: x.ok()))
        pkg.save_dataset(db_setups, 'db_setups')
        pkg.save_dataset(game_setups, 'game_setups')
        pkg.save_dataset(free_setups, 'free_setups')
        pkg.save_dataset(null_setups, 'null_setups')
    return db_setups, game_setups, free_setups, null_setups
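
# A toy illustration of the pd.wide_to_long() reshape used above: the two
# per-game setup columns become a single setup_str column with two rows per
# game, keyed by a new 'player' index level (column names mirror the real
# frame; the values are made up):
#
#   >>> import pandas as pd
#   >>> wide = pd.DataFrame({
#   ...     'gid': [1],
#   ...     'winner': ['red'],
#   ...     'setup_str_red': ['<40 red squares>'],
#   ...     'setup_str_blue': ['<40 blue squares>'],
#   ... })
#   >>> (pd.wide_to_long(wide, stubnames=['setup_str'], i='gid', j='player',
#   ...                  sep='_', suffix='(red|blue)')
#   ...    .reset_index())
#
# which yields two rows per game, one per player:
#
#   gid player winner          setup_str
#     1    red    red   <40 red squares>
#     1   blue    red  <40 blue squares>
#
# The per-player 'result' then follows from comparing 'player' to 'winner'.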
def get_daily() -> Tuple[pd.DataFrame, pd.DataFrame]:
    try:
        # Recover the date of the newest daily file already on disk and
        # check that it matches the cached results dataset.
        last = dt.date(*map(int,
            sorted(os.listdir(pkg.daily_dir))[-1].split('.')[0].split('-')[1:]))
        results = pkg.load_dataset('results')
        assert last == dt.date(*map(int, results.date.max().split('-')))
        start = last + dt.timedelta(days=1)
    except Exception:
        results = pd.DataFrame()
        start = dt.date(2003, 6, 1)
    filenames = scrape.results(start, pd.to_datetime('today').date(), pkg.daily_dir)
    try:
        update = (pd
            .concat(
                [pd.read_csv(os.path.join(pkg.daily_dir, filename))
                 for filename in tqdm(filenames)],
                ignore_index=True
            )
            .rename(columns=lambda c: c.lower())
            .rename(columns=lambda c: c.replace(' ', '_'))
            .rename(columns={'#_of_turns': 'num_turns'})
            .loc[:, [
                'date', 'time', 'type', 'player_red', 'player_blue',
                'result', 'ending', 'num_turns', 'duration'
            ]]
        )
    except Exception:
        update = pd.DataFrame()
    if not update.empty:
        # DataFrame.append is deprecated; concatenate the new rows instead.
        results = pd.concat([results, update], ignore_index=True)
        pkg.save_dataset(results, 'results')
    return results, update
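
# The 'last' date above is recovered from the newest file in pkg.daily_dir,
# assuming filenames of the form '<prefix>-YYYY-MM-DD.csv' (only that shape
# is implied by the parsing; the exact prefix is an assumption):
#
#   >>> 'results-2021-06-30.csv'.split('.')[0].split('-')[1:]
#   ['2021', '06', '30']
#   >>> import datetime as dt
#   >>> dt.date(*map(int, ['2021', '06', '30']))
#   datetime.date(2021, 6, 30)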
def get_txt_files() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    zip_files, _ = get_zip_files()
    txt_files_packed = unpack.infolist(pkg.zip_dir, zip_files)
    try:
        txt_files_cached = pkg.load_dataset('txt_files')
    except Exception:
        os.makedirs(pkg.txt_dir)
        txt_files_cached = pd.DataFrame(columns=txt_files_packed.columns.values)
    assert sorted(os.listdir(pkg.txt_dir)) == sorted(txt_files_cached.filename)
    # Anti-join: keep only the packed .txt files not yet in the cache.
    txt_queue = (pd
        .merge(
            txt_files_packed, txt_files_cached,
            how='outer', indicator=True, validate='one_to_one'
        )
        .query('_merge == "left_only"')
        .drop(columns='_merge')
    )
    zip_queue = txt_queue.drop_duplicates('name')
    unpack.extract(pkg.zip_dir, zip_queue, pkg.txt_dir)
    unpacked = txt_queue.loc[:, ['filename']]
    txt_files = txt_files_packed
    if not unpacked.empty:
        pkg.save_dataset(txt_files, 'txt_files')
    assert sorted(os.listdir(pkg.txt_dir)) == sorted(txt_files.filename)
    repaired = repair.directory(pkg.txt_dir, unpacked)
    return txt_files, unpacked, repaired
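
# The outer merge with indicator=True above (and again in get_zip_files below)
# is the standard pandas anti-join idiom: rows present only in the left frame
# (the packed or remote listing) and absent from the right frame (the cached
# dataset) form the work queue. A self-contained toy example:
#
#   >>> import pandas as pd
#   >>> remote = pd.DataFrame({'name': ['a.zip', 'b.zip', 'c.zip']})
#   >>> cached = pd.DataFrame({'name': ['a.zip']})
#   >>> (pd.merge(remote, cached, how='outer', indicator=True)
#   ...    .query('_merge == "left_only"')
#   ...    .drop(columns='_merge'))
#       name
#   1  b.zip
#   2  c.zip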
def get_st2() -> pd.DataFrame:
    try:
        st2 = pkg.load_dataset('st2')
    except Exception:
        st2 = (games
            .get_txt_files()[0]
            .pipe(parse.txt_files)
            .pipe(label.txt_files)
        )
        pkg.save_dataset(st2, 'st2')
    return st2
def get_zip_files() -> Tuple[pd.DataFrame, pd.DataFrame]:
    zip_files_remote = scrape.list_directory_contents_recursive(pkg.strados2_url)
    try:
        zip_files_cached = pkg.load_dataset('zip_files')
    except Exception:
        os.makedirs(pkg.zip_dir)
        zip_files_cached = pd.DataFrame(columns=zip_files_remote.columns.values)
    assert sorted(os.listdir(pkg.zip_dir)) == sorted(zip_files_cached.name)
    # Anti-join: keep only the remote .zip files not yet in the local cache.
    zip_queue = (pd
        .merge(
            zip_files_remote, zip_files_cached,
            how='outer', indicator=True, validate='one_to_one'
        )
        .query('_merge == "left_only"')
        .drop(columns='_merge')
    )
    scraped = scrape.mirror_no_directories(pkg.zip_dir, '*.zip', zip_queue)
    zip_files = zip_files_remote
    if not scraped.empty:
        pkg.save_dataset(zip_files, 'zip_files')
    assert sorted(os.listdir(pkg.zip_dir)) == sorted(zip_files.name)
    return zip_files, scraped
# Copyright Rein Halbersma 2018-2021.
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)

import gravon.package as pkg
import gravon.transform.label as label
import gravon.transform.tidy as tidy

results = pkg.load_dataset('results')
sr2 = label.results(results)
pkg.save_dataset(sr2, 'sr2')

all_results = (tidy
    .results(results)
    .query('player_red.notnull() & player_blue.notnull()')
)

# There have been 798K matches.
all_results.shape[0]

# There have been almost 70K players.
all_players = tidy.players(all_results)
all_players.player.unique().shape[0]
# Copyright Rein Halbersma 2018-2021.
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)

import pandas as pd

import gravon.package as pkg
import gravon.pattern as pattern
import gravon.archive as archive

ss2 = pkg.load_dataset('ss2').query('type == "classic" & period >= "2006-09"')

# A 4x10 setup template to search for.
setup = """
55X.......
..........
..........
..........
"""

games = pattern.match(ss2, setup)
pd.crosstab(games.player, games.match_type, margins=True)
archive.make_zip(games.filename.tolist(), 'constrictor')
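
# pattern.match() presumably treats '.' in the template as a wildcard square,
# so the query above pins only the three explicit pieces in the back row (an
# assumption; only the template shape is given here). The crosstab then
# tallies matches per player and match type, with margins=True adding 'All'
# totals. A toy illustration with made-up values:
#
#   >>> import pandas as pd
#   >>> df = pd.DataFrame({'player': ['red', 'blue', 'red'],
#   ...                    'match_type': ['exact', 'mirror', 'mirror']})
#   >>> pd.crosstab(df.player, df.match_type, margins=True)
#   match_type  exact  mirror  All
#   player
#   blue            0       1    1
#   red             1       1    2
#   All             1       2    3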