def download(filename, load=True, n_tries=100, sleep_seconds=300,
             verbose=False):
    """
    Download current Numerai dataset; overwrites if file exists.

    If `load` is True (default) then return data object; otherwise return
    None.

    If download fails then retry download `n_tries` times, pausing
    `sleep_seconds` between each try. No pause is made after the final
    failed try. Only `Exception` subclasses trigger a retry, so Ctrl-C
    (KeyboardInterrupt) and SystemExit stop the retry loop immediately.

    If every try fails the function still falls through to the `load` step
    (or returns None when `load` is False).

    Unlike nx.download() this function loads and returns the data object.
    """
    # line below expands e.g. ~/tmp to /home/me/tmp...
    filename = os.path.expanduser(filename)
    count = 0
    while count < n_tries:
        try:
            if verbose:
                print("Download dataset {}".format(filename))
            napi = NumerAPI()
            # the dataset url is always requested for tournament 8
            url = napi.get_dataset_url(tournament=8)
            download_file(url, filename)
            break
        except Exception:  # noqa
            print('download failed')
            count += 1
            # sleeping is only useful when another try will follow
            if count < n_tries:
                time.sleep(sleep_seconds)
    if load:
        return nx.load_zip(filename, verbose=verbose)
    return None
def upload(filename, tournament, public_id, secret_key, block=True,
           n_tries=100, sleep_seconds=60, verbose=False):
    """
    Upload tournament submission (csv file) to Numerai.

    If upload fails then retry upload `n_tries` times, pausing `sleep_seconds`
    between each try. No pause is made after the final failed try.

    If block is True (default) then the scope of your token must be both
    upload_submission and read_submission_info. If block is False then only
    upload_submission is needed.

    `verbose` is accepted but not used by this function.

    Returns the tuple (upload_id, status).

    Raises Exception if no try succeeded (including when `n_tries` is 0).
    """
    tournament = nx.tournament_int(tournament)
    count = 0
    while count < n_tries:
        try:
            napi = NumerAPI(public_id=public_id, secret_key=secret_key,
                            verbosity='warning')
            upload_id = napi.upload_predictions(filename,
                                                tournament=tournament)
            if block:
                status = status_block(upload_id, public_id, secret_key)
            else:
                status = upload_status(upload_id, public_id, secret_key)
            break
        except Exception:  # noqa
            # Exception (not a bare except) so that Ctrl-C still interrupts
            print('upload failed')
            count += 1
            if count < n_tries:
                time.sleep(sleep_seconds)
    else:
        # loop ended without `break`: upload_id/status were never assigned
        raise Exception('Upload failed after reaching max retries')
    return upload_id, status
def test_get_leaderboard_returns_empty_list():
    # the shared fixture is skipped on purpose; this test registers its own
    # competition on a fresh mock manager
    api = NumerAPI(manager=NumerMockManager())
    api.manager.create_competition(number=67)

    leaderboard = api.get_leaderboard(67)

    assert isinstance(leaderboard, list)
    assert len(leaderboard) == 0
def main(project_dir):
    """
    Make sure the current round's raw dataset is pickled under
    `<project_dir>/data/raw/<round>_numerai_raw.pkl`.

    The `data/raw` directory is created when it does not exist. If the pickle
    for the current round is already there nothing is downloaded; otherwise
    the dataset is downloaded with `download_dataset_as_df`, passed through
    `df_to_numeric` and written with `DataFrame.to_pickle`.
    """
    logger = logging.getLogger(__name__)
    logger.info('Getting raw data')

    napi = NumerAPI()
    round_number = napi.get_current_round()
    dataset_filename = '{}_numerai_raw.pkl'.format(round_number)

    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    raw_data_file = os.path.join(raw_data_path, dataset_filename)
    # a fresh checkout may not have data/raw yet; listing it would crash
    os.makedirs(raw_data_path, exist_ok=True)

    if os.path.exists(raw_data_file):
        logger.info("Dataset for round {} already downloaded as {}".format(
            round_number, dataset_filename))
        return

    logger.info("Downloading data for round {}".format(round_number))
    # the url is only needed (and only requested) when we really download
    dataset_url = napi.get_dataset_url()
    df = download_dataset_as_df(dataset_url)
    logger.info('Data concatenated, downcasting data')
    df = df_to_numeric(df)
    logger.info('Data converted, saving to file')
    df.to_pickle(raw_data_file)
    logger.info("Dataset for round {} downloaded as {}".format(
        round_number, dataset_filename))
def download_raw_leaderboard(round_number=None, tournament=1):
    "Fetch the raw leaderboard entries of one round (default: current round)"
    client = NumerAPI(verbosity='warn')
    if round_number is None:
        round_number = client.get_current_round()
    variables = {'number': round_number, 'tournament': tournament}
    # query text is kept on one line so it is sent exactly as before
    gql = ''' query($number: Int! $tournament: Int!) { rounds(number: $number tournament: $tournament) { leaderboard { username LiveLogloss paymentGeneral { nmrAmount usdAmount } paymentStaking { nmrAmount usdAmount } stake { value } stakeResolution { destroyed } } } } '''
    response = client.raw_query(gql, variables)
    # only the first returned round is used
    return response['data']['rounds'][0]['leaderboard']
def download(filename, tournament=1, verbose=False):
    "Fetch the current Numerai dataset into `filename`; overwrites if present"
    if verbose:
        # the name is reported as given, before ~ expansion
        print("Download dataset {}".format(filename))
    dataset_url = NumerAPI().get_dataset_url(tournament=tournament)
    # e.g. ~/tmp becomes /home/.../tmp
    target = os.path.expanduser(filename)
    download_file(dataset_url, target)
def get_user_activities(user):
    "Dataframe of the activity of `user`, gathered over nx.tournament_iter()"
    napi = NumerAPI()
    records = []
    # only the tournament number of each (number, name) pair is needed
    for number, _ in nx.tournament_iter():
        records.extend(napi.get_user_activities(user, number))
    rows = [flatten_dict(record) for record in records]
    return pd.DataFrame.from_dict(rows)
def upload_status(upload_id, public_id, secret_key):
    "Status of an upload as a flat dictionary"
    napi = NumerAPI(public_id=public_id, secret_key=secret_key,
                    verbosity='warning')
    raw = napi.submission_status(upload_id)
    # entries that are themselves dicts are replaced by their 'value' item
    return {
        field: entry['value'] if isinstance(entry, dict) else entry
        for field, entry in raw.items()
    }
def test_get_current_round():
    # no fixture here: the rounds are created by hand, one after the other
    api = NumerAPI(public_id='foo', secret_key='bar',
                   manager=NumerMockManager())
    for expected_round in (1, 2):
        api.manager.create_competition(number=expected_round)
        # the most recently created competition must be reported as current
        assert api.get_current_round() == expected_round
def round_resolution_date(tournament=1):
    "Dataframe, indexed by round, holding the date each round was resolved."
    napi = NumerAPI(verbosity='warn')
    competitions = napi.get_competitions(tournament=tournament)
    frame = pd.DataFrame(competitions)[['number', 'resolveTime']]
    frame = frame.rename({'number': 'round', 'resolveTime': 'date'}, axis=1)
    # keep only the date part of every resolve time
    frame['date'] = [stamp.date() for stamp in frame['date'].tolist()]
    return frame.set_index('round').sort_index()
def downloadNumeraiData():
    """
    Download and unzip the current dataset into ./datasets/, then tidy up.

    After the download every `*.zip` directly inside ./datasets/ is moved to
    the `ZipFiles` directory (created if missing) and every `example*` file
    one directory level below ./datasets/ is deleted.

    The tidy-up uses glob/shutil/os instead of shell commands: the original
    `rm` call was issued without a shell, so it never ran.
    """
    import glob
    import os
    import shutil

    # set up paths for download of dataset and upload of predictions
    dataset_parent_folder = "./datasets/"
    zip_folder = "ZipFiles"

    # We don't need to login in order to download the dataset
    napi = NumerAPI(verbosity="info")

    # download current dataset
    napi.download_current_dataset(dest_path=dataset_parent_folder, unzip=True)

    # equivalent of: mv ./datasets/*.zip ZipFiles/
    os.makedirs(zip_folder, exist_ok=True)
    for archive in glob.glob(os.path.join(dataset_parent_folder, "*.zip")):
        target = os.path.join(zip_folder, os.path.basename(archive))
        shutil.move(archive, target)

    # equivalent of: rm ./datasets/*/example*  (plain rm skips directories)
    pattern = os.path.join(dataset_parent_folder, "*", "example*")
    for example in glob.glob(pattern):
        if os.path.isdir(example) and not os.path.islink(example):
            continue
        os.remove(example)
def load_data(round_number=False):
    """
    Read the pickled raw dataset of a round from `data/raw` two directories
    above this file. A falsy `round_number` selects the current round. When
    the pickle is missing, `get_raw_data.main` is run once and the read is
    retried.
    """
    napi = NumerAPI()
    number = round_number if round_number else napi.get_current_round()

    here = os.path.dirname(__file__)
    project_dir = os.path.join(here, os.pardir, os.pardir)
    pickle_name = '{}_numerai_raw.pkl'.format(number)
    raw_data_file = os.path.join(project_dir, 'data', 'raw', pickle_name)

    try:
        frame = pd.read_pickle(raw_data_file)
    except FileNotFoundError:
        # not fetched yet: fetch it, then read again
        get_raw_data.main(project_dir)
        frame = pd.read_pickle(raw_data_file)
    return frame
def test_get_competitions():
    # fixtures are avoided here so the test controls every competition
    api = NumerAPI(manager=NumerMockManager())

    # nothing has been created yet
    before = api.get_competitions()
    assert isinstance(before, list)
    assert len(before) == 0

    # after creating one competition exactly that one must be listed
    round_number = 42
    api.manager.create_competition(number=round_number)
    after = api.get_competitions()
    assert isinstance(after, list)
    assert len(after) == 1
    assert after[0]['number'] == round_number
def get_user_names():
    "List of the usernames returned by the rankings query (limit 100000)."
    client = NumerAPI()
    # query text is kept on one line so it is sent exactly as before
    response = client.raw_query(''' query { rankings(limit:100000, offset:0) { username } } ''')
    rankings = response['data']['rankings']
    return [entry['username'] for entry in rankings]
def download_leaderboard(round_number=None, tournament=1):
    """
    Leaderboard of one round of a tournament as returned by
    `raw_leaderboard_to_df`.

    When `round_number` is None the current round of `tournament` is used.
    """
    num = round_number
    if num is None:
        client = NumerAPI(verbosity='warn')
        num = client.get_current_round(tournament=tournament)
    raw = download_raw_leaderboard(round_number=num, tournament=tournament)
    return raw_leaderboard_to_df(raw, num)
def upload(filename, public_id, secret_key, tournament=1, block=True):
    """
    Send a tournament submission (csv file) to Numerai and report its status.

    With block=True (default) the token needs both the upload_submission and
    read_submission_info scopes; with block=False upload_submission alone is
    enough.

    Returns the tuple (upload_id, status).
    """
    client = NumerAPI(public_id=public_id, secret_key=secret_key,
                      verbosity='warning')
    upload_id = client.upload_predictions(filename, tournament=tournament)
    # status_block when blocking, otherwise a single upload_status lookup
    fetch_status = status_block if block else upload_status
    return upload_id, fetch_status(upload_id, public_id, secret_key)
def round_dates():
    "Dataframe, indexed by round, of the open and resolve date of each round."
    napi = NumerAPI(verbosity='warn')
    competitions = napi.get_competitions(tournament=1)
    frame = pd.DataFrame(competitions)[['number', 'openTime', 'resolveTime']]
    frame = frame.rename(
        {'number': 'round', 'openTime': 'open', 'resolveTime': 'resolve'},
        axis=1)
    # keep only the date part of both time columns
    for column in ('open', 'resolve'):
        frame[column] = [stamp.date() for stamp in frame[column].tolist()]
    return frame.set_index('round').sort_index()
def download_leaderboard(round1=None, round2=None, tournament=1):
    """
    Download leaderboard for specified round range.

    The range is `round1`..`round2`, both inclusive. Whichever bound is None
    is replaced by the current round of `tournament`, so with both None only
    the current round is downloaded.

    Returns the per-round dataframes (from `raw_leaderboard_to_df`)
    concatenated in round order.

    Raises ValueError if the resulting range is empty, i.e. the first round
    is greater than the last round.
    """
    napi = NumerAPI(verbosity='warn')
    if round1 is None or round2 is None:
        current = napi.get_current_round(tournament=tournament)
    r0 = current if round1 is None else round1
    r1 = current if round2 is None else round2
    if r0 > r1:
        # previously this surfaced as an UnboundLocalError at the return
        raise ValueError(
            "Empty round range: first round {} is after last round {}".format(
                r0, r1))

    frames = []
    for num in range(r0, r1 + 1):
        raw = download_raw_leaderboard(round_number=num,
                                       tournament=tournament)
        frames.append(raw_leaderboard_to_df(raw, num))

    # a single round is returned as is; several are concatenated only once
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames)
def download_leaderboard(round_number=None, tournament=1):
    """
    Leaderboard of one round of a tournament, with two extra columns.

    When `round_number` is None the current round is used. A 'tournament'
    column is inserted at position 1 and a boolean 'resolved' column at
    position 2; 'resolved' is False exactly when the absolute values of all
    usd/nmr payout and burn columns sum to zero.
    """
    tournament = nx.tournament_int(tournament)
    num = round_number
    if num is None:
        num = NumerAPI(verbosity='warn').get_current_round()

    raw = download_raw_leaderboard(round_number=num, tournament=tournament)
    df = raw_leaderboard_to_df(raw, num)
    df.insert(1, 'tournament', tournament)

    payout_cols = ['usd_main', 'usd_stake', 'nmr_main', 'nmr_stake',
                   'nmr_burn']
    total = df[payout_cols].abs().sum().sum()
    df.insert(2, 'resolved', not (total == 0))
    return df
from utils import (
    save_model,
    load_model,
    neutralize,
    get_biggest_change_features,
    validation_metrics,
    ERA_COL,
    DATA_TYPE_COL,
    TARGET_COL,
    EXAMPLE_PREDS_COL
)

# fetch every dataset file used further down
napi = NumerAPI()
current_round = napi.get_current_round()

# The live file is stored under a name containing the round number; the
# other files are stored under download_dataset's default destination.
print('Downloading dataset files...')
Path("./v4").mkdir(parents=False, exist_ok=True)
for download_args in (
    ("v4/train.parquet",),
    ("v4/validation.parquet",),
    ("v4/live.parquet", f"v4/live_{current_round}.parquet"),
    ("v4/validation_example_preds.parquet",),
    ("v4/features.json",),
):
    napi.download_dataset(*download_args)

print('Reading minimal training data')
def getapi():
    "NumerAPI built from getenv('NUMERAI_ID') and getenv('NUMERAI_SECRET')"
    numerai_id = getenv('NUMERAI_ID')
    numerai_secret = getenv('NUMERAI_SECRET')
    # both values are handed over positionally, in this order
    return NumerAPI(numerai_id, numerai_secret)
def download_data():
    "Download the current dataset zip to ../../tmp and extract it"
    napi = NumerAPI()
    # unzip=False: extraction is done below, into a different directory
    archive_path = napi.download_current_dataset(dest_path='../../tmp',
                                                 unzip=False)
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall("../../kazutsugi/datasets")
def fixture_for_api():
    "NumerAPI on a mock manager that already holds unresolved competition 0"
    manager = NumerMockManager()
    manager.create_competition(0, resolved=False)
    api = NumerAPI(public_id='foo', secret_key='bar', manager=manager)
    return api
def get_stakes(round_number=None, tournament=1, sort_by='prize pool',
               mark_user=None, use_integers=True):
    """
    Download stakes, modify it to make it more useful, return as dataframe.

    cumsum is dollars ABOVE you.

    Parameters
    ----------
    round_number : int or None
        Round to query. None is sent to the API as round 0. Rounds below 61
        are rejected.
    tournament : int
        Tournament number passed to the query.
    sort_by : str
        One of 'prize pool' (confidence descending, then days descending),
        'c', 's', 'soc' (descending) or 'days', 'user' (ascending).
    mark_user : str or None
        If this user has a non-zero-confidence stake, a 'mark' column is
        added: '<<<<' for the user and 'new' for stakes younger than theirs.
    use_integers : bool
        If True, 'days' is rounded to 4 decimals and 's', 'soc' and 'cumsum'
        are cast to int.

    Returns
    -------
    (stakes, c_zero_users) : the dataframe indexed by user with columns
    days, s, soc, cumsum, c (plus 'mark' when applicable), and the list of
    users whose confidence is 0 (they are removed from the dataframe).
    When nobody has staked the dataframe is empty instead of a KeyError
    being raised.

    Raises
    ------
    ValueError
        If `round_number` is below 61 or `sort_by` is not recognized.
    """
    # get raw stakes
    napi = NumerAPI()
    query = ''' query stakes($number: Int! $tournament: Int!){ rounds(number: $number tournament: $tournament){ leaderboard { username stake { insertedAt soc confidence value } } } } '''
    if round_number is None:
        round_number = 0
    elif round_number < 61:
        raise ValueError('First staking was in round 61')
    arguments = {'number': round_number, 'tournament': tournament}
    response = napi.raw_query(query, arguments)

    # massage raw stakes
    leaderboard = response['data']['rounds'][0]['leaderboard']
    strptime = datetime.datetime.strptime
    # naive UTC on purpose: it is subtracted from naive parsed timestamps
    now = datetime.datetime.utcnow()
    secperday = 24 * 60 * 60
    micperday = 1000000 * secperday
    rows = []
    for entry in leaderboard:
        stake = entry['stake']
        if stake['value'] is None:
            # this leaderboard entry carries no stake
            continue
        age = now - strptime(stake['insertedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
        days = age.days
        days += 1.0 * age.seconds / secperday
        days += 1.0 * age.microseconds / micperday
        rows.append({
            'user': entry['username'],
            's': float(stake['value']),
            'c': decimal.Decimal(stake['confidence']),
            'soc': float(stake['soc']),
            'days': days,
        })

    # jam stakes into a dataframe; explicit columns and float dtypes keep
    # the code below working when there is not a single stake
    stakes = pd.DataFrame(rows, columns=['days', 's', 'soc', 'c', 'user'])
    stakes = stakes.astype({'days': float, 's': float, 'soc': float})

    # remove C=0 stakers
    c_zero_users = stakes.user[stakes.c == 0].tolist()
    stakes = stakes[stakes.c != 0]

    # index by user
    stakes = stakes.set_index('user')

    # sort in prize pool order; add s/c cumsum
    stakes = stakes.sort_values(['c', 'days'], axis=0,
                                ascending=[False, False])
    cumsum = stakes.soc.cumsum(axis=0) - stakes.soc  # dollars above you
    stakes.insert(3, 'cumsum', cumsum)

    # other sorting; value is the `ascending` flag for that key
    sort_ascending = {'c': False, 's': False, 'soc': False,
                      'days': True, 'user': True}
    if sort_by != 'prize pool':
        try:
            ascending = sort_ascending[sort_by]
        except (KeyError, TypeError):
            raise ValueError("`sort_by` key not recognized") from None
        stakes = stakes.sort_values([sort_by], ascending=[ascending])

    # round stakes
    if use_integers:
        stakes['days'] = stakes['days'].round(4)
        stakes['s'] = stakes['s'].astype(int)
        stakes['soc'] = stakes['soc'].astype(int)
        stakes['cumsum'] = stakes['cumsum'].astype(int)

    # mark user
    if mark_user is not None and mark_user in stakes.index:
        stakes['mark'] = ''
        me = stakes.loc[mark_user]['days']
        idx = stakes.days < me
        stakes.loc[idx, 'mark'] = 'new'
        stakes.loc[mark_user, 'mark'] = '<<<<'

    return stakes, c_zero_users
def get_stakes_minimal(round_number=None, tournament=1, mark_user=None):
    """
    Download stakes, modify it to make it more useful, return as dataframe.

    `round_number` None is sent to the API as round 0; rounds below 61 raise
    ValueError. The returned dataframe is indexed by user, has the columns
    days, s, soc, c and is sorted by confidence (descending), then days
    (descending). If `mark_user` has a stake, a 'mark' column is added:
    '<<<<' for the user and 'new' for stakes younger than theirs.

    When nobody has staked an empty dataframe is returned instead of a
    KeyError being raised.
    """
    tournament = nx.tournament_int(tournament)

    # get raw stakes
    napi = NumerAPI()
    query = ''' query stakes($number: Int! $tournament: Int!){ rounds(number: $number tournament: $tournament){ leaderboard { username stake { insertedAt soc confidence value } } } } '''
    if round_number is None:
        round_number = 0
    elif round_number < 61:
        raise ValueError('First staking was in round 61')
    arguments = {'number': round_number, 'tournament': tournament}
    response = napi.raw_query(query, arguments)

    # massage raw stakes
    leaderboard = response['data']['rounds'][0]['leaderboard']
    strptime = datetime.datetime.strptime
    # naive UTC on purpose: it is subtracted from naive parsed timestamps
    now = datetime.datetime.utcnow()
    secperday = 24 * 60 * 60
    micperday = 1000000 * secperday
    rows = []
    for entry in leaderboard:
        stake = entry['stake']
        if stake['value'] is None:
            # this leaderboard entry carries no stake
            continue
        age = now - strptime(stake['insertedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
        days = age.days
        days += 1.0 * age.seconds / secperday
        days += 1.0 * age.microseconds / micperday
        rows.append({
            'user': entry['username'],
            's': float(stake['value']),
            'c': decimal.Decimal(stake['confidence']),
            'soc': float(stake['soc']),
            'days': days,
        })

    # jam stakes into a dataframe; explicit columns and float dtypes keep
    # the frame well-formed when there is not a single stake
    stakes = pd.DataFrame(rows, columns=['days', 's', 'soc', 'c', 'user'])
    stakes = stakes.astype({'days': float, 's': float, 'soc': float})

    # index by user
    stakes = stakes.set_index('user')

    # sort in prize pool order
    stakes = stakes.sort_values(['c', 'days'], axis=0,
                                ascending=[False, False])

    # mark user
    if mark_user is not None and mark_user in stakes.index:
        stakes['mark'] = ''
        me = stakes.loc[mark_user]['days']
        idx = stakes.days < me
        stakes.loc[idx, 'mark'] = 'new'
        stakes.loc[mark_user, 'mark'] = '<<<<'

    return stakes
import os

from numerapi import NumerAPI

# SECURITY: API credentials must never live in source control. They are read
# from the environment instead; a missing variable fails fast with KeyError.
# NOTE(review): the key pair that used to be hard-coded here has been exposed
# to everyone with access to this file's history - revoke and regenerate it.
public_id = os.environ["NUMERAI_PUBLIC_ID"]
secret_key = os.environ["NUMERAI_SECRET_KEY"]
api = NumerAPI(public_id=public_id, secret_key=secret_key)

# location of the submission csv that gets uploaded
base_path = "../../kazutsugi/submissions/"
path = base_path + 'kazutsugi' + "_submission.csv"

print('uploading')
api.upload_predictions(path)
def get_current_round_number(tournament):
    "Number of the round currently open in `tournament`."
    client = NumerAPI(verbosity='warn')
    return client.get_current_round(tournament=tournament)
def upload(filename, tournament, public_id, secret_key, block=True,
           n_tries=100, sleep_seconds=60, verbose=False, model_id=None):
    """
    Send a tournament submission (csv file) to Numerai, retrying on failure.

    Accounts that own more than one model have to pass `model_id`; a given
    `model_id` must be one of the account's models.

    A failed upload is retried until `n_tries` attempts were made, waiting
    `sleep_seconds` after every failure. An error whose message starts with
    "Can't update submission after deadline" is re-raised at once instead
    of being retried.

    With block=True (default) the token needs both the upload_submission and
    read_submission_info scopes; with block=False upload_submission alone is
    enough.

    Returns the tuple (upload_id, status). Raises Exception when the model
    selection is invalid, the deadline has passed or all attempts failed.
    """
    tournament = nx.tournament_int(tournament)
    napi = NumerAPI(public_id=public_id, secret_key=secret_key,
                    verbosity='warning')

    # check the model selection once, before any upload attempt
    models = napi.get_models()
    if len(models) > 1 and model_id is None:
        raise Exception(
            f"Account has multiple models - you must specify model_id from {models}"
        )
    if model_id and model_id not in models.values():
        raise Exception(
            f"Specified model_id {model_id} not found in account models {models}"
        )

    attempts = 0
    while True:
        if not attempts < n_tries:
            raise Exception('Upload failed after reaching max retries')
        try:
            upload_id = napi.upload_predictions(filename,
                                                tournament=tournament,
                                                model_id=model_id)
            fetch_status = status_block if block else upload_status
            status = fetch_status(upload_id, public_id, secret_key,
                                  model_id=model_id)
            return upload_id, status
        except Exception as e:  # noqa
            if str(e).startswith("Can't update submission after deadline"):
                # past the deadline a retry cannot succeed: give up now
                raise Exception(e)
            print('Upload exception - %s' % e)
            time.sleep(sleep_seconds)
            attempts += 1
# import dependencies
from numerapi import NumerAPI
from os import environ, path, getcwd
from yaml import safe_load


def _conf_or_env(conf, key, env_name):
    """Return conf[key] if present and not None, else environ[env_name].

    Raises KeyError when neither source provides the value.
    """
    value = conf.get(key)
    return value if value is not None else environ[env_name]


# Load your API keys and model from config.yml
with open("config.yml", "r") as yml:
    # safe_load returns None for an empty file; treat that as "no settings"
    numerai_conf = safe_load(yml) or {}

# Set your API keys and model_id (config.yml wins over the environment)
public_id = _conf_or_env(numerai_conf, "public_id", 'NUMERAI_PUBLIC_ID')
secret_key = _conf_or_env(numerai_conf, "secret_key", 'NUMERAI_SECRET_KEY')
model_id = _conf_or_env(numerai_conf, "model_id", 'NUMERAI_MODEL_ID')

napi = NumerAPI(public_id=public_id, secret_key=secret_key, verbosity="info")
current_round = napi.get_current_round()

# Destination: config.yml, then NUMERAI_DEST_PATH, then the working directory.
# The environment variable is optional, so the last fallback is reachable.
dest_path = numerai_conf.get("dest_path")
if dest_path is None:
    dest_path = environ.get('NUMERAI_DEST_PATH')
if not dest_path:
    dest_path = getcwd()
path_numerai_dataset = path.join(dest_path, f'numerai_dataset_{current_round}')

# Download and unzip the tournament dataset of current round
if not path.isdir(path_numerai_dataset):
    napi.download_current_dataset(dest_path=dest_path, unzip=True)

# Upload example_predictions.csv
submission_id = napi.upload_predictions(
    path.join(path_numerai_dataset, 'example_predictions.csv'),
    model_id=model_id)