def download_data(api: "numerapi.NumerAPI", keys):
    """Download the current dataset unless the cached round is still current.

    Args:
        api: Client exposing ``get_current_round()`` and
            ``download_current_dataset(path)``.
        keys: Mapping whose ``'LATEST_ROUND'`` entry is convertible with
            ``int()``; it holds the round number of the last download.

    Returns:
        The round number as an ``int``: the cached one when it equals the
        current round, otherwise the current round after the dataset has been
        downloaded into ``./data``.
    """
    cached_round = int(keys['LATEST_ROUND'])
    # Ask for the round exactly once: a second request could return a
    # different value if the round rolls over between the two calls, and it
    # costs an extra round trip for nothing.
    current_round = api.get_current_round()
    if cached_round == current_round:
        return cached_round
    api.download_current_dataset('./data')
    return current_round
def test_get_current_round():
    """The current round follows the most recently created competition."""
    # don't use fixture here, create our own rounds
    mock_manager = NumerMockManager()
    api = NumerAPI(public_id='foo', secret_key='bar', manager=mock_manager)

    # Create round 1, check it, then create round 2 and check again.
    for expected_round in (1, 2):
        api.manager.create_competition(number=expected_round)
        assert api.get_current_round() == expected_round
def main(project_dir):
    """Fetch the current round's raw dataset into ``<project_dir>/data/raw``.

    The dataset is pickled as ``<round>_numerai_raw.pkl``. When that file is
    already present nothing is downloaded.

    Args:
        project_dir: Project root; the pickle is written below its
            ``data/raw`` sub-directory, which is created when missing.
    """
    logger = logging.getLogger(__name__)
    logger.info('Getting raw data')

    napi = NumerAPI()
    round_number = napi.get_current_round()
    dataset_filename = '{}_numerai_raw.pkl'.format(round_number)
    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    raw_data_file = os.path.join(raw_data_path, dataset_filename)

    # Check the target file directly instead of listing the directory:
    # listing raises FileNotFoundError when data/raw does not exist yet.
    if os.path.isfile(raw_data_file):
        logger.info("Dataset for round {} already downloaded as {}".format(
            round_number, dataset_filename))
        return

    # Make sure the destination exists so the final to_pickle cannot fail
    # after the (expensive) download has already happened.
    os.makedirs(raw_data_path, exist_ok=True)

    logger.info("Downloading data for round {}".format(round_number))
    # The URL is only needed on this path, so it is requested here rather
    # than up front.
    dataset_url = napi.get_dataset_url()
    df = download_dataset_as_df(dataset_url)
    logger.info('Data concatenated, downcasting data')
    df = df_to_numeric(df)
    logger.info('Data converted, saving to file')
    df.to_pickle(raw_data_file)
    logger.info("Dataset for round {} downloaded as {}".format(
        round_number, dataset_filename))
def download_raw_leaderboard(round_number=None, tournament=1):
    """Return the raw leaderboard entries for one round of a tournament.

    When ``round_number`` is None the current round reported by the API is
    used. The result is the ``leaderboard`` value of the first round in the
    query response.
    """
    # Adjacent literals are concatenated into one single-line query string.
    query = (
        ' query($number: Int! $tournament: Int!) {'
        ' rounds(number: $number tournament: $tournament) {'
        ' leaderboard {'
        ' username'
        ' LiveLogloss'
        ' paymentGeneral { nmrAmount usdAmount }'
        ' paymentStaking { nmrAmount usdAmount }'
        ' stake { value }'
        ' stakeResolution { destroyed }'
        ' } } } '
    )
    napi = NumerAPI(verbosity='warn')
    number = napi.get_current_round() if round_number is None else round_number
    response = napi.raw_query(
        query, {'number': number, 'tournament': tournament})
    first_round = response['data']['rounds'][0]
    return first_round['leaderboard']
def download_leaderboard(round1=None, round2=None, tournament=1):
    """Download leaderboard for specified round range.

    Args:
        round1: First round of the range; the tournament's current round
            when None.
        round2: Last round of the range (inclusive); the tournament's current
            round when None.
        tournament: Tournament passed through to the API and to
            ``download_raw_leaderboard``.

    Returns:
        The per-round dataframes produced by ``raw_leaderboard_to_df``,
        concatenated in round order (a single round is returned as is).

    Raises:
        ValueError: If the resolved range is empty, i.e. the first round is
            greater than the last one.
    """
    napi = NumerAPI(verbosity='warn')
    if round1 is None or round2 is None:
        # Only hit the API when at least one bound has to be filled in.
        current = napi.get_current_round(tournament=tournament)
    r0 = current if round1 is None else round1
    r1 = current if round2 is None else round2

    frames = [
        raw_leaderboard_to_df(
            download_raw_leaderboard(round_number=num, tournament=tournament),
            num,
        )
        for num in range(r0, r1 + 1)
    ]
    if not frames:
        # Previously this path failed with an UnboundLocalError on `df`.
        raise ValueError(
            'empty round range: first round {} is greater than last round {}'
            .format(r0, r1))
    if len(frames) == 1:
        return frames[0]
    # One concat over all frames avoids re-copying the accumulated dataframe
    # on every iteration.
    return pd.concat(frames)
def download_leaderboard(round_number=None, tournament=1):
    """
    Return the leaderboard dataframe for one round of one tournament.

    A `round_number` of None means the tournament's current round, which is
    looked up through the API.
    """
    num = round_number
    if num is None:
        num = NumerAPI(verbosity='warn').get_current_round(
            tournament=tournament)
    raw = download_raw_leaderboard(round_number=num, tournament=tournament)
    return raw_leaderboard_to_df(raw, num)
def load_data(round_number=False):
    """Load the pickled raw dataset for a round, fetching it if absent.

    A falsy ``round_number`` selects the current round reported by the API.
    When the pickle is not on disk, ``get_raw_data.main`` is run for the
    project directory and the read is retried once.
    """
    napi = NumerAPI()
    round_number = round_number or napi.get_current_round()

    # The project root is two levels above this module's directory.
    module_dir = os.path.dirname(__file__)
    project_dir = os.path.join(module_dir, os.pardir, os.pardir)
    pickle_name = '{}_numerai_raw.pkl'.format(round_number)
    raw_data_file = os.path.join(project_dir, 'data', 'raw', pickle_name)

    try:
        frame = pd.read_pickle(raw_data_file)
    except FileNotFoundError:
        get_raw_data.main(project_dir)
        frame = pd.read_pickle(raw_data_file)
    return frame
def download_leaderboard(round_number=None, tournament=1):
    """
    Return the leaderboard dataframe for one round of one tournament.

    A `round_number` of None means the current round reported by the API.
    Two columns are inserted into the result: 'tournament' at position 1 and
    'resolved' at position 2. 'resolved' is False only when the absolute
    values of all payout and burn columns sum to zero.
    """
    tournament = nx.tournament_int(tournament)

    num = round_number
    if num is None:
        num = NumerAPI(verbosity='warn').get_current_round()

    raw = download_raw_leaderboard(round_number=num, tournament=tournament)
    df = raw_leaderboard_to_df(raw, num)
    df.insert(1, 'tournament', tournament)

    payout_cols = ['usd_main', 'usd_stake', 'nmr_main', 'nmr_stake',
                   'nmr_burn']
    total = df[payout_cols].abs().sum().sum()
    df.insert(2, 'resolved', not (total == 0))
    return df
load_model, neutralize, get_biggest_change_features, validation_metrics, ERA_COL, DATA_TYPE_COL, TARGET_COL, EXAMPLE_PREDS_COL ) # download all the things napi = NumerAPI() current_round = napi.get_current_round() # Tournament data changes every week so we specify the round in their name. Training # and validation data only change periodically, so no need to download them every time. print('Downloading dataset files...') Path("./v4").mkdir(parents=False, exist_ok=True) napi.download_dataset("v4/train.parquet") napi.download_dataset("v4/validation.parquet") napi.download_dataset("v4/live.parquet", f"v4/live_{current_round}.parquet") napi.download_dataset("v4/validation_example_preds.parquet") napi.download_dataset("v4/features.json") print('Reading minimal training data') # read the feature metadata and get a feature set (or all the features) with open("v4/features.json", "r") as f:
def get_current_round_number(tournament):
    """Return the current round number the API reports for `tournament`."""
    return NumerAPI(verbosity='warn').get_current_round(tournament=tournament)