def main(project_dir):
    """Fetch the current round's Numerai dataset and cache it as a pickle.

    The dataset is stored at
    ``<project_dir>/data/raw/<round_number>_numerai_raw.pkl``. When that
    path already exists nothing is downloaded; otherwise the dataset is
    downloaded, passed through ``df_to_numeric`` and pickled.

    Args:
        project_dir: Project root directory; the ``data/raw`` sub-directory
            is created if it does not exist yet.
    """
    logger = logging.getLogger(__name__)
    logger.info('Getting raw data')

    napi = NumerAPI()
    round_number = napi.get_current_round()
    dataset_filename = '{}_numerai_raw.pkl'.format(round_number)
    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    raw_data_file = os.path.join(raw_data_path, dataset_filename)

    # A fresh checkout may not have data/raw yet; without this the cache
    # lookup (and later the pickle write) would fail on the missing folder.
    if not os.path.isdir(raw_data_path):
        os.makedirs(raw_data_path)

    if os.path.exists(raw_data_file):
        logger.info("Dataset for round %s already downloaded as %s",
                    round_number, dataset_filename)
        return

    logger.info("Downloading data for round %s", round_number)
    # Only ask the API for the URL when a download is actually needed.
    dataset_url = napi.get_dataset_url()
    df = download_dataset_as_df(dataset_url)

    logger.info('Data concatenated, downcasting data')
    df = df_to_numeric(df)

    logger.info('Data converted, saving to file')
    df.to_pickle(raw_data_file)

    logger.info("Dataset for round %s downloaded as %s",
                round_number, dataset_filename)
# Example #2
# 0
def download(filename, load=True, n_tries=100, sleep_seconds=300,
             verbose=False):
    """
    Download current Numerai dataset; overwrites if file exists.

    If `load` is True (default) then return data object; otherwise return
    None.

    If download fails then retry until `n_tries` attempts have been made,
    pausing `sleep_seconds` between attempts. There is no pause after the
    final failed attempt.

    Unlike nx.download() this function loads and returns the data object.

    Note: this function is best-effort. When every attempt fails it does not
    raise; with `load` True it still tries to load whatever is at `filename`.
    """
    # line below expands e.g. ~/tmp to /home/me/tmp...
    filename = os.path.expanduser(filename)
    attempt = 0
    while attempt < n_tries:
        attempt += 1
        try:
            if verbose:
                print("Download dataset {}".format(filename))
            napi = NumerAPI()
            url = napi.get_dataset_url(tournament=8)
            download_file(url, filename)
            break
        except Exception as err:
            # `Exception` (not a bare except) so that Ctrl-C / SystemExit
            # during a download are not swallowed by the retry loop.
            print('download failed: {}'.format(err))
            # Waiting is only useful if another attempt will follow.
            if attempt < n_tries:
                time.sleep(sleep_seconds)
    if load:
        return nx.load_zip(filename, verbose=verbose)
    return None
# Example #3
# 0
def download(filename, tournament=1, verbose=False):
    """
    Download the current Numerai dataset; overwrites if file exists.

    The dataset URL for `tournament` is requested from NumerAPI and handed,
    together with the user-expanded `filename`, to download_file. When
    `verbose` is True the (unexpanded) filename is printed first.
    """
    if verbose:
        print("Download dataset {}".format(filename))
    dataset_url = NumerAPI().get_dataset_url(tournament=tournament)
    # a leading ~ is resolved to the user's home directory before saving
    target_path = os.path.expanduser(filename)
    download_file(dataset_url, target_path)