Exemplo n.º 1
0
def read_data(bench_dir, datasets):
    """Collect configs and final validation accuracies for several datasets.

    Args:
        bench_dir: Passed to ``Benchmark`` as the benchmark data location.
        datasets: Indexable sequence of dataset names. The number of
            configurations is read from the first entry only and reused for
            every dataset.

    Returns:
        A tuple ``(configs, y, dataset_names)`` of NumPy arrays with one
        entry per (dataset, config id) pair, ordered dataset-major: the
        queried ``"config"`` values, the last element of each
        ``"Train/val_accuracy"`` query result, and the dataset name that
        each entry belongs to.
    """
    bench = Benchmark(bench_dir, cache=False)
    n_configs = bench.get_number_of_configs(datasets[0])

    # Enumerate every (dataset, config id) pair once, dataset-major, so the
    # three result sequences below are guaranteed to line up.
    pairs = [(name, idx) for name in datasets for idx in range(n_configs)]

    curves = [
        bench.query(dataset_name=name, tag="Train/val_accuracy", config_id=idx)
        for name, idx in pairs
    ]
    configs = [
        bench.query(dataset_name=name, tag="config", config_id=idx)
        for name, idx in pairs
    ]

    # The target is the last recorded value of each curve.
    final_values = np.array([curve[-1] for curve in curves])
    names = np.array([name for name, _ in pairs])
    return np.array(configs), final_values, names
def read_data(bench_dir, dataset_name='Fashion-MNIST'):
    """Load one dataset's configs and training curves and split them.

    Args:
        bench_dir: Passed to ``Benchmark`` as the benchmark data location.
        dataset_name: Name of the dataset to query. Defaults to
            ``'Fashion-MNIST'``; an explicitly passed name is honoured
            rather than being overwritten.

    Returns:
        A tuple ``(train_data, val_data, test_data, train_targets,
        val_targets, test_targets)`` as produced by ``cut_data`` on the
        first 50%, next 25% and last 25% of the configurations.
    """
    bench = Benchmark(bench_dir, cache=False)
    n_configs = bench.get_number_of_configs(dataset_name)

    # Query API: one dict per configuration holding its "config" entry plus
    # every queriable tag that starts with "Train/".
    data = []
    for config_id in range(n_configs):
        data_point = {
            "config": bench.query(dataset_name=dataset_name,
                                  tag="config",
                                  config_id=config_id)
        }
        for tag in bench.get_queriable_tags(dataset_name=dataset_name,
                                            config_id=config_id):
            if tag.startswith("Train/"):
                data_point[tag] = bench.query(dataset_name=dataset_name,
                                              tag=tag,
                                              config_id=config_id)
        data.append(data_point)

    # Split: 50% train, 25% validation, 25% test (the data is already shuffled)
    train_end = int(np.floor(0.5 * n_configs))
    val_end = int(np.floor(0.75 * n_configs))

    array_data = np.array(data)
    train_data = array_data[:train_end]
    val_data = array_data[train_end:val_end]
    test_data = array_data[val_end:]

    # Cut curves for validation and test
    cut_position = 11
    val_data, val_targets = cut_data(val_data, cut_position)
    test_data, test_targets = cut_data(test_data, cut_position)
    train_data, train_targets = cut_data(
        train_data, 51)  # Cut last value as it is repeated

    return train_data, val_data, test_data, train_targets, val_targets, test_targets
    # NOTE(review): this fragment follows an unconditional `return` and uses
    # `predictions` / `trueVal`, which are not assigned anywhere above it. It
    # looks like the body of a separate metric function whose `def` line is
    # missing — confirm and restore the header.
    # predictions and true values have to be multiplied by 100 first
    predictions = predictions * 100
    trueVal = trueVal * 100

    # Mean squared difference, and its square root (the mean is recomputed).
    mse = torch.mean((predictions - trueVal)**2)
    rmse = torch.sqrt(torch.mean((predictions - trueVal)**2))

    return mse, rmse


# Script entry point: builds a Benchmark, obtains the train/val/test splits
# from read_data, passes them through get_data and prints sizes and shapes.
if __name__ == "__main__":
    print("------------- Let us predict some learning curves --------------")

    # NOTE(review): absolute path on one developer's machine; it has to be
    # adjusted before this script can run anywhere else.
    data_path = '/home/sambit/PROGRAMMING/DL_PROJECT/TEAM_WORK_FREIBURG/Extrapolation-of-Learning-Curves/DATA/fashion_mnist.json'
    data_root = Benchmark(data_dir=data_path)

    # NOTE(review): read_data receives a single argument that is already a
    # Benchmark instance, whereas the read_data definitions visible in this
    # file pass their first argument on to Benchmark(...) themselves —
    # confirm which read_data this script is meant to call.
    train_data, val_data, test_data, train_targets, val_targets, test_targets = read_data(
        data_root)

    # Number of entries in each split.
    print("Train:", len(train_data))
    print("Validation:", len(val_data))
    print("Test:", len(test_data))

    train_X, train_Y, val_X, val_Y, test_X, test_Y = get_data(
        train_data, val_data, test_data, train_targets, val_targets,
        test_targets)  # get the prepared data

    print(train_X.shape, train_Y.shape, val_X.shape, val_Y.shape)

    # get model and send to GPU
    # NOTE(review): no code for this step follows within this block.
import os
import pandas as pd

# git clone git@github.com:automl/LCBench.git
# and install all requirements before
from api import Benchmark

# Download from https://ndownloader.figshare.com/files/21188598 and unzip
bench_dir = "data_2k_lw.json"
bench = Benchmark(bench_dir, cache=True)

path = "data/runs/mlp_results/"

dataset_names = bench.get_dataset_names()
openml_task_ids = bench.get_openml_task_ids()

# `path` is nested, so create missing parent directories as well, and do not
# fail when the directory already exists (e.g. when the script is re-run).
os.makedirs(path, exist_ok=True)

for task in dataset_names:
    # Number of configurations available for this task (not used by the
    # statements in this part of the loop).
    nrun = bench.get_number_of_configs(task)
    # One-row frame built from the hyperparameter config with id 0.
    df = pd.DataFrame(
        [bench.query(dataset_name=task, tag="config", config_id=0)])
    # Each metric column is filled from the benchmark tag of the same name;
    # deriving the tag from the column name keeps the two from diverging.
    for metric in ("final_val_accuracy",
                   "final_test_accuracy",
                   "final_val_balanced_accuracy"):
        df[metric] = bench.query(dataset_name=task,
                                 tag=metric,
                                 config_id=0)