# Example #1
def generate_test_sets(trials,
                       N_test,
                       Delta,
                       d,
                       generator=MaxCallStochasticModel):
    """
    Generate ``trials`` independent test sets of identical dimensions.

    Each set is produced by the ``generate_train_set`` util in
    ``aggregating``; the per-trial arrays are then stacked along a new
    leading axis.

    :param trials: number of independent test sets to generate (must be >= 1,
        since ``np.stack`` requires at least one array)
    :param N_test: number of samples per test set
    :param Delta: number of time steps per sample
    :param d: dimension per time step
    :param generator: stochastic model passed through to
        ``generate_train_set`` (default: ``MaxCallStochasticModel``)
    :return: tuple ``(X_tests, y_tests)`` stacked into single numpy arrays
        of shapes ``(trials, N_test, Delta*d)`` and ``(trials, N_test, 1)``
    """
    # Build one (X, y) pair per trial, then transpose the list of pairs
    # into two tuples and stack each along a fresh leading axis.
    pairs = [generate_train_set(N_test, Delta, d, generator)
             for _ in range(trials)]
    X_test_list, y_test_list = zip(*pairs)
    return np.stack(X_test_list, axis=0), np.stack(y_test_list, axis=0)
# Example #2
## create logger

# MPI-aware logger: records are tagged with this process's rank.
logger = generate_logger_MPI(LOGFILE, LOGLEVEL, rank)
logger.info(f"node with rank {rank} started")

if rank == 0:
    """
    executed by main MPI process 
    
    mpiexec -n <num_nodes> python -m mpi4py.futures mpi\mpi_bagging.py
    will create 1 dispatcher node with rank 0 and num_node-1 workers for the pool

    """

    ## generate Training set, Test set & V_0s
    # Training/test data and the V_0 normalization constants are created
    # only on the dispatcher (rank 0).
    X_train, y_train = generate_train_set(Config.N_train, Config.Delta,
                                          Config.d)
    X_test, y_test = generate_test_set(Config.N_test, Config.Delta, Config.d)

    V_0_train = generate_V_0(Config.N_train, Config.Delta, Config.d)
    V_0_test = generate_V_0(Config.N_test, Config.Delta, Config.d)

    logger.info(f"V_0_test = {V_0_test}")

    # Baseline: a single GPR fitted on the full training set; its
    # normalized error serves as the reference to compare against.
    reference = create_GPR(Config.N_train)
    reference.fit(X_train, y_train)
    f_X = reference.predict(X_test)
    reference_error = normalized_error_VT(f_X, y_test, V_0_test)
    logger.info(f"reference error : {reference_error}")

    ## MPI execute
    results = []
    # NOTE(review): a bare `return` at module level is a SyntaxError, and
    # `model` / `DataContainer.X_test_list` are not defined in this excerpt —
    # this snippet was almost certainly lifted from inside a function body;
    # confirm against the original source before running.
    return train_and_evaluate(model, X_train, y_train,
                              DataContainer.X_test_list)


## init
# Standard MPI bootstrap: every process learns its rank and the world size.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

logger = generate_logger_MPI(LOGFILE, LOGLEVEL, rank)
logger.info(f"node with rank {rank}/{size} started")

## let the main task create the train & testsets
# Only rank 0 generates the data; all other ranks receive it via the
# broadcasts below.
if rank == 0:
    logger.info(f"creating train & testsets")
    DataContainer.X_train, DataContainer.y_train = generate_train_set(
        Config.N_train, Config.Delta, Config.d)
    DataContainer.X_test_list, DataContainer.y_test_list = generate_test_sets(
        Config.trials, Config.N_test, Config.Delta, Config.d)

## broadcast the required data to all nodes
# broadcast the numpy arrays separately for efficiency gains
# make broadcasts non blocking since the worker nodes are spawned at different times
# https://github.com/mpi4py/mpi4py/blob/70333ef76db05f643347b9880a05967891fb1eed/src/mpi4py/MPI/Comm.pyx#L750
# this feature is not documented in the documentation, but the source code clearly indicates it is present
# NOTE(review): Ibcast receives in place, so on ranks > 0 these DataContainer
# buffers must already be allocated with matching shape/dtype — presumably
# done elsewhere in DataContainer; verify.
xtrain_req = comm.Ibcast(DataContainer.X_train, root=0)
ytrain_req = comm.Ibcast(DataContainer.y_train, root=0)
# NOTE(review): the variable is named `ytest_req` but this broadcasts
# X_test_list, and y_test_list is never broadcast in this excerpt — looks
# like a bug; confirm against the worker-side receive code before renaming.
ytest_req = comm.Ibcast(DataContainer.X_test_list, root=0)
if rank > 0:
    # want the broadcast to be blocking since we need the data before continuing
    logger.debug(f"waiting for broadcasts")