Example #1
def evaluate(test_job,
             autoencoder,
             reg,
             lods,
             observed_traces,
             observed_traces_slices,
             out_data,
             test_aliases=None):
    # Avoid the mutable-default-argument pitfall.
    if test_aliases is None:
        test_aliases = []
    # Get the trace(s) for this specific test job
    idx = observed_traces_slices[test_job]
    trace = observed_traces.a[idx], observed_traces.X[idx], \
        observed_traces.Y[idx]

    # Extract encoding from given trace(s) and trained autoencoder
    extract_and_set_encoding(autoencoder, trace)
    proxy = extract_encoding_and_map_to_nearest(autoencoder, trace,
                                                lods.alias_to_id, test_aliases)
    out_data['mappings'][test_job].append(proxy)

    # Test data (without encodings)
    test = lods.test
    slices_test = test.slice_by_job_id(lods.alias_to_id)
    idxs_test = slices_test[test_job]
    X_test = np.hstack([test.a, test.X])[idxs_test, :]
    y_test = test.targets.ravel()[idxs_test]

    # Test data for regressor prediction (without mapping)
    X_test_, y_test_ = translate_X_y(X_test, y_test, autoencoder.centroids)

    # Test data for regressor prediction (with mapping)
    X_test__, y_test__ = translate_X_y(X_test, y_test,
                                       autoencoder.altered_centroids)

    reg_ = reg.clone()
    err = reg_.MAPE(X_test_, y_test_)
    logging.info("[Test job: {}] \t Error no-opt: {:.2f}".format(
        test_job, err))

    reg_ = reg.clone()
    assert np.abs(err - reg_.MAPE(X_test_, y_test_)) < 1e-10
    # Now let's use the data from mapping...
    err_mapping = reg_.MAPE(X_test__, y_test__)
    logging.info("[Test job: {}] \t Error map: {:.2f}".format(
        test_job, err_mapping))

    # err, err_mapping
    out_data['errs']['no-opt'][test_job].append(err)
    out_data['errs']['map'][test_job].append(err_mapping)
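
translate_X_y is a project helper not shown in these examples. A minimal sketch of one plausible implementation, assuming the first column of X holds the job alias and that each row gets its job's centroid encoding appended as extra features (the layout and names here are assumptions, not the project's actual code):

import numpy as np

def translate_X_y(X, y, centroids):
    # Hypothetical sketch: append each row's job centroid to its features.
    # Assumes X[:, 0] is the job alias and `centroids` maps alias -> encoding
    # vector; rows whose alias has no centroid are dropped from X and y.
    rows, targets = [], []
    for xi, yi in zip(X, y):
        alias = xi[0]
        if alias in centroids:
            rows.append(np.concatenate([xi, centroids[alias]]))
            targets.append(yi)
    return np.asarray(rows), np.asarray(targets)

(The variants in later examples also take an n_knob_cols argument, presumably to control which feature columns are kept.)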
Example #2
def evaluate(test_job,
             autoencoder,
             reg,
             lods,
             observed_traces,
             observed_traces_slices,
             out_data,
             test_aliases=None):
    # Avoid the mutable-default-argument pitfall.
    if test_aliases is None:
        test_aliases = []
    n_knob_cols = len(lods.config['COLS_KNOBS'])

    # Get the trace(s) for this specific test job
    idx = observed_traces_slices[test_job]
    trace = observed_traces.a[idx], observed_traces.X[idx], \
        observed_traces.Y[idx]

    # Extract encoding from given trace(s) and trained autoencoder
    extract_and_set_encoding(autoencoder, trace)
    proxy = extract_encoding_and_map_to_nearest(autoencoder,
                                                trace,
                                                lods.alias_to_id,
                                                test_aliases,
                                                within_template=False,
                                                metric='euclidean')
    out_data['mappings'][test_job].append(proxy)

    # Calibration data for autoencoder
    X_calib = np.hstack([observed_traces.a[idx], observed_traces.X[idx]])
    y_calib = observed_traces.targets.ravel()[idx]

    # Calibration Data for regressor ('without mapping' case):
    X_calib_, y_calib_ = translate_X_y(X_calib, y_calib, autoencoder.centroids,
                                       n_knob_cols)
    # Calibration Data for regressor ('with mapping' case):
    X_calib__, y_calib__ = translate_X_y(X_calib, y_calib,
                                         autoencoder.altered_centroids,
                                         n_knob_cols)

    # Test data (without encodings)
    test = lods.test
    slices_test = test.slice_by_job_id(lods.alias_to_id)
    idxs_test = slices_test[test_job]
    X_test = np.hstack([test.a, test.X])[idxs_test, :]
    y_test = test.targets.ravel()[idxs_test]

    # Test data for regressor prediction (without mapping)
    X_test_, y_test_ = translate_X_y(X_test, y_test, autoencoder.centroids,
                                     n_knob_cols)

    # Test data for regressor prediction (with mapping)
    X_test__, y_test__ = translate_X_y(X_test, y_test,
                                       autoencoder.altered_centroids,
                                       n_knob_cols)

    reg_ = reg.clone()
    err = reg_.MAPE(X_test_, y_test_)
    logging.info("[Test job: {}] \t Error no-opt: {:.2f}".format(
        test_job, err))
    reg_.calibrate(X_calib_, y_calib_)
    err_cal = reg_.MAPE(X_test_, y_test_)
    logging.info("[Test job: {}] \t Error cal: {:.2f}".format(
        test_job, err_cal))

    reg_ = reg.clone()
    assert np.abs(err - reg_.MAPE(X_test_, y_test_)) < 1e-10
    # Now let's use the data from mapping...
    err_mapping = reg_.MAPE(X_test__, y_test__)
    logging.info("[Test job: {}] \t Error map: {:.2f}".format(
        test_job, err_mapping))
    reg_.calibrate(X_calib__, y_calib__)
    err_mapping_cal = reg_.MAPE(X_test__, y_test__)
    logging.info("[Test job: {}] \t Error map_and_cal: {:.2f}".format(
        test_job, err_mapping_cal))

    # err, err_cal, err_mapping, err_mapping_cal
    out_data['errs']['no-opt'][test_job].append(err)
    out_data['errs']['cal'][test_job].append(err_cal)
    out_data['errs']['map'][test_job].append(err_mapping)
    out_data['errs']['map_then_cal'][test_job].append(err_mapping_cal)
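
Both evaluate variants append into nested containers that out_data must already provide. A minimal sketch of a compatible initialization, with the key names taken from the appends above (the defaultdict layout itself is an assumption):

from collections import defaultdict

# Hypothetical out_data layout compatible with the appends in the
# examples above; each defaultdict leaf maps test_job -> list of
# per-run values.
out_data = {
    'mappings': defaultdict(list),
    'errs': {
        'no-opt': defaultdict(list),
        'cal': defaultdict(list),
        'map': defaultdict(list),
        'map_then_cal': defaultdict(list),
    },
    'training_errs': [],
    'autoencoders': [],
    'regressors': [],
}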
Example #3
def train_model_and_evaluate(lods, out_data, seed=10):
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)
    n_knob_cols = len(lods.config['COLS_KNOBS'])

    nn_params = HYPER_PARAMS['nn_params']
    ae_params = HYPER_PARAMS['ae_params']
    ae_params['knob_cols'] = lods.config['COLS_KNOBS']
    ae_params['random_state'] = seed

    tmp_trainval = lods.trainval
    tmp_shared_trainval = lods.shared_trainval

    if N_TRAIN_PER_JOB != -1:
        tmp_trainval = lods.trainval.get_x(N_TRAIN_PER_JOB)
    if N_SHARED_TRAIN_PER_JOB != -1:
        tmp_shared_trainval = lods.shared_trainval.get_x(
            N_SHARED_TRAIN_PER_JOB)
    if tmp_trainval is not None:
        logging.info("shape of remaining trainval (X): {}".format(
            tmp_trainval.X.shape))
    else:
        logging.info("tmp_trainval is None (perhaps because of get_x(0))")

    if tmp_shared_trainval is not None:
        logging.info("shape of remaining shared trainval (X): {}".format(
            tmp_shared_trainval.X.shape))
    else:
        logging.info(
            "tmp_shared_trainval is None (perhaps because of get_x(0))")

    if tmp_trainval is None:
        # in case we're invoking dataset.get_x(0)
        ds_train = tmp_shared_trainval
    else:
        ds_train = tmp_trainval + tmp_shared_trainval

    X_train = np.hstack([ds_train.a, ds_train.X, ds_train.Y])
    y_train = ds_train.targets.ravel()

    logging.info("Fitting autoencoder on data of shape: {}".format(
        X_train.shape))

    # Make autoencoder and fit on loaded data
    autoencoder = FancyAutoEncoder.build(**ae_params)
    logging.info("Fitting autoencoder on data of shape: {}".format(
        X_train.shape))
    if ENCODING_STRATEGY == 'shared':
        shared_train = lods.shared_trainval.get_x(N_OBS)
        X_shared_train = np.hstack(
            [shared_train.a, shared_train.X, shared_train.Y])
        autoencoder.fit(X_train,
                        centroids_strategy='shared',
                        X_shared=X_shared_train,
                        log_time=True)
    else:
        autoencoder.fit(X_train, log_time=True)

    # Get centroids of encodings for different workloads
    centroids = autoencoder.centroids

    # Adjust the X vector by transforming Y into job's centroid
    X, y = translate_X_y(X_train, y_train, centroids, n_knob_cols)

    # Make and fit a NN Regressor
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(with_calibration=True,
                      v1_compat_mode=True,
                      **nn_params,
                      random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    if ENCODING_STRATEGY == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, autoencoder, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    # Append trained autoencoder information (with centroids) to output_data
    out_data['autoencoders'].append(autoencoder.get_persist_info())

    # Append trained regressor information to output_data
    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
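
reg.MAPE is logged as a percentage throughout; the method itself is not shown, but the standard mean absolute percentage error it presumably computes is:

import numpy as np

def mape(y_true, y_pred):
    # Mean absolute percentage error, in percent.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return 100.0 * np.mean(np.abs((y_true - y_pred) / y_true))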
Example #4
def train_model_and_evaluate(lods, out_data, seed=10):
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)
    n_knob_cols = len(lods.config['COLS_KNOBS'])

    nn_params = HYPER_PARAMS['nn_params']
    kpca_params = HYPER_PARAMS['kpca_params']

    tmp_trainval = lods.trainval
    tmp_shared_trainval = lods.shared_trainval

    if N_TRAIN_PER_JOB != -1:
        tmp_trainval = lods.trainval.get_x(N_TRAIN_PER_JOB)
    if N_SHARED_TRAIN_PER_JOB != -1:
        tmp_shared_trainval = lods.shared_trainval.get_x(
            N_SHARED_TRAIN_PER_JOB)

    if tmp_trainval is not None:
        logging.info("shape of remaining trainval (X): {}".format(
            tmp_trainval.X.shape))
    else:
        logging.info("tmp_trainval is None (perhaps because of get_x(0))")

    if tmp_shared_trainval is not None:
        logging.info("shape of remaining shared trainval (X): {}".format(
            tmp_shared_trainval.X.shape))
    else:
        logging.info(
            "tmp_shared_trainval is None (perhaps because of get_x(0))")

    if tmp_trainval is None:
        # in case we're invoking dataset.get_x(0)
        ds_train = tmp_shared_trainval
    else:
        ds_train = tmp_trainval + tmp_shared_trainval

    X_train = np.hstack([ds_train.a, ds_train.X, ds_train.Y])
    y_train = ds_train.targets.ravel()

    logging.info("Fitting KPCA on data of shape: {}".format(X_train.shape))

    # Make PCA and fit on loaded data
    fit_t = time.time()
    pca = KernelPCA(**kpca_params)
    pca.altered_centroids = None
    logging.info("Fitting KPCA on data of shape: {}".format(ds_train.Y.shape))

    if ENCODING_STRATEGY == 'shared':
        shared_train = lods.shared_trainval.get_x(N_OBS)
        pca.fit(ds_train.Y)
        encods_shared = pca.transform(shared_train.Y)
        centroids = compute_centroids(encods_shared, shared_train.a)
    else:
        encods = pca.fit_transform(ds_train.Y)
        centroids = compute_centroids(encods, ds_train.a)
    pca.centroids = centroids  # this is why I love Python! :-)
    fit_t = time.time() - fit_t
    logging.info("KPCA fitting time is: {} minutes and {} seconds".format(
        fit_t // 60, int(fit_t / 60)))

    # Adjust the X vector by transforming Y into job's centroid
    X, y = translate_X_y(X_train, y_train, pca.centroids, n_knob_cols)

    # Make and fit a NN Regressor
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(with_calibration=True,
                      **nn_params,
                      v1_compat_mode=True,
                      random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    if ENCODING_STRATEGY == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, pca, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
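
This variant calls compute_centroids directly on encodings and their aliases (the other examples use a compute_centroids that takes the encoder and lods instead). A minimal sketch consistent with this call, assuming a per-alias mean and a dict return type:

import numpy as np

def compute_centroids(encodings, aliases):
    # Hypothetical sketch: the centroid of a job is the mean of the
    # encoding rows belonging to that job's alias.
    encodings = np.asarray(encodings)
    aliases = np.asarray(aliases).ravel()
    return {alias: encodings[aliases == alias].mean(axis=0)
            for alias in np.unique(aliases)}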
Example #5
def train_model_and_evaluate(lods,
                             triplet_idxs,
                             out_data,
                             seed=10,
                             tf=None,
                             TripletPlusPlus=None,
                             NNregressor=None):
    # tf, TripletPlusPlus and NNregressor are injected by the caller
    # and must not be left as None.
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    n_knob_cols = lods.trainval.X.shape[1]
    # 2. train the autoencoder on the triplets

    autoencoder_params = HYPER_PARAMS['encoder_params']
    layer_sizes = ([autoencoder_params['_nh']] * autoencoder_params['_nhlayers']
                   + [ENCODING_SIZE + n_knob_cols])
    # Note: deleting these keys mutates HYPER_PARAMS['encoder_params'] in place.
    del autoencoder_params['_nh']
    del autoencoder_params['_nhlayers']

    # Setting activations to be relu
    autoencoder_params['layer_sizes'] = layer_sizes
    autoencoder_params['activations'] = ['relu'] * (
        len(autoencoder_params['layer_sizes']) - 1) + [None]

    # 561 metrics for streaming
    autoencoder_params['input_dim'] = lods.trainval.Y.shape[1]

    # n_knobs
    autoencoder_params['config_vec_size'] = n_knob_cols

    autoencoder = TripletPlusPlus(v1_compat_mode=True, **autoencoder_params)
    autoencoder.compile()
    autoencoder.centroids = None
    autoencoder.altered_centroids = None
    autoencoder.fit_idxs(triplet_idxs, fetch_triplets, lods, log_time=True)

    # 3. extract encodings for training workloads
    compute_centroids(autoencoder, lods, scheme=ENCODING_SCHEME)

    # 4. fetch training data for the regressor
    ds_train = lods.trainval + lods.shared_trainval
    X_ = np.hstack([ds_train.a, ds_train.X])
    y_ = ds_train.targets.ravel()
    X, y = translate_X_y(X_, y_, autoencoder.centroids)

    # 5. Train a regressor on the training data
    nn_params = HYPER_PARAMS['nn_params']
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(**nn_params, random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    # 6. calculate the training error
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    # 7. get observed traces and evaluate on test jobs...
    if ENCODING_SCHEME == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, autoencoder, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    # Append trained autoencoder information (with centroids) to output_data
    out_data['autoencoders'].append(autoencoder.get_persist_info())

    # Append trained regressor information to output_data
    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
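
TripletPlusPlus and fetch_triplets are project-specific and not shown. For context, the standard triplet margin loss such an encoder typically minimizes over (anchor, positive, negative) encodings is sketched below; this is the textbook formula, not necessarily the project's exact loss:

import numpy as np

def triplet_loss(e_a, e_p, e_n, margin=1.0):
    # Pull anchor/positive pairs together and push anchor/negative
    # pairs apart until separated by at least `margin`.
    d_ap = np.sum((e_a - e_p) ** 2, axis=1)
    d_an = np.sum((e_a - e_n) ** 2, axis=1)
    return np.mean(np.maximum(0.0, d_ap - d_an + margin))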
Example #6
def train_model_and_evaluate(lods, out_data, seed=10):
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    ds_train = lods.trainval + lods.shared_trainval

    # 2. train the autoencoder
    ae_params = HYPER_PARAMS['ae_params']
    ae_params['input_dim'] = ds_train.Y.shape[1]  # 561 metrics

    snnae = SNNAE.build(**ae_params)
    snnae.compile()
    snnae.fit(ds_train.Y, ds_train.a, log_time=True)

    # 3. extract encodings for training workloads
    compute_centroids(snnae, lods, scheme=ENCODING_SCHEME)

    # 4. fetch training data for the regressor
    X_ = np.hstack([ds_train.a, ds_train.X])
    y_ = ds_train.targets.ravel()
    X, y = translate_X_y(X_, y_, snnae.centroids)

    # 5. Train a regressor on the training data
    nn_params = HYPER_PARAMS['nn_params']
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(**nn_params,
                      v1_compat_mode=True,
                      keras_2=True,
                      random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    # 6. calculate the training error
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    # 7. get observed traces and evaluate on test jobs...
    if ENCODING_SCHEME == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, snnae, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    # Append trained encoder information (with centroids) to output_data
    out_data['autoencoders'].append(snnae.get_persist_info())

    # Append trained regressor information to output_data
    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
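
slice_by_job_id is used in every variant to map each job id to the row indices of its trace(s). A standalone sketch of the grouping it implies (written as a free function here; the method's actual signature is not shown):

import numpy as np

def slice_by_job_id(aliases, alias_to_id):
    # Hypothetical sketch: group row indices by the job id of each
    # row's alias.
    slices = {}
    for i, alias in enumerate(np.asarray(aliases).ravel()):
        slices.setdefault(alias_to_id[alias], []).append(i)
    return {job_id: np.asarray(idxs) for job_id, idxs in slices.items()}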
Example #7
def train_model_and_evaluate(lods, triplets, out_data, seed=10):
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    # 1. adjust the signature to take the triplets  # FIXME
    Ya = triplets['Ya']
    Yp = triplets['Yp']
    Yn = triplets['Yn']

    # 2. train the autoencoder on the triplets
    autoencoder_params = HYPER_PARAMS['autoencoder_params']
    autoencoder_params['input_dim'] = Ya.shape[1]  # 561 metrics

    autoencoder = TAutoEncoder(v1_compat_mode=True, **autoencoder_params)
    autoencoder.compile()
    autoencoder.centroids = None
    autoencoder.altered_centroids = None
    autoencoder.fit(Ya, Yp, Yn, log_time=True)

    # 3. extract encodings for training workloads
    compute_centroids(autoencoder, lods, scheme=ENCODING_SCHEME)

    # 4. fetch training data for the regressor
    ds_train = lods.trainval + lods.shared_trainval
    X_ = np.hstack([ds_train.a, ds_train.X])
    y_ = ds_train.targets.ravel()
    X, y = translate_X_y(X_, y_, autoencoder.centroids)

    # 5. Train a regressor on the training data
    nn_params = HYPER_PARAMS['nn_params']
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(**nn_params, random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    # 6. calculate the training error
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    # 7. get observed traces and evaluate on test jobs...
    if ENCODING_SCHEME == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, autoencoder, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    # Append trained autoencoder information (with centroids) to output_data
    out_data['autoencoders'].append(autoencoder.get_persist_info())

    # Append trained regressor information to output_data
    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
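
Every variant appends exactly one entry per run into out_data and persists after each run, which suggests a driver that sweeps seeds. A minimal usage sketch (the seed list is an assumption):

# Hypothetical driver: repeat training and evaluation over several
# seeds; out_data accumulates one entry per run and is re-persisted
# inside each call.
for seed in [10, 20, 30]:
    train_model_and_evaluate(lods, out_data, seed=seed)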