Exemplo n.º 1
0
def split_online_b(online_b_data, test_idx):
    """Split every workload into one held-out evaluation row and primer rows.

    For each workload id in ``online_b_data``, the row at position
    ``test_idx`` (0-based, counted within that workload) is moved to the
    evaluation set and all remaining rows go to the primer set. If
    ``test_idx`` is negative or not smaller than the workload's row count,
    that workload contributes every row to the primer set and nothing to the
    evaluation set.

    Args:
        online_b_data: Object exposing ``get_dataframe()``,
            ``get_workload_ids()`` and ``get_specific_workload(wl_id)``.
        test_idx: Integer position of the row to hold out in each workload.

    Returns:
        A ``(primer, eval, latency_gt)`` tuple where ``primer`` is a
        ``Dataset`` with the non-held-out rows, ``eval`` is a ``Dataset``
        with the held-out rows pruned to the ``'workload id'`` column plus
        the tuning knob headers, and ``latency_gt`` is the ``'latency'``
        column of the held-out rows taken before pruning.
    """
    columns = online_b_data.get_dataframe().columns
    primer_parts = []
    eval_parts = []
    for wl_id in online_b_data.get_workload_ids():
        # Fetch the workload frame once instead of once per row.
        frame = online_b_data.get_specific_workload(wl_id).get_dataframe()
        if 0 <= test_idx < len(frame):
            eval_parts.append(frame.iloc[test_idx:test_idx + 1])
            primer_parts.append(frame.iloc[:test_idx])
            primer_parts.append(frame.iloc[test_idx + 1:])
        else:
            # No row sits at test_idx, so nothing is held out here.
            primer_parts.append(frame)

    # NOTE(review): assumes each workload frame has the same columns as the
    # full dataset's frame -- confirm against Dataset.get_specific_workload.
    primer = Dataset(dataframe=_stack_rows(primer_parts, columns))
    # Named eval_set rather than eval to avoid shadowing the builtin.
    eval_set = Dataset(dataframe=_stack_rows(eval_parts, columns))

    # Read the ground-truth latency before it is pruned away below.
    latency_gt = eval_set.get_column_values('latency')
    eval_set = eval_set.prune_columns(['workload id'] +
                                      eval_set.get_tuning_knob_headers())

    return primer, eval_set, latency_gt


def _stack_rows(parts, columns):
    """Concatenate the non-empty frames in ``parts`` with a fresh 0..n-1 index.

    Returns an empty DataFrame with ``columns`` when there are no rows.
    """
    non_empty = [part for part in parts if len(part)]
    if not non_empty:
        return pd.DataFrame(columns=columns)
    # One concat replaces the per-row DataFrame.append calls, which no longer
    # exist in pandas >= 2.0.
    return pd.concat(non_empty, ignore_index=True)
def main():
    """Build workload GPRs from the pruned offline dataset and pickle them.

    Loads the offline workload dataset, keeps only the pruned metrics, the
    ``'workload id'`` column and the tuning knob columns, builds a
    ``WorkloadGPR`` from the result, and pickles its models. The elapsed
    time of the build and pickling steps is logged in whole seconds.
    """
    # GPRs are trained for the offline workloads only.
    offline_data = Dataset(file_path=DATASET_PATHS['offline_workload'])

    # Columns to keep: pruned metrics, the workload id, and every tuning knob.
    metric_headers = Dataset.load_pruned_metrics()
    kept_columns = (metric_headers + ['workload id'] +
                    offline_data.get_tuning_knob_headers())
    offline_data = offline_data.prune_columns(kept_columns)

    # Build the models and report how long it took.
    build_started = time()
    models = WorkloadGPR(dataset=offline_data)
    build_seconds = round(time() - build_started)
    LOG.info(f"Finished building GPRs in {build_seconds} seconds.")

    # Persist the models and report how long it took.
    LOG.info("Pickling GPRs...")
    pickle_started = time()
    models.pickle_models()
    pickle_seconds = round(time() - pickle_started)
    LOG.info(f"Finished pickling models in {pickle_seconds} seconds.")
Exemplo n.º 3
0
def main():
    """Train workload GPRs on a train split and log the validation MAPE.

    Clears the stored workload models, loads the offline workload dataset
    pruned to the pruned metrics, the ``'workload id'`` column and the
    tuning knob columns, holds out a fixed set of row positions from every
    workload as validation data, trains a ``WorkloadGPR`` on the remaining
    rows, and logs the mean absolute percentage error (MAPE) averaged over
    every (pruned metric, workload) model.

    Raises:
        IndexError: If a workload has fewer rows than the largest held-out
            position requires (raised by ``DataFrame.iloc``).
    """
    LOG.debug('Clearing out all of the workload models.')
    clear_wl_models()

    dataset = Dataset(file_path=DATASET_PATHS['offline_workload'])
    pruned_metrics = Dataset.load_pruned_metrics()
    knob_headers = dataset.get_tuning_knob_headers()
    pruned_dataset = dataset.prune_columns(pruned_metrics + ['workload id'] +
                                           knob_headers)
    df = pruned_dataset.get_dataframe()

    # Row positions (within each workload) that are held out for validation.
    holdout_positions = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
    workload_ids = pruned_dataset.get_workload_ids()
    validation_df = pd.concat(
        [df[df['workload id'] == wid].iloc[holdout_positions]
         for wid in workload_ids])

    # Index.difference yields index *labels*, so select with .loc; .iloc
    # would only be correct when the frame has a default 0..n-1 index.
    train_labels = df.index.difference(validation_df.index)
    train_df = df.loc[train_labels]
    train_dataset = Dataset(dataframe=train_df)

    # Input scaling is currently disabled. To re-enable it, fit a scaler
    # (e.g. a StandardScaler) on the training knob values and assign it here.
    scaler = None

    LOG.info("Training workload GPRs...")
    gprs = WorkloadGPR(dataset=train_dataset, scaler=scaler)

    LOG.info("Validating GPRs...")
    # The validation inputs do not depend on the metric or workload, so
    # compute them once instead of once per model.
    X = validation_df[knob_headers].values
    if scaler is not None:
        X = scaler.transform(X)

    # NOTE(review): every model is scored on the validation rows of *all*
    # workloads, not only its own workload -- confirm this is intended.
    result = {}
    for pm in pruned_metrics:
        y = validation_df[pm].values
        for wid in workload_ids:
            model = gprs.get_model(wid, pm)
            y_hat = model.predict(X)
            # MAPE in percent; a zero in y makes this inf/nan.
            result[f"{pm}|{wid}"] = np.mean(np.abs((y - y_hat) / y)) * 100

    LOG.info('Validation average MAPE: %s',
             np.array(list(result.values())).mean())