Example #1
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """
    Driver for BOWLOS weight learning
    """

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing BowlOS on {}:{}:{}".format(
        srl_method_name, evaluator_name, example_name))

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](
        example_name)
    predicate = EVAL_PREDICATE[example_name]

    logging.info("Optimizing over {} weights".format(num_weights))

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    get_function_value = write_get_function_value_fun(
        srl_method_name, example_name, fold, seed, evaluator_name,
        out_directory, study, truth_df, observed_df, target_df)

    best_weights = doLearn(num_weights, seed, get_function_value, alpha)

    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
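
A driver like this is normally invoked from the command line. The sketch below is illustrative only: it assumes the positional-argument order mirrors main()'s signature and that fold and seed are passed through as strings, as the snippet itself suggests; none of it comes from the original module.

import sys

if __name__ == '__main__':
    # Illustrative entry point: positional arguments mirror main()'s signature.
    srl_method, evaluator, example, fold, seed, alpha, study, out_dir = sys.argv[1:9]
    main(srl_method, evaluator, example, fold, seed, float(alpha), study, out_dir)
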
def calculate_experiment_performance(dataset, wl_method, evaluator, acq,
                                     folds):
    # initialize the experiment list that will be populated in the following for
    # loop with the performance outcome of each fold
    experiment_performance = np.array([])

    for fold in folds:
        # load the prediction dataframe
        try:
            # prediction dataframe
            if METHOD == 'psl':
                predicted_df = load_psl_prediction_frame(
                    dataset,
                    wl_method,
                    evaluator,
                    fold,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "acquisition_study",
                    acq=acq)
            elif METHOD == 'tuffy':
                predicted_df = load_tuffy_prediction_frame(
                    dataset,
                    wl_method,
                    evaluator,
                    fold,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "acquisition_study",
                    acq=acq)
            else:
                raise ValueError(
                    "{} not supported. Try: ['psl', 'tuffy']".format(METHOD))
        except FileNotFoundError as err:
            print(err)
            continue

        # truth dataframe
        truth_df = load_truth_frame(
            dataset, fold, dataset_properties[dataset]['evaluation_predicate'])
        # observed dataframe
        observed_df = load_observed_frame(
            dataset, fold, dataset_properties[dataset]['evaluation_predicate'])
        # target dataframe
        target_df = load_target_frame(
            dataset, fold, dataset_properties[dataset]['evaluation_predicate'])

        experiment_performance = np.append(
            experiment_performance,
            evaluator_name_to_method[evaluator](predicted_df, truth_df,
                                                observed_df, target_df))

    # organize into a performance_series
    performance_series = pd.Series(index=PERFORMANCE_COLUMNS, dtype=float)
    performance_series['Dataset'] = dataset
    performance_series['Wl_Method'] = wl_method
    performance_series['Acquisition_Function'] = acq
    performance_series['Evaluation_Method'] = evaluator
    performance_series['Mean'] = experiment_performance.mean()
    performance_series['Standard_Deviation'] = experiment_performance.std()

    return performance_series
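
The series returned above lends itself to being collected into one results table, one row per (weight-learning method, evaluator) pair. A minimal sketch, assuming the surrounding module (METHOD, dataset_properties, the loader helpers) is in scope; the dataset 'citeseer', the acquisition function 'UCB', and the method/evaluator lists are hypothetical placeholders.

import pandas as pd

# Hypothetical experiment grid; the real lists live elsewhere in the repository.
WL_METHODS = ['CRGS', 'HB', 'RGS']
EVALUATORS = ['Categorical', 'Continuous', 'Ranking']

rows = []
for wl_method in WL_METHODS:
    for evaluator in EVALUATORS:
        rows.append(calculate_experiment_performance(
            'citeseer', wl_method, evaluator, 'UCB', folds=range(8)))

# Each returned Series becomes one row of the results table.
results_df = pd.DataFrame(rows)
results_df.to_csv('acquisition_study_results.csv', index=False)
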
def calculate_experiment_training_performance(dataset, wl_method, evaluator, folds):
    dirname = os.path.dirname(__file__)

    # initialize the experiment list that will be populated in the following for
    # loop with the performance outcome of each fold
    training_performance = np.array([])

    for fold in folds:
        path = '{}/../results/weightlearning/{}/performance_study/{}/{}/{}/{}'.format(
            dirname, METHOD, dataset, wl_method, evaluator, fold
        )
        # load the prediction dataframe
        try:
            # prediction dataframe
            if METHOD == 'psl':
                predicted_df = load_psl_prediction_frame(dataset, wl_method, evaluator, fold,
                                                         dataset_properties[dataset]['evaluation_predicate'],
                                                         "performance_study", "learn",
                                                         inferred_predicates_file='inferred-train-predicates.txt')
            elif METHOD == 'tuffy':
                if wl_method == 'DiagonalNewton':
                    predicted_df = load_tuffy_prediction_frame(dataset, wl_method, evaluator, fold,
                                                               dataset_properties[dataset]['evaluation_predicate'],
                                                               "performance_study", "learn",
                                                               inferred_predicates_file='inferred-train-predicates.txt')
            else:
                raise ValueError("{} not supported. Try: ['psl', 'tuffy']".format(METHOD))
        except FileNotFoundError as err:
            print(err)
            continue

        if wl_method == 'DiagonalNewton':
            # truth dataframe
            truth_df = load_truth_frame(dataset, fold, dataset_properties[dataset]['evaluation_predicate'], phase='learn')
            # observed dataframe
            observed_df = load_observed_frame(dataset, fold, dataset_properties[dataset]['evaluation_predicate'], phase='learn')
            # target dataframe
            target_df = load_target_frame(dataset, fold, dataset_properties[dataset]['evaluation_predicate'], phase='learn')

            training_performance = np.append(training_performance,
                                             evaluator_name_to_method[evaluator](predicted_df,
                                                                                   truth_df,
                                                                                   observed_df,
                                                                                   target_df))
        else:
            cmd = "cat {}/learn_out.txt".format(path)
            output = subprocess.getoutput(cmd)
            # NOTE: the original snippet called np.append() with no arguments (a
            # TypeError); how the training objective is parsed out of `output` is
            # not shown, so this is left as a no-op append.
            training_performance = np.append(training_performance, [])

    # organize into a performance_series
    performance_series = pd.Series(index=PERFORMANCE_COLUMNS, dtype=float)
    performance_series['Dataset'] = dataset
    performance_series['Wl_Method'] = wl_method
    performance_series['Evaluation_Method'] = evaluator
    performance_series['Mean'] = training_performance.mean()
    performance_series['Standard_Deviation'] = training_performance.std()

    return performance_series

def calculate_experiment_performance(dataset, inference_method, evaluator, folds):
    # initialize the experiment list that will be populated in the following for
    # loop with the performance outcome of each fold
    fold_performance = np.array([])
    for fold in folds:
        experiment_performance = np.array([])

        path = '{}/../results/online/performance_study/{}/{}/{}/{}'.format(DIRNAME, inference_method,
                                                                           dataset, evaluator, fold)

        time_steps = [time_step for time_step in os.listdir(path)
                      if os.path.isdir(os.path.join(path, time_step))]
        for time_step in time_steps:
            # load the prediction dataframe
            try:
                predicted_df = load_psl_prediction_frame(dataset, inference_method, evaluator, fold, time_step,
                                                         dataset_properties[dataset]['evaluation_predicate'],
                                                         "performance_study")
            except FileNotFoundError as err:
                print(err)
                continue

            # truth dataframe
            truth_df = load_truth_frame(dataset, fold, time_step, dataset_properties[dataset]['evaluation_predicate'])
            # observed dataframe
            observed_df = load_observed_frame(dataset, fold, time_step, dataset_properties[dataset]['evaluation_predicate'])
            # target dataframe
            target_df = load_target_frame(dataset, fold, time_step, dataset_properties[dataset]['evaluation_predicate'])

            # experiment_performance will be a np array with experiment performance values indexed by the time step
            experiment_performance = np.append(experiment_performance,
                                               evaluator_name_to_method[evaluator](predicted_df,
                                                                                   truth_df,
                                                                                   observed_df,
                                                                                   target_df))
        if fold_performance.shape[0] == 0:
            fold_performance = np.array([experiment_performance])
        else:
            fold_performance = np.append(fold_performance, [experiment_performance], axis=0)

    # organize into a performance_dataframe
    performance_df = pd.DataFrame(columns=PERFORMANCE_COLUMNS, index=time_steps)
    performance_df['Dataset'] = dataset
    performance_df['Inference_Method'] = inference_method
    performance_df['Evaluation_Method'] = evaluator
    performance_df['Time_Step'] = time_steps
    performance_df['Mean'] = fold_performance.mean(axis=0)
    # sample standard deviation across folds (divide by sqrt(#folds) for the standard error)
    performance_df['Standard_Error'] = fold_performance.std(axis=0, ddof=1)

    return performance_df
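
Because the frame above is indexed by time step with a per-step mean and spread, a natural use is a mean-with-error-bars plot. A hedged sketch: the dataset, inference method, and evaluator names are placeholders, the matplotlib usage is not part of the original module, and it assumes the time-step directories are integer-named.

import matplotlib.pyplot as plt

perf_df = calculate_experiment_performance('citeseer', 'SGD_ONLINE', 'Categorical', folds=range(5))
steps = perf_df['Time_Step'].astype(int)  # assumes integer-named time-step directories

plt.errorbar(steps, perf_df['Mean'], yerr=perf_df['Standard_Error'], capsize=3)
plt.xlabel('Time step')
plt.ylabel('Evaluation metric')
plt.title('Online inference performance per time step')
plt.savefig('online_performance.png')
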
def calculate_experiment_robustness(dataset, wl_method, evaluator, iters):
    # initialize the experiment list that will be populated in the following for
    # loop with the performance outcome of each fold
    experiment_performance = np.array([])

    # truth dataframe
    truth_df = load_truth_frame(dataset, FOLD, dataset_properties[dataset]['evaluation_predicate'])
    # observed dataframe
    observed_df = load_observed_frame(dataset, FOLD, dataset_properties[dataset]['evaluation_predicate'])
    # target dataframe
    target_df = load_target_frame(dataset, FOLD, dataset_properties[dataset]['evaluation_predicate'])

    for iter in iters:
        # load the prediction dataframe
        try:
            # prediction dataframe
            if method == 'psl':
                predicted_df = load_psl_prediction_frame(dataset, wl_method, evaluator, iter,
                                                         dataset_properties[dataset]['evaluation_predicate'],
                                                         "robustness_study")
            elif method == 'tuffy':
                predicted_df = load_tuffy_prediction_frame(dataset, wl_method, evaluator, iter,
                                                           dataset_properties[dataset]['evaluation_predicate'],
                                                           "robustness_study", )
            else:
                raise ValueError("{} not supported. Try: ['psl', 'tuffy']".format(method))

        except FileNotFoundError as err:
            print(err)
            continue

        experiment_performance = np.append(experiment_performance,
                                           evaluator_name_to_method[evaluator](predicted_df,
                                                                               truth_df,
                                                                               observed_df,
                                                                               target_df))

    # organize into a performance_series
    robustness_series = pd.Series(index=ROBUSTNESS_COLUMNS, dtype=float)
    robustness_series['Dataset'] = dataset
    robustness_series['Wl_Method'] = wl_method
    robustness_series['Evaluation_Method'] = evaluator
    robustness_series['Mean'] = experiment_performance.mean()
    robustness_series['Standard_Deviation'] = experiment_performance.std()

    return robustness_series
Example #6
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha, study, out_directory):
    """
    Driver for RGS weight learning
    """
    # path to this file relative to caller
    dirname = os.path.dirname(__file__)

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing RGS on {}:{}:{}".format(srl_method_name, evaluator_name, example_name))

    # the same grid as the default psl core implementation of RGS
    grid = GRID[srl_method_name]

    # the same number of iterations as the default psl RGS for this experiment
    n = 50

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](example_name)
    predicate = EVAL_PREDICATE[example_name]

    # the dataframes we will be using for evaluation during learning
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    # initial state
    if IS_HIGHER_REP_BETTER[evaluator_name]:
        best_performance = -np.inf
    else:
        best_performance = np.inf
    best_weights = np.zeros(num_weights)
    np.random.seed(int(seed))

    for i in range(n):
        logging.info("Iteration {}".format(i))

        # obtain a random weight configuration for the model
        weights = np.random.choice(grid, num_weights)
        logging.info("Trying Configuration: {}".format(weights))

        # assign weight configuration to the model file
        HELPER_METHODS[srl_method_name]['write_learned_weights'](weights, example_name)

        # perform inference
        # TODO: psl file structure does not fit this pattern: wrapper_learn
        process = subprocess.Popen('cd {}/../{}_scripts; ./run_inference.sh {} {} {} {} {} {}'.format(
            dirname, srl_method_name, example_name, 'RGS', 'wrapper_learn', fold, evaluator_name, out_directory),
            shell=True)
        process.wait()

        # fetch results
        if study == "robustness_study":
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](example_name, 'RGS', evaluator_name,
                                                                                    seed, predicate, study, "learn", alpha)
        else:
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](example_name, 'RGS', evaluator_name,
                                                                                    fold, predicate, study, "learn", alpha)

        performance = EVALUATE_METHOD[evaluator_name](predicted_df, truth_df, observed_df, target_df)

        logging.info("Configuration Performance: {}: {}".format(evaluator_name, performance))

        # update best weight configuration if improved
        if IS_HIGHER_REP_BETTER[evaluator_name]:
            if performance > best_performance:
                best_performance = performance
                best_weights = weights
        else:
            if performance < best_performance:
                best_performance = performance
                best_weights = weights

    # assign best weight configuration to the model file
    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights, example_name)
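
Stripped of the PSL/Tuffy plumbing, the loop above is plain random grid search: draw a weight vector from a fixed grid, score it, and keep the best configuration. A self-contained sketch of the same pattern against a toy objective; the grid values and the objective are illustrative, not taken from the original GRID constant.

import numpy as np

def random_grid_search(objective, grid, num_weights, n=50, seed=0, higher_is_better=True):
    rng = np.random.default_rng(seed)
    best_performance = -np.inf if higher_is_better else np.inf
    best_weights = np.zeros(num_weights)
    for _ in range(n):
        # sample a random weight configuration from the grid and score it
        weights = rng.choice(grid, num_weights)
        performance = objective(weights)
        improved = (performance > best_performance) if higher_is_better else (performance < best_performance)
        if improved:
            best_performance, best_weights = performance, weights
    return best_weights, best_performance

# Toy objective: prefer weights close to 1.
grid = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
weights, score = random_grid_search(lambda w: -np.abs(w - 1.0).sum(), grid, num_weights=5)
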
Example #7
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """
    Driver for CRGS weight learning
    :param srl_method_name:
    :param evaluator_name:
    :param example_name:
    :param fold:
    :param seed:
    :param alpha:
    :param study:
    :param out_directory:
    :return:
    """
    # path to this file relative to caller
    dirname = os.path.dirname(__file__)

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing CRGS on {}:{}:{}".format(srl_method_name,
                                                      evaluator_name,
                                                      example_name))

    # the number of samples
    n = NUM_SAMPLES

    # defaults from the PSL core code, recentered for Tuffy to allow negative weights
    weight_mean = MEAN[srl_method_name]
    variance = 0.20

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](
        example_name)
    predicate = EVAL_PREDICATE[example_name]

    # parameters for a Gaussian sampling distribution (defined in the original
    # snippet but not used by the Dirichlet proposal below)
    mean_vector = np.array([weight_mean] * num_weights)
    variance_matrix = np.eye(num_weights) * variance

    logging.info("Optimizing over {} weights".format(num_weights))

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    # initial state
    if IS_HIGHER_REP_BETTER[evaluator_name]:
        best_performance = -np.inf
    else:
        best_performance = np.inf
    best_weights = np.zeros(num_weights)
    print("setting seed {}".format(seed))
    np.random.seed(int(seed))

    for i in range(n):
        logging.info("Iteration {}".format(i))

        # obtain a random weight configuration for the model
        # sample from dirichlet and randomly set the orthant
        weights = np.random.dirichlet(
            (np.ones(num_weights) * alpha)) * np.random.choice([-1, 1],
                                                               num_weights)
        logging.info("Trying Configuration: {}".format(weights))

        # assign weight configuration to the model file
        HELPER_METHODS[srl_method_name]['write_learned_weights'](weights,
                                                                 example_name)

        # perform inference
        # TODO: (Charles.) psl file structure needs to fit this pattern: wrapper_learn
        logging.info("writing to {}".format(out_directory))
        process = subprocess.Popen(
            'cd {}/../{}_scripts; ./run_inference.sh {} {} {} {} {}'.format(
                dirname, srl_method_name, example_name, 'wrapper_learn', fold,
                evaluator_name, out_directory),
            shell=True)
        logging.info("Waiting for inference")
        process.wait()

        # fetch results
        if study == "robustness_study":
            predicted_df = HELPER_METHODS[srl_method_name][
                'load_prediction_frame'](example_name, 'CRGS', evaluator_name,
                                         seed, predicate, study, alpha)
        else:
            predicted_df = HELPER_METHODS[srl_method_name][
                'load_prediction_frame'](example_name, 'CRGS', evaluator_name,
                                         fold, predicate, study, alpha)
        performance = EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                      observed_df, target_df)

        logging.info("Configuration Performance: {}: {}".format(
            evaluator_name, performance))

        # update best weight configuration if improved
        if IS_HIGHER_REP_BETTER[evaluator_name]:
            if performance > best_performance:
                best_performance = performance
                best_weights = weights
        else:
            if performance < best_performance:
                best_performance = performance
                best_weights = weights

    # assign best weight configuration to the model file
    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
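
The proposal inside the loop above draws from a symmetric Dirichlet, so the weight magnitudes always sum to one, and then flips each coordinate's sign independently. A small stand-alone sketch of that sampler; the alpha value and dimensionality are illustrative.

import numpy as np

np.random.seed(0)
num_weights, alpha = 5, 0.1

weights = np.random.dirichlet(np.ones(num_weights) * alpha) * np.random.choice([-1, 1], num_weights)

# The magnitudes still sum to one; a small alpha concentrates mass on few rules.
assert np.isclose(np.abs(weights).sum(), 1.0)
print(weights)
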
Example #8
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """
    Driver for HB weight learning
    :param srl_method_name:
    :param evaluator_name:
    :param example_name:
    :param fold:
    :param seed:
    :param alpha:
    :param study:
    :param out_directory:
    :return:
    """
    # path to this file relative to caller
    dirname = os.path.dirname(__file__)

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing Hyperband on {}:{}:{}".format(
        srl_method_name, evaluator_name, example_name))

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](
        example_name)
    predicate = EVAL_PREDICATE[example_name]

    logging.info("Optimizing over {} weights".format(num_weights))

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    # initial state
    np.random.seed(int(seed))

    def get_random_configuration():
        weights = np.random.dirichlet(
            (np.ones(num_weights) * alpha)) * np.random.choice([-1, 1],
                                                               num_weights)
        return weights

    def run_then_return_val_loss(num_iters, weights):
        # assign weight configuration to the model file
        HELPER_METHODS[srl_method_name]['write_learned_weights'](weights,
                                                                 example_name)

        # extra options to set max number of iterations
        extra_options = MAX_ITER_OPTION[srl_method_name] + str(
            int(np.ceil(num_iters)))

        # perform inference
        # TODO: (Charles.) psl file structure needs to fit this pattern if we want to use this wrapper : wrapper_learn
        process = subprocess.Popen(
            'cd {}/../{}_scripts; ./run_inference.sh {} {} {} {} {} {}'.format(
                dirname, srl_method_name, example_name, 'wrapper_learn', fold,
                evaluator_name, out_directory, extra_options),
            shell=True)
        process.wait()

        # fetch results
        if study == "robustness_study":
            predicted_df = HELPER_METHODS[srl_method_name][
                'load_prediction_frame'](example_name, 'HB', evaluator_name,
                                         seed, predicate, study, alpha)
        else:
            predicted_df = HELPER_METHODS[srl_method_name][
                'load_prediction_frame'](example_name, 'HB', evaluator_name,
                                         fold, predicate, study, alpha)

        # return negative if we are maximizing performance else positive
        if IS_HIGHER_REP_BETTER[evaluator_name]:
            return -EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                    observed_df, target_df)
        else:
            return EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                   observed_df, target_df)

    max_iter = MAX_ITER_DEFAULT[srl_method_name]  # maximum iterations/epochs per configuration
    eta = SURVIVAL_DEFAULT  # defines the downsampling rate (default=4)
    logeta = lambda x: np.log(x) / np.log(eta)
    s_max = int(logeta(max_iter))  # number of unique executions of Successive Halving (minus one)
    B = (s_max + 1) * max_iter  # total number of iterations (without reuse) per execution of Successive Halving (n, r)

    # initialize
    best_val = np.inf
    best_weights = np.zeros(num_weights)

    # Begin Finite Horizon Hyperband outerloop.
    for s in reversed(range(s_max + 1)):
        # initial number of configurations
        n = int(np.ceil(int(B / max_iter / (s + 1)) * eta ** s))
        # initial number of iterations to run configurations for
        r = max_iter * eta ** (-s)

        # Begin Finite Horizon Successive Halving with (n,r)
        T = [get_random_configuration() for _ in range(n)]
        val_losses = []
        total_iter = 0
        for i in range(s + 1):
            # Run each of the n_i configs for r_i iterations and keep best n_i/eta
            n_i = n * eta**(-i)
            r_i = r * eta**(i)
            total_iter = total_iter + r_i
            # The standard algorithm would run each surviving configuration for only r_i
            # additional iterations, but every call here restarts inference from scratch,
            # so we rerun for the cumulative iteration count to get the same effect.
            # Note: very inefficient.
            val_losses = [
                run_then_return_val_loss(num_iters=total_iter, weights=t)
                for t in T
            ]
            T = [T[j] for j in np.argsort(val_losses)[0:int(np.ceil(n_i / eta))]]
            logging.info(
                "Successive halving: (n,r) = ({}, {}) Bracket winners: Configs: {} Vals: {}"
                .format(n_i, r_i, T,
                        np.sort(val_losses)[0:int(np.ceil(n_i / eta))]))

        tournament_winning_val = min(val_losses)
        logging.info(
            "Hyperband outerloop: (s) = ({}) Tournament winner: Config: {} Val: {}"
            .format(s, T, tournament_winning_val))
        if tournament_winning_val < best_val:
            best_val = tournament_winning_val
            best_weights = T[0]

    # assign best weight configuration to the model file
    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
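
For intuition about the outer loop above, the bracket schedule can be printed ahead of time. The sketch below assumes, purely for illustration, MAX_ITER_DEFAULT = 100 and SURVIVAL_DEFAULT (eta) = 4 (the eta default named in the snippet's comment); it reproduces the same n and r arithmetic as the code, and fractional r values are rounded up by run_then_return_val_loss via np.ceil.

import numpy as np

max_iter, eta = 100, 4  # illustrative stand-ins for MAX_ITER_DEFAULT / SURVIVAL_DEFAULT
s_max = int(np.log(max_iter) / np.log(eta))
B = (s_max + 1) * max_iter

for s in reversed(range(s_max + 1)):
    n = int(np.ceil(int(B / max_iter / (s + 1)) * eta ** s))
    r = max_iter * eta ** (-s)
    print("bracket s={}: start with n={} configs at r={} iterations each".format(s, n, r))
# prints: s=3 -> n=64, r=1.5625; s=2 -> n=16, r=6.25; s=1 -> n=8, r=25.0; s=0 -> n=4, r=100
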