Exemplo n.º 1
0
def main(arguments, parameters, fold):
    """Dispatch one evaluation to the dispatcher named in the experiment config.

    The dispatcher module name is read from the ``HPOLIB:dispatcher`` option;
    a trailing ``.py`` is stripped and the module is then imported from the
    ``HPOlib.dispatcher`` package and its ``dispatch`` function is called.

    Args:
        arguments: Not used by this function.
        parameters: Passed through to the dispatcher's ``dispatch``.
        fold: Passed through to the dispatcher's ``dispatch``.

    Returns:
        Tuple ``(status, wallclock_time, result, additional_data)``. When an
        ``ImportError`` is raised the tuple is ``("CRASHED", 0., nan, "")``.
    """
    cfg = wrapping_util.load_experiment_config_file()

    dispatch_function_name = cfg.get("HPOLIB", "dispatcher")
    # Raw string: "\." inside a plain literal is an invalid escape sequence.
    dispatch_function_name = re.sub(r"(\.py)$", "", dispatch_function_name)
    try:
        dispatch_function = importlib.import_module(
            "HPOlib.dispatcher.%s" % dispatch_function_name)

        # NOTE: an ImportError raised from inside dispatch() is also caught
        # below and reported as an invalid dispatcher value.
        additional_data, result, status, wallclock_time = \
            dispatch_function.dispatch(cfg, fold, parameters)
    except ImportError:
        additional_data = ""
        result = float("NaN")
        status = "CRASHED"
        wallclock_time = 0.
        logger.error("Invalid value %s for HPOLIB:dispatcher",
                     dispatch_function_name)

    return status, wallclock_time, result, additional_data
Exemplo n.º 2
0
def main(job_id, params):
    """Implement the Spearmint interface and then call HPOlib.

    Sets the logger level from the ``HPOLIB:HPOlib_loglevel`` option and
    forwards ``params`` to ``command_line_function``. ``job_id`` is accepted
    but not used here.

    Returns:
        Whatever ``command_line_function`` returns.
    """
    config = load_experiment_config_file()
    level = config.getint("HPOLIB", "HPOlib_loglevel")

    message_format = '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s'
    logging.basicConfig(format=message_format, datefmt='%H:%M:%S')
    logger.setLevel(level)

    return command_line_function(params, "HPOlib.optimization_interceptor")
Exemplo n.º 3
0
def main(job_id, params):
    """Implement the Spearmint interface and then call HPOlib.

    The log level comes from the ``HPOLIB:HPOlib_loglevel`` option; ``params``
    is handed to ``command_line_function`` and its return value is passed
    back unchanged. ``job_id`` is not used in this function.
    """
    settings = load_experiment_config_file()
    verbosity = settings.getint("HPOLIB", "HPOlib_loglevel")

    logging.basicConfig(
        format='[%(levelname)s] [%(asctime)s:%(name)s] %(message)s',
        datefmt='%H:%M:%S',
    )
    logger.setLevel(verbosity)

    target = "HPOlib.optimization_interceptor"
    return command_line_function(params, target)
Exemplo n.º 4
0
def main():
    """Implement the SMAC interface and then call HPOlib.

    Reads fold and seed from the command line, evaluates the parameters via
    ``command_line_function`` and prints the formatted result line with a
    fixed ``"SAT"`` status.

    Returns:
        The ``result`` value obtained from ``command_line_function``.
    """
    config = load_experiment_config_file()
    level = config.getint("HPOLIB", "HPOlib_loglevel")

    message_format = '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s'
    logging.basicConfig(format=message_format, datefmt='%H:%M:%S')
    logger.setLevel(level)

    fold, seed = parse_command_line()
    parameters = get_parameters()

    result, runtime = command_line_function(
        parameters, fold, "HPOlib.optimization_interceptor")

    # Single-argument parenthesised form prints the same text as the statement.
    print(format_return_string("SAT", runtime, 1, result, seed, ""))
    return result
Exemplo n.º 5
0
def main():
    """Implement the SMAC interface and then call HPOlib.

    Fold and seed are parsed from the command line, the parameters are
    evaluated through ``command_line_function`` and the formatted result
    line (always with status ``"SAT"``) is printed.

    Returns:
        The ``result`` value obtained from ``command_line_function``.
    """
    settings = load_experiment_config_file()
    verbosity = settings.getint("HPOLIB", "HPOlib_loglevel")

    logging.basicConfig(
        format='[%(levelname)s] [%(asctime)s:%(name)s] %(message)s',
        datefmt='%H:%M:%S',
    )
    logger.setLevel(verbosity)

    target = "HPOlib.optimization_interceptor"
    fold, seed = parse_command_line()
    parameters = get_parameters()
    result, runtime = command_line_function(parameters, fold, target)

    # Single-argument parenthesised form prints the same text as the statement.
    output_line = format_return_string("SAT", runtime, 1, result, seed, "")
    print(output_line)
    return result
Exemplo n.º 6
0
def main(*args, **kwargs):
    """Run a cross validation for one parameter configuration.

    The parameter dict is taken from the ``params`` keyword argument if
    present, otherwise from the first positional argument that is a dict.
    When neither exists, the positional-argument tuple itself is used.

    The start and end of the cross validation are recorded in the experiment
    file; the number of folds comes from ``HPOLIB:number_cv_folds``.

    Returns:
        The value returned by ``do_cv``.
    """
    logger.critical('args: %s kwargs: %s', str(args), str(kwargs))

    if 'params' in kwargs:
        params = kwargs['params']
    else:
        # isinstance (rather than an exact type comparison) also accepts
        # dict subclasses such as OrderedDict.
        params = next((arg for arg in args if isinstance(arg, dict)), None)

    if params is None:
        logger.critical(
            "No parameter dict found in cv.py.\n"
            "args: %s\n kwargs: %s", args, kwargs)
        # TODO: Hack for TPE and AUTOWeka
        params = args

    # Load the experiment to do time-keeping
    cv_starttime = time.time()
    experiment = load_experiment_file()
    experiment.start_cv(cv_starttime)
    del experiment

    cfg = load_experiment_config_file()

    # Load number of folds
    folds = cfg.getint('HPOLIB', 'number_cv_folds')

    params = flatten_parameter_dict(params)

    res = do_cv(params, folds=folds)
    logger.info("Result: %f", res)

    # Load the experiment again to record the end time
    experiment = load_experiment_file()
    experiment.end_cv(time.time())
    del experiment

    return res
Exemplo n.º 7
0
def main(*args, **kwargs):
    """Run a cross validation for one parameter configuration.

    The parameter dict is taken from the ``params`` keyword argument if
    present, otherwise from the first positional argument that is a dict.
    When neither exists, the positional-argument tuple itself is used.

    The start and end of the cross validation are recorded in the experiment
    file; the number of folds comes from ``HPOLIB:number_cv_folds``.

    Returns:
        The value returned by ``do_cv``.
    """
    logger.critical('args: %s kwargs: %s', str(args), str(kwargs))

    if 'params' in kwargs:
        params = kwargs['params']
    else:
        # isinstance (rather than an exact type comparison) also accepts
        # dict subclasses such as OrderedDict.
        params = next((arg for arg in args if isinstance(arg, dict)), None)

    if params is None:
        logger.critical("No parameter dict found in cv.py.\n"
                        "args: %s\n kwargs: %s", args, kwargs)
        # TODO: Hack for TPE and AUTOWeka
        params = args

    # Load the experiment to do time-keeping
    cv_starttime = time.time()
    experiment = load_experiment_file()
    experiment.start_cv(cv_starttime)
    del experiment

    cfg = load_experiment_config_file()

    # Load number of folds
    folds = cfg.getint('HPOLIB', 'number_cv_folds')

    params = flatten_parameter_dict(params)

    res = do_cv(params, folds=folds)
    logger.info("Result: %f", res)

    # Load the experiment again to record the end time
    experiment = load_experiment_file()
    experiment.end_cv(time.time())
    del experiment

    return res
Exemplo n.º 8
0
def do_cv(params, folds=10):
    """Evaluate ``params`` on every fold by calling ``runsolver_wrapper.py``.

    For each fold a shell command is started, its stdout is searched for a
    line containing ``"Result for ParamILS: SAT"`` and the seventh
    whitespace-separated token of that line is stored as the fold result.

    Args:
        params: Mapping of parameter names to values; each entry is rendered
            as ``-<name> <value>`` on the command line.
        folds: Number of folds to run.

    Returns:
        The mean of the fold results. ``HPOLIB:result_on_terminate`` is
        returned instead when the number of folds whose result equals that
        value reaches ``HPOLIB:max_crash_per_cv``, or when the mean is not
        finite (including after any exception during the run).
    """
    logger.info("Starting Cross validation")
    sys.stdout.flush()
    optimizer = get_optimizer()
    cfg = load_experiment_config_file()

    # Store the results to hand them back to tpe and spearmint
    results = []

    try:
        logger.info("%s", params)
        param_string = " ".join(
            "-" + str(param_name) + " " + str(params[param_name])
            for param_name in params)

        # The wrapper script path does not depend on the fold; build it once.
        runsolver_wrapper_script = "python " + os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "runsolver_wrapper.py")

        for fold in range(folds):
            # "Usage: runsolver_wrapper <instancename> " + \
            # "<instancespecificinformation> <cutofftime> <cutofflength> " + \
            # "<seed> <param> <param> <param>"
            # Cutofftime, cutofflength and seed can be safely ignored since they
            # are read in runsolver_wrapper
            cmd = "%s %d %s %d %d %d %s" % \
                (runsolver_wrapper_script, fold, optimizer, 0, 0, 0, param_string)
            logger.info("Calling command:\n%s", cmd)

            # NOTE(review): the command is run through a shell and the
            # parameter names/values are interpolated unquoted; confirm they
            # can never contain shell metacharacters.
            process = subprocess.Popen(cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       shell=True,
                                       executable="/bin/bash")
            logger.info(
                "--------------RUNNING RUNSOLVER_WRAPPER--------------")
            stdoutdata, stderrdata = process.communicate()
            if stdoutdata:
                logger.info(stdoutdata)
            if stderrdata:
                logger.error(stderrdata)

            # Read the runsolver_wrapper output; only the first matching
            # line is used.
            result_string = None
            for line in stdoutdata.split("\n"):
                pos = line.find("Result for ParamILS: SAT")
                if pos != -1:
                    result_string = line[pos:]
                    result_array = result_string.split()
                    results.append(float(result_array[6].strip(",")))
                    break

            if result_string is None:
                raise NotImplementedError(
                    "No result string available or result string doesn't contain SAT"
                )

            # If a specified number of runs crashed, quit the whole cross validation
            # in order to save time.
            worst_possible = cfg.getfloat("HPOLIB", "result_on_terminate")
            crashed_runs = np.nansum(
                [0 if res != worst_possible else 1 for res in results])
            if crashed_runs >= cfg.getint("HPOLIB", "max_crash_per_cv"):
                logger.warning("Aborting CV because the number of crashes "
                               "exceeds the configured max_crash_per_cv value")
                return worst_possible

            # TODO: Error Handling

        assert (len(results) == folds)
        mean = np.mean(results)

    except Exception as e:
        logger.error(format_traceback(sys.exc_info()))
        logger.error("CV failed %s %s", sys.exc_info()[0], e)
        # np.nan is the canonical spelling; the np.NaN alias no longer
        # exists in NumPy 2.0 and would raise inside this handler.
        mean = np.nan

    # Do not return any kind of nan because this would break spearmint
    if not np.isfinite(mean):
        mean = float(cfg.get("HPOLIB", "result_on_terminate"))

    logger.info("Finished CV")
    return mean
Exemplo n.º 9
0
def main():
    """Run hyperopt (TPE or random search) on a search space file.

    Command-line options select the search space module, the number of
    evaluations, the seed, whether to restore ``state.pkl`` from the current
    working directory, whether to use random search, and an optional working
    directory to change into first.

    The trials and domain objects are pickled to ``state.pkl`` after every
    ``FMinIter`` pass so the run can be restored. Exits with status 1 when the
    search space file does not exist.
    """
    # NOTE(review): the prog text mentions statistics.py and looks copied
    # from another script - confirm before changing this runtime string.
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)

    parser.add_argument("-p", "--space",
                        dest="spaceFile", help="Where is the space.py located?")
    parser.add_argument("-m", "--maxEvals",
                        dest="maxEvals", help="How many evaluations?")
    parser.add_argument("-s", "--seed", default="1",
                        dest="seed", type=int, help="Seed for the TPE algorithm")
    parser.add_argument("-r", "--restore", action="store_true",
                        dest="restore", help="When this flag is set state.pkl is restored in " +
                             "the current working directory")
    parser.add_argument("--random", default=False, action="store_true",
                        dest="random", help="Use a random search")
    parser.add_argument("--cwd", help="Change the working directory before "
                                      "optimizing.")

    # Unrecognised arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
                               'message)s', datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    if not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space = os.path.splitext(os.path.basename(args.spaceFile))[0]

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if args.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state, there will never be a save
        # with a running experiment
        # NOTE(review): unpickling executes arbitrary code; state.pkl must
        # only ever come from a trusted previous run.
        with open(state_filename) as fh:
            tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        # By this we probably loose the seed; not too critical for a restart
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i).exhaust()
        # Persist the state after every pass; the with-block closes the
        # file even if pickling fails.
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    best = trials.argmin
    # Same output as the two-item print statement: label, space, str(best).
    print("Best Value found for params: %s" % (best,))
Exemplo n.º 10
0
def main():
    """Run one fold of the target algorithm and record it in the experiment.

    If we are not called from cv means we are called from CLI. This means
    the optimizer itself handles crossvalidation (smac). To keep a nice .pkl
    we have to do some bookkeeping here: when ``HPOLIB:handles_cv`` is 1 the
    start and end of the cross validation are recorded by this function.

    The command built by ``make_command`` is run through bash with stdout
    and stderr redirected to a ``<time>_runsolver.out`` file in the current
    working directory. A ``CRASHED`` or ``UNSAT`` status is recorded as a
    crashed fold with ``HPOLIB:result_on_terminate`` as its result and is
    then reported as ``SAT``.

    Returns:
        The formatted return string, which is also logged and printed.
    """
    cfg = wrappingUtil.load_experiment_config_file()
    # If our optimizer can handle crossvalidation, we are called from CLI.
    called_from_cv = cfg.getint("HPOLIB", "handles_cv") != 1

    # This has to be done here for SMAC, since smac does not call cv.py
    if not called_from_cv:
        cv_starttime = time.time()
        experiment = load_experiment_file()
        experiment.start_cv(cv_starttime)
        del experiment

    fold, seed = parse_command_line()
    # Side-effect: removes all additional information like log and applies
    # transformations to the parameters
    params = get_parameters()
    param_string = " ".join([key + " " + str(params[key]) for key in params])

    time_string = wrappingUtil.get_time_string()
    run_instance_output = os.path.join(os.getcwd(), time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(os.getcwd(), time_string + "_runsolver.out")

    cmd = make_command(cfg, fold, param_string, run_instance_output)

    # The with-block guarantees the output file is closed even when the
    # bookkeeping or the subprocess start raises.
    with open(runsolver_output_file, "w") as fh:
        experiment = load_experiment_file()
        # Side-effect: adds a job if it is not yet in the experiments file
        trial_index = get_trial_index(experiment, fold, params)
        experiment.set_one_fold_running(trial_index, fold)
        del experiment  # release Experiment lock
        logger.debug("Calling: %s", cmd)
        process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True, executable="/bin/bash")

        logger.info("-----------------------RUNNING RUNSOLVER----------------------------")
        process.wait()

    # The cpu time is parsed but not needed here.
    _, wallclock_time, status, result, additional_data = parse_output_files(
        cfg, run_instance_output, runsolver_output_file
    )

    experiment = load_experiment_file()
    if status == "SAT":
        experiment.set_one_fold_complete(trial_index, fold, result, wallclock_time)
    elif status in ("CRASHED", "UNSAT"):
        result = cfg.getfloat("HPOLIB", "result_on_terminate")
        experiment.set_one_fold_crashed(trial_index, fold, result, wallclock_time)
        status = "SAT"
    else:
        # TODO: We need a global stopping mechanism
        pass
    del experiment  # release lock

    return_string = format_return_string(status, wallclock_time, 1, result, seed, additional_data)

    if not called_from_cv:
        experiment = load_experiment_file()
        experiment.end_cv(time.time())
        del experiment

    logger.info(return_string)
    print(return_string)
    return return_string
Exemplo n.º 11
0
def main():
    """Run hyperopt (TPE or random search) on a search space file.

    Command-line options select the search space module, the number of
    evaluations, the seed, whether to restore ``state.pkl`` from the current
    working directory, whether to use random search, and an optional working
    directory to change into first.

    The trials and domain objects are pickled to ``state.pkl`` after every
    ``FMinIter`` pass so the run can be restored. Exits with status 1 when the
    search space file does not exist.
    """
    # NOTE(review): the prog text mentions statistics.py and looks copied
    # from another script - confirm before changing this runtime string.
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)

    parser.add_argument("-p",
                        "--space",
                        dest="spaceFile",
                        help="Where is the space.py located?")
    parser.add_argument("-m",
                        "--maxEvals",
                        dest="maxEvals",
                        help="How many evaluations?")
    parser.add_argument("-s",
                        "--seed",
                        default="1",
                        dest="seed",
                        type=int,
                        help="Seed for the TPE algorithm")
    parser.add_argument(
        "-r",
        "--restore",
        action="store_true",
        dest="restore",
        help="When this flag is set state.pkl is restored in " +
        "the current working directory")
    parser.add_argument("--random",
                        default=False,
                        action="store_true",
                        dest="random",
                        help="Use a random search")
    parser.add_argument("--cwd",
                        help="Change the working directory before "
                        "optimizing.")

    # Unrecognised arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
                        'message)s',
                        datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    if not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space = os.path.splitext(os.path.basename(args.spaceFile))[0]

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if args.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state, there will never be a save
        # with a running experiment
        # NOTE(review): unpickling executes arbitrary code; state.pkl must
        # only ever come from a trusted previous run.
        with open(state_filename) as fh:
            tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        # By this we probably loose the seed; not too critical for a restart
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i).exhaust()
        # Persist the state after every pass; the with-block closes the
        # file even if pickling fails.
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    best = trials.argmin
    # Same output as the two-item print statement: label, space, str(best).
    print("Best Value found for params: %s" % (best,))
Exemplo n.º 12
0
def do_cv(params, folds=10):
    """Evaluate ``params`` on every fold by calling ``runsolver_wrapper.py``.

    For each fold a shell command is started, its stdout is searched for a
    line containing ``"Result for ParamILS: SAT"`` and the seventh
    whitespace-separated token of that line is stored as the fold result.

    Args:
        params: Mapping of parameter names to values; each entry is rendered
            as ``-<name> <value>`` on the command line.
        folds: Number of folds to run.

    Returns:
        The mean of the fold results. ``HPOLIB:result_on_terminate`` is
        returned instead when the number of folds whose result equals that
        value reaches ``HPOLIB:max_crash_per_cv``, or when the mean is not
        finite (including after any exception during the run).
    """
    logger.info("Starting Cross validation")
    sys.stdout.flush()
    optimizer = get_optimizer()
    cfg = load_experiment_config_file()

    # Store the results to hand them back to tpe and spearmint
    results = []

    try:
        logger.info("%s", params)
        param_string = " ".join(
            "-" + str(param_name) + " " + str(params[param_name])
            for param_name in params)

        # The wrapper script path does not depend on the fold; build it once.
        runsolver_wrapper_script = "python " + os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "runsolver_wrapper.py")

        for fold in range(folds):
            # "Usage: runsolver_wrapper <instancename> " + \
            # "<instancespecificinformation> <cutofftime> <cutofflength> " + \
            # "<seed> <param> <param> <param>"
            # Cutofftime, cutofflength and seed can be safely ignored since they
            # are read in runsolver_wrapper
            cmd = "%s %d %s %d %d %d %s" % \
                (runsolver_wrapper_script, fold, optimizer, 0, 0, 0, param_string)
            logger.info("Calling command:\n%s", cmd)

            # NOTE(review): the command is run through a shell and the
            # parameter names/values are interpolated unquoted; confirm they
            # can never contain shell metacharacters.
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE, shell=True, executable="/bin/bash")
            logger.info("--------------RUNNING RUNSOLVER_WRAPPER--------------")
            stdoutdata, stderrdata = process.communicate()
            if stdoutdata:
                logger.info(stdoutdata)
            if stderrdata:
                logger.error(stderrdata)

            # Read the runsolver_wrapper output; only the first matching
            # line is used.
            result_string = None
            for line in stdoutdata.split("\n"):
                pos = line.find("Result for ParamILS: SAT")
                if pos != -1:
                    result_string = line[pos:]
                    result_array = result_string.split()
                    results.append(float(result_array[6].strip(",")))
                    break

            if result_string is None:
                raise NotImplementedError("No result string available or result string doesn't contain SAT")

            # If a specified number of runs crashed, quit the whole cross validation
            # in order to save time.
            worst_possible = cfg.getfloat("HPOLIB", "result_on_terminate")
            crashed_runs = np.nansum([0 if res != worst_possible else 1 for res in results])
            if crashed_runs >= cfg.getint("HPOLIB", "max_crash_per_cv"):
                logger.warning("Aborting CV because the number of crashes "
                               "exceeds the configured max_crash_per_cv value")
                return worst_possible

            # TODO: Error Handling

        assert(len(results) == folds)
        mean = np.mean(results)

    except Exception as e:
        logger.error(format_traceback(sys.exc_info()))
        logger.error("CV failed %s %s", sys.exc_info()[0], e)
        # np.nan is the canonical spelling; the np.NaN alias no longer
        # exists in NumPy 2.0 and would raise inside this handler.
        mean = np.nan

    # Do not return any kind of nan because this would break spearmint
    if not np.isfinite(mean):
        mean = float(cfg.get("HPOLIB", "result_on_terminate"))

    logger.info("Finished CV")
    return mean
Exemplo n.º 13
0
def main():
    """Run one fold of the target algorithm and record it in the experiment.

    If we are not called from cv means we are called from CLI. This means
    the optimizer itself handles crossvalidation (smac). To keep a nice .pkl
    we have to do some bookkeeping here: when ``HPOLIB:handles_cv`` is 1 the
    start and end of the cross validation are recorded by this function.

    The command built by ``make_command`` is run through bash with stdout
    and stderr redirected to a ``<time>_runsolver.out`` file in the current
    working directory. A ``CRASHED`` or ``UNSAT`` status is recorded as a
    crashed fold with ``HPOLIB:result_on_terminate`` as its result and is
    then reported as ``SAT``.

    Returns:
        The formatted return string, which is also logged and printed.
    """
    cfg = wrappingUtil.load_experiment_config_file()
    # If our optimizer can handle crossvalidation, we are called from CLI.
    called_from_cv = cfg.getint('HPOLIB', 'handles_cv') != 1

    # This has to be done here for SMAC, since smac does not call cv.py
    if not called_from_cv:
        cv_starttime = time.time()
        experiment = load_experiment_file()
        experiment.start_cv(cv_starttime)
        del experiment

    fold, seed = parse_command_line()
    # Side-effect: removes all additional information like log and applies
    # transformations to the parameters
    params = get_parameters()
    param_string = " ".join([key + " " + str(params[key]) for key in params])

    time_string = wrappingUtil.get_time_string()
    run_instance_output = os.path.join(os.getcwd(),
                                       time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(os.getcwd(),
                                         time_string + "_runsolver.out")

    cmd = make_command(cfg, fold, param_string, run_instance_output)

    # The with-block guarantees the output file is closed even when the
    # bookkeeping or the subprocess start raises.
    with open(runsolver_output_file, "w") as fh:
        experiment = load_experiment_file()
        # Side-effect: adds a job if it is not yet in the experiments file
        trial_index = get_trial_index(experiment, fold, params)
        experiment.set_one_fold_running(trial_index, fold)
        del experiment  # release Experiment lock
        logger.debug("Calling: %s", cmd)
        process = subprocess.Popen(cmd,
                                   stdout=fh,
                                   stderr=fh,
                                   shell=True,
                                   executable="/bin/bash")

        logger.info(
            "-----------------------RUNNING RUNSOLVER----------------------------")
        process.wait()

    # The cpu time is parsed but not needed here.
    _, wallclock_time, status, result, additional_data = \
        parse_output_files(cfg, run_instance_output, runsolver_output_file)

    experiment = load_experiment_file()
    if status == "SAT":
        experiment.set_one_fold_complete(trial_index, fold, result,
                                         wallclock_time)
    elif status in ("CRASHED", "UNSAT"):
        result = cfg.getfloat("HPOLIB", "result_on_terminate")
        experiment.set_one_fold_crashed(trial_index, fold, result,
                                        wallclock_time)
        status = "SAT"
    else:
        # TODO: We need a global stopping mechanism
        pass
    del experiment  # release lock

    return_string = format_return_string(status, wallclock_time, 1, result,
                                         seed, additional_data)

    if not called_from_cv:
        experiment = load_experiment_file()
        experiment.end_cv(time.time())
        del experiment

    logger.info(return_string)
    print(return_string)
    return return_string