Example No. 1
def main_worker(queue, configurer):
    configurer(queue)

    logger = logging.getLogger(__name__)
    logger.debug("Starting the evogil. Parsing arguments.")
    with log_time(system_time, logger, "Parsing done in {time_res}s"):
        argv = docopt(__doc__, version='EvoGIL 3.0')
    logger.debug("Parsing result: %s", argv)

    run_dict = {
        'run': simulation.run_parallel.run_parallel,
        'statistics': statistic.stats.statistics,
        'stats': statistic.stats.statistics,
        'rank': statistic.ranking.rank,
        'table': statistic.ranking.table_rank,
        'rank_details': statistic.ranking.detailed_rank,
        'pictures': plots.pictures.pictures_from_stats,
        'pictures_summary': plots.pictures.pictures_summary,
        'best_fronts': plots.best_fronts.best_fronts,
        'violin': plots.violin.violin,
        'summary': statistic.summary.analyse_results,
        'list': all_algos_problems,
    }

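    # Dispatch: run the handler for whichever docopt command flag is set, then stop.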
    for k, v in run_dict.items():
        logger.debug("run_dict: k,v = %s,%s", k, v)
        if argv[k]:
            logger.debug("run_dict match. argv[k]=%s", argv[k])
            v(argv, queue)
            break
Example No. 2
def main_worker(queue, configurer):
    configurer(queue)

    logger = logging.getLogger(__name__)
    logger.debug("Starting the evogil. Parsing arguments.")
    with log_time(system_time, logger, "Parsing done in {time_res}s"):
        argv = docopt(__doc__, version='EvoGIL 3.0')
    logger.debug("Parsing result: %s", argv)

    run_dict = {
        'run':         simulation.run_parallel.run_parallel,
        'statistics':  statistic.stats.statistics,
        'stats':       statistic.stats.statistics,
        'rank':        statistic.ranking.rank,
        'table':       statistic.ranking.table_rank,
        'rank_details': statistic.ranking.detailed_rank,
        'pictures':    plots.pictures.pictures_from_stats,
        'pictures_summary': plots.pictures.pictures_summary,
        'best_fronts': plots.best_fronts.best_fronts,
        'violin':      plots.violin.violin,
        'summary':     statistic.summary.analyse_results,
        'list':        all_algos_problems,
    }

    for k, v in run_dict.items():
        logger.debug("run_dict: k,v = %s,%s", k, v)
        if argv[k]:
            logger.debug("run_dict match. argv[k]=%s", argv[k])
            v(argv, queue)
            break
Example No. 3
def run_parallel(args):
    worker_factory, simulation_cases = factory.resolve_configuration(args)

    logger.debug("Shuffling the job queue")
    random.shuffle(simulation_cases)

    logger.debug("Creating the pool")

    processes_no = int(args["-j"])
    rxtools.configure_default_executor(processes_no)

    wall_time = []
    start_time = datetime.now()
    results = []
    logger.debug("Simulation cases: %s", simulation_cases)
    logger.debug("Work will be divided into %d processes", processes_no)

    sys = ActorSystem("multiprocTCPBase", logDefs=log_helper.EVOGIL_LOG_CONFIG)

    with log_time(system_time, logger, "Pool evaluated in {time_res}s", out=wall_time):

        def process_result(subres):
            results.append(subres)
            log_simulation_stats(start_time, subres[-1], len(simulation_cases))

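        # Build a worker for every (shuffled) simulation case and run it, collecting each
        # result via process_result; the commented-out operators show the alternative of
        # spawning each worker in a separate process and merging the streams.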
        rx.from_iterable(range(len(simulation_cases))).pipe(
            ops.map(lambda i: worker_factory(simulation_cases[i], i)),
            # ops.map(lambda w: rxtools.from_process(w.run)),
            ops.map(lambda w: w.run()),
            # ops.merge(max_concurrent=1)
            ops.do_action(on_next=process_result)
        ).run()
    log_summary(args, results, simulation_cases, wall_time)
    rxtools.shutdown_default_executor()
    sys.shutdown()
Example No. 4
def main_worker():
    logger = logging.getLogger(__name__)
    logger.debug("Starting the evogil. Parsing arguments.")
    with log_time(system_time, logger, "Parsing done in {time_res}s"):
        argv = docopt(__doc__, version="EvoGIL 3.0")
    logger.debug("Parsing result: %s", argv)

    run_dict = {
        "run": simulation.run_parallel.run_parallel,
        "statistics": statistic.stats.statistics,
        "stats": statistic.stats.statistics,
        "rank": statistic.ranking.rank,
        "table": statistic.ranking.table_rank,
        "rank_details": statistic.ranking.detailed_rank,
        "pictures": plots.pictures.pictures_time,
        "pictures_summary": plots.pictures.pictures_summary,
        "best_fronts": plots.best_fronts.best_fronts,
        "violin": plots.violin.violin,
        "summary": statistic.summary.analyse_results,
        "list": all_algos_problems,
    }
    set_default_options(argv)

    for k, v in run_dict.items():
        logger.debug("run_dict: k,v = %s,%s", k, v)
        if argv[k]:
            logger.debug("run_dict match. argv[k]=%s", argv[k])
            v(argv)
            break
Example No. 5
def pictures_summary(args):
    logger = logging.getLogger(__name__)
    logger.debug("pictures_summary")

    selected = set(args["--selected"].upper().split(","))
    boot_size = int(args["--bootstrap"])
    results_dir = args["--dir"]
    plots_dir = Path(args["-o"])

    logger.debug("Plotting summary with selected algos: " + ",".join(selected))

    scoring = collections.defaultdict(lambda: collections.defaultdict(dict))
    problems = set()

    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
            BudgetResultsExtractor(), results_dir
        ):
            problems.add(problem_name)
            problem_score = collections.defaultdict(list)
            algos = list(algorithms)
            for algo_name, results in algos:
                max_result = find_acceptable_result_for_budget(list(results), boot_size)
                if max_result:
                    print(
                        "{}, {} , budget={}".format(
                            problem_name, algo_name, max_result["budget"]
                        )
                    )
                    for metric_name, metric_name_long, data_process in max_result[
                        "analysis"
                    ]:
                        if metric_name in ranking.best_func:
                            data_process = list(x() for x in data_process)
                            data_analysis = yield_analysis(data_process, boot_size)

                            score = math.log(
                                math.fabs(data_analysis["btstrpd"]["metrics"]) + 1.0
                            )

                            scoring[metric_name][algo_name][problem_name] = score
                            problem_score[metric_name].append((algo_name, score))
                else:
                    print("{}, {}, NO BUDGET".format(problem_name, algo_name))

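            # Normalise this problem's scores by the largest score seen for each metric
            # (plus a small epsilon); the 'pdi' metric is left unscaled.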
            for metric_name in scoring:
                if metric_name != "pdi":

                    max_score = (
                        max(x for algo, x in problem_score[metric_name]) + 0.0001
                    )
                    for algo_name, _ in algos:
                        if (
                            algo_name in scoring[metric_name]
                            and problem_name in scoring[metric_name][algo_name]
                        ):
                            scoring[metric_name][algo_name][problem_name] /= max_score

    plot_results_summary(problems, scoring, selected, plots_dir)
Example No. 6
def rank(args):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("ranking")

    boot_size = int(args["--bootstrap"])

    scoring = collections.defaultdict(list)

    with log_time(process_time, logger,
                  "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
                BudgetResultsExtractor(), RESULTS_DIR):
            for algo_name, results in algorithms:
                max_budget_result = find_acceptable_result_for_budget(
                    list(results), boot_size)
                if max_budget_result:
                    for (
                            metric_name,
                            metric_name_long,
                            data_process,
                    ) in max_budget_result["analysis"]:
                        if metric_name in best_func:

                            data_process = list(x() for x in data_process)
                            data_analysis = yield_analysis(
                                data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            scoring[(problem_name, metric_name)].append(
                                (algo_name, score))

    global_scoring = collections.defaultdict(collections.Counter)

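    # Per-problem ranking: the winner of each (problem, metric) pair earns two points in the
    # global ranking when it wins clearly, otherwise it shares one point with every weak winner.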
    print("Problem ranking\n################")
    for problem_name, metric_name in scoring:
        metric_scoring = scoring[(problem_name, metric_name)]
        algo_win, score = best_func[metric_name](metric_scoring,
                                                 key=lambda x: x[1])
        print("{}, {} : {}".format(problem_name, metric_name, algo_win))

        weak_winners = get_weak_winners(metric_scoring, (algo_win, score),
                                        winner_tolerance[metric_name])
        # if not weak_winners:
        #     global_scoring[metric_name].update([algo_win])
        if not weak_winners:
            global_scoring[metric_name].update([algo_win, algo_win])
        else:
            global_scoring[metric_name].update(
                [algo_win] + [algo for algo, score in weak_winners])

    print("\nGlobal ranking\n##############")
    for metric_name in global_scoring:
        print("{} : ".format(metric_name) + ", ".join(
            "{} ({})".format(score[0], score[1])
            for score in global_scoring[metric_name].most_common()))
Example No. 7
    def run(self):
        log_helper.init()
        logger = logging.getLogger(__name__)
        logger.info(
            "Starting the worker. PID: %d, simulation case: %s",
            os.getpid(),
            self.simulation,
        )

        if self.simulation.renice and os.name == "posix":
            logger.debug("Renice the process PID:%s by %s", os.getpid(),
                         self.simulation)
            os.nice(int(self.simulation.renice))  # pylint: disable=no-member

        self._init_random_seed(logger)

        driver = None
        try:
            final_driver, problem_mod = factory.prepare(
                self.simulation.algorithm_name, self.simulation.problem_name)

            logger.debug("Creating the driver used to perform computation")
            driver = final_driver()
            proc_time = []
            logger.debug("Beginning processing of %s, simulation: %s", driver,
                         self.simulation)
            with log_time(
                    process_time,
                    logger,
                    "Processing done in {time_res}s CPU time",
                    out=proc_time,
            ):
                results = self.run_driver(driver, problem_mod, logger)

            return results, proc_time[-1], self.simulation_no

        except NotViableConfiguration as e:
            reason = inspect.trace()[-1]
            logger.info(
                "Configuration disabled by %s:%d:%s. simulation case:%s",
                reason[1],
                reason[2],
                reason[3],
                self.simulation,
            )
            logger.debug("Configuration disabled args:%s. Stack:", exc_info=e)

        except Exception as e:
            logger.exception("Some error", exc_info=e)

        finally:
            if driver:
                driver.shutdown()
            logger.debug("Finished processing. simulation case:%s",
                         self.simulation)
Example No. 8
def pictures_from_stats(args):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("pictures from stats")

    boot_size = int(args["--bootstrap"])
    results_dir = args["--dir"]
    plots_dir = Path(args["-o"])

    results = collections.defaultdict(list)
    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
            BudgetResultsExtractor(), results_dir
        ):
            for algo_name, budgets in algorithms:
                for result in budgets:
                    _, _, cost_data = next(result["analysis"])
                    cost_data = list(x() for x in cost_data)
                    cost_analysis = yield_analysis(cost_data, boot_size)

                    budget = cost_analysis["btstrpd"]["metrics"]
                    budget_err = cost_analysis["stdev"]

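                    # For each tracked metric, bootstrap the runs and record a
                    # (budget, budget_err, score, score_err) point for every plot group
                    # this algorithm belongs to.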
                    for metric_name, metric_name_long, data_process in result[
                        "analysis"
                    ]:
                        if metric_name in best_func:
                            if metric_name == "dst from pareto":
                                metric_name = "dst"
                            data_process = list(x() for x in data_process)

                            data_analysis = yield_analysis(data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            score_err = data_analysis["stdev"]

                            keys = [
                                (problem_name, algo_name, metric_name, group)
                                for group in algos_groups[algo_name]
                            ]
                            value = (budget, budget_err, score, score_err)
                            print("PLOT: " + str(value))

                            for key in keys:
                                results[key].append(value)
    plot_results(results, plots_dir, (500, 4500))
Example No. 9
def detailed_rank(args):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("detailed ranking")

    boot_size = int(args["--bootstrap"])

    for result_set in result_dirs:
        print("***{}***".format(result_set))
        scoring = collections.defaultdict(list)

        with log_time(process_time, logger,
                      "Preparing data done in {time_res:.3f}"):
            for problem_name, problem_mod, algorithms in serialization.each_result(
                    BudgetResultsExtractor(), result_set):
                for algo_name, results in algorithms:
                    for result in results:
                        if validate_cost(result, boot_size):
                            for metric_name, metric_name_long, data_process in result[
                                    "analysis"]:
                                if metric_name in best_func:
                                    data_process = list(x()
                                                        for x in data_process)
                                    data_analysis = yield_analysis(
                                        data_process, boot_size)

                                    score = data_analysis["btstrpd"]["metrics"]
                                    scoring[(problem_name, result["budget"],
                                             metric_name)].append(
                                                 (algo_name, score))

        global_scoring = collections.defaultdict(collections.Counter)

        for problem_name, budget, metric_name in scoring:
            algo_win, score = best_func[metric_name](scoring[(problem_name,
                                                              budget,
                                                              metric_name)],
                                                     key=lambda x: x[1])
            global_scoring[(budget, metric_name)].update([algo_win])

        for budget, metric_name in sorted(global_scoring):
            print("{} {} : ".format(budget, metric_name) +
                  ", ".join("{} ({})".format(score[0], score[1])
                            for score in global_scoring[
                                (budget, metric_name)].most_common()))
Example No. 10
def pictures_summary(args, queue):
    logger = logging.getLogger(__name__)
    logger.debug("pictures_summary")

    selected = set(args['--selected'].upper().split(','))
    boot_size = int(args['--bootstrap'])

    logger.debug('Plotting summary with selected algos: ' + ','.join(selected))

    scoring = collections.defaultdict(lambda: collections.defaultdict(dict))
    problems = set()

    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in RunResult.each_result(RESULTS_DIR):
            problems.add(problem_name)
            problem_score = collections.defaultdict(list)
            algos = list(algorithms)
            for algo_name, results in algos:
                max_result = find_acceptable_result_for_budget(list(results), boot_size)
                if max_result:
                    print('{}, {} , budget={}'.format(problem_name, algo_name, max_result['budget']))
                    for metric_name, metric_name_long, data_process in max_result["analysis"]:
                        if metric_name in ranking.best_func:
                            data_process = list(x() for x in data_process)
                            data_analysis = yield_analysis(data_process, boot_size)

                            score = math.log(math.fabs(data_analysis["btstrpd"]["metrics"]) + 1.0)

                            scoring[metric_name][algo_name][problem_name] = score
                            problem_score[metric_name].append((algo_name, score))
                else:
                    print('{}, {}, NO BUDGET'.format(problem_name, algo_name))

            for metric_name in scoring:
                if metric_name != 'pdi':

                    max_score = max(x for algo, x in problem_score[metric_name]) + 0.0001
                    for algo_name, _ in algos:
                        if algo_name in scoring[metric_name] and problem_name in scoring[metric_name][algo_name]:
                            scoring[metric_name][algo_name][problem_name] /= max_score

    plot_results_summary(problems, scoring, selected)
Example No. 11
def pictures_time(args):
    logger = logging.getLogger(__name__)
    logger.debug("pictures from stats")

    boot_size = int(args["--bootstrap"])
    results_dir = args["--dir"]
    plots_dir = Path(args["-o"])

    plot_data = collections.defaultdict(list)
    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
            TimeResultsExtractor(), results_dir
        ):
            for algo_name, results in algorithms:
                for result in results:
                    time = result["time"]

                    for metric_name, metric_name_long, data_process in result[
                        "analysis"
                    ]:
                        if metric_name in best_func:
                            if metric_name == "dst from pareto":
                                metric_name = "dst"
                            data_process = list(x() for x in data_process)

                            data_analysis = yield_analysis(data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            score_err = data_analysis["stdev"]

                            keys = [
                                (problem_name, algo_name, metric_name, group)
                                for group in algos_groups[algo_name]
                            ]
                            value = (time, 0, score, score_err)
                            for key in keys:
                                plot_data[key].append(value)
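    # x-axis upper bound: the largest time value recorded for the first plotted series.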
    max_time = max(list(plot_data.values())[0])[0]
    plot_results(plot_data, plots_dir, (0, max_time))
Example No. 12
def pictures_from_stats(args, queue):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("pictures from stats")

    boot_size = int(args['--bootstrap'])

    results = collections.defaultdict(list)
    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in RunResult.each_result(RESULTS_DIR):
            for algo_name, budgets in algorithms:
                for result in budgets:
                    _, _, cost_data = next(result["analysis"])
                    cost_data = list(x() for x in cost_data)
                    cost_analysis = yield_analysis(cost_data, boot_size)

                    budget = cost_analysis["btstrpd"]["metrics"]
                    budget_err = cost_analysis["stdev"]

                    for metric_name, metric_name_long, data_process in result["analysis"]:
                        if metric_name in best_func:
                            if metric_name == 'dst from pareto':
                                metric_name = 'dst'
                            data_process = list(x() for x in data_process)

                            data_analysis = yield_analysis(data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            score_err = data_analysis["stdev"]

                            keys = [(problem_name, algo_name, metric_name, group) for group in
                                    algos_groups[algo_name]]
                            value = (budget, budget_err, score, score_err)

                            for key in keys:
                                results[key].append(value)
    plot_results(results)
Example No. 13
def worker(args):
    logger = logging.getLogger(__name__)

    logger.debug("Starting the worker. args:%s", args)
    (problem, algo), budgets, runid, renice = args

    if renice:
        logger.debug("Renice the process PID:%s by %s", os.getpid(), renice)
        os.nice(int(renice))

    logger.debug("Getting random seed")
    # basically we duplicate the code of https://github.com/python/cpython/blob/master/Lib/random.py#L111 because
    # in case os.urandom is not available, random.seed defaults to epoch time. That would set the seed equal in each
    # process, which is not acceptable.
    try:
        random_seed = int.from_bytes(os.urandom(2500), 'big')
    except NotImplementedError:
        random_seed = int(time.time() * 256 + os.getpid())  # that's not enough for MT, but will have to do for now.
    random.seed(random_seed)

    drivers = algo.split('+')

    runres = RunResult(algo, problem, runid=runid, results_path=RESULTS_DIR)

    try:
        final_driver, problem_mod = None, None
        for driver_pos, driver in list(enumerate(drivers))[::-1]:
            final_driver, problem_mod = prepare(driver,
                                                problem,
                                                final_driver,
                                                drivers, driver_pos
                                                )

        logger.debug("Creating the driver used to perform computation")
        driver = final_driver()
        total_cost, result = 0, None

        proc_time = []
        results = []

        logger.debug("Beginning processing of %s, args: %s", driver, args)
        with log_time(process_time, logger, "Processing done in {time_res}s CPU time", out=proc_time):
            if isinstance(driver, DriverGen):
                logger.debug("The driver %s is DriverGen-based", show_partial(driver))
                driver.max_budget = budgets[-1]
                gen = driver.population_generator()
                proxy = None
                logger.debug("Starting processing")

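                # Advance the generator until the accumulated cost reaches each budget
                # checkpoint, then store that checkpoint's finalized population.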
                for budget in budgets:
                    logger.debug("Curr budget step is %d", budget)
                    while total_cost < budget:
                        logger.debug("Waiting for next proxy")
                        proxy = gen.send(proxy)
                        logger.debug("Proxy.cost: %d", proxy.cost)
                        total_cost += proxy.cost
                        logger.debug("total_cost: %d", total_cost)

                    logger.debug("Cost %d equals/overpasses next budget step %d. Storing finalized population",
                                 total_cost,
                                 budget)
                    finalpop = proxy.finalized_population()
                    finalpop_fit = [[fit(x) for fit in problem_mod.fitnesses] for x in finalpop]
                    runres.store(budget, total_cost, finalpop, finalpop_fit)
                    results.append((total_cost, finalpop))

                logger.debug("End loop, total_cost:%d", total_cost)
                logger.debug("Final population: %s", proxy.finalized_population())

            elif isinstance(driver, DriverLegacy):
                logger.debug("The driver %s is DriverLegacy-based", show_partial(driver))
                with log_time(process_time, logger, "All iterations in {time_res}s CPU time"):
                    for budget in budgets:
                        logger.debug("Re-creating the driver used to perform computation")
                        driver = final_driver()
                        driver.budget = budget
                        with log_time(process_time, logger,
                                      "Iteration with budget {0} in {{time_res}}s CPU time".format(budget)):
                            logger.debug("Running with budget=%d", budget)
                            total_cost = driver.steps(count(), budget)
                        finalpop = driver.finish()
                        finalpop_fit = [[fit(x) for fit in problem_mod.fitnesses] for x in finalpop]
                        runres.store(budget, total_cost, finalpop, finalpop_fit)
                        results.append((budget, finalpop))
            else:
                e = NotImplementedError()
                logger.exception("Oops. The driver type is not recognized, got %s", show_partial(driver), exc_info=e)
                raise e

        return results, proc_time[-1]

    except NotViableConfiguration as e:
        reason = inspect.trace()[-1]
        logger.info("Configuartion disabled by %s:%d:%s. args:%s", reason[1], reason[2], reason[3], args)
        logger.debug("Configuration disabled args:%s. Stack:", exc_info=e)

    except Exception as e:
        logger.exception("Some error", exc_info=e)

    finally:
        logger.debug("Finished processing. args:%s", args)
Example No. 14
def table_rank(args):
    logger = logging.getLogger(__name__)
    logger.debug("table ranking")

    boot_size = int(args["--bootstrap"])

    results = collections.defaultdict(
        lambda: collections.defaultdict(collections.Counter))

    for result_set in result_dirs:
        print("***{}***".format(result_set))
        with log_time(process_time, logger,
                      "Preparing data done in {time_res:.3f}"):
            for problem_name, problem_mod, algorithms in serialization.each_result(
                    BudgetResultsExtractor(), result_set):
                print(result_set, problem_name)
                scoring = collections.defaultdict(list)
                for algo_name, results_data in algorithms:
                    results_data = list(results_data)

                    for i in range(len(results_data)):
                        original_budget = results_data[i]["budget"]
                        if original_budget == 40:
                            result = results_data[i]
                            # result = find_acceptable_result_for_budget(
                            #     results_data[: i + 1], boot_size
                            # )
                            if result:
                                print("{} {} {} -> {}".format(
                                    problem_name,
                                    algo_name,
                                    original_budget,
                                    result["budget"],
                                ))
                                for metric_name, metric_name_long, data_process in result[
                                        "analysis"]:
                                    if metric_name in best_func:
                                        data_process = list(
                                            x() for x in data_process)
                                        data_analysis = yield_analysis(
                                            data_process, boot_size)

                                        score = data_analysis["btstrpd"][
                                            "metrics"]
                                        scoring[(original_budget,
                                                 metric_name)].append(
                                                     (algo_name, score))
                print("****{}****".format(problem_name))
                for budget, metric_name in sorted(scoring):
                    metric_scoring = scoring[(budget, metric_name)]
                    algo_win, score = best_func[metric_name](
                        metric_scoring, key=lambda x: x[1])
                    weak_winners = get_weak_winners(
                        metric_scoring, (algo_win, score),
                        winner_tolerance[metric_name])

                    # # Only strong
                    # if not weak_winners:
                    #     results[(budget, metric_name)][result_set].update([algo_win])

                    # Strong = 2 points, Weak or Winner = 1 point
                    if not weak_winners:
                        results[(budget, metric_name)][result_set].update(
                            [algo_win, algo_win])
                    else:
                        results[(budget, metric_name)][result_set].update(
                            [algo_win] +
                            [algo for algo, score in weak_winners])

                    # print('{} {} {}'.format(budget, metric_name, scoring[(budget, metric_name)]))
                    print("*****{} {}".format(budget, metric_name))
                    if not weak_winners:
                        print("*****Strong winner: {} :{}".format(
                            algo_win, score))
                    else:
                        print("*****Winner: {} :{}".format(algo_win, score))
                        print("*****Weak winners: {}".format(weak_winners))

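    # Emit a LaTeX results table: one row per (budget, metric), one column per result set,
    # showing the two best-scoring algorithms with their accumulated points.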
    print("""\\begin{table}[ht]
  \\centering
    \\caption{Final results}
    \\label{tab:results}
    \\resizebox{\\textwidth}{!}{%
    \\begin{tabular}{  r@{ }l | c | c | c | }
          \\multicolumn{2}{c}{}
        & $K_0$
        & $K_1$
        & $K_2$
      \\\\ \\hline""")

    previous_budget = None
    for budget, metric_name in sorted(
            sorted(results.keys(), key=lambda x: metrics_order.index(x[1])),
            key=lambda x: x[0],
    ):
        budget_label = str(budget) + " "
        if previous_budget and previous_budget != budget:
            print("\\hdashline")
        elif previous_budget:
            budget_label = ""

        score_str = ""
        for result_set in result_dirs:
            results_counter = results[(budget, metric_name)][result_set]
            algo_ranking = results_counter.most_common(2)
            values = list(results_counter.values())
            if len(algo_ranking) == 2:
                winner = format_result(algo_ranking[0], values, 2)
                second = format_result(
                    algo_ranking[1],
                    values,
                    1 if algo_ranking[0][1] != algo_ranking[1][1] else 2,
                )
                score_str += "& {}, {}".format(winner, second)
            elif len(algo_ranking) == 1:
                winner = format_result(algo_ranking[0], values, 2)
                score_str += "& {}".format(winner)
            else:
                score_str += "& "
        print("{}& {} {}\\\\".format(budget_label, metric_name, score_str))
        previous_budget = budget

    print("""    \\end{tabular}}\n\\end{table}""")
Example No. 15
def statistics(args, queue):
    logger = logging.getLogger(__name__)

    badbench = []
    cost_badbench = []
    boot_size = int(args['--bootstrap'])

    # pretty format
    screen_width = sum(y for x in fields for y in x[1]) + 4 * len(fields)
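    # Left padding that aligns follow-up diagnostic lines under the
    # "RESULT, confidence interval" column.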
    [err_prefix] = [
        i
        for i, (name, lens, fmt) in enumerate(fields)
        if name == "RESULT, confidence interval"
    ]
    err_prefix = sum(y for x in fields[:err_prefix] for y in x[1]) + 4 * err_prefix - 2
    err_prefix = " " * err_prefix

    def print_header():
        print()
        print('..'.join('[{0:^{1}}]'.format(head, sum(width))
                        for head, width, var
                        in fields)
              + "..",
              flush=True)
        return True

    with close_and_join(multiprocessing.Pool(min(int(args['-j']), 8))) as p:
        for problem_name, problem_mod, algorithms in RunResult.each_result():
            for algo_name, budgets in algorithms:
                header_just_printed = print_header()

                for result in budgets:
                    len_data = len(result["results"])

                    first_budget_line = True
                    avg_pop_len = average([len(x.population) for x in result["results"]])

                    with log_time(process_time,
                                  logger,
                                  "Calculating metrics for {} :: {} :: {} in {{time_res:.3f}}s".format(
                                      problem_name, algo_name, result["budget"]
                                  )):

                        results_precalc = p.map(force_data,
                                                zip(repeat(boot_size), result["analysis"]),
                                                chunksize=1)

                        for metric_name, metric_name_long, data_process, analysis in results_precalc:
                            if first_budget_line and not header_just_printed:
                                if screen_width % 2 == 1:
                                    print("-" + " -" * (screen_width // 2))
                                else:
                                    print(" -" * (screen_width // 2))
                            first_budget_line = False

                            columns = []
                            for i, (head, width, var) in enumerate(fields):
                                columns.append(var.format(*width, **locals()))

                            # the data
                            print("", " :: ".join(columns), ":: ", flush=True)
                            header_just_printed = False

                            if analysis["goodbench"] != "✓":
                                lower_process = analysis["lower"]
                                upper_process = analysis["upper"]
                                low_out_fence_process = analysis["low_out_fence"]
                                upp_out_fence_process = analysis["upp_out_fence"]
                                stdev_process = analysis["stdev"]
                                mean_process = analysis["mean"]

                                outliers = len([x
                                                for x
                                                in data_process
                                                if lower_process <= x <= upper_process])
                                print(
                                    "{err_prefix}:: Suspicious result analysis:\n"
                                    "{err_prefix}::             {0:>2} / {1:2} ({4:7.3f}%) out of [ {2:>18.13} ; {3:<18.13} ]\n"
                                    "{err_prefix}::                                                            Δ {7:<18.13}\n"
                                    "{err_prefix}::                               Bounds: [ {5:>18.13} ; {6:<18.13} ]\n"
                                    "{err_prefix}::                                                            Δ {8:<18.13}".format(
                                        outliers,
                                        len(data_process),
                                        lower_process,
                                        upper_process,
                                        100.0 * outliers / len(data_process),
                                        min(data_process),
                                        max(data_process),
                                        upper_process - lower_process,
                                        max(data_process) - min(data_process),
                                        err_prefix=err_prefix)
                                )
                                print("{err_prefix}:: Values".format(err_prefix=err_prefix))

                                def aux(x):
                                    try:
                                        return abs(x - mean_process) * 100.0 / stdev_process
                                    except ZeroDivisionError:
                                        return float("inf")

                                print(''.join(
                                    "{err_prefix}:: {0:>30.20}  = avg {1:<+30} = avg {3:+8.3f}% ⨉ σ | {2:17} {4:17} {5:17}\n".format(
                                        x,
                                        x - mean_process,
                                        (lower_process <= x <= upper_process) and "(out of mean±3σ)" or "",
                                        aux(x),
                                        ((low_out_fence_process <= x < analysis["low_inn_fence"]) or (
                                            analysis[
                                                "upp_inn_fence"] <= x < upp_out_fence_process)) and " (mild outlier)" or "",
                                        ((x < low_out_fence_process) or (
                                            upp_out_fence_process < x)) and "(EXTREME outlier)" or "",
                                        err_prefix=err_prefix
                                    )
                                    for x in data_process),
                                    end=''
                                )
                                if abs(analysis["mean_nooutliers_diff"]) > 10.:
                                    badbench.append([problem_name, algo_name, result["budget"], metric_name_long])
                                    print(err_prefix + "::", "#"*22, "#"*67, "#"*22)
                                    print(err_prefix + "::", "#"*22,
                                          "Mean of results changed a lot (> 10%), so probably UNTRUSTED result",
                                          "#"*22)
                                    print(err_prefix + "::", "#"*22, "#"*67, "#"*22)
                                else:
                                    print(err_prefix + "::",
                                          "Mean of results changed a little (< 10%), so probably that's all okay")

    if badbench:
        print("#" * 237)
        for i in badbench:
            print(">>> " + " :: ".join(str(x) for x in i))
Example No. 16
def statistics(args):
    logger = logging.getLogger(__name__)

    badbench = []
    cost_badbench = []
    boot_size = int(args["--bootstrap"])

    # pretty format
    screen_width = sum(y for x in fields for y in x[1]) + 4 * len(fields)
    [err_prefix] = [
        i for i, (name, lens, fmt) in enumerate(fields)
        if name == "RESULT, confidence interval"
    ]
    err_prefix = sum(y for x in fields[:err_prefix]
                     for y in x[1]) + 4 * err_prefix - 2
    err_prefix = " " * err_prefix

    def print_header():
        print()
        print(
            "..".join("[{0:^{1}}]".format(head, sum(width))
                      for head, width, var in fields) + "..",
            flush=True,
        )
        return True

    with close_and_join(multiprocessing.Pool(min(int(args["-j"]), 8))) as p:
        for problem_name, problem_mod, algorithms in serialization.each_result(
                BudgetResultsExtractor()):
            for algo_name, budgets in algorithms:
                header_just_printed = print_header()

                for result in budgets:
                    len_data = len(result["results"])

                    first_budget_line = True
                    avg_pop_len = average(
                        [len(x.population) for x in result["results"]])

                    with log_time(
                            process_time,
                            logger,
                            "Calculating metrics for {} :: {} :: {} in {{time_res:.3f}}s"
                            .format(problem_name, algo_name, result["budget"]),
                    ):

                        results_precalc = p.map(
                            force_data,
                            zip(repeat(boot_size), result["analysis"]),
                            chunksize=1,
                        )

                        for (
                                metric_name,
                                metric_name_long,
                                data_process,
                                analysis,
                        ) in results_precalc:
                            if first_budget_line and not header_just_printed:
                                if screen_width % 2 == 1:
                                    print("-" + " -" * (screen_width // 2))
                                else:
                                    print(" -" * (screen_width // 2))
                            first_budget_line = False

                            columns = []
                            for i, (head, width, var) in enumerate(fields):
                                columns.append(var.format(*width, **locals()))

                            # the data
                            print("", " :: ".join(columns), ":: ", flush=True)
                            header_just_printed = False

                            if analysis["goodbench"] != "✓":
                                lower_process = analysis["lower"]
                                upper_process = analysis["upper"]
                                low_out_fence_process = analysis[
                                    "low_out_fence"]
                                upp_out_fence_process = analysis[
                                    "upp_out_fence"]
                                stdev_process = analysis["stdev"]
                                mean_process = analysis["mean"]

                                outliers = len([
                                    x for x in data_process
                                    if lower_process <= x <= upper_process
                                ])
                                print(
                                    "{err_prefix}:: Suspicious result analysis:\n"
                                    "{err_prefix}::             {0:>2} / {1:2} ({4:7.3f}%) out of [ {2:>18.13} ; {3:<18.13} ]\n"
                                    "{err_prefix}::                                                            Δ {7:<18.13}\n"
                                    "{err_prefix}::                               Bounds: [ {5:>18.13} ; {6:<18.13} ]\n"
                                    "{err_prefix}::                                                            Δ {8:<18.13}"
                                    .format(
                                        outliers,
                                        len(data_process),
                                        lower_process,
                                        upper_process,
                                        100.0 * outliers / len(data_process),
                                        min(data_process),
                                        max(data_process),
                                        upper_process - lower_process,
                                        max(data_process) - min(data_process),
                                        err_prefix=err_prefix,
                                    ))
                                print("{err_prefix}:: Values".format(
                                    err_prefix=err_prefix))

                                def aux(x):
                                    try:
                                        return (abs(x - mean_process) * 100.0 /
                                                stdev_process)
                                    except ZeroDivisionError:
                                        return float("inf")

                                print(
                                    "".join(
                                        "{err_prefix}:: {0:>30.20}  = avg {1:<+30} = avg {3:+8.3f}% ⨉ σ | {2:17} {4:17} {5:17}\n"
                                        .format(
                                            x,
                                            x - mean_process,
                                            (lower_process <= x <=
                                             upper_process)
                                            and "(out of mean±3σ)" or "",
                                            aux(x),
                                            ((low_out_fence_process <= x <
                                              analysis["low_inn_fence"]) or
                                             (analysis["upp_inn_fence"] <= x <
                                              upp_out_fence_process))
                                            and " (mild outlier)" or "",
                                            ((x < low_out_fence_process) or
                                             (upp_out_fence_process < x))
                                            and "(EXTREME outlier)" or "",
                                            err_prefix=err_prefix,
                                        ) for x in data_process),
                                    end="",
                                )
                                if abs(analysis["mean_nooutliers_diff"]
                                       ) > 10.0:
                                    badbench.append([
                                        problem_name,
                                        algo_name,
                                        result["budget"],
                                        metric_name_long,
                                    ])
                                    print(err_prefix + "::", "#" * 22,
                                          "#" * 67, "#" * 22)
                                    print(
                                        err_prefix + "::",
                                        "#" * 22,
                                        "Mean of results changed a lot (> 10%), so probably UNTRUSTED result",
                                        "#" * 22,
                                    )
                                    print(err_prefix + "::", "#" * 22,
                                          "#" * 67, "#" * 22)
                                else:
                                    print(
                                        err_prefix + "::",
                                        "Mean of results changed a little (< 10%), so probably that's all okay",
                                    )

    if badbench:
        print("#" * 237)
        for i in badbench:
            print(">>> " + " :: ".join(str(x) for x in i))
Example No. 17
def run_parallel(args, queue):
    logger = logging.getLogger(__name__)

    budgets = sorted([int(budget) for budget in args['<budget>'].split(',')])
    logger.debug("Budgets: %s", budgets)

    order = list(product(run_config.problems, run_config.algorithms))

    logger.debug("Problems * algorithms: %s", order)

    algorithms = run_config.algorithms
    if args['--algo']:
        algorithms_filter = args['--algo'].lower().split(',')
        logger.debug("Selecting algorithms by name: %s", algorithms)
        algorithms = [
            algo for algo in algorithms if algo.lower() in algorithms_filter
        ]
        logger.debug("Selected: %s", algorithms)

    problems = run_config.problems
    if args['--problem']:
        problems_filter = args['--problem'].lower().split(',')
        logger.debug("Selecting problems by name: %s", problems)
        problems = [
            problem for problem in problems
            if problem.lower() in problems_filter
        ]
        logger.debug("Selected: %s", problems)

    order = list(product(problems, algorithms))

    logger.info("Selected following tests:")
    for problem, algo in order:
        logger.info("  {problem:12} :: {algo:12}".format(problem=problem,
                                                         algo=algo))

    logger.debug("Duplicating problems (-N flag)")
    order = [(test, budgets, runid, args["--renice"]) for test in order
             for runid in range(int(args['-N']))]

    logger.debug("Shuffling the job queue")
    random.shuffle(order)

    logger.debug("Creating the pool")

    with close_and_join(multiprocessing.Pool(int(args['-j']))) as p:

        wall_time = []
        start_time = datetime.now()
        results = []
        with log_time(system_time,
                      logger,
                      "Pool evaluated in {time_res}s",
                      out=wall_time):

            for i, subres in enumerate(p.imap(worker, order, chunksize=1)):
                results.append(subres)

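                # Extrapolate the estimated finish time from the elapsed wall time and the
                # fraction of jobs completed so far.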
                current_time = datetime.now()
                diff_time = current_time - start_time
                ratio = i * 1. / len(order)
                try:
                    est_delivery_time = start_time + diff_time / ratio
                    time_to_delivery = est_delivery_time - current_time
                    logger.info(
                        "Job queue progress: %.3f%%. Est. finish in %02d:%02d:%02d (at %s)",
                        ratio * 100,
                        time_to_delivery.days * 24 +
                        time_to_delivery.seconds // 3600,
                        (time_to_delivery.seconds % 3600) // 60,
                        time_to_delivery.seconds % 60,
                        est_delivery_time.strftime("%Y-%m-%d %H:%M:%S.%f"))
                except ZeroDivisionError:
                    logger.info(
                        "Job queue progress: %.3f%%. Est. finish: unknown yet",
                        ratio * 100)

    proc_times = sum(subres[1] for subres in results if subres is not None)
    errors = [(test, budgets, runid)
              for comp_result, (test, budgets, runid,
                                renice) in zip(results, order)
              if comp_result is None]

    speedup = proc_times / wall_time[0]

    logger.info("SUMMARY:")
    logger.info("  wall time:     %7.3f", wall_time[0])
    logger.info("  CPU+user time: %7.3f", proc_times)
    logger.info("  est. speedup:  %7.3f", speedup)

    if errors:
        logger.error("Errors encountered:")
        for (probl, algo), budgets, runid in errors:
            logger.error("  %9s :: %14s :: runID=%d :: budgets=%s", probl,
                         algo, runid, ','.join(str(x) for x in budgets))

    summary = collections.defaultdict(float)
    for (bench, _, _, _), subres in zip(order, results):
        if subres:
            summary[bench] += subres[1]

    if logger.isEnabledFor(logging.INFO):
        logger.info("Running time:")
        res = []
        for (prob, alg), timesum in sorted(summary.items(),
                                           key=operator.itemgetter(1),
                                           reverse=True):
            prob_show = "'" + prob + "'"
            alg_show = "'" + alg + "'"
            avg_time = timesum / float(args['-N'])
            logger.info(
                "  prob:{prob_show:16} algo:{alg_show:16}) time:{avg_time:>8.3f}s"
                .format(**locals()))
Example No. 18
def worker(args):
    logger = logging.getLogger(__name__)

    logger.debug("Starting the worker. args:%s", args)
    (problem, algo), budgets, runid, renice = args

    if renice:
        logger.debug("Renice the process PID:%s by %s", os.getpid(), renice)
        os.nice(int(renice))

    logger.debug("Getting random seed")
    # basically we duplicate the code of https://github.com/python/cpython/blob/master/Lib/random.py#L111 because
    # in case os.urandom is not available, random.seed defaults to epoch time. That would set the seed equal in each
    # process, which is not acceptable.
    try:
        random_seed = int.from_bytes(os.urandom(2500), 'big')
    except NotImplementedError:
        random_seed = int(
            time.time() * 256 + os.getpid()
        )  # that's not enough for MT, but will have to do for now.
    random.seed(random_seed)

    drivers = algo.split('+')

    runres = RunResult(algo, problem, runid=runid, results_path=RESULTS_DIR)

    try:
        final_driver, problem_mod = None, None
        for driver_pos, driver in list(enumerate(drivers))[::-1]:
            final_driver, problem_mod = prepare(driver, problem, final_driver,
                                                drivers, driver_pos)

        logger.debug("Creating the driver used to perform computation")
        driver = final_driver()
        total_cost, result = 0, None

        proc_time = []
        results = []

        logger.debug("Beginning processing of %s, args: %s", driver, args)
        with log_time(process_time,
                      logger,
                      "Processing done in {time_res}s CPU time",
                      out=proc_time):
            if isinstance(driver, DriverGen):
                logger.debug("The driver %s is DriverGen-based",
                             show_partial(driver))
                driver.max_budget = budgets[-1]
                gen = driver.population_generator()
                proxy = None
                logger.debug("Starting processing")

                for budget in budgets:
                    logger.debug("Curr budget step is %d", budget)
                    while total_cost < budget:
                        logger.debug("Waiting for next proxy")
                        proxy = gen.send(proxy)
                        logger.debug("Proxy.cost: %d", proxy.cost)
                        total_cost += proxy.cost
                        logger.debug("total_cost: %d", total_cost)

                    logger.debug(
                        "Cost %d equals/overpasses next budget step %d. Storing finalized population",
                        total_cost, budget)
                    finalpop = proxy.finalized_population()
                    finalpop_fit = [[fit(x) for fit in problem_mod.fitnesses]
                                    for x in finalpop]
                    runres.store(budget, total_cost, finalpop, finalpop_fit)
                    results.append((total_cost, finalpop))

                logger.debug("End loop, total_cost:%d", total_cost)
                logger.debug("Final population: %s",
                             proxy.finalized_population())

            elif isinstance(driver, DriverLegacy):
                logger.debug("The driver %s is DriverLegacy-based",
                             show_partial(driver))
                with log_time(process_time, logger,
                              "All iterations in {time_res}s CPU time"):
                    for budget in budgets:
                        logger.debug(
                            "Re-creating the driver used to perform computation"
                        )
                        driver = final_driver()
                        driver.budget = budget
                        with log_time(
                                process_time, logger,
                                "Iteration with budget {0} in {{time_res}}s CPU time"
                                .format(budget)):
                            logger.debug("Running with budget=%d", budget)
                            total_cost = driver.steps(count(), budget)
                        finalpop = driver.finish()
                        finalpop_fit = [[
                            fit(x) for fit in problem_mod.fitnesses
                        ] for x in finalpop]
                        runres.store(budget, total_cost, finalpop,
                                     finalpop_fit)
                        results.append((budget, finalpop))
            else:
                e = NotImplementedError()
                logger.exception(
                    "Oops. The driver type is not recognized, got %s",
                    show_partial(driver),
                    exc_info=e)
                raise e

        return results, proc_time[-1]

    except NotViableConfiguration as e:
        reason = inspect.trace()[-1]
        logger.info("Configuartion disabled by %s:%d:%s. args:%s", reason[1],
                    reason[2], reason[3], args)
        logger.debug("Configuration disabled args:%s. Stack:", exc_info=e)

    except Exception as e:
        logger.exception("Some error", exc_info=e)

    finally:
        logger.debug("Finished processing. args:%s", args)
Example No. 19
def run_parallel(args, queue):
    logger = logging.getLogger(__name__)

    budgets = sorted([int(budget) for budget in args['<budget>'].split(',')])
    logger.debug("Budgets: %s", budgets)

    order = list(product(run_config.problems, run_config.algorithms))

    logger.debug("Problems * algorithms: %s",
                 order)

    algorithms = run_config.algorithms
    if args['--algo']:
        algorithms_filter = args['--algo'].lower().split(',')
        logger.debug("Selecting algorithms by name: %s",
                     algorithms)
        algorithms = [
            algo
            for algo in algorithms
            if algo.lower() in algorithms_filter
        ]
        logger.debug("Selected: %s",
                     algorithms)

    problems = run_config.problems
    if args['--problem']:
        problems_filter = args['--problem'].lower().split(',')
        logger.debug("Selecting problems by name: %s",
                     problems)
        problems = [
            problem
            for problem in problems
            if problem.lower() in problems_filter
        ]
        logger.debug("Selected: %s",
                     problems)

    order = list(product(problems, algorithms))

    logger.info("Selected following tests:")
    for problem, algo in order:
        logger.info("  {problem:12} :: {algo:12}".format(problem=problem, algo=algo))

    logger.debug("Duplicating problems (-N flag)")
    order = [
        (test, budgets, runid, args["--renice"])
        for test in order
        for runid in range(int(args['-N']))
    ]

    logger.debug("Shuffling the job queue")
    random.shuffle(order)

    logger.debug("Creating the pool")

    with close_and_join(multiprocessing.Pool(int(args['-j']))) as p:

        wall_time = []
        start_time = datetime.now()
        results = []
        with log_time(system_time, logger, "Pool evaluated in {time_res}s", out=wall_time):

            for i, subres in enumerate(p.imap(worker, order, chunksize=1)):
                results.append(subres)

                current_time = datetime.now()
                diff_time = current_time - start_time
                ratio = i * 1. / len(order)
                try:
                    est_delivery_time = start_time + diff_time / ratio
                    time_to_delivery = est_delivery_time - current_time
                    logging.info("Job queue progress: %.3f%%. Est. finish in %02d:%02d:%02d (at %s)",
                                 ratio * 100,
                                 time_to_delivery.days * 24 + time_to_delivery.seconds // 3600,
                                 time_to_delivery.seconds // 60,
                                 time_to_delivery.seconds % 60,
                                 est_delivery_time.strftime("%Y-%m-%d %H:%M:%S.%f")
                                 )
                except ZeroDivisionError:
                    logging.info("Job queue progress: %.3f%%. Est. finish: unknown yet", ratio)

    proc_times = sum(subres[1]
                     for subres
                     in results
                     if subres is not None)
    errors = [(test, budgets, runid)
              for comp_result, (test, budgets, runid, renice)
              in zip(results, order)
              if comp_result is None
              ]

    speedup = proc_times / wall_time[0]

    logger.info("SUMMARY:")
    logger.info("  wall time:     %7.3f", wall_time[0])
    logger.info("  CPU+user time: %7.3f", proc_times)
    logger.info("  est. speedup:  %7.3f", speedup)

    if errors:
        logger.error("Errors encountered:")
        for (probl, algo), budgets, runid in errors:
            logger.error("  %9s :: %14s :: runID=%d :: budgets=%s", probl, algo, runid,
                         ','.join(str(x) for x in budgets))

    summary = collections.defaultdict(float)
    for (bench, _, _, _), subres in zip(order, results):
        if subres:
            summary[bench] += subres[1]

    if logger.isEnabledFor(logging.INFO):
        logger.info("Running time:")
        res = []
        for (prob, alg), timesum in sorted(summary.items(),
                                           key=operator.itemgetter(1),
                                           reverse=True):
            prob_show = "'" + prob + "'"
            alg_show = "'" + alg + "'"
            avg_time = timesum / float(args['-N'])
            logger.info("  prob:{prob_show:16} algo:{alg_show:16}) time:{avg_time:>8.3f}s".format(**locals()))