示例#1
0
def run_cmd(args, timer=None, timeout=None):
    """Implement the dcop "run" command.

    Loads the DCOP and the scenario, builds the computation graph, obtains a
    distribution (computed by a distribution module when ``args.distribution``
    is one of ``DISTRIBUTION_METHODS``, otherwise read from a file), starts
    the metrics collector thread, creates a local orchestrator (thread or
    process mode) and runs the scenario on it.

    The module-level globals ``INFINITY``, ``collect_on``, ``output_file``,
    ``dcop`` and ``orchestrator`` are (re)assigned by this function.

    :param args: parsed command-line arguments. Attributes read here:
        ``infinity``, ``collect_on``, ``output``, ``period``,
        ``run_metrics``, ``end_metrics``, ``algo``, ``algo_params``,
        ``dcop_files``, ``distribution``, ``scenario``, ``mode``,
        ``replication_method`` and ``ktarget``.
    :param timer: optional object exposing ``cancel()``; it is cancelled as
        soon as ``orchestrator.run`` returns.
    :param timeout: forwarded unchanged to ``orchestrator.run``.
    """
    logger.debug('dcop command "run" with arguments %s', args)

    global INFINITY, collect_on, output_file
    INFINITY = args.infinity
    collect_on = args.collect_on
    output_file = args.output

    # A collection period is only meaningful when collecting periodically.
    period = None
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    elif args.period is not None:
        _error('Cannot use "period" argument when collect_on is not '
               '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # Load the modules once: a distribution module is only requested when
    # the argument names a known method, otherwise the argument is treated
    # as a distribution file further down and ``dist_module`` is None.
    dist_name = (args.distribution
                 if args.distribution in DISTRIBUTION_METHODS else None)
    dist_module, algo_module, graph_module = _load_modules(
        dist_name, args.algo)

    global dcop
    logger.info("loading dcop from %s", args.dcop_files)
    dcop = load_dcop_from_file(args.dcop_files)
    dcop = filter_dcop(dcop)

    logger.info("loading scenario from %s", args.scenario)
    scenario = load_scenario_from_file(args.scenario)

    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)

    logger.info("Distributing computation graph ")
    if dist_module is not None:
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        distribution = load_dist_from_file(args.distribution)
    logger.debug("Distribution Computation graph: %s ", distribution)

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method,
        )
    elif args.mode == "process":

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger("pydcop.agent")
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
        )
    else:
        # Without this branch an unexpected mode would leave `orchestrator`
        # unassigned (or stale from a previous call) and fail obscurely below.
        _error('Unknown mode "{}", expected "thread" or "process"'.format(
            args.mode))
        # NOTE(review): _error presumably terminates the program; the return
        # only protects against an implementation that does not.
        return

    orchestrator.set_error_handler(_orchestrator_error)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario, timeout=timeout)
            if timer:
                timer.cancel()
            if not timeout_stopped:
                if orchestrator.status == "TIMEOUT":
                    _results("TIMEOUT")
                    sys.exit(0)
                elif orchestrator.status != "STOPPED":
                    _results("FINISHED")
                    sys.exit(0)

    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump the stack of every live thread to help diagnose the failure.
        # A single snapshot plus .get() avoids a KeyError (which would skip
        # the cleanup below) if a thread ends while we are iterating.
        frames = sys._current_frames()
        for th in threading.enumerate():
            print(th)
            frame = frames.get(th.ident)
            if frame is not None:
                traceback.print_stack(frame)
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
示例#2
0
def run_cmd(args, timer=None, timeout=None):
    """Implement the dcop "orchestrator" command.

    Loads the DCOP (and the scenario, when one is given), builds the
    computation graph, obtains a distribution (computed by a distribution
    module for the known method names, otherwise read from a file), starts
    the metrics collector thread, then creates an ``Orchestrator`` on top of
    an ``HttpCommunicationLayer`` and runs the problem on it.

    The module-level globals ``collect_on``, ``output_file``,
    ``orchestrator`` and ``start_time`` are (re)assigned by this function.

    :param args: parsed command-line arguments. Attributes read here:
        ``output``, ``collect_on``, ``dcop_files``, ``period``,
        ``run_metrics``, ``end_metrics``, ``distribution``, ``algo``,
        ``algo_params``, ``scenario``, ``ktarget``, ``port``, ``address``
        and ``uiport``.
    :param timer: not used by this function.
        NOTE(review): confirm whether it should be cancelled once the run
        completes.
    :param timeout: forwarded unchanged to ``orchestrator.run``.
    """
    logger.debug('dcop command "orchestrator" with arguments %s ', args)

    global collect_on, output_file
    output_file = args.output
    collect_on = args.collect_on

    dcop_yaml_files = args.dcop_files

    # A collection period is only meaningful when collecting periodically.
    period = None
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    elif args.period is not None:
        _error('Cannot use "period" argument when collect_on is not '
               '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # Only these names select a distribution module; any other value is
    # treated as a distribution file further down (dist_module is None).
    if args.distribution in ["oneagent", "adhoc", "ilp_fgdp", "heur_comhost"]:
        dist_name = args.distribution
    else:
        dist_name = None
    dist_module, algo_module, graph_module = _load_modules(
        dist_name, args.algo)

    logger.info("loading dcop from %s", dcop_yaml_files)
    dcop = load_dcop_from_file(dcop_yaml_files)

    if args.scenario:
        logger.info("loading scenario from %s", args.scenario)
        scenario = load_scenario_from_file(args.scenario)
    else:
        logger.debug("No scenario")
        scenario = None

    # Build factor-graph computation graph
    logger.info("Building computation graph for dcop %s", dcop_yaml_files)
    cg = graph_module.build_computation_graph(dcop)

    logger.info("Distributing computation graph ")
    if dist_module is not None:

        # Algorithm modules that do not define footprint functions get
        # zero-cost defaults so that distribute() can always call them.
        if not hasattr(algo_module, "computation_memory"):
            algo_module.computation_memory = lambda *v, **k: 0
        if not hasattr(algo_module, "communication_load"):
            algo_module.communication_load = lambda *v, **k: 0

        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        distribution = load_dist_from_file(args.distribution)

    logger.info("Dcop distribution : %s", distribution)

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # When using the (default) 'fork' start method, http servers on agent's
    # processes did not work (why ?), but seems to be ok now ?!
    # multiprocessing.set_start_method('spawn')

    # FIXME
    infinity = 10000

    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    # Always bind ktarget so later code can never hit an unbound local;
    # it is only consumed when a scenario is present.
    ktarget = None
    if args.ktarget:
        ktarget = args.ktarget
    elif scenario:
        logger.debug("Scenario without k target, use 3 as default level")
        ktarget = 3

    global orchestrator, start_time
    port = args.port if args.port else 9000
    addr = args.address if args.address else None
    comm = HttpCommunicationLayer((addr, port))
    orchestrator = Orchestrator(
        algo,
        cg,
        distribution,
        comm,
        dcop,
        infinity,
        collector=collector_queue,
        collect_moment=args.collect_on,
        collect_period=period,
        ui_port=args.uiport,
    )

    try:
        start_time = time()
        logger.debug("Starting Orchestrator")
        orchestrator.start()
        logger.debug("Deploying computations")
        orchestrator.deploy_computations()
        if scenario:
            logger.debug("Starting Replication, targert %s", ktarget)
            orchestrator.start_replication(ktarget)
            if orchestrator.wait_ready():
                orchestrator.run(scenario=scenario, timeout=timeout)
        else:
            logger.debug("No scenario, run the problem directly")
            orchestrator.run(timeout=timeout)
        if not timeout_stopped:
            if orchestrator.status == "TIMEOUT":
                _results("TIMEOUT")
                sys.exit(0)
            else:
                _results("FINISHED")
                sys.exit(0)

    except Exception as e:
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
示例#3
0
文件: run.py 项目: PierreRust/pyDcop
def run_cmd(args, timer):
    """Implement the dcop "run" command.

    Loads the DCOP, the distribution file and the scenario, builds the
    computation graph, starts the metrics collector thread, creates a local
    orchestrator (thread or process mode), then deploys the computations,
    starts replication and runs the scenario.

    The module-level globals ``INFINITY``, ``collect_on``, ``dcop`` and
    ``orchestrator`` are (re)assigned by this function.

    :param args: parsed command-line arguments. Attributes read here:
        ``infinity``, ``collect_on``, ``period``, ``run_metrics``,
        ``end_metrics``, ``algo``, ``algo_params``, ``dcop_files``,
        ``distribution``, ``scenario``, ``mode``, ``replication_method``
        and ``ktarget``.
    :param timer: not used by this function.
    """
    logger.debug('dcop command "run" with arguments %s', args)

    global INFINITY
    INFINITY = args.infinity

    global collect_on
    collect_on = args.collect_on

    # A collection period is only meaningful when collecting periodically.
    period = None
    if args.collect_on == 'period':
        period = 1 if args.period is None else args.period
    elif args.period is not None:
        _error('Cannot use "period" argument when collect_on is not '
               '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    _, algo_module, graph_module = _load_modules(None, args.algo)

    global dcop
    logger.info('loading dcop from %s', args.dcop_files)
    dcop = load_dcop_from_file(args.dcop_files)

    logger.info('Loading distribution from %s', args.distribution)
    distribution = load_dist_from_file(args.distribution)

    # FIXME: load replica dist from file and pass to orchestrator
    # (an earlier draft read it from args.replica_dist with
    # load_replica_dist_from_file).

    logger.info('loading scenario from %s', args.scenario)
    scenario = load_scenario_from_file(args.scenario)

    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method)
    elif args.mode == 'process':

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period)
    else:
        # Without this branch an unexpected mode would leave `orchestrator`
        # unassigned (or stale from a previous call) and fail obscurely below.
        _error('Unknown mode "{}", expected "thread" or "process"'.format(
            args.mode))
        # NOTE(review): _error presumably terminates the program; the return
        # only protects against an implementation that does not.
        return

    orchestrator.set_error_handler(_orchestrator_error)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario)
        # orchestrator.run(scenario) # FIXME
    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump the stack of every live thread to help diagnose the failure.
        # A single snapshot plus .get() avoids a KeyError (which would skip
        # the cleanup below) if a thread ends while we are iterating.
        frames = sys._current_frames()
        for th in threading.enumerate():
            print(th)
            frame = frames.get(th.ident)
            if frame is not None:
                traceback.print_stack(frame)
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR', e)