示例#1
0
def generate(args):
    """Generate a scenario for a DCOP and emit it as YAML.

    The list of DCOP files is taken from ``args.dcop_files`` or, when that
    is empty, from ``args.dcop_files_end``. The scenario is produced by
    ``generate_scenario`` from the event count, action count and delays
    found on ``args`` together with ``list(dcop.agents)``. It is serialized
    with ``yaml_scenario`` and written (UTF-8) to ``args.output`` when that
    is set, otherwise printed on stdout.

    :param args: parsed command-line arguments.
    :raises ValueError: if neither ``args.dcop_files`` nor
        ``args.dcop_files_end`` is set.
    """
    dcop_files = args.dcop_files or args.dcop_files_end
    if not dcop_files:
        # This case used to fail further down with an UnboundLocalError.
        raise ValueError("No dcop file given, cannot generate a scenario")

    # Log the files that are actually loaded (may come from dcop_files_end).
    logger.info("loading dcop from {}".format(dcop_files))

    dcop = load_dcop_from_file(dcop_files)
    agents = list(dcop.agents)

    scenario = generate_scenario(
        args.evts_count,
        args.actions_count,
        args.delay,
        args.initial_delay,
        args.end_delay,
        agents,
    )

    serialized = yaml_scenario(scenario)

    if args.output:
        with open(args.output, encoding="utf-8", mode="w") as fo:
            fo.write(serialized)
    else:
        print(serialized)
示例#2
0
def run_cmd(args):
    """Run the "distribute" command: compute a distribution and report it.

    Loads the DCOP from ``args.dcop_files``, builds its computation graph
    with the graph module returned by ``_load_modules`` and asks the
    distribution module to distribute it over ``dcop.agents.values()``.
    The mapping and its cost are dumped as YAML on stdout and, when
    ``args.output`` is not None, also written to that file.

    Exits the process with code 0 on success and with code 2 when an
    ``ImpossibleDistributionException`` is raised.

    :param args: parsed command-line arguments; ``dcop_files``, ``dist``,
        ``algo``, ``graph`` and ``output`` are read.
    """
    logger.debug('dcop command "distribute" with arguments {} '.format(args))

    yaml_files = args.dcop_files
    dist_module, algo_module, graph_module = _load_modules(
        args.dist, args.algo, args.graph)

    logger.info('loading dcop from {}'.format(yaml_files))
    dcop = load_dcop_from_file(yaml_files)

    # Build factor-graph computation graph
    logger.info(
        'Building computation graph for dcop {}'.format(yaml_files))
    cg = graph_module.build_computation_graph(dcop)

    logger.info(
        'Distributing computation graph for dcop {}'.format(yaml_files))

    # The footprint callbacks come from the algorithm module when there is
    # one; otherwise None is passed for both.
    footprint = {'computation_memory': None, 'communication_load': None}
    if algo_module is not None:
        footprint['computation_memory'] = algo_module.computation_memory
        footprint['communication_load'] = algo_module.communication_load

    try:
        distribution = dist_module.distribute(
            cg, dcop.agents.values(), hints=dcop.dist_hints, **footprint)
        mapping = distribution.mapping()
        cost = dist_module.distribution_cost(
            distribution, cg, dcop.agents.values(), **footprint)

        inputs = {
            'dist_algo': args.dist,
            'dcop': args.dcop_files,
            'graph': args.graph,
            'algo': args.algo,
        }
        result = {'inputs': inputs, 'distribution': mapping, 'cost': cost}

        if args.output is not None:
            with open(args.output, encoding='utf-8', mode='w') as fo:
                fo.write(yaml.dump(result))
        print(yaml.dump(result))
        sys.exit(0)

    except ImpossibleDistributionException as e:
        failure = {'status': 'FAIL', 'error': str(e)}
        print(yaml.dump(failure))
        sys.exit(2)
示例#3
0
def run_cmd(args):
    """Run the "solve" command through an HTTP-based orchestrator.

    Loads the DCOP from ``args.dcop_files``, builds its computation graph,
    obtains a distribution and starts an ``Orchestrator`` listening on
    127.0.0.1:9000. The distribution is computed by a distribution module
    when ``args.distribution`` is one of 'oneagent', 'adhoc' or 'ilp_fgdp',
    and loaded with ``load_dist_from_file`` otherwise.

    Sets the module-level ``orchestrator`` and ``start_time`` globals.

    :param args: parsed command-line arguments; ``dcop_files``,
        ``distribution``, ``algo`` and ``algo_params`` are read.
    """
    global orchestrator, start_time

    logger.debug('dcop command "solve" with arguments {} '.format(args))

    yaml_files = args.dcop_files

    # Any other value of args.distribution is handed to
    # load_dist_from_file further down.
    known_methods = ['oneagent', 'adhoc', 'ilp_fgdp']
    method = args.distribution if args.distribution in known_methods else None
    dist_module, algo_module, graph_module = _load_modules(method, args.algo)

    logger.info('loading dcop from {}'.format(yaml_files))
    dcop = load_dcop_from_file(yaml_files)

    # Build factor-graph computation graph
    logger.info(
        'Building computation graph for dcop {}'.format(yaml_files))
    cg = graph_module.build_computation_graph(dcop)

    logger.info('Distributing computation graph ')
    if dist_module is None:
        distribution = load_dist_from_file(args.distribution)
        logger.debug('Distribution Computation graph: %s ', distribution)
    else:
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load)

    logger.info('Dcop distribution : {}'.format(distribution))

    algo = build_algo_def(
        algo_module, args.algo, dcop.objective, args.algo_params)

    # When using the (default) 'fork' start method, http servers on agent's
    # processes do not work (why ?)
    multiprocessing.set_start_method('spawn')

    # FIXME
    infinity = 10000

    comm = HttpCommunicationLayer(('127.0.0.1', 9000))
    orchestrator = Orchestrator(algo, cg, distribution, comm, dcop, infinity)

    start_time = time()
    orchestrator.start()
    orchestrator.deploy_computations()
    orchestrator.run()
示例#4
0
def run_cmd(args):
    """Run the "graph" command: report statistics on a computation graph.

    Loads the DCOP from ``args.dcop_file``, imports the module
    ``pydcop.computations_graph.<args.graph>`` and passes both to
    ``graph_stats``. An ``ImportError`` raised while doing so is reported
    through ``_error``.

    :param args: parsed command-line arguments; ``dcop_file`` and ``graph``
        are read.
    """
    logger.debug('dcop command "graph" with arguments {} '.format(args))

    logger.info('loading dcop from {}'.format(args.dcop_file))
    dcop = load_dcop_from_file(args.dcop_file)

    module_path = 'pydcop.computations_graph.{}'.format(args.graph)
    try:
        graph_stats(dcop, import_module(module_path))
    except ImportError:
        _error('Could not find computation graph type: {}'.format(args.graph))
示例#5
0
def generate(args):
    """Generate agent definitions and emit them as YAML.

    ``args.dcop_files`` falls back to ``args.dcop_files_end`` when empty
    (``args`` is modified in place). When DCOP files are given, the DCOP is
    loaded and its variables are used to name the agents and to build the
    hosting and route costs. One ``AgentDef`` is created per generated agent
    name, and the result of ``yaml_agents`` is written (UTF-8) to
    ``args.output`` when set, otherwise printed on stdout.

    :param args: parsed command-line arguments; validated by ``check_args``.
    """
    if not args.dcop_files and args.dcop_files_end:
        args.dcop_files = args.dcop_files_end

    check_args(args)

    # `dcop` must be bound even when no dcop file is given: it is passed to
    # generate_routes_costs below, which used to fail with an
    # UnboundLocalError in that case.
    dcop = None
    variables = []
    if args.dcop_files:
        logger.info("loading dcop from {}".format(args.dcop_files))
        dcop = load_dcop_from_file(args.dcop_files)
        variables = list(dcop.variables)

    agents_name = generate_agents_names(args.mode, args.count, variables,
                                        args.agent_prefix)

    # The options use the string "None" (not the None object) to mean
    # "disabled".
    mapping = {}
    hosting_costs = {}
    if args.hosting != "None":
        mapping = agent_variables_mapping(args.hosting, agents_name, variables)
        hosting_costs = generate_hosting_costs(args.hosting, mapping)

    routes_costs = {}
    if args.routes != "None":
        routes_costs = generate_routes_costs(args.routes, mapping, dcop)

    agents = []
    for agt_name in agents_name:
        # Only pass the keyword arguments that have a value, so AgentDef
        # keeps its own defaults for the others.
        kw = {}
        if agt_name in hosting_costs:
            kw["hosting_costs"] = hosting_costs[agt_name]
        if args.hosting_default:
            kw["default_hosting_cost"] = args.hosting_default
        if args.capacity:
            kw["capacity"] = args.capacity
        if agt_name in routes_costs:
            kw["routes"] = routes_costs[agt_name]
        if args.routes_default:
            kw["default_route"] = args.routes_default
        agents.append(AgentDef(agt_name, **kw))

    serialized = yaml_agents(agents)

    if args.output:
        with open(args.output, encoding="utf-8", mode="w") as fo:
            fo.write(serialized)
    else:
        print(serialized)
示例#6
0
def distribution_cost(dcop_files: List[str], distribution_file, algo, target):
    """Write the costs of every matching distribution of a DCOP as CSV rows.

    ``distribution_file`` is a glob pattern (``~`` is expanded and the path
    made absolute). For each matching file, in sorted order, the costs
    returned by ``single_distrib_costs`` are written to ``target`` as one
    row: first dcop file, distribution file, cost, hosting cost,
    communication cost.

    This is best-effort: a distribution whose costs cannot be computed is
    skipped with a warning instead of aborting the whole run.

    :param dcop_files: files the DCOP is loaded from.
    :param distribution_file: glob pattern selecting the distribution files.
    :param algo: passed through to ``single_distrib_costs``.
    :param target: object handed to ``csv.writer``; rows are written to it.
    :return: ``target``.
    """
    logger.debug(f"analyse file {dcop_files}")

    dcop = load_dcop_from_file(dcop_files)
    path_glob = os.path.abspath(os.path.expanduser(distribution_file))

    # A single writer serves all rows.
    csv_writer = csv.writer(target)
    for dist_path in sorted(glob.iglob(path_glob)):
        try:
            cost, comm, hosting = single_distrib_costs(dcop, dist_path, algo)
            csv_writer.writerow(
                [dcop_files[0], dist_path, cost, hosting, comm])
        except Exception as e:
            # Unlike a bare `except`, this lets KeyboardInterrupt and
            # SystemExit through, and it leaves a trace of what was skipped.
            logger.warning(
                f"Could not compute costs for distribution {dist_path}, "
                f"skipping it: {e}")
    return target
示例#7
0
def run_cmd(args):
    """Run the "graph" command: optionally display, then report statistics.

    Loads the DCOP from ``args.dcop_file``. When ``args.display`` is set,
    the variables and constraints are drawn with
    ``display_bipartite_graph`` for the "factor_graph" model and with
    ``display_graph`` for any other one. The module
    ``pydcop.computations_graph.<args.graph>`` is then imported and passed
    to ``graph_stats``; an ``ImportError`` is reported through ``_error``.

    :param args: parsed command-line arguments; ``dcop_file``, ``display``
        and ``graph`` are read.
    """
    logger.debug('dcop command "graph" with arguments {} '.format(args))

    yaml_file = args.dcop_file
    logger.info("loading dcop from {}".format(yaml_file))
    dcop = load_dcop_from_file(yaml_file)

    if args.display:
        draw = (display_bipartite_graph
                if args.graph == "factor_graph" else display_graph)
        draw(dcop.variables.values(), dcop.constraints.values())

    module_path = "pydcop.computations_graph.{}".format(args.graph)
    try:
        graph_module = import_module(module_path)
        logger.info("Building computation graph for dcop {}".format(dcop.name))
        graph_stats(dcop, graph_module)
    except ImportError:
        _error("Could not find computation graph type: {}".format(args.graph))
示例#8
0
def run_cmd(args, timer=None):
    """Run the "solve" command with local (thread or process) agents.

    Loads the DCOP, builds and distributes its computation graph, starts a
    daemon thread collecting metrics, then creates the orchestrator with
    ``run_local_thread_dcop`` or ``run_local_process_dcop`` depending on
    ``args.mode`` and runs it. Any exception raised while deploying or
    running stops the agents and the orchestrator and reports 'ERROR'
    through ``_results``.

    Sets the module-level ``INFINITY``, ``collect_on``, ``dcop`` and
    ``orchestrator`` globals.

    :param args: parsed command-line arguments.
    :param timer: not used by this function.
    """
    global INFINITY, collect_on, dcop, orchestrator

    logger.debug('dcop command "solve" with arguments {}'.format(args))

    INFINITY = args.infinity
    collect_on = args.collect_on

    # A period only makes sense when collecting periodically.
    if args.collect_on == 'period':
        period = args.period if args.period is not None else 1
    else:
        period = None
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # Any other value of args.distribution is handed to
    # load_dist_from_file further down.
    known_methods = ['oneagent', 'adhoc', 'ilp_fgdp']
    method = args.distribution if args.distribution in known_methods else None
    dist_module, algo_module, graph_module = _load_modules(method, args.algo)

    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    # Build factor-graph computation graph
    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)
    logger.debug('Computation graph: %s ', cg)

    logger.info('Distributing computation graph ')
    if dist_module is None:
        distribution = load_dist_from_file(args.distribution)
    else:
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load)
    logger.debug('Distribution Computation graph: %s ', distribution)

    logger.info('Dcop distribution : {}'.format(distribution))

    algo = build_algo_def(
        algo_module, args.algo, dcop.objective, args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collector_thread = Thread(target=collect_tread,
                              args=[collector_queue, csv_cb],
                              daemon=True)
    collector_thread.start()

    launcher = None
    if args.mode == 'thread':
        launcher = run_local_thread_dcop
    elif args.mode == 'process':
        # Disable logs from agents, they are in other processes anyway
        logging.getLogger('pydcop.agent').disabled = True

        # When using the (default) 'fork' start method, http servers on
        # agent's processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        launcher = run_local_process_dcop

    if launcher is not None:
        orchestrator = launcher(algo, cg, distribution, dcop,
                                INFINITY,
                                collector=collector_queue,
                                collect_moment=args.collect_on,
                                period=period)

    try:
        orchestrator.deploy_computations()
        orchestrator.run()
    except Exception as e:
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR')
示例#9
0
def run_cmd(args, timer=None, timeout=None):
    """Run the "orchestrator" command.

    Loads the DCOP and, when ``args.scenario`` is set, a scenario; builds
    and distributes the computation graph; starts a daemon thread collecting
    metrics; then creates an ``Orchestrator`` over an
    ``HttpCommunicationLayer`` and runs it. With a scenario, replication is
    started first and the run only happens once ``wait_ready`` returns a
    true value.

    Unless the module-level ``timeout_stopped`` flag is set, the function
    reports "TIMEOUT" or "FINISHED" through ``_results`` and exits the
    process with code 0. Any exception raised while starting or running
    stops the agents and the orchestrator and reports "ERROR".

    Sets the module-level ``collect_on``, ``output_file``, ``orchestrator``
    and ``start_time`` globals.

    :param args: parsed command-line arguments.
    :param timer: not used by this function.
    :param timeout: passed to ``orchestrator.run``.
    """
    global collect_on, output_file, orchestrator, start_time

    logger.debug('dcop command "orchestrator" with arguments {} '.format(args))

    output_file = args.output
    collect_on = args.collect_on

    yaml_files = args.dcop_files

    # A period only makes sense when collecting periodically.
    if args.collect_on == "period":
        period = args.period if args.period is not None else 1
    else:
        period = None
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # Any other value of args.distribution is handed to
    # load_dist_from_file further down.
    known_methods = ["oneagent", "adhoc", "ilp_fgdp", "heur_comhost"]
    method = args.distribution if args.distribution in known_methods else None
    dist_module, algo_module, graph_module = _load_modules(method, args.algo)

    logger.info("loading dcop from {}".format(yaml_files))
    dcop = load_dcop_from_file(yaml_files)

    scenario = None
    if args.scenario:
        logger.info("loading scenario from {}".format(args.scenario))
        scenario = load_scenario_from_file(args.scenario)
    else:
        logger.debug("No scenario")

    # Build factor-graph computation graph
    logger.info(
        "Building computation graph for dcop {}".format(yaml_files))
    cg = graph_module.build_computation_graph(dcop)

    logger.info("Distributing computation graph ")
    if dist_module is None:
        distribution = load_dist_from_file(args.distribution)
    else:
        # Algorithm modules without footprint functions get ones that
        # always return 0.
        for hook in ("computation_memory", "communication_load"):
            if not hasattr(algo_module, hook):
                setattr(algo_module, hook, lambda *v, **k: 0)

        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )

    logger.info("Dcop distribution : {}".format(distribution))

    algo = build_algo_def(
        algo_module, args.algo, dcop.objective, args.algo_params)

    # When using the (default) 'fork' start method, http servers on agent's
    # processes did not work (why ?), but seems to be ok now ?!
    # multiprocessing.set_start_method('spawn')

    # FIXME
    infinity = 10000

    # Setup metrics collection
    collector_queue = Queue()
    collector_thread = Thread(target=collect_tread,
                              args=[collector_queue, csv_cb],
                              daemon=True)
    collector_thread.start()

    # ktarget is only needed (and only bound) when there is a scenario or
    # an explicit value.
    if args.ktarget:
        ktarget = args.ktarget
    elif scenario:
        logger.debug("Scenario without k target, use 3 as default level")
        ktarget = 3

    comm = HttpCommunicationLayer((args.address or None, args.port or 9000))
    orchestrator = Orchestrator(
        algo,
        cg,
        distribution,
        comm,
        dcop,
        infinity,
        collector=collector_queue,
        collect_moment=args.collect_on,
        collect_period=period,
        ui_port=args.uiport,
    )

    try:
        start_time = time()
        logger.debug("Starting Orchestrator")
        orchestrator.start()
        logger.debug("Deploying computations")
        orchestrator.deploy_computations()
        if scenario:
            logger.debug(f"Starting Replication, targert {ktarget}")
            orchestrator.start_replication(ktarget)
            if orchestrator.wait_ready():
                orchestrator.run(scenario=scenario, timeout=timeout)
        else:
            logger.debug("No scenario, run the problem directly")
            orchestrator.run(timeout=timeout)

        if not timeout_stopped:
            timed_out = orchestrator.status == "TIMEOUT"
            _results("TIMEOUT" if timed_out else "FINISHED")
            sys.exit(0)

    except Exception as e:
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
示例#10
0
def run_cmd(args, timer=None, timeout=None):
    """Run the "solve" command with local agents and an optional timeout.

    Loads the DCOP, builds and distributes its computation graph, starts a
    daemon thread collecting metrics, creates the orchestrator with
    ``run_local_thread_dcop`` or ``run_local_process_dcop`` depending on
    ``args.mode`` and runs it with ``timeout``.

    After the run, ``timer`` is cancelled when given. Unless the
    module-level ``timeout_stopped`` flag is set, "TIMEOUT" is reported when
    the orchestrator status is "TIMEOUT" and "FINISHED" when it is anything
    other than "STOPPED"; in both cases the process exits with code 0. Any
    exception raised while deploying or running stops the agents and the
    orchestrator and reports "ERROR".

    Sets the module-level ``INFINITY``, ``collect_on``, ``output_file``,
    ``dcop`` and ``orchestrator`` globals.

    :param args: parsed command-line arguments.
    :param timer: optional object whose ``cancel()`` is called after the run.
    :param timeout: passed to ``orchestrator.run``.
    """
    global INFINITY, collect_on, output_file, dcop, orchestrator

    logger.debug('dcop command "solve" with arguments {}'.format(args))

    INFINITY = args.infinity
    output_file = args.output
    collect_on = args.collect_on

    # A period only makes sense when collecting periodically.
    if args.collect_on == "period":
        period = args.period if args.period is not None else 1
    else:
        period = None
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # Values outside DISTRIBUTION_METHODS are handed to
    # load_dist_from_file further down.
    if args.distribution in DISTRIBUTION_METHODS:
        method = args.distribution
    else:
        method = None
    dist_module, algo_module, graph_module = _load_modules(method, args.algo)

    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)
    logger.debug(f"dcop  {dcop} ")

    # Build factor-graph computation graph
    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)
    logger.debug("Computation graph: %s ", cg)

    logger.info("Distributing computation graph ")
    if dist_module is None:
        distribution = load_dist_from_file(args.distribution)
    else:
        # Algorithm modules without footprint functions get ones that
        # always return 0.
        for hook in ("computation_memory", "communication_load"):
            if not hasattr(algo_module, hook):
                setattr(algo_module, hook, lambda *v, **k: 0)

        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    logger.debug("Distribution Computation graph: %s ", distribution)

    logger.info("Dcop distribution : {}".format(distribution))

    algo = build_algo_def(
        algo_module, args.algo, dcop.objective, args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collector_thread = Thread(target=collect_tread,
                              args=[collector_queue, csv_cb],
                              daemon=True)
    collector_thread.start()

    launcher = None
    if args.mode == "thread":
        launcher = run_local_thread_dcop
    elif args.mode == "process":
        # Disable logs from agents, they are in other processes anyway
        logging.getLogger("pydcop.agent").disabled = True

        # When using the (default) 'fork' start method, http servers on
        # agent's processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        launcher = run_local_process_dcop

    if launcher is not None:
        orchestrator = launcher(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            delay=args.delay,
            uiport=args.uiport,
        )

    try:
        orchestrator.deploy_computations()
        orchestrator.run(timeout=timeout)
        if timer:
            timer.cancel()
        if not timeout_stopped:
            if orchestrator.status == "TIMEOUT":
                _results("TIMEOUT")
                sys.exit(0)
            elif orchestrator.status != "STOPPED":
                _results("FINISHED")
                sys.exit(0)

    except Exception as e:
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
示例#11
0
def run_cmd(args, timer=None, timeout=None):
    """Run the "distribute" command with timeout and cost reporting.

    Loads the DCOP from ``args.dcop_files``, selects the distribution, cost,
    algorithm and graph modules from ``args``, builds the computation graph
    and distributes it over ``dcop.agents.values()``. The outcome is kept in
    the module-level ``result`` dict, dumped as YAML on stdout and, when an
    output file is configured, written to it.

    The process exits with code 0 in three cases: success (status
    "SUCCESS"), ``TimeoutError`` (status "TIMEOUT") and
    ``ImpossibleDistributionException`` (status "FAIL").

    Sets the module-level ``output_file``, ``result`` and ``start_t``
    globals.

    :param args: parsed command-line arguments.
    :param timer: optional object whose ``cancel()`` is called once the
        distribution step is over.
    :param timeout: passed to ``distribute``; 3600 is used when it is
        false-like.
    """
    global output_file, result, start_t

    def _publish(report, path):
        # Write the report to `path` when one is given, echo it on stdout,
        # then end the process with exit code 0.
        if path is not None:
            with open(path, encoding="utf-8", mode="w") as fo:
                fo.write(yaml.dump(report))
        print(yaml.dump(report))
        sys.exit(0)

    def _cancel_timer():
        if timer:
            timer.cancel()

    logger.debug('dcop command "distribute" with arguments {} '.format(args))

    yaml_files = args.dcop_files
    logger.info("loading dcop from {}".format(yaml_files))
    dcop = load_dcop_from_file(yaml_files)

    dist_module = load_distribution_module(args.distribution)

    # Cost computation: explicit module first, then the distribution module
    # itself when it provides `distribution_cost`.
    cost_module = None
    if args.cost:
        cost_module = load_distribution_module(args.cost)
    elif hasattr(dist_module, "distribution_cost"):
        cost_module = dist_module

    algo_module = None
    graph_module = None
    if args.algo is not None:
        algo_module = load_algo_module(args.algo)

    if args.graph is not None:
        graph_type = args.graph
        graph_module = load_graph_module(args.graph)
        # Check that the graph model and the algorithm are compatible:
        if algo_module is not None and algo_module.GRAPH_TYPE != args.graph:
            _error("Incompatible graph model and algorithm")
    elif algo_module is not None:
        graph_module = load_graph_module(algo_module.GRAPH_TYPE)
        graph_type = algo_module.GRAPH_TYPE
    else:
        _error("You must pass at leat --graph or --algo option")

    output_file = args.output

    # Build factor-graph computation graph
    logger.info("Building computation graph for dcop {}".format(yaml_files))
    cg = graph_module.build_computation_graph(dcop)

    logger.info("Distributing computation graph for dcop {}".format(yaml_files))

    if algo_module is not None:
        computation_memory = algo_module.computation_memory
        communication_load = algo_module.communication_load
    else:
        computation_memory = None
        communication_load = None

    # Record the inputs right away so that they are part of the report even
    # if the distribution does not complete.
    result.update({
        "inputs": {
            "dist_algo": args.distribution,
            "dcop": args.dcop_files,
            "graph": graph_type,
            "algo": args.algo,
        },
        "status": "PROGRESS",
    })

    try:
        start_t = time.time()
        timeout = timeout or 3600
        # Warning: some methods may not honor the timeout parameter
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=computation_memory,
            communication_load=communication_load,
            timeout=timeout
        )
        duration = time.time() - start_t
        dist = distribution.mapping()

        _cancel_timer()

        cost, comm, hosting = None, None, None
        if cost_module:
            cost, comm, hosting = cost_module.distribution_cost(
                distribution,
                cg,
                dcop.agents.values(),
                computation_memory=computation_memory,
                communication_load=communication_load,
            )

        result = {
            "inputs": {
                "dist_algo": args.distribution,
                "dcop": args.dcop_files,
                "graph": graph_type,
                "algo": args.algo,
                "duration": duration,
            },
            "distribution": dist,
            "cost": cost,
            "communication_cost": comm,
            "hosting_cost": hosting,
            "status": "SUCCESS"
        }
        _publish(result, args.output)

    except TimeoutError as e:
        _cancel_timer()
        duration = time.time() - start_t
        result["status"] = "TIMEOUT"
        result["inputs"]["duration"] = duration
        _publish(result, output_file)

    except ImpossibleDistributionException as e:
        _cancel_timer()
        result["status"] = "FAIL"
        result["error"] = str(e)
        _publish(result, output_file)
示例#12
0
def run_cmd(args, timer=None, timeout=None):
    """Run the "run" command: solve a DCOP while playing a scenario.

    Loads the DCOP (passed through ``filter_dcop``) and the scenario, builds
    and distributes the computation graph, starts a daemon thread collecting
    metrics, creates the orchestrator with ``run_local_thread_dcop`` or
    ``run_local_process_dcop`` depending on ``args.mode``, starts
    replication with ``args.ktarget`` and, once ``wait_ready`` returns a
    true value, runs the scenario.

    After the run, ``timer`` is cancelled when given. Unless the
    module-level ``timeout_stopped`` flag is set, "TIMEOUT" is reported when
    the orchestrator status is "TIMEOUT" and "FINISHED" when it is anything
    other than "STOPPED"; in both cases the process exits with code 0. On
    any exception the stack of every live thread is printed, the agents and
    the orchestrator are stopped and "ERROR" is reported.

    Sets the module-level ``INFINITY``, ``collect_on``, ``output_file``,
    ``dcop`` and ``orchestrator`` globals.

    :param args: parsed command-line arguments.
    :param timer: optional object whose ``cancel()`` is called after the run.
    :param timeout: passed to ``orchestrator.run``.
    """
    logger.debug('dcop command "run" with arguments {}'.format(args))

    global INFINITY, collect_on, output_file
    INFINITY = args.infinity
    collect_on = args.collect_on
    output_file = args.output

    # A period only makes sense when collecting periodically.
    period = None
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    global dcop
    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    dcop = filter_dcop(dcop)

    # The modules are loaded once, here. An earlier
    # `_load_modules(None, args.algo)` call was dropped: all of its results
    # were overwritten by this one before being used.
    # Values outside DISTRIBUTION_METHODS are handed to
    # load_dist_from_file further down.
    if args.distribution in DISTRIBUTION_METHODS:
        dist_module, algo_module, graph_module = _load_modules(
            args.distribution, args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)

    logger.info("loading scenario from {}".format(args.scenario))
    scenario = load_scenario_from_file(args.scenario)

    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)

    logger.info("Distributing computation graph ")
    if dist_module is not None:
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        distribution = load_dist_from_file(args.distribution)
    logger.debug("Distribution Computation graph: %s ", distribution)

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method,
        )
    elif args.mode == "process":

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger("pydcop.agent")
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
        )

    orchestrator.set_error_handler(_orchestrator_error)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario, timeout=timeout)
            if timer:
                timer.cancel()
            if not timeout_stopped:
                if orchestrator.status == "TIMEOUT":
                    _results("TIMEOUT")
                    sys.exit(0)
                elif orchestrator.status != "STOPPED":
                    _results("FINISHED")
                    sys.exit(0)

    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump the stack of every live thread to help find what is stuck.
        for th in threading.enumerate():
            print(th)
            traceback.print_stack(sys._current_frames()[th.ident])
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
示例#13
0
文件: run.py 项目: PierreRust/pyDcop
def run_cmd(args, timer):
    """Run the "run" command: solve a DCOP while playing a scenario.

    Loads the DCOP, a distribution (always from the file
    ``args.distribution``) and a scenario, builds the computation graph,
    starts a daemon thread collecting metrics, creates the orchestrator
    with ``run_local_thread_dcop`` or ``run_local_process_dcop`` depending
    on ``args.mode``, starts replication with ``args.ktarget`` and, once
    ``wait_ready`` returns a true value, runs the scenario.

    On any exception the stack of every live thread is printed, the agents
    and the orchestrator are stopped and 'ERROR' is reported through
    ``_results`` together with the exception.

    Sets the module-level ``INFINITY``, ``collect_on``, ``dcop`` and
    ``orchestrator`` globals.

    :param args: parsed command-line arguments.
    :param timer: not used by this function.
    """
    global INFINITY, collect_on, dcop, orchestrator

    logger.debug('dcop command "run" with arguments {}'.format(args))

    INFINITY = args.infinity
    collect_on = args.collect_on

    # A period only makes sense when collecting periodically.
    if args.collect_on == 'period':
        period = args.period if args.period is not None else 1
    else:
        period = None
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    _, algo_module, graph_module = _load_modules(None, args.algo)

    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    logger.info('Loading distribution from {}'.format(args.distribution))
    distribution = load_dist_from_file(args.distribution)

    # FIXME: load replica dist from file and pass to orchestrator

    logger.info('loading scenario from {}'.format(args.scenario))
    scenario = load_scenario_from_file(args.scenario)

    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)

    algo = build_algo_def(
        algo_module, args.algo, dcop.objective, args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collector_thread = Thread(target=collect_tread,
                              args=[collector_queue, csv_cb],
                              daemon=True)
    collector_thread.start()

    # Keyword arguments shared by both launchers.
    collect_opts = {
        'collector': collector_queue,
        'collect_moment': args.collect_on,
        'period': period,
    }
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(
            algo, cg, distribution, dcop, INFINITY,
            replication=args.replication_method,
            **collect_opts)
    elif args.mode == 'process':
        # Disable logs from agents, they are in other processes anyway
        logging.getLogger('pydcop.agent').disabled = True

        # When using the (default) 'fork' start method, http servers on
        # agent's processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(
            algo, cg, distribution, dcop, INFINITY, **collect_opts)

    orchestrator.set_error_handler(_orchestrator_error)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario)
    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump the stack of every live thread to help find what is stuck.
        frames = sys._current_frames
        for th in threading.enumerate():
            print(th)
            traceback.print_stack(frames()[th.ident])
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR', e)
示例#14
0
def run_cmd(args):
    """Distribute the computations of a DCOP over its agents and report it.

    Loads the DCOP from ``args.dcop_files``, builds its computation graph
    and asks the distribution module selected by ``args.dist`` to map the
    graph's computations onto the DCOP's agents.

    The graph model is ``args.graph`` when given; otherwise it is the
    ``GRAPH_TYPE`` of the algorithm selected by ``args.algo``.  When both are
    given they must agree.  When an algorithm is given, its
    ``computation_memory`` and ``communication_load`` callables are handed to
    the distribution module; otherwise ``None`` is passed for both.

    The result (inputs, mapping and cost) is printed as YAML on stdout and,
    when ``args.output`` is set, also written to that file.  The cost is
    ``None`` when the distribution module has no ``distribution_cost``.

    :param args: parsed command-line arguments; the attributes read here are
        ``dcop_files``, ``dist``, ``algo``, ``graph`` and ``output``.
    :raises SystemExit: always -- status 0 on success, status 2 (after
        printing a ``FAIL`` YAML document) when the distribution module
        raises ``ImpossibleDistributionException``.
    """
    logger.debug('dcop command "distribute" with arguments {} '.format(args))

    dcop_yaml_files = args.dcop_files
    logger.info('loading dcop from {}'.format(dcop_yaml_files))
    dcop = load_dcop_from_file(dcop_yaml_files)

    dist_module = load_distribution_module(args.dist)

    algo_module, graph_module = None, None
    if args.algo is not None:
        algo_module = load_algo_module(args.algo)

    # NOTE(review): the code below assumes `_error` (defined elsewhere in the
    # file) terminates the command -- confirm; otherwise `graph_module` may
    # still be None when the graph is built.
    if args.graph is not None:
        graph_module = load_graph_module(args.graph)
        # Check that the graph model and the algorithm are compatible:
        if algo_module is not None and algo_module.GRAPH_TYPE != args.graph:
            _error('Incompatible graph model and algorithm')
    elif algo_module is not None:
        # No explicit graph model: fall back on the algorithm's own.
        graph_module = load_graph_module(algo_module.GRAPH_TYPE)
    else:
        _error('You must pass at least --graph or --algo option')

    # Build the computation graph for the selected graph model
    logger.info('Building computation graph for dcop {}'
                .format(dcop_yaml_files))
    cg = graph_module.build_computation_graph(dcop)

    logger.info('Distributing computation graph for dcop {}'
                .format(dcop_yaml_files))

    if algo_module is None:
        computation_memory = None
        communication_load = None
    else:
        computation_memory = algo_module.computation_memory
        communication_load = algo_module.communication_load

    try:
        distribution = dist_module\
            .distribute(cg, dcop.agents.values(),
                        hints=dcop.dist_hints,
                        computation_memory=computation_memory,
                        communication_load=communication_load)
        dist = distribution.mapping()

        # Not every distribution module knows how to price its result.
        if hasattr(dist_module, 'distribution_cost'):
            cost = dist_module.distribution_cost(
                distribution, cg, dcop.agents.values(),
                computation_memory=computation_memory,
                communication_load=communication_load)
        else:
            cost = None

        result = {
            'inputs': {
                'dist_algo': args.dist,
                'dcop': args.dcop_files,
                'graph': args.graph,
                'algo': args.algo,
            },
            'distribution': dist,
            'cost': cost
        }
        # Serialise once, before touching the output file, so that a
        # serialisation failure cannot leave a truncated file behind.
        serialized = yaml.dump(result)
        if args.output is not None:
            with open(args.output, encoding='utf-8', mode='w') as fo:
                fo.write(serialized)
        print(serialized)
        sys.exit(0)

    except ImpossibleDistributionException as e:
        result = {
            'status': 'FAIL',
            'error': str(e)
        }
        print(yaml.dump(result))
        sys.exit(2)
示例#15
0
def run_cmd(args, timer: Timer = None):
    """Replicate the computations of a locally deployed DCOP and report hosts.

    Loads the DCOP from ``args.dcop_files`` and the distribution from
    ``args.distribution``, starts a local orchestrator (through
    ``run_local_thread_dcop`` or ``run_local_process_dcop`` depending on
    ``args.mode``), deploys the computations and starts replication with
    ``args.ktarget``.  Once the orchestrator has been waited on, agents and
    orchestrator are stopped and the hosts of each computation's replicas
    are printed as YAML on stdout and, when ``args.output`` is set, also
    written to that file.

    The orchestrator is stored in the module-level ``orchestrator`` global.

    :param args: parsed command-line arguments; the attributes read here are
        ``dcop_files``, ``algo``, ``distribution``, ``mode``,
        ``replication``, ``ktarget`` and ``output``.
    :param timer: optional timer that is cancelled once replication has
        completed (presumably a timeout watchdog started by the caller --
        confirm).  May be omitted or ``None``.
    :raises SystemExit: status 0 once the result has been printed.  On any
        other exception the agents and orchestrator are stopped and
        ``_error('ERROR', e)`` is called.
    """
    logger.debug('Distribution replicas : %s', args)
    global orchestrator

    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    try:
        algo_module = import_module('pydcop.algorithms.{}'.format(args.algo))
        algo = build_algo_def(algo_module, args.algo, dcop.objective,
                              [])  # FIXME : algo params needed?

        graph_module = import_module('pydcop.computations_graph.{}'.format(
            algo_module.GRAPH_TYPE))
        logger.info('Building computation graph ')
        cg = graph_module.build_computation_graph(dcop)
        logger.info('Computation graph : %s', cg)

    except ImportError:
        # NOTE(review): assumes `_error` terminates the command -- confirm;
        # otherwise `algo` / `cg` are unbound below.
        _error('Could not find module for algorithm {} or graph model '
               'for this algorithm'.format(args.algo))

    logger.info('loading distribution from {}'.format(args.distribution))
    distribution = load_dist_from_file(args.distribution)

    INFINITY = 10000  # FIXME should not be mandatory

    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(algo,
                                             cg,
                                             distribution,
                                             dcop,
                                             INFINITY,
                                             replication=args.replication)
    elif args.mode == 'process':

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(algo,
                                              cg,
                                              distribution,
                                              dcop,
                                              INFINITY,
                                              replication=args.replication)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        orchestrator.wait_ready()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        # The timer is optional: only cancel it when the caller gave one.
        if timer is not None:
            timer.cancel()
        # Hosts are turned into plain lists so that they dump as YAML lists.
        rep_dist = {
            c: list(hosts)
            for c, hosts in orchestrator.mgt.replica_hosts.items()
        }
        result = {
            'inputs': {
                'dcop': args.dcop_files,
                'algo': args.algo,
                'replication': args.replication,
                'k': args.ktarget
            },
            'replica_dist': rep_dist
        }
        result['inputs']['distribution'] = args.distribution
        if args.output is not None:
            with open(args.output, encoding='utf-8', mode='w') as fo:
                fo.write(yaml.dump(result))

        print(yaml.dump(result))
        # sys.exit raises SystemExit, which the `except Exception` clause
        # below does not intercept.
        sys.exit(0)

    except Exception as e:
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _error('ERROR', e)
示例#16
0
def run_cmd(args, timer: Timer = None, timeout=None):
    """Replicate a locally deployed DCOP and report replica hosts and metrics.

    Loads the DCOP (``args.dcop_files``) and its distribution
    (``args.distribution``), starts a local orchestrator according to
    ``args.mode``, deploys the computations and starts replication with
    ``args.ktarget``.  After waiting on the orchestrator, the per-agent
    replication metrics are summed, the replica hosts are collected, and
    agents and orchestrator are stopped.  The result is written as YAML to
    ``args.output`` when set, otherwise printed on stdout.

    The orchestrator is stored in the module-level ``orchestrator`` global.

    :param args: parsed command-line arguments; the attributes read here are
        ``dcop_files``, ``algo``, ``distribution``, ``mode``,
        ``replication``, ``ktarget`` and ``output``.
    :param timer: optional timer, cancelled once the metrics are collected.
    :param timeout: accepted but not used by this function.
    :raises SystemExit: status 0 once the result has been emitted.  On any
        other exception the agents and orchestrator are stopped and
        ``_error("ERROR", e)`` is called.
    """
    global orchestrator

    logger.debug("Distribution replicas : %s", args)

    dcop_files = args.dcop_files
    logger.info("loading dcop from {}".format(dcop_files))
    dcop = load_dcop_from_file(dcop_files)

    try:
        algo_module = load_algorithm_module(args.algo)
        # FIXME : algo params needed?
        algo = build_algo_def(algo_module, args.algo, dcop.objective, [])

        graph_module_name = "pydcop.computations_graph.{}".format(
            algo_module.GRAPH_TYPE
        )
        graph_module = import_module(graph_module_name)
        logger.info("Building computation graph ")
        cg = graph_module.build_computation_graph(dcop)
        logger.info("Computation graph : %s", cg)
    except ImportError:
        _error(
            "Could not find module for algorithm {} or graph model "
            "for this algorithm".format(args.algo)
        )

    logger.info("loading distribution from {}".format(args.distribution))
    distribution = load_dist_from_file(args.distribution)

    INFINITY = 10000  # FIXME should not be mandatory

    # Pick the launcher for the requested mode; both take the same arguments.
    mode = args.mode
    launcher = None
    if mode == "thread":
        launcher = run_local_thread_dcop
    elif mode == "process":
        # Agents live in other processes, their logs are of no use here.
        logging.getLogger("pydcop.agent").disabled = True

        # With the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        launcher = run_local_process_dcop

    if launcher is not None:
        orchestrator = launcher(
            algo, cg, distribution, dcop, INFINITY,
            replication=args.replication,
        )

    try:
        orchestrator.deploy_computations()
        started_at = time.time()
        orchestrator.start_replication(args.ktarget)
        orchestrator.wait_ready()

        # Fold the per-agent metrics into totals.
        per_agent = orchestrator.replication_metrics()
        msg_count = sum(per_agent[agt]["count_ext_msg"] for agt in per_agent)
        msg_size = sum(per_agent[agt]["size_ext_msg"] for agt in per_agent)
        duration = time.time() - started_at

        if timer:
            timer.cancel()

        # Collect the replica hosts before shutting everything down.
        rep_dist = {}
        for computation, hosts in orchestrator.mgt.replica_hosts.items():
            rep_dist[computation] = list(hosts)

        orchestrator.stop_agents(5)
        orchestrator.stop()

        inputs = {
            "dcop": args.dcop_files,
            "algo": args.algo,
            "replication": args.replication,
            "k": args.ktarget,
            "distribution": args.distribution,
        }
        measures = {
            "duration": duration,
            "msg_size": msg_size,
            "msg_count": msg_count,
        }
        result = {
            "inputs": inputs,
            "metrics": measures,
            "replica_dist": rep_dist,
        }

        if args.output is None:
            print(yaml.dump(result))
        else:
            with open(args.output, encoding="utf-8", mode="w") as fo:
                fo.write(yaml.dump(result))
        # SystemExit is not an Exception subclass, so it passes through the
        # handler below.
        sys.exit(0)
    except Exception as err:
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _error("ERROR", err)