def load_algo_module(algo):
    """Load and return the pyDCOP algorithm module named *algo*.

    Delegates to ``load_algorithm_module``. If the module cannot be
    imported, the CLI error handler ``_error`` is invoked and ``None``
    is returned (assuming ``_error`` does not terminate the process).
    """
    try:
        return load_algorithm_module(algo)
    except ImportError as e:
        _error("Could not find dcop algorithm: {}".format(algo), e)
        return None
def load_distribution_module(dist):
    """Load and return the pyDCOP distribution module named *dist*.

    On ImportError the CLI error handler ``_error`` is invoked and
    ``None`` is returned (assuming ``_error`` does not terminate the
    process).
    """
    module_path = "pydcop.distribution.{}".format(dist)
    try:
        return import_module(module_path)
    except ImportError as e:
        _error("Could not find distribution method {}".format(dist), e)
        return None
def load_graph_module(graph):
    """Load and return the pyDCOP computation-graph module named *graph*.

    On ImportError the CLI error handler ``_error`` is invoked and
    ``None`` is returned (assuming ``_error`` does not terminate the
    process).
    """
    module_path = "pydcop.computations_graph.{}".format(graph)
    try:
        return import_module(module_path)
    except ImportError as e:
        _error("Could not find computation graph type: {}".format(graph), e)
        return None
def load_algo_module(algo):
    """Load and return the pyDCOP algorithm module named *algo*.

    Imports ``pydcop.algorithms.<algo>`` directly. On ImportError the
    CLI error handler ``_error`` is invoked and ``None`` is returned
    (assuming ``_error`` does not terminate the process).
    """
    module_path = 'pydcop.algorithms.{}'.format(algo)
    try:
        return import_module(module_path)
    except ImportError as e:
        _error('Could not find dcop algorithm: {}'.format(algo), e)
        return None
def _load_modules(dist, algo, graph):
    """Load the distribution, algorithm and computation-graph modules.

    :param dist: name of a distribution method
        (module ``pydcop.distribution.<dist>``)
    :param algo: name of a DCOP algorithm, or None
        (module ``pydcop.algorithms.<algo>``)
    :param graph: name of a computation-graph model
        (module ``pydcop.computations_graph.<graph>``)
    :return: a ``(dist_module, algo_module, graph_module)`` tuple; any
        element may be None if the corresponding module was not
        requested or could not be imported (and ``_error`` did not
        terminate the process).
    """
    dist_module, algo_module, graph_module = None, None, None
    try:
        dist_module = import_module('pydcop.distribution.{}'.format(dist))
        # TODO check the imported module has the right methods ?
    except ImportError as e:
        _error('Could not find distribution method {}'.format(dist), e)
    # Algo is optional: only attempt the import when one was requested,
    # and report a failed *algorithm* import as such.
    if algo is not None:
        try:
            algo_module = import_module('pydcop.algorithms.{}'.format(algo))
        except ImportError as e:
            _error('Could not find dcop algorithm: {}'.format(algo), e)
    try:
        graph_module = import_module(
            'pydcop.computations_graph.{}'.format(graph))
    except ImportError as e:
        # Report the requested graph name. The previous code formatted
        # algo_module.GRAPH_TYPE here, which raised AttributeError
        # whenever algo_module was None (algo missing or not requested),
        # masking the original ImportError.
        _error('Could not find computation graph type: {}'.format(graph), e)
    return dist_module, algo_module, graph_module
def run_cmd(args, timer=None):
    """Run the "solve" CLI command: load, distribute and solve a DCOP.

    Loads the DCOP from ``args.dcop_files``, builds its computation
    graph, distributes it over the agents (or loads a distribution from
    file), then runs the algorithm in thread or process mode while
    collecting metrics.

    Side effects: mutates the module globals ``INFINITY``,
    ``collect_on``, ``dcop`` and ``orchestrator``, starts a daemon
    collector thread, and may change the multiprocessing start method.
    """
    logger.debug('dcop command "solve" with arguments {}'.format(args))
    global INFINITY
    INFINITY = args.infinity
    global collect_on
    collect_on = args.collect_on
    period = None
    # 'period' collection needs a period (default 1s); any other
    # collection mode must not receive one.
    if args.collect_on == 'period':
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)
    # args.distribution is either the name of a built-in distribution
    # method, or a path to a distribution file.
    if args.distribution in ['oneagent', 'adhoc', 'ilp_fgdp']:
        dist_module, algo_module, graph_module = _load_modules(args.distribution,
                                                               args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)
    global dcop
    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)
    # Build factor-graph computation graph
    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)
    logger.debug('Computation graph: %s ', cg)
    logger.info('Distributing computation graph ')
    if dist_module is not None:
        distribution = dist_module.\
            distribute(cg, dcop.agents.values(),
                       hints=dcop.dist_hints,
                       computation_memory=algo_module.computation_memory,
                       communication_load=algo_module.communication_load)
    else:
        # No built-in method: args.distribution is a distribution file.
        distribution = load_dist_from_file(args.distribution)
    logger.debug('Distribution Computation graph: %s ', distribution)
    logger.info('Dcop distribution : {}'.format(distribution))
    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)
    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()
    global orchestrator
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(algo, cg, distribution, dcop,
                                             INFINITY,
                                             collector=collector_queue,
                                             collect_moment=args.collect_on,
                                             period=period)
    elif args.mode == 'process':
        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True
        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(algo, cg, distribution, dcop,
                                              INFINITY,
                                              collector=collector_queue,
                                              collect_moment=args.collect_on,
                                              period=period)
    try:
        orchestrator.deploy_computations()
        orchestrator.run()
    except Exception as e:
        # Best-effort shutdown before reporting failure.
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR')
def run_cmd(args, timer=None, timeout=None):
    """Run the "solve" CLI command: load, distribute and solve a DCOP.

    :param args: parsed CLI arguments
    :param timer: optional external timeout ``Timer``, cancelled when
        the run finishes before it fires
    :param timeout: optional run timeout in seconds, forwarded to the
        orchestrator

    Side effects: mutates the module globals ``INFINITY``,
    ``collect_on``, ``output_file``, ``dcop`` and ``orchestrator``,
    starts a daemon collector thread, may change the multiprocessing
    start method, and may call ``sys.exit``.
    """
    logger.debug('dcop command "solve" with arguments {}'.format(args))
    global INFINITY, collect_on, output_file
    INFINITY = args.infinity
    output_file = args.output
    collect_on = args.collect_on
    period = None
    # 'period' collection needs a period (default 1s); any other
    # collection mode must not receive one.
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)
    # args.distribution is either the name of a built-in distribution
    # method, or a path to a distribution file.
    if args.distribution in DISTRIBUTION_METHODS:
        dist_module, algo_module, graph_module = _load_modules(
            args.distribution, args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)
    global dcop
    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)
    logger.debug(f"dcop {dcop} ")
    # Build factor-graph computation graph
    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)
    logger.debug("Computation graph: %s ", cg)
    logger.info("Distributing computation graph ")
    if dist_module is not None:
        # Some algorithms do not define cost footprints; default both
        # to zero so any distribution method can still be used.
        if not hasattr(algo_module, "computation_memory"):
            algo_module.computation_memory = lambda *v, **k: 0
        if not hasattr(algo_module, "communication_load"):
            algo_module.communication_load = lambda *v, **k: 0
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        # No built-in method: args.distribution is a distribution file.
        distribution = load_dist_from_file(args.distribution)
    logger.debug("Distribution Computation graph: %s ", distribution)
    logger.info("Dcop distribution : {}".format(distribution))
    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)
    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()
    global orchestrator
    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            delay=args.delay,
            uiport=args.uiport,
        )
    elif args.mode == "process":
        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger("pydcop.agent")
        agt_logs.disabled = True
        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            delay=args.delay,
            uiport=args.uiport,
        )
    try:
        orchestrator.deploy_computations()
        orchestrator.run(timeout=timeout)
        if timer:
            timer.cancel()
        # timeout_stopped is a module-level flag, presumably set by the
        # external timeout handler — only report here if it did not fire.
        if not timeout_stopped:
            if orchestrator.status == "TIMEOUT":
                _results("TIMEOUT")
                sys.exit(0)
            elif orchestrator.status != "STOPPED":
                _results("FINISHED")
                sys.exit(0)
        # in case it did not stop, dump remaining threads
    except Exception as e:
        # Best-effort shutdown before reporting failure.
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
def run_cmd(args, timer=None, timeout=None):
    """Run the "distribute" CLI command: distribute a DCOP's computations.

    Loads the DCOP and the requested distribution method, builds the
    computation graph (from ``--graph`` or from the algorithm's
    ``GRAPH_TYPE``), computes the distribution and its costs, and emits
    the result as YAML (stdout and, optionally, ``args.output``).

    :param args: parsed CLI arguments
    :param timer: optional external timeout ``Timer``, cancelled on
        completion or failure
    :param timeout: distribution timeout in seconds (defaults to 3600)

    Side effects: mutates the module globals ``output_file``, ``result``
    and ``start_t``; always terminates via ``sys.exit`` when the
    distribution attempt completes (success, timeout or failure).
    """
    logger.debug('dcop command "distribute" with arguments {} '.format(args))
    dcop_yaml_files = args.dcop_files
    logger.info("loading dcop from {}".format(dcop_yaml_files))
    dcop = load_dcop_from_file(dcop_yaml_files)
    dist_module = load_distribution_module(args.distribution)
    # Cost evaluation: explicit --cost module wins, otherwise reuse the
    # distribution module when it can compute its own cost.
    if args.cost:
        cost_module = load_distribution_module(args.cost)
    elif hasattr(dist_module, "distribution_cost"):
        cost_module = dist_module
    else:
        cost_module = None
    algo_module, graph_module = None, None
    if args.algo is not None:
        algo_module = load_algo_module(args.algo)
    if args.graph is not None:
        graph_type = args.graph
        graph_module = load_graph_module(args.graph)
        # Check that the graph model and the algorithm are compatible:
        if algo_module is not None and algo_module.GRAPH_TYPE != args.graph:
            _error("Incompatible graph model and algorithm")
    elif algo_module is not None:
        # No explicit graph: derive it from the algorithm.
        graph_module = load_graph_module(algo_module.GRAPH_TYPE)
        graph_type = algo_module.GRAPH_TYPE
    else:
        # NOTE(review): "at leat" typo in this user-facing message.
        _error("You must pass at leat --graph or --algo option")
    global output_file
    output_file = args.output
    # Build factor-graph computation graph
    logger.info("Building computation graph for dcop {}".format(dcop_yaml_files))
    cg = graph_module.build_computation_graph(dcop)
    logger.info("Distributing computation graph for dcop {}".format(dcop_yaml_files))
    # Without an algorithm, no footprint/load callbacks are available.
    if algo_module is None:
        computation_memory = None
        communication_load = None
    else:
        computation_memory = algo_module.computation_memory
        communication_load = algo_module.communication_load
    global result
    # Record inputs up-front so a timeout/failure report still carries them.
    result.update({
        "inputs": {
            "dist_algo": args.distribution,
            "dcop": args.dcop_files,
            "graph": graph_type,
            "algo": args.algo,
        },
        "status": "PROGRESS"
    })
    try:
        global start_t
        start_t = time.time()
        if not timeout:
            timeout = 3600
        # Warning: some methods may not honor the timeout parameter
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=computation_memory,
            communication_load=communication_load,
            timeout=timeout
        )
        duration = time.time() - start_t
        dist = distribution.mapping()
        if timer:
            timer.cancel()
        if cost_module:
            cost, comm, hosting = cost_module.distribution_cost(
                distribution,
                cg,
                dcop.agents.values(),
                computation_memory=computation_memory,
                communication_load=communication_load,
            )
        else:
            cost, comm, hosting = None, None, None
        result = {
            "inputs": {
                "dist_algo": args.distribution,
                "dcop": args.dcop_files,
                "graph": graph_type,
                "algo": args.algo,
                "duration": duration,
            },
            "distribution": dist,
            "cost": cost,
            "communication_cost": comm,
            "hosting_cost": hosting,
            "status": "SUCCESS"
        }
        if args.output is not None:
            with open(args.output, encoding="utf-8", mode="w") as fo:
                fo.write(yaml.dump(result))
        print(yaml.dump(result))
        sys.exit(0)
    except TimeoutError as e:
        if timer:
            timer.cancel()
        duration = time.time() - start_t
        result["status"] = "TIMEOUT"
        result["inputs"]["duration"] = duration
        if output_file is not None:
            with open(output_file, encoding="utf-8", mode="w") as fo:
                fo.write(yaml.dump(result))
        print(yaml.dump(result))
        sys.exit(0)
    except ImpossibleDistributionException as e:
        if timer:
            timer.cancel()
        result["status"] = "FAIL"
        result["error"] = str(e)
        if output_file is not None:
            with open(output_file, encoding="utf-8", mode="w") as fo:
                fo.write(yaml.dump(result))
        print(yaml.dump(result))
        sys.exit(0)
def run_cmd(args, timer=None, timeout=None):
    """Run the "run" CLI command: solve a DCOP while replaying a scenario.

    Like "solve", but additionally loads a scenario of events, replicates
    computations (``args.ktarget`` replicas with
    ``args.replication_method``) and runs the orchestrator against the
    scenario.

    :param args: parsed CLI arguments
    :param timer: optional external timeout ``Timer``, cancelled when
        the run finishes before it fires
    :param timeout: optional run timeout in seconds

    Side effects: mutates the module globals ``INFINITY``,
    ``collect_on``, ``output_file``, ``dcop`` and ``orchestrator``,
    starts a daemon collector thread, may change the multiprocessing
    start method, and may call ``sys.exit``.
    """
    logger.debug('dcop command "run" with arguments {}'.format(args))
    global INFINITY, collect_on, output_file
    INFINITY = args.infinity
    collect_on = args.collect_on
    output_file = args.output
    period = None
    # 'period' collection needs a period (default 1s); any other
    # collection mode must not receive one.
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)
    # NOTE(review): this first _load_modules result is overwritten by the
    # load below — looks redundant; confirm before removing.
    _, algo_module, graph_module = _load_modules(None, args.algo)
    global dcop
    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)
    dcop = filter_dcop(dcop)
    # args.distribution is either the name of a built-in distribution
    # method, or a path to a distribution file.
    if args.distribution in DISTRIBUTION_METHODS:
        dist_module, algo_module, graph_module = _load_modules(
            args.distribution, args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)
    logger.info("loading scenario from {}".format(args.scenario))
    scenario = load_scenario_from_file(args.scenario)
    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)
    logger.info("Distributing computation graph ")
    if dist_module is not None:
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        # No built-in method: args.distribution is a distribution file.
        distribution = load_dist_from_file(args.distribution)
    logger.debug("Distribution Computation graph: %s ", distribution)
    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)
    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()
    global orchestrator
    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method,
        )
    elif args.mode == "process":
        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger("pydcop.agent")
        agt_logs.disabled = True
        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
        )
    orchestrator.set_error_handler(_orchestrator_error)
    try:
        orchestrator.deploy_computations()
        # Replicate computations before running the scenario.
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario, timeout=timeout)
        if timer:
            timer.cancel()
        # timeout_stopped is a module-level flag, presumably set by the
        # external timeout handler — only report here if it did not fire.
        if not timeout_stopped:
            if orchestrator.status == "TIMEOUT":
                _results("TIMEOUT")
                sys.exit(0)
            elif orchestrator.status != "STOPPED":
                _results("FINISHED")
                sys.exit(0)
    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump every thread's stack to help diagnose hangs/failures.
        for th in threading.enumerate():
            print(th)
            traceback.print_stack(sys._current_frames()[th.ident])
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
def run_cmd(args, timer):
    """Run the "run" CLI command (older variant) against a scenario.

    Loads the DCOP, a pre-computed distribution file
    (``args.distribution``) and a scenario, replicates computations and
    runs the orchestrator in thread or process mode.

    :param args: parsed CLI arguments
    :param timer: external timeout ``Timer`` (unused in this variant)

    Side effects: mutates the module globals ``INFINITY``,
    ``collect_on``, ``dcop`` and ``orchestrator``, starts a daemon
    collector thread, and may change the multiprocessing start method.
    """
    logger.debug('dcop command "run" with arguments {}'.format(args))
    global INFINITY
    INFINITY = args.infinity
    global collect_on
    collect_on = args.collect_on
    period = None
    # 'period' collection needs a period (default 1s); any other
    # collection mode must not receive one.
    if args.collect_on == 'period':
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)
    _, algo_module, graph_module = _load_modules(None, args.algo)
    global dcop
    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)
    # This variant always loads the distribution from a file.
    logger.info('Loading distribution from {}'.format(args.distribution))
    distribution = load_dist_from_file(args.distribution)
    # FIXME: load replica dist from file and pass to orchestrator
    # logger.info('Loading replica distribution from {}'.format(
    #     args.distribution))
    # replica_dist = load_replica_dist_from_file(args.replica_dist)
    # logger.info('Dcop distribution : %s', replica_dist)
    logger.info('loading scenario from {}'.format(args.scenario))
    scenario = load_scenario_from_file(args.scenario)
    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)
    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)
    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()
    global orchestrator
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(
            algo, cg, distribution, dcop, INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method)
    elif args.mode == 'process':
        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True
        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(algo, cg, distribution, dcop,
                                              INFINITY,
                                              collector=collector_queue,
                                              collect_moment=args.collect_on,
                                              period=period)
    orchestrator.set_error_handler(_orchestrator_error)
    try:
        orchestrator.deploy_computations()
        # Replicate computations before running the scenario.
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario)
        # orchestrator.run(scenario) # FIXME
    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump every thread's stack to help diagnose hangs/failures.
        for th in threading.enumerate():
            print(th)
            traceback.print_stack(sys._current_frames()[th.ident])
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR', e)
def run_cmd(args):
    """Run the "distribute" CLI command (older variant).

    Loads the DCOP and the distribution method (``args.dist``), builds
    the computation graph (from ``--graph`` or the algorithm's
    ``GRAPH_TYPE``), computes the distribution and optionally its cost,
    and emits the result as YAML (stdout and, optionally,
    ``args.output``).

    Exits with status 0 on success, 2 when the distribution is
    impossible.
    """
    logger.debug('dcop command "distribute" with arguments {} '.format(args))
    dcop_yaml_files = args.dcop_files
    logger.info('loading dcop from {}'.format(dcop_yaml_files))
    dcop = load_dcop_from_file(dcop_yaml_files)
    dist_module = load_distribution_module(args.dist)
    algo_module, graph_module = None, None
    if args.algo is not None:
        algo_module = load_algo_module(args.algo)
    if args.graph is not None:
        graph_module = load_graph_module(args.graph)
        # Check that the graph model and the algorithm are compatible:
        if algo_module is not None and algo_module.GRAPH_TYPE != args.graph:
            _error('Incompatible graph model and algorithm')
    elif algo_module is not None:
        # No explicit graph: derive it from the algorithm.
        graph_module = load_graph_module(algo_module.GRAPH_TYPE)
    else:
        # NOTE(review): "at leat" typo in this user-facing message.
        _error('You must pass at leat --graph or --algo option')
    # Build factor-graph computation graph
    logger.info('Building computation graph for dcop {}'
                .format(dcop_yaml_files))
    cg = graph_module.build_computation_graph(dcop)
    logger.info('Distributing computation graph for dcop {}'
                .format(dcop_yaml_files))
    # Without an algorithm, no footprint/load callbacks are available.
    if algo_module is None:
        computation_memory = None
        communication_load = None
    else:
        computation_memory = algo_module.computation_memory
        communication_load = algo_module.communication_load
    try:
        distribution = dist_module\
            .distribute(cg, dcop.agents.values(),
                        hints=dcop.dist_hints,
                        computation_memory=computation_memory,
                        communication_load=communication_load)
        dist = distribution.mapping()
        # Cost is optional: only computed when the distribution method
        # provides a distribution_cost function.
        if hasattr(dist_module, 'distribution_cost'):
            cost = dist_module.distribution_cost(
                distribution, cg, dcop.agents.values(),
                computation_memory=computation_memory,
                communication_load=communication_load)
        else:
            cost = None
        result = {
            'inputs': {
                'dist_algo': args.dist,
                'dcop': args.dcop_files,
                'graph': args.graph,
                'algo': args.algo,
            },
            'distribution': dist,
            'cost': cost
        }
        if args.output is not None:
            with open(args.output, encoding='utf-8', mode='w') as fo:
                fo.write(yaml.dump(result))
        print(yaml.dump(result))
        sys.exit(0)
    except ImpossibleDistributionException as e:
        result = {
            'status': 'FAIL',
            'error': str(e)
        }
        print(yaml.dump(result))
        sys.exit(2)