class TestBrokerController(unittest2.TestCase):
    """Tests for ``BrokerController`` driven through a ``FakeBroker``.

    Each test gets a fresh controller whose database lives in a temporary
    directory that is removed in ``tearDown``.
    """

    def setUp(self):
        self.dbdir = tempfile.mkdtemp()
        loop = ioloop.IOLoop()
        self.broker = FakeBroker()
        dboptions = {'directory': self.dbdir}
        self.ctrl = BrokerController(self.broker, loop, dboptions=dboptions)
        # psutil.pid_exists is stubbed to always answer True for the
        # duration of the test; tearDown puts the real function back.
        self.old_exists = psutil.pid_exists
        psutil.pid_exists = lambda pid: True

    def tearDown(self):
        psutil.pid_exists = self.old_exists
        # empty the shared message list in place
        Stream.msgs[:] = []
        shutil.rmtree(self.dbdir)

    def test_registration(self):
        self.ctrl.register_agent('1')
        self.assertTrue('1' in self.ctrl.agents)

        # make the agent busy before we unregister it
        self.ctrl.send_to_agent('1', ['something'])
        self.ctrl.reserve_agents(1, 'run')

        self.ctrl.unregister_agent('1')
        self.assertFalse('1' in self.ctrl.agents)

    def test_reserve_agents(self):
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')

        self.assertRaises(NotEnoughWorkersError,
                          self.ctrl.reserve_agents, 10, 'run')

        agents = sorted(self.ctrl.reserve_agents(2, 'run'))
        self.assertEqual(agents, ['1', '2'])

    def test_run_and_stop(self):
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')
        self.ctrl.register_agent('3')

        self.ctrl.reserve_agents(1, 'run')
        self.ctrl.reserve_agents(2, 'run2')

        runs = sorted(self.ctrl.list_runs(None, None).keys())
        self.assertEqual(['run', 'run2'], runs)
        self.ctrl.stop_run(['somemsg'], {'run_id': 'run'})

        # make sure the STOP cmd made it through
        msgs = [msg for msg in Stream.msgs if '_STATUS' not in msg[-1]]
        self.assertEqual(msgs[0][-1], '{"command":"STOP"}')
        self.assertEqual(len(msgs), 1)

    def test_db_access(self):
        self.ctrl.register_agent('1')
        self.ctrl.reserve_agents(1, 'run')

        # metadata
        data = {'some': 'data'}
        self.ctrl.save_metadata('run', data)
        self.assertEqual(self.ctrl.get_metadata(None, {'run_id': 'run'}),
                         data)

        # save data by agent
        self.ctrl.save_data('1', data)
        self.ctrl.flush_db()

        # we get extra run_id key, set for us
        self.assertEqual(data['run_id'], 'run')

        back = self.ctrl.get_data(None, {'run_id': 'run'})
        # assertEqual, not assertTrue: with assertTrue the second argument
        # is only the failure message, so the value was never compared.
        self.assertEqual(back[0]['some'], 'data')

        back2 = self.ctrl.get_data(None, {'run_id': 'run'})
        self.assertEqual(back, back2)

    def test_compute_observers(self):
        obs = ['irc', 'loads.observers.irc']
        observers = _compute_observers(obs)
        self.assertEqual(len(observers), 2)
        self.assertRaises(ImportError, _compute_observers, ['blah'])

    def test_run(self):
        msg = ['somedata', '', 'target']
        data = {'agents': 1, 'args': {}}

        # not enough agents
        self.ctrl.run(msg, data)
        res = list(self.broker.msgs.values())[0]
        self.assertEqual(res, [{'error': 'Not enough agents'}])

        # one agent, we're good
        self.ctrl._agents.append('agent1')
        self.ctrl.run(msg, data)
        runs = list(self.broker.msgs.values())[0][-1]
        self.assertEqual(runs['result']['agents'], ['agent1'])

    def test_run_command(self):
        msg = ['somedata', '', 'target']
        data = {'agents': 1, 'args': {}, 'agent_id': '1'}
        self.ctrl.run_command('RUN', msg, data)
        self.ctrl.run_command('AGENT_STATUS', msg, data)
        runs = list(self.broker.msgs.values())[0][-1]
        self.assertEqual(runs['result']['agents'], ['agent1'])

        msg = {"command": "STATUS", "args": {}, "agents": 1, "agent_id": "1"}
        msg = sorted(msg.items())

        # NOTE(review): these two assertTrue calls only check that the
        # lengths are non-zero (the ``1`` is taken as the failure message).
        # The expected lengths cannot be confirmed from this file -- the
        # indexing just below implies the first message has several
        # parts -- so they are left as they were.
        self.assertTrue(len(self.broker._backstream.msgs), 1)
        self.assertTrue(len(self.broker._backstream.msgs[0]), 1)

        got = self.broker._backstream.msgs[0][3]
        got = sorted(json.loads(got).items())
        self.assertEqual(msg, got)

    def test_clean(self):
        self.ctrl.agent_timeout = 0.1
        self.ctrl._associate('run', ['1', '2'])
        self.ctrl.clean()
        self.assertTrue('1' in self.ctrl._agent_times)
        self.assertTrue('2' in self.ctrl._agent_times)

        # sleep past agent_timeout so the next clean() drops both entries
        time.sleep(.2)
        self.ctrl.clean()
        self.assertEqual(self.ctrl._agent_times, {})

        self.ctrl.test_ended('run')
class TestBrokerController(unittest2.TestCase):
    """Tests for ``BrokerController`` driven through a ``FakeBroker``.

    Agents are registered with a dict carrying ``pid`` and ``agent_id``.
    Each test gets a fresh controller whose database lives in a temporary
    directory that is removed in ``tearDown``.
    """

    def setUp(self):
        self.dbdir = tempfile.mkdtemp()
        loop = ioloop.IOLoop()
        self.broker = FakeBroker()
        dboptions = {'directory': self.dbdir}
        self.ctrl = BrokerController(self.broker, loop, dboptions=dboptions)
        # psutil.pid_exists is stubbed to always answer True for the
        # duration of the test; tearDown puts the real function back.
        self.old_exists = psutil.pid_exists
        psutil.pid_exists = lambda pid: True

    def tearDown(self):
        psutil.pid_exists = self.old_exists
        # empty the shared message list in place
        Stream.msgs[:] = []
        shutil.rmtree(self.dbdir)

    def test_registration(self):
        self.ctrl.register_agent({'pid': '1', 'agent_id': '1'})
        self.assertTrue('1' in self.ctrl.agents)

        # make the agent busy before we unregister it
        self.ctrl.send_to_agent('1', ['something'])
        self.ctrl.reserve_agents(1, 'run')

        self.ctrl.unregister_agent('1')
        self.assertFalse('1' in self.ctrl.agents)

    def test_reserve_agents(self):
        self.ctrl.register_agent({'pid': '1', 'agent_id': '1'})
        self.ctrl.register_agent({'pid': '2', 'agent_id': '2'})

        self.assertRaises(NotEnoughWorkersError,
                          self.ctrl.reserve_agents, 10, 'run')

        agents = sorted(self.ctrl.reserve_agents(2, 'run'))
        self.assertEqual(agents, ['1', '2'])

    def test_run_and_stop(self):
        self.ctrl.register_agent({'pid': '1', 'agent_id': '1'})
        self.ctrl.register_agent({'pid': '2', 'agent_id': '2'})
        self.ctrl.register_agent({'pid': '3', 'agent_id': '3'})

        self.ctrl.reserve_agents(1, 'run')
        self.ctrl.reserve_agents(2, 'run2')

        runs = sorted(self.ctrl.list_runs(None, None).keys())
        self.assertEqual(['run', 'run2'], runs)
        self.ctrl.stop_run(['somemsg'], {'run_id': 'run'})

        # make sure the STOP cmd made it through
        msgs = [msg for msg in Stream.msgs if '_STATUS' not in msg[-1]]
        self.assertEqual(msgs[0][-1], '{"command":"STOP"}')
        self.assertEqual(len(msgs), 1)

    def test_db_access(self):
        # The original literal repeated the 'agent_id' key, which silently
        # collapses to a single entry and leaves out 'pid'; use the same
        # shape as every other registration in this class.
        self.ctrl.register_agent({'pid': '1', 'agent_id': '1'})
        self.ctrl.reserve_agents(1, 'run')

        # metadata
        data = {'some': 'data'}
        self.ctrl.save_metadata('run', data)
        self.assertEqual(self.ctrl.get_metadata(None, {'run_id': 'run'}),
                         data)

        # save data by agent
        self.ctrl.save_data('1', data)
        self.ctrl.flush_db()

        # we get extra run_id key, set for us
        self.assertEqual(data['run_id'], 'run')

        back = self.ctrl.get_data(None, {'run_id': 'run'})
        # assertEqual, not assertTrue: with assertTrue the second argument
        # is only the failure message, so the value was never compared.
        self.assertEqual(back[0]['some'], 'data')

        back2 = self.ctrl.get_data(None, {'run_id': 'run'})
        self.assertEqual(back, back2)

    def test_compute_observers(self):
        obs = ['irc', 'loads.observers.irc']
        observers = _compute_observers(obs)
        self.assertEqual(len(observers), 2)
        self.assertRaises(ImportError, _compute_observers, ['blah'])

    def test_run(self):
        msg = ['somedata', '', 'target']
        data = {'agents': 1, 'args': {}}

        # not enough agents
        self.ctrl.run(msg, data)
        res = list(self.broker.msgs.values())[0]
        self.assertEqual(res, [{'error': 'Not enough agents'}])

        # one agent, we're good
        self.ctrl._agents['agent1'] = {'pid': '1234'}
        self.ctrl.run(msg, data)
        runs = list(self.broker.msgs.values())[0][-1]
        self.assertEqual(runs['result']['agents'], ['agent1'])

    def test_run_command(self):
        msg = ['somedata', '', 'target']
        data = {'agents': 1, 'args': {}, 'agent_id': '1'}
        self.ctrl.run_command('RUN', msg, data)
        self.ctrl.run_command('AGENT_STATUS', msg, data)
        runs = list(self.broker.msgs.values())[0][-1]
        self.assertEqual(runs['result']['agents'], ['agent1'])

        msg = {"command": "_STATUS", "args": {}, "agents": 1,
               "agent_id": "1"}
        msg = sorted(msg.items())

        # NOTE(review): these two assertTrue calls only check that the
        # lengths are non-zero (the ``1`` is taken as the failure message).
        # The expected lengths cannot be confirmed from this file, so they
        # are left as they were.
        self.assertTrue(len(self.broker._backstream.msgs), 1)
        self.assertTrue(len(self.broker._backstream.msgs[0]), 1)

        got = self.broker._backstream.msgs[0][-1]
        got = sorted(json.loads(got).items())
        self.assertEqual(msg, got)

    def test_clean(self):
        self.ctrl.agent_timeout = 0.1
        self.ctrl._associate('run', ['1', '2'])
        self.ctrl.clean()
        self.assertTrue('1' in self.ctrl._agent_times)
        self.assertTrue('2' in self.ctrl._agent_times)

        # sleep past agent_timeout so the next clean() drops both entries
        time.sleep(.2)
        self.ctrl.clean()
        self.assertEqual(self.ctrl._agent_times, {})

        self.ctrl.test_ended('run')
class TestBrokerController(unittest2.TestCase):
    """Tests for ``BrokerController`` driven through a ``FakeBroker``.

    Each test gets a fresh controller whose database lives in a temporary
    directory that is removed in ``tearDown``.
    """

    def setUp(self):
        self.dbdir = tempfile.mkdtemp()
        loop = ioloop.IOLoop()
        broker = FakeBroker()
        dboptions = {'directory': self.dbdir}
        self.ctrl = BrokerController(broker, loop, dboptions=dboptions)
        # psutil.pid_exists is stubbed to always answer True for the
        # duration of the test; tearDown puts the real function back.
        self.old_exists = psutil.pid_exists
        psutil.pid_exists = lambda pid: True

    def tearDown(self):
        psutil.pid_exists = self.old_exists
        # empty the shared message list in place
        Stream.msgs[:] = []
        shutil.rmtree(self.dbdir)

    def test_registration(self):
        self.ctrl.register_agent('1')
        self.assertTrue('1' in self.ctrl.agents)

        # make the agent busy before we unregister it
        self.ctrl.send_to_agent('1', ['something'])
        self.ctrl.reserve_agents(1, 'run')

        self.ctrl.unregister_agent('1')
        self.assertFalse('1' in self.ctrl.agents)

    def test_reserve_agents(self):
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')

        self.assertRaises(NotEnoughWorkersError,
                          self.ctrl.reserve_agents, 10, 'run')

        agents = sorted(self.ctrl.reserve_agents(2, 'run'))
        self.assertEqual(agents, ['1', '2'])

    def test_run_and_stop(self):
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')
        self.ctrl.register_agent('3')

        self.ctrl.reserve_agents(1, 'run')
        self.ctrl.reserve_agents(2, 'run2')

        runs = sorted(self.ctrl.list_runs(None, None).keys())
        self.assertEqual(['run', 'run2'], runs)
        self.ctrl.stop_run(['somemsg'], {'run_id': 'run'})

        # make sure the STOP cmd made it through
        msgs = [msg for msg in Stream.msgs if '_STATUS' not in msg[-1]]
        self.assertEqual(msgs[0][-1], '{"command": "STOP"}')
        self.assertEqual(len(msgs), 1)

    def test_db_access(self):
        self.ctrl.register_agent('1')
        self.ctrl.reserve_agents(1, 'run')

        # metadata
        data = {'some': 'data'}
        self.ctrl.save_metadata('run', data)
        self.assertEqual(self.ctrl.get_metadata(None, {'run_id': 'run'}),
                         data)

        # save data by agent
        self.ctrl.save_data('1', data)
        self.ctrl.flush_db()

        # we get extra run_id key, set for us
        self.assertEqual(data['run_id'], 'run')

        back = self.ctrl.get_data(None, {'run_id': 'run'})
        # assertEqual, not assertTrue: with assertTrue the second argument
        # is only the failure message, so the value was never compared.
        self.assertEqual(back[0]['some'], 'data')

        back2 = self.ctrl.get_data(None, {'run_id': 'run'})
        self.assertEqual(back, back2)
class Broker(object):
    """Class that routes jobs to agents.

    Options:

    - **frontend**: the ZMQ socket to receive jobs.
    - **backend**: the ZMQ socket to communicate with agents.
    - **heartbeat**: the ZMQ socket to receive heartbeat requests.
      When None, no heartbeat is set up.
    - **register** : the ZMQ socket to register agents.
    - **receiver**: the ZMQ socket that receives data from agents.
    - **publisher**: the ZMQ socket to publish agents data
    - **io_threads**: passed to the ZMQ context.
    - **agent_timeout**: passed to the broker controller.
    - **db**, **dboptions**: passed to the broker controller.

    Raises DuplicateBrokerError when `verify_broker` reports a pid for
    the frontend endpoint.
    """
    def __init__(self, frontend=DEFAULT_FRONTEND, backend=DEFAULT_BACKEND,
                 heartbeat=None, register=DEFAULT_REG,
                 io_threads=DEFAULT_IOTHREADS,
                 agent_timeout=DEFAULT_TIMEOUT_MOVF,
                 receiver=DEFAULT_BROKER_RECEIVER,
                 publisher=DEFAULT_PUBLISHER,
                 db='python', dboptions=None):
        # before doing anything, we verify if a broker is already up and
        # running
        logger.debug('Verifying if there is a running broker')
        pid = verify_broker(frontend)
        if pid is not None:    # oops. can't do this !
            logger.debug('Ooops, we have a running broker on that socket')
            raise DuplicateBrokerError(pid)

        self.endpoints = {'frontend': frontend,
                          'backend': backend,
                          'register': register,
                          'receiver': receiver,
                          'publisher': publisher}

        if heartbeat is not None:
            self.endpoints['heartbeat'] = heartbeat

        logger.debug('Initializing the broker.')

        for endpoint in self.endpoints.values():
            if endpoint.startswith('ipc'):
                register_ipc_file(endpoint)

        self.context = zmq.Context(io_threads=io_threads)

        # setting up the sockets
        self._frontend = self.context.socket(zmq.ROUTER)
        self._frontend.identity = 'broker-' + frontend
        self._frontend.bind(frontend)
        self._backend = self.context.socket(zmq.ROUTER)
        self._backend.bind(backend)
        self._registration = self.context.socket(zmq.PULL)
        self._registration.bind(register)
        self._receiver = self.context.socket(zmq.PULL)
        self._receiver.bind(receiver)
        self._publisher = self.context.socket(zmq.PUB)
        self._publisher.bind(publisher)

        # setting up the streams
        self.loop = ioloop.IOLoop()
        self._frontstream = zmqstream.ZMQStream(self._frontend, self.loop)
        self._frontstream.on_recv(self._handle_recv_front)
        self._backstream = zmqstream.ZMQStream(self._backend, self.loop)
        self._backstream.on_recv(self._handle_recv_back)
        self._regstream = zmqstream.ZMQStream(self._registration, self.loop)
        self._regstream.on_recv(self._handle_reg)
        self._rcvstream = zmqstream.ZMQStream(self._receiver, self.loop)
        self._rcvstream.on_recv(self._handle_recv)

        # heartbeat
        if heartbeat is not None:
            self.pong = Heartbeat(heartbeat, io_loop=self.loop,
                                  ctx=self.context,
                                  onregister=self._deregister)
        else:
            self.pong = None

        # status
        self.started = False
        self.poll_timeout = None

        # controller
        self.ctrl = BrokerController(self, self.loop, db=db,
                                     dboptions=dboptions,
                                     agent_timeout=agent_timeout)

    def _handle_recv(self, msg):
        """Republish data received from an agent and store it."""
        # publishing all the data received from agents
        self._publisher.send(msg[0])

        # saving the data locally
        data = json.loads(msg[0])
        agent_id = str(data.get('agent_id'))
        self.ctrl.save_data(agent_id, data)

    def _deregister(self):
        """Unregister every agent (heartbeat ``onregister`` callback)."""
        logger.debug('Unregistering all agents')
        self.ctrl.unregister_agents()

    def _handle_reg(self, msg):
        """Handle a REGISTER / UNREGISTER message; others are ignored."""
        if msg[0] == 'REGISTER':
            self.ctrl.register_agent(msg[1])
        elif msg[0] == 'UNREGISTER':
            self.ctrl.unregister_agent(msg[1])

    def _send_json(self, target, data):
        """Send `data`, JSON-encoded, to the front stream after `target`.

        A ValueError raised while encoding or sending is logged and
        re-raised.
        """
        try:
            self._frontstream.send_multipart(target + [json.dumps(data)])
        except ValueError:
            logger.error('Could not dump %s', str(data))
            raise

    def _handle_recv_front(self, msg, tentative=0):
        """Handle a message received on the frontend (front => back).

        The JSON payload is read from ``msg[2]`` and its ``command`` key
        selects the action.  Commands the broker answers itself get a JSON
        reply sent to ``msg[:-1]``; anything else is forwarded to the
        agent named by the payload's ``agent_id``.
        """
        # front => back
        # if the command of the message is 'PING', we just PONG back
        # this is used as a health check
        data = json.loads(msg[2])
        target = msg[:-1]
        cmd = data['command']

        if cmd == 'PING':
            res = {'result': {'pid': os.getpid(),
                              'endpoints': self.endpoints,
                              'agents': self.ctrl.agents}}
            self._send_json(target, res)
            return
        elif cmd == 'LISTRUNS':
            logger.debug('Asked for LISTRUNS')
            res = {'result': self.ctrl.list_runs()}
            logger.debug('Got %s', str(res))
            self._send_json(target, res)
            return
        elif cmd == 'STOPRUN':
            run_id = data['run_id']
            stopped_agents = self.ctrl.stop_run(run_id, msg)

            # we give back the list of agents we stopped
            res = {'result': stopped_agents}
            self._send_json(target, res)
            return
        elif cmd == 'GET_DATA':
            # we send back the data we have in the db
            # XXX stream ?
            db_data = self.ctrl.get_data(data['run_id'],
                                         data_type=data.get('data_type'),
                                         groupby=data.get('groupby', False))
            self._send_json(target, {'result': db_data})
            return
        elif cmd == 'GET_COUNTS':
            counts = self.ctrl.get_counts(data['run_id'])
            self._send_json(target, {'result': counts})
            return
        elif cmd == 'GET_METADATA':
            metadata = self.ctrl.get_metadata(data['run_id'])
            self._send_json(target, {'result': metadata})
            return

        # other commands below this point are for agents
        if tentative == 3:
            logger.debug('No agents')
            self._send_json(target, {'error': 'No agent'})
            return

        # the msg tells us which agent to work with; `data` and `cmd`
        # parsed above are still current, so they are not parsed again.

        # broker protocol
        if cmd == 'LIST':
            # we return a list of agent ids and their status
            self._send_json(target, {'result': self.ctrl.agents})
            return
        elif cmd == 'RUN':
            # create a unique id for this run
            run_id = str(uuid4())

            # get some agents
            try:
                agents = self.ctrl.reserve_agents(data['agents'], run_id)
            except NotEnoughWorkersError:
                self._send_json(target, {'error': 'Not enough agents'})
                return

            # send to every agent with the run_id and the receiver endpoint
            data['run_id'] = run_id
            data['args']['zmq_receiver'] = self.endpoints['receiver']
            # the payload is serialized here, before 'started' is added
            # below, so the start time only ends up in the saved metadata
            msg[2] = json.dumps(data)

            # notice when the test was started
            data['args']['started'] = time.time()

            # save the tests metadata in the db
            self.ctrl.save_metadata(run_id, data['args'])
            self.ctrl.flush_db()

            for agent_id in agents:
                self.ctrl.send_to_agent(agent_id, msg)

            # tell the client which agents where selected.
            res = {'result': {'agents': agents, 'run_id': run_id}}
            self._send_json(target, res)
            return

        if 'agent_id' not in data:
            raise NotImplementedError('DEAD CODE?')
        else:
            agent_id = str(data['agent_id'])
            self.ctrl.send_to_agent(agent_id, msg)

    def _handle_recv_back(self, msg):
        """Handle a message received on the backend (back => front).

        ``msg[0]`` is taken as the agent id and stripped.  ``_STATUS``
        results update the controller and are not forwarded; everything
        else is passed through to the front stream.  Failures while
        forwarding are logged and swallowed.
        """
        # back => front
        #logger.debug('front <- back [%s]' % msg[0])
        # let's remove the agent id and track the time it took
        agent_id = msg[0]
        msg = msg[1:]

        # grabbing the data to update the agents statuses if needed
        data = json.loads(msg[-1])
        if 'error' in data:
            result = data['error']
            logger.error(result.get('exception'))
        else:
            result = data['result']

        if result.get('command') == '_STATUS':
            statuses = result['status'].values()
            run_id = self.ctrl.update_status(agent_id, statuses)
            if run_id is not None:
                # if the tests are finished, publish this on the pubsub.
                self._publisher.send(json.dumps({'data_type': 'run-finished',
                                                 'run_id': run_id}))
            return

        # other things are pass-through
        try:
            self._frontstream.send_multipart(msg)
        except Exception as e:
            logger.error('Could not send to front')
            logger.error(msg)
            # we don't want to die on error. we just log it
            exc = traceback.format_tb(sys.exc_info()[2])
            exc.insert(0, str(e))
            logger.error('\n'.join(exc))