def _shot(self):
    self.app.on_loop_begin()
    poll = self.app.polling_targets()
    nodes = _load_from(RedisNodeStatus, self.app, poll['nodes'])
    proxies = _load_from(ProxyStatus, self.app, poll['proxies'])
    # commit because `get_by` may create new nodes;
    # they must be persisted so they can be reattached to a session later
    commit_session()

    all_nodes = nodes + proxies
    random.shuffle(all_nodes)
    pollers = [
        Poller(all_nodes[i:i + NODES_EACH_THREAD])
        for i in xrange(0, len(all_nodes), NODES_EACH_THREAD)
    ]
    for p in pollers:
        p.start()

    time.sleep(self.interval)

    for p in pollers:
        p.join()
    for p in pollers:
        for n in p.nodes:
            n.add_to_db()
    save_polling_stat(nodes, proxies)
    commit_session()
    logging.debug('Total %d nodes, %d proxies', len(nodes), len(proxies))

    self.app.write_polling_details({n.addr: n.details for n in nodes},
                                   {p.addr: p.details for p in proxies})
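
# Illustration only, not part of the project: the NODES_EACH_THREAD slicing
# above partitions the shuffled target list into fixed-size batches, one
# Poller thread per batch. The batch size 3 and the fake names below are
# made up for the example; the real constant is defined elsewhere.
def _example_partition_batches():
    NODES_EACH_THREAD = 3
    all_nodes = ['n1', 'n2', 'n3', 'n4', 'n5', 'n6', 'n7']
    batches = [all_nodes[i:i + NODES_EACH_THREAD]
               for i in xrange(0, len(all_nodes), NODES_EACH_THREAD)]
    # batches == [['n1', 'n2', 'n3'], ['n4', 'n5', 'n6'], ['n7']]
    return batches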
def try_create_exec_thread_by_task(t, app):
    t.check_completed()
    if t.completion is not None:
        return None
    if not t.runnable():
        return None
    lock = t.acquire_lock()
    if lock is None:
        return None

    step = t.next_step()
    # When we decide to run a task, its next step may already have been
    # started during the last poll. So we check:
    #   - if no step has been bound to the lock, bind the next step
    #   - if the step bound to the lock is still running, skip it
    #   - if the step bound to the lock is completed, bind the next step
    if lock.step_id is None:
        lock.step_id = step.id
        db.session.add(lock)
    elif lock.step.completion is None:
        return None
    else:
        lock.step_id = step.id
        db.session.add(lock)

    try:
        commit_session()
    except IntegrityError:
        return None

    logging.debug('Run task %d', t.id)
    return TaskRunner(app, t.id, step.id)
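
# Hypothetical usage sketch, not the project's actual scheduler: on each
# poll, walk the unfinished tasks and start a runner thread for any task
# that try_create_exec_thread_by_task agrees to run. The filter_by query
# and the .start() call are assumptions for illustration (TaskRunner is
# treated as a threading.Thread subclass here).
def _example_run_pending_tasks(app):
    for t in ClusterTask.query.filter_by(completion=None).all():
        runner = try_create_exec_thread_by_task(t, app)
        if runner is not None:
            runner.start()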
def _shot(self):
    poll = file_ipc.read_poll()
    nodes = _load_from(RedisNodeStatus, poll['nodes'])
    proxies = _load_from(ProxyStatus, poll['proxies'])
    # commit because `get_by` may create new nodes;
    # they must be persisted so they can be reattached to a session later
    commit_session()

    all_nodes = nodes + proxies
    random.shuffle(all_nodes)
    pollers = [
        Poller(all_nodes[i:i + NODES_EACH_THREAD], self.algalon_client)
        for i in xrange(0, len(all_nodes), NODES_EACH_THREAD)
    ]
    for p in pollers:
        p.start()

    time.sleep(self.interval)

    for p in pollers:
        p.join()
    for p in pollers:
        for n in p.nodes:
            n.add_to_db()
    save_polling_stat(nodes, proxies)
    commit_session()
    logging.debug('Total %d nodes, %d proxies', len(nodes), len(proxies))

    try:
        file_ipc.write_details({n.addr: n.details for n in nodes},
                               {p.addr: p.details for p in proxies})
    except StandardError, e:
        logging.exception(e)
def _join(_, cluster_id, cluster_host, cluster_port, newin_host, newin_port):
    redistrib.command.join_no_load(cluster_host, cluster_port, newin_host,
                                   newin_port)
    n = get_node_by_host_port(newin_host, newin_port)
    if n is None:
        return True
    n.assignee_id = cluster_id
    db.session.add(n)
    commit_session()
    return True
def _replicate(_, cluster_id, master_host, master_port, slave_host,
               slave_port):
    redistrib.command.replicate(master_host, master_port, slave_host,
                                slave_port)
    n = get_node_by_host_port(slave_host, slave_port)
    if n is None:
        return True
    n.assignee_id = cluster_id
    db.session.add(n)
    commit_session()
    return True
def _migrate_slots(command, src_host, src_port, dst_host, dst_port, slots,
                   start=0):
    while start < len(slots):
        begin = datetime.now()
        redistrib.command.migrate_slots(src_host, src_port, dst_host,
                                        dst_port, [slots[start]])
        start += 1
        if (datetime.now() - begin).seconds >= config.POLL_INTERVAL:
            command.args['start'] = start
            command.save()
            commit_session()
            return start == len(slots)
    return True
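
# Hypothetical re-entry sketch, not project code: because _migrate_slots
# records its progress in command.args['start'] whenever a single shot runs
# longer than config.POLL_INTERVAL, a later poll can resume from that offset
# instead of re-migrating slots already moved. The key names read from
# command.args below are assumptions for illustration.
def _example_resume_migration(command):
    a = command.args
    return _migrate_slots(command, a['src_host'], a['src_port'],
                          a['dst_host'], a['dst_port'], a['slots'],
                          start=a.get('start', 0))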
def run(self):
    with self.app.app_context():
        task = ClusterTask.query.get(self.task_id)
        if task is None:
            # should not happen
            return
        try:
            step = TaskStep.query.get(self.step_id)
            # check again that the step hasn't already run
            if step.completion is not None:
                return task.check_completed()
            logging.info('Execute step %d', step.id)
            if not step.execute():
                task.fail('Step fails')
                commit_session()
                return
            lock = task.acquired_lock()
            lock.step = None
            db.session.add(lock)
            commit_session()
            task.check_completed()
        except (StandardError, SQLAlchemyError), e:
            logging.exception(e)
            db.session.rollback()
            task.exec_error = traceback.format_exc()
            task.completion = datetime.now()
            db.session.add(task)
            commit_session()
def test_timed(self):
    CD = 5

    class TestTimedClient(alarm.Timed):
        def __init__(self):
            alarm.Timed.__init__(self, CD)
            self.alarms = []

        def do_send_alarm(self, endpoint, message, exception, **kwargs):
            self.alarms.append({
                'endpoint': endpoint,
                'message': message,
            })

    self.app.replace_alarm_client(TestTimedClient())
    p = FakePoller(self.app)
    nm.create_instance('127.0.0.1', 29000)
    commit_session()
    self.app.write_polling_targets()
    p.poll_once()
    self.assertEqual(0, len(self.app.alarm_client.alarms))

    n = nm.get_by_host_port('127.0.0.1', 29000)
    n.suppress_alert = False
    commit_session()
    self.app.write_polling_targets()
    p.poll_once()
    self.assertEqual(1, len(self.app.alarm_client.alarms))

    p.poll_once()
    self.assertEqual(1, len(self.app.alarm_client.alarms))

    time.sleep(CD + 1)
    p.poll_once()
    self.assertEqual(2, len(self.app.alarm_client.alarms))
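
# Minimal sketch of a cooldown-gated alarm client like the one exercised in
# test_timed above: an immediate second alarm for the same endpoint is
# dropped, and alarms resume once the cooldown elapses. This is only an
# assumption about how alarm.Timed might behave, not the project's actual
# implementation; only do_send_alarm and the cooldown constructor argument
# are taken from the test.
import time


class ExampleCooldownAlarm(object):
    def __init__(self, cooldown_seconds):
        self.cooldown = cooldown_seconds
        self._last_sent = {}

    def send_alarm(self, endpoint, message, exception, **kwargs):
        key = (endpoint.host, endpoint.port)
        now = time.time()
        if now - self._last_sent.get(key, 0) < self.cooldown:
            return  # still cooling down; drop the duplicate alarm
        self._last_sent[key] = now
        self.do_send_alarm(endpoint, message, exception, **kwargs)

    def do_send_alarm(self, endpoint, message, exception, **kwargs):
        raise NotImplementedError()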
def test_alarm(self):
    class TestAlarmClient(alarm.Base):
        def __init__(self):
            self.alarms = {}

        def send_alarm(self, endpoint, message, exception, **kwargs):
            self.alarms[(endpoint.host, endpoint.port)] = (message, exception)

    self.app.replace_alarm_client(TestAlarmClient())
    p = FakePoller(self.app)
    nm.create_instance('127.0.0.1', 29000)
    commit_session()
    self.app.write_polling_targets()
    p.poll_once()
    self.assertEqual(0, len(self.app.alarm_client.alarms))

    n = nm.get_by_host_port('127.0.0.1', 29000)
    n.suppress_alert = False
    commit_session()
    self.app.write_polling_targets()
    p.poll_once()
    self.assertEqual(1, len(self.app.alarm_client.alarms))
def test_create_delete_cluster(self):
    with self.app.test_client() as client:
        r = client.post('/redis/add', data={
            'host': '127.0.0.1',
            'port': '7100',
        })
        self.assertReqStatus(200, r)
        self.assertEqual({
            'nodes': [{
                'host': '127.0.0.1',
                'port': 7100,
                'suppress_alert': 1,
            }],
            'proxies': [],
        }, self.app.polling_targets())

    with self.app.test_client() as client:
        r = client.post('/cluster/add', data={
            'descr': 'the-quick-brown-fox',
        })
        self.assertReqStatus(200, r)
        cluster_id = int(r.data)

        r = client.post('/task/launch', data=json.dumps({
            'cluster': cluster_id,
            'nodes': [{
                'host': '127.0.0.1',
                'port': 7100,
            }],
        }))
        self.assertReqStatus(200, r)

    self.exec_all_tasks()

    with self.app.test_client() as client:
        r = client.post('/cluster/shutdown', data={
            'cluster_id': cluster_id,
        })
        self.assertReqStatus(200, r)

    self.exec_all_tasks()

    tasks = models.task.ClusterTask.query.all()
    self.assertEqual(1, len(tasks))
    self.assertEqual(cluster_id, tasks[0].cluster_id)

    with self.app.test_client() as client:
        r = client.post('/cluster/delete', data={
            'id': cluster_id,
        })
        self.assertReqStatus(400, r)

    models.task.TaskStep.query.filter_by(task_id=tasks[0].id).delete()
    models.task.ClusterTask.query.delete()
    commit_session()

    with self.app.test_client() as client:
        r = client.post('/cluster/delete', data={
            'id': cluster_id,
        })
        self.assertReqStatus(200, r)
        if len(me.assigned_slots) != 0:
            raise ValueError('node still holding slots')
        redistrib.command.quit_cluster(host, port)
    except SocketError, e:
        logging.exception(e)
        logging.info('Remove instance from cluster on exception')
    except ProtocolError, e:
        if NOT_IN_CLUSTER_MESSAGE not in e.message:
            raise

    remove_empty_cluster(cluster_id)
    n = get_node_by_host_port(host, port)
    if n is not None:
        n.assignee_id = None
        db.session.add(n)
        commit_session()
    return True


def _migrate_slots(command, src_host, src_port, dst_host, dst_port, slots,
                   start=0):
    while start < len(slots):
        begin = datetime.now()
        redistrib.command.migrate_slots(src_host, src_port, dst_host,
                                        dst_port, [slots[start]])
        start += 1
        if (datetime.now() - begin).seconds >= config.POLL_INTERVAL:
            command.args['start'] = start
            command.save()
            commit_session()
            return start == len(slots)