def delayed_rebalance_worker(self, servers, num_nodes, delay_seconds, sc,
                             max_retries=PerfDefaults.reb_max_retries,
                             reb_mode=PerfDefaults.REB_MODE.IN):
    """Sleep for delay_seconds, then rebalance the cluster, retrying on failure.

    servers: list of server objects forming the cluster.
    num_nodes: target node count driving the rebalance.
    delay_seconds: initial delay before starting, also the back-off
        between failed attempts.
    sc: stats collector; the last attempt's timing goes to sc.reb_stats().
    max_retries: give up after this many additional attempts.
    reb_mode: one of PerfDefaults.REB_MODE.{IN,OUT,SWAP}.
    """
    # NOTE: the original computed gmt_now via time.strftime() but never
    # used it — dead code removed.
    time.sleep(delay_seconds)
    self.log.info("rebalance started")

    if not sc:
        self.log.error("invalid stats collector")
        return

    status = False
    retries = 0
    while not status and retries <= max_retries:
        start_time = time.time()
        if reb_mode == PerfDefaults.REB_MODE.OUT:
            status, nodes = RebalanceHelper.rebalance_out(servers, num_nodes)
        elif reb_mode == PerfDefaults.REB_MODE.SWAP:
            status, nodes = RebalanceHelper.rebalance_swap(servers, num_nodes)
        else:
            # IN mode: only the first attempt runs the pre-rebalance checks
            # (do_check is False once retries > 0).
            status, nodes = RebalanceHelper.rebalance_in(
                servers, num_nodes - 1, do_check=(not retries))
        end_time = time.time()
        self.log.info("status: {0}, nodes: {1}, retries: {2}"
                      .format(status, nodes, retries))
        if not status:
            retries += 1
            time.sleep(delay_seconds)

    sc.reb_stats(start_time, end_time - start_time)

    if self.parami("master_events", PerfDefaults.master_events):
        filename = "master_events.log"
        with open(filename, "w") as f:
            f.write(self.rest.diag_master_events()[1])
def delayed_rebalance_worker(servers, num_nodes, delay_seconds, sc,
                             max_retries=PerfDefaults.reb_max_retries,
                             reb_mode=PerfDefaults.REB_MODE.IN):
    """Wait delay_seconds, then drive a cluster rebalance, retrying on failure.

    The last attempt's start time and duration are pushed into the stats
    collector through sc.reb_stats().
    """
    time.sleep(delay_seconds)
    gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
    print("[delayed_rebalance_worker] rebalance started: %s" % gmt_now)

    if not sc:
        print("[delayed_rebalance_worker] invalid stats collector")
        return

    done = False
    attempt = 0
    while not (done or attempt > max_retries):
        t_begin = time.time()
        if reb_mode == PerfDefaults.REB_MODE.OUT:
            done, nodes = RebalanceHelper.rebalance_out(servers, num_nodes)
        elif reb_mode == PerfDefaults.REB_MODE.SWAP:
            done, nodes = RebalanceHelper.rebalance_swap(servers, num_nodes)
        else:
            # Pre-rebalance checks run only on the first attempt.
            done, nodes = RebalanceHelper.rebalance_in(
                servers, num_nodes - 1, do_check=(not attempt))
        t_end = time.time()
        print("[delayed_rebalance_worker] status: {0}, nodes: {1}, retries: {2}"
              .format(done, nodes, attempt))
        if not done:
            attempt += 1
            time.sleep(delay_seconds)

    sc.reb_stats(t_begin, t_end - t_begin)
def delayed_rebalance_worker(servers, num_nodes, delay_seconds, sc,
                             max_retries=PerfDefaults.reb_max_retries,
                             reb_mode=PerfDefaults.REB_MODE.IN):
    """Sleep, then rebalance the cluster according to reb_mode.

    Retries up to max_retries extra times, sleeping delay_seconds between
    attempts, and records the last attempt's duration via sc.reb_stats().
    """
    time.sleep(delay_seconds)
    gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
    print("[delayed_rebalance_worker] rebalance started: %s" % gmt_now)

    if not sc:
        print("[delayed_rebalance_worker] invalid stats collector")
        return

    status = False
    # Equivalent to the usual while-not-status retry loop: up to
    # max_retries + 1 attempts, sleeping after every failed one.
    for retries in range(max_retries + 1):
        start_time = time.time()
        if reb_mode == PerfDefaults.REB_MODE.OUT:
            status, nodes = RebalanceHelper.rebalance_out(
                servers, num_nodes)
        elif reb_mode == PerfDefaults.REB_MODE.SWAP:
            status, nodes = RebalanceHelper.rebalance_swap(
                servers, num_nodes)
        else:
            status, nodes = RebalanceHelper.rebalance_in(
                servers, num_nodes - 1, do_check=(not retries))
        end_time = time.time()
        print("[delayed_rebalance_worker] status: {0}, nodes: {1}, retries: {2}"
              .format(status, nodes, retries))
        if status:
            break
        time.sleep(delay_seconds)

    sc.reb_stats(start_time, end_time - start_time)
def wait_until_warmed_up(self, master=None):
    """Block until every bucket reports warmup complete.

    master: server to query; defaults to the first configured server.
    """
    if not master:
        master = self.input.servers[0]

    # NOTE: removed a dead `bucket = self.param("bucket", "default")`
    # assignment — it was immediately shadowed by the loop variable below.
    fn = RebalanceHelper.wait_for_mc_stats_no_timeout
    for bucket in self.buckets:
        # Warmup is finished once ep_warmup_thread flips to 'complete'.
        RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                              'ep_warmup_thread',
                                              'complete', fn=fn)
def wait_until_repl(self):
    """Block until all replication queues on every node are drained."""
    print("[perf.repl] waiting for replication: %s"
          % time.strftime(PerfDefaults.strftime))

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")

    # Each of these stats must reach zero before replication is considered
    # finished; poll them one after another without a timeout.
    for stat in ('vb_replica_queue_size',
                 'ep_tap_replica_queue_itemondisk',
                 'ep_tap_rebalance_queue_backfillremaining',
                 'ep_tap_replica_qlen'):
        RebalanceHelper.wait_for_stats_on_all(
            master, bucket, stat, 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

    print("[perf.repl] replication is done: %s"
          % time.strftime(PerfDefaults.strftime))
def wait_until_repl(self):
    """Wait for every replication-related queue to reach zero."""
    self.log.info("waiting for replication")

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")
    waiter = RebalanceHelper.wait_for_stats_no_timeout

    # All four queues must drain to zero on every node.
    repl_stats = ['vb_replica_queue_size',
                  'ep_tap_replica_queue_itemondisk',
                  'ep_tap_rebalance_queue_backfillremaining',
                  'ep_tap_replica_qlen']
    for stat_key in repl_stats:
        RebalanceHelper.wait_for_stats_on_all(master, bucket, stat_key, 0,
                                              fn=waiter)

    self.log.info("replication is done")
def wait_until_drained(self):
    """Wait for the disk write queue to empty; return the completion time."""
    self.log.info("draining disk write queue")

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")
    waiter = RebalanceHelper.wait_for_stats_no_timeout

    # Both the queue itself and the flusher backlog must hit zero.
    for queue_stat in ('ep_queue_size', 'ep_flusher_todo'):
        RebalanceHelper.wait_for_stats_on_all(master, bucket, queue_stat, 0,
                                              fn=waiter)

    self.log.info("disk write queue has been drained")
    return time.time()
def wait_until_drained(self):
    """Block until the disk write queue drains; return the finish time."""
    print("[perf.drain] draining disk write queue : %s"
          % time.strftime(PerfDefaults.strftime))

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")

    # Wait on the write queue and the flusher backlog in turn.
    for stat in ('ep_queue_size', 'ep_flusher_todo'):
        RebalanceHelper.wait_for_stats_on_all(
            master, bucket, stat, 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

    print("[perf.drain] disk write queue has been drained: %s"
          % time.strftime(PerfDefaults.strftime))
    return time.time()
def wait_until_drained(self):
    """Wait for full persistence of all items; return the completion time."""
    self.log.info("draining disk write queue")

    # Delegate the actual polling to the helper; it reports readiness.
    ready = RebalanceHelper.wait_for_persistence(
        self.input.servers[0], self.param("bucket", "default"))
    self.assertTrue(ready, "not all items persisted. see logs")

    self.log.info("disk write queue has been drained")
    return time.time()
def rebalance_nodes(self, num_nodes):
    """Rebalance cluster(s) if more than 1 node provided.

    num_nodes: target cluster size; num_nodes - 1 nodes are rebalanced in.
    Asserts that every rebalance completes successfully.
    """
    if len(self.input.servers) == 1 or num_nodes == 1:
        # Logger.warn() is a deprecated alias for warning(); the sibling
        # rebalance_nodes variant in this file already uses warning().
        self.log.warning("running on single node cluster")
        return
    else:
        self.log.info("rebalancing nodes - num_nodes = {0}"
                      .format(num_nodes))

    if self.input.clusters:
        # Multi-cluster run: grow every cluster to num_nodes.
        for cluster in self.input.clusters.values():
            status, _ = RebalanceHelper.rebalance_in(cluster,
                                                     num_nodes - 1,
                                                     do_shuffle=False)
            self.assertTrue(status)
    else:
        status, _ = RebalanceHelper.rebalance_in(self.input.servers,
                                                 num_nodes - 1,
                                                 do_shuffle=False)
        self.assertTrue(status)
def rebalance_nodes(self, num_nodes):
    """Rebalance cluster(s) if more than 1 node provided"""
    if len(self.input.servers) == 1 or num_nodes == 1:
        print("WARNING: running on single node cluster")
        return
    print("[perf.setUp] rebalancing nodes: num_nodes = {0}".format(num_nodes))

    # Either every configured cluster, or the flat server list as a
    # single target — handled uniformly by one loop.
    targets = (list(self.input.clusters.values()) if self.input.clusters
               else [self.input.servers])
    for servers in targets:
        ok, _ = RebalanceHelper.rebalance_in(servers, num_nodes - 1,
                                             do_shuffle=False)
        self.assertTrue(ok)
def rebalance_nodes(self, num_nodes, cluster=None):
    """Rebalance cluster(s) if more than 1 node provided"""
    if len(self.input.servers) == 1 or num_nodes == 1:
        self.log.warning("running on single node cluster")
        return

    self.log.info("rebalancing nodes - num_nodes = {0}".format(num_nodes))

    # Default to the full server list when no explicit cluster is given.
    servers = cluster if cluster else self.input.servers
    ok, _ = RebalanceHelper.rebalance_in(servers, num_nodes - 1,
                                         do_shuffle=False)
    self.assertTrue(ok)
def wait_until_drained(self):
    """Drain the disk write queue and return the time it finished."""
    print("[perf.drain] draining disk write queue : %s"
          % time.strftime(PerfDefaults.strftime))

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")
    no_timeout = RebalanceHelper.wait_for_stats_no_timeout

    RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                          'ep_queue_size', 0,
                                          fn=no_timeout)
    RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                          'ep_flusher_todo', 0,
                                          fn=no_timeout)

    print("[perf.drain] disk write queue has been drained: %s"
          % time.strftime(PerfDefaults.strftime))
    return time.time()
def wait_until_repl(self):
    """Block until all replica/tap queues report zero items."""
    self.log.info("waiting for replication")

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")

    def drained(stat_name):
        # Poll every node until the given stat hits zero (no timeout).
        RebalanceHelper.wait_for_stats_on_all(
            master, bucket, stat_name, 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

    drained('vb_replica_queue_size')
    drained('ep_tap_replica_queue_itemondisk')
    drained('ep_tap_rebalance_queue_backfillremaining')
    drained('ep_tap_replica_qlen')

    self.log.info("replication is done")
def wait_until_repl(self):
    """Wait until every replication queue on the cluster is empty."""
    print("[perf.repl] waiting for replication: %s"
          % time.strftime(PerfDefaults.strftime))

    master = self.input.servers[0]
    bucket = self.param("bucket", "default")
    no_timeout = RebalanceHelper.wait_for_stats_no_timeout

    RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                          'vb_replica_queue_size', 0,
                                          fn=no_timeout)
    RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                          'ep_tap_replica_queue_itemondisk', 0,
                                          fn=no_timeout)
    RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                          'ep_tap_rebalance_queue_backfillremaining',
                                          0, fn=no_timeout)
    RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                          'ep_tap_replica_qlen', 0,
                                          fn=no_timeout)

    print("[perf.repl] replication is done: %s"
          % time.strftime(PerfDefaults.strftime))
def test_view_rebalance(self):
    """Alk's specification.

    Cluster setup:
    -- 4 nodes
    -- 1 bucket
    -- 8GB total RAM
    -- 5GB bucket quota
    -- no data replica
    -- no index replica
    -- no view compaction

    All phases are enabled by default.

    Load phase:
    -- 10M items x 2KB average values size
    -- no expiration

    Index phase:
    -- 1 design ddoc, 1 view

    Access phase:
    -- no front-end workload
    -- rebalance out, from 4 to 3 nodes
    -- stale=false query after rebalance
    """
    # Legacy
    self.spec(self.__str__().split(" ")[0])

    # Disable stats
    self.input.test_params['stats'] = 0

    # View compaction setup
    rc = RestConnection(self.input.servers[0])
    vt = 30 if self.parami('view_compaction', 1) else 100
    rc.set_auto_compaction(dbFragmentThresholdPercentage=30,
                           viewFragmntThresholdPercentage=vt)

    # Consistent view setup
    if self.parami('consistent_view', 1):
        rc.set_reb_cons_view(disable=False)
    else:
        rc.set_reb_cons_view(disable=True)

    # rebalance_moves_per_node setup
    # FIX: read the 'rebalance_moves_per_node' test parameter so it matches
    # the ns_config key being set below; the old code read
    # 'rebalance_moves_per_second', silently ignoring the intended
    # parameter (the sibling test_alk_rebalance already reads
    # 'rebalance_moves_per_node').
    rmps = self.parami('rebalance_moves_per_node', 1)
    cmd = 'ns_config:set(rebalance_moves_per_node, {0}).'.format(rmps)
    rc.diag_eval(cmd)

    # index_pausing_disabled setup
    ipd = str(bool(self.parami('index_pausing_disabled', 0))).lower()
    cmd = 'ns_config:set(index_pausing_disabled, {0}).'.format(ipd)
    rc.diag_eval(cmd)

    # rebalance_index_waiting_disabled setup
    riwd = str(bool(self.parami('rebalance_index_waiting_disabled', 0))).lower()
    cmd = 'ns_config:set(rebalance_index_waiting_disabled, {0}).'.format(riwd)
    rc.diag_eval(cmd)

    # Customize number of design docs
    view_gen = ViewGen()
    if self.parami('ddocs', 1) == 1:
        ddocs = view_gen.generate_ddocs([1])
    elif self.parami('ddocs', 1) == 8:
        ddocs = view_gen.generate_ddocs([1, 1, 1, 1, 1, 1, 1, 1])
    else:
        sys.exit('Only 1 or 8 ddocs supported.')

    # Load phase
    if self.parami('load_phase', 1):
        num_nodes = self.parami('num_nodes', PerfDefaults.num_nodes)
        self.load_phase(num_nodes)

    # Index phase
    if self.parami('index_phase', 1):
        self.index_phase(ddocs)

    # Access phase
    if self.parami('access_phase', 1):
        if self.param('rebalance', 'out') == 'out':
            RebalanceHelper.rebalance_out(servers=self.input.servers,
                                          how_many=1, monitor=True)
        elif self.param('rebalance', 'out') == 'swap':
            RebalanceHelper.rebalance_swap(servers=self.input.servers,
                                           how_many=1, monitor=True)
        else:
            sys.exit('Only rebalance-out and swap rebalance supported.')
        self.measure_indexing_time(rc, ddocs)
def test_alk_rebalance(self):
    """Alk's specification.

    Cluster setup:
    -- 4 nodes
    -- 1 bucket
    -- 8GB total RAM
    -- 5GB bucket quota
    -- no data replica
    -- no index replica
    -- no view compaction

    All phases are enabled by default.

    Load phase:
    -- 10M items x 2KB average values size
    -- no expiration

    Index phase:
    -- 1 design ddoc, 1 view

    Access phase:
    -- no front-end workload
    -- rebalance out, from 4 to 3 nodes
    -- stale=false query after rebalance
    """
    # Legacy
    self.spec(self.__str__().split(" ")[0])

    # Disable stats
    self.input.test_params['stats'] = 0

    # View compaction setup
    rc = RestConnection(self.input.servers[0])
    vt = 30 if self.parami('view_compaction', 1) else 100
    rc.set_auto_compaction(dbFragmentThresholdPercentage=30,
                           viewFragmntThresholdPercentage=vt)

    # Consistent view setup
    if self.parami('consistent_view', 1):
        rc.set_reb_cons_view(disable=False)
    else:
        rc.set_reb_cons_view(disable=True)

    # rebalance_moves_per_node setup
    rmps = self.parami('rebalance_moves_per_node', 1)
    cmd = 'ns_config:set(rebalance_moves_per_node, {0}).'.format(rmps)
    rc.diag_eval(cmd)

    # index_pausing_disabled setup
    ipd = str(bool(self.parami('index_pausing_disabled', 0))).lower()
    cmd = 'ns_config:set(index_pausing_disabled, {0}).'.format(ipd)
    rc.diag_eval(cmd)

    # rebalance_index_waiting_disabled setup
    riwd = str(bool(self.parami('rebalance_index_waiting_disabled', 0))).lower()
    cmd = 'ns_config:set(rebalance_index_waiting_disabled, {0}).'.format(
        riwd)
    rc.diag_eval(cmd)

    # Customize number of design docs
    view_gen = ViewGen()
    views = self.param("views", None)
    if views is not None:
        # NOTE(review): eval() on a test parameter — acceptable for trusted
        # test configs, but ast.literal_eval would be safer. TODO confirm
        # the 'views' param is always a literal list string like "[1, 1]".
        views = [int(v) for v in eval(views)]
        ddocs = view_gen.generate_ddocs(views)
    elif self.parami('ddocs', 1) == 1:
        ddocs = view_gen.generate_ddocs([1])
    elif self.parami('ddocs', 1) == 8:
        ddocs = view_gen.generate_ddocs([1, 1, 1, 1, 1, 1, 1, 1])
    else:
        sys.exit('Only 1 or 8 ddocs supported.')

    # Load phase (note: unlike the docstring's claim, each phase here is
    # opt-in — the parami defaults are 0, not 1)
    if self.parami('load_phase', 0):
        num_nodes = self.parami('num_nodes', PerfDefaults.num_nodes)
        self.load_phase(num_nodes)

    # Index phase
    if self.parami('index_phase', 0):
        self.index_phase(ddocs)

    # Access phase
    if self.parami('access_phase', 0):
        if self.param('rebalance', 'out') == 'out':
            RebalanceHelper.rebalance_out(servers=self.input.servers,
                                          how_many=1, monitor=True)
        elif self.param('rebalance', 'out') == 'swap':
            RebalanceHelper.rebalance_swap(servers=self.input.servers,
                                           how_many=1, monitor=True)
        elif self.param('rebalance', 'out') == 'in':
            RebalanceHelper.rebalance_in(servers=self.input.servers,
                                         how_many=1, monitor=True)
        else:
            sys.exit('Wrong "rebalance" parameter')
        self.measure_indexing_time(rc, ddocs)