def run(self):
    """Tar up the server's couchbase data directory and download it locally.

    The archive name embeds server ip and a m/d/y-h/m timestamp; the remote
    copy is deleted after a successful download.  Raises Exception when the
    archive is missing on the server or the download fails.
    """
    remote_client = RemoteMachineShellConnection(self.server)
    now = datetime.now()
    day = now.day
    month = now.month
    year = now.year
    hour = now.timetuple().tm_hour
    minute = now.timetuple().tm_min
    file_name = "%s-%s%s%s-%s%s-couch.tar.gz" % (self.server.ip, month, day, year, hour, minute)
    print "Collecting data files from %s\n" % self.server.ip
    remote_client.extract_remote_info()
    # data path differs per remote OS; resolved from the detected os type
    data_path = self.__get_data_path(os_type=remote_client.info.type.lower())
    output, error = remote_client.execute_command(
        "tar -zcvf {0} '{1}' >/dev/null 2>&1".format(file_name, data_path))
    print "\n".join(output)
    print "\n".join(error)
    # the tar lands in the ssh user's home directory
    user_path = "/home/"
    if self.server.ssh_username == "root":
        user_path = "/"
    remote_path = "%s%s" % (user_path, self.server.ssh_username)
    status = remote_client.file_exists(remote_path, file_name)
    if not status:
        raise Exception("%s doesn't exists on server" % file_name)
    status = remote_client.get_file(remote_path, file_name, "%s/%s" % (self.path, file_name))
    if not status:
        raise Exception("Fail to download zipped logs from %s" % self.server.ip)
    # clean up the remote archive once it is safely downloaded
    remote_client.execute_command("rm -f %s" % os.path.join(remote_path, file_name))
    remote_client.disconnect()
def convert_to_hostname(self, servers_with_hostnames):
    """Rename each node so the cluster refers to it by FQDN instead of IP.

    Returns the list of hostnames built for the given servers.
    NOTE(review): rename credentials are hard-coded placeholders ('******'),
    and ``shell`` is unbound in the finally clause when the server list is
    empty -- confirm callers never pass an empty list.
    """
    try:
        hostname = []
        for server in servers_with_hostnames:
            shell = RemoteMachineShellConnection(server)
            info = shell.extract_remote_info()
            domain = ''.join(info.domain[0])
            hostname.append(info.hostname[0] + "." + domain)
            master_rest = RestConnection(server)
            var = master_rest.get_nodes_self().hostname
            flag = True if server.ip in var else False
            self.log.info("get_node_self function returned : {0}".format(var))
            if flag:
                self.log.info("Node {0} is referred via IP. Need to be referred with hostname. Changing the name of the node!!".format(server.ip))
                version = RestConnection(server).get_nodes_self().version
                # pre-2.0.2 builds need server-side settings switched to hostname first
                if version.startswith("1.8.1") or version.startswith("2.0.0") or version.startswith("2.0.1"):
                    RemoteUtilHelper.use_hostname_for_server_settings(server)
                    obj = RestConnection(server)
                    obj.init_cluster()
                else:
                    obj = RestConnection(server)
                    obj.init_cluster()
                var = master_rest.rename_node(username='******', password='******', port='', hostname=hostname[-1])
            else:
                self.log.info("Node {0} already referred via hostname. No need to convert the name".format(server.ip))
    finally:
        # only the shell of the last iterated server is closed here
        shell.disconnect()
    return hostname
def _save_snapshot(self, server, bucket, file_base=None):
    """Save data files to a snapshot.

    The bucket's data directory is tarred into "<data>-snapshots" next to
    the data path; the archive name embeds the server's full build version
    (via _build_tar_name) so snapshots from different builds don't mix.
    Always returns True.
    """
    src_data_path = os.path.dirname(server.data_path or testconstants.COUCHBASE_DATA_PATH)
    dest_data_path = "{0}-snapshots".format(src_data_path)
    self.log.info("server={0}, src_data_path={1}, dest_data_path={2}"
                  .format(server.ip, src_data_path, dest_data_path))
    shell = RemoteMachineShellConnection(server)
    build_name, short_version, full_version = \
        shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")
    dest_file = self._build_tar_name(bucket, full_version, file_base)
    self._exec_and_log(shell, "mkdir -p {0}".format(dest_data_path))
    # save as gzip file, if file exsits, overwrite
    # TODO: multiple buckets
    zip_cmd = "cd {0}; tar -cvzf {1}/{2} {3} {3}-data _*"\
        .format(src_data_path, dest_data_path, dest_file, bucket)
    self._exec_and_log(shell, zip_cmd)
    shell.disconnect()
    return True
def customize_xdcr_settings(self):
    """Set custom XDCR environment variables.

    Reads the four known XDCR tuning test-params; the first one that is
    set (in priority order) is pushed to every server as an environment
    variable.  A no-op when none of them is configured.
    """
    # (test-param name, environment variable) in priority order
    candidates = (
        ('max_concurrent_reps_per_doc', 'MAX_CONCURRENT_REPS_PER_DOC'),
        ('xdcr_doc_batch_size_kb', 'XDCR_DOC_BATCH_SIZE_KB'),
        ('xdcr_checkpoint_interval', 'XDCR_CHECKPOINT_INTERVAL'),
        ('xdcr_latency_optimization', 'XDCR_LATENCY_OPTIMIZATION'),
    )
    # read all params up front, exactly as the original did
    readings = [self.param(name, None) for name, _ in candidates]
    env_var = None
    value = None
    for (_, candidate_var), candidate_value in zip(candidates, readings):
        if candidate_value:
            env_var, value = candidate_var, candidate_value
            break
    if env_var is None:
        return
    self.log.info("changing {0} to {1}".format(env_var, value))
    for server in self.input.servers:
        rc = RemoteMachineShellConnection(server)
        rc.set_environment_variable(env_var, value)
def test_partial_rollback(self):
    """Eventing partial-rollback test.

    Stops persistence on all KV nodes, deploys an eventing function while a
    doc load is running, kills memcached on one KV node (forcing a rollback
    of unpersisted mutations), restarts persistence on another node, then
    verifies the eventing results against the source bucket's item count.
    """
    kv_node = self.get_nodes_from_services_map(service_type="kv", get_all_nodes=True)
    log.info("kv nodes:{0}".format(kv_node))
    # keep all mutations memory-only so the later memcached kill rolls them back
    for node in kv_node:
        mem_client = MemcachedClientHelper.direct_client(node, self.src_bucket_name)
        mem_client.stop_persistence()
    body = self.create_save_function_body(self.function_name, self.handler_code, worker_count=3)
    try:
        task = self.cluster.async_load_gen_docs(self.master, self.src_bucket_name,
                                                self.gens_load, self.buckets[0].kvs[1],
                                                'create', compression=self.sdk_compression)
    except Exception as e:
        # best-effort load: failures are tolerated, memcached gets killed anyway
        log.info("error while loading data")
    self.deploy_function(body, wait_for_bootstrap=False)
    # Kill memcached on Node A
    self.log.info("Killing memcached on {0}".format(kv_node[1]))
    shell = RemoteMachineShellConnection(kv_node[1])
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on {0}". format(kv_node[0]))
    mem_client = MemcachedClientHelper.direct_client(kv_node[0], self.src_bucket_name)
    mem_client.start_persistence()
    # Wait for bootstrap to complete
    self.wait_for_bootstrap_to_complete(body['appname'])
    stats_src = RestConnection(self.master).get_bucket_stats(bucket=self.src_bucket_name)
    log.info(stats_src)
    self.verify_eventing_results(self.function_name, stats_src["curr_items"],
                                 skip_stats_validation=True)
def convert_to_hostname(self, servers_with_hostnames, username='******', password='******'):
    """Rename each node so the cluster refers to it by FQDN instead of IP.

    Fails the test when a machine has no DNS domain configured (dumps
    nslookup output to aid diagnosis).  Returns the list of hostnames.
    NOTE(review): ``shell`` is unbound in the finally clause when the
    server list is empty -- confirm callers never pass an empty list.
    """
    try:
        hostname = []
        for server in servers_with_hostnames:
            shell = RemoteMachineShellConnection(server)
            info = shell.extract_remote_info()
            domain = ''.join(info.domain[0])
            if not domain:
                # dump nslookup output to help diagnose the DNS misconfiguration
                output = shell.execute_command_raw('nslookup %s' % info.hostname[0])
                print output
                self.fail("Domain is not defined, couchbase cannot be configured correctly. NOT A BUG. CONFIGURATION ISSUE")
            hostname.append(info.hostname[0] + "." + domain)
            master_rest = RestConnection(server)
            current_hostname = master_rest.get_nodes_self().hostname
            self.log.info("get_node_self function returned : {0}".format(current_hostname))
            if server.ip in current_hostname:
                self.log.info("Node {0} is referred via IP. Need to be referred with hostname. Changing the name of the node!!".format(server.ip))
                version = RestConnection(server).get_nodes_self().version
                # pre-2.0.2 builds need server-side settings switched to hostname first
                if version.startswith("1.8.1") or version.startswith("2.0.0") or version.startswith("2.0.1"):
                    RemoteUtilHelper.use_hostname_for_server_settings(server)
                    master_rest.init_cluster()
                else:
                    master_rest.init_cluster()
                master_rest.rename_node(username=username, password=password, port='', hostname=hostname[-1])
            else:
                self.log.info("Node {0} already referred via hostname. No need to convert the name".format(server.ip))
    finally:
        # only the shell of the last iterated server is closed here
        shell.disconnect()
    return hostname
def start_atop(self):
    """Start atop collector.

    Kills any running atop, removes old sample files, then starts a fresh
    collector in the background sampling every 15 seconds into
    /tmp/<ip>.atop on each node.
    """
    for node in self.nodes:
        shell = RemoteMachineShellConnection(node)
        # Fix: stderr used to be redirected to a literal file named
        # "/dev.null"; it now goes to /dev/null like stdout.
        cmd = "killall atop; rm -fr /tmp/*.atop;" + \
              "atop -w /tmp/{0}.atop -a 15".format(node.ip) + \
              " > /dev/null 2> /dev/null < /dev/null &"
        shell.execute_command(cmd)
def set_ep_compaction(self, comp_ratio):
    """Set up ep_engine side compaction ratio.

    Pushes the db_frag_threshold flush_param to every server via cbepctl.
    """
    # the command is identical for every server, so build it once
    cbepctl_cmd = ("/opt/couchbase/bin/cbepctl localhost:11210 "
                   "set flush_param db_frag_threshold {0}".format(comp_ratio))
    for server in self.input.servers:
        conn = RemoteMachineShellConnection(server)
        self._exec_and_log(conn, cbepctl_cmd)
        conn.disconnect()
def stop_atop(self):
    """Stop atop collector on every node; skip nodes we cannot ssh into."""
    for node in self.nodes:
        try:
            shell = RemoteMachineShellConnection(node)
        except SystemExit:
            # connection helper exits hard on ssh failure; log and move on
            log.error("can't establish SSH session with {0}".format(node.ip))
            continue
        shell.execute_command("killall atop")
def reboot_server(self, server):
    """Reboot server, then block until it is warmed up and reachable again."""
    conn = RemoteMachineShellConnection(server)
    conn.reboot_node()
    conn.disconnect()
    # wait for restart and warmup on all node
    self.sleep(self.wait_timeout * 5)
    # disable firewall on these nodes
    self.stop_firewall_on_node(server)
    # wait till node is ready after warmup
    ClusterOperationHelper.wait_for_ns_servers_or_assert([server], self,
                                                         wait_if_warmup=True)
def tearDown(self):
    """Drop the beer-sample bucket via REST, then run the base tearDown."""
    master = self.master
    conn = RemoteMachineShellConnection(master)
    delete_cmd = ("curl -X DELETE -u Administrator:password "
                  "http://{0}:8091/pools/default/buckets/beer-sample").format(master.ip)
    conn.execute_command(delete_cmd)
    # give ns_server time to finish the bucket deletion
    self.sleep(20)
    super(TokenTests, self).tearDown()
def set_ep_param(self, type, param, value):
    """
    Set ep-engine specific param, using cbepctl

    type: paramter type, e.g: flush_param, tap_param, etc
    param: parameter name to set; value: the new value.
    Applies to the first configured bucket on every server.
    NOTE(review): the ``type`` argument shadows the builtin; kept as-is
    since the name is part of the caller-visible interface.
    """
    bucket = Bucket(name=self.buckets[0], authType="sasl", saslPassword="")
    for server in self.input.servers:
        shell = RemoteMachineShellConnection(server)
        shell.execute_cbepctl(bucket, "", "set %s" % type, param, value)
        shell.disconnect()
def set_up_proxy(self, bucket=None):
    """Set up and start Moxi on the first configured moxi host (no-op
    when no moxi hosts are configured)."""
    if not self.input.moxis:
        return
    self.log.info("setting up proxy")
    # fall back to the configured/default bucket when none was given
    if not bucket:
        bucket = self.param('bucket', 'default')
    moxi_host = self.input.moxis[0]
    shell = RemoteMachineShellConnection(moxi_host)
    shell.start_moxi(self.input.servers[0].ip, bucket, moxi_host.port)
    shell.disconnect()
def start_atop(self):
    """Start atop collector.

    Kills any running atop and removes old samples, then starts a background
    collector (15s interval) on each node; nodes we cannot ssh into are
    logged and skipped.
    """
    for node in self.nodes:
        try:
            shell = RemoteMachineShellConnection(node)
        except SystemExit:
            log.error("can't establish SSH session with {0}".format(node.ip))
        else:
            # Fix: stderr used to be redirected to a literal file named
            # "/dev.null"; it now goes to /dev/null like stdout.
            cmd = "killall atop; rm -fr /tmp/*.atop;" + \
                  "atop -w /tmp/{0}.atop -a 15".format(node.ip) + \
                  " > /dev/null 2> /dev/null < /dev/null &"
            shell.execute_command(cmd)
def load_sample_buckets(self, bucketName="beer-sample"):
    """ Load the specified sample bucket in Couchbase """
    #self.cluster.bucket_delete(server=self.master, bucket="default")
    master = self.master
    conn = RemoteMachineShellConnection(master)
    conn.execute_command("""curl -v -u Administrator:password \
-X POST http://{0}:8091/sampleBuckets/install \
-d '["{1}"]'""".format(master.ip, bucketName))
    # give the sample loader time to finish importing documents
    self.sleep(30)
    conn.disconnect()
def run(self): remote_client = RemoteMachineShellConnection(self.server) now = datetime.now() day = now.day month = now.month year = now.year hour = now.timetuple().tm_hour min = now.timetuple().tm_min file_name = "%s-%s%s%s-%s%s-diag.zip" % (self.server.ip, month, day, year, hour, min) print "Collecting logs from %s\n" % self.server.ip output, error = remote_client.execute_cbcollect_info(file_name) print "\n".join(output) print "\n".join(error) user_path = "/home/" if self.server.ssh_username == "root": user_path = "/" remote_path = "%s%s" % (user_path, self.server.ssh_username) status = remote_client.file_exists(remote_path, file_name) if not status: raise Exception("%s doesn't exists on server" % file_name) status = remote_client.get_file(remote_path, file_name, "%s/%s" % (self.path, file_name)) if status: print "Downloading zipped logs from %s" % self.server.ip else: raise Exception("Fail to download zipped logs from %s" % self.server.ip) remote_client.execute_command("rm -f %s" % os.path.join(remote_path, file_name)) remote_client.disconnect()
def kill_erlang_service(self, server):
    """Kill the erlang (beam) process on server, restart couchbase and
    wait for the node to warm up again."""
    remote_client = RemoteMachineShellConnection(server)
    os_info = remote_client.extract_remote_info()
    # Fix: log.info was called as log.info("os_info : {0}", os_info) --
    # a brace placeholder with a %-style lazy argument, which logged the
    # literal "{0}".  Format explicitly instead.
    log.info("os_info : {0}".format(os_info))
    if os_info.type.lower() == "windows":
        remote_client.kill_erlang(os="windows")
    else:
        remote_client.kill_erlang()
    remote_client.start_couchbase()
    remote_client.disconnect()
    # wait for restart and warmup on all node
    self.sleep(self.wait_timeout * 2)
    # wait till node is ready after warmup
    ClusterOperationHelper.wait_for_ns_servers_or_assert([server], self,
                                                         wait_if_warmup=True)
def run(self):
    """Collect cbcollect_info diag from self.server and download it.

    Only the remote (ssh) branch is visible here; when self.local is true
    the method does nothing.  # NOTE(review): confirm a local branch is not
    expected elsewhere.
    """
    file_name = "%s-%s-diag.zip" % (self.server.ip, time_stamp())
    if not self.local:
        from lib.remote.remote_util import RemoteMachineShellConnection
        remote_client = RemoteMachineShellConnection(self.server)
        print "Collecting logs from %s\n" % self.server.ip
        output, error = remote_client.execute_cbcollect_info(file_name)
        print "\n".join(output)
        print "\n".join(error)
        # cbcollect drops the zip in the ssh user's home dir; the home
        # prefix differs per platform and user
        user_path = "/home/"
        if remote_client.info.distribution_type.lower() == "mac":
            user_path = "/Users/"
        else:
            if self.server.ssh_username == "root":
                user_path = "/"
        remote_path = "%s%s" % (user_path, self.server.ssh_username)
        status = remote_client.file_exists(remote_path, file_name)
        if not status:
            raise Exception("%s doesn't exists on server" % file_name)
        status = remote_client.get_file(remote_path, file_name, "%s/%s" % (self.path, file_name))
        if status:
            print "Downloading zipped logs from %s" % self.server.ip
        else:
            raise Exception("Fail to download zipped logs from %s" % self.server.ip)
        # clean up the remote copy once downloaded
        remote_client.execute_command("rm -f %s" % os.path.join(remote_path, file_name))
        remote_client.disconnect()
def check_if_eventing_consumers_are_cleaned_up(self):
    """Return True iff no eventing-consumer processes remain on any
    eventing node of the cluster."""
    eventing_nodes = self.get_nodes_from_services_map(service_type="eventing",
                                                      get_all_nodes=True)
    ps_command = "ps -ef | grep eventing-consumer | grep -v grep | wc -l"
    total_consumers = 0
    for eventing_node in eventing_nodes:
        shell = RemoteMachineShellConnection(eventing_node)
        count, error = shell.execute_non_sudo_command(ps_command)
        # execute_non_sudo_command may hand back a list of output lines
        count = int(count[0]) if isinstance(count, list) else int(count)
        log.info("Node : {0} , eventing_consumer processes running : {1}".format(
            eventing_node.ip, count))
        total_consumers += count
    return total_consumers == 0
def rename_nodes(self, servers, names=None):
    """Rename each server to its full hostname (or names[server] if given).

    Fix: the ``names`` default used to be the mutable literal ``{}``;
    ``None`` is the sentinel now -- behavior is unchanged since the dict
    was never mutated, and ``not names`` treats both the same way.

    Returns {server: hostname}.  Fails the test via assertTrue when a
    rename is rejected; each ssh shell is closed even on failure.
    """
    hostnames = {}
    for server in servers:
        shell = RemoteMachineShellConnection(server)
        try:
            if not names:
                hostname = shell.get_full_hostname()
            else:
                hostname = names[server]
            rest = RestConnection(server)
            renamed, content = rest.rename_node(hostname,
                                                username=server.rest_username,
                                                password=server.rest_password)
            self.assertTrue(renamed, "Server %s is not renamed!Hostname %s. Error %s" % (
                server, hostname, content))
            hostnames[server] = hostname
        finally:
            shell.disconnect()
    return hostnames
def __init__(self, server, path="/tmp", memcached_ip="localhost", memcached_port="11211", num_items=100000, extra_params=""):
    """Hold the connection and parameters for a remote load run.

    server: target server object; an ssh shell is opened eagerly here.
    path: remote working directory for the tool.
    memcached_ip / memcached_port: target memcached endpoint.
    num_items: number of items to load.
    extra_params: extra command-line flags, if any.
    """
    self.server = server
    self.shell = RemoteMachineShellConnection(self.server)
    self.path = path
    self.memcached_ip = memcached_ip
    self.memcached_port = memcached_port
    self.num_items = num_items
    self.extra_params = extra_params
    self.log = logger.Logger.get_logger()
def set_up_dgm(self):
    """Download fragmented, DGM dataset onto each cluster node, if not
    already locally available.

    The number of vbuckets and database schema must match the
    target cluster.

    Shutdown all cluster nodes.

    Do a cluster-restore.

    Restart all cluster nodes.
    """
    bucket = self.param("bucket", "default")
    # nodes must be down while their data files are replaced
    ClusterOperationHelper.stop_cluster(self.input.servers)
    for server in self.input.servers:
        remote = RemoteMachineShellConnection(server)
        # TODO: Better way to pass num_nodes and db_size?
        self.get_data_files(remote, bucket, 1, 10)
        remote.disconnect()
    ClusterOperationHelper.start_cluster(self.input.servers)
def index_query_beer_sample(self):
    """Load the beer-sample bucket, build an FTS index over it, then run a
    simple match query expecting exactly 10 hits."""
    # delete default bucket
    self._cb_cluster.delete_bucket("default")
    master = self._cb_cluster.get_master_node()
    from lib.remote.remote_util import RemoteMachineShellConnection
    shell = RemoteMachineShellConnection(master)
    # install the beer-sample bucket through the REST sample loader
    shell.execute_command("""curl -v -u Administrator:password \
-X POST http://{0}:8091/sampleBuckets/install \
-d '["beer-sample"]'""".format(master.ip))
    shell.disconnect()
    self.sleep(20)
    bucket = self._cb_cluster.get_bucket_by_name("beer-sample")
    index = self.create_index(bucket, "beer-index")
    self.wait_for_indexing_complete()
    self.validate_index_count(equal_bucket_doc_count=True, zero_rows_ok=False)
    query = {"match": "cafe", "field": "name"}
    hits, _, _, _ = index.execute_query(query, zero_results_ok=False, expected_hits=10)
    self.log.info("Hits: %s" % hits)
def stop_measure_sched_delay(self):
    """Kill every running measure-sched-delays process on all servers."""
    kill_cmd = "killall -9 -r .*measure-sched-delays"
    for server in self.servers:
        conn = RemoteMachineShellConnection(server)
        out, err = conn.execute_command(kill_cmd)
        conn.log_command_output(out, err)
        conn.disconnect()
        self.log.info("measure-sched-delays was stopped on {0}".format(server.ip))
def warmup(self, collect_stats=True, flush_os_cache=False):
    """
    Restart cluster and wait for it to warm up.
    In current version, affect the master node only.

    collect_stats: publish warmup timings through the stats collector.
    flush_os_cache: drop OS caches before restart so warmup reads from disk.
    """
    if not self.input.servers:
        print "[warmup error] empty server list"
        return
    if collect_stats:
        client_id = self.parami("prefix", 0)
        test_params = {'test_time': time.time(), 'test_name': self.id(), 'json': 0}
        sc = self.start_stats(self.spec_reference + ".warmup",
                              test_params=test_params, client_id=client_id)
    print "[warmup] preparing to warmup cluster ..."
    server = self.input.servers[0]
    shell = RemoteMachineShellConnection(server)
    start_time = time.time()
    print "[warmup] stopping couchbase ... ({0}, {1})"\
        .format(server.ip, time.strftime(PerfDefaults.strftime))
    shell.stop_couchbase()
    print "[warmup] couchbase stopped ({0}, {1})"\
        .format(server.ip, time.strftime(PerfDefaults.strftime))
    if flush_os_cache:
        print "[warmup] flushing os cache ..."
        shell.flush_os_caches()
    shell.start_couchbase()
    print "[warmup] couchbase restarted ({0}, {1})"\
        .format(server.ip, time.strftime(PerfDefaults.strftime))
    self.wait_until_warmed_up()
    print "[warmup] warmup finished"
    end_time = time.time()
    # warmup performs no ops; zeroed totals keep the stats schema consistent
    ops = {'tot-sets': 0, 'tot-gets': 0, 'tot-items': 0, 'tot-creates': 0,
           'tot-misses': 0, "start-time": start_time, "end-time": end_time}
    if collect_stats:
        self.end_stats(sc, ops, self.spec_reference + ".warmup")
def create_required_buckets(self):
    """Create the GleambookUsers/GleambookMessages/ChirpMessages buckets.

    The bucket quota is the node's free memory minus the quotas of every
    other active service and a 1GB threshold; each bucket receives a third
    of it.  Cursor-dropping marks are lowered on each bucket via
    ns_server /diag/eval, and a primary index is built on each bucket.
    """
    self.log.info("Get the available memory quota")
    bucket_util = bucket_utils(self.master)
    self.info = bucket_util.rest.get_nodes_self()
    threadhold_memory = 1024  # MB held back from the bucket quota
    total_memory_in_mb = self.info.memoryFree / 1024 ** 2
    total_available_memory_in_mb = total_memory_in_mb
    active_service = self.info.services
    # subtract the quota already reserved by every other running service
    if "index" in active_service:
        total_available_memory_in_mb -= self.info.indexMemoryQuota
    if "fts" in active_service:
        total_available_memory_in_mb -= self.info.ftsMemoryQuota
    if "cbas" in active_service:
        total_available_memory_in_mb -= self.info.cbasMemoryQuota
    if "eventing" in active_service:
        total_available_memory_in_mb -= self.info.eventingMemoryQuota
    print(total_memory_in_mb)
    available_memory = total_available_memory_in_mb - threadhold_memory
    self.rest.set_service_memoryQuota(service='memoryQuota', memoryQuota=available_memory)
    self.rest.set_service_memoryQuota(service='cbasMemoryQuota', memoryQuota=available_memory - 1024)
    self.rest.set_service_memoryQuota(service='indexMemoryQuota', memoryQuota=available_memory - 1024)
    self.log.info("Create CB buckets")
    self.create_bucket(self.master, "GleambookUsers", bucket_ram=available_memory / 3)
    self.create_bucket(self.master, "GleambookMessages", bucket_ram=available_memory / 3)
    self.create_bucket(self.master, "ChirpMessages", bucket_ram=available_memory / 3)
    # lower the cursor-dropping marks on each bucket via /diag/eval
    shell = RemoteMachineShellConnection(self.master)
    command = 'curl -i -u Administrator:password --data \'ns_bucket:update_bucket_props("ChirpMessages", [{extra_config_string, "cursor_dropping_upper_mark=70;cursor_dropping_lower_mark=50"}]).\' http://%s:8091/diag/eval' % self.master
    shell.execute_command(command)
    command = 'curl -i -u Administrator:password --data \'ns_bucket:update_bucket_props("GleambookMessages", [{extra_config_string, "cursor_dropping_upper_mark=70;cursor_dropping_lower_mark=50"}]).\' http://%s:8091/diag/eval' % self.master
    shell.execute_command(command)
    command = 'curl -i -u Administrator:password --data \'ns_bucket:update_bucket_props("GleambookUsers", [{extra_config_string, "cursor_dropping_upper_mark=70;cursor_dropping_lower_mark=50"}]).\' http://%s:8091/diag/eval' % self.master
    shell.execute_command(command)
    result = RestConnection(self.query_node).query_tool("CREATE PRIMARY INDEX idx_GleambookUsers ON GleambookUsers;")
    self.sleep(10, "wait for index creation.")
    self.assertTrue(result['status'] == "success")
    result = RestConnection(self.query_node).query_tool("CREATE PRIMARY INDEX idx_GleambookMessages ON GleambookMessages;")
    self.sleep(10, "wait for index creation.")
    self.assertTrue(result['status'] == "success")
    result = RestConnection(self.query_node).query_tool("CREATE PRIMARY INDEX idx_ChirpMessages ON ChirpMessages;")
    self.sleep(10, "wait for index creation.")
    self.assertTrue(result['status'] == "success")
def fetch_logs(self):
    """Copy every remote "sched-delay*" file from each server into the CWD.

    Fix: the comprehension/loop variable used to be named ``file``,
    shadowing the Python 2 builtin; renamed to ``entry``.
    """
    for server in self.servers:
        shell = RemoteMachineShellConnection(server)
        listing = shell.list_files(self.path + "/")
        sched_files = [entry for entry in listing
                       if entry["file"].startswith("sched-delay")]
        for entry in sched_files:
            shell.copy_file_remote_to_local(entry["path"] + entry["file"],
                                            os.getcwd() + "/" + entry["file"])
        self.log.info("copied {0} from {1}".format(
            [entry["file"] for entry in sched_files], server.ip))
        shell.disconnect()
def customize_xdcr_settings(self): """Set custom XDCR environment variables""" max_concurrent_reps_per_doc = self.param('max_concurrent_reps_per_doc', None) xdcr_doc_batch_size_kb = self.param('xdcr_doc_batch_size_kb', None) xdcr_checkpoint_interval = self.param('xdcr_checkpoint_interval', None) if max_concurrent_reps_per_doc: env_var = 'MAX_CONCURRENT_REPS_PER_DOC' value = max_concurrent_reps_per_doc elif xdcr_doc_batch_size_kb: env_var = 'XDCR_DOC_BATCH_SIZE_KB' value = xdcr_doc_batch_size_kb elif xdcr_checkpoint_interval: env_var = 'XDCR_CHECKPOINT_INTERVAL' value = xdcr_checkpoint_interval else: return print 'Changing {0} to {1}'.format(env_var, value) for server in self.input.servers: rc = RemoteMachineShellConnection(server) rc.set_environment_variable(env_var, value)
def check_if_eventing_consumers_are_cleaned_up(self):
    """Return True iff no eventing-consumer processes remain on any
    eventing node; returns None when skip_host_login is set (no ssh)."""
    if self.input.param("skip_host_login", False):
        log.warning(
            "-->Skipping check_if_eventing_consumers_are_cleaned_up as "
            "skip_host_login is set!")
        return
    eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    array_of_counts = []
    command = "ps -ef | grep eventing-consumer | grep -v grep | wc -l"
    for eventing_node in eventing_nodes:
        shell = RemoteMachineShellConnection(eventing_node)
        count, error = shell.execute_non_sudo_command(command)
        # execute_non_sudo_command may hand back a list of output lines
        if isinstance(count, list):
            count = int(count[0])
        else:
            count = int(count)
        log.info("Node : {0} , eventing_consumer processes running : {1}".
                 format(eventing_node.ip, count))
        array_of_counts.append(count)
    count_of_all_eventing_consumers = sum(array_of_counts)
    if count_of_all_eventing_consumers != 0:
        return False
    return True
def reboot_server(self, server):
    """Reboot server, then block until it is warmed up and reachable again."""
    conn = RemoteMachineShellConnection(server)
    conn.reboot_node()
    conn.disconnect()
    # wait for restart and warmup on all node
    self.sleep(self.wait_timeout * 2)
    # disable firewall on these nodes
    self.stop_firewall_on_node(server)
    # wait till node is ready after warmup
    ClusterOperationHelper.wait_for_ns_servers_or_assert([server], self,
                                                         wait_if_warmup=True)
def perform_cb_collect(_input, log_path=None):
    """Trigger cb_collect via REST on every node and copy the zips locally.

    _input: test input object exposing .servers.
    log_path: local directory the per-node zip is downloaded into.
    NOTE(review): the active_tasks poll loop has no timeout -- it spins
    forever if a collection never reaches 'completed'.
    """
    import logger
    log = logger.Logger.get_logger()
    for node in _input.servers:
        params = dict()
        if len(_input.servers) != 1:
            params['nodes'] = 'ns_1@' + node.ip
        else:
            # In case of single node we have to pass ip as below
            params['nodes'] = 'ns_1@' + '127.0.0.1'
        log.info('Collecting log on node ' + node.ip)
        rest = RestConnection(node)
        status, _, _ = rest.perform_cb_collect(params)
        time.sleep(10)  # This is needed as it takes a few seconds before the collection start
        log.info('CB collect status on %s is %s' % (node.ip, status))
        log.info('Polling active task endpoint to check CB collect status')
        if status is True:
            cb_collect_response = {}
            while True:
                content = rest.active_tasks()
                for response in content:
                    if response['type'] == 'clusterLogsCollection':
                        cb_collect_response = response
                        break
                if cb_collect_response['status'] == 'completed':
                    log.info(cb_collect_response)
                    break
                else:
                    time.sleep(10)  # CB collect in progress, wait for 10 seconds and check progress again
            log.info('Copy CB collect ZIP file to Client')
            remote_client = RemoteMachineShellConnection(node)
            cb_collect_path = cb_collect_response['perNode'][params['nodes']]['path']
            zip_file_copied = remote_client.get_file(
                os.path.dirname(cb_collect_path),
                os.path.basename(cb_collect_path), log_path)
            log.info('%s node cb collect zip coped on client : %s'
                     % (node.ip, zip_file_copied))
            # remove the remote zip only after a confirmed download
            if zip_file_copied:
                remote_client.execute_command("rm -f %s" % cb_collect_path)
                remote_client.disconnect()
def load(self, generators_load):
    """Materialize generated documents as one .json file per key under
    <directory>/data/default/<bucket_name>/ on the remote machine.

    generators_load: doc generators; they are deep-copied so the caller's
    generators are not consumed by this run.
    """
    gens_load = []
    for generator_load in generators_load:
        gens_load.append(copy.deepcopy(generator_load))
    # total item count, for logging only
    items = 0
    for gen_load in gens_load:
        items += (gen_load.end - gen_load.start)
    shell = RemoteMachineShellConnection(self.server)
    try:
        self.log.info("Delete directory's content %s/data/default/%s ..." % (self.directory, self.bucket_name))
        shell.execute_command('rm -rf %s/data/default/*' % self.directory)
        self.log.info("Create directory %s/data/default/%s..." % (self.directory, self.bucket_name))
        shell.execute_command('mkdir -p %s/data/default/%s' % (self.directory, self.bucket_name))
        self.log.info("Load %s documents to %s/data/default/%s..." % (items, self.directory, self.bucket_name))
        for gen_load in gens_load:
            # one echo per document; the key becomes the file name
            for i in xrange(gen_load.end):
                key, value = gen_load.next()
                out = shell.execute_command("echo '%s' > %s/data/default/%s/%s.json" % (value, self.directory, self.bucket_name, key))
        self.log.info("LOAD IS FINISHED")
    finally:
        shell.disconnect()
def check_eventing_logs_for_panic(self):
    """Scan eventing.log on every eventing node for panics and count core
    dumps in the crash directory; findings are logged, nothing is returned.

    self.panic_count tracks the last seen panic total so only growth is
    reported on subsequent calls.
    """
    self.generate_map_nodes_out_dist()
    panic_str = "panic"
    eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    if not eventing_nodes:
        return None
    for eventing_node in eventing_nodes:
        shell = RemoteMachineShellConnection(eventing_node)
        # resolve ns_server's log directory on that node via diag/eval
        _, dir_name = RestConnection(eventing_node).diag_eval(
            'filename:absname(element(2, application:get_env(ns_server,error_logger_mf_dir))).')
        eventing_log = str(dir_name) + '/eventing.log*'
        count, err = shell.execute_command(
            "zgrep \"{0}\" {1} | wc -l".format(panic_str, eventing_log))
        # execute_command may hand back a list of output lines
        if isinstance(count, list):
            count = int(count[0])
        else:
            count = int(count)
        # only report when the panic count grew since the last check
        if count > self.panic_count:
            log.info("===== PANIC OBSERVED IN EVENTING LOGS ON SERVER {0}=====".
                     format(eventing_node.ip))
            panic_trace, _ = shell.execute_command(
                "zgrep \"{0}\" {1}".format(panic_str, eventing_log))
            log.info("\n {0}".format(panic_trace))
            self.panic_count = count
        os_info = shell.extract_remote_info()
        if os_info.type.lower() == "windows":
            # This is a fixed path in all windows systems inside couchbase
            dir_name_crash = 'c://CrashDumps'
        else:
            dir_name_crash = str(dir_name) + '/../crash/'
        core_dump_count, err = shell.execute_command(
            "ls {0}| wc -l".format(dir_name_crash))
        if isinstance(core_dump_count, list):
            core_dump_count = int(core_dump_count[0])
        else:
            core_dump_count = int(core_dump_count)
        if core_dump_count > 0:
            log.info(
                "===== CORE DUMPS SEEN ON EVENTING NODES, SERVER {0} : {1} crashes seen ====="
                .format(eventing_node.ip, core_dump_count))
        shell.disconnect()
def fetch_logs(self):
    """Copy every remote "sched-delay*" file from each server into the CWD.

    Fix: the comprehension/loop variable used to be named ``file``,
    shadowing the Python 2 builtin; renamed to ``entry``.
    """
    for server in self.servers:
        shell = RemoteMachineShellConnection(server)
        listing = shell.list_files(self.path + "/")
        sched_files = [
            entry for entry in listing
            if entry["file"].startswith("sched-delay")
        ]
        for entry in sched_files:
            shell.copy_file_remote_to_local(
                entry["path"] + entry["file"],
                os.getcwd() + "/" + entry["file"])
        self.log.info("copied {0} from {1}".format(
            [entry["file"] for entry in sched_files], server.ip))
        shell.disconnect()
def test_tls_1_dot_2_blocking(self):
    """
    1. Set tls version = 1.3
    2. Restart couchbase server
    3. Verify tls version = 1.3 and not set to 1.2(default)
    """
    RestConnection(self.master).set_min_tls_version(version="tlsv1.3")
    self.test_tls_min_version()
    try:
        # bounce couchbase on every node so the new tls floor takes effect
        for node in self.servers:
            conn = RemoteMachineShellConnection(node)
            conn.stop_couchbase()
            time.sleep(10)
            conn.start_couchbase()
            conn.disconnect()
    except Exception as err:
        self.fail(err)
    self.test_tls_min_version()
def eject_nodes(self, eject_nodes_structure=None, eject_type=None):
    """Eject nodes described by eject_nodes_structure from the cluster.

    eject_nodes_structure: "group-D:F|group2-D" style spec -- server-group
    name, '-', then ':'-separated node types (D = kv/data, F = fts).
    eject_type: "remove", "failover", "shutdown", or
    "shutdown_no_rebalance".
    Returns the list of ejected node objects.
    """
    eject_server_groups = eject_nodes_structure.split("|")
    eject_nodes = []
    for eject_server_group in eject_server_groups:
        group_name = eject_server_group.split("-")[0]
        node_types = eject_server_group.split("-")[1]
        target_zone_nodes = self.rest.get_nodes_in_zone(group_name)
        node_type_arr = node_types.split(":")
        for node_type in node_type_arr:
            if 'D' == node_type:
                # pick the first kv node of the zone not already selected
                for kv_node in self._cb_cluster.get_kv_nodes():
                    if kv_node.ip in target_zone_nodes.keys():
                        if kv_node not in eject_nodes:
                            eject_nodes.append(kv_node)
                            break
            elif 'F' == node_type:
                # pick the first fts node of the zone not already selected
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if fts_node.ip in target_zone_nodes.keys():
                        if fts_node not in eject_nodes:
                            eject_nodes.append(fts_node)
                            break
            else:
                self.fail("Unsupported node type found in nodes to eject.")
    for node in eject_nodes:
        if "remove" == eject_type:
            eject_nodes.append  # no-op marker removed; see below
        if "remove" == eject_type:
            self._cb_cluster.rebalance_out_node(node=node)
        elif "failover" == eject_type:
            self._cb_cluster.failover(graceful=False, node=node)
            #self._cb_cluster.rebalance_failover_nodes()
        elif "shutdown" == eject_type:
            remote = RemoteMachineShellConnection(node)
            remote.stop_couchbase()
            self._cb_cluster.failover(graceful=False, node=node)
            self._cb_cluster.rebalance_failover_nodes()
        elif "shutdown_no_rebalance" == eject_type:
            remote = RemoteMachineShellConnection(node)
            remote.stop_couchbase()
    return eject_nodes
def _load_snapshot(self, server, bucket, file_base=None, overwrite=True):
    """Load data files from a snapshot.

    Looks for the versioned snapshot tarball in "<data>-snapshots" next to
    the data path; returns False when it is missing.  When overwrite is
    False the current data files are saved to a timestamped snapshot first.
    Returns True on success.
    """
    dest_data_path = os.path.dirname(server.data_path or testconstants.COUCHBASE_DATA_PATH)
    src_data_path = "{0}-snapshots".format(dest_data_path)
    self.log.info(
        "server={0}, src_data_path={1}, dest_data_path={2}".format(
            server.ip, src_data_path, dest_data_path))
    shell = RemoteMachineShellConnection(server)
    # tarball name embeds the server's full build version
    build_name, short_version, full_version = \
        shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")
    src_file = self._build_tar_name(bucket, full_version, file_base)
    if not shell.file_exists(src_data_path, src_file):
        self.log.error("file '{0}/{1}' does not exist".format(
            src_data_path, src_file))
        shell.disconnect()
        return False
    if not overwrite:
        # preserve the current data files before they get replaced
        self._save_snapshot(server, bucket, "{0}.tar.gz".format(
            time.strftime(PerfDefaults.strftime)))  # TODO: filename
    rm_cmd = "rm -rf {0}/{1} {0}/{1}-data {0}/_*".format(
        dest_data_path, bucket)
    self._exec_and_log(shell, rm_cmd)
    unzip_cmd = "cd {0}; tar -xvzf {1}/{2}".format(dest_data_path,
                                                   src_data_path, src_file)
    self._exec_and_log(shell, unzip_cmd)
    shell.disconnect()
    return True
def get_scope_item_count(self, bucket, scope, node=None):
    """Sum the item counts of every collection in ``scope`` of ``bucket``.

    node: None (use self.node), a single node, or a list of nodes whose
    counts are added together.  Returns the total as an int.
    NOTE(review): the shell opened per node by RemoteMachineShellConnection
    is never disconnected here.
    """
    count = 0
    if not node:
        nodes = [self.node]
    elif isinstance(node, list):
        nodes = node
    else:
        nodes = [node]
    for node in nodes:
        cbstats, _ = RemoteMachineShellConnection(
            node).get_collection_stats(bucket)
        scope_id = self.get_scope_id(bucket, scope, cbstats)
        # map "<scope_id>:<collection_id>" -> items from ":items:" stat lines
        id_counts = {}
        for stat in cbstats:
            if ":items:" in stat:
                stat = stat.replace(' ', '')
                id_counts[stat.split(":items:")[0]] = int(
                    stat.split(":items:")[1])
        # add up only the collections whose id prefix matches this scope
        for id in id_counts.keys():
            if id.split(':')[0] == scope_id:
                count += id_counts[id]
    return count
def rest_api_renameNode(self):
    """Rename a node via the REST API and verify it is referred to by
    hostname (positive path) or that the expected error is returned
    (negative path)."""
    try:
        self.shell = RemoteMachineShellConnection(self.master)
        #com_inst_build = "cat /opt/couchbase/VERSION.txt"
        #out = self.shell.execute_command(com_inst_build.format(com_inst_build))
        self.install_builds(self.builds, self.servers[0:1])
        if self.is_negative_test:
            master_rest = RestConnection(self.master)
            self.log.info("Renaming node {0} to {1}".format(self.master, self.name))
            var = master_rest.rename_node(username=self.master.rest_username,
                                          password=self.master.rest_password,
                                          port=self.master.port,
                                          hostname=self.name,
                                          is_negative_test=True)
            out = var.pop()
            self.assertEqual(out, self.error,
                             msg="Fail to find correct error. The error should be {0}, but we got : {1}".format(self.error, out))
            self.log.info("Got correct error - {0}....Passing the test".format(out))
        else:
            self.log.info("Node {0} is referred via IP. Changing the name of the node".format(self.servers[0:1]))
            hostname = []
            info = self.shell.extract_remote_info()
            domain = ''.join(info.domain[0])
            hostname.append(info.hostname[0] + "." + domain)
            # FIX: convert_to_hostname is a bound method; the original passed
            # `self` explicitly, which shifted every argument by one
            # (servers_with_hostnames received `self`).
            self.convert_to_hostname(self.servers[0:1])
            self.log.info("Calling get_node_self() to check the status of node {0}".format(self.servers[0:1]))
            obj = RestConnection(self.master)
            var = obj.get_nodes_self().hostname
            flag = True if self.master.ip in var else False
            self.assertEqual(flag, False,
                             msg="Fail - Node {0} is still referred via IP. Should have been referred via hostname. Failing the test!".format(self.master.ip))
            self.log.info("Name of node {0} got converted to hostname. Proceeding......!".format(self.master.ip))
            self.sleep(10)
            self.log.info("Now changing name of node {0} from hostname to IP".format(self.master.ip))
            var = obj.rename_node(username='******', password='******',
                                  port='', hostname=self.master.ip)
            self.log.info("Calling get_node_self() to check the status of the node {0}".format(self.master.ip))
            var = obj.get_nodes_self().hostname
            flag = True if self.master.ip in var else False
            self.assertEqual(flag, True,
                             msg="Fail - Node {0} is still referred via hostname. Should have been referred via IP. Failing the test!".format(self.master.ip))
            self.log.info("Node {0} referred via IP. Pass !".format(self.master.ip))
    finally:
        self.shell.disconnect()
def get_data_file_size(self, nodes, interval, bucket):
    """Periodically sample on-disk data file sizes for *bucket* on *nodes*
    every *interval* seconds until the task is aborted, storing the
    snapshots in self._task["data_size_stats"]."""
    # FIX: keep (node, shell) pairs together so a failed connection cannot
    # desynchronize the parallel nodes/shells lists the original kept.
    connections = []
    for node in nodes:
        try:
            connections.append((node, RemoteMachineShellConnection(node)))
        except Exception as error:
            log.error(error)
    # FIX: the original indexed shells[0] unconditionally -> IndexError
    # when every connection attempt failed.
    if not connections:
        log.error("no shell connections established; aborting data_size_stats")
        return
    paths = []
    if connections[0][1].is_couchbase_installed():
        bucket_path = self.data_path + '/{0}'.format(bucket)
        paths.append(bucket_path)
        paths.append(bucket_path + '/set_view_{0}_design'.format(bucket))
    else:
        paths.append(self.data_path + '/{0}-data'.format(bucket))
    d = {"snapshots": []}
    start_time = str(self._task["time"])
    while not self._aborted():
        time.sleep(interval)
        current_time = time.time()
        for node, shell in connections:
            unique_id = node.ip + '-' + start_time
            for path in paths:
                value = {
                    "file": path.split('/')[-1],
                    "size": shell.get_data_file_size(path),
                    "unique_id": unique_id,
                    "time": current_time,
                    "ip": node.ip,
                }
                d["snapshots"].append(value)
    # FIX: the original never disconnected the shells (connection leak)
    for _, shell in connections:
        shell.disconnect()
    self._task["data_size_stats"] = d["snapshots"]
    log.info("finished data_size_stats")
def run(self):
    """Poll the NRU (access scanner) run counter; once a scan completes,
    clear hot keys and trigger a delayed rebalance.

    FIX: converted Python 2 `print` statements to the print() function for
    consistency with the Python 3 code elsewhere in this file.
    """
    print("[NRUMonitor] started running")
    # TODO: evaluate all servers, smarter polling freq
    server = self.eperf.input.servers[0]
    self.shell = RemoteMachineShellConnection(server)
    nru_num = self.nru_num = self.get_nru_num()
    if self.nru_num < 0:
        return
    # wait until the access scanner counter advances (or the stat vanishes)
    while nru_num <= self.nru_num:
        print("[NRUMonitor] nru_num = %d, sleep for %d seconds"
              % (nru_num, self.freq))
        time.sleep(self.freq)
        nru_num = self.get_nru_num()
        if nru_num < 0:
            break
    gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
    speed, num_items, run_time = self.get_nru_speed()
    print("[NRUMonitor] access scanner finished at: %s, speed: %s, "
          "num_items: %s, run_time: %s"
          % (gmt_now, speed, num_items, run_time))
    self.eperf.clear_hot_keys()
    print("[NRUMonitor] scheduled rebalance after %d seconds"
          % self.reb_delay)
    self.shell.disconnect()
    self.eperf.latched_rebalance(delay=self.reb_delay, sync=True)
    gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
    print("[NRUMonitor] rebalance finished: %s" % gmt_now)
    print("[NRUMonitor] stopped running")
def convert_to_hostname(self, servers_with_hostnames, username='******',
                        password='******'):
    """For every server still referred to by IP, rename the cluster node to
    its fully-qualified hostname; returns the list of hostnames built."""
    try:
        hostname = []
        for server in servers_with_hostnames:
            shell = RemoteMachineShellConnection(server)
            info = shell.extract_remote_info()
            domain = ''.join(info.domain[0])
            if not domain:
                # without a DNS domain the node cannot be renamed sensibly
                output = shell.execute_command_raw('nslookup %s' % info.hostname[0])
                print(output)
                self.fail("Domain is not defined, couchbase cannot be configured correctly. NOT A BUG. CONFIGURATION ISSUE")
            hostname.append(info.hostname[0] + "." + domain)
            master_rest = RestConnection(server)
            current_hostname = master_rest.get_nodes_self().hostname
            self.log.info("get_node_self function returned : {0}".format(
                current_hostname))
            if server.ip not in current_hostname:
                self.log.info(
                    "Node {0} already referred via hostname. No need to convert the name"
                    .format(server.ip))
                continue
            self.log.info(
                "Node {0} is referred via IP. Need to be referred with hostname. Changing the name of the node!!"
                .format(server.ip))
            version = RestConnection(server).get_nodes_self().version
            if (version.startswith("1.8.1")
                    or version.startswith("2.0.0")
                    or version.startswith("2.0.1")):
                # legacy releases: rename via server-side settings file
                RemoteUtilHelper.use_hostname_for_server_settings(server)
                master_rest.init_cluster()
            else:
                master_rest.init_cluster()
                master_rest.rename_node(username=username, password=password,
                                        port='', hostname=hostname[-1])
    finally:
        shell.disconnect()
    return hostname
def get_collection_item_count(self, bucket, scope, collection, node=None):
    """Return the item count of *collection* (in *scope* of *bucket*),
    summed across the given node(s).

    node: None -> use self.node; may also be a single node or a list.
    """
    if not node:
        nodes = [self.node]
    elif isinstance(node, list):
        nodes = node
    else:
        nodes = [node]
    count = 0
    for target in nodes:
        shell = RemoteMachineShellConnection(target)
        try:
            cbstats, _ = shell.execute_cbstats(bucket, "collections",
                                               cbadmin_user="******")
        finally:
            # FIX: the original never closed this connection (leak)
            shell.disconnect()
        collection_id = self.get_collection_id(bucket, scope, collection,
                                               cbstats)
        id_counts = {}
        for stat in cbstats:
            if ":items:" in stat:
                parts = stat.replace(' ', '').split(":items:")
                id_counts[parts[0].strip()] = int(parts[1])
        for ident, items in id_counts.items():
            if ident == collection_id:
                count += items
    return count
def _load_snapshot(self, server, bucket, file_base=None, overwrite=True):
    """Load data files from a snapshot"""
    data_dir = os.path.dirname(server.data_path or
                               testconstants.COUCHBASE_DATA_PATH)
    snapshot_dir = "{0}-snapshots".format(data_dir)
    self.log.info("server={0}, src_data_path={1}, dest_data_path={2}"
                  .format(server.ip, snapshot_dir, data_dir))
    shell = RemoteMachineShellConnection(server)
    build_name, short_version, full_version = \
        shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")
    tar_name = self._build_tar_name(bucket, full_version, file_base)
    if shell.file_exists(snapshot_dir, tar_name):
        if not overwrite:
            # archive the current data files before overwriting them
            self._save_snapshot(server, bucket, "{0}.tar.gz".format(
                time.strftime(PerfDefaults.strftime)))  # TODO: filename
        # remove the live bucket data and unpack the snapshot over it
        wipe_cmd = "rm -rf {0}/{1} {0}/{1}-data {0}/_*".format(data_dir,
                                                               bucket)
        self._exec_and_log(shell, wipe_cmd)
        extract_cmd = "cd {0}; tar -xvzf {1}/{2}".format(data_dir,
                                                         snapshot_dir,
                                                         tar_name)
        self._exec_and_log(shell, extract_cmd)
        shell.disconnect()
        return True
    self.log.error("file '{0}/{1}' does not exist"
                   .format(snapshot_dir, tar_name))
    shell.disconnect()
    return False
def test_ingestion_after_kv_rollback(self):
    """Verify the CBAS dataset stays consistent with the KV bucket after a
    memcached kill + failover forces a KV rollback."""
    self.setup_for_test()
    # Stop Persistence on Node A & Node B
    self.log.info("Stopping persistence on NodeA & NodeB")
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[0],
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[1],
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()
    # Perform Create, Update, Delete ops in the CB bucket
    self.log.info("Performing Mutations")
    # FIX: integer division -- doc counts must be ints under Python 3
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "delete", 0,
                                           self.num_items // 2)
    # Validate no. of items in CBAS dataset
    if not self.validate_cbas_dataset_items_count(self.cbas_dataset_name,
                                                  self.num_items // 2, 0):
        self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
    # Count no. of items in CB & CBAS Buckets
    items_in_cb_bucket = self.get_item_count(self.master, self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    if items_in_cb_bucket != items_in_cbas_bucket:
        self.fail("Before Rollback : # Items in CBAS bucket does not match that in the CB bucket")
    # Kill memcached on Node A so that Node B becomes master
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()
    # FIX: the original leaked this shell connection
    shell.disconnect()
    # Start persistence on Node B
    self.log.info("Starting persistence on NodeB")
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[1],
                                                     self.cb_bucket_name)
    mem_client.start_persistence()
    # Failover Node B
    self.log.info("Failing over NodeB")
    self.sleep(10)
    failover_task = self._cb_cluster.async_failover(self.input.servers,
                                                    [self.input.servers[1]])
    failover_task.result()
    # Wait for Failover & CBAS rollback to complete
    self.sleep(60)
    # Count no. of items in CB & CBAS Buckets
    items_in_cb_bucket = self.get_item_count(self.master, self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    if items_in_cb_bucket != items_in_cbas_bucket:
        self.fail("After Rollback : # Items in CBAS bucket does not match that in the CB bucket")
def test_xdcr_with_security(self):
    # Exercises XDCR under a matrix of security settings (TLS level,
    # node-to-node encryption, autofailover, multiple CAs / client certs),
    # optionally combined with rebalance / failover / pause / reboot
    # actions. Everything is driven by test input params.
    # NOTE(review): reconstructed from collapsed source -- the nesting of a
    # few trailing statements (sleeps, add_back_node) should be confirmed
    # against version control.
    #Settings
    self.settings_values_map = {"autofailover": ["enable", None],
                                "n2n": ["enable", "disable"],
                                "tls": ["all", "control", "strict"]}
    self.apply_settings_before_setup = self._input.param(
        "apply_settings_before_setup", False)
    self.disable_autofailover = self._input.param("disable_autofailover", False)
    self.enable_n2n = self._input.param("enable_n2n", False)
    self.enforce_tls = self._input.param("enforce_tls", None)
    self.tls_level = self._input.param("tls_level", "control")
    self.enable_autofailover = self._input.param("enable_autofailover", False)
    self.disable_n2n = self._input.param("disable_n2n", None)
    self.disable_tls = self._input.param("disable_tls", None)
    rebalance_in = self._input.param("rebalance_in", None)
    rebalance_out = self._input.param("rebalance_out", None)
    swap_rebalance = self._input.param("swap_rebalance", None)
    failover = self._input.param("failover", None)
    graceful = self._input.param("graceful", None)
    pause = self._input.param("pause", None)
    reboot = self._input.param("reboot", None)
    initial_xdcr = self._input.param("initial_xdcr",
                                     random.choice([True, False]))
    random_setting = self._input.param("random_setting", False)
    multiple_ca = self._input.param("multiple_ca", None)
    use_client_certs = self._input.param("use_client_certs", None)
    int_ca_name = self._input.param("int_ca_name", "iclient1_clientroot")
    all_node_upload = self._input.param("all_node_upload", False)
    rotate_certs = self._input.param("rotate_certs", None)
    delete_certs = self._input.param("delete_certs", None)
    restart_pkey_nodes = self._input.param("restart_pkey_nodes", None)
    if not self.apply_settings_before_setup:
        # set up XDCR first, apply security settings afterwards
        if initial_xdcr:
            self.load_and_setup_xdcr()
        else:
            self.setup_xdcr_and_load()
    if self.enforce_tls:
        for cluster in self.get_cluster_objects_for_input(self.enforce_tls):
            if self.tls_level == "rotate":
                # cycle through every supported TLS level in turn
                for level in self.settings_values_map["tls"]:
                    cluster.toggle_security_setting(
                        [cluster.get_master_node()], "tls", level)
                    time.sleep(5)
            else:
                cluster.toggle_security_setting(
                    [cluster.get_master_node()], "tls", self.tls_level)
    #Revert to default (control) tls level
    if self.disable_tls:
        for cluster in self.get_cluster_objects_for_input(self.disable_tls):
            cluster.toggle_security_setting([cluster.get_master_node()],
                                            "tls")
    if self.enable_n2n:
        for cluster in self.get_cluster_objects_for_input(self.enable_n2n):
            cluster.toggle_security_setting([cluster.get_master_node()],
                                            "n2n", "enable")
    if self.disable_n2n:
        for cluster in self.get_cluster_objects_for_input(self.disable_n2n):
            cluster.toggle_security_setting([cluster.get_master_node()],
                                            "n2n")
    if self.enable_autofailover:
        for cluster in self.get_cluster_objects_for_input(
                self.enable_autofailover):
            cluster.toggle_security_setting([cluster.get_master_node()],
                                            "autofailover", "enable")
    if self.disable_autofailover:
        for cluster in self.get_cluster_objects_for_input(
                self.disable_autofailover):
            cluster.toggle_security_setting([cluster.get_master_node()],
                                            "autofailover")
    if random_setting:
        # flip one random (setting, value) pair per cluster
        for cluster in self.get_cluster_objects_for_input(random_setting):
            setting = random.choice(list(self.settings_values_map.keys()))
            value = random.choice(self.settings_values_map.get(setting))
            cluster.toggle_security_setting([cluster.get_master_node()],
                                            setting, value)
    if multiple_ca:
        for cluster in self.get_cluster_objects_for_input(multiple_ca):
            master = cluster.get_master_node()
            ntonencryptionBase().disable_nton_cluster([master])
            CbServer.x509 = x509main(host=master)
            for server in cluster.get_nodes():
                CbServer.x509.delete_inbox_folder_on_server(server=server)
            CbServer.x509.generate_multiple_x509_certs(
                servers=cluster.get_nodes())
            if all_node_upload:
                # each node uploads its own root CA
                for node_num in range(len(cluster.get_nodes())):
                    CbServer.x509.upload_root_certs(
                        server=cluster.get_nodes()[node_num],
                        root_ca_names=[
                            CbServer.x509.root_ca_names[node_num]
                        ])
            else:
                for server in cluster.get_nodes():
                    CbServer.x509.upload_root_certs(server)
            CbServer.x509.upload_node_certs(servers=cluster.get_nodes())
            if use_client_certs:
                CbServer.x509.upload_client_cert_settings(server=master)
                client_cert_path, client_key_path = \
                    CbServer.x509.get_client_cert(int_ca_name=int_ca_name)
                # Copy the certs onto the test machines
                for server in cluster.get_nodes():
                    shell = RemoteMachineShellConnection(server)
                    shell.execute_command(
                        f"mkdir -p {os.path.dirname(client_cert_path)}")
                    shell.copy_file_local_to_remote(
                        client_cert_path, client_cert_path)
                    shell.execute_command(
                        f"mkdir -p {CbServer.x509.CACERTFILEPATH}all")
                    shell.copy_file_local_to_remote(
                        f"{CbServer.x509.CACERTFILEPATH}all/all_ca.pem",
                        f"{CbServer.x509.CACERTFILEPATH}all/all_ca.pem")
                    shell.disconnect()
                self._client_cert = self._read_from_file(client_cert_path)
                self._client_key = self._read_from_file(client_key_path)
            self.add_built_in_server_user(node=master)
            ntonencryptionBase().setup_nton_cluster(
                [master], clusterEncryptionLevel="strict")
    if rotate_certs:
        for cluster in self.get_cluster_objects_for_input(rotate_certs):
            CbServer.x509.rotate_certs(cluster.get_nodes())
    if delete_certs:
        for cluster in self.get_cluster_objects_for_input(delete_certs):
            for node in cluster.get_nodes():
                CbServer.x509.delete_trusted_CAs(node)
    if restart_pkey_nodes:
        for cluster in self.get_cluster_objects_for_input(
                restart_pkey_nodes):
            for node in cluster.get_nodes():
                shell = RemoteMachineShellConnection(node)
                shell.restart_couchbase()
                shell.disconnect()
                time.sleep(10)
                cluster.failover_and_rebalance_nodes()
                cluster.add_back_node("delta")
    if self.apply_settings_before_setup:
        # settings were applied above; now create XDCR and load data
        if initial_xdcr:
            self.load_and_setup_xdcr()
        else:
            self.setup_xdcr_and_load()
    if pause:
        for cluster in self.get_cluster_objects_for_input(pause):
            for remote_cluster_refs in cluster.get_remote_clusters():
                remote_cluster_refs.pause_all_replications()
        time.sleep(60)
    if rebalance_in:
        for cluster in self.get_cluster_objects_for_input(rebalance_in):
            cluster.rebalance_in()
    if failover:
        for cluster in self.get_cluster_objects_for_input(failover):
            cluster.failover_and_rebalance_nodes(graceful=graceful,
                                                 rebalance=True)
    if rebalance_out:
        for cluster in self.get_cluster_objects_for_input(rebalance_out):
            cluster.rebalance_out()
    if swap_rebalance:
        for cluster in self.get_cluster_objects_for_input(swap_rebalance):
            cluster.swap_rebalance()
    if pause:
        for cluster in self.get_cluster_objects_for_input(pause):
            for remote_cluster_refs in cluster.get_remote_clusters():
                remote_cluster_refs.resume_all_replications()
    if reboot:
        for cluster in self.get_cluster_objects_for_input(reboot):
            cluster.warmup_node()
        time.sleep(60)
    self.perform_update_delete()
    self.verify_results()
def kill_erlang_service(self, server):
    """Kill the erlang (beam) process on *server*, restart couchbase and
    wait until the node has warmed up."""
    remote_client = RemoteMachineShellConnection(server)
    os_info = remote_client.extract_remote_info()
    # FIX: was log.info("os_info : {0}", os_info) -- logging uses %-style
    # lazy args, so the {0} placeholder was never substituted
    log.info("os_info : {0}".format(os_info))
    if os_info.type.lower() == "windows":
        remote_client.kill_erlang(os="windows")
    else:
        remote_client.kill_erlang()
    remote_client.start_couchbase()
    remote_client.disconnect()
    # wait for restart and warmup on all node
    self.sleep(self.wait_timeout * 2)
    # wait till node is ready after warmup
    ClusterOperationHelper.wait_for_ns_servers_or_assert(
        [server], self, wait_if_warmup=True)
def change_time_zone(self, server, timezone="UTC"):
    """Switch the OS time zone on *server* (default UTC) via timedatectl."""
    conn = RemoteMachineShellConnection(server)
    conn.execute_command("timedatectl set-timezone " + timezone)
    conn.disconnect()
def kill_producer(self, server):
    """Kill the eventing-producer process running on *server*."""
    conn = RemoteMachineShellConnection(server)
    conn.kill_eventing_process(name="eventing-producer")
    conn.disconnect()
def kill_memcached_service(self, server):
    """Kill the memcached process running on *server*."""
    conn = RemoteMachineShellConnection(server)
    conn.kill_memcached()
    conn.disconnect()
def test_auto_retry_failed_rebalance(self):
    """Force a rebalance to fail (couchbase restart mid-rebalance) and
    verify the auto-retry rebalance feature completes it, then validate
    the CBAS dataset count."""
    # Auto-retry rebalance settings
    body = {"enabled": "true", "afterTimePeriod": self.retry_time,
            "maxAttempts": self.num_retries}
    rest = RestConnection(self.master)
    rest.set_retry_rebalance_settings(body)
    result = rest.get_retry_rebalance_settings()
    self.log.info("Pick the incoming and outgoing nodes during rebalance")
    self.rebalance_type = self.input.param("rebalance_type", "in")
    nodes_to_add = [self.rebalanceServers[1]]
    nodes_to_remove = []
    reinitialize_cbas_util = False
    if self.rebalance_type == 'out':
        nodes_to_remove.append(self.rebalanceServers[1])
        self.add_node(self.rebalanceServers[1])
        nodes_to_add = []
    elif self.rebalance_type == 'swap':
        self.add_node(nodes_to_add[0], rebalance=False)
        nodes_to_remove.append(self.cbas_node)
        reinitialize_cbas_util = True
    self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " % (
        nodes_to_add, nodes_to_remove, self.rebalance_type))
    self.log.info("Creates cbas buckets and dataset")
    dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.setup_for_test()
    self.log.info("Perform async doc operations on KV")
    json_generator = JsonGenerator()
    # FIX: integer division -- docs_per_day must be an int under Python 3
    generators = json_generator.generate_docs_simple(
        docs_per_day=self.num_items * 3 // 2, start=self.num_items)
    kv_task = self._async_load_all_buckets(self.master, generators,
                                           "create", 0, batch_size=5000)
    self.log.info("Run concurrent queries on CBAS")
    handles = self.cbas_util._run_concurrent_queries(
        dataset_count_query, "async", self.num_concurrent_queries)
    self.log.info("Fetch the server to restart couchbase on")
    restart_couchbase_on_incoming_or_outgoing_node = self.input.param(
        "restart_couchbase_on_incoming_or_outgoing_node", True)
    if not restart_couchbase_on_incoming_or_outgoing_node:
        node = self.cbas_node
    else:
        node = self.rebalanceServers[1]
    shell = RemoteMachineShellConnection(node)
    try:
        self.log.info("Rebalance nodes")
        self.cluster.async_rebalance(self.servers, nodes_to_add,
                                     nodes_to_remove)
        self.sleep(10, message="Restarting couchbase after 10s on node %s" % node.ip)
        shell.restart_couchbase()
        self.sleep(30, message="Waiting for service to be back again...")
        self.sleep(self.retry_time,
                   "Wait for retry time to complete and then check the rebalance results")
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.log.info("Rebalance status : {0}".format(reached))
        self.sleep(20)
        self._check_retry_rebalance_succeeded()
        if reinitialize_cbas_util is True:
            self.cbas_util = cbas_utils(self.master, self.rebalanceServers[1])
            self.cbas_util.createConn("default")
            self.cbas_util.wait_for_cbas_to_recover()
        self.log.info("Get KV ops result")
        for task in kv_task:
            task.get_result()
        self.log.info("Log concurrent query status")
        self.cbas_util.log_concurrent_query_outcome(self.master, handles)
        self.log.info("Validate dataset count on CBAS")
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3 // 2, 0):
            self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
    except Exception as e:
        # FIX: Exception has no .message attribute in Python 3
        self.fail("Some exception occurred : {0}".format(str(e)))
    finally:
        # FIX: the original leaked this shell connection
        shell.disconnect()
        body = {"enabled": "false"}
        rest.set_retry_rebalance_settings(body)
def __init__(self, node):
    """Remember the target *node* and open an ssh shell connection to it."""
    self.node = node
    self.shell = RemoteMachineShellConnection(node)
def test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds(self):
    """Deliberately fail a rebalance by restarting couchbase mid-rebalance,
    then verify that a subsequent rebalance succeeds and the CBAS dataset
    count is intact."""
    self.log.info("Pick the incoming and outgoing nodes during rebalance")
    self.rebalance_type = self.input.param("rebalance_type", "in")
    nodes_to_add = [self.rebalanceServers[1]]
    nodes_to_remove = []
    reinitialize_cbas_util = False
    if self.rebalance_type == 'out':
        nodes_to_remove.append(self.rebalanceServers[1])
        self.add_node(self.rebalanceServers[1])
        nodes_to_add = []
    elif self.rebalance_type == 'swap':
        self.add_node(nodes_to_add[0], rebalance=False)
        nodes_to_remove.append(self.cbas_node)
        reinitialize_cbas_util = True
    self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " % (
        nodes_to_add, nodes_to_remove, self.rebalance_type))
    self.log.info("Creates cbas buckets and dataset")
    dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.setup_for_test()
    self.log.info("Perform async doc operations on KV")
    json_generator = JsonGenerator()
    # FIX: integer division -- docs_per_day must be an int under Python 3
    generators = json_generator.generate_docs_simple(
        docs_per_day=self.num_items * 3 // 2, start=self.num_items)
    kv_task = self._async_load_all_buckets(self.master, generators,
                                           "create", 0, batch_size=5000)
    self.log.info("Run concurrent queries on CBAS")
    handles = self.cbas_util._run_concurrent_queries(
        dataset_count_query, "async", self.num_concurrent_queries)
    self.log.info("Fetch the server to restart couchbase on")
    restart_couchbase_on_incoming_or_outgoing_node = self.input.param(
        "restart_couchbase_on_incoming_or_outgoing_node", True)
    if not restart_couchbase_on_incoming_or_outgoing_node:
        node = self.cbas_node
    else:
        node = self.rebalanceServers[1]
    shell = RemoteMachineShellConnection(node)
    self.log.info("Rebalance nodes")
    self.cluster.async_rebalance(self.servers, nodes_to_add, nodes_to_remove)
    self.log.info("Restart Couchbase on node %s" % node.ip)
    shell.restart_couchbase()
    # FIX: the original never closed this shell connection
    shell.disconnect()
    self.sleep(30, message="Waiting for service to be back again...")
    self.log.info("Verify subsequent rebalance is successful")
    nodes_to_add = []  # Node is already added to cluster in previous rebalance, adding it again will throw exception
    self.assertTrue(self.cluster.rebalance(self.servers, nodes_to_add,
                                           nodes_to_remove))
    if reinitialize_cbas_util is True:
        self.cbas_util = cbas_utils(self.master, self.rebalanceServers[1])
        self.cbas_util.createConn("default")
        self.cbas_util.wait_for_cbas_to_recover()
    self.log.info("Get KV ops result")
    for task in kv_task:
        task.get_result()
    self.log.info("Log concurrent query status")
    self.cbas_util.log_concurrent_query_outcome(self.master, handles)
    self.log.info("Validate dataset count on CBAS")
    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items * 3 // 2, 0):
        self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def test_items_append(self):
    # Appends random data onto a sampled subset of keys until each value
    # crosses `desired_item_size`, then verifies resulting sizes (and,
    # optionally, contents) and dumps allocator stats.
    self.desired_item_size = self.input.param("desired_item_size", 2048)
    self.append_size = self.input.param("append_size", 1024)
    self.fixed_append_size = self.input.param("fixed_append_size", True)
    self.append_ratio = self.input.param("append_ratio", 0.5)
    self._load_all_buckets(self.master, self.gen_create, "create", 0,
                           batch_size=1000, pause_secs=5, timeout_secs=100)
    for bucket in self.buckets:
        self.value_size = self.input.param("value_size", 512)
        verify_dict = {}
        vkeys, dkeys = bucket.kvs[1].key_set()
        key_count = len(vkeys)
        app_ratio = self.append_ratio * key_count
        # sample roughly append_ratio * key_count keys to append onto
        selected_keys = []
        i = 0
        for key in vkeys:
            i += 1
            if i >= app_ratio:
                break
            selected_keys.append(key)
        awareness = VBucketAwareMemcached(RestConnection(self.master),
                                          bucket.name)
        if self.kv_verify:
            # remember the starting values so appended content can be
            # verified later
            for key in selected_keys:
                value = awareness.memcached(key).get(key)[2]
                verify_dict[key] = value
        self.log.info("Bucket: {0}".format(bucket.name))
        self.log.info("Appending to have items whose initial size was " +
                      "{0} to equal or cross a size of {1}".format(
                          self.value_size, self.desired_item_size))
        self.log.info("Item-appending of {0} items starting ..".format(
            len(selected_keys) + 1))
        index = 3
        # grow every selected value until it reaches desired_item_size;
        # append size is either fixed or doubles each round (2**index)
        while self.value_size < self.desired_item_size:
            str_len = self.append_size
            if not self.fixed_append_size:
                str_len = int(math.pow(2, index))
            for key in selected_keys:
                random_string = self.random_str_generator(str_len)
                awareness.memcached(key).append(key, random_string)
                if self.kv_verify:
                    verify_dict[key] = verify_dict[key] + random_string
            self.log.info(
                "for {0} items size was increased to {1} Bytes".format(
                    len(selected_keys) + 1, self.value_size))
            self.value_size += str_len
            index += 1
        self.log.info("The appending of {0} items ended".format(
            len(selected_keys) + 1))
    for bucket in self.buckets:
        msg = "Bucket:{0}".format(bucket.name)
        self.log.info("VERIFICATION <" + msg +
                      ">: Phase 0 - Check the gap between " +
                      "mem_used by the bucket and total_allocated_bytes")
        stats = StatsCommon()
        mem_used_stats = stats.get_stats(self.servers, bucket, 'memory',
                                         'mem_used')
        total_allocated_bytes_stats = stats.get_stats(
            self.servers, bucket, 'memory', 'total_allocated_bytes')
        total_fragmentation_bytes_stats = stats.get_stats(
            self.servers, bucket, 'memory', 'total_fragmentation_bytes')
        for server in self.servers:
            self.log.info(
                "In {0} bucket {1}, total_fragmentation_bytes + the total_allocated_bytes = {2}"
                .format(server.ip, bucket.name,
                        (int(total_fragmentation_bytes_stats[server]) +
                         int(total_allocated_bytes_stats[server]))))
            self.log.info("In {0} bucket {1}, mem_used = {2}".format(
                server.ip, bucket.name, mem_used_stats[server]))
            self.log.info(
                "In {0} bucket {1}, the difference between actual memory used by memcached and mem_used is {2} times"
                .format(
                    server.ip, bucket.name,
                    float(
                        int(total_fragmentation_bytes_stats[server]) +
                        int(total_allocated_bytes_stats[server])) /
                    float(mem_used_stats[server])))
        self.log.info(
            "VERIFICATION <" + msg + ">: Phase1 - Check if any of the " +
            "selected keys have value less than the desired value size")
        for key in selected_keys:
            value = awareness.memcached(key).get(key)[2]
            if len(value) < self.desired_item_size:
                self.fail(
                    "Failed to append enough to make value size surpass the " +
                    "size {0}, key {1} has size {2}".format(
                        self.desired_item_size, key, len(value)))
        if self.kv_verify:
            self.log.info("VERIFICATION <" + msg +
                          ">: Phase2 - Check if the content " +
                          "after the appends match what's expected")
            for k in verify_dict:
                if awareness.memcached(k).get(k)[2] != verify_dict[k]:
                    self.fail(
                        "Content at key {0}: not what's expected.".format(
                            k))
        self.log.info("VERIFICATION <" + msg + ">: Successful")
    # dump allocator stats for post-mortem memory analysis
    shell = RemoteMachineShellConnection(self.master)
    shell.execute_cbstats("", "raw", keyname="allocator", vbid="")
    shell.disconnect()
def run(self):
    """Tar up the server's couch data directory remotely, download the
    archive locally, then remove it from the server."""
    shell = RemoteMachineShellConnection(self.server)
    stamp = datetime.now()
    file_name = "%s-%s%s%s-%s%s-couch.tar.gz" % (
        self.server.ip, stamp.month, stamp.day, stamp.year,
        stamp.timetuple().tm_hour, stamp.timetuple().tm_min)
    print("Collecting data files from %s\n" % self.server.ip)
    shell.extract_remote_info()
    data_path = self.__get_data_path(os_type=shell.info.type.lower())
    output, error = shell.execute_command(
        "tar -zcvf {0} '{1}' >/dev/null 2>&1".format(file_name, data_path))
    print("\n".join(output))
    print("\n".join(error))
    # archive lands in the ssh user's home directory
    user_path = "/" if self.server.ssh_username == "root" else "/home/"
    remote_path = "%s%s" % (user_path, self.server.ssh_username)
    if not shell.file_exists(remote_path, file_name):
        raise Exception("%s doesn't exists on server" % file_name)
    downloaded = shell.get_file(remote_path, file_name,
                                "%s/%s" % (self.path, file_name))
    if not downloaded:
        raise Exception("Fail to download zipped logs from %s"
                        % self.server.ip)
    shell.execute_command("rm -f %s" % os.path.join(remote_path, file_name))
    shell.disconnect()
class MemcachetestRunner():
    """Builds (if necessary) and runs the `memcachetest` load tool against a
    memcached endpoint, driving everything over an ssh shell to *server*."""

    def __init__(self, server, path="/tmp/", memcached_ip="localhost",
                 memcached_port="11211", num_items=100000, extra_params=""):
        self.server = server
        self.shell = RemoteMachineShellConnection(self.server)
        self.path = path  # where memcachetest is cloned/built if not installed
        self.memcached_ip = memcached_ip
        self.memcached_port = memcached_port
        self.num_items = num_items
        self.extra_params = extra_params
        self.log = logger.Logger.get_logger()

    def start_memcachetest(self):
        # check that memcachetest already installed
        exists = self.shell.file_exists('/usr/local/bin/', 'memcachetest')
        if not exists:
            # try to get from git and install
            output, error = self.shell.execute_command_raw(
                "cd {0}; git clone git://github.com/membase/memcachetest.git".format(self.path))
            self.shell.log_command_output(output, error)
            # FIX: guard output[0] (IndexError on empty output), and raise
            # instead of self.fail() -- this class is not a TestCase
            if output and "git: command not found" in output[0]:
                raise Exception("Git should be installed on hosts!")
            output, error = self.shell.execute_command_raw(
                "cd {0}/memcachetest; ./config/autorun.sh && ./configure && make install".format(self.path))
            self.shell.log_command_output(output, error)
        else:
            # FIX: dropped the stray second format() argument (the message
            # has a single placeholder)
            self.log.info(
                "memcachetest already set on {0}:/usr/local/bin/memcachetest".format(self.server.ip))
        self.stop_memcachetest()
        return self.launch_memcachetest()

    def launch_memcachetest(self):
        """Run memcachetest (installed binary if present, else the local
        build) and return the logged command output."""
        exists = self.shell.file_exists('/usr/local/bin/', 'memcachetest')
        if not exists:
            command = "{0}/memcachetest/memcachetest -h {1}:{2} -i {3} {4}".format(
                self.path, self.memcached_ip, self.memcached_port,
                self.num_items, self.extra_params)
        else:
            command = "/usr/local/bin/memcachetest -h {0}:{1} -i {2} {3}".format(
                self.memcached_ip, self.memcached_port, self.num_items,
                self.extra_params)
        output, error = self.shell.execute_command_raw(command)
        return self.shell.log_command_output(
            output, error, track_words=("downstream timeout", ))

    def stop_memcachetest(self):
        """Kill any running memcachetest process on the server."""
        cmd = "killall memcachetest"
        output, error = self.shell.execute_command(cmd)
        self.shell.log_command_output(output, error)
        self.log.info("memcachetest was stopped on {0}".format(self.server.ip))
def __init__(self, node):
    """Grab a logger and open an ssh shell connection to *node*."""
    self.log = lib.logger.Logger.get_logger()
    self.shell = RemoteMachineShellConnection(node)