def execute_sgload(lgs_host, sgload_arg_list, sg_host):
    """Run the ``sgload`` tool on a remote load-generator host.

    The argument list is first passed through ``add_sync_gateway_url`` together
    with ``sg_host``, then flattened into a single command line, e.g.
    ["--createreaders", "--numreaders", "100"] becomes
    "sgload --createreaders --numreaders 100".

    Args:
        lgs_host: Host the command is executed on (via ``RemoteExecutor``).
        sgload_arg_list: List of sgload command line arguments.
        sg_host: Sync Gateway host handed to ``add_sync_gateway_url``.
    """
    # Point the arguments at the appropriate Sync Gateway
    args_with_sg_url = add_sync_gateway_url(sgload_arg_list, sg_host)

    # list -> space separated string, prefixed with the binary name
    remote_command = "sgload {}".format(" ".join(args_with_sg_url))

    executor = RemoteExecutor(lgs_host)

    log_info(remote_command)
    executor.must_execute(remote_command)

    log_info("execute_sgload done.")
class CouchbaseServer:
    """Administers a single Couchbase Server node.

    REST calls are issued against ``self.url`` through ``self._session`` and
    service start / stop commands are run on ``self.host`` through
    ``self.remote_executor``.

    NOTE: the admin credentials ("Administrator" / "password") and the admin
    port (":8091") are hard-coded; urls using a different port will keep the
    port as part of ``self.host``.
    """

    def __init__(self, url):
        """Store connection details for the server reachable at ``url``.

        Args:
            url: Admin endpoint of the node, e.g. "http://192.168.33.10:8091".
        """
        self.url = url

        # Strip http prefix and port to store host
        self.host = self._host_from_url(url)
        self.remote_executor = RemoteExecutor(self.host)

        self._session = Session()
        self._session.auth = ("Administrator", "password")

    @staticmethod
    def _host_from_url(url):
        """Return ``url`` without the "http://" prefix and ":8091" port."""
        return url.replace("http://", "").replace(":8091", "")

    @staticmethod
    def _otp_nodes(hosts):
        """Return ``hosts`` as a comma separated list of "ns_1@<host>" node names."""
        return ",".join("ns_1@{}".format(host) for host in hosts)

    def delete_buckets(self):
        """Delete every bucket on the server, retrying up to 3 times.

        Raises:
            CBServerError: if some bucket could still not be deleted after
                the last attempt.
        """
        max_attempts = 3

        for _ in range(max_attempts):
            resp = self._session.get("{}/pools/default/buckets".format(self.url))
            log_r(resp)
            resp.raise_for_status()

            existing_bucket_names = [entry["name"] for entry in json.loads(resp.text)]

            log_info("Existing buckets: {}".format(existing_bucket_names))
            log_info("Deleting buckets: {}".format(existing_bucket_names))

            # HACK around Couchbase Server issue where issuing a bucket delete
            # via REST occasionally returns 500 error
            delete_num = 0
            for bucket_name in existing_bucket_names:
                resp = self._session.delete("{0}/pools/default/buckets/{1}".format(self.url, bucket_name))
                log_r(resp)
                if resp.status_code == 200:
                    delete_num += 1

            if delete_num == len(existing_bucket_names):
                return

            # A 500 error may have occurred, query for buckets and try to delete them again
            time.sleep(5)

        # Max retries hit without every delete succeeding
        raise CBServerError("Max retries for bucket deletion hit. Could not delete buckets!")

    def wait_for_ready_state(self):
        """Poll until every server node reports a "healthy" status.

        Avoids sync_gateway startup failures. Work around for
        https://github.com/couchbase/sync_gateway/issues/1745

        Raises:
            Exception: if the nodes are not all healthy within
                ``keywords.constants.CLIENT_REQUEST_TIMEOUT`` seconds.
        """
        start = time.time()
        while True:

            if time.time() - start > keywords.constants.CLIENT_REQUEST_TIMEOUT:
                raise Exception("wait_for_ready_state: TIMEOUT")

            # Verify the server is in a "healthy", not "warmup" state
            try:
                resp = self._session.get("{}/pools/nodes".format(self.url))
                log_r(resp)
            except ConnectionError:
                # If bringing a server online, there may be some connection
                # issues. Continue and try again.
                time.sleep(1)
                continue

            resp_obj = resp.json()

            unhealthy_statuses = [
                node["status"] for node in resp_obj["nodes"] if node["status"] != "healthy"
            ]
            if unhealthy_statuses:
                for status in unhealthy_statuses:
                    log_info("Node is still not healthy. Status: {} Retrying ...".format(status))
                # Sleep once per poll, not once per unhealthy node
                time.sleep(1)
                continue

            # All nodes are healthy if it made it to here
            log_info("All nodes are healthy")
            log_debug(resp_obj)
            return

    def get_available_ram(self):
        """Return the highest ``mem_total`` reported by any node.

        The value is read from the "/pools/default" REST endpoint.

        Workaround for https://github.com/couchbaselabs/mobile-testkit/issues/709
        where some nodes report mem_total = 0: all nodes are inspected and the
        highest value wins (0 if no node reports more).
        """
        resp = self._session.get("{}/pools/default".format(self.url))
        resp.raise_for_status()
        resp_json = resp.json()

        # Seed with 0 so an empty node list still yields 0
        return max([0] + [node["systemStats"]["mem_total"] for node in resp_json["nodes"]])

    def create_buckets(self, bucket_names):
        """Create one bucket per name, splitting the usable RAM evenly.

        80% of the RAM reported by ``get_available_ram`` minus 512 MB reserved
        for the N1QL indexer is divided by the number of buckets.

        Args:
            bucket_names: Names of the buckets to create. Nothing is done
                when it is empty.
        """
        if not bucket_names:
            return

        log_info("Creating buckets: {}".format(bucket_names))

        ram_multiplier = 0.80
        n1ql_indexer_ram_mb = 512

        total_avail_ram_bytes = self.get_available_ram()
        total_avail_ram_mb = int(total_avail_ram_bytes / (1024 * 1024))
        effective_avail_ram_mb = int(total_avail_ram_mb * ram_multiplier) - n1ql_indexer_ram_mb
        per_bucket_ram_mb = int(effective_avail_ram_mb / len(bucket_names))

        log_info("total_avail_ram_mb: {} effective_avail_ram_mb: {} per_bucket_ram_mb: {}".format(
            total_avail_ram_mb, effective_avail_ram_mb, per_bucket_ram_mb
        ))

        for bucket_name in bucket_names:
            log_info("Create bucket {} with per_bucket_ram_mb {}".format(bucket_name, per_bucket_ram_mb))
            self.create_bucket(bucket_name, per_bucket_ram_mb)

    def create_bucket(self, name, ramQuotaMB=1024):
        """Create a bucket and block until it is usable.

        1. Create CBS bucket via REST
        2. Create client connection and poll until bucket is available.
           Connection / temporary failures are retried; the bucket is
           considered ready once a key lookup is answered.
        3. Verify all server nodes are in a 'healthy' state before proceeding

        Followed the docs below that suggested this approach.
        http://docs.couchbase.com/admin/admin/REST/rest-bucket-create.html

        Args:
            name: Name of the bucket to create.
            ramQuotaMB: RAM quota for the bucket, in MB.

        Returns:
            The bucket name.

        Raises:
            Exception: if the bucket is not available within
                ``keywords.constants.CLIENT_REQUEST_TIMEOUT`` seconds.
        """
        log_info("Creating bucket {} with RAM {}".format(name, ramQuotaMB))

        data = {
            "name": name,
            "ramQuotaMB": str(ramQuotaMB),
            "authType": "sasl",
            "proxyPort": "11211",
            "bucketType": "couchbase",
            "flushEnabled": "1"
        }

        resp = self._session.post("{}/pools/default/buckets".format(self.url), data=data)
        log_r(resp)
        resp.raise_for_status()

        # Create client and retry until the server answers a key lookup
        start = time.time()
        while True:

            if time.time() - start > keywords.constants.CLIENT_REQUEST_TIMEOUT:
                raise Exception("TIMEOUT while trying to create server buckets.")
            try:
                bucket = Bucket("couchbase://{}/{}".format(self.host, name))
                bucket.get('foo')
            except ProtocolError:
                log_info("Client Connection failed: Retrying ...")
                time.sleep(1)
                continue
            except TemporaryFailError:
                log_info("Failure from server: Retrying ...")
                time.sleep(1)
                continue
            except NotFoundError:
                log_info("Key not found error: Bucket is ready!")
                break
            else:
                # The lookup was served, so the bucket is just as ready as in
                # the not-found case. Without this the loop would spin with no
                # delay until the timeout fires.
                log_info("Key found: Bucket is ready!")
                break

        self.wait_for_ready_state()

        return name

    def delete_couchbase_server_cached_rev_bodies(self, bucket):
        """Delete every doc in ``bucket`` whose id starts with "_sync:rev".

        Such ids follow the below format
        _sync:rev:att_doc:34:1-e7fa9a5e6bb25f7a40f36297247ca93e
        """
        b = Bucket("couchbase://{}/{}".format(self.host, bucket))

        cached_rev_doc_ids = []
        b.n1ql_query("CREATE PRIMARY INDEX ON `{}`".format(bucket)).execute()
        for row in b.n1ql_query("SELECT meta(`{}`) FROM `{}`".format(bucket, bucket)):
            doc_id = row["$1"]["id"]
            if doc_id.startswith("_sync:rev"):
                cached_rev_doc_ids.append(doc_id)

        log_info("Found temp rev docs: {}".format(cached_rev_doc_ids))
        for doc_id in cached_rev_doc_ids:
            log_debug("Removing: {}".format(doc_id))
            b.remove(doc_id)

    def get_server_docs_with_prefix(self, bucket, prefix):
        """Return the ids of docs in ``bucket`` matching a prefix (ex. '_sync:rev:')."""
        b = Bucket("couchbase://{}/{}".format(self.host, bucket))

        found_ids = []
        b.n1ql_query("CREATE PRIMARY INDEX ON `{}`".format(bucket)).execute()
        for row in b.n1ql_query("SELECT meta(`{}`) FROM `{}`".format(bucket, bucket)):
            log_info(row)
            doc_id = row["$1"]["id"]
            if doc_id.startswith(prefix):
                found_ids.append(doc_id)

        return found_ids

    def _get_tasks(self):
        """Return the current tasks from the server ("/pools/default/tasks")."""
        resp = self._session.get("{}/pools/default/tasks".format(self.url))
        log_r(resp)
        resp.raise_for_status()
        return resp.json()

    def _wait_for_rebalance_complete(self):
        """Poll the server tasks endpoint until no rebalance is running.

        /pools/default/tasks format:
        [
            {
                "type": "rebalance",
                "status": "running",
                ...
            }
        ]

        Raises:
            TimeoutError: if no rebalance task shows up within
                ``keywords.constants.CLIENT_REQUEST_TIMEOUT`` seconds.
            Exception: if a rebalance is still running after
                ``keywords.constants.REBALANCE_TIMEOUT_SECS`` seconds.
        """
        # Check that rebalance is in the tasks before polling for its completion
        start = time.time()
        found_rebalance = False
        while not found_rebalance:

            if time.time() - start > keywords.constants.CLIENT_REQUEST_TIMEOUT:
                raise TimeoutError("Did not find rebalance task!")

            for task in self._get_tasks():
                if task["type"] == "rebalance":
                    log_info("Rebalance found in tasks!")
                    found_rebalance = True

            if not found_rebalance:
                log_info("Did not find rebalance task. Retrying.")
                time.sleep(1)

        # Now wait for every rebalance task to leave the "running" state
        start = time.time()
        while True:

            if time.time() - start > keywords.constants.REBALANCE_TIMEOUT_SECS:
                raise Exception("wait_for_rebalance_complete: TIMEOUT")

            rebalance_running = False
            for task in self._get_tasks():
                task_type = task["type"]
                task_status = task["status"]
                log_info("{} is {}".format(task_type, task_status))

                if task_type == "rebalance" and task_status == "running":
                    rebalance_running = True

            if not rebalance_running:
                break

            time.sleep(1)

    def add_node(self, server_to_add):
        """Add ``server_to_add`` to this server's Couchbase Server cluster.

        Args:
            server_to_add: ``CouchbaseServer`` to add (with the kv service).

        Raises:
            TypeError: if ``server_to_add`` is not a ``CouchbaseServer``.
            Exception: if the node could not be added within
                ``keywords.constants.CLIENT_REQUEST_TIMEOUT`` seconds.
        """
        if not isinstance(server_to_add, CouchbaseServer):
            raise TypeError("'server_to_add' must be a 'CouchbaseServer'")

        log_info("Adding server node {} to cluster ...".format(server_to_add))
        data = "hostname={}&user=Administrator&password=password&services=kv".format(
            server_to_add.host
        )

        # HACK: Retry below addresses the following problem:
        #  1. Rebalance a node out
        #  2. Try to immediately add node back into the cluster
        #  3. Fails because node is in state where it can't be added in yet
        # To work around this:
        #  1. Retry / wait until add node POST command is successful
        start = time.time()
        while True:

            if time.time() - start > keywords.constants.CLIENT_REQUEST_TIMEOUT:
                raise Exception("add_node: TIMEOUT")

            # Override session headers for this one off request
            resp = self._session.post(
                "{}/controller/addNode".format(self.url),
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                data=data
            )
            log_r(resp)

            # If status of the POST is not 200, retry the request after a second
            if resp.status_code == 200:
                log_info("{} added to cluster successfully".format(server_to_add))
                break

            log_info("{}: Could not add {} to cluster. Retrying ...".format(resp.status_code, server_to_add))
            time.sleep(1)

    def rebalance_out(self, cluster_servers, server_to_remove):
        """Eject ``server_to_remove`` from the cluster and wait for the rebalance.

        Args:
            cluster_servers: List of server endpoints, ex.
                ["http://192.168.33.10:8091", "http://192.168.33.11:8091", ...].
                All of them are sent as known nodes.
            server_to_remove: ``CouchbaseServer`` to eject.

        Returns:
            True once the rebalance has completed.

        Raises:
            TypeError: if ``server_to_remove`` is not a ``CouchbaseServer``.
        """
        if not isinstance(server_to_remove, CouchbaseServer):
            raise TypeError("'server_to_remove' must be a 'CouchbaseServer'")

        # Every cluster server is a known node; joined without a trailing comma
        known_nodes = "knownNodes={}".format(
            self._otp_nodes(self._host_from_url(server) for server in cluster_servers)
        )
        ejected_node = "ejectedNodes=ns_1@{}".format(server_to_remove.host)
        data = "{}&{}".format(ejected_node, known_nodes)

        log_info("Starting rebalance out: {} with nodes {}".format(server_to_remove.host, data))

        # Override session headers for this one off request
        resp = self._session.post(
            "{}/controller/rebalance".format(self.url),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data=data
        )
        log_r(resp)
        resp.raise_for_status()

        self._wait_for_rebalance_complete()

        return True

    def rebalance_in(self, cluster_servers, server_to_add):
        """Rebalance ``server_to_add`` into the cluster and wait for completion.

        Args:
            cluster_servers: List of endpoints running Couchbase server, ex.
                ["http://192.168.33.10:8091", "http://192.168.33.11:8091", ...]
            server_to_add: ``CouchbaseServer`` to rebalance in.

        Returns:
            True once the rebalance has completed.

        Raises:
            TypeError: if ``server_to_add`` is not a ``CouchbaseServer``.
        """
        if not isinstance(server_to_add, CouchbaseServer):
            raise TypeError("'server_to_add' must be a 'CouchbaseServer'")

        # All servers except server_to_add first, then server_to_add itself
        # so that it is listed exactly once.
        known_hosts = [
            host
            for host in (self._host_from_url(server) for server in cluster_servers)
            if host != server_to_add.host
        ]
        known_hosts.append(server_to_add.host)
        data = "knownNodes={}".format(self._otp_nodes(known_hosts))

        # Rebalance nodes
        log_info("Starting rebalance in for {}".format(server_to_add))
        log_info("Known nodes: {}".format(data))

        # Override session headers for this one off request
        resp = self._session.post(
            "{}/controller/rebalance".format(self.url),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data=data
        )
        log_r(resp)
        resp.raise_for_status()

        self._wait_for_rebalance_complete()

        return True

    def recover(self, server_to_recover):
        """Set the recovery type of ``server_to_recover`` to 'delta'.

        Raises:
            TypeError: if ``server_to_recover`` is not a ``CouchbaseServer``.
        """
        if not isinstance(server_to_recover, CouchbaseServer):
            raise TypeError("'server_to_recover' must be a 'CouchbaseServer'")

        log_info("Setting recover mode to 'delta' for server {}".format(server_to_recover.host))
        data = "otpNode=ns_1@{}&recoveryType=delta".format(server_to_recover.host)

        # Override session headers for this one off request
        resp = self._session.post(
            "{}/controller/setRecoveryType".format(self.url),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data=data
        )
        log_r(resp)
        resp.raise_for_status()

        # TODO reset Quota

    def start(self):
        """Start Couchbase Server via 'service couchbase-server start' and wait until healthy."""
        command = "sudo service couchbase-server start"
        self.remote_executor.must_execute(command)
        self.wait_for_ready_state()

    def _verify_stopped(self):
        """Poll until the server url is unreachable.

        Raises:
            TimeoutError: if the server is still reachable after
                ``keywords.constants.CLIENT_REQUEST_TIMEOUT`` seconds.
        """
        start = time.time()
        while True:
            if time.time() - start > keywords.constants.CLIENT_REQUEST_TIMEOUT:
                raise TimeoutError("Waiting for server to be unreachable but it never was!")
            try:
                resp = self._session.get("{}/pools".format(self.url))
                log_r(resp)
                resp.raise_for_status()
            except ConnectionError:
                # This is expected and used to determine if a server node has gone offline
                break
            except HTTPError as e:
                # 500 errors may happen as a result of the node going down.
                # Fall through to the sleep below so the server is not hammered.
                log_error(e)

            time.sleep(1)

    def stop(self):
        """Stop Couchbase Server via 'service couchbase-server stop' and wait until unreachable."""
        command = "sudo service couchbase-server stop"
        self.remote_executor.must_execute(command)
        self._verify_stopped()