def write_memcached_cluster_settings(filename, cluster_view):
    """Write out the memcached cluster_settings file.

    filename     - path to write (via safely_write).
    cluster_view - mapping of node IP -> cluster-state constant.

    Writes a "servers=" line for the current membership and, while the
    cluster is mid-change, an additional "new_servers=" line for the
    target membership.
    """
    # States counted in the current server list (leaving nodes are still
    # current members until the change completes).
    valid_servers_states = [
        constants.LEAVING_ACKNOWLEDGED_CHANGE,
        constants.LEAVING_CONFIG_CHANGED,
        constants.NORMAL_ACKNOWLEDGED_CHANGE,
        constants.NORMAL_CONFIG_CHANGED,
        constants.NORMAL,
    ]

    # States counted in the post-change server list (joining nodes are
    # future members).
    valid_new_servers_states = [
        constants.NORMAL,
        constants.NORMAL_ACKNOWLEDGED_CHANGE,
        constants.NORMAL_CONFIG_CHANGED,
        constants.JOINING_ACKNOWLEDGED_CHANGE,
        constants.JOINING_CONFIG_CHANGED,
    ]

    # Fix: use items() rather than the Python-2-only iteritems() so this
    # code also runs under Python 3; behaviour is identical here.
    # 11211 is the port memcached listens on.
    servers_ips = sorted(combine_ip_port(k, 11211)
                         for k, v in cluster_view.items()
                         if v in valid_servers_states)
    new_servers_ips = sorted(combine_ip_port(k, 11211)
                             for k, v in cluster_view.items()
                             if v in valid_new_servers_states)

    new_file_contents = WARNING_HEADER + "\n"

    if new_servers_ips == servers_ips:
        # Stable cluster - one server list is sufficient.
        new_file_contents += "servers={}\n".format(",".join(servers_ips))
    else:
        # Mid-change - record both current and target membership.
        new_file_contents += "servers={}\nnew_servers={}\n".format(
            ",".join(servers_ips), ",".join(new_servers_ips))

    safely_write(filename, new_file_contents)
def on_config_changed(self, value, alarm):
    """Apply a new shared iFC sets value: rewrite the file, reload the
    XML, and refresh the associated alarm."""
    _log.info("Updating the shared iFC sets configuration file")

    # Skip the rewrite entirely when the on-disk file already matches.
    if self.status(value) == FileStatus.UP_TO_DATE:
        return

    safely_write(_file, value)
    run_command(["/usr/share/clearwater/bin/reload_shared_ifcs_xml"])
    alarm.update_file(_file)
def write_memcached_cluster_settings(filename, cluster_view):
    """Write out the memcached cluster_settings file.

    filename     - path to write (via safely_write).
    cluster_view - mapping of node IP -> cluster-state constant.

    Emits "servers=" for the current membership, plus "new_servers="
    while a cluster change is in progress.
    """
    # Current members: normal nodes plus nodes that are leaving (they
    # remain members until the change completes).
    valid_servers_states = [
        constants.LEAVING_ACKNOWLEDGED_CHANGE,
        constants.LEAVING_CONFIG_CHANGED,
        constants.NORMAL_ACKNOWLEDGED_CHANGE,
        constants.NORMAL_CONFIG_CHANGED,
        constants.NORMAL,
    ]

    # Future members: normal nodes plus nodes that are joining.
    valid_new_servers_states = [
        constants.NORMAL,
        constants.NORMAL_ACKNOWLEDGED_CHANGE,
        constants.NORMAL_CONFIG_CHANGED,
        constants.JOINING_ACKNOWLEDGED_CHANGE,
        constants.JOINING_CONFIG_CHANGED,
    ]

    def member_ips(states):
        # Fix: items() instead of the Python-2-only iteritems() - same
        # behaviour, portable to Python 3.  11211 is memcached's port.
        return sorted(combine_ip_port(k, 11211)
                      for k, v in cluster_view.items()
                      if v in states)

    servers_ips = member_ips(valid_servers_states)
    new_servers_ips = member_ips(valid_new_servers_states)

    new_file_contents = WARNING_HEADER + "\n"
    if new_servers_ips == servers_ips:
        # No change in progress - single list.
        new_file_contents += "servers={}\n".format(",".join(servers_ips))
    else:
        # Change in progress - record both lists.
        new_file_contents += "servers={}\nnew_servers={}\n".format(
            ",".join(servers_ips), ",".join(new_servers_ips))

    safely_write(filename, new_file_contents)
def write_chronos_cluster_settings(filename, cluster_view, current_server):
    """Write out the Chronos cluster_settings file.

    filename       - path to write (via safely_write).
    cluster_view   - mapping of node IP -> cluster-state constant.
    current_server - this node's address, written as "localhost".
    """
    # Nodes that are (or are becoming) members of the cluster.
    current_or_joining = [
        constants.JOINING_ACKNOWLEDGED_CHANGE,
        constants.JOINING_CONFIG_CHANGED,
        constants.NORMAL_ACKNOWLEDGED_CHANGE,
        constants.NORMAL_CONFIG_CHANGED,
        constants.NORMAL,
    ]
    # Nodes on their way out of the cluster.
    leaving = [
        constants.LEAVING_ACKNOWLEDGED_CHANGE,
        constants.LEAVING_CONFIG_CHANGED,
    ]

    # Fix: items() instead of the Python-2-only iteritems(), and drop the
    # redundant wrapping parentheses around the comprehensions.
    staying_servers = [k for k, v in cluster_view.items()
                       if v in current_or_joining]
    leaving_servers = [k for k, v in cluster_view.items()
                       if v in leaving]

    header = dedent('''\
        {}
        [cluster]
        localhost = {}
        ''').format(WARNING_HEADER, current_server)

    # Build the body with join() rather than repeated string +=.
    parts = [header]
    parts.extend('node = {}\n'.format(node) for node in staying_servers)
    parts.extend('leaving = {}\n'.format(node) for node in leaving_servers)

    safely_write(filename, ''.join(parts))
def on_config_changed(self, value, alarm):
    """Unconditionally rewrite self._file with the new value, reload
    sprout, and refresh the associated alarm.

    NOTE(review): unlike the sibling handlers this one performs no
    status(value) up-to-date check before writing - confirm that is
    intentional.
    """
    _log.info("Updating {}".format(self._file))

    safely_write(self._file, value)

    # Fix: pass the command as an argument list, consistent with the
    # other handlers in this file, so no shell is involved in running it.
    run_command(["service", "sprout", "reload"])

    alarm.update_file(self._file)
def on_config_changed(self, value, alarm):
    """Write updated shared configuration and queue an apply_config
    operation.

    The file is only rewritten when out of date, and the queue operation
    is only requested when the new value differs from the default.

    NOTE(review): unlike the sibling handlers, this one never calls
    alarm.update_file - confirm that is intentional.
    """
    _log.info("Updating shared configuration file")

    if self.status(value) != FileStatus.UP_TO_DATE:
        safely_write(_file, value)

        if value != _default_value:
            # Fix: argument-list form (matching the sibling handlers in
            # this file) rather than a single shell command string.
            run_command([
                "/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue",
                "add",
                "apply_config",
            ])
def on_config_changed(self, value, alarm):
    """Apply a new Chronos shared config value: rewrite the file, queue
    the apply operation, and refresh the associated alarm."""
    _log.info("Updating Chronos shared configuration file")

    # Nothing to do when the on-disk file already matches the new value.
    if self.status(value) == FileStatus.UP_TO_DATE:
        return

    safely_write(_file, value)
    run_command([
        "/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue",
        "add",
        "apply_chronos_shared_config",
    ])
    alarm.update_file(_file)
def on_config_changed(self, value, alarm):
    """Apply a new SAS configuration value: rewrite the file, rerun the
    socket factory, queue the apply-config operation, and refresh the
    associated alarm."""
    _log.info("Updating SAS configuration file")

    if self.status(value) != FileStatus.UP_TO_DATE:
        safely_write(_file, value)
        run_command(
            ["/usr/share/clearwater/infrastructure/scripts/sas_socket_factory"])

        # Fix: strip surrounding whitespace from the key - a trailing
        # newline from the shell script would otherwise be passed through
        # as part of the queue-entry argument.
        apply_config_key = subprocess.check_output(
            ["/usr/share/clearwater/clearwater-queue-manager/scripts/get_apply_config_key"]
        ).strip()

        run_command([
            "/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue",
            "add",
            apply_config_key,
        ])

        alarm.update_file(_file)
def write_chronos_cluster_settings(filename, cluster_view, current_server,
                                   instance_id, deployment_id):
    """Write out the Chronos cluster_settings file.

    filename       - path to write (via safely_write).
    cluster_view   - mapping of node IP -> cluster-state constant.
    current_server - this node's address, written as "localhost".
    instance_id    - Chronos instance identity value.
    deployment_id  - Chronos deployment identity value.

    Unlike the three-argument variant, joining nodes are listed
    separately from established ones.
    """
    # Nodes entering the cluster.
    joining = [
        constants.JOINING_ACKNOWLEDGED_CHANGE,
        constants.JOINING_CONFIG_CHANGED,
    ]
    # Established members.
    staying = [
        constants.NORMAL_ACKNOWLEDGED_CHANGE,
        constants.NORMAL_CONFIG_CHANGED,
        constants.NORMAL,
    ]
    # Nodes on their way out.
    leaving = [
        constants.LEAVING_ACKNOWLEDGED_CHANGE,
        constants.LEAVING_CONFIG_CHANGED,
    ]

    # Fix: items() instead of the Python-2-only iteritems(), and drop the
    # redundant wrapping parentheses around the comprehensions.
    joining_servers = [k for k, v in cluster_view.items() if v in joining]
    staying_servers = [k for k, v in cluster_view.items() if v in staying]
    leaving_servers = [k for k, v in cluster_view.items() if v in leaving]

    header = dedent('''\
        {}
        [identity]
        instance_id = {}
        deployment_id = {}

        [cluster]
        localhost = {}
        ''').format(WARNING_HEADER, instance_id, deployment_id, current_server)

    # Build the body with join() rather than repeated string +=.
    parts = [header]
    parts.extend('joining = {}\n'.format(node) for node in joining_servers)
    parts.extend('node = {}\n'.format(node) for node in staying_servers)
    parts.extend('leaving = {}\n'.format(node) for node in leaving_servers)

    safely_write(filename, ''.join(parts))
def write_new_cassandra_config(self, seeds_list, destructive_restart=False):
    """Regenerate cassandra.yaml/topology from the template and restart
    Cassandra so it picks up the new seeds list.

    seeds_list          - iterable of seed node addresses.
    destructive_restart - if True (and this node has never bootstrapped),
                          wipe /var/lib/cassandra before restarting so the
                          node joins the cluster from scratch.
    """
    seeds_list_str = ','.join(map(str, seeds_list))
    _log.info("Cassandra seeds list is {}".format(seeds_list_str))

    # Read cassandra.yaml template.
    with open(self.CASSANDRA_YAML_TEMPLATE) as f:
        doc = yaml.safe_load(f)

    # Fill in the correct listen_address and seeds values in the yaml
    # document.
    doc["listen_address"] = self._ip
    doc["broadcast_rpc_address"] = self._ip
    doc["seed_provider"][0]["parameters"][0]["seeds"] = seeds_list_str
    doc["endpoint_snitch"] = "GossipingPropertyFileSnitch"

    # We use Thrift timeouts of 250ms, and we need the Cassandra timeouts
    # to be able to time out before that, including inter-node latency, so
    # we set timeouts of 190ms for reads, range-reads and writes.
    doc["read_request_timeout_in_ms"] = 190
    doc["range_request_timeout_in_ms"] = 190
    doc["write_request_timeout_in_ms"] = 190

    # Commit logs. We want to cap these, as the default of 8GB is
    # sufficient to exhaust the root filesystem on a low-spec (20GB) node,
    # but we should allow higher spec machines to use more diskspace to
    # avoid thrashing.
    #
    # Therefore, set the upper threshold for commit logs to be 1GB per
    # core (up to the maximum for a 64bit machine - namely 8192).
    #
    # We ignore security analysis here, as although we are shelling out,
    # we are doing so with a fixed command, so it's safe to do so. For
    # safety, we always force the result to be an integer.
    get_core_count = "grep processor /proc/cpuinfo | wc -l"
    core_count = subprocess.check_output(get_core_count, # nosec
                                         shell=True,
                                         stderr=subprocess.STDOUT)
    try:
        core_count_int = int(core_count)
    except ValueError: # pragma: no cover
        # Fall back to a conservative 2-core assumption if the shell
        # pipeline produced something non-numeric.
        core_count_int = 2

    doc["commitlog_total_space_in_mb"] = min(core_count_int * 1024, 8192)

    contents = WARNING_HEADER + "\n" + yaml.dump(doc)
    topology = WARNING_HEADER + "\n" + "dc={}\nrack=RAC1\n".format(self._local_site)

    # Restart Cassandra and make sure it picks up the new list of seeds.
    _log.info("Restarting Cassandra")

    # Remove the cassandra.yaml file first - Cassandra won't start up
    # while it's missing, so this keeps it stopped while we're clearing
    # out its database.
    if os.path.exists(self.CASSANDRA_YAML_FILE):
        os.remove(self.CASSANDRA_YAML_FILE)

    # Stop Cassandra directly rather than going through any 'service'
    # commands - this should mean that supervisord keeps restarting
    # Cassandra when running in Docker.
    #
    # Note that we can't use the init.d script here, because
    # cassandra.yaml doesn't exist so it immediately exits.
    #
    # We do not want to kill cassandra if it is in the process of
    # bootstrapping.
    if not os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
        run_command(["start-stop-daemon",
                     "-K",
                     "-p", "/var/run/cassandra/cassandra.pid",
                     "-R", "TERM/30/KILL/5"])
        _log.info("Stopped Cassandra while changing config files")

    # We only want to perform these steps the first time we join a
    # cluster. If we are bootstrapping, or already bootstrapped, doing
    # this will leave us unable to rejoin the cluster properly.
    if ((destructive_restart) and
        not ((os.path.exists(self.BOOTSTRAPPED_FLAG) or
              (os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG))))):
        _log.warn("Deleting /var/lib/cassandra - this is normal on initial clustering")
        run_command(["rm", "-rf", "/var/lib/cassandra/"])
        run_command(["mkdir", "-m", "755", "/var/lib/cassandra"])
        run_command(["chown", "-R", "cassandra", "/var/lib/cassandra"])

        # Set a state flag if we have performed a destructive restart,
        # and not yet completed bootstrapping. This will stop us
        # re-deleting the data directory if the cluster_manager dies,
        # ensuring we cluster correctly.
        open(self.BOOTSTRAP_IN_PROGRESS_FLAG, 'a').close()

    # Write back to cassandra.yaml - this allows Cassandra to start again.
    safely_write(self.CASSANDRA_TOPOLOGY_FILE, topology)
    safely_write(self.CASSANDRA_YAML_FILE, contents)

    self.wait_for_cassandra()

    # If we were previously bootstrapping, alter the state flag to
    # indicate the process is complete. We will remove this when we leave
    # the cluster.
    if os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
        os.rename(self.BOOTSTRAP_IN_PROGRESS_FLAG, self.BOOTSTRAPPED_FLAG)

    if os.path.exists("/etc/clearwater/force_cassandra_yaml_refresh"):
        os.remove("/etc/clearwater/force_cassandra_yaml_refresh")
def write_new_cassandra_config(self, seeds_list, destructive_restart=False):
    """Regenerate cassandra.yaml/topology from the template and restart
    Cassandra so it picks up the new seeds list.

    seeds_list          - iterable of seed node addresses.
    destructive_restart - if True (and this node has never bootstrapped),
                          wipe /var/lib/cassandra before restarting so the
                          node joins the cluster from scratch.
    """
    seeds_list_str = ','.join(map(str, seeds_list))
    _log.info("Cassandra seeds list is {}".format(seeds_list_str))

    # Read cassandra.yaml template.
    with open(self.CASSANDRA_YAML_TEMPLATE) as f:
        # Fix: safe_load instead of the deprecated, unsafe yaml.load.
        # The template is plain YAML data (the sibling implementation in
        # this file already uses safe_load on the same template), so this
        # is behaviour-preserving while removing the arbitrary-object
        # construction risk.
        doc = yaml.safe_load(f)

    # Fill in the correct listen_address and seeds values in the yaml
    # document.
    doc["listen_address"] = self._ip
    doc["broadcast_rpc_address"] = self._ip
    doc["seed_provider"][0]["parameters"][0]["seeds"] = seeds_list_str
    doc["endpoint_snitch"] = "GossipingPropertyFileSnitch"

    # Work out the timeout from the target_latency_us value (assuming
    # 100000 if it isn't set).
    get_latency_cmd = "target_latency_us=100000; . /etc/clearwater/config; echo -n $target_latency_us"
    latency = subprocess.check_output(get_latency_cmd,
                                      shell=True,
                                      stderr=subprocess.STDOUT)
    try:
        # We want the timeout value to be 4/5ths the maximum acceptable
        # time of a HTTP request (which is 5 * target latency).
        timeout = (int(latency) / 1000) * 4
    except ValueError: # pragma: no cover
        # Non-numeric output from the shell - fall back to 400ms.
        timeout = 400

    doc["read_request_timeout_in_ms"] = timeout

    contents = WARNING_HEADER + "\n" + yaml.dump(doc)
    topology = WARNING_HEADER + "\n" + "dc={}\nrack=RAC1\n".format(
        self._local_site)

    # Restart Cassandra and make sure it picks up the new list of seeds.
    _log.info("Restarting Cassandra")

    # Remove the cassandra.yaml file first - Cassandra won't start up
    # while it's missing, so this keeps it stopped while we're clearing
    # out its database.
    if os.path.exists(self.CASSANDRA_YAML_FILE):
        os.remove(self.CASSANDRA_YAML_FILE)

    # Stop Cassandra directly rather than going through any 'service'
    # commands - this should mean that supervisord keeps restarting
    # Cassandra when running in Docker.
    #
    # Note that we can't use the init.d script here, because
    # cassandra.yaml doesn't exist so it immediately exits.
    #
    # We do not want to kill cassandra if it is in the process of
    # bootstrapping.
    if not os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
        run_command(
            "start-stop-daemon -K -p /var/run/cassandra/cassandra.pid -R TERM/30/KILL/5"
        )
        _log.info("Stopped Cassandra while changing config files")

    # We only want to perform these steps the first time we join a
    # cluster. If we are bootstrapping, or already bootstrapped, doing
    # this will leave us unable to rejoin the cluster properly.
    if ((destructive_restart) and
        not ((os.path.exists(self.BOOTSTRAPPED_FLAG) or
              (os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG))))):
        _log.warn(
            "Deleting /var/lib/cassandra - this is normal on initial clustering"
        )
        run_command("rm -rf /var/lib/cassandra/")
        run_command("mkdir -m 755 /var/lib/cassandra")
        run_command("chown -R cassandra /var/lib/cassandra")

        # Set a state flag if we have performed a destructive restart,
        # and not yet completed bootstrapping. This will stop us
        # re-deleting the data directory if the cluster_manager dies,
        # ensuring we cluster correctly.
        open(self.BOOTSTRAP_IN_PROGRESS_FLAG, 'a').close()

    # Write back to cassandra.yaml - this allows Cassandra to start again.
    safely_write(self.CASSANDRA_TOPOLOGY_FILE, topology)
    safely_write(self.CASSANDRA_YAML_FILE, contents)

    self.wait_for_cassandra()

    # If we were previously bootstrapping, alter the state flag to
    # indicate the process is complete. We will remove this when we leave
    # the cluster.
    if os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
        os.rename(self.BOOTSTRAP_IN_PROGRESS_FLAG, self.BOOTSTRAPPED_FLAG)

    if os.path.exists("/etc/clearwater/force_cassandra_yaml_refresh"):
        os.remove("/etc/clearwater/force_cassandra_yaml_refresh")
def on_config_changed(self, value, alarm):
    """Apply a new DNS configuration value: rewrite the file and reload
    the DNS config.  (The alarm parameter is unused by this handler.)"""
    _log.info("Updating dns configuration file")

    # Nothing to do when the on-disk file already matches the new value.
    if self.status(value) == FileStatus.UP_TO_DATE:
        return

    safely_write(_file, value)
    run_command("/usr/share/clearwater/bin/reload_dns_config")