def validate_host_viable(): # this is something ccm does when starting, but preemptively check to avoid # spinning up the cluster if it's not going to work try: common.check_socket_available(('::1', 9042)) except: raise unittest.SkipTest('failed binding ipv6 loopback ::1 on 9042')
def load(self, options): for itf in self.network_interfaces.values(): if itf: common.check_socket_available(itf) cdir = self.get_cassandra_dir() loader_bin = common.join_bin(cdir, "bin", "sstableloader") env = common.make_cassandra_env(cdir, self.get_path()) if not "-d" in options: l = [node.network_interfaces["storage"][0] for node in self.cluster.nodes.values() if node.is_live()] options = ["-d", ",".join(l)] + options # print "Executing with", options os.execve(loader_bin, [common.platform_binary("sstableloader")] + options, env)
def start(self, no_wait=False, verbose=False, wait_for_binary_proto=False, wait_other_notice=True, jvm_args=None, profile_options=None, quiet_start=False, allow_root=False): if jvm_args is None: jvm_args = [] common.assert_jdk_valid_for_cassandra_version(self.cassandra_version()) # check whether all loopback aliases are available before starting any nodes for node in list(self.nodes.values()): if not node.is_running(): for itf in node.network_interfaces.values(): if itf is not None: if not common.check_socket_available(itf, return_on_error=True): addr, port = itf common.error("Inet address {}:{} is not available; a cluster may already be running or you may need to add the loopback alias".format(addr, port)) sys.exit(1) started = [] for node in list(self.nodes.values()): if not node.is_running(): mark = 0 if os.path.exists(node.logfilename()): mark = node.mark_log() p = node.start(update_pid=False, jvm_args=jvm_args, profile_options=profile_options, verbose=verbose, quiet_start=quiet_start, allow_root=allow_root) # Prior to JDK8, starting every node at once could lead to a # nanotime collision where the RNG that generates a node's tokens # gives identical tokens to several nodes. Thus, we stagger # the node starts if common.get_jdk_version() < '1.8': time.sleep(1) started.append((node, p, mark)) if no_wait: time.sleep(2) # waiting 2 seconds to check for early errors and for the pid to be set else: for node, p, mark in started: try: start_message = "Listening for thrift clients..." if self.cassandra_version() < "2.2" else "Starting listening for CQL clients" node.watch_log_for(start_message, timeout=60, process=p, verbose=verbose, from_mark=mark) except RuntimeError: return None self.__update_pids(started) for node, p, _ in started: if not node.is_running(): raise NodeError("Error starting {0}.".format(node.name), p) if not no_wait: if wait_other_notice: for (node, _, mark), (other_node, _, _) in itertools.permutations(started, 2): node.watch_log_for_alive(other_node, from_mark=mark) if wait_for_binary_proto: for node, p, mark in started: node.wait_for_binary_interface(process=p, verbose=verbose, from_mark=mark) return started
def start(self, join_ring=True, no_wait=False, verbose=False, update_pid=True, wait_other_notice=False, replace_token=None, replace_address=None, jvm_args=[], wait_for_binary_proto=False, profile_options=None, use_jna=False): """ Start the node. Options includes: - join_ring: if false, start the node with -Dcassandra.join_ring=False - no_wait: by default, this method returns when the node is started and listening to clients. If no_wait=True, the method returns sooner. - wait_other_notice: if True, this method returns only when all other live node of the cluster have marked this node UP. - replace_token: start the node with the -Dcassandra.replace_token option. - replace_address: start the node with the -Dcassandra.replace_address option. """ if self.is_running(): raise NodeError("%s is already running" % self.name) for itf in list(self.network_interfaces.values()): if itf is not None and replace_address is None: common.check_socket_available(itf) if wait_other_notice: marks = [ (node, node.mark_log()) for node in list(self.cluster.nodes.values()) if node.is_running() ] cdir = self.get_install_dir() launch_bin = common.join_bin(cdir, 'bin', 'dse') # Copy back the dse scripts since profiling may have modified it the previous time shutil.copy(launch_bin, self.get_bin_dir()) launch_bin = common.join_bin(self.get_path(), 'bin', 'dse') # If Windows, change entries in .bat file to split conf from binaries if common.is_win(): self.__clean_bat() if profile_options is not None: config = common.get_config() if not 'yourkit_agent' in config: raise NodeError("Cannot enable profile. You need to set 'yourkit_agent' to the path of your agent in a {0}/config".format(common.get_default_path_display_name())) cmd = '-agentpath:%s' % config['yourkit_agent'] if 'options' in profile_options: cmd = cmd + '=' + profile_options['options'] print_(cmd) # Yes, it's fragile as shit pattern=r'cassandra_parms="-Dlog4j.configuration=log4j-server.properties -Dlog4j.defaultInitOverride=true' common.replace_in_file(launch_bin, pattern, ' ' + pattern + ' ' + cmd + '"') os.chmod(launch_bin, os.stat(launch_bin).st_mode | stat.S_IEXEC) env = common.make_dse_env(self.get_install_dir(), self.get_path()) if common.is_win(): self._clean_win_jmx(); pidfile = os.path.join(self.get_path(), 'cassandra.pid') args = [launch_bin, 'cassandra'] if self.workload is not None: if 'hadoop' in self.workload: args.append('-t') if 'solr' in self.workload: args.append('-s') if 'spark' in self.workload: args.append('-k') if 'cfs' in self.workload: args.append('-c') args += [ '-p', pidfile, '-Dcassandra.join_ring=%s' % str(join_ring) ] if replace_token is not None: args.append('-Dcassandra.replace_token=%s' % str(replace_token)) if replace_address is not None: args.append('-Dcassandra.replace_address=%s' % str(replace_address)) if use_jna is False: args.append('-Dcassandra.boot_without_jna=true') args = args + jvm_args process = None if common.is_win(): # clean up any old dirty_pid files from prior runs if (os.path.isfile(self.get_path() + "/dirty_pid.tmp")): os.remove(self.get_path() + "/dirty_pid.tmp") process = subprocess.Popen(args, cwd=self.get_bin_dir(), env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) else: process = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # Our modified batch file writes a dirty output with more than just the pid - clean it to get in parity # with *nix operation here. if common.is_win(): self.__clean_win_pid() self._update_pid(process) elif update_pid: if no_wait: time.sleep(2) # waiting 2 seconds nevertheless to check for early errors and for the pid to be set else: for line in process.stdout: if verbose: print_(line.rstrip('\n')) self._update_pid(process) if not self.is_running(): raise NodeError("Error starting node %s" % self.name, process) if wait_other_notice: for node, mark in marks: node.watch_log_for_alive(self, from_mark=mark) if wait_for_binary_proto: self.watch_log_for("Starting listening for CQL clients") # we're probably fine at that point but just wait some tiny bit more because # the msg is logged just before starting the binary protocol server time.sleep(0.2) if self.cluster.hasOpscenter(): self._start_agent() return process
def start(self, join_ring=True, no_wait=False, verbose=False, update_pid=True, wait_other_notice=False, replace_token=None, replace_address=None, jvm_args=None, wait_for_binary_proto=False, profile_options=None, use_jna=False, quiet_start=False): """ Start the node. Options includes: - join_ring: if false, start the node with -Dcassandra.join_ring=False - no_wait: by default, this method returns when the node is started and listening to clients. If no_wait=True, the method returns sooner. - wait_other_notice: if True, this method returns only when all other live node of the cluster have marked this node UP. - replace_token: start the node with the -Dcassandra.replace_token option. - replace_address: start the node with the -Dcassandra.replace_address option. """ if jvm_args is None: jvm_args = [] scylla_cassandra_mapping = { '-Dcassandra.replace_address_first_boot': '--replace-address-first-boot' } # Replace args in the form # ['-Dcassandra.foo=bar'] to ['-Dcassandra.foo', 'bar'] translated_args = [] new_jvm_args = [] for jvm_arg in jvm_args: if '=' in jvm_arg: split_option = jvm_arg.split("=") e_msg = ("Option %s not in the form '-Dcassandra.foo=bar'. " "Please check your test" % jvm_arg) assert len(split_option) == 2, e_msg option, value = split_option # If we have information on how to translate the jvm option, # translate it if option in scylla_cassandra_mapping: translated_args += [ scylla_cassandra_mapping[option], value ] # Otherwise, just pass it as is else: new_jvm_args.append(jvm_arg) else: new_jvm_args.append(jvm_arg) jvm_args = new_jvm_args if self.is_running(): raise NodeError("%s is already running" % self.name) for itf in list(self.network_interfaces.values()): if itf is not None and replace_address is None: try: common.check_socket_available(itf) except Exception as msg: print("{}. Looking for offending processes...".format(msg)) for proc in psutil.process_iter(): if any(self.cluster.ipprefix in cmd for cmd in proc.cmdline()): print("name={} pid={} cmdline={}".format( proc.name(), proc.pid, proc.cmdline())) raise msg marks = [] if wait_other_notice: marks = [(node, node.mark_log()) for node in list(self.cluster.nodes.values()) if node.is_running()] self.mark = self.mark_log() launch_bin = common.join_bin(self.get_path(), 'bin', 'scylla') options_file = os.path.join(self.get_path(), 'conf', 'scylla.yaml') os.chmod(launch_bin, os.stat(launch_bin).st_mode | stat.S_IEXEC) # TODO: we do not support forcing specific settings # TODO: workaround for api-address as we do not load it # from config file scylla#59 conf_file = os.path.join(self.get_conf_dir(), common.SCYLLA_CONF) with open(conf_file, 'r') as f: data = yaml.safe_load(f) jvm_args = jvm_args + ['--api-address', data['api_address']] jvm_args = jvm_args + [ '--collectd-hostname', '%s.%s' % (socket.gethostname(), self.name) ] # Let's add jvm_args and the translated args args = [ launch_bin, '--options-file', options_file, '--log-to-stdout', '1' ] + jvm_args + translated_args # Lets search for default overrides in SCYLLA_EXT_OPTS scylla_ext_opts = os.getenv('SCYLLA_EXT_OPTS', "").split() opts_i = 0 orig_args = list(args) while opts_i < len(scylla_ext_opts): if scylla_ext_opts[opts_i].startswith("--scylla-manager="): opts_i += 1 elif scylla_ext_opts[opts_i].startswith('-'): add = False if scylla_ext_opts[opts_i] not in orig_args: add = True args.append(scylla_ext_opts[opts_i]) opts_i += 1 while opts_i < len(scylla_ext_opts) and not scylla_ext_opts[ opts_i].startswith('-'): if add: args.append(scylla_ext_opts[opts_i]) opts_i += 1 if '--developer-mode' not in args: args += ['--developer-mode', 'true'] if '--smp' not in args: # If --smp is not passed from cmdline, use default (--smp 1) args += ['--smp', str(self._smp)] elif self._smp_set_during_test: # If node.set_smp() is called during the test, ignore the --smp # passed from the cmdline. args[args.index('--smp') + 1] = str(self._smp) else: # Update self._smp based on command line parameter. # It may be used below, along with self._mem_mb_per_cpu, for calculating --memory self._smp = int(args[args.index('--smp') + 1]) if '--memory' not in args: # If --memory is not passed from cmdline, use default (512M per cpu) args += [ '--memory', '{}M'.format(self._mem_mb_per_cpu * self._smp) ] elif self._mem_set_during_test: # If node.set_mem_mb_per_cpu() is called during the test, ignore the --memory # passed from the cmdline. args[args.index('--memory') + 1] = '{}M'.format( self._mem_mb_per_cpu * self._smp) if '--default-log-level' not in args: args += ['--default-log-level', self.__global_log_level] # TODO add support for classes_log_level if '--collectd' not in args: args += ['--collectd', '0'] if '--cpuset' not in args: args += ['--overprovisioned'] if '--prometheus-address' not in args: args += ['--prometheus-address', data['api_address']] if replace_address: args += ['--replace-address', replace_address] args += ['--unsafe-bypass-fsync', '1'] scylla_process = self._start_scylla(args, marks, update_pid, wait_other_notice, wait_for_binary_proto) self._start_jmx(data) if not self._wait_java_up(data): e_msg = ( "Error starting node %s: unable to connect to scylla-jmx" % self.name) raise NodeError(e_msg, scylla_process) self.is_running() return scylla_process
def start(self, join_ring=True, no_wait=False, verbose=False, update_pid=True, wait_other_notice=False, replace_token=None, replace_address=None, jvm_args=[], wait_for_binary_proto=False, profile_options=None, use_jna=False): """ Start the node. Options includes: - join_ring: if false, start the node with -Dcassandra.join_ring=False - no_wait: by default, this method returns when the node is started and listening to clients. If no_wait=True, the method returns sooner. - wait_other_notice: if True, this method returns only when all other live node of the cluster have marked this node UP. - replace_token: start the node with the -Dcassandra.replace_token option. - replace_address: start the node with the -Dcassandra.replace_address option. """ if self.is_running(): raise NodeError("%s is already running" % self.name) for itf in list(self.network_interfaces.values()): if itf is not None and replace_address is None: common.check_socket_available(itf) if wait_other_notice: marks = [ (node, node.mark_log()) for node in list(self.cluster.nodes.values()) if node.is_running() ] cdir = self.get_cassandra_dir() cass_bin = common.join_bin(cdir, 'bin', 'cassandra') # Copy back the cassandra scripts since profiling may have modified it the previous time shutil.copy(cass_bin, self.get_bin_dir()) cass_bin = common.join_bin(self.get_path(), 'bin', 'cassandra') # If Windows, change entries in .bat file to split conf from binaries if common.is_win(): self.__clean_bat() if profile_options is not None: config = common.get_config() if not 'yourkit_agent' in config: raise NodeError("Cannot enable profile. You need to set 'yourkit_agent' to the path of your agent in a ~/.ccm/config") cmd = '-agentpath:%s' % config['yourkit_agent'] if 'options' in profile_options: cmd = cmd + '=' + profile_options['options'] print_(cmd) # Yes, it's fragile as shit pattern=r'cassandra_parms="-Dlog4j.configuration=log4j-server.properties -Dlog4j.defaultInitOverride=true' common.replace_in_file(cass_bin, pattern, ' ' + pattern + ' ' + cmd + '"') os.chmod(cass_bin, os.stat(cass_bin).st_mode | stat.S_IEXEC) env = common.make_cassandra_env(cdir, self.get_path()) if common.is_win(): self._clean_win_jmx(); pidfile = os.path.join(self.get_path(), 'cassandra.pid') args = [ cass_bin, '-p', pidfile, '-Dcassandra.join_ring=%s' % str(join_ring) ] if replace_token is not None: args.append('-Dcassandra.replace_token=%s' % str(replace_token)) if replace_address is not None: args.append('-Dcassandra.replace_address=%s' % str(replace_address)) if use_jna is False: args.append('-Dcassandra.boot_without_jna=true') args = args + jvm_args process = None if common.is_win(): # clean up any old dirty_pid files from prior runs if (os.path.isfile(self.get_path() + "/dirty_pid.tmp")): os.remove(self.get_path() + "/dirty_pid.tmp") process = subprocess.Popen(args, cwd=self.get_bin_dir(), env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) else: process = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # Our modified batch file writes a dirty output with more than just the pid - clean it to get in parity # with *nix operation here. if common.is_win(): self.__clean_win_pid() self._update_pid(process) elif update_pid: if no_wait: time.sleep(2) # waiting 2 seconds nevertheless to check for early errors and for the pid to be set else: for line in process.stdout: if verbose: print_(line.rstrip('\n')) self._update_pid(process) if not self.is_running(): raise NodeError("Error starting node %s" % self.name, process) if wait_other_notice: for node, mark in marks: node.watch_log_for_alive(self, from_mark=mark) if wait_for_binary_proto: self.watch_log_for("Starting listening for CQL clients") # we're probably fine at that point but just wait some tiny bit more because # the msg is logged just before starting the binary protocol server time.sleep(0.2) return process
def start(self, join_ring=True, no_wait=False, verbose=False, update_pid=True, wait_other_notice=False, replace_token=None, replace_address=None, jvm_args=None, wait_for_binary_proto=False, profile_options=None, use_jna=False, quiet_start=False, allow_root=False): """ Start the node. Options includes: - join_ring: if false, start the node with -Dcassandra.join_ring=False - no_wait: by default, this method returns when the node is started and listening to clients. If no_wait=True, the method returns sooner. - wait_other_notice: if True, this method returns only when all other live node of the cluster have marked this node UP. - replace_token: start the node with the -Dcassandra.replace_token option. - replace_address: start the node with the -Dcassandra.replace_address option. """ if jvm_args is None: jvm_args = [] if self.is_running(): raise NodeError("%s is already running" % self.name) for itf in list(self.network_interfaces.values()): if itf is not None and replace_address is None: common.check_socket_available(itf) if wait_other_notice: marks = [(node, node.mark_log()) for node in list(self.cluster.nodes.values()) if node.is_running()] self.mark = self.mark_log() cdir = self.get_install_dir() launch_bin = common.join_bin(cdir, 'bin', 'dse') # Copy back the dse scripts since profiling may have modified it the previous time shutil.copy(launch_bin, self.get_bin_dir()) launch_bin = common.join_bin(self.get_path(), 'bin', 'dse') # If Windows, change entries in .bat file to split conf from binaries if common.is_win(): self.__clean_bat() if profile_options is not None: config = common.get_config() if 'yourkit_agent' not in config: raise NodeError("Cannot enable profile. You need to set 'yourkit_agent' to the path of your agent in a {0}/config".format(common.get_default_path_display_name())) cmd = '-agentpath:%s' % config['yourkit_agent'] if 'options' in profile_options: cmd = cmd + '=' + profile_options['options'] print_(cmd) # Yes, it's fragile as shit pattern = r'cassandra_parms="-Dlog4j.configuration=log4j-server.properties -Dlog4j.defaultInitOverride=true' common.replace_in_file(launch_bin, pattern, ' ' + pattern + ' ' + cmd + '"') os.chmod(launch_bin, os.stat(launch_bin).st_mode | stat.S_IEXEC) env = self.get_env() if common.is_win(): self._clean_win_jmx() pidfile = os.path.join(self.get_path(), 'cassandra.pid') args = [launch_bin, 'cassandra'] for workload in self.workloads: if 'hadoop' in workload: args.append('-t') if 'solr' in workload: args.append('-s') if 'spark' in workload: args.append('-k') if 'cfs' in workload: args.append('-c') if 'graph' in workload: args.append('-g') args += ['-p', pidfile, '-Dcassandra.join_ring=%s' % str(join_ring)] args += ['-Dcassandra.logdir=%s' % os.path.join(self.get_path(), 'logs')] if replace_token is not None: args.append('-Dcassandra.replace_token=%s' % str(replace_token)) if replace_address is not None: args.append('-Dcassandra.replace_address=%s' % str(replace_address)) if use_jna is False: args.append('-Dcassandra.boot_without_jna=true') if allow_root: args.append('-R') args = args + jvm_args self._delete_old_pid() process = None FNULL = open(os.devnull, 'w') if common.is_win(): # clean up any old dirty_pid files from prior runs if (os.path.isfile(self.get_path() + "/dirty_pid.tmp")): os.remove(self.get_path() + "/dirty_pid.tmp") process = subprocess.Popen(args, cwd=self.get_bin_dir(), env=env, stdout=FNULL, stderr=subprocess.PIPE) else: process = subprocess.Popen(args, env=env, stdout=FNULL, stderr=subprocess.PIPE) # Our modified batch file writes a dirty output with more than just the pid - clean it to get in parity # with *nix operation here. if common.is_win(): self.__clean_win_pid() self._update_pid(process) elif update_pid: self._update_pid(process) if not self.is_running(): raise NodeError("Error starting node %s" % self.name, process) if wait_other_notice: for node, mark in marks: node.watch_log_for_alive(self, from_mark=mark) if wait_for_binary_proto: self.wait_for_binary_interface(from_mark=self.mark) if self.cluster.hasOpscenter(): self._start_agent() return process