def stress_seq_range(self, row_count, command_part1, command_part2):
    """Run cassandra-stress on every load generator, partitioning the
    population sequence 1..row_count over the generators.

    The first generator starts immediately; all others are delayed by 10
    seconds so the first can create the schema.

    Parameters
    ----------
    row_count : int
        Total number of rows in the population sequence.
    command_part1 : str
        Command text placed before the generated '-pop' fragment.
    command_part2 : str
        Command text placed after the generated '-pop' fragment.
    """
    load_ip_count = len(self.load_ips)
    rows_per_ip = row_count // load_ip_count
    population_commands = []
    start = 1
    for i in range(load_ip_count):
        # BUG FIX: the original ranges overlapped by one row at every
        # boundary (segment i ended at the point where segment i+1
        # started) and n double-counted those rows. Each range now ends
        # one before the next one starts; the last generator absorbs the
        # division remainder so the union is exactly 1..row_count.
        if i == load_ip_count - 1:
            end = row_count
        else:
            end = start + rows_per_ip - 1
        population_commands.append(f' n={end - start + 1} -pop seq={start}..{end} ')
        start = end + 1
    log(population_commands)
    log_important("Cassandra-Stress: started")
    run_parallel(self.__stress,
                 [(ip, 10 if i > 0 else 0, command_part1 + pop_command + command_part2)
                  for i, (ip, pop_command) in enumerate(zip(self.load_ips, population_commands))])
    log_important("Cassandra-Stress: done")
def __stress(self, ip, cmd):
    """Run scylla-bench remotely on one load generator, teeing its
    output to a timestamped log file."""
    timestamp = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
    remote_cmd = f'go/bin/scylla-bench {cmd}' + f" 2>&1 | tee -a scylla-bench-{timestamp}.log"
    log(remote_cmd)
    self.__new_ssh(ip).exec(remote_cmd)
def run(self, command):
    """Run diskplorer with the given arguments on every node in parallel."""
    log_important(f'Disk Explorer run: started [{datetime.now().strftime("%H:%M:%S")}]')
    log(f"python3 diskplorer.py {command}")
    tasks = [(ip, command) for ip in self.ips]
    run_parallel(self.__run, tasks)
    log_important(f'Disk Explorer run: done [{datetime.now().strftime("%H:%M:%S")}]')
def stress(self, command, load_index=None):
    """Run scylla-bench with the given command.

    With load_index set, the command runs only on that load generator;
    otherwise it runs on every load generator in parallel.
    """
    if load_index is not None:
        log("using load_index " + str(load_index))
        self.__stress(self.load_ips[load_index], command)
        return
    log_important("scylla-bench: started")
    run_parallel(self.__stress, [(ip, command) for ip in self.load_ips])
    log_important("scylla-bench: done")
def start(self):
    """Start scylla-server on every cluster node, then wait until each
    node accepts CQL connections."""
    log(f"Starting Scylla nodes {self.cluster_public_ips}")
    # Kick off all the daemons first ...
    for node_ip in self.cluster_public_ips:
        self.__new_ssh(node_ip).exec("sudo systemctl start scylla-server")
    # ... then wait for each of them to finish bootstrapping.
    for node_ip in self.cluster_public_ips:
        wait_for_cql_start(node_ip)
        log_machine(node_ip, "Node finished bootstrapping")
    log(f"Starting Scylla nodes {self.cluster_public_ips}: done")
def stop(self, load_index=None, erase_data=False):
    """Drain and stop scylla-server on the node selected by load_index,
    optionally wiping its data and commit log.

    Stopping the whole cluster (load_index is None) is not implemented.
    """
    if load_index is None:
        log("Not implemented!")
        return
    self.nodetool("drain", load_index=load_index)
    ssh = self.__new_ssh(self.cluster_public_ips[load_index])
    ssh.exec("sudo systemctl stop scylla-server")
    if erase_data:
        ssh.exec("sudo rm -rf /var/lib/scylla/data/*")
        ssh.exec("sudo rm -rf /var/lib/scylla/commitlog/*")
def exec(self, command):
    """Run a perf command on every remote machine in /tmp.

    The command must be the complete command line, e.g.
    'sudo perf record ...'.
    """
    # BUG FIX: the old docstring claimed this "returns the perf command";
    # it actually executes it remotely. Also dropped pointless f-prefixes
    # on constant strings.
    log_important("Perf: started")
    log(command)
    pssh = PSSH(self.ip_list, self.user, self.ssh_options)
    pssh.exec(f"""
        cd /tmp
        {command}
        """)
    log_important("Perf: done")
def clear_cluster(cluster_public_ips, cluster_user, ssh_options, duration_seconds=90):
    """Stop scylla on all nodes, wipe data and commit log, restart, and
    give the cluster duration_seconds to come back up."""
    log_important("Shutting down cluster and removing all data")
    pssh = PSSH(cluster_public_ips, cluster_user, ssh_options)
    steps = (
        ("Stopping scylla", "sudo systemctl stop scylla-server"),
        ("Removing data dir", "sudo rm -fr /var/lib/scylla/data/*"),
        ("Removing commit log", "sudo rm -fr /var/lib/scylla/commitlog/*"),
        ("Starting scylla", "sudo systemctl start scylla-server"),
    )
    for description, command in steps:
        log(description)
        pssh.exec(command)
    log(f"Waiting {duration_seconds} seconds")
    sleep(duration_seconds)
    log_important("Cluster cleared and restarted")
def __trim_recursivly(self, dir):
    """Trim the configured warmup/cooldown period from every .hdr file
    found one directory level below dir.

    Does nothing when neither warmup_seconds nor cooldown_seconds is set.
    """
    # BUG FIX: the original tested `self.warmup_seconds is None` twice,
    # so a configured cooldown_seconds alone never triggered trimming.
    if self.warmup_seconds is None and self.cooldown_seconds is None:
        return
    log_important("HdrLogProcessor.trim_recursively")
    for hdr_file in glob.iglob(dir + '/*/*.hdr', recursive=True):
        filename = os.path.basename(hdr_file)
        # Skip files produced by a previous trim run.
        if filename.startswith("trimmed_"):
            continue
        log(hdr_file)
        self.__trim(hdr_file)
    log_important("HdrLogProcessor.trim_recursively")
def __trim(self, file):
    """Produce trimmed_<name>.hdr next to the given .hdr file, with the
    configured warmup (-start) and cooldown (-end) periods applied."""
    filename = os.path.basename(file)
    filename_no_ext = os.path.splitext(filename)[0]
    old_cwd = os.getcwd()
    new_cwd = os.path.dirname(os.path.realpath(file))
    os.chdir(new_cwd)
    try:
        # BUG FIX: the input-file argument was missing/corrupted; the
        # union command needs the source .hdr file as its -if argument
        # (filename was computed but never used otherwise).
        args = f'union -if {filename} -of trimmed_{filename_no_ext}.hdr'
        if self.warmup_seconds is not None:
            args = f'{args} -start {self.warmup_seconds}'
        if self.cooldown_seconds is not None:
            args = f'{args} -end {self.cooldown_seconds}'
        cmd = f'{self.java_path} -cp {self.lib_dir}/processor.jar CommandDispatcherMain {args}'
        log(cmd)
        os.system(cmd)
    finally:
        # Always restore the working directory, even if os.system fails.
        os.chdir(old_cwd)
def collect_results(self, dir, warmup_seconds=None, cooldown_seconds=None):
    """
    Download results from every load generator and post-process the
    HDR histograms.

    Parameters
    ----------
    dir: str
        The download directory.
    warmup_seconds : str
        The warmup period in seconds. If the value is set, additional
        files will be created where the warmup period is trimmed.
    cooldown_seconds : str
        The cooldown period in seconds. If the value is set, additional
        files will be created where the cooldown period is trimmed.
    """
    # FIX: docstring previously omitted the warmup/cooldown parameters
    # that the sibling collect_results documents; also dropped no-op
    # f-prefixes on constant strings.
    log_important("Collecting results: started")
    run_parallel(self.__collect, [(ip, dir) for ip in self.load_ips])
    p = HdrLogProcessor(self.properties,
                        warmup_seconds=warmup_seconds,
                        cooldown_seconds=cooldown_seconds)
    p.process(dir)
    log_important("Collecting results: done")
    log(f"Results can be found in [{dir}]")
def insert(self, partition_count, nodes, partition_offset=0, concurrency=64,
           clustering_row_count=1, extra_args=""):
    """Insert partition_count partitions with scylla-bench, splitting the
    work over all load generators.

    Parameters
    ----------
    partition_count : int
        Total number of partitions to insert.
    nodes : str
        Value passed to scylla-bench's -nodes option.
    partition_offset : int
        First partition offset (each generator continues where the
        previous one stops).
    concurrency : int
        Value passed to -concurrency.
    clustering_row_count : int
        Value passed to -clustering-row-count.
    extra_args : str
        Extra scylla-bench arguments appended verbatim.
    """
    log_important(f"Inserting {partition_count} partitions")
    start_seconds = time.time()
    generator_count = len(self.load_ips)
    base_count = partition_count // generator_count
    remainder = partition_count % generator_count
    cmd_list = []
    for i in range(generator_count):
        # BUG FIX: previously the division remainder was silently lost
        # (acknowledged by a todo). The last generator now absorbs it so
        # exactly partition_count partitions are inserted.
        count = base_count + remainder if i == generator_count - 1 else base_count
        cmd = f"""-workload sequential \
                  -clustering-row-count {clustering_row_count} \
                  -mode write \
                  -partition-count {count} \
                  -partition-offset {partition_offset} \
                  -nodes {nodes} \
                  -concurrency {concurrency} \
                  {extra_args}"""
        # collapse the multi-line literal into a single-line command
        cmd = " ".join(cmd.split())
        cmd_list.append(cmd)
        partition_offset = partition_offset + count
    futures = []
    for i in range(generator_count):
        futures.append(self.async_stress(cmd_list[i], load_index=i))
        if i == 0:
            # first one is given some extra time to set up the tables and all that.
            time.sleep(10)
    for f in futures:
        f.join()
    duration_seconds = time.time() - start_seconds
    log(f"Duration : {duration_seconds} seconds")
    log(f"Insertion rate: {partition_count // duration_seconds} items/second")
    log_important(f"Inserting {partition_count} partitions: done")
def collect_results(self, dir, warmup_seconds=None, cooldown_seconds=None):
    """Download load-generator results into dir and post-process the HDR logs.

    Parameters
    ----------
    dir: str
        The download directory.
    warmup_seconds : str
        When set, additional files are created with the warmup period trimmed.
    cooldown_seconds : str
        When set, additional files are created with the cooldown period trimmed.
    """
    log_important("Collecting results: started")
    tasks = [(ip, dir) for ip in self.load_ips]
    run_parallel(self.__collect, tasks)
    processor = HdrLogProcessor(self.properties,
                                warmup_seconds=warmup_seconds,
                                cooldown_seconds=cooldown_seconds)
    processor.process(dir)
    log_important("Collecting results: done")
    log(f"Results can be found in [{dir}]")
def __stress(self, ip, startup_delay, cmd):
    """Run cassandra-stress on one load generator after startup_delay
    seconds, teeing output to a timestamped log file.

    Uses the distribution's cassandra-stress when scylla_tools is set,
    otherwise the binary from the unpacked Apache Cassandra tools dir.
    """
    time.sleep(startup_delay)
    if self.scylla_tools:
        cs_cmd = f'cassandra-stress {cmd}'
    else:
        cassandra_version = self.properties['cassandra_version']
        cassandra_stress_dir = f'apache-cassandra-{cassandra_version}/tools/bin'
        cs_cmd = f'{cassandra_stress_dir}/cassandra-stress {cmd}'
    log(cs_cmd)
    stamp = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
    cs_cmd += f" 2>&1 | tee -a cassandra-stress-{stamp}.log"
    full_cmd = f"""
        set -e
        set -o pipefail
        {cs_cmd}
        set +o pipefail
        """
    self.__new_ssh(ip).exec(full_cmd)
def install(self):
    """Create, format (xfs) and mount a RAID array from the local disks
    on every node, in parallel over SSH.

    Idempotent: the remote script is a no-op when /dev/md/<name>
    already exists as a block device.
    """
    ips = ','.join(self.public_ips)
    log(f'[{ips}] raid: starting creating RAID')
    pssh = PSSH(self.public_ips, self.user, self.properties['ssh_options'])
    # raid-devices is derived by counting the devices matching the
    # wildcard; level/chunk come from instance configuration.
    pssh.exec(f"""
        if [[ ! -b /dev/md/{self.raid_device_name} ]]; then
            sudo mdadm --create --verbose /dev/md/{self.raid_device_name} --chunk=256 --metadata=1.2 --level={self.level} --force --raid-devices=$(ls {self.device_name_wildcard} | wc -l) {self.device_name_wildcard}
            # /dev/md/raid_device_name maps to /dev/md[0-9]+
            MD_NAME=$(basename $(readlink /dev/md/{self.raid_device_name}))
            # Tuning
            sudo sh -c "echo 1 > /sys/block/$MD_NAME/queue/nomerges"
            sudo sh -c "echo 8 > /sys/block/$MD_NAME/queue/read_ahead_kb"
            sudo sh -c "echo none > /sys/block/$MD_NAME/queue/scheduler"
            sudo mkfs.xfs -f /dev/$MD_NAME
            mkdir {self.raid_device_name}
            sudo mount /dev/$MD_NAME {self.raid_device_name}
            sudo chown $(id -u) {self.raid_device_name}
        fi
        """)
    log(f'[{ips}] raid: finished creating RAID')
def restart_cluster(cluster_public_ips, cluster_user, ssh_options, duration_seconds=90):
    """Drain and restart scylla-server on every node, then wait
    duration_seconds for the cluster to settle."""
    log_important("Restart cluster ")
    pssh = PSSH(cluster_public_ips, cluster_user, ssh_options)
    for command in ("nodetool drain", "sudo systemctl restart scylla-server"):
        log(command)
        pssh.exec(command)
    log(f"Waiting {duration_seconds} seconds")
    sleep(duration_seconds)
    log_important("Cluster restarted")
def append_env_configuration(self, configuration):
    """Append a configuration snippet to cassandra-env.sh on every
    cluster node in parallel."""
    log(f"Appending cassandra-env.sh configuration to nodes {self.cluster_public_ips}: {configuration}")
    pssh = PSSH(self.cluster_public_ips, self.ssh_user, self.properties['ssh_options'])
    # The install prefix differs when Cassandra lives on the RAID mount.
    path_prefix = 'cassandra-raid/' if self.setup_raid else './'
    log("configuration[" + configuration + "]")
    remote_cmd = f"echo '{configuration}' >> {path_prefix}apache-cassandra-{self.cassandra_version}/conf/cassandra-env.sh"
    pssh.exec(remote_cmd)
    log(remote_cmd)
def insert(self, profile, item_count, nodes, mode="native cql3",
           rate="threads=100", sequence_start=None):
    """Insert item_count items with a cassandra-stress user profile,
    splitting the sequence range over all load generators.

    Parameters
    ----------
    profile : str
        Path of the stress profile yaml.
    item_count : int
        Total number of items to insert.
    nodes : str
        Value passed to -node.
    mode : str
        Value passed to -mode.
    rate : str
        Value passed to -rate.
    sequence_start : int
        First sequence number; defaults to 1 when None.
    """
    log_important(f"Inserting {item_count} items")
    start_seconds = time.time()
    generator_count = len(self.load_ips)
    per_load_generator = item_count // generator_count
    remainder = item_count % generator_count
    start = 1 if sequence_start is None else sequence_start
    cmd_list = []
    for i in range(generator_count):
        # BUG FIX: previously the division remainder was silently never
        # inserted. The last generator now absorbs it so exactly
        # item_count items are inserted.
        count = per_load_generator + remainder if i == generator_count - 1 else per_load_generator
        end = start + count - 1
        cmd = f'user profile={profile} "ops(insert=1)" n={count} no-warmup -pop seq={start}..{end} -mode {mode} -rate {rate} -node {nodes}'
        log(self.load_ips[i] + " " + cmd)
        cmd_list.append(cmd)
        start = end + 1
    futures = []
    for i in range(generator_count):
        futures.append(self.async_stress(cmd_list[i], load_index=i))
        if i == 0:
            # the first generator gets extra time to create the schema.
            time.sleep(10)
    for f in futures:
        f.join()
    duration_seconds = time.time() - start_seconds
    log(f"Duration : {duration_seconds} seconds")
    log(f"Insertion rate: {item_count // duration_seconds} items/second")
    log_important(f"Inserting {item_count} items: done")
def __merge_recursivly(self, dir):
    """Union per-generator .hdr files sharing a base name into a single
    <name>.hdr placed directly in dir."""
    log_important("HdrLogProcessor.merge_recursively")
    log(dir)
    # todo be careful with merging the merge file.
    files_map = {}
    for hdr_file in glob.iglob(dir + '/*/*.hdr', recursive=True):
        log(hdr_file)
        base = os.path.splitext(os.path.basename(hdr_file))[0]
        files_map.setdefault(base, []).append(hdr_file)
    for name, files in files_map.items():
        input = "".join(" -ifp " + file for file in files)
        cmd = f'{self.java_path} -cp {self.lib_dir}/processor.jar CommandDispatcherMain union {input} -of {dir}/{name}.hdr'
        log(cmd)
        os.system(cmd)
    log_important("HdrLogProcessor.merge_recursively")
def __process_recursivly(self, dir):
    """Run __process on every .hdr file anywhere under dir."""
    log_important("HdrLogProcessor.process_recursively")
    pattern = dir + '/**/*.hdr'
    for hdr_file in glob.iglob(pattern, recursive=True):
        log(hdr_file)
        self.__process(hdr_file)
    log_important("HdrLogProcessor.process_recursively")
def download(self, dir):
    """Download the disk-explorer output from every node into dir."""
    log_important("Disk Explorer Download: started")
    tasks = [(ip, dir) for ip in self.ips]
    run_parallel(self.__download, tasks)
    log_important("Disk Explorer Download: done")
    log(f"Results can be found in [{dir}]")
def __init__(self, ip_list, user, ssh_options):
    """Store the connection settings used by later remote operations.

    Parameters
    ----------
    ip_list
        Public IPs of the machines to operate on.
    user
        SSH user name.
    ssh_options
        Options passed through to ssh.
    """
    log(ip_list)
    # NOTE(review): presumably flipped elsewhere once the machines have
    # been updated/prepared — confirm against the rest of the class.
    self.updated = False
    self.ip_list = ip_list
    self.user = user
    self.ssh_options = ssh_options
def run(self, options):
    """Run fio with the given options on every node in parallel."""
    log_important("fio run: started")
    log(f"sudo fio {options}")
    tasks = [(ip, options) for ip in self.ips]
    run_parallel(self.__run, tasks)
    log_important("fio run: done")
def append_configuration(self, configuration):
    """Append a configuration line to /etc/scylla/scylla.yaml on every
    cluster node in parallel."""
    log(f"Appending configuration to nodes {self.cluster_public_ips}: {configuration}")
    pssh = PSSH(self.cluster_public_ips, self.ssh_user, self.properties['ssh_options'])
    remote_cmd = f"sudo sh -c \"echo '{configuration}' >> /etc/scylla/scylla.yaml\""
    pssh.exec(remote_cmd)