def randomize_trace_file(filename):
    """Shuffle the entries of a single-column trace file in place on disk."""
    from write_array_to_file import write_array
    entries = read_col_from_file(filename)
    random.shuffle(entries)
    # Overwrite the original file with the shuffled ordering.
    write_array(entries, filename, p=False)
def main(): f = open(sys.argv[1]) output_file = 'response_times.txt' response_times = [] except_count = 0 for line in f: try: resp = get_response_time(line) response_times.append(resp) except: except_count += 1 continue response_times.sort() avg = 0 median = 0 ninety = 0 if len(response_times) > 0: avg = sum(response_times) / len(response_times) median = response_times[len(response_times) / 2] ninety = response_times[len(response_times) * 9 / 10] print len(response_times), avg, median, ninety from write_array_to_file import write_array write_array(response_times, output_file, p=True) return ids = {} for line in f: j = line.index("id") + 4 qid = line[j:j + 9] ids[qid] = 1 print 'Number ids ', len(ids)
def latency_over_time(tuples_file): folder = os.path.split(tuples_file)[0] line_max = 20000 f = open(tuples_file) lines = f.readlines() p_th = 1.0 if line_max < len(lines): p_th = line_max * 1.0 / len(lines) select_lines = [] for line in lines: p = random.random() if p > p_th: continue tokens = line.split() select_lines.append(line) from write_array_to_file import write_array filename = os.path.join(folder, "latency-over-time.txt") write_array(select_lines, filename, p=True) try: os.system("cd " + folder + "; gnuplot " + gnuplot_file_latency) except: print "ERROR: gnuplot error"
def run_all_lns(gns_folder): from read_array_from_file import read_col_from_file ns_hostnames = read_col_from_file(exp_config.ns_file) num_ns = len(ns_hostnames) tmp_cmd_file = '/tmp/local-name-server.sh' from read_array_from_file import read_col_from_file cmds = [] pl_lns = read_col_from_file(exp_config.lns_file) update_trace_param = ''#exp_config.update_trace_url lookup_trace_param = '' #exp_config.lookup_trace_url for i, lns in enumerate(pl_lns): node_id = str(i + num_ns) update_trace_param = 'update' # + node_id # may be nod id lookup_trace_param = 'lookup' # + node_id cmd = 'ssh -i ' + exp_config.ssh_key + ' -oConnectTimeout=60 -oStrictHostKeyChecking=no -l ' + exp_config.user + ' ' + lns + ' "mkdir -p ' + \ gns_folder + '; cd ' + gns_folder + '; python local-name-server.py --lookupTrace ' \ + lookup_trace_param + ' --updateTrace ' + update_trace_param + ' --id ' + node_id + '"' print cmd cmds.append(cmd) write_array(cmds, tmp_cmd_file, p=True) os.system('parallel -a ' + tmp_cmd_file)
def main(): f = open(sys.argv[1]) output_file = 'response_times.txt' response_times = [] except_count = 0 for line in f: try: resp = get_response_time(line) response_times.append(resp) except: except_count += 1 continue response_times.sort() avg = 0 median = 0 ninety = 0 if len(response_times) > 0: avg = sum(response_times) / len(response_times) median = response_times[len(response_times)/2] ninety = response_times[len(response_times)*9/10] print len(response_times), avg, median, ninety from write_array_to_file import write_array write_array(response_times, output_file, p = True) return ids = {} for line in f: j = line.index("id") + 4 qid = line[j: j + 9] ids[qid] = 1 print 'Number ids ', len(ids)
def output_stats_by_name(all_tuples_filename):
    """Group latency tuples by name, write per-name stat files, plot CDFs of
    the per-name medians and means, and print summary stats.

    Optionally excludes names that had at least one failed read request.
    """
    value_index = 4
    name_index = 0  # 0 = name, 1 = lns, 2 = ns
    folder = dirname(all_tuples_filename)
    # This option removes names for which there is a failed read request.
    exclude_failed_reads = True
    if exclude_failed_reads:
        failed_reads_names = select_failed_reads_names(all_tuples_filename)
        write_array(failed_reads_names.keys(), os.path.join(folder, 'failed_reads_names.txt'))
        all_tuples_filename = write_all_tuples_excluding_failed(all_tuples_filename, failed_reads_names)
    outfile1 = os.path.join(folder, 'all_by_name.txt')
    output_tuples1 = group_by(all_tuples_filename, name_index, value_index)
    write_tuple_array(output_tuples1, outfile1, p=True)
    outfile2 = os.path.join(folder, 'writes_by_name.txt')
    output_tuples2 = group_by(all_tuples_filename, name_index, value_index, filter=write_filter)
    write_tuple_array(output_tuples2, outfile2, p=True)
    outfile3 = os.path.join(folder, 'reads_by_name.txt')
    output_tuples3 = group_by(all_tuples_filename, name_index, value_index, filter=read_filter)
    write_tuple_array(output_tuples3, outfile3, p=True)
    filenames = [outfile1, outfile2, outfile3]
    schemes = ['ALL', 'WRITES', 'READS']
    template_file = os.path.join(script_folder, 'template1.gpt')
    # CDF of per-name medians (column 4) and means (column 5).
    col_no = 4
    pdf_filename = os.path.join(folder, 'median_by_name.pdf')
    get_cdf_and_plot(filenames, schemes, [col_no] * len(schemes), pdf_filename, folder, template_file)
    col_no = 5
    pdf_filename = os.path.join(folder, 'mean_by_name.pdf')
    get_cdf_and_plot(filenames, schemes, [col_no] * len(schemes), pdf_filename, folder, template_file)
    # Output key stats.
    read_median_list = [t[4] for t in output_tuples3]
    read_mean_list = [t[5] for t in output_tuples3]
    write_median_list = [t[4] for t in output_tuples2]
    write_mean_list = [t[5] for t in output_tuples2]
    # fix: removed commented-out dead code that was explicitly marked
    # "delete this" in the original.
    kv_tuples = []
    kv_tuples.extend(get_stat_in_tuples(read_median_list, 'read_median_names'))
    kv_tuples.extend(get_stat_in_tuples(read_mean_list, 'read_mean_names'))
    kv_tuples.extend(get_stat_in_tuples(write_median_list, 'write_median_names'))
    kv_tuples.extend(get_stat_in_tuples(write_mean_list, 'write_mean_names'))
    outputfile = os.path.join(folder, 'latency_stats_names.txt')
    write_tuple_array(kv_tuples, outputfile, p=True)
    os.system('cat ' + outputfile)
def latency_over_time(tuples_file): folder = os.path.split(tuples_file)[0] line_max = 20000 f = open(tuples_file) lines = f.readlines() p_th = 1.0 if line_max < len(lines): p_th = line_max * 1.0 / len(lines) select_lines = [] for line in lines: p = random.random() if p > p_th: continue tokens = line.split() select_lines.append(line) from write_array_to_file import write_array filename = os.path.join(folder, 'latency-over-time.txt') write_array(select_lines, filename, p=True) try: os.system('cd ' + folder + '; gnuplot ' + gnuplot_file_latency) except: print 'ERROR: gnuplot error'
def output_request_trace_from_request_counts_by_name(filename, request_counts):
    """Expand (name, count) tuples into one entry per request, shuffle the
    resulting trace, and write it to filename."""
    trace = []
    for entry in request_counts:
        # entry[0] = name, entry[1] = number of requests for that name.
        trace.extend([entry[0]] * entry[1])
    random.shuffle(trace)
    write_array(trace, filename, p=False)
def output_request_trace_from_request_counts_by_name(filename, request_counts):
    """Write a shuffled request trace: each (name, count) tuple contributes
    `count` occurrences of `name`."""
    requests = [t[0] for t in request_counts for _ in range(t[1])]
    random.shuffle(requests)
    write_array(requests, filename, p=False)
def write_queries():
    """Write `size` random name ids in [0, names-1] to a file.

    Args come from sys.argv: [1] output filename, [2] number of distinct
    names, [3] number of queries to generate.
    """
    filename = sys.argv[1]
    names = int(sys.argv[2])
    size = int(sys.argv[3])
    from random import randint
    # fix: removed the pointless intermediate copy list `y` and the
    # commented-out duplicate extend.
    queries = [randint(0, names - 1) for _ in range(size)]
    from write_array_to_file import write_array
    write_array(queries, filename, p=True)
def write_random_name_trace(filename, names, size):
    """Write `size` uniformly random name ids in [0, names-1] to filename.

    Args:
        filename: output path.
        names: number of distinct names (ids are 0..names-1).
        size: number of trace entries to generate.
    """
    from random import randint
    # fix: removed commented-out dead code (argv parsing, duplicate-extend
    # scaffolding) left over from an earlier script version.
    trace = [randint(0, names - 1) for _ in range(size)]
    from write_array_to_file import write_array
    write_array(trace, filename, p=True)
def run_all_lns(gns_folder, num_ns): tmp_cmd_file = '/tmp/local-name-server.sh' from read_array_from_file import read_col_from_file cmds = [] pl_lns = read_col_from_file('pl_lns') for i, lns in enumerate(pl_lns): node_id = str(i + num_ns) cmd = 'ssh -i auspice.pem -oConnectTimeout=60 -oStrictHostKeyChecking=no -l ec2-user ' + lns + ' "mkdir -p ' + \ gns_folder + '; cd ' + gns_folder + '; python /home/ec2-user/local-name-server.py --lookupTrace ' \ '/home/ec2-user/lookup_' + lns + ' --updateTrace /home/ec2-user/update_' + lns + ' --id ' + node_id + '"' print cmd cmds.append(cmd) write_array(cmds, tmp_cmd_file, p=True) os.system('parallel -a ' + tmp_cmd_file)
def tabulate_stats(graph_format_file): # read params stat_name, output_file, file_names = \ read_graph_format(graph_format_file) print stat_name print output_file print file_names # read values values = read_values(stat_name, file_names) # write values to output_file from write_array_to_file import write_array write_array(values, output_file, p = True) os.system('cat ' + output_file)
def run_all_ns(gns_folder): tmp_cmd_file = '/tmp/name-server.sh' from read_array_from_file import read_col_from_file cmds = [] pl_ns = read_col_from_file(exp_config.ns_file) for i, ns in enumerate(pl_ns): node_id = str(i) cmd = 'ssh -i ' + exp_config.ssh_key + ' -oConnectTimeout=60 -oStrictHostKeyChecking=no -l ' + exp_config.user + ' ' + ns + ' "mkdir -p ' + \ gns_folder + '; cd ' + gns_folder + '; python name-server.py --id ' + node_id + '"' print cmd cmds.append(cmd) write_array(cmds, tmp_cmd_file, p=True) os.system('parallel -a ' + tmp_cmd_file)
def output_stats_by_name(all_tuples_filename):
    """Group latency tuples by name, write per-name stat files, plot CDFs of
    the per-name medians and means, and print summary stats.

    Optionally excludes names that had at least one failed read request.
    """
    value_index = 4
    name_index = 0  # 0 = name, 1 = lns, 2 = ns
    folder = dirname(all_tuples_filename)
    # This option removes names for which there is a failed read request.
    exclude_failed_reads = True
    if exclude_failed_reads:
        failed_reads_names = select_failed_reads_names(all_tuples_filename)
        write_array(failed_reads_names.keys(), os.path.join(folder, 'failed_reads_names.txt'))
        all_tuples_filename = write_all_tuples_excluding_failed(
            all_tuples_filename, failed_reads_names)
    outfile1 = os.path.join(folder, 'all_by_name.txt')
    output_tuples1 = group_by(all_tuples_filename, name_index, value_index)
    write_tuple_array(output_tuples1, outfile1, p=True)
    outfile2 = os.path.join(folder, 'writes_by_name.txt')
    output_tuples2 = group_by(all_tuples_filename, name_index, value_index, filter=write_filter)
    write_tuple_array(output_tuples2, outfile2, p=True)
    outfile3 = os.path.join(folder, 'reads_by_name.txt')
    output_tuples3 = group_by(all_tuples_filename, name_index, value_index, filter=read_filter)
    write_tuple_array(output_tuples3, outfile3, p=True)
    filenames = [outfile1, outfile2, outfile3]
    schemes = ['ALL', 'WRITES', 'READS']
    template_file = os.path.join(script_folder, 'template1.gpt')
    # CDF of per-name medians (column 4) and means (column 5).
    col_no = 4
    pdf_filename = os.path.join(folder, 'median_by_name.pdf')
    get_cdf_and_plot(filenames, schemes, [col_no] * len(schemes), pdf_filename, folder, template_file)
    col_no = 5
    pdf_filename = os.path.join(folder, 'mean_by_name.pdf')
    get_cdf_and_plot(filenames, schemes, [col_no] * len(schemes), pdf_filename, folder, template_file)
    # Output key stats.
    read_median_list = [t[4] for t in output_tuples3]
    read_mean_list = [t[5] for t in output_tuples3]
    write_median_list = [t[4] for t in output_tuples2]
    write_mean_list = [t[5] for t in output_tuples2]
    # fix: removed commented-out dead code that was explicitly marked
    # "delete this" in the original.
    kv_tuples = []
    kv_tuples.extend(get_stat_in_tuples(read_median_list, 'read_median_names'))
    kv_tuples.extend(get_stat_in_tuples(read_mean_list, 'read_mean_names'))
    kv_tuples.extend(get_stat_in_tuples(write_median_list, 'write_median_names'))
    kv_tuples.extend(get_stat_in_tuples(write_mean_list, 'write_mean_names'))
    outputfile = os.path.join(folder, 'latency_stats_names.txt')
    write_tuple_array(kv_tuples, outputfile, p=True)
    os.system('cat ' + outputfile)
def write_sequence_name_trace():
    """Write the ids 0..size-1 in order to a trace file.

    sys.argv: [1] output filename, [2] number of entries.
    """
    filename = sys.argv[1]
    size = int(sys.argv[2])
    from write_array_to_file import write_array
    write_array(range(size), filename, p=True)