def save_data(self, start_end_times): """ Calculates the actual request rate and prints as the first line. Saves the CDFs of latency and bandwidth betweeen 1-2 minutes to disk. Assumes that all redis client hosts report correct times. """ # Save the CDF of the start times. TODO: Debug. with open('data/distr_redis_raw_start_time_cdf.txt', 'w') as f: start_time_list = [t for (t, _, _) in start_end_times] for (t, p) in util.make_cdf_table(start_time_list): print >> f, '%.5f' % t, p # Save the start end times list. TODO: Debug. with open('data/distr_redis_raw_start_end_times.txt','w') as f: for (start_time, end_time, data_file) in start_end_times: print >> f, '%.5f' % start_time, end_time, data_file # Filter out irrelevant time values. Focus on 60th-120th seconds. min_time = min([start_time for (start_time, _, _) in start_end_times]) def is_steady_state(start_end_time_tuple): (start_time, _, _) = start_end_time_tuple return min_time + 60 <= start_time <= min_time + 120 filtered_times = filter(is_steady_state, start_end_times) filtered_times.sort() print 'Raw data size:', len(start_end_times), print 'Data between 60-120th seconds:', len(filtered_times) # Figure out the actual gaps in milliseconds. start_time_list = [start for (start, _, _) in filtered_times] gap_list = [] for index in range(0, len(start_time_list) - 1): gap = start_time_list[index + 1] - start_time_list[index] gap_list.append(gap * 1000.0) gap_list.sort() print 'Client gap: (mean, stdev) =', util.get_mean_and_stdev(gap_list), print 'median =', gap_list[len(gap_list)/2] # Calculate latency and bandwidth. latency_list = [] bandwidth_list = [] for (start_time, end_time, _) in filtered_times: if end_time is None: latency = -1 bandwidth = 0 else: latency = end_time - start_time # seconds bandwidth = DATA_LENGTH / latency # Bytes/s latency_list.append(latency * 1000.0) # milliseconds bandwidth_list.append(bandwidth * 8.0 / 1000000.0) # Mbps # Write to file. 
with open('data/distr_redis_latency.txt', 'w') as f: for (v, p) in util.make_cdf_table(latency_list): print >> f, v, p with open('data/distr_redis_bw.txt', 'w') as f: for (v, p) in util.make_cdf_table(bandwidth_list): print >> f, v, p
def data_analysis(): start_end_times = [] for path in os.listdir('.'): if path.startswith('simplified-') and path.endswith('.tmp'): with open(path) as f: start_end_times += pickle.load(f) print 'Loaded', path # Extract steady state. min_time = min([start_time for (start_time, _) in start_end_times]) def is_steady_state(start_end_time_tuple): (start_time, _) = start_end_time_tuple return min_time + INTERESTING_TIME_START <= start_time <= min_time + INTERESTING_TIME_END filtered_times = filter(is_steady_state, start_end_times) filtered_times.sort() print 'Raw data size:', len(start_end_times), print 'Data at steady state:', len(filtered_times) # Figure out the actual gaps in milliseconds. start_time_list = [start for (start, _) in filtered_times] gap_list = [] for index in range(0, len(start_time_list) - 1): gap = start_time_list[index + 1] - start_time_list[index] gap_list.append(gap * 1000.0) gap_list.sort() print 'Client gap: (mean, stdev) =', util.get_mean_and_stdev(gap_list), print 'median =', gap_list[len(gap_list) / 2] # Calculate latency and bandwidth. latency_list = [] bandwidth_list = [] for (start_time, end_time) in filtered_times: if end_time is None: latency = -1 bandwidth = 0 else: latency = end_time - start_time # seconds bandwidth = DATA_LENGTH / latency # Bytes/s latency_list.append(latency * 1000.0) # milliseconds bandwidth_list.append(bandwidth * 8.0 / 1000000.0) # Mbps # Write to file. with open('data/simplified_redis_latency.csv', 'w') as f: for (v, p) in util.make_cdf_table(latency_list): print >> f, '%.10f,%.10f' % (v, p) with open('data/simplified_redis_bw.csv', 'w') as f: for (v, p) in util.make_cdf_table(bandwidth_list): print >> f, '%.10f,%.10f' % (v, p)
def _make_cdf(csv_file, column=0, entry_count=None):
    """ Reads one column of numeric values from csv_file and returns its
    CDF table via make_cdf_table().

    The file may be comma-, tab-, or whitespace-delimited (detected per
    line, in that order); blank lines are skipped. If entry_count is
    given and nonzero, the value list is truncated to that many entries
    and zero-padded up to that length.

    """
    values = []
    with open(csv_file) as input_f:
        for raw_line in input_f:
            stripped = raw_line.strip()
            if not stripped:
                continue
            if ',' in stripped:
                fields = stripped.split(',')
            elif '\t' in stripped:
                fields = stripped.split('\t')
            else:
                fields = stripped.split()
            values.append(float(fields[column]))
    if entry_count:
        values = values[:entry_count]
        values.extend([0] * (entry_count - len(values)))
    return make_cdf_table(values)
def data_analysis(): start_end_times = [] # Load experiment data file try: data_file = 'async-' + os.environ['EXP_NAME'] except KeyError: data_file = None for path in os.listdir('data'): if (path == data_file) or \ (data_file is None and path.startswith('async-') and path.endswith('.tmp')): with open('data/' + path) as f: start_end_times += pickle.load(f) print 'Loaded', path # Extract steady state. min_time = min([start_time for (start_time, _) in start_end_times]) def is_steady_state(start_end_time_tuple): (start_time, _) = start_end_time_tuple return min_time + INTERESTING_TIME_START <= start_time <= min_time + INTERESTING_TIME_END filtered_times = filter(is_steady_state, start_end_times) filtered_times.sort() print 'Raw data size:', len(start_end_times), print 'Data at steady state:', len(filtered_times) # Figure out the actual gaps in milliseconds. start_time_list = [start for (start, _) in filtered_times] gap_list = [] for index in range(0, len(start_time_list) - 1): gap = start_time_list[index + 1] - start_time_list[index] gap_list.append(gap * 1000.0) gap_list.sort() print 'Client gap: (mean, stdev) =', util.get_mean_and_stdev(gap_list), print 'median =', gap_list[len(gap_list)/2] # Save start_time list and gap list. with open('data/start_times.csv', 'w') as start_time_f: for start_time_v in start_time_list: print >> start_time_f, '%.8f' % start_time_v with open('data/gaps.csv', 'w') as gap_f: for (v, p) in util.make_cdf_table(gap_list): print >> gap_f, '%f,%f' % (v, p) # Calculate latency and bandwidth. latency_list = [] bandwidth_list = [] for (start_time, end_time) in filtered_times: if end_time is None: latency = 1000 bandwidth = 0 else: latency = end_time - start_time # seconds bandwidth = DATA_LENGTH / latency # Bytes/s latency_list.append(latency * 1000.0) # milliseconds bandwidth_list.append(bandwidth * 8.0 / 1000000.0) # Mbps # Write to file. print 'Writing to data/async_redis_latency.csv...' 
with open('data/async_redis_latency.csv', 'w') as f: for (v, p) in util.make_cdf_table(latency_list): print >> f, '%.10f,%.10f' % (v, p) print 'Writing to data/async_redis_bw.csv...' with open('data/async_redis_bw.csv', 'w') as f: for (v, p) in util.make_cdf_table(bandwidth_list): print >> f, '%.10f,%.10f' % (v, p) # Analyze timings of OF events. subprocess.call('cp of_timings.csv data/; cp /tmp/client.pcap /tmp/server.pcap data/', shell=True) import timing_analysis timing_analysis.main('data/client.pcap', 'data/of_timings.csv', 'data/server.pcap')