def get_first_experiment_path(experiments):
    """Return the directory containing the first experiment's files.

    Looks up the data files for experiments[0] via the (cached) test-id
    file list and returns the directory of the first file found.

    Keyword arguments:
    experiments -- non-empty list of experiment (test) IDs

    Aborts if the experiment list is empty or no files can be found for
    the first experiment (e.g. because teacup_dir_cache.txt is outdated).
    """
    # guard against an empty list; experiments[0] below would otherwise
    # raise an uninformative IndexError
    if len(experiments) == 0:
        abort('Cannot determine experiment path: empty experiment list')

    dir_name = ''
    files = get_testid_file_list('', experiments[0], '', 'LC_ALL=C sort')
    if len(files) > 0:
        dir_name = os.path.dirname(files[0])
    else:
        abort('Cannot find experiment %s\n'
              'Remove outdated teacup_dir_cache.txt if files were moved.' %
              experiments[0])

    return dir_name
def get_part_hosts(test_id):
    """Return the list of hosts that participated in experiment test_id.

    Host names are parsed out of the per-host uname.log.gz file names
    (<test_id>_<host>_uname.log.gz). Results are memoised in the
    module-level part_hosts cache, so the file listing runs only once
    per test id.
    """
    global part_hosts

    # cache hit: return the previously computed host list
    if test_id in part_hosts:
        return part_hosts[test_id]

    hosts = []
    # every participating host leaves a uname.log.gz file; extract the
    # host name component from each file name
    for log_name in get_testid_file_list('', test_id, 'uname.log.gz', ''):
        match = re.search('.*_(.*)_uname.log.gz', log_name)
        if match:
            hosts.append(match.group(1))

    part_hosts[test_id] = hosts
    return hosts
def analyse_2d_density(
        exp_list='experiments_completed.txt',
        res_dir='',
        out_dir='',
        source_filter='',
        min_values='3',
        xmetric='throughput',
        ymetric='tcprtt',
        variables='',
        out_name='',
        xmin='0',
        xmax='0',
        ymin='0',
        ymax='0',
        lnames='',
        group_by='aqm',
        replot_only='0',
        pdf_dir='',
        stime='0.0',
        etime='0.0',
        ts_correct='1',
        smoothed='1',
        link_len='0',
        plot_params='',
        plot_script='',
        xstat_index='',
        ystat_index='',
        dupacks='0',
        cum_ackseq='1',
        merge_data='0',
        #sburst='1', eburst='0', test_id_prefix='[0-9]{8}\-[0-9]{6}_experiment_',
        sburst='1',
        eburst='0',
        test_id_prefix='exp_[0-9]{8}\-[0-9]{6}_',
        slowest_only='0',
        query_host=''):
    """Plot a 2d density / ellipse plot of two metrics across experiments.

    Parameters are strings (fabric task convention); flag parameters use
    '0'/'1'. Key ones:
    exp_list       -- file listing experiment (test) IDs, one per line
    res_dir        -- directory of already-extracted data ('' = extract now)
    out_dir        -- output directory for extracted files
    source_filter  -- ';'-separated flow filters (max 12)
    min_values     -- minimum number of data rows a file must have
    xmetric/ymetric-- metric names for x and y axes (checked below)
    group_by       -- ';'-separated experiment-name variables used to group
    merge_data     -- '1' merges responder flows into one data series
    lnames         -- ';'-separated legend-name overrides

    Aborts on unknown metrics or too many source filters; produces a PDF
    via plot_2d_density().
    """
    test_id_pfx = ''

    # validate both metrics up front (get_metric_params returns None for
    # unknown metric names)
    check = get_metric_params(xmetric, smoothed, ts_correct)
    if check == None:
        abort('Unknown metric %s specified with xmetric' % xmetric)
    check = get_metric_params(ymetric, smoothed, ts_correct)
    if check == None:
        abort('Unknown metric %s specified with ymetric' % ymetric)

    #if source_filter == '':
    #    abort('Must specify at least one source filter')

    if len(source_filter.split(';')) > 12:
        abort('Cannot have more than 12 filters')

    # XXX more param checking

    # make sure res_dir has valid form (out_dir is handled by extract methods)
    res_dir = valid_dir(res_dir)

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # read test ids
    experiments = read_experiment_ids(exp_list)

    # get path based on first experiment id
    dir_name = get_first_experiment_path(experiments)

    # if we haven't got the extracted data, run the extract method(s) first
    # (once per experiment, for the x metric then the y metric)
    if res_dir == '':
        for experiment in experiments:

            (ex_function, kwargs) = get_extract_function(xmetric, link_len,
                                                         xstat_index, sburst=sburst,
                                                         eburst=eburst, slowest_only=slowest_only,
                                                         query_host=query_host)
            (dummy, out_files, out_groups) = ex_function(test_id=experiment, out_dir=out_dir,
                                                         source_filter=source_filter,
                                                         replot_only=replot_only,
                                                         ts_correct=ts_correct,
                                                         **kwargs)

            (ex_function, kwargs) = get_extract_function(ymetric, link_len,
                                                         ystat_index, sburst=sburst,
                                                         eburst=eburst, slowest_only=slowest_only,
                                                         query_host=query_host)
            (dummy, out_files, out_groups) = ex_function(test_id=experiment, out_dir=out_dir,
                                                         source_filter=source_filter,
                                                         replot_only=replot_only,
                                                         ts_correct=ts_correct,
                                                         **kwargs)

        # relative out_dir is interpreted relative to the experiment dir
        if out_dir == '' or out_dir[0] != '/':
            res_dir = dir_name + '/' + out_dir
        else:
            res_dir = out_dir
    else:
        if res_dir[0] != '/':
            res_dir = dir_name + '/' + res_dir

    # make sure we have trailing slash
    res_dir = valid_dir(res_dir)

    if pdf_dir == '':
        pdf_dir = res_dir
    else:
        if pdf_dir[0] != '/':
            pdf_dir = dir_name + '/' + pdf_dir
        pdf_dir = valid_dir(pdf_dir)
        # if pdf_dir specified create if it doesn't exist
        mkdir_p(pdf_dir)

    # build match string from variables
    (match_str, match_str2) = build_match_strings(experiments[0], variables,
                                                  test_id_prefix)

    # filter out the experiments to plot, generate x-axis labels, get test id prefix
    (fil_experiments,
     test_id_pfx,
     dummy) = filter_experiments(experiments, match_str, match_str2)

    # get groups based on group_by variable: each distinct combination of
    # group_by variable values becomes one group (level) with its own
    # legend entry
    group_idx = 1
    levels = {}
    groups = []
    leg_names = []
    _experiments = []
    for experiment in fil_experiments:
        level = ''
        add_exp = True
        for g in group_by.split(';'):
            # locate the variable name in the experiment id and take the
            # value between the following two underscores
            p = experiment.find(g)
            if p > -1:
                s = experiment.find('_', p)
                s += 1
                e = experiment.find('_', s)
                level += g + ':' + experiment[s:e] + ' '
            else:
                add_exp = False
                break

        # remove the final space from the string
        level = level[:-1]

        if add_exp == True:
            _experiments.append(experiment)

            #print('level: ' + level)

            if level not in levels:
                levels[level] = group_idx
                group_idx += 1
                leg_names.append(level)

            # one group entry per data series: a single merged series, or
            # one series per source filter
            if merge_data == '1':
                groups.append(levels[level])
            else:
                for i in range(len(source_filter.split(';'))):
                    groups.append(levels[level])

    fil_experiments = _experiments

    # get metric parameters and list of data files

    # get the metric parameter for both x and y
    # (tuple index 0 = file extension, 1 = axis label, 2 = column index,
    #  3 = scaler, 5/6 = aggregation/differentiation flags — see
    #  get_metric_params)
    x_axis_params = get_metric_params(xmetric, smoothed, ts_correct,
                                      xstat_index, dupacks, cum_ackseq,
                                      slowest_only)
    y_axis_params = get_metric_params(ymetric, smoothed, ts_correct,
                                      ystat_index, dupacks, cum_ackseq,
                                      slowest_only)

    x_ext = x_axis_params[0]
    y_ext = y_axis_params[0]

    # if we merge responders make sure we only use the merged files
    if merge_data == '1':
        # reset source filter so we match the merged file
        sfil.clear()
        sfil = SourceFilter('S_0.0.0.0_0')

    x_files = []
    y_files = []
    for experiment in fil_experiments:
        _x_files = []
        _y_files = []
        _x_ext = x_ext
        _y_ext = y_ext

        _files = get_testid_file_list('', experiment, _x_ext,
                                      'LC_ALL=C sort', res_dir)
        if merge_data == '1':
            _x_ext += '.all'
            _files = merge_data_files(_files)
        _x_files += _files

        _files = get_testid_file_list('', experiment, _y_ext,
                                      'LC_ALL=C sort', res_dir)
        if merge_data == '1':
            _y_ext += '.all'
            _files = merge_data_files(_files)
        _y_files += _files

        # flow tuple (srcip_srcport_dstip_dstport) embedded in the file name
        match_str = '.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _x_ext
        for f in _x_files:
            #print(f)
            res = re.search(match_str, f)
            #print(res.group(1))
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f,
                          capture=True))
                if rows > int(min_values):
                    x_files.append(f)

        match_str = '.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _y_ext
        for f in _y_files:
            # print(f)
            res = re.search(match_str, f)
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f,
                          capture=True))
                if rows > int(min_values):
                    y_files.append(f)

    yindexes = [str(x_axis_params[2]), str(y_axis_params[2])]
    yscalers = [str(x_axis_params[3]), str(y_axis_params[3])]
    aggr_flags = [x_axis_params[5], y_axis_params[5]]
    diff_flags = [x_axis_params[6], y_axis_params[6]]

    if lnames != '':
        lnames_arr = lnames.split(';')
        if len(lnames_arr) != len(leg_names):
            # NOTE(review): "qual" is a typo for "equal" in this message
            abort(
                'Number of legend names must be qual to the number of source filters')
        leg_names = lnames_arr

    print(x_files)
    print(y_files)
    print(groups)
    print(leg_names)

    # pass the data files and auxilary info to plot function
    if out_name != '':
        oprefix = out_name + '_' + test_id_pfx + '_' + xmetric + '_' + ymetric
    else:
        oprefix = test_id_pfx + '_' + xmetric + '_' + ymetric
    title = oprefix

    plot_2d_density(title, x_files, y_files, x_axis_params[1],
                    y_axis_params[1], yindexes, yscalers, 'pdf', oprefix,
                    pdf_dir, x_axis_params[4], y_axis_params[4], aggr_flags,
                    diff_flags, xmin, xmax, ymin, ymax, stime, etime, groups,
                    leg_names, plot_params, plot_script)

    # done
    puts('\n[MAIN] COMPLETED analyse_2d_density %s \n' % test_id_pfx)
def analyse_cmpexp(
        exp_list='experiments_completed.txt',
        res_dir='',
        out_dir='',
        source_filter='',
        min_values='3',
        omit_const='0',
        metric='throughput',
        ptype='box',
        variables='',
        out_name='',
        ymin='0',
        ymax='0',
        lnames='',
        group_by_prefix='0',
        omit_const_xlab_vars='0',
        replot_only='0',
        pdf_dir='',
        stime='0.0',
        etime='0.0',
        ts_correct='1',
        smoothed='1',
        link_len='0',
        plot_params='',
        plot_script='',
        stat_index='',
        dupacks='0',
        cum_ackseq='1',
        merge_data='0',
        sburst='1',
        #eburst='0', test_id_prefix='[0-9]{8}\-[0-9]{6}_experiment_',
        eburst='0',
        test_id_prefix='exp_[0-9]{8}\-[0-9]{6}_',
        slowest_only='0',
        res_time_mode='0',
        query_host=''):
    """Compare one metric across different experiments (box/mean/median plot).

    Parameters are strings (fabric task convention); flag parameters use
    '0'/'1'. Key ones:
    exp_list      -- file listing experiment (test) IDs, one per line
    res_dir       -- directory of already-extracted data ('' = extract now)
    source_filter -- ';'-separated flow filters (mandatory, max 12)
    metric        -- metric name (validated via get_metric_params)
    ptype         -- plot type: 'box', 'mean' or 'median'
    group_by_prefix -- '1' groups data series by test-id prefix and flow
    res_time_mode -- response-time normalisation ('1' nominal, '2' ratio);
                     only meaningful when metric == 'restime'
    merge_data    -- '1' merges responder flows into one data series

    Produces a PDF via plot_cmpexp().
    """
    if ptype != 'box' and ptype != 'mean' and ptype != 'median':
        abort('ptype must be either box, mean or median')

    check = get_metric_params(metric, smoothed, ts_correct)
    if check == None:
        abort('Unknown metric %s specified' % metric)

    if source_filter == '':
        abort('Must specify at least one source filter')

    if len(source_filter.split(';')) > 12:
        abort('Cannot have more than 12 filters')

    # prevent wrong use of res_time_mode
    if metric != 'restime' and res_time_mode != '0':
        res_time_mode = '0'
    if ptype == 'box' and res_time_mode == '2':
        res_time_mode = '0'

    # XXX more param checking

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # read test ids
    experiments = read_experiment_ids(exp_list)

    # get path based on first experiment id
    dir_name = get_first_experiment_path(experiments)

    # if we haven't got the extracted data, run the extract method(s) first
    if res_dir == '':
        for experiment in experiments:
            (ex_function, kwargs) = get_extract_function(metric, link_len,
                                                         stat_index, sburst=sburst,
                                                         eburst=eburst, slowest_only=slowest_only,
                                                         query_host=query_host)
            (dummy, out_files, out_groups) = ex_function(test_id=experiment, out_dir=out_dir,
                                                         source_filter=source_filter,
                                                         replot_only=replot_only,
                                                         ts_correct=ts_correct,
                                                         **kwargs)

        # relative out_dir is interpreted relative to the experiment dir
        if out_dir == '' or out_dir[0] != '/':
            res_dir = dir_name + '/' + out_dir
        else:
            res_dir = out_dir
    else:
        if res_dir[0] != '/':
            res_dir = dir_name + '/' + res_dir

    # make sure we have trailing slash
    res_dir = valid_dir(res_dir)

    if pdf_dir == '':
        pdf_dir = res_dir
    else:
        if pdf_dir[0] != '/':
            pdf_dir = dir_name + '/' + pdf_dir
        pdf_dir = valid_dir(pdf_dir)
        # if pdf_dir specified create if it doesn't exist
        mkdir_p(pdf_dir)

    # build match string from variables
    (match_str, match_str2) = build_match_strings(experiments[0], variables,
                                                  test_id_prefix)

    # filter out the experiments to plot, generate x-axis labels, get test id prefix
    (fil_experiments,
     test_id_pfx,
     xlabs) = filter_experiments(experiments, match_str, match_str2)

    # get out data files based on filtered experiment list and source_filter
    (ext,
     ylab,
     yindex,
     yscaler,
     sep,
     aggr,
     diff) = get_metric_params(metric, smoothed, ts_correct,
                               stat_index, dupacks, cum_ackseq, slowest_only)

    if res_time_mode == '1':
        plot_params += ' NOMINAL_RES_TIME="1"'
    if res_time_mode == '2':
        if ptype == 'median':
            ylab = 'Median resp time / nominal resp time'
        elif ptype == 'mean':
            ylab = 'Mean resp time / nominal resp time'
        plot_params += ' RATIO_RES_TIME="1"'

    leg_names = source_filter.split(';')

    # if we merge responders make sure we only use the merged files
    if merge_data == '1':
        # set label to indicate merged data
        leg_names = ['Merged data']
        # reset source filter so we match the merged file
        sfil.clear()
        source_filter = 'S_0.0.0.0_0'
        sfil = SourceFilter(source_filter)

    file_names = []
    for experiment in fil_experiments:
        out_files = {}
        _ext = ext

        files = get_testid_file_list('', experiment, '%s' % _ext,
                                     'LC_ALL=C sort', res_dir)
        if merge_data == '1':
            # change extension
            _ext += '.all'
            files = merge_data_files(files)

        #print(files)
        # flow tuple (srcip_srcport_dstip_dstport) embedded in the file name
        match_str = '.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _ext
        for f in files:
            # print(f)
            res = re.search(match_str, f)
            #print(res.group(1))
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f,
                          capture=True))
                if rows > int(min_values):
                    out_files[res.group(1)] = f

        #print(out_files)
        #print(leg_names)
        if len(out_files) < len(leg_names):
            abort(
                'No data files for some of the source filters for experiment %s'
                % experiment)

        sorted_files = sort_by_flowkeys(out_files, source_filter)
        for name, file_name in sorted_files:
            file_names.append(file_name)

    if group_by_prefix == '1':
        # group by test prefix (and flow)

        # first, get all test id prefixes
        test_id_pfxs = {}
        for experiment in fil_experiments:
            res = re.search(match_str2, experiment)
            if res:
                test_id_pfxs[res.group(1)] = 1

        # second, sort files so that same parameter combinations for different
        # prefixes are together
        # if we have multiple prefixes, create legend entry for each
        # prefix+flow combination
        _file_names = [''] * len(file_names)
        _leg_names = []
        pfx_cnt = len(test_id_pfxs)
        i = 0
        j = -1
        last_pfx = ''
        for name in file_names:
            # NOTE(review): curr_pfx stays unbound/stale if no prefix
            # matches this file name — presumably every file name contains
            # one of the prefixes by construction; verify against callers
            for p in test_id_pfxs:
                if name.find(p) > -1:
                    curr_pfx = p
                    break

            if curr_pfx != last_pfx:
                i = 0
                j += 1
                for l in leg_names:
                    _leg_names.append(curr_pfx + '-' + l)

            _file_names[i * pfx_cnt + j] = name

            i += 1
            last_pfx = curr_pfx

        file_names = _file_names
        leg_names = _leg_names

        # remove duplicates in the x-axis labels
        xlabs = list(set(xlabs))

    if lnames != '':
        lnames_arr = lnames.split(';')
        if len(lnames_arr) != len(leg_names):
            # NOTE(review): "qual" is a typo for "equal" in this message
            abort(
                'Number of legend names must be qual to the number of source filters')
        leg_names = lnames_arr

    # filter out unchanged variables in the x labels (need at least 2 labels)
    if omit_const_xlab_vars == '1' and len(xlabs) > 1:

        xlabs_arrs = {}
        xlabs_changed = {}

        for i in range(len(xlabs)):
            xlabs_arrs[i] = xlabs[i].split('\n')

        # a label line i is "changed" if it differs between any two labels
        for i in range(len(xlabs_arrs[0])):
            changed = False
            xlab_var = xlabs_arrs[0][i]
            for j in range(1, len(xlabs)):
                if xlabs_arrs[j][i] != xlab_var:
                    changed = True
                    break

            xlabs_changed[i] = changed

        # keep only the changed lines in each label
        for i in range(len(xlabs)):
            tmp = []
            for j in range(len(xlabs_arrs[i])):
                if xlabs_changed[j]:
                    tmp.append(xlabs_arrs[i][j].replace('_', ' ', 1))

            xlabs[i] = '\n'.join(tmp)

    print(leg_names)
    print(file_names)

    # pass the data files and auxilary info to plot function
    if out_name != '':
        oprefix = out_name + '_' + test_id_pfx + '_' + metric + '_' + ptype
    else:
        oprefix = test_id_pfx + '_' + metric + '_' + ptype
    title = oprefix

    plot_cmpexp(title, file_names, xlabs, ylab, yindex, yscaler, 'pdf',
                oprefix, pdf_dir, sep, aggr, diff, omit_const, ptype, ymin,
                ymax, leg_names, stime, etime, plot_params, plot_script)

    # done
    puts('\n[MAIN] COMPLETED analyse_cmpexp %s \n' % test_id_pfx)
def get_clock_offsets(exp_list='experiments_completed.txt',
                      test_id='',
                      pkt_filter='',
                      baseline_host='',
                      out_dir=''):
    """Compute per-host clock offsets relative to a baseline host.

    Reads each host's control-interface tcpdump of the broadcast pings,
    matches pings by ICMP sequence number and writes, per experiment, a
    table of time offsets (rows = ping times, cols = hosts) to
    <test_id><CLOCK_OFFSET_FILE_EXT>.

    Keyword arguments:
    exp_list      -- file listing test IDs (used if test_id is empty)
    test_id       -- ';'-separated test IDs
    pkt_filter    -- custom tcpdump filter (default: icmp to bc address)
    baseline_host -- host offsets are computed against (default: router)
    out_dir       -- output directory (relative paths are placed under
                     the experiment directory)
    """
    if len(out_dir) > 0 and out_dir[-1] != '/':
        out_dir += '/'

    if test_id == '':
        try:
            with open(exp_list) as f:
                test_id_arr = f.readlines()
        except IOError:
            abort('Cannot open file %s' % exp_list)
    else:
        test_id_arr = test_id.split(';')

    if len(test_id_arr) == 0 or test_id_arr[0] == '':
        abort('Must specify test_id parameter')

    # specify complete tcpdump parameter list
    tcpdump_filter = '-tt -r - -n ' + pkt_filter

    for test_id in test_id_arr:
        test_id = test_id.rstrip()

        # first find tcpdump files
        tcpdump_files = get_testid_file_list('', test_id,
                                             '_ctl.dmp.gz', '')

        if len(tcpdump_files) == 0:
            warn('No tcpdump files for control interface for %s' % test_id)
            continue

        # if we have tcpdumps for control interface we can assume broadcast ping
        # was enabled
        dir_name = os.path.dirname(tcpdump_files[0])
        # then look for tpconf_vars.log.gz file in that directory
        var_file = local('find -L %s -name "*tpconf_vars.log.gz"' % dir_name,
                         capture=True)

        bc_addr = ''
        router_name = ''
        if len(var_file) > 0:
            # new approach without using config.py
            # XXX no caching here yet, assume we only generate clockoffset file once
            # per experiment

            # unzip archived file
            local('gzip -cd %s > %s' % (var_file, TMP_CONF_FILE))

            # load the TPCONF_variables into oldconfig
            oldconfig = imp.load_source('oldconfig', TMP_CONF_FILE)

            # remove temporary unzipped file
            try:
                os.remove(TMP_CONF_FILE)
                os.remove(TMP_CONF_FILE + 'c')  # remove the compiled file as well
            except OSError:
                pass

            try:
                bc_addr = oldconfig.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = oldconfig.TPCONF_router[0].split(':')[0]
        else:
            # old approach using config.py
            try:
                bc_addr = config.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = config.TPCONF_router[0].split(':')[0]

        if bc_addr == '':
            # assume default multicast address
            bc_addr = '224.0.1.199'

        # specify complete tcpdump parameter list
        if pkt_filter != '':
            tcpdump_filter = '-tt -r - -n ' + pkt_filter
        else:
            tcpdump_filter = '-tt -r - -n ' + 'icmp and dst host ' + bc_addr

        if baseline_host == '':
            baseline_host = router_name

        # now read timestamps from each host's tcpdump

        # map of host names (or IPs) and sequence numbers to timestamps
        host_times = {}

        for tcpdump_file in tcpdump_files:
            # host name is embedded in the dump file name
            host = local(
                'echo %s | sed "s/.*_\([a-z0-9\.]*\)_ctl.dmp.gz/\\1/"' %
                tcpdump_file,
                capture=True)

            host_times[host] = {}

            #print(host)
            #print(host_times)

            # We pipe gzcat through to tcpdump. Note, since tcpdump exits early
            # (due to "-c num_samples") gzcat's pipe will collapse and gzcat
            # will complain bitterly. So we dump its stderr to stderrhack.
            init_zcat = Popen(['zcat ' + tcpdump_file],
                              stdin=None,
                              stdout=PIPE,
                              stderr=stderrhack,
                              shell=True)
            init_tcpdump = Popen(['tcpdump ' + tcpdump_filter],
                                 stdin=init_zcat.stdout,
                                 stdout=PIPE,
                                 stderr=stderrhack,
                                 shell=True)

            # parse each ping: field 0 = timestamp, field 11 = icmp seq number
            for line in init_tcpdump.stdout.read().splitlines():
                _time = line.split(" ")[0]
                _seq = int(line.split(" ")[11].replace(',', ''))
                host_times[host][_seq] = _time

        #print(host_times)

        # get time differences and get host list
        diffs = {}
        ref_times = {}
        host_str = ''
        host_list = sorted(host_times.keys())
        # getting hosts from the config is problematic if different
        # experiments with different configs in same directory
        #host_list = sorted(config.TPCONF_router + config.TPCONF_hosts)

        for host in host_list:
            host_str += ' ' + host

            if host not in host_times:
                continue

            for seq in sorted(host_times[host].keys()):
                if seq not in diffs:
                    diffs[seq] = {}

                if baseline_host in host_times and \
                   seq in host_times[baseline_host]:
                    diffs[seq][host] = float(host_times[host][seq]) - \
                        float(host_times[baseline_host][seq])
                    ref_times[seq] = host_times[baseline_host][seq]
                else:
                    # this only happens:
                    # - if some other host has recorded more pings, OK we don't care
                    #   as this is only at the end after an experiment was finished
                    # - in old versions of TEACUP if TPCONF_router was modified
                    #warn('Cant find baseline host %s timestamp data number %i' %
                    #     (baseline_host, str(seq)))
                    diffs[seq][host] = None
                    ref_times[seq] = None

        #print(diffs)

        # NOTE(review): out_dir is rewritten here inside the per-test_id
        # loop, so with multiple test IDs and a relative out_dir the path
        # compounds across iterations — verify intended behaviour
        if out_dir == '' or out_dir[0] != '/':
            dir_name = os.path.dirname(tcpdump_files[0])
            out_dir = dir_name + '/' + out_dir

        mkdir_p(out_dir)

        out_name = out_dir + test_id + CLOCK_OFFSET_FILE_EXT

        # write table of offsets (rows = time, cols = hosts)
        f = open(out_name, 'w')
        f.write('# ref_time' + host_str + '\n')
        for seq in sorted(diffs.keys()):
            if ref_times[seq] is not None:
                f.write(ref_times[seq])
            else:
                # this case should never happen
                continue

            f.write(' ')

            for host in host_list:
                if host in diffs[seq] and diffs[seq][host] is not None:
                    f.write('{0:.6f}'.format(diffs[seq][host]))
                else:
                    f.write('NA')
                if host != host_list[-1]:
                    f.write(' ')

            f.write('\n')

        f.close()
def get_address_pair_analysis(test_id, host, do_abort='1'):
    """Map a host name/address to its (external, internal) address pair.

    'host' may be either the external (control network) name or the
    internal (experiment network) name/address; the function works out
    which and returns the tuple (external, internal). Returns ('', '')
    if the external name is not among the experiment's hosts.

    Uses TPCONF_host_internal_ip from the experiment's archived
    tpconf_vars.log.gz (TEACUP >= 0.9); falls back to config.py via
    get_address_pair() for older experiments. Lookups are memoised in
    the module-level caches, keyed by both test id and directory
    (a None cache value marks "old config, no vars file").

    Keyword arguments:
    test_id  -- experiment (test) ID
    host     -- host name or address to resolve
    do_abort -- passed through to get_address_pair() in the old-config path
    """
    global host_internal_ip_cache
    global host_list_cache

    internal = ''
    external = ''

    TMP_CONF_FILE = '___oldconfig.py'

    # XXX the whole old config access should be moved into separate module as
    # similar code is also in clockoffset

    # prior to TEACUP version 0.9 it was required to run the analysis with a config
    # file that had config.TPCONF_host_internal_ip as it was used to run the experiment
    # (or a superset of it). Since version 0.9 we use config.TPCONF_host_internal_ip
    # (as well as config.TPCONF_hosts and config.TPCONF_router) from the file
    # <test_id_prefix>_tpconf_vars.log.gz in the test experiment directory.

    if test_id not in host_internal_ip_cache:
        # first find the directory but looking for mandatory uname file
        uname_file = get_testid_file_list('', test_id,
                                          'uname.log.gz', '')
        dir_name = os.path.dirname(uname_file[0])

        if dir_name in host_internal_ip_cache:
            # create test id cache entry from directory entry
            host_internal_ip_cache[test_id] = host_internal_ip_cache[dir_name]
            if host_internal_ip_cache[test_id] != None:
                host_list_cache[test_id] = host_list_cache[dir_name]
        else:
            # try to find old config information

            # look for tpconf_vars.log.gz file in that directory
            var_file = local('find -L %s -name "*tpconf_vars.log.gz"' %
                             dir_name,
                             capture=True)

            if len(var_file) > 0:
                # new approach without using config.py

                # unzip archived file
                local('gzip -cd %s > %s' % (var_file, TMP_CONF_FILE))

                # load the TPCONF_variables into oldconfig
                oldconfig = imp.load_source('oldconfig', TMP_CONF_FILE)

                # remove temporary unzipped file
                try:
                    os.remove(TMP_CONF_FILE)
                    os.remove(TMP_CONF_FILE + 'c')  # remove the compiled file as well
                except OSError:
                    pass

                # store data in cache (both under test id and directory name)
                host_internal_ip_cache[test_id] = oldconfig.TPCONF_host_internal_ip
                host_list_cache[test_id] = oldconfig.TPCONF_hosts + oldconfig.TPCONF_router
                host_internal_ip_cache[dir_name] = oldconfig.TPCONF_host_internal_ip
                host_list_cache[dir_name] = oldconfig.TPCONF_hosts + oldconfig.TPCONF_router
            else:
                # old approach using the functions in hostint.py that access config.py

                # store empty value in cache (both under test id and directory name)
                host_internal_ip_cache[test_id] = None
                host_internal_ip_cache[dir_name] = None

    if host_internal_ip_cache[test_id] != None:
        # new approach

        # pretend it is an external name and perform lookup
        internal = host_internal_ip_cache[test_id].get(host, [])
        if len(internal) == 0:
            # host is internal name, so need to find external name
            internal = host
            for e, i in host_internal_ip_cache[test_id].items():
                if i[0] == host:
                    external = e
        else:
            # host is external name
            internal = internal[0]
            external = host

        hosts = host_list_cache[test_id]
    else:
        # old approach
        (external, internal) = get_address_pair(host, do_abort)

        hosts = get_part_hosts(test_id)

    if external not in hosts:
        return ('', '')
    else:
        return (external, internal)
def _extract_owd_pktloss(test_id='', out_dir='', replot_only='0',
                         source_filter='', ts_correct='1', burst_sep='0.0',
                         sburst='1', eburst='0', seek_window='16000',
                         log_loss='0', anchor_map='', owd_midpoint='0'):
    """Extract one-way delay (OWD) or packet loss per flow.

    Matches packets (by a hash of quasi-unique header fields) between the
    pcap files captured near a flow's source and destination. Matched
    packets yield OWD samples; unmatched source packets count as losses.

    Keyword arguments:
    test_id      -- ';'-separated test IDs
    out_dir      -- output directory ('' = alongside input files)
    replot_only  -- '1' reuses existing intermediate files if present
    source_filter-- flow filter applied to the resulting file list
    ts_correct   -- must be '1' for OWD (timestamp correction is mandatory)
    burst_sep/sburst/eburst -- burst selection passed to select_bursts()
    seek_window  -- max rows to scan forward for a packet match
    log_loss     -- '0' log OWD, '1' log per-packet loss events,
                    '2' log cumulative loss counts
    anchor_map   -- see EXPERIMENTAL note below
    owd_midpoint -- '1' timestamps an OWD sample midway between src and dst

    Returns (test_id_arr, out_files, out_groups) as produced by
    select_bursts().
    """
    ifile_ext = '.dmp.gz'

    if log_loss == '0':
        ofile_ext = '.owds2.gz'
    else:
        ofile_ext = '.loss2.gz'

    already_done = {}
    out_files = {}
    out_groups = {}

    test_id_arr = test_id.split(';')

    if len(test_id_arr) == 0 or test_id_arr[0] == '':
        abort('Must specify test_id parameter')

    if ts_correct == '0' and log_loss == '0':
        abort('Must use ts_correct=1 when calculating OWD')

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # EXPERIMENTAL: anchor_map="<srcip1>:<dstip1>;<srcip2>:<dstip2>;..."
    # Normally a packet's OWD is logged as having occurred at the time the
    # packet is seen in pcap file associated with <srcip> (src_extname)
    # An 'anchor_map' entry allows you to specify that a packet's OWD be
    # logged as having occurred at the time the packet from <srcip> was
    # seen in the pcap file associated with <dstip> (dst_extname)
    # This is only operates on packets between <srcip>:<dstip>, other
    # flows in the same testID are unaffected.
    anchor_map_list = {}
    if anchor_map != '':
        if replot_only == '1':
            abort("Must specify replot_only=0 in conjunction with anchor_map")
        entries = anchor_map.split(';')
        for entry in entries:
            k, v = entry.split(':')
            anchor_map_list[k] = v

    group = 1

    for test_id in test_id_arr:

        # first process tcpdump files (ignore router and ctl interface tcpdumps)
        tcpdump_files = get_testid_file_list('', test_id,
                                             ifile_ext,
                                             'grep -v "router.dmp.gz" | grep -v "ctl.dmp.gz"')

        for tcpdump_file in tcpdump_files:
            # get input directory name and create result directory if necessary
            out_dirname = get_out_dir(tcpdump_file, out_dir)
            dir_name = os.path.dirname(tcpdump_file)

            # get unique flows
            flows = lookup_flow_cache(tcpdump_file)
            if flows == None:
                # If not previously found in flow_cache,
                # extract and identify the tcp and udp flows contained in tcpdump_file
                flows = _list(local('zcat %s | tcpdump -nr - "tcp" | '
                                    'awk \'{ if ( $2 == "IP" ) { print $3 " " $5 " tcp" } }\' | '
                                    'sed "s/://" | '
                                    'sed "s/\.\([0-9]*\) /,\\1 /g" | sed "s/ /,/g" | '
                                    'LC_ALL=C sort -u' %
                                    tcpdump_file, capture=True))
                flows += _list(local('zcat %s | tcpdump -nr - "udp" | '
                                     'awk \'{ if ( $2 == "IP" ) { print $3 " " $5 " udp" } }\' | '
                                     'sed "s/://" | '
                                     'sed "s/\.\([0-9]*\) /,\\1 /g" | sed "s/ /,/g" | '
                                     'LC_ALL=C sort -u' %
                                     tcpdump_file, capture=True))

                # Add them to the flow cache
                append_flow_cache(tcpdump_file, flows)

            # Walk through and process the flows identified in this current tcpdump_file
            for flow in flows:

                # First extract src & dst as IP addr on experiment networks
                src, src_port, dst, dst_port, proto = flow.split(',')

                # Map src & dst (IP addr) to testbed-specific external (control network)
                # host names from config.py ({src,dst}_extname) and internal (experiment network)
                # hostnames/addresses from config.py ({src,dst}_internal)
                src_extname, src_internal = get_address_pair_analysis(test_id, src, do_abort='0')
                dst_extname, dst_internal = get_address_pair_analysis(test_id, dst, do_abort='0')

                # Skip cases of random (broadcast) traffic involving an IP address for
                # which no experimental network NIC (and hence control network hostname)
                # is directly related
                if src_extname == '' or dst_extname == '':
                    continue

                # flow name
                name = src_internal + '_' + src_port + '_' + dst_internal + '_' + dst_port

                # test id plus flow name
                if len(test_id_arr) > 1:
                    long_name = test_id + '_' + name
                else:
                    long_name = name

                if long_name not in already_done:

                    # Construct filenames for files containing final <time> <owd|loss> pairs
                    out_final = out_dirname + test_id + '_' + name + ofile_ext

                    # Only embark on actual filtering/extraction if we're asked to regenerate
                    # the intermediate OWD values, or for some reason the intermediate OWD
                    # file is missing...
                    if replot_only == '0' or not os.path.isfile(out_final):

                        # Per flow/host:
                        #   Create intermediate file of timestamps + uniqString from pcap files,
                        #   THEN call adjust_timestamps to construct a version with timestamps adjusted
                        #   relative to a single reference host in the testbed
                        #   THEN use the adjusted timestamps in the subsequent owd/loss calculations.

                        # To extract packets in in FORWARD direction from both src and dst pcap files,
                        # construct dpkt flow filter in form <src_ip>:<src_port>:<dst_ip>:<dst_port>
                        # (and more specifically, from src_internal:src_port to dst_internal:dst_port).
                        filter_dpkt = src_internal + ':' + src_port + ':' + dst_internal + ':' + dst_port
                        src_port_int = int(src_port)
                        dst_port_int = int(dst_port)

                        # Loop across the src and dst dmp files
                        tmp_fwd_out_adj = {}
                        for dmpfile_host, dirsuffix in ([src_extname, "src"], [dst_extname, "dst"]):

                            # Construct the file name of the dump file that contains this
                            # flow's packets. 'src' captured at (near) the flow's source, and 'dst'
                            # captured at (near) the flow's destination.
                            dmp_file = dir_name + '/' + test_id + '_' + dmpfile_host + ifile_ext
                            print "Extracting packets for " + name + " from:" + dmp_file

                            # Construct filename for intermediate "<time> <uniqueString>" output files
                            # whose timestamps will be adjusted by adjust_timestamps()
                            # before being used for owd calculations
                            # (NOTE: Due to adjust_timestamps making assumptions about out_dir parameter,
                            # we currently can't place these tmp files under /tmp)
                            tmp_fwd_out = tempfile.mktemp(
                                suffix=test_id + '_' + name + '_fwd_out_' + dirsuffix + ".gz",
                                dir=out_dirname)

                            # Extract packet id info
                            if dmp_file.endswith('.gz'):
                                f_dmp_file = gzip.open(dmp_file)
                            else:
                                f_dmp_file = open(dmp_file)
                            pcap_reader = dpkt.pcap.Reader(f_dmp_file)
                            pcap_reader.setfilter(filter_dpkt)
                            #pcap_reader.setfilter('')

                            # Create a compressed temporary intermediate file
                            f_tmp_fwd_out = gzip.open(tmp_fwd_out, 'wb', 1)

                            # Walk across every packet in this pcap file
                            for ts, pkt in pcap_reader:
                                # get pointer to ethernet layer and check that we have IP
                                eth = dpkt.ethernet.Ethernet(pkt)
                                if eth.type != dpkt.ethernet.ETH_TYPE_IP:
                                    continue

                                # get pointer to IP layer
                                ip_pkt = eth.data

                                # ignore if src or dst IP not the ones specified in filter
                                if socket.inet_ntoa(ip_pkt.src) != src_internal or \
                                   socket.inet_ntoa(ip_pkt.dst) != dst_internal:
                                    continue

                                # ignore if UDP/TCP src or dst ports not the ones specified in filter
                                # get pointer to payload
                                if type(ip_pkt.data) == dpkt.udp.UDP:
                                    udp_frame = ip_pkt.data
                                    if udp_frame.sport != src_port_int or udp_frame.dport != dst_port_int:
                                        continue
                                    # Add IP ID field to the payload to ensure
                                    # at least something semi-unique is hashed
                                    # if UDP payload is invariant
                                    payload = str(ip_pkt.id) + udp_frame.data
                                elif type(ip_pkt.data) == dpkt.tcp.TCP:
                                    tcp_frame = ip_pkt.data
                                    if tcp_frame.sport != src_port_int or tcp_frame.dport != dst_port_int:
                                        continue
                                    # Use IP ID field, TCP Sequence number and ACK number to
                                    # construct a mostly unique string within context of this flow
                                    payload = str(ip_pkt.id) + str(tcp_frame.seq) + str(tcp_frame.ack)
                                else:
                                    continue

                                # Write <timestamp> <crc32 hash of uniqueString bytes>
                                # (hashing here eliminates any later problems parsing payloads
                                # containing null bytes)
                                f_tmp_fwd_out.write("%f %s\n" % (ts, zlib.crc32(payload)))

                            f_tmp_fwd_out.close()
                            f_dmp_file.close()

                            # Apply timestamp corrections to the data thus extracted, prior to
                            # calculating OWDs. Correction is MANDATORY otherwise the
                            # 'calculated' OWDs are essentially useless.
                            tmp_fwd_out_adj[dirsuffix] = adjust_timestamps(test_id, tmp_fwd_out,
                                                                           dmpfile_host, ' ', out_dir)

                            # Remove pre-adjustment files.
                            os.remove(tmp_fwd_out)

                        # Now we have unique packet hashes seen at both src and dst locations,
                        # and timestamps have been adjusted for clockoffsets.

                        # Begin calculating OWD or identifying when packet losses occurred

                        # Read into memory the <adjusted_timestamp> <uniqString> datasets captured
                        # at dst (2nd place packet seen, "destination"). The src is (1st place packet
                        # seen, "source")
                        dst_data_time = list()
                        dst_data_uniqString = list()
                        for line in gzip.open(tmp_fwd_out_adj["dst"]).read().splitlines():
                            sline = line.split(" ")
                            dst_data_time.append(float(sline[0]))
                            dst_data_uniqString.append(sline[1])

                        # Walk through tmp_fwd_out_adj["src"] looking for matches to packets
                        # in dst_data_uniqString, and write <time> <owd|loss> pairs in plain
                        # ASCII to out_final

                        # To limit potential duplicate matches to packets received forward
                        # in time from previous match in dst_data_uniqString, maintain
                        # index next_j pointing to next row in dst_data_uniqString to start
                        # matching next packet from tmp_fwd_out_adj["src"]
                        next_j = 0
                        last_j = len(dst_data_uniqString) - 1

                        # As a speed-up hack, assume match in dst_data_uniqString is
                        # within sk_window entries of next_j (saves searching all the
                        # way to the end of dst_data_uniqString when seeking a lost packet)
                        # Keeping seek_window in the low 1000s also minimises chances of
                        # duplicate matches.
                        if seek_window != '':
                            sk_window = int(seek_window)
                        else:
                            sk_window = last_j

                        # Create gzipped output file (rough experiments showed over reduction
                        # in on-disk file size easily 100s of K down to 10s of K).
                        # R automagically reads gzipped data files, so no changes required
                        # to subsequent analyse_* plotting scripts.
                        f = gzip.open(out_final, 'w')

                        cumulative_loss = 0

                        # Decide whether to use timestamp at src or dst for OWD
                        # (Default to src, unless anchor_map indicates using dst for
                        # this particular traffic pattern)
                        anchor = 0  # Default print timestamp at src
                        if log_loss == '0':  # Only relevant for OWD calculations
                            if src_internal in anchor_map_list.keys():
                                #print "*** Found " + src_internal + " in anchor_map"
                                if anchor_map_list[src_internal] == dst_internal:
                                    # Only relevant if the map relates to both src_internal and dst_internal
                                    #print "*** " + src_internal + " points to " + anchor_map_list[src_internal] + " in anchor_map"
                                    anchor = 1  # Print timestamp at dst

                        for line in gzip.open(tmp_fwd_out_adj["src"]).read().splitlines():
                            i = line.split(" ")
                            try:
                                # The following search will raise a 'ValueError' exception
                                # if i[1] does not occur in dst_data_uniqString[next_j:]
                                j = dst_data_uniqString[next_j:min((next_j + sk_window), last_j + 1)].index(i[1])

                                if log_loss == '0':
                                    # OWD is diff between i[0] and dst_data_time[next_j+j]
                                    ts = float(i[0])
                                    owd = dst_data_time[next_j + j] - float(i[0])
                                    # If required, print event as occuring at dst timestamp rather than src timestamp
                                    if anchor:
                                        ts = dst_data_time[next_j + j]
                                    # If we want to imply the OWD "existed" at some mid-point
                                    # between pkt seen at src and seen at dst
                                    if owd_midpoint == '1':
                                        ts += owd / 2
                                    f.write('%f %f\n' % (ts, owd))

                                if log_loss == '1':
                                    # No lost packet, emit "0"
                                    f.write('%s 0\n' % (i[0]))
                                if log_loss == '2':
                                    # No lost packet, emit previous cumulative count
                                    f.write('%s %i\n' % (i[0], cumulative_loss))

                                next_j = min(next_j + j + 1, last_j)

                            except ValueError:
                                # No match means a packet loss
                                if log_loss == '1':
                                    # Single loss event, emit "1"
                                    f.write('%s 1\n' % (i[0]))
                                if log_loss == '2':
                                    # Single loss event, increment cumulative count, emit cumulative count
                                    cumulative_loss += 1
                                    f.write('%s %i\n' % (i[0], cumulative_loss))
                                pass

                        f.close()

                        dst_data_time = []
                        dst_data_uniqString = []

                        # Clean up temporary post-adjustment files
                        os.remove(tmp_fwd_out_adj["src"])
                        os.remove(tmp_fwd_out_adj["dst"])

                    already_done[long_name] = 1

                    if sfil.is_in(name):
                        (out_files,
                         out_groups) = select_bursts(long_name, group, out_final,
                                                     burst_sep, sburst, eburst,
                                                     out_files, out_groups)

        group += 1

    return (test_id_arr, out_files, out_groups)
def get_clock_offsets(exp_list='experiments_completed.txt', test_id='',
                      pkt_filter='', baseline_host='', out_dir=''):
    """Compute per-host clock offsets from broadcast-ping tcpdumps.

    For each experiment, parses the control-interface tcpdump of every host,
    extracts the timestamp each host recorded for each broadcast/multicast
    ping sequence number, and writes a table of (host_time - baseline_time)
    offsets to '<out_dir>/<test_id><CLOCK_OFFSET_FILE_EXT>'.

    Parameters:
    exp_list:      file with test IDs, one per line (used if test_id == '')
    test_id:       semicolon-separated list of test IDs
    pkt_filter:    custom tcpdump filter expression (default: icmp to the
                   broadcast ping address from the experiment config)
    baseline_host: host whose clock is the reference (default: the router
                   named in the experiment config)
    out_dir:       output directory; if empty or relative it is placed
                   under the directory of the experiment's tcpdump files
    """

    if len(out_dir) > 0 and out_dir[-1] != '/':
        out_dir += '/'

    # Build the list of test IDs either from the experiments file or from
    # the semicolon-separated test_id parameter
    if test_id == '':
        try:
            with open(exp_list) as f:
                test_id_arr = f.readlines()
        except IOError:
            abort('Cannot open file %s' % exp_list)
    else:
        test_id_arr = test_id.split(';')

    if len(test_id_arr) == 0 or test_id_arr[0] == '':
        abort('Must specify test_id parameter')

    # specify complete tcpdump parameter list
    # NOTE(review): this assignment is dead code — tcpdump_filter is
    # unconditionally recomputed inside the per-experiment loop below
    tcpdump_filter = '-tt -r - -n ' + pkt_filter

    for test_id in test_id_arr:
        test_id = test_id.rstrip()

        # first find tcpdump files (control interface dumps only)
        tcpdump_files = get_testid_file_list('', test_id,
                                             '_ctl.dmp.gz', '')

        if len(tcpdump_files) == 0:
            # best-effort: skip experiments without control-interface dumps
            warn('No tcpdump files for control interface for %s' % test_id)
            continue

        # if we have tcpdumps for control interface we can assume broadcast
        # ping was enabled
        dir_name = os.path.dirname(tcpdump_files[0])
        # then look for tpconf_vars.log.gz file in that directory
        var_file = local('find -L %s -name "*tpconf_vars.log.gz"' % dir_name,
                         capture=True)

        bc_addr = ''
        router = ''  # NOTE(review): unused local, router_name is used instead
        if len(var_file) > 0:
            # new approach without using config.py: read the archived
            # TPCONF variables that were logged with the experiment
            # XXX no caching here yet, assume we only generate clockoffset
            # file once per experiment

            # unzip archived file
            local('gzip -cd %s > %s' % (var_file, TMP_CONF_FILE))
            # load the TPCONF_ variables into the 'oldconfig' module object
            oldconfig = imp.load_source('oldconfig', TMP_CONF_FILE)

            # remove temporary unzipped file (and its compiled .pyc)
            try:
                os.remove(TMP_CONF_FILE)
                os.remove(TMP_CONF_FILE + 'c')  # the compiled file as well
            except OSError:
                pass

            # broadcast ping address is optional in the config
            try:
                bc_addr = oldconfig.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = oldconfig.TPCONF_router[0].split(':')[0]
        else:
            # old approach using config.py (current in-tree config)
            try:
                bc_addr = config.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = config.TPCONF_router[0].split(':')[0]

        if bc_addr == '':
            # assume default multicast address
            bc_addr = '224.0.1.199'

        # specify complete tcpdump parameter list: either the caller's
        # filter or the default "icmp to the broadcast ping address"
        if pkt_filter != '':
            tcpdump_filter = '-tt -r - -n ' + pkt_filter
        else:
            tcpdump_filter = '-tt -r - -n ' + 'icmp and dst host ' + bc_addr

        if baseline_host == '':
            baseline_host = router_name

        #
        # now read timestamps from each host's tcpdump
        #

        # map of host names (or IPs) and sequence numbers to timestamps:
        # host_times[host][seq] = timestamp string
        host_times = {}
        for tcpdump_file in tcpdump_files:
            # extract the host name from the file name
            # <test_id>_<host>_ctl.dmp.gz
            host = local(
                'echo %s | sed "s/.*_\([a-z0-9\.]*\)_ctl.dmp.gz/\\1/"' %
                tcpdump_file,
                capture=True)
            host_times[host] = {}

            #print(host)
            #print(host_times)

            # We pipe gzcat through to tcpdump. Note, since tcpdump exits
            # early (due to "-c num_samples") gzcat's pipe will collapse and
            # gzcat will complain bitterly. So we dump its stderr to
            # stderrhack.
            init_zcat = Popen(['zcat ' + tcpdump_file],
                              stdin=None,
                              stdout=PIPE,
                              stderr=stderrhack,
                              shell=True)
            init_tcpdump = Popen(['tcpdump ' + tcpdump_filter],
                                 stdin=init_zcat.stdout,
                                 stdout=PIPE,
                                 stderr=stderrhack,
                                 shell=True)

            for line in init_tcpdump.stdout.read().splitlines():
                # field 0 is the epoch timestamp (-tt); field 11 is assumed
                # to be the icmp echo sequence number followed by a comma
                # NOTE(review): fixed field index depends on tcpdump's
                # output format for icmp echo — confirm for the tcpdump
                # version in use
                _time = line.split(" ")[0]
                _seq = int(line.split(" ")[11].replace(',', ''))
                host_times[host][_seq] = _time

            #print(host_times)

        # get time differences and get host list
        # diffs[seq][host] = host timestamp - baseline timestamp (or None)
        # ref_times[seq]   = baseline host's timestamp string (or None)
        diffs = {}
        ref_times = {}
        host_str = ''
        host_list = sorted(host_times.keys())
        # getting hosts from the config is problematic if different
        # experiments with different configs in same directory
        #host_list = sorted(config.TPCONF_router + config.TPCONF_hosts)
        for host in host_list:
            host_str += ' ' + host

            if host not in host_times:
                continue

            for seq in sorted(host_times[host].keys()):
                if seq not in diffs:
                    diffs[seq] = {}

                if baseline_host in host_times and seq in host_times[
                        baseline_host]:
                    diffs[seq][host] = float(host_times[host][seq]) - \
                        float(host_times[baseline_host][seq])
                    ref_times[seq] = host_times[baseline_host][seq]
                else:
                    # this should only happen if TPCONF_router was
                    # modified
                    warn('Cant find baseline host %s timestamp data' %
                         baseline_host)
                    diffs[seq][host] = None
                    ref_times[seq] = None

        #print(diffs)

        # default to the experiment's own directory for relative out_dir
        # NOTE(review): out_dir is rebound here inside the per-experiment
        # loop; after the first iteration it is absolute, so all later
        # experiments write into the first experiment's directory — confirm
        # this is intended when experiments live in different directories
        if out_dir == '' or out_dir[0] != '/':
            dir_name = os.path.dirname(tcpdump_files[0])
            out_dir = dir_name + '/' + out_dir

        mkdir_p(out_dir)

        out_name = out_dir + test_id + CLOCK_OFFSET_FILE_EXT

        # write table of offsets (rows = time, cols = hosts)
        f = open(out_name, 'w')
        f.write('# ref_time' + host_str + '\n')

        for seq in sorted(diffs.keys()):
            if ref_times[seq] is not None:
                f.write(ref_times[seq])
            else:
                # this case should never happen (ref time missing means the
                # baseline host had no data for this sequence number)
                continue
            f.write(' ')

            # one space-separated column per host, 'NA' if no offset known
            for host in host_list:
                if host in diffs[seq] and diffs[seq][host] is not None:
                    f.write('{0:.6f}'.format(diffs[seq][host]))
                else:
                    f.write('NA')
                if host != host_list[-1]:
                    f.write(' ')

            f.write('\n')

        f.close()