Example #1
def get_first_experiment_path(experiments):
    # get path based on first experiment id
    dir_name = ''
    files = get_testid_file_list('', experiments[0], '', 'LC_ALL=C sort')
    if len(files) > 0:
        dir_name = os.path.dirname(files[0])
    else:
        abort('Cannot find experiment %s\n'
              'Remove outdated teacup_dir_cache.txt if files were moved.' %
              experiments[0])

    return dir_name
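
A minimal sketch (not from the original source) of the idea behind get_first_experiment_path: the directory of the first file matching the experiment ID becomes the experiment path. The file name below is hypothetical.

import os

files = ['/data/exp_20140314-120000_run1/exp_20140314-120000_run1_testhost1_uname.log.gz']
dir_name = os.path.dirname(files[0]) if len(files) > 0 else ''
print(dir_name)  # /data/exp_20140314-120000_run1
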
Example #2
def get_part_hosts(test_id):
    global part_hosts

    if test_id not in part_hosts:

        part_hosts[test_id] = []

        # find the uname log files (one per host that took part in the experiment)
        uname_files = get_testid_file_list('', test_id, 'uname.log.gz', '')

        for f in uname_files:
            res = re.search('.*_(.*)_uname.log.gz', f)
            if res:
                part_hosts[test_id].append(res.group(1))

    return part_hosts[test_id]
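
A self-contained sketch of the regular expression used above to recover the participating host name from a uname log file name (the file name is hypothetical):

import re

f = 'exp_20140314-120000_run1_testhost2_uname.log.gz'
res = re.search('.*_(.*)_uname.log.gz', f)
if res:
    print(res.group(1))  # testhost2
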
Example #3
def analyse_2d_density(
        exp_list='experiments_completed.txt',
        res_dir='',
        out_dir='',
        source_filter='',
        min_values='3',
        xmetric='throughput',
        ymetric='tcprtt',
        variables='',
        out_name='',
        xmin='0',
        xmax='0',
        ymin='0',
        ymax='0',
        lnames='',
        group_by='aqm',
        replot_only='0',
        pdf_dir='',
        stime='0.0',
        etime='0.0',
        ts_correct='1',
        smoothed='1',
        link_len='0',
        plot_params='',
        plot_script='',
        xstat_index='',
        ystat_index='',
        dupacks='0',
        cum_ackseq='1',
        merge_data='0',
        #sburst='1', eburst='0', test_id_prefix='[0-9]{8}\-[0-9]{6}_experiment_',
        sburst='1',
        eburst='0',
        test_id_prefix='exp_[0-9]{8}\-[0-9]{6}_',
        slowest_only='0',
        query_host=''):
    "2d density / ellipse plot for different experiments"

    test_id_pfx = ''

    check = get_metric_params(xmetric, smoothed, ts_correct)
    if check == None:
        abort('Unknown metric %s specified with xmetric' % xmetric)
    check = get_metric_params(ymetric, smoothed, ts_correct)
    if check == None:
        abort('Unknown metric %s specified with ymetric' % ymetric)

    #if source_filter == '':
    #    abort('Must specify at least one source filter')

    if len(source_filter.split(';')) > 12:
        abort('Cannot have more than 12 filters')

    # XXX more param checking

    # make sure res_dir has valid form (out_dir is handled by extract methods)
    res_dir = valid_dir(res_dir)

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # read test ids
    experiments = read_experiment_ids(exp_list)

    # get path based on first experiment id
    dir_name = get_first_experiment_path(experiments)

    # if we haven't got the extracted data, run the extract method(s) first
    if res_dir == '':
        for experiment in experiments:

            (ex_function,
             kwargs) = get_extract_function(xmetric,
                                            link_len,
                                            xstat_index,
                                            sburst=sburst,
                                            eburst=eburst,
                                            slowest_only=slowest_only,
                                            query_host=query_host)

            (dummy, out_files,
             out_groups) = ex_function(test_id=experiment,
                                       out_dir=out_dir,
                                       source_filter=source_filter,
                                       replot_only=replot_only,
                                       ts_correct=ts_correct,
                                       **kwargs)

            (ex_function,
             kwargs) = get_extract_function(ymetric,
                                            link_len,
                                            ystat_index,
                                            sburst=sburst,
                                            eburst=eburst,
                                            slowest_only=slowest_only,
                                            query_host=query_host)

            (dummy, out_files,
             out_groups) = ex_function(test_id=experiment,
                                       out_dir=out_dir,
                                       source_filter=source_filter,
                                       replot_only=replot_only,
                                       ts_correct=ts_correct,
                                       **kwargs)

        if out_dir == '' or out_dir[0] != '/':
            res_dir = dir_name + '/' + out_dir
        else:
            res_dir = out_dir

    else:
        if res_dir[0] != '/':
            res_dir = dir_name + '/' + res_dir

    # make sure we have trailing slash
    res_dir = valid_dir(res_dir)

    if pdf_dir == '':
        pdf_dir = res_dir
    else:
        if pdf_dir[0] != '/':
            pdf_dir = dir_name + '/' + pdf_dir
        pdf_dir = valid_dir(pdf_dir)
        # if pdf_dir is specified, create it if it doesn't exist
        mkdir_p(pdf_dir)

    #
    # build match string from variables
    #

    (match_str, match_str2) = build_match_strings(experiments[0], variables,
                                                  test_id_prefix)

    #
    # filter out the experiments to plot, generate x-axis labels, get test id prefix
    #

    (fil_experiments, test_id_pfx,
     dummy) = filter_experiments(experiments, match_str, match_str2)

    #
    # get groups based on group_by variable
    #

    group_idx = 1
    levels = {}
    groups = []
    leg_names = []
    _experiments = []
    for experiment in fil_experiments:
        level = ''
        add_exp = True
        for g in group_by.split(';'):
            p = experiment.find(g)
            if p > -1:
                s = experiment.find('_', p)
                s += 1
                e = experiment.find('_', s)
                level += g + ':' + experiment[s:e] + ' '
            else:
                add_exp = False
                break

        # remove the final space from the string
        level = level[:-1]

        if add_exp == True:
            _experiments.append(experiment)
            #print('level: ' + level)

            if level not in levels:
                levels[level] = group_idx
                group_idx += 1
                leg_names.append(level)

            if merge_data == '1':
                groups.append(levels[level])
            else:
                for i in range(len(source_filter.split(';'))):
                    groups.append(levels[level])

    fil_experiments = _experiments

    #
    # get metric parameters and list of data files
    #

    # get the metric parameter for both x and y
    x_axis_params = get_metric_params(xmetric, smoothed, ts_correct,
                                      xstat_index, dupacks, cum_ackseq,
                                      slowest_only)
    y_axis_params = get_metric_params(ymetric, smoothed, ts_correct,
                                      ystat_index, dupacks, cum_ackseq,
                                      slowest_only)

    x_ext = x_axis_params[0]
    y_ext = y_axis_params[0]

    # if we merge responders make sure we only use the merged files
    if merge_data == '1':
        # reset source filter so we match the merged file
        sfil.clear()
        sfil = SourceFilter('S_0.0.0.0_0')

    x_files = []
    y_files = []
    for experiment in fil_experiments:
        _x_files = []
        _y_files = []
        _x_ext = x_ext
        _y_ext = y_ext

        _files = get_testid_file_list('', experiment, _x_ext, 'LC_ALL=C sort',
                                      res_dir)
        if merge_data == '1':
            _x_ext += '.all'
            _files = merge_data_files(_files)
        _x_files += _files

        _files = get_testid_file_list('', experiment, _y_ext, 'LC_ALL=C sort',
                                      res_dir)
        if merge_data == '1':
            _y_ext += '.all'
            _files = merge_data_files(_files)
        _y_files += _files

        match_str = '.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _x_ext
        for f in _x_files:
            #print(f)
            res = re.search(match_str, f)
            #print(res.group(1))
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f, capture=True))
                if rows > int(min_values):
                    x_files.append(f)

        match_str = '.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _y_ext
        for f in _y_files:
            # print(f)
            res = re.search(match_str, f)
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f, capture=True))
                if rows > int(min_values):
                    y_files.append(f)

    yindexes = [str(x_axis_params[2]), str(y_axis_params[2])]
    yscalers = [str(x_axis_params[3]), str(y_axis_params[3])]
    aggr_flags = [x_axis_params[5], y_axis_params[5]]
    diff_flags = [x_axis_params[6], y_axis_params[6]]

    if lnames != '':
        lnames_arr = lnames.split(';')
        if len(lnames_arr) != len(leg_names):
            abort(
                'Number of legend names must be equal to the number of source filters'
            )
        leg_names = lnames_arr

    print(x_files)
    print(y_files)
    print(groups)
    print(leg_names)

    #
    # pass the data files and auxiliary info to the plot function
    #

    if out_name != '':
        oprefix = out_name + '_' + test_id_pfx + '_' + xmetric + '_' + ymetric
    else:
        oprefix = test_id_pfx + '_' + xmetric + '_' + ymetric
    title = oprefix

    plot_2d_density(title, x_files, y_files, x_axis_params[1],
                    y_axis_params[1], yindexes, yscalers, 'pdf', oprefix,
                    pdf_dir, x_axis_params[4], y_axis_params[4], aggr_flags,
                    diff_flags, xmin, xmax, ymin, ymax, stime, etime, groups,
                    leg_names, plot_params, plot_script)

    # done
    puts('\n[MAIN] COMPLETED analyse_2d_density %s \n' % test_id_pfx)
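
The grouping step above pulls a "<name>_<value>" pair out of each test ID for every variable listed in group_by. A minimal sketch of that parsing with a hypothetical test ID:

experiment = 'exp_20140314-120000_aqm_pie_del_40_run_0'
level = ''
for g in 'aqm;del'.split(';'):
    p = experiment.find(g)
    if p > -1:
        s = experiment.find('_', p) + 1
        e = experiment.find('_', s)
        level += g + ':' + experiment[s:e] + ' '
print(level[:-1])  # aqm:pie del:40
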
Example #4
def analyse_cmpexp(
        exp_list='experiments_completed.txt',
        res_dir='',
        out_dir='',
        source_filter='',
        min_values='3',
        omit_const='0',
        metric='throughput',
        ptype='box',
        variables='',
        out_name='',
        ymin='0',
        ymax='0',
        lnames='',
        group_by_prefix='0',
        omit_const_xlab_vars='0',
        replot_only='0',
        pdf_dir='',
        stime='0.0',
        etime='0.0',
        ts_correct='1',
        smoothed='1',
        link_len='0',
        plot_params='',
        plot_script='',
        stat_index='',
        dupacks='0',
        cum_ackseq='1',
        merge_data='0',
        sburst='1',
        #eburst='0', test_id_prefix='[0-9]{8}\-[0-9]{6}_experiment_',
        eburst='0',
        test_id_prefix='exp_[0-9]{8}\-[0-9]{6}_',
        slowest_only='0',
        res_time_mode='0',
        query_host=''):
    "Compare metrics for different experiments"

    if ptype != 'box' and ptype != 'mean' and ptype != 'median':
        abort('ptype must be either box, mean or median')

    check = get_metric_params(metric, smoothed, ts_correct)
    if check == None:
        abort('Unknown metric %s specified' % metric)

    if source_filter == '':
        abort('Must specify at least one source filter')

    if len(source_filter.split(';')) > 12:
        abort('Cannot have more than 12 filters')

    # prevent wrong use of res_time_mode
    if metric != 'restime' and res_time_mode != '0':
        res_time_mode = '0'
    if ptype == 'box' and res_time_mode == '2':
        res_time_mode = '0'

    # XXX more param checking

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # read test ids
    experiments = read_experiment_ids(exp_list)

    # get path based on first experiment id
    dir_name = get_first_experiment_path(experiments)

    # if we haven't got the extracted data, run the extract method(s) first
    if res_dir == '':
        for experiment in experiments:

            (ex_function,
             kwargs) = get_extract_function(metric,
                                            link_len,
                                            stat_index,
                                            sburst=sburst,
                                            eburst=eburst,
                                            slowest_only=slowest_only,
                                            query_host=query_host)

            (dummy, out_files,
             out_groups) = ex_function(test_id=experiment,
                                       out_dir=out_dir,
                                       source_filter=source_filter,
                                       replot_only=replot_only,
                                       ts_correct=ts_correct,
                                       **kwargs)

        if out_dir == '' or out_dir[0] != '/':
            res_dir = dir_name + '/' + out_dir
        else:
            res_dir = out_dir
    else:
        if res_dir[0] != '/':
            res_dir = dir_name + '/' + res_dir

    # make sure we have trailing slash
    res_dir = valid_dir(res_dir)

    if pdf_dir == '':
        pdf_dir = res_dir
    else:
        if pdf_dir[0] != '/':
            pdf_dir = dir_name + '/' + pdf_dir
        pdf_dir = valid_dir(pdf_dir)
        # if pdf_dir is specified, create it if it doesn't exist
        mkdir_p(pdf_dir)

    #
    # build match string from variables
    #

    (match_str, match_str2) = build_match_strings(experiments[0], variables,
                                                  test_id_prefix)

    #
    # filter out the experiments to plot, generate x-axis labels, get test id prefix
    #

    (fil_experiments, test_id_pfx,
     xlabs) = filter_experiments(experiments, match_str, match_str2)

    #
    # get out data files based on filtered experiment list and source_filter
    #

    (ext, ylab, yindex, yscaler, sep, aggr,
     diff) = get_metric_params(metric, smoothed, ts_correct, stat_index,
                               dupacks, cum_ackseq, slowest_only)

    if res_time_mode == '1':
        plot_params += ' NOMINAL_RES_TIME="1"'
    if res_time_mode == '2':
        if ptype == 'median':
            ylab = 'Median resp time / nominal resp time'
        elif ptype == 'mean':
            ylab = 'Mean resp time / nominal resp time'
        plot_params += ' RATIO_RES_TIME="1"'

    leg_names = source_filter.split(';')

    # if we merge responders make sure we only use the merged files
    if merge_data == '1':
        # set label to indicate merged data
        leg_names = ['Merged data']
        # reset source filter so we match the merged file
        sfil.clear()
        source_filter = 'S_0.0.0.0_0'
        sfil = SourceFilter(source_filter)

    file_names = []
    for experiment in fil_experiments:
        out_files = {}
        _ext = ext

        files = get_testid_file_list('', experiment, '%s' % _ext,
                                     'LC_ALL=C sort', res_dir)
        if merge_data == '1':
            # change extension
            _ext += '.all'
            files = merge_data_files(files)

        #print(files)
        match_str = '.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _ext
        for f in files:
            # print(f)
            res = re.search(match_str, f)
            #print(res.group(1))
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f, capture=True))
                if rows > int(min_values):
                    out_files[res.group(1)] = f

        #print(out_files)
        #print(leg_names)
        if len(out_files) < len(leg_names):
            abort(
                'No data files for some of the source filters for experiment %s'
                % experiment)

        sorted_files = sort_by_flowkeys(out_files, source_filter)

        for name, file_name in sorted_files:
            file_names.append(file_name)

    if group_by_prefix == '1':
        # group by test prefix (and flow)

        # first, get all test id prefixes
        test_id_pfxs = {}
        for experiment in fil_experiments:
            res = re.search(match_str2, experiment)
            if res:
                test_id_pfxs[res.group(1)] = 1

        # second, sort files so that same parameter combinations for different
        # prefixes are together
        # if we have multiple prefixes, create legend entry for each
        # prefix+flow combination
        _file_names = [''] * len(file_names)
        _leg_names = []
        pfx_cnt = len(test_id_pfxs)
        i = 0
        j = -1
        last_pfx = ''
        for name in file_names:
            for p in test_id_pfxs:
                if name.find(p) > -1:
                    curr_pfx = p
                    break

            if curr_pfx != last_pfx:
                i = 0
                j += 1
                for l in leg_names:
                    _leg_names.append(curr_pfx + '-' + l)

            _file_names[i * pfx_cnt + j] = name

            i += 1
            last_pfx = curr_pfx

        file_names = _file_names
        leg_names = _leg_names

        # remove duplicates in the x-axis labels
        xlabs = list(set(xlabs))

    if lnames != '':
        lnames_arr = lnames.split(';')
        if len(lnames_arr) != len(leg_names):
            abort(
                'Number of legend names must be equal to the number of source filters'
            )
        leg_names = lnames_arr

    # filter out unchanged variables in the x labels (need at least 2 labels)
    if omit_const_xlab_vars == '1' and len(xlabs) > 1:

        xlabs_arrs = {}
        xlabs_changed = {}

        for i in range(len(xlabs)):
            xlabs_arrs[i] = xlabs[i].split('\n')

        for i in range(len(xlabs_arrs[0])):
            changed = False
            xlab_var = xlabs_arrs[0][i]
            for j in range(1, len(xlabs)):
                if xlabs_arrs[j][i] != xlab_var:
                    changed = True
                    break

            xlabs_changed[i] = changed

        for i in range(len(xlabs)):
            tmp = []
            for j in range(len(xlabs_arrs[i])):
                if xlabs_changed[j]:
                    tmp.append(xlabs_arrs[i][j].replace('_', ' ', 1))

            xlabs[i] = '\n'.join(tmp)

    print(leg_names)
    print(file_names)

    #
    # pass the data files and auxiliary info to the plot function
    #

    if out_name != '':
        oprefix = out_name + '_' + test_id_pfx + '_' + metric + '_' + ptype
    else:
        oprefix = test_id_pfx + '_' + metric + '_' + ptype
    title = oprefix

    plot_cmpexp(title, file_names, xlabs, ylab, yindex, yscaler, 'pdf',
                oprefix, pdf_dir, sep, aggr, diff, omit_const, ptype, ymin,
                ymax, leg_names, stime, etime, plot_params, plot_script)

    # done
    puts('\n[MAIN] COMPLETED analyse_cmpexp %s \n' % test_id_pfx)
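
A compact sketch (labels are hypothetical) of the omit_const_xlab_vars logic above: x-label variables whose value never changes across labels are dropped, and the first underscore in each kept variable is replaced by a space:

xlabs = ['aqm_pfifo\ndel_40', 'aqm_pie\ndel_40']
arrs = [x.split('\n') for x in xlabs]
changed = [len(set(col)) > 1 for col in zip(*arrs)]
xlabs = ['\n'.join(v.replace('_', ' ', 1) for v, c in zip(a, changed) if c)
         for a in arrs]
print(xlabs)  # ['aqm pfifo', 'aqm pie']
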
Example #5
def get_clock_offsets(exp_list='experiments_completed.txt',
                      test_id='',
                      pkt_filter='',
                      baseline_host='',
                      out_dir=''):
    "Get clock offsets for all hosts"

    if len(out_dir) > 0 and out_dir[-1] != '/':
        out_dir += '/'

    if test_id == '':
        try:
            with open(exp_list) as f:
                test_id_arr = f.readlines()
        except IOError:
            abort('Cannot open file %s' % exp_list)
    else:
        test_id_arr = test_id.split(';')

    if len(test_id_arr) == 0 or test_id_arr[0] == '':
        abort('Must specify test_id parameter')

    # specify complete tcpdump parameter list
    tcpdump_filter = '-tt -r - -n ' + pkt_filter

    for test_id in test_id_arr:
        test_id = test_id.rstrip()

        # first find tcpdump files
        tcpdump_files = get_testid_file_list('', test_id, '_ctl.dmp.gz', '')

        if len(tcpdump_files) == 0:
            warn('No tcpdump files for control interface for %s' % test_id)
            continue

        # if we have tcpdumps for control interface we can assume broadcast ping
        # was enabled

        dir_name = os.path.dirname(tcpdump_files[0])
        # then look for tpconf_vars.log.gz file in that directory
        var_file = local('find -L %s -name "*tpconf_vars.log.gz"' % dir_name,
                         capture=True)

        bc_addr = ''
        router_name = ''

        if len(var_file) > 0:
            # new approach without using config.py
            # XXX no caching here yet, assume we only generate clockoffset file once
            # per experiment

            # unzip archived file
            local('gzip -cd %s > %s' % (var_file, TMP_CONF_FILE))

            # load the TPCONF_variables into oldconfig
            oldconfig = imp.load_source('oldconfig', TMP_CONF_FILE)

            # remove temporary unzipped file
            try:
                os.remove(TMP_CONF_FILE)
                os.remove(TMP_CONF_FILE +
                          'c')  # remove the compiled file as well
            except OSError:
                pass

            try:
                bc_addr = oldconfig.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = oldconfig.TPCONF_router[0].split(':')[0]

        else:
            # old approach using config.py

            try:
                bc_addr = config.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = config.TPCONF_router[0].split(':')[0]

        if bc_addr == '':
            # assume default multicast address
            bc_addr = '224.0.1.199'

        # specify complete tcpdump parameter list
        if pkt_filter != '':
            tcpdump_filter = '-tt -r - -n ' + pkt_filter
        else:
            tcpdump_filter = '-tt -r - -n ' + 'icmp and dst host ' + bc_addr

        if baseline_host == '':
            baseline_host = router_name

        #
        # now read timestamps from each host's tcpdump
        #

        # map of host names (or IPs) and sequence numbers to timestamps
        host_times = {}
        for tcpdump_file in tcpdump_files:
            host = local(
                'echo %s | sed "s/.*_\([a-z0-9\.]*\)_ctl.dmp.gz/\\1/"' %
                tcpdump_file,
                capture=True)
            host_times[host] = {}
            #print(host)
            #print(host_times)

            # We pipe gzcat through to tcpdump. Note, since tcpdump exits early
            # (due to "-c num_samples") gzcat's pipe will collapse and gzcat
            # will complain bitterly. So we dump its stderr to stderrhack.
            init_zcat = Popen(['zcat ' + tcpdump_file],
                              stdin=None,
                              stdout=PIPE,
                              stderr=stderrhack,
                              shell=True)
            init_tcpdump = Popen(['tcpdump ' + tcpdump_filter],
                                 stdin=init_zcat.stdout,
                                 stdout=PIPE,
                                 stderr=stderrhack,
                                 shell=True)

            for line in init_tcpdump.stdout.read().splitlines():
                _time = line.split(" ")[0]
                _seq = int(line.split(" ")[11].replace(',', ''))
                host_times[host][_seq] = _time

        #print(host_times)

        # get time differences and get host list
        diffs = {}
        ref_times = {}
        host_str = ''
        host_list = sorted(host_times.keys())
        # getting hosts from the config is problematic if different
        # experiments with different configs are in the same directory
        #host_list = sorted(config.TPCONF_router + config.TPCONF_hosts)

        for host in host_list:
            host_str += ' ' + host
            if host not in host_times:
                continue
            for seq in sorted(host_times[host].keys()):
                if seq not in diffs:
                    diffs[seq] = {}
                if baseline_host in host_times and \
                   seq in host_times[baseline_host]:
                    diffs[seq][host] = float(host_times[host][seq]) - \
                        float(host_times[baseline_host][seq])
                    ref_times[seq] = host_times[baseline_host][seq]
                else:
                    # this only happens:
                    # - if some other host has recorded more pings, OK we don't care
                    #   as this is only at the end after an experiment was finished
                    # - in old versions of TEACUP if TPCONF_router was modified
                    #warn('Cant find baseline host %s timestamp data number %i' %
                    # (baseline_host, str(seq)))
                    diffs[seq][host] = None
                    ref_times[seq] = None

        #print(diffs)

        if out_dir == '' or out_dir[0] != '/':
            dir_name = os.path.dirname(tcpdump_files[0])
            out_dir = dir_name + '/' + out_dir
        mkdir_p(out_dir)
        out_name = out_dir + test_id + CLOCK_OFFSET_FILE_EXT

        # write table of offsets (rows = time, cols = hosts)
        f = open(out_name, 'w')
        f.write('# ref_time' + host_str + '\n')
        for seq in sorted(diffs.keys()):
            if ref_times[seq] is not None:
                f.write(ref_times[seq])
            else:
                # this case should never happen
                continue

            f.write(' ')

            for host in host_list:
                if host in diffs[seq] and diffs[seq][host] is not None:
                    f.write('{0:.6f}'.format(diffs[seq][host]))
                else:
                    f.write('NA')
                if host != host_list[-1]:
                    f.write(' ')
            f.write('\n')

        f.close()
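
The offset table written above boils down to a per-sequence-number difference against the baseline host. A minimal sketch with hypothetical host names and timestamps:

host_times = {
    'testrouter': {1: '100.000100', 2: '101.000100'},
    'testhost1': {1: '100.000250', 2: '101.000260'},
}
baseline_host = 'testrouter'
for seq in sorted(host_times['testhost1'].keys()):
    offset = float(host_times['testhost1'][seq]) - \
        float(host_times[baseline_host][seq])
    print('%s %.6f' % (host_times[baseline_host][seq], offset))
# prints: 100.000100 0.000150
#         101.000100 0.000160
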
Example #6
def get_address_pair_analysis(test_id, host, do_abort='1'):
    global host_internal_ip_cache
    global host_list_cache
    internal = ''
    external = ''
    TMP_CONF_FILE = '___oldconfig.py'

    # XXX the whole old config access should be moved into a separate module as
    # similar code is also in clockoffset

    # Prior to TEACUP version 0.9 the analysis had to be run with a config file
    # that contained the same config.TPCONF_host_internal_ip used to run the
    # experiment (or a superset of it). Since version 0.9 we read
    # config.TPCONF_host_internal_ip (as well as config.TPCONF_hosts and
    # config.TPCONF_router) from the file <test_id_prefix>_tpconf_vars.log.gz
    # in the test experiment directory.

    if test_id not in host_internal_ip_cache:
        # first find the directory by looking for the mandatory uname file
        uname_file = get_testid_file_list('', test_id,
                                          'uname.log.gz', '')
        dir_name = os.path.dirname(uname_file[0])

        if dir_name in host_internal_ip_cache:
            # create test id cache entry from directory entry 
            host_internal_ip_cache[test_id] = host_internal_ip_cache[dir_name]
            if host_internal_ip_cache[test_id] != None:
                host_list_cache[test_id] = host_list_cache[dir_name]
        else:
            # try to find old config information

            # look for tpconf_vars.log.gz file in that directory 
            var_file = local('find -L %s -name "*tpconf_vars.log.gz"' % dir_name,
                             capture=True)

            if len(var_file) > 0:
                # new approach without using config.py

                # unzip archived file
                local('gzip -cd %s > %s' % (var_file, TMP_CONF_FILE))

                # load the TPCONF_variables into oldconfig
                oldconfig = imp.load_source('oldconfig', TMP_CONF_FILE)

                # remove temporary unzipped file 
                try:
                    os.remove(TMP_CONF_FILE)
                    os.remove(TMP_CONF_FILE + 'c') # remove the compiled file as well
                except OSError:
                    pass

                # store data in cache (both under test id and directory name)
                host_internal_ip_cache[test_id] = oldconfig.TPCONF_host_internal_ip
                host_list_cache[test_id] = oldconfig.TPCONF_hosts + oldconfig.TPCONF_router
                host_internal_ip_cache[dir_name] = oldconfig.TPCONF_host_internal_ip
                host_list_cache[dir_name] = oldconfig.TPCONF_hosts + oldconfig.TPCONF_router
            else:
                # old approach using the functions in hostint.py that access config.py
                # store empty value in cache (both under test id and directory name)
                host_internal_ip_cache[test_id] = None
                host_internal_ip_cache[dir_name] = None

    if host_internal_ip_cache[test_id] != None:
        # new approach

        # pretend it is an external name and perform lookup
        internal = host_internal_ip_cache[test_id].get(host, [])
        if len(internal) == 0:
            # host is internal name, so need to find external name
            internal = host
            for e, i in host_internal_ip_cache[test_id].items():
                if i[0] == host:
                    external = e
        else:
            # host is external name
            internal = internal[0]
            external = host

        hosts = host_list_cache[test_id]

    else:
        # old approach

        (external, internal) = get_address_pair(host, do_abort)

        hosts = get_part_hosts(test_id)

    if external not in hosts:
        return ('', '')
    else:
        return (external, internal)
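
A self-contained sketch (map entries are hypothetical) of the internal/external lookup the new approach performs against a TPCONF_host_internal_ip-style dictionary:

host_internal_ip = {'testhost1': ['172.16.10.2'], 'testhost2': ['172.16.10.3']}

def lookup_pair(host):
    internal = host_internal_ip.get(host, [])
    if len(internal) == 0:
        # host was given as an internal address, find the external name
        for e, i in host_internal_ip.items():
            if i[0] == host:
                return (e, host)
        return ('', host)
    # host was given as an external name
    return (host, internal[0])

print(lookup_pair('testhost1'))    # ('testhost1', '172.16.10.2')
print(lookup_pair('172.16.10.3'))  # ('testhost2', '172.16.10.3')
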
Example #7
def _extract_owd_pktloss(test_id='', out_dir='', replot_only='0', source_filter='',
                ts_correct='1', burst_sep='0.0', sburst='1', eburst='0',
                seek_window='16000', log_loss='0', anchor_map='', owd_midpoint='0'):
    "Extract OWD or PKTLOSS of flows"

    ifile_ext = '.dmp.gz'
    
    if log_loss == '0':
        ofile_ext = '.owds2.gz'
    else:
        ofile_ext = '.loss2.gz'

    already_done = {}
    out_files = {}
    out_groups = {}

    test_id_arr = test_id.split(';')
    if len(test_id_arr) == 0 or test_id_arr[0] == '':
        abort('Must specify test_id parameter')

    if ts_correct == '0' and log_loss == '0':
        abort('Must use ts_correct=1 when calculating OWD')

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)
    
    # EXPERIMENTAL: anchor_map="<srcip1>:<dstip1>;<srcip2>:<dstip2>;..."
    # Normally a packet's OWD is logged as having occurred at the time the
    # packet is seen in pcap file associated with <srcip> (src_extname)
    # An 'anchor_map' entry allows you to specify that a packet's OWD be
    # logged as having occurred at the time the packet from <srcip> was
    # seen in the pcap file associated with <dstip> (dst_extname)
    # This only operates on packets between <srcip> and <dstip>; other
    # flows in the same test ID are unaffected.

    anchor_map_list = {}
    if anchor_map != '':
        if replot_only == '1':
            abort("Must specify replot_only=0 in conjunction with anchor_map")
        entries = anchor_map.split(';')
        for entry in entries:
            k, v = entry.split(':')
            anchor_map_list[k] = v

    group = 1
    
    for test_id in test_id_arr:

        # first process tcpdump files (ignore router and ctl interface tcpdumps)
        tcpdump_files = get_testid_file_list('', test_id,
                                ifile_ext, 
                                'grep -v "router.dmp.gz" | grep -v "ctl.dmp.gz"')

        for tcpdump_file in tcpdump_files:
            
            # get input directory name and create result directory if necessary
            out_dirname = get_out_dir(tcpdump_file, out_dir) 
            dir_name = os.path.dirname(tcpdump_file)
            
            # get unique flows
            flows = lookup_flow_cache(tcpdump_file)
            if flows == None:
                # If not previously found in flow_cache,
                # extract and identify the tcp and udp flows contained in tcpdump_file
                flows = _list(local('zcat %s | tcpdump -nr - "tcp" | '
                                'awk \'{ if ( $2 == "IP" ) { print $3 " " $5 " tcp" } }\' | '
                                'sed "s/://" | '
                                'sed "s/\.\([0-9]*\) /,\\1 /g" | sed "s/ /,/g" | '
                                'LC_ALL=C sort -u' %
                                tcpdump_file, capture=True))
                flows += _list(local('zcat %s | tcpdump -nr - "udp" | '
                                 'awk \'{ if ( $2 == "IP" ) { print $3 " " $5 " udp" } }\' | '
                                 'sed "s/://" | '
                                 'sed "s/\.\([0-9]*\) /,\\1 /g" | sed "s/ /,/g" | '
                                 'LC_ALL=C sort -u' %
                                 tcpdump_file, capture=True))

                # Add them to the flow cache
                append_flow_cache(tcpdump_file, flows)

            # Walk through and process the flows identified in this current tcpdump_file
            
            for flow in flows:
                
                # First extract src & dst as IP addr on experiment networks
                src, src_port, dst, dst_port, proto = flow.split(',')
                
                # Map src & dst (IP addr) to testbed-specific external (control network)
                # host names from config.py ({src,dst}_extname) and internal (experiment network)
                # hostnames/addresses from config.py ({src,dst}_internal)
                src_extname, src_internal = get_address_pair_analysis(test_id, src, do_abort='0')
                dst_extname, dst_internal = get_address_pair_analysis(test_id, dst, do_abort='0')
                
                # Skip cases of random (broadcast) traffic involving an IP address for
                # which no experimental network NIC (and hence control network hostname)
                # is directly related
                if src_extname == '' or dst_extname == '':
                    continue

                # flow name
                name = src_internal + '_' + src_port + '_' + dst_internal + '_' + dst_port
                
                # test id plus flow name
                if len(test_id_arr) > 1:
                    long_name = test_id + '_' + name
                else:
                    long_name = name
                    
                if long_name not in already_done:

                    # Construct filenames for files containing final <time> <owd|loss> pairs
                    out_final = out_dirname + test_id + '_' + name + ofile_ext
                    
                    # Only embark on actual filtering/extraction if we're asked to regenerate
                    # the intermediate OWD values, or for some reason the intermediate OWD
                    # file is missing...
                    if replot_only == '0' or not os.path.isfile(out_final):
                                                
                        # Per flow/host:
                        #   Create intermediate file of timestamps + uniqString from pcap files,
                        #   THEN call adjust_timestamps to construct a version with timestamps adjusted
                        #   relative to a single reference host in the testbed
                        #   THEN use the adjusted timestamps in the subsequent owd/loss calculations.
                        
                        # To extract packets in the FORWARD direction from both src and dst pcap files,
                        # construct dpkt flow filter in form <src_ip>:<src_port>:<dst_ip>:<dst_port>
                        # (and more specifically, from src_internal:src_port to dst_internal:dst_port).

                        filter_dpkt = src_internal + ':' + src_port + ':' + dst_internal + ':' + dst_port
                        src_port_int = int(src_port)
                        dst_port_int = int(dst_port)
                        
                        # Loop across the src and dst dmp files
                        
                        tmp_fwd_out_adj = {}
                        for dmpfile_host, dirsuffix in ([src_extname,"src"],[dst_extname,"dst"]):
                            
                            # Construct the file name of the dump file that contains this
                            # flow's packets. 'src' captured at (near) the flow's source, and 'dst'
                            # captured at (near) the flow's destination.
                            dmp_file = dir_name + '/' + test_id + '_' + dmpfile_host + ifile_ext
                            print "Extracting packets for " + name + " from:" + dmp_file
                            
                            # Construct filename for intermediate "<time> <uniqueString>" output files
                            # whose timestamps will be adjusted by adjust_timestamps()
                            # before being used for owd calculations
                            # (NOTE: Due to adjust_timestamps making assumptions about out_dir parameter,
                            # we currently can't place these tmp files under /tmp)
                            tmp_fwd_out = tempfile.mktemp(suffix=test_id + '_' + name +'_fwd_out_' + dirsuffix+".gz", dir=out_dirname)
                            
                            # Extract packet id info
                            if dmp_file.endswith('.gz'):
                                f_dmp_file = gzip.open(dmp_file)
                            else:
                                f_dmp_file = open(dmp_file)
                            pcap_reader = dpkt.pcap.Reader(f_dmp_file)
                            pcap_reader.setfilter(filter_dpkt)
                            #pcap_reader.setfilter('')
                            
                            # Create a compressed temporary intermediate file
                            f_tmp_fwd_out = gzip.open(tmp_fwd_out,'wb',1)
                            
                            # Walk across every packet in this pcap file
                            for ts, pkt in pcap_reader:
                                # get pointer to ethernet layer and check that we have IP
                                eth = dpkt.ethernet.Ethernet(pkt)
                                if eth.type != dpkt.ethernet.ETH_TYPE_IP:
                                    continue

                                # get pointer to IP layer
                                ip_pkt = eth.data

                                # ignore if src or dst IP not the ones specified in filter
                                if socket.inet_ntoa(ip_pkt.src) != src_internal or \
                                    socket.inet_ntoa(ip_pkt.dst) != dst_internal:
                                        continue

                                # ignore if UDP/TCP src or dst ports not the ones specified in filter
                                # get pointer to payload
                                if type(ip_pkt.data) == dpkt.udp.UDP:
                                    udp_frame = ip_pkt.data
                                    if udp_frame.sport != src_port_int or udp_frame.dport != dst_port_int:
                                        continue
                                    # Add IP ID field to the payload to ensure
                                    # at least something semi-unique is hashed
                                    # if UDP payload is invariant
                                    payload = str(ip_pkt.id) +udp_frame.data
                                    
                                elif type(ip_pkt.data) == dpkt.tcp.TCP:
                                    tcp_frame = ip_pkt.data
                                    if tcp_frame.sport != src_port_int or tcp_frame.dport != dst_port_int:
                                        continue
                                    # Use IP ID field, TCP Sequence number and ACK number to
                                    # construct a mostly unique string within context of this flow
                                    payload = str(ip_pkt.id) + str(tcp_frame.seq) + str(tcp_frame.ack)
                                else:
                                    continue
                                
                                # Write <timestamp> <crc32 hash of uniqueString bytes>
                                # (hashing here eliminates any later problems parsing payloads
                                # containing null bytes)

                                f_tmp_fwd_out.write("%f %s\n" % (ts,zlib.crc32(payload)))

                            f_tmp_fwd_out.close()
                            f_dmp_file.close()
                            
                            # Apply timestamp corrections to the data thus extracted, prior to
                            # calculating OWDs. Correction is MANDATORY otherwise the
                            # 'calculated' OWDs are essentially useless.
                            tmp_fwd_out_adj[dirsuffix] = adjust_timestamps(test_id, tmp_fwd_out, dmpfile_host, ' ', out_dir)
                            
                            # Remove pre-adjustment files.
                            os.remove(tmp_fwd_out)
                            
                        # Now we have unique packet hashes seen at both src and dst locations,
                        # and timestamps have been adjusted for clockoffsets.
                        
                        # Begin calculating OWD or identifying when packet losses occurred
                        
                        # Read into memory the <adjusted_timestamp> <uniqString> datasets captured
                        # at dst (the 2nd place the packet is seen, the "destination"). The src is
                        # the 1st place the packet is seen (the "source").
                        
                        dst_data_time=list()
                        dst_data_uniqString=list()
                        for line in gzip.open(tmp_fwd_out_adj["dst"]).read().splitlines():
                            sline = line.split(" ")
                            dst_data_time.append(float(sline[0]))
                            dst_data_uniqString.append(sline[1])

                        # Walk through tmp_fwd_out_adj["src"] looking for matches to packets
                        # in dst_data_uniqString, and write <time> <owd|loss> pairs in plain
                        # ASCII to out_final
                        
                        # To limit potential duplicate matches to packets received forward
                        # in time from previous match in dst_data_uniqString, maintain
                        # index next_j pointing to next row in dst_data_uniqString to start
                        # matching next packet from tmp_fwd_out_adj["src"]
                        next_j = 0
                        last_j = len(dst_data_uniqString)-1

                        # As a speed-up hack, assume match in dst_data_uniqString is
                        # within sk_window entries of next_j (saves searching all the
                        # way to the end of dst_data_uniqString when seeking a lost packet)
                        # Keeping seek_window in the low 1000s also minimises chances of
                        # duplicate matches.
                        if seek_window != '':
                            sk_window = int(seek_window)
                        else:
                            sk_window = last_j
                        
                        # Create gzipped output file (rough experiments showed a reduction in
                        # on-disk file size from easily 100s of K down to 10s of K).
                        # R automagically reads gzipped data files, so no changes required
                        # to subsequent analyse_* plotting scripts.
                        f = gzip.open(out_final, 'w')
                        
                        cumulative_loss = 0
                        
                        # Decide whether to use timestamp at src or dst for OWD
                        # (Default to src, unless anchor_map indicates using dst for
                        # this particular traffic pattern)
                        anchor = 0 # Default print timestamp at src
                        if log_loss == '0': # Only relevant for OWD calculations
                            if src_internal in anchor_map_list.keys():
                                #print "*** Found " + src_internal + " in anchor_map"
                                if anchor_map_list[src_internal] == dst_internal:
                                    # Only relevant if the map relates to both src_internal and dst_internal
                                    #print "*** " + src_internal + " points to " + anchor_map_list[src_internal] + " in anchor_map"
                                    anchor = 1 # Print timestamp at dst
                                        
                        for line in gzip.open(tmp_fwd_out_adj["src"]).read().splitlines():
                            i = line.split(" ")
                            try:
                                # The following search will raise a 'ValueError' exception if i[1] does not occur in dst_data_uniqString[next_j:]
                                j = dst_data_uniqString[next_j:min((next_j+sk_window),last_j+1)].index(i[1])
                                
                                if log_loss == '0':
                                    # OWD is diff between i[0] and dst_data_time[next_j+j]
                                    ts = float(i[0])
                                    owd = dst_data_time[next_j+j]-float(i[0])
                                    # If required, print event as occurring at dst timestamp rather than src timestamp
                                    if anchor:
                                        ts = dst_data_time[next_j+j]
                                    # If we want to imply the OWD "existed" at some mid-point
                                    # between pkt seen at src and seen at dst
                                    if owd_midpoint == '1':
                                        ts += owd/2                                    
                                    f.write('%f %f\n' % (ts, owd))
                                    
                                if log_loss == '1':
                                    # No lost packet, emit "0"
                                    f.write('%s 0\n' % (i[0]))
                                if log_loss == '2':
                                    # No lost packet, emit previous cumulative count
                                    f.write('%s %i\n' % (i[0], cumulative_loss))
                                    
                                next_j = min(next_j+j+1,last_j)
                                
                            except ValueError:
                                # No match means a packet loss
                                if log_loss == '1':
                                    # Single loss event, emit "1"
                                    f.write('%s 1\n' % (i[0]))
                                if log_loss == '2':
                                    # Single loss event, increment cumulative count, emit cumulative count
                                    cumulative_loss += 1
                                    f.write('%s %i\n' % (i[0], cumulative_loss))
                                pass
                                                    
                        f.close()
                        dst_data_time=[]
                        dst_data_uniqString=[]
                        
                        # Clean up temporary post-adjustment files
                        os.remove(tmp_fwd_out_adj["src"])
                        os.remove(tmp_fwd_out_adj["dst"])
                        
                    already_done[long_name] = 1
                    
                    if sfil.is_in(name):
                        (out_files, 
                        out_groups) = select_bursts(long_name, group, out_final, burst_sep, sburst, eburst,
                                    out_files, out_groups)
                         
        group += 1

    return (test_id_arr, out_files, out_groups)
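
A reduced sketch (hashes and timestamps are hypothetical) of the seek-window matching loop above: each source-side packet hash is searched for in a bounded window of destination-side hashes; a hit yields an OWD sample, a miss is treated as a loss:

src = [(0.10, 'a'), (0.20, 'b'), (0.30, 'c'), (0.40, 'd')]
dst_time = [0.15, 0.36, 0.46]
dst_hash = ['a', 'c', 'd']   # hash 'b' never arrived at dst => loss
next_j = 0
last_j = len(dst_hash) - 1
sk_window = 1000
for ts, h in src:
    try:
        j = dst_hash[next_j:min(next_j + sk_window, last_j + 1)].index(h)
        print('%f %f' % (ts, dst_time[next_j + j] - ts))  # OWD sample
        next_j = min(next_j + j + 1, last_j)
    except ValueError:
        print('%f lost' % ts)                             # loss event
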
Example #8
def get_clock_offsets(exp_list='experiments_completed.txt',
                      test_id='', pkt_filter='',
                      baseline_host='',
                      out_dir=''):
    "Get clock offsets for all hosts"

    if len(out_dir) > 0 and out_dir[-1] != '/':
        out_dir += '/'

    if test_id == '':
        try:
            with open(exp_list) as f:
                test_id_arr = f.readlines()
        except IOError:
            abort('Cannot open file %s' % exp_list)
    else:
        test_id_arr = test_id.split(';')

    if len(test_id_arr) == 0 or test_id_arr[0] == '':
        abort('Must specify test_id parameter')

    # specify complete tcpdump parameter list
    tcpdump_filter = '-tt -r - -n ' + pkt_filter

    for test_id in test_id_arr:
        test_id = test_id.rstrip()

        # first find tcpdump files
        tcpdump_files = get_testid_file_list('', test_id,
                                             '_ctl.dmp.gz', '')

        if len(tcpdump_files) == 0:
            warn('No tcpdump files for control interface for %s' % test_id)
            continue

        # if we have tcpdumps for control interface we can assume broadcast ping
        # was enabled

        dir_name = os.path.dirname(tcpdump_files[0])
        # then look for tpconf_vars.log.gz file in that directory 
        var_file = local('find -L %s -name "*tpconf_vars.log.gz"' % dir_name,
                         capture=True)

        bc_addr = ''
        router_name = ''

        if len(var_file) > 0:
            # new approach without using config.py
            # XXX no caching here yet, assume we only generate clockoffset file once
            # per experiment 

            # unzip archived file
            local('gzip -cd %s > %s' % (var_file, TMP_CONF_FILE))

            # load the TPCONF_variables into oldconfig
            oldconfig = imp.load_source('oldconfig', TMP_CONF_FILE)

            # remove temporary unzipped file 
            try:
                os.remove(TMP_CONF_FILE)
                os.remove(TMP_CONF_FILE + 'c') # remove the compiled file as well
            except OSError:
                pass

            try:
                bc_addr = oldconfig.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = oldconfig.TPCONF_router[0].split(':')[0]
            
        else:
            # old approach using config.py

            try:
                bc_addr = config.TPCONF_bc_ping_address
            except AttributeError:
                pass

            router_name = config.TPCONF_router[0].split(':')[0]

        if bc_addr == '':
            # assume default multicast address 
            bc_addr = '224.0.1.199' 

        # specify complete tcpdump parameter list
        if pkt_filter != '':
            tcpdump_filter = '-tt -r - -n ' + pkt_filter
        else:
            tcpdump_filter = '-tt -r - -n ' + 'icmp and dst host ' + bc_addr

        if baseline_host == '':
            baseline_host = router_name 

        #
        # now read timestamps from each host's tcpdump
        #

        # map of host names (or IPs) and sequence numbers to timestamps
        host_times = {}
        for tcpdump_file in tcpdump_files:
            host = local(
                'echo %s | sed "s/.*_\([a-z0-9\.]*\)_ctl.dmp.gz/\\1/"' %
                tcpdump_file,
                capture=True)
            host_times[host] = {}
            #print(host)
            #print(host_times)

            # We pipe gzcat through to tcpdump. Note, since tcpdump exits early
            # (due to "-c num_samples") gzcat's pipe will collapse and gzcat
            # will complain bitterly. So we dump its stderr to stderrhack.
            init_zcat = Popen(['zcat ' + tcpdump_file], stdin=None,
                              stdout=PIPE, stderr=stderrhack, shell=True)
            init_tcpdump = Popen(['tcpdump ' + tcpdump_filter],
                                 stdin=init_zcat.stdout,
                                 stdout=PIPE,
                                 stderr=stderrhack,
                                 shell=True)

            for line in init_tcpdump.stdout.read().splitlines():
                _time = line.split(" ")[0]
                _seq = int(line.split(" ")[11].replace(',', ''))
                host_times[host][_seq] = _time

        #print(host_times)

        # get time differences and get host list
        diffs = {}
        ref_times = {}
        host_str = ''
        host_list = sorted(host_times.keys())
        # getting hosts from the config is problematic if different 
        # experiments with different configs are in the same directory
        #host_list = sorted(config.TPCONF_router + config.TPCONF_hosts)

        for host in host_list:
            host_str += ' ' + host
            if host not in host_times:
                continue
            for seq in sorted(host_times[host].keys()):
                if seq not in diffs:
                    diffs[seq] = {}
                if baseline_host in host_times and seq in host_times[
                        baseline_host]:
                    diffs[seq][host] = float(host_times[host][seq]) - \
                        float(host_times[baseline_host][seq])
                    ref_times[seq] = host_times[baseline_host][seq]
                else:
                    # this should only happen if TPCONF_router was
                    # modified
                    warn('Cannot find baseline host %s timestamp data' % baseline_host)
                    diffs[seq][host] = None
                    ref_times[seq] = None

        #print(diffs)

        if out_dir == '' or out_dir[0] != '/':
            dir_name = os.path.dirname(tcpdump_files[0])
            out_dir = dir_name + '/' + out_dir
        mkdir_p(out_dir)
        out_name = out_dir + test_id + CLOCK_OFFSET_FILE_EXT

        # write table of offsets (rows = time, cols = hosts)
        f = open(out_name, 'w')
        f.write('# ref_time' + host_str + '\n')
        for seq in sorted(diffs.keys()):
            if ref_times[seq] is not None:
                f.write(ref_times[seq])
            else:
                # this case should never happen
                continue

            f.write(' ')

            for host in host_list:
                if host in diffs[seq] and diffs[seq][host] is not None:
                    f.write('{0:.6f}'.format(diffs[seq][host]))
                else:
                    f.write('NA')
                if host != host_list[-1]:
                    f.write(' ')
            f.write('\n')

        f.close()
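
For reference, a tiny sketch of how the timestamp and ICMP sequence number are pulled out of each tcpdump output line above (the example line is hypothetical):

line = ('1394784000.123456 IP 172.16.10.2 > 224.0.1.199: '
        'ICMP echo request, id 1, seq 42, length 64')
_time = line.split(" ")[0]
_seq = int(line.split(" ")[11].replace(',', ''))
print('%s %d' % (_time, _seq))  # 1394784000.123456 42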