Пример #1
0
def extract_summary_from_config(inputdir, workload, worker, vc, reps,
                                xput_client, thinktime_ms, num_threads,
                                middlewares, warmup_period_endtime,
                                cooldown_period_starttime):
    """Print a middleware summary for one workload / vc / worker setup.

    For every repetition ``1..reps`` the request logs of each middleware
    are loaded, turned into metrics, trimmed with
    ``cut.cut_away_warmup_cooldown`` and aggregated over windows and over
    middlewares. The repetitions are then aggregated, and values from the
    ``'mean'`` row of the result are printed to stdout.

    Args:
        inputdir: Root directory containing the experiment folders.
        workload: First component of the ``{workload}_{vc}vc{worker}workers``
            folder name.
        worker: Worker count; cast to ``int`` for the folder name and printed
            in the header line.
        vc: Cast to ``int`` for the folder name; multiplied by
            ``num_threads`` to obtain the number of clients.
        reps: Number of repetitions; folders ``1`` .. ``reps`` are read.
        xput_client: Value printed as the client throughput.
        thinktime_ms: Added to the mean response time when deriving the
            throughput from the response time.
        num_threads: Multiplied by ``vc`` to obtain the number of clients.
        middlewares: Sub-folder names (one per middleware) inside each
            repetition folder.
        warmup_period_endtime: Passed to ``cut.cut_away_warmup_cooldown``.
        cooldown_period_starttime: Passed to ``cut.cut_away_warmup_cooldown``.

    Returns:
        None. The summary is written to stdout.
    """
    run_folder = "{}_{}vc{}workers".format(workload, str(int(vc)),
                                           str(int(worker)))

    per_rep_metrics = []
    for rep_number in range(1, reps + 1):
        rep_dir = os.path.join(inputdir, run_folder, str(rep_number))

        # Each stage is applied to every middleware before the next stage
        # starts: locate log dir -> load requests -> metrics -> trim ->
        # windows.
        mw_dirs = []
        for mw_name in middlewares:
            mw_dirs.append(
                dirfuncs.get_only_subdir(os.path.join(rep_dir, mw_name)))

        request_logs = []
        for mw_dir in mw_dirs:
            request_logs.append(gmws.concatenate_requestlogs(mw_dir))

        raw_metrics = []
        for log in request_logs:
            raw_metrics.append(gmws.extract_metrics(log))

        trimmed = []
        for mets in raw_metrics:
            trimmed.append(
                cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                             cooldown_period_starttime))

        windowed = []
        for mets in trimmed:
            windowed.append(gmws.aggregate_over_windows(mets))

        per_rep_metrics.append(gmws.aggregate_over_middlewares(windowed))

    over_reps = gmws.aggregate_over_reps(per_rep_metrics)
    summary = gmws.aggregate_over_timesteps(over_reps)
    mean_row = summary.loc['mean', :]
    client_count = vc * num_threads

    print("{} workers:\n".format(worker))

    throughput_mw = mean_row['throughput_mw']
    # Throughput derived from the number of clients and the mean response
    # time plus think time (both in ms); the factor 1000 converts the
    # result to a per-second rate.
    throughput_derived = client_count / (
        float(mean_row['responseTime_ms']) + thinktime_ms) * 1000
    report = "Throughput MW:\t\t\t\t\t\t\t\t{}\nThroughput (derived from MW ResponseTime):\t{}\nThroughput (Client):\t\t\t\t\t\t{}\nAvg Time in Queue:\t\t\t\t\t\t\t{}\nAvg Length of Queue:\t\t\t\t\t\t{}\nAvg Time waiting for MC:\t\t\t\t\t{}\n"
    print(
        report.format(throughput_mw, throughput_derived, xput_client,
                      mean_row['queueTime_ms'], mean_row['queueLength'],
                      mean_row['memcachedRTT_ms']))
Пример #2
0
def plot_mw_metric_over_time(inputdir, worker_configuration, metric_to_plot,
                             folder_prefix, reps, middlewares, ax):
    """Plot one middleware metric over time, one line per worker setting.

    For each entry of ``worker_configuration`` and each repetition, the
    request logs of all middlewares are concatenated into one frame,
    converted to metrics and aggregated over windows. The selected metric
    is then averaged per index value across repetitions and drawn on
    ``ax``.

    Note that the title and axis labels are fixed throughput texts and do
    not depend on ``metric_to_plot``.

    Args:
        inputdir: Passed to ``create_log_folder_path``.
        worker_configuration: Iterable of worker settings; each one becomes
            a line (and legend label) in the plot.
        metric_to_plot: Column selected from the windowed metrics.
        folder_prefix: Passed to ``create_log_folder_path``.
        reps: Iterable of repetition identifiers.
        middlewares: Sub-folder names (one per middleware) inside each log
            folder.
        ax: Axes-like object the lines are drawn on.

    Returns:
        None. ``ax`` is modified in place.
    """
    line_colors = cycler('color',
                         ['#66c2a4', '#41ae76', '#238b45', '#005824'])
    ax.set_xlim([-5, 87])
    ax.set_prop_cycle(line_colors)

    for worker_config in worker_configuration:
        windows_per_rep = []
        for rep in reps:
            rep_dir = create_log_folder_path(inputdir, folder_prefix, rep,
                                             worker_config)

            mw_dirs = []
            for mw_name in middlewares:
                mw_dirs.append(
                    dirfuncs.get_only_subdir(os.path.join(rep_dir, mw_name)))

            # All middlewares of one repetition are pooled into one frame
            # before metrics are extracted.
            pooled_requests = pd.concat(
                [gmws.concatenate_requestlogs(mw_dir) for mw_dir in mw_dirs])

            rep_metrics = gmws.extract_metrics(pooled_requests)
            windows_per_rep.append(gmws.aggregate_over_windows(rep_metrics))

        series = pd.concat(windows_per_rep)[metric_to_plot]
        # Average the repetitions that share the same index value.
        mean_per_step = series.groupby(series.index).agg('mean')
        ax.plot(mean_per_step.index, mean_per_step, label=worker_config)

    ax.legend(loc="upper left")
    ax.set_title(
        "Experiment 3.1:\nThroughput (MW) vs. Time for different number of workers"
    )
    ax.set_xlabel("Time (sec)")
    ax.set_ylabel("Throughput (ops/sec)")
Пример #3
0
def graph_responsetime(worker, vc, num_threads, workload, middlewares,
                       client_logfiles, reps, inputdir, xlim, ax):
    """Draw a horizontal bar chart of response-time components on ``ax``.

    Middleware metrics (``gmws``) and client metrics (``gmts``) are
    collected for every repetition, trimmed with
    ``cut.cut_away_warmup_cooldown(..., 10, 72)``, aggregated over
    repetitions and timesteps, and joined column-wise. The ``'mean'`` row
    supplies the bar lengths and the ``'std'`` row the error bars.

    Args:
        worker: Worker count used in the folder name and the title.
        vc: Used in the folder name; multiplied by ``num_threads`` for the
            client count shown in the title.
        num_threads: Multiplied by ``vc`` for the title.
        workload: First component of the folder name; shown in the title.
        middlewares: Sub-folder names (one per middleware) inside each
            repetition folder.
        client_logfiles: Client log file names inside each repetition
            folder.
        reps: Iterable of repetition identifiers (used as folder names).
        inputdir: Root directory containing the experiment folders.
        xlim: Upper limit of the x axis.
        ax: Axes-like object to draw on.

    Returns:
        None. ``ax`` is modified in place.
    """
    mw_per_rep = []
    client_per_rep = []
    for rep in reps:
        run_dir = os.path.join(inputdir,
                               "{}_{}vc{}workers".format(workload, vc, worker),
                               str(rep))

        # Middleware side: each stage is applied to every middleware
        # before the next stage starts.
        stage = [
            dirfuncs.get_only_subdir(os.path.join(run_dir, mw_name))
            for mw_name in middlewares
        ]
        stage = [gmws.concatenate_requestlogs(mw_dir) for mw_dir in stage]
        stage = [gmws.extract_metrics(log) for log in stage]
        stage = [cut.cut_away_warmup_cooldown(m, 10, 72) for m in stage]
        stage = [gmws.aggregate_over_windows(m) for m in stage]
        mw_per_rep.append(gmws.aggregate_over_middlewares(stage))

        # Client side: aggregate all client logs, then trim the same range.
        client_paths = []
        for logfile in client_logfiles:
            client_paths.append(os.path.join(run_dir, logfile))
        client_per_rep.append(
            cut.cut_away_warmup_cooldown(
                gmts.aggregate_over_clients(client_paths), 10, 72))

    mw_summary = gmws.aggregate_over_timesteps(
        gmws.aggregate_over_reps(mw_per_rep))
    client_summary = gmts.aggregate_over_timesteps(
        gmts.aggregate_over_reps(client_per_rep))

    combined = pd.concat([mw_summary, client_summary], axis=1)

    # Raw column name -> label shown in the chart (insertion order is the
    # bar order).
    display_names = {
        'netthreadServiceTime_ms': 'NetThread Service Time',
        'queueTime_ms': 'Queue Time',
        'memcachedRTT_ms': 'Memcached RTT',
        'workerServiceTime_ms': 'Worker Service Time',
        'responseTime_ms': 'Total Response Time (MW)',
        'responsetime': 'Total Response Time (MT)',
    }
    bar_order = list(display_names.values())
    combined = combined.rename(columns=display_names)

    bar_lengths = combined.loc['mean', bar_order]
    bar_errors = combined.loc['std', bar_order]
    bar_colors = [
        '#bf812d', '#c7eae5', '#80cdc1', '#01665e', '#003c30', '#bf812d'
    ]
    bar_lengths.plot(ax=ax, kind='barh', xerr=bar_errors, color=bar_colors)

    client_count = vc * num_threads
    ax.set_title("{}, {} clients, {} workers".format(workload, client_count,
                                                     worker))
    ax.set_xlabel("Time (msec)")
    ax.set_xlim([0, xlim])
Пример #4
0
def get_responsetime_data_old(sharded, multigets, middlewares, client_logfiles,
                              reps, inputdir):
    """Collect mean middleware and client metrics for each multiget setting.

    For every value in ``multigets`` and every repetition, middleware
    metrics (via ``gmws.extract_metrics_old``) and client metrics (via
    ``gmts.aggregate_over_clients``) are loaded and trimmed with
    ``cut.cut_away_warmup_cooldown(..., 10, 72)``. Both sides are then
    aggregated over repetitions and timesteps, joined column-wise and
    tagged with a ``'multigets'`` column.

    Args:
        sharded: First component of the ``{sharded}_{multiget}multiget``
            folder name.
        multigets: Iterable of multiget settings to process.
        middlewares: Sub-folder names (one per middleware) inside each
            repetition folder.
        client_logfiles: Client log file names inside each repetition
            folder.
        reps: Iterable of repetition identifiers (used as folder names).
        inputdir: Root directory containing the experiment folders.

    Returns:
        The ``'mean'`` rows of the concatenated per-multiget frames.
    """
    frames = []
    for multiget in multigets:
        mw_per_rep = []
        client_per_rep = []
        for rep in reps:
            run_dir = os.path.join(inputdir,
                                   "{}_{}multiget".format(sharded, multiget),
                                   str(rep))

            # Middleware side: each stage is applied to every middleware
            # before the next stage starts.
            stage = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_name))
                for mw_name in middlewares
            ]
            stage = [gmws.concatenate_requestlogs(mw_dir) for mw_dir in stage]
            stage = [gmws.extract_metrics_old(log) for log in stage]
            stage = [cut.cut_away_warmup_cooldown(m, 10, 72) for m in stage]
            stage = [gmws.aggregate_over_windows(m) for m in stage]
            mw_per_rep.append(gmws.aggregate_over_middlewares(stage))

            # Client side: aggregate all client logs, then trim the same
            # range.
            client_paths = []
            for logfile in client_logfiles:
                client_paths.append(os.path.join(run_dir, logfile))
            client_per_rep.append(
                cut.cut_away_warmup_cooldown(
                    gmts.aggregate_over_clients(client_paths), 10, 72))

        mw_summary = gmws.aggregate_over_timesteps(
            gmws.aggregate_over_reps(mw_per_rep))
        client_summary = gmts.aggregate_over_timesteps(
            gmts.aggregate_over_reps(client_per_rep))

        combined = pd.concat([mw_summary, client_summary], axis=1)
        combined['multigets'] = multiget
        frames.append(combined)

    return pd.concat(frames).loc['mean', :]
Пример #5
0
def get_responsetime_data_old(worker, vc_settings, num_threads, workload,
                              middlewares, client_logfiles, reps, inputdir):
    """Collect mean middleware and client metrics for each vc setting.

    For every value in ``vc_settings`` and every repetition, middleware
    metrics (via ``gmws.extract_metrics_old``) and client metrics (via
    ``gmts.aggregate_over_clients``) are loaded and trimmed with
    ``cut.cut_away_warmup_cooldown(..., 10, 72)``. Both sides are then
    aggregated over repetitions and timesteps, joined column-wise and
    tagged with a ``'num_clients'`` column equal to ``vc * num_threads``.

    Args:
        worker: Worker count used in the folder name.
        vc_settings: Iterable of vc values to process.
        num_threads: Multiplied by each vc value for ``'num_clients'``.
        workload: First component of the ``{workload}_{vc}vc{worker}workers``
            folder name.
        middlewares: Sub-folder names (one per middleware) inside each
            repetition folder.
        client_logfiles: Client log file names inside each repetition
            folder.
        reps: Iterable of repetition identifiers (used as folder names).
        inputdir: Root directory containing the experiment folders.

    Returns:
        The ``'mean'`` rows of the concatenated per-vc frames.
    """
    frames = []
    for vc in vc_settings:
        mw_per_rep = []
        client_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}vc{}workers".format(workload, vc, worker),
                str(rep))

            # Middleware side: each stage is applied to every middleware
            # before the next stage starts.
            stage = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_name))
                for mw_name in middlewares
            ]
            stage = [gmws.concatenate_requestlogs(mw_dir) for mw_dir in stage]
            stage = [gmws.extract_metrics_old(log) for log in stage]
            stage = [cut.cut_away_warmup_cooldown(m, 10, 72) for m in stage]
            stage = [gmws.aggregate_over_windows(m) for m in stage]
            mw_per_rep.append(gmws.aggregate_over_middlewares(stage))

            # Client side: aggregate all client logs, then trim the same
            # range.
            client_paths = []
            for logfile in client_logfiles:
                client_paths.append(os.path.join(run_dir, logfile))
            client_per_rep.append(
                cut.cut_away_warmup_cooldown(
                    gmts.aggregate_over_clients(client_paths), 10, 72))

        mw_summary = gmws.aggregate_over_timesteps(
            gmws.aggregate_over_reps(mw_per_rep))
        client_summary = gmts.aggregate_over_timesteps(
            gmts.aggregate_over_reps(client_per_rep))

        combined = pd.concat([mw_summary, client_summary], axis=1)
        combined['num_clients'] = vc * num_threads
        frames.append(combined)

    return pd.concat(frames).loc['mean', :]
Пример #6
0
def extract_summary_from_config(inputdir, workload, worker, vc, reps,
                                xput_client, resptime_client, thinktime_ms,
                                num_threads, middlewares, client_logfiles,
                                warmup_period_endtime,
                                cooldown_period_starttime):
    """Print a middleware and client summary for one experiment setup.

    For every repetition ``1..reps`` the request logs of each middleware
    are loaded, turned into metrics, trimmed with
    ``cut.cut_away_warmup_cooldown`` and aggregated over windows and over
    middlewares. The repetitions are then aggregated and values from the
    ``'mean'`` row are printed together with the client-side figures.

    Args:
        inputdir: Root directory containing the experiment folders.
        workload: First component of the ``{workload}_{vc}vc{worker}workers``
            folder name.
        worker: Worker count; cast to ``int`` for the folder name and printed
            in the header line.
        vc: Cast to ``int`` for the folder name; multiplied by
            ``num_threads`` to obtain the number of clients.
        reps: Number of repetitions; folders ``1`` .. ``reps`` are read.
        xput_client: Value printed as "Throughput MT".
        resptime_client: Value printed as "Response Time MT".
        thinktime_ms: Accepted but not used by this function.
        num_threads: Multiplied by ``vc`` to obtain the number of clients.
        middlewares: Sub-folder names (one per middleware) inside each
            repetition folder.
        client_logfiles: Client log file names; read from the folder of the
            last repetition only.
        warmup_period_endtime: Passed to ``cut.cut_away_warmup_cooldown``.
        cooldown_period_starttime: Passed to ``cut.cut_away_warmup_cooldown``.

    Returns:
        None. The summary is written to stdout.
    """
    run_folder = "{}_{}vc{}workers".format(workload, str(int(vc)),
                                           str(int(worker)))

    per_rep_metrics = []
    for rep_number in range(1, reps + 1):
        log_folder_path = os.path.join(inputdir, run_folder, str(rep_number))

        # Each stage is applied to every middleware before the next stage
        # starts: locate log dir -> load requests -> metrics -> trim ->
        # windows.
        stage = [
            dirfuncs.get_only_subdir(os.path.join(log_folder_path, mw_name))
            for mw_name in middlewares
        ]
        stage = [gmws.concatenate_requestlogs(mw_dir) for mw_dir in stage]
        stage = [gmws.extract_metrics(log) for log in stage]
        stage = [
            cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
            for mets in stage
        ]
        stage = [gmws.aggregate_over_windows(mets) for mets in stage]
        per_rep_metrics.append(gmws.aggregate_over_middlewares(stage))

    over_reps = gmws.aggregate_over_reps(per_rep_metrics)
    summary = gmws.aggregate_over_timesteps(over_reps)
    mean_row = summary.loc['mean', :]

    # NOTE(review): log_folder_path still holds the value from the final
    # loop iteration, so the client totals below come from the last
    # repetition only -- confirm this is intended.
    client_paths = []
    for logfile in client_logfiles:
        client_paths.append(os.path.join(log_folder_path, logfile))

    client_totals = []
    for path in client_paths:
        client_totals.append(gmts.extract_total_numbers(path))

    totals_df = pd.DataFrame(
        data=client_totals,
        columns=['total_opsec', 'hits_persec', 'misses_persec'])
    missrate_client = gmts.calculate_miss_rate(totals_df)

    client_count = vc * num_threads

    print("{} workers:\n".format(worker))

    # The value printed as "Miss rate MW" is the difference between the
    # mean requested-key count and the mean hit count.
    report = "Throughput MW:\t\t\t\t\t\t\t\t{}\nResponse Time MW:\t\t\t\t\t\t{}\nAvg Time in Queue:\t\t\t\t\t\t{}\nMiss rate MW:\t\t\t\t\t\t\t{}\nThroughput MT:\t\t\t\t\t\t{}\nResponse Time MT:\t\t\t\t\t{}\nMiss rate MT:\t\t\t\t\t{}\nNum Clients:\t\t\t\t\t{}\n"
    print(
        report.format(mean_row['throughput_mw'], mean_row['responseTime_ms'],
                      mean_row['queueTime_ms'],
                      mean_row['numKeysRequested'] - mean_row['numHits'],
                      xput_client, resptime_client, missrate_client,
                      client_count))
Пример #7
0
def graph_queuelength(worker_configs, vc, num_threads, workload, middlewares,
                      reps, inputdir, ylim, ax):
    """Draw a bar chart of the mean middleware queue length per worker setting.

    For every entry of ``worker_configs`` and every repetition, middleware
    metrics are loaded, trimmed with
    ``cut.cut_away_warmup_cooldown(..., 10, 72)`` and aggregated over
    windows, middlewares, repetitions and timesteps. The ``'mean'`` rows of
    the ``'queueLength'`` column give the bar heights and the ``'std'``
    rows the error bars. Bars are labelled with the corresponding entry of
    ``worker_configs``.

    Args:
        worker_configs: Iterable of worker settings; one bar is drawn per
            entry, labelled with ``str(entry)``.
        vc: Used in the folder name; multiplied by ``num_threads`` for the
            client count shown in the title.
        num_threads: Multiplied by ``vc`` for the title.
        workload: First component of the ``{workload}_{vc}vc{worker}workers``
            folder name; shown in the title.
        middlewares: Sub-folder names (one per middleware) inside each
            repetition folder.
        reps: Iterable of repetition identifiers (used as folder names).
        inputdir: Root directory containing the experiment folders.
        ylim: Upper limit of the y axis.
        ax: Axes-like object to draw on.

    Returns:
        None. ``ax`` is modified in place.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``worker_configs`` is empty.
    """
    metrics_per_worker = []
    # Labels are collected while iterating so that one-shot iterables are
    # supported as well.
    bar_labels = []
    for worker in worker_configs:
        bar_labels.append(str(worker))
        all_mw_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}vc{}workers".format(workload, vc, worker),
                str(rep))

            # Get MW metrics, one processing stage at a time.
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics(reqs) for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, 10, 72) for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]

            all_mw_metrics_per_rep.append(
                gmws.aggregate_over_middlewares(windows))

        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mw_averages['worker'] = worker
        metrics_per_worker.append(
            mw_averages.loc[:, ['queueLength', 'worker']])

    metrics = pd.concat(metrics_per_worker)

    # After reset_index the rows are numbered in worker_configs order, so
    # position i is relabelled with the i-th worker setting instead of a
    # fixed '8'/'16'/'32'/'64' list.
    position_to_label = dict(enumerate(bar_labels))

    y = metrics.loc['mean', ['queueLength']].reset_index(drop=True).rename(
        index=position_to_label)
    stds = metrics.loc['std', ['queueLength']].reset_index(drop=True).rename(
        index=position_to_label)

    color_cycler = ['#66c2a4', '#41ae76', '#238b45', '#005824']
    y.plot(ax=ax, kind='bar', yerr=stds, color=color_cycler)

    ax.set_title("{}, {} clients".format(workload, vc * num_threads))
    ax.set_xlabel("Workers per Middleware")
    ax.set_ylabel("Average Queue Length")
    ax.set_ylim([0, ylim])
    ax.legend().set_visible(False)