def run(path_in: str, path_out: str) -> None:
    """Render per-epoch RDB overlap plots plus one aggregate figure.

    Scans *path_in* for 'rdb.olap.e*.csv' files (one per epoch), plots each
    into a PDF under *path_out* via plot_epoch, and accumulates all epochs
    onto a shared axis saved as 'manifest.aggr.pdf'.
    """
    print(path_in)
    desc = """
    [plot_rdb] parsing RDB to compute RDB overlaps
    """
    print(desc)

    manifest_files = sorted(glob.glob(path_in + '/rdb.olap.e*.csv'))
    print('{0} epoch files found\n'.format(len(manifest_files)))

    fig_aggr, ax_aggr = plt.subplots(1, 1)

    for csv_in in manifest_files:
        # same basename, .csv -> .pdf, relocated under path_out
        pdf_name = re.sub('csv$', 'pdf', Path(csv_in).name)
        pdf_out = Path(path_out) / pdf_name
        plot_epoch(csv_in, pdf_out, ax_aggr=ax_aggr)
        print('Plot saved: {0} ==> {1}\n'.format(abbrv_path(csv_in),
                                                 abbrv_path(pdf_out)))

    ax_aggr.set_xlabel('Attribute Range')
    ax_aggr.set_ylabel('Overlap Percent')
    ax_aggr.set_title('Aggregate Manifest Stats')
    ax_aggr.legend()
    # fig_aggr.show()

    aggr_out = Path(path_out) / 'manifest.aggr.pdf'
    fig_aggr.savefig(aggr_out, dpi=300)
def plot_reneg_std(bincnts: Iterable[np.ndarray], fig_path: str) -> None:
    """Plot per-interval load skew (CoV) against cumulative data volume.

    Each element of *bincnts* is a 2D array for one epoch — rows are
    successive renegotiation snapshots of cumulative per-bin counts.
    Writes 'reneg_vs_std.pdf' under *fig_path*.
    """
    fig, ax = plt.subplots(1, 1)
    linestyle = '-'

    for epoch, epoch_stats in enumerate(bincnts):
        # loads accrued between consecutive snapshots
        epoch_loads = np.diff(epoch_stats, n=1, axis=0)
        if epoch_loads.shape[0] == 0:
            # fewer than two snapshots: no intervals to plot
            continue

        # cumulative volume at each snapshot, minus the leading baseline row
        epoch_cum_loads = np.delete(epoch_stats.sum(1), 0)
        # coefficient of variation across bins, per interval
        epoch_stds = epoch_loads.std(1) / epoch_loads.mean(1)
        ax.plot(epoch_cum_loads, epoch_stds, linestyle, mec='purple',
                label='Epoch {0}'.format(epoch))

    ax.set_xlabel('Total Data Volume')
    ax.set_ylabel('Normalized Stddev (CoV)')
    ax.set_title('Renegotiation Events vs Interval Stddev')
    ax.legend()
    fig.show()

    plot_out = fig_path + '/reneg_vs_std.pdf'
    # BUG FIX: savefig was commented out, yet the message below claimed the
    # plot had been saved — actually write the figure to disk.
    fig.savefig(plot_out, dpi=300)
    print('Plot saved: ', abbrv_path(plot_out))
def read_all(perf_path: str) -> Tuple[Iterable, Iterable]:
    """Read RTP perflogs under *perf_path* and split them per epoch.

    Aggregate bin counts are memoized via cache.Cache keyed on *perf_path*;
    the module-global USE_CACHE gates whether an existing entry is used.

    Returns:
        (epoch_pivots, epoch_bincnts): parallel per-epoch lists produced by
        np.split on the aggregate pivot/bincount arrays.
    """
    print('Reading perflogs from: {0}'.format(abbrv_path(perf_path)))

    # BUG FIX: all_fpaths was computed only on the cache-miss path, but
    # read_epoch_counts(all_fpaths[0]) below needs it on cache hits too
    # (previously a NameError when the cache was used).
    all_fpaths = sorted(glob.glob(perf_path + PERFLOGFMT.format('*')))

    aggr_bincnts = None
    cache_obj = cache.Cache()
    cache_miss = True

    if cache_obj.exists(perf_path):
        if not USE_CACHE:
            print('Cache Entry available, SKIPPING')
        else:
            aggr_bincnts = cache_obj.get(perf_path)
            print('Cache entry LOADED')
            cache_miss = False

    if cache_miss:
        # parse perflogs in parallel and sum the per-file bincount arrays
        with multiprocessing.Pool(8) as pool:
            parsed_fpaths = pool.map(read_bincnt, all_fpaths)
        aggr_bincnts = sum(parsed_fpaths)
        cache_obj.put(perf_path, aggr_bincnts)

    epoch_counts = read_epoch_counts(all_fpaths[0])
    print(epoch_counts)

    epoch_counts = epoch_counts[:-1]  # last boundary needn't split

    epoch_bincnts = np.split(aggr_bincnts, epoch_counts)

    aggr_pivots = read_pivots(perf_path)
    epoch_pivots = np.split(aggr_pivots, epoch_counts)

    print('RTP data read from perflogs')
    print('RTP Total Epochs: ', len(epoch_bincnts))
    total_mass = sum(aggr_bincnts[-1])
    print('RTP Total Mass: ', f'{total_mass:,}')

    return epoch_pivots, epoch_bincnts
def get_manifest_overlaps(data_path: str, epoch: int,
                          probe_points: List[float]) -> Tuple[int, List[int]]:
    """Count manifest items overlapping each probe point for one epoch.

    Returns (total item count, per-probe overlap counts); (0, []) when the
    manifest for *epoch* is empty.
    """
    mf_items = read_entire_manifest(data_path, epoch)
    if not mf_items:
        return 0, []

    _, _, item_sum = get_stats(mf_items)
    print('\nReading MockDB Manifest (path: ... {0}): {1}M items'.format(
        abbrv_path(data_path), int(item_sum / 1e6)))

    overlap_stats = [get_overlapping_count(mf_items, point)[1]
                     for point in probe_points]
    return item_sum, overlap_stats
def compute_mdb_overlap(out_path: str):
    """Compute MockDB manifest overlaps per epoch and write them as CSVs.

    Manifests are read from '<out_path>/../plfs/manifests'; one
    'mdb.olap.e<N>.csv' is dumped under *out_path* per epoch.
    """
    data_path = out_path + '/../plfs/manifests'
    path_fmt_mdb = '{0}/mdb.olap.e{1}.csv'

    for epoch, (points, total, overlaps) in enumerate(gen_mdb_overlaps(data_path)):
        print('Epoch {:d}: Max MDB Overlap: {:.2f}%'.format(
            epoch, max(overlaps) * 100.0 / total))
        csv_path_mdb = path_fmt_mdb.format(out_path, epoch)
        dump_csv(epoch, points, overlaps, total, csv_path_mdb)
        print('Epoch {0} Written: {1}'.format(epoch, abbrv_path(csv_path_mdb)))
def compute_rtp_overlap(all_pivots: List[np.ndarray],
                        all_counts: List[np.ndarray], out_path: str):
    """Compute RTP overlap stats per epoch and write them as CSVs.

    *all_pivots* and *all_counts* are parallel per-epoch lists; one
    'rtp.olap.e<N>.csv' is dumped under *out_path* per epoch.
    """
    path_fmt = '{0}/rtp.olap.e{1}.csv'
    # removed unused local path_fmt_mdb (copy-paste residue from the
    # MDB variant of this function)
    npts = 100  # probe resolution per epoch

    for epoch, (pivots, counts) in enumerate(zip(all_pivots, all_counts)):
        points, overlaps, total = analyze_overlap_epoch(pivots, counts, npts)
        print('Epoch {:d}: Max RTP Overlap: {:.2f}%'.format(
            epoch, max(overlaps) * 100.0 / total))
        csv_path = path_fmt.format(out_path, epoch)
        dump_csv(epoch, points, overlaps, total, csv_path)
        print('Epoch {0} Written: {1}'.format(epoch, abbrv_path(csv_path)))
def gen_overlaps(data_path: str) -> List[List]:
    """Generate per-epoch overlap stats for a MockDB manifest.

    Iterates epochs starting at 0 until an empty manifest is found.  For
    each epoch, probe points spanning the manifest's key range are tested
    for overlap in parallel.

    Returns:
        One [probe_points, item_sum, overlap_stats] entry per epoch, where
        overlap_stats is an np.int64 array of overlapping item counts.
        (Annotation fixed: the previous Tuple[List, int, List] did not
        match the actual list-of-lists return value.)
    """
    all_data = []
    epoch = 0

    while True:
        mf_items = read_entire_manifest(data_path, epoch)
        if len(mf_items) == 0:
            break  # no more epochs

        # probe range: smallest lower bound to largest upper bound
        probe_min = min(mf_items, key=lambda x: x[0])[0]
        probe_max = max(mf_items, key=lambda x: x[1])[1]
        probe_points = gen_probe_points(probe_min, probe_max)

        _, _, item_sum = get_stats(mf_items)
        print('\nReading MockDB Manifest E{2}, (path: ... {0}): {1}M items'.format(
            abbrv_path(data_path), int(item_sum / 1e6), epoch))

        # fan probes out across workers; each worker is seeded with mf_items
        with multiprocessing.Pool(16, worker_initialize, [mf_items]) as pool:
            point_overlaps = pool.map(get_overlapping_count_parutil,
                                      probe_points)
        overlap_stats = np.fromiter((x[1] for x in point_overlaps),
                                    dtype=np.int64)
        # removed dead locals overlap_pct / overlap_max / overlap_fmt,
        # which fed only a commented-out print

        all_data.append([probe_points, item_sum, overlap_stats])
        epoch += 1

    return all_data