def plot_all_communication_overheads():
    """Plot all-reduce latency for every cluster size in ``num_of_nodes``.

    Each cluster size ``n`` maps to an ``allreduce<n>.log`` benchmark file that
    is passed to ``predict`` together with a colour, a marker, a legend label,
    a fixed grid of sizes and the shared axes. The figure is only shown, it is
    not written to disk.
    """
    fig, ax = plt.subplots(figsize=(5, 4.5))
    legend_names = ['%d-node' % count for count in num_of_nodes]
    palette = ['r', 'g', 'b', 'black', 'y', 'c']
    glyphs = ['^', 'o', 'd', '*', 'x', 'v']
    # Grid forwarded to predict(): 128, 128 + 8192, ... up to (excluding) 1e5.
    sizes = np.arange(128.0, 1e5, step=8192)

    for idx, node_count in enumerate(num_of_nodes):
        log_path = '/media/sf_Shared_Data/gpuhome/repositories/mpibench/allreduce%d.log' % node_count
        predict(log_path, node_count, palette[idx], glyphs[idx],
                legend_names[idx], sizes, ax)

    plt.xlabel('# of parameters')
    plt.ylabel(r'Latency ($\mu$s)')
    # Extend the current upper y-limit by a fixed margin of 200.
    upper = plt.ylim()[1] + 200
    plt.ylim(bottom=0, top=upper)
    plt.legend(ncol=1, loc=2)
    update_fontsize(ax, fontsize=14)
    plt.subplots_adjust(left=0.18, bottom=0.13, top=0.91, right=0.92)
    # Saving is disabled; re-enable with:
    # plt.savefig('%s/%s.pdf' % (OUTPUT_PATH, 'commtime'))
    plt.show()
def plot_figure(Data_Get_Configs, training_or_tensor, title, x_label, y_label,
                file_path, legend_location, subplots_adjust):
    """Draw one line per data config on a single axes and show the figure.

    Args:
        Data_Get_Configs: iterable of objects exposing ``x_data``, ``y_data``,
            ``legend``, ``color`` and ``marker`` attributes.
        training_or_tensor: not used by this function; kept so existing
            callers keep working.
        title: figure title; skipped when falsy.
        x_label: label of the x axis.
        y_label: label of the y axis.
        file_path: not used while saving is disabled (see the commented
            ``savefig`` call below).
        legend_location: ``loc`` value forwarded to ``ax.legend``.
        subplots_adjust: sequence ordered as ``(bottom, left, right, top)``.
    """
    # plt.subplots() creates its own figure. The former plt.figure() call in
    # front of it only left an additional empty figure behind, which
    # plt.show() displayed as a blank window.
    fig, ax = plt.subplots(1, 1, figsize=(5, 3.4))
    if title:
        plt.title(title)

    for Data_Get_Config in Data_Get_Configs:
        plot_one_line(Data_Get_Config.x_data,
                      Data_Get_Config.y_data,
                      Data_Get_Config.legend,
                      scale=None,
                      ax=ax,
                      color=Data_Get_Config.color,
                      marker=Data_Get_Config.marker)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_xlim(xmin=-1)
    ax.legend(fontsize=FONTSIZE, loc=legend_location)
    ax.grid(linestyle=':')
    u.update_fontsize(ax, FONTSIZE)

    bottom, left, right, top = (subplots_adjust[0], subplots_adjust[1],
                                subplots_adjust[2], subplots_adjust[3])
    plt.subplots_adjust(bottom=bottom, left=left, right=right, top=top)
    # plt.savefig(file_path)
    plt.show()
def plot_fitted():
    """Plot measured communication times next to their fitted linear model.

    Reads the log of a fixed worker count through ``read_times_from_osu_log``,
    obtains ``alpha``/``beta`` from ``_fit_linear_function``, draws both curves
    on the module-level ``ax`` and saves the figure as a PDF under
    ``OUTPUTPATH`` before showing it.
    """
    nworkers = 8
    MB = 1024 * 1024.0
    log_name = 'logs/v2_alphabeta/n%d.log' % nworkers
    # Alternative input kept for reference:
    # read_times_from_nccl_log('logs/nccl_lg_nw%d.log' % nworkers, ...)
    sizes, comms, _ = read_times_from_osu_log(log_name)

    ax.plot(sizes / MB, comms, label='Measured', marker=markers[0])

    alpha, beta = _fit_linear_function(sizes, comms)
    fitted = alpha + np.array(sizes) * beta
    fitted_label = r'Fitted ($a$=%.2e,$b$=%.2e)' % (alpha, beta)
    ax.plot(sizes / MB, fitted, label=fitted_label, linewidth=1, color='r',
            linestyle='--')
    print('alpha beta: ', (alpha, beta))

    ax.set_xlabel('Message size [MB]')
    ax.set_ylabel('Communication time [s]')
    ax.grid(linestyle=':')
    ax.set_title('%d nodes' % nworkers)
    ax.legend(fontsize=FONTSIZE - 2, loc='upper left')
    u.update_fontsize(ax, FONTSIZE)

    target = '%s/alpha-beta-%d.pdf' % (OUTPUTPATH, nworkers)
    plt.savefig(target, bbox_inches='tight')
    plt.show()
def plot_flops():
    """Plot effective GFLOPS of three SpGEMM/GEMM kernels against matrix size.

    Reads a whitespace-separated result table (first row is the header),
    keeps the rows with the selected sparsity and ``N <= 14000``, converts the
    per-kernel timing columns into throughput and draws the curves on the
    module-level ``ax``. The figure is saved as a PDF under ``OUTPUT_PATH``.
    """
    linestyles = ['-', '--', '-.', ':']
    device = 'titanx'
    logfile = '/media/sf_Shared_Data/tmp/spgemm/result.syth.%s.raw' % device
    print(logfile)

    # The context manager closes the file even if reading fails; the handle
    # was previously left open.
    with open(logfile, 'r') as f:
        content = f.readlines()
    header = content[0].split()
    print(header)
    data = [line.split() for line in content[1:]]
    df = DataFrame(data, columns=header, dtype='float')

    sparsity = 0.995
    #sparsity = 0.98
    df = df[(df["sparsity"] == sparsity) & (df["N"] <= 14000)]

    def _gflops(column):
        # 2 * N^3 * (1 - sparsity) operations; the timing column is divided
        # by 1000 before use (presumably milliseconds -> seconds, confirm
        # against the log producer).
        return df["N"]**3 * 2 * (1 - sparsity) / (df[column] / 1000) / 1e9

    flops_gcoo = _gflops("groupcoospgemm")
    flops_cusp = _gflops("cusparse")
    flops_cubl = _gflops("cublas")
    N_range = df["N"]

    ax.plot(N_range, flops_gcoo, label="GCOOSpDM", linestyle=linestyles[0],
            linewidth=1, marker='s', color='g')
    ax.plot(N_range, flops_cusp, label="cuSPARSE", linestyle=linestyles[1],
            linewidth=1, marker='d', color='r')
    ax.plot(N_range, flops_cubl, label="cuBLAS", linestyle=linestyles[0],
            linewidth=1, marker='^', color='b')
    ax.set_yscale("log", basey=2)
    ax.yaxis.set_major_formatter(ScalarFormatter())
    #ax.set_xscale("log", basex=2)
    ax.legend(loc=4)
    ax.set_xlabel('Matrix size ' + r'$N$,' + ' s=%.3f' % sparsity)
    ax.set_ylabel('Throughput [GFLOPS]')
    u.update_fontsize(ax, 14)
    fig.subplots_adjust(bottom=0.15, top=0.94)
    #plt.show()
    plt.savefig('%s/%s%s%s.pdf' % (OUTPUT_PATH, 'effective_flops', device, str(sparsity)))
def plot_loss(logfile, label, isacc=False, title='ResNet-20', fixed_color=None):
    """Plot the per-epoch loss (or accuracy) curve of one training log.

    The curve is drawn on the module-level ``ax`` using the next marker from
    ``markeriter`` and either ``fixed_color`` or the next colour from
    ``coloriter``.

    Args:
        logfile: path handed to ``read_losses_from_log`` and
            ``read_norm_from_log``.
        label: legend label; ``' (delay=N)'`` is appended when the mean of the
            reported delays truncates to a positive integer.
        isacc: forwarded to the reader; also selects the y-axis label.
        title: passed through ``get_real_title`` for the axes title.
        fixed_color: colour to use instead of advancing ``coloriter``.

    Returns:
        The list of line artists returned by ``ax.plot``.
    """
    losses, times, average_delays, lrs = read_losses_from_log(logfile, isacc=isacc)
    norm_means, norm_stds = read_norm_from_log(logfile)

    delay = int(np.mean(average_delays)) if len(average_delays) > 0 else 0
    if delay > 0:
        label = label + ' (delay=%d)' % delay

    if isacc:
        ax.set_ylabel('top-1 Validation Accuracy')
    else:
        ax.set_ylabel('training loss')
    ax.set_title(get_real_title(title))

    # Built-in next() works on Python 2.6+ and Python 3; the iterator method
    # ``.next()`` used before exists only on Python 2.
    marker = next(markeriter)
    if fixed_color:
        color = fixed_color
    else:
        color = next(coloriter)

    iterations = np.arange(len(losses))
    if SMOOTH_CURVE:
        losses = savgol_filter(losses, 5, 3)
    print('Algo: %s max acc: %f\n' % (label, np.max(losses)))
    line = ax.plot(iterations, losses, label=label, marker=marker,
                   markerfacecolor='none', color=color, linewidth=1)

    # Secondary-axis norm plot, deliberately disabled by the constant False.
    if False and len(norm_means) > 0:
        global ax2
        if ax2 is None:
            ax2 = ax.twinx()
            ax2.set_ylabel('L2-Norm of : gTopK-Dense')
        ax2.plot(norm_means, label=label + ' norms', color=color)

    ax.set_xlabel('# of epochs')
    u.update_fontsize(ax, FONTSIZE)
    return line
def model_bcast_log():
    """Fit and plot a linear time model for an NCCL collective.

    Reads sizes and times from the NCCL log of the selected collective,
    divides the sizes by 4 (the x axis is labelled in 32-bit elements), fits
    ``alpha``/``beta`` with ``_fit_linear_function`` and shows the measured
    curve against the prediction ``alpha + beta * size``.
    """
    FONTSIZE = 18
    comm_op = 'allreduce'
    short = 'ar'
    #comm_op = 'broadcast';short='bcast'
    log_name = 'logs/nccl-%s-n64.log' % comm_op

    sizes, comms, errors = reader.read_times_from_nccl_log(
        log_name, start=1024 * 1024, end=1024 * 1024 * 512, original=True)
    sizes = np.array(sizes) / 4
    print('sizes: ', sizes)
    print('comms: ', comms)

    alpha, beta = _fit_linear_function(np.array(sizes), comms)
    print('alpha: ', alpha, ', beta: ', beta)
    prediction = alpha + beta * sizes

    fig, ax = plt.subplots(figsize=(4.4, 4.5))
    measured, = ax.plot(sizes, comms, label='Measured')
    inline_label = r'Predicted \n($\alpha_{%s}$=%.2e, $\beta_{%s}$=%.2e)' % (
        short, Decimal(alpha), short, Decimal(beta))
    predicted, = ax.plot(sizes, prediction, '--', label=inline_label)

    ax.set_xlabel('# of 32-bit elements')
    ax.set_ylabel('Communication time [s]')

    # The legend text is given explicitly and overrides the artist labels.
    alpha_part = r'($\alpha_{%s}$=%.2e' % (short, Decimal(alpha))
    beta_part = r'$\beta_{%s}$=%.2e)' % (short, Decimal(beta))
    predicted_text = 'Predicted \n' + alpha_part + '\n' + beta_part
    ax.legend([measured, predicted], ['Measured', predicted_text],
              fontsize=FONTSIZE)

    utils.update_fontsize(ax, FONTSIZE)
    plt.subplots_adjust(left=0.20, bottom=0.14, top=0.99, right=0.99)

    formatter = matplotlib.ticker.ScalarFormatter(useMathText=True)
    formatter.set_powerlimits((-2, 2))
    plt.gca().yaxis.set_major_formatter(formatter)
    #plt.savefig('%s/%s-communicaion-model.pdf' % (OUTPUT_PATH, comm_op))
    plt.show()
def fit_roofline(specs, rawf):
    """Fit a roofline curve to measured throughput and plot both.

    The measured GFLOPS are derived from columns 0 and 4 of every data row in
    ``rawf`` (the first row is skipped as a header). A brute-force search over
    ``n`` picks the scale ``alpha = rc / n`` whose roofline never falls below
    a measurement and has the smallest mean relative error.

    Args:
        specs: sequence where ``specs[0]`` is the memory term and ``specs[1]``
            the peak-throughput term of the roofline.
        rawf: path of the whitespace-separated measurement file; it must hold
            one data row per value of the fixed N grid (400..10000, step 100).

    Raises:
        ValueError: if the number of measurements does not match the N grid.
    """
    linestyles = ['-', '--', '-.', ':']
    # Context manager so the handle is released even when reading fails.
    with open(rawf, "r") as f:
        content = f.readlines()
    rows = [line.split() for line in content[1:]]
    real_flops = [
        float(row[0])**3 * 2 / (float(row[4]) / 1000) / 10**9 for row in rows
    ]

    N_range = range(400, 10100, 100)
    N_search = range(400, 4000)
    if len(real_flops) != len(N_range):
        # Both the error computation and the final plot need matching lengths.
        raise ValueError('expected %d measurements in %s, found %d' %
                         (len(N_range), rawf, len(real_flops)))

    flops = specs[1]
    mem = specs[0]
    rc = flops / mem

    def _roofline(alpha):
        # Operational intensities and the matching theoretical throughput.
        rs = [N * alpha for N in N_range]
        return rs, [flops if r >= rc else mem * r for r in rs]

    best_error = 1e10
    best_n = 1000
    for n in N_search:
        _, th_flops = _roofline(rc / n)
        # Reject candidates whose roof is below any measurement.
        if any(th < real for th, real in zip(th_flops, real_flops)):
            continue
        aver_error = np.mean([
            abs(th - real) / real for th, real in zip(th_flops, real_flops)
        ])
        if aver_error < best_error:
            best_error = aver_error
            best_n = n

    alpha = rc / best_n
    rs, th_flops = _roofline(alpha)
    print(best_n, alpha)

    ax.plot(rs, th_flops, label="theoretical", linestyle=linestyles[0], linewidth=2)
    ax.plot(rs, real_flops, label="real", linestyle=linestyles[1], linewidth=2)
    ax.set_yscale("log", basey=2)
    ax.set_xscale("log", basex=2)
    ax.legend(loc=4)
    ax.set_xlabel('Operational intensity (FLOPS/byte)')
    ax.set_ylabel('Throughput (GFLOPS)')
    u.update_fontsize(ax, 14)
    fig.subplots_adjust(bottom=0.15, top=0.94)
    plt.show()
def plot_breakdown_stepbystep():
    """Draw grouped bars of iteration time per model for four variants.

    One bar group per model and one bar per variant (with or without
    "Pipe"/"LBP"); the timings are hard-coded. The figure is saved as
    ``step-by-step.pdf`` under ``OUTPUT_PATH``.
    """
    fig, ax = plt.subplots(figsize=(7.4, 4.4))
    FONTSIZE = 12
    xticklabels = ['ResNet-50', 'ResNet-152', 'DenseNet-201', 'Inception-v4']
    names = ['-Pipe-LBP', '+Pipe-LBP', '-Pipe+LBP', '+Pipe+LBP']
    colors = [
        'white', Color.factorcomm_color, Color.inversecomm_color, 'black'
    ]
    # One list per model; position i belongs to variant names[i].
    resnet50 = [0.8525, 0.7806, 0.7474, 0.6755]
    resnet152 = [1.5807, 1.4176, 1.3319, 1.1689]
    densenet201 = [1.4964, 1.4061, 1.4519, 1.3615]
    inceptionv4 = [1.1857, 1.0941, 1.0823, 0.9907]

    count = len(xticklabels)
    width = 0.2
    margin = 0.02
    s = (1 - (width * count + (count - 1) * margin)) / 2 + width
    ind = np.array([s + k + 1 for k in range(count)])

    legend_p = []
    for variant in range(len(names)):
        # Each variant is shifted right by one bar width plus margin.
        newind = ind + s * width + (s + 1) * margin
        heights = [resnet50[variant], resnet152[variant],
                   densenet201[variant], inceptionv4[variant]]
        bars = ax.bar(newind, heights, width, color=colors[variant],
                      edgecolor='black', label=names[variant])
        legend_p.append(bars[0])
        s += 1

    ax.set_ylim(bottom=0.6)
    ax.legend(legend_p, names, ncol=1, handletextpad=0.2, columnspacing=1.,
              loc='upper left', fontsize=FONTSIZE)
    ax.set_ylabel('Time [s]')
    # Ticks are placed 1.5 bar slots left of the last variant's positions.
    ax.set_xticks(newind - width * 3 / 2 - margin * 3 / 2)
    ax.set_xticklabels(xticklabels)
    utils.update_fontsize(ax, FONTSIZE)
    plt.savefig('%s/step-by-step.pdf' % (OUTPUT_PATH), bbox_inches='tight')
def rooflines():
    """Plot the predicted roofline of every GPU in the module-level ``gpus``.

    ``predict_perf`` is evaluated on intensities 2**2 .. 2**8 (exponent step
    0.05) and drawn on the module-level ``ax`` with log-2 scaled axes.
    """
    exponents = np.arange(2, 8, step=0.05)
    rs = np.power(2, exponents)
    linestyles = ['-', '--', '-.', ':']

    for idx, gpu in enumerate(gpus):
        perfs = []
        for intensity in rs:
            perfs.append(predict_perf(intensity, gpu))
        ax.plot(rs, perfs, label=gpu, linestyle=linestyles[idx], linewidth=2)

    ax.set_yscale("log", basey=2)
    ax.set_xscale("log", basex=2)
    ax.legend(loc=4)
    ax.set_xlabel('Operational intensity (FLOPS/byte)')
    ax.set_ylabel('Throughput (GFLOPS)')
    u.update_fontsize(ax, 14)
    fig.subplots_adjust(bottom=0.15, top=0.94)
    plt.show()
def statastic_gradient_size(filename, label, color, marker):
    """Scatter the per-layer sizes read from ``filename`` on a log y axis.

    The module-level ``ax`` is reused when set; otherwise a new figure is
    created and its axes stored in that global.

    Args:
        filename: log file parsed by ``read_log``.
        label: legend label of the scatter series.
        color: colour used for both ``c`` and the marker edges.
        marker: marker style of the points.

    Returns:
        The ``sizes`` sequence returned by ``read_log``.
    """
    global ax
    sizes, comms, computes, merged_comms = read_log(filename)
    if ax is None:
        fig, ax = plt.subplots(figsize=(5,4.5))
    fontsize = 14

    # Layer ids are 1-based.
    layer_ids = range(1, len(sizes)+1)
    ax.scatter(layer_ids, sizes, c=color, label=label, marker=marker, s=40,
               facecolors='none', edgecolors=color)
    ax.set_xlim(left=0)
    ax.set_xlabel('Learnable layer ID')
    ax.set_ylabel('# of parameters')
    ax.set_yscale("log", nonposy='clip')
    ax.legend()
    update_fontsize(ax, fontsize)

    total = np.sum(sizes)
    print('total size: ', total)
    return sizes
def realdata_speedup():
    """Plot measured multi-node speedup of WFBP, SyncEASGD and MG-WFBP.

    The throughput numbers are hard-coded; the first entry of each series is
    the baseline the remaining entries (2, 4 and 8 nodes) are divided by. The
    figure is saved as a PDF under ``OUTPUT_PATH``.
    """
    # Alternative data set (was assigned and immediately overwritten):
    # configs = ['GoogleNet', 64]
    # wfbps = [81.68 * 2, 74.83 * 2 * 2, 74.91 * 2 * 4, 2 * 62.9 * 8]
    # gmwfbps = [81.68 * 2, 79.02 * 2 * 2, 75.03 * 2 * 4, 2 * 75.68 * 8]
    # synceasgds = [81.68 * 2, 62.57 * 2 * 2, 57.67 * 2 * 4, 2 * 55.58 * 8]
    device = 'k80'
    configs = ['ResNet', 32]
    raw_wfbp = [76.85, 75.55 * 2, 73.679 * 4, 58.2 * 8]
    raw_gmwfbp = [76.85, 75.59 * 2, 73.8 * 4, 70.8251 * 8]
    raw_synceasgd = [76.85, 60.0 * 2, 55.7 * 4, 50.8 * 8]
    # plot_realdata_comm([raw_wfbp, raw_synceasgd, raw_gmwfbp], configs)
    name = configs[0]
    nnodes = [2, 4, 8]

    fig, ax = plt.subplots(figsize=(5, 4.5))
    optimal = nnodes

    def _relative(series):
        # Speedup of every multi-node entry over the leading baseline entry.
        return [value / series[0] for value in series[1:]]

    wfbps = _relative(raw_wfbp)
    gmwfbps = _relative(raw_gmwfbp)
    synceasgds = _relative(raw_synceasgd)
    print('compared to wfbp: ', np.array(gmwfbps) / np.array(wfbps))
    print('compared to synceasgds: ', np.array(gmwfbps) / np.array(synceasgds))

    curves = [
        (optimal, 'k', 's', 'Linear'),
        (wfbps, 'r', 'd', 'WFBP'),
        (synceasgds, 'b', 'o', 'SyncEASGD'),
        (gmwfbps, 'g', '^', 'MG-WFBP'),
    ]
    for values, colour, glyph, text in curves:
        ax.plot(nnodes, values, color=colour, marker=glyph, label=text)

    plt.legend(loc=2)
    plt.xlabel('# of nodes')
    plt.ylabel('Speedup')
    plt.xticks(nnodes)
    plt.yticks(nnodes)
    limit = nnodes[-1] + 1
    plt.ylim(bottom=1, top=limit)
    plt.xlim(left=1, right=limit)
    plt.grid(color='#5e5c5c', linestyle='-.', linewidth=1)
    update_fontsize(ax, fontsize=14)
    plt.subplots_adjust(left=0.13, bottom=0.13, top=0.96, right=0.97)
    plt.savefig('%s/speedup%sreal.pdf' % (OUTPUT_PATH, name.lower() + device))
def plot_all_communication_overheads():
    """Plot ring all-reduce communication time for every cluster size.

    For each ``n`` in the module-level ``num_of_nodes`` the file
    ``ring-allreduce<n>.log`` below ``INPUT_PATH`` is passed to ``predict``
    with ``sizes=None`` and the shared axes. The figure is only shown.
    """
    fig, ax = plt.subplots(figsize=(5, 4.5))
    legend_names = ['%d-node' % count for count in num_of_nodes]
    palette = ['r', 'g', 'b', 'black', 'y', 'c']
    glyphs = ['^', 'o', 'd', '*', 'x', 'v']
    # No explicit size grid is supplied here.
    sizes = None

    for idx, node_count in enumerate(num_of_nodes):
        log_path = '%s/mgdlogs/mgd140/ring-allreduce%d.log' % (INPUT_PATH, node_count)
        predict(log_path, node_count, palette[idx], glyphs[idx],
                legend_names[idx], sizes, ax)

    plt.xlabel('Size of parameters (KBytes)')
    plt.ylabel(r'Communication time ($\mu$s)')
    # Raise the current upper y-limit by 20 %.
    upper = plt.ylim()[1] * 1.2
    plt.ylim(bottom=0, top=upper)
    plt.legend(ncol=1, loc=2, prop={'size': 10})
    update_fontsize(ax, fontsize=14)
    plt.subplots_adjust(left=0.18, bottom=0.13, top=0.91, right=0.92)
    #plt.savefig('%s/%s.pdf' % (OUTPUT_PATH, 'commtime'))
    plt.show()
def plot_p2platency():
    """Plot measured point-to-point latency with a least-squares linear fit.

    Times and errors from ``read_allreduce_log`` are divided by 1000 (the
    y axis is labelled in ms), a line ``alpha + beta * size`` is fitted and
    both are drawn. The figure is saved as ``p2pcommtime.pdf`` and shown.
    """

    def _fit_linear_function(x, y):
        # Solve y ~= beta * x + alpha in the least-squares sense.
        xs = np.array(x)
        ys = np.array(y)
        design = np.vstack([xs, np.ones(len(xs))]).T
        solution = np.linalg.lstsq(design, ys, rcond=None)[0]
        slope, intercept = solution
        return intercept, slope

    fig, ax = plt.subplots(figsize=(5, 3.8))
    filename = '/media/sf_Shared_Data/tmp/icdcs2019/mgdlogs/mgd140/p2platency.log'
    sizes, comms, errors = read_allreduce_log(filename)
    comms = [value / 1000. for value in comms]
    errors = [value / 1000. for value in errors]

    alpha, beta = _fit_linear_function(sizes, comms)
    print('alpha: %f, beta: %f' % (alpha, beta))

    ax.errorbar(sizes, comms, errors,
                label='Measured Point-to-point Communication',
                fmt='o', linewidth=1)
    fitted = alpha + np.array(sizes) * beta
    fitted_label = r'Predicted ($\alpha=%.3f, \beta=%f$)' % (alpha, beta)
    ax.plot(sizes, fitted, label=fitted_label, linewidth=1)
    ax.grid(linestyle=':')

    plt.xlabel('Size of parameters [bytes]')
    plt.ylabel(r'Communication time [ms]')
    upper = plt.ylim()[1] * 1.2
    plt.ylim(bottom=0, top=upper)
    plt.legend(ncol=1, loc=2, prop={'size': 10})
    update_fontsize(ax, fontsize=16)
    plt.subplots_adjust(left=0.16, bottom=0.17, top=0.98, right=0.98)
    plt.ticklabel_format(axis='x', style='sci', scilimits=(0, 0))
    plt.savefig('%s/%s.pdf' % (OUTPUT_PATH, 'p2pcommtime'))
    plt.show()
def plot_realdata_comm(datas, configs):
    """Draw stacked computation/communication bars per node count.

    Args:
        datas: sequence of throughput series (at most three, matching the
            fixed bar tags). Entry ``i`` of a series is divided by ``2**i``
            before being converted into a time, so the first entry is the
            baseline.
        configs: sequence whose second element is the batch size used to
            convert throughput into time.

    The first time of each series is used as the computation time and the
    excess of every later time over it as the communication time.
    """

    def calculate_real_comms(data, bs):
        # bs / (d / 2**i): time per batch derived from the scaled throughput.
        times = [bs / ((d / 2) / 2**(i - 1)) for i, d in enumerate(data)]
        comp = times[0]
        comms = [t - comp for t in times[1:]]
        return comp, comms

    fig, ax = plt.subplots(figsize=(4.8, 3.4))
    count = len(datas[0][1:])
    width = 0.25
    margin = 0.05
    # Debug output of the legacy offset; the value is not used for layout.
    print('s: ', -int(count / 2))
    xticklabels = [str(2**(i + 1)) for i in range(count)]
    s = (1 - (width * count + (count - 1) * margin)) / 2 + width
    ind = np.array([s + i + 1 for i in range(count)])

    # Short tags written above the bars of each series.
    bar_tags = ['WF.', 'S.E.', 'M.W.']
    for i, data in enumerate(datas):
        comp, comms = calculate_real_comms(data, configs[1])
        comps = [comp for _ in comms]
        newind = ind + s * width + (s + 1) * margin
        p1 = ax.bar(newind, comps, width, color=Color.comp_color, hatch='x',
                    label='Comp.')
        p2 = ax.bar(newind, comms, width, bottom=comps,
                    color=Color.comm_color, label='Comm.')
        s += 1
        autolabel(p2, ax, bar_tags[i], 0)
        print('comp: ', comp)
        print('comms: ', comms)
        print('')

    # NOTE: the leftover debug annotation ax.text(10, 10, 'ehhlo', ...) that
    # used to be drawn here has been removed.
    handles, legend_labels = ax.get_legend_handles_labels()
    print(legend_labels)
    print(handles)

    ax.set_xlim(left=1 + 0.3)
    ax.set_ylim(top=ax.get_ylim()[1] * 1.3)
    ax.set_xticks(ind + 2 * (width + margin))
    ax.set_xticklabels(xticklabels)
    ax.set_xlabel('# of nodes')
    ax.set_ylabel('Time [s]')
    update_fontsize(ax, 14)
    # The first two registered labels are those of the first Comp./Comm. bars.
    ax.legend((p1[0], p2[0]), (legend_labels[0], legend_labels[1]), ncol=2,
              handletextpad=0.2, columnspacing=1.)
    fig.subplots_adjust(left=0.16, right=0.96, bottom=0.17, top=0.94)
    #plt.savefig('%s/comm%sreal.pdf' % (OUTPUT_PATH, configs[0].lower()))
    plt.show()
def plot_breakdown_spdkfac():
    """Show a stacked-bar time breakdown for five training algorithms.

    Every bar (SGD, S-SGD, KFAC, D-KFAC, D-KFAC-MP) stacks the six phases in
    ``names``; all timings are hard-coded. The figure is only shown.
    """
    FONTSIZE = 12
    names = [
        'FF & BP', 'GradComm', 'FactorComp', 'FactorComm', 'InverseComp',
        'InverseComm'
    ]
    colors = [
        Color.backward_color, Color.comm_color, Color.factor_color,
        Color.factorcomm_color, Color.inverse_color, Color.inversecomm_color
    ]
    dnn = 'resnet50'  # only referenced by the disabled savefig calls
    # One list per algorithm; position i belongs to phase names[i].
    sgd = [0.132, 0, 0, 0, 0, 0]
    ssgd = [0.132, 0.067, 0, 0, 0, 0]
    kfac = [0.132, 0, 0.205, 0, 0.282, 0]
    dkfac = [0.132, 0.199 - 0.132, 0.404 - 0.199, 0.704 - 0.404, 0.282, 0]
    dkfacmp = [
        0.132, 0.199 - 0.132, 0.404 - 0.199, 0.704 - 0.404, 0.736 - 0.704,
        0.882 - 0.736
    ]
    algorithms = [sgd, ssgd, kfac, dkfac, dkfacmp]
    xticklabels = ['SGD', 'S-SGD', 'KFAC', 'D-KFAC', 'D-KFAC-MP']

    fig, ax = plt.subplots(figsize=(5.8, 4))
    count = 5
    width = 0.8
    newind = np.arange(count).astype(np.float32)
    bottom = np.zeros(count, dtype=np.float32)

    bars = []
    for phase, phase_name in enumerate(names):
        heights = [series[phase] for series in algorithms]
        layer = ax.bar(newind, heights, width, bottom=bottom,
                       color=colors[phase], label=phase_name,
                       edgecolor='black')
        # Accumulate so the next phase is stacked on top.
        bottom += np.array(heights)
        bars.append(layer[0])

    handles, labels = ax.get_legend_handles_labels()
    print(labels)
    print(handles)

    ax.set_ylim(top=ax.get_ylim()[1] * 1.05)
    ax.set_xticks(newind)
    ax.set_xticklabels(xticklabels, rotation=30)
    ax.set_ylabel('Time [s]')
    utils.update_fontsize(ax, FONTSIZE)
    # Legend is anchored outside the axes on the right.
    ax.legend(tuple(bars), tuple(names), loc='center left',
              bbox_to_anchor=(1, 0.5), fontsize=FONTSIZE)
    fig.subplots_adjust(left=0.14, right=0.61, bottom=0.19, top=0.94)
    #plt.savefig('%s/naive-breakdown-%s.pdf' % (OUTPUT_PATH, dnn), bbox_inches='tight')
    plt.show()
def gmwfbp_speedup():
    """Plot simulated speedup of WFBP, SyncEASGD and MG-WFBP vs. node count.

    Layer sizes and compute times come from ``read_log``. For every node
    count a ``Simulator`` yields the WFBP and MG-WFBP times, while the
    SyncEASGD time adds ``time_of_allreduce`` to forward plus backward time.
    Each result is expressed relative to the single-node throughput.
    """
    #configs = ['GoogleNet', 64]
    configs = ['ResNet', 32]
    #configs = ['DenseNet', 128]
    name, b = configs[0], configs[1]
    test_file = '/media/sf_Shared_Data/gpuhome/repositories/dpBenchmark/tools/caffe/cnn/%s/tmp8comm.log' % name.lower()
    sizes, comms, computes, merged_comms = read_log(test_file)
    device = 'k80'  # only referenced by the disabled savefig call
    nnodes = [4, 8, 16, 32, 64]

    micomputes = np.array(computes)
    # The forward pass is taken as half of the summed compute time.
    tf = np.sum(micomputes) * 0.5 / 1000
    tb = np.sum(micomputes) / 1000
    total_size = np.sum(sizes)
    single = b / (tf + tb)

    wfbps, gmwfbps, synceasgds, optimal = [], [], [], []
    for node_count in nnodes:
        sim = Simulator(name, computes, sizes, node_count, render=False)
        wfbp_time = sim.wfbp()
        wfbps.append(b * node_count / (wfbp_time + tf) / single)
        gmwfbp_time = sim.gmwfbp2()
        gmwfbps.append(b * node_count / (gmwfbp_time + tf) / single)
        tc = time_of_allreduce(node_count, total_size, B) / 1000
        print('#nodes:', node_count, ', tc: ', tc)
        synceasgd_time = tb + tf + tc
        synceasgds.append(b * node_count / synceasgd_time / single)
        optimal.append(node_count)

    print('tf: ', tf)
    print('tb: ', tb)
    print('total_size: ', total_size)
    print('wfbp: ', wfbps)
    print('gmwfbps: ', gmwfbps)
    print('synceasgds: ', synceasgds)
    print('compared to synceasgds: ', np.array(gmwfbps) / np.array(synceasgds))
    print('compared to wfbps: ', np.array(gmwfbps) / np.array(wfbps))

    fig, ax = plt.subplots(figsize=(5, 4.5))
    curves = [
        (optimal, 'k', 's', 'Linear'),
        (wfbps, 'r', 'd', 'WFBP'),
        (synceasgds, 'b', 'o', 'SyncEASGD'),
        (gmwfbps, 'g', '^', 'MG-WFBP'),
    ]
    for values, colour, glyph, text in curves:
        ax.plot(nnodes, values, color=colour, marker=glyph, label=text)

    plt.legend(loc=2)
    plt.xlabel('# of nodes')
    plt.ylabel('Speedup')
    limit = nnodes[-1] + 1
    plt.ylim(bottom=1, top=limit)
    plt.xlim(left=1, right=limit)
    plt.xticks(nnodes)
    plt.yticks(nnodes)
    plt.grid(color='#5e5c5c', linestyle='-.', linewidth=1)
    update_fontsize(ax, fontsize=14)
    plt.subplots_adjust(left=0.13, bottom=0.13, top=0.96, right=0.97)
    #plt.savefig('%s/speedup%s.pdf' % (OUTPUT_PATH, name.lower()+device))
    plt.show()
def plot_data(logfile, plot_type, label, title='ResNet-20'):
    """Plot one metric of a training log on the module-level ``ax``.

    Args:
        logfile: path handed to ``read_data_from_log`` and
            ``read_norm_from_log``.
        plot_type: one of ``'accuracy'``, ``'loss'``, ``'latency'`` (seconds
            between consecutive timestamps) or ``'lr'``. Any other value
            prints a message and terminates via ``exit()``.
        label: legend label; ``' (delay=N)'`` is appended when the mean of the
            reported delays truncates to a positive integer.
        title: passed through ``get_real_title`` for the axes title.
    """
    accuracies, losses, times, average_delays, lrs = read_data_from_log(
        logfile, plot_type)

    data = []
    if plot_type == 'accuracy':
        data = accuracies
    elif plot_type == 'loss':
        data = losses
    elif plot_type == 'latency':
        # Whole seconds elapsed between consecutive timestamps.
        for previous, current in zip(times[:-1], times[1:]):
            delta = current - previous
            data.append(delta.days * 86400 + delta.seconds)
    elif plot_type == 'lr':
        data = lrs
    else:
        print('Plot type not defined till now')
        exit()

    # resnet50/alexnet curves are limited to the first 45 entries. The cut
    # used to be applied to ``losses``/``accuracies`` only after ``data`` had
    # been bound to the untruncated list, so it never reached the plot.
    if logfile.find('resnet50') > 0 or logfile.find('alexnet') > 0:
        if plot_type in ('accuracy', 'loss'):
            data = data[0:45]
    print('Data: ', data)

    norm_means, norm_stds = read_norm_from_log(logfile)
    delay = int(np.mean(average_delays)) if len(average_delays) > 0 else 0
    if delay > 0:
        label = label + ' (delay=%d)' % delay

    ax.set_ylabel(plot_type.capitalize())
    ax.set_title(get_real_title(title))
    marker = next(markeriter)
    color = next(coloriter)
    ax.plot(list(range(0, len(data))), data, label=label, marker=marker,
            markerfacecolor='none', color=color)

    from matplotlib.ticker import MaxNLocator
    ax.xaxis.set_major_locator(MaxNLocator(nbins=8, integer=True))

    # Secondary-axis norm plot, deliberately disabled by the constant False.
    if False and len(norm_means) > 0:
        global ax2
        if ax2 is None:
            ax2 = ax.twinx()
            ax2.set_ylabel('L2-Norm of : gTopK-Dense')
        ax2.plot(norm_means, label=label + ' norms', color=color)

    ax.set_xlabel('Epoch')
    ax.grid(linestyle=':')
    u.update_fontsize(ax, FONTSIZE)
def plot_norm_diff(lax=None, network=None, subfig=None):
    """Plot loss curves for one network plus the gTop-k/rand-k norm ratio.

    Args:
        lax: axes to draw the ratio on; the module-level ``ax1`` is used when
            this is None.
        network: one of 'lstm', 'lstman4', 'resnet20', 'vgg16', 'alexnet' or
            'resnet50'; selects batch size, learning rate and epoch count.
        subfig: not referenced anywhere in this function.

    Returns:
        The list of values returned by the ``plot_with_params`` calls that
        were appended to ``plts`` (not every call is appended, see below).
    """
    global global_index
    global global_max_epochs
    density = 0.001
    nsupdate = 1
    prefix = 'allreduce-comp-gtopk-baseline-gwarmup-dc1-model-ijcai2019'
    # Per-network hyper-parameters used to build the log directory name.
    # NOTE(review): for any other value of ``network`` (including the default
    # None) bs/lr/epochs stay unbound and the assignment to
    # global_max_epochs below raises.
    if network == 'lstm':
        network = 'lstm'
        bs = 100
        lr = 30.0
        epochs = 40
    elif network == 'lstman4':
        network = 'lstman4'
        bs = 8
        lr = 0.0002
        epochs = 80
    elif network == 'resnet20':
        network = 'resnet20'
        bs = 32
        lr = 0.1
        epochs = 140
    elif network == 'vgg16':
        network = 'vgg16'
        bs = 128
        lr = 0.1
        epochs = 140
    elif network == 'alexnet':
        network = 'alexnet'
        bs = 256
        lr = 0.01
        epochs = 40
    elif network == 'resnet50':
        nsupdate = 16
        network = 'resnet50'
        bs = 512
        lr = 0.01
        epochs = 35
    global_max_epochs = epochs
    # Directory holding the per-epoch norm dumps loaded further down.
    path = LOGHOME + '/%s/%s-n4-bs%d-lr%.4f-ns%d-sg1.50-ds%s' % (
        prefix, network, bs, lr, nsupdate, density)
    print(network, path)
    plts = []
    # Each branch draws a baseline curve and a compressed curve through
    # plot_with_params; arguments are partly hard-coded per network.
    if network == 'lstm':
        line = plot_with_params(
            network,
            4,
            100,
            30.0,
            HOSTNAME,
            r'S-SGD loss',
            prefix='allreduce-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            force_legend=True)
        plts.append(line)
        line = plot_with_params(
            network,
            4,
            100,
            30.0,
            HOSTNAME,
            r'gTop-$k$ S-SGD loss',
            prefix='allreduce-comp-gtopk-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            sg=1.5,
            density=density,
            force_legend=True)
        plts.append(line)
    elif network == 'resnet20':
        line = plot_with_params(
            network,
            4,
            32,
            lr,
            HOSTNAME,
            'S-SGD loss',
            prefix='allreduce-baseline-gwarmup-dc1-model-ijcai2019',
            force_legend=True)
        plts.append(line)
        # NOTE(review): this is the only branch whose compressed run uses a
        # 'comp-topk' prefix instead of 'comp-gtopk' - confirm it is intended.
        line = plot_with_params(
            network,
            4,
            bs,
            lr,
            HOSTNAME,
            r'gTop-$k$ S-SGD loss',
            prefix='allreduce-comp-topk-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            sg=1.5,
            density=density,
            force_legend=True)
        plts.append(line)
        pass
    elif network == 'vgg16':
        line = plot_with_params(
            network,
            4,
            bs,
            lr,
            HOSTNAME,
            'S-SGD loss',
            prefix='allreduce-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            force_legend=True)
        plts.append(line)
        line = plot_with_params(network,
                                4,
                                bs,
                                lr,
                                HOSTNAME,
                                r'gTop-$k$ S-SGD loss',
                                prefix=prefix,
                                nsupdate=1,
                                sg=1.5,
                                density=density,
                                force_legend=True)
        plts.append(line)
    elif network == 'lstman4':
        line = plot_with_params(
            network,
            4,
            8,
            0.0002,
            HOSTNAME,
            'S-SGD loss',
            prefix='allreduce-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            force_legend=True)
        plts.append(line)
        line = plot_with_params(
            network,
            4,
            8,
            0.0002,
            HOSTNAME,
            r'gTop-$k$ S-SGD loss',
            prefix='allreduce-comp-gtopk-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            sg=1.5,
            density=density,
            force_legend=True)
        plts.append(line)
    elif network == 'resnet50':
        # NOTE(review): the baseline result is overwritten before being
        # appended, so only the compressed curve ends up in ``plts``.
        line = plot_with_params(
            network,
            4,
            512,
            lr,
            HOSTNAME,
            'S-SGD loss',
            prefix='allreduce-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=nsupdate,
            force_legend=True)
        line = plot_with_params(network,
                                4,
                                512,
                                lr,
                                HOSTNAME,
                                r'gTop-$k$ S-SGD loss',
                                prefix=prefix,
                                nsupdate=nsupdate,
                                sg=1.5,
                                density=density,
                                force_legend=True)
        plts.append(line)
    elif network == 'alexnet':
        # The baseline result is discarded here as well; only the compressed
        # curve is appended.
        plot_with_params(
            network,
            4,
            256,
            lr,
            HOSTNAME,
            'S-SGD',
            prefix='allreduce-baseline-gwarmup-dc1-model-ijcai2019',
            nsupdate=1,
            force_legend=True)
        line = plot_with_params(network,
                                4,
                                256,
                                lr,
                                HOSTNAME,
                                r'gTop-$k$ S-SGD loss',
                                prefix=prefix,
                                nsupdate=nsupdate,
                                sg=1.5,
                                density=density,
                                force_legend=True)
        plts.append(line)
    # Mean of the squared entries of each per-epoch dump (rank 0 only).
    arr = []
    arr2 = []
    for i in range(1, epochs + 1):
        fn = '%s/gtopknorm-rank0-epoch%d.npy' % (path, i)
        fn2 = '%s/randknorm-rank0-epoch%d.npy' % (path, i)
        arr.append(np.mean(np.power(np.load(fn), 2)))
        arr2.append(np.mean(np.power(np.load(fn2), 2)))
    arr = np.array(arr)
    arr2 = np.array(arr2)
    cax = lax if lax is not None else ax1
    # Ratio of the two series, plotted against a constant reference line at 1.
    cax.plot(arr / arr2,
             label=r'$\delta$',
             color=fixed_colors['blue'],
             linewidth=1)
    cax.set_ylim(bottom=0.97, top=1.001)
    zero_x = np.arange(len(arr), step=1)
    ones = np.ones_like(zero_x)
    cax.plot(zero_x, ones, ':', label='1 ref.', color='black', linewidth=1)
    # The leading True makes this condition always hold, so the inset showing
    # the second half of the epochs is drawn for every network.
    if True or network.find('lstm') >= 0:
        subaxes = inset_axes(cax,
                             width='50%',
                             height='30%',
                             bbox_to_anchor=(-0.04, 0, 1, 0.95),
                             bbox_transform=cax.transAxes,
                             loc='upper right')
        half = epochs // 2
        subx = np.arange(half, len(arr))
        subaxes.plot(subx, (arr / arr2)[half:],
                     color=fixed_colors['blue'],
                     linewidth=1)
        subaxes.plot(subx, ones[half:], ':', color='black', linewidth=1)
        subaxes.set_ylim(bottom=subaxes.get_ylim()[0])
    cax.set_xlabel('# of iteration')
    cax.set_ylabel(r'$\delta$')
    u.update_fontsize(cax, FONTSIZE)
    # Advance the shared counter only when it is in use.
    if global_index is not None:
        global_index += 1
    return plts
def plot_breakdown():
    """Show stacked time breakdowns of SGD and KFAC on 1 GPU and 64 GPUs.

    Two bar pairs are drawn: the left bar of each pair is the single-GPU run
    (``sgd``/``kfac``), the right bar the 64-GPU run (``ssgd``/``dkfac``).
    Every bar stacks the six phases in ``names``; all timings are hard-coded.
    The figure is only shown.
    """
    FONTSIZE = 16
    names = [
        'FF & BP', 'GradComm', 'FactorComp', 'FactorComm', 'InverseComp',
        'InverseComm'
    ]
    colors = [
        Color.backward_color, Color.comm_color, Color.lars_color,
        Color.io_color, Color.compression_color, Color.synceasgd_color
    ]
    # One value per entry of ``names``.
    sgd = [0.132, 0, 0, 0, 0, 0]
    ssgd = [0.132, 0.067, 0, 0, 0, 0]
    # This list used to hold seven values ([..., 0, 0, 0.15, 0]) for six
    # phases, which placed 0.15 in the 'InverseComm' slot of the single-GPU
    # run and left the last value unused. The single-GPU lists have no other
    # communication time, so 0.15 is taken to be 'InverseComp'.
    # NOTE(review): confirm against the measurement source.
    kfac = [0.132, 0, 0.205, 0, 0.15, 0]
    dkfac = [
        0.132, 0.199 - 0.132, 0.404 - 0.199, 0.704 - 0.404, 0.736 - 0.704,
        0.882 - 0.736
    ]

    fig, ax = plt.subplots(figsize=(4.8, 4.4))
    count = 2
    width = 0.28
    margin = 0.02
    xticklabels = ['SGD', 'KFAC']
    newind = np.arange(count).astype(np.float32)

    # Left bars: single-GPU runs. Their segments provide the legend handles.
    bars = []
    bottom = np.array([0, 0]).astype(np.float32)
    for i, label in enumerate(names):
        data = [sgd[i], kfac[i]]
        p1 = ax.bar(newind, data, width, bottom=bottom, color=colors[i],
                    label=label, edgecolor='black')
        bottom += np.array(data)
        bars.append(p1[0])
    utils.autolabel(p1, ax, r'1 GPU', 0, 10)

    # Right bars: 64-GPU runs, shifted by one bar width plus margin.
    newind += width + margin
    bottom = 0
    for i, label in enumerate(names):
        data = [ssgd[i], dkfac[i]]
        p1 = ax.bar(newind, data, width, bottom=bottom, color=colors[i],
                    label=label, edgecolor='black')
        bottom += np.array(data)
    utils.autolabel(p1, ax, r'64 GPUs', 0, 10)

    handles, labels = ax.get_legend_handles_labels()
    print(labels)
    print(handles)

    ax.set_ylim(top=ax.get_ylim()[1] * 1.05)
    # Centre each tick between the two bars of a pair.
    ax.set_xticks(newind - (width + margin) / 2)
    ax.set_xticklabels(xticklabels)
    ax.set_ylabel('Time [s]')
    utils.update_fontsize(ax, FONTSIZE)
    ax.legend(tuple(bars), tuple(names), loc='center left',
              bbox_to_anchor=(1, 0.5), fontsize=FONTSIZE)
    #plt.savefig('%s/naive-breakdown.pdf' % (OUTPUT_PATH), bbox_inches='tight')
    plt.show()
def analyze_tensor_sizes():
    """Scatter, per DNN, how often each tensor size occurs.

    For every model the sizes are read with ``reader.read_tensor_sizes``,
    each entry ``s`` is mapped to ``s[0] * (s[0] + 1) // 2``, and the number
    of occurrences of each distinct value is plotted against four times that
    value on a logarithmic x axis.
    """
    fig, ax = plt.subplots(figsize=(6, 4.5))

    def _plot_dnn_tensor(dnn):
        fn = '/Users/lele/shared-server/kfac-logs/%s-matrixsize.log' % (dnn)
        raw_sizes = reader.read_tensor_sizes(fn)
        sizes = [entry[0] * (entry[0] + 1) // 2 for entry in raw_sizes]
        summary = ', min: %d, max: %d, mean: %f' % (
            np.min(sizes), np.max(sizes), np.mean(sizes))
        print('dnn: ', dnn, summary)

        # Occurrence count of every distinct size.
        counter_dict = {}
        for size in sizes:
            counter_dict[size] = counter_dict.get(size, 0) + 1
        keys = sorted(counter_dict)
        print(dnn, 'sizes: ', keys)
        counters = [counter_dict[key] for key in keys]

        ax.scatter(np.array(keys) * 4, counters, color=DNN_COLORS[dnn],
                   marker=DNN_MARKERS[dnn], facecolors='none', linewidth=1,
                   label=STANDARD_TITLES[dnn])
        ax.set_xlabel('Tensor size (# of communicated elements)')
        ax.set_ylabel('Count')

    for dnn in ('resnet50', 'resnet152', 'densenet201', 'inceptionv4'):
        _plot_dnn_tensor(dnn)

    plt.legend(ncol=1, loc=1, prop={'size': 14})
    u.update_fontsize(ax, 14)
    plt.xscale('log')
    #plt.savefig('%s/%s.pdf' % (OUTPUT_PATH, 'tensordistribution'), bbox_inches='tight')
    plt.show()
def plot_loss(logfile, label, isacc=False, title='ResNet-20'):
    """Plot the per-epoch loss (or accuracy) curve of one training log.

    The curve is drawn on the module-level ``ax`` using the next marker and
    colour from ``markeriter`` and ``coloriter``.

    Args:
        logfile: path handed to ``read_losses_from_log`` and
            ``read_norm_from_log``; when 'resnet50' or 'alexnet' occurs after
            the first character, only the first 45 values are plotted.
        label: legend label; ``' (delay=N)'`` is appended when the mean of the
            reported delays truncates to a positive integer.
        isacc: forwarded to the reader; also selects the y-axis label.
        title: passed through ``get_real_title`` for the axes title.
    """
    losses, times, average_delays, lrs = read_losses_from_log(logfile, isacc=isacc)
    if logfile.find('resnet50') > 0 or logfile.find('alexnet') > 0:
        losses = losses[0:45]
    print('losses: ', losses)
    norm_means, norm_stds = read_norm_from_log(logfile)

    delay = int(np.mean(average_delays)) if len(average_delays) > 0 else 0
    if delay > 0:
        label = label + ' (delay=%d)' % delay

    if isacc:
        ax.set_ylabel('Top-1 Validation Accuracy')
    else:
        ax.set_ylabel('Training loss')
    ax.set_title(get_real_title(title))

    # Built-in next() works on Python 2.6+ and Python 3; the iterator method
    # ``.next()`` used before exists only on Python 2.
    marker = next(markeriter)
    color = next(coloriter)
    ax.plot(range(0, len(losses)), losses, label=label, marker=marker,
            markerfacecolor='none', color=color)

    from matplotlib.ticker import MaxNLocator
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Secondary-axis norm plot, deliberately disabled by the constant False.
    if False and len(norm_means) > 0:
        global ax2
        if ax2 is None:
            ax2 = ax.twinx()
            ax2.set_ylabel('L2-Norm of : gTopK-Dense')
        ax2.plot(norm_means, label=label + ' norms', color=color)

    ax.set_xlabel('Epoch')
    ax.grid(linestyle=':')
    u.update_fontsize(ax, FONTSIZE)
def realdata_speedup(model='LSTM-PTB'):
    """Plot the scaling efficiency of dense, top-k and gTop-k S-SGD.

    Each measurement list holds one value per entry of ``nworkers``.  The
    efficiency of a multi-worker run is its value divided by the
    single-worker value (first entry), times 100.  The ratios of gTop-k to
    the two other methods are printed, and the figure is saved as
    ``scalingeffi<model in lower case>.pdf`` under ``OUTPUT_PATH`` and shown.

    Args:
        model: Name of the measurement set to plot.  Defaults to
            ``'LSTM-PTB'``, the set this function plotted before the
            parameter existed.

    Raises:
        ValueError: If ``model`` is not one of the recorded measurement sets.
    """
    nworkers = [1, 4, 8, 16, 32]
    # model -> (dense, topk, gtopk).  The second ``configs`` value that used
    # to accompany each set (128 for VGG-16, 32 for all others; presumably
    # the batch size -- TODO confirm) was never read and is kept here only
    # as a note.
    measurements = {
        'VGG-16': (
            [1317.333, 104.200, 92.560, 39.480, 12.600],
            [1317.333, 110.576, 109.900, 97.865, 63.002],
            [1317.333, 131.060, 130.551, 126.434, 123.200],
        ),
        'ResNet-20': (
            [920.632, 821.700, 705.200, 520.400, 287.900],
            [920.632, 908.837, 752.985, 737.594, 696.029],
            [920.632, 916.260, 868.730, 808.500, 789.300],
        ),
        'AlexNet': (
            [173.469, 14.010, 12.118, 4.936, 1.234],
            [173.469, 14.238, 13.865, 13.352, 9.236],
            [173.469, 16.536, 16.446, 16.359, 15.777],
        ),
        'ResNet-50': (
            [52.873, 39.002, 36.989, 23.176, 10.721],
            [52.873, 37.729, 35.703, 34.495, 30.583],
            [52.873, 39.795, 39.713, 39.060, 39.119],
        ),
        'LSTM-PTB': (
            [392.0, 12.657, 8.7, 4.1, 2.1],
            [392.0, 19.9, 18.6, 14.8, 5.4],
            [392.0, 17.8, 17.6, 15.1, 10.8],
        ),
    }
    if model not in measurements:
        raise ValueError('unknown model %r; expected one of %s' %
                         (model, sorted(measurements)))
    name = model

    def _efficiency(values):
        """Return the multi-worker values as percentages of the first one."""
        baseline = values[0]
        return [v / baseline * 100 for v in values[1:]]

    dense, topk, gtopk = [_efficiency(series) for series in measurements[name]]

    fig, ax = plt.subplots(figsize=(5, 4))
    todense = np.array(gtopk) / np.array(dense)
    totopk = np.array(gtopk) / np.array(topk)
    print(name, ', compared to dense: ', todense, 'mean: ', np.mean(todense))
    print(name, ', compared to topk: ', totopk, 'mean: ', np.mean(totopk))

    multi_workers = nworkers[1:]
    ax.plot(multi_workers, dense, color=gcolors['dense'],
            marker=gmarkers['dense'], label='Dense S-SGD')
    ax.plot(multi_workers, topk, color=gcolors['topk'],
            marker=gmarkers['topk'], label=r'Top-$k$ S-SGD')
    ax.plot(multi_workers, gtopk, color=gcolors['gtopk'],
            marker=gmarkers['gtopk'], label=r'gTop-$k$ S-SGD')

    plt.legend(loc=3, prop={'size': 14})
    plt.xlabel('# of workers (GPU)')
    plt.ylabel('Scaling efficiency (Percentage)')
    plt.xticks(multi_workers)
    plt.title(name)
    plt.grid(linestyle=':')
    update_fontsize(ax, fontsize=14)
    plt.subplots_adjust(left=0.18, bottom=0.16, top=0.92, right=0.97)
    plt.savefig('%s/scalingeffi%s.pdf' % (OUTPUT_PATH, name.lower()))
    plt.show()
def cublas_roofline():
    """Plot measured CUBLAS throughput against a roofline model for two GPUs.

    Measurements are read from ``gtx980_raw.txt`` and ``titanX_raw.txt`` in
    the current directory (the first line of each file is skipped).  The
    theoretical curves come from the module-level ``specs`` table, where
    ``specs[k][0]`` is used as the memory term and ``specs[k][1]`` as the
    compute ceiling; index 0 is plotted as GTX 980 and index 1 as Titan X.
    The figure is drawn on the module-level ``ax``/``fig`` and saved as
    ``cublas_model.pdf`` under ``OUTPUT_PATH``.
    """
    linestyles = ['-', '--', '-.', ':']
    N_range = range(400, 10100, 100)

    def _measured_gflops(path):
        """Return one throughput value per data row of the file at ``path``."""
        with open(path, "r") as f:
            rows = [line.split() for line in f.readlines()[1:]]
        # 2 * column0**3 operations divided by column 4 scaled to 1/1000,
        # reported in units of 1e9.  Presumably column 0 is the matrix
        # dimension N and column 4 the run time in ms -- TODO confirm
        # against the file format.
        return [
            float(row[0])**3 * 2 / (float(row[4]) / 1000) / 10**9
            for row in rows
        ]

    def _roofline(spec, intensities):
        """Return min(compute ceiling, memory term * intensity) per point."""
        mem = spec[0]
        flops = spec[1]
        rc = flops / mem  # intensity at which the two limits meet
        return [flops if r >= rc else mem * r for r in intensities]

    gtx980_flops = _measured_gflops("gtx980_raw.txt")
    titanX_flops = _measured_gflops("titanX_raw.txt")

    alpha = 0.0225
    # x coordinates; the measured series are plotted against them as well,
    # so each file must provide exactly one row per entry of N_range.
    rs = [N * alpha for N in N_range]
    gtx980_th_flops = _roofline(specs[0], rs)
    titanX_th_flops = _roofline(specs[1], rs)

    ax.plot(rs, gtx980_th_flops, label="GTX 980 (theoretical)",
            linestyle=linestyles[0], linewidth=2)
    ax.scatter(rs, gtx980_flops, label="GTX 980 (CUBLAS)",
               linestyle=linestyles[1], linewidth=2)
    ax.plot(rs, titanX_th_flops, label="Titan X (theoretical)",
            linestyle=linestyles[0], linewidth=2)
    # This series shows the Titan X measurements; it used to carry the
    # copy-pasted label "GTX 980 (CUBLAS)".
    ax.scatter(rs, titanX_flops, label="Titan X (CUBLAS)",
               linestyle=linestyles[3], linewidth=2)

    # NOTE(review): ``basey`` was renamed to ``base`` in newer matplotlib
    # releases; left untouched because the installed version is not visible
    # from here.
    ax.set_yscale("log", basey=2)
    ax.legend(loc=4)
    ax.set_xlabel('Operational intensity (FLOPS/byte)')
    ax.set_ylabel('Throughput (GFLOPS)')
    u.update_fontsize(ax, 14)
    fig.subplots_adjust(bottom=0.15, top=0.94)
    plt.savefig('%s/%s.pdf' % (OUTPUT_PATH, 'cublas_model'))
def plot_breakdown_pipelining():
    """Draw stacked bars of factor computation/communication time per DNN.

    For every model in ``dnns`` one bar per algorithm is drawn; each bar
    stacks the phases listed in ``names``.  The values in ``data`` are
    cumulative, so a phase's height is the difference to the previous value.
    The figure is saved as ``pipelining-timebreakdown.pdf`` under
    ``OUTPUT_PATH``.
    """
    fig, ax = plt.subplots(figsize=(7.0, 4.4))
    FONTSIZE = 12
    names = ['FactorComp', 'FactorComm']
    colors = [Color.factor_color, Color.factorcomm_color]
    xticklabels = ['ResNet-50', 'ResNet-152', 'DenseNet-201', 'Inception-v4']
    dnns = ['resnet50', 'resnet152', 'densenet201', 'inceptionv4']
    algos = ['mpd-kfac', 'lw-wo-tf', 'lw-wi-ttf', 'sp-wi-otf']
    labels = ['Naive', 'LW w/o TF', 'LW w/ TTF', 'SP w/ OTF']
    data = {
        'resnet50': {
            'mpd-kfac': [0.2115, 0.3814],
            'lw-wo-tf': [0.2115, 0.4174],
            'lw-wi-ttf': [0.2115, 0.3401],
            'sp-wi-otf': [0.2115, 0.3096],
        },
        'resnet152': {
            'mpd-kfac': [0.1927, 0.6285],
            'lw-wo-tf': [0.1927, 0.7158],
            'lw-wi-ttf': [0.1927, 0.5371],
            'sp-wi-otf': [0.1927, 0.4687],
        },
        'densenet201': {
            'mpd-kfac': [0.3163, 0.6665],
            'lw-wo-tf': [0.3163, 0.7714],
            'lw-wi-ttf': [0.3163, 0.5841],
            'sp-wi-otf': [0.3163, 0.5600],
        },
        'inceptionv4': {
            'mpd-kfac': [0.1979, 0.4882],
            'lw-wo-tf': [0.1979, 0.6683],
            'lw-wi-ttf': [0.1979, 0.4115],
            'sp-wi-otf': [0.1979, 0.3967],
        },
    }

    count = len(dnns)
    width = 0.2
    margin = 0.02
    s = (1 - (width * count + (count - 1) * margin)) / 2 + width
    ind = np.array([s + pos + 1 for pos in range(count)])

    for algo, algo_label in zip(algos, labels):
        newind = ind + s * width + (s + 1) * margin

        # one_group[phase] holds the duration of that phase for every DNN.
        one_group = [[] for _ in names]
        for dnn in dnns:
            previous = 0.0
            for phase, stamp in enumerate(data[dnn][algo]):
                one_group[phase].append(stamp - previous)
                previous = stamp

        legend_p = []
        bottom = np.array([0.0] * len(one_group[0]))
        for phase, durations in enumerate(one_group):
            p = ax.bar(newind,
                       durations,
                       width,
                       bottom=bottom,
                       color=colors[phase],
                       edgecolor='black',
                       label=names[phase])
            legend_p.append(p[0])
            bottom += np.array(durations)

        # Advancing ``s`` moves the next algorithm's bars one slot
        # (width + margin) to the right.
        s += 1
        # ``p`` is the topmost segment of the bars just drawn.
        utils.autolabel(p, ax, algo_label, 90, FONTSIZE - 2)

    ax.set_ylim(top=ax.get_ylim()[1] * 1.25)
    ax.legend(legend_p,
              names,
              ncol=1,
              handletextpad=0.2,
              columnspacing=1.,
              loc='upper left',
              fontsize=FONTSIZE)
    ax.set_ylabel('Time [s]')
    # Shift back from the last algorithm's position to the group centre.
    ax.set_xticks(newind - width * 3 / 2 - margin * 3 / 2)
    ax.set_xticklabels(xticklabels)
    utils.update_fontsize(ax, FONTSIZE)
    plt.savefig('%s/pipelining-timebreakdown.pdf' % (OUTPUT_PATH),
                bbox_inches='tight')
def plot_allreduce_comparison():
    """Plot modelled TopKAllReduce and gTopKAllReduce communication times.

    The local ``variable`` selects which quantity is swept along the x axis
    (``'m'``, ``'P'`` or ``'rho'``); it is fixed to ``'P'`` here.  The
    figure is saved as ``sparvsgtopk_dynamic<variable>.pdf`` under
    ``OUTPUT_PATH`` and then shown.
    """
    alpha = 0.436
    beta = 4 * 9e-6

    def _denseallreduce_model(P, m):
        latency = 2 * (P - 1) * alpha
        transfer = 2 * (P - 1) / P * m * beta
        return latency + transfer

    # NOTE(review): unlike the dense model, the two sparse models below do
    # not scale their log2(P) latency term by ``alpha`` -- confirm whether
    # that is intentional.
    def _sparseallreduce_model(P, m, rho=0.001):
        latency = np.log2(P)
        return latency + 2 * (P - 1) * rho * m * beta

    def _gtopkallreduce_model(P, m, rho=0.001):
        rounds = np.log2(P)
        return 2 * rounds + 4 * rounds * rho * m * beta

    fig, ax = plt.subplots(figsize=(5, 3.8))

    variable = 'P'
    if variable == 'm':
        # 2**20 .. 2**27 parameters
        num_params = np.array([2**(2 * 10 + k) for k in range(0, 8)])
        workers = 32
        density = 0.001
        xlabel = '# of parameters'
        xticks = num_params
    elif variable == 'P':
        # NOTE(review): the original comment said "10MBytes"; the value is
        # 25 * 2**20.
        num_params = 25 * 1024 * 1024
        workers = np.array([4, 8, 16, 32, 64, 128])
        density = 0.001
        xlabel = 'Number of workers'
        xticks = workers
    elif variable == 'rho':
        # NOTE(review): ``workers`` has 4 entries and ``density`` 9, which
        # looks like it cannot be broadcast in the models -- verify before
        # enabling this sweep.
        num_params = 8 * 1024 * 1024
        workers = np.array([4, 8, 16, 32])
        density = np.array([0.01 / (2 * k) for k in range(1, 10)])
        xlabel = 'Density'
        xticks = density

    # The dense model is still evaluated although its curve is not drawn.
    dar = _denseallreduce_model(workers, num_params)
    sar = _sparseallreduce_model(workers, num_params, density)
    gar = _gtopkallreduce_model(workers, num_params, density)

    curves = [
        (sar, r'TopKAllReduce', 'sparse'),
        (gar, r'gTopKAllReduce', 'gtopk'),
    ]
    for values, curve_label, style_key in curves:
        ax.plot(xticks,
                values,
                label=curve_label,
                linewidth=1,
                marker=gmarkers[style_key],
                color=gcolors[style_key])

    ax.grid(linestyle=':')
    plt.subplots_adjust(bottom=0.16, left=0.15, right=0.96, top=0.97)
    plt.xlabel(xlabel)
    plt.ylabel(r'Communication time [ms]')
    plt.legend(ncol=1, loc=2, prop={'size': 10})
    plt.subplots_adjust(left=0.18, bottom=0.20, top=0.94, right=0.96)

    if variable == 'P':
        plt.xticks(xticks)
    elif variable == 'm':
        ax.set_xscale("log")

    update_fontsize(ax, fontsize=16)
    figure_name = 'sparvsgtopk_dynamic%s' % variable
    plt.savefig('%s/%s.pdf' % (OUTPUT_PATH, figure_name))
    plt.show()
def plot_breakdown_spdkfac():
    """Draw stacked per-phase iteration time bars for three K-FAC variants.

    For every model in ``dnns`` one bar per algorithm is drawn; each bar
    stacks the six phases listed in ``names``.  The values in ``data`` are
    cumulative, so a phase's height is the difference to the previous value.
    The figure is saved as ``spdkfac-vs-mpd-fac-timebreakdown.pdf`` under
    ``OUTPUT_PATH``.
    """
    fig, ax = plt.subplots(figsize=(7.0, 4.4))
    FONTSIZE = 12
    names = [
        'FF & BP', 'GradComm', 'FactorComp', 'FactorComm', 'InverseComp',
        'InverseComm'
    ]
    colors = [
        Color.backward_color, Color.comm_color, Color.factor_color,
        Color.factorcomm_color, Color.inverse_color, Color.inversecomm_color
    ]
    xticklabels = ['ResNet-50', 'ResNet-152', 'DenseNet-201', 'Inception-v4']
    dnns = ['resnet50', 'resnet152', 'densenet201', 'inceptionv4']
    algos = ['dkfac', 'mpd-kfac', 'spd-kfac']
    labels = ['D-KFAC', 'MPD-KFAC', 'SPD-KFAC']
    # Per entry: [compute, communicate gradient, compute factor,
    #             communicate factor, compute inverse, communicate inverse]
    data = {
        'resnet50': {
            'dkfac': [0.132, 0.1968, 0.4083, 0.5783, 0.8525, 0.8525],
            'mpd-kfac': [0.132, 0.1968, 0.4083, 0.5783, 0.6295, 0.7635],
            'spd-kfac': [0.132, 0.1968, 0.4083, 0.5064, 0.6114, 0.6755],
        },
        'resnet152': {
            'dkfac': [0.1140, 0.2730, 0.4657, 0.9048, 1.5807, 1.5807],
            'mpd-kfac': [0.1140, 0.2730, 0.4657, 0.9016, 0.9555, 1.3933],
            'spd-kfac': [0.1140, 0.2730, 0.4657, 0.7417, 1.0231, 1.1689],
        },
        'densenet201': {
            'dkfac': [0.178, 0.3643, 0.6829, 1.0146, 1.4964, 1.4964],
            'mpd-kfac': [0.178, 0.3643, 0.6806, 1.0308, 1.0660, 1.5340],
            'spd-kfac': [0.178, 0.3643, 0.6806, 0.9243, 1.3266, 1.3615],
        },
        'inceptionv4': {
            'dkfac': [0.134, 0.2669, 0.4648, 0.7551, 1.1857, 1.1857],
            'mpd-kfac': [0.134, 0.2669, 0.4597, 0.7547, 0.8034, 1.1473],
            'spd-kfac': [0.134, 0.2669, 0.4597, 0.6635, 0.9174, 0.9907],
        },
    }

    count = len(dnns)
    width = 0.2
    margin = 0.02
    s = (1 - (width * count + (count - 1) * margin)) / 2 + width
    ind = np.array([s + pos + 1 for pos in range(count)])

    for algo, algo_label in zip(algos, labels):
        newind = ind + s * width + (s + 1) * margin

        # one_group[phase] holds the duration of that phase for every DNN.
        one_group = [[] for _ in names]
        for dnn in dnns:
            previous = 0.0
            for phase, stamp in enumerate(data[dnn][algo]):
                one_group[phase].append(stamp - previous)
                previous = stamp

        legend_p = []
        bottom = np.array([0.0] * len(one_group[0]))
        for phase, durations in enumerate(one_group):
            p = ax.bar(newind,
                       durations,
                       width,
                       bottom=bottom,
                       color=colors[phase],
                       edgecolor='black',
                       label=names[phase])
            legend_p.append(p[0])
            bottom += np.array(durations)

        # Advancing ``s`` moves the next algorithm's bars one slot
        # (width + margin) to the right.
        s += 1
        # ``p`` is the topmost segment of the bars just drawn.
        utils.autolabel(p, ax, algo_label, 90, FONTSIZE - 2)

    ax.set_ylim(top=ax.get_ylim()[1] * 1.3)
    ax.legend(legend_p,
              names,
              ncol=3,
              handletextpad=0.2,
              columnspacing=1.,
              loc='upper center',
              fontsize=FONTSIZE,
              bbox_to_anchor=[0.5, 1.2])
    ax.set_ylabel('Time [s]')
    # Shift back from the last algorithm's position to the group centre.
    ax.set_xticks(newind - width * 2 / 2 - margin * 2 / 2)
    ax.set_xticklabels(xticklabels)
    utils.update_fontsize(ax, FONTSIZE)
    plt.savefig('%s/spdkfac-vs-mpd-fac-timebreakdown.pdf' % (OUTPUT_PATH),
                bbox_inches='tight')
def plot_breakdown_bwp():
    """Draw stacked bars of inverse computation/communication time per DNN.

    For every model in ``dnns`` one bar per algorithm is drawn; each bar
    stacks the phases listed in ``names``.  The values in ``data`` are
    cumulative, so a phase's height is the difference to the previous value.
    The figure is saved as ``bwp-timebreakdown.pdf`` under ``OUTPUT_PATH``.
    """
    fig, ax = plt.subplots(figsize=(7.0, 4.4))
    FONTSIZE = 12
    names = ['InverseComp', 'InverseComm']
    colors = [Color.inverse_color, Color.inversecomm_color]
    xticklabels = ['ResNet-50', 'ResNet-152', 'DenseNet-201', 'Inception-v4']
    dnns = ['resnet50', 'resnet152', 'densenet201', 'inceptionv4']
    algos = ['algo1', 'algo2', 'algo3']
    labels = ['Non-Dist', 'Seq-Dist', 'LBP']
    data = {
        'resnet50': {
            'algo1': [0.2742, 0.2742],
            'algo2': [0.0512, 0.1852],
            'algo3': [0.1049, 0.1691],
        },
        'resnet152': {
            'algo1': [0.6759, 0.6759],
            'algo2': [0.0539, 0.4917],
            'algo3': [0.2814, 0.4271],
        },
        'densenet201': {
            'algo1': [0.4818, 0.4818],
            'algo2': [0.0352, 0.5032],
            'algo3': [0.4023, 0.4373],
        },
        'inceptionv4': {
            'algo1': [0.4306, 0.4306],
            'algo2': [0.0487, 0.3926],
            'algo3': [0.2539, 0.3272],
        },
    }

    count = len(dnns)
    width = 0.2
    margin = 0.02
    s = (1 - (width * count + (count - 1) * margin)) / 2 + width
    ind = np.array([s + pos + 1 for pos in range(count)])

    for algo, algo_label in zip(algos, labels):
        newind = ind + s * width + (s + 1) * margin

        # one_group[phase] holds the duration of that phase for every DNN.
        one_group = [[] for _ in names]
        for dnn in dnns:
            previous = 0.0
            for phase, stamp in enumerate(data[dnn][algo]):
                one_group[phase].append(stamp - previous)
                previous = stamp

        legend_p = []
        bottom = np.array([0.0] * len(one_group[0]))
        for phase, durations in enumerate(one_group):
            p = ax.bar(newind,
                       durations,
                       width,
                       bottom=bottom,
                       color=colors[phase],
                       edgecolor='black',
                       label=names[phase])
            legend_p.append(p[0])
            bottom += np.array(durations)

        # Advancing ``s`` moves the next algorithm's bars one slot
        # (width + margin) to the right.
        s += 1
        # ``p`` is the topmost segment of the bars just drawn.
        utils.autolabel(p, ax, algo_label, 90, FONTSIZE - 2)

    ax.set_ylim(top=ax.get_ylim()[1] * 1.25)
    ax.legend(legend_p,
              names,
              ncol=1,
              handletextpad=0.2,
              columnspacing=1.,
              loc='upper right',
              fontsize=FONTSIZE)
    ax.set_ylabel('Time [s]')
    # Shift back from the last algorithm's position to the group centre.
    ax.set_xticks(newind - width * 2 / 2 - margin * 2 / 2)
    ax.set_xticklabels(xticklabels)
    utils.update_fontsize(ax, FONTSIZE)
    plt.savefig('%s/bwp-timebreakdown.pdf' % (OUTPUT_PATH),
                bbox_inches='tight')
def plot_breakdown():
    """Plot each network's share of computation, compression and communication.

    The three hard-coded values per network are normalised by their sum and
    drawn as one stacked bar per network.  The log path of every network and
    the resulting ratios are printed; the figure is saved as
    ``breakdown.pdf`` under ``OUTPUT_PATH`` and shown.
    """
    logpath = '/media/sf_Shared_Data/tmp/icdcs2019/mgdlogs/mgd115-2/logs/allreduce-comp-baseline-gwarmup-dc1-modelmgd-speed/'
    networks = ['vgg16', 'resnet20', 'alexnet', 'resnet50']
    batchsizes = [128, 128, 64, 256]
    lrs = [0.1, 0.1, 0.01, 0.01]
    nss = [1, 1, 1, 16]
    # The paths are only printed; the values plotted below are hard-coded.
    for na, bs, lr, ns in zip(networks, batchsizes, lrs, nss):
        fn = os.path.join(
            logpath,
            '%s-n32-bs%d-lr%.4f-ns%d-sg2.50/MGD-0.log' % (na, bs, lr, ns))
        print('fn: ', fn)

    names = ['Compu.', 'Compr.', 'Commu.']
    # One row per network, columns in the order of ``names``.
    measured = [
        [0.139536, 0.091353, 0.811753],  # vgg16
        [0.146005, 0.001618, 0.024686],  # resnet20
        [0.257205, 0.383776, 3.36298],  # alexnet
        [4.882041, 0.15405, 1.424253],  # resnet50
    ]
    datas = [[v / np.sum(row) for v in row] for row in measured]
    for d in datas:
        print('ratios: ', d)

    # Transpose: one array per phase holding the ratio of every network.
    computes, compressions, communications = [
        np.array(column) for column in zip(*datas)
    ]

    fig, ax = plt.subplots(figsize=(4.8, 3.4))
    width = 0.35
    xticklabels = ['VGG-16', 'ResNet-20', 'AlexNet', 'ResNet-50']
    ind = np.arange(len(datas))

    p1 = ax.bar(ind, computes, width,
                color=Color.comp_color, hatch='x', label=names[0])
    p2 = ax.bar(ind, compressions, width, bottom=computes,
                color=Color.compression_color, hatch='-', label=names[1])
    p3 = ax.bar(ind, communications, width,
                bottom=computes + compressions,
                color=Color.opt_comm_color, label=names[2])

    ax.set_xticks(ind)
    ax.set_xticklabels(xticklabels)
    # NOTE(review): the plotted values are fractions in [0, 1], not
    # percentages -- confirm the intended axis label.
    ax.set_ylabel('Percentage')
    update_fontsize(ax, 10)
    ax.legend((p1[0], p2[0], p3[0]),
              tuple(names),
              ncol=9,
              bbox_to_anchor=(1, -0.1))
    fig.subplots_adjust(left=0.16, right=0.96, bottom=0.19, top=0.94)
    plt.savefig('%s/breakdown.pdf' % (OUTPUT_PATH))
    plt.show()
def plot_contention():
    """Plot measured all-reduce time against the number of concurrent jobs.

    Three curves are drawn on the module-level ``ax``: the contention-free
    model ``alpha + beta * k * size`` for ``k`` jobs, the measurements read
    from ``results.csv``, and a model extended by a per-job penalty ``eta``
    derived from a linear fit of the measurements.  The figure is saved as
    ``comm_contention_<ns[0]>.pdf`` and shown.
    """
    ns = [8]  # worker counts to analyse
    js = list(range(1, 9))  # numbers of concurrent jobs
    size = 104857600

    def _read_all_comms_nccl(ns):
        """Alternative data source (currently unused): NCCL job logs.

        For each worker count and job count, takes the first time reported
        by every job's log and keeps the maximum over the jobs.
        """
        all_comms = []
        for nworkers in ns:
            comms = []
            for job_num in js:
                folder = 'logs/nccl_job_nw%d_n%d_s%d' % (nworkers, job_num,
                                                         size)
                tmp_cs = []
                for k in range(1, job_num + 1):
                    logfile = os.path.join(folder, 'nccl_job_%d.log' % k)
                    _, c, _ = read_times_from_nccl_log(logfile,
                                                       end=512 * 1024 * 1024,
                                                       original=False)
                    tmp_cs.append(c[0])
                comms.append(np.max(tmp_cs))
            all_comms.append(comms)
        return all_comms

    def _read_all_comms_osu(ns):
        """Return, per worker count, column 3 / 1e6 of the matching CSV rows.

        A row matches when its column 0, converted to int, equals the worker
        count.  Presumably column 3 is a time in microseconds -- TODO confirm
        against the layout of ``results.csv``.
        """
        rows = pd.read_csv('results.csv').values.tolist()
        all_comms = []
        for nworkers in ns:
            all_comms.append(
                [row[3] / 1e6 for row in rows if int(row[0]) == nworkers])
        return all_comms

    all_comms = _read_all_comms_osu(ns)

    # (alpha, beta) per worker count for the OSU measurements.
    # The values previously used with the NCCL logs were:
    #   2: (0.0005662789473684163, 8.564366792377673e-10)
    #   4: (0.0002603352299668238, 1.2949395937171236e-09)
    #   8: (0.0024653663101605328, 1.490005930477285e-09)
    alpha_betas = {
        2: (0.0002534641666666668, 1.0586445305937077e-09),
        4: (0.00043822000000000045, 2.40787629158266e-09),
        8: (0.0008454662500000003, 3.4648519946682828e-09),
    }

    def _alpha_beta(nworkers):
        """Look up (alpha, beta), falling back to the 8-worker values."""
        return alpha_betas.get(nworkers, alpha_betas[8])

    # Contention-free baseline; only the first worker count is drawn, so
    # the title names that same configuration.
    alpha, beta = _alpha_beta(ns[0])
    baseline = [alpha + beta * j * size for j in js]
    ax.plot(js, baseline, label=r'$T=a+bkM$', markerfacecolor="none",
            marker='x')
    ax.set_title('%d nodes' % ns[0])

    for nworkers, c in zip(ns, all_comms):
        # Use the coefficients of the series being plotted.  This used to
        # read a loop variable left over from an earlier loop, i.e. always
        # the last worker count.
        alpha, beta = _alpha_beta(nworkers)
        ax.plot(js, c, label='Measured', marker='.')
        print('js: ', js)
        print('c: ', c)
        # The second fitted value is treated as the time added per job; the
        # part not explained by ``beta * size`` is the per-byte penalty eta.
        a, eta = _fit_linear_function(js, c)
        eta = (eta - (beta * size)) / size
        a = a + eta * size
        fitted_comms = [
            alpha + beta * size * j + (j - 1) * eta * size for j in js
        ]
        ax.plot(js,
                fitted_comms,
                label=r'$\overline{T_{ar}}=a+bkM+\eta(k-1)M$',
                markerfacecolor='none',
                marker='o')
        print('a: ', a)
        print('eta: ', eta)

    ax.set_xlabel('# of jobs')
    ax.set_ylabel('Communication time [s]')
    ax.set_xlim(left=0)
    ax.set_ylim(bottom=0)
    ax.grid(linestyle=':')
    u.update_fontsize(ax, FONTSIZE)
    ax.legend(fontsize=FONTSIZE - 2)
    # NOTE(review): other plotting functions use ``OUTPUT_PATH``; confirm
    # that ``OUTPUTPATH`` is really defined for this one.
    plt.savefig('%s/comm_contention_%d.pdf' % (OUTPUTPATH, ns[0]),
                bbox_inches='tight')
    plt.show()