def gen_res(raw_data, stencil_kernel, N):
    """Plot modeled vs. measured code balance against cache block size.

    Filters *raw_data* down to the rows of *stencil_kernel* with local
    grid size *N* (10WD diamond tiling or spatial blocking), computes
    the analytic model and the measured Bytes/LUP for each row, and
    saves the comparison figure to
    '<kernel-prefix>_code_balance_vs_cache_size_N<N>.pdf'.

    raw_data       -- iterable of dict-like CSV rows (string values)
    stencil_kernel -- stencil id as returned by get_stencil_num()
    N              -- 'Local NX' grid size to select
    """
    from operator import itemgetter
    import matplotlib.pyplot as plt
    import pylab
    from ics_utils import models, get_bs, get_stencil_num, get_nd

    # Figure geometry/typography tuned for a narrow paper column.
    # NOTE(review): 'text.fontsize' is a long-deprecated rcParam name;
    # kept as-is because newer matplotlib may reject or ignore it — confirm.
    fig_width = 5.5 * 0.393701  # cm converted to inches
    fig_height = 0.68 * fig_width
    fig_size = [fig_width, fig_height]
    params = {'axes.labelsize': 5,
              'axes.linewidth': 0.5,
              'lines.linewidth': 1,
              'text.fontsize': 5,
              'legend.fontsize': 5,
              'xtick.labelsize': 5,
              'ytick.labelsize': 5,
              'lines.markersize': 5,
              'text.usetex': True,
              'figure.figsize': fig_size}
    pylab.rcParams.update(params)

    # (CSV column name, parser) pairs for the fields this plot needs.
    req_fields = [('Total cache block size (kiB)', int),
                  ('MStencil/s MAX', float),
                  ('Time stepper orig name', str),
                  ('Stencil Kernel semi-bandwidth', int),
                  ('Stencil Kernel coefficients', str),
                  ('Precision', str),
                  ('Time unroll', int),
                  ('Number of time steps', int),
                  ('Number of tests', int),
                  ('Local NX', int),
                  ('Local NY', int),
                  ('Local NZ', int),
                  ('Total Memory Transfer', float),
                  ('Thread group size', int),
                  ('Intra-diamond prologue/epilogue MStencils', int),
                  ('Multi-wavefront updates', int),
                  ('Intra-diamond width', int)]

    data = []
    for k in raw_data:
        tup = dict()
        # Parse the general fields; tolerate missing/malformed values but
        # report the offending column name (was a bare `except:`).
        for field, parse in req_fields:
            try:
                tup[field] = parse(k[field])  # was: map(parse, [k[field]])[0]
            except (KeyError, ValueError, TypeError):
                print(field)
        # Attach the stencil operator id.
        tup['Kernel'] = get_stencil_num(k)
        data.append(tup)

    WS = 8  # word size in bytes (double precision)
    data2 = []
    for tup in data:
        # actual_BpU is defined elsewhere in this module — measured Bytes/LUP.
        tup['Actual Bytes/LUP'] = actual_BpU(tup)
        tup['Model'] = models(tup)
        # Relative model error in percent.
        tup['Err %'] = 100 * (tup['Model'] - tup['Actual Bytes/LUP']) / tup['Actual Bytes/LUP']
        tup['D_width'] = tup['Intra-diamond width']
        tup['Performance'] = tup['MStencil/s MAX']
        tup['Cache block'] = get_bs(Dw=tup['D_width'],
                                    Nd=get_nd(tup['Kernel']),
                                    Nf=(tup['Multi-wavefront updates'] - 1),
                                    Nx=tup['Local NX'],
                                    WS=WS)
        data2.append(tup)

    data2 = sorted(data2, key=itemgetter('Kernel', 'Local NX', 'D_width'))

    # Select the requested kernel/grid size; group size 10 is the 10WD
    # diamond variant, 0 is the spatial-blocking baseline.
    cs = []
    cb = []
    cb_meas = []
    Dw = []
    for k in data2:
        if (k['Kernel'] == stencil_kernel
                and k['Thread group size'] in (10, 0)
                and k['Local NX'] == N):
            cs.append(k['Cache block'])
            cb.append(k['Model'])
            cb_meas.append(k['Actual Bytes/LUP'])
            Dw.append(k['D_width'])
    if not Dw:
        return  # nothing to plot for this kernel/size combination

    fig, ax = plt.subplots()
    ax.plot(cs, cb, marker='^', linestyle='-', color='k', label="Model")
    ax.plot(cs, cb_meas, marker='x', linestyle='--', color='b', label="Measured")
    # Vertical marker at the usable cache size limit.
    ax.plot([12.5, 12.5], [0, 0.7 * cb[0]], linestyle='-', color='r',
            label="Usable cache size")
    ax.set_ylabel('Code balance (Bytes/LUP)')
    ax.set_xlabel('Cache block size (MiB)')
    ax.set_ylim([0, max(cb_meas + cb) + 1])
    ax.set_xlim([0, max(cs) + 0.5])

    # Secondary x-axis labels each measurement with its diamond width.
    ax2 = ax.twiny()
    ax2.set_xticks(cs)
    ax2.set_xlabel('Diamond width')
    ax2.set_xlim(ax.get_xlim())
    if stencil_kernel == 1:
        # Thin out overlapping tick labels for the 7-pt constant kernel
        # (guarded: the original indexed Dw[1]/[3]/[5] unconditionally and
        # raised IndexError on runs with fewer than six points).
        Dw = [str(d) for d in Dw]
        for i in (1, 3, 5):
            if i < len(Dw):
                Dw[i] = ''
    ax2.set_xticklabels(Dw)

    # Output file name prefix per stencil kernel id.
    prefixes = {0: '25_pt_const', 1: '7_pt_const', 4: '25_pt_var', 5: '7_pt_var'}
    title = '_code_balance_vs_cache_size_N' + str(N)
    if stencil_kernel in prefixes:
        title = prefixes[stencil_kernel] + title

    ax.legend(loc='best')
    ax.grid()
    pylab.savefig(title + '.pdf', format='pdf', bbox_inches="tight", pad_inches=0)
    plt.clf()
def gen_res(raw_data, stencil_kernel, N):
    """Plot modeled vs. measured code balance against cache block size.

    NOTE(review): this is a second, functionally identical definition of
    gen_res in the same module; at import time it silently overrides the
    first one. Consider deleting one of the two copies.

    Filters *raw_data* down to the rows of *stencil_kernel* with local
    grid size *N* (10WD diamond tiling or spatial blocking), computes
    the analytic model and the measured Bytes/LUP for each row, and
    saves the comparison figure to
    '<kernel-prefix>_code_balance_vs_cache_size_N<N>.pdf'.

    raw_data       -- iterable of dict-like CSV rows (string values)
    stencil_kernel -- stencil id as returned by get_stencil_num()
    N              -- 'Local NX' grid size to select
    """
    from operator import itemgetter
    import matplotlib.pyplot as plt
    import pylab
    from ics_utils import models, get_bs, get_stencil_num, get_nd

    # Figure geometry/typography tuned for a narrow paper column.
    fig_width = 5.5 * 0.393701  # cm converted to inches
    fig_height = 0.68 * fig_width
    fig_size = [fig_width, fig_height]
    params = {'axes.labelsize': 5,
              'axes.linewidth': 0.5,
              'lines.linewidth': 1,
              'text.fontsize': 5,
              'legend.fontsize': 5,
              'xtick.labelsize': 5,
              'ytick.labelsize': 5,
              'lines.markersize': 5,
              'text.usetex': True,
              'figure.figsize': fig_size}
    pylab.rcParams.update(params)

    # (CSV column name, parser) pairs for the fields this plot needs.
    req_fields = [('Total cache block size (kiB)', int),
                  ('MStencil/s MAX', float),
                  ('Time stepper orig name', str),
                  ('Stencil Kernel semi-bandwidth', int),
                  ('Stencil Kernel coefficients', str),
                  ('Precision', str),
                  ('Time unroll', int),
                  ('Number of time steps', int),
                  ('Number of tests', int),
                  ('Local NX', int),
                  ('Local NY', int),
                  ('Local NZ', int),
                  ('Total Memory Transfer', float),
                  ('Thread group size', int),
                  ('Intra-diamond prologue/epilogue MStencils', int),
                  ('Multi-wavefront updates', int),
                  ('Intra-diamond width', int)]

    data = []
    for k in raw_data:
        tup = dict()
        # Parse the general fields; tolerate missing/malformed values but
        # report the offending column name (was a bare `except:`).
        for field, parse in req_fields:
            try:
                tup[field] = parse(k[field])  # was: map(parse, [k[field]])[0]
            except (KeyError, ValueError, TypeError):
                print(field)
        # Attach the stencil operator id.
        tup['Kernel'] = get_stencil_num(k)
        data.append(tup)

    WS = 8  # word size in bytes (double precision)
    data2 = []
    for tup in data:
        # actual_BpU is defined elsewhere in this module — measured Bytes/LUP.
        tup['Actual Bytes/LUP'] = actual_BpU(tup)
        tup['Model'] = models(tup)
        # Relative model error in percent.
        tup['Err %'] = 100 * (tup['Model'] - tup['Actual Bytes/LUP']) / tup['Actual Bytes/LUP']
        tup['D_width'] = tup['Intra-diamond width']
        tup['Performance'] = tup['MStencil/s MAX']
        tup['Cache block'] = get_bs(Dw=tup['D_width'],
                                    Nd=get_nd(tup['Kernel']),
                                    Nf=(tup['Multi-wavefront updates'] - 1),
                                    Nx=tup['Local NX'],
                                    WS=WS)
        data2.append(tup)

    data2 = sorted(data2, key=itemgetter('Kernel', 'Local NX', 'D_width'))

    # Select the requested kernel/grid size; group size 10 is the 10WD
    # diamond variant, 0 is the spatial-blocking baseline.
    cs = []
    cb = []
    cb_meas = []
    Dw = []
    for k in data2:
        if (k['Kernel'] == stencil_kernel
                and k['Thread group size'] in (10, 0)
                and k['Local NX'] == N):
            cs.append(k['Cache block'])
            cb.append(k['Model'])
            cb_meas.append(k['Actual Bytes/LUP'])
            Dw.append(k['D_width'])
    if not Dw:
        return  # nothing to plot for this kernel/size combination

    fig, ax = plt.subplots()
    ax.plot(cs, cb, marker='^', linestyle='-', color='k', label="Model")
    ax.plot(cs, cb_meas, marker='x', linestyle='--', color='b', label="Measured")
    # Vertical marker at the usable cache size limit.
    ax.plot([12.5, 12.5], [0, 0.7 * cb[0]], linestyle='-', color='r',
            label="Usable cache size")
    ax.set_ylabel('Code balance (Bytes/LUP)')
    ax.set_xlabel('Cache block size (MiB)')
    ax.set_ylim([0, max(cb_meas + cb) + 1])
    ax.set_xlim([0, max(cs) + 0.5])

    # Secondary x-axis labels each measurement with its diamond width.
    ax2 = ax.twiny()
    ax2.set_xticks(cs)
    ax2.set_xlabel('Diamond width')
    ax2.set_xlim(ax.get_xlim())
    if stencil_kernel == 1:
        # Thin out overlapping tick labels for the 7-pt constant kernel
        # (guarded: the original indexed Dw[1]/[3]/[5] unconditionally and
        # raised IndexError on runs with fewer than six points).
        Dw = [str(d) for d in Dw]
        for i in (1, 3, 5):
            if i < len(Dw):
                Dw[i] = ''
    ax2.set_xticklabels(Dw)

    # Output file name prefix per stencil kernel id.
    prefixes = {0: '25_pt_const', 1: '7_pt_const', 4: '25_pt_var', 5: '7_pt_var'}
    title = '_code_balance_vs_cache_size_N' + str(N)
    if stencil_kernel in prefixes:
        title = prefixes[stencil_kernel] + title

    ax.legend(loc='best')
    ax.grid()
    pylab.savefig(title + '.pdf', format='pdf', bbox_inches="tight", pad_inches=0)
    plt.clf()
def create_table(raw_data, rows, stencil):
    """Write a thread-scaling summary table to '<stencil>_threadscaling_table.csv'.

    Selects the *rows* of interest from *raw_data*, parses the string
    CSV values, back-fills memory-transfer measurements from the sibling
    row of the same experiment, derives energy/bandwidth/code-balance
    metrics, and writes one CSV line per experiment sorted by thread
    group size.

    raw_data -- iterable of dict-like CSV rows (string values)
    rows     -- row-selection spec forwarded to utils.select_fields
    stencil  -- prefix of the output CSV file name
    """
    from csv import DictWriter
    from operator import itemgetter
    from utils import select_fields
    from ics_utils import models, get_stencil_num

    # (column name, parser) for every field the table consumes.
    cols_format = [('Time stepper orig name', str),
                   ('Stencil Kernel coefficients', str),
                   ('Thread group size', int),
                   ('Stencil Kernel semi-bandwidth', int),
                   ('OpenMP Threads', int),
                   ('Energy', float),
                   ('Energy DRAM', float),
                   ('Power', float),
                   ('Power DRAM', float),
                   ('MStencil/s MAX', float),
                   ('Global NX', int),
                   ('Local NY', int),
                   ('Global NY', int),
                   ('Global NZ', int),
                   ('Number of time steps', int),
                   ('Number of tests', int),
                   ('Intra-diamond prologue/epilogue MStencils', int),
                   ('Total cache block size (kiB)', int),
                   ('Block size in X', int),
                   ('Precision', str),
                   ('Time unroll', int),
                   ('Intra-diamond width', int),
                   ('Multi-wavefront updates', int),
                   ('Total Memory Transfer', float),
                   ('Sustained Memory BW', float)]
    cols = [f[0] for f in cols_format]
    data = select_fields(raw_data, rows, cols)

    for k in data:
        # An empty X block size means "no blocking"; use a huge sentinel so
        # the later `Block size in X < Global NX` test is never true.
        if k['Block size in X'] == '':
            k['Block size in X'] = '100000'
        # Parse each non-empty field; report (but tolerate) bad values
        # (was a bare `except:`).
        for val, fmt in cols_format:
            try:
                if k[val] != '':
                    k[val] = fmt(k[val])  # was: map(fmt, [k[val]])[0]
            except (ValueError, TypeError):
                print("%s %s" % (val, k[val]))

    # Identity of one experiment, used to pair performance runs with the
    # matching run that carried the memory-transfer counters.
    key = itemgetter('Time stepper orig name', 'Stencil Kernel coefficients',
                     'Stencil Kernel semi-bandwidth', 'Thread group size')
    data2 = []
    for k in data:
        # Keep only the rows that lacked the transfer measurement (the
        # performance runs) and back-fill them from the matching
        # measurement run, which also supplies the 'mem *' step counts
        # required by the derived-metrics loop below.
        # NOTE(review): reconstructed from mangled indentation — rows that
        # already carry 'Total Memory Transfer' are dropped; confirm
        # against the original layout.
        if k['Total Memory Transfer'] == '':
            for k2 in data:
                if (k2['Total Memory Transfer'] != ''
                        and k is not k2
                        and key(k) == key(k2)):
                    k['Total Memory Transfer'] = k2['Total Memory Transfer']
                    k['Sustained Memory BW'] = k2['Sustained Memory BW']
                    k['mem Number of time steps'] = k2['Number of time steps']
                    k['mem Number of tests'] = k2['Number of tests']
            data2.append(k)
    data = data2

    # Compute derived values.
    for k in data:
        nx = k['Global NX']
        ny = k['Global NY']
        nz = k['Global NZ']
        # Updated points per time step: the grid plus the halo in X.
        stencil_size = 2 * ny * nz + ny * nz * (nx + 2 * k['Stencil Kernel semi-bandwidth'])
        # Total lattice updates of the performance run (prologue/epilogue
        # MStencils are reported in millions, hence the 1e6).
        lups = k['Number of tests'] * (stencil_size * k['Number of time steps']
                                       - k['Intra-diamond prologue/epilogue MStencils'] * 1e6)
        k['pJ/LUP CPU'] = k['Energy'] / lups * 1e9
        k['pJ/LUP DRAM'] = k['Energy DRAM'] / lups * 1e9
        k['pJ/LUP Total'] = k['pJ/LUP CPU'] + k['pJ/LUP DRAM']
        k['Power CPU'] = k['Power']
        k['Power Total'] = k['Power CPU'] + k['Power DRAM']
        # Lattice updates of the run that measured the memory traffic.
        mlups = k['mem Number of tests'] * (stencil_size * k['mem Number of time steps']
                                            - k['Intra-diamond prologue/epilogue MStencils'] * 1e6)
        k['Measured Bytes/LUP'] = k['Total Memory Transfer'] * 1e9 / mlups
        k['Threads'] = k['OpenMP Threads']
        k['MLUP/s'] = k['MStencil/s MAX']
        # (Removed the original's no-op self-assignment of 'Thread group size'.)
        tgs = k['Thread group size']
        k['Method'] = 'Spt. Blk.' if tgs == 0 else str(tgs) + 'WD'
        k['Kernel'] = get_stencil_num(k)
        k['Model Bytes/LUP'] = models(k)
        k['Dw-Nf'] = str(k['Intra-diamond width']) + '_' + str(k['Multi-wavefront updates'])
        # Floor division made explicit: the original relied on Python 2
        # int/int truncation (kiB is parsed as int).
        k['Cache blk. [MiB]'] = k['Total cache block size (kiB)'] // 1024
        if k['Block size in X'] < k['Global NX']:
            # Scale up to the full-domain equivalent when blocking in X.
            k['Cache blk. [MiB]'] = k['Cache blk. [MiB]'] * k['Global NX'] // k['Block size in X']

    data = sorted(data, key=itemgetter('Thread group size'))
    fields = ['Method', 'Threads', 'MLUP/s', 'Cache blk. [MiB]',
              'Sustained Memory BW', 'Dw-Nf', 'Model Bytes/LUP',
              'Measured Bytes/LUP', 'Power CPU', 'Power DRAM', 'Power Total',
              'pJ/LUP CPU', 'pJ/LUP DRAM', 'pJ/LUP Total']
    with open(stencil + '_threadscaling_table.csv', 'w') as output_file:
        writer = DictWriter(output_file, fieldnames=fields)
        writer.writeheader()
        for k in data:
            # Project each row onto the output columns
            # (was a quadratic scan over k.keys() x fields).
            writer.writerow({f: k[f] for f in fields if f in k})