Exemplo n.º 1
0
def main():
    """Generate one plot per (stencil, grid size, block size) combination.

    Loads the CSV file named by the first command-line argument and calls
    ``gen_res`` once for every pairing of a distinct
    (stencil number, 'Global NX') tuple with a distinct
    'Multi-wavefront updates' value other than '0'.
    """
    import sys
    from utils import load_csv, get_stencil_num

    raw_data = load_csv(sys.argv[1])

    # Distinct (stencil number, global NX) pairs present in the input.
    kernel_sizes = list(
        {(get_stencil_num(row), row['Global NX']) for row in raw_data})

    # Distinct block sizes, skipping '0'.  The values are compared and sorted
    # in the form they were loaded; they are converted with int() only when
    # handed to gen_res.
    block_sizes = sorted(
        {row['Multi-wavefront updates'] for row in raw_data
         if row['Multi-wavefront updates'] != '0'})

    for kernel, nx in kernel_sizes:
        for block in block_sizes:
            gen_res(raw_data, int(kernel), int(block), int(nx))
def main():
  """Drive gen_res over every kernel/size/block-size combination in a CSV.

  The CSV path is taken from sys.argv[1].  Every distinct
  (stencil number, 'Global NX') pair is combined with every distinct
  'Multi-wavefront updates' value except '0', and gen_res is called with
  the three values converted to int.
  """
  import sys
  from utils import load_csv, get_stencil_num

  rows = load_csv(sys.argv[1])

  # unique (stencil number, global NX) pairs
  combos = set((get_stencil_num(r), r['Global NX']) for r in rows)
  combos = list(combos)

  # unique block sizes; the '0' entry is dropped
  updates = set(r['Multi-wavefront updates'] for r in rows)
  updates.discard('0')
  updates = sorted(updates)

  for kernel, nx in combos:
    for upd in updates:
      gen_res(rows, int(kernel), int(upd), int(nx))
Exemplo n.º 3
0
def gen_res(raw_data, stencil_kernel, bsz, N):
    """Plot modelled and measured code balance against cache block size.

    Converts every row of ``raw_data`` to typed values, computes the measured
    and the modelled code balance through the module-level helpers
    ``actual_BpU`` and ``models``, selects the rows matching the requested
    kernel / grid size / block size, and saves a PDF named
    ``<kernel name>_code_balance_vs_cache_size_N<N>_bsz<bsz>.pdf`` (relative
    path).  The kernel-name prefix is omitted for kernel numbers other than
    0, 1, 4, 5 and 6.

    Args:
        raw_data: iterable of dict-like rows keyed by CSV column name.  Rows
            are modified in place: 'MStencil/s  MAX' is overwritten from the
            rank-0 performance columns when those are present and non-empty.
        stencil_kernel: kernel number, compared with
            ``utils.get_stencil_num(row)``.
        bsz: compared with the 'Multi-wavefront updates' column; rows whose
            'Time stepper orig name' is 'Spatial Blocking' are selected
            regardless of this value.
        N: compared with the 'Local NX' column.

    Returns:
        None.  Nothing is plotted or written when no row matches.

    Side effects:
        Updates the global matplotlib rcParams and prints the name of every
        required field that is missing or cannot be converted.
    """
    from operator import itemgetter
    import matplotlib.pyplot as plt
    import pylab
    from utils import get_stencil_num

    fig_width = 4.0 * 0.393701  # inches
    fig_height = 0.60 * fig_width
    pylab.rcParams.update({
        'axes.labelsize': 6,
        'axes.linewidth': 0.5,
        'lines.linewidth': 1,
        'font.size': 5,
        'legend.fontsize': 6,
        'xtick.labelsize': 6,
        'ytick.labelsize': 6,
        'lines.markersize': 5,
        'text.usetex': True,
        'figure.figsize': [fig_width, fig_height],
    })

    # (column name, converter) for every column copied into the typed rows.
    req_fields = [('Total cache block size (kiB)', int),
                  ('MStencil/s  MAX', float), ('Time stepper orig name', str),
                  ('Stencil Kernel semi-bandwidth', int),
                  ('Stencil Kernel coefficients', str), ('Precision', str),
                  ('Time unroll', int), ('Number of time steps', int),
                  ('Number of tests', int), ('Local NX', int),
                  ('Local NY', int), ('Local NZ', int),
                  ('Total Memory Transfer', float), ('Thread group size', int),
                  ('Intra-diamond prologue/epilogue MStencils', int),
                  ('Multi-wavefront updates', int),
                  ('Intra-diamond width', int)]

    data = []
    for row in raw_data:
        # Use a single field to represent the performance.
        if 'Total RANK0 MStencil/s MAX' in row:
            if row['Total RANK0 MStencil/s MAX'] != '':
                row['MStencil/s  MAX'] = row[
                    'MWD main-loop RANK0 MStencil/s MAX']
        # Temporary: deprecated column name.
        if 'RANK0 MStencil/s  MAX' in row:
            if row['RANK0 MStencil/s  MAX'] != '':
                row['MStencil/s  MAX'] = row['RANK0 MStencil/s  MAX']

        tup = {}
        for name, convert in req_fields:
            try:
                tup[name] = convert(row[name])
            except (KeyError, TypeError, ValueError):
                # Missing or malformed field: report its name and carry on.
                # The single-argument call form prints the same text whether
                # print is a statement or a function.
                print(name)
        tup['Kernel'] = get_stencil_num(row)
        # NOTE(review): `in 'DP'` is a substring test, so '', 'D' and 'P'
        # also select 8 -- confirm whether `== 'DP'` was intended.
        tup['word size'] = 8 if row['Precision'] in 'DP' else 4
        data.append(tup)

    for tup in data:
        tup['Actual Bytes/LUP'] = actual_BpU(tup)
        tup['Model'] = models(tup)
        # Relative model error in percent.
        tup['Err %'] = 100 * (
            tup['Model'] - tup['Actual Bytes/LUP']) / tup['Actual Bytes/LUP']
        tup['D_width'] = tup['Intra-diamond width']
        tup['bsz'] = tup['Multi-wavefront updates']
        tup['Performance'] = tup['MStencil/s  MAX']
        # kiB -> MiB
        tup['Cache block'] = tup['Total cache block size (kiB)'] / 1024.0

    data.sort(key=itemgetter('Kernel', 'Local NX', 'D_width'))

    selected = [
        t for t in data
        if t['Kernel'] == stencil_kernel and t['Local NX'] == N and (
            t['bsz'] == bsz
            or t['Time stepper orig name'] == 'Spatial Blocking')
    ]
    # Bail out before a figure is created so nothing is left open.
    if not selected:
        return

    cs = [t['Cache block'] for t in selected]
    cb = [t['Model'] for t in selected]
    cb_meas = [t['Actual Bytes/LUP'] for t in selected]
    Dw = [t['D_width'] for t in selected]

    fig, ax = plt.subplots()
    try:
        ax.plot(cs, cb, marker='+', linestyle='-', color='k', label="Model")
        ax.plot(cs, cb_meas, marker='x', linestyle='--', color='b',
                label="Measured")

        if stencil_kernel == 1:
            ax.set_ylabel('Code balance (Bytes/LUP)')
        ax.set_xlabel('Block size (MiB) PER THREAD')
        ax.set_ylim([0, max(cb_meas + cb) + 1])
        ax.set_xlim([0, max(cs) + 0.5])

        # Secondary x axis: one tick per data point, labelled with the
        # diamond width.
        ax2 = ax.twiny()
        ax2.set_xticks(cs)
        ax2.set_xlabel('Diamond width')
        ax2.set_xlim(ax.get_xlim())

        # Some tick labels are blanked for kernels 1 and 5.  Positions past
        # the end of the data are skipped instead of raising IndexError.
        labels = [str(width) for width in Dw]
        blanked = {1: (0, 1, 3, 4, 5), 5: (1,)}
        for idx in blanked.get(stencil_kernel, ()):
            if idx < len(labels):
                labels[idx] = ''
        ax2.set_xticklabels(labels)

        prefixes = {
            0: '25_pt_const',
            1: '7_pt_const',
            4: '25_pt_var',
            5: '7_pt_var',
            6: 'solar',
        }
        title = (prefixes.get(stencil_kernel, '') +
                 '_code_balance_vs_cache_size_N%d_bsz%d' % (N, bsz))

        # The legend is drawn on a single plot only.
        if bsz == 1 and stencil_kernel == 1:
            ax.legend(loc='best')
        ax.grid()
        fig.savefig(title + '.pdf',
                    format='pdf',
                    bbox_inches="tight",
                    pad_inches=0)
    finally:
        # Close (not just clear) the figure so repeated calls do not
        # accumulate open figures.
        plt.close(fig)
def gen_res(raw_data, stencil_kernel, bsz,  N):
  """Plot modelled and measured code balance against cache block size.

  Every row of raw_data is converted to typed values, the measured and the
  modelled code balance are obtained from the module-level helpers
  actual_BpU and models, and the rows matching the requested kernel, grid
  size and block size are plotted.  The figure is saved (relative path) as
  '<kernel name>_code_balance_vs_cache_size_N<N>_bsz<bsz>.pdf'; the kernel
  name prefix is empty for kernel numbers other than 0, 1, 4, 5 and 6.

  Args:
    raw_data: iterable of dict-like rows keyed by CSV column name.  Rows are
      modified in place: 'MStencil/s  MAX' is overwritten from the rank-0
      performance columns when those are present and non-empty.
    stencil_kernel: kernel number, compared with utils.get_stencil_num(row).
    bsz: compared with the 'Multi-wavefront updates' column; rows whose
      'Time stepper orig name' is 'Spatial Blocking' are selected regardless.
    N: compared with the 'Local NX' column.

  Returns:
    None.  Nothing is plotted or written when no row matches.

  Side effects:
    Updates the global matplotlib rcParams and prints the name of every
    required field that is missing or cannot be converted.
  """
  from operator import itemgetter
  import matplotlib.pyplot as plt
  import pylab
  from utils import get_stencil_num

  fig_width = 4.0*0.393701 # inches
  fig_height = 0.60*fig_width
  pylab.rcParams.update({
    'axes.labelsize': 6,
    'axes.linewidth': 0.5,
    'lines.linewidth': 1,
    'font.size': 5,
    'legend.fontsize': 6,
    'xtick.labelsize': 6,
    'ytick.labelsize': 6,
    'lines.markersize': 5,
    'text.usetex': True,
    'figure.figsize': [fig_width, fig_height],
  })

  # (column name, converter) for every column copied into the typed rows
  req_fields = [
    ('Total cache block size (kiB)', int),
    ('MStencil/s  MAX', float),
    ('Time stepper orig name', str),
    ('Stencil Kernel semi-bandwidth', int),
    ('Stencil Kernel coefficients', str),
    ('Precision', str),
    ('Time unroll', int),
    ('Number of time steps', int),
    ('Number of tests', int),
    ('Local NX', int),
    ('Local NY', int),
    ('Local NZ', int),
    ('Total Memory Transfer', float),
    ('Thread group size', int),
    ('Intra-diamond prologue/epilogue MStencils', int),
    ('Multi-wavefront updates', int),
    ('Intra-diamond width', int),
  ]

  data = []
  for row in raw_data:
    # Use single field to represent the performance
    if 'Total RANK0 MStencil/s MAX' in row:
      if row['Total RANK0 MStencil/s MAX'] != '':
        row['MStencil/s  MAX'] = row['MWD main-loop RANK0 MStencil/s MAX']
    # temporary for deprecated format
    if 'RANK0 MStencil/s  MAX' in row:
      if row['RANK0 MStencil/s  MAX'] != '':
        row['MStencil/s  MAX'] = row['RANK0 MStencil/s  MAX']

    tup = {}
    # add the general fields
    for name, convert in req_fields:
      try:
        tup[name] = convert(row[name])
      except (KeyError, TypeError, ValueError):
        # Missing or malformed field: report its name and carry on.  The
        # single-argument call form prints the same text whether print is
        # a statement or a function.
        print(name)
    # add the stencil operator
    tup['Kernel'] = get_stencil_num(row)
    # add precision information
    # NOTE(review): `in 'DP'` is a substring test, so '', 'D' and 'P' also
    # select 8 -- confirm whether `== 'DP'` was intended.
    tup['word size'] = 8 if row['Precision'] in 'DP' else 4
    data.append(tup)

  for tup in data:
    tup['Actual Bytes/LUP'] = actual_BpU(tup)
    tup['Model'] = models(tup)
    # model error (percent)
    tup['Err %'] = 100 * (tup['Model'] - tup['Actual Bytes/LUP'])/tup['Actual Bytes/LUP']
    tup['D_width'] = tup['Intra-diamond width']
    tup['bsz'] = tup['Multi-wavefront updates']
    tup['Performance'] = tup['MStencil/s  MAX']
    # kiB -> MiB
    tup['Cache block'] = tup['Total cache block size (kiB)']/1024.0

  data.sort(key=itemgetter('Kernel', 'Local NX', 'D_width'))

  selected = [
    t for t in data
    if t['Kernel'] == stencil_kernel and t['Local NX'] == N
    and (t['bsz'] == bsz or t['Time stepper orig name'] == 'Spatial Blocking')
  ]
  # Return before a figure is created so nothing is left open.
  if not selected:
    return

  cs = [t['Cache block'] for t in selected]
  cb = [t['Model'] for t in selected]
  cb_meas = [t['Actual Bytes/LUP'] for t in selected]
  Dw = [t['D_width'] for t in selected]

  fig, ax = plt.subplots()
  try:
    ax.plot(cs, cb, marker='+', linestyle='-', color='k', label="Model")
    ax.plot(cs, cb_meas, marker='x', linestyle='--', color='b', label="Measured")

    if stencil_kernel == 1:
      ax.set_ylabel('Code balance (Bytes/LUP)')
    ax.set_xlabel('Block size (MiB) PER THREAD')
    ax.set_ylim([0, max(cb_meas + cb) + 1])
    ax.set_xlim([0, max(cs) + 0.5])

    # secondary x axis: one tick per data point, labelled with the diamond width
    ax2 = ax.twiny()
    ax2.set_xticks(cs)
    ax2.set_xlabel('Diamond width')
    ax2.set_xlim(ax.get_xlim())

    # Some tick labels are blanked for kernels 1 and 5.  Positions past the
    # end of the data are skipped instead of raising IndexError.
    labels = [str(width) for width in Dw]
    blanked = {1: (0, 1, 3, 4, 5), 5: (1,)}
    for idx in blanked.get(stencil_kernel, ()):
      if idx < len(labels):
        labels[idx] = ''
    ax2.set_xticklabels(labels)

    prefixes = {
      0: '25_pt_const',
      1: '7_pt_const',
      4: '25_pt_var',
      5: '7_pt_var',
      6: 'solar',
    }
    title = prefixes.get(stencil_kernel, '') + '_code_balance_vs_cache_size_N%d_bsz%d'%(N, bsz)

    # the legend is drawn on a single plot only
    if bsz == 1 and stencil_kernel == 1:
      ax.legend(loc='best')
    ax.grid()
    fig.savefig(title+'.pdf', format='pdf', bbox_inches="tight", pad_inches=0)
  finally:
    # Close (not just clear) the figure so repeated calls do not accumulate
    # open figures.
    plt.close(fig)