def main():
    """Generate one result plot per (stencil kernel, 'Global NX') pair.

    The CSV file named by the first command-line argument is loaded, the
    distinct kernel numbers and 'Global NX' values found in it are collected,
    and ``gen_res`` is invoked for every combination of the two.
    """
    import sys
    from ics_utils import get_stencil_num
    from utils import load_csv

    records = load_csv(sys.argv[1])

    # Distinct kernel numbers present in the input.
    kernels = list(set(get_stencil_num(rec) for rec in records))

    # Distinct global grid sizes present in the input.
    grid_sizes = list(set(rec['Global NX'] for rec in records))

    for kernel, nx in ((kk, nn) for kk in kernels for nn in grid_sizes):
        gen_res(records, int(kernel), int(nx))
def main():
  """Drive ``gen_res`` over all kernel / grid-size combinations in a CSV file.

  Reads the file given as ``sys.argv[1]`` with ``load_csv``, gathers the unique
  values of ``get_stencil_num(row)`` and of the 'Global NX' column, and calls
  ``gen_res`` once for each pair (both converted with ``int``).
  """
  import sys
  from ics_utils import get_stencil_num
  from utils import load_csv

  rows = load_csv(sys.argv[1])

  # Unique stencil kernel numbers.
  stencil_ids = set()
  stencil_ids.update(get_stencil_num(row) for row in rows)
  stencil_ids = list(stencil_ids)

  # Unique 'Global NX' values.
  nx_values = set()
  nx_values.update(row['Global NX'] for row in rows)
  nx_values = list(nx_values)

  for stencil_id in stencil_ids:
    for nx in nx_values:
      gen_res(rows, int(stencil_id), int(nx))
def gen_res(raw_data, stencil_kernel, N):
  """Plot modelled and measured code balance against cache block size.

  Every row of ``raw_data`` is converted to a typed record and annotated with
  the measured bytes/LUP (``actual_BpU``), the modelled bytes/LUP (``models``)
  and the cache block size (``get_bs``). Records matching ``stencil_kernel``,
  ``N`` and a thread group size of 0 or 10 are plotted, and the figure is
  saved as ``<prefix>_code_balance_vs_cache_size_N<N>.pdf``; the prefix depends
  on the kernel number and is empty for kernels other than 0, 1, 4 and 5.

  Args:
    raw_data: iterable of mapping rows keyed by the column names listed in
      ``req_fields``.
    stencil_kernel: kernel number compared against ``get_stencil_num(row)``.
    N: value compared against the integer 'Local NX' column.

  Returns:
    None. Returns early, without plotting, when no record matches.
  """
  from operator import itemgetter
  import matplotlib.pyplot as plt
  import pylab
  from ics_utils import models, get_bs, get_stencil_num, get_nd

  fig_width = 5.5*0.393701 # inches
  fig_height = 0.68*fig_width

  fig_size =  [fig_width,fig_height]
  # NOTE(review): 'text.fontsize' appears to be rejected by newer matplotlib
  # releases (renamed to 'font.size') -- confirm against the target version.
  params = {
         'axes.labelsize': 5,
         'axes.linewidth': 0.5,
         'lines.linewidth': 1,
         'text.fontsize': 5,
         'legend.fontsize': 5,
         'xtick.labelsize': 5,
         'ytick.labelsize': 5,
         'lines.markersize': 5,
         'text.usetex': True,
         'figure.figsize': fig_size}
  pylab.rcParams.update(params)

  req_fields = [('Total cache block size (kiB)', int), ('MStencil/s  MAX', float), ('Time stepper orig name', str), ('Stencil Kernel semi-bandwidth', int), ('Stencil Kernel coefficients', str), ('Precision', str), ('Time unroll',int), ('Number of time steps',int), ('Number of tests',int), ('Local NX',int), ('Local NY',int), ('Local NZ',int), ('Total Memory Transfer', float), ('Thread group size' ,int), ('Intra-diamond prologue/epilogue MStencils',int), ('Multi-wavefront updates', int), ('Intra-diamond width', int)]

  data = []
  for row in raw_data:
    tup = dict()
    # add the general fields, converted to their declared types
    for name, conv in req_fields:
      try:
        tup[name] = conv(row[name])
      except (KeyError, ValueError, TypeError):
        # Best effort: report the column that is missing or cannot be
        # converted and carry on with the remaining columns.
        print(name)
    # add the stencil operator
    tup['Kernel'] = get_stencil_num(row)
    data.append(tup)

  WS = 8 # word size in bytes
  for tup in data:
    tup['Actual Bytes/LUP'] = actual_BpU(tup)
    tup['Model'] = models(tup)
    # model error
    tup['Err %'] = 100 * (tup['Model'] - tup['Actual Bytes/LUP'])/tup['Actual Bytes/LUP']
    tup['D_width'] = tup['Intra-diamond width']
    tup['Performance'] = tup['MStencil/s  MAX']
    tup['Cache block'] = get_bs(Dw=tup['D_width'], Nd=get_nd(tup['Kernel']), Nf=(tup['Multi-wavefront updates']-1), Nx=tup['Local NX'], WS=WS)

  records = sorted(data, key=itemgetter('Kernel', 'Local NX', 'D_width'))

  # Keep only the points for the requested kernel and grid size.
  selected = [r for r in records
              if r['Kernel'] == stencil_kernel
              and r['Thread group size'] in (10, 0)
              and r['Local NX'] == N]
  if not selected:
    return

  cs = [r['Cache block'] for r in selected]
  cb = [r['Model'] for r in selected]
  cb_meas = [r['Actual Bytes/LUP'] for r in selected]
  Dw = [r['D_width'] for r in selected]

  fig, ax = plt.subplots()
  try:
    ax.plot(cs, cb     , marker='^', linestyle='-', color='k', label="Model")
    ax.plot(cs, cb_meas, marker='x', linestyle='--', color='b', label="Measured")

    # show the usable cache size limits
    ax.plot([12.5, 12.5], [0, 0.7*cb[0]], linestyle='-', color='r', label="Usable cache size")

    ax.set_ylabel('Code balance (Bytes/LUP)')
    ax.set_xlabel('Cache block size (MiB)')
    ax.set_ylim([0, max(cb_meas+cb)+1])
    ax.set_xlim([0, max(cs)+0.5])

    # Secondary x axis: same positions, labelled with the diamond width.
    ax2 = ax.twiny()
    ax2.set_xticks(cs)
    ax2.set_xlabel('Diamond width')
    ax2.set_xlim(ax.get_xlim())

    if stencil_kernel == 1:
      # Blank the 2nd, 4th and 6th tick labels (presumably to avoid overlap);
      # indices past the end are skipped so short series do not raise.
      Dw = [str(d) for d in Dw]
      for idx in (1, 3, 5):
        if idx < len(Dw):
          Dw[idx] = ''
    ax2.set_xticklabels(Dw)

    prefixes = {0: '25_pt_const', 1: '7_pt_const', 4: '25_pt_var', 5: '7_pt_var'}
    title = prefixes.get(stencil_kernel, '') + '_code_balance_vs_cache_size_N'+str(N)

    ax.legend(loc='best')
    ax.grid()
    pylab.savefig(title+'.pdf', format='pdf', bbox_inches="tight", pad_inches=0)
  finally:
    # Close (not just clear) the figure: a new one is created on every call,
    # so leaving it open accumulates figures across the caller's loop.
    plt.close(fig)
def gen_res(raw_data, stencil_kernel, N):
    """Plot modelled and measured code balance against cache block size.

    Every row of ``raw_data`` is converted to a typed record and annotated
    with the measured bytes/LUP (``actual_BpU``), the modelled bytes/LUP
    (``models``) and the cache block size (``get_bs``). Records matching
    ``stencil_kernel``, ``N`` and a thread group size of 0 or 10 are plotted,
    and the figure is saved as
    ``<prefix>_code_balance_vs_cache_size_N<N>.pdf``; the prefix depends on
    the kernel number and is empty for kernels other than 0, 1, 4 and 5.

    Args:
        raw_data: iterable of mapping rows keyed by the column names listed
            in ``req_fields``.
        stencil_kernel: kernel number compared against
            ``get_stencil_num(row)``.
        N: value compared against the integer 'Local NX' column.

    Returns:
        None. Returns early, without plotting, when no record matches.
    """
    from operator import itemgetter
    import matplotlib.pyplot as plt
    import pylab
    from ics_utils import models, get_bs, get_stencil_num, get_nd

    fig_width = 5.5 * 0.393701  # inches
    fig_height = 0.68 * fig_width

    fig_size = [fig_width, fig_height]
    # NOTE(review): 'text.fontsize' appears to be rejected by newer matplotlib
    # releases (renamed to 'font.size') -- confirm against the target version.
    params = {
        'axes.labelsize': 5,
        'axes.linewidth': 0.5,
        'lines.linewidth': 1,
        'text.fontsize': 5,
        'legend.fontsize': 5,
        'xtick.labelsize': 5,
        'ytick.labelsize': 5,
        'lines.markersize': 5,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    pylab.rcParams.update(params)

    req_fields = [('Total cache block size (kiB)', int),
                  ('MStencil/s  MAX', float), ('Time stepper orig name', str),
                  ('Stencil Kernel semi-bandwidth', int),
                  ('Stencil Kernel coefficients', str), ('Precision', str),
                  ('Time unroll', int), ('Number of time steps', int),
                  ('Number of tests', int), ('Local NX', int),
                  ('Local NY', int), ('Local NZ', int),
                  ('Total Memory Transfer', float), ('Thread group size', int),
                  ('Intra-diamond prologue/epilogue MStencils', int),
                  ('Multi-wavefront updates', int),
                  ('Intra-diamond width', int)]

    data = []
    for row in raw_data:
        tup = dict()
        # add the general fields, converted to their declared types
        for name, conv in req_fields:
            try:
                tup[name] = conv(row[name])
            except (KeyError, ValueError, TypeError):
                # Best effort: report the column that is missing or cannot
                # be converted and carry on with the remaining columns.
                print(name)
        # add the stencil operator
        tup['Kernel'] = get_stencil_num(row)
        data.append(tup)

    WS = 8  # word size in bytes
    for tup in data:
        tup['Actual Bytes/LUP'] = actual_BpU(tup)
        tup['Model'] = models(tup)
        # model error
        tup['Err %'] = 100 * (
            tup['Model'] - tup['Actual Bytes/LUP']) / tup['Actual Bytes/LUP']
        tup['D_width'] = tup['Intra-diamond width']
        tup['Performance'] = tup['MStencil/s  MAX']
        tup['Cache block'] = get_bs(Dw=tup['D_width'],
                                    Nd=get_nd(tup['Kernel']),
                                    Nf=(tup['Multi-wavefront updates'] - 1),
                                    Nx=tup['Local NX'],
                                    WS=WS)

    records = sorted(data, key=itemgetter('Kernel', 'Local NX', 'D_width'))

    # Keep only the points for the requested kernel and grid size.
    selected = [
        r for r in records
        if r['Kernel'] == stencil_kernel
        and r['Thread group size'] in (10, 0)
        and r['Local NX'] == N
    ]
    if not selected:
        return

    cs = [r['Cache block'] for r in selected]
    cb = [r['Model'] for r in selected]
    cb_meas = [r['Actual Bytes/LUP'] for r in selected]
    Dw = [r['D_width'] for r in selected]

    fig, ax = plt.subplots()
    try:
        ax.plot(cs, cb, marker='^', linestyle='-', color='k', label="Model")
        ax.plot(cs,
                cb_meas,
                marker='x',
                linestyle='--',
                color='b',
                label="Measured")

        # show the usable cache size limits
        ax.plot([12.5, 12.5], [0, 0.7 * cb[0]],
                linestyle='-',
                color='r',
                label="Usable cache size")

        ax.set_ylabel('Code balance (Bytes/LUP)')
        ax.set_xlabel('Cache block size (MiB)')
        ax.set_ylim([0, max(cb_meas + cb) + 1])
        ax.set_xlim([0, max(cs) + 0.5])

        # Secondary x axis: same positions, labelled with the diamond width.
        ax2 = ax.twiny()
        ax2.set_xticks(cs)
        ax2.set_xlabel('Diamond width')
        ax2.set_xlim(ax.get_xlim())

        if stencil_kernel == 1:
            # Blank the 2nd, 4th and 6th tick labels (presumably to avoid
            # overlap); indices past the end are skipped so short series do
            # not raise.
            Dw = [str(d) for d in Dw]
            for idx in (1, 3, 5):
                if idx < len(Dw):
                    Dw[idx] = ''
        ax2.set_xticklabels(Dw)

        prefixes = {
            0: '25_pt_const',
            1: '7_pt_const',
            4: '25_pt_var',
            5: '7_pt_var',
        }
        title = (prefixes.get(stencil_kernel, '') +
                 '_code_balance_vs_cache_size_N' + str(N))

        ax.legend(loc='best')
        ax.grid()
        pylab.savefig(title + '.pdf',
                      format='pdf',
                      bbox_inches="tight",
                      pad_inches=0)
    finally:
        # Close (not just clear) the figure: a new one is created on every
        # call, so leaving it open accumulates figures across the caller's
        # loop.
        plt.close(fig)
def create_table(raw_data, rows, stencil):
  """Write ``<stencil>_threadscaling_table.csv`` from selected benchmark rows.

  The columns in ``cols_format`` are extracted with
  ``select_fields(raw_data, rows, cols)`` and converted to their declared
  types. Rows lacking 'Total Memory Transfer' are paired with rows that have
  it (same time stepper, coefficients, semi-bandwidth and thread group size),
  derived per-row values are computed, and the columns listed in ``fields``
  are written to the CSV file, sorted by 'Thread group size'.

  Args:
    raw_data: input passed through to ``select_fields``.
    rows: row selection passed through to ``select_fields``.
    stencil: string used as the output file name prefix.

  Returns:
    None. The result is the CSV file written to the current directory.
  """
  from csv import DictWriter
  from utils import select_fields
  from ics_utils import models, get_stencil_num
  from operator import itemgetter

  cols_format = [('Time stepper orig name', str), ('Stencil Kernel coefficients', str), ('Thread group size', int), ('Stencil Kernel semi-bandwidth', int), ('OpenMP Threads', int), ('Energy', float), ('Energy DRAM', float), ('Power', float), ('Power DRAM', float), ('MStencil/s  MAX', float), ('Global NX', int), ('Local NY', int), ('Global NY', int), ('Global NZ', int), ('Number of time steps', int), ('Number of tests', int), ('Intra-diamond prologue/epilogue MStencils', int), ('Total cache block size (kiB)', int), ('Block size in X', int), ('Precision', str), ('Time unroll',int), ('Intra-diamond width', int), ('Multi-wavefront updates', int), ('Total Memory Transfer', float), ('Sustained Memory BW', float)]

  cols = [name for name, _ in cols_format]

  data = select_fields(raw_data, rows, cols)

  for k in data:
    # An empty X block size is replaced by a large default before conversion.
    if k['Block size in X'] == '':
      k['Block size in X'] = '100000'
    for col, conv in cols_format:
      try:
        # Empty cells are left as '' rather than converted.
        if k[col] != '':
          k[col] = conv(k[col])
      except (ValueError, TypeError):
        # Best effort: report the unconvertible value and keep it as is.
        print("%s %s" % (col, k[col]))

  # merge the memory transfer fields with the corresponding entries
  key = itemgetter('Time stepper orig name', 'Stencil Kernel coefficients', 'Stencil Kernel semi-bandwidth', 'Thread group size')

  # NOTE(review): a row is appended once per matching partner, so several
  # partners with the same key yield repeated output rows (carrying the values
  # of the last match) -- confirm this is intended.
  merged = []
  for k in data:
    if k['Total Memory Transfer'] != '':
      continue
    for k2 in data:
      if k2['Total Memory Transfer'] != '' and k != k2 and key(k) == key(k2):
        k['Total Memory Transfer'] = k2['Total Memory Transfer']
        k['Sustained Memory BW'] = k2['Sustained Memory BW']
        k['mem Number of time steps'] = k2['Number of time steps']
        k['mem Number of tests'] = k2['Number of tests']
        merged.append(k)
  data = merged

  # compute derived values
  for k in data:
    nx = k['Global NX']
    ny = k['Global NY']
    nz = k['Global NZ']
    stencil_size = 2*ny*nz + ny*nz*(nx+2*k['Stencil Kernel semi-bandwidth'])
    prologue = k['Intra-diamond prologue/epilogue MStencils']*1e6

    lups = k['Number of tests'] * (stencil_size*k['Number of time steps'] - prologue)

    k['pJ/LUP CPU'] = k['Energy']/lups*1e9
    k['pJ/LUP DRAM'] = k['Energy DRAM']/lups*1e9
    k['pJ/LUP Total'] = k['pJ/LUP CPU'] + k['pJ/LUP DRAM']
    k['Power CPU'] = k['Power']
    k['Power Total'] = k['Power CPU'] + k['Power DRAM']

    # The memory measurement may use its own test / time-step counts.
    mlups = k['mem Number of tests'] * (stencil_size*k['mem Number of time steps'] - prologue)
    k['Measured Bytes/LUP'] = k['Total Memory Transfer']*1e9/mlups

    k['Threads'] = k['OpenMP Threads']
    k['MLUP/s'] = k['MStencil/s  MAX']

    tgs = k['Thread group size']
    if tgs == 0:
      k['Method'] = 'Spt. Blk.'
    else:
      k['Method'] = str(tgs)+'WD'

    k['Kernel'] = get_stencil_num(k)
    k['Model Bytes/LUP'] = models(k)

    k['Dw-Nf'] = str(k['Intra-diamond width'])+'_'+str(k['Multi-wavefront updates'])

    # Float divisors: the operands are ints, and Python 2 integer division
    # would silently truncate the size to whole MiB.
    k['Cache blk. [MiB]'] = k['Total cache block size (kiB)']/1024.0
    if k['Block size in X'] < k['Global NX']:
      k['Cache blk. [MiB]'] = k['Cache blk. [MiB]'] * k['Global NX'] / float(k['Block size in X'])

  data = sorted(data, key=itemgetter('Thread group size'))

  fields = ['Method', 'Threads','MLUP/s', 'Cache blk. [MiB]', 'Sustained Memory BW', 'Dw-Nf', 'Model Bytes/LUP', 'Measured Bytes/LUP', 'Power CPU', 'Power DRAM', 'Power Total', 'pJ/LUP CPU', 'pJ/LUP DRAM', 'pJ/LUP Total']
  with open(stencil+'_threadscaling_table.csv', 'w') as output_file:
    writer = DictWriter(output_file, fieldnames=fields)
    writer.writeheader()
    for k in data:
      # Only the output columns are passed on; DictWriter rejects extra keys.
      writer.writerow({f: k[f] for f in fields if f in k})