def import_user_defined_variables(variables): # attempt to open the user defined variables definition file try: file = open(os.path.join(userSettingPath, 'variables.txt'), 'r') except: print "No variables.txt file found." return #this can be replaced with a proper lex-yacc parser later for line in file: try: # strip out trailing whitespaces and skip comment lines line = line.strip() if len(line) == 0: # skip empty line continue if line[0] == '#': # skip comment continue # parse the line containing definition of a stat variable s = line.split(",") statName = s[0] statVar = vc.variable('', 1, 0) statVar.importFromString(line) # add parsed stat variable to the searchable map variables[statName] = statVar except Exception, (e): print "error:", e, ", in variables.txt line:", line
def import_user_defined_variables(variables): # attempt to open the user defined variables definition file try: file = open(os.path.join(userSettingPath, 'variables.txt'),'r') except: print "No variables.txt file found." return #this can be replaced with a proper lex-yacc parser later for line in file: try: # strip out trailing whitespaces and skip comment lines line = line.strip() if len(line) == 0: # skip empty line continue if line[0] == '#': # skip comment continue # parse the line containing definition of a stat variable s = line.split(",") statName = s[0] statVar = vc.variable('', 1, 0) statVar.importFromString(line) # add parsed stat variable to the searchable map variables[statName] = statVar except Exception, (e): print "error:",e,", in variables.txt line:",line
def p_sentence(p): '''sentence : WORD NUMBERSEQUENCE''' #print p[0], p[1],p[2] num = p[2].split(" ") lookup_input = p[1].lower() if (lookup_input in stat_lookuptable): if (lookup_input == "globalcyclecount") and (int(num[0]) % 10000 == 0): print "Processing global cycle %s" % num[0] stat = stat_lookuptable[lookup_input] if (stat.type == 1): for x in num: stat.data.append(stat.datatype(x)) elif (stat.type == 2): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 3): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 4): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 5): stat.initSparseMatrix() for entry in num: row, value = entry.split(',') row = stat.datatype(row) value = stat.datatype(value) stat.data[0].append(value) stat.data[1].append(row) stat.data[2].append(stat.sampleNum) stat.sampleNum += 1 elif (lookup_input[0:5] == 'cflog'): if (skipCFLOGParsing == 1): return count = 0 pc = [] threadcount = [] for x in num: if (count % 2) == 0: pc.append(int(x)) else: threadcount.append(int(x)) count += 1 if (p[1] not in CFLOG): CFLOG[p[1]] = vc.variable('', 2, 0) CFLOG[p[1]].data.append([]) # pc[] CFLOG[p[1]].data.append([]) # threadcount[] CFLOG[p[1]].maxPC = 0 CFLOG[p[1]].data[0].append(pc) CFLOG[p[1]].data[1].append(threadcount) MaxPC = max(pc) CFLOG[p[1]].maxPC = max(MaxPC, CFLOG[p[1]].maxPC) else: pass
def parseMe(filename): #The lexer # List of token names. This is always required tokens = [ 'WORD', 'NUMBERSEQUENCE', ] # Regular expression rules for tokens def t_WORD(t): r'[a-zA-Z_][a-zA-Z0-9_]*' return t def t_NUMBERSEQUENCE(t): r'([-]{0,1}[0-9]+([\.][0-9]+){0,1}[ ]*)+' return t t_ignore = '[\t: ]+' def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") def t_error(t): print "Illegal character '%s'" % t.value[0] t.lexer.skip(1) lex.lex() # Creating holder for CFLOG CFLOG = {} # Declaring the properties of supported stats in a single dictionary # FORMAT: <stat name in GUI>:vc.variable(<Stat Name in Log>, <type>, <reset@kernelstart>, [datatype]) variables = { 'shaderInsn': vc.variable('shaderinsncount', 2, 0, 'impVec'), 'globalInsn': vc.variable('globalinsncount', 1, 1, 'scalar'), 'globalCycle': vc.variable('globalcyclecount', 1, 1, 'scalar'), 'shaderWarpDiv': vc.variable('shaderwarpdiv', 2, 0, 'impVec'), 'L1TextMiss': vc.variable('lonetexturemiss', 1, 0, 'scalar'), 'L1ConstMiss': vc.variable('loneconstmiss', 1, 0, 'scalar'), 'L1ReadMiss': vc.variable('lonereadmiss', 1, 0, 'scalar'), 'L1WriteMiss': vc.variable('lonewritemiss', 1, 0, 'scalar'), 'L2ReadMiss': vc.variable('ltworeadmiss', 1, 0, 'scalar'), 'L2WriteMiss': vc.variable('ltwowritemiss', 1, 0, 'scalar'), 'L2WriteHit': vc.variable('ltwowritehit', 1, 0, 'scalar'), 'L2ReadHit': vc.variable('ltworeadhit', 1, 0, 'scalar'), 'globalTotInsn': vc.variable('globaltotinsncount', 1, 0, 'scalar'), 'dramCMD': vc.variable('', 2, 0, 'idxVec'), 'dramNOP': vc.variable('', 2, 0, 'idxVec'), 'dramNACT': vc.variable('', 2, 0, 'idxVec'), 'dramNPRE': vc.variable('', 2, 0, 'idxVec'), 'dramNREQ': vc.variable('', 2, 0, 'idxVec'), 'dramMaxMRQS': vc.variable('', 2, 0, 'idxVec'), 'dramAveMRQS': vc.variable('', 2, 0, 'idxVec'), 'dramUtil': vc.variable('', 2, 0, 'idxVec'), 'dramEff': vc.variable('', 2, 0, 'idxVec'), 'globalCompletedThreads': vc.variable('gpucompletedthreads', 1, 1, 'scalar'), 'globalSentWrites': 
vc.variable('gpgpunsentwrites', 1, 0, 'scalar'), 'globalProcessedWrites': vc.variable('gpgpunprocessedwrites', 1, 0, 'scalar'), 'averagemflatency': vc.variable('', 1, 0, 'custom'), 'LDmemlatdist': vc.variable('', 3, 0, 'stackbar'), 'STmemlatdist': vc.variable('', 3, 0, 'stackbar'), 'WarpDivergenceBreakdown': vc.variable('', 3, 0, 'stackbar'), 'dram_writes_per_cycle': vc.variable('', 1, 0, 'scalar', float), 'dram_reads_per_cycle': vc.variable('', 1, 0, 'scalar', float), 'gpu_stall_by_MSHRwb': vc.variable('', 1, 0, 'scalar'), 'dramglobal_acc_r': vc.variable('', 4, 0, 'idx2DVec'), 'dramglobal_acc_w': vc.variable('', 4, 0, 'idx2DVec'), 'dramlocal_acc_r': vc.variable('', 4, 0, 'idx2DVec'), 'dramlocal_acc_w': vc.variable('', 4, 0, 'idx2DVec'), 'dramconst_acc_r': vc.variable('', 4, 0, 'idx2DVec'), 'dramtexture_acc_r': vc.variable('', 4, 0, 'idx2DVec'), 'cacheMissRate_globalL1_all': vc.variable('cachemissrate_globallocall1_all', 2, 0, 'impVec', float), 'cacheMissRate_textureL1_all': vc.variable('cachemissrate_texturel1_all', 2, 0, 'impVec', float), 'cacheMissRate_constL1_all': vc.variable('cachemissrate_constl1_all', 2, 0, 'impVec', float), 'cacheMissRate_globalL1_noMgHt': vc.variable('cachemissrate_globallocall1_nomght', 2, 0, 'impVec', float), 'cacheMissRate_textureL1_noMgHt': vc.variable('cachemissrate_texturel1_nomght', 2, 0, 'impVec', float), 'cacheMissRate_constL1_noMgHt': vc.variable('cachemissrate_constl1_nomght', 2, 0, 'impVec', float), 'shdrctacount': vc.variable('shdrctacount', 2, 0, 'impVec'), 'CFLOG': CFLOG } # import user defined stat variables from variables.txt - adds on top of the defaults import_user_defined_variables(variables) # generate a lookup table based on the specified name in log file for each stat stat_lookuptable = {} for name, var in variables.iteritems(): if (name == 'CFLOG'): continue if (var.lookup_tag != ''): stat_lookuptable[var.lookup_tag] = var else: stat_lookuptable[name.lower()] = var inputData = 'NULL' def p_sentence(p): '''sentence 
: WORD NUMBERSEQUENCE''' #print p[0], p[1],p[2] num = p[2].split(" ") lookup_input = p[1].lower() if (lookup_input in stat_lookuptable): if (lookup_input == "globalcyclecount") and (int(num[0]) % 10000 == 0): print "Processing global cycle %s" % num[0] stat = stat_lookuptable[lookup_input] if (stat.type == 1): for x in num: stat.data.append(stat.datatype(x)) elif (stat.type == 2): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 3): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 4): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 5): stat.initSparseMatrix() for entry in num: row, value = entry.split(',') row = stat.datatype(row) value = stat.datatype(value) stat.data[0].append(value) stat.data[1].append(row) stat.data[2].append(stat.sampleNum) stat.sampleNum += 1 elif (lookup_input[0:5] == 'cflog'): if (skipCFLOGParsing == 1): return count = 0 pc = [] threadcount = [] for x in num: if (count % 2) == 0: pc.append(int(x)) else: threadcount.append(int(x)) count += 1 if (p[1] not in CFLOG): CFLOG[p[1]] = vc.variable('', 2, 0) CFLOG[p[1]].data.append([]) # pc[] CFLOG[p[1]].data.append([]) # threadcount[] CFLOG[p[1]].maxPC = 0 CFLOG[p[1]].data[0].append(pc) CFLOG[p[1]].data[1].append(threadcount) MaxPC = max(pc) CFLOG[p[1]].maxPC = max(MaxPC, CFLOG[p[1]].maxPC) else: pass def p_error(p): if p: print("Syntax error at '%s'" % p.value) else: print("Syntax error at EOF") yacc.yacc() # detect for gzip'ed log file and gunzip on the fly if (filename.endswith('.gz')): file = gzip.open(filename, 'r') else: file = open(filename, 'r') while file: line = file.readline() if not line: break nameNdata = line.split(":") if (len(nameNdata) != 2): print("Syntax error at '%s'" % line) namePart = nameNdata[0].strip() dataPart = nameNdata[1].strip() parts = [' ', namePart, dataPart] p_sentence(parts) # yacc.parse(line[0:-1]) file.close() return variables
def p_sentence(p): '''sentence : WORD NUMBERSEQUENCE''' #print p[0], p[1],p[2] num = p[2].split(" ") lookup_input = p[1].lower() if (lookup_input in stat_lookuptable): if (lookup_input == "globalcyclecount") and (int(num[0]) % 10000 == 0): print "Processing global cycle %s" % num[0] stat = stat_lookuptable[lookup_input] if (stat.type == 1): for x in num: stat.data.append(stat.datatype(x)) elif (stat.type == 2): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 3): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 4): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 5): stat.initSparseMatrix() for entry in num: row, value = entry.split(',') row = stat.datatype(row) value = stat.datatype(value) stat.data[0].append(value) stat.data[1].append(row) stat.data[2].append(stat.sampleNum) stat.sampleNum += 1 elif (lookup_input[0:5] == 'cflog'): if (skipCFLOGParsing == 1): return count = 0 pc = [] threadcount = [] for x in num: if (count % 2) == 0: pc.append(int(x)) else: threadcount.append(int(x)) count += 1 if (p[1] not in CFLOG): CFLOG[p[1]] = vc.variable('',2,0) CFLOG[p[1]].data.append([]) # pc[] CFLOG[p[1]].data.append([]) # threadcount[] CFLOG[p[1]].maxPC = 0 CFLOG[p[1]].data[0].append(pc) CFLOG[p[1]].data[1].append(threadcount) MaxPC = max(pc) CFLOG[p[1]].maxPC = max(MaxPC, CFLOG[p[1]].maxPC) else: pass
def organizedata(fileVars): organizeFunction = { 'scalar':OrganizeScalar, # Scalar data 'impVec':nullOrganizedShader, # Implicit vector data for multiple units (used by Shader Core stats) 'stackbar':nullOrganizedStackedBar, # Stacked bars 'idxVec':nullOrganizedDram, # Vector data with index (used by DRAM stats) 'idx2DVec':nullOrganizedDramV2, # Vector data with 2D index (used by DRAM access stats) 'sparse':OrganizeSparse, # Vector data with 2D index (used by DRAM access stats) 'custom':0 } data_type_char = {int:'I', float:'f'} print "Organizing data into internal format..." # Organize globalCycle in advance because it is used as a reference if ('globalCycle' in fileVars): statData = fileVars['globalCycle'] fileVars['globalCycle'].data = organizeFunction[statData.organize](statData.data, data_type_char[statData.datatype]) # Organize other stat data into internal format for statName, statData in fileVars.iteritems(): if (statName != 'CFLOG' and statName != 'globalCycle' and statData.organize != 'custom'): fileVars[statName].data = organizeFunction[statData.organize](statData.data, data_type_char[statData.datatype]) # Custom routines to organize stat data into internal format if fileVars.has_key('averagemflatency'): zeros = [] for count in range(len(fileVars['averagemflatency'].data),len(fileVars['globalCycle'].data)): zeros.append(0) fileVars['averagemflatency'].data = zeros + fileVars['averagemflatency'].data if (skipCFLog == 0) and fileVars.has_key('CFLOG'): ptxFile = CFLOGptxFile statFile = CFLOGInsnInfoFile print "PC Histogram to CUDA Src = %d" % convertCFLog2CUDAsrc parseCFLOGCUDA = convertCFLog2CUDAsrc if parseCFLOGCUDA == 1: print "Obtaining PTX-to-CUDA Mapping from %s..." % ptxFile map = lexyacctexteditor.ptxToCudaMapping(ptxFile.rstrip()) print "Obtaining Program Range from %s..." 
% statFile maxStats = max(lexyacctexteditor.textEditorParseMe(statFile.rstrip()).keys()) if parseCFLOGCUDA == 1: newMap = {} for lines in map: for ptxLines in map[lines]: newMap[ptxLines] = lines print " Total number of CUDA src lines = %s..." % len(newMap) markForDel = [] for ptxLines in newMap: if ptxLines > maxStats: markForDel.append(ptxLines) for lines in markForDel: del newMap[lines] print " Number of touched CUDA src lines = %s..." % len(newMap) fileVars['CFLOGglobalPTX'] = vc.variable('',2,0) fileVars['CFLOGglobalCUDA'] = vc.variable('',2,0) count = 0 for iter in fileVars['CFLOG']: print "Organizing data for %s" % iter fileVars[iter + 'PTX'] = fileVars['CFLOG'][iter] fileVars[iter + 'PTX'].data = CFLOGOrganizePTX(fileVars['CFLOG'][iter].data, fileVars['CFLOG'][iter].maxPC) if parseCFLOGCUDA == 1: fileVars[iter + 'CUDA'] = vc.variable('',2,0) fileVars[iter + 'CUDA'].data = CFLOGOrganizeCuda(fileVars[iter + 'PTX'].data, newMap) try: if count == 0: fileVars['CFLOGglobalPTX'] = fileVars[iter + 'PTX'] if parseCFLOGCUDA == 1: fileVars['CFLOGglobalCUDA'] = fileVars[iter + 'CUDA'] else: for rows in range(0, len(fileVars[iter + 'PTX'].data)): for columns in range(0, len(fileVars[iter + 'PTX'].data[rows])): fileVars['CFLOGglobalPTX'].data[rows][columns] += fileVars[iter + 'PTX'].data[rows][columns] if parseCFLOGCUDA == 1: for rows in range(0, len(fileVars[iter + 'CUDA'].data)): for columns in range(0, len(fileVars[iter + 'CUDA'].data[rows])): fileVars['CFLOGglobalCUDA'].data[rows][columns] += fileVars[iter + 'CUDA'].data[rows][columns] except: print "Error in generating globalCFLog data" count += 1 del fileVars['CFLOG'] return fileVars
def parseMe(filename): #The lexer # List of token names. This is always required tokens = ['WORD', 'NUMBERSEQUENCE', ] # Regular expression rules for tokens def t_WORD(t): r'[a-zA-Z_][a-zA-Z0-9_]*' return t def t_NUMBERSEQUENCE(t): r'([-]{0,1}[0-9]+([\.][0-9]+){0,1}[ ]*)+' return t t_ignore = '[\t: ]+' def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") def t_error(t): print "Illegal character '%s'" % t.value[0] t.lexer.skip(1) lex.lex() # Creating holder for CFLOG CFLOG = {} # Declaring the properties of supported stats in a single dictionary # FORMAT: <stat name in GUI>:vc.variable(<Stat Name in Log>, <type>, <reset@kernelstart>, [datatype]) variables = { 'shaderInsn':vc.variable('shaderinsncount', 2, 0, 'impVec'), 'globalInsn':vc.variable('globalinsncount', 1, 1, 'scalar'), 'globalCycle':vc.variable('globalcyclecount', 1, 1, 'scalar'), 'shaderWarpDiv':vc.variable('shaderwarpdiv', 2, 0, 'impVec'), 'L1TextMiss' :vc.variable('lonetexturemiss', 1, 0, 'scalar'), 'L1ConstMiss':vc.variable('loneconstmiss', 1, 0, 'scalar'), 'L1ReadMiss' :vc.variable('lonereadmiss', 1, 0, 'scalar'), 'L1WriteMiss':vc.variable('lonewritemiss', 1, 0, 'scalar'), 'L2ReadMiss' :vc.variable('ltworeadmiss', 1, 0, 'scalar'), 'L2WriteMiss':vc.variable('ltwowritemiss', 1, 0, 'scalar'), 'L2WriteHit' :vc.variable('ltwowritehit', 1, 0, 'scalar'), 'L2ReadHit' :vc.variable('ltworeadhit', 1, 0, 'scalar'), 'globalTotInsn':vc.variable('globaltotinsncount', 1,0, 'scalar'), 'dramCMD' :vc.variable('', 2, 0, 'idxVec'), 'dramNOP' :vc.variable('', 2, 0, 'idxVec'), 'dramNACT':vc.variable('', 2, 0, 'idxVec'), 'dramNPRE':vc.variable('', 2, 0, 'idxVec'), 'dramNREQ':vc.variable('', 2, 0, 'idxVec'), 'dramMaxMRQS':vc.variable('', 2, 0, 'idxVec'), 'dramAveMRQS':vc.variable('', 2, 0, 'idxVec'), 'dramUtil':vc.variable('', 2, 0, 'idxVec'), 'dramEff' :vc.variable('', 2, 0, 'idxVec'), 'globalCompletedThreads':vc.variable('gpucompletedthreads', 1, 1, 'scalar'), 'globalSentWrites':vc.variable('gpgpunsentwrites', 1, 
0, 'scalar'), 'globalProcessedWrites':vc.variable('gpgpunprocessedwrites', 1, 0, 'scalar'), 'averagemflatency' :vc.variable('', 1, 0, 'custom'), 'LDmemlatdist':vc.variable('', 3, 0, 'stackbar'), 'STmemlatdist':vc.variable('', 3, 0, 'stackbar'), 'WarpDivergenceBreakdown':vc.variable('', 3, 0, 'stackbar'), 'WarpIssueSlotBreakdown':vc.variable('', 3, 0, 'stackbar'), 'WarpIssueDynamicIdBreakdown':vc.variable('', 3, 0, 'stackbar'), 'dram_writes_per_cycle':vc.variable('', 1, 0, 'scalar', float), 'dram_reads_per_cycle' :vc.variable('', 1, 0, 'scalar', float), 'gpu_stall_by_MSHRwb':vc.variable('', 1, 0, 'scalar'), 'dramglobal_acc_r' :vc.variable('', 4, 0, 'idx2DVec'), 'dramglobal_acc_w' :vc.variable('', 4, 0, 'idx2DVec'), 'dramlocal_acc_r' :vc.variable('', 4, 0, 'idx2DVec'), 'dramlocal_acc_w' :vc.variable('', 4, 0, 'idx2DVec'), 'dramconst_acc_r' :vc.variable('', 4, 0, 'idx2DVec'), 'dramtexture_acc_r':vc.variable('', 4, 0, 'idx2DVec'), 'cacheMissRate_globalL1_all' :vc.variable('cachemissrate_globallocall1_all', 2, 0, 'impVec', float), 'cacheMissRate_textureL1_all' :vc.variable('cachemissrate_texturel1_all', 2, 0, 'impVec', float), 'cacheMissRate_constL1_all' :vc.variable('cachemissrate_constl1_all', 2, 0, 'impVec', float), 'cacheMissRate_globalL1_noMgHt' :vc.variable('cachemissrate_globallocall1_nomght', 2, 0, 'impVec', float), 'cacheMissRate_textureL1_noMgHt':vc.variable('cachemissrate_texturel1_nomght', 2, 0, 'impVec', float), 'cacheMissRate_constL1_noMgHt' :vc.variable('cachemissrate_constl1_nomght', 2, 0, 'impVec', float), 'shdrctacount': vc.variable('shdrctacount', 2, 0, 'impVec'), 'CFLOG' : CFLOG } # import user defined stat variables from variables.txt - adds on top of the defaults import_user_defined_variables(variables) # generate a lookup table based on the specified name in log file for each stat stat_lookuptable = {} for name, var in variables.iteritems(): if (name == 'CFLOG'): continue; if (var.lookup_tag != ''): stat_lookuptable[var.lookup_tag] = var else: 
stat_lookuptable[name.lower()] = var inputData = 'NULL' def p_sentence(p): '''sentence : WORD NUMBERSEQUENCE''' #print p[0], p[1],p[2] num = p[2].split(" ") lookup_input = p[1].lower() if (lookup_input in stat_lookuptable): if (lookup_input == "globalcyclecount") and (int(num[0]) % 10000 == 0): print "Processing global cycle %s" % num[0] stat = stat_lookuptable[lookup_input] if (stat.type == 1): for x in num: stat.data.append(stat.datatype(x)) elif (stat.type == 2): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 3): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 4): for x in num: stat.data.append(stat.datatype(x)) stat.data.append("NULL") elif (stat.type == 5): stat.initSparseMatrix() for entry in num: row, value = entry.split(',') row = stat.datatype(row) value = stat.datatype(value) stat.data[0].append(value) stat.data[1].append(row) stat.data[2].append(stat.sampleNum) stat.sampleNum += 1 elif (lookup_input[0:5] == 'cflog'): if (skipCFLOGParsing == 1): return count = 0 pc = [] threadcount = [] for x in num: if (count % 2) == 0: pc.append(int(x)) else: threadcount.append(int(x)) count += 1 if (p[1] not in CFLOG): CFLOG[p[1]] = vc.variable('',2,0) CFLOG[p[1]].data.append([]) # pc[] CFLOG[p[1]].data.append([]) # threadcount[] CFLOG[p[1]].maxPC = 0 CFLOG[p[1]].data[0].append(pc) CFLOG[p[1]].data[1].append(threadcount) MaxPC = max(pc) CFLOG[p[1]].maxPC = max(MaxPC, CFLOG[p[1]].maxPC) else: pass def p_error(p): if p: print("Syntax error at '%s'" % p.value) else: print("Syntax error at EOF") yacc.yacc() # detect for gzip'ed log file and gunzip on the fly if (filename.endswith('.gz')): file = gzip.open(filename, 'r') else: file = open(filename, 'r') while file: line = file.readline() if not line : break nameNdata = line.split(":") if (len(nameNdata) != 2): print("Syntax error at '%s'" % line) namePart = nameNdata[0].strip() dataPart= nameNdata[1].strip() parts = [' ', namePart, 
dataPart] p_sentence(parts) # yacc.parse(line[0:-1]) file.close() return variables
if parseCFLOGCUDA == 1: newMap = {} for lines in map: for ptxLines in map[lines]: newMap[ptxLines] = lines print " Total number of CUDA src lines = %s..." % len(newMap) markForDel = [] for ptxLines in newMap: if ptxLines > maxStats: markForDel.append(ptxLines) for lines in markForDel: del newMap[lines] print " Number of touched CUDA src lines = %s..." % len(newMap) fileVars['CFLOGglobalPTX'] = vc.variable('',2,0) fileVars['CFLOGglobalCUDA'] = vc.variable('',2,0) count = 0 for iter in fileVars['CFLOG']: print "Organizing data for %s" % iter fileVars[iter + 'PTX'] = fileVars['CFLOG'][iter] fileVars[iter + 'PTX'].data = CFLOGOrganizePTX(fileVars['CFLOG'][iter].data, fileVars['CFLOG'][iter].maxPC) if parseCFLOGCUDA == 1: fileVars[iter + 'CUDA'] = vc.variable('',2,0) fileVars[iter + 'CUDA'].data = CFLOGOrganizeCuda(fileVars[iter + 'PTX'].data, newMap) try: if count == 0:
def parseMe(filename): # The lexer # List of token names. This is always required tokens = ["WORD", "NUMBERSEQUENCE"] # Regular expression rules for tokens def t_WORD(t): r"[a-zA-Z_][a-zA-Z0-9_]*" return t def t_NUMBERSEQUENCE(t): r"([-]{0,1}[0-9]+([\.][0-9]+){0,1}[ ]*)+" return t t_ignore = "[\t: ]+" def t_newline(t): r"\n+" t.lexer.lineno += t.value.count("\n") def t_error(t): print "Illegal character '%s'" % t.value[0] t.lexer.skip(1) lex.lex() # Section 1.1 for adding a variable shaderInsn = vc.variable(2, 0) globalInsn = vc.variable(1, 1) globalCycle = vc.variable(1, 1) shaderWarpDiv = vc.variable(2, 0) L1ConstMiss = vc.variable(1, 0) L1TextMiss = vc.variable(1, 0) L1ReadMiss = vc.variable(1, 0) L1WriteMiss = vc.variable(1, 0) L2ReadMiss = vc.variable(1, 0) L2WriteMiss = vc.variable(1, 0) L2WriteHit = vc.variable(1, 0) L2ReadHit = vc.variable(1, 0) globalTotInsn = vc.variable(1, 0) dramCMD = vc.variable(2, 0) dramNOP = vc.variable(2, 0) dramNACT = vc.variable(2, 0) dramNPRE = vc.variable(2, 0) dramNREQ = vc.variable(2, 0) dramMaxMRQS = vc.variable(2, 0) dramAveMRQS = vc.variable(2, 0) dramUtil = vc.variable(2, 0) dramEff = vc.variable(2, 0) globalCompletedThreads = vc.variable(1, 1) globalSentWrites = vc.variable(1, 0) globalProcessedWrites = vc.variable(1, 0) averagemflatency = vc.variable(1, 0) STmemlatdist = vc.variable(3, 0) LDmemlatdist = vc.variable(3, 0) WarpDivergenceBreakdown = vc.variable(3, 0) dram_writes_per_cycle = vc.variable(1, 0) dram_reads_per_cycle = vc.variable(1, 0) gpu_stall_by_MSHRwb = vc.variable(1, 0) dramglobal_acc_r = vc.variable(4, 0) dramglobal_acc_w = vc.variable(4, 0) dramlocal_acc_r = vc.variable(4, 0) dramlocal_acc_w = vc.variable(4, 0) dramconst_acc_r = vc.variable(4, 0) dramtexture_acc_r = vc.variable(4, 0) cacheMissRate_globalL1_all = vc.variable(2, 0) cacheMissRate_textureL1_all = vc.variable(2, 0) cacheMissRate_constL1_all = vc.variable(2, 0) cacheMissRate_globalL1_noMgHt = vc.variable(2, 0) cacheMissRate_textureL1_noMgHt = 
vc.variable(2, 0) cacheMissRate_constL1_noMgHt = vc.variable(2, 0) shdrctacount = vc.variable(2, 0) CFLOG = {} inputData = "NULL" def p_sentence(p): """sentence : WORD NUMBERSEQUENCE""" # print p[0], p[1],p[2] num = p[2].split(" ") for x in num: try: float(x) except: num.remove(x) # Section 1.2 for adding a variable if p[1].lower() == "shaderinsncount": for x in num: shaderInsn.data.append(int(x)) shaderInsn.data.append("NULL") elif p[1].lower() == "cachemissrate_globallocall1_all": for x in num: cacheMissRate_globalL1_all.data.append(float(x)) cacheMissRate_globalL1_all.data.append("NULL") elif p[1].lower() == "cachemissrate_texturel1_all": for x in num: cacheMissRate_textureL1_all.data.append(float(x)) cacheMissRate_textureL1_all.data.append("NULL") elif p[1].lower() == "cachemissrate_constl1_all": for x in num: cacheMissRate_constL1_all.data.append(float(x)) cacheMissRate_constL1_all.data.append("NULL") elif p[1].lower() == "cachemissrate_globallocall1_nomght": for x in num: cacheMissRate_globalL1_noMgHt.data.append(float(x)) cacheMissRate_globalL1_noMgHt.data.append("NULL") elif p[1].lower() == "cachemissrate_texturel1_nomght": for x in num: cacheMissRate_textureL1_noMgHt.data.append(float(x)) cacheMissRate_textureL1_noMgHt.data.append("NULL") elif p[1].lower() == "cachemissrate_constl1_nomght": for x in num: cacheMissRate_constL1_noMgHt.data.append(float(x)) cacheMissRate_constL1_noMgHt.data.append("NULL") elif p[1].lower() == "shdrctacount": for x in num: shdrctacount.data.append(int(x)) shdrctacount.data.append("NULL") elif p[1].lower() == "globalinsncount": for x in num: globalInsn.data.append(int(x)) # globalInsn.append("NULL") # print globalInsn elif p[1].lower() == "globalcyclecount": for x in num: globalCycle.data.append(int(x)) if int(x) % 10000 == 0: print "Processing cycle %s" % x # globalCycle.append("NULL") # print globalCycle elif p[1].lower() == "shaderwarpdiv": for x in num: shaderWarpDiv.data.append(int(x)) shaderWarpDiv.data.append("NULL") 
elif p[1].lower() == "loneconstmiss": for x in num: L1ConstMiss.data.append(int(x)) elif p[1].lower() == "lonetexturemiss": for x in num: L1TextMiss.data.append(int(x)) elif p[1].lower() == "lonereadmiss": for x in num: L1ReadMiss.data.append(int(x)) elif p[1].lower() == "lonewritemiss": for x in num: L1WriteMiss.data.append(int(x)) elif p[1].lower() == "ltwowritemiss": for x in num: L2WriteMiss.data.append(int(x)) elif p[1].lower() == "ltwowritehit": for x in num: L2WriteHit.data.append(int(x)) elif p[1].lower() == "ltworeadmiss": for x in num: L2ReadMiss.data.append(int(x)) elif p[1].lower() == "ltworeadhit": for x in num: L2ReadHit.data.append(int(x)) elif p[1].lower() == "globaltotinsncount": for x in num: globalTotInsn.data.append(int(x)) elif p[1].lower() == "dramncmd": for x in num: dramCMD.data.append(int(x)) dramCMD.data.append("NULL") elif p[1].lower() == "dramnop": for x in num: dramNOP.data.append(int(x)) dramNOP.data.append("NULL") elif p[1].lower() == "dramnact": for x in num: dramNACT.data.append(int(x)) dramNACT.data.append("NULL") elif p[1].lower() == "dramnpre": for x in num: dramNPRE.data.append(int(x)) dramNPRE.data.append("NULL") elif p[1].lower() == "dramnreq": for x in num: dramNREQ.data.append(int(x)) dramNREQ.data.append("NULL") elif p[1].lower() == "drammaxmrqs": for x in num: dramMaxMRQS.data.append(int(x)) dramMaxMRQS.data.append("NULL") elif p[1].lower() == "dramavemrqs": for x in num: dramAveMRQS.data.append(int(x)) dramAveMRQS.data.append("NULL") elif p[1].lower() == "dramutil": for x in num: dramUtil.data.append(int(x)) dramUtil.data.append("NULL") elif p[1].lower() == "drameff": for x in num: dramEff.data.append(int(x)) dramEff.data.append("NULL") elif p[1].lower() == "gpucompletedthreads": for x in num: globalCompletedThreads.data.append(int(x)) elif p[1].lower() == "gpgpunsentwrites": for x in num: globalSentWrites.data.append(int(x)) elif p[1].lower() == "gpgpunprocessedwrites": for x in num: 
globalProcessedWrites.data.append(int(x)) elif p[1].lower() == "averagemflatency": for x in num: averagemflatency.data.append(int(x)) elif p[1].lower() == "ldmemlatdist": for x in num: LDmemlatdist.data.append(int(x)) LDmemlatdist.data.append("NULL") elif p[1].lower() == "stmemlatdist": for x in num: STmemlatdist.data.append(int(x)) STmemlatdist.data.append("NULL") elif p[1].lower() == "warpdivergencebreakdown": for x in num: WarpDivergenceBreakdown.data.append(int(x)) WarpDivergenceBreakdown.data.append("NULL") elif p[1].lower() == "dram_writes_per_cycle": for x in num: dram_writes_per_cycle.data.append(float(x)) elif p[1].lower() == "dram_reads_per_cycle": for x in num: dram_reads_per_cycle.data.append(float(x)) elif p[1].lower() == "gpu_stall_by_mshrwb": for x in num: gpu_stall_by_MSHRwb.data.append(int(x)) elif p[1].lower() == "dramglobal_acc_r": for x in num: dramglobal_acc_r.data.append(int(x)) dramglobal_acc_r.data.append("NULL") elif p[1].lower() == "dramglobal_acc_w": for x in num: dramglobal_acc_w.data.append(int(x)) dramglobal_acc_w.data.append("NULL") elif p[1].lower() == "dramlocal_acc_r": for x in num: dramlocal_acc_r.data.append(int(x)) dramlocal_acc_r.data.append("NULL") elif p[1].lower() == "dramlocal_acc_w": for x in num: dramlocal_acc_w.data.append(int(x)) dramlocal_acc_w.data.append("NULL") elif p[1].lower() == "dramconst_acc_r": for x in num: dramconst_acc_r.data.append(int(x)) dramconst_acc_r.data.append("NULL") elif p[1].lower() == "dramtexture_acc_r": for x in num: dramtexture_acc_r.data.append(int(x)) dramtexture_acc_r.data.append("NULL") elif p[1].lower()[0:5] == "cflog": count = 0 pc = [] threadcount = [] for x in num: if (count % 2) == 0: pc.append(int(x)) else: threadcount.append(int(x)) count += 1 if p[1] not in CFLOG: CFLOG[p[1]] = vc.variable(2, 0) CFLOG[p[1]].data.append([]) # pc[] CFLOG[p[1]].data.append([]) # threadcount[] CFLOG[p[1]].maxPC = 0 CFLOG[p[1]].data[0].append(pc) CFLOG[p[1]].data[1].append(threadcount) MaxPC = max(pc) 
CFLOG[p[1]].maxPC = max(MaxPC, CFLOG[p[1]].maxPC) else: pass def p_error(p): if p: print ("Syntax error at '%s'" % p.value) else: print ("Syntax error at EOF") yacc.yacc() # detect for gzip'ed log file and gunzip on the fly if filename.endswith(".gz"): file = gzip.open(filename, "r") else: file = open(filename, "r") while file: line = file.readline() if not line: break yacc.parse(line[0:-1]) file.close() # Section 1.3 for adding a variable variables = { "shaderInsn": shaderInsn, "globalInsn": globalInsn, "globalCycle": globalCycle, "shaderWarpDiv": shaderWarpDiv, "L1TextMiss": L1TextMiss, "L1ConstMiss": L1ConstMiss, "L1ReadMiss": L1ReadMiss, "L1WriteMiss": L1WriteMiss, "L2ReadMiss": L2ReadMiss, "L2WriteMiss": L2WriteMiss, "L2WriteHit": L2WriteHit, "L2ReadHit": L2ReadHit, "globalTotInsn": globalTotInsn, "dramCMD": dramCMD, "dramNOP": dramNOP, "dramNACT": dramNACT, "dramNPRE": dramNPRE, "dramNREQ": dramNREQ, "dramMaxMRQS": dramMaxMRQS, "dramAveMRQS": dramAveMRQS, "dramUtil": dramUtil, "dramEff": dramEff, "globalCompletedThreads": globalCompletedThreads, "globalSentWrites": globalSentWrites, "globalProcessedWrites": globalProcessedWrites, "averagemflatency": averagemflatency, "LDmemlatdist": LDmemlatdist, "STmemlatdist": STmemlatdist, "WarpDivergenceBreakdown": WarpDivergenceBreakdown, "dram_writes_per_cycle": dram_writes_per_cycle, "dram_reads_per_cycle": dram_reads_per_cycle, "gpu_stall_by_MSHRwb": gpu_stall_by_MSHRwb, "dramglobal_acc_r": dramglobal_acc_r, "dramglobal_acc_w": dramglobal_acc_w, "dramlocal_acc_r": dramlocal_acc_r, "dramlocal_acc_w": dramlocal_acc_w, "dramconst_acc_r": dramconst_acc_r, "dramtexture_acc_r": dramtexture_acc_r, "cacheMissRate_globalL1_all": cacheMissRate_globalL1_all, "cacheMissRate_textureL1_all": cacheMissRate_textureL1_all, "cacheMissRate_constL1_all": cacheMissRate_constL1_all, "cacheMissRate_globalL1_noMgHt": cacheMissRate_globalL1_noMgHt, "cacheMissRate_textureL1_noMgHt": cacheMissRate_textureL1_noMgHt, 
"cacheMissRate_constL1_noMgHt": cacheMissRate_constL1_noMgHt, "CFLOG": CFLOG, "shdrctacount": shdrctacount, } return variables
def p_sentence(p): """sentence : WORD NUMBERSEQUENCE""" # print p[0], p[1],p[2] num = p[2].split(" ") for x in num: try: float(x) except: num.remove(x) # Section 1.2 for adding a variable if p[1].lower() == "shaderinsncount": for x in num: shaderInsn.data.append(int(x)) shaderInsn.data.append("NULL") elif p[1].lower() == "cachemissrate_globallocall1_all": for x in num: cacheMissRate_globalL1_all.data.append(float(x)) cacheMissRate_globalL1_all.data.append("NULL") elif p[1].lower() == "cachemissrate_texturel1_all": for x in num: cacheMissRate_textureL1_all.data.append(float(x)) cacheMissRate_textureL1_all.data.append("NULL") elif p[1].lower() == "cachemissrate_constl1_all": for x in num: cacheMissRate_constL1_all.data.append(float(x)) cacheMissRate_constL1_all.data.append("NULL") elif p[1].lower() == "cachemissrate_globallocall1_nomght": for x in num: cacheMissRate_globalL1_noMgHt.data.append(float(x)) cacheMissRate_globalL1_noMgHt.data.append("NULL") elif p[1].lower() == "cachemissrate_texturel1_nomght": for x in num: cacheMissRate_textureL1_noMgHt.data.append(float(x)) cacheMissRate_textureL1_noMgHt.data.append("NULL") elif p[1].lower() == "cachemissrate_constl1_nomght": for x in num: cacheMissRate_constL1_noMgHt.data.append(float(x)) cacheMissRate_constL1_noMgHt.data.append("NULL") elif p[1].lower() == "shdrctacount": for x in num: shdrctacount.data.append(int(x)) shdrctacount.data.append("NULL") elif p[1].lower() == "globalinsncount": for x in num: globalInsn.data.append(int(x)) # globalInsn.append("NULL") # print globalInsn elif p[1].lower() == "globalcyclecount": for x in num: globalCycle.data.append(int(x)) if int(x) % 10000 == 0: print "Processing cycle %s" % x # globalCycle.append("NULL") # print globalCycle elif p[1].lower() == "shaderwarpdiv": for x in num: shaderWarpDiv.data.append(int(x)) shaderWarpDiv.data.append("NULL") elif p[1].lower() == "loneconstmiss": for x in num: L1ConstMiss.data.append(int(x)) elif p[1].lower() == "lonetexturemiss": for x in 
num: L1TextMiss.data.append(int(x)) elif p[1].lower() == "lonereadmiss": for x in num: L1ReadMiss.data.append(int(x)) elif p[1].lower() == "lonewritemiss": for x in num: L1WriteMiss.data.append(int(x)) elif p[1].lower() == "ltwowritemiss": for x in num: L2WriteMiss.data.append(int(x)) elif p[1].lower() == "ltwowritehit": for x in num: L2WriteHit.data.append(int(x)) elif p[1].lower() == "ltworeadmiss": for x in num: L2ReadMiss.data.append(int(x)) elif p[1].lower() == "ltworeadhit": for x in num: L2ReadHit.data.append(int(x)) elif p[1].lower() == "globaltotinsncount": for x in num: globalTotInsn.data.append(int(x)) elif p[1].lower() == "dramncmd": for x in num: dramCMD.data.append(int(x)) dramCMD.data.append("NULL") elif p[1].lower() == "dramnop": for x in num: dramNOP.data.append(int(x)) dramNOP.data.append("NULL") elif p[1].lower() == "dramnact": for x in num: dramNACT.data.append(int(x)) dramNACT.data.append("NULL") elif p[1].lower() == "dramnpre": for x in num: dramNPRE.data.append(int(x)) dramNPRE.data.append("NULL") elif p[1].lower() == "dramnreq": for x in num: dramNREQ.data.append(int(x)) dramNREQ.data.append("NULL") elif p[1].lower() == "drammaxmrqs": for x in num: dramMaxMRQS.data.append(int(x)) dramMaxMRQS.data.append("NULL") elif p[1].lower() == "dramavemrqs": for x in num: dramAveMRQS.data.append(int(x)) dramAveMRQS.data.append("NULL") elif p[1].lower() == "dramutil": for x in num: dramUtil.data.append(int(x)) dramUtil.data.append("NULL") elif p[1].lower() == "drameff": for x in num: dramEff.data.append(int(x)) dramEff.data.append("NULL") elif p[1].lower() == "gpucompletedthreads": for x in num: globalCompletedThreads.data.append(int(x)) elif p[1].lower() == "gpgpunsentwrites": for x in num: globalSentWrites.data.append(int(x)) elif p[1].lower() == "gpgpunprocessedwrites": for x in num: globalProcessedWrites.data.append(int(x)) elif p[1].lower() == "averagemflatency": for x in num: averagemflatency.data.append(int(x)) elif p[1].lower() == "ldmemlatdist": 
for x in num: LDmemlatdist.data.append(int(x)) LDmemlatdist.data.append("NULL") elif p[1].lower() == "stmemlatdist": for x in num: STmemlatdist.data.append(int(x)) STmemlatdist.data.append("NULL") elif p[1].lower() == "warpdivergencebreakdown": for x in num: WarpDivergenceBreakdown.data.append(int(x)) WarpDivergenceBreakdown.data.append("NULL") elif p[1].lower() == "dram_writes_per_cycle": for x in num: dram_writes_per_cycle.data.append(float(x)) elif p[1].lower() == "dram_reads_per_cycle": for x in num: dram_reads_per_cycle.data.append(float(x)) elif p[1].lower() == "gpu_stall_by_mshrwb": for x in num: gpu_stall_by_MSHRwb.data.append(int(x)) elif p[1].lower() == "dramglobal_acc_r": for x in num: dramglobal_acc_r.data.append(int(x)) dramglobal_acc_r.data.append("NULL") elif p[1].lower() == "dramglobal_acc_w": for x in num: dramglobal_acc_w.data.append(int(x)) dramglobal_acc_w.data.append("NULL") elif p[1].lower() == "dramlocal_acc_r": for x in num: dramlocal_acc_r.data.append(int(x)) dramlocal_acc_r.data.append("NULL") elif p[1].lower() == "dramlocal_acc_w": for x in num: dramlocal_acc_w.data.append(int(x)) dramlocal_acc_w.data.append("NULL") elif p[1].lower() == "dramconst_acc_r": for x in num: dramconst_acc_r.data.append(int(x)) dramconst_acc_r.data.append("NULL") elif p[1].lower() == "dramtexture_acc_r": for x in num: dramtexture_acc_r.data.append(int(x)) dramtexture_acc_r.data.append("NULL") elif p[1].lower()[0:5] == "cflog": count = 0 pc = [] threadcount = [] for x in num: if (count % 2) == 0: pc.append(int(x)) else: threadcount.append(int(x)) count += 1 if p[1] not in CFLOG: CFLOG[p[1]] = vc.variable(2, 0) CFLOG[p[1]].data.append([]) # pc[] CFLOG[p[1]].data.append([]) # threadcount[] CFLOG[p[1]].maxPC = 0 CFLOG[p[1]].data[0].append(pc) CFLOG[p[1]].data[1].append(threadcount) MaxPC = max(pc) CFLOG[p[1]].maxPC = max(MaxPC, CFLOG[p[1]].maxPC) else: pass
def organizedata(fileVars):
    # Convert every raw stat list parsed from the log into the internal
    # plot-ready layout, mutating fileVars in place and returning it.
    # (Legacy version: each stat is handled by an explicit if-block.)
    print "Organizing data into internal format...";
    #Section 2.1 for adding a variable
    # Per-shader stats: reshape flat "NULL"-delimited sample streams.
    if fileVars.has_key('shaderInsn'):
        fileVars['shaderInsn'].data = nullOrganizedShader(fileVars['shaderInsn'].data)
    if fileVars.has_key('shdrctacount'):
        fileVars['shdrctacount'].data = nullOrganizedShader(fileVars['shdrctacount'].data)
    if fileVars.has_key('cacheMissRate_globalL1_all'):
        fileVars['cacheMissRate_globalL1_all'].data = nullOrganizedShaderv2(fileVars['cacheMissRate_globalL1_all'].data)
    if fileVars.has_key('cacheMissRate_textureL1_all'):
        fileVars['cacheMissRate_textureL1_all'].data = nullOrganizedShaderv2(fileVars['cacheMissRate_textureL1_all'].data)
    if fileVars.has_key('cacheMissRate_constL1_all'):
        fileVars['cacheMissRate_constL1_all'].data = nullOrganizedShaderv2(fileVars['cacheMissRate_constL1_all'].data)
    if fileVars.has_key('cacheMissRate_globalL1_noMgHt'):
        fileVars['cacheMissRate_globalL1_noMgHt'].data = nullOrganizedShaderv2(fileVars['cacheMissRate_globalL1_noMgHt'].data)
    if fileVars.has_key('cacheMissRate_textureL1_noMgHt'):
        fileVars['cacheMissRate_textureL1_noMgHt'].data = nullOrganizedShaderv2(fileVars['cacheMissRate_textureL1_noMgHt'].data)
    if fileVars.has_key('cacheMissRate_constL1_noMgHt'):
        fileVars['cacheMissRate_constL1_noMgHt'].data = nullOrganizedShaderv2(fileVars['cacheMissRate_constL1_noMgHt'].data)
    # Scalar time series: prepend a 0 so samples align with cycle 0.
    if fileVars.has_key('dram_writes_per_cycle'):
        fileVars['dram_writes_per_cycle'].data = [0] + [float(x) for x in fileVars['dram_writes_per_cycle'].data]
    if fileVars.has_key('dram_reads_per_cycle'):
        fileVars['dram_reads_per_cycle'].data = [0] + [float(x) for x in fileVars['dram_reads_per_cycle'].data]
    if fileVars.has_key('globalInsn'):
        fileVars['globalInsn'].data = [0] + [int(x) for x in fileVars['globalInsn'].data]
    if fileVars.has_key('globalCycle'):
        fileVars['globalCycle'].data = [0] + [int(x) for x in fileVars['globalCycle'].data]
    if fileVars.has_key('L1ReadMiss'):
        fileVars['L1ReadMiss'].data = [0] + [int(x) for x in fileVars['L1ReadMiss'].data]
    if fileVars.has_key('L1TextMiss'):
        fileVars['L1TextMiss'].data = [0] + fileVars['L1TextMiss'].data
    if fileVars.has_key('L1ConstMiss'):
        fileVars['L1ConstMiss'].data = [0] + fileVars['L1ConstMiss'].data
    if fileVars.has_key('shaderWarpDiv'):
        fileVars['shaderWarpDiv'].data = nullOrganizedShader(fileVars['shaderWarpDiv'].data)
    # globalTotInsn is left untouched (self-assignment).
    if fileVars.has_key('globalTotInsn'):
        fileVars['globalTotInsn'].data = fileVars['globalTotInsn'].data
    if fileVars.has_key('STmemlatdist'):
        fileVars['STmemlatdist'].data = nullOrganizedShader(fileVars['STmemlatdist'].data)
    if fileVars.has_key('LDmemlatdist'):
        fileVars['LDmemlatdist'].data = nullOrganizedShader(fileVars['LDmemlatdist'].data)
    if fileVars.has_key('WarpDivergenceBreakdown'):
        fileVars['WarpDivergenceBreakdown'].data = nullOrganizedShader(fileVars['WarpDivergenceBreakdown'].data)
    # Per-DRAM-channel stats.  NOTE(review): dramMaxMRQS is parsed but
    # not re-organized here -- confirm whether that is intentional.
    if fileVars.has_key('dramCMD'):
        fileVars['dramCMD'].data = nullOrganizedDram(fileVars['dramCMD'].data)
    if fileVars.has_key('dramNOP'):
        fileVars['dramNOP'].data = nullOrganizedDram(fileVars['dramNOP'].data)
    if fileVars.has_key('dramNACT'):
        fileVars['dramNACT'].data = nullOrganizedDram(fileVars['dramNACT'].data)
    if fileVars.has_key('dramNPRE'):
        fileVars['dramNPRE'].data = nullOrganizedDram(fileVars['dramNPRE'].data)
    if fileVars.has_key('dramNREQ'):
        fileVars['dramNREQ'].data = nullOrganizedDram(fileVars['dramNREQ'].data)
    if fileVars.has_key('dramAveMRQS'):
        fileVars['dramAveMRQS'].data = nullOrganizedDram(fileVars['dramAveMRQS'].data)
    if fileVars.has_key('dramUtil'):
        fileVars['dramUtil'].data = nullOrganizedDram(fileVars['dramUtil'].data)
    if fileVars.has_key('dramEff'):
        fileVars['dramEff'].data = nullOrganizedDram(fileVars['dramEff'].data)
    # DRAM access-type breakdown stats (2D-indexed).
    if fileVars.has_key('dramglobal_acc_r'):
        fileVars['dramglobal_acc_r'].data = nullOrganizedDramV2(fileVars['dramglobal_acc_r'].data)
    if fileVars.has_key('dramglobal_acc_w'):
        fileVars['dramglobal_acc_w'].data = nullOrganizedDramV2(fileVars['dramglobal_acc_w'].data)
    if fileVars.has_key('dramlocal_acc_r'):
        fileVars['dramlocal_acc_r'].data = nullOrganizedDramV2(fileVars['dramlocal_acc_r'].data)
    if fileVars.has_key('dramlocal_acc_w'):
        fileVars['dramlocal_acc_w'].data = nullOrganizedDramV2(fileVars['dramlocal_acc_w'].data)
    if fileVars.has_key('dramconst_acc_r'):
        fileVars['dramconst_acc_r'].data = nullOrganizedDramV2(fileVars['dramconst_acc_r'].data)
    if fileVars.has_key('dramtexture_acc_r'):
        fileVars['dramtexture_acc_r'].data = nullOrganizedDramV2(fileVars['dramtexture_acc_r'].data)
    if fileVars.has_key('globalCompletedThreads'):
        fileVars['globalCompletedThreads'].data = [0] + fileVars['globalCompletedThreads'].data
    if fileVars.has_key('globalSentWrites'):
        fileVars['globalSentWrites'].data = [0] + fileVars['globalSentWrites'].data
    if fileVars.has_key('globalProcessedWrites'):
        fileVars['globalProcessedWrites'].data = [0] + fileVars['globalProcessedWrites'].data
    # averagemflatency starts reporting late; left-pad with zeros so it
    # lines up with the globalCycle axis.
    if fileVars.has_key('averagemflatency'):
        zeros = []
        for count in range(len(fileVars['averagemflatency'].data), len(fileVars['globalCycle'].data)):
            zeros.append(0)
        fileVars['averagemflatency'].data = zeros + fileVars['averagemflatency'].data
    if fileVars.has_key('gpu_stall_by_MSHRwb'):
        fileVars['gpu_stall_by_MSHRwb'].data = [0] + fileVars['gpu_stall_by_MSHRwb'].data
    # Per-kernel PC histograms (CFLOG): optionally map PTX lines back to
    # CUDA source lines and accumulate a global histogram.
    if (skipCFLog == 0) and fileVars.has_key('CFLOG'):
        # Scan recentfiles.txt for the .ptx file and the
        # gpgpu_inst_stats file; stop once both are found.
        # ('bool' counts how many of the two have been seen.)
        loadfile = open('recentfiles.txt', 'r')
        bool = 0
        while loadfile:
            line = loadfile.readline()
            if not line:
                break
            if '.ptx' in line:
                ptxFile = line
                bool += 1
            if 'gpgpu_inst_stats' in line:
                statFile = line
                bool += 1
            if bool == 2:
                break
        print "PC Histogram to CUDA Src = %d" % convertCFLog2CUDAsrc
        parseCFLOGCUDA = convertCFLog2CUDAsrc
        if parseCFLOGCUDA == 1:
            # CUDA-line -> [PTX lines] mapping, and the highest PTX line
            # for which instruction stats exist.
            map = lexyacctexteditor.ptxToCudaMapping(ptxFile.rstrip())
            maxStats = max(lexyacctexteditor.textEditorParseMe(statFile.rstrip()).keys())
        if parseCFLOGCUDA == 1:
            # Invert to PTX-line -> CUDA-line, then drop PTX lines that
            # fall outside the profiled range.
            newMap = {}
            for lines in map:
                for ptxLines in map[lines]:
                    newMap[ptxLines] = lines
            markForDel = []
            for ptxLines in newMap:
                if ptxLines > maxStats:
                    markForDel.append(ptxLines)
            for lines in markForDel:
                del newMap[lines]
        fileVars['CFLOGglobalPTX'] = vc.variable(2,0)
        fileVars['CFLOGglobalCUDA'] = vc.variable(2,0)
        count = 0
        for iter in fileVars['CFLOG']:
            print "Organizing data for %s" % iter
            fileVars[iter + 'PTX'] = fileVars['CFLOG'][iter]
            fileVars[iter + 'PTX'].data = CFLOGOrganizePTX(fileVars['CFLOG'][iter].data, fileVars['CFLOG'][iter].maxPC)
            if parseCFLOGCUDA == 1:
                fileVars[iter + 'CUDA'] = vc.variable(2,0)
                fileVars[iter + 'CUDA'].data = CFLOGOrganizeCuda(fileVars[iter + 'PTX'].data, newMap)
            try:
                # First kernel seeds the global accumulators; later
                # kernels are summed in element-wise.
                if count == 0:
                    fileVars['globalPTX'] = fileVars[iter + 'PTX']
                    if parseCFLOGCUDA == 1:
                        fileVars['globalCUDA'] = fileVars[iter + 'CUDA']
                else:
                    for rows in range(0, len(fileVars[iter + 'PTX'].data)):
                        for columns in range(0, len(fileVars[iter + 'PTX'].data[rows])):
                            fileVars['globalPTX'].data[rows][columns] += fileVars[iter + 'PTX'].data[rows][columns]
                    if parseCFLOGCUDA == 1:
                        for rows in range(0, len(fileVars[iter + 'CUDA'].data)):
                            for columns in range(0, len(fileVars[iter + 'CUDA'].data[rows])):
                                fileVars['globalCUDA'].data[rows][columns] += fileVars[iter + 'CUDA'].data[rows][columns]
            except:
                # Best-effort: shape mismatches between kernels are
                # reported but do not abort the whole organization.
                print "Error in generating globalCFLog data"
            count += 1
        del fileVars['CFLOG']
    return fileVars
def organizedata(fileVars): organizeFunction = { 'scalar': OrganizeScalar, # Scalar data 'impVec': nullOrganizedShader, # Implicit vector data for multiple units (used by Shader Core stats) 'stackbar': nullOrganizedStackedBar, # Stacked bars 'idxVec': nullOrganizedDram, # Vector data with index (used by DRAM stats) 'idx2DVec': nullOrganizedDramV2, # Vector data with 2D index (used by DRAM access stats) 'sparse': OrganizeSparse, # Vector data with 2D index (used by DRAM access stats) 'custom': 0 } data_type_char = {int: 'I', float: 'f'} print "Organizing data into internal format..." # Organize globalCycle in advance because it is used as a reference if ('globalCycle' in fileVars): statData = fileVars['globalCycle'] fileVars['globalCycle'].data = organizeFunction[statData.organize]( statData.data, data_type_char[statData.datatype]) # Organize other stat data into internal format for statName, statData in fileVars.iteritems(): if (statName != 'CFLOG' and statName != 'globalCycle' and statData.organize != 'custom'): fileVars[statName].data = organizeFunction[statData.organize]( statData.data, data_type_char[statData.datatype]) # Custom routines to organize stat data into internal format if fileVars.has_key('averagemflatency'): zeros = [] for count in range(len(fileVars['averagemflatency'].data), len(fileVars['globalCycle'].data)): zeros.append(0) fileVars['averagemflatency'].data = zeros + fileVars[ 'averagemflatency'].data if (skipCFLog == 0) and fileVars.has_key('CFLOG'): ptxFile = CFLOGptxFile statFile = CFLOGInsnInfoFile print "PC Histogram to CUDA Src = %d" % convertCFLog2CUDAsrc parseCFLOGCUDA = convertCFLog2CUDAsrc if parseCFLOGCUDA == 1: print "Obtaining PTX-to-CUDA Mapping from %s..." % ptxFile map = lexyacctexteditor.ptxToCudaMapping(ptxFile.rstrip()) print "Obtaining Program Range from %s..." 
% statFile maxStats = max( lexyacctexteditor.textEditorParseMe(statFile.rstrip()).keys()) if parseCFLOGCUDA == 1: newMap = {} for lines in map: for ptxLines in map[lines]: newMap[ptxLines] = lines markForDel = [] for ptxLines in newMap: if ptxLines > maxStats: markForDel.append(ptxLines) for lines in markForDel: del newMap[lines] fileVars['CFLOGglobalPTX'] = vc.variable('', 2, 0) fileVars['CFLOGglobalCUDA'] = vc.variable('', 2, 0) count = 0 for iter in fileVars['CFLOG']: print "Organizing data for %s" % iter fileVars[iter + 'PTX'] = fileVars['CFLOG'][iter] fileVars[iter + 'PTX'].data = CFLOGOrganizePTX( fileVars['CFLOG'][iter].data, fileVars['CFLOG'][iter].maxPC) if parseCFLOGCUDA == 1: fileVars[iter + 'CUDA'] = vc.variable('', 2, 0) fileVars[iter + 'CUDA'].data = CFLOGOrganizeCuda( fileVars[iter + 'PTX'].data, newMap) try: if count == 0: fileVars['globalPTX'] = fileVars[iter + 'PTX'] if parseCFLOGCUDA == 1: fileVars['globalCUDA'] = fileVars[iter + 'CUDA'] else: for rows in range(0, len(fileVars[iter + 'PTX'].data)): for columns in range( 0, len(fileVars[iter + 'PTX'].data[rows])): fileVars['globalPTX'].data[rows][ columns] += fileVars[iter + 'PTX'].data[rows][columns] if parseCFLOGCUDA == 1: for rows in range(0, len(fileVars[iter + 'CUDA'].data)): for columns in range( 0, len(fileVars[iter + 'CUDA'].data[rows])): fileVars['globalCUDA'].data[rows][ columns] += fileVars[ iter + 'CUDA'].data[rows][columns] except: print "Error in generating globalCFLog data" count += 1 del fileVars['CFLOG'] return fileVars