def buildCFG(self): # ignore the first line in the samples file self.ifb.getLine() moreBatches = not self.ifb.eof commonBinIns = dict() totalIns = dict() lowstdev = 0 highstdev = 0 stddevs = [] while (moreBatches): ib = InstructionBatch(self.batchSize, self.ifb) moreBatches = ib.fromFile() ib.calcStatistics(self.windowSize, 1) self.stat.registerLowStDevStatistics(ib) stddevs.append(ib.meanWindowStdev) if ib.batchId % 100 == 0: logger.debug("batch %d", ib.batchId) if ib.meanWindowStdev <= self.stdDevThreshold: instrGen = ib.genInstruction() self.buildCFGR(instrGen, 0, ib) lowstdev+=1 else: highstdev+=1 printedIns = False instrGen = ib.genInstruction() for i in instrGen: if i.isBranchOrCall(): b = self.stat.getBinFromAddr(i.pc) if b is None: continue bb = self.bbr.getBB(i.pc) if bb and b.count > self.recurrentThreshold: iafter = ib.getInstructionAfter(i) if iafter is not None: otherBB = self.bbr.getBB(iafter.pc) if not otherBB: logger.debug("trying to create a new BB for %x", iafter.pc) self.buildCFGR(instrGen, 1, ib) self.numHighStdevTries+=1 otherBB = self.bbr.getBB(iafter.pc) if otherBB: logger.debug("got it!") self.numHighStdevOK+=1 if otherBB: bb.addTarget(otherBB) otherBB.addSource(bb) self.cfg.addOrIncrementEdge(bb, otherBB) self.highStdevEdges+=1 self.cfg.toDot("test_builder1.dot", True, False) self.cfg.printCFG() print len(self.bbr.blocks), " basic blocks were recognized" print len(self.stat.bins), " address bins were created" print lowstdev, " low standard deviation batches" print highstdev, " high standard deviation batches" totalBBIns = [] for i in self.bbr.blocks: totalBBIns.append(len(self.bbr.blocks[i].instructions)) print "each block has an average of ", np.mean(totalBBIns), "+-" , np.std(totalBBIns), " instructions" print "number of basic block merges: ", self.numMerge print "number of high standard deviation recurrent edges marked: ", self.highStdevEdges print "number of high standard deviation basic block build tries: ", self.numHighStdevTries print "number of high standard deviation basic block actually built: ", self.numHighStdevOK #implementar metricas: quantidade de instrucoes e blocos basicos por funcao stdev = file(self.samplesFile + ".stdev", 'w') for i in stddevs: stdev.write(str(i) + "\n") stdev.close()
def buildCFG(self): # ignore the first line in the samples file self.ifb.getLine() moreBatches = not self.ifb.eof commonBinIns = dict() totalIns = dict() lowstdev = 0 highstdev = 0 lowstdevValues = [] histdevValues = [] stddevs = [] while (moreBatches): ib = InstructionBatch(self.batchSize, self.ifb) moreBatches = ib.fromFile() ib.calcStatistics(self.windowSize, 1) self.stat.registerLowStDevStatistics(ib) stddevs.append(ib.meanWindowStdev) if ib.batchId % 100 == 0: logger.debug("batch %d", ib.batchId) if ib.meanWindowStdev <= self.stdDevThreshold: instrGen = ib.genInstruction() self.buildCFGR(instrGen, 0, ib) lowstdev+=1 lowstdevValues.append(ib.meanWindowStdev) else: highstdev+=1 histdevValues.append(ib.meanWindowStdev) printedIns = False instrGen = ib.genInstruction() for i in instrGen: if i.isBranchOrCall(): if self.targets.has_key(i.target): self.targets[i.target] += 1 else: self.targets[i.target] = 1 b = self.stat.getBinFromAddr(i.pc) if b is None: continue bb = self.bbr.getBB(i.pc) if bb and b.count > self.recurrentThreshold: iafter = ib.getInstructionAfter(i) if iafter is not None: otherBB = self.bbr.getBB(iafter.pc) if not otherBB: logger.debug("trying to create a new BB for %x", iafter.pc) self.buildCFGR(instrGen, 1, ib) self.numHighStdevTries+=1 otherBB = self.bbr.getBB(iafter.pc) if otherBB: logger.debug("got it!") self.numHighStdevOK+=1 if otherBB: bb.addTarget(otherBB) otherBB.addSource(bb) self.cfg.addOrIncrementEdge(bb, otherBB) self.highStdevEdges+=1 # if i.pc not in totalIns: # totalIns[i.pc] = 1 # else: # totalIns[i.pc]+=1 # # b = self.stat.getBinFromAddr(i.pc) # if b.count > self.recurrentThreshold: # #logger.debug("instruction at %X is in a common bin", i.pc) # if i.pc not in commonBinIns: # commonBinIns[i.pc] = 1 # else: # commonBinIns[i.pc]+=1 # # if i.isBranchOrCall(): # if(self.targets.has_key(i.target)): # self.targets[i.target]+=1 # else: # self.targets[i.target] = 1 # # commonBinList = commonBinIns.items() # # commonBinList.sort(key=lambda x: x[0]) # # logger.debug("common bins:") # for i in commonBinList: # logger.debug("0x%x: %d", i[0], i[1]) # # logger.debug("control flow targets: (%d items)", len(self.targets)) # targetList = self.targets.items() # targetList.sort(key=lambda x: x[0]) # # for i in targetList: # logger.debug("0x%x: %d", i[0], i[1]) # # logger.debug("there were %d distinct instructions in common bins", len(commonBinIns)) # # logger.debug("a total of %d distinct instructions were sampled.", len(totalIns)) self.cfg.toDot("test_builder0.dot", False, True) self.cfg.printCFG() # bb = self.bbr.getBB(0x400811) # # for i in bb.getInstructions(): # print hex(i.pc) print len(self.bbr.blocks), " basic blocks were recognized" print len(self.stat.bins), " address bins were created" print lowstdev, " low standard deviation batches" print highstdev, " high standard deviation batches" totalBBIns = [] for i in self.bbr.blocks: totalBBIns.append(len(self.bbr.blocks[i].instructions)) print "each block has an average of ", np.mean(totalBBIns), "+-" , np.std(totalBBIns), " instructions" print "number of basic block merges: ", self.numMerge print "number of high standard deviation recurrent edges marked: ", self.highStdevEdges print "number of high standard deviation basic block build tries: ", self.numHighStdevTries print "number of high standard deviation basic block actually built: ", self.numHighStdevOK histdevValues.sort() lowstdevValues.sort() print "high stdev values", histdevValues print "hsv mean = ", np.mean(histdevValues), "+-" ,np.std(histdevValues) print "low stdev values", lowstdevValues print "lo mean = ", np.mean(lowstdevValues), "+-" ,np.std(lowstdevValues) #implementar metricas: quantidade de instrucoes e blocos basicos por funcao stdev = file(self.samplesFile + ".stdev", 'w') for i in stddevs: stdev.write(str(i) + "\n") stdev.close()