def assignIOValues(graphStorage, myTraceFileName):
    '''
    Collect values for I/O *memory* parameters (registers and constants
    already have their values).

    The trace is read once. A loop data flow (LDF) is tracked from the
    instruction at its startTime up to the instruction at its endTime.
    Meanwhile, each byte of its memory input parameters takes the value of
    the first read of this byte, and each byte of its memory output
    parameters takes the value of the last write of this byte. When the LDF
    ends, the collected bytes are copied into the 'value' field of its
    parameters.

    graphStorage    -- mapping key -> LDF (the LDFs are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a byte was never read (input) or never written (output)
    when its LDF ends, an error is printed and the collection is aborted.
    '''

    # Index the LDFs by start and end time for easy lookup
    startTimeLDF = dict()  # time -> set of LDFs starting at this time
    endTimeLDF = dict()  # time -> set of LDFs ending at this time
    for ldfKey in graphStorage.keys():
        ldf = graphStorage[ldfKey]
        startTimeLDF.setdefault(ldf.startTime, set()).add(ldf)
        endTimeLDF.setdefault(ldf.endTime, set()).add(ldf)

    # Collect values
    activeLDF = dict()  # ID -> LDF, for the LDFs currently running
    time = 0  # number of trace lines read so far, instructions or not

    # The context manager closes the trace on the error returns below too
    with open(myTraceFileName, 'r') as f:
        for line in f:
            ins = executionTrace.lineConnector(line)
            if ins is None:
                time += 1
                continue

            for startingLDF in startTimeLDF.get(time, ()):
                activeLDF[startingLDF.ID] = startingLDF

                # Build the set of input bytes
                for iv in startingLDF.inputParameters:
                    # Registers AND constants have already their values
                    if iv.registerName == '' and iv.constant == 0:
                        for c in range(0, iv.size):
                            startingLDF.WALFI[iv.startAddress + c] = 'U'  # Undefined value

                # Build the set of output bytes
                for ov in startingLDF.outputParameters:
                    # Registers have already their values
                    if ov.registerName == '':
                        for c in range(0, ov.size):
                            startingLDF.WALFO[ov.startAddress + c] = 'U'  # Undefined value

            for ldfID in activeLDF:
                ldf = activeLDF[ldfID]

                for count, rAddr in enumerate(ins.memoryReadAddress):
                    baseAddr = int(rAddr, 16)
                    for indexByte in range(0, ins.memoryReadSize[count]):
                        addrReadByte = baseAddr + indexByte
                        # First time it is read, it gives its final value
                        if addrReadByte in ldf.WALFI \
                                and ldf.WALFI[addrReadByte] == 'U':
                            # Values are hexadecimal strings, two characters per byte
                            ldf.WALFI[addrReadByte] = \
                                (ins.memoryReadValue[count])[indexByte * 2:indexByte * 2 + 2]

                for count, wAddr in enumerate(ins.memoryWriteAddress):
                    baseAddr = int(wAddr, 16)
                    for indexByte in range(0, ins.memoryWriteSize[count]):
                        addrWriteByte = baseAddr + indexByte
                        # Each time it is written, it updates its value
                        if addrWriteByte in ldf.WALFO:
                            ldf.WALFO[addrWriteByte] = \
                                (ins.memoryWriteValue[count])[indexByte * 2:indexByte * 2 + 2]

            for endingLDF in endTimeLDF.get(time, ()):
                # Copy the collected values back in the input variables
                for iv in endingLDF.inputParameters:
                    if iv.registerName == '' and iv.constant == 0:
                        for c in range(0, iv.size):
                            byteValue = endingLDF.WALFI[iv.startAddress + c]
                            if byteValue == 'U':
                                print('ERROR - A value missed ?! (r)')
                                return
                            iv.value[c] = byteValue

                # Copy the collected values back in the output variables
                for ov in endingLDF.outputParameters:
                    if ov.registerName == '':
                        for c in range(0, ov.size):
                            byteValue = endingLDF.WALFO[ov.startAddress + c]
                            if byteValue == 'U':
                                print('ERROR - A value missed ?! (w)')
                                return
                            ov.value[c] = byteValue

                # Raises KeyError if the LDF was never started, as before
                del activeLDF[endingLDF.ID]

            time += 1
def buildLoopIOMemory(myLoopStorage, myTraceFileName):
    '''
    Build I/O parameters from memory. A parameter is defined as a set of
    bytes adjacent in memory *and* used (R|W) by a same instruction in a loop
    body. The actual values of these parameters will be set later during the
    loop data flow graph building.

    For each valid instance of each loop, the trace is replayed between the
    start and end times of the instance. Nested loops (body elements whose
    string form starts with '+L') are jumped over and recorded in the
    'imbricatedInstanceID' list of the instance. The resulting variables are
    stored in 'inputMemoryParameters' and 'outputMemoryParameters'.

    myLoopStorage   -- mapping key -> loop (the instances are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a nested loop or its instance cannot be found, a message
    is printed and the whole building is aborted.
    '''

    for k in myLoopStorage.keys():

        myLoop = myLoopStorage[k]

        # For all instances of the loop
        for instanceCounter in myLoop.instances.keys():

            instance = myLoop.instances[instanceCounter]

            # Only take into account the valid instances
            if instance.valid == 0x0:
                continue

            # model[i][0]: addresses read by the i-th instruction of the body
            # model[i][1]: addresses written by the i-th instruction of the body
            model = [[set(), set()] for _ in range(0x0, len(myLoop.body))]

            writtenAddresses = set()
            insCounter = 0x0
            time = instance.startTime

            # The context manager closes the trace on the error returns below too
            with open(myTraceFileName, 'r') as f:

                # Go to the first line of the instance
                skipped = 0x0
                while skipped < time:
                    f.readline()
                    skipped += 1

                while time <= instance.endTime:

                    myIns = executionTrace.lineConnector(f.readline())
                    if myIns is None:
                        time += 1
                        continue

                    # Jump over nested loops
                    # The nested loop can turn a different number of times at
                    # each turn of the big loop
                    if str(myLoop.body[insCounter]).startswith('+L'):

                        # Goal : move the time over the loop in the trace
                        loopId = int(str(myLoop.body[insCounter])[2:])

                        # Look for the associated instance based on its start time
                        found = 0x0
                        for nestedKey in myLoopStorage.keys():
                            nestedLoop = myLoopStorage[nestedKey]
                            if nestedLoop.ID != loopId:
                                continue
                            found = 1
                            foundBis = 0x0
                            for kk in nestedLoop.instances.keys():
                                nestedInstance = nestedLoop.instances[kk]
                                if nestedInstance.startTime == time:
                                    # Carry out the instance length in the trace
                                    lengthToJump = nestedInstance.endTime \
                                        - nestedInstance.startTime + 1
                                    foundBis = 1
                                    instance.imbricatedInstanceID.append(
                                        [nestedKey, kk])
                                    break
                            if foundBis == 0x0:
                                print('Fail to find the instance!')
                                print('Loop ' + str(myLoop.ID))
                                print('We look for ' + str(loopId)
                                      + ' at time ' + str(time))
                                return

                        if found == 0x0:
                            print('Fail to find the loop !!!')
                            return

                        time += lengthToJump

                        # The first line of the nested instance has already
                        # been read, skip the remaining ones
                        skipped = 0x0
                        while skipped < lengthToJump - 1:
                            f.readline()
                            skipped += 1

                    else:

                        # Read
                        for count, rAddr in enumerate(myIns.memoryReadAddress):
                            for b in range(0x0, myIns.memoryReadSize[count]):
                                addr = int(rAddr, 16) + b
                                # Not previously written ?
                                if addr not in writtenAddresses:
                                    model[insCounter][0x0].add(addr)

                        # Write
                        for count, wAddr in enumerate(myIns.memoryWriteAddress):
                            for b in range(0x0, myIns.memoryWriteSize[count]):
                                addr = int(wAddr, 16) + b
                                model[insCounter][1].add(addr)
                                writtenAddresses.add(addr)

                        time += 1

                    insCounter = (insCounter + 1) % len(myLoop.body)

            # Build the variables: ioVars[0] for inputs, ioVars[1] for outputs.
            # One variable per run of adjacent addresses used by a same
            # instruction. Walking the sorted addresses gives the same runs as
            # scanning every address between min and max, without a cost
            # proportional to the address span.
            ioVars = [list(), list()]
            for kind in (0x0, 1):
                for entry in model:
                    addresses = entry[kind]
                    if len(addresses) == 0x0:
                        continue

                    # A start address of 0 means "no variable opened yet"
                    var = variable.variable(0x0, 0x0)
                    previous = None
                    for addr in sorted(addresses):
                        if previous is not None and addr != previous + 1:
                            # Gap: close existing var if there is one
                            if var.startAddress != 0x0:
                                ioVars[kind].append(var)
                                var = variable.variable(0x0, 0x0)
                        if var.startAddress == 0x0:
                            var.startAddress = addr
                        var.incrementSize()
                        previous = addr

                    # Close last one
                    if var.startAddress != 0x0:
                        ioVars[kind].append(var)

            # Deal with duplicates: drop the variables contained in another
            # one and merge the ones that intersect. The scan restarts from
            # the beginning after each modification.
            for varList in ioVars:
                i = 0x0
                while i < len(varList):
                    for j in range(i + 1, len(varList)):
                        if varList[i].contains(varList[j]):
                            del varList[j]
                            i = -1
                            break
                        if varList[i].intersects(varList[j]):
                            varList[i] = varList[i].merge(varList[j])
                            del varList[j]
                            i = -1
                            break
                    i += 1

            instance.inputMemoryParameters = ioVars[0x0]
            instance.outputMemoryParameters = ioVars[1]
def buildLoopIORegisters(myLoopStorage, myTraceFileName):
    '''
    Build I/O parameters from registers. Such parameters are defined as bytes
    manipulated by a same instruction in a loop body *and* in the same
    register at this moment. Contrary to memory I/O parameters, we set their
    values here.

    For each valid instance of each loop, the trace is replayed between the
    start and end times of the instance. Nested loops (body elements whose
    string form starts with '+L') are jumped over and recorded in the
    'imbricatedInstanceID' list of the instance. The resulting variables are
    appended to 'inputRegisterParameters' and 'outputRegisterParameters'.

    myLoopStorage   -- mapping key -> loop (the instances are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a nested loop or its instance cannot be found, a message
    is printed and the whole building is aborted.
    '''

    for k in myLoopStorage.keys():

        myLoop = myLoopStorage[k]

        # For all instances of the loop
        for instanceCounter in myLoop.instances.keys():

            instance = myLoop.instances[instanceCounter]

            # Only take into account the valid instances
            if instance.valid == 0x0:
                continue

            registerInputBytes = dict()  # register byte address -> value
            registerOutputBytes = dict()  # register byte address -> value

            insCounter = 0x0
            time = instance.startTime

            # The context manager closes the trace on the error returns below too
            with open(myTraceFileName, 'r') as f:

                # Go to the first line of the instance
                skipped = 0x0
                while skipped < time:
                    f.readline()
                    skipped += 1

                while time <= instance.endTime:

                    myIns = executionTrace.lineConnector(f.readline())
                    if myIns is None:
                        time += 1
                        continue

                    # Jump over nested loops
                    # The nested loop can turn a different number of times at
                    # each turn of the big loop
                    if str(myLoop.body[insCounter]).startswith('+L'):

                        # Goal : move the time over the loop in the trace
                        # What loop ?
                        loopId = int(str(myLoop.body[insCounter])[2:])

                        # Look for the associated instance (could we make the
                        # assumption that the key == ID ?)
                        found = 0x0
                        for nestedKey in myLoopStorage.keys():
                            nestedLoop = myLoopStorage[nestedKey]
                            if nestedLoop.ID != loopId:
                                continue
                            found = 1
                            # Look for the instance with the right start time
                            foundBis = 0x0
                            for kk in nestedLoop.instances.keys():
                                nestedInstance = nestedLoop.instances[kk]
                                if nestedInstance.startTime == time:
                                    # Carry out the instance length in the trace
                                    lengthToJump = nestedInstance.endTime \
                                        - nestedInstance.startTime + 1
                                    foundBis = 1
                                    instance.imbricatedInstanceID.append(
                                        [nestedKey, kk])
                                    break
                            if foundBis == 0x0:
                                print('Fail to find the instance!')
                                print('Loop ' + str(myLoop.ID))
                                print('We look for ' + str(loopId)
                                      + ' at time ' + str(time))
                                return

                        if found == 0x0:
                            print('Fail to find the loop !!!')
                            return

                        time += lengthToJump

                        # The first line of the nested instance has already
                        # been read, skip the remaining ones
                        skipped = 0x0
                        while skipped < lengthToJump - 1:
                            f.readline()
                            skipped += 1

                    else:

                        # Read: a byte is recorded as input only while the
                        # instance has not written it. Values are hexadecimal
                        # strings, two characters per byte.
                        for count, rReg in enumerate(myIns.registersRead):
                            for countAddr, addr in enumerate(
                                    utilities.registersAddress(rReg)):
                                if addr not in registerOutputBytes:
                                    registerInputBytes[addr] = \
                                        (myIns.registersReadValue[count])[countAddr * 2:countAddr * 2 + 2]

                        # Write: the last written value wins
                        for count, wReg in enumerate(myIns.registersWrite):
                            for countAddr, addr in enumerate(
                                    utilities.registersAddress(wReg)):
                                registerOutputBytes[addr] = \
                                    (myIns.registersWriteValue[count])[countAddr * 2:countAddr * 2 + 2]

                        time += 1

                    insCounter = (insCounter + 1) % len(myLoop.body)

            # Input register parameters first, then output register parameters
            for registerBytes, parametersName in (
                    (registerInputBytes, 'inputRegisterParameters'),
                    (registerOutputBytes, 'outputRegisterParameters')):

                for reg in utilities.GPR32:

                    # Check which parts of the reg have been used
                    existL = reg + '3' in registerBytes  # AL, BL...
                    existH = reg + '2' in registerBytes  # AH, BH...
                    existX = reg + '1' in registerBytes  # EAX, EBX...

                    if existX:
                        # 32-bit register
                        # NOTE(review): only byte 1 is tested. If bytes 0, 2
                        # or 3 were not recorded (e.g. an input byte skipped
                        # because it was written before being read), the
                        # lookups below raise KeyError - confirm this cannot
                        # happen.
                        var = variable.variable(0x0, 4, reg)
                        var.value[0x0] = registerBytes[reg + '0']
                        var.value[1] = registerBytes[reg + '1']
                        var.value[2] = registerBytes[reg + '2']
                        var.value[3] = registerBytes[reg + '3']
                    elif existH and existL:
                        # 16-bit register (AX, BX...)
                        var = variable.variable(0x0, 2, reg[1:])
                        var.value[0x0] = registerBytes[reg + '2']
                        var.value[1] = registerBytes[reg + '3']
                    elif existH:
                        # 8-bit register (AH, BH...)
                        var = variable.variable(0x0, 1, reg[1:2] + 'h')
                        var.value[0x0] = registerBytes[reg + '2']
                    elif existL:
                        # 8-bit register (AL, BL...)
                        var = variable.variable(0x0, 1, reg[1:2] + 'l')
                        var.value[0x0] = registerBytes[reg + '3']
                    else:
                        # Register not used by this instance
                        continue

                    getattr(instance, parametersName).append(var)
def buildLoopIOConstants(myLoopStorage, myTraceFileName):
    '''
    Build constant parameters for loops, e.g. 0x42 in MOV EAX, 0x42. In
    particular, these are only input parameters. It actually only works for
    4-byte constants (cf. TODO list).

    For each valid instance of each loop, the trace is replayed between the
    start and end times of the instance. Nested loops (body elements whose
    string form starts with '+L') are jumped over and recorded in the
    'imbricatedInstanceID' list of the instance. One variable per distinct
    constant is appended to the 'constantParameter' list of the instance.

    myLoopStorage   -- mapping key -> loop (the instances are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a nested loop or its instance cannot be found, a message
    is printed and the whole building is aborted.
    '''

    # Next fake address to attribute. It is shared by all the loops so that
    # each constant parameter gets a unique one.
    constantAddr = 0x0

    for k in myLoopStorage.keys():

        myLoop = myLoopStorage[k]

        # For all instances of the loop
        for instanceCounter in myLoop.instances.keys():

            instance = myLoop.instances[instanceCounter]

            # Only take into account the valid instances
            if instance.valid == 0x0:
                continue

            constantSet = set()  # constants already seen in this instance
            insCounter = 0x0
            time = instance.startTime

            # The context manager closes the trace in every case. It was
            # never closed before.
            with open(myTraceFileName, 'r') as f:

                # Go to the first line of the instance
                skipped = 0x0
                while skipped < time:
                    f.readline()
                    skipped += 1

                while time <= instance.endTime:

                    myIns = executionTrace.lineConnector(f.readline())
                    if myIns is None:
                        time += 1
                        continue

                    # Jump over nested loops
                    # The nested loop can turn a different number of times at
                    # each turn of the big loop
                    if str(myLoop.body[insCounter]).startswith('+L'):

                        # Goal : move the time over the loop in the trace
                        # What loop ?
                        loopId = int(str(myLoop.body[insCounter])[2:])

                        # Look for the associated instance (could we make the
                        # assumption that the key == ID ?)
                        found = 0x0
                        for nestedKey in myLoopStorage.keys():
                            nestedLoop = myLoopStorage[nestedKey]
                            if nestedLoop.ID != loopId:
                                continue
                            found = 1
                            # Look for the instance with the right start time
                            foundBis = 0x0
                            for kk in nestedLoop.instances.keys():
                                nestedInstance = nestedLoop.instances[kk]
                                if nestedInstance.startTime == time:
                                    # Carry out the instance length in the trace
                                    lengthToJump = nestedInstance.endTime \
                                        - nestedInstance.startTime + 1
                                    foundBis = 1
                                    instance.imbricatedInstanceID.append(
                                        [nestedKey, kk])
                                    break
                            if foundBis == 0x0:
                                print('Fail to find the instance!')
                                print('Loop ' + str(myLoop.ID))
                                print('We look for ' + str(loopId)
                                      + ' at time ' + str(time))
                                return

                        if found == 0x0:
                            print('Fail to find the loop !!!')
                            return

                        time += lengthToJump

                        # The first line of the nested instance has already
                        # been read, skip the remaining ones
                        skipped = 0x0
                        while skipped < lengthToJump - 1:
                            f.readline()
                            skipped += 1

                    else:

                        for cte in myIns.constants:
                            if cte in constantSet:
                                continue
                            constantSet.add(cte)

                            # Due to the way we represent parameters, we have
                            # to attribute fake addresses to constant
                            # parameters. These addresses need to be unique,
                            # in order to not influence the data-flow
                            # building step.
                            var = variable.variable(constantAddr, 4)
                            var.constant = 1
                            constantAddr += 4

                            # The constant is a hexadecimal string, two
                            # characters per byte
                            for i in range(0x0, 4):
                                var.value[i] = cte[i * 2:i * 2 + 2]

                            instance.constantParameter.append(var)

                        time += 1

                    # Follow the loop body, as the memory and register
                    # builders do. The position was never advanced before, so
                    # the nested loop test above always looked at the first
                    # element of the body.
                    insCounter = (insCounter + 1) % len(myLoop.body)
def detectLoop(myTraceFileName):
    '''
    Recognition of a loop (a word in the language w.w) from the machine
    instructions in an execution trace.

    Each instruction of the trace is first tested against the on-going loop
    stacks, in order. If none of them confirms a loop, the instruction is
    tested as the possible beginning of new loops.

    myTraceFileName -- path of the execution trace file

    Returns the loop storage, a dict handed to match() and createLoops()
    (presumably filled there with the detected loops - to be confirmed).
    '''

    global onGoingLoopsStacks

    loopStorage = dict()
    history = executionHistory()

    # time is actually the number of lines (including "API CALL .." lines)
    time = 0x0

    # The context manager closes the trace even if the detection raises
    with open(myTraceFileName, 'r') as f:

        for line in f:

            # Progress report
            if time != 0x0 and time % 100000 == 0x0:
                print('100000 lines...')

            if debugMode == 1:
                if time != 0x0 and time % 10000 == 0x0:
                    print('S:10000 lines...')
                    print('History depth')
                    print(len(history.elements))
                if time != 0x0 and time % 1000 == 0x0:
                    print('SS:1000 lines...')
                    print('History depth')
                    print(len(history.elements))

            ins = executionTrace.lineConnector(line)
            if ins is None:
                # Not an instruction, but the line still counts in the time
                time += 1
                continue

            if debugMode:
                print('_ _ _ _\n')
                print('++ Read from trace : ' + str(ins))

            confirmedLoop = None

            # The list order is important
            for p in onGoingLoopsStacks:
                if p.empty():
                    continue
                if debugMode:
                    print('++ Test loop stack: ')
                    p.display()
                if match(loopStorage, ins, p, history, time):
                    # As soon as we got a confirmed loop we are happy!
                    confirmedLoop = p
                    break

            if confirmedLoop is None:
                # We test if the current instruction can begin a loop
                if createLoops(loopStorage, history.possibleLoops(ins),
                               time, history):
                    # We dont append for 1-inst loop
                    history.append(ins, time)
            else:
                # Only one confirmed loop at a time, we can clean the others
                cleanOnGoingLoops(confirmedLoop)

            time += 1

    if debugMode:
        print('\nHistory')
        history.display()

    return loopStorage
def assignIOValues(graphStorage, myTraceFileName):
    '''
    Collect values for I/O *memory* parameters (registers and constants
    already have their values).

    The trace is read once. A loop data flow (LDF) is tracked from the
    instruction at its startTime up to the instruction at its endTime.
    Meanwhile, each byte of its memory input parameters takes the value of
    the first read of this byte, and each byte of its memory output
    parameters takes the value of the last write of this byte. When the LDF
    ends, the collected bytes are copied into the 'value' field of its
    parameters.

    graphStorage    -- mapping key -> LDF (the LDFs are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a byte was never read (input) or never written (output)
    when its LDF ends, an error is printed and the collection is aborted.
    '''

    # Index the LDFs by start and end time for easy lookup
    startTimeLDF = dict()  # time -> set of LDFs starting at this time
    endTimeLDF = dict()  # time -> set of LDFs ending at this time
    for ldfKey in graphStorage.keys():
        ldf = graphStorage[ldfKey]
        startTimeLDF.setdefault(ldf.startTime, set()).add(ldf)
        endTimeLDF.setdefault(ldf.endTime, set()).add(ldf)

    # Collect values
    activeLDF = dict()  # ID -> LDF, for the LDFs currently running
    time = 0  # number of trace lines read so far, instructions or not

    # The context manager closes the trace on the error returns below too
    with open(myTraceFileName, 'r') as f:
        for line in f:
            ins = executionTrace.lineConnector(line)
            if ins is None:
                time += 1
                continue

            for startingLDF in startTimeLDF.get(time, ()):
                activeLDF[startingLDF.ID] = startingLDF

                # Build the set of input bytes
                for iv in startingLDF.inputParameters:
                    # Registers AND constants have already their values
                    if iv.registerName == '' and iv.constant == 0:
                        for c in range(0, iv.size):
                            startingLDF.WALFI[iv.startAddress + c] = 'U'  # Undefined value

                # Build the set of output bytes
                for ov in startingLDF.outputParameters:
                    # Registers have already their values
                    if ov.registerName == '':
                        for c in range(0, ov.size):
                            startingLDF.WALFO[ov.startAddress + c] = 'U'  # Undefined value

            for ldfID in activeLDF:
                ldf = activeLDF[ldfID]

                for count, rAddr in enumerate(ins.memoryReadAddress):
                    baseAddr = int(rAddr, 16)
                    for indexByte in range(0, ins.memoryReadSize[count]):
                        addrReadByte = baseAddr + indexByte
                        # First time it is read, it gives its final value
                        if addrReadByte in ldf.WALFI \
                                and ldf.WALFI[addrReadByte] == 'U':
                            # Values are hexadecimal strings, two characters per byte
                            ldf.WALFI[addrReadByte] = \
                                (ins.memoryReadValue[count])[indexByte * 2:indexByte * 2 + 2]

                for count, wAddr in enumerate(ins.memoryWriteAddress):
                    baseAddr = int(wAddr, 16)
                    for indexByte in range(0, ins.memoryWriteSize[count]):
                        addrWriteByte = baseAddr + indexByte
                        # Each time it is written, it updates its value
                        if addrWriteByte in ldf.WALFO:
                            ldf.WALFO[addrWriteByte] = \
                                (ins.memoryWriteValue[count])[indexByte * 2:indexByte * 2 + 2]

            for endingLDF in endTimeLDF.get(time, ()):
                # Copy the collected values back in the input variables
                for iv in endingLDF.inputParameters:
                    if iv.registerName == '' and iv.constant == 0:
                        for c in range(0, iv.size):
                            byteValue = endingLDF.WALFI[iv.startAddress + c]
                            if byteValue == 'U':
                                print('ERROR - A value missed ?! (r)')
                                return
                            iv.value[c] = byteValue

                # Copy the collected values back in the output variables
                for ov in endingLDF.outputParameters:
                    if ov.registerName == '':
                        for c in range(0, ov.size):
                            byteValue = endingLDF.WALFO[ov.startAddress + c]
                            if byteValue == 'U':
                                print('ERROR - A value missed ?! (w)')
                                return
                            ov.value[c] = byteValue

                # Raises KeyError if the LDF was never started, as before
                del activeLDF[endingLDF.ID]

            time += 1
def buildLoopIORegisters(myLoopStorage, myTraceFileName):
    '''
    Build I/O parameters from registers. Such parameters are defined as bytes
    manipulated by a same instruction in a loop body *and* in the same
    register at this moment. Contrary to memory I/O parameters, we set their
    values here.

    For each valid instance of each loop, the trace is replayed between the
    start and end times of the instance. Nested loops (body elements whose
    string form starts with '+L') are jumped over and recorded in the
    'imbricatedInstanceID' list of the instance. The resulting variables are
    appended to 'inputRegisterParameters' and 'outputRegisterParameters'.

    myLoopStorage   -- mapping key -> loop (the instances are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a nested loop or its instance cannot be found, a message
    is printed and the whole building is aborted.
    '''

    for k in myLoopStorage.keys():

        myLoop = myLoopStorage[k]

        # For all instances of the loop
        for instanceCounter in myLoop.instances.keys():

            instance = myLoop.instances[instanceCounter]

            # Only take into account the valid instances
            if instance.valid == 0x0:
                continue

            registerInputBytes = dict()  # register byte address -> value
            registerOutputBytes = dict()  # register byte address -> value

            insCounter = 0x0
            time = instance.startTime

            # The context manager closes the trace on the error returns below too
            with open(myTraceFileName, 'r') as f:

                # Go to the first line of the instance
                skipped = 0x0
                while skipped < time:
                    f.readline()
                    skipped += 1

                while time <= instance.endTime:

                    myIns = executionTrace.lineConnector(f.readline())
                    if myIns is None:
                        time += 1
                        continue

                    # Jump over nested loops
                    # The nested loop can turn a different number of times at
                    # each turn of the big loop
                    if str(myLoop.body[insCounter]).startswith('+L'):

                        # Goal : move the time over the loop in the trace
                        # What loop ?
                        loopId = int(str(myLoop.body[insCounter])[2:])

                        # Look for the associated instance (could we make the
                        # assumption that the key == ID ?)
                        found = 0x0
                        for nestedKey in myLoopStorage.keys():
                            nestedLoop = myLoopStorage[nestedKey]
                            if nestedLoop.ID != loopId:
                                continue
                            found = 1
                            # Look for the instance with the right start time
                            foundBis = 0x0
                            for kk in nestedLoop.instances.keys():
                                nestedInstance = nestedLoop.instances[kk]
                                if nestedInstance.startTime == time:
                                    # Carry out the instance length in the trace
                                    lengthToJump = nestedInstance.endTime \
                                        - nestedInstance.startTime + 1
                                    foundBis = 1
                                    instance.imbricatedInstanceID.append(
                                        [nestedKey, kk])
                                    break
                            if foundBis == 0x0:
                                print('Fail to find the instance!')
                                print('Loop ' + str(myLoop.ID))
                                print('We look for ' + str(loopId)
                                      + ' at time ' + str(time))
                                return

                        if found == 0x0:
                            print('Fail to find the loop !!!')
                            return

                        time += lengthToJump

                        # The first line of the nested instance has already
                        # been read, skip the remaining ones
                        skipped = 0x0
                        while skipped < lengthToJump - 1:
                            f.readline()
                            skipped += 1

                    else:

                        # Read: a byte is recorded as input only while the
                        # instance has not written it. Values are hexadecimal
                        # strings, two characters per byte.
                        for count, rReg in enumerate(myIns.registersRead):
                            for countAddr, addr in enumerate(
                                    utilities.registersAddress(rReg)):
                                if addr not in registerOutputBytes:
                                    registerInputBytes[addr] = \
                                        (myIns.registersReadValue[count])[countAddr * 2:countAddr * 2 + 2]

                        # Write: the last written value wins
                        for count, wReg in enumerate(myIns.registersWrite):
                            for countAddr, addr in enumerate(
                                    utilities.registersAddress(wReg)):
                                registerOutputBytes[addr] = \
                                    (myIns.registersWriteValue[count])[countAddr * 2:countAddr * 2 + 2]

                        time += 1

                    insCounter = (insCounter + 1) % len(myLoop.body)

            # Input register parameters first, then output register parameters
            for registerBytes, parametersName in (
                    (registerInputBytes, 'inputRegisterParameters'),
                    (registerOutputBytes, 'outputRegisterParameters')):

                for reg in utilities.GPR32:

                    # Check which parts of the reg have been used
                    existL = reg + '3' in registerBytes  # AL, BL...
                    existH = reg + '2' in registerBytes  # AH, BH...
                    existX = reg + '1' in registerBytes  # EAX, EBX...

                    if existX:
                        # 32-bit register
                        # NOTE(review): only byte 1 is tested. If bytes 0, 2
                        # or 3 were not recorded (e.g. an input byte skipped
                        # because it was written before being read), the
                        # lookups below raise KeyError - confirm this cannot
                        # happen.
                        var = variable.variable(0x0, 4, reg)
                        var.value[0x0] = registerBytes[reg + '0']
                        var.value[1] = registerBytes[reg + '1']
                        var.value[2] = registerBytes[reg + '2']
                        var.value[3] = registerBytes[reg + '3']
                    elif existH and existL:
                        # 16-bit register (AX, BX...)
                        var = variable.variable(0x0, 2, reg[1:])
                        var.value[0x0] = registerBytes[reg + '2']
                        var.value[1] = registerBytes[reg + '3']
                    elif existH:
                        # 8-bit register (AH, BH...)
                        var = variable.variable(0x0, 1, reg[1:2] + 'h')
                        var.value[0x0] = registerBytes[reg + '2']
                    elif existL:
                        # 8-bit register (AL, BL...)
                        var = variable.variable(0x0, 1, reg[1:2] + 'l')
                        var.value[0x0] = registerBytes[reg + '3']
                    else:
                        # Register not used by this instance
                        continue

                    getattr(instance, parametersName).append(var)
def buildLoopIOMemory(myLoopStorage, myTraceFileName):
    '''
    Build I/O parameters from memory. A parameter is defined as a set of
    bytes adjacent in memory *and* used (R|W) by a same instruction in a loop
    body. The actual values of these parameters will be set later during the
    loop data flow graph building.

    For each valid instance of each loop, the trace is replayed between the
    start and end times of the instance. Nested loops (body elements whose
    string form starts with '+L') are jumped over and recorded in the
    'imbricatedInstanceID' list of the instance. The resulting variables are
    stored in 'inputMemoryParameters' and 'outputMemoryParameters'.

    myLoopStorage   -- mapping key -> loop (the instances are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a nested loop or its instance cannot be found, a message
    is printed and the whole building is aborted.
    '''

    for k in myLoopStorage.keys():

        myLoop = myLoopStorage[k]

        # For all instances of the loop
        for instanceCounter in myLoop.instances.keys():

            instance = myLoop.instances[instanceCounter]

            # Only take into account the valid instances
            if instance.valid == 0x0:
                continue

            # model[i][0]: addresses read by the i-th instruction of the body
            # model[i][1]: addresses written by the i-th instruction of the body
            model = [[set(), set()] for _ in range(0x0, len(myLoop.body))]

            writtenAddresses = set()
            insCounter = 0x0
            time = instance.startTime

            # The context manager closes the trace on the error returns below too
            with open(myTraceFileName, 'r') as f:

                # Go to the first line of the instance
                skipped = 0x0
                while skipped < time:
                    f.readline()
                    skipped += 1

                while time <= instance.endTime:

                    myIns = executionTrace.lineConnector(f.readline())
                    if myIns is None:
                        time += 1
                        continue

                    # Jump over nested loops
                    # The nested loop can turn a different number of times at
                    # each turn of the big loop
                    if str(myLoop.body[insCounter]).startswith('+L'):

                        # Goal : move the time over the loop in the trace
                        loopId = int(str(myLoop.body[insCounter])[2:])

                        # Look for the associated instance based on its start time
                        found = 0x0
                        for nestedKey in myLoopStorage.keys():
                            nestedLoop = myLoopStorage[nestedKey]
                            if nestedLoop.ID != loopId:
                                continue
                            found = 1
                            foundBis = 0x0
                            for kk in nestedLoop.instances.keys():
                                nestedInstance = nestedLoop.instances[kk]
                                if nestedInstance.startTime == time:
                                    # Carry out the instance length in the trace
                                    lengthToJump = nestedInstance.endTime \
                                        - nestedInstance.startTime + 1
                                    foundBis = 1
                                    instance.imbricatedInstanceID.append(
                                        [nestedKey, kk])
                                    break
                            if foundBis == 0x0:
                                print('Fail to find the instance!')
                                print('Loop ' + str(myLoop.ID))
                                print('We look for ' + str(loopId)
                                      + ' at time ' + str(time))
                                return

                        if found == 0x0:
                            print('Fail to find the loop !!!')
                            return

                        time += lengthToJump

                        # The first line of the nested instance has already
                        # been read, skip the remaining ones
                        skipped = 0x0
                        while skipped < lengthToJump - 1:
                            f.readline()
                            skipped += 1

                    else:

                        # Read
                        for count, rAddr in enumerate(myIns.memoryReadAddress):
                            for b in range(0x0, myIns.memoryReadSize[count]):
                                addr = int(rAddr, 16) + b
                                # Not previously written ?
                                if addr not in writtenAddresses:
                                    model[insCounter][0x0].add(addr)

                        # Write
                        for count, wAddr in enumerate(myIns.memoryWriteAddress):
                            for b in range(0x0, myIns.memoryWriteSize[count]):
                                addr = int(wAddr, 16) + b
                                model[insCounter][1].add(addr)
                                writtenAddresses.add(addr)

                        time += 1

                    insCounter = (insCounter + 1) % len(myLoop.body)

            # Build the variables: ioVars[0] for inputs, ioVars[1] for outputs.
            # One variable per run of adjacent addresses used by a same
            # instruction. Walking the sorted addresses gives the same runs as
            # scanning every address between min and max, without a cost
            # proportional to the address span.
            ioVars = [list(), list()]
            for kind in (0x0, 1):
                for entry in model:
                    addresses = entry[kind]
                    if len(addresses) == 0x0:
                        continue

                    # A start address of 0 means "no variable opened yet"
                    var = variable.variable(0x0, 0x0)
                    previous = None
                    for addr in sorted(addresses):
                        if previous is not None and addr != previous + 1:
                            # Gap: close existing var if there is one
                            if var.startAddress != 0x0:
                                ioVars[kind].append(var)
                                var = variable.variable(0x0, 0x0)
                        if var.startAddress == 0x0:
                            var.startAddress = addr
                        var.incrementSize()
                        previous = addr

                    # Close last one
                    if var.startAddress != 0x0:
                        ioVars[kind].append(var)

            # Deal with duplicates: drop the variables contained in another
            # one and merge the ones that intersect. The scan restarts from
            # the beginning after each modification.
            for varList in ioVars:
                i = 0x0
                while i < len(varList):
                    for j in range(i + 1, len(varList)):
                        if varList[i].contains(varList[j]):
                            del varList[j]
                            i = -1
                            break
                        if varList[i].intersects(varList[j]):
                            varList[i] = varList[i].merge(varList[j])
                            del varList[j]
                            i = -1
                            break
                    i += 1

            instance.inputMemoryParameters = ioVars[0x0]
            instance.outputMemoryParameters = ioVars[1]
def buildLoopIOConstants(myLoopStorage, myTraceFileName):
    '''
    Build constant parameters for loops, e.g. 0x42 in MOV EAX, 0x42. In
    particular, these are only input parameters. It actually only works for
    4-byte constants (cf. TODO list).

    For each valid instance of each loop, the trace is replayed between the
    start and end times of the instance. Nested loops (body elements whose
    string form starts with '+L') are jumped over and recorded in the
    'imbricatedInstanceID' list of the instance. One variable per distinct
    constant is appended to the 'constantParameter' list of the instance.

    myLoopStorage   -- mapping key -> loop (the instances are modified in place)
    myTraceFileName -- path of the execution trace file

    Returns None. If a nested loop or its instance cannot be found, a message
    is printed and the whole building is aborted.
    '''

    # Next fake address to attribute. It is shared by all the loops so that
    # each constant parameter gets a unique one.
    constantAddr = 0x0

    for k in myLoopStorage.keys():

        myLoop = myLoopStorage[k]

        # For all instances of the loop
        for instanceCounter in myLoop.instances.keys():

            instance = myLoop.instances[instanceCounter]

            # Only take into account the valid instances
            if instance.valid == 0x0:
                continue

            constantSet = set()  # constants already seen in this instance
            insCounter = 0x0
            time = instance.startTime

            # The context manager closes the trace in every case. It was
            # never closed before.
            with open(myTraceFileName, 'r') as f:

                # Go to the first line of the instance
                skipped = 0x0
                while skipped < time:
                    f.readline()
                    skipped += 1

                while time <= instance.endTime:

                    myIns = executionTrace.lineConnector(f.readline())
                    if myIns is None:
                        time += 1
                        continue

                    # Jump over nested loops
                    # The nested loop can turn a different number of times at
                    # each turn of the big loop
                    if str(myLoop.body[insCounter]).startswith('+L'):

                        # Goal : move the time over the loop in the trace
                        # What loop ?
                        loopId = int(str(myLoop.body[insCounter])[2:])

                        # Look for the associated instance (could we make the
                        # assumption that the key == ID ?)
                        found = 0x0
                        for nestedKey in myLoopStorage.keys():
                            nestedLoop = myLoopStorage[nestedKey]
                            if nestedLoop.ID != loopId:
                                continue
                            found = 1
                            # Look for the instance with the right start time
                            foundBis = 0x0
                            for kk in nestedLoop.instances.keys():
                                nestedInstance = nestedLoop.instances[kk]
                                if nestedInstance.startTime == time:
                                    # Carry out the instance length in the trace
                                    lengthToJump = nestedInstance.endTime \
                                        - nestedInstance.startTime + 1
                                    foundBis = 1
                                    instance.imbricatedInstanceID.append(
                                        [nestedKey, kk])
                                    break
                            if foundBis == 0x0:
                                print('Fail to find the instance!')
                                print('Loop ' + str(myLoop.ID))
                                print('We look for ' + str(loopId)
                                      + ' at time ' + str(time))
                                return

                        if found == 0x0:
                            print('Fail to find the loop !!!')
                            return

                        time += lengthToJump

                        # The first line of the nested instance has already
                        # been read, skip the remaining ones
                        skipped = 0x0
                        while skipped < lengthToJump - 1:
                            f.readline()
                            skipped += 1

                    else:

                        for cte in myIns.constants:
                            if cte in constantSet:
                                continue
                            constantSet.add(cte)

                            # Due to the way we represent parameters, we have
                            # to attribute fake addresses to constant
                            # parameters. These addresses need to be unique,
                            # in order to not influence the data-flow
                            # building step.
                            var = variable.variable(constantAddr, 4)
                            var.constant = 1
                            constantAddr += 4

                            # The constant is a hexadecimal string, two
                            # characters per byte
                            for i in range(0x0, 4):
                                var.value[i] = cte[i * 2:i * 2 + 2]

                            instance.constantParameter.append(var)

                        time += 1

                    # Follow the loop body, as the memory and register
                    # builders do. The position was never advanced before, so
                    # the nested loop test above always looked at the first
                    # element of the body.
                    insCounter = (insCounter + 1) % len(myLoop.body)