def getMaxLoopSizeByIndex(self, index):
    """Return the maximum loop size stored under the numeric slot *index*.

    Slot 0 yields a newly built RowCol(self.Y, self.X); slots 1 to 4 yield
    self.kern, self.fm, self.dx and self.dy respectively.

    Raises:
        KeyError: if *index* is not one of 0..4.
    """
    # Listed in slot order. Every entry (including the RowCol) is evaluated
    # on each call, whichever slot is requested.
    maxSizes = (
        RowCol(self.Y, self.X),
        self.kern,
        self.fm,
        self.dx,
        self.dy,
    )
    return dict(enumerate(maxSizes))[index]
def nextDivisor(curr, maxi):
    """Return the next tile size after *curr* for a dimension of size *maxi*.

    Candidate sizes are the values ceil(maxi / i) for i = maxi, ..., 1, which
    form a non-decreasing sequence from 1 up to maxi.  The first candidate
    strictly greater than *curr* is returned (note: candidates are ceiling
    quotients, so they are not necessarily exact divisors of *maxi*).

    When *curr* is a RowCol, only its ``col`` component is advanced (against
    ``maxi.col``); ``row`` is carried over unchanged into a new RowCol.

    Args:
        curr: current size, either a number or a RowCol.
        maxi: upper bound, an integer or (for RowCol input) a RowCol.

    Returns:
        The next size (int, or a new RowCol), or None when no candidate
        exceeds *curr* (e.g. *curr* >= *maxi*).
    """
    # isinstance also accepts RowCol subclasses, unlike an exact type match.
    if isinstance(curr, RowCol):
        return RowCol(curr.row, nextDivisor(curr.col, maxi.col))

    for i in xrange(maxi, 0, -1):
        # Integer ceiling division: exact for any int size, no float rounding.
        t = (maxi + i - 1) // i
        if t > curr:
            return t

    # No larger candidate exists; made explicit for callers that test for it.
    return None
def getMaxLoopSize(self, loopType):
    """Return the maximum loop size stored for the given LoopType member.

    LoopType.rowcol yields a newly built RowCol(self.Y, self.X); fm, kern, dx
    and dy yield the attribute of the same name.

    Raises:
        KeyError: if *loopType* is not one of the five members handled here.
    """
    # The whole table is rebuilt on every call, so each attribute is read
    # (and the RowCol constructed) regardless of which entry is requested.
    maxSizes = {}
    maxSizes[LoopType.fm] = self.fm
    maxSizes[LoopType.kern] = self.kern
    maxSizes[LoopType.rowcol] = RowCol(self.Y, self.X)
    maxSizes[LoopType.dx] = self.dx
    maxSizes[LoopType.dy] = self.dy
    return maxSizes[loopType]
def copyTilingBest(tile):
    """Return a new list holding a fresh Loop for every loop in *tile*.

    Each copy keeps the source loop's type and pragma.  For rowcol loops the
    size is duplicated into a new RowCol so the copy does not share it with
    the source; for every other type the size value is passed through as is.
    """

    def _cloneLoop(loop):
        # Only rowcol sizes are rebuilt; other sizes are reused directly.
        if loop.type == LoopType.rowcol:
            size = RowCol(loop.size.row, loop.size.col)
        else:
            size = loop.size
        return Loop(loop.type, size, loop.pragma)

    return [_cloneLoop(loop) for loop in tile]
def permutate(self, tiling):
    """Yield one OptArgs for every distinct, valid ordering of the loops.

    The loops in *tiling* are extended with one size-1 ``Pragma.n`` loop per
    LoopType other than dx and dy, and every permutation of that list is
    considered.  An ordering is skipped when, after prefixing the types of
    ``self.hwTemplate.archP``, two neighbouring loops share a type, or when
    the same sequence of types has already been produced.

    For each accepted ordering a second set of size-1 ``Pragma.n`` loops is
    appended, and sizes are initialised: the last loop of each type gets
    ``self.layer.getMaxLoopSize(type)``; earlier loops of that type get 1
    (``RowCol(1, 1)`` for rowcol).

    Args:
        tiling: list of Loop objects (must be a list; it is concatenated
            with another list).

    Yields:
        OptArgs(curtile, self.hwTemplate, self.layer, self.energyModel,
        self.hwRestrictions).

    Note:
        The Loop objects coming from *tiling* are reused, not copied, in
        every ordering and their ``size`` is overwritten for each one, so a
        consumer that needs a stable snapshot has to copy what it receives.
    """
    # Types that receive a filler loop; loop-invariant, so computed once.
    fillerTypes = [x for x in LoopType
                   if (x != LoopType.dx and x != LoopType.dy)]
    archTypes = tuple(x.type for x in self.hwTemplate.archP)
    ptile = tiling + [Loop(x, 1, Pragma.n) for x in fillerTypes]

    # Type sequences already emitted.  Stored as tuples in a set so the
    # duplicate check is a hash lookup instead of a scan over a growing
    # list (requires loop types to be hashable).
    seenOrders = set()

    for ctile in itertools.permutations(ptile):
        order = archTypes + tuple(x.type for x in ctile)

        # skip permutations where two neighbours have the same loop type
        if any(x == y for x, y in zip(order, order[1:])):
            continue
        if order in seenOrders:
            continue
        seenOrders.add(order)

        # Fresh filler loops are created per ordering for the tail.
        curtile = list(ctile) + [Loop(x, 1, Pragma.n) for x in fillerTypes]

        # init by putting all data to DRAM
        for index, loop in enumerate(curtile):
            laterTypes = [g.type for g in curtile[index + 1:]]
            if loop.type not in laterTypes:
                # Last loop of its type: takes the whole dimension.
                loop.size = self.layer.getMaxLoopSize(loop.type)
            elif loop.type == LoopType.rowcol:
                loop.size = RowCol(1, 1)
            else:
                loop.size = 1

        yield OptArgs(curtile, self.hwTemplate, self.layer,
                      self.energyModel, self.hwRestrictions)
def initUsedData(self, left, loop):
    """Rebuild self.usedData from the loops in *left* and return it.

    self.usedData is a list with one slot per LoopType member, indexed by the
    member's ``.value``.  Every slot starts at 1 (RowCol(1, 1) for rowcol)
    and is then overwritten with the size of the last loop of that type
    found in *left*, if there is one.

    Args:
        left: loops to scan; it is iterated once per LoopType member.
        loop: not used by this method.

    Returns:
        The same list object that is stored in self.usedData.
    """
    self.usedData = [1] * len(LoopType)
    used = self.usedData
    used[LoopType.rowcol.value] = RowCol(1, 1)

    for kind in LoopType:
        matches = [entry for entry in left if entry.type == kind]
        if not matches:
            continue
        latest = matches[-1].size
        if kind == LoopType.rowcol:
            # Copy field by field into the RowCol created above rather than
            # storing a reference to the loop's own size object.
            slot = used[kind.value]
            slot.row = latest.row
            slot.col = latest.col
        else:
            used[kind.value] = latest

    return self.usedData
def __init__(self):
    """Set up the "Eyeriss" template with its fixed loop lists.

    The base initialiser receives the template name followed by two
    positional lists: the loops tagged Pragma.u (dy, rowcol, kern) and the
    loops tagged Pragma.p (rowcol, dx, fm).  All sizes start at 1
    (RowCol(1, 1) for rowcol).
    """
    uLoops = [
        Loop(LoopType.dy, 1, Pragma.u),
        Loop(LoopType.rowcol, RowCol(1, 1), Pragma.u),
        Loop(LoopType.kern, 1, Pragma.u),
    ]
    pLoops = [
        Loop(LoopType.rowcol, RowCol(1, 1), Pragma.p),
        Loop(LoopType.dx, 1, Pragma.p),
        Loop(LoopType.fm, 1, Pragma.p),
    ]
    super(Eyeriss, self).__init__("Eyeriss", uLoops, pLoops)
def __init__(self):
    """Set up the "CNPfm" template with its fixed loop lists.

    The base initialiser receives the template name, the loops tagged
    Pragma.u (dx, dy, fm), the loops tagged Pragma.p (rowcol), and a
    ``skip`` list holding one fm loop tagged Pragma.s.  All sizes start at 1
    (RowCol(1, 1) for rowcol).
    """
    uLoops = [
        Loop(LoopType.dx, 1, Pragma.u),
        Loop(LoopType.dy, 1, Pragma.u),
        Loop(LoopType.fm, 1, Pragma.u),
    ]
    pLoops = [Loop(LoopType.rowcol, RowCol(1, 1), Pragma.p)]
    skipLoops = [Loop(LoopType.fm, 1, Pragma.s)]
    super(CNPfm, self).__init__("CNPfm", uLoops, pLoops, skip=skipLoops)
def __init__(self):
    """Set up the "DNNweaver" template with its fixed loop lists.

    The base initialiser receives the template name, the loops tagged
    Pragma.u (rowcol, kern), the loops tagged Pragma.p (dx, dy, rowcol), and
    a ``skip`` list with a kern loop and an fm loop tagged Pragma.s.  All
    sizes start at 1 (RowCol(1, 1) for rowcol).
    """
    # include fm?
    uLoops = [
        Loop(LoopType.rowcol, RowCol(1, 1), Pragma.u),
        Loop(LoopType.kern, 1, Pragma.u),
    ]
    pLoops = [
        Loop(LoopType.dx, 1, Pragma.p),
        Loop(LoopType.dy, 1, Pragma.p),
        Loop(LoopType.rowcol, RowCol(1, 1), Pragma.p),
    ]
    skipLoops = [
        Loop(LoopType.kern, 1, Pragma.s),
        Loop(LoopType.fm, 1, Pragma.s),
    ]
    super(DNNweaver, self).__init__("DNNweaver", uLoops, pLoops,
                                    skip=skipLoops)
def OptimizeTile(args):
    """Search loop sizes for one loop ordering and return the best found.

    Starting from the sizes carried by ``args.tiling``, the function steps
    through alternative loop sizes (via ``incrementTiling``), evaluates the
    buffers and energy of each configuration that fits the memory limits,
    and keeps the configuration with the lowest energy.

    Args:
        args: object exposing ``tiling``, ``hwTemplate``, ``layer``,
            ``energyModel`` and ``hwRestrictions``.  ``tiling`` and
            ``hwTemplate`` are deep-copied before use; the other three are
            used as passed in.

    Returns:
        Tuple ``(tilingBest, buffBest, energyBest)``: a copy of the best
        loop list (archU loops first), the buffers computed for it, and its
        energy.  If the initial configuration already fails ``buffFitMem``
        it is returned immediately without any search.

    Side effects:
        Calls ``printTile`` on the initial loop list and prints the number
        of evaluated configurations to stdout.
    """
    # Private copies: the search below overwrites loop sizes in place.
    tiling = copy.deepcopy(args.tiling)
    hwTemplate = copy.deepcopy(args.hwTemplate)
    layer = args.layer
    energyModel = args.energyModel
    hwRestrictions = args.hwRestrictions

    # New list, but it holds the same Loop objects as hwTemplate.archP and
    # tiling, so size changes made through tiledArch are visible in both.
    tiledArch = hwTemplate.archP + list(tiling)
    printTile(tiledArch, 2)

    # Baseline: the configuration exactly as received.
    buffBest = hwTemplate.calcBuffers(tiling, layer)
    energyBest = hwTemplate.calcEnergy(buffBest, energyModel, layer)
    tilingBest = copyTilingBest(hwTemplate.archU + hwTemplate.archP + list(tiling))

    # If even the starting point does not fit in memory, give up early and
    # hand back the baseline.
    if not buffFitMem(buffBest, hwRestrictions):
        return tilingBest, buffBest, energyBest

    # For every LoopType (in iteration order), the positions in tiledArch
    # that hold a loop of that type.
    loopIndexByType = []
    for loop in LoopType:
        loopIndexByType.append(
            [i for i, x in enumerate(tiledArch) if x.type == loop])

    iterationCounter = 0
    # t2 = time.clock()
    # Pragma.u loops always occupy all possible MACs
    hwTemplate.MACRestrictions(layer)
    for ALUlist in hwTemplate.ALUperm(hwTemplate.numU, hwRestrictions.MAC, hwRestrictions.MAC):
        hwTemplate.setALU(ALUlist)

        # find minimal value for each loop type from archU
        # (slots are indexed by LoopType.value; default is 1, RowCol(1, 1)
        # for rowcol)
        minLoopValues = [1] * len(LoopType)
        for i in LoopType:
            if i == LoopType.rowcol:
                minLoopValues[i.value] = RowCol(1, 1)
                break
        for loop in hwTemplate.archU:
            if (loop.type == LoopType.rowcol):
                # Fresh RowCol so later edits do not touch the archU loop.
                minLoopValues[loop.type.value] = RowCol(
                    loop.size.row, loop.size.col)
            else:
                minLoopValues[loop.type.value] = loop.size
        # dx and dy minimums always come from the layer, overriding archU.
        minLoopValues[LoopType.dx.value] = layer.dx
        minLoopValues[LoopType.dy.value] = layer.dy

        # Reset every loop of tiledArch to its minimum value, except the
        # last 3 loops, which keep their current (maximum) value.
        for i, loop in enumerate(tiledArch[0:-3]):
            if loop.type == LoopType.rowcol:
                # Field-wise copy keeps the loop's own RowCol object.
                tiledArch[i].size.row = minLoopValues[loop.type.value].row
                tiledArch[i].size.col = minLoopValues[loop.type.value].col
            else:
                tiledArch[i].size = minLoopValues[loop.type.value]
        # for i, archLoop in enumerate(tiledArch):
        #     if archLoop.type == loop.type:
        #         if archLoop.type in [x.type for x in tiledArch[i+1:]]:
        #             tiledArch[i].size = loop.size
        # print "ALUperm", iterationCounter, " took: ", time.clock() - t2
        # printTile(self.hwTemplate.archU + tiledArch, 1)
        # t2 = time.clock()

        currGroup = 0
        # Main loop
        # Exit condition: all loops reach maximum, i.e. the last group overflows
        # Only the first len(LoopType) - 2 groups are stepped.
        # NOTE(review): currGroup indexes loopIndexByType (LoopType iteration
        # order), minLoopValues (LoopType.value) and getMaxLoopSizeByIndex;
        # this assumes all three numberings agree - confirm.
        while currGroup < len(LoopType) - 2:
            currGroupSliceIndexes = loopIndexByType[currGroup]
            res, currLoopIndex = incrementTiling(
                tiledArch, currGroupSliceIndexes,
                layer.getMaxLoopSizeByIndex(currGroup),
                minLoopValues[currGroup])

            # This group is exhausted: carry on with the next group.
            if res == IncTileResult.LoopOverflow:
                currGroup += 1
                continue

            # NOTE(review): a result other than LoopOverflow/LoopIncremented
            # would leave currGroup unchanged here - confirm incrementTiling
            # only returns these two values.
            if res == IncTileResult.LoopIncremented:
                iterationCounter += 1
                # Rebind archP to the leading tiledArch entries before the
                # buffers are recomputed for the remaining ones.
                hwTemplate.archP = tiledArch[:len(hwTemplate.archP)]
                currBuff = hwTemplate.calcBuffers(
                    tiledArch[len(hwTemplate.archP):], layer)

                # Configuration does not fit: let the helper adjust the
                # tiling and restart from the first group.
                if not buffFitMem(currBuff, hwRestrictions):
                    updateTilingOnBuffOverflow(
                        tiledArch, loopIndexByType[0:currGroup + 1],
                        currLoopIndex)
                    currGroup = 0
                    continue

                currEnergy = hwTemplate.calcEnergy(currBuff, energyModel, layer)
                # printTile(hwTemplate.archU +tiledArch, 3, currEnergy, sum(currBuff.Bin)+sum(currBuff.Bkern)+sum(currBuff.Bout))
                # print currEnergy
                if currEnergy < energyBest:
                    # Strictly better: record energy, buffers and a copy of
                    # the loops (tiledArch keeps changing afterwards).
                    energyBest = currEnergy
                    del buffBest
                    buffBest = currBuff
                    tilingBest = copyTilingBest(hwTemplate.archU + tiledArch)
                else:
                    del currBuff
                # After a successful step, stepping restarts at group 0.
                currGroup = 0
                continue

    print "Total iterations ", iterationCounter
    # printTile(tilingBest, 2)
    # print buffBest
    # print energyBest
    return tilingBest, buffBest, energyBest