def __init__(self, boardSize, convSize, numFeatureMaps, **args):
    """Build the padded input stage, then delegate to ``_buildStructure``.

    The flat board input (``inputdim`` = 2 values per board field) is copied
    row by row into a larger 'pad' layer. The border fields of that layer
    are fed by a bias unit through one shared, trainable connection.

    boardSize -- edge length of the square board.
    convSize -- edge length of the convolution window; it determines the
        width of the padding border.
    numFeatureMaps -- passed on unchanged to ``_buildStructure``.
    args -- keyword arguments passed on to ``FeedForwardNetwork.__init__``.
    """
    inputdim = 2
    FeedForwardNetwork.__init__(self, **args)
    inlayer = LinearLayer(inputdim * boardSize * boardSize, name='in')
    self.addInputModule(inlayer)

    # we need some treatment of the border too - thus we pad the direct board input.
    # Floor division keeps the border width an integer on Python 3 as well
    # (plain `/` would produce a float there and break range() below).
    x = convSize // 2
    insize = boardSize + 2 * x
    if convSize % 2 == 0:
        insize -= 1
    paddedlayer = LinearLayer(inputdim * insize * insize, name='pad')
    self.addModule(paddedlayer)

    # we connect a bias to the padded-parts (with shared but trainable weights).
    bias = BiasUnit()
    self.addModule(bias)
    biasConn = MotherConnection(inputdim)

    # Border indices along one axis: x entries at the start, and x (odd
    # convSize) or x - 1 (even convSize) entries at the end. Explicit lists
    # are needed because range objects cannot be concatenated on Python 3.
    if convSize % 2 == 0:
        xs = list(range(x)) + list(range(insize - x + 1, insize))
    else:
        xs = list(range(x)) + list(range(insize - x, insize))

    # Border fields: every row in a border column, plus the border rows
    # restricted to the board columns (so corners are not listed twice).
    paddable = []
    paddable.extend(crossproduct([list(range(insize)), xs]))
    paddable.extend(crossproduct([xs, list(range(x, boardSize + x))]))

    for (i, j) in paddable:
        offset = (i * insize + j) * inputdim
        self.addConnection(
            SharedFullConnection(biasConn, bias, paddedlayer,
                                 outSliceFrom=offset,
                                 outSliceTo=offset + inputdim))

    # Copy each board row into the padded layer, shifted by x rows and
    # x columns.
    for i in range(boardSize):
        inmod = ModuleSlice(inlayer,
                            outSliceFrom=i * boardSize * inputdim,
                            outSliceTo=(i + 1) * boardSize * inputdim)
        outmod = ModuleSlice(paddedlayer,
                             inSliceFrom=((i + x) * insize + x) * inputdim,
                             inSliceTo=((i + x) * insize + x + boardSize) * inputdim)
        self.addConnection(IdentityConnection(inmod, outmod))

    self._buildStructure(inputdim, insize, paddedlayer, convSize, numFeatureMaps)
    self.sortModules()
class DiscreteBalanceTaskRBF(DiscreteBalanceTask):
    """ Balancing task with radial-basis-function observations.

    From Lagoudakis & Parr, 2003: RBF features generate a 10-dimensional
    observation (including bias); there are no cart-restrictions, no helpful
    rewards, and a single pole.
    """

    # One RBF centre per combination of the two value lists below.
    CENTERS = array(crossproduct([[-pi/4, 0, pi/4], [1, 0, -1]]))

    def getReward(self):
        """ Return -1 once any pole angle exceeds 1.6 in magnitude, else 0. """
        widest = max(abs(angle) for angle in self.env.getPoleAngles())
        return -1. if widest > 1.6 else 0.0

    def isFinished(self):
        """ Tell whether a pole angle exceeded 1.6 in magnitude or the step
        limit was reached. """
        widest = max(abs(angle) for angle in self.env.getPoleAngles())
        if widest > 1.6 or self.t >= self.N:
            return True
        return False

    def getObservation(self):
        """ Return the feature vector: a constant 1 followed by one Gaussian
        RBF activation per centre. """
        # The last two sensor values are not used for the features.
        sensors = self.env.getSensors()[:-2]
        distances = array([norm(offset) for offset in self.CENTERS - sensors])
        features = ones(1 + len(self.CENTERS))
        features[1:] = exp(-distances**2 / 2)
        return features

    @property
    def outdim(self):
        """ Length of the observation vector (bias plus one entry per centre). """
        return len(self.CENTERS) + 1
class DiscreteDoubleBalanceTaskRBF(DiscreteBalanceTaskRBF):
    """ Same idea, but two poles. """

    # The two value lists are repeated once per pole, so each centre has
    # four components.
    CENTERS = array(crossproduct([[-pi/4, 0, pi/4], [1, 0, -1]]*2))

    def __init__(self, env=None, maxsteps=1000):
        """ Set up the task.

        env -- environment to use; a fresh DoublePoleEnvironment is created
            when None is given.
        maxsteps -- passed on unchanged to DiscreteBalanceTask.__init__.
        """
        # Identity test: `== None` would go through the environment's own
        # __eq__, which is not what is meant here.
        if env is None:
            env = DoublePoleEnvironment()
        DiscreteBalanceTask.__init__(self, env, maxsteps)
def __init__(self, boardSize, convSize, numFeatureMaps, **args):
    """Build the padded input stage, then delegate to ``_buildStructure``.

    The flat board input (``inputdim`` = 2 values per board field) is copied
    row by row into a larger 'pad' layer. The border fields of that layer
    are fed by a bias unit through one shared, trainable connection.

    boardSize -- edge length of the square board.
    convSize -- edge length of the convolution window; it determines the
        width of the padding border.
    numFeatureMaps -- passed on unchanged to ``_buildStructure``.
    args -- keyword arguments passed on to ``FeedForwardNetwork.__init__``.
    """
    inputdim = 2
    FeedForwardNetwork.__init__(self, **args)
    inlayer = LinearLayer(inputdim*boardSize*boardSize, name = 'in')
    self.addInputModule(inlayer)

    # we need some treatment of the border too - thus we pad the direct board input.
    # Floor division keeps the border width an integer on Python 3 as well
    # (plain `/` would produce a float there and break range() below).
    x = convSize//2
    insize = boardSize+2*x
    if convSize % 2 == 0:
        insize -= 1
    paddedlayer = LinearLayer(inputdim*insize*insize, name = 'pad')
    self.addModule(paddedlayer)

    # we connect a bias to the padded-parts (with shared but trainable weights).
    bias = BiasUnit()
    self.addModule(bias)
    biasConn = MotherConnection(inputdim)

    # Border indices along one axis: x entries at the start, and x (odd
    # convSize) or x - 1 (even convSize) entries at the end. Explicit lists
    # are needed because range objects cannot be concatenated on Python 3.
    if convSize % 2 == 0:
        xs = list(range(x))+list(range(insize-x+1, insize))
    else:
        xs = list(range(x))+list(range(insize-x, insize))

    # Border fields: every row in a border column, plus the border rows
    # restricted to the board columns (so corners are not listed twice).
    paddable = []
    paddable.extend(crossproduct([list(range(insize)), xs]))
    paddable.extend(crossproduct([xs, list(range(x, boardSize+x))]))

    for (i, j) in paddable:
        self.addConnection(SharedFullConnection(biasConn, bias, paddedlayer,
                                                outSliceFrom = (i*insize+j)*inputdim,
                                                outSliceTo = (i*insize+j+1)*inputdim))

    # Copy each board row into the padded layer, shifted by x rows and
    # x columns.
    for i in range(boardSize):
        inmod = ModuleSlice(inlayer, outSliceFrom = i*boardSize*inputdim,
                            outSliceTo = (i+1)*boardSize*inputdim)
        outmod = ModuleSlice(paddedlayer, inSliceFrom = ((i+x)*insize+x)*inputdim,
                             inSliceTo = ((i+x)*insize+x+boardSize)*inputdim)
        self.addConnection(IdentityConnection(inmod, outmod))

    self._buildStructure(inputdim, insize, paddedlayer, convSize, numFeatureMaps)
    self.sortModules()
def _permsForSwiping(self):
    """Return the correct permutations of blocks for all swiping direction.

    Reads ``self.sequenceLength``, ``self.shape``, ``self.blockshape`` and
    ``self.timedim``. The result is a list of flat index arrays, one for
    each direction yielded by ``crossproduct`` over '+'/'-' per time
    dimension.
    """
    # We use an identity permutation to generate the permutations from by
    # slicing correctly.
    identity = scipy.array(list(range(self.sequenceLength)))
    # Arrange the block indices on the grid of blocks (shape // blockshape).
    identity.shape = tuple(s // b for s, b in zip(self.shape, self.blockshape))

    forward = slice(None, None, 1)
    backward = slice(None, None, -1)

    permutations = []
    # Loop over all possible directions: from each corner to each corner
    for direction in crossproduct([("+", "-")] * self.timedim):
        # Use a normal complete slice for forward and a reversed complete
        # slice for backward. The index must be a tuple: NumPy no longer
        # interprets a list of slices as a multidimensional index.
        axises = tuple(
            forward if axisdir == "+" else backward for axisdir in direction
        )
        permutations.append(identity[axises].flatten())
    return permutations
def _permsForSwiping(self):
    """Return the correct permutations of blocks for all swiping direction.

    Reads ``self.sequenceLength``, ``self.shape``, ``self.blockshape`` and
    ``self.timedim``. The result is a list of flat index arrays, one for
    each direction yielded by ``crossproduct`` over '+'/'-' per time
    dimension.
    """
    # We use an identity permutation to generate the permutations from by
    # slicing correctly.
    identity = scipy.array(list(range(self.sequenceLength)))
    # Arrange the block indices on the grid of blocks. Floor division keeps
    # the shape entries integers on Python 3 (plain `/` would yield floats,
    # which are not a valid array shape).
    identity.shape = tuple(s // b for s, b in zip(self.shape, self.blockshape))

    forward = slice(None, None, 1)
    backward = slice(None, None, -1)

    permutations = []
    # Loop over all possible directions: from each corner to each corner
    for direction in crossproduct([('+', '-')] * self.timedim):
        # Use a normal complete slice for forward and a reversed complete
        # slice for backward. The index must be a tuple: NumPy no longer
        # interprets a list of slices as a multidimensional index.
        axises = tuple(
            forward if axisdir == '+' else backward for axisdir in direction
        )
        permutations.append(identity[axises].flatten())
    return permutations