def recordDefaults(learner,gradAccum,Y,P,**kw):
    """A default status message, recorded via Tracer._record."""
    gradAccum.counter['n'] = mutil.numRows(Y)
    Tracer._record(gradAccum,
                   Tracer.identification(learner,kw)
                   + Tracer.loss(learner,Y,P,kw)
                   + Tracer.timing(learner,kw))
def defaultPlusAcc(learner,gradAccum,Y,P,**kw):
    """A default status message, plus accuracy."""
    gradAccum.counter['n'] = mutil.numRows(Y)
    Tracer._announce(gradAccum,
                     Tracer.identification(learner,kw)
                     + Tracer.loss(learner,Y,P,kw)
                     + Tracer.accuracy(learner,Y,P,kw)
                     + Tracer.timing(learner,kw))
def shuffle(self):
    """Shuffle the examples for each mode, keeping X and Y rows aligned."""
    for mode in self.xDict:
        # draw one random row permutation and apply it to both X and Y
        shuffledRowNums = NP.arange(mutil.numRows(self.xDict[mode]))
        NR.shuffle(shuffledRowNums)
        self.xDict[mode] = mutil.shuffleRows(self.xDict[mode],shuffledRowNums)
        self.yDict[mode] = mutil.shuffleRows(self.yDict[mode],shuffledRowNums)
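# Standalone sketch of the row-permutation idea above, using plain numpy
# arrays instead of mutil's sparse-matrix wrappers. The helper name and toy
# data are illustrative only, not part of this codebase.
def _sketchConsistentShuffle():
    import numpy as NP
    import numpy.random as NR
    X = NP.arange(12).reshape(4,3)   # 4 toy examples, 3 features
    Y = NP.eye(4)                    # one toy label row per example
    perm = NP.arange(X.shape[0])
    NR.shuffle(perm)                 # one random permutation of row indices...
    return X[perm], Y[perm]          # ...applied to BOTH X and Y, keeping rows paired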
def train(self,mode,X,Y):
    """Train on a single mode for self.epochs epochs of gradient descent."""
    trainStartTime = time.time()
    for i in range(self.epochs):
        startTime = time.time()
        n = mutil.numRows(X)
        args = {'i':i,'startTime':startTime}
        paramGrads = self.crossEntropyGrad(mode,X,Y,tracerArgs=args)
        self.regularizer.regularizeParams(self.prog,n)
        self.applyUpdate(paramGrads,self.rate)
def datasetCrossEntropy(goldDset,predictedDset,perExample=True):
    """Return cross entropy on a dataset."""
    result = 0.0
    for mode in goldDset.modesToLearn():
        assert predictedDset.hasMode(mode), \
            "CrossEntropy: Mode '%s' not available in predictedDset" % mode
        Y = goldDset.getY(mode)
        P = predictedDset.getY(mode)
        divisor = mutil.numRows(Y) if perExample else 1.0
        result += Learner.crossEntropy(Y,P,perExample=False)/divisor
    return result
def datasetAccuracy(goldDset,predictedDset):
    """Return accuracy on a dataset relative to gold labels."""
    weightedSum = 0.0
    totalWeight = 0.0
    for mode in goldDset.modesToLearn():
        assert predictedDset.hasMode(mode), \
            "Accuracy: Mode '%s' not available in predictedDset" % mode
        Y = goldDset.getY(mode)
        P = predictedDset.getY(mode)
        weight = mutil.numRows(Y)
        weightedSum += weight * Learner.accuracy(Y,P)
        totalWeight += weight
    if totalWeight == 0:
        return 0
    return weightedSum/totalWeight
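# Toy check of why the accuracy average is weighted by example count: with
# made-up (numExamples,accuracy) pairs, the weighted mean differs from the
# unweighted one. The numbers and helper name below are illustrative only.
def _sketchWeightedAccuracy():
    perMode = [(100,0.90),(300,0.70)]
    weightedSum = sum(n*acc for (n,acc) in perMode)
    totalWeight = sum(n for (n,_) in perMode)
    return weightedSum/totalWeight   # 0.75, versus an unweighted mean of 0.80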
def accuracy(Y,P):
    """Evaluate accuracy of predictions P versus labels Y."""
    #TODO surely there's a better way of doing this
    def allZerosButArgmax(d):
        result = NP.zeros_like(d)
        result[d.argmax()] = 1.0
        return result
    n = mutil.numRows(P)
    ok = 0.0
    for i in range(n):
        pi = P.getrow(i)
        yi = Y.getrow(i)
        ti = mutil.mapData(allZerosButArgmax,pi)
        ok += yi.multiply(ti).sum()
    return ok/n
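# Self-contained check of the argmax-as-one-hot idea, using scipy.sparse
# directly. The toy matrices and helper name are assumptions for illustration.
def _sketchAccuracy():
    import numpy as NP
    from scipy.sparse import csr_matrix
    Y = csr_matrix(NP.array([[0.,1.,0.],
                             [1.,0.,0.]]))
    P = csr_matrix(NP.array([[0.2,0.7,0.1],    # argmax is class 1: correct
                             [0.1,0.6,0.3]]))  # argmax is class 1: wrong
    hits = sum(Y[i,P.getrow(i).toarray().argmax()] for i in range(P.shape[0]))
    return hits/P.shape[0]   # 0.5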
def train(self,dset):
    trainStartTime = time.time()
    modes = dset.modesToLearn()
    numModes = len(modes)
    for i in range(self.epochs):
        startTime = time.time()
        epochCounter = GradAccumulator.counter()
        for j,mode in enumerate(modes):
            n = mutil.numRows(dset.getX(mode))
            args = {'i':i,'startTime':startTime,'mode':str(mode)}
            try:
                paramGrads = self.crossEntropyGrad(mode,dset.getX(mode),dset.getY(mode),tracerArgs=args)
                self.regularizer.regularizeParams(self.prog,n)
                self.applyUpdate(paramGrads,self.rate)
                GradAccumulator.accumToCounter(epochCounter,paramGrads.counter)
            except:
                # log the failing context before re-raising
                print "Unexpected error at %s:" % str(args), sys.exc_info()[:2]
                raise
        self.epochTracer(self,epochCounter,i=i,startTime=trainStartTime)
def train(self,dset):
    trainStartTime = time.time()
    for i in range(self.epochs):
        startTime = time.time()
        epochCounter = GradAccumulator.counter()
        k = 0
        for (mode,X,Y) in dset.minibatchIterator(batchSize=self.miniBatchSize):
            n = mutil.numRows(X)
            k += 1
            args = {'i':i,'k':k,'startTime':startTime,'mode':mode}
            try:
                paramGrads = self.crossEntropyGrad(mode,X,Y,tracerArgs=args)
                self.regularizer.regularizeParams(self.prog,n)
                self.applyUpdate(paramGrads,self.rate)
                GradAccumulator.accumToCounter(epochCounter,paramGrads.counter)
            except:
                # log the failing context before re-raising
                print "Unexpected error at %s:" % str(args), sys.exc_info()[:2]
                raise
        self.epochTracer(self,epochCounter,i=i,startTime=trainStartTime)
def minibatchIterator(self,batchSize=100,shuffleFirst=True):
    """Iterate over triples (mode,X',Y') where X' and Y' are sets of
    batchSize rows from the full data for mode, randomly selected
    (without replacement) from the dataset."""
    # randomize the order of the examples
    if shuffleFirst:
        self.shuffle()
    # then sample an ordering of the modes
    modeList = self.modesToLearn()
    modeSampleDict = {}
    for modeIndex,mode in enumerate(modeList):
        numBatches = int(math.ceil(mutil.numRows(self.getX(mode)) / float(batchSize)))
        modeSampleDict[mode] = NP.ones(numBatches, dtype='int') * modeIndex
    modeSamples = NP.concatenate(modeSampleDict.values())
    NR.shuffle(modeSamples)
    # finally produce the minibatches
    currentOffset = [0] * len(modeList)
    for modeIndex in modeSamples:
        mode = modeList[modeIndex]
        lo = currentOffset[modeIndex]
        bX = mutil.selectRows(self.getX(mode),lo,lo+batchSize)
        bY = mutil.selectRows(self.getY(mode),lo,lo+batchSize)
        currentOffset[modeIndex] += batchSize
        yield mode,bX,bY
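# The mode-interleaving step of minibatchIterator is easiest to see in
# isolation: build one index per batch, then shuffle the whole schedule.
# Row counts, batch size, and the helper name here are made up for illustration.
def _sketchBatchSchedule():
    import math
    import numpy as NP
    import numpy.random as NR
    numRowsPerMode = [250,100]    # hypothetical: mode 0 has 250 rows, mode 1 has 100
    batchSize = 100
    schedule = NP.concatenate(
        [NP.ones(int(math.ceil(n/float(batchSize))),dtype='int')*m
         for m,n in enumerate(numRowsPerMode)])
    NR.shuffle(schedule)
    return schedule   # e.g. [0 0 1 0]: three batches of mode 0, one of mode 1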
def crossEntropy(Y,P,perExample=False):
    """Compute cross entropy of some predictions P relative to some labels Y."""
    logP = mutil.mapData(NP.log,P)
    result = -(Y.multiply(logP).sum())
    return result/mutil.numRows(Y) if perExample else result
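# Self-contained sketch of the same computation with scipy.sparse. The toy
# matrices and helper name are mine, and applying NP.log to the stored sparse
# entries is an assumption about what mutil.mapData does.
def _sketchCrossEntropy():
    import numpy as NP
    from scipy.sparse import csr_matrix
    Y = csr_matrix(NP.array([[0.,1.],
                             [1.,0.]]))       # one-hot gold labels
    P = csr_matrix(NP.array([[0.25,0.75],
                             [0.50,0.50]]))   # predicted distributions
    logP = P.copy()
    logP.data = NP.log(logP.data)             # log only the stored entries
    return -(Y.multiply(logP).sum())          # -(log .75 + log .5) ~= 0.981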
def cheap(learner,gradAccum,Y,P,**kw):
    """Easy-to-compute status message."""
    gradAccum.counter['n'] = mutil.numRows(Y)
    Tracer._announce(gradAccum,
                     Tracer.identification(learner,kw)
                     + Tracer.timing(learner,kw))
def silent(learner,gradAccum,Y,P,**kw):
    """No printed output; still records the example count."""
    gradAccum.counter['n'] = mutil.numRows(Y)
def _doBackpropTask(task):
    """Use the workerLearner to compute gradients for one (mode,X,Y,args) task."""
    (mode,X,Y,args) = task
    paramGrads = workerLearner.crossEntropyGrad(mode,X,Y,tracerArgs=args)
    return (mutil.numRows(X),paramGrads)
def totalNumExamples(self,miniBatches):
    """The total number of examples in all the miniBatches."""
    return sum(mutil.numRows(X) for (mode,X,Y) in miniBatches)
def _doEval(self,db,values,pad):
    return db.zeros(mutil.numRows(values[0]),self.outputType)