def __init__(self, data, dimsKeep):
    """Obtain the PCA model for ``data`` and remember ``dimsKeep``.

    The model comes from ``cached(PCA, data)`` instead of a direct
    ``PCA(data)`` call, and a timestamped progress line is printed before
    and after that call.

    Args:
        data: handed unchanged to ``PCA`` through ``cached``.
        dimsKeep: stored verbatim as ``self.dimsKeep``; not interpreted here.
    """
    # A single pre-formatted argument keeps the printed text identical to
    # the two-item Python 2 print statement (items joined by one space).
    startStamp = datetime.now()
    print('%s %s' % (startStamp, 'starting PCA...'))

    # Call ``PCA(data)`` directly here to bypass the cache when debugging.
    pcaModel = cached(PCA, data)
    self.pca = pcaModel

    doneStamp = datetime.now()
    print('%s %s' % (doneStamp, 'done with PCA.'))

    self.dimsKeep = dimsKeep
def getData(self, patchSize, number, seed=None):
    """Return patches produced by ``loadCS294Images``, going through the cache.

    Args:
        patchSize: forwarded as the ``patchSize`` keyword.
        number: forwarded as the ``number`` keyword.
        seed: forwarded as the ``seed`` keyword (default ``None``).

    Returns:
        Whatever ``cached(loadCS294Images, ...)`` returns.
    """
    loaderArgs = dict(patchSize=patchSize, number=number, seed=seed)
    return cached(loadCS294Images, **loaderArgs)
def getData(self, patchSize, number, seed=None):
    """Return randomly sampled Upson Rovio 3 patches, one example per column.

    Sampling is delegated to
    ``makeUpsonRovio3.randomSampleMatrixWithLabels`` (through ``cached``)
    using ``makeUpsonRovio3.trainFilter``. The label outputs are discarded.

    Args:
        patchSize: forwarded as ``Nw``.
        number: forwarded as ``Nsamples``.
        seed: forwarded as ``seed`` (default ``None``).

    Returns:
        The transpose of the sample matrix.
    """
    # Colour sampling is requested only when the layer has exactly 3 colours.
    wantColor = (self.colors == 3)
    sampled = cached(
        makeUpsonRovio3.randomSampleMatrixWithLabels,
        makeUpsonRovio3.trainFilter,
        color=wantColor,
        Nw=patchSize,
        Nsamples=number,
        seed=seed,
        imgDirectory='../data/upson_rovio_3/imgfiles')
    # Three-way unpack kept so a result of the wrong arity still fails loudly.
    samples, _labelMatrix, _labelStrings = sampled
    return samples.T
def plotCov(data, saveDir = None, prefix = 'imgdata', show = False):
    """Compute (via the cache) the covariance of ``data`` and render it.

    Args:
        data: passed to ``cov`` through ``cached``.
        saveDir: if truthy, the image is written to
            ``<saveDir>/<prefix>_cov.png``; otherwise nothing is saved.
        prefix: file-name prefix for the saved image.
        show: if true, ``.show()`` is called on the rendered image.

    Returns:
        The covariance value returned by ``cached(cov, data)``.
    """
    covMatrix = cached(cov, data)

    if saveDir:
        outPath = os.path.join(saveDir, '%s_cov.png' % prefix)
    else:
        outPath = None

    # Rendering never shows the image itself; display is handled below so
    # that it is controlled solely by ``show``.
    rendered = pil_imagesc(covMatrix, saveto = outPath, show = False)
    if show:
        rendered.show()

    return covMatrix
def plotCov(data, saveDir=None, prefix='imgdata', show=False):
    """Compute (via the cache) the covariance of ``data`` and render it.

    Args:
        data: passed to ``cov`` through ``cached``.
        saveDir: if truthy, the image is written to
            ``<saveDir>/<prefix>_cov.png``; otherwise nothing is saved.
        prefix: file-name prefix for the saved image.
        show: if true, ``.show()`` is called on the rendered image.

    Returns:
        The covariance value returned by ``cached(cov, data)``.
    """
    covMatrix = cached(cov, data)

    if saveDir:
        outPath = os.path.join(saveDir, '%s_cov.png' % prefix)
    else:
        outPath = None

    # Rendering never shows the image itself; display is handled below so
    # that it is controlled solely by ``show``.
    rendered = pil_imagesc(covMatrix, saveto=outPath, show=False)
    if show:
        rendered.show()

    return covMatrix
def getData(self, patchSize, number, seed=None):
    """Return randomly sampled Upson Rovio 3 patches, one example per column.

    Sampling is delegated to
    ``makeUpsonRovio3.randomSampleMatrixWithLabels`` (through ``cached``)
    using ``makeUpsonRovio3.trainFilter``. The label outputs are discarded.

    Args:
        patchSize: forwarded as ``Nw``.
        number: forwarded as ``Nsamples``.
        seed: forwarded as ``seed`` (default ``None``).

    Returns:
        The transpose of the sample matrix.
    """
    # Colour sampling is requested only when the layer has exactly 3 colours.
    wantColor = (self.colors == 3)
    sampled = cached(
        makeUpsonRovio3.randomSampleMatrixWithLabels,
        makeUpsonRovio3.trainFilter,
        color=wantColor,
        Nw=patchSize,
        Nsamples=number,
        seed=seed,
        imgDirectory='../data/upson_rovio_3/imgfiles')
    # Three-way unpack kept so a result of the wrong arity still fails loudly.
    samples, _labelMatrix, _labelStrings = sampled
    return samples.T
def getBigAndSmallerSamples(fileFilter, whitener, seed, isColor, Nw, Nwshift, Nsamples):
    """Sample large windows and derive stacked, whitened small windows from them.

    Windows of side ``Nw + Nwshift`` are sampled (through ``cached``), split
    into smaller windows by ``large2StackedSmall``, and the small windows
    are normalized with ``whitener.raw2normalized``.

    Args:
        fileFilter, seed, isColor: passed positionally to
            ``randomSampleMatrixWithLabels``.
        whitener: object providing ``raw2normalized``.
        Nw: side length of the small windows.
        Nwshift: extra side length added to ``Nw`` for the large windows.
        Nsamples: number of large windows to sample.

    Returns:
        Tuple ``(largeSampleMatrix.T, stackedSmall.T, stackedSmallWhite,
        labelMatrix.T, labelStrings)``.
    """
    # Author's note: the larger windows (e.g. 15 px on a side when Nw is 10)
    # are for patches that overlap half with their neighbors; the sampled
    # matrix has one example per row -- TODO confirm against the sampler.
    sampled = cached(randomSampleMatrixWithLabels,
                     fileFilter, seed, isColor,
                     Nw=Nw + Nwshift,
                     Nsamples=Nsamples)
    largeSampleMatrix, labelMatrix, labelStrings = sampled

    stackedSmall = large2StackedSmall(largeSampleMatrix, Nw, Nwshift)
    stackedSmallT = stackedSmall.T

    # Only the first of the two values returned by the whitener is used.
    # Author's note: the whitened data has one example per COLUMN.
    stackedSmallWhite, _unused = whitener.raw2normalized(stackedSmallT)

    return (largeSampleMatrix.T,
            stackedSmallT,
            stackedSmallWhite,
            labelMatrix.T,
            labelStrings)
def getDataAndLabels(self, patchSize, number, seed = None):
    """Load NYU2 patches and labels, caching only when the result is small.

    The cache file size is estimated as
    ``prod(patchSize) * number * 4.0 / 1000000`` MB. Below
    ``MAX_CACHE_SIZE_MB`` the loader is run through ``cached``; otherwise
    ``loadNYU2Data`` is called directly. A line stating the decision is
    printed before loading.

    Args:
        patchSize: patch dimensions; forwarded to the loader.
        number: number of patches; forwarded to the loader.
        seed: forwarded to the loader (default ``None``).

    Returns:
        Tuple ``(patches, labels)``.
    """
    approxMbCachefile = prod(patchSize) * number * 4.0 / 1000000
    useCache = approxMbCachefile < MAX_CACHE_SIZE_MB

    if useCache:
        print('NYU2_Labeled layer:: Predicted approx size of %f MB, using cache' % (approxMbCachefile))
    else:
        print('NYU2_Labeled layer:: Skipping cache, approx size of %f MB > max cache size of %s MB' % (approxMbCachefile, repr(MAX_CACHE_SIZE_MB)))

    # Both paths pass exactly the same keyword arguments to the loader.
    loaderArgs = dict(patchSize = patchSize,
                      number = number,
                      rgbColors = self.colorChannels,
                      depthChannels = self.depthChannels,
                      seed = seed)
    if useCache:
        patches, labels = cached(loadNYU2Data, **loaderArgs)
    else:
        patches, labels = loadNYU2Data(**loaderArgs)
    return patches, labels
def testPca():
    """Exercise the PCA class on seeded random data.

    Builds a 5000 x 200 matrix with ``random.randn`` after seeding with 0,
    obtains a ``PCA`` object through ``cached``, and calls its ``pc``,
    ``toPC`` and ``toZca`` methods. Nothing is returned or asserted.
    """
    # ``random`` is presumably numpy.random (it provides ``randn``) --
    # TODO confirm against the file's imports.
    random.seed(0)
    nExamples, nDims = 5000, 200
    samples = random.randn(nExamples, nDims)

    # Call ``PCA(samples)`` directly here to bypass the cache.
    model = cached(PCA, samples)

    principalComponents = model.pc()
    projected = model.toPC(samples)
    whitened = model.toZca(samples, epsilon=1e-6)
def testPca():
    """Exercise the PCA class on seeded random data.

    Builds a 5000 x 200 matrix with ``random.randn`` after seeding with 0,
    obtains a ``PCA`` object through ``cached``, and calls its ``pc``,
    ``toPC`` and ``toZca`` methods. Nothing is returned or asserted.
    """
    # ``random`` is presumably numpy.random (it provides ``randn``) --
    # TODO confirm against the file's imports.
    random.seed(0)
    nExamples, nDims = 5000, 200
    samples = random.randn(nExamples, nDims)

    # Call ``PCA(samples)`` directly here to bypass the cache.
    model = cached(PCA, samples)

    principalComponents = model.pc()
    projected = model.toPC(samples)
    whitened = model.toZca(samples, epsilon = 1e-6)
def getDataAndLabels(self, patchSize, number, seed=None):
    """Load NYU2 patches and labels, caching only when the result is small.

    The cache file size is estimated as
    ``prod(patchSize) * number * 4.0 / 1000000`` MB. Below
    ``MAX_CACHE_SIZE_MB`` the loader is run through ``cached``; otherwise
    ``loadNYU2Data`` is called directly. A line stating the decision is
    printed before loading.

    Args:
        patchSize: patch dimensions; forwarded to the loader.
        number: number of patches; forwarded to the loader.
        seed: forwarded to the loader (default ``None``).

    Returns:
        Tuple ``(patches, labels)``.
    """
    approxMbCachefile = prod(patchSize) * number * 4.0 / 1000000
    useCache = approxMbCachefile < MAX_CACHE_SIZE_MB

    if useCache:
        print('NYU2_Labeled layer:: Predicted approx size of %f MB, using cache' % (approxMbCachefile))
    else:
        print('NYU2_Labeled layer:: Skipping cache, approx size of %f MB > max cache size of %s MB' % (approxMbCachefile, repr(MAX_CACHE_SIZE_MB)))

    # Both paths pass exactly the same keyword arguments to the loader.
    loaderArgs = dict(patchSize=patchSize,
                      number=number,
                      rgbColors=self.colorChannels,
                      depthChannels=self.depthChannels,
                      seed=seed)
    if useCache:
        patches, labels = cached(loadNYU2Data, **loaderArgs)
    else:
        patches, labels = loadNYU2Data(**loaderArgs)
    return patches, labels
def main():
    """Plot top activations of a saved model, overall and per label.

    Loads a pickled model and a layer-1 whitener, samples windows with
    ``getBigAndSmallerSamples``, computes the model's representation of the
    large windows, and calls ``plotTopActivations`` once for all units and
    once per label for the 50 units with the highest mean-centered average
    activation on that label. Output goes to ``resman.rundir``.
    """
    resman.start('junk', diary=False)

    stica = loadFromPklGz('results/130407_132841_76b6586_rapidhacks_upson3_1c_l2_first/stackedTica_mod.pkl.gz')
    layer1Whitener = loadFromPklGz('../data/upson_rovio_3/white/train_10_50000_1c.whitener.pkl.gz')

    layerSizePlan = [10, 15, 23, 35, 53, 80, 120, 180]
    visLayer = 1

    # NOTE(review): the three results of this 50000-sample call are
    # overwritten below before being used. The call is kept because it may
    # be relied on to populate the cache -- confirm before removing.
    largeSampleMatrix, labelMatrix, labelStrings = cached(
        randomSampleMatrixWithLabels, trainFilter,
        seed=0, color=False,
        Nw=layerSizePlan[visLayer], Nsamples=50000)

    seed = 0
    Nw = layerSizePlan[visLayer - 1]    # e.g. 10
    Nwbig = layerSizePlan[visLayer]     # e.g. 15
    Nwshift = Nwbig - Nw                # e.g. 15 - 10 = 5
    Nsamples = 1000
    (largeSampleMatrix, stackedSmall, stackedSmallWhite,
     labelMatrix, labelStrings) = getBigAndSmallerSamples(
        trainFilter, layer1Whitener, seed, False, Nw, Nwshift, Nsamples)

    pooled = stica.getRepresentation(largeSampleMatrix)

    patchShape = (Nwbig, Nwbig)
    nTop = 50
    plotTopActivations(pooled, largeSampleMatrix, patchShape, resman.rundir,
                       nActivations=nTop, nSamples=20)

    # Subtract each row's mean (taken along axis 1) from that row.
    centered = (pooled.T - pooled.mean(1)).T

    for ii, label in enumerate(labelStrings):
        # Single pre-formatted argument: same text as the two-item
        # Python 2 print statement (items joined by one space).
        print('%s %s' % ('finding top for', label))
        classMask = labelMatrix[ii, :]
        if classMask.sum() == 0:
            print(' skipping, no examples')
            continue

        avgActivationForClass = (centered * classMask).mean(1)
        ranking = argsort(avgActivationForClass)
        # Last nTop entries of the ascending sort, highest first.
        topNeurons = ranking[-1:-(nTop + 1):-1]
        plotTopActivations(pooled[topNeurons, :], largeSampleMatrix,
                           patchShape, resman.rundir,
                           nActivations=nTop, nSamples=20,
                           prefix='topfor_%s' % label)

    resman.stop()
def learnNextLayer(self, params):
    """Train one more TICA layer on top of the current stack and append it.

    Data for the new layer is produced by ``makeNewData`` (through
    ``cached``) from the last existing layer and the layer-1 whitener, then
    each column is scaled to (approximately) unit norm. A ``TICA`` model is
    built, trained, optionally saved and plotted, and finally appended to
    ``self.ticas``.

    Args:
        params: mapping read with the keys ``'dataCrop'``,
            ``'hiddenISize'``, ``'hiddenJSize'``, ``'neighborhoodSize'``,
            ``'lambd'`` and ``'maxFuncCalls'``.

    Raises:
        Exception: if ``params['hiddenISize']`` differs from
            ``params['hiddenJSize']``.

    Side effects:
        Prints progress and cost figures. When ``self.saveDir`` is truthy,
        creates ``<saveDir>/layer_NN`` with ``os.makedirs`` and writes
        plots and ``tica.pkl.gz`` there.
    """
    nLayers = len(self.ticas)
    print 'StackedTica currently has %d layers, learning next' % nLayers

    # TODO: only works for one extra layer!
    # Sampling geometry is hard-coded rather than taken from ``params``.
    Nw = 10
    Nwshift = 5
    Nsamples = 50000
    #Nsamples = 1000; print 'HACK!'

    # Get data and norm it
    nextLayerData = cached(makeNewData, self.ticas[-1], self.l1whitener, seed = 0,
                           isColor = self.isColor, Nw = Nw, Nwshift = Nwshift,
                           Nsamples = Nsamples)
    # Sum of squares along axis 0 gives one norm per column; the 1e-8 term
    # keeps the divisor non-zero for all-zero columns.
    colNorms = sqrt(sum(nextLayerData**2, 0) + (1e-8))
    nextLayerData = nextLayerData / colNorms

    # Parameters
    if params['dataCrop']:
        # Debug aid: keep only the first params['dataCrop'] columns.
        print '\nWARNING: Cropping data from %d examples to only %d for debug\n' % (nextLayerData.shape[1], params['dataCrop'])
        nextLayerData = nextLayerData[:,:params['dataCrop']]
    if params['hiddenISize'] != params['hiddenJSize']:
        raise Exception('hiddenISize and hiddenJSize must be the same')
    hiddenLayerShape = (params['hiddenISize'], params['hiddenJSize'])
    neighborhoodParams = ('gaussian', params['neighborhoodSize'], 0, 0)

    # An empty layerLogDir doubles as the "do not save or plot" flag below.
    if self.saveDir:
        layerLogDir = os.path.join(self.saveDir, 'layer_%02d' % (nLayers+1))
        # NOTE(review): os.makedirs fails if the directory already exists.
        os.makedirs(layerLogDir)
    else:
        layerLogDir = ''

    # Print/plot data stats
    if layerLogDir:
        # Treat each column as a square pseudo-image whose side is the
        # truncated square root of the input dimension.
        pseudoImgShape = (int(sqrt(nextLayerData.shape[0])), int(sqrt(nextLayerData.shape[0])))
        plotImageData(nextLayerData, pseudoImgShape, layerLogDir, pc('data_raw'))
    printDataStats(nextLayerData)

    # Learn model
    tica = TICA(nInputs = nextLayerData.shape[0],
                hiddenLayerShape = hiddenLayerShape,
                neighborhoodParams = neighborhoodParams,
                lambd = params['lambd'],
                epsilon = 1e-5,
                saveDir = layerLogDir)

    # Manual debugging switch: set to True to drop into pdb instead of training.
    manuallySkipExpensivePart = False
    if not manuallySkipExpensivePart:
        # Costs are evaluated before and after training so they can be compared.
        beginTotalCost, beginPoolingCost, beginReconstructionCost, grad = tica.cost(tica.WW, nextLayerData)

        tic = time.time()
        tica.learn(nextLayerData, maxFun = params['maxFuncCalls'])
        execTime = time.time() - tic
        if layerLogDir:
            saveToFile(os.path.join(layerLogDir, 'tica.pkl.gz'), tica)    # save learned model

        endTotalCost, endPoolingCost, endReconstructionCost, grad = tica.cost(tica.WW, nextLayerData)

        print 'beginTotalCost, beginPoolingCost, beginReconstructionCost, endTotalCost, endPoolingCost, endReconstructionCost, execTime ='
        print [beginTotalCost, beginPoolingCost, beginReconstructionCost, endTotalCost, endPoolingCost, endReconstructionCost, execTime]
    else:
        pdb.set_trace()

    # Plot some results
    #plotImageRicaWW(tica.WW, imgShape, saveDir, tileShape = hiddenLayerShape, prefix = pc('WW_iterFinal'))
    if layerLogDir:
        # pseudoImgShape is only defined when layerLogDir is set (see above).
        self.plotResults(layerLogDir, tica, nextLayerData, pseudoImgShape, hiddenLayerShape)

    self.ticas.append(tica)
def getData(self, patchSize, number, seed = None):
    """Return patches produced by ``loadCS294Images``, going through the cache.

    Args:
        patchSize: forwarded as the ``patchSize`` keyword.
        number: forwarded as the ``number`` keyword.
        seed: forwarded as the ``seed`` keyword (default ``None``).

    Returns:
        Whatever ``cached(loadCS294Images, ...)`` returns.
    """
    loaderArgs = dict(patchSize = patchSize, number = number, seed = seed)
    return cached(loadCS294Images, **loaderArgs)