def __init__(self, data, dimsKeep):
        """Build the PCA object for ``data`` and remember ``dimsKeep``.

        The PCA is obtained through ``cached(PCA, data)`` instead of calling
        ``PCA(data)`` directly. A timestamped message is printed before and
        after that call.
        """
        print('%s %s' % (datetime.now(), 'starting PCA...'))
        self.pca = cached(PCA, data)
        print('%s %s' % (datetime.now(), 'done with PCA.'))

        self.dimsKeep = dimsKeep
Exemplo n.º 2
0
    def __init__(self, data, dimsKeep):
        """Store a PCA of ``data`` on ``self.pca`` and keep ``dimsKeep``.

        ``PCA`` is invoked via the ``cached`` helper; progress messages with
        the current time are printed around the call.
        """
        # Timestamp is taken right before the (possibly slow) PCA call.
        print(' '.join((str(datetime.now()), 'starting PCA...')))
        pcaResult = cached(PCA, data)
        self.pca = pcaResult
        # Second timestamp marks the end of the PCA step.
        print(' '.join((str(datetime.now()), 'done with PCA.')))

        self.dimsKeep = dimsKeep
Exemplo n.º 3
0
    def getData(self, patchSize, number, seed=None):
        """Return the result of ``loadCS294Images`` obtained via ``cached``.

        ``patchSize``, ``number`` and ``seed`` are forwarded unchanged as
        keyword arguments.
        """
        return cached(loadCS294Images,
                      patchSize=patchSize, number=number, seed=seed)
    def getData(self, patchSize, number, seed=None):
        """Sample patches from the upson_rovio_3 images, one example per column.

        Calls ``makeUpsonRovio3.randomSampleMatrixWithLabels`` through
        ``cached`` with the train filter. ``patchSize`` is passed as ``Nw`` and
        ``number`` as ``Nsamples``. The label outputs of that call are
        discarded; only the transposed sample matrix is returned.
        """
        wantColor = (self.colors == 3)
        sampleMatrix, _labelMatrix, _labelStrings = cached(
            makeUpsonRovio3.randomSampleMatrixWithLabels,
            makeUpsonRovio3.trainFilter,
            color=wantColor,
            Nw=patchSize,
            Nsamples=number,
            seed=seed,
            imgDirectory='../data/upson_rovio_3/imgfiles')

        # Transpose so that each example occupies one column.
        return sampleMatrix.T
def plotCov(data, saveDir=None, prefix='imgdata', show=False):
    """Compute ``cov(data)`` (via ``cached``), render it as an image, return it.

    If ``saveDir`` is truthy the image is saved as ``<prefix>_cov.png`` inside
    it; otherwise no save path is given to ``pil_imagesc``. When ``show`` is
    true the resulting image's ``show()`` method is called. The covariance
    result itself is the return value.
    """
    covMatrix = cached(cov, data)

    if saveDir:
        outPath = os.path.join(saveDir, '%s_cov.png' % prefix)
    else:
        outPath = None

    # pil_imagesc is always asked not to display; displaying is handled here.
    covImage = pil_imagesc(covMatrix, saveto=outPath, show=False)
    if show:
        covImage.show()
    return covMatrix
def plotCov(data, saveDir=None, prefix='imgdata', show=False):
    """Plot the covariance of ``data`` and return the covariance result.

    The covariance comes from ``cached(cov, data)``. The picture produced by
    ``pil_imagesc`` is written to ``saveDir/<prefix>_cov.png`` only when
    ``saveDir`` is truthy, and is displayed only when ``show`` is true.
    """
    covariance = cached(cov, data)

    # Default to "do not save"; fill in a path only if a directory was given.
    targetFile = None
    if saveDir:
        fileName = '%s_cov.png' % prefix
        targetFile = os.path.join(saveDir, fileName)

    picture = pil_imagesc(covariance, saveto=targetFile, show=False)
    if show:
        picture.show()

    return covariance
Exemplo n.º 7
0
    def getData(self, patchSize, number, seed=None):
        """Return randomly sampled upson_rovio_3 patches, one example per column.

        The sampling function and train filter both come from
        ``makeUpsonRovio3`` and are run through ``cached``. Colour sampling is
        requested exactly when ``self.colors == 3``. Only the sample matrix
        (transposed) is returned; the label outputs are ignored.
        """
        sampler = makeUpsonRovio3.randomSampleMatrixWithLabels
        sampled = cached(sampler,
                         makeUpsonRovio3.trainFilter,
                         color=(self.colors == 3),
                         Nw=patchSize, Nsamples=number, seed=seed,
                         imgDirectory='../data/upson_rovio_3/imgfiles')
        # The call yields three values; unpacking keeps that arity check.
        samples, _labelMatrix, _labelStrings = sampled

        return samples.T  # one example per column
def getBigAndSmallerSamples(fileFilter, whitener, seed, isColor, Nw, Nwshift, Nsamples):
    """Sample large windows and derive stacked small (and whitened) patches.

    Large windows of side ``Nw + Nwshift`` are sampled through
    ``cached(randomSampleMatrixWithLabels, ...)``, converted to stacked small
    patches by ``large2StackedSmall`` and normalized with
    ``whitener.raw2normalized``.

    Returns a 5-tuple: transposed large sample matrix, transposed stacked
    small patches, whitened stacked small patches, transposed label matrix
    and the label strings.
    """
    # Per the original author's notes: the larger window (e.g. 15 px for
    # Nw = 10) exists so that small patches overlap half with their
    # neighbours; the sampled matrix holds one example per row.
    largeWindow = Nw + Nwshift
    sampled = cached(randomSampleMatrixWithLabels, fileFilter, seed, isColor,
                     Nw=largeWindow, Nsamples=Nsamples)
    largeSampleMatrix, labelMatrix, labelStrings = sampled

    stackedSmall = large2StackedSmall(largeSampleMatrix, Nw, Nwshift)

    # Second return value of raw2normalized is not needed here.
    # Original note: the whitened data has one example per COLUMN,
    # shape (Nw^2, 4*Nsamples).
    stackedSmallWhite, _unused = whitener.raw2normalized(stackedSmall.T)

    return (largeSampleMatrix.T,
            stackedSmall.T,
            stackedSmallWhite,
            labelMatrix.T,
            labelStrings)
 def getDataAndLabels(self, patchSize, number, seed=None):
     """Load NYU2 patches and labels, caching only when the request is small.

     An approximate size in MB is computed from ``prod(patchSize) * number``
     (the factor 4.0 is presumably bytes per value -- confirm). Below
     ``MAX_CACHE_SIZE_MB`` the load goes through ``cached``; otherwise
     ``loadNYU2Data`` is called directly. Returns ``(patches, labels)``.
     """
     estimatedMb = prod(patchSize) * number * 4.0 / 1000000

     if estimatedMb < MAX_CACHE_SIZE_MB:
         print('NYU2_Labeled layer:: Predicted approx size of %f MB, using cache' % (estimatedMb))
         patches, labels = cached(loadNYU2Data,
                                  patchSize=patchSize,
                                  number=number,
                                  rgbColors=self.colorChannels,
                                  depthChannels=self.depthChannels,
                                  seed=seed)
         return patches, labels

     # Too large for the cache: run the loader directly.
     print('NYU2_Labeled layer:: Skipping cache, approx size of %f MB > max cache size of %s MB' % (estimatedMb, repr(MAX_CACHE_SIZE_MB)))
     patches, labels = loadNYU2Data(patchSize=patchSize,
                                    number=number,
                                    rgbColors=self.colorChannels,
                                    depthChannels=self.depthChannels,
                                    seed=seed)
     return patches, labels
Exemplo n.º 10
0
def testPca():
    """Exercise the PCA object on seeded random data.

    Draws a 5000 x 200 matrix with ``random.randn`` (presumably
    ``numpy.random`` -- confirm against the file's imports), builds the PCA
    through ``cached`` and calls ``pc``, ``toPC`` and ``toZca`` on it. The
    results are only bound to locals; nothing is asserted or returned.
    """
    random.seed(0)

    nExamples, nDims = 5000, 200
    data = random.randn(nExamples, nDims)

    pca = cached(PCA, data)

    components = pca.pc()
    projected = pca.toPC(data)
    whitened = pca.toZca(data, epsilon=1e-6)
def testPca():
    """Run the PCA helper once on reproducible random input.

    The random generator is seeded with 0, a (5000, 200) sample matrix is
    drawn via ``random.randn`` and a ``PCA`` is built from it via ``cached``.
    ``pc``, ``toPC`` and ``toZca`` (``epsilon=1e-6``) are then called; their
    results are kept in locals and not checked.
    """
    random.seed(0)

    shape = (5000, 200)  # (number of samples, dimensionality)
    data = random.randn(*shape)

    # Swap in PCA(data) here to bypass the cache.
    pca = cached(PCA, data)

    principalComps = pca.pc()
    dataInPcBasis = pca.toPC(data)
    zcaWhitened = pca.toZca(data, epsilon=1e-6)
Exemplo n.º 12
0
 def getDataAndLabels(self, patchSize, number, seed=None):
     """Return ``(patches, labels)`` from ``loadNYU2Data``, cached if small.

     The decision is based on an approximate size in MB derived from
     ``prod(patchSize) * number * 4.0 / 1000000`` compared against
     ``MAX_CACHE_SIZE_MB``. A message describing the choice is printed before
     the data is loaded.
     """
     approxMb = prod(patchSize) * number * 4.0 / 1000000
     useCache = approxMb < MAX_CACHE_SIZE_MB

     if useCache:
         print('NYU2_Labeled layer:: Predicted approx size of %f MB, using cache' % (approxMb))
         loaded = cached(loadNYU2Data,
                         patchSize=patchSize, number=number,
                         rgbColors=self.colorChannels,
                         depthChannels=self.depthChannels,
                         seed=seed)
     else:
         # Skip cache, just run
         print('NYU2_Labeled layer:: Skipping cache, approx size of %f MB > max cache size of %s MB' % (approxMb, repr(MAX_CACHE_SIZE_MB)))
         loaded = loadNYU2Data(patchSize=patchSize, number=number,
                               rgbColors=self.colorChannels,
                               depthChannels=self.depthChannels,
                               seed=seed)

     # Unpack to exactly two values, as the loader is expected to provide.
     patches, labels = loaded
     return patches, labels
def main():
    """Plot top activations of a saved stacked-TICA model, overall and per label.

    Loads a pickled model and a layer-1 whitener, samples large patches with
    ``getBigAndSmallerSamples``, computes the pooled representation and calls
    ``plotTopActivations`` once for all units and once per label that has at
    least one example (using the 50 units with the highest mean-centred
    activation for that label). Output goes to ``resman.rundir``.
    """
    resman.start('junk', diary=False)

    stica = loadFromPklGz('results/130407_132841_76b6586_rapidhacks_upson3_1c_l2_first/stackedTica_mod.pkl.gz')
    layer1Whitener = loadFromPklGz('../data/upson_rovio_3/white/train_10_50000_1c.whitener.pkl.gz')

    layerSizePlan = [10, 15, 23, 35, 53, 80, 120, 180]
    visLayer = 1

    # NOTE(review): all three results of this call are overwritten below
    # before being read. Kept so behaviour (including any caching done by
    # `cached`) is unchanged -- confirm whether it is still wanted.
    largeSampleMatrix, labelMatrix, labelStrings = cached(
        randomSampleMatrixWithLabels, trainFilter,
        seed=0, color=False,
        Nw=layerSizePlan[visLayer], Nsamples=50000)

    seed = 0
    Nw = layerSizePlan[visLayer - 1]    # e.g. 10
    Nwbig = layerSizePlan[visLayer]     # e.g. 15
    Nwshift = Nwbig - Nw                # e.g. 15 - 10 = 5
    Nsamples = 1000
    (largeSampleMatrix, stackedSmall, stackedSmallWhite,
     labelMatrix, labelStrings) = getBigAndSmallerSamples(
         trainFilter, layer1Whitener, seed, False, Nw, Nwshift, Nsamples)

    pooled = stica.getRepresentation(largeSampleMatrix)

    nTop = 50
    patchShape = (Nwbig, Nwbig)
    plotTopActivations(pooled, largeSampleMatrix, patchShape, resman.rundir,
                       nActivations=nTop, nSamples=20)

    # Subtract each row's mean from pooled (done via the transpose).
    pl = (pooled.T - pooled.mean(1)).T
    for ii, label in enumerate(labelStrings):
        print('%s %s' % ('finding top for', label))
        classRow = labelMatrix[ii, :]
        if classRow.sum() == 0:
            print('  skipping, no examples')
            continue
        avgActivationForClass = (pl * classRow).mean(1)
        sortIdx = argsort(avgActivationForClass)
        # Last nTop indices of the ascending sort, in descending order.
        topNeurons = sortIdx[-1:-(nTop + 1):-1]
        plotTopActivations(pooled[topNeurons, :], largeSampleMatrix, patchShape,
                           resman.rundir, nActivations=nTop, nSamples=20,
                           prefix='topfor_%s' % label)

    resman.stop()
    def learnNextLayer(self, params):
        """Train one more TICA layer on top of the last one and append it.

        Data for the new layer is produced by ``makeNewData`` (through
        ``cached``) from ``self.ticas[-1]`` and ``self.l1whitener``, then each
        column is divided by its norm. A ``TICA`` model is built, trained with
        ``tica.learn`` and appended to ``self.ticas``.

        ``params`` is indexed with the keys ``'dataCrop'``, ``'hiddenISize'``,
        ``'hiddenJSize'``, ``'neighborhoodSize'``, ``'lambd'`` and
        ``'maxFuncCalls'``.

        When ``self.saveDir`` is truthy a ``layer_NN`` sub-directory is created
        inside it and used for data plots, the saved model (``tica.pkl.gz``)
        and result plots.

        Raises ``Exception`` if ``params['hiddenISize']`` differs from
        ``params['hiddenJSize']``.
        """
        nLayers = len(self.ticas)
        print 'StackedTica currently has %d layers, learning next' % nLayers

        # TODO: only works for one extra layer!
        # Window size, shift and sample count are hard-coded here.
        Nw = 10
        Nwshift = 5
        Nsamples = 50000
        #Nsamples = 1000; print 'HACK!'

        # Get data and norm it
        nextLayerData = cached(makeNewData, self.ticas[-1], self.l1whitener, seed = 0,
                               isColor = self.isColor, Nw = Nw, Nwshift = Nwshift,
                               Nsamples = Nsamples)
        # Per-column norm; the 1e-8 under the square root avoids dividing by zero.
        colNorms = sqrt(sum(nextLayerData**2, 0) + (1e-8))
        nextLayerData = nextLayerData / colNorms

        # Parameters
        if params['dataCrop']:
            # Debug aid: keep only the first params['dataCrop'] columns.
            print '\nWARNING: Cropping data from %d examples to only %d for debug\n' % (nextLayerData.shape[1], params['dataCrop'])
            nextLayerData = nextLayerData[:,:params['dataCrop']]
        if params['hiddenISize'] != params['hiddenJSize']:
            raise Exception('hiddenISize and hiddenJSize must be the same')
        hiddenLayerShape = (params['hiddenISize'], params['hiddenJSize'])
        neighborhoodParams = ('gaussian', params['neighborhoodSize'], 0, 0)
        if self.saveDir:
            # One log directory per layer, numbered from the new layer's index.
            layerLogDir = os.path.join(self.saveDir, 'layer_%02d' % (nLayers+1))
            os.makedirs(layerLogDir)
        else:
            # Empty string means "no logging"; tested with `if layerLogDir` below.
            layerLogDir = ''

        # Print/plot data stats
        if layerLogDir:
            # Treat each column as a square image for plotting purposes.
            pseudoImgShape = (int(sqrt(nextLayerData.shape[0])), int(sqrt(nextLayerData.shape[0])))
            plotImageData(nextLayerData, pseudoImgShape, layerLogDir, pc('data_raw'))
        printDataStats(nextLayerData)

        # Learn model
        tica = TICA(nInputs            = nextLayerData.shape[0],
                    hiddenLayerShape   = hiddenLayerShape,
                    neighborhoodParams = neighborhoodParams,
                    lambd              = params['lambd'],
                    epsilon            = 1e-5,
                    saveDir            = layerLogDir)

        # Manual switch: set to True to drop into pdb instead of training.
        manuallySkipExpensivePart = False
        if not manuallySkipExpensivePart:
            # Costs before training, for the before/after report printed below.
            beginTotalCost, beginPoolingCost, beginReconstructionCost, grad = tica.cost(tica.WW, nextLayerData)

            tic = time.time()
            tica.learn(nextLayerData, maxFun = params['maxFuncCalls'])
            execTime = time.time() - tic
            if layerLogDir:
                saveToFile(os.path.join(layerLogDir, 'tica.pkl.gz'), tica)    # save learned model

            # Costs after training.
            endTotalCost, endPoolingCost, endReconstructionCost, grad = tica.cost(tica.WW, nextLayerData)

            print 'beginTotalCost, beginPoolingCost, beginReconstructionCost, endTotalCost, endPoolingCost, endReconstructionCost, execTime ='
            print [beginTotalCost, beginPoolingCost, beginReconstructionCost, endTotalCost, endPoolingCost, endReconstructionCost, execTime]
        else:
            pdb.set_trace()

        # Plot some results
        #plotImageRicaWW(tica.WW, imgShape, saveDir, tileShape = hiddenLayerShape, prefix = pc('WW_iterFinal'))
        if layerLogDir:
            # pseudoImgShape is defined above under the same layerLogDir condition.
            self.plotResults(layerLogDir, tica, nextLayerData, pseudoImgShape, hiddenLayerShape)

        self.ticas.append(tica)
    def getData(self, patchSize, number, seed=None):
        """Fetch patches by running ``loadCS294Images`` through ``cached``.

        All three arguments are passed on by keyword; whatever the cached
        call yields is returned as-is.
        """
        return cached(
            loadCS294Images,
            patchSize=patchSize,
            number=number,
            seed=seed,
        )