def makeData():
    '''Generate the square/circle toy-shape datasets for several image widths
    and sample counts.  Each (split, width, count) combination is pickled to
    ../data/simpleShapes/, and the 5000-sample sets also get a tiled preview
    PNG.'''
    tags = ['train_sc_p5', 'test_sc_p5', 'train_sc_p1', 'test_sc_p1',
            'train_4sc_p5', 'test_4sc_p5', 'train_4sc_p1', 'test_4sc_p1']
    for Nw in (2, 4, 10, 16, 28, 50):
        for Nsamples in (50, 500, 5000, 50000):
            # Reseed so every (Nw, Nsamples) pair draws reproducible data
            random.seed(0)
            for tag in tags:
                onProb = .5 if 'p5' in tag else .1
                sampler = random4SquaresCircles if '4sc' in tag else randomSquareCircle
                sampleXAndY = sampler(Nw, Nsamples, prob=onProb)
                saveToFile('../data/simpleShapes/%s_%02d_%d.pkl.gz' % (tag, Nw, Nsamples),
                           sampleXAndY)
                xx, yy = sampleXAndY
                if Nsamples != 5000:
                    continue
                # Save a preview mosaic only for the mid-sized datasets
                preview = Image.fromarray(tile_raster_images(
                    X=xx, img_shape=(Nw, Nw), tile_shape=(10, 15),
                    tile_spacing=(1, 1), scale_rows_to_unit_interval=False))
                preview.save('../data/simpleShapes/%s_%02d.png' % (tag, Nw))
def main():
    '''Sample small patch datasets from the Upson/Rovio image set.

    NOTE(review): this definition appears truncated at a chunk boundary in
    this view -- a fuller variant of the same script appears later in the
    file.'''
    random.seed(0)  # deterministic patch sampling
    # Image-name filters selecting which source images feed each split
    trainFilter = ['image-2534', 'image-2535']
    testFilter = ['image-2545']  # presumably used by the (cut-off) remainder -- TODO confirm
    # NOTE: 02 is a Python 2 octal literal, value 2 (patch width)
    saveToFile('../data/upson_rovio_1/train_02_50.pkl.gz',
               randomSampleMatrix(trainFilter, Nw = 02, Nsamples = 50))
def main():
    '''Command-line entry point: train several StackedLayers models, one per
    TICA hidden-layer width, saving each treatment and the collected models
    under the GitResultsManager run directory.'''
    parser = argparse.ArgumentParser(description='Trains a StackedLayers model.')
    parser.add_argument('layerFilename', type = str,
                        help = 'File defining layers, something like tica-10-15.layers')
    parser.add_argument('trainParamsFilename', type = str,
                        help = 'File defining training parameters, something like tica-10-15.trainparams')
    parser.add_argument('--name', type = str, default = 'junk',
                        help = 'Name for GitResultsManager results directory (default: junk)')
    parser.add_argument('--load', type = str, default = '',
                        help = ('Load a previously created StackedLayers object. This can ' +
                                'be used to resume training a previously checkpointed, ' +
                                'partially trained StackedLayers object (default: none)'))
    parser.add_argument('--maxlayer', type = int, default = -1,
                        help = ('Maximum layer to train, -1 to train all. Can be used to train' +
                                'only a subset of layers in an otherwise deeper model. Default: -1.'))
    parser.add_argument('--quick', action='store_true',
                        help = 'Enable quick mode (default: off)')
    parser.add_argument('--nodiary', action='store_true',
                        help = 'Disable diary (default: diary is on)')
    args = parser.parse_args()

    resman.start(args.name, diary = not args.nodiary)
    saveDir = resman.rundir
    layerDefinitions = importFromFile(args.layerFilename, 'layers')
    trainParams = importFromFile(args.trainParamsFilename, 'trainParams')
    # Copy the config files next to the results for reproducibility
    shutil.copyfile(args.layerFilename, os.path.join(saveDir, 'params.layers'))
    shutil.copyfile(args.trainParamsFilename, os.path.join(saveDir, 'params.trainparams'))

    # Experiment: train a few Stacked Layers with different ticas
    assert not args.load, 'Loading does not make sense here.'
    sls = []
    for ii, hiddenWidth in enumerate((8,12,16,20,24,28,32,36,40)):
        # One subdirectory per width "treatment"
        treatmentDir = os.path.join(saveDir, 'treatment_%02d_%d' % (ii, hiddenWidth))
        os.mkdir(treatmentDir)
        print '\n' * 4 + '*' * 40
        print 'Treatment %d, width %d (results in %s)' % (ii, hiddenWidth, treatmentDir)
        print '*' * 40
        # Layer index 2 is required to be the tica layer whose size we sweep
        assert layerDefinitions[2]['type'] == 'tica'
        layerDefinitions[2]['hiddenSize'] = (hiddenWidth, hiddenWidth)
        print 'Creating new StackedLayers object'
        sl = StackedLayers(layerDefinitions)
        sl.printStatus()
        sl.train(trainParams, saveDir = treatmentDir, quick = args.quick, maxlayer = args.maxlayer)
        sls.append(sl)
    # Save all trained models together as well
    fileFinal = os.path.join(saveDir, 'multiStackedLayers.pkl.gz')
    saveToFile(fileFinal, sls)
    resman.stop()
def makeData():
    '''Generate random cube/sphere 3-D volume datasets and pickle train/test
    splits of several sizes to ../data/simple3DShapes/.'''
    volumeShape = (10, 10, 20)
    splits = ((0, 'train'), (1, 'test'))
    for Nsamples in (50, 500, 5000, 50000):
        for seed, splitName in splits:
            random.seed(seed)  # distinct but deterministic data per split
            data = randomCubeSphere(volumeShape, Nsamples=Nsamples)
            saveToFile('../data/simple3DShapes/poisson_%s_%d.pkl.gz' % (splitName, Nsamples),
                       data)
def main():
    '''Write uniform-[0,1) random matrices, one file per combination of
    sample count, patch width, color depth, and train/test split.'''
    for Nsamples in (50, 500, 5000, 50000):
        for Nw in (2, 3, 4, 6, 10, 15, 20, 25, 28):
            for isColor in (True, False):
                for seed, splitName in ((0, 'train'), (123456, 'test')):
                    random.seed(seed)  # reproducible per split
                    nColors = 3 if isColor else 1
                    shape = (Nw * Nw * nColors, Nsamples)
                    data = random.uniform(0, 1, shape)
                    path = ('../data/random/randomu01_%s_%02d_%d_%dc.pkl.gz'
                            % (splitName, Nw, Nsamples, nColors))
                    saveToFile(path, data)
def makeData():
    '''Build pickled train/test random cube/sphere volume datasets in several
    sizes (duplicate of the generator above, restyled).'''
    size = (10, 10, 20)
    sampleCounts = (50, 500, 5000, 50000)
    for Nsamples in sampleCounts:
        for seed, label in ((0, 'train'), (1, 'test')):
            # Fixed seed per split keeps regeneration deterministic
            random.seed(seed)
            saveToFile('../data/simple3DShapes/poisson_%s_%d.pkl.gz' % (label, Nsamples),
                       randomCubeSphere(size, Nsamples=Nsamples))
def main():
    '''Generate uniform random baseline datasets (grayscale and color) for
    every (split, width, count) combination.'''
    splitSeeds = ((0, 'train'), (123456, 'test'))
    for Nsamples in (50, 500, 5000, 50000):
        for Nw in (2, 3, 4, 6, 10, 15, 20, 25, 28):
            for color in (True, False):
                nColors = 3 if color else 1
                for seed, tag in splitSeeds:
                    random.seed(seed)
                    matrix = random.uniform(0, 1, (Nw * Nw * nColors, Nsamples))
                    saveToFile('../data/random/randomu01_%s_%02d_%d_%dc.pkl.gz'
                               % (tag, Nw, Nsamples, nColors),
                               matrix)
def main(): resman.start('junk', diary = False) #l1tica = loadFromPklGz('results/130402_033310_44cc757_master_psearchTica_UP/00022_data/tica.pkl.gz') l1tica = loadFromPklGz('results/130406_184751_3d90386_rapidhacks_upson3_1c_l1/tica.pkl.gz') # 1c Upson3 #layer1Whitener = loadFromPklGz('../data/upson_rovio_2/white/train_10_50000_1c.whitener.pkl.gz') layer1Whitener = loadFromPklGz('../data/upson_rovio_3/white/train_10_50000_1c.whitener.pkl.gz') layerSizePlan = [10, 15, 23, 35, 53, 80, 120, 180] stackedTica = StackedTICA(l1tica, layer1Whitener, '../data/upson_rovio_3/imgfiles/', layerSizePlan = layerSizePlan, isColor = False, saveDir = resman.rundir) if False: print 'JUST DEBUG...' pdb.set_trace() tica = stackedTica.ticas[0] pdb.set_trace() return #data,labels,strings = loadUpsonData3('../data/upson_rovio_3/train_10_50000_1c.pkl.gz') #data = loadFromPklGz('../data/upson_rovio_3/white/train_10_50000_1c.white.pkl.gz') #stackedTica.plotResults(resman.rundir, stackedTica.ticas[0], data, (10,10), (15,15)) #pdb.set_trace() params = {} params['hiddenISize'] = 15 params['hiddenJSize'] = params['hiddenISize'] params['neighborhoodSize'] = 1.0 params['lambd'] = .026 params['randSeed'] = 0 #params['dataWidth'] = 10 #params['nColors'] = 1 #params['isColor'] = (params['nColors'] == 3) #params['imgShape'] = ((params['dataWidth'], params['dataWidth'], 3) # if params['isColor'] else # (params['dataWidth'], params['dataWidth'])) params['maxFuncCalls'] = 3 #params['whiten'] = True # Just false for Space Invaders dataset... params['dataCrop'] = None # Set to None to not crop data... #params['dataCrop'] = 10000 # Set to None to not crop data... stackedTica.learnNextLayer(params) #print 'HACK FOR DEBUG' saveToFile(os.path.join(resman.rundir, 'stackedTica.pkl.gz'), stackedTica) # save learned model resman.stop()
def makeData():
    '''Create pickled square/circle shape datasets (restyled duplicate of the
    generator earlier in the file), plus preview PNGs for 5000-sample sets.'''
    for Nw in (2, 4, 10, 16, 28, 50):
        for Nsamples in (50, 500, 5000, 50000):
            random.seed(0)  # same draws for every size combination
            for name in ['train_sc_p5', 'test_sc_p5', 'train_sc_p1', 'test_sc_p1',
                         'train_4sc_p5', 'test_4sc_p5', 'train_4sc_p1', 'test_4sc_p1']:
                # '4sc' datasets use the four-shapes sampler; p5/p1 selects density
                makeSample = random4SquaresCircles if '4sc' in name else randomSquareCircle
                pOn = .5 if 'p5' in name else .1
                xyPair = makeSample(Nw, Nsamples, prob=pOn)
                saveToFile('../data/simpleShapes/%s_%02d_%d.pkl.gz' % (name, Nw, Nsamples),
                           xyPair)
                xx, yy = xyPair
                if Nsamples == 5000:
                    tiled = tile_raster_images(X=xx, img_shape=(Nw, Nw),
                                               tile_shape=(10, 15), tile_spacing=(1, 1),
                                               scale_rows_to_unit_interval=False)
                    Image.fromarray(tiled).save('../data/simpleShapes/%s_%02d.png' % (name, Nw))
def main(): '''''' try: dataLoaderName, dataPath, savePathWhite, savePathWhiteNormed, savePathWhiter = sys.argv[ 1:6] except: print 'usage: dataLoaderName, dataPath, savePathWhite, savePathWhiteNormed, savePathWhiter' sys.exit(1) dataLoader = globals()[dataLoaderName] # convert string to function loaded = dataLoader(dataPath) if type(loaded) is tuple: data, labels, labelStrings = loaded print 'Data has labels:', labelStrings else: data = loaded labels, labelStrings = None, None print 'Data does not have labels.' #data = data[:,:1000]; print 'HACKK!' print 'Raw data stats:' printDataStats(data) # Whiten with PCA whiteningStage = PCAWhiteningDataNormalizer(data) saveToFile(savePathWhiter, whiteningStage) # Non-normed dataWhite, junk = whiteningStage.raw2normalized(data, unitNorm=False) print 'White data stats:' printDataStats(dataWhite) saveToFile(savePathWhite, dataWhite) del dataWhite # Normed dataWhiteNormed, junk = whiteningStage.raw2normalized(data, unitNorm=True) print 'White normed data stats:' printDataStats(dataWhiteNormed) saveToFile(savePathWhiteNormed, dataWhiteNormed) del dataWhiteNormed print 'done.'
def runTest(saveDir, params):
    '''Train a single TICA model per the given params dict and save model,
    plots, and a results summary into saveDir.  Returns the results dict
    (begin/end costs and wall-clock training time).

    Imports happen inside the function, presumably so it can be shipped to a
    worker process with its own sys.path -- TODO confirm.'''
    import time, os, sys
    from numpy import *
    from GitResultsManager import GitResultsManager
    #raise Exception('path is %s' % sys.path)
    #raise Exception('version is %s' % sys.version_info)
    #raise Exception('cwd is %s' % os.getcwd())
    from tica import TICA
    from util.misc import MakePc, Counter
    from visualize import plotImageData, plotCov, plotImageRicaWW, plotRicaActivations, plotRicaReconstructions
    from util.dataPrep import PCAWhiteningDataNormalizer, printDataStats
    from util.dataLoaders import loadAtariData, loadUpsonData, loadUpsonData3, loadRandomData, saveToFile
    #counter = Counter()
    #pc = lambda st : makePc(st, counter = counter)
    pc = MakePc(Counter())  # prefix-counter used to number output plot files

    #########################
    # Parameters
    #########################
    hiddenISize = params['hiddenISize']
    hiddenJSize = params['hiddenJSize']
    neighborhoodParams = ('gaussian', params['neighborhoodSize'], 0, 0)
    lambd = params['lambd']
    epsilon = 1e-5
    maxFuncCalls = params['maxFuncCalls']
    randSeed = params['randSeed']
    whiten = params['whiten']
    dataCrop = params['dataCrop']
    # Loader is named by string; look it up among the functions imported above
    dataLoader = locals().get(params['dataLoader'])  # Convert string to actual function
    dataPath = params['dataPath']
    imgShape = params['imgShape']
    hiddenLayerShape = (hiddenISize, hiddenJSize)

    #########################
    # Data
    #########################
    # Load data
    #data = loadAtariData('../data/atari/mspacman_train_15_50000_3c.pkl.gz'); imgShape = (15,15,3)
    #data = loadAtariData('../data/atari/space_invaders_train_15_50000_3c.pkl.gz'); imgShape = (15,15,3)
    loaded = dataLoader(dataPath)
    if type(loaded) is tuple:
        # Labeled datasets come back as (data, labels, labelStrings)
        data, labels, labelStrings = loaded
        print 'Data has labels:', labelStrings
    else:
        data = loaded
        labels, labelStrings = None, None
        print 'Data does not have labels.'
    if dataCrop:
        print '\nWARNING: Cropping data from %d examples to only %d for debug\n' % (data.shape[1], dataCrop)
        data = data[:, :dataCrop]
    nInputs = data.shape[0]  # data is (features x examples)
    isColor = len(imgShape) > 2
    print '\nParameters:'
    for key in ['nInputs', 'hiddenISize', 'hiddenJSize', 'neighborhoodParams',
                'lambd', 'epsilon', 'maxFuncCalls', 'randSeed', 'dataCrop',
                'dataLoader', 'dataPath', 'imgShape', 'whiten']:
        print ' %20s: %s' % (key, locals()[key])
    print

    skipVis = True  # visualization disabled by default to save time
    if whiten:
        if not skipVis:
            # Visualize before prep
            plotImageData(data, imgShape, saveDir, pc('data_raw'))
            plotCov(data, saveDir, pc('data_raw'))
            printDataStats(data)
        # Whiten with PCA
        whiteningStage = PCAWhiteningDataNormalizer(data, saveDir=saveDir)
        dataWhite, junk = whiteningStage.raw2normalized(data, unitNorm=True)
        #dataOrig = whiteningStage.normalized2raw(dataWhite)
        dataOrig = data
        data = dataWhite
        if not skipVis:
            # Visualize after prep
            plotImageData(data, imgShape, saveDir, pc('data_white'))
            plotCov(data, saveDir, pc('data_white'))
            printDataStats(data)

    #########################
    # Model
    #########################
    random.seed(randSeed)
    tica = TICA(nInputs=prod(imgShape),
                hiddenLayerShape=hiddenLayerShape,
                neighborhoodParams=neighborhoodParams,
                lambd=lambd,
                epsilon=epsilon,
                saveDir=saveDir)
    # Record cost before training so improvement can be reported
    beginTotalCost, beginPoolingCost, beginReconstructionCost, grad = tica.cost(tica.WW, data)
    tic = time.time()
    tica.learn(data, maxFun=maxFuncCalls)
    execTime = time.time() - tic
    saveToFile(os.path.join(saveDir, 'tica.pkl.gz'), tica)  # save learned model
    plotImageRicaWW(tica.WW, imgShape, saveDir,
                    tileShape=hiddenLayerShape, prefix=pc('WW_iterFinal'))
    plotRicaActivations(tica.WW, data, saveDir, prefix=pc('activations_iterFinal'))
    # Reconstructions are shown in raw space only if we whitened
    unwhitener = whiteningStage.normalized2raw if whiten else None
    plotRicaReconstructions(tica, data, imgShape, saveDir,
                            unwhitener=unwhitener,
                            tileShape=hiddenLayerShape,
                            prefix=pc('recon_iterFinal'),
                            number=20)
    endTotalCost, endPoolingCost, endReconstructionCost, grad = tica.cost(tica.WW, data)

    print 'beginTotalCost, beginPoolingCost, beginReconstructionCost, endTotalCost, endPoolingCost, endReconstructionCost, execTime ='
    print [beginTotalCost, beginPoolingCost, beginReconstructionCost,
           endTotalCost, endPoolingCost, endReconstructionCost, execTime]
    results = {'beginTotalCost': beginTotalCost,
               'beginPoolingCost': beginPoolingCost,
               'beginReconstructionCost': beginReconstructionCost,
               'endTotalCost': endTotalCost,
               'endPoolingCost': endPoolingCost,
               'endReconstructionCost': endReconstructionCost,
               'execTime': execTime}
    # Save locally just in case of exception in main program
    myResults = {'params': params, 'results': results}
    saveToFile(os.path.join(saveDir, 'myresults.pkl.gz'), myResults)
    return results
def learnNextLayer(self, params):
    '''Learn one additional TICA layer on top of the current stack.

    Pulls data through the existing layers (via cached makeNewData), unit-norms
    the columns, trains a new TICA on it, and appends it to self.ticas.
    params: dict with hiddenISize/hiddenJSize (must be equal), neighborhoodSize,
    lambd, maxFuncCalls, dataCrop.'''
    nLayers = len(self.ticas)
    print 'StackedTica currently has %d layers, learning next' % nLayers

    # TODO: only works for one extra layer!
    Nw = 10
    Nwshift = 5
    Nsamples = 50000
    #Nsamples = 1000; print 'HACK!'

    # Get data and norm it
    nextLayerData = cached(makeNewData, self.ticas[-1], self.l1whitener, seed = 0,
                           isColor = self.isColor, Nw = Nw, Nwshift = Nwshift,
                           Nsamples = Nsamples)
    # Unit-norm each column; 1e-8 guards against division by zero
    colNorms = sqrt(sum(nextLayerData**2, 0) + (1e-8))
    nextLayerData = nextLayerData / colNorms

    # Parameters
    if params['dataCrop']:
        print '\nWARNING: Cropping data from %d examples to only %d for debug\n' % (nextLayerData.shape[1], params['dataCrop'])
        nextLayerData = nextLayerData[:,:params['dataCrop']]
    if params['hiddenISize'] != params['hiddenJSize']:
        raise Exception('hiddenISize and hiddenJSize must be the same')
    hiddenLayerShape = (params['hiddenISize'], params['hiddenJSize'])
    neighborhoodParams = ('gaussian', params['neighborhoodSize'], 0, 0)
    if self.saveDir:
        layerLogDir = os.path.join(self.saveDir, 'layer_%02d' % (nLayers+1))
        os.makedirs(layerLogDir)
    else:
        layerLogDir = ''  # empty string disables all saving/plotting below

    # Print/plot data stats
    if layerLogDir:
        # Data has no natural image shape here; fake a square one for plotting
        pseudoImgShape = (int(sqrt(nextLayerData.shape[0])), int(sqrt(nextLayerData.shape[0])))
        plotImageData(nextLayerData, pseudoImgShape, layerLogDir, pc('data_raw'))
        printDataStats(nextLayerData)

    # Learn model
    tica = TICA(nInputs = nextLayerData.shape[0],
                hiddenLayerShape = hiddenLayerShape,
                neighborhoodParams = neighborhoodParams,
                lambd = params['lambd'],
                epsilon = 1e-5,
                saveDir = layerLogDir)
    manuallySkipExpensivePart = False  # flip by hand when debugging the plots only
    if not manuallySkipExpensivePart:
        # Cost before training, for reporting improvement
        beginTotalCost, beginPoolingCost, beginReconstructionCost, grad = tica.cost(tica.WW, nextLayerData)
        tic = time.time()
        tica.learn(nextLayerData, maxFun = params['maxFuncCalls'])
        execTime = time.time() - tic
        if layerLogDir:
            saveToFile(os.path.join(layerLogDir, 'tica.pkl.gz'), tica)  # save learned model
        endTotalCost, endPoolingCost, endReconstructionCost, grad = tica.cost(tica.WW, nextLayerData)

        print 'beginTotalCost, beginPoolingCost, beginReconstructionCost, endTotalCost, endPoolingCost, endReconstructionCost, execTime ='
        print [beginTotalCost, beginPoolingCost, beginReconstructionCost,
               endTotalCost, endPoolingCost, endReconstructionCost, execTime]
    else:
        pdb.set_trace()

    # Plot some results
    #plotImageRicaWW(tica.WW, imgShape, saveDir, tileShape = hiddenLayerShape, prefix = pc('WW_iterFinal'))
    if layerLogDir:
        self.plotResults(layerLogDir, tica, nextLayerData, pseudoImgShape, hiddenLayerShape)
    self.ticas.append(tica)
def train(self, trainParams, saveDir=None, quick=False, maxlayer=-1, onlyInit=False):
    '''Train all layers. if onlyInit, then do initialization but skip training.

    trainParams: dict mapping layer name -> per-layer training params.
    saveDir: if set, checkpoint the whole object and plot after each layer.
    quick: cap training examples at 1000 (debug mode).
    maxlayer: highest layer index to train; -1 means all layers.'''
    # check to make sure each trainParam matches a known layer...
    for layerName in trainParams.keys():
        if layerName not in self.layerNames:
            raise Exception('unknown layer name in param file: %s' % layerName)
    # ...and each trainable but untrained layer has a trainParam present
    for layerIdx, layer in enumerate(self.layers):
        if layer.trainable and not layer.isTrained:
            if not layer.name in trainParams:
                raise Exception('Param file missing training params for layer: %s' % layer.name)

    dataLayer = self.layers[0]  # layer 0 supplies patchSize/stride for the data
    if maxlayer == -1:
        maxlayer = len(self.layers) - 1

    trainedSomething = False
    for layerIdx, layer in enumerate(self.layers[:maxlayer + 1]):
        if layer.trainable and not layer.isTrained:
            trainedSomething = True
            print '\n' + '*' * 40
            if onlyInit:
                print 'just initializing layer %d - %s (%s)' % (layerIdx, layer.name, layer.layerType)
            else:
                print 'training layer %d - %s (%s)' % (layerIdx, layer.name, layer.layerType)
            print '*' * 40 + '\n'

            layerTrainParams = trainParams[layer.name]
            layer.initialize(layerTrainParams, seed=0)
            if onlyInit:
                continue  # Skip training

            numExamples = layerTrainParams['examples']
            if quick and numExamples > 1000:
                numExamples = 1000
                print 'QUICK MODE: chopping training examples to 1000!'

            # Make sure layer.sees, data.stride, and data.patchSize are all same len
            assert len(layer.seesPatches) == len(dataLayer.patchSize)
            assert len(layer.seesPatches) == len(dataLayer.stride)

            # Get data
            print 'gc.collect found', gc.collect(), 'objects'
            trainRawDataLargePatches, trainRawDataPatches = self.getDataForLayer(layerIdx, numExamples)
            print 'Memory used to store trainRawDataLargePatches: %g MB' % (trainRawDataLargePatches.nbytes / 1e6)
            print 'Memory used to store trainRawDataPatches: %g MB' % (trainRawDataPatches.nbytes / 1e6)
            del trainRawDataLargePatches  # only the small patches are needed below
            print 'gc.collect found', gc.collect(), 'objects'

            # Push data through N-1 layers
            dataArrangementLayer0 = DataArrangement(sliceShape=layer.seesPatches, nSlices=numExamples)
            tic = Tic('forward prop')
            trainPrevLayerData, dataArrangementPrevLayer = self.forwardProp(
                trainRawDataPatches, dataArrangementLayer0, layerIdx=layerIdx - 1)
            tic()
            print 'Memory used to store trainPrevLayerData: %g MB' % (trainPrevLayerData.nbytes / 1e6)

            # Free the raw patches from memory
            del trainRawDataPatches
            print 'gc.collect found', gc.collect(), 'objects'

            # Train layer
            tic = Tic('train')
            layer.train(trainPrevLayerData, dataArrangementPrevLayer, layerTrainParams, quick=quick)
            tic()
            print 'training done for layer %d - %s (%s)' % (layerIdx, layer.name, layer.layerType)

            # Checkpoint and plot
            if saveDir:
                fileFinal = os.path.join(saveDir, 'stackedLayers.pkl.gz')
                # Write to a temp file and rename so a crash mid-save cannot
                # corrupt the previous checkpoint
                fileTmp = fileFinal + '.tmp'
                print 'Saving checkpoint...'
                saveToFile(fileTmp, self)
                os.rename(fileTmp, fileFinal)
                print 'Saving these StackedLayers...'
                self.printStatus()
                print '... to %s' % fileFinal

                print '\n ' + '*' * 20
                print ' * vis layer %d - %s (%s)' % (layerIdx, layer.name, layer.layerType)
                print ' ' + '*' * 20 + '\n'
                tic = Tic('vis')
                #prefix = 'layer_%02d_%s_' % (layerIdx, layer.name)
                #layer.plot(trainPrevLayerData, dataArrangementPrevLayer, saveDir, prefix)
                self.visLayer(layerIdx, saveDir=saveDir, quick=quick)
                tic()
                print

    if not trainedSomething:
        print '\nNothing to train. Maybe it was already finished?'
def main():
    '''Command-line entry point (formatted duplicate of the sweep script
    earlier in the file): train one StackedLayers model per TICA width and
    save every treatment plus the collected models.'''
    parser = argparse.ArgumentParser(
        description='Trains a StackedLayers model.')
    parser.add_argument(
        'layerFilename',
        type=str,
        help='File defining layers, something like tica-10-15.layers')
    parser.add_argument(
        'trainParamsFilename',
        type=str,
        help=
        'File defining training parameters, something like tica-10-15.trainparams'
    )
    parser.add_argument(
        '--name',
        type=str,
        default='junk',
        help='Name for GitResultsManager results directory (default: junk)')
    parser.add_argument(
        '--load',
        type=str,
        default='',
        help=('Load a previously created StackedLayers object. This can ' +
              'be used to resume training a previously checkpointed, ' +
              'partially trained StackedLayers object (default: none)'))
    parser.add_argument(
        '--maxlayer',
        type=int,
        default=-1,
        help=(
            'Maximum layer to train, -1 to train all. Can be used to train' +
            'only a subset of layers in an otherwise deeper model. Default: -1.'
        ))
    parser.add_argument('--quick', action='store_true',
                        help='Enable quick mode (default: off)')
    parser.add_argument('--nodiary', action='store_true',
                        help='Disable diary (default: diary is on)')
    args = parser.parse_args()

    resman.start(args.name, diary=not args.nodiary)
    saveDir = resman.rundir
    layerDefinitions = importFromFile(args.layerFilename, 'layers')
    trainParams = importFromFile(args.trainParamsFilename, 'trainParams')
    # Copy the config files into the run directory for reproducibility
    shutil.copyfile(args.layerFilename, os.path.join(saveDir, 'params.layers'))
    shutil.copyfile(args.trainParamsFilename,
                    os.path.join(saveDir, 'params.trainparams'))

    # Experiment: train a few Stacked Layers with different ticas
    assert not args.load, 'Loading does not make sense here.'
    sls = []
    for ii, hiddenWidth in enumerate((8, 12, 16, 20, 24, 28, 32, 36, 40)):
        # One subdirectory per width "treatment"
        treatmentDir = os.path.join(saveDir,
                                    'treatment_%02d_%d' % (ii, hiddenWidth))
        os.mkdir(treatmentDir)
        print '\n' * 4 + '*' * 40
        print 'Treatment %d, width %d (results in %s)' % (ii, hiddenWidth,
                                                          treatmentDir)
        print '*' * 40
        # Layer index 2 must be the tica layer whose hidden size we sweep
        assert layerDefinitions[2]['type'] == 'tica'
        layerDefinitions[2]['hiddenSize'] = (hiddenWidth, hiddenWidth)
        print 'Creating new StackedLayers object'
        sl = StackedLayers(layerDefinitions)
        sl.printStatus()
        sl.train(trainParams,
                 saveDir=treatmentDir,
                 quick=args.quick,
                 maxlayer=args.maxlayer)
        sls.append(sl)
    fileFinal = os.path.join(saveDir, 'multiStackedLayers.pkl.gz')
    saveToFile(fileFinal, sls)
    resman.stop()
# NOTE(review): continuation of a main() whose def is cut off above this
# chunk -- it saves 4x4 and 15x15 patch datasets.  Nw=04 is a Python 2
# octal literal (== 4).
    saveToFile('../data/upson_rovio_1/train_04_50.pkl.gz',
               randomSampleMatrix(trainFilter, Nw=04, Nsamples=50))
    saveToFile('../data/upson_rovio_1/test_04_50.pkl.gz',
               randomSampleMatrix(testFilter, Nw=04, Nsamples=50))
    saveToFile('../data/upson_rovio_1/train_04_50000.pkl.gz',
               randomSampleMatrix(trainFilter, Nw=04, Nsamples=50000))
    saveToFile('../data/upson_rovio_1/test_04_50000.pkl.gz',
               randomSampleMatrix(testFilter, Nw=04, Nsamples=50000))
    # 10x10 datasets currently disabled
    #saveToFile('../data/upson_rovio_1/train_10_50.pkl.gz',    randomSampleMatrix(trainFilter, Nw = 10, Nsamples = 50))
    #saveToFile('../data/upson_rovio_1/test_10_50.pkl.gz',     randomSampleMatrix(testFilter,  Nw = 10, Nsamples = 50))
    #saveToFile('../data/upson_rovio_1/train_10_50000.pkl.gz', randomSampleMatrix(trainFilter, Nw = 10, Nsamples = 50000))
    #saveToFile('../data/upson_rovio_1/test_10_50000.pkl.gz',  randomSampleMatrix(testFilter,  Nw = 10, Nsamples = 50000))
    saveToFile('../data/upson_rovio_1/train_15_50.pkl.gz',
               randomSampleMatrix(trainFilter, Nw=15, Nsamples=50))
    saveToFile('../data/upson_rovio_1/test_15_50.pkl.gz',
               randomSampleMatrix(testFilter, Nw=15, Nsamples=50))
    saveToFile('../data/upson_rovio_1/train_15_50000.pkl.gz',
               randomSampleMatrix(trainFilter, Nw=15, Nsamples=50000))
    saveToFile('../data/upson_rovio_1/test_15_50000.pkl.gz',
               randomSampleMatrix(testFilter, Nw=15, Nsamples=50000))
    # 28x28 datasets currently disabled
    #saveToFile('../data/upson_rovio_1/train_28_50.pkl.gz',    randomSampleMatrix(trainFilter, Nw = 28, Nsamples = 50))
    #saveToFile('../data/upson_rovio_1/test_28_50.pkl.gz',     randomSampleMatrix(testFilter,  Nw = 28, Nsamples = 50))
    #saveToFile('../data/upson_rovio_1/train_28_50000.pkl.gz', randomSampleMatrix(trainFilter, Nw = 28, Nsamples = 50000))
    #saveToFile('../data/upson_rovio_1/test_28_50000.pkl.gz',  randomSampleMatrix(testFilter,  Nw = 28, Nsamples = 50000))


if __name__ == '__main__':
    main()
def testIca(datasets, savedir = None, smallImgHack = False, quickHack = False):
    '''Test ICA on a given dataset.

    datasets: ((train_x, train_y), (valid_x, valid_y), (test_x, test_y));
        only train_x is actually fit -- valid/test are used for shape printout.
    savedir: if set, save all plots/model there; else show plots and drop
        into ipdb at the end.
    smallImgHack: crop examples to 100 dims (images will be misaligned).
    quickHack: use only the first 2500 training examples.'''
    random.seed(1)

    # 0. Get data
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    if quickHack:
        print '!!! Using quickHack !!!'
        train_set_x = train_set_x[:2500,:]
        if train_set_y is not None:
            train_set_y = train_set_y[:2500]
    if smallImgHack:
        print '!!! Using smallImgHack !!! (images will be misaligned)'
        train_set_x = train_set_x[:,:100]

    print ('(%d, %d, %d) %d dimensional examples in (train, valid, test)'
           % (train_set_x.shape[0], valid_set_x.shape[0],
              test_set_x.shape[0], train_set_x.shape[1]))
    nDim = train_set_x.shape[1]
    imgDim = int(round(sqrt(nDim)))  # Might not always be true...
    # Random dimension subsets for histogram/scatter plots below
    randIdxRaw = random.randint(0, nDim, 100)
    randIdxWhite = random.randint(0, nDim, 100)
    randIdxSource = random.randint(0, nDim, 100)

    image = Image.fromarray(tile_raster_images(
        X = train_set_x,
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'data_raw.png'))
    image.show()
    pyplot.figure()
    for ii in range(20):
        idx = randIdxRaw[ii]
        pyplot.subplot(4,5,ii+1)
        pyplot.title('raw dim %d' % idx)
        pyplot.hist(train_set_x[:,idx])
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_raw_hist.png'))

    # 1. Whiten data
    print 'Whitening data with pca...'
    pca = PCA(train_set_x)
    xWhite = pca.toZca(train_set_x)
    print ' done.'
    pyplot.figure()
    for ii in range(20):
        idx = randIdxWhite[ii]
        pyplot.subplot(4,5,ii+1)
        pyplot.title('data white dim %d' % idx)
        pyplot.hist(xWhite[:,idx])
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_white_hist.png'))
    image = Image.fromarray(tile_raster_images(
        X = xWhite,
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'data_white.png'))
    image.show()

    # 1.1 plot hist
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('data white 20 random dims')
    histMax = 0
    histMin = 1e10
    for ii in range(20):
        idx = randIdxWhite[ii]
        hist, binEdges = histogram(xWhite[:,idx], bins = 20, density = True)
        histMax = max(histMax, max(hist))
        histMin = min(histMin, min(hist[hist != 0]))  # min non-zero entry
        binMiddles = binEdges[:-1] + (binEdges[1] - binEdges[0])/2
        #print '  %d from %f to %f' % (ii, min(binMiddles), max(binMiddles))
        pyplot.semilogy(binMiddles, hist, '.-')
    pyplot.axis('tight')
    ax = looser(pyplot.axis(), semilogy = True)
    xAbsMax = max(fabs(ax[0:2]))
    xx = linspace(-xAbsMax, xAbsMax, 100)
    # Overlay a standard normal pdf for visual comparison
    pyplot.semilogy(xx, mlab.normpdf(xx, 0, 1), 'k', linewidth = 3)
    pyplot.axis((-xAbsMax, xAbsMax, ax[2], ax[3]))
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_white_log_hist.png'))

    # 1.2 plot points
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('data white 20 random dims')
    nSamples = min(xWhite.shape[0], 1000)
    print 'data_white_log_points plotted with', nSamples, 'samples.'
    for ii in range(10):
        idx = randIdxWhite[ii]
        # Jitter in y separates the per-dimension strips
        pyplot.plot(xWhite[:nSamples,idx], ii + random.uniform(-.25, .25, nSamples), 'o')
    pyplot.axis('tight')
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_white_log_points.png'))

    # 2. Fit ICA
    rng = random.RandomState(1)
    ica = FastICA(random_state = rng, whiten = False)  # data already whitened above
    print 'Fitting ICA...'
    ica.fit(xWhite)
    print ' done.'
    if savedir:
        saveToFile(os.path.join(savedir, 'ica.pkl.gz'), ica)

    print 'Geting sources and mixing matrix...'
    sourcesWhite = ica.transform(xWhite)  # Estimate the sources
    #S_fica /= S_fica.std(axis=0)   # (should already be done)
    mixingMatrix = ica.get_mixing_matrix()
    print ' done.'
    sources = pca.fromZca(sourcesWhite)  # map sources back to raw space

    # 3. Show independent components and inferred sources
    image = Image.fromarray(tile_raster_images(
        X = mixingMatrix,
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_white.png'))
    image.show()
    image = Image.fromarray(tile_raster_images(
        X = mixingMatrix.T,
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_white.T.png'))
    image.show()
    image = Image.fromarray(tile_raster_images(
        X = pca.fromZca(mixingMatrix),
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_raw.png'))
    image.show()
    image = Image.fromarray(tile_raster_images(
        X = pca.fromZca(mixingMatrix.T),
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_raw.T.png'))
    image.show()

    pyplot.figure()
    for ii in range(20):
        idx = randIdxSource[ii]
        pyplot.subplot(4,5,ii+1)
        pyplot.title('sourceWhite %d' % idx)
        pyplot.hist(sourcesWhite[:,idx])
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'sources_white_hist.png'))
    image = Image.fromarray(tile_raster_images(
        X = sourcesWhite,
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'sources_white.png'))
    image.show()

    # 3.1 plot hist
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('sources white 20 random dims')
    histMax = 0
    histMin = 1e10
    for ii in range(20):
        idx = randIdxSource[ii]
        hist, binEdges = histogram(sourcesWhite[:,idx], bins = 20, density = True)
        histMax = max(histMax, max(hist))
        histMin = min(histMin, min(hist[hist != 0]))  # min non-zero entry
        binMiddles = binEdges[:-1] + (binEdges[1] - binEdges[0])/2
        #print '  %d from %f to %f' % (ii, min(binMiddles), max(binMiddles))
        pyplot.semilogy(binMiddles, hist, '.-')
    pyplot.axis('tight')
    ax = looser(pyplot.axis(), semilogy = True)
    xAbsMax = max(fabs(ax[0:2]))
    xx = linspace(-xAbsMax, xAbsMax, 100)
    pyplot.semilogy(xx, mlab.normpdf(xx, 0, 1), 'k', linewidth = 3)
    pyplot.axis((-xAbsMax, xAbsMax, ax[2], ax[3]))
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'sources_white_log_hist.png'))

    # 3.2 plot points
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('sources white 20 random dims')
    nSamples = min(sourcesWhite.shape[0], 1000)
    print 'sources_white_log_points plotted with', nSamples, 'samples.'
    for ii in range(10):
        idx = randIdxWhite[ii]
        pyplot.plot(sourcesWhite[:nSamples,idx], ii + random.uniform(-.25, .25, nSamples), 'o')
    pyplot.axis('tight')
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'sources_white_log_points.png'))

    image = Image.fromarray(tile_raster_images(
        X = sources,
        img_shape = (imgDim,imgDim), tile_shape = (10,15),
        tile_spacing=(1,1)))
    if savedir:
        image.save(os.path.join(savedir, 'sources_raw.png'))
    image.show()

    if savedir:
        print 'plots saved in', savedir
    else:
        import ipdb; ipdb.set_trace()
#cropped.show() imageMatrix /= 255 # normalize to 0-1 range random.shuffle(imageMatrix) return imageMatrix def main(): random.seed(0) trainFilter = ['image-2534', 'image-2535'] testFilter = ['image-2545'] saveToFile('../data/upson_rovio_1/train_02_50.pkl.gz', randomSampleMatrix(trainFilter, Nw = 02, Nsamples = 50)) saveToFile('../data/upson_rovio_1/test_02_50.pkl.gz', randomSampleMatrix(testFilter, Nw = 02, Nsamples = 50)) saveToFile('../data/upson_rovio_1/train_02_50000.pkl.gz', randomSampleMatrix(trainFilter, Nw = 02, Nsamples = 50000)) saveToFile('../data/upson_rovio_1/test_02_50000.pkl.gz', randomSampleMatrix(testFilter, Nw = 02, Nsamples = 50000)) saveToFile('../data/upson_rovio_1/train_04_50.pkl.gz', randomSampleMatrix(trainFilter, Nw = 04, Nsamples = 50)) saveToFile('../data/upson_rovio_1/test_04_50.pkl.gz', randomSampleMatrix(testFilter, Nw = 04, Nsamples = 50)) saveToFile('../data/upson_rovio_1/train_04_50000.pkl.gz', randomSampleMatrix(trainFilter, Nw = 04, Nsamples = 50000)) saveToFile('../data/upson_rovio_1/test_04_50000.pkl.gz', randomSampleMatrix(testFilter, Nw = 04, Nsamples = 50000)) #saveToFile('../data/upson_rovio_1/train_10_50.pkl.gz', randomSampleMatrix(trainFilter, Nw = 10, Nsamples = 50)) #saveToFile('../data/upson_rovio_1/test_10_50.pkl.gz', randomSampleMatrix(testFilter, Nw = 10, Nsamples = 50)) #saveToFile('../data/upson_rovio_1/train_10_50000.pkl.gz', randomSampleMatrix(trainFilter, Nw = 10, Nsamples = 50000)) #saveToFile('../data/upson_rovio_1/test_10_50000.pkl.gz', randomSampleMatrix(testFilter, Nw = 10, Nsamples = 50000)) saveToFile('../data/upson_rovio_1/train_15_50.pkl.gz', randomSampleMatrix(trainFilter, Nw = 15, Nsamples = 50)) saveToFile('../data/upson_rovio_1/test_15_50.pkl.gz', randomSampleMatrix(testFilter, Nw = 15, Nsamples = 50))
def efSample(availableTxtFiles, saveLocation, fileFilter = None, seed = 0, Nsamples = 10, Nsplits = 1):
    '''Sample shape .txt files into pickled (labels, data) matrices.

    saveLocation like "../data/endlessforms/train_%s_%d_%d.pkl.gz" (real, size, serial)

    Deterministically shuffles availableTxtFiles using the given seed, then
    writes Nsplits disjoint splits of Nsamples files each.  Every split is
    saved twice: once with raw float voxel values ('real') and once
    thresholded at THRESHOLD ('bool').  Raises Exception if fewer than
    Nsamples * Nsplits files are available.  fileFilter is currently unused
    (the filtering code below is commented out).
    '''
    # Copy before shuffling so the caller's list is not mutated.
    allTxtFiles = list(availableTxtFiles)
    nTxtFiles = len(allTxtFiles)
    random.seed(seed)
    random.shuffle(allTxtFiles)
    #if fileFilter:
    #    lenFilt = len(fileFilter)
    #    filteredFiles = []
    #    for txtFile in txtFiles:
    #        path, filename = os.path.split(txtFile)
    #        if filename[:lenFilt] == fileFilter:
    #            filteredFiles.append(txtFile)
    #    txtFiles = filteredFiles
    #    nTxtFiles = len(txtFiles)
    #    print 'Filtered using', fileFilter, 'to', nTxtFiles, 'txt files'

    Ntotal = Nsamples * Nsplits
    if Ntotal > nTxtFiles:
        raise Exception('Did not find enough txt files (%d < requested %d)'
                        % (nTxtFiles, Ntotal))
    print 'Choosing', Ntotal, 'random files total (%d per file x %d files)' % (Nsamples, Nsplits)

    for splitIdx in range(Nsplits):
        #print 'Split', splitIdx
        # Each split takes the next contiguous run of the shuffled list,
        # so splits are disjoint.
        txtFiles = allTxtFiles[(splitIdx*Nsamples):((splitIdx+1)*Nsamples)]

        labels = []
        for txtFile in txtFiles:
            # Convert 'aaaix0tl1w_00000_EXPORT_4.txt' -> ('aaaix0tl1w', 0, 4)
            path, filename = os.path.split(txtFile)
            runId, genSerial, junk, orgId = filename[:-4].split('_')
            labels.append((runId, int(genSerial), int(orgId)))

        # Optional Parallel Python path (disabled by default): farm the
        # per-file txt2Mat conversions out to worker processes.
        USE_PP = False
        if USE_PP:
            job_server = pp.Server(ncpus = 20)
            jobs = []
        data = zeros((Nsamples, SHAPE_VOXELS), dtype = 'float32')
        for ii, txtFile in enumerate(txtFiles):
            if USE_PP:
                jobs.append((ii, txtFile,
                             job_server.submit(txt2Mat,
                                               (txtFile, SHAPE_VOXELS),
                                               modules=('numpy',), )) )
                #print 'started', ii
            else:
                data[ii,:] = txt2Mat(txtFile, SHAPE_VOXELS)
                #print 'done with', txtFile
        if USE_PP:
            # Collect results; calling job() blocks until that job finishes.
            for ii, txtFile, job in jobs:
                #print ii, txtFile,
                #sys.stdout.flush()
                data[ii,:] = job()
                #print 'done'
                #print ii, txtFile, results, 'done'
                if ii % 100 == 0:
                    print 'Finished %d/%d jobs' % (ii, len(jobs))
                if ii % 10000 == 0:
                    job_server.print_stats()
            # NOTE(review): print_stats() prints its report itself and
            # returns None, so this line also prints 'None' — confirm intended.
            print job_server.print_stats()

        saveToFile(saveLocation % ('real', Nsamples, splitIdx), (labels, data))
        saveToFile(saveLocation % ('bool', Nsamples, splitIdx), (labels, data > THRESHOLD))
def main():
    '''Run a random hyperparameter search, paired on random vs. real data.

    For each of NN trials, draws one random parameter setting (seeded by the
    trial index, so the sweep is reproducible) and evaluates it twice: once
    on a random-noise dataset and once on the Atari Space Invaders dataset.
    Results are checkpointed after every finished test via an atomic
    write-to-temp-then-rename, so a crash never corrupts allResults.

    With useIpython = True the tests are dispatched to an IPython.parallel
    load-balanced cluster; otherwise they run serially in-process.
    '''
    resman.start('junk', diary=False)

    useIpython = True
    if useIpython:
        client = Client(profile='ssh')
        #client = Client()
        print 'IPython worker ids:', client.ids
        balview = client.load_balanced_view()

    resultsFilename = os.path.join(resman.rundir, 'allResults.pkl.gz')

    NN = 1000
    # allResults[ii] holds [randResult, dataResult] for trial ii.
    allResults = [[None, None] for ii in range(NN)]
    experiments = []
    cwd = os.getcwd()
    # Workers need the DISPLAY of this process for plotting.
    disp = os.environ['DISPLAY']
    for ii in range(NN):
        params = {}
        # Seed per-trial so each trial's parameter draw is reproducible
        # independently of the others.
        random.seed(ii)
        params['hiddenISize'] = random.choice((2, 4, 6, 8, 10, 15, 20))
        params['hiddenJSize'] = params['hiddenISize']
        params['neighborhoodSize'] = random.choice(
            (.1, .3, .5, .7, 1.0, 1.5, 2.0, 2.5, 3.5, 5.0))
        lambd = exp(random.uniform(log(.0001), log(10)))   # Uniform in log space
        params['lambd'] = round(
            lambd, 1 - int(floor(log10(lambd))))   # Just keep two significant figures
        params['randSeed'] = ii
        params['maxFuncCalls'] = 300
        #params['dataWidth'] = random.choice((2, 4))   # just quick
        #params['dataWidth'] = random.choice((2, 3, 4, 6, 10, 15, 20, 25, 28))
        params['dataWidth'] = random.choice(
            (2, 3, 4, 6, 10, 15, 20))   # 25 and 28 are incomplete
        params['nColors'] = random.choice((1, 3))
        params['isColor'] = (params['nColors'] == 3)
        params['imgShape'] = ((params['dataWidth'], params['dataWidth'], 3)
                              if params['isColor'] else
                              (params['dataWidth'], params['dataWidth']))
        params['whiten'] = False    # Just false for Space Invaders dataset...
        params['dataCrop'] = None   # Set to None to not crop data...

        # Same hyperparameters, two datasets: random noise as a control...
        paramsRand = params.copy()
        paramsRand['dataLoader'] = 'loadRandomData'
        paramsRand['dataPath'] = (
            '../data/random/randomu01_train_%02d_50000_%dc.pkl.gz'
            % (paramsRand['dataWidth'], paramsRand['nColors']))

        # ...and the real Atari dataset.
        paramsData = params.copy()
        paramsData['dataLoader'] = 'loadAtariData'
        paramsData['dataPath'] = (
            '../data/atari/space_invaders_train_%02d_50000_%dc.pkl.gz'
            % (paramsData['dataWidth'], paramsData['nColors']))
        #paramsData['dataLoader'] = 'loadUpsonData'
        #paramsData['dataPath'] = ('../data/upson_rovio_2/train_%02d_50000_%dc.pkl.gz'
        #                          % (paramsData['dataWidth'], paramsData['nColors']))

        if not useIpython:
            # Serial path: run both tests now and checkpoint after each one.
            resultsRand = reliablyRunTest(resman.rundir, '%05d_rand' % ii, paramsRand)
            allResults[ii][0] = {'params': paramsRand, 'results': resultsRand}
            # Atomic checkpoint: write to a unique temp file, then rename.
            tmpFilename = os.path.join(resman.rundir, '.tmp.%f.pkl.gz' % time.time())
            saveToFile(tmpFilename, allResults)
            os.rename(tmpFilename, resultsFilename)

            resultsData = reliablyRunTest(resman.rundir, '%05d_data' % ii, paramsData)
            allResults[ii][1] = {'params': paramsData, 'results': resultsData}
            tmpFilename = os.path.join(resman.rundir, '.tmp.%f.pkl.gz' % time.time())
            saveToFile(tmpFilename, allResults)
            os.rename(tmpFilename, resultsFilename)
        else:
            # Parallel path: just queue the job descriptions; (ii, 0/1) is
            # the slot in allResults the result will land in.
            experiments.append(((ii, 0), resman.rundir, '%05d_rand' % ii, paramsRand, cwd, disp))
            experiments.append(((ii, 1), resman.rundir, '%05d_data' % ii, paramsData, cwd, disp))

    # Start all jobs
    jobMap = balview.map_async(reliablyRunTest, experiments, ordered=False)
    #jobMap = balview.map_async(reliablyRunTest, range(10), ordered = False)

    # Consume results as they complete (unordered) and checkpoint each time.
    for ii, returnValues in enumerate(jobMap):
        testId, params, results = returnValues
        print ii, 'Job', testId, 'finished.'
        allResults[testId[0]][testId[1]] = {
            'params': params,
            'results': results
        }
        tmpFilename = os.path.join(resman.rundir, '.tmp.%f.pkl.gz' % time.time())
        saveToFile(tmpFilename, allResults)
        os.rename(tmpFilename, resultsFilename)
        #pdb.set_trace()

    print 'Finished all jobs.'

    resman.stop()
# Visualize after prep plotImageData(dataWhite, imgShape, saveDir, pc('data_white')) plotCov(dataWhite, saveDir, pc('data_white')) printDataStats(dataWhite) ######################### # Model ######################### random.seed(randSeed) rica = RICA(nInputs = prod(imgShape), nOutputs = nFeatures, lambd = lambd, epsilon = epsilon, saveDir = saveDir) plotImageRicaWW(rica.WW, imgShape, saveDir, prefix = pc('WW_iter0')) plotRicaActivations(rica.WW, dataWhite, saveDir, prefix = pc('activations_iter0')) plotRicaReconstructions(rica, dataWhite, imgShape, saveDir, unwhitener = whiteningStage.normalized2raw, prefix = pc('recon_iter0')) rica.learn(dataWhite, maxFun = maxFuncCalls) saveToFile(os.path.join(saveDir, 'rica.pkl.gz'), rica) # save learned model plotImageRicaWW(rica.WW, imgShape, saveDir, prefix = pc('WW_iterFinal')) plotRicaActivations(rica.WW, dataWhite, saveDir, prefix = pc('activations_iterFinal')) plotRicaReconstructions(rica, dataWhite, imgShape, saveDir, unwhitener = whiteningStage.normalized2raw, prefix = pc('recon_iterFinal')) resman.stop()
def testIca(datasets, savedir=None, smallImgHack=False, quickHack=False):
    '''Test ICA on a given dataset.

    datasets is a ((trainX, trainY), (validX, validY), (testX, testY))
    triple; only the train split is actually fit.  The pipeline is:
    visualize raw data, ZCA-whiten via PCA, fit FastICA on the whitened
    data, then visualize the independent components and inferred sources.
    If savedir is given, all figures and the fitted ICA model are saved
    there; otherwise an ipdb shell is opened at the end for inspection.
    quickHack truncates training to 2500 examples; smallImgHack truncates
    each example to its first 100 dimensions (tile plots will misalign).
    '''
    # NOTE(review): `random` here must be numpy.random (randint with a size
    # argument, uniform with a size argument, RandomState) — confirm the
    # file does `from numpy import random` or equivalent.
    random.seed(1)

    # 0. Get data
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    if quickHack:
        print '!!! Using quickHack !!!'
        train_set_x = train_set_x[:2500, :]
        if train_set_y is not None:
            train_set_y = train_set_y[:2500]
    if smallImgHack:
        print '!!! Using smallImgHack !!! (images will be misaligned)'
        train_set_x = train_set_x[:, :100]

    print('(%d, %d, %d) %d dimensional examples in (train, valid, test)'
          % (train_set_x.shape[0], valid_set_x.shape[0],
             test_set_x.shape[0], train_set_x.shape[1]))

    nDim = train_set_x.shape[1]
    imgDim = int(round(sqrt(nDim)))   # Might not always be true...
    # Pre-draw the random dimensions to histogram/scatter at each stage.
    randIdxRaw = random.randint(0, nDim, 100)
    randIdxWhite = random.randint(0, nDim, 100)
    randIdxSource = random.randint(0, nDim, 100)

    image = Image.fromarray(
        tile_raster_images(X=train_set_x,
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'data_raw.png'))
    image.show()

    # Per-dimension histograms of the raw data.
    pyplot.figure()
    for ii in range(20):
        idx = randIdxRaw[ii]
        pyplot.subplot(4, 5, ii + 1)
        pyplot.title('raw dim %d' % idx)
        pyplot.hist(train_set_x[:, idx])
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_raw_hist.png'))

    # 1. Whiten data
    print 'Whitening data with pca...'
    pca = PCA(train_set_x)
    xWhite = pca.toZca(train_set_x)
    print ' done.'

    pyplot.figure()
    for ii in range(20):
        idx = randIdxWhite[ii]
        pyplot.subplot(4, 5, ii + 1)
        pyplot.title('data white dim %d' % idx)
        pyplot.hist(xWhite[:, idx])
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_white_hist.png'))

    image = Image.fromarray(
        tile_raster_images(X=xWhite,
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'data_white.png'))
    image.show()

    # 1.1 plot hist
    # Log-scale histograms of 20 random whitened dims, overlaid with a unit
    # Gaussian for reference (whitened data should be roughly N(0, 1)).
    # NOTE(review): pyplot.hold and mlab.normpdf were removed in modern
    # matplotlib; this code targets an old matplotlib release.
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('data white 20 random dims')
    histMax = 0
    histMin = 1e10
    for ii in range(20):
        idx = randIdxWhite[ii]
        hist, binEdges = histogram(xWhite[:, idx], bins=20, density=True)
        histMax = max(histMax, max(hist))
        histMin = min(histMin, min(hist[hist != 0]))   # min non-zero entry
        binMiddles = binEdges[:-1] + (binEdges[1] - binEdges[0]) / 2
        #print ' %d from %f to %f' % (ii, min(binMiddles), max(binMiddles))
        pyplot.semilogy(binMiddles, hist, '.-')
    pyplot.axis('tight')
    ax = looser(pyplot.axis(), semilogy=True)
    xAbsMax = max(fabs(ax[0:2]))
    xx = linspace(-xAbsMax, xAbsMax, 100)
    pyplot.semilogy(xx, mlab.normpdf(xx, 0, 1), 'k', linewidth=3)
    pyplot.axis((-xAbsMax, xAbsMax, ax[2], ax[3]))
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_white_log_hist.png'))

    # 1.2 plot points
    # Strip plot: each of 10 random dims gets a jittered horizontal row.
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('data white 20 random dims')
    nSamples = min(xWhite.shape[0], 1000)
    print 'data_white_log_points plotted with', nSamples, 'samples.'
    for ii in range(10):
        idx = randIdxWhite[ii]
        pyplot.plot(xWhite[:nSamples, idx],
                    ii + random.uniform(-.25, .25, nSamples), 'o')
    pyplot.axis('tight')
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'data_white_log_points.png'))

    # 2. Fit ICA
    rng = random.RandomState(1)
    # whiten=False because the data was already ZCA-whitened above.
    ica = FastICA(random_state=rng, whiten=False)
    print 'Fitting ICA...'
    ica.fit(xWhite)
    print ' done.'
    if savedir:
        saveToFile(os.path.join(savedir, 'ica.pkl.gz'), ica)

    print 'Geting sources and mixing matrix...'
    sourcesWhite = ica.transform(xWhite)   # Estimate the sources
    #S_fica /= S_fica.std(axis=0)   # (should already be done)
    # NOTE(review): get_mixing_matrix() was removed from scikit-learn long
    # ago (use the mixing_ attribute on modern versions).
    mixingMatrix = ica.get_mixing_matrix()
    print ' done.'

    # Map the sources back through the inverse ZCA to pixel space.
    sources = pca.fromZca(sourcesWhite)

    # 3. Show independent components and inferred sources
    # The mixing matrix is tiled both as-is and transposed, in whitened and
    # raw (un-ZCA'd) space.
    image = Image.fromarray(
        tile_raster_images(X=mixingMatrix,
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_white.png'))
    image.show()
    image = Image.fromarray(
        tile_raster_images(X=mixingMatrix.T,
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_white.T.png'))
    image.show()
    image = Image.fromarray(
        tile_raster_images(X=pca.fromZca(mixingMatrix),
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_raw.png'))
    image.show()
    image = Image.fromarray(
        tile_raster_images(X=pca.fromZca(mixingMatrix.T),
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'ic_raw.T.png'))
    image.show()

    pyplot.figure()
    for ii in range(20):
        idx = randIdxSource[ii]
        pyplot.subplot(4, 5, ii + 1)
        pyplot.title('sourceWhite %d' % idx)
        pyplot.hist(sourcesWhite[:, idx])
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'sources_white_hist.png'))

    image = Image.fromarray(
        tile_raster_images(X=sourcesWhite,
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'sources_white.png'))
    image.show()

    # 3.1 plot hist
    # Same log-histogram-vs-Gaussian plot as 1.1, now for the sources.
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('sources white 20 random dims')
    histMax = 0
    histMin = 1e10
    for ii in range(20):
        idx = randIdxSource[ii]
        hist, binEdges = histogram(sourcesWhite[:, idx], bins=20, density=True)
        histMax = max(histMax, max(hist))
        histMin = min(histMin, min(hist[hist != 0]))   # min non-zero entry
        binMiddles = binEdges[:-1] + (binEdges[1] - binEdges[0]) / 2
        #print ' %d from %f to %f' % (ii, min(binMiddles), max(binMiddles))
        pyplot.semilogy(binMiddles, hist, '.-')
    pyplot.axis('tight')
    ax = looser(pyplot.axis(), semilogy=True)
    xAbsMax = max(fabs(ax[0:2]))
    xx = linspace(-xAbsMax, xAbsMax, 100)
    pyplot.semilogy(xx, mlab.normpdf(xx, 0, 1), 'k', linewidth=3)
    pyplot.axis((-xAbsMax, xAbsMax, ax[2], ax[3]))
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'sources_white_log_hist.png'))

    # 3.2 plot points
    pyplot.figure()
    pyplot.hold(True)
    pyplot.title('sources white 20 random dims')
    nSamples = min(sourcesWhite.shape[0], 1000)
    print 'sources_white_log_points plotted with', nSamples, 'samples.'
    for ii in range(10):
        # NOTE(review): this indexes with randIdxWhite while the rest of
        # section 3 uses randIdxSource — looks like a copy-paste slip from
        # section 1.2; confirm which dims were meant to be plotted.
        idx = randIdxWhite[ii]
        pyplot.plot(sourcesWhite[:nSamples, idx],
                    ii + random.uniform(-.25, .25, nSamples), 'o')
    pyplot.axis('tight')
    if savedir:
        pyplot.savefig(os.path.join(savedir, 'sources_white_log_points.png'))

    image = Image.fromarray(
        tile_raster_images(X=sources,
                           img_shape=(imgDim, imgDim),
                           tile_shape=(10, 15),
                           tile_spacing=(1, 1)))
    if savedir:
        image.save(os.path.join(savedir, 'sources_raw.png'))
    image.show()

    if savedir:
        print 'plots saved in', savedir
    else:
        # No save directory: drop into a debugger so the figures and fitted
        # model can be inspected interactively.
        import ipdb
        ipdb.set_trace()