def script_sda_detector(resolution):
    import pymatlab

    session = pymatlab.session_factory()

    nruns = 20
    partiDetMethod = 'log_detector'

    # NOTE: the loop starts at run 18, apparently to resume an interrupted
    # batch; set the lower bound back to 1 to process every run.
    for nrun in range(18, nruns + 1):
        basefilename = '../final_results/baseline_resized/{0:05d}/models/res_baseline_resized_{0:05d}_111111/{1:05d}_{2:03d}_'.format(
            resolution, nrun, resolution)

        trainfilename = basefilename + 'train_ids.pkl.gz'
        valfilename = basefilename + 'val_ids.pkl.gz'
        trainfinalfilename = basefilename + 'trainfinal_ids.pkl.gz'
        valfinalfilename = basefilename + 'valfinal_ids.pkl.gz'
        testfilename = basefilename + 'test_ids.pkl.gz'

        # shift the saved 0-based ids to MATLAB's 1-based indexing:
        # [0, max_ids] -> [1, max_ids + 1]
        train_ids = load_savedgzdata(trainfilename) + 1
        val_ids = load_savedgzdata(valfilename) + 1
        trainfinal_ids = load_savedgzdata(trainfinalfilename) + 1
        valfinal_ids = load_savedgzdata(valfinalfilename) + 1
        test_ids = load_savedgzdata(testfilename) + 1

        print >> sys.stderr, train_ids
        print >> sys.stderr, val_ids
        print >> sys.stderr, trainfinal_ids
        print >> sys.stderr, valfinal_ids
        print >> sys.stderr, test_ids

        # push the fold ids into the MATLAB workspace
        session.putvalue('partiDetMethod', partiDetMethod)
        session.putvalue('resolution', str(resolution) + '_' + str(nrun))
        session.putvalue('train_ids', train_ids)
        session.putvalue('val_ids', val_ids)
        session.putvalue('trainfinal_ids', trainfinal_ids)
        session.putvalue('valfinal_ids', valfinal_ids)
        session.putvalue('test_ids', test_ids)

        # pack everything into a struct and call the MATLAB-side detector
        mscript = """
        data = struct();
        data.partiDetMethod = partiDetMethod;
        data.resolution = resolution;
        data.train_ids = train_ids;
        data.val_ids = val_ids;
        data.trainfinal_ids = trainfinal_ids;
        data.valfinal_ids = valfinal_ids;
        data.test_ids = test_ids;
        res = script_sda_detector( data )
        """
        session.putvalue('MSCRIPT', mscript)
        session.run('eval(MSCRIPT)')

        res = session.getvalue('res')
        print res
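# --------------------------------------------------------------------------
# A minimal usage sketch (an assumption, not part of the original sources:
# it presumes a local MATLAB install, the pymatlab bindings, sys imported at
# module level, and the fold files already saved under ../final_results/):
if __name__ == '__main__':
    # e.g. python script_sda_detector.py 15000
    script_sda_detector(int(sys.argv[1]))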
#!/usr/bin/python
import os, string, sys

lib_path = os.path.abspath('./TL/')
sys.path.append(lib_path)

from data_handling import load_savedgzdata

# Compare the test-fold ids saved by two experiments (e.g. the 15000x
# baseline against the 50000x source models), run by run.
basepath_15000 = sys.argv[1]
resolution_15000 = '15000'
basepath_50000 = sys.argv[2]

for nrun in range(1, 21):
    pathids0 = '{0:s}/{1:05d}_{2:05d}_test_ids.pkl.gz'.format(
        basepath_15000, nrun, string.atoi(resolution_15000))
    pathids1 = '{0:s}/{1:05d}_{2:05d}_test_ids.pkl.gz'.format(
        basepath_50000, nrun, string.atoi(resolution_15000))

    print >> sys.stderr, 'Loading ' + pathids0 + '...'
    ids0 = load_savedgzdata(pathids0)
    print >> sys.stderr, 'Loading ' + pathids1 + '...'
    ids1 = load_savedgzdata(pathids1)

    print >> sys.stderr, ids0
    print >> sys.stderr, ids1
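# --------------------------------------------------------------------------
# The loop above only prints both id arrays for visual inspection. A small
# automated check is sketched below (an assumption: load_savedgzdata returns
# numpy arrays of ids, as it does elsewhere in this codebase):
import numpy

def test_folds_match(path0, path1):
    # True when both experiments saved the same set of test ids
    ids0 = numpy.sort(load_savedgzdata(path0))
    ids1 = numpy.sort(load_savedgzdata(path1))
    return numpy.array_equal(ids0, ids1)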
def load_data(datasetpath, options):
    '''
    Loads the dataset

    :type datasetpath: string
    :param datasetpath: the path to the dataset
    '''
    nclasses = 2

    #############
    # LOAD DATA #
    #############
    if options['database'] == 'mnist':
        train_set, valid_set, test_set = load_savedgzdata('mnist.pkl.gz')

        train_set_x = numpy.array(train_set[0])
        train_set_y = numpy.array(train_set[1])
        valid_set_x = numpy.array(valid_set[0])
        valid_set_y = numpy.array(valid_set[1])
        test_set_x = numpy.array(test_set[0])
        test_set_y = numpy.array(test_set[1])

        (nelem_train, ndim) = train_set_x.shape
        (nelem_valid, ndim) = valid_set_x.shape
        (nelem_test, ndim) = test_set_x.shape

        # one column per patch: patchsize^2 pixel rows plus an id row and a
        # class row
        dataset = numpy.zeros((options['patchsize'] * options['patchsize'] + 2,
                               nelem_train + nelem_valid + nelem_test))

        ids = 0
        imgwidth = numpy.sqrt(ndim)
        (dataset, ids) = loadConvertMNIST(options['patchsize'], ids, dataset,
                                          imgwidth, train_set_x, train_set_y)
        (dataset, ids) = loadConvertMNIST(options['patchsize'], ids, dataset,
                                          imgwidth, valid_set_x, valid_set_y)
        (dataset, ids) = loadConvertMNIST(options['patchsize'], ids, dataset,
                                          imgwidth, test_set_x, test_set_y)

        nclasses = len(set(train_set_y))

        return (dataset, options['patchsize'] * options['patchsize'], nclasses)

    elif options['database'] == 'shapes':
        dataset = load_savedgzdata('shapes.pkl.gz')
        return (dataset, 20 * 20, 4)

    # --------------------------------------------------------------
    # nanoparticle data: collect the .mat patch files for the requested
    # database/resolution ('all' selects every .mat file)
    if not 'all' in options['database']:
        onlyfiles = [f for f in listdir(datasetpath)
                     if (isfile(join(datasetpath, f)) and
                         splitext(f)[1] == '.mat' and
                         options['database'] in f and
                         options['resolution'] in f)]
    else:
        onlyfiles = [f for f in listdir(datasetpath)
                     if (isfile(join(datasetpath, f)) and
                         splitext(f)[1] == '.mat')]
    onlyfiles.sort()

    first = True
    ids = 0
    for file in onlyfiles:
        print >> sys.stderr, ("---> " + datasetpath + file)
        f = h5py.File(datasetpath + file, 'r')
        mpatches = f.get('mpatches')

        back = numpy.array(mpatches.get('negative'))
        nano = numpy.array(mpatches.get('positive'))

        if options['replicate']:
            # one-class 'replicate' mode: substitute the positive set with a
            # duplicated copy of the background patches
            nano = numpy.c_[back, back]

        (back_ndim, back_npoints) = back.shape
        (nano_ndim, nano_npoints) = nano.shape

        # offset the per-file ids (row 0) so they stay unique across files
        back[0, :] = back[0, :] + ids
        nano[0, :] = nano[0, :] + ids
        ids = max(back[0, :])

        # append the class row: +1 for background, -1 for nanoparticles
        back = numpy.r_[back, numpy.ones((1, back_npoints))]
        nano = numpy.r_[nano, -1 * numpy.ones((1, nano_npoints))]

        if first:
            dataset = numpy.c_[back, nano]
            first = False
        else:
            dataset = numpy.c_[dataset, back, nano]

    # dataset is built with the following row structure:
    # [[ .... ids ....],
    #  [ .... data ...],
    #  [ ..... cls ...]]
    datasetfilename = 'nanoparticles.npz'
    numpy.savez_compressed(datasetfilename, dataset)

    return (dataset, nano_ndim - 1, nclasses)
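# --------------------------------------------------------------------------
# A minimal driving sketch (an assumption -- these option values are
# illustrative, not the original experiment settings; TL() below builds the
# full options dictionary):
if __name__ == '__main__':
    _options = {'database': 'db2', 'resolution': '15000', 'replicate': False}
    (dataset, ndim, nclasses) = load_data('../gen_patches/dataset_noisy/',
                                          _options)
    print >> sys.stderr, dataset.shape, ndim, nclasses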
def TL(source,
       target=None,
       path='../gen_patches/dataset_noisy/',
       retrain=False,
       retrain_ft_layers=[1, 1, 1, 1, 1, 1],
       outputfolder='backup',
       outputfolderres='backup_res',
       batchsize=1000,
       sourcemodelspath='./'):
    """
    TODO: FINISH DOCUMENTATION
    """
    options = {
        'sourcemodelspath': sourcemodelspath,
        'outputfolder': outputfolder,
        'outputfolderres': outputfolderres,
        'verbose': 0,
        'viewdata': False,
        'trainsize': 0.6,
        'patchsize': 20,
        'measure': 'acc',
        'weight': 200,
        'datanormalize': True,
        # ---------- one-class learning
        'replicate': False,
        'oneclass': False,
        # ---------- source problem params
        'database_source': 'db2',
        'resolution_source': source,
        'nclasses_source': 2,  # TODO: derive this automatically
        # ---------- target problem params
        'database_target': 'db2',
        'resolution_target': target,
        # ---------- TL hyperparams
        'retrain': retrain,
        'retrain_ft_layers': retrain_ft_layers,
        # ---------- hyperparams
        'nruns': 20,
        'folds': 3,
        'hlayers': [len(retrain_ft_layers) / 2],  # X hidden + 1 log layer
        'nneurons': [1000],            # e.g. range(500, 1001, 250)
        'pretraining_epochs': [1000],
        'training_epochs': [3000],
        'pretrain_lr': [0.01, 0.001],
        'finetune_lr': [0.1, 0.01],
        'threshold': [0.8],            # e.g. numpy.arange(.5, 1.01, .1)
        'batchsize': [batchsize],      # [100] or [1000], depending on dataset size
        # ---------- end of hyperparams
        'corruptlevels': [0.1],        # e.g. numpy.arange(0.1, 0.4, 0.1)
    }

    print >> sys.stderr, (options), "\n"

    datasetpath = path

    # -------------------------------------------------------------------------
    # load dataset: when retraining, work on the target problem; otherwise
    # train from scratch on the source problem
    if options['retrain'] == 1:
        options['database'] = options['database_target']
        options['resolution'] = options['resolution_target']
    else:
        options['database'] = options['database_source']
        options['resolution'] = options['resolution_source']

    (dataset, ndim, nclasses) = load_data(datasetpath, options)
    options['ndim'] = ndim
    options['nclasses'] = nclasses

    # -------------------------------------------------------------------------
    for nrun in range(1, options['nruns'] + 1):
        print >> sys.stderr, ("### {0:03d} of {1:03d}".format(
            nrun, options['nruns']))

        # seed both RNGs with the run number so the folds are reproducible
        options['numpy_rng'] = numpy.random.RandomState(nrun)
        options['theano_rng'] = RandomStreams(seed=nrun)

        # generate folds
        folds = gen_folds(dataset, options, nrun)

        if options['retrain'] == 1:
            # load the pre-trained source model to be fine-tuned
            filename = "{0:s}/{1:05d}_{2:03d}_model.pkl.gz".format(
                options['sourcemodelspath'], nrun,
                string.atoi(options['resolution_source']))
            print >> sys.stderr, ":: Loading model {0:s}...\n".format(filename)
            sda_reuse_model = load_savedgzdata(filename)
        else:
            sda_reuse_model = None

        # ----------------------------------------------------------------
        results = do_experiment(folds, options, nrun, sda_reuse_model)
        # ----------------------------------------------------------------

        filename = '{0:s}/res_{1:05d}_{2:03d}.pkl.gz'.format(
            options['outputfolderres'], nrun,
            string.atoi(options['resolution']))
        save_results(filename, results)
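# --------------------------------------------------------------------------
# Two usage sketches (an assumption -- the folder names are illustrative,
# not the original experiment layout):
if __name__ == '__main__':
    # 1) train source models from scratch on the 50000x data
    TL('50000', outputfolder='models_50000', outputfolderres='res_50000')
    # 2) fine-tune those models on the 15000x target, unfreezing all layers
    #    (retrain_ft_layers holds one flag per tunable layer part)
    TL('50000', target='15000', retrain=True,
       retrain_ft_layers=[1, 1, 1, 1, 1, 1],
       sourcemodelspath='models_50000')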
def main(resolution, method, pathRes):
    # results from the LoG detector
    imgpathsae = '../../imgs_nanoparticles/{0:03d}/db2/resultado_sae/'.format(
        string.atoi(resolution))

    if method == 'baseline':
        basepath = './{0:s}/{1:05d}/models/res_baseline_resized_{1:05d}_111111/'.format(
            pathRes, string.atoi(resolution))
    elif method == 'tl':
        basepath = './{0:s}/{1:05d}/models/res_tl_resized_50000_{1:05d}_111111/'.format(
            pathRes, string.atoi(resolution))
    else:
        raise ValueError('unknown method: {0:s}'.format(method))

    # annotations
    annbasepath = '../../imgs_nanoparticles/{0:03d}/db2/annotation/user/'.format(
        string.atoi(resolution))
    annfiles = [f for f in os.listdir(annbasepath)
                if re.match(r'[\w\W]*csv', f)]
    annfiles = sorted(annfiles)

    # image base paths
    imgsbasepath = '../../imgs_nanoparticles/{0:03d}/db2/'.format(
        string.atoi(resolution))
    imgspath = sorted(os.listdir(imgsbasepath))

    # ------------------------------------------------------------------------
    # TEST DATA
    PrecisionAll = []
    RecallAll = []
    PrecisionLoGAll = []
    RecallLoGAll = []
    nDetectionsAll = []

    for nrun in range(1, 21):
        print >> sys.stderr, "NRUN {0:05d}/20 ".format(nrun)

        filename = '{0:s}/{1:05d}_{2:03d}_model.pkl.gz'.format(
            basepath, nrun, string.atoi(resolution))
        print >> sys.stderr, "Loading " + filename
        model = load_savedgzdata(filename)

        # get the test ids for this run
        pathids = '{0:s}/{1:05d}_{2:05d}_test_ids.pkl.gz'.format(
            basepath, nrun, string.atoi(resolution))
        print >> sys.stderr, 'Loading ' + pathids + '...'
        ids = load_savedgzdata(pathids)
        print >> sys.stderr, ids

        # LoG detection results for this run, in sorted order
        reg = 'detectedNanoParticlesDetectionResult_log_detector_test_{0:03d}_'.format(
            nrun)
        files = sorted([f for f in os.listdir(imgpathsae) if re.match(reg, f)])
        nfiles = len(files)

        (Precision, Recall, PrecisionLoG, RecallLoG, nDetections) = \
            getPrecisionRecall(nfiles, files, ids, imgpathsae, imgsbasepath,
                               imgspath, annbasepath, annfiles, model,
                               (0, 0, nrun, 0), printImg=True)

        print >> sys.stderr, "Precision LoG: {0:05f} | Recall LoG: {1:05f}".format(
            PrecisionLoG, RecallLoG)
        print >> sys.stderr, "Precision SdA: {0:05f} | Recall SdA: {1:05f}".format(
            Precision, Recall)

        PrecisionAll.append(Precision)
        RecallAll.append(Recall)
        PrecisionLoGAll.append(PrecisionLoG)
        RecallLoGAll.append(RecallLoG)
        nDetectionsAll.append(nDetections)

    # ---------------------------------------------------------
    PrecisionAll = numpy.array(PrecisionAll)
    RecallAll = numpy.array(RecallAll)
    PrecisionLoGAll = numpy.array(PrecisionLoGAll)
    RecallLoGAll = numpy.array(RecallLoGAll)
    nDetectionsAll = numpy.array(nDetectionsAll)

    print "--------------------------------------------\n"
    print "Precision LoG: {0:03f} ({1:03f}) | Recall LoG: {2:03f} ({3:03f})".format(
        numpy.mean(PrecisionLoGAll), numpy.std(PrecisionLoGAll),
        numpy.mean(RecallLoGAll), numpy.std(RecallLoGAll))
    print "Precision SdA: {0:03f} ({1:03f}) | Recall SdA: {2:03f} ({3:03f})".format(
        numpy.mean(PrecisionAll), numpy.std(PrecisionAll),
        numpy.mean(RecallAll), numpy.std(RecallAll))
    print "number detections: {0:03f} ({1:03f})".format(
        numpy.mean(nDetectionsAll), numpy.std(nDetectionsAll))

    # per-run precision/recall pairs
    PrecisionRecall = numpy.c_[PrecisionAll, RecallAll]
    filename = 'results/sae_{0:s}_{1:s}_test_all.pkl.gz'.format(method, resolution)
    save_gzdata(filename, PrecisionRecall)

    PrecisionRecallLoG = numpy.c_[PrecisionLoGAll, RecallLoGAll]
    filename = 'results/log_{0:s}_{1:s}_test_all.pkl.gz'.format(method, resolution)
    save_gzdata(filename, PrecisionRecallLoG)

    # means over all runs
    PrecisionRecall = numpy.r_[numpy.mean(PrecisionAll), numpy.mean(RecallAll)]
    filename = 'results/sae_{0:s}_{1:s}_test.pkl.gz'.format(method, resolution)
    save_gzdata(filename, PrecisionRecall)

    PrecisionRecallLoG = numpy.r_[numpy.mean(PrecisionLoGAll),
                                  numpy.mean(RecallLoGAll)]
    filename = 'results/log_{0:s}_{1:s}_test.pkl.gz'.format(method, resolution)
    save_gzdata(filename, PrecisionRecallLoG)

    filename = 'results/ndetections_{0:s}_{1:s}_test.pkl.gz'.format(method, resolution)
    save_gzdata(filename, nDetectionsAll)
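# --------------------------------------------------------------------------
# A minimal command-line wrapper (a sketch; the argument order follows the
# signature of main above, and the script name is an assumption):
if __name__ == '__main__':
    # e.g. python eval_detections.py 15000 baseline final_results
    main(sys.argv[1], sys.argv[2], sys.argv[3])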