def test_saved_model_folder(dirname, feats, output, filt=False):
    """
    Test a saved model by loading it and applying it to features.

    PARAMS
      dirname: directory containing the pickled model ('model.p')
      feats:   feature patterns (numpy array) to run prediction on
      output:  output file, used with print_write()
      filt:    if True, the model uses filtering; the pattern count is
               then taken from model._nPatternUsed instead of the
               traceback stats
    RETURN
      average dist, nPatterns, nIters, totaltime
    """
    print_write('*** MODEL SAVED IN: ' + dirname + ' ***', output)
    if filt:
        print_write('model uses FILTERING', output)
    # load model
    model = ANALYZE.unpickle(os.path.join(dirname, 'model.p'))
    print_write('model loaded', output)
    # find nIters (#tracks), nPatterns, totaltime
    nIters, nPatterns, totalTime = ANALYZE.traceback_stats(dirname)
    if filt:
        # filtering models keep their own count of patterns actually used
        nPatterns = model._nPatternUsed
    print_write('nIters (=nTracks): ' + str(nIters), output)
    print_write('nPatterns: ' + str(nPatterns), output)
    print_write('total time ran: ' + str(totalTime), output)
    # predict
    best_code_per_p, dists = model.predicts(feats)
    # compute the average once (was computed twice: once to print, once to return)
    avg_dist = np.average(dists)
    print_write('prediction done, avg. dist: ' + str(avg_dist), output)
    # return
    return avg_dist, nPatterns, nIters, totalTime
def test_saved_model_folder(dirname, feats, output):
    """
    Load a previously saved model and evaluate it on the given features.

    Output is the output file, used with print_write().
    RETURN
      average dist, nPatterns, nIters, totaltime
    """
    print_write('*** MODEL SAVED IN: ' + dirname + ' ***', output)
    # restore the pickled model from its folder
    model = ANALYZE.unpickle(os.path.join(dirname, 'model.p'))
    print_write('model loaded', output)
    # gather run statistics: number of tracks seen, patterns, wall time
    nIters, nPatterns, totalTime = ANALYZE.traceback_stats(dirname)
    for label, value in (('nIters (=nTracks): ', nIters),
                         ('nPatterns: ', nPatterns),
                         ('total time ran: ', totalTime)):
        print_write(label + str(value), output)
    # run the model on the features
    best_code_per_p, dists = model.predicts(feats)
    print_write('prediction done, avg. dist: ' + str(np.average(dists)), output)
    # average distortion plus the run statistics
    return np.average(dists), nPatterns, nIters, totalTime
def analyze_one_batch_of_models(savedmodel, matfilesdir, output, filt=False):
    """
    Main job: from a saved model, find other saved models in the same
    dir, test them all, and return plotting info (nPatterns and dist)
    ordered by increasing number of patterns.

    PARAMS
      savedmodel:  path to one saved-model folder
      matfilesdir: directory of matfiles to load features from
      output:      output file, used with print_write()
      filt:        passed through to test_saved_model_folder()
    RETURN
      (patterns, dists) as numpy arrays sorted by increasing patterns
    """
    # hack: drop a trailing separator so os.path.split behaves
    if savedmodel[-1] == os.path.sep:
        savedmodel = savedmodel[:-1]
    # GET SAVED MODELS
    parentdir, tmp = os.path.split(savedmodel)
    # traceback: chain of saved models this one derives from
    tb = ANALYZE.traceback(savedmodel)
    # find everything in parent folder, then just folders
    all_in_folder = glob.glob(os.path.join(parentdir, '*'))
    all_in_folder = [x for x in all_in_folder if os.path.isdir(x)]
    # keep those that have same origin (same traceback root)
    leaves = [x for x in all_in_folder if ANALYZE.traceback(x)[0] == tb[0]]
    # everything to test, matfile at the end; set removes duplicates
    all_to_test = set(f for f in tb if os.path.isdir(f))
    all_to_test.update(leaves)
    all_to_test = list(all_to_test)
    # GET PARAMS / LOAD DATA
    params = ANALYZE.unpickle(os.path.join(savedmodel, 'params.p'))
    oracle = ORACLE.OracleMatfiles(params, matfilesdir, oneFullIter=True)
    # 'is not None' instead of '!= None': identity check, avoids
    # elementwise comparison pitfalls on numpy arrays
    data = [x for x in oracle if x is not None]
    data = np.concatenate(data)
    # remove empty patterns (all-zero rows)
    data = data[np.where(np.sum(data, axis=1) > 0)]
    if data.shape[0] == 0:
        print_write('No patterns loaded, quit.', output)
        sys.exit(0)
    # PREDICT ON EVERY MODEL
    dists = []
    patterns = []
    for f in all_to_test:
        # direct unpack (was: a,b,c,d = ...; dist,... = a,b,c,d)
        dist, nPatterns, nIters, totalTime = \
            test_saved_model_folder(f, data, output, filt=filt)
        dists.append(dist)
        patterns.append(nPatterns)
    # delete data before plotting, to free memory
    del data
    # plot data
    dists = np.array(dists)
    patterns = np.array(patterns)
    order = np.argsort(patterns)
    # return 2 lists: nPatterns, dists ordered by increasing patterns
    return patterns[order], dists[order]
def analyze_one_batch_of_models(savedmodel, matfilesdir, output, filt=False):
    """
    Main job: starting from one saved model, locate the other saved
    models in the same directory, test each of them, and return
    plotting info (nSamples and dist).
    """
    # strip a trailing path separator (hack for os.path.split)
    if savedmodel[-1] == os.path.sep:
        savedmodel = savedmodel[:-1]
    # locate sibling saved-model folders
    parentdir, tmp = os.path.split(savedmodel)
    tb = ANALYZE.traceback(savedmodel)
    candidates = glob.glob(os.path.join(parentdir, '*'))
    candidates = filter(lambda x: os.path.isdir(x), candidates)
    # restrict to folders sharing the same traceback origin
    leaves = filter(lambda x: ANALYZE.traceback(x)[0] == tb[0], candidates)
    # build the full test set: traceback dirs plus same-origin leaves
    to_test = set()
    for folder in tb:
        if os.path.isdir(folder):
            to_test.add(folder)
    to_test.update(leaves)
    to_test = list(to_test)
    # load params, then pull all the feature data into memory
    params = ANALYZE.unpickle(os.path.join(savedmodel, 'params.p'))
    oracle = ORACLE.OracleMatfiles(params, matfilesdir, oneFullIter=True)
    data = [x for x in oracle]
    data = filter(lambda x: x != None, data)
    data = np.concatenate(data)
    # keep only non-empty patterns
    data = data[np.where(np.sum(data, axis=1) > 0)]
    if data.shape[0] == 0:
        print_write('No patterns loaded, quit.', output)
        sys.exit(0)
    # evaluate every candidate model on the loaded data
    dists = []
    patterns = []
    for folder in to_test:
        res = test_saved_model_folder(folder, data, output, filt=filt)
        dist, nPatterns, nIters, totalTime = res
        dists.append(dist)
        patterns.append(nPatterns)
    # release the data before plotting
    del data
    patterns = np.array(patterns)
    dists = np.array(dists)
    order = np.argsort(patterns)
    # two arrays, nPatterns and dists, sorted by increasing pattern count
    return patterns[order], dists[order]
# NOTE(review): fragment of a larger function (looks like a FILT/NONFILT
# variant of analyze_one_batch_of_models); parentdir, tb, savedmodelFILT,
# matfilesdir, output and all_to_testFILT are defined outside this
# excerpt -- confirm against the full function.
# find everything in parent folder, then keep only directories
all_in_folder = glob.glob(os.path.join(parentdir,'*'))
all_in_folder = filter(lambda x: os.path.isdir(x), all_in_folder)
# keep those that have same origin (same traceback root)
leaves = filter(lambda x: ANALYZE.traceback(x)[0]==tb[0],all_in_folder)
# everything to test, matfile at the end
all_to_testNONFILT = set()
for f in tb:
    if os.path.isdir(f):
        all_to_testNONFILT.add(f)
for f in leaves:
    all_to_testNONFILT.add(f)
all_to_testNONFILT = list(all_to_testNONFILT)
# GET PARAMS / LOAD DATA
params = ANALYZE.unpickle(os.path.join(savedmodelFILT,'params.p'))
oracle = ORACLE.OracleMatfiles(params,matfilesdir,oneFullIter=True)
data = [x for x in oracle]
# drop tracks that failed to load
data = filter(lambda x: x != None, data)
data = np.concatenate(data)
# keep only non-empty patterns (rows with positive sum)
data = data[np.where(np.sum(data,axis=1)>0)]
if data.shape[0] == 0:
    print_write('No patterns loaded, quit.',output)
    sys.exit(0)
# PREDICT ON EVERY MODEL FILT
distsFILT = []
patternsFILT = []
# NOTE(review): iterates all_to_testFILT but this excerpt only builds
# all_to_testNONFILT -- presumably all_to_testFILT is built earlier in
# the full function; verify.
for f in all_to_testFILT:
    a,b,c,d = test_saved_model_folder(f,data,output,filt=True)
    dist,nPatterns,nIters,totalTime = a,b,c,d
def analyze_one_exp_dir(expdir, validdir, testdir, autobar=False, most_recent=False):
    """
    Analyze one experiment dir.
    This directory contains many subdirectory, for all the saved models.
    Check every saved model with validdata (numpy array), and test the
    best one on the test data.
    Returns numbers: patternsize, codebook size, distortion error, best saved model
    """
    # get all subdirs
    alldirs = glob.glob(os.path.join(expdir, "*"))
    if len(alldirs) > 0:
        alldirs = filter(lambda x: os.path.isdir(x), alldirs)
        # keep only experiment folders, named 'exp_...'
        alldirs = filter(lambda x: os.path.split(x)[-1][:4] == "exp_", alldirs)
        # trim badly saved models
        alldirs = filter(lambda x: check_saved_model_full(x, False), alldirs)
    if len(alldirs) == 0:
        print "no saved model found in:", expdir
        return None, None, None, None
    # get params from the most recent saved model (lexicographic sort)
    savedmodel = np.sort(alldirs)[-1]
    params = ANALYZE.unpickle(os.path.join(savedmodel, "params.p"))
    # if test only one model, the most recent
    if most_recent:
        alldirs = [savedmodel]
    # load valid data (skipped when autobar: data is reloaded per model below)
    if not autobar:
        oracle = ORACLE.OracleMatfiles(params, validdir, oneFullIter=True)
        # if autobar:
        #     oracle.use_autobar_in_iterator(savedmodel)
        validdata = [x for x in oracle]
        validdata = filter(lambda x: x != None, validdata)
        validdata = np.concatenate(validdata)
        assert validdata.shape[1] > 0, "no valid data??"
    # load test data
    if not autobar:
        if validdir != testdir:
            oracle = ORACLE.OracleMatfiles(params, testdir, oneFullIter=True)
            testdata = [x for x in oracle]
            testdata = filter(lambda x: x != None, testdata)
            testdata = np.concatenate(testdata)
            assert testdata.shape[1] > 0, "no valid data??"
        else:
            testdata = validdata
    # test all subdirs with valid data, keep the best
    best_model = ""
    best_dist = np.inf
    for sm in alldirs:
        model = ANALYZE.unpickle(os.path.join(sm, "model.p"))
        # IF AUTOBAR LOAD DATA NOW
        # NOTE(review): this per-model reload appears to run even when
        # autobar is False, clobbering the validdata loaded above --
        # confirm this is intended.
        oracle = ORACLE.OracleMatfiles(params, validdir, oneFullIter=True)
        oracle.use_autobar_in_iterator(model)
        validdata = [x for x in oracle]
        validdata = filter(lambda x: x != None, validdata)
        validdata = np.concatenate(validdata)
        assert validdata.shape[1] > 0, "no valid data??"
        ####
        codewords, dists = model.predicts(validdata)
        avg_dist = np.average(dists)
        if avg_dist < best_dist:
            best_model = sm
            best_dist = avg_dist
    assert best_model != "", "no data found???"
    if testdir == validdir:
        # we're done
        # load model, verbose
        model = ANALYZE.unpickle(os.path.join(best_model, "model.p"))
        print "best model:", best_model, " ( dist =", best_dist, ")"
        # return patternsize, codebook size, distortion errror, best saved model
        # (patternsize derived as feature width / 12 -- presumably 12
        # chroma bins per beat; verify)
        return validdata.shape[1] / 12, model._codebook.shape[0], best_dist, best_model
    # test with test data
    model = ANALYZE.unpickle(os.path.join(best_model, "model.p"))
    # IF AUTOBAR LOAD DATA NOW
    oracle = ORACLE.OracleMatfiles(params, testdir, oneFullIter=True)
    oracle.use_autobar_in_iterator(model)
    testdata = [x for x in oracle]
    testdata = filter(lambda x: x != None, testdata)
    testdata = np.concatenate(testdata)
    assert testdata.shape[1] > 0, "no valid data??"
    ####
    codewords, dists = model.predicts(testdata)
    avg_dist = np.average(dists)
    print "best model:", best_model, " ( dist =", avg_dist, ")"
    # return patternsize, codebook size, distortion errror, best saved model
    return testdata.shape[1] / 12, model._codebook.shape[0], avg_dist, best_model
def analyze_one_exp_dir(expdir, validdir, testdir, autobar=False, most_recent=False):
    """
    Analyze one experiment dir.
    This directory contains many subdirectory, for all the saved models.
    Check every saved model with validdata (numpy array), and test the
    best one on the test data.
    Returns numbers: patternsize, codebook size, distortion error, best saved model
    """
    # get all subdirs
    alldirs = glob.glob(os.path.join(expdir, '*'))
    if len(alldirs) > 0:
        alldirs = filter(lambda x: os.path.isdir(x), alldirs)
        # keep only experiment folders, named 'exp_...'
        alldirs = filter(lambda x: os.path.split(x)[-1][:4] == 'exp_', alldirs)
        # trim badly saved models
        alldirs = filter(lambda x: check_saved_model_full(x, False), alldirs)
    if len(alldirs) == 0:
        print 'no saved model found in:', expdir
        return None, None, None, None
    # get params from the most recent saved model (lexicographic sort)
    savedmodel = np.sort(alldirs)[-1]
    params = ANALYZE.unpickle(os.path.join(savedmodel, 'params.p'))
    # if test only one model, the most recent
    if most_recent:
        alldirs = [savedmodel]
    # load valid data (skipped when autobar: data is reloaded per model below)
    if not autobar:
        oracle = ORACLE.OracleMatfiles(params, validdir, oneFullIter=True)
        #if autobar:
        #    oracle.use_autobar_in_iterator(savedmodel)
        validdata = [x for x in oracle]
        validdata = filter(lambda x: x != None, validdata)
        validdata = np.concatenate(validdata)
        assert validdata.shape[1] > 0, 'no valid data??'
    # load test data
    if not autobar:
        if validdir != testdir:
            oracle = ORACLE.OracleMatfiles(params, testdir, oneFullIter=True)
            testdata = [x for x in oracle]
            testdata = filter(lambda x: x != None, testdata)
            testdata = np.concatenate(testdata)
            assert testdata.shape[1] > 0, 'no valid data??'
        else:
            testdata = validdata
    # test all subdirs with valid data, keep the best
    best_model = ''
    best_dist = np.inf
    for sm in alldirs:
        model = ANALYZE.unpickle(os.path.join(sm, 'model.p'))
        # IF AUTOBAR LOAD DATA NOW
        # NOTE(review): this per-model reload appears to run even when
        # autobar is False, clobbering the validdata loaded above --
        # confirm this is intended.
        oracle = ORACLE.OracleMatfiles(params, validdir, oneFullIter=True)
        oracle.use_autobar_in_iterator(model)
        validdata = [x for x in oracle]
        validdata = filter(lambda x: x != None, validdata)
        validdata = np.concatenate(validdata)
        assert validdata.shape[1] > 0, 'no valid data??'
        ####
        codewords, dists = model.predicts(validdata)
        avg_dist = np.average(dists)
        if avg_dist < best_dist:
            best_model = sm
            best_dist = avg_dist
    assert best_model != '', 'no data found???'
    if testdir == validdir:
        # we're done
        # load model, verbose
        model = ANALYZE.unpickle(os.path.join(best_model, 'model.p'))
        print 'best model:', best_model, ' ( dist =', best_dist, ')'
        # return patternsize, codebook size, distortion errror, best saved model
        # (patternsize derived as feature width / 12 -- presumably 12
        # chroma bins per beat; verify)
        return validdata.shape[1] / 12, model._codebook.shape[
            0], best_dist, best_model
    # test with test data
    model = ANALYZE.unpickle(os.path.join(best_model, 'model.p'))
    # IF AUTOBAR LOAD DATA NOW
    oracle = ORACLE.OracleMatfiles(params, testdir, oneFullIter=True)
    oracle.use_autobar_in_iterator(model)
    testdata = [x for x in oracle]
    testdata = filter(lambda x: x != None, testdata)
    testdata = np.concatenate(testdata)
    assert testdata.shape[1] > 0, 'no valid data??'
    ####
    codewords, dists = model.predicts(testdata)
    avg_dist = np.average(dists)
    print 'best model:', best_model, ' ( dist =', avg_dist, ')'
    # return patternsize, codebook size, distortion errror, best saved model
    return testdata.shape[1] / 12, model._codebook.shape[
        0], avg_dist, best_model
# everything to test, matfile at the end all_to_test = set() for f in tb: if os.path.isdir(f): all_to_test.add(f) for f in leaves: all_to_test.add(f) all_to_test = list(all_to_test) print_write('all models to try:', output) for f in all_to_test: print_write(str(f), output) print_write('number of models to test: ' + str(len(all_to_test)), output) #****************************************************************** # get params params = ANALYZE.unpickle(os.path.join(savedmodel, 'params.p')) print_write('PARAMS:', output) for k in params.keys(): print_write(str(k) + ' : ' + str(params[k]), output) # load data into memory oracle = ORACLE.OracleMatfiles(params, matfilesdir, oneFullIter=True) # get all features data = [x for x in oracle] print_write('retrieved ' + str(len(data)) + ' tracks.', output) # get none none features data = filter(lambda x: x != None, data) print_write(str(len(data)) + ' tracks not None remaining.', output) # transform into numpy array data = np.concatenate(data) print_write(str(data.shape[0]) + ' patterns loaded.', output) # remove empty patterns