def train_Softmax(C, dataFile, X, Y, testX, testY, pooledFile, imageDim, sgd, save=True, prefix=""):
    cfg = config.Config()
    data_path = cfg.paths['data']
    if sgd:
        raise NotImplementedError
    else:
        SFC = SoftMaxClassifier(X.T, Y, LAMBDA=C, maxiter=10000)
        print(SFC._architecture)

    sfcFile = data_path + "classifiers/%sSoftMax_lambda%e_%s.pkl" % \
        (prefix, C, pooledFile.split("/")[-1].split(".")[0])
    try:
        SFC = pickle.load(open(sfcFile, "rb"))
        SFC = SoftMaxClassifier(input=None, targets=None, saveFile=sfcFile)
        print("[*] trained classifier found.")
        print("[*] trained classifier loaded.")
    except IOError:
        print("[*] Training Softmax Classifier with LAMBDA=%e" % (C))
        SFC.train()
        print("[+] classifier trained.")
        if save:
            print("[+] saving classifier")
            #pickle.dump(SFC, open(sfcFile, "wb"))
            SFC.saveNetwork(sfcFile)

    #pred = SFC.predict(X.T)
    #acc = pred == Y.T
    #acc = np.sum(acc)/float(np.shape(acc)[0])
    #print 'Accuracy: %2.3f%%\n'% (acc * 100)

    #pred = SFC.predict(testX.T)
    #acc = pred == testY.T
    #acc = np.sum(acc)/float(np.shape(acc)[0])
    #print 'Accuracy: %2.3f%%\n'% (acc * 100)

    pred = SFC.predict(testX.T).T
    indices = np.argmax(pred, axis=1)
    pred = np.max(pred, axis=1)
    pred[indices == 0] = 1 - pred[indices == 0]

    fpr, tpr, thresholds = roc_curve(testY, pred)
    FoM = 1 - tpr[np.where(fpr <= 0.01)[0][-1]]
    print("[+] FoM: %.4f" % (FoM))
    threshold = thresholds[np.where(fpr <= 0.01)[0][-1]]
    print("[+] threshold: %.4f" % (threshold))
    return FoM, threshold
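# --- Illustrative sketch (not part of the original pipeline) -----------------
# The function below demonstrates, on hypothetical toy data, the score
# post-processing used in train_Softmax above: the softmax classifier returns
# one probability per class, the winning class's probability is kept, and rows
# where class 0 ("bogus") wins are flipped so every score reads as P(real).
# The FoM is then the missed detection rate at a false positive rate <= 1%.
def _softmax_score_postprocessing_example():
    import numpy as np
    from sklearn.metrics import roc_curve

    toy_pred = np.array([[0.9, 0.1],   # confident bogus -> score 0.1
                         [0.2, 0.8],   # confident real  -> score 0.8
                         [0.6, 0.4]])  # weak bogus      -> score 0.4
    toy_y = np.array([0, 1, 0])        # hypothetical labels

    indices = np.argmax(toy_pred, axis=1)
    scores = np.max(toy_pred, axis=1)
    scores[indices == 0] = 1 - scores[indices == 0]   # -> [0.1, 0.8, 0.4]

    fpr, tpr, thresholds = roc_curve(toy_y, scores)
    FoM = 1 - tpr[np.where(fpr <= 0.01)[0][-1]]       # MDR at 1% FPR
    return FoM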
def checkGradients():

    def costFunction(W, *args):

        def l2row(X):
            n, m = np.shape(X)
            N = np.sqrt(np.sum(np.multiply(X, X), axis=1) + 1e-8)
            N_stack = np.tile(N, (m, 1)).T
            Y = np.divide(X, N_stack)
            return Y, N

        def l2rowg(X, Y, N, D):
            n, m = np.shape(X)
            N_stack = np.tile(N, (m, 1)).T
            firstTerm = np.divide(D, N_stack)
            sum = np.sum(np.multiply(D, X), 1)
            sum = sum / (np.multiply(N, N))
            sum_stack = np.tile(sum[np.newaxis], (np.shape(Y)[1], 1)).T
            secondTerm = np.multiply(Y, sum_stack)
            return firstTerm - secondTerm

        X = args[0]
        n, m = np.shape(X)
        W = np.reshape(W, (k, n), order="F")

        # Feed forward
        F = np.dot(W, X)
        Fs = np.sqrt(np.multiply(F, F) + 1e-8)
        NFs, L2Fs = l2row(Fs)
        Fhat, L2Fn = l2row(NFs.T)

        # Compute objective function
        return np.sum(Fhat)

    k = 40
    n = 20
    # initialise
    #W = np.array([[1,2],[3,4],[5,6],[7,8]])/10.0
    W = np.random.rand(int(k), int(n))
    #print np.shape(W)
    W = np.ravel(W, order="F")

    cfg = config.Config()
    data_path = cfg.paths['data']
    dataFile = data_path + "naturalImages_patches_8x8.mat"
    data = sio.loadmat(dataFile)
    X = data["patches"][:n, :20]
    args = X, k

    sf = SparseFilter(k, 1)
    cost, grad = sf.objective(X, W)
    numgrad = computeNumericalGradient(costFunction, W, *args)

    for i in range(len(numgrad)):
        print("%d\t%f\t%f" % (i, numgrad[i], grad[i]))
    print("The above two columns you get should be very similar.")
    print("(Left-Your Numerical Gradient, Right-Analytical Gradient)")
    print()
    print("If your backpropagation implementation is correct, then")
    print("the relative difference will be small (less than 1e-9). ")
    diff = numgrad - grad
    #print "Relative Difference: %f" % diff
    print(diff)
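# computeNumericalGradient is not defined in this file. The sketch below shows
# the standard central-difference approximation that checkGradients relies on;
# it is an assumption about the helper's behaviour, not necessarily the exact
# implementation used elsewhere in the repository.
def computeNumericalGradient_sketch(J, theta, *args, epsilon=1e-4):
    """Approximate the gradient of J at theta element-wise with central differences."""
    import numpy as np
    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    for i in range(theta.size):
        # perturb one parameter at a time and difference the objective
        perturb[i] = epsilon
        loss1 = J(theta - perturb, *args)
        loss2 = J(theta + perturb, *args)
        numgrad[i] = (loss2 - loss1) / (2.0 * epsilon)
        perturb[i] = 0.0
    return numgrad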
def main():
    parser = optparse.OptionParser("[!] usage: python analyse_RF.py -F <data file>"+\
                                   " -c <classifier file> -s <data set>")
    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")
    parser.add_option("-c", dest="classifierFile", type="string", \
                      help="specify classifier to use")
    parser.add_option("-s", dest="dataSet", type="string", \
                      help="specify data set to analyse ([training] or [test] set)")

    (options, args) = parser.parse_args()

    dataFile = options.dataFile
    classifierFile = options.classifierFile
    dataSet = options.dataSet

    # TODO: remove, only for testing
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']
        dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"
        classifierFile = data_path + "classifiers/RF_n_estimators100_max_" + \
                         "features10_min_samples_leaf1_3pi_20x20_skew2_signPreserveNorm.pkl"
        dataSet = 'test'

    print()
    if dataFile == None or classifierFile == None or dataSet == None:
        print(parser.usage)
        exit(0)

    if dataSet != "training" and dataSet != "test":
        print("[!] Exiting: data set must be 1 of 'training' or 'test'")
        exit(0)

    try:
        data = sio.loadmat(dataFile)
    except IOError:
        print("[!] Exiting: %s Not Found" % (dataFile))
        exit(0)

    if dataSet == "training":
        X = np.nan_to_num(data["X"])
        y = np.squeeze(data["y"])
    elif dataSet == "test":
        X = np.nan_to_num(data["testX"])
        y = np.squeeze(data["testy"])

    try:
        classifier = pickle.load(open(classifierFile, "rb"))
    except IOError:
        print("[!] Exiting: %s Not Found" % (classifierFile))
        exit(0)

    measure_FoM(X, y, classifier)
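# measure_FoM is imported from elsewhere in the repository. A minimal sketch of
# what it is assumed to compute is given below (missed detection rate at a 1%
# false positive rate, mirroring the FoM used throughout these scripts); the
# signature, the plot flag and the use of predict_proba are assumptions based
# on how it is called in this file.
def measure_FoM_sketch(X, y, classifier, plot=True):
    import numpy as np
    from sklearn.metrics import roc_curve
    # plot is accepted for signature compatibility but ignored in this sketch
    pred = classifier.predict_proba(X)[:, 1]          # score = P(real) per example
    fpr, tpr, thresholds = roc_curve(y, pred)
    FoM = 1 - tpr[np.where(fpr <= 0.01)[0][-1]]       # MDR at 1% FPR
    threshold = thresholds[np.where(fpr <= 0.01)[0][-1]]
    print("[+] FoM: %.4f at threshold %.4f" % (FoM, threshold))
    return FoM, threshold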
def main():
    parser = optparse.OptionParser(
        "[!] usage: python cross_validate_RF.py -F <data file>")
    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")

    (options, args) = parser.parse_args()

    dataFile = options.dataFile

    cfg = config.Config()
    data_path = cfg.paths['data']
    dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"

    if dataFile == None:
        print(parser.usage)
        exit(0)

    data = sio.loadmat(dataFile)
    #scaler = preprocessing.StandardScaler().fit(data["X"])
    #X = scaler.transform(np.concatenate((data["X"], data["validX"])))
    X = np.nan_to_num(data["X"])
    m, n = np.shape(X)
    y = np.squeeze(data["y"])
    #y = np.squeeze(np.concatenate((data["y"], data["validy"])))

    n_estimators_grid = [100, 10]
    max_features_grid = [10, 25]
    min_samples_leaf_grid = [1, 2, 5]

    kf = KFold(m, n_folds=5)
    fold = 1
    for n_estimators in n_estimators_grid:
        for max_features in max_features_grid:
            for min_samples_leaf in min_samples_leaf_grid:
                fold = 1
                FoMs = []
                for train, test in kf:
                    print("[*]", fold, n_estimators, max_features, min_samples_leaf)
                    file = data_path + "classifiers/cv/RF_n_estimators"+str(n_estimators)+\
                           "_max_features"+str(max_features)+\
                           "_min_samples_leaf"+str(min_samples_leaf)+\
                           "_"+dataFile.split("/")[-1].split(".")[0]+\
                           "_fold"+str(fold)+".pkl"
                    try:
                        rf = pickle.load(open(file, "rb"))
                    except IOError:
                        train_x, train_y = X[train], y[train]
                        rf = train_RF(train_x, train_y, n_estimators, max_features, min_samples_leaf)
                        outputFile = open(file, "wb")
                        pickle.dump(rf, outputFile)
                    FoM, threshold = measure_FoM(X[test], y[test], rf, False)
                    fold += 1
                    FoMs.append(FoM)
                print("[+] mean FoM: %.3lf" % (np.mean(np.array(FoMs))))
                print()
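# Note: KFold(m, n_folds=5) above follows the pre-0.18 scikit-learn API
# (sklearn.cross_validation), where the object is built from the number of
# samples and iterated directly. On current scikit-learn the equivalent usage
# (an assumed modernisation, not a change applied here) would be:
#
#   from sklearn.model_selection import KFold
#   kf = KFold(n_splits=5)
#   for train, test in kf.split(X):
#       ...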
def main():
    parser = optparse.OptionParser(
        "[!] usage: python cross_validate_SVM.py -F <data file>")
    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")

    (options, args) = parser.parse_args()

    dataFile = options.dataFile

    # TODO: remove only for testing
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']
        data_file_standard = cfg.paths['data_file_standard']
        dataFile = data_path + data_file_standard

    if dataFile == None:
        print(parser.usage)
        exit(0)

    data = sio.loadmat(dataFile)
    X = data["X"]
    m, n = np.shape(X)
    y = np.squeeze(data["y"])

    kernel_grid = ["rbf"]
    C_grid = [5]
    gamma_grid = [1]

    kf = KFold(m, n_folds=5)
    fold = 1
    for kernel in kernel_grid:
        for C in C_grid:
            for gamma in gamma_grid:
                fold = 1
                FoMs = []
                for train, test in kf:
                    print("[*]", fold, kernel, C, gamma)
                    file = data_path + "classifiers/cv/SVM_kernel"+str(kernel)+"_C"+str(C)+\
                           "_gamma"+str(gamma)+"_"+dataFile.split("/")[-1].split(".")[0]+\
                           "_fold"+str(fold)+".pkl"
                    try:
                        svm = pickle.load(open(file, "rb"))
                    except IOError:
                        train_x, train_y = X[train], y[train]
                        svm = train_SVM(train_x, train_y, kernel, C, gamma)
                        outputFile = open(file, "wb")
                        pickle.dump(svm, outputFile)
                    FoM, threshold = measure_FoM(X[test], y[test], svm, False)
                    fold += 1
                    FoMs.append(FoM)
                print("[+] mean FoM: %.3lf" % (np.mean(np.array(FoMs))))
                print()
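# train_SVM is defined elsewhere in the repository. The sketch below is one
# plausible implementation consistent with how it is called above (kernel, C
# and gamma hyperparameters, probability estimates for later ROC analysis);
# the actual helper may differ.
def train_SVM_sketch(train_x, train_y, kernel, C, gamma):
    from sklearn.svm import SVC
    svm = SVC(kernel=kernel, C=C, gamma=gamma, probability=True)
    svm.fit(train_x, train_y)
    return svm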
def main():
    parser = optparse.OptionParser("[!] usage: python analyse_SVM.py -F <data file>"+\
                                   " -c <classifier file> -s <data set>")
    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")
    parser.add_option("-c", dest="classifierFile", type="string", \
                      help="specify classifier to use")
    parser.add_option("-s", dest="dataSet", type="string", \
                      help="specify data set to analyse ([training] or [test] set)")

    (options, args) = parser.parse_args()

    dataFile = options.dataFile
    classifierFile = options.classifierFile
    dataSet = options.dataSet

    cfg = config.Config()
    dataFile = cfg.paths['data'] + cfg.paths['data_file_standard']
    classifierFile = cfg.paths['data'] + "classifiers/" + \
                     "SVM_kernelrbf_C1.0_gamma0.0025_3pi_20x20_skew2_signPreserveNorm.pkl"
    dataSet = "test"

    print()
    if dataFile == None or classifierFile == None or dataSet == None:
        print(parser.usage)
        exit(0)

    if dataSet != "training" and dataSet != "test":
        print("[!] Exiting: data set must be 1 of 'training' or 'test'")
        exit(0)

    try:
        data = sio.loadmat(dataFile)
    except IOError:
        print("[!] Exiting: %s Not Found" % (dataFile))
        exit(0)

    if dataSet == "training":
        X = data["X"]
        y = np.squeeze(data["y"])
    elif dataSet == "test":
        X = data["testX"]
        y = np.squeeze(data["testy"])

    try:
        classifier = pickle.load(open(classifierFile, "rb"))
    except IOError:
        print("[!] Exiting: %s Not Found" % (classifierFile))
        exit(0)

    measure_FoM(X, y, classifier)
def main():
    #checkGradients()
    cfg = config.Config()
    data_path = cfg.paths['data']
    dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"
    #dataFile = "/Users/dew/development/PS1-Real-Bogus/data/3pi/"+\
    #           "patches_3pi_20x20_signPreserveNorm_8x8_10.mat"
    data = sio.loadmat(dataFile)
    #X = data["patches"][:40000,:].T
    X = data["X"].T
    sf = SparseFilter()
    sf.fit(X)
    sf.saveSF(data_path + "trained_sparseFilters/SF_256_" +
              dataFile.split("/")[-1].split(".")[0] + ".mat")
    sf.visualiseLearnedFeatures()
def get_sparseFilter(numFeatures, patches, patchesFile, maxiter=100):
    try:
        # added maxiter to filename 24/02/15
        cfg = config.Config()
        data_path = cfg.paths['data']
        sf_file = data_path + "trained_sparseFilters/SF_%d_%s_maxiter%d.mat" % \
            (numFeatures, patchesFile.split("/")[-1].split(".")[0], maxiter)
        print(sf_file)
        SF = SparseFilter(saveFile=sf_file)
        print("[*] Trained sparse filter loaded.")
    except IOError:
        print("[*] Could not find trained sparse filter.")
        print("[+] Training sparse filter ... ")
        SF = SparseFilter(k=numFeatures, maxiter=maxiter)
        SF.fit(patches)
        SF.saveSF(sf_file)
        print("[+] Sparse filter trained")
    SF.visualiseLearnedFeatures()
    return SF
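# Hedged usage sketch for get_sparseFilter, mirroring how it is called from the
# convolutional sparse filtering pipeline below. The patches file name here is
# hypothetical.
#
#   patchesFile = data_path + "patches_6x6.mat"            # hypothetical file
#   patches = sio.loadmat(patchesFile)["patches"].T
#   SF = get_sparseFilter(256, patches, patchesFile, maxiter=100)
#   W = np.reshape(SF.trainedW, (SF.k, SF.n), order="F")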
def main(argv=None):
    cfg = config.Config()
    data_path = cfg.paths['data']

    if argv is None:
        argv = sys.argv

    if len(argv) != 5:
        sys.exit("Usage: train_RF.py <n_estimators> <max_features>" +\
                 " <min_samples_leaf> <.mat file>")

    n_estimators = int(argv[1])
    max_features = int(argv[2])
    min_samples_leaf = int(argv[3])
    dataFile = argv[4]

    # TODO: Remove, only for testing
    if False:
        n_estimators = 100
        max_features = 10
        min_samples_leaf = 1
        dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"

    data = sio.loadmat(dataFile)
    #train_x = np.concatenate((data["X"], data["validX"]))
    #train_y = np.squeeze(np.concatenate((data["y"], data["validy"])))
    train_x = np.nan_to_num(data["X"])
    train_y = np.squeeze(data["y"])

    rf = train_RF(train_x, train_y, n_estimators, max_features, min_samples_leaf)

    outputFile = open(data_path + "classifiers/RF_n_estimators"+str(n_estimators)+\
                      "_max_features"+str(max_features)+\
                      "_min_samples_leaf"+str(min_samples_leaf)+\
                      "_"+dataFile.split("/")[-1].split(".")[0]+".pkl", "wb")
    pickle.dump(rf, outputFile)
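# train_RF is defined elsewhere in the repository. A minimal sketch consistent
# with the hyperparameters passed above is shown here as an assumption; the
# real implementation may set additional options (e.g. n_jobs or class
# weights).
def train_RF_sketch(train_x, train_y, n_estimators, max_features, min_samples_leaf):
    from sklearn.ensemble import RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=n_estimators,
                                max_features=max_features,
                                min_samples_leaf=min_samples_leaf)
    rf.fit(train_x, train_y)
    return rf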
def main():
    parser = optparse.OptionParser("[!] usage: python classify.py\n"+\
                                   "    -F <data files [comma-separated]>\n"+\
                                   "    -c <classifier files [comma-separated]>\n"+\
                                   "    -t <threshold [default=0.5]>\n"+\
                                   "    -s <data set>\n"+\
                                   "    -o <output file>\n"+\
                                   "    -f <figure of merit [\"fpr\" or \"mdr\"]>\n"+\
                                   "    -p <plot hypothesis distribution [optional]>\n"+\
                                   "    -r <plot ROC curve [optional]>\n"+\
                                   "    -n <classify by name [optional]>\n"+\
                                   "    -P <pooled features file [optional]>\n"+\
                                   "    -L <plot learning curve [optional]>\n"+\
                                   "    -l <labels for plotting [optional, comma-separated]>\n"+\
                                   "    -m <print misclassified file names>")

    parser.add_option("-F", dest="dataFiles", type="string", \
                      help="specify data file[s] to analyse")
    parser.add_option("-c", dest="classifierFiles", type="string", \
                      help="specify classifier[s] to use")
    parser.add_option("-t", dest="threshold", type="float", \
                      help="specify decision boundary threshold [default=0.5]")
    parser.add_option("-o", dest="outputFile", type="string", \
                      help="specify output file")
    parser.add_option("-f", dest="fom", type="string", \
                      help="specify the figure of merit either 1% FPR or 1% MDR - choose \"fpr\" or \"mdr\"")
    parser.add_option("-s", dest="dataSet", type="string", \
                      help="specify data set to analyse [default=test]")
    parser.add_option("-p", action="store_true", dest="plot", \
                      help="specify whether to plot the hypothesis distribution [optional]")
    parser.add_option("-r", action="store_true", dest="roc", \
                      help="specify whether to plot the ROC curve [optional]")
    parser.add_option("-n", action="store_true", dest="byName", \
                      help="specify whether to classify objects by name [optional]")
    parser.add_option("-P", dest="poolFile", type="string", \
                      help="specify pooled features file [optional]")
    parser.add_option("-L", action="store_true", dest="learningCurve", \
                      help="specify whether to generate a learning curve [optional]")
    parser.add_option("-l", dest="labels", type="string", \
                      help="specify label[s] for plots [optional]")
    parser.add_option("-m", action="store_true", dest="miss", \
                      help="specify whether or not to print misclassified file names [optional]")

    (options, args) = parser.parse_args()

    ## TODO: Test by defining arguments
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']
        dataFiles = data_path + "3pi_20x20_skew2_signPreserveNorm.mat" + "," + \
                    data_path + "3pi_20x20_skew2_signPreserveNorm.mat" + "," + \
                    data_path + "3pi_20x20_skew2_signPreserveNorm.mat"
        classifierFiles = patchesFile = data_path + "patches_stl-10_unlabeled_meansub_20150409_psdb_6x6.mat"
        imageDim = 20
        imageChannels = 1
        patchDim = 6
        numFeatures = 20
        poolDim = 5
        stepSize = 20

    try:
        dataFiles = options.dataFiles.split(",")
        classifierFiles = options.classifierFiles.split(",")
        threshold = options.threshold
        outputFile = options.outputFile
        fom = options.fom
        dataSet = options.dataSet
        plot = options.plot
        roc = options.roc
        byName = options.byName
        poolFile = options.poolFile
        learningCurve = options.learningCurve
        miss = options.miss
        try:
            labels = options.labels.split(",")
        except:
            labels = None
    except AttributeError as e:
        print(e)
        print(parser.usage)
        exit(0)

    if dataFiles == None or classifierFiles == None:
        print(parser.usage)
        exit(0)

    if threshold == None:
        threshold = 0.5

    if dataSet == None:
        dataSet = "test"

    if fom == "fpr":
        fom_func = one_percent_fpr
    elif fom == "mdr":
        fom_func = one_percent_mdr
    else:
        fom_func = one_percent_fpr

    Xs = []
    Ys = []
    Files = []
    for dataFile in dataFiles:
        data = sio.loadmat(dataFile)
        print("[+] %s" % dataFile)
        X = np.nan_to_num(data["X"])
        #scaler = preprocessing.StandardScaler(with_std=False).fit(X)
        if dataSet == "test":
            try:
                Xs.append(np.nan_to_num(data["testX"]))
                #Xs.append(scaler.transform(data["testX"]))
                Ys.append(np.squeeze(data["testy"]))
                Files.append(data["test_files"])
            except KeyError:
                if plot:
                    y = np.zeros((np.shape(X)[0],))
                else:
                    print("[!] Exiting: Could not load test set from %s" % dataFile)
                    exit(0)
        elif dataSet == "training":
            try:
                Xs.append(np.nan_to_num(data["X"]))
                #Xs.append(np.squeeze(np.concatenate((data["X"], data["testX"]))))
                try:
                    #Ys.append(np.squeeze(np.concatenate((data["y"], data["testy"]))))
                    if -1 in data["y"]:
                        print(np.squeeze(np.where(data["y"] != -1)[1]))
                        Ys.append(np.squeeze(data["y"][np.where(data["y"] != -1)]))
                    else:
                        Ys.append(np.squeeze(data["y"]))
                except KeyError:
                    if fom:
                        print("[!] Exiting: Could not load labels from %s" % dataFile)
                        print("[*] FoM calculation is not possible without labels.")
                        exit(0)
                    else:
                        Ys.append(np.zeros((np.shape(X)[0],)))
                Files.append(data["images"])
            except KeyError:
                try:
                    Files.append(data["train_files"])
                except KeyError as e:
                    print(e)
                    try:
                        Files.append(data["files"])
                    except KeyError as e:
                        print(e)
                        print("[!] Exiting: Could not load training set from %s" % dataFile)
                        exit(0)
        elif dataSet == "all":
            try:
                Xs.append(np.nan_to_num(np.concatenate((data["X"], data["testX"]))))
                try:
                    Ys.append(np.squeeze(np.concatenate((data["y"], data["testy"]))))
                except KeyError:
                    if fom:
                        print("[!] Exiting: Could not load labels from %s" % dataFile)
                        print("[*] FoM calculation is not possible without labels.")
                        exit(0)
                    else:
                        Ys.append(np.zeros((np.shape(Xs[0])[0],)))
                Files.append(np.squeeze(np.concatenate((data["images"], data["test_files"]))))
            except KeyError:
                try:
                    Files.append(np.squeeze(np.concatenate((data["train_files"], data["test_files"]))))
                except KeyError as e:
                    print(e)
                    try:
                        Files.append(np.squeeze(np.concatenate((data["files"], data["test_files"]))))
                    except KeyError as e:
                        print(e)
                        print("[!] Exiting: Could not load training set from %s" % dataFile)
                        exit(0)
        else:
            print("[!] Exiting: %s is not a valid choice, choose one of \"training\" or \"test\"" % dataSet)
            exit(0)

    preds = []
    for classifierFile in classifierFiles:
        dataFile = dataFiles[classifierFiles.index(classifierFile)]
        try:
            predFile = predictionsPath+classifierFile.split("/")[-1].replace(".pkl","")+\
                       "_predictions_%s_%s.mat"%(dataFile.split("/")[-1].replace(".mat",""), dataSet)
            preds.append(np.squeeze(sio.loadmat(predFile)["predictions"]))
        except IOError:
            if poolFile != None:
                Xs = []
                try:
                    features = sio.loadmat(poolFile)
                    try:
                        pooledFeaturesTrain = features["pooledFeaturesTrain"]
                    except KeyError:
                        pooledFeaturesTrain = features["pooledFeatures"]
                    X = np.transpose(pooledFeaturesTrain, (0,2,3,1))
                    numTrainImages = np.shape(X)[3]
                    X = np.reshape(X, (int((pooledFeaturesTrain.size)/float(numTrainImages)), \
                                       numTrainImages), order="F")
                    scaler = preprocessing.MinMaxScaler()
                    scaler.fit(X.T)  # Don't cheat - fit only on training data
                    X = scaler.transform(X.T)
                    if dataSet == "training":
                        pass
                    elif dataSet == "test":
                        pooledFeaturesTest = features["pooledFeaturesTest"]
                        X = np.transpose(pooledFeaturesTest, (0,2,3,1))
                        numTestImages = np.shape(X)[3]
                        X = np.reshape(X, (int((pooledFeaturesTest.size)/float(numTestImages)), \
                                           numTestImages), order="F")
                        X = scaler.transform(X.T)
                    Xs.append(X)
                except IOError:
                    print("[!] Exiting: %s Not Found" % (poolFile))
                    exit(0)
                finally:
                    features = None
                    pooledFeaturesTrain = None
                    pooledFeaturesTest = None
            X = Xs[classifierFiles.index(classifierFile)]
            if learningCurve:
                y = Ys[classifierFiles.index(classifierFile)]
                generate_Learning_Curve(X, y, classifierFile)
            else:
                pred = predict(classifierFile, X)
                #predFile = predictionsPath+classifierFile.split("/")[-1].replace(".mat","")+"_predictions_%s.mat"%dataSet
                #sio.savemat(predFile,{"ids":Files[classifierFiles.index(classifierFile)],"predictions":pred})
                preds.append(np.squeeze(pred))
            #X = Xs = None

    if outputFile != None and not byName:
        output = open(outputFile, "w")
        files = Files[0]
        pred = preds[0]
        y = Ys[0]
        for i, prediction in enumerate(pred):
            output.write(files[i].rstrip() + "," + str(prediction) + "," + str(y[i]) + "\n")
        output.close()

    if byName:
        files = Files[0]
        pred = preds[0]
        print(pred)
        print(files)
        print(outputFile)
        preds = [predict_byName(pred, files, outputFile)]
        try:
            Ys = [labels_byName(files, Ys[0])]
        except NameError as e:
            print(e)

    if plot:
        try:
            for pred in preds:
                hypothesisDist(Ys[preds.index(pred)], pred, threshold)
        except NameError as e:
            print("[!] NameError : %s" % e)

    if roc:
        plot_ROC(Ys, preds, fom_func, Labels=labels)
        #test_FDR_procedure(Ys[0], preds[0])

    clf = pickle.load(open(classifierFiles[0], "rb"))
    if type(clf) == type(RandomForestClassifier()):
        try:
            feature_names = []
            for f in sio.loadmat(dataFiles[0])["features"]:
                feature_names.append(str(f))
            feature_importance(Xs[0], clf, feature_names)
        except KeyError:
            feature_importance(Xs[0], clf, list(range(Xs[0].shape[1])))

    if miss:
        print_misclassified(Ys[0], preds[0], np.squeeze(Files[0]), fom_func, threshold)
import sys, optparse
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, f1_score
from sklearn import preprocessing
from tests_marco.tools import mlutils
from tests_marco.config import config
import pickle

# config
cfg = config.Config()
data_path = cfg.paths['data']
predictionsPath = data_path + "predictions/"

def one_percent_mdr(y, pred, fom):
    fpr, tpr, thresholds = roc_curve(y, pred)
    FoM = fpr[np.where(1-tpr<=fom)[0][0]]  # FPR at 1% MDR
    threshold = thresholds[np.where(1-tpr<=fom)[0][0]]
    return FoM, threshold, fpr, tpr

def one_percent_fpr(y, pred, fom):
    fpr, tpr, thresholds = roc_curve(y, pred)
    FoM = 1-tpr[np.where(fpr<=fom)[0][-1]]  # MDR at 1% FPR
    threshold = thresholds[np.where(fpr<=fom)[0][-1]]
    return FoM, threshold, fpr, tpr

def predict(clfFile, X):
def main(): """ add -v argument to visualise learned features """ parser = optparse.OptionParser("[!] usage: python convolutional_sparseFiltering.py\n"+\ "\t -F <data file>\n"+\ "\t -P <patches file>\n"+\ "\t -d <image dimension>\n"+\ "\t -c <number of image channels>\n"+\ "\t -p <patch dimension>\n"+\ "\t -f <number of features to learn>\n" "\t -r <receptive field dimension>\n"+\ "\t -s <step size>\n"+\ "\t -C <regularisation parameter>\n"+\ "\t -V <cross validate>\n"+\ "\t -n <maximum number of patches to use>\n"+\ "\t -m <maximum number of iterations [default=100]>\n"+\ "\t -g <stochastic gradient decent>") parser.add_option("-F", dest="dataFile", type="string", \ help="specify data file to analyse") parser.add_option("-P", dest="patchesFile", type="string", \ help="specify patches file") parser.add_option("-d", dest="imageDim", type="int", \ help="specify dimension of images in data file") parser.add_option("-c", dest="imageChannels", type="int", \ help="specify number of channels for images in data file") parser.add_option("-p", dest="patchDim", type="int", \ help="specify dimension of patches in patches file") parser.add_option("-f", dest="numFeatures", type="int", \ help="specify number of features for sparse filtering") parser.add_option("-r", dest="poolDim", type="int", \ help="specify dimension of (pooling dimesion)") parser.add_option("-s", dest="stepSize", type="int", \ help="specify step size for convolution and pooling") parser.add_option("-C", dest="C", type="float", \ help="specify the regularisation parameter for linear SVM") parser.add_option("-V", action="store_true", dest="cv", \ help="specify whether to cross validate [default=False]") parser.add_option("-n", dest="numPatches", type="int", \ help="specify the maximum number of patches to use [optional]") parser.add_option("-m", dest="maxiter", type="int", \ help="specify the maximum number iterations [default=100]") parser.add_option("-g", action="store_true", dest="sgd", \ help="specify whether to use stochastic gradient decent [default=False]") (options, args) = parser.parse_args() dataFile = options.dataFile patchesFile = options.patchesFile imageDim = options.imageDim imageChannels = options.imageChannels patchDim = options.patchDim numFeatures = options.numFeatures poolDim = options.poolDim stepSize = options.stepSize C = options.C cv = options.cv numPatches = options.numPatches maxiter = options.maxiter sgd = options.sgd ## TODO: Test by defining arguments if False: cfg = config.Config() data_path = cfg.paths['data'] dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat" patchesFile = data_path + "patches_stl-10_unlabeled_meansub_20150409_psdb_6x6.mat" imageDim = 20 imageChannels = 1 patchDim = 6 numFeatures = 20 poolDim = 5 stepSize = 20 C = 1 cv = 3 required_arguments = [dataFile, patchesFile, imageDim, imageChannels, \ patchDim, numFeatures, poolDim] if None in required_arguments: print(parser.usage) #exit(0) try: assert (numFeatures % stepSize) == 0 except AssertionError: print( "[!] Exiting: step size must be a multiple of the number of features." ) #exit(0) try: data = sio.loadmat(patchesFile) patches = data["patches"].T[:, :numPatches] ### Added scaling 06/01/15 ### #n,m = np.shape(patches) #means = np.mean(patches, axis=0) #means = np.tile(means, (n,1)) #print np.shape(means) #patches = patches - means #data = means = None except IOError: print("[!] 
Exiting: could not open patches file - %s" % patchesFile) #exit(0) if maxiter == None: maxiter = 100 SF = get_sparseFilter(numFeatures, patches, patchesFile, maxiter=maxiter) W = np.reshape(SF.trainedW, (SF.k, SF.n), order="F") SF = None patches = None # added maxiter to filename 24/02/15 featuresFile = data_path + "features/SF_maxiter%d_L1_%s_%dx%d_k%d_%s_pooled%d.mat" % \ (maxiter, dataFile.split("/")[-1].split(".")[0], patchDim, patchDim, numFeatures, \ patchesFile.split("/")[-1].split(".")[0], poolDim) try: features = sio.loadmat(featuresFile) pooledFeaturesTrain = features["pooledFeaturesTrain"] pooledFeaturesTest = features["pooledFeaturesTest"] print("[*] convolved and pooled features loaded") except IOError: print("[*] no convloved and pooled features found for %s" % dataFile.split("/")[-1]) print("[+] convolving and pooling...") convolve_and_pool(dataFile, featuresFile, W, imageDim, patchDim, poolDim, \ numFeatures, stepSize) features = sio.loadmat(featuresFile) pooledFeaturesTrain = features["pooledFeaturesTrain"] pooledFeaturesTest = features["pooledFeaturesTest"] print("[+] Done.") if cv == None: cv = False if sgd == None: sgd = False if C != None and cv == False: trainImages, trainLabels, numTrainImages,\ testImages, testLabels, numTestImages = load_data(dataFile, imageDim) trainImages = None testImages = None X = np.transpose(pooledFeaturesTrain, (0, 2, 3, 1)) X = np.reshape(X, (int((pooledFeaturesTrain.size)/float(numTrainImages)), \ numTrainImages), order="F") # MinMax scaling removed 11-03-2015 scaler = preprocessing.MinMaxScaler() scaler.fit(X.T) # Don't cheat - fit only on training data X = scaler.transform(X.T) #X = X.T Y = np.squeeze(trainLabels) print(Y) testX = np.transpose(pooledFeaturesTest, (0, 2, 3, 1)) testX = np.reshape(testX, (int((pooledFeaturesTest.size)/float(numTestImages)), \ numTestImages), order="F") # MinMax scaling removed 11-03-2015 testX = scaler.transform(testX.T) testY = np.squeeze(testLabels) print(testY) #train_linearSVM(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \ # sgd, save=True, prefix="") #train_SoftMaxOnline(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \ # sgd, save=True, prefix="") train_Softmax(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \ sgd, save=True, prefix="") elif cv == True: trainImages, trainLabels, numTrainImages,\ testImages, testLabels, numTestImages = load_data(dataFile, imageDim) trainImages = None testImages = None X = np.transpose(pooledFeaturesTrain, (0, 2, 3, 1)) X = np.reshape(X, (int((pooledFeaturesTrain.size)/float(numTrainImages)), \ numTrainImages), order="F") # MinMax scaling removed 11-03-2015 scaler = preprocessing.MinMaxScaler() scaler.fit(X.T) # Don't cheat - fit only on training data X = scaler.transform(X.T) Y = np.squeeze(trainLabels) #C = cross_validate_linearSVM(dataFile, X, Y, featuresFile, imageDim, sgd) #C = cross_validate_SoftMaxOnline(dataFile, X, Y, featuresFile, imageDim, sgd) C = cross_validate_Softmax(dataFile, X, Y, featuresFile, imageDim, sgd) testX = np.transpose(pooledFeaturesTest, (0, 2, 3, 1)) testX = np.reshape(testX, (int((pooledFeaturesTest.size)/float(numTestImages)), \ numTestImages), order="F") # MinMax scaling removed 11-03-2015 testX = scaler.transform(testX.T) testY = np.squeeze(testLabels) #train_linearSVM(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \ # sgd, save=True, prefix="") #train_SoftmaxOnline(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \ # sgd, save=True, prefix="") train_Softmax(C, dataFile, X, Y, testX, 
testY, featuresFile, imageDim, \ sgd, save=True, prefix="")
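# --- Illustrative sketch (not part of the original pipeline) -----------------
# The transpose/reshape pattern used above turns pooled convolutional features
# of shape (numFeatures, numImages, poolRows, poolCols) into a
# (features x images) design matrix with one column per image. The dimensions
# below are made up purely for illustration.
def _pooled_feature_reshape_example():
    import numpy as np
    pooled = np.random.rand(20, 7, 3, 3)        # (numFeatures=20, numImages=7, 3, 3)
    X = np.transpose(pooled, (0, 2, 3, 1))      # -> (20, 3, 3, 7)
    numImages = np.shape(X)[3]
    X = np.reshape(X, (pooled.size // numImages, numImages), order="F")
    return X                                    # shape (180, 7): one column per image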