def get_images_pipeline(in_file, method=1): print "Filtering images and getting components, method", method, "..." #images = image_filter.filter_dir(in_file) # don't load all images into memory at once for performance image_components = [] # Loop over each image in source directory and process individually i = 0 for filename in os.listdir(in_file): # Skip non-image files if (not (filename.endswith(".jpg") or filename.endswith(".png"))): continue # Get filtered image img = image_filter.get_filtered_image(in_file, filename) # Extract the image components components = 0. print i, # for debugging to watch progress if (method == 1): # use DRT, PCA sinogram = invariant_drt.compute_drt(img) new_sinogram = invariant_drt.post_process_sinogram(sinogram) components = pca.get_pca( new_sinogram, 2) # increase for slightly better accuracy else: # use DCT FFT method image_dct = dct.fft_dct_2d(img) # Get similar number of components to DRT approach components = dct.get_largest_freqs( image_dct, 18, 18) # increase for better accuracy image_components.append(components) i += 1 print "[Done]" return image_components
def extract(methods, images, use_pca=False, fit=False, n_components=30):
    """Run a chain of feature-extraction callables over ``images``.

    Every callable in ``methods`` is applied in order to every element, and
    ``flatten`` is always applied as the final step. Elements whose exact type
    is ``Image`` are unwrapped with ``get_image()`` before being passed on.

    Args:
        methods: Sequence of callables applied to each image in order. The
            sequence itself is left untouched (``flatten`` is added to a
            private copy, so reusing the same list across calls does not
            accumulate extra ``flatten`` steps).
        images: Mutable, index-assignable sequence of inputs. It is updated
            IN PLACE: each slot is overwritten with the processed value.
        use_pca: When true, the processed data is projected with the
            module-level ``pca`` object and the projection is returned.
        fit: Only used with ``use_pca``. When true, a new PCA object is
            obtained from ``p.get_pca(n_components, images)``, fitted on
            ``images`` and stored in the module-level ``pca``.
        n_components: Component count forwarded to ``p.get_pca`` when fitting.

    Returns:
        ``pca.transform(images)`` when ``use_pca`` is true, otherwise the
        (mutated) ``images`` object itself.

    Note:
        With ``use_pca=True`` and ``fit=False`` the module-level ``pca`` must
        already have been set by an earlier fitting call.
    """
    global pca

    # Work on a private copy: appending to the caller's list would make it
    # grow by one `flatten` entry on every call.
    pipeline = list(methods) + [flatten]

    for method in pipeline:
        for i, img in enumerate(images):
            source = img.get_image() if type(img) is Image else img
            images[i] = method(source)
        print("Method " + method.__name__ + " applied successfully to data")

    if not use_pca:
        return images

    if fit:
        pca = p.get_pca(n_components, images)
        pca.fit(images)
    return pca.transform(images)
def _product_rule_index(row, clfrs):
    """Return the column index maximising the product of all classifiers' probabilities for one sample."""
    sample = row.reshape(1, -1)
    prod = None
    for clfr in clfrs:
        proba = clfr.predict_proba(sample)
        if prod is None:
            # Start from ones so the running product has the same shape as
            # the first classifier's probability matrix.
            prod = np.ones(proba.shape)
        prod *= proba
    # Sort descending and take the first column index of the single row.
    # With an empty `clfrs`, `prod` is still None and this raises TypeError.
    return (-prod).argsort()[:1][0][0]


def _count_product_errors(X, y, clfrs, predictions):
    """Predict every row of X, append each prediction to `predictions`, and return the number of mismatches with y."""
    errors = 0
    for i, row in enumerate(X):
        ind = _product_rule_index(row, clfrs)
        predictions.append(ind)
        if not ind == y[i]:
            errors += 1
    return errors


def product_of_classifiers(data, clfrs, pca_number=None):
    """Combine classifiers with the product rule and score train and test sets.

    For each sample the ``predict_proba`` outputs of all classifiers are
    multiplied element-wise and the index of the largest product is taken as
    the prediction.

    Args:
        data: Mapping with ``'train'`` and ``'test'`` entries, each indexable
            as ``(X, y)``; rows of ``X`` must support ``reshape``.
        clfrs: Fitted classifiers exposing ``predict_proba``.
        pca_number: When not None, ``get_pca(data, pca_number)`` is used to
            transform both feature matrices before prediction.

    Returns:
        ``(predictions, (train_score, test_score))`` where ``predictions`` is
        a numpy array holding the predictions for all training rows followed
        by those for all test rows, and each score is ``1 - error_rate``.

    Note:
        The predicted value is a probability-column index and is compared
        directly with the label — assumes labels coincide with the column
        order of ``predict_proba``; TODO confirm against the training code.
    """
    print("Product Using " + str(len(clfrs)) + " Classifiers")

    if pca_number is None:
        Xtest = data['test'][0]
        Xtrain = data['train'][0]
    else:
        pca_x = get_pca(data, pca_number)
        Xtest = pca_x.transform(data['test'][0])
        Xtrain = pca_x.transform(data['train'][0])
        pca_x = None  # drop the reference; the model is not needed any more

    ytrain = data['train'][1]
    ytest = data['test'][1]
    yreturn = []

    trainerr = _count_product_errors(Xtrain, ytrain, clfrs, yreturn)
    # The training error rate is normalised by the TRAINING set size (it was
    # previously divided by the test set size).
    print("TrainError=", str(trainerr / len(ytrain)))

    testerr = _count_product_errors(Xtest, ytest, clfrs, yreturn)
    print("TestError=", str(testerr / len(ytest)))

    scoreTrain = 1 - (trainerr / len(ytrain))
    scoreTest = 1 - (testerr / len(ytest))
    return (np.array(yreturn), (scoreTrain, scoreTest))
def get_hog_features(X, shape=(15, 15)):
    """Build a feature matrix from PCA-projected pixels plus HOG descriptors.

    Each row of ``X`` is reshaped to ``shape``, passed through
    ``image_resize`` and PIL, and described with an OpenCV HOG descriptor.
    The HOG rows are L2-normalised and appended (column-wise) to the output
    of ``get_pca(..., 40).transform(X)``.

    Args:
        X: Array of flattened images, one per row; each row must be
            reshapable to ``shape``.
        shape: Image shape, also used as the HOG window size.

    Returns:
        ``np.hstack((pca_features, normalised_hog_features))``.
    """
    descriptor = cv2.HOGDescriptor(
        shape,      # winSize
        (10, 10),   # blockSize
        (5, 5),     # blockStride
        (10, 10),   # cellSize
        9,          # nbins
        1,          # derivAperture
        -1.,        # winSigma
        0,          # histogramNormType
        0.2,        # L2HysThreshold
        1,          # gammaCorrection
        64,         # nlevels
        1,          # useSignedGradients
    )

    def describe(item):
        # Reshape -> project resize helper -> 8-bit greyscale via PIL -> HOG.
        pixels = item.reshape(shape)
        pixels = image_resize(pixels, new_shape=shape, binary_encoded=True,
                              reverse=True)
        pixels = np.asarray(Image.fromarray(pixels, 'L'))
        return np.array(descriptor.compute(pixels)).flatten()

    hog_rows = np.array([describe(item) for item in X])
    hog_normalized = preprocessing.normalize(hog_rows, norm='l2')

    # Experimental: combine HOG with a PCA of the raw pixels.
    # presumably get_pca fits on the 'train' entry of the mapping — verify.
    pixel_pca = get_pca({'train': (X, False)}, 40)
    pca_features = pixel_pca.transform(X)
    print(hog_normalized.shape, X.shape, pca_features.shape)

    combined = np.hstack((pca_features, hog_normalized))
    print(combined.shape)
    return combined
def voting_classifier(trained_clfs, data, plot=False):
    """Majority-vote the trained classifiers together with the CNN on the test set.

    The classifiers predict on the test features transformed by
    ``get_pca(data, 35)``; the CNN (``predictcnn``) predicts on the raw test
    pixels reshaped to ``(-1, 15, 15, 1)``. For each sample the most frequent
    vote (via ``mode``) is compared with the true label. Misclassifications
    are printed and, when ``plot`` is true, displayed for two seconds.

    Args:
        trained_clfs: Fitted classifiers exposing ``predict``.
        data: Mapping whose ``'test'`` entry is indexable as ``(X, y)``.
        plot: Show each misclassified sample as a 15x15 image.

    Returns:
        None. The accuracy is printed; the per-sample vote table is built but
        not returned.
    """
    Xtest = get_pca(data, 35).transform(data['test'][0])
    predictions_cnn = predictcnn(data['test'][0].reshape(-1, 15, 15, 1))
    test_pixels = data['test'][0]
    ytest = data['test'][1]

    result_features = []
    misclassified = 0
    for i, row in enumerate(Xtest):
        # One vote per classifier (each a single-sample prediction) ...
        votes = [clf.predict(np.array([row])) for clf in trained_clfs]
        # ... plus the CNN's vote, wrapped to match the other entries.
        votes.append([predictions_cnn[i]])

        most_frequent = mode(votes)[0][0]
        if most_frequent != ytest[i]:
            print(votes, ytest[i])
            misclassified += 1
            if plot:
                ax1 = plt.subplot2grid((8, 7), (0, 0), rowspan=8, colspan=3)
                picture = test_pixels[i].reshape(15, 15)
                print(picture, most_frequent, "Should have been:", ytest[i])
                ax1.imshow(test_pixels[i].reshape(15, 15))
                plt.pause(2)

        votes.append(most_frequent)
        result_features.append(votes)

    result_features = np.array(result_features)
    print("Accuracy = ", (len(ytest) - misclassified) / len(ytest))