def data_reduction(topic_2_doc, n_clusters, mesh_step=0.005):
    """Reduce the data to 2-D with PCA, cluster it and label a mesh of points.

    The input is projected onto its first two principal components, k-means
    is fitted on that projection, and every node of a regular grid covering
    the projected points (with a margin of 1 on each side) is assigned to a
    cluster so the decision regions can be drawn.

    Args:
        topic_2_doc: data passed to ``PCA.fit_transform``.
        n_clusters: number of k-means clusters.
        mesh_step: spacing between neighbouring grid nodes. The number of
            nodes grows with ``1 / mesh_step ** 2``, so small values are
            expensive in memory and prediction time. Defaults to 0.005,
            the value that used to be hard-coded.

    Returns:
        A tuple ``(reduced_data, kmeans, x_min, x_max, y_min, y_max, xx, yy,
        Z)`` where ``reduced_data`` is the 2-D projection, ``kmeans`` the
        fitted estimator, the four bounds delimit the grid, ``xx``/``yy`` are
        the ``np.meshgrid`` coordinate arrays and ``Z`` is the flat array of
        cluster labels predicted for the raveled grid nodes.

    Raises:
        ValueError: if ``mesh_step`` is not strictly positive.
    """
    if mesh_step <= 0:
        raise ValueError("mesh_step must be strictly positive")

    reduced_data = PCA(n_components=2).fit_transform(topic_2_doc)

    kmeans = KMeans(init="k-means++", n_clusters=n_clusters, n_init=4)
    kmeans.fit(reduced_data.astype("float"))

    # Grid bounds: extent of the projected data plus a margin of 1.
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, mesh_step),
        np.arange(y_min, y_max, mesh_step),
    )

    # Label every grid node; Z stays flat (one label per raveled node).
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    return reduced_data, kmeans, x_min, x_max, y_min, y_max, xx, yy, Z
def pca_reduction(features, n_components=0.8, svd_solver='full'):
    """Reduce feature dimensionality with PCA and return float32 features.

    Args:
        features: matrix of shape [N, D] with N datapoints of dimensionality
            D.
        n_components: forwarded to ``PCA``. A value above 1 is the target
            dimensionality; a value strictly between 0 and 1 keeps as many
            components as needed for the explained variance to exceed
            ``n_components * 100`` percent.
        svd_solver: SVD solver forwarded to ``PCA``. The default ``'full'``
            computes the exact full SVD.

    Returns:
        Matrix of shape [N, K] holding the reduced features, cast to
        ``np.float32``.
    """
    estimator = PCA(n_components=n_components, svd_solver=svd_solver)
    projected = estimator.fit_transform(features)
    return projected.astype(np.float32)
# `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
X_train = X_train.values
y_train = y_train.values[:, -1]

########################### Test Set ######################
## This is a separate test file, used for final model validation.
final_test = pd.read_csv('adult_test.csv')
final_test_y = final_test['target']
final_test_x = final_test.drop(['target', 'native-country'], axis=1)
final_test_y = pd.get_dummies(final_test_y)
final_test_x = pd.get_dummies(final_test_x)
y_test = final_test_y.values[:, 1]
# NOTE(review): `column_name` is defined outside this section; presumably it
# lists the training feature columns so both sets share one layout - confirm.
X_test = final_test_x[column_name]

######################################################## PCA #################################################
from sklearn.decomposition import PCA

X_train = X_train.astype('float32')
pca = PCA(n_components=X_train.shape[1])
pca.fit(X_train)
X_train = pca.transform(X_train)

# Project the test set with the PCA fitted on the training data. Fitting a
# second PCA on the test set would give it a different basis from the one the
# classifier is trained in, making the test features incomparable.
X_test = X_test.values.astype('float32')
X_test = pca.transform(X_test)

############################################ Neural Networks ################################################
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='sgd', activation='logistic', alpha=1e-5, random_state=1)
help='GPU device to be used.') args = parser.parse_args() # Load model and image list images = get_dataset_images(args.img_dir) if len(images) == 0: print('Could not find any images. Have you set --img_dir correctly?') exit() if args.gpu is not None: caffe.set_mode_gpu() caffe.set_device(args.gpu) else: caffe.set_mode_cpu() net = caffe.Net(args.model, caffe.TEST, weights=args.weights) mean = np.loadtxt(args.mean) # Extract features features = np.array([ extract_cnn_features(net, mean, img, 'fc6') for img in tqdm( images, desc='Extracting CNN features...', dynamic_ncols=True) ], dtype=np.float32) # Norm, PCA, Norm features /= np.linalg.norm(features, axis=1, keepdims=True) features = PCA(args.pca_dim).fit_transform(features) features /= np.linalg.norm(features, axis=1, keepdims=True) # Save features np.save(args.feature_dump, features.astype(np.float32, copy=False))