def main(CV=False, PLOT=True):
    """Entry Point.

    Fits a K-Nearest-Neighbours classifier on the fetched data set and
    reports test accuracy; optionally plots the confusion matrix and the
    k-neighbours graph.

    Parameters
    ----------
    CV: bool
        Cross-validation flag: when set, hyperparameters come from
        `cross_validate`; otherwise hard-coded defaults are used.
    PLOT: bool
        Plotting flag: when set, figures are saved under `data/out/`.
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data, 10)
    else:
        # defaults found by an earlier manual search
        method = 'l2'
        params = {'n_neighbors': 1, 'metric': chisquare}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = KNeighborsClassifier(**params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:
        y_hat = classifier.predict(X_test)
        cnf_matrix = confusion_matrix(y_test, y_hat)
        # confusion_matrix orders its rows/columns by the sorted unique
        # labels, so the class labels handed to the plot must be sorted
        # the same way — a bare set() has no guaranteed order.
        plot_confusion_matrix(cnf_matrix,
                              classes=sorted(set(y_test)),
                              title='K-Nearest-Neighbours\nConfusion Matrix',
                              cmap=plt.cm.Greens)
        plt.savefig('data/out/knn_cnf_matrix.pdf',
                    format='pdf', dpi=300, transparent=True)

        neighbors_matrix = classifier.kneighbors_graph(X_test)
        plot_kneighbors_graph(neighbors_matrix, title='Neighbours Graph')
        plt.savefig('data/out/knn_neighbours.pdf',
                    format='pdf', dpi=300, transparent=True)
def main(CV=False, PLOT=True):
    """Entry Point.

    Fits a Multi-Layer-Perceptron classifier on the fetched data set and
    reports test accuracy; optionally plots the confusion matrix.

    Parameters
    ----------
    CV: bool
        Cross-validation flag: when set, hyperparameters come from
        `cross_validate`; otherwise hard-coded defaults are used.
    PLOT: bool
        Plotting flag: when set, the figure is saved under `data/out/`.
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data)
    else:
        # defaults found by an earlier manual search
        method = 'robust'
        params = {'activation': 'logistic', 'hidden_layer_sizes': (25, )}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    # fixed random_state keeps the run reproducible
    classifier = MLPClassifier(learning_rate="adaptive",
                               max_iter=5000,
                               solver='adam',
                               random_state=42,
                               alpha=0.01,
                               **params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:
        y_hat = classifier.predict(X_test)
        cnf_matrix = confusion_matrix(y_test, y_hat)
        # confusion_matrix orders its rows/columns by the sorted unique
        # labels, so the class labels handed to the plot must be sorted
        # the same way — a bare set() has no guaranteed order.
        plot_confusion_matrix(cnf_matrix,
                              classes=sorted(set(y_test)),
                              title='Multi-Layer-Perceptron\nConfusion Matrix',
                              cmap=plt.cm.Reds)
        plt.savefig('data/out/mlp_cnf_matrix.pdf',
                    format='pdf', dpi=300, transparent=True)
def main(CV=False, PLOT=True):
    """Entry Point.

    Fits a Nearest-Centroid classifier on the fetched data set and
    reports test accuracy; optionally plots the confusion matrix.

    Parameters
    ----------
    CV: bool
        Cross-validation flag: when set, hyperparameters come from
        `cross_validate`; otherwise hard-coded defaults are used.
    PLOT: bool
        Plotting flag: when set, the figure is saved under `data/out/`.
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data)
    else:
        # defaults found by an earlier manual search
        method = 'l2'
        params = {'metric': chisquare}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = NearestCentroid(**params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:
        y_hat = classifier.predict(X_test)
        cnf_matrix = confusion_matrix(y_test, y_hat)
        # confusion_matrix orders its rows/columns by the sorted unique
        # labels, so the class labels handed to the plot must be sorted
        # the same way — a bare set() has no guaranteed order.
        plot_confusion_matrix(cnf_matrix,
                              classes=sorted(set(y_test)),
                              title='Nearest Centroid\nConfusion Matrix',
                              cmap=plt.cm.Blues)
        plt.savefig('data/out/nc_cnf_matrix.pdf',
                    format='pdf', dpi=300, transparent=True)
from utils import root_mean_square_error # models from bias_sgd import BiasSGD from embeddings import Embeddings from autoencoder import Autoencoder from iterative_svd import IterativeSVD device_name = tf.test.gpu_device_name() if device_name != '/device:GPU:0': print('GPU device not found') print('Be sure you want to continue...') else: print('Found GPU at: {}'.format(device_name)) dataloader = fetch_data(train_size=1) number_of_users, number_of_movies = 10000, 1000 IDs, users, movies, ratings, _ = dataloader['train'] def create_parser(): parser = argparse.ArgumentParser( description="Run cross validation for model") parser.add_argument("--verbose", "-v", action="store_true") parser.add_argument("--splits-num", type=int, default=10) parser.add_argument("--shuffle", action="store_true", default=False) parser.add_argument("--model", "-m", type=str) #BSGD parameters
# parse arguments
argv = parser.parse_args()

# get log level (empty string when --log was not supplied)
_level = argv.log or ''

logger = logging.getLogger(os.path.basename(__file__).replace('.py', ''))

# map the requested level onto coloredlogs, defaulting to WARNING
if _level.upper() == 'INFO':
    # BUG FIX: level was misspelled 'IFNO', so --log INFO never
    # actually enabled INFO-level output
    coloredlogs.install(level='INFO', logger=logger)
elif _level.upper() == 'DEBUG':
    coloredlogs.install(level='DEBUG', logger=logger)
else:
    coloredlogs.install(level='WARNING', logger=logger)

logger.info('Fetching data...')

data = fetch_data()

X_train, y_train = data['train']

# D: number of features per sample, N: number of training samples
# (samples are stored column-wise — TODO confirm against fetch_data)
D, N = X_train.shape
logger.debug('Number of features: D=%d' % D)
logger.debug('Number of train data: N=%d' % N)

# mean face: average over the sample axis, kept as a D x 1 column vector
mean_face = X_train.mean(axis=1).reshape(-1, 1)

logger.info('Plotting mean face...')
plt.imshow(mean_face.reshape(SHAPE).T,
           cmap=plt.get_cmap('gray'), vmin=0, vmax=255)
# get flag of standardization standard = argv.standard or True # get flag of cross validation cv = argv.cross_validation or False logger = logging.getLogger(os.path.basename(__file__).replace('.py', '')) if _level.upper() == 'INFO': coloredlogs.install(level='IFNO', logger=logger) elif _level.upper() == 'DEBUG': coloredlogs.install(level='DEBUG', logger=logger) else: coloredlogs.install(level='WARNING', logger=logger) logger.info('Fetching data...') data = fetch_data(ratio=0.8) X_train, y_train = data['train'] D, N = X_train.shape pca = PCA(n_comps=M, standard=standard, logger=logger) logger.info('Applying PCA with M=%d' % M) # normalise data W_train = pca.fit(X_train) logger.debug('W_train.shape=%s' % (W_train.shape,)) X_test, y_test = data['test'] I, K = X_test.shape assert I == D, logger.error(
return parser if __name__ == "__main__": parser = create_parser() args = parser.parse_args() device_name = tf.test.gpu_device_name() if device_name != '/device:GPU:0': print('GPU device not found') print('Be sure you want to continue...') else: print('Found GPU at: {}'.format(device_name)) dataloader = fetch_data(train_size=args.train_size) number_of_users, number_of_movies = 10000, 1000 # Training train_IDs, train_users, train_movies, train_ratings, A_train = dataloader[ 'train'] # Validation valid_IDs, valid_users, valid_movies, valid_ratings, A_valid = dataloader[ 'valid'] # Testing test_IDs, test_users, test_movies = dataloader['test'] known_train = ~np.isnan(A_train) known_validation = ~np.isnan(A_valid)
# helper data preprocessor
from reader import fetch_data
# custom PCA transformer
from pca import PCA
# KNN classifier
from sklearn.neighbors import KNeighborsClassifier

# number of principal components to retain
M = 121
# whether PCA should standardise the data first
standard = False

# 80/20 train/test split of the face data set
data = fetch_data('../src/face.mat', ratio=0.8)

X_train, y_train = data['train']
D, N = X_train.shape

# project the training faces onto the top-M principal subspace
reducer = PCA(n_comps=M, standard=standard)
W_train = reducer.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape

# apply the same projection to the held-out faces
W_test = reducer.transform(X_test)

# 1-nearest-neighbour classification in the reduced space;
# samples are column-wise, hence the transposes
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(W_train.T, y_train.T.ravel())

acc = knn.score(W_test.T, y_test.T.ravel())
print('Accuracy = %.2f%%' % (acc * 100))
import numpy as np import pandas as pd # from svdplus import SVDplus from bias_sgd import BiasSGD from reader import fetch_data from utils import root_mean_square_error N_USERS = 10000 N_MOVIES = 1000 N_ITERS = 1000000 #data = fetch_data(train_size=0.88, train_file="../data/data_train.csv", # test_file="../data/sampleSubmission.csv") data = fetch_data(train_size=0.88) # Training _, train_users, train_movies, train_ratings, A_train = data['train'] # Validation _, valid_users, valid_movies, valid_ratings, A_valid = data['valid'] def predict_with_config(args, hidden_size=12, lr=0.04, reg_matrix=0.08, reg_vector=0.04): predictor = BiasSGD(N_USERS, N_MOVIES,
sns.set_style("ticks") plt.rcParams['figure.figsize'] = [6.0, 12.0] fig, axes = plt.subplots(nrows=4, ncols=2) tuples = [(axes[0, 0], 'none', 'Raw'), (axes[0, 1], 'l2', 'L2 Normalised'), (axes[1, 0], 'l1', 'L1 Normalised'), (axes[1, 1], 'max', '$L_{\infty}$ Normalised'), (axes[2, 0], 'standard', 'Standardardised'), (axes[2, 1], 'maxabs', 'Maximum Absolute Value Scaled'), (axes[3, 0], 'minmax', 'Minimum to Maximum Values Scaled'), (axes[3, 1], 'robust', 'IQR and Median Scaled')] for ax, method, title in tuples: data = normalise(data=fetch_data(), method=method) X_train, y_train = data['train'] X_test, y_test = data['test'] pca = PCA(n_components=2) W_train = pca.fit_transform(X_train) W_test = pca.transform(X_test) _drawn = [False, False, False] col = [sns_blue, sns_green, sns_red] for w, y in zip(W_train, y_train): if not _drawn[y - 1]: ax.scatter(w[0], w[1], c=col[y - 1], label='%s' % (y + 1))