def main():
    """Drive the kernel PCA / kernel LDA experiment on 'arcene_train.data'.

    The training matrix is read from the 'arcene' directory, transposed and
    centred; the training labels are read from 'arcene_train.labels'.  The
    value returned by ``get_sigma`` is then handed, together with ``rbf``,
    to ``gram``, ``get_lda``, ``kernel_pca`` and ``lda``.

    Nothing is returned: every result is only bound to a local name.
    """
    data_dir = 'arcene'
    train_data_path = os.path.join(data_dir, 'arcene_train.data')
    train_labels_path = os.path.join(data_dir, 'arcene_train.labels')

    # centre() returns a pair; its second item is fed back in on the next call.
    data, mean = centre(extract(train_data_path).T)

    # NOTE(review): this reads the *training* file again even though the
    # result is named data_valid -- possibly 'arcene_valid.data' was meant.
    # The value is not used further down, so the path is left untouched.
    data_valid, mean = centre(extract(train_data_path).T, mean)

    labels = gen_labels(train_labels_path)
    sigma = get_sigma(data, labels)

    eigenvalues, eigenvectors = gram(rbf, data, sigma)
    alpha = get_lda(data, labels, rbf, sigma)

    # Both routines receive the training matrix twice (data, data).
    # The literal 10 is presumably the number of components kept -- confirm
    # against kernel_pca's signature.
    reduced_points = kernel_pca(rbf, 10, eigenvectors, eigenvalues,
                                data, data, sigma)
    red_points = lda(alpha, data, data, rbf, sigma)
def main():
    """Load 'arcene/arcene_train.data' with ``extract`` and pass it to ``lda``.

    File names are built by inserting a split name in front of the '.' of
    the template 'arcene_.data'; only the "train" split is listed.
    """
    data_dir = 'arcene'
    template = "arcene_.data"
    dot = template.find('.')
    stem, suffix = template[:dot], template[dot:]

    split_names = ["train"]

    # Each pass overwrites the previous result, so only the last listed
    # split reaches lda(); with a single entry that is the train split.
    data = []
    for split in split_names:
        data = extract(os.path.join(data_dir, stem + split + suffix))

    lda(data)
def main():
    """Project the arcene splits with stored weight matrices and classify one.

    Steps, in order:

    1. Read the training labels from 'arcene/arcene_train.labels'.
    2. Load three weight matrices with ``np.loadtxt`` from
       'arcene_train.data_10', 'arcene_train.data_100' and
       'arcene_train.data_1000' (relative to the working directory) and
       assert their shapes are (10, 10000), (100, 10000), (1000, 10000).
    3. Load the "test", "train" and "valid" data files from the 'arcene'
       directory with ``extract``.
    4. Compute ``np.dot(weights, data.T)`` for every split/weight pair.
       The nine results are stored split-major, so index ``3*s + w`` is
       split ``s`` projected with weight matrix ``w``.
    5. Fit with ``train_data`` on the three train projections and run
       ``naive_gaussian`` on index 7 (the "valid" split projected with the
       100-row weights) using ``means[1]`` / ``cov[1]``.

    Nothing is returned; the predicted labels are only bound locally.

    Raises:
        AssertionError: if any loaded or projected matrix has an
            unexpected shape, or if ``train_data`` returns a number of
            means/covariances different from the number of projections.
    """
    DATA_DIR = 'arcene'
    n_features = 10000
    weight_rows = [10, 100, 1000]
    split_names = ["test", "train", "valid"]

    train_labels = gen_labels(os.path.join(DATA_DIR, 'arcene_train.labels'))

    template = "arcene_.data"
    dot = template.find('.')
    data_files = [template[:dot] + name + template[dot:]
                  for name in split_names]

    # Yields 'arcene_train.data_10', '..._100', '..._1000'.
    weight_files = ["arcene_train.data_%d" % rows for rows in weight_rows]

    # A plain list: the matrices have different shapes, so they cannot form
    # one rectangular array, and map() is lazy on Python 3.
    weights = [np.loadtxt(path) for path in weight_files]
    for matrix, rows in zip(weights, weight_rows):
        assert matrix.shape[0] == rows and matrix.shape[1] == n_features

    data = [extract(os.path.join(DATA_DIR, name)) for name in data_files]

    # Split-major order: all weights for "test", then "train", then "valid".
    transformed_pca_data = [np.dot(matrix, split.T)
                            for split in data
                            for matrix in weights]

    # Check the count before any positional indexing below.
    n_weights = len(weights)
    assert len(transformed_pca_data) == 9

    for position, projected in enumerate(transformed_pca_data):
        # divmod uses floor division on both Python 2 and Python 3, unlike
        # a bare "/" which is true division on Python 3.
        split_idx, weight_idx = divmod(position, n_weights)
        # The first split ("test") is expected to have 700 columns, the
        # other two 100 each.
        expected_cols = 700 if split_idx == 0 else 100
        assert projected.shape[1] == expected_cols
        assert projected.shape[0] == weight_rows[weight_idx]

    # Positions 3..5 are the "train" split under each weight matrix.  They
    # are packed into a 1-D object array explicitly because current NumPy
    # refuses to build an array from differently-shaped inputs implicitly.
    train_projections = transformed_pca_data[3:6]
    transformed_pca_train_data = np.empty(len(train_projections), dtype=object)
    for slot, projected in enumerate(train_projections):
        transformed_pca_train_data[slot] = projected

    prior, means, cov, classes = train_data(train_labels,
                                            transformed_pca_train_data)
    assert len(means) == len(transformed_pca_train_data)
    assert len(cov) == len(transformed_pca_train_data)

    # NOTE: an earlier, commented-out variant called gaussian_classify on
    # transformed_pca_train_data[1] with the same model arguments.
    labels = naive_gaussian(transformed_pca_data[7], prior, means[1], cov[1],
                            classes)