Пример #1
0
Файл: p2a.py Проект: NPSDC/Codes
def main():
	"""Run the RBF-kernel PCA and LDA pipeline on the arcene training data.

	Steps, in order:
	  1. Load the training matrix, transpose it and centre it.
	  2. Load the validation matrix and centre it with the mean returned
	     for the training data.
	  3. Read the training labels and derive the kernel width ``sig``.
	  4. Compute the Gram-matrix eigen-decomposition and the LDA
	     coefficients, then project the training data with both methods.

	The helpers (``extract``, ``centre``, ``gen_labels``, ``get_sigma``,
	``gram``, ``get_lda``, ``kernel_pca``, ``lda``, ``rbf``) are defined
	elsewhere in the project; their exact contracts are not visible here.
	"""
	DATA_DIR = 'arcene'

	# Training data: transposed before centring, as the helpers expect
	# (presumably one sample per column -- TODO confirm against `centre`).
	data = extract(os.path.join(DATA_DIR, 'arcene_train.data')).T
	data, mean = centre(data)

	# Fix: the validation split used to be read from 'arcene_train.data',
	# which silently made it a second copy of the training set.
	data_valid = extract(os.path.join(DATA_DIR, 'arcene_valid.data'))
	# Centre with the training mean so both splits share one origin.
	data_valid, mean = centre(data_valid.T, mean)

	labels = gen_labels(os.path.join(DATA_DIR, 'arcene_train.labels'))

	sig = get_sigma(data, labels)

	eig_val, eig_vec = gram(rbf, data, sig)
	alpha = get_lda(data, labels, rbf, sig)

	# Both projections are applied to the training data itself.
	# The literal 10 is passed straight to kernel_pca (presumably the
	# number of components to keep -- TODO confirm).
	reduced_points = kernel_pca(rbf, 10, eig_vec, eig_val, data, data, sig)
	red_points = lda(alpha, data, data, rbf, sig)
Пример #2
0
Файл: lda.py Проект: NPSDC/Codes
def main():
	"""Load every listed arcene data file and hand its contents to ``lda``.

	File names are built by inserting a split name before the extension
	of the "arcene_.data" template; only the "train" split is listed, so
	the single file processed is ``arcene/arcene_train.data``.
	"""
	DATA_DIR = 'arcene'
	header = "arcene_.data"
	dot = header.find('.')
	prefix, suffix = header[:dot], header[dot:]
	for split in ("train",):
		path = os.path.join(DATA_DIR, prefix + split + suffix)
		lda(extract(path))
Пример #3
0
Файл: p3.py Проект: NPSDC/Codes
def _object_array(items):
	"""Pack *items* into a 1-D object array, one element per item.

	Filling element by element avoids ``np.asarray`` on arrays of
	different shapes, which newer NumPy releases reject with ValueError.
	"""
	packed = np.empty(len(items), dtype=object)
	for pos, item in enumerate(items):
		packed[pos] = item
	return packed


def main():
	"""Project the arcene splits with three weight matrices and classify.

	Steps, in order:
	  1. Read the training labels.
	  2. Load three weight matrices from ``arcene_train.data_10``,
	     ``arcene_train.data_100`` and ``arcene_train.data_1000`` (looked
	     up relative to the working directory) and assert their shapes are
	     (10, 10000), (100, 10000) and (1000, 10000).
	  3. Load the test, train and valid data files from ``arcene/``.
	  4. Multiply every weight matrix with every transposed data matrix,
	     giving nine projections ordered file-major, weight-minor.
	  5. Fit the model on the three training projections and classify one
	     validation projection with ``naive_gaussian``.

	``gen_labels``, ``extract``, ``train_data`` and ``naive_gaussian`` are
	defined elsewhere in the project.
	"""
	DATA_DIR = 'arcene'
	train_file_labels = os.path.join(DATA_DIR, 'arcene_train.labels')
	header = "arcene_train.data_10"
	header2 = "arcene_.data"
	index = header2.find('.')
	train_labels = gen_labels(train_file_labels)
	actual_files = ["test", "train", "valid"]

	data_files = [header2[:index] + x + header2[index:] for x in actual_files]
	weight_files = [header + suffix for suffix in ['', '0', '00']]

	# A list comprehension (not a bare map) so this also works where map
	# is lazy; the object array keeps the original "array of matrices"
	# container type without relying on ragged np.asarray conversion.
	weights = _object_array([np.loadtxt(name) for name in weight_files])
	assert(weights[0].shape[1] == 10000 and weights[0].shape[0] == 10)
	assert(weights[1].shape[1] == 10000 and weights[1].shape[0] == 100)
	assert(weights[2].shape[1] == 10000 and weights[2].shape[0] == 1000)

	data = [extract(os.path.join(DATA_DIR, name)) for name in data_files]

	# Index layout: file_idx * 3 + weight_idx, i.e. 0-2 test, 3-5 train,
	# 6-8 valid, each group ordered by weight matrix.
	transformed_pca_data = [
		np.dot(weight, split.T) for split in data for weight in weights
	]

	assert(len(transformed_pca_data) == 9)
	expected_rows = (10, 100, 1000)
	for i, projected in enumerate(transformed_pca_data):
		# divmod uses floor division, so the grouping by file is the
		# same on Python 2 and Python 3 (the old `i/3 == 0` was not).
		file_idx, weight_idx = divmod(i, 3)
		if file_idx == 0:
			assert(projected.shape[1] == 700)
		else:
			assert(projected.shape[1] == 100)
		assert(projected.shape[0] == expected_rows[weight_idx])

	# The three projections of the training file (indices 3, 4, 5).
	transformed_pca_train_data = _object_array(transformed_pca_data[3:6])

	prior, means, cov, classes = train_data(train_labels, transformed_pca_train_data)
	assert(len(means) == len(transformed_pca_train_data))
	assert(len(cov) == len(transformed_pca_train_data))

	# Index 7 is the valid file projected with the second weight matrix,
	# matching the second entry of means/cov.
	# NOTE: a commented-out alternative in the original called
	# gaussian_classify on transformed_pca_train_data[1] instead.
	labels = naive_gaussian(transformed_pca_data[7], prior, means[1], cov[1], classes)