Example #1
def test():
	learner = OMTL(epoch=640, matrix_interaction_lr=1e-10, divergence="logdet")
	kpercepts = KPerceptrons(learn_rate=10000)
	knb = KNaiveBayes()
	# Yields 20 batches of size 8000
	for task, inp, outp in get_minibatches(batch_size=8000, num_epochs=1, add_bias=True):
		knb.fit(np.asarray(task).reshape((inp.shape[0],1)), inp, np.asarray(outp).reshape((inp.shape[0],1)))
	# Yields 20*3000 batches of size 1
	for task, inp, outp in get_minibatches(batch_size=1, num_epochs=3000, add_bias=True):
		learner.fit(task[0].astype(int), inp, outp)
		kpercepts.fit(task[0].astype(int), inp, outp)
	# Test_tasks.shape = (28143, 1)
	test_tasks = np.load("data/test_tasks.npy")
	# test_inputs.npy holds a 0-d object array wrapping a sparse SciPy matrix, so call .item()
	# to unwrap it, convert it to a dense ndarray, and prepend a bias column
	# (see the standalone loading sketch after this example)
	test_inputs = np.load("data/test_inputs.npy").item().toarray()
	test_inputs = np.hstack((np.ones((test_inputs.shape[0],1)), test_inputs))
	# Test_outputs.shape = (28143,)
	# So we need to reshape it to a (28143, 1) array
	test_outputs = np.load("data/test_outputs.npy").reshape((test_tasks.shape[0],1))
	# See KNB.score for how these three arrays are reshaped so they work with standard
	# scikit-learn estimators
	print "KNB: %f" %knb.score(test_tasks, test_inputs, test_outputs)
	print "OMTL: %f" % learner.score(test_tasks, test_inputs, test_outputs)
	print "kpercepts: %f" % kpercepts.score(test_tasks, test_inputs, test_outputs)
	print json.dumps(learner.A.tolist())
	with open("task_relate.json","w") as f:
		json.dump(learner.A.tolist(), f)
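
A minimal, self-contained sketch of the sparse-matrix loading pattern used for test_inputs above. The file name demo_inputs.npy and the toy matrix are made up, and on recent NumPy versions allow_pickle=True is needed because the wrapped object array is pickled:

import numpy as np
from scipy import sparse

X_sparse = sparse.random(5, 4, density=0.5, format="csr")
np.save("demo_inputs.npy", X_sparse)                     # stored as a 0-d object array
loaded = np.load("demo_inputs.npy", allow_pickle=True)   # allow_pickle needed on recent NumPy
X = loaded.item().toarray()                              # unwrap the matrix, then densify
X = np.hstack((np.ones((X.shape[0], 1)), X))             # prepend a bias column
print(X.shape)                                           # (5, 5)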
Example #2
def grid_search():
	out = open("omtl_log", "w")
	val_tasks, val_x, val_y = get_validation_set()
	omtl_lrs = np.logspace(-28,-14,8)
	perceptron_lrs = np.logspace(-10,4,8)
	epochs = [20, 40, 80, 160, 320, 640, 1280]
	start = time.time()
	kpercept_results = []
	omtl_results = []
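	# Note: zip pairs the i-th OMTL learning rate with the i-th perceptron learning rate,
	# so 8 paired settings are tried rather than a full Cartesian grid (see the sketch after this example)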
	for cnt, (omtl_lr, perceptron_lr) in enumerate(zip(omtl_lrs, perceptron_lrs)):
		omtls = [OMTL(epoch=epoch, matrix_interaction_lr=omtl_lr, divergence="logdet") for epoch in epochs]
		kpercepts = KPerceptrons(learn_rate=perceptron_lr)
		for batch, (task, inp, outp) in enumerate(get_minibatches(
									batch_size=1, num_epochs=5000, add_bias=True)):
			for learner in omtls:
				learner.fit(task[0].astype(int), inp, outp)
			kpercepts.fit(task[0].astype(int), inp, outp)
			if batch % 500 == 0:
				print("Batch %d" % batch)
		kpercept_results.append((perceptron_lr, kpercepts.score(val_tasks, val_x, val_y)))
		for omtl in omtls:
			if omtl.has_diverged:
				omtl_results.append((omtl_lr, omtl.epoch, 0.0))
			else:
				omtl_results.append((omtl_lr, omtl.epoch, omtl.score(val_tasks, val_x, val_y)))
		print "Done with iteration %d of %d" %(cnt, len(omtl_lrs))
	json.dump({"omtl":omtl_results, "kpercepts":kpercept_results}, out)
	out.close()
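
For reference, a small sketch of the search space defined above: np.logspace yields eight log-spaced learning rates per model, and zip walks the two sequences in lockstep, so eight paired settings are evaluated rather than the 8x8 Cartesian product:

import numpy as np

omtl_lrs = np.logspace(-28, -14, 8)      # 1e-28, 1e-26, ..., 1e-14
perceptron_lrs = np.logspace(-10, 4, 8)  # 1e-10, 1e-08, ..., 1e+04

# zip pairs the rates one-to-one: 8 runs, not 64
for omtl_lr, perceptron_lr in zip(omtl_lrs, perceptron_lrs):
	print("omtl_lr=%.0e  perceptron_lr=%.0e" % (omtl_lr, perceptron_lr))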
Example #3
def get_validation_set(batch_size=1500):
	# Returns 3 arrays (tasks, X, y) with batch_size*20 examples in each
	# (see the shape sketch after this example)
	val_tasks, val_x, val_y = np.empty(0), None, np.empty(0)
	for task, inp, outp in get_minibatches(batch_size=batch_size, num_epochs=1, add_bias=True):
		val_tasks = np.hstack((val_tasks, task.ravel()))
		val_y = np.hstack((val_y, outp.ravel()))
		if val_x is None:
			val_x = inp
		else:
			val_x = np.vstack((val_x, inp))
	return val_tasks, val_x, val_y
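
A tiny self-contained illustration of the stacking pattern above, with dummy batches standing in for get_minibatches, showing how the 1-D task/label vectors grow via hstack while the 2-D feature matrix grows via vstack:

import numpy as np

val_tasks, val_x, val_y = np.empty(0), None, np.empty(0)
for task_id in range(3):                   # dummy stand-in for get_minibatches
	task = np.full((4, 1), task_id)        # 4 examples, all from the same task
	inp = np.random.rand(4, 6)             # 4 examples, 6 features (bias included)
	outp = np.random.randint(0, 2, (4, 1))
	val_tasks = np.hstack((val_tasks, task.ravel()))
	val_y = np.hstack((val_y, outp.ravel()))
	val_x = inp if val_x is None else np.vstack((val_x, inp))
print(val_tasks.shape, val_x.shape, val_y.shape)   # (12,) (12, 6) (12,)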
Example #4
def timeline():
	# Generates the data file used to create the examples-vs-accuracy graph
	# (see the plotting sketch after this example)
	val_tasks, val_x, val_y = get_validation_set()
	learner = OMTL(epoch=40, matrix_interaction_lr=1e-20, divergence="logdet")
	kpercepts = KPerceptrons(learn_rate=10000)
	knb = KNaiveBayes()
	results = []
	tasks, x, y = [], None, []

	for batch, (task, inp, outp) in enumerate(get_minibatches(
								batch_size=1, num_epochs=8000, add_bias=True)):
		learner.fit(task[0].astype(int), inp, outp)
		kpercepts.fit(task[0].astype(int), inp, outp)

		# Used so we only call KNB.fit once per 100 examples
		tasks.append(int(task[0][0]))
		y.append(outp[0].astype(int))
		if x is None:
			x = inp
		else:
			x = np.vstack((x,inp))

		if batch % 100 == 0 and batch > 50:
			knb.fit(np.asarray(tasks).reshape((x.shape[0],1)), x, np.asarray(y).reshape((x.shape[0],1)))

			omtl_score = learner.score(val_tasks, val_x, val_y)
			kpercept_score = kpercepts.score(val_tasks, val_x, val_y)
			knb_score = knb.score(val_tasks, val_x, val_y)
			print "Batch %d" % batch
			print "OMTL score: %f" % omtl_score
			print "KPercept score %f" % kpercept_score
			print "KNB score %f" % knb_score
			results.append((batch, omtl_score, kpercept_score, knb_score))

			tasks, x, y = [], None, []

	with open("timeline.json", "w") as f:
		json.dump(results, f)
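
The comment in timeline() says this JSON file feeds an examples-vs-accuracy graph; the plotting code is not part of the example, but a minimal sketch of how timeline.json could be consumed (matplotlib assumed) might look like:

import json
import matplotlib.pyplot as plt

with open("timeline.json") as f:
	results = json.load(f)                 # list of [batch, omtl, kpercept, knb] entries

batches, omtl, kpercept, knb = zip(*results)
plt.plot(batches, omtl, label="OMTL")
plt.plot(batches, kpercept, label="KPerceptrons")
plt.plot(batches, knb, label="KNaiveBayes")
plt.xlabel("Examples seen")
plt.ylabel("Validation accuracy")
plt.legend()
plt.savefig("timeline.png")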