Exemplo n.º 1
0
def streaming_training(next_character, array1, array2, lv1, lv2, trellis, n):
	"""Reinforce the "no space" n-gram when the model would wrongly insert a space.

	The context is the last ``n-1`` characters of the text held by the first
	trellis entry (``trellis[0][1]``).  The context followed by
	``next_character`` is scored against the context followed by a space,
	each score being the sum of ``llv.recover_frequency`` over both
	(lv, array) pairs.  When the space variant scores strictly higher, the
	letter vector of the no-space n-gram is added to the slot
	``len(ngram) - 1`` of both arrays.

	Parameters:
		next_character: the character that actually follows in the stream.
		array1, array2: indexable containers updated in place.
		lv1, lv2: passed through to ``llv`` together with array1 / array2.
		trellis: sequence of candidates; only ``trellis[0][1]`` is read.
		n: n-gram length (context is ``n-1`` characters).

	Returns:
		The tuple ``(array1, array2)`` on every path, so callers can always
		unpack the result.
	"""
	if next_character == ' ':
		# No training needed: a real space is the case the model already
		# favours.  Return the arrays unchanged instead of None so that
		# ``a1, a2 = streaming_training(...)`` never fails to unpack.
		return array1, array2

	context = trellis[0][1][-n+1:]
	nospace = context + next_character
	space = context + ' '

	nospace_val = (
		llv.recover_frequency(lv1, nospace, array1)
		+ llv.recover_frequency(lv2, nospace, array2)
	)
	space_val = (
		llv.recover_frequency(lv1, space, array1)
		+ llv.recover_frequency(lv2, space, array2)
	)

	if space_val > nospace_val:
		# The model prefers a space here although none follows: add the
		# no-space n-gram's letter vector to the slot for its length.
		slot = len(nospace) - 1
		array1[slot] = array1[slot] + llv.get_letter_vec(nospace, lv1)
		array2[slot] = array2[slot] + llv.get_letter_vec(nospace, lv2)
	return array1, array2
Exemplo n.º 2
0
def garden_path_accuracy(stripped_text, array, n, original):
	"""Beam-search a segmentation of ``original`` and count matched spaces.

	The spaces in ``original`` are skipped while walking it; for every other
	character each candidate is extended twice: once with the character
	appended directly and once with a space inserted before it.  Candidates
	are ``[score, text, hits]`` lists.  Scores come from
	``llv.recover_frequency`` on the trailing n-gram; the space variant is
	the average of the "context + space" and "shifted context + space +
	character" scores.  After each step the 30 highest-scoring candidates
	are kept.

	``hits`` is incremented for a candidate whenever ``original`` had a space
	immediately before the current character and the candidate's text also
	has a space just before its newest character.

	Parameters:
		stripped_text: unused; kept for interface compatibility.
		array: passed through to ``llv.recover_frequency``.
		n: n-gram length (context is ``n-1`` characters).
		original: reference text including spaces.

	Returns:
		The final trellis, best score first.  If no non-space character is
		processed, this is the initial ``[(0, original[:n-1], 0)]``.

	NOTE(review): ``lv1`` and ``llv`` are not parameters; they must exist at
	module level for the scoring path to run.
	"""
	count = 0
	trellis = [(0, original[:n-1], 0)]
	next_space = False
	for i in original[n-1:]:
		count = count + 1
		if count % 100 == 0:
			# Progress report.  The parenthesised single-argument form
			# prints the same thing on Python 2 and Python 3.
			print(count)
		if i == ' ':
			# Remember the space; it is checked on the next real character.
			next_space = True
			continue

		k = []
		for j in trellis:
			context = j[1][-n+1:]
			no_space = context + i
			space_first = context + ' '
			space_second = context[1:] + ' ' + i

			no_space_cost = llv.recover_frequency(lv1, no_space, array)
			space_first_freq = llv.recover_frequency(lv1, space_first, array)
			space_second_freq = llv.recover_frequency(lv1, space_second, array)

			space_cost = (.50*space_first_freq + .50*space_second_freq)
			k.append([j[0]+no_space_cost, j[1]+i, j[2]])
			k.append([j[0]+space_cost, j[1]+' '+i, j[2]])

		# Highest score first; the slice alone bounds the beam (it is a
		# no-op when fewer than 30 candidates exist).
		k = sorted(k, key=lambda t: t[0], reverse=True)
		trellis = k[:30]

		if next_space:
			for entry in trellis:
				# Slice instead of index so a one-character text compares
				# as '' rather than raising IndexError.
				if entry[1][-2:-1] == ' ':
					entry[2] = entry[2] + 1
			next_space = False

	return trellis
Exemplo n.º 3
0
def get_predictions(string, array, lv):
	"""Rank every symbol of ``alphabet`` as a continuation of ``string``.

	Each candidate ``string + symbol`` is scored with
	``llv.recover_frequency`` against ``array[len(candidate) - 1]``.

	Parameters:
		string: the prefix to extend; must be shorter than ``array``.
		array: indexable container, indexed by candidate length minus one.
		lv: passed through to ``llv.recover_frequency``.

	Returns:
		A list of ``(score, symbol)`` tuples ordered by score, highest first.
	"""
	assert len(string) < len(array)

	candidates = [(string + symbol, symbol) for symbol in alphabet]
	ranked = [
		(llv.recover_frequency(lv, text, array[len(text) - 1]), symbol)
		for text, symbol in candidates
	]
	ranked.sort(key=lambda pair: pair[0], reverse=True)
	return ranked
Exemplo n.º 4
0
def garden_path(stripped_text, array1, array2, n, original, training=0):
	"""Beam-search where to insert spaces into ``stripped_text``.

	Starting from the first ``n-1`` characters, each remaining character
	extends every candidate twice: appended directly, or appended after an
	inserted space.  Candidates are ``(score, text)`` tuples.  Scores are the
	sum of ``llv.recover_frequency`` over the (lv1, array1) and (lv2, array2)
	pairs for the trailing n-gram; the space variant is weighted by 0.9.
	Within one step a given trailing n-gram is only expanded once (tracked
	in ``seen``).  After each step the 25 highest-scoring candidates are kept.

	Parameters:
		stripped_text: text to segment.
		array1, array2: passed to ``llv.recover_frequency``; rebound to the
			result of ``streaming_training`` when ``training`` is truthy.
		n: n-gram length (context is ``n-1`` characters).
		original: unused; kept for interface compatibility.
		training: when truthy, ``streaming_training`` is invoked while
			decoding.

	Returns:
		The final trellis, best score first.  If ``stripped_text`` has fewer
		than ``n`` characters, this is the initial
		``[(0, stripped_text[:n-1])]``.

	NOTE(review): ``lv1``, ``lv2`` and ``llv`` are not parameters; they must
	exist at module level for the scoring path to run.
	"""
	trellis = [(0, stripped_text[:n-1])]
	count = 0

	for i in stripped_text[n-1:]:
		count = count + 1
		if count % 100 == 0:
			# Progress report.  The parenthesised single-argument form
			# prints the same thing on Python 2 and Python 3.
			print(count)
		seen = set()
		k = []
		for j in trellis:
			context = j[1][-n+1:]

			no_space = context + i
			if no_space not in seen:
				seen.add(no_space)
				no_space_cost = (
					llv.recover_frequency(lv1, no_space, array1)
					+ llv.recover_frequency(lv2, no_space, array2)
				)
				k.append((j[0]+no_space_cost, j[1]+i))

			space_first = context + ' '
			if space_first not in seen:
				seen.add(space_first)
				space_first_freq = (
					llv.recover_frequency(lv1, space_first, array1)
					+ llv.recover_frequency(lv2, space_first, array2)
				)
				space_cost = .9*space_first_freq
				k.append((j[0]+space_cost, j[1]+' '+i))

			if training:
				# NOTE(review): this runs once per trellis entry although it
				# does not depend on ``j``, and it uses a fixed context
				# length of 5 rather than ``n`` -- confirm both are intended.
				updated = streaming_training(i, array1, array2, lv1, lv2, trellis, 5)
				# Guard the unpack: streaming_training may return nothing
				# when ``i`` is a space.
				if updated is not None:
					array1, array2 = updated

		# Highest score first, then keep the beam.
		k = sorted(k, key=lambda t: t[0], reverse=True)
		trellis = k[:25]

	return trellis