def main():
	"""Run the interactive search-by-phrase prompt until the user quits.

	Loads the thesaurus and the Greek corpus once via preprocessing(), then
	repeatedly reads a phrase from stdin, looks up translations for each of
	its words, searches the Greek text and prints the resulting scoreboard.

	The loop ends (exit status 0) when the input, after removing every "q"
	(or every "quit") and every space, is empty -- this includes an empty
	line -- or when the user presses Ctrl-C / closes stdin.
	"""

	transDict, Greek_word_num, Greek_search_dict, Greek_text = preprocessing()

	# Save lemma to translations found (handed to every trn.get_translation call)
	found_translist = {}

	try:
		while True:

			# A fresh scoreboard is created for every query
			scoreKeeper = scoreboard(MAX_SCOREBOARD_SIZE, MIN_SCORE)

			input_phrase = input("Enter Search Phrase>  ")

			# Plain substring removal; the removal order (keyword first, then
			# spaces) is kept so that e.g. "qu it" is still treated as a search.
			only_q = input_phrase.replace("q", "").replace(" ", "") == ""
			only_quit = input_phrase.replace("quit", "").replace(" ", "") == ""
			if only_q or only_quit:
				# sys.exit rather than the site-injected exit(), which is meant
				# for the interactive shell and is absent under `python -S`.
				sys.exit(0)

			if not valid_search(input_phrase):
				print('Please enter a valid string\n')
				continue

			search = search_phrase(input_phrase, "Latin")

			# Find all the translations of the given words
			for i in range(search.search_len):
				search.has_translation[i] = trn.get_translation(search.text[i], transDict, found_translist)

			xls.try_all_search_combos(search, scoreKeeper, Greek_word_num, Greek_search_dict, Greek_text)

			print(scoreKeeper)

	except (KeyboardInterrupt, EOFError):
		# EOFError: stdin was closed (Ctrl-D or piped input ran out); treat it
		# like Ctrl-C instead of dying with a traceback.
		print('\nProgram Terminated\n')
		sys.exit(0)
def search_by_phrase(input_phrase, language, transDict, Greek_word_num, Greek_search_dict, Greek_text, max_scoreboard_size, min_score):
	"""Search the Greek text for one phrase without any console interaction.

	Returns ERROR when valid_search() rejects input_phrase. Otherwise returns
	a (scoreboard, translation_matrix) tuple: the scoreboard that was handed
	to xls.try_all_search_combos, and the matrix list that was handed to the
	same call, minus its leading placeholder row.
	"""

	# Guard clause: reject invalid phrases before building anything
	if not valid_search(input_phrase):
		return ERROR

	# The matrix starts with a placeholder row that is dropped again before returning
	translation_rows = [[""]]

	# Save lemma to translations found
	lemma_cache = {}

	board = scoreboard(max_scoreboard_size, min_score)
	phrase = search_phrase(input_phrase, language)

	# Find all the translations of the given words
	for idx in range(phrase.search_len):
		phrase.has_translation[idx] = trn.get_translation(phrase.text[idx], transDict, lemma_cache)

	xls.try_all_search_combos(phrase, board, Greek_word_num, Greek_search_dict, Greek_text, translation_rows)

	# Remove the placeholder row added above
	del translation_rows[0]

	return board, translation_rows
def preprocessing():
	"""Load the inputs for a whole-text Latin/Greek comparison.

	Reads the three filenames from xls.get_whole_text_comparison_Args(),
	builds the thesaurus, and builds a search dictionary for each text.

	Returns:
		(Latin_word_num, Latin_search_dict, Latin_text,
		 Greek_word_num, Greek_search_dict, Greek_text)
	"""

	# Get filenames for the thesaurus, the latin text, and greek text
	thesaurus_filename, Latin_filename, Greek_filename = xls.get_whole_text_comparison_Args()

	# Read the thesaurus CSV file into a dictionary object for easy access
	# NOTE(review): transDict is built but not part of the return value --
	# confirm whether callers expect it to be returned as well.
	transDict = ths.build_thesaurus(thesaurus_filename)

	# Read both the latin text and greek text into a dictionary for easy access
	Latin_word_num, Latin_search_dict, Latin_text = xls.build_search_dictionary(Latin_filename, "Latin", True)

	# This used to read `xls.build_search_dictionary = (...)`: a chained
	# assignment that overwrote the library function with a tuple and
	# unpacked (filename, "Greek", True) into the three result names.
	Greek_word_num, Greek_search_dict, Greek_text = xls.build_search_dictionary(Greek_filename, "Greek", True)

	return Latin_word_num, Latin_search_dict, Latin_text, Greek_word_num, Greek_search_dict, Greek_text
def find_best_match(L1, L2, L1_translation, L2_translation, Greek_search_dict, Latin_text, Latin_word_num, Greek_text, Greek_word_num, scoreKeeper):
	"""Try translation combinations of L1 and L2 and record every match found.

	Walks L1.translations x L2.translations, asking xls.find_match_pair for a
	match for each combination, for at most MAX_TRANSLATION_ATTEMPTS
	combinations. Each match is printed and added to scoreKeeper.

	The incoming L1_translation / L2_translation arguments are never read;
	the combinations come from the .translations attributes instead.
	Always returns None.
	"""

	remaining = MAX_TRANSLATION_ATTEMPTS

	for first_candidate in L1.translations:
		for second_candidate in L2.translations:

			# Attempt budget used up: stop the whole search, not just this row
			if remaining <= 0:
				return
			remaining -= 1

			# Find a match for the given translations of L1 and L2
			found = xls.find_match_pair(L1, L2, first_candidate, second_candidate, Greek_search_dict, Latin_text, Latin_word_num, Greek_text, Greek_word_num)

			if found:
				print('Match Found!\n' + str(found))

				# Add match to the scoreboard
				scoreKeeper.add_newMatch(found)

				# NOTE(review): this stops after the first match when the flag
				# equals 0, although the original comment read "If quit turned
				# on" -- confirm the intended polarity of QUIT_AFTER_FIRST_MATCH.
				if QUIT_AFTER_FIRST_MATCH == 0:
					return
def preprocessing():
	"""Load the inputs for search-by-phrase mode.

	Imports the Latin and Greek CLTK model corpora, reads the thesaurus and
	Greek-text filenames from xls.get_search_by_phrase_Args(), then builds the
	thesaurus and the Greek search dictionary.

	Returns:
		(transDict, Greek_word_num, Greek_search_dict, Greek_text)
	"""

	# Import the CLTK models, Latin first and then Greek
	for cltk_language, corpus_name in (('latin', 'latin_models_cltk'), ('greek', 'greek_models_cltk')):
		importer = CorpusImporter(cltk_language)
		importer.import_corpus(corpus_name)

	# Get filenames for the thesaurus and the greek text
	thesaurus_path, greek_path = xls.get_search_by_phrase_Args()

	# Read the thesaurus CSV file into a dictionary object for easy access
	thesaurus = ths.build_thesaurus(thesaurus_path)

	# Read greek text into a dictionary for easy access
	word_count, positions_by_word, corpus = xls.build_search_dictionary(greek_path, "Greek", True)

	return thesaurus, word_count, positions_by_word, corpus
def test_build_search_dict(curr_test, filename, words_in_file, language, lemmatized_version=False):
	"""Check xls.build_search_dictionary against a direct read of the file.

	Two things are verified and reported on curr_test (by setting
	curr_test.passed to False and appending to curr_test.errors):

	* the word count returned by build_search_dictionary equals words_in_file;
	* every whitespace-separated word of the file, after normalize_word(),
	  is a key of the search dictionary and its running word number is
	  among the indices stored for it.

	Words that normalize to "" are skipped and do not consume a word number.

	Returns:
		curr_test, updated in place.
	"""

	word_num, search_dict, _indexed_corpus = xls.build_search_dictionary(filename, language, lemmatized_version)

	if word_num != words_in_file:
		curr_test.passed = False
		curr_test.errors.append("Wrong number of words added (only " + str(word_num) + " out of " + str(words_in_file) + " words added) ")

	# Read the file in one go inside a context manager so the handle is always
	# closed. str.split() with no argument splits on runs of Unicode
	# whitespace, which replaces the former per-character regex test (whose
	# \p{Z} class is a bad escape for the standard-library re module).
	with open(filename, 'r') as test_file:
		raw_words = test_file.read().split()

	# Index of the current (non-empty, normalized) word within the text
	i = 0

	for raw_word in raw_words:
		curr_word = normalize_word(raw_word)

		if curr_word == "":
			continue

		if curr_word not in search_dict:
			curr_test.passed = False
			error_message = curr_word + " was not found in the search dict"
			curr_test.errors.append(error_message)

		elif i not in search_dict[curr_word]:
			curr_test.passed = False
			error_message = curr_word + " did not have the proper index in the search dict"
			error_message += "\n\t\t\t word number: " + str(i) + " Indices: " + str(search_dict[curr_word])
			curr_test.errors.append(error_message)

		i += 1

	return curr_test
	# NOTE(review): the lines below directly follow a `return` statement and
	# have no `def` line of their own in this file, so as written they can
	# never execute. They read like the body of a whole-text comparison
	# driver -- confirm where they belong.
	# NOTE(review): transDict, Latin_text, Latin_search_dict, Latin_word_num,
	# Greek_text, Greek_search_dict and Greek_word_num are used below but not
	# assigned in this fragment -- presumably they come from a preprocessing
	# call that is missing here; verify.

	# Save lemma to translations found
	found_translist = {}

	#Create scoreboard
	scoreKeeper = scoreboard(MAX_SCOREBOARD_SIZE, MIN_SCORE)

	try:

		print("Beginning Search\n")
		
		# Finds all word pairs in Latin_text that are within 1,2,3, .. MAX_DISTANCE_LATIN positions of each other
		# NOTE(review): range(1, MAX_DISTANCE_LATIN) stops at MAX_DISTANCE_LATIN - 1 -- confirm the intended upper bound
		for j in range(1, MAX_DISTANCE_LATIN):
			for i in range(Latin_word_num - j - 1):

				# For both the first and second word in the pair find the word, its position, and the occurrences in the text
				L1, L2 = xls.get_LatinWordStats(Latin_text, Latin_search_dict, i, j)

				# A return value of -1 means this pair is skipped
				if -1 == trn.get_translation_pair(L1, L2, transDict, found_translist):
					continue

				# Determine the maximum score for the word pair using all possible combinations of translations
				# NOTE(review): L1_translation and L2_translation are not assigned anywhere in this fragment -- verify
				find_best_match(L1, L2, L1_translation, L2_translation, Greek_search_dict, Latin_text, Latin_word_num, Greek_text, Greek_word_num, scoreKeeper ) 

		print(scoreKeeper)

	except KeyboardInterrupt:
		# Ctrl-C: still print whatever has been scored so far before exiting
		print('\nProgram Terminated\n')
		print(scoreKeeper)
		sys.exit(0)

	def process_corpus(self, filename, language, make_IndexedText=True, use_lemmatized_text=False):
		"""Build the search dictionary for a corpus file and store it on self.

		Sets self.word_num, self.search_dict and self.indexed_corpus from
		xls.build_search_dictionary. self.corpus_ready is False while the
		build runs and becomes True only once the results are stored.
		make_IndexedText is accepted but not used by this method.
		"""
		# Flag the corpus as unavailable for the duration of the build
		self.corpus_ready = False

		built = xls.build_search_dictionary(filename, language, use_lemmatized_text)
		self.word_num, self.search_dict, self.indexed_corpus = built

		self.corpus_ready = True
def test_functions():
	"""Run the unit checks for the cross-lingual search helpers.

	Each check creates a test(...) record, exercises one helper from xls or
	trn, and on failure sets record.passed to False and appends a message to
	record.errors.

	Returns:
		The list of test records, in the order the checks were run.
	"""

	def expect(record, condition, message):
		"""Mark record as failed and store message when condition is false."""
		if not condition:
			record.passed = False
			record.errors.append(message)

	print("Starting unit testing of simpleXLing.py")

	tests = []

	#====Build Search Dict====#

	#Attempts to build a search dictionary for a .txt file containing Latin words (Doesn't check lemmatized text file)
	curr_test = test("Build latin search dictionary (XLingFunctions.py)")

	latin_filename = "./test_files/small_latin.txt"
	words_in_file = 663
	language = "Latin"
	lemmatized_version = False

	tests.append(test_build_search_dict(curr_test, latin_filename, words_in_file, language, lemmatized_version))

	#Attempts to build a search dictionary for a .txt file containing Greek words (Doesn't check lemmatized text file)
	curr_test = test("Build greek search dictionary (XLingFunctions.py)")

	greek_filename = "./test_files/small_greek.txt"
	# This was previously assigned to a misspelled name (word_in_file), so the
	# Greek check silently reused the Latin count of 663.
	words_in_file = 789
	language = "Greek"
	lemmatized_version = False

	tests.append(test_build_search_dict(curr_test, greek_filename, words_in_file, language, lemmatized_version))

	#Builds an arbitrary translation dictionary and attempts to find translation for a valid word pair
	curr_test = test("Get valid translations (translate.py)")

	LA = word("latina", 1, 3, None)
	LB = word("latinb", 2, 4, None)

	latin_a_translations = ["greek1", "greek2", "greek3"]
	latin_b_translations = ["greek4"]

	transDict = {}
	transDict["latina"] = latin_a_translations
	transDict["latinb"] = latin_b_translations

	# NOTE(review): elsewhere in this file get_translation_pair is called with
	# a fourth argument (found_translist) -- confirm the three-argument form is valid.
	result = trn.get_translation_pair(LA, LB, transDict)

	expect(curr_test, result != -1, "Translations for two valid dictionary entries were not found")
	expect(curr_test, LA.translations == latin_a_translations, "Latina.translations doesn't match the actual translations")
	expect(curr_test, LB.translations == latin_b_translations, "Latinb.translations doesn't match the actual translations")

	tests.append(curr_test)

	#Attempts to find a translation for an invalid word pair
	curr_test = test("Get invalid translations (translate.py)")

	LC = word("latinc", 2, 3, None)

	result = trn.get_translation_pair(LA, LC, transDict)

	# The message used to be a copy of the "valid pair" one and described the
	# opposite failure; here the failure is that the lookup did NOT return -1.
	expect(curr_test, result == -1, "A translation pair was reported although one word is not in the dictionary")
	expect(curr_test, LA.translations == latin_a_translations, "Latin_a.translations doesn't match the actual translations")
	expect(curr_test, LC.translations is None, "Latin_c. shouldn't have any translations")

	tests.append(curr_test)

	#Search for a translation of a pair of latin words in a greek search dictionary
	curr_test = test("Get Greek translation pair (XLingFunctions.py)")

	L1_translation = "greek1"
	L2_translation = "greek2"

	G_search_dict = { 'greek1' : [1,3], 'greek2' : [2] }

	G1, G2 = xls.get_GreekPair(L1_translation, L2_translation, G_search_dict)

	expect(curr_test, G1 == [1,3], "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")
	expect(curr_test, G2 == [2], "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")

	tests.append(curr_test)

	#Search for a translation of a pair of latin words in a greek search dictionary (when translations aren't in dictionary)
	curr_test = test("Get Greek translation pair with incomplete search dict (XLingFunctions.py)")

	L3_translation = "greek3"

	G1, G3 = xls.get_GreekPair(L1_translation, L3_translation, G_search_dict)

	# Both results are expected to be None when one of the two words is missing
	expect(curr_test, G1 is None, "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")
	expect(curr_test, G3 is None, "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")

	tests.append(curr_test)

	#Get latin word stats
	curr_test = test("Get Latin word stats (XLingFunctions.py)")

	latin_text = ['L1', 'L2', 'L1', 'L1', 'L2', 'L3','L4', 'L5', 'L5', 'L5']

	latin_search_dict = { 'L1' : [0,2,3], 'L2' : [1,4], 'L3' : [5], 'L4' : [6], 'L5': [7,8,9]}

	i = 3
	j = 4

	L1, L2 = xls.get_LatinWordStats(latin_text, latin_search_dict, i, j)

	expect(curr_test, L1.word == "L1", "Latin word object #1 corresponds to the wrong word (" + str(L1.word) + ")")
	expect(curr_test, L1.pos == 3, "Latin word object #1 corresponds to the wrong position in text (" + str(L1.pos) + ")")
	expect(curr_test, L1.occurences == 3, "Latin word object #1 should occur 3 times, only occurs" + str(L1.occurences) + " times")

	expect(curr_test, L2.word == "L5", "Latin word object #2 corresponds to the wrong word (" + str(L2.word) + ")" )
	expect(curr_test, L2.pos == 7, "Latin word object #2 corresponds to the wrong position in text (" + str(L2.pos) + ")" )
	expect(curr_test, L2.occurences == 3, "Latin word object #2 should occur 3 times, only occurs" + str(L2.occurences) + " times")

	tests.append(curr_test)

	#Find match pair test (three possible matches in greek text, should take the match with the two words side by side) Uses same L1, L2, and latin text as above
	curr_test = test("Find match pair given three matches in the greek corpus (XLingFunctions.py")

	L1_translation = "g1"
	L2_translation = "g2"

	Greek_text = [ "g1", "x", "x", "g2", "g1", "x", "g2", "x", "x", "x", "g1" ]

	Greek_search_dict = {"g1": [0,4,10], "g2": [ 3,6], "x": [1,2,5,7,8,9]}

	# NOTE(review): a trailing None is passed here, one argument more than the
	# other find_match_pair call in this file -- confirm the expected signature.
	bestMatch = xls.find_match_pair(L1, L2, L1_translation, L2_translation, Greek_search_dict, latin_text, len(latin_text), Greek_text, len(Greek_text), None)

	if not bestMatch:
		curr_test.passed = False
		curr_test.errors.append("No match was found")
	else:
		# Positions are only read once a match object is known to exist
		expect(curr_test, bestMatch.G1_pos == 4 and bestMatch.G2_pos == 3, "The best match did not occur in the expected position ( G1 = " +str(bestMatch.G1_pos) + "   G2 = " + str(bestMatch.G2_pos) )

	tests.append(curr_test)

	#Ensures that try_all_search_combos produces all combos of position indices
	curr_test = test("Try all search combos (finds best match in a greek text given a search prhase)")

	search = search_phrase("L1 L2 L3", "Latin")
	search.has_translation = [1, 1, 1]

	search.text[0].translations = ["g1", "g4", "g3" ]
	search.text[1].translations = ["g6"]
	search.text[2].translations = ["g5", "g2"]

	# NOTE(review): scoreboard is built with two arguments elsewhere in this
	# file -- confirm the second one has a default.
	score = scoreboard(1)

	xls.try_all_search_combos( search, score, len(Greek_text), Greek_search_dict,  Greek_text)

	expect(curr_test, score.matches[0].G1_pos == 4 and score.matches[0].G2_pos == 3, "The wrong top  match was found for the search")

	tests.append(curr_test)

	return tests