class PageTranslator(object):
    """
    Provides methods for translating the text of each page belonging to a
    publication into English.

    Relies on names defined outside this class: ``Translator`` (language
    detection and translation), ``LabCSVReader`` (csv input) and ``os``.
    """

    # Class-level default; every instance replaces it in __init__.
    translator = None

    def __init__(self, api_key):
        """
        Creates the Translator used by the other methods.

        Args:
            api_key: Passed unchanged to the Translator constructor
        """
        self.translator = Translator(api_key)

    def translate_page(self, input_filepath, output_file_path, output_file_name):
        """
        Reads in text from the input file, and then uses the Translator to
        determine whether the language is english or not. If not then it uses
        the Translator to acquire an english language version and save it to
        the output file.

        Nothing is done when the output file already exists, so pages that
        were translated on an earlier run are not translated again. No output
        file is written for pages the Translator reports as english.

        Args:
            input_filepath (str): The input page text file
            output_file_path (str): The output directory path (with or without
                a trailing separator)
            output_file_name (str): The name of the file to save the translated
                text to
        """
        # os.path.join yields the same path as plain concatenation when the
        # directory already ends with a separator, and a correct one when it
        # does not.
        output_filepath = os.path.join(output_file_path, output_file_name)
        if os.path.exists(output_filepath):
            return

        with open(input_filepath, 'r') as input_file:
            page_text = input_file.read()

        # Only an answer equal to True counts as english; any other answer is
        # treated as "needs translating" and echoed in the progress message
        # (presumably a detected-language code -- confirm against Translator).
        is_english = self.translator.is_english(page_text)
        if is_english != True:  # noqa: E712 - equality test kept on purpose
            # %-formatting copes with a non-string answer (e.g. False/None),
            # and a single parenthesised argument prints identically on
            # Python 2 and Python 3.
            print("[%s]%s ---> %s" % (is_english, input_filepath, output_filepath))

            translation = self.translator.translate(page_text)

            # An empty directory means "current directory": nothing to create.
            if output_file_path and not os.path.exists(output_file_path):
                os.makedirs(output_file_path)

            with open(output_filepath, 'w') as output_file:
                # NOTE(review): on Python 2, str() raises UnicodeEncodeError
                # if 'text' is a unicode object holding non-ASCII characters
                # -- confirm what Translator.translate returns.
                output_file.write(str(translation['text']))

    def translate_publications(self, working_directory, translation_directory, input_file):
        """
        Iterates through an input csv containing ukmhl identifiers and then
        iterates through the page text files for each publication, using
        translate_page to attempt to translate the page text if required.

        Args:
            working_directory (str): Directory containing one
                ``<id>/pages/`` folder of page text files per publication
            translation_directory (str): Directory under which translations
                are written to ``<id>/translations/``
            input_file (str): The input csv file containing a list of ukmhl
                identifiers to process
        """
        lab_csv_reader = LabCSVReader(input_file)
        try:
            for row in lab_csv_reader.reader():
                publication_id = row['id']
                # The trailing '' keeps a trailing separator on both
                # directories, as the concatenated paths used to have.
                publication_directory = os.path.join(
                    working_directory, publication_id, "pages", "")
                translation_path = os.path.join(
                    translation_directory, publication_id, "translations", "")

                for page_file in os.listdir(publication_directory):
                    self.translate_page(
                        os.path.join(publication_directory, page_file),
                        translation_path,
                        page_file)
        finally:
            # Release the reader even when a page fails part-way through.
            lab_csv_reader.close()