def test_hola_returns_dict():
    """Translating "hola" must yield the full entry: both translations and both compounds."""
    expected_entry = {
        "Word": "hola",
        "First Translation": 'Hello!',
        "Second Translation": 'hello',
        "First Compound": '¡Hola, chicos!',
        "First Compound Translation": 'hi, boys! hello, boys!',
        "Second Compound Translation": "Hello, people! Hi, folks!",
        "Second Compound": "¡Hola, gente!",
    }
    result = SpanishTranslator().translate_word("hola")
    # The whole dict is compared, so any extra or missing key fails the test.
    assert result == expected_entry
def test_utf8_word():
    """A word with a non-ASCII character ("abolición") translates to "abolition"."""
    entry = SpanishTranslator().translate_word("abolición")
    # Check for a result first so a failed lookup is reported as such,
    # not as a TypeError from subscripting None.
    assert entry is not None
    assert entry["First Translation"] == "abolition"
def test_known_awkward_case():
    """The word "enfrascar" is a known case where translate_word returns None."""
    assert SpanishTranslator().translate_word("enfrascar") is None
class TextTranslation(object):
    """Tokenize a chapter of text and expose its filtered words, optionally translated.

    The text is split into word tokens with a regular-expression tokenizer,
    the tokens are handed to ``FilterWords`` for filtering by type, and
    translations are fetched through ``SpanishTranslator``.
    """

    def __init__(self, chapter):
        """Tokenize *chapter* and set up the word filter and the translator.

        :param chapter: the text to analyse
        """
        self.text = chapter
        # Split the text into word tokens.
        tokenizer = RegexpTokenizer(r'\w+')
        self.token_words = tokenizer.tokenize(self.text)
        # Used by ``get`` to pick out verbs, nouns etc.
        self.tagger = FilterWords(self.token_words)
        self.translator = SpanishTranslator()

    def translate_n_words(self, number_words, ordered_tokens):
        """Translate tokens in order until *number_words* translations succeed.

        :param number_words: maximum number of translated entries to return
        :param ordered_tokens: iterable of ``(word, count)`` pairs, in the
            order in which they should be tried
        :return: list of the translator's results, each with a ``'Count'``
            key added; words for which the translator returns ``None`` are
            skipped and do not count towards *number_words*
        """
        word_list = []
        for word in ordered_tokens:
            # Stop as soon as enough words have been translated.
            if len(word_list) >= number_words:
                break
            attempted = self.translator.translate_word(word[0])
            if attempted is not None:
                attempted['Count'] = word[1]
                word_list.append(attempted)
        return word_list

    def get(self, number_words=-1, start=0, types=None, ordered=False,
            translate=False, reverse=False):
        """Return a window of the filtered words, with counts or translations.

        :param number_words: size of the window; ``-1`` means the number of
            filtered words
        :param start: index of the first entry of the window
        :param types: passed to ``FilterWords.filtered_words``; defaults to
            an empty list
        :param ordered: when ``True`` entries carry their occurrence count
            and are sorted most frequent first; otherwise each distinct word
            appears once, in first-seen order, with a count of 1
        :param translate: when ``True`` return translator results (see
            ``translate_n_words``) instead of plain word/count dicts; only
            the words inside the window are tried, so fewer than
            *number_words* entries come back when some have no translation
        :param reverse: when ``True`` reverse the order before windowing
        :return: list of dicts, either ``{'Word': ..., 'Count': ...}`` or the
            translator's results with ``'Count'`` added
        """
        # Build a fresh list per call rather than sharing one mutable default.
        words = self.tagger.filtered_words([] if types is None else types)
        if number_words == -1:
            number_words = len(words)

        if ordered is True:
            tagged_words = Counter(words).most_common()
        else:
            # On Python 3 ``.items()`` is a view with no ``reverse`` method,
            # so materialise a list to make ``reverse=True`` work.
            tagged_words = list(OrderedDict.fromkeys(words, 1).items())

        if reverse is True:
            tagged_words.reverse()

        if translate is True:
            candidates = list(islice(tagged_words, start, number_words + start))
            return self.translate_n_words(number_words, candidates)

        # Slicing never raises IndexError, so no guard is needed here.
        window = tagged_words[start:number_words + start]
        return [{'Word': word[0], 'Count': word[1]} for word in window]
class LanguageListCreator(object):
    """Build lists of translated words from random picks, a file, or raw text."""

    def __init__(self, wordreference_api_key='8a8bc'):
        """Create the dictionary and the translator.

        :param wordreference_api_key: key passed to ``SpanishTranslator``
        """
        # NOTE(review): the default key is hard-coded in source -- confirm
        # whether it is a placeholder or a real credential.
        self.dictionary = SpanishDictionary()
        self.translator = SpanishTranslator(wordreference_api_key)

    def _random_translations(self, number, pick_word):
        """Collect *number* translations of distinct words drawn via *pick_word*."""
        translations = dict()
        for _ in range(number):
            translated_word, word = None, None
            # Keep drawing until a word turns up that is both new and
            # translatable; already-seen words are never re-translated.
            while translated_word is None:
                word = pick_word()
                if word not in translations:
                    translated_word = self.translator.translate_word(word)
            translations[word] = translated_word
        return list(translations.values())

    def random_verbs(self, number):
        """Return translations of *number* distinct random verbs.

        :param number: how many translated verbs to collect
        :return: list of the translator's results
        """
        return self._random_translations(number, self.dictionary.get_random_verb)

    def random_words(self, number):
        """Return translations of *number* distinct random words.

        :param number: how many translated words to collect
        :return: list of the translator's results
        """
        return self._random_translations(number, self.dictionary.get_random_word)

    def get_list_from_file(self, path_to_file, num):
        """Translate the words found in the first *num* tokens of a file.

        :param path_to_file: path of the text file to read
        :param num: number of leading tokens of the file to consider
        :return: translations of the distinct translatable words among those
            tokens, most frequent first
        """
        tokenizer = RegexpTokenizer(r'\w+')
        # The context manager closes the handle even if reading fails.
        with open(path_to_file, 'r') as source:
            text = source.read()
        token_words = tokenizer.tokenize(text)[:num]
        word_dict = dict()
        word_count = dict()
        for word in token_words:
            if word not in word_dict:
                translated_word = self.translator.translate_word(word)
                # Untranslatable words are left out of both dicts.
                if translated_word is not None:
                    word_dict[word] = translated_word
                    word_count[word] = 1
            else:
                word_count[word] += 1
        print(word_count)
        by_frequency = sorted(word_count, key=word_count.get, reverse=True)
        return [word_dict[word] for word in by_frequency]

    def get_list_from_text(self, text, num):
        """Translate up to *num* of the most frequent words in *text*.

        :param text: the text to tokenize
        :param num: maximum number of translations to return
        :return: list of the translator's results, most frequent word first;
            words for which the translator returns ``None`` are skipped
        """
        tokenizer = RegexpTokenizer(r'\w+')
        counted = Counter(tokenizer.tokenize(text)).most_common()
        translated_words = []
        for word in counted:
            if len(translated_words) >= num:
                break
            print(word)
            attempted = self.translator.translate_word(word[0])
            if attempted is not None:
                translated_words.append(attempted)
        return translated_words