示例#1
0
def translate(input_phrase):
    """Translate a phrase and append the result to the phrase database CSV.

    The language of ``input_phrase`` is detected first. For Japanese,
    Simplified Chinese, English or Spanish input, the phrase is rendered in
    English, Japanese, Spanish and Simplified Chinese (text plus
    pronunciation); the column matching the input language holds the input
    itself. The resulting row, indexed by ``input_phrase``, is appended to
    ``phrase_translation_database.csv``.

    For any other detected language a message is printed and the CSV is left
    untouched, so no blank row is stored.

    :param input_phrase: phrase to translate
    :return: None
    """
    translator = Translator()

    # detect language type of input phrase
    language_type = translator.detect(input_phrase).lang
    print('Language type: %s' % language_type)

    columns_list = ['English', 'Japanese', 'Spanish', 'Chinese', 'Ch_Prns']

    # (detected code, label for the log line, column that keeps the raw input).
    # Simplified Chinese has no pass-through column: the Chinese columns always
    # come from the translator because the pronunciation is needed as well.
    supported_languages = (
        ('ja', 'Japanese', 'Japanese'),
        ('zh-CN', 'Simplified Chinese', None),
        ('en', 'English', 'English'),
        ('es', 'Spanish', 'Spanish'),
    )
    for code, label, passthrough_column in supported_languages:
        if code in language_type:
            break
    else:
        print('Input phrase should be Japanese, English, Chinese or Spanish')
        return

    print('Input phrase is %s' % label)

    phrase_in_zh = to_chinese_simplified(translator, input_phrase)
    converters = (
        ('English', to_english),
        ('Japanese', to_japanese),
        ('Spanish', to_spanish),
    )
    row = {}
    for column, convert in converters:
        if column == passthrough_column:
            row[column] = input_phrase
        else:
            row[column] = convert(translator, input_phrase).text
    row['Chinese'] = phrase_in_zh.text
    row['Ch_Prns'] = phrase_in_zh.pronunciation

    # write in csv
    csv_path = 'phrase_translation_database.csv'
    df_trans_phrase = pd.read_csv(csv_path,
                                  encoding='utf-8-sig',
                                  index_col=0)
    # DataFrame.append was removed in pandas 2.0; build a one-row frame and
    # concatenate instead, keeping the existing index name so the CSV header
    # stays the same.
    new_row = pd.DataFrame(
        [row],
        index=pd.Index([input_phrase], name=df_trans_phrase.index.name),
        columns=columns_list,
    )
    df_trans_phrase = pd.concat([df_trans_phrase, new_row])
    df_trans_phrase.to_csv(csv_path, encoding='utf-8-sig')
示例#2
0
    def detect_language_google(self, sample):
        """Detect the language of a text sample using Google Translate.

        :param sample: sample of text from which the language is detected
        :return: the ``self.LANGUAGES_GOOGLE`` entry for the detected
            language code
        """
        detection = Translator().detect(sample)
        return self.LANGUAGES_GOOGLE[detection.lang]
示例#3
0
# Translate a single sentence, stating source ('th') and target ('en') explicitly.
translations = translator.translate(
    text='ลมอ่อนพัดโชยมาน้ำตาก็ไหลรินเหลือเพียงกลิ่นหัวใจฟุ้งไปกับความเหงา',
    src='th',
    dest='en',
)

print('Original Text :', translations.origin, sep='\n')
print('Translated Text :', translations.text, sep='\n')

#%% language detection
print('----------- Language Detection --------------')

# Two detection requests are issued back to back; the name is rebound right
# away, so only the result of the second request is reported below.
LangDetectObj = translator.detect('이 문장은 한글로 쓰여졌습니다.')
LangDetectObj = translator.detect('ไปกับพี่มั๊ยจ๊ะน้องสาว')

LangDetect, LangConf = LangDetectObj.lang, LangDetectObj.confidence

print('Detect language is : %s with confident : %.3f' % (LangDetect, LangConf))

#%% Translate from list
print('-----------Translator from list --------------')

translations = translator.translate(
    ['The quick brown fox', 'jumps over', 'the lazy dog'],
    dest='ko',
)

# A list input gives back an iterable with one result per input string.
for translation in translations:
    print(translation.origin, ' -> ', translation.text)
示例#4
0
class DataTranslation:
    """Translate words to French through a googletrans ``Translator``.

    A word may start with a two-character language indicator followed by a
    colon (for example ``en:milk``). When no indicator is present, the
    language is detected by the translator.
    """

    # Hand-written French translations returned as-is, without querying the
    # translator (better results than the automatic translation).
    _FIXED_TRANSLATIONS = {
        'en:spreads': 'Pate à tartiner',
        'en:sweets-spreads': 'Pâte à tartiner sucrée',
        'en:plant-based-spreads': 'Pâte à tartiner végétal',
    }

    def __init__(self) -> None:
        self.translator = Translator()

    def make_translation(self, word: str) -> Union[bool, str]:
        """
        Translate a word to French using the googletrans library.

        The source language may be indicated by the first two characters
        followed by a colon.

        :param word: string, optionally prefixed with a language indicator
        :return: translated string with hyphens replaced by spaces, or False
            if the word is blank or its language cannot be determined
        """
        fixed = self._FIXED_TRANSLATIONS.get(word)
        if fixed is not None:
            return fixed

        # Nothing to translate: covers '', ' ' and any other whitespace-only
        # input, so blank words never reach the translator.
        if not word.strip():
            return False

        # take the language indication
        language = self.get_language(word)
        if not language:
            return False

        # remove the language indicator, then translate the word to French
        translated = self.translator.translate(self.slice_language(word),
                                               src=language,
                                               dest='fr').text
        # hyphens are replaced by spaces in the final result
        return translated.replace('-', ' ')

    def detect_lang(self, word: str) -> Union[bool, str]:
        """
        Detect the language of a word with googletrans.

        :param word: string
        :return: the detected language code, or False when the detection
            confidence is below 0.5 or the code is not one of the
            ``LANGCODES`` values
        """
        tr = self.translator.detect(word)
        if tr.confidence < 0.5:
            return False
        if tr.lang in LANGCODES.values():
            return tr.lang
        return False

    def slice_language(self, word: str) -> str:
        """
        Strip a leading language indicator (two characters and a colon).

        :param word: string
        :return: the word without its indicator; the word unchanged when it
            has no indicator or nothing follows the colon
        """
        if len(word) > 3 and word[2] == ':':
            return word[3:]
        return word

    def get_language(self, word: str) -> Union[bool, str]:
        """
        Return the language of the word.

        If the word has a language indicator, that indicator is used unless
        the translator detects a different language with a confidence above
        0.9. If it has none, the language is detected by googletrans.

        :param word: string
        :return: language code, or False when detection is used and fails
        """
        if self.slice_language(word) == word:
            # the word has no language indicator
            return self.detect_lang(word)

        lang = word[:2]
        tr = self.translator.detect(word)
        # if the detected language is different and the confidence of the
        # translator is above 90 %, take the language from the translator
        if tr.confidence > 0.9 and tr.lang != lang:
            lang = tr.lang
        return lang