Exemplo n.º 1
0
def speaker_output_source():
    """Detect the language of the input textbox, translate its contents to
    the language chosen in the destination combobox, then speak the result
    by saving a gTTS mp3 and playing it with the OS default player."""
    text = user_input_textbox.get("1.0", "end")
    engine = google_translator()
    detected = engine.detect(text)
    # show the detected language name in the UI
    source_lang.set(detected[1])
    target = gtts[dest_lang.current()]
    translated = engine.translate(text,
                                  lang_src=detected[0],
                                  lang_tgt=target)
    speech = gTTS(text=translated, lang=target, slow=False)
    speech.save(r"C:\AppData\Translation.mp3")
    os.system(r"C:\AppData\Translation.mp3")
Exemplo n.º 2
0
async def tr_engine(event, text, parse_mode='md'):
    """Edit *event* with *text*, translated to Config.LANG when that
    language is known and is not English.

    Falls back to the untranslated text if the translation call fails.
    """
    target = Config.LANG if LANGUAGES.get(Config.LANG) else 'en'
    if target == 'en':
        # Nothing to translate: edit once and stop.  (BUG FIX: the original
        # fell through to a second edit with an undefined name `hmm`,
        # raising NameError and editing the message twice.)
        await event.edit(text, parse_mode=parse_mode)
        return
    try:
        translator = google_translator()
        translated = translator.translate(text, lang_tgt=target)
    except Exception:
        # best-effort: keep the untranslated text on any failure
        translated = text
    await event.edit(translated, parse_mode=parse_mode)
Exemplo n.º 3
0
    def get_weather(self):
        """Fetch current weather for Campinas and return it as
        ``[temperature_celsius_int, detailed_status_in_portuguese]``."""
        translator = google_translator()

        place = 'Campinas, São Paulo'
        client = pyowm.OWM(OPENWEATHER)
        weather = client.weather_at_place(place).get_weather()
        temperature = int(weather.get_temperature('celsius')['temp'])
        # OWM reports the status in English; present it in Portuguese
        status_pt = translator.translate(weather.get_detailed_status(),
                                         lang_tgt="pt")

        return [temperature, status_pt]
Exemplo n.º 4
0
    def __init__(self, dir_path=None, develop=True):
        """
        Initializes a GTITAN instance with the desired directory.

        Creates the directory layout (and copies the default config) on
        first use, loads the JSON config, and instantiates the
        translator / pytrends / gtab helpers.

        :param dir_path:  path where to create a directory. If left to None, uses default package directory;
        :param develop: flag stored on the instance; its effect is not
                        visible inside this method.
        """

        self.develop = develop
        if dir_path is None:
            #This queries 1 path up
            self.dir_path = os.path.dirname(os.path.abspath(__file__))
            if not os.path.exists(os.path.join(self.dir_path, "checkpath")):
                os.makedirs(os.path.join(self.dir_path, "checkpath"))
        else:
            #TODO: make all folders if new folder indicated: copy default files there such as the config.
            self.dir_path = dir_path
            if not os.path.exists(dir_path):
                default_path = os.path.dirname(os.path.abspath(__file__))
                # creating directory structure
                os.makedirs(os.path.join(self.dir_path, "config"))
                os.makedirs(os.path.join(self.dir_path, "data",
                                         "Translations"))
                os.makedirs(
                    os.path.join(self.dir_path, "data", "Trend_indices"))

                # copying defaults
                shutil.copyfile(
                    os.path.join(default_path, "config", "config_py.json"),
                    os.path.join(self.dir_path, "config", "config_py.json"))
            else:
                print("Directory already exists, loading data from it.")

        print(f"Using directory2 '{self.dir_path}'", __file__)
        with open(os.path.join(self.dir_path, "config", "config_py.json"),
                  'r') as fp:
            self.CONFIG = json.load(fp)

        #set configuring settings:
        # JSON stores the timeout as a list; downstream wants a (connect, read) tuple
        self.CONFIG['CONN']['timeout'] = tuple(self.CONFIG['CONN']['timeout'])
        self.t_sleep = 0.2  #also get it from
        # 14 hours in seconds -- presumably a Google rate-limit backoff; confirm
        self.google_timeout = 14 * 60 * 60
        self.t_block = 0

        #Call instances of used packages
        self.translator = google_translator()
        #self.pytrend = TrendReq(hl='en-US', **self.CONFIG['CONN'])
        self.pytrend = TrendReq()
        self.gtab = gtab.GTAB()

        # sets default anchorbank
        default_project = "frontex"
        self.set_active_project(default_project)
Exemplo n.º 5
0
def request(text):
    """Paraphrase *text* by round-tripping it through four random
    Latin-script languages via Google Translate, then normalise curly
    quotation marks to their ASCII equivalents.

    Returns the final English text.
    """
    t = google_translator(timeout=5)
    # TRANSLATES TO RANDOM LANGUAGES FOUR TIMES
    # (dead `lang = "fr"` initialiser removed: it was always overwritten
    # before first use)
    num_translations = 4
    for _ in range(num_translations):
        lang = random.choice(list(LATIN_LANG.keys()))
        text = t.translate(text, lang)
        text = t.translate(text, "en")
    # NOTE: the loop already leaves the text in English, so this final
    # call is effectively an en->en safety pass
    translateFINAL = t.translate(text, "en")
    translateFINAL = re.sub(u"(\u201c|\u201d)", "\"", translateFINAL)
    translateFINAL = re.sub(u"(\u2018|\u2019)", "'", translateFINAL)

    return translateFINAL
Exemplo n.º 6
0
def do_translate_zh_en(text):
    """
    Translates a string from Chinese to English.

    Integers are returned unchanged (they are IDs), empty/'null' strings
    yield ''.  The translator is only constructed when actually needed
    (the original built it even for inputs that never used it).

    :param text: str (or int for IDs)
    :return: str (or the int passed in)
    """
    if isinstance(text, int):
        return text  # don't translate the IDs
    if isinstance(text, str):
        text = text.strip()  # Strip trailing crap
    if not text or text == 'null':
        return ''
    t = google_translator(timeout=5)
    return t.translate(text, lang_tgt='en', lang_src='zh').strip()
Exemplo n.º 7
0
class Translator:
    """Caching wrapper around google_translator: each (language, text)
    pair is only sent to the API once."""
    translator = google_translator()
    tx_cache = {}  # avoid translating twice

    @classmethod
    def translate(cls, text, lang_tgt, lang_src="en"):
        """Translate *text* from *lang_src* to *lang_tgt*, using the cache."""
        # identical source/target language family -> nothing to do
        if lang_tgt.startswith(lang_src):
            return text.strip()
        cache = cls.tx_cache.setdefault(lang_tgt, {})
        if text in cache:
            # reuse the previously translated value
            return cache[text]
        result = cls.translator.translate(text,
                                          lang_tgt=lang_tgt,
                                          lang_src=lang_src)
        if isinstance(result, list):
            # usually male/female forms of the word; deliberately not cached
            return result[0]
        # remember the translation for next time
        cache[text] = result
        return result

    @classmethod
    def _tx_value(cls, value, lang_tgt, lang_src):
        # route one value to the matching translator by its type;
        # anything else passes through untouched
        if isinstance(value, dict):
            return cls.translate_dict(value, lang_tgt, lang_src)
        if isinstance(value, str):
            return cls.translate(value, lang_tgt, lang_src)
        if isinstance(value, list):
            return cls.translate_list(value, lang_tgt, lang_src)
        return value

    @classmethod
    def translate_dict(cls, data, lang_tgt, lang_src="en"):
        """Recursively translate every translatable value of *data* in place."""
        for key in data:
            data[key] = cls._tx_value(data[key], lang_tgt, lang_src)
        return data

    @classmethod
    def translate_list(cls, data, lang_tgt, lang_src="en"):
        """Recursively translate every translatable element of *data* in place."""
        for idx in range(len(data)):
            data[idx] = cls._tx_value(data[idx], lang_tgt, lang_src)
        return data
def translate_data(language, source):
    """Translate *source* into *language*, retrying up to 6 times on error.

    Returns the translated text, or None when every attempt failed.
    """
    attempts = 0
    while attempts < 6:
        try:
            translator = google_translator(
                url_suffix="com", timeout=25,
                proxies={'http': '159.75.5.165:10808',
                         'https': '159.75.5.165:10808',})
            return translator.translate(source, lang_tgt=language)
            # (unreachable `break` after the return removed)
        except Exception as e:
            # BUG FIX: the original printed the undefined name `data` here,
            # which raised NameError on the very first retry
            print(e, "正在重试:", source)
            attempts += 1
    return None
Exemplo n.º 9
0
    def TranslateNow(file_translate):
        """Translate output.txt line by line from Turkish to English and
        append the results to *file_translate*.

        Both files are opened with context managers so the handles are
        released even if a translation call raises (the original leaked
        the output.txt handle in that case).
        """
        translator = google_translator()

        with open("output.txt", "r") as source, \
                open(file_translate, "a", encoding='utf-8') as tr_to_en:
            print("Çeviri işlemi başladı.")
            for line in source:
                translate_result = translator.translate(line,
                                                        lang_src="tr",
                                                        lang_tgt="en")
                tr_to_en.write(translate_result)
Exemplo n.º 10
0
def translate_file_util(text, target_lang):
    """Translate *text* from English into *target_lang*.

    Retries forever, sleeping 10 s after each failure, until the
    translation succeeds.
    """
    while True:
        try:
            translator = google_translator()
            return translator.translate(text,
                                        lang_src='en',
                                        lang_tgt=target_lang)
        except Exception as err:
            # transient API failure: log, back off, try again
            print(str(err))
            time.sleep(10)
Exemplo n.º 11
0
async def google_tl(rss_str_tl: str):
    """Translate an RSS string to Chinese via Google Translate.

    Emoji are demojized and stripped first.  On any failure a notice
    containing the error message is returned instead of raising.
    """
    try:
        translator = google_translator()
        # drop emoji: demojize turns them into :name: tokens, then remove them
        stripped = re.sub(r':[A-Za-z_]*:', ' ', emoji.demojize(rss_str_tl))
        translated = translator.translate(re.escape(stripped), lang_tgt='zh')
        labelled = '\n谷歌翻译:\n' + str(translated)
        # remove the backslashes re.escape introduced
        return re.sub(r'\\', '', labelled)
    except Exception as e:
        print("谷歌翻译失败")
        return '\n翻译失败!' + str(e) + '\n'
Exemplo n.º 12
0
def detect_and_translate(text):
    """Detect the language of *text*, translate it to English, and print
    RAKE-ranked key phrases of the translation."""
    translator = google_translator()
    rake = Rake()
    detected = translator.detect(text)
    print("\nSource Language was : ", detected[1])
    if detected[0] != 'en':
        # echo the original only when it is not already English
        print(text)
    english = translator.translate(text, lang_tgt='en')
    print("\nEnglish Translation \n")
    rake.extract_keywords_from_sentences(english.split('\n'))
    print(english)
    scored = rake.get_ranked_phrases_with_scores()
    print("\nPhrases with Scores")
    print(scored)
Exemplo n.º 13
0
    def __init__(self, 
        db,
        languages,
        weight_field ,
        spacy_model,
        remove_words = "./data/words_to_remove.txt", 
        replace_words= "./data/words_to_replace.txt",
        desc_field = "description", 
        max_words = 5,
        ):

        """Processor for wuwana description tags. It accesses the db and uses 3 NLP libraries:
        - Spacy as tokenizer.
        - Wordcloud as tag modeller.
        - Gensim as tag modeller.
        

        Parameters
        -----------
        db: open database connection (a cursor is created from it).
        languages: list with languages in format: ["es","fr","zh-cn"]
        remove_words: path to file of words to be removed.
        replace_words: path to file of words to be replaced (JSON mapping).
        spacy_model: pretrained Spacy model. 
        max_words: max words to be extracted from description texts.
        desc_field: field where text is stored in company table.
        weight_field: Field in company table where weights will be stored.
       
        """

        #file with words to be removed from tags
        # NOTE(review): this handle is kept open for the object's lifetime
        # and never closed -- confirm that is intentional
        self.file_words = open(remove_words, "r", encoding="utf-8" )
        self.remove_words = self.file_words.read().split(";")    

        #bag of words that should be replaced, such as abbreviations
        with open(replace_words, "r", encoding="utf-8" ) as f_in:
            self.replace_words = json.load(f_in)
        
        self.translator = google_translator()  
        self.db = db
        self.cursor_tag = self.db.cursor()
        self.max_words = max_words
        self.desc_field = desc_field
        self.languages = languages
        self.weight_field = weight_field

        # English pretrained Spacy model 
        # NOTE(review): the `spacy_model` parameter is ignored here; the
        # model name is hard-coded -- confirm which is intended
        try:
            self.nlp = spacy.load("en_core_web_lg")
        except: sys.exit("ERROR: You must download en_core_web_lg spacy model. Use 'python -m spacy download en_core_web_lg' ")
Exemplo n.º 14
0
def main():
    """Streamlit app: sentiment analysis of Olist user reviews.

    Loads a pickled logistic-regression model and TF-IDF vectorizer,
    translates the Portuguese review to English on demand, and shows the
    predicted sentiment.
    """
    translator = google_translator()
    # pre-trained classifier (pickled logistic regression)
    with open('sentiment analysis app/pickle files/log_reg.pkl', 'rb') as f:
        model = pickle.load(f)

    # matching TF-IDF vectorizer used at training time
    with open('sentiment analysis app/pickle files/tfidf_vectorizer.pkl',
              'rb') as f:
        tfidf_vectorizer = pickle.load(f)

    st.title('Olist User Review')
    menu = ['Home', 'About']
    choice = st.sidebar.selectbox('Menu', menu)

    try:
        if choice == 'Home':
            st.subheader('Sentiment Analysis')
            review = st.text_area('Review Text',
                                  'Enter your Review in Portuguese')
            # translate up-front so the button below can show it instantly
            trans_text = translator.translate(text=review, lang_tgt='en')
            st.write('Would you like to translate')
            if st.button('Translate'):
                st.text_area('Translated Review Text', trans_text)
            if st.button('Get Sentiment'):
                # detect() returns (code, name); reject non-Portuguese input
                if translator.detect(review)[0] != 'pt':
                    st.warning(
                        'Review Text has to be in Portuguese language **:see_no_evil:**'
                    )
                else:
                    # model was trained on Portuguese text, so predict on the original
                    prediction = int(
                        model.predict(tfidf_vectorizer.transform([review])))
                    if prediction == 1:
                        st.success('**Review text is Positive :joy: :yum:**')
                        st.balloons()
                    elif prediction == 0:
                        st.error('**Review text is Negative :cry: :worried:**')

        if choice == 'About':
            st.subheader('Learn More About Sentiment Analysis')
            st.write(
                '## Model was built using Logistic Regression :sunglasses:')
            st.write('Model was train in **Portuguese language**')
            st.write('''Reviews should be in that language (Portuguese)  \n
			Option to translate to English is Available''')
            # st.write('If you want other language Translation')
    except:
        # any failure (model files, translation API) is treated as connectivity
        st.write('**:eyes:** Check your Internet Connectivity!! **:eyes:**')
Exemplo n.º 15
0
def get_converters():
    """Build the translator/transliterator registry.

    Returns a dict with a Google translator plus Hindi/Urdu/English
    transliterators from indictrans.
    """
    from google_trans_new import google_translator
    from indictrans import Transliterator

    def _xlit(src, dst):
        # rule-based mode disabled, matching the original configuration
        # (build_lookup deliberately left off)
        return Transliterator(source=src, target=dst, rb=False)

    return {
        'g_translator': google_translator(url_suffix="com.pk"),
        'hi2ur': _xlit('hin', 'urd'),
        'ur2hi': _xlit('urd', 'hin'),
        'hi2en': _xlit('hin', 'eng'),
        'ur2en': _xlit('urd', 'eng'),
    }
Exemplo n.º 16
0
    def translate(self):
        """Translate the input textbox into the selected language and show
        the result; detects the source language when auto-detect is on."""
        try:
            self.pushButton.setCursor(QtCore.Qt.BusyCursor)
            source_text = self.textEdit.toPlainText()
            names = list(google_trans_new.LANGUAGES.values())
            codes = list(google_trans_new.LANGUAGES.keys())
            # map the human-readable combobox entry back to a language code
            target_code = codes[names.index(
                self.comboBox_2.currentText().lower())]
            translator = google_translator()

            if self.is_checked:
                detected = translator.detect(source_text)
                result = translator.translate(source_text,
                                              lang_tgt=target_code)
                self.label_4.setText(
                    f"Auto detected: {detected[1].capitalize()}")
                self.label_4.adjustSize()
            else:
                source_code = codes[names.index(
                    self.comboBox.currentText().lower())]
                result = translator.translate(source_text,
                                              lang_src=source_code,
                                              lang_tgt=target_code)
                self.label_4.setText("")

            self.textEdit_2.setText(result)
            self.pushButton.setCursor(QtCore.Qt.PointingHandCursor)

        except Exception as e:
            self.error_message(e)
Exemplo n.º 17
0
def google_tr_eng_ru(sheet_wb, new_tb, ltr, num, color="FF0000"):
    """Translate cell ``ltr``+``num`` of *sheet_wb* from English to Russian.

    The original value and its translation are logged into row `counter`
    of *new_tb* (columns A and B), and the translation is written back
    into the source cell, keeping its font but recolouring it.
    """
    # TODO Google translator from English to Russian
    translator = google_translator()
    global counter
    counter += 1

    cell_ref = f'{ltr}{num}'
    original_value = sheet_wb[cell_ref].value
    trl = translator.translate(f'{original_value}', lang_tgt='ru')
    font_size = sheet_wb[cell_ref].font.size
    font_name = sheet_wb[cell_ref].font.name

    new_tb[f'A{counter}'] = original_value
    sheet_wb[cell_ref] = new_tb[f'B{counter}'] = trl
    sheet_wb[cell_ref].font = Font(size=font_size, name=font_name, color=color)
Exemplo n.º 18
0
 def google_trans(self, l_to = ini["to"]):
     """Translate self.content via Google Translate and print the result.

     Args:
         l_to: target language code; defaults to the configured
               ini["to"] (evaluated once at definition time).
     """
     translator = google_translator()
     translated = translator.translate(self.content, l_to)
     print("\n")
     print(" google translate ".center(40, "*"))
     print("\n\t", proc_str(translated), "\n\n")
     # signal completion to whoever watches self.finished
     self.finished[0] = 1
Exemplo n.º 19
0
def trans(text, lang='zh-CN', detect=1):
    """Translate *text* with get_trans after cleaning it.

    lang='en' or 'zh' forces that single target.  Any other value returns
    the Chinese translation plus either the English translation (when the
    input is Chinese) or the original text.  *detect* is unused but kept
    for backward compatibility.
    """
    text = text_clean(text)
    # (the original built an unused google_translator() instance here)
    if lang == 'en':
        return get_trans(text, lang_tgt='en')
    if lang == 'zh':
        return get_trans(text, lang_tgt='zh-CN')
    if get_lang(text)[0] == 'zh-CN':
        return get_trans(text, lang_tgt='zh-CN') + '\n' \
            + get_trans(text, lang_tgt='en')
    return get_trans(text, lang_tgt='zh-CN') + '\n' + text
Exemplo n.º 20
0
def Detect_image_text_and_translate():
    """OCR the text in ./img.png, translate it to Traditional Chinese,
    show the translation in an alert, and log both versions to
    Detect_Text.txt."""
    image = cv2.imread(r'.\img.png')
    ocr_text = pytesseract.image_to_string(image)
    print('翻譯前:\n' + str(ocr_text) + '\n')

    translator = google_translator()
    # source language is auto-detected; target is Traditional Chinese
    result = translator.translate(ocr_text, lang_tgt='zh-TW')
    print('翻譯後:\n' + str(result) + '\n')
    pyautogui.alert(result, '翻譯內容')

    with open(r".\Detect_Text.txt", "w", encoding="utf-8") as f:
        f.write('翻譯前:\n' + str(ocr_text) + '\n\n')
        f.write('翻譯後:\n' + str(result) + '\n\n')
        print('已寫入文字檔!')
Exemplo n.º 21
0
        def translation(self):
            """Translate the screen1 text widget from Indonesian to English
            in place.  Deliberately best-effort: any failure (permissions
            or translation) leaves the text unchanged."""
            try:
                self.get_permission()
                try:
                    translator = google_translator()
                    translate_text = translator.translate(
                        self.nav_layout.screen_manager.screen1.isi.text,
                        lang_src='id',
                        lang_tgt='en')

                    self.nav_layout.screen_manager.screen1.isi.text = translate_text
                except:
                    # translation failed: keep the original widget text
                    pass
            except:
                # permission denied or UI not ready: silently skip
                pass
def data_augmentation(paragraphs):
    """Augment *paragraphs* by back-translation: each English paragraph is
    translated to French and back to English, line by line, and the list
    of round-tripped paragraphs is returned."""
    print(paragraphs[0])
    translator = google_translator()

    back_translations = []

    def translation(paragraph, src, dest):
        """Recursive translation to stay under the API's 5000-char limit,
        translating line by line to keep the document structure."""
        trans_result = ''
        for line in paragraph.split('\n'):
            if len(line) >= 4000:  # Google's limit is 5000 characters
                mid = len(line) // 2
                # BUG FIX: the original resumed the second half at mid + 1
                # for odd-length lines, silently dropping the middle char
                trans = (translation(line[:mid], src, dest)
                         + translation(line[mid:], src, dest))
            else:
                trans = translator.translate(line, lang_src=src,
                                             lang_tgt=dest)
            trans_result += trans + '\n'
        return trans_result

    def back_to_back_translation(paragraph):
        # en -> fr -> en round trip
        en_to_other = translation(paragraph, 'en', 'fr')
        return translation(en_to_other, 'fr', 'en')

    for paragraph in paragraphs:
        back_translations.append(back_to_back_translation(paragraph))

    return back_translations
Exemplo n.º 23
0
    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
        """Rasa action: look up the languages spoken in a country.

        The country entity (possibly in Hindi) is translated to English,
        matched against the WALS country table, and the language list is
        uttered back in Hindi.
        """
        countries_languages_path = os.path.join("data",
                                                "cldf-datasets-wals-014143f",
                                                "created",
                                                "lang_country_info.csv")
        countries_languages_data = pd.read_csv(countries_languages_path)

        entities = list(tracker.get_latest_entity_values("country"))

        if len(entities) > 0:
            # use the most recent country entity only
            query_lang = entities.pop()
            query_lang = query_lang.lower().title()
            print(query_lang)

            try:
                #gs = goslate.Goslate()
                translator = google_translator()
                #final  = gs.translate(query_lang, 'en')
                #print(final)
                final = translator.translate(query_lang, lang_tgt='en')
                final = final.lower()
                # keep only letters so the CSV match is robust
                final = re.sub("[^a-zA-Z]+", "", final)
                print(final)
                out_row = countries_languages_data[
                    countries_languages_data["country_name"].str.lower() ==
                    final].to_dict("records")

                if len(out_row) > 0:
                    # one record per language spoken in the country
                    languages = []
                    for i in range(len(out_row)):
                        languages.append(out_row[i]["name"])
                    print(languages)
                    out_text = "%s की भाषा/एँ: \n%s" % (query_lang,
                                                        ", ".join(languages))
                    dispatcher.utter_message(text=out_text)
                else:
                    # country not present in the dataset
                    dispatcher.utter_message(
                        text="क्षमा करें! हमारे पास %s देश के रिकॉर्ड नहीं हैं।"
                        % query_lang)
            except:
                # translation or lookup failed: apologise for a system error
                dispatcher.utter_message(
                    text=
                    "बेटा, System Error के लिए माफी माँगता हूँ। कुछ समय बाद प्रयास करें।"
                )

        return []
Exemplo n.º 24
0
def back_translate(sentence):
    """Paraphrase *sentence* by translating it into a randomly chosen
    language and back to English."""
    language_codes = list(google_trans_new.LANGUAGES.keys())
    pivot_lang = random.choice(language_codes)
    #print(f"Translating to {google_trans_new.LANGUAGES[pivot_lang]}")
    translator = google_translator()
    pivot_text = translator.translate(text=sentence,
                                      lang_src='en',
                                      lang_tgt=pivot_lang)
    # round-trip back to English; wording usually shifts slightly
    round_trip = translator.translate(text=pivot_text,
                                      lang_src=pivot_lang,
                                      lang_tgt='en')
    return round_trip
Exemplo n.º 25
0
    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
        """Rasa action: look up a language's family/genus/ISO code in WALS.

        The language entity (possibly in Hindi) is translated to English,
        matched against the WALS languages table, and the details are
        uttered back in Hindi.
        """
        print('LANG SEARCH')
        data_path = os.path.join("data", "cldf-datasets-wals-014143f", "cldf",
                                 "languages.csv")
        wals_data = pd.read_csv(data_path)
        entities = list(tracker.get_latest_entity_values("language"))

        if len(entities) > 0:
            # use the most recent language entity only
            query_lang = entities.pop()
            query_lang = query_lang.lower().capitalize()
            print(query_lang)

            try:
                translator = google_translator()
                #gs = goslate.Goslate()
                #final  = gs.translate(query_lang, 'en')
                final = translator.translate(query_lang, lang_tgt='en')
                final = final.lower()
                # keep only letters so the CSV match is robust
                final = re.sub("[^a-zA-Z]+", "", final)

                #print(len(re.sub("[^a-zA-Z]+", "", final)))
                #print(type(final))
                #print(final == 'hindi')
                out_row = wals_data[wals_data["Name"].str.lower() ==
                                    final].to_dict("records")
                #print(wals_data[wals_data["Name"]=='Hindi'])
                #print(out_row)

                if len(out_row) > 0:
                    # first matching record carries the WALS metadata
                    out_row = out_row[0]
                    out_text = "%s भाषा %s परिवार से संबंधित है।\nइसका जीनस %s है।\nइसका ISO कोड %s है।" % (
                        query_lang, out_row["Family"], out_row["Genus"],
                        out_row["ISO_codes"])
                    dispatcher.utter_message(text=out_text)
                else:
                    # language not present in the dataset
                    dispatcher.utter_message(
                        text=
                        "क्षमा करें! हमारे पास %s भाषा के रिकॉर्ड नहीं हैं।" %
                        query_lang)
            except:
                # translation or lookup failed: apologise for a system error
                dispatcher.utter_message(
                    text=
                    "बेटा, System Error के लिए माफी माँगता हूँ। कुछ समय बाद प्रयास करें।"
                )

        return []
Exemplo n.º 26
0
def df_sentiment(df):
    """Add VADER sentiment columns (negative/neutral/positive/compound)
    to *df*, translating each row's 'text' to English first.

    Sleeps between API calls to avoid 429: Too Many Requests.
    """
    # Import necessary libraries
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    from google_trans_new import google_translator
    import time
    import numpy as np
    from json.decoder import JSONDecodeError

    translator = google_translator()
    analyzer = SentimentIntensityAnalyzer()

    # work on a clean 0..n-1 index
    df = df.reset_index(drop=True)

    # one polarity-score dict per tweet, throttled between requests
    sentiment_lst = []
    for i in range(len(df)):
        english_text = translator.translate(df['text'][i])
        sentiment_lst.append(analyzer.polarity_scores(english_text))
        # longer pause every 25th request, short pause otherwise
        time.sleep(2 if i % 25 == 0 else 0.7)

    df['sentiment_dct'] = sentiment_lst

    # expand the score dict into one column per key, then drop the dict
    df['negative'] = [scores['neg'] for scores in df['sentiment_dct']]
    df['neutral'] = [scores['neu'] for scores in df['sentiment_dct']]
    df['positive'] = [scores['pos'] for scores in df['sentiment_dct']]
    df['compound'] = [scores['compound'] for scores in df['sentiment_dct']]
    df = df.drop(columns='sentiment_dct')

    return df
Exemplo n.º 27
0
async def handle_translation(content: str) -> str:
    """Translate *content* to Chinese, preferring the Baidu API when
    credentials are configured and falling back to Google Translate.

    Returns a labelled translation string, or a failure notice on error.
    """
    translator = google_translator()
    appid = config.baiduid
    secretKey = config.baidukey
    # demojize then strip the resulting :name: tokens
    text = emoji.demojize(content)
    text = re.sub(r':[A-Za-z_]*:', ' ', text)
    try:
        if appid and secretKey:
            url = f'https://api.fanyi.baidu.com/api/trans/vip/translate'
            # Baidu signature: md5(appid + query + salt + key)
            salt = str(random.randint(32768, 65536))
            sign = hashlib.md5(
                (appid + content + salt + secretKey).encode()).hexdigest()
            params = {
                "q": content,
                "from": "auto",
                "to": "zh",
                "appid": appid,
                "salt": salt,
                "sign": sign
            }
            async with httpx.AsyncClient(proxies={}) as client:
                r = await client.get(url, params=params, timeout=10)
            try:
                # concatenate every translated segment Baidu returned
                i = 0
                str_tl = ''
                while i < len(r.json()["trans_result"]):
                    str_tl += r.json()["trans_result"][i]["dst"] + "\n"
                    i += 1
                text = "\n百度翻译:\n" + str_tl
            except Exception as e:
                # Baidu answered with an error payload: fall back to Google
                if r.json()["error_code"] == "52003":
                    logger.warning("无效的appid,尝试使用谷歌翻译,错误信息:" +
                                   str(r.json()["error_msg"]))
                    text = '\n谷歌翻译:\n' + \
                            str(translator.translate(re.escape(text), lang_tgt='zh'))
                else:
                    logger.warning("使用百度翻译错误:" + str(r.json()["error_msg"]) +
                                   ",开始尝试使用谷歌翻译")
                    text = '\n谷歌翻译:\n' + \
                            str(translator.translate(re.escape(text), lang_tgt='zh'))
        else:
            # no Baidu credentials configured: go straight to Google
            text = '\n谷歌翻译:\n' + \
                    str(translator.translate(re.escape(text), lang_tgt='zh'))
        # remove the backslashes re.escape introduced
        text = re.sub(r'\\', '', text)
    except Exception as e:
        text = '\n翻译失败!' + str(e) + '\n'
    return text
def get_data():
    """Scrape the main COVID table from corona.ps, translate its Arabic
    headers and governorate names to English, and return it as a DataFrame.

    Also writes the result to data/COVID_ps_<date>.csv.
    """
    #Website with data
    # URL_or = 'https://www.corona.ps/details'
    URL_or = 'https://www.corona.ps/'

    page = requests.get(URL_or)
    soup = BeautifulSoup(page.content, 'html.parser')
    #Find tables in webpage
    tables = soup.find_all("table")
    # init the Google API translator
    # translator = Translator()
    translator = google_translator()
    #tables[4] has the necessary data
    table = tables[4]
    tab_data = [[cell.text for cell in row.find_all(["th", "td"])]
                for row in table.find_all("tr")]
    #generate dataframe
    df = pd.DataFrame(tab_data)
    # translate the first row (titles); df.at avoids the chained-indexing
    # assignment (df[i][0] = ...) which writes through a temporary and
    # triggers SettingWithCopyWarning
    for i in range(0, df.shape[1]):
        df.at[0, i] = translator.translate(df.at[0, i], lang_tgt="en")
    #now translate the 0th column (with governorates)
    for i in range(1, df.shape[0]):
        df.at[i, 0] = translator.translate(df.at[i, 0], lang_tgt="en")

    #promote the translated first row to the header
    df = df.rename(columns=df.iloc[0]).drop(df.index[0])

    # timestamp the CSV with the local date
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.date().strftime("%d-%b-%Y")
    name_df = 'COVID_ps_' + timestampStr + '.csv'
    print('Dataframe generated\nSaved in ' + name_df)

    df.to_csv('data/' + name_df)

    return df
Exemplo n.º 29
0
    def __init__(self):
        """Initialise the skill: translator, caches, RAKE extractor, and a
        padatious intent container for skill-local intents."""
        super().__init__()
        self.translator = google_translator()
        self.tx_cache = {}  # avoid translating twice
        self.duck_cache = {}  # presumably caches DuckDuckGo results -- confirm against usage
        self.rake = Rake()  # only english for now

        # for usage in tell me more / follow up questions
        self.idx = 0
        self.results = []
        self.image = None

        # subparser, intents just for this skill
        # not part of main intent service
        intent_cache = expanduser(
            self.config_core['padatious']['intent_cache'])
        self.intents = IntentContainer(intent_cache)
Exemplo n.º 30
0
def update_keywords(keywords_df: pd.DataFrame, dir_lyrics: str):
    """Extract keyphrases for every lyrics file in *dir_lyrics* that is not
    already present in *keywords_df*, translating lyrics to English first.

    Returns a new DataFrame with the accumulated track_id/keywords/weights.
    """
    embedding_distributor = load_local_embedding_distributor()
    pos_tagger = load_local_corenlp_pos_tagger()

    translator = google_translator()

    # start from the existing rows so already-processed tracks are kept
    updated_keywords = keywords_df.to_dict('list')
    track_ids = set(updated_keywords['track_id'])
    filenames = os.listdir(dir_lyrics)
    for i, filename in enumerate(filenames):
        track_id, _ = os.path.splitext(filename)
        print(f'Processing track {i + 1}/{len(filenames)}. Id: {track_id}...')

        if int(track_id) in track_ids:
            print(f'Already have keywords for track {track_id}')
            continue

        # NOTE(review): reads from DIR_LYRICS, not the dir_lyrics argument
        # used for listing -- confirm these are meant to be the same path
        lyrics_path = DIR_LYRICS + filename
        with open(lyrics_path) as f:
            try:
                # NOTE(review): the second replace's first argument looks like
                # a non-breaking space being normalised -- confirm the literal
                lyrics = f.read().replace('\n', ' ').replace(' ', ' ')
                if not lyrics:
                    continue

                lyrics_en = translator.translate(lyrics)

                if not lyrics_en:
                    continue

                keywords, weights, _ = extract_keyphrases(
                    embedding_distributor, pos_tagger, lyrics_en, N_KEYWORDS, 'en')

                # store keywords/weights as ';'-joined strings
                keywords_str = ";".join(keywords)
                weights_str = ";".join(map(str, weights))
                updated_keywords['track_id'].append(track_id)
                updated_keywords['keywords'].append(keywords_str)
                updated_keywords['weights'].append(weights_str)

            except Exception as e:
                # best-effort: skip tracks whose translation/extraction fails
                print(track_id, e)
                pass

    return pd.DataFrame(
        data=updated_keywords,
        columns=keywords_df.columns
    )