Exemplo n.º 1
0
def process_df(source, target, text, with_styles=True):
    """Translate ``text`` as a whole and word by word; return the word table.

    The full sentence is translated first and echoed together with its joined
    pronunciation. Each space-separated token is then translated, echoed
    inline, and collected as a ``(word, trans, ips)`` row.

    :param source: source language code forwarded to ``translate``
    :param target: target language code forwarded to ``translate``
    :param text: sentence to process; tokens are obtained with ``split(' ')``
    :param with_styles: when true, return
        ``.style.set_table_styles(styles)`` of the table instead of the
        table itself
    :return: the result of ``sagas.to_df`` over the collected rows, styled
        or plain depending on ``with_styles``
    """
    import sagas

    opts = {'get_pronounce'}

    def _lookup(fragment):
        # Same request for the whole sentence and for every token.
        return translate(fragment,
                         source=source,
                         target=target,
                         trans_verbose=False,
                         options=opts)

    whole, tracker = _lookup(text)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))

    rows = []
    for token in text.split(' '):
        meaning, tracker = _lookup(token)
        print('%s(%s%s)' % (token, meaning, marks(tracker)), end=" ")
        rows.append((token, meaning, marks(tracker, False)))
    print('.')

    frame = sagas.to_df(rows, ['word', 'trans', 'ips'])
    return frame.style.set_table_styles(styles) if with_styles else frame
Exemplo n.º 2
0
    def proc_word(self, type_name: Text, word: Text, head: Text, index: int, r,
                  lang: Text) -> Dict[Text, Any]:
        """Build a description record for a single word.

        The word is translated from ``lang`` into ``target_lang(lang)`` and
        transliterated with ``translit_chunk``. When ``head`` is not the empty
        string, the head is translated as well (with ``disable_correct``) and
        added to the record.

        :param type_name: stored under the ``type`` key
        :param word: the word text; stored under ``text``
        :param head: head word, or ``''`` when there is none
        :param index: stored under the ``index`` key
        :param r: mapping that provides at least a ``'lemma'`` entry
        :param lang: source language code
        :return: dict with keys ``type``, ``text``, ``lemma``, ``translit``,
            ``translate``, ``lang``, ``index`` and, when a head is given,
            ``head`` and ``head_trans``
        """
        from sagas.tool.misc import translit_chunk, display_synsets, target_lang
        from sagas.nlu.translator import translate

        word_trans, _ = translate(word,
                                  source=lang,
                                  target=target_lang(lang),
                                  trans_verbose=False)
        record = dict(
            type=type_name,
            text=word,
            lemma=r['lemma'],
            translit=translit_chunk(word, lang),
            translate=word_trans,
            lang=lang,
            index=index,
        )
        if head == '':
            return record

        head_trans, _ = translate(head,
                                  source=lang,
                                  target=target_lang(lang),
                                  trans_verbose=False,
                                  options={'disable_correct'})
        record.update(head=head, head_trans=head_trans)
        return record
Exemplo n.º 3
0
    def deconstructing(self, text, target='ar'):
        """
        Translate an English sentence, then break it down word by word.

        Prints the whole-sentence translation with its joined pronunciation,
        then, for every space-separated word, its translation with
        ``marks_th`` of the pronunciation, followed by the tracker's
        ``translations`` printed through ``sagas.print_df``.

        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student'
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' de
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' fr
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' es
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' vi

        ## other langs: ru, ja, zh

        :param text: English sentence to deconstruct
        :param target: target language code (default ``'ar'``)
        :return: None; all output is printed
        """
        import sagas

        # One request description shared by the sentence and per-word calls.
        request = dict(source='en',
                       target=target,
                       trans_verbose=False,
                       options={'get_pronounce', 'get_translations'})

        whole, tracker = translate(text, **request)
        print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
        for word in text.split(' '):
            meaning, tracker = translate(word, **request)
            print('%s(%s%s)' % (word, meaning, marks_th(tracker.pronounce)))
            sagas.print_df(tracker.translations)
        print('.')
Exemplo n.º 4
0
    def process(self, source, target, text, ips_idx=1):
        """
        Print the translation of ``text`` and of each of its words.

        from sagas.ko.korea_processor import KoreaProcessor
        kp=KoreaProcessor()
        kp.process('ko', 'en', '나는 냉장고가 두 개있다.', 0)

        see also: procs-ko-tr.ipynb
        :param source: source language code
        :param target: target language code
        :param text: sentence to process; words are split on single spaces
        :param ips_idx: index of the entry to read from the tracker's
            ``pronounce`` sequence
        :return: None; all output is printed
        """
        request = dict(source=source,
                       target=target,
                       trans_verbose=False,
                       options={'get_pronounce'})

        whole, tracker = translate(text, **request)
        print('✁', '%s(%s %s)' % (text, whole, tracker.pronounce[ips_idx]))
        for word in text.split(' '):
            meaning, tracker = translate(word, **request)
            # The per-word pronunciation is shown without its first character.
            reading = tracker.pronounce[ips_idx][1:]
            print('%s(%s,%s)' % (word, meaning, reading), end=" ")
        print('.')
Exemplo n.º 5
0
def process_en_th(text, target='th'):
    """
    Print the translation of an English sentence and of each of its words.

    process_en_th('I have two refrigerators')
    process_en_th('I have two refrigerators', 'ar')
    process_en_th('We are going to the park to play', 'ar')
    # hebrew
    process_en_th('I have two refrigerators', 'iw')
    process_en_th('I have two refrigerators', 'hi')
    process_en_th('I am a student', 'hi')
    # greek language
    process_en_th('I am a student', 'el')
    process_en_th('I have two refrigerators', 'el')
    process_en_th('I have two refrigerators', 'ko')

    :param text: English sentence; words are split on single spaces
    :param target: target language code (default ``'th'``)
    :return: None; all output is printed
    """
    request = dict(source='en',
                   target=target,
                   trans_verbose=False,
                   options={'get_pronounce'})

    whole, tracker = translate(text, **request)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for word in text.split(' '):
        meaning, tracker = translate(word, **request)
        print('%s(%s%s)' % (word, meaning, marks_th(tracker.pronounce)),
              end=" ")
    print('.')
Exemplo n.º 6
0
    def trans_google(self, ctx: TransContext):
        """Translate the context's text into every language of ``ctx.target_list``.

        For each target the translation is appended to ``ctx.target_sents`` as
        ``<lang>="<translation>"`` (``<lang>`` being the first two characters
        of the target code), followed by one ``v<i>="..."`` entry per item of
        the tracker's ``pronounce`` sequence, and is stored in
        ``ctx.sents_map`` under ``<lang>``. An empty translation is retried
        after a one-second pause until it succeeds or the retry counter
        exceeds ``self.retries``.

        :param ctx: translation context; provides ``pars()``, ``target_list``,
            ``target_sents`` and ``sents_map``
        :return: True when every target was translated; False as soon as one
            target still yields an empty translation after the retries
        """
        from tqdm import tqdm
        from sagas.nlu.translator import translate
        import time

        source, _targets, text, says = ctx.pars()

        print('.. translate to', ctx.target_list)
        for target in tqdm(ctx.target_list):
            # Start from the configured default options.
            options = set(cf.ensure('translator_opts'))
            # Ask for pronunciation when the target is the `says` language,
            # or when `says` is the source language and the target is English.
            if says == target or (says == source and target == 'en'):
                options.add('get_pronounce')

            trans, tracker = translate(text,
                                       source=source,
                                       target=target,
                                       options=options)
            count = 0
            while trans == '':
                print('wait a second, try again ...')
                # Qualified call: only the `time` module is imported in this
                # scope, so a bare `sleep` is not guaranteed to be defined.
                time.sleep(1)
                trans, tracker = translate(text,
                                           source=source,
                                           target=target,
                                           options=options)
                count = count + 1
                if count > self.retries:
                    break

            if trans == '':
                print(
                    'translate fail, the clipboard content has not been changed.'
                )
                # Give up on the first target that cannot be translated.
                return False

            lang_key = target[:2]
            ctx.target_sents.append('%s="%s"' % (lang_key, trans))
            for i, p in enumerate(tracker.pronounce):
                # Each pronunciation entry is emitted without its first two
                # characters.
                ps = p[2:]
                ctx.target_sents.append(f'v{i}="{ps}"')
            ctx.sents_map[lang_key] = trans

        return True
Exemplo n.º 7
0
    def translate(self, text, target='zh-CN', source='auto', verbose=False):
        """
        Translate ``text`` and print the result.

        $ python -m sagas.nlu.translator_cli translate 'Садись, где хочешь.'
        $ python -m sagas.nlu.translator_cli translate 'Садись, где хочешь.' en
        $ python -m sagas.nlu.translator_cli translate 'Садись, где хочешь.' en ru

        # multi-sentences
        $ python -m sagas.nlu.translator_cli translate 'Что в этом конверте? Письмо и фотографии.' ja auto True
        $ python -m sagas.nlu.translator_cli translate 'Что в этом конверте? Письмо и фотографии.' en auto True
        $ python -m sagas.nlu.translator_cli translate 'I am a student.' ar en True

        $ python -m sagas.nlu.translator_cli translate 'I have two refrigerators' th en True
        $ python -m sagas.nlu.translator_cli translate 'I have two refrigerators' iw en True
        $ python -m sagas.nlu.translator_cli translate '次の信号を右に曲がってください。' zh ja True

        # word translations
        $ python -m sagas.nlu.translator_cli translate 'city' ar en True
        $ python -m sagas.nlu.translator_cli translate 'tiger' lo en True
        $ python -m sagas.nlu.translator_cli translate 'गतिविधि' en hi True
        $ python -m sagas.nlu.translator_cli translate 'fly' en no True
        :param text: text to translate
        :param target: target language code (default ``'zh-CN'``)
        :param source: source language code (default ``'auto'``)
        :param verbose: forwarded to the translator as ``trans_verbose``
        :return: None; the translation is printed
        """
        # The name below resolves to the module-level `translate` function,
        # not to this method.
        flags = {'disable_correct', 'disable_cache'}
        translated, _ = translate(text,
                                  source=source,
                                  target=target,
                                  trans_verbose=verbose,
                                  options=flags)
        print(translated)
Exemplo n.º 8
0
    def trans_subs(self, kind: Text, word: Text, lang: Text,
                   pos: Text) -> bool:
        """Evaluate ``predicate`` for ``word``, going through English if needed.

        When ``get_word_sets`` finds something for the word, ``predicate`` is
        applied to the word directly. Otherwise one ``/``-separated part of
        ``word`` (selected by the ``trans_idx`` option, default ``-1``) is
        translated to English and ``predicate`` is applied to the candidates
        returned by the words observer. A successful match is recorded with
        ``self.add_subs``; an empty translation falls back to
        ``self.check_subs``.

        :param kind: forwarded to ``predicate`` / ``check_subs``
        :param word: word specification, possibly of the form ``text/lemma``
        :param lang: source language code
        :param pos: part-of-speech forwarded to the lookups
        :return: the result of ``predicate`` or of ``self.check_subs``
        """
        from sagas.nlu.translator import translate, with_words, WordsObserver

        # ensure synsets existence
        if get_word_sets(word, lang, pos):
            return predicate(kind, word, lang, pos)

        # text or lemma, default is lemma
        parts = word.split('/')
        term = parts[self.get_opt('trans_idx', -1)]
        translated, tracker = translate(term,
                                        source=lang,
                                        target='en',
                                        options={'get_pronounce'},
                                        tracker=with_words())
        logger.debug(f"translate {term}: {translated}")
        if not translated:
            return self.check_subs(kind, term, lang, pos)

        english = translated.lower()
        candidates = tracker.observer(WordsObserver).get_axis(english, pos)
        matched = predicate(kind, candidates, 'en', pos)
        if matched:
            self.add_subs(term, english, candidates)
        return matched
Exemplo n.º 9
0
 def proc_children_column(self, df, lang: Text) -> List[InferPart]:
     """Build one ``InferPart`` per distinct relation found in ``df``.

     Rows are visited in ``df.iterrows()`` order. Only the first row of each
     ``rel`` value is considered; rows whose relation is ``'punct'`` or
     ``'head_root'`` are skipped. For the remaining rows the ``children``
     value is joined into text with ``join_text``, translated from ``lang``
     into ``target_lang(lang)`` and transliterated.

     :param df: frame with ``rel``, ``children``, ``index``, ``text`` and
         ``lemma`` columns
     :param lang: source language code
     :return: list of ``InferPart`` objects, in row order
     """
     from sagas.nlu.translator import translate
     from sagas.tool.misc import translit_chunk, target_lang

     result = []
     # A set keeps the "already handled" check O(1) per row.
     seen_rels = set()
     for _label, row in df.iterrows():
         name, children, idx = row['rel'], row['children'], row['index']
         if name in seen_rels:
             continue
         # Skipped relations are recorded too, exactly like handled ones.
         seen_rels.add(name)
         if name in ('punct', 'head_root'):
             continue

         sent = join_text(children, lang)
         res, _ = translate(sent,
                            source=lang,
                            target=target_lang(lang),
                            trans_verbose=False,
                            options={'disable_correct'})
         result.append(
             InferPart(
                 name=name,
                 chunk=sent,
                 text=row['text'],
                 lemma=row['lemma'],
                 translit=translit_chunk(sent, lang),
                 translate=res,
                 index=idx,
             ))
     return result
Exemplo n.º 10
0
 def trans_to(self, sents, targets):
     """Translate ``sents`` from ``self.lang`` into each language of ``targets``.

     :param sents: text to translate
     :param targets: iterable of target language codes
     :return: list of translations, in the order of ``targets``
     """
     from sagas.nlu.translator import translate

     # Lazy generator: each translation runs as the list below consumes it.
     pairs = (translate(sents, source=self.lang, target=code)
              for code in targets)
     return [translated for translated, _ in pairs]
Exemplo n.º 11
0
 def analyse_en(self, sents):
     """
     Translate English text to Arabic, print it, and analyse the result.

     $ python -m sagas.ar.arabic_processor analyse_en 'I am a student'
     :param sents: English text to translate
     :return: None
     """
     arabic, _ = translate(sents, source='en', target='ar')
     print('✔', arabic)
     self.analyse(arabic)
Exemplo n.º 12
0
def process_zh_vi(text, target='vi'):
    """
    Print the translation of a Chinese sentence and of each of its segments.

    The sentence is segmented with ``jieba.cut``; every segment is translated
    separately and printed inline.

    process_zh_vi('我是一个学生')
    process_zh_vi('我是一个学生', 'th')
    :param text: Chinese sentence (translated with source ``'zh-CN'``)
    :param target: target language code (default ``'vi'``)
    :return: None; all output is printed
    """
    import jieba

    request = dict(source='zh-CN', target=target, trans_verbose=False)

    whole, tracker = translate(text, **request)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for segment in jieba.cut(text):
        meaning, tracker = translate(segment, **request)
        print('%s(%s%s)' % (segment, meaning, marks(tracker.pronounce)),
              end=" ")
    print('.')
Exemplo n.º 13
0
def process(source, target, text):
    """Print the translation of ``text`` and of each of its words.

    The whole sentence is printed with its joined pronunciation; every
    space-separated word is then printed inline with its translation and
    ``marks`` of its pronunciation.

    :param source: source language code
    :param target: target language code
    :param text: sentence to process; words are split on single spaces
    :return: None; all output is printed
    """
    request = dict(source=source,
                   target=target,
                   trans_verbose=False,
                   options={'get_pronounce'})

    whole, tracker = translate(text, **request)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for word in text.split(' '):
        meaning, tracker = translate(word, **request)
        print('%s(%s%s)' % (word, meaning, marks(tracker.pronounce)), end=" ")
    print('.')
Exemplo n.º 14
0
def trans(w, pos: Text):
    """Translate a Hindi word to English and collect its word candidates.

    :param w: word to translate (source language ``'hi'``)
    :param pos: part-of-speech passed to the observer lookups
    :return: dict with ``word`` (``get_axis`` of the lower-cased translation)
        and ``candidates`` (``get_candidates`` for ``pos``)
    """
    translated, tracker = translate(w,
                                    source='hi',
                                    target='en',
                                    options={'get_pronounce'},
                                    tracker=with_words())
    observer: WordsObserver = tracker.observer(WordsObserver)
    # Candidates are fetched before the axis lookup, as in the result order
    # callers have always seen.
    candidates = observer.get_candidates(pos)
    axis = observer.get_axis(translated.lower(), pos)
    return dict(word=axis, candidates=candidates)
Exemplo n.º 15
0
def process_en_vi(text, target='vi'):
    """
    Print the translation of an English sentence and of each of its words.

    process_en_vi('I am a student')
    # indonesian
    process_en_vi('I am a student', 'id')
    process_en_vi('I am a student', 'ar')
    # turkish
    process_en_vi('I am a student', 'tr')
    process_en_vi('I am a student', 'th')
    :param text: English sentence; words are split on single spaces
    :param target: target language code (default ``'vi'``)
    :return: None; all output is printed
    """
    request = dict(source='en', target=target, trans_verbose=False)

    whole, tracker = translate(text, **request)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for word in text.split(' '):
        meaning, tracker = translate(word, **request)
        print('%s(%s%s)' % (word, meaning, marks(tracker.pronounce)), end=" ")
    print('.')
Exemplo n.º 16
0
 def trans_en(self, text, target='zh-CN'):
     """
     Translate English text with verbose tracing and print the result.

     The ``sagas.nlu.google_translator`` logger is switched to DEBUG before
     the request is made.

     $ python -m sagas.nlu.translator_cli trans_en 'I have two refrigerators' es
     $ python -m sagas.nlu.translator_cli trans_en 'I have two refrigerators' he
     :param text: English text to translate
     :param target: target language code (default ``'zh-CN'``)
     :return: None; the translation is printed
     """
     import sagas

     translator_log = sagas.nlu.google_translator.logger
     translator_log.setLevel(logging.DEBUG)

     translated, _ = translate(text,
                               source='en',
                               target=target,
                               trans_verbose=True,
                               options={'disable_correct'})
     print(translated)
Exemplo n.º 17
0
 def analyse(self, sents_zh):
     """
     Translate Chinese text to Korean, then process and speak the Korean.

     analyse('现在整个世界都变成了一个村庄。')
     $ python -m sagas.ko.korea_processor analyse '现在整个世界都变成了一个村庄。'
     $ ko 美丽的花开得很好。
     $ ko 姐姐买来了新衣服。
     :param sents_zh: Chinese text (translated with source ``'zh-CN'``)
     :return: None
     """
     # Chinese -> Korean first; everything below works on the Korean text.
     korean, _ = translate(sents_zh, source='zh-CN', target='ko')
     print('✔', korean)
     self.process('ko', 'zh-CN', korean)
     self.say_in(korean, 'ko_KR', 200)
Exemplo n.º 18
0
 def get_word_trans(self, word, lang, pos='*'):
     """Translate ``word`` to English and print its candidate tables.

     :param word: word to translate
     :param lang: source language code
     :param pos: part-of-speech passed to ``get_axis`` (default ``'*'``)
     :return: the lower-cased English translation, or ``''`` when the
         translator returns nothing
     """
     import sagas
     from sagas.nlu.translator import translate, with_words, WordsObserver

     translated, tracker = translate(word,
                                     source=lang,
                                     target='en',
                                     options={'get_pronounce'},
                                     tracker=with_words())
     if not translated:
         return ''

     word_r = translated.lower()
     tc.emp('cyan', f"1. translate: {word_r}")

     observer: WordsObserver = tracker.observer(WordsObserver)
     tables = observer.trans_dfs
     if not tables:
         tc.emp('cyan', "2. no candidates.")
         return word_r

     tc.emp('cyan', f"2. candidates: {observer.get_axis(word_r, pos)}")
     for pos_key, table in tables.items():
         print(f"- pos:{pos_key} -")
         sagas.print_df(table)
     return word_r
Exemplo n.º 19
0
    def contrast(self, text, source, target='en', word_map=None):
        """
        Show a sentence translation next to its word-level mapping.

        Logs the translation and each pronunciation entry, then the entries
        of the word map returned by ``get_word_map`` (newlines flattened to
        spaces), and finally prints the joined translation table in cyan.

        $ nlu contrast '저는 허락을 못 받아서 안 왔어요.' ko
        :param text: sentence to translate
        :param source: source language code
        :param target: target language code (default ``'en'``)
        :param word_map: forwarded to ``get_word_map`` as ``words``
        :return: None
        """
        from sagas.nlu.translator import get_word_map
        from sagas.nlu.translator import translate
        from sagas.tool.misc import color_print

        translated, tracker = translate(text,
                                        source=source,
                                        target=target,
                                        options={'get_pronounce'})
        tc.info(translated)
        for i, reading in enumerate(tracker.pronounce):
            # Each pronunciation entry is shown without its first two
            # characters.
            ps = reading[2:]
            tc.info(f'v{i}="{ps}"')

        use_local_translit = source in contrast_translit_langs
        rs, trans_table = get_word_map(source, target, text,
                                       words=word_map,
                                       local_translit=use_local_translit)
        for i, (_, line) in enumerate(rs.items()):
            tc.info(f"{i} - ", line.replace('\n', ' '))

        color_print('cyan', ' '.join(trans_table))
Exemplo n.º 20
0
 def tra(self, sents):
     """Translate English ``sents`` into ``self.lang``, print and return it.

     :param sents: English text to translate
     :return: the translated text
     """
     from sagas.nlu.translator import translate

     translated, _ = translate(sents, source='en', target=self.lang)
     print(translated)
     return translated