Exemplo n.º 1
0
 def viz(sents, trans_it=True):
     """
     Parse *sents* with an NLP pipeline and render its dependency structure
     via CoreNlpViz, optionally emitting an English translation, a
     transliteration, and a per-word translation table first.

     NOTE(review): this snippet references ``self``, ``outf`` and
     ``local_translit`` which are not defined in the visible scope --
     presumably it is a method whose enclosing class/closure provides
     them; confirm against the full file.

     :param sents: sentence text to analyse
     :param trans_it: when True, also translate/transliterate and build a word map
     :return: result of ``cv.analyse_doc(doc, tr_map)``
     """
     nlp = get_nlp(self.lang)
     doc = nlp(sents)
     cv = CoreNlpViz(shape='egg', size='8,5', fontsize=20)
     # flatten all sentence tokens into a single word list
     words = [
         word.text for sent in doc.sentences for word in sent.words
     ]
     if trans_it:
         rs_trans = self.trans_to(sents, ['en'])
         # route output through `outf` when provided, else stdout
         if outf is not None:
             for r in rs_trans:
                 outf(r)
         else:
             print(*rs_trans, sep='\n')
         # emit a transliteration line when the language supports it
         if self.lang in translits.available_langs():
             if outf is not None:
                 outf('♡ ' + translits.translit(sents, self.lang))
             else:
                 print('♡', translits.translit(sents, self.lang))
         # per-word translation map (used as node labels by the visualizer)
         tr_map, tr_tab = get_word_map(self.lang,
                                       'en',
                                       sents,
                                       0,
                                       words,
                                       local_translit=local_translit)
         if outf is not None:
             outf(' '.join(tr_tab))
     else:
         tr_map = None
     return cv.analyse_doc(doc, tr_map)
Exemplo n.º 2
0
 def nouns(self, phrase: Text):
     """
     Extract the nouns of *phrase* with Mecab and annotate each one with
     its Korean transliteration and, when a Korean-WordNet synset matches,
     its synset name and gloss.

     $ python -m sagas.ko.ko_helper nouns '피자와 스파게티가'
     $ python -m sagas.ko.ko_helper nouns '계획이'
     :param phrase: Korean text to analyse
     :return: list of dicts, one per extracted noun
     """
     from sagas.nlu.transliterations import translits
     from sagas.ko.kwn_procs import kwn

     annotated = []
     for noun in self.mecab.nouns(phrase):
         synsets = kwn.get_synsets(noun, first=True)
         if synsets:
             entry = {
                 'spec': synsets[0].name(),
                 'text': noun,
                 'translit': translits.translit(noun, 'ko'),
                 'definition': synsets[0].definition(),
             }
         else:
             # no synset found: keep only surface form and transliteration
             entry = {
                 'text': noun,
                 'translit': translits.translit(noun, 'ko'),
             }
         annotated.append(entry)
     return annotated
Exemplo n.º 3
0
def build_kwn(lookups_nor, lookups_tra):
    """
    Load the Korean WordNet synset list (TSV) and populate two lookup
    tables mapping Korean lemmas -- in original script (*lookups_nor*) and
    transliterated form (*lookups_tra*) -- to sets of synset names.

    :param lookups_nor: dict mutated in place, lemma -> {synset names}
    :param lookups_tra: dict mutated in place, transliteration -> {synset names}
    :return: list of record dicts (id/en/ko/translit/name/definition)
    """
    import pandas as pd

    records = []
    data_path = f'{cf.conf_dir}/ai/nltk/kwn_1.0/kwn_synset_list.tsv'
    frame = pd.read_csv(data_path, sep='\t')

    for _, row in frame.iterrows():
        ko_lemmas = row['korean_lemmas'].split(', ')
        refid = row['# synset_id']
        # synset ids look like '<offset>-<pos>'
        offset, pos = refid.split('-')
        syn = wn.synset_from_pos_and_offset(pos, int(offset))
        en_lemmas = str(row['english_lemmas']).split(', ')
        records.append({
            'id': refid,
            'en': en_lemmas,
            'ko': ko_lemmas,
            'translit': [translits.translit(le, 'ko') for le in ko_lemmas],
            'name': syn.name(),
            'definition': syn.definition(),
        })
        for kw in ko_lemmas:
            name = syn.name()
            if kw in lookups_nor:
                lookups_nor[kw].add(name)
                lookups_tra[translits.translit(kw, 'ko')].add(name)
            else:
                lookups_nor[kw] = {name}
                lookups_tra[translits.translit(kw, 'ko')] = {name}
    return records
Exemplo n.º 4
0
 def translit_chunk(chunk: str, lang):
     """
     Return the transliteration of *chunk* for *lang*; punctuation-only
     chunks and unsupported languages are returned unchanged.

     In non-default runtimes the original text and its transliteration
     are returned together, newline-separated.

     :param chunk: text fragment to transliterate
     :param lang: language code checked against available transliterators
     :return: transliterated (or original) chunk
     """
     from sagas.nlu.transliterations import translits
     # pass punctuation through untouched
     if chunk.strip() in (',', '.', ';', '?', '!'):
         return chunk
     if translits.is_available_lang(lang):
         # BUGFIX: the original referenced undefined `word.text` here;
         # the intended value is this function's own `chunk` argument.
         if sa_env.runtime != 'default':
             return chunk + '\n' + translits.translit(chunk, lang)
         return translits.translit(chunk, lang)
     return chunk
Exemplo n.º 5
0
def word_values(word: Text, lang: Text):
    """
    Split a ``'text/lemma'`` token and transliterate both parts when the
    language has a transliterator.

    :param word: plain text, or a ``'text/lemma'`` compound token
    :param lang: language code for transliteration lookup
    :return: dict with keys 'value' (the original token), 'text', 'lemma'
    """
    from sagas.nlu.transliterations import translits
    if '/' in word:
        # ROBUSTNESS: maxsplit=1 -- a token containing more than one '/'
        # previously raised an uncaught ValueError during unpacking.
        text, lemma = word.split('/', 1)
    else:
        text = lemma = word
    if translits.is_available_lang(lang):
        try:
            text_val = translits.translit(text, lang)
            return {'value': word,
                    'text': text_val,
                    'lemma': translits.translit(lemma, lang)
                             if lemma.strip() != '' else text_val}
        except ValueError:
            # best-effort: log and fall through to untransliterated values
            print(f'*** value error: text: {text}, lemma: {lemma}')
    return {'value': word, 'text': text, 'lemma': lemma}
Exemplo n.º 6
0
def build_omw(lookups_nor, lookups_tra):
    """
    Load the Open Multilingual WordNet Korean Wiktionary tab file and
    populate two lookup tables mapping each Korean word -- in original
    script (*lookups_nor*) and transliterated form (*lookups_tra*) -- to
    sets of WordNet synset names.

    :param lookups_nor: dict mutated in place, word -> {synset names}
    :param lookups_tra: dict mutated in place, transliteration -> {synset names}
    """
    import pandas as pd

    data_path = f'{cf.conf_dir}/ai/nltk/data/wikt/wn-wikt-kor.tab'
    frame = pd.read_csv(data_path, sep='\t')
    for _, row in frame.iterrows():
        refid = row['# Wiktionary']
        kw = row['http://wiktionary.org/']
        # synset ids look like '<offset>-<pos>'
        offset, pos = refid.split('-')
        syn = wn.synset_from_pos_and_offset(pos, int(offset))
        name = syn.name()
        if kw in lookups_nor:
            lookups_nor[kw].add(name)
            lookups_tra[translits.translit(kw, 'ko')].add(name)
        else:
            lookups_nor[kw] = {name}
            lookups_tra[translits.translit(kw, 'ko')] = {name}
Exemplo n.º 7
0
 def search_in(items, phrase):
     """
     Print every item whose first element contains *phrase*, together with
     the transliteration of its second element.

     NOTE(review): the original tested the undefined name ``words``; the
     parameter ``phrase`` is clearly what was meant.  ``translits`` and
     ``lang`` are free variables expected from the enclosing scope --
     confirm against the full file.

     :param items: iterable of (text, source-for-transliteration) pairs
     :param phrase: substring to search for in each item's first element
     """
     for item in items:
         # BUGFIX: was `words in item[0]` with `words` undefined
         if phrase in item[0]:
             print(item)
             print(translits.translit(item[1], lang))
     print('.. done.')
Exemplo n.º 8
0
def get_verb_interr(c: DomainToken, part: Text):
    """
    Resolve the interrogative marker for the Korean token *c* and return
    it as a priority/clause pair; unknown words map to the placeholder '??'.

    :param c: token whose text is 'surface/lemma' or plain surface form
    :param part: part name (unused here, kept for the inspector interface)
    :return: (4, "interr_root('<marker>')")
    """
    from sagas.nlu.inspectors_dataset import get_interrogative
    from sagas.nlu.transliterations import translits

    # the segment before '/' is the surface word
    surface = c.text.split('/')[0]
    rep = get_interrogative(translits.translit(surface, 'ko'), 'ko')
    marker = rep if rep else '??'
    return 4, f"interr_root('{marker}')"
Exemplo n.º 9
0
def ex_translit(key: Text, cnt: Text, comp: Text, ctx: cla_meta_intf):
    """
    Record *cnt* in the context results, transliterated when the context
    language has a transliterator available.

    :param key: result key
    :param cnt: content to (maybe) transliterate
    :param comp: component name
    :param ctx: metadata context receiving the result
    :return: always True
    """
    from sagas.nlu.transliterations import translits

    value = cnt
    if translits.is_available_lang(ctx.lang):
        value = translits.translit(cnt, ctx.lang)
    ctx.add_result(extractor, comp, key, value)
    return True
Exemplo n.º 10
0
    def __call__(self, sents: Text):
        """
        Parse *sents* with a Stanza pipeline for this instance's language,
        applying per-language preprocessing first (Serbian input is
        transliterated before parsing), and wrap the first sentence.

        :param sents: sentence text to parse
        :return: StanzaSentImpl over the first parsed sentence
        """
        from sagas.nlu.stanza_helper import get_nlp
        from sagas.nlu.transliterations import translits

        nlp = get_nlp(self.lang)
        # preprocessing step: only Serbian currently needs one
        if self.lang == 'sr':
            sents = translits.translit(sents, self.lang)
        doc = nlp(sents)
        return StanzaSentImpl(doc.sentences[0], text=sents)
Exemplo n.º 11
0
def get_contrast(word: Text, source: Text, target: Text = 'en', ips_idx=0):
    """
    Translate *word* from *source* to *target*, appending either a local
    transliteration (for configured languages) or pronunciation marks.

    :param word: word to translate
    :param source: source language code
    :param target: target language code (default 'en')
    :param ips_idx: pronunciation index passed to ``marks``
    :return: translated text plus transliteration/pronunciation suffix,
             or *word* unchanged when source == target
    """
    from sagas.nlu.transliterations import translits
    from sagas.nlu.constants import contrast_translit_langs

    if source == target:
        return word

    options = {'get_pronounce', 'disable_correct'}
    # IDIOM: direct boolean expression instead of `True if ... else False`
    local_translit = source in contrast_translit_langs
    res, t = translate_try(word, source=source, target=target, options=options)
    if local_translit and translits.is_available_lang(source):
        trans = ', ' + translits.translit(word, source)
    else:
        trans = marks(t, ips_idx)
    return res + trans
Exemplo n.º 12
0
    def run(self, key, ctx: Context):
        """
        Return True when the token at *key* matches the language-specific
        negative-word map: either its chunk contains a mapped entry, or
        its transliteration/lemma is itself a mapped entry.

        :param key: token key within the context
        :param ctx: parse context carrying words, lemmas and metadata
        :return: bool
        """
        from sagas.nlu.inspectors_dataset import negative_maps, translit_langs
        from sagas.nlu.transliterations import translits

        lang = ctx.meta['lang']
        data_map = negative_maps.get(lang)
        if data_map is None:
            # no negative-word data for this language
            return False
        # transliteration-based languages compare the transliterated word,
        # others compare the lemma
        if lang in translit_langs:
            word_val = translits.translit(ctx.words[key], lang)
        else:
            word_val = ctx.lemmas[key]
        return bool(ctx.chunk_contains(key, data_map) or word_val in data_map)
Exemplo n.º 13
0
    def translit(self, word):
        """
        Print each morpheme of *word* with its transliteration, POS tag
        and -- for common nouns (NNG) and verbs (VV) -- a WordNet gloss.

        $ python -m sagas.ko.ko_helper translit '피자와 스파게티가'

        See also: procs-ko-konlpy.ipynb
        :param word: Korean text to analyse
        :return: None (output is printed, one line per morpheme)
        """
        from sagas.nlu.transliterations import translits

        for morpheme, tag in self.mecab.pos(word):
            gloss = '_'
            if tag in ('NNG', 'VV'):
                word_sets = get_word_sets(morpheme, 'ko')
                if word_sets:
                    gloss = f"{word_sets['name']}({word_sets['definition']})"
            print(morpheme, translits.translit(morpheme, 'ko'), tag, gloss)
Exemplo n.º 14
0
    def trans_en_to(self, text, lang, translit_targets=None, said=True):
        """
        Translate English *text* to several targets (fr, zh-CN and *lang*),
        format the results as a DSL-style ``.sent(...)`` snippet, print it,
        copy it to the clipboard, and optionally speak the *lang* version.

        :param text: English source sentence
        :param lang: primary target language code (also the spoken one)
        :param translit_targets: optional language codes whose translated
            sentences are additionally appended as transliterations
        :param said: when True, speak the *lang* translation aloud
        :return: None (returns early if the translator reports failure)
        """
        import clipboard
        from sagas.nlu.transliterations import translits

        source = 'en'
        targets = f'fr;zh-CN;{lang}'
        says = lang
        # details=False

        ctx = TransContext(source, targets, text, says, '')
        # seed the sentence map with the source text keyed by 2-letter code
        ctx.sents_map[source[:2]] = text
        succ = self.translators[self.translator](ctx)
        if not succ:
            return

        # addons, result = self.parse_chunks(text, source, targets, ctx, details=details)
        addons = []
        # result = '\n\t'.join([text] + ctx.target_sents)
        lines = []
        lines.append(f'\t.sent({source}="{text}"')
        # trailing continuation for the generated snippet
        suffix = ") \\"
        # other appendants like: ctx.target_sents.append(f'v{i}="{ps}"')

        # add translits
        if translit_targets is not None:
            for i, translit in enumerate(translit_targets):
                ps = translits.translit(ctx.sents_map[translit], translit)
                ctx.target_sents.append(f't{i}="{ps}"')

        # join header line, translated sentences and suffix into the snippet
        result = ', \n\t      '.join(lines + ctx.target_sents + [suffix])
        print(result)

        clipboard.copy(result)

        if said:
            from sagas.nlu.nlu_tools import NluTools
            NluTools().say(ctx.sents_map[says], says)
Exemplo n.º 15
0
def get_word_map(source, target, text, ips_idx=0, words=None, local_translit=False):
    """
    Translate each word of *text* individually and build (a) a map from
    word to a multi-line label "word\\n(translation trans)" and (b) a flat
    translation table.

    Example 1:
    from sagas.nlu.corenlp_helper import CoreNlp, CoreNlpViz, get_nlp
    ana=lambda sents: CoreNlpViz().analyse(sents, get_nlp('hi'), get_word_map('hi','en', sents)[0])
    ana('मेरे पास दो रेफ्रिजरेटर हैं')

    Example 2:
    get_word_map('hi','en', 'मेरे पास दो रेफ्रिजरेटर')[0]

    :param source: source language code
    :param target: target language code
    :param text: sentence text (split on spaces when *words* not given)
    :param ips_idx: pronunciation index passed to ``marks``
    :param words: optional pre-tokenized word list overriding the split
    :param local_translit: prefer a local transliteration over pronunciation marks
    :return: (word -> label dict, translation table list)
    """
    from sagas.nlu.transliterations import translits

    rs = {}
    verbose = False
    options = {'get_pronounce', 'disable_correct'}
    if words is None:
        words=text.split(' ')

    trans_table=[]
    for sent in words:
        res, t = translate_try(sent, source=source, target=target, options=options)
        # print(res, sent, t[ips_idx])
        if local_translit and translits.is_available_lang(source):
            trans=', '+translits.translit(sent, source)
        else:
            trans=marks(t, ips_idx)
        rs[sent] = '%s\n(%s%s)' % (sent, res, trans)
        # NOTE(review): trans[2:] strips a leading ', ' -- this assumes
        # marks() output also carries a 2-char prefix; confirm.
        res_r=f"({res})" if res!='' and res not in ('(', ')', '[', ']', '/') else ''
        trans_table.append(f"{trans[2:]}{res_r}")
    return rs, trans_table
Exemplo n.º 16
0
def trans_val(cnt, lang):
    """
    Return the transliterated surface word for transliteration-based
    languages, otherwise the lowercased lemma.

    :param cnt: 'word/lemma' compound token (or plain word)
    :param lang: language code checked against ``translit_langs``
    :return: transliteration of the word part, or the lemma lowercased
    """
    from sagas.nlu.transliterations import translits
    if lang in translit_langs:
        # the segment before the first '/' is the surface word
        surface = cnt.partition('/')[0]
        return translits.translit(surface, lang)
    # the segment after the last '/' is the lemma
    return cnt.rsplit('/', 1)[-1].lower()
Exemplo n.º 17
0
 def translit_text(text, lang) -> Text:
     """
     Transliterate *text* when *lang* has a transliterator available;
     otherwise return it unchanged.

     :param text: text to transliterate
     :param lang: language code
     :return: transliterated or original text
     """
     from sagas.nlu.transliterations import translits
     if not translits.is_available_lang(lang):
         return text
     return translits.translit(text, lang)
Exemplo n.º 18
0
from typing import Text, Any, Dict, List, Union
from sagas.nlu.transliterations import translits
from sagas.nlu.translator import translate, with_words, WordsObserver

import logging
logger = logging.getLogger(__name__)

def tr(w):
    """Transliterate a Hindi word (PEP 8 E731: def instead of assigned lambda)."""
    return translits.translit(w, 'hi')


def trans(w, pos: Text):
    """
    Translate the Hindi word *w* to English and collect candidate
    translations for the given part of speech.

    :param w: Hindi source word
    :param pos: part-of-speech tag used to pick translation candidates
    :return: dict with the chosen 'word' and a list of 'candidates'
    """
    r, t = translate(w,
                     source='hi',
                     target='en',
                     options={'get_pronounce'},
                     tracker=with_words())
    # the WordsObserver attached by with_words() carries per-word candidates
    obs: WordsObserver = t.observer(WordsObserver)
    candidates = obs.get_candidates(pos)
    return {'word': obs.get_axis(r.lower(), pos), 'candidates': candidates}


def word_map(id: int, all_ws: List[Any], pos: Text) -> Dict[Text, Any]:
    w = next((w for w in all_ws if w.synset_id() == id), None)
    if w:
        return {
            'synset': tr(w.head_word()),
Exemplo n.º 19
0
def translit_chunk(chunk, lang):
    """
    Return ``'/<transliteration>'`` for languages with a transliterator,
    otherwise an empty string (used as an optional suffix by callers).

    :param chunk: text fragment to transliterate
    :param lang: language code
    :return: slash-prefixed transliteration, or ''
    """
    from sagas.nlu.transliterations import translits
    if not translits.is_available_lang(lang):
        return ''
    return '/' + translits.translit(chunk, lang)