Example #1
def vis_doc(sents, lang):
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.nlu.uni_remote_viz import display_doc_deps
    from sagas.conf.conf import cf

    # parse (or fetch cached) chunks for the sentence, then render its dependency graph
    chunks = cached_chunks(sents, lang, cf.engine(lang))
    return display_doc_deps(chunks['doc'], None)
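A minimal call sketch (the sentence, language code and notebook context are illustrative; cached_chunks needs a configured parser backend for the target language):

```python
# Hypothetical usage in a notebook cell: render the dependency graph inline.
gv = vis_doc('I want to play music.', 'en')
gv  # the graphviz object returned by display_doc_deps renders inline
```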
Example #2
def get_domains(sents, lang, engine='corenlp', options=None):
    """
    >>> from sagas.nlu.legacy.aiobj_kit import get_domains
    >>> get_domains('你有几台笔记本电脑?', 'zh', 'ltp')
    >>> get_domains('列出上周编辑的文件。', 'zh', 'ltp', DomainGetOptions(enable_predicts=True))

    :param sents: sentence text to parse
    :param lang: language code, e.g. 'zh'
    :param engine: parser backend, e.g. 'corenlp' or 'ltp'
    :param options: DomainGetOptions controlling predicts, chunk listing and graph output
    :return: list of (domains, meta) tuples
    """
    # from IPython.display import display

    if options is None:
        options=DomainGetOptions()
    pipelines=['predicts'] if options.enable_predicts else []
    doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
    result_set=[]
    if doc_jsonify is not None:
        tc.emp('cyan', resp)
        if resp is not None and 'predicts' in resp and len(resp['predicts'])>0:
            rs=resp['predicts']
            # print(rs)
        else:
            # print(doc_jsonify.words_string())
            rs = get_chunks(doc_jsonify)
        if len(rs)>0:
            if options.list_chunks:
                list_rs(rs, lang)
            if options.deps_graph:
                # display(display_doc_deps(doc_jsonify, resp))
                tc.gv(display_doc_deps(doc_jsonify, resp,
                                       translit_lang=lang if lang in ('ja', 'ko', 'zh', 'fa', 'ar', 'he') else None))
            # rs_represent(rs, data = {'lang': lang, "sents": sents, 'engine': engine,
            #                         'pipelines':pipelines})
            data = {'lang': lang, "sents": sents, 'engine': engine,
                    'pipelines': pipelines}
            for r in rs:
                # fixture.print_table(r, False)
                # print(f"lemma: {r['lemma']}")
                # df = sagas.to_df(r['domains'], ['rel', 'index', 'text', 'lemma', 'children', 'features'])
                # display(df)
                domains = r['domains']
                common = {'lemma': r['lemma'], 'word': r['word'],
                          'stems': r['stems']}
                meta = {'rel': r['rel'], **common, **data}
                result_set.append((domains, meta))
        else:
            tc.emp('red', '.. no predefined chunk-patterns found.')
            tc.info(doc_jsonify.words_string())
            tc.info(doc_jsonify.dependencies_string())
    return result_set
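A hedged consumption sketch for the return value (the per-domain tuple layout of rel, index, text, lemma, children, features follows the column names used elsewhere in this codebase; the sentence and backend are taken from the doctest above):

```python
# Illustrative only: walk the (domains, meta) pairs returned by get_domains.
for domains, meta in get_domains('你有几台笔记本电脑?', 'zh', 'ltp'):
    print(meta['rel'], meta['lemma'], meta['word'])
    for rel, index, text, lemma, features, children in domains:
        print(f"  {rel:10} {text} ({lemma})")
```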
Example #3
def parse_subj():
    lang = 'ru'
    sents = 'Яблоко - это здоровый фрукт.'  # "An apple is a healthy fruit."
    st.write(sents)
    data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
    doc_jsonify, resp = parse_sents(data)

    domains = get_subj_domain(doc_jsonify)
    testing(domains, 'ru')

    gv = display_doc_deps(doc_jsonify, resp)
    st.graphviz_chart(gv)

    st.write(domains)
Example #4
def parse_aux():
    lang = 'en'
    sents = 'what will be the weather in three days?'
    st.write(sents)
    data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
    doc_jsonify, resp = parse_sents(data)

    domains = get_aux_domain(doc_jsonify)
    testing(domains, 'en')

    # show the analysis graph
    gv = display_doc_deps(doc_jsonify, resp)
    st.graphviz_chart(gv)

    st.write(domains)
Example #5
def parse_deps(text, lang, translit=None):
    text = fix_sents(text, lang)
    engine = cf.engine(lang)
    # g = sentence_view(lang, text, engine=engine, translit_lang=lang, enable_contrast=True)
    doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
    if doc_jsonify is not None:
        list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)

        st.graphviz_chart(g)
        if translit is not None:
            st.text(f"♤ {translit}")

        words = [word.text for word in doc_jsonify.words]
        tools.contrast(text, lang, word_map=words)
Example #6
    def print_sents(self, sents, lang, engine=None):
        """
        $ python -m sagas.nlu.ruleset_procs print_sents 'I want to play music.' en
        $ python -m sagas.nlu.ruleset_procs print_sents "クモは4つの右の目をしています。" ja corenlp

        :param sents: sentence text to parse
        :param lang: language code
        :param engine: parser backend; defaults to cf.engine(lang)
        :return:
        """
        # lang = 'en'
        if engine is None:
            engine = cf.engine(lang)
        data = {'lang': lang, "sents": sents, 'engine': engine}
        doc_jsonify, resp = parse_sents(data)
        rs = get_chunks(doc_jsonify)

        if lang in non_spaces:
            delim = ''
        else:
            delim = ' '
        for serial, r in enumerate(rs):
            meta = build_meta(r, data)
            domains = r['domains']
            # print([(x[0], x[2]) for x in domains])
            #
            keys = {x[0] for x in domains}
            grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
            tokens = {x: grp(x, 2) for x in keys}
            words = {x: delim.join(grp(x, 2)) for x in keys}
            lemmas = {x: delim.join(grp(x, 3)) for x in keys}
            print('meta keys', meta.keys())
            print('tokens', tokens)
            print('words', meta['word'], words)
            print('lemmas', lemmas)
            #
            ctx = Context(meta, domains)
            # print(ctx.lemmas)
            print('chunks', ctx._chunks)

        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)
        print(*[(w.index, w.text, w.governor,
                 doc_jsonify.words[w.governor - 1].text)
                for w in doc_jsonify.words],
              sep='\n')
        tc.gv(g)
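To make the grouping step concrete, a self-contained sketch with a hand-made domains list (the tuples mimic the rel, index, text, lemma, children, features layout; the values are invented):

```python
domains = [
    ('nsubj', 1, 'I', 'I', [], {}),
    ('obj', 4, 'video', 'video', [], {}),
    ('obj', 5, 'games', 'game', [], {}),
]
keys = {x[0] for x in domains}
grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
words = {k: ' '.join(grp(k, 2)) for k in keys}   # e.g. {'obj': 'video games', 'nsubj': 'I'}
lemmas = {k: ' '.join(grp(k, 3)) for k in keys}  # e.g. {'obj': 'video game', 'nsubj': 'I'}
print(words)
print(lemmas)
```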
Example #7
def row_view(row):
    text = row[1]
    if display_translit and len(row) > 2:
        label = row[2]
    else:
        label = text
    if st.button(f"{label} ✁ {row[0]}"):
        text = fix_sents(text, lang)
        engine = get_engine(lang)
        # g = sentence_view(lang, text, engine=engine, translit_lang=lang, enable_contrast=True)
        doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
        if doc_jsonify is not None:
            list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
            g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)

            st.graphviz_chart(g)
            if len(row) > 2:
                st.text(f"♤ {row[2]}")

            words = [word.text for word in doc_jsonify.words]
            tools.contrast(text, lang, word_map=words)
Example #8
    def verbs(self, sents, lang='en', do_action=False):
        """
        WordNet matches for individual words are evaluated by a dedicated ruleset; a rule that
        matches writes into the state, e.g. spec_xcomp_obj='music', where music corresponds to
        the knowledgebase's object_type.
        The WordNet references of the significant words are fed into the ruleset in turn, which
        yields a state set; this state set is attached to the sentence structure for evaluation
        by sents_ruleset.
        sents_ruleset collects the resulting intents into the state; the intents are then
        iterated, and any intent with a triggerable action has that action fired.

        $ python -m sagas.nlu.ruleset_procs verbs 'I want to play music.' en
            ```
            [{'ref': 'want/want', 'upos': 'verb'},
             {'ref_xcomp': 'play/play', 'upos': 'verb'},
             {'ref_xcomp_obj': 'music/music', 'upos': 'noun'}]
            ```
        $ python -m sagas.nlu.ruleset_procs verbs 'I want to play video.' en
        $ python -m sagas.nlu.ruleset_procs verbs 'I would like to play video.' en
        $ python -m sagas.nlu.ruleset_procs verbs "i'd like to play sound." en
        $ verbs 'I want to play music.' en True

        :param sents: sentence text to evaluate
        :param lang: language code
        :param do_action: when True, trigger the action bound to each matched intent
        :return:
        """
        import sagas.nlu.ruleset_fixtures as rf

        data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
        doc_jsonify, resp = parse_sents(data)
        v_domains = get_verb_domain(doc_jsonify)
        if self.verbose:
            tc.gv(display_doc_deps(doc_jsonify, resp))
            pprint(v_domains)
            json_utils.write_json_to_file('./out/v_domain.json', v_domains[0])

            # list words
            tc.emp('cyan', f"✁ list words. {'-' * 25}")

        intents = []
        host = create_host()
        for d in v_domains:
            tokens = list_words(d, lang, with_chains=True)
            if self.verbose:
                pprint(tokens)

            # specs evaluate
            tc.emp('cyan', f"✁ specs evaluate. {'-' * 25}")
            r3 = {}
            for token in tokens:
                r3 = host.assert_fact('chains', token)
                pprint(r3)  # the last result carries the accumulated state
            for key in ['$s', 'id', 'sid']:
                r3.pop(key)
            tc.emp('red', f"specs state - {r3}")

            # sents evaluate
            tc.emp('cyan', f"✁ sents evaluate. {'-' * 25}")
            sents_data = {**d, **r3}
            tc.emp('cyan', f"  keys: {', '.join(sents_data.keys())}")
            result = host.assert_fact('sents', sents_data)
            tc.emp('red', f"sents state - {result}")
            if 'intents' in result:
                intents.extend(result['intents'])

        self.process_intents(sents, lang, intents, do_action)
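A toy sketch of just the intent-accumulation step at the end (the rule-engine results are stubbed; in the real flow they come from host.assert_fact('sents', ...)):

```python
# Hypothetical rule results standing in for the 'sents' ruleset output.
fake_results = [{'intents': ['play_media']}, {}, {'intents': ['query_weather']}]
intents = []
for result in fake_results:
    if 'intents' in result:
        intents.extend(result['intents'])
print(intents)  # ['play_media', 'query_weather']
```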