def langspec_id(self, sents, engine='corenlp'):
    """
    $ python -m sagas.nlu.rules_lang_spec langspec_id 'Berapa umur kamu?'

    :param sents:
    :param engine:
    :return:
    """
    from sagas.nlu.uni_remote import dep_parse
    from sagas.nlu.uni_parser import get_chunks
    from sagas.nlu.rules_lang_spec_id import Rules_id

    pipelines = ['predicts']
    lang = 'id'  # this entry is specific to Indonesian
    doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
    rs = get_chunks(doc_jsonify)

    data = {'lang': lang, "sents": sents, 'engine': engine, 'pipelines': pipelines}
    for serial, r in enumerate(rs):
        common = {'lemma': r['lemma'], 'word': r['word'], 'stems': r['stems']}
        meta = {'rel': r['rel'], **common, **data}
        c = Rules_id(meta, r['domains'], doc=doc_jsonify)
        c.root_rules()
        c.execute()
def viz_sample(lang, sents, engine='corenlp', translit_lang=None, enable_contrast=False):
    """
    >>> from sagas.nlu.uni_remote_viz import viz_sample
    >>> sents = 'what time is it ?'
    >>> viz_sample('en', sents)

    en = "I have to turn off the lights in the room."
    zh = "我必须关掉房间里的灯。"
    ja = "部屋の明かりを消さなければなりません。"
    viz_sample('en', en)

    :param lang:
    :param sents:
    :param engine:
    :return:
    """
    from sagas.nlu.uni_remote import dep_parse

    doc_jsonify, resp = dep_parse(sents, lang, engine, ['predicts'])
    if doc_jsonify is None:
        raise Exception(f'Cannot parse sentence for lang {lang}')
    display_root_predicate(doc_jsonify, resp)
    list_chunks(doc_jsonify, resp, lang, enable_contrast=enable_contrast)
    return display_doc_deps(doc_jsonify, resp, translit_lang=translit_lang)
def build_anal_tree(sents: Text, lang: Text, engine: Text, nodecls=None, docimpl=None):
    """
    >>> from sagas.nlu.anal import build_anal_tree
    >>> from anytree.search import findall, findall_by_attr
    >>> f = build_anal_tree(sents, lang, engine)
    >>> words = findall_by_attr(f, name='upos', value='VERB')
    >>> objs = findall(words[0], filter_=lambda n: n.dependency_relation in ('obj',))

    :param sents:
    :param lang:
    :param engine:
    :return:
    """
    from sagas.nlu.uni_remote import dep_parse
    from sagas.nlu.utils import fix_sents

    sents = fix_sents(sents, lang)
    # doc is a SentenceIntf
    doc, resp = dep_parse(sents, lang=lang, engine=engine,
                          pipelines=['predicts'], doc_impl=docimpl)
    predicts = resp['predicts'] if resp and 'predicts' in resp else []
    return from_doc(doc, lang, engine, nodecls, predicts)
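
# A minimal usage sketch for build_anal_tree, assuming a corenlp servant is
# reachable through sagas' configuration; the English sentence and the 'obj'
# relation filter are illustrative only.
def _demo_build_anal_tree():
    from anytree.search import findall, findall_by_attr
    f = build_anal_tree('I turned off the lights in the room.', 'en', 'corenlp')
    verbs = findall_by_attr(f, name='upos', value='VERB')
    if verbs:
        objs = findall(verbs[0], filter_=lambda n: n.dependency_relation in ('obj',))
        print(objs)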
def parse_comps(sents, source):
    # Parse a sentence and return its components: prefer the server-side
    # predicts when present, otherwise fall back to locally extracted chunks.
    sents = fix_sents(sents, source)
    engine = cf.engine(source)
    doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
    if len(resp['predicts']) > 0:
        rs = resp['predicts']
    else:
        rs = get_chunks(doc_jsonify)
    return rs
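
# Hedged usage sketch for parse_comps, assuming the module-level imports the
# function body relies on (fix_sents, cf, dep_parse, get_chunks) and a running
# parser servant for Indonesian; the sentence is illustrative.
def _demo_parse_comps():
    for chunk in parse_comps('Berapa umur kamu?', 'id'):
        # each chunk dict carries at least 'type', 'word', 'lemma' and 'domains'
        print(chunk['type'], '->', chunk['word'])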
def get_domains(sents, lang, engine='corenlp', options=None):
    """
    >>> from sagas.nlu.legacy.aiobj_kit import get_domains
    >>> get_domains('你有几台笔记本电脑?', 'zh', 'ltp')
    >>> get_domains('列出上周编辑的文件。', 'zh', 'ltp', DomainGetOptions(enable_predicts=True))

    :param sents:
    :param lang:
    :param engine:
    :param options:
    :return:
    """
    if options is None:
        options = DomainGetOptions()
    pipelines = ['predicts'] if options.enable_predicts else []
    doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
    result_set = []
    if doc_jsonify is not None:
        tc.emp('cyan', resp)
        if resp is not None and 'predicts' in resp and len(resp['predicts']) > 0:
            rs = resp['predicts']
        else:
            rs = get_chunks(doc_jsonify)
        if len(rs) > 0:
            if options.list_chunks:
                list_rs(rs, lang)
            if options.deps_graph:
                tc.gv(display_doc_deps(
                    doc_jsonify, resp,
                    translit_lang=lang if lang in ('ja', 'ko', 'zh', 'fa', 'ar', 'he') else None))
            data = {'lang': lang, "sents": sents, 'engine': engine, 'pipelines': pipelines}
            for r in rs:
                domains = r['domains']
                common = {'lemma': r['lemma'], 'word': r['word'], 'stems': r['stems']}
                meta = {'rel': r['rel'], **common, **data}
                result_set.append((domains, meta))
        else:
            tc.emp('red', '.. no predefined chunk-patterns found.')
            tc.info(doc_jsonify.words_string())
            tc.info(doc_jsonify.dependencies_string())
    return result_set
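
# Hedged consumption sketch for get_domains: each element of the returned
# result set is a (domains, meta) pair, where meta carries the chunk's
# rel/lemma/word/stems plus the request data; the sentence follows the doctest.
def _demo_get_domains():
    for domains, meta in get_domains('你有几台笔记本电脑?', 'zh', 'ltp'):
        print(meta['rel'], meta['word'], len(domains))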
def exec_rules(self, sents, lang='en', engine='corenlp'):
    """
    $ python -m sagas.tool.misc exec_rules "今何時ですか?" ja
    $ python -m sagas.tool.misc exec_rules "今何時ですか?" ja knp
    $ python -m sagas.tool.misc exec_rules "望遠鏡で泳いでいる少女を見た。" ja knp
    $ python -m sagas.tool.misc exec_rules 'Мы написали три книги за год.' ru
    $ python -m sagas.tool.misc exec_rules "现在是几点?" zh ltp
    $ rules '我在臺灣開計程車。' zh
    $ rules '我在台湾开出租车。' zh ltp
    $ rules "吸烟对你的健康有害。" zh ltp
    $ rules 'Tini berumur sepuluh tahun.' id
    $ rules 'Berapa umur kamu?' id
        (no predefined chunk patterns are found here, so all words and dependencies are printed)

    :param sents:
    :param lang:
    :param engine:
    :return:
    """
    from sagas.nlu.uni_parser import get_chunks
    from sagas.nlu.uni_remote import dep_parse

    pipelines = ['predicts']
    doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
    if doc_jsonify is not None:
        color_print('cyan', resp)
        if len(resp['predicts']) > 0:
            rs_represent(resp['predicts'], data={'lang': lang, "sents": sents,
                                                 'engine': engine, 'pipelines': pipelines})
        else:
            rs = get_chunks(doc_jsonify)
            if len(rs) > 0:
                rs_represent(rs, data={'lang': lang, "sents": sents,
                                       'engine': engine, 'pipelines': pipelines})
            else:
                color_print('red', '.. no predefined chunk-patterns found.')
                print(doc_jsonify.words_string())
                print(doc_jsonify.dependencies_string())
def parse_deps(text, lang, translit=None):
    # Render the dependency parse of a sentence in a Streamlit app: chunk list,
    # dependency graph, optional transliteration, then contrast translations.
    text = fix_sents(text, lang)
    engine = cf.engine(lang)
    doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
    if doc_jsonify is not None:
        list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)
        st.graphviz_chart(g)
        if translit is not None:
            st.text(f"♤ {translit}")
        words = [word.text for word in doc_jsonify.words]
        tools.contrast(text, lang, word_map=words)
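
# Minimal Streamlit wiring sketch for parse_deps, assuming it lives in a script
# where st, tools, cf, fix_sents and the display helpers above are importable;
# the Indonesian sentence is illustrative.
def _demo_parse_deps():
    import streamlit as st
    sentence = st.text_input('sentence', 'Berapa umur kamu?')
    if sentence:
        parse_deps(sentence, 'id')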
def clip_parse(self, source, sents='', specified='default', do_test=False):
    """
    >> clip text: یک آبجو مى خواهم.
    $ nlu clip_parse fa
    $ engine='stanford' nluc ar
    $ nlu clip_parse fi 'Tuolla ylhäällä asuu vanha nainen.'
    $ nluc nl 'De vrouw heeft verschillende appels.'
    $ nluc id 'Ini adalah judul buku yang saya baca.' aux
    $ nluc fi 'Voiko täältä lainata aurinkovarjoa?' default True

    :param source:
    :return:
    """
    from sagas.nlu.uni_remote import dep_parse
    from sagas.nlu.common import get_from_clip
    from sagas.conf.conf import cf
    from sagas.nlu.uni_remote_viz import list_chunks
    from sagas.nlu.utils import fix_sents

    if sents == '':
        sents = get_from_clip()
    if sents.strip() == '':
        tc.info('no text available in clipboard.')
        return
    sents = fix_sents(sents, source)
    tc.info(sents)

    # Parse the sentence and display its chunks, domains and contrast translations.
    engine = cf.engine(source)
    doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
    if doc_jsonify is None:
        raise Exception(f'Cannot parse sentence for lang {source}')
    list_chunks(doc_jsonify, resp, source,
                enable_contrast=True,
                specified=None if specified == 'default' else specified)
    words = [word.text for word in doc_jsonify.words]
    self.contrast(sents, source, word_map=words)

    # visual tree
    self.main_domains(sents, source, engine, False)

    # add rulesets procs
    from sagas.nlu.inferencer import do_infers
    cli_cmd, pats = do_infers(sents, source)
    if do_test:
        for pat in pats:
            self.check_rule(sents, source, pat)
def row_view(row):
    text = row[1]
    if display_translit and len(row) > 2:
        label = row[2]
    else:
        label = text
    if st.button(f"{label} ✁ {row[0]}"):
        text = fix_sents(text, lang)
        engine = get_engine(lang)
        doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
        if doc_jsonify is not None:
            list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
            g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)
            st.graphviz_chart(g)
            if len(row) > 2:
                st.text(f"♤ {row[2]}")
            words = [word.text for word in doc_jsonify.words]
            tools.contrast(text, lang, word_map=words)
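
# row_view is a closure over its enclosing Streamlit page (display_translit,
# lang, get_engine, st, tools). A hedged sketch of the enclosing loop; the rows
# are illustrative (id, text, optional transliteration) tuples:
#
#   rows = [('1', 'Berapa umur kamu?', 'berapa umur kamu')]
#   for row in rows:
#       row_view(row)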
def sents_summary(sents, source):
    from sagas.nlu.uni_remote import dep_parse
    from sagas.nlu.uni_remote_viz import list_contrast
    from sagas.conf.conf import cf
    from sagas.nlu.utils import fix_sents
    from sagas.nlu.uni_parser import get_chunks

    sents = fix_sents(sents, source)
    engine = cf.engine(source)
    doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
    types = []
    if doc_jsonify is None:
        raise Exception(f'Cannot parse sentence for lang {source}')
    if len(resp['predicts']) > 0:
        rs = resp['predicts']
    else:
        rs = get_chunks(doc_jsonify)
    for serial, r in enumerate(rs):
        print(f"{serial}. {r['type']} -> {r['word']}")
        types.append(f"{source}:{r['type']}")
    list_contrast(rs, source)
    return types
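
# Hedged usage sketch for sents_summary: it prints one numbered line per chunk
# and returns the chunk types tagged with the language code, e.g. something
# like ['id:verb_domains']; the sentence and the type name are illustrative.
def _demo_sents_summary():
    types = sents_summary('Berapa umur kamu?', 'id')
    print(types)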
def dep_parse(self, sents, lang='en', engine='corenlp'):
    """
    $ python -m sagas.tool.misc dep_parse 'Мы написали три книги за год.' ru
    $ python -m sagas.tool.misc dep_parse "今何時ですか?" ja
    $ python -m sagas.tool.misc dep_parse "今何時ですか?" ja knp
    $ python -m sagas.tool.misc dep_parse "私の趣味は、多くの小旅行をすることです。" ja knp
    $ python -m sagas.tool.misc dep_parse "自由を手に入れる" ja
    $ python -m sagas.tool.misc dep_parse "现在是几点?" zh ltp

    :param sents:
    :param lang:
    :param engine:
    :return:
    """
    from sagas.nlu.uni_jsonifier import rs_summary
    from sagas.nlu.uni_parser import get_chunks
    from sagas.nlu.uni_remote import dep_parse

    doc_jsonify, resp = dep_parse(sents, lang, engine, ['predicts'])
    rs = get_chunks(doc_jsonify)
    rs_summary(rs)
    print('-' * 25, 'predicts')
    pprint(resp)
    print('-' * 25, 'doc')
    pprint(doc_jsonify.as_json)
def predict(self, data: Dict[Text, Any], rule_str: Text, name='_none_',
            engine=None, graph=False, operator=all) -> bool:
    """
    >>> from sagas.tool.dynamic_rules import DynamicRules
    >>> data = {'lang': 'ja', "sents": '彼のパソコンは便利じゃない。'}
    >>> DynamicRules().predict(data, "subj('adj',ガ=kindof('artifact', 'n'))", engine='knp')

    :param data:
    :param rule_str:
    :param name:
    :param engine:
    :return:
    """
    import sagas.tracker_fn as tc
    from sagas.kit.analysis_kit import AnalysisKit

    if engine is None:
        engine = cf.engine(data['lang'])
    pipelines = ['predicts']
    tc.emp('magenta', f"({data['lang']}) {data['sents']}")
    doc_jsonify, resp = dep_parse(data['sents'], data['lang'], engine, pipelines)
    if doc_jsonify is None:
        return False

    if len(resp['predicts']) > 0:
        domains_set = resp['predicts']
    else:
        domains_set = get_chunks(doc_jsonify)
    if graph:
        AnalysisKit().console_vis(data['sents'], data['lang'])

    check_r = []
    for r in domains_set:
        domains = r['domains']
        meta = build_meta(r, data)
        print(r['type'], meta['index'], meta['word'], meta['lemma'], list(meta.keys()))
        position = doc_jsonify.get_position(meta['index'])
        pprint(domains)

        pat = lambda p, name='': Patterns(domains, meta, p, name=name, doc=doc_jsonify)
        if rule_str.startswith('pat('):
            pattern_text = f"[{rule_str}]"
        else:
            pattern_text = f"[pat({self.priority}, name='{name}').{rule_str}]"
        rs = interp(pattern_text, domains, meta, pat)
        print_result(rs)

        # for each entry m of rs: m[1] is the match flag, m[3] is the context;
        # collect the context results of the matched entries
        results = [el for m in rs if m[1] for el in m[3].results]
        # m[2] is the priority
        succ = [abs(m[2]) for m in rs if m[1]]
        priority = max(succ) if len(succ) > 0 else 0
        self.priority_list.append(priority)
        self.result_set.extend(results)
        self.rasa_ents.append({'confidence': None,
                               'start': position[0],
                               'end': position[1],
                               'entity': r['type'],
                               'extractor': 'ruleset',
                               'value': f"{meta['word']}/{meta['lemma']}",
                               'additional_info': results,
                               })
        check_r.append(operator([m[1] for m in rs]))
    return operator(check_r)
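
# Hedged usage sketch for DynamicRules.predict: the rule mirrors the doctest
# above; passing operator=any (instead of the default all) would let the
# predicate succeed when any chunk matches. The 'knp' engine assumes a
# Japanese KNP servant is configured.
def _demo_dynamic_rules_predict():
    from sagas.tool.dynamic_rules import DynamicRules
    data = {'lang': 'ja', 'sents': '彼のパソコンは便利じゃない。'}
    matched = DynamicRules().predict(data, "subj('adj',ガ=kindof('artifact', 'n'))",
                                     engine='knp', operator=any)
    print(matched)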