Example #1
    def __init__(self, lang='en'):
        from sagas.nlu.corenlp_helper import get_nlp

        self.lang = lang
        # spacy_model=lang_spacy_mappings[lang][0]
        # self.spacy_nlp=spacy.load(spacy_model)
        self.core_nlp = get_nlp(lang)
Example #2
def handle_digest():
    """
    $ curl -XPOST -H 'Content-Type: application/json' -d '{"sents":"Die Eltern mögen den Käse.","lang":"de"}' http://localhost:8090/digest
    will get something like: {"lang": "de", "root": "mögen", "verbs": [["mögen", "mögen"]]}
    :return:
    """
    import json

    from flask import request  # assumed Flask, given the request.get_json() call below
    from sagas.nlu.corenlp_helper import get_nlp

    # print ("request is json?", request.is_json)
    content = request.get_json()
    sents = content['sents']
    lang = content['lang']
    print(lang, sents)
    # dumped = json_to_string({"ok": 'JSON posted'})
    nlp = get_nlp(lang)
    doc = nlp(sents)
    root, root_idx = get_doc_root_and_idx(doc)

    # subj(nsubj), obj(iobj, dobj, pobj)
    # pobj : object of a preposition
    # obl is similar to pobj: the obl relation is used for nominals (nouns, pronouns,
    # noun phrases) functioning as non-core (oblique) arguments or adjuncts.
    # This means it functionally corresponds to an adverbial attached to a verb,
    # adjective, or another adverb.
    rs = get_root_rel(doc, root_idx, ['subj', 'obj', 'cop', 'obl'])
    data = {'lang': lang, 'root': root, 'verbs': get_doc_verbs(doc)}
    for el in rs:
        data[el[0]] = el[1]
    # data_y=yaml.dump(data, default_flow_style=True,Dumper=KludgeDumper,encoding=None)
    data_y = json.dumps(data, ensure_ascii=False)
    return data_y
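
For reference, a minimal sketch of hosting this handler, assuming a Flask app (the app object, module layout, and run call are assumptions; only the POST /digest route and port 8090 come from the curl example in the docstring):

from flask import Flask

app = Flask(__name__)
# Register the handler on the route used in the docstring's curl example.
app.add_url_rule('/digest', 'digest', handle_digest, methods=['POST'])

if __name__ == '__main__':
    app.run(port=8090)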
Example #3
def words_table(sents, lang):
    import sagas
    from sagas.nlu.corenlp_helper import get_nlp
    nlp = get_nlp(lang)
    doc = nlp(sents)
    sentence = doc.sentences[0]
    rows = [[
        word.text, word.lemma, word.upos, word.xpos, word.dependency_relation,
        word.governor, word.feats
    ] for word in sentence.words]
    return sagas.to_df(
        rows, ['text', 'lemma', 'upos', 'xpos', 'dep', 'head', 'feats'])
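
A usage sketch, assuming sagas.to_df returns a pandas DataFrame (which the column-name argument suggests):

# Hypothetical call; one row per token with its lemma, relation, and head index.
df = words_table('Barack Obama was born in Hawaii.', 'en')
print(df[['text', 'lemma', 'dep', 'head']])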
Example #4
def viz(sents, lang='fr'):
    """
    viz("Tu as choisi laquelle tu vas manger ?")
    viz('I am a student', 'en')
    viz('彼らは3月に訪ねて来ます。', 'ja')
    :param sents:
    :param lang:
    :return:
    """
    from sagas.nlu.corenlp_helper import get_nlp

    nlp = get_nlp(lang)
    doc = nlp(sents)
    cv = EnhancedViz(shape='egg', size='8,5', fontsize=20)
    sentence = doc.sentences[0]
    return cv.analyse_doc(sentence, None)
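
Note that EnhancedViz is assumed to be imported at module level in the original source; the import added above restores only get_nlp. Assuming analyse_doc returns a graphviz.Digraph-like object (not confirmed by the snippet), the result renders inline in Jupyter or can be saved explicitly:

g = viz('I am a student', 'en')
g.render('parse_tree', format='png')  # assumption: graphviz-style render API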
Example #5
    def parse(self, sents, lang='en'):
        """
        $ python -m sagas.nlu.corenlp_helper parse 'Barack Obama was born in Hawaii.' en
        $ python -m sagas.nlu.corenlp_helper parse 'Θα το θέλατε με ρύζι;' el

        $ parse 'Die weiße Fläche ist aus dem All sichtbar.' de
        $ parse '私は高校生です。' ja
        $ parse 'Yo tengo una casa en México.' es

        :param sents:
        :param lang:
        :return:
        """
        # routine=langs[lang]
        analyse(sents, get_nlp(lang))
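
The python -m invocations in the docstring imply the module dispatches class methods from the command line; a sketch assuming Google's python-fire, with CoreNlp as a hypothetical name for the enclosing class:

if __name__ == '__main__':
    import fire
    fire.Fire(CoreNlp)  # hypothetical class name; fire maps CLI args to methods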
Example #6
    def verb_domains(self, sents, lang='en'):
        """
        $ python -m sagas.nlu.uni_parser verb_domains "Barack Obama was born in Hawaii." en
        # Chinese gloss of the sentence below: 我有一只阿比西尼亚猫
        $ python -m sagas.nlu.uni_parser verb_domains "I have an Abyssinian cat." en

        $ python -m sagas.nlu.uni_parser verb_domains 'Что ты обычно ешь на ужин?' ru
        $ python -m sagas.nlu.uni_parser verb_domains 'Die Zeitschrift erscheint monatlich.' de

        # Test multiple verbs (filtering out verbs in subordinate clauses):
        $ python -m sagas.nlu.uni_parser verb_domains 'Tu as choisi laquelle tu vas manger ?' fr
        :param sents:
        :param lang:
        :return:
        """
        import sagas
        from sagas.nlu.corenlp_helper import get_nlp
        serial_numbers = '❶❷❸❹❺❻❼❽❾❿'
        nlp = get_nlp(lang)
        doc = nlp(sents)
        # Analyze dependency relations bottom-up; this can be used to collect the
        # child nodes of specific relations, e.g. 'nsubj:pass' and 'obl' here.
        # word.governor is the parent of the current word.
        sent = doc.sentences[0]
        rs = get_verb_domain(sent)
        # r=rs[0]
        for num, r in enumerate(rs):
            # print(json.dumps(r, indent=2, ensure_ascii=False))
            print(serial_numbers[num], '-' * 50)
            # print(r['verb'], r['index'])
            print(r['word'], r['index'])
            # df=sagas.to_df(r[0]['domains'], ['rel', 'index', 'text', 'children'])
            df = sagas.to_df(
                r['domains'],
                ['rel', 'index', 'text', 'lemma', 'children', 'features'])
            sagas.print_df(df)
            for stem in r['stems']:
                if stem[0] == 'obj':
                    print('object ->', ' '.join(stem[1]))
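
A sketch of consuming get_verb_domain programmatically rather than printing, based only on the keys the snippet above accesses ('word', 'index', 'domains', 'stems'); the exact schema is inferred, not documented:

def list_objects(sent):
    # Collect the object phrase of each verb domain in a parsed sentence,
    # mirroring the stem[0] == 'obj' check in the snippet above.
    objects = []
    for r in get_verb_domain(sent):
        for stem in r['stems']:
            if stem[0] == 'obj':
                objects.append(' '.join(stem[1]))
    return objects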
Example #7
    def __call__(self, sents):
        from sagas.nlu.corenlp_helper import get_nlp
        nlp = get_nlp(self.lang)
        doc = nlp(sents)
        return CoreNlpSentImpl(doc.sentences[0], text=sents)
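
The enclosing class is not shown; a usage sketch with CoreNlpParser as a hypothetical name for it, assuming its __init__ stores self.lang as in Example #1:

parser = CoreNlpParser(lang='de')            # hypothetical class name
sent = parser('Die Eltern mögen den Käse.')  # a CoreNlpSentImpl for the first sentence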