def execute(self, sent):
    """Estimate the dialogue act of *sent* and attach extracted attributes.

    Returns a dict containing 'user_act_type' (the predicted act type)
    plus, unless the act type is 'other', the named-entity attributes
    extracted from the sentence.
    """
    # Predict the dialogue act type from bag-of-words features.
    features = sent2features_(sent)
    act_type = self.__predictor.predict([features])

    # Re-analyze the sentence morphologically for attribute extraction:
    # each row is [surface, *morph_features].
    surfaces, morph_features = analyze_morph(sent)
    morphed_sent = [[surface] + feats.split(',')
                    for surface, feats in zip(surfaces, morph_features)]
    features = sent2features(morphed_sent)
    named_entity = self.__extractor.extract(features, morphed_sent)

    dialogue_act = {'user_act_type': act_type}
    # Attributes are only attached for non-'other' act types.
    if act_type != 'other':
        dialogue_act.update(dict(named_entity))
    return dialogue_act
def execute(self, sent):
    """Estimate the dialogue act of *sent* and attach extracted attributes.

    Act types predicted by the classifier:
      - genre: e.g. Italian, Chinese
      - location: e.g. Shinjuku
      - money: e.g. 10,000 yen
      - other: everything else
    """
    # --- dialogue act type estimation ---
    features = sent2features_(sent)
    act_type = self.__predictor.predict([features])

    # --- attribute extraction (e.g. the concrete genre string) ---
    surfaces, morph_features = analyze_morph(sent)
    morphed_sent = [[surface] + feats.split(',')
                    for surface, feats in zip(surfaces, morph_features)]
    features = sent2features(morphed_sent)
    named_entity = self.__extractor.extract(features, morphed_sent)

    dialogue_act = {'user_act_type': act_type}
    # Attach the extracted attributes to the result.
    dialogue_act.update(dict(named_entity))
    return dialogue_act
def sent2features_(sent):
    """Convert *sent* into bag-of-words features via the shared dictionary."""
    from training_data_generator.scripts.analyzer import analyze_morph

    # Morphological analysis yields the surface forms we featurize.
    surfaces, _ = analyze_morph(sent)
    dictionary_path = os.path.join(os.path.dirname(__file__), 'dic.txt')
    vocabulary = corpora.Dictionary.load_from_text(dictionary_path)
    return to_features(vocabulary, surfaces)
def sent2features_(sent):
    """Convert *sent* into bag-of-words features via the shared dictionary."""
    from training_data_generator.scripts.analyzer import analyze_morph
    dic_path = os.path.join(os.path.dirname(__file__), 'dic.txt')
    # Morphologically analyze with MeCab and keep only the surface forms.
    surfaces, _ = analyze_morph(sent)
    # Corpus dictionary mapping tokens to feature ids.
    dictionary = corpora.Dictionary.load_from_text(dic_path)
    features = to_features(dictionary, surfaces)
    return features
def execute(self, sent):
    """Classify the dialogue act of *sent* and merge in extracted entities."""
    # Dialogue act type prediction from bag-of-words features.
    features = sent2features_(sent)
    act_type = self.__predictor.predict([features])

    # Morpheme rows of [surface, *features] feed the entity extractor.
    surfaces, morph_features = analyze_morph(sent)
    morphed_sent = [[surface] + feats.split(',')
                    for surface, feats in zip(surfaces, morph_features)]
    crf_features = sent2features(morphed_sent)
    named_entity = self.__extractor.extract(crf_features, morphed_sent)

    dialogue_act = {'user_act_type': act_type}
    dialogue_act.update(dict(named_entity))
    return dialogue_act
def matching(sentence, ne_list):
    """Produce IOB2-labelled morpheme rows for *sentence*.

    *ne_list* is a sequence of (word, tag) pairs. Each output row is
    [surface, *morph_features, label] where label is 'B-<tag>'/'I-<tag>'
    for morphemes covered by a named entity and 'O' otherwise.
    """

    def char_spans(text, entities):
        # Character (start, end) offsets of each entity word, searched
        # left to right so repeated words bind to successive occurrences.
        spans = []
        cursor = 0
        for word, _tag in entities:
            word = ''.join(word.split(' '))
            start = text.index(word, cursor)
            cursor = start + len(word)
            spans.append((start, cursor))
        return spans

    def morph_index_groups(morphs, spans):
        # For each character span, the indices of the morphemes it covers.
        groups = []
        for start, end in spans:
            consumed = 0
            covered = []
            for idx, morph in enumerate(morphs):
                if consumed < start:
                    consumed += len(morph)
                elif consumed < end:
                    covered.append(idx)
                    consumed += len(morph)
                else:
                    break
            groups.append(covered)
        return groups

    wakati, features = analyze_morph(sentence)
    # Strip spaces so character offsets line up with the morpheme stream.
    sentence = ''.join(sentence.split(' '))
    word_pos_list = char_spans(sentence, ne_list)
    morph_pos_list = morph_index_groups(wakati, word_pos_list)

    # IOB2 tagging: first covered morpheme gets B-, the rest get I-.
    ylabel = ['O'] * len(wakati)
    for (_word, tag), covered in zip(ne_list, morph_pos_list):
        for position, morph_idx in enumerate(covered):
            ylabel[morph_idx] = ('B' if position == 0 else 'I') + '-' + tag

    labeled_sent = [[wakati[i]] + features[i].split(',') + [ylabel[i]]
                    for i in range(len(wakati))]

    # Sanity check: each span's characters must equal the concatenation
    # of its covered morphemes; report any alignment failure.
    for (start, end), covered in zip(word_pos_list, morph_pos_list):
        span_text = sentence[start:end]
        morph_text = ''.join([wakati[i] for i in covered])
        if span_text != morph_text:
            print(span_text, morph_text)

    return labeled_sent
def matching(sentence, ne_list):
    """Align named-entity (word, tag) pairs to morphemes and emit IOB2 labels.

    Returns one row per morpheme of *sentence*: [surface, *morph_features,
    label], where label is 'B-<tag>'/'I-<tag>' inside an entity, else 'O'.
    """

    def get_word_pos_list(sentence, ne_list):
        # Character (start, end) span of each entity word, searched left to
        # right so duplicate words bind to successive occurrences.
        word_pos_list = []
        searched_pos = 0
        for nw, ne in ne_list:
            nw = ''.join(nw.split(' '))  # entity words may contain spaces
            idx = sentence.index(nw, searched_pos)
            searched_pos = idx + len(nw)
            word_pos_list.append((idx, searched_pos))
        return word_pos_list

    def get_morph_pos_list(wakati, word_pos_list):
        # Indices of the morphemes whose characters fall inside each span.
        morph_pos_list = []
        for start_pos, end_pos in word_pos_list:
            ch_cnt = 0
            morph_pos = []
            for i, morph in enumerate(wakati):
                if ch_cnt < start_pos:
                    ch_cnt += len(morph)
                elif ch_cnt < end_pos:
                    morph_pos.append(i)
                    ch_cnt += len(morph)
                else:
                    break
            morph_pos_list.append(morph_pos)
        return morph_pos_list

    def tagging(morph_pos_list, ne_list, ylabel):
        # Write IOB2 tags into ylabel in place: B- on the first morpheme of
        # each entity, I- on the rest.
        for i in range(len(ne_list)):
            nw, ne = ne_list[i]
            morph_pos = morph_pos_list[i]
            for j, k in enumerate(morph_pos):
                prefix = 'B' if j == 0 else 'I'
                ylabel[k] = prefix + '-' + ne

    wakati, features = analyze_morph(sentence)
    # Strip spaces so character offsets line up with the morpheme stream.
    sentence = ''.join(sentence.split(' '))
    word_pos_list = get_word_pos_list(sentence, ne_list)
    morph_pos_list = get_morph_pos_list(wakati, word_pos_list)
    ylabel = ['O'] * len(wakati)
    tagging(morph_pos_list, ne_list, ylabel)
    labeled_sent = []
    for i in range(len(wakati)):
        tmp = []
        tmp.append(wakati[i])
        tmp.extend(features[i].split(','))
        tmp.append(ylabel[i])
        labeled_sent.append(tmp)
    # Error check: report spans whose characters don't match the
    # concatenation of their covered morphemes (alignment failure).
    for word_pos, morph_pos in zip(word_pos_list, morph_pos_list):
        start_pos, end_pos = word_pos
        word1, word2 = sentence[start_pos:end_pos], ''.join(
            [wakati[i] for i in morph_pos])
        if word1 != word2:
            print(word1, word2)
    return labeled_sent