Exemplo n.º 1
0
 def __init__(self, experiment_name, version_name):
     """Store experiment metadata, a small demo corpus, and the match-model factory.

     Args:
         experiment_name: experiment label; stored on the instance, not used here.
         version_name: model-version label; stored on the instance, not used here.
     """
     self.experiment_name = experiment_name
     self.version_name = version_name
     # Demo corpus: doc-id (str) -> Chinese document text.
     self.wordstest_dict = {
         "0": "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
         "1": "我在玉龙雪山并且喜欢玉龙雪山",
         "2": "我在九寨沟",
         "3": "你好"
     }  # earlier list form: ["我去玉龙雪山并且喜欢玉龙雪山玉龙雪山","我在玉龙雪山并且喜欢玉龙雪山","我在九寨沟"]
     # Factory holding the similarity models; presumably fitted later via mf.init — TODO confirm
     self.mf = ModelFactory(match_models=['bow', 'tfidf', 'ngram_tfidf'])
Exemplo n.º 2
0
class TextMatchWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper exposing TextMatch similarity scoring.

    Holds a small demo corpus and a ModelFactory; the models are fitted on
    the corpus in ``load_context`` and queried in ``predict``.
    """

    def __init__(self, experiment_name, version_name):
        self.experiment_name = experiment_name
        self.version_name = version_name
        # Demo corpus: doc-id (str) -> Chinese document text.
        corpus = [
            "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
            "我在玉龙雪山并且喜欢玉龙雪山",
            "我在九寨沟",
            "你好",
        ]
        self.wordstest_dict = {str(idx): doc for idx, doc in enumerate(corpus)}
        self.mf = ModelFactory(match_models=['bow', 'tfidf', 'ngram_tfidf'])

    def load_context(self, context):
        # Fit/refresh the underlying models on the demo corpus.
        # (Artifacts from `context` are not used here.)
        self.mf.init(words_dict=self.wordstest_dict, update=True)

    def predict(self, context, model_input):
        # Score the first value of the "text" column against the corpus.
        first_query = model_input["text"].values[0]
        return self.mf.predict(first_query)
Exemplo n.º 3
0
from keras.callbacks import EarlyStopping

if __name__ == '__main__':
    # Demo corpus: doc-id (str) -> Chinese document text.
    doc_dict = {
        "0": "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
        "1": "我在玉龙雪山并且喜欢玉龙雪山",
        "2": "我在九寨沟",
        "3": "你好"
    }  #["我去玉龙雪山并且喜欢玉龙雪山玉龙雪山","我在玉龙雪山并且喜欢玉龙雪山","我在九寨沟"]
    #doc_dict = {"0":"This is the first document.", "1":"This is the second second document.", "2":"And the third one."}
    #query = "This is the second second document."
    # Three identical queries paired with positive labels — toy training data.
    query = ["我在九寨沟,很喜欢", "我在九寨沟,很喜欢", "我在九寨沟,很喜欢"]
    train_labels = [1, 1, 1]

    # Build features from BOW and TF-IDF embeddings.
    mf = ModelFactory(match_models=['bow', 'tfidf'])
    #mf.init(words_dict=doc_dict, update=True)
    # update=False — presumably reloads previously persisted model state
    # instead of refitting on doc_dict; TODO confirm against ModelFactory.init
    mf.init(update=False)
    train_sample = []
    for per_query in query:
        # predict_emb returns a dict of {model_name: embedding} for one query.
        bow_pre = mf.predict_emb(per_query)
        # print ('pre>>>>>', bow_pre)
        per_train_sample = []
        # Concatenate every model's embedding into one flat feature vector.
        for per_v in bow_pre.values():
            per_train_sample.extend(per_v)
        train_sample.append(per_train_sample)
    #print ('train_sample, train_labels', train_sample, train_labels)
    #print ('train_sample:::::', len(train_sample[0]))
    # First two samples train, third validates (snippet is truncated below;
    # val_x is consumed by code not shown here).
    train_x = np.array(train_sample[:2])
    train_y = train_labels[:2]
    val_x = np.array(train_sample[2:3])
Exemplo n.º 4
0
cd TextMatch
export PYTHONPATH=${PYTHONPATH}:../TextMatch
'''

import sys
from textmatch.models.text_embedding.model_factory_sklearn import ModelFactory


if __name__ == '__main__':
    # doc: demo corpus, doc-id (str) -> Chinese document text
    doc_dict = {"0":"我去玉龙雪山并且喜欢玉龙雪山玉龙雪山", "1":"我在玉龙雪山并且喜欢玉龙雪山", "2":"我在九寨沟", "3":"你好"}   
    # query text to score against the corpus
    query = "我在九寨沟,很喜欢"
    
    # Model factory — add the models you need to the list from:
    # 'bow', 'tfidf', 'ngram_tfidf', 'bert', 'albert', 'w2v'
    mf = ModelFactory( match_models=['bow', 'tfidf', 'ngram_tfidf'] )
    # Initialise (fit) the selected models on the corpus
    mf.init(words_dict=doc_dict, update=True)

    # Similarity of query vs. every doc, per model
    search_res = mf.predict(query)
    print ('search_res>>>>>', search_res) 
    # search_res>>>>> {'bow': [('0', 0.2773500981126146), ('1', 0.5303300858899106), ('2', 0.8660254037844388), ('3', 0.0)], 'tfidf': [('0', 0.2201159065358879), ('1', 0.46476266418455736), ('2', 0.8749225357988296), ('3', 0.0)], 'ngram_tfidf': [('0', 0.035719486884261346), ('1', 0.09654705406841395), ('2', 0.9561288696241232), ('3', 0.0)]}
    
    # Embedding of the query itself, per model
    query_emb = mf.predict_emb(query)
    print ('query_emb>>>>>', query_emb) 
    '''
    pre_emb>>>>> {'bow': array([1., 0., 0., 1., 1., 0., 1., 0.]), 'tfidf': array([0.61422608, 0.        , 0.        , 0.4842629 , 0.4842629 ,
       0.        , 0.39205255, 0.        ]), 'ngram_tfidf': array([0.        , 0.        , 0.37156534, 0.37156534, 0.        ,
       0.        , 0.        , 0.29294639, 0.        , 0.37156534,
Exemplo n.º 5
0
import sys
from textmatch.models.text_embedding.model_factory_sklearn import ModelFactory

if __name__ == '__main__':
    # Demo corpus: doc-id (str) -> Chinese document text.
    doc_dict = {
        "0": "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
        "1": "我在玉龙雪山并且喜欢玉龙雪山",
        "2": "我在九寨沟",
        "3": "你好"
    }  #["我去玉龙雪山并且喜欢玉龙雪山玉龙雪山","我在玉龙雪山并且喜欢玉龙雪山","我在九寨沟"]
    #doc_dict = {"0":"This is the first document.", "1":"This is the second second document.", "2":"And the third one."}
    query = "我在九寨沟,很喜欢"
    #query = "This is the second second document."

    # BOW-only similarity scoring
    mf = ModelFactory(match_models=['bow'])
    mf.init(words_dict=doc_dict, update=True)
    # predict returns per-model lists of (doc_id, similarity) pairs
    bow_pre = mf.predict(query)
    print('pre>>>>>', bow_pre)
    # pre>>>>> {'bow': [('0', 0.2773500981126146), ('1', 0.5303300858899106), ('2', 0.8660254037844388), ('3', 0.0)]}

    # Same query scored by three models at once
    mf = ModelFactory(match_models=['bow', 'tfidf', 'ngram_tfidf'])
    mf.init(words_dict=doc_dict, update=True)
    pre = mf.predict(query)
    print('pre>>>>>', pre)
    # pre>>>>> {'bow': [('0', 0.2773500981126146), ('1', 0.5303300858899106), ('2', 0.8660254037844388), ('3', 0.0)], 'tfidf': [('0', 0.2201159065358879), ('1', 0.46476266418455736), ('2', 0.8749225357988296), ('3', 0.0)], 'ngram_tfidf': [('0', 0.035719486884261346), ('1', 0.09654705406841395), ('2', 0.9561288696241232), ('3', 0.0)]}
    # Embedding of the query itself, per model
    pre_emb = mf.predict_emb(query)
    print('pre_emb>>>>>', pre_emb)
    '''
    pre_emb>>>>> {'bow': array([1., 0., 0., 1., 1., 0., 1., 0.]), 'tfidf': array([0.61422608, 0.        , 0.        , 0.4842629 , 0.4842629 ,
       0.        , 0.39205255, 0.        ]), 'ngram_tfidf': array([0.        , 0.        , 0.37156534, 0.37156534, 0.        ,