Пример #1
0
    def test_oov_emb(self):
        """Test the embedding of out-of-vocabulary (OOV) input.

        Asserts that encoding a lone comma compares equal to ``0.0`` and that
        the printed form of the vector for '特价机票' equals the printed form
        of the element-wise mean of the vectors for '特价' and '机票'.
        """
        w = ','
        comma_res = text2vec.encode(w)
        print(w, comma_res)
        self.assertEqual(comma_res, 0.0)

        w = '特价机票'
        r = text2vec.encode(w)
        print(w, r)

        w = '特价'
        r1 = text2vec.encode(w)
        print(w, r1)

        w = '机票'
        r2 = text2vec.encode(w)
        print(w, r2)

        # Mean of the two sub-word vectors.
        r_average = np.array([r1, r2]).sum(axis=0) / 2.0
        print('r_average:', r_average)

        # The comparison is made on the printed representations, so build
        # each string once and reuse it for both the message and the check.
        actual = str(r)
        expected = str(r_average)
        if actual == expected:
            print('same')
        # assertEqual (instead of assertTrue(a == b)) shows both values and a
        # diff when the check fails.
        self.assertEqual(actual, expected)
Пример #2
0
def test_sentence_emb():
    """Print embeddings for characters and phrases, then compare an average.

    Encodes and prints '你', '好', '吗', '你好' and '你好吗' in that order,
    then prints "same" when the printed form of the '你好吗' vector equals the
    printed form of the mean of the '你好' and '吗' vectors, and 'diff'
    otherwise.
    """
    for text in ('你', '好', '吗', '你好', '你好吗'):
        print(text, text2vec.encode(text))

    import numpy as np
    pieces = [text2vec.encode(part) for part in ('你好', '吗')]
    average = np.array(pieces).sum(axis=0) / 2.0
    print('average:', average)
    act = text2vec.encode('你好吗')

    print("same" if str(act) == str(average) else 'diff')
Пример #3
0
def test_oov_emb():
    """Encode and print ',', '特价机票', '特价' and '机票', in that order."""
    for text in (',', '特价机票', '特价', '机票'):
        print(text, text2vec.encode(text))
Пример #4
0
 def test_encode_text(self):
     """Test the encode result for a whole sentence.

     Asserts that the embedding has shape ``(200,)`` and that its first
     three components, formatted to three decimals, match the expected
     values.
     """
     a = '如何更换花呗绑定银行卡'
     emb = text2vec.encode(a)
     print(a, emb)
     self.assertEqual(emb.shape, (200, ))
     head = ' '.join("{:.3f}".format(i) for i in emb[:3])
     # assertEqual reports the actual prefix on failure, unlike
     # assertTrue(a == b), which only says "False is not true".
     self.assertEqual(head, "0.041 -0.126 0.019")
Пример #5
0
 def test_encode_word(self):
     """Test the encode result for a single word.

     Asserts that the embedding has shape ``(200,)`` and that its first
     three components, formatted to three decimals, match the expected
     values.
     """
     word = '银行卡'
     emb = text2vec.encode(word)
     print(word, emb)
     self.assertEqual(emb.shape, (200, ))
     head = ' '.join("{:.3f}".format(i) for i in emb[:3])
     # assertEqual reports the actual prefix on failure, unlike
     # assertTrue(a == b), which only says "False is not true".
     self.assertEqual(head, "0.002 -0.126 0.053")
Пример #6
0
    def test_encode_char(self):
        """Test the encode result for a single character.

        Asserts that the embedding is a NumPy array of shape ``(200,)`` and
        that its first three components, formatted to three decimals, match
        the expected values.
        """
        char = '卡'
        emb = text2vec.encode(char)
        t = type(emb)
        print(t)
        # isinstance-based check: idiomatic and reports the actual type on
        # failure.
        self.assertIsInstance(emb, np.ndarray)

        print(char, emb, emb.shape)
        self.assertEqual(emb.shape, (200, ))

        # Format the leading components once; reuse for printing and checking.
        head = ' '.join(["{:.3f}".format(i) for i in emb[:3]])
        print(head)
        self.assertEqual(head, "0.068 -0.110 -0.048")
Пример #7
0
@author:XuMing([email protected])
@description: 
"""

import sys

import numpy as np

sys.path.append('..')
import text2vec

# Set text2vec's log level to 'INFO' at import time.
text2vec.set_log_level('INFO')

if __name__ == '__main__':
    # Encode a single character; print the result's type and shape, then
    # the value itself.
    char = '卡'
    emb = text2vec.encode(char)
    print(type(emb), emb.shape)
    print(char, emb)

    # Encode a word and print it together with its embedding.
    word = '银行卡'
    print(word, text2vec.encode(word))

    # Encode a full sentence and print it together with its embedding.
    a = '如何更换花呗绑定银行卡'
    emb = text2vec.encode(a)
    print(a, emb)

    # Inputs of increasing length: a character, a word, a sentence and a
    # longer punctuated text repeating that sentence.
    b = [
        '卡', '银行卡', '如何更换花呗绑定银行卡',
        '如何更换花呗绑定银行卡,如何更换花呗绑定银行卡。如何更换花呗绑定银行卡?。。。这个,如何更换花呗绑定银行卡!'
    ]
    # NOTE(review): `res` is not used within the visible part of the script;
    # presumably it collects results for `b` further on — confirm.
    res = []
Пример #8
0
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""

import text2vec

# Three sample sentences used for the encode/score demonstration.
a = '如何更换花呗绑定银行卡'
b = '花呗更改绑定银行卡'
c = '我什么时候开通了花呗'

# Print the embedding of the first sentence.
emb = text2vec.encode(a)
print(emb)

# Score every pair of sentences, in the order (a, b), (a, c), (b, c), and
# print each pair with its score.
for left, right in ((a, b), (a, c), (b, c)):
    s = text2vec.score(left, right)
    print(left, right, s)
Пример #9
0
def _rows_with_titles(df, titles):
    """Return the rows of ``df`` whose ``title`` value is contained in ``titles``."""
    return df[df["title"].apply(lambda title: title in titles)]


def _funding_rates(projects):
    """Build one summary dict (id, title, url, amounts, progress, status) per row."""
    columns = ("id", "title", "url", "funding_target", "now_funding", "status")
    rates = []
    # Walk the underlying column arrays in lockstep instead of slicing the
    # frame once per field per row.
    for pid, title, url, target, raised, state in zip(
            *(projects[column].values for column in columns)):
        funding_target = int(target)
        now_funding = int(raised)
        # A zero target would raise ZeroDivisionError and abort the whole
        # request; report 0.0 progress for such rows instead.
        if funding_target:
            proportion = round(now_funding / funding_target * 100, 2)
        else:
            proportion = 0.0
        rates.append({
            "id": int(pid),
            "title": str(title),
            "url": str(url),
            "amountRaised": now_funding,
            "amountReached": funding_target,
            "proportion": proportion,
            "status": str(state),
        })
    return rates


def similarProj(request):
    """Return projects similar to the one submitted by the user.

    On a POST request the user's values are read from ``request.data[0]``.
    By position they are used as: 0 = project title, 2 = funding target,
    3 = duration, 4 = feedback count, 5 = project text. The text is encoded
    with ``text2vec`` and appended to the stored embeddings, a cosine
    similarity matrix is computed, and the most similar stored projects are
    used to fill four sections of the result:

    * ``fundraisings`` - ``campaignlist`` of the top 3 similar projects
    * ``rates`` - funding progress summaries of the top 10
    * ``points`` - ``funding_target_med`` / ``days_med`` / ``cam_count_med``
      of the top 15 next to the user's own values
    * ``chart`` - ``funding_table`` for projects selected by
      ``funding_target_similar`` for the user's amount

    Returns:
        A ``JsonResponse`` with the result dict on success, or a ``Response``
        with HTTP 400 carrying the error message when a ``ValueError`` is
        raised while processing.

    NOTE(review): for any method other than POST the function falls through
    and returns ``None``; confirm that the routing/decorator restricts this
    view to POST.
    """
    final_result = {"fundraisings": [], "rates": [], "points": [], "chart": []}
    if request.method == "POST":
        try:
            # Load the stored embeddings and the project table.
            text2vec_df = pd.read_pickle(
                "/Users/hw_students/proj02/proj02_data/text2vec_df.pkl")
            df = pd.read_pickle("/Users/hw_students/proj02/proj02_data/df.pkl")

            # Parse the user's input.
            userdata = request.data
            print("userdata:")
            print(userdata)
            unit = np.array(list(userdata[0].values()))

            # Append the embedding of the user's text as the last row.
            # DataFrame.append was removed in pandas 2.0; pd.concat with its
            # default arguments performs the same row-wise append.
            text2vec_df = pd.concat(
                [text2vec_df, pd.DataFrame(text2vec.encode(unit[5])).T])

            # Append the user's project title so it lines up with that row.
            print(unit[0])
            titles = df.title.tolist() + [unit[0]]
            cs = cosine_similarity(text2vec_df)

            # --- Similar cases ---
            RETURN_NUMBER = 3
            # Per the original comment: all campaigns scoring > 0.9, as a
            # list. Index -1 refers to the user's row appended above.
            similar_project = getSimiliarArticle(-1, cs, titles)
            new = _rows_with_titles(df, similar_project[:RETURN_NUMBER])
            final_result["fundraisings"] = campaignlist(new)

            # --- Funding rates of the top ten ---
            NUMBER = 10
            df_10 = _rows_with_titles(df, similar_project[:NUMBER])
            final_result["rates"] = _funding_rates(df_10)

            # --- Where the user's values fall relative to the top fifteen ---
            CALCULATE_NUMBER = 15
            df_15 = _rows_with_titles(df, similar_project[:CALCULATE_NUMBER])
            points = {}
            points["averageTarget"] = funding_target_med(df_15)
            points["userTarget"] = unit[2]
            points["averageTime"] = days_med(df_15)
            points["userTime"] = unit[3]
            points["averageFeedback"] = cam_count_med(df_15)
            points["userFeedback"] = unit[4]
            final_result["points"] = points

            # --- Chart ---
            # Per the original comment: find the ten campaigns whose target
            # is closest to the amount entered by the user.
            money = int(unit[2])  # amount entered by the user
            df3 = funding_target_similar(money, df)
            # Split out the reward plans.
            list3 = campaignlist_origin(df3)
            # Distribution for similar amounts.
            final_result["chart"] = funding_table(list3, df3)
            print(final_result)

            return JsonResponse(final_result, safe=False)
        except ValueError as e:
            # Fall back to str(e) so an argument-less ValueError does not
            # raise IndexError inside the handler.
            message = e.args[0] if e.args else str(e)
            return Response(message, status.HTTP_400_BAD_REQUEST)