Пример #1
0
from magpie import Magpie

# Alternative corpus location:
# train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'data/hep-categories'

magpie = Magpie()

# Train word2vec on train_dir.
# NOTE(review): MWC / w2vc look like "min word count" / "word2vec context"
# -- confirm against the Magpie fork in use.
magpie.train_word2vec(
    train_dir,
    vec_dim=100,
    MWC=1,
    w2vc=5,
)

# Fit the scaler and initialise the word vectors from the category data.
magpie.fit_scaler('data/hep-categories')
magpie.init_word_vectors('data/hep-categories')

#定义所有类别
labels = [
    '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121',
    '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141',
    '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212',
    '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222',
    '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243',
    '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331',
    '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351',
    '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114',
    '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132',
    '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147',
    '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156',
    '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171',
    '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710',
    '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
    '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224',
    '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323',
    '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53',
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
Пример #2
0
# Alternative corpus location (disabled):
#train_dir = 'C:\\data\\Railway_Passenger_Transport'
# Corpus directory handed to magpie.train_word2vec in the loop below; its last
# three characters are also used when building the output file names.
train_dir = 'C:\\data\\nlp_chinese_corpus'

# Message prefixes; not referenced in the visible part of this script.
Success = 'Success:'
error = 'error:'

# One Magpie instance is reused for every hyper-parameter combination below.
magpie = Magpie()
# Passed to magpie.train as a callback, then asked to plot the loss per run.
lossHistory = LossHistory()
for EMBEDDING_SIZE in [250, 500]:
    for MIN_WORD_COUNT in [5, 10]:
        for WORD2VEC_CONTEXT in [5, 10]:
            magpie.train_word2vec(train_dir,
                                  vec_dim=EMBEDDING_SIZE,
                                  MWC=MIN_WORD_COUNT,
                                  w2vc=WORD2VEC_CONTEXT)
            magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
            magpie.train('C:\\magpie-master\\data\\hep-categories',
                         labels,
                         callbacks=[lossHistory],
                         test_ratio=0.1,
                         epochs=20)  # 训练,20%数据作为测试数据,20轮
            lossHistory.loss_plot(
                'epoch', 'C:\\magpie-master\\' + train_dir[-3:] + '_' +
                str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) + '_' +
                str(WORD2VEC_CONTEXT) + '.jpg')
            magpie.save_word2vec_model(
                'C:\\magpie-master\\save\\embeddings\\' + train_dir[-3:] +
                '_' + str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) + '_' +
                str(WORD2VEC_CONTEXT))
            magpie.save_scaler('C:\\magpie-master\\save\\scaler\\' +
                               train_dir[-3:] + '_' + str(EMBEDDING_SIZE) +
Пример #3
0
import os
import sys

sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

# Shared settings, named once so the calls below cannot drift apart.
DATA_DIR = '../data/hep-categories'
# The same dimension is passed to both train_word2vec and init_word_vectors.
VEC_DIM = 3

magpie = Magpie()
magpie.train_word2vec(DATA_DIR, vec_dim=VEC_DIM)  # train a word2vec model
magpie.fit_scaler(DATA_DIR)  # fit the scaler
magpie.init_word_vectors(DATA_DIR, vec_dim=VEC_DIM)  # initialise word vectors
labels = ['军事', '旅游', '政治']  # all categories
# Train for 20 epochs, using 20% of the data as test data.
magpie.train(DATA_DIR, labels, test_ratio=0.2, epochs=20)

# Save the trained model files.
magpie.save_word2vec_model('../workspace/embeddings', overwrite=True)
magpie.save_scaler('../workspace/scaler', overwrite=True)
magpie.save_model('../workspace/model.h5')