def train(m, Tx, x, y_sentiment, voc_embedding, word_to_index, *,
          num_classes=6, train_fraction=0.8, epochs=50, batch_size=64):
    """Build, train and evaluate the sentiment model, then return it.

    The model is created by ``sentiment_model``, compiled with categorical
    cross-entropy / Adam / accuracy, fitted on the training split and
    evaluated on the held-out split. Loss and accuracy are printed.

    Args:
        m: Passed to ``sentence_to_indices`` (presumably the number of
            sentences in ``x`` -- confirm against that helper).
        Tx: Passed to ``sentiment_model`` and ``sentence_to_indices``
            (presumably the padded sentence length -- confirm).
        x: Sentences (danmu / bullet comments) to convert to index rows.
        y_sentiment: Labels handed to ``div_train_and_test``.
        voc_embedding: Embedding data forwarded to ``sentiment_model``.
        word_to_index: Word-to-index mapping used by both helpers.
        num_classes: Forwarded to ``div_train_and_test`` (default 6, the
            value that used to be hard-coded).
        train_fraction: Forwarded to ``div_train_and_test`` through its
            ``propertion`` keyword (default 0.8, as before).
        epochs: Number of epochs given to ``model.fit`` (default 50).
        batch_size: Batch size given to ``model.fit`` (default 64).

    Returns:
        The fitted model. The function previously returned ``None``, so the
        trained model was lost because persistence is disabled below.
    """
    model = sentiment_model(Tx, (Tx, ), voc_embedding, word_to_index)
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # Convert the list of danmu into a 2-D array of sentence indices.
    x_indices = sentence_to_indices(x, word_to_index, m, Tx)

    # Split into training and test sets. "propertion" (sic) is the keyword
    # name the helper is called with, so the spelling must stay as is.
    x_train_indices, x_test_indices, y_train_oh, y_test_oh = div_train_and_test(
        x_indices, y_sentiment, num_classes=num_classes,
        propertion=train_fraction)

    model.fit(x_train_indices, y_train_oh, epochs=epochs, shuffle=True,
              batch_size=batch_size)

    # Evaluate on the held-out split.
    loss, acc = model.evaluate(x_test_indices, y_test_oh)
    print("Loss = ", loss)
    print("Test accuracy = ", acc)

    # Saving the model and its weights is intentionally disabled; it was:
    #   model_json = model.to_json()
    #   with open('model/sentiment/model_noatt_three_60.json', 'w') as json_f:
    #       json_f.write(model_json)
    #   model.save_weights('model/sentiment/model_weights_noatt_three_60.h5')

    return model
def _decode_sentence(index_row, index_to_word):
    """Concatenate the words of every non-zero index in ``index_row``.

    Zero entries are skipped (presumably padding -- confirm against
    ``sentence_to_indices``).
    """
    return ''.join(index_to_word[int(ind)] for ind in index_row if ind != 0)


def train(m, Tx, x, y_sentiment, voc_embedding, word_to_index):
    """Train and evaluate the binary sentiment model, then print a report.

    After fitting and evaluating, two listings are printed for the test
    split: every sentence with the score of class 1, and every sentence
    whose arg-max prediction differs from its label.

    Args:
        m: Passed to ``sentence_to_indices`` (presumably the number of
            sentences in ``x`` -- confirm against that helper).
        Tx: Passed to ``sentiment_model`` and ``sentence_to_indices``
            (presumably the padded sentence length -- confirm).
        x: Sentences (danmu / bullet comments) to convert to index rows.
        y_sentiment: Labels; compared directly against ``np.argmax`` of the
            predictions, so they are expected to be class indices.
        voc_embedding: Embedding data forwarded to ``sentiment_model``.
        word_to_index: Word-to-index mapping used by both helpers.

    NOTE(review): ``index_to_word`` is not a parameter; it is read from the
    enclosing (module) scope -- confirm that it is defined there.
    """
    model = sentiment_model(Tx, (Tx, ), voc_embedding, word_to_index)
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # Convert the list of danmu into a 2-D array of sentence indices.
    x_indices = sentence_to_indices(x, word_to_index, m, Tx)

    # Split into training and test sets. "propertion" (sic) is the keyword
    # name the helper is called with, so the spelling must stay as is.
    x_train_indices, x_test_indices, y_train_oh, y_test_oh = div_train_and_test(
        x_indices, y_sentiment, num_classes=2, propertion=0.8)

    model.fit(x_train_indices, y_train_oh, epochs=50, shuffle=True)

    # Evaluate on the held-out split.
    loss, acc = model.evaluate(x_test_indices, y_test_oh)
    print("Loss = ", loss)
    print("Test accuracy = ", acc)

    # Saving the model and its weights is intentionally disabled; it was:
    #   model_json = model.to_json()
    #   with open('model/sentiment/model_three.json', 'w') as json_f:
    #       json_f.write(model_json)
    #   model.save_weights('model/sentiment/model_weights_three.h5')

    # Binary-classification report.
    # Assumes div_train_and_test keeps the original order, so the labels of
    # the test split are the tail of y_sentiment -- TODO confirm.
    num_train = len(x_train_indices)
    y_sentiment_test = y_sentiment[num_train:]
    pred = model.predict(x_test_indices)

    # Print the polarity of each test danmu. Iterating over the arrays
    # themselves (instead of range(m - num_train - 1)) covers every test
    # row, including the last one, and cannot index out of bounds.
    print('弹幕极性(数值越大,情感越倾向于正面):')
    for index_row, scores in zip(x_test_indices, pred):
        sentence = _decode_sentence(index_row, index_to_word)
        print(sentence + ':' + str(scores[1]))

    # Print the misclassified test danmu.
    for index_row, scores, expected in zip(x_test_indices, pred,
                                           y_sentiment_test):
        num = np.argmax(scores)
        if num != expected:
            sentence = _decode_sentence(index_row, index_to_word)
            print('Expected label:' + str(expected) + ' prediction: '
                  + sentence + ' ' + str(num))
tf.argmax(self.input_y, 1)) self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") file_path = 'data/danmu_seg.txt' voc_embedding_path = 'tools/voc_embedding.json' # 取数据 m, Tx, x, y = read_dataset_intent_all(file_path) voc_embedding, word_to_index, index_to_word = get_embedding(voc_embedding_path) index_to_embedding = get_index_to_embedding(voc_embedding, index_to_word) #将弹幕列表,转化为sentence-indices二维数组 x_indices = sentence_to_indices(x, word_to_index, m, Tx) # 生成训练集与测试集 x_train_indices, x_dev_indices, y_train_oh, y_dev_oh = div_train_and_test( x_indices, y, num_classes=6, propertion=0.8) # 训练集的sentence_sentiment_embedding已在batch_it_with_sentiment中实现 # 生成测试集的sentence_sentiment_embedding dev_sentence_embedding = sentiment_vec(x_dev_indices) # 生成测试集的sentence_bert_embedding dev_bert_embedding = bert_vec(x_dev_indices, index_to_word) # 运行模型 def train(x_train, y_train, x_dev, y_dev, dev_sentence_embedding, index_to_word, dev_bert_embedding): with tf.Graph().as_default():