Example #1
def train(m, Tx, x, y_sentiment, voc_embedding, word_to_index):
    model = sentiment_model(Tx, (Tx, ), voc_embedding, word_to_index)
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # Convert the danmaku (bullet comment) list into a 2-D array of sentence indices
    x_indices = sentence_to_indices(x, word_to_index, m, Tx)
    # Split into training and test sets
    x_train_indices, x_test_indices, y_train_oh, y_test_oh = div_train_and_test(
        x_indices, y_sentiment, num_classes=6, propertion=0.8)
    model.fit(x_train_indices,
              y_train_oh,
              epochs=50,
              shuffle=True,
              batch_size=64)
    # Evaluate on the test set
    loss, acc = model.evaluate(x_test_indices, y_test_oh)
    print("Loss = ", loss)
    print("Test accuracy = ", acc)
    '''
    # Save the model architecture and weights
    model_json = model.to_json()
    with open ('model/sentiment/model_noatt_three_60.json', 'w') as json_f:
        json_f.write(model_json)
    model.save_weights('model/sentiment/model_weights_noatt_three_60.h5')
    '''
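
The commented-out block above saves the architecture as JSON and the weights as HDF5. Restoring the model later would look roughly like this; the paths are copied from the save block, the standalone keras package is assumed, and any custom layers created by sentiment_model would additionally need to be passed via custom_objects:

from keras.models import model_from_json

# Rebuild the architecture from the saved JSON, then load the trained weights.
with open('model/sentiment/model_noatt_three_60.json') as json_f:
    loaded_model = model_from_json(json_f.read())
loaded_model.load_weights('model/sentiment/model_weights_noatt_three_60.h5')
# Re-compile before calling evaluate() or resuming training.
loaded_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])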
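sentence_to_indices is defined elsewhere in the project. A minimal sketch of such a helper, assuming every element of x is already a list of tokens and that index 0 is reserved for padding (consistent with the `if ind != 0` check in Example #2); the real implementation may differ:

import numpy as np

def sentence_to_indices(x, word_to_index, m, Tx):
    # Map each tokenized sentence to a fixed-length row of vocabulary indices,
    # padding short sentences with 0 and truncating long ones at Tx tokens.
    x_indices = np.zeros((m, Tx))
    for i, sentence in enumerate(x):
        for j, word in enumerate(sentence[:Tx]):
            # Unknown words are simply skipped here; the real helper may map
            # them to a dedicated <UNK> index instead.
            if word in word_to_index:
                x_indices[i, j] = word_to_index[word]
    return x_indices
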
Example #2
import numpy as np

def train(m, Tx, x, y_sentiment, voc_embedding, word_to_index):
    model = sentiment_model(Tx, (Tx, ), voc_embedding, word_to_index)
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # Convert the danmaku list into a 2-D array of sentence indices
    x_indices = sentence_to_indices(x, word_to_index, m, Tx)
    # Split into training and test sets
    x_train_indices, x_test_indices, y_train_oh, y_test_oh = div_train_and_test(
        x_indices, y_sentiment, num_classes=2, propertion=0.8)
    model.fit(x_train_indices, y_train_oh, epochs=50, shuffle=True)
    # Evaluate on the test set
    loss, acc = model.evaluate(x_test_indices, y_test_oh)
    print("Loss = ", loss)
    print("Test accuracy = ", acc)
    '''
    # Save the model architecture and weights
    model_json = model.to_json()
    with open ('model/sentiment/model_three.json', 'w') as json_f:
        json_f.write(model_json)
    model.save_weights('model/sentiment/model_weights_three.h5')
    '''

    ## For binary classification
    num_train = len(x_train_indices)
    y_sentiment_test = y_sentiment[num_train:]
    pred = model.predict(x_test_indices)
    # Print the polarity of each danmaku comment in the test set
    print('Danmaku polarity (the larger the value, the more positive the sentiment):')
    for i in range(m - num_train):
        sentence = ''
        for ind in list(x_test_indices[int(i), :]):
            if ind != 0:
                sentence = sentence + index_to_word[int(ind)]
            else:
                continue
        print(sentence + ':' + str(pred[i][1]))
    # Print the misclassified test examples
    for i in range(m - num_train):
        sentence = ''
        num = np.argmax(pred[i])
        if (num != y_sentiment_test[i]):
            for ind in list(x_test_indices[int(i), :]):
                if ind != 0:
                    sentence = sentence + index_to_word[int(ind)]
                else:
                    continue
            print('Expected label:' + str(y_sentiment_test[i]) +
                  ' prediction: ' + sentence + ' ' + str(num))
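
div_train_and_test is likewise external. Its call sites suggest a sequential split by the given proportion plus one-hot encoding of the labels; a sequential, unshuffled split is also what makes y_sentiment[num_train:] usable as the test labels above. A hedged sketch under those assumptions (keeping the propertion spelling of the call sites):

import numpy as np

def div_train_and_test(x_indices, y, num_classes, propertion=0.8):
    # Split the sentence-index matrix and labels into train/test parts
    # and convert the integer labels to one-hot vectors.
    m = len(x_indices)
    num_train = int(m * propertion)
    y_oh = np.eye(num_classes)[np.asarray(y, dtype=int)]
    return (x_indices[:num_train], x_indices[num_train:],
            y_oh[:num_train], y_oh[num_train:])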


import tensorflow as tf

file_path = 'data/danmu_seg.txt'
voc_embedding_path = 'tools/voc_embedding.json'

# Load the dataset
m, Tx, x, y = read_dataset_intent_all(file_path)
voc_embedding, word_to_index, index_to_word = get_embedding(voc_embedding_path)
index_to_embedding = get_index_to_embedding(voc_embedding, index_to_word)

# Convert the danmaku list into a 2-D array of sentence indices
x_indices = sentence_to_indices(x, word_to_index, m, Tx)

# Generate the training and dev sets
x_train_indices, x_dev_indices, y_train_oh, y_dev_oh = div_train_and_test(
    x_indices, y, num_classes=6, propertion=0.8)
# The sentence_sentiment_embedding for the training set is built inside batch_it_with_sentiment
# Build the sentence_sentiment_embedding for the dev set
dev_sentence_embedding = sentiment_vec(x_dev_indices)
# Build the sentence_bert_embedding for the dev set
dev_bert_embedding = bert_vec(x_dev_indices, index_to_word)
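
get_embedding and get_index_to_embedding are project helpers as well. Judging only by their names and arguments, get_index_to_embedding presumably turns the word-to-vector mapping into an index-ordered embedding matrix; the sketch below is an assumption about that behaviour, not the project's actual code:

import numpy as np

def get_index_to_embedding(voc_embedding, index_to_word):
    # Row i of the returned matrix is the embedding of the word whose index
    # is i; row 0 is left as zeros for the padding index.
    emb_dim = len(next(iter(voc_embedding.values())))
    index_to_embedding = np.zeros((len(index_to_word) + 1, emb_dim))
    for idx, word in index_to_word.items():
        if word in voc_embedding:
            index_to_embedding[int(idx)] = voc_embedding[word]
    return index_to_embedding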


# Run the model
def train(x_train, y_train, x_dev, y_dev, dev_sentence_embedding,
          index_to_word, dev_bert_embedding):
    with tf.Graph().as_default():