Exemplos de build_vocab em Python, exemplos de cnews_loader.build_vocab em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: train_cnn.py Projeto: DragonYong/TextClassification

                                                  feed_dict=feed_dict)

    # 评估
    print("Precision, Recall and F1-Score...")
    print(
        metrics.classification_report(y_test_cls,
                                      y_pred_cls,
                                      target_names=categories))

    # 混淆矩阵
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == "__main__":
    # Script entry point: make sure a vocabulary file exists, load the
    # category and vocabulary lookups, build the CNN model, then run the
    # phases enabled on ``args``.
    print("Configuring CNN model...")

    # Rebuild the vocabulary from the training data only when the
    # vocabulary file is missing.
    if not os.path.exists(vocab_dir):
        build_vocab(train_dir, vocab_dir, args.VOCAB_SIZE)

    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)

    # Use the number of words actually read rather than the configured size.
    args.VOCAB_SIZE = len(words)
    model = TextCNN(args)

    # Training and testing are independent switches; both may run.
    if args.DO_TRAIN:
        train()
    if args.DO_TEST:
        test()

Exemplo n.º 2

0

Exibir arquivo

    # 评估
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories))

    # 混淆矩阵
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == "__main__":
    # Exactly one command-line argument is accepted: 'train' or 'test'.
    if not (len(sys.argv) == 2 and sys.argv[1] in ('train', 'test')):
        raise ValueError("usage: python run_rnn.py [train / test]")

    print("Configuring RNN model...")
    config = TRNNConfig()

    # Rebuild the vocabulary from the training data only when the
    # vocabulary file is missing.
    if not os.path.exists(vocab_dir):
        build_vocab(train_dir, vocab_dir, config.vocab_size)

    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)

    # Use the number of words actually read rather than the configured size.
    config.vocab_size = len(words)
    model = TextRNN(config)

    # The argument was validated above, so anything other than 'train'
    # is 'test'.
    if sys.argv[1] != 'train':
        test()
    else:
        train()

Exemplo n.º 3

0

Exibir arquivo

Arquivo: run.py Projeto: jiudian123/second

val_dir = os.path.join(base_dir, 'cnewsval.txt')
vocab_dir = os.path.join(base_dir, 'cnewsvocab.txt')
# Word vectors trained by word2vec (text file).
vector_word_dir = os.path.join(base_dir, 'vector_word.txt')
# NumPy file the word vectors are saved to.
vector_word_npz = os.path.join(base_dir, 'vector_word.npz')

# Path where the best validation result is saved.
# Raw string: the backslashes are literal path separators. In a normal
# literal, '\m', '\l' and '\c' are invalid escape sequences that trigger a
# warning on modern Python; the resulting value is unchanged.
# NOTE(review): 'HOME' is a literal directory name here, not an expanded
# environment variable - confirm that is intended.
save_dir = r'HOME\mydata\lstm\checkpoints'
save_path = os.path.join(save_dir, 'best_validation')

# Build the vocabulary, then load the word and category lookups.
config = TRNNConfig()
build_vocab(train_dir, vocab_dir)
words, word_to_id = read_vocab(vocab_dir)
categories, cat_to_id = read_category()
config.vocab_size = len(words)

# Export the word2vec vectors to the .npz file only when it does not exist yet.
if not os.path.exists(vector_word_npz):
    export_word2vec_vectors(word_to_id, vector_word_dir, vector_word_npz)

# NOTE(review): the attribute is spelled 'pre_trianing'; kept as-is because
# its consumer is outside this view - confirm before renaming.
config.pre_trianing = get_training_word2vec_vectors(vector_word_npz)
model = TextRNN(config)
init = tf.global_variables_initializer()

def get_time_dif(start_time):
    """Return the time elapsed since ``start_time`` as a ``timedelta``.

    ``start_time`` is compared against ``time.time()``, and the difference
    is rounded to the nearest whole second.
    """
    elapsed_seconds = time.time() - start_time
    whole_seconds = int(round(elapsed_seconds))
    return timedelta(seconds=whole_seconds)