Exemplo n.º 1
0
# Fetch the pair of label lookup tables provided by data_util.
label_to_index, index_to_label = data_util.get_label_index()

# ****************************************************************
# ------------- region end : 1. Load the training and test data -------------
# ****************************************************************

# ****************************************************************
# +++++++++++++ region start : 2. Convert the data format and encode features +++++++++++++
# ****************************************************************
logging.debug('=' * 20)
logging.debug('2. 转换数据的格式并特征编码')
from deep_learning.cnn.wordEmbedding_cnn.multichannel_onehot_cnn_model import MultiChannelOnehotBowCNN
# Obtain this classifier's feature encoders (one for the word channel, one
# for the seg channel), configured with the two input lengths.
word_feature_encoder, seg_feature_encoder = MultiChannelOnehotBowCNN.get_feature_encoder(
    word_input_length=word_input_length,
    seg_input_length=seg_input_length,
)


# Label-index columns of the train and test frames as arrays.
# `.values` replaces `.as_matrix()`: pandas deprecated `as_matrix` in 0.23 and
# removed it in 1.0, whereas `.values` exists in both old and new releases.
train_y = train_data['LABEL_INDEX'].values
test_y = test_data['LABEL_INDEX'].values

# Split the training frame into 3 folds with a fixed seed.
cv_data = data_util.get_k_fold_data(
    k=3,
    data=train_data,
    rand_seed=3,
)

# (sentences, labels) pair for the test set; built once because both encoder
# passes below receive exactly the same pair.
test_xy = (test_data[u'SENTENCE'].values, test_y)

# Run the same folds and test pair through each feature encoder.
all_cv_word_data = transform_cv_data(word_feature_encoder, cv_data, test_xy, **config)
all_cv_seg_data = transform_cv_data(seg_feature_encoder, cv_data, test_xy, **config)
Exemplo n.º 2
0
# Split the training frame into 3 folds with a fixed seed.
cv_data = data_util.get_k_fold_data(
    k=3,
    data=train_data,
    rand_seed=3,
)

# Cross-validate the multi-channel one-hot BOW CNN on the folds, with the test
# (sentences, labels) pair passed alongside.
# `.values` replaces `.as_matrix()`: pandas deprecated `as_matrix` in 0.23 and
# removed it in 1.0, whereas `.values` exists in both old and new releases.
# NOTE(review): the folds above use the literals k=3 / rand_seed=3, while the
# call below forwards the separate variables `k` and `rand_seed` — confirm
# these are meant to be independent settings.
MultiChannelOnehotBowCNN.cross_validation(
    cv_data,
    (test_data[u'SENTENCE'].values, test_y),
    # presumably the file the per-fold details are written to — confirm
    'result/multi_cnn_bow_v2.3Sa_cv_detail.txt',
    rand_seed=rand_seed,
    nb_epoch=nb_epoch,
    # NOTE(review): hard-coded label count; verify it matches the label index
    num_labels=24,
    verbose=verbose,
    remove_stopword=remove_stopword,
    layer1=layer1,
    l1_conv_filter_type=l1_conv_filter_type,
    layer2=layer2,
    l2_conv_filter_type=l2_conv_filter_type,
    k=k,
    hidden1=hidden1,
    hidden2=hidden2,
    word2vec_to_solve_oov=word2vec_to_solve_oov,
    word2vec_model_file_path=config['word2vec_model_file_path'],
    use_layer=use_layer,
    lr=lr,
)

# Report elapsed time relative to `start_time`, which is set outside this
# fragment; timeit.default_timer() values are in seconds.
end_time = timeit.default_timer()
print('end! Running time:%ds!' % (end_time - start_time))