Exemplo n.º 1
0
    verbose=1,
    need_segmented=True,
    full_mode=full_mode,
    remove_stopword=True,
    replace_number=True,
    lowercase=True,
    zhs2zht=True,
    remove_url=True,
    padding_mode='center',
    add_unkown_word=True,
    feature_type=feature_type,
)


# Fit the encoder on the training sentences and keep the encoded features.
# `.values` replaces `.as_matrix()`, which was deprecated and then removed in
# pandas 1.0; both return the column's data as a NumPy array.
train_X_feature = feature_encoder.fit_transform(train_data=train_data['SENTENCE'].values)
feature_encoder.print_model_descibe()
feature_encoder.print_sentence_length_detail()

# train_y = train_data['LABEL_INDEX'].values

# Encode the test sentences with the encoder fitted above.
test_all_X_feature = feature_encoder.transform(test_data['SENTENCE'].values)

test_all_y = test_data['LABEL_INDEX'].values

print(train_X_feature.shape)
print(test_all_X_feature.shape)
logging.debug('=' * 20)
# ****************************************************************
# ------------- region end : 2. Convert the data format and encode features -------------
# ****************************************************************
Exemplo n.º 2
0
    remove_stopword=True,
    replace_number=True,
    lowercase=True,
    zhs2zht=True,
    remove_url=True,
    padding_mode='center',
    add_unkown_word=True,
    mask_zero=True)
# Fit the encoder on the training inputs, then encode the test inputs with it.
train_X_features = feature_encoder.fit_transform(train_data=train_X)
test_X_features = feature_encoder.transform(test_X)

# Call the method: the bare attribute reference used previously was a no-op
# expression statement and never printed anything.
feature_encoder.print_sentence_length_detail()
print(feature_encoder.vocabulary_size)
# print ','.join(sorted(feature_encoder.vocabulary))
# quit()
feature_encoder.print_model_descibe()
# -------------- code section : end -------------
if verbose > 2:
    # Separator line, sent to both the debug log and stdout at high verbosity.
    logging.debug('-' * 20)
    print('-' * 20)
# -------------- region end : 2. Convert the data format so it can be classified ---------------

# Run the steps below once for every seed listed in config['rand_seed'].
for seed in config['rand_seed']:

    # -------------- region start : 3. Initialize and train the CNN model -------------
    # Step banner: only emitted when verbosity is above 2, and written to both
    # the debug log and stdout.
    if verbose > 2:
        logging.debug('-' * 20)
        print('-' * 20)
        logging.debug('3. 初始化CNN模型并训练')
        print('3. 初始化CNN模型并训练')
    # -------------- code section : begin -------------