Example #1
    def get_model(feature_encoder, num_filter, region_size, num_labels,
                  **kwargs):
        onehot_cnn = OnehotBowCNN(
            rand_seed=1377,
            verbose=kwargs.get('verbose', 0),
            feature_encoder=feature_encoder,
            # optimizers='adadelta',
            optimizers='sgd',
            num_labels=num_labels,
            l1_conv_filter_type=[
                [
                    num_filter, region_size, -1, 'valid', (-1, 1), 0., 'none',
                    'none'
                ],
            ],
            l2_conv_filter_type=[],
            full_connected_layer_units=[[0, 0, 'relu', 'batch_normalization']],
            embedding_dropout_rate=0.,
            nb_epoch=30,
            nb_batch=32,
            earlyStoping_patience=30,
            lr=1e-2,
        )

        return onehot_cnn
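A usage sketch for the factory above. The encoder construction mirrors the get_feature_encoder call in Example #2; the concrete numbers (input_length, num_filter, region_size, num_labels) are illustrative assumptions, not values from the original project.

    # Illustrative only: the encoder call mirrors Example #2; the numeric
    # arguments below are assumed, not taken from the original project.
    feature_encoder = OnehotBowCNN.get_feature_encoder(
        input_length=50,
        verbose=0,
        feature_type='word',
        word2vec_to_solve_oov=False,
        word2vec_model_file_path=None,
    )
    model = get_model(
        feature_encoder=feature_encoder,
        num_filter=100,
        region_size=3,
        num_labels=24,
    )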
Example #2
    def cross_validation(
        train_data=None,
        test_data=None,
        cv_data=None,
        input_length=None,
        feature_type='word',
        num_filter_list=None,
        region_size_list=None,
        word2vec_to_solve_oov=False,
        word2vec_model_file_path=None,
        verbose=0,
    ):

        from data_processing_util.cross_validation_util import transform_cv_data, get_k_fold_data, get_val_score
        # 1. Get the cross-validation data splits
        if cv_data is None:
            assert train_data is not None, 'At least one of cv_data and train_data must be provided!'
            cv_data = get_k_fold_data(
                k=3,
                train_data=train_data,
                test_data=test_data,
                include_train_data=True,
            )

        # 2. Encode the data with the feature encoder
        feature_encoder = OnehotBowCNN.get_feature_encoder(
            input_length=input_length,
            verbose=verbose,
            feature_type=feature_type,
            word2vec_to_solve_oov=word2vec_to_solve_oov,
            word2vec_model_file_path=word2vec_model_file_path,
        )
        cv_data = transform_cv_data(feature_encoder, cv_data, verbose=0)
        # 3. Cross-validation over the (num_filter, region_size) grid
        for num_filter in num_filter_list:
            for region_size in region_size_list:
                print('=' * 40)
                print('num_filter and region_size is %d,%d.' %
                      (num_filter, region_size))
                get_val_score(OnehotCNNWithOneConv,
                              cv_data=cv_data,
                              verbose=verbose,
                              region_size=region_size,
                              num_filter=num_filter,
                              num_labels=24)
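A sketch of how cross_validation might be invoked. The original does not show the structure of train_data and test_data, so the placeholders below are assumptions; the grid values are illustrative.

    # Illustrative call: train_data/test_data stand in for whatever (X, y)
    # structure the project's data loaders return (not shown in the original).
    cross_validation(
        train_data=train_data,
        test_data=test_data,
        input_length=50,             # assumed padding length
        feature_type='word',
        num_filter_list=[50, 100],   # illustrative search grid
        region_size_list=[2, 3, 4],
        verbose=0,
    )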
Example #3
    def cross_validation(
        train_data=None,
        test_data=None,
        cv_data=None,
        input_length=None,
        feature_type='word',
        num_filter_list=None,
        region_size_list=None,
        word2vec_to_solve_oov=False,
        word2vec_model_file_path=None,
        verbose=0,
    ):

        from data_processing_util.cross_validation_util import transform_cv_data, get_k_fold_data, get_val_score
        # 1. Get the cross-validation data splits
        if cv_data is None:
            assert train_data is not None, 'At least one of cv_data and train_data must be provided!'
            cv_data = get_k_fold_data(
                k=3,
                train_data=train_data,
                test_data=test_data,
                include_train_data=True,
            )

        # 2. Encode the data with the feature encoder
        feature_encoder = OnehotBowCNN.get_feature_encoder(
            input_length=input_length,
            verbose=verbose,
            feature_type=feature_type,
            word2vec_to_solve_oov=word2vec_to_solve_oov,
            word2vec_model_file_path=word2vec_model_file_path,
        )
        cv_data = transform_cv_data(feature_encoder, cv_data, verbose=0)
        # 3. Cross-validation over the (num_filter, region_size) grid
        for num_filter in num_filter_list:
            for region_size in region_size_list:
                print('=' * 40)
                print('num_filter and region_size is %d,%d.' %
                      (num_filter, region_size))
                get_val_score(OnehotCNNWithOneConv,
                              cv_data=cv_data,
                              verbose=verbose,
                              region_size=region_size,
                              num_filter=num_filter,
                              num_labels=24)
Example #4
    print('bow length:%d' % bow_length)
    print('=' * 80)
    onehot_cnn = OnehotBowCNN(
        rand_seed=rand_seed,
        verbose=0,
        feature_encoder=feature_encoder,
        # optimizers='adadelta',
        optimizers='sgd',
        input_length=sentence_padding_length,
        input_dim=feature_encoder.vocabulary_size,
        num_labels=len(index_to_label),
        l1_conv_filter_type=[
            [layer1, 2, -1, 'bow', (2, 1), 0., 'relu', 'batch_normalization'],
            # [layer1, 3, -1, 'bow', (2, 1), 0., 'relu', 'batch_normalization'],
            # [1000, 4, -1, 'valid', (2, 1), 0., 'relu', 'batch_normalization'],
        ],
        l2_conv_filter_type=[
            # [layer2, 2, -1, 'valid', (2, 1), 0.25, 'relu', 'none']
        ],
        full_connected_layer_units=[
            # (hidden1, 0.5, 'relu', 'batch_normalization'),
            # (hidden2, 0.5, 'relu', 'none'),
        ],
        embedding_dropout_rate=0.,
        nb_epoch=30,
        nb_batch=5,
        earlyStoping_patience=20,
        lr=1e-2,
    )
    onehot_cnn.print_model_descibe()
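Training the model built above would follow the same fit/accuracy interface shown in Example #5; the feature and label arrays are assumed to come from earlier preprocessing in the same script.

    # Same interface as Example #5; the train/test arrays are assumed to
    # exist from earlier preprocessing steps in the script.
    onehot_cnn.fit((train_X_feature, train_y),
                   (test_X_feature, test_y))
    onehot_cnn.accuracy((train_X_feature, train_y), transform_input=False)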
Example #5
import logging

from deep_learning.cnn.wordEmbedding_cnn.onehot_cnn_model import OnehotBowCNN

# +++++++++++++ region start : 3. Build the one-hot encoding +++++++++++++
# ****************************************************************
logging.debug('=' * 20)
logging.debug('3. Build the one-hot encoding')

onehot_cnn = OnehotBowCNN(
    rand_seed=1337,
    verbose=config['verbose'],
    feature_encoder=feature_encoder,
    # optimizers='adadelta',
    optimizers=config['optimizers'],
    input_length=sentence_padding_length,
    num_labels=len(index_to_label),
    conv1_filter_type=config['l1_conv_filter_type'],
    conv2_filter_type=config['l2_conv_filter_type'],
    full_connected_layer_units=config['full_connected_layer_units'],
    output_dropout_rate=config['output_dropout_rate'],
    nb_epoch=config['nb_epoch'],
    nb_batch=config['nb_batch'],
    earlyStoping_patience=config['earlyStoping_patience'],
    lr=config['lr'],
)
onehot_cnn.print_model_descibe()

onehot_cnn.fit((train_X_feature, train_y),
               (test_X_feature, test_y))
onehot_cnn.accuracy((train_X_feature, train_y), transform_input=False)

logging.debug('=' * 20)
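The snippet above reads all of its hyperparameters from a config dict (Example #6 below does the same). A minimal sketch of such a dict follows; the keys are exactly the config[...] lookups in the snippets, while the values are illustrative guesses loosely based on Examples #1 and #4, not the project's actual configuration.

# Keys mirror the config[...] lookups above; all values are illustrative.
config = {
    'rand_seed': 1337,
    'verbose': 0,
    'optimizers': 'sgd',
    'l1_conv_filter_type': [
        [100, 2, -1, 'bow', (2, 1), 0., 'relu', 'batch_normalization'],
    ],
    'l2_conv_filter_type': [],
    'full_connected_layer_units': [],
    'output_dropout_rate': 0.,
    'nb_epoch': 30,
    'nb_batch': 32,
    'earlyStoping_patience': 20,
    'lr': 1e-2,
}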
Example #6
import logging

import numpy as np

# ****************************************************************
logging.debug('=' * 20)
logging.debug('3. Build the one-hot encoding')

from deep_learning.cnn.wordEmbedding_cnn.onehot_cnn_model import OnehotBowCNN


onehot_cnn = OnehotBowCNN(
    rand_seed=config['rand_seed'],
    verbose=config['verbose'],
    feature_encoder=feature_encoder,
    # optimizers='adadelta',
    optimizers=config['optimizers'],
    input_length=sentence_padding_length,
    num_labels=len(index_to_label),
    conv1_filter_type=config['l1_conv_filter_type'],
    conv2_filter_type=config['l2_conv_filter_type'],
    full_connected_layer_units=config['full_connected_layer_units'],
    output_dropout_rate=config['output_dropout_rate'],
    nb_epoch=config['nb_epoch'],
    batch_size=config['nb_batch'],
    earlyStoping_patience=config['earlyStoping_patience'],
    lr=config['lr'],
)
# np.random.seed(0)
print(np.random.randint(0, 100))
# quit()
onehot_cnn.print_model_descibe()


onehot_cnn.fit((train_X_feature, train_y),
               (test_X_feature, test_y))