# NOTE(review): this chunk starts mid-function -- the enclosing def and the
# definitions of seed/verbose/config/feature_encoder/label_to_index/
# train_X_feature/train_y/test_X_feature/test_y/model_file_path are outside
# this view.  Builds a CNN text classifier whose word embeddings are randomly
# initialised (no embedding_init_weight passed), then either trains it or
# reloads a previously saved model.
rand_embedding_cnn = WordEmbeddingCNN(
        rand_seed=seed,
        verbose=verbose,
        optimizers=config['optimizers'],
        # +1 presumably reserves an extra index (padding/OOV) beyond the
        # vocabulary -- TODO confirm against the feature_encoder implementation.
        input_dim=feature_encoder.vocabulary_size + 1,
        word_embedding_dim=config['word_embedding_dim'],
        input_length=config['sentence_padding_length'],
        num_labels=len(label_to_index),
        conv_filter_type=config['conv_filter_type'],
        k=config['kmax_k'],
        embedding_dropout_rate=config['embedding_dropout_rate'],
        output_dropout_rate=config['output_dropout_rate'],
        nb_epoch=int(config['cnn_nb_epoch']),
        earlyStoping_patience=config['earlyStoping_patience'],
    )
    # "descibe" [sic] -- method name as spelled by the WordEmbeddingCNN API.
    rand_embedding_cnn.print_model_descibe()

    # Retrain when forced by config or when no cached model file exists;
    # otherwise reload the previously pickled model.
    if config['refresh_all_model'] or not os.path.exists(model_file_path):
        # Train the model
        rand_embedding_cnn.fit((train_X_feature, train_y),
                               (test_X_feature, test_y))
        # Save the trained model
        rand_embedding_cnn.save_model(model_file_path)
    else:
        # Load the model from the saved pickle
        rand_embedding_cnn.model_from_pickle(model_file_path)

    # -------------- code start : end -------------
    if verbose > 2:
        logging.debug('-' * 20)
        # Python 2 print statement -- this file targets Python 2.
        print '-' * 20
# ===== Пример #2 (Example #2) -- scrape-site separator =====
# 0
        # NOTE(review): this chunk starts mid-call -- the constructor name
        # (presumably WordEmbeddingCNN, bound to w2v_embedding_cnn below) and
        # its leading keyword arguments are outside this view.
        l2_conv_filter_type=  [
            [layer2, 3, 1, 'valid', [2, 1], 0.25]
        ],
        full_connected_layer_units=[[hidden1,0.5],[hidden2,0.5]],
        embedding_dropout_rate=0.,
        nb_epoch=30,
        earlyStoping_patience=config['earlyStoping_patience'],
        lr = config['lr'],
        batch_size = batch_size,
        # Embedding layer starts from pretrained weights and is kept trainable.
        embedding_weight_trainable = True,
        embedding_init_weight=init_weight,
    )
    # Print one embedding row BEFORE training to inspect the initial weights.
    print (w2v_embedding_cnn.embedding_layer_output.get_weights()[0][1])

    # "descibe" [sic] -- method name as spelled by the WordEmbeddingCNN API.
    w2v_embedding_cnn.print_model_descibe()

    print('+'*80)
    # Train the model
    train_loss, train_accuracy, val_loss, val_accuracy = w2v_embedding_cnn.fit((train_X_feature, train_y),
                          (test_X_feature, test_y))
    # Same embedding row AFTER training -- shows whether the weights changed.
    print (w2v_embedding_cnn.embedding_layer_output.get_weights()[0][1])

    print('dev:%f,%f' % (train_loss, train_accuracy))
    print('val:%f,%f' % (val_loss, val_accuracy))
    # Hard stop (debugging aid) -- nothing after this call executes.
    quit()
    # train
    # w2v_embedding_cnn.accuracy((train_X_feature, train_y))


end_time = timeit.default_timer()
# ===== Пример #3 (Example #3) -- scrape-site separator =====
# 0
    # NOTE(review): this chunk starts mid-function -- seed/verbose/config/
    # feature_encoder/word2vec_file_path/label_to_index/train_y/test_X/test_y/
    # model_file_path are defined outside this view.  Despite the "rand_" name,
    # this instance is initialised with pretrained word2vec weights via
    # embedding_init_weight.
    rand_embedding_cnn = WordEmbeddingCNN(
        rand_seed=seed,
        verbose=verbose,
        # +1 presumably reserves an extra index (padding/OOV) beyond the
        # vocabulary -- TODO confirm against the feature_encoder implementation.
        input_dim=feature_encoder.vocabulary_size + 1,
        word_embedding_dim=config['word_embedding_dim'],
        # Pretrained embedding matrix built from the word2vec file.
        embedding_init_weight=feature_encoder.to_embedding_weight(word2vec_file_path),
        input_length=config['sentence_padding_length'],
        num_labels=len(label_to_index),
        conv_filter_type=config['conv_filter_type'],
        k=config['kmax_k'],
        embedding_dropout_rate=config['embedding_dropout_rate'],
        output_dropout_rate=config['output_dropout_rate'],
        nb_epoch=int(config['cnn_nb_epoch']),
        earlyStoping_patience=config['earlyStoping_patience'],
    )
    # "descibe" [sic] -- method name as spelled by the WordEmbeddingCNN API.
    rand_embedding_cnn.print_model_descibe()

    # Retrain when forced by config or when no cached model file exists;
    # otherwise reload the previously pickled model.
    if config['refresh_all_model'] or not os.path.exists(model_file_path):
        # Train the model; validation sentences are index-encoded on the fly
        # (Python 2 map returns a list, so this is a materialised sequence).
        rand_embedding_cnn.fit((feature_encoder.train_padding_index, train_y),
                               (map(feature_encoder.transform_sentence, test_X), test_y))
        # Save the trained model
        rand_embedding_cnn.save_model(model_file_path)
    else:
        # Load the model from the saved pickle
        rand_embedding_cnn.model_from_pickle(model_file_path)

    # -------------- code start : end -------------
    if verbose > 2:
        logging.debug('-' * 20)
        # Python 2 print statement -- this file targets Python 2.
        print '-' * 20