def __init__(self, cores=cpu_cores):
    """Initialize the data handler.

    Sets up the word-vector tool, the column layout, empty dataframes for
    every data split, and placeholder paths, then delegates path resolution
    to ``initialize_params``.

    Args:
        cores: number of CPU cores for word-vector training
            (defaults to the module-level ``cpu_cores``).
    """
    super(AutoMasterDataHandler, self).__init__()
    self.cores = cores
    self.wvtool = WVTool(ndim=config.emb_dim,
                         cores=self.cores,
                         epochs=config.wv_epochs)
    self.logger = init_logger()
    # 'X' = claim-text column, 'Y' = positive-example-text column.
    self.cols = {'X': ['权利要求'], 'Y': ['正例文本']}
    self.cols['X+Y'] = self.cols['X'] + self.cols['Y']
    self.max_len = {'X': None, 'Y': None}
    # One empty frame per split. Built from a single shared column list
    # instead of five copy-pasted pd.DataFrame(columns=[...]) calls.
    frame_columns = ['专利序号', '权利要求', '正例文本']
    (self.df_all, self.df_train, self.df_eval,
     self.df_test, self.df_merged) = (
        pd.DataFrame(columns=frame_columns) for _ in range(5))
    # All paths below are resolved later by initialize_params().
    self.path_userdict = None
    self.path_stopwords = None
    self.swtool = None
    self.path_data_train, self.path_data_eval, self.path_data_test = None, None, None
    self.path_seg_train, self.path_seg_test, self.path_seg_eval, self.path_seg_merged = None, None, None, None
    self.path_pad_train_X, self.path_pad_test_X, self.path_pad_train_Y = None, None, None
    self.path_wv_model, self.path_embedding_matrix = None, None
    self.path_vocab, self.path_vocab_w2i, self.path_vocab_i2w = None, None, None
    self.initialize_params()
# --- Exemplo n.º 2 (snippet-site separator; vote count "0" removed — not code) ---
 def __init__(self, ndim=512, cores=1, epochs=10):
     """Set up the word-vector tool.

     Args:
         ndim: dimensionality of the word embeddings.
         cores: number of worker cores used for training.
         epochs: number of training epochs.
     """
     super(WVTool, self).__init__()
     self.ndim = ndim
     self.cores = cores
     self.epochs = epochs
     # Training artifacts — populated once a model has been fit.
     self.model = None
     self.vocab = None
     self.vocab_w2i = None
     self.vocab_i2w = None
     self.embedding_matrix = None
     self.logger = init_logger()
     # Special tokens; index stays -1 until the vocabulary is built.
     self.labels = {
         tag: CustomToken(name=tag, word='<%s>' % tag, index=-1)
         for tag in ('PAD', 'UNK', 'BOS', 'EOS')
     }
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @author   : Quan Liu
# @date     : 2020/5/21 16:01

import time
import tensorflow as tf
from tqdm import tqdm
from seq2seq.model_seq2seq_lq import Seq2Seq
from seq2seq.model_utils import get_input_from_batch, get_output_from_batch
from utils import config
from utils.common_utils import init_logger

# Module-level logger shared by the training/eval helpers in this file.
logger = init_logger()


def eval_model(batcher, model: Seq2Seq,
               ckpt_manager: tf.train.CheckpointManager) -> float:

    # 训练参数
    batch_size = config.eval_batch_size
    learning_rate = config.lr

    # 优化器
    optimizer = tf.keras.optimizers.Adam(name='Adam',
                                         learning_rate=learning_rate)

    # 训练
    # @tf.function
    def train_step(batcher) -> float:
        with tf.GradientTape() as tape: