Пример #1
0
    def initModel(self):
        """Restore a trained model from its checkpoint and load lookup tables.

        Prints the model name and prediction mode, then resolves the
        checkpoint path.  When no checkpoint path is returned the method
        exits early without touching the instance.  Otherwise it imports the
        meta graph into ``self.graph``, restores the weights into
        ``self.sess``, stores handles to the tensors fetched by op name, and
        reads the vocabulary and label table from the ``data`` directory.
        """
        print('Loading model:', self.model, 'pred_mode:', self.pred_mode)
        ckpt_path = self.__getCkptfile(self.model, self.pred_mode)
        if not ckpt_path:
            return

        # Load the model; the graph definition lives in the file with the
        # extra ".meta" suffix next to the checkpoint.
        with self.graph.as_default():
            restorer = tf.train.import_meta_graph(ckpt_path + '.meta')
            restorer.restore(self.sess, ckpt_path)

        # Fetch the tensors from the restored graph: (attribute, op name).
        # The first output of each named op is stored on the instance.
        tensor_table = (
            ('input_x', "input_x"),
            ('input_y', "input_y"),
            ('dropout_keep_prob', "dropout_keep_prob"),
            ('prediction', "output/prediction"),
            ('training', "training"),
        )
        for attr_name, op_name in tensor_table:
            operation = self.graph.get_operation_by_name(op_name)
            setattr(self, attr_name, operation.outputs[0])

        # Load the vocabulary that matches the prediction mode; any other
        # mode leaves ``self.vocab`` as it was.
        mode = self.pred_mode
        if mode == 'CHAR-RANDOM':
            vocab_file = preprocess.CHAR_VOCAB_PATH
        elif mode in ('WORD-NON-STATIC', 'MULTI'):
            vocab_file = preprocess.WORD_VOCAB_PATH
        else:
            vocab_file = None
        if vocab_file is not None:
            self.vocab = preprocess.read_vocab(os.path.join('data', vocab_file))

        # Load the labels.
        label_file = os.path.join('data', preprocess.LABEL_ID_PATH)
        self.label = preprocess.read_label(label_file)
    def prepare_data(self):
        """Load the vocabulary (and word embeddings) and open the training set.

        Depending on ``self.train_mode``:

        * ``'CHAR-RANDOM'`` -- only the character vocabulary is read.
        * ``'WORD-NON-STATIC'`` / ``'MULTI'`` -- the word vocabulary and the
          pre-trained vectors are read, and ``self.embedding_W`` is built as
          a ``[self.vocab_size, self.embedding_dim]`` float32 matrix.

        In every mode ``self.dataset`` is set to a ``TextLineDataset`` over
        the training file.  Returns ``None``.
        """
        # Data preparation.
        # =======================================================
        if self.train_mode == 'CHAR-RANDOM':
            # 1. Character level: read the vocabulary.
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.CHAR_VOCAB_PATH))

        elif self.train_mode in ('WORD-NON-STATIC', 'MULTI'):
            # Read the pre-trained word-vector values into a matrix.
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.WORD_VOCAB_PATH))
            self.vecs_dict = preprocess.load_vecs(
                os.path.join('data', preprocess.SGNS_WORD_PATH))
            # Zero-initialise: the raw ``np.ndarray`` constructor hands back
            # uninitialised memory, so any row whose id is not assigned by
            # the loop below would otherwise contain arbitrary values.
            self.embedding_W = np.zeros(
                shape=[self.vocab_size, self.embedding_dim], dtype=np.float32)
            for word in self.vocab:
                # Row n holds the vector of the word whose id is n.
                if word not in self.vecs_dict:
                    # Presumably inserts a vector for ``word`` into the dict;
                    # the lookup below relies on that -- TODO confirm.
                    preprocess.add_word(word, self.vecs_dict)
                self.embedding_W[self.vocab[word]] = self.vecs_dict[word]

        self.dataset = TextLineDataset(
            os.path.join('data', preprocess.TRAIN_WITH_ID_PATH))
    def prepare_data(self):
        """Load vocab/embeddings and build the train/validation input pipeline.

        Depending on ``self.train_mode``:

        * ``'CHAR-RANDOM'`` -- only the character vocabulary is read.
        * ``'WORD-NON-STATIC'`` / ``'MULTI'`` -- the word vocabulary and the
          pre-trained vectors are read, and ``self.embedding_W`` is built as
          a ``[self.vocab_size, self.embedding_dim]`` float32 matrix.

        The training file is then opened as ``self.dataset``, shuffled, and
        split: the first ``preprocess.VALID_SIZE`` elements form the
        validation set, the remainder the training set.

        Returns:
            A 4-tuple ``(train_init_op, valid_init_op, next_train_element,
            next_valid_element)``.  Run an init op in the session before
            fetching from the corresponding ``next_*`` element.
        """
        # Data preparation.
        # =======================================================
        if self.train_mode == 'CHAR-RANDOM':
            # 1. Character level: read the vocabulary.
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.CHAR_VOCAB_PATH))

        elif self.train_mode in ('WORD-NON-STATIC', 'MULTI'):
            # Read the pre-trained word-vector values into a matrix.
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.WORD_VOCAB_PATH))
            self.vecs_dict = preprocess.load_vecs(
                os.path.join('data', preprocess.SGNS_WORD_PATH))
            # Zero-initialise: the raw ``np.ndarray`` constructor hands back
            # uninitialised memory, so any row whose id is not assigned by
            # the loop below would otherwise contain arbitrary values.
            self.embedding_W = np.zeros(
                shape=[self.vocab_size, self.embedding_dim], dtype=np.float32)
            for word in self.vocab:
                # Row n holds the vector of the word whose id is n.
                if word not in self.vecs_dict:
                    # Presumably inserts a vector for ``word`` into the dict;
                    # the lookup below relies on that -- TODO confirm.
                    preprocess.add_word(word, self.vecs_dict)
                self.embedding_W[self.vocab[word]] = self.vecs_dict[word]

        self.dataset = TextLineDataset(
            os.path.join('data', preprocess.TRAIN_WITH_ID_PATH))
        print('Shuffling dataset...')
        self.dataset = self.dataset.shuffle(preprocess.TOTAL_TRAIN_SIZE)
        # Split the dataset.
        # NOTE(review): both splits are drawn from the same shuffled dataset
        # through separate iterators; if the shuffle order differs between
        # them (or between re-initialisations) the two splits can overlap --
        # confirm against the tf.data shuffle semantics in use.
        # The first VALID_SIZE samples go to the validation set ...
        valid_dataset = self.dataset.take(preprocess.VALID_SIZE).batch(
            self.valid_batch_size)
        # ... and the rest go to the training set.
        train_dataset = self.dataset.skip(preprocess.VALID_SIZE).batch(
            self.train_batch_size)

        # Create initializable iterators, one per split.
        train_iterator = train_dataset.make_initializable_iterator()
        valid_iterator = valid_dataset.make_initializable_iterator()

        train_init_op = train_iterator.initializer
        valid_init_op = valid_iterator.initializer

        # To fetch elements, first sess.run(train_init_op) to initialise the
        # iterator, then sess.run(next_train_element).
        next_train_element = train_iterator.get_next()
        next_valid_element = valid_iterator.get_next()

        return train_init_op, valid_init_op, next_train_element, next_valid_element
Пример #4
0
    def prepare_test_data(self):
        """Load the vocabulary and build a one-shot pipeline over the test set.

        Reads the character vocabulary for ``'CHAR-RANDOM'`` and the word
        vocabulary for ``'WORD-NON-STATIC'`` / ``'MULTI'`` into
        ``self.vocab``; any other ``self.train_mode`` leaves it untouched.
        The test file is then opened as a ``TextLineDataset``, shuffled and
        batched with ``self.test_batch_size``.

        Returns:
            A 2-tuple ``(dataset, next_element)``: the batched dataset and
            the ``get_next()`` tensor of a one-shot iterator over it.
        """
        # Read the vocabulary.
        if self.train_mode == 'CHAR-RANDOM':
            # 1. Character level.
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.CHAR_VOCAB_PATH))

        elif self.train_mode in ('WORD-NON-STATIC', 'MULTI'):
            # 'MULTI' uses the word vocabulary as well, matching how the
            # model-loading and training-data methods select it.
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.WORD_VOCAB_PATH))

        # The test set has a header row that must be skipped when reading.
        # NOTE(review): no skip is performed here, and after the shuffle the
        # header is no longer the first element -- confirm the consumer
        # filters it out, or add ``.skip(1)`` before shuffling.
        dataset = TextLineDataset(os.path.join('data', preprocess.TEST_PATH))
        dataset = dataset.shuffle(preprocess.TOTAL_TEST_SIZE).batch(
            self.test_batch_size)

        iterator = dataset.make_one_shot_iterator()
        next_element = iterator.get_next()

        return dataset, next_element