import math
import re
from logging import getLogger
from operator import itemgetter
from typing import Dict, List, Optional, Union

import numpy as np
import tensorflow as tf

# Assumed imports: these names are used below and match the DeepPavlov /
# bert_dp packages this code appears to come from; adjust if your layout differs.
from bert_dp.modeling import BertConfig, BertModel, create_initializer, \
    get_assignment_map_from_checkpoint
from bert_dp.optimization import AdamWeightDecayOptimizer
from bert_dp.preprocessing import InputFeatures
from deeppavlov.core.commands.utils import expand_path
from deeppavlov.core.models.tf_model import LRScheduledTFModel, TFModel
from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor

logger = getLogger(__name__)


# Fragment: `_init_graph` of a two-tower (siamese) BERT ranker; the enclosing
# class is not shown in this excerpt.
def _init_graph(self):
    self._init_placeholders()

    with tf.variable_scope("model"):
        model_a = BertModel(config=self.bert_config,
                            is_training=self.is_train_ph,
                            input_ids=self.input_ids_a_ph,
                            input_mask=self.input_masks_a_ph,
                            token_type_ids=self.token_types_a_ph,
                            use_one_hot_embeddings=False)
    # Reuse the same BERT weights for the second tower.
    with tf.variable_scope("model", reuse=True):
        model_b = BertModel(config=self.bert_config,
                            is_training=self.is_train_ph,
                            input_ids=self.input_ids_b_ph,
                            input_mask=self.input_masks_b_ph,
                            token_type_ids=self.token_types_b_ph,
                            use_one_hot_embeddings=False)

    output_layer_a = model_a.get_pooled_output()
    output_layer_b = model_b.get_pooled_output()

    with tf.variable_scope("loss"):
        output_layer_a = tf.nn.dropout(output_layer_a, keep_prob=self.keep_prob_ph)
        output_layer_b = tf.nn.dropout(output_layer_b, keep_prob=self.keep_prob_ph)
        self.loss = tf.contrib.losses.metric_learning.npairs_loss(
            self.y_ph, output_layer_a, output_layer_b)
        # Row-wise dot product between the two towers' embeddings.
        logits = tf.multiply(output_layer_a, output_layer_b)
        self.y_probas = tf.reduce_sum(logits, 1)
        self.pooled_out = output_layer_a
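
# --- Hedged example (added for illustration; not in the original file) ---
# The ranker above scores a pair by the row-wise dot product of the two
# pooled outputs. A minimal NumPy sketch of that scoring, with toy data:
def _demo_pairwise_scores():
    a = np.array([[1.0, 0.0], [0.5, 0.5]])  # tower A embeddings (batch of 2)
    b = np.array([[1.0, 1.0], [0.5, 0.5]])  # tower B embeddings
    # Equivalent to tf.reduce_sum(tf.multiply(a, b), 1): the diagonal of a @ b.T.
    # npairs_loss additionally treats (a[i], b[i]) as the positive pair and all
    # b[j], j != i, as in-batch negatives.
    scores = np.sum(a * b, axis=1)
    return scores  # -> array([1. , 0.5])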
class BertClassifierModel(LRScheduledTFModel):
    """Bert-based model for text classification.

    It uses output from [CLS] token and predicts labels using linear transformation.

    Args:
        bert_config_file: path to Bert configuration file
        n_classes: number of classes
        keep_prob: dropout keep_prob for non-Bert layers
        one_hot_labels: set True if one-hot encoding for labels is used
        multilabel: set True if it is multi-label classification
        return_probas: set True if class probabilities should be returned instead of the most probable label
        attention_probs_keep_prob: keep_prob for Bert self-attention layers
        hidden_keep_prob: keep_prob for Bert hidden layers
        optimizer: name of tf.train.* optimizer or None for `AdamWeightDecayOptimizer`
        num_warmup_steps: number of warmup steps (not yet used; see the warmup TODO)
        weight_decay_rate: L2 weight decay for `AdamWeightDecayOptimizer`
        pretrained_bert: pretrained Bert checkpoint
        min_learning_rate: min value of learning rate if learning rate decay is used
    """

    # TODO: add warmup
    # TODO: add head-only pre-training
    def __init__(self,
                 bert_config_file,
                 n_classes,
                 keep_prob,
                 one_hot_labels=False,
                 multilabel=False,
                 return_probas=False,
                 attention_probs_keep_prob=None,
                 hidden_keep_prob=None,
                 optimizer=None,
                 num_warmup_steps=None,
                 weight_decay_rate=0.01,
                 pretrained_bert=None,
                 min_learning_rate=1e-06,
                 # assumed parameter: split() and train_on_batch() below rely on
                 # self.gradient_accumulation_steps, which the original excerpt
                 # never set anywhere
                 gradient_accumulation_steps=1,
                 **kwargs) -> None:
        super().__init__(**kwargs)

        self.return_probas = return_probas
        self.n_classes = n_classes
        self.min_learning_rate = min_learning_rate
        self.keep_prob = keep_prob
        self.one_hot_labels = one_hot_labels
        self.multilabel = multilabel
        self.optimizer = optimizer
        self.num_warmup_steps = num_warmup_steps
        self.weight_decay_rate = weight_decay_rate
        self.gradient_accumulation_steps = gradient_accumulation_steps

        if self.multilabel and not self.one_hot_labels:
            raise RuntimeError('Use one-hot encoded labels for multilabel classification!')

        if self.multilabel and not self.return_probas:
            raise RuntimeError('Set return_probas to True for multilabel classification!')

        self.bert_config = BertConfig.from_json_file(str(expand_path(bert_config_file)))

        if attention_probs_keep_prob is not None:
            self.bert_config.attention_probs_dropout_prob = 1.0 - attention_probs_keep_prob
        if hidden_keep_prob is not None:
            self.bert_config.hidden_dropout_prob = 1.0 - hidden_keep_prob

        self.sess_config = tf.ConfigProto(allow_soft_placement=True)
        self.sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.sess_config)

        self._init_graph()
        self._init_optimizer()

        self.sess.run(tf.global_variables_initializer())

        if pretrained_bert is not None:
            pretrained_bert = str(expand_path(pretrained_bert))
            if tf.train.checkpoint_exists(pretrained_bert) \
                    and not (self.load_path
                             and tf.train.checkpoint_exists(str(self.load_path.resolve()))):
                logger.info('[initializing model with Bert from {}]'.format(pretrained_bert))
                # Exclude optimizer and classification variables from saved variables
                var_list = self._get_saveable_variables(
                    exclude_scopes=('Optimizer', 'learning_rate', 'momentum',
                                    'output_weights', 'output_bias'))
                saver = tf.train.Saver(var_list)
                saver.restore(self.sess, pretrained_bert)

        if self.load_path is not None:
            self.load()

    def _init_graph(self):
        self._init_placeholders()

        self.bert = BertModel(config=self.bert_config,
                              is_training=self.is_train_ph,
                              input_ids=self.input_ids_ph,
                              input_mask=self.input_masks_ph,
                              token_type_ids=self.token_types_ph,
                              use_one_hot_embeddings=False)

        output_layer = self.bert.get_pooled_output()
        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [self.n_classes, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [self.n_classes], initializer=tf.zeros_initializer())

        with tf.variable_scope("loss"):
            output_layer = tf.nn.dropout(output_layer, keep_prob=self.keep_prob_ph)
            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)

            if self.one_hot_labels:
                one_hot_labels = self.y_ph
            else:
                one_hot_labels = tf.one_hot(self.y_ph, depth=self.n_classes, dtype=tf.float32)

            self.y_predictions = tf.argmax(logits, axis=-1)
            if not self.multilabel:
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                self.y_probas = tf.nn.softmax(logits, axis=-1)
                per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
                self.loss = tf.reduce_mean(per_example_loss)
            else:
                self.y_probas = tf.nn.sigmoid(logits)
                self.loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=one_hot_labels,
                                                            logits=logits))

    def _init_placeholders(self):
        self.input_ids_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='ids_ph')
        self.input_masks_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='masks_ph')
        self.token_types_ph = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                             name='token_types_ph')

        if not self.one_hot_labels:
            self.y_ph = tf.placeholder(shape=(None,), dtype=tf.int32, name='y_ph')
        else:
            self.y_ph = tf.placeholder(shape=(None, self.n_classes), dtype=tf.float32, name='y_ph')

        self.learning_rate_ph = tf.placeholder_with_default(0.0, shape=[], name='learning_rate_ph')
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph')
        self.is_train_ph = tf.placeholder_with_default(False, shape=[], name='is_train_ph')

    def _init_optimizer(self):
        with tf.variable_scope('Optimizer'):
            self.global_step = tf.get_variable('global_step',
                                               shape=[],
                                               dtype=tf.int32,
                                               initializer=tf.constant_initializer(0),
                                               trainable=False)
            # default optimizer for Bert is Adam with fixed L2 regularization
            if self.optimizer is None:
                self.optimizer = AdamWeightDecayOptimizer(
                    learning_rate=self.learning_rate_ph,
                    weight_decay_rate=self.weight_decay_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-6,
                    exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

            # Symbolic per-batch gradients, plus placeholders through which the
            # averaged, accumulated gradients are fed back for a single update.
            # tf.convert_to_tensor densifies IndexedSlices gradients (e.g. from
            # embedding lookups) so they can be accumulated as plain arrays.
            self._train_vars = tf.trainable_variables()
            self._grads = [tf.convert_to_tensor(g)
                           for g in tf.gradients(self.loss, self._train_vars)]
            self._grad_phs = [tf.placeholder(tf.float32, shape=v.shape)
                              for v in self._train_vars]
            self._apply_grads_op = self.optimizer.apply_gradients(
                zip(self._grad_phs, self._train_vars))

    def _build_feed_dict(self, input_ids, input_masks, token_types, y=None):
        feed_dict = {
            self.input_ids_ph: input_ids,
            self.input_masks_ph: input_masks,
            self.token_types_ph: token_types,
        }
        if y is not None:
            feed_dict.update({
                self.y_ph: y,
                self.learning_rate_ph: max(self.get_learning_rate(), self.min_learning_rate),
                self.keep_prob_ph: self.keep_prob,
                self.is_train_ph: True,
            })
        return feed_dict

    def split(self, features: List[InputFeatures],
              y: Union[Optional[List[int]], List[List[int]]] = None):
        """Splits a batch of features into `gradient_accumulation_steps` equal parts,
        producing that many smaller batches instead."""
        num_parts = self.gradient_accumulation_steps
        assert num_parts > 0
        assert num_parts <= len(features)
        num_features = math.ceil(len(features) / num_parts)
        feature_batches = [features[i:i + num_features]
                           for i in range(0, len(features), num_features)]
        if y is not None:
            y_batches = [y[i:i + num_features] for i in range(0, len(y), num_features)]
        else:
            y_batches = []
        return feature_batches, y_batches

    def train_on_batch(self, features: List[InputFeatures],
                       y: Union[List[int], List[List[int]]] = None) -> Dict:
        """Train model on given batch.
        This method applies one optimizer step on gradients accumulated over
        `gradient_accumulation_steps` sub-batches of features and y (labels).

        Args:
            features: batch of InputFeatures
            y: batch of labels (class id or one-hot encoding)

        Returns:
            dict with loss and learning_rate values
        """
        feature_batches, y_batches = self.split(features, y)
        feed_dicts = [self._build_feed_dict([f.input_ids for f in feature_batch],
                                            [f.input_mask for f in feature_batch],
                                            [f.input_type_ids for f in feature_batch],
                                            y=y_batch)
                      for feature_batch, y_batch in zip(feature_batches, y_batches)]
        learning_rate = max(self.get_learning_rate(), self.min_learning_rate)

        # https://stackoverflow.com/questions/59893850/how-to-accumulate-gradients-in-tensorflow-2-0
        accumulated_gradients = [np.zeros(v.shape.as_list(), dtype=np.float32)
                                 for v in self._train_vars]
        total_batch_loss = 0
        for feed_dict in feed_dicts:
            loss_value, gradients = self.sess.run([self.loss, self._grads],
                                                  feed_dict=feed_dict)
            total_batch_loss += loss_value
            accumulated_gradients = [accum_grad + grad for accum_grad, grad
                                     in zip(accumulated_gradients, gradients)]

        # Average the accumulated gradients, then apply a single optimization step.
        accumulated_gradients = [grad / self.gradient_accumulation_steps
                                 for grad in accumulated_gradients]
        apply_feed = dict(zip(self._grad_phs, accumulated_gradients))
        apply_feed[self.learning_rate_ph] = learning_rate
        self.sess.run(self._apply_grads_op, feed_dict=apply_feed)

        batch_loss = total_batch_loss / self.gradient_accumulation_steps
        return {'loss': batch_loss, 'learning_rate': learning_rate}

    def __call__(self, features: List[InputFeatures]) -> Union[List[int], List[List[float]]]:
        """Make prediction for given features (texts).

        Args:
            features: batch of InputFeatures

        Returns:
            predicted classes or probabilities of each class
        """
        input_ids = [f.input_ids for f in features]
        input_masks = [f.input_mask for f in features]
        input_type_ids = [f.input_type_ids for f in features]

        feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids)
        if not self.return_probas:
            pred = self.sess.run(self.y_predictions, feed_dict=feed_dict)
        else:
            pred = self.sess.run(self.y_probas, feed_dict=feed_dict)
        return pred
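
# --- Hedged example (added for illustration; not in the original file) ---
# Why averaging accumulated gradients works: for a mean loss over equal-size
# sub-batches, the average of per-sub-batch gradients equals the full-batch
# gradient. Toy check with L(w) = mean((x * w) ** 2), so dL/dw = mean(2 * x**2 * w):
def _demo_gradient_accumulation():
    x = np.array([1.0, 2.0, 3.0, 4.0])
    w = 0.5
    full_grad = np.mean(2 * x ** 2 * w)                               # -> 7.5
    micro_grads = [np.mean(2 * part ** 2 * w) for part in np.split(x, 2)]
    assert np.isclose(full_grad, np.mean(micro_grads))
    return full_grad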
class BertClassifierModel(LRScheduledTFModel):
    """Bert-based model for text classification.

    It uses output from [CLS] token and predicts labels using linear transformation.

    Args:
        bert_config_file: path to Bert configuration file
        n_classes: number of classes
        keep_prob: dropout keep_prob for non-Bert layers
        one_hot_labels: set True if one-hot encoding for labels is used
        multilabel: set True if it is multi-label classification
        return_probas: set True if class probabilities should be returned instead of the most probable label
        attention_probs_keep_prob: keep_prob for Bert self-attention layers
        hidden_keep_prob: keep_prob for Bert hidden layers
        optimizer: name of tf.train.* optimizer or None for `AdamWeightDecayOptimizer`
        num_warmup_steps: number of warmup steps (not yet used; see the warmup TODO)
        weight_decay_rate: L2 weight decay for `AdamWeightDecayOptimizer`
        pretrained_bert: pretrained Bert checkpoint
        min_learning_rate: min value of learning rate if learning rate decay is used
    """

    # TODO: add warmup
    # TODO: add head-only pre-training
    def __init__(self,
                 bert_config_file,
                 n_classes,
                 keep_prob,
                 one_hot_labels=False,
                 multilabel=False,
                 return_probas=False,
                 attention_probs_keep_prob=None,
                 hidden_keep_prob=None,
                 optimizer=None,
                 num_warmup_steps=None,
                 weight_decay_rate=0.01,
                 pretrained_bert=None,
                 min_learning_rate=1e-06,
                 **kwargs) -> None:
        super().__init__(**kwargs)

        self.return_probas = return_probas
        self.n_classes = n_classes
        self.min_learning_rate = min_learning_rate
        self.keep_prob = keep_prob
        self.one_hot_labels = one_hot_labels
        self.multilabel = multilabel
        self.optimizer = optimizer
        self.num_warmup_steps = num_warmup_steps
        self.weight_decay_rate = weight_decay_rate

        if self.multilabel and not self.one_hot_labels:
            raise RuntimeError('Use one-hot encoded labels for multilabel classification!')

        if self.multilabel and not self.return_probas:
            raise RuntimeError('Set return_probas to True for multilabel classification!')

        self.bert_config = BertConfig.from_json_file(str(expand_path(bert_config_file)))

        if attention_probs_keep_prob is not None:
            self.bert_config.attention_probs_dropout_prob = 1.0 - attention_probs_keep_prob
        if hidden_keep_prob is not None:
            self.bert_config.hidden_dropout_prob = 1.0 - hidden_keep_prob

        self.sess_config = tf.ConfigProto(allow_soft_placement=True)
        self.sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.sess_config)

        self._init_graph()
        self._init_optimizer()

        self.sess.run(tf.global_variables_initializer())

        if pretrained_bert is not None:
            pretrained_bert = str(expand_path(pretrained_bert))
            # Guard on self.load_path: resolve() would fail if load_path is None.
            if tf.train.checkpoint_exists(pretrained_bert) \
                    and not (self.load_path
                             and tf.train.checkpoint_exists(str(self.load_path.resolve()))):
                logger.info('[initializing model with Bert from {}]'.format(pretrained_bert))
                # Exclude optimizer and classification variables from saved variables
                var_list = self._get_saveable_variables(
                    exclude_scopes=('Optimizer', 'learning_rate', 'momentum',
                                    'output_weights', 'output_bias'))
                saver = tf.train.Saver(var_list)
                saver.restore(self.sess, pretrained_bert)

        if self.load_path is not None:
            self.load()

    def _init_graph(self):
        self._init_placeholders()

        self.bert = BertModel(config=self.bert_config,
                              is_training=self.is_train_ph,
                              input_ids=self.input_ids_ph,
                              input_mask=self.input_masks_ph,
                              token_type_ids=self.token_types_ph,
                              use_one_hot_embeddings=False)

        output_layer = self.bert.get_pooled_output()
        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [self.n_classes, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [self.n_classes], initializer=tf.zeros_initializer())

        with tf.variable_scope("loss"):
            output_layer = tf.nn.dropout(output_layer, keep_prob=self.keep_prob_ph)
            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)

            if self.one_hot_labels:
                one_hot_labels = self.y_ph
            else:
                one_hot_labels = tf.one_hot(self.y_ph, depth=self.n_classes, dtype=tf.float32)

            self.y_predictions = tf.argmax(logits, axis=-1)
            if not self.multilabel:
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                self.y_probas = tf.nn.softmax(logits, axis=-1)
                per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
                self.loss = tf.reduce_mean(per_example_loss)
            else:
                self.y_probas = tf.nn.sigmoid(logits)
                self.loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=one_hot_labels,
                                                            logits=logits))

    def _init_placeholders(self):
        self.input_ids_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='ids_ph')
        self.input_masks_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='masks_ph')
        self.token_types_ph = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                             name='token_types_ph')

        if not self.one_hot_labels:
            self.y_ph = tf.placeholder(shape=(None,), dtype=tf.int32, name='y_ph')
        else:
            self.y_ph = tf.placeholder(shape=(None, self.n_classes), dtype=tf.float32, name='y_ph')

        self.learning_rate_ph = tf.placeholder_with_default(0.0, shape=[], name='learning_rate_ph')
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph')
        self.is_train_ph = tf.placeholder_with_default(False, shape=[], name='is_train_ph')

    def _init_optimizer(self):
        with tf.variable_scope('Optimizer'):
            self.global_step = tf.get_variable('global_step',
                                               shape=[],
                                               dtype=tf.int32,
                                               initializer=tf.constant_initializer(0),
                                               trainable=False)
            # default optimizer for Bert is Adam with fixed L2 regularization
            if self.optimizer is None:
                self.train_op = self.get_train_op(
                    self.loss,
                    learning_rate=self.learning_rate_ph,
                    optimizer=AdamWeightDecayOptimizer,
                    weight_decay_rate=self.weight_decay_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-6,
                    exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
            else:
                self.train_op = self.get_train_op(self.loss,
                                                  learning_rate=self.learning_rate_ph)

            if self.optimizer is None:
                # AdamWeightDecayOptimizer does not update the global step itself,
                # so increment it manually together with the train op.
                new_global_step = self.global_step + 1
                self.train_op = tf.group(self.train_op,
                                         [self.global_step.assign(new_global_step)])

    def _build_feed_dict(self, input_ids, input_masks, token_types, y=None):
        feed_dict = {
            self.input_ids_ph: input_ids,
            self.input_masks_ph: input_masks,
            self.token_types_ph: token_types,
        }
        if y is not None:
            feed_dict.update({
                self.y_ph: y,
                self.learning_rate_ph: max(self.get_learning_rate(), self.min_learning_rate),
                self.keep_prob_ph: self.keep_prob,
                self.is_train_ph: True,
            })
        return feed_dict

    def train_on_batch(self, features: List[InputFeatures],
                       y: Union[List[int], List[List[int]]]) -> Dict:
        """Train model on given batch.
        This method calls train_op using features and y (labels).

        Args:
            features: batch of InputFeatures
            y: batch of labels (class id or one-hot encoding)

        Returns:
            dict with loss and learning_rate values
        """
        input_ids = [f.input_ids for f in features]
        input_masks = [f.input_mask for f in features]
        input_type_ids = [f.input_type_ids for f in features]

        feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids, y)
        _, loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict)
        return {'loss': loss, 'learning_rate': feed_dict[self.learning_rate_ph]}

    def __call__(self, features: List[InputFeatures]) -> Union[List[int], List[List[float]]]:
        """Make prediction for given features (texts).

        Args:
            features: batch of InputFeatures

        Returns:
            predicted classes or probabilities of each class
        """
        input_ids = [f.input_ids for f in features]
        input_masks = [f.input_mask for f in features]
        input_type_ids = [f.input_type_ids for f in features]

        feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids)
        if not self.return_probas:
            pred = self.sess.run(self.y_predictions, feed_dict=feed_dict)
        else:
            pred = self.sess.run(self.y_probas, feed_dict=feed_dict)
        return pred
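
# --- Hedged example (added for illustration; not in the original file) ---
# A minimal usage sketch. All paths are hypothetical, and `save_path`,
# `load_path` and `learning_rate` are assumed to be accepted by the
# LRScheduledTFModel base class via **kwargs.
def _demo_classifier_usage():
    model = BertClassifierModel(bert_config_file='~/bert/bert_config.json',  # hypothetical path
                                pretrained_bert='~/bert/bert_model.ckpt',    # hypothetical path
                                n_classes=2,
                                keep_prob=0.5,
                                return_probas=True,
                                save_path='./clf_checkpoint',
                                load_path='./clf_checkpoint',
                                learning_rate=2e-5)
    preprocessor = BertPreprocessor(vocab_file='~/bert/vocab.txt',           # hypothetical path
                                    do_lower_case=False,
                                    max_seq_length=128)
    features = preprocessor(texts_a=['an example sentence'])
    return model(features)  # class probabilities, since return_probas=True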
class BertRankerModel(LRScheduledTFModel):
    # TODO: docs
    # TODO: add head-only pre-training
    def __init__(self,
                 bert_config_file,
                 n_classes,
                 keep_prob,
                 batch_size,
                 num_ranking_samples,
                 one_hot_labels=False,
                 attention_probs_keep_prob=None,
                 hidden_keep_prob=None,
                 pretrained_bert=None,
                 resps=None,
                 resp_vecs=None,
                 resp_features=None,
                 resp_eval=True,
                 conts=None,
                 cont_vecs=None,
                 cont_features=None,
                 cont_eval=True,
                 bot_mode=0,
                 min_learning_rate=1e-06,
                 **kwargs) -> None:
        super().__init__(**kwargs)

        self.batch_size = batch_size
        self.num_ranking_samples = num_ranking_samples
        self.n_classes = n_classes
        self.min_learning_rate = min_learning_rate
        self.keep_prob = keep_prob
        self.one_hot_labels = one_hot_labels
        self.resp_eval = resp_eval
        self.resps = resps
        self.resp_vecs = resp_vecs
        self.cont_eval = cont_eval
        self.conts = conts
        self.cont_vecs = cont_vecs
        self.bot_mode = bot_mode

        self.bert_config = BertConfig.from_json_file(str(expand_path(bert_config_file)))

        if attention_probs_keep_prob is not None:
            self.bert_config.attention_probs_dropout_prob = 1.0 - attention_probs_keep_prob
        if hidden_keep_prob is not None:
            self.bert_config.hidden_dropout_prob = 1.0 - hidden_keep_prob

        self.sess_config = tf.ConfigProto(allow_soft_placement=True)
        self.sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.sess_config)

        self._init_graph()
        self._init_optimizer()
        self.sess.run(tf.global_variables_initializer())

        if pretrained_bert is not None:
            pretrained_bert = str(expand_path(pretrained_bert))
            # Guard on self.load_path: resolve() would fail if load_path is None.
            if tf.train.checkpoint_exists(pretrained_bert) \
                    and not (self.load_path
                             and tf.train.checkpoint_exists(str(self.load_path.resolve()))):
                logger.info('[initializing model with Bert from {}]'.format(pretrained_bert))
                # Exclude optimizer and classification variables from saved variables
                var_list = self._get_saveable_variables(
                    exclude_scopes=('Optimizer', 'learning_rate', 'momentum', 'classification'))
                saver = tf.train.Saver(var_list)
                saver.restore(self.sess, pretrained_bert)

        if self.load_path is not None:
            self.load()

        if self.resp_eval:
            assert self.resps is not None
            assert self.resp_vecs is not None
        if self.cont_eval:
            assert self.conts is not None
            assert self.cont_vecs is not None
        if self.resp_eval and self.cont_eval:
            assert len(self.resps) == len(self.conts)

    def _init_graph(self):
        self._init_placeholders()

        with tf.variable_scope("model"):
            self.bert = BertModel(config=self.bert_config,
                                  is_training=self.is_train_ph,
                                  input_ids=self.input_ids_ph,
                                  input_mask=self.input_masks_ph,
                                  token_type_ids=self.token_types_ph,
                                  use_one_hot_embeddings=False)

        output_layer_a = self.bert.get_pooled_output()

        with tf.variable_scope("loss"):
            # This loss is defined but effectively unused:
            # train_on_batch() below is a no-op for this model.
            self.loss = tf.contrib.losses.metric_learning.npairs_loss(
                self.y_ph, output_layer_a, output_layer_a)
            self.y_probas = output_layer_a

    def _init_placeholders(self):
        self.input_ids_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='ids_ph')
        self.input_masks_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='masks_ph')
        self.token_types_ph = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                             name='token_types_ph')

        if not self.one_hot_labels:
            self.y_ph = tf.placeholder(shape=(None,), dtype=tf.int32, name='y_ph')
        else:
            self.y_ph = tf.placeholder(shape=(None, self.n_classes), dtype=tf.float32, name='y_ph')

        self.learning_rate_ph = tf.placeholder_with_default(0.0, shape=[], name='learning_rate_ph')
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph')
        self.is_train_ph = tf.placeholder_with_default(False, shape=[], name='is_train_ph')

    def _init_optimizer(self):
        # TODO: use AdamWeightDecay optimizer
        with tf.variable_scope('Optimizer'):
            self.global_step = tf.get_variable('global_step',
                                               shape=[],
                                               dtype=tf.int32,
                                               initializer=tf.constant_initializer(0),
                                               trainable=False)
            self.train_op = self.get_train_op(self.loss, learning_rate=self.learning_rate_ph)

    def _build_feed_dict(self, input_ids, input_masks, token_types, y=None):
        feed_dict = {
            self.input_ids_ph: input_ids,
            self.input_masks_ph: input_masks,
            self.token_types_ph: token_types,
        }
        if y is not None:
            feed_dict.update({
                self.y_ph: y,
                self.learning_rate_ph: max(self.get_learning_rate(), self.min_learning_rate),
                self.keep_prob_ph: self.keep_prob,
                self.is_train_ph: True,
            })
        return feed_dict

    def train_on_batch(self, features, y):
        pass

    def __call__(self, features_list):
        pred = []
        for features in features_list:
            input_ids = [f.input_ids for f in features]
            input_masks = [f.input_mask for f in features]
            input_type_ids = [f.input_type_ids for f in features]
            feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids)
            p = self.sess.run(self.y_probas, feed_dict=feed_dict)
            if len(p.shape) == 1:
                p = np.expand_dims(p, 0)
            pred.append(p)
        pred = np.vstack(pred)
        # Normalize each context embedding to unit length (row-wise norm).
        pred = pred / np.linalg.norm(pred, axis=1, keepdims=True)
        bs = pred.shape[0]
        if self.bot_mode == 0:
            s = pred @ self.resp_vecs.T
            ids = np.argmax(s, 1)
            ans = [[self.resps[ids[i]] for i in range(bs)],
                   [s[i][ids[i]] for i in range(bs)]]
        elif self.bot_mode == 1:
            sr = (pred @ self.resp_vecs.T + 1) / 2
            sc = (pred @ self.cont_vecs.T + 1) / 2
            ids = np.argsort(sr, 1)[:, -10:]
            sc = [sc[i, ids[i]] for i in range(bs)]
            ids = [sorted(zip(ids[i], sc[i]), key=itemgetter(1), reverse=True)
                   for i in range(bs)]
            sc = [list(map(lambda x: x[1], ids[i])) for i in range(bs)]
            ids = [list(map(lambda x: x[0], ids[i])) for i in range(bs)]
            ans = [[self.resps[ids[i][0]] for i in range(bs)],
                   [float(sc[i][0]) for i in range(bs)]]
        elif self.bot_mode == 2:
            sr = (pred @ self.resp_vecs.T + 1) / 2
            sc = (pred @ self.cont_vecs.T + 1) / 2
            ids = np.argsort(sc, 1)[:, -10:]
            sr = [sr[i, ids[i]] for i in range(bs)]
            ids = [sorted(zip(ids[i], sr[i]), key=itemgetter(1), reverse=True)
                   for i in range(bs)]
            sr = [list(map(lambda x: x[1], ids[i])) for i in range(bs)]
            ids = [list(map(lambda x: x[0], ids[i])) for i in range(bs)]
            ans = [[self.resps[ids[i][0]] for i in range(bs)],
                   [float(sr[i][0]) for i in range(bs)]]
        elif self.bot_mode == 3:
            sr = pred @ self.resp_vecs.T
            sc = pred @ self.cont_vecs.T
            s = sr + sc
            ids = np.argmax(s, 1)
            ans = [[self.resps[ids[i]] for i in range(bs)],
                   [float(s[i][ids[i]]) for i in range(bs)]]
        return ans
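
# --- Hedged example (added for illustration; not in the original file) ---
# What bot_mode == 0 above computes: pick the response whose precomputed
# vector has the highest dot product with the normalized context embedding.
def _demo_response_retrieval():
    context = np.array([[0.6, 0.8]])                          # shape (1, d), unit norm
    resp_vecs = np.array([[1.0, 0.0], [0.0, 1.0], [0.6, 0.8]])  # toy response vectors
    s = context @ resp_vecs.T                                  # similarities, shape (1, 3)
    best = int(np.argmax(s, axis=1)[0])                        # -> 2
    return best, float(s[0, best])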
class BertAsSummarizer(TFModel):
    """Naive extractive summarization model based on BERT.

    BERT is trained on Masked Language Modeling (MLM) and Next Sentence
    Prediction (NSP) tasks. For ``[CLS] text_a [SEP] text_b [SEP]``, the NSP
    head was trained to detect whether text_b follows text_a in the original
    document. This NSP head can be used to stack sentences from a long
    document onto an initial sentence:

        summary_0 = init_sentence
        summary_1 = summary_0 + argmax(nsp_score(candidates))
        summary_2 = summary_1 + argmax(nsp_score(candidates))
        ...

    where candidates are all sentences from the document.

    Args:
        bert_config_file: path to Bert configuration file
        pretrained_bert: path to pretrained Bert checkpoint
        vocab_file: path to Bert vocabulary
        max_summary_length: limit on summary length; the number of sentences is used
            if ``max_summary_length_in_tokens`` is set to False, else the number of tokens
        max_summary_length_in_tokens: use the number of tokens as the summary length.
            Defaults to ``False``.
        max_seq_length: max sequence length in subtokens, including ``[SEP]`` and ``[CLS]``
            tokens. `max_seq_length` is used in Bert to compute NSP scores. Defaults to ``128``.
        do_lower_case: set ``True`` if lowercasing is needed. Defaults to ``False``.
        lang: use ru_sent_tokenize for 'ru' and nltk.sent_tokenize for other languages.
            Defaults to ``'ru'``.
    """

    def __init__(self, bert_config_file: str,
                 pretrained_bert: str,
                 vocab_file: str,
                 max_summary_length: int,
                 max_summary_length_in_tokens: Optional[bool] = False,
                 max_seq_length: Optional[int] = 128,
                 do_lower_case: Optional[bool] = False,
                 lang: Optional[str] = 'ru',
                 **kwargs) -> None:
        self.max_summary_length = max_summary_length
        self.max_summary_length_in_tokens = max_summary_length_in_tokens
        self.bert_config = BertConfig.from_json_file(str(expand_path(bert_config_file)))
        self.bert_preprocessor = BertPreprocessor(vocab_file=vocab_file,
                                                  do_lower_case=do_lower_case,
                                                  max_seq_length=max_seq_length)

        self.tokenize_reg = re.compile(r"[\w']+|[^\w ]")

        if lang == 'ru':
            from ru_sent_tokenize import ru_sent_tokenize
            self.sent_tokenizer = ru_sent_tokenize
        else:
            from nltk import sent_tokenize
            self.sent_tokenizer = sent_tokenize

        self.sess_config = tf.ConfigProto(allow_soft_placement=True)
        self.sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.sess_config)

        self._init_graph()

        self.sess.run(tf.global_variables_initializer())

        if pretrained_bert is not None:
            pretrained_bert = str(expand_path(pretrained_bert))
            if tf.train.checkpoint_exists(pretrained_bert):
                logger.info('[initializing model with Bert from {}]'.format(pretrained_bert))
                tvars = tf.trainable_variables()
                assignment_map, _ = get_assignment_map_from_checkpoint(tvars, pretrained_bert)
                tf.train.init_from_checkpoint(pretrained_bert, assignment_map)

    def _init_graph(self):
        self._init_placeholders()

        self.bert = BertModel(config=self.bert_config,
                              is_training=self.is_train_ph,
                              input_ids=self.input_ids_ph,
                              input_mask=self.input_masks_ph,
                              token_type_ids=self.token_types_ph,
                              use_one_hot_embeddings=False)

        # next sentence prediction head
        with tf.variable_scope("cls/seq_relationship"):
            output_weights = tf.get_variable(
                "output_weights",
                shape=[2, self.bert_config.hidden_size],
                initializer=create_initializer(self.bert_config.initializer_range))
            output_bias = tf.get_variable("output_bias",
                                          shape=[2],
                                          initializer=tf.zeros_initializer())

            nsp_logits = tf.matmul(self.bert.get_pooled_output(), output_weights,
                                   transpose_b=True)
            nsp_logits = tf.nn.bias_add(nsp_logits, output_bias)
            self.nsp_probs = tf.nn.softmax(nsp_logits, axis=-1)

    def _init_placeholders(self):
        self.input_ids_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='ids_ph')
        self.input_masks_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='masks_ph')
        self.token_types_ph = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                             name='token_types_ph')
        self.is_train_ph = tf.placeholder_with_default(False, shape=[], name='is_train_ph')

    def _build_feed_dict(self, input_ids, input_masks, token_types):
        feed_dict = {
            self.input_ids_ph: input_ids,
            self.input_masks_ph: input_masks,
            self.token_types_ph: token_types,
        }
        return feed_dict

    def _get_nsp_predictions(self, sentences: List[str], candidates: List[str]):
        """Compute NextSentence probability for every (sentence_i, candidate_i) pair.

        [CLS] sentence_i [SEP] candidate_i [SEP]

        Args:
            sentences: list of sentences
            candidates: list of candidates to be the next sentence

        Returns:
            probabilities that a candidate is the next sentence
        """
        features = self.bert_preprocessor(texts_a=sentences, texts_b=candidates)
        input_ids = [f.input_ids for f in features]
        input_masks = [f.input_mask for f in features]
        input_type_ids = [f.input_type_ids for f in features]
        feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids)
        nsp_probs = self.sess.run(self.nsp_probs, feed_dict=feed_dict)
        # index 0 corresponds to the "is next" class of Bert's NSP head
        return nsp_probs[:, 0]

    def __call__(self, texts: List[str],
                 init_sentences: Optional[List[str]] = None) -> List[List[str]]:
        """Builds a summary for each text in `texts`.

        Args:
            texts: texts to build summaries for
            init_sentences: ``init_sentence`` is used as the first sentence in summary.
                Defaults to None.

        Returns:
            List[List[str]]: summaries tokenized on sentences
        """
        summaries = []
        # build summaries for each text, init_sentence pair
        if init_sentences is None:
            init_sentences = [None] * len(texts)

        for text, init_sentence in zip(texts, init_sentences):
            text_sentences = self.sent_tokenizer(text)

            if init_sentence is None:
                init_sentence = text_sentences[0]
                text_sentences = text_sentences[1:]

            # remove duplicates
            text_sentences = list(set(text_sentences))
            # remove init_sentence from text sentences
            text_sentences = [sent for sent in text_sentences if sent != init_sentence]

            summary = [init_sentence]
            if self.max_summary_length_in_tokens:
                # get length in tokens
                def get_length(x):
                    return len(self.tokenize_reg.findall(' '.join(x)))
            else:
                # get length as number of sentences
                get_length = len

            candidates = text_sentences[:]
            while len(candidates) > 0:
                # todo: use batches
                candidates_scores = [self._get_nsp_predictions([' '.join(summary)], [cand])
                                     for cand in candidates]
                best_candidate_idx = np.argmax(candidates_scores)
                best_candidate = candidates[best_candidate_idx]
                del candidates[best_candidate_idx]
                if get_length(summary + [best_candidate]) > self.max_summary_length:
                    break
                summary = summary + [best_candidate]
            summaries += [summary]
        return summaries

    def train_on_batch(self, **kwargs):
        raise NotImplementedError
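
# --- Hedged example (added for illustration; not in the original file) ---
# The summarizer's greedy loop with the NSP scorer stubbed out by a toy
# heuristic: at each step, append the candidate the scorer ranks highest,
# stopping once the length limit (here, a sentence count) would be exceeded.
def _demo_greedy_summary():
    def nsp_score(summary_text, candidate):
        # Stand-in for _get_nsp_predictions(); NOT a BERT score.
        return -abs(len(candidate) - len(summary_text))

    summary = ['The first sentence.']
    candidates = ['Short.', 'A sentence of comparable length.']
    max_summary_length = 2  # in sentences, as with max_summary_length_in_tokens=False
    while candidates:
        scores = [nsp_score(' '.join(summary), cand) for cand in candidates]
        best = candidates.pop(int(np.argmax(scores)))
        if len(summary + [best]) > max_summary_length:
            break
        summary.append(best)
    return summary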