Example #1
    def test_different_lr(self):
        """Test the usage of different learning rate."""
        with self.test_session() as sess:
            bert_w = tf.get_variable("bert/w",
                                     shape=[3],
                                     initializer=tf.constant_initializer(
                                         [0.1, 0.1, 0.1]))
            non_bert_w = tf.get_variable("w",
                                         shape=[3],
                                         initializer=tf.constant_initializer(
                                             [0.1, 0.1, 0.1]))
            x = tf.constant([1.0, 2.0, 3.0])
            loss = tf.reduce_mean(tf.square(x - bert_w - non_bert_w))

            hparams = tf.contrib.training.HParams(learning_rate=0.001,
                                                  num_train_steps=100,
                                                  num_warmup_steps=0,
                                                  lr_bert=0.00001,
                                                  optimizer="bert_adam",
                                                  use_horovod=False)
            train_op, _, _ = optimization.create_optimizer(hparams, loss)

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)
            sess.run(train_op)
            bert_w_v, non_bert_w_v = sess.run((bert_w, non_bert_w))
            print(bert_w_v, non_bert_w_v)
            # The ratio of the weight updates reflects the ratio of the learning
            # rates: lr_bert / learning_rate = 0.00001 / 0.001 = 0.01
            self.assertAllClose((bert_w_v - 0.1) / (non_bert_w_v - 0.1),
                                [0.01, 0.01, 0.01],
                                rtol=1e-2,
                                atol=1e-2)
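The asserted 0.01 follows directly from the two learning rates: both variables receive identical gradients, so the adaptive part of the Adam-style update is (approximately) the same for both, and the first-step updates differ only by the learning-rate factor. A trivial sanity check of that arithmetic:

# Expected update ratio when both variables see the same gradient.
expected_ratio = 0.00001 / 0.001  # lr_bert / learning_rate
assert abs(expected_ratio - 0.01) < 1e-12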
Example #2
def get_bert_optimizer_fn(hparams):
    """ Returns function that creates an optimizer for bert parameters """
    return lambda: create_optimizer(init_lr=hparams.lr_bert,
                                    num_train_steps=hparams.num_train_steps,
                                    num_warmup_steps=hparams.num_warmup_steps,
                                    optimizer=hparams.optimizer,
                                    use_lr_schedule=hparams.use_lr_schedule,
                                    use_bias_correction_for_adamw=hparams.
                                    use_bias_correction_for_adamw)
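The factory defers optimizer construction until it is called, which is useful when the optimizer must be built lazily (for example inside a distribution-strategy scope). A minimal usage sketch, with types.SimpleNamespace standing in for the real hparams object (hypothetical values):

import types

# Hypothetical hparams; the real object is an HParams instance (see Example #1).
hparams = types.SimpleNamespace(lr_bert=1e-5,
                                num_train_steps=1000,
                                num_warmup_steps=100,
                                optimizer='adamw',
                                use_lr_schedule=True,
                                use_bias_correction_for_adamw=False)
optimizer_fn = get_bert_optimizer_fn(hparams)
bert_optimizer = optimizer_fn()  # the optimizer is only built at call time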
Example #3
    def testCreateOptimizer(self):
        """ Tests create_optimizer() """
        init_lr = 0.05
        num_train_steps = 10
        num_warmup_steps = 3
        num_bp_steps = 5

        x = tf.constant([[0.1, 0.2], [0.3, 0.1]], dtype=tf.float32)
        y_true = x[:, 0] + x[:, 1]

        for optimizer_type in ['sgd', 'adam', 'adamw', 'lamb']:
            optimizer = optimization.create_optimizer(
                init_lr=init_lr,
                num_train_steps=num_train_steps,
                num_warmup_steps=num_warmup_steps,
                optimizer=optimizer_type,
                use_lr_schedule=True,
                use_bias_correction_for_adamw=False)

            model = tf.keras.Sequential(
                tf.keras.layers.Dense(
                    1,
                    use_bias=False,
                    kernel_initializer=tf.keras.initializers.zeros()))
            loss_obj = tf.keras.losses.MeanSquaredError()

            prev_loss = self._minimize(x, y_true, model, loss_obj,
                                       optimizer).numpy()
            prev_lr = optimizer._decayed_lr('float32').numpy()
            for step in range(1, num_bp_steps):
                loss = self._minimize(x, y_true, model, loss_obj,
                                      optimizer).numpy()

                # When num_warmup_steps > 0, the lr is 0 at the step that computes
                # prev_loss, so no weight update happens there and the loss at
                # step 1 can equal prev_loss.
                if step > 1:
                    self.assertLess(
                        loss, prev_loss,
                        f"Loss should be declining at each step. Step:{step}")

                # Learning rate check
                lr = optimizer._decayed_lr('float32').numpy()
                if step < num_warmup_steps:
                    self.assertGreater(
                        lr, prev_lr,
                        f"Learning rate should be increasing during warm up. Step:{step}"
                    )
                else:
                    self.assertLess(
                        lr, prev_lr,
                        f"Learning rate should be decreasing after warm up. Step:{step}"
                    )

                prev_loss = loss
                prev_lr = lr
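The exact schedule is internal to create_optimizer; a common BERT-style schedule consistent with the two assertions above (lr rising during warmup, falling afterwards) is linear warmup followed by linear decay. A plain-Python sketch, purely illustrative:

def bert_style_lr(step, init_lr, num_train_steps, num_warmup_steps):
    """Linear warmup then linear decay; matches the asserted up/down shape.
    The actual schedule inside create_optimizer may differ in its details."""
    if step < num_warmup_steps:
        return init_lr * step / max(1, num_warmup_steps)  # increasing
    progress = (step - num_warmup_steps) / max(1, num_train_steps - num_warmup_steps)
    return init_lr * (1.0 - progress)  # decreasing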
Example #4
    def _train_linear_model(self, x, y_true, init_lr, num_train_steps,
                            num_warmup_steps, process_grads_and_vars_fn):
        """Helper function to train a linear model"""
        optimizer = optimization.create_optimizer(
            init_lr=init_lr,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            optimizer='sgd',
            use_lr_schedule=True,
            use_bias_correction_for_adamw=False)

        model = tf.keras.Sequential(
            tf.keras.layers.Dense(
                1,
                use_bias=False,
                kernel_initializer=tf.keras.initializers.zeros()))
        loss_obj = tf.keras.losses.MeanSquaredError()

        for _ in range(2):
            loss, tape = self._get_loss(x, y_true, model, loss_obj)
            grads_and_vars = process_grads_and_vars_fn(
                tape, optimizer, loss, model.trainable_variables, [])
            optimizer.apply_gradients(grads_and_vars)
        return model
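The gradient processing is left to the caller via process_grads_and_vars_fn; a hypothetical implementation with the matching signature, shown here doing global-norm gradient clipping, might look like the following and would be passed as process_grads_and_vars_fn=clip_grads_and_vars_fn:

def clip_grads_and_vars_fn(tape, optimizer, loss, trainable_vars, unused):
    """Hypothetical processor: compute gradients and clip by global norm."""
    grads = tape.gradient(loss, trainable_vars)
    grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
    return list(zip(grads, trainable_vars))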
Example #5
def model_fn(features, labels, mode, params):
    """
    Defines the model_fn to feed in to estimator
    :param features: dict containing the features in data
    :param labels: dict containing labels in data
    :param mode: running mode, in TRAIN/EVAL/PREDICT
    :param params: hparams used
    :return: tf.estimator.EstimatorSpec
    """
    query_field = features.get('query', None)

    uid = features.get('uid', None)

    weight = features.get('weight', None)
    wide_ftrs = features.get('wide_ftrs', None)

    wide_ftrs_sp_idx = features.get('wide_ftrs_sp_idx', None)
    wide_ftrs_sp_val = features.get('wide_ftrs_sp_val', None)

    doc_fields = [
        features[ftr_name] for ftr_name in features
        if ftr_name.startswith('doc_')
    ]
    if len(doc_fields) == 0:
        doc_fields = None

    usr_fields = [
        features[ftr_name] for ftr_name in features
        if ftr_name.startswith('usr_')
    ]
    if len(usr_fields) == 0:
        usr_fields = None

    doc_id_fields = [
        features[ftr_name] for ftr_name in features
        if ftr_name.startswith('docId_')
    ]
    if len(doc_id_fields) == 0:
        doc_id_fields = None

    usr_id_fields = [
        features[ftr_name] for ftr_name in features
        if ftr_name.startswith('usrId_')
    ]
    if len(usr_id_fields) == 0:
        usr_id_fields = None

    label_field = labels[
        'label'] if mode != tf.estimator.ModeKeys.PREDICT else None
    labels_passthrough = features['label']

    group_size_field = features[
        'group_size'] if mode != tf.estimator.ModeKeys.PREDICT else None

    # build graph
    model = DeepMatch(query=query_field,
                      wide_ftrs=wide_ftrs,
                      doc_fields=doc_fields,
                      usr_fields=usr_fields,
                      doc_id_fields=doc_id_fields,
                      usr_id_fields=usr_id_fields,
                      hparams=params,
                      mode=mode,
                      wide_ftrs_sp_idx=wide_ftrs_sp_idx,
                      wide_ftrs_sp_val=wide_ftrs_sp_val)

    if mode == tf.estimator.ModeKeys.TRAIN:
        loss = compute_loss(params, model.scores, label_field,
                            group_size_field, weight)
        train_op, _, _ = optimization.create_optimizer(params, loss)
        global_step = tf.train.get_global_step()
        train_tensors_log = {'loss': loss, 'global_step': global_step}
        logging_hook = tf.train.LoggingTensorHook(train_tensors_log,
                                                  every_n_iter=10)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[logging_hook])

    elif mode == tf.estimator.ModeKeys.EVAL:
        loss = compute_loss(params, model.scores, label_field,
                            group_size_field, weight)
        eval_metric_ops = {}
        for metric_name in params.all_metrics:
            metric_op_name = 'metric/{}'.format(metric_name)
            topk = int(metric_name.split('@')[1]) if '@' in metric_name else 10  # default topk
            if metric_name.startswith('ndcg'):
                eval_metric_ops[metric_op_name] = metrics.compute_ndcg_tfr(
                    model.scores, label_field, features, topk)
            elif metric_name.startswith('mrr'):
                eval_metric_ops[metric_op_name] = metrics.compute_mrr_tfr(
                    model.scores, label_field, features)
            elif metric_name.startswith('precision'):
                eval_metric_ops[
                    metric_op_name] = metrics.compute_precision_tfr(
                        model.scores, label_field, features, topk)
            elif metric_name.startswith('traditional_ndcg'):
                eval_metric_ops[metric_op_name] = metrics.compute_ndcg(
                    model.scores, label_field, group_size_field, topk)
            elif metric_name.startswith('li_mrr'):
                eval_metric_ops[metric_op_name] = metrics.compute_mrr(
                    model.scores, labels['label'], features['group_size'],
                    topk)
            elif metric_name == 'auc':
                eval_metric_ops[metric_op_name] = metrics.compute_auc(
                    model.scores, label_field)
            elif metric_name == 'accuracy':
                eval_metric_ops[metric_op_name] = metrics.compute_accuracy(
                    model.scores, label_field)
            elif metric_name == 'confusion_matrix':
                eval_metric_ops[
                    metric_op_name] = metrics.compute_confusion_matrix(
                        model.scores, label_field, params.num_classes)
            else:
                raise ValueError("Unsupported metrics: %s" % (metric_name))

        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    elif mode == tf.estimator.ModeKeys.PREDICT:
        # Prediction field for scoring models
        predictions = {
            'uid': uid,
            'scores': model.original_scores,
            'weight': weight,
            'label': labels_passthrough
        }
        # multiclass classification: export the probabilities across classes by applying softmax
        if params.num_classes > 1:
            predictions['multiclass_probabilities'] = tf.nn.softmax(
                model.scores)

        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        # Provide an estimator spec for `ModeKeys.PREDICT` mode.
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)
    else:
        raise ValueError("Only support mode as TRAIN/EVAL/PREDICT")
Example #6
def model_fn(features, labels, mode, params):
    """
    Defines the model_fn to feed in to estimator
    :param features: dict containing the features in data
    :param labels: dict containing labels in data
    :param mode: running mode, in TRAIN/EVAL/PREDICT
    :param params: hparams used
    :return: tf.estimator.EstimatorSpec
    """
    query_field = features.get('query', None)

    uid = features.get('uid', None)

    weight = features.get('weight', None)
    wide_ftrs = features.get('wide_ftrs', None)

    wide_ftrs_sp_idx = features.get('wide_ftrs_sp_idx', None)
    wide_ftrs_sp_val = features.get('wide_ftrs_sp_val', None)

    doc_fields = [features[ftr_name] for ftr_name in features if ftr_name.startswith('doc_')]
    if len(doc_fields) == 0:
        doc_fields = None

    usr_fields = [features[ftr_name] for ftr_name in features if ftr_name.startswith('usr_')]
    if len(usr_fields) == 0:
        usr_fields = None

    doc_id_fields = [features[ftr_name] for ftr_name in features if ftr_name.startswith('docId_')]
    if len(doc_id_fields) == 0:
        doc_id_fields = None

    usr_id_fields = [features[ftr_name] for ftr_name in features if ftr_name.startswith('usrId_')]
    if len(usr_id_fields) == 0:
        usr_id_fields = None

    label_field = labels['label'] if mode != tf.estimator.ModeKeys.PREDICT else None
    labels_passthrough = features['label']

    group_size_field = features['group_size'] if mode != tf.estimator.ModeKeys.PREDICT else None

    # For multitask training
    task_id_field = features.get('task_id', None)  # shape=[batch_size,]

    # Update the weight with each task's weight such that weight per document = weight * task_weight
    if params.task_ids is not None:
        task_ids = params.task_ids  # e.g. [0, 1, 2]
        task_weights = params.task_weights  # e.g. [0.1, 0.3, 0.6]
        # Expand task_id_field with shape [batch_size, num_tasks]
        expanded_task_id_field = tf.transpose(tf.broadcast_to(task_id_field, [len(task_ids), tf.shape(task_id_field)[0]]))
        task_mask = tf.cast(tf.equal(expanded_task_id_field, task_ids), dtype=tf.float32)
        weight *= tf.reduce_sum(task_mask * task_weights, 1)  # shape=[batch_size,]
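        # Worked example (hypothetical values): task_ids=[0, 1, 2],
        # task_weights=[0.1, 0.3, 0.6], task_id_field=[1, 0, 2]
        #   -> task_mask = [[0, 1, 0], [1, 0, 0], [0, 0, 1]]
        #   -> per-example weight multiplier = [0.3, 0.1, 0.6]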

    # build graph
    model = DeepMatch(query=query_field,
                      wide_ftrs=wide_ftrs,
                      doc_fields=doc_fields,
                      usr_fields=usr_fields,
                      doc_id_fields=doc_id_fields,
                      usr_id_fields=usr_id_fields,
                      hparams=params,
                      mode=mode,
                      wide_ftrs_sp_idx=wide_ftrs_sp_idx,
                      wide_ftrs_sp_val=wide_ftrs_sp_val,
                      task_id_field=task_id_field)

    if mode == tf.estimator.ModeKeys.TRAIN:
        loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
        train_op, _, _ = optimization.create_optimizer(params, loss)
        global_step = tf.train.get_global_step()
        train_tensors_log = {'loss': loss, 'global_step': global_step}
        logging_hook = tf.train.LoggingTensorHook(train_tensors_log, every_n_iter=10)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[logging_hook])

    elif mode == tf.estimator.ModeKeys.EVAL:
        loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
        eval_metric_ops = {}
        for metric_name in params.all_metrics:
            metric_op_name = 'metric/{}'.format(metric_name)
            topk = int(metric_name.split('@')[1]) if '@' in metric_name else 10  # Default topk
            if metric_name.startswith('ndcg'):
                metric = metrics.compute_ndcg_tfr(model.scores, label_field, features, topk)
            elif metric_name.startswith('mrr'):
                metric = metrics.compute_mrr_tfr(model.scores, label_field, features)
            elif metric_name.startswith('precision'):
                metric = metrics.compute_precision_tfr(model.scores, label_field, features, topk)
            elif metric_name.startswith('traditional_ndcg'):
                metric = metrics.compute_ndcg(model.scores, label_field, group_size_field, topk)
            elif metric_name.startswith('li_mrr'):
                metric = metrics.compute_mrr(model.scores, labels['label'], features['group_size'], topk)
            elif metric_name == 'auc':
                metric = metrics.compute_auc(model.scores, label_field)
            elif metric_name == 'accuracy':
                metric = metrics.compute_accuracy(model.scores, label_field)
            elif metric_name == 'confusion_matrix':
                metric = metrics.compute_confusion_matrix(model.scores, label_field, params.num_classes)
            else:
                raise ValueError(f"Unsupported metrics: {metric_name}")
            eval_metric_ops[metric_op_name] = metric
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    elif mode == tf.estimator.ModeKeys.PREDICT:
        # Prediction field for scoring models
        predictions = {
            'uid': uid,
            'scores': model.original_scores,
            'weight': weight,
            'label': labels_passthrough
        }
        # multiclass classification: export the probabilities across classes by applying softmax
        if params.num_classes > 1:
            predictions['multiclass_probabilities'] = tf.nn.softmax(model.scores)

        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        # Provide an estimator spec for `ModeKeys.PREDICT` mode.
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)
    else:
        raise ValueError("Only support mode as TRAIN/EVAL/PREDICT")