Пример #1
0
    def __init__(self, config: configure_pretraining.PretrainingConfig,
                 features, is_training):
        """Build the pre-training graph: masking, generator, discriminator, metrics.

        After construction the instance exposes:
          * ``mlm_output``: the masked-LM output of the generator step.
          * ``total_loss``: ``gen_weight * mlm loss`` plus, when
            ``config.electra_objective`` is set, ``disc_weight * disc loss``.
          * ``eval_metrics``: a ``(metric_fn, values)`` pair where ``values``
            are the tensors ``metric_fn`` expects as positional arguments.

        Args:
          config: Pre-training configuration; the flags read here include
            ``debug``, ``mask_prob``, ``embedding_size``, ``uniform_generator``,
            ``electra_objective``, ``untied_generator``,
            ``untied_generator_embeddings``, ``gen_weight`` and ``disc_weight``.
          features: Input features, converted with
            ``pretrain_data.features_to_inputs`` before masking.
          is_training: Forwarded to ``_build_transformer``.
        """
        # Set up model config
        self._config = config
        self._bert_config = training_utils.get_bert_config(config)
        if config.debug:
            # Debug mode overrides the model size with a much smaller network.
            self._bert_config.num_hidden_layers = 3
            self._bert_config.hidden_size = 144
            self._bert_config.intermediate_size = 144 * 4
            self._bert_config.num_attention_heads = 4

        # Mask the input
        masked_inputs = pretrain_helpers.mask(
            config, pretrain_data.features_to_inputs(features),
            config.mask_prob)

        # Generator
        # Fall back to the hidden size when no embedding size is configured.
        embedding_size = (self._bert_config.hidden_size
                          if config.embedding_size is None else
                          config.embedding_size)
        if config.uniform_generator:
            # No generator network is built; None is passed as the model.
            mlm_output = self._get_masked_lm_output(masked_inputs, None)
        elif config.electra_objective and config.untied_generator:
            # Separate generator network with its own config, built under the
            # name "generator".
            generator = self._build_transformer(
                masked_inputs,
                is_training,
                bert_config=get_generator_config(config, self._bert_config),
                embedding_size=(None if config.untied_generator_embeddings else
                                embedding_size),
                untied_embeddings=config.untied_generator_embeddings,
                name="generator",
            )
            mlm_output = self._get_masked_lm_output(masked_inputs, generator)
        else:
            # Generator built with the default transformer settings.
            # NOTE(review): the discriminator below is built with reuse=True in
            # this case, so presumably both share weights -- confirm in
            # _build_transformer.
            generator = self._build_transformer(masked_inputs,
                                                is_training,
                                                embedding_size=embedding_size)
            mlm_output = self._get_masked_lm_output(masked_inputs, generator)
        fake_data = self._get_fake_data(masked_inputs, mlm_output.logits)
        self.mlm_output = mlm_output
        # The total loss starts with the weighted generator (masked-LM) loss.
        self.total_loss = config.gen_weight * mlm_output.loss

        # Discriminator
        disc_output = None
        if config.electra_objective:
            discriminator = self._build_transformer(
                fake_data.inputs,
                is_training,
                reuse=not config.untied_generator,
                embedding_size=embedding_size,
            )
            disc_output = self._get_discriminator_output(
                fake_data.inputs, discriminator, fake_data.is_fake_tokens)
            self.total_loss += config.disc_weight * disc_output.loss

        # Evaluation
        eval_fn_inputs = {
            "input_ids": masked_inputs.input_ids,
            "masked_lm_preds": mlm_output.preds,
            "mlm_loss": mlm_output.per_example_loss,
            "masked_lm_ids": masked_inputs.masked_lm_ids,
            "masked_lm_weights": masked_inputs.masked_lm_weights,
            "input_mask": masked_inputs.input_mask,
        }
        if config.electra_objective:
            eval_fn_inputs.update({
                "disc_loss":
                disc_output.per_example_loss,
                "disc_labels":
                disc_output.labels,
                "disc_probs":
                disc_output.probs,
                "disc_preds":
                disc_output.preds,
                "sampled_tokids":
                tf.argmax(fake_data.sampled_tokens, -1, output_type=tf.int32),
            })
        # Keys and values are taken from the same dict in the same order so
        # that metric_fn can zip its positional arguments back to their names.
        eval_fn_keys = eval_fn_inputs.keys()
        eval_fn_values = [eval_fn_inputs[k] for k in eval_fn_keys]

        def metric_fn(*args):
            """Computes the loss and accuracy of the model."""
            # Rebuild the name -> tensor mapping from the positional arguments.
            d = {k: arg for k, arg in zip(eval_fn_keys, args)}
            metrics = dict()
            metrics["masked_lm_accuracy"] = tf.metrics.accuracy(
                labels=tf.reshape(d["masked_lm_ids"], [-1]),
                predictions=tf.reshape(d["masked_lm_preds"], [-1]),
                weights=tf.reshape(d["masked_lm_weights"], [-1]),
            )
            metrics["masked_lm_loss"] = tf.metrics.mean(
                values=tf.reshape(d["mlm_loss"], [-1]),
                weights=tf.reshape(d["masked_lm_weights"], [-1]),
            )
            if config.electra_objective:
                # Accuracy of the tokens sampled from the generator.
                metrics["sampled_masked_lm_accuracy"] = tf.metrics.accuracy(
                    labels=tf.reshape(d["masked_lm_ids"], [-1]),
                    predictions=tf.reshape(d["sampled_tokids"], [-1]),
                    weights=tf.reshape(d["masked_lm_weights"], [-1]),
                )
                if config.disc_weight > 0:
                    metrics["disc_loss"] = tf.metrics.mean(d["disc_loss"])
                    # Labels and probabilities are multiplied by input_mask
                    # before being handed to the AUC metric.
                    metrics["disc_auc"] = tf.metrics.auc(
                        d["disc_labels"] * d["input_mask"],
                        d["disc_probs"] * tf.cast(d["input_mask"], tf.float32),
                    )
                    metrics["disc_accuracy"] = tf.metrics.accuracy(
                        labels=d["disc_labels"],
                        predictions=d["disc_preds"],
                        weights=d["input_mask"],
                    )
                    # Accuracy weighted by the predictions, i.e. measured only
                    # where the prediction is non-zero (precision, assuming
                    # 0/1 predictions -- TODO confirm).
                    metrics["disc_precision"] = tf.metrics.accuracy(
                        labels=d["disc_labels"],
                        predictions=d["disc_preds"],
                        weights=d["disc_preds"] * d["input_mask"],
                    )
                    # Accuracy weighted by the labels, i.e. measured only
                    # where the label is non-zero (recall, assuming 0/1
                    # labels -- TODO confirm).
                    metrics["disc_recall"] = tf.metrics.accuracy(
                        labels=d["disc_labels"],
                        predictions=d["disc_preds"],
                        weights=d["disc_labels"] * d["input_mask"],
                    )
            return metrics

        self.eval_metrics = (metric_fn, eval_fn_values)
Пример #2
0
def _calculate_eval_metrics_fn(loss, label_ids, logits, input_mask,
                               aggregation_function_id, logits_aggregation,
                               classification_class_index, logits_cls):
    """Calculates metrics for both cells and aggregation functions.

    Every token-level metric (accuracy, precision, recall, AUC and mean
    label) is weighted by `input_mask`, so masked-out positions do not
    contribute to any of them.

    Args:
      loss: Loss values averaged into `eval_loss`.
      label_ids: Rank-2 tensor of per-token labels (the rank is asserted).
      logits: Rank-2 tensor of per-token logits (the rank is asserted). A
        token is predicted as 1 when its logit is >= 0.
      input_mask: Per-token mask; cast to float for the metric weights and to
        bool for the sequence-level accuracy.
      aggregation_function_id: Labels for the aggregation prediction. Only
        used when `logits_aggregation` is not None.
      logits_aggregation: Aggregation logits, or None to skip those metrics.
      classification_class_index: Labels for the classification prediction.
        Only used when `logits_cls` is not None.
      logits_cls: Classification logits, or None to skip that metric.

    Returns:
      Dict mapping a metric name to the result of the corresponding
      `tf.metrics` call.
    """
    logits.shape.assert_has_rank(2)
    label_ids.shape.assert_has_rank(2)

    # <int32>[batch size, seq_length]
    predictions = tf.where(logits >= 0, tf.ones_like(logits, dtype=tf.int32),
                           tf.zeros_like(logits, dtype=tf.int32))
    input_mask_float = tf.cast(input_mask, tf.float32)

    loss = tf.metrics.mean(values=loss)
    accuracy = tf.metrics.accuracy(labels=label_ids,
                                   predictions=predictions,
                                   weights=input_mask_float)

    # A masked-out token always counts as correct so that it cannot make a
    # sequence fail the all-tokens-correct check below.
    # <bool>[batch size, seq_length]
    token_correct = tf.logical_or(tf.equal(label_ids, predictions),
                                  tf.logical_not(tf.cast(input_mask, tf.bool)))
    # <bool>[batch size]
    per_sequence_accuracy = tf.reduce_all(token_correct, axis=1)
    sequence_accuracy = tf.metrics.mean(values=per_sequence_accuracy)

    probs = tf.sigmoid(logits)
    precision = tf.metrics.precision(labels=label_ids,
                                     predictions=predictions,
                                     weights=input_mask_float)
    recall = tf.metrics.recall(labels=label_ids,
                               predictions=predictions,
                               weights=input_mask_float)
    # Weight the AUC by the mask like the other token-level metrics;
    # otherwise masked-out positions would be scored as real examples.
    auc = tf.metrics.auc(labels=label_ids,
                         predictions=probs,
                         weights=input_mask_float)
    mean_label = tf.metrics.mean(values=tf.cast(label_ids, tf.float32),
                                 weights=input_mask_float)

    metrics = {
        "eval_loss": loss,
        "eval_accuracy": accuracy,
        "eval_sequence_accuracy": sequence_accuracy,
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_auc": auc,
        "eval_mean_label": mean_label,
    }

    if logits_cls is not None:
        # <int32>[batch size]
        predictions_cls = tf.argmax(logits_cls, axis=-1, output_type=tf.int32)
        accuracy_cls = tf.metrics.accuracy(labels=classification_class_index,
                                           predictions=predictions_cls)
        metrics.update({
            "eval_classification_accuracy": accuracy_cls,
        })

    if logits_aggregation is not None:
        # <int32>[batch size]
        predictions_agg = tf.argmax(logits_aggregation,
                                    axis=-1,
                                    output_type=tf.int32)
        accuracy_agg = tf.metrics.accuracy(labels=aggregation_function_id,
                                           predictions=predictions_agg)
        # <bool>[batch size]
        per_sequence_agg_accuracy = tf.equal(aggregation_function_id,
                                             predictions_agg)
        # Whether cells and aggregation function predictions are both correct.
        per_sequence_joint_accuracy = tf.logical_and(per_sequence_agg_accuracy,
                                                     per_sequence_accuracy)
        joint_accuracy = tf.metrics.mean(values=per_sequence_joint_accuracy)
        metrics.update({
            "eval_aggregation_accuracy": accuracy_agg,
            "eval_joint_accuracy": joint_accuracy,
        })
    return metrics
Пример #3
0
def model():
    """Train a small fully connected classifier and print its accuracy.

    The network has two ReLU hidden layers (100 and 50 units) on top of
    32 * 32 input features and produces 4 class scores. Training, validation
    and test arrays are loaded from ``t_*.npy``, ``v_*.npy`` and
    ``test_*.npy`` in the current working directory.

    Training runs in rounds of 1000 epochs and stops after the first round
    whose validation accuracy is not below 0.975. Afterwards the test
    accuracy and one prediction line per test sample are printed.

    All graph ops (including the accuracy op) are built once before the
    session starts, and the session is closed when the function returns.
    """
    print(' model')

    batch_size = 100
    features = 32 * 32
    categories = 4
    hidden_layer_nodes_1 = 100
    hidden_layer_nodes_2 = 50
    epochs_per_round = 1000
    target_validation_accuracy = 0.975

    x = tf.placeholder(tf.float32, [None, features])
    y_ = tf.placeholder(tf.float32, [None, categories])

    W1 = tf.Variable(tf.truncated_normal([features, hidden_layer_nodes_1], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[hidden_layer_nodes_1]))
    z1 = tf.nn.relu(tf.matmul(x, W1) + b1)
    W2 = tf.Variable(tf.truncated_normal([hidden_layer_nodes_1, hidden_layer_nodes_2], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[hidden_layer_nodes_2]))
    z2 = tf.nn.relu(tf.matmul(z1, W2) + b2)
    W3 = tf.Variable(tf.truncated_normal([hidden_layer_nodes_2, categories], stddev=0.1))
    b3 = tf.Variable(tf.constant(0.1, shape=[categories]))
    z3 = tf.matmul(z2, W3) + b3
    # Reuse the logits instead of building the same matmul a second time.
    y = tf.nn.softmax(z3)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=z3))

    update = tf.train.AdamOptimizer(0.0001).minimize(loss)

    # Build the accuracy ops exactly once; creating them inside the training
    # loop would add new nodes to the graph on every pass.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    data_x = np.load(r't_x.npy').astype(int)
    print("datax: ", data_x)
    data_y = np.load(r't_y.npy').astype(int)
    print("datay: ", data_y)
    data_x_test = np.load(r'test_x.npy').astype(int)
    data_y_test = np.load(r'test_y.npy').astype(int)
    data_x_validation = np.load(r'v_x.npy').astype(int)
    data_y_validation = np.load(r'v_y.npy').astype(int)

    train_feed = {x: data_x, y_: data_y}
    validation_feed = {x: data_x_validation, y_: data_y_validation}
    test_feed = {x: data_x_test, y_: data_y_test}
    total_batch = int(len(data_x) / batch_size)

    # The context manager releases the session's resources on exit.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        while True:
            for i in range(epochs_per_round):
                for _ in range(total_batch):
                    batch_xs, batch_ys = next_batch(batch_size, data_x, data_y)
                    sess.run(update, feed_dict={x: batch_xs, y_: batch_ys})
                if i % 100 == 0:
                    print("Iteration:", i, ",      Loss: ", sess.run(loss, feed_dict=train_feed))
            print("Accuracy train:", sess.run(accuracy, feed_dict=train_feed))

            validation_accuracy = sess.run(accuracy, feed_dict=validation_feed)
            print("Accuracy validation:", validation_accuracy)
            # Written as "not <" so that a NaN accuracy also ends the loop.
            if not validation_accuracy < target_validation_accuracy:
                break

        print("The model is ready!")
        # Test model
        print("Accuracy test:", sess.run(accuracy, feed_dict=test_feed))
        for sample, real_class in zip(data_x_test, data_y_test):
            # One forward pass per sample, reused for all printed values.
            prediction = sess.run(y, feed_dict={x: [sample]})
            print('Prediction for: "', sample, '": ', prediction, ',  Max value: ', max(prediction[0]), ',  Sum: ', sum(prediction[0]), ', real class: ', real_class)
Пример #4
0
def train():
    """Build the training graph and run the train/eval loop for MAX_EPOCH epochs.

    Reads the module-level settings GPU_INDEX, MODEL, BATCH_SIZE, NUM_POINT,
    OPTIMIZER, MOMENTUM, LOG_DIR and MAX_EPOCH, and relies on the helpers
    get_bn_decay, get_learning_rate, log_string, train_one_epoch and
    eval_one_epoch. Summaries are written to LOG_DIR/train and LOG_DIR/test,
    and a checkpoint is saved to LOG_DIR/model.ckpt every 10th epoch.

    The session and both summary writers are closed when training ends or
    fails.

    Raises:
        ValueError: If OPTIMIZER is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter for you every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl,
                                               is_training_pl,
                                               bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, end_points)
            tf.summary.scalar('loss', loss)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct,
                                             tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail with a clear message instead of an unbound-variable
                # error on the minimize() call below.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum' or 'adam'"
                    % (OPTIMIZER, ))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False

        # The context manager closes the session on exit.
        with tf.Session(config=config) as sess:
            # Add summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                os.path.join(LOG_DIR, 'train'), sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

            try:
                # Init variables
                init = tf.global_variables_initializer()
                # Feed is_training_pl while initializing to work around the
                # bug introduced in TF 0.12.1, see
                # http://stackoverflow.com/questions/41543774/invalidargumenterror-for-tensor-bool-tensorflow-0-12-1
                sess.run(init, {is_training_pl: True})

                ops = {
                    'pointclouds_pl': pointclouds_pl,
                    'labels_pl': labels_pl,
                    'is_training_pl': is_training_pl,
                    'pred': pred,
                    'loss': loss,
                    'train_op': train_op,
                    'merged': merged,
                    'step': batch
                }

                for epoch in range(MAX_EPOCH):
                    log_string('**** EPOCH %03d ****' % (epoch))
                    sys.stdout.flush()

                    train_one_epoch(sess, ops, train_writer)
                    eval_one_epoch(sess, ops, test_writer)

                    # Save the variables to disk.
                    if epoch % 10 == 0:
                        save_path = saver.save(
                            sess, os.path.join(LOG_DIR, "model.ckpt"))
                        log_string("Model saved in file: %s" % save_path)
            finally:
                # Flush pending summary events and release the event files.
                train_writer.close()
                test_writer.close()
Пример #5
0
def model_eval(sess,
               x,
               y,
               predictions,
               X_test=None,
               Y_test=None,
               feed=None,
               args=None):
    """Compute the accuracy of a TF model on a dataset, one batch at a time.

    The symbolic "prediction equals label" tensor is cached per
    `(y, predictions)` pair in the module-level `_model_eval_cache`.

    :param sess: TF session to use
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_test: numpy array with the evaluation inputs
    :param Y_test: numpy array with the evaluation labels
    :param feed: An optional dictionary that is merged into the feed
                 dictionary before each batch is evaluated.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: the fraction of examples in `X_test` classified correctly
    :raises ValueError: if `X_test` or `Y_test` is None
    """
    global _model_eval_cache
    args = _ArgsWrapper(args or {})

    batch_size = args.batch_size
    assert batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError(
            "X_test argument and Y_test argument must be supplied.")

    # Build the symbolic correctness tensor once per (y, predictions) pair.
    cache_key = (y, predictions)
    if cache_key not in _model_eval_cache:
        _model_eval_cache[cache_key] = tf.equal(
            tf.argmax(y, axis=-1), tf.argmax(predictions, axis=-1))
    correct_preds = _model_eval_cache[cache_key]

    # Running count of correct predictions; becomes the accuracy at the end.
    accuracy = 0.0

    with sess.as_default():
        num_examples = len(X_test)
        nb_batches = int(math.ceil(float(num_examples) / batch_size))
        assert nb_batches * batch_size >= num_examples

        # Fixed-size buffers that are refilled for every batch, so the fed
        # arrays always have exactly `batch_size` rows.
        x_buffer = np.zeros((batch_size, ) + X_test.shape[1:],
                            dtype=X_test.dtype)
        y_buffer = np.zeros((batch_size, ) + Y_test.shape[1:],
                            dtype=Y_test.dtype)

        for batch_index in range(nb_batches):
            if batch_index > 0 and batch_index % 100 == 0:
                _logger.debug("Batch " + str(batch_index))

            # Contiguous, non-overlapping slices: every example is evaluated
            # exactly once (no repetition, unlike training-time batching).
            lower = batch_index * batch_size
            upper = min(num_examples, lower + batch_size)

            # The final batch may be short; only its first `valid_rows` rows
            # hold fresh data and only those rows are counted below.
            valid_rows = upper - lower
            x_buffer[:valid_rows] = X_test[lower:upper]
            y_buffer[:valid_rows] = Y_test[lower:upper]

            feed_dict = {x: x_buffer, y: y_buffer}
            if feed is not None:
                feed_dict.update(feed)
            batch_correct = correct_preds.eval(feed_dict=feed_dict)

            accuracy += batch_correct[:valid_rows].sum()

        assert upper >= num_examples

        # Turn the count of correct predictions into a fraction.
        accuracy /= num_examples

    return accuracy
# ---------------------------End of network definition---------------------------
def regularizer(a):
    """Return the L2 norm of `a` scaled by 0.5 * 0.0001."""
    # tf.nn.l2_loss(a) is sum(a ** 2) / 2, so doubling it gives sum(a ** 2).
    sum_of_squares = tf.nn.l2_loss(a) * 2
    l2_norm = sum_of_squares**0.5
    return l2_norm * 0.5 * 0.0001


# regularizer = tf.contrib.layers.l2_regularizer(0.0001)
# Build the network output; the second argument (False) and the regularizer
# are forwarded to `inference`, which is defined outside this excerpt.
logits = inference(x, False, regularizer)

# (Small trick) multiply logits by 1 and store the result as logits_eval with
# an explicit name, so that the output tensor can later be fetched by its
# tensor name when the saved model is loaded.
b = tf.constant(value=1, dtype=tf.float32)
logits_eval = tf.multiply(logits, b, name='logits_eval')

# NOTE(review): sparse_softmax_cross_entropy_with_logits returns one loss per
# example and no reduction is applied here -- confirm that the code consuming
# `loss` expects a vector rather than a scalar.
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_)
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# The argmax over axis 1 is cast to int32 before it is compared with y_.
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# Define a function that fetches the data batch by batch
def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
    """Yield `(inputs, targets)` mini-batches of exactly `batch_size` items.

    Batches are taken in order, or in a random order drawn with
    `np.random.shuffle` when `shuffle` is true. Trailing samples that do not
    fill a complete batch are not yielded.

    Args:
        inputs: Indexable collection of samples.
        targets: Indexable collection of labels with the same length.
        batch_size: Number of samples per yielded batch.
        shuffle: Whether to visit the samples in random order.

    Yields:
        Tuples `(inputs_batch, targets_batch)`.
    """
    n_samples = len(inputs)
    assert n_samples == len(targets)

    # `order` stays None for sequential access; otherwise it holds a shuffled
    # index array that is sliced per batch.
    order = None
    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)

    for lo in range(0, n_samples - batch_size + 1, batch_size):
        hi = lo + batch_size
        picker = slice(lo, hi) if order is None else order[lo:hi]
        yield inputs[picker], targets[picker]
Пример #7
0
def metric_fn(labels, logits):
  """Return a dict with the accuracy metric of the argmax of `logits`.

  The predicted class is the argmax of `logits` along axis 1; the result of
  `tf.metrics.accuracy` is stored under the key "accuracy".
  """
  predicted_classes = tf.argmax(logits, axis=1)
  return {
      "accuracy":
          tf.metrics.accuracy(labels=labels, predictions=predicted_classes),
  }
Пример #8
0
    def build():
        """Builds the Tensorflow graph.

        Reads `mode`, `hparams`, `encoder_decoder`, `input_size`,
        `num_classes`, `no_event_label` and `sequence_example_file_paths`
        from the enclosing scope (defined outside this excerpt).

        * In 'train' and 'eval' mode, inputs are read with
          `magenta.common.get_padded_batch`, and loss/accuracy values are
          added as summaries and as graph collections.
        * In 'generate' mode, inputs come from a placeholder and the
          temperature-scaled softmax plus the RNN states are added to graph
          collections.

        Nothing is returned; the results are exposed through the graph's
        collections and summaries.
        """
        inputs, labels, lengths = None, None, None

        if mode in ('train', 'eval'):
            # A numeric no_event_label means one scalar label per step;
            # otherwise each step has len(no_event_label) labels.
            if isinstance(no_event_label, numbers.Number):
                label_shape = []
            else:
                label_shape = [len(no_event_label)]
            # Batches are shuffled only when training.
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                label_shape=label_shape,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        # For one-hot index encoders the last input axis is squeezed away and
        # the resulting index is expanded to a one-hot vector here.
        if isinstance(encoder_decoder,
                      magenta.music.OneHotIndexEventSequenceEncoderDecoder):
            expanded_inputs = tf.one_hot(
                tf.cast(tf.squeeze(inputs, axis=-1), tf.int64),
                encoder_decoder.input_depth)
        else:
            expanded_inputs = inputs

        # Dropout is disabled (keep probability 1.0) when generating.
        dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob

        cell = make_rnn_cell(hparams.rnn_layer_sizes,
                             dropout_keep_prob=dropout_keep_prob,
                             attn_length=hparams.attn_length,
                             residual_connections=hparams.residual_connections)

        initial_state = cell.zero_state(hparams.batch_size, tf.float32)

        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 expanded_inputs,
                                                 sequence_length=lengths,
                                                 initial_state=initial_state,
                                                 swap_memory=True)

        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        # With several label classes per step, one linear layer produces the
        # concatenated logits for all of them.
        if isinstance(num_classes, numbers.Number):
            num_logits = num_classes
        else:
            num_logits = sum(num_classes)
        logits_flat = tf_slim.layers.linear(outputs_flat, num_logits)

        if mode in ('train', 'eval'):
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            if isinstance(num_classes, numbers.Number):
                softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat)
                predictions_flat = tf.argmax(logits_flat, axis=1)
            else:
                # Column boundaries of each sub-label's logits in logits_flat.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax_cross_entropy = []
                predictions = []
                # Loss and prediction are computed separately for every
                # sub-label, each on its own slice of the logits.
                for i in range(len(num_classes)):
                    softmax_cross_entropy.append(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=labels_flat[:, i],
                            logits=logits_flat[:, logits_offsets[i]:
                                               logits_offsets[i + 1]]))
                    predictions.append(
                        tf.argmax(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            axis=1))
                predictions_flat = tf.stack(predictions, 1)

            # Float 0/1 indicators: correct prediction, label is an event,
            # label is the no-event label.
            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            # Compute the total number of time steps across all sequences in the
            # batch. For some models this will be different from the number of RNN
            # steps.
            def batch_labels_to_num_steps(batch_labels, lengths):
                """Sums `labels_to_num_steps` over the unpadded labels."""
                num_steps = 0
                for labels, length in zip(batch_labels, lengths):
                    num_steps += encoder_decoder.labels_to_num_steps(
                        labels[:length])
                return np.float32(num_steps)

            # The step count is computed in Python via py_func.
            num_steps = tf.py_func(batch_labels_to_num_steps,
                                   [labels, lengths], tf.float32)

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                # Accuracy restricted to event / no-event positions.
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                loss_per_step = tf.reduce_sum(
                    softmax_cross_entropy) / num_steps
                perplexity_per_step = tf.exp(loss_per_step)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = tf_slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                    'metrics/loss_per_step': loss_per_step,
                    'metrics/perplexity_per_step': perplexity_per_step,
                }
            elif mode == 'eval':
                # Streaming metrics; their update ops are collected under
                # 'eval_ops' below. Recall of `correct_predictions` against a
                # position indicator is the fraction of those positions that
                # were predicted correctly.
                vars_to_summarize, update_ops = tf_slim.metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(softmax_cross_entropy),
                        'metrics/accuracy':
                        tf.metrics.accuracy(labels_flat, predictions_flat),
                        'metrics/per_class_accuracy':
                        tf.metrics.mean_per_class_accuracy(
                            labels_flat, predictions_flat, num_classes),
                        'metrics/event_accuracy':
                        tf.metrics.recall(event_positions,
                                          correct_predictions),
                        'metrics/no_event_accuracy':
                        tf.metrics.recall(no_event_positions,
                                          correct_predictions),
                        'metrics/loss_per_step':
                        tf.metrics.mean(tf.reduce_sum(softmax_cross_entropy) /
                                        num_steps,
                                        weights=num_steps),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])
                vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
                    vars_to_summarize['metrics/loss_per_step'])

            # Every value is both written as a scalar summary and stored in a
            # collection named after it.
            for var_name, var_value in vars_to_summarize.items():
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            # Scalar placeholder by which the logits are divided before the
            # softmax.
            temperature = tf.placeholder(tf.float32, [])
            if isinstance(num_classes, numbers.Number):
                softmax_flat = tf.nn.softmax(
                    tf.div(logits_flat, tf.fill([num_classes], temperature)))
                softmax = tf.reshape(softmax_flat,
                                     [hparams.batch_size, -1, num_classes])
            else:
                # One softmax per sub-label, each over its own logits slice.
                # NOTE(review): `softmax` is then a Python list that is added
                # to the 'softmax' collection as a single item -- confirm
                # that consumers expect a list.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax = []
                for i in range(len(num_classes)):
                    sm = tf.nn.softmax(
                        tf.div(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            tf.fill([num_classes[i]], temperature)))
                    sm = tf.reshape(sm,
                                    [hparams.batch_size, -1, num_classes[i]])
                    softmax.append(sm)

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf.nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf.nest.flatten(final_state):
                tf.add_to_collection('final_state', state)
Пример #9
0
def main(unused_argv):
    """Train a TabNet classifier on the covertype CSV splits.

    Builds one training graph plus validation and test accuracy ops that
    reuse the training variables (`reuse=True`), then runs the training loop
    in a `tf.Session`, periodically writing TensorBoard summaries under
    `./tflog/` and checkpoints under `./checkpoints/`.

    Args:
      unused_argv: Ignored.
    """
    # Load training and eval data.
    train_file = "data/train.csv"
    val_file = "data/val.csv"
    test_file = "data/test.csv"

    # Define the TabNet model
    tabnet_forest_covertype = tabnet_model.TabNet(
        columns=data_helper_covertype.get_columns(),
        num_features=data_helper_covertype.num_features,
        feature_dim=128,
        output_dim=64,
        num_decision_steps=6,
        relaxation_factor=1.5,
        batch_momentum=0.7,
        virtual_batch_size=512,
        num_classes=data_helper_covertype.num_classes)

    column_names = sorted(data_helper_covertype.feature_columns)
    print(
        "Ordered column names, corresponding to the indexing in Tensorboard visualization"
    )
    for fi, column_name in enumerate(column_names):
        print(str(fi) + " : " + column_name)

    # Training parameters
    max_steps = 1000000
    display_step = 5000
    val_step = 10000
    save_step = 40000
    init_learning_rate = 0.02
    decay_every = 500
    decay_rate = 0.95
    batch_size = 16384
    sparsity_loss_weight = 0.0001
    gradient_thresh = 2000.0

    # Input sampling
    train_batch = data_helper_covertype.input_fn(train_file,
                                                 num_epochs=100000,
                                                 shuffle=True,
                                                 batch_size=batch_size)
    val_batch = data_helper_covertype.input_fn(
        val_file,
        num_epochs=10000,
        shuffle=False,
        batch_size=data_helper_covertype.n_val_samples)
    test_batch = data_helper_covertype.input_fn(
        test_file,
        num_epochs=10000,
        shuffle=False,
        batch_size=data_helper_covertype.n_test_samples)

    train_iter = train_batch.make_initializable_iterator()
    val_iter = val_batch.make_initializable_iterator()
    test_iter = test_batch.make_initializable_iterator()

    feature_train_batch, label_train_batch = train_iter.get_next()
    feature_val_batch, label_val_batch = val_iter.get_next()
    feature_test_batch, label_test_batch = test_iter.get_next()

    # Define the model and losses
    encoded_train_batch, total_entropy = tabnet_forest_covertype.encoder(
        feature_train_batch, reuse=False, is_training=True)

    logits_orig_batch, _ = tabnet_forest_covertype.classify(
        encoded_train_batch, reuse=False)

    softmax_orig_key_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_orig_batch, labels=label_train_batch))

    # Total loss = classification loss + weighted entropy term from the encoder.
    train_loss_op = softmax_orig_key_op + sparsity_loss_weight * total_entropy
    tf.summary.scalar("Total loss", train_loss_op)

    # Optimization step
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(init_learning_rate,
                                               global_step=global_step,
                                               decay_steps=decay_every,
                                               decay_rate=decay_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(train_loss_op)
        # compute_gradients yields (None, var) for variables the loss does not
        # depend on; clip_by_value cannot handle None, so skip those pairs.
        capped_gvs = [(tf.clip_by_value(grad, -gradient_thresh,
                                        gradient_thresh), var)
                      for grad, var in gvs
                      if grad is not None]
        train_op = optimizer.apply_gradients(capped_gvs,
                                             global_step=global_step)

    # Model evaluation

    # Validation performance
    encoded_val_batch, _ = tabnet_forest_covertype.encoder(feature_val_batch,
                                                           reuse=True,
                                                           is_training=False)

    _, prediction_val = tabnet_forest_covertype.classify(encoded_val_batch,
                                                         reuse=True)

    predicted_val_labels = tf.cast(tf.argmax(prediction_val, 1),
                                   dtype=tf.int32)
    val_eq_op = tf.equal(predicted_val_labels, label_val_batch)
    val_acc_op = tf.reduce_mean(tf.cast(val_eq_op, dtype=tf.float32))
    tf.summary.scalar("Val accuracy", val_acc_op)

    # Test performance
    encoded_test_batch, _ = tabnet_forest_covertype.encoder(feature_test_batch,
                                                            reuse=True,
                                                            is_training=False)

    _, prediction_test = tabnet_forest_covertype.classify(encoded_test_batch,
                                                          reuse=True)

    predicted_test_labels = tf.cast(tf.argmax(prediction_test, 1),
                                    dtype=tf.int32)
    test_eq_op = tf.equal(predicted_test_labels, label_test_batch)
    test_acc_op = tf.reduce_mean(tf.cast(test_eq_op, dtype=tf.float32))
    tf.summary.scalar("Test accuracy", test_acc_op)

    # Training setup
    model_name = "tabnet_forest_covertype_model"
    # global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.
    init = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()
    init_table = tf.tables_initializer(name="Initialize_all_tables")
    saver = tf.train.Saver()
    summaries = tf.summary.merge_all()

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter("./tflog/" + model_name,
                                               sess.graph)
        try:
            sess.run(init)
            sess.run(init_local)
            sess.run(init_table)
            sess.run(train_iter.initializer)
            sess.run(val_iter.initializer)
            sess.run(test_iter.initializer)

            for step in range(1, max_steps + 1):
                if step % display_step == 0:
                    _, train_loss, merged_summary = sess.run(
                        [train_op, train_loss_op, summaries])
                    summary_writer.add_summary(merged_summary, step)
                    print("Step " + str(step) + " , Training Loss = " +
                          "{:.4f}".format(train_loss))
                else:
                    sess.run(train_op)

                if step % val_step == 0:
                    # The test accuracy is fetched alongside the summaries but
                    # only reported through the "Test accuracy" summary.
                    merged_summary, val_acc, _ = sess.run(
                        [summaries, val_acc_op, test_acc_op])

                    print("Step " + str(step) + " , Val Accuracy = " +
                          "{:.4f}".format(val_acc))
                    summary_writer.add_summary(merged_summary, step)

                if step % save_step == 0:
                    saver.save(sess, "./checkpoints/" + model_name + ".ckpt")
        finally:
            # Flush and release the event file even if training is interrupted.
            summary_writer.close()
Пример #10
0
def build_model_fn(features, labels, mode, params):
  """The model_fn for MnasNet to be used with TPUEstimator.

  Normalizes the input images, builds an MnasNet or MixNet model (selected by
  the prefix of `params['model_name']`), and assembles the spec for the
  requested mode.

  Args:
    features: `Tensor` of batched images, or a `dict` holding that tensor
      under the key `'feature'`.
    labels: `Tensor` of labels for the data samples
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
      `params['batch_size']` is always provided and should be used as the
      effective batch size.

  Returns:
    A `TPUEstimatorSpec` for the model. In PREDICT mode it carries the
    `classes` / `probabilities` predictions; otherwise it carries the loss
    together with the train op and host call (TRAIN) or the eval metrics
    (EVAL).

  Raises:
    ValueError: if `params['model_name']` starts with neither `'mnasnet'`
      nor `'mixnet'`.
    ImportError: if `params['quantized_training']` is set but
      `tensorflow.contrib.quantize` cannot be imported.
  """
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  # This is essential, if using a keras-derived model.
  tf.keras.backend.set_learning_phase(is_training)

  # When the input arrives as a dict, the image tensor is stored under
  # 'feature'.
  if isinstance(features, dict):
    features = features['feature']

  if mode == tf.estimator.ModeKeys.PREDICT:
    # Adds an identity node to help TFLite export.
    features = tf.identity(features, 'float_image_input')

  # In most cases, the default data format NCHW instead of NHWC should be
  # used for a significant performance boost on GPU. NHWC should be used
  # only if the network needs to be run on CPU since the pooling operations
  # are only supported on NHWC. TPU uses XLA compiler to figure out best layout.
  # stats_shape places the 3 RGB statistics on the channel axis so they
  # broadcast against the image tensor in the normalization below.
  if params['data_format'] == 'channels_first':
    assert not params['transpose_input']    # channels_first only for GPU
    features = tf.transpose(features, [0, 3, 1, 2])
    stats_shape = [3, 1, 1]
  else:
    stats_shape = [1, 1, 3]

  # The input transpose is undone for TRAIN and EVAL only; PREDICT inputs are
  # used as given.
  if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
    features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

  # Normalize the image to zero mean and unit variance.
  features -= tf.constant(
      imagenet_input.MEAN_RGB, shape=stats_shape, dtype=features.dtype)
  features /= tf.constant(
      imagenet_input.STDDEV_RGB, shape=stats_shape, dtype=features.dtype)

  has_moving_average_decay = (params['moving_average_decay'] > 0)

  tf.logging.info('Using open-source implementation for MnasNet definition.')
  # Only truthy settings are forwarded as overrides (presumably so the model
  # builder's own defaults apply otherwise -- confirm against the builders).
  # 'use_keras' is always forwarded.
  override_params = {}
  if params['batch_norm_momentum']:
    override_params['batch_norm_momentum'] = params['batch_norm_momentum']
  if params['batch_norm_epsilon']:
    override_params['batch_norm_epsilon'] = params['batch_norm_epsilon']
  if params['dropout_rate']:
    override_params['dropout_rate'] = params['dropout_rate']
  if params['data_format']:
    override_params['data_format'] = params['data_format']
  if params['num_label_classes']:
    override_params['num_classes'] = params['num_label_classes']
  if params['depth_multiplier']:
    override_params['depth_multiplier'] = params['depth_multiplier']
  if params['depth_divisor']:
    override_params['depth_divisor'] = params['depth_divisor']
  if params['min_depth']:
    override_params['min_depth'] = params['min_depth']
  override_params['use_keras'] = params['use_keras']

  def _build_model(model_name):
    """Build the model for a given model name.

    Dispatches on the name prefix ('mnasnet' or 'mixnet') and closes over
    `features`, `is_training` and `override_params` from the enclosing scope.

    Raises:
      ValueError: if the name matches neither prefix.
    """
    if model_name.startswith('mnasnet'):
      return mnasnet_models.build_mnasnet_model(
          features,
          model_name=model_name,
          training=is_training,
          override_params=override_params)
    elif model_name.startswith('mixnet'):
      return mixnet_builder.build_model(
          features,
          model_name=model_name,
          training=is_training,
          override_params=override_params)
    else:
      raise ValueError('Unknown model name {}'.format(model_name))

  # Under bfloat16 the model is built inside the TPU bfloat16 scope and the
  # logits are cast back to float32 for the loss and metrics below.
  if params['precision'] == 'bfloat16':
    with tf.tpu.bfloat16_scope():
      logits, _ = _build_model(params['model_name'])
    logits = tf.cast(logits, tf.float32)
  else:  # params['precision'] == 'float32'
    logits, _ = _build_model(params['model_name'])

  if params['quantized_training']:
    # tensorflow.contrib is imported lazily; if it is unavailable the failure
    # is logged and the ImportError re-raised.
    try:
      from tensorflow.contrib import quantize  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      logging.exception('Quantized training is not supported in TensorFlow 2.x')
      raise e

    if is_training:
      tf.logging.info('Adding fake quantization ops for training.')
      quantize.create_training_graph(
          quant_delay=int(params['steps_per_epoch'] *
                          FLAGS.quantization_delay_epochs))
    else:
      tf.logging.info('Adding fake quantization ops for evaluation.')
      quantize.create_eval_graph()

  # PREDICT returns early: no labels, loss or train op are needed.
  if mode == tf.estimator.ModeKeys.PREDICT:
    scaffold_fn = None
    if FLAGS.export_moving_average:
      # If the model is trained with moving average decay, to match evaluation
      # metrics, we need to export the model using moving average variables.
      restore_checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)
      variables_to_restore = get_pretrained_variables_to_restore(
          restore_checkpoint, load_moving_average=True)
      tf.logging.info('Restoring from the latest checkpoint: %s',
                      restore_checkpoint)
      tf.logging.info(str(variables_to_restore))

      def restore_scaffold():
        """Return a Scaffold whose saver restores `variables_to_restore`."""
        saver = tf.train.Saver(variables_to_restore)
        return tf.train.Scaffold(saver=saver)

      scaffold_fn = restore_scaffold

    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'classify': tf.estimator.export.PredictOutput(predictions)
        },
        scaffold_fn=scaffold_fn)

  # If necessary, in the model_fn, use params['batch_size'] instead the batch
  # size flags (--train_batch_size or --eval_batch_size).
  batch_size = params['batch_size']  # pylint: disable=unused-variable

  # Calculate loss, which includes softmax cross entropy and L2 regularization.
  one_hot_labels = tf.one_hot(labels, params['num_label_classes'])
  cross_entropy = tf.losses.softmax_cross_entropy(
      logits=logits,
      onehot_labels=one_hot_labels,
      label_smoothing=params['label_smoothing'])

  # Add weight decay to the loss for non-batch-normalization variables.
  loss = cross_entropy + params['weight_decay'] * tf.add_n([
      tf.nn.l2_loss(v)
      for v in tf.trainable_variables()
      if 'batch_normalization' not in v.name
  ])

  global_step = tf.train.get_global_step()
  # `ema` and `ema_vars` exist only when moving-average decay is enabled;
  # every later use is guarded by the same `has_moving_average_decay` flag.
  if has_moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=params['moving_average_decay'], num_updates=global_step)
    ema_vars = mnas_utils.get_ema_vars()

  host_call = None
  if is_training:
    # Compute the current epoch and associated learning rate from global_step.
    current_epoch = (
        tf.cast(global_step, tf.float32) / params['steps_per_epoch'])

    # The base learning rate is scaled linearly with the train batch size,
    # relative to a batch of 256.
    scaled_lr = params['base_learning_rate'] * (params['train_batch_size'] / 256.0)  # pylint: disable=line-too-long
    learning_rate = mnas_utils.build_learning_rate(scaled_lr, global_step,
                                                   params['steps_per_epoch'])
    optimizer = mnas_utils.build_optimizer(learning_rate)
    if params['use_tpu']:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch normalization requires UPDATE_OPS to be added as a dependency to
    # the train operation.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)

    # With EMA enabled, the returned train op is the EMA update, which is made
    # to depend on the optimizer step.
    if has_moving_average_decay:
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)

    if not params['skip_host_call']:

      def host_call_fn(gs, loss, lr, ce):
        """Training host call.

        Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        # Only the first element of each batched tensor is logged.
        gs = gs[0]
        # Host call fns are executed params['iterations_per_loop'] times after
        # one TPU loop is finished, setting max_queue value to the same as
        # number of iterations will make the summary writer only flush the
        # data to storage once per loop.
        with tf2.summary.create_file_writer(
            FLAGS.model_dir,
            max_queue=params['iterations_per_loop']).as_default():
          with tf2.summary.record_if(True):
            tf2.summary.scalar('loss', loss[0], step=gs)
            tf2.summary.scalar('learning_rate', lr[0], step=gs)
            tf2.summary.scalar('current_epoch', ce[0], step=gs)

            return tf.summary.all_v2_summary_ops()

      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [params['batch_size']].
      gs_t = tf.reshape(global_step, [1])
      loss_t = tf.reshape(loss, [1])
      lr_t = tf.reshape(learning_rate, [1])
      ce_t = tf.reshape(current_epoch, [1])

      host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(labels, logits):
      """Evaluation metric function.

      Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation, keyed by
        `'top_1_accuracy'` and `'top_5_accuracy'`.
      """
      predictions = tf.argmax(logits, axis=1)
      top_1_accuracy = tf.metrics.accuracy(labels, predictions)
      in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
      top_5_accuracy = tf.metrics.mean(in_top_5)

      return {
          'top_1_accuracy': top_1_accuracy,
          'top_5_accuracy': top_5_accuracy,
      }

    eval_metrics = (metric_fn, [labels, logits])

  # Log the total element count across all trainable variables.
  num_params = np.sum([np.prod(v.shape) for v in tf.trainable_variables()])
  tf.logging.info('number of trainable parameters: {}'.format(num_params))

  # Prepares scaffold_fn if needed.
  scaffold_fn = None
  if is_training and FLAGS.init_checkpoint:
    variables_to_restore = get_pretrained_variables_to_restore(
        FLAGS.init_checkpoint, has_moving_average_decay)
    tf.logging.info('Initializing from pretrained checkpoint: %s',
                    FLAGS.init_checkpoint)
    # On TPU the checkpoint initialization is deferred into the scaffold_fn;
    # otherwise it is registered immediately.
    if FLAGS.use_tpu:

      def init_scaffold():
        """Register the checkpoint initialization and return a Scaffold."""
        tf.train.init_from_checkpoint(FLAGS.init_checkpoint,
                                      variables_to_restore)
        return tf.train.Scaffold()

      scaffold_fn = init_scaffold
    else:
      tf.train.init_from_checkpoint(FLAGS.init_checkpoint, variables_to_restore)

  restore_vars_dict = None
  if not is_training and has_moving_average_decay:
    # Load moving average variables for eval.
    restore_vars_dict = ema.variables_to_restore(ema_vars)

    def eval_scaffold():
      """Return a Scaffold whose saver restores the moving-average values."""
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)

    scaffold_fn = eval_scaffold

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)
def bert_model_fn(features, labels, is_training):  # pylint: disable=unused-argument
  """The `model_fn` for LowLevelRunner.

  Builds a BERT model with masked-LM and next-sentence heads. For training it
  returns the optimizer op on the summed loss; for evaluation it returns the
  weighted masked-LM correct/total counts.

  Args:
    features: dict of input tensors. Must contain "input_ids", "input_mask",
      "segment_ids", "masked_lm_positions", "masked_lm_ids",
      "masked_lm_weights" and "next_sentence_labels".
    labels: Unused.
    is_training: bool, selects between the training and evaluation outputs.

  Returns:
    A `(train_op, eval_outputs)` pair: `(train_op, None)` when `is_training`
    is true, otherwise `(None, dict)` where the dict holds
    "masked_lm_weighted_correct" and "masked_lm_weighted_count", each
    reshaped to rank 1.
  """

  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    # Pass the values as logging args so formatting happens lazily.
    tf.logging.info("  name = %s, shape = %s", name, features[name].shape)

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  masked_lm_positions = features["masked_lm_positions"]
  masked_lm_ids = features["masked_lm_ids"]
  masked_lm_weights = features["masked_lm_weights"]
  next_sentence_labels = features["next_sentence_labels"]

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
  use_one_hot_embeddings = False
  learning_rate = FLAGS.learning_rate
  num_train_steps = FLAGS.num_train_steps
  num_warmup_steps = FLAGS.num_warmup_steps
  start_warmup_step = FLAGS.start_warmup_step
  use_tpu = FLAGS.use_tpu
  optimizer = FLAGS.optimizer
  poly_power = FLAGS.poly_power
  lamb_weight_decay_rate = FLAGS.lamb_weight_decay_rate
  lamb_beta_1 = FLAGS.lamb_beta_1
  lamb_beta_2 = FLAGS.lamb_beta_2
  log_epsilon = FLAGS.log_epsilon

  # Report the key hyperparameters both to the TF log and to stdout.
  logged_settings = (
      ("learning rate", learning_rate),
      ("lamb_weight_decay_rate", lamb_weight_decay_rate),
      ("beta 1", lamb_beta_1),
      ("beta 2", lamb_beta_2),
      ("log_epsilon", log_epsilon),
      ("num_warmup_steps", num_warmup_steps),
      ("num_train_steps", num_train_steps),
  )
  for label, value in logged_settings:
    tf.logging.info("Using %s: %s", label, value)
    print("Using %s:" % label, value)

  tf.get_variable_scope().set_custom_getter(
      modeling.bfloat16_var_getter if FLAGS.use_bfloat16_activation else None)

  if FLAGS.use_bfloat16_activation:
    tf.logging.info("Using bfloat16 for activations.")

  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      use_bfloat16_activation=FLAGS.use_bfloat16_activation,
      num_partitions=FLAGS.num_partitions)

  # Both heads receive the model outputs cast to float32.
  (masked_lm_loss, masked_lm_example_loss,
   masked_lm_log_probs) = get_masked_lm_output(
       bert_config, tf.cast(model.get_sequence_output(), tf.float32),
       model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
       masked_lm_weights, FLAGS.num_partitions)

  (
      next_sentence_loss,
      _,  #  next_sentence_example_loss,
      _  #  next_sentence_log_probs
  ) = get_next_sentence_output(bert_config,
                               tf.cast(model.get_pooled_output(), tf.float32),
                               next_sentence_labels)

  total_loss = masked_lm_loss + next_sentence_loss

  if not is_training:
    # Computes the weighted masked-LM correct and total counts.
    masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                     [-1, masked_lm_log_probs.shape[-1]])
    masked_lm_predictions = tf.argmax(
        masked_lm_log_probs, axis=-1, output_type=tf.int32)
    # Not used below; kept so the evaluation graph is unchanged.
    masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
    masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
    masked_lm_weights = tf.reshape(masked_lm_weights, [-1])

    masked_lm_weighted_correct = tf.multiply(
        tf.cast(tf.equal(masked_lm_ids, masked_lm_predictions), tf.float32),
        masked_lm_weights)
    masked_lm_weighted_correct = tf.reduce_sum(masked_lm_weighted_correct)
    masked_lm_weighted_count = tf.reduce_sum(masked_lm_weights)

    return None, {
        "masked_lm_weighted_correct":
            tf.reshape(masked_lm_weighted_correct, [-1]),
        "masked_lm_weighted_count":
            tf.reshape(masked_lm_weighted_count, [-1])}

  train_op = optimization.create_optimizer(total_loss, learning_rate,
                                           num_train_steps, num_warmup_steps,
                                           use_tpu, optimizer, poly_power,
                                           start_warmup_step,
                                           lamb_weight_decay_rate,
                                           lamb_beta_1, lamb_beta_2,
                                           log_epsilon,
                                           FLAGS.use_bfloat16_all_reduce)

  return train_op, None
Пример #12
0
    def __init__(self,
                 config,
                 w2i_target,
                 useTeacherForcing=True,
                 useAttention=True,
                 useBeamSearch=1):
        """Build the seq2seq graph: bidirectional GRU encoder, GRU decoder.

        Sets `self.out` (predicted token ids). When beam search is off
        (`useBeamSearch <= 1`) it also sets `self.loss` and `self.train_op`.

        Args:
          config: object providing `source_vocab_size`, `target_vocab_size`,
            `embedding_dim`, `hidden_dim`, `batch_size` and `learning_rate`.
          w2i_target: mapping from target token to id; must contain "_GO"
            and "_EOS".
          useTeacherForcing: feed the shifted ground-truth targets to the
            decoder instead of its own greedy predictions.
          useAttention: wrap the decoder cell with Bahdanau attention over
            the encoder outputs.
          useBeamSearch: beam width; values greater than 1 switch the decoder
            to beam search.
        """
        # Presumably creates self.seq_inputs, self.seq_inputs_length,
        # self.seq_targets and self.seq_targets_length, which are read below
        # -- confirm against build_inputs.
        self.build_inputs(config)

        seq2seq = tf.contrib.seq2seq
        beam_width = useBeamSearch
        use_beam = beam_width > 1

        with tf.variable_scope("encoder"):
            encoder_embedding = tf.Variable(
                tf.random_uniform(
                    [config.source_vocab_size, config.embedding_dim]),
                dtype=tf.float32,
                name='encoder_embedding')
            embedded_source = tf.nn.embedding_lookup(encoder_embedding,
                                                     self.seq_inputs)

            forward_cell = tf.nn.rnn_cell.GRUCell(config.hidden_dim)
            backward_cell = tf.nn.rnn_cell.GRUCell(config.hidden_dim)
            rnn_outputs, rnn_final_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=forward_cell,
                cell_bw=backward_cell,
                inputs=embedded_source,
                sequence_length=self.seq_inputs_length,
                dtype=tf.float32,
                time_major=False)
            fw_outputs, bw_outputs = rnn_outputs
            fw_final_state, bw_final_state = rnn_final_states

            # Forward and backward directions are merged by summation.
            encoder_state = tf.add(fw_final_state, bw_final_state)
            encoder_outputs = tf.add(fw_outputs, bw_outputs)

        with tf.variable_scope("decoder"):
            decoder_embedding = tf.Variable(
                tf.random_uniform(
                    [config.target_vocab_size, config.embedding_dim]),
                dtype=tf.float32,
                name='decoder_embedding')

            tokens_go = tf.ones([config.batch_size],
                                dtype=tf.int32,
                                name='tokens_GO') * w2i_target["_GO"]

            if useTeacherForcing:
                # Decoder input is the target shifted right by one, with _GO
                # as the first token.
                go_column = tf.reshape(tokens_go, [-1, 1])
                shifted_targets = self.seq_targets[:, :-1]
                decoder_inputs = tf.concat([go_column, shifted_targets], 1)
                embedded_targets = tf.nn.embedding_lookup(
                    decoder_embedding, decoder_inputs)
                helper = seq2seq.TrainingHelper(embedded_targets,
                                                self.seq_targets_length)
            else:
                helper = seq2seq.GreedyEmbeddingHelper(decoder_embedding,
                                                       tokens_go,
                                                       w2i_target["_EOS"])

            with tf.variable_scope("gru_cell"):
                decoder_cell = tf.nn.rnn_cell.GRUCell(config.hidden_dim)

                if useAttention:
                    # With beam search every encoder-side tensor is tiled
                    # beam_width times along the batch axis.
                    attention_memory = encoder_outputs
                    attention_lengths = self.seq_inputs_length
                    if use_beam:
                        attention_memory = seq2seq.tile_batch(
                            encoder_outputs, multiplier=beam_width)
                        attention_lengths = seq2seq.tile_batch(
                            self.seq_inputs_length, multiplier=beam_width)

                    attention_mechanism = seq2seq.BahdanauAttention(
                        num_units=config.hidden_dim,
                        memory=attention_memory,
                        memory_sequence_length=attention_lengths)
                    decoder_cell = seq2seq.AttentionWrapper(
                        decoder_cell, attention_mechanism)

                    if use_beam:
                        initial_cell_state = seq2seq.tile_batch(
                            encoder_state, multiplier=beam_width)
                        decoder_batch_size = config.batch_size * beam_width
                    else:
                        initial_cell_state = encoder_state
                        decoder_batch_size = config.batch_size

                    # Start from the wrapper's zero state, replacing the cell
                    # state with the encoder's final state.
                    zero_state = decoder_cell.zero_state(
                        batch_size=decoder_batch_size, dtype=tf.float32)
                    decoder_initial_state = zero_state.clone(
                        cell_state=initial_cell_state)
                elif use_beam:
                    decoder_initial_state = seq2seq.tile_batch(
                        encoder_state, multiplier=beam_width)
                else:
                    decoder_initial_state = encoder_state

            # Projects decoder outputs onto the target vocabulary.
            output_layer = tf.layers.Dense(config.target_vocab_size)
            if use_beam:
                decoder = seq2seq.BeamSearchDecoder(
                    decoder_cell,
                    decoder_embedding,
                    tokens_go,
                    w2i_target["_EOS"],
                    decoder_initial_state,
                    beam_width=beam_width,
                    output_layer=output_layer)
            else:
                decoder = seq2seq.BasicDecoder(decoder_cell,
                                               helper,
                                               decoder_initial_state,
                                               output_layer=output_layer)

            max_decode_steps = tf.reduce_max(self.seq_targets_length)
            decode_result = seq2seq.dynamic_decode(
                decoder, maximum_iterations=max_decode_steps)
            decoder_outputs, decoder_state, final_sequence_lengths = decode_result

        if use_beam:
            # Keep only the first beam (index 0); no loss or train op is
            # built for beam search.
            self.out = decoder_outputs.predicted_ids[:, :, 0]
            return

        decoder_logits = decoder_outputs.rnn_output
        self.out = tf.argmax(decoder_logits, 2)

        # Weight the loss by a mask built from the target sequence lengths.
        sequence_mask = tf.sequence_mask(self.seq_targets_length,
                                         dtype=tf.float32)
        self.loss = seq2seq.sequence_loss(logits=decoder_logits,
                                          targets=self.seq_targets,
                                          weights=sequence_mask)

        adam = tf.train.AdamOptimizer(learning_rate=config.learning_rate)
        self.train_op = adam.minimize(self.loss)
Пример #13
0
# Loss: mean over the batch of the per-example cross entropy between the
# labels `ys` and `prediction`.
with tf.name_scope('Loss'):
    # Predictions are clipped to [1e-15, 1.0] so tf.log never sees 0.
    # `axis` replaces the deprecated `reduction_indices` argument.
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-15, 1.0)),
                       axis=1))
    tf.summary.scalar('loss', cross_entropy)

# Training step: momentum SGD on the cross-entropy loss.
with tf.name_scope('Train'):
    train = tf.train.MomentumOptimizer(learnRate,
                                       momentum=0.9).minimize(cross_entropy)

# Accuracy: fraction of examples whose arg-max prediction matches the label.
with tf.name_scope('Accuracy'):
    correct = tf.equal(tf.argmax(ys, 1), tf.argmax(prediction, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

saver = tf.train.Saver()
# global_variables_initializer replaces the deprecated
# tf.initialize_all_variables.
init = tf.global_variables_initializer()
merge = tf.summary.merge_all()
trainWriter = tf.summary.FileWriter(trainLogPath, sess.graph)
testWriter = tf.summary.FileWriter(testLogPath)

# Restore from saverPath when saverDistPath exists; otherwise initialize the
# variables from scratch.
if Path(saverDistPath).exists():
    saver.restore(sess, saverPath)
else:
    sess.run(init)
Пример #14
0
 def _accuracy(self):
     """Set `self.accuracy`, `self.pred` and `self.labels` from the outputs.

     `self.accuracy` comes from `masked_accuracy` over the outputs, labels
     and label mask; `self.pred` and `self.labels` are the arg-max along
     axis 1 of the outputs and of the labels placeholder respectively.
     """
     label_tensor = self.placeholders['labels']
     label_mask = self.placeholders['labels_mask']
     self.accuracy = masked_accuracy(self.outputs, label_tensor, label_mask)
     self.pred = tf.argmax(self.outputs, 1)
     self.labels = tf.argmax(label_tensor, 1)
Пример #15
0
    def _model_fn(features, labels, mode, params=None):
        """Build the indicator-model graph and return an Estimator spec.

        Reads `n_parts`, `n_dims`, `transform_dims`, `hparams`, `sample_bbox`,
        `sample_vert`, `level_set`, `label_w`, `minimal_w` and `lr` from the
        enclosing scope.

        Args:
            features: dict with 'point', 'transform' and 'joint' entries; in
                training mode 'label', 'vert' and 'weight' are read as well.
            labels: ignored in training mode (it is overwritten from
                features['label']); otherwise forwarded to
                `model_utils.nasa_indicator` as `noise`.
            mode: a `tf.estimator.ModeKeys` value; only TRAIN is
                distinguished from the rest.
            params: when equal to 'gen_mesh', raw tensors are returned
                instead of an `EstimatorSpec`.

        Returns:
            For params == 'gen_mesh': a tuple `(latent_holder, latent_output,
            tensor)` where `tensor` concatenates `parts` with the expanded
            `predictions` along axis 1. Otherwise a
            `tf.estimator.EstimatorSpec` carrying `loss` and `train_op`.
        """
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        # The first two dimensions of 'point' are folded into one batch axis.
        batch_size = features['point'].shape[0]
        n_sample_frames = features['point'].shape[1]
        accum_size = batch_size * n_sample_frames

        if params == 'gen_mesh':
            # Constant / placeholder pair that is only returned to the caller
            # in the 'gen_mesh' path below.
            latent_output = tf.constant([0, 0, 0], dtype=tf.float32)
            latent_holder = tf.placeholder(tf.float32, latent_output.shape)

        # Decode the tranformed shapes and compute the losses
        with tf.variable_scope('shape/decode', reuse=tf.AUTO_REUSE):
            transform = tf.reshape(features['transform'],
                                   [accum_size, n_parts, transform_dims])
            joint = tf.reshape(features['joint'],
                               [accum_size, n_parts, n_dims])
            points = features['point']
            n_points = tf.shape(points)[2]
            points = tf.reshape(points, [accum_size, n_points, n_dims])

            if is_training:
                labels = tf.reshape(features['label'],
                                    [accum_size, n_points, 1])
                predictions, parts = model_utils.nasa_indicator(
                    points,
                    transform,
                    joint,
                    hparams,
                    need_transformation=True)
                indicator_loss = model_utils.compute_l2_indicator_loss(
                    labels, predictions)

                # Mean squared value of the first `sample_bbox` entries along
                # the second-to-last axis of `parts`.
                minimal_loss = tf.reduce_mean(
                    tf.square(parts[..., :sample_bbox, :]))

                part_points = tf.reshape(features['vert'],
                                         [accum_size, -1, n_dims])
                part_weight = tf.reshape(features['weight'],
                                         [accum_size, -1, n_parts])
                if sample_vert > 0:  # If 0, use all vertices.
                    # Draw `sample_vert` random vertex indices per row and
                    # gather the same indices from points and weights.
                    n_vert = part_points.shape[1]
                    sample_indices = tf.random.uniform(
                        [accum_size, sample_vert],
                        minval=0,
                        maxval=n_vert,
                        dtype=tf.int32)
                    part_points = tf.gather(part_points,
                                            sample_indices,
                                            axis=1,
                                            batch_dims=1)
                    part_weight = tf.gather(part_weight,
                                            sample_indices,
                                            axis=1,
                                            batch_dims=1)
                # Second indicator pass on the vertices; only the per-part
                # output is used.
                unused_var, pred_parts = model_utils.nasa_indicator(
                    part_points,
                    transform,
                    joint,
                    hparams,
                    need_transformation=True)
                # Target: one-hot of the highest-weight part scaled by
                # `level_set`, transposed to put the part axis second.
                part_label = tf.argmax(part_weight, axis=-1)
                part_label = tf.one_hot(
                    part_label, depth=n_parts, axis=-1,
                    dtype=tf.float32) * level_set
                part_label = tf.expand_dims(tf.transpose(
                    part_label, [0, 2, 1]),
                                            axis=-1)
                label_loss = model_utils.compute_l2_indicator_loss(
                    part_label, pred_parts)
            else:
                # Recomputes the same `n_points` / `points` as above.
                n_points = tf.shape(features['point'])[2]
                points = tf.reshape(features['point'],
                                    [accum_size, n_points, n_dims])
                predictions, parts = model_utils.nasa_indicator(
                    points,
                    transform,
                    joint,
                    hparams,
                    need_transformation=True,
                    noise=labels)

        if params == 'gen_mesh':
            return latent_holder, latent_output, tf.concat(
                [parts, tf.expand_dims(predictions, axis=1)], axis=1)

        # NOTE(review): `indicator_loss`, `label_loss` and `minimal_loss` are
        # only assigned in the training branch; reaching this point in any
        # other mode raises a NameError -- confirm that only TRAIN and
        # 'gen_mesh' are ever used.
        tf.summary.scalar('indicator', indicator_loss)
        # `loss` keeps the un-augmented indicator loss: the `+=` statements
        # below rebind `indicator_loss` to new tensors, so the reported loss
        # differs from the minimised one.
        # NOTE(review): presumably intentional -- confirm.
        loss = indicator_loss
        if label_w > 0:
            tf.summary.scalar('label', label_loss)
            indicator_loss += label_loss * label_w
        if minimal_w > 0:
            tf.summary.scalar('minimal', minimal_loss)
            indicator_loss += minimal_loss * minimal_w

        global_step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer(lr)
        # Run the ops in the UPDATE_OPS collection before each training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(indicator_loss,
                                          global_step=global_step,
                                          name='optimizer_shape')

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)
Пример #16
0
def resnet_model_fn(features, labels, mode, params):
  """The model_fn for ResNet to be used with TPUEstimator.

  Args:
    features: `Tensor` of batched images. If transpose_input is enabled, it
        is transposed to device layout and reshaped to 1D tensor. A dict is
        also accepted, in which case its 'feature' entry is used.
    labels: `Tensor` of labels for the data samples
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

  Returns:
    A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
    `TPUEstimatorSpec` for the model.

  Raises:
    ValueError: if `params['dropblock_groups']` names a block group outside
        1..4, or if `params['precision']` is neither 'bfloat16' nor
        'float32'.
  """
  if isinstance(features, dict):
    features = features['feature']

  # In most cases, the default data format NCHW instead of NHWC should be
  # used for a significant performance boost on GPU/TPU. NHWC should be used
  # only if the network needs to be run on CPU since the pooling operations
  # are only supported on NHWC.
  if params['data_format'] == 'channels_first':
    assert not params['transpose_input']    # channels_first only for GPU
    features = tf.transpose(features, [0, 3, 1, 2])

  if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
    image_size = tf.sqrt(tf.shape(features)[0] / (3 * tf.shape(labels)[0]))
    features = tf.reshape(features, [image_size, image_size, 3, -1])
    features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

  # Normalize the image to zero mean and unit variance.
  features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
  features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

  # DropBlock keep_prob for the 4 block groups of ResNet architecture.
  # None means applying no DropBlock at the corresponding block group.
  dropblock_keep_probs = [None] * 4
  if params['dropblock_groups']:
    # Scheduled keep_prob for DropBlock.
    train_steps = tf.cast(params['train_steps'], tf.float32)
    current_step = tf.cast(tf.train.get_global_step(), tf.float32)
    current_ratio = current_step / train_steps
    dropblock_keep_prob = (1 - current_ratio * (
        1 - params['dropblock_keep_prob']))

    # Computes DropBlock keep_prob for different block groups of ResNet.
    dropblock_groups = [int(x) for x in params['dropblock_groups'].split(',')]
    for block_group in dropblock_groups:
      if block_group < 1 or block_group > 4:
        raise ValueError(
            'dropblock_groups should be a comma separated list of integers '
            'between 1 and 4 (dropblock_groups: {}).'
            .format(params['dropblock_groups']))
      dropblock_keep_probs[block_group - 1] = 1 - (
          (1 - dropblock_keep_prob) / 4.0**(4 - block_group))

  # This nested function allows us to avoid duplicating the logic which
  # builds the network, for different values of --precision.
  def build_network():
    network = resnet_model.resnet(
        resnet_depth=params['resnet_depth'],
        num_classes=params['num_label_classes'],
        dropblock_size=params['dropblock_size'],
        dropblock_keep_probs=dropblock_keep_probs,
        pre_activation=params['pre_activation'],
        norm_act_layer=params['norm_act_layer'],
        data_format=params['data_format'])
    return network(
        inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))

  if params['precision'] == 'bfloat16':
    with tf.tpu.bfloat16_scope():
      logits = build_network()
    logits = tf.cast(logits, tf.float32)
  elif params['precision'] == 'float32':
    logits = build_network()
  else:
    # Without this branch `logits` stays unbound and the failure surfaces
    # later as an unrelated NameError.
    raise ValueError(
        "params['precision'] must be 'bfloat16' or 'float32', got {!r}."
        .format(params['precision']))

  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'classify': tf.estimator.export.PredictOutput(predictions)
        })

  # If necessary, in the model_fn, use params['batch_size'] instead the batch
  # size flags (--train_batch_size or --eval_batch_size).
  batch_size = params['batch_size']   # pylint: disable=unused-variable

  # Calculate loss, which includes softmax cross entropy and L2 regularization.
  one_hot_labels = tf.one_hot(labels, params['num_label_classes'])
  cross_entropy = tf.losses.softmax_cross_entropy(
      logits=logits,
      onehot_labels=one_hot_labels,
      label_smoothing=params['label_smoothing'])

  # Add weight decay to the loss for non-batch-normalization variables.
  # With LARS enabled no L2 term is added here.
  if params['enable_lars']:
    loss = cross_entropy
  else:
    loss = cross_entropy + params['weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name and 'evonorm' not in v.name
    ])

  host_call = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # Compute the current epoch and associated learning rate from global_step.
    global_step = tf.train.get_global_step()
    steps_per_epoch = params['num_train_images'] / params['train_batch_size']
    current_epoch = (tf.cast(global_step, tf.float32) /
                     steps_per_epoch)
    # LARS is a large batch optimizer. LARS enables higher accuracy at batch 16K
    # and larger batch sizes.
    if params['enable_lars']:
      # Placeholder value: only used for the 'learning_rate' summary below.
      learning_rate = 0.0
      optimizer = lars_util.init_lars_optimizer(current_epoch, params)
    else:
      learning_rate = learning_rate_schedule(params, current_epoch)
      optimizer = tf.train.MomentumOptimizer(
          learning_rate=learning_rate,
          momentum=params['momentum'],
          use_nesterov=True)
    if params['use_tpu']:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch normalization requires UPDATE_OPS to be added as a dependency to
    # the train operation.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)

    if not params['skip_host_call']:
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        # Host call fns are executed params['iterations_per_loop'] times after
        # one TPU loop is finished, setting max_queue value to the same as
        # number of iterations will make the summary writer only flush the data
        # to storage once per loop.
        with tf2.summary.create_file_writer(
            FLAGS.model_dir,
            max_queue=params['iterations_per_loop']).as_default():
          with tf2.summary.record_if(True):
            tf2.summary.scalar('loss', loss[0], step=gs)
            tf2.summary.scalar('learning_rate', lr[0], step=gs)
            tf2.summary.scalar('current_epoch', ce[0], step=gs)

          return tf.summary.all_v2_summary_ops()

      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [params['batch_size']].
      gs_t = tf.reshape(global_step, [1])
      loss_t = tf.reshape(loss, [1])
      lr_t = tf.reshape(learning_rate, [1])
      ce_t = tf.reshape(current_epoch, [1])

      host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:
    def metric_fn(labels, logits):
      """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
      predictions = tf.argmax(logits, axis=1)
      top_1_accuracy = tf.metrics.accuracy(labels, predictions)
      in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
      top_5_accuracy = tf.metrics.mean(in_top_5)

      return {
          'top_1_accuracy': top_1_accuracy,
          'top_5_accuracy': top_5_accuracy,
      }

    eval_metrics = (metric_fn, [labels, logits])

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics)
Пример #17
0
    def infer(self,
              features=None,
              decode_length=50,
              beam_size=1,
              top_beams=1,
              alpha=0.0,
              use_tpu=False):
        """Produce predictions from the model.

        When `self._hparams.do_mask` is false, decoding is delegated to the
        parent class and its "outputs" entry is returned. Otherwise the model
        is called once on an initial targets tensor (the supplied
        "partial_targets", or zeros twice as long as the input) and the
        result is returned.

        Args:
            features: dict of input features; treated as empty when falsy.
                A temporary axis may be added to "inputs" and "targets" is
                overwritten; "inputs" is restored before returning.
            decode_length: forwarded to the parent `infer` only.
            beam_size: forwarded to the parent `infer` only.
            top_beams: forwarded to the parent `infer` only.
            alpha: forwarded to the parent `infer` only.
            use_tpu: forwarded to the parent `infer` only.

        Returns:
            The parent's "outputs" when masking is disabled; otherwise the
            raw logits when the reference tensor is float32, else the
            arg-max of the logits over the last axis.
        """
        if not self._hparams.do_mask:
            infer_out = super(TransformerAE, self).infer(features,
                                                         decode_length,
                                                         beam_size,
                                                         top_beams,
                                                         alpha,
                                                         use_tpu=use_tpu)
            return infer_out["outputs"]
        if not features:
            features = {}
        inputs_old = None
        if "inputs" in features and len(features["inputs"].shape) < 4:
            inputs_old = features["inputs"]
            features["inputs"] = tf.expand_dims(features["inputs"], 2)

        # inputs might not be present in features (e.g.: language modeling),
        # in which case we fallback to 'infer_targets' for calculating initial
        # input shape, type, etc.
        # Looked up before the branch below: previously this name was bound
        # only when "partial_targets" was absent, yet read unconditionally
        # afterwards, so supplying partial targets raised UnboundLocalError.
        inputs_or_targets = features.get("inputs",
                                         features.get("infer_targets"))

        # Create an initial targets tensor.
        if "partial_targets" in features:
            initial_output = tf.convert_to_tensor(features["partial_targets"])
            # With neither inputs nor infer_targets available, fall back to
            # the partial targets themselves for the dtype decision.
            dtype_source = (initial_output if inputs_or_targets is None
                            else inputs_or_targets)
        else:
            batch_size = common_layers.shape_list(inputs_or_targets)[0]
            length = common_layers.shape_list(inputs_or_targets)[1]
            hidden_dim = common_layers.shape_list(inputs_or_targets)[-1]
            target_length = tf.to_int32(2.0 * tf.to_float(length))
            initial_output = tf.zeros(
                (batch_size, target_length, 1, hidden_dim),
                dtype=inputs_or_targets.dtype)
            dtype_source = inputs_or_targets

        # When target_modality is real, the last axis does not represent
        # classes, so it should not be argmax'ed.
        targets_are_real = dtype_source.dtype == tf.float32

        features["targets"] = initial_output
        logits, _ = self(features)  # pylint: disable=not-callable
        if targets_are_real:
            samples = logits
        else:
            samples = tf.argmax(logits, axis=-1)

        # More steps.
        self.predict_mask = 0.0  # Use the provided targets this time.
        how_many_more_steps = 0  # Set to 1 or more for Gibbs-like sampling.
        for _ in range(how_many_more_steps):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                features["targets"] = samples
                logits, _ = self(features)  # pylint: disable=not-callable
                if targets_are_real:
                    samples = logits
                else:
                    samples = tf.argmax(logits, axis=-1)

        self.predict_mask = 1.0
        if inputs_old is not None:  # Restore to not confuse Estimator.
            features["inputs"] = inputs_old
        return samples
Пример #18
0
    print ('fc1',fc1.get_shape())
    #fc1 = tf.nn.relu(fc1)
    #fc1 = tf.nn.dropout(fc1, dropout)
    
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out


# Logits of the network for the inputs `x` / `pixel_coordinate`.
pred = conv_lstm_net1(x, pixel_coordinate, weights, biases, keep_prob)
# Class probabilities from the logits built above. Previously the whole
# network was constructed a second time here, duplicating every op in the
# graph and decoupling `pred1` from the `pred` that is actually trained.
pred1 = tf.nn.softmax(pred)

# Mean softmax cross-entropy over the batch.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# tf.equal yields True/False per example; argmax gives the index of the
# largest element.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

# The cast turns True/False into 1/0 so the mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()


# Each repetition runs in a fresh session with re-initialised variables.
for i in range(test_num):
    with tf.Session() as sess:
        sess.run(init)
        out_num = int(training_iters / display_step) - 1
        Testing_Accuarcy = np.zeros([1, 2])
        step = 0
        accuracy_test = 0
        rec_loss = np.zeros([out_num, 1])
Пример #19
0
    def __init__(self, config: configure_pretraining.PretrainingConfig,
                 features, is_training):
        """Build the pre-training graph: generator, discriminator and metrics.

        Sets `self.mlm_output`, `self.total_loss` and `self.eval_metrics`.

        Args:
            config: pre-training configuration; selects the generator type
                and the objective (electra / electric, otherwise only the
                generator loss is used).
            features: input features, converted with
                `pretrain_data.features_to_inputs`.
            is_training: forwarded to the transformer builders.
        """
        # Set up model config
        self._config = config
        self._bert_config = training_utils.get_bert_config(config)
        if config.debug:
            # Shrink the model when debugging.
            self._bert_config.num_hidden_layers = 3
            self._bert_config.hidden_size = 144
            self._bert_config.intermediate_size = 144 * 4
            self._bert_config.num_attention_heads = 4

        # Mask the input
        unmasked_inputs = pretrain_data.features_to_inputs(features)
        masked_inputs = pretrain_helpers.mask(config, unmasked_inputs,
                                              config.mask_prob)

        # Generator
        # Embedding size defaults to the hidden size when not configured.
        embedding_size = (self._bert_config.hidden_size
                          if config.embedding_size is None else
                          config.embedding_size)
        # Only assigned by the two-tower generator branch below.
        cloze_output = None
        if config.uniform_generator:
            # simple generator sampling fakes uniformly at random
            mlm_output = self._get_masked_lm_output(masked_inputs, None)
        elif ((config.electra_objective or config.electric_objective)
              and config.untied_generator):
            generator_config = get_generator_config(config, self._bert_config)
            if config.two_tower_generator:
                # two-tower cloze model generator used for electric
                generator = TwoTowerClozeTransformer(config, generator_config,
                                                     unmasked_inputs,
                                                     is_training,
                                                     embedding_size)
                cloze_output = self._get_cloze_outputs(unmasked_inputs,
                                                       generator)
                # MLM output is derived from the cloze logits gathered at
                # the masked positions.
                mlm_output = get_softmax_output(
                    pretrain_helpers.gather_positions(
                        cloze_output.logits,
                        masked_inputs.masked_lm_positions),
                    masked_inputs.masked_lm_ids,
                    masked_inputs.masked_lm_weights,
                    self._bert_config.vocab_size)
            else:
                # small masked language model generator
                generator = build_transformer(
                    config,
                    masked_inputs,
                    is_training,
                    generator_config,
                    embedding_size=(None if config.untied_generator_embeddings
                                    else embedding_size),
                    untied_embeddings=config.untied_generator_embeddings,
                    scope="generator")
                mlm_output = self._get_masked_lm_output(
                    masked_inputs, generator)
        else:
            # full-sized masked language model generator if using BERT objective or if
            # the generator and discriminator have tied weights
            generator = build_transformer(config,
                                          masked_inputs,
                                          is_training,
                                          self._bert_config,
                                          embedding_size=embedding_size)
            mlm_output = self._get_masked_lm_output(masked_inputs, generator)
        fake_data = self._get_fake_data(masked_inputs, mlm_output.logits)
        self.mlm_output = mlm_output
        # NOTE(review): `cloze_output` is still None when
        # config.two_tower_generator is set but the two-tower branch above
        # was not taken (uniform or tied generator); `.loss` would then
        # raise AttributeError -- confirm those combinations are rejected
        # elsewhere.
        self.total_loss = config.gen_weight * (cloze_output.loss
                                               if config.two_tower_generator
                                               else mlm_output.loss)

        # Discriminator
        disc_output = None
        if config.electra_objective or config.electric_objective:
            # Variables are reused from the generator unless it is untied.
            discriminator = build_transformer(
                config,
                fake_data.inputs,
                is_training,
                self._bert_config,
                reuse=not config.untied_generator,
                embedding_size=embedding_size)
            disc_output = self._get_discriminator_output(
                fake_data.inputs, discriminator, fake_data.is_fake_tokens,
                cloze_output)
            self.total_loss += config.disc_weight * disc_output.loss

        # Evaluation
        eval_fn_inputs = {
            "input_ids": masked_inputs.input_ids,
            "masked_lm_preds": mlm_output.preds,
            "mlm_loss": mlm_output.per_example_loss,
            "masked_lm_ids": masked_inputs.masked_lm_ids,
            "masked_lm_weights": masked_inputs.masked_lm_weights,
            "input_mask": masked_inputs.input_mask
        }
        if config.electra_objective or config.electric_objective:
            eval_fn_inputs.update({
                "disc_loss":
                disc_output.per_example_loss,
                "disc_labels":
                disc_output.labels,
                "disc_probs":
                disc_output.probs,
                "disc_preds":
                disc_output.preds,
                "sampled_tokids":
                tf.argmax(fake_data.sampled_tokens, -1, output_type=tf.int32)
            })
        # Keys and values are kept in matching order so that `metric_fn`
        # can rebuild the dict from positional arguments.
        eval_fn_keys = eval_fn_inputs.keys()
        eval_fn_values = [eval_fn_inputs[k] for k in eval_fn_keys]

        def metric_fn(*args):
            """Computes the loss and accuracy of the model."""
            d = {k: arg for k, arg in zip(eval_fn_keys, args)}
            metrics = dict()
            metrics["masked_lm_accuracy"] = tf.metrics.accuracy(
                labels=tf.reshape(d["masked_lm_ids"], [-1]),
                predictions=tf.reshape(d["masked_lm_preds"], [-1]),
                weights=tf.reshape(d["masked_lm_weights"], [-1]))
            metrics["masked_lm_loss"] = tf.metrics.mean(
                values=tf.reshape(d["mlm_loss"], [-1]),
                weights=tf.reshape(d["masked_lm_weights"], [-1]))
            if config.electra_objective or config.electric_objective:
                metrics["sampled_masked_lm_accuracy"] = tf.metrics.accuracy(
                    labels=tf.reshape(d["masked_lm_ids"], [-1]),
                    predictions=tf.reshape(d["sampled_tokids"], [-1]),
                    weights=tf.reshape(d["masked_lm_weights"], [-1]))
                if config.disc_weight > 0:
                    metrics["disc_loss"] = tf.metrics.mean(d["disc_loss"])
                    metrics["disc_auc"] = tf.metrics.auc(
                        d["disc_labels"] * d["input_mask"],
                        d["disc_probs"] * tf.cast(d["input_mask"], tf.float32))
                    metrics["disc_accuracy"] = tf.metrics.accuracy(
                        labels=d["disc_labels"],
                        predictions=d["disc_preds"],
                        weights=d["input_mask"])
                    # Precision: accuracy restricted, via the weights, to
                    # positions predicted as fake.
                    metrics["disc_precision"] = tf.metrics.accuracy(
                        labels=d["disc_labels"],
                        predictions=d["disc_preds"],
                        weights=d["disc_preds"] * d["input_mask"])
                    # Recall: accuracy restricted, via the weights, to
                    # positions labelled as fake.
                    metrics["disc_recall"] = tf.metrics.accuracy(
                        labels=d["disc_labels"],
                        predictions=d["disc_preds"],
                        weights=d["disc_labels"] * d["input_mask"])
            return metrics

        self.eval_metrics = (metric_fn, eval_fn_values)
Пример #20
0
	Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
	Z3 = tf.nn.relu(tf.matmul(Z2r, w3) + b3)
	Yish = tf.matmul(Z3, W4) + b4

	
    cost = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=T
        )
    )

	train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99, momentum=0.9).minimize(cost)

	# we'll use this to calculate the error rate
    predict_op = tf.argmax(logits, 1)

    t0 = datetime.now()
    LL = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in xrange(max_iter):
            for j in xrange(n_batches):
                Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
                Ybatch = Ytrain[j*batch_sz:(j*batch_sz + batch_sz),]

				if len(Xbatch) == batch_sz:
	                session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
					if j % print_period == 0:
Пример #21
0
    def train(self, data_dir, save_model_path):
        """Train the CNN and write checkpoints to `save_model_path`.

        Data comes from `self.init_data(data_dir)` and is split 80/20 into
        training and test sets. Every step trains on a random batch; every
        10th step a random test batch is scored. A checkpoint is written
        every 500 steps, and training stops (after a final checkpoint) once
        the test-batch accuracy exceeds 0.99 past step 500.
        """
        print('ready load train dataset')
        X, y = self.init_data(data_dir)
        print('success load' + str(len(y)) + 'datas')
        train_x, test_x, train_y, test_y = train_test_split(
            X, y, test_size=0.2, random_state=0)

        # Graph: logits -> predicted / actual classes -> accuracy and loss.
        logits = self.cnn_construct()
        predicted_cls = tf.argmax(tf.nn.softmax(logits), axis=1)
        actual_cls = tf.argmax(self.y_place, axis=1)
        hits = tf.cast(tf.equal(predicted_cls, actual_cls), dtype=tf.float32)
        accuracy = tf.reduce_mean(hits)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y_place)
        cost = tf.reduce_mean(per_example_loss)
        train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            step = 0
            saver = tf.train.Saver()
            while True:
                # One optimisation step on a random training batch.
                batch_idx = np.random.choice(len(train_x),
                                             self.batch_size,
                                             replace=False)
                train_feed = {
                    self.x_place: train_x[batch_idx],
                    self.y_place: train_y[batch_idx],
                    self.keep_place: 0.75,
                }
                _, loss = sess.run([train_step, cost], feed_dict=train_feed)
                step += 1

                # Everything below happens only on every 10th step.
                if step % 10 != 0:
                    continue

                eval_idx = np.random.choice(len(test_x),
                                            self.batch_size,
                                            replace=False)
                eval_feed = {
                    self.x_place: test_x[eval_idx],
                    self.y_place: test_y[eval_idx],
                    self.keep_place: 1.0,
                }
                acc = sess.run(accuracy, feed_dict=eval_feed)
                print(step, loss)
                if step % 50 == 0:
                    print('accuracy:' + str(acc))
                if step % 500 == 0:
                    saver.save(sess, save_model_path, global_step=step)
                if acc > 0.99 and step > 500:
                    saver.save(sess, save_model_path, global_step=step)
                    break
Пример #22
0
def k_clusters(K, v):
    """Fit a K-component model to 'data2D.npy' with Adam and plot the result.

    The loss is the negative sum over points of the log-sum-exp of
    `log_GaussPDF(X, MU, sigma)` plus the log mixing weights. Training runs
    for 500 full-batch steps; the final cluster assignments are plotted with
    `plot_sc`, followed by the training-loss curve.

    Args:
        K: number of clusters / components.
        v: when truthy, one third of the (shuffled) points is held out as a
            validation set and its loss is recorded after every step.

    Returns:
        List of validation losses, one per training step (empty when `v` is
        falsy).
    """
    # Loading data
    data = np.load('data2D.npy')
    #data = np.load('data100D.npy')

    [num_pts, dim] = np.shape(data)

    is_valid = v
    # For Validation set
    if is_valid:
        valid_batch = int(num_pts / 3.0)
        np.random.seed(45689)
        rnd_idx = np.arange(num_pts)
        np.random.shuffle(rnd_idx)
        val_data = data[rnd_idx[:valid_batch]]
        data = data[rnd_idx[valid_batch:]]

    np.random.seed(420)
    iterx = 500
    loss_arr = []
    loss_arr_valid = []
    arr = []

    # Unnormalised mixing weights, turned into log-probabilities.
    initpi = tf.Variable(tf.random_normal([K, 1], stddev=0.05))
    lpi = tf.squeeze(hlp.logsoftmax(initpi))

    X = tf.placeholder("float", [None, dim], "X")
    init_mean = tf.random_normal([K, dim], stddev=0.05)
    MU = tf.Variable(init_mean)
    init_sigma = tf.random_normal([K, 1], stddev=0.05)
    # exp keeps sigma positive while the underlying variable is unconstrained.
    sigma = tf.exp(tf.Variable(init_sigma))
    pdf = log_GaussPDF(X, MU, sigma)

    red_min = hlp.reduce_logsumexp(pdf + lpi, 1, keep_dims=True)
    loss = -tf.reduce_sum(red_min)
    adam_opt = tf.train.AdamOptimizer(learning_rate=0.1,
                                      beta1=0.9,
                                      beta2=0.99,
                                      epsilon=1e-5).minimize(loss)

    # Arg-max of the posterior: the cluster each point is assigned to.
    lpost = log_posterior(pdf, lpi)
    smax = tf.nn.softmax(lpost)
    assignments = tf.argmax(smax, 1)

    with tf.Session() as s:
        s.run(tf.global_variables_initializer())
        s.run(tf.local_variables_initializer())

        for steps in range(iterx):
            _, lTrain, _, arr = s.run([MU, loss, adam_opt, assignments],
                                      feed_dict={X: data})
            loss_arr.append(lTrain)
            if is_valid:
                # Evaluate only. Fetching `adam_opt` here, as the code used
                # to, ran an optimisation step on the held-out data and so
                # trained on the validation set.
                lVal = s.run(loss, feed_dict={X: val_data})
                loss_arr_valid.append(lVal)

        #d_ = distanceFunc(X,MU)

        plot_sc(data, num_pts, arr, K)

        if is_valid:
            lval = np.format_float_positional(np.float32(lVal))

        fig = plt.figure(1)
        plt.title('K Means Clusters K = %i' % K)
        plt.legend(loc="best")
        plt.ylabel('Y')
        plt.xlabel('X')
        if is_valid:
            fig.text(.1, .0005, f'Final Validation Loss: {lval}', ha='left')
        plt.grid()
        plt.show()

    plt.figure(1)
    plt.plot(range(len(loss_arr)), loss_arr, c="g", label="training Loss")

    plt.legend(loc="best")
    plt.title('K Means')
    plt.ylabel('Loss')
    plt.xlabel('Iterations')
    plt.show()

    return loss_arr_valid
Пример #23
0
def train_crack_captcha_cnn():
    """Train the captcha CNN until an evaluation batch exceeds 90% accuracy.

    Builds the loss, optimiser and accuracy ops on top of the module-level
    placeholders ``X``, ``Y``, ``p_keep_conv`` and ``p_keep_hidden`` and the
    module-level ``model`` function, then trains in an open-ended loop on
    batches of 64 samples from ``get_next_batch``.

    Every 10 steps the accuracy is measured on a batch of 100 samples and
    written, together with the cost summary, to the TensorBoard log
    directory. As soon as that accuracy is above 0.9 a checkpoint named
    ``crack_capcha.model`` is saved and the function returns.

    The summary writer is closed in a ``finally`` clause so that buffered
    events are flushed to disk even when training is interrupted (for
    example with Ctrl-C) or fails with an exception.
    """
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    py_x = model(x, p_keep_conv, p_keep_hidden)

    with tf.name_scope('cost'):
        # Multi-label, multi-class problem: every character position has its
        # own one-hot group, so the output is scored with a per-unit sigmoid
        # cross entropy rather than one softmax over the whole vector.
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=py_x, labels=Y))
        # Adam optimiser: learning rate 0.001, beta1 0.9.
        train_op = tf.train.AdamOptimizer(0.001, 0.9).minimize(cost)
        tf.summary.scalar('cost', cost)  # Log the scalar for TensorBoard.

    with tf.name_scope('accuracy'):
        # Accuracy is computed per character: reshape to
        # [batch, MAX_CAPTCHA, len(CHAR_SET)] and compare the arg-max of each
        # character slot between prediction and label.
        predict = tf.reshape(py_x, [-1, MAX_CAPTCHA, len(CHAR_SET)])
        max_idx_p = tf.argmax(predict, 2)
        max_idx_l = tf.argmax(
            tf.reshape(Y, [-1, MAX_CAPTCHA, len(CHAR_SET)]), 2)
        correct_pred = tf.equal(max_idx_p, max_idx_l)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('accuracy', accuracy)  # Log the scalar.

    saver = tf.train.Saver()
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./人工智能实验课/logs/captcha_logs",
                                       sess.graph)
        try:
            merged = tf.summary.merge_all()
            sess.run(tf.global_variables_initializer())

            step = 0
            while True:
                batch_x, batch_y = get_next_batch(64)
                _, loss = sess.run([train_op, cost],
                                   feed_dict={
                                       X: batch_x,
                                       Y: batch_y,
                                       p_keep_conv: 0.75,
                                       p_keep_hidden: 0.75
                                   })
                print(step, loss)
                # Evaluate the accuracy once every 10 steps.
                if step % 10 == 0:
                    batch_x_test, batch_y_test = get_next_batch(100)
                    summary, acc = sess.run(
                        [merged, accuracy],
                        feed_dict={
                            X: batch_x_test,
                            Y: batch_y_test,
                            p_keep_conv: 1.0,
                            p_keep_hidden: 1.0
                        })
                    writer.add_summary(summary, step)
                    print('准确率:', step, acc)
                    # Once accuracy exceeds 90%, save the model and stop.
                    if acc > 0.9:
                        saver.save(sess,
                                   "crack_capcha.model",
                                   global_step=step)
                        break

                step += 1
        finally:
            # Flush and release the event file; without this the most recent
            # summaries can be lost when the loop is interrupted.
            writer.close()
Пример #24
0
def argmax(x, axis=None):
    """Return the index of the largest value of ``x`` along ``axis``.

    Thin wrapper around ``tf.argmax``.

    Args:
        x: Tensor to search.
        axis: Axis to reduce over; forwarded unchanged to ``tf.argmax``.

    Returns:
        The tensor produced by ``tf.argmax``.
    """
    # The ``dimension=`` keyword is a deprecated alias of ``axis`` and is not
    # accepted by newer TensorFlow releases. The second positional parameter
    # of ``tf.argmax`` is the reduction axis under both spellings, so passing
    # it positionally keeps the wrapper working across versions.
    return tf.argmax(x, axis)
Пример #25
0
def _single_column_cell_selection_loss(token_logits, column_logits, label_ids,
                                       cell_index, col_index, cell_mask):
    """Hierarchical cell-selection loss restricted to a single column.

    The likelihood is factored in two steps: a categorical choice of column,
    followed by independent Bernoulli choices for the cells of that column.
    Cells that lie outside the chosen column can never be selected.

    Args:
        token_logits: <float>[batch_size, seq_length] Logits per token.
        column_logits: <float>[batch_size, max_num_cols] Logits per column.
        label_ids: <int32>[batch_size, seq_length] Labels per token.
        cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index
            that groups tokens into cells.
        col_index: segmented_tensor.IndexMap [batch_size, seq_length] Index
            that groups tokens into columns.
        cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input mask
            per cell, 1 for cells that exist in the example and 0 for padding.

    Returns:
        selection_loss_per_example: <float>[batch_size] Loss for each example.
        logits: <float>[batch_size, seq_length] Token logits that can only
            select cells of one column. Everything outside the column the
            model scores highest in `column_logits` is pushed to a very low
            value (so that its probability is 0).
    """
    # --- Column term -------------------------------------------------------
    # The gold column is the one that contains the most labelled tokens.
    label_count_per_column, _ = segmented_tensor.reduce_sum(
        tf.cast(label_ids, tf.float32), col_index)
    gold_column = tf.argmax(label_count_per_column,
                            axis=-1,
                            output_type=tf.int32)
    # When nothing at all is labelled, the target is the special column id 0,
    # which stands for "select nothing".
    nothing_selected = tf.equal(
        tf.reduce_max(label_count_per_column, axis=-1), 0)
    gold_column = tf.where(nothing_selected, tf.zeros_like(gold_column),
                           gold_column)

    column_loss = -tfp.distributions.Categorical(
        logits=column_logits).log_prob(gold_column)

    # --- Cell term ---------------------------------------------------------
    # Collapse token-level logits and labels to cell level.
    cell_logits, _ = segmented_tensor.reduce_mean(token_logits, cell_index)
    cell_labels, cell_labels_index = segmented_tensor.reduce_max(
        tf.cast(label_ids, tf.int32), cell_index)

    # Column id of every cell, and a 0/1 mask of the cells in the gold column.
    cell_column_ids = cell_index.project_inner(cell_labels_index).indices
    in_gold_column = tf.cast(
        tf.equal(cell_column_ids, tf.expand_dims(gold_column, axis=1)),
        tf.float32)

    # Bernoulli log-likelihood, counted only for real cells in the gold
    # column and averaged over the number of such cells.
    cell_log_likelihood = tfp.distributions.Bernoulli(
        logits=cell_logits).log_prob(cell_labels)
    summed_cell_loss = -tf.reduce_sum(
        cell_log_likelihood * in_gold_column * cell_mask, axis=1)
    num_cells_in_column = tf.reduce_sum(in_gold_column * cell_mask, axis=1)
    mean_cell_loss = summed_cell_loss / (num_cells_in_column +
                                         _EPSILON_ZERO_DIVISION)

    # Examples without any selected cell contribute the column term only.
    selection_loss_per_example = column_loss + tf.where(
        nothing_selected, tf.zeros_like(column_loss), mean_cell_loss)

    # --- Output logits -----------------------------------------------------
    # Zero out the probabilities outside the column picked by the *model*
    # (not the gold one). This keeps the output compatible with models that
    # select cells from several columns.
    predicted_column = tf.argmax(column_logits,
                                 axis=-1,
                                 output_type=tf.int32)
    in_predicted_column = tf.cast(
        tf.equal(cell_column_ids, tf.expand_dims(predicted_column, axis=-1)),
        tf.float32)
    # Cells carrying the special column id 0 must never be selected.
    in_predicted_column = tf.where(tf.equal(cell_column_ids, 0),
                                   tf.zeros_like(in_predicted_column),
                                   in_predicted_column)
    masked_cell_logits = cell_logits + _CLOSE_ENOUGH_TO_LOG_ZERO * (
        1.0 - cell_mask * in_predicted_column)
    logits = segmented_tensor.gather(masked_cell_logits, cell_index)

    return selection_loss_per_example, logits
Пример #26
0
 def map_fn(inputs):
     """Locate the maximum entry of ``inputs``.

     Returns a pair ``(indices, score)``: ``indices`` is whatever
     ``tensor_utils.unravel_index_2d`` yields for the flat arg-max position
     and the shape of ``inputs`` (presumably its 2-D coordinates; confirm
     against that helper), and ``score`` is the maximum value itself.
     """
     flat_scores = tf.reshape(inputs, [-1])
     best_flat_index = tf.argmax(flat_scores, output_type=tf.int32)
     return (tensor_utils.unravel_index_2d(best_flat_index, inputs.shape),
             flat_scores[best_flat_index])
Пример #27
0
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator.

        Builds the table model, computes the classification outputs and
        returns a `TPUEstimatorSpec` appropriate for `mode`. All model
        settings come from `config`, which is taken from the enclosing scope.

        Args:
            features: Dict of input tensors. Always read: "label_ids",
                "input_mask", "row_ids", "column_ids". Read depending on
                `config`: "aggregation_function_id",
                "classification_class_index", "answer", "numeric_values",
                "numeric_values_scale". Read in prediction mode:
                "segment_ids", "question_id_ints" and, if present,
                "question_id".
            labels: Unused; the labels are part of `features`.
            mode: A `tf.estimator.ModeKeys` value.
            params: Dict; only "gradient_accumulation_steps" is read
                (default 1).

        Returns:
            A `tf.estimator.tpu.TPUEstimatorSpec` carrying the train op
            (TRAIN), the eval metrics (EVAL) or the predictions dict
            (any other mode).
        """

        del labels  # Unused.

        tf.logging.info("*** Features ***")
        for name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)

        label_ids = features["label_ids"]
        input_mask = features["input_mask"]
        row_ids = features["row_ids"]
        column_ids = features["column_ids"]
        # Table cells only, without question tokens and table headers.
        # The mask is 1 wherever row_ids > 0 and 0 elsewhere.
        table_mask = tf.where(row_ids > 0, tf.ones_like(row_ids),
                              tf.zeros_like(row_ids))
        # The aggregation / classification heads are enabled by a positive
        # label count in the config; their targets are only read when enabled.
        do_model_aggregation = config.num_aggregation_labels > 0
        aggregation_function_id = (tf.squeeze(
            features["aggregation_function_id"], axis=[1])
                                   if do_model_aggregation else None)

        do_model_classification = config.num_classification_labels > 0
        classification_class_index = (tf.squeeze(
            features["classification_class_index"], axis=[1])
                                      if do_model_classification else None)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = table_bert.create_model(
            features=features,
            mode=mode,
            bert_config=config.bert_config,
            disabled_features=config.disabled_features,
            disable_position_embeddings=config.disable_position_embeddings)

        # The answer and numeric-value features are only consumed when the
        # answer itself is used as the supervision signal.
        if config.use_answer_as_supervision:
            answer = tf.squeeze(features["answer"], axis=[1])
            numeric_values = features["numeric_values"]
            numeric_values_scale = features["numeric_values_scale"]
        else:
            answer = None
            numeric_values = None
            numeric_values_scale = None

        (total_loss, logits, logits_aggregation, probabilities,
         logits_cls) = _get_classification_outputs(
             config=config,
             output_layer=model.get_sequence_output(),
             output_layer_aggregation=model.get_pooled_output(),
             label_ids=label_ids,
             input_mask=input_mask,
             table_mask=table_mask,
             aggregation_function_id=aggregation_function_id,
             answer=answer,
             numeric_values=numeric_values,
             numeric_values_scale=numeric_values_scale,
             is_training=is_training,
             row_ids=row_ids,
             column_ids=column_ids,
             classification_class_index=classification_class_index)

        # Collected after the model and heads are built, so that every
        # trainable variable created above is included.
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        init_checkpoint = config.init_checkpoint
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if config.use_tpu:

                # On TPU the checkpoint initialisation is deferred into a
                # scaffold function handed to the TPUEstimatorSpec.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log every trainable variable and whether it was initialised from
        # the checkpoint.
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss,
                config.learning_rate,
                config.num_train_steps,
                config.num_warmup_steps,
                config.use_tpu,
                gradient_accumulation_steps=params.get(
                    "gradient_accumulation_steps", 1),
                grad_clipping=config.grad_clipping)

            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # TPUEstimator expects eval metrics as a (metric_fn, tensors)
            # pair; the tensors are passed positionally to the function.
            eval_metrics = (_calculate_eval_metrics_fn, [
                total_loss, label_ids, logits, input_mask,
                aggregation_function_id, logits_aggregation,
                classification_class_index, logits_cls
            ])
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # Any other mode (i.e. prediction): return the probabilities
            # together with the ids needed to map them back to table cells.
            predictions = {
                "probabilities": probabilities,
                "column_ids": features["column_ids"],
                "row_ids": features["row_ids"],
                "segment_ids": features["segment_ids"],
                "question_id_ints": features["question_id_ints"],
            }
            # TODO Remove once the data has been updated.
            if "question_id" in features:
                # Only available when predicting on GPU.
                predictions["question_id"] = features["question_id"]
            if do_model_aggregation:
                predictions.update({
                    "gold_aggr":
                    features["aggregation_function_id"],
                    "pred_aggr":
                    tf.argmax(logits_aggregation,
                              axis=-1,
                              output_type=tf.int32)
                })
            if do_model_classification:
                predictions.update({
                    "gold_cls":
                    features["classification_class_index"],
                    "pred_cls":
                    tf.argmax(logits_cls, axis=-1, output_type=tf.int32)
                })
                # For binary classification a single score is exported: the
                # logit of class 1 minus the logit of class 0.
                if config.num_classification_labels == 2:
                    predictions.update(
                        {"logits_cls": logits_cls[:, 1] - logits_cls[:, 0]})
                else:
                    predictions.update({"logits_cls": logits_cls})
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
Пример #28
0
def model_definition(vector_dimension,
                     label_count,
                     slot_vectors,
                     value_vectors,
                     use_delex_features=False,
                     use_softmax=True,
                     value_specific_decoder=False,
                     learn_belief_state_update=True):
    """
    Define the belief-tracking model for one slot and return its TensorFlow ops.

    The graph scores every candidate value of the slot against a CNN
    representation of the utterance, adds contributions from the system
    request / system confirm acts, optionally combines the result with the
    previous belief state, and attaches an Adam training step.

    Args:
        vector_dimension: width of the word / slot / value vectors.
        label_count: number of candidate values for the slot. With
            use_softmax an extra NONE column is appended, so the label size
            becomes label_count + 1.
        slot_vectors: constant slot vectors; only row 0 is used here.
        value_vectors: constant value vectors, indexed by value id. The
            original note says both should be of size [label_count + 2, 300],
            with zero vectors passed for NONE.
        use_delex_features: if True, the delexicalised feature placeholder is
            added to the pre-softmax scores.
        use_softmax: if True, the slot is decoded with a softmax over all
            values plus NONE and a belief state update is applied; otherwise
            each value gets an independent sigmoid (used for requestables).
        value_specific_decoder: selects the full-matrix belief state update
            instead of the tied diagonal / off-diagonal one.
        learn_belief_state_update: if False, a fixed interpolation between
            the current scores and the past state is used instead of a
            learned update.

    Variables created by this model:

    1) h_utterance_representation, which uses a CNN to learn a representation of the utterance r.
    2) candidates_transform, which includes w_candidates and b_candidates, which transforms candidate values to vector c.
    3) w_joint_hidden_layer and b_joint_hidden_layer, which collapse the interaction of r and c to an intermediate vector.
    4) w_joint_presoftmax and b_joint_presoftmax, which collapse the intermediate layer to a single feature.
    5) sysreq_w_hidden_layer and sysreq_b_hidden_layer, which compute the intermediate sysreq representation.
    6) TODO: sysreq_w_softmax and sysreq_b_softmax, which map this to the final decision. -- currently not size independent.
    7) TODO: confirm_w1_hidden_layer, confirm_b1_hidden_layer, confirm_w1_softmax, confirm_b1_softmax: for confirmations. -- currently does not work.
    8) a_memory, b_memory, a_current, b_current: for the belief state updates, composed into matrix.

    If all of these are initialised and then supplied to each of the models, we could train them together (batch of each slot), and just save
    these variables, then at test time, just load them (as session even), and then initialise all of the models with them.

    Returns:
        A 22-tuple: (keep_prob, utterance_representations_full,
        utterance_representations_delex, system_act_slots,
        system_act_confirm_slots, system_act_confirm_values, y_, y_past_state,
        accuracy, f_score, precision, recall, num_true_positives,
        num_positives, classified_positives, y, predictions, true_predictions,
        correct_prediction, true_positives, train_step, update_coefficient).
    """

    print "=========================== Model declaration ==========================="
    if use_softmax:
        label_size = label_count + 1  # 1 is for NONE, dontcare is added to the ontology.
    else:
        label_size = label_count

    # these are actual NN hyperparameters that we might want to tune at some point:
    hidden_units_1 = 100
    longest_utterance_length = 40

    # NOTE(review): summary_feature_count is not used anywhere below.
    summary_feature_count = 10

    print "Hidden layer size:", hidden_units_1, "Label Size:", label_size, "Use Softmax:", use_softmax, "Use Delex Features:", use_delex_features

    # NOTE(review): the literal 40 below duplicates longest_utterance_length.
    utterance_representations_full = tf.placeholder(
        tf.float32, [None, 40, vector_dimension
                     ])  # full feature vector, which we want to convolve over.
    utterance_representations_delex = tf.placeholder(tf.float32,
                                                     [None, label_size])
    #    utterance_representations_delex = tf.placeholder(tf.float32, [None, label_size, 40, vector_dimension])

    system_act_slots = tf.placeholder(
        tf.float32,
        shape=(None, vector_dimension))  # just slots, for requestables.

    system_act_confirm_slots = tf.placeholder(tf.float32,
                                              shape=(None, vector_dimension))
    system_act_confirm_values = tf.placeholder(tf.float32,
                                               shape=(None, vector_dimension))

    #slot_values =  tf.placeholder(tf.float32, shape=(None, vector_dimension))
    #candidate_values = tf.placeholder(tf.float32, shape=(None, vector_dimension))

    # Initial (distributional) vectors. Needed for L2 regularisation.
    W_slots = tf.constant(slot_vectors, name="W_init")
    W_values = tf.constant(value_vectors, name="W_init")

    # output label, i.e. True / False, 1-hot encoded:
    y_ = tf.placeholder(tf.float32, [None, label_size])

    # belief state from the previous turn, fed in by the caller:
    y_past_state = tf.placeholder(tf.float32, [None, label_size])

    # dropout placeholder, 0.5 for training, 1.0 for validation/testing:
    keep_prob = tf.placeholder("float")

    # constants useful for evaluation variables further below:
    # NOTE(review): ones and zeros are not referenced again in this function.
    ones = tf.constant(1.0, dtype="float")
    zeros = tf.constant(0.0, dtype="float")

    # NOTE(review): hidden_utterance_size is assigned twice and never read.
    hidden_utterance_size = vector_dimension

    filter_sizes = [1, 2, 3]
    num_filters = 300
    hidden_utterance_size = num_filters  #* len(filter_sizes)

    #candidate_sum = candidate_values + slot_values # to avoid summing these two multiple times later.

    #w_candidates = tf.Variable(tf.random_normal([vector_dimension, vector_dimension]))
    #b_candidates = tf.Variable(tf.zeros([vector_dimension]))

    #candidates = tf.nn.sigmoid(tf.matmul(candidate_sum, w_candidates) + b_candidates)
    #candidates = tf.nn.sigmoid(tf.matmul(candidate_values, w_candidates) + b_candidates)

    # filter needs to be of shape: filter_height = 1,2,3, filter_width=300, in_channel=1, out_channel=num_filters
    # filter just dot products - in images these then overlap from different regions - we don't have that.
    # NOTE(review): the layers below multiply this representation with
    # [vector_dimension, ...] weights, so the CNN output width is presumably
    # expected to equal vector_dimension -- confirm against define_CNN_model.
    h_utterance_representation = define_CNN_model(
        utterance_representations_full, num_filters, vector_dimension,
        longest_utterance_length)

    #candidate_sum = W_slots + W_values # size [label_size, vector_dimension]

    w_candidates = tf.Variable(
        tf.random_normal([vector_dimension, vector_dimension]))
    b_candidates = tf.Variable(tf.zeros([vector_dimension]))

    # multiply to get: [label_size, vector_dimension]
    candidates_transform = tf.nn.sigmoid(
        tf.matmul(W_values, w_candidates) + b_candidates)

    # Next, multiply candidates [label_size, vector_dimension] each with the utterance representations [None, vector_dimension], to get [None, label_size, vector_dimension]
    # or utterance [None, vector_dimension] X [vector_dimension, label_size] to get [None, label_size]
    #h_utterance_representation_candidate_interaction = tf.Variable(tf.zeros([None, label_size, vector_dimension]))

    list_of_value_contributions = []

    # get interaction of utterance with each value:
    for value_idx in range(0, label_count):
        list_of_value_contributions.append(
            tf.multiply(h_utterance_representation,
                        candidates_transform[value_idx, :]))

    # Stack the per-value interactions, move the batch axis to the front and
    # flatten to rows of width vector_dimension so that a single matmul
    # applies the shared hidden layer to every (example, value) pair.
    h_utterance_representation_candidate_interaction = tf.reshape(
        tf.transpose(tf.stack(list_of_value_contributions), [1, 0, 2]),
        [-1, vector_dimension])
    # the same transform now runs across each value's vector, multiplying.
    w_joint_hidden_layer = tf.Variable(
        tf.random_normal([vector_dimension, hidden_units_1]))
    b_joint_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

    # now multiply [None, label_size, vector_dimension] by [vector_dimension, hidden_units_1], to get [None, label_size, hidden_units_1]
    hidden_layer_joint = tf.nn.sigmoid(
        tf.reshape(
            tf.matmul(h_utterance_representation_candidate_interaction,
                      w_joint_hidden_layer) + b_joint_hidden_layer,
            [-1, label_count, hidden_units_1]))
    hidden_layer_joint_with_dropout = tf.nn.dropout(hidden_layer_joint,
                                                    keep_prob)

    # next initialise parameters that go into a softmax, i.e. mapping [None, label_size, hidden_units_1] -> [None, label_size]
    w_joint_presoftmax = tf.Variable(tf.random_normal([hidden_units_1,
                                                       1]))  # collapse to 1
    b_joint_presoftmax = tf.Variable(tf.zeros([1]))  # collapse to 1

    y_presoftmax = tf.reshape(
        tf.matmul(
            tf.reshape(hidden_layer_joint_with_dropout, [-1, hidden_units_1]),
            w_joint_presoftmax) + b_joint_presoftmax, [-1, label_count])

    # for now we do not implement this

    sysreq_contributions = []  # a list of contributions for each of the values
    confirm_contributions = [
    ]  # a list of contributions for each of the values

    # =================== NETWORK FOR SYSTEM REQUESTS ==========================

    # is the current slot offered
    system_act_candidate_interaction = tf.multiply(
        W_slots[0, :],
        system_act_slots)  # only multiply with slots for the requests.
    dot_product_sysreq = tf.reduce_mean(system_act_candidate_interaction, 1)

    #full_ones = tf.ones([tf.shape(dot_product_sysreq)[0], 1])
    #dot_product = tf.cast(tf.equal(dot_product_sysreq, full_ones), "float32")

    # gate the utterance representation by the slot-match score:
    decision = tf.multiply(tf.expand_dims(dot_product_sysreq, 1),
                           h_utterance_representation)

    sysreq_w_hidden_layer = tf.Variable(
        tf.random_normal([vector_dimension, hidden_units_1]))
    sysreq_b_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

    # allow each value to learn to map different utterances to yes. Mainly dontcare.
    # The hidden layer weights are shared across values; the output weights
    # (sysreq_w_softmax / sysreq_b_softmax) are created anew for every value.
    for value_idx in range(0, label_count):

        sysreq_hidden_layer_1 = tf.nn.sigmoid(
            tf.matmul(decision, sysreq_w_hidden_layer) + sysreq_b_hidden_layer)
        sysreq_hidden_layer_1_with_dropout = tf.nn.dropout(
            sysreq_hidden_layer_1, keep_prob)

        sysreq_w_softmax = tf.Variable(tf.random_normal([hidden_units_1, 1]))
        sysreq_b_softmax = tf.Variable(tf.zeros([1]))

        sysreq_contribution = tf.matmul(sysreq_hidden_layer_1_with_dropout,
                                        sysreq_w_softmax) + sysreq_b_softmax

        sysreq_contributions.append(sysreq_contribution)

    sysreq = tf.concat(sysreq_contributions, 1)  #, [-1, label_size])

    # =================== NETWORK FOR CONFIRMATIONS ==========================

    # here, we do want to tie across all values, as it will get a different signal depending on whether both things match.
    confirm_w1_hidden_layer = tf.Variable(
        tf.random_normal([vector_dimension, hidden_units_1]))
    confirm_b1_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

    confirm_w1_softmax = tf.Variable(tf.random_normal([hidden_units_1, 1]))
    confirm_b1_softmax = tf.Variable(tf.zeros([1]))

    for value_idx in range(0, label_count):

        dot_product = tf.multiply(
            tf.reduce_mean(
                tf.multiply(W_slots[0, :], system_act_confirm_slots), 1),
            tf.reduce_mean(
                tf.multiply(W_values[value_idx, :], system_act_confirm_values),
                1))  # dot product: slot equality and value equality

        # turn the score into a hard 0/1 gate: 1 only where it equals 1.0 exactly.
        full_ones = tf.ones(tf.shape(dot_product))
        dot_product = tf.cast(tf.equal(dot_product, full_ones), "float32")

        decision = tf.multiply(tf.expand_dims(dot_product, 1),
                               h_utterance_representation)

        confirm_hidden_layer_1 = tf.nn.sigmoid(
            tf.matmul(decision, confirm_w1_hidden_layer) +
            confirm_b1_hidden_layer)
        confirm_hidden_layer_1_with_dropout = tf.nn.dropout(
            confirm_hidden_layer_1, keep_prob)

        confirm_contribution = tf.matmul(
            confirm_hidden_layer_1_with_dropout,
            confirm_w1_softmax) + confirm_b1_softmax
        confirm_contributions.append(confirm_contribution)

    sysconf = tf.concat(confirm_contributions, 1)

    if use_softmax:

        # append a zero score column for NONE so that every tensor has
        # label_size = label_count + 1 columns before they are summed.
        append_zeros_none = tf.zeros([tf.shape(y_presoftmax)[0], 1])
        y_presoftmax = tf.concat([y_presoftmax, append_zeros_none], 1)

        append_zeros = tf.zeros([tf.shape(y_presoftmax)[0], 1])
        sysreq = tf.concat([sysreq, append_zeros], 1)
        sysconf = tf.concat([sysconf, append_zeros], 1)

        y_presoftmax = y_presoftmax + sysconf + sysreq

    if use_delex_features:
        y_presoftmax = y_presoftmax + utterance_representations_delex

    # value-specific decoder:
    # NOTE(review): the "and False" makes this branch unreachable, so the
    # value-specific decoder below is currently disabled.
    if value_specific_decoder and False:

        h_utterance_representation_for_full_softmax = define_CNN_model(
            utterance_representations_full, num_filters, vector_dimension,
            longest_utterance_length)

        h_utterance_dropout = tf.nn.dropout(
            h_utterance_representation_for_full_softmax, keep_prob)

        ss_w_hidden_layer = tf.Variable(
            tf.random_normal([vector_dimension, hidden_units_1]))
        ss_b_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

        ss_hidden_layer_1 = tf.nn.relu(
            tf.matmul(h_utterance_dropout, ss_w_hidden_layer) +
            ss_b_hidden_layer)
        ss_hidden_layer_1_with_dropout = tf.nn.dropout(ss_hidden_layer_1,
                                                       keep_prob)

        ss_w_softmax = tf.Variable(
            tf.random_normal([hidden_units_1, label_size]))
        ss_b_softmax = tf.Variable(tf.zeros([label_size]))

        ss_contribution = tf.matmul(ss_hidden_layer_1_with_dropout,
                                    ss_w_softmax) + ss_b_softmax

        y_presoftmax += ss_contribution

    # as we are returning always, can't be null
    update_coefficient = tf.constant(0.49)

    if use_softmax:

        if learn_belief_state_update:

            if value_specific_decoder:  # value-specific update

                update_coefficient = tf.constant(0.8)

                # unconstrained label_size x label_size update matrices:
                ss_W_memory = tf.Variable(
                    tf.random_normal([label_size, label_size]))

                ss_W_current = tf.Variable(
                    tf.random_normal([label_size, label_size]))

                y_combine = tf.matmul(y_past_state, ss_W_memory) + tf.matmul(
                    y_presoftmax, ss_W_current)

            else:

                update_coefficient = tf.constant(0.7)

                # tied update matrices: one learned scalar on the diagonal
                # (a_*) and one learned scalar everywhere else (b_*).
                a_memory = tf.Variable(tf.random_normal([1, 1]))
                diag_memory = a_memory * tf.diag(tf.ones(label_size))

                b_memory = tf.Variable(tf.random_normal([1, 1]))
                non_diag_memory = tf.matrix_set_diag(
                    b_memory * tf.ones([label_size, label_size]),
                    tf.zeros(label_size))

                W_memory = diag_memory + non_diag_memory

                a_current = tf.Variable(tf.random_normal([1, 1]))
                diag_current = a_current * tf.diag(tf.ones(label_size))

                b_current = tf.Variable(tf.random_normal([1, 1]))
                non_diag_current = tf.matrix_set_diag(
                    b_current * tf.ones([label_size, label_size]),
                    tf.zeros(label_size))

                W_current = diag_current + non_diag_current

                y_combine = tf.matmul(y_past_state, W_memory) + tf.matmul(
                    y_presoftmax, W_current
                )  #+ tf.matmul(sysreq, W_current_req) + tf.matmul(sysconf, W_current_conf)

            y = tf.nn.softmax(y_combine)  # + y_ss_update_contrib)

        else:
            # This code runs the baseline experiments reported in Footnote 2 in the paper.
            update_coefficient = tf.Variable(
                0.5)  #this scales the contribution of the current turn.
            y_combine = update_coefficient * y_presoftmax + (
                1 - update_coefficient) * y_past_state
            y = tf.nn.softmax(y_combine)

    else:

        y = tf.nn.sigmoid(
            y_presoftmax
        )  # for requestables, we just have turn-level binary decisions

    # ======================== LOSS IS JUST CROSS ENTROPY ==========================================

    # NOTE(review): despite the variable name, the non-softmax branch
    # minimises a summed squared error, not a cross entropy.
    if use_softmax:
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=y_combine, labels=y_)
    else:
        cross_entropy = tf.reduce_sum(tf.square(y - y_))

    # ============================= EVALUATION =====================================================

    # NOTE(review): in the softmax branch, predictions / true_predictions hold
    # argmax class indices rather than 0/1 indicators, so the sums and
    # products below are computed on index values -- confirm that this is the
    # intended definition of the precision / recall / F-score statistics.
    if use_softmax:
        predictions = tf.cast(tf.argmax(y, 1),
                              "float32")  # will have ones where positive
        true_predictions = tf.cast(tf.argmax(y_, 1), "float32")
        correct_prediction = tf.cast(tf.equal(predictions, true_predictions),
                                     "float")

        accuracy = tf.reduce_mean(correct_prediction)
        # this will count number of positives - they are marked with 1 in true_predictions
        num_positives = tf.reduce_sum(true_predictions)
        # positives are indicated with ones.
        classified_positives = tf.reduce_sum(predictions)
        # will have ones in all places where both are predicting positives
        true_positives = tf.multiply(predictions, true_predictions)
        # if indicators for positive of both are 1, then it is positive.
        num_true_positives = tf.reduce_sum(true_positives)

        recall = num_true_positives / num_positives
        precision = num_true_positives / classified_positives
        f_score = (2 * recall * precision) / (recall + precision)

    else:
        # sigmoid outputs are thresholded at 0.5 by rounding:
        predictions = tf.cast(tf.round(y),
                              "float32")  # will have ones where positive
        true_predictions = tf.cast(tf.round(y_), "float32")
        correct_prediction = tf.cast(tf.equal(predictions, true_predictions),
                                     "float")

        num_positives = tf.reduce_sum(true_predictions)

        classified_positives = tf.reduce_sum(predictions)
        true_positives = tf.multiply(predictions, true_predictions)
        num_true_positives = tf.reduce_sum(true_positives)
        recall = num_true_positives / num_positives
        precision = num_true_positives / classified_positives
        f_score = (2 * recall * precision) / (recall + precision)

        accuracy = tf.reduce_mean(correct_prediction)

    optimizer = tf.train.AdamOptimizer(0.001)
    train_step = optimizer.minimize(cross_entropy)

    return keep_prob, utterance_representations_full, utterance_representations_delex, \
            system_act_slots, system_act_confirm_slots, system_act_confirm_values, \
            y_, y_past_state, accuracy, f_score, precision, \
           recall, num_true_positives, num_positives, classified_positives, y, \
           predictions, true_predictions, correct_prediction, true_positives, train_step, update_coefficient
Пример #29
0
# Training script: runs full-batch gradient descent for `training_epochs`
# epochs, recording the training cost, the test-set mean squared error and
# the training accuracy after every epoch, then saves the model.
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    cost_function)
print(cost_function.shape)
sess = tf.Session()
sess.run(init)

# Build the evaluation ops once, outside the training loop. Creating them
# inside the loop adds new nodes to the graph on every epoch, which makes the
# graph (and memory use / step time) grow without bound.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Calculate the cost and the accuracy for each epoch
mse_history = []
accuracy_history = []
print("trainy: ", train_y.shape)
print("trainx: ", train_x.shape)
for epoch in range(training_epochs):
    sess.run(training_step, feed_dict={x: train_x, y_: train_y})
    cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y})
    cost_history = np.append(cost_history, cost)

    # Both operands are already concrete arrays, so the test MSE is computed
    # directly with NumPy instead of building and running new graph ops.
    pred_y = sess.run(y, feed_dict={x: test_x})
    mse_ = np.mean(np.square(np.asarray(pred_y - test_y)))
    mse_history.append(mse_)

    accuracy = sess.run(accuracy_op, feed_dict={x: train_x, y_: train_y})
    accuracy_history.append(accuracy)

    print('epoch: ', epoch, ' - ', 'cost: ', cost, " - MSE: ", mse_,
          " - Train Accuracy: ", accuracy)

save_path = saver.save(sess, model_path)
print("Model saved in file: %s" % save_path)
Пример #30
0
    def _get_masked_lm_output(self, inputs: pretrain_data.Inputs, model):
        """Build the masked-LM prediction head and its weighted loss.

        Args:
            inputs: Batch providing `masked_lm_ids`, `masked_lm_positions`
                and `masked_lm_weights`.
            model: Object exposing `get_sequence_output()` and
                `get_embedding_table()`. It is not accessed when
                `self._config.uniform_generator` is set.

        Returns:
            An `MLMOutput` namedtuple with the fields `logits`, `probs`,
            `loss`, `per_example_loss` and `preds`.
        """
        bert_config = self._bert_config
        position_weights = inputs.masked_lm_weights
        with tf.variable_scope("generator_predictions"):
            if not self._config.uniform_generator:
                # Hidden states at the masked positions only.
                masked_hidden = pretrain_helpers.gather_positions(
                    model.get_sequence_output(), inputs.masked_lm_positions)
                # Project to the last dimension of the embedding table so the
                # result can be multiplied against the (transposed) table.
                projection_units = modeling.get_shape_list(
                    model.get_embedding_table())[-1]
                activation_fn = modeling.get_activation(
                    bert_config.hidden_act)
                kernel_init = modeling.create_initializer(
                    bert_config.initializer_range)
                projected = tf.layers.dense(
                    masked_hidden,
                    units=projection_units,
                    activation=activation_fn,
                    kernel_initializer=kernel_init,
                )
                projected = modeling.layer_norm(projected)
                output_bias = tf.get_variable(
                    "output_bias",
                    shape=[bert_config.vocab_size],
                    initializer=tf.zeros_initializer(),
                )
                # Output weights are the embedding table itself.
                logits = tf.nn.bias_add(
                    tf.matmul(projected,
                              model.get_embedding_table(),
                              transpose_b=True),
                    output_bias,
                )
            else:
                # Uniform generator: all-zero logits, i.e. every vocabulary
                # entry gets the same probability after the softmax.
                flat_zeros = tf.zeros(bert_config.vocab_size)
                tiled_shape = (modeling.get_shape_list(inputs.masked_lm_ids) +
                               [bert_config.vocab_size])
                logits = tf.zeros(tiled_shape)
                logits = logits + tf.reshape(
                    flat_zeros, [1, 1, bert_config.vocab_size])

            one_hot_targets = tf.one_hot(
                inputs.masked_lm_ids,
                depth=bert_config.vocab_size,
                dtype=tf.float32,
            )

            probs = tf.nn.softmax(logits)
            log_probs = tf.nn.log_softmax(logits)
            # Negative log-likelihood of the target token at each position.
            per_position_nll = -tf.reduce_sum(log_probs * one_hot_targets,
                                              axis=-1)

            # Weighted mean over positions; the epsilon keeps the division
            # finite when the weights sum to zero.
            weighted_nll = tf.reduce_sum(position_weights * per_position_nll)
            weight_total = tf.reduce_sum(position_weights) + 1e-6
            loss = weighted_nll / weight_total
            preds = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

            MLMOutput = collections.namedtuple(
                "MLMOutput",
                ["logits", "probs", "loss", "per_example_loss", "preds"])
            return MLMOutput(logits, probs, loss, per_position_nll, preds)