Example #1
# bias
b = tf.Variable(tf.zeros([10]))
# x * W + b
y = tf.matmul(x, W) + b
sm = tf.nn.softmax(y, name="softmax")

# cross entropy (loss function)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y,
                                                              labels=y_),
                      name="loss")

# train step
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# evaluating the model
correct_prediction = tf.equal(tf.argmax(sm, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                          name="accuracy")

HISTORY_LOG = []
saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)

    # training
    for step in range(num_steps):
        batch_data, batch_labels = DATASET.next_batch(batch_size)
        feed_dict = {x: batch_data, y_: batch_labels}
Example #2
def bi_tempered_logistic_loss(activations,
                              labels,
                              t1,
                              t2,
                              label_smoothing=0.0,
                              num_iters=5):
    """Bi-Tempered Logistic Loss with custom gradient.

  Args:
    activations: A multi-dimensional tensor with last dimension `num_classes`.
    labels: A tensor with shape and dtype as activations.
    t1: Temperature 1 (< 1.0 for boundedness).
    t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
    label_smoothing: Label smoothing parameter between [0, 1).
    num_iters: Number of iterations to run the method.

  Returns:
    A loss tensor.
  """
    with tf.name_scope('bitempered_logistic'):
        t1 = tf.convert_to_tensor(t1)
        t2 = tf.convert_to_tensor(t2)
        if label_smoothing > 0.0:
            num_classes = tf.cast(tf.shape(labels)[-1], tf.float32)
            labels = (1 - num_classes / (num_classes - 1) * label_smoothing
                      ) * labels + label_smoothing / (num_classes - 1)

        @tf.custom_gradient
        def _custom_gradient_bi_tempered_logistic_loss(activations):
            """Bi-Tempered Logistic Loss with custom gradient.

      Args:
        activations: A multi-dimensional tensor with last dim `num_classes`.

      Returns:
        A loss tensor, grad.
      """
            with tf.name_scope('gradient_bitempered_logistic'):
                probabilities = tempered_softmax(activations, t2, num_iters)
                loss_values = tf.multiply(
                    labels,
                    log_t(labels + 1e-10, t1) -
                    log_t(probabilities, t1)) - 1.0 / (2.0 - t1) * (tf.pow(
                        labels, 2.0 - t1) - tf.pow(probabilities, 2.0 - t1))

                def grad(d_loss):
                    """Explicit gradient calculation.

          Args:
            d_loss: Infinitesimal change in the loss value.
          Returns:
            Loss gradient.
          """
                    delta_probs = probabilities - labels
                    forget_factor = tf.pow(probabilities, t2 - t1)
                    delta_probs_times_forget_factor = tf.multiply(
                        delta_probs, forget_factor)
                    delta_forget_sum = tf.reduce_sum(
                        delta_probs_times_forget_factor, -1, keep_dims=True)
                    escorts = tf.pow(probabilities, t2)
                    escorts = escorts / tf.reduce_sum(
                        escorts, -1, keep_dims=True)
                    derivative = delta_probs_times_forget_factor - tf.multiply(
                        escorts, delta_forget_sum)
                    return tf.multiply(d_loss, derivative)

                return loss_values, grad

        loss_values = tf.cond(
            tf.logical_and(tf.equal(t1, 1.0), tf.equal(t2, 1.0)),
            functools.partial(tf.nn.softmax_cross_entropy_with_logits,
                              labels=labels,
                              logits=activations),
            functools.partial(_custom_gradient_bi_tempered_logistic_loss,
                              activations))
        reduce_sum_last = lambda x: tf.reduce_sum(x, -1)
        loss_values = tf.cond(
            tf.logical_and(tf.equal(t1, 1.0), tf.equal(t2, 1.0)),
            functools.partial(tf.identity, loss_values),
            functools.partial(reduce_sum_last, loss_values))
        return loss_values
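
A minimal usage sketch for the function above (assumptions: `import tensorflow as tf` on 1.x in graph mode, and the module helpers it relies on -- tempered_softmax, log_t, functools -- are available):

activations = tf.constant([[2.0, 0.5, -1.0]])  # [batch, num_classes] logits
labels = tf.constant([[1.0, 0.0, 0.0]])        # one-hot targets, same shape and dtype
loss = bi_tempered_logistic_loss(activations, labels, t1=0.8, t2=1.2)
with tf.Session() as sess:
    print(sess.run(loss))  # one loss value per example in the batch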
Example #3
def divide_safe(num, den, name=None):
  eps = 1e-8
  den += eps*tf.cast(tf.equal(den, 0), 'float32')
  return tf.divide(num, den, name=name)
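
A minimal usage sketch (assuming `import tensorflow as tf`, TF 1.x graph mode): a zero denominator is nudged to eps, so the division stays finite instead of producing inf.

num = tf.constant([1.0, 2.0, 3.0])
den = tf.constant([2.0, 0.0, 4.0])
safe = divide_safe(num, den, name='safe_div')
with tf.Session() as sess:
    print(sess.run(safe))  # [0.5, 2e8, 0.75] -- the middle entry is 2.0 / 1e-8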
Example #4
                    np.multiply(local_models[local_model_index][1],
                                agents_weights[local_model_index]), m_b)
                model = {'weights': m_w, 'bias': m_b}
            learning_rate = learning_rate * 0.9
            loss = federated_eval(model, federated_train_data)
            print('round {}, loss={}'.format(round_num, loss))
            print(time.time() - start_time)
            '''model = federated_train(model, learning_rate, federated_train_data)
            learning_rate = learning_rate * 0.9
            loss = federated_eval(model, federated_train_data)
            print('round {}, loss={}'.format(round_num, loss))'''

        m = np.dot(test_images, np.asarray(model['weights']))
        test_result = m + np.asarray(model['bias'])
        y = tf.nn.softmax(test_result)
        correct_prediction = tf.equal(tf.argmax(y, 1),
                                      tf.argmax(test_labels_onehot, 1))
        #print(list(tf.argmax(y, 1).numpy()))
        #print(list(tf.arg_max(test_labels_onehot, 1).numpy()))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        group_shapley_value.append(accuracy.numpy())
        print("combination finished ", time.time() - start_time)
        print(
            str(ss) + "\t" +
            str(group_shapley_value[len(group_shapley_value) - 1]))

    agent_shapley = []
    for index in range(NUM_AGENT):
        shapley = 0.0
        for j in all_sets:
            if index in j:
                remove_list_index = remove_list_indexed(index, j, all_sets)
Example #5
def accuracy(label, logits):
    """Computes accuracy from given label and logits."""
    return tf.reduce_mean(
        tf.to_float(tf.equal(label, tf.argmax(logits, axis=1))))
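
A minimal usage sketch (assuming `import tensorflow as tf` on 1.x, where tf.to_float still exists); `label` holds integer class ids and `logits` holds per-class scores:

label = tf.constant([0, 1, 2], dtype=tf.int64)   # tf.argmax returns int64, so match dtypes
logits = tf.constant([[5.0, 1.0, 0.0],
                      [0.2, 3.0, 0.1],
                      [1.0, 2.0, 0.5]])
acc = accuracy(label, logits)
with tf.Session() as sess:
    print(sess.run(acc))  # ~0.667: the last row's argmax (1) does not match label 2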
Example #6
File: 1.py  Project: Lee077/AI
h2 = tf.sigmoid(hc2)
h2 = tf.nn.dropout(h2, keep_prob=keep)

# layer3
var3 = tf.Variable(tf.truncated_normal([256, 2], stddev=0.1))
bias3 = tf.Variable(tf.zeros([2]))
hc3 = tf.add(tf.matmul(h2, var3), bias3)
h3 = tf.nn.softmax(hc3)

# define the loss
# Note: pass the pre-softmax logits hc3 here; h3 is already a softmax output.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=hc3, labels=y))
tf.summary.scalar('loss', loss)

# define the accuracy
ac = tf.cast(tf.equal(tf.argmax(h3, 1), tf.argmax(y, 1)), tf.float32)
acc = tf.reduce_mean(ac)
tf.summary.scalar('accuracy', acc)

# define the optimizer
optimizer = tf.train.AdamOptimizer(1e-3).minimize(loss)

merge_summary = tf.summary.merge_all()

isTrain = 1

# training
print("Training.....")
saver = tf.train.Saver(max_to_keep=1)

with tf.Session() as sess:
Example #7
def _get_scores(log_probs, sequence_lengths, length_penalty_weight,
                coverage_penalty_weight, finished,
                accumulated_attention_probs):
    """Calculates scores for beam search hypotheses.

  Args:
    log_probs: The log probabilities with shape
      `[batch_size, beam_width, vocab_size]`.
    sequence_lengths: The array of sequence lengths.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    coverage_penalty_weight: Float weight to penalize the coverage of source
      sentence. Disabled with 0.0.
    finished: A boolean tensor of shape `[batch_size, beam_width, vocab_size]`
      that specifies which elements in the beam are finished already.
    accumulated_attention_probs: Accumulated attention probabilities up to the
      current time step, with shape `[batch_size, beam_width, max_time]` if
      coverage_penalty_weight is not 0.0.

  Returns:
    The scores normalized by the length_penalty and coverage_penalty.

  Raises:
    ValueError: accumulated_attention_probs is None when coverage penalty is
      enabled.
  """
    length_penalty_ = _length_penalty(sequence_lengths=sequence_lengths,
                                      penalty_factor=length_penalty_weight,
                                      dtype=log_probs.dtype)

    if coverage_penalty_weight == 0.0:
        return tf.where(finished, log_probs / length_penalty_, log_probs)

    coverage_penalty_weight = tf.convert_to_tensor(
        coverage_penalty_weight,
        name="coverage_penalty_weight",
        dtype=log_probs.dtype)
    if coverage_penalty_weight.shape.ndims != 0:
        raise ValueError("coverage_penalty_weight should be a scalar, "
                         "but saw shape: %s" % coverage_penalty_weight.shape)

    if accumulated_attention_probs is None:
        raise ValueError(
            "accumulated_attention_probs can be None only if coverage penalty is "
            "disabled.")

    # Add source sequence length mask before computing coverage penalty.
    accumulated_attention_probs = tf.where(
        tf.equal(accumulated_attention_probs, 0.0),
        tf.ones_like(accumulated_attention_probs), accumulated_attention_probs)

    # coverage penalty =
    #     sum over `max_time` {log(min(accumulated_attention_probs, 1.0))}
    coverage_penalty = tf.reduce_sum(
        tf.log(tf.minimum(accumulated_attention_probs, 1.0)), 2)
    # Apply coverage penalty to finished predictions.
    weighted_coverage_penalty = coverage_penalty * coverage_penalty_weight
    # Reshape from [batch_size, beam_width] to [batch_size, beam_width, 1]
    weighted_coverage_penalty = tf.expand_dims(weighted_coverage_penalty, 2)

    # Normalize the scores of finished predictions.
    return tf.where(finished,
                    log_probs / length_penalty_ + weighted_coverage_penalty,
                    log_probs)
Example #8
    def _build_outputs(self, images, labels, mode):
        is_training = mode == mode_keys.TRAIN
        model_outputs = {}

        if 'anchor_boxes' in labels:
            anchor_boxes = labels['anchor_boxes']
        else:
            anchor_boxes = anchor.Anchor(
                self._params.architecture.min_level,
                self._params.architecture.max_level,
                self._params.anchor.num_scales,
                self._params.anchor.aspect_ratios,
                self._params.anchor.anchor_size,
                images.get_shape().as_list()[1:3]).multilevel_boxes

            batch_size = tf.shape(images)[0]
            for level in anchor_boxes:
                anchor_boxes[level] = tf.tile(
                    tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

        backbone_features = self._backbone_fn(images, is_training)
        fpn_features = self._fpn_fn(backbone_features, is_training)

        rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn(
            fpn_features, is_training)
        model_outputs.update({
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
        })
        rpn_rois, _ = self._generate_rois_fn(rpn_box_outputs,
                                             rpn_score_outputs, anchor_boxes,
                                             labels['image_info'][:, 1, :],
                                             is_training)

        if is_training:
            rpn_rois = tf.stop_gradient(rpn_rois)

            # Sample proposals.
            rpn_rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
                self._sample_rois_fn(rpn_rois, labels['gt_boxes'],
                                     labels['gt_classes']))

            # Create bounding box training targets.
            box_targets = box_utils.encode_boxes(
                matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0])
            # If the target is background, the box target is set to all 0s.
            box_targets = tf.where(
                tf.tile(
                    tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
                    [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
            model_outputs.update({
                'class_targets': matched_gt_classes,
                'box_targets': box_targets,
            })

        roi_features = spatial_transform_ops.multilevel_crop_and_resize(
            fpn_features, rpn_rois, output_size=7)

        class_outputs, box_outputs = self._frcnn_head_fn(
            roi_features, is_training)
        model_outputs.update({
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
        })

        if not is_training:
            detection_results = self._generate_detections_fn(
                box_outputs, class_outputs, rpn_rois,
                labels['image_info'][:, 1:2, :])
            model_outputs.update(detection_results)

        if not self._include_mask:
            return model_outputs

        if is_training:
            rpn_rois, classes, mask_targets, gather_nd_gt_indices = self._sample_masks_fn(
                rpn_rois, matched_gt_boxes, matched_gt_classes,
                matched_gt_indices, labels['gt_masks'])
            mask_targets = tf.stop_gradient(mask_targets)

            classes = tf.cast(classes, dtype=tf.int32)

            model_outputs.update({
                'mask_targets': mask_targets,
                'sampled_class_targets': classes,
            })
        else:
            rpn_rois = detection_results['detection_boxes']
            classes = tf.cast(detection_results['detection_classes'],
                              dtype=tf.int32)

        mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
            fpn_features, rpn_rois, output_size=14)

        mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes,
                                           is_training)

        if is_training:
            model_outputs.update({
                'mask_outputs': mask_outputs,
            })
        else:
            model_outputs.update(
                {'detection_masks': tf.nn.sigmoid(mask_outputs)})

        if not self._include_attributes:
            return model_outputs

        attribute_outputs = self._attributes_head_fn(mask_roi_features,
                                                     is_training)

        if is_training:
            attribute_targets = tf.gather_nd(
                labels['gt_attributes'],
                gather_nd_gt_indices)  # [batch, K, num_attributes]

            model_outputs.update({
                'attribute_outputs': attribute_outputs,
                'attribute_targets': attribute_targets,
            })
        else:
            model_outputs['detection_attributes'] = tf.nn.sigmoid(
                attribute_outputs)

        return model_outputs
Example #9
def linear_classifier(x_train, y_train, x_test, y_test, num_classes,
                      learning_rate, iterations):
    """
        Define and train linear classifier for MNIST classification

        :param  x_train  nr x num_features training data
        :param  y_train  nr x int label data
        :param  x_test   nr x num_features test data
        :param  y_test   nr x int label data
        :param  num_classes     number of classes to classify
        :param  learning_rate
        :param  iterations
        (This is a mini exercise. Let us just train one epoch.)

        :return accuracy of classification tr_acc, ts_acc

    """

    # x_train and x_test are HoG features.
    num_features = x_train.shape[1]

    # Build a network
    tf.disable_eager_execution()
    tf.reset_default_graph()

    x = tf.placeholder(tf.float32, shape=[None, num_features], name="images")
    y_ = tf.placeholder(tf.int32, shape=[None, num_classes], name="labels")

    w = tf.get_variable("weights", shape=[num_features,
                                          num_classes])  # default initializer
    b = tf.get_variable("offsets", shape=[1, num_classes])

    y_hat = tf.matmul(x, w) + b

    correct_labels = tf.argmax(y_, axis=1, output_type=tf.int32)
    predicted_labels = tf.argmax(y_hat, axis=1, output_type=tf.int32)

    # type change of correct_prediction into float -> loss computation.
    correct_prediction = tf.cast(tf.equal(correct_labels, predicted_labels),
                                 tf.float32)
    # tf.reduce_sum(correct_prediction, axis = None)
    accuracy = tf.reduce_sum(correct_prediction) / tf.cast(
        tf.shape(correct_prediction)[0], tf.float32)

    # Loss : L2 Norm
    # loss = tf.reduce_mean(tf.nn.l2_loss(y_hat - tf.cast(y_, tf.float32)))

    # Loss : Cross-Entropy
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.cast(y_, tf.float32), logits=y_hat),
                          name='loss')
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss)

    with tf.Session() as sess:
        # one epoch
        sess.run(tf.global_variables_initializer())
        # iterations.
        for i in range(iterations):
            y = one_hot_encoder(y_train[i], num_classes)

            # feed_dict values must be batch-shaped, hence the np.array([...])
            # wrappers around single samples.
            # Train the classifier on images one by one (SGD), no batches.
            sess.run((train_step, loss, accuracy),
                     feed_dict={
                         x: np.array([x_train[i]]),
                         y_: np.array([y])
                     })

        training_accuracy = accuracy.eval(feed_dict={
            x: x_train,
            y_: one_hot_mat(y_train)
        })
        testing_accuracy = accuracy.eval(feed_dict={
            x: x_test,
            y_: one_hot_mat(y_test)
        })

        # Return training and testing error rate
        return (training_accuracy, testing_accuracy)
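
A minimal usage sketch for linear_classifier (assumptions: random features stand in for the HoG matrices, labels are integer class ids, a TF 1.x-compatible tf import is in scope, and the helpers one_hot_encoder/one_hot_mat referenced inside the function are defined in the same module):

import numpy as np
rng = np.random.RandomState(0)
x_tr = rng.rand(500, 324).astype(np.float32)   # 324 stands in for the HoG feature length
y_tr = rng.randint(0, 10, size=500)
x_te = rng.rand(100, 324).astype(np.float32)
y_te = rng.randint(0, 10, size=100)
tr_acc, ts_acc = linear_classifier(x_tr, y_tr, x_te, y_te,
                                   num_classes=10, learning_rate=0.01,
                                   iterations=500)
print('train acc:', tr_acc, 'test acc:', ts_acc)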
Example #10
                     use_relu=False)

layer_fc3 = create_fc_layer(input=layer_fc2,
                     num_inputs=fc_layer_size,
                     num_outputs=num_classes,
                     use_relu=False) 

y_pred = tf.nn.softmax(layer_fc3,name='y_pred')

y_pred_cls = tf.argmax(y_pred, axis=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc3,
                                                    labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


session.run(tf.global_variables_initializer()) 


def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%},  Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))

total_iterations = 0

saver = tf.train.Saver()
Example #11
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
 
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
 
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
 
print('Learning Finished!')
 
# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels}))
 
# Get one and predict
r = random.randint(0, mnist.test.num_examples - 1)
print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print("Prediction: ", sess.run(
    tf.argmax(hypothesis, 1), feed_dict={X: mnist.test.images[r:r + 1]}))
 
 
'''
Epoch: 0001 cost = 0.301498963
Epoch: 0002 cost = 0.107252513
Epoch: 0003 cost = 0.064888892
Example #12
    def model_fn(features, labels, mode, params):
        """Builds the model from the input features."""
        del params  # Unused
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Store auxiliary activations increasing in depth of network. First
        # activation occurs immediately after the stem and the others immediately
        # follow each stack.
        aux_activations = []

        # Initial stem convolution
        with tf.variable_scope('stem'):
            net = base_ops.conv_bn_relu(features, 3,
                                        config['stem_filter_size'],
                                        is_training, config['data_format'])
            aux_activations.append(net)

        for stack_num in range(config['num_stacks']):
            channels = net.get_shape()[channel_axis].value

            # Downsample at start (except first)
            if stack_num > 0:
                net = tf.layers.max_pooling2d(
                    inputs=net,
                    pool_size=(2, 2),
                    strides=(2, 2),
                    padding='same',
                    data_format=config['data_format'])

                # Double output channels each time we downsample
                channels *= 2

            with tf.variable_scope('stack{}'.format(stack_num)):
                for module_num in range(config['num_modules_per_stack']):
                    with tf.variable_scope('module{}'.format(module_num)):
                        net = build_module(spec,
                                           inputs=net,
                                           channels=channels,
                                           is_training=is_training)
                aux_activations.append(net)

        # Global average pool
        if config['data_format'] == 'channels_last':
            net = tf.reduce_mean(net, [1, 2])
        elif config['data_format'] == 'channels_first':
            net = tf.reduce_mean(net, [2, 3])
        else:
            raise ValueError('invalid data_format')

        # Fully-connected layer to labels
        logits = tf.layers.dense(inputs=net, units=config['num_labels'])

        if mode == tf.estimator.ModeKeys.PREDICT and not config['use_tpu']:
            # It is a known limitation of Estimator that the labels
            # are not passed during PREDICT mode when running on CPU/GPU
            # (https://github.com/tensorflow/tensorflow/issues/17824), thus we cannot
            # compute the loss or anything dependent on it (i.e., the gradients).
            loss = tf.constant(0.0)
        else:
            loss = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(
                labels, config['num_labels']),
                                                   logits=logits)

            loss += config['weight_decay'] * tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

        # Use inference mode to compute some useful metrics on a fixed sample
        # Due to the batch being sharded on TPU, these metrics should be run on CPU
        # only to ensure that the metrics are computed on the whole batch. We add a
        # leading dimension because PREDICT expects batch-shaped tensors.
        if mode == tf.estimator.ModeKeys.PREDICT:
            parameter_norms = {
                'param:' + tensor.name: tf.expand_dims(tf.norm(tensor, ord=2),
                                                       0)
                for tensor in tf.trainable_variables()
            }

            # Compute gradients of all parameters and the input simultaneously
            all_params_names = []
            all_params_tensors = []
            for tensor in tf.trainable_variables():
                all_params_names.append('param_grad_norm:' + tensor.name)
                all_params_tensors.append(tensor)
            all_params_names.append('input_grad_norm')
            all_params_tensors.append(features)

            grads = tf.gradients(loss, all_params_tensors)

            param_gradient_norms = {}
            for name, grad in list(zip(all_params_names, grads))[:-1]:
                if grad is not None:
                    param_gradient_norms[name] = (tf.expand_dims(
                        tf.norm(grad, ord=2), 0))
                else:
                    param_gradient_norms[name] = (tf.expand_dims(
                        tf.constant(0.0), 0))

            if grads[-1] is not None:
                input_grad_norm = tf.sqrt(
                    tf.reduce_sum(tf.square(grads[-1]), axis=[1, 2, 3]))
            else:
                input_grad_norm = tf.expand_dims(tf.constant(0.0), 0)

            covariance_matrices = {
                'cov_matrix_%d' % i: tf.expand_dims(_covariance_matrix(aux), 0)
                for i, aux in enumerate(aux_activations)
            }

            predictions = {
                'logits': logits,
                'loss': tf.expand_dims(loss, 0),
                'input_grad_norm': input_grad_norm,
            }
            predictions.update(parameter_norms)
            predictions.update(param_gradient_norms)
            predictions.update(covariance_matrices)

            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   predictions=predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            global_step = tf.train.get_or_create_global_step()
            base_lr = config['learning_rate']
            if config['use_tpu']:
                base_lr *= config['tpu_num_shards']

            if config['lr_decay_method'] == 'COSINE_BY_STEP':
                total_steps = int(config['train_epochs'] * num_train_images /
                                  config['batch_size'])
                progress_fraction = tf.cast(global_step,
                                            tf.float32) / total_steps
                learning_rate = (0.5 * base_lr *
                                 (1 + tf.cos(np.pi * progress_fraction)))

            elif config['lr_decay_method'] == 'COSINE_BY_TIME':
                # Requires training_time.limit hooks to be added to Estimator
                elapsed_time = tf.cast(training_time.get_total_time(),
                                       dtype=tf.float32)
                progress_fraction = elapsed_time / config['train_seconds']
                learning_rate = (0.5 * base_lr *
                                 (1 + tf.cos(np.pi * progress_fraction)))

            elif config['lr_decay_method'] == 'STEPWISE':
                # divide LR by 10 at 1/2, 2/3, and 5/6 of total epochs
                total_steps = (config['train_epochs'] * num_train_images /
                               config['batch_size'])
                boundaries = [
                    int(0.5 * total_steps),
                    int(0.667 * total_steps),
                    int(0.833 * total_steps)
                ]
                values = [
                    1.0 * base_lr, 0.1 * base_lr, 0.01 * base_lr,
                    0.0001 * base_lr
                ]
                learning_rate = tf.train.piecewise_constant(
                    global_step, boundaries, values)

            else:
                raise ValueError('invalid lr_decay_method')

            # Set LR to 0 for step 0 to initialize the weights without training
            learning_rate = tf.where(tf.equal(global_step, 0), 0.0,
                                     learning_rate)

            optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                                  momentum=config['momentum'],
                                                  epsilon=1.0)
            if config['use_tpu']:
                optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

            # Update ops required for batch norm moving variables
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(loss, global_step)

            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=loss,
                                                   train_op=train_op)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(labels, logits):
                predictions = tf.argmax(logits, axis=1)
                accuracy = tf.metrics.accuracy(labels, predictions)

                return {'accuracy': accuracy}

            eval_metrics = (metric_fn, [labels, logits])

            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=loss,
                                                   eval_metrics=eval_metrics)
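
The COSINE_BY_STEP branch above is the usual half-cosine decay from base_lr down to 0 over the total number of steps. A minimal NumPy sketch of the same formula (illustrative values, not taken from the config):

import numpy as np

base_lr = 0.1
total_steps = 1000
steps = np.arange(total_steps + 1)
lr = 0.5 * base_lr * (1 + np.cos(np.pi * steps / total_steps))
print(lr[0], lr[total_steps // 2], lr[-1])  # 0.1, 0.05, 0.0 (up to float rounding)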
Example #13
    def decode(self, serialized_example):
        """Decode the serialized example.

    Args:
      serialized_example: a single serialized tf.Example string.

    Returns:
      decoded_tensors: a dictionary of tensors with the following fields:
        - image: a uint8 tensor of shape [None, None, 3].
        - source_id: a string scalar tensor.
        - height: an integer scalar tensor.
        - width: an integer scalar tensor.
        - groundtruth_classes: an int64 tensor of shape [None].
        - groundtruth_is_crowd: a bool tensor of shape [None].
        - groundtruth_area: a float32 tensor of shape [None].
        - groundtruth_boxes: a float32 tensor of shape [None, 4].
        - groundtruth_instance_masks: a float32 tensor of shape
            [None, None, None].
        - groundtruth_instance_masks_png: a string tensor of shape [None].
    """
        parsed_tensors = tf.io.parse_single_example(serialized_example,
                                                    self._keys_to_features)
        for k in parsed_tensors:
            if isinstance(parsed_tensors[k], tf.SparseTensor):
                if parsed_tensors[k].dtype == tf.string:
                    parsed_tensors[k] = tf.sparse_tensor_to_dense(
                        parsed_tensors[k], default_value='')
                else:
                    parsed_tensors[k] = tf.sparse_tensor_to_dense(
                        parsed_tensors[k], default_value=0)

        image = self._decode_image(parsed_tensors)
        boxes = self._decode_boxes(parsed_tensors)
        areas = self._decode_areas(parsed_tensors)

        decode_image_shape = tf.logical_or(
            tf.equal(parsed_tensors['image/height'], -1),
            tf.equal(parsed_tensors['image/width'], -1))
        image_shape = tf.cast(tf.shape(image), dtype=tf.int64)

        parsed_tensors['image/height'] = tf.where(
            decode_image_shape, image_shape[0], parsed_tensors['image/height'])
        parsed_tensors['image/width'] = tf.where(decode_image_shape,
                                                 image_shape[1],
                                                 parsed_tensors['image/width'])

        is_crowds = tf.cond(
            tf.greater(
                tf.shape(parsed_tensors['image/object/is_crowd'])[0],
                0), lambda: tf.cast(parsed_tensors['image/object/is_crowd'],
                                    dtype=tf.bool),
            lambda: tf.zeros_like(parsed_tensors[self._label_key],
                                  dtype=tf.bool))
        if self._regenerate_source_id:
            source_id = _get_source_id_from_encoded_image(parsed_tensors)
        else:
            source_id = tf.cond(
                tf.greater(
                    tf.strings.length(parsed_tensors['image/source_id']),
                    0), lambda: parsed_tensors['image/source_id'],
                lambda: _get_source_id_from_encoded_image(parsed_tensors))
        if self._include_mask:
            masks = self._decode_masks(parsed_tensors)

        groundtruth_classes = parsed_tensors[self._label_key]
        decoded_tensors = {
            'image': image,
            'source_id': source_id,
            'height': parsed_tensors['image/height'],
            'width': parsed_tensors['image/width'],
            'groundtruth_classes': groundtruth_classes,
            'groundtruth_is_crowd': is_crowds,
            'groundtruth_area': areas,
            'groundtruth_boxes': boxes,
        }
        if self._include_mask:
            decoded_tensors.update({
                'groundtruth_instance_masks':
                masks,
                'groundtruth_instance_masks_png':
                parsed_tensors['image/object/mask'],
            })
        return decoded_tensors
Example #14
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, genre,
                                 is_training, gold_starts, gold_ends,
                                 cluster_ids):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            # [num_sentences, max_sentence_length, max_word_length, emb]
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index)

            # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])

            # [num_sentences * max_sentence_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"])

            # [num_sentences, max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])

        # [num_sentences * max_sentence_length * emb, 1]
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        # [num_sentences, max_sentence_length, emb]
        context_emb = tf.concat(context_emb_list, 2)
        # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list, 2)
        # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)
        # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

        # [num_sentence, max_sentence_length]
        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        # [num_sentences, max_sentence_length]
        sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1),
                                   [1, max_sentence_length])
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]
        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + \
            tf.expand_dims(tf.range(self.max_span_width),
                           0)  # [num_words, max_span_width]
        # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices, candidate_starts)
        # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends, num_words - 1))
        # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(candidate_start_sentence_indices,
                     candidate_end_sentence_indices))
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        # [num_candidates]
        candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]),
                                           flattened_candidate_mask)
        # [num_candidates]
        candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]),
                                         flattened_candidate_mask)
        # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)
        # [num_candidates]
        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)
        # [num_candidates, emb]
        candidate_span_emb = self.get_span_emb(flattened_head_emb,
                                               context_outputs,
                                               candidate_starts,
                                               candidate_ends)
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        if self.config['use_gold']:
            candidates_spans = tf.stack([candidate_starts, candidate_ends],
                                        axis=1)
            gold_spans = tf.stack([gold_starts, gold_ends], axis=1)
            same_span = tf.equal(tf.expand_dims(gold_spans, 1),
                                 tf.expand_dims(candidates_spans, 0))
            top_span_indices = tf.reduce_any(tf.reduce_all(same_span, axis=2),
                                             axis=0)
            top_span_indices = tf.squeeze(tf.where(top_span_indices), axis=1)
            k = tf.cast(util.shape(top_span_indices, 0), tf.int32)

        else:
            k = tf.to_int32(
                tf.floor(
                    tf.to_float(tf.shape(context_outputs)[0]) *
                    self.config["top_span_ratio"]))
            top_span_indices = coref_ops.extract_spans(
                tf.expand_dims(candidate_mention_scores, 0),
                tf.expand_dims(candidate_starts, 0),
                tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
                util.shape(context_outputs, 0), True)  # [1, k]
            top_span_indices.set_shape([1, None])
            top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]

        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
             top_antecedent_offsets) = self.coarse_to_fine_pruning(
                 top_span_emb, top_span_mention_scores, c)
        else:
            (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
             top_antecedent_offsets) = self.distance_pruning(
                 top_span_emb, top_span_mention_scores, c)
        dummy_scores_nomention = tf.expand_dims(
            top_span_mention_scores * -1, 1)  # tf.zeros([k, 1])  # [k, 1]
        dummy_scores_first = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = (
                    top_fast_antecedent_scores +
                    self.get_slow_antecedent_scores(
                        top_span_emb, top_antecedents, top_antecedent_emb,
                        top_antecedent_offsets, genre_emb))  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([
                        dummy_scores_nomention, dummy_scores_first,
                        top_antecedent_scores
                    ], 1))  # [k, c + 2]
                top_antecedent_emb = tf.concat([
                    tf.expand_dims(top_span_emb, 1),
                    tf.expand_dims(top_span_emb, 1), top_antecedent_emb
                ], 1)  # [k, c + 2, emb]
                # [k, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + \
                        (1 - f) * top_span_emb  # [k, emb]
        # [k, c + 2]
        top_antecedent_scores = tf.concat([
            dummy_scores_nomention, dummy_scores_first, top_antecedent_scores
        ], 1)

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels_nomention = tf.expand_dims(
            tf.equal(top_span_cluster_ids, 0), 1)  # [k, 1]
        dummy_labels_first = tf.logical_not(
            tf.reduce_any(tf.concat([dummy_labels_nomention, pairwise_labels],
                                    1),
                          1,
                          keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat(
            [dummy_labels_nomention, dummy_labels_first, pairwise_labels],
            1)  # [k, c + 2]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
Example #15
def App_Run():
    # def load_variables_from_checkpoint(sess, start_checkpoint):
    #     """Utility function to centralize checkpoint restoration.
    #     Args:
    #         sess: TensorFlow session.
    #         start_checkpoint: Path to saved checkpoint on disk.
    #     """
    #     saver = tf.train.Saver(tf.global_variables())
    #     saver.restore(sess, start_checkpoint)
    # train_log_f = open("./training_log/t_log_{}")
    data_file = "../train_data/20200605/shuffled_train_data.npy"
    lbl_file = "../train_data/20200605/shuffled_train_label.npy"
    val_data_file = "../train_data/20200605/shuffled_val_train_data.npy"
    val_lbl_file = "../train_data/20200605/shuffled_val_train_label.npy"
    x_train = np.load(data_file, allow_pickle=True)
    y_train = np.load(lbl_file, allow_pickle=True)
    # y_train = tf.keras.utils.to_categorical(y_train)
    # x_test = np.load(val_data_file,allow_pickle=True)
    # y_test = np.load(val_lbl_file,allow_pickle=True)
    # y_test = tf.keras.utils.to_categorical(y_test)
    X = tf.placeholder(tf.float32, [None, n_band])
    Y = tf.placeholder(tf.int8, [None, n_classes])
    logits = create_neural_net(X)
    prediction = tf.nn.softmax(logits)
    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    error_train = []
    error_test = []
    weight1 = []
    weight2 = []
    weight3 = []
    weight4 = []
    bias1 = []
    bias2 = []
    bias3 = []
    bias4 = []
    init = tf.global_variables_initializer()
    acc_now = 0
    epochnum = 3
    ckpt_file_path = "../training_ckpt/weights_improvement_{}-{}.ckpt"
    '''Add ops to save and restore all the variables.'''
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        '''    Restore variables from disk.    '''
        #    checkpoint = tf.train.latest_checkpoint("checkpoints/checkpoints_1017_256")
        #    saver.restore(sess, checkpoint)

        # ckpt_save_point = 100
        # run_count = 0
        current_epoch = 0
        for epoch in range(epochnum):
            current_epoch = epoch
            for step in range(100):
                batch_x, batch_y = next_batch(batch_size, x_train, y_train)
                peek_train_data(batch_x, batch_y)
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
                # valid_flag = exam_train_data(x_n, y_n)
                # if valid_flag:
                #     sess.run(train_op, feed_dict={x: x_n, y: y_n})
                #     # run_count += 1
                # else: continue
                if step % display_step == 0 or step == 1:
                    # Calculate batch loss and accuracy
                    loss, acc = sess.run([loss_op, accuracy],
                                         feed_dict={
                                             X: batch_x,
                                             Y: batch_y
                                         })
                    estimated_pred = sess.run(prediction,
                                              feed_dict={
                                                  X: x_train,
                                                  Y: y_train
                                              })
            # acc1 = sess.run(accuracy, feed_dict={x: x_test, y: y_test})

            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

            print("epoch", epoch)
            print("train : ", acc)
            print("test : ", acc1)

            error_train.append(acc)
            error_test.append(acc1)

            if acc > acc_now:
                acc_now = acc
                weight1 = w['hidden1'].eval(sess)
                weight2 = w['hidden2'].eval(sess)
                weight3 = w['hidden3'].eval(sess)
                weight4 = w['output'].eval(sess)
                bias1 = b['hidden1'].eval(sess)
                bias2 = b['hidden2'].eval(sess)
                bias3 = b['hidden3'].eval(sess)
                bias4 = b['output'].eval(sess)
                spio.savemat(
                    'kws_weights/w_3layer128.mat', {
                        'w1': weight1,
                        'w2': weight2,
                        'w3': weight3,
                        'w4': weight4,
                        'b1': bias1,
                        'b2': bias2,
                        'b3': bias3,
                        'b4': bias4
                    })
                saver.save(sess, ckpt_file_path.format(current_epoch, acc_now))
Example #16
# training
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for step in range(100):
    sess.run(train_op, feed_dict={X: x_data, Y: y_data})

    if (step + 1) % 10 == 0:
        print(step + 1, sess.run(cost, feed_dict={X: x_data, Y: y_data}))

# Check the trained results
# argmax picks the index of the largest element
prediction = tf.argmax(model, axis=1)
target = tf.argmax(Y, axis=1)
print("prediction=", sess.run(prediction, feed_dict={X: x_data}))
print("target=", sess.run(target, feed_dict={Y: y_data}))

# Measure accuracy
is_correct = tf.equal(prediction, target)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print("accuracy=%0.2f" %
      sess.run(accuracy * 100, feed_dict={
          X: x_data,
          Y: y_data
      }))
Example #17
w0 = tf.Variable(tf.zeros([300, 10]))
b0 = tf.Variable(tf.zeros([10]))
k = tf.matmul(hidden2, w0) + b0  # k is the value before the softmax layer (the logits)
p = tf.nn.softmax(k)

# define loss (cost) function
t = tf.placeholder(tf.float32, [None, 10])  # placeholder for the labels, read later from the training data set
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=k, labels=t)
)  # tf.nn.softmax_cross_entropy_with_logits applies the softmax internally
train_step = tf.train.AdamOptimizer(0.0001).minimize(
    loss)  # use the AdamOptimizer to minimize the cost function

# accuracy calculation
correct_prediction = tf.equal(tf.argmax(p, 1), tf.argmax(
    t, 1))  # compare the prediction with the given label and return whether it matches
# argmax returns the index of the largest value; since the array covers digits 0-9,
# the largest value's index is the digit predicted by the model
# p is the prediction and t is the label; if the indices of their maxima match, the prediction is correct
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# correct_prediction is a boolean, so cast it to a float before averaging
tf.summary.scalar('accuracy', accuracy)  # track accuracy for monitoring

# TensorBoard
summary_init = tf.summary.merge_all()  # merge all summaries for TensorBoard

# prepare session
# start the training session and initialize the variables
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
Example #18
def draw_samples(alpha, scale):
    r"""Draw samples from the robust distribution.

  This function implements Algorithm 1 of the paper. This code is written to allow
  for sampling from a set of different distributions, each parametrized by its
  own alpha and scale values, as opposed to the more standard approach of
  drawing N samples from the same distribution. This is done by repeatedly
  performing N instances of rejection sampling for each of the N distributions
  until at least one proposal for each of the N distributions has been accepted.
  All samples are drawn with a zero mean, to use a non-zero mean just add each
  mean to each sample.

  Args:
    alpha: A TF tensor/scalar or numpy array/scalar of floats where each element
      is the shape parameter of that element's distribution.
    scale: A TF tensor/scalar or numpy array/scalar of floats where each element
      is the scale parameter of that element's distribution. Must be the same
      shape as `alpha`.

  Returns:
    A TF tensor with the same shape and precision as `alpha` and `scale` where
    each element is a sample drawn from the distribution specified for that
    element by `alpha` and `scale`.
  """
    # `scale` must have the same type as `alpha`.
    float_dtype = alpha.dtype
    tf.assert_type(scale, float_dtype)
    assert_ops = [
        # `scale` must be > 0.
        tf.Assert(tf.reduce_all(scale > 0.), [scale]),
        # `alpha` must be >= 0.
        tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
        # `alpha` and `scale` must have the same shape.
        tf.Assert(tf.reduce_all(tf.equal(tf.shape(alpha), tf.shape(scale))),
                  [tf.shape(alpha), tf.shape(scale)]),
    ]

    with tf.control_dependencies(assert_ops):
        shape = tf.shape(alpha)

        # The distributions we will need for rejection sampling. The sqrt(2) scaling
        # of the Cauchy distribution corrects for our differing conventions for
        # standardization.
        cauchy = tfp.distributions.Cauchy(loc=0., scale=tf.sqrt(2.))
        uniform = tfp.distributions.Uniform(low=0., high=1.)

        def while_cond(_, accepted):
            """Terminate the loop only when all samples have been accepted."""
            return ~tf.reduce_all(accepted)

        def while_body(samples, accepted):
            """Generate N proposal samples, and then perform rejection sampling."""
            # Draw N samples from a Cauchy, our proposal distribution.
            cauchy_sample = tf.cast(cauchy.sample(shape), float_dtype)

            # Compute the likelihood of each sample under its target distribution.
            nll = nllfun(cauchy_sample, alpha, tf.cast(1, float_dtype))
            # Bound the NLL. We don't use the approximate loss as it may cause
            # unpredictable behavior in the context of sampling.
            nll_bound = general.lossfun(
                cauchy_sample,
                tf.cast(0, float_dtype),
                tf.cast(1, float_dtype),
                approximate=False) + log_base_partition_function(alpha)

            # Draw N samples from a uniform distribution, and use each uniform sample
            # to decide whether or not to accept each proposal sample.
            uniform_sample = tf.cast(uniform.sample(shape), float_dtype)
            accept = uniform_sample <= tf.math.exp(nll_bound - nll)

            # If a sample is accepted, replace its element in `samples` with the
            # proposal sample, and set its bit in `accepted` to True.
            samples = tf.where(accept, cauchy_sample, samples)
            accepted = accept | accepted
            return (samples, accepted)

        # Initialize the loop. The first item does not matter as it will get
        # overwritten, the second item must be all False.
        while_loop_vars = (tf.zeros(shape,
                                    float_dtype), tf.zeros(shape, dtype=bool))

        # Perform rejection sampling until all N samples have been accepted.
        terminal_state = tf.while_loop(cond=while_cond,
                                       body=while_body,
                                       loop_vars=while_loop_vars)

        # Because our distribution is a location-scale family, we sample from
        # p(x | 0, \alpha, 1) and then scale each sample by `scale`.
        samples = tf.multiply(terminal_state[0], scale)

        return samples
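
The loop above is plain rejection sampling: each Cauchy proposal is kept with probability exp(nll_bound - nll), the ratio of the target density to the bounding proposal density, and the loop repeats until every element has been accepted. Below is a minimal NumPy sketch of the same pattern, using a standard normal target and a Cauchy proposal purely for illustration (the helper names are hypothetical and not part of this module):

import numpy as np


def normal_pdf(x):
    return np.exp(-0.5 * x**2) / np.sqrt(2.0 * np.pi)


def cauchy_pdf(x):
    return 1.0 / (np.pi * (1.0 + x**2))


def draw_normal_via_rejection(shape, rng=None):
    """Toy rejection sampler: standard normal target, Cauchy proposal."""
    rng = rng or np.random.default_rng(0)
    # M bounds the density ratio: normal_pdf(x) <= M * cauchy_pdf(x) for all x.
    M = np.sqrt(2.0 * np.pi / np.e)
    samples = np.zeros(shape)
    accepted = np.zeros(shape, dtype=bool)
    while not accepted.all():
        proposal = rng.standard_cauchy(shape)
        u = rng.uniform(size=shape)
        # Accept with probability target(x) / (M * proposal(x)), the same role
        # exp(nll_bound - nll) plays in the TF loop above.
        accept = u <= normal_pdf(proposal) / (M * cauchy_pdf(proposal))
        samples = np.where(accept, proposal, samples)
        accepted |= accept
    return samples


print(draw_normal_via_rejection((5,)))
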
Exemplo n.º 19
0
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight, max_tgt):
    """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    coverage_penalty_weight: Float weight to penalize the coverage of source
      sentence. Disabled with 0.0.
    max_tgt: maximum prediction length.

  Returns:
    A new beam state.
  """

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = tf.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = tf.nn.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = tf.expand_dims(beam_state.log_probs, 2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or tf.shape(logits)[-1]
    lengths_to_add = tf.one_hot(indices=tf.fill([batch_size, beam_width],
                                                end_token),
                                depth=vocab_size,
                                on_value=np.int64(0),
                                off_value=np.int64(1),
                                dtype=tf.int64)
    add_mask = tf.to_int64(not_finished)
    lengths_to_add *= tf.expand_dims(add_mask, 2)
    new_prediction_lengths = (lengths_to_add +
                              tf.expand_dims(prediction_lengths, 2))

    # Calculate the accumulated attention probabilities if coverage penalty is
    # enabled.
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        attention_probs *= tf.expand_dims(
            tf.cast(not_finished, attention_probs.dtype), 2)
        accumulated_attention_probs = (beam_state.accumulated_attention_probs +
                                       attention_probs)

    batch_finished = tf.reduce_all(previously_finished, axis=1, keepdims=True)
    any_batch_finished = tf.reduce_any(batch_finished)
    batch_finished = tf.tile(tf.expand_dims(batch_finished, 2),
                             [1, beam_width, vocab_size])

    def _normalized_scores():
        return _get_scores(
            log_probs=total_probs,
            sequence_lengths=new_prediction_lengths,
            length_penalty_weight=length_penalty_weight,
            coverage_penalty_weight=coverage_penalty_weight,
            finished=batch_finished,
            accumulated_attention_probs=accumulated_attention_probs)

    # Normalize the scores of finished batches.
    scores = tf.cond(any_batch_finished, _normalized_scores,
                     lambda: total_probs)

    time = tf.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_flat = tf.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function
    next_beam_scores, word_indices = top_k_with_unique(scores_flat, beam_width)

    next_beam_scores.set_shape([batch_size, beam_width])
    word_indices.set_shape([batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen predictions
    batch_ids = tf.expand_dims(
        tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, beam_width]), 2)
    indices = tf.concat([batch_ids, tf.expand_dims(word_indices, 2)], -1)
    next_beam_probs = tf.gather_nd(tf.reshape(total_probs, [batch_size, -1]),
                                   indices)

    # Note: just doing the following
    #   tf.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = tf.mod(word_indices,
                               vocab_size,
                               name="next_beam_word_ids")
    next_word_ids = tf.to_int32(raw_next_word_ids)
    next_beam_ids = tf.div(word_indices,
                           vocab_size,
                           name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = tf.logical_or(previously_finished,
                                  tf.equal(next_word_ids, end_token),
                                  name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = tf.to_int64(tf.logical_not(previously_finished))
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add
    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")
    next_pred_ids = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.pred_ids,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[batch_size * beam_width, -1],
        name="pred_ids")

    # Add next_word_ids to next_pred_ids.
    next_pred_ids = tf.transpose(next_pred_ids, [2, 0, 1])
    cur_time = tf.tile(tf.reshape(time, [1]), [max_tgt])
    time_mask = tf.equal(tf.range(max_tgt), cur_time)
    time_mask = tf.tile(tf.reshape(time_mask, [max_tgt, 1, 1]),
                        [1, batch_size, beam_width])
    cur_time_ids = tf.tile(
        tf.reshape(next_word_ids, [1, batch_size, beam_width]),
        [max_tgt, 1, 1])
    next_pred_ids = tf.where(time_mask, cur_time_ids, next_pred_ids)
    next_pred_ids = tf.transpose(next_pred_ids, [1, 2, 0])

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = contrib_framework.nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs,
        pred_ids=next_pred_ids)

    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
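
One detail worth calling out: `top_k_with_unique` runs on scores flattened to `[batch_size, beam_width * vocab_size]`, so each chosen flat index encodes both the parent beam and the word, and the mod/div pair above recovers them. A tiny NumPy sketch of that index arithmetic (toy numbers, unrelated to any real decoder state):

import numpy as np

beam_width, vocab_size = 3, 5
# Scores for one batch element, flattened from [beam_width, vocab_size].
scores_flat = np.array([0.1, 0.9, 0.2, 0.0, 0.3,   # beam 0
                        0.8, 0.1, 0.0, 0.0, 0.0,   # beam 1
                        0.0, 0.0, 0.7, 0.6, 0.0])  # beam 2
# Take the `beam_width` highest-scoring flat indices.
word_indices = np.argsort(scores_flat)[::-1][:beam_width]
next_word_ids = word_indices % vocab_size   # which vocab entry was chosen
next_beam_ids = word_indices // vocab_size  # which parent beam it extends
print(word_indices, next_word_ids, next_beam_ids)  # [ 1  5 12] [1 0 2] [0 1 2]
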
Exemplo n.º 20
0
    def build():
        """Builds the Tensorflow graph."""
        inputs, lengths = None, None

        if mode in ('train', 'eval'):
            inputs, _, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        cell = events_rnn_graph.make_rnn_cell(
            hparams.rnn_layer_sizes,
            dropout_keep_prob=hparams.dropout_keep_prob
            if mode == 'train' else 1.0,
            attn_length=hparams.attn_length,
            residual_connections=hparams.residual_connections)

        rnn_nade = RnnNade(cell,
                           num_dims=input_size,
                           num_hidden=hparams.nade_hidden_units)

        if mode in ('train', 'eval'):
            log_probs, cond_probs = rnn_nade.log_prob(inputs, lengths)

            inputs_flat = tf.to_float(
                magenta.common.flatten_maybe_padded_sequences(inputs, lengths))
            predictions_flat = tf.to_float(tf.greater_equal(cond_probs, .5))

            if mode == 'train':
                loss = tf.reduce_mean(-log_probs)
                perplexity = tf.reduce_mean(tf.exp(log_probs))
                correct_predictions = tf.to_float(
                    tf.equal(inputs_flat, predictions_flat))
                accuracy = tf.reduce_mean(correct_predictions)
                precision = (tf.reduce_sum(inputs_flat * predictions_flat) /
                             tf.reduce_sum(predictions_flat))
                recall = (tf.reduce_sum(inputs_flat * predictions_flat) /
                          tf.reduce_sum(inputs_flat))

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = contrib_slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/precision': precision,
                    'metrics/recall': recall,
                }
            elif mode == 'eval':
                vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(-log_probs),
                        'metrics/perplexity':
                        tf.metrics.mean(tf.exp(log_probs)),
                        'metrics/accuracy':
                        tf.metrics.accuracy(inputs_flat, predictions_flat),
                        'metrics/precision':
                        tf.metrics.precision(inputs_flat, predictions_flat),
                        'metrics/recall':
                        tf.metrics.recall(inputs_flat, predictions_flat),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

            precision = vars_to_summarize['metrics/precision']
            recall = vars_to_summarize['metrics/recall']
            f1_score = tf.where(
                tf.greater(precision + recall, 0),
                2 * ((precision * recall) / (precision + recall)), 0)
            vars_to_summarize['metrics/f1_score'] = f1_score
            for var_name, var_value in vars_to_summarize.items():
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            initial_state = rnn_nade.zero_state(hparams.batch_size)

            final_state = rnn_nade.steps(inputs, initial_state)
            samples, log_prob = rnn_nade.sample_single(initial_state)

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('sample', samples)
            tf.add_to_collection('log_prob', log_prob)

            # Flatten state tuples for metagraph compatibility.
            for state in tf.nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf.nest.flatten(final_state):
                tf.add_to_collection('final_state', state)
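
For reference, the F1 score assembled above is just the harmonic mean of precision and recall, with `tf.where` guarding against a zero denominator. A plain-Python sketch of the same formula on toy values:

def f1_score(precision, recall):
    """Harmonic mean of precision and recall; defined as 0 when both are 0."""
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)


print(f1_score(0.8, 0.5))  # ~0.615
print(f1_score(0.0, 0.0))  # 0.0
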
Exemplo n.º 21
0
    (2, 2),  # stride
    name='pool1')
separable_2a = separable_conv_block(pooling1, 32, name='separable_2a')
separable_2b = separable_conv_block(separable_2a, 32, name='separable_2b')
pooling2 = tf.layers.max_pooling2d(separable_2b, [2, 2], [2, 2], name='pool2')
separable_3a = separable_conv_block(pooling2, 32, name='separable_3a')
separable_3b = separable_conv_block(separable_3a, 32, name='separable_3b')
pooling3 = tf.layers.max_pooling2d(separable_3b, [2, 2], [2, 2], name='pool3')
flatten = tf.layers.flatten(pooling3)

y_ = tf.layers.dense(flatten, 10)

loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
# take the index of the largest logit in y_ as the predicted class
predict = tf.argmax(y_, 1)
correct_prediction = tf.equal(predict, y)  # element-wise correctness
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))  # accuracy

# training op (Adam optimizer)
with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)


# CIFAR-10 data handling class
class CifarData:
    def __init__(self, filenames,
                 need_shuffle):  # need_shuffle: shuffle the training set for better generalization
        # read in the data
        all_data = []
        all_labels = []
        for filename in filenames:
print("y train size: ", len(y_train))
print("y test size: ", len(y_test))

#%%#### BUILD A MODEL
# Placeholders
X = tf.placeholder(tf.float32, [None, SEGMENT_TIME_SIZE, N_FEATURES], name="X")
y = tf.placeholder(tf.float32, [None, N_CLASSES], name="y")
y_pred = createLSTM(X)
y_pred_softmax = tf.nn.softmax(y_pred, name="y_pred_softmax")
# LOSS
l2 = L2_LOSS * sum(tf.nn.l2_loss(i) for i in tf.trainable_variables())
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y)) + l2
#%% OPTIMIZER
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
correct_pred = tf.equal(tf.argmax(y_pred_softmax, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))
#%% TRAINING
saver = tf.train.Saver()
history = dict(train_loss=[], train_acc=[], test_loss=[], test_acc=[])
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
train_count = len(X_train)
for i in range(1, N_EPOCHS + 1):
    for start, end in zip(range(0, train_count, BATCH_SIZE),
                          range(BATCH_SIZE, train_count + 1, BATCH_SIZE)):
        sess.run(optimizer,
                 feed_dict={
                     X: X_train[start:end],
                     y: y_train[start:end]
                 })
covid = rd.creat_x_database('.\\grey_covid', 128, 128)
non_covid = rd.creat_x_database('.\\grey_non', 128, 128)
dataSet = np.vstack((covid, non_covid))
# build the labels
covid_label = creat_label(covid.shape[0], 2, [0, 1])
non_covid_label = creat_label(non_covid.shape[0], 2, [1, 0])
label = np.vstack((covid_label, non_covid_label))
# assemble the final dataset
# x_train,x_test,y_train,y_test = train_test_split(dataSet,label,test_size=0.1,random_state=0,shuffle=True)

pre = Forward_conv(x, weights, biases, 0.8)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pre, labels=y))
# cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pre + 1e-10), reduction_indices=[1]))

optimizer = tf.train.AdamOptimizer(0.00001).minimize(cost)
p = tf.equal(tf.argmax(y, 1), tf.argmax(pre, 1))
accuracy = tf.reduce_mean(tf.cast(p, tf.float32))

###########################################################################
sess = tf.Session()
sess.run(tf.global_variables_initializer())
avg_cost = 0
for j in range(0, 1000):
    x_train, x_test, y_train, y_test = train_test_split(
        dataSet, label, test_size=0.2, random_state=0, shuffle=True)
    print(j)
    avg_cost = 0
    for i in range(0, 3):
        k = i * 179
        x_train1 = [x_train[m] for m in range(k, k + 179)]
        y_train1 = [y_train[m] for m in range(k, k + 179)]
        sess.run(optimizer, feed_dict={x: x_train1, y: y_train1})
def num_correct_prediction(logits, labels):
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    correct = tf.cast(correct, tf.int32)
    n_correct = tf.reduce_sum(correct)
    return n_correct
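
A minimal usage sketch for `num_correct_prediction`, assuming TF 1.x graph mode and the function defined above being in scope (the logits/labels here are toy values):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0], [0.1, 3.0], [5.0, 0.0]])
labels = tf.constant([[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
with tf.Session() as sess:
    # Rows 0 and 2 are classified correctly, row 1 is not.
    print(sess.run(num_correct_prediction(logits, labels)))  # 2
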
Exemplo n.º 25
0
loss = tf.negative(tf.subtract(first_term, second_term))

rA = tf.reshape(tf.reduce_sum(tf.square(x_data), 1), [-1, 1])
rB = tf.reshape(tf.reduce_sum(tf.square(prediction_grid), 1), [-1, 1])
pred_sq_dist = tf.add(
    tf.subtract(
        rA, tf.multiply(2., tf.matmul(x_data, tf.transpose(prediction_grid)))),
    tf.transpose(rB))
pred_kernel = tf.exp(tf.multiply(gamma, tf.abs(pred_sq_dist)))

prediction_output = tf.matmul(tf.multiply(y_target, b), pred_kernel)
prediction = tf.argmax(
    prediction_output -
    tf.expand_dims(tf.reduce_mean(prediction_output, 1), 1), 0)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(prediction, tf.argmax(y_target, 0)), tf.float32))

my_opt = tf.train.GradientDescentOptimizer(0.01)
train_step = my_opt.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)

loss_vec = []
batch_accuracy = []

for i in range(1000):
    rand_index = np.random.choice(len(x_vals), size=batch_size)
    rand_x = x_vals[rand_index]
    rand_y = y_vals[:, rand_index]
    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# ** Changed part: cross entropy cost/loss
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                               labels=tf.stop_gradient(
                                                   [Y_one_hot])))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

prediction = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        _, cost_val, acc_val = sess.run([optimizer, cost, accuracy],
                                        feed_dict={
                                            X: x_data,
                                            Y: y_data
                                        })

        if step % 200 == 0:
            print("Step: {:5}\tCost: {:.3f}\tAcc: {:.2%}".format(
Exemplo n.º 27
0
def sparse_bi_tempered_logistic_loss(activations, labels, t1, t2, num_iters=5):
    """Sparse Bi-Tempered Logistic Loss with custom gradient.

  Args:
    activations: A multi-dimensional tensor with last dimension `num_classes`.
    labels: A tensor with dtype of int32.
    t1: Temperature 1 (< 1.0 for boundedness).
    t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
    num_iters: Number of iterations to run the method.

  Returns:
    A loss tensor.
  """
    with tf.name_scope('sparse_bitempered_logistic'):
        t1 = tf.convert_to_tensor(t1)
        t2 = tf.convert_to_tensor(t2)
        num_classes = tf.shape(activations)[-1]

        @tf.custom_gradient
        def _custom_gradient_sparse_bi_tempered_logistic_loss(activations):
            """Sparse Bi-Tempered Logistic Loss with custom gradient.

      Args:
        activations: A multi-dimensional tensor with last dim `num_classes`.

      Returns:
        A loss tensor, grad.
      """
            with tf.name_scope('gradient_sparse_bitempered_logistic'):
                probabilities = tempered_softmax(activations, t2, num_iters)
                # TODO(eamid): Replace one hot with gather.
                loss_values = -log_t(
                    tf.reshape(
                        tf.gather_nd(probabilities,
                                     tf.where(tf.one_hot(labels,
                                                         num_classes))),
                        tf.shape(activations)[:-1]), t1) - 1.0 / (2.0 - t1) * (
                            1.0 -
                            tf.reduce_sum(tf.pow(probabilities, 2.0 - t1), -1))

                def grad(d_loss):
                    """Explicit gradient calculation.

          Args:
            d_loss: Infinitesimal change in the loss value.
          Returns:
            Loss gradient.
          """
                    delta_probs = probabilities - tf.one_hot(
                        labels, num_classes)
                    forget_factor = tf.pow(probabilities, t2 - t1)
                    delta_probs_times_forget_factor = tf.multiply(
                        delta_probs, forget_factor)
                    delta_forget_sum = tf.reduce_sum(
                        delta_probs_times_forget_factor, -1, keep_dims=True)
                    escorts = tf.pow(probabilities, t2)
                    escorts = escorts / tf.reduce_sum(
                        escorts, -1, keep_dims=True)
                    derivative = delta_probs_times_forget_factor - tf.multiply(
                        escorts, delta_forget_sum)
                    return tf.multiply(d_loss, derivative)

                return loss_values, grad

        loss_values = tf.cond(
            tf.logical_and(tf.equal(t1, 1.0), tf.equal(t2, 1.0)),
            functools.partial(tf.nn.sparse_softmax_cross_entropy_with_logits,
                              labels=labels,
                              logits=activations),
            functools.partial(
                _custom_gradient_sparse_bi_tempered_logistic_loss,
                activations))
        return loss_values
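
The loss above leans on the tempered logarithm `log_t`, which is defined elsewhere in the module; in the bi-tempered loss literature it is usually written as log_t(x) = (x^(1 - t) - 1) / (1 - t), recovering log(x) as t -> 1. A quick NumPy check of that limit, assuming this standard form:

import numpy as np


def log_t(x, t):
    """Tempered logarithm (assumed standard form); reduces to log(x) as t -> 1."""
    if t == 1.0:
        return np.log(x)
    return (x**(1.0 - t) - 1.0) / (1.0 - t)


x = np.array([0.1, 0.5, 0.9])
print(log_t(x, 0.999))  # approaches np.log(x) as t -> 1
print(np.log(x))
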
Exemplo n.º 28
0
def _at_least_x_are_equal(a, b, x):
    """At least `x` of `a` and `b` `Tensors` are equal."""
    match = tf.equal(a, b)
    match = tf.cast(match, tf.int32)
    return tf.greater_equal(tf.reduce_sum(match), x)
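
A brief TF 1.x usage sketch of `_at_least_x_are_equal` with toy tensors (illustrative only; assumes the function above is in scope):

import tensorflow as tf

a = tf.constant([1, 2, 3, 4])
b = tf.constant([1, 2, 0, 4])
with tf.Session() as sess:
    print(sess.run(_at_least_x_are_equal(a, b, 3)))  # True: 3 elements match
    print(sess.run(_at_least_x_are_equal(a, b, 4)))  # False: only 3 of 4 match
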
Exemplo n.º 29
0
def train(params=None):
    mnist = input_data.read_data_sets('/storage/emulated/0/tensor-data/',
                                      one_hot=True)
    # load the data
    x_data = mnist.train.images
    y_data = mnist.train.labels
    x_test = mnist.test.images
    y_test = mnist.test.labels

    # input placeholders
    xs = tf.placeholder(tf.float32, shape=[None, 784])
    ys = tf.placeholder(tf.float32, shape=[None, 10])
    x_images = tf.reshape(xs, [-1, 28, 28, 1])

    # first convolutional layer
    # conv1
    w_con1 = weights([5, 5, 1, 32], "w1")
    b_con1 = bias([32])
    h_con1 = tf.nn.conv2d(x_images, w_con1, [1, 1, 1, 1], padding='SAME')
    h_relu1 = tf.nn.relu(h_con1 + b_con1)
    #pool1
    h_pool1 = tf.nn.max_pool(h_relu1,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')

    # second convolutional layer
    # conv2
    w_con2 = weights([5, 5, 32, 64], "w2")
    b_con2 = bias([64])
    h_con2 = tf.nn.conv2d(h_pool1,
                          w_con2,
                          strides=[1, 1, 1, 1],
                          padding='SAME')
    h_relu2 = tf.nn.relu(h_con2)
    #pool2
    h_pool2 = tf.nn.max_pool(h_relu2,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')

    # fully connected layer
    w_fc1 = weights([7 * 7 * 64, 1024], "w3")
    b_fc1 = bias([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

    #drop_out
    keep_pro = tf.placeholder(dtype=tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_pro)

    # output layer
    w_fc2 = weights([1024, 10], "w4")
    b_fc2 = bias([10])
    h_fc2 = tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)

    # loss function (cross entropy; a small epsilon guards against log(0))
    loss = -tf.reduce_mean(ys * tf.log(h_fc2 + 1e-10))
    train = tf.train.AdamOptimizer(1e-4).minimize(loss)
    # initialize variables
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # compute accuracy
    accuracy = tf.equal(tf.argmax(ys, 1), tf.argmax(h_fc2, 1))
    accuracy = tf.reduce_mean(tf.cast(accuracy, tf.float32))

    # start training
    for step in range(5000):
        batch_x, batch_y = mnist.train.next_batch(100)
        sess.run(train, feed_dict={xs: batch_x, ys: batch_y, keep_pro: 0.8})
        if step % 100 == 0:
            print(
                step,
                sess.run(accuracy,
                         feed_dict={
                             xs: mnist.test.images,
                             ys: mnist.test.labels,
                             keep_pro: 1
                         }))

    if not tf.gfile.Exists('/storage/emulated/0/tensor-model/'):
        tf.gfile.MakeDirs('/storage/emulated/0/tensor-model/')
    saver = tf.train.Saver()  # instantiate the saver to save the model
    saver.save(sess, '/storage/emulated/0/tensor-model/my_model.ckpt')
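
As an aside, the `7 * 7 * 64` flatten size above follows from two stride-2, 2x2 max-pools on 28x28 inputs (28 -> 14 -> 7) and the 64 output channels of the second convolution. A one-line check:

side = 28 // 2 // 2      # two stride-2 poolings: 28 -> 14 -> 7
print(side * side * 64)  # 3136 == 7 * 7 * 64
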
Exemplo n.º 30
0
def ppo_policy_loss(neg_logprobs_old,
                    actions,
                    advantages,
                    dist_new,
                    policy_gradient_enable=False,
                    mcts_sampling=False,
                    clipping_coeff=0.2,
                    mcts_clipping_coeff=0.9,
                    tanh_action_clipping=False):
    """Use the formula in PPO baseline for calculating policy loss.

  paper: https://arxiv.org/abs/1707.06347

  Args:
    neg_logprobs_old: old negative log of probability.
    actions: actions from old policy.
    advantages: advantages from old policy.
    dist_new: the latest trained policy distribution.
    policy_gradient_enable: if True, vanilla policy gradient with advantage
      is used.
    mcts_sampling: If True, the data samples are generated with MCTS sampling.
    clipping_coeff: the coefficient used to clip the probability ratio.
    mcts_clipping_coeff: the coefficient used to clip the probability ratio,
      when the data are sampled using MCTS.
    tanh_action_clipping: if True, performs tanh action clipping. Enabling tanh
      action clipping bounds the actions to [-1, 1].
      Paper --> https://arxiv.org/pdf/1801.01290.pdf

  Returns:
    pg_loss: the clipped-surrogate (or vanilla policy-gradient) policy loss.
    approxkl: approximate KL divergence between the new and old policies.
    clipfrac: fraction of probability ratios that were clipped.
    p_ratio: probability ratio between the new and old policies.
  """
    neg_logprobs_new = dist_new.negative_log_prob(actions)

    current_clipping_coeff = tf.cond(tf.equal(mcts_sampling, True),
                                     lambda: tf.constant(mcts_clipping_coeff),
                                     lambda: tf.constant(clipping_coeff))

    # Calculate correction for logprob if tanh clipping is enabled
    # A mechanism for clipping the actions between [-1., 1.]
    # paper: https://arxiv.org/pdf/1801.01290.pdf
    if tanh_action_clipping:
        logprobs_correction = tf.reduce_sum(tf.log(1 - tf.tanh(actions)**2 +
                                                   1e-6),
                                            axis=1)
        neg_logprobs_new = neg_logprobs_new + logprobs_correction

    p_ratio = tf.exp(neg_logprobs_old - neg_logprobs_new, name='ratio')

    if policy_gradient_enable:
        pg_losses = advantages * neg_logprobs_new
        pg_loss = tf.reduce_mean(pg_losses, name='policy_loss')
    else:  # using the PPO formula to calculate the policy loss
        # Defining Loss = - J is equivalent to max J
        pg_losses = -advantages * p_ratio
        pg_losses2 = -advantages * tf.clip_by_value(
            p_ratio, 1. - current_clipping_coeff, 1. + current_clipping_coeff)
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2),
                                 name='policy_loss')
    # KL between new and old policy
    approxkl = .5 * tf.reduce_mean(
        tf.square(neg_logprobs_new - neg_logprobs_old))
    # Which fraction of policy ratios get clipped
    clipfrac = tf.reduce_mean(
        tf.to_float(tf.greater(tf.abs(p_ratio - 1.), current_clipping_coeff)))

    return pg_loss, approxkl, clipfrac, p_ratio
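
The non-policy-gradient branch above is the standard PPO clipped surrogate: the loss takes the elementwise maximum of the unclipped and clipped terms (equivalently, the minimum of the corresponding objectives), and `clipfrac` reports how often the ratio left the clipping interval. A tiny NumPy sketch on toy numbers, unrelated to any real policy:

import numpy as np

clipping_coeff = 0.2
p_ratio = np.array([0.5, 1.0, 1.5])     # new/old probability ratios
advantages = np.array([1.0, -2.0, 3.0])

pg_losses = -advantages * p_ratio
pg_losses2 = -advantages * np.clip(p_ratio, 1. - clipping_coeff,
                                   1. + clipping_coeff)
pg_loss = np.mean(np.maximum(pg_losses, pg_losses2))
clipfrac = np.mean(np.abs(p_ratio - 1.) > clipping_coeff)
print(pg_loss, clipfrac)  # -0.7, and 2/3 of the ratios were clipped
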