# bias b = tf.Variable(tf.zeros([10])) # test_data * W + b y = tf.matmul(x, W) + b sm = tf.nn.softmax(y, name="softmax") # cross entropy (loss function) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_), name="loss") # train step train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss) # evaluating the model correct_prediction = tf.equal(tf.argmax(sm, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy") HISTORY_LOG = [] saver = tf.train.Saver() init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) # training for step in range(num_steps): batch_data, batch_labels = DATASET.next_batch(batch_size) feed_dict = {x: batch_data, y_: batch_labels}
def bi_tempered_logistic_loss(activations, labels, t1, t2, label_smoothing=0.0, num_iters=5):
    """Bi-Tempered Logistic Loss with custom gradient.

    When both temperatures equal 1.0 the loss falls back to the standard
    softmax cross entropy; otherwise the tempered loss is computed and summed
    over the last (class) dimension.

    Args:
      activations: A multi-dimensional tensor with last dimension
        `num_classes`.
      labels: A tensor with shape and dtype as activations.
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      label_smoothing: Label smoothing parameter between [0, 1).
      num_iters: Number of iterations to run the method.

    Returns:
      A loss tensor.
    """
    with tf.name_scope('bitempered_logistic'):
        t1 = tf.convert_to_tensor(t1)
        t2 = tf.convert_to_tensor(t2)
        if label_smoothing > 0.0:
            num_classes = tf.cast(tf.shape(labels)[-1], tf.float32)
            labels = (1 - num_classes / (num_classes - 1) * label_smoothing
                      ) * labels + label_smoothing / (num_classes - 1)

        @tf.custom_gradient
        def _custom_gradient_bi_tempered_logistic_loss(activations):
            """Bi-Tempered Logistic Loss with custom gradient.

            Args:
              activations: A multi-dimensional tensor with last dim
                `num_classes`.

            Returns:
              A loss tensor, grad.
            """
            with tf.name_scope('gradient_bitempered_logistic'):
                probabilities = tempered_softmax(activations, t2, num_iters)
                loss_values = tf.multiply(
                    labels,
                    log_t(labels + 1e-10, t1) - log_t(probabilities, t1)
                ) - 1.0 / (2.0 - t1) * (
                    tf.pow(labels, 2.0 - t1) - tf.pow(probabilities, 2.0 - t1))

                def grad(d_loss):
                    """Explicit gradient calculation.

                    Args:
                      d_loss: Infinitesimal change in the loss value.

                    Returns:
                      Loss gradient.
                    """
                    delta_probs = probabilities - labels
                    forget_factor = tf.pow(probabilities, t2 - t1)
                    delta_probs_times_forget_factor = tf.multiply(
                        delta_probs, forget_factor)
                    # `keepdims` replaces the deprecated `keep_dims` keyword.
                    delta_forget_sum = tf.reduce_sum(
                        delta_probs_times_forget_factor, -1, keepdims=True)
                    escorts = tf.pow(probabilities, t2)
                    escorts = escorts / tf.reduce_sum(
                        escorts, -1, keepdims=True)
                    derivative = delta_probs_times_forget_factor - tf.multiply(
                        escorts, delta_forget_sum)
                    return tf.multiply(d_loss, derivative)

                return loss_values, grad

        def _standard_cross_entropy():
            # Already reduced over the class dimension by the op itself.
            return tf.nn.softmax_cross_entropy_with_logits(
                labels=labels, logits=activations)

        def _tempered_loss():
            # The custom-gradient op yields per-class values; reduce them so
            # both branches of the cond return the same shape.
            return tf.reduce_sum(
                _custom_gradient_bi_tempered_logistic_loss(activations), -1)

        use_standard_loss = tf.logical_and(
            tf.equal(t1, 1.0), tf.equal(t2, 1.0))
        return tf.cond(use_standard_loss, _standard_cross_entropy,
                       _tempered_loss)
def divide_safe(num, den, name=None):
    """Divide `num` by `den`, nudging exact-zero denominators by a tiny epsilon.

    Args:
      num: Numerator (tensor or value convertible to one).
      den: Denominator. Entries equal to 0 are replaced by `1e-8` for the
        division; all other entries are used unchanged.
      name: Optional name for the division op.

    Returns:
      The result of `tf.divide(num, den_with_epsilon, name=name)`.
    """
    eps = 1e-8
    # Match the mask dtype to a floating-point denominator so float64 inputs
    # do not hit a float32/float64 mismatch; fall back to float32 otherwise,
    # which is what the mask was always cast to before.
    den_dtype = tf.convert_to_tensor(den).dtype
    mask_dtype = den_dtype if den_dtype.is_floating else tf.float32
    zero_mask = tf.cast(tf.equal(den, 0), mask_dtype)
    # Build a new value instead of `den += ...` so a caller-owned NumPy array
    # passed as `den` is never modified in place.
    safe_den = den + eps * zero_mask
    return tf.divide(num, safe_den, name=name)
np.multiply(local_models[local_model_index][1], agents_weights[local_model_index]), m_b) model = {'weights': m_w, 'bias': m_b} learning_rate = learning_rate * 0.9 loss = federated_eval(model, federated_train_data) print('round {}, loss={}'.format(round_num, loss)) print(time.time() - start_time) '''model = federated_train(model, learning_rate, federated_train_data) learning_rate = learning_rate * 0.9 loss = federated_eval(model, federated_train_data) print('round {}, loss={}'.format(round_num, loss))''' m = np.dot(test_images, np.asarray(model['weights'])) test_result = m + np.asarray(model['bias']) y = tf.nn.softmax(test_result) correct_prediction = tf.equal(tf.argmax(y, 1), tf.arg_max(test_labels_onehot, 1)) #print(list(tf.argmax(y, 1).numpy())) #print(list(tf.arg_max(test_labels_onehot, 1).numpy())) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) group_shapley_value.append(accuracy.numpy()) print("combination finished ", time.time() - start_time) print( str(ss) + "\t" + str(group_shapley_value[len(group_shapley_value) - 1])) agent_shapley = [] for index in range(NUM_AGENT): shapley = 0.0 for j in all_sets: if index in j: remove_list_index = remove_list_indexed(index, j, all_sets)
def accuracy(label, logits):
    """Computes accuracy from given label and logits.

    Args:
      label: Class indices, one per row of `logits`.
      logits: Score tensor; the predicted class is the arg-max along axis 1.

    Returns:
      A scalar float32 tensor: the fraction of rows whose predicted class
      equals `label`.
    """
    predictions = tf.argmax(logits, axis=1)
    # tf.argmax yields int64; cast the labels to the same dtype so tf.equal
    # does not fail on e.g. int32 labels.
    matches = tf.equal(tf.cast(label, predictions.dtype), predictions)
    # tf.cast replaces the deprecated tf.to_float.
    return tf.reduce_mean(tf.cast(matches, tf.float32))
h2 = tf.sigmoid(hc2) h2 = tf.nn.dropout(h2, keep_prob=keep) # layer3 var3 = tf.Variable(tf.truncated_normal([256, 2], stddev=0.1)) bias3 = tf.Variable(tf.zeros([2])) hc3 = tf.add(tf.matmul(h2, var3), bias3) h3 = tf.nn.softmax(hc3) # 定义损失 loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=h3, labels=y)) tf.summary.scalar('loss', loss) # 定义正确率 ac = tf.cast(tf.equal(tf.argmax(h3, 1), tf.argmax(y, 1)), tf.float32) acc = tf.reduce_mean(ac) tf.summary.scalar('accuracy', acc) # 定义优化器 optimzer = tf.train.AdamOptimizer(1e-3).minimize(loss) merge_summary = tf.summary.merge_all() isTrain = 1 # 定义训练 print("正在训练.....") saver = tf.train.Saver(max_to_keep=1) with tf.Session() as sess:
def _get_scores(log_probs, sequence_lengths, length_penalty_weight,
                coverage_penalty_weight, finished,
                accumulated_attention_probs):
    """Scores beam-search hypotheses with length and coverage penalties.

    Only entries flagged in `finished` are rescored; all other entries keep
    their raw log probability.

    Args:
      log_probs: The log probabilities with shape
        `[batch_size, beam_width, vocab_size]`.
      sequence_lengths: The array of sequence lengths.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of the
        source sentence. Disabled with 0.0.
      finished: A boolean tensor of shape
        `[batch_size, beam_width, vocab_size]` marking the entries that are
        already finished.
      accumulated_attention_probs: Attention probabilities accumulated up to
        the current time step, shape `[batch_size, beam_width, max_time]`.
        Required whenever `coverage_penalty_weight` is not 0.0.

    Returns:
      The scores normalized by the length penalty and, when enabled, shifted
      by the weighted coverage penalty.

    Raises:
      ValueError: if `coverage_penalty_weight` is not a scalar, or if
        `accumulated_attention_probs` is None while the coverage penalty is
        enabled.
    """
    length_norm = _length_penalty(
        sequence_lengths=sequence_lengths,
        penalty_factor=length_penalty_weight,
        dtype=log_probs.dtype)

    # Coverage penalty switched off: length normalisation is all that is left.
    if coverage_penalty_weight == 0.0:
        return tf.where(finished, log_probs / length_norm, log_probs)

    weight = tf.convert_to_tensor(
        coverage_penalty_weight,
        name="coverage_penalty_weight",
        dtype=log_probs.dtype)
    if weight.shape.ndims != 0:
        raise ValueError("coverage_penalty_weight should be a scalar, "
                         "but saw shape: %s" % weight.shape)

    if accumulated_attention_probs is None:
        raise ValueError(
            "accumulated_attention_probs can be None only if coverage penalty is "
            "disabled.")

    # Positions with exactly zero accumulated attention are mapped to 1.0 so
    # that they contribute log(1) = 0 to the sum below.
    untouched = tf.equal(accumulated_attention_probs, 0.0)
    masked_probs = tf.where(
        untouched,
        tf.ones_like(accumulated_attention_probs),
        accumulated_attention_probs)

    # coverage penalty = sum over `max_time` of log(min(attention, 1.0))
    clipped_probs = tf.minimum(masked_probs, 1.0)
    coverage_penalty = tf.reduce_sum(tf.log(clipped_probs), 2)

    # Scale, then go from [batch_size, beam_width] to
    # [batch_size, beam_width, 1] so the penalty broadcasts over the last axis.
    penalty_term = tf.expand_dims(coverage_penalty * weight, 2)

    rescored = log_probs / length_norm + penalty_term
    return tf.where(finished, rescored, log_probs)
def _build_outputs(self, images, labels, mode):
    """Runs the detection pipeline and collects its tensors in one dict.

    The stages, in order: anchors -> backbone -> FPN -> RPN -> RoI generation
    -> (training only) RoI sampling and box targets -> box/class head ->
    (inference only) detection generation -> optional mask head -> optional
    attribute head.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the upstream file.

    Args:
      images: Input image tensor. Its static shape `[1:3]` is used as the
        image size when anchors have to be generated here.
      labels: Dict read for 'image_info' always, 'anchor_boxes' when present,
        and 'gt_boxes', 'gt_classes', 'gt_masks', 'gt_attributes' in training.
      mode: Compared against `mode_keys.TRAIN` to select training behavior.

    Returns:
      Dict of model outputs. Returns early after the box head when
      `self._include_mask` is false, and after the mask head when
      `self._include_attributes` is false.
    """
    is_training = mode == mode_keys.TRAIN
    model_outputs = {}

    if 'anchor_boxes' in labels:
        anchor_boxes = labels['anchor_boxes']
    else:
        anchor_boxes = anchor.Anchor(
            self._params.architecture.min_level,
            self._params.architecture.max_level,
            self._params.anchor.num_scales,
            self._params.anchor.aspect_ratios,
            self._params.anchor.anchor_size,
            images.get_shape().as_list()[1:3]).multilevel_boxes

        # Freshly generated anchors carry no batch dimension: add one and
        # repeat per image in the batch.
        batch_size = tf.shape(images)[0]
        for level in anchor_boxes:
            anchor_boxes[level] = tf.tile(
                tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

    backbone_features = self._backbone_fn(images, is_training)
    fpn_features = self._fpn_fn(backbone_features, is_training)

    rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn(
        fpn_features, is_training)
    model_outputs.update({
        'rpn_score_outputs': rpn_score_outputs,
        'rpn_box_outputs': rpn_box_outputs,
    })
    # Second return value of the RoI generator is discarded.
    rpn_rois, _ = self._generate_rois_fn(rpn_box_outputs, rpn_score_outputs,
                                         anchor_boxes,
                                         labels['image_info'][:, 1, :],
                                         is_training)

    if is_training:
        # Proposals are treated as constants: no gradient flows back into the
        # RPN through the RoI coordinates.
        rpn_rois = tf.stop_gradient(rpn_rois)

        # Sample proposals.
        rpn_rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
            self._sample_rois_fn(rpn_rois, labels['gt_boxes'],
                                 labels['gt_classes']))

        # Create bounding box training targets.
        box_targets = box_utils.encode_boxes(
            matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0])
        # If the target is background, the box target is set to all 0s.
        box_targets = tf.where(
            tf.tile(
                tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
                [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
        model_outputs.update({
            'class_targets': matched_gt_classes,
            'box_targets': box_targets,
        })

    # Box/class head on 7x7 RoI features.
    roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_features, rpn_rois, output_size=7)

    class_outputs, box_outputs = self._frcnn_head_fn(
        roi_features, is_training)
    model_outputs.update({
        'class_outputs': class_outputs,
        'box_outputs': box_outputs,
    })

    if not is_training:
        detection_results = self._generate_detections_fn(
            box_outputs, class_outputs, rpn_rois,
            labels['image_info'][:, 1:2, :])
        model_outputs.update(detection_results)

    if not self._include_mask:
        return model_outputs

    if is_training:
        # `gather_nd_gt_indices` is reused below to pick attribute targets.
        rpn_rois, classes, mask_targets, gather_nd_gt_indices = self._sample_masks_fn(
            rpn_rois, matched_gt_boxes, matched_gt_classes,
            matched_gt_indices, labels['gt_masks'])
        mask_targets = tf.stop_gradient(mask_targets)

        classes = tf.cast(classes, dtype=tf.int32)

        model_outputs.update({
            'mask_targets': mask_targets,
            'sampled_class_targets': classes,
        })
    else:
        # At inference the mask head runs on the final detections.
        rpn_rois = detection_results['detection_boxes']
        classes = tf.cast(detection_results['detection_classes'],
                          dtype=tf.int32)

    # Mask head on 14x14 RoI features.
    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_features, rpn_rois, output_size=14)

    mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes,
                                       is_training)

    if is_training:
        model_outputs.update({
            'mask_outputs': mask_outputs,
        })
    else:
        model_outputs.update(
            {'detection_masks': tf.nn.sigmoid(mask_outputs)})

    if not self._include_attributes:
        return model_outputs

    # The attribute head shares the mask head's RoI features.
    attribute_outputs = self._attributes_head_fn(mask_roi_features,
                                                 is_training)

    if is_training:
        attribute_targets = tf.gather_nd(
            labels['gt_attributes'],
            gather_nd_gt_indices)  # [batch, K, num_attributes]

        model_outputs.update({
            'attribute_outputs': attribute_outputs,
            'attribute_targets': attribute_targets,
        })
    else:
        model_outputs['detection_attributes'] = tf.nn.sigmoid(
            attribute_outputs)

    return model_outputs
def linear_classifier(x_train, y_train, x_test, y_test, num_classes,
                      learning_rate, iterations):
    """Define and train a linear softmax classifier, one sample per step.

    :param x_train nr x num_features training data
    :param y_train nr x int label data
    :param x_test nr x num_features test data
    :param y_test nr x int label data
    :param num_classes number of classes to classify
    :param learning_rate step size for plain gradient descent
    :param iterations number of single-sample SGD steps. Samples are visited
        in order and wrap around to the first sample when `iterations`
        exceeds the number of training rows.
    :return accuracy of classification tr_acc, ts_acc
    :raises ValueError if training steps are requested on an empty x_train
    """
    # x_train and x_test are HoG features.
    num_features = x_train.shape[1]
    num_samples = x_train.shape[0]
    if iterations > 0 and num_samples == 0:
        raise ValueError("cannot train on an empty training set")

    # Build a network
    tf.disable_eager_execution()
    tf.reset_default_graph()

    x = tf.placeholder(tf.float32, shape=[None, num_features], name="images")
    y_ = tf.placeholder(tf.int32, shape=[None, num_classes], name="labels")

    w = tf.get_variable("weights",
                        shape=[num_features, num_classes])  # default initializer
    b = tf.get_variable("offsets", shape=[1, num_classes])
    y_hat = tf.matmul(x, w) + b

    correct_labels = tf.argmax(y_, axis=1, output_type=tf.int32)
    predicted_labels = tf.argmax(y_hat, axis=1, output_type=tf.int32)

    # Cast the boolean hit mask to float so it can be averaged.
    correct_prediction = tf.cast(tf.equal(correct_labels, predicted_labels),
                                 tf.float32)
    accuracy = tf.reduce_sum(correct_prediction) / tf.cast(
        tf.shape(correct_prediction)[0], tf.float32)

    # Loss : Cross-Entropy
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.cast(y_, tf.float32), logits=y_hat), name='loss')

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train the classifier with images one by one (SGD), no batches.
        for i in range(iterations):
            # Wrap around instead of indexing past the end of the data.
            idx = i % num_samples
            y = one_hot_encoder(y_train[idx], num_classes)
            # Wrap sample and label in a leading batch dimension of 1 to match
            # the placeholders' [None, ...] shapes. Only the train op is run;
            # loss/accuracy values were fetched but never used.
            sess.run(train_step,
                     feed_dict={
                         x: np.array([x_train[idx]]),
                         y_: np.array([y])
                     })

        training_accuracy = accuracy.eval(feed_dict={
            x: x_train,
            y_: one_hot_mat(y_train)
        })
        testing_accuracy = accuracy.eval(feed_dict={
            x: x_test,
            y_: one_hot_mat(y_test)
        })

    # Return training and testing accuracy.
    return (training_accuracy, testing_accuracy)
use_relu=False) layer_fc3 = create_fc_layer(input=layer_fc2, num_inputs=fc_layer_size, num_outputs=num_classes, use_relu=False) y_pred = tf.nn.softmax(layer_fc3,name='y_pred') y_pred_cls = tf.argmax(y_pred, dimension=1) session.run(tf.global_variables_initializer()) cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc3, labels=y_true) cost = tf.reduce_mean(cross_entropy) optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost) correct_prediction = tf.equal(y_pred_cls, y_true_cls) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) session.run(tf.global_variables_initializer()) def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss): acc = session.run(accuracy, feed_dict=feed_dict_train) val_acc = session.run(accuracy, feed_dict=feed_dict_validate) msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}" print(msg.format(epoch + 1, acc, val_acc, val_loss)) total_iterations = 0 saver = tf.train.Saver()
for epoch in range(training_epochs): avg_cost = 0 total_batch = int(mnist.train.num_examples / batch_size) for i in range(total_batch): batch_xs, batch_ys = mnist.train.next_batch(batch_size) feed_dict = {X: batch_xs, Y: batch_ys} c, _ = sess.run([cost, optimizer], feed_dict=feed_dict) avg_cost += c / total_batch print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost)) print('Learning Finished!') # Test model and check accuracy correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) print('Accuracy:', sess.run(accuracy, feed_dict={ X: mnist.test.images, Y: mnist.test.labels})) # Get one and predict r = random.randint(0, mnist.test.num_examples - 1) print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1))) print("Prediction: ", sess.run( tf.argmax(hypothesis, 1), feed_dict={X: mnist.test.images[r:r + 1]})) ''' Epoch: 0001 cost = 0.301498963 Epoch: 0002 cost = 0.107252513 Epoch: 0003 cost = 0.064888892
def model_fn(features, labels, mode, params):
    """Builds the model from the input features.

    Reads `config`, `spec`, `channel_axis` and `num_train_images` from the
    enclosing scope (not visible here).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the upstream file.

    Args:
      features: Input tensor fed to the stem convolution.
      labels: Class indices; one-hot encoded with `config['num_labels']`.
      mode: A `tf.estimator.ModeKeys` value selecting PREDICT/TRAIN/EVAL.
      params: Unused; deleted immediately.

    Returns:
      A `TPUEstimatorSpec` for PREDICT, TRAIN or EVAL. No spec is returned
      for any other mode (the function falls off the end).

    Raises:
      ValueError: on an unknown `config['data_format']` or
        `config['lr_decay_method']`.
    """
    del params  # Unused
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Store auxiliary activations increasing in depth of network. First
    # activation occurs immediately after the stem and the others immediately
    # follow each stack.
    aux_activations = []

    # Initial stem convolution
    with tf.variable_scope('stem'):
        net = base_ops.conv_bn_relu(features, 3, config['stem_filter_size'],
                                    is_training, config['data_format'])
        aux_activations.append(net)

    for stack_num in range(config['num_stacks']):
        channels = net.get_shape()[channel_axis].value

        # Downsample at start (except first)
        if stack_num > 0:
            net = tf.layers.max_pooling2d(
                inputs=net,
                pool_size=(2, 2),
                strides=(2, 2),
                padding='same',
                data_format=config['data_format'])

            # Double output channels each time we downsample
            channels *= 2

        with tf.variable_scope('stack{}'.format(stack_num)):
            for module_num in range(config['num_modules_per_stack']):
                with tf.variable_scope('module{}'.format(module_num)):
                    net = build_module(spec,
                                       inputs=net,
                                       channels=channels,
                                       is_training=is_training)
            aux_activations.append(net)

    # Global average pool
    if config['data_format'] == 'channels_last':
        net = tf.reduce_mean(net, [1, 2])
    elif config['data_format'] == 'channels_first':
        net = tf.reduce_mean(net, [2, 3])
    else:
        raise ValueError('invalid data_format')

    # Fully-connected layer to labels
    logits = tf.layers.dense(inputs=net, units=config['num_labels'])

    if mode == tf.estimator.ModeKeys.PREDICT and not config['use_tpu']:
        # It is a known limitation of Estimator that the labels
        # are not passed during PREDICT mode when running on CPU/GPU
        # (https://github.com/tensorflow/tensorflow/issues/17824), thus we cannot
        # compute the loss or anything dependent on it (i.e., the gradients).
        loss = tf.constant(0.0)
    else:
        loss = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(
            labels, config['num_labels']),
                                               logits=logits)

        # L2 weight decay over every trainable variable.
        loss += config['weight_decay'] * tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

    # Use inference mode to compute some useful metrics on a fixed sample
    # Due to the batch being sharded on TPU, these metrics should be run on CPU
    # only to ensure that the metrics are computed on the whole batch. We add a
    # leading dimension because PREDICT expects batch-shaped tensors.
    if mode == tf.estimator.ModeKeys.PREDICT:
        parameter_norms = {
            'param:' + tensor.name: tf.expand_dims(tf.norm(tensor, ord=2), 0)
            for tensor in tf.trainable_variables()
        }

        # Compute gradients of all parameters and the input simultaneously
        all_params_names = []
        all_params_tensors = []
        for tensor in tf.trainable_variables():
            all_params_names.append('param_grad_norm:' + tensor.name)
            all_params_tensors.append(tensor)
        # The input is appended last, so grads[-1] below is the input gradient.
        all_params_names.append('input_grad_norm')
        all_params_tensors.append(features)
        grads = tf.gradients(loss, all_params_tensors)

        param_gradient_norms = {}
        # [:-1] skips the trailing input entry; it is handled separately.
        for name, grad in list(zip(all_params_names, grads))[:-1]:
            if grad is not None:
                param_gradient_norms[name] = (tf.expand_dims(
                    tf.norm(grad, ord=2), 0))
            else:
                # tf.gradients yields None for tensors the loss does not
                # depend on; report a zero norm for those.
                param_gradient_norms[name] = (tf.expand_dims(
                    tf.constant(0.0), 0))

        if grads[-1] is not None:
            # Per-example L2 norm: reduce over every axis except the first.
            input_grad_norm = tf.sqrt(
                tf.reduce_sum(tf.square(grads[-1]), axis=[1, 2, 3]))
        else:
            input_grad_norm = tf.expand_dims(tf.constant(0.0), 0)

        covariance_matrices = {
            'cov_matrix_%d' % i: tf.expand_dims(_covariance_matrix(aux), 0)
            for i, aux in enumerate(aux_activations)
        }

        predictions = {
            'logits': logits,
            'loss': tf.expand_dims(loss, 0),
            'input_grad_norm': input_grad_norm,
        }
        predictions.update(parameter_norms)
        predictions.update(param_gradient_norms)
        predictions.update(covariance_matrices)

        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               predictions=predictions)

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()
        base_lr = config['learning_rate']
        if config['use_tpu']:
            # Scale the base learning rate by the number of TPU shards.
            base_lr *= config['tpu_num_shards']

        if config['lr_decay_method'] == 'COSINE_BY_STEP':
            total_steps = int(config['train_epochs'] * num_train_images /
                              config['batch_size'])
            progress_fraction = tf.cast(global_step, tf.float32) / total_steps
            learning_rate = (0.5 * base_lr *
                             (1 + tf.cos(np.pi * progress_fraction)))

        elif config['lr_decay_method'] == 'COSINE_BY_TIME':
            # Requires training_time.limit hooks to be added to Estimator
            elapsed_time = tf.cast(training_time.get_total_time(),
                                   dtype=tf.float32)
            progress_fraction = elapsed_time / config['train_seconds']
            learning_rate = (0.5 * base_lr *
                             (1 + tf.cos(np.pi * progress_fraction)))

        elif config['lr_decay_method'] == 'STEPWISE':
            # divide LR by 10 at 1/2, 2/3, and 5/6 of total epochs
            # NOTE(review): the last value is 0.0001 * base_lr, i.e. a /100
            # step after 0.01, which disagrees with the comment above.
            # Confirm which one is intended.
            total_steps = (config['train_epochs'] * num_train_images /
                           config['batch_size'])
            boundaries = [
                int(0.5 * total_steps),
                int(0.667 * total_steps),
                int(0.833 * total_steps)
            ]
            values = [
                1.0 * base_lr, 0.1 * base_lr, 0.01 * base_lr,
                0.0001 * base_lr
            ]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)

        else:
            raise ValueError('invalid lr_decay_method')

        # Set LR to 0 for step 0 to initialize the weights without training
        learning_rate = tf.where(tf.equal(global_step, 0), 0.0, learning_rate)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                              momentum=config['momentum'],
                                              epsilon=1.0)
        if config['use_tpu']:
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

        # Update ops required for batch norm moving variables
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               loss=loss,
                                               train_op=train_op)

    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            # Top-1 accuracy of the arg-max prediction.
            predictions = tf.argmax(logits, axis=1)
            accuracy = tf.metrics.accuracy(labels, predictions)

            return {'accuracy': accuracy}

        eval_metrics = (metric_fn, [labels, logits])

        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               loss=loss,
                                               eval_metrics=eval_metrics)
def decode(self, serialized_example):
    """Decode the serialized example.

    Args:
      serialized_example: a single serialized tf.Example string.

    Returns:
      decoded_tensors: a dictionary of tensors with the following fields:
        - image: a uint8 tensor of shape [None, None, 3].
        - source_id: a string scalar tensor.
        - height: an integer scalar tensor.
        - width: an integer scalar tensor.
        - groundtruth_classes: an int64 tensor of shape [None].
        - groundtruth_is_crowd: a bool tensor of shape [None].
        - groundtruth_area: a float32 tensor of shape [None].
        - groundtruth_boxes: a float32 tensor of shape [None, 4].
        - groundtruth_instance_masks: a float32 tensor of shape
            [None, None, None] (only when masks are included).
        - groundtruth_instance_masks_png: a string tensor of shape [None]
            (only when masks are included).
    """
    parsed_tensors = tf.io.parse_single_example(serialized_example,
                                                self._keys_to_features)

    # Densify every sparse feature. String features are padded with '' and
    # all others with 0. `tf.sparse.to_dense` replaces the deprecated
    # `tf.sparse_tensor_to_dense` alias.
    for k in parsed_tensors:
        if isinstance(parsed_tensors[k], tf.SparseTensor):
            default_value = '' if parsed_tensors[k].dtype == tf.string else 0
            parsed_tensors[k] = tf.sparse.to_dense(
                parsed_tensors[k], default_value=default_value)

    image = self._decode_image(parsed_tensors)
    boxes = self._decode_boxes(parsed_tensors)
    areas = self._decode_areas(parsed_tensors)

    # A stored height or width of -1 means "unknown": fall back to the shape
    # of the decoded image for both.
    decode_image_shape = tf.logical_or(
        tf.equal(parsed_tensors['image/height'], -1),
        tf.equal(parsed_tensors['image/width'], -1))
    image_shape = tf.cast(tf.shape(image), dtype=tf.int64)

    parsed_tensors['image/height'] = tf.where(
        decode_image_shape, image_shape[0], parsed_tensors['image/height'])
    parsed_tensors['image/width'] = tf.where(
        decode_image_shape, image_shape[1], parsed_tensors['image/width'])

    # When no is_crowd annotations are present, default to all-False with the
    # same shape as the class labels.
    is_crowds = tf.cond(
        tf.greater(tf.shape(parsed_tensors['image/object/is_crowd'])[0], 0),
        lambda: tf.cast(parsed_tensors['image/object/is_crowd'],
                        dtype=tf.bool),
        lambda: tf.zeros_like(parsed_tensors[self._label_key],
                              dtype=tf.bool))

    if self._regenerate_source_id:
        source_id = _get_source_id_from_encoded_image(parsed_tensors)
    else:
        # Use the stored source id unless it is empty.
        source_id = tf.cond(
            tf.greater(
                tf.strings.length(parsed_tensors['image/source_id']), 0),
            lambda: parsed_tensors['image/source_id'],
            lambda: _get_source_id_from_encoded_image(parsed_tensors))

    if self._include_mask:
        masks = self._decode_masks(parsed_tensors)

    groundtruth_classes = parsed_tensors[self._label_key]
    decoded_tensors = {
        'image': image,
        'source_id': source_id,
        'height': parsed_tensors['image/height'],
        'width': parsed_tensors['image/width'],
        'groundtruth_classes': groundtruth_classes,
        'groundtruth_is_crowd': is_crowds,
        'groundtruth_area': areas,
        'groundtruth_boxes': boxes,
    }
    if self._include_mask:
        decoded_tensors.update({
            'groundtruth_instance_masks': masks,
            'groundtruth_instance_masks_png':
                parsed_tensors['image/object/mask'],
        })
    return decoded_tensors
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                             lm_emb, char_index, text_len, genre, is_training,
                             gold_starts, gold_ends, cluster_ids):
    """Builds the coreference graph: span candidates, pruning, antecedent scoring, loss.

    Stages: embed words (optional char CNN + weighted LM layers) ->
    contextualize -> enumerate candidate spans within sentences -> score
    mentions -> keep top spans (gold spans if config['use_gold']) -> prune
    antecedents -> iteratively refine span embeddings -> softmax loss over
    two dummy columns plus the kept antecedents.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the upstream file. `tokens` is not used in this
    body.

    Returns:
      A pair `(outputs, loss)` where `outputs` is the list
      [candidate_starts, candidate_ends, candidate_mention_scores,
       top_span_starts, top_span_ends, top_antecedents,
       top_antecedent_scores] and `loss` is a scalar.
    """
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(
        self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                         is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    if self.config["char_embedding_size"] > 0:
        # [num_sentences, max_sentence_length, max_word_length, emb]
        char_emb = tf.gather(
            tf.get_variable(
                "char_embeddings",
                [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index)
        # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])
        # [num_sentences * max_sentence_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, self.config["filter_widths"],
            self.config["filter_size"])
        # [num_sentences, max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
        # Learned softmax mixture over LM layers plus a learned scalar scale.
        self.lm_weights = tf.nn.softmax(
            tf.get_variable("lm_scores", [lm_num_layers],
                            initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable(
            "lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(
        lm_emb,
        [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    # [num_sentences * max_sentence_length * emb, 1]
    flattened_aggregated_lm_emb = tf.matmul(
        flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))
    aggregated_lm_emb = tf.reshape(
        flattened_aggregated_lm_emb,
        [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    # LM embeddings feed the context representation only, not the head one.
    context_emb_list.append(aggregated_lm_emb)

    # [num_sentences, max_sentence_length, emb]
    context_emb = tf.concat(context_emb_list, 2)
    # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2)
    # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)
    # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

    # [num_sentence, max_sentence_length]
    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)

    context_outputs = self.lstm_contextualize(
        context_emb, text_len, text_len_mask)  # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(
        tf.get_variable("genre_embeddings",
                        [len(self.genres), self.config["feature_size"]]),
        genre)  # [emb]

    # [num_sentences, max_sentence_length]
    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1),
                               [1, max_sentence_length])
    flattened_sentence_indices = self.flatten_emb_by_sentence(
        sentence_indices, text_len_mask)  # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(
        head_emb, text_len_mask)  # [num_words]

    # Enumerate every (start, start + width) pair for width < max_span_width.
    candidate_starts = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    candidate_ends = candidate_starts + \
        tf.expand_dims(tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(
        flattened_sentence_indices, candidate_starts)
    # [num_words, max_span_width]; ends are clamped so the gather stays in
    # range, out-of-range ends are removed by the mask below.
    candidate_end_sentence_indices = tf.gather(
        flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1))
    # Keep spans that end inside the document and do not cross a sentence
    # boundary. [num_words, max_span_width]
    candidate_mask = tf.logical_and(
        candidate_ends < num_words,
        tf.equal(candidate_start_sentence_indices,
                 candidate_end_sentence_indices))
    flattened_candidate_mask = tf.reshape(
        candidate_mask, [-1])  # [num_words * max_span_width]
    # [num_candidates]
    candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]),
                                       flattened_candidate_mask)
    # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]),
                                     flattened_candidate_mask)
    # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(candidate_start_sentence_indices, [-1]),
        flattened_candidate_mask)

    # [num_candidates]
    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids)

    # [num_candidates, emb]
    candidate_span_emb = self.get_span_emb(flattened_head_emb,
                                           context_outputs, candidate_starts,
                                           candidate_ends)
    candidate_mention_scores = self.get_mention_scores(
        candidate_span_emb)  # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [k]

    if self.config['use_gold']:
        # Keep exactly the candidates whose (start, end) equals a gold span.
        candidates_spans = tf.stack([candidate_starts, candidate_ends],
                                    axis=1)
        gold_spans = tf.stack([gold_starts, gold_ends], axis=1)
        same_span = tf.equal(tf.expand_dims(gold_spans, 1),
                             tf.expand_dims(candidates_spans, 0))
        top_span_indices = tf.reduce_any(tf.reduce_all(same_span, axis=2),
                                         axis=0)
        top_span_indices = tf.squeeze(tf.where(top_span_indices), axis=1)
        k = tf.cast(util.shape(top_span_indices, 0), tf.int32)
    else:
        # Keep floor(num_words * top_span_ratio) spans chosen by the custom op.
        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb,
                             top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                     top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores,
                                        top_span_indices)  # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                          top_span_indices)  # [k]

    # Number of antecedents kept per span, capped by the number of spans.
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
        (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
         top_antecedent_offsets) = self.coarse_to_fine_pruning(
             top_span_emb, top_span_mention_scores, c)
    else:
        (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
         top_antecedent_offsets) = self.distance_pruning(
             top_span_emb, top_span_mention_scores, c)

    # Two dummy columns precede the real antecedents: the first scores the
    # negated mention score, the second is a constant zero. [k, 1] each.
    dummy_scores_nomention = tf.expand_dims(
        top_span_mention_scores * -1, 1)  # [k, 1]
    dummy_scores_first = tf.zeros([k, 1])  # [k, 1]
    for i in range(self.config["coref_depth"]):
        # Variables are shared across refinement iterations.
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
            top_antecedent_emb = tf.gather(top_span_emb,
                                           top_antecedents)  # [k, c, emb]
            top_antecedent_scores = (
                top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, genre_emb))  # [k, c]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([
                    dummy_scores_nomention, dummy_scores_first,
                    top_antecedent_scores
                ], 1))  # [k, c + 2]
            # The span's own embedding stands in for both dummy columns.
            top_antecedent_emb = tf.concat([
                tf.expand_dims(top_span_emb, 1),
                tf.expand_dims(top_span_emb, 1), top_antecedent_emb
            ], 1)  # [k, c + 2, emb]
            # [k, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) *
                top_antecedent_emb, 1)
            with tf.variable_scope("f"):
                # Sigmoid gate interpolating between old and attended
                # embeddings.
                f = tf.sigmoid(
                    util.projection(
                        tf.concat([top_span_emb, attended_span_emb], 1),
                        util.shape(top_span_emb, -1)))  # [k, emb]
                top_span_emb = f * attended_span_emb + \
                    (1 - f) * top_span_emb  # [k, emb]

    # [k, c + 2]
    top_antecedent_scores = tf.concat([
        dummy_scores_nomention, dummy_scores_first, top_antecedent_scores
    ], 1)

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                           top_antecedents)  # [k, c]
    # log(1) = 0 leaves valid entries unchanged; log(0) = -inf for masked
    # entries. NOTE(review): relies on how -inf converts to int32 so masked
    # ids never match a real cluster id; confirm.
    # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                      tf.expand_dims(top_span_cluster_ids,
                                                     1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                         1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator,
                                     non_dummy_indicator)  # [k, c]
    # True for spans whose cluster id is 0.
    dummy_labels_nomention = tf.expand_dims(
        tf.equal(top_span_cluster_ids, 0), 1)  # [k, 1]
    # True when neither the no-mention label nor any antecedent label is set.
    dummy_labels_first = tf.logical_not(
        tf.reduce_any(tf.concat([dummy_labels_nomention, pairwise_labels], 1),
                      1,
                      keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat(
        [dummy_labels_nomention, dummy_labels_first, pairwise_labels],
        1)  # [k, c + 2]
    loss = self.softmax_loss(top_antecedent_scores,
                             top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []

    return [
        candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores
    ], loss
def App_Run():
    """Train the classifier and export its best weights.

    Loads the shuffled training and validation arrays from disk, builds a
    softmax-cross-entropy training graph on top of `create_neural_net`, and
    runs `epochnum` epochs of 100 mini-batch steps each. After every epoch
    the most recently reported mini-batch accuracy and the validation
    accuracy are printed and recorded. Whenever the mini-batch accuracy
    improves, the layer weights and biases are exported to a MATLAB file.
    One checkpoint is written after the last epoch.

    Relies on module-level names defined elsewhere in this file: `n_band`,
    `n_classes`, `learning_rate`, `batch_size`, `display_step`, `w`, `b`,
    `create_neural_net`, `next_batch` and `peek_train_data`.
    """
    data_file = "../train_data/20200605/shuffled_train_data.npy"
    lbl_file = "../train_data/20200605/shuffled_train_label.npy"
    val_data_file = "../train_data/20200605/shuffled_val_train_data.npy"
    val_lbl_file = "../train_data/20200605/shuffled_val_train_label.npy"

    x_train = np.load(data_file, allow_pickle=True)
    y_train = np.load(lbl_file, allow_pickle=True)
    # The validation set is needed for the per-epoch "test" accuracy below.
    # NOTE(review): assumes the stored labels are already one-hot encoded,
    # like the training labels fed to `Y` -- confirm.
    x_test = np.load(val_data_file, allow_pickle=True)
    y_test = np.load(val_lbl_file, allow_pickle=True)

    X = tf.placeholder(tf.float32, [None, n_band])
    Y = tf.placeholder(tf.int8, [None, n_classes])

    logits = create_neural_net(X)
    prediction = tf.nn.softmax(logits)

    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    error_train = []
    error_test = []

    init = tf.global_variables_initializer()
    acc_now = 0
    epochnum = 3
    ckpt_file_path = "../training_ckpt/weights_improvement_{}-{}.ckpt"

    # Op used to save all variables.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)

        current_epoch = 0
        for epoch in range(epochnum):
            current_epoch = epoch
            for step in range(100):
                batch_x, batch_y = next_batch(batch_size, x_train, y_train)
                peek_train_data(batch_x, batch_y)
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})

                if step % display_step == 0 or step == 1:
                    # Report loss and accuracy on the current mini-batch.
                    loss, acc = sess.run([loss_op, accuracy],
                                         feed_dict={X: batch_x, Y: batch_y})
                    print("Step " + str(step) + ", Minibatch Loss= " +
                          "{:.4f}".format(loss) + ", Training Accuracy= " +
                          "{:.3f}".format(acc))

            # Validation accuracy; it was previously printed without ever
            # being computed, which raised NameError.
            acc1 = sess.run(accuracy, feed_dict={X: x_test, Y: y_test})

            print("epoch", epoch)
            print("train : ", acc)
            print("test : ", acc1)
            error_train.append(acc)
            error_test.append(acc1)

            if acc > acc_now:
                # New best mini-batch accuracy: export the parameters.
                acc_now = acc
                weight1 = w['hidden1'].eval(sess)
                weight2 = w['hidden2'].eval(sess)
                weight3 = w['hidden3'].eval(sess)
                weight4 = w['output'].eval(sess)
                bias1 = b['hidden1'].eval(sess)
                bias2 = b['hidden2'].eval(sess)
                bias3 = b['hidden3'].eval(sess)
                bias4 = b['output'].eval(sess)
                spio.savemat(
                    'kws_weights/w_3layer128.mat', {
                        'w1': weight1,
                        'w2': weight2,
                        'w3': weight3,
                        'w4': weight4,
                        'b1': bias1,
                        'b2': bias2,
                        'b3': bias3,
                        'b4': bias4
                    })

        # NOTE(review): checkpoint placement reconstructed as "once, after the
        # last epoch" (it uses `current_epoch`); confirm against the intent.
        saver.save(sess, ckpt_file_path.format(current_epoch, acc_now))
# 학습 optimizer = tf.train.AdamOptimizer(learning_rate=0.01) train_op = optimizer.minimize(cost) init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) for step in range(100): sess.run(train_op, feed_dict={X: x_data, Y: y_data}) if (step + 1) % 10 == 0: print(step + 1, sess.run(cost, feed_dict={X: x_data, Y: y_data})) # 학습된 결과 확인 # argmax는 요소 중 가장 큰 값을 골라줌 prediction = tf.argmax(model, axis=1) target = tf.argmax(Y, axis=1) print("prediction=", sess.run(prediction, feed_dict={X: x_data})) print("target=", sess.run(target, feed_dict={Y: y_data})) # 정확도 측정 is_correct = tf.equal(prediction, target) accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32)) print("accuracy=%0.2f" % sess.run(accuracy * 100, feed_dict={ X: x_data, Y: y_data }))
# Output layer: 300 inputs -> 10 logits.
w0 = tf.Variable(tf.zeros([300, 10]))
b0 = tf.Variable(tf.zeros([10]))
k = tf.matmul(hidden2, w0) + b0  # k holds the values before the softmax layer
p = tf.nn.softmax(k)

# Define the loss (cost) function.
# `t` is a placeholder; the labels are fed in later from the training set.
t = tf.placeholder(tf.float32, [None, 10])
# tf.nn.softmax_cross_entropy_with_logits applies the softmax itself, so it
# is given the raw logits `k` rather than `p`.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=k, labels=t)
)
# Adam is used to minimise the cost function.
train_step = tf.train.AdamOptimizer(0.0001).minimize(
    loss)

# Accuracy computation.
# Compares the prediction with the supplied label and yields, per row,
# whether they agree. argmax returns the index of the largest value; with
# 10 columns that index is the predicted class. `p` is the prediction and
# `t` the label: the prediction is correct when both indices match.
correct_prediction = tf.equal(tf.argmax(p, 1), tf.argmax(
    t, 1))
# correct_prediction is boolean, so it is cast to float before averaging.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Record the accuracy as a scalar summary for monitoring (TensorBoard).
tf.summary.scalar('accuracy', accuracy)
# Merge every summary op defined so far into a single op.
summary_init = tf.summary.merge_all()

# Prepare the session: start it and initialise the variables.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
def draw_samples(alpha, scale):
    r"""Sample from the robust distribution, one draw per (alpha, scale) pair.

    Implements Algorithm 1 of the paper via rejection sampling with a Cauchy
    proposal. Instead of drawing N samples from a single distribution, each
    element of `alpha`/`scale` parametrizes its own distribution and receives
    exactly one sample. Rounds of N proposals are repeated until every
    element has had at least one proposal accepted.

    Samples are zero-mean; add a mean to each sample to shift them.

    Args:
      alpha: A TF tensor/scalar or numpy array/scalar of floats; each element
        is the shape parameter of that element's distribution.
      scale: A TF tensor/scalar or numpy array/scalar of floats; each element
        is the scale parameter of that element's distribution. Must have the
        same shape and dtype as `alpha`.

    Returns:
      A TF tensor with the same shape and precision as `alpha` and `scale`,
      each element being a sample from the distribution described by the
      matching elements of `alpha` and `scale`.
    """
    dtype = alpha.dtype
    # `scale` has to share `alpha`'s dtype.
    tf.assert_type(scale, dtype)

    scale_is_positive = tf.Assert(tf.reduce_all(scale > 0.), [scale])
    alpha_is_nonnegative = tf.Assert(tf.reduce_all(alpha >= 0.), [alpha])
    shapes_match = tf.Assert(
        tf.reduce_all(tf.equal(tf.shape(alpha), tf.shape(scale))),
        [tf.shape(alpha), tf.shape(scale)])
    checks = [scale_is_positive, alpha_is_nonnegative, shapes_match]

    with tf.control_dependencies(checks):
        shape = tf.shape(alpha)

        # Proposal and acceptance distributions. The sqrt(2) Cauchy scale
        # compensates for the differing standardization conventions.
        proposal_dist = tfp.distributions.Cauchy(loc=0., scale=tf.sqrt(2.))
        unit_uniform = tfp.distributions.Uniform(low=0., high=1.)

        def _keep_sampling(_, done):
            """Loop while at least one element is still without a sample."""
            return tf.logical_not(tf.reduce_all(done))

        def _propose_and_accept(current, done):
            """Run one round of N proposals and apply the accept/reject test."""
            # N proposals from the Cauchy distribution.
            proposal = tf.cast(proposal_dist.sample(shape), dtype)

            # Negative log-likelihood of each proposal under its target.
            target_nll = nllfun(proposal, alpha, tf.cast(1, dtype))

            # Bound on the NLL. The exact (non-approximate) loss is used, as
            # the approximation may behave unpredictably when sampling.
            bound_nll = general.lossfun(
                proposal,
                tf.cast(0, dtype),
                tf.cast(1, dtype),
                approximate=False) + log_base_partition_function(alpha)

            # One uniform draw per proposal decides acceptance.
            threshold = tf.cast(unit_uniform.sample(shape), dtype)
            is_accepted = threshold <= tf.exp(bound_nll - target_nll)

            # Accepted proposals overwrite their slot and mark it as done.
            updated = tf.where(is_accepted, proposal, current)
            return (updated, tf.logical_or(is_accepted, done))

        # Initial sample values are irrelevant (they get overwritten); the
        # accepted mask must start out all False.
        initial_state = (tf.zeros(shape, dtype), tf.zeros(shape, dtype=bool))

        # Keep sampling until every element has been accepted.
        final_samples, _ = tf.while_loop(cond=_keep_sampling,
                                         body=_propose_and_accept,
                                         loop_vars=initial_state)

        # Location-scale family: sample from p(x | 0, \alpha, 1), then scale.
        return tf.multiply(final_samples, scale)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight, max_tgt):
    """Performs a single step of Beam Search Decoding.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search. An instance of
        `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int. The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of
        source sentence. Disabled with 0.0.
      max_tgt: maximum prediction length.

    Returns:
      A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
      selected scores, word ids and parent beam ids for this step, and the
      updated `BeamSearchDecoderState`.
    """
    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = tf.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = tf.nn.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = tf.expand_dims(beam_state.log_probs, 2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    # The one-hot below is 0 at `end_token` and 1 everywhere else, and is
    # zeroed entirely for beams that were already finished.
    vocab_size = logits.shape[-1].value or tf.shape(logits)[-1]
    lengths_to_add = tf.one_hot(indices=tf.fill([batch_size, beam_width],
                                                end_token),
                                depth=vocab_size,
                                on_value=np.int64(0),
                                off_value=np.int64(1),
                                dtype=tf.int64)
    add_mask = tf.to_int64(not_finished)
    lengths_to_add *= tf.expand_dims(add_mask, 2)
    new_prediction_lengths = (lengths_to_add +
                              tf.expand_dims(prediction_lengths, 2))

    # Calculate the accumulated attention probabilities if coverage penalty
    # is enabled.
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        # Finished beams contribute no further attention mass.
        attention_probs *= tf.expand_dims(
            tf.cast(not_finished, attention_probs.dtype), 2)
        accumulated_attention_probs = (
            beam_state.accumulated_attention_probs + attention_probs)

    # True for batch entries whose beams are all finished, tiled to the
    # shape of `total_probs`.
    batch_finished = tf.reduce_all(previously_finished,
                                   axis=1,
                                   keepdims=True)
    any_batch_finished = tf.reduce_any(batch_finished)
    batch_finished = tf.tile(tf.expand_dims(batch_finished, 2),
                             [1, beam_width, vocab_size])

    def _normalized_scores():
        return _get_scores(
            log_probs=total_probs,
            sequence_lengths=new_prediction_lengths,
            length_penalty_weight=length_penalty_weight,
            coverage_penalty_weight=coverage_penalty_weight,
            finished=batch_finished,
            accumulated_attention_probs=accumulated_attention_probs)

    # Normalize the scores of finished batches.
    # When no batch entry is fully finished the raw log probs are used.
    scores = tf.cond(any_batch_finished, _normalized_scores,
                     lambda: total_probs)

    time = tf.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_flat = tf.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function
    next_beam_scores, word_indices = top_k_with_unique(scores_flat,
                                                       beam_width)

    next_beam_scores.set_shape([batch_size, beam_width])
    word_indices.set_shape([batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen
    # predictions
    batch_ids = tf.expand_dims(
        tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, beam_width]), 2)
    indices = tf.concat([batch_ids, tf.expand_dims(word_indices, 2)], -1)
    next_beam_probs = tf.gather_nd(tf.reshape(total_probs, [batch_size, -1]),
                                   indices)

    # Note: just doing the following
    #   tf.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results
    # of the op which prevents capturing it with tfdbg debug ops.
    # `word_indices` index the flattened (beam, vocab) axis: the remainder is
    # the word id and the quotient is the parent beam.
    raw_next_word_ids = tf.mod(word_indices,
                               vocab_size,
                               name="next_beam_word_ids")
    next_word_ids = tf.to_int32(raw_next_word_ids)
    next_beam_ids = tf.div(word_indices,
                           vocab_size,
                           name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = tf.logical_or(previously_finished,
                                  tf.equal(next_word_ids, end_token),
                                  name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = tf.to_int64(tf.logical_not(previously_finished))
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Stays an empty tuple when coverage penalty is disabled.
    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")

    next_pred_ids = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.pred_ids,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[batch_size * beam_width, -1],
        name="pred_ids")

    # Add next_word_ids to next_pred_ids.
    # Done by moving the time axis to the front, overwriting the slice at
    # index `time` through a boolean mask, and transposing back.
    next_pred_ids = tf.transpose(next_pred_ids, [2, 0, 1])
    cur_time = tf.tile(tf.reshape(time, [1]), [max_tgt])
    time_mask = tf.equal(tf.range(max_tgt), cur_time)
    time_mask = tf.tile(tf.reshape(time_mask, [max_tgt, 1, 1]),
                        [1, batch_size, beam_width])
    cur_time_ids = tf.tile(
        tf.reshape(next_word_ids, [1, batch_size, beam_width]),
        [max_tgt, 1, 1])
    next_pred_ids = tf.where(time_mask, cur_time_ids, next_pred_ids)
    next_pred_ids = tf.transpose(next_pred_ids, [1, 2, 0])

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = contrib_framework.nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs,
        pred_ids=next_pred_ids)

    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
def build():
    """Builds the Tensorflow graph.

    Reads `mode`, `hparams`, `input_size` and `sequence_example_file_paths`
    from the enclosing scope. Depending on `mode` it builds:

    * 'train': loss, metrics and a train op (added to the 'train_op'
      collection);
    * 'eval': streaming metrics, whose update ops are added to the
      'eval_ops' collection;
    * 'generate': the placeholder input plus sampling and state tensors,
      added to the 'inputs', 'sample', 'log_prob', 'initial_state' and
      'final_state' collections.

    In 'train' and 'eval' modes every summarized value, including an F1
    score derived from precision and recall, is written as a scalar summary
    and added to a collection under its own name.
    """
    inputs, lengths = None, None

    if mode in ('train', 'eval'):
        inputs, _, lengths = magenta.common.get_padded_batch(
            sequence_example_file_paths,
            hparams.batch_size,
            input_size,
            shuffle=mode == 'train')

    elif mode == 'generate':
        inputs = tf.placeholder(tf.float32,
                                [hparams.batch_size, None, input_size])

    cell = events_rnn_graph.make_rnn_cell(
        hparams.rnn_layer_sizes,
        # Dropout is only active while training.
        dropout_keep_prob=hparams.dropout_keep_prob
        if mode == 'train' else 1.0,
        attn_length=hparams.attn_length,
        residual_connections=hparams.residual_connections)

    rnn_nade = RnnNade(cell,
                       num_dims=input_size,
                       num_hidden=hparams.nade_hidden_units)

    if mode in ('train', 'eval'):
        log_probs, cond_probs = rnn_nade.log_prob(inputs, lengths)

        inputs_flat = tf.to_float(
            magenta.common.flatten_maybe_padded_sequences(inputs, lengths))
        # Threshold the conditional probabilities at 0.5 to get predictions.
        predictions_flat = tf.to_float(tf.greater_equal(cond_probs, .5))

        if mode == 'train':
            loss = tf.reduce_mean(-log_probs)
            perplexity = tf.reduce_mean(tf.exp(log_probs))
            correct_predictions = tf.to_float(
                tf.equal(inputs_flat, predictions_flat))
            accuracy = tf.reduce_mean(correct_predictions)
            precision = (tf.reduce_sum(inputs_flat * predictions_flat) /
                         tf.reduce_sum(predictions_flat))
            recall = (tf.reduce_sum(inputs_flat * predictions_flat) /
                      tf.reduce_sum(inputs_flat))

            optimizer = tf.train.AdamOptimizer(
                learning_rate=hparams.learning_rate)

            train_op = contrib_slim.learning.create_train_op(
                loss, optimizer, clip_gradient_norm=hparams.clip_norm)
            tf.add_to_collection('train_op', train_op)

            vars_to_summarize = {
                'loss': loss,
                'metrics/perplexity': perplexity,
                'metrics/accuracy': accuracy,
                'metrics/precision': precision,
                'metrics/recall': recall,
            }
        elif mode == 'eval':
            vars_to_summarize, update_ops = (
                contrib_metrics.aggregate_metric_map({
                    'loss':
                        tf.metrics.mean(-log_probs),
                    'metrics/perplexity':
                        tf.metrics.mean(tf.exp(log_probs)),
                    'metrics/accuracy':
                        tf.metrics.accuracy(inputs_flat, predictions_flat),
                    'metrics/precision':
                        tf.metrics.precision(inputs_flat, predictions_flat),
                    'metrics/recall':
                        tf.metrics.recall(inputs_flat, predictions_flat),
                }))
            for updates_op in update_ops.values():
                tf.add_to_collection('eval_ops', updates_op)

        precision = vars_to_summarize['metrics/precision']
        # Recall was previously read from the precision entry, which made the
        # reported F1 score equal to precision.
        recall = vars_to_summarize['metrics/recall']
        # Guard against 0/0 when both precision and recall are zero.
        f1_score = tf.where(
            tf.greater(precision + recall, 0),
            2 * ((precision * recall) / (precision + recall)), 0)
        vars_to_summarize['metrics/f1_score'] = f1_score
        for var_name, var_value in vars_to_summarize.items():
            tf.summary.scalar(var_name, var_value)
            tf.add_to_collection(var_name, var_value)

    elif mode == 'generate':
        initial_state = rnn_nade.zero_state(hparams.batch_size)

        final_state = rnn_nade.steps(inputs, initial_state)
        samples, log_prob = rnn_nade.sample_single(initial_state)

        tf.add_to_collection('inputs', inputs)
        tf.add_to_collection('sample', samples)
        tf.add_to_collection('log_prob', log_prob)

        # Flatten state tuples for metagraph compatibility.
        for state in tf.nest.flatten(initial_state):
            tf.add_to_collection('initial_state', state)
        for state in tf.nest.flatten(final_state):
            tf.add_to_collection('final_state', state)
(2, 2), #步长 name='pool1') separable_2a = separable_conv_block(pooling1, 32, name='separable_2a') separable_2b = separable_conv_block(separable_2a, 32, name='separable_2b') pooling2 = tf.layers.max_pooling2d(separable_2b, [2, 2], [2, 2], name='pool2') separable_3a = separable_conv_block(pooling2, 32, name='separable_3a') separable_3b = separable_conv_block(separable_3a, 32, name='separable_3b') pooling3 = tf.layers.max_pooling2d(separable_3b, [2, 2], [2, 2], name='pool3') flatten = tf.layers.flatten(pooling3) y_ = tf.layers.dense(flatten, 10) loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_) #取y_中最大值标签为预测分类 predict = tf.arg_max(y_, 1) correct_prediction = tf.equal(predict, y) #预测值 accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64)) #准确率 #梯度下降 with tf.name_scope('train_op'): train_op = tf.train.AdamOptimizer(1e-3).minimize(loss) #cifar-10数据处理类 class CifarData: def __init__(self, filenames, need_shuffle): #need_shuffle:打乱训练集顺序,降低依赖,提升泛化能力 # 读入数据 all_data = [] all_labels = [] for filename in filenames:
print("y train size: ", len(y_train))
print("y test size: ", len(y_test))

#%%#### BUILD A MODEL
# Placeholders for the input segments and their labels.
X = tf.placeholder(tf.float32, [None, SEGMENT_TIME_SIZE, N_FEATURES],
                   name="X")
y = tf.placeholder(tf.float32, [None, N_CLASSES], name="y")

# `createLSTM` yields the logits; the softmax is kept as a separate named op.
y_pred = createLSTM(X)
y_pred_softmax = tf.nn.softmax(y_pred, name="y_pred_softmax")

# LOSS
# L2 penalty summed over every trainable variable, scaled by L2_LOSS, and
# added to the mean softmax cross-entropy.
l2 = L2_LOSS * sum(tf.nn.l2_loss(i) for i in tf.trainable_variables())
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred,
                                               labels=y)) + l2

#%% OPTIMIZER
# NOTE: `optimizer` is the op returned by minimize(), i.e. the training step.
optimizer = tf.train.AdamOptimizer(
    learning_rate=LEARNING_RATE).minimize(loss)
correct_pred = tf.equal(tf.argmax(y_pred_softmax, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

#%% TRAINING
saver = tf.train.Saver()
history = dict(train_loss=[], train_acc=[], test_loss=[], test_acc=[])
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
train_count = len(X_train)

for i in range(1, N_EPOCHS + 1):
    # Walk the training set in consecutive BATCH_SIZE slices. Because the
    # two ranges are zipped, a trailing partial batch is skipped.
    for start, end in zip(range(0, train_count, BATCH_SIZE),
                          range(BATCH_SIZE, train_count + 1, BATCH_SIZE)):
        sess.run(optimizer,
                 feed_dict={
                     X: X_train[start:end],
                     y: y_train[start:end]
                 })
# Load both image sets and stack them into a single data set.
covid = rd.creat_x_database('.\\grey_covid', 128, 128)
non_covid = rd.creat_x_database('.\\grey_non', 128, 128)
dataSet = np.vstack((covid, non_covid))

# Build the labels: [0, 1] for covid samples, [1, 0] for non-covid samples.
covid_label = creat_label(covid.shape[0], 2, [0, 1])
non_covid_label = creat_label(non_covid.shape[0], 2, [1, 0])
label = np.vstack((covid_label, non_covid_label))

# Model output, loss, optimizer and accuracy ops.
pre = Forward_conv(x, weights, biases, 0.8)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pre, labels=y))
optimizer = tf.train.AdamOptimizer(0.00001).minimize(cost)
p = tf.equal(tf.argmax(y, 1), tf.argmax(pre, 1))
accuracy = tf.reduce_mean(tf.cast(p, tf.float32))

###########################################################################
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Obtain the final train/test split. With a fixed random_state the split is
# identical on every call, so it is computed once instead of once per epoch.
x_train, x_test, y_train, y_test = train_test_split(dataSet,
                                                    label,
                                                    test_size=0.2,
                                                    random_state=0,
                                                    shuffle=True)

avg_cost = 0
for j in range(0, 1000):
    print(j)
    avg_cost = 0
    # Three consecutive mini-batches of 179 samples each.
    for i in range(0, 3):
        k = i * 179
        x_train1 = x_train[k:k + 179]
        y_train1 = y_train[k:k + 179]
        sess.run(optimizer, feed_dict={x: x_train1, y: y_train1})
def num_correct_prediction(logits, labels):
    """Count the rows whose arg-max in `logits` matches that in `labels`.

    Args:
      logits: Tensor of scores; the arg-max is taken along axis 1.
      labels: Tensor compared against `logits`; the arg-max is taken along
        axis 1.

    Returns:
      A scalar int32 tensor holding the number of matching rows.
    """
    predicted_classes = tf.argmax(logits, 1)
    expected_classes = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted_classes, expected_classes), tf.int32)
    return tf.reduce_sum(hits)
# Objective: the negated difference of the two terms built earlier.
loss = tf.negative(tf.subtract(first_term, second_term))

# Pairwise squared distances between the data points and the prediction
# grid, expanded as |a|^2 - 2 a.b + |b|^2.
rA = tf.reshape(tf.reduce_sum(tf.square(x_data), 1), [-1, 1])
rB = tf.reshape(tf.reduce_sum(tf.square(prediction_grid), 1), [-1, 1])
pred_sq_dist = tf.add(
    tf.subtract(
        rA,
        tf.multiply(2., tf.matmul(x_data, tf.transpose(prediction_grid)))),
    tf.transpose(rB))
pred_kernel = tf.exp(tf.multiply(gamma, tf.abs(pred_sq_dist)))

# One row of scores per class; the predicted class is the arg-max over
# axis 0 after removing each row's mean.
prediction_output = tf.matmul(tf.multiply(y_target, b), pred_kernel)
# tf.argmax replaces the deprecated tf.arg_max alias, matching the accuracy
# op below.
prediction = tf.argmax(
    prediction_output -
    tf.expand_dims(tf.reduce_mean(prediction_output, 1), 1), 0)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(prediction, tf.argmax(y_target, 0)), tf.float32))

my_opt = tf.train.GradientDescentOptimizer(0.01)
train_step = my_opt.minimize(loss)

init = tf.global_variables_initializer()
sess.run(init)

loss_vec = []
batch_accuracy = []
for i in range(1000):
    # Random mini-batch, drawn with replacement. `y_vals` is indexed along
    # its second axis, i.e. one column per sample.
    rand_index = np.random.choice(len(x_vals), size=batch_size)
    rand_x = x_vals[rand_index]
    rand_y = y_vals[:, rand_index]
    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})
b = tf.Variable(tf.random_normal([nb_classes]), name='bias') # tf.nn.softmax computes softmax activations # softmax = exp(logits) / reduce_sum(exp(logits), dim) logits = tf.matmul(X, W) + b hypothesis = tf.nn.softmax(logits) # **달라진 부분 Cross entropy cost/loss cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=tf.stop_gradient( [Y_one_hot]))) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost) prediction = tf.argmax(hypothesis, 1) correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # Launch graph with tf.Session() as sess: sess.run(tf.global_variables_initializer()) for step in range(10001): _, cost_val, acc_val = sess.run([optimizer, cost, accuracy], feed_dict={ X: x_data, Y: y_data }) if step % 200 == 0: print("Step: {:5}\tCost: {:.3f}\tAcc: {:.2%}".format(
def sparse_bi_tempered_logistic_loss(activations, labels, t1, t2, num_iters=5):
    """Sparse Bi-Tempered Logistic Loss with custom gradient.

    Args:
      activations: A multi-dimensional tensor with last dimension
        `num_classes`.
      labels: A tensor with dtype of int32.
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      num_iters: Number of iterations to run the method.

    Returns:
      A loss tensor with the shape of `activations` minus its last dimension.
      When both temperatures equal 1.0 this is the standard sparse softmax
      cross-entropy.
    """
    with tf.name_scope('sparse_bitempered_logistic'):
        t1 = tf.convert_to_tensor(t1)
        t2 = tf.convert_to_tensor(t2)
        num_classes = tf.shape(activations)[-1]

        @tf.custom_gradient
        def _custom_gradient_sparse_bi_tempered_logistic_loss(activations):
            """Sparse Bi-Tempered Logistic Loss with custom gradient.

            Args:
              activations: A multi-dimensional tensor with last dim
                `num_classes`.

            Returns:
              A loss tensor, grad.
            """
            with tf.name_scope('gradient_sparse_bitempered_logistic'):
                probabilities = tempered_softmax(activations, t2, num_iters)
                # TODO(eamid): Replace one hot with gather.
                # The probability of the labelled class is picked out via
                # the non-zero positions of the one-hot labels.
                loss_values = -log_t(
                    tf.reshape(
                        tf.gather_nd(
                            probabilities,
                            tf.where(tf.one_hot(labels, num_classes))),
                        tf.shape(activations)[:-1]),
                    t1) - 1.0 / (2.0 - t1) * (
                        1.0 - tf.reduce_sum(
                            tf.pow(probabilities, 2.0 - t1), -1))

                def grad(d_loss):
                    """Explicit gradient calculation.

                    Args:
                      d_loss: Infinitesimal change in the loss value; has
                        the shape of `loss_values` (no class dimension).

                    Returns:
                      Loss gradient with the shape of `activations`.
                    """
                    delta_probs = probabilities - tf.one_hot(
                        labels, num_classes)
                    forget_factor = tf.pow(probabilities, t2 - t1)
                    delta_probs_times_forget_factor = tf.multiply(
                        delta_probs, forget_factor)
                    delta_forget_sum = tf.reduce_sum(
                        delta_probs_times_forget_factor, -1, keepdims=True)
                    escorts = tf.pow(probabilities, t2)
                    escorts = escorts / tf.reduce_sum(
                        escorts, -1, keepdims=True)
                    derivative = (
                        delta_probs_times_forget_factor -
                        tf.multiply(escorts, delta_forget_sum))
                    # `d_loss` lacks the class axis that `derivative` has;
                    # add it so each example's upstream gradient scales that
                    # example's row instead of being broadcast along the
                    # class axis.
                    return tf.multiply(tf.expand_dims(d_loss, -1),
                                       derivative)

                return loss_values, grad

        loss_values = tf.cond(
            tf.logical_and(tf.equal(t1, 1.0), tf.equal(t2, 1.0)),
            functools.partial(
                tf.nn.sparse_softmax_cross_entropy_with_logits,
                labels=labels,
                logits=activations),
            functools.partial(
                _custom_gradient_sparse_bi_tempered_logistic_loss,
                activations))
        return loss_values
def _at_least_x_are_equal(a, b, x):
    """Return a boolean tensor: do `a` and `b` agree in at least `x` places?

    The element-wise equality of `a` and `b` is counted and compared
    against the threshold `x`.
    """
    num_equal = tf.reduce_sum(tf.cast(tf.equal(a, b), tf.int32))
    return tf.greater_equal(num_equal, x)
def train(params=None):
    """Train a small CNN on MNIST and save the resulting checkpoint.

    The network is two 5x5 convolution + 2x2 max-pool stages, a 1024-unit
    fully connected layer with dropout, and a 10-way softmax output. It is
    trained for 5000 mini-batches of 100 images with Adam; the test accuracy
    is printed every 100 steps. The model is then saved under
    '/storage/emulated/0/tensor-model/'.

    Relies on the `weights` and `bias` helpers and on `input_data`, all
    defined or imported elsewhere in this file.

    Args:
      params: Unused; kept so existing callers keep working.
    """
    # Load the data.
    mnist = input_data.read_data_sets('/storage/emulated/0/tensor-data/',
                                      one_hot=True)

    # Inputs.
    xs = tf.placeholder(tf.float32, shape=[None, 784])
    ys = tf.placeholder(tf.float32, shape=[None, 10])
    x_images = tf.reshape(xs, [-1, 28, 28, 1])

    # First convolution layer.
    w_con1 = weights([5, 5, 1, 32], "w1")
    b_con1 = bias([32])
    h_con1 = tf.nn.conv2d(x_images, w_con1, [1, 1, 1, 1], padding='SAME')
    h_relu1 = tf.nn.relu(h_con1 + b_con1)
    h_pool1 = tf.nn.max_pool(h_relu1,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')

    # Second convolution layer.
    w_con2 = weights([5, 5, 32, 64], "w2")
    b_con2 = bias([64])
    h_con2 = tf.nn.conv2d(h_pool1,
                          w_con2,
                          strides=[1, 1, 1, 1],
                          padding='SAME')
    # The bias was created but never applied to this layer.
    h_relu2 = tf.nn.relu(h_con2 + b_con2)
    h_pool2 = tf.nn.max_pool(h_relu2,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')

    # Fully connected layer.
    w_fc1 = weights([7 * 7 * 64, 1024], "w3")
    b_fc1 = bias([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

    # Dropout; the keep probability is fed at run time.
    keep_pro = tf.placeholder(dtype=tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_pro)

    # Output layer.
    w_fc2 = weights([1024, 10], "w4")
    b_fc2 = bias([10])
    h_fc2 = tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)

    # Loss. The probabilities are clipped away from zero so that log() can
    # never produce -inf/NaN and poison the gradients.
    loss = -tf.reduce_mean(ys * tf.log(tf.clip_by_value(h_fc2, 1e-10, 1.0)))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # Accuracy: fraction of samples whose predicted class matches the label.
    correct = tf.equal(tf.argmax(ys, 1), tf.argmax(h_fc2, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # The context manager releases the session's resources on exit.
    with tf.Session() as sess:
        # Initialise the variables.
        sess.run(tf.global_variables_initializer())

        # Training loop.
        for step in range(5000):
            batch_x, batch_y = mnist.train.next_batch(100)
            sess.run(train_step,
                     feed_dict={
                         xs: batch_x,
                         ys: batch_y,
                         keep_pro: 0.8
                     })
            if step % 100 == 0:
                # Dropout is disabled (keep probability 1) for evaluation.
                print(
                    step,
                    sess.run(accuracy,
                             feed_dict={
                                 xs: mnist.test.images,
                                 ys: mnist.test.labels,
                                 keep_pro: 1
                             }))

        # Save the model.
        if not tf.gfile.Exists('/storage/emulated/0/tensor-model/'):
            tf.gfile.MakeDirs('/storage/emulated/0/tensor-model/')
        saver = tf.train.Saver()
        saver.save(sess, '/storage/emulated/0/tensor-model/my_model.ckpt')
def ppo_policy_loss(neg_logprobs_old,
                    actions,
                    advantages,
                    dist_new,
                    policy_gradient_enable=False,
                    mcts_sampling=False,
                    clipping_coeff=0.2,
                    mcts_clipping_coeff=0.9,
                    tanh_action_clipping=False):
    """Compute the policy loss using the PPO baseline formula.

    paper: https://arxiv.org/abs/1707.06347

    Args:
      neg_logprobs_old: negative log-probabilities under the old policy.
      actions: actions taken by the old policy.
      advantages: advantages estimated under the old policy.
      dist_new: distribution of the latest trained policy.
      policy_gradient_enable: if True, use vanilla policy gradient weighted
        by the advantage instead of the clipped PPO objective.
      mcts_sampling: if True, the samples were generated with MCTS sampling
        and `mcts_clipping_coeff` is used as the clipping coefficient.
      clipping_coeff: coefficient used to clip the probability ratio.
      mcts_clipping_coeff: coefficient used to clip the probability ratio
        when the data are sampled using MCTS.
      tanh_action_clipping: if True, apply the log-probability correction
        for tanh action clipping, which bounds the actions to [-1, 1].
        Paper --> https://arxiv.org/pdf/1801.01290.pdf

    Returns:
      A tuple `(pg_loss, approxkl, clipfrac, p_ratio)`: the policy loss, an
      approximate KL between the new and old policy, the fraction of ratios
      that fall outside the clipping range, and the probability ratio.
    """
    neg_logprobs_new = dist_new.negative_log_prob(actions)

    def _mcts_coeff():
        return tf.constant(mcts_clipping_coeff)

    def _default_coeff():
        return tf.constant(clipping_coeff)

    # Select the clipping coefficient that matches how the data was sampled.
    current_clipping_coeff = tf.cond(tf.equal(mcts_sampling, True),
                                     _mcts_coeff, _default_coeff)

    # Correct the log-probability when tanh squashing keeps the actions in
    # [-1., 1.].
    # paper: https://arxiv.org/pdf/1801.01290.pdf
    if tanh_action_clipping:
        squash_term = tf.log(1 - tf.tanh(actions)**2 + 1e-6)
        logprobs_correction = tf.reduce_sum(squash_term, axis=1)
        neg_logprobs_new = neg_logprobs_new + logprobs_correction

    p_ratio = tf.exp(neg_logprobs_old - neg_logprobs_new, name='ratio')

    if not policy_gradient_enable:
        # PPO objective. Defining Loss = -J is equivalent to maximizing J.
        lower_bound = 1. - current_clipping_coeff
        upper_bound = 1. + current_clipping_coeff
        pg_losses = -advantages * p_ratio
        pg_losses2 = -advantages * tf.clip_by_value(p_ratio, lower_bound,
                                                    upper_bound)
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2),
                                 name='policy_loss')
    else:
        pg_losses = advantages * neg_logprobs_new
        pg_loss = tf.reduce_mean(pg_losses, name='policy_loss')

    # Approximate KL between the new and the old policy.
    logprob_gap = neg_logprobs_new - neg_logprobs_old
    approxkl = .5 * tf.reduce_mean(tf.square(logprob_gap))

    # Fraction of policy ratios that get clipped.
    is_clipped = tf.greater(tf.abs(p_ratio - 1.), current_clipping_coeff)
    clipfrac = tf.reduce_mean(tf.to_float(is_clipped))

    return pg_loss, approxkl, clipfrac, p_ratio