def test2DAreaMax(self):
  batch_size = 256
  feature_len = 100
  memory_height = 10
  heads = 2
  key_len = 6
  depth = 128
  max_area_height = 3
  max_area_width = 3
  queries = tf.random_uniform([batch_size, heads, key_len, depth],
                              minval=-10.0, maxval=10.0)
  features = tf.random_uniform([batch_size, heads, feature_len, depth],
                               minval=-10.0, maxval=10.0)
  target_values = tf.random_uniform([batch_size, heads, key_len, depth],
                                    minval=-0.2, maxval=0.2)
  keys = tf.layers.dense(features, units=depth)
  values = tf.layers.dense(features, units=depth)
  max_attention = area_attention.dot_product_area_attention(
      queries, keys, values, bias=None, area_key_mode="max",
      area_value_mode="max", name="max_key", max_area_width=max_area_width,
      max_area_height=max_area_height, memory_height=memory_height)
  max_gradients = tf.gradients(
      tf.reduce_mean(tf.pow(target_values - max_attention, 2)), features)
  with self.test_session() as session:
    session.run(tf.global_variables_initializer())
    result1, result2 = session.run([max_gradients, max_attention])
  self.assertFalse(np.any(np.logical_not(np.isfinite(result1))))
  self.assertFalse(np.any(np.logical_not(np.isfinite(result2))))
def __readImages(self, filename):
    image_string = tf.read_file(filename)  # Gets a string tensor from a file
    decodedInput = tf.image.decode_image(image_string)  # Decode a string tensor as image
    floatInput = tf.image.convert_image_dtype(decodedInput, dtype=tf.float32)  # Transform image to float32
    assertion = tf.assert_equal(tf.shape(floatInput)[-1], 3,
                                message="image does not have 3 channels")
    with tf.control_dependencies([assertion]):
        floatInput.set_shape([None, None, 3])

    gammadInput = floatInput
    # print("CAREFUL THE GAMMA IS NOT CORRECTED AUTOMATICALLY")
    # input = floatInput
    input = tf.pow(floatInput, 2.2)  # correct for the gamma

    # If we want to log the inputs, we do it here
    if self.logInput:
        input = helpers.logTensor(input)

    # The preprocess function puts the vector values between [-1; 1] from [0; 1]
    input = helpers.preprocess(input)
    targets = tf.zeros(tf.shape(input))  # shape is here (None, None, 3)
    targets = tf.expand_dims(targets, axis=0)
    targets = tf.tile(targets, (self.nbTargetsToRead, 1, 1, 1))

    return filename, input, targets, gammadInput
def __init__(self, Y, logits, alpha=0.90, gamma=0.5):
    with tf.name_scope("Focal_Loss"):
        label = Y
        epsilon = 1e-10
        self.pred = tf.clip_by_value(tf.nn.sigmoid(logits), epsilon, 1 - epsilon)
        ## cross-entropy
        # cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=label, logits=logits)
        # self.pred = tf.clip_by_value(
        #     tf.nn.softmax(logits), epsilon, 1 - epsilon)
        log_pred = tf.log(self.pred)
        p_t = tf.reduce_sum(-tf.multiply(label, self.pred), axis=-1)
        cross_entropy = tf.reduce_sum(-tf.multiply(label, log_pred), axis=-1)
        # alpha_ = label * alpha * (1. - label) * (1. - alpha)
        _alpha = label[..., 1] * alpha + label[..., 0] * (1. - alpha)
        losses = tf.multiply(tf.pow(_alpha * (1. - p_t), gamma), cross_entropy)
        losses = tf.reduce_mean(losses, axis=[1, 2, 3])
        self.loss = tf.reduce_mean(losses)
def _get_cubic_root(self):
  """Get the cubic root."""
  # We have the equation x^2 D^2 + (1 - x)^4 * C / h_min^2
  # where x = sqrt(mu).
  # We substitute x, which is sqrt(mu), with x = y + 1.
  # It gives y^3 + py = q
  # where p = (D^2 h_min^2) / (2 * C) and q = -p.
  # We use Vieta's substitution to compute the root.
  # There is only one real solution y (which is in [0, 1]).
  # http://mathworld.wolfram.com/VietasSubstitution.html
  assert_array = [
      tf.Assert(tf.logical_not(tf.is_nan(self._dist_to_opt_avg)),
                [self._dist_to_opt_avg,]),
      tf.Assert(tf.logical_not(tf.is_nan(self._h_min)),
                [self._h_min,]),
      tf.Assert(tf.logical_not(tf.is_nan(self._grad_var)),
                [self._grad_var,]),
      tf.Assert(tf.logical_not(tf.is_inf(self._dist_to_opt_avg)),
                [self._dist_to_opt_avg,]),
      tf.Assert(tf.logical_not(tf.is_inf(self._h_min)),
                [self._h_min,]),
      tf.Assert(tf.logical_not(tf.is_inf(self._grad_var)),
                [self._grad_var,])
  ]
  with tf.control_dependencies(assert_array):
    p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
    w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0 / 3.0)
    y = w - p / 3.0 / w
    x = y + 1
  return x
def _body(i, update, activation, center):
  """Body of the EM while loop."""
  del activation
  beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
  # beta = final_beta
  # route: [outdim, height?, width?, batch, indim]
  if leaky:
    posterior = layers.leaky_routing(update, output_dim)
  else:
    posterior = tf.nn.softmax(update, dim=2)
  vote_conf = posterior * input_activation
  # masses: [batch, 1, outdim, 1, height, width]
  masses = tf.reduce_sum(vote_conf, axis=1, keep_dims=True) + 0.00001
  preactivate_unrolled = vote_conf * wx
  # center: [batch, 1, outdim, outatom, height, width]
  center = .9 * tf.reduce_sum(
      preactivate_unrolled, axis=1, keep_dims=True) / masses + .1 * center
  noise = (wx - center) * (wx - center)
  variance = min_var + tf.reduce_sum(
      vote_conf * noise, axis=1, keep_dims=True) / masses
  log_variance = tf.log(variance)
  p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
  log_2pi = tf.log(2 * math.pi)
  win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
  logit = beta * (win - activation_biases * 5000)
  activation_update = tf.minimum(0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
  # return activation, center
  log_det_sigma = tf.reduce_sum(log_variance, axis=3, keep_dims=True)
  sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
  exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True)
  prior_update = activation_update - sigma_update - exp_update
  return (prior_update, logit, center)
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
  """Compute Matthew's correlation coefficient."""
  predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
  # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
  tp, tp_op = tf.metrics.true_positives(
      predictions, label_ids, weights=is_real_example)
  tn, tn_op = tf.metrics.true_negatives(
      predictions, label_ids, weights=is_real_example)
  fp, fp_op = tf.metrics.false_positives(
      predictions, label_ids, weights=is_real_example)
  fn, fn_op = tf.metrics.false_negatives(
      predictions, label_ids, weights=is_real_example)

  # Compute Matthew's correlation.
  mcc = tf.div_no_nan(
      tp * tn - fp * fn,
      tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn), 0.5))

  # Compute accuracy.
  accuracy = tf.metrics.accuracy(
      labels=label_ids, predictions=predictions, weights=is_real_example)

  loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)

  return {
      "matthew_corr": (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)),
      "eval_accuracy": accuracy,
      "eval_loss": loss,
  }
def fun_w(self, x, low, up):
    I1 = 0.110987
    x_list = tf.split(x, self.dim, 1)
    # **************************************************
    x_scale_list = []
    h_len = (up - low) / 2.0
    for i in range(self.dim):
        x_scale = (x_list[i] - low - h_len) / h_len
        x_scale_list.append(x_scale)
    # ************************************************
    z_x_list = []
    for i in range(self.dim):
        supp_x = tf.greater(1 - tf.abs(x_scale_list[i]), 0)
        z_x = tf.where(supp_x,
                       tf.exp(1 / (tf.pow(x_scale_list[i], 2) - 1)) / I1,
                       tf.zeros_like(x_scale_list[i]))
        z_x_list.append(z_x)
    # ***************************************************
    w_val = tf.constant(1.0)
    for i in range(self.dim):
        w_val = tf.multiply(w_val, z_x_list[i])
    dw = tf.gradients(w_val, x, unconnected_gradients='zero')[0]
    dw = tf.where(tf.is_nan(dw), tf.zeros_like(dw), dw)
    return (w_val, dw)
def _model_fn(input_fea, input_lab):
  """Creates a model, add summary, modes (train or eval), and hooks."""

  # input_fea and input_lab should be a list (laid_out_tensors).
  if not isinstance(input_fea, list):
    input_fea = [input_fea]
  if not isinstance(input_lab, list):
    input_lab = [input_lab]

  def _add_summary(lowering, train_or_eval, tf_loss, scalars, global_step):
    """Add all summaries."""
    for k in scalars.keys():
      if not isinstance(scalars[k], tf.Tensor):
        scalars[k] = tf.cast(
            lowering.export_to_tf_tensor(scalars[k]), tf.float32)

    def _host_loss_summary(global_step, tf_loss, **scalars):
      """Add summary.scalar in host side."""
      gs = tf.cast(global_step, tf.int64)
      sum_loss = contrib_summary.scalar(
          '{}_loss'.format(train_or_eval), tf_loss, step=gs)
      sum_ops = [sum_loss.op]
      for description, tf_metric in scalars.iteritems():
        sum_metric = contrib_summary.scalar(
            '{}_{}'.format(train_or_eval, description), tf_metric, step=gs)
        sum_ops.append(sum_metric)
      with tf.control_dependencies(sum_ops):
        return tf.identity(tf_loss)

    if FLAGS.use_tpu:
      # Cast the global step to tf.int32, since
      # outside_compilation does not support tf.int64.
      tf_loss = tpu.outside_compilation(
          _host_loss_summary,
          tf.cast(global_step, tf.int32),
          tf_loss,
          **scalars)
    else:
      tf_loss = _host_loss_summary(
          tf.cast(global_step, tf.int32), tf_loss, **scalars)

    return tf_loss

  global_step = tf.train.get_or_create_global_step()
  graph, mesh, mesh_impl = mesh_context.create_graph_mesh_and_mesh_impl()

  with mtf.utils.outside_all_rewrites():
    # Do not tpu_rewrite this part. Inside this unet, if you use TensorFlow
    # ops instead of Mesh-TensorFlow ops, it will cause host-to-TPU send/recv.
    preds, loss, scalars, bn_update_ops = (
        unet.unet_with_spatial_partition(
            mesh, mesh_impl, train_or_eval, input_fea, input_lab))

  if train_or_eval == 'train':
    var_grads = mtf.gradients(
        [loss], [v.outputs[0] for v in graph.trainable_variables])

    lr = FLAGS.lr * tf.pow(
        FLAGS.lr_drop_rate,
        tf.floor(tf.cast(global_step, tf.float32) / FLAGS.lr_drop_steps))
    scalars['learning_rate'] = lr

    optimizer = mtf.optimize.AdafactorOptimizer(learning_rate=lr)
    update_ops = optimizer.apply_grads(var_grads, graph.trainable_variables)

    # This is where the actual tf graph got built.
    lowering = mtf.Lowering(graph, {mesh: mesh_impl})

    tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
    tf_update_ops.append(tf.assign_add(global_step, 1))
    tf_update_ops.extend(
        [lowering.lowered_operation(op) for op in bn_update_ops])

  else:  # train_or_eval == 'eval':
    preds = [mtf.anonymize(pred) for pred in preds]

    # This is where the actual tf graph got built.
    lowering = mtf.Lowering(graph, {mesh: mesh_impl})

  tf_preds = [tf.cast(lowering.export_to_tf_tensor(pred), tf.float32)
              for pred in preds]

  tf_loss = tf.cast(lowering.export_to_tf_tensor(loss), tf.float32)
  if FLAGS.write_summary:
    tf_loss = _add_summary(
        lowering, train_or_eval, tf_loss, scalars, global_step)
  master_to_slice_hook = mtf.MtfRestoreHook(lowering)

  if train_or_eval == 'train':
    with mtf.utils.outside_all_rewrites():
      saver = tf.train.Saver(tf.global_variables(), save_relative_paths=True)
      tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
      saver_listener = mtf.MtfCheckpointSaverListener(lowering)
      slice_to_master_hook = tf.train.CheckpointSaverHook(
          FLAGS.checkpoint_dir,
          save_steps=FLAGS.save_checkpoints_steps,
          saver=saver,
          listeners=[saver_listener])
      captured_hooks.capture([master_to_slice_hook, slice_to_master_hook])

    return tf.group([tf_loss] + tf_update_ops)

  else:  # train_or_eval == 'eval':
    if FLAGS.use_tpu:
      tf_preds.extend([tf_loss, global_step])
      tf_preds_dtypes = [tf_pred.dtype for tf_pred in tf_preds]
      tf_preds_shapes = [tf_pred.shape for tf_pred in tf_preds]
      captured_hooks.capture([master_to_slice_hook, None])
      captured_output_dtypes_shapes.capture(
          [tf_preds_dtypes, tf_preds_shapes])
      return tpu_ops.outfeed_enqueue_tuple(tf_preds)
    else:
      tf_preds.extend([tf_loss, global_step])
      captured_hooks.capture([master_to_slice_hook, None])
      return tf_preds
def loss_function(y_true, x_out):
    # loss = tf.reduce_mean(tf.pow(tf.log(y_true + 1) - x_out, 2))
    loss = tf.reduce_mean(tf.pow(tf.log(y_true + 1) - tf.log(x_out + 1), 2))
    # loss = loss + tf.losses.get_regularization_loss()
    return loss
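As a side note, this objective is the mean squared logarithmic error; below is a minimal NumPy sketch (illustrative only, not part of the original code) showing the same computation on hypothetical arrays.

import numpy as np

# Hypothetical values; the +1 offset keeps the logarithm defined at zero.
y_true = np.array([0.0, 9.0, 99.0])
x_out = np.array([1.0, 9.0, 120.0])
msle = np.mean((np.log(y_true + 1) - np.log(x_out + 1)) ** 2)
print(msle)  # dominated by the third, mispredicted entry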
def add_distance_loss_to_center(labels, logits, groundtruth_coords):
  """Add distance loss function for ClickRegression."""
  weights = tf.to_int32(
      tf.not_equal(
          labels,
          model_input.dataset_descriptors[FLAGS.dataset].ignore_label))
  labels *= weights

  # Use GT box to get center if it exists. Less computation required.
  # Otherwise, calculate from label mask.
  if FLAGS.use_groundtruth_box:
    center_x = (groundtruth_coords['xmin'] + groundtruth_coords['xmax']) / 2.0
    center_y = (groundtruth_coords['ymin'] + groundtruth_coords['ymax']) / 2.0
    center = tf.stack([center_y, center_x], axis=1)
  else:
    # Make array of coordinates (each row contains two coordinates).
    ii, jj = tf.meshgrid(
        tf.range(FLAGS.image_size), tf.range(FLAGS.image_size), indexing='ij')
    coords = tf.stack([tf.reshape(ii, (-1,)), tf.reshape(jj, (-1,))], axis=-1)
    coords = tf.cast(coords, tf.int32)

    # Rearrange input into one vector per volume.
    volumes_flat = tf.reshape(
        labels, [-1, FLAGS.image_size * FLAGS.image_size * 1, 1])

    # Compute total mass for each volume. Add a small offset to prevent
    # division by 0.
    total_mass = tf.cast(tf.reduce_sum(volumes_flat, axis=1),
                         tf.float32) + ZERO_DIV_OFFSET

    # Compute center of mass.
    center = tf.cast(tf.reduce_sum(volumes_flat * coords, axis=1),
                     tf.float32) / total_mass
    center = center / FLAGS.image_size

  # Normalize coordinates by size of image.
  logits = logits / FLAGS.image_size

  # Calculate loss based on the distance metric specified.
  # Loss added later in model_fn by tf.losses.get_total_loss().
  if FLAGS.distance_metric == 'mse':
    tf.losses.mean_squared_error(center, logits)
  elif FLAGS.distance_metric in [
      'euclidean', 'euclidean_sqrt', 'euclidean_iter'
  ]:
    distance_to_center = tf.sqrt(
        tf.reduce_sum(tf.square(logits - center), axis=-1) + ZERO_DIV_OFFSET)
    if FLAGS.ratio_box_distance:
      distance_to_box = calc_distance_to_edge(groundtruth_coords, logits)
      box_distance_to_center = (
          tf.to_float(distance_to_center) - distance_to_box)
      loss = distance_to_center / (box_distance_to_center + ZERO_DIV_OFFSET)
    else:
      loss = distance_to_center

    if FLAGS.distance_metric == 'euclidean_sqrt':
      loss = tf.sqrt(loss)
    if FLAGS.distance_metric == 'euclidean_iter':
      iter_num = tf.to_float(tf.train.get_or_create_global_step())
      step = (iter_num // FLAGS.euclidean_step) + 1.0
      loss = tf.pow(loss, tf.to_float(1.0 / step))
    tf.losses.compute_weighted_loss(loss)
def _body(i, posterior, activation, center, masses):
  """Body of the EM while loop."""
  del activation
  beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
  # beta = final_beta
  # route: [outdim, height?, width?, batch, indim]
  vote_conf = posterior * input_activation
  # masses: [batch, 1, outdim, 1, height, width, 1, 1]
  masses = tf.reduce_sum(
      tf.reduce_sum(
          tf.reduce_sum(vote_conf, axis=1, keep_dims=True),
          axis=-1, keep_dims=True),
      axis=-2, keep_dims=True) + 0.0000001
  preactivate_unrolled = vote_conf * wx
  # center: [batch, 1, outdim, outatom, height, width]
  center = .9 * tf.reduce_sum(
      tf.reduce_sum(
          tf.reduce_sum(preactivate_unrolled, axis=1, keep_dims=True),
          axis=-1, keep_dims=True),
      axis=-2, keep_dims=True) / masses + .1 * center
  noise = (wx - center) * (wx - center)
  variance = min_var + tf.reduce_sum(
      tf.reduce_sum(
          tf.reduce_sum(vote_conf * noise, axis=1, keep_dims=True),
          axis=-1, keep_dims=True),
      axis=-2, keep_dims=True) / masses
  log_variance = tf.log(variance)
  p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
  log_2pi = tf.log(2 * math.pi)
  win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
  logit = beta * (win - activation_biases * 5000)
  activation_update = tf.minimum(0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
  # return activation, center
  log_det_sigma = -1 * p_i
  sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
  exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True)
  prior_update = activation_update - sigma_update - exp_update
  max_prior_update = tf.reduce_max(
      tf.reduce_max(
          tf.reduce_max(
              tf.reduce_max(prior_update, axis=-1, keep_dims=True),
              axis=-2, keep_dims=True),
          axis=-3, keep_dims=True),
      axis=-4, keep_dims=True)
  prior_normal = tf.add(prior_update, -1 * max_prior_update)
  prior_exp = tf.exp(prior_normal)
  t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
  c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
  pad_prior = tf.pad(
      c_prior,
      [[0, 0], [(k - 1) * (k - 1), (k - 1) * (k - 1)],
       [(k - 1) * (k - 1), (k - 1) * (k - 1)], [0, 0]], 'CONSTANT')
  patch_prior = tf.extract_image_patches(
      images=pad_prior,
      ksizes=[1, k, k, 1],
      strides=[1, k, k, 1],
      rates=[1, k - 1, k - 1, 1],
      padding='VALID')
  sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
  sum_prior_patch = tf.extract_image_patches(
      images=sum_prior,
      ksizes=[1, k, k, 1],
      strides=[1, 1, 1, 1],
      rates=[1, 1, 1, 1],
      padding='VALID')
  sum_prior_reshape = tf.reshape(
      sum_prior_patch, [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001
  posterior = prior_exp / sum_prior_reshape
  return (posterior, logit, center, masses)
def l2norm_sqrd(a, b):
    return tf.reduce_sum(tf.pow(a - b, 2), 1)


def l2(a, b):
    return tf.reduce_mean(tf.pow(a - b, 2))
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """See base class."""
  assignments = []
  for (grad, param) in grads_and_vars:
    if grad is None or param is None:
      continue

    param_name = self._get_variable_name(param.name)

    m = tf.get_variable(
        name=six.ensure_str(param_name) + "/m",
        shape=param.shape.as_list(),
        dtype=tf.float32,
        trainable=False,
        initializer=tf.zeros_initializer())
    # Note: shape is not passed here explicitly since tf.get_variable
    # complains when you do that while passing a Tensor as an initializer.
    prev_w_norm = tf.get_variable(
        name=six.ensure_str(param_name) + "/prev_w_norm",
        dtype=tf.float32,
        trainable=False,
        initializer=lambda w=param: tf.norm(w.initialized_value(), ord=2))
    prev_eta = tf.get_variable(
        name=six.ensure_str(param_name) + "/prev_eta",
        shape=[],
        dtype=tf.float32,
        trainable=False,
        initializer=tf.zeros_initializer())
    prev_beta = tf.get_variable(
        name=six.ensure_str(param_name) + "/prev_beta",
        shape=[],
        dtype=tf.float32,
        trainable=False,
        initializer=tf.zeros_initializer())

    if self._do_use_weight_decay(param_name):
      grad += self.weight_decay_rate * param

    if self.use_adaptive:
      grad_squared_sum = tf.get_variable(
          name=six.ensure_str(param_name) + "/grad_squared_sum",
          shape=[],
          dtype=tf.float32,
          trainable=False,
          initializer=tf.zeros_initializer())
      max_grad = tf.get_variable(
          name=six.ensure_str(param_name) + "/max_grad",
          shape=[],
          dtype=tf.float32,
          trainable=False,
          initializer=tf.zeros_initializer())
      iteration = tf.get_variable(
          name=six.ensure_str(param_name) + "/iteration",
          shape=[],
          dtype=tf.float32,
          trainable=False,
          initializer=tf.zeros_initializer())

      next_grad_squared_sum = grad_squared_sum + tf.norm(grad, 2)
      next_iteration = iteration + 1
      next_max_grad = tf.maximum(max_grad, tf.norm(grad, 2))
      assignments.extend([
          grad_squared_sum.assign(next_grad_squared_sum),
          iteration.assign(next_iteration),
          max_grad.assign(next_max_grad)
      ])

      # Intuitively we should be able to leave g_sum=next_grad_squared_sum,
      # but current theory needs this extra t^1/4 max_grad term.
      g_sum = next_grad_squared_sum + tf.pow(next_iteration,
                                             0.25) * next_max_grad
      eta = self.learning_rate / tf.pow(
          tf.pow(next_iteration, 3.0) * tf.pow(g_sum, 2.0), 1.0 / 7.0)
      a = tf.minimum(1.0, 1.0 / (next_iteration * tf.pow(eta, 2.0) * g_sum))
      beta = 1.0 - a
    else:
      eta = self.learning_rate
      beta = self.beta

    next_m = (tf.multiply(beta, m) + tf.multiply(1.0 - beta, grad))

    ratio = 1.0
    w_norm = tf.norm(param, ord=2)
    if self._do_layer_adaptation(param_name):
      g_norm = tf.norm(next_m, ord=2)
      ratio = self.gamma * tf.where(
          tf.math.greater(w_norm, 0),
          tf.where(tf.math.greater(g_norm, 0), (w_norm / g_norm), 1.0), 1.0)

    normalized_m_with_lr = ratio * eta * next_m

    if self.use_igt:
      prev_x = self.compute_x(param_name, param, m, prev_w_norm, prev_eta,
                              prev_beta)
      next_x = prev_x - normalized_m_with_lr
      next_param = next_x + tf.divide(
          tf.multiply(beta, normalized_m_with_lr), beta - 1.0)
    else:
      next_param = param - normalized_m_with_lr

    assignments.extend([
        param.assign(next_param),
        m.assign(next_m),
        prev_w_norm.assign(w_norm),
        prev_eta.assign(eta),
        prev_beta.assign(beta)
    ])

  return tf.group(*assignments, name=name)
def simple_linear_regression():
    raw_train_dataset = library.data_processing(train_data_path)
    # Change the variables here to train using different values
    X_d = pd.DataFrame(raw_train_dataset[['TEMP']]).to_numpy()
    Y_d = pd.DataFrame(raw_train_dataset[['PM2.5']]).to_numpy()

    X = tf.placeholder(tf.float32, [X_d.shape[0], X_d.shape[1]], name='x')
    Y = tf.placeholder(tf.float32, name='y')
    w = tf.Variable(np.random.normal(), [None, X_d.shape[1]], name='weight')
    b = tf.Variable(np.random.normal(), name='bias')

    y_pred = tf.add(tf.multiply(X, w), b)
    loss = tf.reduce_sum(tf.square(y_pred - Y)) / (2 * X_d.shape[0])
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        count = 0

        # Fit all training data
        for epoch in range(training_epochs):
            for (x, y) in zip(X_d, Y_d):
                sess.run(optimizer, feed_dict={X: x, Y: y})

            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1),
                      "cost=", "{:.9f}".format(
                          sess.run(loss, feed_dict={X: X_d, Y: Y_d})),
                      "W=", sess.run(w), "b=", sess.run(b))
                fig = plt.figure(figsize=(10, 10), dpi=100)
                ax = fig.add_subplot(111)
                ax.set_ylim(0, 1)
                ax.plot(X_d, Y_d, 'ro', label='Original data')
                ax.plot(X_d, sess.run(w) * X_d + sess.run(b),
                        label='Fitted line')
                ax.legend()
                plt.show()
                fig.savefig('plot_{:05d}.png'.format(count),
                            bbox_inches='tight', dpi=100)
                count = count + 1
                plt.close(fig)

        print("Optimization Finished!")
        training_cost = sess.run(loss, feed_dict={X: X_d, Y: Y_d})
        t_w = sess.run(w)
        t_b = sess.run(b)
        print("Training cost=", training_cost, "W=", t_w, "b=", t_b, '\n')

        raw_test_dataset = library.data_processing(test_data_path)
        # Change the variables here to train using different values
        X_test_d = pd.DataFrame(raw_test_dataset[['TEMP']]).to_numpy()
        Y_test_d = pd.DataFrame(raw_test_dataset[['PM2.5']]).to_numpy()

        print("Testing... (L2 loss Comparison)")
        testing_cost = sess.run(
            tf.reduce_sum(tf.pow(y_pred - Y, 2)) / (2 * X_test_d.shape[0]),
            feed_dict={X: X_test_d, Y: Y_test_d})
        print("Testing cost=", testing_cost)
        print("Absolute l2 loss difference:", abs(training_cost - testing_cost))
def model_fn(features, labels, mode, params):
  """Construct a TPUEstimatorSpec for a model."""
  if mode != tf.estimator.ModeKeys.TRAIN:
    raise NotImplementedError(
        'Expected that mode == TRAIN, but got {!r}'.format(mode))

  # Data was transposed from NHWC to HWCN on the host side. Transpose it back.
  # This transposition will be optimized away by the XLA compiler. It serves
  # as a hint to the compiler that it should expect the input data to come
  # in HWCN format rather than NHWC.
  train_features = tf.transpose(features['train'], [3, 0, 1, 2])
  validation_features = tf.transpose(features['validation'], [3, 0, 1, 2])

  if params['use_bfloat16'] == 'ontpu':
    train_features = tf.cast(train_features, tf.bfloat16)
    validation_features = tf.cast(validation_features, tf.bfloat16)

  global_step = tf.train.get_global_step()

  # Randomly sample a network architecture.
  with tf.variable_scope('rl_controller') as rl_scope:
    pass

  model_spec = mobile_classifier_factory.get_model_spec(params['ssd'])

  tf.io.gfile.makedirs(params['checkpoint_dir'])
  model_spec_filename = os.path.join(
      params['checkpoint_dir'], 'model_spec.json')
  with tf.io.gfile.GFile(model_spec_filename, 'w') as handle:
    handle.write(schema_io.serialize(model_spec))

  increase_ops_prob = custom_layers.linear_decay(
      global_step, params['increase_ops_warmup_steps'])
  increase_filters_prob = custom_layers.linear_decay(
      global_step, params['increase_filters_warmup_steps'])

  model_spec, dist_info = controller.independent_sample(
      model_spec,
      increase_ops_probability=increase_ops_prob,
      increase_filters_probability=increase_filters_prob,
      name=rl_scope)

  if params['enable_cost_model']:
    cost_model_features = mobile_cost_model.coupled_tf_features(model_spec)
    estimated_cost = cost_model_lib.estimate_cost(
        cost_model_features, params['ssd'])

  # We divide the regularization strength by 2 for backwards compatibility
  # with the deprecated tf.contrib.layers.l2_regularizer() function, which was
  # used in our published experiments.
  kernel_regularizer = tf.keras.regularizers.l2(
      params['model_weight_decay'] / 2)

  # Set up the basic TensorFlow training/inference graph.
  model = mobile_classifier_factory.get_model_for_search(
      model_spec, kernel_regularizer=kernel_regularizer)
  model.build(train_features.shape)

  with tf.name_scope('training'):
    model_logits, _ = model.apply(train_features, training=True)
    # Cast back to float32 (effectively only when use_bfloat16 is true).
    model_logits = tf.cast(model_logits, tf.float32)

    model_empirical_loss = tf.losses.softmax_cross_entropy(
        onehot_labels=labels['train'],
        logits=model_logits,
        label_smoothing=0.1)
    model_regularization_loss = model.regularization_loss()
    model_loss = model_empirical_loss + model_regularization_loss

    # Set up the model weight training logic.
    model_learning_rate = custom_layers.cosine_decay_with_linear_warmup(
        peak_learning_rate=params['model_learning_rate'],
        global_step=global_step,
        max_global_step=params['max_global_step'],
        warmup_steps=params['model_warmup_steps'])

    model_optimizer = tf.tpu.CrossShardOptimizer(
        tf.train.RMSPropOptimizer(
            model_learning_rate,
            decay=0.9,
            momentum=params['model_momentum'],
            epsilon=1.0))

    model_vars = model.trainable_variables()
    model_update_ops = model.updates()
    with tf.control_dependencies(model_update_ops):
      grads_and_vars = model_optimizer.compute_gradients(
          model_loss, var_list=model_vars)
      if params['use_gradient_sync_barrier']:
        # Force all gradients to be computed before any are applied.
        grads_and_vars = _grads_and_vars_barrier(grads_and_vars)

      # NOTE: We do not pass `global_step` to apply_gradients(), so the global
      # step is not incremented by `model_optimizer`. The global_step will be
      # incremented later on, when we update the RL controller weights. If we
      # incremented it here too, we'd end up incrementing the global_step
      # twice at each training step.
      model_op = model_optimizer.apply_gradients(grads_and_vars)

      if params['use_gradient_sync_barrier']:
        # Finish computing gradients for the shared model weights before we
        # start on the RL update step.
        #
        # NOTE: The barrier above forces TensorFlow to finish computing grads
        # for all of the trainable variables before any of the grads can be
        # consumed. So while the call to with_data_dependencies() here only
        # explicitly depends on grads_and_vars[0][0], the call implicitly
        # forces TensorFlow to finish computing the gradients for *all*
        # trainable variables before computing the validation features.
        validation_features = layers.with_data_dependencies(
            [grads_and_vars[0][0]], [validation_features])[0]

  with tf.name_scope('validation'):
    # Estimate the model accuracy on a batch of examples from the validation
    # set. Force this logic to run after the model optimization step.
    with tf.control_dependencies([model_op]):
      validation_logits, _ = model.apply(validation_features, training=False)

    # NOTE(b/130311965): An earlier version of this code cast
    # validation_logits from bfloat16 to float32 before applying an argmax
    # when the --use_bfloat16 flag was true. As of cl/240923609, this caused
    # XLA to compute incorrect model accuracies. Please avoid casting from
    # bfloat16 to float32 before taking the argmax.
    is_prediction_correct = tf.equal(
        tf.argmax(validation_logits, axis=1),
        tf.argmax(labels['validation'], axis=1))
    validation_accuracy = tf.reduce_mean(
        tf.cast(is_prediction_correct, tf.float32))

  # Estimate the reward for the current network architecture and update the
  # reward to incorporate the cost of the network architecture.
  if params['enable_cost_model']:
    rl_stats = search_space_utils.reward_for_single_cost_model(
        validation_accuracy,
        rl_reward_function=params['rl_reward_function'],
        estimated_cost=estimated_cost,
        rl_cost_model_target=params['rl_cost_model_target'],
        rl_cost_model_exponent=params['rl_cost_model_exponent'])
    rl_cost_ratio = rl_stats['rl_cost_ratio']
    rl_reward = rl_stats['rl_reward']
    rl_cost_adjustment = rl_stats['rl_cost_adjustment']
  else:
    rl_reward = validation_accuracy

  # Compute a baseline. We first take a cross-replica sum of the rewards
  # for all the TPU shards, then incorporate the result into an exponential
  # moving average. Within a single batch, each TPU shard will select a
  # different set of op masks from the RL controller. Each shard will
  # basically evaluate a different candidate architecture in our search space.

  # Count the number of TPU shards (cores) used for training.
  num_tpu_shards = tf.tpu.cross_replica_sum(
      tf.ones(shape=(), dtype=rl_reward.dtype))
  rl_step_baseline = tf.tpu.cross_replica_sum(rl_reward)
  rl_step_baseline = rl_step_baseline / num_tpu_shards

  rl_baseline = custom_layers.update_exponential_moving_average(
      rl_step_baseline, momentum=params['rl_baseline_momentum'])

  # Apply a REINFORCE update to the RL controller.
  log_prob = dist_info['sample_log_prob']
  rl_advantage = rl_reward - rl_baseline
  rl_empirical_loss = -tf.stop_gradient(rl_advantage) * log_prob

  # We set rl_entropy_loss proportional to (-entropy) so that minimizing the
  # loss will lead to an entropy that is as large as possible.
  rl_entropy = dist_info['entropy']
  rl_entropy_loss = -params['rl_entropy_regularization'] * rl_entropy

  # We use an RL learning rate of 0 for the first N epochs of training. See
  # Appendix A of FBNet (https://arxiv.org/pdf/1812.03443.pdf). Although they
  # don't mention it explicitly, there are some indications that ProxylessNAS
  # (https://openreview.net/forum?id=HylVB3AqYm) might also be doing this.
  enable_rl_optimizer = tf.cast(
      tf.greater_equal(global_step, params['rl_delay_steps']), tf.float32)
  rl_learning_rate = params['rl_learning_rate'] * enable_rl_optimizer

  if params['use_exponential_rl_learning_rate_schedule']:
    # rl_learning_rate_progress will be 0 when the RL controller starts
    # learning and 1 when the search ends.
    rl_learning_rate_progress = tf.nn.relu(
        tf.div(
            tf.cast(global_step - params['rl_delay_steps'], tf.float32),
            max(1, params['max_global_step'] - params['rl_delay_steps'])))
    # Exponentially increase the RL learning rate over time.
    rl_learning_rate_multiplier = tf.pow(10.0, rl_learning_rate_progress)
    rl_learning_rate = rl_learning_rate * rl_learning_rate_multiplier

  rl_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, rl_scope.name)
  with tf.control_dependencies(rl_update_ops):
    # In order to evaluate train_op, we must first evaluate
    # validation_accuracy. And to evaluate validation_accuracy, we must first
    # evaluate model_op. So running this op will perform a step of model
    # training followed by a step of RL controller training.
    if params['use_gradient_sync_barrier']:
      transform_grads_fn = _grads_and_vars_barrier
    else:
      transform_grads_fn = None

    train_op = tpu_optimizer_ops.apply_adam(
        rl_empirical_loss,
        regularization_loss=rl_entropy_loss,
        global_step=global_step,
        var_list=tf.trainable_variables(rl_scope.name),
        learning_rate=rl_learning_rate,
        beta1=0.0,
        beta2=0.999,
        epsilon=1e-8,
        transform_grads_fn=transform_grads_fn)

  # TensorBoard logging
  tensorboard_scalars = collections.OrderedDict([
      ('model/loss', model_loss),
      ('model/empirical_loss', model_empirical_loss),
      ('model/regularization_loss', model_regularization_loss),
      ('model/learning_rate', model_learning_rate),
      ('rlcontroller/empirical_loss', rl_empirical_loss),
      ('rlcontroller/entropy_loss', rl_entropy_loss),
      ('rlcontroller/validation_accuracy', validation_accuracy),
      ('rlcontroller/reward', rl_reward),
      ('rlcontroller/step_baseline', rl_step_baseline),
      ('rlcontroller/baseline', rl_baseline),
      ('rlcontroller/advantage', rl_advantage),
      ('rlcontroller/log_prob', log_prob),
  ])

  if params['enable_cost_model']:
    tensorboard_scalars['rlcontroller/estimated_cost'] = estimated_cost
    tensorboard_scalars['rlcontroller/cost_ratio'] = rl_cost_ratio
    tensorboard_scalars['rlcontroller/cost_adjustment'] = rl_cost_adjustment

  tensorboard_scalars['rlcontroller/learning_rate'] = rl_learning_rate
  tensorboard_scalars['rlcontroller/increase_ops_prob'] = increase_ops_prob
  tensorboard_scalars['rlcontroller/increase_filters_prob'] = (
      increase_filters_prob)

  # Log the values of all the choices made by the RL controller.
  for name_i, logits_i in dist_info['logits_by_path'].items():
    assert len(logits_i.shape) == 1, logits_i
    for j in range(int(logits_i.shape[0])):
      key = 'rlpathlogits/{:s}/{:d}'.format(name_i, j)
      tensorboard_scalars[key] = logits_i[j]

  for name_i, logits_i in dist_info['logits_by_tag'].items():
    assert len(logits_i.shape) == 1, logits_i
    for j in range(int(logits_i.shape[0])):
      key = 'rltaglogits/{:s}/{:d}'.format(name_i, j)
      tensorboard_scalars[key] = logits_i[j]

  # NOTE: host_call only works on rank-1 tensors. There's also a fairly
  # large performance penalty if we try to pass too many distinct tensors
  # from the TPU to the host at once. We avoid these problems by (i) calling
  # tf.stack to merge all of the float32 scalar values into a single rank-1
  # tensor that can be sent to the host relatively cheaply and (ii) reshaping
  # the remaining values from scalars to rank-1 tensors.
  def host_call_fn(step, scalar_values):
    values = tf.unstack(scalar_values)
    with tf2.summary.create_file_writer(params['checkpoint_dir']).as_default():
      with tf2.summary.record_if(
          tf.math.equal(step[0] % params['tpu_iterations_per_loop'], 0)):
        for key, value in zip(list(tensorboard_scalars.keys()), values):
          tf2.summary.scalar(key, value, step=step[0])
        return tf.summary.all_v2_summary_ops()

  host_call_values = tf.stack(list(tensorboard_scalars.values()))
  host_call = (host_call_fn, [tf.reshape(global_step, [1]), host_call_values])

  # Construct the estimator specification.
  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=model_loss,
      train_op=train_op,
      host_call=host_call)
def order_loss(labels, logits, margin=0.2):
  label_act = tf.reduce_sum(labels * logits, axis=-1, keep_dims=True)
  negative_cost = (1 - labels) * tf.cast(
      tf.greater(logits, label_act - margin), tf.float32) * tf.pow(
          logits + margin - label_act, 2)
  return negative_cost
weight1 = tf.Variable(tf.truncated_normal([9, 50], stddev=0.1))
bias1 = tf.Variable(tf.constant(0.1, shape=[50]))
weight2 = tf.Variable(tf.truncated_normal([50, 50], stddev=0.1))
bias2 = tf.Variable(tf.constant(0.1, shape=[50]))
weight3 = tf.Variable(tf.truncated_normal([50, 1], stddev=0.1))
bias3 = tf.Variable(tf.constant(0.1, shape=[1]))

sample_size = len(data)

# Network output y
y = hidden_layer(x, weight1, bias1, weight2, bias2, weight3, bias3)

# Loss function
error_loss = tf.reduce_sum(tf.pow(y_ - y, 2)) / sample_size
tf.add_to_collection("losses", error_loss)

# Add regularization
# regularizer = tf.contrib.layers.l2_regularizer(0.01)
regularizer = tf.keras.regularizers.l2(0.001)
regularization = (regularizer(weight1) + regularizer(weight2) +
                  regularizer(weight3))
tf.add_to_collection("losses", regularization)
loss = tf.add_n(tf.get_collection("losses"))

# Define the optimizer
train_op = tf.train.AdamOptimizer(0.05).minimize(loss)
# train_op = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
def body(i, old_adv_x, old_loss, labels=labels):
  """Find example with max loss value amongst batch of perturbations."""
  deltas = tf.random_uniform(deltas_shape)

  # Generate uniform samples from the l^p unit ball interior.
  if self.ord == np.inf:
    deltas *= 2. * self.eps
    deltas -= self.eps
  elif self.ord == 1:
    # ref: https://mathoverflow.net/questions/9185/how-to-generate-random-points-in-ell-p-balls  pylint: disable=line-too-long
    exp = -tf.log(deltas)
    shift = -tf.log(tf.random_uniform(deltas_shape[:2]))
    norm = tf.reduce_sum(tf.abs(exp), range(2, len(deltas_shape) - 2))
    scale = tf.reshape(shift + norm,
                       deltas_shape[:2] + [1] * (len(deltas_shape) - 2))
    deltas = exp / scale
  elif self.ord == 2:
    # ref: https://blogs.sas.com/content/iml/2016/04/06/generate-points-uniformly-in-ball.html  pylint: disable=line-too-long
    dims = tf.reduce_prod(deltas_shape[2:])
    deltas = tf.pow(deltas, 1. / dims)
    normal = tf.random_normal(deltas)
    normal /= tf.sqrt(
        tf.reduce_sum(normal**2, axis=range(2, len(deltas_shape) - 2),
                      keepdims=True))
    deltas *= normal
  else:
    raise NotImplementedError('Only L-inf, L1 and L2 norms are '
                              'currently implemented.')

  adv_x = tf.expand_dims(x, 1) + deltas
  labels = tf.expand_dims(labels, 1)
  labels = tf.tile(labels, [1, self.num_samples, 1])

  if (self.clip_min is not None) and (self.clip_max is not None):
    adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

  adv_x_r = tf.reshape(adv_x, [-1] + deltas_shape[2:])
  preds = self.model.get_probs(adv_x_r)
  preds_shape = preds.shape.as_list()
  preds = tf.reshape(preds, deltas_shape[:2] + preds_shape[1:])

  if labels is None:
    # Using model predictions as ground truth to avoid label leaking
    preds_max = tf.reduce_max(preds, -1, keep_dims=True)
    labels = tf.to_float(tf.equal(preds, preds_max))
    labels = tf.stop_gradient(labels)
  labels = labels / tf.reduce_sum(labels, -1, keep_dims=True)

  # Compute loss
  loss = utils_tf.model_loss(labels, preds, mean=False)
  if self.y_target is not None:
    loss = -loss

  # Find the maximum loss value.
  input_idx = tf.one_hot(tf.argmax(loss, axis=1), self.num_samples, axis=1)
  loss = tf.reduce_sum(loss * input_idx, axis=1)
  input_idx = tf.reshape(input_idx,
                         deltas_shape[:2] + [1] * (len(deltas_shape) - 2))
  adv_x = tf.reduce_sum(adv_x * input_idx, axis=1)

  condition = tf.greater(old_loss, loss)
  new_loss = tf.where(condition, old_loss, loss)
  new_adv_x = tf.where(condition, old_adv_x, adv_x)
  print(new_loss, new_adv_x)

  return i + 1, new_adv_x, new_loss
def _body(i, posterior, center, wx, activation_biases, sigma_biases,
          input_activation, tile_filter):
  """Body of EM while loop."""
  tf.logging.info('  Wx: %s', wx)
  beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
  posterior = tf.Print(posterior, [
      layer_name, i, h, ih,
      tf.reduce_min(posterior),
      tf.reduce_max(posterior)
  ], message='posterior')
  # route: [outdim, height?, width?, batch, indim]
  with tf.name_scope('vote_conf'):
    vote_conf = posterior * input_activation
    vote_conf = tf.maximum(vote_conf, 0.0)
  # masses: [batch, 1, outdim, 1, height, width, 1, 1]
  with tf.name_scope('masses'):
    masses = tf.reduce_sum(
        vote_conf, axis=[1, -1, -2], keepdims=True,
        name='masses_calculation') + 0.0000001
  with tf.name_scope('preactivate_unrolled'):
    preactivate_unrolled = vote_conf * wx
  # center: [batch, 1, outdim, outatom, height, width]
  with tf.name_scope('center'):
    center = .9 * tf.reduce_sum(
        preactivate_unrolled, axis=[1, -1, -2],
        keepdims=True) / masses + .1 * center

  # Rematerialization to save GPU memory. (+22ms/-1.6GB)
  # @tf.contrib.layers.recompute_grad
  def compute_noise_and_variance(wx, center, vote_conf, masses):
    noise = tf.squared_difference(wx, center)
    variance = min_var + tf.reduce_sum(
        vote_conf * noise,
        axis=[1, -1, -2],
        keepdims=True,
        name='variance_calculation') / masses
    return noise, variance

  with tf.name_scope('compute_noise_and_variance'):
    noise, variance = compute_noise_and_variance(wx, center, vote_conf, masses)

  with tf.name_scope('win'):
    log_variance = tf.log(variance)
    p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True)
    log_2pi = tf.log(2 * math.pi)
    sigma_b = tf.log(sigma_biases * sigma_biases + min_var)
    win = masses * (p_i - num_out_atoms * (sigma_b + log_2pi + 1.0))
  with tf.name_scope('logit'):
    logit = beta * (win - activation_biases * 50 * num_out_atoms)
  with tf.name_scope('activation_update'):
    activation_update = tf.minimum(0.0,
                                   logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
  with tf.name_scope('sigma_update'):
    log_det_sigma = -1 * p_i
    sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
  with tf.name_scope('exp_update'):
    exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True)
  prior_update = tf.subtract(
      activation_update - sigma_update, exp_update, name='prior_update_sub')
  max_prior_update = tf.reduce_max(
      prior_update,
      axis=[2, 3, 4, 5, 6, 7],
      keepdims=True,
      name='max_prior_opdate')
  prior_normal = tf.add(prior_update, -1 * max_prior_update)
  prior_exp = tf.exp(prior_normal)
  prior_exp_out = tf.reduce_sum(
      prior_exp, axis=2, keepdims=True, name='prior_exp_out')
  prior_exp_reshape = tf.reshape(
      prior_exp_out, [-1, h, h, k * k], name='prior_exp_reshape')
  sum_prior = tf.nn.conv2d_transpose(
      prior_exp_reshape,
      tile_filter,
      output_shape=[b * c, ih, ih, 1],
      strides=[1, s, s, 1],
      padding='VALID')
  sum_prior = tf.maximum(1e-6, sum_prior)
  sum_prior_patch = utils.kernel_tile(
      sum_prior, k, s, 1, name='sum_prior_patch')
  with utils.maybe_jit_scope(), tf.name_scope('posterior'):
    sum_prior_reshape = tf.reshape(
        sum_prior_patch, [-1, input_dim, 1, 1, h, h, k, k])
    posterior = prior_exp / sum_prior_reshape
  return (i + 1, posterior, logit, center, masses)
def gelu(x):
  """GeLU activation function."""
  return 0.5 * x * (
      1 + tf.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
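For reference, a quick sanity check (an illustrative sketch, not from the source) comparing this tanh approximation against the exact GELU defined through the Gaussian CDF:

import numpy as np
from scipy.special import erf  # exact Gaussian CDF via the error function

x = np.linspace(-3.0, 3.0, 7)
approx = 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x ** 3)))
exact = 0.5 * x * (1 + erf(x / np.sqrt(2.0)))  # GELU(x) = x * Phi(x)
print(np.max(np.abs(approx - exact)))  # agreement to roughly 1e-3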
def getDistance(self):
    dist = tf.pow(self.o1 - self.o2, 2)
    dist = tf.reduce_mean(dist, axis=1)
    dist = tf.sqrt(dist + 1e-6)
    return dist
def gelu(x):
    return 0.5 * x * (
        1 + tf.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
# The learning rate and the number of epochs.
learning_rate = 0.01
print("learning_rate", learning_rate)
training_epochs = 1000
print("training_epochs", training_epochs)

# Now we will build the hypothesis, the cost function, and the optimizer.
# We won't implement the gradient descent optimizer manually since it is
# built into TensorFlow. After that, we will initialize the variables.

# Hypothesis
y_pred = tf.add(tf.multiply(X, W), b)
print("y_pred=", y_pred)

# Mean Squared Error Cost Function
cost = tf.reduce_sum(tf.pow(y_pred - Y, 2)) / (2 * n)
print("cost=", cost)

# Gradient Descent Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
print("optimizer=", optimizer)

# Global Variables Initializer
init = tf.global_variables_initializer()

# Now we will begin the training process inside a TensorFlow session.

# Starting the TensorFlow session
with tf.Session() as sess:
    # Initializing the Variables
def __call__(self,
             box_outputs,
             class_outputs,
             anchor_boxes,
             image_shape,
             regression_weights=None,
             bbox_per_class=True,
             distill_class_outputs=None):
  """Generate final detections.

  Args:
    box_outputs: a tensor of shape of [batch_size, K, num_classes * 4]
      representing the class-specific box coordinates relative to anchors.
    class_outputs: a tensor of shape of [batch_size, K, num_classes]
      representing the class logits before applying score activation.
    anchor_boxes: a tensor of shape of [batch_size, K, 4] representing the
      corresponding anchor boxes w.r.t `box_outputs`.
    image_shape: a tensor of shape of [batch_size, 2] storing the image height
      and width w.r.t. the scaled image, i.e. the same image space as
      `box_outputs` and `anchor_boxes`.
    regression_weights: A list of four float numbers to scale coordinates.
    bbox_per_class: A `bool`. If True, perform per-class box regression.
    distill_class_outputs: a float tensor of shape of [batch_size, K,
      num_classes-1] representing the distilled class logits before applying
      score activation, without the background class.

  Returns:
    nmsed_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nmsed_scores: `float` Tensor of shape [batch_size, max_total_size]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nmsed_classes: `int` Tensor of shape [batch_size, max_total_size]
      representing classes for detected boxes.
    valid_detections: `int` Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  class_outputs_shape = tf.shape(class_outputs)
  num_locations = class_outputs_shape[1]
  num_classes = class_outputs_shape[-1]

  if self._discard_background:
    # Removes the background class before softmax.
    class_outputs = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1])
  class_outputs = tf.nn.softmax(class_outputs, axis=-1)
  if not self._discard_background:
    # Removes the background class.
    class_outputs = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1])

  if self._feat_distill == 'double_branch':
    distill_class_outputs = tf.nn.softmax(
        distill_class_outputs, axis=-1)  # [B, num_rois, num_classes]
    third_component = (
        1.0 - self._rare_mask
    ) * distill_class_outputs + self._rare_mask * class_outputs
    weighted_product = distill_class_outputs * class_outputs * third_component
    class_outputs = tf.pow(weighted_product, 1.0 / 3.0)

  if bbox_per_class:
    num_detections = num_locations * (num_classes - 1)
    box_outputs = tf.reshape(box_outputs, [-1, num_locations, num_classes, 4])
    box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
    anchor_boxes = tf.tile(
        tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
    box_outputs = tf.reshape(box_outputs, [-1, num_detections, 4])
    anchor_boxes = tf.reshape(anchor_boxes, [-1, num_detections, 4])

  # Box decoding.
  if regression_weights is None:
    regression_weights = [10.0, 10.0, 5.0, 5.0]
  decoded_boxes = box_utils.decode_boxes(
      box_outputs, anchor_boxes, weights=regression_weights)

  # Box clipping.
  decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)

  if bbox_per_class:
    decoded_boxes = tf.reshape(
        decoded_boxes, [-1, num_locations, num_classes - 1, 4])
  else:
    decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

  if not self._apply_nms:
    return {
        'raw_boxes': decoded_boxes,
        'raw_scores': class_outputs,
    }

  nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
      self._generate_detections(decoded_boxes, class_outputs))

  # Adds 1 to offset the background class which has index 0.
  nmsed_classes += 1

  return {
      'num_detections': valid_detections,
      'detection_boxes': nmsed_boxes,
      'detection_classes': nmsed_classes,
      'detection_scores': nmsed_scores,
  }
def linear_regression_categorical():
    raw_train_dataset = library.data_processing(train_data_path)
    dummies = pd.get_dummies(pd.DataFrame(raw_train_dataset[['wd']]))
    X_d = dummies.to_numpy()
    Y_d = pd.DataFrame(raw_train_dataset[['PM2.5']]).to_numpy()

    X = tf.placeholder(tf.float32, name='x')
    Y = tf.placeholder(tf.float32, name='y')
    w = tf.Variable(np.random.normal(), name='weight')
    b = tf.Variable(np.random.normal(), name='bias')

    y_pred = tf.add(tf.multiply(X, w), b)
    loss = tf.reduce_sum(tf.square(y_pred - Y)) / (2 * X_d.shape[0])
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)

        # Fit all training data
        for epoch in range(training_epochs):
            for (x, y) in zip(X_d, Y_d):
                x = x.reshape(1, X_d.shape[1])
                sess.run(optimizer, feed_dict={X: x, Y: y})

            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1),
                      "cost=", "{:.9f}".format(
                          sess.run(loss, feed_dict={X: X_d, Y: Y_d})),
                      "W=", sess.run(w), "b=", sess.run(b))
                fig = plt.figure(figsize=(10, 10), dpi=100)
                ax = raw_train_dataset.plot.scatter(x='wd', y='PM2.5')
                ax.set_ylim(0, 1)
                ax.plot(X_d, sess.run(w) * X_d + sess.run(b),
                        label='Fitted line')
                ax.legend()
                plt.show()
                plt.close(fig)

        print("Optimization Finished!")
        training_cost = sess.run(loss, feed_dict={X: X_d, Y: Y_d})
        t_w = sess.run(w)
        t_b = sess.run(b)
        print("Training cost=", training_cost, "W=", t_w, "b=", t_b, '\n')

        raw_test_dataset = library.data_processing(test_data_path)
        X_test_d = pd.DataFrame(raw_test_dataset[['wd']]).to_numpy()
        dummies = pd.get_dummies(pd.DataFrame(raw_test_dataset[['wd']]))
        X_d = dummies.to_numpy()
        Y_test_d = pd.DataFrame(raw_test_dataset[['PM2.5']]).to_numpy()

        print("Testing... (L2 loss Comparison)")
        testing_cost = sess.run(
            tf.reduce_sum(tf.pow(y_pred - Y, 2)) / (2 * X_test_d.shape[0]),
            feed_dict={X: X_d, Y: Y_test_d})
        print("Testing cost=", testing_cost)
        print("Absolute l2 loss difference:", abs(training_cost - testing_cost))
def get_sampling_probability(hparams, is_training):
  """Returns the sampling probability as a tensor based on the hparams.

  Supports three sampling schedules (`hparams.sampling_schedule`):
    constant: `hparams.sampling_rate` is the sampling probability. Must be in
      the interval [0, 1].
    exponential: `hparams.sampling_rate` is the base of the decay exponential.
      Must be in the interval (0, 1). Larger values imply a slower increase in
      sampling.
    inverse_sigmoid: `hparams.sampling_rate` is in the interval [1, inf).
      Larger values imply a slower increase in sampling.

  A constant value of 0 is returned if `hparams.sampling_schedule` is
  undefined.

  If not training and a non-0 sampling schedule is defined, a constant value
  of 1 is returned since this is assumed to be a test/eval job associated with
  a scheduled sampling trainer.

  Args:
    hparams: An HParams object containing model hyperparameters.
    is_training: Whether or not the model is being used for training.

  Raises:
    ValueError: On an invalid `sampling_schedule` or `sampling_rate` hparam.
  """
  if (not hasattr(hparams, 'sampling_schedule') or
      not hparams.sampling_schedule or
      (hparams.sampling_schedule == 'constant' and
       hparams.sampling_rate == 0)):
    return tf.constant(0.0)

  if not is_training:
    # This is likely an eval/test job associated with a training job using
    # scheduled sampling.
    tf.logging.warning(
        'Setting non-training sampling schedule from %s:%f to constant:1.0.',
        hparams.sampling_schedule, hparams.sampling_rate)
    hparams.sampling_schedule = 'constant'
    hparams.sampling_rate = 1.0

  schedule = hparams.sampling_schedule
  rate = hparams.sampling_rate
  step = tf.to_float(tf.train.get_global_step())

  if schedule == 'constant':
    if not 0 <= rate <= 1:
      raise ValueError(
          '`constant` sampling rate must be in the interval [0, 1]. Got %f.'
          % rate)
    sampling_probability = tf.to_float(rate)
  elif schedule == 'inverse_sigmoid':
    if rate < 1:
      raise ValueError(
          '`inverse_sigmoid` sampling rate must be at least 1. Got %f.' % rate)
    k = tf.to_float(rate)
    sampling_probability = 1.0 - k / (k + tf.exp(step / k))
  elif schedule == 'exponential':
    if not 0 < rate < 1:
      raise ValueError(
          '`exponential` sampling rate must be in the interval (0, 1). '
          'Got %f.' % hparams.sampling_rate)
    k = tf.to_float(rate)
    sampling_probability = 1.0 - tf.pow(k, step)
  else:
    raise ValueError('Invalid `sampling_schedule`: %s' % schedule)
  tf.summary.scalar('sampling_probability', sampling_probability)
  return sampling_probability
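To make the three schedules in the docstring concrete, here is a small standalone sketch (plain NumPy; the helper name is illustrative and not from the source) evaluating each formula at a few global steps:

import numpy as np

def sampling_probability(schedule, rate, step):
    # Mirrors the three branches above in plain NumPy.
    if schedule == 'constant':
        return rate
    if schedule == 'inverse_sigmoid':
        return 1.0 - rate / (rate + np.exp(step / rate))
    if schedule == 'exponential':
        return 1.0 - rate ** step
    raise ValueError('Invalid schedule: %s' % schedule)

for step in (0, 1000, 10000):
    print(step,
          round(sampling_probability('exponential', 0.9999, step), 4),
          round(sampling_probability('inverse_sigmoid', 1000.0, step), 4))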
# Task 1 - 1
# TIES 4911
# Toni Pikkarainen
# 14.1.2020
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

# In[2]:

# Constants
# Lecture02, slides 11-13
a = tf.constant(5)
b = tf.constant(2)
add_op = tf.add(a, b)
mul_op = tf.multiply(b, add_op)
pow_op = tf.pow(mul_op, b)

# In[3]:

# Variables
# Lecture02, slides 15-17
var1 = tf.Variable(2, name="scalar1")
var2 = tf.Variable(3, name="scalar2")
assign_op = var2.assign(10)

# In[4]:

# Placeholders
# Lecture02, slides 19-20
a = tf.placeholder(tf.float32, shape=[3])
    # Hidden layer in the decoder with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_1, weights['decoder_h2']),
               biases['decoder_b2']))
    return layer_2


# Autoencoder model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# Prediction
y_pred = decoder_op
y_true = X

# Loss and optimizer, minimize the squared error
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

# Initialize the variables
init = tf.global_variables_initializer()

# Start Training
# Start a TensorFlow session
with tf.Session() as sess:
    sess.run(init)

    # Training
    for i in range(1, num_steps + 1):
        # Prepare Data
        # Get the next batch of MNIST data (only images, not labels)
        batch_x, _ = mnist.train.next_batch(batch_size)
def l2(a, b):
    return tf.reduce_mean(tf.pow(a - b, 2))


def show_graph_operations():
def focal(self, target, actual, alpha=1, gamma=2):
    focal_loss = alpha * tf.pow(tf.abs(target - actual), gamma)
    return focal_loss