out_layer = tf.matmul(layer_3, weights['out']) + biases['out'] return out_layer, X, Y # learning parameters learning_rate = 1e-4 training_epochs = 1000 # display training accuracy every .. display_accuracy_step = 30 logits, X, Y = multilayer_perceptron() # define loss and optimizer loss_op = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # define training graph operation train_op = optimizer.minimize(loss_op) # graph operation to initialize all variables init_op = tf.global_variables_initializer() with tf.Session() as sess: # run graph weights/biases initialization op sess.run(init_op) # begin training loop .. for epoch in range(training_epochs): # complete code below # run optimization operation (backprop) and cost operation (to get loss value)
def _build_model(self):
    """Assemble the CNN graph: six 3x3 conv layers, three FC layers, loss and accuracy.

    Creates the `x_input` / `y_input` placeholders and stores `logits`,
    `total_loss` and `accuracy` on the instance. The weight-decay term and the
    cross-entropy term are both added to the 'losses' collection, and
    `total_loss` is the sum of everything in that collection.
    """
    side = 32  # spatial size of the current feature map
    self.x_input = tf.placeholder(
        tf.float32, shape=[None, 32, 32, 3], name='image')
    self.y_input = tf.placeholder(tf.int64, shape=None, name='label')

    # Standardize input data: rescale by 255, then normalize with MEANS / STDS.
    net = (self.x_input / 255.0 - MEANS) / STDS

    # (scope name, output channels, whether a max-pool follows the layer)
    conv_plan = (
        ('conv1', 32, False),
        ('conv2', 64, True),
        ('conv3', 128, False),
        ('conv4', 128, True),
        ('conv5', 256, False),
        ('conv6', 256, True),
    )
    in_ch = 3
    for scope_name, out_ch, pool_after in conv_plan:
        with tf.variable_scope(scope_name):
            net = conv_layer(net, [3, 3, in_ch, out_ch], [side, side, out_ch])
        if pool_after:
            # max_pool_layer also returns the updated spatial size.
            net, side = max_pool_layer(net, side)
        in_ch = out_ch

    # Fully connected head: flatten -> 1024 -> 512 -> n_labels.
    flat_dim = side * side * in_ch
    with tf.variable_scope('fc1'):
        flattened = tf.reshape(net, [-1, flat_dim])
        hidden1 = fc_layer(flattened, flat_dim, 1024)
    with tf.variable_scope('fc2'):
        hidden2 = fc_layer(hidden1, 1024, 512)
    with tf.variable_scope('fc3'):
        # No activation on the last layer: these are the raw logits.
        self.logits = fc_layer(hidden2, 512, self.n_labels, activation_fn=None)

    # L2 penalty over everything registered in the 'all_weights' collection.
    with tf.variable_scope('weights_norm'):
        l2_terms = [tf.nn.l2_loss(w) for w in tf.get_collection('all_weights')]
        weights_norm = tf.reduce_sum(
            input_tensor=WEIGHT_DECAY * tf.stack(l2_terms),
            name='weights_norm')
    tf.add_to_collection('losses', weights_norm)

    with tf.variable_scope('cross_entropy'):
        per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y_input, logits=self.logits)
        cross_entropy = tf.reduce_mean(per_example_xent)
    tf.add_to_collection('losses', cross_entropy)

    self.total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

    hits = tf.equal(self.y_input, tf.argmax(self.logits, 1))
    self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
def batch_loss(model, batch):
    """Return the mean softmax cross-entropy of a linear model on one batch.

    Args:
        model: object exposing `weights` and `bias`, combined as
            `batch.x @ weights + bias` to produce the logits.
        batch: object exposing `x` (inputs) and `y` (integer class ids, which
            are one-hot encoded with depth 10).

    Returns:
        Scalar tensor: the cross-entropy averaged over the batch.
    """
    logits = tf.matmul(batch.x, model.weights) + model.bias
    # log_softmax is computed in a numerically stable way; taking
    # tf.log(tf.nn.softmax(...)) yields -inf (and then NaN after the multiply
    # by the one-hot zeros) as soon as a probability underflows to 0.
    log_probs = tf.nn.log_softmax(logits)
    one_hot_targets = tf.one_hot(batch.y, 10)
    return -tf.reduce_mean(
        tf.reduce_sum(one_hot_targets * log_probs, axis=[1]))
def compress(args):
    """Compresses an image.

    Builds the analysis / synthesis and hyper-analysis / hyper-synthesis
    transforms, restores the latest checkpoint found in `args.checkpoint_dir`,
    and writes the two compressed strings together with the spatial shapes of
    `x`, `y` and `z` to `args.output_file`. When `args.verbose` is set, the
    reconstruction is also evaluated and rate/distortion figures are printed.

    Args:
        args: object providing `input_file`, `num_filters`, `checkpoint_dir`,
            `output_file` and `verbose`.
    """
    # Load input image and add batch dimension.
    x = read_png(args.input_file)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)

    # Instantiate model.
    # NOTE(review): the checkpoint restore below presumably relies on these
    # layers being created in this order (auto-generated names) - keep it.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    sigma = hyper_synthesis_transform(z_hat)
    # Crop the predicted scales to the spatial extent of y.
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]
    # Log-spaced table of SCALES_LEVELS scales between SCALES_MIN and SCALES_MAX.
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back (if requested).
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    # Crop the reconstruction to the spatial extent of the input.
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    # Product of every dimension of x except the last (channel) one.
    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the tensor
        # shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        tensors = [
            string, side_string,
            tf.shape(x)[1:-1],
            tf.shape(y)[1:-1],
            tf.shape(z)[1:-1]
        ]
        arrays = sess.run(tensors)

        # Write a binary file with the shape information and the compressed string.
        packed = tfc.PackedTensors()
        packed.pack(tensors, arrays)
        with open(args.output_file, "wb") as f:
            f.write(packed.string)

        # If requested, transform the quantized image back and measure performance.
        if args.verbose:
            # The tensor names are rebound to their evaluated values here.
            eval_bpp, mse, psnr, msssim, num_pixels = sess.run(
                [eval_bpp, mse, psnr, msssim, num_pixels])

            # The actual bits per pixel including overhead.
            bpp = len(packed.string) * 8 / num_pixels

            print("Mean squared error: {:0.4f}".format(mse))
            print("PSNR (dB): {:0.2f}".format(psnr))
            print("Multiscale SSIM: {:0.4f}".format(msssim))
            print("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssim)))
            print("Information content in bpp: {:0.4f}".format(eval_bpp))
            print("Actual bits per pixel: {:0.4f}".format(bpp))
def train_crack_captcha_cnn():
    """Train the captcha CNN until a sampled batch reaches more than 98% accuracy.

    Builds the loss, optimizer and accuracy ops on top of `crack_captcha_cnn()`,
    then trains on batches of 128 samples. Every 100 steps the accuracy is
    measured on a freshly drawn batch; once it exceeds 0.98 the model is saved
    under `models/<modelNo>/`, the elapsed time is printed and training stops.
    """
    started_at = time.time()
    output = crack_captcha_cnn()

    # Loss function.
    per_logit_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=output, labels=Y)
    loss = tf.reduce_mean(per_logit_loss)
    # Optimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # Reshape the output to [batch, MAX_CAPTCHA, CHAR_SET_LEN] so that each row
    # holds the class scores of one captcha character, and take the index of
    # the best class along the last axis.
    captcha_shape = [-1, MAX_CAPTCHA, CHAR_SET_LEN]
    predict = tf.reshape(output, captcha_shape)
    max_idx_p = tf.argmax(predict, 2)
    # Same for the ground truth Y.
    max_idx_l = tf.argmax(tf.reshape(Y, captcha_shape), 2)
    # Accuracy: fraction of characters predicted correctly.
    matches = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(matches, tf.float32))

    # Keeps only the most recent checkpoint.
    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        # Loop until the accuracy threshold is reached.
        while True:
            # Train on a batch of 128 samples.
            train_x, train_y = get_next_batch(128)
            train_feed = {X: train_x, Y: train_y, keep_prob: 0.75}
            _, loss_ = sess.run([optimizer, loss], feed_dict=train_feed)
            timestamp = time.strftime('%Y-%m-%d %H:%M:%S',
                                      time.localtime(time.time()))
            print(timestamp, step, loss_)

            # Only evaluate every 100 steps.
            if step % 100 != 0:
                step += 1
                continue

            # Evaluate on another batch of 128 samples, dropout disabled.
            eval_x, eval_y = get_next_batch(128)
            eval_feed = {X: eval_x, Y: eval_y, keep_prob: 1.}
            acc = sess.run(accuracy, feed_dict=eval_feed)
            print(
                u'***************************************************************第%s次的准确率为%s'
                % (step, acc))

            # Save the model and stop once accuracy exceeds 98%.
            if acc > 0.98:
                model_tag = str(modelNo)
                saver.save(sess,
                           "models/" + model_tag + "/" + model_tag + ".model",
                           global_step=step)
                print(time.time() - started_at)
                break
            step += 1
def __init__(self,
             linear_size,
             num_layers,
             residual,
             batch_norm,
             max_norm,
             batch_size,
             learning_rate,
             summaries_dir,
             predict_14=False,
             dtype=tf.float32):
    """Creates the linear + relu model

    Builds the placeholders, the stack of linear layers, the mean-squared-error
    loss, the Adam update op (with an exponentially decayed learning rate),
    the summaries and the saver, and stores them on the instance.

    Args
      linear_size: integer. number of units in each layer of the model
      num_layers: integer. number of bilinear blocks in the model
      residual: boolean. Whether to add residual connections
      batch_norm: boolean. Whether to use batch normalization
      max_norm: boolean. Whether to clip weights to a norm of 1
      batch_size: integer. The size of the batches used during training
      learning_rate: float. Learning rate to start with
      summaries_dir: String. Directory where to log progress
      predict_14: boolean. Whether to predict 14 instead of 17 joints
      dtype: the data type to use to store internal variables
    """

    # There are in total 17 joints in H3.6M and 16 in MPII (and therefore in stacked
    # hourglass detections). We settled with 16 joints in 2d just to make models
    # compatible (e.g. you can train on ground truth 2d and test on SH detections).
    # This does not seem to have an effect on prediction performance.
    self.HUMAN_2D_SIZE = 16 * 2

    # In 3d all the predictions are zero-centered around the root (hip) joint, so
    # we actually predict only 16 joints. The error is still computed over 17 joints,
    # because if one uses, e.g. Procrustes alignment, there is still error in the
    # hip to account for!
    # There is also an option to predict only 14 joints, which makes our results
    # directly comparable to those in https://arxiv.org/pdf/1611.09010.pdf
    self.HUMAN_3D_SIZE = 14 * 3 if predict_14 else 16 * 3

    self.input_size = self.HUMAN_2D_SIZE
    self.output_size = self.HUMAN_3D_SIZE

    self.isTraining = tf.placeholder(tf.bool, name="isTrainingflag")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Summary writers for train and test runs
    self.train_writer = tf.summary.FileWriter(
        os.path.join(summaries_dir, 'train'))
    self.test_writer = tf.summary.FileWriter(
        os.path.join(summaries_dir, 'test'))

    self.linear_size = linear_size
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=dtype,
                                     name="learning_rate")
    self.global_step = tf.Variable(0, trainable=False, name="global_step")
    decay_steps = 100000  # empirical
    decay_rate = 0.96  # empirical
    # From here on self.learning_rate is the decayed tensor, not the variable.
    self.learning_rate = tf.train.exponential_decay(
        self.learning_rate, self.global_step, decay_steps, decay_rate)

    # === Transform the inputs ===
    with vs.variable_scope("inputs"):
        # in=2d poses, out=3d poses
        enc_in = tf.placeholder(dtype,
                                shape=[None, self.input_size],
                                name="enc_in")
        dec_out = tf.placeholder(dtype,
                                 shape=[None, self.output_size],
                                 name="dec_out")

        self.encoder_inputs = enc_in
        self.decoder_outputs = dec_out

    # === Create the linear + relu combos ===
    with vs.variable_scope("linear_model"):

        # === First layer, brings dimensionality up to linear_size ===
        w1 = tf.get_variable(name="w1",
                             initializer=kaiming,
                             shape=[self.HUMAN_2D_SIZE, linear_size],
                             dtype=dtype)
        b1 = tf.get_variable(name="b1",
                             initializer=kaiming,
                             shape=[linear_size],
                             dtype=dtype)
        w1 = tf.clip_by_norm(w1, 1) if max_norm else w1
        y3 = tf.matmul(enc_in, w1) + b1

        if batch_norm:
            y3 = tf.layers.batch_normalization(y3,
                                               training=self.isTraining,
                                               name="batch_normalization")
        y3 = tf.nn.relu(y3)
        y3 = tf.nn.dropout(y3, self.dropout_keep_prob)

        # === Create multiple bi-linear layers ===
        for idx in range(num_layers):
            y3 = self.two_linear(y3, linear_size, residual,
                                 self.dropout_keep_prob, max_norm, batch_norm,
                                 dtype, idx)

        # === Last linear layer has HUMAN_3D_SIZE in output ===
        w4 = tf.get_variable(name="w4",
                             initializer=kaiming,
                             shape=[linear_size, self.HUMAN_3D_SIZE],
                             dtype=dtype)
        b4 = tf.get_variable(name="b4",
                             initializer=kaiming,
                             shape=[self.HUMAN_3D_SIZE],
                             dtype=dtype)
        w4 = tf.clip_by_norm(w4, 1) if max_norm else w4
        y = tf.matmul(y3, w4) + b4
        # === End linear model ===

    # Store the outputs here
    self.outputs = y
    self.loss = tf.reduce_mean(tf.square(y - dec_out))
    self.loss_summary = tf.summary.scalar('loss/loss', self.loss)

    # To keep track of the loss in mm
    self.err_mm = tf.placeholder(tf.float32, name="error_mm")
    self.err_mm_summary = tf.summary.scalar("loss/error_mm", self.err_mm)

    # Gradients and update operation for training the model.
    opt = tf.train.AdamOptimizer(self.learning_rate)
    # Run the collected update ops (e.g. batch-norm moving averages) before
    # the gradient step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):

        # Update all the trainable parameters
        gradients = opt.compute_gradients(self.loss)
        # Identity comparison is the correct way to test for None.
        # NOTE(review): compute_gradients returns (gradient, variable) pairs,
        # so an entry itself is presumably never None - confirm whether the
        # gradient element was meant to be checked instead.
        self.gradients = [[] if pair is None else pair for pair in gradients]
        self.updates = opt.apply_gradients(gradients,
                                           global_step=self.global_step)

    # Keep track of the learning rate
    self.learning_rate_summary = tf.summary.scalar(
        'learning_rate/learning_rate', self.learning_rate)

    # To save the model
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
# Third hidden layer; L2, W3 and b3 are defined earlier in the script.
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)

# Fourth hidden layer: 512 -> 512, Xavier-initialized weights, random-normal bias.
W4 = tf.get_variable("W4", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)

# Output layer: 512 -> 10. No activation here; these are the logits that are
# fed to the softmax cross-entropy below.
W5 = tf.get_variable("W5", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5

# define cost/loss & optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train my model
for epoch in range(training_epochs):
    avg_cost = 0
    # Number of mini-batches per epoch.
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        # One optimization step; c is the cost on this mini-batch.
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
def finetune(sess,
             dataset,
             steps=-1,
             model_name='124M',
             model_dir='models',
             combine=50000,
             batch_size=1,
             learning_rate=0.0001,
             accumulate_gradients=5,
             restore_from='latest',
             run_name='run1',
             checkpoint_dir='checkpoint',
             sample_every=100,
             sample_length=1023,
             sample_num=1,
             multi_gpu=False,
             save_every=1000,
             print_every=1,
             max_checkpoints=1,
             use_memory_saving_gradients=False,
             only_train_transformer_layers=False,
             optimizer='adam',
             overwrite=False):
    """Finetunes the model on the given dataset.

    Adapted from https://github.com/nshepperd/gpt-2/blob/finetuning/train.py.
    See that file for parameter definitions.

    Training runs until `steps` steps have been taken (forever when `steps`
    is not positive) or until interrupted with Ctrl-C; in both cases a
    checkpoint is written before returning.

    `optimizer` selects 'adam' or 'sgd' and is honoured with and without
    gradient accumulation; any other value falls back to Adam with a warning.

    Raises:
        FileNotFoundError: if the base model files are missing from
            `model_dir/model_name`.
        ValueError: if `sample_length` exceeds the model's context window.
    """

    # assert model_name not in ['774M', '1558M'] or multi_gpu, "Currently, a modern single GPU cannot finetune the 774M GPT-2 model or larger."

    SAMPLE_DIR = 'samples'

    checkpoint_path = os.path.join(checkpoint_dir, run_name)

    def maketree(path):
        # Create the directory tree, tolerating only "already exists" rather
        # than swallowing every exception.
        os.makedirs(path, exist_ok=True)

    maketree(checkpoint_path)
    # Snapshot of the run directory taken before any new files are written;
    # used further down when `overwrite` is requested.
    files = os.listdir(checkpoint_path)
    for fname in ['hparams.json', 'encoder.json', 'vocab.bpe']:
        try:
            shutil.copyfile(os.path.join(model_dir, model_name, fname),
                            os.path.join(checkpoint_path, fname))
        except FileNotFoundError:
            print(
                "You need to download the GPT-2 model first via download_gpt2()"
            )
            raise

    enc = encoder.get_encoder(checkpoint_path)
    hparams = model.default_hparams()
    with open(os.path.join(checkpoint_path, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    if model_name not in ['117M', '124M']:
        # Larger models: force the cheaper training configuration.
        use_memory_saving_gradients = True
        only_train_transformer_layers = True
        accumulate_gradients = 1

    context = tf.placeholder(tf.int32, [batch_size, None])
    gpus = []

    if multi_gpu:
        gpus = get_available_gpus()

    output = model.model(hparams=hparams, X=context, gpus=gpus)
    # Next-token prediction: logits at position t are scored against token t+1.
    loss = tf.reduce_mean(
        input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=context[:, 1:], logits=output['logits'][:, :-1]))

    tf_sample = sample.sample_sequence(hparams=hparams,
                                       length=sample_length,
                                       context=context,
                                       batch_size=batch_size,
                                       temperature=1.0,
                                       top_k=40)

    all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
    train_vars = [v for v in all_vars if '/h' in v.name
                  ] if only_train_transformer_layers else all_vars

    if optimizer == 'adam':
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    else:
        # Previously this left `opt` unbound (crash with accumulation) or
        # silently trained with Adam (without accumulation). Keep the Adam
        # fallback, but make it explicit.
        print('Unknown optimizer {!r}; falling back to adam'.format(optimizer))
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

    if accumulate_gradients > 1:
        opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
        opt_reset = opt.reset()
        opt_compute = opt.compute_gradients(loss)
        opt_apply = opt.apply_gradients()
        summary_loss = tf.summary.scalar('loss', opt_apply)
    else:
        # Use the optimizer selected above; a fresh AdamOptimizer here would
        # ignore optimizer='sgd'.
        opt_apply = opt.minimize(loss, var_list=train_vars)
        summary_loss = tf.summary.scalar('loss', loss)

    summary_log = tf.summary.FileWriter(checkpoint_path)

    saver = tf.train.Saver(var_list=all_vars, max_to_keep=max_checkpoints)
    sess.run(tf.global_variables_initializer())

    if restore_from == 'latest':
        ckpt = tf.train.latest_checkpoint(checkpoint_path)
        if ckpt is None:
            # Get fresh GPT weights if new run.
            ckpt = tf.train.latest_checkpoint(
                os.path.join(model_dir, model_name))
    elif restore_from == 'fresh':
        ckpt = tf.train.latest_checkpoint(os.path.join(model_dir, model_name))
    else:
        ckpt = tf.train.latest_checkpoint(restore_from)
    print('Loading checkpoint', ckpt)
    saver.restore(sess, ckpt)

    print('Loading dataset...')
    chunks = load_dataset(enc, dataset, combine)
    data_sampler = Sampler(chunks)
    print('dataset has', data_sampler.total_size, 'tokens')
    print('Training...')

    counter = 1
    counter_path = os.path.join(checkpoint_path, 'counter')
    if os.path.exists(counter_path) and restore_from == 'latest':
        # Load the step number if we're resuming a run
        # Add 1 so we don't immediately try to save again
        with open(counter_path, 'r') as fp:
            counter = int(fp.read()) + 1
    counter_base = counter

    def save():
        # Write a checkpoint for the last completed step and record that step
        # in the 'counter' file. Reads the current `counter` via the closure.
        maketree(checkpoint_path)
        print('Saving',
              os.path.join(checkpoint_path, 'model-{}').format(counter - 1))
        saver.save(sess,
                   os.path.join(checkpoint_path, 'model'),
                   global_step=counter - 1)
        with open(counter_path, 'w') as fp:
            fp.write(str(counter - 1) + '\n')

    def generate_samples():
        # Generate `sample_num` samples from a one-token prompt drawn from the
        # dataset, print the last one and write all of them to SAMPLE_DIR.
        context_tokens = data_sampler.sample(1)
        all_text = []
        index = 0
        while index < sample_num:
            out = sess.run(tf_sample,
                           feed_dict={context: batch_size * [context_tokens]})
            for i in range(min(sample_num - index, batch_size)):
                text = enc.decode(out[i])
                text = '======== SAMPLE {} ========\n{}\n'.format(
                    index + 1, text)
                all_text.append(text)
                index += 1
        print(text)
        maketree(os.path.join(SAMPLE_DIR, run_name))
        with open(
                os.path.join(SAMPLE_DIR, run_name,
                             'samples-{}').format(counter), 'w') as fp:
            fp.write('\n'.join(all_text))

    def sample_batch():
        # One training batch: `batch_size` sequences of 1024 tokens each.
        return [data_sampler.sample(1024) for _ in range(batch_size)]

    if overwrite and restore_from == 'latest':
        # Drop the checkpoint / event files that existed before this call and
        # immediately re-save the restored weights.
        for fname in files:
            if fname.startswith(('model', 'events')):
                os.remove(os.path.join(checkpoint_path, fname))
        save()

    # (exponentially weighted loss sum, matching weight sum)
    avg_loss = (0.0, 0.0)
    start_time = time.time()

    if steps:
        steps = int(steps)

    try:
        while True:
            if steps > 0 and counter == (counter_base + steps):
                save()
                return
            if (counter - 1) % save_every == 0 and counter > 1:
                save()
            if (counter - 1) % sample_every == 0 and counter > 1:
                generate_samples()

            if accumulate_gradients > 1:
                sess.run(opt_reset)
                for _ in range(accumulate_gradients):
                    sess.run(opt_compute,
                             feed_dict={context: sample_batch()})
                (v_loss, v_summary) = sess.run((opt_apply, summary_loss))
            else:
                (_, v_loss, v_summary) = sess.run(
                    (opt_apply, loss, summary_loss),
                    feed_dict={context: sample_batch()})

            summary_log.add_summary(v_summary, counter)

            if counter % print_every == 0:
                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

            counter += 1
    except KeyboardInterrupt:
        print('interrupted')
        save()
import tensorflow.compat.v1 as tf

# The graph below is built from placeholders and run in a Session. Under
# TensorFlow 2.x the compat.v1 module still starts in eager mode, where
# tf.placeholder raises, so switch to TF1 behaviour explicitly.
tf.disable_v2_behavior()

# Six 2-feature training points with binary labels.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using sigmoid: 1 / (1 + exp(-(XW + b)))
logits = tf.matmul(X, W) + b
hypothesis = tf.sigmoid(logits)

# cost/loss function: binary cross-entropy,
#   -mean(Y * log(h) + (1 - Y) * log(1 - h)),
# computed from the logits so that a saturated sigmoid cannot produce
# log(0) = -inf / NaN.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# True if hypothesis > 0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # One full-batch gradient-descent step; cost_val is the current loss.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
def model_fn(features, labels, mode, params):
    """Build model for boundary detection.

    Args:
      features: (Tensor) of input features, i.e. images.
      labels: (Tensor) of ground truth labels. Not read in PREDICT mode.
      mode: (String) train/eval/predict modes.
      params: (Dict) of model training parameters.

    Returns:
      A `tf.estimator.tpu.TPUEstimatorSpec` for the requested mode.
    """
    eval_metrics, train_op, loss = None, None, None
    host_call = None
    training = mode == tf.estimator.ModeKeys.TRAIN
    cfg = vgg_16_hed_config(add_v1net_early=FLAGS.add_v1net_early,
                            add_v1net=FLAGS.add_v1net)
    vgg = VGG(cfg)
    predictions, endpoints = vgg.build_model(images=features["image"],
                                             is_training=training,
                                             preprocess=FLAGS.preprocess)
    # TODO(vveeraba): Add vgg restore checkpoint

    # output predictions
    # Handled before anything touches `labels`: the estimator passes
    # labels=None in PREDICT mode, so indexing them first would raise.
    if mode == tf.estimator.ModeKeys.PREDICT:
        sigmoid = tf.nn.sigmoid(predictions)
        predictions = {
            "predictions": predictions,
            "boundary_pred_map": sigmoid,
        }
        return tf.estimator.tpu.TPUEstimatorSpec(mode, predictions=predictions)

    # Tile ground truth for 5 side outputs
    side_predictions = endpoints["side_outputs_fullres"]
    side_labels = tf.tile(labels["label"], [5, 1, 1, 1])

    # TODO(vveeraba): Change positive class weight below
    # Changing pos_weight to num_positive_samples / num_negative_samples
    pos_weight = 1.1
    loss_fn = tf.nn.weighted_cross_entropy_with_logits
    xent = tf.nn.sigmoid_cross_entropy_with_logits
    # Fused output: plain sigmoid cross-entropy.
    loss_fuse = tf.reduce_mean(
        xent(
            logits=predictions,
            labels=labels["label"],
        ))
    # Side outputs: cross-entropy with up-weighted positives.
    loss_side = tf.reduce_mean(
        loss_fn(logits=side_predictions,
                labels=side_labels,
                pos_weight=pos_weight), )
    # L2 weight decay over every trainable variable except normalization ones.
    loss = loss_side + loss_fuse + FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables() if 'normalization' not in v.name
    ])

    if training:
        global_step = tf.train.get_global_step()
        steps_per_epoch = params["num_train_steps_per_epoch"]
        learning_rate = build_learning_rate(FLAGS.learning_rate,
                                            global_step,
                                            steps_per_epoch,
                                            decay_factor=0.1,
                                            decay_epochs=25)
        # The "fast" schedule starts at 100x the base rate, capped at 1e-4.
        fast_start = min(FLAGS.learning_rate * 100, 1e-4)
        fast_learning_rate = build_learning_rate(fast_start,
                                                 global_step,
                                                 steps_per_epoch,
                                                 decay_factor=0.1,
                                                 decay_epochs=10)

        optimizer = get_optimizer(learning_rate, FLAGS.optimizer,
                                  FLAGS.use_tpu)
        # Variables whose name contains "v1net" train on the fast schedule,
        # everything else on the slow one. Built with list comprehensions so
        # the variable order is deterministic (set iteration order is not).
        slow_vars = [var for var in vgg.model_vars if "v1net" not in var.name]
        fast_vars = [var for var in vgg.model_vars if "v1net" in var.name]
        fast_optimizer = get_optimizer(fast_learning_rate, FLAGS.optimizer,
                                       FLAGS.use_tpu)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # NOTE(review): both minimize() calls receive global_step, so the
        # step presumably advances twice per training step when fast_vars is
        # non-empty - confirm this is intended for the schedules above.
        train_op = optimizer.minimize(loss, global_step, var_list=slow_vars)
        train_ops = [train_op, update_ops]
        if fast_vars:
            # minimize() raises "No variables to optimize" on an empty list,
            # which happens when the model contains no v1net variables.
            fast_train_op = fast_optimizer.minimize(loss,
                                                    global_step,
                                                    var_list=fast_vars)
            train_ops.append(fast_train_op)
        train_op = tf.group(train_ops)

        # Scalars are reshaped to [1] before being handed to the host call.
        gs_t = tf.reshape(global_step, [1])
        lr_t = tf.reshape(learning_rate, [1])
        fast_lr_t = tf.reshape(fast_learning_rate, [1])
        loss_t = tf.reshape(loss, [1])
        loss_side_t = tf.reshape(loss_side, [1])
        loss_fuse_t = tf.reshape(loss_fuse, [1])
        labels_t = labels["label"]
        preds_t = tf.nn.sigmoid(predictions)

        def host_call_fn(gs, lr, fast_lr, loss, loss_side, loss_fuse, lbls,
                         preds):
            """Training host call. Creates scalar summaries for training metrics.

            This function is executed on the CPU and should not directly reference
            any Tensors in the rest of the `model_fn`. To pass Tensors from the
            model to the `metric_fn`, provide as part of the `host_call`. See
            https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the second
            element in the tuple passed to `host_call`.

            Args:
              gs: `Tensor` with shape `[batch]` for the global_step.
              lr: `Tensor` with shape `[batch]` for the learning rate.
              fast_lr: `Tensor` with shape `[batch]` for the fast learning rate.
              loss: `Tensor` with shape `[batch]` for the training loss.
              loss_side: `Tensor` with shape `[batch]` for the training side loss.
              loss_fuse: `Tensor` with shape `[batch]` for the training fused loss.
              lbls: `Tensor` of ground truth label maps.
              preds: `Tensor` of sigmoid-activated fused predictions.

            Returns:
              List of summary ops to run on the CPU host.
            """
            gs = gs[0]
            with tf.compat.v2.summary.create_file_writer(
                    params['model_dir'],
                    max_queue=params['iterations_per_loop']).as_default():
                with tf.compat.v2.summary.record_if(True):
                    tf.compat.v2.summary.scalar('training/total_loss',
                                                loss[0],
                                                step=gs)
                    tf.compat.v2.summary.scalar('training/side_loss',
                                                loss_side[0],
                                                step=gs)
                    tf.compat.v2.summary.scalar('training/fuse_loss',
                                                loss_fuse[0],
                                                step=gs)
                    tf.compat.v2.summary.scalar('training/learning_rate',
                                                lr[0],
                                                step=gs)
                    tf.compat.v2.summary.scalar('training/fast_learning_rate',
                                                fast_lr[0],
                                                step=gs)
                    # Predictions are logged inverted (1 - p).
                    tf.compat.v2.summary.image('training/predictions',
                                               1 - preds,
                                               step=gs)
                    tf.compat.v2.summary.image('training/labels',
                                               lbls,
                                               step=gs)
                    return tf.summary.all_v2_summary_ops()

        host_call_args = [
            gs_t, lr_t, fast_lr_t, loss_t, loss_side_t, loss_fuse_t, labels_t,
            preds_t
        ]
        host_call = (host_call_fn, host_call_args)

    if mode == tf.estimator.ModeKeys.EVAL:
        # Define evaluation metrics:
        def metric_fn(labels, logits):
            """Return the evaluation metrics as (value, update_op) pairs."""
            xent = tf.nn.sigmoid_cross_entropy_with_logits
            # Every entry of the returned dict must be a (value, update_op)
            # pair; tf.metrics.mean accumulates the per-element cross-entropy
            # across evaluation batches. A bare reduce_mean tensor is not a
            # valid metric.
            xentropy = tf.metrics.mean(xent(
                logits=logits,
                labels=labels,
            ))
            rmse = tf.metrics.root_mean_squared_error(labels=labels,
                                                      predictions=logits)
            return {
                'xentropy': xentropy,
                'rmse': rmse,
            }

        eval_metrics = (metric_fn, [labels["label"], predictions])

    return tf.estimator.tpu.TPUEstimatorSpec(
        train_op=train_op,
        mode=mode,
        loss=loss,
        eval_metrics=eval_metrics,
        host_call=host_call,
    )
def create(self):
    """Reset the default graph and build the recurrent model, loss and training ops.

    Depending on `self.configuration` this builds either a unidirectional RNN
    ('R') or two stacked bidirectional RNNs followed by an attention step
    ('B'). The pre-activation output is stored in `self.presoft`; prediction,
    cost, optimizer, accuracy, initializer and savers are attached to the
    instance afterwards.
    """
    tf.reset_default_graph()
    self.weight_bias_init()
    # Input placeholder has a leading dimension of 1; the remaining two
    # dimensions are taken from self.batch.
    self.x_ph = tf.placeholder("float32", [1, self.batch.shape[0], self.batch.shape[1]])
    self.y_ph = tf.placeholder("float32", self.batch_targ.shape)
    # Sequence lengths handed to the RNNs (one entry, equal to self.truncated).
    self.seq=tf.constant(self.truncated,shape=[1])
    self.seq2=tf.constant(self.truncated,shape=[1])
    self.dropout_ph = tf.placeholder("float32")
    self.fw_cell=self.cell_create('1')
    self.fw_cell2=self.cell_create('2')
    if self.configuration=='R':
        # Unidirectional RNN.
        self.outputs, self.states= tf.nn.dynamic_rnn(self.fw_cell, self.x_ph, sequence_length=self.seq,dtype=tf.float32)
        if self.attention_number >0:
            # Left-pad the time axis with attention_number zero steps, then for
            # every position g flatten the window of attention_number+1
            # consecutive outputs into a single row.
            self.outputs_zero_padded=tf.pad(self.outputs,[[0,0],[self.attention_number,0],[0,0]])
            self.RNNout1=tf.stack([tf.reshape(self.outputs_zero_padded[:,g:g+(self.attention_number+1)],[self.n_hidden[(len(self.n_hidden)-1)]*((self.attention_number)+1)]) for g in range(self.batch_size)])
            self.presoft=tf.matmul(self.RNNout1, self.weights) + self.biases
        else:
            self.presoft=tf.matmul(self.outputs[0][0], self.weights) + self.biases
    elif self.configuration=='B':
        # Two stacked bidirectional RNNs, each in its own variable scope.
        self.bw_cell=self.cell_create('1')
        self.bw_cell2=self.cell_create('2')
        with tf.variable_scope('1'):
            self.outputs, self.states= tf.nn.bidirectional_dynamic_rnn(self.fw_cell, self.bw_cell, self.x_ph, sequence_length=self.seq,dtype=tf.float32)
        # Concatenate forward and backward outputs along the last axis.
        self.first_out=tf.concat((self.outputs[0],self.outputs[1]),2)
        with tf.variable_scope('2'):
            self.outputs2, self.states2= tf.nn.bidirectional_dynamic_rnn(self.fw_cell2, self.bw_cell2, self.first_out, sequence_length=self.seq2,dtype=tf.float32)
        self.second_out=tf.concat((self.outputs2[0],self.outputs2[1]),2)
        # One set of attention weights per offset in the
        # (2 * attention_number + 1)-wide window.
        for i in range((self.attention_number*2)+1):
            self.attention_weight_init(i)
        # Zero-pad the squeezed second-layer output by attention_number rows on
        # both sides so every window offset can be sliced below.
        self.zero_pad_second_out=tf.pad(tf.squeeze(self.second_out),[[self.attention_number,self.attention_number],[0,0]])
        # self.attention_chunks.append(self.zero_pad_second_out[j:j+attention_number*2])
        # Per-offset attention features: tanh of (shifted second-layer output
        # concatenated with first-layer output) times that offset's weights.
        self.attention_m=[tf.tanh(tf.matmul(tf.concat((self.zero_pad_second_out[j:j+self.batch_size],tf.squeeze(self.first_out)),1),self.attention_weights[j])) for j in range((self.attention_number*2)+1)]
        # Softmax across the window offsets (axis 0 of the stacked scores).
        self.attention_s=tf.nn.softmax(tf.stack([tf.matmul(self.attention_m[i],self.sm_attention_weights[i]) for i in range(self.attention_number*2+1)]),0)
        # Attention-weighted sum of the shifted second-layer outputs.
        self.attention_z=tf.reduce_sum([self.attention_s[i]*self.zero_pad_second_out[i:self.batch_size+i] for i in range(self.attention_number*2+1)],0)
        self.presoft=tf.matmul(self.attention_z,self.weights)+self.biases
    # Output activation and the matching cross-entropy cost.
    if self.output_act=='softmax':
        self.pred=tf.nn.softmax(self.presoft)
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.presoft, labels=self.y_ph))
    elif self.output_act=='sigmoid':
        self.pred=tf.nn.sigmoid(self.presoft)
        self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.presoft, labels=self.y_ph))
    # Optimizer selection: 'GD', 'Adam' or 'RMS'.
    if self.optimizer == 'GD':
        self.optimize = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.cost)
    elif self.optimizer == 'Adam':
        self.optimize = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)
    elif self.optimizer == 'RMS':
        self.optimize = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.cost)
    self.correct_pred = tf.equal(tf.argmax(self.pred,1), tf.argmax(self.y_ph,1))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
    self.init = tf.global_variables_initializer()
    # One saver for every variable, one restricted to trainable variables.
    self.saver = tf.train.Saver()
    self.saver_var = tf.train.Saver(tf.trainable_variables())
    # Default the save location to the current working directory.
    if self.save_location==[]:
        self.save_location=os.getcwd()
def _build_model(self):
    """Build the core model within the graph.

    Creates the input placeholders, an initial 3x3 convolution, three stages
    of residual units, the final batch-norm / relu / global-average-pool and
    a 10-way fully connected layer. Stores the logits (`pre_softmax`),
    predictions, accuracy counters and the cross-entropy / weight-decay
    losses on the instance.
    """
    assert self.mode == 'train' or self.mode == 'eval'

    with tf.variable_scope('input'):
        self.x_input = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
        self.y_input = tf.placeholder(tf.int64, shape=None)

        # Standardize every image independently.
        input_standardized = tf.map_fn(
            lambda img: tf.image.per_image_standardization(img), self.x_input)
        x = self._conv('init_conv', input_standardized, 3, 3, 16,
                       self._stride_arr(1))

    strides = [1, 2, 2]
    activate_before_residual = [True, False, False]
    res_func = self._residual

    # wide residual network (https://arxiv.org/abs/1605.07146v1)
    # use filters = [16, 16, 32, 64] for a non-wide version
    filters = [16, 160, 320, 640]

    # Update hps.num_residual_units to 9
    # NOTE: Variable_scope might be an issue? Prob doesn't exist in TF2.
    # Each stage k (1..3) is one unit 'unit_k_0' that changes the filter count
    # (and applies that stage's stride), followed by four same-width units
    # 'unit_k_1' .. 'unit_k_4', i.e. 5 residual units per stage.
    for stage in range(1, 4):
        in_filters = filters[stage - 1]
        out_filters = filters[stage]
        with tf.variable_scope('unit_%d_0' % stage):
            x = res_func(x, in_filters, out_filters,
                         self._stride_arr(strides[stage - 1]),
                         activate_before_residual[stage - 1])
        for i in range(1, 5):
            with tf.variable_scope('unit_%d_%d' % (stage, i)):
                x = res_func(x, out_filters, out_filters,
                             self._stride_arr(1), False)

    with tf.variable_scope('unit_last'):
        x = self._batch_norm('final_bn', x)
        x = self._relu(x, 0.1)
        x = self._global_avg_pool(x)

    with tf.variable_scope('logit'):
        self.pre_softmax = self._fully_connected(x, 10)

    self.predictions = tf.argmax(self.pre_softmax, 1)
    self.correct_prediction = tf.equal(self.predictions, self.y_input)
    self.num_correct = tf.reduce_sum(
        tf.cast(self.correct_prediction, tf.int64))
    self.accuracy = tf.reduce_mean(
        tf.cast(self.correct_prediction, tf.float32))

    with tf.variable_scope('costs'):
        # Per-example cross-entropy, plus its sum and mean over the batch.
        self.y_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.pre_softmax, labels=self.y_input)
        self.xent = tf.reduce_sum(self.y_xent, name='y_xent')
        self.mean_xent = tf.reduce_mean(self.y_xent)
        self.weight_decay_loss = self._decay()
def _global_avg_pool(self, x):
    """Average a rank-4 tensor over axes 1 and 2.

    Asserts that `x` has exactly four dimensions and returns its mean taken
    over the two middle axes, which removes them from the result.
    """
    rank = x.get_shape().ndims
    assert rank == 4
    pooled_axes = [1, 2]
    return tf.reduce_mean(x, pooled_axes)
def body(self, features):
    """Build the teacher (and, when distilling, the student) classifier.

    Args:
        features: dict passed through to the wrapped models; the
            "targets_raw" entry is squeezed over axes 1-3 to obtain labels.

    Returns:
        A pair ``(outputs, losses)``: ``outputs`` are the logits of the
        network trained in the current phase, reshaped to
        ``[-1, 1, 1, 1, num_classes]``, and ``losses`` is
        ``{"training": phase_loss}``.
    """
    hparams = self.hparams
    distilling = hparams.distill_phase == "distill"

    labels = tf.squeeze(features["targets_raw"], [1, 2, 3])
    one_hot_targets = tf.one_hot(
        labels, hparams.num_classes, dtype=tf.float32)

    # Teacher network: always built, trained only outside the distill phase.
    with tf.variable_scope("teacher"):
        teacher_features = self.teacher_model.body(features)
        tf.logging.info(
            "teacher output shape: %s" % teacher_features.get_shape())
        teacher_pooled = tf.reduce_mean(teacher_features, axis=[1, 2])
        teacher_logits = tf.layers.dense(teacher_pooled, hparams.num_classes)
        teacher_task_xent = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=one_hot_targets, logits=teacher_logits)

    if not distilling:
        outputs = teacher_logits
        phase_loss = teacher_task_xent
    else:
        # Restore the teacher from its checkpoint and freeze it by emptying
        # the trainable-variables collection before the student is created.
        tf.train.init_from_checkpoint(
            hparams.teacher_dir, {"teacher/": "teacher/"})
        frozen = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)
        del frozen[:]

        # Student network.
        temperature = hparams.distill_temperature
        with tf.variable_scope("student"):
            student_features = self.student_model.body(features)
            tf.logging.info(
                "student output shape: %s" % student_features.get_shape())
            student_pooled = tf.reduce_mean(student_features, axis=[1, 2])
            student_logits = tf.layers.dense(
                student_pooled, hparams.num_classes)

            student_task_xent = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=one_hot_targets, logits=student_logits)
            teacher_targets = tf.nn.softmax(teacher_logits / temperature)
            student_distill_xent = (
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=tf.stop_gradient(teacher_targets),
                    logits=student_logits / temperature))
            # Scale the soft-target objective to match the hard-target
            # objective's scale.
            student_distill_xent = student_distill_xent * temperature**2

        outputs = student_logits

        # Summaries
        tf.summary.scalar("distill_xent", student_distill_xent)

        phase_loss = (
            hparams.task_balance * student_task_xent
            + (1 - hparams.task_balance) * student_distill_xent)

    losses = {"training": phase_loss}
    outputs = tf.reshape(outputs, [-1, 1, 1, 1, outputs.shape[1]])
    return outputs, losses
def _calc_loudness_loss(self, gt_lds, pred_lds):
    """Mean absolute loudness error over all ``self.C`` sources.

    Source ``i`` is read as ``gt_lds[:, i, :]`` from the ground truth and as
    ``pred_lds[i]`` from the predictions.
    """
    per_source_error = []
    for source in range(self.C):
        per_source_error.append(gt_lds[:, source, :] - pred_lds[source])
    return tf.reduce_mean(tf.abs(per_source_error))
def build_model(batch, seq_len, vocab_size, d_model, head):
    """Build a single multi-head self-attention layer with a pooled output.

    Args:
        batch: static batch size used for the placeholders and reshapes.
        seq_len: static sequence length.
        vocab_size: accepted for interface compatibility; not used here.
        d_model: width of the input and of the output projection.
        head: number of attention heads.

    Returns:
        Tuple ``(input_tensor, mask_tensor, pooled_output_tensor,
        disagreement_cost, logprob_tensor, attention_probs)``.
    """
    input_tensor = tf.placeholder(
        shape=(batch, seq_len, d_model), dtype=tf.int32)
    mask_tensor = tf.placeholder(shape=(batch, seq_len), dtype=tf.float32)

    # No embedding lookup: the integer inputs are used directly as features.
    hidden = tf.cast(input_tensor, tf.float32)

    # Static positional encoding.
    if USE_POSITIONAL_ENCODING:
        position_table = generate_position_embedding(
            input_len=seq_len, d_model=d_model)
        hidden = hidden + tf.constant(position_table, dtype=tf.float32)

    # Flatten to 2D before the Q/K/V projections.
    flat_hidden = tf.reshape(hidden, (-1, d_model))

    size_per_head = int(d_model / head)
    projection_width = size_per_head * head
    K = tf.layers.dense(flat_hidden, projection_width, name='K')
    Q = tf.layers.dense(flat_hidden, projection_width, name='Q')
    V = tf.layers.dense(flat_hidden, projection_width, name='V')

    # [Batch, Head, Len, Size_per_Head]
    K = transpose_for_scores(K, batch, head, seq_len, size_per_head)
    Q = transpose_for_scores(Q, batch, head, seq_len, size_per_head)
    V = transpose_for_scores(V, batch, head, seq_len, size_per_head)

    # Scaled dot-product attention: [Batch, Head, Len-Q, Len-K]
    scale = 1.0 / math.sqrt(float(size_per_head))
    attention_scores = tf.multiply(tf.matmul(Q, K, transpose_b=True), scale)

    # Attention mask that blocks attention to padding tokens:
    # [Batch, Len, Len] expanded to [Batch, 1, Len, Len].
    to_mask = tf.reshape(mask_tensor, [batch, 1, seq_len])
    broadcast_ones = tf.ones(shape=[batch, seq_len, 1], dtype=tf.float32)
    attention_mask = tf.expand_dims(broadcast_ones * to_mask, axis=[1])

    # Add -10000.0 to the scores of masked positions.
    attention_scores = attention_scores + (1.0 - attention_mask) * -10000.0

    # [Batch, Head, Len, Len]
    attention_probs = tf.nn.softmax(attention_scores)

    # [Batch, Head, Len-Q, Size_per_Head] -> [Batch, Len-Q, Head, Size_per_Head]
    context_layer = tf.transpose(
        tf.matmul(attention_probs, V), [0, 2, 1, 3])

    # Cost of the difference between attention-head outputs.
    disagreement_cost = get_attention_heads_disagreement_cost(context_layer)

    # [Batch x Len-Q, Head x Size_per_Head], then the final linear projection.
    merged_heads = tf.reshape(
        context_layer, [batch * seq_len, head * size_per_head])
    projected = tf.layers.dense(merged_heads, d_model, name='output')

    # [Batch, Len-Q, Head x Size_per_Head]
    output_tensor = tf.reshape(
        projected, [batch, seq_len, head * size_per_head])

    # Pool by averaging over the last axis of every token's hidden state.
    pooled_output_tensor = tf.reduce_mean(output_tensor, axis=-1)

    # Binary classification output.
    logprob_tensor = tf.nn.sigmoid(pooled_output_tensor, name='sigmoid')

    return (input_tensor, mask_tensor, pooled_output_tensor,
            disagreement_cost, logprob_tensor, attention_probs)
def _build_graph(self):
    """Build the separation network and its f0 / loudness losses.

    Pulls one batch from ``self.dataloader``, encodes the first audio
    channel, passes it through the bottleneck and ``R * X`` residual blocks,
    turns the ``C`` decoder outputs into softmax masks over the encoding, and
    decodes an f0 estimate and a loudness track from every masked encoding.

    Sets ``self.single_audios``, ``self.encoded_len``, ``self.outputs``,
    ``self.losses``, ``self.loss`` and ``self.inputs``; returns nothing.

    NOTE(review): confirm that the residual-block loop is meant to sit
    outside the "bottleneck" variable scope.
    """
    # audios is indexed with three axes below: entry 0 on axis 1 is the
    # network input, the remaining entries are unstacked into
    # `single_audios` -- presumably [batch, 1 + C, samples]; TODO confirm.
    audios, f0s, loudness = self.dataloader.get_next()
    input_audio = audios[:, 0, :]
    self.single_audios = single_audios = tf.unstack(
        audios[:, 1:, :], axis=1)

    with tf.variable_scope("encoder"):
        # encoded_input: [batch_size, some len, N]
        encoded_input = self.layers["conv1d_encoder"](
            inputs=tf.expand_dims(input_audio, -1))
        # presumably the encoded length of a 4-second clip -- TODO confirm
        self.encoded_len = int(4 * self.sample_rate // self.L)

    with tf.variable_scope("bottleneck"):
        # norm_input: [batch_size, some len, N]
        norm_input = self._channel_norm(encoded_input, "bottleneck")
        # block_input: [batch_size, some len, B]
        block_input = self.layers["bottleneck"](norm_input)

    # R repeats of X blocks; every block adds its output to its own input
    # and the sum feeds the next block.
    for r in range(self.R):
        for x in range(self.X):
            now_block = "block_{}_{}_".format(r, x)
            with tf.variable_scope(now_block):
                block_output = self.layers[now_block +
                                           "first_1x1_conv"](block_input)
                block_output = self.layers[now_block +
                                           "first_PReLU"](block_output)
                block_output = self._global_norm(block_output, "first")
                block_output = self._depthwise_conv1d(block_output, x)
                block_output = self.layers[now_block +
                                           "second_PReLU"](block_output)
                block_output = self._global_norm(block_output, "second")
                block_output = self.layers[now_block +
                                           "second_1x1_conv"](block_output)
                # residual connection
                block_input = block_output = block_output + block_input

    sep_output_list = [
        self.layers["1x1_conv_decoder_{}".format(i)](block_output)
        for i in range(self.C)
    ]

    # softmax across the C decoder outputs (stacked on a new last axis)
    probs = tf.nn.softmax(tf.stack(sep_output_list, axis=-1))
    prob_list = tf.unstack(probs, axis=-1)

    # C, B, T, N -- each mask multiplies the shared encoding
    sep_output_list = [mask * encoded_input for mask in prob_list]

    # C, B, T, 128
    f0_deconved = [
        self.layers["f0_deconv"](sep_output)
        for sep_output in sep_output_list
    ]
    # Stride used to subsample the time axis when T_TO_F > 1.
    T_TO_F = (self.sample_rate // self.frame_rate) // self.L
    # C, B, F, 128
    if T_TO_F > 1:
        f0_deconved = [y[:, ::T_TO_F, :] for y in f0_deconved]

    # C, B, T
    loudness_deconved = [
        tf.squeeze(self.layers["loudness_deconv"](sep_output), axis=-1)
        for sep_output in sep_output_list
    ]
    if T_TO_F > 1:
        loudness_deconved = [y[:, ::T_TO_F] for y in loudness_deconved]

    # Softplus plus a small floor keeps every entry positive; dividing by the
    # sum over the last axis makes each frame sum to one.
    probs = [tf.nn.softplus(y) + 1e-3 for y in f0_deconved]
    probs = [prob / tf.reduce_sum(prob, axis=-1, keepdims=True)
             for prob in probs]
    output_f0s = tf.squeeze(self._compute_f0_hz(probs), axis=-1)  # C, B, F
    output_loudnesses = loudness_deconved
    self.outputs = (output_f0s, output_loudnesses)

    f0_loss = self._calc_f0_loss(f0s, output_f0s)
    loudness_loss = self._calc_loudness_loss(loudness, output_loudnesses)

    # Per-source mean absolute errors, stored in self.losses.
    f0_diff = [tf.reduce_mean(tf.abs(f0s[:, i, :] - output_f0s[i, :, :]))
               for i in range(self.C)]
    ld_diff = [tf.reduce_mean(tf.abs(loudness[:, i, :] - output_loudnesses[i]))
               for i in range(self.C)]
    self.losses = (f0_diff, ld_diff)

    # Weighted combination of the two losses; weight_f0 is the f0 share.
    self.loss = self.weight_f0 * f0_loss + (1.0 - self.weight_f0) * loudness_loss
    self.inputs = (f0s, loudness)
# Training-script setup: load the data, build the graph and the cross-entropy
# objective, initialise the session and enter the epoch loop.
# `idx`, `lri`, `_EPOCH`, `model` and `get_data_set` are defined outside this
# fragment.
train_x, train_y = get_data_set("train")
test_x, test_y = get_data_set("test")
tf.set_random_seed(idx)
train_start = time()
filename = 'tanh2_gaussian_L50_N400_lr_%d' % (idx)
# Opened in append mode; NOTE(review): not closed anywhere in this fragment.
file_ = open("{}.csv".format(filename), 'a+')
_MARK = 0  # the epoch loop below stops as soon as this equals 1
sess = tf.Session()
global_accuracy = 0
epoch_start = 0
x, y, output, y_pred_cls, global_step, learning_rate = model()
# loss = tf.reduce_mean(tf.reduce_sum((y-output)**2,reduction_indices=[1]))
# Cross-entropy computed by hand from `output`.
# NOTE(review): tf.log(output) is not finite where `output` is 0 -- confirm
# that `model()` never emits exact zeros.
loss = tf.reduce_mean(-tf.reduce_sum(y * tf.log(output), reduction_indices=[1]))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)
correct_prediction = tf.equal(y_pred_cls, tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
sess.run(init)
print("\n Learning rate:{}".format(lri))
for i in range(_EPOCH):
    if _MARK == 1:
        print('exist epoch loop now')
        break
    print("\nEpoch: {}/{}\n".format((i + 1), _EPOCH))
def main():
    """Train a small CNN on CIFAR-10, checkpoint it and plot diagnostics.

    Loads the pickled train/test split through ``loadData``, builds a TF1
    graph (three VALID convolutions with leaky-ReLU, two 2x2 max-pools and
    one dense layer), trains it with Adam on randomly positioned contiguous
    mini-batches, evaluates on the test set every 100 iterations, saves the
    model as ``my_model`` and finally shows the accuracy / cost curves and
    the first-layer filters.
    """
    # Reset Graph
    tf.reset_default_graph()

    # Load Data
    DATA_FILE = "cifar_10_tf_train_test.pkl"
    train_x, train_y, test_x, test_y = loadData(DATA_FILE)
    test_y_np = np.array(test_y)
    print("Train X size:\t", train_x.shape)
    print("Train Y size:\t", len(train_y))
    print("Test X size:\t", test_x.shape)
    print("Test Y size:\t", len(test_y))

    # Hyper Parameters
    batch_size = 100
    num_epochs = 3000
    learning_rate = .005

    # Convolution Layer1
    filter_size1 = 5
    num_filters1 = 32
    # Convolution Layer2
    filter_size2 = 5
    num_filters2 = 32
    # Convolution Layer3
    filter_size3 = 3
    num_filters3 = 64

    # Dimensions of Data
    img_size = 32
    img_depth = 3  # number of channels in the image (red,blue,green)
    num_classes = 10

    # Initializers
    xavier_init = tf.initializers.glorot_normal()
    zero_init = tf.zeros_initializer()

    # Input Variables
    input_img = tf.placeholder(dtype=tf.uint8,
                               shape=[None, img_size, img_size, img_depth],
                               name="input_img")
    y = tf.placeholder(dtype=tf.int64, shape=[None], name="labels")

    # Normalization
    # NOTE(review): tf.image.convert_image_dtype already rescales integer
    # images to [0, 1], so the extra division by 255 looks like a double
    # normalization. Left unchanged to keep the trained numerics; confirm.
    x = tf.image.convert_image_dtype(input_img, dtype="float32")
    x = tf.math.divide(x, 255)
    # The mean is taken over axis 0 of whatever batch is fed.
    mean = tf.math.reduce_mean(x, 0)
    x = tf.math.subtract(x, mean)
    # Not consumed by the loss (the sparse cross-entropy takes `y` directly);
    # kept so the exported graph is unchanged.
    y_true = tf.one_hot(y, 10, dtype="float32")

    # Filters, Weights, and Biases
    F1_shape = [filter_size1, filter_size1, img_depth, num_filters1]
    F1 = tf.get_variable(shape=F1_shape, dtype='float32',
                         initializer=xavier_init, name="filter1")
    F1_bias = tf.get_variable(shape=[num_filters1], dtype='float32',
                              initializer=zero_init, name="filter_bias1")

    F2_shape = [filter_size2, filter_size2, num_filters1, num_filters2]
    F2 = tf.get_variable(shape=F2_shape, dtype='float32',
                         initializer=xavier_init, name="filter2")
    F2_bias = tf.get_variable(shape=[num_filters2], dtype='float32',
                              initializer=zero_init, name="filter_bias2")

    F3_shape = [filter_size3, filter_size3, num_filters2, num_filters3]
    F3 = tf.get_variable(shape=F3_shape, dtype='float32',
                         initializer=xavier_init, name="filter3")
    F3_bias = tf.get_variable(shape=[num_filters3], dtype='float32',
                              initializer=zero_init, name="filter_bias3")

    # 576 = 3 * 3 * 64: the flattened size of the third convolution's output
    # for 32x32 inputs with the VALID convolutions / pools below.
    weights_fc = tf.get_variable(shape=[576, num_classes], dtype="float32",
                                 initializer=xavier_init, name="weightsfc")
    bias_fc = tf.get_variable(shape=[10], dtype="float32",
                              initializer=zero_init, name="biasfc")

    # Forward Propagation
    conv_layer1 = tf.nn.leaky_relu(
        tf.nn.conv2d(x, filters=F1, strides=[1, 1, 1, 1], padding="VALID")
        + F1_bias)
    pool1 = tf.nn.pool(conv_layer1, window_shape=[2, 2], pooling_type="MAX",
                       strides=[2, 2], padding="VALID")
    conv_layer2 = tf.nn.leaky_relu(
        tf.nn.conv2d(pool1, filters=F2, strides=[1, 1, 1, 1], padding="VALID")
        + F2_bias)
    pool2 = tf.nn.pool(conv_layer2, window_shape=[2, 2], pooling_type="MAX",
                       strides=[2, 2], padding="VALID")
    conv_layer3 = tf.nn.leaky_relu(
        tf.nn.conv2d(pool2, filters=F3, strides=[1, 1, 1, 1], padding="VALID")
        + F3_bias)

    # Vectorize Final Convolution
    conv_vector = tf.layers.flatten(conv_layer3)
    print(conv_layer1.get_shape())
    print(conv_layer2.get_shape())
    print(conv_layer3.get_shape())
    print(conv_vector.get_shape())

    # Fully Connected Layer
    logits = tf.matmul(conv_vector, weights_fc) + bias_fc
    softmax_op = tf.nn.softmax(logits)
    predict_lbl = tf.argmax(softmax_op, axis=1, name='predict_lbl')

    # Cost Function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits, name=None)
    correct_prediction = tf.equal(predict_lbl, y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    cost = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Create the collection.
    tf.get_collection("validation_nodes")
    # Add stuff to the collection.
    tf.add_to_collection("validation_nodes", input_img)
    tf.add_to_collection("validation_nodes", predict_lbl)

    # start training
    saver = tf.train.Saver()

    # Plot Variables
    cost_list = []
    train_accuracy_list = []
    test_accuracy_list = []
    test_accuracy_cls = {}

    start_time = time.time()

    # Initialize the Graph
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        print("\n\n\n")
        sess.run(init)
        trained_set = set()
        for e in range(num_epochs):
            # NOTE(review): this pause adds 0.1 s to every iteration; its
            # purpose is not evident from the code -- confirm it is needed.
            time.sleep(.1)

            # Pick a random contiguous window, then shuffle inside it.
            indlimit = train_x.shape[0] - batch_size
            index = random.randint(0, indlimit)
            trained_set.update(range(index, index + batch_size))
            x_batch = train_x[index: index + batch_size]
            y_batch = train_y[index: index + batch_size]
            permutation = np.random.permutation(len(y_batch))
            x_batch = x_batch[permutation, :]
            y_batch = np.asarray(y_batch)[permutation]
            batch_feed = {input_img: x_batch, y: y_batch}

            sess.run(optimizer, feed_dict=batch_feed)

            # Store values for plots (measured after the update step).
            cost_list.append(sess.run(cost, feed_dict=batch_feed))
            train_accuracy_list.append(
                sess.run(accuracy, feed_dict=batch_feed))

            if e % 100 == 0:
                print("Iteration:\t", e)
                print("Index Start:\t", index)
                print("Len Trained Set:", len(trained_set))
                predict_test = sess.run(predict_lbl,
                                        feed_dict={input_img: test_x})
                # Fraction correct over the actual test-set size instead of
                # a hard-coded 5000.
                test_accuracy = np.mean(predict_test == test_y_np)
                test_accuracy_list.append(test_accuracy)
                print("Test Accuracy:", test_accuracy)
                print()

        # Re-evaluate with the final weights so the per-class statistics
        # below describe the model that is saved, not the one from the last
        # logged iteration.
        predict_test = sess.run(predict_lbl, feed_dict={input_img: test_x})

        # The filters must be read and the model saved inside the session
        # that trained them.
        conv1_filters = sess.run(F1, feed_dict={input_img: x_batch})
        conv1_filter_images = (
            (conv1_filters + 0.1) * (1 / 0.3) * 255).astype('uint8')
        saver.save(sess, "my_model")

    # Per-class tally of total and correctly classified test examples.
    for label, guess in zip(test_y_np, predict_test):
        stats = test_accuracy_cls.setdefault(
            label, {"correct": 0, "total": 0})
        stats["total"] += 1
        if label == guess:
            # Previously this bumped "total" a second time, so "correct"
            # always stayed at 0.
            stats["correct"] += 1
    print(test_accuracy_cls)

    end_time = time.time()
    print("Time Ellapsed:", end_time - start_time)

    plt.plot(range(0, len(train_accuracy_list)), train_accuracy_list)
    plt.title('Total Training Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('%Accuracy')
    plt.show()

    plt.plot(range(0, len(cost_list)), cost_list)
    plt.title('Total Error/Cost')
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()

    plt.plot(range(0, len(test_accuracy_list)), test_accuracy_list)
    plt.title('Total Test Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('%Accuracy')
    plt.show()

    # Show the 32 first-layer filters in a 4 x 8 grid.
    print(conv1_filter_images.shape)
    conv1_filter_images = conv1_filter_images.T
    for i in range(32):
        plt.subplot(4, 8, i + 1)
        plt.title("Filter " + str(i + 1), fontsize=6)
        plt.axis("off")
        plt.imshow(conv1_filter_images[i].T)
    plt.show()
def __init__(self,
             load_path=LOAD_PATH,
             meta_path=META_PATH,
             keep_features=True,
             required_sample_size=10000,
             num_splits=5,
             do_kdsd=True,
             do_conditional_dsds=True,
             sample_freq=24000):
    """Set up the DeepSpeech2 graph and the distance-computation tensors.

    Args:
        load_path: Path to DeepSpeech2 model checkpoint.
        meta_path: Path to DeepSpeech2 meta graph file.
        keep_features: If True, reference and benchmark features will be
            kept in memory for faster evaluation of future samples.
        required_sample_size: Minimum sample size required for computation.
            Double of this number of samples is required from reference
            (real data) sample to compute benchmark. It is rounded down to
            a multiple of ``num_splits * batch_size``.
        num_splits: Computation of FDSD and cFDSD will compute mean and std
            of distance based on results from this number of independent
            runs.
        do_kdsd: If True, Kernel distances (KDSD, cKDSD) will also be
            computed.
        do_conditional_dsds: If True, conditional distances will be
            computed.
        sample_freq: Audio sample frequency.

    Raises:
        ValueError: If ``num_splits`` is smaller than 1, or if
            ``required_sample_size`` is smaller than
            ``num_splits * batch_size``.
    """
    self.load_path = load_path
    self.meta_path = meta_path
    self.batch_size = 16  # Fixed in DeepSpeech2 graph.
    self.keep_features = keep_features
    self.kept_features = {}
    self.do_kdsd = do_kdsd
    self.do_conditional_dsds = do_conditional_dsds
    self.sample_freq = sample_freq
    self.input_tensors = [
        'IteratorGetNext:0', 'IteratorGetNext:1', 'IteratorGetNext:2'
    ]
    self.output_tensor = 'ForwardPass/ds2_encoder/Reshape_2:0'
    self._restored = False

    # Reject a non-positive split count explicitly; zero would otherwise
    # surface as a ZeroDivisionError in the rounding below.
    if num_splits < 1:
        raise ValueError(
            f"num_splits must be at least 1, got {num_splits}.")

    mult = num_splits * self.batch_size
    if required_sample_size // mult < 1:
        raise ValueError(
            f"Too small sample size ({required_sample_size}) for "
            f"given batch size ({self.batch_size}) and number of "
            f"splits ({num_splits}).")
    # Round down so the sample divides evenly into splits and batches.
    self.required_sample_size = (required_sample_size // mult) * mult

    self.saver = tf.train.import_meta_graph(meta_path)
    self.sess_config = tf.ConfigProto(allow_soft_placement=True)
    self.sess_config.gpu_options.allow_growth = True

    # presumably 1600 is the width of the DeepSpeech2 feature vector read
    # from `output_tensor` -- TODO confirm
    shape = (self.required_sample_size, 1600)
    self.ref_features = tf.placeholder(tf.float32, shape=shape,
                                       name='ref_features')
    self.eval_features = tf.placeholder(tf.float32, shape=shape,
                                        name='eval_features')

    # One Frechet distance per split; report their mean and std.
    zipped = zip(tf.split(self.ref_features, num_splits),
                 tf.split(self.eval_features, num_splits))
    dists = [frechet_dist(ref, ev) for ref, ev in zipped]
    self.dists = [(tf.reduce_mean(dists), tf.math.reduce_std(dists))]
    if self.do_kdsd:
        self.dists += [
            kernel_dist(self.ref_features, self.eval_features,
                        dtype=tf.float32)
        ]

    self.real_data = None
    self.real_data_benchmarks = None
def train(args):
    """Trains the model.

    Builds the PNG patch pipeline, the autoencoder with its hyperprior, the
    rate-distortion loss and its optimizers, then runs the training op in a
    monitored session until a stop hook fires.

    Raises:
        RuntimeError: if ``args.train_glob`` matches no files.
    """
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    patch_shape = (args.patchsize, args.patchsize, 3)

    # Input pipeline (kept on the CPU): shuffle, decode, random-crop, batch.
    with tf.device("/cpu:0"):
        png_paths = glob.glob(args.train_glob)
        if not png_paths:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        patches = (
            tf.data.Dataset.from_tensor_slices(png_paths)
            .shuffle(buffer_size=len(png_paths))
            .repeat()
            .map(read_png, num_parallel_calls=args.preprocess_threads)
            .map(lambda image: tf.random_crop(image, patch_shape))
            .batch(args.batchsize)
            .prefetch(32)
        )

    num_pixels = args.batchsize * args.patchsize**2

    # One training batch of patches.
    x = patches.make_one_shot_iterator().get_next()

    # Model components.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Autoencoder and hyperprior.
    y = analysis_transform(x)
    z = hyper_analysis_transform(abs(y))
    z_tilde, z_likelihoods = entropy_bottleneck(z, training=True)
    sigma = hyper_synthesis_transform(z_tilde)
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    y_tilde, y_likelihoods = conditional_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde)

    # Rate: total number of bits divided by number of pixels.
    total_log_likelihood = (
        tf.reduce_sum(tf.log(y_likelihoods))
        + tf.reduce_sum(tf.log(z_likelihoods)))
    train_bpp = total_log_likelihood / (-np.log(2) * num_pixels)

    # Distortion: mean squared error across pixels, multiplied by 255^2 to
    # correct for rescaling.
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde)) * 255**2

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    global_step = tf.train.create_global_step()
    main_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        train_loss, global_step=global_step)
    aux_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        entropy_bottleneck.losses[0])
    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", train_bpp)
    tf.summary.scalar("mse", train_mse)
    tf.summary.image("original", quantize_image(x))
    tf.summary.image("reconstruction", quantize_image(x_tilde))

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]
    session = tf.train.MonitoredTrainingSession(
        hooks=hooks,
        checkpoint_dir=args.checkpoint_dir,
        save_checkpoint_secs=300,
        save_summaries_secs=60)
    with session as sess:
        while not sess.should_stop():
            sess.run(train_op)
def is_duplicate(endpoints):
    """Implements a simple duplicate filter, based on L1 difference in RGB.

    Returns a boolean tensor: the mean absolute difference between
    ``endpoints['rgb'][1]`` and ``endpoints['rgb'][0]`` compared (with
    ``tf.greater``) against ``params.input.duplicates_filter_threshold``.
    """
    rgb = endpoints['rgb']
    mean_abs_diff = tf.reduce_mean(tf.abs(rgb[1] - rgb[0]))
    threshold = params.input.duplicates_filter_threshold
    return tf.greater(mean_abs_diff, threshold)
def _build_net(self):
    """Build the actor network and all of its loss / gradient ops.

    Creates the input placeholders, a two-layer dense policy network
    (128 tanh units, then ``self.n_actions`` logits), the policy-gradient
    loss with an entropy term, the safety loss, the KL divergence against
    fed-in old probabilities, the clipped PPO loss, two Adam train ops, and
    the flat-parameter / flat-gradient / Fisher-product / parameter-assign
    ops. Everything is stored on ``self``; nothing is returned.

    NOTE(review): confirm which statements are meant to live inside the
    outer "Actor" variable scope and the name scopes; the nesting below
    affects op and optimizer-slot names.
    """
    with tf.variable_scope("Actor"+self.suffix, reuse=tf.AUTO_REUSE):
        with tf.name_scope('inputs'+self.suffix):
            self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features], name='observation'+self.suffix)
            self.tf_acts = tf.placeholder(tf.int32, [None, ], name='actions_num'+self.suffix)
            self.tf_vt = tf.placeholder(tf.float32, [None, ], name='actions_value'+self.suffix)
            self.tf_safe = tf.placeholder(tf.float32, [None, ], name='safety_value'+self.suffix)
            self.entropy_weight = tf.placeholder(tf.float32, shape=(), name='entropy_weight_clustering'+self.suffix)
            ##### PPO change #####
            # NOTE(review): this placeholder is overwritten further down,
            # where self.ppo_ratio is reassigned to a computed tensor.
            self.ppo_ratio = tf.placeholder(tf.float32, [None, ], name='ppo_ratio'+self.suffix)
            ##### PPO change #####

        # Hidden layer: 128 tanh units.
        layer = tf.layers.dense(
            inputs=self.tf_obs,
            units=128,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
            # kernel_initializer=tf.orthogonal_initializer(gain=np.sqrt(2.)), # ppo default initialization
            bias_initializer=tf.constant_initializer(0.1),
            name='fc1'+self.suffix
        )
        # Output layer: one unnormalised score per action.
        all_act = tf.layers.dense(
            inputs=layer,
            units=self.n_actions,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
            # kernel_initializer=tf.orthogonal_initializer(gain=np.sqrt(2.)), # ppo default initialization
            bias_initializer=tf.constant_initializer(0.1),
            name='fc2'+self.suffix
        )
        print("kernel_initializer: random_initializer")

        # Trainable variables under the Actor scope and their static shapes;
        # both are passed to the flat-parameter helpers below.
        self.trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor'+self.suffix)
        self.trainable_variables_shapes = [var.get_shape().as_list() for var in self.trainable_variables]
        print("trainable_variables_shapes", self.trainable_variables_shapes)

        # sampling: action probabilities, clipped away from zero
        self.all_act_prob = tf.nn.softmax(all_act, name='act_prob'+self.suffix)
        self.all_act_prob = tf.clip_by_value(self.all_act_prob, 1e-20, 1.0)

        with tf.name_scope('loss'+self.suffix):
            # Negative log-probability of the chosen action (one-hot select).
            neg_log_prob = tf.reduce_sum(-tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * tf.one_hot(indices=self.tf_acts, depth=self.n_actions), axis=1)
            loss = tf.reduce_mean(neg_log_prob * self.tf_vt)
            # Adds entropy_weight * mean(sum(p * log p)), i.e. the negative
            # entropy scaled by the fed-in weight.
            loss += self.entropy_weight * tf.reduce_mean(tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * self.all_act_prob, axis=1))
            # Same term as the one just added to the loss, exposed separately.
            self.entro = self.entropy_weight * tf.reduce_mean(tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * self.all_act_prob, axis=1))
            self.loss = loss

        with tf.name_scope('train' + self.suffix):
            self.train_op = tf.train.AdamOptimizer(self.pg_lr).minimize(loss)

        # safety loss
        """ * -1? """
        self.chosen_action_log_probs = tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * tf.one_hot(indices=self.tf_acts, depth=self.n_actions), axis=1)
        ##### PPO CHANGE #####
        self.ppo_old_chosen_action_log_probs = tf.placeholder(tf.float32, [None])
        ##### PPO CHANGE #####
        self.old_chosen_action_log_probs = tf.stop_gradient(tf.placeholder(tf.float32, [None]))
        # self.each_safety_loss = tf.exp(self.chosen_action_log_probs - self.old_chosen_action_log_probs) * self.tf_safe
        # Difference of new and old chosen-action probabilities, weighted by
        # the fed-in safety value.
        self.each_safety_loss = (tf.exp(self.chosen_action_log_probs) - tf.exp(self.old_chosen_action_log_probs)) * self.tf_safe
        self.average_safety_loss = tf.reduce_mean(self.each_safety_loss) #/ self.n_episodes tf.reduce_sum
        # self.average_safety_loss +=self.entro

        # KL D
        self.old_all_act_prob = tf.stop_gradient(tf.placeholder(tf.float32, [None, self.n_actions]))

        def kl(x, y):
            # KL divergence between two categorical distributions given as
            # probability tensors; near-zero entries are replaced by EPS
            # before the distributions are built.
            EPS = 1e-10
            x = tf.where(tf.abs(x) < EPS, EPS * tf.ones_like(x), x)
            y = tf.where(tf.abs(y) < EPS, EPS * tf.ones_like(y), y)
            X = tf.distributions.Categorical(probs=x + EPS)
            Y = tf.distributions.Categorical(probs=y + EPS)
            return tf.distributions.kl_divergence(X, Y, allow_nan_stats=False)

        self.each_kl_divergence = kl(self.all_act_prob, self.old_all_act_prob) # tf.reduce_sum(kl(self.all_act_prob, self.old_all_act_prob), axis=1)
        self.average_kl_divergence = tf.reduce_mean(self.each_kl_divergence)
        # self.kl_gradients = tf.gradients(self.average_kl_divergence, self.trainable_variables) # useless
        # NOTE(review): `desired_kl` is not defined inside this method; it
        # must resolve to a name from an enclosing/module scope, otherwise
        # this line raises NameError -- confirm.
        self.desired_kl = desired_kl
        # self.metrics = [self.loss, self.average_kl_divergence, self.average_safety_loss, self.entro] # Luping
        # self.loss appears twice: the second entry replaces the KL metric of
        # the commented-out variant above.
        self.metrics = [self.loss, self.loss, self.average_safety_loss, self.entro] # Luping

        # FLat
        self.flat_params_op = get_flat_params(self.trainable_variables)
        """not use tensorflow default function, here we calculate the gradient by self: (1) loss: g (2) kl: directional_gradients (math, fisher) (3) safe: b """

        ##### PPO change #####
        #### PPO Suyi's Change ####
        with tf.name_scope('ppoloss' + self.suffix):
            # Probability ratio between the current policy and the fed-in
            # old log-probabilities.
            self.ppo_ratio = tf.exp(self.chosen_action_log_probs - self.ppo_old_chosen_action_log_probs)
            # self.ppo_ratio = tf.Print(self.ppo_ratio, [self.ppo_ratio], "self.ppo_ratio: ")
            surr = self.ppo_ratio * self.tf_vt
            # Clipped surrogate: minimum of the unclipped and clipped terms.
            self.ppoloss = -tf.reduce_mean(tf.minimum(
                surr,
                tf.clip_by_value(self.ppo_ratio, 1.- self.clip_eps, 1.+ self.clip_eps) * self.tf_vt))
            self.ppoloss += self.entropy_weight * tf.reduce_mean(tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * self.all_act_prob, axis=1))
            # self.ppoloss += 0.01 * tf.reduce_mean(tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * self.all_act_prob, axis=1))

        with tf.variable_scope('ppotrain'):
            # self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(self.ppoloss)
            self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(self.ppoloss)
        #### PPO Suyi's Change ####

        self.ppoloss_flat_gradients_op = get_flat_gradients(self.ppoloss, self.trainable_variables)
        ##### PPO change #####

        # Flattened gradients of each objective w.r.t. the actor variables.
        self.loss_flat_gradients_op = get_flat_gradients(self.loss, self.trainable_variables)
        self.kl_flat_gradients_op = get_flat_gradients(self.average_kl_divergence, self.trainable_variables)
        self.constraint_flat_gradients_op = get_flat_gradients(self.average_safety_loss, self.trainable_variables)

        # Vector placeholder created just before the Fisher-product op --
        # presumably read by get_fisher_product_op(); TODO confirm.
        self.vec = tf.placeholder(tf.float32, [None])
        self.fisher_product_op = self.get_fisher_product_op()

        # Flat parameter vector fed in to overwrite the actor variables.
        self.new_params = tf.placeholder(tf.float32, [None])
        self.params_assign_op = assign_network_params_op(self.new_params, self.trainable_variables, self.trainable_variables_shapes)
def _build(self, optimizer=None):
    """Wire encoder and decoder into a VAE and define loss and train step.

    Args:
        optimizer: optimizer used for the training step; a default
            ``tf.train.AdamOptimizer()`` is created when None.

    Raises:
        ValueError: if the decoder output shape differs from the encoder
            input shape (batch axis excluded).
    """
    # Mean and log-variance produced by the encoder.
    z_mu, z_logvar = self.Encoder()

    # Reparameterised sample z ~ N(mu, var).
    with tf.name_scope('sample_z'):
        noise = tf.random_normal(shape=tf.shape(z_mu))
        z_sample = z_mu + tf.exp(z_logvar / 2) * noise

    # Decoder linked for training (logits) and for output.
    _, logits = self.Decoder(z_sample, with_logits=True)
    self._P = self.Decoder()

    # The decoder must reproduce the encoder's input shape.
    p_shape = self._P.get_shape().as_list()[1:]
    q_shape = self.Q.input_tensor.get_shape()[1:]
    if p_shape != q_shape:
        raise ValueError('Output shape of decoder {} does not match the input '
                         'shape of encoder {}'.format(p_shape, q_shape))

    # Output tensor: the decoder output, reshaped only when a different
    # output shape was requested.
    if self._output_shape is None:
        self._output_shape = p_shape
    self._outputs = (
        self._P if p_shape == self._output_shape
        else tf.reshape(self._P, shape=[-1] + self._output_shape,
                        name='outputs'))

    with tf.name_scope('Losses'):
        # Reconstruction term: sigmoid cross-entropy summed over axis 1.
        element_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=self.Q.input_tensor)
        recon_loss = tf.reduce_mean(tf.reduce_sum(element_xent, 1))
        # KL term computed from z_mu and z_logvar.
        kl_per_example = 0.5 * tf.reduce_sum(
            tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
        kl_loss = tf.reduce_mean(kl_per_example)
        # VAE loss
        vae_loss = recon_loss + kl_loss
        self._loss = vae_loss

    with tf.name_scope('Summaries'):
        scalar_summaries = [
            tf.summary.scalar('recon_loss', recon_loss),
            tf.summary.scalar('kl_loss', kl_loss),
            tf.summary.scalar('vae_loss', vae_loss),
        ]
        self._merged_summary = tf.summary.merge(scalar_summaries)

    # Fall back to Adam when no optimizer was supplied.
    if optimizer is None:
        optimizer = tf.train.AdamOptimizer()

    with tf.name_scope('Train_Step'):
        self._train_step = optimizer.minimize(vae_loss)

    # Print status and model structure
    self._show_building_info(Encoder=self.Q, Decoder=self.P)

    # Set default snapshot function TODO
    self._snapshot_function = self._default_snapshot_function

    # Launch session
    self.launch_model(overwrite=hub.overwrite)
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

# Training data: seven (x, y) pairs.
xData = [1, 2, 3, 4, 5, 6, 7]
yData = [25000, 55000, 75000, 110000, 128000, 155000, 180000]

# Slope and intercept start at random values between -100 and 100.
W = tf.Variable(tf.random_uniform([1], -100, 100))
b = tf.Variable(tf.random_uniform([1], -100, 100))

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# Hypothesis: a straight line.
H = W * X + b

# Cost: mean (reduce_mean) of the squared (square) error.
cost = tf.reduce_mean(tf.square(H - Y))

# Step size: how far to move along the gradient on each update.
a = tf.Variable(0.01)

# Gradient-descent optimizer from the library.
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

training_feed = {X: xData, Y: yData}
for i in range(5001):
    sess.run(train, feed_dict=training_feed)
    if i % 500 != 0:
        continue
    # Report progress every 500 steps.
    cost_value, w_value, b_value = sess.run(
        [cost, W, b], feed_dict=training_feed)
    print(i, cost_value, w_value, b_value)

# Prediction for x = 8.
print(sess.run(H, feed_dict={X: [8]}))
def sync_batch_norm(x, params, training, name='batch_norm'):
    """Batch normalization whose TPU statistics are combined across replicas.

    Args:
        x: input tensor; the size of its last axis sets the parameter shape
            and the statistics are reduced over axes 0, 1 and 2 (so x is
            presumably rank 4, channels last -- TODO confirm).
        params: object providing `use_tpu`, `num_replicas`,
            `batch_norm_epsilon` and `batch_norm_decay`.
        training: Python bool selecting batch statistics (True) or the
            stored moving statistics (False).
        name: variable scope holding gamma, beta and the moving statistics.

    Returns:
        When not training: the normalized tensor.
        When training and both moving statistics are `tf.Variable`s: the
        normalized tensor, with the moving-average updates added to
        `tf.GraphKeys.UPDATE_OPS`.
        When training otherwise: the tuple `(x, mean, variance)`.
        NOTE(review): callers must handle both return shapes.
    """
    size = x.shape[-1].value

    with tf.variable_scope(name):
        gamma = tf.get_variable(name='gamma', shape=[size],
                                initializer=tf.initializers.ones(),
                                trainable=True)
        beta = tf.get_variable(name='beta', shape=[size],
                               initializer=tf.initializers.zeros(),
                               trainable=True)
        moving_mean = tf.get_variable(name='moving_mean', shape=[size],
                                      initializer=tf.initializers.zeros(),
                                      trainable=False)
        moving_variance = tf.get_variable(name='moving_variance', shape=[size],
                                          initializer=tf.initializers.ones(),
                                          trainable=False)

    # Statistics are always computed in float32.
    x = tf.cast(x, tf.float32)
    if training:
        if params.use_tpu:
            num_replicas = params.num_replicas
            if num_replicas <= 8:
                # No explicit grouping: sum over all replicas.
                group_assign = None
                group_shards = tf.cast(num_replicas, tf.float32)
            else:
                group_shards = max(8, num_replicas // 8)

                # Snap the group size to a power of two; int() truncates the
                # base-2 logarithm, so this rounds down.
                log_num_replicas = max(1, int(np.log(group_shards) / np.log(2.)))
                group_shards = int(np.power(2., log_num_replicas))

                # Consecutive replica ids are split into groups of
                # `group_shards` replicas each.
                group_assign = np.arange(num_replicas, dtype=np.int32)
                group_assign = group_assign.reshape([-1, group_shards])
                group_assign = group_assign.tolist()
                group_shards = tf.cast(group_shards, tf.float32)

            # Dividing by the group size before the cross-replica sum yields
            # the average over the group.
            mean = tf.reduce_mean(x, [0, 1, 2])
            mean = tf.tpu.cross_replica_sum(mean / group_shards, group_assign)

            # Var[x] = E[x^2] - E[x]^2
            mean_sq = tf.reduce_mean(tf.math.square(x), [0, 1, 2])
            mean_sq = tf.tpu.cross_replica_sum(mean_sq / group_shards, group_assign)
            variance = mean_sq - tf.math.square(mean)
        else:
            mean, variance = tf.nn.moments(x, [0, 1, 2])

        x = tf.nn.batch_normalization(
            x, mean=mean, variance=variance, offset=beta, scale=gamma,
            variance_epsilon=params.batch_norm_epsilon)

        if USE_BFLOAT16:
            x = tf.cast(x, tf.bfloat16, name='batch_norm_recast')

        if (isinstance(moving_mean, tf.Variable) and
                isinstance(moving_variance, tf.Variable)):
            decay = tf.cast(1. - params.batch_norm_decay, tf.float32)

            def u(moving, normal, name):
                # Moving-average update:
                # moving -= (1 - batch_norm_decay) * (moving - normal).
                # On TPU `normal` is first averaged over all replicas.
                if params.use_tpu:
                    num_replicas_fp = tf.cast(params.num_replicas, tf.float32)
                    normal = tf.tpu.cross_replica_sum(normal) / num_replicas_fp
                diff = decay * (moving - normal)
                return tf.assign_sub(moving, diff, use_locking=True, name=name)

            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                                 u(moving_mean, mean, name='moving_mean'))
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                u(moving_variance, variance, name='moving_variance'))
            return x
        else:
            # The moving statistics are not plain variables here, so the
            # batch statistics are handed back to the caller instead.
            return x, mean, variance
    else:
        # Inference: normalize with the stored moving statistics.
        if params.use_tpu:
            x = tf.nn.batch_normalization(
                x, mean=moving_mean, variance=moving_variance, offset=beta,
                scale=gamma, variance_epsilon=params.batch_norm_epsilon)
        else:
            x, _, _ = tf.nn.fused_batch_norm(x, scale=gamma, offset=beta,
                                             mean=moving_mean,
                                             variance=moving_variance,
                                             epsilon=params.batch_norm_epsilon,
                                             is_training=False)

        if USE_BFLOAT16:
            x = tf.cast(x, tf.bfloat16)
        return x
def fit(self, dataset):
    """Compute the model parameters of the fair classifier using gradient
    descent.

    Builds the classifier (and, when `self.debias` is set, the adversary)
    graph under the variable scope `self.scope_name`, initializes all
    variables in `self.sess` and trains for `self.num_epochs` epochs over
    shuffled mini-batches of `self.batch_size` samples. A trailing partial
    batch in each epoch is not used.

    Only trainable variables created under this model's own variable scope
    are optimized, so several models with different scope names can share
    one graph without touching each other's parameters.

    Args:
        dataset (BinaryLabelDataset): Dataset containing true labels.

    Returns:
        AdversarialDebiasing: Returns self.

    Raises:
        RuntimeError: If TensorFlow is executing eagerly.
        ValueError: If `self.protected_attribute_name` is not one of
            `dataset.protected_attribute_names`.
    """
    if tf.executing_eagerly():
        raise RuntimeError("AdversarialDebiasing does not work in eager "
                "execution mode. To fix, add `tf.disable_eager_execution()`"
                " to the top of the calling script.")

    if self.seed is not None:
        np.random.seed(self.seed)
    ii32 = np.iinfo(np.int32)
    self.seed1, self.seed2, self.seed3, self.seed4 = np.random.randint(
        ii32.min, ii32.max, size=4)

    # Map the dataset labels to 0 and 1.
    temp_labels = dataset.labels.copy()
    temp_labels[(dataset.labels == dataset.favorable_label).ravel(), 0] = 1.0
    temp_labels[(dataset.labels == dataset.unfavorable_label).ravel(), 0] = 0.0

    with tf.variable_scope(self.scope_name) as scope:
        num_train_samples, self.features_dim = np.shape(dataset.features)

        # Setup placeholders
        self.features_ph = tf.placeholder(
            tf.float32, shape=[None, self.features_dim])
        self.protected_attributes_ph = tf.placeholder(
            tf.float32, shape=[None, 1])
        self.true_labels_ph = tf.placeholder(tf.float32, shape=[None, 1])
        self.keep_prob = tf.placeholder(tf.float32)

        # Obtain classifier predictions and classifier loss
        self.pred_labels, pred_logits = self._classifier_model(
            self.features_ph, self.features_dim, self.keep_prob)
        pred_labels_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.true_labels_ph, logits=pred_logits))

        if self.debias:
            # Obtain adversary predictions and adversary loss
            pred_protected_attributes_labels, pred_protected_attributes_logits = \
                self._adversary_model(pred_logits, self.true_labels_ph)
            pred_protected_attributes_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=self.protected_attributes_ph,
                    logits=pred_protected_attributes_logits))

        # Setup optimizers with learning rates
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 0.001
        learning_rate = tf.train.exponential_decay(
            starter_learning_rate, global_step, 1000, 0.96, staircase=True)
        classifier_opt = tf.train.AdamOptimizer(learning_rate)
        if self.debias:
            adversary_opt = tf.train.AdamOptimizer(learning_rate)

        # Select variables by this model's own scope prefix. The tag is
        # searched only in the part of the name after the prefix, so neither
        # another model in the same graph nor a scope name that happens to
        # contain the tag can leak foreign variables into the update.
        scope_prefix = scope.name + '/' if scope.name else ''

        def scoped_trainable_vars(tag):
            return [var for var in tf.trainable_variables()
                    if var.name.startswith(scope_prefix)
                    and tag in var.name[len(scope_prefix):]]

        classifier_vars = scoped_trainable_vars('classifier_model')
        if self.debias:
            adversary_vars = scoped_trainable_vars('adversary_model')
            # Gradient of the adversary loss w.r.t. each classifier variable.
            adversary_grads = {
                var: grad for (grad, var) in adversary_opt.compute_gradients(
                    pred_protected_attributes_loss, var_list=classifier_vars)}

        def normalize(x):
            # The tiny constant guards against division by a zero norm.
            return x / (tf.norm(x) + np.finfo(np.float32).tiny)

        # Update classifier parameters
        classifier_grads = []
        for (grad, var) in classifier_opt.compute_gradients(
                pred_labels_loss, var_list=classifier_vars):
            if self.debias:
                unit_adversary_grad = normalize(adversary_grads[var])
                # Remove the component of the classifier gradient that lies
                # along the adversary gradient ...
                grad -= tf.reduce_sum(grad * unit_adversary_grad) * unit_adversary_grad
                # ... and additionally step against the adversary gradient.
                grad -= self.adversary_loss_weight * adversary_grads[var]
            classifier_grads.append((grad, var))
        classifier_minimizer = classifier_opt.apply_gradients(
            classifier_grads, global_step=global_step)

        if self.debias:
            # Update adversary parameters (after the classifier step).
            with tf.control_dependencies([classifier_minimizer]):
                adversary_minimizer = adversary_opt.minimize(
                    pred_protected_attributes_loss,
                    var_list=adversary_vars)  # , global_step=global_step)

        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())

        # The protected-attribute column does not change between batches, so
        # resolve it once instead of on every iteration.
        protected_idx = dataset.protected_attribute_names.index(
            self.protected_attribute_name)

        # Begin training
        for epoch in range(self.num_epochs):
            shuffled_ids = np.random.choice(
                num_train_samples, num_train_samples, replace=False)
            for i in range(num_train_samples // self.batch_size):
                batch_ids = shuffled_ids[self.batch_size * i:
                                         self.batch_size * (i + 1)]
                batch_features = dataset.features[batch_ids]
                batch_labels = np.reshape(temp_labels[batch_ids], [-1, 1])
                batch_protected_attributes = np.reshape(
                    dataset.protected_attributes[batch_ids][:, protected_idx],
                    [-1, 1])

                batch_feed_dict = {
                    self.features_ph: batch_features,
                    self.true_labels_ph: batch_labels,
                    self.protected_attributes_ph: batch_protected_attributes,
                    self.keep_prob: 0.8}
                if self.debias:
                    _, _, pred_labels_loss_value, pred_protected_attributes_loss_value = self.sess.run(
                        [classifier_minimizer,
                         adversary_minimizer,
                         pred_labels_loss,
                         pred_protected_attributes_loss],
                        feed_dict=batch_feed_dict)
                    if i % 200 == 0:
                        print("epoch %d; iter: %d; batch classifier loss: %f; batch adversarial loss: %f" % (
                            epoch, i, pred_labels_loss_value,
                            pred_protected_attributes_loss_value))
                else:
                    _, pred_labels_loss_value = self.sess.run(
                        [classifier_minimizer, pred_labels_loss],
                        feed_dict=batch_feed_dict)
                    if i % 200 == 0:
                        print("epoch %d; iter: %d; batch classifier loss: %f" % (
                            epoch, i, pred_labels_loss_value))
    return self
def _calc_f0_loss(self, gt_f0s, pred_f0s):
    """Return the mean absolute difference between ground-truth and predicted f0s.

    For each index `i` in `range(self.C)`, the slice `gt_f0s[:, i, :]` is
    compared with `pred_f0s[i, :, :]`; note that the two inputs carry that
    index on different axes (axis 1 vs. axis 0). The mean is taken over all
    `self.C` difference tensors together.
    """
    per_index_diffs = []
    for idx in range(self.C):
        per_index_diffs.append(gt_f0s[:, idx, :] - pred_f0s[idx, :, :])
    # The list is handed to TensorFlow as-is, as in the original expression.
    abs_diffs = tf.abs(per_index_diffs)
    return tf.reduce_mean(abs_diffs)
def main(_):
    """Attach a small classifier to VGGish and run the demo training loop.

    The graph is built inside a fresh `tf.Graph`/`tf.Session`. After variable
    initialization the VGGish checkpoint named by `FLAGS.checkpoint` is
    loaded, and `FLAGS.num_batches` training steps are run on batches from
    `_get_examples_batch()`, printing the step and loss after each one.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        # VGGish itself; FLAGS.train_vggish is forwarded to its definition.
        vggish_embeddings = vggish_slim.define_vggish_slim(FLAGS.train_vggish)

        # Shallow classification head plus its training ops.
        with tf.variable_scope('mymodel'):
            # One hidden fully connected layer of 100 units.
            hidden_units = 100
            hidden = slim.fully_connected(vggish_embeddings, hidden_units)

            # Output layer: one independent logistic classifier per class,
            # which allows multi-label targets.
            class_logits = slim.fully_connected(
                hidden, _NUM_CLASSES, activation_fn=None, scope='logits')
            tf.sigmoid(class_logits, name='prediction')

            with tf.variable_scope('train'):
                step_collections = [
                    tf.GraphKeys.GLOBAL_VARIABLES,
                    tf.GraphKeys.GLOBAL_STEP,
                ]
                step_var = tf.Variable(
                    0,
                    name='global_step',
                    trainable=False,
                    collections=step_collections)

                # Labels are fed as batches of multi-hot vectors: 1 at each
                # positive class position and 0 elsewhere.
                labels_ph = tf.placeholder(
                    tf.float32, shape=(None, _NUM_CLASSES), name='labels')

                # Sigmoid cross-entropy, averaged into a scalar loss.
                per_class_xent = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=class_logits, labels=labels_ph, name='xent')
                mean_loss = tf.reduce_mean(per_class_xent, name='loss_op')
                tf.summary.scalar('loss', mean_loss)

                # Same optimizer and hyperparameters as used to train VGGish.
                adam = tf.train.AdamOptimizer(
                    learning_rate=vggish_params.LEARNING_RATE,
                    epsilon=vggish_params.ADAM_EPSILON)
                adam.minimize(mean_loss, global_step=step_var,
                              name='train_op')

        # Initialize every variable first, then load the pre-trained VGGish
        # checkpoint on top.
        sess.run(tf.global_variables_initializer())
        vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)

        # Look up by name everything the training loop needs.
        graph = sess.graph
        features_tensor = graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        labels_tensor = graph.get_tensor_by_name('mymodel/train/labels:0')
        global_step_tensor = graph.get_tensor_by_name(
            'mymodel/train/global_step:0')
        loss_tensor = graph.get_tensor_by_name('mymodel/train/loss_op:0')
        train_op = graph.get_operation_by_name('mymodel/train/train_op')

        # The training loop.
        fetches = [global_step_tensor, loss_tensor, train_op]
        for _ in range(FLAGS.num_batches):
            batch_features, batch_labels = _get_examples_batch()
            step_value, loss_value, _ = sess.run(
                fetches,
                feed_dict={
                    features_tensor: batch_features,
                    labels_tensor: batch_labels,
                })
            print('Step %d: loss %g' % (step_value, loss_value))
def __init__(self,
             env,
             context_encoder,
             context_encoder_recurrent=False,
             expert_trajs=None,
             reward_arch=relu_net,
             reward_arch_args=None,
             value_fn_arch=relu_net,
             score_discrim=False,
             discount=1.0,
             state_only=True,
             max_path_length=500,
             meta_batch_size=16,
             max_itrs=100,
             fusion=False,
             latent_dim=3,
             info_coeff=1.0,
             name='info_airl'):
    """Build the InfoAIRL discriminator graph and its training ops.

    Creates the input placeholders, samples a latent context from the
    context encoder, builds reward and value networks conditioned on that
    latent, and defines the discriminator loss, the information losses and
    the Adam update ops.

    Args:
        env: environment; `env.spec` supplies the flat observation/action
            dimensions and `env.action_space` must be a `Box`.
        context_encoder: object used through `dist_info_sym`,
            `distribution.log_likelihood_sym` and `get_params`.
        context_encoder_recurrent: not referenced in this constructor.
        expert_trajs: forwarded to `self.set_demos`.
        reward_arch: callable invoked as
            `reward_arch(rew_input, dout=1, **reward_arch_args)`.
        reward_arch_args: extra keyword arguments for `reward_arch`;
            `None` is treated as an empty dict.
        value_fn_arch: callable invoked as `value_fn_arch(inp, dout=1)`;
            must not be `None`.
        score_discrim: stored as `self.score_discrim`.
        discount: stored as `self.gamma` and used in the shaping term.
        state_only: when true the reward input is the observation only;
            otherwise observations and actions are concatenated.
        max_path_length: stored as `self.T`, the time dimension of every
            trajectory placeholder.
        meta_batch_size: leading dimension of every placeholder.
        max_itrs: stored as `self.max_itrs`.
        fusion: when true, `self.fusion` is a
            `RamFusionDistrCustom(20, subsample_ratio=0.5)`; else `None`.
        latent_dim: size of the latent context; it is subtracted from the
            env's flat observation dimension to obtain `self.dO`.
        info_coeff: weight applied to the information losses.
        name: outer variable scope for the whole model.
    """
    super(InfoAIRL, self).__init__()
    env_spec = env.spec
    if reward_arch_args is None:
        reward_arch_args = {}

    if fusion:
        # self.fusion = RamFusionDistrCustom(100, subsample_ratio=0.5)
        self.fusion = RamFusionDistrCustom(20, subsample_ratio=0.5)
    else:
        self.fusion = None
    # NOTE(review): presumably the env observation has the latent context
    # appended, hence the subtraction -- confirm against the env wrapper.
    self.dO = env_spec.observation_space.flat_dim - latent_dim
    self.dU = env_spec.action_space.flat_dim
    assert isinstance(env.action_space, Box)
    self.context_encoder = context_encoder
    self.score_discrim = score_discrim
    self.gamma = discount
    assert value_fn_arch is not None
    self.set_demos(expert_trajs)
    self.state_only = state_only
    self.T = max_path_length
    self.max_itrs = max_itrs
    self.latent_dim = latent_dim
    self.meta_batch_size = meta_batch_size

    # build energy model
    with tf.variable_scope(name) as _vs:
        # Should be meta_batch_size x batch_size x T x dO/dU
        self.expert_traj_var = tf.placeholder(
            tf.float32, [meta_batch_size, None, self.T, self.dO + self.dU],
            name='expert_traj')
        # Declared here but not consumed anywhere else in this constructor.
        self.sample_traj_var = tf.placeholder(
            tf.float32, [meta_batch_size, None, self.T, self.dO + self.dU],
            name='sample_traj')
        self.obs_t = tf.placeholder(
            tf.float32, [meta_batch_size, None, self.T, self.dO],
            name='obs')
        self.nobs_t = tf.placeholder(
            tf.float32, [meta_batch_size, None, self.T, self.dO],
            name='nobs')
        self.act_t = tf.placeholder(
            tf.float32, [meta_batch_size, None, self.T, self.dU],
            name='act')
        # Declared here but not consumed anywhere else in this constructor.
        self.nact_t = tf.placeholder(
            tf.float32, [meta_batch_size, None, self.T, self.dU],
            name='nact')
        self.labels = tf.placeholder(tf.float32,
                                     [meta_batch_size, None, 1, 1],
                                     name='labels')
        self.lprobs = tf.placeholder(tf.float32,
                                     [meta_batch_size, None, self.T, 1],
                                     name='log_probs')
        self.lr = tf.placeholder(tf.float32, (), name='lr')

        with tf.variable_scope('discrim') as dvs:
            # infer m_hat
            # Each expert trajectory is flattened to a single row before it
            # is passed to the context encoder.
            expert_traj_var = tf.reshape(
                self.expert_traj_var,
                [-1, (self.dO + self.dU) * self.T])

            # m_hat should be of shape meta_batch_size x (batch_size*2) x T x latent_dim
            context_dist_info_vars = self.context_encoder.dist_info_sym(
                expert_traj_var)
            context_mean_var = context_dist_info_vars["mean"]
            context_log_std_var = context_dist_info_vars["log_std"]
            # Reparameterized sample: mean + exp(log_std) * eps, with eps
            # drawn from a standard normal.
            eps = tf.random.normal(shape=tf.shape(context_mean_var))
            reparam_latent = eps * tf.exp(
                context_log_std_var) + context_mean_var

            # Repeat the per-trajectory latent for each of the T time steps,
            # then flatten to one row per time step.
            self.reparam_latent_tile = reparam_latent_tile = tf.tile(
                tf.expand_dims(reparam_latent, axis=1), [1, self.T, 1])
            reparam_latent_tile = tf.reshape(reparam_latent_tile,
                                             [-1, latent_dim])

            rew_input = self.obs_t
            if not self.state_only:
                rew_input = tf.concat([self.obs_t, self.act_t], axis=-1)

            # condition on inferred m
            rew_input = tf.concat([
                tf.reshape(rew_input,
                           [-1, rew_input.get_shape().dims[-1].value]),
                reparam_latent_tile
            ], axis=1)

            with tf.variable_scope('reward'):
                self.reward = reward_arch(rew_input,
                                          dout=1,
                                          **reward_arch_args)
                # Sum of per-step rewards over the time axis of each
                # trajectory.
                self.sampled_traj_return = tf.reduce_sum(tf.reshape(
                    self.reward, [meta_batch_size, -1, self.T]),
                    axis=-1,
                    keepdims=True)
                # with tf.variable_scope('reward', reuse=True):
                #     self.sampled_traj_return = reward_arch(tf.reshape(self.sampled_traj_var, [-1, self.dO+self.dU]), dout=1, **reward_arch)
                #     self.sampled_traj_return = tf.reduce_sum(tf.reshape(self.sampled_traj_return, [meta_batch_size, -1, self.T]), axis=-1)
                #energy_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

            npotential_input = tf.concat([
                tf.reshape(self.nobs_t, [-1, self.dO]),
                reparam_latent_tile
            ], axis=-1)
            potential_input = tf.concat([
                tf.reshape(self.obs_t, [-1, self.dO]),
                reparam_latent_tile
            ], axis=-1)

            # value function shaping
            # The second scope is opened with reuse=True so V(s') and V(s)
            # are produced by the same 'vfn' variables.
            with tf.variable_scope('vfn'):
                fitted_value_fn_n = value_fn_arch(npotential_input, dout=1)
            with tf.variable_scope('vfn', reuse=True):
                self.value_fn = fitted_value_fn = value_fn_arch(
                    potential_input, dout=1)

            # Define log p_tau(a|s) = r + gamma * V(s') - V(s)
            self.qfn = self.reward + self.gamma * fitted_value_fn_n
            log_p_tau = self.reward + self.gamma * fitted_value_fn_n - fitted_value_fn

        log_q_tau = self.lprobs
        log_p_tau = tf.reshape(log_p_tau,
                               [meta_batch_size, -1, self.T, 1])

        # The two log-terms are given as a list and reduced over axis 0,
        # i.e. log_pq = log(exp(log_p_tau) + exp(log_q_tau)) elementwise.
        log_pq = tf.reduce_logsumexp(
            [log_p_tau, log_q_tau],
            axis=0)  # [meta_batch_size, -1, self.T, 1]
        # exp(log_p_tau - log_pq) = p / (p + q)
        self.discrim_output = tf.exp(log_p_tau - log_pq)
        # Binary cross-entropy between self.labels and the discriminator
        # output, written in log space.
        cent_loss = -tf.reduce_mean(self.labels * (log_p_tau - log_pq) +
                                    (1 - self.labels) *
                                    (log_q_tau - log_pq))

        # compute mutual information loss
        # sampled_traj_var = tf.reshape(tf.concat([self.obs_t, self.act_t], axis=-1), [-1, (self.dO+self.dU)*self.T])
        log_q_m_tau = tf.reshape(
            self.context_encoder.distribution.log_likelihood_sym(
                reparam_latent, context_dist_info_vars),
            [meta_batch_size, -1, 1])

        # Used for computing gradient w.r.t. psi
        # Only entries whose label is 0 contribute, via the (1 - labels)
        # mask; the result is rescaled by the mean of that mask.
        info_loss = -tf.reduce_mean(log_q_m_tau *
                                    (1 - tf.squeeze(self.labels, axis=-1))
                                    ) / tf.reduce_mean(1 - self.labels)
        # Used for computing the gradient w.r.t. theta
        # The second term subtracts the masked mean return over axis 1 from
        # each trajectory return before weighting by log_q_m_tau.
        info_surr_loss = -tf.reduce_mean(
            (1 - tf.squeeze(self.labels, axis=-1)) * log_q_m_tau *
            self.sampled_traj_return -
            (1 - tf.squeeze(self.labels, axis=-1)) * log_q_m_tau *
            tf.reduce_mean(self.sampled_traj_return *
                           (1 - tf.squeeze(self.labels, axis=-1)),
                           axis=1,
                           keepdims=True) /
            tf.reduce_mean(1 - self.labels)
        ) / tf.reduce_mean(1 - self.labels)

        # self.loss is exposed as an attribute; the update ops below are
        # built from separately computed gradients, not from this sum.
        self.loss = cent_loss + info_coeff * info_loss
        self.info_loss = info_loss
        tot_loss = self.loss

        context_encoder_weights = self.context_encoder.get_params(
            trainable=True)
        # NOTE(review): the two filters below match on the substrings
        # "reward"/"vfn" across ALL trainable variables in the graph, not
        # only those under this model's scope -- confirm this is intended.
        # reward_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="reward")
        reward_weights = [
            i for i in tf.trainable_variables() if "reward" in i.name
        ]
        # value_fn_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="vfn")
        value_fn_weights = [
            i for i in tf.trainable_variables() if "vfn" in i.name
        ]

        # self.step = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(tot_loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        # Three gradient sets: the cross-entropy loss w.r.t. all three
        # weight groups, info_loss w.r.t. the context encoder, and
        # info_surr_loss w.r.t. the reward network.
        grads_and_vars_cent = optimizer.compute_gradients(
            cent_loss,
            var_list=reward_weights + value_fn_weights +
            context_encoder_weights)
        grads_and_vars_context = optimizer.compute_gradients(
            info_coeff * info_loss, var_list=context_encoder_weights)
        grads_and_vars_reward = optimizer.compute_gradients(
            info_coeff * info_surr_loss, var_list=reward_weights)

        # NOTE(review): reward and context-encoder variables each appear in
        # two of the concatenated lists passed to apply_gradients.
        self.step = optimizer.apply_gradients(grads_and_vars_cent +
                                              grads_and_vars_context +
                                              grads_and_vars_reward)
        # Separate op that applies only the context-encoder info gradients.
        self.train_context_encoder_step = optimizer.apply_gradients(
            grads_and_vars_context)

        # grads_and_vars_cent = optimizer.compute_gradients(cent_loss, var_list=reward_weights+value_fn_weights)
        # grads_and_vars_reward = optimizer.compute_gradients(info_coeff*info_surr_loss, var_list=reward_weights)
        # self.step = optimizer.apply_gradients(grads_and_vars_cent+grads_and_vars_reward)
        self._make_param_ops(_vs)