def _build(self, num_classifiers, learning_rate):
    """Build an ensemble of feedforward classifiers with averaged logits.

    Each member network is trained jointly; a pairwise-L2 "diversity"
    penalty between member outputs is added to the ensemble loss.

    Args:
        num_classifiers: number of independent sub-networks (>= 1).
        learning_rate: Adam learning rate.
    """
    import itertools

    # inputs: 28x28 images and integer class labels (10 classes)
    self.X = tf.placeholder(tf.float32, [None, 28, 28])
    self.y = tf.placeholder(tf.int32, [None])
    one_hot_y = tf.one_hot(self.y, 10)

    networks = [layers.feedforward(self.X) for _ in range(num_classifiers)]
    self.individual_loss = [layers.loss(net, one_hot_y) for net in networks]
    self.individual_accuracy = [
        layers.accuracy(net, one_hot_y) for net in networks
    ]

    # ensemble prediction: average the member logits
    logits = tf.reduce_mean(tf.stack(networks, axis=-1), axis=-1)

    # Diversity regularizer: sum of L2 distances over all member pairs.
    # BUG FIX: the original hard-coded the three pairs of a 3-member
    # ensemble (networks[0..2]) and broke for any other num_classifiers.
    # itertools.combinations yields exactly the same three pairs when
    # num_classifiers == 3, so behavior is unchanged for existing callers.
    pair_norms = [tf.norm(a - b)
                  for a, b in itertools.combinations(networks, 2)]
    l2_distance = tf.add_n(pair_norms) if pair_norms else tf.constant(0.0)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=one_hot_y)
    self.loss = tf.reduce_mean(cross_entropy) + 1e-4 * l2_distance

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.train_op = optimizer.minimize(self.loss)

    correct_prediction = tf.equal(tf.argmax(logits, axis=1),
                                  tf.argmax(one_hot_y, axis=1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    self.prediction = tf.argmax(logits, axis=1)
def loss(y, y_):
    """Return the mean per-sample loss of predictions *y_* against labels *y*.

    Each sample is pushed through the project's loss layer
    (``net.loss().forward``) as a pair of 1-row matrices.
    """
    m = len(y)
    total = 0
    for idx in range(m):
        sample = {'x': np.matrix(y_[idx]), 'y': np.matrix(y[idx])}
        total += net.loss().forward(sample)
    return total / m
def build_model(input_data_tensor, input_label_tensor):
    """Assemble the VGG classification graph and its evaluation metrics.

    Returns a dict that contains at least the "loss" key, as required by
    the training driver.
    """
    num_classes = config["num_classes"]

    resized = tf.image.resize_images(input_data_tensor, [224, 224])
    logits = vgg.build(resized, n_classes=num_classes, training=True)
    probs = tf.nn.softmax(logits)

    one_hot_labels = tf.one_hot(input_label_tensor, num_classes)
    loss = L.loss(logits, one_hot_labels)

    error_top5 = L.topK_error(probs, input_label_tensor, K=5)
    error_top1 = L.topK_error(probs, input_label_tensor, K=1)

    # you must return a dictionary with at least the "loss" as a key
    return {
        "loss": loss,
        "logits": logits,
        "error_top5": error_top5,
        "error_top1": error_top1,
    }
def gradCostFunc(x, y, w1):
    """Return (mean cost, mean gradient w.r.t. *w1*) over all rows of *x*.

    For every sample, runs a forward pass through the inner layer followed
    by the loss layer, then backpropagates to accumulate the weight
    gradient. Both the cost and gradient are averaged over the batch.
    """
    num_samples = x.shape[0]
    total_cost = 0
    total_grad = 0

    for idx in range(num_samples):
        # forward pass: column-vector input through inner layer, then loss
        features = np.matrix(x[idx, :]).T
        target = np.matrix(y[idx])
        activation = net.inner().forward({'x': features, 'w': w1})
        total_cost += net.loss().forward({'x': activation, 'y': target})

        # backward pass: seed with dE/dE = 1 and chain through the layers
        seed = [1]
        grad_activation = net.loss().backward({
            'x': activation, 'y': target, 'dzdx': seed
        })
        _, grad_w = net.inner().backward({
            'x': features, 'w': w1, 'dzdx': grad_activation
        })
        total_grad += grad_w

    return total_cost / num_samples, total_grad / num_samples
def build_model(input_data_tensor, input_label_tensor):
    """Build the VGG graph with L2 weight decay and return loss/metrics.

    The returned dict must contain "loss"; the remaining entries are extra
    tensors exposed for monitoring.
    """
    num_classes = config["num_classes"]
    weight_decay = config["weight_decay"]

    resized = tf.image.resize_images(input_data_tensor, [224, 224])
    logits = vgg.build(resized, n_classes=num_classes, training=True)
    probs = tf.nn.softmax(logits)

    loss_classify = L.loss(logits,
                           tf.one_hot(input_label_tensor, num_classes))
    # L2 penalty summed over every variable in the 'variables' collection
    penalties = [tf.nn.l2_loss(v) for v in tf.get_collection('variables')]
    loss_weight_decay = tf.reduce_sum(tf.stack(penalties))
    loss = loss_classify + weight_decay * loss_weight_decay

    error_top5 = L.topK_error(probs, input_label_tensor, K=5)
    error_top1 = L.topK_error(probs, input_label_tensor, K=1)

    # you must return a dictionary with loss as a key, other variables
    return {
        "loss": loss,
        "probs": probs,
        "logits": logits,
        "error_top5": error_top5,
        "error_top1": error_top1,
    }
def _build(self, num_classifiers, learning_rate):
    """Build an ensemble of convolutional nets mixed by a linear layer.

    Member outputs are concatenated and passed through a bias-free linear
    layer to produce the 10-class ensemble logits.
    """
    # placeholders for 28x28 images and integer labels
    self.X = tf.placeholder(tf.float32, [None, 28, 28])
    self.y = tf.placeholder(tf.int32, [None])
    one_hot_y = tf.one_hot(self.y, 10)

    networks = [layers.convolutional(self.X) for _ in range(num_classifiers)]
    self.individual_loss = [layers.loss(n, one_hot_y) for n in networks]
    self.individual_accuracy = [
        layers.accuracy(n, one_hot_y) for n in networks
    ]

    # learn how to combine the members instead of plain averaging
    combined = tf.concat(networks, axis=-1)
    logits = layers.linear(combined, 10, bias=False)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=one_hot_y)
    self.loss = tf.reduce_mean(cross_entropy)

    self.train_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.loss)

    hits = tf.equal(tf.argmax(logits, axis=1), tf.argmax(one_hot_y, axis=1))
    self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
    self.prediction = tf.argmax(logits, axis=1)
def training():
    """Train VGG16 on CIFAR-10, logging train/val summaries and checkpoints.

    BUG FIX: the validation log previously received the *training* batch's
    summary string. Validation now runs ``summary_op`` with the validation
    feed_dict so the val writer records validation statistics.
    """
    pretrained_weights = './pretrain/vgg16.npy'
    data_dir = './data/cifar10_data/cifar-10-batches-bin'
    train_log_dir = './log/train/'
    val_log_dir = './log/val/'

    with tf.name_scope('input'):
        images_train, labels_train = input_data.read_cifar10(
            data_dir, is_train=True, batch_size=BATCH_SIZE, shuffle=True)
        images_val, labels_val = input_data.read_cifar10(
            data_dir, is_train=False, batch_size=BATCH_SIZE, shuffle=False)

    image_holder = tf.placeholder(tf.float32,
                                  shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.8)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())
    # Summaries depend on the placeholders, so summary_op must always be
    # run with a feed_dict. Reference:
    # https://stackoverflow.com/questions/35413618/tensorflow-placeholder-error-when-using-tf-merge-all-summaries
    summary_op = tf.summary.merge_all()

    # The main thread
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)
    print('########################## Start Training ##########################')

    # Load pretrained conv weights; the fully-connected layers are retrained.
    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    # Coordinate the input-queue threads.
    # Reference: http://wiki.jikexueyuan.com/project/tensorflow-zh/how_tos/threading_and_queues.html
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    train_summary_writer = tf.summary.FileWriter(train_log_dir,
                                                 graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            train_images, train_labels = sess.run(
                [images_train, labels_train])
            _, train_loss, train_acc, train_summary = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={image_holder: train_images,
                           label_holder: train_labels})

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' %
                      (step, train_loss, train_acc))
                train_summary_writer.add_summary(train_summary, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                # FIX: evaluate summary_op on the validation batch instead
                # of re-writing the training summary to the validation log.
                val_loss, val_acc, val_summary = sess.run(
                    [loss, accuracy, summary_op],
                    feed_dict={image_holder: val_images,
                               label_holder: val_labels})
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' %
                      (step, val_loss, val_acc))
                val_summary_writer.add_summary(val_summary, step)

            # NOTE(review): checkpoints are numbered with the loop `step`,
            # not the graph's global_steps variable — confirm intentional.
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        coord.request_stop()

    coord.request_stop()
    coord.join(threads)
    sess.close()
def training():
    """Train VGG16 with exponential LR decay; log train/val summaries.

    BUG FIX: validation summaries are now computed from the validation
    batch instead of re-using the last training batch's summary string.
    """
    pretrained_weights = './pretrain/vgg16.npy'
    train_log_dir = './log_dr50000/train/'
    val_log_dir = './log_dr50000/val/'

    with tf.name_scope('input'):
        images_train, labels_train = dr5_input.input_data(True, BATCH_SIZE)
        images_val, labels_val = dr5_input.input_data(False, BATCH_SIZE)

    image_holder = tf.placeholder(tf.float32,
                                  shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.5)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    # `start_rate`, `decay_steps` and `deacy_rate` [sic] are module-level
    # constants defined outside this view.
    LEARNING_RATE = tf.train.exponential_decay(start_rate, global_steps,
                                               decay_steps, deacy_rate,
                                               staircase=True)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # The main thread
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess = tf.InteractiveSession()
    sess.run(init)
    print(
        '########################## Start Training ##########################')

    # Load pretrained conv weights; the fully-connected layers are retrained.
    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir,
                                                 graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            train_images, train_labels = sess.run(
                [images_train, labels_train])
            _, train_loss, train_acc, train_summary = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={
                    image_holder: train_images,
                    label_holder: train_labels
                })

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' %
                      (step, train_loss, train_acc))
                train_summary_writer.add_summary(train_summary, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                # FIX: compute the summary on the validation batch rather
                # than writing the stale training summary to the val log.
                val_loss, val_acc, val_summary = sess.run(
                    [loss, accuracy, summary_op],
                    feed_dict={
                        image_holder: val_images,
                        label_holder: val_labels
                    })
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' %
                      (step, val_loss, val_acc))
                val_summary_writer.add_summary(val_summary, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                lr = sess.run(LEARNING_RATE)
                print("step %d, learning_rate= %f" % (step, lr))
    except tf.errors.OutOfRangeError:
        coord.request_stop()

    coord.request_stop()
    coord.join(threads)
    sess.close()
def main(N=300, K=3, D=2, nodes=100, lr=1e-3, reg=1e-8):
    """Train a small 2-hidden-layer softmax classifier on synthetic data.

    Runs an endless train loop (stop with Ctrl-C) that plots the data,
    periodically numerically checks gradients, and redraws the decision
    regions every 1000 iterations.

    FIXES: `dtype=np.float` (removed in NumPy 1.24) replaced with `float`,
    and `np.asscalar(v)` (removed in NumPy 1.23) replaced with `v.item()`.
    Numerically identical on all NumPy versions that support `.item()`.

    Args:
        N: number of samples to generate.
        K: number of classes.
        D: input dimensionality.
        nodes: hidden-layer width.
        lr: learning rate passed to each layer's step().
        reg: regularization strength passed to each layer's step().
    """
    # Generate and plot data set
    X, Y = gen_data(N, K, D)
    print("Plotting data...")
    col_levels = np.array(list(range(K + 1)), dtype=float) - 0.5
    col_cmap = plt.cm.gist_rainbow
    col_norm = col.BoundaryNorm(col_levels, col_cmap.N)
    plt.ion()
    plt.subplot(1, 1, 1)
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=col_cmap, norm=col_norm,
                vmin=np.min(Y), vmax=np.max(Y))
    plt.draw()
    input("Press <ENTER> to continue.")

    # Set up layers: input -> (fc, sigmoid, dropout) x2 -> fc -> softmax -> loss
    layers = []
    layers += [L.input(X)]
    layers += [L.fc(layers[-1].Y, nodes)]
    layers += [L.sigmoid(layers[-1].Y)]
    layers += [L.dropout(layers[-1].Y, 0.25)]
    layers += [L.fc(layers[-1].Y, nodes)]
    layers += [L.sigmoid(layers[-1].Y)]
    layers += [L.dropout(layers[-1].Y, 0.25)]
    layers += [L.fc(layers[-1].Y, K)]
    layers += [L.softmax(layers[-1].Y)]
    layers += [L.loss(layers[-1].Y, Y)]
    nlayers = len(layers)

    # TODO (architecture): Instead of calling fwd on each layer, connect
    # layers with "pointers" and call fwd only on the first layer
    try:
        itx = 1
        while True:
            # Forward propagation
            for i, layer in enumerate(layers):
                if i == 0:
                    layer.X = X
                else:
                    layer.reshape(layers[i - 1].Y.shape)
                    layer.X = layers[i - 1].Y
                layer.fwd()
            if np.isnan(layers[-1].Y[0, 0]):
                pdb.set_trace()
            print("Iteration {}, Loss = {:.4f}".format(
                itx, layers[-1].Y.item()), end='\r')
            if itx % 1000 == 0:
                print("")

            # Backprop (last layer to first); every 5000 iterations also
            # spot-check one analytic gradient against a central difference.
            for i in list(range(nlayers))[::-1]:
                if i == nlayers - 1:
                    layers[i].dy = 1
                else:
                    layers[i].dy = layers[i + 1].dx
                layers[i].bck()
                if itx % 5000 == 0:
                    # Gradient check on a random (row, col) of layer i's input
                    if np.all(layers[i].dx == 0):
                        continue
                    r, c = [
                        np.random.choice(layers[i].X.shape[j])
                        for j in (0, 1)
                    ]
                    h = 1e-4
                    if abs(layers[i].dx[r, c]) < 1e-5:
                        continue
                    print("Checking gradient on {}...".format(layers[i]),
                          end=' ')
                    X_store = layers[i].X
                    Y_ = []
                    # Evaluate loss at x-h and x+h with dropout disabled
                    # (stochastic=False) so the check is deterministic.
                    for X_ in [layers[i].X[r, c] + s * h for s in (-1, 1)]:
                        layers[i].X[r, c] = X_
                        for j in range(i, nlayers):
                            if j > i:
                                layers[j].X = layers[j - 1].Y
                            stochastic_store = layers[j].stochastic
                            layers[j].stochastic = False
                            layers[j].fwd()
                            layers[j].stochastic = stochastic_store
                        Y_.append(layers[-1].Y.item())
                    layers[i].X = X_store
                    dx = layers[i].dx[r, c]
                    ndx = (Y_[1] - Y_[0]) / (2 * h)
                    # relative difference, guarded against division by ~0
                    diff = abs(ndx - dx) / max(abs(ndx), abs(dx), 1e-10)
                    print("Diff: {:.8f}".format(diff))
                    if diff > 1e-2:
                        pdb.set_trace()

            # Parameter update
            for layer in layers:
                layer.step(lr, reg)

            # Every 1000 iterations, redraw the decision regions over a
            # 400x400 grid covering the data (with half-range margins).
            if itx % 1000 == 0:
                range_ = [np.max(X[:, i]) - np.min(X[:, i]) for i in (0, 1)]
                x, y = [
                    np.linspace(
                        np.min(X[:, i]) - range_[i] / 2,
                        np.max(X[:, i]) + range_[i] / 2, 400)
                    for i in (0, 1)
                ]
                xx, yy = np.meshgrid(x, y)
                X_ = np.c_[xx.flatten(), yy.flatten()]
                # Forward the grid through all layers except the loss layer,
                # again with dropout disabled for a deterministic picture.
                for i, layer in enumerate(layers[:-1]):
                    if i == 0:
                        layer.X = X_
                    else:
                        layer.reshape(layers[i - 1].Y.shape)
                        layer.X = layers[i - 1].Y
                    temp = layer.stochastic
                    layer.stochastic = False
                    layer.fwd()
                    layer.stochastic = temp
                z = np.argmax(layers[-2].Y, axis=1).reshape(xx.shape)
                plt.clf()
                plt.contourf(xx, yy, z, levels=col_levels, cmap=col_cmap,
                             norm=col_norm)
                plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=col_cmap,
                            norm=col_norm)
                plt.draw()
                plt.pause(1e-10)
            itx += 1
    except KeyboardInterrupt:
        pass
def run(self, run_type):
    """Run one epoch of training or evaluation for the segmentation model.

    Builds the whole graph (inputs, model, metrics, summaries), restores
    from self.checkpoint if set, iterates one pass over the dataset, writes
    per-step and per-epoch summaries, and (when training) saves a
    checkpoint. The graph is reset at the end so run() can be called again.

    Args:
        run_type: 'train' for a training epoch; anything else evaluates.
    """
    is_training = True if run_type == 'train' else False
    self.log('{} epoch: {}'.format(run_type, self.epoch))
    image_filenames, label_filenames = self.dataset.load_filenames(
        run_type)
    global_step = tf.Variable(1, name='global_step', trainable=False)
    # Batched input pipeline; crops to 256x512 and augments.
    images, labels = inputs.load_batches(image_filenames,
                                         label_filenames,
                                         shape=self.dataset.SHAPE,
                                         batch_size=self.batch_size,
                                         resize_shape=self.dataset.SHAPE,
                                         crop_shape=(256, 512),
                                         augment=True)
    with tf.name_scope('labels'):
        color_labels = util.colorize(labels, self.dataset.augmented_labels)
        labels = tf.cast(labels, tf.int32)
        # Mask of pixels to include in loss/metrics (ignored labels -> 0).
        ignore_mask = util.get_ignore_mask(labels,
                                           self.dataset.augmented_labels)
        tf.summary.image('label', color_labels, 1)
        tf.summary.image('weights', tf.cast(ignore_mask * 255, tf.uint8), 1)
        tf.summary.image('image', images, 1)
    logits = self.model.inference(images,
                                  num_classes=self.num_classes,
                                  is_training=is_training)
    with tf.name_scope('outputs'):
        predictions = layers.predictions(logits)
        color_predictions = util.colorize(predictions,
                                          self.dataset.augmented_labels)
        tf.summary.image('prediction', color_predictions, 1)
    # Add some metrics (streaming: update ops accumulate across steps)
    with tf.name_scope('metrics'):
        accuracy_op, accuracy_update_op = tf.contrib.metrics.streaming_accuracy(
            predictions, labels, weights=ignore_mask)
        mean_iou_op, mean_iou_update_op = tf.contrib.metrics.streaming_mean_iou(
            predictions, labels, num_classes=self.num_classes,
            weights=ignore_mask)
    if is_training:
        loss_op = layers.loss(logits, labels, mask=ignore_mask,
                              weight_decay=self.weight_decay)
        train_op = layers.optimize(loss_op,
                                   learning_rate=self.learning_rate,
                                   global_step=global_step)
    # Merge all summaries into summary op
    summary_op = tf.summary.merge_all()
    # Create restorer for restoring
    saver = tf.train.Saver()
    # Initialize session and local variables (for input pipeline and metrics)
    sess = tf.Session()
    sess.run(tf.local_variables_initializer())
    if self.checkpoint is None:
        sess.run(tf.global_variables_initializer())
        self.log('{} {} from scratch.'.format(run_type, self.model_name))
    else:
        start_time = time.time()
        saver.restore(sess, self.checkpoint)
        duration = time.time() - start_time
        self.log('{} from previous checkpoint {:s} ({:.2f}s)'.format(
            run_type, self.checkpoint, duration))
    # Create summary writer
    summary_path = os.path.join(self.model_path, run_type)
    step_writer = tf.summary.FileWriter(summary_path, sess.graph)
    # Start filling the input queues
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    num_examples = self.dataset.NUM_TRAIN_EXAMPLES if is_training else self.dataset.NUM_VALID_EXAMPLES
    # One pass over the dataset (integer number of full batches).
    for local_step in range(num_examples // self.batch_size):
        # Take time!
        start_time = time.time()
        if is_training:
            _, loss, accuracy, mean_iou, summary = sess.run([
                train_op, loss_op, accuracy_update_op, mean_iou_update_op,
                summary_op
            ])
            duration = time.time() - start_time
            self.log('Epoch: {} train step: {} loss: {:.4f} accuracy: {:.2f}% duration: {:.2f}s' \
                .format(self.epoch, local_step + 1, loss, accuracy * 100, duration))
        else:
            accuracy, mean_iou, summary = sess.run(
                [accuracy_update_op, mean_iou_update_op, summary_op])
            duration = time.time() - start_time
            self.log('Epoch: {} eval step: {} accuracy: {:.2f}% duration: {:.2f}s'\
                .format(self.epoch, local_step + 1, accuracy * 100, duration))
        # Save summary and print stats
        step_writer.add_summary(summary,
                                global_step=global_step.eval(session=sess))
    # Write additional epoch summaries
    epoch_writer = tf.summary.FileWriter(summary_path)
    epoch_summaries = []
    if is_training:
        epoch_summaries.append(
            tf.summary.scalar('params/weight_decay', self.weight_decay))
        epoch_summaries.append(
            tf.summary.scalar('params/learning_rate', self.learning_rate))
        epoch_summaries.append(
            tf.summary.scalar('params/batch_size', self.batch_size))
    # Final streaming-metric values accumulated over the whole epoch.
    epoch_summaries.append(
        tf.summary.scalar('metrics/accuracy', accuracy_op))
    epoch_summaries.append(
        tf.summary.scalar('metrics/mean_iou', mean_iou_op))
    epoch_summary_op = tf.summary.merge(epoch_summaries)
    summary = sess.run(epoch_summary_op)
    epoch_writer.add_summary(summary, global_step=self.epoch)
    # Save after each epoch when training
    if is_training:
        checkpoint_path = os.path.join(self.model_path,
                                       self.model_name + '.checkpoint')
        start_time = time.time()
        self.checkpoint = saver.save(sess, checkpoint_path,
                                     global_step=self.epoch)
        duration = time.time() - start_time
        self.log('Model saved as {:s} ({:.2f}s)'.format(
            self.checkpoint, duration))
    # Stop queue runners and reset the graph
    coord.request_stop()
    coord.join(threads)
    sess.close()
    tf.reset_default_graph()
def create_network(self):
    """Create network.

    Builds a multi-turn response-matching graph: the response and each
    context turn get stacked self-attention representations, cross
    attention is computed between every turn and the response at every
    stack depth, the resulting similarity maps are aggregated across
    turns, and a 3D CNN plus a loss layer produce (loss, logits).

    Returns:
        (loss, logits) as produced by layers.loss on the cnn_3d output.
    """
    # optional cache of attention masks shared across all block() calls
    mask_cache = dict() if self.use_mask_cache else None

    response_emb = fluid.layers.embedding(
        input=self.response,
        size=[self._vocab_size + 1, self._emb_size],
        is_sparse=self.use_sparse_embedding,
        param_attr=fluid.ParamAttr(
            name=self.word_emb_name,
            initializer=fluid.initializer.Normal(scale=0.1)))

    # response part: stack of self-attention blocks; keep every level's
    # output so cross attention can be applied per depth later.
    Hr = response_emb
    Hr_stack = [Hr]
    for index in six.moves.xrange(self._stack_num):
        Hr = layers.block(name="response_self_stack" + str(index),
                          query=Hr,
                          key=Hr,
                          value=Hr,
                          d_key=self._emb_size,
                          q_mask=self.response_mask,
                          k_mask=self.response_mask,
                          mask_cache=mask_cache)
        Hr_stack.append(Hr)

    # context part: one self-attention stack per turn
    sim_turns = []
    for t in six.moves.xrange(self._max_turn_num):
        # word embedding shares word_emb_name with the response embedding
        Hu = fluid.layers.embedding(
            input=self.turns_data[t],
            size=[self._vocab_size + 1, self._emb_size],
            is_sparse=self.use_sparse_embedding,
            param_attr=fluid.ParamAttr(
                name=self.word_emb_name,
                initializer=fluid.initializer.Normal(scale=0.1)))
        Hu_stack = [Hu]
        for index in six.moves.xrange(self._stack_num):
            # share parameters
            Hu = layers.block(name="turn_self_stack" + str(index),
                              query=Hu,
                              key=Hu,
                              value=Hu,
                              d_key=self._emb_size,
                              q_mask=self.turns_mask[t],
                              k_mask=self.turns_mask[t],
                              mask_cache=mask_cache)
            Hu_stack.append(Hu)

        # cross attention: turn attends response and vice versa, per depth
        r_a_t_stack = []
        t_a_r_stack = []
        for index in six.moves.xrange(self._stack_num + 1):
            t_a_r = layers.block(name="t_attend_r_" + str(index),
                                 query=Hu_stack[index],
                                 key=Hr_stack[index],
                                 value=Hr_stack[index],
                                 d_key=self._emb_size,
                                 q_mask=self.turns_mask[t],
                                 k_mask=self.response_mask,
                                 mask_cache=mask_cache)
            r_a_t = layers.block(name="r_attend_t_" + str(index),
                                 query=Hr_stack[index],
                                 key=Hu_stack[index],
                                 value=Hu_stack[index],
                                 d_key=self._emb_size,
                                 q_mask=self.response_mask,
                                 k_mask=self.turns_mask[t],
                                 mask_cache=mask_cache)
            t_a_r_stack.append(t_a_r)
            r_a_t_stack.append(r_a_t)

        # also include the raw self-attention stacks in the similarity
        t_a_r_stack.extend(Hu_stack)
        r_a_t_stack.extend(Hr_stack)

        # stack vs unsqueeze+concat: two equivalent ways to add the
        # channel axis; use_stack_op selects the fused op.
        if self.use_stack_op:
            t_a_r = fluid.layers.stack(t_a_r_stack, axis=1)
            r_a_t = fluid.layers.stack(r_a_t_stack, axis=1)
        else:
            for index in six.moves.xrange(len(t_a_r_stack)):
                t_a_r_stack[index] = fluid.layers.unsqueeze(
                    input=t_a_r_stack[index], axes=[1])
                r_a_t_stack[index] = fluid.layers.unsqueeze(
                    input=r_a_t_stack[index], axes=[1])
            t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
            r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)

        # sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
        # scaled dot product; 200.0 appears to be the key dimension used
        # for scaling — confirm against layers.block's d_key convention.
        sim = fluid.layers.matmul(x=t_a_r,
                                  y=r_a_t,
                                  transpose_y=True,
                                  alpha=1 / np.sqrt(200.0))
        sim_turns.append(sim)

    # aggregate the per-turn similarity maps along a new turn axis
    if self.use_stack_op:
        sim = fluid.layers.stack(sim_turns, axis=2)
    else:
        for index in six.moves.xrange(len(sim_turns)):
            sim_turns[index] = fluid.layers.unsqueeze(
                input=sim_turns[index], axes=[2])
        # sim shape: [batch_size, 2*(stack_num+1), max_turn_num, max_turn_len, max_turn_len]
        sim = fluid.layers.concat(input=sim_turns, axis=2)

    final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
    loss, logits = layers.loss(final_info, self.label)
    return loss, logits