def main(argv=None):
    """Build the model, then either evaluate saved checkpoints or train one.

    In test mode (``FLAGS.test``) the hyper-parameters are recovered from
    ``FLAGS.test_file``, whose underscore-separated fields mirror the
    ``E{}_BS{}_S{}_FC{}_L{}`` directory name written by the training branch.
    Every ``train_result<k>.ckpt`` found in that directory is restored and
    evaluated in turn.

    In training mode a new checkpoint directory is created if needed, the
    model is trained, saved as the next ``train_result<k>.ckpt`` and then
    evaluated once.

    Args:
        argv: Unused; present for the app-runner calling convention.
    """
    if FLAGS.test:
        # Field 0 is the epoch tag; fields 1-4 carry BS / S / FC / L values.
        fields = FLAGS.test_file.split("_")
        FLAGS.batch_size = int(fields[1][2:])
        FLAGS.seq_length = int(fields[2][1:])
        FLAGS.fc_hidden_unit = int(fields[3][2:])
        FLAGS.lstm_unit = int(fields[4][1:])

    model.build()

    data_prefix = FLAGS.data_dir + FLAGS.input_file
    train_file = data_prefix + "_train.csv"
    test_file = data_prefix + "_test.csv"
    ckpt_root = './ckpt/' + FLAGS.input_file + '/'

    with tf.Session() as sess:
        # Checkpoint saver shared by both modes.
        saver = tf.train.Saver()

        if not FLAGS.test:
            # Training: pick the next free checkpoint index in the run dir.
            ckpt_dir = ckpt_root + 'E{}_BS{}_S{}_FC{}_L{}/'.format(
                FLAGS.total_epoch,
                FLAGS.batch_size,
                FLAGS.seq_length,
                FLAGS.fc_hidden_unit,
                FLAGS.lstm_unit)
            if not os.path.exists(ckpt_dir):
                os.makedirs(ckpt_dir)
            saved_count = len(glob.glob(ckpt_dir + "*.meta"))
            ckpt_path = (ckpt_dir + "train_result"
                         + str(saved_count + 1) + ".ckpt")

            tf.global_variables_initializer().run()
            run_train(sess, train_file)
            saver.save(sess, ckpt_path)
            print(' * Variables are saved: %s *' % ckpt_path)
            run_test(sess, test_file)
            return

        # Testing: one ".meta" file is written per saved checkpoint.
        test_ckpt_dir = ckpt_root + FLAGS.test_file + '/'
        saved_count = len(glob.glob(test_ckpt_dir + "*.meta"))
        for number in range(1, saved_count + 1):
            test_ckpt_path = (test_ckpt_dir + "train_result"
                              + str(number) + ".ckpt")
            saver.restore(sess, test_ckpt_path)
            print ('Restored variables from %s.' % test_ckpt_path)
            run_test(sess, test_file)
def make_train(input_size, output_size, mem_size, mem_width, hidden_sizes=None):
    """Build the model and compile its Theano training function.

    Args:
        input_size: Passed through to ``controller.build``.
        output_size: Passed through to ``controller.build``.
        mem_size: Passed to both ``controller.build`` and ``model.build``.
        mem_width: Passed to both ``controller.build`` and ``model.build``.
        hidden_sizes: Controller hidden-layer sizes; the last entry is also
            given to ``model.build``. Defaults to ``[100]``. A fresh list is
            created per call so the default can never be shared (and
            mutated) across calls.

    Returns:
        ``(P, train)`` where ``P`` is the ``Parameters`` container and
        ``train(input_seq, output_seq)`` performs one RMSProp step and
        returns the regularised cost.
    """
    if hidden_sizes is None:
        hidden_sizes = [100]

    P = Parameters()
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    seqs = predict(input_seq)
    output_seq_pred = seqs[-1]  # the prediction is the last returned value

    # Rescale predictions so that 0 maps to 5e-6 and 1 to 1 - 5e-6, keeping
    # the logarithms inside the cross-entropy finite.
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(
            5e-6 + (1 - 2 * 5e-6) * output_seq_pred, output_seq),
        axis=1)

    params = P.values()
    # L2 penalty summed over every parameter.
    l2 = T.sum(0)
    for p in params:
        l2 = l2 + (p ** 2).sum()
    cost = T.sum(cross_entropy) + 1e-4 * l2

    # Element-wise gradient clipping to [-10, 10].
    grads = [T.clip(g, -10, 10) for g in T.grad(cost, wrt=params)]

    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=cost,
        updates=updates.rmsprop(params, grads, learning_rate=1e-5)
    )
    return P, train
def make_train(input_size,output_size,mem_size,mem_width,hidden_size=100):
    """Build the controller and model, and compile an Adadelta training step.

    Returns:
        ``(P, train)``: the ``Parameters`` container and a Theano function
        ``train(input_seq, output_seq)`` that applies one update and returns
        the regularised cost.
    """
    P = Parameters()

    # The controller is handed to model.build, which returns the sequence
    # predictor.
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_size)
    predict = model.build(P, mem_size, mem_width, hidden_size, ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')

    # predict yields exactly three values; only the last one is used here.
    _, _, output_seq_pred = predict(input_seq)

    # Keep predictions strictly inside (0, 1) before taking logs.
    squashed_pred = 5e-6 + (1 - 2*5e-6)*output_seq_pred
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(squashed_pred, output_seq), axis=1)

    params = P.values()
    # Left fold starting from T.sum(0): the L2 norm over all parameters.
    l2 = sum(((w ** 2).sum() for w in params), T.sum(0))
    cost = T.sum(cross_entropy) + 1e-3*l2

    # Clip every gradient element to [-100, 100].
    raw_grads = T.grad(cost, wrt=params)
    grads = [T.clip(g, -100, 100) for g in raw_grads]

    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=cost,
        updates=updates.adadelta(params, grads),
    )
    return P, train
def get_train_test_fn(P, data_X, data_W, data_Y): X = T.matrix('X') Y = T.ivector('Y') W = T.vector('W') f = model.build(P, input_size=data_X.get_value().shape[1], hidden_sizes=[256, 128, 64, 32]
def make_functions( input_size, output_size, mem_size, mem_width, hidden_sizes=[100]): start_time = time.time() input_seqs = T.btensor3('input_sequences') output_seqs = T.btensor3('output_sequences') P = Parameters() process = model.build(P, input_size, output_size, mem_size, mem_width, hidden_sizes[0]) outputs = process(T.cast(input_seqs,'float32')) output_length = (input_seqs.shape[1] - 2) // 2 Y = output_seqs[:,-output_length:,:-2] Y_hat = T.nnet.sigmoid(outputs[:,-output_length:,:-2]) cross_entropy = T.mean(T.nnet.binary_crossentropy(Y_hat,Y)) bits_loss = cross_entropy * (Y.shape[1] * Y.shape[2]) / T.log(2) params = P.values() cost = cross_entropy # + 1e-5 * sum(T.sum(T.sqr(w)) for w in params) print "Computing gradients", grads = T.grad(cost, wrt=params) grads = updates.clip_deltas(grads, np.float32(clip_length)) print "Done. (%0.3f s)"%(time.time() - start_time) start_time = time.time() print "Compiling function", P_learn = Parameters() update_pairs = updates.rmsprop( params, grads, learning_rate=1e-4, P=P_learn ) train = theano.function( inputs=[input_seqs, output_seqs], outputs=cross_entropy, updates=update_pairs, ) test = theano.function( inputs=[input_seqs, output_seqs], outputs=bits_loss ) print "Done. (%0.3f s)"%(time.time() - start_time) print P.parameter_count() return P, P_learn, train, test
def make_train_functions():
    """Compile the accumulate / update / test functions for the model.

    Gradients are accumulated over several ``acc`` calls and applied by one
    ``update`` call, which also resets the accumulators.

    Returns:
        ``(acc, update, test)``:
          * ``acc(X, Y)`` adds this example's gradients to the accumulators,
            increments the example counter and returns the mean error.
          * ``update()`` applies an Adadelta step using the clipped average
            of the accumulated gradients, then clears the accumulators.
          * ``test(X)`` returns the arg-max predictions for the second half
            of the sequence.
    """
    P = Parameters()
    X = T.bvector('X')
    Y = T.ivector('Y')
    aux = {}

    predict = model.build(
        P,
        input_size=128,
        embedding_size=64,
        controller_size=256,
        stack_size=256,
        output_size=128,
    )

    output = predict(X, aux=aux)
    # Labels are wrapped modulo 129, so a label of -1 selects column 128.
    error = - T.log(output[T.arange(Y.shape[0]), ((128+1 + Y) % (128+1))])
    # Only the second half of the sequence contributes to the loss.
    error = error[-(Y.shape[0]/2):]
    parameters = P.values()
    gradients = T.grad(T.sum(error), wrt=parameters)

    shapes = [p.get_value().shape for p in parameters]
    count = theano.shared(np.float32(0))
    acc_grads = [theano.shared(np.zeros(s, dtype=np.float32)) for s in shapes]

    acc_update = [(a, a + g) for a, g in zip(acc_grads, gradients)] +\
                 [(count, count + np.float32(1))]
    acc_clear = [(a, np.float32(0) * a) for a in acc_grads] +\
                [(count, np.int32(0))]

    avg_grads = [(g / count) for g in acc_grads]
    # Clip the *averaged* gradients. The original iterated over acc_grads
    # here, which silently discarded the division by `count` above.
    avg_grads = [clip(g, 1) for g in avg_grads]

    acc = theano.function(
        inputs=[X, Y],
        outputs=T.mean(error),
        updates=acc_update,
    )
    update = theano.function(
        inputs=[],
        updates=updates.adadelta(parameters, avg_grads, learning_rate=1e-8) + acc_clear
    )

    test = theano.function(
        inputs=[X],
        outputs=T.argmax(output, axis=1)[-(X.shape[0]/2):],
    )
    return acc, update, test
def make_model(
        input_size=8,
        output_size=8,
        mem_size=128,
        mem_width=20,
        hidden_sizes=None):
    """Build the model and compile a function returning weights and output.

    Args:
        input_size: Passed through to ``controller.build``.
        output_size: Passed through to ``controller.build``.
        mem_size: Passed to both ``controller.build`` and ``model.build``.
        mem_width: Passed to both ``controller.build`` and ``model.build``.
        hidden_sizes: Controller hidden-layer sizes; the last entry is also
            given to ``model.build``. Defaults to ``[100]``. A new list is
            created on each call so the default is never shared between
            calls.

    Returns:
        ``(P, test_fun)`` where ``test_fun(input_seq)`` returns
        ``[weights, output]``.
    """
    if hidden_sizes is None:
        hidden_sizes = [100]

    P = Parameters()
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    # The first returned value is not exposed by the compiled function.
    [M_curr, weights, output] = predict(input_seq)

    test_fun = theano.function(
        inputs=[input_seq],
        outputs=[weights, output]
    )
    return P, test_fun
def main():
    """Load the data, build or load a model, then train/visualize/save it.

    Behaviour is driven by the module-level switches LIMITED, LOAD, TRAIN,
    VISUALIZE and VISUALIZE_TO_FILE. If LOAD is set but the ``model``
    directory is empty, the function reports it and returns: there is no
    model to train, visualize or save.
    """
    # Training data consists of 60000 images and 60000 labels.
    # Testing data consists of 10000 images and 10000 labels.
    # Each image consists of 784 (28x28) pixels, each of which contains a
    # value from 0 to 255.0 corresponding to its darkness or lightness.
    # Each input needs to be a list of numpy arrays to be valid.

    # Load all of the data
    print("Loading data...")
    test_images = data.load_data(LIMITED)
    train_images = data.load_data(
        LIMITED, "train-images.idx3-ubyte", "train-labels.idx1-ubyte")

    print("Normalizing data...")
    X_train, Y_train = data.convert_image_data(train_images)
    X_test, Y_test = data.convert_image_data(test_images)
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)
    X_test = np.array(X_test)
    Y_test = np.array(Y_test)

    if LOAD == False:
        print("Building the model...")
        _model = model.build()
    else:
        print("Loading the model...")
        elements = os.listdir("model")
        if not elements:
            # Previously execution continued with `_model` unbound and
            # failed with a NameError in the steps below.
            print("No models to load.")
            return
        # NOTE(review): os.listdir order is arbitrary, so "last" is not
        # guaranteed to be the newest model - confirm the intended choice.
        _model = model.load(elements[-1])

    if TRAIN == True:
        print("Training the model...")
        model.train(_model, X_train, Y_train, X_test, Y_test)

    if VISUALIZE:
        model.visualize(_model, test_images, VISUALIZE_TO_FILE)

    if TRAIN == True:
        print("Saving the model...")
        model.save(_model)
def build(P , input_size , mem_width , weighted_mem_width , output_size) :
    """Build the controller and model, and return an update factory.

    Returns:
        ``(turing_updates, predict)``: ``turing_updates(cost, lr)`` yields
        RMSProp updates for every parameter in ``P``; ``predict`` is the
        callable produced by ``model.build``.
    """
    ctrl = controller.build(P, input_size, output_size, weighted_mem_width)
    predict = model.build(P, input_size, mem_width, weighted_mem_width, ctrl)

    def turing_updates(cost , lr) :
        """Return RMSProp updates for ``cost`` plus L2 weight decay."""
        params = P.values()
        # Weight decay over all parameters, folded from T.sum(0).
        decay = sum(((w ** 2).sum() for w in params), T.sum(0))
        all_cost = cost + 1e-3 * decay
        raw_grads = T.grad(all_cost, wrt=params)
        # Clip every gradient element to [-100, 100].
        clipped = [T.clip(g, -100, 100) for g in raw_grads]
        return updates.rmsprop(params, clipped, learning_rate=lr)

    # NOTE(review): this helper is neither returned nor called in this block.
    def init_parameter(name , value) :
        P[name] = value

    return turing_updates , predict
def build(P , mem_width , output_size) :
    """Build the controller and model, and return an update factory.

    Returns:
        ``(turing_updates, predict)``: ``turing_updates(cost, lr)`` yields
        parameter updates from ``updates.momentum`` with ``mu=0``;
        ``predict`` is the callable produced by ``model.build``.
    """
    ctrl = controller.build(P, mem_width, output_size, mem_width)
    predict = model.build(P, mem_width, ctrl)

    def turing_updates(cost , lr) :
        """Return updates for ``cost`` plus L2 weight decay."""
        params = P.values()
        # Weight decay over all parameters, folded from T.sum(0).
        decay = sum(((w ** 2).sum() for w in params), T.sum(0))
        all_cost = cost + 1e-3 * decay
        clipper = updates.clip(5.)
        clipped = clipper(T.grad(all_cost, wrt=params))
        return updates.momentum(params, clipped, mu = 0, learning_rate=lr)

    # NOTE(review): this helper is neither returned nor called in this block.
    def init_parameter(name , value) :
        P[name] = value

    return turing_updates , predict
def make_train(input_size, output_size, mem_size, mem_width, hidden_sizes=None):
    """Build the model and compile its Adadelta training function.

    Args:
        input_size: Passed through to ``controller.build``.
        output_size: Passed through to ``controller.build``.
        mem_size: Passed to both ``controller.build`` and ``model.build``.
        mem_width: Passed to both ``controller.build`` and ``model.build``.
        hidden_sizes: Controller hidden-layer sizes; the last entry is also
            given to ``model.build``. Defaults to ``[100]``. A new list is
            created per call so the default is never shared between calls.

    Returns:
        ``(P, train)`` where ``train(input_seq, output_seq)`` applies one
        update and returns the mean cross-entropy over the second half of
        the sequence.
    """
    if hidden_sizes is None:
        hidden_sizes = [100]

    P = Parameters()
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    seqs = predict(input_seq)
    output_seq_pred = seqs[-1]  # the prediction is the last returned value

    # Rescale predictions so that 0 maps to 5e-6 and 1 to 1 - 5e-6, keeping
    # the logarithms inside the cross-entropy finite.
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(
            5e-6 + (1 - 2 * 5e-6) * output_seq_pred, output_seq),
        axis=1)
    # The optimised cost covers the whole sequence (no weight decay here).
    cost = T.sum(cross_entropy)

    params = P.values()
    # Element-wise gradient clipping to [-100, 100].
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    # The reported value only covers the trailing half of the time steps.
    response_length = input_seq.shape[0]/2
    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=T.mean(cross_entropy[-response_length:]),
        updates=updates.adadelta(params, grads)
    )
    return P, train
def predict():
    """Download one captcha image and classify its four characters.

    The image is fetched from a fixed URL, converted to RGB and cut into
    four 36x30 crops (rows 0-35, columns 0-29, 30-59, 60-89 and 90-119).
    A model from ``m.build`` scores the crops and each arg-max index is
    mapped through ``classes``.

    Returns:
        A dict with ``'image'`` (base64 text of the downloaded bytes) and
        ``'text'`` (the predicted characters).
    """
    reply = requests.get('http://api.shuibei.chxj.name/captcha')
    bytes_io = BytesIO(reply.content)
    pixels = np.array(Image.open(bytes_io).convert('RGB'))

    # Four consecutive 30-pixel-wide columns, each 36 rows tall.
    crops = np.array(
        [pixels[0:36, left:left + 30] for left in (0, 30, 60, 90)])

    classifier = m.build((36, 30, 3), len(classes))
    scores = classifier.predict(crops)

    text = ''.join(classes[idx] for idx in np.argmax(scores, axis=1))
    print("the image is", text)

    encoded = base64.b64encode(bytes_io.getvalue()).decode()
    return {'image': encoded, 'text': text}
def make_model(
        input_size=8,
        output_size=8,
        mem_size=128,
        mem_width=20,
        hidden_size=100):
    """Build the NTM and compile a function exposing its weights and output.

    Returns:
        ``(P, test_fun)``: the ``Parameters`` container and a Theano
        function mapping an input sequence to ``[weights, output]``.
    """
    P = Parameters()

    # Controller first; the model wraps it into a sequence predictor.
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_size)
    predict = model.build(P, mem_size, mem_width, hidden_size, ctrl)

    input_seq = T.matrix('input_sequence')
    # predict yields exactly three values; the first is not exposed.
    _, weights, output = predict(input_seq)

    # Compiled entry point for running the NTM on a sequence.
    test_fun = theano.function(
        inputs=[input_seq],
        outputs=[weights, output],
    )
    return P, test_fun
improvement_threshold = args.improvement_threshold validation_percent = args.validation_percent patience = args.patience checkpoint = args.checkpoint embedding_size = args.embedding_size hidden_size = args.hidden_size l2_coefficient = args.l2 id2char = pickle.load(open(vocab_file,'r')) char2id = vocab.load(vocab_file) P = Parameters() lang_model = model.build(P, character_count=len(char2id) + 1, embedding_size=embedding_size, hidden_size=hidden_size ) def cost(X, P): # batch_size x time eps = 1e-3 X = X.T # time x batch_size char_prob_dist = lang_model(X[:-1]) # time x batch_size x output_size char_prob_dist = (1 - 2 * eps) * char_prob_dist + eps label_prob = char_prob_dist[ T.arange(X.shape[0] - 1).dimshuffle(0, 'x'), T.arange(X.shape[1]).dimshuffle('x', 0), X[1:] ] # time x batch_size cross_entropy = -T.sum(T.log(label_prob), axis=0) display_cost = 2**(-T.mean(T.log2(label_prob), axis=0))
@author: ashima.garg """ import config import data import model if __name__ == "__main__": data = data.Data() data.read_train(config.TRAIN_X_PATH, config.TRAIN_Y_PATH) data.preprocess() data.split() print("data read") model = model.Model() model.build() print("model build") model.train(data) print("model trained") model.test(data) print("model tested") ''' data.read_test(config.TEST_X_PATH) data.preprocess() print("model predicted") '''
.batch(BATCH_SIZE)\ .prefetch(buffer_size = tf.data.experimental.AUTOTUNE) # import matplotlib.pyplot as plt # for A,B,C in dataset: # print(A.shape,B.shape,C.shape) # plt.imshow(B[0,:,:,0],cmap="gray") # plt.show() # print(C[0,:]) # break test_set = train_dataset.DataPipeLine(train_path, train=False, onehot=True) #但其实毫无用处,仅仅是噪声作为输入的堆叠 test_set = tf.data.Dataset.from_generator(test_set.generator,output_types=(tf.float32,tf.float32),output_shapes=((28,28),(10)))\ .map(map_func_z,num_parallel_calls=num_threads)\ .batch(1)\ .prefetch(buffer_size = tf.data.experimental.AUTOTUNE) # for i,noise in enumerate(test_set): # print(i,noise.shape) model = model.DCGAN(train_set=dataset, test_set=test_set, loss_name="WGAN-GP-SN", mixed_precision=True, learning_rate=1e-4, tmp_path=tmp_path, out_path=out_path) model.build(input_shape_G=[None, 100], input_shape_D=[None, 28, 28, 1], label_shape=[None, 10]) model.train(epoches=EPOCHES)
import numpy as np from model import build from datasetup import setup WEIGHTS = 'model.h5' train_x_ims, train_x_text, train_y, test_x_ims, test_x_text, test_y, img_shape, vocab_size, n_ans, ans, test_q, test_answer_indices = setup( ) model = build(img_shape, vocab_size, n_ans) model.load_weights(WEIGHTS) predictions = model.predict([test_x_ims, test_x_text]) for idx in range(n_ans): prediction = predictions[:, idx] answer = ans[idx] min = np.amin(prediction) max = np.amax(prediction) mean = np.mean(prediction) print("\n(" + str(answer) + ") Min: " + str(min) + ", Max: " + str(max) + ", Mean: " + str(mean)) shapes = [] decision = [] for i in range(n_ans): if ans[i] == 'rectangle' or ans[i] == 'circle' or ans[i] == 'triangle': shapes.append(i) elif ans[i] == 'yes' or ans[i] == 'no': decision.append(i)
def visualize(tfrecords, bbox_priors, checkpoint_path, cfg):
    """Interactively plot ground-truth boxes, priors and top detections.

    Builds the input pipeline and detection model in a fresh graph, restores
    the moving-average weights from ``checkpoint_path`` and then, for every
    image of every fetched batch, draws the ground-truth boxes (blue), the
    highest-confidence predictions (red) and their priors (green). After
    each image the user is prompted; any non-empty answer stops the loop.

    Args:
        tfrecords: Forwarded to ``inputs.input_nodes``.
        bbox_priors: Indexable by detection index; each prior is added to
            the predicted location offsets before plotting.
        checkpoint_path: A checkpoint file, or a directory from which the
            latest checkpoint is taken.
        cfg: Configuration object supplying batch size, input size, decay
            rates and session options.

    Returns:
        None. Returns early (after printing an error) when no checkpoint
        can be found.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    graph = tf.Graph()

    # NOTE(review): the original comment here read "Force all Variables to
    # reside on the CPU", but no device scope is visible in this function.
    with graph.as_default():

        images, batched_bboxes, batched_num_bboxes, image_ids = inputs.input_nodes(
            tfrecords=tfrecords,
            max_num_bboxes=cfg.MAX_NUM_BBOXES,
            num_epochs=None,
            batch_size=cfg.BATCH_SIZE,
            num_threads=cfg.NUM_INPUT_THREADS,
            add_summaries=True,
            shuffle_batch=True,
            cfg=cfg)

        batch_norm_params = {
            # Decay for the batch_norm moving averages.
            'decay': cfg.BATCHNORM_MOVING_AVERAGE_DECAY,
            # epsilon to prevent 0s in variance.
            'epsilon': 0.001,
            'variables_collections': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
            'is_training': False
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(0.00004),
                            biases_regularizer=slim.l2_regularizer(0.00004)):

            locations, confidences, inception_vars = model.build(
                inputs=images,
                num_bboxes_per_cell=cfg.NUM_BBOXES_PER_CELL,
                reuse=False,
                scope='')

        # Map each model variable's moving-average name to the variable so
        # the saver loads the averaged values into the live variables.
        ema = tf.train.ExponentialMovingAverage(decay=cfg.MOVING_AVERAGE_DECAY)
        shadow_vars = {
            ema.average_name(var): var
            for var in slim.get_model_variables()
        }

        # Restore the parameters
        saver = tf.train.Saver(shadow_vars, reshape=True)

        sess_config = tf.ConfigProto(
            log_device_placement=False,
            #device_filters = device_filters,
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.
                PER_PROCESS_GPU_MEMORY_FRACTION))
        sess = tf.Session(graph=graph, config=sess_config)

        # Little utility to convert the float images to uint8
        # (x / 2 + 0.5 is applied before the dtype conversion).
        image_to_convert = tf.placeholder(tf.float32)
        convert_image_to_uint8 = tf.image.convert_image_dtype(
            tf.add(tf.div(image_to_convert, 2.0), 0.5), tf.uint8)

        with sess.as_default():

            fetches = [
                locations, confidences, images, batched_bboxes,
                batched_num_bboxes
            ]

            coord = tf.train.Coordinator()

            tf.initialize_all_variables().run()
            tf.initialize_local_variables().run()

            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            plt.ion()

            try:

                if tf.gfile.IsDirectory(checkpoint_path):
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_path)

                if checkpoint_path is None:
                    print "ERROR: No checkpoint file found."
                    # NOTE(review): this return skips the coordinator
                    # shutdown at the bottom of the function.
                    return

                # Restores from checkpoint
                saver.restore(sess, checkpoint_path)
                # Assuming model_checkpoint_path looks something like:
                #   /my-favorite-path/cifar10_train/model.ckpt-0,
                # extract global_step from it.
                global_step = int(
                    checkpoint_path.split('/')[-1].split('-')[-1])
                print "Found model for global step: %d" % (global_step, )

                total_sample_count = 0
                step = 0
                done = False
                while not coord.should_stop() and not done:

                    t = time.time()
                    outputs = sess.run(fetches)
                    dt = time.time() - t

                    # Unpack in the same order as `fetches`.
                    locs = outputs[0]
                    confs = outputs[1]
                    imgs = outputs[2]
                    gt_bboxes = outputs[3]
                    gt_num_bboxes = outputs[4]

                    print locs.shape
                    print confs.shape

                    for b in range(cfg.BATCH_SIZE):

                        # Show the image
                        image = imgs[b]

                        uint8_image = sess.run(convert_image_to_uint8,
                                               {image_to_convert: image})
                        plt.imshow(uint8_image)

                        num_gt_bboxes_in_image = gt_num_bboxes[b]
                        print "Number of GT Boxes: %d" % (
                            num_gt_bboxes_in_image, )

                        # Draw the GT Boxes in blue
                        for i in range(num_gt_bboxes_in_image):
                            gt_bbox = gt_bboxes[b][i]
                            # Boxes are scaled by the input size for plotting.
                            xmin, ymin, xmax, ymax = gt_bbox * cfg.INPUT_SIZE
                            plt.plot([xmin, xmax, xmax, xmin, xmin],
                                     [ymin, ymin, ymax, ymax, ymin], 'b-')

                        # Detection indices sorted by descending confidence.
                        indices = np.argsort(confs[b].ravel())[::-1]
                        print "Top 10 Detection Confidences: ", confs[b][
                            indices[:10]].ravel().tolist()

                        # Draw the most confident boxes in red; render as
                        # many as there are GT boxes, or 5 when there are
                        # none.
                        num_detections_to_render = num_gt_bboxes_in_image if num_gt_bboxes_in_image > 0 else 5
                        for i, index in enumerate(
                                indices[0:num_detections_to_render]):

                            loc = locs[b][index].ravel()
                            conf = confs[b][index]
                            prior = bbox_priors[index]

                            print "Location: ", loc
                            print "Prior: ", prior
                            print "Index: ", index

                            # Plot the predicted location in red
                            # (the prediction is an offset from the prior).
                            xmin, ymin, xmax, ymax = (prior + loc) * cfg.INPUT_SIZE
                            plt.plot([xmin, xmax, xmax, xmin, xmin],
                                     [ymin, ymin, ymax, ymax, ymin], 'r-')

                            # Plot the prior in green
                            xmin, ymin, xmax, ymax = prior * cfg.INPUT_SIZE
                            plt.plot([xmin, xmax, xmax, xmin, xmin],
                                     [ymin, ymin, ymax, ymax, ymin], 'g-')

                            print "Pred Confidence for box %d: %f" % (i, conf)

                        plt.show()

                        # Empty input advances to the next image; anything
                        # else ends the session.
                        t = raw_input("push button")
                        if t != '':
                            done = True
                            break
                        plt.clf()

            except tf.errors.OutOfRangeError as e:
                # The input queue ran out of data; fall through to shutdown.
                pass

            coord.request_stop()
            coord.join(threads)
def __init__(self, input_size, output_size, mem_size, mem_width, hidden_sizes, num_heads, max_epochs, momentum, learning_rate ,grad_clip, l2_norm):
    """Store the hyper-parameters and compile the Theano functions.

    After construction the instance exposes:
      * ``train(input_seq, output_seq)``: one RMSProp step, returns the cost.
      * ``predict_cost(input_seq, output_seq)``: the cost without updating.
      * ``predict(input_seq)``: ``[weights, output]``.

    ``grad_clip`` is indexed as ``(lower, upper)`` for gradient clipping and
    ``l2_norm`` scales the L2 penalty added to the cross-entropy.
    """
    # Plain hyper-parameter bookkeeping.
    self.input_size = input_size
    self.output_size = output_size
    self.mem_size = mem_size
    self.mem_width = mem_width
    self.hidden_sizes = hidden_sizes
    self.num_heads = num_heads
    self.max_epochs = max_epochs
    self.momentum = momentum
    self.learning_rate = learning_rate
    self.grad_clip = grad_clip
    self.l2_norm = l2_norm

    # Training-progress trackers.
    self.best_train_cost = np.inf
    self.best_valid_cost = np.inf
    self.train_his = []

    # Model graph.
    P = Parameters()
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(
        P, mem_size, mem_width, hidden_sizes[-1], ctrl, num_heads)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    # predict yields exactly three values; the first is unused here.
    _, weights, output = predict(input_seq)

    # Keep predictions strictly inside (0, 1) before taking logs.
    squashed = 5e-6 + (1 - 2*5e-6)*output
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(squashed, output_seq), axis=1)

    self.params = P.values()
    # L2 penalty over all parameters, folded from T.sum(0).
    l2 = sum(((w ** 2).sum() for w in self.params), T.sum(0))
    cost = T.sum(cross_entropy) + l2_norm * l2

    lower, upper = grad_clip[0], grad_clip[1]
    grads = [
        T.clip(g, lower, upper) for g in T.grad(cost, wrt=self.params)
    ]

    supervised_inputs = [input_seq, output_seq]
    self.train = theano.function(
        inputs=supervised_inputs,
        outputs=cost,
        updates=updates.rmsprop(
            self.params,
            grads,
            momentum=momentum,
            learning_rate=learning_rate,
        ),
    )
    self.predict_cost = theano.function(
        inputs=supervised_inputs,
        outputs=cost,
    )
    self.predict = theano.function(
        inputs=[input_seq],
        outputs=[weights, output],
    )
def train(tfrecords, bbox_priors, logdir, cfg, first_iteration=False):
    """Train the detection model, writing checkpoints and summaries.

    Builds the input pipeline, feature network, detection heads and loss in
    a fresh graph, optimises with RMSProp under an exponentially decayed
    learning rate, and maintains exponential moving averages of the
    collected parameters. Checkpoints go to ``<logdir>/checkpoints`` and
    summaries to ``<logdir>/summary``.

    Args:
        tfrecords: Forwarded to ``construct_network_input_nodes``.
        bbox_priors: Forwarded to ``model.add_loss``.
        logdir: Root directory for checkpoints and summaries.
        cfg: Configuration object (batch size, learning-rate schedule,
            iteration counts, session options, ...).
        first_iteration: When true, an existing checkpoint is restored only
            into the variables that were trainable right after
            ``model.build`` (i.e. before the detection heads were added).

    Returns:
        None.
    """
    graph = tf.Graph()

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        #device_filters = device_filters,
        allow_soft_placement = True,
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.PER_PROCESS_GPU_MEMORY_FRACTION
        )
    )
    session = tf.Session(graph=graph, config=sess_config)

    with graph.as_default(), session.as_default():

        images, batched_bboxes, batched_num_bboxes, image_ids = construct_network_input_nodes(
            tfrecords=tfrecords,
            max_num_bboxes=cfg.MAX_NUM_BBOXES,
            num_epochs=None,
            batch_size=cfg.BATCH_SIZE,
            num_threads=cfg.NUM_INPUT_THREADS,
            add_summaries = True,
            augment=cfg.AUGMENT_IMAGE,
            shuffle_batch=True,
            capacity = cfg.QUEUE_CAPACITY,
            min_after_dequeue = cfg.QUEUE_MIN,
            cfg=cfg
        )

        features = model.build(graph, images, cfg)

        if first_iteration:
            # conv kernels, gamma and beta for batch normalization
            # (snapshot taken before the detection heads add variables).
            original_inception_vars = [v for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]

        locations, confidences = model.add_detection_heads(
            graph,
            features,
            num_bboxes_per_cell=5,
            batch_size=cfg.BATCH_SIZE,
            cfg=cfg
        )

        location_loss, confidence_loss, matching = model.add_loss(graph, locations, confidences, batched_bboxes, batched_num_bboxes, bbox_priors, cfg)
        loss = location_loss + confidence_loss

        global_step = tf.Variable(0, name='global_step', trainable=False)

        learning_rate = tf.train.exponential_decay(
            learning_rate=cfg.LEARNING_RATE,
            global_step=global_step,
            decay_steps=cfg.LEARNING_RATE_DECAY_STEPS,
            decay_rate=cfg.LEARNING_RATE_DECAY,
            staircase=cfg.LEARNING_RATE_STAIRCASE
        )

        # Create the optimizer
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate = learning_rate,
            decay = cfg.RMSPROP_DECAY, # 0.9, # Parameter setting from the arxiv paper
            epsilon = cfg.RMSPROP_EPSILON # 1.0 #Parameter setting from the arxiv paper
        )

        # Compute the gradients using the loss
        gradients = optimizer.compute_gradients(loss)

        # Apply the gradients (this also increments global_step)
        optimize_op = optimizer.apply_gradients(
            grads_and_vars = gradients,
            global_step = global_step
        )

        ema = tf.train.ExponentialMovingAverage(
            decay=cfg.MOVING_AVERAGE_DECAY,
            num_updates=global_step
        )

        maintain_averages_op = ema.apply(
            tf.get_collection('conv_params') +
            tf.get_collection('batchnorm_params') +
            tf.get_collection('softmax_params') +
            tf.get_collection('batchnorm_mean_var')
        )

        # Create an op that will update the moving averages after each training
        # step. This is what we will use in place of the usual training op.
        with tf.control_dependencies([optimize_op]):
            training_op = tf.group(maintain_averages_op)

        save_dir = os.path.join(logdir, 'checkpoints')
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        print "Storing checkpoint files in %s" % (save_dir,)

        # Create a saver to snapshot the model
        saver = tf.train.Saver(
            # Save all variables
            max_to_keep = 3,
            keep_checkpoint_every_n_hours = 1
        )

        # Look to see if there is a checkpoint file
        ckpt = tf.train.get_checkpoint_state(save_dir)
        if ckpt:
            ckpt_file = ckpt.model_checkpoint_path
            print "Found checkpoint: %s" % (ckpt_file,)

        # If this is the first iteration then restore the original inception weights
        if first_iteration:
            original_inception_saver = tf.train.Saver(
                var_list=original_inception_vars
            )

        # Add some more summaries
        tf.scalar_summary('primary_loss', loss)
        tf.scalar_summary('location_loss', location_loss)
        tf.scalar_summary('confidence_loss', confidence_loss)
        tf.scalar_summary(learning_rate.op.name, learning_rate)
        tf.histogram_summary('confidences', confidences)
        #for grad, var in gradients:
        #  tf.histogram_summary(var.op.name, var)
        #  tf.histogram_summary(var.op.name + '/gradients', grad)
        summary_op = tf.merge_all_summaries()

        # create the directory to hold the summary outputs
        summary_logdir = os.path.join(logdir, 'summary')
        if not os.path.exists(summary_logdir):
            os.makedirs(summary_logdir)
        print "Storing summary files in %s" % (summary_logdir,)

        # create the summary writer to write the event files
        summar_writer = tf.train.SummaryWriter(
            summary_logdir,
            graph_def=graph.as_graph_def(),
            max_queue=10,
            flush_secs=30
        )

        # Only indices 0 (loss) and 2 (learning rate) are read back below;
        # training_op is fetched for its side effect.
        fetches = [loss, training_op, learning_rate, confidences, matching, location_loss, confidence_loss]

        # Print some information to the command line on each run
        print_str = ', '.join([
            'Step: %d',
            'LR: %.4f',
            'Loss: %.4f',
            'Time/image (ms): %.1f'
        ])

        # Now create a training coordinator that will control the different threads
        coord = tf.train.Coordinator()

        # make sure to initialize all of the variables
        tf.initialize_all_variables().run()

        # launch the queue runner threads
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        # If there was a checkpoint file, then restore it.
        if ckpt and first_iteration:
            original_inception_saver.restore(session, ckpt.model_checkpoint_path)
        elif ckpt:
            saver.restore(session, ckpt.model_checkpoint_path)

        try:
            step = global_step.eval()
            while step < cfg.NUM_TRAIN_ITERATIONS:
                if coord.should_stop():
                    break

                t = time.time()
                fetched = session.run(fetches)
                dt = time.time() - t

                # increment the global step counter
                # (re-read the value advanced by apply_gradients)
                step = global_step.eval()

                print print_str % (step, fetched[2], fetched[0], (dt / cfg.BATCH_SIZE) * 1000)

                if (step % 50) == 0:
                    print "writing summary"
                    # NOTE(review): evaluating summary_op is a separate
                    # session.run from the training step above.
                    summar_writer.add_summary(session.run(summary_op), global_step=step)

                if (step % cfg.SAVE_EVERY_N_ITERATIONS) == 0:
                    print "saving model"
                    saver.save(
                        sess=session,
                        save_path= os.path.join(save_dir, 'ft_inception_v3'),
                        global_step=step
                    )

        except Exception as e:
            # Report exceptions to the coordinator.
            coord.request_stop(e)

        # When done, ask the threads to stop. It is innocuous to request stop twice.
        coord.request_stop()
        # And wait for them to actually do it.
        coord.join(threads)
import numpy as np from theano_toolkit.parameters import Parameters import data_io import model import vae import matplotlib import sys matplotlib.use('Agg') import matplotlib.pyplot as plt from matplotlib.animation import FuncAnimation if __name__ == "__main__": model.SAMPLED_LAYERS = [int(s) for s in sys.argv[1:]] print model.SAMPLED_LAYERS P = Parameters() autoencoder, inpaint = model.build(P) parameters = P.values() X = T.itensor4('X') X_hat, posteriors, priors = \ autoencoder(T.cast(X, 'float32') / np.float32(255.)) latent_kls = [ T.mean(vae.kl_divergence(po_m, po_s, pr_m, pr_s), axis=0) for (po_m, po_s), (pr_m, pr_s) in zip(posteriors, priors) ] recon_loss = model.cost(X_hat, X[:, :, 16:-16, 16:-16]) val_loss = (recon_loss + sum(latent_kls)) / (32**2) X_recon = inpaint(T.cast(X, 'float32') / np.float32(255.)) Y = model.predict(X_recon) fill = theano.function(inputs=[X],
# -*- coding: utf-8 -*- import os, sys, time import numpy as np import model import vocab from config import config data_dir = "text" model_dir = "model" dataset, config.n_vocab, config.n_dataset = vocab.load(data_dir) lm = model.build() lm.load(model_dir) n_epoch = 1000 n_train = 5000 batchsize = 64 total_time = 0 # 長すぎるデータはメモリに乗らないこともあります max_length_of_chars = 100 # 学習初期は短い文章のみ学習し、徐々に長くしていきます。 # この機能が必要ない場合は最初から大きな値を設定します。 current_length_limit = 15 def make_batch(): target_batch_array = [] max_length_in_batch = 0 for b in xrange(batchsize): length = current_length_limit + 1
def detect_visualize(tfrecords, bbox_priors, checkpoint_path, cfg):
    """Interactively show patch-level and image-level detections.

    Builds the patch input pipeline and detection model in a fresh graph,
    restores the moving-average weights from ``checkpoint_path`` and, for
    each patch in each fetched batch, plots the top predictions on the
    patch ('Cropped Size' figure) and, after conversion with
    ``convert_proposals``, on the stored original image ('Resized' figure).
    After each patch the user is prompted; any non-empty answer stops.

    Args:
        tfrecords: Forwarded to the input-node builder.
        bbox_priors: Added to the predicted location offsets of each patch.
        checkpoint_path: A checkpoint file, or a directory from which the
            latest checkpoint is taken.
        cfg: Configuration object (batch size, input size, queue and
            session options, ...).

    Returns:
        None. Returns early (after printing an error) when no checkpoint
        can be found.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    graph = tf.Graph()

    #logdir = os.path.dirname(checkpoint_path)
    #feature_cache_file = os.path.join(logdir, "feature_cache_created")
    #if not os.path.exists(feature_cache_file):
    # The feature cache is disabled, so the `if feature_cache_file:` branch
    # below never runs in this version.
    feature_cache_file = None

    # NOTE(review): the original comment here read "Force all Variables to
    # reside on the CPU", but no device scope is visible in this function.
    with graph.as_default():

        if feature_cache_file:
            with open(feature_cache_file) as f:
                feat_shape = [int(a) for a in f.read().split(' ')]
            batched_features, batched_bboxes, batched_num_bboxes, image_ids = inputs.input_nodes_precomputed_features(
                tfrecords=tfrecords,
                max_num_bboxes=cfg.MAX_NUM_BBOXES,
                num_epochs=1,
                batch_size=cfg.BATCH_SIZE,
                num_threads=cfg.NUM_INPUT_THREADS,
                capacity=cfg.QUEUE_CAPACITY,
                min_after_dequeue=cfg.QUEUE_MIN,
                cfg=cfg,
                feat_shape=feat_shape)
        else:
            batched_images, batched_offsets, batched_dims, batched_is_flipped, batched_bbox_restrictions, batched_max_to_keep, batched_heights_widths, batched_image_ids = input_nodes(
                tfrecords=tfrecords,
                num_epochs=1,
                batch_size=cfg.BATCH_SIZE,
                num_threads=cfg.NUM_INPUT_THREADS,
                capacity=cfg.QUEUE_CAPACITY,
                cfg=cfg)

        batch_norm_params = {
            # Decay for the batch_norm moving averages.
            'decay': cfg.BATCHNORM_MOVING_AVERAGE_DECAY,
            # epsilon to prevent 0s in variance.
            'epsilon': 0.001,
            'variables_collections': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
            'is_training': False
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(0.00004),
                            biases_regularizer=slim.l2_regularizer(0.00004)):

            locations, confidences, inception_vars = model.build(
                inputs=batched_images,
                num_bboxes_per_cell=cfg.NUM_BBOXES_PER_CELL,
                reuse=False,
                scope='')

        # Map each model variable's moving-average name to the variable so
        # the saver loads the averaged values into the live variables.
        ema = tf.train.ExponentialMovingAverage(decay=cfg.MOVING_AVERAGE_DECAY)
        shadow_vars = {
            ema.average_name(var): var
            for var in slim.get_model_variables()
        }

        # Restore the parameters
        saver = tf.train.Saver(shadow_vars, reshape=True)

        fetches = [
            locations, confidences, batched_offsets, batched_dims,
            batched_is_flipped, batched_bbox_restrictions,
            batched_max_to_keep, batched_heights_widths, batched_image_ids,
            batched_images
        ]

        coord = tf.train.Coordinator()

        sess_config = tf.ConfigProto(
            log_device_placement=False,
            #device_filters = device_filters,
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.
                PER_PROCESS_GPU_MEMORY_FRACTION))
        sess = tf.Session(graph=graph, config=sess_config)

        # Little utility to convert the float images to uint8
        # (x / 2 + 0.5 is applied before the dtype conversion).
        image_to_convert = tf.placeholder(tf.float32)
        convert_image_to_uint8 = tf.image.convert_image_dtype(
            tf.add(tf.div(image_to_convert, 2.0), 0.5), tf.uint8)

        detection_results = []
        with sess.as_default():

            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:

                if tf.gfile.IsDirectory(checkpoint_path):
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_path)

                if checkpoint_path is None:
                    print "ERROR: No checkpoint file found."
                    # NOTE(review): this return skips the coordinator
                    # shutdown at the bottom of the function.
                    return

                # Restores from checkpoint
                saver.restore(sess, checkpoint_path)
                # Assuming model_checkpoint_path looks something like:
                #   /my-favorite-path/cifar10_train/model.ckpt-0,
                # extract global_step from it.
                global_step = int(
                    checkpoint_path.split('/')[-1].split('-')[-1])
                print "Found model for global step: %d" % (global_step, )

                print_str = ', '.join(['Step: %d', 'Time/image (ms): %.1f'])

                plt.ion()
                original_image = None
                current_image_id = None

                step = 0
                done = False
                while not coord.should_stop() and not done:

                    t = time.time()
                    outputs = sess.run(fetches)
                    dt = time.time() - t

                    # Unpack in the same order as `fetches`.
                    locs = outputs[0]
                    confs = outputs[1]
                    patch_offsets = outputs[2]
                    patch_dims = outputs[3]
                    patch_is_flipped = outputs[4]
                    patch_bbox_restrictions = outputs[5]
                    patch_max_to_keep = outputs[6]
                    image_height_widths = outputs[7]
                    image_ids = outputs[8]
                    images = outputs[9]

                    for b in range(cfg.BATCH_SIZE):

                        print "Patch Dims: ", patch_dims[b]
                        print "Patch Offset: ", patch_offsets[b]
                        print "Max to keep: ", patch_max_to_keep[b]
                        print "Patch restrictions: ", patch_bbox_restrictions[
                            b]
                        print "Image HxW: ", image_height_widths[b]
                        print

                        # The first patch seen for an image id is kept as
                        # that image's "original" for the 'Resized' figure.
                        if current_image_id is None or current_image_id != image_ids[
                                b]:
                            original_image = images[b]
                            original_image = sess.run(
                                convert_image_to_uint8,
                                {image_to_convert: images[b]})
                            current_image_id = image_ids[b]

                        img_id = int(np.asscalar(image_ids[b]))

                        # Predictions are offsets from the priors, clipped
                        # to the unit square.
                        predicted_bboxes = locs[b] + bbox_priors
                        predicted_bboxes = np.clip(predicted_bboxes, 0., 1.)
                        predicted_confs = confs[b]

                        # Keep only the predictions that are completely contained in the [0.1, 0.1, 0.9, 0.9] square
                        # for this patch
                        #if patch_restrict[b]:
                        #  filtered_bboxes, filtered_confs = filter_proposals(predicted_bboxes, predicted_confs)
                        #else:
                        #  filtered_bboxes = predicted_bboxes
                        #  filtered_confs = predicted_confs
                        filtered_bboxes, filtered_confs = filter_proposals(
                            predicted_bboxes, predicted_confs,
                            patch_bbox_restrictions[b])

                        # No valid predictions?
                        if filtered_bboxes.shape[0] == 0:
                            continue

                        # Keep only the highest-confidence predictions, up
                        # to this patch's max-to-keep limit.
                        num_preds_to_keep = np.asscalar(patch_max_to_keep[b])
                        sorted_idxs = np.argsort(filtered_confs.ravel())[::-1]
                        sorted_idxs = sorted_idxs[:num_preds_to_keep]
                        filtered_bboxes = filtered_bboxes[sorted_idxs]
                        filtered_confs = filtered_confs[sorted_idxs]

                        plt.figure('Cropped Size')
                        uint8_image = sess.run(convert_image_to_uint8,
                                               {image_to_convert: images[b]})
                        plt.imshow(uint8_image)
                        # Render at most 10 boxes per figure.
                        num_detections_to_render = min(
                            filtered_bboxes.shape[0], 10)
                        for i in range(num_detections_to_render):

                            loc = filtered_bboxes[i].ravel()
                            conf = filtered_confs[i]

                            #print "Location: ", loc
                            #print "Conf: ", conf

                            # Plot the predicted location in red
                            xmin, ymin, xmax, ymax = loc * cfg.INPUT_SIZE
                            plt.plot([xmin, xmax, xmax, xmin, xmin],
                                     [ymin, ymin, ymax, ymax, ymin], 'r-')

                        # Convert the bounding boxes to the original image dimensions
                        converted_bboxes = convert_proposals(
                            bboxes=filtered_bboxes,
                            offset=patch_offsets[b],
                            patch_dims=patch_dims[b],
                            image_dims=image_height_widths[b],
                            is_flipped=patch_is_flipped[b])

                        plt.figure('Resized')
                        plt.imshow(original_image)
                        num_detections_to_render = min(
                            converted_bboxes.shape[0], 10)
                        for i in range(num_detections_to_render):

                            loc = converted_bboxes[i].ravel()
                            conf = filtered_confs[i]

                            #print "Location: ", loc
                            #print "Conf: ", conf

                            # Plot the predicted location in red
                            xmin, ymin, xmax, ymax = loc * cfg.INPUT_SIZE
                            plt.plot([xmin, xmax, xmax, xmin, xmin],
                                     [ymin, ymin, ymax, ymax, ymin], 'r-')

                        # Empty input advances to the next patch; anything
                        # else ends the session.
                        r = raw_input("press button: ")
                        if r != "":
                            done = True
                            break

                        plt.close('all')

                    step += 1
                    print print_str % (step, (dt / cfg.BATCH_SIZE) * 1000)

            except tf.errors.OutOfRangeError as e:
                # The single-epoch input queue is exhausted; fall through to
                # shutdown.
                pass

            coord.request_stop()
            coord.join(threads)
from data import (enum_train_with_progress, revert_preprocess,
                  im_visualize_before, im_visualize_after,
                  enum_test_with_progress, len_train, len_test)
from model import build
import config
import os
import numpy as np
import tensorlayer as tl
import tensorflow as tf

# Two models are returned by build(); both are kept for later use.
model_eval, model_train = build()
optimizer = tf.optimizers.Adam(learning_rate=config.learning_rate)


def l2_loss(a, b):
    """Return the mean of the element-wise squared difference of `a` and `b`."""
    difference = a - b
    return tf.reduce_mean(tf.square(difference))


def kl_loss(mean, logstdev):
    """Return -0.5 * mean(1 + logstdev - exp(logstdev) - mean**2).

    NOTE(review): this matches the usual Gaussian KL term only if `logstdev`
    actually holds a log-variance -- confirm against the model's outputs.
    """
    per_element = 1 + logstdev - tf.exp(logstdev) - mean**2
    return -0.5 * tf.reduce_mean(per_element)


# Output locations: snapshots, visualisations and summary logs.
tl.files.exists_or_mkdir(config.save_snapshot_to)
tl.files.exists_or_mkdir(config.save_visualization_to)
writer = tf.summary.create_file_writer(config.save_logs_to)

for epoch in range(config.cnt_epoch):
    print('Epoch %d/%d' % (epoch, config.cnt_epoch))
    # Summaries written from here on are stamped with the epoch index.
    tf.summary.experimental.set_step(epoch)
def test(tfrecords, bbox_priors, checkpoint_dir, specific_model_path, save_dir, max_detections, cfg):
    """Run the detector over `tfrecords` and save the detections as JSON.

    For every image of every batch the `max_detections` most confident
    predictions are turned into boxes (prior + predicted offset) and collected.
    The list is written to ``<save_dir>/results-<global_step>.json``.

    Args:
        tfrecords: input source handed to `construct_network_input_nodes`.
        bbox_priors: indexable collection of prior boxes; entry `index` is
            added to the predicted location with the same index.
        checkpoint_dir: directory searched for the latest checkpoint when
            `specific_model_path` is None.
        specific_model_path: checkpoint to restore, or None.
        save_dir: directory that receives the results file.
        max_detections: maximum number of detections kept per image.
        cfg: configuration object (BATCH_SIZE, MAX_NUM_BBOXES, ...).

    Returns:
        None. When no checkpoint can be found the function returns early and
        no results file is written.
    """
    graph = tf.Graph()

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        #device_filters = device_filters,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.PER_PROCESS_GPU_MEMORY_FRACTION
        )
    )
    sess = tf.Session(graph=graph, config=sess_config)

    with graph.as_default(), sess.as_default():

        images, batched_bboxes, batched_num_bboxes, image_ids = construct_network_input_nodes(
            tfrecords=tfrecords,
            max_num_bboxes=cfg.MAX_NUM_BBOXES,
            num_epochs=1,
            batch_size=cfg.BATCH_SIZE,
            num_threads=cfg.NUM_INPUT_THREADS,
            add_summaries=False,
            augment=cfg.AUGMENT_IMAGE,
            shuffle_batch=False,
            cfg=cfg
        )

        features = model.build(graph, images, cfg)
        locations, confidences = model.add_detection_heads(
            graph,
            features,
            num_bboxes_per_cell=5,
            batch_size=cfg.BATCH_SIZE,
            cfg=cfg
        )

        # Restore the moving average variables for the conv filters, beta and gamma for
        # batch normalization and the softmax params
        ema = tf.train.ExponentialMovingAverage(decay=cfg.MOVING_AVERAGE_DECAY)
        shadow_vars = {}
        for collection in ('conv_params', 'batchnorm_params',
                           'softmax_params', 'batchnorm_mean_var'):
            shadow_vars.update({
                ema.average_name(var): var
                for var in graph.get_collection(collection)
            })

        # Restore the parameters
        saver = tf.train.Saver(shadow_vars, reshape=True)

        fetches = [locations, confidences, image_ids]

        coord = tf.train.Coordinator()

        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()  # This is needed for the filename_queue

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        detection_results = []
        try:

            if specific_model_path is None:
                ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    specific_model_path = ckpt.model_checkpoint_path
                else:
                    print('No checkpoint file found')
                    return

            # Restores from checkpoint
            saver.restore(sess, specific_model_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = int(specific_model_path.split('/')[-1].split('-')[-1])
            print("Found model for global step: %d" % (global_step,))

            print_str = ', '.join([
                'Step: %d',
                'Time/image (ms): %.1f'
            ])

            step = 0
            while not coord.should_stop():

                start = time.time()
                outputs = sess.run(fetches)
                dt = time.time() - start

                locs = outputs[0]
                confs = outputs[1]
                img_ids = outputs[2]

                for b in range(cfg.BATCH_SIZE):

                    # Prediction indices, most confident first.
                    indices = np.argsort(confs[b].ravel())[::-1]
                    img_id = img_ids[b]

                    for index in indices[0:max_detections]:
                        loc = locs[b][index].ravel()
                        conf = confs[b][index]
                        prior = np.array(bbox_priors[index])

                        pred_xmin, pred_ymin, pred_xmax, pred_ymax = prior + loc

                        # Not sure what we want to do here. Its interesting that we don't enforce this anywhere in the model
                        if pred_xmin > pred_xmax:
                            pred_xmin, pred_xmax = pred_xmax, pred_xmin
                        if pred_ymin > pred_ymax:
                            pred_ymin, pred_ymax = pred_ymax, pred_ymin

                        # Plain Python numbers keep json.dump independent of
                        # the numpy dtype of the priors / predictions.
                        detection_results.append({
                            "image_id": int(img_id),  # converts from np.array
                            "bbox": [float(pred_xmin), float(pred_ymin),
                                     float(pred_xmax), float(pred_ymax)],
                            "score": float(conf),  # converts from np.array
                        })

                step += 1
                print(print_str % (step, (dt / cfg.BATCH_SIZE) * 1000))

        except tf.errors.OutOfRangeError:
            # The single-epoch input queue is exhausted: normal termination.
            pass
        finally:
            # Always stop the queue-runner threads, including on the early
            # "no checkpoint" return and on unexpected errors.
            coord.request_stop()
            coord.join(threads)

        # save the results
        save_path = os.path.join(save_dir, "results-%d.json" % global_step)
        with open(save_path, 'w') as f:
            json.dump(detection_results, f)
# Training pipeline: generator -> map_func -> batches of BATCH_SIZE -> prefetch.
dataset = train_dataset.DataPipeLine(train_path, train=True, onehot=True)
# Unpacking should happen in this first step rather than inside map, because
# map may otherwise run into shapes that cannot be stacked.
dataset = (
    tf.data.Dataset.from_generator(
        dataset.generator,
        output_types=(tf.float32, tf.float32),
        # NOTE: (10) is the plain integer 10, not a 1-tuple.
        output_shapes=((28, 28), (10)))
    .map(map_func, num_parallel_calls=num_threads)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
# for A,B in dataset:
#     print(A.shape,B.shape)
#     for i in range(100):
#         print(A[i,:])
#     break

# Test pipeline: same generator class with train=False, mapped through
# map_func_z and delivered one element per batch.
test_set = train_dataset.DataPipeLine(train_path, train=False, onehot=True)
# In practice this serves no real purpose: it is merely a stack of noise used
# as input.
test_set = (
    tf.data.Dataset.from_generator(
        test_set.generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=((28, 28), (10)))
    .map(map_func_z, num_parallel_calls=num_threads)
    .batch(1)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
# for i,noise in enumerate(test_set):
#     print(i,noise.shape)

# NOTE: from here on the name `model` refers to the DCGAN instance, no longer
# to the `model` module it was created from.
model = model.DCGAN(
    train_set=dataset,
    test_set=test_set,
    loss_name="LSGAN",
    mixed_precision=True,
    learning_rate=2e-4,
    tmp_path=tmp_path,
    out_path=out_path)
model.build(input_shape_G=[None, 100], input_shape_D=[None, 28, 28, 1])
model.train(epoches=EPOCHES)
# Passed to model.build below; the loss indexes output_evds[0] and
# output_evds[1], i.e. exactly two evidence outputs.
evidence_count = 2
if compute_tree_exists:
    # Reuse a previously pickled (inputs, outputs, params, grads) tuple.
    # NOTE(review): the file handle is never closed and the file is opened in
    # text mode -- confirm this is acceptable for the pickle protocol in use.
    inputs, outputs, params, grads = pickle.load(open("compute_tree.pkl"))
else:
    print "Creating compute tree...",
    P = Parameters()
    # Symbolic inputs: three integer vectors fed to the model, plus the
    # supervision targets (evidence indices and answer label).
    story = T.ivector('story')
    idxs = T.ivector('idxs')
    qstn = T.ivector('qstn')
    ans_evds = T.ivector('ans_evds')
    ans_lbl = T.iscalar('ans_lbl')

    attention = model.build(P,
                            word_rep_size=128,
                            stmt_hidden_size=128,
                            diag_hidden_size=128,
                            vocab_size=vocab_size,
                            output_size=vocab_size,
                            map_fun_size=128,
                            evidence_count=evidence_count)

    output_evds, output_ans = attention(story, idxs, qstn)
    # Negative log-likelihood of the answer label plus that of each of the two
    # evidence targets.
    cross_entropy = -T.log(output_ans[ans_lbl]) \
        + -T.log(output_evds[0][ans_evds[0]]) \
        + -T.log(output_evds[1][ans_evds[1]])
    #cost += -T.log(ordered_probs(output_evds,ans_e.vds))
    print "Done."
    print "Parameter count:", P.parameter_count()

    print "Calculating gradient expression...",
    params = P.values()
    # The training cost is the summed cross-entropy defined above.
    cost = cross_entropy
def train(
        X_train, X_valid, y_train, y_valid, weights_file=None, init_file=None):
    """Train the network from `model.build` and keep the best weights.

    Runs up to `max_epochs` epochs (in practice until Ctrl-C). After every
    epoch the mean training/validation loss and accuracy are printed; the
    weights of the epoch with the lowest mean validation loss are remembered,
    snapshotted every `snapshot_every` epochs and saved once more at the end.
    Finally the loss curves are written to ``logs/<timestamp>.png``.

    Args:
        X_train, y_train: training samples and labels, indexable by the batch
            index arrays produced by `network.get_batch_idx`.
        X_valid, y_valid: validation samples and labels, same conventions.
        weights_file: destination passed to `network.save_weights`.
        init_file: optional weights file used to initialise the network;
            when None the network keeps its random initialisation.

    Returns:
        None.
    """
    # model parameters
    wd = 0.0005
    bs = 128
    base_lr = 0.01
    gamma = 0.0001
    p = 0.75
    mntm = 0.9
    fixed_bs = True
    mc_dropout = True
    #mc_dropout = False
    # "inv" decay: lr = base_lr * (1 + gamma * iteration) ** -p
    lr_update = lambda itr: base_lr * (1 + gamma * itr) ** (-p)
    snapshot_every = 5
    max_epochs = 100000

    print('building model...')
    l_out = model.build(bs, np.unique(y_train).shape[0])
    network.print_layers(l_out)

    # check if we need to load pre-trained weights
    if init_file is not None:
        print('initializing weights from %s...' % (init_file))
        network.init_weights(l_out, init_file)
    else:
        print('initializing weights randomly...')

    # do theano stuff
    print('creating shared variables...')
    lr_shared = theano.shared(floatX(base_lr))

    print('compiling theano functions...')
    # NOTE(review): the training function is built from the constant base_lr,
    # while the decayed rate is only written to lr_shared below -- confirm
    # that iter_funcs actually picks up the shared variable.
    train_iter = iter_funcs.create_iter_funcs_train(l_out, base_lr, mntm, wd)
    valid_iter = iter_funcs.create_iter_funcs_valid(
        l_out, bs, N=50, mc_dropout=mc_dropout)

    # prepare to start training
    best_epoch = -1
    best_train_losses_mean, best_valid_losses_mean = np.inf, np.inf
    # Initialised up front so that an interrupt during the first epoch (or a
    # NaN validation loss) cannot leave the name undefined further down.
    best_weights = None
    lr = base_lr
    print('starting training at %s' % (
        network.get_current_time()))
    epoch_train_losses, epoch_valid_losses = [], []
    gradient_updates = 0
    epochs = []

    # start training
    try:
        for epoch in range(1, max_epochs + 1):
            t_epoch_start = time()

            train_losses, train_accs = [], []
            # run training for each batch
            for train_idx in network.get_batch_idx(
                    X_train.shape[0], bs, fixed=fixed_bs, shuffle=True):
                X_train_batch = X_train[train_idx]
                y_train_batch = y_train[train_idx]

                #print X_train_batch.shape, y_train_batch.shape
                train_loss, train_acc = train_iter(
                    X_train_batch, y_train_batch)
                #train_loss, train_acc = 0, 0

                # learning rate policy
                gradient_updates += 1
                lr = lr_update(gradient_updates)
                lr_shared.set_value(floatX(lr))

                train_losses.append(train_loss)
                train_accs.append(train_acc)

            # run validation for each batch
            valid_losses, valid_accs = [], []
            for valid_idx in network.get_batch_idx(
                    X_valid.shape[0], bs, fixed=fixed_bs, shuffle=False):
                X_valid_batch = X_valid[valid_idx]
                y_valid_batch = y_valid[valid_idx]

                #print X_valid_batch.shape, y_valid_batch.shape
                valid_loss, valid_acc = valid_iter(
                    X_valid_batch, y_valid_batch)
                #valid_loss, valid_acc = 0, 0

                valid_losses.append(valid_loss)
                valid_accs.append(valid_acc)

            # average over the batches
            train_losses_mean = np.mean(train_losses)
            train_accs_mean = np.mean(train_accs)
            valid_losses_mean = np.mean(valid_losses)
            valid_accs_mean = np.mean(valid_accs)

            epochs.append(epoch)
            epoch_train_losses.append(train_losses_mean)
            epoch_valid_losses.append(valid_losses_mean)

            # display useful info; an improved epoch is printed in green
            epoch_color = ('', '')
            if valid_losses_mean < best_valid_losses_mean:
                best_epoch = epoch
                best_train_losses_mean = train_losses_mean
                best_valid_losses_mean = valid_losses_mean
                best_weights = layers.get_all_param_values(l_out)
                epoch_color = ('\033[32m', '\033[0m')

            t_epoch_end = time()
            duration = t_epoch_end - t_epoch_start
            print('{}{:>4}{} | {:>10.6f} | {:>10.6f} | '
                  '{:>3.2f}% | {:>3.2f}% | '
                  '{:>1.8} | {:>4.2f}s | '.format(
                      epoch_color[0], epoch, epoch_color[1],
                      train_losses_mean, valid_losses_mean,
                      100 * train_accs_mean, 100 * valid_accs_mean,
                      lr, duration))

            # Periodic snapshot; skipped while there is nothing to save yet.
            if (epoch % snapshot_every) == 0 and best_weights is not None:
                network.save_weights(best_weights, weights_file)
    except KeyboardInterrupt:
        print('caught ctrl-c... stopped training.')

    # display final results and save weights
    print('training finished at %s\n' % (
        network.get_current_time()))
    print('best local minimum for validation data at epoch %d' % (
        best_epoch))
    print('  train loss = %.6f' % (
        best_train_losses_mean))
    print('  valid loss = %.6f' % (
        best_valid_losses_mean))
    if best_weights is not None:
        print('saving best weights to %s' % (weights_file))
        network.save_weights(best_weights, weights_file)

    # plot the train/val loss over epochs
    print('plotting training/validation loss...')
    plt.plot(epochs, epoch_train_losses, 'b')
    plt.plot(epochs, epoch_valid_losses, 'g')
    plt.legend(('training', 'validation'))
    plt.ylabel('loss')
    plt.xlabel('epochs')
    if epochs:
        # Only meaningful once at least one epoch has completed.
        plt.xlim((1, epochs[-1]))
    train_val_log = join('logs', '%s.png' % network.get_current_time())
    plt.savefig(train_val_log, bbox_inches='tight')
# Training pipeline: generator -> map_func -> batches of BATCH_SIZE -> prefetch.
dataset = train_dataset.DataPipeLine(train_path)
# Unpacking should happen in this first step rather than inside map, because
# map may otherwise run into shapes that cannot be stacked.
dataset = (
    tf.data.Dataset.from_generator(
        dataset.generator,
        output_types=(tf.float32),
        output_shapes=((218, 178, 3)))
    .map(map_func, num_parallel_calls=num_threads)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
# for A,b in dataset:
#     print(A.shape)
#     for i in range(100):
#         print(A[i,:])
#     break

# Test pipeline: built from the same path, mapped through map_func_z and
# delivered one element per batch.
test_set = train_dataset.DataPipeLine(train_path)
# In practice this serves no real purpose: it is merely a stack of noise used
# as input.
test_set = (
    tf.data.Dataset.from_generator(
        test_set.generator,
        output_types=(tf.float32),
        output_shapes=((218, 178, 3)))
    .map(map_func_z, num_parallel_calls=num_threads)
    .batch(1)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
# for i,noise in enumerate(test_set):
#     print(i,noise.shape)

# NOTE: from here on the name `model` refers to the DCGAN instance, no longer
# to the `model` module it was created from.
model = model.DCGAN(
    train_set=dataset,
    test_set=test_set,
    loss_name="WGAN-GP",
    mixed_precision=True,
    learning_rate=2e-4,
    tmp_path=tmp_path,
    out_path=out_path)
model.build(input_shape_G=[None, 100], input_shape_D=[None, 112, 80, 3])
model.train(epoches=EPOCHES)
# Load the pickled train/test batches. The files are opened in `with` blocks
# so the handles are closed as soon as the data has been read.
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
with open('train_batch', "rb") as batch_file:
    data_train = cPickle.load(batch_file)
with open('test_batch', "rb") as batch_file:
    data_test = cPickle.load(batch_file)

x_train = data_train['data']
y_train = data_train['labels']

x_test = data_test['data']
y_test = data_test['labels']

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Scale the inputs by 1/255 after converting them to float32.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# The model input shape is the per-sample shape of the training data.
model = m.build(x_train.shape[1:], num_classes)

# The test split doubles as validation data during fitting.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True)

model.save_weights('weights.hdf5')
def _plot_detection(fig, loc, prior, conf, rank, color, text_pos, input_size):
    """Outline one predicted box on the current axes and caption it on `fig`.

    `color` is a (matplotlib code, readable name) pair; `text_pos` is the
    (x, y) figure position of the caption.
    """
    xmin, ymin, xmax, ymax = (prior + loc) * input_size
    plt.plot([xmin, xmax, xmax, xmin, xmin],
             [ymin, ymin, ymax, ymax, ymin],
             '%s-' % color[0])
    caption = "BBox %d, color: %s, confidence : %0.3f\n" % (rank, color[1], conf)
    fig.text(text_pos[0], text_pos[1], caption)


def test(bbox_priors, checkpoint_dir, specific_model_path, cfg):
    """Interactively run the detector on image files typed in by the user.

    Restores the moving-average weights from a checkpoint, then loops:
    ask for a file path (with tab completion), run the network on the resized
    image, show it with the most confident box, and draw one further box for
    every `b` the user enters. `n` moves on to the next image; any other
    input ends the session.

    Args:
        bbox_priors: indexable collection of prior boxes; entry `i` is added
            to the predicted location with the same index.
        checkpoint_dir: directory searched for the latest checkpoint when
            `specific_model_path` is None.
        specific_model_path: checkpoint to restore, or None.
        cfg: configuration object (INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, ...).

    Returns:
        None.
    """
    graph = tf.Graph()

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        #device_filters = device_filters,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.PER_PROCESS_GPU_MEMORY_FRACTION
        )
    )
    sess = tf.Session(graph=graph, config=sess_config)

    with graph.as_default(), sess.as_default():

        images = tf.placeholder(tf.float32, [1, 299, 299, 3])

        features = model.build(graph, images, cfg)
        locations, confidences = model.add_detection_heads(
            graph, features, num_bboxes_per_cell=5,
            batch_size=cfg.BATCH_SIZE, cfg=cfg)

        # Restore the moving average variables for the conv filters, beta and gamma for
        # batch normalization and the softmax params
        ema = tf.train.ExponentialMovingAverage(decay=cfg.MOVING_AVERAGE_DECAY)
        shadow_vars = {}
        for collection in ('conv_params', 'batchnorm_params',
                           'softmax_params', 'batchnorm_mean_var'):
            shadow_vars.update({
                ema.average_name(var): var
                for var in graph.get_collection(collection)
            })

        # Restore the parameters
        saver = tf.train.Saver(shadow_vars, reshape=True)

        fetches = [locations, confidences]

        coord = tf.train.Coordinator()

        tf.initialize_all_variables().run()

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # File path tab completion. A bit lazy...
        def complete(text, state):
            # readline asks for state 0, 1, 2, ... until it receives None.
            options = glob.glob(text + '*') + [None]
            return options[state]

        readline.set_completer_delims('\t')
        readline.parse_and_bind("tab: complete")
        readline.set_completer(complete)

        try:

            if specific_model_path is None:
                ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    specific_model_path = ckpt.model_checkpoint_path
                else:
                    print('No checkpoint file found')
                    return

            # Restores from checkpoint
            saver.restore(sess, specific_model_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = int(specific_model_path.split('/')[-1].split('-')[-1])
            print("Found model for global step: %d" % (global_step,))

            plt.ion()

            bbox_colors = [
                ('r', 'red'), ('b', 'blue'), ('g', 'green'), ('c', 'cyan'),
                ('m', 'magenta'), ('y', 'yellow'), ('k', 'black'), ('w', 'white')
            ]

            done = False
            fig = plt.figure("Detected Objects")
            while not coord.should_stop() and not done:

                file_path = raw_input("File Path:")
                # isfile() is False for missing paths as well as directories.
                while not os.path.isfile(file_path):
                    file_path = raw_input("File Path:")

                img = imread(file_path)
                resized_image, h, w = resize_image_maintain_aspect_ratio(
                    img.astype(np.float32), cfg.INPUT_SIZE, cfg.INPUT_SIZE)
                preped_image = (resized_image - cfg.IMAGE_MEAN) / cfg.IMAGE_STD
                imgs = np.expand_dims(preped_image, axis=0)

                outputs = sess.run(fetches, {images: imgs})

                locs = outputs[0]
                confs = outputs[1]

                # Show the image. uint8 (not int8) so that pixel values above
                # 127 do not wrap around to negative numbers.
                plt.imshow(resized_image.astype(np.uint8))
                plt.axis('off')

                # pos = [left, bottom, width, height]
                plt.gca().set_position([0, .2, .8, .8])

                # Prediction indices, most confident first.
                indices = np.argsort(confs[0].ravel())[::-1]
                current_index = 0

                # Figure coordinates of the caption column: captions step
                # down by .03 per box and move to x = .5 after five boxes.
                text_x = .03
                text_y = .1

                # The most confident box is always drawn; each further one
                # only when the user asks for it.
                draw_box = True
                while True:
                    if draw_box:
                        if current_index >= len(indices):
                            print("No more bounding boxes for this image.")
                        else:
                            best = indices[current_index]
                            _plot_detection(
                                fig,
                                loc=locs[0][best],
                                prior=bbox_priors[best],
                                conf=confs[0][best][0],
                                rank=current_index,
                                color=bbox_colors[current_index % len(bbox_colors)],
                                text_pos=(text_x, text_y - (.03 * (current_index % 5))),
                                input_size=cfg.INPUT_SIZE)
                            current_index += 1
                            if current_index % 5 == 0:
                                text_x = .5
                            plt.show()

                    answer = raw_input("Press `b` for next bounding box, `n` for next image:")
                    if answer == 'b':
                        draw_box = True
                    elif answer == 'n':
                        break
                    else:
                        done = True
                        break

                plt.clf()

        except tf.errors.OutOfRangeError:
            pass
        finally:
            # Always stop the queue-runner threads, including on the early
            # "no checkpoint" return and on unexpected errors.
            coord.request_stop()
            coord.join(threads)
def weight_norm(u,norm=1.9356):
    """Rescale `u` so that its L2 norm along axis 0 is at most `norm`.

    Slices whose norm is already below `norm` are left (numerically almost)
    unchanged; the 1e-8 term guards against division by zero.
    """
    in_norm = T.sqrt(T.sum(T.sqr(u),axis=0))
    ratio = T.minimum(norm,in_norm) / (in_norm + 1e-8)
    return ratio * u


def normalise_weights(updates):
    """Apply `weight_norm` to the update of every parameter named ``W...``.

    Args:
        updates: iterable of ``(parameter, update)`` pairs.

    Returns:
        A list of ``(parameter, update)`` pairs in the same order. Updates of
        parameters whose name does not start with ``'W'`` -- including
        parameters without a name (``name is None``) -- are passed through
        untouched.
    """
    return [
        (p, weight_norm(u) if (p.name or '').startswith('W') else u)
        for p,u in updates
    ]


if __name__ == "__main__":
    P = Parameters()
    extract,_ = model.build(P, "vrnn")
    X = T.tensor3('X')
    l = T.ivector('l')
    [Z_prior_mean, Z_prior_std,
     Z_mean, Z_std, X_mean, X_std] = extract(X,l)

    parameters = P.values()
    batch_cost = model.cost(X, Z_prior_mean, Z_prior_std,
                            Z_mean, Z_std, X_mean, X_std,l)
    print("Calculating gradient...")
    print(parameters)
    # X.shape[1] is used as the batch size when averaging the gradients.
    batch_size = T.cast(X.shape[1],'float32')

    gradients = T.grad(batch_cost,wrt=parameters)
    gradients = [ g / batch_size for g in gradients ]
    gradients = clip(5,parameters,gradients)
def visual_test(tfrecords, bbox_priors, checkpoint_dir, specific_model_path, cfg):
    """Visually compare predictions against ground truth, image by image.

    For every image the ground-truth boxes are drawn in blue, the most
    confident predictions (as many as there are ground-truth boxes) in red and
    the prior each prediction is based on in green. The user presses Enter to
    advance; any other input ends the session.

    Args:
        tfrecords: input source handed to `construct_network_input_nodes`.
        bbox_priors: indexable collection of prior boxes; entry `index` is
            added to the predicted location with the same index.
        checkpoint_dir: directory searched for the latest checkpoint when
            `specific_model_path` is None.
        specific_model_path: checkpoint to restore, or None.
        cfg: configuration object (BATCH_SIZE, INPUT_SIZE, IMAGE_MEAN, ...).

    Returns:
        None.
    """
    graph = tf.Graph()

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        #device_filters = device_filters,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.PER_PROCESS_GPU_MEMORY_FRACTION
        )
    )
    sess = tf.Session(graph=graph, config=sess_config)

    with graph.as_default(), sess.as_default():

        images, batched_bboxes, batched_num_bboxes, paths = construct_network_input_nodes(
            tfrecords=tfrecords,
            max_num_bboxes=cfg.MAX_NUM_BBOXES,
            num_epochs=None,
            batch_size=cfg.BATCH_SIZE,
            num_threads=cfg.NUM_INPUT_THREADS,
            add_summaries=False,
            augment=cfg.AUGMENT_IMAGE,
            shuffle_batch=False,
            cfg=cfg
        )

        features = model.build(graph, images, cfg)
        locations, confidences = model.add_detection_heads(
            graph, features, num_bboxes_per_cell=5,
            batch_size=cfg.BATCH_SIZE, cfg=cfg)

        # Restore the moving average variables for the conv filters, beta and gamma for
        # batch normalization and the softmax params
        ema = tf.train.ExponentialMovingAverage(decay=cfg.MOVING_AVERAGE_DECAY)
        shadow_vars = {}
        for collection in ('conv_params', 'batchnorm_params',
                           'softmax_params', 'batchnorm_mean_var'):
            shadow_vars.update({
                ema.average_name(var): var
                for var in graph.get_collection(collection)
            })

        # Restore the parameters
        saver = tf.train.Saver(shadow_vars, reshape=True)

        fetches = [locations, confidences, images, batched_bboxes, batched_num_bboxes]

        coord = tf.train.Coordinator()

        tf.initialize_all_variables().run()

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        plt.ion()

        try:

            if specific_model_path is None:
                ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    specific_model_path = ckpt.model_checkpoint_path
                else:
                    print('No checkpoint file found')
                    return

            # Restores from checkpoint
            saver.restore(sess, specific_model_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = int(specific_model_path.split('/')[-1].split('-')[-1])
            print("Found model for global step: %d" % (global_step,))

            done = False
            while not coord.should_stop() and not done:

                outputs = sess.run(fetches)

                locs = outputs[0]
                confs = outputs[1]
                imgs = outputs[2]
                gt_bboxes = outputs[3]
                gt_num_bboxes = outputs[4]

                print(locs.shape)
                print(confs.shape)

                for b in range(cfg.BATCH_SIZE):

                    # Show the image (undo the mean/std normalisation first)
                    image = imgs[b]
                    plt.imshow((image * cfg.IMAGE_STD + cfg.IMAGE_MEAN).astype(np.uint8))

                    # Draw the GT Boxes in blue
                    for i in range(gt_num_bboxes[b]):
                        gt_bbox = gt_bboxes[b][i]
                        xmin, ymin, xmax, ymax = gt_bbox * cfg.INPUT_SIZE
                        plt.plot([xmin, xmax, xmax, xmin, xmin],
                                 [ymin, ymin, ymax, ymax, ymin], 'b-')

                    # Draw the most confident boxes in red
                    indices = np.argsort(confs[b].ravel())[::-1]
                    for i, index in enumerate(indices[0:gt_num_bboxes[b]]):

                        loc = locs[b][index].ravel()
                        conf = confs[b][index]
                        prior = np.array(bbox_priors[index])

                        xmin, ymin, xmax, ymax = (prior + loc) * cfg.INPUT_SIZE
                        plt.plot([xmin, xmax, xmax, xmin, xmin],
                                 [ymin, ymin, ymax, ymax, ymin], 'r-')

                        # Draw the prior box that was chosen
                        xmin, ymin, xmax, ymax = prior * cfg.INPUT_SIZE
                        plt.plot([xmin, xmax, xmax, xmin, xmin],
                                 [ymin, ymin, ymax, ymax, ymin], 'g-')

                        print("Pred Confidence for box %d: %f" % (i, conf))

                    plt.show()

                    answer = raw_input("push button")
                    if answer != '':
                        done = True
                        break
                    plt.clf()

        except tf.errors.OutOfRangeError:
            pass
        finally:
            # Always stop the queue-runner threads, including on the early
            # "no checkpoint" return and on unexpected errors.
            coord.request_stop()
            coord.join(threads)
def weight_norm(u, norm=1.9356):
    """Rescale `u` so that its L2 norm along axis 0 is at most `norm`.

    Slices whose norm is already below `norm` are left (numerically almost)
    unchanged; the 1e-8 term guards against division by zero.
    """
    in_norm = T.sqrt(T.sum(T.sqr(u), axis=0))
    ratio = T.minimum(norm, in_norm) / (in_norm + 1e-8)
    return ratio * u


def normalise_weights(updates):
    """Apply `weight_norm` to the update of every parameter named ``W...``.

    Args:
        updates: iterable of ``(parameter, update)`` pairs.

    Returns:
        A list of ``(parameter, update)`` pairs in the same order. Updates of
        parameters whose name does not start with ``'W'`` -- including
        parameters without a name (``name is None``) -- are passed through
        untouched.
    """
    return [(p, weight_norm(u) if (p.name or '').startswith('W') else u)
            for p, u in updates]


if __name__ == "__main__":
    P = Parameters()
    extract, _ = model.build(P, "vrnn")
    X = T.tensor3('X')
    l = T.ivector('l')
    [Z_prior_mean, Z_prior_std, Z_mean, Z_std, X_mean, X_std] = extract(X, l)

    parameters = P.values()
    batch_cost = model.cost(X, Z_prior_mean, Z_prior_std, Z_mean, Z_std,
                            X_mean, X_std, l)
    print("Calculating gradient...")
    print(parameters)
    # X.shape[1] is used as the batch size when averaging the gradients.
    batch_size = T.cast(X.shape[1], 'float32')

    gradients = T.grad(batch_cost, wrt=parameters)
    gradients = [g / batch_size for g in gradients]
    gradients = clip(5, parameters, gradients)
from tensorflow.examples.tutorials.mnist import input_data from model import build # 재현을 위해 rand seed 설정 tf.set_random_seed(777) # load mnist data mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) batch_size = 250 norm = 0.15 dropout_rate = tf.placeholder(tf.float32) true_labels = tf.placeholder(tf.float32, [None, 10]) data = tf.placeholder(tf.float32, [None, 28, 28, 1]) train, cost, accuracy = build(data, true_labels, dropout_rate) saver = tf.train.Saver() with tf.device('/gpu:0'): sess = tf.Session() sess.run(tf.global_variables_initializer()) saver.restore(sess, "model_9963/model.ckpt") xs, ys = mnist.test.images, mnist.test.labels j = 0 acc = 0 while len(xs) > j * batch_size: test_xs, test_ys = xs[j * batch_size:j * batch_size + batch_size], ys[ j * batch_size:j * batch_size + batch_size] acc += accuracy(sess, test_xs.reshape(len(test_ys), 28, 28, 1) - norm, test_ys)
logging.info("Loading model.") P.load(output_file) def build_validation_callback(P): def validation_callback(prev_score, curr_score): if curr_score < prev_score: save(P) return validation_callback if __name__ == "__main__": config.parse_args() P = Parameters() predict = model.build(P) X = T.matrix('X') layers, _ = predict(X) inputs = [X] + layers[:-1] # Make smaller. W = P["W_classifier_input_0"] W.set_value(W.get_value() / 4) Ws = [P["W_classifier_input_0"] ] + [P["W_classifier_%d" % i] for i in xrange(1, len(layers))] bs = [P["b_classifier_input"] ] + [P["b_classifier_%d" % i] for i in xrange(1, len(layers))] sizes = [W.get_value().shape[0] for W in Ws] train_fns = []
import os
import sys
import init
import numpy as np
import tensorflow as tf
import model

sess = tf.InteractiveSession()

# Fixed-size input and label placeholders (batch size comes from the model).
_x = tf.placeholder(tf.float32, (model.BATCH_SIZE, 32, 32, 3))
_y = tf.placeholder(tf.float32, (model.BATCH_SIZE, 10))

o = model.build(_x, _y)
model.init(sess)
lo = model.loss()

train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(lo)

# Accuracy: fraction of samples whose arg-max output matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(o, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

for step in range(30000):
    batch_x, batch_y = init.genxy(model.BATCH_SIZE)
    feed = {_x: batch_x, _y: batch_y}
    train_step.run(feed_dict=feed)
    # Report the accuracy on the batch just trained on, every 1000 steps.
    if step % 1000 == 0:
        print(accuracy.eval(feed_dict=feed))
import io
import logging

from flask import Flask, current_app, request, jsonify

from model import build

# The predictor is built once at import time and shared by all requests.
pr = build()

app = Flask(__name__)


@app.route('/predict', methods=['POST'])
def predict():
    """
    Send document, receive back entity with designation

    Expects a JSON object with a ``data`` key; anything else (missing or
    malformed JSON, a non-object body, no ``data`` key) is answered with a
    JSON ``400 Bad Request``.

    Use:
        curl -i -H "Content-Type: application/json" -X POST -d '{"data":"my printer does not work. Also my iPhone screen is cracked"}' http://0.0.0.0:8080/predict
    """
    # silent=True yields None instead of raising on a missing/invalid body,
    # so every malformed request funnels into the explicit check below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'data' not in payload:
        return jsonify(status_code='400', msg='Bad Request'), 400

    predictions = pr.spanMatcher(payload['data'])
    current_app.logger.info('Predictions: %s', predictions)

    return jsonify(predictions=predictions)


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger, and the
    # server listens on all interfaces -- do not expose this configuration
    # outside a trusted development network.
    app.run(host='0.0.0.0', port=8080, debug=True)
# -*- coding: utf-8 -*- import os, sys, time import numpy as np import model import vocab from config import config data_dir = "text" model_dir = "model" dataset, config.n_vocab, config.n_dataset = vocab.load(data_dir) lm = model.build() lm.load(model_dir) n_epoch = 1000 n_train = 5000 batchsize = 64 total_time = 0 # 長すぎるデータはメモリに乗らないこともあります max_length_of_chars = 100 # 学習初期は短い文章のみ学習し、徐々に長くしていきます。 # この機能が必要ない場合は最初から大きな値を設定します。 current_length_limit = 15 def make_batch(): target_batch_array = [] max_length_in_batch = 0 for b in xrange(batchsize): length = current_length_limit + 1 while length > current_length_limit:
# Command-line interface of the sampling script.
# NOTE(review): the parser description says "Train song embeddings." although
# the options below describe sampling from a checkpoint -- confirm.
parser = argparse.ArgumentParser(description='Train song embeddings.')
parser.add_argument('--config', '-c', required=True, help='Config file')
parser.add_argument('--ckpt', required=True,
                    help='TensorFlow checkpoint file')
parser.add_argument('--song_id', required=True, type=int,
                    help='ID of the song to sample')
parser.add_argument('--n_samples', type=int, default=100,
                    help='Number of sequential samples to take')
args = parser.parse_args()

# From here on `config` is the loaded configuration, no longer the module.
config = config.load(args.config)

# Graph inputs: a vector of song ids and a rank-3 float tensor whose last
# dimension is config['num_features'].
input_song_ids = tf.placeholder(tf.int32, [None])
target_feature_sequences = tf.placeholder(
    tf.float32,
    [None, None, config['num_features']],
)
# NOTE(review): 382 is a hard-coded positional argument to model.build;
# its meaning is not visible here -- confirm against model.build.
feature_outputs = model.build(config, 382, input_song_ids,
                              target_feature_sequences)

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, args.ckpt)
    print('Model restored.')
    # Seed the output list with a single all-zero feature frame.
    outputs = [np.zeros((1, 1, config['num_features']))]
    # This is super inefficient since it does not use the known hidden states
    # and instead recomputes from scratch
    for i in range(args.n_samples):
        # Progress output: show the latest frame every 50 iterations.
        if (i + 1) % 50 == 0:
            print(outputs[-1])
def log_softmax(output):
    """Return the logarithm of a softmax output, computed stably if possible.

    When `output` was produced directly by the softmax op, the log-softmax is
    recomputed from the op's input with the max-subtraction trick
    (``x - (log(sum(exp(x - max))) + max)``). Otherwise the plain logarithm
    of `output` is returned.
    """
    owner = getattr(output, 'owner', None)
    if owner is not None and owner.op == T.nnet.softmax_op:
        x = owner.inputs[0]
        k = T.max(x,axis=1,keepdims=True)
        sum_x = T.log(T.sum(T.exp(x - k),axis=1,keepdims=True)) + k
        sys.stderr.write("Stable log softmax\n")
        return x - sum_x
    else:
        # Fall back to the direct logarithm of the given expression.
        return T.log(output)


if __name__ == "__main__":
    config.parse_args()

    X = T.matrix('X')
    P = Parameters()
    predict = model.build(P)
    _,outputs = predict(X)
    counts = load_counts()
    # Subtract the log of the normalised counts from the log-softmax output.
    predict = theano.function(
        inputs = [X],
        outputs = log_softmax(outputs) - T.log(counts/T.sum(counts))
    )

    if predict is not None:
        # Read binary ark features from stdin with 5 frames of context on each
        # side and write the predictions for every entry to stdout.
        stream = data_io.context(ark_io.parse_binary(sys.stdin),left=5,right=5)
        for name,frames in stream:
            ark_io.print_ark_binary(sys.stdout,name,predict(frames))
word = False max_time_step = data_handle.char_fixed_length else: char = False word = True max_time_step = data_handle.word_fixed_length print('data type: ', options.DataType) print('max time step: ', max_time_step) hidden_unit = [int(i) for i in options.HiddenUnit.split(' ')] print('hidden unit: ', hidden_unit) print('Epoch: ', options.Epoch) print('display while {0} step'.format(options.DisplayWhileNStep)) print('save while {0} step'.format(options.SaveWhileNStep)) print('val while {0} epoch'.format(options.ValWhileNEpoch)) model.build(embeding_len=embeding_len, batch_size=options.BatchSize, hidden_unit=hidden_unit, max_time_step=max_time_step) model.build_loss() model.train(epoch=options.Epoch, save_path=options.SavePath, save_while_n_step=options.SaveWhileNStep, val_while_n_epoch=options.ValWhileNEpoch, data=data_handle, char=char, word=word, display_shilw_n_step=options.DisplayWhileNStep, basic_lr=options.LearningRate, device=options.UseCpu) pass
import sys
import data
import model
from theano_toolkit.parameters import Parameters
from theano_toolkit import updates

# NOTE(review): `theano` and `T` are used below but are not imported in the
# visible part of this file -- confirm they are brought into scope.

if __name__ == '__main__':
    # Usage: <script> MODEL_FILE TEST_FILE TRAIN_FILE
    model_filename = sys.argv[1]
    test_filename = sys.argv[2]
    train_filename = sys.argv[3]

    P = Parameters()
    data_X, df = data.load_test(test_filename, train_filename)
    f = model.build(P,
                    input_size=data_X.shape[1],
                    hidden_sizes=[256, 128, 64, 32]
                    )
    X = T.matrix('X')
    # The compiled function already thresholds the model output at 0.5.
    predict = theano.function(
        inputs=[X],
        outputs=f(X, test=True) > 0.5,
    )
    P.load(model_filename)

    # Evaluate once and reuse the result instead of running the model twice.
    output = predict(data_X)
    print(data_X.shape)
    print(output.shape)
    print(df.values.shape)

    # NOTE(review): 'probs' receives the thresholded output, so the ranking
    # below is computed on 0/1 values -- confirm this is intended.
    df['probs'] = output
    df['Class'] = 'b'
    # Single .loc assignment instead of chained indexing, which may silently
    # write to a temporary copy.
    df.loc[df.probs > 0.5, 'Class'] = 's'
    df['RankOrder'] = df.probs.rank(ascending=False, method='first').astype(int)
#     plt.imshow(X[0,:,:,0],cmap='gray')
#     plt.axis('off')
#     plt.subplot(2,2,2)
#     plt.imshow(Y[0,:,:,0],cmap='gray')
#     plt.axis('off')
#     plt.subplot(2,2,3)
#     plt.imshow(mX[0,:,:,0],cmap='gray')
#     plt.axis('off')
#     plt.subplot(2,2,4)
#     plt.imshow(mY[0,:,:,0],cmap='gray')
#     plt.axis('off')
#     plt.show()

# Test pipeline: the generator yields four float32 240x240 arrays plus one
# int32 2x2 array per element; elements are mapped, batched and prefetched.
test_set = train_dataset.DataPipeLine(test_path,
                                      target_size=[240, 240],
                                      patch_size=[128, 128])
test_set = (
    tf.data.Dataset.from_generator(
        test_set.generator,
        output_types=(tf.float32, tf.float32, tf.float32, tf.float32, tf.int32),
        output_shapes=([240, 240], [240, 240], [240, 240], [240, 240], [2, 2]))
    .map(map_func, num_parallel_calls=num_threads)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
# for i,(X,Y) in enumerate(dataset):
#     print(i+1,X.shape,Y.dtype)

# NOTE: from here on the name `model` refers to the CycleGAN instance, no
# longer to the `model` module it was created from.
model = model.CycleGAN(
    train_set=dataset,
    test_set=test_set,
    loss_name="WGAN-GP-SN",
    mixed_precision=True,
    learning_rate=1e-4,
    tmp_path=tmp_path,
    out_path=out_path)
model.build(X_shape=[None, 128, 128, 1], Y_shape=[None, 128, 128, 1])
model.train(epoches=EPOCHES)
if compute_tree_exists:
    # Reuse a previously pickled (inputs, outputs, params, grads) tuple.
    # NOTE(review): the file handle is never closed and the file is opened in
    # text mode -- confirm this is acceptable for the pickle protocol in use.
    inputs,outputs,params,grads = pickle.load(open("compute_tree.pkl"))
else:
    print "Creating compute tree...",
    P = Parameters()
    # Symbolic inputs: three integer vectors fed to the model, plus the
    # supervision targets (evidence indices and answer label).
    story = T.ivector('story')
    idxs = T.ivector('idxs')
    qstn = T.ivector('qstn')
    ans_evds = T.ivector('ans_evds')
    ans_lbl = T.iscalar('ans_lbl')

    attention = model.build(P,
        word_rep_size = 128,
        stmt_hidden_size = 128,
        diag_hidden_size = 128,
        vocab_size = vocab_size,
        output_size = vocab_size,
        map_fun_size = 128,
        evidence_count = evidence_count
    )

    output_evds,output_ans = attention(story,idxs,qstn)
    # Negative log-likelihood of the answer label plus that of each of the two
    # evidence targets.
    cross_entropy = -T.log(output_ans[ans_lbl]) \
        + -T.log(output_evds[0][ans_evds[0]]) \
        + -T.log(output_evds[1][ans_evds[1]])
    #cost += -T.log(ordered_probs(output_evds,ans_e.vds))
    print "Done."
    print "Parameter count:", P.parameter_count()

    print "Calculating gradient expression...",
    params = P.values()
def _order_corners(bboxes):
    """Return an array of the boxes with xmin <= xmax and ymin <= ymax in every row."""
    ordered = []
    for pred_xmin, pred_ymin, pred_xmax, pred_ymax in bboxes:
        if pred_xmin > pred_xmax:
            pred_xmin, pred_xmax = pred_xmax, pred_xmin
        if pred_ymin > pred_ymax:
            pred_ymin, pred_ymax = pred_ymax, pred_ymin
        ordered.append([pred_xmin, pred_ymin, pred_xmax, pred_ymax])
    return np.array(ordered)


def _debug_show_detections(figure_name, image, bboxes, confs):
    """Draw boxes one at a time over the image until the user types something (DEBUG aid).

    Each box is scaled to the 299x299 display size, printed with its
    confidence and plotted; an empty answer at the prompt shows the next box.
    The loop ends when the boxes run out.
    """
    plt.figure(figure_name)
    plt.clf()
    plt.imshow(imresize(image, [299, 299]))
    scale = np.array([299, 299, 299, 299])
    for bbox, conf in zip(bboxes, confs):
        xmin, ymin, xmax, ymax = bbox * scale
        print("BBox: [%0.3f, %0.3f, %0.3f, %0.3f]" % (xmin, ymin, xmax, ymax))
        print("Conf:  %s" % (conf,))
        plt.plot([xmin, xmax, xmax, xmin, xmin], [ymin, ymin, ymax, ymax, ymin], 'b-')
        plt.show()
        if raw_input("push button") != "":
            break


def test(tfrecords, bbox_priors, checkpoint_dir, specific_model_path, save_dir, max_detections, cfg):
    """Run detection on whole images plus overlapping patches and save the results as JSON.

    Every image read from the tfrecords is passed through the network once at
    its original size and once for every patch returned by
    `patch_utils.extract_patches`. The patch detections are converted back to
    image coordinates, merged with the whole-image detections, reduced with
    non-maximum suppression, and the top `max_detections` per image are stored.
    The results are written to `<save_dir>/dense-results-<global_step>.json`.
    When the module-level DEBUG flag is set, only five images are processed and
    the detections are shown interactively.

    Args:
      tfrecords (list) : a list of file paths to tfrecords
      bbox_priors (list) : a list of [x1, y1, x2, y2] bounding box priors
      checkpoint_dir (str) : a directory path to where the checkpoints are stored
      specific_model_path (str) : a file path to a specific model; when None the
        latest checkpoint recorded in `checkpoint_dir` is used
      save_dir (str) : a directory path where to store the detection results
      max_detections (int) : the maximum number of detections to store per image
      cfg (EasyDict) : configuration parameters; `cfg.BATCH_SIZE` is overwritten with 1

    Returns:
      None. Nothing is written when no checkpoint can be found.
    """

    # shape: [number of priors, 4]
    bbox_priors = np.array(bbox_priors)

    # Important to override the batch size to ensure it is 1
    cfg.BATCH_SIZE = 1

    graph = tf.Graph()

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        #device_filters = device_filters,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.PER_PROCESS_GPU_MEMORY_FRACTION
        )
    )

    # Entering the session installs it as the default session and closes it on
    # exit, so its resources are released on every path out of this function.
    with graph.as_default(), tf.Session(graph=graph, config=sess_config) as sess:

        # A graph to read one image out of the tfrecord files
        single_image, single_image_id = single_image_input(tfrecords, cfg)

        # A graph to predict bounding boxes
        # The placeholder will be filled in with crops
        # NOTE(review): tf.int8 cannot hold pixel values above 127; if the
        # decoded images are uint8 this should probably be tf.uint8 -- confirm
        # against single_image_input before changing.
        feed_image = tf.placeholder(tf.int8, [None, None, 3])
        feed_image_height = tf.placeholder(tf.int32, [])
        feed_image_width = tf.placeholder(tf.int32, [])

        image_to_prep = tf.reshape(feed_image, tf.pack([feed_image_height, feed_image_width, 3]))
        float_image = tf.cast(image_to_prep, tf.float32)
        resized_image = tf.image.resize_images(float_image, cfg.INPUT_SIZE, cfg.INPUT_SIZE)
        resized_image -= cfg.IMAGE_MEAN
        resized_image /= cfg.IMAGE_STD
        resized_image = tf.expand_dims(resized_image, 0)

        features = model.build(graph, resized_image, cfg)
        locations, confidences = model.add_detection_heads(
            graph,
            features,
            num_bboxes_per_cell=5,
            batch_size=cfg.BATCH_SIZE,
            cfg=cfg
        )

        # Restore the moving average variables for the conv filters, beta and
        # gamma for batch normalization and the softmax params
        ema = tf.train.ExponentialMovingAverage(decay=cfg.MOVING_AVERAGE_DECAY)
        shadow_vars = {}
        for collection in ('conv_params', 'batchnorm_params', 'softmax_params', 'batchnorm_mean_var'):
            shadow_vars.update({
                ema.average_name(var): var
                for var in graph.get_collection(collection)
            })

        # Restore the parameters
        saver = tf.train.Saver(shadow_vars, reshape=True)

        coord = tf.train.Coordinator()

        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()  # This is needed for the filename_queue

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        detection_results = []

        if DEBUG:
            plt.ion()

        try:
            if specific_model_path is None:
                ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    specific_model_path = ckpt.model_checkpoint_path
                else:
                    print('No checkpoint file found')
                    return

            # Restores from checkpoint
            saver.restore(sess, specific_model_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = int(specific_model_path.split('/')[-1].split('-')[-1])
            print("Found model for global step: %d" % (global_step,))

            print_str = ', '.join([
                'Step: %d',
                'Time/image (ms): %.1f'
            ])

            step = 0
            while not coord.should_stop():

                # In DEBUG mode only the first five images are processed
                if DEBUG and step == 5:
                    break

                start_time = time.time()

                image, image_id = sess.run([single_image, single_image_id])
                image_dims = image.shape[:2]

                # Process the original sized image
                outputs = sess.run([locations, confidences], {
                    feed_image: image,
                    feed_image_height: image_dims[0],
                    feed_image_width: image_dims[1]
                })

                # The network output is added to the priors; index 0 selects
                # the single batch element.
                bboxes = outputs[0][0] + bbox_priors
                confs = outputs[1][0]

                # Restrict to the max number of detections
                sorted_idxs = np.argsort(confs.ravel())[::-1][:max_detections]
                all_bboxes = bboxes[sorted_idxs]
                all_confs = confs[sorted_idxs].ravel()

                # We could try to do some batching here....

                # Now process the crops
                # Patch Dims, Stride Information
                patch_info = [
                    [(299, 299), (149, 149)],
                    # [(185, 185), (93, 93)]
                ]

                for patch_dims, offset in patch_info:
                    patches, patch_offsets = patch_utils.extract_patches(image, patch_dims, offset)
                    for patch, patch_offset in zip(patches, patch_offsets):
                        outputs = sess.run([locations, confidences], {
                            feed_image: patch,
                            feed_image_height: patch_dims[0],
                            feed_image_width: patch_dims[1]
                        })

                        # Get the predictions, don't forget that there is a batch size of 1
                        predicted_bboxes = outputs[0][0] + bbox_priors
                        predicted_confs = outputs[1][0]

                        # Keep only the predictions that are completely contained
                        # in the [0.1, 0.1, 0.9, 0.9] square for this patch
                        filtered_bboxes, filtered_confs = patch_utils.filter_proposals(
                            predicted_bboxes, predicted_confs)

                        # No valid predictions?
                        if filtered_bboxes.shape[0] == 0:
                            continue

                        # Lets get rid of some of the predictions
                        sorted_idxs = np.argsort(filtered_confs.ravel())[::-1][:max_detections]
                        filtered_bboxes = filtered_bboxes[sorted_idxs]
                        filtered_confs = filtered_confs[sorted_idxs]

                        # Convert the bounding boxes to the original image dimensions
                        converted_bboxes = patch_utils.convert_proposals(
                            bboxes=filtered_bboxes,
                            offset=patch_offset,
                            patch_dims=patch_dims,
                            image_dims=image_dims
                        )

                        # Add the bboxes and confs to our collection
                        all_bboxes = np.vstack([all_bboxes, converted_bboxes])
                        all_confs = np.hstack([all_confs, filtered_confs.ravel()])

                # Lets make sure the coordinates are in the proper order
                # Its interesting that we don't enforce this anywhere in the model
                all_bboxes = _order_corners(all_bboxes)

                if DEBUG:
                    _debug_show_detections('all detections', image, all_bboxes, all_confs)
                    # NOTE(review): the original indentation of this print is
                    # ambiguous; it is kept with the other DEBUG diagnostics.
                    print("Number of total bboxes: %d" % (all_bboxes.shape[0],))

                # Lets do non-maximum suppression for the final predictions
                locs, confs = patch_utils.non_max_suppression(all_bboxes, all_confs, jaccard_threshold=0.85)

                # Now process the predictions, most confident first
                indices = np.argsort(confs.ravel())[::-1]

                if DEBUG:
                    print("Number of nms bboxes: %d" % (locs.shape[0],))
                    print("Removed %d detections" % (all_bboxes.shape[0] - locs.shape[0],))
                    print("Sorted confidences: %s" % (confs[indices][:10],))
                    _debug_show_detections('top detections', image, locs[indices], confs[indices])

                for index in indices[0:max_detections]:
                    pred_xmin, pred_ymin, pred_xmax, pred_ymax = locs[index].ravel()
                    detection_results.append({
                        "image_id": int(image_id),  # converts from np.array
                        # plain floats so json.dump never sees NumPy scalars
                        "bbox": [float(pred_xmin), float(pred_ymin), float(pred_xmax), float(pred_ymax)],
                        "score": float(confs[index]),  # converts from np.array
                    })

                dt = time.time() - start_time
                step += 1
                print(print_str % (step, (dt / cfg.BATCH_SIZE) * 1000))

        except tf.errors.OutOfRangeError:
            # Treated as the normal end of input: stop and save what we have.
            pass
        finally:
            # Always stop and join the queue-runner threads, including on the
            # "no checkpoint" early return and on unexpected errors.
            coord.request_stop()
            coord.join(threads)

        # save the results
        save_path = os.path.join(save_dir, "dense-results-%d.json" % global_step)
        with open(save_path, 'w') as f:
            json.dump(detection_results, f)