Example #1
def get_batch(dataset_dir,
			  num_readers,
			  batch_size,
			  out_shape,
			  net,
			  anchors,
			  num_preprocessing_threads,
			  file_pattern = '*.tfrecord',
			  is_training = True):
	
	# Load the TFRecord dataset via the synthtext provider.
	dataset = sythtextprovider.get_datasets(dataset_dir, file_pattern=file_pattern)

	provider = slim.dataset_data_provider.DatasetDataProvider(
				dataset,
				num_readers=num_readers,
				common_queue_capacity=20 * batch_size,
				common_queue_min=10 * batch_size,
				shuffle=True)
	
	[image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
											 'object/label',
											 'object/bbox'])

	# Pre-process image, labels and bboxes to the network input shape.
	image, glabels, gbboxes, num = \
		ssd_vgg_preprocessing.preprocess_image(
			image, glabels, gbboxes, out_shape, is_training=is_training)

	# Encode groundtruth labels and bboxes against the anchors.
	gclasses, glocalisations, gscores = \
		net.bboxes_encode(glabels, gbboxes, anchors, num)

	# One image tensor plus three per-anchor-layer lists.
	batch_shape = [1] + [len(anchors)] * 3

	r = tf.train.batch(
		tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
		batch_size=batch_size,
		num_threads=num_preprocessing_threads,
		capacity=5 * batch_size)

	b_image, b_gclasses, b_glocalisations, b_gscores = \
		tf_utils.reshape_list(r, batch_shape)

	return [b_image, b_gclasses, b_glocalisations, b_gscores]
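
The round trip through tf.train.batch above relies on tf_utils.reshape_list: the nested tensor list is flattened before batching, then regrouped afterwards using batch_shape, where an entry of 1 keeps a single tensor and an entry of len(anchors) regroups one tensor per anchor layer. A minimal sketch of that helper, consistent with how it is used in these examples (the repo's tf_utils is the authority):

def reshape_list(l, shape=None):
    """Flatten a nested list of tensors, or regroup a flat list per `shape`."""
    r = []
    if shape is None:
        # Flatten: [a, [b, c]] -> [a, b, c].
        for item in l:
            if isinstance(item, (list, tuple)):
                r += list(item)
            else:
                r.append(item)
    else:
        # Unflatten: an entry of 1 yields one element, n yields a list of n.
        i = 0
        for s in shape:
            r.append(l[i] if s == 1 else l[i:i + s])
            i += s
    return r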
Example #2
def get_batch(dataset_dir,
              num_readers,
              batch_size,
              out_shape,
              net,
              anchors,
              FLAGS,
              file_pattern='*.tfrecord',
              is_training=True,
              shuffle=False):

    dataset = sythtextprovider.get_datasets(dataset_dir,
                                            file_pattern=file_pattern)

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=num_readers,
        common_queue_capacity=512 * 16 + 20 * batch_size,
        common_queue_min=512 * 16,
        shuffle=shuffle)

    [image, shape, glabels, gbboxes, height, width] = provider.get(
        ['image', 'shape', 'object/label', 'object/bbox', 'height', 'width'])

    if is_training:
        image, glabels, gbboxes, num = \
            txt_preprocessing.preprocess_image(
                image, glabels, gbboxes, height, width, out_shape,
                use_whiten=FLAGS.use_whiten, is_training=is_training)

        glocalisations, gscores = \
            net.bboxes_encode(gbboxes, anchors, num)

        batch_shape = [1] + [len(anchors)] * 2

        r = tf.train.shuffle_batch(
            tf_utils.reshape_list([image, glocalisations, gscores]),
            batch_size=batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=100 * batch_size,
            min_after_dequeue=50 * batch_size)

        b_image, b_glocalisations, b_gscores = \
            tf_utils.reshape_list(r, batch_shape)

        return b_image, b_glocalisations, b_gscores

    else:
        image, glabels, gbboxes, bbox_img, num = \
            txt_preprocessing.preprocess_image(
                image, glabels, gbboxes, height, width, out_shape,
                use_whiten=FLAGS.use_whiten, is_training=is_training)

        glocalisations, gscores = \
            net.bboxes_encode(gbboxes, anchors, num)

        batch_shape = [1] * 4 + [len(anchors)] * 2

        r = tf.train.batch(
            tf_utils.reshape_list(
                [image, glabels, gbboxes, bbox_img, glocalisations, gscores]),
            batch_size=batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=50 * batch_size,
            dynamic_pad=True)

        image, glabels, gbboxes, g_bbox_img, glocalisations, gscores = \
            tf_utils.reshape_list(r, batch_shape)

        return image, glabels, gbboxes, g_bbox_img, glocalisations, gscores
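
In the training branch, tf.train.shuffle_batch only starts yielding batches once min_after_dequeue examples are buffered, which is what gives the shuffling its mixing window; the evaluation branch uses tf.train.batch with dynamic_pad=True so the variable-length label and bbox tensors can be padded to a common shape per batch. A self-contained toy run of the shuffled queue, independent of the TextBoxes modules:

import tensorflow as tf

# Enqueue the integers 0..99 in order, then draw shuffled batches of 8.
example = tf.train.range_input_producer(100, shuffle=False).dequeue()
batch = tf.train.shuffle_batch([example],
                               batch_size=8,
                               num_threads=2,
                               capacity=50,
                               min_after_dequeue=20)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(batch))  # e.g. [17  3 25  9 ...]; order is randomized
    coord.request_stop()
    coord.join(threads)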
Example #3
def main(_):
	if not FLAGS.dataset_dir:
		raise ValueError('You must supply the dataset directory with --dataset_dir')

	tf.logging.set_verbosity(tf.logging.DEBUG)
	with tf.Graph().as_default():
		# Config model_deploy. Keep TF Slim Models structure.
		# Useful if one wants to use multiple GPUs and/or servers in the future.
		deploy_config = model_deploy.DeploymentConfig(
			num_clones=FLAGS.num_clones,
			clone_on_cpu=FLAGS.clone_on_cpu,
			replica_id=0,
			num_replicas=1,
			num_ps_tasks=0)
		# Create global_step.
		with tf.device(deploy_config.variables_device()):
			global_step = slim.create_global_step()

		# Select the dataset.

		#dataset = dataset_factory.get_dataset(
		#	FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
		dataset = sythtextprovider.get_datasets(FLAGS.dataset_dir)
		# Get the SSD network and its anchors.

		#ssd_class = nets_factory.get_network(FLAGS.model_name)
		#ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
		text_net = txtbox_300.TextboxNet()
		text_shape = text_net.params.img_shape
		print('text_shape ' + str(text_shape))
		text_anchors = text_net.anchors(text_shape)
		print(len(text_anchors))
		# Select the preprocessing function.
		'''
		preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
		image_preprocessing_fn = preprocessing_factory.get_preprocessing(
			preprocessing_name, is_training=True)
		'''
		#tf_utils.print_configuration(FLAGS.__flags, ssd_params,
		#							 dataset.data_sources, FLAGS.train_dir)
		# =================================================================== #
		# Create a dataset provider and batches.
		# =================================================================== #
		with tf.device(deploy_config.inputs_device()):
			with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
				provider = slim.dataset_data_provider.DatasetDataProvider(
					dataset,
					num_readers=FLAGS.num_readers,
					common_queue_capacity=20 * FLAGS.batch_size,
					common_queue_min=10 * FLAGS.batch_size,
					shuffle=True)
			# Get for SSD network: image, labels, bboxes.
			[image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
															 'object/label',
															 'object/bbox'])
		
			init_op = tf.global_variables_initializer()

			# Pre-processing image, labels and bboxes.

			image, glabels, gbboxes, num = \
				ssd_vgg_preprocessing.preprocess_image(
					image, glabels, gbboxes, text_shape,
					is_training=True, data_format='NHWC')

			# Encode groundtruth labels and bboxes.
			print('bboxes num ' + str(gbboxes.get_shape()))
			print('glabels ' + str(tf.shape(glabels)))
			glocalisations, gscores = \
				text_net.bboxes_encode(gbboxes, text_anchors, num)
			batch_shape = [1] + [len(text_anchors)] * 2

			# Training batches and queue.

			r = tf.train.batch(
				tf_utils.reshape_list([image, glocalisations, gscores]),
				batch_size=FLAGS.batch_size,
				num_threads=FLAGS.num_preprocessing_threads,
				capacity=5 * FLAGS.batch_size)
			print('r shape ' + str(r[0]) + str(r[1]) + str(r[10]))
			b_image, b_glocalisations, b_gscores = \
				tf_utils.reshape_list(r, batch_shape)


			# Intermediate queueing: unique batch computation pipeline for all
			# GPUs running the training.
			batch_queue = slim.prefetch_queue.prefetch_queue(
				tf_utils.reshape_list([b_image, b_glocalisations, b_gscores]),
				capacity=2 * deploy_config.num_clones)
			

		# =================================================================== #
		# Define the model running on every GPU.
		# =================================================================== #
		def clone_fn(batch_queue):
			"""Allows data parallelism by creating multiple clones of network_fn."""
			# Dequeue batch.
			b_image, b_glocalisations, b_gscores = \
				tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

			# Construct SSD network.
			arg_scope = text_net.arg_scope(weight_decay=FLAGS.weight_decay)
			with slim.arg_scope(arg_scope):
				localisations, logits, end_points = \
					text_net.net(b_image, is_training=True)
			# Add loss function.
			text_net.losses(logits, localisations,
						   b_glocalisations, b_gscores,
						   match_threshold=FLAGS.match_threshold,
						   negative_ratio=FLAGS.negative_ratio,
						   alpha=FLAGS.loss_alpha,
						   label_smoothing=FLAGS.label_smoothing)
			return end_points

		# Gather initial summaries.
		summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

		# =================================================================== #
		# Add summaries from first clone.
		# =================================================================== #
		clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
		first_clone_scope = deploy_config.clone_scope(0)
		# Gather update_ops from the first clone. These contain, for example,
		# the updates for the batch_norm variables created by network_fn.
		update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

		# Add summaries for end_points.
		end_points = clones[0].outputs
		for end_point in end_points:
			x = end_points[end_point]
			summaries.add(tf.summary.histogram('activations/' + end_point, x))
			summaries.add(tf.summary.scalar('sparsity/' + end_point,
											tf.nn.zero_fraction(x)))
		# Add summaries for losses and extra losses.
		for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
			summaries.add(tf.summary.scalar(loss.op.name, loss))
		for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
			summaries.add(tf.summary.scalar(loss.op.name, loss))

		# Add summaries for variables.
		for variable in slim.get_model_variables():
			summaries.add(tf.summary.histogram(variable.op.name, variable))

		# =================================================================== #
		# Configure the moving averages.
		# =================================================================== #
		if FLAGS.moving_average_decay:
			moving_average_variables = slim.get_model_variables()
			variable_averages = tf.train.ExponentialMovingAverage(
				FLAGS.moving_average_decay, global_step)
		else:
			moving_average_variables, variable_averages = None, None

		# =================================================================== #
		# Configure the optimization procedure.
		# =================================================================== #
		with tf.device(deploy_config.optimizer_device()):
			learning_rate = tf_utils.configure_learning_rate(FLAGS,
															 dataset.num_samples,
															 global_step)
			optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
			summaries.add(tf.summary.scalar('learning_rate', learning_rate))

		if FLAGS.moving_average_decay:
			# Update ops executed locally by trainer.
			update_ops.append(variable_averages.apply(moving_average_variables))

		# Variables to train.
		variables_to_train = tf_utils.get_variables_to_train(FLAGS)

		# Optimize over all clones and return the total loss and gradients.
		total_loss, clones_gradients = model_deploy.optimize_clones(
			clones,
			optimizer,
			var_list=variables_to_train)
		# Add total_loss to summary.
		summaries.add(tf.summary.scalar('total_loss', total_loss))

		# Create gradient updates.
		grad_updates = optimizer.apply_gradients(clones_gradients,
												 global_step=global_step)
		update_ops.append(grad_updates)
		update_op = tf.group(*update_ops)
		train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
														  name='train_op')

		# Add the summaries from the first clone. These contain the summaries
		# created by model_fn and either optimize_clones() or _gather_clone_loss().
		summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
										   first_clone_scope))
		# Merge all summaries together.
		summary_op = tf.summary.merge(list(summaries), name='summary_op')

		# =================================================================== #
		# Kicks off the training.
		# =================================================================== #
		gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
		config = tf.ConfigProto(log_device_placement=False,
								gpu_options=gpu_options,
								allow_soft_placement=True)
		saver = tf.train.Saver(max_to_keep=5,
							   keep_checkpoint_every_n_hours=1.0,
							   write_version=2,
							   pad_step_number=False)
		slim.learning.train(
			train_tensor,
			logdir=FLAGS.train_dir,
			master='',
			is_chief=True,
			init_fn=tf_utils.get_init_fn(FLAGS),
			summary_op=summary_op,
			number_of_steps=FLAGS.max_number_of_steps,
			log_every_n_steps=FLAGS.log_every_n_steps,
			save_summaries_secs=FLAGS.save_summaries_secs,
			saver=saver,
			save_interval_secs=FLAGS.save_interval_secs,
			session_config=config,
			sync_optimizer=None)
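
The train_tensor above is built with control_flow_ops.with_dependencies, which returns total_loss but only after every op in update_ops (batch-norm updates, the moving-average apply op, and the gradient update) has executed. A toy reproduction of that pattern:

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

# Incrementing the counter stands in for the gradient/batch-norm updates.
counter = tf.Variable(0, name='counter')
update_op = tf.assign_add(counter, 1)
loss = tf.constant(3.5)
train_tensor = control_flow_ops.with_dependencies([update_op], loss,
                                                  name='train_op')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(train_tensor))  # 3.5, and the update has run
    print(sess.run(counter))       # 1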
Example #4
def get_batch(dataset_dir,
              num_readers,
              batch_size,
              out_shape,
              net,
              anchors,
              FLAGS,
              file_pattern='*.tfrecord',
              is_training=True,
              shuffle=False):

    dataset = sythtextprovider.get_datasets(dataset_dir,
                                            file_pattern=file_pattern)

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=num_readers,
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size,
        shuffle=shuffle)

    [image, shape, glabels, gbboxes, corx, cory] = provider.get([
        'image', 'shape', 'object/label', 'object/bbox', 'object/corx',
        'object/cory'
    ])
    corx = tf.expand_dims(corx, -1)
    cory = tf.expand_dims(cory, -1)

    cord = tf.concat([corx, cory], -1)

    if is_training:
        image, glabels, gbboxes, cord, num = \
            txt_preprocessing.preprocess_image(
                image, glabels, gbboxes, cord, out_shape,
                is_training=is_training)

        glocalisations, glabels, glinks = \
            net.bboxes_encode(cord, anchors, num)

        batch_shape = [1] + [len(anchors)] * 3

        r = tf.train.batch(
            tf_utils.reshape_list([image, glocalisations, glabels, glinks]),
            batch_size=batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * batch_size,
        )

        b_image, b_glocalisations, b_glabels, b_glinks = \
            tf_utils.reshape_list(r, batch_shape)

        return b_image, b_glocalisations, b_glabels, b_glinks
    else:
        image, labels, bboxes, cord, num = \
            txt_preprocessing.preprocess_image(
                image, glabels, gbboxes, cord, out_shape,
                is_training=is_training)

        glocalisations, glabels, glinks = \
            net.bboxes_encode(cord, anchors, num)

        batch_shape = [1] * 3 + [len(anchors)] * 3

        r = tf.train.batch(
            tf_utils.reshape_list(
                [image, labels, cord, glocalisations, glabels, glinks]),
            batch_size=batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * batch_size,
        )

        b_image, b_labels, b_cord, b_glocalisations, b_glabels, b_glinks = \
            tf_utils.reshape_list(r, batch_shape)

        return b_image, b_labels, b_cord, b_glocalisations, b_glabels, b_glinks
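
In this variant, corx and cory arrive from the provider with shape (N, 4), one x or y value per corner of each quadrilateral; expand_dims plus concat turns them into (N, 4, 2) corner pairs before bboxes_encode. A quick check with made-up coordinates:

import tensorflow as tf

# One box (N = 1) with hypothetical corner coordinates in [0, 1].
corx = tf.constant([[0.1, 0.4, 0.4, 0.1]])
cory = tf.constant([[0.2, 0.2, 0.6, 0.6]])
cord = tf.concat([tf.expand_dims(corx, -1), tf.expand_dims(cory, -1)], -1)
with tf.Session() as sess:
    print(sess.run(tf.shape(cord)))  # [1 4 2]
    print(sess.run(cord[0, 0]))      # [0.1 0.2], the first (x, y) corner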
Example #5
def run():
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    print('-----start test-------')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    with tf.device('/GPU:0'):
        dataset = sythtextprovider.get_datasets(FLAGS.dataset_dir)
        print(dataset)
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size,
            shuffle=True)
        print('provider:', provider)
        [image, shape, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
         y4] = provider.get([
             'image', 'shape', 'object/label', 'object/bbox',
             'object/oriented_bbox/x1', 'object/oriented_bbox/x2',
             'object/oriented_bbox/x3', 'object/oriented_bbox/x4',
             'object/oriented_bbox/y1', 'object/oriented_bbox/y2',
             'object/oriented_bbox/y3', 'object/oriented_bbox/y4'
         ])
        print('image:', image)
        print('shape:', shape)
        print('glabel:', glabels)
        print('gboxes:', gbboxes)

        gxs = tf.transpose(tf.stack([x1, x2, x3, x4]))  # shape = (N, 4)
        gys = tf.transpose(tf.stack([y1, y2, y3, y4]))

        image = tf.identity(image, 'input_image')
        text_shape = (384, 384)
        image, glabels, gbboxes, gxs, gys = ssd_vgg_preprocessing.preprocess_image(
            image,
            glabels,
            gbboxes,
            gxs,
            gys,
            text_shape,
            is_training=True,
            data_format='NHWC')

        x1, x2, x3, x4 = tf.unstack(gxs, axis=1)
        y1, y2, y3, y4 = tf.unstack(gys, axis=1)

        text_net = txtbox_384.TextboxNet()
        text_anchors = text_net.anchors(text_shape)
        e_localisations, e_scores, e_labels = text_net.bboxes_encode(
            glabels, gbboxes, text_anchors, gxs, gys)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        j = 0
        all_time = 0
        try:
            while not coord.should_stop() and j < show_pic_sum:
                start_time = time.time()
                (image_sess, label_sess, gbbox_sess,
                 x1_sess, x2_sess, x3_sess, x4_sess,
                 y1_sess, y2_sess, y3_sess, y4_sess,
                 p_localisations, p_scores, p_labels) = sess.run([
                     image, glabels, gbboxes, x1, x2, x3, x4,
                     y1, y2, y3, y4, e_localisations, e_scores, e_labels
                 ])
                end_time = time.time() - start_time
                all_time += end_time
                image_np = image_sess
                # print(image_np)
                # print('label_sess:',label_sess)

                p_labels_concat = np.concatenate(p_labels)
                p_scores_concat = np.concatenate(p_scores)
                debug = False
                if debug:
                    print(p_labels)
                    print('p_labels:',
                          len(p_labels_concat[p_labels_concat.nonzero()]),
                          p_labels_concat[p_labels_concat.nonzero()])
                    print('p_scores:',
                          len(p_scores_concat[p_scores_concat.nonzero()]),
                          p_scores_concat[p_scores_concat.nonzero()])
                    # print(img_np.shape)

                    print('label_sess:',
                          np.array(list(label_sess)).shape, list(label_sess))
                img_np = np.array(image_np)
                cv2.imwrite('{}/{}.png'.format(save_dir, j), img_np)
                img_np = cv2.imread('{}/{}.png'.format(save_dir, j))

                h, w, d = img_np.shape

                label_sess = list(label_sess)
                # for i , label in enumerate(label_sess):
                i = 0
                num_correct = 0

                for label in label_sess:
                    # print(int(label) == 1)
                    if int(label) == 1:
                        num_correct += 1
                        img_np = draw_polygon(img_np, x1_sess[i] * w,
                                              y1_sess[i] * h, x2_sess[i] * w,
                                              y2_sess[i] * h, x3_sess[i] * w,
                                              y3_sess[i] * h, x4_sess[i] * w,
                                              y4_sess[i] * h)
                    if int(label) == 0:
                        img_np = draw_polygon(img_np,
                                              x1_sess[i] * w,
                                              y1_sess[i] * h,
                                              x2_sess[i] * w,
                                              y2_sess[i] * h,
                                              x3_sess[i] * w,
                                              y3_sess[i] * h,
                                              x4_sess[i] * w,
                                              y4_sess[i] * h,
                                              color=(0, 0, 255))
                    i += 1
                img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
                cv2.imwrite(
                    '{}'.format(os.path.join(save_dir,
                                             str(j) + '.png')), img_np)
                j += 1
                print('correct:', num_correct)
        except tf.errors.OutOfRangeError:
            print('done')
        finally:
            coord.request_stop()
        print('all time:', all_time, 'average:', all_time / show_pic_sum)
        coord.join(threads=threads)
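
draw_polygon is defined elsewhere in this project; a plausible stand-in (an assumption, not the repo's implementation) that draws one closed quadrilateral with cv2.polylines:

import cv2
import numpy as np

def draw_polygon(img, x1, y1, x2, y2, x3, y3, x4, y4, color=(0, 255, 0)):
    """Draw one closed quadrilateral; coordinates are absolute pixels."""
    pts = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]], dtype=np.int32)
    cv2.polylines(img, [pts.reshape((-1, 1, 2))], isClosed=True,
                  color=color, thickness=2)
    return img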
Example #6
    'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    #'image/object/bbox/label_text' : tf.VarLenFeature(dtype=tf.string),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
}, name='features')
# image was saved as uint8, so we have to decode as uint8.

image = tf.decode_raw(tfrecord_features['image/encoded'], tf.uint8)
shape = tf.cast(tfrecord_features['image/shape'], tf.int64)
#image = tf.reshape(image, shape)
height = tf.cast(tfrecord_features['image/height'], tf.int64)
width = tf.cast(tfrecord_features['image/width'], tf.int64)

"""
dataset_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/'
dataset = get_datasets(dataset_dir)

provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset,
    num_readers=1,
    common_queue_capacity=20 * 32,
    common_queue_min=10 * 32,
    shuffle=True)
# Get for SSD network: image, labels, bboxes.
[
    image,
    shape,
    height,
    width,
    glabels,
    gbboxes,
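
This snippet is truncated on both ends; the feature spec it shows would normally be consumed by tf.parse_single_example on a serialized record. A minimal sketch of that step, assumed from the fragment (only the keys visible above are used):

import tensorflow as tf

def parse_record(serialized_example):
    # serialized_example comes from a tf.TFRecordReader().read(...) call.
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
            'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        },
        name='features')
    # The image was saved as raw uint8 bytes, so decode it as uint8.
    image = tf.decode_raw(features['image/encoded'], tf.uint8)
    return image, features['image/object/bbox/label']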
Example #7
for min_s in min_scala:
    for max_s in max_scala:
        scales = [min_s + i * (max_s - min_s) / 6 for i in range(7)]
        anchor_sizes = [(512 * scales[i], 512 * scales[i] + 50)
                        for i in range(7)]
        with tf.Graph().as_default():
            # build a net
            params = txtbox512.TextboxNet.default_params
            params = params._replace(anchor_sizes=anchor_sizes)
            text_net = txtbox512.TextboxNet(params)
            text_shape = text_net.params.img_shape
            print('text_shape ' + str(text_shape))
            text_anchors = text_net.anchors(text_shape)

            ## dataset provider
            dataset = sythtextprovider.get_datasets('../data/ICDAR2013/',
                                                    file_pattern='*.tfrecord')

            data_provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset, common_queue_capacity=32, common_queue_min=2)

            [image, shape, glabels, gbboxes] = \
                data_provider.get(['image', 'shape',
                                   'object/label',
                                   'object/bbox'])

            dst_image, glabels, gbboxes, num = \
                txt_preprocessing.preprocess_image(image, glabels, gbboxes,
                                                   text_shape, is_training=True)

            glocalisations, gscores = \
                text_net.bboxes_encode(gbboxes, text_anchors, num)
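
The sweep above interpolates 7 scales between each (min_s, max_s) pair and maps them to pixel anchor sizes for the 512-input net. Evaluated for one hypothetical pair (min_scala and max_scala themselves are defined outside this snippet):

min_s, max_s = 0.2, 0.9  # hypothetical sweep endpoints
scales = [min_s + i * (max_s - min_s) / 6 for i in range(7)]
anchor_sizes = [(512 * s, 512 * s + 50) for s in scales]
print(anchor_sizes[0])   # (102.4, 152.4)
print(anchor_sizes[-1])  # (460.8, 510.8)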