def infer(self, image_path):
    if self.config is None:
        tf.logging.error('Configuration is None')
        return None
    model_name = self.config['model_name']
    checkpoint_path = self.config['checkpoint_path']
    labels_to_names = None
    if dataset_utils.has_labels(checkpoint_path, 'label_map.txt'):
        labels_to_names = dataset_utils.read_label_file(checkpoint_path, 'label_map.txt')
    else:
        tf.logging.error('No label map')
        return None
    keys = list(labels_to_names.keys())
    with tf.Graph().as_default():
        # Decode the input image and preprocess it for the selected model.
        image_string = tf.read_file(image_path)
        image = tf.image.decode_jpeg(image_string, channels=3)
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            model_name, is_training=False)
        network_fn = nets_factory.get_network_fn(
            model_name, num_classes=len(keys), is_training=False)
        processed_image = image_preprocessing_fn(
            image, network_fn.default_image_size, network_fn.default_image_size)
        image_expanded = tf.expand_dims(processed_image, axis=0)

        # Build the network and the prediction ops.
        logits, _ = network_fn(image_expanded)
        probabilities = tf.nn.softmax(logits)
        predictions = tf.argmax(logits, 1)

        # Restore weights from the latest checkpoint for this model.
        model_path = tf.train.latest_checkpoint(checkpoint_path)
        init_fn = slim.assign_from_checkpoint_fn(
            model_path, slim.get_model_variables(scope_map[model_name]))

        with tf.Session() as sess:
            init_fn(sess)
            probs, pred = sess.run([probabilities, predictions])

        # Return the per-class probabilities sorted in descending order.
        result = []
        for i in range(len(probs[0])):
            result.append({'type': labels_to_names[keys[i]], 'prob': str(probs[0][i])})
        sorted_result = sorted(result, key=lambda k: float(k['prob']), reverse=True)
        return sorted_result
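# A minimal usage sketch for infer(). The enclosing class name
# (`ImageClassifier` below) and the way the config dict is attached are
# assumptions for illustration only; infer() itself reads just the
# 'model_name' and 'checkpoint_path' keys and expects label_map.txt inside
# the checkpoint directory.
#
#   classifier = ImageClassifier()
#   classifier.config = {
#       'model_name': 'inception_v1',
#       'checkpoint_path': '/path/to/checkpoint_dir/',
#   }
#   top_k = classifier.infer('/path/to/image.jpg')
#   # top_k is a list of {'type': ..., 'prob': ...} dicts sorted by probability.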
def eval(self):
    # config = self.config
    # if config is None:
    #     tf.logging.error('There is no input configurations.')
    #     return
    # with open(config['eval_configs']) as f:
    #     eval_configs = json.load(f)
    # eval_configs['batch_size'] = int(config['batch_size'])
    # eval_configs['model_name'] = config['model_name']
    # eval_configs['dataset_dir'] = config['data_dir']
    # eval_configs['checkpoint_path'] = config['checkpoint_path']
    # eval_configs['eval_dir'] = config['eval_dir']
    # self.create_tf_data(ratio=0)
    eval_configs = {}
    eval_configs['batch_size'] = 100
    eval_configs['model_name'] = 'inception_v1'
    eval_configs['dataset_dir'] = '/home/wujia/examples/platform/test-platform/CVTron-Serve/cvtron-serve/static/data/classification/'
    eval_configs['checkpoint_path'] = '/home/wujia/examples/platform/test-platform/CVTron-Serve/cvtron-serve/static/model/classification/inception_v1/'
    eval_configs['val_dir'] = '/home/wujia/examples/platform/test-platform/CVTron-Serve/cvtron-serve/static/log/'
    eval_configs['num_preprocessing_threads'] = 4
    eval_configs['labels_offset'] = 0
    eval_configs['moving_average_decay'] = None
    eval_configs['max_num_batches'] = None
    eval_configs['master'] = ''
    eval_configs['preprocessing_name'] = ''
    eval_configs['eval_image_size'] = None

    # Use only one GPU.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    self.num_classes = 5
    self.splits_to_sizes = {'train': 3320, 'val': 350}
    self.items_to_descriptions = {
        'image': 'A color image of varying size.',
        'label': 'A single integer between 0 and 4'}

    if not eval_configs['dataset_dir']:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = self.get_dataset('train', eval_configs['dataset_dir'])

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            eval_configs['model_name'],
            num_classes=(dataset.num_classes - eval_configs['labels_offset']),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * eval_configs['batch_size'],
            common_queue_min=eval_configs['batch_size'])
        [image, label] = provider.get(['image', 'label'])
        label -= eval_configs['labels_offset']

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = eval_configs['preprocessing_name'] or eval_configs['model_name']
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = eval_configs['eval_image_size'] or network_fn.default_image_size
        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=eval_configs['batch_size'],
            num_threads=eval_configs['num_preprocessing_threads'],
            capacity=5 * eval_configs['batch_size'])

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if eval_configs['moving_average_decay']:
            variable_averages = tf.train.ExponentialMovingAverage(
                eval_configs['moving_average_decay'], tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if eval_configs['max_num_batches']:
            num_batches = eval_configs['max_num_batches']
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples / float(eval_configs['batch_size']))

        if tf.gfile.IsDirectory(eval_configs['checkpoint_path']):
            checkpoint_path = tf.train.latest_checkpoint(eval_configs['checkpoint_path'])
        else:
            checkpoint_path = eval_configs['checkpoint_path']

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=eval_configs['master'],
            checkpoint_path=checkpoint_path,
            logdir=eval_configs['val_dir'],
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
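# A minimal sketch of the config-driven call that the commented block at the
# top of eval() would expect, assuming the same enclosing class as above. The
# class name and the contents of the 'eval_configs' JSON file are illustrative
# only; as written, eval() ignores self.config and uses the hard-coded values.
#
#   classifier.config = {
#       'eval_configs': 'eval_configs.json',   # JSON with the remaining keys
#       'batch_size': 100,
#       'model_name': 'inception_v1',
#       'data_dir': '/path/to/tfrecords/',
#       'checkpoint_path': '/path/to/checkpoint_dir/',
#       'eval_dir': '/path/to/eval_logs/',
#   }
#   classifier.eval()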
def train(self):
    config = self.config
    if config is None:
        tf.logging.error('There is no input configurations.')
        return
    try:
        with open(config['training_configs']) as f:
            training_configs = json.load(f)
        training_configs['tf_configs']['train_dir'] = config['train_dir']
        training_configs['tf_configs']['log_every_n_steps'] = int(config['log_every_n_steps'])
        training_configs['optimization_params']['optimizer'] = config['optimizer']
        training_configs['learning_rate_params']['learning_rate'] = float(config['learning_rate'])
        training_configs['dataset_params']['batch_size'] = int(config['batch_size'])
        training_configs['dataset_params']['model_name'] = config['model_name']
        training_configs['dataset_params']['dataset_dir'] = config['data_dir']
        training_configs['fine_tuning_params']['checkpoint_path'] = config['fine_tuning_ckpt_path']
        if training_configs['fine_tuning_params']['checkpoint_path'] is not None:
            training_configs['fine_tuning_params']['checkpoint_exclude_scopes'] = \
                exclude_scopes_map[training_configs['dataset_params']['model_name']].format(
                    scope_map[training_configs['dataset_params']['model_name']],
                    scope_map[training_configs['dataset_params']['model_name']])
            training_configs['fine_tuning_params']['trainable_scopes'] = \
                exclude_scopes_map[training_configs['dataset_params']['model_name']].format(
                    scope_map[training_configs['dataset_params']['model_name']],
                    scope_map[training_configs['dataset_params']['model_name']])
        self.training_configs = training_configs

        with tf.Graph().as_default():
            # Use only one GPU.
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = str(1)
            # from tensorflow.python.client import device_lib
            # local_device_protos = device_lib.list_local_devices()

            # create tf_record data
            # self.create_tf_data()
            self.num_classes = 5
            self.splits_to_sizes = {'train': 3320, 'val': 350}
            self.items_to_descriptions = {
                'image': 'A color image of varying size.',
                'label': 'A single integer between 0 and 4'}

            #######################
            # Config model_deploy #
            #######################
            deploy_config = model_deploy.DeploymentConfig(
                num_clones=training_configs['tf_configs']['num_clones'],
                clone_on_cpu=training_configs['tf_configs']['clone_on_cpu'],
                replica_id=training_configs['tf_configs']['task'],
                num_replicas=training_configs['tf_configs']['worker_replicas'],
                num_ps_tasks=training_configs['tf_configs']['num_ps_tasks'])

            # Create global_step
            with tf.device(deploy_config.variables_device()):
                global_step = slim.create_global_step()

            ######################
            # Select the dataset #
            ######################
            dataset = self.get_dataset('train', training_configs['dataset_params']['dataset_dir'])

            ######################
            # Select the network #
            ######################
            network_fn = nets_factory.get_network_fn(
                training_configs['dataset_params']['model_name'],
                num_classes=(dataset.num_classes - training_configs['dataset_params']['label_offset']),
                weight_decay=training_configs['optimization_params']['weight_decay'],
                is_training=True)

            #####################################
            # Select the preprocessing function #
            #####################################
            preprocessing_name = (training_configs['dataset_params']['preprocessing_name']
                                  or training_configs['dataset_params']['model_name'])
            image_preprocessing_fn = preprocessing_factory.get_preprocessing(
                preprocessing_name, is_training=True)

            ##############################################################
            # Create a dataset provider that loads data from the dataset #
            ##############################################################
            with tf.device(deploy_config.inputs_device()):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=training_configs['tf_configs']['num_readers'],
                    common_queue_capacity=20 * training_configs['dataset_params']['batch_size'],
                    common_queue_min=10 * training_configs['dataset_params']['batch_size'])
                [image, label] = provider.get(['image', 'label'])
                label -= training_configs['dataset_params']['label_offset']

                train_image_size = (training_configs['dataset_params']['train_image_size']
                                    or network_fn.default_image_size)
                image = image_preprocessing_fn(image, train_image_size, train_image_size)

                images, labels = tf.train.batch(
                    [image, label],
                    batch_size=training_configs['dataset_params']['batch_size'],
                    num_threads=training_configs['tf_configs']['num_preprocessing_threads'],
                    capacity=5 * training_configs['dataset_params']['batch_size'])
                labels = slim.one_hot_encoding(
                    labels, dataset.num_classes - training_configs['dataset_params']['label_offset'])
                batch_queue = slim.prefetch_queue.prefetch_queue(
                    [images, labels], capacity=2 * deploy_config.num_clones)

            ####################
            # Define the model #
            ####################
            def clone_fn(batch_queue):
                """Allows data parallelism by creating multiple clones of network_fn."""
                images, labels = batch_queue.dequeue()
                logits, end_points = network_fn(images)

                #############################
                # Specify the loss function #
                #############################
                if 'AuxLogits' in end_points:
                    slim.losses.softmax_cross_entropy(
                        end_points['AuxLogits'], labels,
                        label_smoothing=training_configs['learning_rate_params']['label_smoothing'],
                        weights=0.4, scope='aux_loss')
                slim.losses.softmax_cross_entropy(
                    logits, labels,
                    label_smoothing=training_configs['learning_rate_params']['label_smoothing'],
                    weights=1.0)
                return end_points

            # Gather initial summaries.
            summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

            clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
            first_clone_scope = deploy_config.clone_scope(0)
            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by network_fn.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

            # Add summaries for end_points.
            end_points = clones[0].outputs
            for end_point in end_points:
                x = end_points[end_point]
                summaries.add(tf.summary.histogram('activations/' + end_point, x))
                summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                                tf.nn.zero_fraction(x)))

            # Add summaries for losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
                summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

            # Add summaries for variables.
            for variable in slim.get_model_variables():
                summaries.add(tf.summary.histogram(variable.op.name, variable))

            #################################
            # Configure the moving averages #
            #################################
            if training_configs['learning_rate_params']['moving_average_decay']:
                moving_average_variables = slim.get_model_variables()
                variable_averages = tf.train.ExponentialMovingAverage(
                    training_configs['learning_rate_params']['moving_average_decay'], global_step)
            else:
                moving_average_variables, variable_averages = None, None

            #########################################
            # Configure the optimization procedure. #
            #########################################
            with tf.device(deploy_config.optimizer_device()):
                learning_rate = self._configure_learning_rate(dataset.num_samples, global_step)
                optimizer = self._configure_optimizer(learning_rate)
                summaries.add(tf.summary.scalar('learning_rate', learning_rate))

            if training_configs['learning_rate_params']['sync_replicas']:
                # If sync_replicas is enabled, the averaging will be done in the chief
                # queue runner.
                optimizer = tf.train.SyncReplicasOptimizer(
                    opt=optimizer,
                    replicas_to_aggregate=training_configs['learning_rate_params']['replicas_to_aggregate'],
                    total_num_replicas=training_configs['tf_configs']['worker_replicas'],
                    variable_averages=variable_averages,
                    variables_to_average=moving_average_variables)
            elif training_configs['learning_rate_params']['moving_average_decay']:
                # Update ops executed locally by trainer.
                update_ops.append(variable_averages.apply(moving_average_variables))

            # Variables to train.
            variables_to_train = self._get_variables_to_train()

            # and returns a train_tensor and summary_op
            total_loss, clones_gradients = model_deploy.optimize_clones(
                clones, optimizer, var_list=variables_to_train)
            # Add total_loss to summary.
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Create gradient updates.
            grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step)
            update_ops.append(grad_updates)

            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

            # Add the summaries from the first clone. These contain the summaries
            # created by model_fn and either optimize_clones() or _gather_clone_loss().
            summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

            # Merge all summaries together.
            summary_op = tf.summary.merge(list(summaries), name='summary_op')

            train_dir = training_configs['tf_configs']['train_dir']
            if not os.path.exists(train_dir):
                os.makedirs(train_dir)
            copy(training_configs['dataset_params']['dataset_dir'] + 'label_map.txt',
                 training_configs['tf_configs']['train_dir'])

            weblog_dir = config['weblog_dir']
            if not os.path.exists(weblog_dir):
                os.makedirs(weblog_dir)
            logger = Logger('Training Monitor')

            ###########################
            # Kicks off the training. #
            ###########################
            learning.train(
                train_tensor,
                logdir=train_dir,
                master=training_configs['tf_configs']['master'],
                is_chief=(training_configs['tf_configs']['task'] == 0),
                init_fn=self._get_init_fn(),
                summary_op=summary_op,
                log_every_n_steps=training_configs['tf_configs']['log_every_n_steps'],
                save_summaries_secs=training_configs['tf_configs']['save_summaries_secs'],
                save_interval_secs=training_configs['tf_configs']['save_interval_secs'],
                sync_optimizer=(optimizer
                                if training_configs['learning_rate_params']['sync_replicas']
                                else None),
                logger=logger,
                weblog_dir=weblog_dir)
    except Exception as e:
        tf.logging.error('Unexpected error: %s', e)
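# A minimal sketch of the config dict that train() reads, again assuming the
# same enclosing class; every key below is accessed directly in train(), but
# the example values are illustrative only. The file named by
# 'training_configs' must be a JSON document containing the 'tf_configs',
# 'optimization_params', 'learning_rate_params', 'dataset_params' and
# 'fine_tuning_params' groups referenced above.
#
#   classifier.config = {
#       'training_configs': 'training_configs.json',
#       'train_dir': '/path/to/train_logs/',
#       'weblog_dir': '/path/to/weblog/',
#       'log_every_n_steps': 10,
#       'optimizer': 'rmsprop',
#       'learning_rate': 0.01,
#       'batch_size': 32,
#       'model_name': 'inception_v1',
#       'data_dir': '/path/to/tfrecords/',
#       'fine_tuning_ckpt_path': '/path/to/pretrained/inception_v1.ckpt',
#   }
#   classifier.train()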