Example #1
def get_bottleneck_tensor(input_jpeg_str):
    # type: (tf.Tensor) -> tf.Tensor
    """Calculates the bottleneck tensor for an input JPEG string tensor.

    This function decodes and resizes the image as required by the Inception V3
    module, then runs it through the module to calculate the bottleneck values.

    Args:
      input_jpeg_str: string Tensor holding the raw JPEG image bytes.

    Returns:
      bottleneck_tensor: Tensor holding the output bottleneck values.
    """
    module_spec = tensorflow_hub.load_module_spec(_FEATURE_VECTORS_MODULE_URL)
    input_height, input_width = tensorflow_hub.get_expected_image_size(
        module_spec)
    input_depth = tensorflow_hub.get_num_image_channels(module_spec)
    decoded_image = tf.image.decode_jpeg(input_jpeg_str, channels=input_depth)
    decoded_image_as_float = tf.image.convert_image_dtype(
        decoded_image, tf.float32)
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    resize_shape = tf.stack([input_height, input_width])
    resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
    resized_image_4d = tf.image.resize_bilinear(decoded_image_4d,
                                                resize_shape_as_int)
    m = tensorflow_hub.Module(module_spec)
    bottleneck_tensor = m(resized_image_4d)
    return bottleneck_tensor
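A minimal usage sketch for the helper above (the placeholder wiring and file path are illustrative; `_FEATURE_VECTORS_MODULE_URL` is assumed to be defined as in the example):

input_jpeg_str = tf.placeholder(tf.string, name='input_jpeg')
bottleneck_tensor = get_bottleneck_tensor(input_jpeg_str)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    with tf.gfile.GFile('/path/to/image.jpg', 'rb') as f:
        values = sess.run(bottleneck_tensor, {input_jpeg_str: f.read()})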
Example #2
def extract_inception_bottleneck_features(images_train, images_test):
    # Combine train & test set to single set of images
    images = images_train + images_test

    print('Extracting inception bottleneck features...')
    module_spec = hub.load_module_spec(TFHUB_INCEPTION_V3_MODULE_SPEC_URL)
    module = hub.Module(module_spec)
    (image_height, image_width) = hub.get_expected_image_size(module)
    images = [tf.image.convert_image_dtype(x, tf.float32) for x in images]
    images = [
        tf.image.resize_images(x, (image_height, image_width)) for x in images
    ]
    sess = tf.Session()
    X = []
    sess.run(tf.global_variables_initializer())
    batches = [
        images[i:i + BATCH_SIZE] for i in range(0, len(images), BATCH_SIZE)
    ]
    for batch in batches:
        # Note: applying the module inside the loop adds new ops per batch.
        bottleneck_tensors = module(batch)
        x_batch = sess.run(bottleneck_tensors)
        X.extend(x_batch)

    # Recover train & test set
    X_train = X[:len(images_train)]
    X_test = X[len(images_train):]
    return (X_train, X_test)
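Because the module is applied inside the loop, the graph grows on every iteration. A leaner sketch (an assumption about intent, not the original code) materializes the preprocessed images once and feeds numpy batches through a single bottleneck op, assuming they fit in memory:

images_value = sess.run(tf.stack(images))
image_input = tf.placeholder(tf.float32, [None, image_height, image_width, 3])
bottleneck_op = module(image_input)  # built once, reused for every batch
X = []
for i in range(0, len(images_value), BATCH_SIZE):
    X.extend(sess.run(bottleneck_op, {image_input: images_value[i:i + BATCH_SIZE]}))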
Example #3
File: models.py Project: diesendruck/pv
def tfhub_encoder(x, dropout_pr=1.0):
    """Applies TFHub encoder to batch of images.

    Args:
        x: Images on [0, 255] sized (batch_size, scale_size, scale_size, 3).

    Returns:
        enc_x: Encodings sized (batch_size, encoding_size).
    """

    x = x / 255.
    #module_spec_str = ('https://tfhub.dev/google/imagenet/inception_v3/'
    #                   'feature_vector/1')
    # This module takes (224, 224) and encodes to (1280).
    module_spec_str = (
        'https://tfhub.dev/google/imagenet/mobilenet_v2_035_224/'
        'feature_vector/2')
    module_spec = hub.load_module_spec(module_spec_str)
    height, width = hub.get_expected_image_size(module_spec)
    assert x.shape[1] == height, 'height is {}. Must be {}'.format(
        x.shape[1], height)
    assert x.shape[2] == width, 'width is {}. Must be {}'.format(
        x.shape[2], width)

    module = hub.Module(module_spec)
    embedding_tensor = module(x)

    batch_size, embedding_tensor_size = embedding_tensor.get_shape().as_list()
    #assert batch_size is None, 'We want to work with arbitrary batch size.'

    return embedding_tensor
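Hedged usage sketch (the shape must match the module's expected 224 x 224 input; names are illustrative):

x = tf.placeholder(tf.float32, [None, 224, 224, 3])  # images on [0, 255]
enc_x = tfhub_encoder(x)  # (batch_size, encoding_size)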
Example #4
def run(constant_overwrites):
    config_path = os.path.join(os.path.dirname(__file__), 'hyperparams.yml')
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)
    data_generator = ImageDataGenerator(rescale=1/255, rotation_range=90, width_shift_range=0.2,
                                        height_shift_range=0.2, horizontal_flip=True)
    print('Load', constants['module_spec'])
    module_spec = hub.load_module_spec(constants['module_spec'])
    image_size, _ = hub.get_expected_image_size(module_spec)
    # n_channels = hub.get_num_image_channels(module_spec)
    # project_dir = 'tmp/semihard_full_' + 'time:' + str(int(time()))[-3:] +\
    #               '/top:lambda:' + str(constants['lambda_reg']) +\
    #               'margin:' + str(constants['tl_margin'])
    project_dir = '/Users/d777710/src/DeepLearning/vision'
    print('Project dir:', project_dir)
    _, _, bottleneck_config = get_bottleneck_config(os.path.join(project_dir, constants['bottleneck_dir']),
                                                    os.path.join(project_dir, constants['splits_dir']))
    bottleneck_flow_gen = ImageFlowGenerator(bottleneck_config, mode='bottleneck')
    constants.update({
        'train_dir': os.path.join(project_dir, constants['train_subdir']),
        'top_model_dir': os.path.join(project_dir, constants['top_model_subdir']),
        'val_dir': os.path.join(project_dir, constants['val_subdir']),
        'top_model_val_dir': os.path.join(project_dir, constants['top_model_val_subdir']),
        'data_flow_gen': bottleneck_flow_gen,
        'eval_every_n_steps': 5,
        'generator': data_generator,
        'image_size': image_size
    })
    model = SemiHardModel(constants, train_top_only=True)
    run_training(model, constants)
Example #5
 def __init__(self, module_spec):
     self._module = None
     self._module_spec = tfhub.load_module_spec(module_spec)
     self._module_spec_path = module_spec
     self._sess = None
     self._graph = tf.Graph()
     self._inputs = None
     self._outputs = None
Example #6
 def build(self, input_shape):
     self.embedder = tfhub.Module(self.module_uri, trainable=self.trainable)
     self.embedder_spec = tfhub.load_module_spec(self.module_uri)
     variables_ = [v for v in tensorflow.trainable_variables() if v in self.embedder.variables]
     self.trainable_weights.extend(variables_)
     self.weights.extend(variables_)
     self.trainable_variables.extend(variables_)
     super(TFHubTextLayer, self).build(input_shape)
Example #7
def get_text_module_input_name():
    """Get the tag used for inputs to the text module.

  Returns:
    a string, probably "default"
  """
    module_spec = hub.load_module_spec(FLAGS.module_handle)
    return list(module_spec.get_input_info_dict())[0]
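For a quick interactive check of what a given text module expects (the handle below is just one example of a public text-embedding module):

spec = hub.load_module_spec('https://tfhub.dev/google/nnlm-en-dim128/1')
print(spec.get_input_info_dict())  # maps input name -> tensor info for the default signature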
Example #8
 def download_image_model(mdl_url):
     # type: (str) -> (tensorflow_hub.Module, int, int, int)
     """Returns the Tensorflow Hub model used to process images."""
     module_spec = tensorflow_hub.load_module_spec(mdl_url)
     input_height, input_width = tensorflow_hub.get_expected_image_size(
         module_spec)
     input_depth = tensorflow_hub.get_num_image_channels(module_spec)
     m = tensorflow_hub.Module(module_spec)
     return (m, input_height, input_width, input_depth)
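Hedged usage sketch for the helper above (all names are illustrative):

m, height, width, depth = download_image_model(
    'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1')
images = tf.placeholder(tf.float32, [None, height, width, depth])
features = m(images)  # shape [batch_size, feature_dim]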
Example #9
def eval_from_hub(model_dir, input_fn, eval_steps):
    """Eval using hub module."""
    hub_module_spec = hub.load_module_spec(model_dir)
    run_config = tf.estimator.RunConfig(model_dir=model_dir)
    image_classifier = tf.estimator.Estimator(
        model_fn=_make_model_fn(hub_module_spec), config=run_config, params={})
    eval_results = image_classifier.evaluate(input_fn=input_fn,
                                             steps=eval_steps)
    tf.logging.info('Evaluation results: %s' % eval_results)
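`_make_model_fn` is not shown in this snippet; one plausible eval-only sketch, assuming the hub module maps input features directly to class logits (this is an assumption, not the original implementation):

def _make_model_fn(hub_module_spec):
    def model_fn(features, labels, mode, params):
        module = hub.Module(hub_module_spec)
        logits = module(features)  # assumes the module emits class logits
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
        eval_metric_ops = {
            'accuracy': tf.metrics.accuracy(
                labels=labels, predictions=tf.argmax(logits, axis=1)),
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    return model_fn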
Example #10
def run(dataset_folder, network='inception_v3', batch_size=16):
    assert network in modules, 'Invalid network, pick one of %s' % list(modules.keys())
    assert dataset_folder is not None

    with tf.Graph().as_default():
        dataset = basename(dirname(dataset_folder))
        filenames_output = os.path.join(dataset_folder, dataset + '_' + network + '_filenames.csv')
        labels_output = os.path.join(dataset_folder, dataset + '_' + network + '_labels.csv')
        features_output = os.path.join(dataset_folder, dataset + '_' + network + '_features.csv')

        module_url = modules[network]

        types = ('/*/*.jpg', '/*/*.png')
        filenames = []
        for files in types:
            filenames.extend(glob.glob(dataset_folder + files))
        pbar = tqdm(total=len(filenames))
        labels = [basename(dirname(f)) for f in filenames]
        filenames = tf.constant(filenames)
        labels = tf.constant(labels)

        module_spec = hub.load_module_spec(module_url)
        output_size = module_spec.get_output_info_dict()['default'].get_shape()[1]
        height, width = hub.get_expected_image_size(module_spec)

        images, labels, files = input_fn(filenames, labels, [height, width], batch_size)

        features = np.empty((0, output_size), float)
        classes = np.empty(0, int)
        filenames = np.empty(0, str)

        module = hub.Module(module_spec)
        # Group feature, label and filename tensors so one sess.run fetches all three.
        network = module(images), labels, files

        with tf.compat.v1.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(tf.compat.v1.tables_initializer())

            while True:
                try:
                    x, y, f = sess.run(network)
                    f = [basename(k) for k in f]
                    filenames = np.append(filenames, f)
                    classes = np.append(classes, y)
                    features = np.append(features, x, axis=0)
                    pbar.update(len(y))
                except tf.errors.OutOfRangeError:
                    break
        pbar.close()

        # pca = PCA(n_components=100, random_state=1)
        # features = pca.fit_transform(features)
        np.savetxt(filenames_output, filenames.astype(str), fmt='%s', delimiter=',')
        np.savetxt(labels_output, classes.astype(str), fmt='%s', delimiter=',')
        np.savetxt(features_output, features, delimiter=',')
Example #11
def main(_):
    # Needed to make sure the logging output is visible.
    # See https://github.com/tensorflow/tensorflow/issues/3047
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.image_dir:
        tf.logging.error('Must set flag --image_dir.')
        return -1

    # Prepare necessary directories that can be used during training
    #prepare_file_system()

    # Look at the folder structure, and create lists of all the images.
    image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                     FLAGS.validation_percentage)
    class_count = len(image_lists.keys())
    if class_count == 0:
        tf.logging.error('No valid folders of images found at ' +
                         FLAGS.image_dir)
        return -1
    if class_count == 1:
        tf.logging.error('Only one valid folder of images found at ' +
                         FLAGS.image_dir +
                         ' - multiple classes are needed for classification.')
        return -1

    # Set up the pre-trained graph.
    module_spec = hub.load_module_spec(FLAGS.tfhub_module)
    graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
        create_module_graph(module_spec))

    # Add the new layer that we'll be training.
    with graph.as_default():
        (train_step, cross_entropy, bottleneck_input, ground_truth_input,
         final_tensor) = add_final_retrain_ops(class_count,
                                               FLAGS.final_tensor_name,
                                               bottleneck_tensor,
                                               wants_quantization,
                                               is_training=True)

    with tf.Session(graph=graph) as sess:
        # Initialize all weights: for the module to their pretrained values,
        # and for the newly added retraining layer to random initial values.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Set up the image decoding sub-graph.
        jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)

        # We'll make sure we've calculated the 'bottleneck' image summaries and
        # cached them on disk.
        cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                          FLAGS.bottleneck_dir, jpeg_data_tensor,
                          decoded_image_tensor, resized_image_tensor,
                          bottleneck_tensor, FLAGS.tfhub_module)
Example #12
def main(_):

    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        tf.saved_model.loader.load(sess, [tag_constants.SERVING],
                                   FLAGS.saved_model_dir)
        # resized_input_tensor
        image = graph.get_tensor_by_name('Placeholder:0')
        prediction = graph.get_tensor_by_name('final_result:0')

        module_spec = hub.load_module_spec(FLAGS.tfhub_module)
        jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)

        image_lists = create_image_lists(FLAGS.image_dir, FLAGS.subsets,
                                         FLAGS.testing_percentage,
                                         FLAGS.validation_percentage)
        hits = 0
        total = 0
        confusion_matrix = dict()

        for image_class in image_lists:
            confusion_matrix[image_class] = collections.defaultdict(int)
            for image_set in FLAGS.set:
                for image_filename in image_lists[image_class][image_set]:
                    image_path = os.path.join(FLAGS.image_dir, image_filename)
                    if not tf.gfile.Exists(image_path):
                        tf.logging.fatal('File does not exist %s', image_path)
                    image_data = tf.gfile.GFile(image_path, 'rb').read()
                    resized_image = sess.run(decoded_image_tensor,
                                             {jpeg_data_tensor: image_data})

                    result = sess.run(prediction, {image: resized_image})
                    predicted_class = get_lab(result[0],
                                              list(image_lists.keys()))
                    confusion_matrix[image_class][predicted_class] += 1
                    if image_class == predicted_class:
                        hits = hits + 1
                    total = total + 1

        print("Set: {}".format(" ".join(FLAGS.set)))
        print("Confusion Matrix")
        print("{:>15}".format(""), end="")
        for image_class in image_lists:
            print("{:>15}".format(image_class), end="")
        print()

        for image_class in image_lists:
            print("{:>15}".format(image_class), end="")
            for predicted_class in image_lists:
                print("{:>15}".format(
                    confusion_matirx[image_class][predicted_class]),
                      end="")
            print()
        accuracy = hits / total * 100
        print("Total Accuracy: {}% (N={})".format(accuracy, total))
Example #13
  def __init__(self,
               tf_hub_module_spec=None,
               tf_hub_module_path=None,):
    """Creates an instance to extract image features from a pre-trained model.

    The model to use is specified as a TF-hub module (either by ModuleSpec
    or path).

    If a TF-hub module is given, it is assumed to conform to the interface
    described in [1]. Its default signature should take an input 'images' Tensor
    with shape [batch_size, height, width, num_channels=3] and return a
    [batch_size, feature_dim] Tensor of features. Pass
    `tf_hub_module_spec=make_module_spec_for_testing()` to stub out the model
    for tests.

    [1]
    https://www.tensorflow.org/hub/common_signatures/images#image_feature_vector

    Args:
      tf_hub_module_spec: `hub.ModuleSpec` or None, the TF-hub module to load.
      tf_hub_module_path: str or None, the location of the TF-hub module to load
        in a format understood by `load_module_spec()` (URL,
        '@internal/module/name', '/on/disk/path', etc.)

    Raises:
      ValueError: if not exactly one kwarg specifying the model is given.
    """
    self.spec_str = None  # String describing the model/module being used.

    # Input and output tensors for the image to representation computation.
    # The output tensor will depend on the model options.
    self._input = None
    self._output = None
    self._session = None

    num_kwargs = sum(
        int(kwarg is not None) for kwarg in
        [tf_hub_module_spec, tf_hub_module_path])
    if num_kwargs != 1:
      raise ValueError(
          'Must provide exactly one of "tf_hub_module_spec", '
          '"tf_hub_module_path".')

    if tf_hub_module_spec:
      self.spec_str = 'user_provided_module'
      self._initialize_from_hub_module(tf_hub_module_spec)
    elif tf_hub_module_path:
      self.spec_str = tf_hub_module_path
      self._initialize_from_hub_module(hub.load_module_spec(tf_hub_module_path))
Example #14
def get_bottleneck_list(image_dir, label_name):

    # Look at the folder structure, and create lists of all the images.
    image_lists = create_image_lists(
        '/usa/psu/Documents/CISC849/project/example/flower_photos', 10, 10)
    class_count = len(image_lists.keys())
    if class_count == 0:
        tf.logging.error('No valid folders of images found at ' + image_dir)
        return -1
    if class_count == 1:
        tf.logging.error('Only one valid folder of images found at ' +
                         image_dir +
                         ' - multiple classes are needed for classification.')
        return -1

    # Set up the pre-trained graph.
    module_spec = hub.load_module_spec(
        'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1')
    graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
        create_module_graph(module_spec))

    # Add the new layer that we'll be training.
    with graph.as_default():

        with tf.Session(graph=graph) as sess:
            # Initialize all weights: for the module to their pretrained values,
            # and for the newly added retraining layer to random initial values.
            init = tf.global_variables_initializer()
            sess.run(init)
            bottleneck_list = []
            label_list = []
            # Set up the image decoding sub-graph.
            jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(
                module_spec)
            for subf_i in os.listdir(image_dir):
                empty_label = [0] * len(label_name)
                index_label = label_name.index(subf_i)
                empty_label[index_label] = 1
                for file_i in os.listdir(image_dir + subf_i):

                    bottleneck_value = get_bottleneck_values(
                        sess, image_lists, image_dir + subf_i + '/' + file_i,
                        '', jpeg_data_tensor, decoded_image_tensor,
                        resized_image_tensor, bottleneck_tensor,
                        'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
                    )
                    bottleneck_list.append(bottleneck_value)
                    label_list.append(empty_label)
    return bottleneck_list, label_list
Example #15
def train_neural_network():
    ModuleSpec = hub.load_module_spec(
        "https://tfhub.dev/google/imagenet/mobilenet_v1_100_128/feature_vector/1"
    )
    graph, bottleneck_tensor, input_tensor = creat_graph(ModuleSpec)

    with graph.as_default():
        _, bottleneck_tensor_size = bottleneck_tensor.get_shape().as_list()
        X = tf.placeholder_with_default(bottleneck_tensor,
                                        shape=[None, bottleneck_tensor_size],
                                        name='newlayerinputplacholder')
        output, cost, optimizer, y = last_layer(X)

        with tf.Session(graph=graph) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            bottleneck_value, labels, batch_step = run_and_save_bottleneck(
                sess, bottleneck_tensor, input_tensor)

            saver = tf.train.Saver()
            for epoch in range(hm_epochs):
                epoch_loss = 0
                for batch in range(batch_step):
                    epoch_x = bottleneck_value[batch * batch_size:(batch + 1) *
                                               batch_size]
                    epoch_y = labels[batch * batch_size:(batch + 1) *
                                     batch_size]

                    _, c = sess.run([optimizer, cost],
                                    feed_dict={
                                        X: epoch_x,
                                        y: epoch_y
                                    })
                    epoch_loss += c
                print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:',
                      epoch_loss)
            writer = tf.summary.FileWriter("output", sess.graph)
            writer.close()
            # please customize the directory for your project
            saver.save(
                sess, '/home/ali/PycharmProjects/tensorHub/save/my_test_model')

            # correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y,1))
            # accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            # print('Accyracy:', accuracy.eval({x:mnist.test.images, y:mnist.test.labels}))

    return graph, input_tensor, init
Example #16
    def _build_train_graph(self, n_classes):
        self._train_graph = tf.Graph()
        with self._train_graph.as_default():
            # Load module spec/blueprint:
            tfhub_module_spec = hub.load_module_spec('https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1')
            height, width = hub.get_expected_image_size(tfhub_module_spec)

            # Create a placeholder tensor for image input to the model (when bottleneck has not been pre-computed).
            resized_input_tensor = tf.placeholder(tf.float32, [None, height, width, 3], name='resized_input')

            # m_reg = hub.Module(tfhub_module_spec, trainable=True, tags={'train'})
            m_reg = hub.Module(tfhub_module_spec, trainable=False, tags={'train'})
            # m = hub.Module(tfhub_module_spec)

            # Catch the output of the pre-activation (bottleneck) layer:
            # bottleneck_tensor = m(resized_input_tensor)
            bottleneck_tensor_reg = m_reg(resized_input_tensor)

            batch_size, bottleneck_tensor_size = bottleneck_tensor_reg.get_shape().as_list()
            self.bottleneck_input = tf.placeholder_with_default(
                bottleneck_tensor_reg,
                shape=[batch_size, bottleneck_tensor_size],
                name='BottleneckInputPlaceholder'
            )
            self.ground_truth_input = tf.placeholder(
                tf.int64, [batch_size], name='GroundTruthInput'
            )
            with tf.name_scope('weights'):
                initial_value = tf.variance_scaling_initializer()(shape=[bottleneck_tensor_size, n_classes])
                layer_weights = tf.Variable(initial_value=initial_value, name='final_weights')
            with tf.name_scope('biases'):
                layer_biases = tf.Variable(initial_value=tf.zeros([n_classes]), name='final_biases')
            with tf.name_scope('Wx_plus_b'):
                logits = tf.matmul(self.bottleneck_input, layer_weights) + layer_biases
            Y_proba = tf.nn.softmax(logits, name='Y_proba')
            xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.ground_truth_input, logits=logits)
            self.loss = tf.reduce_mean(xentropy, name='xentropy_loss')
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self.training_op = optimizer.minimize(self.loss)
            correct = tf.nn.in_top_k(logits, self.ground_truth_input, 1)
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
            # Declare initializer:
            self._init = tf.global_variables_initializer()
            # extra ops for batch normalization
            self.extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
Example #17
def load_graphs(subsets_list, main_dir, saved_model_dir, tfhub_module,
                output_labels):
    graphs = {
        'graph': [],
        'sess': [],
        'image': [],
        'prediction': [],
        'jpeg_data_tensor': [],
        'decoded_image_tensor': [],
        'labels': [],
        'is_last_step': []
    }
    for subsets in subsets_list:
        subset_name = subsets.replace(":", "_").replace(",", "-")
        model_full_path = os.path.join(main_dir,
                                       "retrain_subset_{}".format(subset_name),
                                       saved_model_dir)
        labels_full_path = os.path.join(
            main_dir, "retrain_subset_{}".format(subset_name), output_labels)
        with open(labels_full_path) as f:
            labels = f.readlines()
        labels = [l.strip() for l in labels]
        print("Loading model: {}".format(model_full_path))
        with tf.Graph().as_default() as graph:
            with tf.Session(graph=graph).as_default() as sess:
                tf.saved_model.loader.load(sess, [tag_constants.SERVING],
                                           model_full_path)
                # resized_input_tensor
                image = graph.get_tensor_by_name('Placeholder:0')
                prediction = graph.get_tensor_by_name('final_result:0')

                module_spec = hub.load_module_spec(tfhub_module)
                jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(
                    module_spec)

                graphs['graph'].append(graph)
                graphs['sess'].append(sess)
                graphs['image'].append(image)
                graphs['prediction'].append(prediction)
                graphs['jpeg_data_tensor'].append(jpeg_data_tensor)
                graphs['decoded_image_tensor'].append(decoded_image_tensor)
                graphs['labels'].append(labels)
                graphs['is_last_step'].append(False)
    graphs['is_last_step'][-1] = True
    return graphs
Example #18
def save_file_to_disk(graph, file, hub_module, num_classes, final_tensor_name,
                      learning_rate, CHECKPOINT_DIR):
    """Saves intermediate model files to disk
	Args:
		graph: tensorflow graph
		file: name of file to be saved
		module: Tensorflow hub module
		num_classes: number of classes in our dataset
	"""
    module = hub.load_module_spec(hub_module)
    sess, _, _, _, _ = compute_test_graph(hub_module, num_classes,
                                          final_tensor_name, learning_rate,
                                          CHECKPOINT_DIR)
    graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [final_tensor_name])

    with tf.gfile.FastGFile(file, 'wb') as f:
        f.write(graph.SerializeToString())
Example #19
def build_graph(hub_module):
    """Build a graph from tensorflow hub module
	
	Args:
		hub_module: Tensorflow Hub module
	Returns:
		graph extracted from hub module
		pre final tensor(bottleneck)
		input tensor (expected image size by graph)	
	"""
    module = hub.load_module_spec(hub_module)
    h, w = hub.get_expected_image_size(module)
    with tf.Graph().as_default() as graph:
        input_tensor = tf.placeholder(tf.float32, shape=(None, h, w, 3))
        mod = hub.Module(module)
        pre_final_tensor = mod(input_tensor)

    return graph, pre_final_tensor, input_tensor
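Usage sketch for build_graph (the module URL is illustrative):

graph, pre_final_tensor, input_tensor = build_graph(
    'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1')
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    # feed batches of resized images into input_tensor to get bottlenecks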
Example #20
def main(_):
  # Needed to make sure the logging output is visible.
  tf.logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.image_dir:
    tf.logging.error('Must set flag --image_dir.')
    return -1

  # Prepare necessary directories that can be used during training
  prepare_file_system()

  # Look at the folder structure, and create lists of all the images.
  image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                   FLAGS.validation_percentage)
  class_count = len(image_lists.keys())
  if class_count == 0:
    tf.logging.error('No valid folders of images found at ' + FLAGS.image_dir)
    return -1
  if class_count == 1:
    tf.logging.error('Only one valid folder of images found at ' +
                     FLAGS.image_dir +
                     ' - multiple classes are needed for classification.')
    return -1

  # See if the command-line flags mean we're applying any distortions.
  do_distort_images = should_distort_images(
      FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
      FLAGS.random_brightness)

  # Set up the pre-trained graph.
  module_spec = hub.load_module_spec(FLAGS.tfhub_module)
  graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
      create_module_graph(module_spec))

  # Add the new layer that we'll be training.
  with graph.as_default():
    (train_step, cross_entropy, bottleneck_input,
     ground_truth_input, final_tensor) = add_final_retrain_ops(
         class_count, FLAGS.final_tensor_name, bottleneck_tensor,
         wants_quantization, is_training=True)

  with tf.Session(graph=graph) as sess:
Example #21
def extract_jpg_feature(image_path):    
    tfhub_module = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
    module_spec = hub.load_module_spec(tfhub_module)
    graph, bottleneck_tensor, jpeg_tensor = create_module_graph(module_spec)
    
    with tf.Session(graph=graph) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        
        image_data = tf.gfile.GFile(image_path, 'rb').read()
        try:
            bottleneck_values = sess.run(bottleneck_tensor,
                                            {jpeg_tensor: image_data})
            bottleneck_values = np.squeeze(bottleneck_values)
            #np.savetxt(feature_path, bottleneck_values)
            return bottleneck_values.reshape(1, utils.static_dimension)

        except Exception as e:
            log_util.logger.error("extract feature of jpg[%s] fail: %s", image_path, str(e))
            return None
Example #22
def decode_and_resize(hub_module):
	"""Performs image processing steps(decoding and reshaping)
	Args:
		hub_module: Tensorflow Hub module
	Returns:
		placeholder for image data
		reshaped tensor as expected by graph
	"""
	module = hub.load_module_spec(hub_module)
	h, w = hub.get_expected_image_size(module)
	reshape_specs = tf.stack((h, w))
	num_channels = hub.get_num_image_channels(module)
	
	data_placeholder = tf.placeholder(tf.string, name='data_placeholder')
	decode = tf.image.decode_jpeg(data_placeholder, channels=num_channels)
	decode = tf.image.convert_image_dtype(decode, tf.float32)
	decode = tf.expand_dims(decode, 0)
	reshape = tf.cast(reshape_specs, dtype=tf.int32)
	reshaped_image = tf.image.resize_bilinear(decode, reshape)

	return  data_placeholder, reshaped_image
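Usage sketch for decode_and_resize, feeding raw JPEG bytes through the sub-graph (module URL and file path are illustrative):

data_placeholder, reshaped_image = decode_and_resize(
    'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1')
with tf.Session() as sess:
    with tf.gfile.GFile('/path/to/image.jpg', 'rb') as f:
        resized = sess.run(reshaped_image, {data_placeholder: f.read()})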
Example #23
def main(_):
    if not FLAGS.vid_dir:
        tf.logging.error('Must set flag --vid_dir.')
        return -1
    included_extensions = ['avi', 'mp4', 'mkv', 'mpeg']
    file_list = [
        fn for fn in os.listdir(FLAGS.vid_dir) if any(
            fn.endswith(ext) for ext in included_extensions)
    ]
    module = hub.load_module_spec(FLAGS.tfhub_module)
    height, width = hub.get_expected_image_size(module)
    resized_input_tensor = tf.placeholder(tf.float32, [None, height, width, 3])
    m = hub.Module(module)
    feature_tensor = m(resized_input_tensor)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    for file_name in file_list:
        print('Processing file: ' + file_name)
        vidcap = cv2.VideoCapture(FLAGS.vid_dir + '/' + file_name)
        success, image = vidcap.read()
        count = 0
        frames = []
        while success:
            frames.append(image)
            success, image = vidcap.read()
            count += 1
        print('Number of frames: ', count)
        img = [
            cv2.cvtColor(
                cv2.resize(frames[i],
                           dsize=(width, height),  # cv2 dsize is (width, height)
                           interpolation=cv2.INTER_LINEAR), cv2.COLOR_BGR2RGB)
            / 255 for i in range(len(frames))
        ]
        feature_vector = sess.run(feature_tensor,
                                  feed_dict={resized_input_tensor: img})
        txt_name = FLAGS.vid_dir + '/' + os.path.splitext(
            file_name)[0] + '.csv'
        np.savetxt(txt_name, feature_vector)
Example #24
  def testAssets(self):
    export_path = os.path.join(self.get_temp_dir(), "assets-module")
    vocabulary_file = self.create_vocab_file("tokens.txt",
                                             ["emerson", "lake", "palmer"])
    with tf.Graph().as_default():
      assets_module_fn = create_assets_module_fn(vocabulary_file)
      spec = hub.create_module_spec(assets_module_fn)
      embedding_module = hub.Module(spec)
      output = embedding_module(tf.constant([1, 2], dtype=tf.int64))
      with tf.Session() as sess:
        sess.run(tf.tables_initializer())
        self.assertAllEqual(list(sess.run(output)), [b"lake", b"palmer"])
        embedding_module.export(export_path, sess)

    asset_file = os.path.join(export_path, "assets", "tokens.txt")
    # Check that asset file got written to the expected place:
    self.assertTrue(tf.gfile.Exists(asset_file))

    # Assets should be hermetic, so we can delete the original vocab file:
    tf.gfile.Remove(vocabulary_file)

    with tf.Graph().as_default():
      spec = hub.load_module_spec(export_path)
      embedding_module = hub.Module(spec)
      output = embedding_module(tf.constant([1, 2], dtype=tf.int64))
      with tf.Session() as sess:
        sess.run(tf.tables_initializer())
        # Check functionality:
        self.assertAllEqual(list(sess.run(output)), [b"lake", b"palmer"])
        # Check that the ASSET_FILEPATHS collection was restored properly:
        asset_filepaths = [
            sess.run(tensor)
            for tensor in tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
        ]
        # ASSET_FILEPATHS are added for the state graph and for the apply graph:
        self.assertAllEqual(asset_filepaths,
                            [tf.compat.as_bytes(asset_file)] * 2)
Example #25
def compute_final_op(sess, batch_size, features_dir, data_dir, hub_module,
                     num_classes, files, data_placeholder, reshaped_image,
                     pre_final_tensor, input_tensor, final_tensor_name,
                     learning_rate, CHECKPOINT_DIR):
    """Computes final performance using test set
	Args:
		sess: Current tensorflow session
		batch_size: batch size
		features_dir:
		hub_module: Tensorflow hub module
		num_classes: number of classes in our dataset
		files: Training file names 
		data_placeholder: Placeholder for image data
		reshaped_image: Reshaped tensor as expected by graph
		pre_final_tensor: pre_final (bottleneck) tensor
		input_tensor: input tensor (expected image size by graph)
		final_tensor_name:
		learning_rate:
		CHECKPOINT_DIR:	
	"""
    module = hub.load_module_spec(hub_module)
    features, labels, filenames = sample_random_features(
        sess, num_classes, files, batch_size, 'test', features_dir, data_dir,
        data_placeholder, reshaped_image, pre_final_tensor, input_tensor,
        hub_module)

    sess, input_tensor, pre_final_input_tensor, truth_input_tensor, \
           pred, step = compute_test_graph(hub_module, num_classes,
               final_tensor_name, learning_rate, CHECKPOINT_DIR)

    acc, p = sess.run([step, pred],
                      feed_dict={
                          data_placeholder: features,
                          truth_input_tensor: labels
                      })

    print('Final Test Accuracy {}'.format(acc * 100))
Example #26
    def __init__(self):

        # member variables.
        # the URL of the pre-trained model.
        self.HUB_MODULE = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
        # the model spec.
        self.Module_Spec = hub.load_module_spec(self.HUB_MODULE)
        # the image size that is required by this model.
        self.Module_Height, self.Module_Width = hub.get_expected_image_size(
            self.Module_Spec)
        self.Module_Depth = hub.get_num_image_channels(self.Module_Spec)
        # A module is understood as instrumented for quantization with TF-Lite
        # if it contains any of these ops.
        self.FAKE_QUANT_OPS = ('FakeQuantWithMinMaxVars',
                               'FakeQuantWithMinMaxVarsPerChannel')

        # the size of our input images.
        self.ImageHeight = self.Module_Height
        self.ImageWidth = self.Module_Width
        self.ImageChannels = self.Module_Depth

        # Set up the pre-trained graph.
        self.graph, self.bottleneck_tensor, self.resized_input_tensor, self.wants_quantization = self.create_module_graph(
            self.Module_Spec)
Example #27
def _build_graph(tfhub_module_url):
    module_spec = hub.load_module_spec(tfhub_module_url)
    height, width = hub.get_expected_image_size(module_spec)
    tf.logging.info(msg='Loaded TensorFlowHub module spec: %s' %
                    tfhub_module_url)
    graph = tf.Graph()
    with graph.as_default():
        # Create a placeholder tensor for image input to the model (when bottleneck has not been pre-computed).
        resized_input_tensor = tf.placeholder(tf.float32,
                                              [None, height, width, 3],
                                              name='resized_input')
        # Declare the model in accordance with the chosen architecture:
        m = hub.Module(module_spec)
        # Create a placeholder tensor to catch the output of the pre-activation layer:
        bottleneck_tensor = m(resized_input_tensor)
        tf.logging.info(
            msg=
            'Defined computational graph from the tensorflow hub module spec.')

        # Image decoding sub-graph:
        with tf.name_scope('image_decoding'):
            jpeg_data_tensor, decoded_image_tensor = _add_jpeg_decoding(
                module_spec)
    return graph, bottleneck_tensor, resized_input_tensor, jpeg_data_tensor, decoded_image_tensor
Example #28
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.image_dir:
    tf.logging.error('Must set flag --image_dir.')
    return -1

  prepare_file_system()
  image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                   FLAGS.validation_percentage)
  class_count = len(image_lists.keys())
  if class_count == 0:
    tf.logging.error('No valid folders of images found at ' + FLAGS.image_dir)
    return -1
  if class_count == 1:
    tf.logging.error('Only one valid folder of images found at ' +
                     FLAGS.image_dir +
                     ' - multiple classes are needed for classification.')
    return -1

  do_distort_images = should_distort_images(
      FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
      FLAGS.random_brightness)

  module_spec = hub.load_module_spec(FLAGS.tfhub_module)
  graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
      create_module_graph(module_spec))

  with graph.as_default():
    (train_step, cross_entropy, bottleneck_input,
     ground_truth_input, final_tensor) = add_final_retrain_ops(
         class_count, FLAGS.final_tensor_name, bottleneck_tensor,
         wants_quantization, is_training=True)

  with tf.Session(graph=graph) as sess:

    init = tf.global_variables_initializer()
    sess.run(init)

    jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)

    if do_distort_images:

      (distorted_jpeg_data_tensor,
       distorted_image_tensor) = add_input_distortions(
           FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
           FLAGS.random_brightness, module_spec)
    else:

      cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                        FLAGS.bottleneck_dir, jpeg_data_tensor,
                        decoded_image_tensor, resized_image_tensor,
                        bottleneck_tensor, FLAGS.tfhub_module)

    # Create the operations we need to evaluate the accuracy of our new layer.
    evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input)

    # Merge all the summaries and write them out to the summaries_dir
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)

    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    train_saver = tf.train.Saver()

    for i in range(FLAGS.how_many_training_steps):

      if do_distort_images:
        (train_bottlenecks,
         train_ground_truth) = get_random_distorted_bottlenecks(
             sess, image_lists, FLAGS.train_batch_size, 'training',
             FLAGS.image_dir, distorted_jpeg_data_tensor,
             distorted_image_tensor, resized_image_tensor, bottleneck_tensor)
      else:
        (train_bottlenecks,
         train_ground_truth, _) = get_random_cached_bottlenecks(
             sess, image_lists, FLAGS.train_batch_size, 'training',
             FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
             decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
             FLAGS.tfhub_module)

      train_summary, _ = sess.run(
          [merged, train_step],
          feed_dict={bottleneck_input: train_bottlenecks,
                     ground_truth_input: train_ground_truth})
      train_writer.add_summary(train_summary, i)

      is_last_step = (i + 1 == FLAGS.how_many_training_steps)
      if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
        train_accuracy, cross_entropy_value = sess.run(
            [evaluation_step, cross_entropy],
            feed_dict={bottleneck_input: train_bottlenecks,
                       ground_truth_input: train_ground_truth})
        tf.logging.info('%s: Step %d: Train accuracy = %.1f%%' %
                        (datetime.now(), i, train_accuracy * 100))
        tf.logging.info('%s: Step %d: Cross entropy = %f' %
                        (datetime.now(), i, cross_entropy_value))
        # TODO: Make this use an eval graph, to avoid quantization
        # moving averages being updated by the validation set.

        validation_bottlenecks, validation_ground_truth, _ = (
            get_random_cached_bottlenecks(
                sess, image_lists, FLAGS.validation_batch_size, 'validation',
                FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
                decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
                FLAGS.tfhub_module))

        validation_summary, validation_accuracy = sess.run(
            [merged, evaluation_step],
            feed_dict={bottleneck_input: validation_bottlenecks,
                       ground_truth_input: validation_ground_truth})
        validation_writer.add_summary(validation_summary, i)
        tf.logging.info('%s: Step %d: Validation accuracy = %.1f%% (N=%d)' %
                        (datetime.now(), i, validation_accuracy * 100,
                         len(validation_bottlenecks)))

      intermediate_frequency = FLAGS.intermediate_store_frequency

      if (intermediate_frequency > 0 and (i % intermediate_frequency == 0)
          and i > 0):
        train_saver.save(sess, CHECKPOINT_NAME)
        intermediate_file_name = (FLAGS.intermediate_output_graphs_dir +
                                  'intermediate_' + str(i) + '.pb')
        tf.logging.info('Save intermediate result to : ' +
                        intermediate_file_name)
        save_graph_to_file(intermediate_file_name, module_spec,
                           class_count)

    train_saver.save(sess, CHECKPOINT_NAME)

    run_final_eval(sess, module_spec, class_count, image_lists,
                   jpeg_data_tensor, decoded_image_tensor, resized_image_tensor,
                   bottleneck_tensor)

    tf.logging.info('Save final result to : ' + FLAGS.output_graph)
    if wants_quantization:
      tf.logging.info('The model is instrumented for quantization with TF-Lite')
    save_graph_to_file(FLAGS.output_graph, module_spec, class_count)
    with tf.gfile.FastGFile(FLAGS.output_labels, 'w') as f:
      f.write('\n'.join(image_lists.keys()) + '\n')

    if FLAGS.saved_model_dir:
      export_model(module_spec, class_count, FLAGS.saved_model_dir)
Example #29
def main(_):
    logging_verbosity = logging_level_verbosity(FLAGS.logging_verbosity)
    tf.logging.set_verbosity(logging_verbosity)

    if not FLAGS.image_dir:
        tf.logging.error('Must set flag --image_dir.')
        return -1

    # Prepare necessary directories that can be used during training
    prepare_file_system()

    # Look at the folder structure, and create lists of all the images.
    image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                     FLAGS.validation_percentage)
    class_count = len(image_lists.keys())
    if class_count == 0:
        tf.logging.error('No valid folders of images found at ' +
                         FLAGS.image_dir)
        return -1
    if class_count == 1:
        tf.logging.error('Only one valid folder of images found at ' +
                         FLAGS.image_dir +
                         ' - multiple classes are needed for classification.')
        return -1

    # See if the command-line flags mean we're applying any distortions.
    do_distort_images = should_distort_images(FLAGS.flip_left_right,
                                              FLAGS.random_crop,
                                              FLAGS.random_scale,
                                              FLAGS.random_brightness)

    # Set up the pre-trained graph.
    module_spec = hub.load_module_spec(FLAGS.tfhub_module)
    graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
        create_module_graph(module_spec))

    # Add the new layer that we'll be training.
    with graph.as_default():
        (train_step, cross_entropy, bottleneck_input, ground_truth_input,
         final_tensor) = add_final_retrain_ops(class_count,
                                               FLAGS.final_tensor_name,
                                               bottleneck_tensor,
                                               wants_quantization,
                                               is_training=True)

    with tf.Session(graph=graph) as sess:
        # Initialize all weights: for the module to their pretrained values,
        # and for the newly added retraining layer to random initial values.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Set up the image decoding sub-graph.
        jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)

        if do_distort_images:
            # We will be applying distortions, so set up the operations we'll need.
            (distorted_jpeg_data_tensor,
             distorted_image_tensor) = add_input_distortions(
                 FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
                 FLAGS.random_brightness, module_spec)
        else:
            # We'll make sure we've calculated the 'bottleneck' image summaries and
            # cached them on disk.
            cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                              FLAGS.bottleneck_dir, jpeg_data_tensor,
                              decoded_image_tensor, resized_image_tensor,
                              bottleneck_tensor, FLAGS.tfhub_module)

        # Create the operations we need to evaluate the accuracy of our new layer.
        evaluation_step, _ = add_evaluation_step(final_tensor,
                                                 ground_truth_input)

        # Merge all the summaries and write them out to the summaries_dir
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                             sess.graph)

        validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                                  '/validation')

        # Create a train saver that is used to restore values into an eval graph
        # when exporting models.
        train_saver = tf.train.Saver()

        # Run the training for as many cycles as requested on the command line.
        for i in range(FLAGS.how_many_training_steps):
            # Get a batch of input bottleneck values, either calculated fresh every
            # time with distortions applied, or from the cache stored on disk.
            if do_distort_images:
                (train_bottlenecks,
                 train_ground_truth) = get_random_distorted_bottlenecks(
                     sess, image_lists, FLAGS.train_batch_size, 'training',
                     FLAGS.image_dir, distorted_jpeg_data_tensor,
                     distorted_image_tensor, resized_image_tensor,
                     bottleneck_tensor)
            else:
                (train_bottlenecks,
                 train_ground_truth, _) = get_random_cached_bottlenecks(
                     sess, image_lists, FLAGS.train_batch_size, 'training',
                     FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
                     decoded_image_tensor, resized_image_tensor,
                     bottleneck_tensor, FLAGS.tfhub_module)
            # Feed the bottlenecks and ground truth into the graph, and run a training
            # step. Capture training summaries for TensorBoard with the `merged` op.
            train_summary, _ = sess.run(
                [merged, train_step],
                feed_dict={
                    bottleneck_input: train_bottlenecks,
                    ground_truth_input: train_ground_truth
                })
            train_writer.add_summary(train_summary, i)

            # Every so often, print out how well the graph is training.
            is_last_step = (i + 1 == FLAGS.how_many_training_steps)
            if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
                train_accuracy, cross_entropy_value = sess.run(
                    [evaluation_step, cross_entropy],
                    feed_dict={
                        bottleneck_input: train_bottlenecks,
                        ground_truth_input: train_ground_truth
                    })
                tf.logging.info('%s: Step %d: Train accuracy = %.1f%%' %
                                (datetime.now(), i, train_accuracy * 100))
                tf.logging.info('%s: Step %d: Cross entropy = %f' %
                                (datetime.now(), i, cross_entropy_value))
                # TODO: Make this use an eval graph, to avoid quantization
                # moving averages being updated by the validation set, though in
                # practice this makes a negligible difference.
                validation_bottlenecks, validation_ground_truth, _ = (
                    get_random_cached_bottlenecks(
                        sess, image_lists, FLAGS.validation_batch_size,
                        'validation', FLAGS.bottleneck_dir, FLAGS.image_dir,
                        jpeg_data_tensor, decoded_image_tensor,
                        resized_image_tensor, bottleneck_tensor,
                        FLAGS.tfhub_module))
                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy = sess.run(
                    [merged, evaluation_step],
                    feed_dict={
                        bottleneck_input: validation_bottlenecks,
                        ground_truth_input: validation_ground_truth
                    })
                validation_writer.add_summary(validation_summary, i)
                tf.logging.info(
                    '%s: Step %d: Validation accuracy = %.1f%% (N=%d)' %
                    (datetime.now(), i, validation_accuracy * 100,
                     len(validation_bottlenecks)))

            # Store intermediate results
            intermediate_frequency = FLAGS.intermediate_store_frequency

            if (intermediate_frequency > 0
                    and (i % intermediate_frequency == 0) and i > 0):
                # If we want to do an intermediate save, save a checkpoint of the train
                # graph, to restore into the eval graph.
                train_saver.save(sess, FLAGS.checkpoint_path)
                intermediate_file_name = (
                    FLAGS.intermediate_output_graphs_dir + 'intermediate_' +
                    str(i) + '.pb')
                tf.logging.info('Save intermediate result to : ' +
                                intermediate_file_name)
                save_graph_to_file(intermediate_file_name, module_spec,
                                   class_count)

        # After training is complete, force one last save of the train checkpoint.
        train_saver.save(sess, FLAGS.checkpoint_path)

        # We've completed all our training, so run a final test evaluation on
        # some new images we haven't used before.
        run_final_eval(sess, module_spec, class_count, image_lists,
                       jpeg_data_tensor, decoded_image_tensor,
                       resized_image_tensor, bottleneck_tensor)

        # Write out the trained graph and labels with the weights stored
        # constants.
        tf.logging.info('Save final result to : ' + FLAGS.output_graph)
        if wants_quantization:
            tf.logging.info(
                'The model is instrumented for quantization with TF-Lite')
        save_graph_to_file(FLAGS.output_graph, module_spec, class_count)
        with tf.gfile.GFile(FLAGS.output_labels, 'w') as f:
            f.write('\n'.join(image_lists.keys()) + '\n')

        if FLAGS.saved_model_dir:
            export_model(module_spec, class_count, FLAGS.saved_model_dir)
Example #30
  def testModuleWithBatchNorm(self):
    export_path = os.path.join(self.get_temp_dir(), "batch-norm-module")
    # This test resorts to lookup by name to retrieve the moving mean,
    # because tf.contrib.layers.batch_norm() does not return it, and even if,
    # module_fn() has no way to return it next to the result for training.
    moving_mean_name = (
        "module/BatchNorm/moving_mean/Read/ReadVariableOp:0")

    batch_norm_train_tags = ["batch_norm_trains"]
    batch_norm_fixed_tags = ["batch_norm_fixed"]
    spec = hub.create_module_spec(
        batch_norm_module_fn,
        [(batch_norm_train_tags, {"is_training": True}),
         (batch_norm_fixed_tags, {"is_training": False})])
    # Test Module creation and training.
    with tf.Graph().as_default() as g:
      m = hub.Module(spec, trainable=True, tags=batch_norm_train_tags)
      # The module is trained on a fixed batch of inputs, which has a mean
      # of 12.0 and some sample variance of a less obvious value. The module
      # learns scale and offset parameters that achieve the mapping x --> 2*x
      # for the observed mean and variance.
      x = tf.constant([[11.0], [12.0], [13.0]])
      training_mean = [12.0]
      y_target = tf.constant([[22.0], [24.0], [26.0]])
      y = m(x)
      step = tf.Variable(0, trainable=False, name="global_step")
      train = tf.contrib.layers.optimize_loss(
          loss=tf.losses.mean_squared_error(y, y_target),
          global_step=step,
          learning_rate=0.1,
          optimizer="SGD")
      moving_mean = g.get_tensor_by_name(moving_mean_name)
      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        self.assertAllClose(sess.run(moving_mean), [0.0])
        for _ in range(100):
          sess.run([train])
        trained_moving_mean, trained_y = sess.run([moving_mean, y])
        self.assertAllClose(trained_moving_mean, training_mean)
        self.assertAllClose(trained_y, [[22.0], [24.0], [26.0]])
        # Test export.
        m.export(export_path, sess)

    # Test import and use.
    spec = hub.load_module_spec(export_path)
    with tf.Graph().as_default() as g:
      # The module gets run for inference on inputs with different mean and
      # variance. However, both mean and variance as well as offset and scale
      # are now frozen to the values from learning, so the same mapping
      # x --> 2*x is recovered.
      x = tf.constant([[10.0], [20.0], [30.0]])
      y = hub.Module(
          spec, tags=batch_norm_fixed_tags)(x)
      moving_mean = g.get_tensor_by_name(moving_mean_name)
      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(100):
          served_moving_mean, served_y = sess.run([moving_mean, y])
        # No update occurs to the moving_mean from training time.
        self.assertAllClose(served_moving_mean, training_mean)
        # Prediction results are correct.
        self.assertAllClose(served_y, [[20.0], [40.0], [60.0]])
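`batch_norm_module_fn` is defined elsewhere in the test file; a plausible sketch consistent with the tags and shapes used above (an assumption, not the verbatim original):

def batch_norm_module_fn(is_training):
    # One-feature input; batch norm behavior switches on the is_training tag.
    x = tf.placeholder(tf.float32, shape=[None, 1])
    y = tf.contrib.layers.batch_norm(
        x, center=True, scale=True, is_training=is_training)
    hub.add_signature(inputs=x, outputs=y)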
Example #31
    if args.saved_model_dir:
        saved_model_dir = args.saved_model_dir
    else:
        saved_model_dir = os.path.join(
            os.path.join(os.path.abspath(os.sep), 'tmp'), 'saved_model')
    if args.output_labels:
        output_labels = args.output_labels
    else:
        output_labels = os.path.join(saved_model_dir, 'label.txt')

    image_size = None
    if args.image_size:
        image_size = (args.image_size, args.image_size)
    else:
        try:
            module_spec = hub.load_module_spec(args.tfhub_module)
            image_size = tuple(hub.get_expected_image_size(module_spec))
            print("get model spec", image_size)
        except:
            if "mobilenet" in args.tfhub_module:
                print("get model spec failed, use default spec as 224 x 224,")
                image_size = (224, 224)

    if image_size:
        Retrain(data_dir=args.image_dir,
                saved_model_path=saved_model_dir,
                saved_label_path=output_labels,
                epochs=args.epochs,
                batch_size=args.batch_size,
                module_handle=args.tfhub_module,
                image_size=image_size)
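
The spec lookup with a mobilenet fallback above could be factored into a small helper. resolve_image_size below is a hypothetical name, not part of the original script:

def resolve_image_size(tfhub_module, default=(224, 224)):
    # Hypothetical helper: prefer the module's expected size, else a default.
    try:
        spec = hub.load_module_spec(tfhub_module)
        return tuple(hub.get_expected_image_size(spec))
    except Exception:
        return default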
Example #32
    # train params
    epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    csv_out = args.csv_output
    pred_out = args.prediction_output
    dropout = args.dropout
    save_models = args.save
    import_features = args.import_features
    tfhub = args.tfhub_module


    ##### LOAD IMAGES ######
    if tfhub is not None:
        module_spec = hub.load_module_spec(tfhub)
        height, width = hub.get_expected_image_size(module_spec)
        channels = hub.get_num_image_channels(module_spec)
    else:
        height, width, channels = 224, 224, 3

    ### training images
    # read paths and labels for each image
    listimgs, listlabels = parse_input(train_paths)
    # load images
    loaded_imgs = [load_image(img, size=height).reshape((height, width, channels))
                   for img in listimgs]
    print('[TRAINING] Loaded', len(loaded_imgs), 'images and', len(listlabels), 'labels')
    # map string labels to unique integers
    u, indices = np.unique(np.array(listlabels), return_inverse=True)
    print('[TRAINING] Categories: ', u)
    num_categories = len(u)
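
For reference, this is what the np.unique call above produces on a toy input (the labels are illustrative; the arrays are what numpy actually returns for them):

import numpy as np

toy_labels = ['dog', 'cat', 'dog', 'bird']  # illustrative only
u, indices = np.unique(np.array(toy_labels), return_inverse=True)
# u       -> array(['bird', 'cat', 'dog'], dtype='<U4')  sorted unique labels
# indices -> array([2, 1, 2, 0])  integer class id for each entry of toy_labels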
def retrain(inputdir=None):
    # Needed to make sure the logging output is visible.
    # See https://github.com/tensorflow/tensorflow/issues/3047
    tf.logging.set_verbosity(tf.logging.ERROR)

    if inputdir:
        if isinstance(inputdir, str):
            FLAGS.image_dir = inputdir
        else:
            tf.logging.error("Invalid input directory!")
            return -1

    if not FLAGS.image_dir:
        tf.logging.error('Must set flag --image_dir.')
        return -1

    print('Retraining with images in directory: ' + FLAGS.image_dir)

    # Prepare necessary directories that can be used during training
    prepare_file_system()

    # Look at the folder structure, and create lists of all the images.
    image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                     FLAGS.validation_percentage)
    class_count = len(image_lists.keys())
    if class_count == 0:
        tf.logging.error('No valid folders of images found at ' + FLAGS.image_dir)
        return -1
    if class_count == 1:
        tf.logging.error('Only one valid folder of images found at ' +
                         FLAGS.image_dir +
                         ' - multiple classes are needed for classification.')
        return -1

    # See if the command-line flags mean we're applying any distortions.
    do_distort_images = should_distort_images(
            FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
            FLAGS.random_brightness)

    # Set up the pre-trained graph.
    module_spec = hub.load_module_spec(FLAGS.tfhub_module)
    graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
            create_module_graph(module_spec))

    # Add the new layer that we'll be training.
    with graph.as_default():
        (train_step, cross_entropy, bottleneck_input,
         ground_truth_input, final_tensor) = add_final_retrain_ops(
             class_count, FLAGS.final_tensor_name, bottleneck_tensor,
             wants_quantization, is_training=True)

    with tf.Session(graph=graph) as sess:
        # Initialize all weights: for the module to their pretrained values,
        # and for the newly added retraining layer to random initial values.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Set up the image decoding sub-graph.
        jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)

        if do_distort_images:
            # We will be applying distortions, so set up the operations we'll need.
            (distorted_jpeg_data_tensor,
             distorted_image_tensor) = add_input_distortions(
                 FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
                 FLAGS.random_brightness, module_spec)
        else:
            # We'll make sure we've calculated the 'bottleneck' image summaries and
            # cached them on disk.
            cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                              FLAGS.bottleneck_dir, jpeg_data_tensor,
                              decoded_image_tensor, resized_image_tensor,
                              bottleneck_tensor, FLAGS.tfhub_module)

        # Create the operations we need to evaluate the accuracy of our new layer.
        evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input)

        # Merge all the summaries and write them out to the summaries_dir
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                             sess.graph)

        validation_writer = tf.summary.FileWriter(
            FLAGS.summaries_dir + '/validation')

        # Create a train saver that is used to restore values into an eval graph
        # when exporting models.
        train_saver = tf.train.Saver()

        # Run the training for as many cycles as requested on the command line.
        for i in range(FLAGS.how_many_training_steps):
            # Get a batch of input bottleneck values, either calculated fresh every
            # time with distortions applied, or from the cache stored on disk.
            if do_distort_images:
                (train_bottlenecks,
                 train_ground_truth) = get_random_distorted_bottlenecks(
                     sess, image_lists, FLAGS.train_batch_size, 'training',
                     FLAGS.image_dir, distorted_jpeg_data_tensor,
                     distorted_image_tensor, resized_image_tensor, bottleneck_tensor)
            else:
                (train_bottlenecks,
                 train_ground_truth, _) = get_random_cached_bottlenecks(
                     sess, image_lists, FLAGS.train_batch_size, 'training',
                     FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
                     decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
                     FLAGS.tfhub_module)
            # Feed the bottlenecks and ground truth into the graph, and run a training
            # step. Capture training summaries for TensorBoard with the `merged` op.
            train_summary, _ = sess.run(
                [merged, train_step],
                feed_dict={bottleneck_input: train_bottlenecks,
                           ground_truth_input: train_ground_truth})
            train_writer.add_summary(train_summary, i)

            # Every so often, print out how well the graph is training.
            is_last_step = (i + 1 == FLAGS.how_many_training_steps)
            if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
                train_accuracy, cross_entropy_value = sess.run(
                    [evaluation_step, cross_entropy],
                    feed_dict={bottleneck_input: train_bottlenecks,
                               ground_truth_input: train_ground_truth})
                tf.logging.info('%s: Step %d: Train accuracy = %.1f%%' %
                                (datetime.now(), i, train_accuracy * 100))
                tf.logging.info('%s: Step %d: Cross entropy = %f' %
                                (datetime.now(), i, cross_entropy_value))
                # TODO: Make this use an eval graph, to avoid quantization
                # moving averages being updated by the validation set, though
                # in practice this makes a negligible difference.
                validation_bottlenecks, validation_ground_truth, _ = (
                    get_random_cached_bottlenecks(
                        sess, image_lists, FLAGS.validation_batch_size,
                        'validation', FLAGS.bottleneck_dir, FLAGS.image_dir,
                        jpeg_data_tensor, decoded_image_tensor,
                        resized_image_tensor, bottleneck_tensor,
                        FLAGS.tfhub_module))
                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy = sess.run(
                    [merged, evaluation_step],
                    feed_dict={bottleneck_input: validation_bottlenecks,
                               ground_truth_input: validation_ground_truth})
                validation_writer.add_summary(validation_summary, i)
                tf.logging.info(
                    '%s: Step %d: Validation accuracy = %.1f%% (N=%d)' %
                    (datetime.now(), i, validation_accuracy * 100,
                     len(validation_bottlenecks)))

            # Store intermediate results
            intermediate_frequency = FLAGS.intermediate_store_frequency

            if (intermediate_frequency > 0 and (i % intermediate_frequency == 0)
                    and i > 0):
                # If we want to do an intermediate save, save a checkpoint of the train
                # graph, to restore into the eval graph.
                train_saver.save(sess, CHECKPOINT_NAME)
                intermediate_file_name = (FLAGS.intermediate_output_graphs_dir +
                                          'intermediate_' + str(i) + '.pb')
                tf.logging.info('Save intermediate result to: ' +
                                intermediate_file_name)
                save_graph_to_file(graph, intermediate_file_name, module_spec,
                                   class_count)

        # After training is complete, force one last save of the train checkpoint.
        train_saver.save(sess, CHECKPOINT_NAME)

        # We've completed all our training, so run a final test evaluation on
        # some new images we haven't used before.
        run_final_eval(sess, module_spec, class_count, image_lists,
                       jpeg_data_tensor, decoded_image_tensor,
                       resized_image_tensor, bottleneck_tensor)

        # Write out the trained graph and labels with the weights stored as
        # constants.
        tf.logging.info('Save final result to: ' + FLAGS.output_graph)
        if wants_quantization:
            tf.logging.info('The model is instrumented for quantization with TF-Lite')
        save_graph_to_file(graph, FLAGS.output_graph, module_spec, class_count)
        with tf.gfile.GFile(FLAGS.output_labels, 'w') as f:
            f.write('\n'.join(image_lists.keys()) + '\n')

        if FLAGS.saved_model_dir:
            export_model(module_spec, class_count, FLAGS.saved_model_dir)
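
Finally, a minimal, hypothetical invocation of the retrain() function above; the directory path is illustrative and everything else comes from the FLAGS defaults:

if __name__ == '__main__':
    # Illustrative path; expects one sub-folder of images per class.
    retrain(inputdir='/path/to/labeled/images')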