Example #1
def model(isTrain, isTrainBn):
    end_point = []
    tf_input = tf.placeholder(dtype=tf.float32, shape=[None, 1, 1, 2], name='tf_input')
    tf_label = tf.placeholder(dtype=tf.int32, shape=[None], name='tf_label')
    if isTrain and isQuant:  # isQuant, input_min and input_max are assumed to be module-level globals
        tf_input_1 = tf.fake_quant_with_min_max_args(tf_input, input_min, input_max, name='x0_1')
    else:
        tf_input_1 = tf_input
    # with tf.variable_scope('model'):
    x = tf.layers.separable_conv2d(tf_input_1, filters=10000, kernel_size=1, use_bias=False, name='L1')
    x = tf.layers.batch_normalization(x, training=isTrainBn, fused=True, name='L1_bn')
    with tf.variable_scope('L1_hard_swish'):
        x1 = tf.nn.relu6(x + 3)
        # x1 = tf.fake_quant_with_min_max_args(x1, 0, 6)
        x = x * x1 * 0.16666667  # hard-swish: x * relu6(x + 3) / 6
    x = tf.layers.conv2d(x, filters=4, kernel_size=1, use_bias=False, name='L2')
    x = tf.layers.batch_normalization(x, training=isTrainBn, fused=True, name='L2_bn')
    with tf.variable_scope('L2_hard_swish'):
        x1 = tf.nn.relu6(x + 3)
        # x1 = tf.fake_quant_with_min_max_args(x1, 0, 6)
        x = x * x1 * 0.16666667
    if isQuant: x = tf.fake_quant_with_min_max_args(x, 0, 6)
    x = tf.layers.conv2d(x, filters=2, kernel_size=1, use_bias=True, name='FCN')
    x = tf.layers.flatten(x, name='Xflatten')
    x = tf.identity(x, 'Xoutput')
    end_point.append(x)
    # if (not isTrain) and isQuant:
    #     x = tf.fake_quant_with_min_max_args(x, -1, 1, name='x5')
    return tf_input, tf_label, x, end_point
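Every example in this collection leans on tf.fake_quant_with_min_max_args. For reference, here is a minimal NumPy sketch of the quantize/dequantize round trip the op performs (an approximation: the real op also nudges min/max so that zero is exactly representable):

import numpy as np

def fake_quant(x, min_val, max_val, num_bits=8):
    # Clip to [min_val, max_val], snap to the uniform grid, map back to floats.
    levels = 2 ** num_bits - 1  # 255 steps for 8 bits
    scale = (max_val - min_val) / levels
    q = np.round((np.clip(x, min_val, max_val) - min_val) / scale)
    return q * scale + min_val

print(fake_quant(np.array([-2.0, 0.1, 2.9, 5.0]), -1.0, 3.0))
# values outside [-1, 3] are clipped; the rest land on the 255-step grid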
Example #2
def model(x): # float-in, float-out
  variables = {}
  x_2d = tf.reshape(x, [-1, 28, 28, 1])
  fake_x = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0, num_bits=8) 
  y = tf.nn.avg_pool(fake_x, ksize=[1, 2, 2, 1],
          strides=[1, 2, 2, 1], padding='SAME')
  y = tf.fake_quant_with_min_max_args(y, min=-1.0, max=3.0, num_bits=8, name='ys')
  return y, variables
Example #3
def model(x): # float-in, float-out
  variables = {}
  x_2d = tf.reshape(x, [-1, 14, 14, 4])
  x_2d = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0, num_bits=8)

  y = tf.depth_to_space(x_2d, 2)
  y = tf.fake_quant_with_min_max_args(y, min=-1.0, max=3.0, num_bits=8, name='ys')
  return y, variables
Example #4
def model(x): # float-in, float-out
  variables = {}
  
  W = generate_variable([10, 28, 28, 1], name='W')
  variables['W'] = W
  W2 = tf.fake_quant_with_min_max_args(W, min=-1.0, max=3.0, num_bits=8)
 
  x_2d = tf.reshape(x, [-1, 28, 28, 1])
  x_2d = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0, num_bits=8)

  y = tf.multiply(x_2d, W2)
  # x_2d and W2 both lie in [-1, 3], so their elementwise product lies in [-3, 9]
  y = tf.fake_quant_with_min_max_args(y, min=-3.0, max=9.0, name='ys')
  return y, variables
Example #5
def model(x):
    # return variables to save
    variables = {}
    x_2d = tf.reshape(x, [-1, 14, 14, 4])
    x_2d = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0, num_bits=8)
    pad_value = [[0, 0], [1, 2], [2, 1], [0, 0]]
    y = tf.pad(x_2d, pad_value, "CONSTANT")  # leave the 'ys' name for the fake-quant output below
    y = tf.fake_quant_with_min_max_args(y,
                                        min=-1.0,
                                        max=3.0,
                                        num_bits=8,
                                        name='ys')
    return y, variables
Example #6
def model(x):
    # return variables to save
    variables = {}
    x_2d = tf.reshape(x, [-1, 28, 28, 1])
    fake_x = tf.fake_quant_with_min_max_args(x_2d,
                                             min=-1.0,
                                             max=3.0,
                                             num_bits=8)
    y = tf.reduce_mean(fake_x, [1, 2])  # spatial mean; the original axis list [-1, 1, 1, -1] repeats axes and fails
    y = tf.fake_quant_with_min_max_args(y,
                                        min=-1.0,
                                        max=3.0,
                                        num_bits=8,
                                        name='ys')
    return y, variables
Example #7
def model(is_train, is_train_bn):
    end_point = []
    tf_input = tf.placeholder(dtype=tf.float32,
                              shape=[None, 1, 1, 1],
                              name='tf_input')
    tf_label = tf.placeholder(dtype=tf.float32,
                              shape=[None, 1],
                              name='tf_label')
    if is_train:
        tf_input_1 = tf.fake_quant_with_min_max_args(tf_input,
                                                     input_min,
                                                     input_max,
                                                     name='x0_1')
    else:
        tf_input_1 = tf_input
    with tf.variable_scope('X1'):
        x = tf.layers.conv2d(tf_input_1, 1, 1, use_bias=True, name='X1')
        end_point.append(x)
        x = tf.layers.batch_normalization(x,
                                          training=is_train_bn,
                                          name='X1/bn',
                                          fused=False)
        end_point.append(x)
        with tf.variable_scope('hard_swish'):
            x1 = tf.nn.relu6(x) - 3
            x1 = tf.fake_quant_with_min_max_args(x1, -3, 3)  # note: x1 is computed but never folded back into x
            # x1 = tf.nn.relu6(x + 3)
            # x1 = tf.fake_quant_with_min_max_args(x1, 0, 6)
            # x = x * x1 * 0.16666667
            end_point.append(x)
    # with tf.variable_scope("X2"):
    #     x = tf.layers.conv2d(x, 1, 1, use_bias=False, name='x2')
    #     end_point.append(x)
    #     x = tf.layers.batch_normalization(x, training=is_train_bn, name='x2/bn', fused=True)
    #     end_point.append(x)
    #     with tf.variable_scope('hard_swish'):
    #         x1 = tf.nn.relu6(x + 3)
    #         # x1 = tf.fake_quant_with_min_max_args(x1, 0, 6)
    #         x = x * x1 * 0.16666667
    #         end_point.append(x)
    x = tf.layers.flatten(x, name='Xflatten')
    x = tf.identity(x, 'Xoutput')
    end_point.append(x)
    if not is_train:
        # todo: how to use
        x = tf.fake_quant_with_min_max_args(x, -1, 1, name='x5')
        end_point.append(x)
    return tf_input, tf_label, x, end_point
Example #8
def model(x):
    # return variables to save
    variables = {}
    x = tf.reshape(x, [2, 14, 56, 5])
    y = tf.slice(x, [1, 7, 21, 0], [1, 6, 1, 5])
    y = tf.fake_quant_with_min_max_args(y, min=-1.0, max=3.0, name='ys')
    return y, variables
Example #9
def model(x):
    # return variables to save
    variables = {}
    x_2d = tf.reshape(x, [-1, 28, 28, 1])
    y = tf.image.resize_bilinear(x_2d, [54, 54])
    y = tf.fake_quant_with_min_max_args(y, min=-1.0, max=3.0, name='ys')
    return y, variables
Example #10
def produce_low_resolution(input, k=3, blur_size=3, blur_sigma=0.5):
    """
    Produces a batch of low resolution images from the high resolution images `input`.
    The images are produced by applying a Gaussian blur with kernel size `blur_size` x `blur_size`
    and standard deviation `blur_sigma`, downsampling by `k`, and applying bicubic interpolation
    up to the size of the input batch.
    """

    n_channels = input.get_shape().as_list()[3]

    # Apply Gaussian blur
    kernel = gaussian_kernel(n_channels, blur_size, blur_sigma)
    lr = tf.nn.depthwise_conv2d_native(input, kernel, [1, 1, 1, 1], 'VALID')

    # Downsample the image
    lr = tf.nn.depthwise_conv2d_native(lr, tf.ones([1, 1, n_channels, 1]),
                                       [1, k, k, 1], 'VALID')

    # Apply bicubic interpolation
    lr = tf.image.resize_bicubic(lr, input.get_shape().as_list()[1:3])

    # Apply clipping and quantization
    lr = tf.clip_by_value(lr, 0, 1)
    lr = tf.fake_quant_with_min_max_args(lr, min=0, max=1)

    return lr
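gaussian_kernel is not defined in this snippet. A plausible sketch, assuming it returns a normalized Gaussian tiled to the depthwise-conv filter shape [blur_size, blur_size, n_channels, 1] that tf.nn.depthwise_conv2d_native expects:

import numpy as np
import tensorflow as tf

def gaussian_kernel(n_channels, size, sigma):
    # Hypothetical helper: a normalized 2-D Gaussian, tiled across channels.
    ax = np.arange(size) - (size - 1) / 2.0
    xx, yy = np.meshgrid(ax, ax)
    k = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
    k /= k.sum()
    k = np.tile(k[:, :, None, None], [1, 1, n_channels, 1])
    return tf.constant(k, dtype=tf.float32)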
Example #11
def model(x):
    # return variables to save
    variables = {}
    x_2d = tf.reshape(x, [-1, 28, 28, 1])
    y = tf.nn.leaky_relu(x_2d)
    y = tf.fake_quant_with_min_max_args(y, min=-1.0, max=3.0, name='ys')
    return y, variables
Example #12
def model(x):  # float-in, float-out
    variables = {}
    x_2d = tf.reshape(x, [-1, 14, 14, 4])
    # x_2d = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0, num_bits=8)

    W = weight_variable([3, 3, 4, 32], name='W')
    b = bias_variable([32], name='b')
    variables['W'] = W
    variables['b'] = b
    W2 = tf.fake_quant_with_min_max_args(W, min=-1.0, max=1.0, num_bits=8)
    b2 = tf.fake_quant_with_min_max_args(b, min=-0.4, max=0.4, num_bits=8)

    x_dconv2d = tf.nn.conv2d(x_2d, W2, strides=[1, 1, 1, 1], padding='SAME')
    y = tf.nn.relu(x_dconv2d + b2)
    # 3 * 1.0 * (3 * 3 * 4) + 0.4 = 108.4
    y = tf.fake_quant_with_min_max_args(y, min=0.0, max=108.4, name='ys')
    return y, variables
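The max=108.4 above comes from the worst-case pre-activation value |x|_max * |W|_max * fan_in + |b|_max, as the preceding comment spells out. A tiny helper (hypothetical, just to make that arithmetic explicit) reproduces the bounds used across these examples:

def activation_bound(x_max, w_max, kernel_h, kernel_w, in_channels, b_max):
    # Worst case: every tap of the convolution hits the input and weight extremes.
    fan_in = kernel_h * kernel_w * in_channels
    return x_max * w_max * fan_in + b_max

print(activation_bound(3.0, 1.0, 3, 3, 4, 0.4))    # 108.4 (this example)
print(activation_bound(3.0, 1.0, 5, 5, 1, 0.4))    # 75.4 (example #14)
print(activation_bound(3.0, 1.0, 1, 1, 784, 0.4))  # 2352.4 (example #18)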
Example #13
def dec_relu(input, enable_quantization=False):
    out = tf.nn.relu6(input)
    if (enable_quantization):
        return tf.fake_quant_with_min_max_args(out,
                                               min=0.0,
                                               max=6.0,
                                               name="fq_relu")
    return out
Example #14
def model(x): # float-in, float-out
  variables = {}
  x_2d = tf.reshape(x, [-1, 28, 28, 1])
  fake_x = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0, num_bits=8)
  
  W = weight_variable([5, 5, 1, 32], name='W')
  b = bias_variable([32], name='b')
  variables['W'] = W
  variables['b'] = b
  W2 = tf.fake_quant_with_min_max_args(W, min=-1.0, max=1.0, num_bits=8)
  # b2 = tf.fake_quant_with_min_max_args(b, min=-0.4, max=0.4, num_bits=8)
 
  x_dconv2d = tf.nn.conv2d(fake_x, W2, strides=[1, 1, 1, 1], padding='SAME')
  y = tf.nn.relu(x_dconv2d + b)
  # 3 * 1.0 * (5 * 5 * 1) + 0.4 = 75.4
  y = tf.fake_quant_with_min_max_args(y, min=0.0, max=75.4, name='ys')
  return y, variables
Example #15
def model(x):  # float-in, float-out
    variables = {}
    y = tf.nn.softmax(x)  # leave the 'ys' name for the fake-quant output below
    y = tf.fake_quant_with_min_max_args(y,
                                        min=0.0,
                                        max=1.0,
                                        num_bits=8,
                                        name='ys')
    return y, variables
Example #16
def model(x):
    # return variables to save
    variables = {}
    x_2d = tf.reshape(x, [-1, 14, 14, 4])
    W = weight_variable([3, 3, 8, 4], name='W')  # conv2d_transpose filter: [h, w, out_ch, in_ch]
    b = bias_variable([8], name='b')  # one bias per output channel
    variables['W'] = W
    variables['b'] = b

    W2 = tf.fake_quant_with_min_max_args(W, min=-1.0, max=1.0, num_bits=8)
    b2 = tf.fake_quant_with_min_max_args(b, min=-0.4, max=0.4, num_bits=8)
    y = tf.nn.conv2d_transpose(x_2d,
                               W2,
                               output_shape=[10, 28, 28, 8],
                               strides=[1, 2, 2, 1],
                               padding='SAME')
    y = tf.nn.bias_add(y, b2)
    y = tf.fake_quant_with_min_max_args(y, min=0.0, max=108.4, name='ys')
    return y, variables
Example #17
def main(args=None):
  assert FLAGS.train_path, 'train_path is not set.'
  assert FLAGS.output_dir, 'output_dir is not set.'

  with tf.Graph().as_default() as g:
    image_ph = tf.placeholder(
      tf.float32,
      [model.IMAGE_SIZE * model.IMAGE_SIZE * INPUT_CHANNEL],
      name='input')

    image = tf.reshape(
      image_ph,
      [model.IMAGE_SIZE, model.IMAGE_SIZE, INPUT_CHANNEL])

    # Remove the alpha channel
    image = image[:, :, :3]

    normalized_image = tf.multiply(image, 1.0 / 255.0)
    normalized_image = tf.expand_dims(normalized_image, axis=0)

    feature_map = model.base_layers(normalized_image, is_train=False)
    ssd_logits = model.ssd_layers(feature_map, is_train=False)
    ssd_logits = tf.reshape(ssd_logits, [-1, model.OFFSET + model.CLASSES])

    location_offset = tf.fake_quant_with_min_max_args(
      tf.nn.tanh(ssd_logits[:, :4]),
      min=-6,
      max=6,
      name='offset'
    )

    confidence = tf.fake_quant_with_min_max_args(
      tf.nn.sigmoid(ssd_logits[:, 4:]),
      min=-6,
      max=6,
      name='confidence'
    )

    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as sess:
      saver.restore(sess, FLAGS.train_path)

      _export_graph(sess)
      _export_boxes_position(feature_map, FLAGS.output_dir)
Example #18
def model(x):  # float-in, float-out
    variables = {}
    # fake_x = tf.fake_quant_with_min_max_args(x, min=-32.0, max=31.0, num_bits=8)
    W = weight_variable([784, 10], name='W')
    b = bias_variable([10], name='b')
    variables['W'] = W
    variables['b'] = b
    W2 = tf.fake_quant_with_min_max_args(W, min=0.0, max=256.0, num_bits=8)
    b2 = tf.fake_quant_with_min_max_args(b, min=-0.4, max=0.4, num_bits=8)

    # 3 * 1.0 * 784 + 0.4 = 2352.4
    y = tf.matmul(x, W2)
    y = tf.nn.relu(tf.add(y, b2))
    y = tf.fake_quant_with_min_max_args(y,
                                        min=0.0,
                                        max=2352.4,
                                        num_bits=8,
                                        name='ys')

    return y, variables
Example #19
    def _create_weights_node(self, weights_data):
        weights_name_scope = _get_name_scope() + "/weights"

        w_min, w_max = self._get_thresholds(weights_name_scope)

        weights_node = tf.constant(weights_data, tf.float32, name="weights")
        self._add_reference_node(weights_node)

        quantized_weights = tf.fake_quant_with_min_max_args(
            weights_node, w_min, w_max, name="quantized_weights")
        return quantized_weights
Example #20
    def _cell_output(self, net, output_type=None):

        output_name_scope = _get_name_scope() + "/output"

        if output_type == "fixed":
            i_min, i_max = -1, 1
        else:
            i_min, i_max = self._get_thresholds(output_name_scope)

        net = tf.fake_quant_with_min_max_args(net, i_min, i_max, name="output")

        self._add_reference_node(net)

        return net
Example #21
def fake_quantize_tensor(input_tensor, quantization_bits, min_val, max_val, name):
  with tf.name_scope(name):

    # TODO: Min and Max values need to be given manually right now
    # Get the max value in the input tensor
    # max_val = sess.run(tf.reduce_max(input_tensor))
    # Get the min value in the input tensor
    # min_val = sess.run(tf.reduce_min(input_tensor))
    # if(max_val == min_val):
    #   min_val = -max_val # If biases are initialized as a constant

    # Quantization
    quantized_tensor = tf.fake_quant_with_min_max_args(input_tensor, min_val, max_val, quantization_bits, False, name)
    variable_summaries(quantized_tensor)
    return quantized_tensor
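For readability, the positional call above maps onto the op's keyword parameters as follows; the fifth positional argument is narrow_range (import tensorflow as tf assumed, as elsewhere in these examples):

quantized_tensor = tf.fake_quant_with_min_max_args(input_tensor,
                                                   min=min_val,
                                                   max=max_val,
                                                   num_bits=quantization_bits,
                                                   narrow_range=False,
                                                   name=name)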
Example #22
    def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size,
                                  stride):
        """ Helper function to build the depth-wise separable convolution layer.
        """

        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=stride,
                                                      depth_multiplier=1,
                                                      kernel_size=kernel_size,
                                                      scope=sc +
                                                      '/depthwise_conv')
        if (is_training):
            bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        else:
            bn = depthwise_conv
        if (activations_bits <= 8):
            bn = tf.fake_quant_with_min_max_args(bn,
                                                 min=-8,
                                                 max=8,
                                                 num_bits=activations_bits)
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        if (is_training):
            bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        else:
            bn = pointwise_conv
        if (activations_bits <= 8):
            bn = tf.fake_quant_with_min_max_args(bn,
                                                 min=-8,
                                                 max=8,
                                                 num_bits=activations_bits)
        return bn
Example #23
  def quantize_test(input_tensor):
    with tf.name_scope('quantized_tensor'):
      # NOTE: the dynamic extremes computed here are immediately overridden
      # by the hard-coded +-0.3 below; argmax/argmin also yield indices
      # (valid for this kind of lookup only when input_tensor is 1-D).
      max_val_index = tf.argmax(input_tensor, output_type=tf.int32)
      max_val = sess.run(input_tensor[max_val_index])
      min_val_index = tf.argmin(input_tensor, output_type=tf.int32)
      min_val = sess.run(input_tensor[min_val_index])

      max_val = .3
      min_val = -.3

      # Quantization
      quantized_tensor = tf.fake_quant_with_min_max_args(input_tensor, min_val, max_val, quantization_bits, False, 'quantized_tensor')
      variable_summaries(quantized_tensor)
    return quantized_tensor
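If the dynamic range hinted at by the dead code above were actually wanted, the usual pattern is a single sess.run over tf.reduce_min/tf.reduce_max rather than indexing with argmin/argmax. A sketch under that assumption, reusing this example's sess and input_tensor:

min_val, max_val = sess.run(
    [tf.reduce_min(input_tensor), tf.reduce_max(input_tensor)])
if max_val == min_val:
    min_val = -max_val  # e.g. when biases are initialized to a constant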
Example #24
def _get_outputs_from_inputs(input_tensors,
                             detection_model,
                             output_collection_name,
                             pipeline_config,
                             half=False,
                             quantize=False):
    if not quantize:
        if not half:
            inputs = tf.to_float(input_tensors)
        else:
            inputs = input_tensors
    else:
        if not half:
            inputs = tf.fake_quant_with_min_max_args(
                tf.to_float(input_tensors), 0, 255)
        else:
            inputs = input_tensors
    if not half:
        preprocessed_inputs, true_image_shapes = detection_model.preprocess(
            inputs)
    else:
        preprocessed_inputs = inputs
        try:
            fixed_shape_resizer_config = pipeline_config.model.ssd.image_resizer.fixed_shape_resizer
        except AttributeError:
            raise NotImplementedError(
                "Half Graph Exporter Is Only For SSD Structure!")
        true_image_shapes = [
            1, fixed_shape_resizer_config.height,
            fixed_shape_resizer_config.width, 3
        ]
    output_tensors = detection_model.predict(preprocessed_inputs,
                                             true_image_shapes, half)
    if not half:
        postprocessed_tensors = detection_model.postprocess(
            output_tensors, true_image_shapes)
        return _add_output_tensor_nodes(postprocessed_tensors,
                                        output_collection_name)
    else:
        return _add_half_output_tensor_nodes(output_tensors,
                                             output_collection_name)
Example #25
def downscale_model(input_tensor, input_tensor_lr, scale=2):

    tensor = None
    conv_00_w = tf.get_variable("conv_00_w", [3,3,1,64], initializer=tf.contrib.layers.xavier_initializer())
    #conv_00_w = tf.get_variable("conv_00_w", [3,3,1,64], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0/9)))
    conv_00_b = tf.get_variable("conv_00_b", [64], initializer=tf.constant_initializer(0))
    tensor = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(input_tensor, conv_00_w, strides=[1,1,1,1], padding='SAME'), conv_00_b))



    # Each loop iteration builds a ResNet block; the blocks are then cascaded.
    for i in range(5):
        tensor_shortcut = tensor
        conv_w = tf.get_variable("conv_%02d_w" % (2*i+1), [3,3,64,64], initializer=tf.contrib.layers.xavier_initializer())
        conv_b = tf.get_variable("conv_%02d_b" % (2*i+1), [64], initializer=tf.constant_initializer(0))
        tensor = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(tensor, conv_w, strides=[1,1,1,1], padding='SAME'), conv_b))


        conv_w = tf.get_variable("conv_%02d_w" % (2*i+2), [3,3,64,64], initializer=tf.contrib.layers.xavier_initializer())
        conv_b = tf.get_variable("conv_%02d_b" % (2*i+2), [64], initializer=tf.constant_initializer(0))
        tensor = tf.nn.relu( tf.add(  tf.nn.bias_add( tf.nn.conv2d(tensor, conv_w, strides=[1,1,1,1], padding='SAME' ), conv_b  ) , tensor_shortcut  )  )



    # Add a downscaling conv layer (scale=2 by default) that also collapses the channels to 1.
    conv_w = tf.get_variable("conv_%02d_w" % (19), [3,3,64,1], initializer=tf.contrib.layers.xavier_initializer())
    conv_b = tf.get_variable("conv_%02d_b" % (19), [1], initializer=tf.constant_initializer(0))
    # Squash the downscaled image into (0, 1) so it can be processed further.
    tensor = tf.nn.relu6(tf.add(tf.nn.bias_add(tf.nn.conv2d(tensor, conv_w, strides=[1, scale, scale, 1], padding='SAME'), conv_b), input_tensor_lr) * 6) / 6


    tensor = tf.fake_quant_with_min_max_args(tensor, min=0, max=1)

    # This is the downsampled image that will be encoded and transmitted
    # between transmitter and receiver; it is now quantized and normalized.
    tensor_downsampled = tensor

    return tensor_downsampled
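The relu6(t * 6) / 6 construction above is just a clipped identity, equal to tf.clip_by_value(t, 0, 1), presumably chosen because relu6 is well supported by quantized kernels. A quick NumPy check:

import numpy as np

def relu6(v):
    return np.minimum(np.maximum(v, 0.0), 6.0)

t = np.linspace(-0.5, 1.5, 5)
print(relu6(t * 6) / 6)      # [0.   0.   0.5  1.   1. ]
print(np.clip(t, 0.0, 1.0))  # identical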
Example #26
def main(_):
  # We want to see all the logging messages for this tutorial.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Start a new TensorFlow session.
  sess = tf.InteractiveSession()

  # Begin by making sure we have the training data we need. If you already have
  # training data of your own, use `--data_url= ` on the command line to avoid
  # downloading.
  model_settings = models.prepare_model_settings(
      len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
      FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
      FLAGS.window_stride_ms, FLAGS.feature_bin_count, FLAGS.preprocess)
  audio_processor = input_data.AudioProcessor(
      FLAGS.data_url, FLAGS.data_dir,
      FLAGS.silence_percentage, FLAGS.unknown_percentage,
      FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
      FLAGS.testing_percentage, model_settings, FLAGS.summaries_dir)
  fingerprint_size = model_settings['fingerprint_size']
  label_count = model_settings['label_count']
  time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
  # Figure out the learning rates for each training phase. Since it's often
  # effective to have high learning rates at the start of training, followed by
  # lower levels towards the end, the number of steps and learning rates can be
  # specified as comma-separated lists to define the rate at each stage. For
  # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
  # will run 13,000 training loops in total, with a rate of 0.001 for the first
  # 10,000, and 0.0001 for the final 3,000.
  training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
  learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
  if len(training_steps_list) != len(learning_rates_list):
    raise Exception(
        '--how_many_training_steps and --learning_rate must be equal length '
        'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                   len(learning_rates_list)))

  input_placeholder = tf.placeholder(
      tf.float32, [None, fingerprint_size], name='fingerprint_input')
  if FLAGS.quantize:
    fingerprint_min, fingerprint_max = input_data.get_features_range(
        model_settings)
    fingerprint_input = tf.fake_quant_with_min_max_args(
        input_placeholder, fingerprint_min, fingerprint_max)
  else:
    fingerprint_input = input_placeholder

  logits, dropout_prob = models.create_model(
      fingerprint_input,
      model_settings,
      FLAGS.model_architecture,
      is_training=True)

  # Define loss and optimizer
  ground_truth_input = tf.placeholder(
      tf.int64, [None], name='groundtruth_input')

  # Optionally we can add runtime checks to spot when NaNs or other symptoms of
  # numerical errors start occurring during training.
  control_dependencies = []
  if FLAGS.check_nans:
    checks = tf.add_check_numerics_ops()
    control_dependencies = [checks]

  # Create the back propagation and training evaluation machinery in the graph.
  with tf.name_scope('cross_entropy'):
    cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
        labels=ground_truth_input, logits=logits)
  if FLAGS.quantize:
    tf.contrib.quantize.create_training_graph(quant_delay=0)
  with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
    learning_rate_input = tf.placeholder(
        tf.float32, [], name='learning_rate_input')
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate_input).minimize(cross_entropy_mean)
  predicted_indices = tf.argmax(logits, 1)
  correct_prediction = tf.equal(predicted_indices, ground_truth_input)
  confusion_matrix = tf.confusion_matrix(
      ground_truth_input, predicted_indices, num_classes=label_count)
  evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  with tf.get_default_graph().name_scope('eval'):
    tf.summary.scalar('cross_entropy', cross_entropy_mean)
    tf.summary.scalar('accuracy', evaluation_step)

  global_step = tf.train.get_or_create_global_step()
  increment_global_step = tf.assign(global_step, global_step + 1)

  saver = tf.train.Saver(tf.global_variables())

  # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
  merged_summaries = tf.summary.merge_all(scope='eval')
  train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                       sess.graph)
  validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')

  tf.global_variables_initializer().run()

  start_step = 1

  if FLAGS.start_checkpoint:
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    start_step = global_step.eval(session=sess)

  tf.logging.info('Training from step: %d ', start_step)

  # Save graph.pbtxt.
  tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                       FLAGS.model_architecture + '.pbtxt')

  # Save list of words.
  with gfile.GFile(
      os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
      'w') as f:
    f.write('\n'.join(audio_processor.words_list))

  # Training loop.
  training_steps_max = np.sum(training_steps_list)
  for training_step in xrange(start_step, training_steps_max + 1):
    # Figure out what the current learning rate is.
    training_steps_sum = 0
    for i in range(len(training_steps_list)):
      training_steps_sum += training_steps_list[i]
      if training_step <= training_steps_sum:
        learning_rate_value = learning_rates_list[i]
        break
    # Pull the audio samples we'll use for training.
    train_fingerprints, train_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
        FLAGS.background_volume, time_shift_samples, 'training', sess)
    # Run the graph with this batch of training data.
    train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
        [
            merged_summaries,
            evaluation_step,
            cross_entropy_mean,
            train_step,
            increment_global_step,
        ],
        feed_dict={
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        })
    train_writer.add_summary(train_summary, training_step)
    tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                    (training_step, learning_rate_value, train_accuracy * 100,
                     cross_entropy_value))
    is_last_step = (training_step == training_steps_max)
    if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
      set_size = audio_processor.set_size('validation')
      total_accuracy = 0
      total_conf_matrix = None
      for i in xrange(0, set_size, FLAGS.batch_size):
        validation_fingerprints, validation_ground_truth = (
            audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0,
                                     0.0, 0, 'validation', sess))
        # Run a validation step and capture training summaries for TensorBoard
        # with the `merged` op.
        validation_summary, validation_accuracy, conf_matrix = sess.run(
            [merged_summaries, evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: validation_fingerprints,
                ground_truth_input: validation_ground_truth,
                dropout_prob: 1.0
            })
        validation_writer.add_summary(validation_summary, training_step)
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (validation_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
          total_conf_matrix = conf_matrix
        else:
          total_conf_matrix += conf_matrix
      tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
      tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                      (training_step, total_accuracy * 100, set_size))

    # Save the model checkpoint periodically.
    if (training_step % FLAGS.save_step_interval == 0 or
        training_step == training_steps_max):
      checkpoint_path = os.path.join(FLAGS.train_dir,
                                     FLAGS.model_architecture + '.ckpt')
      tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
      saver.save(sess, checkpoint_path, global_step=training_step)

  set_size = audio_processor.set_size('testing')
  tf.logging.info('set_size=%d', set_size)
  total_accuracy = 0
  total_conf_matrix = None
  for i in xrange(0, set_size, FLAGS.batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
    test_accuracy, conf_matrix = sess.run(
        [evaluation_step, confusion_matrix],
        feed_dict={
            fingerprint_input: test_fingerprints,
            ground_truth_input: test_ground_truth,
            dropout_prob: 1.0
        })
    batch_size = min(FLAGS.batch_size, set_size - i)
    total_accuracy += (test_accuracy * batch_size) / set_size
    if total_conf_matrix is None:
      total_conf_matrix = conf_matrix
    else:
      total_conf_matrix += conf_matrix
  tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
  tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % (total_accuracy * 100,
                                                           set_size))
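This script builds the fake-quant training graph with tf.contrib.quantize.create_training_graph; the matching export-time step in the TF 1.x workflow is create_eval_graph on a freshly built inference graph before freezing. A minimal sketch reusing this script's names (the single-return form of models.create_model with is_training=False is assumed):

with tf.Graph().as_default():
    eval_input = tf.placeholder(tf.float32, [None, fingerprint_size],
                                name='fingerprint_input')
    logits = models.create_model(eval_input, model_settings,
                                 FLAGS.model_architecture, is_training=False)
    tf.contrib.quantize.create_eval_graph()  # rewrite for quantized inference
    with tf.Session() as eval_sess:
        tf.train.Saver().restore(eval_sess, checkpoint_path)
        frozen = tf.graph_util.convert_variables_to_constants(
            eval_sess, eval_sess.graph.as_graph_def(), [logits.op.name])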
Example #28
def create_rendered_images(batch_size, textures):
    backgrounds_ = tf.placeholder(tf.float32, [batch_size, None, None, 3],
                                  name='backgrounds')
    frames_ = tf.placeholder(tf.float32, [batch_size, None, None, 4],
                             name='frames')

    texture_channel_multiplicative_noise_ = tf.placeholder_with_default(
        [1., 1.], [2], name='texture_channel_multiplicative_noise')
    texture_channel_additive_noise_ = tf.placeholder_with_default(
        [0., 0.], [2], name='texture_channel_additive_noise')

    texture_pixel_multiplicative_noise_ = tf.placeholder_with_default(
        [1., 1.], [2], name='texture_pixel_multiplicative_noise')
    texture_pixel_additive_noise_ = tf.placeholder_with_default(
        [0., 0.], [2], name='texture_pixel_additive_noise')

    texture_gaussian_noise_stddev_ = tf.placeholder_with_default(
        [0., 0.], [2], name='texture_gaussian_noise_stddev')

    image_channel_multiplicative_noise_ = tf.placeholder_with_default(
        [1., 1.], [2], name='image_channel_multiplicative_noise')
    image_channel_additive_noise_ = tf.placeholder_with_default(
        [0., 0.], [2], name='image_channel_additive_noise')

    image_pixel_multiplicative_noise_ = tf.placeholder_with_default(
        [1., 1.], [2], name='image_pixel_multiplicative_noise')
    image_pixel_additive_noise_ = tf.placeholder_with_default(
        [0., 0.], [2], name='image_pixel_additive_noise')

    image_gaussian_noise_stddev_ = tf.placeholder_with_default(
        [0., 0.], [2], name='image_gaussian_noise_stddev')

    IDENTITY_KERNEL = [[0., 0., 0.], [0., 1., 0.], [0., 0., 0.]]
    image_gaussian_blur_kernel_ = tf.placeholder_with_default(
        IDENTITY_KERNEL, [None, None], name='image_gaussian_blur_kernel')
    image_gaussian_blur_kernel_ = image_gaussian_blur_kernel_[:, :, tf.newaxis,
                                                              tf.newaxis]
    image_gaussian_blur_kernel_ = tf.tile(image_gaussian_blur_kernel_,
                                          [1, 1, 3, 1])

    # TODO: This could probably be made faster by removing random elements to outside of loop
    def render_frame(frame_):
        textures_ = textures

        # Add noise to textures
        textures_ = textures_ * tf.random_uniform(
            [3], texture_channel_multiplicative_noise_[0],
            texture_channel_multiplicative_noise_[1])
        textures_ = textures_ + tf.random_uniform(
            [3], texture_channel_additive_noise_[0],
            texture_channel_additive_noise_[1])

        textures_ = textures_ * tf.random_uniform(
            [], texture_pixel_multiplicative_noise_[0],
            texture_pixel_multiplicative_noise_[1])
        textures_ = textures_ + tf.random_uniform(
            [], texture_pixel_additive_noise_[0],
            texture_pixel_additive_noise_[1])

        textures_ = textures_ + tf.random_normal(
            textures_.shape,
            stddev=tf.random_uniform([], texture_gaussian_noise_stddev_[0],
                                     texture_gaussian_noise_stddev_[1]))

        #textures_ = tf.clip_by_value(textures_, 0.0, 1.0)

        # Render
        uvf_ = frame_[..., :3]
        image_ = sample_bilinear(textures_, uvf_)

        # Composite onto background
        # FIXME: This only really works with batch_size=1
        alpha_ = frame_[..., 3:]
        image_ = image_ * alpha_ + backgrounds_[0] * (1 - alpha_)

        # Blur image
        image_ = image_[tf.newaxis, :, :, :]
        image_ = tf.nn.depthwise_conv2d(image_,
                                        image_gaussian_blur_kernel_,
                                        strides=[1, 1, 1, 1],
                                        padding='SAME')
        image_ = image_[0]

        # Blur alpha
        alpha_ = alpha_[tf.newaxis, :, :, :]
        alpha_ = tf.nn.depthwise_conv2d(
            alpha_,
            image_gaussian_blur_kernel_[:, :, :1, :],
            strides=[1, 1, 1, 1],
            padding='SAME')
        alpha_ = alpha_[0]

        # Recomposite blurred image onto background
        # FIXME: This only really works with batch_size=1
        image_ = image_ * alpha_ + backgrounds_[0] * (1 - alpha_)

        # Add noise to image
        image_ = image_ * tf.random_uniform(
            [3], image_channel_multiplicative_noise_[0],
            image_channel_multiplicative_noise_[1])
        image_ = image_ + tf.random_uniform([3],
                                            image_channel_additive_noise_[0],
                                            image_channel_additive_noise_[1])

        image_ = image_ * tf.random_uniform(
            [], image_pixel_multiplicative_noise_[0],
            image_pixel_multiplicative_noise_[1])
        image_ = image_ + tf.random_uniform([], image_pixel_additive_noise_[0],
                                            image_pixel_additive_noise_[1])

        image_ = image_ + tf.random_normal(
            tf.shape(image_),
            stddev=tf.random_uniform([], image_gaussian_noise_stddev_[0],
                                     image_gaussian_noise_stddev_[1]))

        #image_ = tf.clip_by_value(image_, 0.0, 1.0)

        return image_

    input_images_ = tf.map_fn(render_frame, frames_, dtype=(tf.float32))
    # TODO: Can we move image compositing to out of render_frame?
    # TODO: Move noising of image to out of render_frame to here

    input_images_ = tf.fake_quant_with_min_max_args(input_images_,
                                                    min=0.,
                                                    max=1.,
                                                    num_bits=8)
    input_images_ = tf.identity(input_images_, name='input_images')

    return input_images_
Example #29
def main(_):
    # We want to see all the logging messages.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.feature_bin_count, FLAGS.preprocess)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir,
        FLAGS.silence_percentage, FLAGS.unknown_percentage,
        FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
        FLAGS.testing_percentage, model_settings, FLAGS.summaries_dir)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed
    # by lower levels towards the end, the number of steps and learning rates
    # can be specified as comma-separated lists to define the rate at each
    # stage. For example --how_many_training_steps=10000,3000
    # --learning_rate=0.001,0.0001 will run 13,000 training loops in total,
    # with a rate of 0.001 for the first 10,000 and 0.0001 for the final 3,000.

    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))

    input_placeholder = tf.placeholder(tf.float32, [None, fingerprint_size],
                                       name='fingerprint_input')
    if FLAGS.quantize:

        if FLAGS.preprocess == 'average':
            fingerprint_min = 0.0
            fingerprint_max = 2048.0
        elif FLAGS.preprocess == 'mfcc':
            fingerprint_min = -247.0
            fingerprint_max = 30.0
        else:
            raise Exception('Unknown preprocess mode "%s" (should be "mfcc" or'
                            ' "average")' % (FLAGS.preprocess))
        fingerprint_input = tf.fake_quant_with_min_max_args(
            input_placeholder, fingerprint_min, fingerprint_max)
    else:
        fingerprint_input = input_placeholder

    logits, dropout_prob = models.create_model(fingerprint_input,
                                               model_settings,
                                               FLAGS.model_architecture,
                                               is_training=True)

    # Define loss and optimizer.
    ground_truth_input = tf.placeholder(tf.int64, [None],
                                        name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other
    # symptoms of numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(tf.float32, [],
                                             name='learning_rate_input')
        train_step = tf.train.GradientDescentOptimizer(
            learning_rate_input).minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(ground_truth_input,
                                           predicted_indices,
                                           num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default).
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')

    tf.global_variables_initializer().run()

    start_step = 1

    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save the list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir,
                         FLAGS.model_architecture + '_labels.txt'), 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):

        # Figure out the current learning rate.
        training_steps_sum = 0
        for i in range(len(training_steps_list)):
            training_steps_sum += training_steps_list[i]
            if training_step <= training_steps_sum:
                learning_rate_value = learning_rates_list[i]
                break

        # Pull the audio samples we'll use for training.

        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)

        # Run the graph with this batch of training data.

        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries,
                evaluation_step,
                cross_entropy_mean,
                train_step,
                increment_global_step,
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 0.5
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate_value, train_accuracy * 100,
             cross_entropy_value))

        traintxt = str(train_accuracy * 100)  # convert to a string so it can be written out
        with open(
                'E:\\speech_rocognition_demo\\method3\\tf-keywords\\result\\result_original\\train_low_latency_conv.txt',
                'a') as file_handle:
            file_handle.write(traintxt)
            file_handle.write('\n')  # newline so the next entry starts on its own line

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, i,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))

                # Run a validation step and capture training summaries for
                # TensorBoard with the 'merged' op.

                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary,
                                              training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

            validationtxt = str(total_accuracy * 100)  # convert to a string so it can be written out
            with open(
                    'E:\\speech_rocognition_demo\\method3\\tf-keywords\\result\\result_original\\validation_low_latency_conv.txt',
                    'a') as file_handle:
                file_handle.write(validationtxt)
                file_handle.write('\n')  # newline so the next entry starts on its own line

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0
                or training_step == training_steps_max):
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

    set_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
Example #30
def main(_):
    NUM_INPUTS = 4
    NUM_CLASSES = 9

    # the data, split between train and test sets
    x_train, y_train, x_test, y_test = generate_simulated_data()

    x_train = x_train.astype('uint8')
    x_test = x_test.astype('uint8')
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = y_train.astype('int64')
    y_test = y_test.astype('int64')

    tf.logging.set_verbosity(tf.logging.INFO)
    sess = tf.InteractiveSession()

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))

    input_placeholder = tf.placeholder(tf.float32, [None, NUM_INPUTS],
                                       name='graph_input')
    if FLAGS.quantize:
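        # Fake-quantize the input so training sees the same 8-bit rounding
        # the converted TFLite model will apply at inference time.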
        input_min, input_max = 0, 256
        graph_input = tf.fake_quant_with_min_max_args(input_placeholder,
                                                      input_min, input_max)
    else:
        graph_input = input_placeholder

    logits, dropout_prob = models.create_three_fc_model(graph_input,
                                                        NUM_INPUTS,
                                                        20,
                                                        20,
                                                        NUM_CLASSES,
                                                        is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(tf.int64, [None],
                                        name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
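        # Rewrites the graph in place, inserting fake-quant ops on weights and
        # activations; quant_delay=0 starts simulated quantization immediately.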
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(tf.float32, [],
                                             name='learning_rate_input')
        train_step = tf.train.GradientDescentOptimizer(
            learning_rate_input).minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(ground_truth_input,
                                           predicted_indices,
                                           num_classes=NUM_CLASSES)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')

    tf.global_variables_initializer().run()

    start_step = 1
    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is.
        training_steps_sum = 0
        for i in range(len(training_steps_list)):
            training_steps_sum += training_steps_list[i]
            if training_step <= training_steps_sum:
                learning_rate_value = learning_rates_list[i]
                break
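        # With the example flags above, steps 1-10,000 use 0.001 and
        # steps 10,001-13,000 use 0.0001.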

        # Pull the samples we'll use for training, cycling through the set.
        index = (training_step * FLAGS.batch_size) % x_train.shape[0]
        train_fingerprints = x_train[index:index + FLAGS.batch_size]
        train_ground_truth = y_train[index:index + FLAGS.batch_size]
        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries,
                evaluation_step,
                cross_entropy_mean,
                train_step,
                increment_global_step,
            ],
            feed_dict={
                graph_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 0.5
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate_value, train_accuracy * 100,
             cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = y_test.shape[0]
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints = x_test[i:i + FLAGS.batch_size]
                validation_ground_truth = y_test[i:i + FLAGS.batch_size]
                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        graph_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary,
                                              training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0
                or training_step == training_steps_max):
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)
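
The training loop above only writes checkpoints. To turn a graph trained with create_training_graph into a quantized TFLite model, the usual TF 1.x flow rebuilds the model for inference, rewrites it with tf.contrib.quantize.create_eval_graph(), freezes it, and converts the frozen GraphDef. A minimal sketch under those assumptions ('frozen.pb' and the 'logits' output name are hypothetical stand-ins):

# Sketch only: 'frozen.pb' and 'logits' are assumed names for a frozen eval
# graph produced from this model; 'graph_input' matches the placeholder above.
converter = tf.lite.TFLiteConverter.from_frozen_graph(
    'frozen.pb', input_arrays=['graph_input'], output_arrays=['logits'])
converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
converter.quantized_input_stats = {'graph_input': (0., 1.)}  # mean, std_dev
tflite_quant_model = converter.convert()
open('quantized_model.tflite', 'wb').write(tflite_quant_model)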
Example #31
0
import numpy as np
import tensorflow as tf

# A minimal stand-in model, assumed for illustration only: it maps a
# length-3 input to a 3x3 output so the random batch below type-checks.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(9, input_shape=(3,)),
    tf.keras.layers.Reshape((3, 3)),
])
model.compile(optimizer='sgd', loss='mse')

x = np.random.random((1, 3))
y = np.random.random((1, 3, 3))
model.train_on_batch(x, y)
model.predict(x)
# Save tf.keras model in HDF5 format.
keras_file = "keras_model.h5"
tf.keras.models.save_model(model, keras_file)
# Convert to TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file)
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)

img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3))
const = tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.])
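# The constant expression folds to [2., 6., 7.] before conversion.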
val = img + const
out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output")
with tf.Session() as sess:
    converter = tf.lite.TFLiteConverter.from_session(sess, [img], [out])
    converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
    input_arrays = converter.get_input_arrays()
    converter.quantized_input_stats = {
        input_arrays[0]: (0., 1.)
    }  # mean, std_dev
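    # quantized_input_stats gives (mean, std_dev) per input: the converter
    # assumes real_value = (quantized_value - mean) / std_dev, so (0., 1.)
    # treats each uint8 input value as the real value itself.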
    tflite_model = converter.convert()
    open("converted_model.tflite", "wb").write(tflite_model)

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()