예제 #1
0
    def build_graph(self, data_paths, batch_size, is_training):
        """Assembles the graph used for either training or evaluation.

        Args:
          data_paths: Input source forwarded to `util.read_examples`.
          batch_size: Batch size forwarded to `util.read_examples`.
          is_training: If true, examples are shuffled, no epoch limit is
            passed and the training op is added. Otherwise `num_epochs=2` is
            passed, a fresh `global_step` variable is created and scalar
            summaries (including the hyperparameter tuning metric) are
            registered.

        Returns:
          The populated `GraphReferences` object.
        """
        refs = GraphReferences()

        epoch_limit = None if is_training else 2
        _, refs.examples = util.read_examples(data_paths,
                                              batch_size,
                                              shuffle=is_training,
                                              num_epochs=epoch_limit)

        features = parse_examples(refs.examples)

        # Forward pass of the inference model, followed by its loss.
        logits = inference(features['images'], self.hidden1, self.hidden2)
        labels = features['labels']
        loss_value = loss(logits, labels)

        # Gradient ops are only added for training; evaluation just creates
        # its own global_step variable.
        if not is_training:
            refs.global_step = tf.Variable(
                0, name='global_step', trainable=False)
        else:
            refs.train, refs.global_step = training(loss_value,
                                                    self.learning_rate)

        # Means accumulated across all batches.
        loss_updates, loss_op = util.loss(loss_value)
        accuracy_updates, accuracy_op = util.accuracy(logits, labels)

        if not is_training:
            # HYPERPARAMETER TUNING: 'training/hptuning/metric' carries the
            # objective value.
            summaries = (
                ('accuracy', accuracy_op),
                ('loss', loss_op),
                ('training/hptuning/metric', accuracy_op),
            )
            # tf.contrib.deprecated is tried first; the top-level
            # tf.scalar_summary is the fallback when that lookup fails.
            # Remove the fallback once TensorFlow 0.12 is standard.
            try:
                for tag, value in summaries:
                    tf.contrib.deprecated.scalar_summary(tag, value)
            except AttributeError:
                for tag, value in summaries:
                    tf.scalar_summary(tag, value)

        refs.metric_updates = loss_updates + accuracy_updates
        refs.metric_values = [loss_op, accuracy_op]
        return refs
예제 #2
0
  def build_graph(self, data_paths, batch_size, is_training):
    """Creates the shared graph for a training or an evaluation run.

    Args:
      data_paths: Input source handed to `util.read_examples`.
      batch_size: Batch size handed to `util.read_examples`.
      is_training: Selects training mode (shuffled input, no epoch limit,
        training op added) or evaluation mode (`num_epochs=2`, new
        `global_step` variable, scalar summaries registered).

    Returns:
      A `GraphReferences` holding the tensors and ops built here.
    """
    graph_refs = GraphReferences()

    num_epochs = None if is_training else 2
    _, graph_refs.examples = util.read_examples(
        data_paths, batch_size, shuffle=is_training, num_epochs=num_epochs)

    example_features = parse_examples(graph_refs.examples)

    # Predictions from the inference model, then the loss on those logits.
    logits = inference(example_features['images'], self.hidden1, self.hidden2)
    target_labels = example_features['labels']
    loss_value = loss(logits, target_labels)

    # Only the training graph computes and applies gradients.
    if not is_training:
      graph_refs.global_step = tf.Variable(
          0, name='global_step', trainable=False)
    else:
      graph_refs.train, graph_refs.global_step = training(
          loss_value, self.learning_rate)

    # Means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, target_labels)

    if not is_training:
      named_metrics = (('accuracy', accuracy_op), ('loss', loss_op))
      # Remove the fallback once TensorFlow 0.12 is standard.
      try:
        for tag, metric in named_metrics:
          tf.contrib.deprecated.scalar_summary(tag, metric)
      except AttributeError:
        for tag, metric in named_metrics:
          tf.scalar_summary(tag, metric)

    graph_refs.metric_updates = loss_updates + accuracy_updates
    graph_refs.metric_values = [loss_op, accuracy_op]
    return graph_refs

#trained_filename, model_params, words_to_ids, ids_to_words = train_attack_model(training_samples=20000, test_samples=5000, review_path = '/home/kalvin_kao/yelp_challenge_dataset/review.csv')
#generate_text(trained_filename, model_params, words_to_ids, ids_to_words)
예제 #3
0
    def build_graph(self, data_paths, batch_size, is_training):
        """Builds generic graph for training or eval.

        Args:
          data_paths: Input source forwarded to `util.read_examples`.
          batch_size: Batch size forwarded to `util.read_examples`.
          is_training: If true, input is shuffled with no epoch limit and the
            training op is added; otherwise `num_epochs=2` is used, a
            `global_step` variable is created and scalar summaries for
            accuracy and loss are registered.

        Returns:
          The populated `GraphReferences` object.
        """
        tensors = GraphReferences()

        _, tensors.examples = util.read_examples(
            data_paths,
            batch_size,
            shuffle=is_training,
            num_epochs=None if is_training else 2)

        parsed = parse_examples(tensors.examples)

        # Build a Graph that computes predictions from the inference model.
        logits = inference(parsed['images'], self.hidden1, self.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss_value = loss(logits, parsed['labels'])

        # Add to the Graph the Ops that calculate and apply gradients.
        if is_training:
            tensors.train, tensors.global_step = training(
                loss_value, self.learning_rate)
        else:
            tensors.global_step = tf.Variable(0,
                                              name='global_step',
                                              trainable=False)

        # Add means across all batches.
        loss_updates, loss_op = util.loss(loss_value)
        accuracy_updates, accuracy_op = util.accuracy(logits, parsed['labels'])

        if not is_training:
            # Select the summary function by feature detection instead of
            # comparing tf.__version__ as a string: string comparison is
            # lexicographic, so e.g. '0.9' < '0.12' is False and the old
            # check picked tf.contrib.deprecated on releases that lack it.
            # TODO(b/33420312): remove the fallback once 0.12 is fully rolled
            # out to prod.
            try:
                scalar_summary = tf.contrib.deprecated.scalar_summary
            except AttributeError:
                scalar_summary = tf.scalar_summary
            scalar_summary('accuracy', accuracy_op)
            scalar_summary('loss', loss_op)

        tensors.metric_updates = loss_updates + accuracy_updates
        tensors.metric_values = [loss_op, accuracy_op]
        return tensors
예제 #4
0
파일: model.py 프로젝트: cottrell/notebooks
  def build_graph(self, data_paths, batch_size, is_training):
    """Puts together the graph for one mode: training or evaluation.

    Args:
      data_paths: Input source passed through to `util.read_examples`.
      batch_size: Batch size passed through to `util.read_examples`.
      is_training: True for the training graph (shuffled input, no epoch
        limit, training op). False for the evaluation graph (`num_epochs=2`,
        standalone `global_step` variable, accuracy/loss scalar summaries).

    Returns:
      The `GraphReferences` instance populated by this method.
    """
    handles = GraphReferences()

    if is_training:
      epochs = None
    else:
      epochs = 2
    _, handles.examples = util.read_examples(
        data_paths, batch_size, shuffle=is_training, num_epochs=epochs)

    batch = parse_examples(handles.examples)

    # Inference model output and the loss computed from it.
    logits = inference(batch['images'], self.hidden1, self.hidden2)
    batch_labels = batch['labels']
    loss_value = loss(logits, batch_labels)

    # Gradients are computed and applied in training mode only.
    if not is_training:
      handles.global_step = tf.Variable(
          0, name='global_step', trainable=False)
    else:
      handles.train, handles.global_step = training(loss_value,
                                                    self.learning_rate)

    # Means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, batch_labels)

    if not is_training:
      scalars = [('accuracy', accuracy_op), ('loss', loss_op)]
      # Remove the fallback once TensorFlow 0.12 is standard.
      try:
        for summary_name, summary_value in scalars:
          tf.contrib.deprecated.scalar_summary(summary_name, summary_value)
      except AttributeError:
        for summary_name, summary_value in scalars:
          tf.scalar_summary(summary_name, summary_value)

    handles.metric_updates = loss_updates + accuracy_updates
    handles.metric_values = [loss_op, accuracy_op]
    return handles
예제 #5
0
    def build_graph(self, data_paths, batch_size, graph_mod):
        """Constructs the train, eval or predict graph chosen by `graph_mod`.

        Args:
          data_paths: Input source for `util.read_examples`. When falsy, a
            string placeholder named 'input' is created instead.
          batch_size: Batch size forwarded to `util.read_examples`.
          graph_mod: Compared against `GraphMod.TRAIN` and
            `GraphMod.PREDICT` to decide which parts of the graph are built.

        Returns:
          A `GraphReferences`. In predict mode it is returned right after
          `predictions` is set. Otherwise it also carries `global_step`,
          `metric_updates` and `metric_values`, plus `train` when training
          or `uris` when not training.
        """
        refs = GraphReferences()
        is_training = graph_mod == GraphMod.TRAIN
        is_predict = graph_mod == GraphMod.PREDICT

        if not data_paths:
            refs.examples = tf.placeholder(tf.string,
                                           name='input',
                                           shape=(None, ))
        else:
            epoch_limit = None if is_training else 2
            _, refs.examples = util.read_examples(data_paths,
                                                  batch_size,
                                                  shuffle=is_training,
                                                  num_epochs=epoch_limit)

        if is_predict:
            # The Inception graph is built for prediction only; the final
            # layers are attached to its embeddings below. Training uses
            # pre-processed data, so it does not need this graph.
            refs.input_jpeg, embeddings = self.build_inception_graph()
        else:
            # Training and evaluation assume pre-processed data, so the
            # inputs are tf-examples that get parsed here.
            with tf.name_scope('inputs'):
                uri_feature = tf.FixedLenFeature(shape=[],
                                                 dtype=tf.string,
                                                 default_value=[''])
                # Some images may have no labels. Those get a default label,
                # which is why the number of labels is label_count+1.
                label_feature = tf.FixedLenFeature(
                    shape=[1],
                    dtype=tf.int64,
                    default_value=[self.label_count])
                embedding_feature = tf.FixedLenFeature(
                    shape=[BOTTLENECK_TENSOR_SIZE], dtype=tf.float32)
                parsed = tf.parse_example(
                    refs.examples,
                    features={
                        'image_uri': uri_feature,
                        'label': label_feature,
                        'embedding': embedding_feature,
                    })
                labels = tf.squeeze(parsed['label'])
                uris = tf.squeeze(parsed['image_uri'])
                embeddings = parsed['embedding']

        # Dropout is only configured for the training graph.
        keep_prob = self.dropout if is_training else None
        with tf.name_scope('final_ops'):
            # label_count + 1 accounts for the default label.
            softmax, logits = self.add_final_training_ops(
                embeddings,
                self.label_count + 1,
                BOTTLENECK_TENSOR_SIZE,
                dropout_keep_prob=keep_prob)

        # The prediction is the index of the highest-scoring label; only the
        # top score is of interest.
        top_label = tf.argmax(softmax, 1)
        refs.predictions = [top_label, softmax, embeddings]

        if is_predict:
            return refs

        with tf.name_scope('evaluate'):
            loss_value = loss(logits, labels)

        # Gradient ops for training; a plain global_step and the image URIs
        # otherwise.
        if not is_training:
            refs.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            refs.uris = uris
        else:
            refs.train, refs.global_step = training(loss_value)

        # Means across all batches.
        loss_updates, loss_op = util.loss(loss_value)
        accuracy_updates, accuracy_op = util.accuracy(logits, labels)

        if not is_training:
            for tag, metric in (('accuracy', accuracy_op), ('loss', loss_op)):
                tf.summary.scalar(tag, metric)

        refs.metric_updates = loss_updates + accuracy_updates
        refs.metric_values = [loss_op, accuracy_op]
        return refs
예제 #6
0
    def build_graph(self,
                    data_paths,
                    batch_size,
                    graph_mod,
                    l1_size,
                    imap,
                    omap,
                    reduce1x1,
                    dropout_prob=None):
        """Builds the network graph for the mode given by `graph_mod`.

        Args:
          data_paths: Input source for `util.read_examples`. When falsy, a
            string placeholder named 'input' is created instead.
          batch_size: Forwarded to `util.read_examples` and `self.decode`.
          graph_mod: Compared against `GraphMod.TRAIN` and
            `GraphMod.PREDICT`.
          l1_size: Forwarded to `self.build_input_layer`.
          imap: Indexed at 0 and 1; one entry per inception layer.
          omap: Indexed at 0 and 1; one entry per inception layer.
          reduce1x1: Indexed at 0 and 1; one entry per inception layer.
          dropout_prob: Passed to `self.build_input_layer` as
            `dropout_keep_prob`.

        Returns:
          A `GraphReferences`. In predict mode it carries `inputs` and
          `predictions`; otherwise `global_step`, `metric_updates`,
          `metric_values` and, when training, `train`.
        """
        refs = GraphReferences()
        is_training = graph_mod == GraphMod.TRAIN

        if not data_paths:
            refs.examples = tf.placeholder(tf.string,
                                           name='input',
                                           shape=(None, ))
        else:
            epoch_limit = None if is_training else 2
            refs.keys, refs.examples = util.read_examples(
                data_paths,
                batch_size,
                shuffle=is_training,
                num_epochs=epoch_limit)

        is_predict = graph_mod == GraphMod.PREDICT
        if is_predict:
            # Prediction feeds the network directly through a placeholder;
            # IMAGE_SIZE is passed as the shape unchanged.
            inputs = tf.placeholder(tf.float32, shape=IMAGE_SIZE)
            refs.inputs = inputs
        else:
            with tf.name_scope('data'):
                parsed = tf.parse_example(
                    refs.examples,
                    features={
                        'height':
                        tf.FixedLenFeature(shape=1, dtype=tf.int64),
                        'kspace':
                        tf.FixedLenFeature(shape=1, dtype=tf.string),
                        'width':
                        tf.FixedLenFeature(shape=1, dtype=tf.int64),
                        'xspace':
                        tf.FixedLenFeature(shape=1, dtype=tf.string),
                    })
                inputs, outputs = self.decode(parsed['kspace'],
                                              batch_size,
                                              xspace=parsed['xspace'])

        with tf.name_scope('network'):
            net = self.build_input_layer(inputs,
                                         l1_size,
                                         dropout_keep_prob=dropout_prob)
            # Two stacked inception layers, each configured by the matching
            # entries of imap / omap / reduce1x1.
            for stage in (0, 1):
                net = self.build_inception_layer(net, imap[stage],
                                                 omap[stage],
                                                 reduce1x1[stage])
            final = self.build_final_layer(net)

        if is_predict:
            refs.predictions = [final]
            return refs

        with tf.name_scope('evaluate'):
            loss_value = loss(final, outputs)

        if not is_training:
            refs.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
        else:
            refs.train, refs.global_step = training(loss_value)

        # Only the loss is tracked as a metric; no accuracy metric is built.
        loss_updates, loss_op = util.loss(loss_value)

        if not is_training:
            tf.summary.scalar('loss', loss_op)

        refs.metric_updates = loss_updates
        refs.metric_values = [loss_op]
        return refs
예제 #7
0
    def build_graph(self, data_paths, batch_size, max_time, is_training):
        """Construct the RNN language-model graph, including train and sample ops.

        The method builds, in order:
        - int32 placeholders `input_w_` ("w") and `target_y_` ("y"), both
          declared with shape [None, None];
        - `batch_size_` / `max_time_`, read from the dynamic shape of
          `input_w_`, and `ns_`, which repeats `max_time_` `batch_size_` times;
        - an embedding matrix `W_in` of shape [V, H] and the lookup `x_`;
        - a recurrent layer built with `MakeFancyRNNCell` and
          `tf.nn.dynamic_rnn` (`initial_h_`, `outputs_`, `final_h_`);
        - the output projection `W_out` [H, V], `b_out` [V] and `logits_`;
        - `loss_`: mean sparse softmax cross-entropy over `logits_`;
        - `train_loss_`: mean sampled softmax loss using `softmax_ns` samples;
        - `train_step_`: Adam with gradients clipped by global norm
          (`max_grad_norm_`);
        - `pred_samples_`: one multinomial sample per position of `logits_`.

        Args:
          data_paths: Not referenced by the active code in this method.
          batch_size: Not referenced by the active code in this method; the
            batch size is taken from the shape of `input_w_` instead.
          max_time: Not referenced by the active code in this method; the
            sequence length is taken from the shape of `input_w_` instead.
          is_training: If true, the returned references carry the training
            op and the sampled-softmax loss is used for the metrics.
            Otherwise the full softmax loss is used and a scalar summary
            for the loss is registered.

        Returns:
          A tuple `(tensors, to_pass)`: a `GraphReferences` with
          `global_step`, `metric_updates`, `metric_values` (and `train` when
          training), and a `GraphIntermediates` with `loss`, `final_h` and
          `inital_h` (attribute spelled as in the code).
        """
        tensors = GraphReferences()
        to_pass = GraphIntermediates()

        # Examples are not read through util.read_examples here; inputs come
        # from the placeholders below.
        #_, tensors.examples = util.read_examples(
        #data_paths,
        #batch_size,
        #shuffle=is_training,
        #num_epochs=None if is_training else 2)

        # Input ids, with dynamic shape depending on input.
        # Should be shape [batch_size, max_time] and contain integer word indices.
        self.input_w_ = tf.placeholder(tf.int32, [None, None], name="w")
        # Initialised to None here; all three are assigned real tensors in the
        # recurrent and output layers further down.
        self.initial_h_ = None
        self.final_h_ = None
        # Later assigned a tensor intended to have shape
        # [batch_size, max_time, V].
        self.logits_ = None

        # Target ids; should be the same shape as input_w_.
        self.target_y_ = tf.placeholder(tf.int32, [None, None], name="y")

        # Assigned in the "loss_computation" scope below.
        self.loss_ = None

        # Get dynamic shape info from inputs
        with tf.name_scope("batch_size"):
            self.batch_size_ = tf.shape(self.input_w_)[0]
        with tf.name_scope("max_time"):
            self.max_time_ = tf.shape(self.input_w_)[1]

        # The batch_size / max_time arguments are intentionally not used; the
        # values above come from the fed input instead.
        #self.batch_size_ = batch_size
        #self.max_time_ = max_time

        # Sequence lengths passed to dynamic_rnn: a vector in which every
        # element equals max_time_, one element per batch row.
        # It can be overridden through feed_dict to mix different-length
        # sequences in one batch.
        self.ns_ = tf.tile([self.max_time_], [
            self.batch_size_,
        ], name="ns")  # TODO: update this for project

        with tf.name_scope("embedding_layer"):
            self.W_in_ = tf.get_variable(
                "W_in",
                shape=[self.V, self.H],
                initializer=tf.random_uniform_initializer(minval=-1.0,
                                                          maxval=1.0))
            self.x_ = tf.nn.embedding_lookup(self.W_in_, self.input_w_)

        # Construct RNN/LSTM cell and recurrent layer.
        with tf.name_scope("recurrent_layer"):
            self.cell_ = MakeFancyRNNCell(self.H, self.dropout_keep_prob_,
                                          self.num_layers)
            self.initial_h_ = self.cell_.zero_state(self.batch_size_,
                                                    tf.float32)
            self.outputs_, self.final_h_ = tf.nn.dynamic_rnn(
                self.cell_,
                inputs=self.x_,
                sequence_length=self.ns_,
                initial_state=self.initial_h_,
                dtype=tf.float32)

        with tf.name_scope("softmax_output_layer"):
            self.W_out_ = tf.get_variable(
                "W_out",
                shape=[self.H, self.V],
                initializer=tf.random_uniform_initializer(minval=-1.0,
                                                          maxval=1.0))

            self.b_out_ = tf.get_variable("b_out",
                                          shape=[
                                              self.V,
                                          ],
                                          initializer=tf.zeros_initializer())

            self.logits_ = tf.add(matmul3d(self.outputs_, self.W_out_),
                                  self.b_out_,
                                  name="logits")

        # Loss computation (true loss over the full vocabulary, used for
        # prediction/evaluation).
        with tf.name_scope("loss_computation"):
            per_example_loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.target_y_,
                logits=self.logits_,
                name="per_example_loss")
            self.loss_ = tf.reduce_mean(per_example_loss_, name="loss")

        # Both are assigned below: train_loss_ in "training_loss_function"
        # and train_step_ in "optimizer_and_training_op".
        self.train_step_ = None
        self.train_loss_ = None

        # Training loss: sampled softmax over softmax_ns sampled classes.
        # Targets and RNN outputs are flattened to one row per
        # (batch, time) position; W_out_ is transposed before being passed
        # as the weights.
        with tf.name_scope("training_loss_function"):
            per_example_train_loss_ = tf.nn.sampled_softmax_loss(
                weights=tf.transpose(self.W_out_),
                biases=self.b_out_,
                labels=tf.reshape(self.target_y_,
                                  [self.batch_size_ * self.max_time_, 1]),
                inputs=tf.reshape(self.outputs_,
                                  [self.batch_size_ * self.max_time_, self.H]),
                num_sampled=self.softmax_ns,
                num_classes=self.V,
                name="per_example_sampled_softmax_loss")
            # NOTE(review): partition_strategy is left at its default;
            # the original author questioned whether "div" is needed.
            self.train_loss_ = tf.reduce_mean(per_example_train_loss_,
                                              name="sampled_softmax_loss")

        # Adam on the sampled-softmax loss, with gradients clipped by global
        # norm before they are applied.
        with tf.name_scope("optimizer_and_training_op"):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer_ = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate_)
            gradients, v = zip(*optimizer_.compute_gradients(self.train_loss_))
            gradients, _ = tf.clip_by_global_norm(gradients,
                                                  self.max_grad_norm_)
            self.train_step_ = optimizer_.apply_gradients(
                zip(gradients, v), global_step=global_step)

        self.pred_samples_ = None

        # Draw one sample per position from the logits: flatten to
        # [-1, last logits dimension], sample, then reshape to
        # [batch_size, max_time, 1].
        with tf.name_scope("sampling_ops"):
            self.pred_samples_ = tf.multinomial(tf.reshape(
                self.logits_, [-1, self.logits_.get_shape()[-1]]),
                                                1,
                                                name="pred_samples")
            self.pred_samples_ = tf.reshape(
                self.pred_samples_, [self.batch_size_, self.max_time_, 1])

        if is_training:
            tensors.train = self.train_step_
            tensors.global_step = global_step
            loss_value = self.train_loss_
        else:
            # NOTE(review): this creates a second 'global_step' variable in
            # addition to the one made in "optimizer_and_training_op" above,
            # which is built in both modes - confirm this is intended.
            tensors.global_step = tf.Variable(0,
                                              name='global_step',
                                              trainable=False)
            loss_value = self.loss_

        # Add means across all batches. No accuracy metric is computed in
        # this method.
        loss_updates, loss_op = util.loss(loss_value)

        if not is_training:
            # Remove this if once Tensorflow 0.12 is standard.
            try:
                tf.contrib.deprecated.scalar_summary('loss', loss_op)
            except AttributeError:
                tf.scalar_summary('loss', loss_op)

        tensors.metric_updates = loss_updates
        tensors.metric_values = [loss_op]
        to_pass.loss = loss_value
        to_pass.final_h = self.final_h_
        # NOTE(review): 'inital_h' looks like a misspelling of 'initial_h';
        # presumably readers of GraphIntermediates use the same spelling -
        # confirm before renaming.
        to_pass.inital_h = self.initial_h_
        return tensors, to_pass
예제 #8
0
  def build_graph(self, data_paths, batch_size, graph_mod):
    """Creates the graph for training, evaluation or prediction.

    Args:
      data_paths: Input source for `util.read_examples`. When falsy, a
        string placeholder named 'input' is created instead.
      batch_size: Batch size forwarded to `util.read_examples`.
      graph_mod: Compared against `GraphMod.TRAIN` and `GraphMod.PREDICT`
        to select which parts of the graph are built.

    Returns:
      A `GraphReferences`. In predict mode it is returned right after
      `predictions` is set. Otherwise it also carries `global_step`,
      `metric_updates` and `metric_values`, plus `train` when training.
    """
    graph_refs = GraphReferences()
    is_training = graph_mod == GraphMod.TRAIN
    predicting = graph_mod == GraphMod.PREDICT

    if not data_paths:
      graph_refs.examples = tf.placeholder(
          tf.string, name='input', shape=(None,))
    else:
      num_epochs = None if is_training else 2
      graph_refs.keys, graph_refs.examples = util.read_examples(
          data_paths, batch_size, shuffle=is_training, num_epochs=num_epochs)

    if predicting:
      # The Inception graph is built for prediction only; the final layers
      # are added on top of its embeddings below. Training uses
      # pre-processed data, so it does not need this graph.
      graph_refs.input_jpeg, embeddings = self.build_inception_graph()
    else:
      # Training and evaluation assume pre-processed data, so the inputs
      # are tf-examples that get parsed here.
      with tf.name_scope('inputs'):
        uri_spec = tf.FixedLenFeature(
            shape=[], dtype=tf.string, default_value=[''])
        # Some images may have no labels. Those get a default label, which
        # is why the number of labels is label_count+1.
        label_spec = tf.FixedLenFeature(
            shape=[1], dtype=tf.int64, default_value=[self.label_count])
        embedding_spec = tf.FixedLenFeature(
            shape=[BOTTLENECK_TENSOR_SIZE], dtype=tf.float32)
        feature_specs = {
            'image_uri': uri_spec,
            'label': label_spec,
            'embedding': embedding_spec,
        }
        parsed = tf.parse_example(graph_refs.examples, features=feature_specs)
        labels = tf.squeeze(parsed['label'])
        # Not referenced again in this method; the squeeze op is still added
        # to the graph.
        uris = tf.squeeze(parsed['image_uri'])
        embeddings = parsed['embedding']

    # Dropout is only configured for the training graph.
    keep_prob = self.dropout if is_training else None
    with tf.name_scope('final_ops'):
      # label_count + 1 accounts for the default label.
      softmax, logits = self.add_final_training_ops(
          embeddings,
          self.label_count + 1,
          BOTTLENECK_TENSOR_SIZE,
          dropout_keep_prob=keep_prob)

    # The prediction is the index of the highest-scoring label; only the top
    # score is of interest.
    best_label = tf.argmax(softmax, 1)
    graph_refs.predictions = [best_label, softmax, embeddings]

    if predicting:
      return graph_refs

    with tf.name_scope('evaluate'):
      loss_value = loss(logits, labels)

    # Gradient ops for training; a plain global_step variable otherwise.
    if not is_training:
      graph_refs.global_step = tf.Variable(
          0, name='global_step', trainable=False)
    else:
      graph_refs.train, graph_refs.global_step = training(loss_value)

    # Means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, labels)

    if not is_training:
      for tag, metric in (('accuracy', accuracy_op), ('loss', loss_op)):
        tf.summary.scalar(tag, metric)

    graph_refs.metric_updates = loss_updates + accuracy_updates
    graph_refs.metric_values = [loss_op, accuracy_op]
    return graph_refs
예제 #9
0
  def build_graph(self, data_paths, batch_size, graph_mod):
    """Builds generic graph for training or eval."""
    tensors = GraphReferences()
    is_training = graph_mod == GraphMod.TRAIN
    tf.keras.backend.set_learning_phase(1 if is_training else 0)
    if data_paths:
      tensors.keys, tensors.examples = util.read_examples(
          data_paths,
          batch_size,
          shuffle=is_training,
          num_epochs=None if is_training else 2)
    else:
      tensors.examples = tf.placeholder(tf.string, name='input', shape=(None,))

    if graph_mod == GraphMod.PREDICT:
      inception_input, inception_embeddings = self.build_inception_graph()
      image_embeddings = inception_embeddings

      title_embeddings = tf.placeholder(tf.float32, shape=[None, TITLE_EMBEDDING_SIZE])
      title_words_count = tf.placeholder(tf.int64, shape=[None])
      content_embeddings = tf.placeholder(tf.float32, shape=[None, CONTENT_EMBEDDING_SIZE])
      content_words_count = tf.placeholder(tf.int64, shape=[None])

      title_word_chars = tf.placeholder(tf.string, shape=[None, TITLE_WORD_CHARS_SIZE])
      content_word_chars = tf.placeholder(tf.string, shape=[None, CONTENT_WORD_CHARS_SIZE])
      title_word_char_lengths = tf.placeholder(tf.int64, shape=[None, TITLE_WORD_SIZE])
      content_word_char_lengths = tf.placeholder(tf.int64, shape=[None, CONTENT_WORD_SIZE])

      category_ids = tf.placeholder(tf.int64, shape=[None])
      price = tf.placeholder(tf.int64, shape=[None])
      images_count = tf.placeholder(tf.int64, shape=[None])
      recent_articles_count = tf.placeholder(tf.int64, shape=[None])
      title_length = tf.placeholder(tf.int64, shape=[None])
      content_length = tf.placeholder(tf.int64, shape=[None])
      blocks_inline = tf.placeholder(tf.string, shape=[None])
      username_chars = tf.placeholder(tf.string, shape=[None, USERNAME_CHAR_SIZE])
      username_length = tf.placeholder(tf.int64, shape=[None])
      created_at_ts = tf.placeholder(tf.int64, shape=[None])
      offerable = tf.placeholder(tf.int64, shape=[None])

      tensors.input_image = inception_input
      tensors.input_title = title_embeddings
      tensors.input_title_words_count = title_words_count
      tensors.input_content = content_embeddings
      tensors.input_content_words_count = content_words_count
      tensors.input_category_id = category_ids
      tensors.input_price = price
      tensors.input_images_count = images_count
      tensors.input_recent_articles_count = recent_articles_count
      tensors.input_title_length = title_length
      tensors.input_content_length = content_length
      tensors.input_blocks_inline = blocks_inline
      tensors.input_username_chars = username_chars
      tensors.input_username_length = username_length
      tensors.input_created_at_ts = created_at_ts
      tensors.input_offerable = offerable
      tensors.input_title_word_chars = title_word_chars
      tensors.input_content_word_chars = content_word_chars
      tensors.input_title_word_char_lengths = title_word_char_lengths
      tensors.input_content_word_char_lengths = content_word_char_lengths

      username_chars = tf.reshape(username_chars, [-1, USERNAME_CHAR_SIZE])
    else:
      # For training and evaluation we assume data is preprocessed, so the
      # inputs are tf-examples.
      # Generate placeholders for examples.
      with tf.name_scope('inputs'):
        feature_map = {
            'id':
                tf.FixedLenFeature(
                    shape=[], dtype=tf.string, default_value=['']),
            # Some images may have no labels. For those, we assume a default
            # label. So the number of labels is label_count+1 for the default
            # label.
            'label':
                tf.FixedLenFeature(
                    shape=[1], dtype=tf.int64,
                    default_value=[self.label_count]),
            'embedding':
                tf.FixedLenFeature(
                    shape=[BOTTLENECK_TENSOR_SIZE], dtype=tf.float32),
            'title_embedding':
                tf.FixedLenFeature(
                    shape=[TITLE_EMBEDDING_SIZE], dtype=tf.float32),
            'title_words_count':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'content_embedding':
                tf.FixedLenFeature(
                    shape=[CONTENT_EMBEDDING_SIZE], dtype=tf.float32),
            'content_words_count':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'category_id':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'price':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'images_count':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'recent_articles_count':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'title_length':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'content_length':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'blocks_inline':
                tf.FixedLenFeature(shape=[], dtype=tf.string),
            'username_chars':
                tf.FixedLenFeature(shape=[USERNAME_CHAR_SIZE], dtype=tf.string),
            'username_length':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'created_at_ts':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'offerable':
                tf.FixedLenFeature(shape=[], dtype=tf.int64),
            'title_word_chars':
                tf.FixedLenFeature(shape=[TITLE_WORD_CHARS_SIZE], dtype=tf.string),
            'content_word_chars':
                tf.FixedLenFeature(shape=[CONTENT_WORD_CHARS_SIZE], dtype=tf.string),
            'title_word_char_lengths':
                tf.FixedLenFeature(shape=[TITLE_WORD_SIZE], dtype=tf.int64),
            'content_word_char_lengths':
                tf.FixedLenFeature(shape=[CONTENT_WORD_SIZE], dtype=tf.int64),
        }
        parsed = tf.parse_example(tensors.examples, features=feature_map)
        labels = tf.squeeze(parsed['label'])
        tensors.labels = labels
        tensors.ids = tf.squeeze(parsed['id'])
        image_embeddings = parsed['embedding']
        title_embeddings = parsed['title_embedding']
        title_words_count = parsed['title_words_count']
        content_embeddings = parsed['content_embedding']
        content_words_count = parsed['content_words_count']
        category_ids = parsed['category_id']
        price = parsed['price']
        images_count = parsed['images_count']
        recent_articles_count = parsed['recent_articles_count']
        title_length = parsed['title_length']
        content_length = parsed['content_length']
        blocks_inline = parsed['blocks_inline']
        username_chars = parsed['username_chars']
        username_length = parsed['username_length']
        created_at_ts = parsed['created_at_ts']
        offerable = parsed['offerable']
        title_word_chars = parsed['title_word_chars']
        content_word_chars = parsed['content_word_chars']
        title_word_char_lengths = parsed['title_word_char_lengths']
        content_word_char_lengths = parsed['content_word_char_lengths']

    dropout_keep_prob = self.dropout if is_training else None
    if self.rnn_type == 'LSTM':
        if tf.test.gpu_device_name():
            base_cell = tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell
        else:
            base_cell = tf.contrib.rnn.BasicLSTMCell
    else:
        if tf.test.gpu_device_name():
            base_cell = tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell
        else:
            base_cell = tf.contrib.rnn.GRUCell

    def dropout(x, keep_prob):
        if keep_prob:
            return tf.nn.dropout(x, keep_prob)
        return x

    if self.args.l2_reg_scale > 0.:
        regularizer = tf.contrib.layers.l2_regularizer(self.args.l2_reg_scale)
    else:
        regularizer = None

    def dense(x, units):
        for unit in units:
            if self.activation == 'maxout':
                x = layers.fully_connected(x, unit, activation_fn=None,
                        weights_regularizer=regularizer)
                x = tf.contrib.layers.maxout(x, unit)
                x = tf.reshape(x, [-1, unit])
            elif self.activation == 'none':
                x = layers.fully_connected(x, unit,
                        weights_regularizer=regularizer,
                        normalizer_fn=tf.contrib.layers.batch_norm,
                        normalizer_params={'is_training': is_training})
            else:
                x = layers.fully_connected(x, unit, weights_regularizer=regularizer)
            x = dropout(x, dropout_keep_prob)
        return x

    def shallow_and_wide_cnn(inputs, filters, kernel_sizes):
        outputs = []
        for kernel_size in kernel_sizes:
            conv = tf.layers.conv1d(inputs, filters, kernel_size, padding="same",
                    kernel_regularizer=regularizer)
            conv = tf.layers.batch_normalization(conv, training=is_training)
            conv = tf.nn.relu(conv)
            conv = GlobalMaxPooling1D()(conv)
            outputs.append(conv)
        output = tf.concat(outputs, 1)
        return dropout(output, dropout_keep_prob)

    def get_word_chars(table, char_embedding, word_chars, char_lengths, word_size):
        word_chars = tf.reshape(word_chars, [-1, word_size, WORD_CHAR_SIZE])
        char_ids = table.lookup(word_chars)
        x = char_embedding(char_ids)
        mask = tf.sequence_mask(char_lengths, WORD_CHAR_SIZE, dtype=tf.float32)
        mask = tf.expand_dims(mask, 3)  # [batch, seq_len, char_dim, 1]
        x = x * mask
        x = tf.reshape(x, [-1, WORD_CHAR_SIZE, CHAR_DIM])   # [batch * word_size, word_char_size, char_dim]
        if self.args.word_char_type == 'cnn':
            filters = 16
            output = shallow_and_wide_cnn(x, filters, [1,2,3])
            last_states = output
        else:
            length = tf.reshape(char_lengths, [-1])
            outputs, last_states = stack_bidirectional_dynamic_rnn(x, [CHAR_DIM],
                    length, dropout_keep_prob=dropout_keep_prob,
                    cell_wrapper=self.rnn_cell_wrapper,
                    variational_recurrent=self.variational_recurrent,
                    base_cell=base_cell,
                    is_training=is_training)
        return tf.reshape(last_states, [-1, word_size, CHAR_DIM*2]) # [batch, word_size, char_dim*2]

    if self.args.word_char_type != 'none':
        with tf.variable_scope("word_chars", reuse=tf.AUTO_REUSE):
            table = tf.contrib.lookup.index_table_from_tensor(
                    mapping=tf.constant(self.text_chars),
                    default_value=len(self.text_chars))
            char_dict_size = len(self.text_chars) + 1 # add unknown char
            char_embedding = Embedding(char_dict_size, CHAR_DIM)
            title_word_chars = get_word_chars(table, char_embedding,
                    title_word_chars, title_word_char_lengths, TITLE_WORD_SIZE)
            content_word_chars = get_word_chars(table, char_embedding,
                    content_word_chars, content_word_char_lengths, CONTENT_WORD_SIZE)

    with tf.variable_scope("username"):
        table = tf.contrib.lookup.index_table_from_tensor(
                mapping=tf.constant(self.username_chars),
                default_value=len(self.username_chars))
        char_ids = table.lookup(username_chars)
        char_dict_size = len(self.username_chars) + 1 # add unknown char
        x = Embedding(char_dict_size, CHAR_DIM)(char_ids)
        mask = tf.sequence_mask(username_length, USERNAME_CHAR_SIZE, dtype=tf.float32)
        x = x * tf.expand_dims(mask, 2)

        if self.username_type == 'dense':
            username = tf.reshape(x, [-1, USERNAME_CHAR_SIZE * CHAR_DIM])
            username = dense(username, [30, 30])
        elif self.username_type == 'cnn':
            def conv_username(x, filters):
                k3 = tf.layers.conv1d(x, filters, 3)
                k3 = tf.nn.relu(k3)
                k3 = tf.layers.max_pooling1d(k3, 3, 3)
                k3 = tf.layers.conv1d(k3, filters, 3)
                k3 = tf.nn.relu(k3)

                k2 = tf.layers.conv1d(x, filters, 2)
                k2 = tf.nn.relu(k2)
                k2 = tf.layers.max_pooling1d(k2, 2, 2)
                k2 = tf.layers.conv1d(k2, filters, 2, strides=2)
                k2 = tf.nn.relu(k2)
                k2 = tf.layers.max_pooling1d(k2, 2, 2)

                k1 = tf.layers.conv1d(x, filters, 1)
                k1 = tf.nn.relu(k1)
                k1 = tf.layers.max_pooling1d(k1, 3, 3)
                k1 = tf.layers.conv1d(k1, filters, 2, strides=2)
                k1 = tf.nn.relu(k1)
                k1 = tf.layers.max_pooling1d(k1, 2, 2)

                x = tf.concat([k1, k2, k3], 2)
                x = tf.reshape(x, [-1, filters * 3])
                return tf.layers.batch_normalization(x, training=is_training)

            filters = 10
            #username = shallow_and_wide_cnn(x, filters, [1,2,3])
            username = conv_username(x, filters)
        elif self.username_type == 'rnn':
            outputs, last_states = stack_bidirectional_dynamic_rnn(x, [CHAR_DIM],
                    username_length, dropout_keep_prob=dropout_keep_prob,
                    cell_wrapper=self.rnn_cell_wrapper,
                    variational_recurrent=self.variational_recurrent,
                    base_cell=base_cell,
                    is_training=is_training)
            username = last_states
        elif self.username_type == 'none':
            username = None
        else:
            raise Exception('Invaild username_type: %s' % self.username_type)


    with tf.variable_scope("user"):
        recent_articles_count = tf.minimum(recent_articles_count, 300)
        recent_articles_count = tf.expand_dims(recent_articles_count, 1)
        recent_articles_count = tf.to_int32(recent_articles_count)
        blocks = blocks_inline_to_matrix(blocks_inline)
        blocks = tf.minimum(blocks, 50)

        user = tf.concat([recent_articles_count#, blocks
            ], 1)
        user = tf.cast(user, tf.float32)
        user = tf.layers.batch_normalization(user, training=is_training)
        user = dropout(user, dropout_keep_prob)

    with tf.variable_scope("category"):
        category_ids = tf.minimum(category_ids - 1, TOTAL_CATEGORIES_COUNT - 1)
        category = Embedding(TOTAL_CATEGORIES_COUNT, 10)(category_ids)
        category = dropout(category, dropout_keep_prob)

    with tf.variable_scope("continuous"):
        price = tf.minimum(price, 1000000000)
        title_length = tf.minimum(title_length, 100)
        content_length = tf.minimum(content_length, 3000)
        created_time = tf.mod(created_at_ts, DAY_TIME)
        day = tf.mod(created_at_ts / DAY_TIME, 7)

        continuous = tf.stack([price, images_count, title_length,
            content_length#, offerable, created_time, day
            ], 1)
        continuous = tf.cast(continuous, tf.float32)
        continuous = tf.concat([continuous, tf.square(continuous)], 1)
        continuous = tf.layers.batch_normalization(continuous, training=is_training)
        continuous = dropout(continuous, dropout_keep_prob)

    with tf.variable_scope("image"):
        image_embeddings = dense(image_embeddings, [256])

    with tf.variable_scope('bunch'):
      bunch = tf.concat([image_embeddings, category, continuous, user], 1)
      if self.username_type != 'none':
          bunch = tf.concat([bunch, username], 1)

    if self.args.word_char_type != 'none':
        word_dim = CHAR_WORD_DIM
    else:
        word_dim = WORD_DIM

    with tf.variable_scope('title'):
      initial_state = dense(bunch, [word_dim*2])
      layer_sizes = [word_dim * (2**i) for i in range(max(1, self.rnn_layers_count-1))]
      title_embeddings = tf.reshape(title_embeddings, [-1, TITLE_WORD_SIZE, WORD_DIM])
      if self.args.word_char_type != 'none':
          title_embeddings = tf.concat([title_embeddings, title_word_chars], -1)
      title_outputs, title_last_states = stack_bidirectional_dynamic_rnn(title_embeddings, layer_sizes,
              title_words_count, initial_state=initial_state,
              cell_wrapper=self.rnn_cell_wrapper, variational_recurrent=self.variational_recurrent,
              base_cell=base_cell, dropout_keep_prob=dropout_keep_prob, is_training=is_training)

    with tf.variable_scope('content'):
      bunch = tf.concat([bunch, title_last_states], 1)
      initial_state = dense(bunch, [192, word_dim*2])

      layer_sizes = [word_dim * (2**i) for i in range(self.rnn_layers_count)]
      content_embeddings = tf.reshape(content_embeddings, [-1, CONTENT_WORD_SIZE, WORD_DIM])
      if self.args.word_char_type != 'none':
          content_embeddings = tf.concat([content_embeddings, content_word_chars], -1)
      content_outputs, content_last_states = stack_bidirectional_dynamic_rnn(content_embeddings, layer_sizes,
              content_words_count, initial_state=initial_state,
              cell_wrapper=self.rnn_cell_wrapper, variational_recurrent=self.variational_recurrent,
              base_cell=base_cell, dropout_keep_prob=dropout_keep_prob, is_training=is_training)

    with tf.variable_scope('final_ops'):
      hidden = tf.concat([bunch, content_last_states], 1)
      if self.final_layers_count > 0:
          hidden = dense(hidden, [192] + [64] * (self.final_layers_count-1))
      softmax, logits = self.add_final_training_ops(hidden, self.label_count)

    # Prediction is the index of the label with the highest score. We are
    # interested only in the top score.
    prediction = tf.argmax(logits, 1)
    tensors.predictions = [prediction, softmax]

    if graph_mod == GraphMod.PREDICT:
      return tensors

    def is_l2_var_name(name):
        for token in ['bias', 'table', 'BatchNorm']:
            if token in name:
                return False
        return True

    with tf.name_scope('evaluate'):
      loss_value = loss(logits, labels)
      #l2_loss = tf.add_n([ tf.nn.l2_loss(v) for v in tf.trainable_variables() if is_l2_var_name(v.name) ])
      #loss_value += l2_loss * 0.001

    # Add to the Graph the Ops that calculate and apply gradients.
    if is_training:
      tensors.train, tensors.global_step = training(loss_value)
    else:
      tensors.global_step = tf.Variable(0, name='global_step', trainable=False)

    # Add means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, labels)

    all_precision_op, all_precision_update = tf.metrics.precision(labels, prediction)
    all_recall_op, all_recall_update = tf.metrics.recall(labels, prediction)

    precision = {'ops': [], 'updates': []}
    recall = {'ops': [], 'updates': []}

    with tf.name_scope('metrics'):
        for i in range(self.label_count):
            op, update = tf.metrics.recall_at_k(labels, logits, 1, class_id=i)
            recall['ops'].append(op)
            recall['updates'].append(update)
            op, update = tf.metrics.precision_at_k(labels, logits, 1, class_id=i)
            precision['ops'].append(op)
            precision['updates'].append(update)

    if not is_training:
      tf.summary.scalar('accuracy', accuracy_op, family='general')
      tf.summary.scalar('loss', loss_op, family='general')
      tf.summary.scalar('precision', all_precision_op, family='general')
      tf.summary.scalar('recall', all_recall_op, family='general')
      for i in range(self.label_count):
          label_name = self.labels[i]
          tf.summary.scalar('%s' % label_name, recall['ops'][i], family='recall')
          tf.summary.scalar('%s' % label_name, precision['ops'][i], family='precision')

    tensors.metric_updates = loss_updates + accuracy_updates + \
            [all_precision_update, all_recall_update] + \
            recall['updates'] + precision['updates']
    tensors.metric_values = [loss_op, accuracy_op, all_precision_op, all_recall_op]
    return tensors
    def build_graph(self, data_paths, batch_size, graph_mod):
        """Builds the VGG-16 style graph for training, eval or prediction.

        Args:
          data_paths: Paths passed to `util.read_examples`. When falsy, a
            string placeholder named 'input' is used as the example source.
          batch_size: Batch size passed to `util.read_examples`.
          graph_mod: A `GraphMod` value. `GraphMod.TRAIN` turns on shuffling,
            unlimited epochs, dropout and the training op;
            `GraphMod.PREDICT` returns right after the predictions are set.

        Returns:
          The `GraphReferences` object populated with `examples`,
          `predictions` and, outside of PREDICT mode, `global_step`,
          `metric_updates`, `metric_values` (plus `train` when training).
        """
        tensors = GraphReferences()
        is_training = graph_mod == GraphMod.TRAIN
        if data_paths:
            tensors.keys, tensors.examples = util.read_examples(
                data_paths,
                batch_size,
                shuffle=is_training,
                num_epochs=None if is_training else 2)
        else:
            logging.info("No data path")
            tensors.examples = tf.placeholder(tf.string,
                                              name='input',
                                              shape=(None, ))

        if graph_mod == GraphMod.PREDICT:
            # NOTE(review): this branch wires up no image input, so neither
            # `decode_and_resize` nor `images_str_tensor` is bound and the
            # `tf.map_fn` call below fails in PREDICT mode. The intended
            # prediction input is not visible from here -- confirm and wire
            # it up before relying on PREDICT.
            pass
        else:
            # For training and evaluation we assume data is preprocessed, so the
            # inputs are tf-examples.
            # Generate placeholders for examples.
            with tf.name_scope('inputs'):
                feature_map = {
                    'image_uri':
                    tf.FixedLenFeature(shape=[],
                                       dtype=tf.string,
                                       default_value=['']),
                    'image_bytes':
                    tf.FixedLenFeature(shape=[],
                                       dtype=tf.string,
                                       default_value=['']),
                    # Examples without a label fall back to the extra
                    # "default" label index `self.label_count`.
                    'label':
                    tf.FixedLenFeature(shape=[1],
                                       dtype=tf.int64,
                                       default_value=[self.label_count])
                }
                parsed = tf.parse_example(tensors.examples,
                                          features=feature_map)
                labels = tf.squeeze(parsed['label'])
                # Not consumed below; kept so the 'inputs' scope still
                # contains the same ops as before.
                uris = tf.squeeze(parsed['image_uri'])
                images_str_tensor = parsed['image_bytes']

            def decode_and_resize(image_str_tensor):
                """Decodes a jpeg string into a single-channel float32 image.

                Despite the name, no resizing is performed: the bilinear
                resize step is disabled, so images keep their decoded size.
                """
                image = tf.image.decode_jpeg(image_str_tensor, channels=1)
                image = tf.cast(image, dtype=tf.uint8)

                # convert_image_dtype, also scales [0, uint8_max] -> [0 ,1).
                return tf.image.convert_image_dtype(image, dtype=tf.float32)

        images = tf.map_fn(decode_and_resize,
                           images_str_tensor,
                           back_prop=False,
                           dtype=tf.float32)

        # We assume a default label, so the total number of labels is equal to
        # label_count+1.
        all_labels_count = self.label_count + 1

        with tf.name_scope('model'):

            fc_padding = 'VALID'
            with tf.variable_scope('model', 'vgg_16', [images]) as sc:
                end_points_collection = sc.original_name_scope + '_end_points'
                # Collect outputs for conv2d, fully_connected and max_pool2d.
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
                    net = slim.repeat(images,
                                      2,
                                      slim.conv2d,
                                      64, [3, 3],
                                      scope='conv1')
                    net = slim.max_pool2d(net, [2, 2], scope='pool1')
                    net = slim.repeat(net,
                                      2,
                                      slim.conv2d,
                                      128, [3, 3],
                                      scope='conv2')
                    net = slim.max_pool2d(net, [2, 2], scope='pool2')
                    net = slim.repeat(net,
                                      3,
                                      slim.conv2d,
                                      256, [3, 3],
                                      scope='conv3')
                    net = slim.max_pool2d(net, [2, 2], scope='pool3')
                    net = slim.repeat(net,
                                      3,
                                      slim.conv2d,
                                      512, [3, 3],
                                      scope='conv4')
                    net = slim.max_pool2d(net, [2, 2], scope='pool4')
                    net = slim.repeat(net,
                                      3,
                                      slim.conv2d,
                                      512, [3, 3],
                                      scope='conv5')
                    net = slim.max_pool2d(net, [2, 2], scope='pool5')

                    # Use conv2d instead of fully_connected layers.
                    net = slim.conv2d(net,
                                      4096, [7, 7],
                                      padding=fc_padding,
                                      scope='fc6')
                    # Dropout must follow the graph mode: it was previously
                    # hard-coded to is_training=True, which kept units being
                    # dropped at evaluation time as well.
                    net = slim.dropout(net,
                                       0.5,
                                       is_training=is_training,
                                       scope='dropout6')
                    net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                    # Convert end_points_collection into a end_point dict.
                    end_points = slim.utils.convert_collection_to_dict(
                        end_points_collection)

                    net = slim.dropout(net,
                                       0.5,
                                       is_training=is_training,
                                       scope='dropout7')
                    net = slim.conv2d(net,
                                      all_labels_count, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='fc8')

                    # Squeezing axes 1 and 2 requires fc8 to be 1x1
                    # spatially -- presumably inputs are pre-sized for the
                    # 7x7 VALID fc6 kernel; TODO confirm.
                    net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                    end_points[sc.name + '/fc8'] = net
        logits = net
        softmax = tf.nn.softmax(logits)
        # Prediction is the index of the label with the highest score. We are
        # interested only in the top score.
        prediction = tf.argmax(softmax, 1)
        tensors.predictions = [prediction, softmax, images]

        if graph_mod == GraphMod.PREDICT:
            return tensors

        with tf.name_scope('evaluate'):
            loss_value = loss(logits, labels)

        # Add to the Graph the Ops that calculate and apply gradients.
        if is_training:
            tensors.train, tensors.global_step = training(loss_value)
        else:
            tensors.global_step = tf.Variable(0,
                                              name='global_step',
                                              trainable=False)

        # Add means across all batches.
        loss_updates, loss_op = util.loss(loss_value)
        accuracy_updates, accuracy_op = util.accuracy(logits, labels)

        # Summaries are only written for the evaluation graph.
        if not is_training:
            tf.summary.scalar('accuracy', accuracy_op)
            tf.summary.scalar('loss', loss_op)

        tensors.metric_updates = loss_updates + accuracy_updates
        tensors.metric_values = [loss_op, accuracy_op]
        return tensors
예제 #11
0
def run_training():
    '''Train the neural network configured through FLAGS and save the results.

    Builds a network with zero, one or two hidden layers (depending on
    FLAGS.hidden1 / FLAGS.hidden2), trains it for FLAGS.max_steps steps,
    evaluates on the validation set every 100 steps and on the test set at
    the end, then saves the weights/biases via util.save_config and the
    network outputs for data_sets.input_data under
    FLAGS.data_dir + "train_result/".

    The session and the summary writer are released even when training
    fails part-way through.
    '''
    # sanity check
    assert FLAGS.input_data_type in ('float', 'int')
    assert FLAGS.output_data_type in ('float', 'int')
    # import the dataset
    # (a disabled hotspot-training variant additionally passed
    # FLAGS.tile_size and FLAGS.num_maps to dataset.Datasets)
    data_sets = dataset.Datasets(FLAGS.data_dir, FLAGS.separate_file,
                                 FLAGS.input_data_type, FLAGS.output_data_type)

    with tf.Graph().as_default():
        # placeholder
        input_pl, golden_pl = util.generate_placeholder(
            data_sets.num_in_neuron, data_sets.num_out_neuron,
            FLAGS.batch_size, FLAGS.input_data_type, FLAGS.output_data_type)
        # build graph
        if FLAGS.hidden1 == 0:
            # No hidden layer at all: hidden2 must be disabled too.
            assert (FLAGS.hidden2 == 0)
            outputs = util.layer('output_layer', input_pl,
                                 data_sets.num_in_neuron,
                                 data_sets.num_out_neuron, None)
        else:
            hidden1 = util.layer('hidden1', input_pl, data_sets.num_in_neuron,
                                 FLAGS.hidden1, util.fast_sigmoid)
            if FLAGS.hidden2 == 0:
                outputs = util.layer('output_layer', hidden1, FLAGS.hidden1,
                                     data_sets.num_out_neuron, None)
            else:
                hidden2 = util.layer('hidden2', hidden1, FLAGS.hidden1,
                                     FLAGS.hidden2, util.fast_sigmoid)
                outputs = util.layer('output_layer', hidden2, FLAGS.hidden2,
                                     data_sets.num_out_neuron, None)

        # loss
        loss = util.loss(outputs, golden_pl, FLAGS.benchmark)

        # train
        train_op = util.training(loss, FLAGS.learning_rate)

        # accumulated error for one batch of data
        error = util.error(outputs, golden_pl, FLAGS.benchmark)

        # summary - not necessary; None when the graph collected no summaries
        summary = tf.merge_all_summaries()

        # init
        init = tf.initialize_all_variables()

        # sess - the context manager closes it on every exit path
        with tf.Session() as sess:
            # summary writer - not necessary
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)
            try:
                # everything built, run init
                sess.run(init)

                # start training
                for step in xrange(FLAGS.max_steps):
                    feed_dict = util.fill_feed_dict(data_sets.train, input_pl,
                                                    golden_pl,
                                                    FLAGS.batch_size)
                    sess.run(train_op, feed_dict=feed_dict)

                    # print the loss every 100 steps
                    # write the summary
                    # evaluate the model
                    if not step % 100:
                        print('step %d: loss = %.2f' %
                              (step, sess.run(loss, feed_dict=feed_dict)))

                        # Skip summary writing when there is nothing to
                        # write; sess.run(None) would raise.
                        if summary is not None:
                            summary_str = sess.run(summary,
                                                   feed_dict=feed_dict)
                            summary_writer.add_summary(summary_str, step)
                            summary_writer.flush()

                        print('validation data evaluation')
                        util.do_eval(sess, error, input_pl, golden_pl,
                                     FLAGS.batch_size, data_sets.validate)
            finally:
                summary_writer.close()

            # final accuracy
            print('test data evaluation')
            util.do_eval(sess, error, input_pl, golden_pl, FLAGS.batch_size,
                         data_sets.test)

            # filename for saving: <in>_<hidden1>_<hidden2>_<out>.txt
            savefile = str(data_sets.num_in_neuron) + "_" + str(
                FLAGS.hidden1) + "_" + str(FLAGS.hidden2) + "_" + str(
                    data_sets.num_out_neuron) + ".txt"

            # save weights and biases
            util.save_config(sess, NUM_LAYERS, FLAGS.config_dir, savefile)

            # save trained output
            # need to fetch original input data
            output_save = sess.run(outputs,
                                   feed_dict={input_pl: data_sets.input_data})
            np.savetxt(FLAGS.data_dir + "train_result/" + savefile,
                       output_save,
                       delimiter=" ")
    def build_graph(self, data_paths, batch_size, graph_mod):
        """Assembles the audio model graph for train, eval or predict mode.

        Args:
          data_paths: Paths passed to `util.read_examples`; when falsy a
            string placeholder named 'input' supplies the examples instead.
          batch_size: Batch size passed to `util.read_examples`.
          graph_mod: A `GraphMod` value selecting the kind of graph to build.

        Returns:
          The populated `GraphReferences` object. In PREDICT mode it is
          returned as soon as `predictions` has been set.
        """
        tensors = GraphReferences()
        is_training = graph_mod == GraphMod.TRAIN
        predicting = graph_mod == GraphMod.PREDICT

        # Example source: an input pipeline when paths are given, otherwise
        # a feedable placeholder.
        if not data_paths:
            tensors.examples = tf.placeholder(tf.string,
                                              name='input',
                                              shape=(None, ))
        else:
            example_keys, example_batch = util.read_examples(
                data_paths,
                batch_size,
                shuffle=is_training,
                num_epochs=None if is_training else 2)
            tensors.keys = example_keys
            tensors.examples = example_batch

        if not predicting:
            # Training and evaluation read preprocessed tf-examples that
            # already carry the fingerprint.
            with tf.name_scope('inputs'):
                uri_feature = tf.FixedLenFeature(shape=[],
                                                 dtype=tf.string,
                                                 default_value=[''])
                label_feature = tf.FixedLenFeature(
                    shape=[1],
                    dtype=tf.int64,
                    default_value=[self.label_count])
                fingerprint_feature = tf.FixedLenFeature(
                    shape=[self.fingerprint_size], dtype=tf.float32)
                parsed = tf.parse_example(
                    tensors.examples,
                    features={
                        'audio_uri': uri_feature,
                        'label': label_feature,
                        'fingerprint': fingerprint_feature,
                    })
                labels = tf.squeeze(parsed['label'])
                uris = tf.squeeze(parsed['audio_uri'])
                fingerprints = parsed['fingerprint']
        else:
            # Prediction computes fingerprints inside the graph, since its
            # input is not preprocessed.
            audio_input, fingerprints = self.build_fingerprints_graph()
            tensors.input_audio = audio_input

        all_labels_count = self.label_count

        # (architecture name, name scope, builder method); anything that
        # matches none of these falls back to the plain conv model.
        known_architectures = (
            ("lowlatencyconv", 'lowlatencyconv', 'add_low_latency_conv'),
            ("crnn", 'crnn', 'add_crnn'),
            ("rcnn", 'rcnn', 'add_rcnn'),
        )
        for arch_name, scope_name, builder_name in known_architectures:
            if self.model_architecture == arch_name:
                break
        else:
            scope_name, builder_name = 'conv', 'add_conv'

        with tf.name_scope(scope_name):
            build_model = getattr(self, builder_name)
            softmax, logits = build_model(
                fingerprints,
                all_labels_count,
                is_training,
                dropout_keep_prob=self.dropout if is_training else None)

        # Only the top-scoring label index is of interest.
        prediction = tf.argmax(softmax, 1)
        tensors.predictions = [prediction, softmax, fingerprints]

        if predicting:
            return tensors

        with tf.name_scope('evaluate'):
            loss_value = loss(logits, labels)

        # Gradient ops exist only in the training graph; evaluation just
        # needs a global_step variable.
        if not is_training:
            tensors.global_step = tf.Variable(0,
                                              name='global_step',
                                              trainable=False)
        else:
            train_op, step_var = training(loss_value)
            tensors.train = train_op
            tensors.global_step = step_var

        # Running means across all batches.
        loss_updates, loss_op = util.loss(loss_value)
        accuracy_updates, accuracy_op = util.accuracy(logits, labels)

        if not is_training:
            tf.summary.scalar('accuracy', accuracy_op)
            tf.summary.scalar('loss', loss_op)

        tensors.metric_updates = loss_updates + accuracy_updates
        tensors.metric_values = [loss_op, accuracy_op]
        return tensors