def embedding_lookup(params, ids, name='embedding_lookup'):
    """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup
  then, they are unflattend to match the original ids shape plus an extra
  leading dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indecies in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
    with ops.name_scope(name, 'embedding_lookup', [params, ids]):
        params = ops.convert_to_tensor(params)
        ids = ops.convert_to_tensor(ids)
        shape = array_ops_.shape(ids)
        ids_flat = array_ops_.reshape(
            ids, math_ops.reduce_prod(shape, keep_dims=True))
        # name is passed as a keyword; the third positional argument of
        # nn.embedding_lookup is partition_strategy, not name.
        embeds_flat = nn.embedding_lookup(params, ids_flat, name=name)
        embed_shape = array_ops_.concat([shape, [-1]], 0)
        embeds = array_ops_.reshape(embeds_flat, embed_shape)
        embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
        return embeds
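
For reference, a minimal NumPy sketch of the same flatten -> lookup -> reshape idea (the shapes and values below are hypothetical, not taken from the snippet above):

import numpy as np

params = np.arange(12, dtype=np.float32).reshape(6, 2)  # vocab of 6, embedding dim 2
ids = np.array([[1, 2, 3], [3, 4, 5]])                  # 2-D ids, shape (2, 3)

ids_flat = ids.reshape(-1)                              # flatten ids to 1-D
embeds_flat = params[ids_flat]                          # plain row gather
embeds = embeds_flat.reshape(ids.shape + (-1,))         # original ids shape + embedding dim
print(embeds.shape)                                     # (2, 3, 2)
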
Example #2
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup
  then, they are unflattend to match the original ids shape plus an extra
  leading dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indecies in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    # name is passed as a keyword; the third positional argument of
    # nn.embedding_lookup is partition_strategy, not name.
    embeds_flat = nn.embedding_lookup(params, ids_flat, name=name)
    embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
Example #3
      def network(x, y1, y2, la, lr):
        del x
        with variable_scope.variable_scope("vs", use_resource=True):
          w = variable_scope.get_variable(
              "w",
              shape=[200, 10],
              dtype=np.float32,
              initializer=init_ops.constant_initializer(2.))
          y = array_ops.reshape(w, [10, 200])
          g1 = nn.embedding_lookup(y, y1)
          g2 = nn.embedding_lookup(y, y2)
          g = array_ops.concat([g1, g2], axis=1)

          ce = losses.absolute_difference(labels=la, predictions=g)
          loss = math_ops.reduce_mean(ce)

        optimizer = gradient_descent.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)
        return loss, train
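
A small NumPy shape check of the lookups above (the id batches are hypothetical): reshaping the [200, 10] weight to [10, 200] makes each id select a 200-wide row, so concatenating the two lookups along axis=1 yields [batch, 400].

import numpy as np

table = np.full((10, 200), 2.0, dtype=np.float32)   # stands in for w reshaped to [10, 200]
y1 = np.array([0, 3, 7])                            # hypothetical id batch
y2 = np.array([1, 4, 9])                            # hypothetical id batch
g = np.concatenate([table[y1], table[y2]], axis=1)  # NumPy analogue of the two lookups + concat
print(g.shape)                                      # (3, 400)
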
Example #4
    def __init__(self,
                 cfg,
                 word_embd,
                 max_ques_len,
                 input_producer,
                 generated=None):
        batch_size = cfg.batch_size
        vocab_size = len(word_embd)
        with tf.variable_scope('disc'):
            word_embd = tf.get_variable(
                'word_embd',
                shape=word_embd.shape,
                initializer=tf.constant_initializer(word_embd))
            if generated:
                self.ques = generated['ques']
                self.ques_len = generated['ques_len']

                # soft embedding_lookup
                ques = tf.reshape(self.ques, [-1, vocab_size])
                ques = tf.matmul(ques, word_embd)
                ques = tf.reshape(ques, [batch_size, -1, cfg.embed_dim])
            else:
                self.ques = tf.placeholder(tf.int32,
                                           shape=[None, max_ques_len],
                                           name='question')
                self.ques_len = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='question_length')
                ques = embedding_lookup(word_embd, self.ques)
            self.answ = input_producer.answ_disc
            cell = GRUCell(cfg.hidden_size)
            _, state = dynamic_rnn(cell,
                                   ques,
                                   sequence_length=self.ques_len,
                                   dtype=tf.float32)
            output_layer = Dense(vocab_size)
            logits = output_layer(state)
            labels = tf.one_hot(self.answ, vocab_size)
            self.pred = tf.argmax(logits, 1)
            loss = softmax_cross_entropy_with_logits(labels=labels,
                                                     logits=logits)
            self.loss = tf.reduce_mean(loss)
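
The "soft embedding_lookup" branch above multiplies a distribution over the vocabulary by the embedding matrix instead of indexing into it. A minimal NumPy sketch (sizes hypothetical) shows that this matmul reduces to an ordinary lookup when the distribution is one-hot:

import numpy as np

vocab_size, embed_dim = 5, 3
word_embd = np.random.rand(vocab_size, embed_dim).astype(np.float32)

hard_id = 2
one_hot = np.eye(vocab_size, dtype=np.float32)[hard_id]  # degenerate "soft" distribution
soft = one_hot @ word_embd                               # soft lookup: distribution x embedding matrix
hard = word_embd[hard_id]                                # hard lookup: plain row indexing
print(np.allclose(soft, hard))                           # True
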
Example #5
    def __init__(self, input_producer, embed_mat, config, is_train):
        x_enc = input_producer.x_enc
        x_dec = input_producer.x_dec
        y_dec = input_producer.y_dec
        len_enc = input_producer.len_enc
        len_dec = input_producer.len_dec
        self.answer = input_producer.answ_disc

        max_len = input_producer.seq_max_length
        vocab_num = input_producer.vocab_num
        config.update(**dict(max_len=max_len, vocab_num=vocab_num))
        # import ipdb; ipdb.set_trace()
        # name must be passed as a keyword; tf.Variable's second positional
        # argument is trainable, not name.
        self.kl_weight = tf.Variable(0.0, name="KL_weight")
        self.input_ids = y_dec

        modeler = CtrlVAEModelingHelper(config, embed_mat)

        with tf.variable_scope("CtrlVAE"):

            ### VAE ############################################################

            # encoder
            x_enc_onehot = tf.one_hot(x_enc, vocab_num)
            out_tuple = modeler.encoder(x_enc_onehot=x_enc_onehot,
                                        len_enc=len_enc)
            (vae_z, vae_mu, vae_logvar) = out_tuple

            # holistic representation
            with tf.device("/cpu:0"):
                vae_c = embedding_lookup(modeler.embed, self.answer)
            vae_c = tf.reshape(vae_c, [config.batch_size, -1])
            vae_represent = tf.concat([vae_z, vae_c], axis=1)

            # decoder
            x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
            out_tuple = modeler.decoder(initial_state=vae_represent,
                                        x_dec_onehot=x_dec_onehot,
                                        len_dec=len_dec,
                                        is_teacher_forcing=True)

            (vae_outputs, vae_state, vae_outputs_len) = out_tuple  # final
            (self.vae_output, self.vae_sample) = vae_outputs

            ### Generator ######################################################

            # random z and c from the prior
            self.gen_z = tf.random_normal(
                [config.batch_size, config.hidden_size])
            self.gen_c = vae_c
            gen_represent = tf.concat([self.gen_z, self.gen_c], axis=1)

            # generator (decoder)
            x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
            out_tuple = modeler.decoder(initial_state=gen_represent,
                                        x_dec_onehot=x_dec_onehot,
                                        len_dec=len_dec,
                                        is_teacher_forcing=True,
                                        reuse=True)

            (gen_outputs, gen_state, gen_outputs_len) = out_tuple  # final
            (self.gen_output, self.gen_sample) = gen_outputs
            # temperature -> 0: dividing the logits by ALMOST_ZERO makes the
            # softmax output approximately one-hot (a differentiable argmax).
            gen_outputs_onehot = softmax(self.gen_output / ALMOST_ZERO)

            # discriminator (for c code)
            out_tuple = modeler.discriminator(inputs=gen_outputs_onehot,
                                              inputs_length=gen_outputs_len)
            (self.gen_c_output, self.gen_c_sample) = out_tuple

            # encoder again (for z code ; additional discriminator)
            out_tuple = modeler.encoder(x_enc_onehot=gen_outputs_onehot,
                                        len_enc=gen_outputs_len,
                                        reuse=True)
            (gen_z, dis_mu, dis_logvar) = out_tuple

            ### Discriminator ##################################################

            # discriminator (for training)
            x_dis_onehot = tf.one_hot(x_enc, config.vocab_num)
            out_tuple = modeler.discriminator(inputs=x_dis_onehot,
                                              inputs_length=gen_outputs_len,
                                              reuse=True)
            (self.dis_outputs, self.dis_sample) = out_tuple

        ########################################################################
        # get all the variables in this scope
        self.vars = get_variables("CtrlVAE")
        self.enc_vars = get_variables("CtrlVAE/encoder")
        self.gen_vars = get_variables("CtrlVAE/decoder")
        self.dis_vars = get_variables("CtrlVAE/discriminator")
        self.vae_vars = self.enc_vars + self.gen_vars
        ########################################################################
        # compute AE loss (reconstruction)
        len_out = tf.reduce_max(vae_outputs_len)
        targets = y_dec[:, :len_out]
        weights = tf.sequence_mask(vae_outputs_len, dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.vae_output,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=False,
                                     average_across_batch=False)

        # NOTE: fix later!
        loss_sum = tf.reduce_sum(softmax_loss, axis=1)
        self.ae_loss = self.ae_loss_mean = tf.reduce_mean(loss_sum, axis=0)
        #self.ae_loss_mean = tf.reduce_mean(softmax_loss)

        # compute KL loss (regularization)
        KL_term = 1 + vae_logvar - tf.pow(vae_mu, 2) - tf.exp(vae_logvar)
        self.kl_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.kl_loss_mean = tf.reduce_mean(self.kl_loss)

        # VAE total loss
        self.vae_loss = self.ae_loss + self.kl_weight * self.kl_loss_mean
        ########################################################################
        # c code loss
        answer_labels = tf.one_hot(self.answer, config.vocab_num)
        c_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                   logits=self.gen_c_output)
        self.c_loss = tf.reduce_mean(c_loss)

        # z code loss
        mu_loss = mean_pairwise_squared_error(vae_mu, dis_mu)
        logvar_loss = mean_pairwise_squared_error(vae_logvar, dis_logvar)
        self.z_loss = (mu_loss + logvar_loss) / 2

        # generator total loss
        self.gen_loss = self.c_loss + self.z_loss
        ########################################################################
        # discriminator training loss
        dis_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                     logits=self.dis_outputs)
        self.dis_loss = tf.reduce_mean(dis_loss)
        ########################################################################

        # optimization
        lr = config.learning_rate
        self.vae_lr = tf.Variable(lr, trainable=False, name="vae_lr")
        self.gen_lr = tf.Variable(0.0, trainable=False, name="gen_lr")
        self.dis_lr = tf.Variable(lr, trainable=False, name="dis_lr")

        vae_optim = tf.train.AdamOptimizer(self.vae_lr)
        gen_optim = tf.train.AdamOptimizer(self.gen_lr)
        dis_optim = tf.train.AdamOptimizer(self.dis_lr)

        vae_grads = tf.gradients(self.vae_loss, self.vae_vars)
        gen_grads = tf.gradients(self.gen_loss, self.gen_vars)
        dis_grads = tf.gradients(self.dis_loss, self.dis_vars)

        vae_grads, _ = tf.clip_by_global_norm(vae_grads, config.max_grad_norm)
        gen_grads, _ = tf.clip_by_global_norm(gen_grads, config.max_grad_norm)
        dis_grads, _ = tf.clip_by_global_norm(dis_grads, config.max_grad_norm)

        self.global_step = get_or_create_global_step()
        self.vae_train = vae_optim.apply_gradients(
            zip(vae_grads, self.vae_vars))
        self.gen_train = gen_optim.apply_gradients(
            zip(gen_grads, self.gen_vars))
        self.dis_train = dis_optim.apply_gradients(
            zip(dis_grads, self.dis_vars), self.global_step)

        # learning_rate update
        self.new_gen_lr = tf.placeholder(tf.float32,
                                         shape=[],
                                         name="new_gen_lr")
        self.gen_lr_update = tf.assign(self.gen_lr, self.new_gen_lr)

        # KL weight update
        self.new_kl_weight = tf.placeholder(tf.float32,
                                            shape=[],
                                            name="new_kl")
        self.kl_weight_update = tf.assign(self.kl_weight, self.new_kl_weight)

        # summaries
        tf.summary.scalar("Loss/ae_mean", self.ae_loss_mean)
        tf.summary.scalar("Loss/kl_mean", self.kl_loss_mean)
        tf.summary.scalar("Loss/Total", self.ae_loss_mean + self.kl_loss_mean)
        tf.summary.scalar("Misc/kl_weight", self.kl_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(vae_mu))
        tf.summary.scalar("Misc/logvar_mean", tf.reduce_mean(vae_logvar))
        tf.summary.scalar("Misc/gen_lr", self.gen_lr)
        self.summary_op = tf.summary.merge_all()
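
The KL term above is the closed-form KL divergence between N(mu, sigma^2) and the standard normal prior. A small NumPy check of that formula (the mu/logvar values are made up for illustration):

import numpy as np

mu = np.array([0.5, -1.0])                               # hypothetical latent mean
logvar = np.array([0.1, -0.3])                           # hypothetical latent log-variance
kl = -0.5 * np.sum(1 + logvar - mu**2 - np.exp(logvar))  # same KL_term as in the model
print(kl)                                                # per-example KL loss (a scalar here)
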
Example #6
  def test_indexed_slice(self):
    inp = random_ops.random_uniform([3, 2])
    output = nn.embedding_lookup(inp, [0, 2])
    pfor_jacobian = gradients.jacobian(output, inp, use_pfor=True)
    while_jacobian = gradients.jacobian(output, inp, use_pfor=False)
    self.run_and_assert_equal(while_jacobian, pfor_jacobian)
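
For context, the gradient of nn.embedding_lookup with respect to the table arrives as tf.IndexedSlices (sparse per-row updates), which is what the pfor and while-loop Jacobians above have to agree on. A minimal standalone TF 1.x sketch, not part of the test harness:

import tensorflow as tf

table = tf.get_variable("table", shape=[3, 2])
out = tf.nn.embedding_lookup(table, [0, 2])
grad = tf.gradients(tf.reduce_sum(out), table)[0]
print(type(grad))  # tf.IndexedSlices: sparse gradient covering only the looked-up rows
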
Example #7
  def testEmbeddingLookupBatchSize2(self):
    ids = constant_op.constant([[1, 2, 3], [3, 4, 5]])
    paras = np.array([[10], [20], [80], [40], [50], [60]])
    emb_lookup_tf = nn.embedding_lookup(paras, ids)
    emb_lookup_ipu = kerasIPUEmbeddingLookup(paras, ids, name="emb_test_1")
    self.assertAllClose(emb_lookup_tf, emb_lookup_ipu)
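
The reference behaviour being compared above is an ordinary row gather; a NumPy check with the same ids and table (kerasIPUEmbeddingLookup itself is IPU-specific and not reproduced here):

import numpy as np

ids = np.array([[1, 2, 3], [3, 4, 5]])
paras = np.array([[10], [20], [80], [40], [50], [60]])
print(np.take(paras, ids, axis=0))  # shape (2, 3, 1): one embedding row per id
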
Example #8
    def __init__(self, input_producer, embed_mat, config, is_train):

        with tf.variable_scope("VAE") as var_scope:
            x_enc = input_producer.x_enc
            x_dec = input_producer.x_dec
            y_dec = input_producer.y_dec
            len_enc = input_producer.len_enc
            len_dec = input_producer.len_dec

            max_len = input_producer.seq_max_length
            vocab_num = input_producer.vocab_num
            batch_size = config.batch_size
            hidden_size = config.hidden_size
            embed_dim = config.embed_dim

            is_GRU = config.is_GRU
            is_argmax_sampling = config.is_argmax_sampling
            word_keep_prob = config.word_dropout_keep_prob
            max_grad_norm = config.max_grad_norm
            learning_rate = config.learning_rate

            # name must be passed as a keyword; tf.Variable's second positional
            # argument is trainable, not name.
            self.KL_weight = tf.Variable(0.0, name="KL_weight")
            self.input_ids = y_dec

            def _lstm_cell():
                return BasicLSTMCell(num_units=hidden_size,
                                     forget_bias=1.0,
                                     state_is_tuple=True,
                                     reuse=tf.get_variable_scope().reuse)
            def _gru_cell():
                return GRUCell(num_units=hidden_size,
                               reuse=tf.get_variable_scope().reuse)

            cell = _gru_cell if is_GRU else _lstm_cell
            self.initial_state = cell().zero_state(batch_size, tf.float32)


            # encoder
            with tf.device("/cpu:0"):
                embed_init = tf.constant_initializer(embed_mat)\
                                if (embed_mat is not None) else None
                embedding = tf.get_variable("embedding", [vocab_num, embed_dim],
                                             initializer=embed_init,
                                             trainable=True)
                in_enc = embedding_lookup(embedding, x_enc)



            with tf.variable_scope("encoder"):
                out_tuple = dynamic_rnn(cell=cell(),
                                        inputs=in_enc,
                                        sequence_length=len_enc,
                                        initial_state=self.initial_state)
                (_, encoder_hidden) = out_tuple

                # linear layers for mu and log(var)
                latent_dim = hidden_size # may have to change this later
                W_mu = tf.get_variable("W_mu", [hidden_size,latent_dim])
                b_mu = tf.get_variable("b_mu", [latent_dim])
                W_logvar = tf.get_variable("W_logvar", [hidden_size,latent_dim])
                b_logvar = tf.get_variable("b_logvar", [latent_dim])
                #l2_loss = tf.nn.l2_loss(W_mu) + tf.nn.l2_loss(W_logvar)

                mu = tf.matmul(encoder_hidden, W_mu) + b_mu
                logvar = tf.matmul(encoder_hidden, W_logvar) + b_logvar

                # sample epsilon
                epsilon = tf.random_normal(tf.shape(logvar), name='epsilon')

                # sample latent variable
                stddev = tf.exp(0.5 * logvar) # standard deviation
                self.z = mu + tf.multiply(stddev, epsilon)

            # decoder
            with tf.device("/cpu:0"):
                in_dec = embedding_lookup(embedding, x_dec)

            with tf.variable_scope("decoder"):

                helper = WordDropoutTrainingHelper(
                                      inputs=in_dec,
                                      sequence_length=len_dec,
                                      embedding=embedding,
                                      dropout_keep_prob=word_keep_prob,
                                      drop_token_id=UNK_ID,
                                      is_argmax_sampling=is_argmax_sampling)

                # projection layer
                output_layer = Dense(units=vocab_num,
                                     activation=None,
                                     use_bias=True,
                                     trainable=True)

                # decoder
                decoder = BasicDecoder(cell=cell(),
                                       helper=helper,
                                       initial_state=self.z,
                                       output_layer=output_layer)

                # dynamic_decode
                out_tuple = dynamic_decode(decoder=decoder,
                                           output_time_major=False, #  speed
                                           impute_finished=True)

            # get all the variables in this scope
            self.vars = tf.contrib.framework.get_variables(var_scope)

        # (outputs, state, sequence_length)
        (self.outputs, _, self.cell_outputs_len) = out_tuple # final

        # (cell_outputs, sample_ids)
        (self.cell_outputs, self.sampled_ids) = self.outputs

        # compute softmax loss (reconstruction)
        len_out = tf.reduce_max(len_dec)
        targets = y_dec[:,:len_out]
        weights = tf.sequence_mask(self.cell_outputs_len, dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.cell_outputs,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=True,
                                     average_across_batch=True)

        self.AE_loss = self.AE_loss_mean = softmax_loss

        # compute KL loss (regularization)
        KL_term = 1 + logvar - tf.pow(mu, 2) - tf.exp(logvar)
        self.KL_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.KL_loss_mean = tf.reduce_mean(self.KL_loss)

        # total loss
        self.loss = self.AE_loss + self.KL_weight * self.KL_loss_mean

        # optimization
        self.lr = tf.Variable(learning_rate, trainable=False, name="lr")

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, self.vars),
                                          max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)

        self.global_step = get_or_create_global_step()
        self.train_op = optimizer.apply_gradients(zip(grads, self.vars),
                                                  global_step=self.global_step)

        # learning_rate update
        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
        self.lr_update = tf.assign(self.lr, self.new_lr)

        # KL weight update
        self.new_KL_weight = tf.placeholder(tf.float32, shape=[], name="new_kl")
        self.KL_weight_update = tf.assign(self.KL_weight, self.new_KL_weight)

        # summaries
        tf.summary.scalar("Loss/AE_mean", self.AE_loss_mean)
        tf.summary.scalar("Loss/KL_mean", self.KL_loss_mean)
        tf.summary.scalar("Loss/Total", self.AE_loss_mean + self.KL_loss_mean)
        tf.summary.scalar("Misc/KL_weight", self.KL_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(mu))
        tf.summary.scalar("Misc/sigma_mean", tf.reduce_mean(stddev))
        tf.summary.scalar("Misc/learning_rate", self.lr)
        self.summary_op = tf.summary.merge_all()
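
The sampling step above is the usual reparameterization trick, z = mu + exp(0.5 * logvar) * eps with eps ~ N(0, 1). A quick NumPy sanity check (the mu/logvar values are made up):

import numpy as np

rng = np.random.default_rng(0)
mu, logvar = 1.5, np.log(0.25)       # hypothetical latent statistics
eps = rng.standard_normal(100000)    # eps ~ N(0, 1)
z = mu + np.exp(0.5 * logvar) * eps  # reparameterized sample
print(z.mean(), z.std())             # approximately 1.5 and 0.5
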
Example #9
  def network(w, y):
    g = nn.embedding_lookup(w, y)
    return g
Example #10
    def __init__(
      self, sequence_length, num_classes, vocab_size,
      embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0, embedding_type='static'):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):

            # use pretrained word2vec embeddings
            if embedding_type == 'static':
                ids = self.input_x
                # NOTE: this variable is created but never used below; the
                # lookup runs against `params`, built from the loaded w2v matrix.
                embeddings = vs.get_variable('w2v' + "_embeddings",
                                             [num_classes, embedding_size])
                name = "embedding_lookup"
                ids = ops.convert_to_tensor(ids)

                # used to load w2v as follows
                # w2v = word2vec_basic.load_word_2_vec()

                # load from nm.save()
                w2v = np.load("w2v.model.en.npy")
                print("Loaded w2v....")

                params = tf.Variable(w2v)

                # shape
                shape = array_ops_.shape(ids)

                # concatenates all the ids from all the sentences
                ids_flat = array_ops_.reshape(ids, math_ops.reduce_prod(shape, keep_dims=True))
                #
                embeds_flat = nn.embedding_lookup(params, ids_flat, name)
                embed_shape = array_ops_.concat(0, [shape, [-1]])
                embeds = array_ops_.reshape(embeds_flat, embed_shape)
                embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))

                self.embedded_chars = embeds
                self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)


            else:
                W = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                    name="W")

                self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
                self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            # named arguments; later TF 1.x releases reject positional logits/labels here
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores,
                                                             labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
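
A small arithmetic sketch of the shapes in the conv/max-pool block above (the hyperparameter values are hypothetical): each VALID convolution of width filter_size leaves sequence_length - filter_size + 1 positions, the max-pool collapses those to one, and the flattened feature width is num_filters * len(filter_sizes).

sequence_length, num_filters = 56, 128  # hypothetical hyperparameters
filter_sizes = [3, 4, 5]

for filter_size in filter_sizes:
    conv_len = sequence_length - filter_size + 1  # VALID conv output length
    print(filter_size, conv_len)                  # the max-pool reduces this to 1
print("h_pool_flat width:", num_filters * len(filter_sizes))  # 384
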