Example #1
def get_composite_function():
    x = constant([1.0, 0.9])
    w1 = constant([1.0, 0.8])
    w2 = constant(0.9)
    w3 = constant(0.75)
    w4 = constant(0.92)
    w5 = constant(0.70)
    y_pred = constant(1.0)

    s1 = tf.reduce_sum(x * w1)
    a1 = sigmoid(s1)
    s2 = a1 * w2
    a2 = sigmoid(s2)
    s3 = a2 * w3
    a3 = sigmoid(s3)
    s4 = a3 * w4
    a4 = sigmoid(s4)
    s5 = a4 * w5
    a5 = sigmoid(s5)

    out = {
        'x': x,
        'y': y_pred,
        'w1': w1,
        'a1': a1,
        'w2': w2,
        'a2': a2,
        'w3': w3,
        'a3': a3,
        'w4': w4,
        'a4': a4,
        'w5': w5,
        'a5': a5
    }
    return out
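A minimal call sketch for the function above, assuming `constant` and `sigmoid` come from TensorFlow's top-level namespace (the snippet's imports are not shown):

import tensorflow as tf
from tensorflow import constant, sigmoid

vals = get_composite_function()
print(vals['a5'])  # final activation of the five-stage sigmoid chain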
Example #2
    def create_model(self,
                     model_input,
                     vocab_size,
                     l2_penalty=1e-8,
                     is_training=True,
                     input_size=1024 + 128,
                     **unused_params):
        """Creates a Multi Layered Perceptron model.
    """
        # General transform layer
        fc1 = slim.fully_connected(
            model_input,
            input_size,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope='fc1')
        bn1 = slim.batch_norm(fc1, is_training=is_training, scope='bn1')
        relu1 = nn.relu(bn1, name='relu1')

        # Coarse classification
        coarse_scores = slim.fully_connected(
            relu1,
            25,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope='coarse')

        # Concatenate p(coarse) and prior features
        concat = tf.concat([relu1, coarse_scores], -1, name='concat')

        # Specific transform layer
        fc2 = slim.fully_connected(
            concat,
            input_size + 25,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope='fc2')
        bn2 = slim.batch_norm(fc2, is_training=is_training, scope='bn2')
        relu2 = nn.relu(bn2, name='relu2')

        # Final classifier
        classifier = slim.fully_connected(
            relu2,
            vocab_size,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope='classifier')

        final_probs = nn.sigmoid(classifier, name='final_probs')
        coarse_probs = nn.sigmoid(coarse_scores, name='coarse_probs')

        return {"predictions": final_probs, "coarse_predictions": coarse_probs}
Example #3
 def discriminator_2(self, x, name):
     print('making D2-network')
     with tf.variable_scope('D2'):
         #Cv0
         conv0 = conv_layer(x, [self.KERNEL_SIZE, self.KERNEL_SIZE, 7, 64],
                            1, str(name + 'd_wc0'), True, False)
         print('Cv0')
         print(conv0.get_shape())  # static shape known at graph-construction time
         #Cv1 d_wc1
         conv1 = conv_layer(conv0,
                            [self.KERNEL_SIZE, self.KERNEL_SIZE, 64, 128],
                            1, str(name + 'd_wc1'))
         print('Cv1')
         print(conv1.get_shape())
         #Cv2
         conv2 = conv_layer(conv1,
                            [self.KERNEL_SIZE, self.KERNEL_SIZE, 128, 256],
                            1, str(name + 'd_wc2'))
         print('Cv2')
         print(conv2.get_shape())
         #Cv3
         conv3 = conv_layer(conv2,
                            [self.KERNEL_SIZE, self.KERNEL_SIZE, 256, 512],
                            1, str(name + 'd_wc3'))
         print('Cv3')
         print(conv3.get_shape())
         #Cv4
         conv4 = conv_layer(conv3,
                            [self.KERNEL_SIZE, self.KERNEL_SIZE, 512, 1], 1,
                            str(name + 'd_wc4'), False, False)
         print('Cv4')
         print(conv4.get_shape())
         conv4 = nn.sigmoid(conv4)
         return conv4
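Example #4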
    def train_layers(self, train_x, train_y, test_x, test_y):
        X = tf.placeholder(tf.float32,
                           shape=[
                               None, self.opt['bands'], self.opt['frames'],
                               self.opt['num_channels']
                           ])
        Y = tf.placeholder(tf.float32, shape=[None, self.opt['n_classes']])

        conv_layer = self.apply_convolution(train_x, self.opt['k_size'],
                                            self.opt['num_channels'],
                                            self.opt['depth'])

        shape = conv_layer.get_shape().as_list()
        conv_flat = tf.reshape(conv_layer, [-1, shape[1] * shape[2] * shape[3]])

        f_weights = self.weight_variable(
            [shape[1] * shape[2] * self.opt['depth'], self.opt['num_hidden']])
        f_biases = self.bias_variable([self.opt['num_hidden']])
        f = nn.sigmoid(tf.add(tf.matmul(conv_flat, f_weights), f_biases))

        out_weights = self.weight_variable(
            [self.opt['num_hidden'], self.opt['n_classes']])
        out_biases = self.bias_variable([self.opt['n_classes']])
        out = nn.softmax(tf.matmul(f, out_weights) + out_biases)

        cost = tf.reduce_mean(
            -tf.reduce_sum(Y * tf.log(out), reduction_indices=[1]))
        #cross_entropy = -tf.reduce_sum(Y * tf.log(out))
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.opt['learning_rate']).minimize(cost)
        correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        cost_history = np.empty(shape=[1], dtype=float)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for i in range(training_epochs):
                offset = (i * self.opt['batch_size']) % (
                    train_y.shape[0] - self.opt['batch_size'])
                batch_x = train_x[offset:(offset + self.opt['batch_size']), :, :, :]
                batch_y = train_y[offset:(offset + self.opt['batch_size']), :]

                _, loss = sess.run([optimizer, cost],
                                   feed_dict={
                                       X: batch_x,
                                       Y: batch_y
                                   })
                cost_history = np.append(cost_history, loss)

            print(
                'Test accuracy: ',
                round(sess.run(accuracy, feed_dict={
                    X: test_x,
                    Y: test_y
                }), 3))
            fig = plt.figure(figsize=(15, 10))
            plt.plot(cost_history)
            plt.axis([0, training_epochs, 0, np.max(cost_history)])
            plt.show()
Example #5
def discriminator(x, alpha=0.01, reuse=tf.AUTO_REUSE):
    with tf.variable_scope('dis', reuse=reuse):
        with tf.variable_scope("conv1"):
            conv1 = default_conv2d(x, 16)
            conv1 = nn.relu(conv1)

        with tf.variable_scope("conv2"):
            conv2 = default_conv2d(conv1, 32)
            conv2 = nn.relu(conv2)

        with tf.variable_scope("conv3"):
            conv3 = default_conv2d(conv2, 64)
            conv3 = nn.relu(conv3)

        with tf.variable_scope("conv4"):
            conv4 = default_conv2d(conv3, 128)
            conv4 = nn.relu(conv4)

        with tf.variable_scope("linear"):
            linear = clayers.flatten(conv4)
            linear = clayers.fully_connected(linear, 1)

        with tf.variable_scope("out"):
            out = nn.sigmoid(linear)
    return out
Example #6
def discriminator(x):
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        with tf.variable_scope("conv1"):
            conv1 = default_conv2d(x, 128)
            conv1 = nn.leaky_relu(conv1, alpha=0.2)

        with tf.variable_scope("conv2"):
            conv2 = default_conv2d(conv1, 256)
            conv2 = layers.batch_normalization(conv2)
            conv2 = nn.leaky_relu(conv2, alpha=0.2)

        with tf.variable_scope("conv3"):
            conv3 = default_conv2d(conv2, 512)
            conv3 = layers.batch_normalization(conv3)
            conv3 = nn.leaky_relu(conv3, alpha=0.2)

        with tf.variable_scope("conv4"):
            conv4 = default_conv2d(conv3, 1024)
            conv4 = layers.batch_normalization(conv4)
            conv4 = nn.leaky_relu(conv4, alpha=0.2)

        with tf.variable_scope("linear"):
            linear = clayers.flatten(conv4)
            linear = clayers.fully_connected(linear, 1)

        with tf.variable_scope("out"):
            out = nn.sigmoid(linear)
    return out
Example #7
def ACT(inputs, act_fn):
    if act_fn == 'relu':
        act = relu(inputs)
    elif act_fn == 'lrelu':
        act = leaky_relu(inputs)
    elif act_fn == 'sigmoid':
        act = sigmoid(inputs)
    else:
        act = inputs
    return act
Example #8
 def select_best_rlns(self, n_adj, g):
     #  map h onto adj_mat using h_to_a() function
     # make NMF on this adj_mat
     # infer relationships from the kernel (kernel output by random walker algorithm)
     # need to be created once at the init of master network.
     # self.sig =
     prob_mat = nn.sigmoid(n_adj)
     return prob_mat
Example #9
def make_unet_estimator(features, labels, mode, params):
    """
    Creation of tf.estimator.Estimator for U-net

    :param features: tf.Tensor: images
    :param labels: tf.Tensor: masks
    :param mode: tf.estimator.ModeKeys
    :param params: dict: params of the model
    :return: tf.estimator.EstimatorSpec
    """
    orig_images = features
    true_masks = labels
    model = params["model"]
    net = model(orig_images, **params["model_params"])
    predictions = {
        "predicted_soft_masks": sigmoid(net),
        "predicted_masks": tf.round(sigmoid(net))
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions)

    loss = get_loss(net, true_masks, lam=params["IOU_weight"])

    if params["create_summary"]:
        data = {
            "scalar": [("Loss", loss), ("IOU", IOU(net, true_masks))],
            "image": [('Original image', orig_images),
                      ("Original_masks", true_masks), ("Predicted_masks", sigmoid(net))]
        }
        create_summary(data)

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(params["learning_rate"], global_step,
                                                   params["lr_decay_steps"], params["lr_decay_rate"], staircase=True)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        optim = tf.train.AdamOptimizer(learning_rate=learning_rate)
        with tf.control_dependencies(update_ops):
            train_op = optim.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    eval_metric_ops = {"IOU": tf.metrics.mean(IOU(net, labels))}

    return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)
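A minimal wiring sketch for the model_fn above, assuming TF 1.x; the `unet_model` builder and the model_dir are hypothetical, and the params keys simply mirror the ones read inside `make_unet_estimator`:

import tensorflow as tf

def unet_model(images):
    # Hypothetical stand-in builder; any callable returning per-pixel logits works.
    return tf.layers.conv2d(images, 1, 3, padding='same')

params = {
    "model": unet_model,
    "model_params": {},
    "IOU_weight": 0.5,
    "create_summary": False,
    "learning_rate": 1e-3,
    "lr_decay_steps": 1000,
    "lr_decay_rate": 0.95,
}

estimator = tf.estimator.Estimator(
    model_fn=make_unet_estimator,
    model_dir="/tmp/unet",
    params=params)
# estimator.train(input_fn=...) / estimator.evaluate(input_fn=...)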
Example #10
def ACT(inputs, act_fn):
    if act_fn == 'relu':
        act = relu(inputs)
    elif act_fn == 'lrelu':
        act = leaky_relu(inputs)
    elif act_fn == 'sigmoid':
        act = sigmoid(inputs)
    elif act_fn == 'tanh':
        act = tanh(inputs)
    else:
        act = inputs
    return act
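A quick call sketch for ACT, assuming relu, leaky_relu, sigmoid and tanh are aliases of their tf.nn counterparts (the snippet's imports are not shown):

import tensorflow as tf
from tensorflow import nn

# Assumed aliases for the helpers ACT dispatches to.
relu, leaky_relu, sigmoid, tanh = nn.relu, nn.leaky_relu, nn.sigmoid, nn.tanh

x = tf.constant([[-1.0, 0.0, 1.0]])
y = ACT(x, 'sigmoid')  # values squashed into (0, 1)
z = ACT(x, 'other')    # unknown names fall through to the identity branch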
Example #11
def myconcat(mytensor, top_k, center):

    mytensor = tf.expand_dims(mytensor, axis=2)
    mytensor = tf.tile(input=mytensor, multiples=[1, 1, 48])
    temp = tf.multiply(mytensor, top_k)
    # center = tf.reduce_sum(center, axis=1)
    outs = tf.concat([center, temp], axis=1)
    # outs = outs.sum(axis=1)
    # outs = tf.reduce_max(outs, 1)
    outs = tf.reduce_sum(outs, 1)
    outs = nn.sigmoid(outs)
    return outs
Example #12
def discriminator(x):
    """Start addition"""
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        #relu - convolutional layers
        with tf.variable_scope("conv1"):
            conv1_1 = conv_layer(x, "conv1_1")
            conv1_2 = conv_layer(conv1_1, "conv1_2")
            pool1 = max_pool(conv1_2, "pool1")

        with tf.variable_scope("conv2"):
            conv2_1 = conv_layer(pool1, "conv2_1")
            conv2_2 = conv_layer(conv2_1, "conv2_2")
            pool2 = max_pool(conv2_2, "pool2")

        with tf.variable_scope("conv3"):
            conv3_1 = conv_layer(pool2, "conv3_1")
            conv3_2 = conv_layer(conv3_1, "conv3_2")
            conv3_3 = conv_layer(conv3_2, "conv3_3")
            conv3_4 = conv_layer(conv3_3, "conv3_4")
            pool3 = max_pool(conv3_4, "pool3")

        with tf.variable_scope("conv4"):
            conv4_1 = conv_layer(pool3, "conv4_1")
            conv4_2 = conv_layer(conv4_1, "conv4_2")
            conv4_3 = conv_layer(conv4_2, "conv4_3")
            conv4_4 = conv_layer(conv4_3, "conv4_4")
            pool4 = max_pool(conv4_4, "pool4")

        with tf.variable_scope("conv5"):
            conv5_1 = conv_layer(pool4, "conv5_1")
            conv5_2 = conv_layer(conv5_1, "conv5_2")
            conv5_3 = conv_layer(conv5_2, "conv5_3")
            conv5_4 = conv_layer(conv5_3, "conv5_4")

        #fully connected layer to determine if the image is fake or real

        with tf.variable_scope("linear"):
            linear = layers.flatten(conv5_4)
            linear = layers.dense(
                linear,
                2,
                use_bias=False,
                kernel_initializer=tf.initializers.random_normal(0.0, 0.1))

        with tf.variable_scope("out"):
            out = nn.sigmoid(linear)

        return out
Example #13
def graph_conv(adj_m,
               outs,
               num_out,
               adj_keep_r,
               keep_r,
               is_train,
               scope,
               k=5,
               coord_tensor=None,
               act_fn=tf.nn.relu6,
               count_adj=None,
               **kw):
    num_in = outs.shape[-1].value
    adj_m = dropout(adj_m, adj_keep_r, is_train, scope + '/drop1')
    center, top_k = top_k_features_no_concat(adj_m, outs, k, scope + '/top_k')
    outs = top_k_features(adj_m, outs, k, scope + '/top_kkk')
    if top_k.shape[2] == 48:
        mytensor = mylayer(center, top_k, coord_tensor, 0.4, is_train)
        outs = myconcat(mytensor, top_k, center)
    else:
        outs = tf.reduce_sum(outs, axis=1)
        outs = nn.sigmoid(outs)

        # outs = nn.dropout(outs, keep_prob=0.5)
        # outs = tf.layers.dense(outs, 32)
        # outs = batch_norm(outs, is_train=is_train, scope='sdfdfss'+'/norm2', act_fn= tf.nn.relu)
        # outs = nn.elu(outs)
        # outs =
        # return outs
        '''
        outs = dropout(outs, keep_r, is_train, scope+'/drop1')
        outs = conv1d(outs, 20, (k+1)//2+1, scope+'/conv1', None, True)
        outs = act_fn(outs, scope+'act1') if act_fn else outs
        outs = dropout(outs, keep_r, is_train, scope+'/drop2')
        outs = conv1d(outs, 32, k//2+1, scope+'/conv2', None)
        outs = tf.squeeze(outs, axis=[1], name=scope+'/squeeze')
        outs = act_fn(outs, scope+'act1') if act_fn else outs
        outs = batch_norm(outs, True, scope+'/norm2', act_fn)
        print(outs, 'xianyan')
        '''

    return outs
Example #14
 def decodeForward(self, z):
     x = nn.relu(self.decodeL(z))
     x = x.view(x.shape[0], self.channels[-1], self.finalSize[0], self.finalSize[1])
     x = self.decode(x)
     x = nn.sigmoid(x)
     return x
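Example #15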
 def _gate(self, x):
     tanh_preactivation, sigmoid_preactivation = tf.split(x, 2, axis=-1)
     return nn.tanh(tanh_preactivation) * nn.sigmoid(sigmoid_preactivation)
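Example #16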
    def train_layers(self, train_x, train_y, test_x, test_y):
        params = {}

        X = tf.placeholder(tf.float32, [None, self.opt['n_dim']])
        Y = tf.placeholder(tf.float32, [None, self.opt['n_classes']])
        keep_prob = tf.placeholder(tf.float32)  #for dropout

        params['W1'] = tf.Variable(
            tf.random_normal([self.opt['n_dim'], self.opt['num_hidden1']],
                             mean=0,
                             stddev=self.opt['std']))
        params['b1'] = tf.Variable(
            tf.random_normal([self.opt['num_hidden1']],
                             mean=0,
                             stddev=self.opt['std']))
        params['a1'] = nn.sigmoid(tf.matmul(X, params['W1']) + params['b1'])
        params['dropout1'] = nn.dropout(params['a1'], keep_prob)

        params['W2'] = tf.Variable(
            tf.random_normal(
                [self.opt['num_hidden1'], self.opt['num_hidden2']],
                mean=0,
                stddev=self.opt['std']))
        params['b2'] = tf.Variable(
            tf.random_normal([self.opt['num_hidden2']],
                             mean=0,
                             stddev=self.opt['std']))
        params['a2'] = nn.relu(
            tf.matmul(params['dropout1'], params['W2']) + params['b2'])
        params['dropout2'] = nn.dropout(params['a2'], keep_prob)

        params['W3'] = tf.Variable(
            tf.random_normal(
                [self.opt['num_hidden2'], self.opt['num_hidden3']],
                mean=0,
                stddev=self.opt['std']))
        params['b3'] = tf.Variable(
            tf.random_normal([self.opt['num_hidden3']],
                             mean=0,
                             stddev=self.opt['std']))
        params['a3'] = nn.tanh(
            tf.matmul(params['dropout2'], params['W3']) + params['b3'])
        params['dropout3'] = nn.dropout(params['a3'], keep_prob)

        params['outW'] = tf.Variable(
            tf.random_normal([self.opt['num_hidden3'], self.opt['n_classes']],
                             mean=0,
                             stddev=self.opt['std']))
        params['outb'] = tf.Variable(
            tf.random_normal([self.opt['n_classes']],
                             mean=0,
                             stddev=self.opt['std']))

        out = nn.softmax(
            tf.matmul(params['dropout3'], params['outW']) + params['outb'])

        cost = tf.reduce_mean(
            -tf.reduce_sum(Y * tf.log(out), reduction_indices=[1]))
        optimizer = tf.train.AdamOptimizer(
            self.opt['learning_rate']).minimize(cost)

        correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        cost_history = np.empty(shape=[1], dtype=float)
        y, y_pred = None, None

        #reshape labels into a one hot vector
        f = FeatureParser()
        train_y = f.one_hot_encode(train_y)
        test_y = f.one_hot_encode(test_y)

        print('TRAIN_ONE_HOT_LABEL{}'.format(train_y))

        print('Training...')
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(training_epochs):
                _, loss, acc = sess.run([optimizer, cost, accuracy],
                                        feed_dict={
                                            X: train_x,
                                            Y: train_y,
                                            keep_prob: 0.5
                                        })
                cost_history = np.append(cost_history, loss)
                if epoch % 50 == 0:
                    print('Epoch#', epoch, 'Cost:', loss, 'Train acc.:', acc)

            y_pred = sess.run(tf.argmax(out, 1),
                              feed_dict={
                                  X: test_x,
                                  keep_prob: 1.0
                              })
            y = sess.run(tf.argmax(test_y, 1))

            print(
                "Test accuracy: ",
                round(
                    sess.run(accuracy,
                             feed_dict={
                                 X: test_x,
                                 Y: test_y,
                                 keep_prob: 1.0
                             }), 3))

        fig = plt.figure(figsize=(10, 8))
        plt.plot(cost_history)
        plt.xlabel('Iterations')
        plt.ylabel('Cost')
        plt.axis([0, training_epochs, 0, np.max(cost_history)])
        plt.show()

        precision, recall, f_score, s = precision_recall_fscore_support(
            y, y_pred, average='micro')
        print('F score:', round(f_score, 3))
Example #17
# tf.Tensor(
# [[0. 0. 1.]
#  [0. 0. 1.]], shape=(2, 3), dtype=float32)
# tf.Tensor(
# [[0. 0. 1.]
#  [0. 0. 1.]], shape=(2, 3), dtype=float32)
print(_leaky_relu(x))
print(nn.leaky_relu(x))
# tf.Tensor(
# [[-0.2  0.   1. ]
#  [ 0.   0.   1. ]], shape=(2, 3), dtype=float32)
# tf.Tensor(
# [[-0.2  0.   1. ]
#  [ 0.   0.   1. ]], shape=(2, 3), dtype=float32)
print(_sigmoid(x))
print(nn.sigmoid(x))
# tf.Tensor(
# [[0.26894143 0.5        0.73105854]
#  [0.5        0.5        0.73105854]], shape=(2, 3), dtype=float32)
# tf.Tensor(
# [[0.26894143 0.5        0.73105854]
#  [0.5        0.5        0.7310586 ]], shape=(2, 3), dtype=float32)
print(_tanh(x))
print(nn.tanh(x))
# tf.Tensor(
# [[-0.7615942  0.         0.7615941]
#  [ 0.         0.         0.7615941]], shape=(2, 3), dtype=float32)
# tf.Tensor(
# [[-0.7615942  0.         0.7615942]
#  [ 0.         0.         0.7615942]], shape=(2, 3), dtype=float32)
print(_softmax(x))
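The prints above compare hand-rolled _leaky_relu/_sigmoid/_tanh/_softmax helpers against their tf.nn counterparts, but neither the helpers nor x appear in the snippet; a minimal sketch that reproduces the printed values, with the input and helper bodies inferred as assumptions:

import tensorflow as tf
from tensorflow import nn

# Input inferred from the outputs: sigmoid(-1) ~ 0.2689, tanh(1) ~ 0.7616.
x = tf.constant([[-1.0, 0.0, 1.0],
                 [0.0, 0.0, 1.0]])

def _leaky_relu(t, alpha=0.2):  # 0.2 matches the -0.2 entry printed above
    return tf.maximum(alpha * t, t)

def _sigmoid(t):
    return 1.0 / (1.0 + tf.exp(-t))

def _tanh(t):
    e2 = tf.exp(2.0 * t)
    return (e2 - 1.0) / (e2 + 1.0)

def _softmax(t):
    e = tf.exp(t - tf.reduce_max(t, axis=-1, keepdims=True))
    return e / tf.reduce_sum(e, axis=-1, keepdims=True)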
Example #18
    def create_model(self,
                     model_input,
                     vocab_size,
                     coarse_num_mixtures=None,
                     label_num_mixtures=None,
                     rank_of_basis=None,
                     l2_penalty=1e-8,
                     phase=True,
                     **unused_params):
        coarse_num_mixtures = coarse_num_mixtures or FLAGS.coarse_num_mixtures
        label_num_mixtures = label_num_mixtures or FLAGS.label_num_mixtures
        rank_of_basis = rank_of_basis or FLAGS.rank_of_basis

        ### Coarse Level
        coarse_gate_activations = slim.fully_connected(
            model_input,
            25 * (coarse_num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="coarse_gates")
        coarse_expert_activations = slim.fully_connected(
            model_input,
            25 * coarse_num_mixtures,
            # activation_fn=nn.relu,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="coarse_experts")

        coarse_gating_distribution = tf.nn.softmax(
            tf.reshape(coarse_gate_activations,
                       [-1, coarse_num_mixtures + 1
                        ]))  # (Batch * #Labels) x (num_mixtures + 1)
        # coarse_expert_distribution = tf.nn.sigmoid(tf.reshape(
        coarse_expert_distribution = tf.reshape(
            coarse_expert_activations,
            # [-1, coarse_num_mixtures]))  # (Batch * #Labels) x num_mixtures
            [-1, 25 * coarse_num_mixtures])  # Batch x (#Labels * num_mixtures)

        coarse_normed_indie = tf.reshape(
            slim.batch_norm(coarse_expert_distribution,
                            center=True,
                            scale=True,
                            activation_fn=nn.relu,
                            is_training=phase,
                            scope="coarse_bn"), [-1, coarse_num_mixtures])

        coarse_probabilities_by_class_and_batch = tf.reduce_sum(
            # coarse_gating_distribution[:, :coarse_num_mixtures] * coarse_expert_distribution, 1)
            coarse_gating_distribution[:, :coarse_num_mixtures] *
            coarse_normed_indie,
            1)
        coarse_indie_probabilities = tf.reshape(
            coarse_probabilities_by_class_and_batch, [-1, 25])

        coarse_scores = slim.fully_connected(
            coarse_indie_probabilities,
            25,
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="actual_coarse")
        coarse_probabilities = nn.sigmoid(coarse_scores)

        concat = tf.concat([model_input, nn.relu(coarse_scores)],
                           -1,
                           name='middle_concat')

        ### Label Level
        label_gate_activations = slim.fully_connected(
            concat,
            vocab_size * (label_num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="label_gates")
        label_expert_activations = slim.fully_connected(
            concat,
            vocab_size * label_num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="label_experts")

        label_gating_distribution = tf.nn.softmax(
            tf.reshape(label_gate_activations,
                       [-1, label_num_mixtures + 1
                        ]))  # (Batch * #Labels) x (num_mixtures + 1)
        # label_expert_distribution = tf.nn.sigmoid(tf.reshape(
        label_expert_distribution = tf.reshape(
            label_expert_activations,
            # [-1, label_num_mixtures]))  # (Batch * #Labels) x num_mixtures
            [-1, vocab_size * label_num_mixtures
             ])  # Batch x (#Labels * num_mixtures)

        label_normed_indie = tf.reshape(
            slim.batch_norm(label_expert_distribution,
                            center=True,
                            scale=True,
                            activation_fn=nn.relu,
                            is_training=phase,
                            scope="label_bn"), [-1, label_num_mixtures])

        label_probabilities_by_class_and_batch = tf.reduce_sum(
            # label_gating_distribution[:, :label_num_mixtures] * label_expert_distribution, 1)
            label_gating_distribution[:, :label_num_mixtures] *
            label_normed_indie,
            1)
        # label_normed_indie[:, :label_num_mixtures] * label_expert_distribution, 1)
        label_indie_probabilities = tf.reshape(
            label_probabilities_by_class_and_batch, [-1, vocab_size])

        projection_to_basis = slim.fully_connected(
            label_indie_probabilities,
            rank_of_basis,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="reduce_to_basis")
        reduce_normed_indie = slim.batch_norm(projection_to_basis,
                                              center=True,
                                              scale=True,
                                              activation_fn=nn.relu,
                                              is_training=phase,
                                              scope="reduce_bn")

        reduce_propogation = slim.fully_connected(
            # projection_to_basis,
            reduce_normed_indie,
            rank_of_basis,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="propogation")
        transformed_normed_indie = slim.batch_norm(reduce_propogation,
                                                   center=True,
                                                   scale=True,
                                                   activation_fn=nn.relu,
                                                   is_training=phase,
                                                   scope="transform_bn")

        label_scores = slim.fully_connected(
            # reduce_propogation,
            transformed_normed_indie,
            vocab_size,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="label_prob")
        label_probabilities = nn.sigmoid(label_scores)

        return {
            "predictions": label_probabilities,
            "coarse_predictions": coarse_probabilities
        }