print("--------------")

# Layer 1: a single linear (scaling) layer that standardizes the inputs
Wi = np.array(
    np.diag(1 / Xstd), dtype='float32'
)  # tf.constant([1/3/ts_std, 0,0, 0,1/3/dxl_std, 0, 0, 0, 1/3/lmois_std], shape=[3,3], dtype=tf.float32)
bi = np.matmul(
    Xmeans, Wi
)  #tf.constant([ts_mean, dxl_mean, lmois_mean], shape=[1,3], dtype=tf.float32)

print(Wi)
print(bi)

#### TENSORFLOW GRAPH BUILDING STARTS HERE ###

xin = tf.matmul(tf.cast(my_data[:, X_ids].astype(float), tf.float32), Wi) - bi
yin = tf.one_hot(my_data[:, Y_id].astype(int),
                 depth=n_classes,
                 dtype=tf.int32)

print("Scaled inputs:")
print(tf.Session().run(xin[0:5, :]))
print(tf.Session().run(yin[0:5, :]))
print("--------------")

xeval = tf.matmul(tf.cast(eval_data[:, X_ids].astype(float), tf.float32),
                  Wi) - bi
yeval = tf.one_hot(eval_data[:, Y_id].astype(int),
                   depth=n_classes,
                   dtype=tf.int32)
Example #2
 def __call__(self, tensor):
     y = tf.matmul(tensor, self._weights) + self._bias
     return tf.nn.relu(y) if self._activate_relu else y
Example #3
def conv_capsule_mat(input_tensor,
                     input_activation,
                     input_dim,
                     output_dim,
                     layer_name,
                     num_routing=3,
                     num_in_atoms=3,
                     num_out_atoms=3,
                     stride=2,
                     kernel_size=5,
                     min_var=0.0005,
                     final_beta=1.0):
    """Convolutional Capsule layer with Pose Matrices."""
    print('caps conv stride: {}'.format(stride))
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        input_shape = tf.shape(input_tensor)
        _, _, _, in_height, in_width = input_tensor.get_shape()
        # This Variable will hold the state of the weights for the layer
        kernel = utils.weight_variable(shape=[
            input_dim, kernel_size, kernel_size, num_in_atoms,
            output_dim * num_out_atoms
        ],
                                       stddev=0.3)
        # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.5,
            name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1],
                                           init_value=.5,
                                           name='sigma_biases')
        with tf.name_scope('conv'):
            print('conv input shape:')
            # input_tensor: [x,128,8, c1,c2] -> [x*128,8, c1,c2]
            print(input_tensor.get_shape())
            input_tensor_reshaped = tf.reshape(input_tensor, [
                input_shape[0] * input_dim * in_atom_sq, input_shape[3],
                input_shape[4], 1
            ])
            input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                             input_tensor.get_shape()[4], 1))
            input_act_reshaped = tf.reshape(input_activation, [
                input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
            ])
            input_act_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                          input_tensor.get_shape()[4], 1))
            print(input_tensor_reshaped.get_shape())
            # conv: [x*128,out*out_at, c3,c4]
            conv_patches = tf.extract_image_patches(
                images=input_tensor_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            act_patches = tf.extract_image_patches(
                images=input_act_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            o_height = (in_height - kernel_size) // stride + 1
            o_width = (in_width - kernel_size) // stride + 1
            patches = tf.reshape(conv_patches,
                                 (input_shape[0], input_dim, in_atom_sq,
                                  o_height, o_width, kernel_size, kernel_size))
            patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width,
                               kernel_size, kernel_size))
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size, input_shape[0] *
                 o_height * o_width * num_in_atoms, num_in_atoms))
            patch_split.set_shape(
                (input_dim, kernel_size, kernel_size, None, num_in_atoms))
            a_patches = tf.reshape(act_patches,
                                   (input_shape[0], input_dim, 1, 1, o_height,
                                    o_width, kernel_size, kernel_size))
            a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width,
                                 kernel_size, kernel_size))
            with tf.name_scope('input_act'):
                utils.activation_summary(
                    tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches,
                                                              axis=1),
                                                axis=-1),
                                  axis=-1))
            with tf.name_scope('Wx'):
                wx = tf.matmul(patch_split, kernel)
                wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size,
                                     input_shape[0], o_height, o_width,
                                     num_in_atoms * num_out_atoms, output_dim))
                wx.set_shape(
                    (input_dim, kernel_size, kernel_size, None, o_height,
                     o_width, num_in_atoms * num_out_atoms, output_dim))
                wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
                utils.activation_summary(wx)

        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10, c3, c4]
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            activation, center = update_conv_routing(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=final_beta,
            )
            # activations: [x, 10, 8, c3, c4]

        out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
        out_center = tf.squeeze(center, axis=[1, 6, 7])
        with tf.name_scope('center'):
            utils.activation_summary(out_center)
        return tf.sigmoid(out_activation), out_center
Example #4
batch_size = 128
num_steps = 1800
learning_rate = 0.01
start = time.time()

# input
x = tf.placeholder(tf.float32, [None, 784], "x")
y_ = tf.placeholder(tf.float32, [None, 10], "y")

# weight
W = tf.Variable(tf.zeros([784, 10]))
# bias
b = tf.Variable(tf.zeros([10]))
# test_data * W + b
y = tf.matmul(x, W) + b
sm = tf.nn.softmax(y, name="softmax")

# cross entropy (loss function)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_), name="loss")

# train step
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# evaluating the model
correct_prediction = tf.equal(tf.argmax(sm, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

saver = tf.train.Saver()
init = tf.global_variables_initializer()
Example #5
    def testRunSimpleNetworkWithInfAndNaNWorks(self):
        with tf.Session() as sess:
            x_init_val = np.array([[2.0], [-1.0]])
            y_init_val = np.array([[0.0], [-0.25]])
            z_init_val = np.array([[0.0, 3.0], [-1.0, 0.0]])

            x_init = tf.constant(x_init_val, shape=[2, 1], name="x_init")
            x = tf.Variable(x_init, name="x")
            y_init = tf.constant(y_init_val, shape=[2, 1])
            y = tf.Variable(y_init, name="y")
            z_init = tf.constant(z_init_val, shape=[2, 2])
            z = tf.Variable(z_init, name="z")

            u = tf.div(x, y, name="u")  # Produces an Inf.
            v = tf.matmul(z, u, name="v")  # Produces NaN and Inf.

            sess.run(x.initializer)
            sess.run(y.initializer)
            sess.run(z.initializer)

            run_options = tf.RunOptions(output_partition_graphs=True)
            tf_debug.watch_graph(run_options,
                                 sess.graph,
                                 debug_ops=["DebugNumericSummary"],
                                 debug_urls=[self._debug_url])

            result = sess.run(v, options=run_options)
            self.assertTrue(np.isnan(result[0, 0]))
            self.assertEqual(-np.inf, result[1, 0])

        # Debugger data is stored within a special directory within logdir.
        event_files = glob.glob(
            os.path.join(self._logdir, constants.DEBUGGER_DATA_DIRECTORY_NAME,
                         "events.debugger*"))
        self.assertEqual(1, len(event_files))

        self._check_health_pills_in_events_file(
            event_files[0], {
                "x:0:DebugNumericSummary": [x_init_val],
                "y:0:DebugNumericSummary": [y_init_val],
                "z:0:DebugNumericSummary": [z_init_val],
                "u:0:DebugNumericSummary": [x_init_val / y_init_val],
                "v:0:DebugNumericSummary":
                [np.matmul(z_init_val, x_init_val / y_init_val)],
            })

        report = self._debug_data_server.numerics_alert_report()
        self.assertEqual(2, len(report))
        self.assertTrue(report[0].device_name.lower().endswith("cpu:0"))
        self.assertEqual("u:0", report[0].tensor_name)
        self.assertGreater(report[0].first_timestamp, 0)
        self.assertEqual(0, report[0].nan_event_count)
        self.assertEqual(0, report[0].neg_inf_event_count)
        self.assertEqual(1, report[0].pos_inf_event_count)
        self.assertTrue(report[1].device_name.lower().endswith("cpu:0"))
        self.assertEqual("u:0", report[0].tensor_name)
        self.assertGreaterEqual(report[1].first_timestamp,
                                report[0].first_timestamp)
        self.assertEqual(1, report[1].nan_event_count)
        self.assertEqual(1, report[1].neg_inf_event_count)
        self.assertEqual(0, report[1].pos_inf_event_count)
Example #6
layer_1_nodes = 50
layer_2_nodes = 165
layer_3_nodes = 50

# Defining the model.
with tf.variable_scope('input'):
    X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))

with tf.variable_scope('layer_1'):
    weights = tf.get_variable('weights1',
                              shape=[number_of_inputs, layer_1_nodes],
                              initializer=tf.initializers.glorot_normal())
    biases = tf.get_variable('biases1',
                             shape=[layer_1_nodes],
                             initializer=tf.zeros_initializer())
    layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)

with tf.variable_scope('layer_2'):
    weights = tf.get_variable('weights2',
                              shape=[layer_1_nodes, layer_2_nodes],
                              initializer=tf.initializers.glorot_normal())
    biases = tf.get_variable(name='biases2',
                             shape=[layer_2_nodes],
                             initializer=tf.zeros_initializer())
    layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)

with tf.variable_scope('layer_3'):
    weights = tf.get_variable('weights3',
                              shape=[layer_2_nodes, layer_3_nodes],
                              initializer=tf.initializers.glorot_normal())
    biases = tf.get_variable(name='biases3',
                             shape=[layer_3_nodes],
                             initializer=tf.zeros_initializer())
    layer_3_output = tf.nn.relu(tf.matmul(layer_2_output, weights) + biases)
Example #7
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
    """Performs various post-processing on a word embedding tensor.

    Args:
      input_tensor: float Tensor of shape [batch_size, seq_length,
        embedding_size].
      use_token_type: bool. Whether to add embeddings for `token_type_ids`.
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
        Must be specified if `use_token_type` is True.
      token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
      token_type_embedding_name: string. The name of the embedding table variable
        for token type ids.
      use_position_embeddings: bool. Whether to add position embeddings for the
        position of each token in the sequence.
      position_embedding_name: string. The name of the embedding table variable
        for positional embeddings.
      initializer_range: float. Range of the weight initialization.
      max_position_embeddings: int. Maximum sequence length that might ever be
        used with this model. This can be longer than the sequence length of
        input_tensor, but cannot be shorter.
      dropout_prob: float. Dropout probability applied to the final output tensor.

    Returns:
      float tensor with same shape as `input_tensor`.

    Raises:
      ValueError: One of the tensor shapes or input values is invalid.
    """
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    width = input_shape[2]

    output = input_tensor

    if use_token_type:
        if token_type_ids is None:
            raise ValueError("`token_type_ids` must be specified if"
                             "`use_token_type` is True.")
        token_type_table = tf.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, width],
            initializer=create_initializer(initializer_range))
        # This vocab will be small so we always do one-hot here, since it is always
        # faster for a small vocabulary.
        flat_token_type_ids = tf.reshape(token_type_ids, [-1])
        one_hot_ids = tf.one_hot(flat_token_type_ids,
                                 depth=token_type_vocab_size)
        token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
        token_type_embeddings = tf.reshape(token_type_embeddings,
                                           [batch_size, seq_length, width])
        output += token_type_embeddings

    if use_position_embeddings:
        assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
        with tf.control_dependencies([assert_op]):
            full_position_embeddings = tf.get_variable(
                name=position_embedding_name,
                shape=[max_position_embeddings, width],
                initializer=create_initializer(initializer_range))
            # Since the position embedding table is a learned variable, we create it
            # using a (long) sequence length `max_position_embeddings`. The actual
            # sequence length might be shorter than this, for faster training of
            # tasks that do not have long sequences.
            #
            # So `full_position_embeddings` is effectively an embedding table
            # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
            # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
            # perform a slice.
            position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                           [seq_length, -1])
            num_dims = len(output.shape.as_list())

            # Only the last two dimensions are relevant (`seq_length` and `width`), so
            # we broadcast among the first dimensions, which is typically just
            # the batch size.
            position_broadcast_shape = []
            for _ in range(num_dims - 2):
                position_broadcast_shape.append(1)
            position_broadcast_shape.extend([seq_length, width])
            position_embeddings = tf.reshape(position_embeddings,
                                             position_broadcast_shape)
            output += position_embeddings

    output = layer_norm_and_dropout(output, dropout_prob)
    return output
Example #8
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

W1 = tf.get_variable("W1",
                     shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W2",
                     shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.get_variable("W3",
                     shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)

W4 = tf.get_variable("W4",
                     shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
Example #9
def outer(x, y):
    return tf.matmul(tf.expand_dims(x, 1), tf.transpose(tf.expand_dims(y, 1)))
Example #10
    def __init__(self,
                 linear_size,
                 num_layers,
                 residual,
                 batch_norm,
                 max_norm,
                 batch_size,
                 learning_rate,
                 summaries_dir,
                 predict_14=False,
                 dtype=tf.float32):
        """Creates the linear + relu model
    Args
      linear_size: integer. number of units in each layer of the model
      num_layers: integer. number of bilinear blocks in the model
      residual: boolean. Whether to add residual connections
      batch_norm: boolean. Whether to use batch normalization
      max_norm: boolean. Whether to clip weights to a norm of 1
      batch_size: integer. The size of the batches used during training
      learning_rate: float. Learning rate to start with
      summaries_dir: String. Directory where to log progress
      predict_14: boolean. Whether to predict 14 instead of 17 joints
      dtype: the data type to use to store internal variables
    """

        # There are in total 17 joints in H3.6M and 16 in MPII (and therefore in stacked
        # hourglass detections). We settled with 16 joints in 2d just to make models
        # compatible (e.g. you can train on ground truth 2d and test on SH detections).
        # This does not seem to have an effect on prediction performance.
        self.HUMAN_2D_SIZE = 16 * 2

        # In 3d all the predictions are zero-centered around the root (hip) joint, so
        # we actually predict only 16 joints. The error is still computed over 17 joints,
        # because if one uses, e.g. Procrustes alignment, there is still error in the
        # hip to account for!
        # There is also an option to predict only 14 joints, which makes our results
        # directly comparable to those in https://arxiv.org/pdf/1611.09010.pdf
        self.HUMAN_3D_SIZE = 14 * 3 if predict_14 else 16 * 3

        self.input_size = self.HUMAN_2D_SIZE
        self.output_size = self.HUMAN_3D_SIZE

        self.isTraining = tf.placeholder(tf.bool, name="isTrainingflag")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Summary writers for train and test runs
        self.train_writer = tf.summary.FileWriter(
            os.path.join(summaries_dir, 'train'))
        self.test_writer = tf.summary.FileWriter(
            os.path.join(summaries_dir, 'test'))

        self.linear_size = linear_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=dtype,
                                         name="learning_rate")
        self.global_step = tf.Variable(0, trainable=False, name="global_step")
        decay_steps = 100000  # empirical
        decay_rate = 0.96  # empirical
        self.learning_rate = tf.train.exponential_decay(
            self.learning_rate, self.global_step, decay_steps, decay_rate)

        # === Transform the inputs ===
        with vs.variable_scope("inputs"):

            # in=2d poses, out=3d poses
            enc_in = tf.placeholder(dtype,
                                    shape=[None, self.input_size],
                                    name="enc_in")
            dec_out = tf.placeholder(dtype,
                                     shape=[None, self.output_size],
                                     name="dec_out")

            self.encoder_inputs = enc_in
            self.decoder_outputs = dec_out

        # === Create the linear + relu combos ===
        with vs.variable_scope("linear_model"):

            # === First layer, brings dimensionality up to linear_size ===
            w1 = tf.get_variable(name="w1",
                                 initializer=kaiming,
                                 shape=[self.HUMAN_2D_SIZE, linear_size],
                                 dtype=dtype)
            b1 = tf.get_variable(name="b1",
                                 initializer=kaiming,
                                 shape=[linear_size],
                                 dtype=dtype)
            w1 = tf.clip_by_norm(w1, 1) if max_norm else w1
            y3 = tf.matmul(enc_in, w1) + b1

            if batch_norm:
                y3 = tf.layers.batch_normalization(y3,
                                                   training=self.isTraining,
                                                   name="batch_normalization")
            y3 = tf.nn.relu(y3)
            y3 = tf.nn.dropout(y3, self.dropout_keep_prob)

            # === Create multiple bi-linear layers ===
            for idx in range(num_layers):
                y3 = self.two_linear(y3, linear_size, residual,
                                     self.dropout_keep_prob, max_norm,
                                     batch_norm, dtype, idx)

            # === Last linear layer has HUMAN_3D_SIZE in output ===
            w4 = tf.get_variable(name="w4",
                                 initializer=kaiming,
                                 shape=[linear_size, self.HUMAN_3D_SIZE],
                                 dtype=dtype)
            b4 = tf.get_variable(name="b4",
                                 initializer=kaiming,
                                 shape=[self.HUMAN_3D_SIZE],
                                 dtype=dtype)
            w4 = tf.clip_by_norm(w4, 1) if max_norm else w4
            y = tf.matmul(y3, w4) + b4
            # === End linear model ===

        # Store the outputs here
        self.outputs = y
        self.loss = tf.reduce_mean(tf.square(y - dec_out))
        self.loss_summary = tf.summary.scalar('loss/loss', self.loss)

        # To keep track of the loss in mm
        self.err_mm = tf.placeholder(tf.float32, name="error_mm")
        self.err_mm_summary = tf.summary.scalar("loss/error_mm", self.err_mm)

        # Gradients and update operation for training the model.
        opt = tf.train.AdamOptimizer(self.learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        with tf.control_dependencies(update_ops):

            # Update all the trainable parameters
            gradients = opt.compute_gradients(self.loss)
            self.gradients = [[] if i is None else i for i in gradients]
            self.updates = opt.apply_gradients(gradients,
                                               global_step=self.global_step)

        # Keep track of the learning rate
        self.learning_rate_summary = tf.summary.scalar(
            'learning_rate/learning_rate', self.learning_rate)

        # To save the model
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
Example #11
def main():
    # Reset Graph
    tf.reset_default_graph()
    # Load Data
    DATA_FILE = "cifar_10_tf_train_test.pkl"
    train_x,train_y, test_x, test_y = loadData(DATA_FILE)
    test_y_np = np.array(test_y)

    print("Train X size:\t", train_x.shape)
    print("Train Y size:\t", len(train_y))
    print("Test X size:\t", test_x.shape)
    print("Test Y size:\t", len(test_y))

    """
    imgplot = plt.imshow(data_list[0][0])
    plt.colorbar()
    plt.show()
    """

    # Hyper Parameters
    batch_size = 100
    num_epochs = 3000
    learning_rate = .005
    # Convolution Layer1
    filter_size1 = 5
    num_filters1 = 32
    # Convolution Layer2
    filter_size2 = 5
    num_filters2 = 32
    # Convolution Layer3
    filter_size3 = 3
    num_filters3 = 64

    # Dimensions of Data
    img_size = 32
    img_depth = 3           # number of channels in the image (red, green, blue)
    img_size_flat = 32*32*img_depth
    img_shape = (img_size,img_size,img_depth)
    num_classes = 10

    # Initializers
    xavier_init = tf.initializers.glorot_normal()
    #xavier_init = tf.contrib.layers.xavier_initializer()
    zero_init = tf.zeros_initializer()

    # Input Variables
    input_img = tf.placeholder(dtype=tf.uint8, shape=[None, img_size, img_size, img_depth], name="input_img")
    y = tf.placeholder(dtype=tf.int64, shape=[None], name="labels")

    # Normalization
    # convert_image_dtype already rescales uint8 images into [0, 1],
    # so a further divide-by-255 would double-normalize
    x = tf.image.convert_image_dtype(input_img, dtype="float32")
    mean = tf.math.reduce_mean(x, 0)
    x = tf.math.subtract(x, mean)

    y_true = tf.one_hot(y, 10,dtype="float32")

    # Filters,Weights, and Biases
    F1_shape = [filter_size1,filter_size1,img_depth,num_filters1]
    F1 = tf.get_variable(shape=F1_shape, dtype='float32', initializer=xavier_init, name="filter1")
    F1_bias = tf.get_variable(shape=[num_filters1],dtype='float32', initializer=zero_init, name="filter_bias1")

    F2_shape = [filter_size2,filter_size2,num_filters1,num_filters2]
    F2 = tf.get_variable(shape=F2_shape, dtype='float32', initializer=xavier_init, name="filter2")
    F2_bias = tf.get_variable(shape=[num_filters2],dtype='float32', initializer=zero_init, name="filter_bias2")

    F3_shape = [filter_size3,filter_size3,num_filters2,num_filters3]
    F3 = tf.get_variable(shape=F3_shape, dtype='float32', initializer=xavier_init, name="filter3")
    F3_bias = tf.get_variable(shape=[num_filters3],dtype='float32', initializer=zero_init, name="filter_bias3")

    weights_fc = tf.get_variable(shape=[576,num_classes] , dtype="float32", initializer=xavier_init, name="weightsfc")
    bias_fc = tf.get_variable(shape=[10] , dtype="float32", initializer=zero_init, name="biasfc")

    # Forward Propagation
    conv_layer1 = tf.nn.leaky_relu(tf.nn.conv2d(x, filters=F1, strides=[1,1,1,1],padding="VALID") + F1_bias)
    pool1 = tf.nn.pool(conv_layer1, window_shape=[2,2],pooling_type="MAX", strides=[2,2], padding="VALID")

    conv_layer2 = tf.nn.leaky_relu(tf.nn.conv2d(pool1, filters=F2, strides=[1,1,1,1],padding="VALID") + F2_bias)
    pool2 = tf.nn.pool(conv_layer2, window_shape=[2,2],pooling_type="MAX", strides=[2,2], padding="VALID")

    conv_layer3 = tf.nn.leaky_relu(tf.nn.conv2d(pool2, filters=F3, strides=[1,1,1,1],padding="VALID") + F3_bias)

    # Vectorize Final Convolution
    conv_vector = tf.layers.flatten(conv_layer3)

    print(conv_layer1.get_shape())
    print(conv_layer2.get_shape())
    print(conv_layer3.get_shape())
    print(conv_vector.get_shape())

    # Fully Connected Layer
    logits = tf.matmul(conv_vector, weights_fc)+bias_fc
    softmax_op = tf.nn.softmax(logits)
    predict_lbl = tf.argmax(softmax_op, axis=1, name='predict_lbl')

    # Cost Function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                        logits=logits, name=None)
    correct_prediction = tf.equal(predict_lbl, y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    cost = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Create the collection.
    tf.get_collection("validation_nodes")
    # Add stuff to the collection.
    tf.add_to_collection("validation_nodes", input_img)
    tf.add_to_collection("validation_nodes", predict_lbl)
    # start training
    saver = tf.train.Saver()

    # Plot Variables
    cost_list = []
    train_accuracy_list = []
    test_accuracy_list = []
    test_accuracy_cls = {}

    start_time = time.time()
    # Initialize the Graph
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        print("\n\n\n")
        sess.run(init)

        index = 0
        trained_set = set()
        for e in range(num_epochs):
            time.sleep(.1)

            indlimit = train_x.shape[0]-batch_size
            index = random.randint(0,indlimit)

            for i in range(index,index+batch_size):trained_set.add(int(i))

            x_batch = train_x[index: index+batch_size]
            y_batch = train_y[index: index+batch_size]

            permutation = np.random.permutation(len(y_batch))
            x_batch = x_batch[permutation,:]
            y_batch = np.asarray(y_batch)[permutation]

            sess.run(optimizer, feed_dict={input_img:x_batch, y:y_batch})
            # Store Values for plots
            cost_list.append(sess.run(cost, feed_dict={input_img:x_batch, y:y_batch}))
            train_accuracy_list.append(sess.run(accuracy, feed_dict={input_img:x_batch, y:y_batch}))

            if(e%100==0):
                print("Iteration:\t", e)
                print("Index Start:\t",index)

                print("Len Trained Set:", len(trained_set))
                predict_test = sess.run(predict_lbl, feed_dict={input_img:test_x})
                test_accuracy = np.sum(predict_test==test_y_np)/5000

                test_accuracy_list.append(test_accuracy)

                print("Test Accuracy:", test_accuracy)
                print()

        # this saver.save() should be within the same tf.Session(), after training is complete
        conv1_filters = sess.run(F1, feed_dict={input_img:x_batch})
        conv1_filter_images = ((conv1_filters + 0.1) * (1/0.3) * 255).astype('uint8')
        save_path = saver.save(sess, "my_model")


        for pred in range(len(predict_test)):
            if test_y_np[pred] not in test_accuracy_cls:
                test_accuracy_cls[test_y_np[pred]]={}
                test_accuracy_cls[test_y_np[pred]]["correct"] = 0
                test_accuracy_cls[test_y_np[pred]]["total"] = 0

            test_accuracy_cls[test_y_np[pred]]["total"] += 1
            if(test_y_np[pred]==predict_test[pred]):
                test_accuracy_cls[test_y_np[pred]]["correct"] += 1

    print(test_accuracy_cls)

    end_time = time.time()
    print("Time Ellapsed:", end_time-start_time)

    plt.plot(range(0,len(train_accuracy_list)), train_accuracy_list)
    plt.title('Total Training Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('%Accuracy')
    plt.show()

    plt.plot(range(0,len(cost_list)), cost_list)
    plt.title('Total Error/Cost')
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()

    plt.plot(range(0,len(test_accuracy_list)), test_accuracy_list)
    plt.title('Total Test Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('%Accuracy')
    plt.show()
    # TODO: plot filters
    print(conv1_filter_images.shape)
    conv1_filter_images = conv1_filter_images.T
    for i in range(32):
        plt.subplot(4,8,i+1)
        plt.title("Filter "+str(i+1),fontsize=6)
        plt.axis("off")
        plt.imshow(conv1_filter_images[i].T)
    plt.show()
Example #12
    def two_linear(self, xin, linear_size, residual, dropout_keep_prob,
                   max_norm, batch_norm, dtype, idx):
        """
    Make a bi-linear block with optional residual connection
    Args
      xin: the batch that enters the block
      linear_size: integer. The size of the linear units
      residual: boolean. Whether to add a residual connection
      dropout_keep_prob: float in [0,1]. Probability of keeping each unit (the
        keep probability passed to tf.nn.dropout)
      max_norm: boolean. Whether to clip weights to 1-norm
      batch_norm: boolean. Whether to do batch normalization
      dtype: type of the weights. Usually tf.float32
      idx: integer. Number of layer (for naming/scoping)
    Returns
      y: the batch after it leaves the block
    """

        with vs.variable_scope("two_linear_" + str(idx)) as scope:

            input_size = int(xin.get_shape()[1])

            # Linear 1
            w2 = tf.get_variable(name="w2_" + str(idx),
                                 initializer=kaiming,
                                 shape=[input_size, linear_size],
                                 dtype=dtype)
            b2 = tf.get_variable(name="b2_" + str(idx),
                                 initializer=kaiming,
                                 shape=[linear_size],
                                 dtype=dtype)
            w2 = tf.clip_by_norm(w2, 1) if max_norm else w2
            y = tf.matmul(xin, w2) + b2
            if batch_norm:
                y = tf.layers.batch_normalization(y,
                                                  training=self.isTraining,
                                                  name="batch_normalization1" +
                                                  str(idx))

            y = tf.nn.relu(y)
            y = tf.nn.dropout(y, dropout_keep_prob)

            # Linear 2
            w3 = tf.get_variable(name="w3_" + str(idx),
                                 initializer=kaiming,
                                 shape=[linear_size, linear_size],
                                 dtype=dtype)
            b3 = tf.get_variable(name="b3_" + str(idx),
                                 initializer=kaiming,
                                 shape=[linear_size],
                                 dtype=dtype)
            w3 = tf.clip_by_norm(w3, 1) if max_norm else w3
            y = tf.matmul(y, w3) + b3

            if batch_norm:
                y = tf.layers.batch_normalization(y,
                                                  training=self.isTraining,
                                                  name="batch_normalization2" +
                                                  str(idx))

            y = tf.nn.relu(y)
            y = tf.nn.dropout(y, dropout_keep_prob)

            # Residual every 2 blocks
            y = (xin + y) if residual else y

        return y
Example #13
#########################################################
"""Matrix Addition"""

graph2 = tf.Graph()
with graph2.as_default():
    m1 = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    m2 = tf.constant([[0, 0, 1], [0, 0, 0], [1, 0, 0]])
    m_sum = tf.add(m1, m2)
    m_sum2 = m1 + m2

with tf.Session(graph=graph2) as sess:
    result = sess.run(m_sum)
    print(result)
    result = sess.run(m_sum2)
    print(result)

###################################################
"""matrix multiplication"""

graph3 = tf.Graph()
with graph3.as_default():
    m1 = tf.constant([[2, 2], [3, 3]])
    m2 = tf.constant([[1, 0], [0, 1]])
    m_mul = tf.matmul(m1, m2)

with tf.Session(graph=graph3) as sess:
    result = sess.run(m_mul)
    print(result)

#####################################################
Example #14
b_conv4 = bias_variable([64])

h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4, 1) + b_conv4)

#fifth convolutional layer
W_conv5 = weight_variable([3, 3, 64, 64])
b_conv5 = bias_variable([64])

h_conv5 = tf.nn.relu(conv2d(h_conv4, W_conv5, 1) + b_conv5)

#FCL 1
W_fc1 = weight_variable([1152, 1164])
b_fc1 = bias_variable([1164])

h_conv5_flat = tf.reshape(h_conv5, [-1, 1152])
h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1)

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

#FCL 2
W_fc2 = weight_variable([1164, 100])
b_fc2 = bias_variable([100])

h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

#FCL 3
W_fc3 = weight_variable([100, 50])
b_fc3 = bias_variable([50])
Example #15
def fit(net,
        img_shape,
        img_name, 
        image_mode,
        type_measurements,
        num_measurements,
        y_feed,
        A_feed,
        mask_info1,
        ini_channel = 32,
        mask_feed = None, 
        lr_decay_epoch=0,
        lr_decay_rate=0.65,
        LR=0.01,
        OPTIMIZER='adam',
        num_iter=5000,
        find_best=False,
        verbose=False,
        random_vector = None, 
        selection_mask = None,
        save = False,
        random_array = None):
    
    with tf.Graph().as_default():
        # Global step
        global_step = tf.train.get_or_create_global_step()
        
        # Set up placeholders
        n_input = img_shape[1]*img_shape[2]*img_shape[3]
        width = int(img_shape[1])
        height = int(img_shape[2])
        if mask_feed is None:
            if type_measurements == 'random': #compressed sensing with random matrix
                A  = tf.placeholder(tf.float32, shape=(n_input, num_measurements), name='A') #e.g.[img_wid*img_high*3, 200]
                y = tf.placeholder(tf.float32, shape=(1, num_measurements), name='y') #e.g.[1, 200]
            elif type_measurements == 'identity': #denoising
                if image_mode != '3D':
                    A = tf.placeholder(tf.float32, shape=(n_input, n_input), name='A') #e.g.[img_wid*img_high*3, img_wid*img_high*3]
                y = tf.placeholder(tf.float32, shape=(1, n_input), name='y') #e.g.[1, img_wid*img_high*3]
            elif type_measurements == 'circulant': #compressed sensing with circulant matrix
                y = tf.placeholder(tf.float32, shape=(1, n_input), name='y') #e.g.[1, img_wid*img_high*3]
        else: #inpainting
            y = tf.placeholder(tf.float32, shape=(1, img_shape[1], img_shape[2], img_shape[3]), name='y') #e.g.[1, img_wid, img_high, 3]
        
        # Define input uniform noise
        #rand = np.random.uniform(0, 1.0/30.0, size=(1, width, height, ini_channel)).astype(np.float32)
        out = tf.constant(np.random.uniform(size=(1, width, height, ini_channel)).astype(np.float32) * 1. / 10) #+ rand  #[1,4096,1,32] 
        out = tf.Variable(out, name='input_noise', trainable=False)
        
        # Deep image prior 
        feed_forward = tf.make_template("DeepImagePrior", net) #feed_forward takes a 4D Tensor (batch, width, height, channels) as input and outputs a 4D Tensor (batch, width*2^6, height*2^6, channels=3)
        x = feed_forward(out) #e.g. net_output with shape [1, img_wid, img_high, 3]               
        
        # Inverse problem settings
        def circulant_tf(signal_vector, random_vector_m, selection_mask_m):  
            signal_vector = tf.cast(signal_vector, dtype=tf.complex64, name='circulant_real2complex')
            t = tf.convert_to_tensor(random_vector_m, dtype=tf.complex64)
            #step 1: F^{-1} @ x
            r1 = tf.signal.ifft(signal_vector, name='circulant_step1_ifft')               
            #step 2: Diag() @ F^{-1} @ x
            Ft = tf.signal.fft(t)
            r2 = tf.multiply(r1, Ft, name='circulant_step2_diag')                
            #step 3: F @ Diag() @ F^{-1} @ x
            compressive = tf.signal.fft(r2, name='circulant_step3_fft')
            float_compressive = tf.cast(compressive, tf.float32, name='circulant_complex2real')               
            #step 4: R_{omega} @ C_{t}
            select_compressive = tf.multiply(float_compressive, selection_mask_m, name='circulant_step4_A')            
            return select_compressive
        
        if mask_feed is None: # Compressed sensing & Denoising      
            if type_measurements == 'circulant': # Compressed sensing with Circulant matrix 
                flip = tf.convert_to_tensor(random_array, dtype=tf.float32) # flip
                x_circulant =  tf.reshape(x, [1,-1]) * flip 
                y_hat = circulant_tf(x_circulant, random_vector, selection_mask) 
            else: # Compressed sensing with Random matrix & Denoising 
                if image_mode != '3D':
                    y_hat = tf.matmul(tf.reshape(x, [1,-1]), A)
                else:
                    y_hat = tf.reshape(x, [1,-1])
        else:
            # Inpainting 
            y_hat = x * mask_feed
            
    
        # Define loss  
        mse = tf.losses.mean_squared_error
        loss = mse(y, y_hat)            

        # Define learning rate 
        if lr_decay_epoch > 0:
            LR = tf.train.exponential_decay(LR, global_step, lr_decay_epoch, lr_decay_rate, staircase=True)

        # Define optimizer 
        if OPTIMIZER == 'adam':
            #print("optimize with adam", LR)
            optimizer = tf.train.AdamOptimizer(LR)
        elif OPTIMIZER == 'LBFGS':
            raise NotImplementedError('LBFGS Optimizer')
        
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)    

        # Set up gpu
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.85 
        config.log_device_placement= True
        
        with tf.Session(config=config) as sess:
            # Init            
            mse = [0.] * num_iter
            sess.run(tf.global_variables_initializer())    
                        
            # Initial deep decoder output
            if find_best:
                if not os.path.exists('log'):
                    os.makedirs('log/')
                if not os.path.exists('result'):
                    os.makedirs('result/')
                saver = tf.train.Saver(max_to_keep=1)
                #saver.save(sess, os.path.join('log/', 'model.ckpt'), global_step=0)
                best_mse = 1000000.0
                best_img = sess.run(x)
                #save_img(best_img, 'result/', img_name, '0', image_mode, decoder_type, filter_size, upsample_mode) 
            
            # Feed dict
            if mask_feed is None:
                if type_measurements == 'circulant':#compressed sensing
                    feed_dict = {y: y_feed}
                elif type_measurements == 'identity':
                    if image_mode != '3D':
                        feed_dict = {A: A_feed, y: y_feed}
                    else:
                        feed_dict = {y: y_feed}
            else:#inpainting
                feed_dict = {y: y_feed}
                            
            # Optimize
            num_params = get_num_params()
            sess.graph.finalize()
            #print('\x1b[37mFinal graph size: %.2f MB\x1b[0m' % (tf.get_default_graph().as_graph_def().ByteSize() / 10e6))

            for i in range(num_iter):
                loss_, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                #psnr = 10 * np.log10(1 * 1 / loss_) #PSNR
                mse[i] = loss_
       
                # Display
                #if i > 0 and i % 100 == 0:
                #    print ('\r[Iteration %05d] loss=%.9f' % (i, loss_), end='')
                
                # Best net
                if find_best and best_mse > 1.005 * loss_:
                    best_mse = loss_
                    #best_psnr = 10 * np.log10(1 * 1 / best_mse)
                    best_img = sess.run(x)
                    #saver.save(sess, os.path.join('log/', 'model.ckpt'), global_step=i + 1)
                         
            # Return final image or best found so far if `find_best`
            if find_best:
                out_img = best_img
                #mask_info = mask_info1[8:-4]
                # if save:
                #     save_img(out_img, 'result/', img_name, '{}'.format(i + 1), image_mode, decoder_type, filter_size, upsample_mode, num_channels_real, num_layers, input_size, mask_info, act_function)
                #print('Best MSE (wrt noisy) {}: {}: {}: {}: {}: {}: {}: {}: {}'.format(num_channels_real, num_layers, img_name, mask_info, decoder_type, filter_size, upsample_mode, upsample_factor, best_mse))
            else:
                out_img = sess.run(x)
                #mask_info = mask_info1[8:-4]
                # if save:
                #     save_img(out_img, 'result/', img_name, '{}'.format(i + 1), image_mode, decoder_type, filter_size, upsample_mode, num_channels_real, num_layers, input_size, mask_info, act_function)
                #print('FINAL MSE (wrt noisy) {}: {}: {}: {}: {}: {}: {}: {}: {}'.format(num_channels_real, num_layers, img_name, mask_info, decoder_type, filter_size, upsample_mode, upsample_factor, mse[-1]))
            if verbose:
                return mse, out_img, num_params
            else:
                return mse, out_img
Example #16
with tf1.Session() as sess:
    print(y.eval())
    print(z.eval())

### Linear Regression with Tensorflow
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf1.constant(housing_data_plus_bias, dtype=tf1.float32, name="X")
y = tf1.constant(housing.target.reshape(-1, 1), dtype=tf1.float32, name="y")
XT = tf1.transpose(X)
theta = tf1.matmul(tf1.matmul(tf1.matrix_inverse(tf1.matmul(XT, X)), XT), y)

with tf1.Session() as sess:
    theta_value = theta.eval()

print(theta_value)

### Gradient Descent
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
housing_scaled = scaler.fit_transform(housing.data)
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing_scaled]
housing_data_plus_bias[0]

n_epochs = 1000
learning_rate = .01
Example #17
import numpy as np

x_data = np.array([[0, 0], [1, 0], [1, 1], [0, 0], [0, 0], [0, 1]])
y_data = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0],
                   [0, 0, 1]])

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

W1 = tf.Variable(tf.random.uniform([2, 10], -1., 1.))
W2 = tf.Variable(tf.random.uniform([10, 3], -1., 1.))

b1 = tf.Variable(tf.zeros([10]))
b2 = tf.Variable(tf.zeros([3]))

L1 = tf.add(tf.matmul(X, W1), b1)
L1 = tf.nn.relu(L1)

model = tf.add(tf.matmul(L1, W2), b2)

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=model))
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for step in range(100):
    sess.run(train_op, feed_dict={X: x_data, Y: y_data})
Example #18
import tensorflow.compat.v1 as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

learning_rate = 0.01
training_iteration = 30
batch_size = 100
display_step = 2

x = tf.placeholder("float", [None, 784])  #Input Vector
y = tf.placeholder("float", [None, 10])  #Output Vector

W = tf.Variable(tf.zeros([784, 10]))  #Weight Tensor
b = tf.Variable(tf.zeros([10]))  #Bias Tensor

with tf.name_scope("Wx_b") as scope:
    model = tf.nn.softmax(tf.matmul(x, W) + b)

w_h = tf.summary.histogram("weights", W)
b_h = tf.summary.histogram("biases", b)

with tf.name_scope("cost_function") as scope:
    cost_function = -tf.reduce_sum(y * tf.log(model))
    tf.summary.scalar("cost_function", cost_function)

with tf.name_scope("train") as scope:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        cost_function)

init = tf.initialize_all_variables()

merged_summary_op = tf.summary.merge_all()
Example #19
import numpy as np
from sklearn.datasets import fetch_california_housing

reset_graph()

housing = fetch_california_housing()
m, n = housing.data.shape
print(m, n)
print(housing.target.reshape(-1, 1))
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
print("XT", XT)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
print("theta", theta)

with tf.Session() as sess:
    theta_value = theta.eval()
    print("线性回归 theta_value \r\n", theta_value)

# Gradient descent
reset_graph()

n_epochs = 1000
learning_rate = 0.01

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
Example #20
                         strides=[1, 1, h1, 1],
                         padding='SAME')
#output=545/4
#1 LAYER*************************************************************************************

#Rectifier LAYER*****************************************************************************
#calculated coefficient for the flattening from the size of the 3 convolutional layer
coef = int(h_pool1.get_shape()[1] * h_pool1.get_shape()[2] *
           h_pool1.get_shape()[3])
h_pool2_flat = tf.reshape(h_pool1, [-1, coef])
#declare the weights considering the constants and 256 output
W_fc1 = weight_variable([coef, w4])
b_fc1 = bias_variable([w4])

#rectifier (matmul)
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
#Rectifier LAYER*****************************************************************************

#Rectifier-Dropout LAYER**********************************************************************
#dropout
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#declare weights with the output layer in this case 2 (labelSize)
W_fc2 = weight_variable([w4, labelSize])
b_fc2 = bias_variable([labelSize])
#output
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#Rectifier-Dropout LAYER**********************************************************************

#Loss Function********************************************************************************
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[0]))
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
Example #21
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    do_return_2d_tensor=False,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
    """Performs multi-headed attention from `from_tensor` to `to_tensor`.

    This is an implementation of multi-headed attention based on "Attention
    is all you Need". If `from_tensor` and `to_tensor` are the same, then
    this is self-attention. Each timestep in `from_tensor` attends to the
    corresponding sequence in `to_tensor`, and returns a fixed-width vector.

    This function first projects `from_tensor` into a "query" tensor and
    `to_tensor` into "key" and "value" tensors. These are (effectively) a list
    of tensors of length `num_attention_heads`, where each tensor is of shape
    [batch_size, seq_length, size_per_head].

    Then, the query and key tensors are dot-producted and scaled. These are
    softmaxed to obtain attention probabilities. The value tensors are then
    interpolated by these probabilities, then concatenated back to a single
    tensor and returned.

    In practice, the multi-headed attention is done with transposes and
    reshapes rather than with actual separate tensors.

    Args:
      from_tensor: float Tensor of shape [batch_size, from_seq_length,
        from_width].
      to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        from_seq_length, to_seq_length]. The values should be 1 or 0. The
        attention scores will effectively be set to -infinity for any positions in
        the mask that are 0, and will be unchanged for positions that are 1.
      num_attention_heads: int. Number of attention heads.
      size_per_head: int. Size of each attention head.
      query_act: (optional) Activation function for the query transform.
      key_act: (optional) Activation function for the key transform.
      value_act: (optional) Activation function for the value transform.
      attention_probs_dropout_prob: (optional) float. Dropout probability of the
        attention probabilities.
      initializer_range: float. Range of the weight initializer.
      do_return_2d_tensor: bool. If True, the output will be of shape [batch_size
        * from_seq_length, num_attention_heads * size_per_head]. If False, the
        output will be of shape [batch_size, from_seq_length, num_attention_heads
        * size_per_head].
      batch_size: (Optional) int. If the input is 2D, this might be the batch size
        of the 3D version of the `from_tensor` and `to_tensor`.
      from_seq_length: (Optional) If the input is 2D, this might be the seq length
        of the 3D version of the `from_tensor`.
      to_seq_length: (Optional) If the input is 2D, this might be the seq length
        of the 3D version of the `to_tensor`.

    Returns:
      float Tensor of shape [batch_size, from_seq_length,
        num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
        true, this will be of shape [batch_size * from_seq_length,
        num_attention_heads * size_per_head]).

    Raises:
      ValueError: Any of the arguments or tensor shapes are invalid.
    """
    def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
                             seq_length, width):
        output_tensor = tf.reshape(
            input_tensor, [batch_size, seq_length, num_attention_heads, width])

        output_tensor = tf.transpose(output_tensor, [0, 2, 1, 3])
        return output_tensor

    from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])

    if len(from_shape) != len(to_shape):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if len(from_shape) == 3:
        batch_size = from_shape[0]
        from_seq_length = from_shape[1]
        to_seq_length = to_shape[1]
    elif len(from_shape) == 2:
        if batch_size is None or from_seq_length is None or to_seq_length is None:
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    # Scalar dimensions referenced here:
    #   B = batch size (number of sequences)
    #   F = `from_tensor` sequence length
    #   T = `to_tensor` sequence length
    #   N = `num_attention_heads`
    #   H = `size_per_head`

    from_tensor_2d = reshape_to_matrix(from_tensor)
    to_tensor_2d = reshape_to_matrix(to_tensor)

    # `query_layer` = [B*F, N*H]
    query_layer = tf.layers.dense(
        from_tensor_2d,
        num_attention_heads * size_per_head,
        activation=query_act,
        name="query",
        kernel_initializer=create_initializer(initializer_range))

    # `key_layer` = [B*T, N*H]
    key_layer = tf.layers.dense(
        to_tensor_2d,
        num_attention_heads * size_per_head,
        activation=key_act,
        name="key",
        kernel_initializer=create_initializer(initializer_range))

    # `value_layer` = [B*T, N*H]
    value_layer = tf.layers.dense(
        to_tensor_2d,
        num_attention_heads * size_per_head,
        activation=value_act,
        name="value",
        kernel_initializer=create_initializer(initializer_range))

    # `query_layer` = [B, N, F, H]
    query_layer = transpose_for_scores(query_layer, batch_size,
                                       num_attention_heads, from_seq_length,
                                       size_per_head)

    # `key_layer` = [B, N, T, H]
    key_layer = transpose_for_scores(key_layer, batch_size,
                                     num_attention_heads, to_seq_length,
                                     size_per_head)

    # Take the dot product between "query" and "key" to get the raw
    # attention scores.
    # `attention_scores` = [B, N, F, T]
    attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
    attention_scores = tf.multiply(attention_scores,
                                   1.0 / math.sqrt(float(size_per_head)))

    if attention_mask is not None:
        # `attention_mask` = [B, 1, F, T]
        attention_mask = tf.expand_dims(attention_mask, axis=1)

        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
        # masked positions, this operation will create a tensor which is 0.0 for
        # positions we want to attend and -10000.0 for masked positions.
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0

        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        attention_scores += adder

    # Normalize the attention scores to probabilities.
    # `attention_probs` = [B, N, F, T]
    attention_probs = tf.nn.softmax(attention_scores)

    # This is actually dropping out entire tokens to attend to, which might
    # seem a bit unusual, but is taken from the original Transformer paper.
    attention_probs = dropout(attention_probs, attention_probs_dropout_prob)

    # `value_layer` = [B, T, N, H]
    value_layer = tf.reshape(
        value_layer,
        [batch_size, to_seq_length, num_attention_heads, size_per_head])

    # `value_layer` = [B, N, T, H]
    value_layer = tf.transpose(value_layer, [0, 2, 1, 3])

    # `context_layer` = [B, N, F, H]
    context_layer = tf.matmul(attention_probs, value_layer)

    # `context_layer` = [B, F, N, H]
    context_layer = tf.transpose(context_layer, [0, 2, 1, 3])

    if do_return_2d_tensor:
        # `context_layer` = [B*F, N*H]
        context_layer = tf.reshape(context_layer, [
            batch_size * from_seq_length, num_attention_heads * size_per_head
        ])
    else:
        # `context_layer` = [B, F, N*H]
        context_layer = tf.reshape(
            context_layer,
            [batch_size, from_seq_length, num_attention_heads * size_per_head])

    return context_layer, attention_probs
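
As a quick illustration of the masking scheme above, here is a minimal, hypothetical self-attention call (shapes are illustrative only; the helpers are assumed to be the ones defined earlier in this file):

# Hedged smoke test for attention_layer; shapes are illustrative only.
batch, seq_len, width = 2, 8, 16
from_tensor = tf.random_normal([batch, seq_len, width])
# Attend only to the first half of every sequence.
attention_mask = tf.concat(
    [tf.ones([batch, seq_len, seq_len // 2], dtype=tf.int32),
     tf.zeros([batch, seq_len, seq_len // 2], dtype=tf.int32)],
    axis=-1)
context, probs = attention_layer(
    from_tensor=from_tensor,
    to_tensor=from_tensor,  # self-attention
    attention_mask=attention_mask,
    num_attention_heads=4,
    size_per_head=8)
# context: [2, 8, 32]; probabilities at masked positions are driven to ~0.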
Example #22
def batch_loss(model, batch):
    predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))
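
Taking the log of an explicit softmax can underflow once a predicted probability saturates at 0. A numerically safer sketch of the same loss (assuming the `model`/`batch` interface above) fuses the two steps:

def batch_loss_stable(model, batch):
    # Fused log-softmax + cross-entropy avoids evaluating log(0).
    logits = tf.matmul(batch.x, model.weights) + model.bias
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf.one_hot(batch.y, 10), logits=logits))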
Example #23
  def GetProjectLastDim(cls, inputs, weight, input_dim, output_dim, proj_obj):
    """Linear projection on the last dim of the input tensor along with pruning.

    This is a TPU efficient implementation to avoid reshaping inputs to Rank-2
    tensor by using Einsum for the compute.

    Args:
      inputs: An input Tensor, the last dimension of which is input_dim.
      weight: A weight matrix with shape [input_dim, output_dim].
      input_dim: An integer or a symbolic dim, the last dimension of the inputs.
      output_dim: An integer or a symbolic dim, the last dimension of the
                  outputs.
      proj_obj: a ProjectionLayer object.

    Returns:
      An output Tensor of the same rank as inputs, the last dimension is
      output_dim.
    """
    theta = proj_obj.theta
    p = proj_obj.params
    input_dim = int(
        symbolic.ToStatic(input_dim) if symbolic.IsExpr(input_dim)
        else input_dim)
    output_dim = int(
        symbolic.ToStatic(output_dim) if symbolic.IsExpr(output_dim)
        else output_dim)
    if (py_utils.use_tpu() and inputs.shape is not None and
        inputs.shape.rank is not None and inputs.shape.rank < 26):
      # Avoids reshape if feasible and uses Einsum.
      if inputs.shape.rank == 2:
        outputs = tf.matmul(inputs, weight)
      else:
        outputs = cls.GetEinSumResult(inputs, proj_obj)
    else:
      if p.pruning_hparams_dict['compress_input']:
        blocked_inputs = tf.reshape(
            inputs,
            py_utils.ToStaticShape(
                [-1, p.pruning_hparams_dict['input_block_size']]))
        compressed_inputs = tf.reshape(
            py_utils.Matmul(blocked_inputs, theta.b_matrix_tfvar),
            py_utils.ToStaticShape([
                -1, input_dim //
                p.pruning_hparams_dict['input_compression_factor']
            ]))
      else:
        compressed_inputs = tf.reshape(inputs,
                                       py_utils.ToStaticShape([-1, input_dim]))

      intermediate_result = py_utils.Matmul(compressed_inputs,
                                            theta.c_matrix_tfvar)

      if p.pruning_hparams_dict['compress_output']:
        blocked_intermediate_result = tf.reshape(
            intermediate_result,
            py_utils.ToStaticShape([
                -1, p.pruning_hparams_dict['output_block_size'] //
                p.pruning_hparams_dict['output_compression_factor']
            ]))
        outputs = py_utils.Matmul(blocked_intermediate_result,
                                  theta.d_matrix_tfvar)
      else:
        outputs = intermediate_result

      outputs = tf.reshape(
          outputs,
          tf.concat([
              tf.cast(py_utils.GetShape(inputs)[:-1], tf.int32),
              py_utils.ToStaticShape([output_dim])
          ],
                    axis=0))

    return outputs
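
The reshape-free branch above delegates to `cls.GetEinSumResult`; the essential trick is a single einsum over the last axis. A standalone sketch for rank-3 inputs (hypothetical, and without the pruning/compression machinery):

def project_last_dim_rank3(inputs, weight):
    # [batch, time, input_dim] x [input_dim, output_dim]
    #   -> [batch, time, output_dim], with no intermediate rank-2 reshape.
    return tf.einsum('bti,io->bto', inputs, weight)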
Example #24
File: DEN.py Project: AuMgLi/DEN
    def build_model(self,
                    task_id,
                    prediction=False,
                    splitting=False,
                    expansion=None):
        bottom = self.X
        if splitting:
            for i in range(1, self.n_layers):
                prev_w = np.copy(self.prev_W_split['layer%d' % i +
                                                   '/weight:0'])
                cur_w = np.copy(self.cur_W['layer%d' % i + '/weight:0'])
                indices = self.unit_indices['layer%d' % i]
                next_dim = prev_w.shape[1]
                if 2 <= i < self.n_layers:
                    below_dim = prev_w.shape[0]
                    below_indices = self.unit_indices['layer%d' % (i - 1)]
                    bottom_p_prev_ary, bottom_p_new_ary, bottom_c_prev_ary, bottom_c_new_ary = [], [], [], []
                    for j in range(below_dim):
                        if j in below_indices:
                            bottom_p_prev_ary.append(prev_w[j, :])
                            bottom_p_new_ary.append(cur_w[j, :])
                            bottom_c_prev_ary.append(cur_w[j, :])
                            bottom_c_new_ary.append(cur_w[j, :])
                        else:
                            bottom_p_prev_ary.append(cur_w[j, :])
                            bottom_c_prev_ary.append(cur_w[j, :])
                    prev_w = np.array(bottom_p_prev_ary +
                                      bottom_p_new_ary).astype(np.float32)
                    cur_w = np.array(bottom_c_prev_ary +
                                     bottom_c_new_ary).astype(np.float32)

                prev_ary = []
                new_ary = []
                for j in range(next_dim):
                    if j in indices:
                        prev_ary.append(prev_w[:, j])
                        new_ary.append(cur_w[:, j])  # will be expanded
                    else:
                        prev_ary.append(cur_w[:, j])
                # fully connected, L1
                expanded_w = np.array(prev_ary + new_ary).T.astype(np.float32)
                expanded_b = np.concatenate(
                    (self.prev_W_split['layer%d' % i + '/biases:0'],
                     np.random.rand(len(new_ary)))).astype(np.float32)
                with tf.variable_scope('layer%d' % i):
                    w = tf.get_variable('weight',
                                        initializer=expanded_w,
                                        trainable=True)
                    b = tf.get_variable('biases',
                                        initializer=expanded_b,
                                        trainable=True)
                self.params[w.name] = w
                self.params[b.name] = b
                bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
            w, b = self.extend_top('layer%d' % self.n_layers, len(new_ary))
            self.y = tf.matmul(bottom, w) + b
        elif expansion:
            for i in range(1, self.n_layers):
                if i == 1:
                    w, b = self.extend_bottom('layer%d' % i, self.ex_k)
                else:
                    w, b = self.extend_param('layer%d' % i, self.ex_k)
                bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
            w, b = self.extend_param('layer%d' % self.n_layers, self.ex_k)
            self.y = tf.matmul(bottom, w) + b
        elif prediction:
            stamp = self.time_stamp['task%d' % task_id]
            for i in range(1, self.n_layers):
                w = self.get_variable('layer%d' % i, 'weight', False)
                b = self.get_variable('layer%d' % i, 'biases', False)
                w = w[:stamp[i - 1], :stamp[i]]
                b = b[:stamp[i]]
                print(' [*] layer %d, shape : %s' %
                      (i, w.get_shape().as_list()))

                bottom = tf.nn.relu(tf.matmul(bottom, w) + b)

            w = self.get_variable('layer%d' % self.n_layers,
                                  'weight_%d' % task_id, False)
            b = self.get_variable('layer%d' % self.n_layers,
                                  'biases_%d' % task_id, False)
            w = w[:stamp[self.n_layers - 1], :stamp[self.n_layers]]
            b = b[:stamp[self.n_layers]]
            self.y = tf.matmul(bottom, w) + b
        else:
            for i in range(1, self.n_layers):
                w = self.get_variable('layer%d' % i, 'weight', True)
                b = self.get_variable('layer%d' % i, 'biases', True)
                bottom = tf.nn.relu(tf.matmul(bottom, w) + b)
            prev_dim = bottom.get_shape().as_list()[1]
            w = self.create_variable('layer%d' % self.n_layers,
                                     'weight_%d' % task_id,
                                     [prev_dim, self.n_classes], True)
            b = self.create_variable('layer%d' % self.n_layers,
                                     'biases_%d' % task_id, [self.n_classes],
                                     True)
            self.y = tf.matmul(bottom, w) + b

        self.yhat = tf.nn.sigmoid(self.y)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.y,
                                                    labels=self.Y))

        if prediction:
            return
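
The `prediction` branch works because `self.time_stamp` records how many units each layer had when the task finished training; slicing the grown matrices back to those sizes ignores units added by later expansions. A toy numpy sketch with hypothetical sizes:

import numpy as np

# stamp[i] = units layer i had when this task finished training (hypothetical).
stamp = [784, 312, 10]
w_full = np.zeros((800, 340), dtype=np.float32)  # layer grown by later tasks
w_task = w_full[:stamp[0], :stamp[1]]            # slice back to task-time shape
assert w_task.shape == (784, 312)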
Example #25
    def testConcurrentNumericsAlertsAreRegisteredCorrectly(self):
        num_threads = 3
        num_runs_per_thread = 2
        total_num_runs = num_threads * num_runs_per_thread

        # Before any Session runs, the report ought to be empty.
        self.assertEqual([], self._debug_data_server.numerics_alert_report())

        with tf.Session() as sess:
            x_init_val = np.array([[2.0], [-1.0]])
            y_init_val = np.array([[0.0], [-0.25]])
            z_init_val = np.array([[0.0, 3.0], [-1.0, 0.0]])

            x_init = tf.constant(x_init_val, shape=[2, 1], name="x_init")
            x = tf.Variable(x_init, name="x")
            y_init = tf.constant(y_init_val, shape=[2, 1])
            y = tf.Variable(y_init, name="y")
            z_init = tf.constant(z_init_val, shape=[2, 2])
            z = tf.Variable(z_init, name="z")

            u = tf.div(x, y, name="u")  # Produces an Inf.
            v = tf.matmul(z, u, name="v")  # Produces NaN and Inf.

            sess.run(x.initializer)
            sess.run(y.initializer)
            sess.run(z.initializer)

            run_options_list = []
            for i in range(num_threads):
                run_options = tf.RunOptions(output_partition_graphs=True)
                # Use different grpc:// URL paths so that each thread opens a
                # separate gRPC stream to the debug data server, simulating a
                # multi-worker setting.
                tf_debug.watch_graph(
                    run_options,
                    sess.graph,
                    debug_ops=["DebugNumericSummary"],
                    debug_urls=[self._debug_url + "/thread%d" % i])
                run_options_list.append(run_options)

            def run_v(thread_id):
                for _ in range(num_runs_per_thread):
                    sess.run(v, options=run_options_list[thread_id])

            run_threads = []
            for thread_id in range(num_threads):
                thread = threading.Thread(
                    target=functools.partial(run_v, thread_id))
                thread.start()
                run_threads.append(thread)

            for thread in run_threads:
                thread.join()

        report = self._debug_data_server.numerics_alert_report()
        self.assertEqual(2, len(report))
        self.assertTrue(report[0].device_name.lower().endswith("cpu:0"))
        self.assertEqual("u:0", report[0].tensor_name)
        self.assertGreater(report[0].first_timestamp, 0)
        self.assertEqual(0, report[0].nan_event_count)
        self.assertEqual(0, report[0].neg_inf_event_count)
        self.assertEqual(total_num_runs, report[0].pos_inf_event_count)
        self.assertTrue(report[1].device_name.lower().endswith("cpu:0"))
        self.assertEqual("u:0", report[0].tensor_name)
        self.assertGreaterEqual(report[1].first_timestamp,
                                report[0].first_timestamp)
        self.assertEqual(total_num_runs, report[1].nan_event_count)
        self.assertEqual(total_num_runs, report[1].neg_inf_event_count)
        self.assertEqual(0, report[1].pos_inf_event_count)
Example #26
def create_model(
    albert_config,
    is_training,
    input_ids,
    input_mask,
    segment_ids,
    labels,
    num_labels,
    use_one_hot_embeddings,
    max_seq_length,
    dropout_prob,
    hub_module,
):
    """Creates a classification model."""
    bsz_per_core = tf.shape(input_ids)[0]

    input_ids = tf.reshape(input_ids,
                           [bsz_per_core * num_labels, max_seq_length])
    input_mask = tf.reshape(input_mask,
                            [bsz_per_core * num_labels, max_seq_length])
    token_type_ids = tf.reshape(segment_ids,
                                [bsz_per_core * num_labels, max_seq_length])

    (output_layer, _) = fine_tuning_utils.create_albert(
        albert_config=albert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=token_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        use_einsum=True,
        hub_module=hub_module,
    )

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights",
        [1, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02),
    )

    output_bias = tf.get_variable("output_bias", [1],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer,
                                         keep_prob=1 - dropout_prob)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [bsz_per_core, num_labels])
        probabilities = tf.nn.softmax(logits, axis=-1)
        predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels,
                                    depth=tf.cast(num_labels, dtype=tf.int32),
                                    dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, logits, predictions)
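
To make the multiple-choice plumbing explicit: every (example, choice) pair is scored independently with a single [1, hidden_size] weight, and the scores are regrouped per example before the softmax. A toy sketch with hypothetical sizes:

# Toy check of the logit reshape above (sizes are illustrative).
bsz, num_choices, hidden = 4, 4, 768
pooled = tf.random_normal([bsz * num_choices, hidden])  # one row per choice
w = tf.random_normal([1, hidden])
scores = tf.matmul(pooled, w, transpose_b=True)  # [bsz * num_choices, 1]
scores = tf.reshape(scores, [bsz, num_choices])  # regroup per example
probs = tf.nn.softmax(scores, axis=-1)           # softmax over the choices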
Example #27
def connector_capsule_mat(input_tensor,
                          position_grid,
                          input_activation,
                          input_dim,
                          output_dim,
                          layer_name,
                          num_routing=3,
                          num_in_atoms=3,
                          num_out_atoms=3,
                          leaky=False,
                          final_beta=1.0,
                          min_var=0.0005):
    """Final Capsule Layer with Pose Matrices and Shared connections."""
    # One weight tensor for each capsule of the layer below: w: [8*128, 8*10]
    with tf.variable_scope(layer_name):
        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('input_center_connector'):
            utils.activation_summary(input_tensor)
        weights = utils.weight_variable(
            [input_dim, num_out_atoms, output_dim * num_out_atoms],
            stddev=0.01)
        # weights = tf.clip_by_norm(weights, 1.0, axes=[1])
        activation_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1],
                                                init_value=1.0,
                                                name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1],
                                           init_value=2.0,
                                           name='sigma_biases')

        with tf.name_scope('Wx_plus_b'):
            # input_tensor: [x, 128, 8, h, w]
            input_shape = tf.shape(input_tensor)
            input_trans = tf.transpose(input_tensor, [1, 0, 3, 4, 2])
            input_share = tf.reshape(input_trans,
                                     [input_dim, -1, num_in_atoms])
            # input_expanded: [x, 128, 8, 1]
            wx_share = tf.matmul(input_share, weights)
            # sqr_num_out_atoms = num_out_atoms
            num_out_atoms *= num_out_atoms
            wx_trans = tf.reshape(wx_share, [
                input_dim, input_shape[0], input_shape[3], input_shape[4],
                num_out_atoms, output_dim
            ])
            wx_trans.set_shape(
                (input_dim, None, input_tensor.get_shape()[3],
                 input_tensor.get_shape()[4], num_out_atoms, output_dim))
            h, w, _ = position_grid.get_shape()
            height = h
            width = w
            # t_pose = tf.transpose(position_grid, [2, 0, 1])
            # t_pose_exp = tf.scatter_nd([[sqr_num_out_atoms -1],
            #   [2 * sqr_num_out_atoms - 1]], t_pose, [num_out_atoms, height, width])
            # pose_g_exp = tf.transpose(t_pose_exp, [1, 2, 0])
            zero_grid = tf.zeros([height, width, num_out_atoms - 2])
            pose_g_exp = tf.concat([position_grid, zero_grid], axis=2)
            pose_g = tf.expand_dims(
                tf.expand_dims(tf.expand_dims(pose_g_exp, -1), 0), 0)
            wx_posed = wx_trans + pose_g
            wx_posed_t = tf.transpose(wx_posed, [1, 0, 2, 3, 5, 4])

            # Wx_reshaped: [x, 128, 10, 8]
            wx = tf.reshape(wx_posed_t, [
                -1, input_dim * height * width, output_dim, num_out_atoms, 1, 1
            ])
        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10]
            logit_shape = [input_dim * height * width, output_dim, 1, 1, 1]
            for _ in range(4):
                input_activation = tf.expand_dims(input_activation, axis=-1)
            activation, center = update_em_routing(
                wx=wx,
                input_activation=input_activation,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms,
                num_routing=num_routing,
                output_dim=output_dim,
                leaky=leaky,
                final_beta=final_beta / 4,
                min_var=min_var,
            )
        out_activation = tf.squeeze(activation, axis=[1, 3, 4, 5])
        out_center = tf.squeeze(center, axis=[1, 4, 5])
        return tf.sigmoid(out_activation), out_center
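
The `pose_g_exp` construction above simply zero-pads the 2-D position grid so it can be added into the first two of the `num_out_atoms` pose coordinates. A tiny standalone sketch (sizes are illustrative):

h, w, n_atoms = 6, 6, 9
position_grid = tf.random_normal([h, w, 2])       # (x, y) per spatial cell
zero_grid = tf.zeros([h, w, n_atoms - 2])
pose_g_exp = tf.concat([position_grid, zero_grid], axis=2)  # [6, 6, 9]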
Example #28
# Playground: S=Start, G=Goal, F=Frozen, H=Hole
# SFFF
# FHFH
# FFFH
# HFFG

# hyper parameters
EPISODES = 20000
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.99
EPSILON = 0.1

# 16x4 network definition
input_state = tf.placeholder(tf.float32, shape=(1, 16))
weights = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
output_Q = tf.matmul(input_state, weights)
predicted_action = tf.argmax(output_Q, 1)

# loss function
next_Q = tf.placeholder(tf.float32, shape=(1, 4))
loss = tf.reduce_sum(tf.square(next_Q - output_Q))

# optimization
optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
train = optimizer.minimize(loss)

# training
init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)
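
    # Hedged sketch of a typical epsilon-greedy training loop for the graph
    # above; assumes OpenAI Gym's FrozenLake-v0 environment is available.
    import gym
    env = gym.make('FrozenLake-v0')
    for episode in range(EPISODES):
        state = env.reset()
        done = False
        while not done:
            # Greedy action from the current Q estimates (one-hot state).
            action, Q = session.run(
                [predicted_action, output_Q],
                feed_dict={input_state: np.eye(16)[state:state + 1]})
            # Epsilon-greedy exploration.
            if np.random.rand() < EPSILON:
                action[0] = env.action_space.sample()
            next_state, reward, done, _ = env.step(action[0])
            # Bellman target for the action actually taken.
            Q_next = session.run(
                output_Q,
                feed_dict={input_state: np.eye(16)[next_state:next_state + 1]})
            target = Q
            target[0, action[0]] = reward + DISCOUNT_FACTOR * np.max(Q_next)
            session.run(train, feed_dict={
                input_state: np.eye(16)[state:state + 1],
                next_Q: target})
            state = next_state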
Example #29
 def call(self, inputs, state):
     gate_inputs = tf.matmul(tf.concat([inputs, state], axis=1),
                             self._weights)
     gate_inputs = tf.nn.bias_add(gate_inputs, self._bias)
     output = self._activation(gate_inputs)
     return output, output
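
This `call` implements a plain (Elman-style) RNN cell whose output doubles as the next state. A hedged usage sketch, assuming the enclosing class subclasses `tf.nn.rnn_cell.RNNCell` with a matching constructor (`MyRNNCell` is a hypothetical name):

cell = MyRNNCell(num_units=64)            # hypothetical constructor
inputs = tf.random_normal([32, 10, 128])  # [batch, time, features]
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
# outputs: [32, 10, 64]; final_state: [32, 64]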
Example #30
def model_fn(model, features, labels, mode):

  def sum_pooling(embeddings, slots):
    slot_embeddings = []
    for slot in slots:
      slot_embeddings.append(embeddings[_SLOT_2_IDX[slot]])
    if len(slot_embeddings) == 1:
      return slot_embeddings[0]
    return tf.add_n(slot_embeddings)

  global_step = tf.train.get_or_create_global_step()
  xavier_initializer = tf.glorot_normal_initializer()

  flt.feature.FeatureSlot.set_default_bias_initializer(
        tf.zeros_initializer())
  flt.feature.FeatureSlot.set_default_vec_initializer(
        tf.random_uniform_initializer(-0.0078125, 0.0078125))
  flt.feature.FeatureSlot.set_default_bias_optimizer(
        tf.train.FtrlOptimizer(learning_rate=0.01))
  flt.feature.FeatureSlot.set_default_vec_optimizer(
        tf.train.AdagradOptimizer(learning_rate=0.01))

  # deal with input cols
  categorical_embed = []
  num_slot, embed_dim = len(_SLOT_2_BUCKET), 8

  with tf.variable_scope("follower"):
    for slot, bucket_size in _SLOT_2_BUCKET:
      fs = model.add_feature_slot(slot, bucket_size)
      fc = model.add_feature_column(fs)
      categorical_embed.append(fc.add_vector(embed_dim))


  # concatenate all embeddings
  slot_embeddings = categorical_embed
  concat_embedding = tf.concat(slot_embeddings, axis=1)
  output_size = len(slot_embeddings) * embed_dim

  model.freeze_slots(features)

  with tf.variable_scope("follower"):
    fc1_size, fc2_size, fc3_size = 512, 256, 128
    w1 = tf.get_variable('w1', shape=[output_size, fc1_size], dtype=tf.float32,
                         initializer=xavier_initializer)
    b1 = tf.get_variable('b1', shape=[fc1_size], dtype=tf.float32,
                         initializer=tf.zeros_initializer())
    w2 = tf.get_variable('w2', shape=[fc1_size, fc2_size], dtype=tf.float32,
                         initializer=xavier_initializer)
    b2 = tf.get_variable('b2', shape=[fc2_size], dtype=tf.float32,
                         initializer=tf.zeros_initializer())
    w3 = tf.get_variable('w3', shape=[fc2_size, fc3_size], dtype=tf.float32,
                         initializer=xavier_initializer)
    b3 = tf.get_variable('b3', shape=[fc3_size], dtype=tf.float32,
                         initializer=tf.zeros_initializer())

  act1_l = tf.nn.relu(tf.nn.bias_add(tf.matmul(concat_embedding, w1), b1))
  act1_l = tf.layers.batch_normalization(act1_l, training=True)
  act2_l = tf.nn.relu(tf.nn.bias_add(tf.matmul(act1_l, w2), b2))
  act2_l = tf.layers.batch_normalization(act2_l, training=True)
  embedding = tf.nn.relu(tf.nn.bias_add(tf.matmul(act2_l, w3), b3))
  embedding = tf.layers.batch_normalization(embedding, training=True)

  if mode == tf.estimator.ModeKeys.TRAIN:
    embedding_grad = model.send('embedding', embedding, require_grad=True)
    optimizer = tf.train.GradientDescentOptimizer(0.1)
    train_op = model.minimize(
        optimizer, embedding, grad_loss=embedding_grad, global_step=global_step)
    return model.make_spec(mode, loss=tf.math.reduce_mean(embedding), train_op=train_op)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    return model.make_spec(mode, predictions={'embedding': embedding})