示例#1
0
文件: model.py 项目: lz-chen/kbot
def model(hparams, X, past=None, scope='model', reuse=tf.AUTO_REUSE):
    """Build the transformer graph and return its logits and per-layer state.

    Args:
        hparams: Hyper-parameter object; this function reads ``n_ctx``,
            ``n_embd``, ``n_vocab`` and ``n_layer`` from it.
        X: Token-id tensor. It is unpacked into ``(batch, sequence)`` via
            ``shape_list``, so it is expected to be rank 2.
        past: Optional cached state from earlier steps. When given it is
            unstacked along axis 1 into one entry per layer, and its
            second-to-last dimension is used as the position offset.
        scope: Variable scope name under which everything is created.
        reuse: Passed through to ``tf.variable_scope``.

    Returns:
        Dict with two entries:
            'present': the per-layer ``present`` outputs of ``block``,
                stacked along axis 1.
            'logits': tensor reshaped to ``[batch, sequence, n_vocab]``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        batch, sequence = shape_list(X)

        # Position and token embedding tables.
        wpe = tf.get_variable(
            'wpe', [hparams.n_ctx, hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.01))
        wte = tf.get_variable(
            'wte', [hparams.n_vocab, hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.02))

        # With no cache, positions start at zero; otherwise they continue
        # after the cached length.
        if past is None:
            past_length = 0
        else:
            past_length = tf.shape(past)[-2]
        token_emb = tf.gather(wte, X)
        position_emb = tf.gather(wpe, positions_for(X, past_length))
        h = token_emb + position_emb

        # Transformer stack: one cached-state entry (or None) per layer.
        if past is not None:
            layer_pasts = tf.unstack(past, axis=1)
        else:
            layer_pasts = [None] * hparams.n_layer
        assert len(layer_pasts) == hparams.n_layer

        presents = []
        for layer, layer_past in enumerate(layer_pasts):
            h, present = block(h, 'h%d' % layer, past=layer_past,
                               hparams=hparams)
            if layer == 10:
                # Expose the activations after layer 10 through the
                # 'checkpoints' graph collection.
                tf.add_to_collection('checkpoints', h)
            presents.append(present)
        stacked_presents = tf.stack(presents, axis=1)
        h = norm(h, 'ln_f')

        # Output projection reuses the token embedding table (transposed),
        # applied on a flattened [batch * sequence, n_embd] view.
        h_flat = tf.reshape(h, [batch * sequence, hparams.n_embd])
        flat_logits = tf.matmul(h_flat, wte, transpose_b=True)
        logits = tf.reshape(flat_logits,
                            [batch, sequence, hparams.n_vocab])

        return {'present': stacked_presents, 'logits': logits}
示例#2
0
文件: model.py 项目: lz-chen/kbot
def conv1d(x, scope, nf, *, w_init_stdev=0.02):
    """Apply a learned linear map plus bias to the last dimension of ``x``.

    Args:
        x: Input tensor; its last dimension (``nx``) is the one transformed.
        scope: Variable scope in which 'w' and 'b' are created.
        nf: Size of the output's last dimension.
        w_init_stdev: Standard deviation of the normal initializer for 'w'.

    Returns:
        Tensor with the same leading dimensions as ``x`` and last
        dimension ``nf``.
    """
    with tf.variable_scope(scope):
        *leading_dims, nx = shape_list(x)
        kernel = tf.get_variable(
            'w', [1, nx, nf],
            initializer=tf.random_normal_initializer(stddev=w_init_stdev))
        bias = tf.get_variable(
            'b', [nf], initializer=tf.constant_initializer(0))

        # Collapse everything to 2-D so the whole op is one matmul, then
        # restore the original leading dimensions.
        x_2d = tf.reshape(x, [-1, nx])
        kernel_2d = tf.reshape(kernel, [-1, nf])
        projected = tf.matmul(x_2d, kernel_2d) + bias
        return tf.reshape(projected, leading_dims + [nf])
示例#3
0
文件: model.py 项目: lz-chen/kbot
 def mask_attn_weights(w):
     """Mask attention weights using the matrix from ``attention_mask``.

     ``w`` has shape [batch, heads, dst_sequence, src_sequence], where
     information flows from src to dst. Entries where the mask is 1 are
     kept unchanged; entries where it is 0 are replaced by -1e10.
     """
     _, _, n_dst, n_src = shape_list(w)
     mask = attention_mask(n_dst, n_src, dtype=w.dtype)
     # Reshape so the mask broadcasts over the batch and head dimensions.
     mask = tf.reshape(mask, [1, 1, n_dst, n_src])
     kept = w * mask
     large = tf.cast(1e10, w.dtype)
     suppressed = large * (1 - mask)
     return kept - suppressed
示例#4
0
 def __init__(self):
     """Build a small attention-pooling graph over five embedded tokens.

     Each of the 5 input ids is embedded, scored by a two-layer MLP
     (embSize -> embSize/2 -> 1), the 5 scores are softmax-normalised,
     and ``self.finalState`` is the score-weighted sum of the embeddings,
     reshaped to [-1, embSize].
     """
     self.embedding = self.getEmb()
     self.embSize = self.embedding.shape[1]
     self.vocabSize = self.embedding.shape[0]
     hidden_units = int(self.embSize / 2)

     # Integer token ids, five per example.
     self.x = tf.placeholder(tf.int32, [None, 5])

     with tf.variable_scope("training_variable"):
         w_hidden = tf.Variable(
             tf.truncated_normal(shape=[self.embSize, hidden_units],
                                 stddev=0.08))
         w_out = tf.Variable(
             tf.truncated_normal(shape=[hidden_units, 1], stddev=0.08))
         self.weights = {"MLP1": w_hidden, "MLP2": w_out}

         b_hidden = tf.Variable(
             tf.constant(0.01, shape=[hidden_units], dtype=tf.float32))
         b_out = tf.Variable(tf.constant(0.01, shape=[1], dtype=tf.float32))
         self.biases = {"MLP1": b_hidden, "MLP2": b_out}

     self.inputEmb = tf.nn.embedding_lookup(self.embedding, self.x)

     # Score every token independently: flatten to [-1, embSize] first.
     flat_emb = tf.reshape(self.inputEmb, [-1, self.embSize])
     hidden = tf.matmul(flat_emb, self.weights["MLP1"]) + self.biases["MLP1"]
     scores = tf.matmul(tf.nn.relu(hidden),
                        self.weights["MLP2"]) + self.biases["MLP2"]

     # Regroup the scores per example and normalise across the 5 tokens.
     scores = tf.reshape(scores, [-1, 5])
     attention = tf.reshape(tf.nn.softmax(scores), [-1, 1, 5])

     # [-1, 1, 5] x [-1, 5, embSize] -> weighted sum of the embeddings.
     pooled = tf.matmul(attention, self.inputEmb)
     self.finalState = tf.reshape(pooled, [-1, self.embSize])
示例#5
0
文件: pca.py 项目: Stardust-lf/Utils
def pca(x, dim=2):
    """Project the rows of ``x`` onto their top ``dim`` principal components.

    Args:
        x: 2-D floating-point tensor with one sample per row and one
            feature per column.
        dim: Number of principal components to keep. Must not exceed the
            number of columns of ``x``.

    Returns:
        Tensor of shape [rows, dim]: the column-centred data expressed in
        the basis of the ``dim`` eigenvectors of the covariance matrix
        that have the largest eigenvalues (largest first).

    Raises:
        ValueError: If the number of columns is statically known and
            ``dim`` is larger than it.
    """
    with tf.name_scope("PCA"):
        # Validate against the static shape. Asserting on the return value
        # of tf.assert_less checks an op's truthiness, not the comparison.
        n_features = x.get_shape().as_list()[1]
        if n_features is not None and dim > n_features:
            raise ValueError(
                "dim (%d) must not exceed the number of features (%d)"
                % (dim, n_features))

        # Use the dynamic row count so an unknown batch size still works,
        # and match x's dtype so the division below is well-typed.
        m = tf.cast(tf.shape(x)[0], x.dtype)

        # Centre each feature (column): the covariance below is taken over
        # columns, so the per-column mean is what has to be removed.
        mean = tf.reduce_mean(x, axis=0, keepdims=True)
        x_new = x - mean
        cov = tf.matmul(x_new, x_new, transpose_a=True) / (m - 1)

        # eigh returns eigenvectors as the COLUMNS of v, so the top-k
        # components are selected along axis 1.
        e, v = tf.linalg.eigh(cov, name="eigh")
        e_index_sort = tf.math.top_k(e, sorted=True, k=dim)[1]
        v_new = tf.gather(v, indices=e_index_sort, axis=1)

        projected = tf.matmul(x_new, v_new)
    return projected
示例#6
0
    def __init__(self,
                 input_width=227,
                 input_height=227,
                 input_channels=3,
                 num_classes=1000,
                 learning_rate=0.01,
                 momentum=0.9,
                 keep_prob=0.5):
        """Build the complete training graph for the 8-layer classifier.

        The constructor creates the input placeholders, five convolutional
        layers, two fully-connected layers with dropout, a logits layer,
        the cross-entropy loss, a momentum-SGD training op and an accuracy
        op. Summaries are registered for the loss, the accuracy, the
        cross-entropy tensor and every non-None gradient.

        Args:
            input_width: Width of the input images.
            input_height: Height of the input images.
            input_channels: Number of channels of the input images.
            num_classes: Size of the label vector and of the logits layer.
            learning_rate: Learning rate given to the momentum optimizer.
            momentum: Momentum given to the momentum optimizer.
            keep_prob: Stored on ``self.keep_prob``. This constructor does
                not wire it into the graph itself; dropout is driven by
                the ``dropout_keep_prob`` placeholder (presumably fed with
                this value by the caller - TODO confirm).

        Attributes created here that callers are expected to use:
            X: float32 image placeholder [None, height, width, channels].
            Y: float32 label placeholder [None, num_classes].
            dropout_keep_prob: scalar float32 placeholder.
            training_operation: op applying the computed gradients.
            accuracy_operation: mean of argmax(logits) == argmax(Y).
        """

        # From article: The learning rate was initialized at 0.01.
        # From article: We trained our models using stochastic gradient descent with a batch size of 128 examples,
        # momentum of 0.9, and weight decay of 0.0005

        # From article: We initialized the weights in each layer from a zero-mean Gaussian distribution with standard
        # deviation 0.01.

        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.keep_prob = keep_prob

        self.random_mean = 0
        self.random_stddev = 0.01

        # ----------------------------------------------------------------------------------------------------

        # From article: We initialized the neuron biases in the second, fourth, and fifth convolutional layers, as well
        # as in the fully-connected hidden layers, with the constant 1. ... We initialized the neuron biases in the
        # remaining layers with the constant 0.

        # Input: 227x227x3.
        with tf.name_scope('input'):
            self.X = tf.placeholder(dtype=tf.float32,
                                    shape=[
                                        None, self.input_height,
                                        self.input_width, self.input_channels
                                    ],
                                    name='X')

        # Labels: 1000.
        with tf.name_scope('labels'):
            self.Y = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.num_classes],
                                    name='Y')

        # Dropout keep prob.
        with tf.name_scope('dropout'):
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                    shape=(),
                                                    name='dropout_keep_prob')

        # Layer 1.
        # [Input] ==> 227x227x3
        # --> 227x227x3 ==> [Convolution: size=(11x11x3)x96, strides=4, padding=valid] ==> 55x55x96
        # --> 55x55x96 ==> [ReLU] ==> 55x55x96
        # --> 55x55x96 ==> [Local Response Normalization] ==> 55x55x96
        # --> 55x55x96 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 27x27x96
        # --> [Output] ==> 27x27x96
        # Note: 48*2=96, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer1'):
            layer1_activations = self.__conv(
                input=self.X,
                filter_width=11,
                filter_height=11,
                filters_count=96,
                stride_x=4,
                stride_y=4,
                padding='VALID',
                init_biases_with_the_constant_1=False)
            layer1_lrn = self.__local_response_normalization(
                input=layer1_activations)
            layer1_pool = self.__max_pool(input=layer1_lrn,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 2.
        # [Input] ==> 27x27x96
        # --> 27x27x96 ==> [Convolution: size=(5x5x96)x256, strides=1, padding=same] ==> 27x27x256
        # --> 27x27x256 ==> [ReLU] ==> 27x27x256
        # --> 27x27x256 ==> [Local Response Normalization] ==> 27x27x256
        # --> 27x27x256 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 13x13x256
        # --> [Output] ==> 13x13x256
        # Note: 128*2=256, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer2'):
            layer2_activations = self.__conv(
                input=layer1_pool,
                filter_width=5,
                filter_height=5,
                filters_count=256,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)
            layer2_lrn = self.__local_response_normalization(
                input=layer2_activations)
            layer2_pool = self.__max_pool(input=layer2_lrn,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 3.
        # [Input] ==> 13x13x256
        # --> 13x13x256 ==> [Convolution: size=(3x3x256)x384, strides=1, padding=same] ==> 13x13x384
        # --> 13x13x384 ==> [ReLU] ==> 13x13x384
        # --> [Output] ==> 13x13x384
        # Note: 192*2=384, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer3'):
            layer3_activations = self.__conv(
                input=layer2_pool,
                filter_width=3,
                filter_height=3,
                filters_count=384,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=False)

        # Layer 4.
        # [Input] ==> 13x13x384
        # --> 13x13x384 ==> [Convolution: size=(3x3x384)x384, strides=1, padding=same] ==> 13x13x384
        # --> 13x13x384 ==> [ReLU] ==> 13x13x384
        # --> [Output] ==> 13x13x384
        # Note: 192*2=384, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer4'):
            layer4_activations = self.__conv(
                input=layer3_activations,
                filter_width=3,
                filter_height=3,
                filters_count=384,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)

        # Layer 5.
        # [Input] ==> 13x13x384
        # --> 13x13x384 ==> [Convolution: size=(3x3x384)x256, strides=1, padding=same] ==> 13x13x256
        # --> 13x13x256 ==> [ReLU] ==> 13x13x256
        # --> 13x13x256 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 6x6x256
        # --> [Output] ==> 6x6x256
        # Note: 128*2=256, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer5'):
            layer5_activations = self.__conv(
                input=layer4_activations,
                filter_width=3,
                filter_height=3,
                filters_count=256,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)
            layer5_pool = self.__max_pool(input=layer5_activations,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 6.
        # [Input] ==> 6x6x256=9216
        # --> 9216 ==> [Fully Connected: neurons=4096] ==> 4096
        # --> 4096 ==> [ReLU] ==> 4096
        # --> 4096 ==> [Dropout] ==> 4096
        # --> [Output] ==> 4096
        # Note: 2048*2=4096, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer6'):
            # The flattened size is derived from the static shape of the
            # last pooling output rather than hard-coded.
            pool5_shape = layer5_pool.get_shape().as_list()
            flattened_input_size = pool5_shape[1] * pool5_shape[
                2] * pool5_shape[3]
            layer6_fc = self.__fully_connected(
                input=tf.reshape(layer5_pool, shape=[-1,
                                                     flattened_input_size]),
                inputs_count=flattened_input_size,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            layer6_dropout = self.__dropout(input=layer6_fc)

        # Layer 7.
        # [Input] ==> 4096
        # --> 4096 ==> [Fully Connected: neurons=4096] ==> 4096
        # --> 4096 ==> [ReLU] ==> 4096
        # --> 4096 ==> [Dropout] ==> 4096
        # --> [Output] ==> 4096
        # Note: 2048*2=4096, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer7'):
            layer7_fc = self.__fully_connected(
                input=layer6_dropout,
                inputs_count=4096,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            layer7_dropout = self.__dropout(input=layer7_fc)

        # Layer 8.
        # [Input] ==> 4096
        # --> 4096 ==> [Logits: neurons=1000] ==> 1000
        # --> [Output] ==> 1000
        with tf.name_scope('layer8'):
            layer8_logits = self.__fully_connected(
                input=layer7_dropout,
                inputs_count=4096,
                outputs_count=self.num_classes,
                relu=False,
                name='logits')

        # Cross Entropy.
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=layer8_logits, labels=self.Y, name='cross_entropy')
            self.__variable_summaries(cross_entropy)

        # Training.
        with tf.name_scope('training'):
            loss_operation = tf.reduce_mean(cross_entropy,
                                            name='loss_operation')
            tf.summary.scalar(name='loss', tensor=loss_operation)

            optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate, momentum=self.momentum)

            # self.training_operation = optimizer.minimize(loss_operation, name='training_operation')

            # compute_gradients/apply_gradients are called separately
            # (instead of minimize) so the gradients can be summarised below.
            grads_and_vars = optimizer.compute_gradients(loss_operation)
            self.training_operation = optimizer.apply_gradients(
                grads_and_vars, name='training_operation')

            for grad, var in grads_and_vars:
                if grad is not None:
                    with tf.name_scope(var.op.name + '/gradients'):
                        self.__variable_summaries(grad)

        # Accuracy.
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(layer8_logits, 1),
                                          tf.argmax(self.Y, 1),
                                          name='correct_prediction')
            self.accuracy_operation = tf.reduce_mean(tf.cast(
                correct_prediction, tf.float32),
                                                     name='accuracy_operation')
            tf.summary.scalar(name='accuracy', tensor=self.accuracy_operation)
示例#7
0
    return tf.nn.max_pool(x,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')


# Read the dataset from the "data/" directory with one-hot encoded labels.
mnist = input_data.read_data_sets("data/", one_hot=True)

# Build the network inside its own graph instead of the default graph.
g = tf.Graph()
with g.as_default():
    # Flattened 784-value inputs and 10-way one-hot targets.
    x = tf.placeholder("float", shape=[None, 784])
    y_ = tf.placeholder("float", shape=[None, 10])

    # First conv stage: 5x5 kernels, 1 input channel -> 32 feature maps.
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    # Restore the 28x28 single-channel image layout expected by conv2d.
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Second conv stage: 5x5 kernels, 32 -> 64 feature maps.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully-connected stage on the flattened 7*7*64 feature maps.
    # NOTE(review): 7x7 assumes conv2d preserves spatial size and each
    # pooling halves it (28 -> 14 -> 7) - confirm against the helpers.
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout on the hidden layer; keep_prob is fed at run time.
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
示例#8
0
    tf.Variable(tf.truncated_normal(fc_connection_shapes["f3_shape"][3]),
                name="f3_biases")
}

# Number of pixels per (square) image channel: image_size * image_size.
dataset_dict["total_image_size"] = dataset_dict["image_size"] * dataset_dict[
    "image_size"]

# Declare the input and output placeholders
input_img = tf.placeholder(tf.float32,
                           shape=[
                               BATCH_SIZE, dataset_dict["image_size"],
                               dataset_dict["image_size"],
                               dataset_dict["num_channels"]
                           ])
# Reshape to the same per-image dimensions, leaving the batch dimension
# to be inferred (-1).
img_4d_shaped = tf.reshape(input_img, [
    -1, dataset_dict["image_size"], dataset_dict["image_size"],
    dataset_dict["num_channels"]
])
labels = tf.placeholder(tf.float32, shape=[None, dataset_dict["num_labels"]])

# Convolution Layer 1 | Response Normalization | Max Pooling | ReLU
# The statements below run in this order: stride-4 convolution with SAME
# padding, bias add, then ReLU.
c_layer_1 = tf.nn.conv2d(img_4d_shaped,
                         conv_weights["c1_weights"],
                         strides=[1, 4, 4, 1],
                         padding="SAME",
                         name="c_layer_1")
c_layer_1 += conv_biases["c1_biases"]
c_layer_1 = tf.nn.relu(c_layer_1)
c_layer_1 = tf.nn.lrn(c_layer_1,
                      depth_radius=N_DEPTH_RADIUS,
                      bias=K_BIAS,
                      alpha=ALPHA,
示例#9
0
def main(trainModel=True,
         buildConfusionMatrix=True,
         restore=False,
         buildClassifiedMatrix=True):
    """Build the classifier graph, then optionally train and evaluate it.

    The graph is two convolutional layers (via ``create_conv_layer``), a
    1000-unit ReLU layer with dropout, and a softmax output over
    ``CLASSES`` classes.

    Args:
        trainModel: When true, call ``train`` on the dataset.
        buildConfusionMatrix: When true, compute the confusion matrix on
            the test split and pass it to ``draw_confusion_matrix``.
        restore: When true, restore variables from ``SAVE_PATH`` after
            initialisation (and before any training).
        buildClassifiedMatrix: When true, find for every (true class,
            predicted class) pair the test image with the highest
            predicted probability and pass the result to
            ``draw_max_failure_pictures``.
    """
    tf.disable_v2_behavior()

    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]),
                               1,
                               28, [5, 5], [2, 2],
                               name="conv_no_pool")
    layer2 = create_conv_layer(layer1,
                               28,
                               56, [5, 5], [2, 2],
                               name='conv_with_pool')
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000],
                                                        stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    # The 'rely' spelling is kept: the variable name is part of the
    # checkpoint and must not change.
    rely_layer_bias = tf.Variable(tf.truncated_normal([1000],
                                                      stddev=STDDEV / 2),
                                  name='rely_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + rely_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    # NOTE(review): dropout is part of the graph unconditionally, so it is
    # also active in the evaluation runs below - confirm this is intended.
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)

    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES],
                                                         stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES],
                                                       stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias

    predicts = tf.nn.softmax(final_layer)
    # Clip away exact 0 and 1 so both log terms below stay finite.
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)

    # Element-wise binary cross-entropy over the softmax outputs, summed
    # over classes and averaged over the batch.
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log),
                      axis=1),
        axis=0)
    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1),
                                  tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts,
                                                                 axis=1),
                                           num_classes=CLASSES)

    saver = tf.train.Saver()

    # dataset = get_mnist_dataset()
    dataset = get_fashion_dataset()

    with tf.Session() as session:

        session.run(tf.global_variables_initializer())

        if restore:
            saver.restore(session, SAVE_PATH)

        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy,
                  saver, dataset)

        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={
                                      input_images: dataset.test_x,
                                      real: dataset.test_y
                                  })
            draw_confusion_matrix(test_cm)

        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={
                                        input_images: dataset.test_x,
                                        real: dataset.test_y
                                    })
            # best[c][j] = (image index, probability) of the test image of
            # true class c that received the highest probability for class j.
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES
                                         for _ in range(CLASSES)]
            for i, probs in enumerate(all_probs):
                # A distinct name is used so the `real` placeholder above
                # is not rebound to an integer.
                true_class = np.argmax(dataset.test_y[i])
                best_for_class = max_failure_picture_index[true_class]
                for j in range(CLASSES):
                    if best_for_class[j][1] < probs[j]:
                        best_for_class[j] = (i, probs[j])
            draw_max_failure_pictures(dataset.test_x,
                                      max_failure_picture_index)
示例#10
0
文件: model.py 项目: lz-chen/kbot
def merge_states(x):
    """Collapse the final two axes of x into one axis of their combined size."""
    dims = list(shape_list(x))
    leading = dims[:-2]
    second_last, last = dims[-2:]
    merged_shape = leading + [second_last * last]
    return tf.reshape(x, merged_shape)
示例#11
0
文件: model.py 项目: lz-chen/kbot
def split_states(x, n):
    """Split the final axis of x (size m) into two axes of sizes [n, m // n]."""
    dims = list(shape_list(x))
    leading = dims[:-1]
    (m,) = dims[-1:]
    split_shape = leading + [n, m // n]
    return tf.reshape(x, split_shape)