def scaled_dotproduct_attention(queries,
                                keys,
                                num_unit=None,
                                num_heads=0,
                                dropout_rate=0,
                                is_training=True,
                                causality=False,
                                scope="scaled_att",
                                reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        if num_unit is None:
            num_unit = queries.get_shape().as_list()[-1]
        # Linear projections
        Q = tf.layers.dense(queries, num_unit, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)

        outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))
        outputs = outputs / (K.get_shape().as_list()[-1]**0.5)

        # Mask out padded key positions so their attention scores become negligible
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # Causal mask: hide future positions from the model
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            # Lower-triangular mask (older TF 1.x spelled this tf.contrib.linalg.LinearOperatorTril)
            tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)
        # Query mask
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks
        outputs = tf.layers.dropout(
            outputs,
            rate=dropout_rate,
            training=tf.convert_to_tensor(is_training))
        # Weighted sum of the values
        outputs = tf.matmul(outputs, V)
        # Residual connection
        outputs += queries
        outputs = normalize(outputs)  # normalization helper defined elsewhere in this codebase
    return outputs
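
A minimal smoke test for the function above (a sketch: it assumes the normalize helper referenced in the snippet is defined, and uses made-up shapes):

import numpy as np
import tensorflow as tf

# Hypothetical shapes: batch=2, query length=5, key length=7, channels=16
q = tf.placeholder(tf.float32, [None, 5, 16])
k = tf.placeholder(tf.float32, [None, 7, 16])
out = scaled_dotproduct_attention(q, k, num_unit=16,
                                  dropout_rate=0.1, is_training=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    res = sess.run(out, feed_dict={q: np.random.randn(2, 5, 16),
                                   k: np.random.randn(2, 7, 16)})
    print(res.shape)  # (2, 5, 16): the output keeps the query shape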
def accuracy_fn(inference_fn, inputs, labels):
    prediction = tf.nn.softmax(inference_fn(inputs))
    correct_pred = tf.equal(tf.argmax(prediction, 1), labels)  # labels: int64 class ids
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Example #3
# training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

# Assign a label to each centroid:
# using the labels from training, tally the one-hot label counts for every centroid
counts = np.zeros(shape=(k, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += mnist.train.labels[i]

# Assign the most frequent label to each centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Lookup: map each sample's centroid id to that centroid's label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1),
                                                     tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test the model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={
    X: test_x,
    Y: test_y
}))
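
The centroid-labeling trick above (tally votes, take the argmax, then map centroid ids to labels with embedding_lookup) in miniature, with made-up numbers:

import numpy as np
import tensorflow as tf

counts = np.array([[5., 1.],   # centroid 0: mostly class 0
                   [0., 7.],   # centroid 1: mostly class 1
                   [2., 2.]])  # tie: argmax picks class 0
labels_map = tf.convert_to_tensor([np.argmax(c) for c in counts])
cluster_idx = tf.constant([1, 0, 2, 1])  # centroid assigned to each sample
with tf.Session() as sess:
    print(sess.run(tf.nn.embedding_lookup(labels_map, cluster_idx)))  # [1 0 0 1]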
Example #4
            logits_test = conv_net(_x,
                                   num_classes,
                                   dropout,
                                   reuse=True,
                                   is_training=False)

            # Define loss and optimizer, using logits_train so that dropout takes effect
            loss_op = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits_train,
                                                        labels=_y))
            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads = optimizer.compute_gradients(loss_op)
            # Compute accuracy on only one of the GPUs
            if i == 0:
                # Evaluate model (with test logits, for dropout to be disabled)
                correct_pred = tf.equal(tf.argmax(logits_test, 1),
                                        tf.argmax(_y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            reuse_vars = True
            tower_grads.append(grads)

    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        for step in range(1, num_steps + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size * num_gpus)
            ts = time.time()
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
Example #5
    sess.run(init)

    for epoch in range(train_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run the optimizer and fetch the batch cost
            _, c = sess.run([optimizer, cost],
                            feed_dict={
                                x: batch_xs,
                                y: batch_ys
                            })
            # Accumulate the average loss
            avg_cost += c / total_batch
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=",
                  "{:.9f}".format(avg_cost))
    print("Ooptimizer Finished!")

    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

    # Compute the accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:",
          accuracy.eval({
              x: mnist.test.images,
              y: mnist.test.labels
          }))
def accuracy_fn(inference_fn, inputs, labels):
    prediction = tf.nn.softmax(inference_fn(inputs))
    correct_pred = tf.equal(tf.argmax(prediction, 1), labels)  # do predictions match the labels?
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Example #7
                                              dtype=tf.float32)

    return tf.matmul(outputs[-1], weights['out']) + biases['out']


logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate the model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for step in range(1, training_step + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape the batch to [batch_size, timesteps, num_input]
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))

        # Run the optimizer first
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            loss, acc = sess.run([loss_op, accuracy],
                                 feed_dict={X: batch_x, Y: batch_y})
Example #8
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.float32, shape=[None])

hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()
# Build the random forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)
# Get the training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure accuracy
infer_op, _, _ = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init_vars = tf.group(
    tf.global_variables_initializer(),
    resources.initialize_resources(resources.shared_resources()))

sess = tf.Session()

sess.run(init_vars)

for i in range(1, num_steps + 1):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    if i % 50 == 0 or i == 1:
        acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
def multihead_attention(queries,
                        keys,
                        num_units=None,
                        num_heads=8,  # a default of 0 would break tf.split below
                        dropout_rate=0,
                        is_training=True,
                        causality=False,
                        scope="multihead_attention",
                        reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        # linear projection
        Q = tf.layers.dense(queries, num_units, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        # split and concat
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)

        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)
        # mask
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(key_masks, [num_heads, 1])
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # masked from future
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            # Lower-triangular mask (older TF 1.x spelled this tf.contrib.linalg.LinearOperatorTril)
            tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)

        # query mask
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(query_masks, [num_heads, 1])
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks

        outputs = tf.layers.dropout(outputs,
                                    rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))

        outputs = tf.matmul(outputs, V_)
        # restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)

        outputs += queries
        outputs = normalize(outputs)  # normalization helper defined elsewhere in this codebase
    return outputs
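
The head bookkeeping in multihead_attention is easy to misread; here is the split/merge round trip in isolation (a standalone sketch with made-up sizes):

import tensorflow as tf

# batch N=2, time T=5, channels C=8, heads h=4
x = tf.reshape(tf.range(2 * 5 * 8, dtype=tf.float32), [2, 5, 8])
h = 4
# split: (N, T, C) -> h chunks of (N, T, C/h), stacked on the batch axis -> (h*N, T, C/h)
x_split = tf.concat(tf.split(x, h, axis=2), axis=0)
# merge: the exact inverse, back to (N, T, C)
x_merged = tf.concat(tf.split(x_split, h, axis=0), axis=2)
with tf.Session() as sess:
    print(sess.run(tf.shape(x_split)))                     # [8 5 2]
    print(sess.run(tf.reduce_all(tf.equal(x, x_merged))))  # True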
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))  # cross-entropy

train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
    cross_entropy)  # train with gradient descent, minimizing the cross-entropy loss
init = tf.global_variables_initializer()
for i in range(1000):
    batch = mnist.train.next_batch(50)  # load a mini-batch of the training set
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
print("训练结束..")
"""
这段表达特别好:tf.argmax 是一个非常有用的函数,它能给出某个tensor对象在某一维上的其数据最大值所在的索引值。
由于标签向量是由0,1组成,因此最大值1所在的索引位置就是类别标签,比如tf.argmax(y,1)返回的是模型对于任一输入x预测到的标签值,
而 tf.argmax(y_,1) 代表正确的标签,我们可以用 tf.equal 来检测我们的预测是否真实标签匹配(索引位置一样表示匹配)。
"""
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

print(accuracy.eval(feed_dict={
    x: mnist.test.images,
    y_: mnist.test.labels
}))  # plain softmax regression gives only limited accuracy
"""
开始使用CNN进行训练识别
"""


# First we need to create many W and b variables. Since we use ReLU neurons, a good
# practice is to initialize the biases with a small positive constant
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
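
The comment above also calls for the small positive bias initializer; the usual companion helper (not shown in this snippet) would be:

def bias_variable(shape):
    # small positive constant so ReLU units start in their active region
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)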
Example #11
    batch_size = tf.shape(outputs)[0]
    # index of each sample's last valid output in the flattened (batch * time) tensor
    index = tf.range(0, batch_size) * seq_max_len + (seqlen - 1)

    outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)

    return tf.matmul(outputs, weights['out']) + biases['out']
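
# A tiny NumPy check of the last-valid-output gather above (standalone sketch,
# made-up sizes):
import numpy as np
_outputs = np.arange(2 * 4 * 3).reshape(2, 4, 3)  # batch=2, seq_max_len=4, hidden=3
_seqlen = np.array([2, 4])                        # valid length of each sample
_index = np.arange(2) * 4 + (_seqlen - 1)         # -> [1, 7]
print(_outputs.reshape(-1, 3)[_index])            # sample 0 at t=1, sample 1 at t=3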


pred = dynamicRNN(x, seqlen, weights, biases)

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for step in range(1, training_steps + 1):
        batch_x, batch_y, batch_seqlen = trainset.next(batch_size)

        sess.run(optimizer,
                 feed_dict={
                     x: batch_x,
                     y: batch_y,
                     seqlen: batch_seqlen
                 })

b = tf.Variable(tf.zeros([10]), name='Bias')

# Build the model and wrap the ops in name scopes so TensorBoard can visualize them.

with tf.name_scope('Model'):
    pred = tf.nn.softmax(tf.matmul(x, W) + b)

with tf.name_scope('Loss'):
    cost = tf.reduce_mean(-tf.reduce_sum(y *
                                         tf.log(pred), reduction_indices=1))

with tf.name_scope('SGD'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

with tf.name_scope('Accuracy'):
    acc = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))

init = tf.global_variables_initializer()

tf.summary.scalar("loss", cost)

tf.summary.scalar("accuracy", acc)

merged_summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter(logs_path,
                                           graph=tf.get_default_graph())
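
    # The snippet is cut off here. A sketch of the usual continuation (assuming this
    # tutorial's standard training_epochs / batch_size loop): run the merged summary
    # op alongside training and write each result for TensorBoard.
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, summary = sess.run([optimizer, merged_summary_op],
                                  feed_dict={x: batch_xs, y: batch_ys})
            summary_writer.add_summary(summary, epoch * total_batch + i)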