Example #1
def embed(X, we):
    """
    tf.gather(params, indices):
    axis: Defaults to the first non-batch dimension.
    (e.g.)
    >>> a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    >>> tf.gather(a, [1, 2])
    <tf.Tensor: id=15, shape=(2, 3), dtype=float32, numpy=
    array([[4., 5., 6.],
           [7., 8., 9.]], dtype=float32)>

    [reference]
    https://www.tensorflow.org/api_docs/python/tf/gather
    https://bit.ly/3iG15cu

    shape of we: [n_vocab + n_special + n_ctx, n_embd]
    shape of X:  [batch_size * 2 (x12 and x13), n_ctx, 2]
    shape of e:  [batch_size * 2 (x12 and x13), n_ctx, 2, n_embd]
    shape of h:  [batch_size * 2 (x12 and x13), n_ctx, n_embd]
    """
    we = convert_gradient_to_tensor(we)
    e = tf.gather(we, X)     # look up one embedding row per index in X
    h = tf.reduce_sum(e, 2)  # sum the token and position embeddings along axis 2
    return h
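A minimal standalone check of the shape flow documented above (a sketch assuming TensorFlow 2.x; the sizes, including the vocab size, are illustrative placeholders rather than values taken from the snippet):

import tensorflow as tf

batch, n_ctx, n_embd = 4, 77, 768
vocab_rows = 40478 + 3 + 77          # n_vocab + n_special + n_ctx (hypothetical n_vocab)

we = tf.random.normal([vocab_rows, n_embd])
X = tf.zeros([batch, n_ctx, 2], dtype=tf.int32)  # [token_id, position_id] pairs

e = tf.gather(we, X)     # one row of we per index  -> (4, 77, 2, 768)
h = tf.reduce_sum(e, 2)  # token emb + position emb -> (4, 77, 768)
print(e.shape, h.shape)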
Example #2
File: train.py  Project: zhijieqiu/ICAN
def embed(X, we):
    """
    X:  [batch, ctx_len] token ids
    we: [vocab_len, embedding_size] embedding table
    """
    we = convert_gradient_to_tensor(we)
    e = tf.gather(we, X)
    return e
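Here X is 2-D, with no position column, so the gather result is already the final embedding and no reduce_sum is needed. A quick sketch (TensorFlow 2.x assumed, made-up sizes):

import tensorflow as tf

vocab_len, embedding_size = 1000, 64
we = tf.random.normal([vocab_len, embedding_size])
X = tf.constant([[1, 5, 9], [2, 0, 7]])  # batch=2, ctx_len=3

e = tf.gather(we, X)
print(e.shape)  # (2, 3, 64): one row of we per token id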
Example #3
def embed(X, we):
    we = convert_gradient_to_tensor(we)
    print("22222we", we)
    e = tf.gather(we, X)
    print("333e", e)
    h = tf.reduce_sum(e, 2)
    print("3333h", h)
    return h
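All of these examples call convert_gradient_to_tensor without showing it. In the original openai/finetune-transformer-lm utils it is, to the best of my knowledge, an identity op whose gradient is forced to be dense (TF 1.x only, since function.Defun no longer exists in TF 2.x); treat this as a sketch rather than the verbatim source:

import tensorflow as tf
from tensorflow.python.framework import function

@function.Defun(
    python_grad_func=lambda x, dy: tf.convert_to_tensor(dy),
    shape_func=lambda op: [op.inputs[0].get_shape()])
def convert_gradient_to_tensor(x):
    # Identity in the forward pass; the custom grad converts the usual
    # sparse IndexedSlices gradient of a gather into a dense tensor,
    # which is often faster for embedding updates on GPU.
    return x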
Example #4
def embed(X, we):
    """
    For rocstories: - we:[(vocab_size + 3 + 77), 768],
                    - X:[length_of_input_sequences in 1-D tensors of [77, 2]]; (3: n_special).
    e = tf.gather(): - get embeddings of X from we (weight of all embeddings)
                     - So e: would give a [?, 77, 2, 768] where ?: length of input batch sequence for the current /gpu:X
    h = tf.reduce_sum(): returns a [?, 77, 768] (i.e performs sum along axis 2: add pos. embeds to the input embed)
    """
    we = utils.convert_gradient_to_tensor(we)
    e = tf.gather(we, X)
    h = tf.reduce_sum(e, 2)
    return h
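The [?, 77, 2] layout in the docstring comes from pairing each token id with its position id, with position rows stored after the vocab and special rows of we. A hedged sketch of how such an X could be built (the concrete vocab size is a placeholder, not taken from the snippet):

import numpy as np

n_vocab, n_special, n_ctx = 40478, 3, 77   # hypothetical n_vocab
batch = 4

tokens = np.zeros((batch, n_ctx), dtype=np.int64)  # real token ids would go here
positions = np.arange(n_vocab + n_special, n_vocab + n_special + n_ctx)

X = np.stack([tokens, np.broadcast_to(positions, (batch, n_ctx))], axis=-1)
print(X.shape)  # (4, 77, 2): last axis = [token_id, position_id]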
Example #5
def embed(X, we):
    we = convert_gradient_to_tensor(we)  ## we: [n_vocab+n_special+n_ctx, n_embd], X: [-1, n_ctx, 2]
    e = tf.gather(we, X)  ## embedding vectors for the input: [-1, n_ctx, 2, n_embd]
    h = tf.reduce_sum(e, 2)  ## h: [-1, n_ctx, n_embd]
    return h
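One way to see that the axis-2 sum is just "token embedding + position embedding" is to split X and gather twice; both paths agree (a TensorFlow 2.x sketch with made-up shapes):

import tensorflow as tf

we = tf.random.normal([100, 8])
X = tf.random.uniform([2, 5, 2], maxval=100, dtype=tf.int32)

h_sum = tf.reduce_sum(tf.gather(we, X), 2)                     # the snippets' version
h_split = tf.gather(we, X[..., 0]) + tf.gather(we, X[..., 1])  # explicit token + position
print(bool(tf.reduce_all(tf.abs(h_sum - h_split) < 1e-5)))     # True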