def __init__(self, length, depth, max_relative_position, initializer_range, use_one_hot_embeddings=False):
    super(RelaPosEmbeddingsGenerator, self).__init__()
    self.depth = depth
    self.vocab_size = max_relative_position * 2 + 1
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embeddings_table = Parameter(
        initializer(TruncatedNormal(initializer_range), [self.vocab_size, self.depth]),
        name='embeddings_for_position')
    self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                            max_relative_position=max_relative_position)
    self.reshape = ops.Reshape()
    self.one_hot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.shape = ops.Shape()
    self.gather = ops.GatherV2()  # index_select
    self.matmul = ops.BatchMatMul()
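# A construct sketch for the generator above, following the common BERT-style
# relative-position pattern (argument handling and the BatchMatMul on the
# one-hot matrix are assumptions, not confirmed from the original source):
# with one-hot embeddings the flattened positions are one-hot encoded and
# multiplied against the table, otherwise the rows are gathered directly.
def construct(self):
    relative_positions_matrix_out = self.relative_positions_matrix()
    if self.use_one_hot_embeddings:
        flat_relative_positions_matrix = self.reshape(relative_positions_matrix_out, (-1,))
        one_hot_relative_positions_matrix = self.one_hot(
            flat_relative_positions_matrix, self.vocab_size, self.on_value, self.off_value)
        embeddings = self.matmul(one_hot_relative_positions_matrix, self.embeddings_table)
        my_shape = self.shape(relative_positions_matrix_out) + (self.depth,)
        embeddings = self.reshape(embeddings, my_shape)
    else:
        embeddings = self.gather(self.embeddings_table, relative_positions_matrix_out, 0)
    return embeddings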
def __init__(self):
    super(CrossEntropyLoss, self).__init__()
    self.sm_scalar = ops.ScalarSummary()
    self.cross_entropy = ops.SoftmaxCrossEntropyWithLogits()
    self.mean = ops.ReduceMean()
    self.one_hot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
def __init__(self, reduction="mean"):
    super(CrossEntropyLoss, self).__init__()
    self.cross_entropy = P.SoftmaxCrossEntropyWithLogits()
    if reduction == "sum":
        self.reduction = P.ReduceSum()
    if reduction == "mean":
        self.reduction = P.ReduceMean()
    self.one_hot = P.OneHot()
    self.one = Tensor(1.0, mstype.float32)
    self.zero = Tensor(0.0, mstype.float32)
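# A construct sketch for the CrossEntropyLoss cell above (argument names and
# shapes are assumptions): integer labels are one-hot encoded with
# self.one/self.zero, the per-sample loss is taken from
# P.SoftmaxCrossEntropyWithLogits (which returns a (loss, dlogits) pair), and
# the result is reduced with the operator chosen in __init__.
def construct(self, logits, labels):
    labels = self.one_hot(labels, logits.shape[1], self.one, self.zero)
    loss = self.cross_entropy(logits, labels)[0]   # keep the loss, drop dlogits
    loss = self.reduction(loss, (-1,))
    return loss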
def __init__(self, config):
    super(BertPretrainingLoss, self).__init__()
    self.vocab_size = config.vocab_size
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.reduce_sum = ops.ReduceSum()
    self.reduce_mean = ops.ReduceMean()
    self.reshape = ops.Reshape()
    self.last_idx = (-1,)
    self.neg = ops.Neg()
    self.cast = ops.Cast()
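# A sketch of the masked-LM portion of the loss that the operators above
# support (the signature and the 1e-5 stabilizer are assumptions): labels are
# one-hot encoded against the vocabulary, the log-probabilities are summed per
# position, and the weighted mean over the masked positions is returned.
def construct(self, prediction_scores, masked_lm_ids, masked_lm_weights):
    label_ids = self.reshape(masked_lm_ids, self.last_idx)
    label_weights = self.cast(self.reshape(masked_lm_weights, self.last_idx), mstype.float32)
    one_hot_labels = self.onehot(label_ids, self.vocab_size, self.on_value, self.off_value)
    per_example_loss = self.neg(self.reduce_sum(prediction_scores * one_hot_labels, self.last_idx))
    numerator = self.reduce_sum(label_weights * per_example_loss, ())
    denominator = self.reduce_sum(label_weights, ()) + Tensor(1e-5, mstype.float32)
    return numerator / denominator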
def gumbel_softmax(logits, temperature, hard, axis=-1, eps=1e-20):
    uniform_samples = ops.UniformReal()(logits.shape)
    gumbels = -ops.log(-ops.log(uniform_samples + eps) + eps)  # ~Gumbel(0, 1)
    gumbels = (logits + gumbels) / temperature
    y_soft = ops.Softmax(axis)(gumbels)
    if hard:
        # Straight through.
        index = y_soft.argmax(axis)
        y_hard = ops.OneHot(axis)(index, y_soft.shape[axis],
                                  ops.scalar_to_array(1.0), ops.scalar_to_array(0.0))
        ret = ops.stop_gradient(y_hard - y_soft) + y_soft
    else:
        # Reparametrization trick.
        ret = y_soft
    return ret
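# A hypothetical usage sketch for gumbel_softmax above; the batch size, class
# count, and temperature are illustrative values, not from the original source.
import numpy as np
from mindspore import Tensor
import mindspore.common.dtype as mstype

logits = Tensor(np.random.randn(4, 10).astype(np.float32))           # 4 samples, 10 classes
soft_sample = gumbel_softmax(logits, temperature=0.5, hard=False)    # soft probabilities, fully differentiable
hard_sample = gumbel_softmax(logits, temperature=0.5, hard=True)     # one-hot forward pass, soft gradients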
def __init__(self, model, config, is_training, dropout_prob=0.0, use_one_hot_embeddings=False):
    super(BertPoetry, self).__init__(auto_prefix=False)
    self.num_tokens = 3191
    self.poetry = model
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.reduce_sum = ops.ReduceSum()
    self.reduce_mean = ops.ReduceMean()
    self.reshape = ops.Reshape()
    self.neg = ops.Neg()
    self.cast = ops.Cast()
    self.last_idx = (-1,)
    self.log = ops.Log()
    self.max = ops.ArgMaxWithValue(axis=-1)
def __init__(self, sparse=False):
    super(SoftmaxCrossEntropyExpand, self).__init__()
    self.exp = ops.Exp()
    self.sum = ops.ReduceSum(keep_dims=True)
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.div = ops.RealDiv()
    self.log = ops.Log()
    self.sum_cross_entropy = ops.ReduceSum(keep_dims=False)
    self.mul = ops.Mul()
    self.mul2 = ops.Mul()
    self.mean = ops.ReduceMean(keep_dims=False)
    self.sparse = sparse
    self.max = ops.ReduceMax(keep_dims=True)
    self.sub = ops.Sub()
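# A construct sketch showing how the operators above combine into an explicit,
# numerically stable softmax cross-entropy (the usual "expand" pattern;
# argument names and the Tensor(-1.0) constant are assumptions).
def construct(self, logit, label):
    logit_max = self.max(logit, -1)                        # subtract the row max for numerical stability
    exp = self.exp(self.sub(logit, logit_max))
    exp_sum = self.sum(exp, -1)
    softmax_result = self.div(exp, exp_sum)
    if self.sparse:
        # Integer labels are expanded to one-hot vectors over the class axis.
        label = self.onehot(label, logit.shape[1], self.on_value, self.off_value)
    softmax_result_log = self.log(softmax_result)
    loss = self.sum_cross_entropy(self.mul(softmax_result_log, label), -1)
    loss = self.mul2(Tensor(-1.0, mstype.float32), loss)   # negative log-likelihood
    loss = self.mean(loss, -1)
    return loss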
def construct(self, logits):
    uniform_samples = self.uniform(logits.shape)
    gumbels = -ops.log(-ops.log(uniform_samples))  # ~Gumbel(0, 1)
    gumbels = (logits + gumbels) / self.temperature
    y_soft = self.softmax(gumbels)
    if self.hard:
        # Straight through.
        index = y_soft.argmax(self.axis)
        y_hard = ops.OneHot(self.axis)(index, y_soft.shape[self.axis], self.on_value, self.off_value)
        ret = ops.stop_gradient(y_hard - y_soft) + y_soft
    else:
        # Reparametrization trick.
        ret = y_soft
    return ret
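# A minimal __init__ sketch that would supply the attributes referenced by the
# construct above (the class name, defaults, and dtypes are assumptions, not
# the original author's code).
import mindspore.nn as nn
import mindspore.ops as ops
import mindspore.common.dtype as mstype
from mindspore import Tensor

class GumbelSoftmax(nn.Cell):
    def __init__(self, temperature=1.0, hard=False, axis=-1):
        super(GumbelSoftmax, self).__init__()
        self.temperature = temperature
        self.hard = hard
        self.axis = axis
        self.uniform = ops.UniformReal()            # U(0, 1) samples for the Gumbel trick
        self.softmax = ops.Softmax(axis)
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)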
def __init__(self, batch_size, temperature=1, world_size=1):
    super(NT_Xent_Loss, self).__init__()
    # Parameters.
    self.LARGE_NUM = 1e9
    self.batch_size = batch_size
    self.temperature = temperature
    self.world_size = world_size
    self.N = 2 * self.batch_size * self.world_size
    # Tail_Loss.
    self.criterion = CrossEntropyLoss(reduction="mean")
    self.norm = P.L2Normalize(axis=1)
    self.one_hot = P.OneHot()
    self.range = nn.Range(0, self.batch_size)
    self.one = Tensor(1.0, mstype.float32)
    self.zero = Tensor(0.0, mstype.float32)
    self.transpose = P.Transpose()
    self.matmul = nn.MatMul()
    # Operations.
    self.ones = P.Ones()
    self.zeros = P.Zeros()
    self.cat1 = P.Concat(axis=1)
def __init__(self, vocab_size, embedding_size, embedding_shape, use_one_hot_embeddings=False, initializer_range=0.02):
    super(EmbeddingLookup, self).__init__()
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embedding_table = Parameter(initializer
                                     (TruncatedNormal(initializer_range),
                                      [vocab_size, embedding_size]))
    self.expand = P.ExpandDims()
    self.shape_flat = (-1,)
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
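# A construct sketch for the EmbeddingLookup cell above, following the usual
# BERT-style pattern (the exact signature and return tuple are assumptions):
# ids are flattened, then either one-hot encoded and multiplied by the
# embedding table, or used to gather rows from it directly.
def construct(self, input_ids):
    extended_ids = self.expand(input_ids, -1)
    flat_ids = self.reshape(extended_ids, self.shape_flat)
    if self.use_one_hot_embeddings:
        one_hot_ids = self.one_hot(flat_ids, self.vocab_size, self.on_value, self.off_value)
        output_for_reshape = self.array_mul(one_hot_ids, self.embedding_table)
    else:
        output_for_reshape = self.gather(self.embedding_table, flat_ids, 0)
    output = self.reshape(output_for_reshape, self.shape)
    return output, self.embedding_table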
def __init__(self,
             embedding_size,
             embedding_shape,
             use_relative_positions=False,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.embedding_table = Parameter(initializer
                                     (TruncatedNormal(initializer_range),
                                      [token_type_vocab_size, embedding_size]),
                                     name='embedding_table')
    self.shape_flat = (-1,)
    self.one_hot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.1, mstype.float32)
    self.array_mul = ops.MatMul()
    self.reshape = ops.Reshape()
    self.shape = tuple(embedding_shape)
    self.layernorm = nn.LayerNorm((embedding_size,))
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.gather = ops.GatherV2()
    self.use_relative_positions = use_relative_positions
    self.slice = ops.StridedSlice()
    self.full_position_embeddings = Parameter(initializer
                                              (TruncatedNormal(initializer_range),
                                               [max_position_embeddings, embedding_size]),
                                              name='full_position_embeddings')
def __init__(self,
             embedding_size,
             embedding_shape,
             use_relative_positions=False,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.token_type_embedding = nn.Embedding(
        vocab_size=token_type_vocab_size,
        embedding_size=embedding_size,
        use_one_hot=use_one_hot_embeddings)
    self.shape_flat = (-1,)
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.1, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.gather = P.Gather()
    self.use_relative_positions = use_relative_positions
    self.slice = P.StridedSlice()
    _, seq, _ = self.shape
    self.full_position_embedding = nn.Embedding(
        vocab_size=max_position_embeddings,
        embedding_size=embedding_size,
        use_one_hot=False)
    self.layernorm = nn.LayerNorm((embedding_size,), epsilon=1e-5)
    self.position_ids = Tensor(np.arange(seq).reshape(-1, seq).astype(np.int32))
    self.add = P.Add()
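# A construct sketch for the postprocessor above (argument names are
# assumptions): token-type embeddings from nn.Embedding are added to the word
# embeddings, absolute position embeddings are looked up with
# self.position_ids unless relative positions are used, and the sum is
# layer-normalized and passed through dropout.
def construct(self, token_type_ids, word_embeddings):
    output = word_embeddings
    if self.use_token_type:
        token_type_embeddings = self.token_type_embedding(token_type_ids)
        output = self.add(output, token_type_embeddings)
    if not self.use_relative_positions:
        position_embeddings = self.full_position_embedding(self.position_ids)
        output = self.add(output, position_embeddings)
    output = self.layernorm(output)
    output = self.dropout(output)
    return output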