Exemplo n.º 1
0
    def construct(self, s_t_hat, encoder_outputs, encoder_feature,
                  enc_padding_mask, coverage):
        """Compute the attention context vector over the encoder outputs.

        Args:
            s_t_hat: Decoder state; projected by ``self.decode_proj``.
            encoder_outputs: Rank-3 encoder outputs, unpacked as (B, t_k, n).
            encoder_feature: Encoder features added to the broadcast decoder
                features.
                NOTE(review): the decoder features are broadcast to
                (B, t_k, n) while the coverage features are (B * t_k, n);
                confirm which layout the caller passes here.
            enc_padding_mask: Mask multiplied into the softmax output;
                presumably (B, t_k) with zeros at padded positions.
            coverage: Coverage tensor; only read when ``self.is_coverage``.

        Returns:
            Tuple ``(c_t, attn_dist, coverage)`` where ``c_t`` is reshaped to
            (-1, 2 * hidden_dim), ``attn_dist`` to (-1, t_k), and ``coverage``
            is the input coverage plus ``attn_dist`` when coverage is enabled
            (otherwise it is returned unchanged).
        """
        b, t_k, n = encoder_outputs.shape

        dec_fea = self.decode_proj(s_t_hat)  # (B, 2 * hidden_dim)
        dec_fea_expand = P.ExpandDims()(dec_fea, 1)
        dec_fea_expand = P.BroadcastTo()(dec_fea_expand, (b, t_k, n))

        att_features = encoder_feature + dec_fea_expand
        if self.is_coverage:
            coverage_input = coverage.view(-1, 1)  # (B * t_k, 1)
            coverage_feature = self.W_c(
                coverage_input)  # (B * t_k, 2 * hidden_dim)
            att_features = att_features + coverage_feature

        e = P.Tanh()(att_features)
        scores = self.v(e)
        scores = scores.view(-1, t_k)  # (B, t_k)

        # Mask out padded positions, then renormalise so the weights sum to 1.
        attn_dist_ = P.Softmax(1)(scores) * enc_padding_mask  # (B, t_k)
        normalization_factor = P.ReduceSum(True)(attn_dist_, 1)
        attn_dist = attn_dist_ / normalization_factor

        attn_dist = P.ExpandDims()(attn_dist, 1)  # (B, 1, t_k)
        # BatchMatMul is a primitive: instantiate it first, then call it on
        # the tensors (its constructor arguments are the transpose flags).
        c_t = P.BatchMatMul()(attn_dist, encoder_outputs)  # (B, 1, n)
        c_t = c_t.view(-1, self.hidden_dim * 2)  # (B, 2 * hidden_dim)

        attn_dist = attn_dist.view(-1, t_k)

        if self.is_coverage:
            coverage = coverage.view(-1, t_k)
            coverage = coverage + attn_dist

        return c_t, attn_dist, coverage
Exemplo n.º 2
0
 def construct(self, query, key, value):
     """Additive attention: score the keys against the query, weight values.

     Args:
         query: Input to ``self.query_proj``.
         key: Input to ``self.key_proj``.
         value: Tensor combined with the attention weights via matmul.

     Returns:
         Tuple ``(context, attn)``: the weighted combination of ``value``
         and the softmax-normalised scores.
     """
     projected = self.key_proj(key) + self.query_proj(query) + self.bias
     energy = self.score_proj(ops.tanh(projected))
     score = energy.squeeze(-1)
     normalise = ops.Softmax()
     attn = normalise(score)
     # Insert a singleton axis so the weights can be matrix-multiplied
     # against ``value``.
     weights = attn.expand_dims(1)
     context = ops.matmul(weights, value)
     return context, attn
Exemplo n.º 3
0
class Scalar_mix(nn.Cell):
    """
    Computes a parameterised scalar mixture of N tensors,
    ``mixture = gamma * sum(s_k * tensor_k)`` where ``s = softmax(w)``,
    with ``w`` and ``gamma`` scalar parameters.

    When ``do_layer_norm`` is True, every tensor is normalised with the mean
    and variance of its unmasked elements before it is weighted.
    """

    def __init__(self, mixture_size: int, do_layer_norm: bool = False) -> None:
        """
        Args:
            mixture_size: Number of tensors that will be mixed.
            do_layer_norm: Whether to normalise each tensor before mixing.
        """
        super(Scalar_mix, self).__init__()
        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm
        # ParameterTuple requires distinct parameter names, so each mixing
        # weight gets its own explicit name.
        self.scalar_parameters = ParameterTuple(
            [Parameter(Tensor(np.array([0.0]), mindspore.float32),
                       name="scalar_parameter_{}".format(index))
             for index in range(mixture_size)])
        # NOTE(review): gamma starts at 0.0, so the initial mixture is all
        # zeros; confirm this is intended rather than 1.0.
        self.gamma = Parameter(Tensor(np.array([0.0]), mindspore.float32))
        self.sum = P.ReduceSum()
        self.sqrt = P.Sqrt()
        self.cat = P.Concat()
        # Softmax takes ``axis`` (not ``dim``).
        self.softmax = P.Softmax(axis=0)
        # One (1,)-shaped weight per mixed tensor.
        self.split = P.Split(axis=0, output_num=mixture_size)
        # ExpandDims receives the axis at call time, not at construction.
        self.unsqueeze = P.ExpandDims()

    def _layer_norm(self, tensor, broadcast_mask, num_elements_not_masked):
        """Normalise ``tensor`` using statistics of its unmasked elements."""
        tensor_masked = tensor * broadcast_mask
        mean = self.sum(tensor_masked) / num_elements_not_masked
        variance = (self.sum(((tensor_masked - mean) * broadcast_mask) ** 2)
                    / num_elements_not_masked)
        return (tensor - mean) / self.sqrt(variance + 1E-12)

    def construct(self, tensors, mask=None):
        """
        Compute a weighted average of the ``tensors``.

        Args:
            tensors: The input tensors can be any shape with at least two
                dimensions, but must all be the same shape.
            mask: Required when ``do_layer_norm=True``; ignored otherwise.
                For example with ``tensors`` of shape
                ``(batch_size, timesteps, dim)`` the ``mask`` has shape
                ``(batch_size, timesteps)`` and dtype ``mindspore.float32``.

        Returns:
            ``gamma * sum(s_k * tensor_k)`` with the same shape as one input.

        Raises:
            ValueError: If the number of tensors differs from
                ``mixture_size``, or if layer norm is enabled without a mask.
        """
        if len(tensors) != self.mixture_size:
            raise ValueError("{} tensors were passed, but the module was initialized to "
                             "mix {} tensors.".format(len(tensors), self.mixture_size))

        normed_weights = self.softmax(self.cat(self.scalar_parameters))
        # Split into ``mixture_size`` tensors of shape (1,), the counterpart
        # of torch.split(split_size=1).
        normed_weights = self.split(normed_weights)

        if not self.do_layer_norm:
            pieces = [weight * tensor
                      for weight, tensor in zip(normed_weights, tensors)]
            return self.gamma * sum(pieces)

        if mask is None:
            raise ValueError("mask is required when do_layer_norm=True.")

        # Add a trailing axis so the mask broadcasts over the feature axis.
        broadcast_mask = self.unsqueeze(mask, -1)
        input_dim = tensors[0].shape[-1]
        # Reduce over every axis: the builtin ``sum`` would only fold the
        # first axis of the mask.
        num_elements_not_masked = self.sum(mask) * input_dim

        pieces = [weight * self._layer_norm(tensor, broadcast_mask,
                                            num_elements_not_masked)
                  for weight, tensor in zip(normed_weights, tensors)]
        return self.gamma * sum(pieces)
Exemplo n.º 4
0
def rerank(args):
    """Re-rank candidate paths and write the best one per question to JSON.

    For every question id the candidate with the lowest "no answer"
    probability (column 0 of the softmax over the reranker output) is kept
    and stored as ``[unique_id, path]``.

    Args:
        args: Namespace providing ``rerank_feature_file``,
            ``rerank_result_file``, ``rerank_encoder_ck_path``,
            ``rerank_downstream_ck_path``, ``seed``, ``seq_len`` and
            ``rerank_batch_size``.
    """
    rerank_feature_file = args.rerank_feature_file
    rerank_result_file = args.rerank_result_file
    encoder_ck_file = args.rerank_encoder_ck_path
    downstream_ck_file = args.rerank_downstream_ck_path
    seed = args.seed
    seq_len = args.seq_len
    batch_size = args.rerank_batch_size

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    generator = DataGenerator(feature_file_path=rerank_feature_file,
                              example_file_path=None,
                              batch_size=batch_size,
                              seq_len=seq_len,
                              task_type="reranker")

    reranker = Reranker(batch_size=batch_size,
                        encoder_ck_file=encoder_ck_file,
                        downstream_ck_file=downstream_ck_file)
    softmax = ops.Softmax()

    print("start re-ranking ...")

    # qas_id -> (lowest no-answer probability so far, [unique_id, path]).
    # Tracking the running minimum keeps exactly one candidate per question;
    # keying candidates by their float probability would merge two paths
    # that happen to share the same score into one malformed entry.
    best_by_question = {}
    for batch in tqdm(generator):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)

        no_answer = reranker(input_ids, attn_mask, token_type_ids)
        no_answer_prob = softmax(no_answer).asnumpy()[:, 0]

        candidates = zip(batch['ids'], no_answer_prob,
                         batch['unique_ids'], batch['path'])
        for qas_id, prob, unique_id, path in candidates:
            current = best_by_question.get(qas_id)
            # Strict "<" keeps the first candidate seen on ties.
            if current is None or prob < current[0]:
                best_by_question[qas_id] = (prob, [unique_id, path])

    rerank_result = {}
    for qas_id in tqdm(best_by_question,
                       desc="get top1 path from re-rank result"):
        rerank_result[qas_id] = best_by_question[qas_id][1]

    with open(rerank_result_file, 'w', encoding='utf-8') as f:
        json.dump(rerank_result, f)

    t2 = time()

    print(f"re-rank cost time: {t2-t1} s")
Exemplo n.º 5
0
 def __init__(self, temperature=1, hard=False, axis=-1):
     """Store the sampling options and build the operators used later.

     Args:
         temperature: Kept as ``self.temperature``.
         hard: Kept as ``self.hard``.
         axis: Kept as ``self.axis`` and used as the softmax axis.
     """
     super().__init__()

     # Configuration.
     self.axis = axis
     self.hard = hard
     self.temperature = temperature

     # Operators.
     self.softmax = ops.Softmax(axis)
     self.uniform = ops.UniformReal()

     # Float32 scalar constants; presumably the on/off values of a one-hot
     # op -- confirm against the cell's construct method.
     self.off_value = Tensor(0.0, mindspore.float32)
     self.on_value = Tensor(1.0, mindspore.float32)
Exemplo n.º 6
0
def gumbel_softmax(logits, temperature, hard, axis=-1, eps=1e-20):
    """Sample from the Gumbel-softmax distribution.

    Args:
        logits: Unnormalised scores; noise of the same shape is added.
        temperature: Divisor applied to the perturbed logits.
        hard: If true, return one-hot samples whose gradient flows through
            the soft sample (straight-through); otherwise return the soft
            sample itself.
        axis: Axis along which softmax / argmax / one-hot operate.
        eps: Small constant added inside both logarithms.

    Returns:
        The soft or straight-through hard sample.
    """
    uniform_noise = ops.UniformReal()(logits.shape)
    inner_log = ops.log(uniform_noise + eps)
    gumbel_noise = -ops.log(-inner_log + eps)  # ~Gumbel(0, 1)
    perturbed = (logits + gumbel_noise) / temperature
    y_soft = ops.Softmax(axis)(perturbed)

    if not hard:
        # Reparametrization trick.
        return y_soft

    # Straight through: the forward value is one-hot, the gradient is that
    # of y_soft.
    index = y_soft.argmax(axis)
    depth = y_soft.shape[axis]
    on_value = ops.scalar_to_array(1.0)
    off_value = ops.scalar_to_array(0.0)
    y_hard = ops.OneHot(axis)(index, depth, on_value, off_value)
    return ops.stop_gradient(y_hard - y_soft) + y_soft
Exemplo n.º 7
0
 def __init__(self, config, is_training, num_tokens, dropout_prob=0.0, use_one_hot_embeddings=False):
     """Build the BERT backbone, the output head and the constant masks.

     Args:
         config: Provides ``seq_length``, ``hidden_size`` and
             ``batch_size``; also forwarded to ``BertModel``.
         is_training: Forwarded to ``BertModel``.
         num_tokens: Stored as ``self.num_tokens``; also the size of the
             zero-initialised output bias.
         dropout_prob: Not used in this method.
         use_one_hot_embeddings: Forwarded to ``BertModel``.
     """
     super(BertPoetryModel, self).__init__()

     # Sub-cells and parameters (creation order preserved).
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.num_tokens = num_tokens
     dense = nn.Dense(
         config.hidden_size,
         config.hidden_size,
         has_bias=True,
         weight_init=TruncatedNormal(0.02),
         activation='gelu',
     )
     self.MLM_Dense = dense.to_float(mstype.float16)
     self.layer_norm = nn.LayerNorm((config.hidden_size,))
     self.biasadd = Parameter(initializer('zero', num_tokens), name='MLM_output_biasadd')

     # Plain configuration values.
     self.seq_length = config.seq_length
     self.hidden_size = config.hidden_size

     # Constant masks. ``self.mask`` is a (1, seq, seq) float mask that is
     # 1 where column index <= row index.
     positions = np.arange(config.seq_length)
     causal = positions[None, :] <= positions[:, None]
     self.mask = Tensor([causal], mstype.float32)
     # Lower-triangular ones for every item of the batch.
     batch_ones = np.ones(shape=(config.batch_size, config.seq_length, config.seq_length))
     self.lower_triangle_mask = Tensor(np.tril(batch_ones), dtype=mstype.float32)

     # Stateless operators.
     self.matmul = ops.MatMul(transpose_b=True)
     self.batch_matmul = ops.BatchMatMul()
     self.softmax = ops.Softmax(axis=-1)
     self.multiply = ops.Mul()
     self.cast = ops.Cast()
     self.reshape = ops.Reshape()
Exemplo n.º 8
0
    def construct(
        self,
        query: ms.Tensor,
        key: ms.Tensor,
        value: ms.Tensor,
        attn_mask: Optional[ms.Tensor] = None,
    ) -> Tuple[ms.Tensor, ms.Tensor]:
        r"""Scaled dot-product attention.

        Args:
            query: [batch, num_attention_heads, len_query, dim_query]
            key: [batch, num_attention_heads, len_key, dim_key]
            value: [batch, num_attention_heads, len_value, dim_value]
            attn_mask: [batch, num_attention_heads, len_query, len_key];
                added to the scaled scores before the softmax when given.

        Returns:
            ``(context, attention)``: the attention-weighted values and the
            attention weights after dropout.
        """
        # Swap the last two axes of the key so the matmul yields
        # query-by-key scores.
        key_t = key.transpose(0, 1, 3, 2)
        scale = ops.sqrt(generate_factor(query.shape[-1]))
        scores = ops.matmul(query, key_t) / scale

        if attn_mask is not None:
            scores = scores + attn_mask

        weights = self.dropout(ops.Softmax(axis=-1)(scores))
        return ops.matmul(weights, value), weights
Exemplo n.º 9
0
import mindspore as ms
import mindspore.nn as nn
import numpy as np
import mindspore.common.initializer as weight_init
import mindspore.ops as P

from mindspore import Tensor
from mindspore.common.initializer import Normal, Constant

# net = nn.MatMul()
# input_x1 = Tensor(np.ones(shape=[3, 2, 3]), ms.float32)
# input_x2 = Tensor(np.ones(shape=[2, 3, 4]), ms.float32)
# output = net(input_x1, input_x2)
# print(output.shape)

# ------------------------------------------------------------

# Create a 3-element float64 parameter, overwrite it with the constant 1/3,
# then print it before and after a softmax.
gate = ms.Parameter(
    ms.Tensor(np.ones(3), dtype=ms.float64),
    name="w",
    requires_grad=True,
)
constant_third = weight_init.initializer(Constant(1 / 3), gate.shape,
                                         gate.dtype)
gate.set_data(constant_third)
print(gate.dtype)
print("gate is ", gate)

softmax = P.Softmax()
gate_ = softmax(gate)
print(gate_)
Exemplo n.º 10
0
def read(args):
    """Run the reader and write answers plus supporting sentences to JSON.

    The output file holds ``{'answer': {q_id: text}, 'sp': {q_id: [...]}}``.

    Args:
        args: Namespace providing ``wiki_db_file``, ``reader_feature_file``,
            ``reader_example_file``, ``reader_encoder_ck_file``,
            ``reader_downstream_ck_file``, ``albert_model_path``,
            ``reader_result_file``, ``seed``, ``sp_threshold``, ``seq_len``,
            ``reader_batch_size``, ``max_para_num`` and ``max_sent_num``.
    """
    db_file = args.wiki_db_file
    reader_feature_file = args.reader_feature_file
    reader_example_file = args.reader_example_file
    encoder_ck_file = args.reader_encoder_ck_file
    downstream_ck_file = args.reader_downstream_ck_file
    albert_model_path = args.albert_model_path
    reader_result_file = args.reader_result_file
    seed = args.seed
    sp_threshold = args.sp_threshold
    seq_len = args.seq_len
    batch_size = args.reader_batch_size
    para_limit = args.max_para_num
    sent_limit = args.max_sent_num

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    doc_db = DocDB(db_file)

    generator = DataGenerator(feature_file_path=reader_feature_file,
                              example_file_path=reader_example_file,
                              batch_size=batch_size,
                              seq_len=seq_len,
                              para_limit=para_limit,
                              sent_limit=sent_limit,
                              task_type="reader")
    example_dict = generator.example_dict
    feature_dict = generator.feature_dict
    answer_dict = {}
    new_answer_dict = {}
    total_sp_dict = defaultdict(list)
    new_total_sp_dict = defaultdict(list)

    tokenizer = AlbertTokenizer.from_pretrained(albert_model_path)
    new_tokens = ['[q]', '[/q]', '<t>', '</t>', '[s]']
    tokenizer.add_tokens(new_tokens)

    reader = Reader(batch_size=batch_size,
                    encoder_ck_file=encoder_ck_file,
                    downstream_ck_file=downstream_ck_file)

    # Loop-invariant objects, built once instead of once per batch.
    # The mask is a band matrix: ones on the diagonal and the 30 diagonals
    # directly below it (triu(…, 0) would keep the upper triangle, tril(…, 30)
    # then cuts it to the band).
    cache_mask = Tensor(
        np.tril(np.triu(np.ones((seq_len, seq_len)), 0), 30),
        mstype.float32)
    softmax = ops.Softmax()
    sigmoid = ops.Sigmoid()

    print("start reading ...")

    for batch in tqdm(generator):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)
        context_mask = Tensor(batch["context_mask"], mstype.float32)
        square_mask = Tensor(batch["square_mask"], mstype.float32)
        packing_mask = Tensor(batch["query_mapping"], mstype.float32)
        para_start_mapping = Tensor(batch["para_start_mapping"],
                                    mstype.float32)
        sent_end_mapping = Tensor(batch["sent_end_mapping"], mstype.float32)
        unique_ids = batch["unique_ids"]
        sent_names = batch["sent_names"]

        _, _, q_type, _, sent_logit, y1, y2 = reader(
            input_ids, attn_mask, token_type_ids, context_mask, square_mask,
            packing_mask, cache_mask, para_start_mapping, sent_end_mapping)

        type_prob = softmax(q_type).asnumpy()

        answer_dict_ = convert_to_tokens(example_dict, feature_dict,
                                         batch['ids'],
                                         y1.asnumpy().tolist(),
                                         y2.asnumpy().tolist(),
                                         type_prob, tokenizer,
                                         sent_logit.asnumpy(), sent_names,
                                         unique_ids)
        # Later batches overwrite earlier entries for the same question id.
        answer_dict.update(answer_dict_)

    for q_id, res in answer_dict.items():
        answer_text_, sent_, sent_names_ = res[0], res[1], res[2]
        new_answer_dict[q_id] = answer_text_

        predict_support_np = sigmoid(Tensor(sent_,
                                            mstype.float32)).asnumpy()

        # zip stops at the shorter sequence, so scores without a matching
        # sentence name are ignored.
        for support_prob, sent_name in zip(predict_support_np, sent_names_):
            if support_prob > sp_threshold:
                total_sp_dict[q_id].append(sent_name)

    for _id, _sent_names in total_sp_dict.items():
        for para in _sent_names:
            title = make_wiki_id(para[0], 0)
            # Replace the first element in place with the last field of the
            # document info returned by the DB.
            para[0] = doc_db.get_doc_info(title)[-1]
            new_total_sp_dict[_id].append(para)

    prediction = {'answer': new_answer_dict, 'sp': new_total_sp_dict}

    with open(reader_result_file, 'w', encoding='utf-8') as f:
        json.dump(prediction, f, indent=4)

    t2 = time()

    print(f"reader cost time: {t2-t1} s")
Exemplo n.º 11
0
def log_softmax(input, axis=-1):
    """Return the log of the softmax of ``input`` along ``axis``.

    Uses the fused ``LogSoftmax`` operator rather than ``log(softmax(x))``:
    when a softmax output underflows to 0 the separate ``log`` yields
    ``-inf``, which the fused operator avoids.

    Args:
        input: Tensor of scores.
        axis: Axis along which to normalise (default: last axis).
    """
    return ops.LogSoftmax(axis)(input)
Exemplo n.º 12
0
def softmax(input, axis=-1):
    """Apply softmax to ``input`` along ``axis`` (default: last axis)."""
    softmax_op = ops.Softmax(axis)
    return softmax_op(input)