def gru_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Advance a GRU by one step and return the new hidden state.

    Args:
        input: Step input; multiplied with the transpose of ``w_ih``.
        hidden: Previous hidden state; multiplied with the transpose of ``w_hh``.
        w_ih: Input-to-hidden weight whose projection is split into three
            chunks along axis 1 (reset, update, candidate).
        w_hh: Hidden-to-hidden weight, split the same way as ``w_ih``.
        b_ih: Bias added to the input projection, or ``None`` to skip both biases.
        b_hh: Bias added to the hidden projection; only read when ``b_ih``
            is not ``None``.

    Returns:
        The next hidden state ``hy``.
    """
    matmul_bt = P.MatMul(False, True)
    split_three = P.Split(1, 3)
    sigmoid = P.Sigmoid()

    input_proj = matmul_bt(input, w_ih)
    hidden_proj = matmul_bt(hidden, w_hh)
    # The presence of b_ih alone decides whether either bias is applied.
    if b_ih is not None:
        input_proj = input_proj + b_ih
        hidden_proj = hidden_proj + b_hh

    in_reset, in_update, in_cand = split_three(input_proj)
    hid_reset, hid_update, hid_cand = split_three(hidden_proj)

    reset = sigmoid(in_reset + hid_reset)
    update = sigmoid(in_update + hid_update)
    candidate = P.Tanh()(in_cand + reset * hid_cand)

    # Interpolate between the candidate and the previous state.
    return candidate + update * (hidden - candidate)
def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, has_bias=False, modulation=True):
    """Build the sub-layers and operators of a deformable 2-D convolution.

    Args:
        inc: Number of input channels for every convolution created here.
        outc: Number of output channels of the final convolution ``self.conv``.
        kernel_size: Kernel size; must be odd.
        padding: Amount of zero padding used by ``self.zero_padding`` (on the
            last two axes) and by the offset convolution ``self.p_conv``.
        stride: Stride of the offset and modulation convolutions.
        has_bias: Whether ``self.conv`` carries a bias term.
        modulation: When true, an extra convolution ``self.m_conv`` with
            ``kernel_size * kernel_size`` output channels is created.

    Raises:
        ValueError: If ``kernel_size`` is even.
    """
    super(DeformConv2d, self).__init__()

    # Validate before allocating any layer so an invalid configuration fails fast.
    if kernel_size % 2 == 0:
        raise ValueError("Only odd number is supported, but current kernel size is {}".format(kernel_size))

    self.kernel_size = kernel_size
    self.padding = padding
    self.stride = stride
    self.zero_padding = nn.Pad(((0, 0), (0, 0), (padding, padding), (padding, padding)))

    # The output convolution strides by kernel_size with no padding.
    # NOTE(review): presumably it consumes the map produced by
    # RegenerateFeatureMap, one kernel-sized patch per output position -- confirm.
    self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, pad_mode='valid', padding=0,
                          stride=kernel_size, has_bias=has_bias)

    # Offset branch: two values per kernel tap.
    self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=self.kernel_size,
                            pad_mode='pad', padding=self.padding, stride=self.stride)

    self.modulation = modulation
    if modulation:
        # Modulation branch: one value per kernel tap.
        # NOTE(review): unlike p_conv this layer is unpadded; presumably it is
        # fed the zero-padded input in construct -- confirm.
        self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=self.kernel_size,
                                pad_mode='valid', padding=0, stride=self.stride)

    self.N = kernel_size * kernel_size
    self.begin = kernel_size // 2

    # Operators and helper cells reused by the forward pass.
    self.sigmoid = ops.Sigmoid()
    self.dtype = ops.DType()
    self.perm_list = (0, 2, 3, 1)
    self.transpose = ops.Transpose()
    self.floor = ops.Floor()
    self.half = ops.Split(axis=-1, output_num=2)
    self.clip_value = ClipByValue()
    self.expand_dims = ops.ExpandDims()
    self.shape = ops.Shape()
    self.cast = ops.Cast()
    self._get_offset = GetOffsetPosition(self.begin, self.stride)
    self._get_surround = GetSurroundFeature()
    self._generate_fm = RegenerateFeatureMap(self.kernel_size)
def lstm_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Advance an LSTM by one step.

    Args:
        input: Step input; multiplied with the transpose of ``w_ih``.
        hidden: Pair ``(hx, cx)`` holding the previous hidden and cell states.
        w_ih: Input-to-hidden weight whose projection is split into four
            chunks along axis 1 (input, forget, cell, output).
        w_hh: Hidden-to-hidden weight, laid out like ``w_ih``.
        b_ih: Input-side bias, or ``None`` to skip both biases.
        b_hh: Hidden-side bias; only read when ``b_ih`` is not ``None``.

    Returns:
        Tuple ``(hy, cy)`` with the next hidden and cell states.
    """
    prev_h, prev_c = hidden
    matmul_bt = P.MatMul(False, True)
    sigmoid = P.Sigmoid()
    tanh = P.Tanh()

    pre_act = matmul_bt(input, w_ih) + matmul_bt(prev_h, w_hh)
    # The presence of b_ih alone decides whether either bias is applied.
    if b_ih is not None:
        pre_act = pre_act + b_ih + b_hh

    raw_in, raw_forget, raw_cell, raw_out = P.Split(1, 4)(pre_act)
    gate_in = sigmoid(raw_in)
    gate_forget = sigmoid(raw_forget)
    candidate = tanh(raw_cell)
    gate_out = sigmoid(raw_out)

    next_c = (gate_forget * prev_c) + (gate_in * candidate)
    next_h = gate_out * tanh(next_c)
    return next_h, next_c
def construct(self, inputs):
    """Apply the stacked highway layers to ``inputs``.

    Each entry of ``self._layers`` is indexed as ``layer[0]`` (gate
    projection, squashed with a sigmoid) and ``layer[1]`` (transform
    projection, passed through ``self._activation``). The output of a layer
    is ``gate * transform + (1 - gate) * layer_input``.

    Args:
        inputs: Tensor fed to the first layer.

    Returns:
        The output of the last layer.
    """
    sigmoid = P.Sigmoid()
    current_input = inputs
    for layer in self._layers:
        carry_gate = sigmoid(layer[0](current_input))
        transform_gate = self._activation(layer[1](current_input))
        # Carry this layer's own input, not the network's original input, so
        # stacked layers compose. With a single layer the two are identical.
        current_input = carry_gate * transform_gate + (1 - carry_gate) * current_input
    return current_input
def __init__(self, num_classes=256, log_scale_min=-7.0, reduce=True):
    """Store the loss hyper-parameters and create the operators it uses.

    Args:
        num_classes: Number of classes; also determines ``self.factor``,
            which is ``log((num_classes - 1) / 2)``.
        log_scale_min: Stored as ``self.log_scale_min``.
        reduce: Stored as ``self.reduce``.
    """
    super(discretized_mix_logistic_loss, self).__init__()

    # Hyper-parameters.
    self.num_classes = num_classes
    self.log_scale_min = log_scale_min
    self.reduce = reduce

    # Operators and helper cells.
    self.transpose_op = P.Transpose()
    self.exp = P.Exp()
    self.sigmoid = P.Sigmoid()
    self.softplus = Stable_softplus()
    self.log = P.Log()
    self.cast = P.Cast()
    self.logsoftmax = P.LogSoftmax(-1)
    self.expand_dims = P.ExpandDims()
    self.tile = P.Tile()
    self.maximum = P.Maximum()
    self.sums = P.ReduceSum()
    self.lse = log_sum_exp()
    self.reshape = P.Reshape()

    # Constant computed once here rather than on every forward pass.
    half_range = Tensor((self.num_classes - 1) / 2, ms.float32)
    self.factor = self.log(half_range)
def construct(self, y_t_1, s_t_1, encoder_outputs, encoder_feature, enc_padding_mask, c_t_1,
              extra_zeros, enc_batch_extend_vocab, coverage, step):
    """Run one decoder step and return the output distribution and new state.

    Args:
        y_t_1: Input passed to ``self.embedding`` for this step.
        s_t_1: Previous LSTM state, unpacked as ``(h, c)``.
        encoder_outputs: Forwarded unchanged to ``self.attention_network``.
        encoder_feature: Forwarded unchanged to ``self.attention_network``.
        enc_padding_mask: Forwarded unchanged to ``self.attention_network``.
        c_t_1: Previous context vector, concatenated with the embedding.
        extra_zeros: Optional tensor appended to the vocabulary distribution
            along axis 1, or ``None``.
        enc_batch_extend_vocab: 2-D index tensor ``(batch, attn_len)`` giving
            the target column for each attention weight.
        coverage: Coverage value forwarded to the attention network.
        step: Decoding step index.

    Returns:
        Tuple ``(final_dist, s_t, c_t, attn_dist, p_gen, coverage)``.
        ``p_gen`` is ``None`` when ``self.pointer_gen`` is false.
    """
    if not self.training and step == 0:
        # First inference step: run attention on the incoming state to
        # obtain an initial coverage before the LSTM step.
        h_decoder, c_decoder = s_t_1
        h_decoder = h_decoder.view(-1, self.hidden_dim)
        c_decoder = c_decoder.view(-1, self.hidden_dim)
        s_t_hat = P.Concat(1)((h_decoder, c_decoder))  # (B, 2 * hidden_dim)
        c_t, _, coverage_next = self.attention_network(
            s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask, coverage)
        coverage = coverage_next

    y_t_1_embed = self.embedding(y_t_1)
    x = self.x_content(P.Concat(1)((c_t_1, y_t_1_embed)))
    lstm_out, s_t = self.lstm(P.ExpandDims()(x, 1), s_t_1)

    h_decoder, c_decoder = s_t
    h_decoder = h_decoder.view(-1, self.hidden_dim)
    c_decoder = c_decoder.view(-1, self.hidden_dim)
    s_t_hat = P.Concat(1)((h_decoder, c_decoder))
    c_t, attn_dist, coverage_next = self.attention_network(
        s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask, coverage)

    if self.training or step > 0:
        coverage = coverage_next

    p_gen = None
    if self.pointer_gen:
        p_gen_input = P.Concat(1)((c_t, s_t_hat, x))  # (B, 2 * 2 * hidden_dim + embed_dim)
        p_gen = self.p_gen_linear(p_gen_input)
        p_gen = P.Sigmoid()(p_gen)

    output = P.Concat(1)((lstm_out.view(-1, self.hidden_dim), c_t))  # (B, hidden_dim * 3)
    output = self.out1(output)  # (B, hidden_dim)
    output = self.out2(output)  # (B, vocab_size)
    # The MindSpore operator is spelled ``Softmax``; ``SoftMax`` does not resolve.
    vocab_dist = P.Softmax(1)(output)

    if self.pointer_gen:
        vocab_dist_ = p_gen * vocab_dist
        attn_dist_ = (1 - p_gen) * attn_dist

        if extra_zeros is not None:
            vocab_dist_ = P.Concat(1)((vocab_dist_, extra_zeros))

        # Like pytorch scatter_add: build (row, column) index pairs and
        # scatter the attention weights into a tensor shaped like vocab_dist_.
        batch_size, attn_len = enc_batch_extend_vocab.shape
        batch_num = range_tensor(0, batch_size)
        batch_num = P.ExpandDims()(batch_num, 1)
        batch_num = P.Tile()(batch_num, (1, attn_len))
        indices = P.Pack(2)((batch_num, enc_batch_extend_vocab))
        shape = (batch_size, vocab_dist_.shape[1])
        attn_dist_ = P.ScatterNd()(indices, attn_dist_, shape)
        final_dist = vocab_dist_ + attn_dist_
    else:
        final_dist = vocab_dist

    return final_dist, s_t, c_t, attn_dist, p_gen, coverage
def read(args):
    """Run the reader over all batches and write answers and supporting facts to JSON.

    Reads its configuration from attributes of ``args`` (file paths, seed,
    thresholds, batch and sequence sizes), runs ``Reader`` on every batch from
    ``DataGenerator``, keeps the sentences whose sigmoid score exceeds
    ``args.sp_threshold``, replaces each kept sentence's title with the one
    stored in the document database, and dumps
    ``{'answer': ..., 'sp': ...}`` to ``args.reader_result_file``.

    Args:
        args: Namespace-like object providing the attributes read below.
    """
    db_file = args.wiki_db_file
    reader_feature_file = args.reader_feature_file
    reader_example_file = args.reader_example_file
    encoder_ck_file = args.reader_encoder_ck_file
    downstream_ck_file = args.reader_downstream_ck_file
    albert_model_path = args.albert_model_path
    reader_result_file = args.reader_result_file
    seed = args.seed
    sp_threshold = args.sp_threshold
    seq_len = args.seq_len
    batch_size = args.reader_batch_size
    para_limit = args.max_para_num
    sent_limit = args.max_sent_num

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    doc_db = DocDB(db_file)

    generator = DataGenerator(feature_file_path=reader_feature_file,
                              example_file_path=reader_example_file,
                              batch_size=batch_size,
                              seq_len=seq_len,
                              para_limit=para_limit,
                              sent_limit=sent_limit,
                              task_type="reader")
    example_dict = generator.example_dict
    feature_dict = generator.feature_dict
    answer_dict = defaultdict(lambda: defaultdict(list))
    new_answer_dict = {}
    total_sp_dict = defaultdict(list)
    new_total_sp_dict = defaultdict(list)

    tokenizer = AlbertTokenizer.from_pretrained(albert_model_path)
    new_tokens = ['[q]', '[/q]', '<t>', '</t>', '[s]']
    tokenizer.add_tokens(new_tokens)

    reader = Reader(batch_size=batch_size,
                    encoder_ck_file=encoder_ck_file,
                    downstream_ck_file=downstream_ck_file)

    # Loop-invariant objects: the band mask depends only on seq_len, and the
    # operators are stateless, so build each of them once.
    cache_mask = Tensor(np.tril(np.triu(np.ones((seq_len, seq_len)), 0), 30),
                        mstype.float32)
    softmax = ops.Softmax()
    sigmoid = ops.Sigmoid()

    print("start reading ...")

    # Iterate the generator directly; the batch index was never used.
    for batch in tqdm(generator):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)
        context_mask = Tensor(batch["context_mask"], mstype.float32)
        square_mask = Tensor(batch["square_mask"], mstype.float32)
        packing_mask = Tensor(batch["query_mapping"], mstype.float32)
        para_start_mapping = Tensor(batch["para_start_mapping"], mstype.float32)
        sent_end_mapping = Tensor(batch["sent_end_mapping"], mstype.float32)
        unique_ids = batch["unique_ids"]
        sent_names = batch["sent_names"]

        _, _, q_type, _, sent_logit, y1, y2 = reader(
            input_ids, attn_mask, token_type_ids,
            context_mask, square_mask, packing_mask, cache_mask,
            para_start_mapping, sent_end_mapping)

        type_prob = softmax(q_type).asnumpy()

        answer_dict_ = convert_to_tokens(example_dict,
                                         feature_dict,
                                         batch['ids'],
                                         y1.asnumpy().tolist(),
                                         y2.asnumpy().tolist(),
                                         type_prob,
                                         tokenizer,
                                         sent_logit.asnumpy(),
                                         sent_names,
                                         unique_ids)
        answer_dict.update(answer_dict_)

    for q_id, res in answer_dict.items():
        answer_text_ = res[0]
        sent_ = res[1]
        sent_names_ = res[2]
        new_answer_dict[q_id] = answer_text_

        predict_support_np = sigmoid(Tensor(sent_, mstype.float32)).asnumpy()

        # zip stops at the shorter sequence, so scores without a matching
        # sentence name are ignored.
        for score, sent_name in zip(predict_support_np, sent_names_):
            if score > sp_threshold:
                total_sp_dict[q_id].append(sent_name)

    for _id, _sent_names in total_sp_dict.items():
        for para in _sent_names:
            title = make_wiki_id(para[0], 0)
            para_original_title = doc_db.get_doc_info(title)[-1]
            # Overwrites the first element of the entry in place.
            para[0] = para_original_title
            new_total_sp_dict[_id].append(para)

    prediction = {'answer': new_answer_dict,
                  'sp': new_total_sp_dict}

    with open(reader_result_file, 'w', encoding='utf-8') as f:
        json.dump(prediction, f, indent=4)

    t2 = time()

    print(f"reader cost time: {t2-t1} s")
def glu(input, dim=-1):
    """Gated linear unit.

    Splits ``input`` into two equal parts along ``dim`` and multiplies the
    first part by the sigmoid of the second.

    Args:
        input: Tensor to split.
        dim: Axis along which to split. Defaults to the last axis.

    Returns:
        The gated first half.
    """
    values, gates = ops.Split(dim, 2)(input)
    squash = ops.Sigmoid()
    return values * squash(gates)
def __init__(self, dim: int = -1):
    """Create the split and sigmoid operators.

    Args:
        dim: Axis along which ``self.split`` divides its input into two parts.
    """
    super().__init__()
    self.split = ops.Split(axis=dim, output_num=2)
    self.sigmoid = ops.Sigmoid()
def __init__(self):
    """Initialise the parent class and create the sigmoid operator."""
    super().__init__()
    self.sigmoid = ops.Sigmoid()
def __init__(self, hidden_size):
    """Build a two-layer dense network with a sigmoid operator.

    Args:
        hidden_size: Width of the hidden layer between ``fc1`` and ``fc2``.
    """
    super(Net, self).__init__()
    self.hidden_size = hidden_size

    # Fixed widths of the network's input and output.
    input_width = 2
    output_width = 1

    self.fc1 = nn.Dense(input_width, hidden_size)
    self.fc2 = nn.Dense(hidden_size, output_width)
    self.sig = ops.Sigmoid()