def forward(self, mask):
    """
    Args:
        mask (Tensor): [B, H, W]
    Returns:
        pos (Tensor): [B, C, H, W]
    """
    assert mask.dtype == paddle.bool
    if self.embed_type == 'sine':
        mask = mask.astype('float32')
        y_embed = mask.cumsum(1, dtype='float32')
        x_embed = mask.cumsum(2, dtype='float32')
        if self.normalize:
            y_embed = (y_embed + self.offset) / (
                y_embed[:, -1:, :] + self.eps) * self.scale
            x_embed = (x_embed + self.offset) / (
                x_embed[:, :, -1:] + self.eps) * self.scale
        dim_t = 2 * (paddle.arange(self.num_pos_feats) //
                     2).astype('float32')
        dim_t = self.temperature**(dim_t / self.num_pos_feats)

        pos_x = x_embed.unsqueeze(-1) / dim_t
        pos_y = y_embed.unsqueeze(-1) / dim_t
        pos_x = paddle.stack(
            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()),
            axis=4).flatten(3)
        pos_y = paddle.stack(
            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()),
            axis=4).flatten(3)
        pos = paddle.concat((pos_y, pos_x), axis=3).transpose([0, 3, 1, 2])
        return pos
    elif self.embed_type == 'learned':
        h, w = mask.shape[-2:]
        i = paddle.arange(w)
        j = paddle.arange(h)
        x_emb = self.col_embed(i)
        y_emb = self.row_embed(j)
        # paddle.Tensor has no `repeat`; use `tile` with a list of
        # repeat times instead.
        pos = paddle.concat(
            [
                x_emb.unsqueeze(0).tile([h, 1, 1]),
                y_emb.unsqueeze(1).tile([1, w, 1]),
            ],
            axis=-1).transpose([2, 0, 1]).unsqueeze(0).tile(
                [mask.shape[0], 1, 1, 1])
        return pos
    else:
        raise ValueError(f"not supported {self.embed_type}")
def __init__(self,
             dim,
             window_size,
             num_heads,
             qkv_bias=True,
             qk_scale=None,
             attn_drop=0.,
             proj_drop=0.):
    super().__init__()
    self.dim = dim
    self.window_size = window_size  # Wh, Ww
    self.num_heads = num_heads
    head_dim = dim // num_heads
    self.scale = qk_scale or head_dim**-0.5

    # define a parameter table of relative position bias
    self.relative_position_bias_table = add_parameter(
        self,
        paddle.zeros(((2 * window_size[0] - 1) * (2 * window_size[1] - 1),
                      num_heads)))  # 2*Wh-1 * 2*Ww-1, nH

    # get pair-wise relative position index for each token inside the window
    coords_h = paddle.arange(self.window_size[0])
    coords_w = paddle.arange(self.window_size[1])
    coords = paddle.stack(paddle.meshgrid(
        [coords_h, coords_w]))  # 2, Wh, Ww
    coords_flatten = paddle.flatten(coords, 1)  # 2, Wh*Ww
    coords_flatten_1 = coords_flatten.unsqueeze(axis=2)
    coords_flatten_2 = coords_flatten.unsqueeze(axis=1)
    relative_coords = coords_flatten_1 - coords_flatten_2
    relative_coords = relative_coords.transpose(
        [1, 2, 0])  # Wh*Ww, Wh*Ww, 2
    relative_coords[:, :, 0] += self.window_size[
        0] - 1  # shift to start from 0
    relative_coords[:, :, 1] += self.window_size[1] - 1
    relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
    relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
    # Register the index as a buffer directly; assigning it to
    # `self.relative_position_index` first would make register_buffer
    # raise a name-clash error.
    self.register_buffer("relative_position_index",
                         relative_position_index)

    self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = nn.Linear(dim, dim)
    self.proj_drop = nn.Dropout(proj_drop)

    trunc_normal_(self.relative_position_bias_table)
    self.softmax = nn.Softmax(axis=-1)
def forward(self, x):
    b = x.shape[0]
    h = x.shape[2]
    w = x.shape[3]

    gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1.
    gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w])
    gx.stop_gradient = True

    gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1.
    gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w])
    gy.stop_gradient = True

    y = paddle.concat([x, gx, gy], axis=1)
    y = self.conv(y)
    return y
def node_batch_iter(self, batch_size, shuffle=True):
    """Node batch iterator

    Iterate over all nodes by batch.

    Args:
        batch_size: The batch size of each batch of nodes.
        shuffle: Whether to shuffle the nodes.

    Return:
        Batch iterator
    """
    if self.is_tensor():
        if shuffle:
            perm = paddle.randperm(self.num_nodes)
        else:
            perm = paddle.arange(self.num_nodes)
    else:
        perm = np.arange(self.num_nodes)
        if shuffle:
            np.random.shuffle(perm)

    start = 0
    while start < self.num_nodes:
        yield perm[start:start + batch_size]
        start += batch_size
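# Usage sketch (hypothetical `graph` instance and `process` callback):
# iterate over node ids in shuffled mini-batches; the last batch may be
# smaller than batch_size.
#
# for batch_node_ids in graph.node_batch_iter(batch_size=128):
#     process(batch_node_ids)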
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    metric_dict = dict()

    # get cmc
    choosen_indices = paddle.argsort(
        similarities_matrix, axis=1, descending=True)
    gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0])
    gallery_labels_transpose = paddle.broadcast_to(
        gallery_labels_transpose,
        shape=[
            choosen_indices.shape[0], gallery_labels_transpose.shape[1]
        ])
    choosen_label = paddle.index_sample(gallery_labels_transpose,
                                        choosen_indices)
    equal_flag = paddle.equal(choosen_label, query_img_id)
    if keep_mask is not None:
        keep_mask = paddle.index_sample(keep_mask.astype('float32'),
                                        choosen_indices)
        equal_flag = paddle.logical_and(equal_flag,
                                        keep_mask.astype('bool'))
    equal_flag = paddle.cast(equal_flag, 'float32')

    Ns = paddle.arange(gallery_img_id.shape[0]) + 1
    equal_flag_cumsum = paddle.cumsum(equal_flag, axis=1)
    Precision_at_k = (paddle.mean(equal_flag_cumsum, axis=0) / Ns).numpy()

    for k in self.topk:
        metric_dict["precision@{}".format(k)] = Precision_at_k[k - 1]

    return metric_dict
def forward(self,
            query_input_ids,
            pos_title_input_ids,
            neg_title_input_ids,
            is_prediction=False,
            query_token_type_ids=None,
            query_position_ids=None,
            query_attention_mask=None,
            pos_title_token_type_ids=None,
            pos_title_position_ids=None,
            pos_title_attention_mask=None,
            neg_title_token_type_ids=None,
            neg_title_position_ids=None,
            neg_title_attention_mask=None):
    query_cls_embedding = self.get_pooled_embedding(
        query_input_ids, query_token_type_ids, query_position_ids,
        query_attention_mask)

    pos_title_cls_embedding = self.get_pooled_embedding(
        pos_title_input_ids, pos_title_token_type_ids,
        pos_title_position_ids, pos_title_attention_mask)

    neg_title_cls_embedding = self.get_pooled_embedding(
        neg_title_input_ids, neg_title_token_type_ids,
        neg_title_position_ids, neg_title_attention_mask)

    all_title_cls_embedding = paddle.concat(
        x=[pos_title_cls_embedding, neg_title_cls_embedding], axis=0)

    if is_prediction:
        logits = paddle.dot(query_cls_embedding, pos_title_cls_embedding)
        outputs = {
            "probs": logits,
            "q_rep": query_cls_embedding,
            "p_rep": pos_title_cls_embedding
        }
        return outputs

    if self.use_cross_batch:
        tensor_list = []
        paddle.distributed.all_gather(tensor_list, all_title_cls_embedding)
        all_title_cls_embedding = paddle.concat(x=tensor_list, axis=0)

    # multiply
    logits = paddle.matmul(
        query_cls_embedding, all_title_cls_embedding, transpose_y=True)

    batch_size = query_cls_embedding.shape[0]

    labels = paddle.arange(batch_size * self.rank * 2,
                           batch_size * (self.rank * 2 + 1),
                           dtype='int64')
    labels = paddle.reshape(labels, shape=[-1, 1])

    accuracy = paddle.metric.accuracy(input=logits, label=labels)
    loss = F.cross_entropy(input=logits, label=labels)
    outputs = {"loss": loss, "accuracy": accuracy}

    return outputs
def TopPProcess(probs, top_p, min_tokens_to_keep):
    sorted_probs = paddle.sort(probs, descending=True)
    sorted_indices = paddle.argsort(probs, descending=True)
    cumulative_probs = paddle.cumsum(sorted_probs, axis=-1)

    # Remove tokens with cumulative probs above top_p, but keep at
    # least min_tokens_to_keep tokens.
    sorted_indices_to_remove = cumulative_probs > top_p
    if min_tokens_to_keep > 1:
        # Set 'min_tokens_to_keep - 1' because the first token is kept
        sorted_indices_to_remove[:, :min_tokens_to_keep - 1] = 0
    # Keep the first token
    sorted_indices_to_remove = paddle.cast(sorted_indices_to_remove,
                                           dtype='int64')
    sorted_indices_to_remove[:, 1:] = (
        sorted_indices_to_remove[:, :-1].clone())
    sorted_indices_to_remove[:, 0] = 0

    # Scatter sorted tensors to original indexing
    sorted_indices = sorted_indices + paddle.arange(
        probs.shape[0]).unsqueeze(-1) * probs.shape[-1]
    condition = paddle.scatter(sorted_indices_to_remove.flatten(),
                               sorted_indices.flatten(),
                               sorted_indices_to_remove.flatten())
    condition = paddle.cast(condition, 'bool').reshape(probs.shape)
    probs = paddle.where(condition, paddle.full_like(probs, 0.0), probs)
    return probs
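# A minimal usage sketch of TopPProcess on a toy distribution; the values
# are illustrative, not from any original test. With top_p=0.8 the nucleus
# {0.5, 0.3, 0.1} is kept (the token that crosses the threshold stays
# because of the right-shift of the removal mask) and the tail is zeroed.
import paddle

toy_probs = paddle.to_tensor([[0.5, 0.3, 0.1, 0.1]])
filtered = TopPProcess(toy_probs, top_p=0.8, min_tokens_to_keep=1)
# filtered -> [[0.5, 0.3, 0.1, 0.0]]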
def gaussian(M: int, std: float, sym: bool = True,
             dtype: str = 'float64') -> Tensor:
    """Compute a Gaussian window.

    The Gaussian window has a Gaussian shape defined by the standard
    deviation (std).

    Parameters:
        M(int): window size.
        std(float): the window-specific parameter.
        sym(bool): whether to return a symmetric window. The default value is True.
        dtype(str): the datatype of the returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.gaussian().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    sig2 = 2 * std * std
    w = paddle.exp(-n**2 / sig2)

    return _truncate(w, needs_trunc)
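# Usage sketch: a 7-point symmetric Gaussian window with std=1.0; the
# result should match scipy.signal.windows.gaussian(7, std=1.0) up to
# floating-point precision.
w = gaussian(7, std=1.0)
# w peaks at 1.0 in the centre and decays as exp(-n**2 / (2 * std**2))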
def exponential(M: int,
                center=None,
                tau=1.,
                sym: bool = True,
                dtype: str = 'float64') -> Tensor:
    """Compute an exponential (or Poisson) window.

    Parameters:
        M(int): window size.
        center(float, optional): where the window is centered; must be None
            if `sym` is True. Defaults to (M - 1) / 2.
        tau(float): the window-specific parameter.
        sym(bool): whether to return a symmetric window. The default value is True.
        dtype(str): the datatype of the returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.exponential().
    """
    if sym and center is not None:
        raise ValueError("If sym==True, center must be None.")
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    if center is None:
        center = (M - 1) / 2

    n = paddle.arange(0, M, dtype=dtype)
    w = paddle.exp(-paddle.abs(n - center) / tau)

    return _truncate(w, needs_trunc)
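# Usage sketch: a symmetric 5-point exponential window with the default
# centre (M - 1) / 2 = 2, so w = exp(-|n - 2| / tau) for n = 0..4.
w = exponential(5, tau=1.)
# w -> [e**-2, e**-1, 1.0, e**-1, e**-2]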
def test_out(self):
    with fluid.program_guard(fluid.Program()):
        data = paddle.arange(0, 5, 1)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        result, = exe.run(fetch_list=[data])
        expected_data = np.arange(0, 5, 1).astype(np.float32)
        self.assertEqual((result == expected_data).all(), True)

    with fluid.program_guard(fluid.Program()):
        data = paddle.arange(0.0, 5.0, 1.0, 'int32')
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        result, = exe.run(fetch_list=[data])
        expected_data = np.arange(0, 5, 1).astype(np.int32)
        self.assertEqual((result == expected_data).all(), True)
def forward(self, src_word):
    src_max_len = paddle.shape(src_word)[-1]
    src_slf_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    trg_src_attn_bias = src_slf_attn_bias
    src_pos = paddle.cast(
        src_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=src_max_len)

    # Run encoder
    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=False) if self.dropout else src_emb
    enc_output = self.transformer.encoder(enc_input, src_slf_attn_bias)

    # Init states (caches) for transformer; they need to be updated
    # according to the selected beam.
    incremental_cache, static_cache = self.transformer.decoder.gen_cache(
        enc_output, do_zip=True)

    static_cache, enc_output, trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
        (static_cache, enc_output, trg_src_attn_bias), self.beam_size)

    rs, _ = nn.decode.dynamic_decode(
        decoder=self.decode,
        inits=incremental_cache,
        max_step_num=self.max_out_len,
        memory=enc_output,
        trg_src_attn_bias=trg_src_attn_bias,
        static_cache=static_cache,
        is_test=True)

    return rs
def prepare_inputs_for_generation(self,
                                  decoder_input_ids,
                                  attention_mask=None,
                                  encoder_output=None,
                                  use_cache=True,
                                  cache=None,
                                  **kwargs):
    if encoder_output is not None:
        expand_size = int(decoder_input_ids.shape[0] /
                          encoder_output.shape[0])
        if expand_size > 1:
            index = paddle.tile(
                paddle.arange(encoder_output.shape[0]).unsqueeze(-1),
                [1, expand_size]).reshape([-1])
            encoder_output = paddle.index_select(encoder_output, index)

    if use_cache and cache is None:
        if encoder_output is None:
            raise ValueError(
                "Encoder output can not be none if `use_cache` is True")
        cache = self.decoder.decoder.gen_cache(memory=encoder_output)

    if cache is not None:
        decoder_input_ids = decoder_input_ids[:, -1:]

    return {
        "input_ids": None,  # during prediction, encoder_output is provided; input_ids is not needed.
        "decoder_input_ids": decoder_input_ids,
        "encoder_output": encoder_output,
        "attention_mask": attention_mask,
        "use_cache": use_cache,
        "cache": cache
    }
def scatter_paddle(self, refined_seg_logits, point_indices, point_logits):
    """
    Paddle version of scatter: equivalent to the PyTorch version
    scatter(-1, point_indices, point_logits).

    Args:
        refined_seg_logits(Tensor): shape=[batch_size, channels, height * width]
        point_indices(Tensor): shape=[batch_size, channels, height * width]
        point_logits(Tensor): shape=[batch_size, channels, height * width]
    Returns:
        scattered refined_seg_logits(Tensor).
    """
    original_shape = paddle.shape(
        refined_seg_logits)  # [batch_size, channels, height * width]
    new_refined_seg_logits = refined_seg_logits.flatten(0, 1)  # [N*C,H*W]
    offsets = (paddle.arange(paddle.shape(new_refined_seg_logits)[0]) *
               paddle.shape(new_refined_seg_logits)[1]).unsqueeze(
                   -1)  # [N*C,1]
    point_indices = point_indices.flatten(0, 1)  # [N*C,H*W]
    new_point_indices = (point_indices + offsets).flatten()
    point_logits = point_logits.flatten()  # [N*C*H*W]
    refined_seg_logits = paddle.scatter(
        refined_seg_logits.flatten(),
        new_point_indices,
        point_logits,
        overwrite=True)
    return refined_seg_logits.reshape(shape=original_shape)
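# Usage sketch (hypothetical `head` instance owning scatter_paddle): the
# flatten-plus-offset trick turns the per-row scatter into a single 1-D
# paddle.scatter over all [N*C*H*W] elements.
#
# logits = paddle.zeros([2, 3, 16])         # [N, C, H*W]
# idx = paddle.randint(0, 16, [2, 3, 16])   # positions along the last axis
# vals = paddle.rand([2, 3, 16])            # refined logits for those points
# out = head.scatter_paddle(logits, idx, vals)
# # out[n, c, idx[n, c, p]] takes vals[n, c, p] (last write wins on duplicates)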
def prepare_inputs_for_generation(self,
                                  decoder_input_ids,
                                  attention_mask=None,
                                  encoder_output=None,
                                  use_cache=True,
                                  cache=None,
                                  **kwargs):
    """
    Prepare inputs for the decoder to generate sentences.

    Return:
        dict: A dictionary containing the inputs necessary for generating
            the next token.
    """
    if encoder_output is not None:
        expand_size = int(decoder_input_ids.shape[0] /
                          encoder_output.shape[0])
        if expand_size > 1:
            index = paddle.tile(
                paddle.arange(encoder_output.shape[0]).unsqueeze(-1),
                [1, expand_size]).reshape([-1])
            encoder_output = paddle.index_select(encoder_output, index)

    if cache is not None:
        decoder_input_ids = decoder_input_ids[:, -1:]

    return {
        "input_ids": None,  # during prediction, encoder_output is provided; input_ids is not needed.
        "decoder_input_ids": decoder_input_ids,
        "encoder_output": encoder_output,
        "attention_mask": attention_mask,
        "use_cache": use_cache,
        "cache": cache
    }
def gen_bias(encoder_inputs, decoder_inputs, step):
    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
    encoder_bsz, encoder_seqlen = encoder_inputs.shape[:2]
    attn_bias = paddle.reshape(
        paddle.arange(0, decoder_seqlen, 1, dtype='float32') + 1,
        [1, -1, 1])
    decoder_bias = paddle.cast(
        (paddle.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.),
        'float32')  # [1, decoderlen, decoderlen]
    encoder_bias = paddle.unsqueeze(
        paddle.cast(paddle.ones_like(encoder_inputs), 'float32'),
        [1])  # [bsz, 1, encoderlen]
    encoder_bias = paddle.expand(
        encoder_bias,
        [encoder_bsz, decoder_seqlen,
         encoder_seqlen])  # [bsz, decoderlen, encoderlen]
    decoder_bias = paddle.expand(
        decoder_bias,
        [decoder_bsz, decoder_seqlen,
         decoder_seqlen])  # [bsz, decoderlen, decoderlen]
    if step > 0:
        bias = paddle.concat([
            encoder_bias,
            paddle.ones([decoder_bsz, decoder_seqlen, step], 'float32'),
            decoder_bias
        ], -1)
    else:
        bias = paddle.concat([encoder_bias, decoder_bias], -1)
    return bias
def triang(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a triangular window.

    Parameters:
        M(int): window size.
        sym(bool): whether to return a symmetric window. The default value is True.
        dtype(str): the datatype of the returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.triang().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype)
    if M % 2 == 0:
        w = (2 * n - 1.0) / M
        w = paddle.concat([w, w[::-1]])
    else:
        w = 2 * n / (M + 1.0)
        w = paddle.concat([w, w[-2::-1]])

    return _truncate(w, needs_trunc)
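# Usage sketch: triang(5) takes the odd-M branch, w = 2 * n / (M + 1)
# mirrored about the centre, matching scipy.signal.windows.triang(5).
w = triang(5)
# w -> [1/3, 2/3, 1.0, 2/3, 1/3]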
def forward(self, input_ids_shape, past_key_values_length=0):
    """`input_ids_shape` is expected to be [bsz x seqlen]."""
    bsz, seq_len = input_ids_shape[:2]
    positions = paddle.arange(past_key_values_length,
                              past_key_values_length + seq_len,
                              dtype="int64")
    return super().forward(positions + self.offset)
def test_median_exception(self):
    paddle.disable_static()
    x = [1, 2, 3, 4]
    self.assertRaises(TypeError, paddle.median, x)
    x = paddle.arange(12).reshape([3, 4])
    self.assertRaises(ValueError, paddle.median, x, 1.0)
    self.assertRaises(ValueError, paddle.median, x, 2)
def generate_relative_positions_embeddings(self,
                                           length,
                                           depth,
                                           max_relative_position=127):
    vocab_size = max_relative_position * 2 + 1
    range_vec = paddle.arange(length)
    range_mat = paddle.tile(range_vec, repeat_times=[length]).reshape(
        (length, length))
    distance_mat = range_mat - paddle.t(range_mat)
    distance_mat_clipped = paddle.clip(distance_mat.astype('float32'),
                                       -max_relative_position,
                                       max_relative_position)
    final_mat = distance_mat_clipped + max_relative_position
    embeddings_table = np.zeros([vocab_size, depth])
    for pos in range(vocab_size):
        for i in range(depth // 2):
            embeddings_table[pos, 2 * i] = np.sin(
                pos / np.power(10000, 2 * i / depth))
            embeddings_table[pos, 2 * i + 1] = np.cos(
                pos / np.power(10000, 2 * i / depth))

    embeddings_table_tensor = paddle.to_tensor(embeddings_table,
                                               dtype='float32')
    flat_relative_positions_matrix = final_mat.reshape((-1, ))
    one_hot_relative_positions_matrix = paddle.nn.functional.one_hot(
        flat_relative_positions_matrix.astype('int64'),
        num_classes=vocab_size)
    embeddings = paddle.matmul(one_hot_relative_positions_matrix,
                               embeddings_table_tensor)
    my_shape = final_mat.shape
    my_shape.append(depth)

    embeddings = embeddings.reshape(my_shape)
    return embeddings
def paddle2D_scatter_add(x_tensor, index_tensor, update_tensor, dim=0):
    dim0, dim1 = update_tensor.shape
    update_tensor = paddle.flatten(update_tensor, start_axis=0, stop_axis=1)
    index_tensor = paddle.reshape(index_tensor, [-1, 1])
    if dim == 0:
        # The k-th flattened update (row-major) sits in column k % dim1;
        # pair that column with the scattered row index. (Using % dim0
        # here would only be correct for square inputs.)
        index_tensor = paddle.concat(
            x=[
                index_tensor,
                (paddle.arange(dim1 * dim0) % dim1).unsqueeze(1)
            ],
            axis=1)
    elif dim == 1:
        # The k-th flattened update sits in row k // dim1; pair that row
        # with the scattered column index.
        index_tensor = paddle.concat(
            x=[(paddle.arange(dim1 * dim0) // dim1).unsqueeze(1),
               index_tensor],
            axis=1)
    output_tensor = paddle.scatter_nd_add(x_tensor, index_tensor,
                                          update_tensor)
    return output_tensor
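# Usage sketch of the dim=1 path, mirroring torch's
# x.scatter_add_(1, index, updates): each update lands in its own row,
# at the column named by `index`.
import paddle

x = paddle.zeros([2, 4])
index = paddle.to_tensor([[0, 1], [2, 3]])
updates = paddle.to_tensor([[1., 2.], [3., 4.]])
out = paddle2D_scatter_add(x, index, updates, dim=1)
# out -> [[1., 2., 0., 0.],
#         [0., 0., 3., 4.]]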
def expand_inputs_for_generation(input_ids,
                                 expand_size,
                                 attention_mask=None,
                                 **model_kwargs):
    index = paddle.tile(
        paddle.arange(input_ids.shape[0]).unsqueeze(-1),
        [1, expand_size]).reshape([-1])

    input_ids = paddle.index_select(input_ids, index)

    if attention_mask is not None:
        model_kwargs["attention_mask"] = paddle.index_select(
            attention_mask, index)

    if "token_type_ids" in model_kwargs:
        token_type_ids = model_kwargs["token_type_ids"]
        model_kwargs["token_type_ids"] = paddle.index_select(
            token_type_ids, index)

    if "position_ids" in model_kwargs:
        position_ids = model_kwargs["position_ids"]
        model_kwargs["position_ids"] = paddle.index_select(
            position_ids, index)

    return input_ids, model_kwargs
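# Usage sketch: replicate every sample `expand_size` times (row i is
# repeated consecutively), as is done before beam search or
# multi-return sampling.
input_ids = paddle.to_tensor([[1, 2], [3, 4]])
expanded, _ = expand_inputs_for_generation(input_ids, expand_size=2)
# expanded -> [[1, 2], [1, 2], [3, 4], [3, 4]]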
def __init__(self, channels, scale):
    super(AntiAliasInterpolation2d, self).__init__()
    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    self.ka = kernel_size // 2
    self.kb = self.ka - 1 if kernel_size % 2 == 0 else self.ka

    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]

    # The gaussian kernel is the product of the
    # gaussian function of each dimension.
    kernel = 1
    meshgrids = paddle.meshgrid(
        [paddle.arange(size, dtype='float32') for size in kernel_size])
    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= paddle.exp(-(mgrid - mean)**2 / (2 * std**2 + 1e-9))

    # Make sure sum of values in gaussian kernel equals 1.
    kernel = kernel / paddle.sum(kernel)
    # Reshape to depthwise convolutional weight
    kernel = kernel.reshape([1, 1, *kernel.shape])
    kernel = paddle.tile(kernel, [channels, *[1] * (kernel.dim() - 1)])

    self.register_buffer('weight', kernel)
    self.groups = channels
    self.scale = scale
def __call__(self, x, index):
    if self.dim < 0:
        self.dim += len(x.shape)
    x_range = list(range(len(x.shape)))
    x_range[0] = self.dim
    x_range[self.dim] = 0
    x_swaped = paddle.transpose(x, perm=x_range)
    index_range = list(range(len(index.shape)))
    index_range[0] = self.dim
    index_range[self.dim] = 0
    index_swaped = paddle.transpose(index, perm=index_range)
    dtype = index.dtype

    x_shape = paddle.shape(x_swaped)
    index_shape = paddle.shape(index_swaped)

    prod = paddle.cast(paddle.prod(x_shape), dtype=dtype) / x_shape[0]

    x_swaped_flattend = paddle.flatten(x_swaped)
    index_swaped_flattend = paddle.flatten(index_swaped)
    index_swaped_flattend *= prod

    bias = paddle.arange(start=0, end=prod, dtype=dtype)
    bias = paddle.reshape(bias, x_shape[1:])
    bias = paddle.crop(bias, index_shape[1:])
    bias = paddle.flatten(bias)
    bias = paddle.tile(bias, [index_shape[0]])
    index_swaped_flattend += bias

    gathered = paddle.index_select(x_swaped_flattend,
                                   index_swaped_flattend)
    gathered = paddle.reshape(gathered, index_swaped.shape)

    out = paddle.transpose(gathered, perm=x_range)

    return out
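# Usage sketch, assuming this __call__ belongs to a small gather helper
# (hypothetical name `Gather`) constructed with the axis to gather along;
# it reproduces torch.gather(x, dim, index) via transpose + flatten +
# index_select:
#
# gather = Gather(dim=1)
# x = paddle.to_tensor([[1., 2.], [3., 4.]])
# index = paddle.to_tensor([[0, 0], [1, 0]])
# gather(x, index)  # -> [[1., 1.], [4., 3.]], i.e. out[i][j] = x[i][index[i][j]]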
def __init__(
        self,
        vocab_size,
        hidden_size,
        hidden_dropout_prob,
        max_position_embeddings,
        type_vocab_size,
        layer_norm_eps,
        pad_token_id,
):
    super(FNetEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(vocab_size,
                                        hidden_size,
                                        padding_idx=pad_token_id)
    self.position_embeddings = nn.Embedding(max_position_embeddings,
                                            hidden_size)
    self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)

    self.layer_norm = nn.LayerNorm(hidden_size, epsilon=layer_norm_eps)
    # NOTE: This is the projection layer and will be needed. The original
    # code allows for different embedding and model dimensions.
    self.projection = nn.Linear(hidden_size, hidden_size)
    self.dropout = nn.Dropout(hidden_dropout_prob)

    # position_ids (1, len position emb) is contiguous in memory and
    # exported when serialized
    self.register_buffer(
        "position_ids",
        paddle.arange(max_position_embeddings).expand((1, -1)))
def __init__(self, config):
    super(LayoutXLMEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(
        config["vocab_size"], config["hidden_size"], padding_idx=0)
    self.position_embeddings = nn.Embedding(
        config["max_position_embeddings"], config["hidden_size"])
    # gry add for layoutxlm
    self.x_position_embeddings = nn.Embedding(
        config["max_2d_position_embeddings"], config["coordinate_size"])
    self.y_position_embeddings = nn.Embedding(
        config["max_2d_position_embeddings"], config["coordinate_size"])
    self.h_position_embeddings = nn.Embedding(
        config["max_2d_position_embeddings"], config["coordinate_size"])
    self.w_position_embeddings = nn.Embedding(
        config["max_2d_position_embeddings"], config["coordinate_size"])
    # end of gry add for layoutxlm
    self.token_type_embeddings = nn.Embedding(config["type_vocab_size"],
                                              config["hidden_size"])
    self.LayerNorm = nn.LayerNorm(
        config["hidden_size"], epsilon=config["layer_norm_eps"])
    self.dropout = nn.Dropout(config["hidden_dropout_prob"])

    self.register_buffer(
        "position_ids",
        paddle.arange(config["max_position_embeddings"]).expand((1, -1)))
def forward(self,
            input_ids,
            position_ids=None,
            attention_mask=None,
            use_cache=False,
            cache=None):
    self.checkpoints = []
    if position_ids is None:
        past_length = 0
        if cache is not None:
            past_length = paddle.shape(cache[0].k)[-2]
        position_ids = paddle.arange(past_length,
                                     paddle.shape(input_ids)[-1] +
                                     past_length,
                                     dtype='int64')
        position_ids = position_ids.unsqueeze(0)  # .expand_as(input_ids)
        position_ids = paddle.fluid.layers.expand_as(
            position_ids, input_ids)
    embedding_output = self.embeddings(input_ids=input_ids,
                                       position_ids=position_ids)

    encoder_outputs = self.decoder(embedding_output,
                                   memory=None,
                                   tgt_mask=None,
                                   use_cache=use_cache,
                                   cache=cache)
    self.checkpoints.extend(self.decoder.checkpoints)
    return encoder_outputs
def forward(self, src_word):
    src_max_len = paddle.shape(src_word)[-1]
    src_slf_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    src_pos = paddle.cast(
        src_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=src_max_len)

    # Run encoder
    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=False) if self.dropout else src_emb
    enc_output = self.transformer.encoder(enc_input, src_slf_attn_bias)

    if self.use_fp16_decoding:
        enc_output = paddle.cast(enc_output, dtype="float16")

    mem_seq_lens = paddle.sum(paddle.cast(
        src_word != self.bos_id, dtype="int32"),
                              axis=1)
    ids = self.decoding(enc_output, mem_seq_lens)

    return ids
def _compute_locatioins_by_level(self, fpn_stride, feature):
    shape_fm = feature.shape
    h, w = shape_fm[2], shape_fm[3]
    shift_x = paddle.arange(0, w * fpn_stride, fpn_stride)
    shift_y = paddle.arange(0, h * fpn_stride, fpn_stride)
    shift_x = paddle.unsqueeze(shift_x, axis=0)
    shift_y = paddle.unsqueeze(shift_y, axis=1)
    shift_x = paddle.expand_as(shift_x, feature[0, 0, :, :])
    shift_y = paddle.expand_as(shift_y, feature[0, 0, :, :])
    shift_x.stop_gradient = True
    shift_y.stop_gradient = True
    shift_x = paddle.reshape(shift_x, shape=[-1])
    shift_y = paddle.reshape(shift_y, shape=[-1])
    location = paddle.stack([shift_x, shift_y], axis=-1) + fpn_stride / 2
    location.stop_gradient = True
    return location
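# Worked example: with fpn_stride=8 and a 2x2 feature map, the grid points
# are x, y in {0, 8}; adding stride / 2 centres them on the input image:
# location -> [[4., 4.], [12., 4.], [4., 12.], [12., 12.]]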
def _shard_edges_by_dst(self, edges, edge_feat):
    """Shard edges by dst

    Args:
        edges: list of (u, v) tuples, 2D numpy.ndarray or 2D paddle.Tensor
        edge_feat (optional): a dict of numpy arrays as edge features
            (should have consistent order with edges)

    Returns:
        Return a tuple (shard_edges, shard_edge_feat) as the shard results.
    """
    shard_flag = edges[:, 1]
    mask = (shard_flag % dist.get_world_size()) == dist.get_rank()
    if type(mask) == paddle.Tensor:
        eid = paddle.masked_select(paddle.arange(edges.shape[0]), mask)
        shard_edges = paddle.gather(edges, eid)
        shard_edge_feat = {}
        for key, value in edge_feat.items():
            shard_edge_feat[key] = paddle.gather(value, eid)
    else:
        eid = np.arange(edges.shape[0])[mask]
        shard_edges = edges[eid]
        shard_edge_feat = {}
        for key, value in edge_feat.items():
            shard_edge_feat[key] = value[eid]
    return shard_edges, shard_edge_feat
def loss(self, embeds):
    """
    Computes the softmax loss according to section 2.1 of GE2E.

    :param embeds: the embeddings as a tensor of shape
        (speakers_per_batch, utterances_per_speaker, embedding_size)
    :return: the loss and the EER for this batch of embeddings.
    """
    speakers_per_batch, utterances_per_speaker = embeds.shape[:2]

    # Loss
    sim_matrix, *_ = self.similarity_matrix(embeds)
    sim_matrix = sim_matrix.reshape(
        [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
    target = paddle.arange(0, speakers_per_batch,
                           dtype="int64").unsqueeze(-1)
    target = paddle.expand(target,
                           [speakers_per_batch, utterances_per_speaker])
    target = paddle.reshape(target, [-1])

    loss = nn.CrossEntropyLoss()(sim_matrix, target)

    # EER (not backpropagated)
    with paddle.no_grad():
        ground_truth = target.numpy()
        # np.int was removed in NumPy 1.24; the builtin int is equivalent.
        inv_argmax = lambda i: np.eye(
            1, speakers_per_batch, i, dtype=int)[0]
        labels = np.array([inv_argmax(i) for i in ground_truth])
        preds = sim_matrix.numpy()

        # Snippet from https://yangcha.github.io/EER-ROC/
        fpr, tpr, thresholds = roc_curve(labels.flatten(), preds.flatten())
        eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

    return loss, eer