def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    The boxes should be in [x0, y0, x1, y1] format.

    Returns a [N, M] pairwise matrix, where N = len(boxes1) and M = len(boxes2).
    """
    # degenerate boxes give inf / nan results, so do an early check
    assert L.reduce_all(boxes1[:, 2:] >= boxes1[:, :2])
    assert L.reduce_all(boxes2[:, 2:] >= boxes2[:, :2])
    iou, union = box_iou(boxes1, boxes2)

    N, M = boxes1.shape[0], boxes2.shape[0]
    boxes1 = L.unsqueeze(boxes1, axes=[1])   # [N, 1, 4]
    boxes1 = L.expand(boxes1, [1, M, 1])     # [N, M, 4]
    boxes2 = L.unsqueeze(boxes2, axes=[0])   # [1, M, 4]
    boxes2 = L.expand(boxes2, [N, 1, 1])     # [N, M, 4]

    # corners of the smallest enclosing box for every pair
    lt = L.elementwise_min(boxes1[:, :, :2], boxes2[:, :, :2])  # [N, M, 2]
    rb = L.elementwise_max(boxes1[:, :, 2:], boxes2[:, :, 2:])  # [N, M, 2]

    wh = L.clip(rb - lt, min=0, max=1e8)     # [N, M, 2]
    area = wh[:, :, 0] * wh[:, :, 1] + 1e-4  # prevent division by zero

    return iou - (area - union) / area
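# Hedged usage sketch for generalized_box_iou (not part of the original source).
# It assumes `L` aliases paddle.fluid.layers, `dg` aliases paddle.fluid.dygraph,
# and a companion box_iou(boxes1, boxes2) returning (iou, union) is defined in
# the same module. Boxes are [x0, y0, x1, y1] with x1 >= x0 and y1 >= y0.
import numpy as np

def giou_example():
    with dg.guard():
        boxes1 = dg.to_variable(
            np.array([[0., 0., 2., 2.], [1., 1., 3., 3.]], dtype='float32'))
        boxes2 = dg.to_variable(
            np.array([[0., 0., 2., 2.], [2., 2., 4., 4.], [0., 0., 1., 1.]],
                     dtype='float32'))
        giou = generalized_box_iou(boxes1, boxes2)  # pairwise matrix, shape [2, 3]
        return giou.numpy()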
def get_face_mask(densepose_map):
    """
    Obtain mask of faces.

    Args:
        densepose_map (3D or 4D tensor)
    """
    need_reshape = len(densepose_map.shape) == 4
    if need_reshape:
        bo, t, h, w = densepose_map.shape
        densepose_map = L.reshape(densepose_map, (-1, h, w))
    b, h, w = densepose_map.shape
    # map the normalized densepose values back to the 24 body-part indices
    part_map = (densepose_map / 2 + 0.5) * 24
    assert L.reduce_all(part_map >= 0) and L.reduce_all(part_map < 25)

    # parts 23 and 24 correspond to the face
    mask = dg.to_variable(np.zeros((b, h, w)).astype('bool'))
    for j in [23, 24]:
        mask = L.logical_or(
            mask,
            L.logical_and(part_map > j - 0.1, part_map < j + 0.1))

    if need_reshape:
        mask = L.reshape(mask, (bo, t, h, w))
    return P.cast(mask, "float32")
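# Hedged usage sketch for get_face_mask (not part of the original source).
# It assumes `L`, `dg`, and `P` alias paddle.fluid.layers, paddle.fluid.dygraph,
# and paddle respectively, and that the DensePose part map is normalized to
# [-1, 1] so that parts 23 and 24 are the face channels.
def face_mask_example():
    with dg.guard():
        fake_densepose = np.random.uniform(-1., 1., (2, 4, 64, 64)).astype('float32')
        face_mask = get_face_mask(dg.to_variable(fake_densepose))
        return face_mask.numpy()  # float32 mask of shape [2, 4, 64, 64]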
def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores,
                finished_in_finished):
    """Decide whether decoding should continue: stop once the best finished
    score can no longer be beaten by any alive beam, or the length limit is hit."""
    base_1 = layers.cast(source_length, 'float32') + 55.0
    base_1 /= 6.0
    max_length_penalty = layers.pow(base_1, self.alpha)

    # best possible (length-normalized) score of the tracked alive sequence
    flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
    lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs,
                                               [self.get_alive_index])
    lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty

    # lowest score among the finished sequences; -INF if nothing has finished
    lowest_score_of_finished_in_finish = layers.reduce_min(
        finished_scores * finished_in_finished, dim=1)
    finished_in_finished = layers.cast(finished_in_finished, 'bool')
    lowest_score_of_finished_in_finish += (
        (1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1),
                           'float32')) * -INF)

    bound_is_met = layers.reduce_all(
        layers.greater_than(lowest_score_of_finished_in_finish,
                            lower_bound_alive_scores))

    decode_length = source_length + 50
    length_cond = layers.less_than(x=step_idx, y=decode_length)
    return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
def epoch_evaluate(args, model, loader, puncts):
    """Evaluate in one epoch"""
    model.eval()

    total_loss, metric = 0, Metric()

    for words, feats, arcs, rels in loader():
        # ignore the first token of each sentence
        tmp_words = layers.pad(words[:, 1:],
                               paddings=[0, 0, 1, 0],
                               pad_value=args.pad_index)
        mask = tmp_words != args.pad_index
        s_arc, s_rel = model(words, feats)
        loss = loss_function(s_arc, s_rel, arcs, rels, mask)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)
        # ignore all punctuation if not specified
        if not args.punct:
            punct_mask = layers.reduce_all(
                layers.expand(layers.unsqueeze(words, -1),
                              (1, 1, puncts.shape[0])) != layers.expand(
                                  layers.reshape(puncts, (1, 1, -1)),
                                  (*words.shape, 1)),
                dim=-1)
            mask = layers.logical_and(mask, punct_mask)
        metric(arc_preds, rel_preds, arcs, rels, mask)
        total_loss += loss.numpy().item()

    total_loss /= len(loader)

    return total_loss, metric
def reduce_compare(x, op_str, y):
    """Apply the comparison `op_str` element-wise to x and y and reduce the
    result to a single boolean."""
    element_wise_result = eval("x " + op_str + " y")
    if op_str == "!=":
        # inequality holds if any element differs
        return reduce_any(element_wise_result)
    elif op_str == "is" or op_str == "is not" or op_str == "in" or op_str == "not in":
        # identity and membership tests already yield a scalar result
        return element_wise_result
    else:
        # all other comparisons must hold for every element
        return reduce_all(element_wise_result)
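# Hedged usage sketch for reduce_compare (not part of the original source).
# It assumes the module-level reduce_any / reduce_all are element-wise boolean
# reductions (e.g. paddle.fluid.layers.reduce_any / reduce_all) imported into
# the local namespace, and that x and y support element-wise comparison.
def reduce_compare_example(x, y):
    all_equal = reduce_compare(x, "==", y)    # True only if every element matches
    any_differs = reduce_compare(x, "!=", y)  # True if at least one element differs
    return all_equal, any_differs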
def epoch_evaluate(args, model, loader, punctuation):
    """Evaluate in one epoch"""
    model.eval()

    total_loss, metric = 0, Metric()
    pad_index = args.pad_index
    bos_index = args.bos_index
    eos_index = args.eos_index
    for batch_index, inputs in enumerate(loader(), start=1):
        if args.encoding_model.startswith("ernie"):
            words, connections, deprel = inputs
            connection_prob, deprel_prob, words = model(words)
        else:
            words, feats, connections, deprel = inputs
            connection_prob, deprel_prob, words = model(words, feats)

        # mask out padding, begin-of-sentence and end-of-sentence tokens
        mask = layers.logical_and(
            layers.logical_and(words != pad_index, words != bos_index),
            words != eos_index,
        )
        loss = loss_function(connection_prob, deprel_prob, connections, deprel,
                             mask)
        connection_predict, deprel_predict = decode(args, connection_prob,
                                                    deprel_prob, mask)
        # ignore all punctuation if not specified
        if not args.punct:
            punct_mask = layers.reduce_all(
                layers.expand(layers.unsqueeze(words, -1),
                              (1, 1, punctuation.shape[0])) !=
                layers.expand(layers.reshape(punctuation, (1, 1, -1)),
                              words.shape + [1]),
                dim=-1)
            mask = layers.logical_and(mask, punct_mask)

        metric(connection_predict, deprel_predict, connections, deprel, mask)
        total_loss += loss.numpy().item()

    total_loss /= len(loader)

    return total_loss, metric
def early_finish(alive_log_probs, finished_scores, finished_in_finished):
    max_length_penalty = np.power(((5. + max_len) / 6.), alpha)
    # The best possible score of the most likely alive sequence
    lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty

    # Now to compute the lowest score of a finished sequence in finished.
    # If the sequence isn't finished, we multiply its score by 0. Since
    # scores are all -ve, taking the min will give us the score of the lowest
    # finished item.
    lowest_score_of_finished_in_finished = layers.reduce_min(
        finished_scores * finished_in_finished, 1)
    # If none of the sequences have finished, then the min will be 0 and
    # we have to replace it by -ve INF if it is. The score of any seq in alive
    # will be much higher than -ve INF and the termination condition will not
    # be met.
    lowest_score_of_finished_in_finished += (
        1. - layers.reduce_max(finished_in_finished, 1)) * -inf
    bound_is_met = layers.reduce_all(
        layers.greater_than(lowest_score_of_finished_in_finished,
                            lower_bound_alive_scores))

    return bound_is_met
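# Hedged numeric sketch of the early-finish bound (not part of the original
# source). It restates early_finish in plain numpy to show what the bound
# computes; alpha=0.6, max_len=20 and inf=1e9 are assumed illustrative values,
# not values taken from the original code.
def early_finish_numpy(alive_log_probs, finished_scores, finished_in_finished,
                       alpha=0.6, max_len=20, inf=1e9):
    max_length_penalty = np.power((5. + max_len) / 6., alpha)
    # best achievable length-normalized score of the top alive beam entry
    lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty
    # worst finished score per batch item; -inf if nothing has finished yet
    lowest_finished = np.min(finished_scores * finished_in_finished, axis=1)
    lowest_finished += (1. - np.max(finished_in_finished, axis=1)) * -inf
    # stop early only when every alive hypothesis is already beaten
    return np.all(lowest_finished > lower_bound_alive_scores)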
def beam_search(self,
                src_word,
                src_pos,
                src_slf_attn_bias,
                trg_word,
                trg_src_attn_bias,
                bos_id=0,
                eos_id=1,
                beam_size=4,
                max_len=256):
    def expand_to_beam_size(tensor, beam_size):
        tensor = layers.reshape(tensor,
                                [tensor.shape[0], 1] + tensor.shape[1:])
        tile_dims = [1] * len(tensor.shape)
        tile_dims[1] = beam_size
        return layers.expand(tensor, tile_dims)

    def merge_batch_beams(tensor):
        return layers.reshape(tensor, [tensor.shape[0] * tensor.shape[1]] +
                              tensor.shape[2:])

    def split_batch_beams(tensor):
        return fluid.layers.reshape(tensor,
                                    shape=[-1, beam_size] +
                                    list(tensor.shape[1:]))

    def mask_probs(probs, finished, noend_mask_tensor):
        # TODO: use where_op
        finished = layers.cast(finished, dtype=probs.dtype)
        probs = layers.elementwise_mul(layers.expand(
            layers.unsqueeze(finished, [2]), [1, 1, self.trg_vocab_size]),
                                       noend_mask_tensor,
                                       axis=-1) - layers.elementwise_mul(
                                           probs, (finished - 1), axis=0)
        return probs

    def gather(x, indices, batch_pos):
        topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
        return layers.gather_nd(x, topk_coordinates)

    # run encoder
    enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)

    # constant number
    inf = float(1. * 1e7)
    batch_size = enc_output.shape[0]
    max_len = (enc_output.shape[1] + 20) if max_len is None else max_len
    vocab_size_tensor = layers.fill_constant(shape=[1],
                                             dtype="int64",
                                             value=self.trg_vocab_size)
    end_token_tensor = to_variable(
        np.full([batch_size, beam_size], eos_id, dtype="int64"))

    noend_array = [-inf] * self.trg_vocab_size
    noend_array[eos_id] = 0
    noend_mask_tensor = to_variable(np.array(noend_array, dtype="float32"))
    batch_pos = layers.expand(
        layers.unsqueeze(
            to_variable(np.arange(0, batch_size, 1, dtype="int64")), [1]),
        [1, beam_size])

    predict_ids = []
    parent_ids = []
    ### initialize states of beam search ###
    log_probs = to_variable(
        np.array([[0.] + [-inf] * (beam_size - 1)] * batch_size,
                 dtype="float32"))
    finished = to_variable(np.full([batch_size, beam_size], 0, dtype="bool"))
    ### initialize inputs and states of transformer decoder ###
    ## init inputs for decoder, shaped `[batch_size*beam_size, ...]`
    trg_word = layers.fill_constant(shape=[batch_size * beam_size, 1],
                                    dtype="int64",
                                    value=bos_id)
    trg_pos = layers.zeros_like(trg_word)
    trg_src_attn_bias = merge_batch_beams(
        expand_to_beam_size(trg_src_attn_bias, beam_size))
    enc_output = merge_batch_beams(expand_to_beam_size(enc_output, beam_size))
    ## init states (caches) for transformer, need to be updated according to
    ## the selected beam
    caches = [{
        "k": layers.fill_constant(
            shape=[batch_size * beam_size, self.n_head, 0, self.d_key],
            dtype=enc_output.dtype,
            value=0),
        "v": layers.fill_constant(
            shape=[batch_size * beam_size, self.n_head, 0, self.d_value],
            dtype=enc_output.dtype,
            value=0),
    } for i in range(self.n_layer)]

    for i in range(max_len):
        trg_pos = layers.fill_constant(shape=trg_word.shape,
                                       dtype="int64",
                                       value=i)
        caches = map_structure(  # can not be reshaped since the 0 size
            lambda x: x if i == 0 else merge_batch_beams(x), caches)
        logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
                              enc_output, caches)
        caches = map_structure(split_batch_beams, caches)
        step_log_probs = split_batch_beams(
            fluid.layers.log(fluid.layers.softmax(logits)))
        step_log_probs = mask_probs(step_log_probs, finished,
                                    noend_mask_tensor)
        log_probs = layers.elementwise_add(x=step_log_probs,
                                           y=log_probs,
                                           axis=0)
        log_probs = layers.reshape(log_probs,
                                   [-1, beam_size * self.trg_vocab_size])
        scores = log_probs
        topk_scores, topk_indices = fluid.layers.topk(input=scores,
                                                      k=beam_size)
        beam_indices = fluid.layers.elementwise_floordiv(
            topk_indices, vocab_size_tensor)
        token_indices = fluid.layers.elementwise_mod(topk_indices,
                                                     vocab_size_tensor)

        # update states
        caches = map_structure(lambda x: gather(x, beam_indices, batch_pos),
                               caches)
        log_probs = gather(log_probs, topk_indices, batch_pos)
        finished = gather(finished, beam_indices, batch_pos)
        finished = layers.logical_or(
            finished, layers.equal(token_indices, end_token_tensor))
        trg_word = layers.reshape(token_indices, [-1, 1])

        predict_ids.append(token_indices)
        parent_ids.append(beam_indices)

        if layers.reduce_all(finished).numpy():
            break

    predict_ids = layers.stack(predict_ids, axis=0)
    parent_ids = layers.stack(parent_ids, axis=0)
    finished_seq = layers.transpose(
        layers.gather_tree(predict_ids, parent_ids), [1, 2, 0])
    finished_scores = topk_scores

    return finished_seq, finished_scores
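# Hedged numeric sketch (not part of the original source) of how beam_search
# splits the flat top-k index over [beam_size * trg_vocab_size] back into a
# parent-beam index and a token index, mirroring the elementwise_floordiv /
# elementwise_mod step above with plain Python integers.
def split_topk_index(topk_index, vocab_size):
    beam_index = topk_index // vocab_size   # which parent beam the candidate extends
    token_index = topk_index % vocab_size   # which vocabulary token it appends
    return beam_index, token_index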
def decode_with_grammar(decoder, inits, decode_vocab, max_step_num, **kwargs):
    """A modification of paddle.fluid.layers.dynamic_decode(...).
    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
    Tensor indicating finished status contains all True values or the number of
    decoding steps reaches :attr:`max_step_num`. :code:`decoder.initialize()` would
    be called once before the decoding loop. If the `decoder` has implemented a
    `finalize` method, :code:`decoder.finalize()` would be called once after the
    decoding loop.

    Args:
        decoder(Decoder): An instance of `Decoder`.
        inits(tuple): Argument passed to `decoder.initialize`.
        decode_vocab(DecoderDynamicVocab): namedtuple(table table_len column column_len value value_len)
        max_step_num(int): The maximum number of steps.
        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.

    Returns:
        tuple: A tuple( :code:`(final_outputs, final_states)` ) including the final \
            outputs and states, both are Tensor or nested structure of Tensor. \
            `final_outputs` has the same structure and data types as \
            :code:`decoder.output_dtype`, and each Tensor in `final_outputs` \
            is the stacked outputs of all decoding steps, which might be revised \
            by :code:`decoder.finalize`. `final_states` is the counterpart \
            at the last time step of the initial states returned by \
            :code:`decoder.initialize`, thus has the same structure with it and \
            has tensors with the same shapes and data types.
    """
    step_cnt = tensor.fill_constant(shape=[1], dtype="int64", value=1)
    max_step_num_tensor = tensor.fill_constant(shape=[1],
                                               dtype="int64",
                                               value=max_step_num - 2)

    # shape = [batch_size, beam_size, ...]
    initial_inputs, initial_states, initial_finished = decoder.initialize(
        inits, decode_vocab)
    global_inputs, global_states, global_finished = (initial_inputs,
                                                     initial_states,
                                                     initial_finished)
    inputs = initial_inputs
    states = initial_states

    # buffer that stores the decoding outputs
    outputs_arr_data = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num],
        dtype=decoder.output_dtype.predicted_ids,
        value=0)
    outputs_arr_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    outputs_array = data_structure.ArrayData(
        decoder.merge_batch_beams(outputs_arr_data),
        decoder.merge_batch_beams(outputs_arr_pos))

    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished),
                                   "int64")

    # data structures that constrain decoding to follow the grammar
    grammar_stack_dat = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num * STACK_EXPAND_TIMES],
        dtype='int64',
        value=0)
    grammar_stack_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    grammar_stack = data_structure.StackData(
        decoder.merge_batch_beams(grammar_stack_dat),
        decoder.merge_batch_beams(grammar_stack_pos))

    ############ decode in a loop until every sequence is finished ############
    # finishing is decided by global_finished/next_finished && max_step_num
    cond = layers.logical_not(layers.reduce_all(initial_finished))
    while_op = layers.While(cond)
    with while_op.block():
        # step_outputs --> OutputWrapper
        # next_states  --> StateWrapper
        # next_inputs  --> DecoderInputsWrapper
        step_outputs, next_states, next_inputs = decoder.step(
            inputs, states, **kwargs)
        predicted_ids = step_outputs.predicted_ids
        _save_predict_output(outputs_array, predicted_ids,
                             next_states.finished)

        pred_gmr_type = decoder.grammar_type(predicted_ids)
        cond_type_leaf = layers.equal(pred_gmr_type, decoder.GMR_TYPE.LEAF)
        cond_type_midd = layers.equal(pred_gmr_type, decoder.GMR_TYPE.MID)

        _process_type_leaf(cond_type_leaf, decoder, grammar_stack, next_inputs,
                           next_states.finished)
        _process_type_midd(cond_type_midd, decoder, grammar_stack, next_inputs,
                           predicted_ids)

        ##next_sequence_lengths = layers.elementwise_add(sequence_lengths,
        ##        tensor.cast(layers.logical_not(global_finished), sequence_lengths.dtype))

        _check_finished(decoder, next_inputs, next_states.finished,
                        outputs_array)

        layers.utils.map_structure(tensor.assign, next_inputs, global_inputs)
        layers.utils.map_structure(tensor.assign, next_states, global_states)
        tensor.assign(next_states.finished, global_finished)
        ##tensor.assign(next_sequence_lengths, sequence_lengths)

        # update the loop condition
        layers.increment(x=step_cnt, value=1.0, in_place=True)
        layers.logical_and(
            layers.logical_not(layers.reduce_all(next_states.finished)),
            layers.less_equal(step_cnt, max_step_num_tensor), cond)

    final_outputs = outputs_array.data
    final_states = global_states

    final_outputs, final_states = decoder.finalize(final_outputs,
                                                   global_states,
                                                   sequence_lengths)
    return final_outputs, final_states
def _greedy_search(self,
                   src_word,
                   src_pos,
                   src_slf_attn_bias,
                   trg_word,
                   trg_src_attn_bias,
                   bos_id=0,
                   eos_id=1,
                   max_len=256):
    # run encoder
    enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)

    # constant number
    batch_size = enc_output.shape[0]
    max_len = (enc_output.shape[1] + 20) if max_len is None else max_len
    end_token_tensor = layers.fill_constant(shape=[batch_size, 1],
                                            dtype="int64",
                                            value=eos_id)

    predict_ids = []
    log_probs = layers.fill_constant(shape=[batch_size, 1],
                                     dtype="float32",
                                     value=0)
    trg_word = layers.fill_constant(shape=[batch_size, 1],
                                    dtype="int64",
                                    value=bos_id)
    finished = layers.fill_constant(shape=[batch_size, 1],
                                    dtype="bool",
                                    value=0)

    ## init states (caches) for transformer
    caches = [{
        "k": layers.fill_constant(
            shape=[batch_size, self.n_head, 0, self.d_key],
            dtype=enc_output.dtype,
            value=0),
        "v": layers.fill_constant(
            shape=[batch_size, self.n_head, 0, self.d_value],
            dtype=enc_output.dtype,
            value=0),
    } for i in range(self.n_layer)]

    for i in range(max_len):
        trg_pos = layers.fill_constant(shape=trg_word.shape,
                                       dtype="int64",
                                       value=i)
        logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
                              enc_output, caches)
        step_log_probs = layers.log(layers.softmax(logits))
        log_probs = layers.elementwise_add(x=step_log_probs,
                                           y=log_probs,
                                           axis=0)
        scores = log_probs
        topk_scores, topk_indices = layers.topk(input=scores, k=1)

        finished = layers.logical_or(
            finished, layers.equal(topk_indices, end_token_tensor))
        trg_word = topk_indices
        log_probs = topk_scores

        predict_ids.append(topk_indices)

        if layers.reduce_all(finished).numpy():
            break

    predict_ids = layers.stack(predict_ids, axis=0)
    finished_seq = layers.transpose(predict_ids, [1, 2, 0])
    finished_scores = topk_scores

    return finished_seq, finished_scores