def __init__(self, mode='winner_take_all'):
    """Set up the CIDEr scorer, the uniqueness scorer and the default flags.

    :param mode: either 'winner_take_all' or 'kill_all'.
    :raises AssertionError: if ``mode`` is not one of the two accepted values.
    """
    valid_modes = ('winner_take_all', 'kill_all')
    # Validate before building the scorers so a bad mode fails fast.  The
    # error is raised explicitly (not via ``assert``) so the check is not
    # stripped under ``python -O``; the exception type stays AssertionError.
    if mode not in valid_modes:
        raise AssertionError(
            'mode must be one of %r, got %r' % (valid_modes, mode))
    self.scorer = ciderEval('vqa_%s_idxs_end' % 'kptrain')
    # Sampled paths are expected to carry a leading start token and a
    # trailing end token (see how these flags are consumed elsewhere).
    self.pred_has_start_end_token = True
    self.use_end_token = True
    # Similarity above this value marks two candidates as "too close".
    self.thresh = 9.0
    self.verbose = False
    self.mode = mode
    self.diversity_scorer = UniqueReward()
class DiversityReward(object):
    """Reward that suppresses near-duplicate candidates within a sample group.

    Candidates of the same group are compared pairwise with the CIDEr scorer.
    Pairs whose similarity exceeds ``thresh`` are linked, and every connected
    component of linked candidates has its reward zeroed.  In
    'winner_take_all' mode the highest-scoring member of each component gets
    reward 1 back; in 'kill_all' mode the whole component stays at 0.
    """

    # Modes accepted by ``__init__`` and ``set_mode``.
    _VALID_MODES = ('winner_take_all', 'kill_all')

    def __init__(self, mode='winner_take_all'):
        """Build the scorers and set the default flags.

        :param mode: either 'winner_take_all' or 'kill_all'.
        :raises AssertionError: if ``mode`` is not an accepted value.
        """
        # Fail fast: reject a bad mode before constructing the scorers.
        self._check_mode(mode)
        self.scorer = ciderEval('vqa_%s_idxs_end' % 'kptrain')
        self.pred_has_start_end_token = True
        self.use_end_token = True
        # Similarity above this value marks two candidates as "too close".
        self.thresh = 9.0
        self.verbose = False
        self.mode = mode
        self.diversity_scorer = UniqueReward()

    @classmethod
    def _check_mode(cls, mode):
        """Raise AssertionError unless ``mode`` is one of the accepted modes."""
        # Raised explicitly (not via ``assert``) so the check is not stripped
        # under ``python -O``; the exception type is unchanged for callers.
        if mode not in cls._VALID_MODES:
            raise AssertionError(
                'mode must be one of %r, got %r' % (cls._VALID_MODES, mode))

    def set_mode(self, mode):
        """Switch the reward mode.

        The current mode is left untouched when ``mode`` is rejected.

        :param mode: either 'winner_take_all' or 'kill_all'.
        :raises AssertionError: if ``mode`` is not an accepted value.
        """
        self._check_mode(mode)
        self.mode = mode

    def get_reward(self, sampled, scores):
        """
        Diversity reward over groups of sampled questions.

        Within each group, candidates that are too similar to one another are
        clustered.  Every clustered candidate gets reward 0, except that in
        'winner_take_all' mode the member with the highest score keeps
        reward 1.  Candidates that belong to no cluster keep reward 1.
        :param sampled: sampled questions, one list of token paths per group
        :param scores: log likelihood of the sentence, grouped like `sampled`
        :return: (rewards, is_gt) where rewards is the concatenation of the
            per-group reward vectors multiplied by the first output of
            ``diversity_scorer.get_reward``, and is_gt is its second output
            passed through unchanged
        """
        sampled = self.process_sampled(sampled)
        wrapped_ref, wrapped_res, path_ids = self.wrap_sampled_pairwise(
            sampled)
        _, sim = self.scorer.evaluate(wrapped_ref, wrapped_res)  # cider similarity
        diversity, is_gt = self.diversity_scorer.get_reward(sampled)
        winner_keeps_reward = self.mode == 'winner_take_all'
        d_rewards = []
        for _ids, _scs, _ps in zip(path_ids, scores, sampled):
            _scs = np.array(_scs)
            num_cand = len(_scs)
            # Similarities of this group's pairs, in the same lower-triangle
            # order that wrap_sampled_pairwise used to enumerate them.
            _sim = sim[_ids]
            _d_reward = np.ones(shape=(num_cand, ), dtype=np.float32)
            _rows, _cols = np.tril_indices(num_cand, k=-1)
            connect_tab = _sim > self.thresh  # too close
            _edges = list(zip(_rows[connect_tab], _cols[connect_tab]))
            if _edges:
                for _cc in find_connected_components(_edges):
                    _d_reward[_cc] = 0.
                    if winner_keeps_reward:
                        # Only the best-scoring member of the cluster
                        # gets its reward restored.
                        _d_reward[_cc[_scs[_cc].argmax()]] = 1.
            d_rewards.append(_d_reward)
            if _edges and self.verbose:
                self.print_questions(_ps, _d_reward, _scs)
        d_rewards = np.concatenate(d_rewards)
        d_rewards *= diversity
        return d_rewards, is_gt

    def print_questions(self, sampled, rewards, scores):
        """Print each candidate of one group with its reward and score.

        :param sampled: token paths of the group
        :param rewards: reward assigned to each path
        :param scores: score of each path
        """
        for sm, r, sc in zip(sampled, rewards, scores):
            # The last token is dropped before decoding (presumably the end
            # token -- confirm when use_end_token is False).
            sent = _SENT.index_to_question(sm[:-1])
            print('%s (%0.3f, %0.3f)' % (sent, r, sc))
        print('\n')

    def process_sampled(self, sampled):
        """Normalise start/end tokens of every sampled path.

        When ``pred_has_start_end_token`` is set the first token of each path
        is dropped; otherwise ``END_TOKEN`` is appended.  When
        ``use_end_token`` is unset the last token is then removed.

        :param sampled: list of groups, each a list of token paths
        :return: a new nested list with the same grouping
        """
        new_sampled = []
        for ps in sampled:
            tmp = []
            for p in ps:
                if self.pred_has_start_end_token:
                    _p = p[1:]
                else:
                    _p = p + [END_TOKEN]
                # NOTE(review): the source formatting was lost; this strip is
                # taken to apply after both branches above -- confirm.
                if not self.use_end_token:
                    _p = _p[:-1]
                tmp.append(_p)
            new_sampled.append(tmp)
        return new_sampled

    @staticmethod
    def wrap_sampled_pairwise(sampled):
        """Enumerate all unordered pairs inside each group for the scorer.

        Pairs follow the strict lower triangle (row > column): the column
        path is stored as the reference and the row path as the result, both
        under one running string key shared across all groups.

        :param sampled: list of groups, each a list of token paths
        :return: (wrapped_ref, wrapped_res, path_ids) where wrapped_ref maps
            key -> [serialized reference], wrapped_res is a list of
            {'image_id': key, 'caption': [serialized result]} dicts, and
            path_ids holds, per group, the integer ids of its pairs
        """
        wrapped_ref = OrderedDict()
        wrapped_res = []
        idx = 0
        path_ids = []
        for _var_s in sampled:
            _u_tmp = []
            _rows, _cols = np.tril_indices(len(_var_s), k=-1)
            for _res_id, _ref_id in zip(_rows, _cols):
                _key = str(idx)
                wrapped_ref[_key] = [serialize_path(_var_s[_ref_id])]
                wrapped_res.append({
                    'image_id': _key,
                    'caption': [serialize_path(_var_s[_res_id])]
                })
                _u_tmp.append(idx)
                idx += 1
            path_ids.append(_u_tmp)
        return wrapped_ref, wrapped_res, path_ids