def custom_evaluation(
    self,
    teacher_action: Message,
    labels: Optional[Tuple[str]],
    model_response: Message,
) -> None:
    """
    Record F1 overlap between the model response and the selected knowledge.

    Adds four metrics to ``self.metrics``:

    - ``knowledge_f1_docs``: response vs. all selected documents joined into
      one reference string.
    - ``knowledge_f1_max_docs``: response vs. the selected documents, each
      passed as a separate reference.
    - ``knowledge_f1_sentences``: response vs. all selected sentences joined
      into one reference string.
    - ``knowledge_f1_max_sentences``: response vs. the selected sentences,
      each passed as a separate reference.

    Nothing is recorded when the first selected sentence equals
    ``CONST.NO_SELECTED_SENTENCES_TOKEN``, when the response is a padding
    message, or when the response has no ``'text'`` field.

    :param teacher_action: teacher message holding the selected documents
        (``CONST.SELECTED_DOCS``) and sentences (``CONST.SELECTED_SENTENCES``)
    :param labels: not used by this evaluation
    :param model_response: the model's reply; its ``'text'`` field is scored
    """
    no_knowledge_selected = (
        teacher_action[CONST.SELECTED_SENTENCES][0]
        == CONST.NO_SELECTED_SENTENCES_TOKEN
    )
    if (
        no_knowledge_selected
        or model_response.is_padding()
        or 'text' not in model_response
    ):
        # Has NOT selected knowledge, or is a batch padding message.
        return

    resp = model_response['text']
    selected_docs = teacher_action[CONST.SELECTED_DOCS]
    selected_sentences = teacher_action[CONST.SELECTED_SENTENCES]

    self.metrics.add(
        'knowledge_f1_docs',
        F1Metric.compute(resp, [' '.join(selected_docs)]),
    )
    # The references must be the teacher's selected documents. Passing the
    # CONST key itself (a plain string) would score the response against
    # the key name instead of the knowledge.
    # NOTE(review): presumably F1Metric.compute keeps the best score over
    # the given answers (hence "max") -- confirm against F1Metric.
    self.metrics.add(
        'knowledge_f1_max_docs',
        F1Metric.compute(resp, selected_docs),
    )
    self.metrics.add(
        'knowledge_f1_sentences',
        F1Metric.compute(resp, [' '.join(selected_sentences)]),
    )
    # Same as above: use the selected sentences, not the key constant.
    self.metrics.add(
        'knowledge_f1_max_sentences',
        F1Metric.compute(resp, selected_sentences),
    )
def custom_evaluation(
    self,
    teacher_action: Message,
    labels: Optional[Tuple[str]],
    model_response: Message,
) -> None:
    """
    Record F1 scores comparing the predicted knowledge graph to the label.

    The first label and the response text are lower-cased and split into
    graph elements with ``break_knowledge_graph``. Three metrics are added
    to ``self.metrics``:

    - ``response_elements_f1``: overlap of whole graph elements.
    - ``graph_subject_relation_f1``: overlap after dropping everything from
      the last comma of each element onwards.
    - ``graph_subject_f1``: overlap after keeping only the part before the
      first comma.

    Nothing is recorded for padding responses or responses with missing or
    empty ``'text'``.

    :param teacher_action: not used by this evaluation
    :param labels: gold labels; only ``labels[0]`` is scored
    :param model_response: the model's reply; its ``'text'`` field is scored
    """
    if model_response.is_padding():
        return
    if not model_response.get('text', None):
        return

    def record_f1(metric_name, predicted_ids, expected_ids):
        # Each side is flattened into one space-separated string so that
        # F1Metric can compare the encoded elements token by token.
        self.metrics.add(
            metric_name,
            F1Metric.compute(
                guess=' '.join(predicted_ids),
                answers=[' '.join(expected_ids)],
            ),
        )

    gold_elements = break_knowledge_graph(labels[0].lower())
    guess_elements = break_knowledge_graph(model_response['text'].lower())

    # Full graph elements (edges / mutation operations), encoded so they
    # can be fed to F1Metric.
    gold_ids, guess_ids = encode_set_elements(gold_elements, guess_elements)
    record_f1('response_elements_f1', guess_ids, gold_ids)

    # Subject + relation:
    # "(MUT) < you , in , house >"  -->  "(MUT) < you , in "
    gold_sub_rel = {element.rsplit(',', 1)[0] for element in gold_elements}
    guess_sub_rel = {element.rsplit(',', 1)[0] for element in guess_elements}
    gold_sub_rel_ids, guess_sub_rel_ids = encode_set_elements(
        gold_sub_rel, guess_sub_rel
    )
    record_f1('graph_subject_relation_f1', guess_sub_rel_ids, gold_sub_rel_ids)

    # Subject only, derived from the subject+relation strings above:
    # "(MUT) < you , in "  -->  "(MUT) < you "
    gold_subjects = {pair.split(',')[0] for pair in gold_sub_rel}
    guess_subjects = {pair.split(',')[0] for pair in guess_sub_rel}
    gold_subject_ids, guess_subject_ids = encode_set_elements(
        gold_subjects, guess_subjects
    )
    record_f1('graph_subject_f1', guess_subject_ids, gold_subject_ids)
def _add_msgs(self, acts, idx=0):
    """
    Append the acts of one `parley()` to the episode currently being logged.

    Acts are processed in order and collection stops at the first padding
    act. When ``self.keep_all`` is false, each act is reduced to the fields
    listed in ``self.keep_fields`` that it actually contains. If no act was
    collected, the logs are left untouched.

    :param acts: list of acts from a `.parley()` call
    :param idx: key into ``self._current_episodes`` selecting which episode
        receives the collected messages
    """
    collected = []
    for raw_act in acts:
        # Plain dict-like acts are wrapped so `is_padding()` is available.
        act = raw_act if isinstance(raw_act, Message) else Message(raw_act)
        if act.is_padding():
            # Stop at the first padding act; nothing after it is logged.
            break
        if self.keep_all:
            collected.append(act)
        else:
            collected.append(
                {field: act[field] for field in self.keep_fields if field in act}
            )

    if collected:
        self._current_episodes.setdefault(idx, []).append(collected)