def generate_questions(
        self, events: List[Event], visits: Dict[int, List[str]]
) -> Tuple[List[Question], List[Question], List[Question], List[Question]]:
    """Build all questions for ``events`` and group them by answer kind.

    For every event type in ``self.EVENT_TYPES`` two families of questions
    are created:

    * DIRECT questions: for each relevant event, one question about the
      actor and one per attribute in ``self.ATTRIBUTES``.
      ``question_data["n"]`` is the 1-based position of the event among
      the relevant events; the first event gets ``Retrieval`` reasoning,
      later ones ``OrderingEasy``.
    * OVERALL questions: one about the actors of all relevant events and
      one per attribute over the events for which ``self.is_realised``
      holds.

    Side effects: stores ``events`` in ``self.all_events`` and ``visits``
    in ``self.visits``, and rebinds ``self.event_type`` and
    ``self.relevant_events`` on every iteration over the event types.
    ``get_relevant_events`` is called without arguments, so it presumably
    reads that instance state (NOTE(review): confirm).

    Args:
        events: All events to generate questions for.
        visits: Maps a sentence number to a list of strings. The actor of
            an event counts as answerable when any of the strings for the
            event's sentence contains ``"sent.actor"``.

    Returns:
        ``(single_span, multi_span, unanswerable, abstractive)``. The
        abstractive list is never filled by this method.

    Raises:
        KeyError: If ``visits`` has no entry for the sentence number of a
            relevant event.
    """
    # extractive
    single_span_questions = []
    multi_span_questions = []
    unanswerable_questions = []
    abstractive_questions = []

    def file_overall_question(question, answers):
        """Set reasoning/answer from ``answers``, bucket and post-process."""
        if len(answers) > 1:
            question.reasoning = ReasoningTypes.MultiRetrieval
            question.answer = answers
            multi_span_questions.append(question)
        elif len(answers) == 1:
            question.reasoning = ReasoningTypes.Retrieval
            question.answer = answers[0]
            single_span_questions.append(question)
        else:
            # Nothing to retrieve: reasoning is left at whatever the
            # Question constructor set.
            question.answer = None
            unanswerable_questions.append(question)
        self.post_process_question(question)

    self.all_events = events
    self.event_type = None
    # per sentence per attribute
    self.visits = visits

    # The loop variable is deliberately an instance attribute: helper
    # methods are called without the event type as an argument.
    for self.event_type in self.EVENT_TYPES:
        self.relevant_events = self.get_relevant_events()

        # per-sentence action questions
        for ith, event in enumerate(self.relevant_events):
            reasoning = (ReasoningTypes.Retrieval
                         if ith == 0 else ReasoningTypes.OrderingEasy)

            # actor
            q = Question(
                type=QuestionTypes.DIRECT,
                target="actor",
                evidence=[event.sentence_nr],
                event_type=self.event_type,
                # TODO: WHAT IF COREF ETC
                answer=self.post_process_actor_answer(event.actor),
                reasoning=reasoning,
                question_data={"n": ith + 1},
            )
            # NOTE(review): here post-processing happens *before* the
            # answer may be reset to None, whereas the attribute questions
            # below are post-processed afterwards. Order kept as-is.
            self.post_process_question(q)
            if any("sent.actor" in v for v in visits[event.sentence_nr]):
                single_span_questions.append(q)
            else:
                q.answer = None
                unanswerable_questions.append(q)

            # attribute questions
            for attribute in self.ATTRIBUTES:
                q = Question(
                    type=QuestionTypes.DIRECT,
                    target=attribute,
                    event_type=self.event_type,
                    reasoning=reasoning,
                    question_data={"n": ith + 1},
                )
                if self.is_realised(attribute, event):
                    q.answer = self.post_process_attribute_answers(
                        attribute, event.attributes[attribute])
                    q.evidence = [event.sentence_nr]
                    single_span_questions.append(q)
                else:
                    q.answer = None
                    q.evidence = []
                    unanswerable_questions.append(q)
                self.post_process_question(q)

        # overall questions

        # target = actor (every relevant event contributes; no visits check)
        q = Question(
            type=QuestionTypes.OVERALL,
            target='actor',
            event_type=self.event_type,
        )
        q.evidence = [e.sentence_nr for e in self.relevant_events]
        file_overall_question(q, [
            self.post_process_actor_answer(e.actor)
            for e in self.relevant_events
        ])

        # target = attribute (only events where the attribute is realised)
        for attribute in self.ATTRIBUTES:
            q = Question(type=QuestionTypes.OVERALL,
                         target=attribute,
                         event_type=self.event_type)
            visited_events = [
                event for event in self.relevant_events
                if self.is_realised(attribute, event)
            ]
            q.evidence = [e.sentence_nr for e in visited_events]
            file_overall_question(q, [
                self.post_process_attribute_answers(
                    attribute, event.attributes[attribute])
                for event in visited_events
            ])

    return (single_span_questions, multi_span_questions,
            unanswerable_questions, abstractive_questions)
def realise_question(self, q: Question, passage: List[str], ignore_missing_keys=True):
    """Render ``q`` as a question string and finalise its answer.

    A template is looked up in ``self.question_templates`` under
    ``[q.type][q.target][q.reasoning][q.event_type]``; if that raises
    ``KeyError`` the lookup is retried without the reasoning level. The
    template's words are then expanded one by one:

    * ``(...)`` words are expanded via ``self.processor.process_option``,
    * ``#key`` words are replaced by ``str(q.question_data[key])``,
    * ``!...`` words are expanded via ``self.processor.process_function``,
    * anything else is emitted as-is.

    Expansions are pushed back onto the work stack, so they are themselves
    expanded again.

    Side effects: replaces ``self.processor.chooser`` with a fresh
    ``RandomChooser`` and sets ``q.realized`` and ``q.answer``.

    Args:
        q: The question to realise.
        passage: Passed through to ``self._fix_units`` together with ``q``.
        ignore_missing_keys: When true, a missing template yields ``None``
            instead of an exception.

    Returns:
        ``(q.realized, q.answer)``, or ``None`` if no template was found
        and ``ignore_missing_keys`` is true.

    Raises:
        YouIdiotException: No template found and ``ignore_missing_keys``
            is false.
        NotImplementedError: A ``#key`` word is not in ``q.question_data``.
        AssertionError: ``self._fix_units`` returned a falsy answer.
    """
    self.processor.chooser = RandomChooser()
    logger.debug(f"Question: {q}")

    def template_key():
        # Only used in messages. Non-str parts (e.g. a reasoning of None)
        # are stringified so that building the message cannot itself fail
        # with a TypeError and hide the real "template missing" problem.
        parts = (q.type, q.target, q.reasoning, q.event_type)
        return '.'.join(p if isinstance(p, str) else str(p) for p in parts)

    try:
        # first see if there's a reasoning key
        template, _ = self.question_templates[q.type][q.target][
            q.reasoning][q.event_type].random()
    except KeyError as e:
        try:
            # if not, try without reasoning
            logger.debug(str(e))
            logger.warning(
                f"{template_key()} "
                'not found, trying without reasoning key....')
            template, _ = self.question_templates[q.type][
                q.target][q.event_type].random()
        except KeyError:
            # still nothing: give up, quietly or loudly
            if ignore_missing_keys:
                return None
            raise YouIdiotException(
                f"Question templates are missing the key "
                f"{template_key()}"
            )

    logger.debug(f'Template: {template}')
    question_words = []
    # Work on a reversed *copy*. Reversing and popping the object returned
    # by ``random()`` in place would corrupt the stored template if that
    # object is shared with ``self.question_templates`` (cannot tell from
    # here); copying is correct either way.
    stack = list(template)
    stack.reverse()
    while stack:
        logger.debug(f"Current stack is: {stack}")
        word = stack.pop()
        logger.debug(word)
        # option as in ()
        if word.startswith("(") and word.endswith(")"):
            new_words = self.processor.process_option(word)
            # reversed so that the first new word is popped first
            stack.extend(new_words[::-1])
        # context access
        elif word.startswith("#"):
            try:
                new_word = str(q.question_data[word[1:]])
            except KeyError as err:
                raise NotImplementedError(
                    f"{word} is not in question data!") from err
            stack.append(new_word)
        # function call
        elif word.startswith("!"):
            new_words = self.processor.process_function(
                word, args=q.question_data)
            stack.extend(new_words[::-1])
        else:
            question_words.append(word)

    logger.debug(question_words)
    # join, then split/join again to collapse runs of whitespace
    q.realized = " ".join(" ".join(
        self.post_process(question_words)).split()) + " ?"
    answer = self._fix_units(q, passage)
    assert answer, f"{q}, {passage}"
    q.answer = answer
    return q.realized, q.answer