Пример #1
0
 def test_duplicate_ids_not_allowed(self, mock_download_dataset):
     """Loading a split that contains the same question twice must fail.

     ``TRAIN_QUESTION`` is written twice to the TRAIN split, and loading
     that split is expected to raise ``AssertionError``. The download mock
     must have been called exactly once with ``Collection.HOTPOT_QA`` and
     any second argument.
     """
     dataset_type = HotpotQAType.TRAIN
     duplicated_questions = [TRAIN_QUESTION, TRAIN_QUESTION]
     self.write_questions(dataset_type, duplicated_questions)

     with self.assertRaises(AssertionError):
         dm.HOTPOT_QA(dataset_type)

     mock_download_dataset.assert_called_once_with(
         Collection.HOTPOT_QA, ANY)
Пример #2
0
 def test_load_in_test_fullwiki_format(self, mock_download_dataset):
     """Compare the frame loaded for TEST_FULLWIKI with a literal fixture.

     A single ``TEST_FULLWIKI_QUESTION`` is written, the split is loaded,
     and the result must equal the frame built from the JSON below, in
     which ``answer``, ``question_type`` and ``question_level`` are null
     and ``gold_paragraphs`` / ``supporting_facts`` are empty lists.
     The download mock must have been called exactly once.
     """
     dataset_type = HotpotQAType.TEST_FULLWIKI
     self.write_questions(dataset_type, [TEST_FULLWIKI_QUESTION])

     loaded_df = dm.HOTPOT_QA(dataset_type)

     expected_records = json.loads("""[
         {
             "id": "5ab5072e5542990594ba9cda",
             "question": "Test question?",
             "answer": null,
             "gold_paragraphs": [],
             "supporting_facts": [],
             "context": [
                 [
                     "The Rolling Stone Album Guide",
                     ["Sent 1.", " Sent 2."]
                 ],
                 [
                     "Fear and Loathing at Rolling Stone",
                     ["Sent 3."]
                 ]
             ],
             "question_type": null,
             "question_level": null
         }
     ]""")
     expected_df = pd.DataFrame(expected_records)

     pd.testing.assert_frame_equal(loaded_df, expected_df)
     mock_download_dataset.assert_called_once_with(
         Collection.HOTPOT_QA, ANY)
Пример #3
0
 def test_load_multiple_questions(self, mock_download_dataset):
     """Two questions with distinct ids must load as a two-row frame.

     The second question is a deep copy of ``TRAIN_QUESTION`` whose
     ``_id`` is replaced, so the two entries differ only in their id.
     The download mock must have been called exactly once.
     """
     second_question = deepcopy(TRAIN_QUESTION)
     second_question["_id"] = "aaaabbbbccccdddd!2"
     questions = [TRAIN_QUESTION, second_question]
     self.write_questions(HotpotQAType.TRAIN, questions)

     loaded_df = dm.HOTPOT_QA(HotpotQAType.TRAIN)

     mock_download_dataset.assert_called_once_with(
         Collection.HOTPOT_QA, ANY)
     row_count = len(loaded_df)
     self.assertEqual(row_count, 2)
Пример #4
0
def main():
    """Print the DEV_DISTRACTOR split and one randomly sampled question.

    The whole DataFrame is printed first. A single row is then drawn with
    ``DataFrame.sample`` and its question, answer and gold paragraphs are
    printed, with the paragraphs labelled "A", "B", ... in order.
    """
    dataset = dm.HOTPOT_QA(HotpotQAType.DEV_DISTRACTOR)
    print(dataset)
    print("\n")

    # iterrows() yields (index, row) pairs; only the row is needed here.
    _, sampled_row = next(dataset.sample(n=1).iterrows())
    paragraphs = sampled_row.gold_paragraphs
    print("Question: ", sampled_row.question)
    print("Answer: ", sampled_row.answer, "\n")

    first_label = ord('A')
    for offset, paragraph in enumerate(paragraphs):
        label = chr(first_label + offset)
        print("Paragraph {}) {}\n".format(label, paragraph))
Пример #5
0
 def test_load_in_dev_fullwiki_format(self, mock_download_dataset):
     """Compare the frame loaded for DEV_FULLWIKI with a literal fixture.

     A single ``DEV_FULLWIKI_QUESTION`` is written, the split is loaded,
     and the result must equal the frame built from the JSON below. That
     fixture lists two supporting facts but only one gold paragraph
     ("Sent good 3."). The download mock must have been called exactly
     once.
     """
     dataset_type = HotpotQAType.DEV_FULLWIKI
     self.write_questions(dataset_type, [DEV_FULLWIKI_QUESTION])

     loaded_df = dm.HOTPOT_QA(dataset_type)

     expected_records = json.loads("""[
         {
             "id": "5a899013554299515336131a",
             "question": "Some question",
             "answer": "Dallas",
             "gold_paragraphs": [
                 "Sent good 3."
             ],
             "supporting_facts": [
                 ["Limitless (EP)", 0],
                 ["Crown the Empire", 0]
             ],
             "context": [
                 [
                     "The Resistance: Rise of The Runaways",
                     ["Sent 1", " Sent 2.", " Sent 3."]
                 ],
                 [
                     "Reign of Terror (Capture the Crown album)",
                     ["Sent 4."]
                 ],
                 [
                     "Retrograde (Crown the Empire album)",
                     ["Sent 5.", " Sent 6."]
                 ],
                 [
                     "Roots (Sepultura album)",
                     ["Sent 7.", " Sent 8.", " Sent 9."]
                 ],
                 [
                     "Forest Stream",
                     ["Sent 10.", " Sent 11.", " Sent 12.", " Sent 13.",
                     " Sent 14.", " Sent 15."]
                 ],
                 [
                     "The Crown (band)",
                     ["Sent 16."]
                 ],
                 [
                     "The Fallout (Crown the Empire album)",
                     ["Sent 17.", " Sent 18."]
                 ],
                 [
                     "Crown the Empire discography",
                     [" Sent 19."]
                 ],
                 [
                     "Crown the Empire - is missing",
                     ["Sent good 1.", " Sent good 2."]
                 ],
                 [
                     "Limitless (EP)",
                     ["Sent good 3."]
                 ]
             ],
             "question_type": "bridge",
             "question_level": "medium"
         }
     ]""")
     expected_df = pd.DataFrame(expected_records)

     pd.testing.assert_frame_equal(loaded_df, expected_df)
     mock_download_dataset.assert_called_once_with(
         Collection.HOTPOT_QA, ANY)
Пример #6
0
 def test_empty_dataset(self, mock_download_dataset):
     """A split written with no questions must load as a zero-row frame.

     The download mock must have been called exactly once with
     ``Collection.HOTPOT_QA`` and any second argument.
     """
     no_questions = []
     self.write_questions(HotpotQAType.TRAIN, no_questions)

     loaded_df = dm.HOTPOT_QA(HotpotQAType.TRAIN)

     mock_download_dataset.assert_called_once_with(
         Collection.HOTPOT_QA, ANY)
     self.assertEqual(len(loaded_df), 0)