def test_scoring_with_properties_filter(self):
        """Collate tokens that carry extra properties; supplying
        self.match_properties as properties_filter changes the alignment."""
        witness_data = {
            "witnesses": [
                {"id": "A",
                 "tokens": [{"t": "filler1"},
                            {"t": "token"}]},
                {"id": "B",
                 "tokens": [{"t": "token", "rend": "b"},
                            {"t": "filler2"}]},
            ]
        }

        # Default scoring: four-column table.
        expected = """+---+---------+-------+---------+
| A | filler1 | token | -       |
| B | -       | token | filler2 |
+---+---------+-------+---------+"""
        self.assertEqual(expected, str(collate_pretokenized_json(witness_data)))

        # With the properties filter the table collapses to three columns.
        expected = """+---+---------+---------+
| A | filler1 | token   |
| B | token   | filler2 |
+---+---------+---------+"""
        self.assertEqual(
            expected,
            str(collate_pretokenized_json(
                witness_data, properties_filter=self.match_properties)))
 def testJSONOutput_empty_cells_in_output(self):
     """A gap in the alignment is rendered as null in the JSON output."""
     witness_data = {
         "witnesses": [
             {
                 "id": "A",
                 "tokens": [
                     {"t": "A", "ref": 123},
                     {"t": "black", "adj": True},
                     {"t": "cat", "id": "xyz"},
                 ],
             },
             {
                 "id": "B",
                 "tokens": [
                     {"t": "A"},
                     {"t": "kitten.", "n": "cat"},
                 ],
             },
         ]
     }
     expected_json = '{"table": [[[{"ref": 123, "t": "A"}], [{"adj": true, "t": "black"}], [{"id": "xyz", "t": "cat"}]], [[{"t": "A"}], [null], [{"n": "cat", "t": "kitten."}]]], "witnesses": ["A", "B"]}'
     self.assertEqual(expected_json,
                      collate_pretokenized_json(witness_data, output="json"))
 def testSegmentationPretokenizedJSON(self):
     """segmentation=True is unsupported for pretokenized input and raises."""
     witness_data = {
         "witnesses": [
             {
                 "id": "A",
                 "tokens": [
                     {"t": "A", "ref": 123},
                     {"t": "black", "adj": True},
                     {"t": "cat", "id": "xyz"},
                 ],
             },
             {
                 "id": "B",
                 "tokens": [
                     {"t": "A"},
                     {"t": "white", "adj": True},
                     {"t": "stripy", "adj": True},
                     {"t": "kitten.", "n": "cat"},
                 ],
             },
         ]
     }
     with self.assertRaises(UnsupportedError):
         collate_pretokenized_json(witness_data, segmentation=True)
 def testPretokenizedWitness(self):
     """Rows are padded to equal length and a multi-word token such as
     'mousedog bird' survives collation as a single reading."""
     witness_data = {
         "witnesses": [
             {
                 "id": "A",
                 "tokens": [
                     {"t": "A", "ref": 123},
                     {"t": "black", "adj": True},
                     {"t": "cat", "id": "xyz"},
                     {"t": "bird", "id": "abc"},
                 ],
             },
             {
                 "id": "B",
                 "tokens": [
                     {"t": "A"},
                     {"t": "white", "adj": True},
                     {"t": "mousedog bird", "adj": False},
                 ],
             },
         ]
     }
     table = collate_pretokenized_json(witness_data)
     for row in (table.rows[0], table.rows[1]):
         self.assertEqual(len(row.to_list()), 4)
     # The second witness should have a token that reads 'mousedog bird'.
     self.assertIn("mousedog bird", table.rows[1].to_list())
示例#5
0
    def testHTMLOutputVerticalLayoutPretokenizedJSON(self):
        json_in = {
      "witnesses" : [
        {
          "id" : "A",
          "tokens" : [
              { "t" : "A", "ref" : 123 },
              { "t" : "black" , "adj" : True },
              { "t" : "cat", "id" : "xyz" }
          ]
        },
        {
          "id" : "B",
          "tokens" : [
              { "t" : "A" },
              { "t" : "white" , "adj" : True },
              { "t" : "kitten.", "n" : "cat" }
          ]
        }
      ]
    }
        expected_output = """\
+-------+---------+
|   A   |    B    |
+-------+---------+
|   A   |    A    |
+-------+---------+
| black |  white  |
+-------+---------+
|  cat  | kitten. |
+-------+---------+"""
        plain_text_output = str(collate_pretokenized_json(json_in, layout="vertical"))
        self.assertEquals(expected_output, plain_text_output)
示例#6
0
    def testHTMLOutputPretokenizedJSON(self):
        json_in = {
      "witnesses" : [
        {
          "id" : "A",
          "tokens" : [
              { "t" : "A", "ref" : 123 },
              { "t" : "black" , "adj" : True },
              { "t" : "cat", "id" : "xyz" }
          ]
        },
        {
          "id" : "B",
          "tokens" : [
              { "t" : "A" },
              { "t" : "white" , "adj" : True },
              { "t" : "kitten.", "n" : "cat" }
          ]
        }
      ]
    }
        expected_plain_table = """\
+---+---+-------+---------+
| A | A | black | cat     |
| B | A | white | kitten. |
+---+---+-------+---------+"""
        plain_table = str(collate_pretokenized_json(json_in, output="table"))
        self.assertEqual(expected_plain_table, plain_table)
示例#7
0
    def testJSONOutputPretokenizedJSON(self):
        """JSON output preserves all token properties of a full alignment."""
        witness_data = {
            "witnesses": [
                {"id": "A",
                 "tokens": [{"t": "A", "ref": 123},
                            {"t": "black", "adj": True},
                            {"t": "cat", "id": "xyz"}]},
                {"id": "B",
                 "tokens": [{"t": "A"},
                            {"t": "white", "adj": True},
                            {"t": "kitten.", "n": "cat"}]},
            ]
        }
        expected_json = '{"table": [[[{"ref": 123, "t": "A"}], [{"adj": true, "t": "black"}], [{"id": "xyz", "t": "cat"}]], [[{"t": "A"}], [{"adj": true, "t": "white"}], [{"n": "cat", "t": "kitten."}]]], "witnesses": ["A", "B"]}'
        self.assertEqual(expected_json,
                         collate_pretokenized_json(witness_data, output="json"))
 def test_near_matching_segmented(self):
     """Near matching combined with segmentation yields segment-level rows.

     Fix: ``assertEquals`` is a deprecated alias of ``assertEqual`` and was
     removed in Python 3.12; use the canonical name.
     """
     result = collate_pretokenized_json(self.json_in,
                                        near_match=True,
                                        segmentation=True)
     self.assertEqual([
         "I bought", "this glass, because it matches those dinner plates."
     ], result.rows[0].to_list())
     self.assertEqual(["I bought", "those glasses."],
                      result.rows[1].to_list())
 def test_near_matching(self):
     """Near matching at token level pads the shorter witness with None.

     Fix: ``assertEquals`` is a deprecated alias of ``assertEqual`` and was
     removed in Python 3.12; use the canonical name.
     """
     result = collate_pretokenized_json(self.json_in, near_match=True)
     self.assertEqual([
         "I", "bought", "this", "glass", ",", "because", "it", "matches",
         "those", "dinner", "plates", "."
     ], result.rows[0].to_list())
     self.assertEqual([
         "I", "bought", "those", "glasses", None, None, None, None, None,
         None, None, "."
     ], result.rows[1].to_list())
    def testHTMLOutputVerticalLayoutPretokenizedJSON(self):
        json_in = {
            "witnesses": [{
                "id":
                "A",
                "tokens": [{
                    "t": "A",
                    "ref": 123
                }, {
                    "t": "black",
                    "adj": True
                }, {
                    "t": "cat",
                    "id": "xyz"
                }]
            }, {
                "id":
                "B",
                "tokens": [{
                    "t": "A"
                }, {
                    "t": "white",
                    "adj": True
                }, {
                    "t": "kitten.",
                    "n": "cat"
                }]
            }]
        }
        expected_output = """\
+-------+---------+
|   A   |    B    |
+-------+---------+
|   A   |    A    |
+-------+---------+
| black |  white  |
+-------+---------+
|  cat  | kitten. |
+-------+---------+"""
        plain_text_output = str(
            collate_pretokenized_json(json_in, layout="vertical"))
        self.assertEquals(expected_output, plain_text_output)
示例#11
0
 def testPretokenizedWitness(self):
     """Both rows come out four cells long and the multi-word reading
     'mousedog bird' is preserved as one token."""
     witness_data = {
         "witnesses": [
             {"id": "A",
              "tokens": [{"t": "A", "ref": 123},
                         {"t": "black", "adj": True},
                         {"t": "cat", "id": "xyz"},
                         {"t": "bird", "id": "abc"}]},
             {"id": "B",
              "tokens": [{"t": "A"},
                         {"t": "white", "adj": True},
                         {"t": "mousedog bird", "adj": False}]},
         ]
     }
     result = collate_pretokenized_json(witness_data)
     self.assertEqual(len(result.rows[0].to_list()), 4)
     self.assertEqual(len(result.rows[1].to_list()), 4)
     # The second witness should have a token that reads 'mousedog bird'.
     self.assertIn("mousedog bird", result.rows[1].to_list())
    def testHTMLOutputPretokenizedJSON(self):
        json_in = {
            "witnesses": [{
                "id":
                "A",
                "tokens": [{
                    "t": "A",
                    "ref": 123
                }, {
                    "t": "black",
                    "adj": True
                }, {
                    "t": "cat",
                    "id": "xyz"
                }]
            }, {
                "id":
                "B",
                "tokens": [{
                    "t": "A"
                }, {
                    "t": "white",
                    "adj": True
                }, {
                    "t": "kitten.",
                    "n": "cat"
                }]
            }]
        }
        expected_plain_table = """\
+---+---+-------+---------+
| A | A | black | cat     |
| B | A | white | kitten. |
+---+---+-------+---------+"""
        plain_table = str(collate_pretokenized_json(json_in, output="table"))
        self.assertEqual(expected_plain_table, plain_table)
示例#13
0
 def testSegmentationPretokenizedJSON(self):
     """segmentation=True is unsupported for pretokenized input and raises."""
     witness_data = {
         "witnesses": [
             {"id": "A",
              "tokens": [{"t": "A", "ref": 123},
                         {"t": "black", "adj": True},
                         {"t": "cat", "id": "xyz"}]},
             {"id": "B",
              "tokens": [{"t": "A"},
                         {"t": "white", "adj": True},
                         {"t": "stripy", "adj": True},
                         {"t": "kitten.", "n": "cat"}]},
         ]
     }
     self.assertRaises(UnsupportedError,
                       collate_pretokenized_json,
                       witness_data,
                       segmentation=True)
 def test_near_matching_segmented(self):
     """Near matching combined with segmentation yields segment-level rows.

     Fix: ``assertEquals`` is a deprecated alias of ``assertEqual`` and was
     removed in Python 3.12; use the canonical name.
     """
     result = collate_pretokenized_json(self.json_in, near_match=True, segmentation=True)
     self.assertEqual(["I bought", "this glass, because it matches those dinner plates."],
                      result.rows[0].to_list())
     self.assertEqual(["I bought", "those glasses."], result.rows[1].to_list())
 def test_near_matching(self):
     """Near matching at token level pads the shorter witness with "-".

     Fix: ``assertEquals`` is a deprecated alias of ``assertEqual`` and was
     removed in Python 3.12; use the canonical name.
     """
     result = collate_pretokenized_json(self.json_in, near_match=True)
     self.assertEqual(["I", "bought", "this", "glass", ",", "because", "it", "matches", "those", "dinner", "plates", "."],
                      result.rows[0].to_list())
     self.assertEqual(["I", "bought", "those", "glasses", "-", "-", "-", "-", "-", "-", "-", "."], result.rows[1].to_list())