示例#1
0
    def test_scoring_with_properties_filter(self):
        """Collate pre-tokenized JSON with and without a properties filter.

        Witness B's "token" carries an extra "rend" property. The first
        assertion pins the table rendered without a filter; the second pins
        the table rendered when ``self.match_properties`` is passed as
        ``properties_filter``.
        """
        witness_a = {"id": "A", "tokens": [{"t": "filler1"}, {"t": "token"}]}
        witness_b = {
            "id": "B",
            "tokens": [{"t": "token", "rend": "b"}, {"t": "filler2"}],
        }
        json_in = {"witnesses": [witness_a, witness_b]}

        # Without a filter the expected table puts both "token" cells in
        # the same column.
        unfiltered_expected = (
            "+---+---------+-------+---------+\n"
            "| A | filler1 | token | -       |\n"
            "| B | -       | token | filler2 |\n"
            "+---+---------+-------+---------+"
        )
        unfiltered_table = collate(json_in, segmentation=False)
        self.assertEqual(unfiltered_expected, str(unfiltered_table))

        # With the filter the expected table has only two token columns.
        filtered_expected = (
            "+---+---------+---------+\n"
            "| A | filler1 | token   |\n"
            "| B | token   | filler2 |\n"
            "+---+---------+---------+"
        )
        filtered_table = collate(
            json_in,
            properties_filter=self.match_properties,
            segmentation=False,
        )
        self.assertEqual(filtered_expected, str(filtered_table))
示例#2
0
    def test_scoring_with_properties_filter(self):
        """Collate pre-tokenized JSON with and without a properties filter.

        Three tables are pinned in sequence: no filter; the filter
        ``self.match_properties`` while witness B's "token" still has a
        "rend" property; and the same filter after "rend" has been deleted.
        Each rendered table is also printed.
        """
        json_in = {
            "witnesses": [
                {"id": "A", "tokens": [{"t": "filler1"}, {"t": "token"}]},
                {
                    "id": "B",
                    "tokens": [{"t": "token", "rend": "b"}, {"t": "filler2"}],
                },
            ]
        }
        three_column_table = "\n".join([
            "+---+---------+-------+---------+",
            "| A | filler1 | token | -       |",
            "| B | -       | token | filler2 |",
            "+---+---------+-------+---------+",
        ])
        two_column_table = "\n".join([
            "+---+---------+---------+",
            "| A | filler1 | token   |",
            "| B | token   | filler2 |",
            "+---+---------+---------+",
        ])

        # 1) No properties filter.
        rendered = str(collate(json_in, segmentation=False))
        print(rendered)
        self.assertEqual(three_column_table, rendered)

        # 2) Filter supplied while B's "token" still carries "rend".
        rendered = str(
            collate(
                json_in,
                properties_filter=self.match_properties,
                segmentation=False,
            )
        )
        print(rendered)
        self.assertEqual(two_column_table, rendered)

        # 3) Drop "rend" so the 2 tokens have the same user-defined
        #    token_data, then collate with the filter again.
        witness_b_tokens = json_in["witnesses"][1]["tokens"]
        del witness_b_tokens[0]["rend"]
        rendered = str(
            collate(
                json_in,
                properties_filter=self.match_properties,
                segmentation=False,
            )
        )
        print(rendered)
        self.assertEqual(three_column_table, rendered)
 def testColumnStatusInAlignmentTable(self):
     """Check the per-column ``variant`` flags of the alignment table.

     The flags are pinned for two witnesses, then again after a third
     witness has been added to the same collation.
     """
     collation = Collation()
     collation.add_plain_witness("A", "The quick brown fox jumps over the dog.")
     collation.add_plain_witness("B", "The brown fox jumps over the lazy dog.")
     two_witness_table = collate(collation)
     two_witness_flags = [col.variant for col in two_witness_table.columns]
     self.assertEqual([False, True, False, True, False], two_witness_flags)

     collation.add_plain_witness("C", "The brown fox walks around the lazy dog.")
     # NOTE(review): collate() runs twice here and only the second result
     # is inspected; the first call looks redundant - confirm before removing.
     collate(collation)
     three_witness_table = collate(collation)
     three_witness_flags = [col.variant for col in three_witness_table.columns]
     self.assertEqual(
         [False, True, False, True, False, True, False],
         three_witness_flags,
     )
    def testJSONAlignmentTableRenderingNoSegmentation(self):
        """Check JSON output of a three-witness collation, segmentation off.

        The expected table has one token per cell, ``None`` where a witness
        has no token in a column, and the witness sigils under "witnesses".
        """

        def cell(sigil, position, word, text=None):
            # One-token cell; "t" is the word plus a trailing space unless
            # an explicit text is given.
            return [{
                "_sigil": sigil,
                "_token_array_position": position,
                "n": word,
                "t": word + " " if text is None else text,
            }]

        witnesses = Collation()
        for sigil, content in (
            ("A", "This very quick very quick brown wombat"),
            ("B", "That very quick brown koala"),
            ("C", "That very quick brown kangaroo"),
        ):
            witnesses.add_plain_witness(sigil, content)

        row_a = [
            cell("A", 0, "This"),
            cell("A", 1, "very"),
            cell("A", 2, "quick"),
            cell("A", 3, "very"),
            cell("A", 4, "quick"),
            cell("A", 5, "brown"),
            cell("A", 6, "wombat", text="wombat"),
        ]
        row_b = [
            cell("B", 8, "That"),
            None,
            None,
            cell("B", 9, "very"),
            cell("B", 10, "quick"),
            cell("B", 11, "brown"),
            cell("B", 12, "koala", text="koala"),
        ]
        row_c = [
            cell("C", 14, "That"),
            None,
            None,
            cell("C", 15, "very"),
            cell("C", 16, "quick"),
            cell("C", 17, "brown"),
            cell("C", 18, "kangaroo", text="kangaroo"),
        ]
        expected = {
            "table": [row_a, row_b, row_c],
            "witnesses": ["A", "B", "C"],
        }

        serialized = collate(witnesses, output="json", segmentation=False)
        self.assertEqual(expected, json.loads(serialized))
    def testPlainTableRenderingVerticalNoSegmentation(self):
        """Check the plain-text table rendered with ``layout="vertical"``.

        ``collate`` is called with ``segmentation=None``; the expected text
        has one column per witness and one ruled row per token position.
        """
        witnesses = Collation()
        for sigil, content in (
            ("A", "This very quick very quick brown wombat"),
            ("B", "That very quick brown koala"),
            ("C", "That very quick brown kangaroo"),
        ):
            witnesses.add_plain_witness(sigil, content)

        rule = "+--------+-------+----------+"
        content_lines = [
            "|   A    |   B   |    C     |",
            "|  This  |  That |   That   |",
            "|  very  |   -   |    -     |",
            "| quick  |   -   |    -     |",
            "|  very  |  very |   very   |",
            "| quick  | quick |  quick   |",
            "| brown  | brown |  brown   |",
            "| wombat | koala | kangaroo |",
        ]
        # Every content line is framed by a rule above and below.
        expected_lines = [rule]
        for line in content_lines:
            expected_lines.extend([line, rule])
        expected = "\n".join(expected_lines)

        table = collate(witnesses, layout="vertical", segmentation=None)
        self.assertEqual(expected, str(table))
示例#6
0
    def testPlainTableRenderingVerticalNoSegmentation(self):
        """Pin the vertical plain-text rendering of a three-witness collation.

        The table is produced by ``collate`` with ``layout="vertical"`` and
        ``segmentation=None`` and compared against a fixed string.
        """
        texts = Collation()
        texts.add_plain_witness("A", "This very quick very quick brown wombat")
        texts.add_plain_witness("B", "That very quick brown koala")
        texts.add_plain_witness("C", "That very quick brown kangaroo")

        wanted = (
            "+--------+-------+----------+\n"
            "|   A    |   B   |    C     |\n"
            "+--------+-------+----------+\n"
            "|  This  |  That |   That   |\n"
            "+--------+-------+----------+\n"
            "|  very  |   -   |    -     |\n"
            "+--------+-------+----------+\n"
            "| quick  |   -   |    -     |\n"
            "+--------+-------+----------+\n"
            "|  very  |  very |   very   |\n"
            "+--------+-------+----------+\n"
            "| quick  | quick |  quick   |\n"
            "+--------+-------+----------+\n"
            "| brown  | brown |  brown   |\n"
            "+--------+-------+----------+\n"
            "| wombat | koala | kangaroo |\n"
            "+--------+-------+----------+"
        )
        rendered = str(collate(texts, layout="vertical", segmentation=None))
        self.assertEqual(wanted, rendered)
    def testJSONAlignmentTableRendering(self):
        """Check JSON output of a three-witness collation, default options.

        The expected table groups several tokens into one cell; the raw
        JSON string is printed before it is parsed and compared.
        """

        def token(sigil, position, word, text=None):
            # Token record; "t" is the word plus a trailing space unless an
            # explicit text is given.
            return {
                "n": word,
                "_sigil": sigil,
                "t": word + " " if text is None else text,
                "_token_array_position": position,
            }

        witnesses = Collation()
        for sigil, content in (
            ("A", "This very quick very quick brown wombat"),
            ("B", "That very quick brown koala"),
            ("C", "That very quick brown kangaroo"),
        ):
            witnesses.add_plain_witness(sigil, content)

        row_a = [
            [token("A", 0, "This"), token("A", 1, "very"), token("A", 2, "quick")],
            [token("A", 3, "very"), token("A", 4, "quick"), token("A", 5, "brown")],
            [token("A", 6, "wombat", text="wombat")],
        ]
        row_b = [
            [token("B", 8, "That")],
            [token("B", 9, "very"), token("B", 10, "quick"), token("B", 11, "brown")],
            [token("B", 12, "koala", text="koala")],
        ]
        row_c = [
            [token("C", 14, "That")],
            [token("C", 15, "very"), token("C", 16, "quick"), token("C", 17, "brown")],
            [token("C", 18, "kangaroo", text="kangaroo")],
        ]
        expected = {
            "table": [row_a, row_b, row_c],
            "witnesses": ["A", "B", "C"],
        }

        serialized = collate(witnesses, output="json")
        print(serialized)
        self.assertEqual(expected, json.loads(serialized))
    def test_scoring_with_properties_filter(self):
        """Pin three renderings of the same pre-tokenized JSON input.

        The input is collated without a filter, with
        ``self.match_properties`` as ``properties_filter``, and with that
        filter again after the "rend" property has been removed from
        witness B's first token. Every rendered table is printed.
        """
        json_in = {
            "witnesses": [
                {"id": "A", "tokens": [{"t": "filler1"}, {"t": "token"}]},
                {
                    "id": "B",
                    "tokens": [{"t": "token", "rend": "b"}, {"t": "filler2"}],
                },
            ]
        }

        def render(**collate_options):
            # Collate json_in with segmentation off, echo the table and
            # hand back its text.
            text = str(collate(json_in, segmentation=False, **collate_options))
            print(text)
            return text

        aligned_table = (
            "+---+---------+-------+---------+\n"
            "| A | filler1 | token | -       |\n"
            "| B | -       | token | filler2 |\n"
            "+---+---------+-------+---------+"
        )
        shifted_table = (
            "+---+---------+---------+\n"
            "| A | filler1 | token   |\n"
            "| B | token   | filler2 |\n"
            "+---+---------+---------+"
        )

        self.assertEqual(aligned_table, render())
        self.assertEqual(
            shifted_table,
            render(properties_filter=self.match_properties),
        )

        # so the 2 tokens have the same user-defined token_data
        del json_in["witnesses"][1]["tokens"][0]["rend"]
        self.assertEqual(
            aligned_table,
            render(properties_filter=self.match_properties),
        )
 def testJSONAlignmentTableRendering(self):
     """Compare the raw JSON string returned by ``collate`` to a fixed string.

     Three plain witnesses are collated with ``output="json"`` and the
     returned value is compared, unparsed, against the expected text.
     """
     collation = Collation()
     collation.add_plain_witness("A", "This very quick very quick brown wombat")
     collation.add_plain_witness("B", "That very quick brown koala")
     collation.add_plain_witness("C", "That very quick brown kangaroo")
     expected_output = (
         '{"table": [[["This very quick"], ["very quick brown"], ["wombat"]], '
         '[["That"], ["very quick brown"], ["koala"]], '
         '[["That"], ["very quick brown"], ["kangaroo"]]], '
         '"witnesses": ["A", "B", "C"]}'
     )
     # Named json_out so the local does not hide the json module name.
     json_out = collate(collation, output="json")
     self.assertEqual(expected_output, json_out)
 def testJSONAlignmentTableRenderingNoSegmentation(self):
     """Check parsed JSON output of a three-witness collation, segmentation off.

     The expected table has one ``{"n", "t"}`` token per cell and ``None``
     where a witness has no token in a column.
     """

     def cell(word, text=None):
         # One-token cell; "t" is the word plus a trailing space unless an
         # explicit text is given.
         return [{"n": word, "t": word + " " if text is None else text}]

     collation = Collation()
     collation.add_plain_witness("A", "This very quick very quick brown wombat")
     collation.add_plain_witness("B", "That very quick brown koala")
     collation.add_plain_witness("C", "That very quick brown kangaroo")
     expected_output = {
         "table": [
             [
                 cell("This"),
                 cell("very"),
                 cell("quick"),
                 cell("very"),
                 cell("quick"),
                 cell("brown"),
                 cell("wombat", text="wombat"),
             ],
             [
                 cell("That"),
                 None,
                 None,
                 cell("very"),
                 cell("quick"),
                 cell("brown"),
                 cell("koala", text="koala"),
             ],
             [
                 cell("That"),
                 None,
                 None,
                 cell("very"),
                 cell("quick"),
                 cell("brown"),
                 cell("kangaroo", text="kangaroo"),
             ],
         ],
         "witnesses": ["A", "B", "C"],
     }
     json_out = collate(collation, output="json", segmentation=False)
     self.assertEqual(expected_output, json.loads(json_out))
 def testJSONAlignmentTableRendering(self):
     """Pin the unparsed JSON text produced for three plain witnesses.

     ``collate`` is called with ``output="json"`` and its return value is
     compared directly with the expected string.
     """
     collation = Collation()
     for sigil, content in (
         ("A", "This very quick very quick brown wombat"),
         ("B", "That very quick brown koala"),
         ("C", "That very quick brown kangaroo"),
     ):
         collation.add_plain_witness(sigil, content)
     expected_output = (
         '{"table": [[["This very quick"], ["very quick brown"], ["wombat"]], '
         '[["That"], ["very quick brown"], ["koala"]], '
         '[["That"], ["very quick brown"], ["kangaroo"]]], '
         '"witnesses": ["A", "B", "C"]}'
     )
     # A distinct local name keeps the json module name usable in this scope.
     json_out = collate(collation, output="json")
     self.assertEqual(expected_output, json_out)
    def test_align_with_longest_match(self):
        """Pin the table rows for witnesses "a g a g c t a g t" and "a g c t".

        The alignment table is printed, then each row's
        ``to_list_of_strings()`` result is compared with the expected cells.
        """
        collation = Collation()
        for sigil, content in (("A", "a g a g c t a g t"), ("B", "a g c t")):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            ["a g ", "a g c t ", "a g t"],
            [None, "a g c t", None],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
    def test_non_overlapping_blocks_Hermans(self):
        """Pin the table rows for two witnesses that differ by "! K".

        W1 contains "! K ! q r s t" where W2 has only "! q r s t"; the
        expected rows leave W2's middle cell as ``None``.
        """
        collation = Collation()
        for sigil, content in (
            ("W1", "a b c d F g h i ! K ! q r s t"),
            ("W2", "a b c d F g h i ! q r s t"),
        ):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            ["a b c d F g h i ", "! K ", "! q r s t"],
            ["a b c d F g h i ", None, "! q r s t"],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
    def test_scoring_with_properties_filter(self):
        json_in = {
            "witnesses": [{
                "id": "A",
                "tokens": [
                    {
                        "t": "filler1"
                    },
                    {
                        "t": "token"
                    },
                ]
            }, {
                "id":
                "B",
                "tokens": [
                    {
                        "t": "token",
                        "rend": "b"
                    },
                    {
                        "t": "filler2"
                    },
                ]
            }]
        }

        expected_output = """+---+---------+-------+---------+
| A | filler1 | token | -       |
| B | -       | token | filler2 |
+---+---------+-------+---------+"""
        alignment_table = collate(json_in, segmentation=False)
        self.assertEqual(expected_output, str(alignment_table))

        expected_output = """+---+---------+---------+
| A | filler1 | token   |
| B | token   | filler2 |
+---+---------+---------+"""
        alignment_table = collate(json_in,
                                  properties_filter=self.match_properties,
                                  segmentation=False)
        self.assertEqual(expected_output, str(alignment_table))
示例#15
0
    def test_non_overlapping_blocks_Hermans(self):
        """Check row cells when W1 has an extra "! K" that W2 lacks.

        The table is printed and both rows are compared cell by cell via
        ``to_list_of_strings()``.
        """
        witnesses = Collation()
        witnesses.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
        witnesses.add_plain_witness("W2", "a b c d F g h i ! q r s t")

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual(["a b c d F g h i ", "! K ", "! q r s t"], cells(0))
        self.assertEqual(["a b c d F g h i ", None, "! q r s t"], cells(1))
    def test_1(self):
        """Pin the table rows for the witnesses "a", "b" and "a b".

        The table is printed, then each row's ``to_list_of_strings()``
        result is compared with the expected cells.
        """
        collation = Collation()
        for sigil, content in (("A", "a"), ("B", "b"), ("C", "a b")):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            ["a", None],
            [None, "b"],
            ["a ", "b"],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
    def test_2(self):
        """Pin the table rows when W2 and W3 end with an extra "in the".

        W1 is "in the in the bleach"; W2 and W3 append "in the". The
        expected rows leave W1's second cell as ``None``.
        """
        collation = Collation()
        for sigil, content in (
            ("W1", "in the in the bleach"),
            ("W2", "in the in the bleach in the"),
            ("W3", "in the in the bleach in the"),
        ):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            ["in the in the bleach", None],
            ["in the in the bleach ", "in the"],
            ["in the in the bleach ", "in the"],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
示例#18
0
    def test_align_with_longest_match(self):
        """Check row cells for "a g a g c t a g t" against "a g c t".

        The expected rows place witness B's only cell in the middle
        column, with ``None`` on either side.
        """
        witnesses = Collation()
        witnesses.add_plain_witness("A", "a g a g c t a g t")
        witnesses.add_plain_witness("B", "a g c t")

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual(["a g ", "a g c t ", "a g t"], cells(0))
        self.assertEqual([None, "a g c t", None], cells(1))
示例#19
0
 def testColumnStatusInAlignmentTable(self):
     """Pin the ``variant`` flag of every column in the alignment table.

     The flags are checked for witnesses A and B, and again once
     witness C has been added to the same collation.
     """

     def variant_flags(table):
         # Collect the variant attribute of each column, in column order.
         return [column.variant for column in table.columns]

     collation = Collation()
     for sigil, content in (
         ("A", "The quick brown fox jumps over the dog."),
         ("B", "The brown fox jumps over the lazy dog."),
     ):
         collation.add_plain_witness(sigil, content)
     self.assertEqual(
         [False, True, False, True, False],
         variant_flags(collate(collation)),
     )

     collation.add_plain_witness("C", "The brown fox walks around the lazy dog.")
     # NOTE(review): collate() is invoked twice and only the second table
     # is inspected; the first call looks redundant - confirm.
     collate(collation)
     self.assertEqual(
         [False, True, False, True, False, True, False],
         variant_flags(collate(collation)),
     )
    def test_blocks_Hermans_case_three_witnesses(self):
        """Pin the table rows for three witnesses differing in "F"/"E", "! K".

        W3 has "E" where W1 and W2 have "F"; only W1 contains "! K". The
        expected rows use ``None`` for the missing "! K" cells.
        """
        collation = Collation()
        for sigil, content in (
            ("W1", "a b c d F g h i ! K ! q r s t"),
            ("W2", "a b c d F g h i ! q r s t"),
            ("W3", "a b c d E g h i ! q r s t"),
        ):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            ["a b c d ", "F ", "g h i ", "! K ", "! q r s t"],
            ["a b c d ", "F ", "g h i ", None, "! q r s t"],
            ["a b c d ", "E ", "g h i ", None, "! q r s t"],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
    def testPlainTableRendering(self):
        collation = Collation()
        collation.add_plain_witness("A", "This very quick very quick brown wombat")
        collation.add_plain_witness("B", "That very quick brown koala")
        collation.add_plain_witness("C", "That very quick brown kangaroo")
        expected_output = """+---+-----------------+------------------+----------+
| A | This very quick | very quick brown | wombat   |
| B | That            | very quick brown | koala    |
| C | That            | very quick brown | kangaroo |
+---+-----------------+------------------+----------+"""
        plain_text_output = str(collate(collation))
        self.assertEquals(expected_output, plain_text_output)
    def testPlainTableRendering(self):
        collation = Collation()
        collation.add_plain_witness("A",
                                    "This very quick very quick brown wombat")
        collation.add_plain_witness("B", "That very quick brown koala")
        collation.add_plain_witness("C", "That very quick brown kangaroo")
        expected_output = """+---+-----------------+------------------+----------+
| A | This very quick | very quick brown | wombat   |
| B | That            | very quick brown | koala    |
| C | That            | very quick brown | kangaroo |
+---+-----------------+------------------+----------+"""
        plain_text_output = str(collate(collation))
        self.assertEquals(expected_output, plain_text_output)
    def testPlainTableRenderingNoParallelSegmentation(self):
        """Pin the plain-text table rendered with ``segmentation=False``.

        The expected text has one token per column and "-" where
        witnesses B and C have no token.
        """
        witnesses = Collation()
        for sigil, content in (
            ("A", "This very quick very quick brown wombat"),
            ("B", "That very quick brown koala"),
            ("C", "That very quick brown kangaroo"),
        ):
            witnesses.add_plain_witness(sigil, content)

        expected = (
            "+---+------+------+-------+------+-------+-------+----------+\n"
            "| A | This | very | quick | very | quick | brown | wombat   |\n"
            "| B | That | -    | -     | very | quick | brown | koala    |\n"
            "| C | That | -    | -     | very | quick | brown | kangaroo |\n"
            "+---+------+------+-------+------+-------+-------+----------+"
        )
        table = collate(witnesses, segmentation=False)
        self.assertEqual(expected, str(table))
    def test_duplicated_tokens_in_witness(self):
        """Pin the table rows when witness D repeats witness A's token.

        Witnesses are "a", "b", "c" and "a a"; the table is printed and
        each row's cells are compared with the expected list.
        """
        collation = Collation()
        for sigil, content in (("A", "a"), ("B", "b"), ("C", "c"), ("D", "a a")):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            [None, "a"],
            ["b", None],
            ["c", None],
            ["a ", "a"],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
示例#25
0
    def test_blocks_Hermans_case_three_witnesses(self):
        """Check row cells for three witnesses W1, W2 and W3.

        The table is printed and all three rows are compared cell by cell
        via ``to_list_of_strings()``.
        """
        witnesses = Collation()
        witnesses.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
        witnesses.add_plain_witness("W2", "a b c d F g h i ! q r s t")
        witnesses.add_plain_witness("W3", "a b c d E g h i ! q r s t")

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual(
            ["a b c d ", "F ", "g h i ", "! K ", "! q r s t"], cells(0))
        self.assertEqual(
            ["a b c d ", "F ", "g h i ", None, "! q r s t"], cells(1))
        self.assertEqual(
            ["a b c d ", "E ", "g h i ", None, "! q r s t"], cells(2))
示例#26
0
    def test_1(self):
        """Check row cells for the three witnesses "a", "b" and "a b".

        The table is printed and every row is compared with its expected
        list of cell strings.
        """
        witnesses = Collation()
        witnesses.add_plain_witness("A", "a")
        witnesses.add_plain_witness("B", "b")
        witnesses.add_plain_witness("C", "a b")

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual(["a", None], cells(0))
        self.assertEqual([None, "b"], cells(1))
        self.assertEqual(["a ", "b"], cells(2))
示例#27
0
    def test_2(self):
        """Check row cells for W1 against the longer witnesses W2 and W3.

        W2 and W3 share the text "in the in the bleach in the"; W1 lacks
        the final "in the".
        """
        witnesses = Collation()
        witnesses.add_plain_witness("W1", "in the in the bleach")
        witnesses.add_plain_witness("W2", "in the in the bleach in the")
        witnesses.add_plain_witness("W3", "in the in the bleach in the")

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual(["in the in the bleach", None], cells(0))
        self.assertEqual(["in the in the bleach ", "in the"], cells(1))
        self.assertEqual(["in the in the bleach ", "in the"], cells(2))
示例#28
0
    def testPlainTableRenderingNoParallelSegmentation(self):
        """Compare the unsegmented plain-text table with a fixed rendering.

        Three plain witnesses are collated with ``segmentation=False`` and
        ``str()`` of the result is checked.
        """
        texts = Collation()
        texts.add_plain_witness("A", "This very quick very quick brown wombat")
        texts.add_plain_witness("B", "That very quick brown koala")
        texts.add_plain_witness("C", "That very quick brown kangaroo")

        wanted = "\n".join([
            "+---+------+------+-------+------+-------+-------+----------+",
            "| A | This | very | quick | very | quick | brown | wombat   |",
            "| B | That | -    | -     | very | quick | brown | koala    |",
            "| C | That | -    | -     | very | quick | brown | kangaroo |",
            "+---+------+------+-------+------+-------+-------+----------+",
        ])
        rendered = str(collate(texts, segmentation=False))
        self.assertEqual(wanted, rendered)
    def test_rank_adjustment(self):
        """Pin the table rows for five witnesses A-E.

        Witness E ("aaa aaa aaa aaa aaa") shares no token with the others;
        its expected row has all its text in the first cell.
        """
        collation = Collation()
        for sigil, content in (
            ('A', 'aa bb cc dd ee ff'),
            ('B', 'aa bb ex ff'),
            ('C', 'aa bb cc ee ff'),
            ('D', 'aa bb ex dd ff'),
            ('E', 'aaa aaa aaa aaa aaa'),
        ):
            collation.add_plain_witness(sigil, content)

        table = collate(collation)
        print("alignment_table=\n", table)

        expected_rows = [
            ['aa bb ', 'cc ', 'dd ', 'ee ', 'ff'],
            ['aa bb ', 'ex ', None, None, 'ff'],
            ['aa bb ', 'cc ', None, 'ee ', 'ff'],
            ['aa bb ', 'ex ', 'dd ', None, 'ff'],
            ['aaa aaa aaa aaa aaa', None, None, None, None],
        ]
        for index, expected_cells in enumerate(expected_rows):
            self.assertEqual(
                expected_cells,
                table.rows[index].to_list_of_strings(),
            )
示例#30
0
    def test_duplicated_tokens_in_witness(self):
        """Check row cells when witness D is "a a" and A, B, C are one token.

        The table is printed and each of the four rows is compared with
        its expected list of cell strings.
        """
        witnesses = Collation()
        witnesses.add_plain_witness("A", "a")
        witnesses.add_plain_witness("B", "b")
        witnesses.add_plain_witness("C", "c")
        witnesses.add_plain_witness("D", "a a")

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual([None, "a"], cells(0))
        self.assertEqual(["b", None], cells(1))
        self.assertEqual(["c", None], cells(2))
        self.assertEqual(["a ", "a"], cells(3))
示例#31
0
    def test_rank_adjustment(self):
        """Check row cells for the five witnesses A, B, C, D and E.

        The table is printed and every row is compared with its expected
        list of cell strings via ``to_list_of_strings()``.
        """
        witnesses = Collation()
        witnesses.add_plain_witness('A', 'aa bb cc dd ee ff')
        witnesses.add_plain_witness('B', 'aa bb ex ff')
        witnesses.add_plain_witness('C', 'aa bb cc ee ff')
        witnesses.add_plain_witness('D', 'aa bb ex dd ff')
        witnesses.add_plain_witness('E', 'aaa aaa aaa aaa aaa')

        alignment_table = collate(witnesses)
        print("alignment_table=\n", alignment_table)

        def cells(row_index):
            # Looked up lazily so each row is only touched when asserted.
            return alignment_table.rows[row_index].to_list_of_strings()

        self.assertEqual(['aa bb ', 'cc ', 'dd ', 'ee ', 'ff'], cells(0))
        self.assertEqual(['aa bb ', 'ex ', None, None, 'ff'], cells(1))
        self.assertEqual(['aa bb ', 'cc ', None, 'ee ', 'ff'], cells(2))
        self.assertEqual(['aa bb ', 'ex ', 'dd ', None, 'ff'], cells(3))
        self.assertEqual(
            ['aaa aaa aaa aaa aaa', None, None, None, None], cells(4))
    def test_duplicated_tokens_in_witness2(self):
        """Pin the indented TEI output for witnesses "a", "b", "c", "a b c a b c".

        ``collate`` is called with ``output="tei"`` and ``indent=True`` and
        the returned text is compared with a fixed document.
        """
        witnesses = Collation()
        for sigil, content in (
            ("A", "a"),
            ("B", "b"),
            ("C", "c"),
            ("D", "a b c a b c"),
        ):
            witnesses.add_plain_witness(sigil, content)

        # Disabled row-level expectations, kept for reference:
        # alignment_table = collate(collation)
        # self.assertEqual(['a', None, None, None], alignment_table.rows[0].to_list_of_strings())
        # self.assertEqual([None, 'b', None, None], alignment_table.rows[1].to_list_of_strings())
        # self.assertEqual([None, None, 'c', None], alignment_table.rows[2].to_list_of_strings())
        # self.assertEqual(['a ', 'b ', 'c ', 'a b c'], alignment_table.rows[3].to_list_of_strings())

        # Indentation is written with explicit escapes: the expected text
        # uses tabs, and a tab followed by one space between <app> elements.
        expected_tei = "\n".join([
            '<?xml version="1.0" ?>',
            '<cx:apparatus xmlns="http://www.tei-c.org/ns/1.0" xmlns:cx="http://interedition.eu/collatex/ns/1.0">',
            '\t<app>',
            '\t\t<rdg wit="#D">a b c</rdg>',
            '\t</app>',
            '\t ',
            '\t<app>',
            '\t\t<rdg wit="#A">a</rdg>',
            '\t\t<rdg wit="#D">a</rdg>',
            '\t</app>',
            '\t ',
            '\t<app>',
            '\t\t<rdg wit="#B">b</rdg>',
            '\t\t<rdg wit="#D">b</rdg>',
            '\t</app>',
            '\t ',
            '\t<app>',
            '\t\t<rdg wit="#C #D">c</rdg>',
            '\t</app>',
            '</cx:apparatus>',
            '',  # the expected document ends with a newline
        ])

        # alignment_table = collate(collation)
        # print("alignment_table=\n",alignment_table)

        actual_tei = collate(witnesses, output="tei", indent=True)
        self.assertEqual(expected_tei, actual_tei)
示例#33
0
    def test_duplicated_tokens_in_witness2(self):
        collation = Collation()
        collation.add_plain_witness("A", "a")
        collation.add_plain_witness("B", "b")
        collation.add_plain_witness("C", "c")
        collation.add_plain_witness("D", "a b c a b c")

        # alignment_table = collate(collation)
        # self.assertEqual(['a', None, None, None], alignment_table.rows[0].to_list_of_strings())
        # self.assertEqual([None, 'b', None, None], alignment_table.rows[1].to_list_of_strings())
        # self.assertEqual([None, None, 'c', None], alignment_table.rows[2].to_list_of_strings())
        # self.assertEqual(['a ', 'b ', 'c ', 'a b c'], alignment_table.rows[3].to_list_of_strings())

        expected_tei = """<?xml version="1.0" ?>
<cx:apparatus xmlns="http://www.tei-c.org/ns/1.0" xmlns:cx="http://interedition.eu/collatex/ns/1.0">
	<app>
		<rdg wit="#D">a b c</rdg>
	</app>
	 
	<app>
		<rdg wit="#A">a</rdg>
		<rdg wit="#D">a</rdg>
	</app>
	 
	<app>
		<rdg wit="#B">b</rdg>
		<rdg wit="#D">b</rdg>
	</app>
	 
	<app>
		<rdg wit="#C #D">c</rdg>
	</app>
</cx:apparatus>
"""

        # alignment_table = collate(collation)
        # print("alignment_table=\n",alignment_table)

        output_tei = collate(collation, output="tei", indent=True)
        self.assertEqual(expected_tei, output_tei)
 def testPretokenizedWitness(self):
     """Collate two pre-tokenized witnesses and check the resulting rows.

     The witnesses are supplied as token dictionaries (each with a "t" text
     plus arbitrary extra keys) and collated with ``segmentation=False``.
     Both rows are expected to be four entries long, and the multi-word
     token "mousedog bird" must appear in the second row.
     """
     tokens_a = [
         {"t": "A", "ref": 123},
         {"t": "black", "adj": True},
         {"t": "cat", "id": "xyz"},
         {"t": "bird", "id": "abc"},
     ]
     tokens_b = [
         {"t": "A"},
         {"t": "white", "adj": True},
         {"t": "mousedog bird", "adj": False},
     ]
     pretokenized_witness = {
         "witnesses": [
             {"id": "A", "tokens": tokens_a},
             {"id": "B", "tokens": tokens_b},
         ]
     }

     collation = Collation.create_from_dict(pretokenized_witness)
     table = collate(collation, segmentation=False)

     # Each of the two rows must be four entries long.
     for row_index in (0, 1):
         self.assertEqual(len(table.rows[row_index].to_list()), 4)

     # The second witness should have a token that reads 'mousedog bird'.
     self.assertIn("mousedog bird", str(table.rows[1].to_list()))
示例#35
0
 def test_near_matching(self):
     """Collate ``self.json_in`` with near matching and no segmentation.

     The string lists of the first two rows are compared with the expected
     values; the second row's expectation contains ``None`` entries.
     ``self.json_in`` is provided outside this method.
     """
     table = collate(self.json_in, near_match=True, segmentation=False)
     expected_rows = (
         ["I", "bought", "this", "glass", ",", "because", "it", "matches",
          "those", "dinner", "plates", "."],
         ["I", "bought", None, None, None, None, None, None,
          "those", None, "glasses", "."],
     )
     for row_index, expected in enumerate(expected_rows):
         self.assertEqual(expected, table.rows[row_index].to_list_of_strings())
示例#36
0
 def testJSONAlignmentTableRendering(self):
     """Collate three plain witnesses to JSON and check the decoded table.

     The JSON string returned by ``collate(..., output="json")`` is parsed
     with ``json.loads`` and compared with ``expected_output``: one row per
     witness, each row a list of cells, each cell a list of token dicts
     carrying the normalized form ("n") and the original text ("t").
     """
     collation = Collation()
     collation.add_plain_witness("A",
                                 "This very quick very quick brown wombat")
     collation.add_plain_witness("B", "That very quick brown koala")
     collation.add_plain_witness("C", "That very quick brown kangaroo")
     expected_output = {
         "table": [
             [  # witness A
                 [
                     {"n": "This", "t": "This "},
                     {"n": "very", "t": "very "},
                     {"n": "quick", "t": "quick "},
                 ],
                 [
                     {"n": "very", "t": "very "},
                     {"n": "quick", "t": "quick "},
                     {"n": "brown", "t": "brown "},
                 ],
                 [{"n": "wombat", "t": "wombat"}],
             ],
             [  # witness B
                 [{"n": "That", "t": "That "}],
                 [
                     {"n": "very", "t": "very "},
                     {"n": "quick", "t": "quick "},
                     {"n": "brown", "t": "brown "},
                 ],
                 [{"n": "koala", "t": "koala"}],
             ],
             [  # witness C
                 [{"n": "That", "t": "That "}],
                 [
                     {"n": "very", "t": "very "},
                     {"n": "quick", "t": "quick "},
                     {"n": "brown", "t": "brown "},
                 ],
                 [{"n": "kangaroo", "t": "kangaroo"}],
             ],
         ],
         "witnesses": ["A", "B", "C"],
     }
     json_out = collate(collation, output="json")
     # assertEqual is used rather than the assertEquals alias, which is
     # deprecated and was removed from unittest in Python 3.12.
     self.assertEqual(expected_output, json.loads(json_out))
 def test_near_matching_segmented(self):
     """Collate ``self.json_in`` with near matching and segmentation enabled.

     The string lists of the first two rows are compared with the expected
     two-entry lists. ``self.json_in`` is provided outside this method.
     """
     result = collate(self.json_in, near_match=True, segmentation=True)
     # assertEqual is used rather than the assertEquals alias, which is
     # deprecated and was removed from unittest in Python 3.12.
     self.assertEqual(["I bought", "this glass, because it matches those dinner plates."],
                      result.rows[0].to_list_of_strings())
     self.assertEqual(["I bought", "those glasses."], result.rows[1].to_list_of_strings())
 def test_near_matching(self):
     """Collate ``self.json_in`` with near matching and no segmentation.

     The string lists of the first two rows are compared with the expected
     values; the second row's expectation contains ``None`` entries.
     ``self.json_in`` is provided outside this method.
     """
     result = collate(self.json_in, near_match=True, segmentation=False)
     # assertEqual is used rather than the assertEquals alias, which is
     # deprecated and was removed from unittest in Python 3.12.
     self.assertEqual(["I", "bought", "this", "glass", ",", "because", "it", "matches", "those", "dinner", "plates", "."],
                      result.rows[0].to_list_of_strings())
     self.assertEqual(["I", "bought", None, None, None, None, None, None, "those", None, "glasses", "."],
                      result.rows[1].to_list_of_strings())
示例#39
0
 def testJSONAlignmentTableRendering(self):
     """Collate three plain witnesses to JSON and check the decoded table.

     The JSON string returned by ``collate(..., output="json")`` is parsed
     with ``json.loads`` and compared with ``expected_output``: one row per
     witness, each row a list of cells, each cell a list of token dicts
     with the keys "n", "t", "_sigil" and "_token_array_position".
     """
     collation = Collation()
     collation.add_plain_witness("A",
                                 "This very quick very quick brown wombat")
     collation.add_plain_witness("B", "That very quick brown koala")
     collation.add_plain_witness("C", "That very quick brown kangaroo")
     # NOTE(review): positions 7 and 13 do not occur in the expected data —
     # presumably the token array holds a separator between witnesses; confirm.
     expected_output = {
         "table": [
             [  # witness A
                 [
                     {"n": "This", "_sigil": "A", "t": "This ", "_token_array_position": 0},
                     {"n": "very", "_sigil": "A", "t": "very ", "_token_array_position": 1},
                     {"n": "quick", "_sigil": "A", "t": "quick ", "_token_array_position": 2},
                 ],
                 [
                     {"n": "very", "_sigil": "A", "t": "very ", "_token_array_position": 3},
                     {"n": "quick", "_sigil": "A", "t": "quick ", "_token_array_position": 4},
                     {"n": "brown", "_sigil": "A", "t": "brown ", "_token_array_position": 5},
                 ],
                 [
                     {"n": "wombat", "_sigil": "A", "t": "wombat", "_token_array_position": 6},
                 ],
             ],
             [  # witness B
                 [
                     {"n": "That", "_sigil": "B", "t": "That ", "_token_array_position": 8},
                 ],
                 [
                     {"n": "very", "_sigil": "B", "t": "very ", "_token_array_position": 9},
                     {"n": "quick", "_sigil": "B", "t": "quick ", "_token_array_position": 10},
                     {"n": "brown", "_sigil": "B", "t": "brown ", "_token_array_position": 11},
                 ],
                 [
                     {"n": "koala", "_sigil": "B", "t": "koala", "_token_array_position": 12},
                 ],
             ],
             [  # witness C
                 [
                     {"n": "That", "_sigil": "C", "t": "That ", "_token_array_position": 14},
                 ],
                 [
                     {"n": "very", "_sigil": "C", "t": "very ", "_token_array_position": 15},
                     {"n": "quick", "_sigil": "C", "t": "quick ", "_token_array_position": 16},
                     {"n": "brown", "_sigil": "C", "t": "brown ", "_token_array_position": 17},
                 ],
                 [
                     {"n": "kangaroo", "_sigil": "C", "t": "kangaroo", "_token_array_position": 18},
                 ],
             ],
         ],
         "witnesses": ["A", "B", "C"],
     }
     json_out = collate(collation, output="json")
     # No debug print of json_out: on a mismatch assertEqual already reports
     # the differing structures, so printing only adds noise to test output.
     self.assertEqual(expected_output, json.loads(json_out))
示例#40
0
 def test_near_matching_segmented(self):
     """Collate ``self.json_in`` with near matching and segmentation enabled.

     Each of the first two rows is compared, as a list of strings, with its
     expected two-entry list. ``self.json_in`` is provided outside this
     method.
     """
     table = collate(self.json_in, near_match=True, segmentation=True)
     expected_rows = (
         ["I bought", "this glass, because it matches those dinner plates."],
         ["I bought", "those glasses."],
     )
     for row_index, expected in enumerate(expected_rows):
         self.assertEqual(expected, table.rows[row_index].to_list_of_strings())
示例#41
0
 def testJSONAlignmentTableRenderingNoSegmentation(self):
     """Check the JSON alignment table produced with ``segmentation=False``.

     Three plain witnesses are collated to JSON; the decoded output must
     equal ``expected_output``. Every non-empty cell holds exactly one token
     dict, and the rows for witnesses B and C contain ``None`` in their
     second and third cells.
     """
     witness_texts = (
         ("A", "This very quick very quick brown wombat"),
         ("B", "That very quick brown koala"),
         ("C", "That very quick brown kangaroo"),
     )
     collation = Collation()
     for sigil, text in witness_texts:
         collation.add_plain_witness(sigil, text)

     # NOTE(review): positions 7 and 13 do not occur in the expected data —
     # presumably the token array holds a separator between witnesses; confirm.
     row_a = [
         [{"_sigil": "A", "_token_array_position": 0, "n": "This", "t": "This "}],
         [{"_sigil": "A", "_token_array_position": 1, "n": "very", "t": "very "}],
         [{"_sigil": "A", "_token_array_position": 2, "n": "quick", "t": "quick "}],
         [{"_sigil": "A", "_token_array_position": 3, "n": "very", "t": "very "}],
         [{"_sigil": "A", "_token_array_position": 4, "n": "quick", "t": "quick "}],
         [{"_sigil": "A", "_token_array_position": 5, "n": "brown", "t": "brown "}],
         [{"_sigil": "A", "_token_array_position": 6, "n": "wombat", "t": "wombat"}],
     ]
     row_b = [
         [{"_sigil": "B", "_token_array_position": 8, "n": "That", "t": "That "}],
         None,
         None,
         [{"_sigil": "B", "_token_array_position": 9, "n": "very", "t": "very "}],
         [{"_sigil": "B", "_token_array_position": 10, "n": "quick", "t": "quick "}],
         [{"_sigil": "B", "_token_array_position": 11, "n": "brown", "t": "brown "}],
         [{"_sigil": "B", "_token_array_position": 12, "n": "koala", "t": "koala"}],
     ]
     row_c = [
         [{"_sigil": "C", "_token_array_position": 14, "n": "That", "t": "That "}],
         None,
         None,
         [{"_sigil": "C", "_token_array_position": 15, "n": "very", "t": "very "}],
         [{"_sigil": "C", "_token_array_position": 16, "n": "quick", "t": "quick "}],
         [{"_sigil": "C", "_token_array_position": 17, "n": "brown", "t": "brown "}],
         [{"_sigil": "C", "_token_array_position": 18, "n": "kangaroo", "t": "kangaroo"}],
     ]
     expected_output = {
         "table": [row_a, row_b, row_c],
         "witnesses": ["A", "B", "C"],
     }

     json_out = collate(collation, output="json", segmentation=False)
     decoded = json.loads(json_out)
     self.assertEqual(expected_output, decoded)