示例#1
0
 def test_blocks_splitting_token_case(self):
     """The repeated sequence "a c b" must be reported as a single block."""
     witness_set = Collation()
     witness_set.add_witness("W1", "a c b c")
     witness_set.add_witness("W2", "a c b")
     aligner = DekkerSuffixAlgorithm(witness_set)
     found_blocks = aligner.get_non_overlapping_repeating_blocks()
     # token positions 0-2 (W1) and 5-7 (W2) both spell "a c b"
     expected_block = Block(RangeSet("0-2, 5-7"))
     self.assertIn(expected_block, found_blocks)
示例#2
0
 def test_non_overlapping_blocks_Hermans(self):
     """Both shared sequences of the two-witness Hermans case are found."""
     witness_set = Collation()
     witness_set.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     witness_set.add_witness("W2", "a b c d F g h i ! q r s t")
     found_blocks = DekkerSuffixAlgorithm(witness_set).get_non_overlapping_repeating_blocks()
     # "a b c d F g h i !" occupies 0-8 in W1 and 16-24 in W2
     self.assertIn(Block(RangeSet("0-8, 16-24")), found_blocks)
     # "q r s t" occupies 11-14 in W1 and 25-28 in W2
     self.assertIn(Block(RangeSet("11-14, 25-28")), found_blocks)
示例#3
0
 def test_non_overlapping_blocks_black_cat(self):
     """Two identical witnesses produce exactly one block covering both."""
     witness_set = Collation()
     witness_set.add_witness("W1", "the black cat")
     witness_set.add_witness("W2", "the black cat")
     found_blocks = DekkerSuffixAlgorithm(witness_set).get_non_overlapping_repeating_blocks()
     # tokens 0-2 ("the black cat" in W1) match tokens 4-6 in W2
     self.assertEqual([Block(RangeSet("0-2, 4-6"))], found_blocks)
示例#4
0
 def testPlainWitness(self):
     """A plain-text witness dict is tokenized into individual tokens."""
     witness_data = {
         'id': 'A',
         'content': 'The quick brown fox jumped over the lazy dogs.'
     }
     collation = Collation()
     collation.add_witness(witness_data)
     # nine words plus the trailing period -> ten tokens
     self.assertEqual(10, len(collation.witnesses[0].tokens()))
示例#5
0
 def test_filter_potential_blocks(self):
     """Candidate intervals that fail the block filter are all removed."""
     witness_set = Collation()
     witness_set.add_witness("W1", "a a")
     witness_set.add_witness("w2", "a")
     suffix_array = witness_set.to_extended_suffix_array()
     candidates = suffix_array.split_lcp_array_into_intervals()
     DekkerSuffixAlgorithm(witness_set).filter_potential_blocks(candidates)
     # nothing should survive the filtering step
     self.assertFalse(candidates)
示例#6
0
 def test_block_witnesses_Hermans_case_two_witnesses(self):
     """Both witnesses project onto the same two block sequences.

     Fix: use assertEqual; assertEquals is a deprecated alias that was
     removed from unittest in Python 3.12.
     """
     collation = Collation()
     collation.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_witness("W2", "a b c d F g h i ! q r s t")
     algorithm = DekkerSuffixAlgorithm(collation)
     block_witness = algorithm.get_block_witness(collation.witnesses[0])
     self.assertEqual(["a b c d F g h i !", "q r s t"], block_witness.debug())
     block_witness = algorithm.get_block_witness(collation.witnesses[1])
     self.assertEqual(["a b c d F g h i !", "q r s t"], block_witness.debug())
示例#7
0
 def test_blocks_failing_transposition_use_case_old_algorithm(self):
     """Transposed phrases "the cat"/"the dog" still yield three blocks."""
     witness_set = Collation()
     witness_set.add_witness("W1", "the cat and the dog")
     witness_set.add_witness("W2", "the dog and the cat")
     found_blocks = DekkerSuffixAlgorithm(witness_set).get_non_overlapping_repeating_blocks()
     the_cat_block = Block(RangeSet("0-1, 9-10"))  # "the cat" in both witnesses
     the_dog_block = Block(RangeSet("3-4, 6-7"))   # "the dog" in both witnesses
     and_block = Block(RangeSet("2, 8"))           # "and" in both witnesses
     self.assertEqual([the_cat_block, the_dog_block, and_block], found_blocks)
示例#8
0
 def test_split_lcp_intervals_into_smaller_intervals(self):
     """Three identical witnesses yield exactly two LCP intervals."""
     witness_set = Collation()
     for sigil in ("W1", "W2", "W3"):
         witness_set.add_witness(sigil, "the cat")
     split_intervals = witness_set.to_extended_suffix_array().split_lcp_array_into_intervals()
     self.assertIntervalIn(0, 2, 3, split_intervals)  # "the cat" in all three
     self.assertIntervalIn(1, 1, 3, split_intervals)  # "cat" in all three
     self.assertEqual(2, len(split_intervals), "More items: "+str(split_intervals))
示例#9
0
 def test_blocks_Hermans_case_three_witnesses(self):
     """The three Hermans witnesses share four non-overlapping blocks."""
     witness_set = Collation()
     witness_set.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     witness_set.add_witness("W2", "a b c d F g h i ! q r s t")
     witness_set.add_witness("W3", "a b c d E g h i ! q r s t")
     found_blocks = DekkerSuffixAlgorithm(witness_set).get_non_overlapping_repeating_blocks()
     expected = [
         Block(RangeSet("0-3, 16-19, 30-33")),   # a b c d
         Block(RangeSet("5-7, 21-23, 35-37")),   # g h i
         Block(RangeSet("10-14, 24-28, 38-42")), # ! q r s t
         Block(RangeSet("4, 20")),               # F (only W1 and W2)
     ]
     for block in expected:
         self.assertIn(block, found_blocks)
示例#10
0
 def test_block_witnesses_Hermans_case(self):
     """Each witness projects onto its own sequence of shared blocks.

     Fix: use assertEqual; assertEquals is a deprecated alias that was
     removed from unittest in Python 3.12.
     """
     collation = Collation()
     collation.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_witness("W2", "a b c d F g h i ! q r s t")
     collation.add_witness("W3", "a b c d E g h i ! q r s t")
     algorithm = DekkerSuffixAlgorithm(collation)
     block_witness1 = algorithm.get_block_witness(collation.witnesses[0])
     self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness1.debug())
     block_witness2 = algorithm.get_block_witness(collation.witnesses[1])
     self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness2.debug())
     block_witness3 = algorithm.get_block_witness(collation.witnesses[2])
     # W3 reads "E" where W1/W2 read "F", so "F" is absent from its blocks
     self.assertEqual(["a b c d", "g h i", "! q r s t"], block_witness3.debug())
 def testPretokenizedWitnessAdd(self):
     """A pre-tokenized witness keeps exactly the tokens it was given."""
     token_list = [
         {"t": "A", "ref": 123},
         {"t": "black and blue", "adj": True},
         {"t": "cat", "id": "xyz"},
         {"t": "bird", "id": "abc"},
     ]
     witness_data = {"id": "A", "tokens": token_list}
     collation = Collation()
     collation.add_witness(witness_data)
     # one token per entry in the "tokens" list
     self.assertEqual(4, len(collation.witnesses[0].tokens()))
示例#12
0
 def test_Hermans_case_variantgraph(self):
     """The variant graph links start -> a -> b ... t -> end.

     Fix: use assertTrue; assert_ is a long-deprecated alias of
     assertTrue that was removed from unittest in Python 3.12.
     """
     collation = Collation()
     collation.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_witness("W2", "a b c d F g h i ! q r s t")
     graph = VariantGraph()
     algorithm = DekkerSuffixAlgorithm(collation)
     algorithm.build_variant_graph_from_blocks(graph, collation)
     start_vertex = graph.start
     a = graph.vertexWith("a")
     b = graph.vertexWith("b")
     t = graph.vertexWith("t")
     end_vertex = graph.end
     self.assertTrue(graph.edge_between(start_vertex, a))
     self.assertTrue(graph.edge_between(a, b))
     self.assertTrue(graph.edge_between(t, end_vertex))
    def align_witnesses(self, witnesses):
        """Collate pre-tokenized witness dicts and rebuild the alignment
        table with the original token dicts.

        Each witness dict carries an "id" (sigil) and a "tokens" list; a
        token's normalized form is its "n" value when present, otherwise
        its "t" value.  The normalized forms are joined into plain-text
        Witness objects, collated, and each resulting row is re-populated
        with that witness's original token dicts.  Gaps are rendered as
        {"t": "^"}.
        """
        normalized_witnesses = []
        tokenized_witnesses = []
        for witness in witnesses:
            normalized_tokens = []
            tokenized_witness = []
            sigil = witness["id"]
            for token in witness["tokens"]:
                tokenized_witness.append(token)
                # prefer the explicit normalized form over the raw text
                if "n" in token:
                    normalized_tokens.append(token["n"])
                else:
                    normalized_tokens.append(token["t"])
            normalized_witnesses.append(
                Witness(sigil, " ".join(normalized_tokens)))
            tokenized_witnesses.append(tokenized_witness)
        collation = Collation()
        # Witnesses whose normalized content is empty are skipped here.
        # NOTE(review): they are still present in tokenized_witnesses, so
        # the zip() below could pair a row with the wrong token list when
        # an empty witness precedes a non-empty one — confirm callers
        # never mix empty and non-empty witnesses.
        for normalized_witness in normalized_witnesses:
            if normalized_witness.content:
                collation.add_witness(normalized_witness.sigil,
                                      normalized_witness.content)

        # NOTE(review): `results` is never read in this method body —
        # possibly leftover from an earlier version; verify before removing.
        results = {"witnesses": [], "table": [[]], "status": []}

        if len(collation.witnesses) > 0:
            at = collate(collation,
                         output="novisualization",
                         segmentation=False)
            tokenized_at = AlignmentTable(collation)
            for row, tokenized_witness in zip(at.rows, tokenized_witnesses):

                new_row = Row(row.header)
                tokenized_at.rows.append(new_row)
                token_counter = 0

                for cell in row.cells:
                    if cell != "-":
                        # swap the normalized cell for the original token dict
                        if token_counter <= len(tokenized_witness) - 1:
                            new_row.cells.append(
                                tokenized_witness[token_counter])
                            token_counter += 1
                    else:
                        # TODO: should probably be null or None instead, but that would break the rendering at the moment
                        new_row.cells.append({"t": "^"})

            alignment = json.loads(
                display_alignment_table_as_json(tokenized_at))
            self.transform_alignment(alignment)
示例#14
0
    def testBeckett(self):
        collation = Collation()
        collation.add_witness("1", "The same clock as when for example Magee once died.")
        collation.add_witness("2", "The same as when for example Magee once died.")
        table = collate(collation, output="novisualization")
        self.assertEquals(["The same", "clock", "as when for example Magee once died."], table.rows[0].to_list())
        self.assertEquals(["The same", "-", "as when for example Magee once died."], table.rows[1].to_list())
        
#         table.print_plain_text()
#         "The same as when for example McKee once died .",//
#         "The same as when among others Darly once died & left him.",//
#       #  "The same as when Darly among others once died and left him.");
        pass

# 
# if __name__ == "__main__":
#     #import sys;sys.argv = ['', 'Test.testName']
#     unittest.main()
示例#15
0
 def testPretokenizedWitnessAdd(self):
     """A pre-tokenized witness exposes one token per supplied entry."""
     pretokenized = {
         "id": "A",
         "tokens": [
             {"t": "A", "ref": 123},
             {"t": "black and blue", "adj": True},
             {"t": "cat", "id": "xyz"},
             {"t": "bird", "id": "abc"},
         ],
     }
     collation = Collation()
     collation.add_witness(pretokenized)
     first_witness = collation.witnesses[0]
     self.assertEqual(len(first_witness.tokens()), 4)
示例#16
0
    def testDoubleTransposition1(self):
        """Two phrases swap places between the witnesses; "is" stays put.

        Fix: use assertEqual; assertEquals is a deprecated alias that was
        removed from unittest in Python 3.12.
        """
        collation = Collation()
        collation.add_witness("A", "the cat is black")
        collation.add_witness("B", "black is the cat")
        alignment_table = collate(collation, output="novisualization")
        self.assertEqual(["the cat", "is", "black"], alignment_table.rows[0].to_list())
        self.assertEqual(["black", "is", "the cat"], alignment_table.rows[1].to_list())

#   @Test
#   public void doubleTransposition1() {
#     final SimpleWitness[] w = createWitnesses("the cat is black", "black is the cat");
#     final RowSortedTable<Integer, Witness, Set<Token>> t = table(collate(w));
#     assertEquals("|the|cat|is|black| |", toString(t, w[0]));
#     assertEquals("|black| |is|the|cat|", toString(t, w[1]));
#   }
# 
#   @Test
#   public void doubleTransposition2() {
#     final SimpleWitness[] w = createWitnesses("a b", "b a");
#     final RowSortedTable<Integer, Witness, Set<Token>> t = table(collate(w));
#     assertEquals("| |a|b|", toString(t, w[0]));
#     assertEquals("|b|a| |", toString(t, w[1]));
#   }
# 
#   @Test
#   public void doubleTransposition3() {
#     final SimpleWitness[] w = createWitnesses("a b c", "b a c");
#     final RowSortedTable<Integer, Witness, Set<Token>> t = table(collate(w));
#     assertEquals("| |a|b|c|", toString(t, w[0]));
#     assertEquals("|b|a| |c|", toString(t, w[1]));
#   }


# 
# if __name__ == "__main__":
#     #import sys;sys.argv = ['', 'Test.testName']
#     unittest.main()
示例#17
0
 def test_witness_ranges_hermans_case(self):
     """Each witness owns a contiguous token range inside the collation.

     Fix: use assertEqual; assertEquals is a deprecated alias that was
     removed from unittest in Python 3.12.
     """
     collation = Collation()
     collation.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_witness("W2", "a b c d F g h i ! q r s t")
     self.assertEqual(RangeSet("0-14"), collation.get_range_for_witness("W1"))
     # position 15 separates the witnesses, so W2 starts at 16
     self.assertEqual(RangeSet("16-28"), collation.get_range_for_witness("W2"))
 def testPlainWitness(self):
     """A plain-text witness dict is tokenized into individual tokens."""
     witness_data = {'id': 'A', 'content': 'The quick brown fox jumped over the lazy dogs.'}
     collation = Collation()
     collation.add_witness(witness_data)
     # nine words plus the trailing period -> ten tokens
     token_count = len(collation.witnesses[0].tokens())
     self.assertEqual(token_count, 10)
示例#19
0
 def test_combined_string_hermans_case(self):
     """Witness contents are joined with a "$<n>" separator marker.

     Fix: use assertEqual; assertEquals is a deprecated alias that was
     removed from unittest in Python 3.12.
     """
     collation = Collation()
     collation.add_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_witness("W2", "a b c d F g h i ! q r s t")
     # $ is meant to separate witnesses here
     self.assertEqual("a b c d F g h i ! K ! q r s t $1 a b c d F g h i ! q r s t", collation.get_combined_string())