Пример #1
0
def encode_bits_as_words(chain: MarkovChain,
                         wt_dict: WordTypeDictionary,
                         bits: Bits,
                         pad_text=True) -> list:
    """
    Encode a bit stream as a sequence of words by walking a Markov chain.

    Every state in the Markov chain, except the start state s0, must have a corresponding word-type in the given
    dictionary with at least one word.

    If the word-type dictionary does not have path bits to match the end of the input exactly, it will append 0s
    until the function can complete.

    Note that the chain is advanced in place: its current state after the call reflects every transition taken
    while encoding (and padding, when enabled).

    :param chain: a Markov chain with states
    :param wt_dict: a corresponding dictionary of word-types
    :param bits: the input bits
    :param pad_text: if true, generate cover text from random bits until the Markov chain reaches state s0
    :return: an ordered list of ``(word, encode_spaces)`` tuples, one per emitted word
    :raises ValueError: if ``bits`` is None or empty
    """
    # Use the == operator rather than calling __eq__ directly: a direct dunder call can return
    # NotImplemented (which is truthy) and skips the reflected comparison.
    if bits is None or bits == Bits():
        raise ValueError("Bits cannot be None or empty.")

    words = []
    remaining = bits
    while len(remaining) > 0:
        chain.transition()
        # The start state has no word-type of its own, so step past it before encoding.
        if chain.current_state == START_STATE_LABEL:
            chain.transition()

        word, word_bits, encode_spaces = _encode_next_word(
            chain, wt_dict, remaining)
        words.append((word, encode_spaces))
        # Drop the bits that this word consumed from the front of the stream.
        remaining = remaining[len(word_bits):]

    if pad_text:
        # Add filler words until s0 is reached. The filler length is derived from the longest
        # word in the dictionary; it is computed once, on first need, instead of on every
        # iteration (assumes the lookup is pure and the dictionary is not mutated while encoding).
        filler_length = None
        while chain.current_state != START_STATE_LABEL:
            chain.transition()
            if chain.current_state == START_STATE_LABEL:
                break
            if filler_length is None:
                filler_length = len(get_longest_word_in_dictionary(wt_dict))
            pseudo_random_bits = Bits(bin="".join(
                random.choice(["0", "1"]) for _ in range(filler_length)))
            # Only the word matters here; the bits consumed from the filler are discarded.
            word, _, encode_spaces = _encode_next_word(
                chain, wt_dict, pseudo_random_bits)
            words.append((word, encode_spaces))

    return words
Пример #2
0
class TestMarkovChainTransitions(unittest.TestCase):
    # Tests for setting, validating, querying and walking MarkovChain transitions.
    #
    # The fixture graph built in setUp is:
    #     s0 -> s1 (2), s0 -> s2 (3)
    #     s1 -> s3 (0.5), s1 -> s4 (0.5)
    #     s2 -> s4 (1)
    #     s3 -> s0 (1), s4 -> s0 (2)
    # Each transition is a (source, destination, weight) tuple.

    def setUp(self):
        # NOTE(review): "s4" is supplied as a tuple; presumably (label, word-type name) --
        # confirm against the MarkovChain constructor.
        self.states = {"s1", "s2", "s3", ("s4", "dict")}
        self.markov_chain = MarkovChain(self.states)

        self.transitions = {
            ("s0", "s1", 2),
            ("s0", "s2", 3),
            ("s1", "s3", 0.5),
            ("s1", "s4", 0.5),
            ("s2", "s4", 1),
            ("s3", "s0", 1),
            ("s4", "s0", 2),
        }

    def test_set_transitions(self):
        self.markov_chain.set_transitions(self.transitions)
        # Four declared states plus s0.
        self.assertEqual(5, len(self.markov_chain.markov_chain))
        self.assertIsInstance(self.markov_chain.markov_chain.get("s0"),
                              StateTransitions)
        self.assertDictEqual(
            {
                "s1": 2,
                "s2": 3
            },
            self.markov_chain.markov_chain.get("s0").transitions)

    def test_set_transitions_bad_outbound(self):
        # Destination "s5" is not a declared state.
        self.transitions.add(("s4", "s5", 1))
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_set_transitions_bad_inbound(self):
        # Source "s5" is not a declared state.
        self.transitions.add(("s5", "s1", 1))
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_set_transitions_neg_probability(self):
        self.transitions.add(("s3", "s4", -1))
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_set_transitions_zero_probability(self):
        self.transitions.add(("s3", "s4", 0))
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_set_transitions_s0_no_outbound(self):
        # Drop every transition leaving s0.
        self.transitions = {x for x in self.transitions if x[0] != "s0"}
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_set_transitions_s0_no_inbound(self):
        # Drop every transition entering s0.
        self.transitions = {x for x in self.transitions if x[1] != "s0"}
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_dict(self):
        self.markov_chain.set_transitions(self.transitions)
        # MarkovChain exposes its serialisable form through a callable named __dict__.
        serial_dict = self.markov_chain.__dict__()
        self.assertEqual(2, len(serial_dict))

        chain_dict = serial_dict.get("chain")
        self.assertIsInstance(chain_dict, dict)
        self.assertEqual(5, len(chain_dict))
        self.assertIsInstance(chain_dict.get("s0"), dict)
        self.assertDictEqual({"s1": 2, "s2": 3}, chain_dict.get("s0"))

    def test_get_outbound_transitions(self):
        self.markov_chain.set_transitions(self.transitions)
        self.markov_chain.current_state = "s0"
        outbound_transitions = self.markov_chain.get_outbound_transitions()
        self.assertIsInstance(outbound_transitions, StateTransitions)
        self.assertEqual(2, len(outbound_transitions.transitions))
        self.assertEqual(2, outbound_transitions.transitions.get("s1"))
        self.assertEqual(3, outbound_transitions.transitions.get("s2"))

        self.markov_chain.current_state = "s4"
        outbound_transitions = self.markov_chain.get_outbound_transitions()
        self.assertIsInstance(outbound_transitions, StateTransitions)
        self.assertEqual(1, len(outbound_transitions.transitions))
        self.assertEqual(2, outbound_transitions.transitions.get("s0"))

    def test_accumulate_transitions(self):
        self.markov_chain.set_transitions(self.transitions)
        self.markov_chain.current_state = "s0"
        accum = self.markov_chain.accumulate_transitions()
        self.assertIsInstance(accum, list)
        self.assertEqual(2, len(accum))
        # The outbound weights of s0 are 2 and 3, so one accumulated entry must total 5.
        # The generator belongs inside any(); passing a bare generator to assertTrue
        # would always succeed because generator objects are truthy.
        self.assertTrue(any(x[1] == 5 for x in accum))

    def test_transition(self):
        self.markov_chain.set_transitions(self.transitions)
        self.markov_chain.current_state = "s0"
        self.markov_chain.transition()
        self.assertIn(self.markov_chain.current_state, {"s1", "s2"})

        self.markov_chain.transition()
        self.assertIn(self.markov_chain.current_state, {"s3", "s4"})

        # Both s3 and s4 lead only back to s0.
        self.markov_chain.transition()
        self.assertEqual("s0", self.markov_chain.current_state)

    def test_find_2_cycle(self):
        # s2 -> s4 -> s2 forms a cycle that does not pass through s0.
        self.transitions.add(("s4", "s2", 1))
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_find_3_cycle(self):
        # Remove the existing edge into s3, then build s1 -> s4 -> s3 -> s1.
        self.transitions = {x for x in self.transitions if x[1] != "s3"}
        self.transitions.add(("s4", "s3", 1))
        self.transitions.add(("s3", "s1", 1))
        self.assertRaises(MarkovError, self.markov_chain.set_transitions,
                          self.transitions)

    def test_ignore_non_cycle(self):
        # s1 -> s2 adds a second route to s2 but no cycle; this must not raise.
        self.transitions.add(("s1", "s2", 1))
        self.markov_chain.set_transitions(self.transitions)

    def test_get_number_of_paths(self):
        # Base graph: s0-s1-s3-s0, s0-s1-s4-s0, s0-s2-s4-s0.
        self.markov_chain.set_transitions(self.transitions)
        no_of_paths = markov.get_number_of_paths(self.markov_chain)
        self.assertEqual(3, no_of_paths)

        # Adding s5 (reachable from s2 and s4, leading to s0) contributes three more paths.
        self.states.add("s5")
        self.markov_chain = MarkovChain(self.states)
        self.transitions.add(("s2", "s5", 1))
        self.transitions.add(("s4", "s5", 1))
        self.transitions.add(("s5", "s0", 1))
        self.markov_chain.set_transitions(self.transitions)
        no_of_paths = markov.get_number_of_paths(self.markov_chain)
        self.assertEqual(6, no_of_paths)

    # An explicit reason keeps the skip working on every Python version and shows up
    # in the test report.
    @unittest.skip("test_save is disabled by default")
    def test_save(self):
        self.markov_chain.set_transitions(self.transitions)
        markov.save_markov_chain(self.markov_chain)