示例#1
0
  def test_banana(self):
    """Compare the model's continuation of "ba" with a raw-string baseline.

    Two 1000-sample tallies of the character that follows "ba" are built:

    * one by asking ``MarkovModel.get_n_tokens`` for a single token after
      the context "ba" (labelled "BWT" in the output; presumably the model
      is Burrows-Wheeler based -- confirm against MarkovModel);
    * one by jumping to a random offset in the string, scanning forward
      circularly to the next "ba", and taking the character after it.

    ``self.log_ratio`` is evaluated on each tally for 'a' versus 'b', and
    the test passes when the model's ratio is strictly smaller in absolute
    value than the baseline's.
    """
    string = "blah blah blah blah blah blah baabab blah blah"
    length = len(string)
    markov = MarkovModel(list(string))

    results = {'a': 0, 'b': 0}
    for _ in range(1000):
      token = markov.get_n_tokens(list("ba"), 1)[0]
      results[token] += 1

    bwt_log_ratio = self.log_ratio(results, 'a', 'b')
    # Single pre-formatted argument: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("BWT: log ratio is %s" % (bwt_log_ratio,))

    results = {'a': 0, 'b': 0}
    for _ in range(1000):
      start = randrange(length)
      # Treat the string as circular: the 2-gram at the last index wraps to
      # the first character, so the scan always reaches an occurrence.
      while string[start] + string[(start + 1) % length] != "ba":
        start = (start + 1) % length
      # Wrap the look-ahead as well, so a match at the very end of the
      # string reads from the beginning instead of raising IndexError.
      token = string[(start + 2) % length]
      results[token] += 1

    plain_log_ratio = self.log_ratio(results, 'a', 'b')
    print("PLAIN: log ratio is %s" % (plain_log_ratio,))

    self.assertTrue(abs(bwt_log_ratio) < abs(plain_log_ratio))
示例#2
0
  def test_markov(self):
    """Check sampling and enumeration of one-token continuations of "T".

    The model is built from the characters of "Tom Tucker". A single
    sampled continuation must be one of ``["o"]`` / ``["u"]``, and the
    enumeration of all 1-grams after "T" must equal exactly that list, in
    that order.
    """
    model = MarkovModel(list("Tom Tucker"))
    expected = [["o"], ["u"]]

    # A fresh context list is passed to each call, as the model is not
    # known to leave its argument untouched.
    sampled = model.get_n_tokens(list("T"), 1)
    self.assertIn(sampled, expected)

    # Materialise whatever iterable the model returns before comparing.
    enumerated = list(model.get_all_possible_n_grams(list("T"), 1))
    self.assertEqual(enumerated, expected)