def test_ngram_freq_symbols(self):
    """Check the bigram frequencies reported for 'wow!'.

    The expected result holds three bigrams ('wo', 'ow', 'w!'), each
    with a frequency of one third.
    """
    one_third = 1/3.0
    expected = {'wo': one_third, 'ow': one_third, 'w!': one_third}
    observed = etao.ngram_frequency('wow!', 2)
    self.assertEqual(observed, expected)
# Working substitution key: position k of `key` holds the ciphertext symbol
# believed to stand for the plaintext letter at position k of `cht`.
# NOTE(review): `asd`, `wer` and `etao` are used below but defined outside
# this fragment -- presumably the ciphertext, a string of plaintext letters,
# and the frequency-analysis module respectively; confirm against the caller.
key = '--------------------------'
cht = 'etaoinshrldcumfgpwybvkjxzq'
#      ------.----...-.--...--...


def conv(sd):
    """Translate every string in ``sd`` through the working key.

    A character found in ``key`` is replaced by the upper-cased letter of
    ``cht`` at the position of its first occurrence in ``key``; any other
    character is passed through unchanged.

    Returns a list with one translated string per item of ``sd``.
    """
    translated = []
    for chunk in sd:
        pieces = []
        for symbol in chunk:
            # find() reports the first occurrence, exactly like index(),
            # and returns -1 instead of raising when the symbol is absent.
            position = key.find(symbol)
            if position < 0:
                pieces.append(symbol)
            else:
                pieces.append(cht[position].upper())
        translated.append(''.join(pieces))
    return translated


def order(sd):
    """Return ``(ngram, translated_ngram)`` pairs, most frequent first.

    ``sd`` maps n-grams to their frequencies; the keys are sorted by
    descending frequency and paired with their ``conv`` translation.
    """
    ranked = sorted(sd.keys(), key=sd.__getitem__, reverse=True)
    return list(zip(ranked, conv(ranked)))


# Full unigram ranking first, then the 30 most frequent n-grams for n = 2..5.
print(order(etao.ngram_frequency(asd, 1)))
for ngram_size in range(2, 6):
    print(order(etao.ngram_frequency(asd, ngram_size))[:30])

# The whole text run through the current key.
print(''.join(conv([asd])))

# Key symbols assigned to each letter of `wer`, in order.
print('Key =>', ''.join(key[cht.index(letter)] for letter in wer))
def test_ngram_freq(self):
    """Check that 'the' yields the bigrams 'th' and 'he' at 0.5 each."""
    expected = {'th': 0.5, 'he': 0.5}
    observed = etao.ngram_frequency('the', 2)
    self.assertEqual(observed, expected)
def test_ngram_freq_only_alpha(self):
    """Check bigram frequencies of 't h e!!' when only_alpha=True.

    The expected result is {'th': 0.5, 'he': 0.5}.
    """
    expected = {'th': 0.5, 'he': 0.5}
    observed = etao.ngram_frequency('t h e!!', 2, only_alpha=True)
    self.assertEqual(observed, expected)
def test_ngram_freq_symbols(self):
    """Bigrams of 'wow!' are expected to be 'wo', 'ow' and 'w!', a third each."""
    share = 1 / 3.0
    expected = dict.fromkeys(('wo', 'ow', 'w!'), share)
    result = etao.ngram_frequency('wow!', 2)
    self.assertEqual(result, expected)
def test_ngram_freq_only_alpha(self):
    """With only_alpha=True, 't h e!!' should give 'th' and 'he' at 0.5 each."""
    text = 't h e!!'
    expected = {'th': 0.5, 'he': 0.5}
    result = etao.ngram_frequency(text, 2, only_alpha=True)
    self.assertEqual(result, expected)
def test_ngram_freq(self):
    """Bigram frequencies of 'the' should be evenly split between 'th' and 'he'."""
    text = 'the'
    expected = {'th': 0.5, 'he': 0.5}
    result = etao.ngram_frequency(text, 2)
    self.assertEqual(result, expected)
def test_ngram_freq_no_preserve_format(self):
    """Check bigram frequencies of 't h e!!' when preserve_format=False.

    The expected result is {'th': 0.5, 'he': 0.5}.
    """
    expected = {'th': 0.5, 'he': 0.5}
    observed = etao.ngram_frequency('t h e!!', 2, preserve_format=False)
    self.assertEqual(observed, expected)
def test_ngram_freq_preserve_format(self):
    """With default arguments, 't h.e!!' is expected to yield no bigrams."""
    observed = etao.ngram_frequency('t h.e!!', 2)
    expected = {}
    self.assertEqual(observed, expected)