def test_token_index_simple(self):
    # Sanity-check token_index on a short hand-written string: the entry
    # for each token must contain every position at which it occurs.
    # NOTE(review): the expected values look like zero-based word offsets
    # ('adams' = 0, 'wat' = 1, ...) -- confirm against token_index.
    sample = 'adams wat badcat xcat $9000 xcat'
    index = token_index(sample)
    print(index)

    # 'xcat' appears twice, so both of its offsets must be recorded.
    expected_positions = ((1, 'wat'), (3, 'xcat'), (5, 'xcat'))
    for position, token in expected_positions:
        self.assertIn(position, index[token])
def test_freq_dist_dict_full(self):
    # Smoke test over a full sample document: read the raw text, strip
    # punctuation, drop stop words, build the token index, and write its
    # pretty-printed form under target_out for manual inspection.
    # The test makes no assertions; it fails only if a stage raises.
    source_path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
    output_path = '{}{}'.format(target_out, '2011-1-19token_index')

    # Read bytes and decode explicitly. Calling .decode() on the result
    # of a text-mode read only works on Python 2; on Python 3 that read
    # already yields str, which has no .decode().
    with open(source_path, 'rb') as f:
        text = f.read().decode('utf-8')

    text = remove_punctuation(text)
    stopped = stop_words(text)
    ti = token_index(stopped)

    with open(output_path, 'w') as out_file:
        out_file.write(pformat(ti))