def test_startswith(self):
     pd = PrefixSet()
     keys = ["".join(combo) for combo in itertools.product("abc", repeat=3)]
     for key in reversed(keys):
         pd.add(key)
     subset = [k for k in keys if k.startswith("ab")]
     self.assertSequenceEqual(subset, list(pd.startswith("ab")))
 def insert_search_delete(self, keys):
     pd = PrefixSet()
     for key in keys:
         pd.add(key)
     self.assertEqual(len(pd), len(set(keys)))
     for key in keys:
         self.assertIn(key, pd)
     for key in keys:
         pd.discard(key)
     self.assertEqual(len(pd), 0)
     for key in keys:
         self.assertFalse(key in pd)
     self.assertEqual(len(pd._root), 0)
Example #5
 def test_iter_post_el(self):
     # iterating after removing an element must not raise
     pd = PrefixSet(['a', 'b', 'c'])
     pd.remove('b')
     list(pd)
Example #6
 def test_commonprefix_full(self):
     pd = PrefixSet(['abcd'])
     self.assertEqual('abcd', pd.commonprefix('abcd'))
Example #7
 def test_commonprefix_half(self):
     pd = PrefixSet(['abcd'])
     self.assertEqual(b'ab', pd.commonprefix('abef'))
Example #8
 def test_commonprefix_empty(self):
     pd = PrefixSet(['abcd'])
     self.assertEqual(b'', pd.commonprefix('efgh'))
Example #9
 def test_sort_order(self):
     pd = PrefixSet()
     keys = ['', 'a', 'aa', 'ab', 'b', 'ba']
     for key in reversed(keys):
         pd.add(key)
     self.assertSequenceEqual(keys, list(iter(pd)))
Example #11
 def test_init_iterable(self):
     pd = PrefixSet(['a'])
     self.assertIn('a', pd)
Example #18
 def test_pickle(self):
     pd = PrefixSet()
     pd.add('a')
     pickle.dumps(pd, pickle.HIGHEST_PROTOCOL)
Example #19
 def test_invalid_key(self):
     pd = PrefixSet()
     # keys must be strings (or bytes); a non-string key should be rejected
     self.assertRaises(TypeError, pd.add, 0)
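Taken together, the unit tests above cover the core PrefixSet API: add, discard/remove, membership, length, sorted iteration, startswith and commonprefix. Below is a minimal standalone sketch of those operations, not taken from any of the projects on this page; it only assumes the prefixtree package is installed, and the keys are made up for illustration.

from prefixtree import PrefixSet

ps = PrefixSet(["abcd", "abce", "bcde"])   # construct from any iterable of keys
ps.add("abcf")                             # insert a key
ps.discard("bcde")                         # remove a key

print(len(ps))                      # number of distinct keys -> 3
print("abcd" in ps)                 # membership test -> True
print(list(ps))                     # iteration yields keys in sorted order
print(list(ps.startswith("abc")))   # every key sharing the prefix "abc"
print(ps.commonprefix("abxy"))      # longest prefix of "abxy" shared with a stored key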
Example #20
File: places.py  Project: zh-tbug/bixin
# -*- coding: utf-8 -*-

import json
import os
import re
from prefixtree import PrefixSet

file = os.path.join(os.path.dirname(__file__), "../dictionaries/areas.json")
places = os.path.join(os.path.dirname(__file__), "../dictionaries/places.txt")

import ast

ps = PrefixSet()
with open(file, 'r') as f,\
        open(places, 'w') as out:
    content = f.read()
    # grab every double-quoted string in the raw JSON text
    spaces = re.findall('"[^"]+"', content)
    for s in spaces:
        # literal_eval strips the quotes and decodes any \uXXXX escapes
        space = ast.literal_eval(s)
        ps.add(space)
        # one userdict line per place: word, frequency, POS tag
        out.write("%s 30000 ns\n" % space)  # 北京 34488 ns

assert "大连" not in ps

assert ps.startswith("大连")

for x in ps.startswith("大连"):
    print(x)
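Note that places.py imports json but never uses it; the place names are pulled out of areas.json with a regular expression plus ast.literal_eval. A possible alternative, sketched below for Python 3 only and reusing the file path and ps set defined above, is to parse the JSON properly and collect every string it contains with a hypothetical iter_strings helper; the exact nesting of areas.json is not shown on this page, so the walker makes no assumption about it.

import json

def iter_strings(node):
    # recursively yield every string contained in a parsed JSON structure
    if isinstance(node, str):
        yield node
    elif isinstance(node, dict):
        for key, value in node.items():
            yield from iter_strings(key)
            yield from iter_strings(value)
    elif isinstance(node, list):
        for item in node:
            yield from iter_strings(item)

with open(file, 'r') as f:
    for name in iter_strings(json.load(f)):
        ps.add(name)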
Example #21
 def test_reversed(self):
     pd = PrefixSet()
     keys = [''.join(combo) for combo in itertools.product('abc', repeat=3)]
     for key in keys:
         pd.add(key)
     self.assertSequenceEqual(list(reversed(keys)), list(reversed(pd)))
Example #23
 def test_startswith_empty(self):
     pd = PrefixSet()
     pd.add('a')
     self.assertSequenceEqual([], list(pd.startswith('b')))
Example #24
File: tagger.py  Project: zxuer2020/bixin
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data")

pos_result = os.path.join(DATA_DIR, "pos_combined.txt")

neg_result = os.path.join(DATA_DIR, "neg_combined.txt")

# DICTIONARIES_DIR is defined elsewhere in the project; this folder holds the NTUSD
# (National Taiwan University Sentiment Dictionary), simplified Chinese edition
ntusd_dir = os.path.join(DICTIONARIES_DIR, "台湾大学NTUSD简体中文情感词典")

pos_file = os.path.join(ntusd_dir, "NTUSD_positive_simplified.txt")

neg_file = os.path.join(ntusd_dir, "NTUSD_negative_simplified.txt")

new_line = "%s\n"

ps = PrefixSet()

pos_sentences = set()
neg_sentences = set()

places = os.path.join(os.path.dirname(__file__), "../dictionaries/places.txt")

with open(places) as f:
    # tokenizer (presumably a jieba tokenizer, defined elsewhere in tagger.py)
    # reads the whole userdict file, leaving the file position at the end
    tokenizer.load_userdict(f)

    # rewind so the same lines can be read again into the PrefixSet
    f.seek(0)
    for line in f:
        s = line.strip().split()[0]  # each line is "word frequency tag"; keep the word
        ps.add(s)
    # print(pseg.lcut("大连"))
    # x, y = pseg.lcut("大连")[0]
    # assert y == "ns"