示例#1
0
 def test_startswith(self):
     """startswith('ab') yields exactly the 'ab'-prefixed keys, in key-list order."""
     words = list(map(''.join, itertools.product('abc', repeat=3)))
     trie = PrefixSet()
     # Insert back to front so the expected order differs from insertion order.
     for word in words[::-1]:
         trie.add(word)
     expected = list(filter(lambda w: w.startswith('ab'), words))
     found = list(trie.startswith('ab'))
     self.assertSequenceEqual(expected, found)
 def test_startswith(self):
     """Prefix lookup returns only the keys beginning with "ab", ordered like the key list."""
     trie = PrefixSet()
     triples = itertools.product("abc", repeat=3)
     all_keys = ["".join(triple) for triple in triples]
     # Keys go in last-to-first; the comparison below is order-sensitive.
     for index in range(len(all_keys) - 1, -1, -1):
         trie.add(all_keys[index])
     expected = []
     for candidate in all_keys:
         if candidate.startswith("ab"):
             expected.append(candidate)
     self.assertSequenceEqual(expected, list(trie.startswith("ab")))
示例#3
0
 def insert_search_delete(self, keys):
     """Add every key, verify membership, discard every key, verify emptiness.

     `keys` is iterated several times and may contain duplicates; the
     expected size after insertion is the number of distinct keys.
     """
     trie = PrefixSet()
     for item in keys:
         trie.add(item)
     distinct_count = len(set(keys))
     self.assertEqual(len(trie), distinct_count)
     for item in keys:
         self.assertIn(item, trie)
     for item in keys:
         trie.discard(item)
     self.assertEqual(len(trie), 0)
     for item in keys:
         still_present = item in trie
         self.assertFalse(still_present)
     # The internal `_root` object must also report length zero afterwards.
     self.assertEqual(len(trie._root), 0)
 def insert_search_delete(self, keys):
     """Exercise the add / contains / discard cycle of a PrefixSet with `keys`.

     Checks the size after insertion (distinct keys only), that each key is
     found, and that the set and its `_root` are empty once all are discarded.
     """
     prefix_set = PrefixSet()

     # Phase 1: insertion.
     for entry in keys:
         prefix_set.add(entry)
     unique_entries = set(keys)
     self.assertEqual(len(prefix_set), len(unique_entries))

     # Phase 2: lookup.
     for entry in keys:
         self.assertIn(entry, prefix_set)

     # Phase 3: removal.
     for entry in keys:
         prefix_set.discard(entry)
     self.assertEqual(len(prefix_set), 0)
     for entry in keys:
         self.assertFalse(entry in prefix_set)
     root_size = len(prefix_set._root)
     self.assertEqual(root_size, 0)
示例#5
0
 def test_pickle(self):
     """Smoke test: pickling a non-empty PrefixSet must not raise."""
     trie = PrefixSet()
     trie.add('a')
     # Only serialization is exercised; the payload is never loaded back.
     protocol = pickle.HIGHEST_PROTOCOL
     pickle.dumps(trie, protocol)
示例#6
0
 def test_reversed(self):
     """reversed() over a PrefixSet yields the keys in the reverse of key-list order."""
     words = list(map(''.join, itertools.product('abc', repeat=3)))
     trie = PrefixSet()
     for word in words:
         trie.add(word)
     expected = words[::-1]
     actual = list(reversed(trie))
     self.assertSequenceEqual(expected, actual)
示例#7
0
 def test_sort_order(self):
     """Iteration yields keys in the listed order regardless of insertion order."""
     expected = ['', 'a', 'aa', 'ab', 'b', 'ba']
     trie = PrefixSet()
     # Insert in the opposite order to the one iteration is expected to give.
     for word in expected[::-1]:
         trie.add(word)
     iterated = list(iter(trie))
     self.assertSequenceEqual(expected, iterated)
示例#8
0
 def test_startswith_empty(self):
     """A prefix that matches no stored key produces an empty result."""
     trie = PrefixSet()
     trie.add('a')
     matches = list(trie.startswith('b'))
     self.assertSequenceEqual([], matches)
 def test_pickle(self):
     """A PrefixSet holding one key can be dumped at the highest pickle protocol."""
     prefix_set = PrefixSet()
     prefix_set.add("a")
     # The returned bytes are discarded; the test passes if dumps() does not raise.
     highest = pickle.HIGHEST_PROTOCOL
     pickle.dumps(prefix_set, highest)
 def test_reversed(self):
     """Reverse iteration returns the stored keys back to front."""
     prefix_set = PrefixSet()
     all_keys = []
     for triple in itertools.product("abc", repeat=3):
         all_keys.append("".join(triple))
     for entry in all_keys:
         prefix_set.add(entry)
     backwards = list(reversed(all_keys))
     self.assertSequenceEqual(backwards, list(reversed(prefix_set)))
 def test_sort_order(self):
     """Keys inserted in reverse come back from iteration in the listed order."""
     prefix_set = PrefixSet()
     ordered = ["", "a", "aa", "ab", "b", "ba"]
     # Walk the list from the end so insertion order is the opposite of `ordered`.
     for index in range(len(ordered) - 1, -1, -1):
         prefix_set.add(ordered[index])
     self.assertSequenceEqual(ordered, list(iter(prefix_set)))
 def test_startswith_empty(self):
     """startswith() with an unmatched prefix yields nothing."""
     prefix_set = PrefixSet()
     prefix_set.add("a")
     no_matches = prefix_set.startswith("b")
     self.assertSequenceEqual([], list(no_matches))
示例#13
0
文件: places.py 项目: zh-tbug/bixin
# -*- coding: utf-8 -*-

import json
import os
import re
from prefixtree import PrefixSet

# Input: JSON file of area names.  Output: a plain-text dictionary with one
# "<name> 30000 ns" line per name (presumably the jieba user-dictionary
# layout of word / frequency / POS tag -- confirm against the consumer).
file = os.path.join(os.path.dirname(__file__), "../dictionaries/areas.json")
places = os.path.join(os.path.dirname(__file__), "../dictionaries/places.txt")

import ast
ps = PrefixSet()
# Both files hold Chinese text, so pin UTF-8 instead of relying on the
# platform's default encoding.
with open(file, 'r', encoding='utf-8') as f,\
        open(places, 'w', encoding='utf-8') as out:
    content = f.read()
    # Every double-quoted token in the raw JSON text is taken as a name.
    spaces = re.findall('"[^"]+"', content)
    for s in spaces:
        # Turn the quoted token (quotes included) into the bare string.
        space = ast.literal_eval(s)
        ps.add(space)
        out.write("%s 30000 ns\n" % space)  # e.g. "北京 34488 ns"

# Sanity checks on the generated set: the bare two-character name is not an
# entry on its own, but at least one entry starts with it.
assert "大连" not in ps

# Materialize the result before testing it: an iterator object is truthy
# even when it yields nothing, which would make the assertion vacuous.
assert list(ps.startswith("大连"))

for x in ps.startswith("大连"):
    print(x)
示例#14
0
文件: tagger.py 项目: zxuer2020/bixin
# Template for writing one value per line.
new_line = "%s\n"

# Prefix-searchable set of place names, filled from the dictionary below.
ps = PrefixSet()

pos_sentences = set()
neg_sentences = set()

places = os.path.join(os.path.dirname(__file__), "../dictionaries/places.txt")

# The dictionary holds Chinese text; read it as UTF-8 regardless of locale.
with open(places, encoding="utf-8") as f:
    tokenizer.load_userdict(f)
    # The loader was handed the open file and presumably reads it to the
    # end; rewind so the loop below sees the lines again.  The seek is a
    # harmless no-op if the loader did not consume the file.
    f.seek(0)

    for line in f:
        fields = line.split()
        if not fields:
            # Blank line: nothing to index (avoids IndexError on fields[0]).
            continue
        # The first whitespace-separated field is the word itself.
        ps.add(fields[0])
    # Manual check once used here: pseg.lcut("大连") should return the word
    # tagged as "ns".

def is_space(word):
    """Return the number of entries in `ps` that start with `word`.

    The result is an int (0 when nothing matches), so it can also be used
    as a truth value.
    """
    matches = ps.startswith(word)
    return sum(1 for _ in matches)


def common_igrnoe(word, tag, text_len):
    word_len = len(word)
    if word_len == 1:  # before accuracy: 0.663919
        return None
    elif tag.startswith('u'):  # u 助词