def benchmark_fst():
    """Time building an FST set from SENTENCES and searching it for every key.

    Builds a ``rust_fst.Set`` from ``SENTENCES`` at ``temp.fst``, runs an
    exact (``max_dist=0``) search for each entry of ``KEYS``, and prints the
    elapsed time in seconds covering both the build and the searches.
    """
    # Imported locally so the rest of the module does not require rust_fst.
    from rust_fst import Set

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # if the system clock is adjusted while the benchmark is running.
    start = time.perf_counter()
    lines = Set.from_iter(SENTENCES, path="temp.fst")
    for key in KEYS:
        # list() consumes the whole result, so the full search cost is
        # measured even if results are produced lazily.
        list(lines.search(key, max_dist=0))
    print(time.perf_counter() - start)
def test_isdisjoint(tmpdir, fst_set):
    """Check disjointness between the fixture set and a second set.

    A second set holding only the keys 'ene' and 'mene' is written to
    ``other.fst``; the test expects it to be disjoint from ``fst_set`` in
    both directions, and to be neither a subset nor a superset of it.
    """
    other_file = str(tmpdir.join('other.fst'))
    do_build(other_file, keys=[u'ene', u'mene'])
    unrelated = Set(other_file)

    # Disjointness is expected to hold regardless of operand order.
    assert fst_set.isdisjoint(unrelated)
    assert unrelated.isdisjoint(fst_set)

    # A set is expected not to be disjoint from itself.
    assert not fst_set.isdisjoint(fst_set)

    # The two sets are expected to have no containment relation either.
    assert not fst_set.issuperset(unrelated)
    assert not fst_set.issubset(unrelated)
def do_build(path, keys=TEST_KEYS, sorted_=True):
    """Write a set file at *path* by inserting *keys* one at a time.

    Args:
        path: Destination passed to ``Set.build``.
        keys: Keys to insert; defaults to ``TEST_KEYS``.
        sorted_: When true, keys are inserted in ``sorted()`` order;
            otherwise they are inserted in the order given.
    """
    with Set.build(path) as builder:
        ordered_keys = sorted(keys) if sorted_ else keys
        for item in ordered_keys:
            builder.insert(item)
def test_issuperset(tmpdir, fst_set):
    """Check ``issuperset`` against a smaller set and against itself.

    The second set is built from ``TEST_KEYS`` with its last two entries
    dropped.
    """
    smaller_file = str(tmpdir.join('other.fst'))
    do_build(smaller_file, keys=TEST_KEYS[:-2])
    smaller = Set(smaller_file)

    assert fst_set.issuperset(smaller)
    # A set is expected to be a superset of itself.
    assert fst_set.issuperset(fst_set)
def test_load_badfile(tmpdir):
    """Opening a file of sixteen 0xFF bytes must raise ``lib.TransducerError``."""
    junk_file = tmpdir.join("bad.fst")
    junk_bytes = b'\xFF' * 16
    with junk_file.open('wb') as handle:
        handle.write(junk_bytes)

    with pytest.raises(lib.TransducerError):
        Set(str(junk_file))
def test_build_memory():
    """Build a set from the sorted test keys with no path; expect 4 members."""
    ordered_keys = sorted(TEST_KEYS)
    in_memory = Set.from_iter(ordered_keys)
    assert len(in_memory) == 4
def test_build_baddir():
    """Building a set under a non-existent directory must raise ``OSError``."""
    missing_target = "/guaranteed-to-not-exist/set.fst"
    # One combined ``with`` is equivalent to nesting: pytest.raises is
    # entered first and therefore wraps the entire build.
    with pytest.raises(OSError), Set.build(missing_target) as builder:
        for item in sorted(TEST_KEYS):
            builder.insert(item)
def fst_set(tmpdir):
    """Build ``test.fst`` under *tmpdir* with the default keys and open it.

    Returns:
        A ``Set`` opened from the freshly built file.

    NOTE(review): tests in this file take ``fst_set`` as a parameter, i.e.
    they use it as a pytest fixture, but no ``@pytest.fixture`` decorator is
    visible on this definition -- confirm that it is registered as one.
    """
    target = tmpdir.join('test.fst')
    built_path = str(target)
    do_build(built_path)
    return Set(built_path)
def test_intersection():
    """Intersecting {bar, foo} with {baz, foo} must yield only 'foo'."""
    left = Set.from_iter(["bar", "foo"])
    right = Set.from_iter(["baz", "foo"])
    shared = list(left.intersection(right))
    assert shared == ["foo"]
def test_symmetric_difference():
    """Symmetric difference of {bar, foo} and {baz, foo} must be bar, baz."""
    left = Set.from_iter(["bar", "foo"])
    right = Set.from_iter(["baz", "foo"])
    unshared = list(left.symmetric_difference(right))
    assert unshared == ["bar", "baz"]
def test_difference():
    """Subtracting {baz, foo} from {bar, foo} must leave only 'bar'."""
    left = Set.from_iter(["bar", "foo"])
    right = Set.from_iter(["baz", "foo"])
    only_left = list(left.difference(right))
    assert only_left == ["bar"]
def test_union():
    """Union of {bar, foo} and {baz, foo} must yield bar, baz, foo in order."""
    left = Set.from_iter(["bar", "foo"])
    right = Set.from_iter(["baz", "foo"])
    combined = list(left.union(right))
    assert combined == ["bar", "baz", "foo"]