def test_explicit_vocab_roundtrip(tmp_path):
    """Write an ``ExplicitVocab`` to disk and check the reloaded vocab equals it.

    The indexer is built from the strings "0".."9" and the vocab words are the
    strings "10".."99"; the file is written below pytest's ``tmp_path``.
    """
    indexer_items = list(map(str, range(10)))
    words = list(map(str, range(10, 100)))
    target = tmp_path / "write_explicit_vocab.fifu"

    original = ExplicitVocab(words, indexer=ExplicitIndexer(indexer_items))
    original.write(target)

    restored = load_vocab(target)
    assert original == restored
def test_ff_buckets_lookup(tests_root):
    """Check word and index lookups on the ``ff_buckets.fifu`` test vocab.

    Verifies that "one" is the first word and maps to index 0, and that
    ``idx`` for "tübingen" yields exactly the expected indices once sorted.
    """
    vocab = load_vocab(tests_root / "data" / "ff_buckets.fifu")

    first_word = vocab.words[0]
    assert first_word == "one"
    assert vocab["one"] == 0

    # Expected indices for "tübingen", listed in ascending order.
    expected_indices = [
        14, 69, 74, 124, 168, 181, 197, 246, 250, 276, 300, 308, 325,
        416, 549, 590, 648, 651, 707, 717, 761, 817, 820, 857, 860, 1007,
    ]
    found_indices = sorted(vocab.idx("tübingen"))
    assert found_indices == expected_indices
def test_reading(tests_root):
    """Check ``load_vocab`` failures on bad inputs, then a successful read.

    Each entry in ``failing_inputs`` pairs an argument with the exception the
    call is expected to raise. Afterwards the ``simple_vocab.fifu`` fixture is
    loaded and its first word is checked.
    """
    failing_inputs = (
        (TypeError, None),
        # 0 opens sys.stdin, should result in an error when trying to read magic
        (FinalfusionFormatError, 0),
        (IOError, "foo"),
    )
    for expected_error, bad_input in failing_inputs:
        with pytest.raises(expected_error):
            finalfusion.vocab.load_vocab(bad_input)

    simple_vocab = load_vocab(tests_root / "data" / "simple_vocab.fifu")
    assert simple_vocab.words[0] == "Paris"
def test_simple_roundtrip(tests_root, tmp_path):
    """Write the simple vocab fixture to disk and check it survives a reload.

    The vocab loaded from ``simple_vocab.fifu`` is written below pytest's
    ``tmp_path`` and read back. Beyond checking that the reloaded object is
    truthy, the word lists of the original and the reloaded vocab are
    compared, so a write/read cycle that drops or alters words fails the test.
    """
    filename = tmp_path / "write_simple.fifu"
    original = load_vocab(tests_root / "data" / "simple_vocab.fifu")
    original.write(filename)

    reloaded = load_vocab(filename)
    # Original check, kept so this test is never weaker than before.
    assert reloaded
    # A roundtrip must preserve content, not merely produce some vocab.
    assert reloaded.words == original.words
def test_fifu_buckets_roundtrip(tests_root, tmp_path):
    """Write the ``ff_buckets.fifu`` vocab to disk and check the reload equals it.

    The output file is created below pytest's ``tmp_path``.
    """
    source = tests_root / "data" / "ff_buckets.fifu"
    target = tmp_path / "write_ff_buckets.fifu"

    original = load_vocab(source)
    original.write(target)

    reloaded = load_vocab(target)
    assert original == reloaded
def test_fasttext_vocab_roundtrip(tmp_path):
    """Write a ``FastTextVocab`` to disk and check the reloaded vocab equals it.

    The vocab is built from the strings "0".."9"; the file is written below
    pytest's ``tmp_path``.
    """
    words = list(map(str, range(10)))
    target = tmp_path / "write_ft_vocab.fifu"

    original = FastTextVocab(words)
    original.write(target)

    restored = load_vocab(target)
    assert original == restored