Пример #1
0
def test_truncated_file_json():
    """Check that keyvi.Dictionary rejects incomplete dictionary files.

    A small JSON dictionary is compiled and written to ``tmp_dir``. Two
    partial copies are produced from it, and opening either of them must
    raise ``ValueError``. All files are removed again, even when the test
    fails.
    """
    full_path = os.path.join(tmp_dir, 'truncation_test.kv')
    first_part_path = os.path.join(tmp_dir, 'truncation_test1.kv')
    second_part_path = os.path.join(tmp_dir, 'truncation_test2.kv')

    c = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    c.Add('a', '{1:2}')
    c.Add('b', '{2:4}')
    c.Add('c', '{4:4}')
    c.Add('d', '{2:3}')
    c.Compile()

    try:
        c.WriteToFile(full_path)
        size = os.path.getsize(full_path)

        # Context managers guarantee that every handle is closed, including
        # the source handle, before the files are opened by keyvi or removed.
        with open(full_path, 'rb') as fd_in:
            with open(first_part_path, 'wb') as fd:
                fd.write(fd_in.read(int(size / 2)))

            # NOTE(review): this read continues from the current offset of
            # fd_in, so the second file receives the remaining bytes of the
            # source rather than its first ``size - 2`` bytes. Kept unchanged
            # to preserve what the test exercises - confirm the intent.
            with open(second_part_path, 'wb') as fd2:
                fd2.write(fd_in.read(int(size - 2)))

        with pytest.raises(ValueError):
            keyvi.Dictionary(first_part_path)
        with pytest.raises(ValueError):
            keyvi.Dictionary(second_part_path)
    finally:
        # Only remove what was actually created so that an early failure is
        # not masked by an error from os.remove.
        for file_path in (second_part_path, first_part_path, full_path):
            if os.path.exists(file_path):
                os.remove(file_path)
Пример #2
0
def test_truncated_file_json():
    """Check that keyvi.Dictionary rejects a dictionary file cut in half.

    A small JSON dictionary is compiled and written to ``tmp_dir``; the first
    half of that file is copied to a second file, and opening the copy must
    raise ``ValueError``. Both files are removed again, even when the test
    fails.
    """
    full_path = os.path.join(tmp_dir, 'truncation_test.kv')
    truncated_path = os.path.join(tmp_dir, 'truncation_test1.kv')

    c = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    c.Add('a', '{1:2}')
    c.Add('b', '{2:4}')
    c.Add('c', '{4:4}')
    c.Add('d', '{2:3}')
    c.Compile()

    try:
        c.WriteToFile(full_path)
        size = os.path.getsize(full_path)

        # Both handles are closed on leaving the block; the source handle was
        # previously left open for the rest of the test.
        with open(full_path, 'rb') as fd_in:
            with open(truncated_path, 'wb') as fd:
                fd.write(fd_in.read(int(size / 2)))

        exception_caught = False
        try:
            keyvi.Dictionary(truncated_path)
        except ValueError:
            exception_caught = True

        assert exception_caught
    finally:
        # Only remove what was actually created so that an early failure is
        # not masked by an error from os.remove.
        for file_path in (truncated_path, full_path):
            if os.path.exists(file_path):
                os.remove(file_path)
def test_merge(merger):
    """Merge three generated dictionaries and verify the resulting key set.

    Args:
        merger: object providing ``Add(file)`` and ``Merge(file)``; supplied
            by the caller (presumably a pytest fixture - confirm).

    The merged dictionary must contain exactly the sorted union of
    ``keys_1``, ``keys_2`` and ``keys_3``.
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        file_1 = path.join(tmp_dir, 'test_merger_1.kv')
        file_2 = path.join(tmp_dir, 'test_merger_2.kv')
        file_3 = path.join(tmp_dir, 'test_merger_3.kv')
        merge_file = path.join(tmp_dir, 'merge.kv')

        generate_keyvi(keys_1, file_1)
        generate_keyvi(keys_2, file_2)
        generate_keyvi(keys_3, file_3)

        for input_file in (file_1, file_2, file_3):
            merger.Add(input_file)
        merger.Merge(merge_file)

        merged_dictionary = keyvi.Dictionary(merge_file)

        expected_keys = sorted(set(keys_1) | set(keys_2) | set(keys_3))
        merged_keys = list(merged_dictionary.GetAllKeys())

        # zip() stops at the shorter input, so missing or surplus keys in the
        # merged dictionary would go unnoticed without this length check.
        assert len(merged_keys) == len(expected_keys)

        for base_key, keyvi_key in zip(expected_keys, merged_keys):
            assert decode_to_unicode(base_key) == decode_to_unicode(keyvi_key)

    finally:
        shutil.rmtree(tmp_dir)
Пример #4
0
def test_manifest_for_merger():
    """Check that a manifest set on the merger ends up in the merged file.

    Two JSON dictionaries with their own manifests are written to the current
    working directory and merged; the merged dictionary must report the
    manifest that was set on the merger.
    """
    input_file_1 = 'manifest_json_merge1.kv'
    input_file_2 = 'manifest_json_merge2.kv'
    merged_file = 'manifest_json_merged.kv'
    try:
        c = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
        c.Add("abc", '{"a" : 2}')
        c.Compile()
        c.SetManifest({"author": "Zapp Brannigan"})
        c.WriteToFile(input_file_1)
        del c

        c2 = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
        c2.Add("abd", '{"a" : 3}')
        c2.Compile()
        c2.SetManifest({"author": "Leela"})
        c2.WriteToFile(input_file_2)
        del c2

        merger = keyvi.JsonDictionaryMerger({"memory_limit_mb": "10"})
        # The inputs were previously created but never handed to the merger,
        # so their manifests could not interfere with the one set below.
        merger.Add(input_file_1)
        merger.Add(input_file_2)
        merger.SetManifest({"author": "Fry"})
        merger.Merge(merged_file)

        d = keyvi.Dictionary(merged_file)
        m = d.GetManifest()
        assert m['author'] == "Fry"
        del d

    finally:
        # Only remove what was actually created so that an early failure is
        # not masked by an error from os.remove.
        for file_name in (input_file_1, input_file_2, merged_file):
            if os.path.exists(file_name):
                os.remove(file_name)
def test_merge(merger):
    """Merge three generated dictionaries and verify keys and values.

    Args:
        merger: object providing ``Add(file)`` and ``Merge(file)``; supplied
            by the caller (presumably a pytest fixture - confirm).

    The merged dictionary must contain exactly the key-sorted union of
    ``key_values_1``, ``key_values_2`` and ``key_values_3``, where later
    mappings override earlier ones for duplicate keys.
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        file_1 = path.join(tmp_dir, 'test_merger_1.kv')
        file_2 = path.join(tmp_dir, 'test_merger_2.kv')
        file_3 = path.join(tmp_dir, 'test_merger_3.kv')
        merge_file = path.join(tmp_dir, 'merge.kv')

        generate_keyvi(key_values_1, file_1)
        generate_keyvi(key_values_2, file_2)
        generate_keyvi(key_values_3, file_3)

        for input_file in (file_1, file_2, file_3):
            merger.Add(input_file)
        merger.Merge(merge_file)

        merged_dictionary = keyvi.Dictionary(merge_file)

        key_values = {}
        key_values.update(key_values_1)
        key_values.update(key_values_2)
        key_values.update(key_values_3)

        # A sorted list of pairs is all that is needed for the comparison.
        expected_items = sorted(key_values.items())
        merged_items = list(merged_dictionary.GetAllItems())

        # zip() stops at the shorter input, so missing or surplus entries in
        # the merged dictionary would go unnoticed without this length check.
        assert len(merged_items) == len(expected_items)

        for (base_key, base_value), (keyvi_key, keyvi_value) in zip(
                expected_items, merged_items):
            assert base_key == keyvi_key
            assert base_value == keyvi_value

    finally:
        shutil.rmtree(tmp_dir)
Пример #6
0
def test_invalid_filemagic():
    """Check that a file without valid keyvi content is rejected.

    Writes the text ``dead beef`` to a file in ``tmp_dir`` and expects
    ``keyvi.Dictionary`` to raise ``ValueError`` when opening it. The file is
    removed again, even when the test fails.
    """
    broken_path = os.path.join(tmp_dir, 'broken_file')
    with open(broken_path, 'w') as fd:
        fd.write('dead beef')
    try:
        with pytest.raises(ValueError):
            keyvi.Dictionary(broken_path)
    finally:
        os.remove(broken_path)
Пример #7
0
def tmp_dictionary(compiler, file_name):
    """Compile a dictionary to a temporary file and yield it loaded.

    Args:
        compiler: object providing ``Compile()`` and ``WriteToFile(path)``.
        file_name: name of the file to create inside the system temp dir.

    Yields:
        The ``keyvi.Dictionary`` loaded from the written file.

    The file is removed once the generator is resumed or closed, including
    when the consumer raised while using the dictionary.
    """
    tmp_dir = tempfile.gettempdir()
    fq_file_name = os.path.join(tmp_dir, file_name)
    compiler.Compile()
    compiler.WriteToFile(fq_file_name)
    del compiler
    try:
        d = keyvi.Dictionary(fq_file_name)
        try:
            yield d
        finally:
            # Drop the local reference before the file is removed.
            del d
    finally:
        # Without try/finally an exception raised at the yield point would
        # skip the cleanup and leave the file behind.
        os.remove(fq_file_name)
Пример #8
0
def test_invalid_filemagic():
    """Check that a file without valid keyvi content is rejected.

    Writes the text ``dead beef`` to a file in ``tmp_dir`` and expects
    ``keyvi.Dictionary`` to raise ``ValueError`` when opening it. The file is
    removed again, even when the assertion fails.
    """
    broken_path = os.path.join(tmp_dir, 'broken_file')
    with open(broken_path, 'w') as fd:
        fd.write('dead beef')

    try:
        exception_caught = False
        try:
            keyvi.Dictionary(broken_path)
        except ValueError:
            exception_caught = True

        assert exception_caught
    finally:
        os.remove(broken_path)
Пример #9
0
def dump(args):
    """Write all entries of a keyvi dictionary to a text file.

    Args:
        args: object with the attributes ``input_file`` (dictionary to read),
            ``output_file`` (text file to write) and ``json_dumps`` (when
            truthy, keys and values are JSON-encoded before being written).

    Each entry becomes one line holding the key, followed by a tab and the
    value when the value is truthy.
    """
    dictionary = keyvi.Dictionary(args.input_file)
    with open(args.output_file, 'w') as file_out:
        for key, value in dictionary.GetAllItems():
            # Decode before JSON-encoding: json.dumps raises TypeError for
            # bytes on Python 3. UTF-8 is the Python 3 default of decode().
            if isinstance(key, bytes):
                key = key.decode('utf-8')
            if args.json_dumps:
                key = json.dumps(key)
            file_out.write(key)
            # Falsy values (None, empty string, 0, ...) are not written.
            if value:
                if args.json_dumps:
                    value = json.dumps(value)
                file_out.write('\t{}'.format(value))
            file_out.write('\n')
Пример #10
0
def test_manifest_after_compile():
    """Check that a manifest set after Compile() is stored in the file.

    A key-only dictionary is compiled, the manifest is set afterwards, and
    the dictionary written to the system temp dir must report that manifest.
    """
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    for name in ("Leela", "Kif"):
        compiler.Add(name)
    compiler.Compile()
    compiler.SetManifest({"author": "Zapp Brannigan"})

    target = os.path.join(tempfile.gettempdir(), 'brannigan_manifest2.kv')
    try:
        compiler.WriteToFile(target)
        dictionary = keyvi.Dictionary(target)
        manifest = dictionary.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
        # Drop the reference before the file is removed.
        del dictionary
    finally:
        os.remove(target)
Пример #11
0
import keyvi

# Separator character (ESC, 0x1b). It is not referenced in the visible part
# of this script.
MULTIWORD_QUERY_SEPARATOR = '\x1b'

# Most recent line entered by the user; the interactive loop below overwrites
# it and stops on "exit".
query = ""

# Load the dictionary from a path relative to the current working directory
# and wrap it in keyvi.MultiWordCompletion, which the loop below queries.
d = keyvi.Dictionary("mw-completion.keyvi")
c = keyvi.MultiWordCompletion(d)


def get_lookup_key(query):
    """Return the lookup key for a query.

    The query is split on single spaces; every word except the last one is
    sorted, the last word stays in place, and the words are re-joined with
    single spaces.
    """
    words = query.split(" ")
    leading_words, last_word = words[:-1], words[-1:]
    leading_words.sort()
    return " ".join(leading_words + last_word)


# raw_input only exists on Python 2; on Python 3 the equivalent is input.
try:
    read_query = raw_input
except NameError:
    read_query = input

# Interactive loop: read a query, print every completion with its weight,
# and stop when the user enters "exit".
while True:
    query = read_query("Query:")
    if query == "exit":
        # Leave right away instead of looking up the sentinel itself.
        break
    for m in c.GetCompletions(get_lookup_key(query.strip())):
        # print() with one argument behaves the same on Python 2 and 3.
        print("{} {}".format(m.GetMatchedString(), m.GetAttribute("weight")))
Пример #12
0
import keyvi

# Most recent line entered by the user; the interactive loop below overwrites
# it and stops on "exit".
query = ""

# Load the dictionary from a path relative to the current working directory.
d = keyvi.Dictionary("cities.keyvi")


def get_lookup_key(query):
    """Return the lookup key for a query; here the query is used unchanged."""
    lookup_key = query
    return lookup_key


# raw_input only exists on Python 2; on Python 3 the equivalent is input.
try:
    read_query = raw_input
except NameError:
    read_query = input

# Interactive loop: read a query, print every match returned by LookupText,
# and stop when the user enters "exit".
while True:
    query = read_query("Query:")
    if query == "exit":
        # Leave right away instead of looking up the sentinel itself.
        break
    for m in d.LookupText(get_lookup_key(query.strip())):
        # print() with one argument behaves the same on Python 2 and 3.
        print("{}".format(m.GetMatchedString()))
Пример #13
0
import sys
import keyvi

# Load the dictionary from a path relative to the current working directory
# and wrap it in keyvi.FsaTransform.
d = keyvi.Dictionary("normalization.keyvi")
n = keyvi.FsaTransform(d)

# Normalize standard input line by line. Each line is passed on as read,
# i.e. including its trailing newline.
for line in sys.stdin:
    # print() with one argument behaves the same on Python 2 and 3, whereas
    # the print statement is a syntax error on Python 3.
    print(n.Normalize(line))
Пример #14
0
def test_non_existing_file():
    """Check that opening a missing file raises ``ValueError``.

    The precondition is verified on the very path that is opened afterwards;
    it used to be checked against the current working directory instead of
    ``tmp_dir``.
    """
    missing_path = os.path.join(tmp_dir, 'non_existing_file')
    assert not os.path.exists(missing_path)
    with pytest.raises(ValueError):
        keyvi.Dictionary(missing_path)
Пример #15
0
def stats(input_file):
    """Print the statistics of a keyvi dictionary as JSON.

    Args:
        input_file: path of the dictionary to load.

    The result of ``GetStatistics()`` is printed with an indent of 4 and
    sorted keys.
    """
    statistics = keyvi.Dictionary(input_file).GetStatistics()
    formatted = json.dumps(statistics, indent=4, sort_keys=True)
    print(formatted)
import keyvi

# Most recent line entered by the user; the interactive loop below overwrites
# it and stops on "exit".
query = ""

# Load the dictionary from a path relative to the current working directory
# and wrap it in keyvi.PrefixCompletion, which the loop below queries.
d = keyvi.Dictionary("prefix-completion.keyvi")
c = keyvi.PrefixCompletion(d)


def get_lookup_key(query):
    """Return the lookup key for a query; here the query is used unchanged."""
    lookup_key = query
    return lookup_key


# raw_input only exists on Python 2; on Python 3 the equivalent is input.
try:
    read_query = raw_input
except NameError:
    read_query = input

# Interactive loop: read a query, print every completion with its weight,
# and stop when the user enters "exit".
while True:
    query = read_query("Query:")
    if query == "exit":
        # Leave right away instead of looking up the sentinel itself.
        break
    for m in c.GetCompletions(get_lookup_key(query.strip())):
        # print() with one argument behaves the same on Python 2 and 3.
        print("{} ({})".format(m.GetMatchedString(), m.GetAttribute("weight")))
import keyvi

# Most recent line entered by the user; the interactive loop below overwrites
# it and stops on "exit".
query = ""

# Load the dictionary from a path relative to the current working directory.
d = keyvi.Dictionary("your-own.keyvi")


def get_lookup_key(query):
    """Return the lookup key for a query; here the query is used unchanged."""
    lookup_key = query
    return lookup_key


# raw_input only exists on Python 2; on Python 3 the equivalent is input.
try:
    read_query = raw_input
except NameError:
    read_query = input

# Interactive loop: read a query, print every match with its value, and stop
# when the user enters "exit".
while True:
    query = read_query("Query:")
    if query == "exit":
        # Leave right away instead of looking up the sentinel itself.
        break
    for m in d.Get(get_lookup_key(query.strip())):
        # print() with one argument behaves the same on Python 2 and 3.
        print("{} {}".format(m.GetMatchedString(), m.GetValueAsString()))