示例#1
0
def test_invalid_algo(algo):
    """An unknown algorithm name must be rejected with ``ValueError``.

    ``algo`` is supplied from outside this function; values that are members
    of ``cmph._ALGOS`` are skipped because they are valid algorithm names.
    """
    if algo in cmph._ALGOS:
        pytest.skip("Random algo is a known algo !")

    # Open the input outside the ``raises`` block so that only the
    # ``generate_hash`` call can satisfy the expected ``ValueError``.
    with open(_words) as test_input:
        with pytest.raises(ValueError):
            cmph.generate_hash(test_input, algorithm=algo)
示例#2
0
def test_invalid_hash_fn(hash_fns):
    """A ``hash_fns`` list containing an unknown name must raise ``ValueError``.

    Inputs are filtered with ``assume`` so that the list has more than one
    entry and at least one entry is not in ``cmph._HASH_FNS``.
    """
    assume(len(hash_fns) > 1)
    assume(any(fn not in cmph._HASH_FNS for fn in hash_fns))

    # Open the input outside the ``raises`` block so that only the
    # ``generate_hash`` call can satisfy the expected ``ValueError``.
    with open(_words) as test_input:
        with pytest.raises(ValueError):
            cmph.generate_hash(test_input, hash_fns=hash_fns)
示例#3
0
def test_invalid_hash_fn(hash_fns):
    """Check that unknown hash function names are rejected.

    ``assume`` discards inputs with fewer than two entries or with no entry
    outside ``cmph._HASH_FNS``; the remaining inputs must make
    ``cmph.generate_hash`` raise ``ValueError``.
    """
    assume(len(hash_fns) > 1)
    assume(any(fn not in cmph._HASH_FNS for fn in hash_fns))

    # Keep the ``raises`` scope down to the call under test; a ValueError
    # from opening the word list must not count as a pass.
    with open(_words) as test_input:
        with pytest.raises(ValueError):
            cmph.generate_hash(test_input, hash_fns=hash_fns)
示例#4
0
def test_invalid_algo(algo):
    """Check that an algorithm name outside ``cmph._ALGOS`` is rejected.

    The test is skipped when ``algo`` is a member of ``cmph._ALGOS``;
    otherwise ``cmph.generate_hash`` is expected to raise ``ValueError``.
    """
    if algo in cmph._ALGOS:
        pytest.skip("Random algo is a known algo !")

    # Keep the ``raises`` scope down to the call under test; a ValueError
    # from opening the word list must not count as a pass.
    with open(_words) as test_input:
        with pytest.raises(ValueError):
            cmph.generate_hash(test_input, algorithm=algo)
示例#5
0
def test_each_algo_defaults(tmpdir, algo):
    """Round-trip a hash built with ``algo`` through save/load.

    The hash is generated from a word file, saved under ``tmpdir``, loaded
    back, and both objects must agree on every line of the word file.
    """
    if algo == 'brz':
        pytest.skip("brz is known to segfault on some machines")

    # bmz8 uses its own word list; every other algorithm uses the default.
    corpus = _words8 if algo == 'bmz8' else _words

    with open(corpus) as source:
        built = cmph.generate_hash(source, algorithm=algo)

    target = tmpdir.ensure('%s.mph' % algo)
    with target.open('w') as sink:
        built.save(sink)
    with target.open() as stored:
        restored = cmph.load_hash(stored)

    with open(corpus) as source:
        for line in source:
            assert built(line) == restored(line)

    # Drop both objects explicitly so that any problem in __del__ shows up
    # inside this test.
    del built, restored
示例#6
0
def test_each_algo_defaults(tmpdir, algo):
    """Build a hash with ``algo``, persist it, reload it and compare lookups.

    Every line of the word file must map to the same value in the freshly
    built hash and in the one loaded back from ``tmpdir``.
    """
    if algo == "brz":
        pytest.skip("brz is known to segfault on some machines")

    # Only bmz8 is fed the alternative word list.
    word_file = _words8 if algo == "bmz8" else _words

    with open(word_file) as handle:
        original = cmph.generate_hash(handle, algorithm=algo)

    mph_file = tmpdir.ensure("%s.mph" % algo)
    with mph_file.open("w") as writer:
        original.save(writer)
    with mph_file.open() as reader:
        reloaded = cmph.load_hash(reader)

    with open(word_file) as handle:
        for entry in handle:
            assert original(entry) == reloaded(entry)

    # Explicit deletion: any misbehaviour in __del__ should surface here.
    del original, reloaded
示例#7
0
    def _storedata(self, path, keys):
        """Rebuild the on-disk store at ``path`` for ``keys``.

        Any existing tree at ``path`` is removed and recreated. A hash is
        generated over ``keys`` and saved to ``self._mph_path``. A
        ``VarArray`` at ``path`` is then filled so that the slot at each
        key's hash value holds ``(position of key in keys, key)``; slots that
        no key maps to hold ``None``.

        Returns the ``(VarArray, hash)`` pair.
        """
        # Always start from an empty directory.
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path)

        mph = cmph.generate_hash(keys)
        mph.save(self._mph_path)

        slots = [mph(key) for key in keys]
        highest_slot = max(slots)

        position_of = {key: pos for pos, key in enumerate(keys)}
        key_at_slot = dict(zip(slots, keys))

        store = VarArray(path)

        rows = []
        for slot in range(highest_slot + 1):
            key = key_at_slot.get(slot)
            # Slots without a key are stored as explicit None placeholders.
            rows.append((position_of[key], key) if key is not None else None)

        store.extend(rows)
        store.flush()

        return store, mph
示例#8
0
def test_filename_usage(tmpdir):
    """Save and load a hash through filenames rather than file objects.

    ``_words`` is handed directly to ``generate_hash``; the saved and the
    reloaded hash must agree on every line of that file.
    """
    built = cmph.generate_hash(_words)
    saved_path = tmpdir.ensure('out.mph').strpath
    built.save(saved_path)

    restored = cmph.load_hash(saved_path)

    with open(_words) as source:
        for line in source:
            assert built(line) == restored(line)
示例#9
0
def test_filename_usage(tmpdir):
    """Exercise the filename-based API for generate, save and load.

    The hash is generated from ``_words``, saved to a path under ``tmpdir``
    and loaded back from that path; lookups on both must match.
    """
    original = cmph.generate_hash(_words)
    mph_path = tmpdir.ensure("out.mph").strpath
    original.save(mph_path)

    reloaded = cmph.load_hash(mph_path)

    with open(_words) as handle:
        for entry in handle:
            assert original(entry) == reloaded(entry)
示例#10
0
def test_str_input2(tmpdir):
    """Build a hash from the lines of ``_words`` held in a list.

    The hash is saved under ``tmpdir`` and loaded back; both objects must
    return the same value for every line.
    """
    # Read through a context manager so the file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(_words) as word_file:
        data = word_file.readlines()

    mph = cmph.generate_hash(data)
    out = tmpdir.ensure("out.mph")

    with out.open("w") as test_output:
        mph.save(test_output)

    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)

    for word in data:
        assert mph(word) == mph2(word)
示例#11
0
def test_str_input(tmpdir):
    """Build a hash from an in-memory list of strings and round-trip it.

    The saved-then-loaded hash must agree with the original on every word.
    """
    words = 'This is a string list test'.split()
    built = cmph.generate_hash(words)

    target = tmpdir.ensure('out.mph')
    with target.open('w') as sink:
        built.save(sink)
    with target.open() as stored:
        restored = cmph.load_hash(stored)

    for token in words:
        assert built(token) == restored(token)
示例#12
0
def test_str_input(tmpdir):
    """Round-trip a hash generated from a short list of words.

    The list comes from splitting a literal sentence; the hash is saved to
    ``tmpdir``, reloaded, and compared word by word with the original.
    """
    tokens = "This is a string list test".split()
    original = cmph.generate_hash(tokens)

    mph_file = tmpdir.ensure("out.mph")
    with mph_file.open("w") as writer:
        original.save(writer)
    with mph_file.open() as reader:
        reloaded = cmph.load_hash(reader)

    for token in tokens:
        assert original(token) == reloaded(token)
示例#13
0
def test_str_input2(tmpdir):
    """Round-trip a hash generated from the list of lines read from ``_words``.

    The hash is written to a file under ``tmpdir`` and read back; the
    original and the reloaded hash must agree on every line.
    """
    # Use a ``with`` block so the word file is closed deterministically
    # rather than leaking an open handle.
    with open(_words) as word_file:
        data = word_file.readlines()

    mph = cmph.generate_hash(data)
    out = tmpdir.ensure('out.mph')

    with out.open('w') as test_output:
        mph.save(test_output)

    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)

    for word in data:
        assert mph(word) == mph2(word)
示例#14
0
def test_simple_usage(tmpdir):
    """Generate a hash from an open file, save it, reload it, and compare.

    Every line of ``_words`` must map to the same value in both hashes.
    """
    with open(_words) as source:
        built = cmph.generate_hash(source)

    target = tmpdir.ensure('out.mph')
    with target.open('w') as sink:
        built.save(sink)
    with target.open() as stored:
        restored = cmph.load_hash(stored)

    with open(_words) as source:
        for line in source:
            assert built(line) == restored(line)
示例#15
0
def test_simple_usage(tmpdir):
    """Basic round trip using file objects for input, save and load.

    The hash built from ``_words`` and the copy loaded back from ``tmpdir``
    must return equal values for each line of the word file.
    """
    with open(_words) as handle:
        original = cmph.generate_hash(handle)

    mph_file = tmpdir.ensure("out.mph")
    with mph_file.open("w") as writer:
        original.save(writer)
    with mph_file.open() as reader:
        reloaded = cmph.load_hash(reader)

    with open(_words) as handle:
        for entry in handle:
            assert original(entry) == reloaded(entry)
示例#16
0
def test_unicode_input(unicrud):
    """A string and its ``convert_to_bytes`` form must hash to the same value.

    Duplicates are removed from ``unicrud`` first, and inputs with five or
    fewer distinct strings are discarded via ``assume``.
    """
    unicrud = list(set(unicrud))
    assume(len(unicrud) > 5)

    # MPH is an entropy game: low-entropy input confuses the hash
    # algorithms and prevents convergence on a solution, which would make
    # this test fail.
    # NOTE(review): the condition keeps only inputs whose _entropy equals
    # -0.0, which reads opposite to the comment above — confirm intent.
    assume(_entropy(unicrud) == -0.0)

    mph = cmph.generate_hash(unicrud)

    # Deliberately break the encapsulation: the same conversion is known to
    # be applied under the hood.
    encoded = list(map(convert_to_bytes, unicrud))
    for text, raw in zip(unicrud, encoded):
        assert mph(raw) == mph(text)
示例#17
0
def test_unicode_input(unicrud):
    """Hashing a string must match hashing its ``convert_to_bytes`` result.

    The input is de-duplicated, and ``assume`` rejects cases with five or
    fewer distinct strings before the hash is generated.
    """
    unicrud = list(set(unicrud))
    assume(len(unicrud) > 5)

    # Building an MPH depends on entropy: low-entropy input can keep the
    # hash algorithms from converging on a solution and fail this test.
    # NOTE(review): only inputs with _entropy == -0.0 pass this filter,
    # which seems at odds with the comment above — confirm intent.
    assume(_entropy(unicrud) == -0.0)

    mph = cmph.generate_hash(unicrud)

    # Break the encapsulation on purpose, knowing this conversion is what
    # happens under the hood.
    as_bytes = list(map(convert_to_bytes, unicrud))
    for source_text, byte_form in zip(unicrud, as_bytes):
        assert mph(byte_form) == mph(source_text)
示例#18
0
    if len(places) < r:
        placeref_t = t
        break

# Choose the reference type for transitions: the first entry of range2type
# whose bound r is larger than the number of transitions.
# NOTE(review): presumably range2type is ordered by ascending bound — confirm.
transref_t = None
for (r, t) in range2type:
    if len(transs) < r:
        transref_t = t
        break

# Abort when no reference type was found for either places or transitions.
if placeref_t is None or transref_t is None:
    sys.stderr.write(
        'FATAL: Unable to fit places and transitions into 64 bit integers\n')
    sys.exit(1)

# Generate one hash per name set with the 'bdz' algorithm.
place_mph = cmph.generate_hash(places, algorithm='bdz')
trans_mph = cmph.generate_hash(transs, algorithm='bdz')

# Save both hashes (pidx_fn / tidx_fn are defined outside this fragment).
place_mph.save(pidx_fn)
trans_mph.save(tidx_fn)

# Name -> index dicts, and index -> name lists pre-filled with empty strings
# (place_num / trans_num are defined outside this fragment).
place_idx = {}
trans_idx = {}
place_list = [''] * place_num
trans_list = [''] * trans_num

# Record each place's looked-up index and fill the matching list slot.
for p in places:
    place_idx[p] = place_mph.lookup(p)
    place_list[place_idx[p]] = p
# Record each transition's looked-up index.
for t in transs:
    trans_idx[t] = trans_mph.lookup(t)