Пример #1
0
def test_round_trip():
    """Encode a word sequence with a HuffmanCodec and decode it back.

    Two properties are checked: the encoded bit stream starts with the
    concatenation of the per-word code strings, in message order, and
    decoding that stream yields the original message.
    """
    word_counts = {
        'the': 10,
        'quick': 3,
        'brown': 4,
        'fox': 1,
        'jumped': 5,
        'over': 8,
        'lazy': 1,
        'dog': 2,
        '.': 9
    }
    codec = HuffmanCodec(word_counts.items())

    message = [
        'the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'the', 'lazy',
        'dog', '.'
    ]
    code_strings = list(codec.strings)
    # Pair each leaf symbol with the code string found at the same index.
    code_for = {
        leaf: code_strings[idx] for idx, leaf in enumerate(codec.leaves)
    }
    bits = codec.encode(message)

    # Write every byte as 8 binary digits and reverse them; the reversed
    # form is what gets compared against the code strings below.
    rendered = []
    for byte in bits.as_bytes():
        digits = '{0:b}'.format(byte).rjust(8, '0')
        rendered.append(digits[::-1])
    bit_text = ''.join(rendered)

    # Walk the bit text with a cursor, consuming one code per word.
    cursor = 0
    for word in message:
        expected = code_for[word]
        stop = cursor + len(expected)
        assert bit_text[cursor:stop] == expected
        cursor = stop

    decoded = [0] * len(message)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert message == decoded
Пример #2
0
def test_vocab_codec():
    """Round-trip vocab IDs through a HuffmanCodec built from a Vocab.

    Three entries are stored in the vocab with explicit probabilities, the
    codec is built from their (orth, prob) pairs, and an int32 array of orth
    IDs is encoded and decoded again.
    """
    def get_lex_props(string, prob):
        # Attribute dict for one vocab entry; all string-valued fields are
        # derived from `string` itself.
        props = {
            'flags': 0,
            'length': len(string),
            'orth': string,
            'lower': string,
            'norm': string,
            'shape': string,
            'prefix': string[0],
            'suffix': string[-3:],
            'cluster': 0,
            'prob': prob,
            'sentiment': 0
        }
        return props

    vocab = Vocab()
    for word, prob in (('dog', 0.001), ('the', 0.05), ('jumped', 0.005)):
        vocab[word] = get_lex_props(word, prob)

    symbol_weights = [(lexeme.orth, lexeme.prob) for lexeme in vocab]
    codec = HuffmanCodec(symbol_weights)

    bits = BitArray()

    ids = [vocab[word].orth for word in ('the', 'dog', 'jumped')]
    msg = numpy.array(ids, dtype=numpy.int32)
    # Snapshot the message before encoding so the comparison below does not
    # depend on `msg` staying untouched.
    msg_list = list(msg)
    codec.encode(msg, bits)

    # Output buffer pre-filled with 0..n-1; decode is expected to overwrite it.
    result = numpy.arange(len(msg), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, result)
    assert msg_list == list(result)
Пример #3
0
def test_vocab_codec():
    """Round-trip vocab IDs through a HuffmanCodec built from a Vocab.

    The codec is built from the (orth, prob) pairs of every entry the vocab
    yields, then an int32 array of orth IDs is encoded and decoded again.
    """
    vocab = Vocab()
    # Look each word up once and discard the result; presumably the lookup
    # registers the word in the vocab (verify against Vocab.__getitem__).
    for word in ('dog', 'the', 'jumped'):
        _ = vocab[word]

    symbol_weights = [(lexeme.orth, lexeme.prob) for lexeme in vocab]
    codec = HuffmanCodec(symbol_weights)

    bits = BitArray()

    ids = [vocab[word].orth for word in ('the', 'dog', 'jumped')]
    msg = numpy.array(ids, dtype=numpy.int32)
    # Snapshot the message before encoding so the comparison below does not
    # depend on `msg` staying untouched.
    msg_list = list(msg)
    codec.encode(msg, bits)

    # Output buffer pre-filled with 0..n-1; decode is expected to overwrite it.
    result = numpy.arange(len(msg), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, result)
    assert msg_list == list(result)
Пример #4
0
def test_attribute():
    """Round-trip an int32 array through a codec built over integer symbols.

    Each word is mapped to a small integer, the codec is built from the
    (integer, frequency) pairs, and the array [1, 7] is encoded and decoded.
    """
    freqs = {'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5, 'over': 8,
             'lazy': 1, 'dog': 2, '.': 9}

    # Number the words 0..8 in this fixed order.
    words = ('the', 'quick', 'brown', 'fox', 'jumped', 'over', 'lazy', 'dog', '.')
    int_map = {word: idx for idx, word in enumerate(words)}

    symbol_weights = [(int_map[word], count) for word, count in freqs.items()]
    codec = HuffmanCodec(symbol_weights)

    bits = BitArray()

    msg = numpy.array([1, 7], dtype=numpy.int32)
    # Snapshot the message before encoding so the comparison below does not
    # depend on `msg` staying untouched.
    msg_list = list(msg)
    codec.encode(msg, bits)

    # Zeroed output buffer with one slot per message element.
    result = numpy.zeros(2, dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, result)
    assert msg_list == list(result)
Пример #5
0
def test_vocab_codec():
    """Check that orth IDs survive an encode/decode cycle.

    A HuffmanCodec is built from the (orth, prob) pairs of the vocab's
    entries; the IDs for 'the', 'dog' and 'jumped' are then encoded into a
    BitArray and decoded back into a fresh int32 array.
    """
    vocab = Vocab()
    # The lookup results are discarded; presumably each access adds the word
    # to the vocab (verify against Vocab.__getitem__).
    for token in ('dog', 'the', 'jumped'):
        _ = vocab[token]

    weighted = [(entry.orth, entry.prob) for entry in vocab]
    codec = HuffmanCodec(weighted)

    bits = BitArray()

    ids = [vocab[token].orth for token in ('the', 'dog', 'jumped')]
    msg = numpy.array(ids, dtype=numpy.int32)
    # Keep a plain-list copy taken before encoding for the final comparison.
    msg_list = list(msg)
    codec.encode(msg, bits)

    # Buffer starts as 0..n-1; decode is expected to replace these values.
    result = numpy.arange(len(msg), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, result)
    assert msg_list == list(result)
Пример #6
0
def test_attribute():
    """Check that integer symbols survive an encode/decode cycle.

    Words are numbered 0..8, the codec is built from (number, frequency)
    pairs, and the int32 array [1, 7] is encoded into a BitArray and decoded
    back into a zeroed array of the same length.
    """
    freqs = {'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5, 'over': 8,
             'lazy': 1, 'dog': 2, '.': 9}

    # Assign each word its position in this fixed ordering.
    ordering = ('the', 'quick', 'brown', 'fox', 'jumped', 'over', 'lazy', 'dog', '.')
    int_map = {token: position for position, token in enumerate(ordering)}

    weighted = [(int_map[token], freq) for token, freq in freqs.items()]
    codec = HuffmanCodec(weighted)

    bits = BitArray()

    msg = numpy.array([1, 7], dtype=numpy.int32)
    # Keep a plain-list copy taken before encoding for the final comparison.
    msg_list = list(msg)
    codec.encode(msg, bits)

    result = numpy.zeros(2, dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, result)
    assert msg_list == list(result)
Пример #7
0
def test_round_trip():
    """Verify a HuffmanCodec encodes word-by-word and decodes losslessly.

    The encoded bytes are rendered as a string of binary digits and compared,
    code by code, with the codec's own code strings; the bit stream is then
    decoded and compared with the original message.
    """
    freqs = {'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5, 'over': 8,
             'lazy': 1, 'dog': 2, '.': 9}
    codec = HuffmanCodec(freqs.items())

    message = ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the',
               'the', 'lazy', 'dog', '.']
    strings = list(codec.strings)
    # codec.leaves and codec.strings are matched up by index.
    codes = {leaf: strings[i] for i, leaf in enumerate(codec.leaves)}
    bits = codec.encode(message)

    # Each byte becomes 8 binary digits, reversed before concatenation; the
    # reversed form is what the code strings are compared against.
    pieces = []
    for byte in bits.as_bytes():
        padded = '{0:b}'.format(byte).rjust(8, '0')
        pieces.append(padded[::-1])
    bit_text = ''.join(pieces)

    # Consume the bit text one code at a time, tracking the read position.
    position = 0
    for word in message:
        code = codes[word]
        end = position + len(code)
        assert bit_text[position:end] == code
        position = end

    unpacked = [0] * len(message)
    bits.seek(0)
    codec.decode(bits, unpacked)
    assert message == unpacked