Примеры использования HuffmanCodec в Python

Язык программирования: Python

Пространство имен/Пакет: spacy.serialize.huffman

Класс/Тип: HuffmanCodec

Примеров на hotexamples.com: 12

Python HuffmanCodec — найдено 12 примеров. Это лучшие примеры кода на Python для spacy.serialize.huffman.HuffmanCodec, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

HuffmanCodec(9)

decode(4)

encode(4)

Пример #1

Показать файл

def test_round_trip():
    """Encode a word message, verify the emitted bits code by code, then decode it.

    The codec is built from (word, frequency) pairs. The encoded bytes are
    rendered as text and must start with the concatenation of each word's code
    string, in message order; decoding must reproduce the message.
    """
    word_counts = {
        'the': 10, 'quick': 3, 'brown': 4,
        'fox': 1, 'jumped': 5, 'over': 8,
        'lazy': 1, 'dog': 2, '.': 9,
    }
    codec = HuffmanCodec(word_counts.items())

    message = ['the', 'quick', 'brown', 'fox', 'jumped', 'over',
               'the', 'the', 'lazy', 'dog', '.']

    # Map every leaf symbol to the code string stored at the same index.
    code_strings = list(codec.strings)
    code_for = {
        codec.leaves[idx]: code_strings[idx]
        for idx in range(len(codec.leaves))
    }

    bits = codec.encode(message)

    # Render each byte as eight binary digits with the digit order reversed.
    rendered = []
    for byte in bits.as_bytes():
        rendered.append('{0:b}'.format(byte).rjust(8, '0')[::-1])
    bit_text = ''.join(rendered)

    # Walk the rendered text with a cursor; each word's code must appear next.
    cursor = 0
    for word in message:
        expected = code_for[word]
        stop = cursor + len(expected)
        assert bit_text[cursor:stop] == expected
        cursor = stop

    decoded = [0] * len(message)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert message == decoded

Пример #2

Показать файл

def test_vocab(EN):
    """Check that the exp(prob)-weighted code length over EN.vocab lies in (8, 15).

    The codec is built from (orth, exp(prob)) pairs for every entry of
    ``EN.vocab``; each code string's length is weighted by exp(prob) of the
    vocab entry looked up via the leaf stored at the same index.
    """
    weighted_entries = [(entry.orth, numpy.exp(entry.prob)) for entry in EN.vocab]
    codec = HuffmanCodec(weighted_entries)
    total_bits = sum(
        len(bit_string) * numpy.exp(EN.vocab[codec.leaves[idx]].prob)
        for idx, bit_string in enumerate(codec.strings)
    )
    assert 8 < total_bits < 15

Пример #3

Показать файл

Файл: test_codecs.py Проект: Arttii/spaCy

def test_vocab_codec():
    """Encode three vocabulary ids into a BitArray and decode them back unchanged."""
    vocab = Vocab()
    # Look each word up once before the codec is built; the results are not
    # used directly (presumably the lookup registers the word — confirm).
    for word in ('dog', 'the', 'jumped'):
        _ = vocab[word]

    id_prob_pairs = [(entry.orth, entry.prob) for entry in vocab]
    codec = HuffmanCodec(id_prob_pairs)

    bits = BitArray()

    message_words = ('the', 'dog', 'jumped')
    msg = numpy.array([vocab[w].orth for w in message_words], dtype=numpy.int32)
    # Snapshot the message as a plain list before it is handed to the encoder.
    expected = list(msg)
    codec.encode(msg, bits)

    # Output buffer pre-filled with 0..n-1; decode writes into it.
    decoded = numpy.array(range(len(msg)), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert expected == list(decoded)

Пример #4

Показать файл

Файл: test_codecs.py Проект: Arttii/spaCy

def test_attribute():
    """Round-trip two integer ids through a codec built from (id, frequency) pairs."""
    freqs = {
        'the': 10, 'quick': 3, 'brown': 4,
        'fox': 1, 'jumped': 5, 'over': 8,
        'lazy': 1, 'dog': 2, '.': 9,
    }
    int_map = {
        'the': 0, 'quick': 1, 'brown': 2,
        'fox': 3, 'jumped': 4, 'over': 5,
        'lazy': 6, 'dog': 7, '.': 8,
    }

    # Replace each word by its integer id, keeping the word's frequency.
    id_freq_pairs = []
    for word, count in freqs.items():
        id_freq_pairs.append((int_map[word], count))
    codec = HuffmanCodec(id_freq_pairs)

    bits = BitArray()

    msg = numpy.array([1, 7], dtype=numpy.int32)
    # Snapshot the message as a plain list before it is handed to the encoder.
    expected = list(msg)
    codec.encode(msg, bits)

    decoded = numpy.array([0, 0], dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert expected == list(decoded)

Пример #5

Показать файл

Файл: test_huffman.py Проект: adamhadani/spaCy

def test_round_trip():
    """Round-trip a sentence through HuffmanCodec and check the emitted bit text.

    The rendered bit text must begin with each word's code string in message
    order, and decoding the bits must give back the original message.
    """
    def render_byte(byte):
        # Zero-pad the binary form to eight digits, then reverse the digits.
        return '{0:b}'.format(byte).rjust(8, '0')[::-1]

    counts = {'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5,
              'over': 8, 'lazy': 1, 'dog': 2, '.': 9}
    codec = HuffmanCodec(counts.items())

    message = [
        'the', 'quick', 'brown', 'fox', 'jumped', 'over',
        'the', 'the', 'lazy', 'dog', '.',
    ]

    # Pair each leaf symbol with the code string stored at the same index.
    all_codes = list(codec.strings)
    lookup = {}
    for position in range(len(codec.leaves)):
        lookup[codec.leaves[position]] = all_codes[position]

    bits = codec.encode(message)
    pending = ''.join(map(render_byte, bits.as_bytes()))

    # Consume the bit text from the front, one code at a time.
    for token in message:
        token_code = lookup[token]
        width = len(token_code)
        assert pending[:width] == token_code
        pending = pending[width:]

    roundtrip = [0] * len(message)
    bits.seek(0)
    codec.decode(bits, roundtrip)
    assert message == roundtrip

Пример #6

Показать файл

Файл: test_codecs.py Проект: underrun/spaCy

def test_vocab_codec():
    """Fill a Vocab with three hand-built entries and round-trip their ids."""
    def lex_props(text, probability):
        # Every string-valued field reuses the raw text; prefix is its first
        # character and suffix its last three.
        props = {
            'flags': 0,
            'length': len(text),
            'orth': text,
            'lower': text,
            'norm': text,
            'shape': text,
            'prefix': text[0],
            'suffix': text[-3:],
            'cluster': 0,
            'prob': probability,
            'sentiment': 0
        }
        return props

    vocab = Vocab()
    entries = (('dog', 0.001), ('the', 0.05), ('jumped', 0.005))
    for word, probability in entries:
        vocab[word] = lex_props(word, probability)

    id_prob_pairs = [(entry.orth, entry.prob) for entry in vocab]
    codec = HuffmanCodec(id_prob_pairs)

    bits = BitArray()

    message_words = ('the', 'dog', 'jumped')
    msg = numpy.array([vocab[w].orth for w in message_words], dtype=numpy.int32)
    # Snapshot the message as a plain list before it is handed to the encoder.
    expected = list(msg)
    codec.encode(msg, bits)

    # Output buffer pre-filled with 0..n-1; decode writes into it.
    decoded = numpy.array(range(len(msg)), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert expected == list(decoded)

Пример #7

Показать файл

def test_freqs():
    """Build a codec from the counts in ``freqs.txt`` and bound its expected code length.

    ``freqs.txt`` is opened by relative path. Each usable line holds exactly
    two whitespace-separated fields: an integer frequency followed by a word.
    Lines with any other number of fields are skipped.

    Raises:
        ValueError: if the first field of a two-field line is not an integer.
    """
    freqs = []
    words = []
    # Use a context manager so the file handle is closed even when parsing
    # raises; the original left the handle open.
    with open('freqs.txt') as freq_file:
        for line in freq_file:
            pieces = line.strip().split()
            if len(pieces) != 2:
                continue
            freq, word = pieces
            freqs.append(int(freq))
            words.append(word)
    total = float(sum(freqs))
    codec = HuffmanCodec(zip(words, freqs))
    expected_length = 0
    # NOTE(review): this assumes codec.strings[i] belongs to the i-th input
    # pair, so freqs[i] is the matching weight — confirm against HuffmanCodec.
    for i, code in enumerate(codec.strings):
        expected_length += len(code) * (freqs[i] / total)
    assert 8 < expected_length < 14

Пример #8

Показать файл

def test1():
    """Compare codec.strings with the codes py_encode yields for ten probabilities.

    The reference codes are ordered by symbol index before the comparison.
    """
    probs = numpy.zeros(shape=(10,), dtype=numpy.float32)
    weights = (0.3, 0.2, 0.15, 0.1, 0.06, 0.02, 0.01, 0.005, 0.0001, 0.000001)
    for slot, weight in enumerate(weights):
        probs[slot] = weight

    codec = HuffmanCodec(list(enumerate(probs)))

    # Sort the reference (symbol, code) pairs so codes line up by symbol index.
    reference = sorted(py_encode(dict(enumerate(probs))).items())
    assert codec.strings == [code for _, code in reference]

Пример #9

Показать файл

def test_rosetta():
    """Check that HuffmanCodec and py_encode give the same weighted code length.

    Character frequencies are counted from a fixed sentence. For both encoders
    the test sums ``len(code) * frequency`` over all characters and requires
    the two totals to be equal; the individual codes are not compared.
    """
    txt = u"this is an example for huffman encoding"
    symb2freq = defaultdict(int)
    for ch in txt:
        symb2freq[ch] += 1

    codec = HuffmanCodec(symb2freq.items())
    py_codec = py_encode(symb2freq)

    # Map every leaf symbol to the code string stored at the same index.
    codes = {codec.leaves[i]: codec.strings[i] for i in range(len(codec.leaves))}

    # Sum the frequency-weighted lengths directly. Grouping by code length
    # first, as the original did, gives the same integer total.
    my_exp_len = sum(len(codes[symb]) * freq for symb, freq in symb2freq.items())
    py_exp_len = sum(len(py_codec[symb]) * freq for symb, freq in symb2freq.items())
    assert my_exp_len == py_exp_len

Пример #10

Показать файл

Файл: test_codecs.py Проект: michigan-com/spaCy

def test_vocab_codec():
    """Round-trip the ids of three words through HuffmanCodec via a BitArray."""
    vocab = Vocab()
    # Look up the three words in turn; only the final lookup stays bound and
    # none of them is read afterwards.
    lexeme = None
    for text in ['dog', 'the', 'jumped']:
        lexeme = vocab[text]

    codec = HuffmanCodec([(item.orth, item.prob) for item in vocab])

    buffer = BitArray()

    ids = []
    for text in ('the', 'dog', 'jumped'):
        ids.append(vocab[text].orth)
    msg = numpy.array(ids, dtype=numpy.int32)
    # Keep a plain-list copy taken before encoding for the final comparison.
    original = list(msg)
    codec.encode(msg, buffer)

    # Output buffer holding 0..n-1 before decode overwrites it.
    out = numpy.arange(len(msg), dtype=numpy.int32)
    buffer.seek(0)
    codec.decode(buffer, out)
    assert original == list(out)

Пример #11

Показать файл

Файл: test_codecs.py Проект: underrun/spaCy

def test_attribute():
    """Encode the ids 1 and 7 with an integer-keyed codec and decode them back."""
    freqs = {'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5,
             'over': 8, 'lazy': 1, 'dog': 2, '.': 9}
    int_map = {'the': 0, 'quick': 1, 'brown': 2, 'fox': 3, 'jumped': 4,
               'over': 5, 'lazy': 6, 'dog': 7, '.': 8}

    # Pair each word's integer id with its frequency, in freqs' key order.
    pairs = [(int_map[token], freqs[token]) for token in freqs]
    codec = HuffmanCodec(pairs)

    buffer = BitArray()

    msg = numpy.array([1, 7], dtype=numpy.int32)
    # Keep a plain-list copy taken before encoding for the final comparison.
    original = list(msg)
    codec.encode(msg, buffer)

    out = numpy.array([0, 0], dtype=numpy.int32)
    buffer.seek(0)
    codec.decode(buffer, out)
    assert original == list(out)

Пример #12

Показать файл

def test_empty():
    """A codec built from an empty mapping must report an empty list of code strings."""
    empty_codec = HuffmanCodec({})
    assert empty_codec.strings == []