示例#1
0
def test_encode_decode_different_levels(encoder_dict):
    """Round-trip an SNGram that mixes a 'pos' element with 'form' elements.

    Decoding the encoded pattern must yield a pattern equal to the input.
    """
    codec = BitEncoder(encoder_dict, SNGram)

    elements = [
        PatternElement('Noun', 'pos'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    original = SNGram.from_element_list(elements)

    encoded = codec.encode(original)
    round_tripped = codec.decode(encoded)

    assert round_tripped == original
示例#2
0
def test_encode_unknown_not_set_bitencoder():
    """Encoding a pattern with an out-of-vocabulary word raises EncodeError.

    The encoder is built without a third (unknown-token) argument, and the
    pattern contains 'The', which is not in the 'form' vocabulary.
    """
    vocabulary = {'form': {'fox', 'quick', 'brown'}}
    codec = BitEncoder(vocabulary, SNGram)

    elements = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    sngram = SNGram.from_element_list(elements)

    with pytest.raises(EncodeError):
        codec.encode(sngram)
示例#3
0
def test_bit_size_8():
    """Check element_size for two 'form' values plus one 'function' value."""
    vocabulary = {
        'form': {'a', 'b'},
        'function': {'a'},
    }
    codec = BitEncoder(vocabulary, SNGram)

    assert codec.element_size == 4
示例#4
0
def test_bit_size_7_unknown():
    """Check element_size for one 'form' and one 'function' value with '__unknown__' set."""
    vocabulary = {
        'form': {'a'},
        'function': {'a'},
    }
    codec = BitEncoder(vocabulary, SNGram, '__unknown__')

    assert codec.element_size == 4
示例#5
0
def test_encode_decode_unknown_bitencoder():
    """Round-trip a pattern containing a word missing from the vocabulary.

    The encoder is built with an unknown token, so 'The' (absent from the
    'form' vocabulary) is expected to come back as that token after
    encode/decode, while every other element is unchanged.
    """
    unknown_token = "__unknown__"

    test = BitEncoder({'form': set(['fox', 'quick', 'brown'])}, SNGram,
                      unknown_token)

    pattern_list = [
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET
    ]
    pattern = SNGram.from_element_list(pattern_list)

    # Work on a copy: editing pattern_list in place would also alter the list
    # `pattern` was built from, which could turn the input into the expected
    # value if from_element_list keeps a reference to its argument.
    expected_pattern_list = list(pattern_list)
    expected_pattern_list[2] = PatternElement(unknown_token, 'form')
    expected_pattern = SNGram.from_element_list(expected_pattern_list)

    assert test.decode(test.encode(pattern)) == expected_pattern
示例#6
0
}])
def test_huffman_encode_unknown_item(freq_dict):
    """encode_item raises EncodeError for the 'unknown' form element.

    NOTE(review): presumably 'unknown' is absent from every parametrized
    freq_dict; the parametrization is not visible here, so confirm.
    """
    codec = HuffmanEncoder(freq_dict, SNGram)
    unseen = PatternElement('unknown', 'form')

    with pytest.raises(EncodeError):
        codec.encode_item(unseen)


### encoders to test
encoder = [
    BitEncoder({'form': {
        'fox': 0,
        'The': 2,
        'quick': 1,
        'brown': 3
    }}, SNGram),
    BitEncoder({'form': set(['fox', 'The', 'quick', 'brown'])}, SNGram),
    HuffmanEncoder({'form': {
        'fox': 5,
        'The': 10,
        'quick': 3,
        'brown': 8
    }}, SNGram),
    Base64Encoder(
        HuffmanEncoder({'form': {
            'fox': 5,
            'The': 10,
            'quick': 3,
            'brown': 8