def test_encode_decode_different_levels(encoder_dict):
    """Round-trip a pattern mixing a 'pos' element with 'form' elements.

    The pattern is encoded with a ``BitEncoder`` built from the
    ``encoder_dict`` fixture and decoded again; the result must compare
    equal to the pattern that went in.
    """
    codec = BitEncoder(encoder_dict, SNGram)
    elements = [
        PatternElement('Noun', 'pos'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    original = SNGram.from_element_list(elements)

    encoded = codec.encode(original)
    round_tripped = codec.decode(encoded)

    assert round_tripped == original
def test_encode_unknown_not_set_bitencoder():
    """Encoding must raise ``EncodeError`` for a form missing from the vocabulary.

    The encoder is created without an unknown token, and its 'form'
    vocabulary does not contain 'The', which appears in the pattern.
    """
    vocabulary = {'form': {'fox', 'quick', 'brown'}}
    encoder = BitEncoder(vocabulary, SNGram)

    elements = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),  # not part of the vocabulary above
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    sngram = SNGram.from_element_list(elements)

    with pytest.raises(EncodeError):
        encoder.encode(sngram)
def test_bit_size_8():
    """Check ``element_size`` for a small two-key vocabulary.

    The encoder is built without an unknown token from two 'form'
    entries and one 'function' entry; ``element_size`` must be 4.
    """
    vocabulary = {
        'form': {'a', 'b'},
        'function': {'a'},
    }
    encoder = BitEncoder(vocabulary, SNGram)

    assert encoder.element_size == 4
def test_bit_size_7_unknown():
    """Check ``element_size`` when an unknown token is supplied.

    The encoder is built from one 'form' entry and one 'function' entry,
    with '__unknown__' passed as the third argument; ``element_size``
    must be 4.
    """
    vocabulary = {
        'form': {'a'},
        'function': {'a'},
    }
    encoder = BitEncoder(vocabulary, SNGram, '__unknown__')

    assert encoder.element_size == 4
def test_encode_decode_unknown_bitencoder():
    """Round-trip a pattern containing an out-of-vocabulary form.

    The encoder is built with an unknown token, and 'The' is absent from
    its 'form' vocabulary. After encoding and decoding, the pattern must
    equal the input with the 'The' element replaced by a 'form' element
    carrying the unknown token.
    """
    unknown_token = "__unknown__"
    test = BitEncoder({'form': {'fox', 'quick', 'brown'}}, SNGram,
                      unknown_token)

    pattern_list = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),  # not in the vocabulary
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    pattern = SNGram.from_element_list(pattern_list)

    # Work on a copy: plain assignment would alias ``pattern_list``, so the
    # item replacement below would also rewrite the list ``pattern`` was
    # built from and could make input and expectation share the same data.
    expected_pattern_list = list(pattern_list)
    expected_pattern_list[2] = PatternElement(unknown_token, 'form')
    expected_pattern = SNGram.from_element_list(expected_pattern_list)

    assert test.decode(test.encode(pattern)) == expected_pattern
}]) def test_huffman_encode_unknown_item(freq_dict): test = HuffmanEncoder(freq_dict, SNGram) element = PatternElement('unknown', 'form') with pytest.raises(EncodeError): test.encode_item(element) ### encoders to test encoder = [ BitEncoder({'form': { 'fox': 0, 'The': 2, 'quick': 1, 'brown': 3 }}, SNGram), BitEncoder({'form': set(['fox', 'The', 'quick', 'brown'])}, SNGram), HuffmanEncoder({'form': { 'fox': 5, 'The': 10, 'quick': 3, 'brown': 8 }}, SNGram), Base64Encoder( HuffmanEncoder({'form': { 'fox': 5, 'The': 10, 'quick': 3, 'brown': 8