Пример #1
0
 def __init__(self, bstr):
     """Read the decoder tables from ``bstr``.

     ``bstr`` is handed to ``Decoder.__init__``; everything else is then
     read through ``self.bstr`` in the order written below.
     """
     Decoder.__init__(self, bstr)
     # NOTE(review): every call below reads from the stream, so the
     # statement order presumably mirrors the file layout -- do not reorder.
     # Prefix text; the int read first is presumably its length.
     self.prefix = self.bstr.read_unicode(self.bstr.read_int())
     # Symbols for articles and headings, as returned by read_symbols().
     self._article_symbols = self.bstr.read_symbols()
     self._heading_symbols = self.bstr.read_symbols()
     # LenTable objects built from the stream: articles, headings,
     # prefix lengths, then postfix lengths.
     self._ltArticles = LenTable(self.bstr)
     self._ltHeadings = LenTable(self.bstr)
     self._ltPrefixLengths = LenTable(self.bstr)
     self._ltPostfixLengths = LenTable(self.bstr)
     # Two 32-bit values read last.
     self._huffman1Number = self.bstr.read_bits(32)
     self._huffman2Number = self.bstr.read_bits(32)
     return
Пример #2
0
    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``.

        The prefix and both symbol sets go through this object's
        ``read_xored_*`` helpers; the tables and the two 32-bit numbers are
        read from ``self.bstr`` directly.
        """
        # NOTE(review): every call below reads from the stream, so the
        # statement order presumably mirrors the file layout -- do not reorder.
        # The int read first is passed to read_xored_prefix as its size.
        self.prefix = self.read_xored_prefix(self.bstr.read_int())
        self._article_symbols = self.read_xored_symbols()
        self._heading_symbols = self.read_xored_symbols()
        # LenTable objects for articles and headings.
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        # Here the prefix-length table comes before the postfix-length one.
        self._ltPrefixLengths = LenTable(self.bstr)
        self._ltPostfixLengths = LenTable(self.bstr)

        # Two 32-bit values read last.
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        # Flag recording that read() has completed.
        self._readed = True
        return
Пример #3
0
    def __init__(self, bstr):
        """Read the decoder tables from ``bstr``.

        ``bstr`` is handed to ``Decoder.__init__``; everything else is then
        read through ``self.bstr`` in the order written below.
        """
        Decoder.__init__(self, bstr)
        # NOTE(review): every call below reads from the stream, so the
        # statement order presumably mirrors the file layout -- do not reorder.
        # Prefix text; the int read first is presumably its length.
        self.prefix = self.bstr.read_unicode(self.bstr.read_int())
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        # LenTable objects for articles and headings.
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        # Postfix-length table first, then a 32-bit value, then the
        # prefix-length table.
        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): purpose of this 32-bit value is not visible here; it
        # is only stored.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        # Two 32-bit values read last.
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        return
Пример #4
0
    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``."""
        # NOTE(review): every call below reads from the stream, so the
        # statement order presumably mirrors the file layout -- do not reorder.
        # Length of the prefix, then the prefix text itself.
        prefix_len = self.bstr.read_int()
        self.prefix = self.bstr.read_unicode(prefix_len)
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        # LenTable objects for articles and headings.
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        # Postfix-length table first, then a 32-bit value, then the
        # prefix-length table.
        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): purpose of this 32-bit value is not visible here; it
        # is only stored.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        # Two 32-bit values read last.
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        # Flag recording that read() has completed.
        self._readed = True
        return
Пример #5
0
class SystemDictionaryDecoder(Decoder):
    """Decoder whose tables are read from the bit stream at construction.

    ``__init__`` reads, in order: the prefix text, the article and heading
    symbols, the article and heading ``LenTable`` objects, the postfix-length
    table, a 32-bit value kept as ``_dummy``, the prefix-length table and two
    32-bit numbers.
    """

    def __init__(self, bstr):
        """Read all decoding tables from ``bstr``.

        :param bstr: bit stream passed to ``Decoder.__init__`` and afterwards
            accessed as ``self.bstr``.
        """
        Decoder.__init__(self, bstr)
        # Every call below reads from the stream; keep the order as is.
        self.prefix = self.bstr.read_unicode(self.bstr.read_int())
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): meaning of this 32-bit value is not visible here.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)

    def decode_heading(self, size):
        """Decode ``size`` heading characters and return them as a string.

        Each character comes from decoding an index with the headings table
        and looking that index up in the heading symbols.

        :param size: number of characters to decode.
        :return: the decoded heading (``""`` when ``size`` is 0).
        """
        chars = []
        for _ in range(size):
            sym = self._heading_symbols[self._ltHeadings.decode()]
            assert sym <= 0xffff  # LingvoEngine:2EAB84E8
            chars.append(unichr(sym))
        # Join once rather than re-allocating the string per character.
        return "".join(chars)

    def decode_article(self, size):
        """Decode article text until at least ``size`` characters exist.

        For each decoded symbol:

        * ``sym > 0x80``: append the single character ``sym - 0x80``;
        * ``sym <= 0x3F``: copy ``sym + 3`` characters from ``self.prefix``;
        * otherwise: copy ``sym - 0x3d`` characters from the text decoded
          so far.

        The start offset of either copy is read from the bit stream. A copy
        may push the result past ``size`` characters.

        NOTE(review): ``sym == 0x80`` takes the back-reference branch, not
        the literal one -- confirm this boundary is intended.

        :param size: minimum length of the decoded text.
        :return: the decoded article text.
        """
        # Accumulate single characters in a list so that appending stays
        # cheap; slicing and len() behave exactly as they did on the string.
        out = []
        while len(out) < size:
            sym = self._article_symbols[self._ltArticles.decode()]
            if sym > 0x80:
                out.extend(unichr(sym - 0x80))
            elif sym <= 0x3F:
                start = self.bstr.read_bits(
                    tools.bit_length(len(self.prefix)))
                out.extend(self.prefix[start:start + sym + 3])
            else:
                start = self.bstr.read_bits(tools.bit_length(size))
                # The slice is a copy, so extending from it is safe.
                out.extend(out[start:start + sym - 0x3d])
        return "".join(out)
Пример #6
0
    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``."""
        # Disabled code kept from the original; XoredBitStream is not used.
        # self.bstr = XoredBitStream(self.bstr)
        # self.decode()

        # NOTE(review): every call below reads from the stream, so the
        # statement order presumably mirrors the file layout -- do not reorder.
        # read_some(4) presumably reads a 4-byte length -- confirm against the
        # bit-stream class.
        prefix_len = self.bstr.read_some(4)
        self.prefix = self.bstr.read_unicode(prefix_len)
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        # LenTable objects for articles and headings.
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        # Postfix-length table first, then a 32-bit value, then the
        # prefix-length table.
        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): purpose of this 32-bit value is not visible here; it
        # is only stored.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        # Two 32-bit values read last.
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        # Flag recording that read() has completed.
        self._readed = True
        return
Пример #7
0
class SystemDictionaryDecoder(Decoder):
    """Decoder that loads its tables from the bit stream in ``__init__``.

    Read order: prefix text, article symbols, heading symbols, article and
    heading ``LenTable`` objects, postfix-length table, one 32-bit value
    stored as ``_dummy``, prefix-length table, two 32-bit numbers.
    """

    def __init__(self, bstr):
        """Read all decoding tables from ``bstr``.

        :param bstr: bit stream passed to ``Decoder.__init__`` and afterwards
            accessed as ``self.bstr``.
        """
        Decoder.__init__(self, bstr)
        # Each call reads from the stream, so the order must not change.
        self.prefix = self.bstr.read_unicode(self.bstr.read_int())
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): meaning of this 32-bit value is not visible here.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)

    def decode_heading(self, size):
        """Return a heading of ``size`` characters decoded from the stream.

        An index is decoded with the headings table for every character and
        mapped through the heading symbols.

        :param size: number of characters to decode.
        :return: the decoded heading (``""`` when ``size`` is 0).
        """
        pieces = []
        for _ in range(size):
            sym = self._heading_symbols[self._ltHeadings.decode()]
            assert sym <= 0xffff  # LingvoEngine:2EAB84E8
            pieces.append(unichr(sym))
        # One join avoids rebuilding the string for every character.
        return "".join(pieces)

    def decode_article(self, size):
        """Return article text of at least ``size`` characters.

        Symbols are decoded until the output is long enough:

        * ``sym > 0x80`` appends the character ``sym - 0x80``;
        * ``sym <= 0x3F`` copies ``sym + 3`` characters out of
          ``self.prefix``;
        * any other value copies ``sym - 0x3d`` characters out of the text
          produced so far.

        Copy offsets are read from the bit stream, and a copy may overshoot
        ``size``.

        NOTE(review): ``sym == 0x80`` is handled as a back-reference rather
        than a literal -- confirm this boundary is intended.

        :param size: minimum length of the decoded text.
        :return: the decoded article text.
        """
        # A list of single characters keeps appends cheap while len() and
        # slicing give the same results as on the original string.
        text = []
        while len(text) < size:
            sym = self._article_symbols[self._ltArticles.decode()]
            if sym > 0x80:
                text.extend(unichr(sym - 0x80))
            elif sym <= 0x3F:
                offset = self.bstr.read_bits(
                    tools.bit_length(len(self.prefix)))
                text.extend(self.prefix[offset:offset + sym + 3])
            else:
                offset = self.bstr.read_bits(tools.bit_length(size))
                # Slicing copies first, so self-extension is well defined.
                text.extend(text[offset:offset + sym - 0x3d])
        return "".join(text)
Пример #8
0
    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``."""
        # NOTE(review): every call below reads from the stream, so the
        # statement order presumably mirrors the file layout -- do not reorder.
        # Prefix text; the int read first is presumably its length.
        self.prefix = self.bstr.read_unicode(self.bstr.read_int())
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        # LenTable objects for articles and headings.
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        # Here the prefix-length table comes before the postfix-length one.
        self._ltPrefixLengths = LenTable(self.bstr)
        self._ltPostfixLengths = LenTable(self.bstr)

        # Two 32-bit values read last.
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        # Flag recording that read() has completed.
        self._readed = True
        return
Пример #9
0
    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``."""
        # Disabled code kept from the original; XoredBitStream is not used.
        # self.bstr = XoredBitStream(self.bstr)
        # self.decode()

        # NOTE(review): every call below reads from the stream, so the
        # statement order presumably mirrors the file layout -- do not reorder.
        # read_some(4) presumably reads a 4-byte length -- confirm against the
        # bit-stream class.
        prefix_len = self.bstr.read_some(4)
        self.prefix = self.bstr.read_unicode(prefix_len)
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        # LenTable objects for articles and headings.
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        # Postfix-length table first, then a 32-bit value, then the
        # prefix-length table.
        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): purpose of this 32-bit value is not visible here; it
        # is only stored.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        # Two 32-bit values read last.
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        # Flag recording that read() has completed.
        self._readed = True
        return
Пример #10
0
class SystemDictionaryDecoder15(Decoder):
    """Decoder whose tables are loaded by an explicit ``read()`` call.

    ``__init__`` only forwards the bit stream to ``Decoder``; ``read()``
    fills in the prefix, symbols and tables and sets ``_readed``.
    """

    def __init__(self, bstr):
        """Store ``bstr`` via ``Decoder.__init__``; nothing is read yet."""
        Decoder.__init__(self, bstr)

    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``."""
        # Disabled code kept from the original; XoredBitStream is not used.
        # self.bstr = XoredBitStream(self.bstr)
        # self.decode()

        # Each call below reads from the stream; keep the order as is.
        # NOTE(review): read_some(4) presumably reads a 4-byte length.
        prefix_len = self.bstr.read_some(4)
        self.prefix = self.bstr.read_unicode(prefix_len)
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): meaning of this 32-bit value is not visible here.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        self._readed = True

    def decode_article(self, size):
        """Decode article text until at least ``size`` characters exist.

        For each decoded symbol:

        * ``sym > 0x80``: append the single character ``sym - 0x80``;
        * ``sym <= 0x3F``: copy ``sym + 3`` characters from ``self.prefix``;
        * otherwise: copy ``sym - 0x3d`` characters from the text decoded
          so far.

        The start offset of either copy is read from the bit stream. A copy
        may push the result past ``size`` characters.

        NOTE(review): ``sym == 0x80`` takes the back-reference branch, not
        the literal one -- confirm this boundary is intended.

        :param size: minimum length of the decoded text.
        :return: the decoded article text.
        """
        # Accumulate single characters in a list so that appending stays
        # cheap; slicing and len() behave exactly as they did on the string.
        out = []
        while len(out) < size:
            sym = self._article_symbols[self._ltArticles.decode()]
            if sym > 0x80:
                out.extend(unichr(sym - 0x80))
            elif sym <= 0x3F:
                start = self.bstr.read_bits(
                    tools.bit_length(len(self.prefix)))
                out.extend(self.prefix[start:start + sym + 3])
            else:
                start = self.bstr.read_bits(tools.bit_length(size))
                # The slice is a copy, so extending from it is safe.
                out.extend(out[start:start + sym - 0x3d])
        return "".join(out)
Пример #11
0
class SystemDictionaryDecoder15(Decoder):
    """Decoder that defers table loading to ``read()``.

    The constructor only passes the bit stream on to ``Decoder``; calling
    ``read()`` loads the prefix, symbols and tables and sets ``_readed``.
    """

    def __init__(self, bstr):
        """Store ``bstr`` via ``Decoder.__init__``; nothing is read yet."""
        Decoder.__init__(self, bstr)

    def read(self):
        """Read the decoder tables from ``self.bstr`` and set ``_readed``."""
        # Disabled code kept from the original; XoredBitStream is not used.
        # self.bstr = XoredBitStream(self.bstr)
        # self.decode()

        # Each call reads from the stream, so the order must not change.
        # NOTE(review): read_some(4) presumably reads a 4-byte length.
        prefix_len = self.bstr.read_some(4)
        self.prefix = self.bstr.read_unicode(prefix_len)
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        self._ltPostfixLengths = LenTable(self.bstr)
        # NOTE(review): meaning of this 32-bit value is not visible here.
        self._dummy = self.bstr.read_bits(32)
        self._ltPrefixLengths = LenTable(self.bstr)

        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)
        self._readed = True

    def decode_article(self, size):
        """Return article text of at least ``size`` characters.

        Symbols are decoded until the output is long enough:

        * ``sym > 0x80`` appends ``int2unichr(sym - 0x80)``;
        * ``sym <= 0x3F`` copies ``sym + 3`` characters out of
          ``self.prefix``;
        * any other value copies ``sym - 0x3d`` characters out of the text
          produced so far.

        Copy offsets are read from the bit stream, and a copy may overshoot
        ``size``.

        NOTE(review): ``sym == 0x80`` is handled as a back-reference rather
        than a literal -- confirm this boundary is intended.

        :param size: minimum length of the decoded text.
        :return: the decoded article text.
        """
        # A list of single characters keeps appends cheap while len() and
        # slicing give the same results as on the original string.
        text = []
        while len(text) < size:
            sym = self._article_symbols[self._ltArticles.decode()]
            if sym > 0x80:
                # extend() keeps one list item per character even if the
                # helper returns more than one.
                text.extend(int2unichr(sym - 0x80))
            elif sym <= 0x3F:
                offset = self.bstr.read_bits(
                    tools.bit_length(len(self.prefix)))
                text.extend(self.prefix[offset:offset + sym + 3])
            else:
                offset = self.bstr.read_bits(tools.bit_length(size))
                # Slicing copies first, so self-extension is well defined.
                text.extend(text[offset:offset + sym - 0x3d])
        return "".join(text)
Пример #12
0
class UserDictionaryDecoder(Decoder):
    """Decoder whose tables are read from the bit stream at construction.

    ``__init__`` reads, in order: the prefix text, the article and heading
    symbols, the article and heading ``LenTable`` objects, the prefix-length
    and postfix-length tables, and two 32-bit numbers.
    """

    def __init__(self, bstr):
        """Read all decoding tables from ``bstr``.

        :param bstr: bit stream passed to ``Decoder.__init__`` and afterwards
            accessed as ``self.bstr``.
        """
        Decoder.__init__(self, bstr)
        # Every call below reads from the stream; keep the order as is.
        self.prefix = self.bstr.read_unicode(self.bstr.read_int())
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)
        self._ltPrefixLengths = LenTable(self.bstr)
        self._ltPostfixLengths = LenTable(self.bstr)
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)

    def decode_heading(self, size):
        """Decode ``size`` heading characters and return them as a string.

        Each character comes from decoding an index with the headings table
        and looking that index up in the heading symbols.

        :param size: number of characters to decode.
        :return: the decoded heading (``""`` when ``size`` is 0).
        """
        chars = []
        for _ in range(size):
            sym = self._heading_symbols[self._ltHeadings.decode()]
            assert sym <= 0xffff  # LingvoEngine:2EAB84E8
            chars.append(unichr(sym))
        # Join once rather than re-allocating the string per character.
        return "".join(chars)
Пример #13
0
class UserDictionaryDecoder(Decoder):
    """Decoder that loads its tables from the bit stream in ``__init__``.

    Read order: prefix text, article symbols, heading symbols, article and
    heading ``LenTable`` objects, prefix-length table, postfix-length table,
    two 32-bit numbers.
    """

    def __init__(self, bstr):
        """Read all decoding tables from ``bstr``.

        :param bstr: bit stream passed to ``Decoder.__init__`` and afterwards
            accessed as ``self.bstr``.
        """
        Decoder.__init__(self, bstr)
        # Each call reads from the stream, so the order must not change.
        self.prefix = self.bstr.read_unicode(self.bstr.read_int())
        self._article_symbols = self.bstr.read_symbols()
        self._heading_symbols = self.bstr.read_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)
        self._ltPrefixLengths = LenTable(self.bstr)
        self._ltPostfixLengths = LenTable(self.bstr)
        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)

    def decode_heading(self, size):
        """Return a heading of ``size`` characters decoded from the stream.

        An index is decoded with the headings table for every character and
        mapped through the heading symbols.

        :param size: number of characters to decode.
        :return: the decoded heading (``""`` when ``size`` is 0).
        """
        pieces = []
        for _ in range(size):
            sym = self._heading_symbols[self._ltHeadings.decode()]
            assert sym <= 0xffff  # LingvoEngine:2EAB84E8
            pieces.append(unichr(sym))
        # One join avoids rebuilding the string for every character.
        return "".join(pieces)
Пример #14
0
class AbbreviationDictionaryDecoder(Decoder):
    """Decoder whose prefix and symbols are stored XOR-ed in the stream.

    ``__init__`` reads, in order: the XOR-ed prefix, the XOR-ed article and
    heading symbols, the article and heading ``LenTable`` objects, the
    prefix-length and postfix-length tables, and two 32-bit numbers.
    """

    def __init__(self, bstr):
        """Read all decoding tables from ``bstr``.

        :param bstr: bit stream passed to ``Decoder.__init__`` and afterwards
            accessed as ``self.bstr``.
        """
        Decoder.__init__(self, bstr)
        # Every call below reads from the stream; keep the order as is.
        self.prefix = self.read_xored_prefix(self.bstr.read_int())
        self._article_symbols = self.read_xored_symbols()
        self._heading_symbols = self.read_xored_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        self._ltPrefixLengths = LenTable(self.bstr)
        self._ltPostfixLengths = LenTable(self.bstr)

        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)

    def read_xored_symbols(self):
        """Read a symbol list from the stream, XOR-ing each with 0x1325.

        The stream holds a 32-bit symbol count, an 8-bit symbol width, then
        that many symbols of that width.

        :return: list of the un-XOR-ed symbol values.
        """
        size = self.bstr.read_bits(32)
        bits_per_symbol = self.bstr.read_bits(8)
        return [self.bstr.read_bits(bits_per_symbol) ^ 0x1325
                for _ in range(size)]

    def read_xored_prefix(self, size):
        """Read ``size`` 16-bit values, XOR each with 0x879A, return text.

        :param size: number of 16-bit values to read.
        :return: the characters joined into one string (``""`` for 0).
        """
        return "".join(
            [unichr(self.bstr.read_bits(16) ^ 0x879A) for _ in range(size)])

    def decode_heading(self, size):
        """Decode ``size`` heading characters and return them as a string.

        Each character comes from decoding an index with the headings table
        and looking that index up in the heading symbols.

        :param size: number of characters to decode.
        :return: the decoded heading (``""`` when ``size`` is 0).
        """
        chars = []
        for _ in range(size):
            sym = self._heading_symbols[self._ltHeadings.decode()]
            assert sym <= 0xffff  # LingvoEngine:2EAB84E8
            chars.append(unichr(sym))
        # Join once rather than re-allocating the string per character.
        return "".join(chars)
Пример #15
0
class AbbreviationDictionaryDecoder(Decoder):
    """Decoder for streams whose prefix and symbols are XOR-masked.

    Read order in ``__init__``: XOR-ed prefix, XOR-ed article symbols, XOR-ed
    heading symbols, article and heading ``LenTable`` objects, prefix-length
    table, postfix-length table, two 32-bit numbers.
    """

    def __init__(self, bstr):
        """Read all decoding tables from ``bstr``.

        :param bstr: bit stream passed to ``Decoder.__init__`` and afterwards
            accessed as ``self.bstr``.
        """
        Decoder.__init__(self, bstr)
        # Each call reads from the stream, so the order must not change.
        self.prefix = self.read_xored_prefix(self.bstr.read_int())
        self._article_symbols = self.read_xored_symbols()
        self._heading_symbols = self.read_xored_symbols()
        self._ltArticles = LenTable(self.bstr)
        self._ltHeadings = LenTable(self.bstr)

        self._ltPrefixLengths = LenTable(self.bstr)
        self._ltPostfixLengths = LenTable(self.bstr)

        self._huffman1Number = self.bstr.read_bits(32)
        self._huffman2Number = self.bstr.read_bits(32)

    def read_xored_symbols(self):
        """Return the next symbol list from the stream with the mask removed.

        Reads a 32-bit count and an 8-bit width, then ``count`` values of
        ``width`` bits, XOR-ing each one with 0x1325.

        :return: list of the un-XOR-ed symbol values.
        """
        count = self.bstr.read_bits(32)
        width = self.bstr.read_bits(8)
        return [self.bstr.read_bits(width) ^ 0x1325 for _ in range(count)]

    def read_xored_prefix(self, size):
        """Return ``size`` characters read as 16-bit values XOR-ed with 0x879A.

        :param size: number of 16-bit values to read.
        :return: the characters joined into one string (``""`` for 0).
        """
        codes = [self.bstr.read_bits(16) ^ 0x879A for _ in range(size)]
        return "".join([unichr(code) for code in codes])

    def decode_heading(self, size):
        """Return a heading of ``size`` characters decoded from the stream.

        An index is decoded with the headings table for every character and
        mapped through the heading symbols.

        :param size: number of characters to decode.
        :return: the decoded heading (``""`` when ``size`` is 0).
        """
        pieces = []
        for _ in range(size):
            sym = self._heading_symbols[self._ltHeadings.decode()]
            assert sym <= 0xffff  # LingvoEngine:2EAB84E8
            pieces.append(unichr(sym))
        # One join avoids rebuilding the string for every character.
        return "".join(pieces)