Exemplo n.º 1
0
    def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, dinfl,
                            inflectionNames, groupList):
        '''
        Render the inflection groups of one entry as mobipocket markup.

        Every group in groupList yields one <idx:infl> element containing an
        <idx:iform> element for each rule that applyInflectionRule() could
        apply to the word.

        @param mainEntry: The word to inflect.
        @param controlByteCount: The number of control bytes.
        @param tagTable: The tag table.
        @param dinfl: The inflection data object; its offsets() and lookup() results select the inflection data section to read.
        @param inflectionNames: The inflection rule name data.
        @param groupList: The list of inflection groups to process.
        @return: String with inflection groups and rules or empty string if required tags are not available.
        '''
        pieces = []
        for groupIndex in groupList:
            groupStart, groupEnd, section = dinfl.offsets(groupIndex)

            # The group record is expected to open with a 0x00 byte that is
            # not part of the tag data, so tag parsing starts one byte later.
            assert ord(section[groupStart]) == 0x00
            groupTags = getTagMap(controlByteCount, tagTable, section,
                                  groupStart + 1, groupEnd)

            # Tag 0x05 (rule names) and tag 0x1a (rules) are both mandatory;
            # if either is missing everything collected so far is discarded.
            if 0x05 not in groupTags:
                print "Error: Required tag 0x05 not found in tagMap"
                return ""
            if 0x1a not in groupTags:
                print "Error: Required tag 0x1a not found in tagMap"
                return ""

            pieces.append("<idx:infl>")

            ruleIndexes = groupTags[0x1a]
            for i, nameOffset in enumerate(groupTags[0x05]):
                # Read the rule name: a variable width length value followed
                # by that many bytes of name text.
                consumed, nameLength = getVariableWidthValue(
                    inflectionNames, nameOffset)
                nameStart = nameOffset + consumed
                inflectionName = inflectionNames[nameStart:nameStart +
                                                 nameLength]

                # Locate the rule, which may live in any of the inflection
                # data sections, and apply it to the word.
                rvalue, start, _count, ruleData = dinfl.lookup(ruleIndexes[i])
                ruleOffset, = struct.unpack_from('>H', ruleData,
                                                 start + 4 + (2 * rvalue))
                ruleStart = ruleOffset + 1
                ruleEnd = ruleStart + ord(ruleData[ruleOffset])
                inflection = self.applyInflectionRule(mainEntry, ruleData,
                                                      ruleStart, ruleEnd)
                if inflection is None:
                    continue
                pieces.append('  <idx:iform name="%s" value="%s"/>' % (
                    inflectionName, inflection))

            pieces.append("</idx:infl>")
        return "".join(pieces)
Exemplo n.º 2
0
    def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, data, inflectionNames, groupList):
        '''
        Create string which contains the inflection groups with inflection rules as mobipocket tags.

        Every group in groupList yields one <idx:infl> element containing an
        <idx:iform> element for each rule that applyInflectionRule() could
        apply to the word.

        @param mainEntry: The word to inflect.
        @param controlByteCount: The number of control bytes.
        @param tagTable: The tag table.
        @param data: The inflection index data.
        @param inflectionNames: The inflection rule name data.
        @param groupList: The list of inflection groups to process.
        @return: String with inflection groups and rules or empty string if required tags are not available.
        '''
        # Position of the offset table and the number of entries, both read
        # from fixed positions in the index data.
        idxtPos, = struct.unpack_from('>L', data, 0x14)
        entryCount, = struct.unpack_from('>L', data, 0x18)

        parts = []
        for groupIndex in groupList:
            groupStart, = struct.unpack_from('>H', data, idxtPos + 4 + (2 * groupIndex))
            if groupIndex + 1 < entryCount:
                groupEnd, = struct.unpack_from('>H', data, idxtPos + 4 + (2 * (groupIndex + 1)))
            else:
                # The last entry has no successor to bound it.
                groupEnd = None

            # First byte seems to be always 0x00 and must be skipped.
            assert ord(data[groupStart]) == 0x00
            tagMap = getTagMap(controlByteCount, tagTable, data, groupStart + 1, groupEnd)

            # Make sure that the required tags are available.
            if 0x05 not in tagMap:
                print "Error: Required tag 0x05 not found in tagMap"
                return ""
            if 0x1a not in tagMap:
                print "Error: Required tag 0x1a not found in tagMap"
                return ""

            parts.append("<idx:infl>")

            ruleIndexes = tagMap[0x1a]
            for i, nameOffset in enumerate(tagMap[0x05]):
                # Get name of inflection rule: a variable width length value
                # followed by that many bytes of name text.
                consumed, nameLength = getVariableWidthValue(inflectionNames, nameOffset)
                nameStart = nameOffset + consumed
                inflectionName = inflectionNames[nameStart:nameStart + nameLength]

                # Get and apply inflection rule.
                ruleOffset, = struct.unpack_from('>H', data, idxtPos + 4 + (2 * ruleIndexes[i]))
                ruleLength = ord(data[ruleOffset])
                inflection = self.applyInflectionRule(mainEntry, data, ruleOffset + 1, ruleOffset + 1 + ruleLength)
                if inflection is not None:
                    parts.append('  <idx:iform name="%s" value="%s"/>' % (inflectionName, inflection))

            parts.append("</idx:infl>")
        return "".join(parts)
Exemplo n.º 3
0
    def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, dinfl, inflectionNames, groupList):
        '''
        Produce the mobipocket tags describing the inflection groups of a word.

        One <idx:infl> element is emitted per group in groupList; inside it,
        one <idx:iform> element is emitted per rule for which
        applyInflectionRule() returned a value.

        @param mainEntry: The word to inflect.
        @param controlByteCount: The number of control bytes.
        @param tagTable: The tag table.
        @param dinfl: The inflection data object; its offsets() and lookup() results select the inflection data section to read.
        @param inflectionNames: The inflection rule name data.
        @param groupList: The list of inflection groups to process.
        @return: String with inflection groups and rules or empty string if required tags are not available.
        '''
        fragments = []
        for group in groupList:
            recordStart, recordEnd, groupData = dinfl.offsets(group)

            # The record is expected to begin with a 0x00 byte which is skipped before the tags are parsed.
            assert ord(groupData[recordStart]) == 0x00
            tags = getTagMap(controlByteCount, tagTable, groupData, recordStart + 1, recordEnd)

            # Both the rule name tag (0x05) and the rule tag (0x1a) are required; without them nothing is returned.
            if 0x05 not in tags:
                print "Error: Required tag 0x05 not found in tagMap"
                return ""
            if 0x1a not in tags:
                print "Error: Required tag 0x1a not found in tagMap"
                return ""

            fragments.append("<idx:infl>")

            rules = tags[0x1a]
            for position, namePos in enumerate(tags[0x05]):
                # Rule name: variable width length value, then the name text itself.
                consumed, nameLen = getVariableWidthValue(inflectionNames, namePos)
                textStart = namePos + consumed
                inflectionName = inflectionNames[textStart:textStart + nameLen]

                # Rule body: looked up across possibly multiple inflection data sections.
                rvalue, start, _count, ruleSection = dinfl.lookup(rules[position])
                rulePos, = struct.unpack_from('>H', ruleSection, start + 4 + (2 * rvalue))
                ruleLen = ord(ruleSection[rulePos])
                inflection = self.applyInflectionRule(mainEntry, ruleSection, rulePos + 1, rulePos + 1 + ruleLen)
                if inflection is None:
                    continue
                fragments.append('  <idx:iform name="%s" value="%s"/>' % (inflectionName, inflection))

            fragments.append("</idx:infl>")
        return "".join(fragments)
Exemplo n.º 4
0
    def getPositionMap(self):
        header = self.header
        sect = self.sect

        positionMap = {}

        metaOrthIndex = self.metaOrthIndex
        metaInflIndex = self.metaInflIndex

        decodeInflection = True
        if metaOrthIndex != 0xFFFFFFFF:
            print "Info: Document contains orthographic index, handle as dictionary"
            if metaInflIndex == 0xFFFFFFFF:
                decodeInflection = False
            else:
                metaInflIndexData = sect.loadSection(metaInflIndex)

                print "\nParsing metaInflIndexData"
                midxhdr, mhordt1, mhordt2 = self.parseHeader(metaInflIndexData)

                metaIndexCount = midxhdr['count']
                idatas = []
                for j in range(metaIndexCount):
                    idatas.append(sect.loadSection(metaInflIndex + 1 + j))
                dinfl = InflectionData(idatas)

                inflNameData = sect.loadSection(metaInflIndex + 1 +
                                                metaIndexCount)
                tagSectionStart = midxhdr['len']
                inflectionControlByteCount, inflectionTagTable = readTagSection(
                    tagSectionStart, metaInflIndexData)
                if DEBUG_DICT:
                    print "inflectionTagTable: %s" % inflectionTagTable
                if self.hasTag(inflectionTagTable, 0x07):
                    print "Error: Dictionary uses obsolete inflection rule scheme which is not yet supported"
                    decodeInflection = False

            data = sect.loadSection(metaOrthIndex)

            print "\nParsing metaOrthIndex"
            idxhdr, hordt1, hordt2 = self.parseHeader(data)

            tagSectionStart = idxhdr['len']
            controlByteCount, tagTable = readTagSection(tagSectionStart, data)
            orthIndexCount = idxhdr['count']
            print "orthIndexCount is", orthIndexCount
            if DEBUG_DICT:
                print "orthTagTable: %s" % tagTable
            if hordt2 is not None:
                print "orth entry uses ordt2 lookup table of type ", idxhdr[
                    'otype']
            hasEntryLength = self.hasTag(tagTable, 0x02)
            if not hasEntryLength:
                print "Info: Index doesn't contain entry length tags"

            print "Read dictionary index data"
            for i in range(metaOrthIndex + 1,
                           metaOrthIndex + 1 + orthIndexCount):
                data = sect.loadSection(i)
                hdrinfo, ordt1, ordt2 = self.parseHeader(data)
                idxtPos = hdrinfo['start']
                entryCount = hdrinfo['count']
                idxPositions = []
                for j in range(entryCount):
                    pos, = struct.unpack_from('>H', data,
                                              idxtPos + 4 + (2 * j))
                    idxPositions.append(pos)
                # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!)
                idxPositions.append(idxtPos)
                for j in range(entryCount):
                    startPos = idxPositions[j]
                    endPos = idxPositions[j + 1]
                    textLength = ord(data[startPos])
                    text = data[startPos + 1:startPos + 1 + textLength]
                    if hordt2 is not None:
                        utext = u""
                        if idxhdr['otype'] == 0:
                            pattern = '>H'
                            inc = 2
                        else:
                            pattern = '>B'
                            inc = 1
                        pos = 0
                        while pos < textLength:
                            off, = struct.unpack_from(pattern, text, pos)
                            if off < len(hordt2):
                                utext += unichr(hordt2[off])
                            else:
                                utext += unichr(off)
                            pos += inc
                        text = utext.encode('utf-8')

                    tagMap = getTagMap(controlByteCount, tagTable, data,
                                       startPos + 1 + textLength, endPos)
                    if 0x01 in tagMap:
                        if decodeInflection and 0x2a in tagMap:
                            inflectionGroups = self.getInflectionGroups(
                                text, inflectionControlByteCount,
                                inflectionTagTable, dinfl, inflNameData,
                                tagMap[0x2a])
                        else:
                            inflectionGroups = ""
                        assert len(tagMap[0x01]) == 1
                        entryStartPosition = tagMap[0x01][0]
                        if hasEntryLength:
                            # The idx:entry attribute "scriptable" must be present to create entry length tags.
                            ml = '<idx:entry scriptable="yes"><idx:orth value="%s">%s</idx:orth>' % (
                                text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[
                                    entryStartPosition] + ml
                            else:
                                positionMap[entryStartPosition] = ml
                            assert len(tagMap[0x02]) == 1
                            entryEndPosition = entryStartPosition + tagMap[
                                0x02][0]
                            if entryEndPosition in positionMap:
                                positionMap[
                                    entryEndPosition] = "</idx:entry>" + positionMap[
                                        entryEndPosition]
                            else:
                                positionMap[entryEndPosition] = "</idx:entry>"

                        else:
                            indexTags = '<idx:entry>\n<idx:orth value="%s">\n%s</idx:entry>\n' % (
                                text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[
                                    entryStartPosition] + indexTags
                            else:
                                positionMap[entryStartPosition] = indexTags
        return positionMap
Exemplo n.º 5
0
    def getPositionMap (self):
        header = self.header
        sect = self.sect

        positionMap = {}

        metaOrthIndex = self.metaOrthIndex
        metaInflIndex = self.metaInflIndex

        decodeInflection = True
        if metaOrthIndex != 0xFFFFFFFF:
            print "Info: Document contains orthographic index, handle as dictionary"
            if metaInflIndex == 0xFFFFFFFF:
                decodeInflection = False
            else:
                metaInflIndexData = sect.loadSection(metaInflIndex)

                print "\nParsing metaInflIndexData"
                midxhdr, mhordt1, mhordt2 = self.parseHeader(metaInflIndexData)

                metaIndexCount = midxhdr['count']
                idatas = []
                for j in range(metaIndexCount):
                    idatas.append(sect.loadSection(metaInflIndex + 1 + j))
                dinfl = InflectionData(idatas)
                
                inflNameData = sect.loadSection(metaInflIndex + 1 + metaIndexCount)
                tagSectionStart = midxhdr['len']
                inflectionControlByteCount, inflectionTagTable = readTagSection(tagSectionStart, metaInflIndexData)
                if DEBUG_DICT:
                    print "inflectionTagTable: %s" % inflectionTagTable
                if self.hasTag(inflectionTagTable, 0x07):
                    print "Error: Dictionary uses obsolete inflection rule scheme which is not yet supported"
                    decodeInflection = False

            data = sect.loadSection(metaOrthIndex)

            print "\nParsing metaOrthIndex"
            idxhdr, hordt1, hordt2 = self.parseHeader(data)

            tagSectionStart = idxhdr['len']
            controlByteCount, tagTable = readTagSection(tagSectionStart, data)
            orthIndexCount = idxhdr['count']
            print "orthIndexCount is", orthIndexCount
            if DEBUG_DICT:
                print "orthTagTable: %s" % tagTable
            if hordt2 is not None:
                print "orth entry uses ordt2 lookup table of type ", idxhdr['otype']
            hasEntryLength = self.hasTag(tagTable, 0x02)
            if not hasEntryLength:
                print "Info: Index doesn't contain entry length tags"

            print "Read dictionary index data"
            for i in range(metaOrthIndex + 1, metaOrthIndex + 1 + orthIndexCount):
                data = sect.loadSection(i)
                hdrinfo, ordt1, ordt2 = self.parseHeader(data)
                idxtPos = hdrinfo['start']
                entryCount = hdrinfo['count']
                idxPositions = []
                for j in range(entryCount):
                    pos, = struct.unpack_from('>H', data, idxtPos + 4 + (2 * j))
                    idxPositions.append(pos)
                # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!)
                idxPositions.append(idxtPos)
                for j in range(entryCount):
                    startPos = idxPositions[j]
                    endPos = idxPositions[j+1]
                    textLength = ord(data[startPos])
                    text = data[startPos+1:startPos+1+textLength]
                    if hordt2 is not None:
                        utext = u""
                        if idxhdr['otype'] == 0:
                            pattern = '>H'
                            inc = 2
                        else:
                            pattern = '>B'
                            inc = 1
                        pos = 0
                        while pos < textLength:
                            off, = struct.unpack_from(pattern, text, pos)
                            if off < len(hordt2):
                                utext += unichr(hordt2[off])
                            else:
                                utext += unichr(off)
                            pos += inc
                        text = utext.encode('utf-8')

                    tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos)
                    if 0x01 in tagMap:
                        if decodeInflection and 0x2a in tagMap:
                            inflectionGroups = self.getInflectionGroups(text, inflectionControlByteCount, inflectionTagTable, 
                                                                        dinfl, inflNameData, tagMap[0x2a])
                        else:
                            inflectionGroups = ""
                        assert len(tagMap[0x01]) == 1
                        entryStartPosition = tagMap[0x01][0]
                        if hasEntryLength:
                            # The idx:entry attribute "scriptable" must be present to create entry length tags.
                            ml = '<idx:entry scriptable="yes"><idx:orth value="%s">%s</idx:orth>' % (text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[entryStartPosition] + ml
                            else:
                                positionMap[entryStartPosition] = ml
                            assert len(tagMap[0x02]) == 1
                            entryEndPosition = entryStartPosition + tagMap[0x02][0]
                            if entryEndPosition in positionMap:
                                positionMap[entryEndPosition] = "</idx:entry>" + positionMap[entryEndPosition]
                            else:
                                positionMap[entryEndPosition] = "</idx:entry>"

                        else:
                            indexTags = '<idx:entry>\n<idx:orth value="%s">\n%s</idx:entry>\n' % (text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[entryStartPosition] + indexTags
                            else:
                                positionMap[entryStartPosition] = indexTags
        return positionMap
Exemplo n.º 6
0
    def getPositionMap(self):
        header = self.header
        sect = self.sect

        positionMap = {}

        metaOrthIndex = self.metaOrthIndex
        metaInflIndex = self.metaInflIndex

        decodeInflection = True
        if metaOrthIndex != 0xFFFFFFFF:
            print "Info: Document contains orthographic index, handle as dictionary"
            if metaInflIndex == 0xFFFFFFFF:
                decodeInflection = False
            else:
                metaInflIndexData = sect.loadSection(metaInflIndex)
                metaIndexCount, = struct.unpack_from('>L', metaInflIndexData,
                                                     0x18)
                if metaIndexCount != 1:
                    print "Error: Dictionary contains multiple inflection index sections, which is not yet supported"
                    decodeInflection = False
                inflIndexData = sect.loadSection(metaInflIndex + 1)
                inflNameData = sect.loadSection(metaInflIndex + 1 +
                                                metaIndexCount)
                tagSectionStart, = struct.unpack_from('>L', metaInflIndexData,
                                                      0x04)
                inflectionControlByteCount, inflectionTagTable = readTagSection(
                    tagSectionStart, metaInflIndexData)
                if DEBUG_DICT:
                    print "inflectionTagTable: %s" % inflectionTagTable
                if self.hasTag(inflectionTagTable, 0x07):
                    print "Error: Dictionary uses obsolete inflection rule scheme which is not yet supported"
                    decodeInflection = False

            data = sect.loadSection(metaOrthIndex)
            tagSectionStart, = struct.unpack_from('>L', data, 0x04)
            controlByteCount, tagTable = readTagSection(tagSectionStart, data)
            orthIndexCount, = struct.unpack_from('>L', data, 0x18)
            print "orthIndexCount is", orthIndexCount
            if DEBUG_DICT:
                print "orthTagTable: %s" % tagTable
            hasEntryLength = self.hasTag(tagTable, 0x02)
            if not hasEntryLength:
                print "Info: Index doesn't contain entry length tags"

            print "Read dictionary index data"
            for i in range(metaOrthIndex + 1,
                           metaOrthIndex + 1 + orthIndexCount):
                data = sect.loadSection(i)
                idxtPos, = struct.unpack_from('>L', data, 0x14)
                entryCount, = struct.unpack_from('>L', data, 0x18)
                idxPositions = []
                for j in range(entryCount):
                    pos, = struct.unpack_from('>H', data,
                                              idxtPos + 4 + (2 * j))
                    idxPositions.append(pos)
                # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!)
                idxPositions.append(idxtPos)

                for j in range(entryCount):
                    startPos = idxPositions[j]
                    endPos = idxPositions[j + 1]
                    textLength = ord(data[startPos])
                    text = data[startPos + 1:startPos + 1 + textLength]
                    tagMap = getTagMap(controlByteCount, tagTable, data,
                                       startPos + 1 + textLength, endPos)
                    if 0x01 in tagMap:
                        if decodeInflection and 0x2a in tagMap:
                            inflectionGroups = self.getInflectionGroups(
                                text, inflectionControlByteCount,
                                inflectionTagTable, inflIndexData,
                                inflNameData, tagMap[0x2a])
                        else:
                            inflectionGroups = ""
                        assert len(tagMap[0x01]) == 1
                        entryStartPosition = tagMap[0x01][0]
                        if hasEntryLength:
                            # The idx:entry attribute "scriptable" must be present to create entry length tags.
                            ml = '<idx:entry scriptable="yes"><idx:orth value="%s">%s</idx:orth>' % (
                                text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[
                                    entryStartPosition] + ml
                            else:
                                positionMap[entryStartPosition] = ml
                            assert len(tagMap[0x02]) == 1
                            entryEndPosition = entryStartPosition + tagMap[
                                0x02][0]
                            if entryEndPosition in positionMap:
                                positionMap[
                                    entryEndPosition] = "</idx:entry>" + positionMap[
                                        entryEndPosition]
                            else:
                                positionMap[entryEndPosition] = "</idx:entry>"

                        else:
                            indexTags = '<idx:entry>\n<idx:orth value="%s">\n%s</idx:entry>\n' % (
                                text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[
                                    entryStartPosition] + indexTags
                            else:
                                positionMap[entryStartPosition] = indexTags
        return positionMap
Exemplo n.º 7
0
    def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, data,
                            inflectionNames, groupList):
        '''
        Create string which contains the inflection groups with inflection rules as mobipocket tags.

        Every group in groupList yields one <idx:infl> element containing an
        <idx:iform> element for each rule that applyInflectionRule() could
        apply to the word.

        @param mainEntry: The word to inflect.
        @param controlByteCount: The number of control bytes.
        @param tagTable: The tag table.
        @param data: The inflection index data.
        @param inflectionNames: The inflection rule name data.
        @param groupList: The list of inflection groups to process.
        @return: String with inflection groups and rules or empty string if required tags are not available.
        '''
        # Position of the offset table and the number of entries, both read
        # from fixed positions in the index data.
        idxtPos, = struct.unpack_from('>L', data, 0x14)
        entryCount, = struct.unpack_from('>L', data, 0x18)

        parts = []
        for groupIndex in groupList:
            groupStart, = struct.unpack_from('>H', data,
                                             idxtPos + 4 + (2 * groupIndex))
            if groupIndex + 1 < entryCount:
                groupEnd, = struct.unpack_from(
                    '>H', data, idxtPos + 4 + (2 * (groupIndex + 1)))
            else:
                # The last entry has no successor to bound it.
                groupEnd = None

            # First byte seems to be always 0x00 and must be skipped.
            assert ord(data[groupStart]) == 0x00
            tagMap = getTagMap(controlByteCount, tagTable, data,
                               groupStart + 1, groupEnd)

            # Make sure that the required tags are available.
            if 0x05 not in tagMap:
                print "Error: Required tag 0x05 not found in tagMap"
                return ""
            if 0x1a not in tagMap:
                print "Error: Required tag 0x1a not found in tagMap"
                return ""

            parts.append("<idx:infl>")

            ruleIndexes = tagMap[0x1a]
            for i, nameOffset in enumerate(tagMap[0x05]):
                # Get name of inflection rule: a variable width length value
                # followed by that many bytes of name text.
                consumed, nameLength = getVariableWidthValue(
                    inflectionNames, nameOffset)
                nameStart = nameOffset + consumed
                inflectionName = inflectionNames[nameStart:nameStart +
                                                 nameLength]

                # Get and apply inflection rule.
                ruleOffset, = struct.unpack_from(
                    '>H', data, idxtPos + 4 + (2 * ruleIndexes[i]))
                ruleLength = ord(data[ruleOffset])
                inflection = self.applyInflectionRule(
                    mainEntry, data, ruleOffset + 1,
                    ruleOffset + 1 + ruleLength)
                if inflection is not None:
                    parts.append('  <idx:iform name="%s" value="%s"/>' % (
                        inflectionName, inflection))

            parts.append("</idx:infl>")
        return "".join(parts)
Exemplo n.º 8
0
    def getPositionMap (self):
        header = self.header
        sect = self.sect

        positionMap = {}

        metaOrthIndex = self.metaOrthIndex
        metaInflIndex = self.metaInflIndex

        decodeInflection = True
        if metaOrthIndex != 0xFFFFFFFF:
            print "Info: Document contains orthographic index, handle as dictionary"
            if metaInflIndex == 0xFFFFFFFF:
                decodeInflection = False
            else:
                metaInflIndexData = sect.loadSection(metaInflIndex)
                metaIndexCount, = struct.unpack_from('>L', metaInflIndexData, 0x18)
                if metaIndexCount != 1:
                    print "Error: Dictionary contains multiple inflection index sections, which is not yet supported"
                    decodeInflection = False
                inflIndexData = sect.loadSection(metaInflIndex + 1)
                inflNameData = sect.loadSection(metaInflIndex + 1 + metaIndexCount)
                tagSectionStart, = struct.unpack_from('>L', metaInflIndexData, 0x04)
                inflectionControlByteCount, inflectionTagTable = readTagSection(tagSectionStart, metaInflIndexData)
                if DEBUG_DICT:
                    print "inflectionTagTable: %s" % inflectionTagTable
                if self.hasTag(inflectionTagTable, 0x07):
                    print "Error: Dictionary uses obsolete inflection rule scheme which is not yet supported"
                    decodeInflection = False

            data = sect.loadSection(metaOrthIndex)
            tagSectionStart, = struct.unpack_from('>L', data, 0x04)
            controlByteCount, tagTable = readTagSection(tagSectionStart, data)
            orthIndexCount, = struct.unpack_from('>L', data, 0x18)
            print "orthIndexCount is", orthIndexCount
            if DEBUG_DICT:
                print "orthTagTable: %s" % tagTable
            hasEntryLength = self.hasTag(tagTable, 0x02)
            if not hasEntryLength:
                print "Info: Index doesn't contain entry length tags"

            print "Read dictionary index data"
            for i in range(metaOrthIndex + 1, metaOrthIndex + 1 + orthIndexCount):
                data = sect.loadSection(i)
                idxtPos, = struct.unpack_from('>L', data, 0x14)
                entryCount, = struct.unpack_from('>L', data, 0x18)
                idxPositions = []
                for j in range(entryCount):
                    pos, = struct.unpack_from('>H', data, idxtPos + 4 + (2 * j))
                    idxPositions.append(pos)
                # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!)
                idxPositions.append(idxtPos)

                for j in range(entryCount):
                    startPos = idxPositions[j]
                    endPos = idxPositions[j+1]
                    textLength = ord(data[startPos])
                    text = data[startPos+1:startPos+1+textLength]
                    tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos)
                    if 0x01 in tagMap:
                        if decodeInflection and 0x2a in tagMap:
                            inflectionGroups = self.getInflectionGroups(text, inflectionControlByteCount, inflectionTagTable, inflIndexData, inflNameData, tagMap[0x2a])
                        else:
                            inflectionGroups = ""
                        assert len(tagMap[0x01]) == 1
                        entryStartPosition = tagMap[0x01][0]
                        if hasEntryLength:
                            # The idx:entry attribute "scriptable" must be present to create entry length tags.
                            ml = '<idx:entry scriptable="yes"><idx:orth value="%s">%s</idx:orth>' % (text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[entryStartPosition] + ml
                            else:
                                positionMap[entryStartPosition] = ml
                            assert len(tagMap[0x02]) == 1
                            entryEndPosition = entryStartPosition + tagMap[0x02][0]
                            if entryEndPosition in positionMap:
                                positionMap[entryEndPosition] = "</idx:entry>" + positionMap[entryEndPosition]
                            else:
                                positionMap[entryEndPosition] = "</idx:entry>"

                        else:
                            indexTags = '<idx:entry>\n<idx:orth value="%s">\n%s</idx:entry>\n' % (text, inflectionGroups)
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[entryStartPosition] + indexTags
                            else:
                                positionMap[entryStartPosition] = indexTags
        return positionMap