Exemplo n.º 1
0
def parse_index_record(table,
                       data,
                       control_byte_count,
                       tags,
                       codec,
                       ordt_map,
                       strict=False):
    """Parse one INDX record and add its entries to ``table``.

    The header returned by ``parse_indx_header(data)`` supplies the offset
    of the IDXT block (``header['start']``) and the number of entries
    (``header['count']``). The IDXT block holds one big-endian unsigned
    16-bit start offset per entry, right after its 4-byte ``IDXT`` tag.

    For every entry the identifier is decoded with ``decode_string`` and
    the remaining bytes are handed to ``get_tag_map``; the result is stored
    as ``table[ident] = tag_map``.

    :param table: mapping that is updated in place, keyed by identifier.
    :param data: raw bytes of the INDX record.
    :param control_byte_count: passed through to ``get_tag_map``.
    :param tags: passed through to ``get_tag_map``.
    :param codec: codec tried first when decoding each identifier.
    :param ordt_map: passed through to ``decode_string``.
    :param strict: passed through to ``get_tag_map``.
    :return: the header produced by ``parse_indx_header``.
    """
    header = parse_indx_header(data)
    idxt_pos = header['start']
    if data[idxt_pos:idxt_pos + 4] != b'IDXT':
        # Best effort: only warn, parsing continues with the given offset.
        print('WARNING: Invalid INDX record')
    entry_count = header['count']

    # Build up the entry start offsets stored after the 4-byte IDXT tag.
    idx_positions = [
        struct.unpack_from(b'>H', data, idxt_pos + 4 + (2 * j))[0]
        for j in range(entry_count)
    ]
    # The last entry ends before the IDXT tag (but there might be zero fill
    # bytes we need to ignore!)
    idx_positions.append(idxt_pos)

    # Each entry spans from its own start offset to the next one; build up
    # the tag map and any associated text for it.
    for start, end in zip(idx_positions, idx_positions[1:]):
        rec = data[start:end]
        # Sometimes (in the guide table if the type attribute has non ascii
        # values) the ident is UTF-16 encoded. Try to handle that.
        try:
            ident, consumed = decode_string(rec,
                                            codec=codec,
                                            ordt_map=ordt_map)
        except UnicodeDecodeError:
            ident, consumed = decode_string(rec,
                                            codec='utf-16',
                                            ordt_map=ordt_map)
        if '\x00' in ident:
            # Embedded NULs suggest the bytes were really UTF-16, so retry
            # with that codec.
            try:
                ident, consumed = decode_string(rec,
                                                codec='utf-16',
                                                ordt_map=ordt_map)
            except UnicodeDecodeError:
                # Keep the first decoding but strip the NUL characters.
                # (Previously this replaced the literal 'u\x00', a misplaced
                # string prefix, which left the NULs in place.)
                ident = ident.replace('\x00', '')
        rec = rec[consumed:]
        table[ident] = get_tag_map(control_byte_count,
                                   tags,
                                   rec,
                                   strict=strict)
    return header
Exemplo n.º 2
0
def parse_index_record(table, data, control_byte_count, tags, codec,
        ordt_map, strict=False):
    '''
    Parse a single INDX record and store its entries in ``table``.

    ``parse_indx_header(data)`` yields the header; ``header['start']`` is
    used as the offset of the IDXT block and ``header['count']`` as the
    number of entries. After the 4 byte ``IDXT`` tag the block contains one
    big-endian unsigned 16 bit start offset per entry.

    Every entry's identifier is decoded via ``decode_string`` and the bytes
    following it are passed to ``get_tag_map``. The resulting tag map is
    stored in ``table`` under the identifier.

    :param table: mapping updated in place (identifier -> tag map).
    :param data: raw bytes of the INDX record.
    :param control_byte_count: forwarded to ``get_tag_map``.
    :param tags: forwarded to ``get_tag_map``.
    :param codec: first codec tried for decoding identifiers.
    :param ordt_map: forwarded to ``decode_string``.
    :param strict: forwarded to ``get_tag_map``.
    :return: the header returned by ``parse_indx_header``.
    '''
    header = parse_indx_header(data)
    idxt_pos = header['start']
    if data[idxt_pos:idxt_pos+4] != b'IDXT':
        # Only a warning: parsing carries on with the offset from the header
        print('WARNING: Invalid INDX record')
    entry_count = header['count']

    # Collect the entry start offsets that follow the 4 byte IDXT tag
    offsets_base = idxt_pos + 4
    idx_positions = [
        struct.unpack_from(b'>H', data, offsets_base + (2 * j))[0]
        for j in range(entry_count)]
    # The last entry ends before the IDXT tag (but there might be zero fill
    # bytes we need to ignore!)
    idx_positions.append(idxt_pos)

    # For each entry in the IDXT build up the tag map and any associated
    # text. Consecutive offsets delimit one entry.
    for start, end in zip(idx_positions, idx_positions[1:]):
        rec = data[start:end]
        # Sometimes (in the guide table if the type attribute has non ascii
        # values) the ident is UTF-16 encoded. Try to handle that.
        try:
            ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map)
        except UnicodeDecodeError:
            ident, consumed = decode_string(rec, codec='utf-16', ordt_map=ordt_map)
        if u'\x00' in ident:
            # NUL characters hint at UTF-16 data decoded with the wrong
            # codec, so retry as UTF-16
            try:
                ident, consumed = decode_string(rec, codec='utf-16',
                        ordt_map=ordt_map)
            except UnicodeDecodeError:
                # Fall back to the first decoding with the NULs removed.
                # The old code replaced 'u\x00' (a misplaced u prefix), which
                # never stripped the NUL characters themselves.
                ident = ident.replace(u'\x00', u'')
        rec = rec[consumed:]
        tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
        table[ident] = tag_map
    return header