Пример #1
0
def read_index(file, index_offset):
    """
    Read the chunk index stored at ``index_offset`` and load every chunk it lists.

    :param file: seekable binary file object (must support ``seek`` and ``read``)
    :param index_offset: absolute offset of the index header within ``file``
    :return: list of ``model.Chunk`` objects, in the order their entries appear in the index
    :raises struct.error: if an 8-byte index entry cannot be read in full
    """
    # Read the index header
    file.seek(index_offset)
    index_header = parse_index_header(file.read(INDEX_HEADER_SIZE))
    LOGGER.debug("Parsed index header: %s", index_header)
    number_of_chunks = index_header["number_of_entries"]

    # The table of (offset, size) pairs starts "entries_size" bytes after the
    # end of the index header; each pair is two little-endian 32-bit unsigned ints.
    file.seek(index_offset + INDEX_HEADER_SIZE + index_header["entries_size"])
    index_entries = [struct.unpack("<2I", file.read(8)) for _ in range(number_of_chunks)]

    # Read attributes and data for each chunk
    chunks = []
    for chunk_attributes_offset, chunk_attributes_size in index_entries:
        # Attribute offsets are relative to the end of the index header
        file.seek(index_offset + INDEX_HEADER_SIZE + chunk_attributes_offset)
        attrs = parse_chunk_attributes(file.read(chunk_attributes_size))
        LOGGER.debug(attrs)

        # Chunk data is addressed by an absolute file offset
        file.seek(attrs["offset"])
        chunk_data = file.read(attrs["size"])

        # Create a new chunk; "name" is optional in the parsed attributes
        children = [
            model.ChunkChild(child["chid"], model.ChunkId(child["tag"], child["number"]))
            for child in attrs["children"]
        ]
        chunks.append(model.Chunk(attrs["tag"], attrs["number"], flags=attrs["flags"],
                                  data=chunk_data, children=children, name=attrs.get("name")))

    return chunks
Пример #2
0
def parse_and_assemble(input_file, opcode_list, verbose=False):
    """
    Parse a script source and assemble it into a chunky file.

    Each line may hold a define (matched by ``DEFINE_FORMAT``), an optional
    ``label:`` prefix, and one of the directives ``stringtable``, ``string``,
    ``script``, ``push`` or an opcode mnemonic. Text after ``#`` is ignored.

    :param input_file: iterable of source lines (strings)
    :param opcode_list: mapping of opcode id to an object exposing ``mnemonic`` and ``opcode``
    :param verbose: when true, also log the labels, string table and formatted script at debug level
    :return: a ``chunkyfilemodel.ChunkyFile`` holding the string table chunk and/or script chunk
    :raises AssembleScriptException: on syntax errors, duplicate or unknown labels, unknown
        commands, or instructions/strings that appear before their ``script``/``stringtable``
        directive
    """

    # TODO: rewrite this using a real parser!

    # Generate reverse lookup for opcodes
    string_to_opcode = {opcode.mnemonic.lower(): opcode_id for opcode_id, opcode in opcode_list.items()}

    # Load known constants, inverted so the loaded mapping's values become the lookup keys
    defs = knownconstants.load_constants()
    defs = {v: k for k, v in defs.items()}

    string_table = None
    script = None
    script_chunk_tag = None
    script_chunk_number = None
    string_table_chunk_tag = "GSTX"
    string_table_chunk_number = None

    # instruction pointer starts at 2
    instruction_pointer = 2
    labels = {"start": instruction_pointer}
    # Values from consecutive "push" lines, merged into one PUSH instruction later
    stack = list()

    for line in input_file:
        # Remove comments and surrounding whitespace. This is pretty rough:
        # a "#" inside a quoted string also starts a comment.
        line = line.split("#", 1)[0].strip()
        if not line:
            continue

        # Check if this is a define
        define_match = DEFINE_FORMAT.match(line)
        if define_match:
            define_name, define_value_str, _ = define_match.groups()
            define_value = parse_number(define_value_str)
            if define_name in defs:
                logger.warning("replacing const value %s with 0x%x (was 0x%x)", define_name, define_value,
                               defs[define_name])
            defs[define_name] = define_value
            continue

        # Split line into components
        split_line = re.match(r"^((?P<label>[A-Za-z0-9_@]+)\:\s*)?(?P<cmd>[A-Za-z]+)?\s*(?P<args>.*)$", line)
        if not split_line:
            continue

        label_name = split_line.group("label")
        # "cmd" is an optional group: it is None on a label-only line
        command = (split_line.group("cmd") or "").lower()
        args = split_line.group("args")

        # Handle labels first
        if label_name:
            if label_name in labels:
                raise AssembleScriptException("Label %s already defined" % label_name)
            labels[label_name] = instruction_pointer

            if not command:
                continue

        if command == "stringtable":
            # Create a new stringtable

            if not args:
                raise AssembleScriptException("syntax: stringtable <string-table-chunk-no>")
            string_table_chunk_number = parse_number(args)

            if string_table is not None:
                raise AssembleScriptException("String table already defined")

            logger.debug("Creating string table: GSTX 0x%x", string_table_chunk_number)
            string_table = stringtable.StringTable()

        elif command == "string":
            # NOTE(review): splitting and re-joining collapses runs of whitespace
            # inside the quoted string; kept as-is to preserve existing output.
            string_args = args.split()
            if len(string_args) < 2:
                raise AssembleScriptException("syntax: string <id> <quoted-string>")

            # Id 0 and the empty string "" are legitimate values, so no truthiness test here
            string_id = parse_number(string_args[0])
            string_value = " ".join(string_args[1:]).strip("\"")

            if string_table is None:
                raise AssembleScriptException("String table not defined")

            logger.debug("Adding to string table: 0x%x -> %s", string_id, string_value)
            string_table[string_id] = string_value

        elif command == "script":
            # New script directive

            script_chunk_tag = None
            script_chunk_number = None

            script_args = args.split()
            if len(script_args) == 2:
                script_chunk_tag = script_args[0].upper()
                script_chunk_number = parse_number(script_args[1])

            if script_chunk_tag is None or script_chunk_number is None:
                raise AssembleScriptException("syntax: script chunk-tag chunk-number")

            if script_chunk_tag not in ("GLSC", "GLOP"):
                raise AssembleScriptException("invalid script chunk tag type")

            if script is not None:
                raise AssembleScriptException("Cannot have more than one script in a source file yet")

            # Use same version as in 3DMM
            compiler_version = chunkyfilemodel.Version(29, 16)
            script = scriptmodel.Script(endianness=chunkyfilemodel.Endianness.LittleEndian,
                                        characterset=chunkyfilemodel.CharacterSet.ANSI,
                                        compilerversion=compiler_version)

        elif command == "push":
            # Special handling for PUSH instructions
            if not args:
                raise AssembleScriptException("syntax: push <value>")

            stack.append(parse_value(args, defs))

        elif command in string_to_opcode:
            # It's an opcode
            if script is None:
                raise AssembleScriptException("Instruction found before script directive")

            opcode = opcode_list[string_to_opcode[command]]

            # Check if we have a variable name
            variable_name = args if args else None

            # If there's existing stuff on the stack, generate a PUSH instruction
            if stack:
                push_instruction = scriptmodel.Instruction(opcode=0, params=list(stack), address=instruction_pointer)
                stack.clear()
                script.instructions.append(push_instruction)
                instruction_pointer += push_instruction.number_of_dwords

            # Generate this instruction
            this_instruction = scriptmodel.Instruction(opcode=opcode.opcode, address=instruction_pointer,
                                                       variable=variable_name)
            script.instructions.append(this_instruction)
            instruction_pointer += this_instruction.number_of_dwords
        else:
            raise AssembleScriptException("invalid command: %s" % (command or line))

    # Add trailing PUSH instruction if required
    if stack:
        if script is None:
            raise AssembleScriptException("Instruction found before script directive")

        push_instruction = scriptmodel.Instruction(opcode=0, params=list(stack), address=instruction_pointer)
        stack.clear()
        script.instructions.append(push_instruction)
        # number_of_dwords is read as an attribute, as in the main loop above
        instruction_pointer += push_instruction.number_of_dwords

    # Log defined labels
    if verbose and labels:
        logger.debug("Labels:")
        for label_name, label_instruction_pointer in labels.items():
            logger.debug("%s: %d", label_name, label_instruction_pointer)

    # Resolve defined labels (nothing to do when the source has no script)
    if script is not None:
        for instruction in script.instructions:
            for param_pos, param in enumerate(instruction.params):
                if not isinstance(param, LabelReference):
                    continue

                # hack to handle output from disassembler
                if param.label_name.startswith("$"):
                    param.label_name = "@" + param.label_name[1:]

                label_ip = labels.get(param.label_name)
                if label_ip is None:
                    raise AssembleScriptException("Label %s not found" % param.label_name)

                # Replace parameter with resolved value
                instruction.params[param_pos] = 0xCC000000 + label_ip

    # Create a chunky file containing the script and string table
    chunky_file = chunkyfilemodel.ChunkyFile(endianness=chunkyfilemodel.Endianness.LittleEndian,
                                             characterset=chunkyfilemodel.CharacterSet.ANSI,
                                             file_type="ASMX")

    if string_table:
        if verbose:
            logger.debug("String table:")
            for string_id, string_value in string_table.items():
                logger.debug("0x%x: %s", string_id, string_value)

        string_table_data = string_table.to_buffer()
        string_table_chunk = chunkyfilemodel.Chunk(tag=string_table_chunk_tag,
                                                   number=string_table_chunk_number,
                                                   data=string_table_data)

        chunky_file.chunks.append(string_table_chunk)

    if script:
        if verbose:
            logger.debug("Script:")
            script_formatter = scriptformatter.TextScriptFormatter()

            logger.debug(
                script_formatter.format_script(script, chunkyfilemodel.ChunkId(script_chunk_tag, script_chunk_number)))

        script_data = assemble_script(script)
        script_chunk = chunkyfilemodel.Chunk(tag=script_chunk_tag,
                                             number=script_chunk_number,
                                             data=script_data)
        if string_table:
            # Attach the string table to the script as child 0
            string_table_id = chunkyfilemodel.ChunkId(string_table_chunk_tag, string_table_chunk_number)
            string_table_child = chunkyfilemodel.ChunkChild(chid=0, ref=string_table_id)
            script_chunk.children.append(string_table_child)

        chunky_file.chunks.append(script_chunk)

    return chunky_file
Пример #3
0
def xml_to_chunky_file(chunky_file, xml_path, change_file_type=False):
    """
    Load chunks from an XML file and add them to a chunky file.

    Chunks whose id already exists in ``chunky_file`` are updated in place
    (name, new children, data); all others are appended as new chunks.

    :param chunky_file: Existing chunky file instance that new chunks will be added to
    :param xml_path: XML filename
    :param change_file_type: change the file type tag in the header
    :raises Exception: if the root element is not ``ChunkyFile`` or a chunk contains
        an element other than ``Child``, ``Data`` or ``File``
    """
    logger = logging.getLogger(__name__)

    # Load XML file
    tree = ElementTree.parse(xml_path)
    chunky_file_xml = tree.getroot()

    # TODO: validate with an XSD?
    if chunky_file_xml.tag != "ChunkyFile":
        raise Exception("Not the right kind of XML file")

    # Set chunky file options if not already set
    file_type = chunky_file_xml.attrib.get("type")
    endianness = model.Endianness[chunky_file_xml.attrib.get(
        "endianness", "LittleEndian")]
    charset = model.CharacterSet[chunky_file_xml.attrib.get("charset", "ANSI")]
    if chunky_file.file_type == EMPTY_FILE:
        chunky_file.file_type = file_type
        chunky_file.endianness = endianness
        chunky_file.characterset = charset
    else:
        # The file type is only replaced when the caller opted in
        if file_type is not None and chunky_file.file_type != file_type and change_file_type:
            logger.warning("Changing file type from %s to %s",
                           chunky_file.file_type, file_type)
            chunky_file.file_type = file_type
        if chunky_file.endianness != endianness:
            logger.warning("Changing file endianness from %s to %s",
                           chunky_file.endianness, endianness)
            chunky_file.endianness = endianness
        if chunky_file.characterset != charset:
            logger.warning("Changing file character set from %s to %s",
                           chunky_file.characterset, charset)
            chunky_file.characterset = charset

    for chunk_xml in chunky_file_xml.findall("Chunk"):
        # Get chunk metadata
        chunk_tag = chunk_xml.attrib["tag"]
        chunk_number = int(chunk_xml.attrib["number"])
        chunk_id = model.ChunkId(chunk_tag, chunk_number)
        chunk_name = chunk_xml.attrib.get("name", None)
        logger.debug("Processing chunk: %s - %s", chunk_id,
                     chunk_name if chunk_name else "n/a")
        chunk_flags = model.ChunkFlags.Default
        if chunk_xml.attrib.get("loner", "false").lower() == "true":
            chunk_flags |= model.ChunkFlags.Loner
        if chunk_xml.attrib.get("compressed", "false").lower() == "true":
            chunk_flags |= model.ChunkFlags.Compressed

        # Get chunk children and data (None means the XML supplied no data)
        chunk_data = None
        chunk_children = list()
        for child_xml in chunk_xml:
            if child_xml.tag == "Child":
                chid = int(child_xml.attrib["chid"])
                tag = child_xml.attrib["tag"]
                number = int(child_xml.attrib["number"])

                chunk_child = model.ChunkChild(chid=chid,
                                               ref=model.ChunkId(tag, number))
                chunk_children.append(chunk_child)

            elif child_xml.tag == "Data":
                # An empty <Data/> element has text None; treat it as zero bytes
                chunk_data = base64.b64decode(child_xml.text or "")
            elif child_xml.tag == "File":
                with open(child_xml.text, "rb") as data_file:
                    chunk_data = data_file.read()
            else:
                raise Exception("unhandled child tag type: %s" % child_xml.tag)

        # Check if there is an existing chunk
        if chunk_id in chunky_file:
            existing_chunk = chunky_file[chunk_id]
            logger.info("%s: Modifying existing chunk", chunk_id)

            # Update chunk metadata
            if chunk_name:
                existing_chunk.name = chunk_name
            if chunk_flags != existing_chunk.flags:
                # TODO: set flags correctly
                # if the loner flag is not set correctly the file won't load
                logger.warning("Chunk flags are different: %s vs %s",
                               existing_chunk.flags, chunk_flags)

            # TODO: update existing children instead of just adding
            for new_child in chunk_children:
                existing_child = [
                    c for c in existing_chunk.children
                    if c.chid == new_child.chid
                ]
                if existing_child:
                    logger.warning("child %s: %s already exists",
                                   existing_chunk, existing_child)
                else:
                    existing_chunk.children.append(new_child)

            # Set chunk data; an explicitly supplied empty payload still replaces it
            # TODO: handle compression
            if chunk_data is not None:
                existing_chunk.raw_data = chunk_data
        else:
            logger.info("%s: Creating new chunk", chunk_id)
            # Create a new chunk
            this_chunk = model.Chunk(chunk_tag,
                                     chunk_number,
                                     chunk_name,
                                     chunk_flags,
                                     data=chunk_data)
            this_chunk.children = chunk_children
            chunky_file.chunks.append(this_chunk)
Пример #4
0
def parse_and_assemble(input_file, opcode_list, verbose=False):
    """
    Parse a script source and assemble it into a chunky file.

    Each non-empty line starts with one of: ``stringtable``, ``string``,
    ``script``, a ``label:``, ``push`` or an opcode mnemonic. Text after
    ``#`` is ignored. Tokens may be separated by any run of whitespace.

    :param input_file: iterable of source lines (strings)
    :param opcode_list: mapping of opcode id to an object exposing ``mnemonic`` and ``opcode``
    :param verbose: when true, also log the labels, string table and formatted script at debug level
    :return: a ``chunkyfilemodel.ChunkyFile`` holding the string table chunk and/or script chunk
    :raises AssembleScriptException: on syntax errors, duplicate labels, unknown commands, or
        instructions/strings that appear before their ``script``/``stringtable`` directive
    """

    # TODO: rewrite this using a real parser!

    # Generate reverse lookup for opcodes
    string_to_opcode = {
        opcode.mnemonic.lower(): opcode_id
        for opcode_id, opcode in opcode_list.items()
    }

    string_table = None
    script = None
    script_chunk_tag = None
    script_chunk_number = None
    string_table_chunk_tag = "GSTX"
    string_table_chunk_number = None

    # instruction pointer starts at 2
    instruction_pointer = 2
    labels = {"start": instruction_pointer}
    # Values from consecutive "push" lines, merged into one PUSH instruction later
    stack = list()

    for line in input_file:
        # Remove comments and surrounding whitespace. This is pretty rough:
        # a "#" inside a quoted string also starts a comment.
        line = line.split("#", 1)[0].strip()

        # Tokenise on any whitespace so tabs and repeated spaces are accepted
        split_line = line.split()
        if not split_line:
            continue

        command = split_line[0].lower()
        if command == "stringtable":
            # Create a new stringtable

            if len(split_line) < 2:
                raise AssembleScriptException(
                    "syntax: stringtable <string-table-chunk-no>")
            string_table_chunk_number = parse_number(split_line[1])

            if string_table is not None:
                raise AssembleScriptException("String table already defined")

            logger.debug("Creating string table: GSTX 0x%x",
                         string_table_chunk_number)
            string_table = stringtable.StringTable()

        elif command == "string":
            if len(split_line) < 3:
                raise AssembleScriptException(
                    "syntax: string <id> <quoted-string>")

            string_id = parse_number(split_line[1])
            # Take the rest of the line verbatim so spacing inside the string survives
            string_value = line.split(None, 2)[2].strip("\"")

            if string_table is None:
                raise AssembleScriptException("String table not defined")

            logger.debug("Adding to string table: 0x%x -> %s", string_id,
                         string_value)
            string_table[string_id] = string_value
        elif command == "script":
            # New script directive
            if len(split_line) < 3:
                raise AssembleScriptException(
                    "syntax: script chunk-tag chunk-number")

            script_chunk_tag = split_line[1].upper()
            if script_chunk_tag not in ("GLSC", "GLOP"):
                raise AssembleScriptException("invalid script chunk tag type")

            script_chunk_number = parse_number(split_line[2])

            if script is not None:
                raise AssembleScriptException("Script already defined")

            # Use same version as in 3DMM
            compiler_version = chunkyfilemodel.Version(29, 16)
            script = scriptmodel.Script(
                endianness=chunkyfilemodel.Endianness.LittleEndian,
                characterset=chunkyfilemodel.CharacterSet.ANSI,
                compilerversion=compiler_version)

        elif command.endswith(":"):
            # Labels. The name comes from the lower-cased first token, and
            # anything after the label on the same line is ignored.
            label_name = command[:-1]
            if label_name in labels:
                raise AssembleScriptException("Label %s already defined" %
                                              label_name)
            labels[label_name] = instruction_pointer
        elif command == "push":
            # Special handling for PUSH instructions
            if len(split_line) != 2:
                raise AssembleScriptException("syntax: push <value>")

            value = parse_value(split_line[1], labels)
            stack.append(value)
        elif command in string_to_opcode:
            # It's an opcode
            if script is None:
                raise AssembleScriptException(
                    "Instruction found before script directive")

            opcode = opcode_list[string_to_opcode[command]]

            # Check if we have a variable name
            variable_name = split_line[1] if len(split_line) > 1 else None

            # If there's existing stuff on the stack, generate a PUSH instruction
            if stack:
                push_instruction = scriptmodel.Instruction(
                    opcode=0, params=list(stack), address=instruction_pointer)
                stack.clear()
                script.instructions.append(push_instruction)
                instruction_pointer += push_instruction.number_of_dwords

            # Generate this instruction
            this_instruction = scriptmodel.Instruction(
                opcode=opcode.opcode,
                address=instruction_pointer,
                variable=variable_name)
            script.instructions.append(this_instruction)
            instruction_pointer += this_instruction.number_of_dwords
        else:
            raise AssembleScriptException("invalid command: %s" % command)

    # Add trailing PUSH instruction if required
    if stack:
        if script is None:
            raise AssembleScriptException(
                "Instruction found before script directive")

        push_instruction = scriptmodel.Instruction(
            opcode=0, params=list(stack), address=instruction_pointer)
        stack.clear()
        script.instructions.append(push_instruction)
        # number_of_dwords is read as an attribute, as in the main loop above
        instruction_pointer += push_instruction.number_of_dwords

    # Log labels
    if verbose and labels:
        logger.debug("Labels:")
        for label_name, label_instruction_pointer in labels.items():
            logger.debug("%s: %d", label_name, label_instruction_pointer)

    # Create a chunky file containing the script and string table
    chunky_file = chunkyfilemodel.ChunkyFile(
        endianness=chunkyfilemodel.Endianness.LittleEndian,
        characterset=chunkyfilemodel.CharacterSet.ANSI,
        file_type="ASMX")

    if string_table:
        if verbose:
            logger.debug("String table:")
            for string_id, string_value in string_table.items():
                logger.debug("0x%x: %s", string_id, string_value)

        string_table_data = string_table.to_buffer()
        string_table_chunk = chunkyfilemodel.Chunk(
            tag=string_table_chunk_tag,
            number=string_table_chunk_number,
            data=string_table_data,
            flags=chunkyfilemodel.ChunkFlags.Loner)

        chunky_file.chunks.append(string_table_chunk)

    if script:
        if verbose:
            logger.debug("Script:")
            script_formatter = scriptformatter.TextScriptFormatter()

            logger.debug(
                script_formatter.format_script(
                    script,
                    chunkyfilemodel.ChunkId(script_chunk_tag,
                                            script_chunk_number)))

        script_data = assemble_script(script)
        script_chunk = chunkyfilemodel.Chunk(tag=script_chunk_tag,
                                             number=script_chunk_number,
                                             data=script_data)
        if string_table:
            # Attach the string table to the script as child 0
            string_table_id = chunkyfilemodel.ChunkId(
                string_table_chunk_tag, string_table_chunk_number)
            string_table_child = chunkyfilemodel.ChunkChild(
                chid=0, ref=string_table_id)
            script_chunk.children.append(string_table_child)

        chunky_file.chunks.append(script_chunk)

    return chunky_file