Exemplo n.º 1
0
    def parse(self):
        """Parse the input as a Java class file and sanity check it.

        The parsed structure is stored in ``self.data``. Afterwards the
        constant pool is walked and every name that is looked up later
        (the entry that ``this_class`` points to and all string
        constants) is resolved once, so that constant pool pointers
        that do not lead to a name are detected here.

        Raises:
            UnpackParserException: if the data cannot be parsed, or if
                a constant pool entry that should carry a name does
                not expose one.
        """
        try:
            self.data = java_class.JavaClass.from_io(self.infile)
        except (Exception, ValidationFailedError) as e:
            raise UnpackParserException(e.args) from e

        # make sure that all the pointers
        # into the constant pool are actually valid.
        # Indices are counted from 1 and every entry, including the
        # skipped ones, advances the index by one.
        string_cp_info = java_class.JavaClass.StringCpInfo
        for constant_pool_index, entry in enumerate(self.data.constant_pool, start=1):
            # entries flagged this way carry no data of their own
            # (presumably the slot following a two-slot constant)
            if entry.is_prev_two_entries:
                continue

            if (self.data.this_class == constant_pool_index
                    or type(entry.cp_info) is string_cp_info):
                try:
                    # only the lookup matters here, the decoded
                    # value itself is not needed
                    mutf8.decode_modified_utf8(entry.cp_info.name_as_str)
                except UnicodeDecodeError:
                    # This shouldn't happen and means there
                    # is an error in the mutf8 package
                    pass
                except AttributeError as e:
                    # the entry does not resolve to a name, so the
                    # pointer is invalid: reject the file instead of
                    # leaking a raw AttributeError to the caller
                    raise UnpackParserException(e.args) from e
Exemplo n.º 2
0
 def _read_utf8(read):
     """Reads a length-prefixed MUTF-8 string.

     Args:
         read: callable invoked as ``read(fmt, size)`` whose result is
             indexed with ``[0]`` to obtain the length; it must also
             expose the underlying stream as ``read.src``.

     Returns:
         The string decoded by ``mutf8.decode_modified_utf8``.
     """
     # The length prefix is read as an unsigned 16 bit value ('H').
     # With the signed 'h' format a length of 0x8000 or more would come
     # back negative and ``read.src.read()`` would then consume the rest
     # of the stream instead of just the string.
     name_length = read('H', 2)[0]
     return mutf8.decode_modified_utf8(read.src.read(name_length))
Exemplo n.º 3
0
    def set_metadata_and_labels(self):
        """Collect Java class metadata and store it with the labels.

        The metadata dictionary handed to ``self.unpack_results`` holds:

        * ``strings``: decoded string constants from the constant pool
        * ``classname``: name of the entry ``this_class`` points to
        * ``fields``: decoded field names
        * ``methods``: decoded method names
        * ``sourcefile``: value of the ``SourceFile`` attribute

        ``classname`` and ``sourcefile`` are only present when they were
        found and could be decoded. Names that fail MUTF-8 decoding are
        left out silently. Interfaces are not recorded yet (TODO).
        """
        decode = mutf8.decode_modified_utf8
        string_cp_info = java_class.JavaClass.StringCpInfo

        def decodable_names(members):
            # names of all members that survive MUTF-8 decoding
            names = []
            for member in members:
                try:
                    names.append(decode(member.name_as_str))
                except UnicodeDecodeError:
                    # This shouldn't happen and means there
                    # is an error in the mutf8 package
                    pass
            return names

        metadata = {'strings': []}

        # the constant pool is the only place where the class name
        # and the string constants can be found. Indices start at 1
        # and skipped entries still take up an index.
        for pool_index, entry in enumerate(self.data.constant_pool, start=1):
            if entry.is_prev_two_entries:
                continue

            if self.data.this_class == pool_index:
                try:
                    class_name = decode(entry.cp_info.name_as_str)
                except UnicodeDecodeError:
                    # This shouldn't happen and means there
                    # is an error in the mutf8 package
                    pass
                else:
                    metadata['classname'] = class_name

            if type(entry.cp_info) == string_cp_info:
                try:
                    string_constant = decode(entry.cp_info.name_as_str)
                except UnicodeDecodeError:
                    # This shouldn't happen and means there
                    # is an error in the mutf8 package
                    pass
                else:
                    metadata['strings'].append(string_constant)

        metadata['fields'] = decodable_names(self.data.fields)
        metadata['methods'] = decodable_names(self.data.methods)

        # the source file name is stored in the 'SourceFile' attribute
        for attribute in self.data.attributes:
            try:
                attribute_name = decode(attribute.name_as_str)
            except UnicodeDecodeError:
                # This shouldn't happen and means there
                # is an error in the mutf8 package
                continue

            if attribute_name != 'SourceFile':
                continue

            try:
                source_file = decode(attribute.info.sourcefile_as_str)
            except UnicodeDecodeError:
                # This shouldn't happen and means there
                # is an error in the mutf8 package
                continue
            metadata['sourcefile'] = source_file

        labels = ['java class']
        self.unpack_results.set_metadata(metadata)
        self.unpack_results.set_labels(labels)
Exemplo n.º 4
0
 def unpack_name(buf) -> str:
     """Read a length-prefixed name from ``buf`` and decode it.

     ``buf.unpack("H")`` supplies the number of bytes to read; the bytes
     returned by ``buf.read`` are passed to ``decode_modified_utf8``.
     """
     name_length = buf.unpack("H")
     raw_name = buf.read(name_length)
     return decode_modified_utf8(raw_name)
Exemplo n.º 5
0
    def set_metadata_and_labels(self):
        """sets metadata and labels for the unpackresults

        The metadata contains the Dex version and, for every class
        definition that has class data, a dictionary with:

        * ``classname``: the type name without its first and last
          character (left out if it cannot be decoded)
        * ``source``: the source file name (left out if absent or if it
          cannot be decoded)
        * ``methods``: direct methods followed by virtual methods, see
          ``_dex_method_entries``
        * ``fields``: static fields followed by instance fields, see
          ``_dex_field_entries``
        """
        labels = ['dex', 'android']
        metadata = {}
        metadata['version'] = self.data.header.version_str
        metadata['classes'] = []

        for class_definition in self.data.class_defs:
            class_data = class_definition.class_data
            if class_data is None:
                continue
            class_obj = {}
            try:
                class_obj['classname'] = mutf8.decode_modified_utf8(class_definition.type_name[1:-1])
            except UnicodeDecodeError:
                pass
            if class_definition.sourcefile_name is not None:
                # best effort, just like the class name: a source file
                # name that cannot be decoded should not abort the
                # processing of the complete file
                try:
                    class_obj['source'] = mutf8.decode_modified_utf8(class_definition.sourcefile_name)
                except UnicodeDecodeError:
                    pass

            class_obj['methods'] = (
                self._dex_method_entries(class_data.direct_methods, 'direct')
                + self._dex_method_entries(class_data.virtual_methods, 'virtual'))

            class_obj['fields'] = (
                self._dex_field_entries(class_data.static_fields, 'static')
                + self._dex_field_entries(class_data.instance_fields, 'instance'))

            metadata['classes'].append(class_obj)

        self.unpack_results.set_metadata(metadata)
        self.unpack_results.set_labels(labels)

    def _dex_method_entries(self, encoded_methods, method_type):
        """Return the metadata entries for one list of encoded methods.

        Args:
            encoded_methods: the direct or virtual methods of a class.
            method_type: value stored under ``method_type`` in every
                entry ('direct' or 'virtual').

        Returns:
            A list with one dictionary (``name``, ``method_type``,
            ``bytecode_hashes``, ``strings``) per method that has code.
        """
        entries = []
        method_id = 0
        for method in encoded_methods:
            # The index is stored as a difference to the index of the
            # previous element in the list, so it has to be accumulated
            # for every element, including the ones without code that
            # are skipped below. Otherwise all following methods would
            # be resolved to the wrong name.
            method_id += method.method_idx_diff.value
            if method.code is None:
                continue

            insns = method.code.insns

            # compute various hashes for the bytecode and store them.
            # 'TNULL' from tlsh is recorded as "no hash".
            tlsh_hash = tlsh.hash(insns)
            hashes = {
                'sha256': hashlib.sha256(insns).hexdigest(),
                'tlsh': tlsh_hash if tlsh_hash != 'TNULL' else None,
            }

            # extract the relevant strings from the bytecode and store them
            strings = []
            for string_index in self.parse_bytecode(insns):
                try:
                    # decoding errors shouldn't happen, but there was a
                    # bug in mutf8: https://github.com/TkTech/mutf8/issues/1
                    bytecode_string = mutf8.decode_modified_utf8(
                        self.data.string_ids[string_index].value.data)
                    # only keep strings that can be encoded again, as
                    # there is likely a bug in mutf8
                    bytecode_string.encode()
                except (UnicodeDecodeError, UnicodeEncodeError):
                    continue
                strings.append(bytecode_string)

            method_name = mutf8.decode_modified_utf8(self.data.method_ids[method_id].method_name)
            entries.append({'name': method_name,
                            'method_type': method_type, 'bytecode_hashes': hashes,
                            'strings': strings})
        return entries

    def _dex_field_entries(self, encoded_fields, field_kind):
        """Return the metadata entries for one list of encoded fields.

        Args:
            encoded_fields: the static or instance fields of a class.
            field_kind: value stored under ``field_type`` in every entry
                ('static' or 'instance').

        Returns:
            A list with one dictionary (``name``, ``type``, ``class``,
            ``field_type``) per field. ``class`` is ``None`` if the
            class name cannot be decoded.
        """
        entries = []
        field_id = 0
        for field in encoded_fields:
            # stored as a difference to the previous field index
            field_id += field.field_idx_diff.value
            field_ref = self.data.field_ids[field_id]

            field_type = mutf8.decode_modified_utf8(field_ref.type_name)
            if field_type.endswith(';'):
                # strip the first character and the trailing ';'
                field_type = field_type[1:-1]

            # start from a known value, so a decoding failure can never
            # leave the name undefined or reuse the one of the previous field
            class_type = None
            try:
                class_type = mutf8.decode_modified_utf8(field_ref.class_name)
            except UnicodeError:
                pass
            else:
                if class_type.endswith(';'):
                    class_type = class_type[1:-1]

            field_name = mutf8.decode_modified_utf8(field_ref.field_name)
            entries.append({'name': field_name,
                            'type': field_type, 'class': class_type,
                            'field_type': field_kind})
        return entries