Пример #1
0
def check_with_matches(filename, matches):
    """Look for data appended after the segment of a WebM file.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsWEBM'
    :return: A PolyglotLevel, or None if the 'IsWEBM' rule did not match
    """
    if 'IsWEBM' not in matches:
        return None
    level = PolyglotLevel()
    reader = WebmReader(filename)
    try:
        reader.read_ebml_header()
        reader.read_segment_id()
        segment_size = reader.read_size()
        # The segment starts right after its size field, i.e. at the
        # reader's current position.
        webm_size = segment_size + reader.file.tell()
    finally:
        # Release the reader even when one of the reads above raises.
        reader.close()
    file_size = os.stat(filename).st_size
    if file_size > webm_size:
        level.add_chunk(webm_size, file_size - webm_size)
    return level
Пример #2
0
def check_with_matches(filename, matches):
    """Look for data placed before the magic or after the last EOF of a PDF.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsPDF'
    :return: A PolyglotLevel, or None if the 'IsPDF' rule did not match
    """
    if 'IsPDF' not in matches:
        return None

    level = PolyglotLevel()

    if 'HasTruncatedMagic' in matches:
        truncated_at = matches['HasTruncatedMagic'].strings[0][0]

        full_at = None
        if 'HasMagic' in matches:
            full_at = matches['HasMagic'].strings[0][0]

        # The level stays valid only when the full magic sits exactly where
        # the first truncated magic was found, within the first 1024 bytes.
        magic_is_first = full_at == truncated_at and truncated_at <= 1024
        if not magic_is_first:
            level.invalid()

        # Anything located before the magic is reported as a chunk.
        if truncated_at > 0:
            level.add_chunk(0, truncated_at)

    last_eof = matches['HasEOF'].strings[-1] if 'HasEOF' in matches else None
    file_size = os.stat(filename).st_size

    if last_eof is None:
        return level

    # End of the PDF = offset of the last EOF match plus the matched length.
    pdf_end = last_eof[0] + len(last_eof[2])
    if pdf_end < file_size:
        level.add_chunk(pdf_end, file_size - pdf_end)

    return level
Пример #3
0
def check_with_matches(filename, matches):
    """Look for data appended after the declared end of a BMP file.

    The declared size is the 32-bit little-endian integer stored at offset 2
    of the file; it is compared against the real size of the file.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsBMP'
    :return: A PolyglotLevel, or None if the 'IsBMP' rule did not match or
        opening the image raised SyntaxError
    """
    if 'IsBMP' not in matches:
        return None

    try:
        with BmpImageFile(filename) as image:
            level = PolyglotLevel()
            image.fp.seek(2)
            image_size = unpack('<I', image.fp.read(4))[0]
            image.fp.seek(0, io.SEEK_END)
            file_size = image.fp.tell()
            # Only report data located *after* the declared size. A header
            # announcing more bytes than the file holds (truncated file)
            # must not produce a chunk with a negative length.
            if file_size > image_size:
                level.add_chunk(image_size, file_size - image_size)
            return level
    except SyntaxError:
        return None
Пример #4
0
def check_with_matches(filename, matches):
    """Look for data appended after the last frame of a GIF file.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsGIF'
    :return: A PolyglotLevel, or None if the 'IsGIF' rule did not match or
        a SyntaxError was raised while reading the image
    """
    if 'IsGIF' not in matches:
        return None

    try:
        with GifImageFile(filename) as gif:
            # Jump to the last frame and drain its data blocks.
            gif.seek(gif.n_frames - 1)
            while gif.data():
                pass

            level = PolyglotLevel()
            stream = gif.fp
            gif_end = stream.tell()
            # A ';' byte right after the last frame is counted as part of
            # the image.
            if stream.read(1) == b';':
                gif_end += 1

            stream.seek(0, io.SEEK_END)
            total_size = stream.tell()
            if gif_end != total_size:
                level.add_chunk(gif_end, total_size - gif_end)
            return level
    except SyntaxError:
        return None
Пример #5
0
def check_with_matches(filename, matches):
    """Look for data appended after the end section of a PNG file.

    Sections are walked one by one until the one named _PNG_END_SECTION has
    been skipped; whatever lies beyond that point is reported as a chunk.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsPNG'
    :return: A PolyglotLevel, or None if the 'IsPNG' rule did not match, the
        file does not start with _MAGIC, or a SyntaxError was raised while
        walking the sections
    """
    if 'IsPNG' not in matches:
        return None

    with open(filename, 'rb') as file:
        if file.read(len(_MAGIC)) != _MAGIC:
            return None
        try:
            name = ''
            while name != _PNG_END_SECTION:
                name, length = read_section(file)
                # Skip the section payload and its CRC.
                file.seek(length + _CRC_SIZE, io.SEEK_CUR)
            png_end = file.tell()

            file.seek(0, io.SEEK_END)
            file_size = file.tell()

            level = PolyglotLevel()
            # The relative seek above may land past the end of a truncated
            # file; only report a chunk when there really is data after the
            # end section, never one with a negative length.
            if file_size > png_end:
                level.add_chunk(png_end, file_size - png_end)
            return level
        except SyntaxError:
            return None
Пример #6
0
def check_with_matches(filename, matches):
    """Look for data appended after the end marker of a JPG file.

    The header sections are skipped up to the start-of-scan section, then the
    first 'HasEndMarker' match located after that point is taken as the end
    of the image.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsJPG'
    :return: A PolyglotLevel (the result of ``level.invalid()`` when the
        sections cannot be walked), or None if the 'IsJPG' rule did not
        match or a ValueError was raised outside of the section walk
    """
    if 'IsJPG' not in matches:
        return None

    with open(filename, 'rb') as file:
        try:
            with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as buf:
                level = PolyglotLevel()

                buf.seek(len(__JPG_MAGIC))
                try:
                    section = b''
                    while section != __JPG_START_OF_SCAN:
                        section, length = read_section(buf)
                        buf.seek(length - 2, io.SEEK_CUR)
                except (ValueError, SyntaxError):
                    return level.invalid()

                scan_offset = buf.tell()

                # Without any 'HasEndMarker' match there is nothing to scan;
                # fall back to an empty list instead of iterating over None.
                end_marker_matches = (matches['HasEndMarker'].strings
                                      if 'HasEndMarker' in matches else [])
                # First end marker located after the start of scan, if any.
                end_marker_offset = next(
                    (m[0] for m in end_marker_matches if m[0] > scan_offset),
                    None)
                if end_marker_offset is not None:
                    end_offset = end_marker_offset + len(__JPG_END_MARKER)
                    if end_offset < buf.size():
                        level.add_chunk(end_offset, buf.size() - end_offset)
                return level

        except ValueError:
            return None
Пример #7
0
def check_with_matches(filename: str, matches):
    """Look for data placed before or after the AVI content of a file.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'AVIHeader'
    :return: A PolyglotLevel, or None if the 'AVIHeader' rule did not match
    """
    if 'AVIHeader' not in matches:
        return None

    level = PolyglotLevel()
    header = matches['AVIHeader'].strings[0]
    start = header[0]

    # Bytes preceding the first header match.
    if start > 0:
        level.add_chunk(0, start)

    # Size is decoded from the matched header bytes by __get_size.
    end_offset = start + __get_size(header[2])
    file_size = os.stat(filename).st_size

    # Bytes following the declared end of the content.
    if end_offset < file_size:
        level.add_chunk(end_offset, file_size - end_offset)
    return level
Пример #8
0
def check_with_matches(filename: str, matches):
    """Look for data located before, between or after the MP3 frames.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'MP3Header'
    :return: A PolyglotLevel, or None if the 'MP3Header' rule did not match
        or fewer than 50 headers are accepted by __is_good
    """
    if 'MP3Header' not in matches:
        return None

    header_rule = matches['MP3Header']
    frames = [candidate for candidate in header_rule.strings
              if __is_good(candidate)]
    # Heuristic to reduce the number of false positives:
    # each frame is 28ms long, so we search for at least 50 frames
    if len(frames) < 50:
        return None

    audio_start = 0
    if 'HasID3' in matches:
        id3_header = matches['HasID3'].strings[0][2]
        # 10 bytes plus the size decoded from the header bytes at index 6+.
        audio_start = 10 + __synchsafe(bytes(id3_header[6:]))

    level = PolyglotLevel()
    # The first match is taken from the unfiltered list of headers.
    first_offset = header_rule.strings[0][0]
    if first_offset > audio_start:
        level.add_chunk(0, first_offset)

    position = 0
    while position < len(frames):
        frame = frames[position]
        flags = int(frame[2][2])  # third byte of the matched header
        bitrate = __bitrate_conversion[(flags & 0xF0) >> 4] * 1000
        sampling_frequency = __sampling_conversion[(flags & 0x0C) >> 2]
        padding = (flags & 0x02) >> 1
        # Source for computation : https://www.researchgate.net/publication/225793510_A_study_on_multimedia_file_carving_method, page 8
        unit_size = math.floor(144 * bitrate / sampling_frequency) + padding
        frame_end = frame[0] + unit_size

        # Index of the first header starting at or after the end of this
        # frame, if there is one.
        following = next(
            (index for index, other in enumerate(frames)
             if other[0] >= frame_end),
            None)

        if following is None:
            # Last frame: compare its end with the end of the file.
            # NOTE(review): '!=' also fires when the last frame runs past
            # the end of the file, giving a negative length - confirm intent.
            file_size = os.stat(filename).st_size
            if file_size != frame_end:
                level.add_chunk(frame_end, file_size - frame_end)
            break

        next_offset = frames[following][0]
        if next_offset != frame_end:
            # Gap between the end of this frame and the next header.
            level.add_chunk(frame_end, next_offset - frame_end)
        position = following
    return level
Пример #9
0
def check_with_matches(filename: str, matches):
    """Look for data located before, between or after the OGG pages.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'OGGHeader'
    :return: A PolyglotLevel, or None if the 'OGGHeader' rule did not match
    """
    if 'OGGHeader' not in matches:
        return None

    level = PolyglotLevel()
    pages = matches['OGGHeader'].strings

    # Bytes preceding the first page header.
    first_offset = pages[0][0]
    if first_offset > 0:
        level.add_chunk(0, first_offset)

    last_index = len(pages) - 1
    for index, page in enumerate(pages):
        page_end = page[0] + __get_page_size(filename, page)
        if index != last_index:
            # Gap between the end of this page and the next page header.
            next_offset = pages[index + 1][0]
            if next_offset > page_end:
                level.add_chunk(page_end, next_offset - page_end)
        else:
            # Last page: compare its end with the end of the file.
            # NOTE(review): '!=' also fires when the page runs past the end
            # of the file, giving a negative length - confirm intent.
            file_size = os.stat(filename).st_size
            if file_size != page_end:
                level.add_chunk(page_end, file_size - page_end)
    return level
Пример #10
0
def check_with_matches(filename, matches):
    """Look for data placed before the magic or after the parsed RAR data.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsRAR'
    :return: A PolyglotLevel, or None if the 'IsRAR' rule did not match or
        parsing the archive raised SyntaxError
    """
    if 'IsRAR' not in matches:
        return None

    try:
        with _RARFile(filename) as archive:
            level = PolyglotLevel(is_valid=archive.is_valid)

            # Bytes preceding the archive magic.
            magic_offset = archive.magic_offset
            if magic_offset != 0:
                level.add_chunk(0, magic_offset)

            # Bytes left between the parser position and the end of buffer.
            parsed_end = archive.buf.tell()
            total_size = archive.buf.size()
            if parsed_end != total_size:
                level.add_chunk(parsed_end, total_size - parsed_end)
            return level
    except SyntaxError:
        return None
Пример #11
0
def check_with_matches(filename, matches):
    """
    Check whether the file is a TIFF file and, if so, whether it may contain other formats.
    WARNING: The method used to detect unused garbage at the end of the file is not perfect!
    It only checks whether the last unread zone reaches the end of the file, so it would be very
    easy for an attacker to craft a TIFF with a tag whose offset points at the end of the file.
    :param filename: Path to the file
    :param matches: Mapping of matched rule names; must contain 'IsTIFF'
    :return: A PolyglotLevel or None if the file is not a TIFF
    """

    if 'IsTIFF' not in matches:
        return None

    try:
        with _TIFFFile(filename) as tiff:
            level = PolyglotLevel()
            for zone in tiff.buf.get_not_read_zones():
                # FIXME Add the other unread zones once the parser reads image data
                # For now only a zone ending exactly at the end of the file is kept
                if zone[0] + zone[1] != tiff.buf.size():
                    continue
                level.suspicious_chunks.append(zone)
            return level
    except SyntaxError:
        return None
Пример #12
0
def check_with_matches(filename: str, matches):
    """Look for non-whitespace data around the HTML document of a file.

    The document is delimited by the doctype and/or the first of the
    <html>, <body> or <script> tags found in the file.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsHTML'
    :return: A PolyglotLevel, or None if the 'IsHTML' rule did not match or
        neither the doctype nor any of the tags is found
    """
    if 'IsHTML' not in matches:
        return None

    with open(filename, 'rb') as file, \
            mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as buf:
        doc_start = doc_end = -1

        doctype_at = buf.find(__DOCTYPE)
        if doctype_at != -1:
            doc_start = doctype_at
            doc_end = doctype_at + len(__DOCTYPE)

        # Only the first tag found (in this priority order) is considered.
        for tag in (b'html', b'body', b'script'):
            opening_at = buf.find(b'<' + tag + b'>')
            if opening_at == -1:
                continue
            if doc_start == -1:
                doc_start = opening_at
            closing_at = buf.find(b'</' + tag + b'>')
            if closing_at != -1:
                # End right after '</tag>' (3 extra characters: '<', '/', '>').
                doc_end = closing_at + len(tag) + 3
            elif doc_end == -1:
                # No closing tag and no doctype: end right after '<tag>'.
                doc_end = opening_at + len(tag) + 2
            break

        if doc_start == -1:
            return None

        level = PolyglotLevel()

        # Content located before the document.
        buf.seek(0, io.SEEK_SET)
        leading = buf.read(doc_start)
        if not __is_whitespace(leading):
            level.add_chunk(0, doc_start)

        # Content located after the document.
        buf.seek(doc_end)
        trailing = buf.read()
        if not __is_whitespace(trailing):
            level.add_chunk(doc_end, len(trailing))
        return level
Пример #13
0
def check_with_matches(filename, matches):
    """Look for data around a ZIP archive and tag known ZIP-based formats.

    :param filename: Path to the file to inspect
    :param matches: Mapping of matched rule names; must contain 'IsZIP'
    :return: A PolyglotLevel, or None if the 'IsZIP' rule did not match
    """
    zip_rule = matches.get('IsZIP', None)
    if zip_rule is None:
        return None

    flag = PolyglotLevel()
    file_size = os.stat(filename).st_size

    # NOTE(review): this takes the FIRST '$EOCD_magic' match in the list;
    # confirm whether the last one was intended.
    eocd_matches = [s for s in zip_rule.strings if s[1] == '$EOCD_magic']
    eocd_offset = eocd_matches[0][0]

    if 'HasZIPMagic' in matches:
        magic_rule = matches['HasZIPMagic']
        # Lowest offset among the ZIP magic matches.
        first_magic_offset = sorted(s[0] for s in magic_rule.strings)[0]
        if first_magic_offset != 0:
            flag.add_chunk(0, first_magic_offset)

    # TODO Take comment in account ? Mark as less suspicious ?
    eocd_min_end = eocd_offset + __EOCD_MIN_SIZE
    if eocd_min_end < file_size:
        flag.add_chunk(eocd_min_end, file_size - eocd_min_end)

    # Tag the ZIP-based formats whose rule matched as well.
    for rule_name, embedded_type in (('IsDOCX', 'docx'),
                                     ('IsJAR', 'jar'),
                                     ('IsAPK', 'apk')):
        if rule_name in matches:
            flag.embed(embedded_type)

    return flag
Пример #14
0
def check_with_matches(filename, matches):
    """Return a fresh PolyglotLevel when the 'HasPHPOpen' rule matched.

    :param filename: Path to the file (not used by this check)
    :param matches: Mapping of matched rule names
    :return: A PolyglotLevel, or None if 'HasPHPOpen' did not match
    """
    return PolyglotLevel() if 'HasPHPOpen' in matches else None
Пример #15
0
def check_with_matches(filename, matches):
    """Return a fresh PolyglotLevel when the 'IsELF' rule matched.

    :param filename: Path to the file (not used by this check)
    :param matches: Mapping of matched rule names
    :return: A PolyglotLevel, or None if 'IsELF' did not match
    """
    return PolyglotLevel() if 'IsELF' in matches else None