Exemplo n.º 1
0
 def print_status(self, verbose):
     """Write the reader's status to stdout according to verbosity.

     Nothing is printed when verbose is below 1.  At 1 or above, a one-line
     summary is printed: filename, stream offset, length of the current
     buffer and the result of self.more().  At 2 or above, the escaped
     previous and current buffers are printed as well.
     """
     show_summary = verbose >= 1
     if show_summary:
         # gather the fields first, in the order they appear in the message
         summary_fields = (self.filename, self.stream_offset,
                           len(self.current), self.more())
         summary_format = "# File: '%s', stream_offset: %s, count: %s, more: %r"
         print(summary_format % summary_fields)

     show_buffers = verbose >= 2
     if show_buffers:
         # escape both buffers so they can be printed as text
         escaped_previous = be_scan.escape(self.previous)
         escaped_current = be_scan.escape(self.current)
         print("# previous: '%s'\nCurrent: '%s'" %
               (escaped_previous, escaped_current))
Exemplo n.º 2
0
def recurse(uncompressed_buffer, recursion_prefix, depth):
    """Scan a buffer, print its artifacts and descend into compressed ones.

    The buffer is scanned in a single final pass.  Every artifact found is
    printed.  Artifacts whose class is "zip" or "gzip" are uncompressed
    first, and their uncompressed data is scanned by a nested call while
    depth does not exceed MAX_RECURSION_DEPTH.

    Args:
        uncompressed_buffer: the data to scan.
        recursion_prefix: prefix handed to the scanner's scan_setup and
            shown in the status line.
        depth: current recursion depth; nested calls receive depth + 1.

    Raises:
        ValueError: if the scanner returns a non-empty status.
    """
    compressed_classes = ("zip", "gzip")

    def pending_artifacts(queue):
        """Yield artifacts from queue until it returns a blank one."""
        while True:
            item = queue.get()
            if item.blank():
                return
            yield item

    # runtime status
    if args.verbose >= 1:
        status_fields = (recursion_prefix, len(uncompressed_buffer), depth)
        print("# Processing %s count %d depth %d..." % status_fields)
    if args.verbose >= 2:
        print("# %s\n" % be_scan.escape(uncompressed_buffer))

    # set up a scanner that reports into its own artifact queue
    found = be_scan.artifacts_t()
    scanner = be_scan.scanner_t(scan_engine, found)
    scanner.scan_setup(args.filename, recursion_prefix)

    # scan the whole buffer in one pass
    scan_error = scanner.scan_final(0, "", uncompressed_buffer)
    if scan_error:
        raise ValueError(scan_error)

    # consume the artifacts produced by the scan
    for artifact in pending_artifacts(found):

        # compressed artifacts are uncompressed before being reported
        if artifact.artifact_class in compressed_classes:
            inflated = uncompressor.uncompress(uncompressed_buffer,
                                               artifact.offset)
            if not (inflated.status or inflated.buffer):
                # no error and nothing uncompressed so disregard artifact
                continue

            set_compression_text(artifact, inflated)

            # skip popular useless uncompressed data
            if artifact.artifact == "8da7a0b0144fc58332b03e90aaf7ba25":
                continue

        # no other artifact types need preparation

        # report the artifact
        print(artifact.to_string())

        # only compressed artifacts within the depth limit are descended into
        may_descend = (artifact.artifact_class in compressed_classes
                       and depth <= MAX_RECURSION_DEPTH)
        if not may_descend:
            continue

        # the nested prefix extends this artifact's prefix with its offset
        # and upper-cased class
        child_prefix = "%s-%d-%s" % (artifact.recursion_prefix,
                                     artifact.offset,
                                     artifact.artifact_class.upper())
        recurse(inflated.buffer, child_prefix, depth + 1)
Exemplo n.º 3
0
def test_escape():
    """Check be_scan.escape on a string containing an embedded NUL."""
    raw_text = "a\0b"
    expected_text = "a\\x00b"
    escaped_text = be_scan.escape(raw_text)
    str_equals(escaped_text, expected_text)
Exemplo n.º 4
0
def scan_range(filename, offset, page_size, margin_size, max_depth, verbose):
    """Scan one page of a file and return its artifacts as formatted strings.

    A page of page_size bytes is read at offset, followed by margin_size
    bytes of overflow.  The page is scanned, then the boundary between the
    page and the overflow is scanned.  Artifacts of class "zip" or "gzip"
    are uncompressed and, when max_depth is at least 1, their uncompressed
    data is handed to recurse() for nested scanning.

    Args:
        filename: path of the file to read; also passed to the scanner's
            scan_setup.
        offset: byte offset in the file where the page starts.
        page_size: number of bytes to read for the page.
        margin_size: number of bytes to read after the page.
        max_depth: recursion into compressed data happens only when this
            is at least 1; it is also forwarded to recurse().
        verbose: 1 or more prints a status line; 2 or more also prints the
            escaped buffers and per-artifact buffer-offset traces.

    Returns:
        list: the to_string() form of every artifact kept, in the order
        consumed.  The same list is passed to recurse().

    Raises:
        ValueError: if either scan call returns a non-empty status.
    """
    # read the page and its overflow; the file is not needed after this
    with open(filename, 'rb') as f:
        f.seek(offset)
        buffer1 = f.read(page_size)
        buffer2 = f.read(margin_size)

    # print runtime status
    if verbose >= 1:
        # print buffer offset
        print("# File: '%s', offset: %d, size: %d, overflow size: %d" %
              (filename, offset, len(buffer1), len(buffer2)))
    if verbose >= 2:
        # print buffer contents
        print("# previous: '%s'\nCurrent: '%s'" %
              (be_scan.escape(buffer1), be_scan.escape(buffer2)))

    compressed_classes = ("zip", "gzip")

    # the uncompressor this process will use, created on first need
    uncompressor = None

    # the formatted artifacts
    formatted_artifacts = []

    # create scanner; use the filename actually read, not a global
    artifacts = be_scan.artifacts_t()
    scanner = be_scan.scanner_t(scan_engine, artifacts)
    scanner.scan_setup(filename, "")

    # scan buffer
    status = scanner.scan_stream(offset, "", buffer1)
    if status:
        raise ValueError(status)

    # scan fence
    status = scanner.scan_fence_final(offset, buffer1, buffer2)
    if status:
        raise ValueError(status)

    # consume
    while not artifacts.empty():
        artifact = artifacts.get()

        # check for compression
        if artifact.artifact_class in compressed_classes:

            # get uncompressor ready
            if uncompressor is None:
                uncompressor = be_scan.uncompressor_t()

            # artifact.offset is compared against the page start, so
            # convert it to an index into buffer1
            buffer1_offset = artifact.offset - offset
            if buffer1_offset + margin_size > len(buffer1):
                # fewer than margin_size bytes remain in the page, so
                # append the overflow to the tail of the page
                if verbose >= 2:
                    print("scan_range.a buffer1 offset: %d" % buffer1_offset)
                buffer3 = buffer1[buffer1_offset:] + buffer2
                uncompressed = uncompressor.uncompress(buffer3, 0)
            else:
                # enough data remains in the page itself
                if verbose >= 2:
                    print("scan_range.b buffer1 offset: %d" % buffer1_offset)
                uncompressed = uncompressor.uncompress(
                    buffer1, buffer1_offset)

            # no error and nothing uncompressed so disregard this artifact
            if not uncompressed.status and not uncompressed.buffer:
                continue

            # set artifact text for this uncompression
            set_compression_text(artifact, uncompressed)

            # skip popular useless uncompressed data
            if artifact.artifact == "8da7a0b0144fc58332b03e90aaf7ba25":
                continue

        # no other artifact types need preparation

        # consume this artifact
        formatted_artifacts.append(artifact.to_string())

        # manage recursion
        if artifact.artifact_class in compressed_classes and max_depth >= 1:

            # calculate recursion prefix for first recursion depth
            next_recursion_prefix = "%d-%s" % (
                artifact.offset, artifact.artifact_class.upper())

            # recurse
            recurse(uncompressed.buffer, next_recursion_prefix, 1,
                    uncompressor, formatted_artifacts, max_depth, verbose)

    # return the accumulated list of formatted artifacts
    return formatted_artifacts