def test_all():
    """Verify VolumeObject set semantics: distinct volumes occupy distinct
    set slots, while repeated references to one volume collapse to one entry."""
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    v0 = Objects.VolumeObject()
    v1 = Objects.VolumeObject()

    # Two separate volume objects must both survive set insertion.
    s0 = {v0, v1}
    _logger.debug("len(s0) = %r" % len(s0))
    assert len(s0) == 2

    f0 = Objects.FileObject()
    f1 = Objects.FileObject()
    for fobj in (f0, f1):
        fobj.volume_object = v0

    # Both files point at the same volume, so the set holds one element.
    s1 = {f0.volume_object, f1.volume_object}
    _logger.debug("len(s1) = %r" % len(s1))
    assert len(s1) == 1
# Exemplo n.º 2
# 0
def test_disk_image_in_file_system():
    """Round-trip a DFXML document containing a disk image nested inside a
    file system, checking a file at each of the two levels."""
    dobj = Objects.DFXMLObject(version="1.2.0")

    vobj = Objects.VolumeObject()
    vobj.ftype_str = "iso9660"
    dobj.append(vobj)

    # File recorded directly in the file system.
    fobj_in_volume = Objects.FileObject()
    fobj_in_volume.sha512 = TEST_HASH_1
    vobj.append(fobj_in_volume)

    # Disk image stored as a member of the file system.
    diobj = Objects.DiskImageObject()
    vobj.append(diobj)

    # Unallocated file inside the nested disk image.
    fobj_in_image = Objects.FileObject()
    fobj_in_image.alloc_inode = False
    fobj_in_image.alloc_name = False
    fobj_in_image.sha512 = TEST_HASH_2
    diobj.append(fobj_in_image)

    # Serialize, re-parse, and confirm the hierarchy survived.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        vobj_reconst = dobj_reconst.volumes[0]
        diobj_reconst = vobj_reconst.disk_images[0]
        assert vobj_reconst.files[0].sha512 == TEST_HASH_1
        assert diobj_reconst.files[0].sha512 == TEST_HASH_2
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 3
# 0
def test_volume_error_roundtrip_with_file():
    """Check that error strings set on both a volume and its child file
    survive an XML file round trip, without interfering with each other."""
    dobj = Objects.DFXMLObject(version="1.2.0")
    vobj = Objects.VolumeObject()
    dobj.append(vobj)

    vobj.error = ERROR_STRING_V
    assert vobj.error == ERROR_STRING_V

    fobj = Objects.FileObject()
    vobj.append(fobj)
    fobj.error = ERROR_STRING_F

    # Setting the file's error must leave the volume's error untouched.
    assert fobj.error == ERROR_STRING_F
    assert vobj.error == ERROR_STRING_V

    # Serialize, re-parse, and confirm both error annotations persisted.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        vobj_reconst = dobj_reconst.volumes[0]
        assert vobj_reconst.error == ERROR_STRING_V
        assert vobj_reconst.files[0].error == ERROR_STRING_F
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 4
# 0
def test_error_after_file():
    """Confirm error properties on a disk image and on a child file both
    survive an XML file round trip."""
    #TODO Bump version when feature branch merged into schema.
    dobj = Objects.DFXMLObject(version="1.2.0+")
    diobj = Objects.DiskImageObject()
    dobj.append(diobj)
    diobj.error = ERROR_1

    # Unallocated file carrying its own error annotation.
    fobj = Objects.FileObject()
    fobj.alloc_inode = False
    fobj.alloc_name = False
    fobj.error = ERROR_2
    diobj.append(fobj)

    # Serialize, re-parse, and verify both errors.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        diobj_reconst = dobj_reconst.disk_images[0]
        assert diobj_reconst.error == ERROR_1
        assert diobj_reconst.files[0].error == ERROR_2
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 5
# 0
def test_error_element_order():
    """Verify that a partition system's error element serializes after the
    fileobject stream, and that both properties survive a round trip."""
    #TODO When schema 1.3.0 is released, update version.
    dobj = Objects.DFXMLObject(version="1.2.0+")
    psobj = Objects.PartitionSystemObject()
    psobj.pstype_str = "gpt"

    # The error element should come after the fileobject stream.
    psobj.error = "foo"

    # An unallocated file found floating in the partition system.
    fobj = Objects.FileObject()
    fobj.alloc_inode = False
    fobj.alloc_name = False

    dobj.append(psobj)
    psobj.append(fobj)

    el = dobj.to_Element()

    # Child order within the serialized partition system:
    # pstype_str first, fileobject stream next, error element last.
    ps_el = el[-1]
    assert ps_el[0].tag.endswith("pstype_str")
    assert ps_el[-2].tag.endswith("fileobject")
    assert ps_el[-1].tag.endswith("error")

    # Serialize, re-parse, and confirm both properties persisted.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    psobj_reconst = dobj_reconst.partition_systems[0]
    try:
        assert psobj_reconst.pstype_str == "gpt"
        assert psobj_reconst.error == "foo"
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
def test_all():
    """Smoke test: populate a FileObject from this file's own stat structure
    and render it to DFXML."""
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    fobj = Objects.FileObject()
    fobj.populate_from_stat(os.stat(__file__))
    _logger.debug("f0.to_dfxml() = %r" % fobj.to_dfxml())
# Exemplo n.º 7
# 0
def test_solaris_ps_in_partition():
    """Round-trip a Solaris-style layout: a partition system nested inside a
    partition of an outer partition system, with a file at each level."""
    dobj = Objects.DFXMLObject(version="1.2.0")

    def _unalloc_file(sha512):
        # Build an unallocated FileObject tagged with the given hash.
        fobj = Objects.FileObject()
        fobj.alloc_inode = False
        fobj.alloc_name = False
        fobj.sha512 = sha512
        return fobj

    psobj_outer = Objects.PartitionSystemObject()
    dobj.append(psobj_outer)
    # File in the outer partition system.
    psobj_outer.append(_unalloc_file(TEST_HASH_1))

    pobj = Objects.PartitionObject()
    psobj_outer.append(pobj)
    # File in the partition.
    pobj.append(_unalloc_file(TEST_HASH_2))

    psobj_inner = Objects.PartitionSystemObject()
    pobj.append(psobj_inner)
    # File in the inner partition system.
    psobj_inner.append(_unalloc_file(TEST_HASH_3))

    # Serialize, re-parse, and confirm each level kept its file.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        psobj_outer_reconst = dobj_reconst.partition_systems[0]
        pobj_reconst = psobj_outer_reconst.partitions[0]
        psobj_inner_reconst = pobj_reconst.partition_systems[0]
        assert psobj_outer_reconst.files[0].sha512 == TEST_HASH_1
        assert pobj_reconst.files[0].sha512 == TEST_HASH_2
        assert psobj_inner_reconst.files[0].sha512 == TEST_HASH_3
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 8
# 0
def test_all():
    """Check that facet-tagged ByteRuns (data/inode/name) attached to a
    FileObject survive Element serialization and re-population."""
    _logger = logging.getLogger(os.path.basename(__file__))
    logging.basicConfig(level=logging.DEBUG)

    # One run per facet; img_offset doubles as the identity check.
    facet_specs = [
        ("data", Objects.ByteRun(img_offset=1, len=1)),
        ("inode", Objects.ByteRun(img_offset=2, len=2)),
        ("name", Objects.ByteRun(img_offset=4, len=3)),
    ]

    f1 = Objects.FileObject()
    for facet, run in facet_specs:
        runs = Objects.ByteRuns()
        runs.append(run)
        runs.facet = facet
        setattr(f1, facet + "_brs", runs)

    assert f1.data_brs[0].img_offset == 1
    assert f1.inode_brs[0].img_offset == 2
    assert f1.name_brs[0].img_offset == 4

    e1 = f1.to_Element()

    # Rebuild a fresh FileObject from the serialized Element.
    f2 = Objects.FileObject()
    f2.populate_from_Element(e1)

    assert f2.data_brs[0].img_offset == 1
    assert f2.inode_brs[0].img_offset == 2
    assert f2.name_brs[0].img_offset == 4
def test_all():
    """Exercise both common and exotic parent-child append combinations of
    DFXML storage-layer objects; success is simply not raising."""
    dobj = Objects.DFXMLObject(version="1.2.0")

    # Objects for the simple vertical stack:
    # disk image > partition system > partition > volume > file.
    diobj_0 = Objects.DiskImageObject()
    psobj_0 = Objects.PartitionSystemObject()
    pobj_0 = Objects.PartitionObject()
    vobj_0 = Objects.VolumeObject()
    vobj_0.ftype_str = "hfs"
    fobj_0 = Objects.FileObject()

    # Objects for the less common pairings.
    psobj_1 = Objects.PartitionSystemObject()
    vobj_1 = Objects.VolumeObject()
    vobj_1.ftype_str = "hfsplus"
    fobj_dobj_1 = Objects.FileObject()
    fobj_psobj_1 = Objects.FileObject()
    fobj_pobj_1 = Objects.FileObject()
    for unallocated in (fobj_dobj_1, fobj_psobj_1, fobj_pobj_1):
        unallocated.alloc_inode = False
        unallocated.alloc_name = False

    # Straightforward vertical appends.
    dobj.append(diobj_0)
    diobj_0.append(psobj_0)
    psobj_0.append(pobj_0)
    pobj_0.append(vobj_0)
    vobj_0.append(fobj_0)

    # Exotic appends: nested partition system, nested volume, and
    # unallocated files attached at container levels.
    pobj_0.append(psobj_1)
    vobj_0.append(vobj_1)
    dobj.append(fobj_dobj_1)
    psobj_0.append(fobj_psobj_1)
    pobj_0.append(fobj_pobj_1)
# Exemplo n.º 10
# 0
 def _worker():
     """Queue consumer: turn file paths into FileObjects until a None sentinel.

     NOTE(review): this is a closure — `q`, `ignore_properties`, and
     `fileobjects_by_filepath` come from the enclosing (not visible) scope.
     Presumably one of several worker threads; confirm against the caller.
     """
     while True:
         filepath = q.get()
         if filepath is None:
             # Sentinel value: no more work for this worker.
             break
         try:
             fobj = filepath_to_fileobject(
                 filepath, ignore_properties=ignore_properties)
         except FileNotFoundError as e:
             # Record the failure as a stub FileObject carrying the traceback
             # (and the exception args, when present) in its error property.
             fobj = Objects.FileObject()
             fobj.filename = filepath
             fobj.error = "".join(traceback.format_stack())
             if e.args:
                 fobj.error += "\n" + str(e.args)
         # Publish the result (real or stub) keyed by the input path.
         fileobjects_by_filepath[filepath] = fobj
         q.task_done()
# Exemplo n.º 11
# 0
def test_hash_properties():
    """Set every supported hash property on a FileObject and a ByteRun, then
    confirm each value survives an XML file round trip."""
    dobj = Objects.DFXMLObject(version="1.2.0")

    fobj = Objects.FileObject()
    dobj.append(fobj)

    br = Objects.ByteRun()
    fobj.byte_runs = Objects.ByteRuns()
    fobj.byte_runs.append(br)

    fobj.filesize = len(TEST_BYTE_STRING)
    br.len = len(TEST_BYTE_STRING)

    hash_functions = {"md5", "sha1", "sha224", "sha256", "sha384", "sha512"}

    # Key: Hash function.
    # Value: hex digest of TEST_BYTE_STRING under that function.
    hash_values = dict()

    for hash_function in sorted(hash_functions):
        digest = getattr(hashlib, hash_function)(TEST_BYTE_STRING).hexdigest()
        hash_values[hash_function] = digest
        _logger.debug("hash_values[%r] = %r." % (hash_function, digest))

        # Assign the same digest to both the file and the byte run.
        setattr(fobj, hash_function, digest)
        setattr(br, hash_function, digest)

        assert getattr(fobj, hash_function) == digest
        assert getattr(br, hash_function) == digest

    # Serialize, re-parse, and verify every hash on both objects.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        fobj_reconst = dobj_reconst.files[0]
        br_reconst = fobj_reconst.byte_runs[0]
        for hash_function in sorted(hash_functions):
            expected = hash_values[hash_function]
            assert getattr(fobj_reconst, hash_function) == expected
            assert getattr(br_reconst, hash_function) == expected
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 12
# 0
def test_volume_error_roundtrip_with_file_and_extns():
    """Like the plain volume-error round trip, but with external-namespace
    elements (including a foreign 'error' element) attached to the volume."""
    dobj = Objects.DFXMLObject(version="1.2.0")
    vobj = Objects.VolumeObject()
    dobj.append(vobj)

    ET.register_namespace("testextra", XMLNS_TEST_EXTRA)

    vobj.error = ERROR_STRING_V

    # Dummy up a non-DFXML namespace element.  This should be appendable.
    extra_el = ET.Element("{%s}extra_element" % XMLNS_TEST_EXTRA)
    extra_el.text = "Extra content"
    vobj.externals.append(extra_el)

    # Dummy up a non-DFXML namespace 'error' element.  This should be appendable.
    foreign_err_el = ET.Element("{%s}error" % XMLNS_TEST_EXTRA)
    foreign_err_el.text = "Extra error"
    vobj.externals.append(foreign_err_el)

    # The foreign 'error' element must not shadow the DFXML error property.
    assert vobj.error == ERROR_STRING_V

    fobj = Objects.FileObject()
    vobj.append(fobj)
    fobj.error = ERROR_STRING_F

    assert fobj.error == ERROR_STRING_F
    assert vobj.error == ERROR_STRING_V

    # Serialize, re-parse, and confirm both error properties persisted.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        vobj_reconst = dobj_reconst.volumes[0]
        assert vobj_reconst.error == ERROR_STRING_V
        assert vobj_reconst.files[0].error == ERROR_STRING_F
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 13
# 0
def _test_file_in_non_fs_levels_deep(include_disk_image,
                                     include_partition_system,
                                     include_partition, include_file_system):
    """
    This test follows a simple, vertical storage layer stack, but adds a file at each layer.

    Each include_* parameter is a boolean selecting whether that storage
    layer appears in the nested stack (document > disk image > partition
    system > partition > file system).  TEST_HASH_1 through TEST_HASH_5
    distinguish the file planted at each layer.  Impossible layer pairings
    are skipped by the guard conditions below.
    """
    dobj = Objects.DFXMLObject(version="1.2.0")

    # Add file to top-level document.
    fobj_dobj = Objects.FileObject()
    fobj_dobj.alloc_inode = False
    fobj_dobj.alloc_name = False
    fobj_dobj.sha512 = TEST_HASH_1
    dobj.append(fobj_dobj)

    # Stack of containers; the most recently appended container is last, so
    # appender_stack[-1] is always the parent for the next layer.
    appender_stack = [dobj]

    if include_disk_image:
        # Add disk image to top-level document.
        diobj = Objects.DiskImageObject()
        appender_stack[-1].append(diobj)
        appender_stack.append(diobj)

        # Add file to disk image.
        fobj_diobj = Objects.FileObject()
        fobj_diobj.alloc_inode = False
        fobj_diobj.alloc_name = False
        fobj_diobj.sha512 = TEST_HASH_2
        diobj.append(fobj_diobj)

    if include_partition_system:
        # Add partition system to disk image.
        psobj = Objects.PartitionSystemObject()
        appender_stack[-1].append(psobj)
        appender_stack.append(psobj)

        # Add file to partition system.
        fobj_psobj = Objects.FileObject()
        fobj_psobj.alloc_inode = False
        fobj_psobj.alloc_name = False
        fobj_psobj.sha512 = TEST_HASH_3
        psobj.append(fobj_psobj)

    if include_partition:
        # Add partition to partition system, but not disk image.
        # (Guard skips the case of a disk image with no partition system,
        # where the partition would have no valid parent.)
        if not (include_disk_image and not include_partition_system):
            pobj = Objects.PartitionObject()
            appender_stack[-1].append(pobj)
            appender_stack.append(pobj)

            # Add file to partition.
            fobj_pobj = Objects.FileObject()
            fobj_pobj.alloc_inode = False
            fobj_pobj.alloc_name = False
            fobj_pobj.sha512 = TEST_HASH_4
            pobj.append(fobj_pobj)

    if include_file_system:
        # Add file system to anything but a partition system.
        # (Guard skips the case of a partition system with no partition,
        # where the file system would sit directly in the partition system.)
        if not (include_partition_system and not include_partition):
            vobj = Objects.VolumeObject()
            appender_stack[-1].append(vobj)
            appender_stack.append(vobj)

            # Add file to file system.
            fobj_vobj = Objects.FileObject()
            fobj_vobj.sha512 = TEST_HASH_5
            vobj.append(fobj_vobj)

    # Do file I/O round trip.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        # Walk the reconstituted document with a parallel stack, checking
        # the planted file hash at each layer that was included.
        container_stack = [dobj_reconst]
        assert dobj_reconst.files[0].sha512 == TEST_HASH_1

        if include_disk_image:
            diobj_reconst = container_stack[-1].disk_images[0]
            container_stack.append(diobj_reconst)
            assert diobj_reconst.files[0].sha512 == TEST_HASH_2

        if include_partition_system:
            psobj_reconst = container_stack[-1].partition_systems[0]
            container_stack.append(psobj_reconst)
            assert psobj_reconst.files[0].sha512 == TEST_HASH_3

        if include_partition:
            if not (include_disk_image and not include_partition_system):
                pobj_reconst = container_stack[-1].partitions[0]
                container_stack.append(pobj_reconst)
                assert pobj_reconst.files[0].sha512 == TEST_HASH_4

        if include_file_system:
            if not (include_partition_system and not include_partition):
                vobj_reconst = container_stack[-1].volumes[0]
                assert vobj_reconst.files[0].sha512 == TEST_HASH_5
    except:
        # Keep the temp file on failure (removal is skipped by the re-raise)
        # so the serialized XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
def test_all():
    """
    Exercise the FileObject.externals list: type and namespace validation on
    append, serialization of external elements, and de-serialization.
    """
    _logger = logging.getLogger(os.path.basename(__file__))
    logging.basicConfig(level=logging.DEBUG)

    XMLNS_TEST_CLAMSCAN = "file:///opt/local/bin/clamscan"
    XMLNS_TEST_UNREGGED = "file:///dev/random"

    ET.register_namespace("clam", XMLNS_TEST_CLAMSCAN)

    fi = Objects.FileObject()
    fi.filename = "clamscanned"

    #Try and fail to add a non-Element to the list.
    failed = None
    _logger.debug("Before:  " + repr(fi.externals))
    try:
        fi.externals.append(1)
        failed = False
    except TypeError:
        # Expected: externals rejects non-Element values with TypeError.
        failed = True
    except:
        # Any other exception still marks failure, but is re-raised.
        failed = True
        raise
    _logger.debug("After:  " + repr(fi.externals))
    assert failed
    failed = None

    #Dummy up a non-DFXML namespace element.  This should be appendable.
    e = ET.Element("{%s}scan_results" % XMLNS_TEST_CLAMSCAN)
    e.text = "Clean"
    fi.externals.append(e)

    #Dummy up a DFXML namespace element.  This should not be appendable (the schema specifies other namespaces).
    e = ET.Element("{%s}filename" % Objects.dfxml.XMLNS_DFXML)
    e.text = "Superfluous name"
    _logger.debug("Before:  " + repr(fi.externals))
    try:
        fi.externals.append(e)
        failed = False
    except ValueError:
        # Expected: DFXML-namespace elements are rejected with ValueError.
        failed = True
    except:
        failed = True
        raise
    _logger.debug("After:  " + repr(fi.externals))
    assert failed
    failed = None

    #Add an element with the colon prefix style
    e = ET.Element("clam:version")
    e.text = "20140101"
    fi.externals.append(e)

    #Add an element that doesn't have an ET-registered namespace prefix.
    e = ET.Element("{%s}test2" % XMLNS_TEST_UNREGGED)
    e.text = "yes"
    fi.externals.append(e)

    #Test serialization
    s = Objects._ET_tostring(fi.to_Element(
    ))  #TODO Maybe this should be more than an internal function.
    _logger.debug(s)
    if s.find("scan_results") == -1:
        raise ValueError(
            "Serialization did not output other-namespace element 'scan_results'."
        )
    if s.find("clam:version") == -1:
        raise ValueError(
            "Serialization did not output prefixed element 'clam:version'.")
    if s.find("test2") == -1:
        raise ValueError(
            "Serialization did not output unregistered-prefix element 'test2'."
        )

    #Test de-serialization
    fir = Objects.FileObject()
    x = ET.XML(s)
    fir.populate_from_Element(x)
    _logger.debug("De-serialized: %r." % fir.externals)
    # The three successfully-appended external elements should round-trip.
    assert len(fir.externals) == 3
# Exemplo n.º 15
# 0
def _test_file_in_non_fs_levels_flat(include_disk_image,
                                     include_partition_system,
                                     include_partition, include_file_system):
    """
    This test follows a simple, horizontal storage layer stack (every container attached to top document object), and adds a file for each container.
    """
    dobj = Objects.DFXMLObject(version="1.2.0")

    def _unalloc_file(sha512):
        # Build an unallocated FileObject tagged with the given hash.
        fobj = Objects.FileObject()
        fobj.alloc_inode = False
        fobj.alloc_name = False
        fobj.sha512 = sha512
        return fobj

    # Add file to top-level document.
    dobj.append(_unalloc_file(TEST_HASH_1))

    if include_disk_image:
        # Add disk image, with its own file.
        diobj = Objects.DiskImageObject()
        dobj.append(diobj)
        diobj.append(_unalloc_file(TEST_HASH_2))

    if include_partition_system:
        # Add partition system, with its own file.
        psobj = Objects.PartitionSystemObject()
        dobj.append(psobj)
        psobj.append(_unalloc_file(TEST_HASH_3))

    if include_partition:
        # Add partition, with its own file.
        pobj = Objects.PartitionObject()
        dobj.append(pobj)
        pobj.append(_unalloc_file(TEST_HASH_4))

    if include_file_system:
        # Add file system; its file keeps default allocation flags.
        vobj = Objects.VolumeObject()
        dobj.append(vobj)
        fobj_vobj = Objects.FileObject()
        fobj_vobj.sha512 = TEST_HASH_5
        vobj.append(fobj_vobj)

    # Serialize, re-parse, and verify each included container's file.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        assert dobj_reconst.files[0].sha512 == TEST_HASH_1

        if include_disk_image:
            assert dobj_reconst.disk_images[0].files[0].sha512 == TEST_HASH_2

        if include_partition_system:
            assert dobj_reconst.partition_systems[0].files[0].sha512 \
                == TEST_HASH_3

        if include_partition:
            assert dobj_reconst.partitions[0].files[0].sha512 == TEST_HASH_4

        if include_file_system:
            assert dobj_reconst.volumes[0].files[0].sha512 == TEST_HASH_5
    except:
        # Keep the temp file on failure so the XML can be inspected.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
# Exemplo n.º 16
# 0
def test_all():
    """
    Populate a FileObject with every scalar property, round-trip it through
    an XML Element, and exercise compare_to_other / compare_to_original
    difference reporting.
    """
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    f0 = Objects.FileObject()

    # Fix: removed an unused FileObject local ("fo") that was constructed
    # and never referenced.
    pfo = Objects.FileObject()
    pfo.inode = 234
    f0.parent_object = pfo
    f0.filename = "test file"
    f0.error = "Neither a real file, nor real error"
    f0.partition = 2
    f0.id = 235
    f0.name_type = "r"
    f0.filesize = 1234
    f0.unalloc = 0
    f0.unused = 0
    f0.orphan = 0
    f0.compressed = 1
    f0.inode = 6543
    f0.libmagic = "data"
    f0.meta_type = 8
    f0.mode = 755
    f0.nlink = 1
    f0.uid = "S-1-234-etc"
    f0.gid = "S-2-234-etc"
    f0.mtime = "1999-12-31T12:34:56Z"
    f0.ctime = "1998-12-31T12:34:56Z"
    f0.atime = "1997-12-31T12:34:56Z"
    f0.crtime = "1996-12-31T12:34:56Z"
    f0.seq = 3
    f0.dtime = "1995-12-31T12:34:56Z"
    f0.bkup_time = "1994-12-31T12:34:56Z"
    f0.link_target = "Nonexistent file"
    # NOTE(review): libmagic is assigned twice; this overwrite of "data"
    # is preserved as-is in case the property setter has side effects.
    f0.libmagic = "Some kind of compressed"
    f0.md5 = "db72d20e83d0ae39771403bc4cdde040"
    f0.sha1 = "866e1f426b2380aaf74a091aa0f39f62ae8a2de7"
    f0.sha256 = "4bc5996997ab9196b2d998b05ef302ed1dc167d74ec881533ee35008b5168630"
    f0.sha384 = "2ec378692eeae4b855f58832664f95bb85411caac8dcebe7cd3916e915559d3f0ccda688a1fad1e3f47801fe15298ac0"
    #f0.brs = brs #TODO
    _logger.debug("f0 = %r" % f0)
    _logger.debug("f0.to_dfxml() = %r" % f0.to_dfxml())

    e0 = f0.to_Element()
    _logger.debug("e0 = %r" % e0)

    #f1 = eval(repr(f0)) #TODO The recursive evals cause namespace confusion (Objects.foo); replace the next two lines when that's settled.
    f1 = Objects.FileObject()
    f1.populate_from_Element(e0)

    f2 = Objects.FileObject()
    f2.populate_from_Element(e0)

    #The id property should not be included in the comparisons
    f1.id = 111
    f1.alloc = False

    # Perturb f2's timestamp and hashes to exercise diff detection.
    f2.mtime = "2999-12-31T12:34:56Z"
    f2.md5 = "593c8fe4a2236f3eeba7f4577b663876"
    f2.sha1 = "0c0c20c03bdb8913da8ea120bd59ba5f596deceb"
    f2.sha256 = "4f6dcb46e0f7b0ad748d083f6e92d7df586d0298a94acc3795287ff156614540"
    f2.sha384 = "2af87ca47d01989009caf3927a84be215528a53629dd935a828921ac0a4b22202bcba20d38fdd16d719b8c4241fcdacb"

    _logger.debug("f1 = %r" % f1)
    d01 = f0.compare_to_other(f1)
    _logger.debug("d01 = %r" % d01)
    assert d01 == set(["alloc"]) or d01 == set(["alloc", "unalloc"])

    d02 = f0.compare_to_other(f2)

    _logger.debug("d02 = %r" % d02)
    assert d02 == set(["mtime", "md5", "sha1", "sha256", "sha384"])

    f2.original_fileobject = f0
    f2.compare_to_original()
    _logger.debug("f2.diffs = %r" % f2.diffs)
    assert f2.diffs == d02
# Exemplo n.º 17
# 0
def extract_files(image_path,
                  outdir,
                  dfxml_path=None,
                  file_predicate=is_file,
                  file_name=name_with_part_path,
                  dry_run=None,
                  out_manifest_path=None,
                  err_manifest_path=None,
                  keep_going=False):
    """
    Extract files matching file_predicate from a disk image into outdir,
    optionally writing DFXML manifests of all extractions and of errors.

    @param image_path Path of the disk image to extract content from.
    @param outdir Directory under which extracted files are written.
    @param dfxml_path Optional pre-computed DFXML for the image; when absent, the image itself is passed to Objects.iterparse.
    @param file_predicate Unary function.  Takes a Objects.FileObject; returns True for files that should be extracted.
    @param file_name Unary function.  Takes a Objects.FileObject; returns the file path to which this file will be extracted, relative to outdir.  So, if outdir="extraction" and the name_with_part_path function of this module is used, the file "/Users/Administrator/ntuser.dat" in partition 1 will be extracted to "extraction/partition_1/Users/Administrator/ntuser.dat".
    @param dry_run If truthy, skip writing file content (manifests and the byte tally are still produced).
    @param out_manifest_path Optional path for a DFXML manifest of every extraction attempt.
    @param err_manifest_path Optional path for a DFXML manifest of only errored extractions.
    @param keep_going If False, stop the extraction loop at the first extraction-read error.
    """

    extraction_byte_tally = 0

    _path_for_iterparse = dfxml_path or image_path

    #Set up base manifest to track extracted files
    base_manifest = Objects.DFXMLObject(version="1.2.0")
    base_manifest.program = sys.argv[0]
    if sys.argv[0] == os.path.basename(__file__):
        base_manifest.program_version = __version__
        #Otherwise, this DFXMLObject would need to be passed back to the calling function.
    base_manifest.command_line = " ".join(sys.argv)
    base_manifest.add_namespace("extractor", XMLNS_EXTRACTOR)
    base_manifest.add_namespace("delta", dfxml.XMLNS_DELTA)
    base_manifest.sources.append(image_path)
    if dfxml_path:
        base_manifest.sources.append(dfxml_path)
    base_manifest.add_creator_library("Python", ".".join(
        map(str, sys.version_info[0:3])
    ))  #A bit of a bend, but gets the major version information out.
    base_manifest.add_creator_library("Objects.py", Objects.__version__)
    base_manifest.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    #Clone base manifest to all-files' manifest and errors-only manifest
    out_manifest = None
    if out_manifest_path:
        out_manifest = copy.deepcopy(base_manifest)
    err_manifest = None
    if err_manifest_path:
        err_manifest = copy.deepcopy(base_manifest)

    for (event, obj) in Objects.iterparse(_path_for_iterparse):
        #Absolute prerequisites:
        if not isinstance(obj, Objects.FileObject):
            continue

        #Invoker prerequisites
        if not file_predicate(obj):
            continue

        # Manifest entry records this extraction attempt, linked back to the
        # source file object.
        extraction_entry = Objects.FileObject()
        extraction_entry.original_fileobject = obj

        #Construct path where the file will be extracted
        extraction_write_path = os.path.join(outdir, file_name(obj))

        #Extract idempotently
        if os.path.exists(extraction_write_path):
            _logger.debug(
                "Skipping already-extracted file: %r.  Extraction path already exists: %r."
                % (obj.filename, extraction_write_path))
            continue

        extraction_entry.filename = extraction_write_path

        #Set up checksum verifier
        checker = None
        checked_byte_tally = 0
        if obj.sha1:
            checker = hashlib.sha1()

        extraction_byte_tally += obj.filesize

        any_error = None
        tsk_error = None
        if not dry_run:
            extraction_write_dir = os.path.dirname(extraction_write_path)
            if not os.path.exists(extraction_write_dir):
                os.makedirs(extraction_write_dir)
            _logger.debug("Extracting to: %r." % extraction_write_path)
            with open(extraction_write_path, "wb") as extraction_write_fh:
                try:
                    for chunk in obj.extract_facet("content", image_path):
                        if checker:
                            checker.update(chunk)
                        checked_byte_tally += len(chunk)
                        extraction_write_fh.write(chunk)

                    # Record a size mismatch between the DFXML-recorded
                    # filesize and the bytes actually extracted.
                    if checked_byte_tally != obj.filesize:
                        any_error = True
                        extraction_entry.filesize = checked_byte_tally
                        extraction_entry.diffs.add("filesize")
                        _logger.error("File size mismatch on %r." %
                                      obj.filename)
                        _logger.info("Recorded filesize = %r" % obj.filesize)
                        _logger.info("Extracted bytes   = %r" %
                                     checked_byte_tally)
                    # Record a SHA-1 mismatch when a recorded hash exists.
                    if checker and (obj.sha1 != checker.hexdigest()):
                        any_error = True
                        extraction_entry.sha1 = checker.hexdigest()
                        extraction_entry.diffs.add("sha1")
                        _logger.error("Hash mismatch on %r." % obj.filename)
                        _logger.info("Recorded SHA-1 = %r" % obj.sha1)
                        _logger.info("Computed SHA-1 = %r" %
                                     checker.hexdigest())
                        #_logger.debug("File object: %r." % obj)
                except Exception as e:
                    # Extraction-read failure; captured in the manifest entry
                    # rather than propagated, so the loop can continue.
                    any_error = True
                    tsk_error = True
                    extraction_entry.error = "".join(traceback.format_stack())
                    if e.args:
                        extraction_entry.error += "\n" + str(e.args)
        if out_manifest:
            out_manifest.append(extraction_entry)
        if err_manifest and any_error:
            err_manifest.append(extraction_entry)
        if tsk_error and not keep_going:
            _logger.warning(
                "Terminating extraction loop early, due to encountered error.")
            break

    #Report
    _logger.info("Estimated extraction: %d bytes." % extraction_byte_tally)
    if not out_manifest is None:
        with open(out_manifest_path, "w") as out_manifest_fh:
            out_manifest.print_dfxml(out_manifest_fh)
    if not err_manifest is None:
        tally = 0
        for obj in err_manifest:
            if isinstance(obj, Objects.FileObject):
                tally += 1
        _logger.info("Encountered errors extracting %d files." % tally)
        with open(err_manifest_path, "w") as err_manifest_fh:
            err_manifest.print_dfxml(err_manifest_fh)
# Exemplo n.º 18
# 0
def filepath_to_fileobject(filepath, **kwargs):
    """
    Build a FileObject describing the file at filepath on the local file
    system: name type, stat-derived properties, and (for regular files) a
    battery of hash digests.  Read errors are recorded on the returned
    object's error property rather than raised.

    Optional arguments:
    * ignore_properties - dictionary of property names to exclude from FileObject.
    """
    global walk_default_hashes
    fobj = Objects.FileObject()

    ignore_properties = kwargs.get("ignore_properties", dict())
    #_logger.debug("ignore_properties = %r." % ignore_properties)

    #Determine type - done in three steps.
    if os.path.islink(filepath):
        name_type = "l"
    elif os.path.isdir(filepath):
        name_type = "d"
    elif os.path.isfile(filepath):
        name_type = "r"
    else:
        #Nop. Need to finish type determinations with stat structure.
        name_type = None

    # Retrieve stat struct for file to finish determining name type, and later to populate properties.
    # lstat for symlinks so the link itself is described, not its target.
    if name_type == "l":
        sobj = os.lstat(filepath)
    else:
        sobj = os.stat(filepath)
    #_logger.debug(sobj)

    if name_type is None:
        # Special file types, distinguished by the stat mode bits.
        if stat.S_ISCHR(sobj.st_mode):
            name_type = "c"
        elif stat.S_ISBLK(sobj.st_mode):
            name_type = "b"
        elif stat.S_ISFIFO(sobj.st_mode):
            name_type = "p"
        elif stat.S_ISSOCK(sobj.st_mode):
            name_type = "s"
        elif stat.S_ISWHT(sobj.st_mode):
            name_type = "w"
        else:
            raise NotImplementedError(
                "No reporting check written for file type of %r." % filepath)

    def _should_ignore(prop):
        # PEP 8 (E731): a def, not a lambda assignment.  Decides whether a
        # property should be omitted per the caller's ignore_properties.
        return Objects.FileObject._should_ignore_property(
            ignore_properties, name_type, prop)

    if not _should_ignore("name_type"):
        fobj.name_type = name_type

    #Prime fileobjects from Stat data (lstat for soft links).
    fobj.populate_from_stat(sobj,
                            ignore_properties=ignore_properties,
                            name_type=name_type)

    #Hard-coded information: Name, and assumed allocation status.
    if not _should_ignore("filename"):
        fobj.filename = filepath
    if not _should_ignore("alloc"):
        fobj.alloc = True

    if not _should_ignore("link_target"):
        if name_type == "l":
            fobj.link_target = os.readlink(filepath)

    #Add hashes for (mostly regular) files.
    # NOTE(review): "r" is regular; "-" and "v" presumably are regular-file
    # variants from other tools' name_type vocabularies - confirm.
    if name_type in ["-", "r", "v"]:
        # Hash only if at least one hash property is not ignored.
        # (Replaces a functools.reduce/map construction that computed the
        # same logical OR less readably, and short-circuits besides.)
        if any(not _should_ignore(h) for h in walk_default_hashes):
            try:
                with open(filepath, "rb") as in_fh:
                    chunk_size = 2**22
                    md5obj = hashlib.md5()
                    sha1obj = hashlib.sha1()
                    sha224obj = hashlib.sha224()
                    sha256obj = hashlib.sha256()
                    sha384obj = hashlib.sha384()
                    sha512obj = hashlib.sha512()
                    any_error = False
                    while True:
                        buf = b""
                        try:
                            buf = in_fh.read(chunk_size)
                        except Exception as e:
                            # Record the read error on the object and stop
                            # hashing; partial digests are discarded below.
                            any_error = True
                            if not _should_ignore("error"):
                                fobj.error = "".join(traceback.format_stack())
                                if e.args:
                                    fobj.error += "\n" + str(e.args)
                            buf = b""
                        if buf == b"":
                            break

                        if not _should_ignore("md5"):
                            md5obj.update(buf)
                        if not _should_ignore("sha1"):
                            sha1obj.update(buf)
                        if not _should_ignore("sha224"):
                            sha224obj.update(buf)
                        if not _should_ignore("sha256"):
                            sha256obj.update(buf)
                        if not _should_ignore("sha384"):
                            sha384obj.update(buf)
                        if not _should_ignore("sha512"):
                            sha512obj.update(buf)

                    # Only record digests for a complete, error-free read.
                    if not any_error:
                        if not _should_ignore("md5"):
                            fobj.md5 = md5obj.hexdigest()
                        if not _should_ignore("sha1"):
                            fobj.sha1 = sha1obj.hexdigest()
                        if not _should_ignore("sha224"):
                            fobj.sha224 = sha224obj.hexdigest()
                        if not _should_ignore("sha256"):
                            fobj.sha256 = sha256obj.hexdigest()
                        if not _should_ignore("sha384"):
                            fobj.sha384 = sha384obj.hexdigest()
                        if not _should_ignore("sha512"):
                            fobj.sha512 = sha512obj.hexdigest()
            except Exception as e:
                # E.g. the open itself failed; append to any prior error.
                if not _should_ignore("error"):
                    if fobj.error is None:
                        fobj.error = ""
                    else:
                        fobj.error += "\n"
                    fobj.error += "".join(traceback.format_stack())
                    if e.args:
                        fobj.error += "\n" + str(e.args)
    return fobj
# Exemplo n.º 19
# 0
def make_differential_dfxml(pre, post, **kwargs):
    """
    Takes as input two paths to DFXML files.  Returns a DFXMLObject.
    @param pre String.
    @param post String.
    @param diff_mode Optional.  One of "all" or "idifference".
    @param retain_unchanged Optional.  Boolean.
    @param ignore_properties Optional.  Set.
    @param annotate_matches Optional.  Boolean.  True -> matched file objects get a "delta:matched='1'" attribute.
    @param rename_requires_hash Optional.  Boolean.  True -> all matches require matching SHA-1's, if present.
    @param ignore_filename_function Optional.  Function, string -> Boolean.  Returns True if a file name (which can be null) should be ignored.
    @param glom_byte_runs Optional.  Boolean.  Joins contiguous-region byte runs together in FileObject byte run lists.
    @return DFXMLObject annotated with delta namespaces ("new", "deleted", "modified", "changed", "renamed", and optionally "matched").
    """

    diff_mode = kwargs.get("diff_mode", "all")
    retain_unchanged = kwargs.get("retain_unchanged", False)
    ignore_properties = kwargs.get("ignore_properties", set())
    annotate_matches = kwargs.get("annotate_matches", False)
    rename_requires_hash = kwargs.get("rename_requires_hash", False)
    ignore_filename_function = kwargs.get("ignore_filename_function",
                                          ignorable_name)
    glom_byte_runs = kwargs.get("glom_byte_runs", False)

    _expected_diff_modes = ["all", "idifference"]
    if diff_mode not in _expected_diff_modes:
        raise ValueError("Differencing mode should be in: %r." %
                         _expected_diff_modes)
    diff_mask_set = set()

    if diff_mode == "idifference":
        diff_mask_set |= set([
            "atime", "byte_runs", "crtime", "ctime", "filename", "filesize",
            "md5", "mtime", "sha1"
        ])
    _logger.debug("diff_mask_set = " + repr(diff_mask_set))

    #d: The container DFXMLObject, ultimately returned.
    d = Objects.DFXMLObject(version="1.2.0")
    if sys.argv[0] == os.path.basename(__file__):
        d.program = sys.argv[0]
        d.program_version = __version__
    d.command_line = " ".join(sys.argv)
    d.add_namespace("delta", dfxml.XMLNS_DELTA)
    d.dc["type"] = "Disk image difference set"
    d.add_creator_library("Python", ".".join(
        map(str, sys.version_info[0:3]
            )))  #A bit of a bend, but gets the major version information out.
    d.add_creator_library("Objects.py", Objects.__version__)
    d.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    d.diff_file_ignores |= ignore_properties
    _logger.debug("d.diff_file_ignores = " + repr(d.diff_file_ignores))

    #The list most of this function is spent on building
    fileobjects_changed = []

    #Unmodified files; only retained if requested.
    fileobjects_unchanged = []

    #Key: (partition, inode, filename); value: FileObject
    old_fis = None
    new_fis = None

    #Key: (partition, inode, filename); value: FileObject list
    old_fis_unalloc = None
    new_fis_unalloc = None

    #Key: Partition byte offset within the disk image, paired with the file system type
    #Value: VolumeObject
    old_volumes = None
    new_volumes = None
    matched_volumes = dict()

    #Populated in distinct (offset, file system type as string) encounter order
    volumes_encounter_order = dict()

    for infile in [pre, post]:

        _logger.debug("infile = %r" % infile)
        old_fis = new_fis
        new_fis = dict()

        old_volumes = new_volumes
        new_volumes = dict()
        #Fold in the matched volumes - we're just discarding the deleted volumes
        for k in matched_volumes:
            old_volumes[k] = matched_volumes[k]
        matched_volumes = dict()

        old_fis_unalloc = new_fis_unalloc
        new_fis_unalloc = collections.defaultdict(list)

        d.sources.append(infile)

        for (i, (event, new_obj)) in enumerate(Objects.iterparse(infile)):
            if isinstance(new_obj, Objects.DFXMLObject):
                #Inherit desired properties from the source DFXMLObject.

                #Inherit namespaces
                for (prefix, url) in new_obj.iter_namespaces():
                    d.add_namespace(prefix, url)

                continue
            elif isinstance(new_obj, Objects.VolumeObject):
                if event == "end":
                    #This algorithm doesn't yet need to know when a volume is concluded.  On to the next object.
                    continue

                offset = new_obj.partition_offset
                if offset is None:
                    raise AttributeError(
                        "To perform differencing with volumes, the <volume> elements must have a <partition_offset>.  Either re-generate your DFXML with partition offsets, or run this program again with the --ignore-volumes flag."
                    )

                #Use the lower-case volume spelling
                ftype_str = _lower_ftype_str(new_obj)

                #Re-capping the general differential analysis algorithm:
                #0. If the volume is in the new list, something's gone wrong.
                if (offset, ftype_str) in new_volumes:
                    _logger.debug("new_obj.partition_offset = %r." % offset)
                    _logger.warning(
                        "Encountered a volume that starts at an offset as another volume, in the same disk image.  This analysis is based on the assumption that that doesn't happen.  Check results that depend on partition mappings."
                    )

                #1. If the volume is in the old list, pop it out of the old list - it's matched.
                if old_volumes and (offset, ftype_str) in old_volumes:
                    _logger.debug(
                        "Found a volume in post image, at offset %r." % offset)
                    old_obj = old_volumes.pop((offset, ftype_str))
                    new_obj.original_volume = old_obj
                    new_obj.compare_to_original()
                    matched_volumes[(offset, ftype_str)] = new_obj

                #2. If the volume is NOT in the old list, add it to the new list.
                else:
                    _logger.debug("Found a new volume, at offset %r." % offset)
                    new_volumes[(offset, ftype_str)] = new_obj
                    volumes_encounter_order[(
                        offset, ftype_str)] = len(new_volumes) + (
                            (old_volumes and len(old_volumes))
                            or 0) + len(matched_volumes)

                #3. Afterwards, the old list contains deleted volumes.

                #Record the ID
                new_obj.id = volumes_encounter_order[(offset, ftype_str)]

                #Move on to the next object
                continue
            elif not isinstance(new_obj, Objects.FileObject):
                #The rest of this loop compares only file objects.
                continue

            if ignore_filename_function(new_obj.filename):
                continue

            #Simplify byte runs if requested
            if glom_byte_runs:
                if new_obj.byte_runs:
                    temp_byte_runs = Objects.ByteRuns()
                    for run in new_obj.byte_runs:
                        temp_byte_runs.glom(run)
                    new_obj.byte_runs = temp_byte_runs

            #Normalize the partition number
            if new_obj.volume_object is None:
                new_obj.partition = None
            else:
                vo = new_obj.volume_object
                fts = _lower_ftype_str(vo)
                new_obj.partition = volumes_encounter_order[(
                    vo.partition_offset, fts)]

            #Define the identity key of this file -- affected by the --ignore argument
            _key_partition = None if "partition" in ignore_properties else new_obj.partition
            _key_inode = None if "inode" in ignore_properties else new_obj.inode
            _key_filename = None if "filename" in ignore_properties else new_obj.filename
            key = (_key_partition, _key_inode, _key_filename)

            #Ignore unallocated content comparisons until a later loop.  The unique identification of deleted files needs a little more to work.
            if not new_obj.alloc:
                new_fis_unalloc[key].append(new_obj)
                continue

            #The rest of this loop is irrelevant until the second DFXML file.
            if old_fis is None:
                new_fis[key] = new_obj
                continue

            if key in old_fis:
                #Extract the old fileobject and check for changes
                old_obj = old_fis.pop(key)
                new_obj.original_fileobject = old_obj
                new_obj.compare_to_original(file_ignores=d.diff_file_ignores)

                #_logger.debug("Diffs: %r." % _diffs)
                _diffs = new_obj.diffs - d.diff_file_ignores
                #_logger.debug("Diffs after ignore-set: %r." % _diffs)
                if diff_mask_set:
                    _diffs &= diff_mask_set
                    #_logger.debug("Diffs after mask-set: %r." % _diffs)

                if len(_diffs) > 0:
                    #_logger.debug("Remaining diffs: " + repr(_diffs))
                    fileobjects_changed.append(new_obj)
                else:
                    #Unmodified file; only keep if requested.
                    if retain_unchanged:
                        fileobjects_unchanged.append(new_obj)
            else:
                #Store the new object
                new_fis[key] = new_obj

        #The rest of the files loop is irrelevant until the second file.
        if old_fis is None:
            continue

        _logger.debug("len(old_fis) = %d" % len(old_fis))
        _logger.debug("len(old_fis_unalloc) = %d" % len(old_fis_unalloc))
        _logger.debug("len(new_fis) = %d" % len(new_fis))
        _logger.debug("len(new_fis_unalloc) = %d" % len(new_fis_unalloc))
        _logger.debug("len(fileobjects_changed) = %d" %
                      len(fileobjects_changed))

        #Identify renames - only possible if 1-to-1.  Many-to-many renames are just left as new and deleted files.
        _logger.debug("Detecting renames...")
        fileobjects_renamed = []

        def _make_name_map(d):
            """Returns a dictionary, mapping (partition, inode) -> (unknown)."""
            retdict = collections.defaultdict(lambda: set())
            for (partition, inode, filename) in d.keys():
                retdict[(partition, inode)].add(filename)
            return retdict

        old_inode_names = _make_name_map(old_fis)
        new_inode_names = _make_name_map(new_fis)
        for key in new_inode_names.keys():
            (partition, inode) = key

            if len(new_inode_names[key]) != 1:
                continue
            if not key in old_inode_names:
                continue
            if len(old_inode_names[key]) != 1:
                continue
            if rename_requires_hash:
                #Peek at the set elements by doing a quite-ephemeral list cast
                old_obj = old_fis[(partition, inode,
                                   list(old_inode_names[key])[0])]
                new_obj = new_fis[(partition, inode,
                                   list(new_inode_names[key])[0])]
                if old_obj.sha1 != new_obj.sha1:
                    continue

            #Found a match if we're at this point in the loop
            old_name = old_inode_names[key].pop()
            new_name = new_inode_names[key].pop()
            old_obj = old_fis.pop((partition, inode, old_name))
            new_obj = new_fis.pop((partition, inode, new_name))
            new_obj.original_fileobject = old_obj
            new_obj.compare_to_original(file_ignores=d.diff_file_ignores)
            fileobjects_renamed.append(new_obj)
        _logger.debug("len(old_fis) -> %d" % len(old_fis))
        _logger.debug("len(new_fis) -> %d" % len(new_fis))
        _logger.debug("len(fileobjects_changed) -> %d" %
                      len(fileobjects_changed))
        _logger.debug("len(fileobjects_renamed) = %d" %
                      len(fileobjects_renamed))

        #Identify files that just changed inode number - basically, doing the rename detection again
        _logger.debug("Detecting inode number changes...")

        def _make_inode_map(d):
            """Returns a dictionary, mapping (partition, filename) -> inode."""
            retdict = dict()
            for (partition, inode, filename) in d.keys():
                if (partition, filename) in retdict:
                    _logger.warning(
                        "Multiple instances of the file path %r were found in partition %r; this violates an assumption of this program, that paths are unique within partitions."
                        % (filename, partition))
                retdict[(partition, filename)] = inode
            return retdict

        old_name_inodes = _make_inode_map(old_fis)
        new_name_inodes = _make_inode_map(new_fis)
        for key in new_name_inodes.keys():
            if not key in old_name_inodes:
                continue
            (partition, name) = key
            old_obj = old_fis.pop((partition, old_name_inodes[key], name))
            new_obj = new_fis.pop((partition, new_name_inodes[key], name))
            new_obj.original_fileobject = old_obj
            #TODO Test for what chaos ensues when filename is in the ignore list.
            new_obj.compare_to_original(file_ignores=d.diff_file_ignores)
            fileobjects_changed.append(new_obj)
        _logger.debug("len(old_fis) -> %d" % len(old_fis))
        _logger.debug("len(new_fis) -> %d" % len(new_fis))
        _logger.debug("len(fileobjects_changed) -> %d" %
                      len(fileobjects_changed))
        #And that's the end of the allocated-only, per-volume analysis.

        #We may be able to match files that aren't allocated against files we think are deleted
        _logger.debug("Detecting modifications from unallocated files...")
        fileobjects_deleted = []
        for key in new_fis_unalloc:
            #1 partition; 1 inode number; 1 name, repeated:  Too ambiguous to compare.
            if len(new_fis_unalloc[key]) != 1:
                continue

            if key in old_fis_unalloc:
                if len(old_fis_unalloc[key]) == 1:
                    #The file was unallocated in the previous image, too.
                    old_obj = old_fis_unalloc[key].pop()
                    new_obj = new_fis_unalloc[key].pop()
                    new_obj.original_fileobject = old_obj
                    new_obj.compare_to_original(
                        file_ignores=d.diff_file_ignores)
                    #The file might not have changed.  It's interesting if it did, though.

                    #BUGFIX: This previously subtracted diff_mask_set, which
                    #made the "&= diff_mask_set" below always produce an empty
                    #set in "idifference" mode, silently dropping changed
                    #unallocated files.  Subtract the ignore set instead, to
                    #mirror the allocated-file comparison above.
                    _diffs = new_obj.diffs - d.diff_file_ignores
                    #_logger.debug("Diffs: %r." % _diffs)
                    if diff_mask_set:
                        _diffs &= diff_mask_set
                        #_logger.debug("Diffs after mask-set: %r." % _diffs)
                    if len(_diffs) > 0:
                        _logger.debug("Remaining diffs: " + repr(_diffs))
                        fileobjects_changed.append(new_obj)
                    elif retain_unchanged:
                        fileobjects_unchanged.append(new_obj)
            elif key in old_fis:
                #Identified a deletion.
                old_obj = old_fis.pop(key)
                new_obj = new_fis_unalloc[key].pop()
                new_obj.original_fileobject = old_obj
                new_obj.compare_to_original(file_ignores=d.diff_file_ignores)
                fileobjects_deleted.append(new_obj)
        _logger.debug("len(old_fis) -> %d" % len(old_fis))
        _logger.debug("len(old_fis_unalloc) -> %d" % len(old_fis_unalloc))
        _logger.debug("len(new_fis) -> %d" % len(new_fis))
        _logger.debug("len(new_fis_unalloc) -> %d" % len(new_fis_unalloc))
        _logger.debug("len(fileobjects_changed) -> %d" %
                      len(fileobjects_changed))
        _logger.debug("len(fileobjects_deleted) -> %d" %
                      len(fileobjects_deleted))

        #After deletion matching is performed, one might want to look for files migrating to other partitions.
        #However, since between-volume migration creates a new deleted file, this algorithm instead ignores partition migrations.
        #AJN TODO Thinking about it a little more, I can't suss out a reason against trying this match.  It's complicated if we try looking for reallocations in new_fis, strictly from new_fis_unalloc.

        #TODO We might also want to match the unallocated objects based on metadata addresses.  Unfortunately, that requires implementation of additional byte runs, which hasn't been fully designed yet in the DFXML schema.

        #Begin output.
        #First, annotate the volume objects.
        for key in new_volumes:
            v = new_volumes[key]
            v.annos.add("new")
        for key in old_volumes:
            v = old_volumes[key]
            v.annos.add("deleted")
        for key in matched_volumes:
            v = matched_volumes[key]
            if len(v.diffs) > 0:
                v.annos.add("modified")

        #Build list of FileObject appenders, child volumes of the DFXML Document.
        #Key: Partition number, or None
        #Value: Reference to the VolumeObject corresponding with that partition number.  None -> the DFXMLObject.
        appenders = dict()
        for volume_dict in [new_volumes, matched_volumes, old_volumes]:
            for (offset, ftype_str) in volume_dict:
                veo = volumes_encounter_order[(offset, ftype_str)]
                if veo in appenders:
                    raise ValueError(
                        "This pair is already in the appenders dictionary, which was supposed to be distinct: "
                        + repr((offset, ftype_str)) + ", encounter order " +
                        str(veo) + ".")
                v = volume_dict[(offset, ftype_str)]
                appenders[veo] = v
                d.append(v)

        #Add in the default appender, the DFXML Document itself.
        appenders[None] = d

        #A file should only be considered "modified" if its contents have changed.
        content_diffs = set(["md5", "sha1", "sha256"])

        def _maybe_match_attr(obj):
            """Just adds the 'matched' annotation when called."""
            if annotate_matches:
                obj.annos.add("matched")

        #Populate DFXMLObject.
        for key in new_fis:
            #TODO If this script ever does a series of >2 DFXML files, these diff additions need to be removed for the next round.
            fi = new_fis[key]
            fi.annos.add("new")
            appenders[fi.partition].append(fi)
        for key in new_fis_unalloc:
            for fi in new_fis_unalloc[key]:
                fi.annos.add("new")
                appenders[fi.partition].append(fi)
        for fi in fileobjects_deleted:
            #Independently flag for name, content, and metadata modifications
            if len(fi.diffs - content_diffs) > 0:
                fi.annos.add("changed")
            if len(content_diffs.intersection(fi.diffs)) > 0:
                fi.annos.add("modified")
            if "filename" in fi.diffs:
                fi.annos.add("renamed")
            fi.annos.add("deleted")
            _maybe_match_attr(fi)
            appenders[fi.partition].append(fi)
        for key in old_fis:
            ofi = old_fis[key]
            nfi = Objects.FileObject()
            nfi.original_fileobject = ofi
            nfi.annos.add("deleted")
            appenders[ofi.partition].append(nfi)
        for key in old_fis_unalloc:
            for ofi in old_fis_unalloc[key]:
                nfi = Objects.FileObject()
                nfi.original_fileobject = ofi
                nfi.annos.add("deleted")
                appenders[ofi.partition].append(nfi)
        for fi in fileobjects_renamed:
            #Independently flag for content and metadata modifications
            if len(content_diffs.intersection(fi.diffs)) > 0:
                fi.annos.add("modified")
            if len(fi.diffs - content_diffs) > 0:
                fi.annos.add("changed")
            fi.annos.add("renamed")
            _maybe_match_attr(fi)
            appenders[fi.partition].append(fi)
        for fi in fileobjects_changed:
            #Independently flag for content and metadata modifications
            if len(content_diffs.intersection(fi.diffs)) > 0:
                fi.annos.add("modified")
            if len(fi.diffs - content_diffs) > 0:
                fi.annos.add("changed")
            _maybe_match_attr(fi)
            appenders[fi.partition].append(fi)
        for fi in fileobjects_unchanged:
            _maybe_match_attr(fi)
            appenders[fi.partition].append(fi)

        #Output
        return d
# --- Exemplo n.º 20 (scraper separator artifact; original stray lines: "Exemplo n.º 20" / "0") ---
def main():
    """
    Emit, to standard output, a sample DFXML document describing one
    "examplefs" volume populated with files whose byte runs exercise various
    placements relative to a damaged region of the disk image (before,
    spanning, inside, and after it, plus fragmented combinations and a
    partially-recoverable directory tree).
    """
    # Document-level metadata.
    dobj = Objects.DFXMLObject(version="1.2.0")
    dobj.program = sys.argv[0]
    dobj.program_version = __version__
    dobj.command_line = " ".join(sys.argv)
    dobj.dc["type"] = "Example"
    # A bit of a bend, but gets the major version information out.
    dobj.add_creator_library("Python", ".".join(map(str, sys.version_info[0:3])))
    dobj.add_creator_library("Objects.py", Objects.__version__)
    dobj.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    vobj = Objects.VolumeObject()
    dobj.append(vobj)

    vobj.ftype_str = "examplefs"

    # Define file system position.
    vobj.byte_runs = Objects.ByteRuns()
    vbr = Objects.ByteRun()
    vobj.byte_runs.append(vbr)
    vbr.img_offset = FILE_SYSTEM_START
    vbr.len = DISK_IMAGE_SIZE - FILE_SYSTEM_START

    # Each spec is (file name, list of (image offset, run length) intervals).
    fobj_specs = [
        ("first_sector.bin", [(0, 512)]),
        ("first_four_kilobytes.bin", [(0, 4000)]),
        ("contiguous_before_bad_region.dat",
         [(FILE_SYSTEM_START + 4096*1, 4096)]),
        ("contiguous_around_bad_region_left_edge.dat",
         [(DAMAGE_REGION_START - 4096, 8192)]),
        ("contiguous_in_bad_region.dat",
         [(DAMAGE_REGION_START + 4096*1, 4096)]),
        ("contiguous_around_bad_region_right_edge.dat",
         [(GOOD_REGION_START - 4096*1, 8192)]),
        ("contiguous_after_bad_region.dat",
         [(GOOD_REGION_START + 4096*2, 4096)]),
        ("fragmented_all_before_bad_region.dat",
         [(FILE_SYSTEM_START + 4096*10, 4096),
          (FILE_SYSTEM_START + 4096*20, 4096),
          (FILE_SYSTEM_START + 4096*30, 4096)]),
        ("fragmented_all_after_bad_region.dat",
         [(GOOD_REGION_START + 4096*10, 4096),
          (GOOD_REGION_START + 4096*20, 4096),
          (GOOD_REGION_START + 4096*30, 4096)]),
        ("fragmented_all_inside_bad_region.dat",
         [(DAMAGE_REGION_START + 4096*10, 4096),
          (DAMAGE_REGION_START + 4096*20, 4096),
          (DAMAGE_REGION_START + 4096*30, 4096)]),
        ("fragmented_beginning_inside_bad_region.dat",
         [(DAMAGE_REGION_START + 4096*40, 4096),
          (GOOD_REGION_START + 4096*40, 4096)]),
        ("fragmented_middle_inside_bad_region.dat",
         [(FILE_SYSTEM_START + 4096*50, 4096),
          (DAMAGE_REGION_START + 4096*50, 4096),
          (GOOD_REGION_START + 4096*50, 4096)]),
        ("fragmented_end_inside_bad_region.dat",
         [(FILE_SYSTEM_START + 4096*60, 4096),
          (DAMAGE_REGION_START + 4096*60, 4096)]),
        ("after_disk_image_end.dat",
         [(DISK_IMAGE_SIZE + 4096*1000, 4096)]),
        ("fragmented_partially_recoverable_directory",
         [(FILE_SYSTEM_START + 4096*170, 4096),
          (DAMAGE_REGION_START + 4096*170, 4096),
          (GOOD_REGION_START + 4096*170, 4096)]),
        ("fragmented_partially_recoverable_directory/child_file_1",
         [(FILE_SYSTEM_START + 4096*180, 4096)]),
        ("fragmented_partially_recoverable_directory/child_file_2",
         [(FILE_SYSTEM_START + 4096*190, 4096)]),
        ("fragmented_partially_recoverable_directory/child_file_3",
         [(FILE_SYSTEM_START + 4096*200, 4096)]),
        ("fragmented_partially_recoverable_directory/child_file_4",
         [(FILE_SYSTEM_START + 4096*210, 4096)]),
        ("fragmented_partially_recoverable_directory/child_file_9",
         [(GOOD_REGION_START + 4096*180, 4096)]),
    ]

    for (path, intervals) in fobj_specs:
        file_obj = Objects.FileObject()
        vobj.append(file_obj)

        file_obj.filename = path
        file_obj.alloc = True
        # Naming convention for this sample - the .bin files are virtual files that reference a region outside of the file system.
        if path == "fragmented_partially_recoverable_directory":
            file_obj.name_type = "d"
        else:
            file_obj.name_type = "v" if path.endswith(".bin") else "r"

        file_obj.data_brs = Objects.ByteRuns()
        for (run_offset, run_length) in intervals:
            run = Objects.ByteRun()
            file_obj.data_brs.append(run)
            run.img_offset = run_offset
            run.len = run_length
        file_obj.filesize = sum(run.len for run in file_obj.data_brs)

    dobj.print_dfxml()
def test_all():
    """
    Exercises FileObject.is_allocated() over the settable allocation
    properties: the single .alloc flag on its own, and every
    True/False/None combination of the (.alloc_inode, .alloc_name) pair.
    """
    # .alloc alone: is_allocated() is asserted to mirror the flag, and to be
    # None when no allocation property is set.
    fa1 = Objects.FileObject()
    fa1.alloc = True
    assert fa1.is_allocated() == True

    fa2 = Objects.FileObject()
    fa2.alloc = False
    assert fa2.is_allocated() == False

    fa3 = Objects.FileObject()
    assert fa3.is_allocated() == None

    # (.alloc_inode, .alloc_name) pairs: only (True, True) is asserted to
    # yield True; (None, None) is asserted to yield None; every other of the
    # nine combinations is asserted to yield False.
    fin1 = Objects.FileObject()
    fin1.alloc_inode = True
    fin1.alloc_name = True
    assert fin1.is_allocated() == True

    fin2 = Objects.FileObject()
    fin2.alloc_inode = True
    fin2.alloc_name = False
    assert fin2.is_allocated() == False

    fin3 = Objects.FileObject()
    fin3.alloc_inode = True
    fin3.alloc_name = None
    assert fin3.is_allocated() == False

    fin4 = Objects.FileObject()
    fin4.alloc_inode = False
    fin4.alloc_name = True
    assert fin4.is_allocated() == False

    fin5 = Objects.FileObject()
    fin5.alloc_inode = False
    fin5.alloc_name = False
    assert fin5.is_allocated() == False

    fin6 = Objects.FileObject()
    fin6.alloc_inode = False
    fin6.alloc_name = None
    assert fin6.is_allocated() == False

    fin7 = Objects.FileObject()
    fin7.alloc_inode = None
    fin7.alloc_name = True
    assert fin7.is_allocated() == False

    fin8 = Objects.FileObject()
    fin8.alloc_inode = None
    fin8.alloc_name = False
    assert fin8.is_allocated() == False

    fin9 = Objects.FileObject()
    fin9.alloc_inode = None
    fin9.alloc_name = None
    assert fin9.is_allocated() == None