def test_error_after_partition_system():
    """
    Round-trip a disk image that carries an error and contains a partition
    system, then confirm the disk image's error is unchanged.
    """
    #TODO Bump version when feature branch merged into schema.
    document = Objects.DFXMLObject(version="1.2.0+")

    disk_image = Objects.DiskImageObject()
    document.append(disk_image)
    disk_image.error = ERROR_1

    partition_system = Objects.PartitionSystemObject()
    #TODO This should be uncommented after the branch add_partition_system_error is merged.
    #partition_system.error = ERROR_2
    disk_image.append(partition_system)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        disk_image_reconst = document_reconst.disk_images[0]
        #TODO This should be uncommented after the branch add_partition_system_error is merged.
        #partition_system_reconst = disk_image_reconst.partitionsystems[0]
        assert disk_image_reconst.error == ERROR_1
        #TODO This should be uncommented after the branch add_partition_system_error is merged.
        #assert partition_system_reconst.error == ERROR_2
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_error_element_order():
    """
    Confirm a partition system's serialized element starts with pstype_str
    and ends with a fileobject followed by error, then confirm pstype_str and
    error survive a file round trip.
    """
    #TODO When schema 1.3.0 is released, update version.
    document = Objects.DFXMLObject(version="1.2.0+")
    partition_system = Objects.PartitionSystemObject()
    floating_file = Objects.FileObject()

    partition_system.pstype_str = "gpt"

    # The error element should come after the fileobject stream.
    partition_system.error = "foo"

    # Add a unallocated file object found floating in the partition system.
    floating_file.alloc_inode = False
    floating_file.alloc_name = False

    document.append(partition_system)
    partition_system.append(floating_file)

    # Confirm error comes after file stream.  The partition system is the
    # last child of the serialized document element.
    partition_system_el = document.to_Element()[-1]
    assert partition_system_el[0].tag.endswith("pstype_str")
    assert partition_system_el[-2].tag.endswith("fileobject")
    assert partition_system_el[-1].tag.endswith("error")

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    partition_system_reconst = document_reconst.partition_systems[0]
    try:
        assert partition_system_reconst.pstype_str == "gpt"
        assert partition_system_reconst.error == "foo"
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_volume_error_roundtrip_with_file():
    """
    Set distinct errors on a volume and on a file inside it, then confirm both
    errors are readable before and after a file round trip.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    volume = Objects.VolumeObject()
    document.append(volume)
    volume.error = ERROR_STRING_V
    assert volume.error == ERROR_STRING_V

    file_object = Objects.FileObject()
    volume.append(file_object)
    file_object.error = ERROR_STRING_F
    assert file_object.error == ERROR_STRING_F
    # Setting the file's error must not have disturbed the volume's.
    assert volume.error == ERROR_STRING_V

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        volume_reconst = document_reconst.volumes[0]
        file_reconst = volume_reconst.files[0]
        assert volume_reconst.error == ERROR_STRING_V
        assert file_reconst.error == ERROR_STRING_F
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_error_after_file():
    """
    Round-trip a disk image that carries an error and contains a file object
    with its own error, then confirm both errors are unchanged.
    """
    #TODO Bump version when feature branch merged into schema.
    document = Objects.DFXMLObject(version="1.2.0+")

    disk_image = Objects.DiskImageObject()
    document.append(disk_image)
    disk_image.error = ERROR_1

    file_object = Objects.FileObject()
    file_object.alloc_inode = False
    file_object.alloc_name = False
    file_object.error = ERROR_2
    disk_image.append(file_object)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        disk_image_reconst = document_reconst.disk_images[0]
        file_reconst = disk_image_reconst.files[0]
        assert disk_image_reconst.error == ERROR_1
        assert file_reconst.error == ERROR_2
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_partition_in_partition():
    """
    Nest a partition inside a partition inside an "mbr" partition system, then
    confirm the inner partition is reachable after a file round trip.
    """
    #TODO Remove "+" on DFXML Schema 1.3.0 tracking.
    document = Objects.DFXMLObject(version="1.2.0+")

    partition_system = Objects.PartitionSystemObject()
    partition_system.pstype_str = "mbr"
    document.append(partition_system)

    outer_partition = Objects.PartitionObject()
    partition_system.append(outer_partition)

    inner_partition = Objects.PartitionObject()
    outer_partition.append(inner_partition)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        partition_system_reconst = document_reconst.partition_systems[0]
        outer_reconst = partition_system_reconst.partitions[0]
        inner_reconst = outer_reconst.partitions[0]
        assert isinstance(inner_reconst, Objects.PartitionObject)
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_disk_image_in_file_system():
    """
    Place a file and a disk image inside an "iso9660" volume, with a second
    file inside the disk image, then confirm both files' SHA-512 values after
    a file round trip.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    volume = Objects.VolumeObject()
    volume.ftype_str = "iso9660"
    document.append(volume)

    volume_file = Objects.FileObject()
    volume_file.sha512 = TEST_HASH_1
    volume.append(volume_file)

    disk_image = Objects.DiskImageObject()
    volume.append(disk_image)

    disk_image_file = Objects.FileObject()
    disk_image_file.alloc_inode = False
    disk_image_file.alloc_name = False
    disk_image_file.sha512 = TEST_HASH_2
    disk_image.append(disk_image_file)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        volume_reconst = document_reconst.volumes[0]
        disk_image_reconst = volume_reconst.disk_images[0]
        assert volume_reconst.files[0].sha512 == TEST_HASH_1
        assert disk_image_reconst.files[0].sha512 == TEST_HASH_2
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def main():
    """
    Read the DFXML named by args.infile and print a DFXML document holding
    each file object whose diffs include "_modified" or "_renamed" but not
    "_changed".  Matching files are grouped under the volume they were
    encountered in, and the number of matches is logged.
    """
    report = Objects.DFXMLObject("1.2.0")
    report.program = sys.argv[0]
    report.program_version = __version__
    report.command_line = " ".join(sys.argv)
    report.dc["type"] = "File system silent-change report"
    #A bit of a bend, but gets the major version information out.
    python_version = ".".join(str(part) for part in sys.version_info[0:3])
    report.add_creator_library("Python", python_version)
    report.add_creator_library("Objects.py", Objects.__version__)
    report.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    #Where matching files get attached: the open volume, else the document.
    current_appender = report
    tally = 0

    for (event, obj) in Objects.iterparse(args.infile):
        if event == "start":
            if isinstance(obj, Objects.DFXMLObject):
                #Inherit namespaces
                for (prefix, url) in obj.iter_namespaces():
                    report.add_namespace(prefix, url)
            elif isinstance(obj, Objects.VolumeObject):
                #Group files by volume
                report.append(obj)
                current_appender = obj
            continue

        if event != "end":
            continue

        if isinstance(obj, Objects.VolumeObject):
            #Volume is finished; later files attach to the document.
            current_appender = report
            continue

        if not isinstance(obj, Objects.FileObject):
            continue

        if "_changed" in obj.diffs:
            continue
        if "_modified" in obj.diffs or "_renamed" in obj.diffs:
            current_appender.append(obj)
            tally += 1

    print(report.to_dfxml())
    _logger.info("Found %d suspiciously-changed files.", tally)
def test_empty_object():
    """
    Round-trip a document whose only child is a property-less volume, and
    confirm a volume can still be indexed afterwards.
    """
    document = Objects.DFXMLObject(version="1.2.0")
    document.append(Objects.VolumeObject())

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        # The index lookup is the check: it raises if no volume came back.
        volume_reconst = document_reconst.volumes[0]
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_empty_object():
    """
    Round-trip a document whose only child is a property-less partition
    system, and confirm a partition system can still be indexed afterwards.
    """
    document = Objects.DFXMLObject(version="1.2.0")
    document.append(Objects.PartitionSystemObject())

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        # The index lookup is the check: it raises if nothing came back.
        partition_system_reconst = document_reconst.partition_systems[0]
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_sector_size():
    """
    Set a disk image's sector_size to 2048 and confirm the value survives a
    file round trip and still equals the in-memory object's value.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    disk_image = Objects.DiskImageObject()
    document.append(disk_image)
    disk_image.sector_size = 2048

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        disk_image_reconst = document_reconst.disk_images[0]
        assert disk_image_reconst.sector_size == 2048
        assert disk_image.sector_size == disk_image_reconst.sector_size
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_solaris_ps_in_partition():
    """
    Nest a partition system inside a partition inside a partition system,
    attach one hashed file at each of the three levels, then confirm each
    file's SHA-512 after a file round trip.
    """
    def _floating_file(sha512):
        # File object flagged as having neither inode nor name allocated.
        fobj = Objects.FileObject()
        fobj.alloc_inode = False
        fobj.alloc_name = False
        fobj.sha512 = sha512
        return fobj

    document = Objects.DFXMLObject(version="1.2.0")

    outer_partition_system = Objects.PartitionSystemObject()
    document.append(outer_partition_system)

    # Add file to outer partition system.
    outer_partition_system.append(_floating_file(TEST_HASH_1))

    partition = Objects.PartitionObject()
    outer_partition_system.append(partition)

    # Add file to partition.
    partition.append(_floating_file(TEST_HASH_2))

    inner_partition_system = Objects.PartitionSystemObject()
    partition.append(inner_partition_system)

    # Add file to inner partition system.
    inner_partition_system.append(_floating_file(TEST_HASH_3))

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        outer_reconst = document_reconst.partition_systems[0]
        partition_reconst = outer_reconst.partitions[0]
        inner_reconst = partition_reconst.partition_systems[0]
        assert outer_reconst.files[0].sha512 == TEST_HASH_1
        assert partition_reconst.files[0].sha512 == TEST_HASH_2
        assert inner_reconst.files[0].sha512 == TEST_HASH_3
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_hash_properties():
    """
    Assign md5, sha1, sha224, sha256, sha384 and sha512 digests of
    TEST_BYTE_STRING to a file object and to its single byte run, then
    confirm every digest before and after a file round trip.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    file_object = Objects.FileObject()
    document.append(file_object)

    file_object.byte_runs = Objects.ByteRuns()
    byte_run = Objects.ByteRun()
    file_object.byte_runs.append(byte_run)

    file_object.filesize = len(TEST_BYTE_STRING)
    byte_run.len = len(TEST_BYTE_STRING)

    hash_functions = {"md5", "sha1", "sha224", "sha256", "sha384", "sha512"}
    ordered_hash_functions = sorted(hash_functions)

    # Key: Hash function.
    # Value: Hex digest of TEST_BYTE_STRING under that function.
    hash_values = dict()
    for function_name in ordered_hash_functions:
        hasher = getattr(hashlib, function_name)(TEST_BYTE_STRING)
        expected = hasher.hexdigest()
        hash_values[function_name] = expected
        _logger.debug("hash_values[%r] = %r.", function_name, expected)

        for carrier in (file_object, byte_run):
            setattr(carrier, function_name, expected)
        for carrier in (file_object, byte_run):
            assert getattr(carrier, function_name) == expected

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        file_reconst = document_reconst.files[0]
        byte_run_reconst = file_reconst.byte_runs[0]
        for function_name in ordered_hash_functions:
            expected = hash_values[function_name]
            assert getattr(file_reconst, function_name) == expected
            assert getattr(byte_run_reconst, function_name) == expected
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_cfreds_macwd_properties():
    """
    These were drawn from a CFReDS sample Mac disk image.

    Confirms ptype_str survives a file round trip, and that an integer
    partition_index of 8 is read back as the string "8".
    """
    document = Objects.DFXMLObject(version="1.2.0")

    partition = Objects.PartitionObject()
    document.append(partition)
    partition.ptype_str = "Apple_Boot"
    partition.partition_index = 8

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        partition_reconst = document_reconst.partitions[0]
        assert partition_reconst.ptype_str == "Apple_Boot"
        assert partition_reconst.partition_index == "8"
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_volume_error_roundtrip_with_file_and_extns():
    """
    Set errors on a volume and on a file inside it, with two elements from a
    non-DFXML namespace (one of them also named 'error') appended to the
    volume's externals, then confirm both DFXML errors before and after a
    file round trip.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    volume = Objects.VolumeObject()
    document.append(volume)

    ET.register_namespace("testextra", XMLNS_TEST_EXTRA)

    volume.error = ERROR_STRING_V

    external_specs = (
      # Dummy up a non-DFXML namespace element.  This should be appendable.
      ("{%s}extra_element" % XMLNS_TEST_EXTRA, "Extra content"),
      # Dummy up a non-DFXML namespace 'error' element.  This should be appendable.
      ("{%s}error" % XMLNS_TEST_EXTRA, "Extra error"),
    )
    for (qualified_tag, text) in external_specs:
        external = ET.Element(qualified_tag)
        external.text = text
        volume.externals.append(external)

    assert volume.error == ERROR_STRING_V

    file_object = Objects.FileObject()
    volume.append(file_object)
    file_object.error = ERROR_STRING_F
    assert file_object.error == ERROR_STRING_F
    assert volume.error == ERROR_STRING_V

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        volume_reconst = document_reconst.volumes[0]
        file_reconst = volume_reconst.files[0]
        assert volume_reconst.error == ERROR_STRING_V
        assert file_reconst.error == ERROR_STRING_F
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_hfsplus_in_hfs():
    """
    Nest an "hfsplus" volume inside an "hfs" volume, then confirm the nesting
    and both ftype_str values after a file round trip.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    outer_volume = Objects.VolumeObject()
    outer_volume.ftype_str = "hfs"
    document.append(outer_volume)

    inner_volume = Objects.VolumeObject()
    inner_volume.ftype_str = "hfsplus"
    outer_volume.append(inner_volume)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        outer_reconst = document_reconst.volumes[0]
        inner_reconst = outer_reconst.volumes[0]
        assert isinstance(inner_reconst, Objects.VolumeObject)
        assert outer_reconst.ftype_str == "hfs"
        assert inner_reconst.ftype_str == "hfsplus"
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def test_all():
    """
    Exercise every container append used in this function: a straight
    disk image -> partition system -> partition -> volume -> file chain, plus
    a partition system in a partition, a volume in a volume, and files
    attached directly to the document, partition system and partition.
    Only the appends themselves are exercised; nothing is asserted.
    """
    def _floating_file():
        # File object flagged as having neither inode nor name allocated.
        fobj = Objects.FileObject()
        fobj.alloc_inode = False
        fobj.alloc_name = False
        return fobj

    document = Objects.DFXMLObject(version="1.2.0")

    # Make objects for simple appends.
    disk_image = Objects.DiskImageObject()
    partition_system = Objects.PartitionSystemObject()
    partition = Objects.PartitionObject()
    volume = Objects.VolumeObject()
    volume.ftype_str = "hfs"
    volume_file = Objects.FileObject()

    # Make objects for more exotic appends.
    nested_partition_system = Objects.PartitionSystemObject()
    nested_volume = Objects.VolumeObject()
    nested_volume.ftype_str = "hfsplus"
    document_file = _floating_file()
    partition_system_file = _floating_file()
    partition_file = _floating_file()

    # Do simple appends.
    simple_appends = (
      (document, disk_image),
      (disk_image, partition_system),
      (partition_system, partition),
      (partition, volume),
      (volume, volume_file),
    )
    for (parent, child) in simple_appends:
        parent.append(child)

    # Do more exotic appends.
    exotic_appends = (
      (partition, nested_partition_system),
      (volume, nested_volume),
      (document, document_file),
      (partition_system, partition_system_file),
      (partition, partition_file),
    )
    for (parent, child) in exotic_appends:
        parent.append(child)
def test_bsd_disklabel_properties():
    """
    These were drawn from a BSD Disk Label sample image.

    Confirms that the letter-valued partition indexes "a" and "c" survive a
    file round trip, in order.
    """
    document = Objects.DFXMLObject(version="1.2.0")

    partition_a = Objects.PartitionObject()
    partition_c = Objects.PartitionObject()
    document.append(partition_a)
    document.append(partition_c)

    partition_a.partition_index = "a"
    partition_c.partition_index = "c"

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    (tmp_filename, document_reconst) = round_trip
    try:
        partition_a_reconst = document_reconst.partitions[0]
        partition_c_reconst = document_reconst.partitions[1]
        assert partition_a_reconst.partition_index == "a"
        assert partition_c_reconst.partition_index == "c"
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def main():
    """
    Combine the DFXML documents named in args.labeled_xml_file into a single
    document and print it.

    Each argument entry has the form '<number>:<path>'.  Entries are processed
    in sorted (number, path) order.  The number is assigned as the
    partition_offset of the document's volume and added to each byte run's
    fs_offset to give its img_offset.  A document with no volume has one
    built for it, and its file objects are appended to that volume.

    Raises ValueError on a malformed entry, or on an input document with more
    than one volume.
    """
    def _split_label(labeled_path):
        #Turn '<number>:<path>' into (int(number), path).
        fields = labeled_path.split(":")
        if not (len(fields) == 2 and fields[0].isdigit()):
            raise ValueError(
              "Malformed argument in labeled_xml_file. Expecting space-delimited list of '<number>:<path>'. This entry doesn't work: %r."
              % labeled_path)
        return (int(fields[0]), fields[1])

    d = Objects.DFXMLObject(version="1.2.0")
    d.program = sys.argv[0]
    d.program_version = __version__
    d.command_line = " ".join(sys.argv)
    d.dc["type"] = "File system walk concatentation"
    #A bit of a bend, but gets the major version information out.
    python_version = ".".join(str(part) for part in sys.version_info[0:3])
    d.add_creator_library("Python", python_version)
    d.add_creator_library("Objects.py", Objects.__version__)
    d.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    offsets_and_pxml_paths = sorted(
      _split_label(entry) for entry in args.labeled_xml_file)

    byte_run_properties = ("data_brs", "name_brs", "inode_brs")

    for (partition_number, (offset, pxml_path)) in enumerate(offsets_and_pxml_paths, start=1):
        _logger.debug("Running on path %r.", pxml_path)
        pdo = Objects.parse(pxml_path)

        #Fetch or build volume we'll append
        volume_count = len(pdo.volumes)
        if volume_count > 1:
            raise ValueError(
              "An input DFXML document has multiple volumes; this script assumes each input document only has one. The document here has %d: %r."
              % (volume_count, pxml_path))
        building_volume = (volume_count == 0)
        if building_volume:
            v = Objects.VolumeObject()
        else:
            v = pdo.volumes[0]

        v.partition_offset = offset

        #Accumulate namespaces
        for (prefix, url) in pdo.iter_namespaces():
            d.add_namespace(prefix, url)

        for obj in pdo:
            #Force-update image offsets in byte runs
            for brs_prop in byte_run_properties:
                brs = getattr(obj, brs_prop, None)
                if brs is None:
                    continue
                for br in brs:
                    if br.fs_offset is not None:
                        br.img_offset = br.fs_offset + offset

            #For files, set partition identifier and attach to partition
            if not isinstance(obj, Objects.FileObject):
                continue
            obj.partition = partition_number
            if building_volume:
                v.append(obj)

        #Collect the constructed and/or updated volume
        d.append(v)

    d.print_dfxml()
def parse(self, in_fh):
    """
    Build a DFXMLObject holding one disk image whose byte runs are the table
    rows of the input that carry the status "+".

    Each line is classified by its text: a line starting with "0x" is a
    table row when the parser is at the table head or already in the table,
    otherwise a current-position record; the two "#" header lines announce
    the sections that follow; anything else is pre-table text.  Every
    classification is passed to self.transition().

    Runs of whitespace inside a line are collapsed to single spaces before
    classification and field splitting, so column-aligned input (several
    spaces or tabs between fields) is handled the same as single-spaced
    input.  Splitting on a single space would produce empty fields for such
    lines and fail when converting them to integers.

    The disk image's filesize is set to the end offset (pos + size) of the
    last table row read, whatever its status.

    @param in_fh Iterable of text lines, e.g. a file handle opened in text mode.

    Returns a DFXMLObject.
    """
    dobj = Objects.DFXMLObject(version="1.2.0+")
    dobj.program = sys.argv[0]
    dobj.program_version = __version__
    dobj.command_line = " ".join(sys.argv)
    dobj.dc["type"] = "Disk image sector map"
    dobj.add_creator_library("Python", ".".join(map(str, sys.version_info[0:3]))) #A bit of a bend, but gets the major version information out.
    dobj.add_creator_library("Objects.py", Objects.__version__)
    dobj.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    diobj = Objects.DiskImageObject()
    dobj.append(diobj)

    brs = Objects.ByteRuns()
    diobj.byte_runs = brs

    dobj.add_namespace("gddr", Objects.dfxml.XMLNS_DFXML + "#gddrescue")

    self._state = ParseState.FILE_OPENED
    self._disk_image_len = 0

    for (line_no, line) in enumerate(in_fh):
        self._line_no = line_no

        # str.split() with no argument drops leading/trailing whitespace and
        # treats any run of whitespace as one separator.
        line_parts = line.split()
        cleaned_line = " ".join(line_parts)

        if cleaned_line.startswith("0x"):
            if self._state in (ParseState.TABLE_HEAD, ParseState.IN_TABLE):
                self.transition(ParseState.IN_TABLE)
            else:
                self.transition(ParseState.CURRENT_POS_RECORD)
        elif cleaned_line == "# pos size status":
            self.transition(ParseState.TABLE_HEAD)
        elif cleaned_line == "# current_pos current_status current_pass":
            self.transition(ParseState.CURRENT_POS_HEAD)
        else:
            self.transition(ParseState.PRE_TABLE)

        # Only table rows describe byte runs.
        if self._state != ParseState.IN_TABLE:
            continue

        br = Objects.ByteRun()
        br.img_offset = int(line_parts[0], base=16)
        br.len = int(line_parts[1], base=16)
        self._disk_image_len = br.img_offset + br.len

        # TODO
        # Independent design decision, while awaiting a consensus design:
        # Only report the byte runs ddrescue was able to collect.
        if line_parts[2] != "+":
            continue
        brs.append(br)

    diobj.filesize = self._disk_image_len
    _logger.info("diobj.filesize = %r." % diobj.filesize)

    self.transition(ParseState.STREAM_COMPLETE)
    return dobj
def main():
    """
    Print an example DFXML document: one "examplefs" volume spanning
    FILE_SYSTEM_START to DISK_IMAGE_SIZE, holding a fixed list of files whose
    data byte runs are positioned relative to FILE_SYSTEM_START,
    DAMAGE_REGION_START, GOOD_REGION_START and DISK_IMAGE_SIZE.
    """
    dobj = Objects.DFXMLObject(version="1.2.0")
    dobj.program = sys.argv[0]
    dobj.program_version = __version__
    dobj.command_line = " ".join(sys.argv)
    dobj.dc["type"] = "Example"
    #A bit of a bend, but gets the major version information out.
    python_version = ".".join(str(part) for part in sys.version_info[0:3])
    dobj.add_creator_library("Python", python_version)
    dobj.add_creator_library("Objects.py", Objects.__version__)
    dobj.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    vobj = Objects.VolumeObject()
    dobj.append(vobj)

    vobj.ftype_str = "examplefs"

    # Define file system position.
    vobj.byte_runs = Objects.ByteRuns()
    vbr = Objects.ByteRun()
    vobj.byte_runs.append(vbr)
    vbr.img_offset = FILE_SYSTEM_START
    vbr.len = DISK_IMAGE_SIZE - FILE_SYSTEM_START

    # Each entry: (file name, [(image offset, length), ...]).
    fobj_specs = [
      ("first_sector.bin", [
        (0, 512),
      ]),
      ("first_four_kilobytes.bin", [
        (0, 4000),
      ]),
      ("contiguous_before_bad_region.dat", [
        (FILE_SYSTEM_START + 4096*1, 4096),
      ]),
      ("contiguous_around_bad_region_left_edge.dat", [
        (DAMAGE_REGION_START - 4096, 8192),
      ]),
      ("contiguous_in_bad_region.dat", [
        (DAMAGE_REGION_START + 4096*1, 4096),
      ]),
      ("contiguous_around_bad_region_right_edge.dat", [
        (GOOD_REGION_START - 4096*1, 8192),
      ]),
      ("contiguous_after_bad_region.dat", [
        (GOOD_REGION_START + 4096*2, 4096),
      ]),
      ("fragmented_all_before_bad_region.dat", [
        (FILE_SYSTEM_START + 4096*10, 4096),
        (FILE_SYSTEM_START + 4096*20, 4096),
        (FILE_SYSTEM_START + 4096*30, 4096),
      ]),
      ("fragmented_all_after_bad_region.dat", [
        (GOOD_REGION_START + 4096*10, 4096),
        (GOOD_REGION_START + 4096*20, 4096),
        (GOOD_REGION_START + 4096*30, 4096),
      ]),
      ("fragmented_all_inside_bad_region.dat", [
        (DAMAGE_REGION_START + 4096*10, 4096),
        (DAMAGE_REGION_START + 4096*20, 4096),
        (DAMAGE_REGION_START + 4096*30, 4096),
      ]),
      ("fragmented_beginning_inside_bad_region.dat", [
        (DAMAGE_REGION_START + 4096*40, 4096),
        (GOOD_REGION_START + 4096*40, 4096),
      ]),
      ("fragmented_middle_inside_bad_region.dat", [
        (FILE_SYSTEM_START + 4096*50, 4096),
        (DAMAGE_REGION_START + 4096*50, 4096),
        (GOOD_REGION_START + 4096*50, 4096),
      ]),
      ("fragmented_end_inside_bad_region.dat", [
        (FILE_SYSTEM_START + 4096*60, 4096),
        (DAMAGE_REGION_START + 4096*60, 4096),
      ]),
      ("after_disk_image_end.dat", [
        (DISK_IMAGE_SIZE + 4096*1000, 4096),
      ]),
      ("fragmented_partially_recoverable_directory", [
        (FILE_SYSTEM_START + 4096*170, 4096),
        (DAMAGE_REGION_START + 4096*170, 4096),
        (GOOD_REGION_START + 4096*170, 4096),
      ]),
      ("fragmented_partially_recoverable_directory/child_file_1", [
        (FILE_SYSTEM_START + 4096*180, 4096),
      ]),
      ("fragmented_partially_recoverable_directory/child_file_2", [
        (FILE_SYSTEM_START + 4096*190, 4096),
      ]),
      ("fragmented_partially_recoverable_directory/child_file_3", [
        (FILE_SYSTEM_START + 4096*200, 4096),
      ]),
      ("fragmented_partially_recoverable_directory/child_file_4", [
        (FILE_SYSTEM_START + 4096*210, 4096),
      ]),
      ("fragmented_partially_recoverable_directory/child_file_9", [
        (GOOD_REGION_START + 4096*180, 4096),
      ]),
    ]

    for (spec_filename, spec_intervals) in fobj_specs:
        fobj = Objects.FileObject()
        vobj.append(fobj)

        fobj.filename = spec_filename
        fobj.alloc = True

        # Naming convention for this sample - the .bin files are virtual files that reference a region outside of the file system.
        assigned_filename = fobj.filename
        if assigned_filename == "fragmented_partially_recoverable_directory":
            name_type = "d"
        elif assigned_filename.endswith(".bin"):
            name_type = "v"
        else:
            name_type = "r"
        fobj.name_type = name_type

        fobj.data_brs = Objects.ByteRuns()
        for (img_offset, length) in spec_intervals:
            br = Objects.ByteRun()
            fobj.data_brs.append(br)
            br.img_offset = img_offset
            br.len = length

        # File size is the total length of the data byte runs.
        fobj.filesize = sum(br.len for br in fobj.data_brs)

    dobj.print_dfxml()
def _test_file_in_non_fs_levels_deep(include_disk_image, include_partition_system, include_partition, include_file_system):
    """
    This test follows a simple, vertical storage layer stack, but adds a file at each layer.

    Each include_* flag requests one layer; every layer that is built is
    appended to the most recently built one.  A partition is skipped when a
    disk image is requested without a partition system, and a file system is
    skipped when a partition system is requested without a partition.  After
    a file round trip, the first file at each built layer is checked against
    its hash.
    """
    def _floating_file(sha512):
        # File object flagged as having neither inode nor name allocated.
        fobj = Objects.FileObject()
        fobj.alloc_inode = False
        fobj.alloc_name = False
        fobj.sha512 = sha512
        return fobj

    # Add partition to partition system, but not disk image.
    build_partition = include_partition and (
      include_partition_system or not include_disk_image)
    # Add file system to anything but a partition system.
    build_file_system = include_file_system and (
      include_partition or not include_partition_system)

    dobj = Objects.DFXMLObject(version="1.2.0")

    # Add file to top-level document.
    dobj.append(_floating_file(TEST_HASH_1))

    appender_stack = [dobj]

    if include_disk_image:
        # Add disk image to top-level document.
        diobj = Objects.DiskImageObject()
        appender_stack[-1].append(diobj)
        appender_stack.append(diobj)

        # Add file to disk image.
        diobj.append(_floating_file(TEST_HASH_2))

    if include_partition_system:
        # Add partition system to disk image.
        psobj = Objects.PartitionSystemObject()
        appender_stack[-1].append(psobj)
        appender_stack.append(psobj)

        # Add file to partition system.
        psobj.append(_floating_file(TEST_HASH_3))

    if build_partition:
        pobj = Objects.PartitionObject()
        appender_stack[-1].append(pobj)
        appender_stack.append(pobj)

        # Add file to partition.
        pobj.append(_floating_file(TEST_HASH_4))

    if build_file_system:
        vobj = Objects.VolumeObject()
        appender_stack[-1].append(vobj)
        appender_stack.append(vobj)

        # Add file to file system.
        fobj_vobj = Objects.FileObject()
        fobj_vobj.sha512 = TEST_HASH_5
        vobj.append(fobj_vobj)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(dobj)
    (tmp_filename, dobj_reconst) = round_trip
    try:
        container_stack = [dobj_reconst]
        assert dobj_reconst.files[0].sha512 == TEST_HASH_1

        if include_disk_image:
            diobj_reconst = container_stack[-1].disk_images[0]
            container_stack.append(diobj_reconst)
            assert diobj_reconst.files[0].sha512 == TEST_HASH_2

        if include_partition_system:
            psobj_reconst = container_stack[-1].partition_systems[0]
            container_stack.append(psobj_reconst)
            assert psobj_reconst.files[0].sha512 == TEST_HASH_3

        if build_partition:
            pobj_reconst = container_stack[-1].partitions[0]
            container_stack.append(pobj_reconst)
            assert pobj_reconst.files[0].sha512 == TEST_HASH_4

        if build_file_system:
            vobj_reconst = container_stack[-1].volumes[0]
            assert vobj_reconst.files[0].sha512 == TEST_HASH_5
    except BaseException:
        # On any failure the temporary file is left in place; log its path.
        _logger.debug("tmp_filename = %r.", tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
def make_differential_dfxml(pre, post, **kwargs): """ Takes as input two paths to DFXML files. Returns a DFXMLObject. @param pre String. @param post String. @param diff_mode Optional. One of "all" or "idifference". @param retain_unchanged Optional. Boolean. @param ignore_properties Optional. Set. @param annotate_matches Optional. Boolean. True -> matched file objects get a "delta:matched='1'" attribute. @param rename_requires_hash Optional. Boolean. True -> all matches require matching SHA-1's, if present. @param ignore_filename_function Optional. Function, string -> Boolean. Returns True if a file name (which can be null) should be ignored. @param glom_byte_runs Optional. Boolean. Joins contiguous-region byte runs together in FileObject byte run lists. """ diff_mode = kwargs.get("diff_mode", "all") retain_unchanged = kwargs.get("retain_unchanged", False) ignore_properties = kwargs.get("ignore_properties", set()) annotate_matches = kwargs.get("annotate_matches", False) rename_requires_hash = kwargs.get("rename_requires_hash", False) ignore_filename_function = kwargs.get("ignore_filename_function", ignorable_name) glom_byte_runs = kwargs.get("glom_byte_runs", False) _expected_diff_modes = ["all", "idifference"] if diff_mode not in _expected_diff_modes: raise ValueError("Differencing mode should be in: %r." % _expected_diff_modes) diff_mask_set = set() if diff_mode == "idifference": diff_mask_set |= set([ "atime", "byte_runs", "crtime", "ctime", "filename", "filesize", "md5", "mtime", "sha1" ]) _logger.debug("diff_mask_set = " + repr(diff_mask_set)) #d: The container DFXMLObject, ultimately returned. 
d = Objects.DFXMLObject(version="1.2.0") if sys.argv[0] == os.path.basename(__file__): d.program = sys.argv[0] d.program_version = __version__ d.command_line = " ".join(sys.argv) d.add_namespace("delta", dfxml.XMLNS_DELTA) d.dc["type"] = "Disk image difference set" d.add_creator_library("Python", ".".join( map(str, sys.version_info[0:3] ))) #A bit of a bend, but gets the major version information out. d.add_creator_library("Objects.py", Objects.__version__) d.add_creator_library("dfxml.py", Objects.dfxml.__version__) d.diff_file_ignores |= ignore_properties _logger.debug("d.diff_file_ignores = " + repr(d.diff_file_ignores)) #The list most of this function is spent on building fileobjects_changed = [] #Unmodified files; only retained if requested. fileobjects_unchanged = [] #Key: (partition, inode, filename); value: FileObject old_fis = None new_fis = None #Key: (partition, inode, filename); value: FileObject list old_fis_unalloc = None new_fis_unalloc = None #Key: Partition byte offset within the disk image, paired with the file system type #Value: VolumeObject old_volumes = None new_volumes = None matched_volumes = dict() #Populated in distinct (offset, file system type as string) encounter order volumes_encounter_order = dict() for infile in [pre, post]: _logger.debug("infile = %r" % infile) old_fis = new_fis new_fis = dict() old_volumes = new_volumes new_volumes = dict() #Fold in the matched volumes - we're just discarding the deleted volumes for k in matched_volumes: old_volumes[k] = matched_volumes[k] matched_volumes = dict() old_fis_unalloc = new_fis_unalloc new_fis_unalloc = collections.defaultdict(list) d.sources.append(infile) for (i, (event, new_obj)) in enumerate(Objects.iterparse(infile)): if isinstance(new_obj, Objects.DFXMLObject): #Inherit desired properties from the source DFXMLObject. 
#Inherit namespaces for (prefix, url) in new_obj.iter_namespaces(): d.add_namespace(prefix, url) continue elif isinstance(new_obj, Objects.VolumeObject): if event == "end": #This algorithm doesn't yet need to know when a volume is concluded. On to the next object. continue offset = new_obj.partition_offset if offset is None: raise AttributeError( "To perform differencing with volumes, the <volume> elements must have a <partition_offset>. Either re-generate your DFXML with partition offsets, or run this program again with the --ignore-volumes flag." ) #Use the lower-case volume spelling ftype_str = _lower_ftype_str(new_obj) #Re-capping the general differential analysis algorithm: #0. If the volume is in the new list, something's gone wrong. if (offset, ftype_str) in new_volumes: _logger.debug("new_obj.partition_offset = %r." % offset) _logger.warning( "Encountered a volume that starts at an offset as another volume, in the same disk image. This analysis is based on the assumption that that doesn't happen. Check results that depend on partition mappings." ) #1. If the volume is in the old list, pop it out of the old list - it's matched. if old_volumes and (offset, ftype_str) in old_volumes: _logger.debug( "Found a volume in post image, at offset %r." % offset) old_obj = old_volumes.pop((offset, ftype_str)) new_obj.original_volume = old_obj new_obj.compare_to_original() matched_volumes[(offset, ftype_str)] = new_obj #2. If the volume is NOT in the old list, add it to the new list. else: _logger.debug("Found a new volume, at offset %r." % offset) new_volumes[(offset, ftype_str)] = new_obj volumes_encounter_order[( offset, ftype_str)] = len(new_volumes) + ( (old_volumes and len(old_volumes)) or 0) + len(matched_volumes) #3. Afterwards, the old list contains deleted volumes. 
#Record the ID new_obj.id = volumes_encounter_order[(offset, ftype_str)] #Move on to the next object continue elif not isinstance(new_obj, Objects.FileObject): #The rest of this loop compares only file objects. continue if ignore_filename_function(new_obj.filename): continue #Simplify byte runs if requested if glom_byte_runs: if new_obj.byte_runs: temp_byte_runs = Objects.ByteRuns() for run in new_obj.byte_runs: temp_byte_runs.glom(run) new_obj.byte_runs = temp_byte_runs #Normalize the partition number if new_obj.volume_object is None: new_obj.partition = None else: vo = new_obj.volume_object fts = _lower_ftype_str(vo) new_obj.partition = volumes_encounter_order[( vo.partition_offset, fts)] #Define the identity key of this file -- affected by the --ignore argument _key_partition = None if "partition" in ignore_properties else new_obj.partition _key_inode = None if "inode" in ignore_properties else new_obj.inode _key_filename = None if "filename" in ignore_properties else new_obj.filename key = (_key_partition, _key_inode, _key_filename) #Ignore unallocated content comparisons until a later loop. The unique identification of deleted files needs a little more to work. if not new_obj.alloc: new_fis_unalloc[key].append(new_obj) continue #The rest of this loop is irrelevant until the second DFXML file. if old_fis is None: new_fis[key] = new_obj continue if key in old_fis: #Extract the old fileobject and check for changes old_obj = old_fis.pop(key) new_obj.original_fileobject = old_obj new_obj.compare_to_original(file_ignores=d.diff_file_ignores) #_logger.debug("Diffs: %r." % _diffs) _diffs = new_obj.diffs - d.diff_file_ignores #_logger.debug("Diffs after ignore-set: %r." % _diffs) if diff_mask_set: _diffs &= diff_mask_set #_logger.debug("Diffs after mask-set: %r." % _diffs) if len(_diffs) > 0: #_logger.debug("Remaining diffs: " + repr(_diffs)) fileobjects_changed.append(new_obj) else: #Unmodified file; only keep if requested. 
if retain_unchanged: fileobjects_unchanged.append(new_obj) else: #Store the new object new_fis[key] = new_obj #The rest of the files loop is irrelevant until the second file. if old_fis is None: continue _logger.debug("len(old_fis) = %d" % len(old_fis)) _logger.debug("len(old_fis_unalloc) = %d" % len(old_fis_unalloc)) _logger.debug("len(new_fis) = %d" % len(new_fis)) _logger.debug("len(new_fis_unalloc) = %d" % len(new_fis_unalloc)) _logger.debug("len(fileobjects_changed) = %d" % len(fileobjects_changed)) #Identify renames - only possible if 1-to-1. Many-to-many renames are just left as new and deleted files. _logger.debug("Detecting renames...") fileobjects_renamed = [] def _make_name_map(d): """Returns a dictionary, mapping (partition, inode) -> {filename}.""" retdict = collections.defaultdict(lambda: set()) for (partition, inode, filename) in d.keys(): retdict[(partition, inode)].add(filename) return retdict old_inode_names = _make_name_map(old_fis) new_inode_names = _make_name_map(new_fis) for key in new_inode_names.keys(): (partition, inode) = key if len(new_inode_names[key]) != 1: continue if not key in old_inode_names: continue if len(old_inode_names[key]) != 1: continue if rename_requires_hash: #Peek at the set elements by doing a quite-ephemeral list cast old_obj = old_fis[(partition, inode, list(old_inode_names[key])[0])] new_obj = new_fis[(partition, inode, list(new_inode_names[key])[0])] if old_obj.sha1 != new_obj.sha1: continue #Found a match if we're at this point in the loop old_name = old_inode_names[key].pop() new_name = new_inode_names[key].pop() old_obj = old_fis.pop((partition, inode, old_name)) new_obj = new_fis.pop((partition, inode, new_name)) new_obj.original_fileobject = old_obj new_obj.compare_to_original(file_ignores=d.diff_file_ignores) fileobjects_renamed.append(new_obj) _logger.debug("len(old_fis) -> %d" % len(old_fis)) _logger.debug("len(new_fis) -> %d" % len(new_fis)) _logger.debug("len(fileobjects_changed) -> %d" % 
len(fileobjects_changed)) _logger.debug("len(fileobjects_renamed) = %d" % len(fileobjects_renamed)) #Identify files that just changed inode number - basically, doing the rename detection again _logger.debug("Detecting inode number changes...") def _make_inode_map(d): """Returns a dictionary, mapping (partition, filename) -> inode.""" retdict = dict() for (partition, inode, filename) in d.keys(): if (partition, filename) in retdict: _logger.warning( "Multiple instances of the file path %r were found in partition %r; this violates an assumption of this program, that paths are unique within partitions." % (filename, partition)) retdict[(partition, filename)] = inode return retdict old_name_inodes = _make_inode_map(old_fis) new_name_inodes = _make_inode_map(new_fis) for key in new_name_inodes.keys(): if not key in old_name_inodes: continue (partition, name) = key old_obj = old_fis.pop((partition, old_name_inodes[key], name)) new_obj = new_fis.pop((partition, new_name_inodes[key], name)) new_obj.original_fileobject = old_obj #TODO Test for what chaos ensues when filename is in the ignore list. new_obj.compare_to_original(file_ignores=d.diff_file_ignores) fileobjects_changed.append(new_obj) _logger.debug("len(old_fis) -> %d" % len(old_fis)) _logger.debug("len(new_fis) -> %d" % len(new_fis)) _logger.debug("len(fileobjects_changed) -> %d" % len(fileobjects_changed)) #And that's the end of the allocated-only, per-volume analysis. #We may be able to match files that aren't allocated against files we think are deleted _logger.debug("Detecting modifications from unallocated files...") fileobjects_deleted = [] for key in new_fis_unalloc: #1 partition; 1 inode number; 1 name, repeated: Too ambiguous to compare. if len(new_fis_unalloc[key]) != 1: continue if key in old_fis_unalloc: if len(old_fis_unalloc[key]) == 1: #The file was unallocated in the previous image, too. 
old_obj = old_fis_unalloc[key].pop() new_obj = new_fis_unalloc[key].pop() new_obj.original_fileobject = old_obj new_obj.compare_to_original( file_ignores=d.diff_file_ignores) #The file might not have changed. It's interesting if it did, though. _diffs = new_obj.diffs - diff_mask_set #_logger.debug("Diffs: %r." % _diffs) if diff_mask_set: _diffs &= diff_mask_set #_logger.debug("Diffs after mask-set: %r." % _diffs) if len(_diffs) > 0: _logger.debug("Remaining diffs: " + repr(_diffs)) fileobjects_changed.append(new_obj) elif retain_unchanged: fileobjects_unchanged.append(new_obj) elif key in old_fis: #Identified a deletion. old_obj = old_fis.pop(key) new_obj = new_fis_unalloc[key].pop() new_obj.original_fileobject = old_obj new_obj.compare_to_original(file_ignores=d.diff_file_ignores) fileobjects_deleted.append(new_obj) _logger.debug("len(old_fis) -> %d" % len(old_fis)) _logger.debug("len(old_fis_unalloc) -> %d" % len(old_fis_unalloc)) _logger.debug("len(new_fis) -> %d" % len(new_fis)) _logger.debug("len(new_fis_unalloc) -> %d" % len(new_fis_unalloc)) _logger.debug("len(fileobjects_changed) -> %d" % len(fileobjects_changed)) _logger.debug("len(fileobjects_deleted) -> %d" % len(fileobjects_deleted)) #After deletion matching is performed, one might want to look for files migrating to other partitions. #However, since between-volume migration creates a new deleted file, this algorithm instead ignores partition migrations. #AJN TODO Thinking about it a little more, I can't suss out a reason against trying this match. It's complicated if we try looking for reallocations in new_fis, strictly from new_fis_unalloc. #TODO We might also want to match the unallocated objects based on metadata addresses. Unfortunately, that requires implementation of additional byte runs, which hasn't been fully designed yet in the DFXML schema. #Begin output. #First, annotate the volume objects. 
for key in new_volumes: v = new_volumes[key] v.annos.add("new") for key in old_volumes: v = old_volumes[key] v.annos.add("deleted") for key in matched_volumes: v = matched_volumes[key] if len(v.diffs) > 0: v.annos.add("modified") #Build list of FileObject appenders, child volumes of the DFXML Document. #Key: Partition number, or None #Value: Reference to the VolumeObject corresponding with that partition number. None -> the DFXMLObject. appenders = dict() for volume_dict in [new_volumes, matched_volumes, old_volumes]: for (offset, ftype_str) in volume_dict: veo = volumes_encounter_order[(offset, ftype_str)] if veo in appenders: raise ValueError( "This pair is already in the appenders dictionary, which was supposed to be distinct: " + repr((offset, ftype_str)) + ", encounter order " + str(veo) + ".") v = volume_dict[(offset, ftype_str)] appenders[veo] = v d.append(v) #Add in the default appender, the DFXML Document itself. appenders[None] = d #A file should only be considered "modified" if its contents have changed. content_diffs = set(["md5", "sha1", "sha256"]) def _maybe_match_attr(obj): """Just adds the 'matched' annotation when called.""" if annotate_matches: obj.annos.add("matched") #Populate DFXMLObject. for key in new_fis: #TODO If this script ever does a series of >2 DFXML files, these diff additions need to be removed for the next round. 
fi = new_fis[key] fi.annos.add("new") appenders[fi.partition].append(fi) for key in new_fis_unalloc: for fi in new_fis_unalloc[key]: fi.annos.add("new") appenders[fi.partition].append(fi) for fi in fileobjects_deleted: #Independently flag for name, content, and metadata modifications if len(fi.diffs - content_diffs) > 0: fi.annos.add("changed") if len(content_diffs.intersection(fi.diffs)) > 0: fi.annos.add("modified") if "filename" in fi.diffs: fi.annos.add("renamed") fi.annos.add("deleted") _maybe_match_attr(fi) appenders[fi.partition].append(fi) for key in old_fis: ofi = old_fis[key] nfi = Objects.FileObject() nfi.original_fileobject = ofi nfi.annos.add("deleted") appenders[ofi.partition].append(nfi) for key in old_fis_unalloc: for ofi in old_fis_unalloc[key]: nfi = Objects.FileObject() nfi.original_fileobject = ofi nfi.annos.add("deleted") appenders[ofi.partition].append(nfi) for fi in fileobjects_renamed: #Independently flag for content and metadata modifications if len(content_diffs.intersection(fi.diffs)) > 0: fi.annos.add("modified") if len(fi.diffs - content_diffs) > 0: fi.annos.add("changed") fi.annos.add("renamed") _maybe_match_attr(fi) appenders[fi.partition].append(fi) for fi in fileobjects_changed: #Independently flag for content and metadata modifications if len(content_diffs.intersection(fi.diffs)) > 0: fi.annos.add("modified") if len(fi.diffs - content_diffs) > 0: fi.annos.add("changed") _maybe_match_attr(fi) appenders[fi.partition].append(fi) for fi in fileobjects_unchanged: _maybe_match_attr(fi) appenders[fi.partition].append(fi) #Output return d
def main():
    """
    Write a DFXML document whose file-diff ignore set contains "atime" and
    "crtime".

    The output path is read from ``args.out_dfxml``; ``args`` is resolved
    outside of this function.
    """
    document = Objects.DFXMLObject()
    for ignored_property in ("atime", "crtime"):
        document.diff_file_ignores.add(ignored_property)

    with open(args.out_dfxml, "w") as out_fh:
        document.print_dfxml(out_fh)
def main():
    """
    Walk the current working directory and print a DFXML document listing
    one file object per directory entry found (plus ".").

    Settings are read from ``args``, which is resolved outside of this
    function:
      * ``args.jobs`` - when greater than 1, file objects are built by that
        many worker threads (if ``threading`` and ``queue`` can be imported).
      * ``args.ignore`` - iterable of property descriptors, either
        ``property`` (ignored for all name types) or ``property@name_type``.
      * ``args.ignore_hashes`` - when set, every property named in
        ``walk_default_hashes`` is ignored for all name types.

    A path that raises FileNotFoundError while being converted is still
    emitted, as a file object carrying only its filename and an error
    string.  Any other exception propagates to the caller; in threaded mode
    it is re-raised from the main thread after the workers have stopped.
    """
    global walk_default_hashes

    #Determine whether we're going in threading mode or not.  (Some modules are not available by default.)
    using_threading = False
    if args.jobs > 1:
        using_threading = True  #(unless supporting modules are absent)
        #Only a failed import should downgrade to single-thread mode; a bare except would also swallow KeyboardInterrupt and SystemExit.
        try:
            import threading
        except ImportError:
            using_threading = False
            _logger.warning(
                "Threading support not available. Running in single thread only."
            )

        try:
            import queue
        except ImportError:
            using_threading = False
            _logger.warning(
                "Python queue support not available. (If running Ubuntu, this is in package python3-queuelib.) Running in single thread only."
            )

    dobj = Objects.DFXMLObject(version="1.2.0")
    dobj.program = sys.argv[0]
    dobj.program_version = __version__
    dobj.command_line = " ".join(sys.argv)
    dobj.dc["type"] = "File system walk"
    dobj.add_creator_library("Python", ".".join(
        map(str, sys.version_info[0:3])
    ))  #A bit of a bend, but gets the major version information out.
    dobj.add_creator_library("Objects.py", Objects.__version__)
    dobj.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    # Key: property.
    # Value: set of name_types that should have the property ignored.  "*" indicates all.  No sets should be empty by the end of this setup.
    ignore_properties = collections.defaultdict(set)
    if args.ignore:
        for property_descriptor in args.ignore:
            property_descriptor_parts = property_descriptor.split("@")
            property_name = property_descriptor_parts[0]
            if len(property_descriptor_parts) == 1:
                ignore_properties[property_name].add("*")
            else:
                ignore_properties[property_name].add(
                    property_descriptor_parts[-1])
    if args.ignore_hashes:
        for property_name in walk_default_hashes:
            ignore_properties[property_name].add("*")
    #_logger.debug("ignore_properties = %r." % ignore_properties)

    filepaths = set()
    filepaths.add(".")
    for (dirpath, dirnames, filenames) in os.walk("."):
        for dirent_name in sorted(set(dirnames) | set(filenames)):
            #The relpath wrapper removes "./" from the head of the path.
            filepaths.add(os.path.relpath(os.path.join(dirpath, dirent_name)))

    def _fileobject_or_error(filepath):
        """Build the file object for filepath; a FileNotFoundError is recorded on a stub file object instead of being raised."""
        try:
            return filepath_to_fileobject(
                filepath, ignore_properties=ignore_properties)
        except FileNotFoundError as e:
            fobj = Objects.FileObject()
            fobj.filename = filepath
            fobj.error = "".join(traceback.format_stack())
            if e.args:
                fobj.error += "\n" + str(e.args)
            return fobj

    fileobjects_by_filepath = dict()

    if using_threading:
        #Threading syntax c/o: https://docs.python.org/3.5/library/queue.html
        q = queue.Queue()
        threads = []
        #Unexpected exceptions raised in workers, kept so the main thread can re-raise one.
        worker_errors = []

        def _worker():
            while True:
                filepath = q.get()
                if filepath is None:
                    break
                try:
                    fileobjects_by_filepath[filepath] = _fileobject_or_error(
                        filepath)
                except Exception as e:
                    #Keep this worker alive so the remaining queue items still get processed.
                    worker_errors.append(e)
                finally:
                    #Every dequeued path must be acknowledged, or q.join() below never returns.
                    q.task_done()

        for _ in range(args.jobs):
            t = threading.Thread(target=_worker)
            t.start()
            threads.append(t)

        for filepath in filepaths:
            q.put(filepath)

        # block until all tasks are done
        q.join()

        # stop workers
        for _ in range(args.jobs):
            q.put(None)
        for t in threads:
            t.join()

        if worker_errors:
            raise worker_errors[0]
    else:  #Not threading.
        for filepath in sorted(filepaths):
            fileobjects_by_filepath[filepath] = _fileobject_or_error(filepath)

    #Build output DFXML tree.
    for filepath in sorted(fileobjects_by_filepath.keys()):
        dobj.append(fileobjects_by_filepath[filepath])
    dobj.print_dfxml()
def _test_file_in_non_fs_levels_flat(include_disk_image,
                                     include_partition_system,
                                     include_partition, include_file_system):
    """
    Build a flat storage layer stack, where every requested container
    (disk image, partition system, partition, file system) is appended
    directly to the top-level document, and give the document and each
    container one file.  The document is then written out, read back, and
    each file's SHA-512 is checked against the value assigned here.

    On an assertion failure the temporary file is left in place and its
    path is logged at debug level.
    """

    def _floating_file(sha512):
        """File object flagged as having neither allocated inode nor allocated name."""
        fobj = Objects.FileObject()
        fobj.alloc_inode = False
        fobj.alloc_name = False
        fobj.sha512 = sha512
        return fobj

    dobj = Objects.DFXMLObject(version="1.2.0")

    # File attached to the top-level document.
    dobj.append(_floating_file(TEST_HASH_1))

    if include_disk_image:
        # Disk image, and its file.
        diobj = Objects.DiskImageObject()
        dobj.append(diobj)
        diobj.append(_floating_file(TEST_HASH_2))

    if include_partition_system:
        # Partition system, and its file.
        psobj = Objects.PartitionSystemObject()
        dobj.append(psobj)
        psobj.append(_floating_file(TEST_HASH_3))

    if include_partition:
        # Partition, and its file.
        pobj = Objects.PartitionObject()
        dobj.append(pobj)
        pobj.append(_floating_file(TEST_HASH_4))

    if include_file_system:
        # File system, and its file.  This file's allocation flags are left unset.
        vobj = Objects.VolumeObject()
        dobj.append(vobj)
        fobj_vobj = Objects.FileObject()
        fobj_vobj.sha512 = TEST_HASH_5
        vobj.append(fobj_vobj)

    # Do file I/O round trip.
    (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj)
    try:
        assert dobj_reconst.files[0].sha512 == TEST_HASH_1
        if include_disk_image:
            assert dobj_reconst.disk_images[0].files[0].sha512 == TEST_HASH_2
        if include_partition_system:
            assert dobj_reconst.partition_systems[0].files[0].sha512 == TEST_HASH_3
        if include_partition:
            assert dobj_reconst.partitions[0].files[0].sha512 == TEST_HASH_4
        if include_file_system:
            assert dobj_reconst.volumes[0].files[0].sha512 == TEST_HASH_5
    except:
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
def main():
    """
    Build a DFXML "Recoverability report" and write it to args.out_dfxml.

    The first disk image object found in the disk image DFXML
    (args.disk_image_dfxml, falling back to args.files_dfxml) supplies the
    byte runs that are indexed.  Each file object in args.files_dfxml then
    has its data byte runs tested for containment in that index, and
    per-outcome tallies are logged at debug level.  ``args`` is resolved
    outside of this function.

    Raises ValueError if the disk image object has no byte runs, or if the
    byte run index was never populated.
    """
    # Initialize output object.
    # TODO Upgrade to 1.3.0 on schema release.
    report = Objects.DFXMLObject(version="1.2.0+")
    report.program = sys.argv[0]
    report.program_version = __version__
    report.command_line = " ".join(sys.argv)
    report.dc["type"] = "Recoverability report"
    #A bit of a bend, but gets the major version information out.
    python_version = ".".join(map(str, sys.version_info[0:3]))
    report.add_creator_library("Python", python_version)
    report.add_creator_library("objects.py", Objects.__version__)
    report.add_creator_library("dfxml.py", Objects.dfxml.__version__)
    report.add_creator_library("intervals.py",
                               intact_byte_run_index.I.__version__)
    report.add_creator_library("intact_byte_run_index.py",
                               intact_byte_run_index.__version__)

    disk_image_dfxml = args.disk_image_dfxml or args.files_dfxml

    br_index = intact_byte_run_index.IntactByteRunIndex()

    # Index the byte runs of the disk image.  Only the first disk image "start" event is used.
    diobj = None
    for (event, obj) in Objects.iterparse(disk_image_dfxml):
        if not isinstance(obj, Objects.DiskImageObject) or event != "start":
            continue

        if obj.byte_runs is None or len(obj.byte_runs) == 0:
            raise ValueError(
                "DFXML document %r does not have diskimageobject with byte runs. Recoverability cannot be determined."
                % disk_image_dfxml)
        br_index.ingest_byte_runs(obj.byte_runs)
        diobj = obj
        break

    # Confirm initialization.
    if br_index.intervals is None:
        raise ValueError(
            "Disk image byte runs index not constructed after reading file that should have had disk image metadata: %r."
            % disk_image_dfxml)

    # Track diskimageobject.
    report.append(diobj)

    # File objects get attached to the closest/lowest parent in the object hierarchy: the disk image, or the file system currently open on top of this stack.
    appender_stack = [diobj]

    file_count_encountered = 0
    file_count_missing_byte_runs = 0
    file_count_missing_byte_run_offset = 0
    file_count_missing_byte_run_length = 0
    file_count_containment_unknown = 0
    file_count_intact = 0
    file_count_not_fully_recoverable = 0

    # Filter fileobject list, picking up file systems along the way.
    for (event, obj) in Objects.iterparse(args.files_dfxml):
        if isinstance(obj, Objects.VolumeObject):
            if event == "start":
                # Attach the volume to its parent, then make it the current parent.
                appender_stack[-1].append(obj)
                appender_stack.append(obj)
                continue
            elif event == "end":
                appender_stack.pop()
                continue

        if not isinstance(obj, Objects.FileObject):
            continue

        file_count_encountered += 1

        if obj.byte_runs is None or len(obj.byte_runs) == 0:
            file_count_missing_byte_runs += 1
            continue

        # Tri-state: True (every run contained), False (some run not contained), None (could not be decided).
        byte_runs_contained = True
        for byte_run in obj.data_brs:
            if byte_run.img_offset is None:
                #TODO See if this can be computed from fs_offset.
                file_count_missing_byte_run_offset += 1
                byte_runs_contained = None
                break

            if byte_run.len is None:
                file_count_missing_byte_run_length += 1
                byte_runs_contained = None
                break

            byte_run_contained = br_index.is_byte_run_contained(byte_run)
            if byte_run_contained is None:
                file_count_containment_unknown += 1
                byte_runs_contained = None
                break
            elif byte_run_contained == False:
                byte_runs_contained = False
                break

        if byte_runs_contained is True:
            file_count_intact += 1
        elif byte_runs_contained is False:
            file_count_not_fully_recoverable += 1
            # Record fileobject as child of diskimageobject.
            # NOTE(review): nesting of this append under the not-fully-recoverable branch was reconstructed from a flattened source listing - confirm against the original layout.
            appender_stack[-1].append(obj)

    _logger.debug("file_count_encountered = %d." % file_count_encountered)
    _logger.debug("file_count_missing_byte_runs = %d." %
                  file_count_missing_byte_runs)
    _logger.debug("file_count_missing_byte_run_offset = %d." %
                  file_count_missing_byte_run_offset)
    _logger.debug("file_count_missing_byte_run_length = %d." %
                  file_count_missing_byte_run_length)
    _logger.debug("file_count_containment_unknown = %d." %
                  file_count_containment_unknown)
    _logger.debug("file_count_intact = %d." % file_count_intact)
    _logger.debug("file_count_not_fully_recoverable = %d." %
                  file_count_not_fully_recoverable)

    with open(args.out_dfxml, "w") as report_fh:
        report.print_dfxml(report_fh)
# Make the package two directories up importable before importing it.
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))

import dfxml
import dfxml.objects as Objects

if __name__ == "__main__":
    # Script mode: exercise the creator/build library calls, then write the
    # resulting document to the path given as the first command line argument.
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    lobj = Objects.LibraryObject()
    _logger.debug("lobj = %r" % lobj)
    _logger.debug("lobj.to_Element() = %r" % lobj.to_Element())

    dobj = Objects.DFXMLObject()

    # A library can be added as an object, or as a (name, version) pair.
    dobj.add_creator_library(lobj)
    dobj.add_creator_library("libfoo", "1.2.3")
    #A bit of a bend, but gets the major version information out.
    python_version = ".".join(map(str, sys.version_info[0:3]))
    dobj.add_creator_library("Python", python_version)

    # A None version is passed on purpose; a ValueError here is the expected outcome.
    try:
        dobj.add_creator_library("libbar", None)
    except ValueError:
        _logger.info(
            "Caught expected value error from passing in incorrect types.")

    dobj.add_build_library("libbaz", "4.5")

    with open(sys.argv[1], "w") as out_fh:
        dobj.print_dfxml(out_fh)
def extract_files(image_path,
                  outdir,
                  dfxml_path=None,
                  file_predicate=is_file,
                  file_name=name_with_part_path,
                  dry_run=None,
                  out_manifest_path=None,
                  err_manifest_path=None,
                  keep_going=False):
    """
    Extract file contents from a disk image into outdir, optionally writing
    DFXML manifests of what was extracted and of what failed.

    @param image_path Disk image that content is read from.  Also parsed for file objects when dfxml_path is not given.
    @param outdir Directory under which extracted files are written.
    @param dfxml_path Optional DFXML file to parse for file objects instead of image_path.
    @param file_predicate Unary function.  Takes a Objects.FileObject; a false return skips the file.
    @param file_name Unary function.  Takes a Objects.FileObject; returns the file path to which this file will be extracted, relative to outdir.  So, if outdir="extraction" and the name_with_part_path function of this module is used, the file "/Users/Administrator/ntuser.dat" in partition 1 will be extracted to "extraction/partition_1/Users/Administrator/ntuser.dat".
    @param dry_run If true, nothing is written under outdir; byte tallying and manifest entries still happen.
    @param out_manifest_path If given, a manifest with an entry for every file considered for extraction is written here.
    @param err_manifest_path If given, a manifest with an entry for every file that had an extraction error or a size/hash mismatch is written here.
    @param keep_going If false, the loop stops after the first file whose extraction raised an exception.

    Files whose extraction path already exists are skipped and do not appear
    in either manifest.
    """
    extraction_byte_tally = 0

    _path_for_iterparse = dfxml_path or image_path

    #Set up base manifest to track extracted files
    base_manifest = Objects.DFXMLObject(version="1.2.0")
    base_manifest.program = sys.argv[0]
    if sys.argv[0] == os.path.basename(__file__):
        base_manifest.program_version = __version__
        #Otherwise, this DFXMLObject would need to be passed back to the calling function.
    base_manifest.command_line = " ".join(sys.argv)
    base_manifest.add_namespace("extractor", XMLNS_EXTRACTOR)
    base_manifest.add_namespace("delta", dfxml.XMLNS_DELTA)
    base_manifest.sources.append(image_path)
    if dfxml_path:
        base_manifest.sources.append(dfxml_path)
    base_manifest.add_creator_library("Python", ".".join(
        map(str, sys.version_info[0:3])
    ))  #A bit of a bend, but gets the major version information out.
    base_manifest.add_creator_library("Objects.py", Objects.__version__)
    base_manifest.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    #Clone base manifest to all-files' manifest and errors-only manifest
    out_manifest = None
    if out_manifest_path:
        out_manifest = copy.deepcopy(base_manifest)
    err_manifest = None
    if err_manifest_path:
        err_manifest = copy.deepcopy(base_manifest)

    for (event, obj) in Objects.iterparse(_path_for_iterparse):
        #Absolute prerequisites:
        if not isinstance(obj, Objects.FileObject):
            continue

        #Invoker prerequisites
        if not file_predicate(obj):
            continue

        extraction_entry = Objects.FileObject()
        extraction_entry.original_fileobject = obj

        #Construct path where the file will be extracted
        extraction_write_path = os.path.join(outdir, file_name(obj))

        #Extract idempotently
        if os.path.exists(extraction_write_path):
            _logger.debug(
                "Skipping already-extracted file: %r. Extraction path already exists: %r."
                % (obj.filename, extraction_write_path))
            continue

        extraction_entry.filename = extraction_write_path

        #Set up checksum verifier
        checker = None
        checked_byte_tally = 0
        if obj.sha1:
            checker = hashlib.sha1()

        #A file object without a recorded size contributes nothing to the estimate, rather than raising TypeError.
        extraction_byte_tally += obj.filesize or 0

        any_error = None
        tsk_error = None
        if not dry_run:
            extraction_write_dir = os.path.dirname(extraction_write_path)
            #dirname is "" when the extraction path has no directory component; makedirs("") would fail.
            if extraction_write_dir:
                os.makedirs(extraction_write_dir, exist_ok=True)
            _logger.debug("Extracting to: %r." % extraction_write_path)
            with open(extraction_write_path, "wb") as extraction_write_fh:
                try:
                    for chunk in obj.extract_facet("content", image_path):
                        if checker:
                            checker.update(chunk)
                        checked_byte_tally += len(chunk)
                        extraction_write_fh.write(chunk)

                    if checked_byte_tally != obj.filesize:
                        any_error = True
                        extraction_entry.filesize = checked_byte_tally
                        extraction_entry.diffs.add("filesize")
                        _logger.error("File size mismatch on %r." %
                                      obj.filename)
                        _logger.info("Recorded filesize = %r" % obj.filesize)
                        _logger.info("Extracted bytes = %r" %
                                     checked_byte_tally)
                    if checker and (obj.sha1 != checker.hexdigest()):
                        any_error = True
                        extraction_entry.sha1 = checker.hexdigest()
                        extraction_entry.diffs.add("sha1")
                        _logger.error("Hash mismatch on %r." % obj.filename)
                        _logger.info("Recorded SHA-1 = %r" % obj.sha1)
                        _logger.info("Computed SHA-1 = %r" %
                                     checker.hexdigest())
                        #_logger.debug("File object: %r." % obj)
                except Exception as e:
                    any_error = True
                    tsk_error = True
                    extraction_entry.error = "".join(traceback.format_stack())
                    if e.args:
                        extraction_entry.error += "\n" + str(e.args)

        #Manifests are tested against None, matching the reporting section below, so an empty manifest object is never mistaken for an absent one.
        if out_manifest is not None:
            out_manifest.append(extraction_entry)
        if err_manifest is not None and any_error:
            err_manifest.append(extraction_entry)
        if tsk_error and not keep_going:
            _logger.warning(
                "Terminating extraction loop early, due to encountered error.")
            break

    #Report
    _logger.info("Estimated extraction: %d bytes." % extraction_byte_tally)
    if out_manifest is not None:
        with open(out_manifest_path, "w") as out_manifest_fh:
            out_manifest.print_dfxml(out_manifest_fh)
    if err_manifest is not None:
        tally = sum(1 for entry in err_manifest
                    if isinstance(entry, Objects.FileObject))
        _logger.info("Encountered errors extracting %d files." % tally)
        with open(err_manifest_path, "w") as err_manifest_fh:
            err_manifest.print_dfxml(err_manifest_fh)