def test_create_fail_altered_file(fs, simple_mhl_history):
    """Check that `create` keeps failing while a recorded file stays altered.

    Also checks that requesting an additional hash format (md5) still fails
    and does not add an md5 entry for the file whose verification failed.
    """

    def expect_failed_create(arguments):
        # every attempt must exit with code 12 and mention the altered file
        outcome = CliRunner().invoke(mhl.commands.create, arguments)
        assert outcome.exit_code == 12
        assert "Stuff.txt" in outcome.output

    # append to a file that is already part of the history
    with open("/root/Stuff.txt", "a") as handle:
        handle.write("!!")

    # the first attempt fails; the file is still altered afterwards, so the
    # second attempt fails as well since we compare to the original hash
    for _ in range(2):
        expect_failed_create(["/root"])

    # asking for a new hash format does not help: the run still fails
    expect_failed_create(["/root", "-h", "md5"])

    latest_hash_list = MHLHistory.load_from_path("/root").hash_lists[-1]
    stuff_media_hash = latest_hash_list.find_media_hash_for_path("Stuff.txt")

    # the latest generation records the failed xxh64 hash of the altered file
    failed_entry = stuff_media_hash.hash_entries[0]
    assert failed_entry.hash_format == "xxh64"
    assert failed_entry.hash_string == "2346e97eb08788cc"
    assert failed_entry.action == "failed"
    # ... and NO md5 hash value was added for the altered file
    assert len(stuff_media_hash.hash_entries) == 1

    # no md5 hash was stored for the failing file, so the next attempt with
    # md5 fails for the altered file again
    expect_failed_create(["/root", "-h", "md5"])
def test_record_fail_altered_file(fs, simple_mhl_history):
    """Check single-file (`-sf`) runs against a history with one altered file.

    Passing the altered file must fail; passing a different, untouched file
    must succeed because the altered file is not looked at.
    """
    # append to a file that is already part of the history
    with open("/root/Stuff.txt", "a") as handle:
        handle.write("!!")

    failed_run = CliRunner().invoke(mhl.commands.create, ["/root", "-sf", "/root/Stuff.txt"])
    assert failed_run.exit_code == 12
    assert "Stuff.txt" in failed_run.output

    # when a different file is passed no error is raised, since the altered
    # file is ignored
    passed_run = CliRunner().invoke(mhl.commands.create, ["/root", "-sf", "/root/A/A1.txt"])
    assert passed_run.exit_code == 0

    # one failing and one succeeding generation must have been created
    generations = MHLHistory.load_from_path("/root").hash_lists

    failed_media_hash = generations[1].media_hashes[0]
    assert failed_media_hash.path == "Stuff.txt"
    assert failed_media_hash.hash_entries[0].action == "failed"

    verified_media_hash = generations[2].media_hashes[0]
    assert verified_media_hash.path == "A/A1.txt"
    assert verified_media_hash.hash_entries[0].action == "verified"
def test_child_history_partial_verification_ba_1_file(fs, nested_mhl_histories):
    """Check a single-file run on /root/B/B1.txt within nested histories.

    Only the root and the B history are expected to get a new generation; the
    AA and BB histories must stay at generation 1.
    """
    # an extra file that must NOT be picked up, because only B1 is passed
    fs.create_file("/root/B/B2.txt", contents="B2\n")

    outcome = CliRunner().invoke(
        mhl.commands.create,
        ["/root", "-sf", "/root/B/B1.txt"],
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0

    # two new generations have been written
    for written_file in (
        "/root/ascmhl/0002_root_2020-01-16_091500.mhl",
        "/root/B/ascmhl/0002_B_2020-01-16_091500.mhl",
    ):
        assert os.path.isfile(written_file)

    root_history = MHLHistory.load_from_path("/root")
    assert len(root_history.hash_lists) == 2

    aa_history = root_history.child_histories[0]
    b_history = root_history.child_histories[1]
    bb_history = b_history.child_histories[0]

    # the root hash list holds no media hashes, only a reference to the hash
    # list of the B history
    root_generation = root_history.hash_lists[1]
    assert len(root_generation.media_hashes) == 0
    assert root_generation.referenced_hash_lists[0] == b_history.hash_lists[1]

    # the B hash list holds the media hash of the verified file
    b_generation = b_history.hash_lists[1]
    assert b_generation.media_hashes[0].path == "B1.txt"
    assert b_generation.media_hashes[0].hash_entries[0].action == "verified"
    # the newly created B2 file is not referenced in the B history, only B1
    assert len(b_generation.media_hashes) == 1

    # the remaining histories did not get a new generation
    for untouched_file in (
        "/root/A/AA/ascmhl/0002_AA_2020-01-16_091500.mhl",
        "/root/B/BB/ascmhl/0002_BB_2020-01-16_091500.mhl",
    ):
        assert not os.path.isfile(untouched_file)
    assert aa_history.latest_generation_number() == 1
    assert bb_history.latest_generation_number() == 1
def test_create_fail_missing_file(fs, nested_mhl_histories):
    """
    test that sealing fails if a file referenced by one of the histories
    is missing on the file system
    """
    missing_file_message = "1 missing file(s):\n A/AA/AA1.txt"

    known_paths = MHLHistory.load_from_path("/root").set_of_file_paths()
    assert known_paths == {
        "/root/Stuff.txt",
        "/root/A/AA/AA1.txt",
        "/root/B/B1.txt",
        "/root/B/BB/BB1.txt",
    }

    # delete a file that is recorded in one of the histories
    os.remove("/root/A/AA/AA1.txt")

    outcome = CliRunner().invoke(mhl.commands.create, ["/root"])
    assert outcome.exit_code == 15
    assert missing_file_message in outcome.output

    # the seal has been written to disk anyway, so the history is expected to
    # contain the new, not yet referenced files (/root/B/BA/BA1.txt and
    # /root/A/AB/AB1.txt) as well now
    known_paths = MHLHistory.load_from_path("/root").set_of_file_paths()
    # all generations are scanned for file paths, so old files, missing files
    # and new files show up here, as well as the entries for the directories
    assert known_paths == {
        "/root/Stuff.txt",
        "/root/A",
        "/root/A/AA",
        "/root/A/AA/AA1.txt",
        "/root/A/AB",
        "/root/A/AB/AB1.txt",
        "/root/B",
        "/root/B/B1.txt",
        "/root/B/BA",
        "/root/B/BA/BA1.txt",
        "/root/B/BB",
        "/root/B/BB/BB1.txt",
    }

    # /root/A/AA/AA1.txt is still missing, so further seal attempts fail too
    outcome = CliRunner().invoke(mhl.commands.create, ["/root"])
    assert outcome.exit_code == 15
    assert missing_file_message in outcome.output
def test_child_history_verify(fs, nested_mhl_histories):
    """Check a full `create` run on /root with nested child histories.

    Every history is expected to get a second generation, with correct
    references between the hash lists and matching directory hashes in
    parent and child histories.
    """
    outcome = CliRunner().invoke(mhl.commands.create, ["/root"], catch_exceptions=False)
    assert outcome.exit_code == 0

    for generation_file in (
        "/root/ascmhl/0002_root_2020-01-16_091500.mhl",
        "/root/A/AA/ascmhl/0002_AA_2020-01-16_091500.mhl",
        "/root/B/ascmhl/0002_B_2020-01-16_091500.mhl",
        "/root/B/BB/ascmhl/0002_BB_2020-01-16_091500.mhl",
    ):
        assert os.path.isfile(generation_file)

    root_history = MHLHistory.load_from_path("/root")
    assert len(root_history.hash_lists) == 2

    second_root_generation = root_history.hash_lists[1]
    new_file_hash = second_root_generation.media_hashes[1]
    assert new_file_hash.path == "A/AB/AB1.txt"
    assert new_file_hash.hash_entries[0].action == "original"
    known_file_hash = second_root_generation.media_hashes[5]
    assert known_file_hash.path == "Stuff.txt"
    assert known_file_hash.hash_entries[0].action == "verified"

    aa_history = root_history.child_histories[0]
    b_history = root_history.child_histories[1]
    bb_history = b_history.child_histories[0]

    root_hash_list = root_history.hash_lists[-1]
    aa_hash_list = aa_history.hash_lists[-1]
    b_hash_list = b_history.hash_lists[-1]
    bb_hash_list = bb_history.hash_lists[-1]

    assert aa_history.latest_generation_number() == 2

    b_first, b_fourth = b_hash_list.media_hashes[0], b_hash_list.media_hashes[3]
    assert b_first.path == "BA/BA1.txt"
    assert b_fourth.path == "B1.txt"
    assert b_first.hash_entries[0].action == "original"
    assert b_fourth.hash_entries[0].action == "verified"

    # the mhl references between the hash lists must be correct
    root_references = root_history.hash_lists[1].referenced_hash_lists
    assert root_references[0] == aa_hash_list
    assert root_references[1] == b_hash_list
    assert b_hash_list.referenced_hash_lists[0] == bb_hash_list
    assert len(aa_hash_list.referenced_hash_lists) == 0

    # the media hash of a directory that contains a history of its own should
    # be present twice: as root media hash in the child history, and in the
    # parent history to represent the directory that contains the child history
    aa_dir_hash = root_hash_list.find_media_hash_for_path("A/AA").hash_entries[0].hash_string
    assert aa_dir_hash
    assert aa_hash_list.process_info.root_media_hash.hash_entries[0].hash_string == aa_dir_hash

    # the dir hash of BB lives in the history of B, not in the root history
    assert root_hash_list.find_media_hash_for_path("B/BB") is None
    bb_dir_hash = b_hash_list.find_media_hash_for_path("BB").hash_entries[0].hash_string
    assert bb_hash_list.process_info.root_media_hash.hash_entries[0].hash_string == bb_dir_hash

    # the dir hash of B, however, is in the root history as well
    assert root_hash_list.find_media_hash_for_path("B")
def test_record_succeed_single_file(fs):
    """Check that a single-file (`-sf`) run only records the given file."""
    for file_path, file_contents in (
        ("/root/Stuff.txt", "stuff\n"),
        ("/root/A/A1.txt", "A1\n"),
        ("/root/A/A2.txt", "A2\n"),
    ):
        fs.create_file(file_path, contents=file_contents)

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-sf", "/root/A/A1.txt"])
    assert outcome.exit_code == 0

    for expected_path in (
        "/root/ascmhl/0001_root_2020-01-16_091500.mhl",
        "/root/ascmhl/chain.txt",
    ):
        assert os.path.exists(expected_path)

    # only the specified file may have been added
    recorded_hashes = MHLHistory.load_from_path("/root").hash_lists[0].media_hashes
    assert len(recorded_hashes) == 1
    assert recorded_hashes[0].path == "A/A1.txt"
def test_record_succeed_single_directory(fs):
    """Check that passing a directory via `-sf` only records that directory's files."""
    for file_path, file_contents in (
        ("/root/Stuff.txt", "stuff\n"),
        ("/root/A/A1.txt", "A1\n"),
        ("/root/A/A2.txt", "A2\n"),
    ):
        fs.create_file(file_path, contents=file_contents)

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-sf", "/root/A"])
    assert outcome.exit_code == 0

    for expected_path in (
        "/root/ascmhl/0001_root_2020-01-16_091500.mhl",
        "/root/ascmhl/chain.txt",
    ):
        assert os.path.exists(expected_path)

    # only the files inside the specified directory may have been added
    recorded_hashes = MHLHistory.load_from_path("/root").hash_lists[0].media_hashes
    assert len(recorded_hashes) == 2
    assert recorded_hashes[0].path == "A/A1.txt"
    assert recorded_hashes[1].path == "A/A2.txt"
def test_create_nested_new_format(fs, nested_mhl_histories):
    """
    test that ensures that hashes in a new format are also verified in child histories
    used to verify fix of bug: https://github.com/ascmitc/mhl/issues/48
    """
    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-h", "md5"])
    assert outcome.exit_code == 0

    # inspect the first media hash of the last generation of one nested history
    latest_nested_list = MHLHistory.load_from_path("/root/A/AA").hash_lists[-1]
    entries = latest_nested_list.media_hashes[0].hash_entries

    # the first hash entry is the verification of the original hash
    assert entries[0].action == "verified"
    assert entries[0].hash_format == "xxh64"

    # the second hash entry is the new md5 hash
    assert entries[1].action == "verified"  # formerly 'new'
    assert entries[1].hash_format == "md5"
def test_child_history_parsing(fs, nested_mhl_histories):
    """Check that loading /root builds the expected tree of child histories."""
    root_history = MHLHistory.load_from_path("/root")
    assert len(root_history.child_histories) == 2

    aa_history = root_history.child_histories[0]
    b_history = root_history.child_histories[1]
    bb_history = b_history.child_histories[0]

    assert aa_history.asc_mhl_path == "/root/A/AA/ascmhl"
    assert aa_history.parent_history == root_history

    assert b_history.asc_mhl_path == "/root/B/ascmhl"
    assert b_history.parent_history == root_history
    assert len(b_history.child_histories) == 1

    assert bb_history.asc_mhl_path == "/root/B/BB/ascmhl"
    assert bb_history.parent_history == b_history

    # the child mappings cover all transitive children by their relative path
    for owner, relative_path, mapped_history in (
        (root_history, "A/AA", aa_history),
        (root_history, "B", b_history),
        (root_history, "B/BB", bb_history),
        (b_history, "BB", bb_history),
    ):
        assert owner.child_history_mappings[relative_path] == mapped_history

    # the correct (child) history must be returned for a given path
    for file_path, responsible_history in (
        ("Stuff.txt", root_history),
        ("A/AA/AA1.txt", aa_history),
        ("A/AB/AB1.txt", root_history),
        ("B/B1.txt", b_history),
        ("B/BA/BA1.txt", b_history),
        ("B/BB/BB1.txt", bb_history),
    ):
        assert root_history.find_history_for_path(file_path)[0] == responsible_history

    # a history object only returns the media hashes and hash entries it
    # contains directly; entries of child histories have to be requested from
    # the child history itself
    assert root_history.find_original_hash_entry_for_path("Stuff.txt") is not None
    assert root_history.find_original_hash_entry_for_path("A/AA/AA1.txt") is None
    assert aa_history.find_original_hash_entry_for_path("AA1.txt") is not None
def test_child_history_partial_verification_bb_folder(fs, nested_mhl_histories):
    """Check a run restricted to the folder /root/B/BB within nested histories.

    The root, B and BB histories are expected to get a new generation; the AA
    history must stay at generation 1.
    """
    # an extra file that WILL be picked up, because a folder is passed
    fs.create_file("/root/B/BB/BB2.txt", contents="BB2\n")

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-sf", "/root/B/BB"])
    assert outcome.exit_code == 0

    for generation_file in (
        "/root/ascmhl/0002_root_2020-01-16_091500.mhl",
        "/root/B/ascmhl/0002_B_2020-01-16_091500.mhl",
        "/root/B/BB/ascmhl/0002_BB_2020-01-16_091500.mhl",
    ):
        assert os.path.isfile(generation_file)

    root_history = MHLHistory.load_from_path("/root")
    assert len(root_history.hash_lists) == 2

    aa_history = root_history.child_histories[0]
    b_history = root_history.child_histories[1]
    bb_history = b_history.child_histories[0]

    # the root and the B hash lists hold no media hashes, only an mhl
    # reference to the hash list one level down the folder hierarchy
    root_generation = root_history.hash_lists[1]
    assert len(root_generation.media_hashes) == 0
    assert root_generation.referenced_hash_lists[0] == b_history.hash_lists[1]
    b_generation = b_history.hash_lists[1]
    assert len(b_generation.media_hashes) == 0
    assert b_generation.referenced_hash_lists[0] == bb_history.hash_lists[1]

    # the BB hash list holds the media hashes of the files in BB
    bb_generation = bb_history.hash_lists[1]
    assert bb_generation.media_hashes[0].path == "BB1.txt"
    assert bb_generation.media_hashes[0].hash_entries[0].action == "verified"
    assert bb_generation.media_hashes[1].path == "BB2.txt"
    assert bb_generation.media_hashes[1].hash_entries[0].action == "original"

    # the other history did not get a new generation
    assert not os.path.isfile("/root/A/AA/ascmhl/0002_AA_2020-01-16_091500.mhl")
    assert aa_history.latest_generation_number() == 1
def test_create_no_directory_hashes(fs):
    """Check `create` with `-n`: directories are listed but carry no hashes."""
    fs.create_file("/root/Stuff.txt", contents="stuff\n")
    fs.create_file("/root/A/A1.txt", contents="A1\n")
    os.mkdir("/root/emptyFolder")

    cli_arguments = ["/root", "-v", "-n"]
    outcome = CliRunner().invoke(mhl.commands.create, list(cli_arguments))
    assert outcome.exit_code == 0

    first_generation = MHLHistory.load_from_path("/root").hash_lists[0]

    # folder A got a directory entry without any hash
    folder_a = first_generation.find_media_hash_for_path("A")
    assert folder_a.is_directory
    assert len(folder_a.hash_entries) == 0

    # no directory hash of the root folder is set in the header either
    assert len(first_generation.process_info.root_media_hash.hash_entries) == 0

    # the empty folder is referenced even though no directory hashes are made
    assert first_generation.find_media_hash_for_path("emptyFolder").is_directory

    # removing the empty folder makes sealing fail
    os.removedirs("/root/emptyFolder")
    outcome = CliRunner().invoke(mhl.commands.create, list(cli_arguments))
    assert outcome.exit_code == 15
    assert "1 missing file(s):\n emptyFolder" in outcome.output
# NOTE(review): this file defines `test_create_directory_hashes` a second time
# further down; the later definition rebinds the name, so only one of the two
# is collected by pytest. Consider removing or renaming one of them.
def test_create_directory_hashes(fs):
    """Check directory and root hashes written by `create` across several changes.

    The scenario walks through: initial content, added files and folders, an
    altered file and a renamed file. After each step the `directory-hash`
    command must report the same hashes.
    """

    def directory_hash_of(hash_list, relative_path):
        # hash string of the first hash entry recorded for a directory
        return hash_list.find_media_hash_for_path(relative_path).hash_entries[0].hash_string

    def root_hash_of(hash_list):
        # hash string of the root directory hash stored in the header
        return hash_list.process_info.root_media_hash.hash_entries[0].hash_string

    def newest_hash_list():
        return MHLHistory.load_from_path("/root").hash_lists[-1]

    fs.create_file("/root/Stuff.txt", contents="stuff\n")
    fs.create_file("/root/A/A1.txt", contents="A1\n")

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-h", "xxh64", "-v"])
    assert outcome.exit_code == 0

    # a directory hash was created for folder A
    first_list = MHLHistory.load_from_path("/root").hash_lists[0]
    assert first_list.find_media_hash_for_path("A").is_directory
    assert directory_hash_of(first_list, "A") == "ee2c3b94b6eecb8d"
    # the directory hash of the root folder is set in the header
    assert root_hash_of(first_list) == "15ef0ade91fff267"

    # the directory-hash command must produce the same directory hashes
    outcome = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-v", "-h", "xxh64"])
    assert outcome.exit_code == 0
    print(outcome.output)
    assert "directory hash for: /root/A xxh64: ee2c3b94b6eecb8d" in outcome.output
    assert "root hash: xxh64: 15ef0ade91fff267" in outcome.output

    # add some more files and folders
    fs.create_file("/root/B/B1.txt", contents="B1\n")
    fs.create_file("/root/A/A2.txt", contents="A2\n")
    fs.create_file("/root/A/AA/AA1.txt", contents="AA1\n")
    for new_folder in (
        "/root/emptyFolderA",
        "/root/emptyFolderB",
        "/root/emptyFolderC",
        "/root/emptyFolderC/emptyFolderCA",
        "/root/emptyFolderC/emptyFolderCB",
    ):
        os.mkdir(new_folder)

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-v", "-h", "xxh64"])
    assert outcome.exit_code == 0

    current_list = newest_hash_list()
    # the additional content changed the directory hash of folder A and of the root folder
    assert directory_hash_of(current_list, "A") == "47e7687ce4800633"
    assert root_hash_of(current_list) == "5f4af3b3fd736415"
    # empty folders all share the same directory hash
    assert directory_hash_of(current_list, "emptyFolderA") == "ef46db3751d8e999"
    assert directory_hash_of(current_list, "emptyFolderB") == "ef46db3751d8e999"
    # file names are part of the dir hashes, so an empty folder that contains
    # other empty folders has a different directory hash
    assert directory_hash_of(current_list, "emptyFolderC") == "877071123901a4db"

    # the directory-hash command must produce the same directory hashes
    outcome = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-h", "xxh64"])
    assert outcome.exit_code == 0
    assert " calculated root hash: xxh64: 5f4af3b3fd736415" in outcome.output

    # alter the content of one file
    with open("/root/A/A2.txt", "a") as handle:
        handle.write("!!")

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-v", "-h", "xxh64"])
    assert "ERROR: hash mismatch for A/A2.txt" in outcome.output

    # an altered file leads to a different root directory hash
    current_list = newest_hash_list()
    assert root_hash_of(current_list) == "adf18c910489663c"

    # the directory-hash command must produce the same root hash
    outcome = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-h", "xxh64"])
    assert outcome.exit_code == 0
    assert "root hash: xxh64: adf18c910489663c" in outcome.output

    # rename one file
    os.rename("/root/B/B1.txt", "/root/B/B2.txt")

    outcome = CliRunner().invoke(mhl.commands.create, ["/root", "-v", "-h", "xxh64"])
    assert "ERROR: hash mismatch for A/A2.txt" in outcome.output
    # besides the failing verification there is now also a missing file B/B1.txt
    assert "missing file(s):\n B/B1.txt" in outcome.output

    current_list = newest_hash_list()
    # the file name is part of the directory hash of the containing directory,
    # so that hash changes
    assert directory_hash_of(current_list, "B") == "8cdb106e71c4989d"
    # a renamed file also leads to a different root directory hash
    assert root_hash_of(current_list) == "01441cdf1803e2b8"

    # the directory-hash command must produce the same root hash
    outcome = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-h", "xxh64"])
    assert outcome.exit_code == 0
    assert "root hash: xxh64: 01441cdf1803e2b8" in outcome.output
# NOTE(review): `test_create_directory_hashes` is also defined earlier in this
# file; this later definition rebinds the name, so the earlier one is not
# collected by pytest. Consider removing or renaming one of them.
def test_create_directory_hashes(fs):
    """Check directory and root hashes written by `create` across several changes.

    Steps: initial content, added files and folders, an altered file, a
    renamed file. After each step the `directory-hash` command is expected to
    report the same hashes as the ones stored in the history.
    """
    fs.create_file("/root/Stuff.txt", contents="stuff\n")
    fs.create_file("/root/A/A1.txt", contents="A1\n")

    create_run = CliRunner().invoke(mhl.commands.create, ["/root", "-h", "xxh64", "-v"])
    assert create_run.exit_code == 0

    # folder A received a directory hash
    generation = MHLHistory.load_from_path("/root").hash_lists[0]
    assert generation.find_media_hash_for_path("A").is_directory
    folder_a_entries = generation.find_media_hash_for_path("A").hash_entries
    assert folder_a_entries[0].hash_string == "ee2c3b94b6eecb8d"
    # the header carries the directory hash of the root folder
    root_entries = generation.process_info.root_media_hash.hash_entries
    assert root_entries[0].hash_string == "15ef0ade91fff267"

    # the directory-hash command has to report the same directory hashes
    hash_run = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-v", "-h", "xxh64"])
    assert hash_run.exit_code == 0
    print(hash_run.output)
    assert "directory hash for: /root/A xxh64: ee2c3b94b6eecb8d" in hash_run.output
    assert "root hash: xxh64: 15ef0ade91fff267" in hash_run.output

    # add some more files and folders
    fs.create_file("/root/B/B1.txt", contents="B1\n")
    fs.create_file("/root/A/A2.txt", contents="A2\n")
    fs.create_file("/root/A/AA/AA1.txt", contents="AA1\n")
    os.mkdir("/root/emptyFolderA")
    os.mkdir("/root/emptyFolderB")
    os.mkdir("/root/emptyFolderC")
    os.mkdir("/root/emptyFolderC/emptyFolderCA")
    os.mkdir("/root/emptyFolderC/emptyFolderCB")

    create_run = CliRunner().invoke(mhl.commands.create, ["/root", "-v", "-h", "xxh64"])
    assert create_run.exit_code == 0

    generation = MHLHistory.load_from_path("/root").hash_lists[-1]
    # with the extra content the directory hash of folder A and of the root folder changed
    folder_a_entries = generation.find_media_hash_for_path("A").hash_entries
    assert folder_a_entries[0].hash_string == "47e7687ce4800633"
    root_entries = generation.process_info.root_media_hash.hash_entries
    assert root_entries[0].hash_string == "5f4af3b3fd736415"
    # all empty folders have the same directory hash
    empty_a_entries = generation.find_media_hash_for_path("emptyFolderA").hash_entries
    assert empty_a_entries[0].hash_string == "ef46db3751d8e999"
    empty_b_entries = generation.find_media_hash_for_path("emptyFolderB").hash_entries
    assert empty_b_entries[0].hash_string == "ef46db3751d8e999"
    # the dir hashes include the file names, so an empty folder that contains
    # other empty folders gets a different directory hash
    empty_c_entries = generation.find_media_hash_for_path("emptyFolderC").hash_entries
    assert empty_c_entries[0].hash_string == "877071123901a4db"

    # the directory-hash command has to report the same directory hashes
    hash_run = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-h", "xxh64"])
    assert hash_run.exit_code == 0
    assert " calculated root hash: xxh64: 5f4af3b3fd736415" in hash_run.output

    # change the content of one file
    with open("/root/A/A2.txt", "a") as handle:
        handle.write("!!")

    create_run = CliRunner().invoke(mhl.commands.create, ["/root", "-v", "-h", "xxh64"])
    assert "ERROR: hash mismatch for A/A2.txt" in create_run.output

    generation = MHLHistory.load_from_path("/root").hash_lists[-1]
    # the altered file results in a different root directory hash
    root_entries = generation.process_info.root_media_hash.hash_entries
    assert root_entries[0].hash_string == "adf18c910489663c"

    # the directory-hash command has to report the same root hash
    hash_run = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-h", "xxh64"])
    assert hash_run.exit_code == 0
    assert "root hash: xxh64: adf18c910489663c" in hash_run.output

    # rename one file
    os.rename("/root/B/B1.txt", "/root/B/B2.txt")

    create_run = CliRunner().invoke(mhl.commands.create, ["/root", "-v", "-h", "xxh64"])
    assert "ERROR: hash mismatch for A/A2.txt" in create_run.output
    # on top of the failing verification, B/B1.txt is now reported as missing
    assert "missing file(s):\n B/B1.txt" in create_run.output

    generation = MHLHistory.load_from_path("/root").hash_lists[-1]
    # the containing directory's hash includes the file name, so it changes
    folder_b_entries = generation.find_media_hash_for_path("B").hash_entries
    assert folder_b_entries[0].hash_string == "8cdb106e71c4989d"
    # the renamed file results in a different root directory hash as well
    root_entries = generation.process_info.root_media_hash.hash_entries
    assert root_entries[0].hash_string == "01441cdf1803e2b8"

    # the directory-hash command has to report the same root hash
    hash_run = CliRunner().invoke(mhl.commands.directory_hash, ["/root", "-h", "xxh64"])
    assert hash_run.exit_code == 0
    assert "root hash: xxh64: 01441cdf1803e2b8" in hash_run.output