def generate_test_database(num_uids=10000, r_seed=42):
    random.seed(r_seed, version=2)
    database = Database()
    for i_uid in range(num_uids):
        uid = "".join(random.choices(database.UID_ALPHABET, k=8))
        database.photo_db[uid] = []
        for i_photo in range(random.randint(1, 3)):
            checksum = "".join(random.choices(string.hexdigits, k=64))
            timestamp = random.randint(1037750179000000, 1637750179000000) / 1000000
            dt = datetime.datetime.fromtimestamp(timestamp).astimezone(
                datetime.timezone(datetime.timedelta(hours=random.randint(-12, 12)))
            )
            ts_str = dt.strftime("%Y-%m-%d %H:%M:%S%z")
            img_num = random.randint(0, 9999)
            source_path = f"/path/to/photo/{dt.year}/IMG_{img_num:04d}.JPG"
            store_path = (
                ""
                if random.randint(0, 1)
                else f"{dt.year}/{source_path.rsplit('/', 1)[-1]}"
            )
            filesize = random.randint(100000, 100000000)
            photo = PhotoFile(
                chk=checksum,
                src=source_path,
                ts=timestamp,
                dt=ts_str,
                fsz=filesize,
                sto=store_path,
            )
            database.photo_db[uid].append(photo)
    return database
def test_database_init_version_too_high():
    """
    Database will raise DatabaseException if loaded database version is too high
    """
    json_data = b"""{
        "version": VERSION,
        "hash_algorithm": "sha256",
        "timezone_default": "-0400",
        "photo_db": {},
        "command_history": {}
    }""".replace(b"VERSION", f"{Database.VERSION + 1}".encode())
    with pytest.raises(DatabaseException):
        Database.from_json(json_data)
def test_database_add_photo_same_source_new_checksum(caplog):
    """
    When adding a photo with a source_path in the database but a different checksum,
    the photo is added to the database and a warning is issued.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    uid = db.add_photo(
        PhotoFile(
            chk="not_a_match",
            src="/a/b/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=10,
        ),
        uid="uid1",
    )
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid == "uid1"
    assert db.hash_to_uid["not_a_match"] == "uid1"
    assert db.hash_to_uid["deadbeef"] == "uid1"
    print(db.photo_db["uid1"])
    assert len(db.photo_db["uid1"]) == 2
    print([(r.levelname, r) for r in caplog.records])
    assert any(record.levelname == "WARNING" for record in caplog.records)
    assert any(
        "Checksum of previously-indexed source photo has changed" in record.msg
        for record in caplog.records
    )
def clean(
    database: Database,
    destination: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    dry_run: bool = False,
) -> dict[str, int]:
    """
    Remove lower-priority copies of stored photos from the destination.

    :param database: the Database
    :param destination: the photo storage directory
    :param subdir: remove only photos within subdirectory
    :param dry_run: perform a dry run that makes no changes
    :return: the number of removed photos, their total file size,
        and the number of missing photos
    """
    logger = logging.getLogger(__name__)
    photos_to_remove = database.get_photos_to_remove(
        destination, subdirectory=subdir, dry_run=dry_run
    )
    total_file_size = sum(pf.fsz for pf in photos_to_remove)
    logger.info(f"Identified {len(photos_to_remove)} lower-priority items for removal")
    logger.info(f"Total file size: {sizeof_fmt(total_file_size)}")
    num_removed_photos, num_missing_photos = fileops.remove_photos(
        destination, photos_to_remove, dry_run=dry_run
    )
    logger.info(
        f"{'Found' if dry_run else 'Removed'} {num_removed_photos} items "
        f"and skipped {num_missing_photos} missing items"
    )
    return dict(
        num_removed_photos=num_removed_photos,
        total_file_size=total_file_size,
        num_missing_photos=num_missing_photos,
    )
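# Usage sketch (illustrative only, not part of the public API; the database and
# destination paths below are hypothetical). `clean` is typically run after
# `collect`, once higher-priority copies are stored, to reclaim space held by
# lower-priority duplicates. Assumes the surrounding module's imports.
def _example_clean_usage() -> None:  # pragma: no cover
    database = Database.from_file("photos.json")  # hypothetical database path
    # dry_run=True only reports what would be removed; nothing is deleted
    result = clean(database, destination="/mnt/photos", dry_run=True)
    print(
        f"{result['num_removed_photos']} removable items, "
        f"{sizeof_fmt(result['total_file_size'])} reclaimable"
    )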
def _stats(db: Union[str, PathLike]):
    config_logging()
    database = Database.from_file(db)
    num_uids, num_photos, num_stored_photos, total_file_size = database.get_stats()
    print(f"Total items: {num_photos}")
    print(f"Total unique items: {num_uids}")
    print(f"Total stored items: {num_stored_photos}")
    print(f"Total file size: {sizeof_fmt(total_file_size)}")
def test_database_save_not_modified(tmpdir, caplog):
    """
    Database.save() will not save if the database is unchanged from loading
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data3)
    db_path = tmpdir / "photos.json"
    db.save(db_path, ["photomanager", "test"])
    assert "The database was not modified and will not be saved" in caplog.messages
    assert not db_path.exists()
def test_database_get_photos_to_collect_same_checksum_same_priority(caplog, tmpdir):
    """
    Photos with the same priority and checksum will not be recollected
    """
    caplog.set_level(logging.DEBUG)
    example_database = {
        "version": 1,
        "hash_algorithm": "sha256",
        "photo_db": {
            "uid1": [
                {
                    "checksum": "deadbeef",
                    "source_path": str(tmpdir / "source1" / "a.jpg"),
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "a.jpg",
                    "priority": 11,
                },
                {
                    "checksum": "deadbeef",
                    "source_path": str(tmpdir / "source2" / "a.jpg"),
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "",
                    "priority": 11,
                },
            ]
        },
        "command_history": {
            "2021-03-08_23-56-00Z": "photomanager create --db test.json"
        },
    }
    os.makedirs(tmpdir / "source1")
    os.makedirs(tmpdir / "source2")
    os.makedirs(tmpdir / "store")
    Path(tmpdir / "source1" / "a.jpg").touch()
    Path(tmpdir / "source2" / "a.jpg").touch()
    Path(tmpdir / "store" / "a.jpg").touch()
    db = Database.from_dict(example_database)
    (
        photos_to_copy,
        (num_copied_photos, num_added_photos, num_missed_photos, num_stored_photos),
    ) = db.get_photos_to_collect(tmpdir / "store")
    print(photos_to_copy)
    print(num_copied_photos, num_added_photos, num_missed_photos, num_stored_photos)
    assert len(photos_to_copy) == 0
    assert num_copied_photos == 0
    assert num_added_photos == 0
    assert num_missed_photos == 0
    assert num_stored_photos == 2
def test_database_list_sources(caplog):
    """
    The Database.sources property yields all src paths in the database
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data3)
    assert set(db.sources) == {
        "/a/b/c.jpg",
        "/o/b/c.jpg",
        "/a/c/e.jpg",
    }
def test_database_clean_verify_absolute_subdir(tmpdir, caplog):
    """
    An exception is raised if subdir is an absolute path
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    with pytest.raises(DatabaseException):
        db.get_photos_to_remove(tmpdir / "a", subdirectory=tmpdir / "b")
    with pytest.raises(DatabaseException):
        db.get_stored_photos(subdirectory=tmpdir / "b")
    with pytest.raises(NotImplementedError):
        db.verify_indexed_photos()
def index(
    database: Database,
    files: Iterable[Union[str, PathLike]],
    priority: int = 10,
    timezone_default: Optional[str] = None,
    storage_type: str = "HDD",
) -> dict[str, Union[int, list[str]]]:
    """
    Index photo files and add them to the database.

    :param database: the Database
    :param files: an iterable of paths to the photos
    :param priority: priority of indexed photos (lower is preferred)
    :param timezone_default: timezone to use when indexing timezone-naive photos
    :param storage_type: class of storage medium (HDD, SSD, RAID)
    :return: the changed uids and the counts of added, merged, skipped,
        and errored photos
    """
    logger = logging.getLogger(__name__)
    tz_default = (
        tz_str_to_tzinfo(timezone_default)
        if timezone_default is not None
        else database.timezone_default
    )
    photos = fileops.index_photos(
        files=files,
        priority=priority,
        storage_type=storage_type,
        hash_algorithm=database.hash_algorithm,
        tz_default=tz_default,
    )
    num_error_photos = sum(pf is None for pf in photos)
    (
        changed_uids,
        num_added_photos,
        num_merged_photos,
        num_skipped_photos,
    ) = database.add_photos(pf for pf in photos if pf is not None)
    logger.info(f"Indexed {num_added_photos+num_merged_photos}/{len(photos)} items")
    logger.info(
        f"Added {num_added_photos} new items and merged {num_merged_photos} items"
    )
    if num_skipped_photos:
        logger.info(f"Skipped {num_skipped_photos} items")
    if num_error_photos:  # pragma: no cover
        logger.info(f"Encountered an error on {num_error_photos} items")
    return dict(
        changed_uids=changed_uids,
        num_added_photos=num_added_photos,
        num_merged_photos=num_merged_photos,
        num_skipped_photos=num_skipped_photos,
        num_error_photos=num_error_photos,
    )
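# Usage sketch (illustrative only; the database path and source file are
# hypothetical). `index` accepts any iterable of paths, typically produced by
# `fileops.list_files`, and reports how many photos were added, merged, skipped,
# or errored. Assumes the surrounding module's imports.
def _example_index_usage() -> None:  # pragma: no cover
    database = Database.from_file("photos.json")  # hypothetical database path
    result = index(
        database=database,
        files=["/home/user/Pictures/IMG_0001.JPG"],  # hypothetical source file
        priority=10,
        timezone_default="-0400",
    )
    print(
        f"{result['num_added_photos']} added, "
        f"{result['num_merged_photos']} merged, "
        f"{result['num_error_photos']} errors"
    )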
def test_database_is_modified(caplog):
    """
    Database.is_modified() is True if Database.db has been modified
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data3)
    assert not db.is_modified()
    db.add_command("test")
    assert db.is_modified()
    db.reset_saved()
    assert not db.is_modified()
    db.photo_db["uid1"][1].sto = "/path/to/sto.jpg"
    assert db.is_modified()
def test_database_save_modified(tmpdir, caplog):
    """
    Database.save() will save if the database has been modified
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data3)
    db.photo_db["uid1"][1].sto = "/path/to/sto.jpg"
    db_path = tmpdir / "photos.json"
    db.save(db_path, ["photomanager", "test"])
    assert "The database was not modified and will not be saved" not in caplog.messages
    assert db_path.exists()
    with open(db_path, "rb") as f:
        assert len(orjson.loads(f.read())["command_history"]) == 2
def test_database_init_update_version_1():
    """
    Database will upgrade loaded database files to current version
    """
    json_data = b"""{
  "version": 1,
  "hash_algorithm": "sha256",
  "timezone_default": "-0400",
  "photo_db": {
    "d239210f00534b76a2b215e073f75832": [
      {
        "checksum": "deadbeef",
        "source_path": "/a/b/c.jpg",
        "datetime": "2015:08:27 04:09:36.50",
        "timestamp": 1440662976.5,
        "file_size": 1024,
        "store_path": "/d/e/f.jpg",
        "priority": 11,
        "tz_offset": null
      },
      {
        "checksum": "deadbeef",
        "source_path": "/g/b/c.jpg",
        "datetime": "2015:08:27 04:09:36.50",
        "timestamp": 1440662976.5,
        "file_size": 1024,
        "store_path": "",
        "priority": 20,
        "tz_offset": -14400
      }
    ]
  },
  "command_history": {
    "2021-03-08_23-56-00Z": "photomanager create --db test.json",
    "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg"
  }
}"""
    new_json_data = json_data.replace(
        b'"version": 1', f'"version": {Database.VERSION}'.encode()
    )
    for k, v in NAME_MAP_ENC.items():
        new_json_data = new_json_data.replace(
            b'"' + k.encode() + b'"',
            b'"' + v.encode() + b'"',
        )
    db = Database.from_json(json_data)
    print(db.db)
    assert db.db["timezone_default"] == "-0400"
    assert db.timezone_default == timezone(timedelta(days=-1, seconds=72000))
    assert orjson.loads(db.json) == orjson.loads(new_json_data)
    assert db.to_json(pretty=True) == new_json_data
def test_database_save(tmpdir, caplog):
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    db.to_file(tmpdir / "test.json")
    db2 = db.from_file(tmpdir / "test.json")
    print(db.db, db2.db, sep="\n")
    assert db == db2
    db.to_file(tmpdir / "test.json.gz")
    db2 = db.from_file(tmpdir / "test.json.gz")
    print(db2.db)
    assert db == db2
    db.to_file(tmpdir / "test.json.zst")
    db2 = db.from_file(tmpdir / "test.json.zst")
    print(db2.db)
    assert db == db2
def _create(
    db: Union[str, PathLike],
    hash_algorithm: str = DEFAULT_HASH_ALGO,
    timezone_default: str = "local",
    debug: bool = False,
):
    config_logging(debug=debug)
    try:
        database = Database.from_file(db)
    except FileNotFoundError:
        database = Database()
    database.hash_algorithm = HashAlgorithm(hash_algorithm)
    database.db["timezone_default"] = timezone_default
    database.save(path=db, argv=sys.argv, force=True)
def _import(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    source: Optional[Union[str, PathLike]] = None,
    file: Optional[Union[str, PathLike]] = None,
    paths: Iterable[Union[str, PathLike]] = tuple(),
    exclude: Iterable[str] = tuple(),
    skip_existing: bool = False,
    debug: bool = False,
    dry_run: bool = False,
    priority: int = 10,
    timezone_default: Optional[str] = None,
    storage_type: str = "HDD",
    collect_db: bool = False,
):
    config_logging(debug=debug)
    database = Database.from_file(db, create_new=True)
    skip_existing = set(database.sources) if skip_existing else set()
    filtered_files = fileops.list_files(
        source=source,
        file=file,
        exclude=exclude,
        exclude_files=skip_existing,
        paths=paths,
    )
    index_result = actions.index(
        database=database,
        files=filtered_files,
        priority=priority,
        timezone_default=timezone_default,
        storage_type=storage_type,
    )
    collect_result = actions.collect(
        database=database,
        destination=destination,
        dry_run=dry_run,
        filter_uids=index_result["changed_uids"] if skip_existing else None,
    )
    if not dry_run:
        database.save(
            path=db, argv=sys.argv, collect_db=collect_db, destination=destination
        )
    click_exit(
        1
        if index_result["num_error_photos"]
        or collect_result["num_missed_photos"]
        or collect_result["num_error_photos"]
        else 0
    )
def test_database_add_photo_sort(caplog):
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    uid = db.add_photo(
        PhotoFile(
            chk="deadbeef",
            src="/x/y/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=20,
        ),
        uid=None,
    )
    db.add_photo(
        PhotoFile(
            chk="deadbeef",
            src="/z/y/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=11,
        ),
        uid=None,
    )
    db.add_photo(
        PhotoFile(
            chk="deadbeef",
            src="/0/1/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=10,
        ),
        uid=None,
    )
    assert list(p.src for p in db.photo_db[uid]) == [
        "/0/1/c.jpg",
        "/a/b/c.jpg",
        "/z/y/c.jpg",
        "/x/y/c.jpg",
    ]
def collect(
    database: Database,
    destination: Union[str, PathLike],
    filter_uids: Optional[Container[str]] = None,
    dry_run: bool = False,
) -> dict[str, int]:
    """
    Collect the database's highest-priority photos to destination.

    :param database: the Database
    :param destination: the photo storage directory
    :param filter_uids: optional, only collect the specified photo uids
    :param dry_run: perform a dry run that makes no changes
    :return: the copy counts, total copy size, and number of errors found
    """
    logger = logging.getLogger(__name__)
    (
        photos_to_copy,
        (num_copied_photos, num_added_photos, num_missed_photos, num_stored_photos),
    ) = database.get_photos_to_collect(destination, filter_uids=filter_uids)
    total_copied_photos, total_copy_size, num_error_photos = fileops.copy_photos(
        destination, photos_to_copy, dry_run=dry_run
    )
    logger.info(
        f"{'Would copy' if dry_run else 'Copied'} {total_copied_photos} items, "
        f"total size: {sizeof_fmt(total_copy_size)}: "
        f"{num_added_photos} new items and {num_copied_photos} "
        f"items marked as stored elsewhere"
    )
    if num_stored_photos or num_missed_photos:
        logger.info(
            f"Skipped {num_stored_photos} items already stored "
            f"and {num_missed_photos} missing items"
        )
    if num_error_photos:  # pragma: no cover
        logger.warning(f"Encountered errors copying {num_error_photos} items")
    return dict(
        num_copied_photos=num_copied_photos,
        num_added_photos=num_added_photos,
        num_missed_photos=num_missed_photos,
        num_stored_photos=num_stored_photos,
        total_copied_photos=total_copied_photos,
        total_copy_size=total_copy_size,
        num_error_photos=num_error_photos,
    )
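# Usage sketch (illustrative only; the destination path is hypothetical).
# `collect` copies the highest-priority version of each photo into the storage
# directory; `dry_run=True` reports what would be copied without touching the
# filesystem. Assumes the surrounding module's imports.
def _example_collect_usage() -> None:  # pragma: no cover
    database = Database.from_file("photos.json")  # hypothetical database path
    result = collect(database, destination="/mnt/photos", dry_run=True)
    print(
        f"{result['total_copied_photos']} items "
        f"({sizeof_fmt(result['total_copy_size'])}) would be copied"
    )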
def _clean(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    debug: bool = False,
    dry_run: bool = False,
):
    config_logging(debug=debug)
    database = Database.from_file(db)
    result = actions.clean(
        database=database,
        destination=destination,
        subdir=subdir,
        dry_run=dry_run,
    )
    if not dry_run:
        database.save(path=db, argv=sys.argv)
    click_exit(1 if result["num_missing_photos"] else 0)
def test_database_load_zstd_checksum_error(tmpdir, monkeypatch, caplog):
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    db.to_file(tmpdir / "test.json.zst")
    with open(tmpdir / "test.json.zst", "r+b") as f:
        f.seek(4)
        c = f.read(1)
        f.seek(4)
        f.write(bytes([ord(c) ^ 0b1]))
    with pytest.raises(zstandard.ZstdError):
        db.from_file(tmpdir / "test.json.zst")
    monkeypatch.setattr(
        zstandard,
        "decompress",
        lambda _: db.to_json(pretty=True).replace(c, bytes([ord(c) ^ 0b1])),
    )
    with pytest.raises(DatabaseException):
        db.from_file(tmpdir / "test.json.zst")
def _collect(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    debug: bool = False,
    dry_run: bool = False,
    collect_db: bool = False,
):
    config_logging(debug=debug)
    database = Database.from_file(db)
    collect_result = actions.collect(
        database=database, destination=destination, dry_run=dry_run
    )
    if not dry_run:
        database.save(
            path=db, argv=sys.argv, collect_db=collect_db, destination=destination
        )
    click_exit(
        1
        if collect_result["num_missed_photos"] or collect_result["num_error_photos"]
        else 0
    )
def _verify(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    storage_type: str = "HDD",
    random_fraction: Optional[float] = None,
    debug: bool = False,
):
    config_logging(debug=debug)
    database = Database.from_file(db)
    result = actions.verify(
        database=database,
        directory=destination,
        subdir=subdir,
        storage_type=storage_type,
        random_fraction=random_fraction,
    )
    click_exit(
        1 if result["num_incorrect_photos"] or result["num_missing_photos"] else 0
    )
def test_database_add_photo_wrong_uid(caplog):
    """
    When adding a photo with a matching checksum for a different uid,
    the photo is not added and add_photo returns None.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    uid = db.add_photo(
        PhotoFile(
            chk="deadbeef",
            src="/x/y/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=10,
        ),
        uid="uid2",
    )
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid is None
def test_database_overwrite_error(tmpdir, caplog):
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    path = Path(tmpdir / "test.json")
    db.to_file(path)
    base_path = path
    for _ in path.suffixes:
        base_path = base_path.with_suffix("")
    timestamp_str = datetime.fromtimestamp(path.stat().st_mtime).strftime(
        "%Y-%m-%d_%H-%M-%S"
    )
    new_path = base_path.with_name(f"{base_path.name}_{timestamp_str}").with_suffix(
        "".join(path.suffixes)
    )
    os.makedirs(new_path)
    (new_path / "file.txt").touch()
    db.to_file(path)
    print(tmpdir.listdir())
    assert (tmpdir / "test_1.json").exists()
    Path(tmpdir / "test_0.json").touch()
    Path(tmpdir / "test_a.json").touch()
    db.to_file(path)
    print(tmpdir.listdir())
    assert (tmpdir / "test_2.json").exists()
    path = Path(tmpdir / "test_2.json")
    base_path = path
    for _ in path.suffixes:
        base_path = base_path.with_suffix("")
    timestamp_str = datetime.fromtimestamp(path.stat().st_mtime).strftime(
        "%Y-%m-%d_%H-%M-%S"
    )
    new_path = base_path.with_name(f"{base_path.name}_{timestamp_str}").with_suffix(
        "".join(path.suffixes)
    )
    os.makedirs(new_path)
    (new_path / "file.txt").touch()
    db.to_file(path)
    print(tmpdir.listdir())
    assert (tmpdir / "test_3.json").exists()
def test_database_add_photo_already_present(caplog):
    """
    When adding a photo that is already in the database,
    the photo is not added and add_photo returns None.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    uid = db.add_photo(
        PhotoFile(
            chk="deadbeef",
            src="/a/b/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=10,
        ),
        uid="uid1",
    )
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid is None
def test_database_find_photo_ambiguous(caplog):
    """
    When there is no checksum match and an ambiguous timestamp+source match,
    find_photo returns the first match.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    uid = db.find_photo(
        PhotoFile(
            chk="not_a_match",
            src="/x/y/c.jpg",
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=10,
        )
    )
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert any(record.levelname == "WARNING" for record in caplog.records)
    assert any(
        "ambiguous timestamp+name match" in record.msg for record in caplog.records
    )
    assert uid == "uid1"
def _index(
    db: Union[str, PathLike],
    source: Optional[Union[str, PathLike]] = None,
    file: Optional[Union[str, PathLike]] = None,
    paths: Iterable[Union[str, PathLike]] = tuple(),
    exclude: Iterable[str] = tuple(),
    skip_existing: bool = False,
    debug: bool = False,
    dry_run: bool = False,
    priority: int = 10,
    timezone_default: Optional[str] = None,
    storage_type: str = "HDD",
):
    if not source and not file and not paths:
        print("Nothing to index")
        print(click.get_current_context().get_help())
        click_exit(1)
    config_logging(debug=debug)
    database = Database.from_file(db, create_new=True)
    skip_existing = set(database.sources) if skip_existing else set()
    filtered_files = fileops.list_files(
        source=source,
        file=file,
        exclude=exclude,
        exclude_files=skip_existing,
        paths=paths,
    )
    index_result = actions.index(
        database=database,
        files=filtered_files,
        priority=priority,
        timezone_default=timezone_default,
        storage_type=storage_type,
    )
    if not dry_run:
        database.save(path=db, argv=sys.argv)
    click_exit(1 if index_result["num_error_photos"] else 0)
def test_verify_random_sample(tmpdir, caplog):
    """
    The random_fraction parameter in actions.verify will verify the specified
    fraction of the stored photos (rounded to the nearest integer)
    """
    caplog.set_level(logging.DEBUG)
    example_database = {
        "version": 1,
        "hash_algorithm": "sha256",
        "photo_db": {
            "uid1": [
                {
                    "checksum": "deadbeef",
                    "source_path": str(tmpdir / "source1" / "a.jpg"),
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "a.jpg",
                    "priority": 11,
                },
            ],
            "uid2": [
                {
                    "checksum": "asdf",
                    "source_path": str(tmpdir / "source2" / "b.jpg"),
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "b.jpg",
                    "priority": 11,
                },
            ],
            "uid3": [
                {
                    "checksum": "ffff",
                    "source_path": str(tmpdir / "source1" / "c.jpg"),
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "c.jpg",
                    "priority": 11,
                },
            ],
            "uid4": [
                {
                    "checksum": "beef",
                    "source_path": str(tmpdir / "source2" / "d.jpg"),
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "d.jpg",
                    "priority": 11,
                },
            ],
        },
        "command_history": {
            "2021-03-08_23-56-00Z": "photomanager create --db test.json"
        },
    }
    os.makedirs(tmpdir / "store")
    db = Database.from_dict(example_database)
    assert len(db.get_stored_photos()) == 4
    result = actions.verify(
        database=db,
        directory=tmpdir / "store",
        random_fraction=0.33,
    )
    print("\nVERIFY 33% (missing photos)")
    print(result)
    assert result["num_correct_photos"] == 0
    assert result["num_incorrect_photos"] == 0
    assert result["num_missing_photos"] == 1
    Path(tmpdir / "store" / "a.jpg").touch()
    Path(tmpdir / "store" / "b.jpg").touch()
    Path(tmpdir / "store" / "c.jpg").touch()
    Path(tmpdir / "store" / "d.jpg").touch()
    result = actions.verify(
        database=db,
        directory=tmpdir / "store",
        random_fraction=0.5,
    )
    print("\nVERIFY 50% (incorrect photos)")
    print(result)
    assert result["num_correct_photos"] == 0
    assert result["num_incorrect_photos"] == 2
    assert result["num_missing_photos"] == 0
def test_database_load_version_1():
    json_data = b"""{
        "version": 1,
        "hash_algorithm": "sha256",
        "photo_db": {
            "d239210f00534b76a2b215e073f75832": [
                {
                    "checksum": "deadbeef",
                    "source_path": "/a/b/c.jpg",
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "/d/e/f.jpg",
                    "priority": 11
                },
                {
                    "checksum": "deadbeef",
                    "source_path": "/g/b/c.jpg",
                    "datetime": "2015:08:27 04:09:36.50",
                    "timestamp": 1440662976.5,
                    "file_size": 1024,
                    "store_path": "",
                    "priority": 20,
                    "tz_offset": -14400
                }
            ]
        },
        "command_history": {
            "2021-03-08_23-56-00Z": "photomanager create --db test.json",
            "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg"
        }
    }"""
    db = Database.from_json(json_data)
    print(db.db)
    assert db.version == Database.VERSION
    assert db.hash_algorithm == HashAlgorithm.SHA256
    assert db.db["timezone_default"] == "local"
    assert db.timezone_default is None
    photo_db_expected = {
        "d239210f00534b76a2b215e073f75832": [
            PhotoFile.from_dict(
                {
                    "chk": "deadbeef",
                    "src": "/a/b/c.jpg",
                    "dt": "2015:08:27 04:09:36.50",
                    "ts": 1440662976.5,
                    "fsz": 1024,
                    "sto": "/d/e/f.jpg",
                    "prio": 11,
                }
            ),
            PhotoFile.from_dict(
                {
                    "chk": "deadbeef",
                    "src": "/g/b/c.jpg",
                    "dt": "2015:08:27 04:09:36.50",
                    "ts": 1440662976.5,
                    "fsz": 1024,
                    "sto": "",
                    "prio": 20,
                    "tzo": -14400,
                }
            ),
        ]
    }
    command_history_expected = {
        "2021-03-08_23-56-00Z": "photomanager create --db test.json",
        "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg",
    }
    db_expected = {
        "version": Database.VERSION,
        "hash_algorithm": HashAlgorithm.SHA256,
        "timezone_default": "local",
        "photo_db": photo_db_expected,
        "command_history": command_history_expected,
    }
    assert db.photo_db == photo_db_expected
    assert db.command_history == command_history_expected
    assert orjson.loads(db.json) != orjson.loads(json_data)
    assert db.db == db_expected
    assert db == Database.from_dict(orjson.loads(json_data))
    assert db.get_stats() == (1, 2, 1, 1024)
def verify(
    database: Database,
    directory: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    storage_type: str = "HDD",
    random_fraction: Optional[float] = None,
) -> dict[str, int]:
    """
    Check the files stored in directory against checksums in the database.

    :param database: the Database
    :param directory: the photo storage directory
    :param subdir: verify only photos within subdirectory
    :param storage_type: the type of media the photos are stored on
        (uses async if SSD)
    :param random_fraction: verify a randomly sampled fraction of the photos
    :return: the counts of correct, incorrect, and missing photos
    """
    logger = logging.getLogger(__name__)
    num_correct_photos = num_incorrect_photos = num_missing_photos = 0
    destination = Path(directory).expanduser().resolve()
    stored_photos = database.get_stored_photos(subdir)
    if random_fraction is not None:
        n = len(stored_photos)
        k = max(min(round(random_fraction * n), n), 0)
        stored_photos = random.sample(stored_photos, k=k)
    total_file_size = sum(pf.fsz for pf in stored_photos)
    logger.info(f"Verifying {len(stored_photos)} items")
    logger.info(f"Total file size: {sizeof_fmt(total_file_size)}")
    logger.info("Collecting media hashes")
    checksum_cache = fileops.hash_stored_photos(
        photos=stored_photos,
        directory=directory,
        hash_algorithm=database.hash_algorithm,
        storage_type=storage_type,
    )
    for photo in tqdm(stored_photos):
        abs_store_path = str(destination / photo.sto)
        if abs_store_path not in checksum_cache:
            tqdm.write(f"Missing photo: {abs_store_path}", file=sys.stderr)
            num_missing_photos += 1
        elif checksum_cache[abs_store_path] == photo.chk:
            num_correct_photos += 1
        else:
            tqdm.write(f"Incorrect checksum: {abs_store_path}", file=sys.stderr)
            num_incorrect_photos += 1
    logger.info(
        f"Checked "
        f"{num_correct_photos+num_incorrect_photos+num_missing_photos} "
        f"items"
    )
    if num_incorrect_photos or num_missing_photos:
        logger.warning(
            f"Found {num_incorrect_photos} incorrect and "
            f"{num_missing_photos} missing items"
        )
    else:
        logger.info("No errors found")
    return dict(
        num_correct_photos=num_correct_photos,
        num_incorrect_photos=num_incorrect_photos,
        num_missing_photos=num_missing_photos,
    )
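# Usage sketch (illustrative only; paths are hypothetical). `verify` re-hashes
# stored files and compares them to the recorded checksums; `random_fraction`
# spot-checks a sample instead of the whole store. Assumes the surrounding
# module's imports.
def _example_verify_usage() -> None:  # pragma: no cover
    database = Database.from_file("photos.json")  # hypothetical database path
    result = verify(database, directory="/mnt/photos", random_fraction=0.1)
    if result["num_incorrect_photos"] or result["num_missing_photos"]:
        print("verification found problems")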