def test_rename_file(self):
    """Check that FileUtils.add_suffix places the suffix before the extension."""
    # A plain string suffix lands between the stem and ".jpg".
    self.assertEqual(
        "IMG_0001-dup.jpg",
        FileUtils.add_suffix("IMG_0001.jpg", "-dup"))
    # Non-string suffixes are expected to appear in their str() form.
    self.assertEqual(
        "IMG_0001None.jpg",
        FileUtils.add_suffix("IMG_0001.jpg", None))
    self.assertEqual(
        "IMG_00012.jpg",
        FileUtils.add_suffix("IMG_0001.jpg", 2))
def test_move_file(self):
    """Both move_file and copy_file are expected to report False for the sample image."""
    sample_path = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001.jpg"
    sample_name = "IMG_0001.jpg"

    # The move is attempted (and checked) before the copy, as in the fixture order.
    move_result = FileUtils.move_file(
        sample_path,
        "/Users/paulottley/Desktop/SortTarget/images/",
        sample_name)
    self.assertFalse(move_result)

    copy_result = FileUtils.copy_file(
        sample_path,
        "/Users/paulottley/Desktop/SortTarget/frog/",
        sample_name)
    self.assertFalse(copy_result)
def test_hash_file(self):
    """Check FileUtils.hash_file on two identical images and on invalid input."""
    # The image and its copy must produce the same digest.
    self.assertEqual(
        "84365b010f4d772abbc275d8128bfa26",
        FileUtils.hash_file(
            "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001.jpg"))
    self.assertEqual(
        "84365b010f4d772abbc275d8128bfa26",
        FileUtils.hash_file(
            "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001 copy.jpg"))
    # Arguments that are not usable paths yield the sentinel message.
    self.assertEqual("file doesn't exist", FileUtils.hash_file(2))
    self.assertEqual("file doesn't exist", FileUtils.hash_file(None))
def test_get_file_type(self):
    """Check the extension strings reported by FileUtils.get_file_type."""
    # Extensions are expected back in lower case, taken after the last dot.
    self.assertEqual("jpg", FileUtils.get_file_type("img001.jpg"))
    self.assertEqual("jpeg", FileUtils.get_file_type("img001.JPeg"))
    self.assertEqual("jpg", FileUtils.get_file_type("img001.JPG"))
    self.assertEqual("jpg", FileUtils.get_file_type("img.001.JPG"))
    # Names without an extension (including the empty string) are "unrecognized".
    self.assertEqual("unrecognized", FileUtils.get_file_type("img001"))
    self.assertEqual(
        "thisisareallylongfileextension",
        FileUtils.get_file_type("img001.thisisareallylongfileextension"))
    self.assertEqual("unrecognized", FileUtils.get_file_type(""))
    # Non-string input is reported as "error".
    self.assertEqual("error", FileUtils.get_file_type(None))
    self.assertEqual("error", FileUtils.get_file_type(2))
def test_is_file_dup(self):
    """Check FileUtils.is_file_dup for a true duplicate and several non-duplicates."""
    original = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001.jpg"

    # The only positive case: the image against its copy.
    self.assertTrue(
        FileUtils.is_file_dup(
            original,
            "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001 copy.jpg"))

    # Every remaining pair must be reported as "not a duplicate".
    non_duplicate_pairs = (
        (original,
         "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0002.jpg"),
        (original, 2),
        (None, original),
        ("", original),
        ("/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0718.jpg",
         "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0730.jpg"),
        (original,
         "/Users/paulottley/Desktop/MomsDadsPhotos/images/IMG_0001.jpg"),
    )
    for left, right in non_duplicate_pairs:
        self.assertFalse(FileUtils.is_file_dup(left, right))
def sort_file_type(fs_file):
    """
    Tag *fs_file* as image, video or other according to its extension.

    The extension comes from FileUtils.get_file_type; the chosen tag and
    target folder are passed to NavUtil.tag_file, whose result is returned.
    """
    extension = FileUtils.get_file_type(fs_file.get_filename())

    # Anything that is neither a known image nor a known video type is "other".
    if extension in IMG_TYPES:
        tag, folder = IMG_TAG, IMG_DIR
    elif extension in VID_TYPES:
        tag, folder = VID_TAG, VID_DIR
    else:
        tag, folder = OTH_TAG, OTH_DIR

    return NavUtil.tag_file(fs_file, tag, folder)
def walk_dir(dir_to_walk, tgt_dir):
    """
    Collect an FSfile entry for every file found under *dir_to_walk*.

    Each entry gets its filename, source directory, target directory, size
    and date taken, and is then tagged through NavUtil.sort_file_type.
    Start and end timestamps are printed. If *dir_to_walk* is not a
    directory the returned list is empty.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    began = datetime.datetime.fromtimestamp(time.time()).strftime(
        stamp_format)
    print("Walk Dir started at {0}:".format(began))

    collected = []

    if os.path.isdir(dir_to_walk):
        for folder, _subfolders, names in os.walk(dir_to_walk):
            for name in names:
                # Name and locations first ...
                entry = FSfile()
                entry.set_filename(name)
                entry.set_src_dir(folder + SEPARATOR)
                entry.set_tgt_dir(tgt_dir + SEPARATOR)

                # ... then size and date metadata read from the full path ...
                size = FileUtils.get_file_size(entry.get_full_path())
                entry.set_size(size)
                taken = ImageUtils.get_original_date(entry.get_full_path())
                entry.set_date_taken(taken)

                # ... and finally the type tag before the entry is stored.
                collected.append(NavUtil.sort_file_type(entry))

    finished = datetime.datetime.fromtimestamp(time.time()).strftime(
        stamp_format)
    print("Walk Dir completed at {0} with {1} files collected.".format(
        finished, len(collected)))

    return collected
def mark_duplicates(all_files):
    """
    Split *all_files* into unique files and duplicates.

    Files are visited in list order. A file is a duplicate when an earlier
    kept file is a different entry, has the same size, and
    FileUtils.is_file_dup confirms the match; the first file of each
    group is the one kept.

    As before, *all_files* is also updated in place so that it contains
    only the kept files when the function returns.

    :param all_files: list of file objects exposing get_size() and
        get_full_path()
    :rtype: tuple[all_files, duplicates]
    """
    # Print start time
    start_time = datetime.datetime.fromtimestamp(time.time()).strftime(
        '%Y-%m-%d %H:%M:%S')
    print("Mark Duplicates: " + start_time)

    duplicates = []
    dedupped_file_set = []

    # Build the result lists instead of removing items from the list being
    # iterated; removing during iteration skips elements and can leave
    # duplicates undetected.
    for candidate in all_files:
        candidate_size = candidate.get_size()
        candidate_path = candidate.get_full_path()

        # The cheap size comparison runs first so the file comparison is
        # only attempted on plausible matches.
        is_duplicate = any(
            kept != candidate
            and kept.get_size() == candidate_size
            and FileUtils.is_file_dup(kept.get_full_path(), candidate_path)
            for kept in dedupped_file_set)

        if is_duplicate:
            duplicates.append(candidate)
        else:
            dedupped_file_set.append(candidate)

    # Preserve the existing side effect on the caller's list.
    all_files[:] = dedupped_file_set

    # Print end time
    end_time = datetime.datetime.fromtimestamp(time.time()).strftime(
        '%Y-%m-%d %H:%M:%S')
    print(
        "Mark Duplicates completed at {0} with {1} files collected".format(
            end_time, len(dedupped_file_set)))

    return dedupped_file_set, duplicates
# Open file log with write privileges files_log = open(FILES_BATCH, "w") log_file = open(LOG_FILE, "w") start_time = datetime.datetime.now() log_file.write("Correct Date started at {0} \n".format(start_time)) for root, dirs, files in os.walk(STR_DIR1): for file in files: full_filename = root + os.sep + file if "." is file[0]: print("Moving..." + full_filename) FileUtils.move_file(full_filename, r"/Volumes/MyBook2TB/Backups/Trash/", file) else: date_taken = ImageUtils.get_dt_from_name(file)[0] orig_datetime = ImageUtils.get_original_date(full_filename)[0] if orig_datetime is not None: orig_date = orig_datetime.split(" ")[0] else: orig_date = "0000-00-00" new_file = file # TODO Figure out if there is a need to reconcile date_taken and orig_date if "0000" in orig_date: if file[0] is not ".":
def test_does_file_exist(self):
    """Check FileUtils.does_file_exist for missing, invalid and present files."""
    # Neither an unknown name nor a non-string name exists in " ".
    for missing_name in ("fakefile", 2):
        self.assertFalse(FileUtils.does_file_exist(missing_name, " "))

    # The sample image is expected to be found in the photo folder.
    found = FileUtils.does_file_exist(
        "IMG_0001.jpg",
        "/Users/paulottley/Desktop/MomsDadsPhotos/")
    self.assertTrue(found)
# @author Paul Ottley
# @copyright 2017
import os

from app import FileUtils
from app import ImageUtils

INPUT_FILE = r"/Users/paulottley/PycharmProjects/filesorter/test/unsupported_files.txt"
LOG_FILE = r"/Users/paulottley/PycharmProjects/filesorter/test/Check_Dates_log.txt"

# Read the list of paths up front; the context manager closes the file even
# if reading fails.
with open(INPUT_FILE, "r") as orig_input:
    orig_arr = orig_input.readlines()

# Copy every listed file into the "unsupported" library folder.
currentIndex = 0
for name in orig_arr:
    orig_name = name.rstrip()
    if not orig_name:
        # A blank line carries no path; skip it instead of passing an empty
        # source to copy_file and counting it.
        continue
    orig_path, orig_fn = os.path.split(orig_name)
    print("Moving %s" % orig_name)
    FileUtils.copy_file(orig_name,
                        "/Volumes/MyBook2TB/Backups/Library/unsupported/",
                        orig_fn)
    currentIndex += 1

print("Total files moved: %d" % currentIndex)
# Counters used while scanning STR_DIR1.
count = 0
zero_count = 0
missing_filename_dt = 0
missing_embed_dt = 0
mismatched_dt = 0
dot_file_count = 0

# NOTE(review): the log handle is not closed anywhere in this section.
log = open(LOG_FILE, "w")

# First pass: move hidden "dot" files to the trash folder.
print("Checking for Dot files...")
for root, dirs, files in os.walk(STR_DIR1):
    for file in files:
        full_filename = root + os.sep + file
        # Compare by value: an identity test against a string literal is
        # implementation-dependent and raises SyntaxWarning on Python 3.8+.
        if file.startswith("."):
            print("Moving..." + full_filename)
            FileUtils.move_file(full_filename,
                                r"/Volumes/Elements2TB/Backups/Trash/",
                                file)
            dot_file_count += 1

print("Number of dot files: {0}".format(dot_file_count))

# Second pass: look up the original date of every remaining file.
# for file in all_files1:
for root, dirs, files in os.walk(STR_DIR1):
    for file in files:
        count += 1
        tgt_dir = root + os.sep
        filename = file
        full_path = root + os.sep + file
        orig_date = ImageUtils.get_original_date(full_path)
        line = ""
def copy_files(all_files):
    """Print how many files will be copied, then copy each to its target location."""
    total = len(all_files)
    print("Total files to copy {0}".format(total))

    for entry in all_files:
        # Source path, destination folder and destination name all come
        # from the entry itself.
        FileUtils.copy_file(
            entry.get_full_path(),
            entry.get_tgt_dir(),
            entry.get_tgt_filename())
# Update date
# Files whose recorded date contains "0000" are renamed with a
# "<year>-<month>-<day>_" prefix; all files are counted and reported.
count = 0
zero_count = 0
for file in all_files1:
    count += 1
    full_filename = file.get_full_path()
    path, fn = os.path.split(full_filename)
    date_taken = file.get_date_taken()

    if "0000" in date_taken:
        zero_count += 1
        year, month, day, dt = ImageUtils.get_dt_captured_split()
        creation_date_str = "{0}-{1}-{2}".format(year, month, day)

        new_file = fn.replace("images_", "")
        new_file = new_file.replace("_.", ".")
        new_file = new_file.replace("~", "")
        new_file = new_file.replace("....", ".")
        # NOTE(review): this assignment discards the cleaned-up name built
        # above and prefixes the untouched filename - confirm whether the
        # cleaned name was meant to be used here.
        new_file = "{0}_{1}".format(creation_date_str, fn)

        # os.path.split() returns the directory without a trailing
        # separator, so the destination must be joined, not concatenated.
        new_path = os.path.join(path, new_file)

        if FileUtils.does_file_exist(new_file, path) is not True:
            print(new_path)
            os.rename(full_filename, new_path)
        else:
            print(new_file + " file exists")

    print("Filename: {0} date: {1}".format(fn, date_taken))

print("Total count: {0} Zero Count: {1}".format(count, zero_count))
def get_original_date(filename, deep=False):
    """
    Work out the most plausible original date for a media file.

    Candidate dates are collected from the filename and, depending on the
    file category, from EXIF data (images), the atom parser (videos, only
    when *deep* is True) or the generic parser. The candidates are passed
    to ImageUtils.get_earliest_date and the winner is converted by
    ImageUtils.get_date_obj.

    :param filename: full path of the file to inspect
    :param deep: when True, videos are inspected with the atom parser
    :return: the result of ImageUtils.get_date_obj (date object and
        YYYY-MM-DD string), or ``(None, None)`` when *filename* is not an
        existing file
    """
    date_frm_filename = None
    date_frm_exif_data = None  # Images only
    date_frm_atom = None  # Video only
    date_frm_parser = None  # Images or videos

    fn = os.path.basename(filename)
    file_category = FileUtils.get_file_category(fn)

    # First, see if there is a date in the filename (easiest and universal)
    dt_frm_file, str_dt_frm_file = ImageUtils.get_dt_from_name(fn)
    if dt_frm_file is not None and str_dt_frm_file is not None:
        date_frm_filename = dt_frm_file

    # Next, pull a date from the metadata
    if os.path.isfile(filename):
        if file_category == Rules.get_img_dir():
            try:
                # EXIF works mainly for images. The context manager
                # releases the file handle once the tags have been read.
                with Image.open(filename) as image:
                    exif = image._getexif().items()
                dt = ImageUtils.get_exif_field(filename, exif,
                                               'DateTimeOriginal')
                # Compare the sentinel by value; an identity test against
                # a string literal is not reliable.
                if dt is not None and dt != "NotFound":
                    date_frm_exif_data = re.sub(r"\D", "", dt.split(" ")[0])
            except Exception as err:
                # Best effort: unreadable metadata must not abort the lookup.
                if Rules.get_debug() is True:
                    print(
                        "get_original_date(): Metadata extraction error: %s"
                        % err)
        elif file_category == Rules.get_vid_dir():
            # NOTE(review): the nesting of the parser fallback below was
            # reconstructed from whitespace-collapsed source - confirm it
            # pairs with the "deep" check.
            # First, try Atom Parser
            if deep is True and os.path.isfile(filename):
                dt_from_atom = ImageUtils.get_dt_from_atom_parser(filename)
                # Accept only dates with a real year inside the allowed range.
                if (dt_from_atom is not None
                        and "0000" not in dt_from_atom
                        and CURRENT_YEAR >= int(dt_from_atom[:4])
                        > Rules.get_oldest_year()):
                    date_frm_atom = re.sub(r"\D", "",
                                           dt_from_atom.split(" ")[0])
            elif date_frm_exif_data is None and date_frm_atom is None:
                dt = ImageUtils.get_dt_from_parser(filename)
                if dt is not None:
                    date_frm_parser = re.sub(r"\D", "", dt.split(" ")[0])
    else:
        print("Not a file: " + str(filename))
        return None, None

    # Finally, compare the candidates for the most likely one:
    #   earliest date, after 1970, not past the current year
    date_list = [
        date_frm_filename,
        date_frm_exif_data,
        date_frm_atom,
        date_frm_parser,
    ]
    best_date_taken = ImageUtils.get_earliest_date(date_list)

    # Date object and string (YYYY-MM-DD)
    return ImageUtils.get_date_obj(best_date_taken)
if batch is True:
    # Batch mode: every line of INPUT_FILE is the full path of a file whose
    # date is set from the date found in its name. The context manager
    # closes the list even if one of the updates raises.
    with open(INPUT_FILE, "r") as batch_input:
        for line in batch_input:
            line = line.rstrip()
            path, fn = os.path.split(line)
            dt = ImageUtils.get_dt_from_name(fn)
            print(dt)
            ImageUtils.set_date(line, dt)
            # FileUtils.move_file(line, path + "/Zeros/", fn)
            print(ImageUtils.get_dt_from_atom_parser(line))
            print(line)
else:
    # Directory mode: walk START_DIR1, trash hidden "dot" files and set the
    # date of every other file from its name.
    for root, dirs, files in os.walk(START_DIR1):
        for file in files:
            full_filename = root + os.sep + file
            # Compare by value: an identity test against a string literal
            # is implementation-dependent and raises SyntaxWarning on
            # Python 3.8+.
            if file.startswith("."):
                print("Moving..." + full_filename)
                FileUtils.move_file(full_filename,
                                    r"/Volumes/MyBook2TB/Backups/Trash/",
                                    file)
            else:
                dt = ImageUtils.get_dt_from_name(file)
                print(dt)
                ImageUtils.set_date(full_filename, dt)
word_year = "" if m_year is not None: word_year = m_year.group().replace("_", "") m_day = re.search(r"_\d{1,2}_", word_date) word_day = "" if m_day is not None: word_day = m_day.group().replace("_", "") if int(word_day) < 10: word_day = "0" + word_day new_fn = fn.replace( "0000-00-00_" + word_date, "{0}-{1}-{2}_".format(word_year, month_num, word_day)) new_fn = new_fn.replace("__", "_") print("File: {0} New: {1}".format(fn, new_fn)) if FileUtils.does_file_exist(new_fn, path): print("Files exists! {0}".format(new_fn)) else: os.rename(line, path + os.sep + new_fn) """ for root, dirs, files in os.walk(START_DIR): for file in files: new_file = file.replace(",", "") if file != new_file: print("File: {0} New File: {1}".format(file, new_file)) os.rename(root + os.sep + file, root + os.sep + new_file) """ batch_input.close()
# For every collected file: report its current dates, then rewrite the
# file's date when the year found by the parser differs from the year
# found in the filename.
for file in all_files1:
    report = "Before Filename: {0} date: {1} filename parser date: {2}".format(
        file.get_filename(),
        file.get_date_taken(),
        ImageUtils.get_dt_from_parser(file.get_full_path()))
    print(report)

    file_type = FileUtils.get_file_type(file.get_filename())
    date = ImageUtils.get_dt_from_name(file.get_filename())

    # Disabled experiment: locate the .mp4 file sharing this file's name.
    # if file_type != "mp4" and file.get_type() != Rules.get_oth_tag():
    #     full_path = file.get_full_path()
    #     mp4_full_path = full_path.replace("." + file_type, ".mp4")
    #     # mp4_full_path = full_path.replace("." + file_type.swapcase(), ".mp4")
    #     print("Date: {0} Filename: {1}".format(date, mp4_full_path))

    full_filename = file.get_full_path()
    pdate = ImageUtils.get_dt_from_parser(file.get_full_path())
    dt = ImageUtils.get_dt_captured_split(date)

    # Only the leading four characters (the year) of each date are compared.
    years_agree = int(pdate[:4]) == int(date[:4])
    if not years_agree:
        ImageUtils.set_date(
            file.get_full_path(), date, dt.year, dt.month, dt.day)
        # print("Moving..." + full_filename)
        # FileUtils.move_file(full_filename, r"/Volumes/Elements2TB/Backups/Library/Date_Error/", file.get_filename())
from app import FileUtils from app import ImageUtils from app import Rules # STR_DIR = r"/Users/paulottley/Desktop/SortSource" STR_DIR = r"/Volumes/Elements2TB/Backups/Pictures/images" TGT_DIR = r"/Volumes/MyBook2TB/Backups/SortTarget" for root, dirs, files in os.walk(STR_DIR): for file in files: target_dir = root tgt_folder = FileUtils.get_file_category(file) try: dt, str_dt = ImageUtils.get_dt_from_name(file) if dt is None: print("Zero or None: " + file) target_dir = "{0}{1}{2}{3}".format( root, os.sep, "no_date", os.sep) else: target_dir = "{0}{1}{2}{3}{4}{5}".format( root + os.sep, tgt_folder + os.sep, dt.year,
# @author Paul Ottley
# @copyright 2017
# Entry script for FilesSorter: files found under STR_DIR3 are moved into a
# sub-folder of STR_DIR3 named after the type reported for their filename.
import os

from app import FileUtils

STR_DIR3 = r"/Volumes/MyBook2TB/Backups/Library"
STR_DIR4 = r"/Users/paulottley/Google Drive/MomsDadsPhotos"
TGT_DIR1 = r"/Users/paulottley/Desktop/SortTarget"

# 1. Walk the directory tree.
for root, dirs, files in os.walk(STR_DIR3):
    for file in files:
        # 2. Determine the file type from the extension.
        ext = FileUtils.get_file_type(file)

        # 3. Move the file into the folder for that type.
        source = os.sep.join((root, file))
        destination = "".join((STR_DIR3, os.sep, ext, os.sep))
        FileUtils.move_file(source, destination, file)
STR_DIR3 = r"/Volumes/Elements2TB/Backups/Pictures/images" # STR_DIR3 = r"/Volumes/MyBook2TB/Backups/SortTarget" TGT_DIR1 = r"/Volumes/MyBook2TB/Backups/Library/videos/Cleanup" for root, dirs, files in os.walk(STR_DIR3): for file in files: if "...." in file: new_file = file.replace("...", "") print(root + os.sep + new_file) os.rename(root + os.sep + file, root + os.sep + new_file) if "---" in file: new_file = file.replace("---", "-") print(root + os.sep + new_file) os.rename(root + os.sep + file, root + os.sep + new_file) if "0000-00-00" in file: FileUtils.move_file(file, root + os.sep + "zeros" + os.sep, file) """ new_file = file.replace("0000-00-00", "0000-00-00") if FileUtils.does_file_exist(new_file, root + os.sep) is not True: print(root + os.sep + new_file) os.rename(root + os.sep + file, root + os.sep + new_file) else: print(new_file + "file exists") m = re.search(r"20\d{2}-\d{2}-\d{1}\D", file) # Pull full date from name if m is not None: print(file + " file exists") if " " in file: new_file = file.replace(" ", "")