def comp_file(file_name, file1, file2, level, shift=0):
    """Compare two files and report where they differ.

    Prints a "comparing <file_name>" line indented by ``shift`` copies of
    ``sp_str``, resolves both paths with ``abs_path`` and hands them to
    ``comparer`` (one indentation level deeper).

    :param file_name: name shown in the progress line.
    :param file1: path of the first file.
    :param file2: path of the second file.
    :param level: comparison level forwarded to ``comparer``; the
        "files differ" line is only printed here when it is 0.
    :param shift: indentation depth for the printed output.
    """
    indent = sp_str * shift
    print(indent, "comparing " + file_name)

    first = abs_path(file1)
    second = abs_path(file2)
    result = comparer(first, second, level, shift + 1)

    # comparer signals "no difference" with -1; anything else is reported,
    # but only at level 0.
    if result != -1 and level == 0:
        print(indent, "files differ at ", result)
def comp_folder(folder_name, folder1, folder2, level, shift=0):
    """Compare the contents of two folders entry by entry.

    Lists both folders, lets ``make_same`` report entries missing on either
    side and split the common entries into files and folders, then runs
    ``comp_file`` on every common file.  Common sub-folders are only
    descended into when ``level == 4`` (flagged in the original code as
    "yet to be implemented ... still buggy").

    :param folder_name: name shown in the progress line.
    :param folder1: path of the first folder.
    :param folder2: path of the second folder.
    :param level: comparison level forwarded to ``comp_file``.
    :param shift: indentation depth for the printed output.
    """
    print(Colour.GREEN + sp_str * shift, "comparing folder " + folder_name + Colour.END)

    left_entries = os.listdir(abs_path(folder1))
    right_entries = os.listdir(abs_path(folder2))
    common_files, common_dirs = make_same(
        left_entries, right_entries, folder1, folder2, shift + 2
    )

    def _both_sides(name):
        # Path of the same entry inside each of the two folders.
        return folder1 + "/" + name, folder2 + "/" + name

    for name in common_files:
        left, right = _both_sides(name)
        comp_file(name, left, right, level, shift + 2)

    # Recursion is opt-in: yet to be implemented ... still buggy.
    if level != 4:
        return
    for name in common_dirs:
        left, right = _both_sides(name)
        comp_folder(name, left, right, level, shift + 1)
def find_directory(dir_path, filedic, last_modified_dic, children_dic):
    """Walk ``dir_path`` recursively and group its files by MD5 checksum.

    Sub-folders are always descended into.  Files directly inside
    ``dir_path`` are only hashed when ``should_check_files_or_folder``
    approves both the folder and the individual file.

    :param dir_path: folder to process.
    :param filedic: dict mapping checksum -> list of file paths; updated
        in place.
    :param last_modified_dic: passed through to the helper functions.
    :param children_dic: parent/child bookkeeping, updated through
        ``write_in_children_dic``.
    """
    if debug:
        print("FOLDER processing : " + "\t" + dir_path)

    folder_needs_check = should_check_files_or_folder(
        dir_path, last_modified_dic, filedic
    )
    if folder_needs_check:
        check_all_children_exists(dir_path, filedic, last_modified_dic, children_dic)

    for entry in os.listdir(dir_path):
        entry_path = abs_path(os.path.join(dir_path, entry))

        if os.path.isdir(entry_path):
            # Folder: recurse first, then record it as a child of dir_path.
            find_directory(entry_path, filedic, last_modified_dic, children_dic)
            write_in_children_dic(dir_path, entry_path, children_dic)
            continue

        # Plain file: skip unless both the folder and the file need checking.
        if not folder_needs_check:
            continue
        if not should_check_files_or_folder(entry_path, last_modified_dic, filedic):
            continue

        write_in_children_dic(dir_path, entry_path, children_dic)
        checksum = findCheckSumMD5(entry_path)
        filedic.setdefault(checksum, []).append(entry_path)
def extract_data():
    """Load the users data file, recreating it when it lacks 'user_ids'.

    The file location comes from ``users_data`` (resolved with
    ``abs_path``).  ``verify_file`` is called on it before reading.  If the
    loaded data has no ``'user_ids'`` key, ``create_file`` is invoked and
    the file is read again.

    :return: whatever ``read_file`` yields for the (possibly recreated) file.
    """
    target = abs_path(users_data)
    verify_file(target)

    contents = read_file(target)
    if 'user_ids' in contents.keys():
        return contents

    # Expected key missing: rebuild the file and load the fresh copy.
    create_file(target)
    return read_file(target)
def finder(path):
    """Entry point: look for duplicates of a file or inside a folder.

    The path is resolved with ``abs_path``.  A non-existent path prints a
    message and terminates the process with exit status 1.  A file is passed
    to ``locate_file_duplicates``; a directory is passed to ``process_dir``.

    :param path: file or directory to inspect.
    """
    target = abs_path(path)

    exists = os.path.exists(target)
    if not exists:
        print("please enter a valid path")
        sys.exit(1)

    # Both checks are evaluated independently, exactly one is expected to hit.
    if os.path.isfile(target):
        locate_file_duplicates(target)
    if os.path.isdir(target):
        process_dir(target)
def extract_data():
    """Load the bookmarks JSON file, creating or resetting it when needed.

    The file lives at ``~/.CheemaFy/bookmarks.json`` (resolved with
    ``abs_path``).  If it does not exist, ``create_file`` is called and the
    file is opened again.  If the parsed data has no ``'paths'`` key,
    ``create_file`` is called and the file is re-read.

    Every file handle is closed via ``with``; previously each ``open()``
    result was left for the garbage collector.

    :return: the parsed JSON data.
    """
    fille = abs_path('~/.CheemaFy/bookmarks.json')

    # Keep the try body minimal: only open() is expected to raise
    # FileNotFoundError.
    try:
        jfile = open(fille)
    except FileNotFoundError:
        create_file(fille)
        jfile = open(fille)
    with jfile:
        data = json.load(jfile)

    if 'paths' not in data.keys():
        # Expected key missing: rebuild the file and load the fresh copy.
        create_file(fille)
        with open(fille) as jfile:
            data = json.load(jfile)
    return data
def bookmark_read():
    """Ask the user to choose a bookmarked path and return it.

    Loads the bookmarks via ``extract_data``, shows them with
    ``print_indexed`` and reads a number from standard input.  The chosen
    entry is resolved with ``abs_path``, printed, and returned.

    :return: the resolved path, or ``None`` when the input is not a number
        or is outside the accepted range (``"invalid index"`` is printed in
        both cases).
    """
    data = extract_data()
    paths = data['paths']
    mapp = print_indexed(paths)

    print("enter number of path : ", end='')
    try:
        index = int(input())
    except ValueError:
        # Non-numeric input previously escaped as an uncaught ValueError;
        # handle it the same way as an out-of-range number.
        print("invalid index")
        return None

    # NOTE(review): the accepted range is 1 .. len(mapp) - 1; this presumes
    # print_indexed returns a container whose valid entries start at 1 --
    # confirm against print_indexed.
    if index <= 0 or index >= len(mapp):
        print("invalid index")
        return None

    command = abs_path(mapp[index])
    print()
    print(command)
    return command
def make_same(items1, items2, folder1, folder2, shift):
    """Report entries missing on either side and classify the common ones.

    Both listings are first passed through ``reactify_list`` and then merged
    with a two-pointer walk (assumes ``reactify_list`` returns the entries
    in sorted order -- TODO confirm).  Entries found on only one side are
    printed; entries present in both are split into files and folders by
    checking ``folder1 + "/" + name`` with ``os.path.isdir``.

    :param items1: entries of the first folder.
    :param items2: entries of the second folder.
    :param folder1: path of the first folder, used for the directory check.
    :param folder2: path of the second folder (not used by this function).
    :param shift: indentation depth for the printed output.
    :return: tuple ``(files, folders)`` of the names present in both listings.
    """
    left = reactify_list(items1)
    right = reactify_list(items2)

    missing_from_first = []   # present only in items2
    missing_from_second = []  # present only in items1
    shared = []

    li = ri = 0
    while li < len(left) and ri < len(right):
        a, b = left[li], right[ri]
        if a == b:
            shared.append(a)
            li += 1
            ri += 1
        elif a < b:
            missing_from_second.append(a)
            li += 1
        elif a > b:
            missing_from_first.append(b)
            ri += 1

    # Whatever is left on one side has no counterpart on the other.
    missing_from_second.extend(left[li:])
    missing_from_first.extend(right[ri:])

    if missing_from_first or missing_from_second:
        pad = sp_str * shift
        print(pad, "files not in pc : ", end=" ")
        print("".join(str(name) + " " for name in missing_from_first))
        print(pad, "files not in CheemaFy : ", end=" ")
        print("".join(str(name) + " " for name in missing_from_second))
        print()

    files, folders = [], []
    for name in shared:
        is_dir = os.path.isdir(abs_path(folder1 + "/" + name))
        (folders if is_dir else files).append(name)
    return files, folders
import hashlib

from abs_path import abs_path

# Locations of the JSON files used by the duplicate finder.
path = "~/PycharmProjects/duplicate_finder/venv/bin/scripts/last_modified.json"
cache_file_path = "/home/choudhary/PycharmProjects/duplicate_finder/venv/bin/scripts/cache.json"
children_json_path = "/home/choudhary/PycharmProjects/duplicate_finder/venv/bin/scripts/children.json"
last_modified_json_path = abs_path(path)


def findCheckSumMD5(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and consumed in 65536-byte chunks so
    that it never has to be held in memory as a whole.

    :param fname: path of the file to hash.
    :return: the digest as a hex string.
    """
    chunk_size = 65536
    digest = hashlib.md5()
    with open(fname, 'rb') as stream:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# Empty starting values for the time and children structures.
time_template = {}
children_template = {}