def files_generator(basepath, options):
    """Yield a file dict for every item related to *basepath*, top down.

    The dicts are produced in this order:

    1. The volume that contains *basepath*, then each superfolder on the
       way down, ending with *basepath* itself.  Each of these gets a
       ``'depth'`` entry counting up to 0: *basepath* is 0, its parent is
       -1, and so on up to the volume.
    2. If *basepath* is a directory, every URL produced by the directory
       enumerator rooted at *basepath*, with ``'depth'`` set to the
       enumerator's current level.

    Nothing below *basepath* is enumerated when it is not a directory, or
    when it is a package and ``options.scan_packages`` is false.

    Attributes read from *options*:

    * ``scan_packages`` -- when false, package descendants are skipped.
    * ``scan_hidden_files`` -- when false, hidden files are skipped.
    * ``depth_limit`` -- when truthy, descendants of any item whose level
      is at least ``depth_limit - 1`` are skipped (the item itself is
      still yielded).
    """
    GPR.print_it25("files_generator", basepath, 2)

    basepath_url = NSURL.fileURLWithPath_(basepath)

    # Walk upwards from basepath, one path component at a time, until the
    # item that reports itself as a volume.  Inserting at the front leaves
    # the list ordered volume-first, basepath-last.
    superfolders_list = []
    url = basepath_url
    while True:
        d1 = GetURLValues(url, enumeratorURLKeys)
        superfolders_list.insert(0, d1)
        if d1[NSURLIsVolumeKey]:
            break
        url = url.URLByDeletingLastPathComponent()

    GPR.print_superfolders_list("volume, superfolder(s)", superfolders_list, 4)

    # Now go back down, yielding a dict at each step.
    n = len(superfolders_list)
    for i, superfolder_dict in enumerate(superfolders_list):
        superfolder_dict['depth'] = i + 1 - n
        yield superfolder_dict

    # The last dict in the superfolder list describes basepath itself; it
    # has already been yielded by the loop above.
    basepath_dict = superfolders_list[-1]

    item_is_package = is_item_a_package(basepath_url)
    if basepath_dict[NSURLIsDirectoryKey] and item_is_package and not options.scan_packages:
        GPR.print_it("\nbasepath is a directory and a package but we're not scanning packages.\n", 1)
        return

    if basepath_dict[NSURLIsDirectoryKey]:
        # Plain 0 (not the Python-2-only ``0L``) builds the same option mask
        # and keeps this function parseable by Python 3 as well.
        enumeratorOptionKeys = 0
        if not options.scan_packages:
            enumeratorOptionKeys |= NSDirectoryEnumerationSkipsPackageDescendants
        if not options.scan_hidden_files:
            enumeratorOptionKeys |= NSDirectoryEnumerationSkipsHiddenFiles

        enumerator = sharedFM.enumeratorAtURL_includingPropertiesForKeys_options_errorHandler_(
            basepath_url,
            enumeratorURLKeys,
            enumeratorOptionKeys,
            error_handler_for_enumerator,
        )

        for url in enumerator:
            item_dict = GetURLValues(url, enumeratorURLKeys)
            depth = enumerator.level()
            item_dict['depth'] = depth

            # Stop descending below the depth limit; the item at the limit
            # is still yielded.
            if options.depth_limit and (depth >= options.depth_limit - 1):
                enumerator.skipDescendants()

            yield item_dict

    # NOTE(review): the source this was reconstructed from had lost its
    # indentation; this call is assumed to sit at function level -- confirm.
    GPR.print_it2("end files_generator", basepath, verbose_level_threshold=3)
def do_fs_basepath(cnx, basepath, slist, vol_id, item_tally=None,
                   force_folder_scan=False, scan_hidden_files=False,
                   depth_limit=4, scan_packages=False):
    """Yield an ordered sequence of file dicts for *basepath* and its contents.

    The dicts are produced in this order:

    1. Every entry of *slist* except the last (the directories above the
       basepath, from the top down; may be empty), each tagged with
       ``'vol_id'`` and a negative ``'depth'``.
    2. The last entry of *slist*, which describes *basepath* itself, at
       depth 0.
    3. If *basepath* is a directory (and not a package, unless
       *scan_packages* is true), every item produced by the directory
       enumerator rooted at *basepath*.

    Side effects on module-level state (none of it is defined here):
    directories are pushed onto ``stak``; entries are removed from
    ``RS1_db_rels`` and added to ``RS2_ins`` for items whose containing
    folder is a key of ``RS1_db_rels``.

    Parameters:
        cnx: database connection handed to ``db_file_exists`` and
            ``db_query_folder``.
        basepath: filesystem path to scan.
        slist: list of file dicts, superfolders first, basepath last.
            Must not be empty.
        vol_id: volume id stored in every yielded dict.
        item_tally: not used by this function; kept for backward
            compatibility.  Defaults to ``None`` rather than a shared
            mutable ``defaultdict``.
        force_folder_scan: when true, every directory is queried in the
            database even if it already exists there.
        scan_hidden_files: when false, the enumerator skips hidden files.
        depth_limit: accepted but not applied by this function.
        scan_packages: when false, the enumerator skips package
            descendants and a package basepath is not enumerated.
    """

    def track_directory(item_dict, depth):
        """Check one directory against the database and push it on ``stak``."""
        file_exists = db_file_exists(cnx, item_dict, vol_id)
        needs_scan = (not file_exists) or force_folder_scan
        item_dict['directory_is_up_to_date'] = not needs_scan
        if needs_scan:
            db_query_folder(cnx, vol_id, item_dict, depth)
            # (1) in addition to checking database, also need to add new
            #     files to RS2_ins[ (depth-1, folder_id) ] += rs
            # (2) any completely new directories (ie, not just update of
            #     existing directory) won't have any database contents to
            #     check.  (this is a lesser optimization?)
        # NOTE(review): the source this was reconstructed from had lost its
        # indentation.  Pushing every tracked directory (not only those that
        # need a scan) is assumed, because the caller trims ``stak`` by
        # comparing its length with the current depth -- confirm.
        stak.append((depth, item_dict['NSFileSystemFileNumber']))

    # Superfolders: everything in slist except the last entry (basepath).
    n = len(slist)
    for i, superfolder_dict in enumerate(slist[:-1]):
        superfolder_dict['vol_id'] = vol_id
        superfolder_dict['depth'] = i + 1 - n
        yield superfolder_dict

    # The basepath itself.
    basepath_url = NSURL.fileURLWithPath_(basepath)
    item_dict = slist[-1]
    depth = 0
    item_dict['vol_id'] = vol_id
    item_dict['depth'] = depth

    # See if basepath is (effectively) a directory; check/query the
    # database if it is.
    item_is_package = is_item_a_package(basepath_url)
    if item_dict[NSURLIsDirectoryKey] and ((not item_is_package) or scan_packages):
        track_directory(item_dict, depth)
        yield item_dict
        # fall through to do enumeration
    else:
        if item_dict[NSURLIsDirectoryKey] and item_is_package and not scan_packages:
            GPR.print_it("\nbasepath is a directory and a package but we're not scanning packages.\n", 2)
        yield item_dict
        return

    # Enumerate the contents of basepath.  Plain 0 (not the Python-2-only
    # ``0L``) builds the same option mask.
    enumeratorOptionKeys = 0
    if not scan_packages:
        enumeratorOptionKeys |= NSDirectoryEnumerationSkipsPackageDescendants
    if not scan_hidden_files:
        enumeratorOptionKeys |= NSDirectoryEnumerationSkipsHiddenFiles

    enumerator2 = sharedFM.enumeratorAtURL_includingPropertiesForKeys_options_errorHandler_(
        basepath_url,
        enumeratorURLKeys,
        enumeratorOptionKeys,
        error_handler_for_enumerator,
    )

    for url in enumerator2:
        item_dict = GetURLValues(url, enumeratorURLKeys)
        depth = enumerator2.level()
        item_dict['vol_id'] = vol_id
        item_dict['depth'] = depth

        # Drop stack entries at or below the current depth.
        while len(stak) > depth:
            stak.pop()

        # See if the current item is (effectively) a directory; check/query
        # the database if it is.
        item_is_package = is_item_a_package(url)
        if item_dict[NSURLIsDirectoryKey] and ((not item_is_package) or scan_packages):
            track_directory(item_dict, depth)

        # See if the current item's folder is in our list of (new or forced)
        # folders to be tracked.
        folder_id = item_dict['NSFileSystemFolderNumber']
        folder_key = (depth - 1, folder_id)
        folder_is_being_checked = folder_key in RS1_db_rels
        item_dict['current_item_directory_is_being_checked'] = folder_is_being_checked

        if folder_is_being_checked:
            file_id = item_dict['NSFileSystemFileNumber']
            filename = item_dict[NSURLNameKey]
            # Drop the trailing " +0000" from the date's string form.
            s = str(item_dict[NSURLContentModificationDateKey])
            file_mod_date = s[:-len(" +0000")]

            rs = (vol_id, folder_id, filename, file_id, file_mod_date)

            # If the current item is present in RS1 then it is no longer a
            # "file to be deleted"; if it is in the filesystem but not in the
            # database then it is a "file to be inserted".
            try:
                RS1_db_rels[folder_key] -= rs
            except KeyError:
                RS2_ins[folder_key] += rs

        yield item_dict