# These functions are collected from several modules (the fname strings
# name 'error' and 'utils'); the imports below cover all of them. paf is
# the author's shared helper library.
import itertools
import multiprocessing as mp
import os
import re
import stat
import sys

import requests

import paf
import utils


def too_many_pkgs_found(config, parms, found_pkgs, pkg_results):
    """
    This auto-resolves some very bizarre edge cases I have run into.
    """
    fname = 'error.too_many_pkgs_found(' + parms['type'] + parms['id'] + ')'
    paf.write_to_log(fname, 'Starting Debug Process...', config['log'])
    found_files = utils.trim_pkg_list(paf.basenames(found_pkgs))
    search_files = paf.basenames(pkg_results['search'])
    bad_files = (found_files - search_files)
    paf.write_to_log(
        fname,
        'Debug Process Found ' + str(len(bad_files)) + ' Files That Do Not Belong!',
        config['log'])

    if len(found_files) - len(search_files) == len(bad_files):
        paf.write_to_log(fname, 'Cleaning Found Files...', config['log'])
        bad_files_full = set()
        for b in bad_files:
            for f in found_pkgs:
                # re.escape() keeps package names with regex metacharacters
                # (e.g. 'libc++') from breaking or corrupting the search
                if re.search(re.escape(b), f):
                    bad_files_full.add(f)
        for f in bad_files_full:
            found_pkgs.remove(f)
        paf.write_to_log(fname, 'Debug Process Was Able to Fix All Issues!', config['log'])
        return (True, found_pkgs)
    else:
        paf.write_to_log(fname, 'Debug Process Was NOT Able to Fix All Issues!', config['log'])
        return (False, found_pkgs)
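# A standalone sketch of the cleanup step above: derive the set of package
# versions that should not be present, then drop every path matching one.
# The cache paths are hypothetical, and trim() mirrors trim_pkg_list() below.
def _demo_too_many_pkgs():
    found = {'/pkg-cache/zsh-5.8-1-x86_64.pkg.tar.zst',
             '/pkg-cache/zsh-5.9-1-x86_64.pkg.tar.zst'}
    wanted = {'zsh-5.8-1'}  # versions the search actually asked for

    def trim(path):
        return '-'.join(path.split('/')[-1].split('-')[:-1])

    bad = {trim(p) for p in found} - wanted
    return {p for p in found if trim(p) not in bad}

# _demo_too_many_pkgs() -> {'/pkg-cache/zsh-5.8-1-x86_64.pkg.tar.zst'}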
def trim_pkg_list(pkg_list):
    '''
    Removes the prefix dir and the x86_64.pkg.tar.zst suffix.
    This seems to be the fastest way to reduce all file paths to a
    unique list of package versions present on the system.
    '''
    return {'-'.join(pkg.split('-')[:-1]) for pkg in paf.basenames(pkg_list)}
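# Quick illustration of the trim above, with os.path.basename standing in
# for paf.basenames(); the cache paths are hypothetical.
def _demo_trim():
    paths = ['/var/cache/pacman/pkg/linux-5.10.4.arch2-1-x86_64.pkg.tar.zst',
             '/var/cache/pacman/pkg/zsh-5.8-1-x86_64.pkg.tar.zst']
    return {'-'.join(os.path.basename(p).split('-')[:-1]) for p in paths}

# _demo_trim() -> {'linux-5.10.4.arch2-1', 'zsh-5.8-1'}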
def fresh_install(lang, uc, config):
    if uc[lang + '_path'] == '/path/here':
        paf.prWarning('Your Config File Has Not Been Set Up for the ' + lang.upper() + ' Stream!')
        sys.exit('Edit the File ' + config['user_config'] + ' and Re-Run Your Command!')
    if not os.path.exists(uc[lang + '_path']):
        os.makedirs(uc[lang + '_path'])

    paf.prWarning('Scanning File System...')
    files = paf.basenames(paf.find_files(uc[lang + '_path']))
    files = {"http://data.gdeltproject.org/gdeltv2/" + f for f in files}
    paf.export_iterable(config['base'] + '/prev-' + lang + '.txt', files)
    paf.export_iterable(config['base'] + '/404-' + lang + '.txt', [])
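# Hypothetical shapes for the dicts fresh_install() expects; only the
# lang + '_path', 'user_config', and 'base' keys are read above, and every
# value here is an invented example.
#
#   uc = {'english_path': '/srv/gdelt/english'}
#   config = {'user_config': '/etc/gdelt/user_config.toml',
#             'base': '/srv/gdelt'}
#   fresh_install('english', uc, config)
#
# The call scans the stream directory, records every file already present
# as a full gdeltv2 URL in prev-english.txt, and creates an empty
# 404-english.txt beside it.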
def fetch(url_list, storage_path):
    fzf_new = set()
    folders = set()
    for f in paf.basenames(url_list):
        if f:
            # file names start with a YYYYMMDDHHMMSS timestamp, so the
            # first six characters give a /YYYY/MM storage folder
            folders.add('/' + f[:4] + '/' + f[4:6])
    for x in folders:
        if not os.path.exists(storage_path + x):
            os.makedirs(storage_path + x)

    for url in paf.progress_bar(url_list, 'Downloading ' + str(len(url_list)) + ' Files'):
        try:
            f = requests.get(url)
        except Exception:
            fzf_new.add(url)
            continue
        # a non-200 response (e.g. a 404) would otherwise be written to
        # disk as if it were a real file, so report it as a failure too
        if f.status_code != 200:
            fzf_new.add(url)
            continue
        fname = paf.basename(url)
        folder = '/' + fname[:4] + '/' + fname[4:6] + '/'
        with open(storage_path + folder + fname, 'wb') as csv:
            csv.write(f.content)
    return fzf_new
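# Sketch of a fetch() call. The file name follows GDELT v2's
# YYYYMMDDHHMMSS naming, so this download would land under /2021/01 of
# the (hypothetical) storage path; anything that fails comes back in the
# returned set so the caller can log and retry it.
#
#   urls = ['http://data.gdeltproject.org/gdeltv2/20210101000000.export.CSV.zip']
#   failed = fetch(urls, '/srv/gdelt/english')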
def scan_caches(config):
    '''
    Always returns a unique list of pkgs found on the file sys.
    When searching through rp directories, many 'duplicate' hardlinked
    files exist. This logic ensures that the list of packages returned
    is actually unique.
    '''
    fname = 'utils.scan_caches()'
    paf.write_to_log(fname, 'Started Scanning Directories for Packages...', config['log'])

    # Searches Known Package Cache Locations
    # (find_cache_paths(), find_pkgs_in_dir() and first_pkg_path() are
    # helpers defined elsewhere in this module)
    pkg_paths = find_pkgs_in_dir(find_cache_paths(config))
    unique_pkgs = list(paf.basenames(pkg_paths))
    paf.write_to_log(fname, 'Searched ALL Package Cache Locations', config['log'])

    # Branch If Filter Is Needed
    if len(pkg_paths) != len(unique_pkgs):
        # Find Unique Packages By Inode Number
        inodes = set()
        inode_filter = set()
        for x in pkg_paths:
            i = os.lstat(x)[stat.ST_INO]
            if i not in inodes:
                inode_filter.add(x)
                inodes.add(i)
        paf.write_to_log(
            fname, 'Found ' + str(len(inode_filter)) + ' Package Inodes!', config['log'])

        if len(inode_filter) != len(unique_pkgs):
            # THIS SHOULD BASICALLY NEVER RUN
            paf.write_to_log(
                fname, 'File System Contains Non-Hardlinked Duplicate Packages!', config['log'])
            paf.write_to_log(fname, 'Attempting to Filter Packages With Regex...', config['log'])
            thread_cap = 4

            # This Chunks the List of unique_pkgs Into Pieces
            chunk_size = int(round(len(unique_pkgs) / paf.max_threads(thread_cap), 0)) + 1
            chunks = [unique_pkgs[i:i + chunk_size]
                      for i in range(0, len(unique_pkgs), chunk_size)]

            # Creates Pool of Threads to Filter Based on File Name
            with mp.Pool(processes=paf.max_threads(thread_cap)) as pool:
                filter_fs = pool.starmap(
                    first_pkg_path, zip(chunks, itertools.repeat(inode_filter)))
                filter_fs = set(itertools.chain(*filter_fs))
        else:
            filter_fs = inode_filter

        paf.write_to_log(
            fname, 'Returned ' + str(len(filter_fs)) + ' Unique Cache Packages', config['log'])
        return filter_fs

    else:
        paf.write_to_log(
            fname, 'Returned ' + str(len(pkg_paths)) + ' Cached Packages', config['log'])
        return pkg_paths
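# Standalone sketch of the inode dedup used above: hardlinked copies of a
# package share an inode, so keeping the first path seen per inode leaves
# exactly one path per physical file. The input paths are hypothetical.
def _demo_dedup_by_inode(paths):
    seen = set()
    keep = set()
    for p in paths:
        ino = os.lstat(p)[stat.ST_INO]
        if ino not in seen:
            seen.add(ino)
            keep.add(p)
    return keep

# _demo_dedup_by_inode(['/rp/00/zsh-5.8-1-x86_64.pkg.tar.zst',
#                       '/cache/zsh-5.8-1-x86_64.pkg.tar.zst'])
# returns only one of the two paths when they are hardlinked together.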