Example #1
def too_many_pkgs_found(config, parms, found_pkgs, pkg_results):
    """
    This automatically resolves some very bizarre edge cases I have run into.
    """
    fname = 'error.too_many_pkgs_found(' + parms['type'] + parms['id'] + ')'
    paf.write_to_log(fname, 'Starting Debug Process...', config['log'])

    found_files = utils.trim_pkg_list(paf.basenames(found_pkgs))
    search_files = paf.basenames(pkg_results['search'])
    bad_files = (found_files - search_files)
    paf.write_to_log(
        fname, 'Debug Process Found ' + str(len(bad_files)) +
        ' Files That Do Not Belong!', config['log'])

    if len(found_files) - len(search_files) == len(bad_files):
        paf.write_to_log(fname, 'Cleaning Found Files...', config['log'])
        bad_files_full = set()

        # Map each offending basename back to its full path before removing it
        for b in bad_files:
            for f in found_pkgs:
                if re.search(b, f):
                    bad_files_full.add(f)

        for f in bad_files_full:
            found_pkgs.remove(f)

        paf.write_to_log(fname, 'Debug Process Was Able to Fix All Issues!',
                         config['log'])
        return (True, found_pkgs)

    else:
        paf.write_to_log(fname,
                         'Debug Process Was NOT Able to Fix All Issues!',
                         config['log'])
        return (False, found_pkgs)
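A minimal sketch of the set difference that drives this debug pass, using made-up package names (not from the original code):

found_files = {'pacman-6.0.1-2', 'glibc-2.35-3', 'stray-pkg-1.0-1'}
search_files = {'pacman-6.0.1-2', 'glibc-2.35-3'}
bad_files = found_files - search_files  # {'stray-pkg-1.0-1'} does not belong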
Example #2
def trim_pkg_list(pkg_list):
    '''
    Removes the prefix dir and the x86_64.pkg.tar.zst suffix.
    This seems to be the fastest way to reduce all file paths to a unique
    list of package versions present on the system.
    '''
    return {'-'.join(pkg.split('-')[:-1]) for pkg in paf.basenames(pkg_list)}
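A quick illustration of the trim, assuming paf.basenames() strips the directory prefix; the path below is hypothetical:

trim_pkg_list(['/var/cache/pacman/pkg/pacman-6.0.1-2-x86_64.pkg.tar.zst'])
# -> {'pacman-6.0.1-2'}  (everything after the last '-' is dropped)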
Example #3
def fresh_install(lang, uc, config):
    if uc[lang + '_path'] == '/path/here':
        paf.prWarning('Your Config File Has Not Been Set Up for the ' + lang.upper() + ' Stream!')
        sys.exit('Edit the File ' + config['user_config'] + ' and Re-Run Your Command!')

    if not os.path.exists(uc[lang + '_path']):
        os.makedirs(uc[lang + '_path'])

    paf.prWarning('Scanning File System...')
    files = paf.basenames(paf.find_files(uc[lang + '_path']))
    files = {"http://data.gdeltproject.org/gdeltv2/" + f for f in files}
    paf.export_iterable(config['base'] + '/prev-' + lang + '.txt', files)
    paf.export_iterable(config['base'] + '/404-' + lang + '.txt', [])
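For example, a file already on disk (hypothetical name) is re-expanded into its original download URL before being written to the prev-<lang>.txt list:

files = {'20230101000000.export.CSV.zip'}
files = {"http://data.gdeltproject.org/gdeltv2/" + f for f in files}
# -> {'http://data.gdeltproject.org/gdeltv2/20230101000000.export.CSV.zip'}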
Example #4
def fetch(url_list, storage_path):
    fzf_new = set()
    folders = set()
    # Build '/YYYY/MM' style sub-folders from the first six characters of each file name
    for f in paf.basenames(url_list):
        if f:
            folders.add(str('/' + f[:4] + '/' + f[4:6]))

    for x in folders:
        if not os.path.exists(storage_path + x):
            os.makedirs(storage_path + x)

    for url in paf.progress_bar(url_list, 'Downloading ' + str(len(url_list)) + ' Files'):
        try:
            f = requests.get(url)
        except Exception:
            # Record the failed URL so the caller can retry it later
            fzf_new.add(url)
            continue

        fname = paf.basename(url)
        folder = str('/' + fname[:4] + '/' + fname[4:6] + '/')
        # Write the downloaded content into its matching sub-folder
        with open(storage_path + folder + fname, 'wb') as csv:
            csv.write(f.content)

    return fzf_new
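A hypothetical call (the URL and storage path are made up); the returned set holds every URL that failed to download, so the caller can retry it or record it (the 404-<lang>.txt file from Example #3 looks like the natural place):

failed = fetch(
    ['http://data.gdeltproject.org/gdeltv2/20230101000000.export.CSV.zip'],
    '/srv/gdelt')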
Example #5
def scan_caches(config):
    '''
    Always returns a unique list of packages found on the file system.
    When searching through rp directories, many 'duplicate' hardlinked files exist.
    This logic ensures that the list of packages returned is actually unique.
    '''
    fname = 'utils.scan_caches()'
    paf.write_to_log(fname, 'Started Scanning Directories for Packages...',
                     config['log'])

    # Searches Known Package Cache Locations
    pkg_paths = find_pkgs_in_dir(find_cache_paths(config))
    unique_pkgs = list(paf.basenames(pkg_paths))
    paf.write_to_log(fname, 'Searched ALL Package Cache Locations',
                     config['log'])

    # Branch If Filter Is Needed
    if len(pkg_paths) != len(unique_pkgs):
        # Find Unique Packages By Inode Number
        inodes = set()
        inode_filter = set()

        for x in pkg_paths:
            i = os.lstat(x)[stat.ST_INO]
            # Hardlinked duplicates share an inode, so keep only the first path seen
            if i not in inodes:
                inode_filter.add(x)
                inodes.add(i)

        paf.write_to_log(
            fname, 'Found ' + str(len(inode_filter)) + ' Package Inodes!',
            config['log'])

        if len(inode_filter) != len(unique_pkgs):
            # THIS SHOULD BASICALLY NEVER RUN
            paf.write_to_log(
                fname,
                'File System Contains Non-Hardlinked Duplicate Packages!',
                config['log'])
            paf.write_to_log(fname,
                             'Attempting to Filter Packages With Regex...',
                             config['log'])
            thread_cap = 4

            # This Chunks the List of unique_pkgs Into Pieces
            chunk_size = int(
                round(len(unique_pkgs) / paf.max_threads(thread_cap), 0)) + 1
            chunks = [
                unique_pkgs[i:i + chunk_size]
                for i in range(0, len(unique_pkgs), chunk_size)
            ]

            # Creates Pool of Threads to Filter Based on File Name
            with mp.Pool(processes=paf.max_threads(thread_cap)) as pool:
                filter_fs = pool.starmap(
                    first_pkg_path, zip(chunks,
                                        itertools.repeat(inode_filter)))
                filter_fs = set(itertools.chain(*filter_fs))

        else:
            filter_fs = inode_filter

        paf.write_to_log(
            fname,
            'Returned ' + str(len(filter_fs)) + ' Unique Cache Packages',
            config['log'])
        return filter_fs

    else:
        paf.write_to_log(
            fname, 'Returned ' + str(len(pkg_paths)) + ' Cached Packages',
            config['log'])
        return pkg_paths
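A self-contained sketch of the inode de-duplication idea used above, without the paf logging (the paths are hypothetical):

import os
import stat

def dedup_hardlinks(paths):
    seen_inodes = set()
    unique_paths = set()
    for p in paths:
        ino = os.lstat(p)[stat.ST_INO]  # hardlinked copies of a package share one inode
        if ino not in seen_inodes:
            seen_inodes.add(ino)
            unique_paths.add(p)
    return unique_paths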