def mywalk(top, skipdirs=['.snapshot', ]):
    """ returns subset of os.walk """
    for root, dirs, files in walk(top, topdown=True, onerror=walkerr):
        for skipdir in skipdirs:
            if skipdir in dirs:
                dirs.remove(skipdir)  # don't visit this directory
        yield root, dirs, files
def parse_dir(dir):
    ignores = Parser.load_ignores(dir)
    ignores.extend([".svn", ".hg", ".git"])

    def callback(res):
        dependencies.extend(res)

    def is_ignored(res, is_dir=False):
        if is_dir:
            res = res + "/"
        for i in ignores:
            if fnmatch.fnmatch(res, i) or res.startswith(i):
                return True
        return False

    def find_ignored(reslist, is_dir=False):
        return [res for res in reslist if is_ignored(res, is_dir)]

    pool = ThreadPool(processes=Parser.concurrency)
    dependencies = []
    for root, dirs, files in scandir.walk(dir):
        for d in find_ignored(dirs, True):
            logging.debug("%s is blacklisted" % d)
            dirs.remove(d)
        for f in find_ignored(files):
            logging.debug("%s is blacklisted" % f)  # log the file itself, not the last removed dir
            files.remove(f)
        for name in files:
            pool.apply_async(Parser.parse_file,
                             args=(os.path.join(root, name),),
                             callback=callback)
    pool.close()
    pool.join()
    return dependencies
def download_gallery(self, gallery_url, download_path, options, status, root=False):
    """
    Download a complete gallery; calls download_gallery_images for the
    actual image download.

    This creates the folder structure and walks through it, calling
    download_gallery_images to download the images.
    """
    current_webpage = common.fetch_webpage(session=self.session,
                                           url=gallery_url, timeout=45)
    soup = BeautifulSoup(current_webpage)
    #
    # Grab the main web page from the URL to fetch
    #
    # Search for folders
    folder_list = self.search_for_folders(soup_bowl=soup)
    for (subgallery_name, subgallery_url) in folder_list:
        #
        # Process the folder list, and download
        # the images for the subfolders
        #
        if options.downloadlimit > 0 and \
                status.return_downloads() >= options.downloadlimit:
            print "X",
            return status

        if subgallery_name is not None:
            subgallery_dl_path = download_path + os.sep + \
                common.clean_filename(subgallery_name) + os.sep
            if subgallery_url != gallery_url:
                #
                # Clubs typically have the featured gallery which points to
                # itself and can cause a recursion loop
                #
                status = self.download_gallery(subgallery_url,
                                               subgallery_dl_path,
                                               options, status, root=False)
                time.sleep(1)

    gallery_name = soup.title.text
    gallery_name = gallery_name[0:gallery_name.find(" by ")].strip()

    if root:
        for root, dirnames, filenames in scandir.walk(download_path):
            for filename in filenames:
                self.root_checker[filename.lower().strip()] = True

    status = self.download_gallery_images(gallery_url, download_path,
                                          options, status, root=root)
    return status
def source_directory_path(self):
    """
    Path to the directory where source strings are stored.

    Paths are identified using a scoring system; more likely directory
    names get higher scores, as do directories with formats that are only
    used for source strings.
    """
    # If source repository is explicitly marked
    source_repository = self.db_project.source_repository
    if source_repository.source_repo:
        return source_repository.checkout_path

    possible_sources = []
    for root, dirnames, filenames in scandir.walk(self.checkout_path):
        for dirname in dirnames:
            if dirname in self.SOURCE_DIR_NAMES:
                score = self.SOURCE_DIR_SCORES[dirname]

                # Ensure the matched directory contains resources.
                directory_path = os.path.join(root, dirname)
                if directory_contains_resources(directory_path):
                    # Extra points for source resources!
                    if directory_contains_resources(directory_path, source_only=True):
                        score += 3

                    possible_sources.append((directory_path, score))

    if possible_sources:
        return max(possible_sources, key=lambda s: s[1])[0]
    else:
        raise MissingSourceDirectoryError(
            'No source directory found for project {0}'.format(self.db_project.slug)
        )
def imgDirectoryProcessing(path):
    global workerPool, workerOutput
    workerPool = Pool()
    workerOutput = []
    options.imgIndex = {}
    options.imgPurgeIndex = []
    work = []
    pagenumber = 0
    for (dirpath, dirnames, filenames) in walk(path):
        for afile in filenames:
            pagenumber += 1
            work.append([afile, dirpath, options])
    if GUI:
        GUI.progressBarTick.emit(str(pagenumber))
    if len(work) > 0:
        for i in work:
            workerPool.apply_async(func=imgFileProcessing, args=(i, ), callback=imgFileProcessingTick)
        workerPool.close()
        workerPool.join()
        if GUI and not GUI.conversionAlive:
            rmtree(os.path.join(path, '..', '..'), True)
            raise UserWarning("Conversion interrupted.")
        if len(workerOutput) > 0:
            rmtree(os.path.join(path, '..', '..'), True)
            raise RuntimeError("One of the workers crashed. Cause: " + workerOutput[0][0], workerOutput[0][1])
        for file in options.imgPurgeIndex:
            if os.path.isfile(file):
                saferRemove(file)
    else:
        rmtree(os.path.join(path, '..', '..'), True)
        raise UserWarning("Source directory is empty.")
def find_mp3s(path):
    """
    - path: directory path containing mp3s, or a text file playlist
    """
    path = os.path.abspath(os.path.expanduser(path))
    if os.path.isfile(path):
        with open(path, 'r') as fp:
            text = fp.read()
        results = [mp3 for mp3 in re.split('\r?\n', text) if mp3]
    elif os.path.isdir(path):
        results = []
        for dirpath, dirnames, filenames in walk(path):
            files = [f for f in filenames if f.lower().endswith('.mp3')]
            if not files:
                continue
            results.extend([os.path.join(dirpath, f) for f in files])
    else:
        print('{} is not a file or a directory'.format(repr(path)))
        import ipdb; ipdb.set_trace()
    return results
def recursive_gallery_check(path):
    """
    Recursively checks a folder for any potential galleries.

    Returns a list of paths for directories and a list of tuples where the
    first index is the path to a gallery inside an archive and the second
    index is the path to the archive.
    Like this:
    ["C:path/to/g"] and [("path/to/g/in/a", "C:path/to/a")]
    """
    gallery_dirs = []
    gallery_arch = []
    for root, subfolders, files in scandir.walk(path):
        if files:
            for f in files:
                if f.endswith(ARCHIVE_FILES):
                    arch_path = os.path.join(root, f)
                    for g in check_archive(arch_path):
                        gallery_arch.append((g, arch_path))
        if not subfolders:
            if not files:
                continue
            gallery_probability = len(files)
            for f in files:
                if not f.lower().endswith(IMG_FILES):
                    gallery_probability -= 1
            if gallery_probability >= (len(files) * 0.8):
                gallery_dirs.append(root)
    return gallery_dirs, gallery_arch
def list_dirs(d, suffix=None, reverse=False):
    """A generator that works much like :py:func:`os.listdir`, only
    recursively (and only returns files, not directories).

    :param d: The directory to start in
    :type d: str
    :param suffix: Only return files with the given suffix
    :type suffix: str or list
    :param reverse: Returns result sorted in reverse alphabetic order
    :type reverse: bool
    :returns: the full path (starting from d) of each matching file
    :rtype: generator
    """
    try:
        from scandir import walk
    except ImportError:
        from os import walk
    if isinstance(suffix, str):
        suffix = [suffix]
    for (dirpath, dirnames, filenames) in walk(d, topdown=True):
        dirnames.sort(reverse=reverse, key=split_numalpha)
        for filename in sorted(filenames, key=split_numalpha, reverse=reverse):
            fullpath = dirpath + os.sep + filename
            if suffix:
                for s in suffix:
                    if fullpath.endswith(s):
                        yield fullpath
            else:
                yield fullpath
def extractCBR(self, targetdir):
    cbrFile = rarfile.RarFile(self.origFileName)
    cbrFile.extractall(targetdir)
    for root, dirnames, filenames in walk(targetdir):
        for filename in filenames:
            if filename.startswith('__MACOSX') or filename.endswith('.DS_Store') or filename.endswith('humbs.db'):
                os.remove(os.path.join(root, filename))
def extract_xpi(xpi, path, expand=False, verify=True):
    """
    If expand is given, will look inside the expanded file and find anything
    in the allow list and try and expand it as well. It will do up to 10
    iterations, after that you are on your own.

    It will replace the expanded file with a directory and the expanded
    contents. If you have 'foo.jar', that contains 'some-image.jpg', then it
    will create a folder, foo.jar, with an image inside.
    """
    expand_allow_list = ['.crx', '.jar', '.xpi', '.zip']
    tempdir = extract_zip(xpi)
    all_files = get_all_files(tempdir)

    if expand:
        for x in xrange(0, 10):
            flag = False
            for root, dirs, files in scandir.walk(tempdir):
                for name in files:
                    if os.path.splitext(name)[1] in expand_allow_list:
                        src = os.path.join(root, name)
                        if not os.path.isdir(src):
                            dest = extract_zip(
                                src, remove=True, raise_on_failure=False)
                            all_files.extend(get_all_files(
                                dest, strip_prefix=tempdir, prefix=src))
                            if dest:
                                copy_over(dest, src)
                                flag = True
            if not flag:
                break

    copy_over(tempdir, path)
    return all_files
def detectCorruption(tmpPath, orgPath):
    imageNumber = 0
    imageSmaller = 0
    for root, dirs, files in walk(tmpPath, False):
        for name in files:
            if getImageFileName(name) is not None:
                path = os.path.join(root, name)
                pathOrg = orgPath + path.split('OEBPS' + os.path.sep + 'Images')[1]
                if os.path.getsize(path) == 0:
                    rmtree(os.path.join(tmpPath, '..', '..'), True)
                    raise RuntimeError('Image file %s is corrupted.' % pathOrg)
                try:
                    img = Image.open(path)
                    img.verify()
                    img = Image.open(path)
                    img.load()
                    imageNumber += 1
                    if options.profileData[1][0] > img.size[0] and options.profileData[1][1] > img.size[1]:
                        imageSmaller += 1
                except Exception as err:
                    rmtree(os.path.join(tmpPath, '..', '..'), True)
                    if 'decoder' in str(err) and 'not available' in str(err):
                        raise RuntimeError('Pillow was compiled without JPG and/or PNG decoder.')
                    else:
                        raise RuntimeError('Image file %s is corrupted.' % pathOrg)
            else:
                saferRemove(os.path.join(root, name))
    if imageSmaller > imageNumber * 0.25 and not options.upscale and not options.stretch:
        print("WARNING: More than 1/4 of images are smaller than target device resolution. "
              "Consider enabling stretching or upscaling to improve readability.")
        if GUI:
            GUI.addMessage.emit('More than 1/4 of images are smaller than target device resolution.', 'warning', False)
            GUI.addMessage.emit('Consider enabling stretching or upscaling to improve readability.', 'warning', False)
            GUI.addMessage.emit('', '', False)
def find_files(location, pattern, ignore_dirs=[], maxdepth=float('inf')):
    """ Find paths to images on disk matching a given pattern

    Args:
        location (str): root directory to search
        pattern (str): glob style pattern to search for
        ignore_dirs (iterable): list of directories to ignore from search
        maxdepth (int): maximum depth to recursively search

    Returns:
        list: list of files within location matching pattern
    """
    results = []
    if isinstance(ignore_dirs, str):
        # wrap a single directory name in a list; list() would split it into characters
        ignore_dirs = [ignore_dirs]
    location = os.path.normpath(location)
    num_sep = location.count(os.path.sep) - 1
    for root, dirs, files in walk(location, followlinks=True):
        if ignore_dirs:
            dirs[:] = [d for d in dirs if d not in ignore_dirs]
        depth = root.count(os.path.sep) - num_sep
        if depth > maxdepth:
            dirs[:] = []
            files[:] = []
        for fname in fnmatch.filter(files, pattern):
            results.append(os.path.abspath(os.path.join(root, fname)))
    return results
def get_audio_files(location):
    for path, dirs, files in walk(location):
        for f in files:
            if (f.endswith('.m4a') or f.endswith('.mp3') or f.endswith('.flac')) and not f.startswith('.'):
                print 'Got audio file:', f
                yield os.path.join(path, f)
def _find_changes(self):
    """
    Walks the filesystem. Identifies noteworthy files -- those that were
    added, removed, or changed (size, mtime or type).

    Returns a 3-tuple of sets of HashEntry objects:
        [0] added files
        [1] removed files
        [2] modified files

    self.entries is not modified; this method only reports changes.
    """
    added = set()
    modified = set()
    existing_files = set()
    for dirpath_str, _, filenames in walk(str(self.path)):
        dirpath = Path(dirpath_str)
        for filename in filenames:
            if filename == DB_FILENAME:
                continue
            abs_filename = (dirpath / filename).absolute()
            if abs_filename in self.entries:
                entry = self.entries[abs_filename]
                existing_files.add(entry)
                st = lstat(str(abs_filename))
                if entry != st:
                    modified.add(entry)
            else:
                entry = HashEntry(abs_filename)
                entry.update_attrs()
                added.add(entry)
    removed = set(self.entries.values()) - existing_files
    return added, removed, modified
def process_directory(self, path, recursive=True, timing=True):
    """
    Processes the specified directory, extracting file sizes for each file
    and adding them to a file-extension-indexed dictionary.

    :param path: the path to analyse
    :param recursive: true if processing should include sub-directories
    :param timing: true if path should be preprocessed to provide guidance on run-time
    :return:
    """
    # get number of files - have to scan dir once to start with
    print "\n\rProcessing {0}...".format(path)
    bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)

    # If user wants more accurate timing, preprocess directory to count files
    if timing:
        numfiles = self._count_dirs(path, recursive)
        bar.start(numfiles)

    # grab file extension and file sizes across all files in the specified directory
    for root, dirs, files in scandir.walk(path, followlinks=False):
        # if only processing the top level, remove dirs so os.walk doesn't progress further
        if not recursive:
            del dirs[:]
        for name in files:
            filename = os.path.join(root, name)
            fname, fext = os.path.splitext(filename)
            fext = self._convert_extension(fext.lower())  # lowercase the extension
            if os.path.exists(filename):
                if fext not in self.filestats:
                    self.filestats[fext] = RunningStat()
                self.filestats[fext].add(os.stat(filename).st_size)
            bar.update(bar.value + 1)
    bar.finish()
def iter_files(root, exts=None, recursive=False):
    """
    Iterate over file paths within root filtered by specified extensions.

    :param str root: Root folder to start collecting files
    :param iterable exts: Restrict results to given file extensions
    :param bool recursive: Whether to walk the complete directory tree
    :rtype collections.Iterable[str]: absolute file paths with given extensions
    """
    if exts is not None:
        exts = set((x.lower() for x in exts))

    def matches(e):
        return (exts is None) or (e in exts)

    if recursive is False:
        for entry in scandir(root):
            if has_scandir:
                ext = splitext(entry.name)[-1].lstrip('.').lower()
                if entry.is_file() and matches(ext):
                    yield entry.path
            else:
                ext = splitext(entry)[-1].lstrip('.').lower()
                if not isdir(entry) and matches(ext):
                    yield join(root, entry)
    else:
        for root, folders, files in walk(root):
            for f in files:
                ext = splitext(f)[-1].lstrip('.').lower()
                if matches(ext):
                    yield join(root, f)
def find_file(image):
    matches = []
    for root, dirnames, filenames in scandir.walk(assetDir):
        for filename in fnmatch.filter(filenames, image):
            matches.append(os.path.join(root, filename))
    return matches
def locale_directory_path(checkout_path, locale_code, parent_directories=None):
    """
    Path to the directory where strings for the given locale are stored.
    """
    possible_paths = []

    # Check paths that use underscore as locale/country code separator
    locale_code_variants = [locale_code, locale_code.replace('-', '_')]

    # Optimization for directories with a lot of paths: if parent_directories
    # is provided, we simply join it with locale_code and check if path exists
    for parent_directory in parent_directories:
        for locale in locale_code_variants:
            candidate = os.path.join(parent_directory, locale)
            if os.path.exists(candidate):
                possible_paths.append(candidate)

    if not possible_paths:
        for root, dirnames, filenames in scandir.walk(checkout_path):
            for locale in locale_code_variants:
                if locale in dirnames:
                    possible_paths.append(os.path.join(root, locale))

    for possible_path in possible_paths:
        if directory_contains_resources(possible_path):
            return possible_path

    # If locale directory empty (asymmetric formats)
    if possible_paths:
        return possible_paths[0]

    raise IOError('Directory for locale `{0}` not found'.format(
        locale_code or 'source'))
def getDirectorySize(start_path='.'):
    total_size = 0
    for dirpath, dirnames, filenames in walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
    return total_size
def scan_folder(self, path):
    (pathS, directoriesS, filesS) = ((), (), ())
    try:
        for (pathS, directoriesS, filesS) in myScandir.walk(path):
            break
        return (pathS, directoriesS, filesS)
    except os.error:
        log("Path", path, "is not accessible.")
def get_file_names():
    filenames = []
    for folder, _, files in walk(icloudpath):
        for filename in files:
            filenames.append((folder + '/' + filename)[len(icloudpath) + 1:])
    return filenames
def find_files(self):
    found_files = []
    for base_folder, folders, files in scandir.walk(self.path):
        for f in files:
            ext = os.path.splitext(f)[-1].lower()
            if ext in self.IMAGE_EXTS:
                found_files.append(os.path.join(base_folder, f))
        break
    return found_files
def list(self): self.log("info", "List of available RAT modules:") for folder, folders, files in walk(os.path.join(VIPER_ROOT, "modules/rats/")): for file_name in files: if not file_name.endswith(".py") or file_name.startswith("__init__"): continue self.log("item", os.path.join(folder, file_name))
def scan_files(path):
    """ Recursively scan a directory to find all files with the given extension. """
    ext = tuple(MEDIA_EXT)
    for root, _, files in scandir.walk(path):
        for f in files:
            if f.endswith(ext):
                yield Movie(path=root + '/' + f)
def locale_directory_paths(self):
    """
    A map of locale codes and their absolute directory paths.
    Create locale directory, if not in repository yet.
    """
    locale_directory_paths = {}
    parent_directories = set()

    for locale in self.locales:
        try:
            if self.configuration:
                locale_directory_paths[locale.code] = self.configuration.l10n_base
            else:
                locale_directory_paths[locale.code] = locale_directory_path(
                    self.checkout_path,
                    locale.code,
                    parent_directories,
                )
            parent_directory = get_parent_directory(locale_directory_paths[locale.code])

        except IOError:
            if not self.db_project.has_multi_locale_repositories:
                source_directory = self.source_directory_path
                parent_directory = get_parent_directory(source_directory)

                locale_code = locale.code
                if uses_undercore_as_separator(parent_directory):
                    locale_code = locale_code.replace('-', '_')

                locale_directory = os.path.join(parent_directory, locale_code)

                # For asymmetric formats, create empty folder
                if is_asymmetric_resource(next(self.relative_resource_paths())):
                    os.makedirs(locale_directory)

                # For other formats, copy resources from source directory
                else:
                    shutil.copytree(source_directory, locale_directory)

                    for root, dirnames, filenames in scandir.walk(locale_directory):
                        for filename in filenames:
                            path = os.path.join(root, filename)
                            if is_resource(filename):
                                os.rename(path, source_to_locale_path(path))
                            else:
                                os.remove(path)

                locale_directory_paths[locale.code] = locale_directory

            else:
                raise MissingLocaleDirectoryError(
                    'Directory for locale `{0}` not found'.format(locale.code)
                )

        parent_directories.add(parent_directory)

    return locale_directory_paths
def list(self):
    self.log('info', "List of available RAT modules:")
    for folder, folders, files in walk(os.path.join(VIPER_ROOT, 'modules/rats/')):
        for file_name in files:
            if not file_name.endswith('.py') or file_name.startswith('__init__'):
                continue
            self.log('item', os.path.join(folder, file_name))
def walkLevel(some_dir, level=1):
    some_dir = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(some_dir)
    num_sep = some_dir.count(os.path.sep)
    for root, dirs, files in walk(some_dir):
        dirs, files = walkSort(dirs, files)
        yield root, dirs, files
        num_sep_this = root.count(os.path.sep)
        if num_sep + level <= num_sep_this:
            del dirs[:]
def get_media_files(path):
    '''
    Using scandir's optimized walking algorithm, we can discard GNU's `find`.
    Only catches potential files via filename extension, but we could
    validate this in the future.
    '''
    for root, dirs, files in walk(path):
        for filename in files:
            if filename.endswith(('.m4a', '.mp3', '.ogg', '.oga', '.flac')):
                logger.debug('Found a potential media file: "%s"' % os.path.join(root, filename))
                yield os.path.join(root, filename)
def zip_folder_content(folder, filename):
    """Compress the _content_ of a folder."""
    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as dest:
        # Add each file/folder from the folder to the zip file.
        for root, dirs, files in scandir.walk(folder):
            relative_dir = os.path.relpath(root, folder)
            for file_ in files:
                dest.write(os.path.join(root, file_),
                           # We want the relative paths for the files.
                           arcname=os.path.join(relative_dir, file_))
def discover(dirpath='.', depth=None, treantdepth=None):
    """Find all Treants within given directory, recursively.

    Parameters
    ----------
    dirpath : string
        Directory within which to search for Treants.
    depth : int
        Maximum directory depth to tolerate while traversing in search of
        Treants. ``None`` indicates no depth limit.
    treantdepth : int
        Maximum depth of Treants to tolerate while traversing in search of
        Treants. ``None`` indicates no Treant depth limit.

    Returns
    -------
    found : Bundle
        Bundle of found Treants.

    """
    from .collections import Bundle

    found = list()
    startdepth = len(dirpath.split(os.sep))
    treantdirs = set()

    for root, dirs, files in scandir.walk(dirpath):
        # depth check; if too deep, next iteration
        if depth and len(root.split(os.sep)) - startdepth > depth:
            continue

        # Treant depth checking
        if treantdepth:

            # remove Treant dirs from our set of them if we've backed out
            for treantdir in list(treantdirs):
                if treantdir not in root:
                    treantdirs.remove(treantdir)

            # actual depth check
            if len(treantdirs) > treantdepth:
                continue

        for treanttype in _TREANTS:
            outnames = fnmatch.filter(files, "{}.*.json".format(treanttype))

            if treantdepth and outnames:
                treantdirs.add(root)

            paths = [os.path.join(root, file) for file in outnames]
            found.extend(paths)

    return Bundle(found)
def scan_archive_struct_stninfo(self, rootdir):
    # same as scan archive struct but looks for station info files
    self.archiveroot = rootdir

    stninfo = []
    path2stninfo = []
    for path, dirs, files in scandir.walk(rootdir):
        for file in files:
            file_path = os.path.join(path, file)
            if file.endswith(".info"):
                # only add valid rinex compressed files
                stninfo.append(file_path.rsplit(rootdir + '/')[1])
                path2stninfo.append(file_path)
            elif file.endswith('DS_Store') or file.startswith('._'):
                # delete the stupid mac files
                file_try_remove(file_path)

    return stninfo, path2stninfo
def findItems(path, depth=3, **kwargs):
    """
    Find and create items by walking the given path.

    :type path: str
    :type depth: int

    :rtype: collections.Iterable[studiolibrary.LibraryItem]
    """
    path = normPath(path)
    maxDepth = depth
    startDepth = path.count(os.path.sep)

    for root, dirs, files in walk(path, followlinks=True):

        files.extend(dirs)

        for filename in files:
            remove = False

            # Normalise the path for consistent matching
            path = os.path.join(root, filename)
            item = itemFromPath(path, **kwargs)

            if item:
                # Yield the item that matches/supports the current path
                yield item

                # Stop walking the dir if the item doesn't support nested items
                if not item.ENABLE_NESTED_ITEMS:
                    remove = True

            if remove and filename in dirs:
                dirs.remove(filename)

        if depth == 1:
            break

        # Stop walking the directory if the maximum depth has been reached
        currentDepth = root.count(os.path.sep)
        if (currentDepth - startDepth) >= maxDepth:
            del dirs[:]
def _get_rules(self):
    # Retrieve the list of rules and populate a list.
    rules = []
    count = 1
    # We loop through all rules paths (both in share as well as locally)
    # and we populate the list of rules.
    for root in self.rules_paths:
        for folder, folders, files in walk(root):
            for file_name in files:
                # Skip if the extension is not right, could cause problems.
                if not file_name.endswith('.yar') and not file_name.endswith('.yara'):
                    continue
                rules.append([count, os.path.join(folder, file_name)])
                count += 1
    return rules
def get_changed_files(repo_type, path, revision):
    """Return a list of changed files for the repository."""
    repo = VCSRepository.for_type(repo_type, path)
    log.info("Retrieving changed files for: {}:{}".format(path, revision))

    # If there's no latest revision we should return all the files in the
    # latest version of repository
    if revision is None:
        paths = []
        for root, _, files in scandir.walk(path):
            for f in files:
                if root[0] == "." or "/." in root:
                    continue
                paths.append(os.path.join(root, f).replace(path + "/", ""))

        return paths, []

    return (
        repo.get_changed_files(path, revision),
        repo.get_removed_files(path, revision),
    )
def recwalk(inputpath, sorting=True, folders=False, topdown=True):
    """Recursively walk through a folder. This provides a means to flatten
    out the files restitution (necessary to show a progress bar). This is a
    generator."""
    # If it's only a single file, return this single file
    if os.path.isfile(inputpath):
        abs_path = fullpath(inputpath)
        yield os.path.dirname(abs_path), os.path.basename(abs_path)
    # Else if it's a folder, walk recursively and return every file
    else:
        for dirpath, dirs, files in walk(inputpath, topdown=topdown):
            if sorting:
                files.sort()
                dirs.sort()  # sort directories in-place for ordered recursive walking
            # return each file
            for filename in files:
                yield (dirpath, filename)  # return directory (full path) and filename
            # return each directory
            if folders:
                for folder in dirs:
                    yield (dirpath, folder)
def CreateID_Label(root):
    # labelList for HMDB
    labelList = [
        'brush_hair', 'cartwheel', 'catch', 'chew', 'clap', 'climb',
        'climb_stairs', 'dive', 'draw_sword', 'dribble', 'drink', 'eat',
        'fall_floor', 'fencing', 'flic_flac', 'golf', 'handstand', 'hit',
        'hug', 'jump', 'kick', 'kick_ball', 'kiss', 'laugh', 'pick', 'pour',
        'pullup', 'punch', 'push', 'pushup', 'ride_bike', 'ride_horse',
        'run', 'shake_hands', 'shoot_ball', 'shoot_bow', 'shoot_gun', 'sit',
        'situp', 'smile', 'smoke', 'somersault', 'stand', 'swing_baseball',
        'sword', 'sword_exercise', 'talk', 'throw', 'turn', 'walk', 'wave'
    ]
    labelDict = dict()
    label = ''
    for i in range(len(labelList)):
        labelDict[labelList[i]] = i
        label = label + "0 "
    #print labelDict

    # clear files
    f = open("../dataset/ID.txt", 'w')
    f.truncate()
    f = open("../dataset/Label.txt", 'w')
    f.truncate()

    # traverse folder
    for path, subdsirs, files in scandir.walk(root):
        for filename in files:
            ID = filename.split('.')[0]
            labelName = path[path.rfind("/") + 1:]
            with open("../dataset/ID.txt", "a") as myfile:
                myfile.write(ID + '\n')
            try:
                LabelIndex = labelDict[labelName] * 2
                Label = label[:LabelIndex] + '1' + label[LabelIndex + 1:]
            except Exception:
                print "Cannot find this labelName:" + labelName
                return
            with open("../dataset/Label.txt", "a") as myfile:
                myfile.write(Label + '\n')
def extract_xpi(xpi, path, expand=False, verify=True):
    """
    If expand is given, will look inside the expanded file and find anything
    in the allow list and try and expand it as well. It will do up to 10
    iterations, after that you are on your own.

    It will replace the expanded file with a directory and the expanded
    contents. If you have 'foo.jar', that contains 'some-image.jpg', then it
    will create a folder, foo.jar, with an image inside.
    """
    expand_allow_list = ['.crx', '.jar', '.xpi', '.zip']
    tempdir = extract_zip(xpi)
    all_files = get_all_files(tempdir)

    if expand:
        for x in xrange(0, 10):
            flag = False
            for root, dirs, files in scandir.walk(tempdir):
                for name in files:
                    if os.path.splitext(name)[1] in expand_allow_list:
                        src = os.path.join(root, name)
                        if not os.path.isdir(src):
                            try:
                                dest = extract_zip(src, remove=True)
                            except zipfile.BadZipfile:
                                # We can safely ignore this here, this is
                                # only for recursive .zip/.jar extractions
                                log.exception(
                                    'Exception during recursive XPI expansion.'
                                )
                                continue

                            all_files.extend(get_all_files(
                                dest, strip_prefix=tempdir, prefix=src))

                            if dest:
                                copy_over(dest, src)
                                flag = True
            if not flag:
                break

    copy_over(tempdir, path)
    return all_files
def source_directory_path(self):
    """
    Path to the directory where source strings are stored.

    Paths are identified using a scoring system; more likely directory
    names get higher scores, as do directories with formats that are only
    used for source strings.
    """
    source_repository = self.db_project.source_repository

    # If project configuration provided, files could be stored in multiple
    # directories, so we just use the source repository checkout path
    if self.configuration:
        return source_repository.checkout_path

    # If source repository explicitly marked
    if source_repository.source_repo:
        return source_repository.checkout_path

    possible_sources = []
    for root, dirnames, filenames in scandir.walk(self.checkout_path):
        for dirname in dirnames:
            if dirname in self.SOURCE_DIR_NAMES:
                score = self.SOURCE_DIR_SCORES[dirname]

                # Ensure the matched directory contains resources.
                directory_path = os.path.join(root, dirname)
                if directory_contains_resources(directory_path):
                    # Extra points for source resources!
                    if directory_contains_resources(
                        directory_path, source_only=True
                    ):
                        score += 3

                    possible_sources.append((directory_path, score))

    if possible_sources:
        return max(possible_sources, key=lambda s: s[1])[0]
    else:
        raise MissingSourceDirectoryError(
            "No source directory found for project {0}".format(self.db_project.slug)
        )
def something(job, SIPDirectory, serviceDirectory, objectsDirectory, SIPUUID, date):
    # exitCode = 435
    exitCode = 0
    job.pyprint(SIPDirectory)
    # For every file and directory, try to find the matching file and
    # directory in the objects directory
    for (path, dirs, files) in scandir.walk(serviceDirectory):
        for file in files:
            servicePreExtension = "_me"
            originalPreExtension = "_m"
            file1Full = os.path.join(path, file).replace(
                SIPDirectory, "%SIPDirectory%", 1
            )  # service

            a = file.rfind(servicePreExtension + ".")
            if a != -1:
                file2Full = os.path.join(
                    path, file[:a] + originalPreExtension + "."
                ).replace(
                    SIPDirectory + "objects/service/", "%SIPDirectory%objects/", 1
                )  # service
            else:
                a = file.rfind(".")
                if a != -1:  # if a period is found
                    a += 1  # include the period
                file2Full = os.path.join(path, file[:a]).replace(
                    SIPDirectory + "objects/service/", "%SIPDirectory%objects/", 1
                )  # service

            f = File.objects.get(
                currentlocation=file1Full, removedtime__isnull=True, sip_id=SIPUUID
            )
            f.filegrpuse = "service"

            grp_file = File.objects.get(
                currentlocation__startswith=file2Full,
                removedtime__isnull=True,
                sip_id=SIPUUID,
            )
            f.filegrpuuid = grp_file.uuid
            f.save()

    return exitCode
def check_repository(self):
    unknown = []
    identified = {}
    allatt = {}
    for root, dirs, files in scandir.walk(self.repository):
        for f in files:
            fp = osp.join(root, f)
            res = parsefilepath(fp, self.hierarchy)
            if res is not None:
                datatype, att = res
                identified[fp] = datatype
                for k, v in att.items():
                    allatt.setdefault(k, []).append(v)
            else:
                unknown.append(fp[len(self.repository) + 1:])
    res = {'unknown': unknown, 'identified': identified, 'labels': allatt}
    print 'done'
    return res
def writeOF(): root = "../videos" w = 224 h = 224 c = 0 data = {} for path, subdsirs, files in scandir.walk(root): for filename in files: count = ofp.writeOpticalFlow(path, filename, w, h, c) if count: data[filename] = count print filename c += 1 with open("done.txt", "a") as myfile: myfile.write(filename + '-' + str(c) + '\n') with open('../dataset/frame_count.pickle', 'wb') as f: pickle.dump(data, f)
def statsDir(test, testvpxlocation, hn, numOfHost):
    num = 0
    p = os.path.join(testvpxlocation, 'traces')
    t = getTestName(test)
    for root, dirs, files in scandir.walk(p):
        for x in fnmatch.filter(dirs, t):
            statsFile = p + "/" + x + "/stats.html"
            # sometimes there will be multiple traces folders; read stats.html
            # to find out the IP to match the test-vpx
            a = "cat " + statsFile + " | grep -iE 'ESX host' | awk '{print $3}' "
            chkhostname = cmdline(a)
            chkhostname = chkhostname[:-1]
            if chkhostname == hn:
                if num < numOfHost:
                    # will only output hdd details on different host
                    if test == "combined_long_c1" or test == "combined_long_c2" or test == "7day_stress_c1" or test == "7day_stress_c2" or test == "7day_stress_af_c1" or test == "7day_stress_af_c2":
                        num = num + 1
                        return p
                    else:
                        num = num + 1
                        return statsFile
def _deposit_dip_to_dspace(self, source_path, ds_item, ds_sessionid):
    base_url = "{}/items/{}".format(
        self._get_base_url(self.ds_rest_url), ds_item["uuid"]
    )
    for root, __, files in scandir.walk(source_path):
        for name in files:
            bitstream_url = "{}/bitstreams?name={}".format(
                base_url, six.moves.urllib.parse.quote(name.encode("utf-8"))
            )
            try:
                with open(os.path.join(root, name), "rb") as content:
                    self._post(
                        bitstream_url,
                        data=content,
                        cookies={"JSESSIONID": ds_sessionid},
                    )
            except Exception:
                raise DSpaceRESTException(
                    "Error sending {} to {}.".format(name, bitstream_url)
                )
def resource_paths_without_config(self):
    """
    List of absolute paths for all supported source resources
    found within the given path.
    """
    path = self.source_directory_path
    for root, dirnames, filenames in scandir.walk(path):
        if is_hidden(root):
            continue

        # Ignore certain files in Mozilla repositories.
        if self.db_project.repository_url in MOZILLA_REPOS:
            filenames = [
                f for f in filenames if not f.endswith("region.properties")
            ]

        for filename in filenames:
            if is_resource(filename):
                yield os.path.join(root, filename)
def walk(fpath, **kwargs):
    '''
    Traverse through a directory tree.

    :param fpath: `str` The root file path
    :param excludes: `list` optional directories to exclude
    :rtype: `generator`
    '''
    kwargs.setdefault('excludes', [])
    excludes = kwargs.get('excludes')

    # transform glob patterns to regular expressions
    excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'

    for root, dirs, files in scandir.walk(fpath):
        # exclude dirs
        if excludes:
            dirs[:] = [os.path.join(root, d) for d in dirs]
            dirs[:] = [d for d in dirs if not re.match(excludes, d)]

        for name in files:
            fullpath = os.path.join(root, name)
            if os.path.isfile(fullpath):
                yield fullpath
def get_en_US_xpi_file_to_import(subdir):
    """Return an en-US.xpi file object ready to be imported.

    The file is generated from utilities/tests/firefox-data/<subdir>.
    """
    # en-US.xpi file is a ZIP file which contains embedded JAR file (which is
    # also a ZIP file) and a couple of other files. Embedded JAR file is
    # named 'en-US.jar' and contains translatable resources.

    # Get the root path where the data to generate .xpi file is stored.
    test_root = os.path.join(
        os.path.dirname(lp.translations.__file__),
        'utilities/tests/firefox-data', subdir)

    # First create a en-US.jar file to be included in XPI file.
    jarfile = tempfile.TemporaryFile()
    jar = zipfile.ZipFile(jarfile, 'w')
    jarlist = []
    data_dir = os.path.join(test_root, 'en-US-jar/')
    for root, dirs, files in scandir.walk(data_dir):
        for name in files:
            relative_dir = root[len(data_dir):].strip('/')
            jarlist.append(os.path.join(relative_dir, name))
    for file_name in jarlist:
        f = open(os.path.join(data_dir, file_name), 'r')
        jar.writestr(file_name, f.read())
    jar.close()
    jarfile.seek(0)

    # Add remaining bits and en-US.jar to en-US.xpi.
    xpifile = tempfile.TemporaryFile()
    xpi = zipfile.ZipFile(xpifile, 'w')
    for xpi_entry in scandir.scandir(test_root):
        if xpi_entry.name != 'en-US-jar':
            with open(xpi_entry.path) as f:
                xpi.writestr(xpi_entry.name, f.read())
    xpi.writestr('chrome/en-US.jar', jarfile.read())
    xpi.close()
    xpifile.seek(0)

    return xpifile
def test_suite():
    suite = unittest.TestSuite()

    # Find all the doctests in wadllib.
    packages = []
    for dirpath, dirnames, filenames in scandir.walk(topdir):
        if 'docs' in dirnames:
            docsdir = os.path.join(dirpath, 'docs')[len(topdir) + 1:]
            packages.append(docsdir)
    doctest_files = {}
    for docsdir in packages:
        for filename in os.listdir(os.path.join(topdir, docsdir)):
            if os.path.splitext(filename)[1] == '.txt':
                doctest_files[filename] = os.path.join(docsdir, filename)
    # Sort the tests.
    for filename in sorted(doctest_files):
        path = doctest_files[filename]
        doctest = LayeredDocFileSuite(path, package=wadllib)
        suite.addTest(doctest)
    return suite
def find_fast(directory, expression):
    """
    Finds all files in the given directory that match the given expression.

    @param directory    The directory.
    @param expression   The regular expression.
    """
    logging.debug("Searching expression {0} in directory "
                  "{1}".format(expression, directory))
    check.directory_exists(directory)

    matcher = re.compile(expression)
    files_found = []
    for root, dirs, files in scandir.walk(directory):
        for file_name in files:
            if matcher.match(file_name):
                path = os.path.join(root, file_name)
                path = os.path.abspath(path)
                files_found.append(path)

    return files_found
def find_filetype(dir, filetype):
    """
    Will find all files of a certain type (e.g. .vcf or .bam files) in a directory.
    Method will enter every subdirectory. Can look for only a single filetype at a time.

    :param dir: String of directory to walk.
    :param filetype: String of filetype to search for (e.g. .vcf or .bam)
    :return: list of tuples of file name and file directory
    """
    assert os.path.exists(dir), "Path {} does not exist.".format(dir)
    duplicates = 0
    # unique_files = dict([])
    unique_files = list(())
    seen_names = set()
    for (dirpath, dirnames, files) in walk(dir):
        for name in files:
            if name.endswith(filetype):
                # compare against names already seen; checking the tuple list
                # directly would never match a bare filename
                if name not in seen_names:
                    seen_names.add(name)
                    unique_files.append((name, os.path.join(dirpath, name)))
                else:
                    duplicates += 1
    return unique_files
def get_batch_logs(self, logDir, umc_instanceids, files_in_buffer=[]):
    pattern = re.compile(".+_[0-9]+.*\.log.{log_file_group}$".format(
        log_file_group=self.params.log_file_group))
    search_re = logDir + "/[a-zA-Z0-9\._\-]+/([a-zA-Z0-9\-\._]+)"
    # + "|".join(GlobalContext.config.umc_instanceids(False)) + ")$";

    batch = []
    cnt = 0
    for dirname, dirnames, filenames in walk(logDir):
        #Msg.info1_msg("walk: %s, filenames=%d"%(dirname,len(filenames)))
        m = re.match(search_re, dirname)
        if m and m.group(1) in umc_instanceids:
            for filename in filenames:
                fullfname = os.path.join(dirname, filename)
                if fullfname not in files_in_buffer and pattern.match(filename):
                    cnt = cnt + 1
                    if cnt <= self.params.max_batchsize_files:
                        batch.append(fullfname)
        if cnt > self.params.max_batchsize_files:
            break

    return sorted(batch, key=lambda fn: os.stat(fn).st_mtime, reverse=True)
def make(self, path):
    if self.topdirobj:
        raise Exception("This instance has already been used.")
    stat = os.stat(path)
    name = path.split('/')[-1]

    # setup progress bar
    items = sum([len(files) for r, d, files in walk(path)])
    self.prog = tqdm(desc='Building', total=items, unit=' items',
                     dynamic_ncols=True, leave=True)

    # build dirobjects
    self.topdirobj = self.__hashdir(name, path, stat)

    # close progress bar
    self.prog.close()
    sleep(0.2)

    return self.topdirobj.gethash()
def __find_package_location(self, package):
    """
    Looks for the package location inside the analyzed repository.

    @param package  The package to be found.

    @return The full path to the package file.
    """
    # FIXME: Currently YUM information about RPM location inside the given
    # repository is not accessible. That's why we manually search files in
    # the repository.
    package_name = _get_full_package_name(package)
    file_name = "{0}.rpm".format(package_name)

    # Check the most probable paths to speed up the search.
    # This gives a speed up from 10.760s to 0.711s of program run time.
    location = os.path.join(self.repository_path, self.arch, file_name)
    if os.path.exists(location):
        return location
    location = os.path.join(self.repository_path, "noarch", file_name)
    if os.path.exists(location):
        return location
    location = os.path.join(self.repository_path, file_name)
    if os.path.exists(location):
        return location

    location = None
    for root, dirs, files in scandir.walk(self.repository_path):
        for existing_file_name in files:
            if package_name in existing_file_name:
                location = os.path.join(self.repository_path, existing_file_name)

    if location is None:
        raise Exception("Failed to find package {0}!".format(package))
    else:
        return location
def getDirs(directory=os.path.curdir, depth=None, verbose=False, absolute=True, ignore=None):
    currDepth = 0
    folderList = list()
    if absolute:
        directory = os.path.abspath(directory)
    for root, dirs, files in walk(directory, topdown=True):
        currDepth += 1
        for dir in dirs:
            if dir == ".zfs":
                continue
            folderList.append(os.path.join(root, dir))
        if depth is None:
            continue
        if currDepth >= depth:
            break
    return folderList
def search(path, value):
    check = 0
    value = nocase(value)
    for item in scandir.walk(path):
        try:
            clist = []
            clist.extend(item[1]), clist.extend(item[2])
            for file in clist:
                if value in file.lower() or value in nocase(file):
                    print 'Found in: ', item[0]
                    if os.path.isfile(os.path.join(item[0], file)):
                        print 'File name: ', file
                    elif os.path.isdir(os.path.join(item[0], file)):
                        print 'Directory name: ', file
                    print '\n'
                    check = 1
                    break
        except UnicodeError:
            print 'Error in directory: ', item
            continue
    if check == 0:
        print 'Not Found'
def get_size_scandir(root_path):
    if not os.path.exists(root_path):
        return None
    if os.path.isfile(root_path):
        if os.path.islink(root_path):
            return 0
        else:
            return scandir.stat(root_path).st_size
    if os.path.isdir(root_path):
        total_size = 0
        for dirpath, dirnames, filenames in scandir.walk(root_path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                # skip if it is symbolic link
                if not os.path.islink(fp):
                    # total_size += os.path.getsize(fp)
                    total_size += scandir.stat(fp).st_size
        return total_size
    return None
def remove_duplicates(self, repository_path):
    rpms = dict()
    for root, dirs, files in scandir.walk(repository_path):
        for file in files:
            if file.endswith(".rpm"):
                split = file.rsplit('.', 3)
                path = os.path.join(root, file)
                if split[0] in rpms:
                    path0 = rpms[split[0]]
                    logging.debug("Select between {0} and {1}".format(path, path0))
                    split0 = path0.rsplit('.', 3)
                    rm_path = ''
                    if split[1] > split0[1]:
                        rpms[split[0]] = path
                        rm_path = os.path.join(repository_path, path0)
                    else:
                        rm_path = os.path.join(repository_path, path)
                    if os.path.exists(rm_path):
                        logging.debug("Removing {0}".format(rm_path))
                        os.remove(rm_path)
                else:
                    rpms[split[0]] = path
def writeOF(): root = "../../data/EchoNet-Dynamic/Videos" outpath = "../../data/EchoNet-Dynamic/flow" w = 112 h = 112 c = 0 data = {} # df = pd.read_csv(o.join(root, 'FileList.csv')) for path, subdsirs, files in scandir.walk(root): for filename in files: count = ofp.writeOpticalFlow(path, outpath, filename, w, h, c) if count: data[filename] = count print(filename) c += 1 with open("./doc_vid/done.txt", "a") as myfile: myfile.write(filename + '-' + str(c) + '\n') with open('./doc_vid/frame_count.pickle', 'wb') as f: pickle.dump(data, f)
def scan_archive_struct_stninfo(self, rootdir):
    # same as scan archive struct but looks for station info files
    self.archiveroot = rootdir

    stninfo = []
    path2stninfo = []
    for path, dirs, files in scandir.walk(rootdir):
        for file in files:
            if file.endswith(".info"):
                # only add valid rinex compressed files
                stninfo.append(os.path.join(path, file).rsplit(rootdir + '/')[1])
                path2stninfo.append(os.path.join(path, file))
            else:
                if file.endswith('DS_Store') or file[0:2] == '._':
                    # delete the stupid mac files
                    try:
                        os.remove(os.path.join(path, file))
                    except Exception:
                        sys.exc_clear()

    return stninfo, path2stninfo
def findDuplicateSizes(dirPath):
    # walks through directory to find files and sizes
    fileSizes = defaultdict(list)
    for root, _, fileNames in walk(dirPath):
        for fileName in fileNames:
            filePath = os.path.join(root, fileName)
            try:
                fileSize = os.stat(filePath).st_size
            except OSError as e:
                print "Could not access file {0} continuing...".format(filePath)
                print e
                # file not accessible - move on
                continue
            # use file size as dictionary key to group files by size
            fileSizes[fileSize].append(filePath)
    return fileSizes
def writeOF(): root = "F:\Dataset/UCF-101" w = 224 h = 224 c = 0 data = {} for path, subdsirs, files in scandir.walk(root): for filename in files: count = ofp.writeOpticalFlow(path, filename, w, h, c) if count: data[filename] = count print(filename) c += 1 with open("done.txt", "a") as myfile: myfile.write(filename + '-' + str(c) + '\n') with open( 'D:\MyDrivers\software/anaconda\wokspace\Video-Classification-2-Stream-CNN-master\dataset/frame_count.pickle', 'wb') as f: pickle.dump(data, f)