Пример #1
def mywalk(top, skipdirs=('.snapshot',)):
    """Walk ``top`` top-down, skipping directories named in ``skipdirs``.

    :param top: directory to start walking from
    :param skipdirs: directory names that must not be reported or descended
        into; defaults to ``('.snapshot',)``
    :returns: generator of ``(root, dirs, files)`` tuples as produced by
        ``walk``, with the skipped names removed from ``dirs``
    """
    for root, dirs, files in walk(top, topdown=True, onerror=walkerr):
        # Prune in place: in top-down mode the walker only descends into the
        # names that are still present in ``dirs`` when control returns to it.
        for skipdir in skipdirs:
            if skipdir in dirs:
                dirs.remove(skipdir)
        yield root, dirs, files
Пример #2
    # Walk `dir` and schedule Parser.parse_file for every file found, using a
    # thread pool sized by Parser.concurrency. Returns the `dependencies` list.
    # NOTE(review): this listing looks truncated -- `callback` has no body, so
    # nothing visible here ever adds to `dependencies`; confirm against the
    # upstream source.
    def parse_dir(dir): 
        ignores = Parser.load_ignores(dir)
        # Version-control metadata directories are always ignored.
        ignores.extend([".svn", ".hg", ".git"])

        def callback(res):
            # NOTE(review): body missing in this listing.

        def is_ignored(res, is_dir=False):
            # Directories are matched with a trailing slash appended.
            if is_dir:
                res = res + "/"
            for i in ignores:
                # Match either as an fnmatch pattern or as a plain prefix.
                if fnmatch.fnmatch(res, i) or res.startswith(i):
                    return True
            return False

        def find_ignored(reslist, is_dir=False):
            return [res for res in reslist if is_ignored(res, is_dir)]

        pool = ThreadPool(processes=Parser.concurrency)
        dependencies = []

        for root, dirs, files in scandir.walk(dir):
            # NOTE(review): ignored entries are only logged here; `dirs` is not
            # pruned and the loop below still submits every file.
            for d in find_ignored(dirs, True):
                logging.debug("%s is blacklisted" % d)
            for f in find_ignored(files):
                # NOTE(review): this formats `d`, not `f` -- looks like a
                # copy/paste slip.
                logging.debug("%s is blacklisted" % d)
            for name in files:
                pool.apply_async(Parser.parse_file, args = (os.path.join(root, name),), callback = callback)

        # NOTE(review): the pool is neither closed nor joined before returning.
        return dependencies
Пример #3
    # Fetch a gallery page, recurse into the sub-galleries found on it, then
    # download the gallery's own images via download_gallery_images.
    # NOTE(review): this listing is truncated -- the parameter list, the
    # docstring quotes and the tails of several calls are missing, so names
    # such as gallery_url, download_path, root, options and status cannot be
    # traced to their definitions here.
    def download_gallery(self,
            Download an complete gallery, calls download_gallery_images
            for the actual image download.

            This creates the folder structure, and walks through it
            calling download_gallery_images to download the images.
        current_webpage = common.fetch_webpage(session=self.session,
        soup = BeautifulSoup(current_webpage)
        #   Grab the main web page from the URL to fetch
        #   Search for folders
        folder_list = self.search_for_folders(soup_bowl=soup)
        for (subgallery_name, subgallery_url) in folder_list:
             #   Process the folder list, and download
             #   the images for the subfolders
            # Stop early once the configured download limit has been reached.
            if options.downloadlimit > 0 and \
                    status.return_downloads() >= options.downloadlimit:
                print "X",
                return status
            if subgallery_name != None:
                subgallery_dl_path = download_path + os.sep +\
                    common.clean_filename(subgallery_name) + os.sep
            if subgallery_url != gallery_url:
                #   Clubs typically have the featured gallery which points to
                #   itself and can cause a recursion loop
                status = self.download_gallery(subgallery_url,
        # The page title up to " by " is used as the gallery name.
        gallery_name = soup.title.text
        gallery_name = gallery_name[0:gallery_name.find(" by ")].strip()

        if root:
            # Record every file already present under download_path
            # (lower-cased, stripped) in self.root_checker.
            for root, dirnames, filenames in scandir.walk(download_path):
                for filename in filenames:
                    self.root_checker[filename.lower().strip()] = True

        status = self.download_gallery_images(gallery_url,

        return status
Пример #4
    # Return the directory that holds the project's source strings.
    # NOTE(review): this listing looks truncated -- the docstring quotes, an
    # `else:` before the final `raise`, and its closing parenthesis are missing.
    def source_directory_path(self):
        Path to the directory where source strings are stored.

        Paths are identified using a scoring system; more likely
        directory names get higher scores, as do directories with
        formats that only used for source strings.
        # If source repository explicitly marked
        source_repository = self.db_project.source_repository
        if source_repository.source_repo:
            return source_repository.checkout_path

        # Collect (path, score) candidates: every directory under the checkout
        # whose name is listed in SOURCE_DIR_NAMES and which contains resources.
        possible_sources = []
        for root, dirnames, filenames in scandir.walk(self.checkout_path):
            for dirname in dirnames:
                if dirname in self.SOURCE_DIR_NAMES:
                    score = self.SOURCE_DIR_SCORES[dirname]

                    # Ensure the matched directory contains resources.
                    directory_path = os.path.join(root, dirname)
                    if directory_contains_resources(directory_path):
                        # Extra points for source resources!
                        if directory_contains_resources(directory_path, source_only=True):
                            score += 3

                        possible_sources.append((directory_path, score))

        if possible_sources:
            # The candidate with the highest score wins.
            return max(possible_sources, key=lambda s: s[1])[0]
            # NOTE(review): unreachable as written; presumably the body of a
            # missing `else:` branch.
            raise MissingSourceDirectoryError(
                'No source directory found for project {0}'.format(self.db_project.slug)
Пример #5
# Queue every file under `path` for imgFileProcessing on a worker pool, and
# abort (removing the directory two levels above `path`) when the conversion
# is interrupted, a worker reported a failure, or no files were found.
# NOTE(review): this listing looks truncated -- the bodies of `if GUI:` and
# `if os.path.isfile(file):` are missing, as are the pool close/join calls and
# the `else:` that would guard the final "empty" branch.
def imgDirectoryProcessing(path):
    # The pool and the error list are module globals; the tick callback is
    # presumably what fills workerOutput -- TODO confirm.
    global workerPool, workerOutput
    workerPool = Pool()
    workerOutput = []
    options.imgIndex = {}
    options.imgPurgeIndex = []
    work = []
    pagenumber = 0
    # One work item per file: [file name, containing directory, options].
    for (dirpath, dirnames, filenames) in walk(path):
        for afile in filenames:
            pagenumber += 1
            work.append([afile, dirpath, options])
    if GUI:
    if len(work) > 0:
        for i in work:
            workerPool.apply_async(func=imgFileProcessing, args=(i, ), callback=imgFileProcessingTick)
        if GUI and not GUI.conversionAlive:
            rmtree(os.path.join(path, '..', '..'), True)
            raise UserWarning("Conversion interrupted.")
        # Any entry in workerOutput is treated as a worker failure.
        if len(workerOutput) > 0:
            rmtree(os.path.join(path, '..', '..'), True)
            raise RuntimeError("One of workers crashed. Cause: " + workerOutput[0][0], workerOutput[0][1])
        for file in options.imgPurgeIndex:
            if os.path.isfile(file):
        rmtree(os.path.join(path, '..', '..'), True)
        raise UserWarning("Source directory is empty.")
Пример #6
# Return a list of mp3 paths taken either from a playlist text file (one
# entry per non-empty line) or from a recursive scan of a directory.
# NOTE(review): this listing looks truncated -- the docstring quotes, the body
# of `if not files:` and the `else:` before the final error branch are missing.
def find_mp3s(path):
    - path: directory path containing mp3s, or a text file playlist
    # Expand "~" and make the path absolute before testing what it is.
    path = os.path.abspath(os.path.expanduser(path))
    if os.path.isfile(path):
        with open(path, 'r') as fp:
            text = fp.read()

        # Split on LF or CRLF and drop empty lines.
        results = [mp3 for mp3 in re.split('\r?\n', text) if mp3]
    elif os.path.isdir(path):
        results = []

        for dirpath, dirnames, filenames in walk(path):
            # The extension check is case-insensitive.
            files = [f for f in filenames if f.lower().endswith('.mp3')]

            if not files:

            results.extend([os.path.join(dirpath, f) for f in files])
        print('{} is not a file or a directory'.format(repr(path)))
        # NOTE(review): interactive debugger breakpoint left in the code.
        import ipdb; ipdb.set_trace()

    return results
Пример #7
# Walk `path` looking for galleries, both inside archive files and as plain
# directories of images; returns (gallery_dirs, gallery_arch).
# NOTE(review): this listing looks truncated -- the docstring quotes and the
# bodies of the inner `if not files:` and of the final probability check are
# missing, so nothing visible here ever appends to gallery_dirs.
def recursive_gallery_check(path):
	Recursively checks a folder for any potential galleries
	Returns a list of paths for directories and a list of tuples where first
	index is path to gallery in archive and second index is path to archive.
	Like this:
	["C:path/to/g"] and [("path/to/g/in/a", "C:path/to/a")]
	gallery_dirs = []
	gallery_arch = []
	for root, subfolders, files in scandir.walk(path):
		if files:
			# Every archive file is inspected for galleries it contains.
			for f in files:
				if f.endswith(ARCHIVE_FILES):
					arch_path = os.path.join(root, f)
					for g in check_archive(arch_path):
						gallery_arch.append((g, arch_path))
			# Only leaf directories (no subfolders) are considered further.
			if not subfolders:
				if not files:
				# Start from the file count and subtract one per non-image file.
				gallery_probability = len(files)
				for f in files:
					if not f.lower().endswith(IMG_FILES):
						gallery_probability -= 1
				# Threshold: at least 80% of the files must be images.
				if gallery_probability >= (len(files)*0.8):
	return gallery_dirs, gallery_arch
Пример #8
# Generator yielding file paths found under `d`, visiting directories and
# files in split_numalpha order (reversed when `reverse` is true).
# NOTE(review): this listing looks truncated -- the docstring is never closed,
# the `try:` before the scandir import is missing, and so is the `else:`
# before the final `yield fullpath`. As written, a path matching several
# suffixes would be yielded once per matching suffix.
def list_dirs(d, suffix=None, reverse=False):
    """A generator that works much like :py:func:`os.listdir`, only
    recursively (and only returns files, not directories).

    :param d: The directory to start in
    :type d: str
    :param suffix: Only return files with the given suffix
    :type suffix: str or list
    :param reverse: Returns result sorted in reverse alphabetic order
    :param type:
    :returns: the full path (starting from d) of each matching file
    :rtype: generator

        from scandir import walk
    except ImportError:
        from os import walk
    if isinstance(suffix, str):
        suffix = [suffix]
    for (dirpath, dirnames, filenames) in walk(d, topdown=True):
        dirnames.sort(reverse=reverse, key=split_numalpha)
        for filename in sorted(filenames, key=split_numalpha, reverse=reverse):
            fullpath = dirpath + os.sep + filename
            if suffix:
                for s in suffix:
                    if fullpath.endswith(s):
                        yield fullpath
                yield fullpath
Пример #9
 def extractCBR(self, targetdir):
     """Open the source RAR archive and purge OS junk files from ``targetdir``.

     Files whose name starts with ``__MACOSX`` or ends with ``.DS_Store`` or
     ``humbs.db`` are deleted from every directory under ``targetdir``.
     """
     # NOTE(review): the archive object is created but not used any further in
     # this block; confirm whether an extraction call is expected here.
     cbrFile = rarfile.RarFile(self.origFileName)
     junk_suffixes = ('.DS_Store', 'humbs.db')
     for folder, _subdirs, names in walk(targetdir):
         for name in names:
             if name.startswith('__MACOSX') or name.endswith(junk_suffixes):
                 os.remove(os.path.join(folder, name))
Пример #10
# Extract `xpi` into a temporary directory, optionally expand nested archives
# found inside it, copy the result to `path` and return the file list.
# NOTE(review): this listing looks truncated -- the docstring quotes, the
# statement that L238's dangling argument list belongs to, and the body of
# `if not flag:` are missing. `verify` is not used in the visible code.
def extract_xpi(xpi, path, expand=False, verify=True):
    If expand is given, will look inside the expanded file
    and find anything in the allow list and try and expand it as well.
    It will do up to 10 iterations, after that you are on your own.

    It will replace the expanded file with a directory and the expanded
    contents. If you have 'foo.jar', that contains 'some-image.jpg', then
    it will create a folder, foo.jar, with an image inside.
    expand_allow_list = ['.crx', '.jar', '.xpi', '.zip']
    tempdir = extract_zip(xpi)
    all_files = get_all_files(tempdir)

    if expand:
        # At most 10 passes over the tree.
        for x in xrange(0, 10):
            # `flag` records whether this pass expanded anything.
            flag = False
            for root, dirs, files in scandir.walk(tempdir):
                for name in files:
                    if os.path.splitext(name)[1] in expand_allow_list:
                        src = os.path.join(root, name)
                        # Skip names that are already directories.
                        if not os.path.isdir(src):
                            dest = extract_zip(
                                src, remove=True, raise_on_failure=False)
                                dest, strip_prefix=tempdir, prefix=src))
                            if dest:
                                # Put the extracted contents where the archive was.
                                copy_over(dest, src)
                                flag = True
            if not flag:

    copy_over(tempdir, path)
    return all_files
Пример #11
# Check every image file under `tmpPath` for corruption and warn when more
# than a quarter of the readable images are smaller than the target profile.
# NOTE(review): this listing looks truncated -- the `try:` matching the
# `except` below and at least two `else:` lines are missing, so the nesting of
# the image checks cannot be confirmed from here.
def detectCorruption(tmpPath, orgPath):
    imageNumber = 0
    imageSmaller = 0
    # Second positional argument False: presumably topdown=False -- TODO confirm
    # against the `walk` in scope.
    for root, dirs, files in walk(tmpPath, False):
        for name in files:
            if getImageFileName(name) is not None:
                path = os.path.join(root, name)
                # Map the temp path back to the original location for messages.
                pathOrg = orgPath + path.split('OEBPS' + os.path.sep + 'Images')[1]
                # A zero-byte file is reported as corrupted.
                if os.path.getsize(path) == 0:
                    rmtree(os.path.join(tmpPath, '..', '..'), True)
                    raise RuntimeError('Image file %s is corrupted.' % pathOrg)
                    img = Image.open(path)
                    img = Image.open(path)
                    imageNumber += 1
                    # Count images smaller than the profile in both dimensions.
                    if options.profileData[1][0] > img.size[0] and options.profileData[1][1] > img.size[1]:
                        imageSmaller += 1
                except Exception as err:
                    rmtree(os.path.join(tmpPath, '..', '..'), True)
                    # A missing decoder is reported separately from a bad file.
                    if 'decoder' in str(err) and 'not available' in str(err):
                        raise RuntimeError('Pillow was compiled without JPG and/or PNG decoder.')
                        raise RuntimeError('Image file %s is corrupted.' % pathOrg)
                saferRemove(os.path.join(root, name))
    if imageSmaller > imageNumber * 0.25 and not options.upscale and not options.stretch:
        print("WARNING: More than 1/4 of images are smaller than target device resolution. "
              "Consider enabling stretching or upscaling to improve readability.")
        if GUI:
            GUI.addMessage.emit('More than 1/4 of images are smaller than target device resolution.', 'warning', False)
            GUI.addMessage.emit('Consider enabling stretching or upscaling to improve readability.', 'warning', False)
            GUI.addMessage.emit('', '', False)
Пример #12
def find_files(location, pattern, ignore_dirs=(), maxdepth=float('inf')):
    """Find paths to files on disk matching a given pattern.

    Args:
        location (str): root directory to search
        pattern (str): glob style pattern matched against file names
        ignore_dirs (iterable): directory names to prune from the search; a
            single name may also be passed as a plain string
        maxdepth (int): maximum depth to recursively search, where
            ``location`` itself is depth 1

    Returns:
        list: absolute paths of the files within location matching pattern
    """
    if isinstance(ignore_dirs, str):
        # Wrap, don't split: list('name') would yield single characters.
        ignore_dirs = [ignore_dirs]

    location = os.path.normpath(location)
    num_sep = location.count(os.path.sep) - 1

    results = []
    for root, dirs, files in walk(location, followlinks=True):
        if ignore_dirs:
            # In-place assignment so the walker skips the pruned directories.
            dirs[:] = [d for d in dirs if d not in ignore_dirs]

        depth = root.count(os.path.sep) - num_sep
        if depth >= maxdepth:
            # Anything below this level would exceed maxdepth; don't descend.
            dirs[:] = []
        if depth > maxdepth:
            continue

        for fname in fnmatch.filter(files, pattern):
            results.append(os.path.abspath(os.path.join(root, fname)))

    return results
Пример #13
def get_audio_files(location):
    """Yield the path of every audio file found anywhere under ``location``.

    A file counts as audio when its name ends with ``.m4a``, ``.mp3`` or
    ``.flac`` and does not start with a dot. Each hit is also announced on
    standard output.

    :param location: directory to scan recursively
    :returns: generator of paths joined from the containing directory and
        the file name
    """
    audio_suffixes = ('.m4a', '.mp3', '.flac')
    for path, dirs, files in walk(location):
        for f in files:
            if f.startswith('.'):
                continue
            if f.endswith(audio_suffixes):
                # Single pre-formatted argument: prints the same text whether
                # ``print`` is the Python 2 statement or the Python 3 function.
                print('Got audio file: %s' % f)
                yield os.path.join(path, f)
Пример #14
    # Walk self.path and compare the files found with self.entries; returns
    # the tuple (added, removed, modified).
    # NOTE(review): this listing looks truncated -- the docstring quotes and the
    # bodies of `if filename == DB_FILENAME:` and `if entry != st:` are missing,
    # and nothing visible here adds to `added`, `modified` or `existing_files`.
    def _find_changes(self):
        Walks the filesystem. Identifies noteworthy files -- those
        that were added, removed, or changed (size, mtime or type).

        Returns a 3-tuple of sets of HashEntry objects:
        [0] added files
        [1] removed files
        [2] modified files

        self.entries is not modified; this method only reports changes.
        added = set()
        modified = set()
        existing_files = set()
        for dirpath_str, _, filenames in walk(str(self.path)):
            dirpath = Path(dirpath_str)
            for filename in filenames:
                if filename == DB_FILENAME:
                # Entries are keyed by absolute path.
                abs_filename = (dirpath / filename).absolute()
                if abs_filename in self.entries:
                    entry = self.entries[abs_filename]
                    # lstat: a symlink itself is examined, not its target.
                    st = lstat(str(abs_filename))
                    if entry != st:
                    entry = HashEntry(abs_filename)
        # Whatever is known in self.entries but was not seen on disk is removed.
        removed = set(self.entries.values()) - existing_files
        return added, removed, modified
Пример #15
    # Walk `path` (only its top level when `recursive` is false) and make sure
    # self.filestats has a RunningStat entry for every file extension seen.
    # NOTE(review): the docstring below is never closed and the body appears to
    # stop early in this listing -- `bar` and `numfiles` are assigned but not
    # used and no file sizes are recorded; confirm against the upstream source.
    def process_directory(self, path, recursive=True, timing=True):
        """ Processes the specified directory, extracting file sizes for each file and
            adding to a file extension indexed dictionary.
        :param path: the path to analyse
        :param recursive: true if processing should include sub-directories
        :param timing: true if path should be preprocessed to provide guidance on run-time

        # get number of files - have to scan dir once to start with
        print "\n\rProcessing {0}...".format(path)
        bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)

        # If user wants more accurate timing, preprocess directory to count files
        if timing:
            numfiles = self._count_dirs(path, recursive)

        # grab file extension and file sizes across all files in the specified directory
        for root, dirs, files in scandir.walk(path, followlinks=False):
            # if only processing the top level, remove dirs so os.walk doesn't progress further
            if not recursive:
                del dirs[:]

            for name in files:
                filename = os.path.join(root, name)
                fname, fext = os.path.splitext(filename)
                fext = self._convert_extension(fext.lower())   # lowercase all filenames

                if os.path.exists(filename):
                    if fext not in self.filestats:
                        self.filestats[fext] = RunningStat()
Пример #16
# Generator over file paths under `root`, optionally restricted to a set of
# extensions and optionally walking the whole tree.
# NOTE(review): this listing looks truncated -- the docstring quotes and the
# `else:` lines (after the `has_scandir` branch and before the recursive walk)
# are missing.
def iter_files(root, exts=None, recursive=False):
    Iterate over file paths within root filtered by specified extensions.

    :param str root: Root folder to start collecting files
    :param iterable exts: Restrict results to given file extensions
    :param bool recursive: Wether to walk the complete directory tree
    :rtype collections.Iterable[str]: absolute file paths with given extensions

    # Extensions are compared lower-cased; note that the leading dot is
    # stripped from file extensions before matching below.
    if exts is not None:
        exts = set((x.lower() for x in exts))

    def matches(e):
        # No filter given means every extension matches.
        return (exts is None) or (e in exts)

    if recursive is False:
        for entry in scandir(root):
            # With has_scandir, entries expose .name / .is_file() / .path;
            # otherwise they are handled as plain names.
            if has_scandir:
                ext = splitext(entry.name)[-1].lstrip('.').lower()
                if entry.is_file() and matches(ext):
                    yield entry.path
                ext = splitext(entry)[-1].lstrip('.').lower()
                if not isdir(entry) and matches(ext):
                    yield join(root, entry)
        for root, folders, files in walk(root):
            for f in files:
                ext = splitext(f)[-1].lstrip('.').lower()
                if matches(ext):
                    yield join(root, f)
Пример #17
def find_file(image):
    """Return the paths of all files under ``assetDir`` matching ``image``.

    ``image`` is an fnmatch-style pattern applied to bare file names; the
    result is a list of paths joined from the containing directory and the
    matching file name, in walk order.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in scandir.walk(assetDir)
        for name in fnmatch.filter(names, image)
    ]
Пример #18
# Return the directory that holds strings for `locale_code` inside
# `checkout_path`, preferring a candidate that contains resources.
# NOTE(review): this listing looks truncated -- the docstring is cut off and
# unquoted, and the body of `if os.path.exists(candidate):` is missing. As
# written, the loop over `parent_directories` would also fail for the default
# value None; a guard may have been lost.
def locale_directory_path(checkout_path, locale_code, parent_directories=None):
    Path to the directory where strings for the given locale are
    possible_paths = []

    # Check paths that use underscore as locale/country code separator
    locale_code_variants = [locale_code, locale_code.replace('-', '_')]

    # Optimization for directories with a lot of paths: if parent_directories
    # is provided, we simply join it with locale_code and check if path exists
    for parent_directory in parent_directories:
        for locale in locale_code_variants:
            candidate = os.path.join(parent_directory, locale)
            if os.path.exists(candidate):

    # Fall back to a full walk of the checkout when nothing was found above.
    if not possible_paths:
        for root, dirnames, filenames in scandir.walk(checkout_path):
            for locale in locale_code_variants:
                if locale in dirnames:
                    possible_paths.append(os.path.join(root, locale))

    # The first candidate that actually contains resources wins.
    for possible_path in possible_paths:
        if directory_contains_resources(possible_path):
            return possible_path

    # If locale directory empty (asymmetric formats)
    if possible_paths:
        return possible_paths[0]

    raise IOError('Directory for locale `{0}` not found'.format(
                  locale_code or 'source'))
Пример #19
def getDirectorySize(start_path='.'):
    """Return the combined size in bytes of all files under ``start_path``.

    Every directory below ``start_path`` is visited and the size reported by
    ``os.path.getsize`` is added up for each file; a tree without files
    gives 0.
    """
    return sum(
        os.path.getsize(os.path.join(folder, name))
        for folder, _subdirs, names in walk(start_path)
        for name in names
    )
Пример #20
 # Walk `path` with myScandir.walk and return a (path, directories, files)
 # tuple; an os.error is logged as an inaccessible path.
 # NOTE(review): this listing looks truncated -- the `try:` matching the
 # `except` and the body of the `for` loop are missing, so it is unclear
 # which iteration's tuple is meant to be returned.
 def scan_folder(self, path):
     # Defaults returned when the walk yields nothing.
     (pathS, directoriesS, filesS) = ((), (), ())
         for (pathS, directoriesS, filesS) in myScandir.walk(path):
         return (pathS, directoriesS, filesS)
     except os.error:
         log("Path", path, "is not accessible.")
def get_file_names():
    """Return every file under ``icloudpath`` as a path relative to it.

    Each entry is built as ``folder + '/' + filename`` with the leading
    ``icloudpath`` and the one character following it cut off.
    """
    prefix_len = len(icloudpath) + 1
    return [
        (folder + '/' + filename)[prefix_len:]
        for folder, _, files in walk(icloudpath)
        for filename in files
    ]
Пример #22
 def find_files(self):
     """Return the paths of all image files found under ``self.path``.

     A file qualifies when its lower-cased extension (including the dot) is
     contained in ``self.IMAGE_EXTS``.
     """
     return [
         os.path.join(folder, name)
         for folder, _subdirs, names in scandir.walk(self.path)
         for name in names
         if os.path.splitext(name)[-1].lower() in self.IMAGE_EXTS
     ]
Пример #23
    # Log the path of every RAT module file found under
    # VIPER_ROOT/modules/rats/.
    # NOTE(review): the body of the filter `if` is missing in this listing
    # (presumably a `continue` that skips non-.py files and __init__ files).
    def list(self):
        self.log("info", "List of available RAT modules:")

        for folder, folders, files in walk(os.path.join(VIPER_ROOT, "modules/rats/")):
            for file_name in files:
                if not file_name.endswith(".py") or file_name.startswith("__init__"):

                self.log("item", os.path.join(folder, file_name))
Пример #24
def scan_files(path):
    """Recursively scan a directory and yield a Movie for each media file.

    A file counts as media when its name ends with one of the suffixes in
    ``MEDIA_EXT``.

    :param path: directory to scan
    :returns: generator of ``Movie`` objects, each created with ``path`` set
        to the containing directory and the file name joined by ``/``
    """
    # str.endswith accepts a tuple of suffixes, so convert once up front.
    ext = tuple(MEDIA_EXT)
    for root, _, files in scandir.walk(path):
        for f in files:
            if f.endswith(ext):
                yield Movie(path=root + '/' + f)
Пример #25
    # Build and return a dict mapping each locale code in self.locales to its
    # directory path, creating the directory from the source directory when it
    # cannot be found.
    # NOTE(review): this listing looks truncated -- the docstring quotes, the
    # `try:` matching `except IOError`, several `else:` lines, the tails of
    # some calls and the body of the asymmetric-format branch are missing.
    # `parent_directories` is created but nothing visible here adds to it.
    def locale_directory_paths(self):
        A map of locale codes and their absolute directory paths.
        Create locale directory, if not in repository yet.
        locale_directory_paths = {}
        parent_directories = set()

        for locale in self.locales:
                if self.configuration:
                    locale_directory_paths[locale.code] = self.configuration.l10n_base
                    locale_directory_paths[locale.code] = locale_directory_path(
                parent_directory = get_parent_directory(locale_directory_paths[locale.code])

            except IOError:
                # Locale directory not found: derive it from the source
                # directory's parent, unless locales live in separate repos.
                if not self.db_project.has_multi_locale_repositories:
                    source_directory = self.source_directory_path
                    parent_directory = get_parent_directory(source_directory)

                    # Follow the separator convention already used in the parent.
                    locale_code = locale.code
                    if uses_undercore_as_separator(parent_directory):
                        locale_code = locale_code.replace('-', '_')

                    locale_directory = os.path.join(parent_directory, locale_code)

                    # For asymmetric formats, create empty folder
                    if is_asymmetric_resource(next(self.relative_resource_paths())):

                    # For other formats, copy resources from source directory
                        shutil.copytree(source_directory, locale_directory)

                        # Rename copied resource files to their locale names.
                        for root, dirnames, filenames in scandir.walk(locale_directory):
                            for filename in filenames:
                                path = os.path.join(root, filename)
                                if is_resource(filename):
                                    os.rename(path, source_to_locale_path(path))

                    locale_directory_paths[locale.code] = locale_directory

                    raise MissingLocaleDirectoryError(
                        'Directory for locale `{0}` not found'.format(locale.code)


        return locale_directory_paths
Пример #26
    # Log the path of every RAT module file found under
    # VIPER_ROOT/modules/rats/.
    # NOTE(review): the body of the filter `if` is missing in this listing
    # (presumably a `continue` that skips non-.py files and __init__ files).
    def list(self):
        self.log('info', "List of available RAT modules:")

        for folder, folders, files in walk(os.path.join(VIPER_ROOT, 'modules/rats/')):
            for file_name in files:
                if not file_name.endswith('.py') or file_name.startswith('__init__'):

                self.log('item', os.path.join(folder, file_name))
Пример #27
def walkLevel(some_dir, level=1):
    """Walk ``some_dir`` like ``walk`` but stop descending after ``level`` levels.

    Yields ``(root, dirs, files)`` with ``dirs`` and ``files`` passed through
    ``walkSort``. Depth is measured by counting path separators relative to
    ``some_dir`` (trailing separators are stripped first).
    """
    some_dir = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(some_dir)
    base_depth = some_dir.count(os.path.sep)
    for root, dirs, files in walk(some_dir):
        dirs, files = walkSort(dirs, files)
        yield root, dirs, files
        # NOTE(review): ``dirs`` was rebound above, so this pruning reaches the
        # walker only if walkSort returns the very same list object.
        if root.count(os.path.sep) >= base_depth + level:
            del dirs[:]
Пример #28
def get_media_files(path):
    """Yield paths of likely media files found anywhere under ``path``.

    Detection is by file name extension only; file contents are not
    inspected. Each hit is also reported on ``logger`` at debug level.
    """
    media_suffixes = ('.m4a', '.mp3', '.ogg', '.oga', '.flac')
    for folder, _subdirs, names in walk(path):
        for name in names:
            if not name.endswith(media_suffixes):
                continue
            candidate = os.path.join(folder, name)
            logger.debug('Found a potential media file: "%s"' % candidate)
            yield candidate
Пример #29
def zip_folder_content(folder, filename):
    """Write the files found under ``folder`` into a new zip at ``filename``.

    Entries are stored deflated under their path relative to ``folder``, so
    the archive holds the folder's content rather than the folder itself.
    """
    archive = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for current_dir, _subdirs, names in scandir.walk(folder):
            # Directory part of the archive name, relative to the zipped root.
            rel_dir = os.path.relpath(current_dir, folder)
            for name in names:
                on_disk = os.path.join(current_dir, name)
                in_archive = os.path.join(rel_dir, name)
                archive.write(on_disk, arcname=in_archive)
Пример #30
# Walk `dirpath` looking for Treant state files and return Bundle(found).
# NOTE(review): this listing looks truncated -- the docstring is never closed,
# the bodies of the depth checks and of the Treant bookkeeping are missing,
# the fnmatch.filter call is cut off, and nothing visible here adds to `found`.
def discover(dirpath='.', depth=None, treantdepth=None):
    """Find all Treants within given directory, recursively.

    dirpath : string
        Directory within which to search for Treants.
    depth : int
        Maximum directory depth to tolerate while traversing in search of
        Treants. ``None`` indicates no depth limit.
    treantdepth : int
        Maximum depth of Treants to tolerate while traversing in search
        of Treants. ``None`` indicates no Treant depth limit.

    found : Bundle
        Bundle of found Treants.

    from .collections import Bundle
    found = list()

    startdepth = len(dirpath.split(os.sep))
    treantdirs = set()

    for root, dirs, files in scandir.walk(dirpath):
        # depth check; if too deep, next iteration
        if depth and len(root.split(os.sep)) - startdepth > depth:

        # Treant depth checking
        if treantdepth:

            # remove Treant dirs from our set of them if we've backed out
            for treantdir in list(treantdirs):
                if treantdir not in root:

            # actual depth check
            if len(treantdirs) > treantdepth:

        for treanttype in _TREANTS:
            outnames = fnmatch.filter(files,

            if treantdepth and outnames:

            paths = [os.path.join(root, file) for file in outnames]

    return Bundle(found)
Пример #31
    # Walk `rootdir`, collect the paths of ".info" files relative to it, and
    # return (stninfo, path2stninfo).
    # NOTE(review): the body of the `elif` (Mac metadata clean-up) is missing in
    # this listing, and nothing visible here adds to `path2stninfo`.
    def scan_archive_struct_stninfo(self, rootdir):

        # same as scan archive struct but looks for station info files
        self.archiveroot = rootdir

        stninfo = []
        path2stninfo = []
        for path, dirs, files in scandir.walk(rootdir):
            for file in files:
                file_path = os.path.join(path, file)
                if file.endswith(".info"):
                    # only add valid rinex compressed files
                    # Keep the part of the path after "<rootdir>/".
                    stninfo.append(file_path.rsplit(rootdir + '/')[1])
                elif file.endswith('DS_Store') or file.startswith('._'):
                    # delete the stupid mac files

        return stninfo, path2stninfo
Пример #32
# Generator that walks `path` (following links) and yields an item for every
# file that itemFromPath recognises, down to at most `depth` levels.
# NOTE(review): this listing looks truncated -- the docstring quotes and the
# bodies of `if remove and filename in dirs:` and `if depth == 1:` are
# missing, and the loop body appears to have lost its leading statements.
def findItems(path, depth=3, **kwargs):
    Find and create items by walking the given path.

    :type path: str
    :type depth: int

    :rtype: collections.Iterable[studiolibrary.LibraryItem]
    path = normPath(path)

    maxDepth = depth
    # Depth is measured by counting path separators relative to the start path.
    startDepth = path.count(os.path.sep)

    for root, dirs, files in walk(path, followlinks=True):


        for filename in files:
            remove = False

            # Normalise the path for consistent matching
            # NOTE(review): this rebinds the `path` parameter inside the loop.
            path = os.path.join(root, filename)
            item = itemFromPath(path, **kwargs)

            if item:
                # Yield the item that matches/supports the current path
                yield item

                # Stop walking the dir if the item doesn't support nested items
                if not item.ENABLE_NESTED_ITEMS:
                    remove = True

            if remove and filename in dirs:

        if depth == 1:

        # Stop walking the directory if the maximum depth has been reached
        currentDepth = root.count(os.path.sep)
        if (currentDepth - startDepth) >= maxDepth:
            del dirs[:]
Пример #33
    # Return a list of [index, path] pairs for the rule files found under every
    # directory in self.rules_paths; the index starts at 1.
    # NOTE(review): the body of the extension filter `if` is missing in this
    # listing (presumably a `continue`).
    def _get_rules(self):
        # Retrieve the list of rules and populate a list.
        rules = []
        count = 1

        # We loop through all rules paths (both in share as well as locally)
        # and we populate the list of rules.
        for root in self.rules_paths:
            for folder, folders, files in walk(root):
                for file_name in files:
                    # Skip if the extension is not right, could cause problems.
                    if not file_name.endswith(
                            '.yar') and not file_name.endswith('.yara'):

                    rules.append([count, os.path.join(folder, file_name)])
                    count += 1

        return rules
Пример #34
# NOTE(review): this listing looks truncated -- the body of the hidden-path
# check is missing (presumably a `continue`) and the final `return (` is
# never closed.
def get_changed_files(repo_type, path, revision):
    """Return a list of changed files for the repository."""
    repo = VCSRepository.for_type(repo_type, path)
    log.info("Retrieving changed files for: {}:{}".format(path, revision))
    # If there's no latest revision we should return all the files in the latest
    # version of repository
    if revision is None:
        paths = []
        for root, _, files in scandir.walk(path):
            for f in files:
                # Matches directories that are hidden or sit below a hidden one.
                if root[0] == "." or "/." in root:
                # Store the path relative to the repository root.
                paths.append(os.path.join(root, f).replace(path + "/", ""))
        # Everything counts as changed; nothing is reported as removed.
        return paths, []

    # Otherwise the repository object supplies (changed, removed).
    return (
        repo.get_changed_files(path, revision),
        repo.get_removed_files(path, revision),
Пример #35
# NOTE(review): the `else:` that should introduce the directory branch is
# missing in this listing, so the `for` loop below is mis-indented as shown.
def recwalk(inputpath, sorting=True, folders=False, topdown=True):
    """Recursively walk through a folder. This provides a mean to flatten out the files restitution (necessary to show a progress bar). This is a generator."""
    # If it's only a single file, return this single file
    if os.path.isfile(inputpath):
        abs_path = fullpath(inputpath)
        yield os.path.dirname(abs_path), os.path.basename(abs_path)
    # Else if it's a folder, walk recursively and return every files
        for dirpath, dirs, files in walk(inputpath, topdown=topdown):	
            if sorting:
                dirs.sort()  # sort directories in-place for ordered recursive walking
                # NOTE(review): `files` is not sorted, only `dirs`.
            # return each file
            for filename in files:
                yield (dirpath, filename)  # return directory (full path) and filename
            # return each directory
            if folders:
                for folder in dirs:
                    yield (dirpath, folder)
# Walk `root` and append, for every file, its ID to ../dataset/ID.txt and a
# one-hot label string to ../dataset/Label.txt.
# NOTE(review): this listing looks truncated -- the closing bracket of
# labelList and the `try:` matching the `except` below are missing.
def CreateID_Label(root):

    # labelList for HMDB
    labelList = [
        'brush_hair', 'cartwheel', 'catch', 'chew', 'clap', 'climb',
        'climb_stairs', 'dive', 'draw_sword', 'dribble', 'drink', 'eat',
        'fall_floor', 'fencing', 'flic_flac', 'golf', 'handstand', 'hit',
        'hug', 'jump', 'kick', 'kick_ball', 'kiss', 'laugh', 'pick', 'pour',
        'pullup', 'punch', 'push', 'pushup', 'ride_bike', 'ride_horse', 'run',
        'shake_hands', 'shoot_ball', 'shoot_bow', 'shoot_gun', 'sit', 'situp',
        'smile', 'smoke', 'somersault', 'stand', 'swing_baseball', 'sword',
        'sword_exercise', 'talk', 'throw', 'turn', 'walk', 'wave'
    # Map each label name to its index and build an all-zero template of the
    # form "0 0 0 ... " (two characters per label).
    labelDict = dict()
    label = ''
    for i in range(len(labelList)):
        labelDict[labelList[i]] = i
        label = label + "0 "
    #print labelDict

    # clear files
    # NOTE(review): neither handle is closed explicitly.
    f = open("../dataset/ID.txt", 'w')
    f = open("../dataset/Label.txt", 'w')

    # traverse folder
    for path, subdsirs, files in scandir.walk(root):
        for filename in files:
            # The ID is the file name up to its first dot; the label name is the
            # last component of the containing directory path.
            ID = filename.split('.')[0]
            labelName = path[path.rfind("/") + 1:]
            with open("../dataset/ID.txt", "a") as myfile:
                myfile.write(ID + '\n')

                # Each label occupies two characters in the template, hence * 2.
                LabelIndex = labelDict[labelName] * 2
                Label = label[:LabelIndex] + '1' + label[LabelIndex + 1:]
            except Exception:
                print "Cannot find this labelName:" + labelName
            # NOTE(review): after a failed lookup `Label` still holds the value
            # from the previous file (or is unbound on the first one).
            with open("../dataset/Label.txt", "a") as myfile:
                myfile.write(Label + '\n')
Пример #37
def extract_xpi(xpi, path, expand=False, verify=True):
    """
    Extract ``xpi`` with ``extract_zip`` and copy the result to ``path``.

    If expand is given, will look inside the expanded file
    and find anything in the allow list and try and expand it as well.
    It will do up to 10 iterations, after that you are on your own.

    It will replace the expanded file with a directory and the expanded
    contents. If you have 'foo.jar', that contains 'some-image.jpg', then
    it will create a folder, foo.jar, with an image inside.

    :param xpi: archive handed to ``extract_zip``.
    :param path: destination handed to ``copy_over``.
    :param expand: when true, nested archives are expanded in place.
    :param verify: accepted for interface compatibility; not used here.
    :return: the ``get_all_files`` listing of the extracted tree, extended
        with the listing of every nested archive that was expanded.
    """
    # Local import keeps this block self-contained.
    # NOTE(review): the original logger call was lost from this copy; swap
    # in the project's own logger if there is one.
    import logging

    expand_allow_list = ['.crx', '.jar', '.xpi', '.zip']
    tempdir = extract_zip(xpi)
    all_files = get_all_files(tempdir)

    if expand:
        for _ in range(10):
            # Stays False when a full pass over the tree expanded nothing.
            flag = False
            for root, dirs, files in scandir.walk(tempdir):
                for name in files:
                    if os.path.splitext(name)[1] not in expand_allow_list:
                        continue
                    src = os.path.join(root, name)
                    if os.path.isdir(src):
                        continue
                    try:
                        dest = extract_zip(src, remove=True)
                    except zipfile.BadZipfile:
                        # We can safely ignore this here, this is
                        # only for recursive .zip/.jar extractions
                        logging.debug(
                            'Exception during recursive XPI expansion.'
                        )
                        # Without this, ``dest`` below would be undefined.
                        continue

                    all_files.extend(get_all_files(
                        dest, strip_prefix=tempdir, prefix=src))
                    if dest:
                        copy_over(dest, src)
                        flag = True
            if not flag:
                break

    copy_over(tempdir, path)
    return all_files
Пример #38
    def source_directory_path(self):
        """
        Path to the directory where source strings are stored.

        Paths are identified using a scoring system; more likely
        directory names get higher scores, as do directories with
        formats that only used for source strings.

        :return: the source repository checkout path when a project
            configuration is set or the source repository is explicitly
            marked; otherwise the highest-scoring candidate directory
            under ``self.checkout_path``.
        :raises MissingSourceDirectoryError: when no candidate is found.
        """
        # NOTE(review): confirm whether a property decorator should precede
        # this def; none is visible in this copy.
        source_repository = self.db_project.source_repository

        # If project configuration provided, files could be stored in multiple
        # directories, so we just use the source repository checkout path
        if self.configuration:
            return source_repository.checkout_path

        # If source repository explicitly marked
        if source_repository.source_repo:
            return source_repository.checkout_path

        possible_sources = []
        for root, dirnames, filenames in scandir.walk(self.checkout_path):
            for dirname in dirnames:
                if dirname not in self.SOURCE_DIR_NAMES:
                    continue
                score = self.SOURCE_DIR_SCORES[dirname]

                # Ensure the matched directory contains resources.
                directory_path = os.path.join(root, dirname)
                if not directory_contains_resources(directory_path):
                    continue

                # Extra points for source resources!
                if directory_contains_resources(
                    directory_path, source_only=True
                ):
                    score += 3

                possible_sources.append((directory_path, score))

        if not possible_sources:
            raise MissingSourceDirectoryError(
                "No source directory found for project {0}".format(self.db_project.slug)
            )
        # max() returns the first of equally scored candidates.
        return max(possible_sources, key=lambda s: s[1])[0]
def something(job, SIPDirectory, serviceDirectory, objectsDirectory, SIPUUID, date):
    """Tag every file under ``serviceDirectory`` as a "service" file.

    For each file the matching ``File`` row is looked up by its
    ``%SIPDirectory%``-relative location, its ``filegrpuse`` is set to
    ``"service"`` and its ``filegrpuuid`` is set to the uuid of the
    corresponding file in the objects directory.

    The corresponding file is found by prefix: the service file name up to
    ``_me.`` with ``_m.`` appended, or otherwise the name up to and
    including its last period, with ``objects/service/`` mapped to
    ``objects/``.

    :return: ``exitCode``, which this body always leaves at 0.  Lookup
        failures from ``File.objects.get`` propagate to the caller.
    """
    # exitCode = 435
    exitCode = 0
    servicePreExtension = "_me"
    originalPreExtension = "_m"
    # For every file, & directory Try to find the matching file & directory in the objects directory
    for (path, dirs, files) in scandir.walk(serviceDirectory):
        for file in files:
            file1Full = os.path.join(path, file).replace(
                SIPDirectory, "%SIPDirectory%", 1
            )  # service

            a = file.rfind(servicePreExtension + ".")
            if a != -1:
                file2Full = os.path.join(
                    path, file[:a] + originalPreExtension + "."
                ).replace(
                    SIPDirectory + "objects/service/", "%SIPDirectory%objects/", 1
                )  # service
            else:
                a = file.rfind(".")
                if a != -1:  # if a period is found
                    a += 1  # include the period
                file2Full = os.path.join(path, file[:a]).replace(
                    SIPDirectory + "objects/service/", "%SIPDirectory%objects/", 1
                )  # service

            f = File.objects.get(
                currentlocation=file1Full, removedtime__isnull=True, sip_id=SIPUUID
            )
            f.filegrpuse = "service"

            # NOTE(review): the arguments of this lookup were lost from this
            # copy.  file2Full is a name prefix without extension, so a
            # prefix match is assumed; confirm against the original source.
            grp_file = File.objects.get(
                currentlocation__startswith=file2Full,
                removedtime__isnull=True,
                sip_id=SIPUUID,
            )
            f.filegrpuuid = grp_file.uuid
            # NOTE(review): without a save the two assignments above are
            # never persisted; confirm the original also saved here.
            f.save()

    return exitCode
Пример #40
 def check_repository(self):
     """Classify every file under ``self.repository`` with ``parsefilepath``.

     :return: dict with three keys: ``'unknown'`` (paths for which
         ``parsefilepath`` returned ``None``), ``'identified'`` (path ->
         datatype) and ``'labels'`` (attribute name -> list of every value
         seen for it).
     """
     unknown = []
     identified = {}
     allatt = {}
     for root, dirs, files in scandir.walk(self.repository):
         for f in files:
             fp = osp.join(root, f)
             res = parsefilepath(fp, self.hierarchy)
             if res is None:
                 # Previously nothing was ever recorded here, so the
                 # returned 'unknown' list was always empty.
                 unknown.append(fp)
                 continue
             datatype, att = res
             identified[fp] = datatype
             for k, v in att.items():
                 allatt.setdefault(k, []).append(v)
     res = {'unknown': unknown,
            'identified': identified,
            'labels': allatt}
     print('done')
     return res
def writeOF():
    """Run ``ofp.writeOpticalFlow`` on every file below ``../videos``.

    Each file is processed at 224x224 together with a running index.  A
    truthy return value is stored under the file name; the file name is
    printed and a ``<name>-<count>`` line is appended to ``done.txt`` after
    each file.  The collected mapping is pickled to
    ``../dataset/frame_count.pickle`` once the walk finishes.
    """
    video_root = "../videos"
    width, height = 224, 224
    frame_counts = {}
    processed = 0

    for folder, _subdirs, names in scandir.walk(video_root):
        for video_name in names:
            result = ofp.writeOpticalFlow(
                folder, video_name, width, height, processed)
            if result:
                frame_counts[video_name] = result
            print(video_name)
            processed += 1
            # Appended per file so progress survives an interrupted run.
            with open("done.txt", "a") as progress_log:
                progress_log.write(video_name + '-' + str(processed) + '\n')

    with open('../dataset/frame_count.pickle', 'wb') as out:
        pickle.dump(frame_counts, out)
Пример #42
def statsDir(test, testvpxlocation, hn, numOfHost):
    """Locate the trace output that belongs to ``test`` on host ``hn``.

    Walks ``<testvpxlocation>/traces`` for directories matching the name
    returned by ``getTestName(test)``, reads the "ESX host" line of each
    directory's ``stats.html`` through a shell pipeline and compares it
    with ``hn``.

    :return: for the first matching directory, the ``traces`` directory
        itself for the long-running tests listed below, otherwise the path
        of that directory's ``stats.html``.  ``None`` (implicitly) when
        nothing matches.
    """
    p = os.path.join(testvpxlocation, 'traces')
    t = getTestName(test)
    # Tests for which the whole traces directory is returned.
    whole_dir_tests = (
        "combined_long_c1", "combined_long_c2",
        "7day_stress_c1", "7day_stress_c2",
        "7day_stress_af_c1", "7day_stress_af_c2",
    )
    for root, dirs, files in scandir.walk(p):
        for x in fnmatch.filter(dirs, t):
            # NOTE(review): built from ``p`` rather than ``root``, so a
            # match found deeper than the first level points at a path
            # that may not exist; confirm whether that is intended.
            statsFile = p + "/" + x + "/stats.html"
            # sometimes there will be multiple traces folders ; read stats.html to find out the IP to match the test-vpx
            # NOTE(review): the path is interpolated into a shell command
            # unquoted; confirm the directory names are trusted.
            a = "cat " + statsFile + " | grep -iE 'ESX host' | awk '{print $3}' "
            chkhostname = cmdline(a)
            # Drop the last character (presumably the trailing newline).
            chkhostname = chkhostname[:-1]
            if chkhostname == hn:
                # NOTE(review): ``num`` is not defined in this function; it
                # is presumably a module-level counter -- confirm.
                if num < numOfHost:     # will only output hdd details on different host
                    if test in whole_dir_tests:
                        return p
                    else:
                        return statsFile
 def _deposit_dip_to_dspace(self, source_path, ds_item, ds_sessionid):
     # Walks source_path and, for every file, builds a
     # "<base>/items/<uuid>/bitstreams?name=<quoted name>" URL and opens the
     # file in binary mode; any exception is re-raised as
     # DSpaceRESTException naming the file and the URL.
     # NOTE(review): several lines of this method are missing from this
     # copy (closing parentheses, the "try:" and the call that sends the
     # opened file).  The notes below mark each gap; restore them from the
     # original source before use.
     base_url = "{}/items/{}".format(
         self._get_base_url(self.ds_rest_url), ds_item["uuid"]
     # NOTE(review): the closing ")" of the format() call is missing here.
     for root, __, files in scandir.walk(source_path):
         for name in files:
             # The file name is UTF-8 encoded and URL-quoted for the query.
             bitstream_url = "{}/bitstreams?name={}".format(
                 base_url, six.moves.urllib.parse.quote(name.encode("utf-8"))
                 # NOTE(review): the closing ")" and a "try:" are missing
                 # before this "with".
                 with open(os.path.join(root, name), "rb") as content:
                         # NOTE(review): the call this keyword argument
                         # belongs to is missing; only the session cookie
                         # argument survives.
                         cookies={"JSESSIONID": ds_sessionid},
             except Exception:
                 raise DSpaceRESTException(
                     "Error sending {} to {}.".format(name, bitstream_url)
                 # NOTE(review): the closing ")" of the raise is missing.
Пример #44
    def resource_paths_without_config(self):
        """
        List of absolute paths for all supported source resources
        found within the given path.

        This is a generator: it yields ``os.path.join(root, filename)`` for
        every file below ``self.source_directory_path`` for which
        ``is_resource(filename)`` is true.  Directories for which
        ``is_hidden(root)`` is true contribute nothing.
        """
        path = self.source_directory_path
        # Loop-invariant, so evaluated once rather than per directory.
        skip_region_files = self.db_project.repository_url in MOZILLA_REPOS

        for root, dirnames, filenames in scandir.walk(path):
            if is_hidden(root):
                continue

            # Ignore certain files in Mozilla repositories.
            if skip_region_files:
                filenames = [
                    f for f in filenames if not f.endswith("region.properties")
                ]

            for filename in filenames:
                if is_resource(filename):
                    yield os.path.join(root, filename)
def walk(fpath, **kwargs):
    ''' Traverse thru a directory tree.

    Yields the path of every regular file below ``fpath``.  Directories
    whose path (the walk root joined with the directory name) matches one
    of the ``excludes`` glob patterns are not descended into.

    :param fpath: `str` The root file path
    :param excludes: `list` optional glob patterns of directories to exclude
    :rtype: `generator`
    '''
    excludes = kwargs.get('excludes') or []
    # transform glob patterns to regular expressions; r'$.' can never
    # match, so an empty list excludes nothing
    pattern = re.compile(
        r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.')
    for root, dirs, files in scandir.walk(fpath):
        # exclude dirs
        # Prune in place but keep bare directory names.  The walker joins
        # each entry onto ``root`` itself, so storing already-joined paths
        # (as before) sent it to "root/root/name" for relative roots.
        dirs[:] = [
            d for d in dirs if not pattern.match(os.path.join(root, d))
        ]
        for name in files:
            fullpath = os.path.join(root, name)
            if os.path.isfile(fullpath):
                yield fullpath
Пример #46
def get_en_US_xpi_file_to_import(subdir):
    """Return an en-US.xpi file object ready to be imported.

    The file is generated from utilities/tests/firefox-data/<subdir>.

    :param subdir: name of the data directory to package.
    :return: a temporary file holding the finished archive, positioned at
        offset 0.
    """
    # en-US.xpi file is a ZIP file which contains embedded JAR file (which is
    # also a ZIP file) and a couple of other files.  Embedded JAR file is
    # named 'en-US.jar' and contains translatable resources.

    # Get the root path where the data to generate .xpi file is stored.
    test_root = os.path.join(os.path.dirname(lp.translations.__file__),
                             'utilities/tests/firefox-data', subdir)

    # First create a en-US.jar file to be included in XPI file.
    jarfile = tempfile.TemporaryFile()
    data_dir = os.path.join(test_root, 'en-US-jar/')
    # Leaving the with-block closes the archive (writing its central
    # directory) but not the underlying temporary file.
    with zipfile.ZipFile(jarfile, 'w') as jar:
        for root, dirs, files in scandir.walk(data_dir):
            relative_dir = root[len(data_dir):].strip('/')
            for name in files:
                file_name = os.path.join(relative_dir, name)
                with open(os.path.join(data_dir, file_name), 'r') as f:
                    jar.writestr(file_name, f.read())
    # Rewind, otherwise the read() below starts at end-of-file and embeds
    # an empty jar.
    jarfile.seek(0)

    # Add remaining bits and en-US.jar to en-US.xpi.

    xpifile = tempfile.TemporaryFile()
    with zipfile.ZipFile(xpifile, 'w') as xpi:
        for xpi_entry in scandir.scandir(test_root):
            if xpi_entry.name != 'en-US-jar':
                with open(xpi_entry.path) as f:
                    xpi.writestr(xpi_entry.name, f.read())
        xpi.writestr('chrome/en-US.jar', jarfile.read())
    jarfile.close()
    # Hand the caller a file it can read from the start.
    xpifile.seek(0)

    return xpifile
Пример #47
def test_suite():
    """Build a suite from every ``.txt`` doctest in a ``docs`` directory.

    Every directory named ``docs`` below ``topdir`` is searched for
    ``.txt`` files; each becomes a ``LayeredDocFileSuite`` and the suites
    are added in file-name order.  When two ``docs`` directories hold a
    file of the same name, the one seen last wins.

    :return: the populated ``unittest.TestSuite``.
    """
    suite = unittest.TestSuite()

    # Find all the doctests in wadllib.
    packages = []
    for dirpath, dirnames, filenames in scandir.walk(topdir):
        if 'docs' in dirnames:
            # Path of the docs directory relative to topdir.
            docsdir = os.path.join(dirpath, 'docs')[len(topdir) + 1:]
            # Previously computed and discarded, so no docs were found.
            packages.append(docsdir)
    doctest_files = {}
    for docsdir in packages:
        for filename in os.listdir(os.path.join(topdir, docsdir)):
            if os.path.splitext(filename)[1] == '.txt':
                doctest_files[filename] = os.path.join(docsdir, filename)
    # Sort the tests.
    for filename in sorted(doctest_files):
        path = doctest_files[filename]
        doctest = LayeredDocFileSuite(path, package=wadllib)
        # Previously built and dropped, so the suite was returned empty.
        suite.addTest(doctest)

    return suite
Пример #48
def find_fast(directory, expression):
    """
    Finds all files in the given directory that match the given expression.

    The expression is applied with ``re.match`` to the bare file name (not
    the path), so it is anchored at the start of the name.

    @param directory    The directory.
    @param expression   The regular expression.

    @return The list of absolute paths of the matching files.
    """
    logging.debug("Searching expression {0} in directory "
                  "{1}".format(expression, directory))

    matcher = re.compile(expression)
    files_found = []
    for root, dirs, files in scandir.walk(directory):
        for file_name in files:
            if matcher.match(file_name):
                path = os.path.join(root, file_name)
                # Previously the path was computed but never stored, so
                # the function always returned an empty list.
                files_found.append(os.path.abspath(path))

    return files_found
Пример #49
def find_filetype(dir, filetype):
    """
    Will find all files of a certain type (e.g. .vcf or .bam files) in a directory.
    Method will enter every subdirectory. Can look for only a single filetype at a time.

    A file name is reported once: when the same name occurs in several
    directories, only the first occurrence in walk order is kept.

    :param dir: String of directory to walk.
    :param filetype: String of filetype to search for (e.g. .vcf or .bam)
    :return: list of tuples of file name and full path to the file
    :raises AssertionError: if ``dir`` does not exist.
    """
    assert os.path.exists(dir), "Path {} does not exist.".format(dir)
    # The old check compared a bare name against a list of (name, path)
    # tuples, which never matched, so duplicates were never filtered.
    seen_names = set()
    unique_files = []
    for (dirpath, dirnames, files) in walk(dir):
        for name in files:
            if name.endswith(filetype) and name not in seen_names:
                seen_names.add(name)
                unique_files.append((name, os.path.join(dirpath, name)))

    return unique_files
Пример #50
    def get_batch_logs(self, logDir, umc_instanceids, files_in_buffer=[]):
        # Walks logDir and returns `batch` sorted by file modification
        # time, newest first.  Only directories whose path matches
        # search_re and whose captured last component is in
        # umc_instanceids are considered.
        # NOTE(review): several lines of this method are missing from this
        # copy (the format() arguments, the rest of the pattern.match(...)
        # condition, the statement that fills `batch`, and the bodies of
        # the two size-limit checks).  Restore them from the original
        # source before use.
        # NOTE(review): files_in_buffer=[] is a mutable default; the
        # visible code only reads it.
        pattern = re.compile(".+_[0-9]+.*\.log.{log_file_group}$".format(
        # NOTE(review): the log_file_group argument and the closing "))"
        # are missing above.
        search_re = logDir + "/[a-zA-Z0-9\._\-]+/([a-zA-Z0-9\-\._]+)"  # + "|".join(GlobalContext.config.umc_instanceids(False)) + ")$";

        batch = []
        cnt = 0
        for dirname, dirnames, filenames in walk(logDir):
            #Msg.info1_msg("walk: %s, filenames=%d"%(dirname,len(filenames)))
            m = re.match(search_re, dirname)
            if m and m.group(1) in umc_instanceids:
                for filename in filenames:
                    fullfname = os.path.join(dirname, filename)
                    # NOTE(review): the argument of pattern.match( and the
                    # end of this condition are missing.
                    if fullfname not in files_in_buffer and pattern.match(
                        cnt = cnt + 1
                        # NOTE(review): the body of this check is missing;
                        # presumably it adds fullfname to batch -- confirm.
                        if cnt <= self.params.max_batchsize_files:
            # NOTE(review): the body of this check is missing; presumably
            # it stops the walk once the limit is exceeded -- confirm.
            if cnt > self.params.max_batchsize_files:
        return sorted(batch, key=lambda fn: os.stat(fn).st_mtime, reverse=True)
Пример #51
    def make(self, path):
        # Builds the directory object tree for `path` through
        # self.__hashdir and returns the top object's gethash() result.
        # Raises Exception if this instance already holds a top directory
        # object (the instance is single-use).
        # NOTE(review): the remaining tqdm(...) arguments, its closing
        # parenthesis and the statement that closes the progress bar are
        # missing from this copy; restore them from the original source.
        if self.topdirobj:
            raise Exception("This instance has already been used.")
        stat = os.stat(path)
        # Last "/"-separated component; empty if path ends with "/".
        name = path.split('/')[-1]

        # setup progress bar
        # Total number of files below path (directories are not counted).
        items = sum([len(files) for r, d, files in walk(path)])
        self.prog = tqdm(desc='Building',
                         unit=' items',
        # NOTE(review): the rest of the tqdm(...) call is missing here;
        # `items` is presumably passed as the total -- confirm.

        # build dirobjects
        self.topdirobj = self.__hashdir(name, path, stat)

        # close progress bar
        # NOTE(review): the close statement itself is missing here.
        return self.topdirobj.gethash()
Пример #52
    def __find_package_location(self, package):
        """
        Looks for the package location inside the analyzed repository.

        @param package  The package to be found.

        @return The full path to the package file.
        @raise Exception  If no file in the repository matches the package.
        """
        # FIXME: Currently YUM information about RPM location inside the given
        # repository is not accessible. That's why we manually search files in
        # the repository.

        package_name = _get_full_package_name(package)
        file_name = "{0}.rpm".format(package_name)

        # Check most probably paths to speed up the search.
        # This gives speed up from 10.760s to 0.711s of program run time.
        likely_locations = (
            os.path.join(self.repository_path, self.arch, file_name),
            os.path.join(self.repository_path, "noarch", file_name),
            os.path.join(self.repository_path, file_name),
        )
        for candidate in likely_locations:
            if os.path.exists(candidate):
                return candidate

        # Fall back to a full walk; any file whose name contains the
        # package name matches, and the last match found wins.
        location = None
        for root, dirs, files in scandir.walk(self.repository_path):
            for existing_file_name in files:
                if package_name in existing_file_name:
                    # Joined with the directory the file was found in so
                    # the path is valid for files in sub-directories.
                    # NOTE(review): the second argument was lost from this
                    # copy -- confirm against the original source.
                    location = os.path.join(root, existing_file_name)

        if location is None:
            raise Exception("Failed to find package {0}!".format(package))
        return location
# Collects the sub-directories found while walking `directory` top-down and
# returns them as a list of paths joined with the directory they were found
# in.  `directory` is made absolute first when `absolute` is true.
# NOTE(review): the rest of the signature is missing from this copy; the
# body uses `depth` and `absolute`, whose defaults cannot be seen here.
# The bodies of three checks below are missing as well; restore all of
# them from the original source before use.
def getDirs(directory=os.path.curdir,
    currDepth = 0
    folderList = list()

    if absolute:
        directory = os.path.abspath(directory)

    for root, dirs, files in walk(directory, topdown=True):
        # Counts directories visited by the walk, one per iteration.
        currDepth += 1
        for dir in dirs:
            # NOTE(review): the body of this check is missing; presumably
            # ".zfs" directories are skipped -- confirm.
            if dir == ".zfs":
            folderList.append(os.path.join(root, dir))
        # NOTE(review): the bodies of the two depth checks are missing;
        # presumably they continue / stop the walk -- confirm.
        if depth is None:
        if currDepth >= depth:
    return folderList
Пример #54
def search(path, value):
    """Print every file or directory below ``path`` whose name contains ``value``.

    ``value`` is normalised with ``nocase`` and compared against both the
    lower-cased entry name and ``nocase(name)``.  For each hit the
    containing directory and the entry name are printed.  ``'Not Found'``
    is printed when there was no hit at all.  A ``UnicodeError`` raised
    while handling one directory is reported and the walk continues.

    :param path: directory to walk.
    :param value: text to look for in entry names.
    """
    check = 0
    value = nocase(value)
    for item in scandir.walk(path):
        try:
            # item is (dirpath, dirnames, filenames); directory names are
            # tested first, then file names.
            clist = list(item[1]) + list(item[2])
            for entry in clist:
                if value in entry.lower() or value in nocase(entry):
                    # Single-argument print() produces the same text as the
                    # old two-argument print statements, hence the double
                    # space in the literals.
                    print('Found in:  %s' % item[0])
                    full_path = os.path.join(item[0], entry)
                    if os.path.isfile(full_path):
                        print('File name:  %s' % entry)
                    elif os.path.isdir(full_path):
                        print('Directory name:  %s' % entry)
                    print('\n')
                    check = 1
        except UnicodeError:
            print('Error in directory:  %s' % (item,))
    if check == 0:
        print('Not Found')
Пример #55
def get_size_scandir(root_path):
    """Return the size in bytes of a file or of a whole directory tree.

    :param root_path: path of a file or directory.
    :return: ``None`` if the path does not exist or is neither a file nor
        a directory; ``0`` for a symbolic link to a file; the file size
        for a regular file; for a directory, the summed size of every
        non-symlink file below it.
    """
    if not os.path.exists(root_path):
        return None

    if os.path.isfile(root_path):
        if os.path.islink(root_path):
            return 0
        # This return used to sit inside the islink branch, where it was
        # unreachable, so regular files fell through and gave None.
        # os.stat is the function the scandir module re-exports as
        # scandir.stat.
        return os.stat(root_path).st_size

    if os.path.isdir(root_path):
        total_size = 0

        for dirpath, dirnames, filenames in scandir.walk(root_path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                # skip if it is symbolic link
                if not os.path.islink(fp):
                    total_size += os.stat(fp).st_size
        return total_size
    return None
Пример #56
 def remove_duplicates(self, repository_path):
     """Delete all but one ``.rpm`` file per package key below ``repository_path``.

     The key is the part of the file name before its third-from-last
     period.  When two files share a key, their second fields are compared
     as plain strings; the file with the greater field is kept and the
     other one is deleted from disk.

     :param repository_path: directory to walk.
     """
     rpms = dict()
     for root, dirs, files in scandir.walk(repository_path):
         for file in files:
             if not file.endswith(".rpm"):
                 continue
             split = file.rsplit('.', 3)
             path = os.path.join(root, file)
             if split[0] not in rpms:
                 # First file seen for this key.  Previously this was only
                 # reached for keys already present, so nothing was ever
                 # recorded or compared.
                 rpms[split[0]] = path
                 continue
             path0 = rpms[split[0]]
             logging.debug("Select between {0} and {1}".format(path, path0))
             split0 = path0.rsplit('.', 3)
             # NOTE(review): this is a string comparison, not a version
             # comparison ("10" sorts before "9") -- confirm it is enough.
             if split[1] > split0[1]:
                 rpms[split[0]] = path
                 rm_path = os.path.join(repository_path, path0)
             else:
                 rm_path = os.path.join(repository_path, path)
             if os.path.exists(rm_path):
                 logging.debug("Removing {0}".format(rm_path))
                 # NOTE(review): the removal itself was missing from this
                 # copy; confirm against the original source.
                 os.remove(rm_path)
Пример #57
def writeOF():
    """Run ``ofp.writeOpticalFlow`` on every file below the EchoNet video folder.

    Each file is processed at 112x112 together with the flow output folder
    and a running index.  A truthy return value is stored under the file
    name, and a ``<name>-<count>`` line is appended to
    ``./doc_vid/done.txt`` after each file.  The collected mapping is
    pickled to ``./doc_vid/frame_count.pickle`` once the walk finishes.
    """
    video_root = "../../data/EchoNet-Dynamic/Videos"
    flow_root = "../../data/EchoNet-Dynamic/flow"
    width, height = 112, 112
    frame_counts = {}
    processed = 0

    for folder, _subdirs, names in scandir.walk(video_root):
        for video_name in names:
            result = ofp.writeOpticalFlow(
                folder, flow_root, video_name, width, height, processed)
            if result:
                frame_counts[video_name] = result
            processed += 1
            # Appended per file so progress survives an interrupted run.
            with open("./doc_vid/done.txt", "a") as progress_log:
                progress_log.write(video_name + '-' + str(processed) + '\n')

    with open('./doc_vid/frame_count.pickle', 'wb') as out:
        pickle.dump(frame_counts, out)
Пример #58
    def scan_archive_struct_stninfo(self, rootdir):
        # Walks rootdir looking for station info (".info") files, records
        # rootdir in self.archiveroot and returns the two lists
        # (stninfo, path2stninfo).
        # NOTE(review): the statements that fill the two lists, an "else:",
        # a "try:" and the body of the except clause are missing from this
        # copy, so as shown both lists are returned empty.  Restore them
        # from the original source before use.

        # same as scan archive struct but looks for station info files
        self.archiveroot = rootdir

        stninfo = []
        path2stninfo = []
        for path, dirs, files in scandir.walk(rootdir):
            for file in files:
                if file.endswith(".info"):
                    # only add valid rinex compressed files
                    # NOTE(review): the appends for a matching file and the
                    # "else:" introducing the check below are missing.
                    if file.endswith('DS_Store') or file[0:2] == '._':
                        # delete the stupid mac files
                        # NOTE(review): a "try:" is missing before this
                        # call.
                            os.remove(os.path.join(path, file))
                        except Exception:
                        # NOTE(review): the except body is missing;
                        # presumably the error is ignored -- confirm.

        return stninfo,path2stninfo
Пример #59
def findDuplicateSizes(dirPath):
    """Group every file below ``dirPath`` by its size in bytes.

    Files that cannot be stat'ed are reported on standard output and
    skipped.

    :param dirPath: directory to walk.
    :return: ``defaultdict(list)`` mapping a size to the list of paths of
        the files having that size.
    """
    # walks through directory to find files and sizes
    fileSizes = defaultdict(list)

    for root, _, fileNames in walk(dirPath):
        for fileName in fileNames:
            filePath = os.path.join(root, fileName)

            try:
                fileSize = os.stat(filePath).st_size
            except OSError as e:
                # file not accessible - move on
                # NOTE(review): the tail of the original message was lost
                # from this copy.
                print("Could not access file {0}".format(filePath))
                print(e)
                continue

            # use file size as dictionary key to group files by size
            fileSizes[fileSize].append(filePath)

    return fileSizes
Пример #60
def writeOF():
    # Runs ofp.writeOpticalFlow on every file below `root` at 224x224
    # together with a running index, stores truthy results under the file
    # name, appends a "<name>-<count>" line to done.txt after each file and
    # finally pickles the collected mapping.
    # NOTE(review): the path argument of the final open(...) call is
    # missing from this copy; restore it from the original source.

    # NOTE(review): "\D" is not a recognised escape, so the backslash is
    # kept literally; a raw string would make that explicit.
    root = "F:\Dataset/UCF-101"
    w = 224
    h = 224
    c = 0
    data = {}

    for path, subdsirs, files in scandir.walk(root):
        for filename in files:
            count = ofp.writeOpticalFlow(path, filename, w, h, c)
            if count:
                data[filename] = count
            c += 1
            with open("done.txt", "a") as myfile:
                myfile.write(filename + '-' + str(c) + '\n')

    with open(
            # NOTE(review): the output file path is missing here.
            'wb') as f:
        pickle.dump(data, f)