def generate_index(self, path):
    """Generate an index HTML page listing all files and dirs."""
    if path:
        files = [item for item in scandir(path)]
    else:
        files = [item for item in scandir('.')]
    files = [(item.name + '/', 0) if item.is_dir()
             else (item.name, str(item.stat().st_size))
             for item in files]
    html_template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"><html>
<title>Directory listing for /{{ path }}</title>
<body>
<h2>Directory listing for /{{ path }}</h2>
<hr>
<ul>
{% for filename, size in files %}
<li><a title="{{ size }}" href="{{ filename }}">{{ filename }}</a>
{% end %}
</ul>
<hr>
</body>
</html>
"""
    t = tornado.template.Template(html_template)
    return t.generate(files=files, path=path)
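# Usage sketch (illustrative, not from the source): tornado.template renders
# listings like the one generate_index() builds. The file tuples below are
# made-up sample data; Template.generate() returns bytes.
import tornado.template

listing = tornado.template.Template(
    '<ul>{% for filename, size in files %}'
    '<li><a title="{{ size }}" href="{{ filename }}">{{ filename }}</a></li>'
    '{% end %}</ul>')
print(listing.generate(files=[('docs/', 0), ('readme.txt', '1024')]))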
def findTrByID(self, projectId, testId):
    """
    Find a test result by its test id, where the test id is the md5
    of the result's path relative to the tests folder.
    """
    ret = ''
    path_prj = "%s/%s/" % (self.testsPath, projectId)
    path_prj = os.path.normpath(path_prj)
    for entry in scandir.scandir(path_prj):
        if entry.is_dir(follow_symlinks=False):
            for entry2 in scandir.scandir(entry.path):
                fullPath = entry2.path
                relativePath = fullPath.split(self.testsPath)[1]
                # compute the md5 (hashlib needs bytes, not str)
                md5 = hashlib.md5()
                md5.update(relativePath.encode('utf-8'))
                if md5.hexdigest() == testId:
                    # return directly: a bare `break` would only exit
                    # the inner loop
                    return relativePath
    return ret
def optimize(experiment_name):
    ds = DataSet(experiment_name)
    if len(ds.metaparts) > DataSet.MAX_FILES_PER_DIR:
        nesting = DataSet._get_optimal_nesting(len(ds.metaparts))
        logging.info('Optimizing dataset by introducing nesting ' + str(nesting))
        for olddir in tqdm(ds.partmap_names[1:]):
            digest = olddir.split('/')[-1][len('part_'):]
            partdir = 'input/%s' % experiment_name
            h_idx = 0
            for nest in nesting:
                partdir += '/' + digest[h_idx:h_idx + nest]
                h_idx += nest
            # tolerate pre-existing directories instead of a bare except
            os.makedirs(partdir, exist_ok=True)
            partdir += '/part_%s' % digest
            if olddir != partdir:
                shutil.move(olddir, partdir)
        logging.info('Removing empty directories')
        for entry in scandir('input/%s' % experiment_name):
            if entry.is_dir():
                # scandir() returns an iterator, so it has no len();
                # any() checks emptiness without listing everything
                if not any(scandir(entry.path)):
                    shutil.rmtree(entry.path)
def _add_directory(self, dirname, check=False):
    if dirname not in self.directories:
        self.directories[dirname] = get_filesystem_info(dirname)
        if check:
            try:
                # scandir() can be lazy, so pull one entry to make sure
                # the directory is actually readable
                next(scandir.scandir(dirname), None)
            except Exception as e:
                self._handle_error(dirname, e, is_dir=True)
def populate(self, rootdir, data_thresh=0, verbose=False):
    """
    Initialises a dataset descriptor by finding data in a given root directory.

    rootdir     - root directory, used to populate dataset
    data_thresh - minimum examples required per class
    verbose     - prints detailed initialisation information
    """
    print("Finding image data paths and labels...")
    # Find all class folders
    class_folders = [
        entry.path for entry in scandir.scandir(rootdir) if entry.is_dir()
    ]
    class_folders.sort()
    # Reset class counter
    self.data["length"] = 0
    # Reset data classes
    self.data["classes"] = []
    # Loop through class folders
    for class_folder in class_folders:
        # List of class image paths
        X = []
        # List of class labels
        Y = []
        # Find image files in each folder
        files = scandir.scandir(class_folder)
        # Create a new list of valid class images; lower-case the
        # extension so '.JPG' files are not silently skipped
        class_images = [
            i.path for i in files
            if os.path.splitext(i.path)[1].lower() in ['.jpg', '.jpeg']
        ]
        # Ignore folders lacking data
        if len(class_images) >= data_thresh:
            # Count classes in dataset
            self.data["length"] += 1
            # Add class paths and labels
            # The labels are the class folder name
            X = class_images
            Y = len(class_images) * [os.path.basename(class_folder)]
            self.data["classes"].append({"paths": X, "labels": Y})
    if verbose:
        print("Using %d/%d classes." % (self.data["length"],
                                        len(class_folders)))
    print("Completed.")
def set_chapters(self, gallery_object, add_to_model=True): path = gallery_object.path chap_container = gallerydb.ChaptersContainer(gallery_object) metafile = utils.GMetafile() try: log_d('Listing dir...') con = scandir.scandir(path) # list all folders in gallery dir log_i('Gallery source is a directory') log_d('Sorting') chapters = sorted([ sub.path for sub in con if sub.is_dir() or sub.name.endswith(utils.ARCHIVE_FILES) ]) #subfolders # if gallery has chapters divided into sub folders if len(chapters) != 0: log_d('Chapters divided in folders..') for ch in chapters: chap = chap_container.create_chapter() chap.title = utils.title_parser(ch)['title'] chap.path = os.path.join(path, ch) metafile.update(utils.GMetafile(chap.path)) chap.pages = len(list(scandir.scandir(chap.path))) else: #else assume that all images are in gallery folder chap = chap_container.create_chapter() chap.title = utils.title_parser( os.path.split(path)[1])['title'] chap.path = path metafile.update(utils.GMetafile(path)) chap.pages = len(list(scandir.scandir(path))) except NotADirectoryError: if path.endswith(utils.ARCHIVE_FILES): gallery_object.is_archive = 1 log_i("Gallery source is an archive") archive_g = sorted(utils.check_archive(path)) for g in archive_g: chap = chap_container.create_chapter() chap.path = g chap.in_archive = 1 metafile.update(utils.GMetafile(g, path)) arch = utils.ArchiveFile(path) chap.pages = len(arch.dir_contents(g)) arch.close() metafile.apply_gallery(gallery_object) if add_to_model: self.SERIES.emit([gallery_object]) log_d('Sent gallery to model')
def watch_directories_poll(watched_dirs, shutdown_event, callback, interval=settings.WATCH_DIRECTORY_INTERVAL): """ Watch the directories given via poll. This is a very inefficient way to handle watches, but it is compatible with all operating systems and filesystems. Accepts an iterable of workflow WatchedDir objects, a shutdown event, and a callback to be called when content appears in the watched dir. """ # paths that have already appeared in watch directories known_paths = set() while not shutdown_event.is_set(): current_paths = set() for watched_dir in watched_dirs: path = os.path.join(WATCHED_BASE_DIR, watched_dir.path.lstrip("/")) for item in scandir.scandir(path): if watched_dir.only_dirs and not item.is_dir(): continue elif item.path in known_paths: # Re-add to current entries, so we keep tracking it current_paths.add(item.path) continue current_paths.add(item.path) callback(item.path, watched_dir) # Update what we know about from the last pass, so that it doesn't grow # endlessly known_paths = current_paths time.sleep(interval)
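# Minimal usage sketch (assumptions: only the .path and .only_dirs attributes
# of the workflow WatchedDir objects matter here, so a namedtuple stands in;
# the directory name and interval are made up).
import collections
import threading

FakeWatchedDir = collections.namedtuple('FakeWatchedDir', 'path only_dirs')

def on_new_content(path, watched_dir):
    print('new content in %s: %s' % (watched_dir.path, path))

shutdown = threading.Event()
# Blocks until shutdown.set() is called from another thread:
# watch_directories_poll([FakeWatchedDir('activeTransfers', False)],
#                        shutdown, on_new_content, interval=1)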
def populate_userdir(fargs): predefined_locations = ['www', 'secure-www'] userdir, checkmodes = fargs locations = [] try: userdir = os.path.abspath(userdir) if not validate_directory(userdir, checkmodes): return locations public_html_location = userdir + '/public_html' if validate_directory(public_html_location, checkmodes): logging.debug('Appending to locations: %s', public_html_location) locations.append(public_html_location) sites_location = userdir + '/sites' if validate_directory(sites_location, checkmodes): for site in scandir.scandir(sites_location): site = site.name sitedir = sites_location + '/' + site if checkmodes: if not check_dir_execution_bit(sitedir): continue for predefined_directory in predefined_locations: sites_location_last = sitedir + '/' + predefined_directory if validate_directory(sites_location_last, checkmodes): logging.debug('Appending to locations: %s', sites_location_last) locations.append(sites_location_last) except Exception: logging.error(traceback.format_exc()) return locations
def yielding_checked_fnwalk(path, fn, sleep_interval=0.01): try: parent, name = os.path.split(path) entry = scandir.GenericDirEntry(parent, name) if fn(entry): yield entry queue = gevent.queue.LifoQueue() if entry.is_dir(): queue.put(path) while True: try: path = queue.get(timeout=0) except gevent.queue.Empty: break else: for entry in scandir.scandir(path): if fn(entry): if entry.is_dir(): queue.put(entry.path) yield entry gevent.sleep(sleep_interval) except Exception as e: logging.exception( 'Exception while directory walking: {}'.format(str(e)))
def scanwalk(self, path, followlinks=False):
    '''yields matching DirEntry paths instead of lists of strings'''
    try:
        for entry in scandir(path):
            # check if the file name contains one of our patterns
            for s in self.search_str:
                if entry.name.lower().find(s) != -1:
                    yield '%s' % entry.path
            # if directory, recurse into it
            if entry.is_dir(follow_symlinks=followlinks):
                for res in self.scanwalk(entry.path):
                    yield res
            # otherwise check inside the file for our pattern
            else:
                if self.max_size > entry.stat(follow_symlinks=False).st_size:
                    if entry.name.endswith(self.files_extensions):
                        if self.check_content:
                            for res in self.search_string(entry.path):
                                try:
                                    res = res.encode('utf-8')
                                    yield '%s > %s' % (entry.path, res)
                                except UnicodeError:
                                    pass
    except OSError:
        # typically permission denied
        pass
def dir_tree_info_pars(self, path, dirtrtable, monitor_types):
    """
    Recursively traverses the filesystem and loads the dirtrtable tree
    object. Returns a dir_info dict with statistics aggregated from the
    directory's children, adds a dirtrtable dir node if none exists yet,
    and sets the node content to dir_info. Traverses the filesystem
    depth-first. Main algorithmic worker for StorageStats.
    """
    if not dirtrtable.is_node_by_name(path):
        # if this dir has no dir node in dirtrtable, make one
        if not dirtrtable.is_root_set():
            # dirtrtable has only an uninitialized root node; initialize
            # it with this first root dir path
            dirtrtable.set_root_name(path, {})
        else:
            parNodeId = dirtrtable.getnode_idByName(os.path.dirname(path))
            dirtrtable.add_child(path, parNodeId, {})
    dir_info = self.dir_totals_by_type(path, monitor_types)
    try:
        for entry in scandir(path):
            if entry.is_dir(follow_symlinks=False):
                temp_dir_info = self.dir_tree_info_pars(
                    os.path.join(path, entry.name), dirtrtable, monitor_types)
                for each in dir_info:
                    dir_info[each] += temp_dir_info[each]
    except Exception as e:
        logging.warning(e)
    dirtrtable.up_date_node_by_name(path, dir_info)
    return dir_info
def imageHistograms():
    images = []
    for file in scandir.scandir('../dataSets/'):
        if file.is_file():
            images.append(file.path)
    print(images)
    # return the collected paths so callers can use them
    return images
def from_directory(directory, calculate_bounding_box=False, index_subdirs=True):
    '''
    Loads a section from a directory without loading any images.
    If the directory does not seem to be a section or is not ready,
    return None.
    '''
    if index_subdirs:
        fovs = []
        for f in Util.listdir(directory):
            if f == 'AFASFailure':
                continue
            fov_path = os.path.join(directory, f)
            # if not os.path.isdir(fov_path):
            #     # fovs always reside in directories
            #     continue
            fov = FoV.from_directory(fov_path, calculate_bounding_box)
            if fov:
                fovs.append(fov)
    else:
        fovs = None

    # Read the LUTS file in the directory, if one exists.
    # Should either be None or a mapping of a tile filename to its
    # base64 luts string.
    luts64_map = None
    if settings.LUTS_FILE_SUFFIX is not None:
        #section_dir_name = os.path.split(directory)[-1]
        #luts_fname = os.path.join(directory, '{}{}'.format(section_dir_name, settings.LUTS_FILE_SUFFIX))
        luts_fname = ''
        # Assuming there is only a single file with that suffix, use it
        all_dir_files = scandir.scandir(directory)
        for entry in all_dir_files:
            if entry.name.endswith(settings.LUTS_FILE_SUFFIX):
                luts_fname = os.path.join(directory, entry.name)
                break
        if os.path.exists(luts_fname):
            # print "Using LUTS file: {}".format(luts_fname)
            data = None
            with open(luts_fname, 'r') as f:
                data = f.readlines()
            # Map between a file name and its luts base64 string
            luts64_map = {}
            for line in data:
                tile_full_name, b64_str = line.split('\t')
                # Assuming the Zeiss microscope system will always stay
                # on Windows, so paths use backslashes
                tile_fname = tile_full_name.split('\\')[-1].lower()
                # Remove \r\n from the end of the string
                b64_str = b64_str[:-2]
                luts64_map[tile_fname] = b64_str

    section = Section(directory, fovs, calculate_bounding_box, luts64_map)
    return section
def load_Images_From_Folder(path):
    image_list = []
    for image in scandir(path):
        # entry.path is already the joined path, no manual
        # concatenation needed; PIL opens both pngs and jpgs
        im = Image.open(image.path)
        image_list.append(im)
    return image_list
def iter_files(root, exts=None, recursive=False):
    """
    Iterate over file paths within root filtered by specified extensions.

    :param str root: Root folder to start collecting files
    :param iterable exts: Restrict results to given file extensions
    :param bool recursive: Whether to walk the complete directory tree
    :rtype collections.Iterable[str]: absolute file paths with given extensions
    """
    if exts is not None:
        exts = set((x.lower() for x in exts))

    def matches(e):
        return (exts is None) or (e in exts)

    if recursive is False:
        for entry in scandir(root):
            if has_scandir:
                ext = splitext(entry.name)[-1].lstrip('.').lower()
                if entry.is_file() and matches(ext):
                    yield entry.path
            else:
                ext = splitext(entry)[-1].lstrip('.').lower()
                if not isdir(entry) and matches(ext):
                    yield join(root, entry)
    else:
        for root, folders, files in walk(root):
            for f in files:
                ext = splitext(f)[-1].lstrip('.').lower()
                if matches(ext):
                    yield join(root, f)
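# Usage sketch (paths and extensions illustrative; note exts are given
# without the leading dot, matching the lstrip('.') above): collect log
# files one level deep, then across the whole tree.
for p in iter_files('/var/log', exts=('log',)):
    print(p)
for p in iter_files('/var/log', exts=('log', 'gz'), recursive=True):
    print(p)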
def build_test_suite(base_dir, special_tests={}, layer=None,
                     setUp=setUp, tearDown=tearDown,
                     pageTestsSetUp=None):
    """Build a test suite from a directory containing test files.

    The parent's 'stories' subdirectory will be checked for pagetests and
    the parent's 'doc' subdirectory will be checked for doctests.

    :param base_dir: The tests subdirectory whose parent holds the
        'stories' and 'doc' directories.

    :param special_tests: A dict mapping filenames to TestSuite objects.
        These files need special treatment (for instance, they should be
        run in a different layer, or they need custom setup/teardown).
        The given TestSuite object will be used for that file, rather
        than a new one generated.

    :param layer: The layer in which to run the tests.
    """
    from lp.testing.layers import DatabaseFunctionalLayer
    from lp.testing.pages import (
        PageTestSuite,
        setUpGlobs,
        )
    if layer is None:
        layer = DatabaseFunctionalLayer
    if pageTestsSetUp is None:
        pageTestsSetUp = setUpGlobs

    suite = unittest.TestSuite()
    # Tests are run relative to the calling module, not this module.
    package = doctest._normalize_module(None)

    # Add the pagetests.
    stories_dir = os.path.join(os.path.pardir, 'stories')
    stories_path = os.path.join(base_dir, stories_dir)
    if os.path.exists(stories_path):
        suite.addTest(PageTestSuite(stories_dir, package,
                                    setUp=pageTestsSetUp))
        for story_entry in scandir.scandir(stories_path):
            if not story_entry.is_dir():
                continue
            story_path = os.path.join(stories_dir, story_entry.name)
            if story_path in special_tests:
                continue
            suite.addTest(
                PageTestSuite(story_path, package, setUp=pageTestsSetUp))

    # Add the special doctests.
    for key, special_suite in sorted(special_tests.items()):
        suite.addTest(special_suite)

    tests_path = os.path.join(os.path.pardir, 'doc')
    suite.addTest(
        build_doctest_suite(base_dir, tests_path, special_tests,
                            layer, setUp, tearDown, package))
    return suite
def RemoveFolderAtPath(folderPath, deleteFilesInFolder):
    """Removes a folder at path

    Args:
        folderPath (str): os.path.isdir valid formatted path
        deleteFilesInFolder (bool): delete files in folder automatically or not

    Raises:
        OSError: if the folder doesn't exist, is inaccessible, or is not
            empty while deleteFilesInFolder is False
        ValueError: if folderPath is not an os.path.isdir valid formatted path
        TypeError: if folderPath is not str or deleteFilesInFolder is not bool
    """
    if (isinstance(folderPath, str) and isinstance(deleteFilesInFolder, bool)):
        if (os.path.isdir(folderPath)):
            if (len(os.listdir(folderPath)) > 0 and deleteFilesInFolder):
                for file in scandir.scandir(folderPath):
                    RemoveFileAtPath(file.path)
            try:
                os.rmdir(folderPath)  # Error is thrown by system if folder is not empty
                return None
            except OSError as inAccessiblefile:
                raise inAccessiblefile
        else:
            raise ValueError(
                "folderPath is not os.path.isdir valid. Got {}".format(
                    folderPath))
    else:
        raise TypeError(
            "Expecting (str, bool). Got ({}, {})".format(
                type(folderPath), type(deleteFilesInFolder)))
def fnwalk(path, fn, shallow=False):
    """
    Walk the directory tree top-down, yielding paths selected by ``fn``

    This generator function takes a ``path`` from which to begin the
    traversal, and a ``fn`` object that selects the paths to be returned.
    It calls ``scandir.scandir()`` recursively until either a full path is
    flagged by the ``fn`` function as valid (by returning a truthy value)
    or ``scandir.scandir()`` fails with ``OSError``.

    This function has been added specifically to deal with large and deep
    directory trees, and it's therefore not advisable to convert the
    return values to lists and similar memory-intensive objects.

    The ``shallow`` flag is used to terminate further recursion on match.
    If ``shallow`` is ``False``, recursion continues even after a path is
    matched. For example, given a path ``/foo/bar/bar``, and a matcher
    that matches ``bar``, with the ``shallow`` flag set to ``True``, only
    ``/foo/bar`` is matched. Otherwise, both ``/foo/bar`` and
    ``/foo/bar/bar`` are matched.
    """
    if fn(path):
        yield path
        if shallow:
            return

    try:
        entries = scandir.scandir(path)
    except OSError:
        return

    for entry in entries:
        if entry.is_dir():
            for child in fnwalk(entry.path, fn, shallow):
                yield child
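# Usage sketch mirroring the docstring example: match any path whose last
# component is 'bar'; with shallow=True recursion stops at the first match
# on each branch.
import os

def is_bar(p):
    return os.path.basename(p) == 'bar'

for matched in fnwalk('/foo', is_bar, shallow=True):
    print(matched)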
def gf_listdir(path): if scandir_present: for entry in scandir.scandir(path): yield entry else: for name in os.listdir(path): yield SmallDirEntry(path, name, DT_UNKNOWN)
def count(path, recursive=False, delete=False):
    global counter
    for entry in scandir(path):
        if filter_name and filter_name not in os.path.basename(entry.path):
            continue
        counter += 1
        if delete and entry.path.startswith(path):
            print("\nRemoving: " + entry.path, end='')
            time.sleep(0.1)  # Trying to ease the system load
            try:
                #os.remove(entry.path)
                print('delete')
            except OSError as e:
                print(" Error({0}): {1}".format(e.errno, e.strerror))
        if recursive and entry.is_dir(follow_symlinks=False):
            count(entry.path, recursive, delete)
        if sys.stdout.isatty():
            if verbose:
                line = ('%s %s' % (str(counter).ljust(20), entry.path))[-width:].ljust(width) + '\r'
            else:
                line = ('%s %s' % (str(counter).ljust(20), path))[-width:].ljust(width) + '\r'
            print(line, end='')
        else:
            # `counter` already holds the running total; the old
            # `i + counter` referenced an undefined name
            line = ('%s %s' % (str(counter).ljust(20), path))[-width:].ljust(width) + '\r'
            sys.stderr.write(line)
            sys.stderr.flush()
def main(argv=None):
    # find all the files in the directory
    for dir_entry in scandir.scandir(BASE1):
        # is_file is a method; without the parentheses the check was
        # always truthy
        if dir_entry.is_file():
            subdir, hardlink = s3_key_to_fullpath(dir_entry.name)
            linksource = os.path.join(BASE1, dir_entry.name)
            create_link(subdir, linksource, hardlink)
def GenerateDictionaryFBXAndHierarchyFromFolder(directoryPath):
    """Iterate over files @ directoryPath and return Dict of filename:List(Fbx hierarchy)

    Args:
        directoryPath (string): An os.path.isdir valid formatted path to a
            directory with .fbx files

    Raises:
        TypeError: if directoryPath is not a str
        ValueError: if directoryPath is not a valid directory formatted path str

    Returns:
        dict(): fbx filename: List(File Hierarchy)
    """
    if (isinstance(directoryPath, str)):
        if (os.path.isdir(directoryPath)):
            outputDictionary = dict()
            count = 0
            totalFiles = len(os.listdir(directoryPath))
            ProgressBarUpdate(count, totalFiles,
                              "Generating Dictionary from FBX's Hierarchy From Folder")
            for entry in scandir.scandir(directoryPath):
                if (entry.path.endswith(".fbx")):
                    fbxName = entry.name.replace(".fbx", "")
                    outputDictionary[fbxName] = GetHierarchyListFromFBXFile(entry.path)
                    count += 1
                    ProgressBarUpdate(count, totalFiles,
                                      "Generating Dictionary from FBX's Hierarchy From Folder")
            return outputDictionary
        else:
            raise(ValueError("directoryPath needs to be a directory formatted str. Got {}".format(directoryPath)))
    else:
        raise(TypeError("expecting a str for directoryPath got {}".format(type(directoryPath))))
def fts_scandir(path, *, logical=False, nochdir=False, nostat=False, seedot=False, xdev=False, _level=0): for entry in scandir.scandir(path): direntry = ScandirDirEntry(entry) direntry.error = None direntry.level = _level direntry.postorder = False direntry.skip = False if not nostat: try: direntry.lstat() except OSError as e: direntry.error = e if direntry.is_dir(): yield direntry if not direntry.skip: yield from fts_scandir(os.path.join(path, direntry.name), logical=logical, nochdir=nochdir, nostat=nostat, seedot=seedot, xdev=xdev, _level=_level+1) direntry = copy.copy(direntry) direntry.postorder = True yield direntry else: yield direntry
def dir_totals_by_type(self, path, monitor_types):
    """
    Returns a dictionary with a key for each monitor type whose value is
    the total size in bytes. For each monitor file type an additional key
    "type + '_Cn'" is produced, set to the integer file count for that
    type. All files not falling under monitor_types are summarized in
    the default category 'other'.
    """
    dir_info = {}
    other, other_Cn = 0, 0
    for k in monitor_types:
        dir_info[k], dir_info[k+'_Cn'] = 0, 0
    try:
        dir_entry_list = scandir(path)
        for entry in dir_entry_list:
            if not entry.is_dir(follow_symlinks=False):
                this_type = entry.name.split('.')[-1]
                if this_type in monitor_types:
                    dir_info[this_type] += entry.stat(follow_symlinks=False).st_size
                    dir_info[this_type + '_Cn'] += 1
                else:
                    other += entry.stat(follow_symlinks=False).st_size
                    other_Cn += 1
    except Exception as e:
        logging.warning(e)
    dir_info['other'], dir_info['other_Cn'] = other, other_Cn
    return dir_info
def populate_predefined(self, startdir, checkmodes): if not isinstance(startdir, str): logging.debug( 'populate_predefined: value startdir not a string. "%s" with type %s' % (startdir, type(startdir))) sys.exit( 'populate_predefined: value startdir not a string. "%s" with type %s' % (startdir, type(startdir))) try: logging.debug('Populating predefined directories: %s', startdir) starttime = time.time() p = Pool() dirs = (startdir + '/' + d.name for d in scandir.scandir(startdir)) udirs = p.imap_unordered(populate_userdir, \ ((d, checkmodes) for d in dirs), \ chunksize=200) p.close() p.join() locations = [item for sublist in udirs for item in sublist] logging.info('Total amount of locations: %s, time elapsed: %.4f', \ len(locations), time.time() - starttime) self.populate(locations, checkmodes) except Exception: logging.error(traceback.format_exc())
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--images_dir',
                        default="/media/sdf/IMAGENET/ILSVRC12/train")
    parser.add_argument(
        '--out_dir',
        default="/media/sdf/AI-Challenger/faces-NonFaces/background/train-set/extra_ImageNet_train"
    )
    args = parser.parse_args()
    count = 0
    for folder in next(os.walk(args.images_dir))[1]:  # immediate subfolders
        if count > 775000:
            break
        images_path = os.path.join(args.images_dir, folder)
        for files in scandir(images_path):
            if files.is_file() and files.name.endswith('.JPEG'):  # ImageNet is in *.JPEG format
                image_name = os.path.join(images_path, files.name)
                print("processing: ", image_name)
                img_base_name = os.path.splitext(files.name)[0]
                image = cv2.imread(image_name)
                box = generate_neg_samples(image)
                if box is None:  # `== None` misbehaves on array-like boxes
                    continue
                crop = image[box[1]:box[3], box[0]:box[2]]
                resized = cv2.resize(crop, (224, 224))
                name = args.out_dir + '/' + img_base_name + '.jpg'
                cv2.imwrite(name, resized)
                count += 1
def clear_folder(self): """ Deletes the path (if local) or unlinks all keys in the bucket folder (if S3) .. warning:: This is a destructive function, use with caution! Usage:: from pewtils.io import FileHandler >>> h = FileHandler("./", use_s3=False) >>> len(list(h.iterate_path())) 3 >>> h.clear_folder() >>> len(list(h.iterate_path())) 0 """ if self.use_s3: for key in self.s3.list(prefix=self.path): key.delete() else: for f in scandir(self.path): os.unlink(os.path.join(self.path, f.name))
def find_newest_file(files_path):
    r"""Get the filepath of the most recently modified/created file among
    the files in a directory. Also returns a list of the older files.

    example:
    new, old_list = find_newest_file('C:\directory')
    print(new)
    print(old_list)
    >C:\today.txt
    >['C:\yesterday.txt', 'C:\last_week.txt']
    """
    import scandir
    print('checking files')
    # dummy data to start
    latest_file = ['', system.date.parse("1970-01-01 00:00:00")]
    # find the newest file, collect the others for deletion
    delete_list = []
    # for each file in the directory
    for inv_file in scandir.scandir(files_path):
        # when was this file modified
        file_tstamp = get_mod_time(inv_file)
        # if it's the newest found, save it
        if latest_file[1] < file_tstamp:
            print('newer file', inv_file.path)
            # don't put the initial '' placeholder on the delete list
            if latest_file[0]:
                delete_list.append(latest_file[0])
            latest_file = [inv_file.path, file_tstamp]
        else:
            delete_list.append(inv_file.path)
    return latest_file, delete_list
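# Usage sketch (the directory is illustrative): keep the newest export and
# remove the stale files the function collected.
import os

newest, stale = find_newest_file(r'C:\exports')
for old_path in stale:
    os.remove(old_path)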
def get_files_and_dirs(path): """Return all files and directories at a given path""" if os.path.isdir(path): return scandir(path) return []
def process_dir(self, path, st):
    """ i_dir should be absolute path
    st is the stat object associated with the directory
    """
    last_report = MPI.Wtime()
    count = 0
    try:
        with timeout(seconds=30):
            # materialize the listing here so the timeout actually covers
            # the directory read; scandir() itself returns lazily
            entries = list(scandir(path))
    except OSError as e:
        log.warning(e, extra=self.d)
        self.skipped += 1
    except TimeoutError as e:
        log.error("%s when scandir() on %s" % (e, path), extra=self.d)
        self.skipped += 1
    else:
        for entry in entries:
            if entry.is_symlink():
                self.sym_links += 1
            elif entry.is_file():
                self.circle.enq(entry.path)
            else:
                self.circle.preq(entry.path)
            count += 1
            if (MPI.Wtime() - last_report) > self.interval:
                print("Rank %s : Scanning [%s] at %s" % (self.circle.rank, path, count))
                last_report = MPI.Wtime()
        log.info("Finish scan of [%s], count=%s" % (path, count), extra=self.d)

    if count > self.maxfiles:
        self.maxfiles = count
        self.maxfiles_dir = path
def imageHistograms():
    global images
    for file in scandir.scandir('./dataSet/'):
        if file.is_file():
            images.append(file.path)
    print("Number of Images = ", len(images))

    # compute a normalized greyscale histogram for each image
    imgArray = []
    for img_file in images:
        print("processing image = ", img_file)
        img = cv2.imread(img_file)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        imgHist = cv2.calcHist([img], [0], None, [256], [0, 256])
        cv2.normalize(imgHist, imgHist, 0, 255, cv2.NORM_MINMAX, -1)
        #imgHist = np.int32(np.around(imgHist))
        imgHist = imgHist.transpose()
        print("Image = ", img_file, "Len = ", len(imgHist[0]),
              "Graylevel Histogram = ", imgHist[0])
        imgArray.append(imgHist[0])
    return imgArray
def getDirFiles(mainDir, acceptedExtensions): metaData = [] # The returned metadata [path,mtime,ctime,fileSize] # Check first to see that the folder exists, return nothing otherwise if not os.path.isdir(mainDir): return metaData # The directories yet to be scanned toScanDirs = [mainDir] # print('Scanning archive directory for changes...') while len(toScanDirs) != 0: toAddToScan = [] # Directories to be found in this iteration for aDir in toScanDirs: for entry in scandir(aDir): # If this is a directory, scan it later if entry.is_dir(): toAddToScan.append(entry.path) continue # Add to the file metadata list if is an accepted file type if entry.name.split('.')[-1] in acceptedExtensions: aStat = entry.stat() metaData.append([ entry.path, aStat.st_mtime, aStat.st_ctime, aStat.st_size ]) # Add all of the new directories which were found toScanDirs = toAddToScan # print('...scan complete') return metaData
def iter_folders(self): """Return a generator of folder names.""" for entry in scandir.scandir(self._path): entry = entry.name if len(entry) > 1 and entry[0] == '.' and \ os.path.isdir(os.path.join(self._path, entry)): yield entry[1:]
def scandirwalk_worker(): dirs = [] nondirs = [] while True: path = q_paths.get() try: q_paths_in_progress.put(path) for entry in scandir(path): if entry.is_dir(follow_symlinks=False) and not dir_excluded( entry.path): dirs.append(entry.name) elif entry.is_file(follow_symlinks=False): nondirs.append(entry.name) q_paths_results.put((path, dirs[:], nondirs[:])) except (OSError, IOError) as e: print("OS/IO Exception caused by: %s" % e) pass except Exception as e: print("Exception caused by: %s" % e) pass finally: q_paths_in_progress.get() del dirs[:] del nondirs[:] q_paths.task_done()
def _refresh(self): """Update table of contents mapping.""" # If it has been less than two seconds since the last _refresh() call, # we have to unconditionally re-read the mailbox just in case it has # been modified, because os.path.mtime() has a 2 sec resolution in the # most common worst case (FAT) and a 1 sec resolution typically. This # results in a few unnecessary re-reads when _refresh() is called # multiple times in that interval, but once the clock ticks over, we # will only re-read as needed. Because the filesystem might be being # served by an independent system with its own clock, we record and # compare with the mtimes from the filesystem. Because the other # system's clock might be skewing relative to our clock, we add an # extra delta to our wait. The default is one tenth second, but is an # instance variable and so can be adjusted if dealing with a # particularly skewed or irregular system. if time.time() - self._last_read > 2 + self._skewfactor: refresh = False for subdir in self._toc_mtimes: mtime = os.path.getmtime(self._paths[subdir]) if mtime > self._toc_mtimes[subdir]: refresh = True self._toc_mtimes[subdir] = mtime if not refresh: return # Refresh toc self._toc = {} for subdir in self._toc_mtimes: path = self._paths[subdir] for entry in scandir.scandir(path): if entry.is_dir(): continue entry = entry.name uniq = entry.split(self.colon)[0] self._toc[uniq] = os.path.join(subdir, entry) self._last_read = time.time()
def load(cls, filepath): """ Loads a model from the filepath (string). You can pass '<your-path>/*' as filepath, in this case the latest model from that directory will be loaded """ if filepath.endswith('/*'): latest_entry = None latest_ctime = None for entry in scandir(filepath[:-2]): if entry.name.endswith('.dill'): if latest_ctime is None or entry.stat().st_ctime > latest_ctime: latest_ctime = entry.stat().st_ctime latest_entry = entry if latest_entry is None: raise Exception('No models found in %s' % filepath) filepath = latest_entry.path f = open(filepath, 'rb') result = dill.load(f) f.close() metapath = filepath.replace('.dill', '.json') if os.path.isfile(metapath): f = open(metapath, 'rb') result.meta_data = json.load(f) f.close() return result
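# Usage sketch ('Model' and the directory name are illustrative stand-ins
# for whatever class defines load()): a trailing '/*' loads the newest
# .dill in that directory, a concrete path loads that exact file.
model = Model.load('trained_models/*')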
def iterate_path(self): """ Iterates over the directory and returns a list of filenames or S3 object keys :return: Yields a list of filenames or S3 keys :rtype: iterable Usage:: from pewtils.io import FileHandler >>> h = FileHandler("./", use_s3=False) >>> for file in h.iterate_path(): print(file) file1.csv file2.pkl file3.json """ if self.use_s3: for key in self.s3.list(prefix=self.path): yield key else: for f in scandir(self.path): yield f.name
def find_f_img_archive(extract=True): zip = ArchiveFile(temp_p) if extract: gui_constants.NOTIF_BAR.add_text('Extracting...') t_p = os.path.join('temp', str(uuid.uuid4())) os.mkdir(t_p) if is_archive or chapterpath.endswith(ARCHIVE_FILES): if os.path.isdir(chapterpath): t_p = chapterpath elif chapterpath.endswith(ARCHIVE_FILES): zip2 = ArchiveFile(chapterpath) f_d = sorted(zip2.dir_list(True)) if f_d: f_d = f_d[0] t_p = zip2.extract(f_d, t_p) else: t_p = zip2.extract('', t_p) else: t_p = zip.extract(chapterpath, t_p) else: zip.extract_all(t_p) # Compatibility reasons.. TODO: REMOVE IN BETA filepath = os.path.join(t_p, [x for x in sorted([y.name for y in scandir.scandir(t_p)])\ if x.lower().endswith(IMG_FILES)][0]) # Find first page filepath = os.path.abspath(filepath) else: if is_archive: con = zip.dir_contents('') f_img = [x for x in sorted(con) if x.lower().endswith(IMG_FILES)] if not f_img: log_w('Extracting archive.. There are no images in the top-folder. ({})'.format(archive)) return find_f_img_archive() filepath = os.path.normpath(archive) else: raise ValueError("Unsupported gallery version") return filepath
def __init__(self, path=None, archive=''): self.metadata = { "title": '', "artist": '', "type": '', "tags": {}, "language": '', "pub_date": '', "link": '', } self.files = [] if path is None: return if archive: zip = ArchiveFile(archive) c = zip.dir_contents(path) for x in c: if x.endswith(app_constants.GALLERY_METAFILE_KEYWORDS): self.files.append(open(zip.extract(x))) else: for p in scandir.scandir(path): if p.name in app_constants.GALLERY_METAFILE_KEYWORDS: self.files.append(open(p.path)) if self.files: self.detect() else: log_i('No metafile found...')
def _scan_disk(self, on_disk, path): for entry in scandir.scandir(path): if not entry.name.startswith(".") and entry.is_dir(): self._scan_disk(on_disk, entry.path) elif entry.is_file(): on_disk[entry.path] = entry.stat().st_mtime return on_disk
def __init__(self, path=None, archive=''): self.metadata = { "title":'', "artist":'', "type":'', "tags":{}, "language":'', "pub_date":'', "link":'', "info":'', } self.files = [] if path is None: return if archive: zip = ArchiveFile(archive) c = zip.dir_contents(path) for x in c: if x.endswith(app_constants.GALLERY_METAFILE_KEYWORDS): self.files.append(open(zip.extract(x), encoding='utf-8')) else: for p in scandir.scandir(path): if p.name in app_constants.GALLERY_METAFILE_KEYWORDS: self.files.append(open(p.path, encoding='utf-8')) if self.files: self.detect() else: log_d('No metafile found...')
def gen_gallery_hashes(gallery):
    "Generates hashes for gallery's first chapter and inserts them to DB"
    if gallery.id:
        chap_id = ChapterDB.get_chapter_id(gallery.id, 0)
    try:
        if gallery.is_archive:
            raise NotADirectoryError
        chap_path = gallery.chapters[0]
        imgs = scandir.scandir(chap_path)
    except NotADirectoryError:
        # HACK: No need to extract everything.. could read bytes from the archive!!!
        t_p = os.path.join(gui_constants.temp_dir, str(uuid.uuid4()))
        try:
            if gallery.is_archive:
                zip = ArchiveFile(gallery.path)
                chap_path = zip.extract(gallery.chapters[0], t_p)
            else:
                chap_path = t_p
                zip = ArchiveFile(gallery.chapters[0])
                zip.extract_all(chap_path)
        except CreateArchiveFail:
            log_e('Could not generate hashes: CreateZipFail')
            return []
        imgs = scandir.scandir(chap_path)
    except FileNotFoundError:
        return False

    # filter to image files only
    imgs = [x.path for x in imgs if x.name.lower().endswith(tuple(IMG_FILES))]
    hashes = []
    for i in sorted(imgs):
        with open(i, 'rb') as img:
            hashes.append(generate_img_hash(img))

    if gallery.id and chap_id:
        executing = []
        # enumerate here so each hash gets its own page number; the old
        # code reused the last loop index for every row
        for n, img_hash in enumerate(hashes):
            executing.append(["""INSERT INTO hashes(hash, series_id, chapter_id, page)
            VALUES(?, ?, ?, ?)""", (img_hash, gallery.id, chap_id, n)])
        CommandQueue.put(executing)
        c = ResultQueue.get()
        del c
    return hashes
def filewalk(root): """Discover and yield all files found in the specified `root` folder.""" for entry in scandir.scandir(root): if entry.is_dir(): for child in filewalk(entry.path): yield child else: yield entry.path
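# Usage sketch: filewalk() yields plain path strings, so it composes with
# ordinary iteration tools.
total_files = sum(1 for _ in filewalk('.'))
print('%d files found' % total_files)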
def clean(self): """Delete old files in "tmp".""" now = time.time() for entry in scandir.scandir(os.path.join(self._path, 'tmp')): entry = entry.name path = os.path.join(self._path, 'tmp', entry) if now - os.path.getatime(path) > 129600: # 60 * 60 * 36 os.remove(path)
def test_symlink(self): if not hasattr(os, 'symlink'): return entries = sorted(scandir.scandir(test_path), key=lambda e: e.name) self.assertEqual([(e.name, e.is_symlink()) for e in entries], [('file1.txt', False), ('file2.txt', False), ('link_to_dir', True), ('link_to_file', True), ('subdir', False)])
def scantree(path): if not os.path.exists(path): print('Path not exists: {}'.format(path)) return for entry in scandir.scandir(path): if entry.is_dir(follow_symlinks=False): yield from scantree(entry.path) # see below for Python 2.x else: yield entry
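# The Python 2.x variant the comment above points to: 'yield from' is
# 3.3+ syntax, so the recursion is spelled out (a sketch of the usual
# rewrite of this recipe).
import os
import scandir

def scantree_py2(path):
    if not os.path.exists(path):
        print('Path not exists: {}'.format(path))
        return
    for entry in scandir.scandir(path):
        if entry.is_dir(follow_symlinks=False):
            for sub in scantree_py2(entry.path):
                yield sub
        else:
            yield entry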
def test_stat(self):
    entries = list(scandir.scandir(test_path))
    for entry in entries:
        os_stat = os.lstat(os.path.join(test_path, entry.name))
        scandir_stat = entry.lstat()
        # assertEquals is a deprecated alias of assertEqual
        self.assertEqual(os_stat.st_mode, scandir_stat.st_mode)
        self.assertEqual(int(os_stat.st_mtime), int(scandir_stat.st_mtime))
        self.assertEqual(int(os_stat.st_ctime), int(scandir_stat.st_ctime))
        self.assertEqual(os_stat.st_size, scandir_stat.st_size)
def flat_sort(root, out_folders, exclude=None):
    for entry in scan.scandir(root):
        if not entry.name.startswith(u'.') and entry.is_file():
            filetype = get_filetype(entry.name)
            if filetype in out_folders:
                # entry.path points at the file inside `root`; the old
                # os.getcwd() join broke whenever root wasn't the cwd
                dest = os.path.join(root, out_folders[filetype])
                print("moving %s to %s" % (entry.path, dest))
                shutil.move(entry.path, dest)
def get_tree_size(path): """Return total size of files in given path and subdirs.""" total = 0 for entry in scandir.scandir(path): if entry.is_dir(follow_symlinks=False): total += get_tree_size(entry.path) else: total += entry.stat(follow_symlinks=False).st_size return total
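# Usage sketch for get_tree_size(), e.g. as a tiny CLI that defaults to
# the current directory:
import sys
print(get_tree_size(sys.argv[1] if len(sys.argv) > 1 else '.'))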
def paths(self, root=None): root = root or self.root_directory for entry in scandir.scandir(root): if entry.is_dir(): for filename in self.paths(entry.path): yield filename elif self.regexp.match(os.path.join(root, entry.name)): self.count += 1 yield os.path.join(root, entry.name)
def _scantree_rec(path, include=None, files=None):
    # mutable default arguments would be shared across calls, so create
    # fresh lists per top-level invocation instead
    if include is None:
        include = []
    if files is None:
        files = []
    for entry in scandir(path):
        if entry.is_dir(follow_symlinks=False):
            _scantree_rec(entry.path, include, files)
        elif entry.is_file():
            if os.path.splitext(entry.path)[1][1:] in include:
                files.append(entry.path)
    return files
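# Usage sketch (extensions illustrative; given without the leading dot to
# match the splitext slicing above). With the fixed defaults, repeated
# calls no longer leak results into each other.
python_sources = _scantree_rec('.', include=['py'])
text_files = _scantree_rec('.', include=['txt', 'md'])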
def getNewFiles(self, path):
    files = []
    for entry in scandir(path):
        if entry.is_file():
            files.append('%s/%s' % (path, entry.name))
        elif entry.is_dir():
            # recurse only into directories; the old bare `else` also
            # descended into sockets, fifos and broken symlinks
            files.extend(self.getNewFiles('%s/%s' % (path, entry.name)))
    return files
def rm_py_along_so(prefix):
    """remove .py (.pyc) files alongside .so or .pyd files"""
    files = list(scandir(prefix))
    # DirEntry objects never compare equal to strings, so test against
    # the set of entry paths instead of the entry list itself
    paths = set(fn.path for fn in files)
    for fn in files:
        if fn.is_file() and fn.name.endswith(('.so', '.pyd')):
            name, _ = os.path.splitext(fn.path)
            for ext in '.py', '.pyc', '.pyo':
                if name + ext in paths:
                    os.unlink(name + ext)
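# Usage sketch (the site-packages path is illustrative): trims redundant
# pure-Python sources that sit next to compiled extension modules.
rm_py_along_so('/opt/app/lib/python3.10/site-packages/mypkg')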
def find_image(self): if not self.content_path or not os.path.exists(self.content_path): return None for entry in scandir.scandir(self.content_path): extension = os.path.splitext(entry.name)[1].lower() if extension in self.IMAGE_EXTENSIONS: return entry.name return None