def copy(src_dic, dst, pattern, pretend):
    for dirname, subdirs, subfiles, dirfd in os.fwalk(dst):
        for fname in flt(subfiles, pattern):
            src_info = src_dic.get(fname)
            if not src_info:
                continue
            if not isinstance(src_info, dict):
                src_info = src_dic[fname] = hash_files(src_info)
            st = os.stat(fname, dir_fd=dirfd)
            if not stat.S_ISREG(st.st_mode):
                continue
            h = hash_file(fname, dir_fd=dirfd)
            src_file = src_info.get(h)
            if src_file:
                dst_path = os.path.join(dirname, fname)
                src_mtime_ns = src_file.mtime_ns
                print('{h} {mtime:%Y-%m-%d %H:%M:%S.%f} {src} => {dst}'.format(
                    src=src_file.path, dst=dst_path,
                    mtime=datetime.fromtimestamp(src_mtime_ns / 1.e9),
                    h=binascii.hexlify(h).decode()))
                if not pretend:
                    os.utime(fname, dir_fd=dirfd, ns=(src_mtime_ns, src_mtime_ns))
def clean_empty_temp_dirs(args):
    for dirpath, dirnames, filenames, dirfd in os.fwalk('.', topdown=False, dir_fd=args.temp_fd):
        if not dirnames and not filenames:
            try:
                os.rmdir(dirpath, dir_fd=args.temp_fd)
            except OSError:
                pass
def find_local_finished_files(args, remote_file_set):
    res = []
    for dirpath, dirnames, filenames, dirfd in os.fwalk('.', dir_fd=args.temp_fd):
        for name in filenames:
            full = os.path.normpath(os.path.join(dirpath, name))
            if full not in remote_file_set:
                res.append(full)
    return res
def purge_directory(directory, max_age):
    """Remove all expired tiles in cache subdirectory.

    `directory` should be a relative directory under
    :attr:`poor.conf.CACHE_HOME_DIR`. `max_age` should be the age in days;
    tiles older than that are removed.
    """
    if not directory:
        return
    basename = directory
    if not poor.CACHE_HOME_DIR:
        # This shouldn't happen, but just in case it does,
        # let's try to avoid a disaster.
        raise Exception("poor.CACHE_HOME_DIR not set")
    directory = os.path.join(poor.CACHE_HOME_DIR, directory)
    directory = os.path.realpath(directory)
    if not os.path.isdir(directory):
        return
    if "poor-maps" not in directory:
        # This shouldn't happen, but just in case it does,
        # let's try to avoid a disaster.
        raise Exception("Suspicious value for cache directory: {}"
                        .format(repr(directory)))
    print("Purging cache >{:3.0f}d {:28s}..."
          .format(max_age, basename), end="")
    cutoff = time.time() - max_age * 86400
    total = removed = 0
    # Only follow symlinks for the directory itself, not its children,
    # in order to simplify matters and do a safe bottom-up walk.
    for root, dirs, files, rootfd in os.fwalk(
            directory, topdown=False, follow_symlinks=False):
        total += len(files)
        for name in files:
            if os.stat(name, dir_fd=rootfd).st_mtime < cutoff:
                with poor.util.silent(OSError):
                    os.remove(name, dir_fd=rootfd)
                    removed += 1
        for name in dirs:
            with poor.util.silent(OSError):
                # Fails if the directory is not empty.
                # Fails if the directory is a symlink.
                os.rmdir(name, dir_fd=rootfd)
        # Release GIL to let other threads do something more important.
        time.sleep(0.000001)
    with poor.util.silent(OSError):
        # Fails if the directory is not empty.
        # Fails if the directory is a symlink.
        os.rmdir(directory)
    print(" {:6d} rm, {:6d} left."
          .format(removed, total - removed))
    if removed > 0:
        # Make sure application doesn't try to use tiles that were allocated
        # before this purge, but whose files have now been removed.
        poor.app.tilecollection.clear_removed()
        pyotherside.send("queue-update")
def remove(self):
    def onerror(exc):
        raise exc

    for root, dirs, files, root_fd in fwalk(dir_fd = self.backupdir_fd, topdown = False, onerror = onerror):
        for name in files:
            unlink(name, dir_fd = root_fd)
        for name in dirs:
            rmdir(name, dir_fd = root_fd)

    rmdir(str(self.backupdir), dir_fd = self.host.hostdir_fd)
def should_build(self):
    try:
        destination_time = getmtime(self.destination)
    except FileNotFoundError:
        return True
    for root, dirs, files, rootfd in fwalk(self.src):
        for filename in files:
            file_path = join(root, filename)
            if destination_time < getmtime(file_path):
                return True
    return False
def init_src_dic(src, pattern):
    dic = {}
    for dirname, subdirs, subfiles, dirfd in os.fwalk(src):
        for skip in SKIP_DIRS:
            if skip in subdirs:
                subdirs.remove(skip)
        for fname in flt(subfiles, pattern):
            st = os.stat(fname, dir_fd=dirfd, follow_symlinks=False)
            if stat.S_ISREG(st.st_mode):
                fullname = os.path.join(dirname, fname)
                dic.setdefault(fname, []).append(fullname)
    return dic
def backup(self):
    backupdir = self.backupdir
    backupdir_fd = self.backupdir_fd
    flock(backupdir_fd, LOCK_EX|LOCK_NB)

    def onerror(exc):
        raise exc

    for root, dirs, files, root_fd in fwalk(dir_fd = backupdir_fd, topdown = False, onerror = onerror):
        for name in files:
            unlink(name, dir_fd = root_fd)
        for name in dirs:
            rmdir(name, dir_fd = root_fd)

    env = self.env
    config = self.config
    shares_info = {}
    info = dict(level = self.level, failed = False, shares = shares_info)

    with config.setenv(env):
        self.pre_command(fruitbak = self.fruitbak, host = self.host, newbackup = self)

        info['startTime'] = time_ns()
        for share_config in self.shares:
            combined_config = config.copy()
            combined_config.update(share_config)
            share = NewShare(config = combined_config, newbackup = self)
            shares_info[share.name] = share.backup()
        self.agent.sync()
        info['endTime'] = time_ns()

        self.post_command(fruitbak = self.fruitbak, host = self.host, newbackup = self)

    with open('info.json', 'w', opener = backupdir_fd.opener) as fp:
        dump_json(info, fp)

    hostdir_fd = self.host.hostdir_fd

    self.hashes_fp.close()
    Hashset.sortfile('hashes', self.fruitbak.hash_size, dir_fd = backupdir_fd)

    rename('new', str(self.index), src_dir_fd = hostdir_fd, dst_dir_fd = hostdir_fd)

    return info
def get_files(path=settings.COURSE_PATH):
    """ Find sub folders and files on the path """
    if path[-1] != "/":
        path += "/"
    directories = defaultdict(dict)
    for root, folder, files, _ in os.fwalk(path):
        # Skip hidden files (use a raw string so the regex escape is valid).
        files = [file for file in files if not re.search(r"^\.", file)]
        root_without_path = root.replace(path, "").split("/")
        if root_without_path[0]:
            index = directories
            for level, under_level in enumerate(root_without_path, start=1):
                if under_level in index:
                    index = index[under_level]['folders']
                else:
                    get_folder_file(files, under_level, index)
        elif files:
            folder = root.split("/")[-2]
            get_folder_file(files, folder, directories)
    return directories
def main():
    home_path = os.path.expanduser('~')
    files_path = os.path.join(home_path, 'Documents')
    file_list = os.fwalk(files_path)
    counter = 0
    for loc_f in file_list:
        # print()
        for i_file in loc_f[2]:
            counter += 1
            file_path = os.path.join(loc_f[0], i_file)
            # print(file_path)
            file_hash = get_hash(file_path)
            # print(file_hash)
            to_dict(file_hash)
    pprint.pprint(let_dict)
    print('Overall count: ', counter)
    print('Std is: ', np.std([i for i in let_dict.values()]))
def copytree_owner(src, dest, userid):
    """
    Copy the contents of a directory but ignore files not owned by `userid`.
    """
    src = os.path.abspath(src)
    dest = os.path.abspath(dest)

    for root, dirs, files, rootfd in os.fwalk(src):
        assert root.startswith(src)
        local_dest = root[len(src) + 1:]
        local_dest = os.path.join(dest, local_dest)

        root_owner = os.fstat(rootfd).st_uid
        if root != src and root_owner != userid:
            logger.critical(
                "Found dir with invalid owner. %s should be "
                "owned by %s but is owned by %s. Can not write "
                "to dropbox", root, userid, root_owner)
            continue

        for file in files:
            def opener(f, flags):
                return os.open(f, flags, dir_fd=rootfd)

            with open(file, 'rb', opener=opener) as fsrc:
                owner = os.fstat(fsrc.fileno()).st_uid
                if userid is not None and owner != userid:
                    logger.critical(
                        "Found file with invalid owner. %s should "
                        "be owned by %s but is owned by %s. Can "
                        "not write to dropbox",
                        os.path.join(root, file), userid, owner)
                    continue
                with open(os.path.join(local_dest, file), 'wb') as fdst:
                    shutil.copyfileobj(fsrc, fdst)

        for dir in dirs:
            os.mkdir(os.path.join(local_dest, dir), 0o700)
def deepscan_dir(dirname, crud_object, indexfile=None):
    if not indexfile:
        indexfile = os.path.join(dirname, '.index.db')
    index = shelve.open(indexfile)
    for root, dirs, files, dir_fd in os.fwalk(dirname):
        current_state = DirObject(files, dirs, dir_fd)
        if root not in index:
            previous_state = DirObject([], [], None)
        else:
            previous_state = index[root]
        # deleted files
        for name in previous_state.mtimed_files.keys() - current_state.mtimed_files.keys():
            crud_object.delete(os.path.join(root, name))
        # created files
        for name in current_state.mtimed_files.keys() - previous_state.mtimed_files.keys():
            crud_object.create(os.path.join(root, name))
        # updated files
        for name in current_state.mtimed_files:
            if name in previous_state.mtimed_files and \
                    previous_state.mtimed_files[name] != current_state.mtimed_files[name]:
                crud_object.update(os.path.join(root, name))
        # deleted dirs (TODO: recursively)
        roots_to_delete = []
        for dirname in previous_state.dirs_set - current_state.dirs_set:
            prefix = dirname
            for root_dirname, dirobject in index.items():
                if root_dirname.startswith(prefix):
                    for name in dirobject.mtimed_files:
                        crud_object.delete(os.path.join(root_dirname, name))
                    roots_to_delete.append(root_dirname)
        for root_dirname in roots_to_delete:
            del index[root_dirname]
        if root not in index or index[root] != current_state:
            index[root] = current_state
    index.close()
def copy_common_notebooks(staging_notebooks_dir):
    common_folders_files = [f for f in os.listdir('pynq/notebooks/')]
    for basename in common_folders_files:
        if basename != 'arch':
            dst_folder_file = os.path.join(staging_notebooks_dir, basename)
            src_folder_file = os.path.join('pynq/notebooks/', basename)
            if os.path.isdir(src_folder_file):
                copy_tree(src_folder_file, dst_folder_file)
            elif os.path.isfile(src_folder_file):
                copy_file(src_folder_file, dst_folder_file)
    if os.path.exists(os.path.join('pynq/notebooks/arch', CPU_ARCH)):
        dir_fd = os.open(os.path.join('pynq/notebooks/arch', CPU_ARCH),
                         os.O_RDONLY)
        dirs = os.fwalk(dir_fd=dir_fd)
        for dir, _, files, _ in dirs:
            if not os.path.exists(os.path.join(staging_notebooks_dir, dir)):
                os.mkdir(os.path.join(staging_notebooks_dir, dir))
            for f in files:
                copy_file(
                    os.path.join('pynq/notebooks/arch', CPU_ARCH, dir, f),
                    os.path.join(staging_notebooks_dir, dir, f))
        os.close(dir_fd)
def compile_all(self):
    import time
    t_ini = time.time()
    with futures.ThreadPoolExecutor(max_workers=5) as tpe:
        for cur_dir, dirs, files, root_fd in os.fwalk(top=self.src_path):
            rel_path = os.path.relpath(cur_dir, self.src_path)
            compiled_dir_path = os.path.join(self.out_path, rel_path)
            mkdirs_called = False
            for file_i in files:
                if self.exclude_regex is not None and self.exclude_regex.match(file_i):
                    continue
                # only create dirs and files if the compilation is successful
                # and the files are not excluded
                if not mkdirs_called:
                    os.makedirs(compiled_dir_path, exist_ok=True)
                    mkdirs_called = True
                file_src_i = os.path.join(cur_dir, file_i)
                file_i_name, file_i_ext = os.path.splitext(file_i)
                compiled_out_path = os.path.normpath(
                    os.path.join(compiled_dir_path, file_i_name + self.ext_replace)
                )
                tpe.submit(self.compile_and_save, file_src_i, compiled_out_path)
        tpe.shutdown(wait=True)
    print(time.time() - t_ini)
def stat_directory(directory):
    """Return file count and total size of cache subdirectory.

    `directory` should be a relative directory under
    :attr:`poor.conf.CACHE_HOME_DIR`.
    """
    count = 0
    bytes = 0
    basename = directory
    directory = os.path.join(poor.CACHE_HOME_DIR, directory)
    directory = os.path.realpath(directory)
    if os.path.isdir(directory):
        for root, dirs, files, rootfd in os.fwalk(directory, follow_symlinks=False):
            count += len(files)
            bytes += sum(os.stat(x, dir_fd=rootfd).st_size for x in files)
    names = dict((x["pid"], x["name"]) for x in poor.util.get_tilesources())
    name = names.get(basename, basename)
    return dict(directory=basename, name=name, count=count,
                bytes=bytes, size=poor.util.format_filesize(bytes))
def build_mlflow_model(homedir):
    from sklearn import datasets
    from sklearn.ensemble import RandomForestClassifier
    import mlflow
    import mlflow.sklearn
    from mlflow.models.signature import infer_signature
    import pandas as pd
    from mlflow.tracking._model_registry import fluent

    mlflow.set_tracking_uri('http://localhost:5000')
    with mlflow.start_run() as run:
        iris = datasets.load_iris()
        iris_train = pd.DataFrame(iris.data, columns=iris.feature_names)
        clf = RandomForestClassifier(max_depth=7, random_state=0)
        clf.fit(iris_train, iris.target)
        signature = infer_signature(iris_train, clf.predict(iris_train))
        model_name = "iris_rf"
        mlflow.sklearn.log_model(clf, model_name, signature=signature,
                                 registered_model_name=model_name)
        # os.fwalk() returns a lazy iterator, so materialize it and use a
        # %s placeholder so the logging call actually renders the listing.
        logging.info('runs: %s', list(os.fwalk(homedir)))
        return fluent.MlflowClient().get_model_version_download_uri(
            name=model_name, version=1)
def stat_directory(directory):
    """Return file count and total size of cache subdirectory.

    `directory` should be a relative directory under
    :attr:`poor.conf.CACHE_HOME_DIR`.
    """
    count = 0
    bytes = 0
    basename = directory
    directory = os.path.join(poor.CACHE_HOME_DIR, directory)
    directory = os.path.realpath(directory)
    if os.path.isdir(directory):
        for root, dirs, files, rootfd in os.fwalk(
                directory, follow_symlinks=False):
            count += len(files)
            bytes += sum(os.stat(x, dir_fd=rootfd).st_size for x in files)
    names = dict((x["pid"], x["name"]) for x in poor.util.get_tilesources())
    name = names.get(basename, basename)
    return dict(directory=basename, name=name, count=count,
                bytes=bytes, size=poor.util.format_filesize(bytes))
def clean_temp(self):
    for it in os.fwalk('/tmp'):
        if 'OVAL_NVD_' in it[0]:
            shutil.rmtree(it[0])
    return
        DELIMITER ';', NULL '', HEADER false, QUOTE '"', ENCODING 'LATIN-1'
    );
    """ % (table_name, file_csv)
    get_ipython().magic('sql {copy}')


# In[ ]:


import codecs

consulta_cand_dir = e2016_tse_home + "/consulta_cand_2016"

for root, dirs, files, rootfd in os.fwalk(consulta_cand_dir, topdown=False):
    for filename in files:
        file_full_name = consulta_cand_dir + '/' + filename
        name = filename.split(".")
        if name[1] == "txt":
            fname = name[0]
            uf = fname[-2:]
            table_name = ('tse_' + uf + '.' + fname).lower()
            print(table_name, ' <-- ', file_full_name)
            create_table_consulta_cand(table_name)
            load_csv_into_table(table_name, file_full_name)


# ## Import the prestacao_contas_final_2016 files

# In[ ]:
def walk(self):
    for root, dirs, files, rootfd in os.fwalk(self._topdir):
        self.get_directory_entries(root, rootfd, files)
import os

for i in os.fwalk("."):
    print(i)
    for f in i[2]:
        print(f)

for i in os.walk("."):
    print(i)

process = os.popen("ls -la")
print(process)
def find_function_files() -> List[str]:
    path, base, files, _ = list(os.fwalk())[0]
    server_funcs = [
        i for i in files if i[:len('server_func')] == 'server_func'
    ]
    return server_funcs
def empty_the_dir(top):
    '''remove files and folders from the bottom of the tree upwards'''
    for _, dirs, files, rootfd in os.fwalk(top, topdown=False):
        _ = [os.remove(name, dir_fd=rootfd) for name in files]
        _ = [os.rmdir(name, dir_fd=rootfd) for name in dirs]
import sys, os, re

if len(sys.argv) != 3:
    print("Usage: {} <directory> <suffix>".format(sys.argv[0]))
    exit(1)

directory, suffix = sys.argv[1:3]
# Raw string keeps the backslash escape in the regex valid.
regex = re.compile(r"\.{}$".format(suffix))

for root, dirs, files, rootfd in os.fwalk(directory):
    for filename in files:
        if regex.search(filename):
            print(os.path.abspath(os.path.join(root, filename)))
def _walk( self, directory: ty.Union[ty.AnyStr, int], directory_str: ty.Optional[ty.AnyStr], matcher: Matcher[ty.AnyStr], follow_symlinks: bool, intermediate_dirs: bool ) -> ty.Generator[FSNodeEntry, ty.Any, None]: sep = ( utils.maybe_fsencode(os.path.sep, directory_str) # type: ty.AnyStr if directory_str is not None else os.path.sep) dot = utils.maybe_fsencode(".", sep) # type: ty.AnyStr # Identify the leading portion of the `dirpath` returned by `os.walk` # that should be dropped if not isinstance(directory, int): while directory.endswith(sep): directory = directory[:-len(sep)] prefix = (directory if not isinstance(directory, int) else dot) + sep reported_directories = set() # type: ty.Set[ty.AnyStr] # Always report the top-level directory even if nothing therein is matched reported_directories.add(utils.maybe_fsencode("", sep)) yield FSNodeEntry(type=FSNodeType.DIRECTORY, path=prefix[:-len(sep)], relpath=dot, name=dot, parentfd=None) if not isinstance(directory, int): walk_iter = os.walk(directory, followlinks=follow_symlinks) else: walk_iter = os.fwalk(dot, dir_fd=directory, follow_symlinks=follow_symlinks) try: for result in walk_iter: dirpath, dirnames, filenames = result[0:3] dirfd = result[3] if len(result) > 3 else None # Remove the directory prefix from the received path _, _, dirpath = dirpath.partition(prefix) # Keep track of reported intermediaries, so that we only check for # these at most once per directory base intermediates_reported = False # type: bool for filename, is_dir in self._join_dirs_and_files( list(dirnames), filenames): filepath = os.path.join(dirpath, filename) # Check if matcher thinks we should descend into this directory if is_dir and not matcher.should_descend(filepath): dirnames.remove(filename) # Check if matcher thinks we should report this node if not matcher.should_report(filepath, is_dir=is_dir): continue # Ensure that all containing directories are reported # before reporting this node if not intermediates_reported and intermediate_dirs: parts = dirpath.split(sep) for end_offset in range(len(parts)): parent_dirpath = sep.join(parts[0:(end_offset + 1)]) if parent_dirpath not in reported_directories: reported_directories.add(parent_dirpath) yield FSNodeEntry(type=FSNodeType.DIRECTORY, path=(prefix + parent_dirpath), relpath=parent_dirpath, name=parts[end_offset], parentfd=None) intermediates_reported = True # Report the target file or directory if is_dir: reported_directories.add(filepath) yield FSNodeEntry(type=FSNodeType.DIRECTORY, path=(prefix + filepath), relpath=filepath, name=filename, parentfd=dirfd) else: yield FSNodeEntry(type=FSNodeType.FILE, path=(prefix + filepath), relpath=filepath, name=filename, parentfd=dirfd) finally: # Make sure the file descriptors bound by `os.fwalk` are freed on error walk_iter.close() # Close root file descriptor of `os.fwalk` as well if self._close_fd is not None: os.close(self._close_fd) self._close_fd = None
if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("-d", "--dir", dest="dirname",
                      action="store", type="string",
                      help="write report to FILE",
                      default="/home/dev/githubClone/shadowsocks")
    (options, args) = parser.parse_args()

    import pdb
    pdb.set_trace()

    db = SQLiteWraper('root', 'root', '127.0.0.1', 'walkdir')
    dirwalk = os.fwalk(options.dirname)
    smbwalk = getsmbwalklist('192.168.1.1', 'Expansion_Drive(1)', 'admin',
                             'Ab860813', '/qzfs/yy/aido')
    # composewalk = chain(dirwalk,smbwalk);
    for root, dirs, files, rootfd in dirwalk:
        #print(root, dirs, files, rootfd)
        for f in files:
            appendfix = getappendfix(f)
            filehere = os.path.join(root, f)
            dbresult = db.select("select * from filedict where filedir = '" +
                                 filehere.replace("'", "_") + "'")
            if (len(dbresult) == 0):
                insertsql = ("insert into filedict (filedir,filename,appendfix) values ('" +
                             filehere.replace("'", "_") + "','" +
                             f.replace("'", "_") + "','" + appendfix + "')")
                db.execute(insertsql)
os.symlink(src="src", dst="dst")  # $ getAPathArgument="src" getAPathArgument="dst"

os.truncate("path", 42)  # $ getAPathArgument="path"
os.truncate(path="path", length=42)  # $ getAPathArgument="path"

os.unlink("path")  # $ getAPathArgument="path"
os.unlink(path="path")  # $ getAPathArgument="path"

os.utime("path")  # $ getAPathArgument="path"
os.utime(path="path")  # $ getAPathArgument="path"

os.walk("top")  # $ getAPathArgument="top"
os.walk(top="top")  # $ getAPathArgument="top"

os.fwalk("top")  # $ getAPathArgument="top"
os.fwalk(top="top")  # $ getAPathArgument="top"

# Linux only
os.getxattr("path", "attribute")  # $ getAPathArgument="path"
os.getxattr(path="path", attribute="attribute")  # $ getAPathArgument="path"

# Linux only
os.listxattr("path")  # $ getAPathArgument="path"
os.listxattr(path="path")  # $ getAPathArgument="path"

# Linux only
os.removexattr("path", "attribute")  # $ getAPathArgument="path"
os.removexattr(path="path", attribute="attribute")  # $ getAPathArgument="path"

# Linux only
def all_files(which_dir):
    for songs in os.fwalk(which_dir):
        return songs[2]
import os
import random

problems = []
# os.fwalk() yields (dirpath, dirnames, filenames, dirfd) tuples;
# walk the current directory and collect shell scripts.
for path, dirs, files, _ in os.fwalk():
    for file in files:
        if file[-3:] == ".sh":
            problems.append(os.path.join(path, file))

random.shuffle(problems)
print('\n'.join(problems[0:10]))
def __init__(self, root, sorton='name', filtercount=None, showerrors=True): assert sorton in ('name', 'size') rp = pathlib.Path(root).expanduser().resolve() basedepth = len(str(rp).split('/')) xgen = os.fwalk(str(rp), ) xgc = 0 print('in', rp) for dirpath, dirnames, filenames, dir_fd in xgen: pathsplit = dirpath.split('/') try: finfos = [(fn, os.stat(fn, dir_fd=dir_fd).st_size) for fn in filenames] except: finfos = [] for fn in filenames: try: finfos.append((fn, os.stat(fx, dir_fd=dir_fd).st_size)) except: if showerrors: print('oops at', fn, 'in', dirpath) filtcount = 0 if not filtercount is None: for fn in filenames: if fn.endswith(filtercount): filtcount += 1 if sorton == 'name': sortix = 0 sreverse = False elif sorton == 'size': sortix = 1 sreverse = True else: raise ValueError('I cannot sort on' + str(sorton)) finfos.sort(key=lambda x: x[sortix], reverse=sreverse) dirdict = { 'folds': {}, 'files': dict(finfos), 'path': dirpath, 'name': pathsplit[-1] } pcount = len(pathsplit) - basedepth if pcount == 0: self.data = dirdict plist = [dirdict] elif pcount == len(plist): plist[-1]['folds'][pathsplit[-1]] = dirdict plist.append(dirdict) elif pcount > len(plist): print('NEVERRRRRRRRRRR') elif pcount < len(plist): while pcount < len(plist): finished = plist.pop(-1) finished['size'] = sum(finished['files'].values()) + sum( f['size'] for f in finished['folds'].values()) finished['filtc'] = len(finished['files']) + sum( f['filtc'] for f in finished['folds'].values()) if len(finished['folds']) > 0: sx = list(finished['folds'].items()) sx.sort(key=lambda kv: kv[1][sorton], reverse=sreverse) finished['folds'] = dict(sx) plist[-1]['folds'][pathsplit[-1]] = dirdict plist.append(dirdict) else: print('too steep?') xgc += 1 while len(plist) > 0: finished = plist.pop(-1) finished['size'] = sum(finished['files'].values()) + sum( f['size'] for f in finished['folds'].values()) if len(finished['folds']) > 0: sx = list(finished['folds'].items()) sx.sort(key=lambda kv: kv[1][sorton], reverse=sreverse) finished['folds'] = dict(sx) print(sizestr(self.data['size']))
import os

for root, dirs, files, rootfd in os.fwalk('/usr/lib/python3.6'):
    print(root, "consumes", end=" ")
    print(sum([os.stat(name, dir_fd=rootfd).st_size for name in files]), end="")
    print("bytes in", len(files), "non-directory files")
    if 'CVS' in dirs:
        dirs.remove('CVS')  # don't visit CVS directories
import os

for root, dirs, files, rootfd in os.fwalk('/home/senthil/hg/cpython/Lib/email'):
    print(root, "consumes ", end="")
    # os.fstatat() never shipped in the os module; os.stat() with dir_fd is
    # the supported spelling.
    print(sum([os.stat(name, dir_fd=rootfd).st_size for name in files]), end="")
    print("bytes in", len(files), "non-directory files")
    if 'CVS' in dirs:
        dirs.remove('CVS')  # don't visit CVS directories
import os

for root, dirs, files, rootfd in os.fwalk('/home/albert/data'):
    size = sum([os.stat(name, dir_fd=rootfd).st_size for name in files])
    print("{} consumes {} bytes in {} non-directory files".format(
        root, size, len(files)))
            if l[0] != '[':
                continue
            # find() returns -1 when ']:' is missing, so the check below works.
            bracket_index = l.find(']:')
            if bracket_index < 0:
                continue
            key = l[1:bracket_index].lower()
            # TODO : Log key value found in DEBUG mode
            text = l[bracket_index + 2:].strip()
            refs[key] = [path, lineno, text, 0]
        # TODO : Log number of key / values retrieved in DEBUG mode
    except IOError:
        sys.exit('Error opening ' + filepath)

import pprint

for dirpath, subdirs, subfiles, dir_fd in os.fwalk(skc_base_path, '_data'):
    for filename in subfiles:
        if not filename.endswith('.md'):
            continue
        with os.fdopen(os.open(filename, os.O_RDONLY, dir_fd=dir_fd)) as mdfile:
            for l in mdfile:
                for match in RE_MDLINK.finditer(l):
                    key, value = match.group(2, 1)
                    if not key:
                        key = value
                    key = key.lower()
                    if key.startswith('/'):
                        pass
                    elif key in skc_refs:
                        # Increment reference count
                        skc_refs[key][3] += 1
def main(args=None): import argparse parser = argparse.ArgumentParser( description='Surviving Mars music shuffler script.' ' Produces radio-like playlist with radio blurbs/talks interspersed with music.' ' Source files can be extracted from hpk files with https://github.com/nickelc/hpk') parser.add_argument('src_dir', help='Source path with mp3/ogg/opus music files.' ' These should be organized as "station/file.ogg",' ' with filenames preserved from hpk, except for filename' ' extensions (can be mp3, ogg or original opus).') parser.add_argument('dst_pls', help='Destination pls file to produce.') parser.add_argument('--chance-pre', type=float, default=0.8, metavar='float', help='Chance of adding blurb/talk before music track (range: 0 - 1.0). Default: %(default)s') parser.add_argument('--chance-talk', type=float, default=0.7, metavar='float', help='Chance of adding talk segment instead' ' of blurb before music track (if any, range: 0 - 1.0). Default: %(default)s') parser.add_argument('--chance-ad', type=float, default=0.5, metavar='float', help='Chance of adding commercial segment before' ' any blurb/talk/music track combo (range: 0 - 1.0). Default: %(default)s') opts = parser.parse_args(sys.argv[1:] if args is None else args) ### Find tracks src = pl.Path(opts.src_dir) src_lists = dict() src_t_res = dict( blurb=re.compile(r'^Blurb_'), talk=re.compile(r'^Talks_'), ad=re.compile('^Commercials_'), music=re.compile('.') ) src_rp = str(src.resolve()) for root, dirs, files, dir_fd in os.fwalk(src, follow_symlinks=True): root = pl.Path(root) st = str(root.resolve()) if st.startswith(src_rp): st = st[len(src_rp)+1:] if st: st = st.rsplit('/', 1)[-1] for p in files: if not re.search(r'(?i)\.(ogg|oga|mp3|opus)$', p): continue track = re.sub(r'^Radio_[^_]+_', '', p.rsplit('.', 1)[0]) for t, rx in src_t_res.items(): if not rx.search(track): continue if st not in src_lists: src_lists[st] = adict() if t not in src_lists[st]: src_lists[st][t] = adict() src_lists[st][t][track] = root / p break else: raise RuntimeError(f'Failed to detect track type: {track} [{root} / {p}]') ### Assemble playlist pls = list() if '' in src_lists: tracks = src_lists.pop('').music.values() random.shuffle(tracks) pls.extend(tracks) # Weighted random is used so that longest track-list won't end up in the tail src_weights = adict((k, len(v.music)) for k,v in src_lists.items()) while src_weights: t, = random.choices(list(src_weights.keys()), src_weights.values()) src_list = src_lists[t] if not src_list.music: src_weights.pop(t) continue if random.random() < opts.chance_ad and src_list.get('ad'): k = random.choice(list(src_list.ad)) p = src_list.ad.pop(k) pls.append(p) if random.random() < opts.chance_pre: k = 'blurb' if random.random() > opts.chance_talk else 'talk' files = src_list.get(k) if not files: files = src_list.get(next(iter({'blurb', 'talk'}.difference([k])))) if files: k = random.choice(list(files)) p = files.pop(k) pls.append(p) k = random.choice(list(src_list.music)) p = src_list.music.pop(k) pls.append(p) ### Write playlist pl.Path(opts.dst_pls).write_text(''.join(f'{p}\n' for p in pls))
        steamworks_present = False
        for entry in line_list:
            if entry.startswith("[OnlineSubsystemSteamworks.KFWorkshopSteamworks]"):
                steamworks_present = True
                entry = entry + "ServerSubscribedWorkshopItems=" + id + "\n"
            file.write(entry)
        if not steamworks_present:
            entry = ("[OnlineSubsystemSteamworks.KFWorkshopSteamworks]\n" +
                     "ServerSubscribedWorkshopItems=" + id + "\n")
            file.write(entry)

# Call startstop
subprocess.call("./startstop.sh")

# Grab file name
for root, dirs, files, rootfd in os.fwalk('KFGame/Cache/' + id):
    filelist = files
map = filelist.pop()[:-4]

# Data Store Prototype
with open('KFGame/Config/LinuxServer-KFGame.ini', 'r+') as file:
    line_list = []
    for line in file:
        line_list.append(line)
    file.seek(0)
    for entry in line_list:
        if entry.startswith("[KF-Default KFMapSummary]"):
            entry = (map + " KFMapSummary]\nMapName=" + map +
                     "\nMapAssociation=0\nScreenshotPathName=UI_MapPreview_TEX.UI_MapPreview_Placeholder\n\n" +
                     entry)
        file.write(entry)

# #Add map to map cycle
def _body(self): """Streams the contents of the selected directory as binary chunks.""" def match_short_path(short_path): # Remove initial path component so that all files are based in # the target directory itself (not one level above) if os.path.sep in short_path: path = short_path.split(os.path.sep, 1)[1] else: return False # Convert all path seperators to POSIX style path = path.replace(os.path.sep, '/') # Do the matching and the simplified path for pattern in self.patterns: if pattern.match(path): return True return False visited_directories = set() # Normalize directory path without destroying symlinks sep = os.path.sep directory = self.directory if not isinstance(self.directory, int): directory = os.fspath(directory) if hasattr(os, "fspath") else directory if isinstance(directory, six.text_type) and not isinstance(sep, six.text_type): #PY2 import sys sep = sep.decode(sys.getfilesystemencoding()) elif isinstance(directory, six.binary_type) and not isinstance(sep, six.binary_type): #PY3 noqa sep = os.fsencode(sep) while sep * 2 in directory: directory.replace(sep * 2, sep) if directory.endswith(sep): directory = directory[:-len(sep)] # Determine base directory name to send to IPFS (required and also used # as part of the wrap_with_directory feature) if self.dirname: dirname = self.dirname elif not isinstance(directory, int): dirname = os.path.basename(directory) dirname = dirname if isinstance(dirname, str) else os.fsdecode(dirname) else: dirname = "_" assert type(directory) == type(dirname) or isinstance(directory, int) # Identify the unnecessary portion of the relative path truncate = (directory if not isinstance(directory, int) else ".") + sep # Traverse the filesystem downward from the target directory's uri # Errors: `os.walk()` will simply return an empty generator if the # target directory does not exist. 
wildcard_directories = set() if not isinstance(self.directory, int): walk_iter = os.walk(self.directory) else: walk_iter = os.fwalk(dir_fd=self.directory) for result in walk_iter: cur_dir, filenames = result[0], result[2] dir_fd = -1 if not isinstance(self.directory, int) else result[3] # find the path relative to the directory being added if len(truncate) > 0: _, _, short_path = cur_dir.partition(truncate) else: short_path = cur_dir # remove leading / or \ if it is present if short_path.startswith(os.path.sep): short_path = short_path[len(os.path.sep):] short_path = os.path.join(dirname, short_path) if short_path else dirname wildcard_directory = False if os.path.split(short_path)[0] in wildcard_directories: # Parent directory has matched a pattern, all sub-nodes should # be added too wildcard_directories.add(short_path) wildcard_directory = True else: # Check if directory path matches one of the patterns if match_short_path(short_path): # Directory matched pattern and it should therefor # be added along with all of its contents wildcard_directories.add(short_path) wildcard_directory = True # Always add directories within wildcard directories - even if they # are empty if wildcard_directory: #PY2: Use `yield from` instead for chunk in self._body_directory(short_path, visited_directories): yield chunk # Iterate across the files in the current directory for filename in filenames: # Find the filename relative to the directory being added short_file_path = os.path.join(short_path, filename) if dir_fd < 0: file_location = os.path.join(cur_dir, filename) else: file_location = filename if wildcard_directory: # Always add files in wildcard directories #PY2: Use `yield from` instead for chunk in self._body_file(short_file_path, file_location, dir_fd=dir_fd): yield chunk else: # Add file (and all missing intermediary directories) # if it matches one of the patterns if match_short_path(short_file_path): #PY2: Use `yield from` instead for chunk in self._body_directory(short_path, visited_directories): yield chunk for chunk in self._body_file(short_file_path, file_location, dir_fd=dir_fd): yield chunk #PY2: Use `yield from` instead for chunk in self._gen_end(): yield chunk
            if l[0] != '[':
                continue
            # find() returns -1 when ']:' is missing, so the check below works.
            bracket_index = l.find(']:')
            if bracket_index < 0:
                continue
            key = l[1:bracket_index].lower()
            # TODO : Log key value found in DEBUG mode
            text = l[bracket_index + 2:].strip()
            refs[key] = [path, lineno, text, 0]
        # TODO : Log number of key / values retrieved in DEBUG mode
    except IOError:
        sys.exit('Error opening ' + filepath)

import pprint

for dirpath, subdirs, subfiles, dir_fd in os.fwalk(sky_base_path, 'content'):
    for filename in subfiles:
        if not filename.endswith('.md'):
            continue
        with os.fdopen(os.open(filename, os.O_RDONLY, dir_fd=dir_fd)) as mdfile:
            for l in mdfile:
                for match in RE_MDLINK.finditer(l):
                    key, value = match.group(2, 1)
                    if not key:
                        key = value
                    key = key.lower()
                    if key.startswith('/'):
                        pass
                    elif key in sky_refs:
                        # Increment reference count
                        sky_refs[key][3] += 1
def calculate_size(files):
    sum_ = 0
    # Rename the loop variable so it doesn't shadow the `files` parameter.
    for root, dirs, filenames, rootfd in os.fwalk(files):
        sum_ += sum([os.stat(name, dir_fd=rootfd).st_size for name in filenames])
    return sum_
def _body(self): """Streams the contents of the selected directory as binary chunks.""" def match_short_path(short_path): # Remove initial path component so that all files are based in # the target directory itself (not one level above) if os.path.sep in short_path: path = short_path.split(os.path.sep, 1)[1] else: return False # Convert all path seperators to POSIX style path = path.replace(os.path.sep, '/') # Do the matching and the simplified path for pattern in self.patterns: if pattern.match(path): return True return False visited_directories = set() # Normalize directory path without destroying symlinks sep = os.path.sep directory = self.directory if not isinstance(self.directory, int): directory = os.fspath(directory) if hasattr(os, "fspath") else directory if not isinstance(directory, str): sep = os.fsencode(sep) while sep * 2 in directory: directory.replace(sep * 2, sep) if directory.endswith(sep): directory = directory[:-len(sep)] # Determine base directory name to send to IPFS (required and also used # as part of the wrap_with_directory feature) if self.dirname: dirname = self.dirname elif not isinstance(directory, int): dirname = os.path.basename(directory) dirname = dirname if isinstance(dirname, str) else os.fsdecode(dirname) else: dirname = "_" if isinstance(directory, (str, int)) else os.fsencode("_") assert(type(directory) == type(dirname) or isinstance(directory, int)) # Identify the unnecessary portion of the relative path truncate = (directory if not isinstance(directory, int) else ".") + sep # Traverse the filesystem downward from the target directory's uri # Errors: `os.walk()` will simply return an empty generator if the # target directory does not exist. wildcard_directories = set() if not isinstance(self.directory, int): walk_iter = os.walk(self.directory) else: walk_iter = os.fwalk(dir_fd=self.directory) for result in walk_iter: cur_dir, filenames = result[0], result[2] dir_fd = -1 if not isinstance(self.directory, int) else result[3] # find the path relative to the directory being added if len(truncate) > 0: _, _, short_path = cur_dir.partition(truncate) else: short_path = cur_dir # remove leading / or \ if it is present if short_path.startswith(os.path.sep): short_path = short_path[len(os.path.sep):] short_path = os.path.join(dirname, short_path) wildcard_directory = False if os.path.split(short_path)[0] in wildcard_directories: # Parent directory has matched a pattern, all sub-nodes should # be added too wildcard_directories.add(short_path) wildcard_directory = True else: # Check if directory path matches one of the patterns if match_short_path(short_path): # Directory matched pattern and it should therefor # be added along with all of its contents wildcard_directories.add(short_path) wildcard_directory = True # Always add directories within wildcard directories - even if they # are empty if wildcard_directory: #PY2: Use `yield from` instead for chunk in self._body_directory(short_path, visited_directories): yield chunk # Iterate across the files in the current directory for filename in filenames: # Find the filename relative to the directory being added short_file_path = os.path.join(short_path, filename) if dir_fd < 0: file_location = os.path.join(cur_dir, filename) else: file_location = filename if wildcard_directory: # Always add files in wildcard directories #PY2: Use `yield from` instead for chunk in self._body_file(short_file_path, file_location, dir_fd=dir_fd): yield chunk else: # Add file (and all missing intermediary directories) # if it matches one of 
the patterns if match_short_path(short_file_path): #PY2: Use `yield from` instead for chunk in self._body_directory(short_path, visited_directories): yield chunk for chunk in self._body_file(short_file_path, file_location, dir_fd=dir_fd): yield chunk #PY2: Use `yield from` instead for chunk in self._gen_end(): yield chunk
def backup(src_path, backend, recipients, recipients_files, reuse_backup_count=30, follow_symlinks=False): t0 = monotime() encryption_key = os.urandom(32) encryption_key_sha1 = hashlib.new('sha1', encryption_key).hexdigest() if recipients or recipients_files: age_encrypted_encryption_key = encrypt_with_age( encryption_key, recipients=recipients, recipients_files=recipients_files) else: logger.info( 'No recipients specified - the data file AES key will be stored in metadata file unencrypted' ) age_encrypted_encryption_key = None backup_id = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') adapter = ChunkAdapter(backend) logger.info('Backing up %s to %s - backup id %s', src_path, backend, backup_id) with ExitStack() as stack: stack.callback(backend.close_data_file) temp_dir = Path( stack.enter_context( TemporaryDirectory(prefix=f'baq.{backup_id}.'))) reuse_encryption_keys, reuse_blocks = load_previous_backup_for_reuse( backend, temp_dir, reuse_backup_count) meta_path = temp_dir / f'baq.{backup_id}.meta' meta_file = stack.enter_context(gzip.open(meta_path, mode='wb')) meta_file.write( to_json( generate_header( backup_id=backup_id, encryption_key=encryption_key, encryption_key_sha1=encryption_key_sha1, age_encrypted_encryption_key=age_encrypted_encryption_key, reuse_encryption_keys=reuse_encryption_keys))) for dir_path, dirs, files, dir_fd in os.fwalk( src_path, follow_symlinks=follow_symlinks): #logger.debug('fwalk -> %s, %s, %s, %s', dir_path, dirs, files, dir_fd) dir_stat = os.fstat(dir_fd) meta_file.write( to_json({ 'directory': { 'path': str(Path(dir_path).relative_to(src_path)), 'mode': dir_stat.st_mode, 'uid': dir_stat.st_uid, 'gid': dir_stat.st_gid, 'atime': dir_stat.st_atime, 'ctime': dir_stat.st_ctime, 'mtime': dir_stat.st_mtime, } })) for file_name in files: file_path = str( Path(dir_path).relative_to(src_path) / file_name) try: file_stat = os.stat(file_name, dir_fd=dir_fd, follow_symlinks=follow_symlinks) except FileNotFoundError as e: logger.warning('Cannot stat file %s: %s', file_path, e) continue if stat.S_ISLNK(file_stat.st_mode): try: symlink_target = os.readlink(file_name, dir_fd=dir_fd) except Exception as e: logger.warning('Cannot read symlink target: %s - %r', file_path, e) else: meta_file.write( to_json({ 'symlink': { 'path': file_path, 'target': symlink_target, 'mode': file_stat.st_mode, 'uid': file_stat.st_uid, 'gid': file_stat.st_gid, 'atime': file_stat.st_atime, 'ctime': file_stat.st_ctime, 'mtime': file_stat.st_mtime, } })) continue elif not stat.S_ISREG(file_stat.st_mode): logger.warning('Skipping file with unknown type: %s', file_path) continue assert stat.S_ISREG(file_stat.st_mode) try: file_stream = open(file_name, mode='rb', opener=partial(os.open, dir_fd=dir_fd)) except PermissionError as e: logger.warning('Cannot open file %s: %s', file_path, e) continue with file_stream: logger.debug('Processing file %s', file_path) file_hash = hashlib.new('sha3_512') file_stat = os.fstat(file_stream.fileno()) meta_file.write( to_json({ 'file': { 'path': file_path, 'mode': file_stat.st_mode, 'uid': file_stat.st_uid, 'gid': file_stat.st_gid, 'atime': file_stat.st_atime, 'ctime': file_stat.st_ctime, 'mtime': file_stat.st_mtime, } })) while True: pos = file_stream.tell() chunk = file_stream.read(chunk_size) if not chunk: break #logger.debug('Read %d bytes from file %s pos %s: %s', len(chunk), file_name, pos, smart_repr(chunk)) file_hash.update(chunk) chunk_hash = hashlib.new('sha3_512', chunk).digest() if chunk_hash in reuse_blocks: meta_file.write( to_json({ 'content': { 'offset': 
pos, 'sha3_512': chunk_hash.hex(), 'df_name': reuse_blocks[chunk_hash]['df_name'], 'df_offset': reuse_blocks[chunk_hash]['df_offset'], 'df_size': reuse_blocks[chunk_hash]['df_size'], 'encryption_key_sha1': reuse_blocks[chunk_hash] ['encryption_key_sha1'], } })) else: chunk_df = adapter.write_data_chunk( backup_id, chunk, encryption_key=encryption_key) meta_file.write( to_json({ 'content': { 'offset': pos, 'sha3_512': chunk_hash.hex(), 'df_name': chunk_df.name, 'df_offset': chunk_df.offset, 'df_size': chunk_df.size, 'encryption_key_sha1': encryption_key_sha1, } })) reuse_blocks[chunk_hash] = { 'df_name': chunk_df.name, 'df_offset': chunk_df.offset, 'df_size': chunk_df.size, 'encryption_key_sha1': encryption_key_sha1, } del chunk meta_file.write( to_json({ 'file_done': { 'sha3_512': file_hash.hexdigest(), } })) adapter.close_data_file() meta_file.write( to_json({ 'done': { 'backup_id': backup_id, 'date': datetime.utcnow().strftime('%Y%m%dT%H%M%SZ'), } })) meta_file.close() backend.store_file(meta_path, name=meta_path.name) logger.info('Backup id %s done in %.3f s', backup_id, monotime() - t0) return BackupResult(backup_id)
def usage():
    print("Usage: lib-finder lib-folder")
    print()
    print("  lib-folder: path of the folder that contains the libraries to search in Maven")


if len(sys.argv) < 2:
    usage()
    exit(1)

libFolder = sys.argv[1]

libraries = next(os.fwalk(libFolder))[2]
libraries = list(
    filter(lambda x: not x.startswith(".") and not x.startswith("biospace"),
           libraries))

equivalences = []
customEquivalences = load_custom_equivalences()

for library in libraries:
    if not library.endswith(".jar"):
        print(f"! {library} does not look like a jar, ignoring...")
        equivalences.append((library, "?"))
        continue

    manualEquivalence = get_configured_equivalence(customEquivalences, library)
    if manualEquivalence is not None:
        equivalences.append((library, manualEquivalence))
import os

for root, dirs, files, rootfd in os.fwalk("/home/senthil/hg/cpython/Lib/email"):
    print(root, "consumes ", end="")
    # os.fstatat() never shipped in the os module; os.stat() with dir_fd is
    # the supported spelling.
    print(sum([os.stat(name, dir_fd=rootfd).st_size for name in files]), end="")
    print("bytes in", len(files), "non-directory files")
    if "CVS" in dirs:
        dirs.remove("CVS")  # don't visit CVS directories