def write_permacache_from_dir(dirname):
    """Feed every file under ``dirname`` to the permacache, then prune.

    Each file is handed to ``write_permacache_from_file`` and unlinked once
    that call returns.  If either step fails, the offending file name is
    reported through ``mr_tools.status`` and the exception is re-raised.
    Afterwards the tree is walked bottom-up and every directory that can be
    removed is removed; directories that are not empty are reported and left
    in place, any other ``OSError`` propagates.
    """
    # The complete list is built up front so that progress reporting knows
    # the total.  With more than tens of millions of files this should
    # either bail out or be reworked to not need the whole list at once.
    all_paths = [
        os.path.join(parent, leaf)
        for parent, _subdirs, leaves in os.walk(dirname)
        for leaf in leaves
    ]

    for path in progress(all_paths, persec=True):
        try:
            write_permacache_from_file(path)
            os.unlink(path)
        except:
            mr_tools.status("failed on %r" % path)
            raise

    mr_tools.status("Removing empty directories")
    for parent, subdirs, _leaves in os.walk(dirname, topdown=False):
        for sub in subdirs:
            target = os.path.join(parent, sub)
            try:
                os.rmdir(target)
            except OSError as err:
                if err.errno != errno.ENOTEMPTY:
                    raise
                mr_tools.status("%s not empty" % (target,))
def test_coerce_pycache_to_old_style():
    """Run ``post.coerce_pycache_to_old_style`` over a small temporary tree.

    The tree holds ``test.py`` plus a versioned ``.pyc`` inside
    ``__pycache__`` at the top level and again inside ``testdir``.  After
    the call, ``test.pyc`` must sit next to each source file and no
    ``__pycache__`` directory may remain.
    """
    pyc_name = 'test.cpython-{0}{1}.pyc'.format(
        sys.version_info.major, sys.version_info.minor)
    placeholder_files = (
        ('test.py',),
        ('__pycache__', pyc_name),
        ('testdir', 'test.py'),
        ('testdir', '__pycache__', pyc_name),
    )

    cwd = os.getcwd()
    with TemporaryDirectory() as tmp:
        os.makedirs(os.path.join(tmp, '__pycache__'))
        os.makedirs(os.path.join(tmp, 'testdir', '__pycache__'))
        for parts in placeholder_files:
            with open(os.path.join(tmp, *parts), 'w') as handle:
                handle.write("\n")

        os.chdir(tmp)
        for root, dirs, files in os.walk(tmp):
            full_paths = [os.path.join(root, leaf) for leaf in files]
            post.coerce_pycache_to_old_style(full_paths, cwd=tmp)

        try:
            assert os.path.isfile(os.path.join(tmp, 'test.pyc')), os.listdir(tmp)
            assert os.path.isfile(os.path.join(tmp, 'testdir', 'test.pyc')), \
                os.listdir(os.path.join(tmp, 'testdir'))
            for root, dirs, files in os.walk(tmp):
                assert '__pycache__' not in dirs
        finally:
            # Always leave the temporary directory before it is deleted.
            os.chdir(cwd)
def native_report2(src):
    """Write ``<src>/index.html`` with one report section per version folder.

    Every immediate sub-directory of ``src`` is treated as a version.
    Versions are processed in reverse case-insensitive name order.  For each
    directory found anywhere below a version folder, the number of ``*.log``
    files directly inside it is counted; the per-directory counts, their
    total and the version name are passed to ``html_report`` and the
    returned markup (followed by ``<br/><br/>``) is appended to the page.

    :param src: directory holding the version folders; also where
        ``index.html`` is written.
    """
    sections = []
    for root, versions, _files in os.walk(src):
        for version in sorted(versions, key=str.lower, reverse=True):
            total = 0
            data = {}
            version_dir = os.path.join(root, version)
            for d_version, dirs, _ in os.walk(version_dir):
                for d in dirs:
                    pattern = os.path.join(d_version, d) + os.sep + "*.log"
                    count = len(glob.glob(pattern))
                    total += count
                    # Reduce "<src>/<d>" to the bare directory name, then
                    # key the entry by the directory that contains it.
                    name = os.path.join(root, d)
                    if name.startswith(src):
                        name = name[len(src):]
                    if name.startswith("/"):
                        name = name[1:]
                    data[d_version + os.sep + name] = count
            sections.append(html_report(data, total, version) + "<br/><br/>")
        # Only the first entry (``src`` itself) lists the versions; stopping
        # here avoids traversing the whole tree just to skip the rest.
        break

    # A context manager guarantees the page is flushed and closed.
    with open(os.path.join(src, "index.html"), "w") as page:
        page.write("".join(sections))
def copy_template():
    """Copy the template tree to ``name`` and fill in its placeholders.

    Works on the module-level names ``template``, ``name`` and ``options``:

    1. ``config_prompt(template)`` is called, then ``template`` is copied
       to ``name`` and a top-level ``config.yaml`` in the copy is removed.
    2. Every directory called ``options.template`` inside the copy is
       replaced by a directory called ``name``.
    3. In every file of the copy, ``{{ var }}`` and ``__config_var__``
       placeholders are substituted line by line via ``replace_variable``.
    """
    config_prompt(template)
    shutil.copytree(template, name)

    config_path = '%s/%s' % (name, 'config.yaml')
    if os.path.exists(config_path):
        os.remove(config_path)

    for dirname, dirnames, files in os.walk(name):
        for index, d in enumerate(dirnames):
            if d == options.template:
                shutil.copytree('%s/%s' % (dirname, d), '%s/%s' % (dirname, name))
                shutil.rmtree('%s/%s' % (dirname, d))
                # Point the walk at the new directory instead of the one
                # that was just deleted, so nested matches are found too.
                dirnames[index] = name

    # Raw strings keep the regex escapes from being read as string escapes.
    braces_pattern = re.compile(r'{{\s*(\w+)\s*}}')
    config_pattern = re.compile(r'__config_(\w+)__')

    for dirname, dirnames, files in os.walk(name):
        for filename in files:
            file_path = '%s/%s' % (dirname, filename)
            with open(file_path, 'r') as f:
                lines = f.readlines()
            first_pass = [braces_pattern.sub(replace_variable, line) for line in lines]
            new_lines = [config_pattern.sub(replace_variable, line) for line in first_pass]
            with open(file_path, 'w') as f:
                f.write(''.join(new_lines))
def walkDir(path, formats=False):
    """Traverse all files and directories under each root in ``path``.

    :param path: iterable of directory paths to walk.
    :param formats: how entries are reported:

        * ``"absolute"`` - each entry joined with the directory it was
          found in (so it is prefixed by the root exactly as given);
        * ``"relative"`` - each entry relative to the root being walked;
        * anything else (default) - the bare file or directory name.
    :returns: ``{"files": [...], "dirs": [...]}``.
    """
    result = {"files": [], "dirs": []}

    def render(top, root, entry):
        if formats == "absolute":
            return os.path.join(root, entry)
        if formats == "relative":
            # relpath avoids treating the root as a regular expression and
            # copes with roots that carry a trailing separator.
            return os.path.relpath(os.path.join(root, entry), top)
        return entry

    for item in path:
        for root, dirs, files in os.walk(item):
            for f in files:
                result["files"].append(render(item, root, f))
            for d in dirs:
                # Directories belong in "dirs" for every format.
                result["dirs"].append(render(item, root, d))
    return result
def _fswalk_follow_symlinks(path):
    '''
    Walk filesystem, following symbolic links (but without recursion),
    on python2.4 and later.

    A first pass over ``path`` collects every symlinked directory as an
    additional walk root.  If a symlink directory loop is detected, a
    warning is emitted and that link is skipped.
    E.g.: dir1/dir2/sym-dir -> ../dir2

    Yields ``(dirpath, dirnames, filenames)`` for ``path`` and for each
    collected root, after ``handle_exclude_include_walk`` has been applied
    to the directory names.
    '''
    assert os.path.isdir(path)  # only designed for directory argument

    roots = set([path])
    for parent, children, _files in os.walk(path):
        handle_exclude_include_walk(parent, children, [])
        parent_real = os.path.realpath(parent)
        for child in children:
            candidate = os.path.join(parent, child)
            candidate_real = os.path.realpath(candidate)
            if not os.path.islink(candidate):
                continue
            # A link pointing at its own parent (or any ancestor) is a loop.
            points_upwards = (
                parent_real == candidate_real
                or parent_real.startswith(candidate_real + os.path.sep)
            )
            if points_upwards:
                warning("Skipping recursively symlinked directory %s" % child)
            else:
                roots.add(candidate)

    for walk_root in roots:
        for dirpath, dirnames, filenames in os.walk(walk_root):
            handle_exclude_include_walk(dirpath, dirnames, [])
            yield (dirpath, dirnames, filenames)
def test_tmp_dir_normal_1(self):
    """A ``with_tempdir`` function that returns normally leaves no residue.

    The decorated helper creates temporary files and directories and marks
    a witness list.  Afterwards ``tempfile.gettempdir()`` must be unchanged
    and the temp directory must be as empty as it was before.
    """
    tempdir = tempfile.gettempdir()
    # assert temp directory is empty
    self.assertListEqual(list(os.walk(tempdir)), [(tempdir, [], [])])

    witness = []

    @with_tempdir
    def createfile(marker):
        first_fd, _first_name = tempfile.mkstemp()
        second_fd, _second_name = tempfile.mkstemp()
        nested_dir = tempfile.mkdtemp()
        third_fd, _third_name = tempfile.mkstemp(dir=nested_dir)
        tempfile.mkdtemp()
        marker.append(True)
        for descriptor in (first_fd, second_fd, third_fd):
            os.close(descriptor)

    self.assertFalse(witness)
    createfile(witness)
    self.assertTrue(witness)
    self.assertEqual(tempfile.gettempdir(), tempdir)
    # assert temp directory is empty
    self.assertListEqual(list(os.walk(tempdir)), [(tempdir, [], [])])
def test_tmp_dir_normal_2(self):
    """A ``with_tempdir`` function that raises still leaves no residue.

    The decorated helper creates temporary files and directories and then
    raises.  The exception must reach the caller, ``tempfile.gettempdir()``
    must be unchanged, and the temp directory must be empty afterwards.
    """
    tempdir = tempfile.gettempdir()
    # assert temp directory is empty
    self.assertListEqual(list(os.walk(tempfile.tempdir)),
                         [(tempfile.tempdir, [], [])])

    class WitnessException(Exception):
        pass

    @with_tempdir
    def createfile():
        first_fd, _first_name = tempfile.mkstemp()
        second_fd, _second_name = tempfile.mkstemp()
        nested_dir = tempfile.mkdtemp()
        third_fd, _third_name = tempfile.mkstemp(dir=nested_dir)
        tempfile.mkdtemp()
        for descriptor in (first_fd, second_fd, third_fd):
            os.close(descriptor)
        raise WitnessException()

    self.assertRaises(WitnessException, createfile)
    # assert tempdir didn't change
    self.assertEqual(tempfile.gettempdir(), tempdir)
    # assert temp directory is empty
    self.assertListEqual(list(os.walk(tempdir)), [(tempdir, [], [])])
def extract_rollouts(dataset='train', n_folders=20, n_images=1):
    """
    Extracts rollout images from input folder, and copies it to dataset folder.

    Up to ``n_folders`` sub-folders of the module-level ``rollout_dir`` are
    visited in random order.  What happens to the ``.jpg`` files of each
    folder depends on ``dataset``:

    * ``'templates'`` - up to ``n_images`` randomly chosen images are
      copied into ``templates_dir``;
    * ``'test'`` - the last image in sorted order is labelled via
      ``label_image``, copied, and its state label is copied;
    * any other value (including the default) - nothing is copied.
    """
    for root, dirs, files in os.walk(rollout_dir):
        np.random.shuffle(dirs)
        for i in range(min(n_folders, len(dirs))):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Folder {}".format(i))
            rollout_folder = dirs[i]
            folder_path = os.path.join(rollout_dir, rollout_folder)
            # A separate name keeps the outer ``files`` from being clobbered.
            for _, _, folder_files in os.walk(folder_path):
                images = [x for x in folder_files if x.endswith('.jpg')]
                if dataset == 'templates':
                    np.random.shuffle(images)
                    for im in images[:n_images]:
                        src = os.path.join(rollout_dir, rollout_folder, im)
                        dst = os.path.join(templates_dir, im)
                        copy_processed_image(src, dst)
                elif dataset == 'test':
                    im = sorted(images)[-1]
                    print("Image: {}".format(im))
                    src = os.path.join(rollout_dir, rollout_folder, im)
                    dst, new_im_name = label_image(src, im, test_dir)
                    copy_processed_image(src, dst)
                    copy_state_label(rollout_dir, rollout_folder, im, new_im_name)
        # NOTE(review): only the top level of rollout_dir is processed; the
        # nesting level of this break should be confirmed against the
        # original layout of the function.
        break
def nuke(self, all=False, certs=False, reinit=True):
    """Cleanup local registry DB, plus various additional filesystem cleanups optionally

    :param all: also delete every file below ``Hierarchy().basedir`` except
        ``server.key`` and ``server.cert``.
    :param certs: also delete ``server.cert`` and every ``*.gid`` file
        below ``/var/lib/sfa/authorities``.
    :param reinit: re-create the empty schema after purging.
    """
    from sfa.storage.dbschema import DBSchema
    from sfa.util.sfalogging import _SfaLogger

    logger = _SfaLogger(logfile='/var/log/sfa_import.log', loggername='importlog')
    logger.setLevelFromOptVerbose(self.api.config.SFA_API_LOGLEVEL)

    logger.info("Purging SFA records from database")
    dbschema = DBSchema()
    dbschema.nuke()

    # For convenience the schema is re-created here, so no explicit
    # "service sfa restart" is needed; in some (upgrade) scenarios this
    # might be wrong, hence the switch.
    if reinit:
        logger.info("re-creating empty schema")
        dbschema.init_or_upgrade()

    # remove the server certificate and all gids found in /var/lib/sfa/authorities
    if certs:
        logger.info("Purging cached certificates")
        for folder, _, names in os.walk('/var/lib/sfa/authorities'):
            for leaf in names:
                if leaf.endswith('.gid') or leaf == 'server.cert':
                    os.unlink(folder + os.sep + leaf)

    # just remove all files that do not match 'server.key' or 'server.cert'
    if all:
        logger.info("Purging registry filesystem cache")
        preserved_files = ['server.key', 'server.cert']
        for folder, _, names in os.walk(Hierarchy().basedir):
            for leaf in names:
                if leaf not in preserved_files:
                    os.unlink(folder + os.sep + leaf)
def index_json(request):
    """Return the sorted list of known metric names as a JSON response.

    Names come from ``*.wsp`` files below every directory in
    ``settings.WHISPER_DIRS`` and from directories below
    ``settings.CERES_DIR`` that contain a ``.ceres-node`` file.  Paths are
    turned into dotted names with the storage root and the ``.wsp`` /
    ``.rrd`` markers removed.
    """
    jsonp = request.REQUEST.get('jsonp', False)
    found = []

    for whisper_dir in settings.WHISPER_DIRS:
        for current, _dirs, names in os.walk(whisper_dir):
            relative = current.replace(whisper_dir, '')
            found.extend(
                os.path.join(relative, basename)
                for basename in names
                if fnmatch.fnmatch(basename, '*.wsp')
            )

    for current, _dirs, names in os.walk(settings.CERES_DIR):
        if '.ceres-node' in names:
            found.append(current.replace(settings.CERES_DIR, ''))

    metric_names = []
    for match in sorted(found):
        dotted = match.replace('.wsp', '').replace('.rrd', '').replace('/', '.')
        metric_names.append(dotted.lstrip('.'))

    return json_response_for(request, metric_names, jsonp=jsonp)
def test_arc_from_dir_re5(tmpdir, arc_file):
    """Round-trip an arc file through unpack -> repack -> unpack.

    The original archive is unpacked, rebuilt from the unpacked directory
    and unpacked again.  The rebuilt archive must have the same size and
    file count as the original and yield the same set of file names.
    """
    workdir = str(tmpdir)

    def extracted_names(top):
        return [leaf for _, _, leaves in os.walk(top) for leaf in leaves]

    arc_original = Arc(file_path=arc_file)
    arc_original_out = os.path.join(
        workdir, os.path.basename(arc_file).replace('.arc', ''))
    arc_original.unpack(arc_original_out)

    rebuilt_bytes = Arc.from_dir(arc_original_out)
    rebuilt_path = os.path.join(workdir, 'arc-from-dir.arc')
    with open(rebuilt_path, 'wb') as out:
        out.write(rebuilt_bytes)

    arc_rebuilt = Arc(file_path=rebuilt_path)
    arc_rebuilt_out = os.path.join(workdir, 'arc-from-arc-from-dir')
    arc_rebuilt.unpack(arc_rebuilt_out)

    names_first = extracted_names(arc_original_out)
    names_second = extracted_names(arc_rebuilt_out)

    # Assuming zlib default compression is used in all original arc files.
    assert os.path.getsize(arc_file) == os.path.getsize(rebuilt_path)
    # The hashes would be different due to the file_paths ordering
    assert arc_original.files_count == arc_rebuilt.files_count
    assert sorted(names_first) == sorted(names_second)
    assert arc_rebuilt.file_entries[0].offset == 32768
def findModulesInPackage(package, name, fileNameFilters=None):
    """
    Returns a dictionary where the key is the path to the package or
    subpackage.  The value is the list of modules in which the string
    'name' was found.

    Name can be a regular expression.  Using '^' as a first symbol to match
    a string at the beginning of the lines is faster: in that case the rest
    of 'name' is compared literally against the start of each line instead
    of being compiled as a pattern.

    :param package: directory to search recursively.
    :param name: regular expression, or '^' followed by a literal prefix.
    :param fileNameFilters: optional iterable of substrings; a file whose
        name contains any of them is skipped.
    """
    import re

    skipped_dirs = ('CVS', 'regression', 'Tutorial', 'test', 'Doc', 'doc',
                    'Icons', 'Tests')
    filters = tuple(fileNameFilters) if fileNameFilters else ()

    if name.startswith('^'):
        prefix = name[1:]

        def line_matches(line):
            return line.startswith(prefix)
    else:
        line_matches = re.compile(name).search

    candidates = {}
    for root, dirs, files in os.walk(package):
        # remove directories not to visit (in place, so os.walk skips them)
        dirs[:] = [d for d in dirs if d not in skipped_dirs]

        for fi in files:
            if not fi.endswith('.py') or fi[0] in ('#', '.'):
                continue
            # Skip the whole file, not just the current filter entry.
            if any(flt in fi for flt in filters):
                continue
            with open(os.path.join(root, fi)) as f:
                # any() stops reading at the first matching line
                if any(line_matches(line) for line in f):
                    candidates.setdefault(root, []).append(fi)
    return candidates
def function0():
    """Map a file count to a folder name for the hard-coded base folder.

    For every directory name reported while walking the base folder, the
    path ``<base>\\<name>`` is walked and each visited directory's file
    count is stored as a key mapping to that name.  Later entries with the
    same count overwrite earlier ones.
    """
    base = 'C:\\Users\\Та\\Desktop\\универр'
    counts = {}
    for _root, subfolders, _files in os.walk(base):
        for folder in subfolders:
            for _inner_root, _inner_dirs, inner_files in os.walk(base + '\\' + folder):
                counts[len(inner_files)] = folder
    return counts
def findReplace(directory, find, replace, filePattern):
    """Replace ``find`` with ``replace`` in names and contents under ``directory``.

    Three things are rewritten:

    * every directory whose path contains ``find`` is renamed;
    * every file whose name matches ``filePattern`` (fnmatch syntax) and
      contains ``find`` is renamed;
    * the contents of every file matching ``filePattern`` are rewritten
      when they contain ``find``.

    :param directory: root of the tree to process.
    :param find: text to search for.
    :param replace: replacement text.
    :param filePattern: fnmatch pattern selecting the files to process.
    """
    top = os.path.abspath(directory)

    # Renaming a directory invalidates the walk in progress, so restart the
    # walk after every rename until a full pass finds nothing to rename.
    restart = True
    while restart:
        restart = False
        for path, dirs, files in os.walk(top):
            newpath = path.replace(find, replace)
            if newpath != path:
                os.rename(path, newpath)
                restart = True
                break

    for path, dirs, files in os.walk(top):
        for filename in fnmatch.filter(files, filePattern):
            if filename == __file__:
                continue

            newfilename = filename.replace(find, replace)
            if newfilename != filename:
                os.rename(os.path.join(path, filename),
                          os.path.join(path, newfilename))
                filename = newfilename

            filepath = os.path.join(path, filename)
            with open(filepath) as f:
                s = f.read()

            # ``in`` instead of str.find(): find() returns 0 (falsy) for a
            # match at the very start and -1 (truthy) for no match at all.
            if find in s:
                with open(filepath, "w") as f:
                    f.write(s.replace(find, replace))
def find_data_files(srcdir, destdir, *wildcards, **kw):
    """
    get a list of all files under the srcdir matching wildcards,
    returned in a format to be used for install_data

    :param srcdir: directory to search.
    :param destdir: installation directory that replaces the ``srcdir``
        prefix in the result.
    :param wildcards: fnmatch patterns, matched against each file inside
        the directory being visited.
    :param kw: ``recursive`` (default ``True``) - when false only the
        files directly inside ``srcdir`` are considered.
    :returns: list of ``(destination_dir, [source_file, ...])`` tuples, one
        per directory that has at least one match.  A file matching several
        wildcards is listed once per matching wildcard.
    """
    file_list = []

    def collect(dirname, files):
        if '.svn' in dirname:
            return
        names = []
        for wc in wildcards:
            wc_name = os.path.normpath(os.path.join(dirname, wc))
            for f in files:
                filename = os.path.normpath(os.path.join(dirname, f))
                if fnmatch.fnmatch(filename, wc_name) and not os.path.isdir(filename):
                    names.append(filename)
        if names:
            # Swap the literal srcdir prefix for destdir.  A plain prefix
            # test avoids reading srcdir as a regular expression.
            if dirname.startswith(srcdir):
                destdirname = destdir + dirname[len(srcdir):]
            else:
                destdirname = dirname
            file_list.append((destdirname, names))

    if kw.get('recursive', True):
        # os.walk exists on Python 2 and 3 (os.path.walk is Python 2 only).
        for dirname, dirs, files in os.walk(srcdir):
            collect(dirname, files)
    else:
        collect(srcdir,
                [os.path.basename(f)
                 for f in glob.glob(os.path.join(srcdir, '*'))])
    return file_list
def generate_file_map(self):
    """Build a ``{file name: contents}`` map of everything to upload.

    Text-like files below ``self.path`` are read with ``read_file``; every
    file below ``self.image_path`` is encoded with ``encode_image``.
    Entries are keyed by bare file name, so a later file with the same name
    replaces an earlier one.
    """
    # All files in the folder are gathered here and sent up to GAE rather
    # than processing the template locally.
    text_suffixes = ('.tracking', '.html', '.txt', '.yaml', '.js')
    file_map = dict()

    for root, _dirs, files in os.walk(self.path):
        for filename in files:
            if filename.endswith(text_suffixes):
                file_map[filename] = read_file(os.path.join(root, filename))

    # Every image for this partner is read.  This is inefficient - ideally
    # only images referenced by the html would be read - but there are no
    # facilities for that kind of processing here, since installing pip
    # packages through a sublimetext plugin is painful.  Revisit if this
    # becomes a performance bottleneck; it should be fine on a fast
    # connection.
    for root, _dirs, files in os.walk(self.image_path):
        for filename in files:
            absolute_path = os.path.abspath(os.path.join(root, filename))
            file_map[filename] = encode_image(absolute_path)

    return file_map
def generate_file_map(self):
    """Build a ``{name: contents}`` map of everything to upload.

    Three sources are merged, in this order:

    * text-like files below ``self.path``, keyed as ``<fdir>/<file name>``
      where ``fdir`` is the directory of the current view's file with the
      ``self.parent_path + '/'`` prefix removed;
    * every file below ``self.image_path``, encoded with ``encode_image``
      and keyed by bare file name;
    * text-like files below ``self.parent_path``, keyed by bare file name.

    The resulting keys are printed before the map is returned.
    """
    # All files in the folder are gathered here and sent up to GAE rather
    # than processing the template locally.
    text_suffixes = ('.tracking', '.html', '.txt', '.yaml', '.js')
    file_map = dict()
    view_dir = os.path.dirname(self.view.file_name())
    fdir = view_dir.replace(self.parent_path + '/', '')

    for root, _dirs, files in os.walk(self.path):
        for filename in files:
            if filename.endswith(text_suffixes):
                key = '%s/%s' % (fdir, filename)
                file_map[key] = read_file(os.path.join(root, filename))

    for root, _dirs, files in os.walk(self.image_path):
        for filename in files:
            absolute_path = os.path.abspath(os.path.join(root, filename))
            file_map[filename] = encode_image(absolute_path)

    for root, _dirs, files in os.walk(self.parent_path):
        for filename in files:
            if filename.endswith(text_suffixes):
                file_map[filename] = read_file(os.path.join(root, filename))

    print(file_map.keys())
    return file_map
def _read_descriptor_files(self):
    """Process every target in ``self._targets`` until done or stopped.

    Missing targets are reported to the skip listeners with
    ``FileMissing``.  Directories are walked (following links when
    ``self._follow_links`` is set and the interpreter supports it) and
    handed to ``_handle_walker``; anything else goes to ``_handle_file``.
    When the loop ends the processed-file map is stored, ``FINISHED`` is
    queued unless the reader was stopped, and the iteration notice is set.
    """
    processed = {}
    pending = list(self._targets)

    while pending and not self._is_stopped.isSet():
        target = pending.pop(0)

        if not os.path.exists(target):
            self._notify_skip_listeners(target, FileMissing())
            continue

        if not os.path.isdir(target):
            self._handle_file(target, processed)
            continue

        if stem.prereq.is_python_26():
            walker = os.walk(target, followlinks = self._follow_links)
        else:
            walker = os.walk(target)
        self._handle_walker(walker, processed)

    self._processed_files = processed

    if not self._is_stopped.isSet():
        self._unreturned_descriptors.put(FINISHED)

    self._iter_notice.set()
def EmptyDir(d):
    """Make ``d`` an existing, empty directory.

    ``None`` is ignored.  If ``d`` is not a directory it is created.
    Otherwise every file below it is removed, then every sub-directory is
    emptied recursively and removed.  Progress is printed when
    ``debugMode()`` is true.
    """
    if debugMode():
        print("EmptyDir", d)
    if d is None:
        return

    if not os.path.isdir(d):
        os.mkdir(d)
        print("created", d)
    else:
        # delete all the files
        for parent, subdirs, filenames in os.walk(d):
            for sub in subdirs:
                EmptyDir(parent + os.sep + sub)
            for leaf in filenames:
                victim = parent + os.sep + leaf
                os.remove(victim)
                if debugMode():
                    print(" removed", victim)

    # delete any subdirectories
    for parent, subdirs, _filenames in os.walk(d):
        for sub in subdirs:
            sub_path = parent + os.sep + sub
            EmptyDir(sub_path)
            os.rmdir(sub_path)

    if debugMode():
        print("all files deleted from", d)
def create_dependency_tree(research_dir):
    """Run ``analyze`` on every regular file below ``research_dir``.

    The files are counted first so that a running ``Analyze i/total``
    progress line can be printed (the cursor is moved back up after each
    line so the next one overwrites it).  Symbolic links are skipped
    silently; entries that are neither links nor regular files are reported
    and skipped.

    :param research_dir: root directory to scan.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("going.. " + research_dir)

    total_file_num = sum(len(files) for _, _, files in os.walk(research_dir))

    file_num = 0
    for root, dirs, files in os.walk(research_dir):
        # Analyze only files, not links
        for new_file in files:
            file_num += 1
            print("Analyze " + str(file_num) + "/" + str(total_file_num))
            sys.stdout.write("\033[F")

            pathname = os.path.join(root, new_file)
            # lstat, so a symlink is seen as a link rather than its target
            mode = os.lstat(pathname).st_mode
            if S_ISLNK(mode):
                # links are skipped
                continue
            if S_ISREG(mode):
                # It's a file, call the recursive function to analyze it
                analyze(pathname, "nobody")
            else:
                # Unknown file type, print a message
                print('Skipping %s' % pathname)
def test_imports():
    """Check that there are no implicit relative imports.

    Local module and package names are collected from ``WXGEODIR`` (the
    bundled ``sympy`` / ``sympy_OLD`` trees are not descended into).  Then
    every line containing ``from`` or ``import`` in every ``.py`` file is
    checked: it must not import one of those local names directly.
    """
    # List the local modules
    locaux = set()
    for root, dirs, files in walk(WXGEODIR):
        if 'sympy' in dirs:
            dirs.remove('sympy')
        if 'sympy_OLD' in dirs:
            dirs.remove('sympy_OLD')
        for name in files:
            if name.endswith('.py'):
                locaux.add(name[:-3])
        for name in dirs:
            if isfile(join(root, name, '__init__.py')):
                locaux.add(name)
    assert 'sympy' not in locaux and 'trigonometry' not in locaux

    # Built once, with the names escaped so they are matched literally.
    forbidden = re.compile(
        '(from|import) ('
        + '|'.join(re.escape(name) for name in sorted(locaux))
        + ')[. ]')

    # Check the imports
    for root, dirs, files in walk(WXGEODIR):
        for name in files:
            if name.endswith('.py'):
                with open(join(root, name)) as f:
                    for n, line in enumerate(f):
                        if 'from ' in line or 'import ' in line:
                            # Assert on the match itself: a helper that
                            # returns None would fail on every import line.
                            assert not forbidden.search(line), \
                                join(root, name) + ' L' + str(n + 1)
def get_package_data():  # pragma: no cover
    """Return the package-data mapping for the ASDF standard files.

    The standard root is taken from the ``ASDF_STANDARD_ROOT`` environment
    variable (default ``"asdf-standard"``).  ``*.yaml`` files below
    ``<root>/schemas`` and ``*.yaml`` / ``*.asdf`` files below
    ``<root>/reference_files`` are listed relative to those directories.
    """
    standard_root = os.environ.get("ASDF_STANDARD_ROOT", "asdf-standard")

    def collect(subdir, suffixes):
        top = os.path.join(standard_root, subdir)
        found = []
        for node, _dirs, files in os.walk(top):
            for fname in files:
                if fname.endswith(suffixes):
                    found.append(os.path.relpath(os.path.join(node, fname), top))
        return found

    schemas = collect("schemas", ('.yaml',))
    reference_files = collect("reference_files", ('.yaml', '.asdf'))

    return {
        str('asdf.schemas'): schemas,
        str('asdf.reference_files'): reference_files,
    }
def __config_unix__(self):
    """Choose and prepare the log directory and log file name.

    ``/var/log/Neofelis`` is used when ``/var/log`` exists and passes the
    permission test, otherwise ``<home>/log``.  The directory is created if
    missing.  If the files already in it total more than 10 MiB they are
    all deleted.  Finally ``self._fn`` is set to a new, timestamped
    ``.log`` file inside the directory.

    Sets ``self._path``, ``self._size`` and ``self._fn``.

    :raises lexcep: when no log directory is usable or a filesystem
        operation fails.
    """
    # "~" is not expanded by os.path / os.stat, so resolve it explicitly.
    home = os.path.expanduser("~")

    # NOTE(review): only the group permission bits are tested here -
    # confirm this is the intended access check.
    if os.path.isdir("/var/log") and bool(os.stat("/var/log").st_mode & stat.S_IRWXG):
        self._path = "/var/log/Neofelis"
    elif os.path.isdir(home) and bool(os.stat(home).st_mode & stat.S_IRWXG):
        self._path = os.path.join(home, "log")
    else:
        raise lexcep("Permission Error: Unable to access log directory").with_traceback(sys.exc_info()[2])

    try:
        if not os.path.isdir(self._path):
            os.mkdir(self._path)
    except IOError as e:
        raise lexcep(str(e)).with_traceback(sys.exc_info()[2])

    self._size = 0
    try:
        for (path, dirs, files) in os.walk(self._path):
            for file in files:
                self._size += os.path.getsize(os.path.join(path, file))

        # Purge the old logs once they exceed 10 MiB in total.
        if self._size / (1024 * 1024.0) > 10:
            for (path, dirs, files) in os.walk(self._path):
                for file in files:
                    os.remove(os.path.join(path, file))
    except Exception as e:
        # IOError is an Exception subclass, so one handler covers both.
        raise lexcep(str(e)).with_traceback(sys.exc_info()[2])

    # Join with a separator so the file lands inside the log directory
    # rather than next to it.
    self._fn = os.path.join(self._path, strftime("%Y%m%d%H%M%S", gmtime()) + ".log")
def make_zipfile(zip_filename, base_dir, verbose=0, dry_run=0, compress=True,
                 mode='w'):
    """Create a zip file from all the files under 'base_dir'.

    The archive is written to 'zip_filename' using the "zipfile" Python
    module; the parent directory of 'zip_filename' is created first.
    Members are stored under their path relative to 'base_dir'.

    :param zip_filename: path of the archive to create.
    :param base_dir: directory whose files are added recursively.
    :param verbose: accepted for compatibility; not used by this function.
    :param dry_run: when true, nothing is written; the files that would be
        added are only logged.
    :param compress: use ZIP_DEFLATED when true, ZIP_STORED otherwise.
    :param mode: mode passed to ``zipfile.ZipFile``.
    :returns: the name of the output zip file.
    """
    import zipfile

    mkpath(os.path.dirname(zip_filename), dry_run=dry_run)
    log.info("creating '%s' and adding '%s' to it", zip_filename, base_dir)

    def visit(z, dirname, names):
        for name in names:
            path = os.path.normpath(os.path.join(dirname, name))
            if os.path.isfile(path):
                # relpath stays correct when base_dir is '.' or ends with a
                # separator, where slicing off len(base_dir) + 1 does not.
                p = os.path.relpath(path, base_dir)
                if not dry_run:
                    z.write(path, p)
                log.debug("adding '%s'", p)

    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
    if not dry_run:
        z = zipfile.ZipFile(zip_filename, mode, compression=compression)
        try:
            for dirname, dirs, files in os.walk(base_dir):
                visit(z, dirname, files)
        finally:
            # Close the archive even if adding a member fails.
            z.close()
    else:
        for dirname, dirs, files in os.walk(base_dir):
            visit(None, dirname, files)
    return zip_filename
def main(root, componentsToMask, dryRun):
    """Empty the bulky folders and mask the selected components.

    :param root: source root; ``../build`` relative to it is the build root.
    :param componentsToMask: component descriptions; the component name is
        the text before the first space of each entry.
    :param dryRun: passed through to ``emptyFolder`` / ``mask``; when true,
        ``.svn`` directories are reported but not deleted.
    """
    buildRoot = os.path.abspath(os.path.join(root, '../build'))
    emptyFolder(root, 'buildtools', dryRun)
    emptyFolder(root, 'boost', dryRun)
    emptyFolder(root, 'sample-data', dryRun)

    # split() keeps the whole entry when it has no space; slicing up to
    # str.find(' ') would cut off the last character in that case.
    componentsToMask = [x.split(' ', 1)[0] for x in componentsToMask]

    for c in componentsToMask:
        path = os.path.join(root, c)
        print('Masking %s...' % path)
        for folder, dirs, files in os.walk(path):
            print('folder = %s' % folder)
            if '.svn' in dirs:
                # Remove it from the walk as well as from disk.
                dirs.remove('.svn')
                svnPath = os.path.join(folder, '.svn')
                print(svnPath)
                if not dryRun:
                    shutil.rmtree(svnPath)
            for f in files:
                if shouldMask(folder, f):
                    mask(folder, f, dryRun)

    if os.path.isdir(buildRoot):
        emptyFolder(buildRoot, 'Testing', dryRun)
        # NOTE(review): this second pass walks the components under `root`
        # again, not under `buildRoot` - confirm which tree was intended.
        for c in componentsToMask:
            path = os.path.join(root, c)
            for folder, dirs, files in os.walk(path):
                for f in files:
                    if shouldMask(folder, f):
                        mask(folder, f, dryRun)
def getMeasurement(self, l):
    """
    Return the list of files of measurements.

    For each <measurement> node of the study, the file is searched for
    below the study directory of the repository (below its ``POST/<path>``
    sub-directory when a path is given); the first directory containing
    the file wins.  When the file is not found, the path attribute is used
    unchanged.

    @type l: C{String}
    @param l: label of a study
    @rtype: C{List}
    @return: C{List} of list of nodes <plot>, and C{List} of files of measurements
    """
    plot_nodes = []
    measurement_files = []

    for node in self.getStudyNode(l).getElementsByTagName("measurement"):
        plot_nodes.append(node.getElementsByTagName("plot"))
        fileName = node.attributes["file"].value
        filePath = node.attributes["path"].value

        if filePath == "":
            search_root = os.path.join(self.getRepository(), l)
        else:
            # for Code_Saturne exp data are supposed to be in POST
            search_root = os.path.join(self.getRepository(), l, 'POST', filePath)

        for current, _subdirs, names in os.walk(search_root):
            if fileName in names:
                filePath = current
                break

        measurement_files.append(os.path.join(filePath, fileName))

    return plot_nodes, measurement_files
def _detect_treestyle(self):
    """Guess the layout of the project directory.

    :returns: ``"gnu"`` when translation files are found (directly in a
        directory without sub-directories, or anywhere in the tree when no
        language directory exists), ``"nongnu"`` when a ``templates`` or
        language-code sub-directory exists at the top level, ``None`` when
        nothing can be determined or the directory cannot be read.
    """
    try:
        dirlisting = os.walk(self.get_real_path())
        # next() works on Python 2.6+ and 3; generators have no .next() on 3.
        dirpath, dirnames, filenames = next(dirlisting)

        if not dirnames:
            # No subdirectories
            if any(self.file_belongs_to_project(f) for f in filenames):
                # Translation files found, assume gnu
                return "gnu"

        # There are subdirectories
        if any(dirname == 'templates' or langcode_re.match(dirname)
               for dirname in dirnames):
            # Found language dirs assume nongnu
            return "nongnu"

        # No language subdirs found, look for any translation file
        for dirpath, dirnames, filenames in os.walk(self.get_real_path()):
            if any(self.file_belongs_to_project(f) for f in filenames):
                return "gnu"
    except Exception:
        # Detection is best effort: an unreadable or missing directory
        # simply means "unsure".  KeyboardInterrupt/SystemExit propagate.
        pass

    # Unsure
    return None
def convert_to_ascii(inputName, dirName):
    """Rename ``dirName`` and everything inside it via ``CharReplace``.

    Nothing is done when the ``ASCII/convert`` setting is 0 or on Windows.
    Otherwise ``inputName`` is converted, the directory itself is renamed
    if needed (and announced to NZBGet when ``NZBOP_SCRIPTDIR`` is set),
    then sub-directories are renamed bottom-up, then files.

    :returns: ``(inputName, dirName)`` after conversion.
    """
    ascii_convert = int(nzbtomedia.CFG["ASCII"]["convert"])
    if ascii_convert == 0 or os.name == 'nt':
        # just return if we don't want to convert or on windows os and "\" is replaced!.
        return inputName, dirName

    encoded, inputName = CharReplace(inputName)

    parent, base = os.path.split(dirName)
    if not base:
        # ended with "/"
        parent, base = os.path.split(parent)

    encoded, base2 = CharReplace(base)
    if encoded:
        dirName = os.path.join(parent, base2)
        logger.info("Renaming directory to: %s." % (base2), 'ENCODER')
        os.rename(os.path.join(parent, base), dirName)
        # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
        if 'NZBOP_SCRIPTDIR' in os.environ:
            # Return the new directory to NZBGet.
            print("[NZB] DIRECTORY=%s" % (dirName))

    # Bottom-up, so a directory is renamed only after its children.
    for dirname, dirnames, filenames in os.walk(dirName, topdown=False):
        for subdirname in dirnames:
            encoded, subdirname2 = CharReplace(subdirname)
            if encoded:
                logger.info("Renaming directory to: %s." % (subdirname2), 'ENCODER')
                os.rename(os.path.join(dirname, subdirname),
                          os.path.join(dirname, subdirname2))

    for dirname, dirnames, filenames in os.walk(dirName):
        for filename in filenames:
            encoded, filename2 = CharReplace(filename)
            if encoded:
                logger.info("Renaming file to: %s." % (filename2), 'ENCODER')
                os.rename(os.path.join(dirname, filename),
                          os.path.join(dirname, filename2))

    return inputName, dirName
def _get_files_matching_extensions(paths, extensions=[]):
    """Collect extension-less file names found below the given paths.

    :param paths: ``os.pathsep``-separated list of directories.
    :param extensions: a single extension or a list of extensions (without
        the separator).  When empty, every file is collected under ``'*'``.
    :returns: dict mapping each extension (or ``'*'``) to the de-duplicated
        base names, sorted case-insensitively.
    """
    if isinstance(extensions, strbase):
        extensions = [extensions]

    collected = defaultdict(list)
    for raw_path in paths.split(os.pathsep):
        # bad idea... also our current directory isn't meaningful from a WindowCommand
        if raw_path == '.':
            continue

        # "!!" sometimes occurs in the results on POSIX; remove it
        candidate = os.path.normpath(raw_path.replace(u'!!', u''))

        # ensure path exists
        if not os.path.exists(candidate):
            continue

        for _, _, files in os.walk(candidate):
            for f in files:
                base_name = os.path.splitext(f)[0]
                if len(extensions) > 0:
                    for ext in extensions:
                        if f.endswith(u''.join((os.extsep, ext))):
                            collected[ext].append(base_name)
                else:
                    collected['*'].append(base_name)

    return dict(
        (key, sorted(set(value), key=lambda s: s.lower()))
        for key, value in collected.items()
    )
def read_file(filename): """Read a file into a string""" path = os.path.abspath(os.path.dirname(__file__)) filepath = os.path.join(path, filename) try: return open(filepath).read() except IOError: return '' packages, data_files = [], [] root_dir = os.path.dirname(__file__) if root_dir: os.chdir(root_dir) for dirpath, dirnames, filenames in os.walk('postoffice'): # Ignore dirnames that start with '.' dirnames[:] = [d for d in dirnames if not d.startswith('.')] if '__init__.py' in filenames: pkg = dirpath.replace(os.path.sep, '.') if os.path.altsep: pkg = pkg.replace(os.path.altsep, '.') packages.append(pkg) elif filenames: prefix = dirpath[11:] # Strip "postoffice/" or "postoffice\" for f in filenames: data_files.append(os.path.join(prefix, f)) setup( name='django-postoffice', description='',
import pandas as pd import os pd.set_option('display.max_columns',None) pd.options.display.float_format = '{:.4f}'.format #----------------------------------------------------------------------------------------------------------------------# #Import and Append all dataframes #----------------------------------------------------------------------------------------------------------------------# '''Get Path Names''' #----------------------------------------------------------------------------------------------------------------------# path_name_pbp = [] path_name_box = [] path_players = [] for root, dirs, files in os.walk('nba_analysis/'): for file in files: if file.startswith("df_box"): path_name_box.append(os.path.join(root, file)) elif file.startswith("df_pbp"): path_name_pbp.append(os.path.join(root, file)) elif file.startswith('xxx'): path_players.append(os.path.join(root, file)) del dirs, file, files, root #----------------------------------------------------------------------------------------------------------------------# ''' Combine Files ''' #----------------------------------------------------------------------------------------------------------------------# df_box = pd.DataFrame() for f in path_name_box: data = pd.read_csv(f) df_box = df_box.append(data) df_pbp = pd.DataFrame() for f in path_name_pbp:
# code to create repos and make sure everything is executable filelocation = os.getcwd() directories = [ "bash", "config" + gemmachModelType, "config" + uMistModelType, "config" + fWorkModelType, "rarc", "output", "UMOSTreating", "extracted" + gemmachModelType, "extracted" + uMistModelType, "extracted" + fWorkModelType, "imgTemp", "output_csv", "output_img", "output_excel" ] for i in directories: if not os.path.exists(filelocation + "/" + i): os.mkdir(filelocation + "/" + i) os.system("chmod -R 744 " + filelocation) filedirectory = next(os.walk('.'))[1] # this function just writes all the user input into all files def UpdateEverything(): a = enteredDate.get() b = enteredEndDate.get() sTime = sHourcombo.get() eTime = eHourCombo.get() h_00 = var_00.get() h_12 = var_12.get() O3 = var_O3.get() NO2 = var_NO2.get() PM25 = var_PM25.get() Um.inputStartDate(a)
# Scratch notes from experimenting with the os module:
#print(os.getcwd())
#os.chdir("/Users/sametcelikbicak/Desktop")
#print(os.getcwd())
#print(os.listdir())
# for i in os.listdir():
#     print(i)
#os.mkdir("Deneme1")
#os.mkdir("Deneme2/Deneme3")  # raises an error; nested folders are created with another function
#os.makedirs("Deneme2/Deneme3")
#os.rmdir("Deneme2/Deneme3")
#os.mkdir("Deneme2/Deneme3")
#os.rmdir("Deneme1")
#os.removedirs("Deneme2/Deneme3")
#os.rename("test.txt","test2.txt")
#os.rename("test2.txt","test.txt")
#print(os.stat("test2.txt"))
#print(os.stat("test2.txt").st_mtime)
#print(datetime.fromtimestamp(os.stat("test2.txt").st_mtime))
# print(os.walk("/Users/sametcelikbicak/Projects/UDEMY/Python"))
# for klasor_yolu,klasor_isimleri,dosya_isimleri in os.walk("/Users/sametcelikbicak/Projects/UDEMY/Python"):
#     print("Klasör Yolu",klasor_yolu)
#     print("Klasör İsimleri", klasor_isimleri)
#     print("Dosya İsimleri", dosya_isimleri)
#     print("*******************************************************")

# Print the name of every Python file below the course folder.
for _folder_path, _folder_names, file_names in os.walk("/Users/sametcelikbicak/Projects/UDEMY/Python"):
    for file_name in file_names:
        if file_name.endswith(".py"):
            print(file_name)
def zipdir(path, ziph):
    """Add every file below ``path`` to the open zip archive ``ziph``.

    Directories named ``.git`` are not descended into.  The decision is
    made per directory name, so ``path`` itself (or one of its ancestors)
    may contain ".git" in its name without the whole tree being skipped.

    :param path: root directory to add.
    :param ziph: object with a ``write(filename)`` method, e.g. an open
        ``zipfile.ZipFile``.
    """
    for root, dirs, files in os.walk(path):
        # Prune in place so os.walk never enters the repository metadata.
        dirs[:] = [d for d in dirs if d != ".git"]
        for file in files:
            ziph.write(os.path.join(root, file))
def package_files(*root_directories):
    """List every file below the given directories, prefixed with ``..``.

    Each entry is ``os.path.join('..', <containing directory>, <file name>)``.
    """
    collected = []
    for directory in root_directories:
        for current, _subdirs, filenames in os.walk(directory):
            for filename in filenames:
                collected.append(os.path.join('..', current, filename))
    return collected
return [tail] + result if head == path: return result return fullsplit(head, [tail] + result) for scheme in INSTALL_SCHEMES.values(): scheme['data'] = scheme['purelib'] packages, data_files = [], [] root_dir = os.path.dirname(__file__) if root_dir != '': os.chdir(root_dir) enum_dir = 'django_enumfield_named_choices' for dirpath, dirnames, filenames in os.walk(enum_dir): if os.path.basename(dirpath).startswith("."): continue if '__init__.py' in filenames: packages.append('.'.join(fullsplit(dirpath))) elif filenames: data_files.append( [dirpath, [os.path.join(dirpath, f) for f in filenames]]) version = __import__('django_enumfield_named_choices').__version__ setup( name="django-enumfield-named-choices", version=version, description="Custom Django field for using enumerations of named constants", long_description=open(os.path.join(os.path.dirname(__file__),
# -*- coding: utf-8 -*-
# Exploring how Python's os.walk works
"""
Created on Fri Dec 1 09:49:31 2017

@author: vizance
"""
import os

search_path = "C:\\Users\\vizance\\Desktop\\Python相關資料"

# os.walk yields, for each folder: the folder path, the list of
# sub-folder names and the list of file names.
for folder_path, folder_names, file_names in os.walk(search_path):
    print("dirpath→{}".format(folder_path))
    # NOTE(review): prints the whole list once per sub-folder - confirm
    # whether the individual name was meant to be printed instead.
    for _ in folder_names:
        print("dirNames→{}".format(folder_names))
    for file_name in file_names:
        print(os.path.join(folder_path, file_name))
import os
from zipfile import ZipFile

# Directory containing this script; every folder found below it is packaged.
this_dir = os.path.dirname(os.path.abspath(__file__))

# Archives go into "<script dir>/_build". The zip path used to be the relative
# "_build", which only pointed at this folder when the script happened to be
# run with the script directory as the current working directory.
build_dir = os.path.join(this_dir, "_build")
if not os.path.exists(build_dir):
    os.makedirs(build_dir)

# A separate loop variable is used so that `this_dir` keeps its meaning.
# NOTE(review): the walk is recursive, so nested folders are packaged too and
# must each contain LICENSE.txt, <name>.py and <name>.xlsm, otherwise
# ZipFile.write raises. Confirm whether only top-level folders are intended.
for root, dirs, files in os.walk(this_dir):
    for d in dirs:
        if d not in ["build", "_build", "__pycache__"]:
            with ZipFile(os.path.join(build_dir, d + ".zip"), "w") as zf:
                # Files are stored flat, without the folder prefix.
                zf.write(os.path.join(root, d, "LICENSE.txt"), "LICENSE.txt")
                zf.write(os.path.join(root, d, d + ".py"), d + ".py")
                zf.write(os.path.join(root, d, d + ".xlsm"), d + ".xlsm")
                if d == "database":
                    # The database example additionally ships its SQLite file.
                    zf.write(
                        os.path.join(root, d, "chinook.sqlite"), "chinook.sqlite"
                    )
return lis sample_informations = {} with open(samples_informations_file, 'r') as f: for line in f: if line.startswith('SRR'): line = map(str.strip, line.split(',')) sample_informations.setdefault(line[0], line[1]) cwd = filter(os.path.isdir, os.listdir(os.getcwd())) all_available_sites = [] sample_edited_sites = {} for directory in cwd: if directory.startswith('SRR'): path = list(os.walk(directory + '/editing/')) table = path[1][0] + '/' + path[1][-1][-1] with open(table, 'r') as a: for line in a: if line.startswith('chr'): s = map(str.strip, line.split("\t")) if s[7] == 'AG': site, freq, coverage = s[0] + "_" + s[1], s[8], s[4] freq_gnum_cov = '%s^%s^%s' % (s[8], eval( s[6])[2], s[4]) if site not in all_available_sites: all_available_sites.append(site) if (int(coverage) >= min_coverage) and ( float(freq) >= min_edit_frequency): sample_edited_sites.setdefault( (directory, site), []).append(
def handle(self, **options):
    """Copy the ``wxapp_template`` tree from the blueapps package into a project.

    ``options['directory']`` names the destination project directory. It must
    already exist and contain ``manage.py``; otherwise ``CommandError`` is
    raised. The run version is read from the ``RUN_VER`` line of
    ``config/__init__.py`` under the current working directory; any value
    other than ``ieod`` prints an error and exits the process with status -1.

    Every template file is copied byte-for-byte. ``requirements.txt`` at the
    template root is appended to the destination file instead of replacing it.
    """
    target = options.pop('directory')

    # if some directory is given, make sure it's nicely expanded
    top_dir = path.abspath(path.expanduser(target))
    if not path.exists(top_dir):
        raise CommandError("Destination directory '%s' does not "
                           "exist, please init first." % top_dir)
    if not path.exists(path.join(top_dir, 'manage.py')):
        raise CommandError("Current directory '%s' is not "
                           "a django project dir, please init first. "
                           "(bk-admin init ${app_code})" % top_dir)

    base_subdir = 'wxapp_template'
    # (relative dir, file name) pairs that are appended to rather than overwritten.
    append_file_tuple = (('', 'requirements.txt'), )

    # Setup a stub settings environment for template rendering
    if not settings.configured:
        settings.configure()
        django.setup()

    template_dir = path.join(blueapps.__path__[0], 'conf', base_subdir)

    run_ver = None
    # The context manager guarantees the config file is closed even if
    # reading fails part-way through.
    with open(path.join(os.getcwd(), 'config', '__init__.py')) as conf_file:
        for line in conf_file:
            if line.startswith('RUN_VER'):
                # Slice off the 11-character prefix and the last two
                # characters; presumably the line looks like
                # RUN_VER = 'xxx' followed by a newline - TODO confirm.
                run_ver = line[11:-2]

    if run_ver != u'ieod':
        self.stderr.write(
            "Error: Currently only ieod version is supported. "
            "Your version is %s" % run_ver)
        sys.exit(-1)

    prefix_length = len(template_dir) + 1

    for root, dirs, files in os.walk(template_dir):
        relative_dir = root[prefix_length:]

        target_dir = path.join(top_dir, relative_dir)
        if not path.exists(target_dir):
            os.mkdir(target_dir)

        # Inside a "sites" directory only the sub-directory named after the
        # current run version is kept.
        flag = root.endswith('sites')
        # Iterate over a copy because entries are removed from `dirs` in
        # place, which also stops os.walk from descending into them.
        for dirname in dirs[:]:
            if (dirname.startswith('.') or  # noqa
                    dirname == '__pycache__' or  # noqa
                    (flag and dirname != run_ver)):
                dirs.remove(dirname)

        for filename in files:
            if filename.endswith(('.pyo', '.pyc', '.py.class', '.json')):
                # Ignore some files as they cause various breakages.
                # app.json is the one exception that is still copied.
                if filename != u'app.json':
                    continue
            old_path = path.join(root, filename)
            new_path = path.join(top_dir, relative_dir, filename)
            for old_suffix, new_suffix in self.rewrite_template_suffixes:
                if new_path.endswith(old_suffix):
                    new_path = new_path[:-len(old_suffix)] + new_suffix
                    break  # Only rewrite once

            with io.open(old_path, 'rb') as template_file:
                content = template_file.read()

            w_mode = 'wb'
            for _root, _filename in append_file_tuple:
                if _root == relative_dir and _filename == filename:
                    w_mode = 'ab'
            with io.open(new_path, w_mode) as new_file:
                new_file.write(content)

            try:
                shutil.copymode(old_path, new_path)
                self.make_writeable(new_path)
            except OSError:
                self.stderr.write(
                    "Notice: Couldn't set permission bits on %s. You're "
                    "probably using an uncommon filesystem setup. No "
                    "problem." % new_path, self.style.NOTICE)
import os
import shutil

# Ask for a folder, then copy every file found anywhere beneath it into one
# fixed destination directory. Files are copied flat, so a later file with
# the same name as an earlier one is copied to the same destination path.
path = raw_input("Enter path: ")
for folder, _subfolders, filenames in os.walk(path):
    for filename in filenames:
        source_file = os.path.join(folder, filename)
        shutil.copy(source_file, "/Users/elon/Desktop/python midi scripts/old/new2")
def submit():
    """Identify an uploaded mp3 by matching its fingerprint against a stored index.

    On a valid form submission the upload is saved to the current working
    directory under its sanitized name, and the index file for the chosen
    genre is loaded ('index_all.pkl' for any other genre value). A
    constellation of spectrogram peaks is built for the upload, hashed into
    (f1, f2, dt) keys and looked up in the index. The stored track with the
    largest time offset is taken as the answer.

    Returns:
        A redirect to the 'result' view with the matched track's name, a
        redirect to '/' when the upload is rejected or nothing matches, or
        the rendered submit form when the form was not (validly) submitted.
    """
    form = MyForm()
    if not form.validate_on_submit():
        return render_template('submit.html', form=form)

    genre = form.dropdown.data
    file = form.file.data
    if not (file and allowed_file(file.filename)):
        flash('mp3 file format is required')
        return redirect('/')
    filename = secure_filename(file.filename)
    file.save(filename)

    # genre -> (music folder, index file); any other value searches everything.
    genre_sources = {
        'Pop': ('music/Pop', 'index_pop.pkl'),
        'Hiphop': ('music/Hiphop', 'index_hiphop.pkl'),
        'Folk': ('music/Folk', 'index_folk.pkl'),
        'Rock': ('music/Rock', 'index_rock.pkl'),
    }
    directory, index_file = genre_sources.get(genre, ('music', 'index_all.pkl'))
    index = joblib.load(index_file)

    # Paths of all mp3 files currently present for this genre.
    path_f = []
    for d, dirs, files in os.walk(directory):
        for f in files:
            if f.endswith('.mp3'):
                path_f.append(os.path.join(d, f))  # build the path and add it to the list

    def read_and_resample(path, sample_rate):
        # read and resample to 22KHz
        y, sr = librosa.load(path, sr=sample_rate)
        return y

    sample_rate = 22050
    # reading request audio
    request_data = read_and_resample(filename, sample_rate)

    # Mel-scaled power spectrogram, used here only for its number of frames.
    # The signal is passed as y= because recent librosa releases accept it by
    # keyword only; older releases accept the keyword form as well.
    S = librosa.feature.melspectrogram(y=request_data, sr=sample_rate, n_mels=128)

    neighborhood_size = 10
    # sec/sample - constant for all files
    wav = request_data
    time_resolution = (wav.shape[0] / sample_rate) / S.shape[1]

    def form_constellation(wav, sample_rate):
        S = librosa.feature.melspectrogram(y=wav, sr=sample_rate, n_mels=256, fmax=4000)
        S = librosa.power_to_db(S, ref=np.max)
        # get local maxima
        Sb = maximum_filter(S, neighborhood_size) == S
        Sbd, num_objects = ndimage.label(Sb)
        objs = ndimage.find_objects(Sbd)
        points = []
        for dy, dx in objs:
            x_center = (dx.start + dx.stop - 1) // 2
            y_center = (dy.start + dy.stop - 1) // 2
            # keep only isolated single-cell maxima
            if (dx.stop - dx.start) * (dy.stop - dy.start) == 1:
                points.append((x_center, y_center))
        return sorted(points)

    request_constellation = form_constellation(request_data, sample_rate)

    # start, end, Hz low, Hz high
    target = (int(1 / time_resolution), int(3 / time_resolution), -30, 30)

    def build_constellation_index(constellation_collection, target):
        result_index = {}
        for name, points in constellation_collection.items():
            for point in points:
                f1 = point[1]
                tg = [p for p in points
                      if point[0] + target[0] <= p[0] < point[0] + target[1]
                      and point[1] + target[2] <= p[1] < point[1] + target[3]]
                for p in tg:
                    f2 = p[1]
                    dt = p[0] - point[0]
                    t = p[0]
                    result_index.setdefault((f1, f2, dt), []).append((t, name))
        return result_index

    # Named request_index so it cannot be confused with a request object.
    request_index = build_constellation_index({filename: request_constellation}, target)

    times = dict((name, []) for name in path_f)
    for key, v in request_index.items():
        if key not in index:
            continue
        for t_r, name_r in v:
            for t_i, name_i in index[key]:
                # The index may still mention tracks that are no longer on
                # disk; skip them instead of failing with KeyError.
                offsets = times.get(name_i)
                if offsets is not None:
                    offsets.append(t_i - t_r)

    result = [(name, max(matches)) for name, matches in times.items() if matches]
    if not result:
        # Nothing in the index matched the upload; report it instead of
        # crashing on an empty result list.
        flash('No matching track was found')
        return redirect('/')

    # max() keeps the first of equally scored entries, like a stable
    # descending sort followed by taking element 0.
    output = max(result, key=lambda x: x[1])[0]
    # Track name = file name without folder and extension, whatever the
    # folder depth or path separator.
    final_result = os.path.splitext(os.path.basename(output))[0]
    return redirect(url_for('result', result=final_result))
def find_files(pattern='*.ps*', root=TEST_ROOT):
    """Lazily yield the path of every file under *root* whose name matches *pattern*.

    *pattern* is an ``fnmatch``-style glob applied to the bare file name;
    each match is yielded joined to the directory it was found in.
    """
    for dirpath, _dirnames, filenames in os.walk(root):
        matching = fnmatch.filter(filenames, pattern)
        for name in matching:
            yield os.path.join(dirpath, name)
package_info['package_data'][PACKAGENAME].append('data/*') # Define entry points for command-line scripts entry_points = {'console_scripts': []} if conf.has_section('entry_points'): entry_point_list = conf.items('entry_points') for entry_point in entry_point_list: entry_points['console_scripts'].append('{0} = {1}'.format( entry_point[0], entry_point[1])) # Include all .c files, recursively, including those generated by # Cython, since we can not do this in MANIFEST.in with a "dynamic" # directory name. c_files = [] for root, dirs, files in os.walk(PACKAGENAME): for filename in files: if filename.endswith('.c'): c_files.append( os.path.join(os.path.relpath(root, PACKAGENAME), filename)) package_info['package_data'][PACKAGENAME].extend(c_files) # Note that requires and provides should not be included in the call to # ``setup``, since these are now deprecated. See this link for more details: # https://groups.google.com/forum/#!topic/astropy-dev/urYO8ckB2uM setup(name=PACKAGENAME, version=VERSION, description=DESCRIPTION, scripts=scripts, install_requires=[
or (c >= 'A' and c <= 'Z')): l.append(c) return ''.join(l) def siftOnGraph(imgName): print("Processing {0} ...".format(imgName)) img_data_color = cv2.imread(imgName, cv2.IMREAD_COLOR) img_data = cv2.imread(imgName, cv2.IMREAD_GRAYSCALE) dataImgL = getIMGpyramid(img_data) KPDESlist = [] for j in range(len(dataImgL)): img_data = dataImgL[j] kp2, des2 = getKPandDES(img_data, 200) kp2list = [] for k in range(len(kp2)): kp2list.append((kp2[k].pt[0], kp2[k].pt[1], kp2[k].size)) KPDESlist.append({"kp": kp2list, "des": des2, "gsize": img_data.shape}) pkname = os.path.join("./dataset", getValidFileName(imgName)) pkfile = open(pkname + ".pkl", "wb") ob = {"img": img_data_color, "filename": imgName, "KPDESlist": KPDESlist} pk.dump(ob, pkfile) pkfile.close() if __name__ == "__main__": rootPath = "./dataset" for dirpath, dirnames, filenames in os.walk(rootPath): for filename in filenames: filename = os.path.join(dirpath, filename) siftOnGraph(filename)
join("$PLATFORMFW_DIR", "system", "libsam")) env.VariantDirWrap( join("$BUILD_DIR", "FrameworkArduinoInc"), join("$PLATFORMFW_DIR", "cores", "${BOARD_OPTIONS['build']['core']}")) env.Append(CPPPATH=[ join("$BUILD_DIR", "FrameworkCMSISInc"), join("$BUILD_DIR", "FrameworkLibSam"), join("$BUILD_DIR", "FrameworkLibSam", "include"), join("$BUILD_DIR", "FrameworkDeviceInc"), join("$BUILD_DIR", "FrameworkDeviceInc", "sam3xa", "include") ]) # search relative includes in lib SAM directories core_dir = join(env.subst("$PLATFORMFW_DIR"), "system", "libsam") for root, _, files in walk(core_dir): for lib_file in files: file_path = join(root, lib_file) if not isfile(file_path): continue content = None content_changed = False with open(file_path) as fp: content = fp.read() if '#include "../' in content: content_changed = True content = content.replace('#include "../', '#include "') if not content_changed: continue with open(file_path, "w") as fp: fp.write(content)
def add():
    """Store an uploaded mp3 in the library and rebuild the fingerprint indexes.

    On a valid form submission the upload is saved into the folder of the
    chosen genre ('music' itself for any other genre value). Every mp3 in
    that folder is then re-read and fingerprinted, and the genre's index file
    is rewritten. Finally the index over the whole 'music' tree is rebuilt
    and written to 'index_all.pkl'.

    Returns:
        A redirect to '/' after a successful add, a redirect to '/add' when
        the upload is rejected, or the rendered add form when the form was
        not (validly) submitted.
    """
    form = MyForm()
    if not form.validate_on_submit():
        return render_template('add.html', form=form)

    genre = form.dropdown.data
    file = form.file.data

    # genre -> (music folder, index file); any other value uses the whole library.
    genre_targets = {
        'Pop': ('music/Pop', 'index_pop.pkl'),
        'Hiphop': ('music/Hiphop', 'index_hiphop.pkl'),
        'Folk': ('music/Folk', 'index_folk.pkl'),
        'Rock': ('music/Rock', 'index_rock.pkl'),
    }
    directory, index_file = genre_targets.get(genre, ('music', 'index_all.pkl'))

    if not (file and allowed_file(file.filename)):
        flash('mp3 file format is required')
        return redirect('/add')
    filename = secure_filename(file.filename)
    # Built with os.path.join so it equals the key os.walk produces below.
    saved_path = os.path.join(directory, filename)
    file.save(saved_path)

    def list_mp3(top):
        # Paths of all mp3 files at any depth below `top`.
        found = []
        for d, dirs, files in os.walk(top):
            for f in files:
                if f.endswith('.mp3'):
                    found.append(os.path.join(d, f))
        return found

    def read_and_resample(path, sample_rate):
        # read and resample to 22KHz
        y, sr = librosa.load(path, sr=sample_rate)
        return y

    path_f = list_mp3(directory)

    sample_rate = 22050
    # reading all audios
    dataset = {}
    for path in path_f:
        dataset[path] = read_and_resample(path, sample_rate)

    y = dataset[saved_path]
    # Mel-scaled power spectrogram, used here only for its number of frames.
    # The signal is passed as y= because recent librosa releases accept it by
    # keyword only; older releases accept the keyword form as well.
    S = librosa.feature.melspectrogram(y=y, sr=sample_rate, n_mels=128)

    neighborhood_size = 10
    # sec/sample - constant for all files
    wav = dataset[saved_path]
    time_resolution = (wav.shape[0] / sample_rate) / S.shape[1]

    def form_constellation(wav, sample_rate):
        S = librosa.feature.melspectrogram(y=wav, sr=sample_rate, n_mels=256, fmax=4000)
        S = librosa.power_to_db(S, ref=np.max)
        # get local maxima
        Sb = maximum_filter(S, neighborhood_size) == S
        Sbd, num_objects = ndimage.label(Sb)
        objs = ndimage.find_objects(Sbd)
        points = []
        for dy, dx in objs:
            x_center = (dx.start + dx.stop - 1) // 2
            y_center = (dy.start + dy.stop - 1) // 2
            # keep only isolated single-cell maxima
            if (dx.stop - dx.start) * (dy.stop - dy.start) == 1:
                points.append((x_center, y_center))
        return sorted(points)

    constellations = {}
    for name, wav in dataset.items():
        constellations[name] = form_constellation(wav, sample_rate)

    # start, end, Hz low, Hz high
    target = (int(1 / time_resolution), int(3 / time_resolution), -30, 30)

    def build_constellation_index(constellation_collection, target):
        result_index = {}
        for name, points in constellation_collection.items():
            for point in points:
                f1 = point[1]
                tg = [p for p in points
                      if point[0] + target[0] <= p[0] < point[0] + target[1]
                      and point[1] + target[2] <= p[1] < point[1] + target[3]]
                for p in tg:
                    f2 = p[1]
                    dt = p[0] - point[0]
                    t = p[0]
                    result_index.setdefault((f1, f2, dt), []).append((t, name))
        return result_index

    index = build_constellation_index(constellations, target)
    joblib.dump(index, index_file)

    # rebuilding index for all songs
    # The paths are collected into path_all. They used to be appended to
    # path_f by mistake, which left path_all empty and overwrote
    # index_all.pkl with an empty index on every add.
    directory_all = 'music'
    path_all = list_mp3(directory_all)

    constellations_all = {}
    for path in path_all:
        if path in constellations:
            # Already fingerprinted above; reuse instead of decoding again.
            constellations_all[path] = constellations[path]
        else:
            constellations_all[path] = form_constellation(
                read_and_resample(path, sample_rate), sample_rate)

    index_all = build_constellation_index(constellations_all, target)
    joblib.dump(index_all, 'index_all.pkl')

    flash('File was successfully added to the database')
    return redirect('/')
if os.path.isdir(args.input) and os.path.isdir(args.output): input = args.input # Set the current workspace output = args.output # Results are saved in the following directory else: raise argparse.ArgumentTypeError(f"input or output is not a valid path") if isinstance(args.cpu, numbers.Integral): nbrOfCpus = args.cpu # Set the number of CUPs else: raise argparse.ArgumentTypeError(f"Number of CUPs should be an integer number") # find all of the DEM files in the input folder DEMs = [] for root, dirs, files in os.walk(input): for file in files: if file.endswith(".tif"): DEMs.append(file) # methods used for calculating surface area methods = ['WA9', 'li', 'biLi4', 'biQuad9', 'biCub16'] # Declare variables to keep track of time for each method to compare the efficiency of each method Time = namedtuple('Time', methods) for dem in DEMs: # surface area rasters are calculated for each input DEM print(f'Calculating surface area for {dem} ...') # create a raster object to get its properties and numpy arrays inputRasterObj = Raster(f'{input}/{dem}') xres, yres = inputRasterObj.xres, inputRasterObj.yres
#!/usr/bin/env python from distutils.core import setup import re import os base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'fab_deploy2')) data_files = [] for dirpath, dirnames, filenames in os.walk(os.path.join(base_path, 'default-configs')): # Ignore dirnames that start with '.' for i, dirname in enumerate(dirnames): if dirname.startswith('.'): del dirnames[i] files = [os.path.join(dirpath, f)[len(base_path)+1:] \ for f in filenames if not f.endswith('.pyc')] data_files.extend(files) setup( name = 'red-fab-deploy2', packages=[ 'fab_deploy2', 'fab_deploy2.base', 'fab_deploy2.local', 'fab_deploy2.joyent', 'fab_deploy2.operating_systems', 'fab_deploy2.operating_systems.ubuntu', 'fab_deploy2.operating_systems.redhat', 'fab_deploy2.operating_systems.smartos', 'fab_deploy2.joyent', 'fab_deploy2.joyent.smartos', 'fab_deploy2.joyent.ubuntu', 'fab_deploy2.amazon',
# read existing csv data to string for searching for existing files csvData = "" with open(labelsFileName, 'r') as labelsFile: csvData = labelsFile.read() # Prepare terminal for single character input fd = sys.stdin.fileno() oldterm = termios.tcgetattr(fd) newattr = termios.tcgetattr(fd) newattr[3] = newattr[3] & ~termios.ICANON & ~termios.ECHO termios.tcsetattr(fd, termios.TCSANOW, newattr) # iterate over all images found in imageDirectory count = 0; try: for root, dirs, files in os.walk(imageDirectory): for f in files: if (f.endswith("jpg") or f.endswith("jpeg")): count += 1 if (csvData.find(f) != -1): print "skipping tagged image " + f else: # print out image path image = root + "/" + f imageRel = image[len(imageDirectory):] print "***************************************" print "Image #", count print imageRel os.system(pictureViewer + " " + image + " 2>/dev/null &") # display available labels
IMAGES_PER_GPU = 1 config = InferenceConfig() # Create model object in inference mode. model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config) # Load weights trained on MS-COCO # model_path = model.find_last() model_path = os.path.join(ROOT_DIR, "logs/coco_vrepall_1002.h5") # model.load_weights(COCO_MODEL_PATH, by_name=True) model.load_weights(model_path, by_name=True) class_names = ['BG', 'SlidingDoor', 'Wall', 'Shelf', 'Robot', 'Human', 'ConveyorBelt', 'Dockstation', 'Product'] # Load a random image from the images folder file_names = next(os.walk(IMAGE_DIR))[2] image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names))) # calculate time cost for detection, a is start time, b is end time a = time.time() # Run detection # if verbose == 1, more information print on terminal results = model.detect([image], verbose=1) b = time.time() load_detect_cost = b - a # Visualize results print("----------------------------------------------------------------------") print("Loading weights from ", model_path) print("Load and Detection time for this image is %.3f seconds" % load_detect_cost ) r = results[0]
def process(self, dataSource, progressBar): # we don't know how much work there is yet progressBar.switchToIndeterminate() self.log(Level.INFO,dataSource.getUniquePath()) # Use blackboard class to index blackboard artifacts for keyword search blackboard = Case.getCurrentCase().getServices().getBlackboard() self.art_contacts = self.create_artifact_type("Labcif-MSTeams_CONTACTS_"," Contacts", blackboard) self.art_messages = self.create_artifact_type("Labcif-MSTeams_MESSAGES_"," MESSAGES", blackboard) self.art_messages_reacts = self.create_artifact_type("Labcif-MSTeams_MESSAGES_REACTS"," REACTS", blackboard) self.art_messages_files = self.create_artifact_type("Labcif-MSTeams_MESSAGES_FILES"," FILES", blackboard) self.art_call = self.create_artifact_type("Labcif-MSTeams_CALLS_", " Call history", blackboard) self.art_call_one_to_one = self.create_artifact_type("Labcif-MSTeams_CALLS_ONE_TO_ONE", " Call history one to one", blackboard) self.art_teams = self.create_artifact_type("Labcif-MSTeams_TEAMS_"," Teams", blackboard) # contactos self.att_name = self.create_attribute_type('Labcif-MSTeams_CONTACT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Name", blackboard) self.att_email = self.create_attribute_type('Labcif-MSTeams_CONTACT_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Email", blackboard) self.att_orgid = self.create_attribute_type('Labcif-MSTeams_CONTACT_ORGID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Orgid", blackboard) self.att_user_contacts = self.create_attribute_type('Labcif-MSTeams_USERNAME_CONTACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) self.att_folder_extract_contacts = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_CONTACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # reacts self.att_message_id_reacts = 
self.create_attribute_type('Labcif-MSTeams_MESSAGE_ID_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message ID", blackboard) self.att_sender_name_react = self.create_attribute_type('Labcif-MSTeams_MESSAGE_SENDER_NAME_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Who reacted", blackboard) self.att_reacted_with = self.create_attribute_type('Labcif-MSTeams_MESSAGE_FILE_LOCAL_EMOJI_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Emoji", blackboard) self.att_react_time= self.create_attribute_type('Labcif-MSTeams_MESSAGE_REACT_TIME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "React time", blackboard) self.att_user_message_reacts = self.create_attribute_type('Labcif-MSTeams_USERNAME_MESSAGE_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) self.att_folder_extract_reacts = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # mensagens self.att_message_id = self.create_attribute_type('Labcif-MSTeams_MESSAGE_ID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message ID", blackboard) self.att_message = self.create_attribute_type('Labcif-MSTeams_MESSAGE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message", blackboard) self.att_sender_name = self.create_attribute_type('Labcif-MSTeams_SENDER', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Senders name", blackboard) self.att_time = self.create_attribute_type('Labcif-MSTeams_TIME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message time", blackboard) self.att_cvid = self.create_attribute_type('Labcif-MSTeams_CONVERSATION_ID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "CV", blackboard) self.att_user_message = self.create_attribute_type('Labcif-MSTeams_USERNAME_MESSAGE', 
BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) self.att_folder_extract_message = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_MESSAGES', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # ficheiros self.att_message_id_files = self.create_attribute_type('Labcif-MSTeams_MESSAGE_ID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message ID", blackboard) self.att_file_name = self.create_attribute_type('Labcif-MSTeams_MESSAGE_FILE_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "File name", blackboard) self.att_file_local = self.create_attribute_type('Labcif-MSTeams_MESSAGE_FILE_LINK', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "File Link", blackboard) self.att_user_message_files = self.create_attribute_type('Labcif-MSTeams_USERNAME_MESSAGE_FILES', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) self.att_folder_extract_files = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_FILES', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # calls one to one self.att_date_start_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_TIME_START', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one time start", blackboard) self.att_date_finish_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_TIME_FINISH', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one time finish", blackboard) self.att_creator_name_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_CREATOR_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Creator Name", blackboard) self.att_creator_email_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_CREATOR_EMAIL', 
BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Creator Email", blackboard) self.att_participant_name_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_PARTICIPANT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Participant Name", blackboard) self.att_participant_email_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_PARTICIPANT_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Participant Email", blackboard) self.att_state_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_STATE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one state", blackboard) self.att_user_calls_one_to_one = self.create_attribute_type('Labcif-MSTeams_USERNAME_CALLS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) self.att_folder_extract_calls_one_to_one = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_CALLS_ONE_TO_ONE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # teams self.att_cv_id_teams = self.create_attribute_type('Labcif-MSTeams_CV_ID_TEAMS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Conversation ID teams", blackboard) self.att_creator_name_teams = self.create_attribute_type('Labcif-MSTeams_TEAMS_CREATOR_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Creator Name", blackboard) self.att_creator_email_teams = self.create_attribute_type('Labcif-MSTeams_TEAMS_CREATOR_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Creator Email", blackboard) self.att_participant_name_teams = self.create_attribute_type('Labcif-MSTeams_TEAMS_PARTICIPANT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Participant Name", blackboard) self.att_participant_email_teams = 
self.create_attribute_type('Labcif-MSTeams_teams_PARTICIPANT_EMAIL_ONE_TO_ONE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Participant Email", blackboard) self.att_user_teams = self.create_attribute_type('Labcif-MSTeams_USERNAME_TEAMS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) self.att_folder_extract_teams = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_TEAMS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # calls self.att_date = self.create_attribute_type('Labcif-MSTeams_CALL_DATE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call Date", blackboard) self.att_creator_name = self.create_attribute_type('Labcif-MSTeams_CALL_CREATOR_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Creator Name", blackboard) self.att_creator_email = self.create_attribute_type('Labcif-MSTeams_CALL_CREATOR_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Creator Email", blackboard) self.att_count_people_in = self.create_attribute_type('Labcif-MSTeams_CALL_AMOUNT_PEOPLE_IN', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Amount of people in call", blackboard) self.att_duration = self.create_attribute_type('Labcif-MSTeams_CALL_DURANTION', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call Duration", blackboard) self.att_participant_name = self.create_attribute_type('Labcif-MSTeams_CALL_PARTICIPANT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Participant Name", blackboard) self.att_participant_email = self.create_attribute_type('Labcif-MSTeams_CALL_PARTICIPANT_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Participant Email", blackboard) self.att_user_calls = self.create_attribute_type('Labcif-MSTeams_USERNAME_CALLS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard) 
self.att_folder_extract_calls = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_CALL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) # For our example, we will use FileManager to get all # files with the word "test" # in the name and then count and read them # FileManager API: http://sleuthkit.org/autopsy/docs/api-docs/latest/classorg_1_1sleuthkit_1_1autopsy_1_1casemodule_1_1services_1_1_file_manager.html fileManager = Case.getCurrentCase().getServices().getFileManager() files = fileManager.findFiles(dataSource, "%.ldb","https_teams.microsoft.com_") numFiles = len(files) progressBar.switchToDeterminate(numFiles) fileCount = 0 for file in files: # Check if the user pressed cancel while we were busy if self.context.isJobCancelled(): return IngestModule.ProcessResult.OK fileCount += 1 # Make an artifact on the blackboard. TSK_INTERESTING_FILE_HIT is a generic type of # artfiact. Refer to the developer docs for other examples. src = file.getParentPath() pathSplited=src.split("/") user=pathSplited[2] if user not in users: users.append(user) buffer = jarray.zeros(file.getSize(), "b") file.read(buffer,0,file.getSize()) if "lost" not in src and "Roaming" in file.getParentPath() and "ProjetoEI" not in file.getParentPath(): if src not in paths: tm = datetime.fromtimestamp(math.floor(tim.time())).strftime("%m-%d-%Y_%Hh-%Mm-%Ss") paths[src]="Analysis_Autopsy_LDB_{}_{}".format(user,tm) if not os.path.exists(os.path.join(projectEIAppDataPath,paths[src])): try: os.mkdir(os.path.join(projectEIAppDataPath,paths[src])) except OSError: print("Creation of the directory %s failed" % os.path.join(projectEIAppDataPath,paths[src])) else: print("Successfully created the directory %s " % os.path.join(projectEIAppDataPath,paths[src])) f = open(os.path.join(os.path.join(projectEIAppDataPath,paths[src]),file.getName()),"wb") f.write(buffer.tostring()) f.close() # try: # # index the artifact for keyword search # 
blackboard.indexArtifact(art) # except Blackboard.BlackboardException as e: # self.log(Level.SEVERE, "Error indexing artifact " + art.getDisplayName()+str(e)) # To further the example, this code will read the contents of the file and count the number of bytes # Update the progress bar progressBar.progress(fileCount) for src, path in paths.items(): complementaryFiles=fileManager.findFilesByParentPath(dataSource.getId(),src) for file in complementaryFiles: if "lost" not in file.getParentPath() and ".ldb" not in file.getName() and "lost" not in file.getName() and "Roaming" in file.getParentPath() and "ProjetoEI" not in file.getParentPath(): if file.getName() == "." or file.getName() == ".." or "-slack" in file.getName(): continue buffer = jarray.zeros(file.getSize(), "b") if src not in paths: tm = datetime.fromtimestamp(math.floor(tim.time())).strftime("%m-%d-%Y_%Hh-%Mm-%Ss") paths[src] = "Analysis_Autopsy_LDB_{}_{}".format(user,tm) if not os.path.exists(os.path.join(projectEIAppDataPath,paths[src])): try: os.mkdir(os.path.join(projectEIAppDataPath,paths[src])) except OSError: print("Creation of the directory %s failed" % os.path.join(projectEIAppDataPath,paths[src])) else: print("Successfully created the directory %s " % os.path.join(projectEIAppDataPath,paths[src])) try: f = open(os.path.join(os.path.join(projectEIAppDataPath,paths[src]),file.getName()),"a") file.read(buffer,0,file.getSize()) f.write(buffer.tostring()) f.close() except : self.log(Level.INFO,"File Crash") pathModule = os.path.realpath(__file__) indexCutPath=pathModule.rfind("\\") pathModule=pathModule[0:indexCutPath+1] # message = IngestMessage.createMessage( # IngestMessage.MessageType.DATA, Labcif-MSTeamsFactory.moduleName, # str(self.filesFound) + " files found") analysisPath = "" result = {} for key,value in paths.items(): if key not in result: result[key] = value for key, value in result.items(): p = subprocess.Popen([r"{}EI\EI.exe".format(pathModule),"--pathToEI",r"{}EI\ ".format(pathModule), 
"-a", value],stderr=subprocess.PIPE) out = p.stderr.read() self.log(Level.INFO, out) p.wait() # os.system("cmd /c \"{}EI\\EI.exe\" --pathToEI \"{}EI\\\" -a {}".format(pathModule,pathModule,value)) results=[] pathResults="Analise Autopsy" for u in users: pathLDB="" for key,value in paths.items(): if "Analysis_Autopsy_LDB_{}".format(u) in value: pathLDB=value break for root, dirs, files in os.walk(projectEIAppDataPath, topdown=False): for name in dirs: if pathResults in name and os.stat(os.path.join(projectEIAppDataPath,pathLDB)).st_mtime < os.stat(os.path.join(projectEIAppDataPath,name)).st_mtime: pathsLDB[pathLDB]=os.path.join(projectEIAppDataPath,name) results.append(os.path.join(projectEIAppDataPath,name)) f = open(os.path.join(projectEIAppDataPath,"filesToReport.txt"),"w") for r in results: for files in os.walk(r,topdown=False): for name in files: for fileName in name: if ".csv" in fileName or ".html" in fileName or ".css" in fileName: f.write(os.path.join(r,fileName)+"\n") f.close() f = open(os.path.join(projectEIAppDataPath,"filesToReport.txt"), "r") for line in f: line = line.replace("\n","") pathExtract="" if ".csv" in line: # ok if "EventCall" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break with io.open(line,encoding="utf-8") as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: row = row[0].split(";") if rowcount!=0: art = dataSource.newArtifact(self.art_call.getTypeID()) dura=str(int(float(row[4]))) art.addAttribute(BlackboardAttribute(self.att_date, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0]))) art.addAttribute(BlackboardAttribute(self.att_creator_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1]))) art.addAttribute(BlackboardAttribute(self.att_creator_email, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2]))) 
art.addAttribute(BlackboardAttribute(self.att_count_people_in, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3]))) art.addAttribute(BlackboardAttribute(self.att_duration, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName,dura )) art.addAttribute(BlackboardAttribute(self.att_participant_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[5]))) art.addAttribute(BlackboardAttribute(self.att_participant_email, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[6]))) art.addAttribute(BlackboardAttribute(self.att_user_calls, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[7]))) art.addAttribute(BlackboardAttribute(self.att_folder_extract_calls, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: self.log(Level.INFO,"File empty") rowcount+=1 csvfile.close() # ok elif "Conversations" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break with io.open(line,encoding="utf-8") as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: row = row[0].split(";") if rowcount!=0: art = dataSource.newArtifact(self.art_teams.getTypeID()) art.addAttribute(BlackboardAttribute(self.att_cv_id_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0]))) art.addAttribute(BlackboardAttribute(self.att_creator_name_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1]))) art.addAttribute(BlackboardAttribute(self.att_creator_email_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2]))) art.addAttribute(BlackboardAttribute(self.att_participant_name_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3]))) art.addAttribute(BlackboardAttribute(self.att_participant_email_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[4]))) art.addAttribute(BlackboardAttribute(self.att_user_teams, 
LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[5]))) art.addAttribute(BlackboardAttribute(self.att_folder_extract_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: self.log(Level.INFO,"File empty") rowcount+=1 csvfile.close() # ok elif "CallOneToOne" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break with io.open(line,encoding="utf-8") as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: row = row[0].split(";") if rowcount!=0: art = dataSource.newArtifact(self.art_call_one_to_one.getTypeID()) art.addAttribute(BlackboardAttribute(self.att_date_start_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2]))) art.addAttribute(BlackboardAttribute(self.att_date_finish_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3]))) art.addAttribute(BlackboardAttribute(self.att_creator_name_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0]))) art.addAttribute(BlackboardAttribute(self.att_creator_email_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1]))) art.addAttribute(BlackboardAttribute(self.att_participant_name_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[4]))) art.addAttribute(BlackboardAttribute(self.att_participant_email_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[5]))) art.addAttribute(BlackboardAttribute(self.att_state_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[6]))) art.addAttribute(BlackboardAttribute(self.att_user_calls_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[7]))) art.addAttribute(BlackboardAttribute(self.att_folder_extract_calls_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: self.log(Level.INFO,"File empty") 
rowcount+=1 csvfile.close() elif "Files" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break with io.open(line,encoding="utf-8") as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: row = row[0].split(";") if rowcount!=0: art = dataSource.newArtifact(self.art_messages_files.getTypeID()) art.addAttribute(BlackboardAttribute(self.att_message_id_files, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0]))) art.addAttribute(BlackboardAttribute(self.att_file_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1]))) art.addAttribute(BlackboardAttribute(self.att_file_local, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2]))) art.addAttribute(BlackboardAttribute(self.att_user_message_files, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3]))) art.addAttribute(BlackboardAttribute(self.att_folder_extract_files, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: self.log(Level.INFO,"File empty") rowcount+=1 csvfile.close() elif "Mensagens" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break idMessage="" message="" sender="" timee="" cvid="" userMessage="" with open(line) as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: self.log(Level.INFO,str(row)) if rowcount!=0: if len(row) == 1: row = row[0].split(";") idMessage=str(row[0]) message=str(row[1]) timee=str(row[2]) sender=str(row[3]) cvid=str(row[4]) userMessage=str(row[5]) else: partOne = row[0].split(";") idMessage=str(partOne[0]) lastPart=row[len(row)-1].split(";") timee=str(lastPart[1]) sender=str(lastPart[2]) cvid=str(lastPart[3]) userMessage=str(lastPart[4]) message=str(partOne[1])+"," if len(row)!=2: for x in range(1,len(row)-1): 
message+=str(row[x])+"," message+=str(lastPart[0]) art = dataSource.newArtifact(self.art_messages.getTypeID()) art.addAttribute(BlackboardAttribute(self.att_message_id, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, idMessage)) art.addAttribute(BlackboardAttribute(self.att_message, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, message)) art.addAttribute(BlackboardAttribute(self.att_sender_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, sender)) art.addAttribute(BlackboardAttribute(self.att_time, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, timee)) art.addAttribute(BlackboardAttribute(self.att_cvid, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, cvid)) art.addAttribute(BlackboardAttribute(self.att_user_message, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, userMessage)) art.addAttribute(BlackboardAttribute(self.att_folder_extract_message, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: self.log(Level.INFO,"File empty") rowcount+=1 csvfile.close() elif "Reacts" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break with io.open(line,encoding="utf-8") as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: row = row[0].split(";") if rowcount!=0: art = dataSource.newArtifact(self.art_messages_reacts.getTypeID()) try: art.addAttribute(BlackboardAttribute(self.att_message_id_reacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0]))) art.addAttribute(BlackboardAttribute(self.att_reacted_with, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1]))) art.addAttribute(BlackboardAttribute(self.att_sender_name_react, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2]))) art.addAttribute(BlackboardAttribute(self.att_react_time, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3]))) 
art.addAttribute(BlackboardAttribute(self.att_user_message_reacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[4]))) art.addAttribute(BlackboardAttribute(self.att_folder_extract_reacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: pass else: pass except: self.log(Level.INFO,"File empty") rowcount+=1 csvfile.close() elif "Contactos.csv" in line: rowcount=0 for key,value in pathsLDB.items(): if value in line: for k,v in paths.items(): if v == key: pathExtract=k break with io.open(line,encoding="utf-8") as csvfile: reader = csv.reader(x.replace('\0', '') for x in csvfile) for row in reader: # each row is a list try: row = row[0].split(";") if rowcount!=0: art = dataSource.newArtifact(self.art_contacts.getTypeID()) art.addAttribute(BlackboardAttribute(self.att_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0]))) art.addAttribute(BlackboardAttribute(self.att_email, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1]))) art.addAttribute(BlackboardAttribute(self.att_orgid, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2]))) art.addAttribute(BlackboardAttribute(self.att_user_contacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3]))) art.addAttribute(BlackboardAttribute(self.att_folder_extract_contacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract)) except: self.log(Level.INFO,"File empty") rowcount+=1 csvfile.close() rowcount=0 #Post a message to the ingest messages in box. message = IngestMessage.createMessage(IngestMessage.MessageType.DATA, "Sample Jython Data Source Ingest Module", "Please run MSTeams Report") IngestServices.getInstance().postMessage(message) return IngestModule.ProcessResult.OK
def find_files(pattern, root=os.curdir):
    """Yield the path of each file below *root* whose name matches *pattern*.

    *pattern* is a shell-style wildcard applied with ``fnmatch.filter`` to
    the bare file names (not to directory names or to full paths). *root*
    is made absolute before walking, so every yielded path is absolute.
    Defaults to the current directory.
    """
    start = os.path.abspath(root)
    for folder, _subdirs, names in os.walk(start):
        # Only file names are filtered; sub-directories are still descended
        # into regardless of whether their own names match.
        matching = fnmatch.filter(names, pattern)
        for name in matching:
            yield os.path.join(folder, name)
include_dirs=include_dirs, libraries=libraries) datafiles = [] def getext(filename): os.path.splitext(filename)[1] for datadir in datadirs: datafiles.extend( [(root, [os.path.join(root, f) for f in files if getext(f) in dataexts]) for root, dirs, files in os.walk(datadir)]) detected_docs = [] for docname in standard_docs: for ext in standard_doc_exts: filename = "".join((docname, ext)) if os.path.isfile(filename): detected_docs.append(filename) datafiles.append(('.', detected_docs)) init_py_path = os.path.join(libname, '__init__.py') version = '0.0.unknown' try: with open(init_py_path) as f: for line in f: if line.startswith('__version__'):
#!/usr/bin/python import os.path import os whitelist = ["../../ssd/testing/codefile"] rootDir = "../../" for d, _, fl in os.walk(rootDir): if d.find("/tool") >= 0: continue for fn in fl: ext = os.path.splitext(d + "/" + fn) if ext[1] in (".py"): # we don't want to process the _pb2 file if ext[0].endswith("_pb2"): continue if fn == "__init__.py": continue if ext[0] in whitelist: print "{} skipt since in whitelist".format(d + '/' + fn) continue # print "The file name is ", fn # print "Processing the file", fn content = None with open(d + "/" + fn) as f: content = f.read() # content = re.sub("HAL", "Hal", conetent) # delete the first line
mindiff = a[2][num].minTime - b[2][num].minTime else: mindiff = b[2][num].minTime - a[2][num].minTime if a[2][num].maxTime > b[2][num].maxTime: maxdiff = a[2][num].maxTime - b[2][num].maxTime else: maxdiff = b[2][num].maxTime - a[2][num].maxTime soidifferences.append(mindiff) soidifferences.append(maxdiff) #print (a[1][num].mark.strip(), b[1][num].mark.strip(), mindiff, maxdiff) totalsoidiff = sum(soidifferences) / len(soidifferences) return totalsoidiff #print ('Average difference for words:', totalworddiff) firstdir = [] for root, dirs, files in os.walk(tgdirectory1): for name in files: tgfile1 = os.path.join(root, name) if tgfile1.endswith('TextGrid'): firstdir.append(tgfile1) print (len(firstdir)) seconddir = [] for root, dirs, files in os.walk(tgdirectory2): for name in files: tgfile2 = os.path.join(root, name) if tgfile2.endswith('TextGrid'): seconddir.append(tgfile2) print (len(seconddir)) csv_columns = ['tg','Average difference for words','Average difference for phones', 'Average difference for SOI', 'Difference in silence counts']
def listSubFolders(folder):
    """Return the paths of every directory nested anywhere below *folder*.

    ``os.walk`` yields *folder* itself as its first entry; that entry is
    dropped so only descendants are returned, in top-down walk order. An
    empty list is returned when *folder* has no sub-directories (or when
    ``os.walk`` yields nothing for it).
    """
    # Walk the ``folder`` argument: the previous body referenced an unrelated
    # name ``directory`` and therefore ignored (or failed without) the caller's
    # input.
    return [entry[0] for entry in os.walk(folder)][1:]
def build(self, run_epubcheck: bool, build_kobo: bool, build_kindle: bool, output_directory: Path, proof: bool, build_covers: bool) -> None: """ Entry point for `se build` """ # Check for some required tools if build_kindle: which_ebook_convert = shutil.which("ebook-convert") if which_ebook_convert: ebook_convert_path = Path(which_ebook_convert) else: # Look for default Mac calibre app path if none found in path ebook_convert_path = Path("/Applications/calibre.app/Contents/MacOS/ebook-convert") if not ebook_convert_path.exists(): raise se.MissingDependencyException("Couldn’t locate [bash]ebook-convert[/]. Is [bash]calibre[/] installed?") if run_epubcheck: if not shutil.which("java"): raise se.MissingDependencyException("Couldn’t locate [bash]java[/]. Is it installed?") # Check the output directory and create it if it doesn't exist try: output_directory = output_directory.resolve() output_directory.mkdir(parents=True, exist_ok=True) except Exception: raise se.FileExistsException(f"Couldn’t create output directory: [path][link=file://{output_directory}]{output_directory}[/][/].") # All clear to start building! 
metadata_xml = self.metadata_xml with tempfile.TemporaryDirectory() as temp_directory: work_directory = Path(temp_directory) work_epub_root_directory = work_directory / "src" copy_tree(self.path, str(work_directory)) try: shutil.rmtree(work_directory / ".git") except Exception: pass # By convention the ASIN is set to the SHA-1 sum of the book's identifying URL try: identifier = self.metadata_dom.xpath("//dc:identifier")[0].inner_xml().replace("url:", "") asin = sha1(identifier.encode("utf-8")).hexdigest() except: raise se.InvalidSeEbookException(f"Missing [xml]<dc:identifier>[/] element in [path][link=file://{self.metadata_file_path}]{self.metadata_file_path}[/][/].") if not self.metadata_dom.xpath("//dc:title"): raise se.InvalidSeEbookException(f"Missing [xml]<dc:title>[/] element in [path][link=file://{self.metadata_file_path}]{self.metadata_file_path}[/][/].") output_filename = identifier.replace("https://standardebooks.org/ebooks/", "").replace("/", "_") url_author = "" for author in self.metadata_dom.xpath("//dc:creator"): url_author = url_author + se.formatting.make_url_safe(author.inner_xml()) + "_" url_author = url_author.rstrip("_") epub_output_filename = f"{output_filename}{'.proof' if proof else ''}.epub" epub3_output_filename = f"{output_filename}{'.proof' if proof else ''}.epub3" kobo_output_filename = f"{output_filename}{'.proof' if proof else ''}.kepub.epub" kindle_output_filename = f"{output_filename}{'.proof' if proof else ''}.azw3" # Clean up old output files if any se.quiet_remove(output_directory / f"thumbnail_{asin}_EBOK_portrait.jpg") se.quiet_remove(output_directory / "cover.jpg") se.quiet_remove(output_directory / "cover-thumbnail.jpg") se.quiet_remove(output_directory / epub_output_filename) se.quiet_remove(output_directory / epub3_output_filename) se.quiet_remove(output_directory / kobo_output_filename) se.quiet_remove(output_directory / kindle_output_filename) # Are we including proofreading CSS? 
if proof: with open(work_epub_root_directory / "epub" / "css" / "local.css", "a", encoding="utf-8") as local_css_file: with importlib_resources.open_text("se.data.templates", "proofreading.css", encoding="utf-8") as proofreading_css_file: local_css_file.write(proofreading_css_file.read()) # Update the release date in the metadata and colophon if self.last_commit: last_updated_iso = regex.sub(r"\.[0-9]+$", "", self.last_commit.timestamp.isoformat()) + "Z" last_updated_iso = regex.sub(r"\+.+?Z$", "Z", last_updated_iso) # In the line below, we can't use %l (unpadded 12 hour clock hour) because it isn't portable to Windows. # Instead we use %I (padded 12 hour clock hour) and then do a string replace to remove leading zeros. last_updated_friendly = f"{self.last_commit.timestamp:%B %e, %Y, %I:%M <abbr class=\"time eoc\">%p</abbr>}".replace(" 0", " ") last_updated_friendly = regex.sub(r"\s+", " ", last_updated_friendly).replace("AM", "a.m.").replace("PM", "p.m.").replace(" <abbr", " <abbr") # Set modified date in content.opf self.metadata_xml = regex.sub(r"<meta property=\"dcterms:modified\">[^<]+?</meta>", f"<meta property=\"dcterms:modified\">{last_updated_iso}</meta>", self.metadata_xml) with open(work_epub_root_directory / "epub" / "content.opf", "w", encoding="utf-8") as file: file.seek(0) file.write(self.metadata_xml) file.truncate() # Update the colophon with release info with open(work_epub_root_directory / "epub" / "text" / "colophon.xhtml", "r+", encoding="utf-8") as file: xhtml = file.read() xhtml = xhtml.replace("<p>The first edition of this ebook was released on<br/>", f"<p>This edition was released on<br/>\n\t\t\t<b>{last_updated_friendly}</b><br/>\n\t\t\tand is based on<br/>\n\t\t\t<b>revision {self.last_commit.short_sha}</b>.<br/>\n\t\t\tThe first edition of this ebook was released on<br/>") file.seek(0) file.write(xhtml) file.truncate() # Output the pure epub3 file se.epub.write_epub(work_epub_root_directory, output_directory / epub3_output_filename) # 
Now add epub2 compatibility. # Include compatibility CSS with open(work_epub_root_directory / "epub" / "css" / "core.css", "a", encoding="utf-8") as core_css_file: with importlib_resources.open_text("se.data.templates", "compatibility.css", encoding="utf-8") as compatibility_css_file: core_css_file.write(compatibility_css_file.read()) # Simplify CSS and tags total_css = "" # Simplify the CSS first. Later we'll update the document to match our simplified selectors. # While we're doing this, we store the original css into a single variable so we can extract the original selectors later. for root, _, filenames in os.walk(work_epub_root_directory): for filename_string in fnmatch.filter(filenames, "*.css"): filename = Path(root) / filename_string with open(filename, "r+", encoding="utf-8") as file: css = file.read() # Before we do anything, we process a special case in core.css if filename.name == "core.css": css = regex.sub(r"abbr{.+?}", "", css, flags=regex.DOTALL) total_css = total_css + css + "\n" file.seek(0) file.write(se.formatting.simplify_css(css)) file.truncate() # Now get a list of original selectors # Remove @supports(){} total_css = regex.sub(r"@supports.+?{(.+?)}\s*}", "\\1}", total_css, flags=regex.DOTALL) # Remove CSS rules total_css = regex.sub(r"{[^}]+}", "", total_css) # Remove trailing commas total_css = regex.sub(r",", "", total_css) # Remove comments total_css = regex.sub(r"/\*.+?\*/", "", total_css, flags=regex.DOTALL) # Remove @ defines total_css = regex.sub(r"^@.+", "", total_css, flags=regex.MULTILINE) # Construct a dictionary of the original selectors selectors = {line for line in total_css.splitlines() if line != ""} # Get a list of .xhtml files to simplify for root, _, filenames in os.walk(work_epub_root_directory): for filename_string in fnmatch.filter(filenames, "*.xhtml"): filename = (Path(root) / filename_string).resolve() # Don't mess with the ToC, since if we have ol/li > first-child selectors we could screw it up if filename.name == 
"toc.xhtml": continue with open(filename, "r+", encoding="utf-8") as file: # We have to remove the default namespace declaration from our document, otherwise # xpath won't find anything at all. See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python xhtml = file.read().replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "") processed_xhtml = xhtml try: tree = etree.fromstring(str.encode(xhtml)) except Exception as ex: raise se.InvalidXhtmlException(f"Error parsing XHTML file: [path][link=file://{filename}]{filename}[/][/]. Exception: {ex}") # Now iterate over each CSS selector and see if it's used in any of the files we found for selector in selectors: try: # Add classes to elements that match any of our selectors to simplify. For example, if we select :first-child, add a "first-child" class to all elements that match that. for selector_to_simplify in se.SELECTORS_TO_SIMPLIFY: while selector_to_simplify in selector: # Potentially the pseudoclass we’ll simplify isn’t at the end of the selector, # so we need to temporarily remove the trailing part to target the right elements. split_selector = regex.split(fr"({selector_to_simplify}(\(.*?\))?)", selector, 1) target_element_selector = ''.join(split_selector[0:2]) replacement_class = split_selector[1].replace(":", "").replace("(", "-").replace("n-", "n-minus-").replace("n+", "n-plus-").replace(")", "") selector = selector.replace(split_selector[1], "." 
+ replacement_class, 1) sel = se.easy_xml.css_selector(target_element_selector) for element in tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES): current_class = element.get("class") if current_class is not None and replacement_class not in current_class: current_class = current_class + " " + replacement_class else: current_class = replacement_class element.set("class", current_class) except lxml.cssselect.ExpressionError: # This gets thrown if we use pseudo-elements, which lxml doesn't support pass except lxml.cssselect.SelectorSyntaxError as ex: raise se.InvalidCssException(f"Couldn’t parse CSS in or near this line: [css]{selector}[/]. Exception: {ex}") # We've already replaced attribute/namespace selectors with classes in the CSS, now add those classes to the matching elements if "[epub|type" in selector: for namespace_selector in regex.findall(r"\[epub\|type\~\=\"[^\"]*?\"\]", selector): sel = se.easy_xml.css_selector(namespace_selector) for element in tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES): new_class = regex.sub(r"^\.", "", se.formatting.namespace_to_class(namespace_selector)) current_class = element.get("class", "") if new_class not in current_class: current_class = f"{current_class} {new_class}".strip() element.set("class", current_class) processed_xhtml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + etree.tostring(tree, encoding=str, pretty_print=True) # We do this round in a second pass because if we modify the tree like this, it screws up how lxml does processing later. 
# If it's all done in one pass, we wind up in a race condition where some elements are fixed and some not tree = etree.fromstring(str.encode(processed_xhtml)) for selector in selectors: try: sel = se.easy_xml.css_selector(selector) except lxml.cssselect.ExpressionError: # This gets thrown if we use pseudo-elements, which lxml doesn't support continue except lxml.cssselect.SelectorSyntaxError as ex: raise se.InvalidCssException(f"Couldn’t parse CSS in or near this line: [css]{selector}[/]. Exception: {ex}") # Convert <abbr> to <span> if "abbr" in selector: for element in tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES): # Why would you want the tail to output by default?!? raw_string = etree.tostring(element, encoding=str, with_tail=False) # lxml--crap as usual--includes a bunch of namespace information in every element we print. # Remove it here. raw_string = raw_string.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "") raw_string = raw_string.replace(" xmlns:epub=\"http://www.idpf.org/2007/ops\"", "") raw_string = raw_string.replace(" xmlns:m=\"http://www.w3.org/1998/Math/MathML\"", "") # Now lxml doesn't let us modify the tree, so we just do a straight up regex replace to turn this into a span processed_string = raw_string.replace("<abbr", "<span") processed_string = processed_string.replace("</abbr", "</span") # Now we have a nice, fixed string. But, since lxml can't replace elements, we write it ourselves. 
processed_xhtml = processed_xhtml.replace(raw_string, processed_string) tree = etree.fromstring(str.encode(processed_xhtml)) # Now we just remove all stray abbr tags that were not styled by CSS processed_xhtml = regex.sub(r"</?abbr[^>]*?>", "", processed_xhtml) # Remove datetime="" attribute in <time> tags, which is not always understood by epubcheck processed_xhtml = regex.sub(r" datetime=\"[^\"]+?\"", "", processed_xhtml) tree = etree.fromstring(str.encode(processed_xhtml)) if processed_xhtml != xhtml: file.seek(0) file.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + etree.tostring(tree, encoding=str, pretty_print=True).replace("<html", "<html xmlns=\"http://www.w3.org/1999/xhtml\"")) file.truncate() # Done simplifying CSS and tags! # Extract cover and cover thumbnail cover_svg_file = work_epub_root_directory / "epub" / "images" / "cover.svg" if not os.path.isfile(cover_svg_file): raise se.MissingDependencyException("Cover image is missing. Did you run [bash]se build-images[/]?") svg2png(url=str(cover_svg_file), write_to=str(work_directory / "cover.png")) cover = Image.open(work_directory / "cover.png") cover = cover.convert("RGB") # Remove alpha channel from PNG if necessary cover.save(work_epub_root_directory / "epub" / "images" / "cover.jpg") (work_directory / "cover.png").unlink() if build_covers: shutil.copy2(work_epub_root_directory / "epub" / "images" / "cover.jpg", output_directory / "cover.jpg") shutil.copy2(cover_svg_file, output_directory / "cover-thumbnail.svg") # Path arguments must be cast to string svg2png(url=str(output_directory / "cover-thumbnail.svg"), write_to=str(work_directory / "cover-thumbnail.png")) cover = Image.open(work_directory / "cover-thumbnail.png") cover = cover.resize((COVER_THUMBNAIL_WIDTH, COVER_THUMBNAIL_HEIGHT)) cover = cover.convert("RGB") # Remove alpha channel from PNG if necessary cover.save(output_directory / "cover-thumbnail.jpg") (work_directory / "cover-thumbnail.png").unlink() (output_directory / 
"cover-thumbnail.svg").unlink() cover_svg_file.unlink() # Massage image references in content.opf metadata_xml = metadata_xml.replace("cover.svg", "cover.jpg") metadata_xml = metadata_xml.replace(".svg", ".png") metadata_xml = metadata_xml.replace("id=\"cover.jpg\" media-type=\"image/svg+xml\"", "id=\"cover.jpg\" media-type=\"image/jpeg\"") metadata_xml = metadata_xml.replace("image/svg+xml", "image/png") metadata_xml = regex.sub(r" properties=\"([^\"]*?)svg([^\"]*?)\"", r''' properties="\1\2"''', metadata_xml) # We may also have the `mathml` property metadata_xml = regex.sub(r" properties=\"([^\s]*?)\s\"", r''' properties="\1"''', metadata_xml) # Clean up trailing white space in property attributes introduced by the above line metadata_xml = regex.sub(r" properties=\"\s*\"", "", metadata_xml) # Remove any now-empty property attributes # Add an element noting the version of the se tools that built this ebook metadata_xml = regex.sub(r"<dc:publisher", f"<meta property=\"se:built-with\">{se.VERSION}</meta>\n\t\t<dc:publisher", metadata_xml) # Google Play Books chokes on https XML namespace identifiers (as of at least 2017-07) metadata_xml = metadata_xml.replace("https://standardebooks.org/vocab/1.0", "http://standardebooks.org/vocab/1.0") # Output the modified content.opf so that we can build the kobo book before making more epub2 compatibility hacks with open(work_epub_root_directory / "epub" / "content.opf", "w", encoding="utf-8") as file: file.write(metadata_xml) file.truncate() # Recurse over xhtml files to make some compatibility replacements for root, _, filenames in os.walk(work_epub_root_directory): for filename_string in filenames: filename = Path(root) / filename_string if filename.suffix == ".svg": # For night mode compatibility, give the titlepage a 1px white stroke attribute if filename.name in("titlepage.svg", "logo.svg"): with open(filename, "r+", encoding="utf-8") as file: svg = file.read() paths = svg # What we're doing here is faking the 
`stroke-align: outside` property, which is an unsupported draft spec right now. # We do this by duplicating all the SVG paths, and giving the duplicates a 2px stroke. The originals are directly on top, # so the 2px stroke becomes a 1px stroke that's *outside* of the path instead of being *centered* on the path border. # This looks much nicer, but we also have to increase the image size by 2px in both directions, and re-center the whole thing. if filename.name == "titlepage.svg": stroke_width = SVG_TITLEPAGE_OUTER_STROKE_WIDTH else: stroke_width = SVG_OUTER_STROKE_WIDTH # First, strip out non-path, non-group elements paths = regex.sub(r"<\?xml[^<]+?\?>", "", paths) paths = regex.sub(r"</?svg[^<]*?>", "", paths) paths = regex.sub(r"<title>[^<]+?</title>", "", paths) paths = regex.sub(r"<desc>[^<]+?</desc>", "", paths) # `paths` is now our "duplicate". Add a 2px stroke. paths = paths.replace("<path", f"<path style=\"stroke: #ffffff; stroke-width: {stroke_width}px;\"") # Inject the duplicate under the old SVG paths. We do this by only replacing the first regex match for <g> or <path> svg = regex.sub(r"(<g|<path)", f"{paths}\\1", svg, 1) # If this SVG specifies height/width, then increase height and width by 2 pixels and translate everything by 1px try: height = int(regex.search(r"<svg[^>]+?height=\"([0-9]+)\"", svg).group(1)) + stroke_width svg = regex.sub(r"<svg([^<]*?)height=\"[0-9]+\"", f"<svg\\1height=\"{height}\"", svg) width = int(regex.search(r"<svg[^>]+?width=\"([0-9]+)\"", svg).group(1)) + stroke_width svg = regex.sub(r"<svg([^<]*?)width=\"[0-9]+\"", f"<svg\\1width=\"{width}\"", svg) # Add a grouping element to translate everything over 1px svg = regex.sub(r"(<g|<path)", "<g transform=\"translate({amount}, {amount})\">\n\\1".format(amount=(stroke_width / 2)), svg, 1) svg = svg.replace("</svg>", "</g>\n</svg>") except AttributeError: # Thrown when the regex doesn't match (i.e. 
SVG doesn't specify height/width) pass file.seek(0) file.write(svg) file.truncate() # Convert SVGs to PNGs at 2x resolution # Path arguments must be cast to string svg2png(url=str(filename), write_to=str(filename.parent / (str(filename.stem) + ".png")), scale=2) (filename).unlink() if filename.suffix == ".xhtml": with open(filename, "r+", encoding="utf-8") as file: xhtml = file.read() processed_xhtml = xhtml # Check if there's any MathML to convert. # We expect MathML to be the "content" type (versus the "presentational" type). # We use an XSL transform to convert from "content" to "presentational" MathML. # If we start with presentational, then nothing will be changed. # Kobo supports presentational MathML. After we build kobo, we convert the presentational MathML to PNG for the rest of the builds. mathml_transform = None for line in regex.findall(r"<(?:m:)?math[^>]*?>(.+?)</(?:m:)?math>", processed_xhtml, flags=regex.DOTALL): mathml_content_tree = se.easy_xml.EasyXhtmlTree("<?xml version=\"1.0\" encoding=\"utf-8\"?><math xmlns=\"http://www.w3.org/1998/Math/MathML\">{}</math>".format(regex.sub(r"<(/?)m:", "<\\1", line))) # Initialize the transform object, if we haven't yet if not mathml_transform: with importlib_resources.path("se.data", "mathmlcontent2presentation.xsl") as mathml_xsl_filename: mathml_transform = etree.XSLT(etree.parse(str(mathml_xsl_filename))) # Transform the mathml and get a string representation # XSLT comes from https://github.com/fred-wang/webextension-content-mathml-polyfill mathml_presentation_tree = mathml_transform(mathml_content_tree.etree) mathml_presentation_xhtml = etree.tostring(mathml_presentation_tree, encoding="unicode", pretty_print=True, with_tail=False).strip() # Plop our string back in to the XHTML we're processing processed_xhtml = regex.sub(r"<(?:m:)?math[^>]*?>\{}\</(?:m:)?math>".format(regex.escape(line)), mathml_presentation_xhtml, processed_xhtml, flags=regex.MULTILINE) if filename.name == "endnotes.xhtml": # iOS 
renders the left-arrow-hook character as an emoji; this fixes it and forces it to render as text. # See https://github.com/standardebooks/tools/issues/73 # See http://mts.io/2015/04/21/unicode-symbol-render-text-emoji/ processed_xhtml = processed_xhtml.replace("\u21a9", "\u21a9\ufe0e") # Since we added an outlining stroke to the titlepage/publisher logo images, we # want to remove the se:color-depth.black-on-transparent semantic if filename.name in ("colophon.xhtml", "imprint.xhtml", "titlepage.xhtml"): processed_xhtml = regex.sub(r"\s*se:color-depth\.black-on-transparent\s*", "", processed_xhtml) # Add ARIA roles, which are just mostly duplicate attributes to epub:type for role in ARIA_ROLES: processed_xhtml = regex.sub(fr"(epub:type=\"[^\"]*?{role}[^\"]*?\")", f"\\1 role=\"doc-{role}\"", processed_xhtml) # Some ARIA roles can't apply to some elements. # For example, epilogue can't apply to <article> processed_xhtml = regex.sub(r"<article ([^>]*?)role=\"doc-epilogue\"", "<article \\1", processed_xhtml) if filename.name == "toc.xhtml": landmarks_xhtml = regex.findall(r"<nav epub:type=\"landmarks\">.*?</nav>", processed_xhtml, flags=regex.DOTALL) landmarks_xhtml = regex.sub(r" role=\"doc-.*?\"", "", landmarks_xhtml[0]) processed_xhtml = regex.sub(r"<nav epub:type=\"landmarks\">.*?</nav>", landmarks_xhtml, processed_xhtml, flags=regex.DOTALL) # But, remove ARIA roles we added to h# tags, because tyically those roles are for sectioning content. # For example, we might have an h2 that is both a title and dedication. But ARIA can't handle it being a dedication. 
# See The Man Who Was Thursday by G K Chesterton processed_xhtml = regex.sub(r"(<h[1-6] [^>]*) role=\".*?\">", "\\1>", processed_xhtml) # Google Play Books chokes on https XML namespace identifiers (as of at least 2017-07) processed_xhtml = processed_xhtml.replace("https://standardebooks.org/vocab/1.0", "http://standardebooks.org/vocab/1.0") # We converted svgs to pngs, so replace references processed_xhtml = processed_xhtml.replace("cover.svg", "cover.jpg") processed_xhtml = processed_xhtml.replace(".svg", ".png") # To get popup footnotes in iBooks, we have to change epub:endnote to epub:footnote. # Remember to get our custom style selectors too. processed_xhtml = regex.sub(r"epub:type=\"([^\"]*?)endnote([^\"]*?)\"", "epub:type=\"\\1footnote\\2\"", processed_xhtml) processed_xhtml = regex.sub(r"class=\"([^\"]*?)epub-type-endnote([^\"]*?)\"", "class=\"\\1epub-type-footnote\\2\"", processed_xhtml) # Include extra lang tag for accessibility compatibility. processed_xhtml = regex.sub(r"xml:lang\=\"([^\"]+?)\"", "lang=\"\\1\" xml:lang=\"\\1\"", processed_xhtml) # Typography: replace double and triple em dash characters with extra em dashes. processed_xhtml = processed_xhtml.replace("⸺", f"—{se.WORD_JOINER}—") processed_xhtml = processed_xhtml.replace("⸻", f"—{se.WORD_JOINER}—{se.WORD_JOINER}—") # Typography: replace some other less common characters. processed_xhtml = processed_xhtml.replace("⅒", "1/10") processed_xhtml = processed_xhtml.replace("℅", "c/o") processed_xhtml = processed_xhtml.replace("✗", "×") processed_xhtml = processed_xhtml.replace(" ", f"{se.NO_BREAK_SPACE}{se.NO_BREAK_SPACE}") # em-space to two nbsps # Many e-readers don't support the word joiner character (U+2060). # They DO, however, support the now-deprecated zero-width non-breaking space (U+FEFF) # For epubs, do this replacement. Kindle now seems to handle everything fortunately. 
processed_xhtml = processed_xhtml.replace(se.WORD_JOINER, se.ZERO_WIDTH_SPACE) # Some minor code style cleanup processed_xhtml = processed_xhtml.replace(" >", ">") processed_xhtml = regex.sub(r"""\s*epub:type=""\s*""", "", processed_xhtml) if processed_xhtml != xhtml: file.seek(0) file.write(processed_xhtml) file.truncate() if filename.suffix == ".css": with open(filename, "r+", encoding="utf-8") as file: css = file.read() processed_css = css # To get popup footnotes in iBooks, we have to change epub:endnote to epub:footnote. # Remember to get our custom style selectors too. processed_css = processed_css.replace("endnote", "footnote") # page-break-* is deprecated in favor of break-*. Add page-break-* aliases for compatibility in older ereaders. processed_css = regex.sub(r"(\s+)break-(.+?:\s.+?;)", "\\1break-\\2\t\\1page-break-\\2", processed_css) # `page-break-*: page;` should be come `page-break-*: always;` processed_css = regex.sub(r"(\s+)page-break-(before|after):\s+page;", "\\1page-break-\\2: always;", processed_css) if processed_css != css: file.seek(0) file.write(processed_css) file.truncate() if build_kobo: with tempfile.TemporaryDirectory() as temp_directory: kobo_work_directory = Path(temp_directory) copy_tree(str(work_epub_root_directory), str(kobo_work_directory)) for root, _, filenames in os.walk(kobo_work_directory): # Add a note to content.opf indicating this is a transform build for filename_string in fnmatch.filter(filenames, "content.opf"): with open(Path(root) / filename_string, "r+", encoding="utf-8") as file: xhtml = file.read() xhtml = regex.sub(r"<dc:publisher", "<meta property=\"se:transform\">kobo</meta>\n\t\t<dc:publisher", xhtml) file.seek(0) file.write(xhtml) file.truncate() # Kobo .kepub files need each clause wrapped in a special <span> tag to enable highlighting. # Do this here. Hopefully Kobo will get their act together soon and drop this requirement. 
for filename_string in fnmatch.filter(filenames, "*.xhtml"): kobo.paragraph_counter = 1 kobo.segment_counter = 1 filename = (Path(root) / filename_string).resolve() # Don't add spans to the ToC if filename.name == "toc.xhtml": continue with open(filename, "r+", encoding="utf-8") as file: xhtml = file.read() # Note: Kobo supports CSS hyphenation, but it can be improved with soft hyphens. # However we can't insert them, because soft hyphens break the dictionary search when # a word is highlighted. # Kobos don't have fonts that support the ↩ character in endnotes, so replace it with ← if filename.name == "endnotes.xhtml": # Note that we replaced ↩ with \u21a9\ufe0e in an earlier iOS compatibility fix xhtml = regex.sub(r"epub:type=\"backlink\">\u21a9\ufe0e</a>", "epub:type=\"backlink\">←</a>", xhtml) # We have to remove the default namespace declaration from our document, otherwise # xpath won't find anything at all. See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python try: tree = etree.fromstring(str.encode(xhtml.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", ""))) except Exception as ex: raise se.InvalidXhtmlException(f"Error parsing XHTML file: [path][link=file://{filename}]{filename}[/][/]. Exception: {ex}") kobo.add_kobo_spans_to_node(tree.xpath("./body", namespaces=se.XHTML_NAMESPACES)[0]) xhtml = etree.tostring(tree, encoding="unicode", pretty_print=True, with_tail=False) xhtml = regex.sub(r"<html:span", "<span", xhtml) xhtml = regex.sub(r"html:span>", "span>", xhtml) xhtml = regex.sub(r"<span xmlns:html=\"http://www.w3.org/1999/xhtml\"", "<span", xhtml) xhtml = regex.sub(r"<html", "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\"", xhtml) file.seek(0) file.write(xhtml) file.truncate() # All done, clean the output # Note that we don't clean .xhtml files, because the way kobo spans are added means that it will screw up spaces inbetween endnotes. 
for filepath in se.get_target_filenames([kobo_work_directory], (".svg", ".opf", ".ncx")): se.formatting.format_xml_file(filepath) se.epub.write_epub(kobo_work_directory, output_directory / kobo_output_filename) # Now work on more epub2 compatibility # Recurse over css files to make some compatibility replacements. for root, _, filenames in os.walk(work_epub_root_directory): for filename_string in filenames: filename = Path(root) / filename_string if filename.suffix == ".css": with open(filename, "r+", encoding="utf-8") as file: css = file.read() processed_css = css processed_css = regex.sub(r"(page\-break\-(before|after|inside)\s*:\s*(.+))", "\\1\n\t-webkit-column-break-\\2: \\3 /* For Readium */", processed_css) processed_css = regex.sub(r"^\s*hyphens\s*:\s*(.+)", "\thyphens: \\1\n\tadobe-hyphenate: \\1\n\t-webkit-hyphens: \\1\n\t-epub-hyphens: \\1\n\t-moz-hyphens: \\1", processed_css, flags=regex.MULTILINE) processed_css = regex.sub(r"^\s*hyphens\s*:\s*none;", "\thyphens: none;\n\tadobe-text-layout: optimizeSpeed; /* For Nook */", processed_css, flags=regex.MULTILINE) if processed_css != css: file.seek(0) file.write(processed_css) file.truncate() # Sort out MathML compatibility has_mathml = "mathml" in metadata_xml if has_mathml: # We import this late because we don't want to load selenium if we're not going to use it! from se import browser # pylint: disable=import-outside-toplevel # We wrap this whole thing in a try block, because we need to call # driver.quit() if execution is interrupted (like by ctrl + c, or by an unhandled exception). If we don't call driver.quit(), # Firefox will stay around as a zombie process even if the Python script is dead. 
try: driver = browser.initialize_selenium_firefox_webdriver() mathml_count = 1 for root, _, filenames in os.walk(work_epub_root_directory): for filename_string in filenames: filename = Path(root) / filename_string if filename.suffix == ".xhtml": with open(filename, "r+", encoding="utf-8") as file: xhtml = file.read() processed_xhtml = xhtml replaced_mathml: List[str] = [] # Check if there's MathML we want to convert # We take a naive approach and use some regexes to try to simplify simple MathML expressions. # For each MathML expression, if our round of regexes finishes and there is still MathML in the processed result, we abandon the attempt and render to PNG using Firefox. for line in regex.findall(r"<(?:m:)?math[^>]*?>(?:.+?)</(?:m:)?math>", processed_xhtml, flags=regex.DOTALL): if line not in replaced_mathml: replaced_mathml.append(line) # Store converted lines to save time in case we have multiple instances of the same MathML mathml_tree = se.easy_xml.EasyXhtmlTree("<?xml version=\"1.0\" encoding=\"utf-8\"?>{}".format(regex.sub(r"<(/?)m:", "<\\1", line))) processed_line = line # If the mfenced element has more than one child, they are separated by commas when rendered. # This is too complex for our naive regexes to work around. So, if there is an mfenced element with more than one child, abandon the attempt. 
if not mathml_tree.css_select("mfenced > * + *"): processed_line = regex.sub(r"</?(?:m:)?math[^>]*?>", "", processed_line) processed_line = regex.sub(r"<!--.+?-->", "", processed_line) processed_line = regex.sub(r"<(?:m:)?mfenced/>", "()", processed_line) processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi)>(.+?)</\3><((?:m:)?mi)>(.+?)</\5></\1>", "<i>\\4</i><\\2><i>\\6</i></\\2>", processed_line) processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi)>(.+?)</\3><((?:m:)?mn)>(.+?)</\5></\1>", "<i>\\4</i><\\2>\\6</\\2>", processed_line) processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mn)>(.+?)</\3><((?:m:)?mn)>(.+?)</\5></\1>", "\\4<\\2>\\6</\\2>", processed_line) processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mn)>(.+?)</\3><((?:m:)?mi)>(.+?)</\5></\1>", "\\4<\\2><i>\\6</i></\\2>", processed_line) processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi) mathvariant=\"normal\">(.+?)</\3><((?:m:)?mi)>(.+?)</\5></\1>", "\\4<\\2><i>\\6</i></\\2>", processed_line) processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi) mathvariant=\"normal\">(.+?)</\3><((?:m:)?mn)>(.+?)</\5></\1>", "\\4<\\2>\\6</\\2>", processed_line) processed_line = regex.sub(fr"<(?:m:)?mo>{se.FUNCTION_APPLICATION}</(?:m:)?mo>", "", processed_line, flags=regex.IGNORECASE) # The ignore case flag is required to match here with the special FUNCTION_APPLICATION character, it's unclear why processed_line = regex.sub(r"<(?:m:)?mfenced><((?:m:)(?:mo|mi|mn|mrow))>(.+?)</\1></(?:m:)?mfenced>", "(<\\1>\\2</\\1>)", processed_line) processed_line = regex.sub(r"<(?:m:)?mrow>([^>].+?)</(?:m:)?mrow>", "\\1", processed_line) processed_line = regex.sub(r"<(?:m:)?mi>([^<]+?)</(?:m:)?mi>", "<i>\\1</i>", processed_line) processed_line = regex.sub(r"<(?:m:)?mi mathvariant=\"normal\">([^<]+?)</(?:m:)?mi>", "\\1", processed_line) processed_line = regex.sub(r"<(?:m:)?mo>([+\-−=×])</(?:m:)?mo>", " \\1 ", processed_line) processed_line = 
regex.sub(r"<((?:m:)?m[no])>(.+?)</\1>", "\\2", processed_line) processed_line = regex.sub(r"</?(?:m:)?mrow>", "", processed_line) processed_line = processed_line.strip() processed_line = regex.sub(r"</i><i>", "", processed_line, flags=regex.DOTALL) # Did we succeed? Is there any more MathML in our string? if regex.findall("</?(?:m:)?m", processed_line): # Failure! Abandon all hope, and use Firefox to convert the MathML to PNG. se.images.render_mathml_to_png(driver, regex.sub(r"<(/?)m:", "<\\1", line), work_epub_root_directory / "epub" / "images" / f"mathml-{mathml_count}.png", work_epub_root_directory / "epub" / "images" / f"mathml-{mathml_count}-2x.png") processed_xhtml = processed_xhtml.replace(line, f"<img class=\"mathml epub-type-se-image-color-depth-black-on-transparent\" epub:type=\"se:image.color-depth.black-on-transparent\" src=\"../images/mathml-{mathml_count}.png\" srcset=\"../images/mathml-{mathml_count}-2x.png 2x, ../images/mathml-{mathml_count}.png 1x\" />") mathml_count = mathml_count + 1 else: # Success! Replace the MathML with our new string. 
processed_xhtml = processed_xhtml.replace(line, processed_line) if processed_xhtml != xhtml: file.seek(0) file.write(processed_xhtml) file.truncate() except KeyboardInterrupt as ex: # Bubble the exception up, but proceed to `finally` so we quit the driver raise ex finally: try: driver.quit() except Exception: # We might get here if we ctrl + c before selenium has finished initializing the driver pass # Include epub2 cover metadata cover_id = self.metadata_dom.xpath("//item[@properties=\"cover-image\"]/@id")[0].replace(".svg", ".jpg") metadata_xml = regex.sub(r"(<metadata[^>]+?>)", f"\\1\n\t\t<meta content=\"{cover_id}\" name=\"cover\" />", metadata_xml) # Add metadata to content.opf indicating this file is a Standard Ebooks compatibility build metadata_xml = metadata_xml.replace("<dc:publisher", "<meta property=\"se:transform\">compatibility</meta>\n\t\t<dc:publisher") # Add any new MathML images we generated to the manifest if has_mathml: for root, _, filenames in os.walk(work_epub_root_directory / "epub" / "images"): filenames = natsorted(filenames) filenames.reverse() for filename_string in filenames: filename = Path(root) / filename_string if filename.name.startswith("mathml-"): metadata_xml = metadata_xml.replace("<manifest>", f"<manifest><item href=\"images/{filename.name}\" id=\"{filename.name}\" media-type=\"image/png\"/>") metadata_xml = regex.sub(r"properties=\"([^\"]*?)mathml([^\"]*?)\"", "properties=\"\\1\\2\"", metadata_xml) metadata_xml = regex.sub(r"properties=\"\s*\"", "", metadata_xml) # Generate our NCX file for epub2 compatibility. # First find the ToC file. 
toc_filename = self.metadata_dom.xpath("//item[@properties=\"nav\"]/@href")[0] metadata_xml = metadata_xml.replace("<spine>", "<spine toc=\"ncx\">") metadata_xml = metadata_xml.replace("<manifest>", "<manifest><item href=\"toc.ncx\" id=\"ncx\" media-type=\"application/x-dtbncx+xml\" />") # Now use an XSLT transform to generate the NCX with importlib_resources.path("se.data", "navdoc2ncx.xsl") as navdoc2ncx_xsl_filename: toc_tree = se.epub.convert_toc_to_ncx(work_epub_root_directory, toc_filename, navdoc2ncx_xsl_filename) # Convert the <nav> landmarks element to the <guide> element in content.opf guide_xhtml = "<guide>" for element in toc_tree.xpath("//nav[@epub:type=\"landmarks\"]/ol/li/a"): element_xhtml = element.tostring() element_xhtml = regex.sub(r"epub:type=\"([^\"]*)(\s*frontmatter\s*|\s*backmatter\s*)([^\"]*)\"", "type=\"\\1\\3\"", element_xhtml) element_xhtml = regex.sub(r"epub:type=\"[^\"]*(acknowledgements|bibliography|colophon|copyright-page|cover|dedication|epigraph|foreword|glossary|index|loi|lot|notes|preface|bodymatter|titlepage|toc)[^\"]*\"", "type=\"\\1\"", element_xhtml) element_xhtml = element_xhtml.replace("type=\"copyright-page", "type=\"copyright page") # We add the 'text' attribute to the titlepage to tell the reader to start there element_xhtml = element_xhtml.replace("type=\"titlepage", "type=\"title-page text") element_xhtml = regex.sub(r"type=\"\s*\"", "", element_xhtml) element_xhtml = element_xhtml.replace("<a", "<reference") element_xhtml = regex.sub(r">(.+)</a>", " title=\"\\1\" />", element_xhtml) # Replace instances of the `role` attribute since it's illegal in content.opf element_xhtml = regex.sub(r" role=\".*?\"", "", element_xhtml) guide_xhtml = guide_xhtml + element_xhtml guide_xhtml = guide_xhtml + "</guide>" metadata_xml = metadata_xml.replace("</package>", "") + guide_xhtml + "</package>" # Guide is done, now write content.opf and clean it. # Output the modified content.opf before making more epub2 compatibility hacks. 
with open(work_epub_root_directory / "epub" / "content.opf", "w", encoding="utf-8") as file: file.write(metadata_xml) file.truncate() # All done, clean the output for filepath in se.get_target_filenames([work_epub_root_directory], (".xhtml", ".svg", ".opf", ".ncx")): se.formatting.format_xml_file(filepath) # Write the compatible epub se.epub.write_epub(work_epub_root_directory, output_directory / epub_output_filename) if run_epubcheck: # Path arguments must be cast to string for Windows compatibility. with importlib_resources.path("se.data.epubcheck", "epubcheck.jar") as jar_path: try: epubcheck_result = subprocess.run(["java", "-jar", str(jar_path), "--quiet", str(output_directory / epub_output_filename)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False) epubcheck_result.check_returncode() except subprocess.CalledProcessError: output = epubcheck_result.stdout.decode().strip() # Get the epubcheck version to print to the console version_output = subprocess.run(["java", "-jar", str(jar_path), "--version"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False).stdout.decode().strip() version = regex.search(r"[0-9]+\.([0-9]+\.?)*", version_output, flags=regex.MULTILINE).group(0) # The last two lines from epubcheck output are not necessary. Remove them here. # Remove them as lines instead of as a matching regex to work with localized output strings. 
split_output = output.split("\n") output = "\n".join(split_output[:-2]) # Try to linkify files in output if we can find them try: output = regex.sub(r"(ERROR\(.+?\): )(.+?)(\([0-9]+,[0-9]+\))", lambda match: match.group(1) + "[path][link=file://" + str(self.path / "src" / regex.sub(fr"^\..+?\.epub{os.sep}", "", match.group(2))) + "]" + match.group(2) + "[/][/]" + match.group(3), output) except: # If something goes wrong, just pass through the usual output pass raise se.BuildFailedException(f"[bash]epubcheck[/] v{version} failed with:\n{output}") if build_kindle: # There's a bug in Calibre <= 3.48.0 where authors who have more than one MARC relator role # display as "unknown author" in the Kindle interface. # See: https://bugs.launchpad.net/calibre/+bug/1844578 # Until the bug is fixed, we simply remove any other MARC relator on the dc:creator element. # Once the bug is fixed, we can remove this block. with open(work_epub_root_directory / "epub" / "content.opf", "r+", encoding="utf-8") as file: xhtml = file.read() processed_xhtml = xhtml for match in regex.findall(r"<meta property=\"role\" refines=\"#author\" scheme=\"marc:relators\">.*?</meta>", xhtml): if ">aut<" not in match: processed_xhtml = processed_xhtml.replace(match, "") if processed_xhtml != xhtml: file.seek(0) file.write(processed_xhtml) file.truncate() # Kindle doesn't go more than 2 levels deep for ToC, so flatten it here. 
with open(work_epub_root_directory / "epub" / toc_filename, "r+", encoding="utf-8") as file: xhtml = file.read() soup = BeautifulSoup(xhtml, "lxml") for match in soup.select("ol > li > ol > li > ol"): match.parent.insert_after(match) match.unwrap() file.seek(0) file.write(str(soup)) file.truncate() # Rebuild the NCX with importlib_resources.path("se.data", "navdoc2ncx.xsl") as navdoc2ncx_xsl_filename: toc_tree = se.epub.convert_toc_to_ncx(work_epub_root_directory, toc_filename, navdoc2ncx_xsl_filename) # Clean just the ToC and NCX for filepath in [work_epub_root_directory / "epub" / "toc.ncx", work_epub_root_directory / "epub" / toc_filename]: se.formatting.format_xml_file(filepath) # Convert endnotes to Kindle popup compatible notes if (work_epub_root_directory / "epub/text/endnotes.xhtml").is_file(): with open(work_epub_root_directory / "epub/text/endnotes.xhtml", "r+", encoding="utf-8") as file: xhtml = file.read() # We have to remove the default namespace declaration from our document, otherwise # xpath won't find anything at all. See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python try: tree = etree.fromstring(str.encode(xhtml.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", ""))) except Exception as ex: raise se.InvalidXhtmlException(f"Error parsing XHTML [path][link=file://{(work_epub_root_directory / 'epub/text/endnotes.xhtml').resolve()}]endnotes.xhtml[/][/]. 
Exception: {ex}") notes = tree.xpath("//li[@epub:type=\"endnote\" or @epub:type=\"footnote\"]", namespaces=se.XHTML_NAMESPACES) processed_endnotes = "" for note in notes: note_id = note.get("id") note_number = note_id.replace("note-", "") # First, fixup the reference link for this endnote try: ref_link = etree.tostring(note.xpath("p[last()]/a[last()]")[0], encoding="unicode", pretty_print=True, with_tail=False).replace(" xmlns:epub=\"http://www.idpf.org/2007/ops\"", "").strip() except Exception: raise se.InvalidXhtmlException(f"Can’t find ref link for [url]#{note_id}[/].") new_ref_link = regex.sub(r">.*?</a>", ">" + note_number + "</a>.", ref_link) # Now remove the wrapping li node from the note note_text = regex.sub(r"^<li[^>]*?>(.*)</li>$", r"\1", etree.tostring(note, encoding="unicode", pretty_print=True, with_tail=False), flags=regex.IGNORECASE | regex.DOTALL) # Insert our new ref link result = regex.subn(r"^\s*<p([^>]*?)>", "<p\\1 id=\"" + note_id + "\">" + new_ref_link + " ", note_text) # Sometimes there is no leading <p> tag (for example, if the endnote starts with a blockquote # If that's the case, just insert one in front. note_text = result[0] if result[1] == 0: note_text = "<p id=\"" + note_id + "\">" + new_ref_link + "</p>" + note_text # Now remove the old ref_link note_text = note_text.replace(ref_link, "") # Trim trailing spaces left over after removing the ref link note_text = regex.sub(r"\s+</p>", "</p>", note_text).strip() # Sometimes ref links are in their own p tag--remove that too note_text = regex.sub(r"<p>\s*</p>", "", note_text) processed_endnotes += note_text + "\n" # All done with endnotes, so drop them back in xhtml = regex.sub(r"<ol>.*</ol>", processed_endnotes, xhtml, flags=regex.IGNORECASE | regex.DOTALL) file.seek(0) file.write(xhtml) file.truncate() # While Kindle now supports soft hyphens, popup endnotes break words but don't insert the hyphen characters. So for now, remove soft hyphens from the endnotes file. 
with open(work_epub_root_directory / "epub" / "text" / "endnotes.xhtml", "r+", encoding="utf-8") as file: xhtml = file.read() processed_xhtml = xhtml processed_xhtml = processed_xhtml.replace(se.SHY_HYPHEN, "") if processed_xhtml != xhtml: file.seek(0) file.write(processed_xhtml) file.truncate() # Do some compatibility replacements for root, _, filenames in os.walk(work_epub_root_directory): for filename_string in filenames: filename = Path(root) / filename_string if filename.suffix == ".xhtml": with open(filename, "r+", encoding="utf-8") as file: xhtml = file.read() processed_xhtml = xhtml # Kindle doesn't recognize most zero-width spaces or word joiners, so just remove them. # It does recognize the word joiner character, but only in the old mobi7 format. The new format renders them as spaces. processed_xhtml = processed_xhtml.replace(se.ZERO_WIDTH_SPACE, "") # Remove the epub:type attribute, as Calibre turns it into just "type" processed_xhtml = regex.sub(r"epub:type=\"[^\"]*?\"", "", processed_xhtml) if processed_xhtml != xhtml: file.seek(0) file.write(processed_xhtml) file.truncate() # Include compatibility CSS with open(work_epub_root_directory / "epub" / "css" / "core.css", "a", encoding="utf-8") as core_css_file: with importlib_resources.open_text("se.data.templates", "kindle.css", encoding="utf-8") as compatibility_css_file: core_css_file.write(compatibility_css_file.read()) # Add soft hyphens for filepath in se.get_target_filenames([work_epub_root_directory], (".xhtml",)): se.typography.hyphenate_file(filepath, None, True) # Build an epub file we can send to Calibre se.epub.write_epub(work_epub_root_directory, work_directory / epub_output_filename) # Generate the Kindle file # We place it in the work directory because later we have to update the asin, and the mobi.update_asin() function will write to the final output directory cover_path = work_epub_root_directory / "epub" / 
self.metadata_dom.xpath("//item[@properties=\"cover-image\"]/@href")[0].replace(".svg", ".jpg") # Path arguments must be cast to string for Windows compatibility. return_code = subprocess.run([str(ebook_convert_path), str(work_directory / epub_output_filename), str(work_directory / kindle_output_filename), "--pretty-print", "--no-inline-toc", "--max-toc-links=0", "--prefer-metadata-cover", f"--cover={cover_path}"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode if return_code: raise se.InvalidSeEbookException("[bash]ebook-convert[/] failed.") # Success, extract the Kindle cover thumbnail # Update the ASIN in the generated file mobi.update_asin(asin, work_directory / kindle_output_filename, output_directory / kindle_output_filename) # Extract the thumbnail kindle_cover_thumbnail = Image.open(work_epub_root_directory / "epub" / "images" / "cover.jpg") kindle_cover_thumbnail = kindle_cover_thumbnail.convert("RGB") # Remove alpha channel from PNG if necessary kindle_cover_thumbnail = kindle_cover_thumbnail.resize((432, 648)) kindle_cover_thumbnail.save(output_directory / f"thumbnail_{asin}_EBOK_portrait.jpg")
import os

# Housekeeping pass over the current working directory tree: delete
# AppleDouble-style "._*" files, "*.out" and "*.txt" files, and any file
# whose name contains no dot at all.
for folder, _subdirs, names in os.walk(os.getcwd()):
    # Substring test against the whole directory path, so any path that
    # contains ".git" or ".vscode" anywhere is left untouched.
    if any(marker in folder for marker in ('.git', '.vscode')):
        continue

    for name in names:
        is_junk = (
            name.startswith("._")
            or name.endswith((".out", '.txt'))
            or '.' not in name
        )
        if is_junk:
            os.remove(os.path.join(folder, name))