Python walk示例，os.walk Python示例

示例#1

1

显示文件

文件： mr_permacache.py 项目： new-day-international/reddit

def write_permacache_from_dir(dirname):
    """Process and delete every file under ``dirname``, then prune empty dirs.

    Each file is handed to ``write_permacache_from_file`` and unlinked once
    that call returns.  A failure is reported through ``mr_tools.status`` and
    then re-raised, leaving the offending file in place.  Afterwards the tree
    is walked bottom-up and every directory that can be removed is removed;
    directories that are still non-empty are only reported.
    """
    # The complete list is built first so that progress() can show accurate
    # progress information. With more than tens of millions of files this
    # should either bail out or be reworked to avoid holding the whole list.
    allfiles = [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(dirname)
        for name in names
    ]

    for fname in progress(allfiles, persec=True):
        try:
            write_permacache_from_file(fname)
            os.unlink(fname)
        except:
            mr_tools.status("failed on %r" % fname)
            raise

    mr_tools.status("Removing empty directories")
    # Bottom-up, so children are attempted before their parents.
    for parent, subdirs, _names in os.walk(dirname, topdown=False):
        for subdir in subdirs:
            dname = os.path.join(parent, subdir)
            try:
                os.rmdir(dname)
            except OSError as e:
                if e.errno != errno.ENOTEMPTY:
                    raise
                mr_tools.status("%s not empty" % (dname,))

示例#2

0

显示文件

文件： test_post.py 项目： e-gillies-ix/conda-build

def test_coerce_pycache_to_old_style():
    """Check that ``post.coerce_pycache_to_old_style`` flattens ``__pycache__``.

    A temporary tree is built with ``test.py`` plus a version-tagged ``.pyc``
    inside ``__pycache__`` at two levels.  After coercion a plain ``test.pyc``
    must sit next to each source file and no ``__pycache__`` directory may
    remain.
    """
    cwd = os.getcwd()
    with TemporaryDirectory() as tmp:
        pyc_name = 'test.cpython-{0}{1}.pyc'.format(
            sys.version_info.major, sys.version_info.minor)

        for pkg_dir in (tmp, os.path.join(tmp, 'testdir')):
            os.makedirs(os.path.join(pkg_dir, '__pycache__'))
        for pkg_dir in (tmp, os.path.join(tmp, 'testdir')):
            for path in (os.path.join(pkg_dir, 'test.py'),
                         os.path.join(pkg_dir, '__pycache__', pyc_name)):
                with open(path, 'w') as f:
                    f.write("\n")

        os.chdir(tmp)
        # Everything that runs with the changed working directory is inside
        # the try block so the original cwd is restored even when the
        # coercion itself raises (otherwise later tests would run from a
        # directory that TemporaryDirectory is about to delete).
        try:
            for root, dirs, files in os.walk(tmp):
                fs = [os.path.join(root, _) for _ in files]
                post.coerce_pycache_to_old_style(fs, cwd=tmp)

            assert os.path.isfile(os.path.join(tmp, 'test.pyc')), os.listdir(tmp)
            assert os.path.isfile(os.path.join(tmp, 'testdir', 'test.pyc')), \
                os.listdir(os.path.join(tmp, 'testdir'))
            for root, dirs, files in os.walk(tmp):
                assert '__pycache__' not in dirs
        finally:
            os.chdir(cwd)

示例#3

0

显示文件

文件： native_report2.py 项目： windream/CrashLogAnalysis

def native_report2(src):
    """Write ``<src>/index.html`` summarising ``*.log`` counts per version.

    Every directory directly under ``src`` is treated as a version.  Versions
    are handled in reverse case-insensitive name order; for each one the
    tree below it is walked, the ``*.log`` files in every sub-directory are
    counted, and the per-directory counts plus their total are passed to
    ``html_report``.  The fragments are joined with ``<br/><br/>`` and
    written out.
    """
    c = ""
    for root, versions, ds in os.walk(src):
        # Only the top-level entry matters; os.walk yields it first, so the
        # walk is stopped right after it instead of traversing (and
        # ignoring) the whole tree.
        if root == src:
            for version in sorted(versions, key=str.lower, reverse=True):
                total = 0
                data = {}
                dd = os.path.join(root, version)
                for d_version, dirs, files in os.walk(dd):
                    for d in dirs:
                        p = os.path.join(d_version, d) + os.sep + "*.log"
                        s = len(glob.glob(p))
                        total += s
                        # Label: the directory name (with any src prefix and
                        # leading "/" stripped) prefixed by the walked path.
                        name = os.path.join(root, d)
                        if name.startswith(src):
                            name = name[len(src):]
                        if name.startswith("/"):
                            name = name[1:]
                        data[d_version + os.sep + name] = s
                c += html_report(data, total, version) + "<br/><br/>"
        break

    # Context manager so the report is flushed and the handle closed.
    with open(os.path.join(src, "index.html"), "w") as out:
        out.write(c)

示例#4

0

显示文件

文件： kickstart.py 项目： rlayte/kickstart

    def copy_template():
        """Copy the template tree to ``name`` and substitute its variables.

        After copying, ``config.yaml`` is removed from the copy, every
        directory named ``options.template`` is replaced by a copy named
        ``name``, and every file is rewritten with ``{{ var }}`` and
        ``__config_var__`` placeholders passed through ``replace_variable``.
        """
        config_prompt(template)
        shutil.copytree(template, name)

        config_file = '%s/%s' % (name, 'config.yaml')
        if os.path.exists(config_file):
            os.remove(config_file)

        for dirname, dirnames, files in os.walk(name):
            for d in dirnames:
                if d == options.template:
                    shutil.copytree('%s/%s' % (dirname, d), '%s/%s' % (dirname, name))
                    shutil.rmtree('%s/%s' % (dirname, d))

        # Raw strings: the originals relied on unknown escapes (\s, \w) being
        # passed through, which newer Python versions warn about.
        moustache = re.compile(r'{{\s*(\w+)\s*}}')
        dunder = re.compile(r'__config_(\w+)__')

        for dirname, dirnames, files in os.walk(name):
            for filename in files:
                path = '%s/%s' % (dirname, filename)
                # Context managers close the handles even if reading or
                # substitution fails part-way.
                with open(path, 'r') as f:
                    lines = f.readlines()

                # Two full passes, in this order, so replace_variable is
                # invoked in the same sequence as before.
                first_pass = [moustache.sub(replace_variable, line) for line in lines]
                new_lines = [dunder.sub(replace_variable, line) for line in first_pass]

                with open(path, 'w') as f:
                    f.write(''.join(new_lines))

示例#5

0

显示文件

文件： utils.py 项目： CageLiu/cobra

def walkDir(path,formats = False):
    """Collect all files and directories found beneath each root in ``path``.

    ``path`` is an iterable of directory paths.  ``formats`` selects how the
    entries are reported:

    * ``"absolute"`` - each entry joined onto the directory it was found in
      (so it starts with the root exactly as it was passed in);
    * ``"relative"`` - the same path with the leading ``<root>/`` removed;
    * anything else  - the bare file or directory name.

    Returns a dict ``{"files": [...], "dirs": [...]}``.
    """

    result = {"files":[],"dirs":[]}

    for item in path:
        # Only a leading "<root>/" is stripped.  The previous regex-based
        # substitution treated the root as a pattern and removed every
        # occurrence of it, not just the prefix.
        prefix = item + "/"
        for root, dirs, files in os.walk(item):
            # Directories go into "dirs" in every mode (relative mode used
            # to append them to "files").
            for kind, names in (("files", files), ("dirs", dirs)):
                for entry in names:
                    if formats == "absolute":
                        value = os.path.join(root, entry)
                    elif formats == "relative":
                        value = os.path.join(root, entry)
                        if value.startswith(prefix):
                            value = value[len(prefix):]
                    else:
                        value = entry
                    result[kind].append(value)
    return result

示例#6

0

显示文件

文件： FileLists.py 项目： Aupajo/s3cmd

def _fswalk_follow_symlinks(path):
    '''
    Walk the filesystem below ``path`` and also below every symlinked
    directory found on the way (one level of link following, no recursion).

    A first pass collects the symlinked directories.  A link whose target is
    the directory containing it, or one of that directory's ancestors, would
    loop, so it is reported with a warning and skipped.  A second pass walks
    ``path`` and each collected link, yielding ``(dirpath, dirnames,
    filenames)`` like os.walk after ``handle_exclude_include_walk`` has been
    applied to the directory names.

    E.g. of a skipped loop: dir1/dir2/sym-dir -> ../dir2
    '''
    assert os.path.isdir(path) # only designed for directory argument
    roots_to_walk = set([path])

    for parent, subdirs, _unused_files in os.walk(path):
        handle_exclude_include_walk(parent, subdirs, [])
        resolved_parent = os.path.realpath(parent)
        for entry in subdirs:
            candidate = os.path.join(parent, entry)
            resolved_candidate = os.path.realpath(candidate)
            if not os.path.islink(candidate):
                continue
            points_at_ancestor = (
                resolved_parent == resolved_candidate or
                resolved_parent.startswith(resolved_candidate + os.path.sep))
            if points_at_ancestor:
                warning("Skipping recursively symlinked directory %s" % entry)
            else:
                roots_to_walk.add(candidate)

    for top in roots_to_walk:
        for dirpath, dirnames, filenames in os.walk(top):
            handle_exclude_include_walk(dirpath, dirnames, [])
            yield (dirpath, dirnames, filenames)

示例#7

0

显示文件

文件： unittest_testlib.py 项目： Chaos99/cachetools

    def test_tmp_dir_normal_1(self):
        """A ``with_tempdir`` function that returns normally leaves no residue.

        The decorated helper creates files and directories through
        ``tempfile``; afterwards the temp directory reported by ``tempfile``
        must be unchanged and must contain nothing.
        """
        tempdir = tempfile.gettempdir()
        expected_empty = [(tempdir, [], [])]
        # the temp directory must be empty before the helper runs
        self.assertListEqual(list(os.walk(tempdir)), expected_empty)

        witness = []

        @with_tempdir
        def createfile(flags):
            handles = [tempfile.mkstemp()[0], tempfile.mkstemp()[0]]
            scratch = tempfile.mkdtemp()
            handles.append(tempfile.mkstemp(dir=scratch)[0])
            tempfile.mkdtemp()
            # record that the body actually executed
            flags.append(True)
            for handle in handles:
                os.close(handle)

        self.assertFalse(witness)
        createfile(witness)
        self.assertTrue(witness)

        self.assertEqual(tempfile.gettempdir(), tempdir)

        # ... and empty again once the helper has returned
        self.assertListEqual(list(os.walk(tempdir)), expected_empty)

示例#8

0

显示文件

文件： unittest_testlib.py 项目： Chaos99/cachetools

    def test_tmp_dir_normal_2(self):
        """A ``with_tempdir`` function that raises still leaves no residue.

        The decorated helper creates files and directories through
        ``tempfile`` and then raises; the exception must propagate, the temp
        directory reported by ``tempfile`` must be unchanged, and it must
        contain nothing.
        """
        tempdir = tempfile.gettempdir()
        # the temp directory must be empty before the helper runs
        self.assertListEqual(list(os.walk(tempfile.tempdir)),
            [(tempfile.tempdir, [], [])])

        class WitnessException(Exception):
            pass

        @with_tempdir
        def createfile():
            handles = [tempfile.mkstemp()[0], tempfile.mkstemp()[0]]
            scratch = tempfile.mkdtemp()
            handles.append(tempfile.mkstemp(dir=scratch)[0])
            tempfile.mkdtemp()
            for handle in handles:
                os.close(handle)
            raise WitnessException()

        self.assertRaises(WitnessException, createfile)

        # the configured temp directory must not have changed
        self.assertEqual(tempfile.gettempdir(), tempdir)

        # ... and it must be empty again after the failure
        self.assertListEqual(list(os.walk(tempdir)),
            [(tempdir, [], [])])

示例#9

0

显示文件

文件： create_template_dataset.py 项目： WesleyHsieh/vision-amt

def extract_rollouts(dataset='train', n_folders=20, n_images=1):
    """
    Extracts rollout images from input folder,
    and copies it to dataset folder.

    Up to ``n_folders`` sub-folders of ``rollout_dir`` are picked in random
    order.  For ``dataset == 'templates'`` up to ``n_images`` randomly chosen
    ``.jpg`` files per walked directory are copied into ``templates_dir``.
    For ``dataset == 'test'`` the alphabetically last ``.jpg`` is labelled
    via ``label_image`` and copied together with its state label.  Any other
    ``dataset`` value copies nothing.
    """
    for root, dirs, files in os.walk(rollout_dir):
        np.random.shuffle(dirs)
        for i in range(min(n_folders, len(dirs))):
            print("Folder {}".format(i))
            rollout_folder = dirs[i]
            # A separate name is used for the inner file list so it no
            # longer shadows the outer walk's ``files``.
            for _, _, rollout_files in os.walk(os.path.join(rollout_dir, rollout_folder)):
                images = [x for x in rollout_files if x.endswith('.jpg')]
                if dataset == 'templates':
                    np.random.shuffle(images)
                    for im in images[:n_images]:
                        src = os.path.join(rollout_dir, rollout_folder, im)
                        dst = os.path.join(templates_dir, im)
                        copy_processed_image(src, dst)
                elif dataset == 'test':
                    if not images:
                        # A directory without images used to raise
                        # IndexError; there is simply nothing to label.
                        continue
                    im = max(images)
                    print("Image: {}".format(im))
                    # NOTE(review): the source path is always built from the
                    # rollout folder itself, even for directories nested
                    # below it - confirm rollouts are flat.
                    src = os.path.join(rollout_dir, rollout_folder, im)
                    dst, new_im_name = label_image(src, im, test_dir)
                    copy_processed_image(src, dst)
                    copy_state_label(rollout_dir, rollout_folder, im, new_im_name)
        # Only the immediate sub-folders of rollout_dir are of interest.
        break

示例#10

0

显示文件

文件： sfaadmin.py 项目： kongseokhwan/sfa

    def nuke(self, all=False, certs=False, reinit=True):
        """Cleanup local registry DB, plus various additional filesystem cleanups optionally

        ``reinit`` re-creates an empty schema right after the purge.
        ``certs`` deletes every ``*.gid`` file and ``server.cert`` found
        under /var/lib/sfa/authorities.  ``all`` deletes every file under
        the hierarchy base directory except ``server.key`` and
        ``server.cert``.
        """
        from sfa.storage.dbschema import DBSchema
        from sfa.util.sfalogging import _SfaLogger
        logger = _SfaLogger(logfile='/var/log/sfa_import.log', loggername='importlog')
        logger.setLevelFromOptVerbose(self.api.config.SFA_API_LOGLEVEL)
        logger.info("Purging SFA records from database")
        dbschema = DBSchema()
        dbschema.nuke()

        # The schema is re-created here for convenience so that no explicit
        # "service sfa restart" is needed; in some (upgrade) scenarios this
        # might be the wrong thing to do, hence the switch.
        if reinit:
            logger.info("re-creating empty schema")
            dbschema.init_or_upgrade()

        # remove the server certificate and all gids found in /var/lib/sfa/authorities
        if certs:
            logger.info("Purging cached certificates")
            for (dirpath, _, filenames) in os.walk('/var/lib/sfa/authorities'):
                for filename in filenames:
                    is_cached_cert = filename.endswith('.gid') or filename == 'server.cert'
                    if not is_cached_cert:
                        continue
                    os.unlink(dirpath + os.sep + filename)

        # just remove all files that do not match 'server.key' or 'server.cert'
        if all:
            logger.info("Purging registry filesystem cache")
            preserved_files = ['server.key', 'server.cert']
            for (dirpath, _, filenames) in os.walk(Hierarchy().basedir):
                for filename in filenames:
                    if filename not in preserved_files:
                        os.unlink(dirpath + os.sep + filename)

示例#11

0

显示文件

文件： views.py 项目： joemiller/graphite-web

def index_json(request):
  """Return the sorted list of known metric names as a JSON response.

  Metrics come from ``*.wsp`` files under each of ``settings.WHISPER_DIRS``
  and from directories under ``settings.CERES_DIR`` that contain a
  ``.ceres-node`` file.  Paths are turned into dotted names by dropping the
  storage root, removing ``.wsp`` / ``.rrd``, replacing ``/`` with ``.`` and
  stripping leading dots.
  """
  jsonp = request.REQUEST.get('jsonp', False)
  matches = []

  for whisper_dir in settings.WHISPER_DIRS:
    for root, dirs, files in os.walk(whisper_dir):
      relative_root = root.replace(whisper_dir, '')
      matches.extend(
        os.path.join(relative_root, basename)
        for basename in files
        if fnmatch.fnmatch(basename, '*.wsp')
      )

  for root, dirs, files in os.walk(settings.CERES_DIR):
    # a directory holding a .ceres-node marker is itself the metric
    if '.ceres-node' in files:
      matches.append(root.replace(settings.CERES_DIR, ''))

  def to_metric_name(match):
    name = match.replace('.wsp', '')
    name = name.replace('.rrd', '')
    name = name.replace('/', '.')
    return name.lstrip('.')

  matches = [to_metric_name(m) for m in sorted(matches)]
  return json_response_for(request, matches, jsonp=jsonp)

示例#12

0

显示文件

文件： test_mtframework_arc.py 项目： Brachi/albam

def test_arc_from_dir_re5(tmpdir, arc_file):
    """Round-trip an arc file: unpack it, repack the output, unpack again.

    The repacked archive must have the same size as the original, the same
    number of entries, the same set of extracted file names, and its first
    entry must start at offset 32768.
    """
    out_root = str(tmpdir)

    def extracted_names(folder):
        # bare file names of everything below ``folder``
        names = []
        for _, _, files in os.walk(folder):
            names.extend(files)
        return names

    # original archive -> directory
    arc_original = Arc(file_path=arc_file)
    arc_original_out = os.path.join(out_root, os.path.basename(arc_file).replace('.arc', ''))
    arc_original.unpack(arc_original_out)

    # directory -> new archive on disk
    arc_from_dir = Arc.from_dir(arc_original_out)
    arc_from_dir_out = os.path.join(out_root, 'arc-from-dir.arc')
    with open(arc_from_dir_out, 'wb') as w:
        w.write(arc_from_dir)

    # new archive -> second directory
    arc_from_arc_from_dir = Arc(file_path=arc_from_dir_out)
    arc_from_arc_from_dir_out = os.path.join(out_root, 'arc-from-arc-from-dir')
    arc_from_arc_from_dir.unpack(arc_from_arc_from_dir_out)

    files_extracted_1 = extracted_names(arc_original_out)
    files_extracted_2 = extracted_names(arc_from_arc_from_dir_out)

    # Assuming zlib default compression is used in all original arc files.
    assert os.path.getsize(arc_file) == os.path.getsize(arc_from_dir_out)
    # The hashes would be different due to the file_paths ordering
    assert arc_original.files_count == arc_from_arc_from_dir.files_count
    assert sorted(files_extracted_1) == sorted(files_extracted_2)
    assert arc_from_arc_from_dir.file_entries[0].offset == 32768

示例#13

0

显示文件

文件： packageFilePath.py 项目： ruschecker/DrugDiscovery-Home

def findModulesInPackage(package, name, fileNameFilters=()):
    """
    Return a dictionary mapping each package or subpackage path to the list
    of module file names in which ``name`` was found.

    ``name`` is a regular expression searched for on every line.  As a fast
    path, a ``name`` starting with '^' is not compiled: the remainder is
    compared literally against the beginning of each line.

    Only ``*.py`` files whose name does not start with '#' or '.' are read.
    Files whose name contains any of the strings in ``fileNameFilters`` are
    skipped.  Directories named CVS, regression, Tutorial, test, Doc, doc,
    Icons or Tests are not visited.
    """
    import re

    skipped_dirs = ('CVS', 'regression', 'Tutorial', 'test', 'Doc', 'doc',
                    'Icons', 'Tests')

    if name[:1] == '^':
        prefix = name[1:]

        def line_matches(line):
            return line.startswith(prefix)
    else:
        line_matches = re.compile(name).search

    candidates = {}
    for root, dirs, files in os.walk(package):
        # Prune in place so os.walk does not descend into these directories.
        dirs[:] = [d for d in dirs if d not in skipped_dirs]

        for fi in files:
            if not fi.endswith('.py') or fi[0] in ('#', '.'):
                continue
            # The former inner-loop ``continue`` only advanced the filter
            # loop, so the filters never excluded anything.
            if any(flt in fi for flt in fileNameFilters):
                continue

            with open(os.path.join(root, fi)) as f:
                found = any(line_matches(line) for line in f)
            if found:
                candidates.setdefault(root, []).append(fi)
    return candidates

示例#14

0

显示文件

文件： КостяницынаАпрог12.py 项目： Stoneberry/HSE1

def function0(path='C:\\Users\\Та\\Desktop\\универр'):
    """Map file counts to directory names for the directories under ``path``.

    ``path`` defaults to the location that used to be hard-coded, so calls
    without arguments behave as before.  For every directory name found
    anywhere in the tree, the directory of that name directly under ``path``
    is walked and ``d[<number of files in a walked directory>]`` is set to
    that name.  Later entries with the same count overwrite earlier ones.

    Returns the resulting ``{file_count: directory_name}`` dict.
    """
    d = {}
    for root, dirs, files in os.walk(path):
        for i in dirs:
            # NOTE(review): the name is always joined onto ``path`` rather
            # than onto ``root``, so nested directories only contribute when
            # a directory of the same name exists at the top level - confirm
            # that this is intended.
            for root1, dirs1, files1 in os.walk(os.path.join(path, i)):
                d[len(files1)] = i
    return d

示例#15

0

显示文件

文件： Generate.Library.py 项目： TouchStar/scaffolds

def findReplace(directory, find, replace, filePattern):
    """Replace ``find`` with ``replace`` in names and contents below ``directory``.

    First, every walked directory path containing ``find`` is renamed, one
    rename per walk, restarting the walk after each so that paths stay
    valid.  Then every file matching the ``fnmatch`` pattern ``filePattern``
    is renamed if its name contains ``find`` and rewritten if its contents
    contain ``find``.  A file named like this script is left alone.

    NOTE(review): the directory pass substitutes in the whole absolute path,
    and never terminates if ``replace`` itself contains ``find``.
    """
    top = os.path.abspath(directory)

    restart = True
    while restart:
        restart = False
        for path, dirs, files in os.walk(top):
            newpath = path.replace(find, replace)
            if newpath != path:
                os.rename(path, newpath)
                # the walk is now stale; start over from the top
                restart = True
                break

    # __file__ may carry a directory part, so compare base names.
    own_name = os.path.basename(__file__)

    for path, dirs, files in os.walk(top):
        for filename in fnmatch.filter(files, filePattern):
            if filename == own_name:
                continue

            newfilename = filename.replace(find, replace)
            if newfilename != filename:
                os.rename(os.path.join(path, filename), os.path.join(path, newfilename))
                filename = newfilename

            filepath = os.path.join(path, filename)
            with open(filepath) as f:
                s = f.read()

            # ``str.find`` returns -1 (truthy) when nothing is found and 0
            # (falsy) for a match at the very start, so it cannot be used as
            # a condition; test membership instead and only rewrite files
            # that actually change.
            if find in s:
                with open(filepath, "w") as f:
                    f.write(s.replace(find, replace))

示例#16

0

显示文件

文件： setup.py 项目： mherkazandjian/amuse

def find_data_files(srcdir, destdir, *wildcards, **kw):
    """
    get a list of all files under the srcdir matching wildcards,
    returned in a format to be used for install_data

    The result is a list of ``(destination_dir, [file, ...])`` tuples in
    which the leading ``srcdir`` of each directory has been replaced by
    ``destdir``.  Directories whose path contains ``.svn`` are skipped.
    Pass ``recursive=False`` to look only at ``srcdir`` itself.
    """
    def walk_helper(arg, dirname, files):
        # Callback with the (arg, dirname, names) signature used by walk().
        if '.svn' in dirname:
            return
        lst, wildcards, dirnameconverter, destdir = arg
        names = []
        for wc in wildcards:
            wc_name = os.path.normpath(os.path.join(dirname, wc))
            for f in files:
                filename = os.path.normpath(os.path.join(dirname, f))

                if fnmatch.fnmatch(filename, wc_name) and not os.path.isdir(filename):
                    names.append(filename)
        if names:
            # A callable replacement inserts destdir verbatim; as a plain
            # string, backslashes in it would be read as regex escapes.
            destdirname = dirnameconverter.sub(lambda match: destdir, dirname)
            lst.append((destdirname, names))

    file_list = []
    recursive = kw.get('recursive', True)
    # srcdir is a path, not a pattern: escape it so characters such as
    # '+' or '.' are matched literally.
    converter = re.compile('^({0})'.format(re.escape(srcdir)))
    if recursive:
        walk(srcdir, walk_helper, (file_list, wildcards, converter, destdir))
    else:
        walk_helper((file_list, wildcards, converter, destdir),
                    srcdir,
                    [os.path.basename(f) for f in glob.glob(os.path.join(srcdir, '*'))])
    return file_list

示例#17

0

显示文件

文件： triggermail_templates.py 项目： TriggerMail/triggermail_sublimetext_plugin

    def generate_file_map(self):
        """Build a ``{file name: contents}`` map of templates and images.

        Text files below ``self.path`` with a recognised extension are read
        with ``read_file``; every file below ``self.image_path`` is encoded
        with ``encode_image``.  Keys are bare file names, so a later file
        with the same name replaces an earlier one.
        """
        # All files in the folder are gathered here and sent up to GAE
        # rather than being processed locally.
        text_suffixes = ('.tracking', '.html', '.txt', '.yaml', '.js')
        file_map = dict()

        for root, dirs, files in os.walk(self.path):
            for filename in files:
                if not filename.endswith(text_suffixes):
                    continue
                file_map[filename] = read_file(os.path.join(root, filename))

        # Every image file for this partner is read. That is inefficient;
        # ideally only the images referenced by the html file would be read,
        # but there are no facilities for that kind of processing here
        # because installing pip packages through a sublimetext plugin is
        # painful. Revisit if this becomes a performance bottleneck; it is
        # acceptable on a fast connection.
        for root, dirs, files in os.walk(self.image_path):
            for filename in files:
                image_path = os.path.abspath(os.path.join(root, filename))
                file_map[filename] = encode_image(image_path)

        return file_map

示例#18

0

显示文件

文件： triggermail_templates.py 项目： TriggerMail/triggermail_sublimetext_plugin

    def generate_file_map(self):
        """Build a map of template and image contents for upload.

        Text files below ``self.path`` are stored under
        ``"<view dir relative to parent_path>/<file name>"``; images below
        ``self.image_path`` and text files below ``self.parent_path`` are
        stored under their bare file name.  The resulting keys are printed
        before the map is returned.
        """
        # All files in the folder are gathered here and sent up to GAE
        # rather than being processed locally.
        text_suffixes = ('.tracking', '.html', '.txt', '.yaml', '.js')
        file_map = dict()
        fdir = os.path.dirname(self.view.file_name()).replace(self.parent_path+'/', '')

        for root, dirs, files in os.walk(self.path):
            for filename in files:
                if not filename.endswith(text_suffixes):
                    continue
                contents = read_file(os.path.join(root, filename))
                file_map['%s/%s' % (fdir, filename)] = contents

        for root, dirs, files in os.walk(self.image_path):
            for filename in files:
                image_path = os.path.abspath(os.path.join(root, filename))
                file_map[filename] = encode_image(image_path)

        for root, dirs, files in os.walk(self.parent_path):
            for filename in files:
                if not filename.endswith(text_suffixes):
                    continue
                file_map[filename] = read_file(os.path.join(root, filename))

        print(file_map.keys())

        return file_map

示例#19

0

显示文件

文件： reader.py 项目： bwrichte/TorFinalProject

  def _read_descriptor_files(self):
    """Process every target, then publish the results.

    Missing targets are reported to the skip listeners, directories are
    walked (following links only when the running Python supports the
    ``followlinks`` argument) and plain files are handled directly.
    Processing stops early once the stop flag is set; ``FINISHED`` is only
    queued if it was not.
    """
    new_processed_files = {}

    # iterate over a snapshot so changes to self._targets do not interfere
    for target in list(self._targets):
      if self._is_stopped.isSet():
        break

      if not os.path.exists(target):
        self._notify_skip_listeners(target, FileMissing())
      elif os.path.isdir(target):
        if stem.prereq.is_python_26():
          walk_options = {'followlinks': self._follow_links}
        else:
          walk_options = {}

        self._handle_walker(os.walk(target, **walk_options), new_processed_files)
      else:
        self._handle_file(target, new_processed_files)

    self._processed_files = new_processed_files

    if not self._is_stopped.isSet():
      self._unreturned_descriptors.put(FINISHED)

    self._iter_notice.set()

示例#20

0

显示文件

文件： manifest2ditawp.py 项目： rjohnson8103/WParchive

def EmptyDir(d):
    """Make ``d`` an existing, empty directory.

    If ``d`` is an existing directory, every file and sub-directory below it
    is removed.  Otherwise ``d`` is created.  ``None`` is ignored.

    The tree is walked once, bottom-up, so each directory is already empty
    when it is removed.  Symbolic links to directories are unlinked rather
    than followed; their targets are left untouched.
    """
    if debugMode():
        print("EmptyDir",d)

    if d is None:
        return

    if os.path.isdir(d):
        for root, subdirs, files in os.walk(d, topdown=False):
            # delete all the files
            for f in files:
                ff = root+os.sep+f
                os.remove(ff)
                if debugMode():
                    print("  removed",ff)
            # delete the (by now empty) subdirectories
            for sdir in subdirs:
                sub = root+os.sep+sdir
                if os.path.islink(sub):
                    # os.walk lists directory symlinks as directories but
                    # does not descend into them; rmdir would fail on a link
                    os.remove(sub)
                else:
                    os.rmdir(sub)
    else:
        os.mkdir(d)
        print("created",d)

    if debugMode():
        print("all files deleted from",d)
示例#21

0

显示文件

文件： ldutil.py 项目： ipittau/ldutil

def create_dependency_tree(research_dir):
    """Run ``analyze`` on every regular file below ``research_dir``.

    The tree is walked once to count the files and a second time to process
    them, printing an "Analyze i/total" progress line that is overwritten in
    place.  Symbolic links are skipped silently; anything that is neither a
    link nor a regular file is reported as skipped.
    """
    print("going.. " + research_dir)

    # first pass: count only, so progress can be shown as i/total
    total_file_num = sum(len(files) for _root, _dirs, files in os.walk(research_dir))

    file_num = 0
    for root, dirs, files in os.walk(research_dir):
        for new_file in files:
            file_num += 1
            print("Analyze " + str(file_num) + "/" + str(total_file_num))
            # move the cursor back up so the next progress line overwrites this one
            sys.stdout.write("\033[F")

            pathname = os.path.join(root, new_file)
            # lstat: inspect the entry itself, not what a link points to
            mode = os.lstat(pathname).st_mode
            if S_ISLNK(mode):
                # links are skipped
                continue
            if S_ISREG(mode):
                # a regular file: hand it to the recursive analysis
                analyze(pathname, "nobody")
            else:
                # unknown file type
                print('Skipping %s' % pathname)

示例#22

0

显示文件

文件： test_meta.py 项目： Grahack/geophar

def test_imports():
    u"""Check that there are no implicit relative imports.

    The names of all local modules (``*.py`` files) and packages
    (directories containing ``__init__.py``) below ``WXGEODIR`` are
    collected, ignoring the bundled ``sympy`` and ``sympy_OLD`` trees.  Then
    every import line of every ``*.py`` file is checked: it must not import
    one of those names as a top-level module.
    """
    # List the local modules
    locaux = set()

    for root, dirs, files in walk(WXGEODIR):
        if 'sympy' in dirs:
            dirs.remove('sympy')
        if 'sympy_OLD' in dirs:
            dirs.remove('sympy_OLD')
        for name in files:
            if name.endswith('.py'):
                locaux.add(name[:-3])
        for name in dirs:
            if isfile(join(root, name, '__init__.py')):
                locaux.add(name)
    assert 'sympy' not in locaux and 'trigonometry' not in locaux

    if not locaux:
        # An empty alternation would match any import followed by a space.
        return

    # Built once, after the name set is complete.  Names are escaped so that
    # file names containing regex metacharacters are matched literally.
    implicit_import = re.compile(
        '(from|import) (' + '|'.join(re.escape(m) for m in sorted(locaux)) + ')[. ]')

    # Test the imports
    for root, dirs, files in walk(WXGEODIR):
        for name in files:
            if name.endswith('.py'):
                with open(join(root, name)) as f:
                    for n, line in enumerate(f):
                        if 'from ' in line or 'import ' in line:
                            # The check used to be ``assert test(line)`` with
                            # a helper returning None, so it failed on every
                            # import line; assert on the match itself.
                            assert not implicit_import.search(line), \
                                join(root, name) + ' L' + str(n + 1)

示例#23

0

显示文件

文件： setup_package.py 项目： vmarkovtsev/asdf

def get_package_data():  # pragma: no cover
    """Return the package-data mapping for the bundled ASDF standard files.

    The standard's location is taken from the ``ASDF_STANDARD_ROOT``
    environment variable (default ``"asdf-standard"``).  ``*.yaml`` files
    below ``schemas`` and ``*.yaml`` / ``*.asdf`` files below
    ``reference_files`` are listed relative to their respective directory.
    """
    standard_root = os.environ.get("ASDF_STANDARD_ROOT", "asdf-standard")

    def collect(subdir, suffixes):
        # paths, relative to <standard_root>/<subdir>, of matching files
        base = os.path.join(standard_root, subdir)
        return [
            os.path.relpath(os.path.join(node, fname), base)
            for node, _dirs, files in os.walk(base)
            for fname in files
            if fname.endswith(suffixes)
        ]

    schemas = collect("schemas", ('.yaml',))
    reference_files = collect("reference_files", ('.yaml', '.asdf'))

    return {
        str('asdf.schemas'): schemas,
        str('asdf.reference_files'): reference_files
    }

示例#24

0

显示文件

文件： Log.py 项目： csbfa/Neofelis

    def __config_unix__(self):
        """Choose the log directory, purge it when too large, name the log file.

        ``/var/log/Neofelis`` is used when ``/var/log`` qualifies, otherwise
        ``<home>/log``.  The directory is created if missing.  The total size
        of the files in it is stored in ``self._size``; when that exceeds
        10 MiB all of them are deleted.  ``self._fn`` is set to a
        UTC-timestamped ``.log`` file inside the directory.

        Raises ``lexcep`` when no directory qualifies or on any filesystem
        error.
        """
        # "~" is not expanded by os.path / os.stat, so the literal string
        # never named an existing directory and this fallback was unreachable.
        home = os.path.expanduser("~")

        # NOTE(review): the test is "any group permission bit set", not
        # "writable by the current user" - confirm this is the intended check.
        if os.path.isdir("/var/log") and bool(os.stat("/var/log").st_mode & stat.S_IRWXG):
            self._path = "/var/log/Neofelis"
        elif os.path.isdir(home) and bool(os.stat(home).st_mode & stat.S_IRWXG):
            self._path = os.path.join(home, "log")
        else:
            raise lexcep("Permission Error: Unable to access log directory").with_traceback(sys.exc_info()[2])

        try:
            if not os.path.isdir(self._path):
                os.mkdir(self._path)
        except IOError as e:
            raise lexcep(str(e)).with_traceback(sys.exc_info()[2])

        self._size = 0

        try:
            for (path, dirs, files) in os.walk(self._path):
                for file in files:
                    f = os.path.join(path, file)
                    self._size += os.path.getsize(f)

            # more than 10 MiB of logs: start over
            if self._size / (1024 * 1024.0) > 10:
                for (path, dirs, files) in os.walk(self._path):
                    for file in files:
                        f = os.path.join(path, file)
                        os.remove(f)
        except Exception as e:
            raise lexcep(str(e)).with_traceback(sys.exc_info()[2])

        # Joined with a separator so the file is created inside the log
        # directory (plain concatenation produced a sibling such as
        # "/var/log/Neofelis<timestamp>.log" that the purge never saw).
        self._fn = os.path.join(self._path, strftime("%Y%m%d%H%M%S", gmtime()) + ".log")

示例#25

0

显示文件

文件： bdist_egg.py 项目： axwack/rebalancer

def make_zipfile(zip_filename, base_dir, verbose=0, dry_run=0, compress=True,
                 mode='w'):
    """Create the zip file 'zip_filename' from all the files under 'base_dir'.

    Archive member names are the file paths with the leading 'base_dir' and
    the following separator removed.  'compress' selects ZIP_DEFLATED over
    ZIP_STORED and 'mode' is passed to zipfile.ZipFile.  With 'dry_run' the
    files are only logged and nothing is written.  'verbose' is accepted but
    not used here.  Returns 'zip_filename'.
    """
    import zipfile

    mkpath(os.path.dirname(zip_filename), dry_run=dry_run)
    log.info("creating '%s' and adding '%s' to it", zip_filename, base_dir)

    def visit(z, dirname, names):
        # add (or, in a dry run, just log) every regular file in ``names``
        for name in names:
            path = os.path.normpath(os.path.join(dirname, name))
            if os.path.isfile(path):
                p = path[len(base_dir) + 1:]
                if not dry_run:
                    z.write(path, p)
                log.debug("adding '%s'" % p)

    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
    if not dry_run:
        z = zipfile.ZipFile(zip_filename, mode, compression=compression)
        # try/finally so the archive handle is released even when adding a
        # file fails part-way through.
        try:
            for dirname, dirs, files in os.walk(base_dir):
                visit(z, dirname, files)
        finally:
            z.close()
    else:
        for dirname, dirs, files in os.walk(base_dir):
            visit(None, dirname, files)
    return zip_filename

示例#26

0

显示文件

文件： mask_ip.py 项目： perfectsearch/sandman

def main(root, componentsToMask, dryRun):
    """Mask the listed components under ``root``.

    Empties the 'buildtools', 'boost' and 'sample-data' folders of ``root``,
    then walks every component directory: '.svn' folders are removed (and not
    descended into) and every file accepted by shouldMask() is passed to
    mask().  If ``<root>/../build`` exists, its 'Testing' folder is emptied
    and the component directories are walked once more.

    root             -- directory containing the components
    componentsToMask -- iterable of strings; the text before the first space
                        of each entry is used as the component name
    dryRun           -- forwarded to emptyFolder() and mask(); when true the
                        '.svn' folders are only printed, not deleted
    """
    buildRoot = os.path.abspath(os.path.join(root, '../build'))
    emptyFolder(root, 'buildtools', dryRun)
    emptyFolder(root, 'boost', dryRun)
    emptyFolder(root, 'sample-data', dryRun)
    # Keep only the text before the first space.  split() leaves an entry
    # without any space intact, whereas slicing with find() (which returns -1
    # in that case) silently dropped its last character.
    componentsToMask = [x.split(' ', 1)[0] for x in componentsToMask]
    for c in componentsToMask:
        path = os.path.join(root, c)
        print('Masking %s...' % path)
        for folder, dirs, files in os.walk(path):
            print('folder = %s' % folder)
            if '.svn' in dirs:
                # Removing the entry in place stops os.walk from descending
                # into the folder that is about to be deleted.
                dirs.remove('.svn')
                svnPath = os.path.join(folder, '.svn')
                print(svnPath)
                if not dryRun:
                    shutil.rmtree(svnPath)
            for f in files:
                if shouldMask(folder, f):
                    mask(folder, f, dryRun)
    if os.path.isdir(buildRoot):
        emptyFolder(buildRoot, 'Testing', dryRun)
        for c in componentsToMask:
            # NOTE(review): this walks the component under ``root`` again, not
            # under ``buildRoot`` -- confirm whether buildRoot was intended.
            path = os.path.join(root, c)
            for folder, dirs, files in os.walk(path):
                for f in files:
                    if shouldMask(folder, f):
                        mask(folder, f, dryRun)

示例#27

0

显示文件

文件： Parser.py 项目： paulochon8616/CS4.0-EDL

    def getMeasurement(self, l):
        """
        Collect the <plot> nodes and the measurement file of every
        <measurement> element of a study.
        @type l: C{String}
        @param l: label of a study
        @rtype: C{List}
        @return: C{List} of list of nodes <plot>, and C{List} of files of measurements
        """
        plot_nodes = []
        measurement_files = []

        for measurement in self.getStudyNode(l).getElementsByTagName("measurement"):
            plot_nodes.append(measurement.getElementsByTagName("plot"))
            fileName = measurement.attributes["file"].value
            filePath = measurement.attributes["path"].value

            # Pick the directory tree to search: the whole study directory
            # when no path is given, otherwise the given path under POST
            # (for Code_Saturne exp data are supposed to be in POST).
            if filePath == "":
                search_root = os.path.join(self.getRepository(), l)
            else:
                search_root = os.path.join(self.getRepository(), l, 'POST', filePath)

            # The first directory containing the file wins; when nothing is
            # found the declared path is kept unchanged.
            for current_dir, _subdirs, names in os.walk(search_root):
                if fileName in names:
                    filePath = current_dir
                    break

            measurement_files.append(os.path.join(filePath, fileName))

        return plot_nodes, measurement_files

示例#28

0

显示文件

文件： models.py 项目： AshishNamdev/pootle

    def _detect_treestyle(self):
        """Guess the tree style of the directory returned by get_real_path().

        Returns "gnu" when translation files (as judged by
        file_belongs_to_project) sit directly in a directory without
        sub-directories or are found anywhere in the tree, "nongnu" when a
        top-level sub-directory is named 'templates' or matches langcode_re,
        and None when nothing could be determined or detection failed.
        """
        try:
            dirlisting = os.walk(self.get_real_path())
            # The next() builtin works on Python 2.6+ and 3; the .next()
            # method only exists on Python 2.
            dirpath, dirnames, filenames = next(dirlisting)

            if not dirnames:
                # No subdirectories
                # any() gives a real boolean; a bare filter() object is
                # always truthy on Python 3.
                if any(self.file_belongs_to_project(f) for f in filenames):
                    # Translation files found, assume gnu
                    return "gnu"

            # There are subdirectories
            if any(dirname == 'templates' or langcode_re.match(dirname)
                   for dirname in dirnames):
                # Found language dirs assume nongnu
                return "nongnu"

            # No language subdirs found, look for any translation file
            for dirpath, dirnames, filenames in os.walk(self.get_real_path()):
                if any(self.file_belongs_to_project(f) for f in filenames):
                    return "gnu"
        except Exception:
            # Detection is best effort (e.g. the directory may not exist, in
            # which case next() raises StopIteration); fall through to
            # "unsure" without swallowing KeyboardInterrupt/SystemExit.
            pass

        # Unsure
        return None

示例#29

0

显示文件

文件： nzbToMediaUtil.py 项目： rhettabutler/nzbToMedia

def convert_to_ascii(inputName, dirName):
    """Rename a directory tree using the names produced by CharReplace.

    Nothing is done when the "ASCII"/"convert" setting is 0 or when running
    on Windows.  Otherwise ``inputName`` is passed through CharReplace, and
    the directory ``dirName``, its sub-directories and its files are renamed
    on disk whenever CharReplace reports a change for their name.

    Returns the (possibly updated) ``(inputName, dirName)`` pair.
    """
    ascii_convert = int(nzbtomedia.CFG["ASCII"]["convert"])
    if ascii_convert == 0 or os.name == 'nt':  # just return if we don't want to convert or on windows os and "\" is replaced!.
        return inputName, dirName

    encoded, inputName = CharReplace(inputName)

    parent, base = os.path.split(dirName)
    if not base:  # ended with "/"
        parent, base = os.path.split(parent)

    encoded, base2 = CharReplace(base)
    if encoded:
        dirName = os.path.join(parent, base2)
        logger.info("Renaming directory to: %s." % (base2), 'ENCODER')
        os.rename(os.path.join(parent, base), dirName)
        # "in" replaces dict.has_key(), which no longer exists on Python 3.
        if 'NZBOP_SCRIPTDIR' in os.environ:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("[NZB] DIRECTORY=%s" % (dirName))  # Return the new directory to NZBGet.

    # Bottom-up, so a directory is renamed only after everything below it
    # has been visited.
    for dirname, dirnames, filenames in os.walk(dirName, topdown=False):
        for subdirname in dirnames:
            encoded, subdirname2 = CharReplace(subdirname)
            if encoded:
                logger.info("Renaming directory to: %s." % (subdirname2), 'ENCODER')
                os.rename(os.path.join(dirname, subdirname), os.path.join(dirname, subdirname2))

    for dirname, dirnames, filenames in os.walk(dirName):
        for filename in filenames:
            encoded, filename2 = CharReplace(filename)
            if encoded:
                logger.info("Renaming file to: %s." % (filename2), 'ENCODER')
                os.rename(os.path.join(dirname, filename), os.path.join(dirname, filename2))

    return inputName, dirName

示例#30

0

显示文件

文件： latex_installed_packages.py 项目： benslice/LaTeXTools

def _get_files_matching_extensions(paths, extensions=None):
    """Collect the base names of files found under a list of search paths.

    paths      -- search directories joined with os.pathsep; '.' entries and
                  non-existent paths are skipped, and any '!!' is removed
                  from each entry before use
    extensions -- a single extension or a list of extensions (without the
                  leading separator); when empty or omitted, every file is
                  collected under the key '*'

    Returns a dict mapping each extension (or '*') to the de-duplicated
    file names without their extension, sorted case-insensitively.
    """
    if extensions is None:
        extensions = []
    elif isinstance(extensions, strbase):
        extensions = [extensions]

    # Build each ".ext" suffix once instead of once per file.
    suffixes = [(ext, u''.join((os.extsep, ext))) for ext in extensions]

    matched_files = defaultdict(list)

    for path in paths.split(os.pathsep):
        # bad idea... also our current directory isn't meaningful from a WindowCommand
        if path == '.':
            continue

        # !! sometimes occurs in the results on POSIX; remove them
        path = path.replace(u'!!', u'')
        path = os.path.normpath(path)
        if not os.path.exists(path):  # ensure path exists
            continue

        for _, _, files in os.walk(path):
            for f in files:
                stem = os.path.splitext(f)[0]
                if suffixes:
                    for ext, suffix in suffixes:
                        if f.endswith(suffix):
                            matched_files[ext].append(stem)
                else:
                    matched_files['*'].append(stem)

    matched_files = dict((key, sorted(set(value), key=lambda s: s.lower()))
                         for key, value in matched_files.items())

    return matched_files

示例#31

0

显示文件

文件： setup.py 项目： jbradberry/django-postoffice

def read_file(filename):
    """Read a file into a string.

    ``filename`` is resolved relative to the directory containing this
    script.  Returns '' when the file cannot be opened or read (IOError).
    """
    path = os.path.abspath(os.path.dirname(__file__))
    filepath = os.path.join(path, filename)
    try:
        # The context manager closes the handle instead of leaving it to the
        # garbage collector.
        with open(filepath) as f:
            return f.read()
    except IOError:
        return ''


packages = []
data_files = []

# Run from the directory that contains this script so that the relative
# 'postoffice' path below resolves.
root_dir = os.path.dirname(__file__)
if root_dir:
    os.chdir(root_dir)

for dirpath, dirnames, filenames in os.walk('postoffice'):
    # Prune hidden directories in place so os.walk does not descend into them
    dirnames[:] = [name for name in dirnames if not name.startswith('.')]
    if '__init__.py' in filenames:
        # A Python package: turn its path into a dotted name
        pkg = dirpath.replace(os.path.sep, '.')
        if os.path.altsep:
            pkg = pkg.replace(os.path.altsep, '.')
        packages.append(pkg)
    elif filenames:
        # Plain data directory: record its files relative to the package,
        # i.e. without the leading "postoffice/" or "postoffice\"
        prefix = dirpath[len('postoffice') + 1:]
        data_files.extend(os.path.join(prefix, f) for f in filenames)

setup(
    name='django-postoffice',
    description='',

示例#32

0

显示文件

文件： clean_combine_files.py 项目： fyurekli/projects_python

import pandas as pd
import os
pd.set_option('display.max_columns',None)
pd.options.display.float_format = '{:.4f}'.format
#----------------------------------------------------------------------------------------------------------------------#
#Import and Append all dataframes
#----------------------------------------------------------------------------------------------------------------------#
'''Get Path Names'''
#----------------------------------------------------------------------------------------------------------------------#
def _collect_paths(base_dir):
    """Return (box, pbp, players) path lists for the files under base_dir.

    Files are classified by file-name prefix: "df_box", "df_pbp" and "xxx".
    Keeping the loop in a function means no loop variables leak into the
    module namespace, so no ``del`` is needed afterwards (the ``del`` raised
    NameError whenever the walk found no directory or no file).
    """
    box, pbp, players = [], [], []
    for root, _dirs, files in os.walk(base_dir):
        for name in files:
            full_path = os.path.join(root, name)
            if name.startswith("df_box"):
                box.append(full_path)
            elif name.startswith("df_pbp"):
                pbp.append(full_path)
            elif name.startswith('xxx'):
                players.append(full_path)
    return box, pbp, players


path_name_box, path_name_pbp, path_players = _collect_paths('nba_analysis/')
#----------------------------------------------------------------------------------------------------------------------#
# Combine Files
#----------------------------------------------------------------------------------------------------------------------#
# Read every box-score file, then concatenate once: DataFrame.append was
# removed in pandas 2.0 and copied the accumulated frame on every call.
# pd.concat rejects an empty list, so fall back to an empty frame.
_box_frames = [pd.read_csv(f) for f in path_name_box]
df_box = pd.concat(_box_frames) if _box_frames else pd.DataFrame()
    
df_pbp = pd.DataFrame()
for f in path_name_pbp:

示例#33

0

显示文件

# Create the working directories next to the current working directory and
# make the whole tree accessible.
filelocation = os.getcwd()
directories = (
    ["bash"]
    + ["config" + model for model in (gemmachModelType, uMistModelType, fWorkModelType)]
    + ["rarc", "output", "UMOSTreating"]
    + ["extracted" + model for model in (gemmachModelType, uMistModelType, fWorkModelType)]
    + ["imgTemp", "output_csv", "output_img", "output_excel"]
)

for subdir in directories:
    target = filelocation + "/" + subdir
    # Only create what is missing
    if not os.path.exists(target):
        os.mkdir(target)
os.system("chmod -R 744 " + filelocation)
# Names of the directories directly inside the current directory
filedirectory = next(os.walk('.'))[1]


# this function just writes all the user input into all files
def UpdateEverything():
    a = enteredDate.get()
    b = enteredEndDate.get()
    sTime = sHourcombo.get()
    eTime = eHourCombo.get()
    h_00 = var_00.get()
    h_12 = var_12.get()
    O3 = var_O3.get()
    NO2 = var_NO2.get()
    PM25 = var_PM25.get()

    Um.inputStartDate(a)

示例#34

0

显示文件

文件： Os.py 项目： sametcelikbicak/Python

#print(os.getcwd())
#os.chdir("/Users/sametcelikbicak/Desktop")
#print(os.getcwd())
#print(os.listdir())

# for i in os.listdir():
#     print(i)

#os.mkdir("Deneme1")
#os.mkdir("Deneme2/Deneme3")  # raises an error: nested directories are created with a different function
#os.makedirs("Deneme2/Deneme3")
#os.rmdir("Deneme2/Deneme3")
#os.mkdir("Deneme2/Deneme3")
#os.rmdir("Deneme1")
#os.removedirs("Deneme2/Deneme3")
#os.rename("test.txt","test2.txt")
#os.rename("test2.txt","test.txt")
#print(os.stat("test2.txt"))
#print(os.stat("test2.txt").st_mtime)
#print(datetime.fromtimestamp(os.stat("test2.txt").st_mtime))
# print(os.walk("/Users/sametcelikbicak/Projects/UDEMY/Python"))
# for klasor_yolu,klasor_isimleri,dosya_isimleri in os.walk("/Users/sametcelikbicak/Projects/UDEMY/Python"):
#     print("Klasör Yolu",klasor_yolu)
#     print("Klasör İsimleri", klasor_isimleri)
#     print("Dosya İsimleri", dosya_isimleri)
#     print("*******************************************************")
# Print the name of every Python source file found anywhere under the
# project directory.
for _folder_path, _folder_names, file_names in os.walk("/Users/sametcelikbicak/Projects/UDEMY/Python"):
    for file_name in file_names:
        if file_name.endswith(".py"):
            print(file_name)

示例#35

0

显示文件

def zipdir(path, ziph):
    """Add every file under ``path`` to the archive ``ziph``, skipping ``.git``.

    path -- root directory to walk
    ziph -- object with a ``write(filename)`` method; each file's path is
            passed to it

    Directories named exactly ``.git`` are pruned from the walk.  The former
    substring test on the directory path also dropped unrelated directories
    such as ``.github``, and still walked through the whole ``.git`` tree.
    """
    for root, dirs, files in os.walk(path):
        # Prune in place so os.walk never descends into a .git directory.
        dirs[:] = [d for d in dirs if d != ".git"]
        for name in files:
            ziph.write(os.path.join(root, name))

示例#36

0

显示文件

def package_files(*root_directories):
    """List every file under the given directories, prefixed with '..'.

    Each directory is walked recursively, and every file found is returned
    as ``os.path.join('..', <containing directory>, <file name>)``.
    """
    collected = []
    for directory in root_directories:
        for path, _subdirs, filenames in os.walk(directory):
            for filename in filenames:
                collected.append(os.path.join('..', path, filename))
    return collected

示例#37

0

显示文件

        return [tail] + result
    if head == path:
        return result
    return fullsplit(head, [tail] + result)


# Install data files into the same location as the pure-Python modules.
for scheme in INSTALL_SCHEMES.values():
    scheme['data'] = scheme['purelib']

packages, data_files = [], []
root_dir = os.path.dirname(__file__)
if root_dir != '':
    os.chdir(root_dir)
enum_dir = 'django_enumfield_named_choices'

for dirpath, dirnames, filenames in os.walk(enum_dir):
    # Prune hidden directories in place so that os.walk never descends into
    # them.  Merely skipping a hidden directory still let its
    # sub-directories be walked and packaged.
    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
    if '__init__.py' in filenames:
        packages.append('.'.join(fullsplit(dirpath)))
    elif filenames:
        data_files.append(
            [dirpath, [os.path.join(dirpath, f) for f in filenames]])

version = __import__('django_enumfield_named_choices').__version__

setup(
    name="django-enumfield-named-choices",
    version=version,
    description="Custom Django field for using enumerations of named constants",
    long_description=open(os.path.join(os.path.dirname(__file__),

示例#38

0

显示文件

# -*- coding: utf-8 -*-
# Exploring what Python's os.walk does
"""
Created on Fri Dec  1 09:49:31 2017

@author: vizance
"""
import os

search_path = "C:\\Users\\vizance\\Desktop\\Python相關資料"
for dirPath, dirNames, fileNames in os.walk(search_path):
    # dirPath is the path of the directory being visited; dirNames is the
    # list of its sub-directory names; fileNames is the list of its file names
    print("dirpath→{}".format(dirPath))
    # The whole dirNames list is printed once per sub-directory
    for _ in dirNames:
        print("dirNames→{}".format(dirNames))
    # Full path of every file in the current directory
    for fileName in fileNames:
        print(os.path.join(dirPath, fileName))

示例#39

0

显示文件

import os
from zipfile import ZipFile

this_dir = os.path.dirname(os.path.abspath(__file__))
# All archives go into the "_build" directory next to this script,
# regardless of the current working directory.
build_dir = os.path.join(this_dir, "_build")
if not os.path.exists(build_dir):
    os.makedirs(build_dir)

# Only the directories directly under this script are packaged; the first
# os.walk() result lists exactly those.  Walking the whole tree (and
# rebinding this_dir while doing so) also visited nested directories, which
# do not contain the expected files.
for d in next(os.walk(this_dir))[1]:
    if d in ("build", "_build", "__pycache__"):
        continue
    source_dir = os.path.join(this_dir, d)
    with ZipFile(os.path.join(build_dir, d + ".zip"), "w") as zf:
        zf.write(os.path.join(source_dir, "LICENSE.txt"), "LICENSE.txt")
        zf.write(os.path.join(source_dir, d + ".py"), d + ".py")
        zf.write(os.path.join(source_dir, d + ".xlsm"), d + ".xlsm")
        if d == "database":
            zf.write(
                os.path.join(source_dir, "chinook.sqlite"), "chinook.sqlite"
            )

示例#40

0

显示文件

    return lis


sample_informations = {}
with open(samples_informations_file, 'r') as f:
    for line in f:
        if line.startswith('SRR'):
            line = map(str.strip, line.split(','))
            sample_informations.setdefault(line[0], line[1])

cwd = filter(os.path.isdir, os.listdir(os.getcwd()))
all_available_sites = []
sample_edited_sites = {}
for directory in cwd:
    if directory.startswith('SRR'):
        path = list(os.walk(directory + '/editing/'))
        table = path[1][0] + '/' + path[1][-1][-1]
        with open(table, 'r') as a:
            for line in a:
                if line.startswith('chr'):
                    s = map(str.strip, line.split("\t"))
                    if s[7] == 'AG':
                        site, freq, coverage = s[0] + "_" + s[1], s[8], s[4]
                        freq_gnum_cov = '%s^%s^%s' % (s[8], eval(
                            s[6])[2], s[4])
                        if site not in all_available_sites:
                            all_available_sites.append(site)
                        if (int(coverage) >= min_coverage) and (
                                float(freq) >= min_edit_frequency):
                            sample_edited_sites.setdefault(
                                (directory, site), []).append(

示例#41

0

显示文件

    def handle(self, **options):
        """Copy the 'wxapp_template' template tree into an existing project.

        options['directory'] names the destination.  It must exist and
        contain a manage.py, otherwise CommandError is raised.  RUN_VER is
        read from config/__init__.py under the current working directory;
        the command writes an error and exits with status -1 unless it is
        'ieod'.  Template files are then copied into the destination:
        hidden and __pycache__ directories are skipped, under a 'sites'
        directory only the sub-directory named after RUN_VER is kept, and
        compiled/JSON files are ignored except app.json.  Files listed in
        ``append_file_tuple`` are appended to instead of overwritten.
        """
        target = options.pop('directory')

        # if some directory is given, make sure it's nicely expanded
        top_dir = path.abspath(path.expanduser(target))
        if not path.exists(top_dir):
            raise CommandError("Destination directory '%s' does not "
                               "exist, please init first." % top_dir)
        if not path.exists(path.join(top_dir, 'manage.py')):
            raise CommandError("Current directory '%s' is not "
                               "a django project dir, please init first. "
                               "(bk-admin init ${app_code})" % top_dir)

        base_subdir = 'wxapp_template'

        append_file_tuple = (('', 'requirements.txt'), )

        # Setup a stub settings environment for template rendering
        if not settings.configured:
            settings.configure()
            django.setup()

        template_dir = path.join(blueapps.__path__[0], 'conf', base_subdir)
        run_ver = None
        # The context manager closes the config file even if reading fails.
        with open(path.join(os.getcwd(), 'config', '__init__.py')) as conf_file:
            for line in conf_file:
                if line.startswith('RUN_VER'):
                    # Drops the first 11 and the last 2 characters of the
                    # line; presumably the line looks like
                    # "RUN_VER = 'xxx'\n" -- TODO confirm.
                    run_ver = line[11:-2]

        if run_ver != u'ieod':
            self.stderr.write(
                "Error: Currently only ieod version is supported. "
                "Your version is %s" % run_ver)
            sys.exit(-1)

        prefix_length = len(template_dir) + 1

        for root, dirs, files in os.walk(template_dir):

            relative_dir = root[prefix_length:]

            target_dir = path.join(top_dir, relative_dir)
            if not path.exists(target_dir):
                os.mkdir(target_dir)

            flag = root.endswith('sites')
            # Iterate over a copy: entries are removed from ``dirs`` in place
            # so that os.walk does not descend into them.
            for dirname in dirs[:]:
                if (dirname.startswith('.') or  # noqa
                        dirname == '__pycache__' or  # noqa
                    (flag and dirname != run_ver)):
                    dirs.remove(dirname)

            for filename in files:
                if filename.endswith(('.pyo', '.pyc', '.py.class', '.json')):
                    # Ignore some files as they cause various breakages.
                    if filename != u'app.json':
                        continue
                old_path = path.join(root, filename)
                new_path = path.join(top_dir, relative_dir, filename)
                for old_suffix, new_suffix in self.rewrite_template_suffixes:
                    if new_path.endswith(old_suffix):
                        new_path = new_path[:-len(old_suffix)] + new_suffix
                        break  # Only rewrite once

                with io.open(old_path, 'rb') as template_file:
                    content = template_file.read()
                # Overwrite by default; append for the files listed in
                # append_file_tuple.
                w_mode = 'wb'
                for _root, _filename in append_file_tuple:
                    if _root == relative_dir and _filename == filename:
                        w_mode = 'ab'
                with io.open(new_path, w_mode) as new_file:
                    new_file.write(content)

                try:
                    shutil.copymode(old_path, new_path)
                    self.make_writeable(new_path)
                except OSError:
                    self.stderr.write(
                        "Notice: Couldn't set permission bits on %s. You're "
                        "probably using an uncommon filesystem setup. No "
                        "problem." % new_path, self.style.NOTICE)

示例#42

0

显示文件

文件： copy file.py 项目： eea76/python-misc-scripts

import os, shutil

path = raw_input("Enter path: ")

# Every file found anywhere below ``path`` is copied into this one folder.
destination = "/Users/elon/Desktop/python midi scripts/old/new2"

for root, dirs, files in os.walk(path):
    for name in files:
        shutil.copy(os.path.join(root, name), destination)

示例#43

0

显示文件

文件： main.py 项目： Marina-Ivanova/music-search

def submit():
    """Handle the search form: match an uploaded mp3 against a stored index.

    On a valid submission the uploaded file is saved under its secured name,
    the pickled index for the chosen genre is loaded ('index_all.pkl' for
    any other choice), a constellation index is built for the upload and
    compared with the stored one.  The user is redirected to the 'result'
    view with the file name (without extension) of the best-scoring track.
    A non-mp3 upload, or a search that matches nothing, flashes a message
    and redirects to '/'.  Otherwise the form template is rendered.
    """
    form = MyForm()

    if form.validate_on_submit():
        genre = form.dropdown.data
        file = form.file.data
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(filename))
        else:
            flash('mp3 file format is required')
            return redirect('/')

        # genre -> (music directory, pickled index); any other choice
        # searches the whole collection
        genre_sources = {
            'Pop': ('music/Pop', 'index_pop.pkl'),
            'Hiphop': ('music/Hiphop', 'index_hiphop.pkl'),
            'Folk': ('music/Folk', 'index_folk.pkl'),
            'Rock': ('music/Rock', 'index_rock.pkl'),
        }
        directory, index_file = genre_sources.get(
            genre, ('music', 'index_all.pkl'))
        index = joblib.load(index_file)

        path_f = []
        for d, dirs, files in os.walk(directory):
            audio = filter(lambda x: x.endswith('.mp3'), files)
            for f in audio:
                path = os.path.join(d, f)  # build the file path
                path_f.append(path)  # add the path to the list

        def read_and_resample(path, sample_rate):
            # read and resample to 22KHz
            y, sr = librosa.load(path, sr=sample_rate)
            return y

        sample_rate = 22050
        # reading request audio
        request_data = read_and_resample(filename, sample_rate)
        # Let's make and display a mel-scaled power (energy-squared) spectrogram
        S = librosa.feature.melspectrogram(request_data, sr=sample_rate, n_mels=128)
        neighborhood_size = 10
        # sec/sample - constant for all files
        time_resolution = (request_data.shape[0] / sample_rate) / S.shape[1]

        def form_constellation(wav, sample_rate):
            S = librosa.feature.melspectrogram(wav, sr=sample_rate, n_mels=256, fmax=4000)
            S = librosa.power_to_db(S, ref=np.max)
            # get local maxima
            Sb = maximum_filter(S, neighborhood_size) == S

            Sbd, num_objects = ndimage.label(Sb)
            objs = ndimage.find_objects(Sbd)
            points = []
            for dy, dx in objs:
                x_center = (dx.start + dx.stop - 1) // 2
                y_center = (dy.start + dy.stop - 1) // 2
                # keep only maxima that cover a single cell
                if (dx.stop - dx.start) * (dy.stop - dy.start) == 1:
                    points.append((x_center, y_center))

            return sorted(points)

        request_constellation = form_constellation(request_data, sample_rate)
        target = (int(1 / time_resolution), int(3 / time_resolution), -30, 30)  # start, end, Hz low, Hz high

        def build_constellation_index(constellation_collection, target):
            result_index = {}
            for name, points in constellation_collection.items():
                for point in points:
                    f1 = point[1]
                    # points inside the target window relative to ``point``
                    tg = [p for p in points if
                          point[0] + target[0] <= p[0] < point[0] + target[1]
                          and
                          point[1] + target[2] <= p[1] < point[1] + target[3]
                          ]
                    for p in tg:
                        f2 = p[1]
                        dt = p[0] - point[0]
                        t = p[0]

                        if (f1, f2, dt) in result_index:
                            result_index[(f1, f2, dt)].append((t, name))
                        else:
                            result_index[(f1, f2, dt)] = [(t, name)]
            return result_index

        request = build_constellation_index({filename: request_constellation}, target)
        times = dict((name, []) for name in path_f)
        for key, v in request.items():
            if key in index:
                for t_r, name_r in v:
                    for pair in index[key]:
                        t_i, name_i = pair
                        times[name_i].append(t_i - t_r)
        result = []
        for name, matches in times.items():
            if matches:
                result.append((name, max(matches)))

        if not result:
            # Nothing in the index matched the upload; report it instead of
            # failing with IndexError on an empty result list.
            flash('No matching track was found')
            return redirect('/')

        # First entry with the highest score, i.e. the same entry a stable
        # descending sort would put first.
        output = max(result, key=lambda x: x[1])[0]
        # basename/splitext work at any directory depth and with any path
        # separator, unlike split('/')[2].
        final_result = os.path.splitext(os.path.basename(output))[0]
        return redirect(url_for('result', result=final_result))
    return render_template('submit.html', form=form)

示例#44

0

显示文件

文件： utils.py 项目： zhenzi0322/psd-tools

def find_files(pattern='*.ps*', root=TEST_ROOT):
    """Yield the path of every file under ``root`` whose name matches ``pattern``.

    ``pattern`` is an fnmatch-style pattern applied to file names only.
    """
    for dirpath, _dirnames, filenames in os.walk(root):
        matching = fnmatch.filter(filenames, pattern)
        for match in matching:
            yield os.path.join(dirpath, match)

示例#45

0

显示文件

文件： setup.py 项目： knut0815/star-forming-regions

package_info['package_data'][PACKAGENAME].append('data/*')

# Command-line scripts: one "name = target" line per option of the
# [entry_points] configuration section, if that section exists.
entry_points = {'console_scripts': []}

if conf.has_section('entry_points'):
    for script_name, script_target in conf.items('entry_points'):
        entry_points['console_scripts'].append(
            '{0} = {1}'.format(script_name, script_target))

# Include all .c files, recursively, including those generated by
# Cython, since we can not do this in MANIFEST.in with a "dynamic"
# directory name.
c_files = []
for root, dirs, files in os.walk(PACKAGENAME):
    # Paths are recorded relative to the package directory
    relative_root = os.path.relpath(root, PACKAGENAME)
    c_files.extend(
        os.path.join(relative_root, filename)
        for filename in files if filename.endswith('.c'))
package_info['package_data'][PACKAGENAME].extend(c_files)

# Note that requires and provides should not be included in the call to
# ``setup``, since these are now deprecated. See this link for more details:
# https://groups.google.com/forum/#!topic/astropy-dev/urYO8ckB2uM

setup(name=PACKAGENAME,
      version=VERSION,
      description=DESCRIPTION,
      scripts=scripts,
      install_requires=[

示例#46

0

显示文件

                or (c >= 'A' and c <= 'Z')):
            l.append(c)
    return ''.join(l)


def siftOnGraph(imgName):
    """Compute keypoints/descriptors for an image and pickle them.

    The image is read in colour and in greyscale.  For every image returned
    by getIMGpyramid() for the greyscale data, getKPandDES() is called and
    the keypoints are stored as (x, y, size) tuples together with the
    descriptors and the image shape.  The result is pickled, with the
    colour image and the original file name, to
    "./dataset/<getValidFileName(imgName)>.pkl".
    """
    print("Processing {0} ...".format(imgName))
    img_data_color = cv2.imread(imgName, cv2.IMREAD_COLOR)
    img_data = cv2.imread(imgName, cv2.IMREAD_GRAYSCALE)
    KPDESlist = []
    for level in getIMGpyramid(img_data):
        kp2, des2 = getKPandDES(level, 200)
        kp2list = [(kp.pt[0], kp.pt[1], kp.size) for kp in kp2]
        KPDESlist.append({"kp": kp2list, "des": des2, "gsize": level.shape})
    pkname = os.path.join("./dataset", getValidFileName(imgName))
    ob = {"img": img_data_color, "filename": imgName, "KPDESlist": KPDESlist}
    # The context manager closes the file even if pickling fails.
    with open(pkname + ".pkl", "wb") as pkfile:
        pk.dump(ob, pkfile)


if __name__ == "__main__":
    # Process every file found anywhere under the dataset directory.
    rootPath = "./dataset"
    for dirpath, dirnames, filenames in os.walk(rootPath):
        for base_name in filenames:
            siftOnGraph(os.path.join(dirpath, base_name))

示例#47

0

显示文件

文件： arduino.py 项目： skorokithakis/platformio

                       join("$PLATFORMFW_DIR", "system", "libsam"))

    env.VariantDirWrap(
        join("$BUILD_DIR", "FrameworkArduinoInc"),
        join("$PLATFORMFW_DIR", "cores", "${BOARD_OPTIONS['build']['core']}"))
    env.Append(CPPPATH=[
        join("$BUILD_DIR", "FrameworkCMSISInc"),
        join("$BUILD_DIR", "FrameworkLibSam"),
        join("$BUILD_DIR", "FrameworkLibSam", "include"),
        join("$BUILD_DIR", "FrameworkDeviceInc"),
        join("$BUILD_DIR", "FrameworkDeviceInc", "sam3xa", "include")
    ])

    # search relative includes in lib SAM directories
    core_dir = join(env.subst("$PLATFORMFW_DIR"), "system", "libsam")
    for root, _, files in walk(core_dir):
        for lib_file in files:
            file_path = join(root, lib_file)
            if not isfile(file_path):
                continue
            content = None
            content_changed = False
            with open(file_path) as fp:
                content = fp.read()
                if '#include "../' in content:
                    content_changed = True
                    content = content.replace('#include "../', '#include "')
                if not content_changed:
                    continue
                with open(file_path, "w") as fp:
                    fp.write(content)

示例#48

0

显示文件

文件： main.py 项目： Marina-Ivanova/music-search

def add():
    """Handle the upload form: store an mp3 and rebuild the search indexes.

    On a valid submission the file is saved into the directory of the chosen
    genre ('music' for any other choice).  A constellation index is built
    from every mp3 under that directory and dumped to the genre's pickle
    file, then the index over the whole 'music' tree is rebuilt and dumped
    to 'index_all.pkl'.  The user is redirected to '/' on success and to
    '/add' when the upload is not an allowed file.  Otherwise the form
    template is rendered.
    """
    form = MyForm()

    if form.validate_on_submit():
        genre = form.dropdown.data
        file = form.file.data
        # genre -> (music directory, pickled index); any other choice uses
        # the whole collection
        genre_targets = {
            'Pop': ('music/Pop', 'index_pop.pkl'),
            'Hiphop': ('music/Hiphop', 'index_hiphop.pkl'),
            'Folk': ('music/Folk', 'index_folk.pkl'),
            'Rock': ('music/Rock', 'index_rock.pkl'),
        }
        directory, index_file = genre_targets.get(
            genre, ('music', 'index_all.pkl'))

        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(directory + '/' + filename))
        else:
            flash('mp3 file format is required')
            return redirect('/add')

        path_f = []
        for d, dirs, files in os.walk(directory):
            audio = filter(lambda x: x.endswith('.mp3'), files)
            for f in audio:
                path = os.path.join(d, f)  # build the file path
                path_f.append(path)  # add the path to the list

        def read_and_resample(path, sample_rate):
            # read and resample to 22KHz
            y, sr = librosa.load(path, sr=sample_rate)
            return y

        dataset = {}
        sample_rate = 22050
        # reading all audios
        for path in path_f:
            dataset[path] = read_and_resample(path, sample_rate)

        # The freshly uploaded track is used to derive the time resolution
        y = dataset[directory + '/' + filename]
        # Let's make and display a mel-scaled power (energy-squared) spectrogram
        S = librosa.feature.melspectrogram(y, sr=sample_rate, n_mels=128)
        neighborhood_size = 10
        # sec/sample - constant for all files
        time_resolution = (y.shape[0] / sample_rate) / S.shape[1]

        def form_constellation(wav, sample_rate):
            S = librosa.feature.melspectrogram(wav, sr=sample_rate, n_mels=256, fmax=4000)
            S = librosa.power_to_db(S, ref=np.max)
            # get local maxima
            Sb = maximum_filter(S, neighborhood_size) == S

            Sbd, num_objects = ndimage.label(Sb)
            objs = ndimage.find_objects(Sbd)
            points = []
            for dy, dx in objs:
                x_center = (dx.start + dx.stop - 1) // 2
                y_center = (dy.start + dy.stop - 1) // 2
                # keep only maxima that cover a single cell
                if (dx.stop - dx.start) * (dy.stop - dy.start) == 1:
                    points.append((x_center, y_center))

            return sorted(points)

        constellations = {}
        for name, wav in dataset.items():
            constellations[name] = form_constellation(wav, sample_rate)

        target = (int(1 / time_resolution), int(3 / time_resolution), -30, 30)  # start, end, Hz low, Hz high

        def build_constellation_index(constellation_collection, target):
            result_index = {}
            for name, points in constellation_collection.items():
                for point in points:
                    f1 = point[1]
                    # points inside the target window relative to ``point``
                    tg = [p for p in points if
                          point[0] + target[0] <= p[0] < point[0] + target[1]
                          and
                          point[1] + target[2] <= p[1] < point[1] + target[3]
                          ]
                    for p in tg:
                        f2 = p[1]
                        dt = p[0] - point[0]
                        t = p[0]

                        if (f1, f2, dt) in result_index:
                            result_index[(f1, f2, dt)].append((t, name))
                        else:
                            result_index[(f1, f2, dt)] = [(t, name)]
            return result_index

        index = build_constellation_index(constellations, target)
        joblib.dump(index, index_file)

        # rebuilding index for all songs
        directory_all = 'music'
        path_all = []
        for d, dirs, files in os.walk(directory_all):
            audio = filter(lambda x: x.endswith('.mp3'), files)
            for f in audio:
                path = os.path.join(d, f)  # build the file path
                # Collect into path_all: appending to path_f here left
                # path_all empty, so 'index_all.pkl' was always rebuilt
                # from nothing.
                path_all.append(path)

        dataset_all = {}
        # reading all audios
        for path in path_all:
            dataset_all[path] = read_and_resample(path, sample_rate)

        constellations_all = {}
        for name, wav in dataset_all.items():
            constellations_all[name] = form_constellation(wav, sample_rate)

        index_all = build_constellation_index(constellations_all, target)
        joblib.dump(index_all, 'index_all.pkl')
        flash('File was successfully added to the database')
        return redirect('/')
    return render_template('add.html', form=form)

示例#49

0

显示文件

    if os.path.isdir(args.input) and os.path.isdir(args.output):
        input = args.input # Set the current workspace
        output = args.output # Results are saved in the following directory
    else:
        raise argparse.ArgumentTypeError(f"input or output is not a valid path")

    if isinstance(args.cpu, numbers.Integral):
        nbrOfCpus = args.cpu # Set the number of CUPs
    else:
        raise argparse.ArgumentTypeError(f"Number of CUPs should be an integer number")


    # find all of the DEM files in the input folder
    DEMs = []
    for root, dirs, files in os.walk(input):
        for file in files:
            if file.endswith(".tif"):
                DEMs.append(file)

    # methods used for calculating surface area
    methods = ['WA9', 'li', 'biLi4', 'biQuad9', 'biCub16']

    # Declare variables to keep track of time for each method to compare the efficiency of each method
    Time = namedtuple('Time', methods)

    for dem in DEMs:  # surface area rasters are calculated for each input DEM
        print(f'Calculating surface area for {dem} ...')
        # create a raster object to get its properties and numpy arrays
        inputRasterObj = Raster(f'{input}/{dem}')
        xres, yres = inputRasterObj.xres, inputRasterObj.yres

示例#50

0

显示文件

#!/usr/bin/env python
from distutils.core import setup
import re
import os


# Absolute path of the ``fab_deploy2`` package directory located next to this
# setup script.
base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'fab_deploy2'))

# Collect every non-``.pyc`` file below ``fab_deploy2/default-configs`` as a
# path relative to ``base_path``.
data_files = []
for dirpath, dirnames, filenames in os.walk(os.path.join(base_path, 'default-configs')):
    # Prune directories whose name starts with '.' so os.walk does not descend
    # into them.  The list must be modified in place (slice assignment) for
    # os.walk to notice; deleting entries by index while enumerating the same
    # list shifts the remaining items and skips the one following each
    # deletion, which let some hidden directories slip through.
    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
    # Strip the "<base_path>/" prefix to obtain package-relative paths.
    files = [os.path.join(dirpath, f)[len(base_path) + 1:]
             for f in filenames if not f.endswith('.pyc')]
    data_files.extend(files)

setup(
    name = 'red-fab-deploy2',
    packages=[
        'fab_deploy2',
        'fab_deploy2.base',
        'fab_deploy2.local',
        'fab_deploy2.joyent',
        'fab_deploy2.operating_systems',
        'fab_deploy2.operating_systems.ubuntu',
        'fab_deploy2.operating_systems.redhat',
        'fab_deploy2.operating_systems.smartos',
        'fab_deploy2.joyent',
        'fab_deploy2.joyent.smartos',
        'fab_deploy2.joyent.ubuntu',
        'fab_deploy2.amazon',

示例#51

0

显示文件

# Slurp the existing labels CSV into a single string; the image loop below
# does plain substring searches on it to detect files that are already tagged.
csvData = ""
with open(labelsFileName, 'r') as labelsFile:
    csvData = labelsFile.read()

# Put the terminal into single-character input mode: clear the canonical
# (line-buffered) and echo bits in the local-mode flags (index 3 of the
# attribute list).  The unmodified attributes are kept in ``oldterm``.
fd = sys.stdin.fileno()
oldterm = termios.tcgetattr(fd)
newattr = termios.tcgetattr(fd)
newattr[3] &= ~(termios.ICANON | termios.ECHO)
termios.tcsetattr(fd, termios.TCSANOW, newattr)

# iterate over all images found in imageDirectory
count = 0;
try:
    for root, dirs, files in os.walk(imageDirectory):
        for f in files:
            if (f.endswith("jpg") or f.endswith("jpeg")):
                count += 1
                if (csvData.find(f) != -1):
                    print "skipping tagged image " + f
                else:
                    # print out image path
                    image = root +  "/" + f
                    imageRel = image[len(imageDirectory):]
                    print "***************************************"
                    print "Image #", count
                    print imageRel
                    os.system(pictureViewer + " " + image + " 2>/dev/null &")

                    # display available labels

示例#52

0

显示文件

    IMAGES_PER_GPU = 1

# Instantiate the inference configuration.
config = InferenceConfig()

# Create model object in inference mode.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load trained weights from a fixed checkpoint file under ROOT_DIR/logs.
# NOTE(review): the file name suggests COCO weights fine-tuned on a V-REP
# dataset -- confirm.
# model_path = model.find_last()
model_path = os.path.join(ROOT_DIR, "logs/coco_vrepall_1002.h5")
# model.load_weights(COCO_MODEL_PATH, by_name=True)
model.load_weights(model_path, by_name=True)

# Class labels; presumably index i is the label of class id i with 'BG'
# (background) at 0 -- verify against the training dataset.
class_names = ['BG', 'SlidingDoor', 'Wall', 'Shelf', 'Robot', 'Human', 'ConveyorBelt', 'Dockstation', 'Product']
# Load a random image from the images folder: take the file names directly
# inside IMAGE_DIR (first os.walk entry only, no recursion) and pick one.
file_names = next(os.walk(IMAGE_DIR))[2]
image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))

# Time the detection call: a is the start time, b is the end time.
# NOTE: weight loading happened above, so this interval covers only
# model.detect even though the message printed below says "Load and Detection".
a = time.time() 
# Run detection
# if verbose == 1, more information is printed on the terminal
results = model.detect([image], verbose=1)
b = time.time() 
load_detect_cost = b - a

# Report which weights were used and how long detection took.
print("----------------------------------------------------------------------")
print("Loading weights from ", model_path)
print("Load and Detection time for this image is %.3f seconds" % load_detect_cost )
# Only one image was passed to detect(), so take the first result entry.
r = results[0]

示例#53

0

显示文件

文件： teste.py 项目： labcif/MSTeams

    def process(self, dataSource, progressBar):

        # we don't know how much work there is yet
        progressBar.switchToIndeterminate()
        self.log(Level.INFO,dataSource.getUniquePath())
        # Use blackboard class to index blackboard artifacts for keyword search
        blackboard = Case.getCurrentCase().getServices().getBlackboard()
        self.art_contacts = self.create_artifact_type("Labcif-MSTeams_CONTACTS_"," Contacts", blackboard)
        self.art_messages = self.create_artifact_type("Labcif-MSTeams_MESSAGES_"," MESSAGES", blackboard)
        self.art_messages_reacts = self.create_artifact_type("Labcif-MSTeams_MESSAGES_REACTS"," REACTS", blackboard)
        self.art_messages_files = self.create_artifact_type("Labcif-MSTeams_MESSAGES_FILES"," FILES", blackboard)
        self.art_call = self.create_artifact_type("Labcif-MSTeams_CALLS_", " Call history", blackboard)
        self.art_call_one_to_one = self.create_artifact_type("Labcif-MSTeams_CALLS_ONE_TO_ONE", " Call history one to one", blackboard)
        self.art_teams = self.create_artifact_type("Labcif-MSTeams_TEAMS_"," Teams", blackboard)
        
        # contactos
        self.att_name = self.create_attribute_type('Labcif-MSTeams_CONTACT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Name", blackboard)
        self.att_email = self.create_attribute_type('Labcif-MSTeams_CONTACT_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Email", blackboard)
        self.att_orgid = self.create_attribute_type('Labcif-MSTeams_CONTACT_ORGID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Orgid", blackboard)
        self.att_user_contacts = self.create_attribute_type('Labcif-MSTeams_USERNAME_CONTACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_contacts = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_CONTACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard)
        # reacts
        self.att_message_id_reacts = self.create_attribute_type('Labcif-MSTeams_MESSAGE_ID_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message ID", blackboard)
        self.att_sender_name_react = self.create_attribute_type('Labcif-MSTeams_MESSAGE_SENDER_NAME_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Who reacted", blackboard)
        self.att_reacted_with = self.create_attribute_type('Labcif-MSTeams_MESSAGE_FILE_LOCAL_EMOJI_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Emoji", blackboard)
        self.att_react_time= self.create_attribute_type('Labcif-MSTeams_MESSAGE_REACT_TIME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "React time", blackboard)
        self.att_user_message_reacts = self.create_attribute_type('Labcif-MSTeams_USERNAME_MESSAGE_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_reacts = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_REACTS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard)
        # mensagens
        self.att_message_id = self.create_attribute_type('Labcif-MSTeams_MESSAGE_ID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message ID", blackboard)
        self.att_message = self.create_attribute_type('Labcif-MSTeams_MESSAGE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message", blackboard)
        self.att_sender_name = self.create_attribute_type('Labcif-MSTeams_SENDER', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Senders name", blackboard)
        self.att_time = self.create_attribute_type('Labcif-MSTeams_TIME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message time", blackboard) 
        self.att_cvid = self.create_attribute_type('Labcif-MSTeams_CONVERSATION_ID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "CV", blackboard)
        self.att_user_message = self.create_attribute_type('Labcif-MSTeams_USERNAME_MESSAGE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_message = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_MESSAGES', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard)
         # ficheiros
        self.att_message_id_files = self.create_attribute_type('Labcif-MSTeams_MESSAGE_ID', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Message ID", blackboard)
        self.att_file_name = self.create_attribute_type('Labcif-MSTeams_MESSAGE_FILE_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "File name", blackboard)
        self.att_file_local = self.create_attribute_type('Labcif-MSTeams_MESSAGE_FILE_LINK', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "File Link", blackboard)
        self.att_user_message_files = self.create_attribute_type('Labcif-MSTeams_USERNAME_MESSAGE_FILES', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_files = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_FILES', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard)
        # calls one to one 
        self.att_date_start_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_TIME_START', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one time start", blackboard) 
        self.att_date_finish_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_TIME_FINISH', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one time finish", blackboard) 
        self.att_creator_name_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_CREATOR_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Creator Name", blackboard)
        self.att_creator_email_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_CREATOR_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Creator Email", blackboard)
        self.att_participant_name_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_PARTICIPANT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Participant Name", blackboard)
        self.att_participant_email_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_PARTICIPANT_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one Participant Email", blackboard)
        self.att_state_one_to_one = self.create_attribute_type('Labcif-MSTeams_CALL_ONE_TO_ONE_STATE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call one to one state", blackboard)
        self.att_user_calls_one_to_one = self.create_attribute_type('Labcif-MSTeams_USERNAME_CALLS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_calls_one_to_one = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_CALLS_ONE_TO_ONE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard)
        # teams
        self.att_cv_id_teams = self.create_attribute_type('Labcif-MSTeams_CV_ID_TEAMS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Conversation ID teams", blackboard) 
        self.att_creator_name_teams = self.create_attribute_type('Labcif-MSTeams_TEAMS_CREATOR_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Creator Name", blackboard)
        self.att_creator_email_teams = self.create_attribute_type('Labcif-MSTeams_TEAMS_CREATOR_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Creator Email", blackboard)
        self.att_participant_name_teams = self.create_attribute_type('Labcif-MSTeams_TEAMS_PARTICIPANT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Participant Name", blackboard)
        self.att_participant_email_teams = self.create_attribute_type('Labcif-MSTeams_teams_PARTICIPANT_EMAIL_ONE_TO_ONE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Teams Participant Email", blackboard)
        self.att_user_teams = self.create_attribute_type('Labcif-MSTeams_USERNAME_TEAMS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_teams = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_TEAMS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard) 
        # calls
        self.att_date = self.create_attribute_type('Labcif-MSTeams_CALL_DATE', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call Date", blackboard) 
        self.att_creator_name = self.create_attribute_type('Labcif-MSTeams_CALL_CREATOR_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Creator Name", blackboard)
        self.att_creator_email = self.create_attribute_type('Labcif-MSTeams_CALL_CREATOR_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Creator Email", blackboard)
        self.att_count_people_in = self.create_attribute_type('Labcif-MSTeams_CALL_AMOUNT_PEOPLE_IN', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Amount of people in call", blackboard)
        self.att_duration = self.create_attribute_type('Labcif-MSTeams_CALL_DURANTION', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Call Duration", blackboard) 
        self.att_participant_name = self.create_attribute_type('Labcif-MSTeams_CALL_PARTICIPANT_NAME', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Participant Name", blackboard)
        self.att_participant_email = self.create_attribute_type('Labcif-MSTeams_CALL_PARTICIPANT_EMAIL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Participant Email", blackboard)
        self.att_user_calls = self.create_attribute_type('Labcif-MSTeams_USERNAME_CALLS', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "User", blackboard)
        self.att_folder_extract_calls = self.create_attribute_type('Labcif-MSTeams_FOLDER_EXTRACT_CALL', BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.STRING, "Folder of extraction", blackboard)
        
        
        # For our example, we will use FileManager to get all
        # files with the word "test"
        # in the name and then count and read them
        # FileManager API: http://sleuthkit.org/autopsy/docs/api-docs/latest/classorg_1_1sleuthkit_1_1autopsy_1_1casemodule_1_1services_1_1_file_manager.html
        fileManager = Case.getCurrentCase().getServices().getFileManager()
        files = fileManager.findFiles(dataSource, "%.ldb","https_teams.microsoft.com_")
        

        numFiles = len(files)
        progressBar.switchToDeterminate(numFiles)
        fileCount = 0
        for file in files:

            # Check if the user pressed cancel while we were busy
            if self.context.isJobCancelled():
                return IngestModule.ProcessResult.OK

            fileCount += 1

            # Make an artifact on the blackboard.  TSK_INTERESTING_FILE_HIT is a generic type of
            # artfiact.  Refer to the developer docs for other examples.

            src = file.getParentPath()
            pathSplited=src.split("/")
            user=pathSplited[2]
            if user not in users:
                users.append(user)
            buffer = jarray.zeros(file.getSize(), "b")
            file.read(buffer,0,file.getSize())
            if "lost" not in src and "Roaming" in file.getParentPath() and "ProjetoEI" not in file.getParentPath():
                if src not in paths:
                    tm = datetime.fromtimestamp(math.floor(tim.time())).strftime("%m-%d-%Y_%Hh-%Mm-%Ss")
                    paths[src]="Analysis_Autopsy_LDB_{}_{}".format(user,tm)
                if not os.path.exists(os.path.join(projectEIAppDataPath,paths[src])):
                    try:
                        os.mkdir(os.path.join(projectEIAppDataPath,paths[src]))
                    except OSError:
                        print("Creation of the directory %s failed" % os.path.join(projectEIAppDataPath,paths[src]))
                    else:
                        print("Successfully created the directory %s " % os.path.join(projectEIAppDataPath,paths[src]))
                f = open(os.path.join(os.path.join(projectEIAppDataPath,paths[src]),file.getName()),"wb")
                f.write(buffer.tostring())
                f.close()
            # try:
            #     # index the artifact for keyword search
            #     blackboard.indexArtifact(art)
            # except Blackboard.BlackboardException as e:
            #     self.log(Level.SEVERE, "Error indexing artifact " + art.getDisplayName()+str(e))

            # To further the example, this code will read the contents of the file and count the number of bytes
            # Update the progress bar
            progressBar.progress(fileCount)
            for src, path in paths.items():
                complementaryFiles=fileManager.findFilesByParentPath(dataSource.getId(),src)
                for file in complementaryFiles:
                    if "lost" not in file.getParentPath() and ".ldb" not in file.getName() and "lost" not in file.getName() and "Roaming" in file.getParentPath() and "ProjetoEI" not in file.getParentPath():
                        if file.getName() == "." or file.getName() == ".." or "-slack" in file.getName():
                            continue
                        buffer = jarray.zeros(file.getSize(), "b")
                        if src not in paths:
                            tm = datetime.fromtimestamp(math.floor(tim.time())).strftime("%m-%d-%Y_%Hh-%Mm-%Ss")
                            paths[src] = "Analysis_Autopsy_LDB_{}_{}".format(user,tm)
                        if not os.path.exists(os.path.join(projectEIAppDataPath,paths[src])):
                            try:
                                os.mkdir(os.path.join(projectEIAppDataPath,paths[src]))
                            except OSError:
                                print("Creation of the directory %s failed" % os.path.join(projectEIAppDataPath,paths[src]))
                            else:
                                print("Successfully created the directory %s " % os.path.join(projectEIAppDataPath,paths[src]))
                        try:
                            f = open(os.path.join(os.path.join(projectEIAppDataPath,paths[src]),file.getName()),"a")
                            file.read(buffer,0,file.getSize())
                            f.write(buffer.tostring())
                            f.close()
                        except :
                            self.log(Level.INFO,"File Crash")
        pathModule = os.path.realpath(__file__)
        indexCutPath=pathModule.rfind("\\")
        pathModule=pathModule[0:indexCutPath+1]
        # message = IngestMessage.createMessage(
        #     IngestMessage.MessageType.DATA, Labcif-MSTeamsFactory.moduleName,
        #         str(self.filesFound) + " files found")
        analysisPath = ""
        result = {}
        for key,value in paths.items():
            if key not in result:
                result[key] = value
    
        for key, value in result.items():
            p = subprocess.Popen([r"{}EI\EI.exe".format(pathModule),"--pathToEI",r"{}EI\ ".format(pathModule), "-a", value],stderr=subprocess.PIPE)
            out = p.stderr.read()
            self.log(Level.INFO, out) 
            p.wait()
            # os.system("cmd /c \"{}EI\\EI.exe\" --pathToEI \"{}EI\\\" -a {}".format(pathModule,pathModule,value))
        results=[]
        pathResults="Analise Autopsy"
        for u in users:
            pathLDB=""
            for key,value in paths.items():
                if "Analysis_Autopsy_LDB_{}".format(u) in value:
                    pathLDB=value
                    break
            for root, dirs, files in os.walk(projectEIAppDataPath, topdown=False):
                for name in dirs:
                    if pathResults in name and os.stat(os.path.join(projectEIAppDataPath,pathLDB)).st_mtime < os.stat(os.path.join(projectEIAppDataPath,name)).st_mtime:
                        pathsLDB[pathLDB]=os.path.join(projectEIAppDataPath,name)
                        results.append(os.path.join(projectEIAppDataPath,name))
        f = open(os.path.join(projectEIAppDataPath,"filesToReport.txt"),"w")
        for r in results:
            for files in os.walk(r,topdown=False):
                for name in files:
                    for fileName in name:
                        if ".csv" in fileName or ".html" in fileName or ".css" in fileName:
                            f.write(os.path.join(r,fileName)+"\n")
                        
        f.close()

        f = open(os.path.join(projectEIAppDataPath,"filesToReport.txt"), "r")
        for line in f:
            line = line.replace("\n","")
            pathExtract=""
            if ".csv" in line:
                # ok
                if "EventCall" in line:

                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    with io.open(line,encoding="utf-8") as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)  
                        for row in reader: # each row is a list
                            try:
                                row = row[0].split(";")
                                if rowcount!=0:
                                    art = dataSource.newArtifact(self.art_call.getTypeID())
                                    dura=str(int(float(row[4])))
                                    art.addAttribute(BlackboardAttribute(self.att_date, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0])))
                                    art.addAttribute(BlackboardAttribute(self.att_creator_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1])))
                                    art.addAttribute(BlackboardAttribute(self.att_creator_email, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2])))
                                    art.addAttribute(BlackboardAttribute(self.att_count_people_in, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3])))
                                    art.addAttribute(BlackboardAttribute(self.att_duration, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName,dura ))
                                    art.addAttribute(BlackboardAttribute(self.att_participant_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[5])))
                                    art.addAttribute(BlackboardAttribute(self.att_participant_email, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[6])))
                                    art.addAttribute(BlackboardAttribute(self.att_user_calls, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[7])))
                                    art.addAttribute(BlackboardAttribute(self.att_folder_extract_calls, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()
                # ok
                elif "Conversations" in line:

                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    with io.open(line,encoding="utf-8") as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)  
                        for row in reader: # each row is a list
                            try:
                                row = row[0].split(";")
                                if rowcount!=0:
                                    art = dataSource.newArtifact(self.art_teams.getTypeID())
                                    art.addAttribute(BlackboardAttribute(self.att_cv_id_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0])))
                                    art.addAttribute(BlackboardAttribute(self.att_creator_name_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1])))
                                    art.addAttribute(BlackboardAttribute(self.att_creator_email_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2])))
                                    art.addAttribute(BlackboardAttribute(self.att_participant_name_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3])))
                                    art.addAttribute(BlackboardAttribute(self.att_participant_email_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[4])))
                                    art.addAttribute(BlackboardAttribute(self.att_user_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[5])))
                                    art.addAttribute(BlackboardAttribute(self.att_folder_extract_teams, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()
                # ok
                elif "CallOneToOne" in line:

                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    with io.open(line,encoding="utf-8") as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)  
                        for row in reader: # each row is a list
                            try:
                                row = row[0].split(";")
                                if rowcount!=0:
                                    art = dataSource.newArtifact(self.art_call_one_to_one.getTypeID())
                                    art.addAttribute(BlackboardAttribute(self.att_date_start_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2])))
                                    art.addAttribute(BlackboardAttribute(self.att_date_finish_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3])))
                                    art.addAttribute(BlackboardAttribute(self.att_creator_name_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0])))
                                    art.addAttribute(BlackboardAttribute(self.att_creator_email_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1])))
                                    art.addAttribute(BlackboardAttribute(self.att_participant_name_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[4])))
                                    art.addAttribute(BlackboardAttribute(self.att_participant_email_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[5])))
                                    art.addAttribute(BlackboardAttribute(self.att_state_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[6])))
                                    art.addAttribute(BlackboardAttribute(self.att_user_calls_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[7])))
                                    art.addAttribute(BlackboardAttribute(self.att_folder_extract_calls_one_to_one, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()
                elif "Files" in line:

                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    with io.open(line,encoding="utf-8") as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)  
                        for row in reader: # each row is a list
                            try:
                                row = row[0].split(";")
                                if rowcount!=0:
                                    art = dataSource.newArtifact(self.art_messages_files.getTypeID())
                                    art.addAttribute(BlackboardAttribute(self.att_message_id_files, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0])))
                                    art.addAttribute(BlackboardAttribute(self.att_file_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1])))
                                    art.addAttribute(BlackboardAttribute(self.att_file_local, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2])))
                                    art.addAttribute(BlackboardAttribute(self.att_user_message_files, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3])))
                                    art.addAttribute(BlackboardAttribute(self.att_folder_extract_files, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()
                elif "Mensagens" in line:
                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    idMessage=""
                    message=""
                    sender=""
                    timee=""
                    cvid=""
                    userMessage=""
                    with open(line) as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)                     
                        for row in reader: # each row is a list
                            try:
                                self.log(Level.INFO,str(row))
                                if rowcount!=0:
                                    if len(row) == 1:
                                        row = row[0].split(";")
                                        idMessage=str(row[0])                                
                                        message=str(row[1])
                                        timee=str(row[2])
                                        sender=str(row[3])
                                        cvid=str(row[4])
                                        userMessage=str(row[5])
                                    else:
                                        partOne = row[0].split(";")
                                        idMessage=str(partOne[0])
                                        lastPart=row[len(row)-1].split(";")
                                        timee=str(lastPart[1])
                                        sender=str(lastPart[2])
                                        cvid=str(lastPart[3])
                                        userMessage=str(lastPart[4])
                                        message=str(partOne[1])+","
                                        if len(row)!=2:
                                            for x in range(1,len(row)-1):
                                                message+=str(row[x])+","
                                        message+=str(lastPart[0])
                                    art = dataSource.newArtifact(self.art_messages.getTypeID())
                                    art.addAttribute(BlackboardAttribute(self.att_message_id, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, idMessage))
                                    art.addAttribute(BlackboardAttribute(self.att_message, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, message))
                                    art.addAttribute(BlackboardAttribute(self.att_sender_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, sender))
                                    art.addAttribute(BlackboardAttribute(self.att_time, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, timee))
                                    art.addAttribute(BlackboardAttribute(self.att_cvid, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, cvid))
                                    art.addAttribute(BlackboardAttribute(self.att_user_message, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, userMessage))
                                    art.addAttribute(BlackboardAttribute(self.att_folder_extract_message, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()
                elif "Reacts" in line:

                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    with io.open(line,encoding="utf-8") as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)  
                        for row in reader: # each row is a list
                            try:
                                row = row[0].split(";")
                                if rowcount!=0:
                                    art = dataSource.newArtifact(self.art_messages_reacts.getTypeID())
                                    try:
                                        art.addAttribute(BlackboardAttribute(self.att_message_id_reacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0])))
                                        art.addAttribute(BlackboardAttribute(self.att_reacted_with, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1])))
                                        art.addAttribute(BlackboardAttribute(self.att_sender_name_react, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2])))
                                        art.addAttribute(BlackboardAttribute(self.att_react_time, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3])))
                                        art.addAttribute(BlackboardAttribute(self.att_user_message_reacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[4])))
                                        art.addAttribute(BlackboardAttribute(self.att_folder_extract_reacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                                    except:
                                        pass
                                    else:
                                        pass
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()
                elif "Contactos.csv" in line:

                    rowcount=0
                    for key,value in pathsLDB.items():
                        if value in line:
                            for k,v in paths.items():
                                if v == key:
                                    pathExtract=k
                                    break
                    with io.open(line,encoding="utf-8") as csvfile:
                        reader = csv.reader(x.replace('\0', '') for x in csvfile)  
                        for row in reader: # each row is a list
                            try:
                                row = row[0].split(";")
                                if rowcount!=0:
                                    art = dataSource.newArtifact(self.art_contacts.getTypeID())
                                    art.addAttribute(BlackboardAttribute(self.att_name, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[0])))
                                    art.addAttribute(BlackboardAttribute(self.att_email, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[1])))
                                    art.addAttribute(BlackboardAttribute(self.att_orgid, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[2])))
                                    art.addAttribute(BlackboardAttribute(self.att_user_contacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, str(row[3])))
                                    art.addAttribute(BlackboardAttribute(self.att_folder_extract_contacts, LabcifMSTeamsDataSourceIngestModuleFactory.moduleName, pathExtract))
                            except:
                                self.log(Level.INFO,"File empty")
                            rowcount+=1
                        csvfile.close()

            rowcount=0
        
        #Post a message to the ingest messages in box.
        message = IngestMessage.createMessage(IngestMessage.MessageType.DATA,
            "Sample Jython Data Source Ingest Module", "Please run MSTeams Report")
        IngestServices.getInstance().postMessage(message)

        return IngestModule.ProcessResult.OK

示例#54

0

显示文件

文件： util.py 项目： acaciawater/acaciadata

def find_files(pattern, root=os.curdir):
    """Lazily yield absolute paths of files below *root* matching *pattern*.

    The tree is walked with ``os.walk`` starting at the absolute form of
    *root*; in every visited directory the file names are matched against
    the shell-style *pattern* via ``fnmatch.filter``.  Only file names are
    tested, never directory names.
    """
    top = os.path.abspath(root)
    for folder, _subdirs, names in os.walk(top):
        matching = fnmatch.filter(names, pattern)
        for match in matching:
            yield os.path.join(folder, match)

示例#55

0

显示文件

文件： setup.py 项目： Sandy4321/wildboar

        include_dirs=include_dirs,
        libraries=libraries)


datafiles = []


def getext(filename):
    """Return the extension of *filename*, including the leading dot.

    An empty string is returned when the name has no extension.  The value
    must be returned explicitly: without the ``return`` the function yields
    ``None`` and every ``getext(f) in dataexts`` check silently fails.
    """
    return os.path.splitext(filename)[1]


# For every directory below each data dir, record a (directory, files) pair
# holding the files whose extension is listed in ``dataexts``.
for datadir in datadirs:
    for root, dirs, files in os.walk(datadir):
        wanted = [os.path.join(root, f) for f in files if getext(f) in dataexts]
        datafiles.append((root, wanted))

# Probe every "<docname><ext>" combination and keep those that exist as files
# relative to the current working directory.
detected_docs = []
for docname in standard_docs:
    for ext in standard_doc_exts:
        filename = docname + ext
        if not os.path.isfile(filename):
            continue
        detected_docs.append(filename)
datafiles.append(('.', detected_docs))

init_py_path = os.path.join(libname, '__init__.py')
version = '0.0.unknown'
try:
    with open(init_py_path) as f:
        for line in f:
            if line.startswith('__version__'):

示例#56

0

显示文件

文件： addCopyright.py 项目： hujiangyi/or

#!/usr/bin/python

import os.path
import os

whitelist = ["../../ssd/testing/codefile"]

rootDir = "../../"

# Walk the source tree and load every hand-written Python file for processing.
for d, _, fl in os.walk(rootDir):
    # Anything whose path contains "/tool" is left untouched.
    if d.find("/tool") >= 0:
        continue
    for fn in fl:
        ext = os.path.splitext(d + "/" + fn)
        # Compare with ``==``: ``ext[1] in (".py")`` is a *substring* test
        # against the string ".py" (the parentheses do not make a tuple), so
        # files without any extension were wrongly treated as Python sources.
        if ext[1] == ".py":
            # we don't want to process the _pb2 file
            if ext[0].endswith("_pb2"):
                continue
            if fn == "__init__.py":
                continue
            if ext[0] in whitelist:
                # Parenthesised single-argument print behaves the same on
                # Python 2 and Python 3.
                print("{} skipt since in whitelist".format(d + '/' + fn))
                continue
            content = None
            with open(d + "/" + fn) as f:
                content = f.read()

            # delete the first line

示例#57

0

显示文件

                mindiff = a[2][num].minTime - b[2][num].minTime
            else:
                mindiff = b[2][num].minTime - a[2][num].minTime
            if a[2][num].maxTime > b[2][num].maxTime:
                maxdiff = a[2][num].maxTime - b[2][num].maxTime
            else:
                maxdiff = b[2][num].maxTime - a[2][num].maxTime
            soidifferences.append(mindiff)
            soidifferences.append(maxdiff)
            #print (a[1][num].mark.strip(), b[1][num].mark.strip(), mindiff, maxdiff)
        totalsoidiff = sum(soidifferences) / len(soidifferences)
        return totalsoidiff
        #print ('Average difference for words:', totalworddiff)

# Gather the path of every file ending in 'TextGrid' below the first directory
# and report how many were found.
firstdir = []
for root, dirs, files in os.walk(tgdirectory1):
    for name in files:
        tgfile1 = os.path.join(root, name)
        if not tgfile1.endswith('TextGrid'):
            continue
        firstdir.append(tgfile1)
print(len(firstdir))

# Same scan for the second directory.
seconddir = []
for root, dirs, files in os.walk(tgdirectory2):
    for name in files:
        tgfile2 = os.path.join(root, name)
        if not tgfile2.endswith('TextGrid'):
            continue
        seconddir.append(tgfile2)
print(len(seconddir))

# Column headers, in output order.
csv_columns = [
    'tg',
    'Average difference for words',
    'Average difference for phones',
    'Average difference for SOI',
    'Difference in silence counts',
]

示例#58

0

显示文件

def listSubFolders(folder):
    """Return the paths of every directory nested below *folder*.

    ``os.walk`` yields *folder* itself first, so that first entry is dropped
    and only the sub-directories (at any depth) remain.  An empty list is
    returned when *folder* has no sub-directories or cannot be walked.
    """
    # Walk the ``folder`` argument; the original referenced an undefined
    # name ``directory`` and ignored its parameter entirely.
    return [path for path, _dirs, _files in os.walk(folder)][1:]

示例#59

0

显示文件

文件： se_epub_build.py 项目： nebulon42/tools

def build(self, run_epubcheck: bool, build_kobo: bool, build_kindle: bool, output_directory: Path, proof: bool, build_covers: bool) -> None:
	"""
	Entry point for `se build`
	"""

	# Check for some required tools
	if build_kindle:
		which_ebook_convert = shutil.which("ebook-convert")
		if which_ebook_convert:
			ebook_convert_path = Path(which_ebook_convert)
		else:
			# Look for default Mac calibre app path if none found in path
			ebook_convert_path = Path("/Applications/calibre.app/Contents/MacOS/ebook-convert")
			if not ebook_convert_path.exists():
				raise se.MissingDependencyException("Couldn’t locate [bash]ebook-convert[/]. Is [bash]calibre[/] installed?")

	if run_epubcheck:
		if not shutil.which("java"):
			raise se.MissingDependencyException("Couldn’t locate [bash]java[/]. Is it installed?")

	# Check the output directory and create it if it doesn't exist
	try:
		output_directory = output_directory.resolve()
		output_directory.mkdir(parents=True, exist_ok=True)
	except Exception:
		raise se.FileExistsException(f"Couldn’t create output directory: [path][link=file://{output_directory}]{output_directory}[/][/].")

	# All clear to start building!
	metadata_xml = self.metadata_xml

	with tempfile.TemporaryDirectory() as temp_directory:
		work_directory = Path(temp_directory)
		work_epub_root_directory = work_directory / "src"

		copy_tree(self.path, str(work_directory))
		try:
			shutil.rmtree(work_directory / ".git")
		except Exception:
			pass

		# By convention the ASIN is set to the SHA-1 sum of the book's identifying URL
		try:
			identifier = self.metadata_dom.xpath("//dc:identifier")[0].inner_xml().replace("url:", "")
			asin = sha1(identifier.encode("utf-8")).hexdigest()
		except:
			raise se.InvalidSeEbookException(f"Missing [xml]<dc:identifier>[/] element in [path][link=file://{self.metadata_file_path}]{self.metadata_file_path}[/][/].")

		if not self.metadata_dom.xpath("//dc:title"):
			raise se.InvalidSeEbookException(f"Missing [xml]<dc:title>[/] element in [path][link=file://{self.metadata_file_path}]{self.metadata_file_path}[/][/].")

		output_filename = identifier.replace("https://standardebooks.org/ebooks/", "").replace("/", "_")
		url_author = ""
		for author in self.metadata_dom.xpath("//dc:creator"):
			url_author = url_author + se.formatting.make_url_safe(author.inner_xml()) + "_"

		url_author = url_author.rstrip("_")

		epub_output_filename = f"{output_filename}{'.proof' if proof else ''}.epub"
		epub3_output_filename = f"{output_filename}{'.proof' if proof else ''}.epub3"
		kobo_output_filename = f"{output_filename}{'.proof' if proof else ''}.kepub.epub"
		kindle_output_filename = f"{output_filename}{'.proof' if proof else ''}.azw3"

		# Clean up old output files if any
		se.quiet_remove(output_directory / f"thumbnail_{asin}_EBOK_portrait.jpg")
		se.quiet_remove(output_directory / "cover.jpg")
		se.quiet_remove(output_directory / "cover-thumbnail.jpg")
		se.quiet_remove(output_directory / epub_output_filename)
		se.quiet_remove(output_directory / epub3_output_filename)
		se.quiet_remove(output_directory / kobo_output_filename)
		se.quiet_remove(output_directory / kindle_output_filename)

		# Are we including proofreading CSS?
		if proof:
			with open(work_epub_root_directory / "epub" / "css" / "local.css", "a", encoding="utf-8") as local_css_file:
				with importlib_resources.open_text("se.data.templates", "proofreading.css", encoding="utf-8") as proofreading_css_file:
					local_css_file.write(proofreading_css_file.read())

		# Update the release date in the metadata and colophon
		if self.last_commit:
			last_updated_iso = regex.sub(r"\.[0-9]+$", "", self.last_commit.timestamp.isoformat()) + "Z"
			last_updated_iso = regex.sub(r"\+.+?Z$", "Z", last_updated_iso)
			# In the line below, we can't use %l (unpadded 12 hour clock hour) because it isn't portable to Windows.
			# Instead we use %I (padded 12 hour clock hour) and then do a string replace to remove leading zeros.
			last_updated_friendly = f"{self.last_commit.timestamp:%B %e, %Y, %I:%M <abbr class=\"time eoc\">%p</abbr>}".replace(" 0", " ")
			last_updated_friendly = regex.sub(r"\s+", " ", last_updated_friendly).replace("AM", "a.m.").replace("PM", "p.m.").replace(" <abbr", " <abbr")

			# Set modified date in content.opf
			self.metadata_xml = regex.sub(r"<meta property=\"dcterms:modified\">[^<]+?</meta>", f"<meta property=\"dcterms:modified\">{last_updated_iso}</meta>", self.metadata_xml)

			with open(work_epub_root_directory / "epub" / "content.opf", "w", encoding="utf-8") as file:
				file.seek(0)
				file.write(self.metadata_xml)
				file.truncate()

			# Update the colophon with release info
			with open(work_epub_root_directory / "epub" / "text" / "colophon.xhtml", "r+", encoding="utf-8") as file:
				xhtml = file.read()

				xhtml = xhtml.replace("<p>The first edition of this ebook was released on<br/>", f"<p>This edition was released on<br/>\n\t\t\t<b>{last_updated_friendly}</b><br/>\n\t\t\tand is based on<br/>\n\t\t\t<b>revision {self.last_commit.short_sha}</b>.<br/>\n\t\t\tThe first edition of this ebook was released on<br/>")

				file.seek(0)
				file.write(xhtml)
				file.truncate()

		# Output the pure epub3 file
		se.epub.write_epub(work_epub_root_directory, output_directory / epub3_output_filename)

		# Now add epub2 compatibility.

		# Include compatibility CSS
		with open(work_epub_root_directory / "epub" / "css" / "core.css", "a", encoding="utf-8") as core_css_file:
			with importlib_resources.open_text("se.data.templates", "compatibility.css", encoding="utf-8") as compatibility_css_file:
				core_css_file.write(compatibility_css_file.read())

		# Simplify CSS and tags
		total_css = ""

		# Simplify the CSS first.  Later we'll update the document to match our simplified selectors.
		# While we're doing this, we store the original css into a single variable so we can extract the original selectors later.
		for root, _, filenames in os.walk(work_epub_root_directory):
			for filename_string in fnmatch.filter(filenames, "*.css"):
				filename = Path(root) / filename_string
				with open(filename, "r+", encoding="utf-8") as file:
					css = file.read()

					# Before we do anything, we process a special case in core.css
					if filename.name == "core.css":
						css = regex.sub(r"abbr{.+?}", "", css, flags=regex.DOTALL)

					total_css = total_css + css + "\n"
					file.seek(0)
					file.write(se.formatting.simplify_css(css))
					file.truncate()

		# Now get a list of original selectors
		# Remove @supports(){}
		total_css = regex.sub(r"@supports.+?{(.+?)}\s*}", "\\1}", total_css, flags=regex.DOTALL)

		# Remove CSS rules
		total_css = regex.sub(r"{[^}]+}", "", total_css)

		# Remove trailing commas
		total_css = regex.sub(r",", "", total_css)

		# Remove comments
		total_css = regex.sub(r"/\*.+?\*/", "", total_css, flags=regex.DOTALL)

		# Remove @ defines
		total_css = regex.sub(r"^@.+", "", total_css, flags=regex.MULTILINE)

		# Construct a dictionary of the original selectors
		selectors = {line for line in total_css.splitlines() if line != ""}

		# Get a list of .xhtml files to simplify
		for root, _, filenames in os.walk(work_epub_root_directory):
			for filename_string in fnmatch.filter(filenames, "*.xhtml"):
				filename = (Path(root) / filename_string).resolve()

				# Don't mess with the ToC, since if we have ol/li > first-child selectors we could screw it up
				if filename.name == "toc.xhtml":
					continue

				with open(filename, "r+", encoding="utf-8") as file:
					# We have to remove the default namespace declaration from our document, otherwise
					# xpath won't find anything at all.  See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
					xhtml = file.read().replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")
					processed_xhtml = xhtml
					try:
						tree = etree.fromstring(str.encode(xhtml))
					except Exception as ex:
						raise se.InvalidXhtmlException(f"Error parsing XHTML file: [path][link=file://{filename}]{filename}[/][/]. Exception: {ex}")

					# Now iterate over each CSS selector and see if it's used in any of the files we found
					for selector in selectors:
						try:
							# Add classes to elements that match any of our selectors to simplify. For example, if we select :first-child, add a "first-child" class to all elements that match that.
							for selector_to_simplify in se.SELECTORS_TO_SIMPLIFY:
								while selector_to_simplify in selector:
									# Potentially the pseudoclass we’ll simplify isn’t at the end of the selector,
									# so we need to temporarily remove the trailing part to target the right elements.
									split_selector = regex.split(fr"({selector_to_simplify}(\(.*?\))?)", selector, 1)
									target_element_selector = ''.join(split_selector[0:2])

									replacement_class = split_selector[1].replace(":", "").replace("(", "-").replace("n-", "n-minus-").replace("n+", "n-plus-").replace(")", "")
									selector = selector.replace(split_selector[1], "." + replacement_class, 1)
									sel = se.easy_xml.css_selector(target_element_selector)
									for element in tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES):
										current_class = element.get("class")
										if current_class is not None and replacement_class not in current_class:
											current_class = current_class + " " + replacement_class
										else:
											current_class = replacement_class

										element.set("class", current_class)

						except lxml.cssselect.ExpressionError:
							# This gets thrown if we use pseudo-elements, which lxml doesn't support
							pass
						except lxml.cssselect.SelectorSyntaxError as ex:
							raise se.InvalidCssException(f"Couldn’t parse CSS in or near this line: [css]{selector}[/]. Exception: {ex}")

						# We've already replaced attribute/namespace selectors with classes in the CSS, now add those classes to the matching elements
						if "[epub|type" in selector:
							for namespace_selector in regex.findall(r"\[epub\|type\~\=\"[^\"]*?\"\]", selector):
								sel = se.easy_xml.css_selector(namespace_selector)

								for element in tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES):
									new_class = regex.sub(r"^\.", "", se.formatting.namespace_to_class(namespace_selector))
									current_class = element.get("class", "")

									if new_class not in current_class:
										current_class = f"{current_class} {new_class}".strip()
										element.set("class", current_class)

					processed_xhtml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + etree.tostring(tree, encoding=str, pretty_print=True)

					# We do this round in a second pass because if we modify the tree like this, it screws up how lxml does processing later.
					# If it's all done in one pass, we wind up in a race condition where some elements are fixed and some not
					tree = etree.fromstring(str.encode(processed_xhtml))

					for selector in selectors:
						try:
							sel = se.easy_xml.css_selector(selector)
						except lxml.cssselect.ExpressionError:
							# This gets thrown if we use pseudo-elements, which lxml doesn't support
							continue
						except lxml.cssselect.SelectorSyntaxError as ex:
							raise se.InvalidCssException(f"Couldn’t parse CSS in or near this line: [css]{selector}[/]. Exception: {ex}")

						# Convert <abbr> to <span>
						if "abbr" in selector:
							for element in tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES):
								# Why would you want the tail to output by default?!?
								raw_string = etree.tostring(element, encoding=str, with_tail=False)

								# lxml--crap as usual--includes a bunch of namespace information in every element we print.
								# Remove it here.
								raw_string = raw_string.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")
								raw_string = raw_string.replace(" xmlns:epub=\"http://www.idpf.org/2007/ops\"", "")
								raw_string = raw_string.replace(" xmlns:m=\"http://www.w3.org/1998/Math/MathML\"", "")

								# Now lxml doesn't let us modify the tree, so we just do a straight up regex replace to turn this into a span
								processed_string = raw_string.replace("<abbr", "<span")
								processed_string = processed_string.replace("</abbr", "</span")

								# Now we have a nice, fixed string.  But, since lxml can't replace elements, we write it ourselves.
								processed_xhtml = processed_xhtml.replace(raw_string, processed_string)

								tree = etree.fromstring(str.encode(processed_xhtml))

					# Now we just remove all stray abbr tags that were not styled by CSS
					processed_xhtml = regex.sub(r"</?abbr[^>]*?>", "", processed_xhtml)

					# Remove datetime="" attribute in <time> tags, which is not always understood by epubcheck
					processed_xhtml = regex.sub(r" datetime=\"[^\"]+?\"", "", processed_xhtml)

					tree = etree.fromstring(str.encode(processed_xhtml))

					if processed_xhtml != xhtml:
						file.seek(0)
						file.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + etree.tostring(tree, encoding=str, pretty_print=True).replace("<html", "<html xmlns=\"http://www.w3.org/1999/xhtml\""))
						file.truncate()

		# Done simplifying CSS and tags!

		# Extract cover and cover thumbnail
		cover_svg_file = work_epub_root_directory / "epub" / "images" / "cover.svg"
		if not os.path.isfile(cover_svg_file):
			raise se.MissingDependencyException("Cover image is missing. Did you run [bash]se build-images[/]?")

		svg2png(url=str(cover_svg_file), write_to=str(work_directory / "cover.png"))
		cover = Image.open(work_directory / "cover.png")
		cover = cover.convert("RGB") # Remove alpha channel from PNG if necessary
		cover.save(work_epub_root_directory / "epub" / "images" / "cover.jpg")
		(work_directory / "cover.png").unlink()

		if build_covers:
			shutil.copy2(work_epub_root_directory / "epub" / "images" / "cover.jpg", output_directory / "cover.jpg")
			shutil.copy2(cover_svg_file, output_directory / "cover-thumbnail.svg")
			# Path arguments must be cast to string
			svg2png(url=str(output_directory / "cover-thumbnail.svg"), write_to=str(work_directory / "cover-thumbnail.png"))
			cover = Image.open(work_directory / "cover-thumbnail.png")
			cover = cover.resize((COVER_THUMBNAIL_WIDTH, COVER_THUMBNAIL_HEIGHT))
			cover = cover.convert("RGB") # Remove alpha channel from PNG if necessary
			cover.save(output_directory / "cover-thumbnail.jpg")
			(work_directory / "cover-thumbnail.png").unlink()
			(output_directory / "cover-thumbnail.svg").unlink()

		cover_svg_file.unlink()

		# Massage image references in content.opf
		metadata_xml = metadata_xml.replace("cover.svg", "cover.jpg")
		metadata_xml = metadata_xml.replace(".svg", ".png")
		metadata_xml = metadata_xml.replace("id=\"cover.jpg\" media-type=\"image/svg+xml\"", "id=\"cover.jpg\" media-type=\"image/jpeg\"")
		metadata_xml = metadata_xml.replace("image/svg+xml", "image/png")
		metadata_xml = regex.sub(r" properties=\"([^\"]*?)svg([^\"]*?)\"", r''' properties="\1\2"''', metadata_xml) # We may also have the `mathml` property
		metadata_xml = regex.sub(r" properties=\"([^\s]*?)\s\"", r''' properties="\1"''', metadata_xml) # Clean up trailing white space in property attributes introduced by the above line
		metadata_xml = regex.sub(r" properties=\"\s*\"", "", metadata_xml) # Remove any now-empty property attributes

		# Add an element noting the version of the se tools that built this ebook
		metadata_xml = regex.sub(r"<dc:publisher", f"<meta property=\"se:built-with\">{se.VERSION}</meta>\n\t\t<dc:publisher", metadata_xml)

		# Google Play Books chokes on https XML namespace identifiers (as of at least 2017-07)
		metadata_xml = metadata_xml.replace("https://standardebooks.org/vocab/1.0", "http://standardebooks.org/vocab/1.0")

		# Output the modified content.opf so that we can build the kobo book before making more epub2 compatibility hacks
		with open(work_epub_root_directory / "epub" / "content.opf", "w", encoding="utf-8") as file:
			file.write(metadata_xml)
			file.truncate()

		# Recurse over xhtml files to make some compatibility replacements
		for root, _, filenames in os.walk(work_epub_root_directory):
			for filename_string in filenames:
				filename = Path(root) / filename_string

				if filename.suffix == ".svg":
					# For night mode compatibility, give the titlepage a 1px white stroke attribute
					if filename.name in("titlepage.svg", "logo.svg"):
						with open(filename, "r+", encoding="utf-8") as file:
							svg = file.read()
							paths = svg

							# What we're doing here is faking the `stroke-align: outside` property, which is an unsupported draft spec right now.
							# We do this by duplicating all the SVG paths, and giving the duplicates a 2px stroke.  The originals are directly on top,
							# so the 2px stroke becomes a 1px stroke that's *outside* of the path instead of being *centered* on the path border.
							# This looks much nicer, but we also have to increase the image size by 2px in both directions, and re-center the whole thing.

							if filename.name == "titlepage.svg":
								stroke_width = SVG_TITLEPAGE_OUTER_STROKE_WIDTH
							else:
								stroke_width = SVG_OUTER_STROKE_WIDTH

							# First, strip out non-path, non-group elements
							paths = regex.sub(r"<\?xml[^<]+?\?>", "", paths)
							paths = regex.sub(r"</?svg[^<]*?>", "", paths)
							paths = regex.sub(r"<title>[^<]+?</title>", "", paths)
							paths = regex.sub(r"<desc>[^<]+?</desc>", "", paths)

							# `paths` is now our "duplicate".  Add a 2px stroke.
							paths = paths.replace("<path", f"<path style=\"stroke: #ffffff; stroke-width: {stroke_width}px;\"")

							# Inject the duplicate under the old SVG paths.  We do this by only replacing the first regex match for <g> or <path>
							svg = regex.sub(r"(<g|<path)", f"{paths}\\1", svg, 1)

							# If this SVG specifies height/width, then increase height and width by 2 pixels and translate everything by 1px
							try:
								height = int(regex.search(r"<svg[^>]+?height=\"([0-9]+)\"", svg).group(1)) + stroke_width
								svg = regex.sub(r"<svg([^<]*?)height=\"[0-9]+\"", f"<svg\\1height=\"{height}\"", svg)

								width = int(regex.search(r"<svg[^>]+?width=\"([0-9]+)\"", svg).group(1)) + stroke_width
								svg = regex.sub(r"<svg([^<]*?)width=\"[0-9]+\"", f"<svg\\1width=\"{width}\"", svg)

								# Add a grouping element to translate everything over 1px
								svg = regex.sub(r"(<g|<path)", "<g transform=\"translate({amount}, {amount})\">\n\\1".format(amount=(stroke_width / 2)), svg, 1)
								svg = svg.replace("</svg>", "</g>\n</svg>")
							except AttributeError:
								# Thrown when the regex doesn't match (i.e. SVG doesn't specify height/width)
								pass

							file.seek(0)
							file.write(svg)
							file.truncate()

					# Convert SVGs to PNGs at 2x resolution
					# Path arguments must be cast to string
					svg2png(url=str(filename), write_to=str(filename.parent / (str(filename.stem) + ".png")), scale=2)
					(filename).unlink()

				if filename.suffix == ".xhtml":
					with open(filename, "r+", encoding="utf-8") as file:
						xhtml = file.read()
						processed_xhtml = xhtml

						# Check if there's any MathML to convert.
						# We expect MathML to be the "content" type (versus the "presentational" type).
						# We use an XSL transform to convert from "content" to "presentational" MathML.
						# If we start with presentational, then nothing will be changed.
						# Kobo supports presentational MathML. After we build kobo, we convert the presentational MathML to PNG for the rest of the builds.
						mathml_transform = None
						for line in regex.findall(r"<(?:m:)?math[^>]*?>(.+?)</(?:m:)?math>", processed_xhtml, flags=regex.DOTALL):
							mathml_content_tree = se.easy_xml.EasyXhtmlTree("<?xml version=\"1.0\" encoding=\"utf-8\"?><math xmlns=\"http://www.w3.org/1998/Math/MathML\">{}</math>".format(regex.sub(r"<(/?)m:", "<\\1", line)))

							# Initialize the transform object, if we haven't yet
							if not mathml_transform:
								with importlib_resources.path("se.data", "mathmlcontent2presentation.xsl") as mathml_xsl_filename:
									mathml_transform = etree.XSLT(etree.parse(str(mathml_xsl_filename)))

							# Transform the mathml and get a string representation
							# XSLT comes from https://github.com/fred-wang/webextension-content-mathml-polyfill
							mathml_presentation_tree = mathml_transform(mathml_content_tree.etree)
							mathml_presentation_xhtml = etree.tostring(mathml_presentation_tree, encoding="unicode", pretty_print=True, with_tail=False).strip()

							# Plop our string back in to the XHTML we're processing
							processed_xhtml = regex.sub(r"<(?:m:)?math[^>]*?>\{}\</(?:m:)?math>".format(regex.escape(line)), mathml_presentation_xhtml, processed_xhtml, flags=regex.MULTILINE)

						if filename.name == "endnotes.xhtml":
							# iOS renders the left-arrow-hook character as an emoji; this fixes it and forces it to render as text.
							# See https://github.com/standardebooks/tools/issues/73
							# See http://mts.io/2015/04/21/unicode-symbol-render-text-emoji/
							processed_xhtml = processed_xhtml.replace("\u21a9", "\u21a9\ufe0e")

						# Since we added an outlining stroke to the titlepage/publisher logo images, we
						# want to remove the se:color-depth.black-on-transparent semantic
						if filename.name in ("colophon.xhtml", "imprint.xhtml", "titlepage.xhtml"):
							processed_xhtml = regex.sub(r"\s*se:color-depth\.black-on-transparent\s*", "", processed_xhtml)

						# Add ARIA roles, which are just mostly duplicate attributes to epub:type
						for role in ARIA_ROLES:
							processed_xhtml = regex.sub(fr"(epub:type=\"[^\"]*?{role}[^\"]*?\")", f"\\1 role=\"doc-{role}\"", processed_xhtml)

						# Some ARIA roles can't apply to some elements.
						# For example, epilogue can't apply to <article>
						processed_xhtml = regex.sub(r"<article ([^>]*?)role=\"doc-epilogue\"", "<article \\1", processed_xhtml)

						if filename.name == "toc.xhtml":
							landmarks_xhtml = regex.findall(r"<nav epub:type=\"landmarks\">.*?</nav>", processed_xhtml, flags=regex.DOTALL)
							landmarks_xhtml = regex.sub(r" role=\"doc-.*?\"", "", landmarks_xhtml[0])
							processed_xhtml = regex.sub(r"<nav epub:type=\"landmarks\">.*?</nav>", landmarks_xhtml, processed_xhtml, flags=regex.DOTALL)

						# But, remove ARIA roles we added to h# tags, because tyically those roles are for sectioning content.
						# For example, we might have an h2 that is both a title and dedication. But ARIA can't handle it being a dedication.
						# See The Man Who Was Thursday by G K Chesterton
						processed_xhtml = regex.sub(r"(<h[1-6] [^>]*) role=\".*?\">", "\\1>", processed_xhtml)

						# Google Play Books chokes on https XML namespace identifiers (as of at least 2017-07)
						processed_xhtml = processed_xhtml.replace("https://standardebooks.org/vocab/1.0", "http://standardebooks.org/vocab/1.0")

						# We converted svgs to pngs, so replace references
						processed_xhtml = processed_xhtml.replace("cover.svg", "cover.jpg")
						processed_xhtml = processed_xhtml.replace(".svg", ".png")

						# To get popup footnotes in iBooks, we have to change epub:endnote to epub:footnote.
						# Remember to get our custom style selectors too.
						processed_xhtml = regex.sub(r"epub:type=\"([^\"]*?)endnote([^\"]*?)\"", "epub:type=\"\\1footnote\\2\"", processed_xhtml)
						processed_xhtml = regex.sub(r"class=\"([^\"]*?)epub-type-endnote([^\"]*?)\"", "class=\"\\1epub-type-footnote\\2\"", processed_xhtml)

						# Include extra lang tag for accessibility compatibility.
						processed_xhtml = regex.sub(r"xml:lang\=\"([^\"]+?)\"", "lang=\"\\1\" xml:lang=\"\\1\"", processed_xhtml)

						# Typography: replace double and triple em dash characters with extra em dashes.
						processed_xhtml = processed_xhtml.replace("⸺", f"—{se.WORD_JOINER}—")
						processed_xhtml = processed_xhtml.replace("⸻", f"—{se.WORD_JOINER}—{se.WORD_JOINER}—")

						# Typography: replace some other less common characters.
						processed_xhtml = processed_xhtml.replace("⅒", "1/10")
						processed_xhtml = processed_xhtml.replace("℅", "c/o")
						processed_xhtml = processed_xhtml.replace("✗", "×")
						processed_xhtml = processed_xhtml.replace(" ", f"{se.NO_BREAK_SPACE}{se.NO_BREAK_SPACE}") # em-space to two nbsps

						# Many e-readers don't support the word joiner character (U+2060).
						# They DO, however, support the now-deprecated zero-width non-breaking space (U+FEFF)
						# For epubs, do this replacement.  Kindle now seems to handle everything fortunately.
						processed_xhtml = processed_xhtml.replace(se.WORD_JOINER, se.ZERO_WIDTH_SPACE)

						# Some minor code style cleanup
						processed_xhtml = processed_xhtml.replace(" >", ">")
						processed_xhtml = regex.sub(r"""\s*epub:type=""\s*""", "", processed_xhtml)

						if processed_xhtml != xhtml:
							file.seek(0)
							file.write(processed_xhtml)
							file.truncate()

				if filename.suffix == ".css":
					with open(filename, "r+", encoding="utf-8") as file:
						css = file.read()
						processed_css = css

						# To get popup footnotes in iBooks, we have to change epub:endnote to epub:footnote.
						# Remember to get our custom style selectors too.
						processed_css = processed_css.replace("endnote", "footnote")

						# page-break-* is deprecated in favor of break-*. Add page-break-* aliases for compatibility in older ereaders.
						processed_css = regex.sub(r"(\s+)break-(.+?:\s.+?;)", "\\1break-\\2\t\\1page-break-\\2", processed_css)

						# `page-break-*: page;` should be come `page-break-*: always;`
						processed_css = regex.sub(r"(\s+)page-break-(before|after):\s+page;", "\\1page-break-\\2: always;", processed_css)

						if processed_css != css:
							file.seek(0)
							file.write(processed_css)
							file.truncate()

		if build_kobo:
			with tempfile.TemporaryDirectory() as temp_directory:
				kobo_work_directory = Path(temp_directory)
				copy_tree(str(work_epub_root_directory), str(kobo_work_directory))

				for root, _, filenames in os.walk(kobo_work_directory):
					# Add a note to content.opf indicating this is a transform build
					for filename_string in fnmatch.filter(filenames, "content.opf"):
						with open(Path(root) / filename_string, "r+", encoding="utf-8") as file:
							xhtml = file.read()

							xhtml = regex.sub(r"<dc:publisher", "<meta property=\"se:transform\">kobo</meta>\n\t\t<dc:publisher", xhtml)

							file.seek(0)
							file.write(xhtml)
							file.truncate()

					# Kobo .kepub files need each clause wrapped in a special <span> tag to enable highlighting.
					# Do this here. Hopefully Kobo will get their act together soon and drop this requirement.
					for filename_string in fnmatch.filter(filenames, "*.xhtml"):
						kobo.paragraph_counter = 1
						kobo.segment_counter = 1

						filename = (Path(root) / filename_string).resolve()

						# Don't add spans to the ToC
						if filename.name == "toc.xhtml":
							continue

						with open(filename, "r+", encoding="utf-8") as file:
							xhtml = file.read()

							# Note: Kobo supports CSS hyphenation, but it can be improved with soft hyphens.
							# However we can't insert them, because soft hyphens break the dictionary search when
							# a word is highlighted.

							# Kobos don't have fonts that support the ↩ character in endnotes, so replace it with ←
							if filename.name == "endnotes.xhtml":
								# Note that we replaced ↩ with \u21a9\ufe0e in an earlier iOS compatibility fix
								xhtml = regex.sub(r"epub:type=\"backlink\">\u21a9\ufe0e</a>", "epub:type=\"backlink\">←</a>", xhtml)

							# We have to remove the default namespace declaration from our document, otherwise
							# xpath won't find anything at all.  See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
							try:
								tree = etree.fromstring(str.encode(xhtml.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")))
							except Exception as ex:
								raise se.InvalidXhtmlException(f"Error parsing XHTML file: [path][link=file://{filename}]{filename}[/][/]. Exception: {ex}")

							kobo.add_kobo_spans_to_node(tree.xpath("./body", namespaces=se.XHTML_NAMESPACES)[0])

							xhtml = etree.tostring(tree, encoding="unicode", pretty_print=True, with_tail=False)
							xhtml = regex.sub(r"<html:span", "<span", xhtml)
							xhtml = regex.sub(r"html:span>", "span>", xhtml)
							xhtml = regex.sub(r"<span xmlns:html=\"http://www.w3.org/1999/xhtml\"", "<span", xhtml)
							xhtml = regex.sub(r"<html", "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\"", xhtml)

							file.seek(0)
							file.write(xhtml)
							file.truncate()

				# All done, clean the output
				# Note that we don't clean .xhtml files, because the way kobo spans are added means that it will screw up spaces inbetween endnotes.
				for filepath in se.get_target_filenames([kobo_work_directory], (".svg", ".opf", ".ncx")):
					se.formatting.format_xml_file(filepath)

				se.epub.write_epub(kobo_work_directory, output_directory / kobo_output_filename)

		# Now work on more epub2 compatibility

		# Recurse over css files to make some compatibility replacements.
		for root, _, filenames in os.walk(work_epub_root_directory):
			for filename_string in filenames:
				filename = Path(root) / filename_string

				if filename.suffix == ".css":
					with open(filename, "r+", encoding="utf-8") as file:
						css = file.read()
						processed_css = css

						processed_css = regex.sub(r"(page\-break\-(before|after|inside)\s*:\s*(.+))", "\\1\n\t-webkit-column-break-\\2: \\3 /* For Readium */", processed_css)
						processed_css = regex.sub(r"^\s*hyphens\s*:\s*(.+)", "\thyphens: \\1\n\tadobe-hyphenate: \\1\n\t-webkit-hyphens: \\1\n\t-epub-hyphens: \\1\n\t-moz-hyphens: \\1", processed_css, flags=regex.MULTILINE)
						processed_css = regex.sub(r"^\s*hyphens\s*:\s*none;", "\thyphens: none;\n\tadobe-text-layout: optimizeSpeed; /* For Nook */", processed_css, flags=regex.MULTILINE)

						if processed_css != css:
							file.seek(0)
							file.write(processed_css)
							file.truncate()

		# Sort out MathML compatibility
		has_mathml = "mathml" in metadata_xml
		if has_mathml:
			# We import this late because we don't want to load selenium if we're not going to use it!
			from se import browser # pylint: disable=import-outside-toplevel

			# We wrap this whole thing in a try block, because we need to call
			# driver.quit() if execution is interrupted (like by ctrl + c, or by an unhandled exception). If we don't call driver.quit(),
			# Firefox will stay around as a zombie process even if the Python script is dead.
			try:
				driver = browser.initialize_selenium_firefox_webdriver()

				mathml_count = 1
				for root, _, filenames in os.walk(work_epub_root_directory):
					for filename_string in filenames:
						filename = Path(root) / filename_string
						if filename.suffix == ".xhtml":
							with open(filename, "r+", encoding="utf-8") as file:
								xhtml = file.read()
								processed_xhtml = xhtml
								replaced_mathml: List[str] = []

								# Check if there's MathML we want to convert
								# We take a naive approach and use some regexes to try to simplify simple MathML expressions.
								# For each MathML expression, if our round of regexes finishes and there is still MathML in the processed result, we abandon the attempt and render to PNG using Firefox.
								for line in regex.findall(r"<(?:m:)?math[^>]*?>(?:.+?)</(?:m:)?math>", processed_xhtml, flags=regex.DOTALL):
									if line not in replaced_mathml:
										replaced_mathml.append(line) # Store converted lines to save time in case we have multiple instances of the same MathML
										mathml_tree = se.easy_xml.EasyXhtmlTree("<?xml version=\"1.0\" encoding=\"utf-8\"?>{}".format(regex.sub(r"<(/?)m:", "<\\1", line)))
										processed_line = line

										# If the mfenced element has more than one child, they are separated by commas when rendered.
										# This is too complex for our naive regexes to work around. So, if there is an mfenced element with more than one child, abandon the attempt.
										if not mathml_tree.css_select("mfenced > * + *"):
											processed_line = regex.sub(r"</?(?:m:)?math[^>]*?>", "", processed_line)
											processed_line = regex.sub(r"<!--.+?-->", "", processed_line)
											processed_line = regex.sub(r"<(?:m:)?mfenced/>", "()", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi)>(.+?)</\3><((?:m:)?mi)>(.+?)</\5></\1>", "<i>\\4</i><\\2><i>\\6</i></\\2>", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi)>(.+?)</\3><((?:m:)?mn)>(.+?)</\5></\1>", "<i>\\4</i><\\2>\\6</\\2>", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mn)>(.+?)</\3><((?:m:)?mn)>(.+?)</\5></\1>", "\\4<\\2>\\6</\\2>", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mn)>(.+?)</\3><((?:m:)?mi)>(.+?)</\5></\1>", "\\4<\\2><i>\\6</i></\\2>", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi) mathvariant=\"normal\">(.+?)</\3><((?:m:)?mi)>(.+?)</\5></\1>", "\\4<\\2><i>\\6</i></\\2>", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi) mathvariant=\"normal\">(.+?)</\3><((?:m:)?mn)>(.+?)</\5></\1>", "\\4<\\2>\\6</\\2>", processed_line)
											processed_line = regex.sub(fr"<(?:m:)?mo>{se.FUNCTION_APPLICATION}</(?:m:)?mo>", "", processed_line, flags=regex.IGNORECASE) # The ignore case flag is required to match here with the special FUNCTION_APPLICATION character, it's unclear why
											processed_line = regex.sub(r"<(?:m:)?mfenced><((?:m:)(?:mo|mi|mn|mrow))>(.+?)</\1></(?:m:)?mfenced>", "(<\\1>\\2</\\1>)", processed_line)
											processed_line = regex.sub(r"<(?:m:)?mrow>([^>].+?)</(?:m:)?mrow>", "\\1", processed_line)
											processed_line = regex.sub(r"<(?:m:)?mi>([^<]+?)</(?:m:)?mi>", "<i>\\1</i>", processed_line)
											processed_line = regex.sub(r"<(?:m:)?mi mathvariant=\"normal\">([^<]+?)</(?:m:)?mi>", "\\1", processed_line)
											processed_line = regex.sub(r"<(?:m:)?mo>([+\-−=×])</(?:m:)?mo>", " \\1 ", processed_line)
											processed_line = regex.sub(r"<((?:m:)?m[no])>(.+?)</\1>", "\\2", processed_line)
											processed_line = regex.sub(r"</?(?:m:)?mrow>", "", processed_line)
											processed_line = processed_line.strip()
											processed_line = regex.sub(r"</i><i>", "", processed_line, flags=regex.DOTALL)

										# Did we succeed? Is there any more MathML in our string?
										if regex.findall("</?(?:m:)?m", processed_line):
											# Failure! Abandon all hope, and use Firefox to convert the MathML to PNG.
											se.images.render_mathml_to_png(driver, regex.sub(r"<(/?)m:", "<\\1", line), work_epub_root_directory / "epub" / "images" / f"mathml-{mathml_count}.png", work_epub_root_directory / "epub" / "images" / f"mathml-{mathml_count}-2x.png")

											processed_xhtml = processed_xhtml.replace(line, f"<img class=\"mathml epub-type-se-image-color-depth-black-on-transparent\" epub:type=\"se:image.color-depth.black-on-transparent\" src=\"../images/mathml-{mathml_count}.png\" srcset=\"../images/mathml-{mathml_count}-2x.png 2x, ../images/mathml-{mathml_count}.png 1x\" />")
											mathml_count = mathml_count + 1
										else:
											# Success! Replace the MathML with our new string.
											processed_xhtml = processed_xhtml.replace(line, processed_line)

								if processed_xhtml != xhtml:
									file.seek(0)
									file.write(processed_xhtml)
									file.truncate()
			except KeyboardInterrupt as ex:
				# Bubble the exception up, but proceed to `finally` so we quit the driver
				raise ex
			finally:
				try:
					driver.quit()
				except Exception:
					# We might get here if we ctrl + c before selenium has finished initializing the driver
					pass

		# Include epub2 cover metadata
		cover_id = self.metadata_dom.xpath("//item[@properties=\"cover-image\"]/@id")[0].replace(".svg", ".jpg")
		metadata_xml = regex.sub(r"(<metadata[^>]+?>)", f"\\1\n\t\t<meta content=\"{cover_id}\" name=\"cover\" />", metadata_xml)

		# Add metadata to content.opf indicating this file is a Standard Ebooks compatibility build
		metadata_xml = metadata_xml.replace("<dc:publisher", "<meta property=\"se:transform\">compatibility</meta>\n\t\t<dc:publisher")

		# Add any new MathML images we generated to the manifest
		if has_mathml:
			for root, _, filenames in os.walk(work_epub_root_directory / "epub" / "images"):
				filenames = natsorted(filenames)
				filenames.reverse()
				for filename_string in filenames:
					filename = Path(root) / filename_string
					if filename.name.startswith("mathml-"):
						metadata_xml = metadata_xml.replace("<manifest>", f"<manifest><item href=\"images/{filename.name}\" id=\"{filename.name}\" media-type=\"image/png\"/>")

			metadata_xml = regex.sub(r"properties=\"([^\"]*?)mathml([^\"]*?)\"", "properties=\"\\1\\2\"", metadata_xml)

		metadata_xml = regex.sub(r"properties=\"\s*\"", "", metadata_xml)

		# Generate our NCX file for epub2 compatibility.
		# First find the ToC file.
		toc_filename = self.metadata_dom.xpath("//item[@properties=\"nav\"]/@href")[0]
		metadata_xml = metadata_xml.replace("<spine>", "<spine toc=\"ncx\">")
		metadata_xml = metadata_xml.replace("<manifest>", "<manifest><item href=\"toc.ncx\" id=\"ncx\" media-type=\"application/x-dtbncx+xml\" />")

		# Now use an XSLT transform to generate the NCX
		with importlib_resources.path("se.data", "navdoc2ncx.xsl") as navdoc2ncx_xsl_filename:
			toc_tree = se.epub.convert_toc_to_ncx(work_epub_root_directory, toc_filename, navdoc2ncx_xsl_filename)

		# Convert the <nav> landmarks element to the <guide> element in content.opf
		guide_xhtml = "<guide>"
		for element in toc_tree.xpath("//nav[@epub:type=\"landmarks\"]/ol/li/a"):
			element_xhtml = element.tostring()
			element_xhtml = regex.sub(r"epub:type=\"([^\"]*)(\s*frontmatter\s*|\s*backmatter\s*)([^\"]*)\"", "type=\"\\1\\3\"", element_xhtml)
			element_xhtml = regex.sub(r"epub:type=\"[^\"]*(acknowledgements|bibliography|colophon|copyright-page|cover|dedication|epigraph|foreword|glossary|index|loi|lot|notes|preface|bodymatter|titlepage|toc)[^\"]*\"", "type=\"\\1\"", element_xhtml)
			element_xhtml = element_xhtml.replace("type=\"copyright-page", "type=\"copyright page")

			# We add the 'text' attribute to the titlepage to tell the reader to start there
			element_xhtml = element_xhtml.replace("type=\"titlepage", "type=\"title-page text")

			element_xhtml = regex.sub(r"type=\"\s*\"", "", element_xhtml)
			element_xhtml = element_xhtml.replace("<a", "<reference")
			element_xhtml = regex.sub(r">(.+)</a>", " title=\"\\1\" />", element_xhtml)

			# Replace instances of the `role` attribute since it's illegal in content.opf
			element_xhtml = regex.sub(r" role=\".*?\"", "", element_xhtml)

			guide_xhtml = guide_xhtml + element_xhtml

		guide_xhtml = guide_xhtml + "</guide>"

		metadata_xml = metadata_xml.replace("</package>", "") + guide_xhtml + "</package>"

		# Guide is done, now write content.opf and clean it.
		# Output the modified content.opf before making more epub2 compatibility hacks.
		with open(work_epub_root_directory / "epub" / "content.opf", "w", encoding="utf-8") as file:
			file.write(metadata_xml)
			file.truncate()

		# All done, clean the output
		for filepath in se.get_target_filenames([work_epub_root_directory], (".xhtml", ".svg", ".opf", ".ncx")):
			se.formatting.format_xml_file(filepath)

		# Write the compatible epub
		se.epub.write_epub(work_epub_root_directory, output_directory / epub_output_filename)

		if run_epubcheck:
			# Path arguments must be cast to string for Windows compatibility.
			with importlib_resources.path("se.data.epubcheck", "epubcheck.jar") as jar_path:
				try:
					epubcheck_result = subprocess.run(["java", "-jar", str(jar_path), "--quiet", str(output_directory / epub_output_filename)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False)
					epubcheck_result.check_returncode()
				except subprocess.CalledProcessError:
					output = epubcheck_result.stdout.decode().strip()
					# Get the epubcheck version to print to the console
					version_output = subprocess.run(["java", "-jar", str(jar_path), "--version"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False).stdout.decode().strip()
					version = regex.search(r"[0-9]+\.([0-9]+\.?)*", version_output, flags=regex.MULTILINE).group(0)

					# The last two lines from epubcheck output are not necessary. Remove them here.
					# Remove them as lines instead of as a matching regex to work with localized output strings.
					split_output = output.split("\n")
					output = "\n".join(split_output[:-2])

					# Try to linkify files in output if we can find them
					try:
						output = regex.sub(r"(ERROR\(.+?\): )(.+?)(\([0-9]+,[0-9]+\))", lambda match: match.group(1) + "[path][link=file://" + str(self.path / "src" / regex.sub(fr"^\..+?\.epub{os.sep}", "", match.group(2))) + "]" + match.group(2) + "[/][/]" + match.group(3), output)
					except:
						# If something goes wrong, just pass through the usual output
						pass

					raise se.BuildFailedException(f"[bash]epubcheck[/] v{version} failed with:\n{output}")

		if build_kindle:
			# There's a bug in Calibre <= 3.48.0 where authors who have more than one MARC relator role
			# display as "unknown author" in the Kindle interface.
			# See: https://bugs.launchpad.net/calibre/+bug/1844578
			# Until the bug is fixed, we simply remove any other MARC relator on the dc:creator element.
			# Once the bug is fixed, we can remove this block.
			with open(work_epub_root_directory / "epub" / "content.opf", "r+", encoding="utf-8") as file:
				xhtml = file.read()

				processed_xhtml = xhtml

				for match in regex.findall(r"<meta property=\"role\" refines=\"#author\" scheme=\"marc:relators\">.*?</meta>", xhtml):
					if ">aut<" not in match:
						processed_xhtml = processed_xhtml.replace(match, "")

				if processed_xhtml != xhtml:
					file.seek(0)
					file.write(processed_xhtml)
					file.truncate()

			# Kindle doesn't go more than 2 levels deep for ToC, so flatten it here.
			with open(work_epub_root_directory / "epub" / toc_filename, "r+", encoding="utf-8") as file:
				xhtml = file.read()

				soup = BeautifulSoup(xhtml, "lxml")

				for match in soup.select("ol > li > ol > li > ol"):
					match.parent.insert_after(match)
					match.unwrap()

				file.seek(0)
				file.write(str(soup))
				file.truncate()

			# Rebuild the NCX
			with importlib_resources.path("se.data", "navdoc2ncx.xsl") as navdoc2ncx_xsl_filename:
				toc_tree = se.epub.convert_toc_to_ncx(work_epub_root_directory, toc_filename, navdoc2ncx_xsl_filename)

			# Clean just the ToC and NCX
			for filepath in [work_epub_root_directory / "epub" / "toc.ncx", work_epub_root_directory / "epub" / toc_filename]:
				se.formatting.format_xml_file(filepath)

			# Convert endnotes to Kindle popup compatible notes
			if (work_epub_root_directory / "epub/text/endnotes.xhtml").is_file():
				with open(work_epub_root_directory / "epub/text/endnotes.xhtml", "r+", encoding="utf-8") as file:
					xhtml = file.read()

					# We have to remove the default namespace declaration from our document, otherwise
					# xpath won't find anything at all.  See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
					try:
						tree = etree.fromstring(str.encode(xhtml.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")))
					except Exception as ex:
						raise se.InvalidXhtmlException(f"Error parsing XHTML [path][link=file://{(work_epub_root_directory / 'epub/text/endnotes.xhtml').resolve()}]endnotes.xhtml[/][/]. Exception: {ex}")

					notes = tree.xpath("//li[@epub:type=\"endnote\" or @epub:type=\"footnote\"]", namespaces=se.XHTML_NAMESPACES)

					processed_endnotes = ""

					for note in notes:
						note_id = note.get("id")
						note_number = note_id.replace("note-", "")

						# First, fixup the reference link for this endnote
						try:
							ref_link = etree.tostring(note.xpath("p[last()]/a[last()]")[0], encoding="unicode", pretty_print=True, with_tail=False).replace(" xmlns:epub=\"http://www.idpf.org/2007/ops\"", "").strip()
						except Exception:
							raise se.InvalidXhtmlException(f"Can’t find ref link for [url]#{note_id}[/].")

						new_ref_link = regex.sub(r">.*?</a>", ">" + note_number + "</a>.", ref_link)

						# Now remove the wrapping li node from the note
						note_text = regex.sub(r"^<li[^>]*?>(.*)</li>$", r"\1", etree.tostring(note, encoding="unicode", pretty_print=True, with_tail=False), flags=regex.IGNORECASE | regex.DOTALL)

						# Insert our new ref link
						result = regex.subn(r"^\s*<p([^>]*?)>", "<p\\1 id=\"" + note_id + "\">" + new_ref_link + " ", note_text)

						# Sometimes there is no leading <p> tag (for example, if the endnote starts with a blockquote
						# If that's the case, just insert one in front.
						note_text = result[0]
						if result[1] == 0:
							note_text = "<p id=\"" + note_id + "\">" + new_ref_link + "</p>" + note_text

						# Now remove the old ref_link
						note_text = note_text.replace(ref_link, "")

						# Trim trailing spaces left over after removing the ref link
						note_text = regex.sub(r"\s+</p>", "</p>", note_text).strip()

						# Sometimes ref links are in their own p tag--remove that too
						note_text = regex.sub(r"<p>\s*</p>", "", note_text)

						processed_endnotes += note_text + "\n"

					# All done with endnotes, so drop them back in
					xhtml = regex.sub(r"<ol>.*</ol>", processed_endnotes, xhtml, flags=regex.IGNORECASE | regex.DOTALL)

					file.seek(0)
					file.write(xhtml)
					file.truncate()

				# While Kindle now supports soft hyphens, popup endnotes break words but don't insert the hyphen characters.  So for now, remove soft hyphens from the endnotes file.
				with open(work_epub_root_directory / "epub" / "text" / "endnotes.xhtml", "r+", encoding="utf-8") as file:
					xhtml = file.read()
					processed_xhtml = xhtml

					processed_xhtml = processed_xhtml.replace(se.SHY_HYPHEN, "")

					if processed_xhtml != xhtml:
						file.seek(0)
						file.write(processed_xhtml)
						file.truncate()

			# Do some compatibility replacements
			for root, _, filenames in os.walk(work_epub_root_directory):
				for filename_string in filenames:
					filename = Path(root) / filename_string
					if filename.suffix == ".xhtml":
						with open(filename, "r+", encoding="utf-8") as file:
							xhtml = file.read()
							processed_xhtml = xhtml

							# Kindle doesn't recognize most zero-width spaces or word joiners, so just remove them.
							# It does recognize the word joiner character, but only in the old mobi7 format.  The new format renders them as spaces.
							processed_xhtml = processed_xhtml.replace(se.ZERO_WIDTH_SPACE, "")

							# Remove the epub:type attribute, as Calibre turns it into just "type"
							processed_xhtml = regex.sub(r"epub:type=\"[^\"]*?\"", "", processed_xhtml)

							if processed_xhtml != xhtml:
								file.seek(0)
								file.write(processed_xhtml)
								file.truncate()

			# Include compatibility CSS
			with open(work_epub_root_directory / "epub" / "css" / "core.css", "a", encoding="utf-8") as core_css_file:
				with importlib_resources.open_text("se.data.templates", "kindle.css", encoding="utf-8") as compatibility_css_file:
					core_css_file.write(compatibility_css_file.read())

			# Add soft hyphens
			for filepath in se.get_target_filenames([work_epub_root_directory], (".xhtml",)):
				se.typography.hyphenate_file(filepath, None, True)

			# Build an epub file we can send to Calibre
			se.epub.write_epub(work_epub_root_directory, work_directory / epub_output_filename)

			# Generate the Kindle file
			# We place it in the work directory because later we have to update the asin, and the mobi.update_asin() function will write to the final output directory
			cover_path = work_epub_root_directory / "epub" / self.metadata_dom.xpath("//item[@properties=\"cover-image\"]/@href")[0].replace(".svg", ".jpg")

			# Path arguments must be cast to string for Windows compatibility.
			return_code = subprocess.run([str(ebook_convert_path), str(work_directory / epub_output_filename), str(work_directory / kindle_output_filename), "--pretty-print", "--no-inline-toc", "--max-toc-links=0", "--prefer-metadata-cover", f"--cover={cover_path}"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode

			if return_code:
				raise se.InvalidSeEbookException("[bash]ebook-convert[/] failed.")

			# Success, extract the Kindle cover thumbnail

			# Update the ASIN in the generated file
			mobi.update_asin(asin, work_directory / kindle_output_filename, output_directory / kindle_output_filename)

			# Extract the thumbnail
			kindle_cover_thumbnail = Image.open(work_epub_root_directory / "epub" / "images" / "cover.jpg")
			kindle_cover_thumbnail = kindle_cover_thumbnail.convert("RGB") # Remove alpha channel from PNG if necessary
			kindle_cover_thumbnail = kindle_cover_thumbnail.resize((432, 648))
			kindle_cover_thumbnail.save(output_directory / f"thumbnail_{asin}_EBOK_portrait.jpg")

示例#60

0

显示文件

文件： delete_unnecessary.py 项目： coder38611/BOJ-Solutions

import os

# Walk everything under the current working directory and delete leftover
# files: names starting with "._", names ending in ".out" or ".txt", and
# names that contain no dot at all.
for folder, _subdirs, names in os.walk(os.getcwd()):
    # Leave a directory untouched when its path (as yielded by os.walk,
    # i.e. including the starting directory's own path) contains ".git"
    # or ".vscode" anywhere. This is a plain substring test.
    if any(marker in folder for marker in ('.git', '.vscode')):
        continue
    for name in names:
        unwanted = (
            name.startswith("._")
            or name.endswith((".out", '.txt'))
            or '.' not in name
        )
        if unwanted:
            os.remove(os.path.join(folder, name))