def process(arg):
    global not_deleted_list, update_time
    curs = _connect.cursor()
    res = curs.execute("SELECT BookId FROM libbook WHERE NOT (Deleted&1) and FileType = 'fb2' ")
    not_deleted_list = curs.fetchall()
    not_deleted_list = set([i[0] for i in not_deleted_list])
    curs.execute('SELECT * FROM librusec')
    update_time = curs.fetchone()[0]
    for fn in walk(arg):
        for ftype, z_filename, data in read_file(fn, zip_charset='utf-8'):
            process_file(fn, ftype, z_filename, data)
    if options.search_deleted:
        deleted = set()
        for fn in walk(options.search_deleted):
            bookid = base_name(fn, '.fb2.zip')
            try:
                bookid = int(bookid)
            except ValueError:
                continue
            if bookid in not_deleted_list:
                deleted.add(fn)          # sets have no append()
        for fn in deleted:
            for ftype, z_filename, data in read_file(fn, zip_charset='utf-8'):
                ret = process_file(fn, ftype, z_filename, data)
                if ret:
                    print_log('restore deleted:', bookid)
    print
    print 'processed:', stats.total
    print 'passed:', stats.passed
    print 'fixed:', stats.fixed
    print 'errors:', stats.errors
    if options.not_found:
        fd = open(options.not_found, 'w')
        for bookid in not_deleted_list:
            print >> fd, bookid
def update(self, transect):
    """
    Updates the container data to the profiles that intersect the
    transect line.

    Returns nothing. Sets attributes as a side effect.

    Args:
        transect (LineString): A transect line.
    """
    Notice.info("Updating " + self.__class__.__name__)

    # Preprocess
    prepared = prep(transect.buffer(self.settings['buffer']))

    # Get the intersecting points
    points = filter(prepared.contains, self.lookup.keys())

    self.reset_data()
    self.names = []

    for point in points:
        name = self.lookup[point]
        self.names.append(name)
        print name,

        pattern = "^" + name + "_out.las"
        for fname in utils.walk(self.well_dir, pattern):
            # This is a loop but there should only be one matching file.
            well = Well(fname, null_subs=np.nan)
            print well.curves.names
            self.data.append(well)
            self.log_lookup[name] = self.data[-1]

        if not self.log_lookup.get(name):
            print
            self.data.append(None)

        sl_name = getattr(self, 'striplog', None)
        sl = None
        if sl_name and (name == self.feature_well):
            lexicon = Lexicon.default()
            pattern = "^" + name + ".*striplog.las"
            for fname in utils.walk(self.well_dir, pattern):
                # Load the striplog.
                sl = Well(fname, lexicon=lexicon, null_subs=np.nan)

            # Add it to the well
            self.log_lookup[name].add_striplog(sl.striplog[sl_name], sl_name)

        self.coords.append(transect.project(point))
def build(self):
    if platform == 'android':
        perms = ["android.permission.READ_EXTERNAL_STORAGE",
                 "android.permission.WRITE_EXTERNAL_STORAGE",
                 "android.permission.CAMERA",
                 "android.permission.ACCESS_FINE_LOCATION"]
        haveperms = utils.acquire_permissions(perms)
        self.gps = plyer.gps
        self.gps.configure(self.gps_onlocation, self.gps_onstatus)
        import my_camera
        self.camera = my_camera.MyAndroidCamera()

    Window.bind(on_keyboard=self.popScreen)
    os.makedirs(utils.getDataDir() + "/images", exist_ok=True)
    self.markerMap = {}
    self.settings_cls = SettingsWithSidebar
    self.curMarker = None
    self.relocated = 0
    self.dialog = None

    self.baseConfig = config.Config()
    self.error = self.baseConfig.getErrors()
    if self.error:
        Clock.schedule_once(self.show_error, 2)
    self.store = JsonStore("base.json")
    self.root = Page()

    try:
        base = self.store.get("base")["base"]
        self.baseConfig.getBase(self.base)
    except:
        base = self.baseConfig.getNames()[0]
    print("base", base)

    print("----------- /data/user/0/de.adfcmuenchen.abstellanlagen")
    utils.walk("/data/user/0/de.adfcmuenchen.abstellanlagen")
    # print("----------- cwd", os.getcwd())
    # utils.walk(".")
    # print("------------getDataDir", utils.getDataDir())
    # utils.walk(utils.getDataDir())
    # print("------------getExternalFilesDir", utils.getExternalFilesDir())
    # utils.walk(utils.getExternalFilesDir())

    self.executor = ThreadPoolExecutor(max_workers=1)
    self.future = None

    laststored = self.getConfigValue("gespeichert")
    if not laststored:
        self.setConfigValue("gespeichert", time.strftime("%Y.%m.%d %H:%M:%S"))
    self.useGoogle = bool(self.getConfigValue("useGoogle"))
    self.setup(base)
    return self.root
def test_walk(self):
    data = {
        'foo': 'bar',
        'bam': {
            'foo': 'bar2',
            'foo2': ['bar3', 'bar4'],
        }
    }

    actual = []
    callback = lambda item, key, node: actual.append(item)
    utils.walk(data, callback)

    expected = ['bar', 'bar2', 'bar3', 'bar4']
    self.assertItemsEqual(expected, actual)
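# The test above implies a utils.walk(data, callback) that visits every leaf
# value of a nested dict/list structure and calls callback(item, key, parent).
# A minimal sketch under that assumption -- the real utils module is not part
# of this snippet, so names and signature here are a hypothetical reconstruction:
def walk(node, callback, key=None, parent=None):
    """Visit every leaf of nested dicts/lists, calling callback(item, key, parent)."""
    if isinstance(node, dict):
        for k, v in node.items():
            walk(v, callback, key=k, parent=node)
    elif isinstance(node, (list, tuple)):
        for i, v in enumerate(node):
            walk(v, callback, key=i, parent=node)
    else:
        callback(node, key, parent)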
def cleanDirs(root, dirNamesList):
    import shutil

    folders = walk(root, targetDirs=dirNamesList).get("dirs")

    for dirSet in folders:
        path = os.path.join(root, *dirSet)
        shutil.rmtree(path, ignore_errors=True)
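# Several snippets below call walk(root, targetDirs=..., targetFiles=...,
# targetExtensions=...) and read "dirs"/"files"/"extensions" lists whose
# entries are path components relative to root. The real implementation is
# not shown here; this is only a sketch of the contract those calls rely on,
# and every name in it is a hypothetical reconstruction:
import os

def walk(root, targetDirs=None, targetFiles=None, targetExtensions=None):
    found = {"dirs": [], "files": [], "extensions": []}
    dirTargets = set([targetDirs] if isinstance(targetDirs, str) else (targetDirs or []))
    extTargets = tuple(targetExtensions or ())
    for curDir, dirNames, fileNames in os.walk(root):
        rel = os.path.relpath(curDir, root)
        relParts = [] if rel == "." else rel.split(os.sep)
        for d in dirNames:
            if d in dirTargets:
                found["dirs"].append(relParts + [d])
        for f in fileNames:
            if targetFiles and f == targetFiles:
                found["files"].append(relParts + [f])
            if extTargets and f.endswith(extTargets):
                found["extensions"].append(relParts + [f])
    return found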
def CDNF():
    # Relies on the third-party "goto" module's `label .name` / `goto .name` syntax.
    t = 0
    if EQ(True) == True:
        return True
    else:
        u = EQ(True)

    label .zero
    t = t + 1
    (H[t], S[t], a[t]) = (False, None, u)

    label .one
    if EQ(invjoin(H, 'and')) == True:
        return invjoin(H, 'and')
    else:
        u = EQ(invjoin(H, 'and'))

    I = []
    for i in range(0, len(H)):
        if evaluate(u, H[i]) == False:
            I.append(i)
    if I == []:
        goto .zero

    mu = []
    for i in I:
        mu.append(walk(u, a[i]))
        S.append(mu[-1] ^ a[i])
    for i in range(0, t):
        H[i] = MDNF(S[i])
    goto .one
def extract_dataset(dataset, extractor="arcface", gpu=-1):
    if extractor == "arcface":
        face = ArcFace(gpu)
    else:
        face = FaceNet(gpu)

    dataset_path = os.path.join(os.path.abspath(""), "images", dataset)
    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))
    features_flip = np.zeros((file_cnt, 513))

    image_cnt = 0
    for subject_id, subject in enumerate(os.listdir(dataset_path)):
        draw_progress(dataset + " " + extractor, float(image_cnt + 1) / file_cnt)
        for image in os.listdir(os.path.join(dataset_path, subject)):
            image = cv2.imread(os.path.join(dataset_path, subject, image))
            feature = face.extract(image)
            features[image_cnt, :] = np.append(feature, subject_id + 1)
            feature_flip = face.extract(cv2.flip(image, 1))
            features_flip[image_cnt, :] = np.append(feature_flip, subject_id + 1)
            image_cnt += 1

    return features, features_flip
def extractMarksThroughDataset(datasetPath, categories=None, subcategories=None,
                               parallel=False, threads=16):
    # cleanOldMarks(datasetPath)
    frames = walk(datasetPath, targetDirs=const.frames).get("dirs")
    frames = filterFolders(frames, categories, subcategories)

    if parallel:
        threads = min(threads, mp.cpu_count())
    else:
        threads = 1

    threadsList = []
    with mp.Pool(threads) as pool:
        for dirsSet in frames:
            dirsSet = dirsSet[:-1]
            categoryDir = os.path.join(datasetPath, *dirsSet)

            threadsList.append(
                pool.apply_async(extractMarks, args=(categoryDir, )))

        for r in threadsList:
            r.get()
def main():
    # parsing command-line options
    global options, markup
    option_list = [
        make_option("-o", "--out", dest="outfile",
                    help="write result to FILE", metavar="FILE"),
        make_option("-m", "--html", dest="html",
                    action="store_true", default=False,
                    help="output in HTML"),
        make_option("-q", "--quiet", dest="quiet",
                    action="store_true", default=False,
                    help="show errors only"),
    ]
    parser = OptionParser(option_list=option_list,
                          usage="usage: %prog [options] files|dirs",
                          version="%prog " + prog_version)
    options, args = parser.parse_args()
    LogOptions.level = 0                # show all errors
    if options.html:
        markup = html_markup
        LogOptions.br = '<br />'
        LogOptions.escape = True
    if options.outfile:
        LogOptions.outfile = open(options.outfile, 'at')
    else:
        LogOptions.outfile = sys.stdout
    errors = 0
    for f in walk(args):
        errors += process_file(f)
    sys.exit(errors)
def updateCategoriesIndices(datasetPath, categories):
    from utils import walk, makeJSONname
    from verifier import getFullCategory

    marks = walk(datasetPath, targetFiles=makeJSONname(const.marks)).get("files")

    for mrk in marks:
        try:
            marksPath = os.path.join(datasetPath, *mrk)
            category, subcategory = mrk[-3:-1]
            fullCategory = getFullCategory(category, subcategory)

            if fullCategory not in categories:
                continue

            marks = openJsonSafely(marksPath)
            for f, value in marks.items():
                fullCategory = value[const.fullCategory]
                value[const.ctgIdx] = categories.index(fullCategory)

            json.dump(marks, open(marksPath, "w"), indent=3)
            print(f"{Fore.BLUE}JSON file {marksPath} has been fixed{Style.RESET_ALL}")
        except Exception as e:
            print(e)
def test_walk_ignorable(self):
    os.chdir(sys.path[0])
    self.assertEqual(
        sorted([
            Path('ignorable/b.txt'),
            Path('ignorable/c/e.txt'),
            Path('ignorable/f/g.txt')
        ]),
        sorted(list(walk(Path('ignorable'), []))))
def check(args: argparse.Namespace) -> None:
    """Verify that index matches files, print out any mismatches

    :param args: must have attr cold_dir: str
    """
    cold_dir = Path(args.cold_dir)
    assert cold_dir.is_dir(), "cold_dir not found!"
    index = Index(cold_dir)
    fail_count = 0

    # Set up progress bar
    total = sum([(cold_dir / p).stat().st_size if (cold_dir / p).exists() else 0
                 for p in index.keys()])
    with tqdm(total=total, unit="B", unit_scale=True) as pbar:
        # Check that index is correct
        for p, h in index.items():
            if h != hash_file(cold_dir / p, pbar):
                print(f"Verification failed: '{p}'.", file=sys.stderr)
                fail_count += 1

        # Additionally check that index is complete
        for file in walk(cold_dir,
                         [PathAwareGitWildMatchPattern('index.txt', cold_dir)]):
            rel_path: PurePath = file.relative_to(cold_dir)
            if rel_path not in index:
                print(f"File missing from index: '{rel_path}'.", file=sys.stderr)
                fail_count += 1

    if fail_count == 0:
        print("OK: Data is intact!")
    else:
        print(f"FAIL: There were {fail_count} failures!")
def process_dee(self, dir_path, ext) -> List[Dict[str, Any]]:
    # Process dependees (libraries) to get function implementations
    indexes = []
    # _, nwo = remap_nwo(nwo)
    # if nwo is None:
    #     return indexes

    # tmp_dir = download(nwo)
    files = walk(dir_path, ext)
    # files = glob.iglob(tmp_dir.name + '/**/*.{}'.format(ext), recursive=True)
    sha = None

    for f in files:
        definitions = self.get_function_definitions(f)
        if definitions is None:
            continue
        '''
        if sha is None:
            sha = get_sha(dir, nwo)
        '''
        nwo, path, functions = definitions
        indexes.extend((self.extract_function_data(func, nwo, path, sha)
                        for func in functions if len(func['function_tokens']) > 1))
    return indexes
def sync(args: argparse.Namespace) -> None:
    """Prompt user for each change towards getting hot dir, cold dir and cold index synced

    :param args: must have attrs hot_dir: str and cold_dir: str
    """
    hot_dir, cold_dir = Path(args.hot_dir), Path(args.cold_dir)
    assert hot_dir.is_dir(), "hot_dir not found!"
    assert cold_dir.is_dir(), "cold_dir not found!"
    index = Index(cold_dir)
    # inv_index = defaultdict(list)
    # for k, v in index.items():
    #     inv_index[v].append(k)

    # Set up progress bar
    total = 0
    for file in itertools.chain(walk(hot_dir, []), walk(cold_dir, [])):
        total += file.stat().st_size
    with tqdm(total=total, unit="B", unit_scale=True) as pbar:
        # Find all changes required
        changes = walk_trees(PurePath(), index, hot_dir, cold_dir, [], [], pbar)
        # TODO: calculate reverse indices recursively for added and removed
        # TODO: find all moved. Can be also moved into added or out from removed
        # for file in hot_only:
        #     h = hash_file(os.path.join(args.hot_dir, file), pbar)
        #     if h in inv_index and set(cold_only) & set(inv_index[h]):
        #         print(set(cold_only) & set(inv_index[h]), "moved to", file)

    # Confirm each change with the user
    changes.sort(key=attrgetter('name'))
    actions = []
    action_total = 0
    for change in changes:
        if yesno(str(change), default=False):
            actions.append(change)
            action_total += change.size

    # Carry out all confirmed changes
    with tqdm(total=action_total, unit="B", unit_scale=True) as pbar:
        for change in actions:
            change.apply(args.hot_dir, args.cold_dir, index)
            pbar.update(change.size)
    index.store()
    print("OK: Done!")
def sign(self):
    """return sign in the form {relpath: (origin layer, SHA256)}
    """
    sigs = {}
    for entry, sig in utils.walk(self.target,
                                 utils.sign, kind="files"):
        relpath = entry.relpath(self._target.directory)
        sigs[relpath] = (self.interface.url, "static", sig)
    return sigs
def prepareVideo(rpath, wpath):
    videos = walk(rpath, targetExtensions=Extensions.videos()).get("extensions")

    for vset in videos:
        vpath = os.path.join(rpath, *vset)

        for frame in generateFrames(vpath):
            cv2.imwrite(os.path.join(wpath, "negative-{}{}".format(uuid.uuid1(), Extensions.jpg)), frame)
def create_plist():
    with open(e('${GUI_DESTDIR}/gui-plist'), 'w') as f:
        for i in walk('${GUI_DESTDIR}'):
            if not os.path.isdir(e('${GUI_DESTDIR}/${i}')):
                f.write(e('/usr/local/www/gui/${i}\n'))

        with open(e('${GUI_STAGEDIR}/custom-plist')) as c:
            f.write(c.read())
def process_dent(self, nwo, ext, library_candidates) -> Tuple[List[Dict[str, Any]], List[Tuple[str, str]]]:
    # Process dependents (applications) to get function calls
    dents = []
    edges = []
    _, nwo = remap_nwo(nwo)
    if nwo is None:
        return dents, edges

    tmp_dir = download(nwo)
    files = walk(tmp_dir, ext)
    sha = None

    for f in files:
        context_and_calls = self.get_context_and_function_calls(f)
        if context_and_calls is None:
            continue
        if sha is None:
            sha = get_sha(tmp_dir, nwo)

        nwo, path, context, calls = context_and_calls
        libraries = []
        for cxt in context:
            if type(cxt) == dict:
                libraries.extend([v.split('.')[0] for v in cxt.values()])
            elif type(cxt) == list:
                libraries.extend(cxt)

        match_scopes = {}
        for cxt in set(libraries):
            if cxt in library_candidates:
                match_scopes[cxt] = library_candidates[cxt]

        for call in calls:
            for depended_library_name, dependend_library_functions in match_scopes.items():
                for depended_library_function in dependend_library_functions:
                    # Other potential filters: len(call['identifier']) > 6 or len(call['identifier'].split('_')) > 1
                    if (call['identifier'] not in self.language_parser.STOPWORDS and
                            ((depended_library_function['identifier'].split('.')[-1] == '__init__' and
                              call['identifier'] == depended_library_function['identifier'].split('.')[0]) or
                             ((len(call['identifier']) > 9 or
                               (not call['identifier'].startswith('_') and
                                len(call['identifier'].split('_')) > 1)) and
                              call['identifier'] == depended_library_function['identifier']))):
                        dent = {
                            'nwo': nwo,
                            'sha': sha,
                            'path': path,
                            'language': self.language,
                            'identifier': call['identifier'],
                            'argument_list': call['argument_list'],
                            'url': 'https://github.com/{}/blob/{}/{}#L{}-L{}'.format(
                                nwo, sha, path,
                                call['start_point'][0] + 1,
                                call['end_point'][0] + 1)
                        }
                        dents.append(dent)
                        edges.append((dent['url'], depended_library_function['url']))
    return dents, edges
def sign(self):
    """return sign in the form {relpath: (origin layer, SHA256)}
    """
    sigs = {}
    for entry, sig in utils.walk(self.target_file.dirname(),
                                 utils.sign, kind="files"):
        relpath = entry.relpath(self._target.directory)
        sigs[relpath] = (self.current.url, "dynamic", sig)
    return sigs
def find_scc(G):
    # Kosaraju's algorithm: traverse the transposed graph in the
    # topologically sorted order of the original graph.
    GT = tr(G)
    sccs, seen = [], set()
    res = dfs_topsort(G)
    for u in res:
        if u in seen:
            continue
        C = walk(GT, u, seen)   # Nodes reachable in GT, avoiding already-seen ones
        seen.update(C)
        sccs.append(C)
    return sccs
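# find_scc above assumes a companion traversal walk(G, s, S) that returns the
# nodes reachable from s in G while never entering the forbidden set S. A
# minimal sketch under that assumption, with G as a dict mapping each node to
# a set of neighbours (the surrounding module is not shown here):
def walk(G, s, S=set()):
    P, Q = dict(), set()                  # P maps visited node -> predecessor
    P[s] = None
    Q.add(s)
    while Q:
        u = Q.pop()
        for v in G[u].difference(P, S):   # Unvisited and not forbidden
            Q.add(v)
            P[v] = u
    return P                              # Iterating P yields the visited nodes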
def __init__(self, well_dir, params):
    # First generate the parent object.
    super(LogContainer, self).__init__(params)

    self.well_dir = well_dir
    self.reset_all()

    for shp in utils.walk(well_dir, '\\.shp$'):
        with fiona.open(shp, "r") as wells:
            for well in wells:
                shp = shape(well['geometry'])
                self.lookup[shp] = well["properties"]['name']
def actualizeInfoWithFrames(datasetPath):
    print("\nActualizing info...")

    actualInfo = {}
    os.makedirs(os.path.dirname(Path.actualInfo), exist_ok=True)

    frames = walk(datasetPath, targetDirs=const.frames)
    frames = frames.get("dirs")

    for idx, dirsList in enumerate(frames):
        dirsList = dirsList[:-1]
        fullpath = os.path.join(datasetPath, *dirsList)

        images = walk(fullpath, targetExtensions=Extensions.images()).get("extensions")

        # TODO: some images may not contain the category
        putNested(dictionary=actualInfo, keys=dirsList, value=len(images))
        dirsList[-1] = const.overall
        updateNested(dictionary=actualInfo, keys=dirsList, value=len(images))

        print("\r{:.1f}% of work has been done".format((idx + 1) / len(frames) * 100), end="")
    print()

    json.dump(actualInfo, open(Path.actualInfo, "w"), indent=3)
def extract_dataset(dataset: str, extractor: str = "arcface", gpu: int = -1) -> np.ndarray:
    """Extract feature vectors of each image within a dataset.
    Return an array containing all extracted features.

    Parameters
    ----------
    dataset: str
        Dataset to extract features from. Examples would be gtdb or lfw
    extractor: str = "arcface"
        Model to use for feature extraction. Currently supported options
        are arcface/facenet
    gpu: int = -1
        GPU id to use for feature extraction and preprocessing models.
        If -1 is given, CPU is used rather than GPU

    Returns
    -------
    np.ndarray
        Array of features corresponding to each image from a dataset.
        Subject ids are appended to the end of the feature vectors.
        Resulting output will be of shape (number of dataset images) x 513
    """
    if extractor == "arcface":
        face = ArcFace(gpu)
    else:
        face = FaceNet(gpu)

    dataset_path = f"images/{dataset}"
    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))

    subjects = sorted(os.listdir(dataset_path), key=lambda subject: subject.lower())

    image_cnt = 0
    for subject_id, subject in enumerate(subjects):
        progress_bar(f"{dataset} {extractor}", (image_cnt + 1) / file_cnt)
        for image in os.listdir(f"{dataset_path}/{subject}"):
            image = cv2.imread(f"{dataset_path}/{subject}/{image}")
            feature = face.extract(image)
            features[image_cnt, :] = np.append(feature, subject_id + 1)
            image_cnt += 1

    return features
def main():
    # parsing command-line options
    global options
    option_list = [
        make_option("-o", "--out", dest="outfile",
                    help="write result to FILE", metavar="FILE"),
        make_option("-d", "--dest-dir", dest="dest_dir",
                    help="save result files to DIR", metavar="DIR"),
        make_option("-z", "--zip", dest="zip",
                    action="store_true", default=False,
                    help="zip result file"),
        make_option("-n", "--no-zip", dest="nozip",
                    action="store_true", default=False,
                    help="don't zip result file"),
        make_option("-c", "--check-only", dest="check_only",
                    action="store_true", default=False,
                    help="check only, do not save result"),
        make_option("-f", "--force", dest="force",
                    action="store_true", default=False,
                    help="don't validate XML"),
##        make_option("-b", "--pre-fb2-lint", dest="pre_fb2lint",
##                    action="store_true", default=False,
##                    help="pre process FB2 validation"),
##        make_option("-a", "--post-fb2-lint", dest="post_fb2lint",
##                    action="store_true", default=False,
##                    help="post process FB2 validation"),
        make_option("-e", "--output-encoding", dest="output_encoding",
                    default='utf-8', metavar="ENC",
                    help="fb2 output encoding"),
        make_option("-v", "--verbose", dest="verbose",
                    action="store_true", default=False,
                    help="more info"),
        make_option("-q", "--quiet", dest="quiet",
                    action="store_true", default=False,
                    help="less info"),
    ]
    parser = OptionParser(option_list=option_list,
                          usage="usage: %prog [options] files|dirs",
                          version="%prog " + prog_version)
    options, args = parser.parse_args()
    if options.verbose:
        LogOptions.level = 0
    elif options.quiet:
        LogOptions.level = 2
    starttime = time.time()
    # walk the files
    for filename in walk(args):
        process_file(filename)
    # print stats
    if options.verbose:
        et = time.time() - starttime
        print 'elapsed time: %.2f secs' % et
        print 'average: %.3f secs' % (et / total_files)
def fixJsons():
    import os
    from utils import walk, readLines

    categories = readLines(Path.categories)
    jsons = walk(Path.dataset, targetFiles="marks.json").get("files")

    for i, jsn in enumerate(jsons):
        print(f"\rProcessing {i} json file", end="")
        path = os.path.join(Path.dataset, *jsn)
        marks = json.load(open(path, "r"))

        for name, items in marks.items():
            ctgIdx = categories.index(items[Constants.fullCategory])
            items[Constants.ctgIdx] = ctgIdx

        json.dump(marks, open(path, "w"), indent=4)
def plan_layers(self, layers, output_files):
    config = ComposerConfig()
    config = config.add_config(
        layers["layers"][0] / ComposerConfig.DEFAULT_FILE, True)

    for i, layer in enumerate(layers["layers"]):
        log.info("Processing layer: %s", layer.url)
        if i + 1 < len(layers["layers"]):
            next_layer = layers["layers"][i + 1]
            config = config.add_config(
                next_layer / ComposerConfig.DEFAULT_FILE, True)
        list(e for e in utils.walk(layer.directory,
                                   self.build_tactics,
                                   current=layer,
                                   config=config,
                                   output_files=output_files))
    plan = [t for t in output_files.values() if t]
    return plan
def main():
    LogOptions.level = 0
    Stat.starttime = time.time()

    for f in walk(sys.argv[1:]):
        process_file(f)

    # print stats
    def p_stat(msg, v):
        print '%s: %d (%d%%)' % (msg, v, round(v * 100. / Stat.total))

    print 'total files:', Stat.total
    p_stat('not an xml file', Stat.not_xml)
    p_stat('sax parsing error', Stat.sax_errors)
    p_stat('dom parsing error', Stat.xml_errors)
    p_stat('fb2 schema violation', Stat.fb2_errors)
    p_stat('inconsistent fb2 file', Stat.extra_errors)
    p_stat('good files', Stat.good)
    et = time.time() - Stat.starttime
    print 'elapsed time: %.2f secs' % et
    print 'average: %.3f secs' % (et / Stat.total)
def content(ctx, **opts):
    ''' content processing
    '''
    def process(action, path, expr=None, profile=None):
        _content = content.Content('content', codecs.open(path, 'r', 'utf-8').read())
        if expr:
            method, expr = expr.split(':', 1)
            if method not in ('xpath', 'css'):
                utils.show_help(ctx)
            if action == 'select':
                print json.dumps(_content.select(expr, method).extract())
            elif action == 'remove':
                print json.dumps(_content.remove(expr, method).extract())
        if profile:
            result = _content.process(profile)
            if isinstance(result, content.Content):
                print json.dumps(result.extract())
            elif isinstance(result, dict):
                print json.dumps(result)
            else:
                raise RuntimeError('Unknown result type, %s' % type(result))

    if not opts['path']:
        utils.show_help(ctx)
    if opts['action'] in ('select', 'remove') and not opts['expr']:
        utils.show_help(ctx)

    import content

    _profile = None
    if opts['profile'] and os.path.exists(opts['profile']):
        _profile = codecs.open(opts['profile'], 'r', 'utf-8').read()

    for path in utils.walk(opts['path']):
        try:
            process(opts['action'], path, expr=opts['expr'], profile=_profile)
        except Exception, err:
            print >> sys.stderr, "[ERROR] Cannot process the file, %s. Error: %s" % (path, err)
def extractCropsThroughDataset(datasetPath, extractionPath=None, categories=None,
                               subcategories=None, extension=Extensions.png,
                               params=None, parallel=True, threads=16):
    frames = walk(datasetPath, targetDirs=const.frames).get("dirs")
    frames = filterFolders(frames, categories, subcategories)

    if parallel:
        threads = min(threads, mp.cpu_count())
    else:
        threads = 1

    globalIdx = 0
    threadsList = []
    with mp.Pool(threads) as pool:
        for dirsSet in frames:
            dirsSet = dirsSet[:-1]
            categoryDir = os.path.join(datasetPath, *dirsSet)
            length = len(
                openJsonSafely(
                    os.path.join(categoryDir, makeJSONname(const.marks))))

            threadsList.append(
                pool.apply_async(extractCrops,
                                 args=(categoryDir, ),
                                 kwds={
                                     "extractionPath": extractionPath,
                                     "extension": extension,
                                     "params": params,
                                     "globalIdx": globalIdx
                                 }))

            globalIdx += length

        for r in threadsList:
            r.get()
def linkextract(ctx, **opts):
    ''' link extractor
    '''
    if not opts['path']:
        utils.show_help(ctx)

    import re
    import extract

    URLFILTER = None
    if opts['filter']:
        URLFILTER = re.compile(opts['filter'])

    le = extract.LinkExtractor()
    for path in utils.walk(opts['path']):
        try:
            le.feed(open(path).read().decode('utf-8'))
            for link in filter(lambda u: URLFILTER.search(u) if URLFILTER else True, le.links):
                print link
        except Exception, err:
            print >> sys.stderr, "[ERROR] Cannot process the file, %s" % (path,)
def extract_dataset(dataset, extractor="arcface", gpu=-1):
    if extractor == "arcface":
        face = ArcFace(gpu)
    else:
        face = FaceNet(gpu)

    dataset_path = os.path.join(os.path.abspath(""), "images", dataset)  # dataset will be "lfw" or "gtdb"
    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))
    # features_flip = np.zeros((file_cnt, 513))  # omitted by Kai

    image_cnt = 0
    subjects = os.listdir(dataset_path)
    # This is to do case-insensitive sorting.
    subjects = [x for _, x in sorted(
        zip([subject.lower() for subject in subjects], subjects))]
    for subject_id, subject in enumerate(subjects):
        progress_bar(dataset + " " + extractor, float(image_cnt + 1) / file_cnt)
        for image in os.listdir(os.path.join(dataset_path, subject)):
            image = cv2.imread(os.path.join(dataset_path, subject, image))
            # The return value of extract here should be a row vector of 512 elements.
            feature = face.extract(image)
            # The return value of append here should be a row vector of 513 elements.
            features[image_cnt, :] = np.append(feature, subject_id + 1)
            # feature_flip = face.extract(cv2.flip(image, 1))  # omitted by Kai
            # features_flip[image_cnt, :] = np.append(feature_flip, subject_id + 1)  # omitted by Kai
            image_cnt += 1

    # return features, features_flip  # omitted by Kai
    return features
def __call__(self):
    # copy the entire tree into the
    # hooks/relations/<interface>
    # directory
    log.debug("Copying Interface %s: %s",
              self.interface.name, self.target)
    # Ensure the path exists
    if self.target.exists():
        # XXX: fix this to do actual updates
        return
    ignorer = utils.ignore_matcher(self.config.ignores)
    for entity, _ in utils.walk(self.interface.directory,
                                lambda x: True,
                                matcher=ignorer,
                                kind="files"):
        target = entity.relpath(self.interface.directory)
        target = (self.target / target).normpath()
        target.parent.makedirs_p()
        entity.copy2(target)
    init = self.target / "__init__.py"
    if not init.exists():
        # ensure we can import from here directly
        init.touch()
def actualizeInfoWithJsons(datasetPath):
    print("\nActualizing info...")

    actualInfo = {}
    os.makedirs(os.path.dirname(Path.actualInfo), exist_ok=True)

    frames = walk(datasetPath, targetDirs=const.frames)
    frames = frames.get("dirs")

    for idx, dirsList in enumerate(frames):
        dirsList = dirsList[:-1]
        fullpath = os.path.join(datasetPath, *dirsList, makeJSONname(const.marks))

        marks = json.load(open(fullpath, "r"))

        putNested(dictionary=actualInfo, keys=dirsList, value=len(marks))
        dirsList[-1] = const.overall
        updateNested(dictionary=actualInfo, keys=dirsList, value=len(marks))

        print("\r{:.1f}% of work has been done".format((idx + 1) / len(frames) * 100), end="")
    print()

    json.dump(actualInfo, open(Path.actualInfo, "w"), indent=3)
def load_plugins(directories, interface):
    strip_ext = lambda f: os.path.splitext(os.path.basename(f))[0]
    is_valid = lambda obj: inspect.isclass(obj) and issubclass(obj, interface)

    for directory in directories:
        for dirname, _, filenames in utils.walk(directory, ['*.py', '*.pyc']):
            added = False
            if dirname not in sys.path:
                sys.path.insert(0, dirname)
                added = True

            for module_name in set(strip_ext(f) for f in filenames):
                try:
                    module_info = imp.find_module(module_name, [dirname])
                    # This does a reload if already imported.
                    module = imp.load_module(module_name, *module_info)
                    for name, declaration in inspect.getmembers(module, is_valid):
                        yield PluginProxy(name, declaration)
                except:
                    pass
                finally:
                    module_info[0].close()

            if added:
                sys.path.remove(dirname)
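# load_plugins above unpacks utils.walk(directory, patterns) like os.walk
# triples, with the filenames already filtered by the glob patterns. A minimal
# sketch of such a wrapper, assuming that behaviour (the real utils module is
# not part of this snippet):
import fnmatch
import os

def walk(directory, patterns):
    for dirname, dirnames, filenames in os.walk(directory):
        matched = [f for f in filenames
                   if any(fnmatch.fnmatch(f, p) for p in patterns)]
        yield dirname, dirnames, matched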
def augmentDatasetWithRepeats(augmentationName, augmentations, imageExtension, repeats=1, params=None):
    actualInfo = downloadActualInfo().get(const.original, {})
    target = getTargetCount(actualInfo, targetType="max")  # not really the best choice

    path = os.path.join(Path.dataset, const.original)
    keys = walk(path, targetDirs=const.frames).get("dirs")

    for set_ in keys:
        set_ = set_[:-1]
        count = getNested(dictionary=actualInfo, keys=set_, default=0)

        category, subcategory = set_
        categoryPath = os.path.join(path, category, subcategory)

        if count == 0:
            print(f"{Fore.RED}Update actual info for {categoryPath} {Style.RESET_ALL}")
            continue

        multiplier = int(target // count)
        ctgRepeats = repeats * multiplier

        augmentCategoryWithRepeats(
            categoryPath=categoryPath,
            fullCategory=getFullCategory(category, subcategory),
            augmentPath=os.path.join(Path.dataset, augmentationName),
            augmentations=augmentations,
            extension=imageExtension,
            repeats=ctgRepeats,
            params=params)
def inspect(charm):
    tw = utils.TermWriter()
    manp = charm / ".composer.manifest"
    comp = charm / "composer.yaml"
    if not manp.exists() or not comp.exists():
        return
    manifest = json.loads(manp.text())
    composer = yaml.load(comp.open())
    a, c, d = utils.delta_signatures(manp)

    # ordered list of layers used for legend
    layers = list(manifest["layers"])

    def get_depth(e):
        rel = e.relpath(charm)
        depth = len(rel.splitall()) - 2
        return rel, depth

    def get_suffix(rel):
        suffix = ""
        if rel in a:
            suffix = "+"
        elif rel in c:
            suffix = "*"
        return suffix

    def get_color(rel):
        # name of layer this belongs to
        color = tw.term.normal
        if rel in manifest["signatures"]:
            layer = manifest["signatures"][rel][0]
            layer_key = layers.index(layer)
            color = getattr(tw, theme.get(layer_key, "normal"))
        else:
            if entry.isdir():
                color = tw.blue
        return color

    tw.write("Inspect %s\n" % composer["is"])
    for layer in layers:
        tw.write("# {color}{layer}{t.normal}\n",
                 color=getattr(tw, theme.get(layers.index(layer), "normal")),
                 layer=layer)
    tw.write("\n")
    tw.write("{t.blue}{target}{t.normal}\n", target=charm)

    ignorer = utils.ignore_matcher(config.DEFAULT_IGNORES)
    walk = sorted(utils.walk(charm, get_depth),
                  key=lambda x: x[1][0])
    for i in range(len(walk) - 1):
        entry, (rel, depth) = walk[i]
        nEnt, (nrel, ndepth) = walk[i + 1]
        if not ignorer(rel):
            continue

        tw.write("{prefix}{layerColor}{entry} "
                 "{t.bold}{suffix}{t.normal}\n",
                 prefix=get_prefix(walk, i, depth, ndepth),
                 layerColor=get_color(rel),
                 suffix=get_suffix(rel),
                 entry=rel.name)
        'coffee_rings': 0,
        'distort': False,
        'scribble': False,
    }
    for k, v in defaults.items():
        if cfg.get(k) is None:
            cfg[k] = v

    cfg['outfile'] = args.out

    # Gather files to work on, then go and do them.
    if os.path.isfile(target):
        Notice.hr_header("Processing file: {}".format(target))
        main(target, cfg)
        Notice.hr_header("Done")
    elif os.path.isdir(target):
        if args.recursive:
            Notice.info("Looking for SEGY files in {} and its subdirectories".format(target))
            for target in utils.walk(target, "\\.se?gy$"):
                Notice.hr_header("Processing file: {}".format(target))
                main(target, cfg)
        else:
            Notice.info("Finding SEGY files in {}".format(target))
            for target in utils.listdir(target, "\\.se?gy$"):
                Notice.hr_header("Processing file: {}".format(target))
                main(target, cfg)
        Notice.hr_header("Done")
    else:
        Notice.fail("Not a file or directory.")
def sgy2shp(input_dir, output_dir, convert=False):
    """
    Extracts trace locations from SEGY files and saves them in a shape
    file. A shape file is generated for each SEGY file.

    Returns nothing, side effect: writes the shape files.

    Args:
        input_dir (str): Directory containing SEGY files.
        output_dir (str): Directory to save shape files.
    """
    line_out_file = os.path.join(output_dir, "seismic_lines.shp")
    if os.path.exists(line_out_file):
        raise ShapeFileExists

    # Set up the shapefile schema.
    line_schema = {'geometry': 'LineString',
                   'properties': {'segyfile': 'str',
                                  'line': 'str'}
                   }

    with fiona.open(line_out_file, "w",
                    driver="ESRI Shapefile",
                    crs=crs.from_epsg(26920),
                    schema=line_schema) as line_out:

        for path in utils.walk(input_dir, "\\.se?gy$"):

            filebase = os.path.splitext(os.path.basename(path))[0]

            # Read in the headers.
            segy = obspy.read(path,
                              headonly=True,
                              unpack_trace_header=True)

            points = []
            point_out_file = os.path.join(output_dir, "." + filebase + '.shp')

            # Set up the shapefile schema.
            point_schema = {'geometry': 'Point',
                            'properties': {'line': 'str',
                                           'segyfile': 'str',
                                           'trace': 'int'}
                            }

            with fiona.open(point_out_file, "w",
                            driver="ESRI Shapefile",
                            crs=crs.from_epsg(26920),
                            schema=point_schema) as trace_out:

                for i, trace in enumerate(segy):

                    header = trace.stats.segy.trace_header
                    scalar = header.scalar_to_be_applied_to_all_coordinates
                    if scalar == -100:
                        gain = 0.01
                    elif scalar == -10:
                        gain = 0.1
                    else:
                        gain = 1.0

                    x = float(header.source_coordinate_x) * gain
                    y = float(header.source_coordinate_y) * gain

                    # Sanity check for geometry order of magnitude.
                    if x > 9e5 or y > 55e6:
                        if x > 9e6 or y > 55e7:
                            log.info('Found weird coords: dividing by 100')
                            x = x / 100.0
                            y = y / 100.0
                        else:
                            log.info('Found weird coords: dividing by 10')
                            x = x / 10.0
                            y = y / 10.0
                    else:
                        pass

                    if convert:
                        log.info("Converting from NAD27 to NAD83")
                        x, y = pp.transform(utm_nad27, utm_nad83, x, y)

                    p = Point(x, y)
                    points.append(p)
                    trace_out.write({'geometry': mapping(p),
                                     'properties': {'line': filebase,
                                                    'segyfile': path,
                                                    'trace': i}
                                     })

            # We need this to plot seismic lines on the map.
            linestring = LineString(points)
            line_out.write({'geometry': mapping(linestring),
                            'properties': {'segyfile': path,
                                           'line': filebase}
                            })
    links = {}
    content = ''
    with open(path, 'r') as input:
        for line in input:
            # See if this line is a footnote link.
            m = re.search(' \[([0-9]+)\]: (http://[\S]+)(?: \(([^\)]+)\))?$', line)
            if m:
                # Yup, key it by its number.
                num, link, desc = m.groups('')
                links[num] = (link, desc)
            else:
                # Not a link line, so add it.
                content += line

    # Now replace all of the footnoted links with inline ones.
    for num, pair in links.iteritems():
        def replace_link(m):
            if pair[1] != '':
                return '[%s](%s "%s")' % (m.group(1), pair[0], pair[1])
            else:
                return '[%s](%s)' % (m.group(1), pair[0])

        pattern = '\[([^\]]+)\]\[%s\]' % (num)
        content = re.sub(pattern, replace_link, content)

    # Save the file back out.
    with open(path, 'w') as output:
        output.write(content)

utils.walk('posts', fix_file)
    if last_length > 0:
        print ("\r" + (" " * last_length) + "\r"),

    print text,
    sys.stdout.flush()
    last_length = len(text)


# clean out the output directory
utils.kill_dir('html')

# copy over the static content
shutil.copytree('static', 'html')

# load the template page
post_template = open('templates/post.html', 'r').read()

def strip_newline(line):
    return line.rstrip()

utils.walk('posts', read_post, '.md')

years = get_years(posts)
tags = get_tags(posts)

i = 1
for post in posts:
    post.output(years, tags)
    write_line('%s/%s' % (i, len(posts)))
    i += 1

write_line('Processed %s posts.' % (len(posts),))
# Parses the titles and permalinks from markdown.
import utils
import re

def fix_file(path):
    with open(path, 'r') as input:
        content = input.read()

    # Parse the title and link.
    m = re.search('\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/', content)

    # Add the title.
    result = 'title = ' + m.group(1) + '\n'

    perma = utils.linkify(m.group(1))
    if perma != m.group(2):
        result = result + 'permalink = ' + m.group(2) + '\n'
        print path
        print perma
        print m.group(2)
        print '---'

    result = result + content

    # Save the file back out.
    with open(path, 'w') as output:
        output.write(result)

utils.walk('posts', fix_file, '.md')
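# The migration scripts in this group call utils.walk(dir, callback, ext):
# apply a callback to every file under dir, optionally filtered by extension.
# A minimal sketch under that assumption -- the real utils module is not shown
# here, so this is only a hypothetical reconstruction of its contract:
import os

def walk(directory, callback, ext=None):
    for dirname, _, filenames in os.walk(directory):
        for filename in filenames:
            if ext is None or filename.endswith(ext):
                callback(os.path.join(dirname, filename))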
# Renames the post files to Jekyll-style.
import os.path
import utils
import re

# http://journal.stuffwithstuff.com/2008/02/09/
def fix_file(path):
    with open(path, 'r') as input:
        print path
        m = re.search('(.*)\.markdown', os.path.basename(path))
        date = m.group(1)

        content = input.read()
        m = re.search('title: "(.*)"', content)
        perm = utils.linkify(m.group(1))

        # Save the file back out.
        with open('new/%s-%s.md' % (date, perm), 'w') as output:
            output.write(content)

utils.ensure_dir('new')
utils.walk('_posts', fix_file, '.markdown')
# Removes markdown titles.
import utils
import re

def fix_file(path):
    with open(path, "r") as input:
        content = input.read()

    # Parse the title and link.
    content = re.sub(
        "\n+\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/\)\n+",
        "\n",
        content)

    # Save the file back out.
    with open(path, "w") as output:
        output.write(content)

utils.walk("posts", fix_file, ".md")
            # See if this line ends the code block.
            if line.startswith(indent):
                content += empty_lines
                empty_lines = ""
                content += line[len(indent):]
            else:
                content += "{% endhighlight %}\n"
                content += empty_lines
                empty_lines = ""
                content += line
                indent = None
                language = None
        else:
            content += empty_lines
            empty_lines = ""

            # See if this line starts a code block.
            m = re.search("(( )+):::(.*)\n$", line)
            if m:
                indent = m.group(1)
                language = m.group(3)
                content += "{% highlight " + language + " %}\n"
            else:
                content += line

    # Save the file back out.
    with open(path, "w") as output:
        output.write(content)

utils.walk("_posts", fix_file)