def timing(name=""): t = perf_counter() yield print("%s: %0.06f" % (name, perf_counter() - t))
def json(self, path, wcontext=None, ext=".json", conditional=True,
         postprocess=True, save_to_cache=True, extra_context=None,
         searcher=None, allow_redirect=False):
    store = self.store
    path = self._check_source(path)
    jsonpath = paths.basepath(path) + ext

    if wcontext is None:
        wcontext = self.wiki_context(
            path, conditional=conditional, save_to_cache=save_to_cache,
            searcher=searcher
        )
        if extra_context:
            wcontext.update(extra_context)
    else:
        old_context = wcontext
        wcontext = wcontext.push({"path": path})
        wcontext.searcher = searcher or old_context.searcher
        wcontext.pages = self

    # Set up holders for cached and parse times in the context; these may
    # be useful for debugging
    if "parse_time" not in wcontext:
        wcontext["parse_time"] = {}
    if "cached" not in wcontext:
        wcontext["cached"] = set()
    times = wcontext["parse_time"]
    cached = wcontext["cached"]

    jsondata = None
    # Try to get the JSON data from the cache
    if wcontext.get("conditional") and self.caching:
        jsondata = self.get_cached_json(path, jsonpath)
        if jsondata is not None:
            # print("From cache", path)
            # Add the file to the context's debug list of cached files
            cached.add(path)

    if jsondata is None:
        # It wasn't in the cache, so we'll have to load and parse it
        t = compat.perf_counter()
        # Load the content of the file
        source = store.content(path, "utf8")
        # Parse the wiki markup
        jsondata = parse_string(source)
        # Run preprocessors
        self.pre_pipe().apply(jsondata, wcontext)
        # Store the parsing time in the context for debugging
        times[path] = compat.perf_counter() - t

        # If we're caching, save the parsed JSON to a file in the cache.
        # Note that we don't add the json data to the mem cache here; we
        # only do that when a cached json file is loaded. This has the
        # effect of only caching a document in memory if it's been accessed
        # at least *twice*.
        if self.caching and save_to_cache:
            jsonified = json.dumps(jsondata)
            self.put_cache_file(jsonpath, jsonified.encode("utf8"))

    if postprocess:
        # Run postprocessors
        self.post_pipe().apply(jsondata, wcontext)

    attrs = jsondata.get("attrs")
    if allow_redirect and attrs and "redirect" in attrs:
        fullpath = self.full_path(path, attrs["redirect"])
        raise Redirect(fullpath)

    return jsondata
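# Usage sketch (hypothetical object and path names): `pages` stands in for the
# object this method belongs to. With allow_redirect=True, a page whose parsed
# attrs contain "redirect" raises Redirect, which the caller must handle.
#
#     try:
#         jsondata = pages.json("/index", allow_redirect=True)
#     except Redirect:
#         # the exception carries the resolved target path; load that instead
#         ...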
def update(self, pages, prefix="", clean=False): if clean: schema = self.searchables.schema() self.index = index.create_in(self.indexdir, schema=schema) idx = self.index # self.logger.info("Indexing %s files to %s", # ("all" if clean else "changed"), self.indexdir) doccount = 0 pagecount = 0 t = compat.perf_counter() try: w = idx.writer() except index.LockError: raise LockError new, changed, deleted = self._find_files(pages, prefix, w.reader(), clean) didsomething = False if new or changed or deleted: if deleted: didsomething = True for delpath in sorted(changed | deleted): delpath = paths.basepath(delpath) self.logger.info("Deleting %s from index", delpath) # w.delete_unique("path", delpath) w.delete_by_query(query.Term("path", delpath)) w.delete_by_query(query.Prefix("path", delpath + "#")) for addpath in sorted(new | changed): addpath = paths.basepath(addpath) added = False if addpath in changed: self.logger.info("Removing %s from index", addpath) w.delete_by_query(query.Term("path", addpath)) w.delete_by_query(query.Prefix("path", addpath + "#")) didsomething = True for doc in self.documents(pages, addpath): self._sanitize_doc(doc) self.logger.debug("Indexing %s", doc["path"]) try: if clean or "#" in doc["path"]: w.add_document(**doc) else: w.update_document(**doc) except ValueError: self.logger.error("Error indexing %r", doc) raise added = True doccount += 1 if added: pagecount += 1 didsomething = True if didsomething: self.logger.info("Committing index changes") w.commit() self.logger.info("Indexed %d docs from %d pages in %.06f seconds", doccount, pagecount, compat.perf_counter() - t) else: # self.logger.info("No changes to commit") w.cancel() return didsomething