def visit(self, node):
    if type(node) in (ast.ClassDef, ast.FunctionDef):
        self.stack.append(node.name)
        # print(self.stack)
        oname = ".".join(self.stack)
        if oname in self.config["skip"]:
            print("SKIPPING", oname)
            self.stack.pop()
            return
        super().visit(node)
        self.stack.pop()
    elif type(node) in (
        ast.Load,
        ast.Name,
        ast.Call,
        ast.Compare,
        ast.Attribute,
        ast.Expr,
        ast.arguments,
        ast.Import,
        ast.alias,
        ast.Constant,
        ast.Store,
        ast.Assign,
        ast.arg,
    ):
        super().visit(node)
    else:
        # print(type(node))
        super().visit(node)
def do_docs(self, path):
    """
    Crawl the filesystem for all docs/rst files
    """
    print("scraping documentation")
    for p in path.glob("**/*"):
        if p.is_file():
            parts = p.relative_to(path).parts
            if parts[-1].endswith("rst"):
                data = tsparse(p.read_bytes())
                blob = DocBlob()
                blob.arbitrary = data
                blob.content = {}
                blob.ordered_sections = []
                blob.item_file = None
                blob.item_line = None
                blob.item_type = None
                blob.aliases = []
                blob.example_section_data = Section()
                blob.see_also = []
                blob.signature = None
                blob.references = None
                blob.refs = []
                self.docs[parts] = json.dumps(blob.to_json(), indent=2)
            else:
                pass
def math(s):
    assert isinstance(s, list)
    for x in s:
        assert isinstance(x, str)
    res = [c.convert(_) for _ in s]
    print(res)
    return res
def visit(self, obj):
    pp = False
    if self.stack == ["scipy", "fft", "set_workers"]:
        pp = True
    try:
        qa = full_qual(obj)
    except Exception as e:
        raise RuntimeError(f"error visiting {'.'.join(self.stack)}") from e
    if not qa:
        return
    if not qa.startswith(self.root.__name__):
        return
    if obj in self.obj.values():
        fq = [k for k, v in self.obj.items() if obj is v][0]
        sn = ".".join(self.stack)
        if fq != sn:
            self.aliases[qa].append(sn)
        if pp:
            print("SKIP", obj, fq, qa)
        return
    if (qa in self.obj) and self.obj[qa] != obj:
        pass
    self.obj[qa] = obj
    if (sn := ".".join(self.stack)) != qa:
        self.aliases[qa].append(sn)
def __init__(self, lines=None, wh=None, ind=None):
    if None in (lines, wh, ind):
        return
    self.lines = lines
    self.wh = wh
    self.ind = ind
    # numpydoc bug
    l = lines[0]._line
    if l.startswith("..version"):
        lines[0]._line = ".. " + l[2:]
    # end numpydoc bug
    # scipy bug....
    if lines[0].startswith("..Deprecated"):
        lines[0]._line = ".. deprecated:: 1.5.0"
    # end scipy bug.
    # assert lines[0].startswith(".. "), lines
    l0 = lines[0]
    pred, *postd = l0.split("::")
    # assert pred.startswith(".. ")
    self.directive_name = pred[3:].strip()
    if pred.startswith(".. |"):
        # TODO:
        print("replacement not implemented yet")
    elif " " in self.directive_name:
        assert False, repr(pred)
    self.args0 = postd
    if self.ind:
        self.inner = Paragraph.parse_lines([x.text for x in self.ind.dedented()])
    else:
        self.inner = None
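# A minimal sketch of the "::" split performed above, on a plain string
# (the real code operates on Line objects exposing a ._line attribute):
pred, *postd = ".. deprecated:: 1.5.0".split("::")
assert pred[3:].strip() == "deprecated"
assert postd == [" 1.5.0"]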
def format_RRY(cls, name, ps):
    out = name + "\n"
    out += "-" * len(name) + "\n"
    if name == "Returns":
        if len(ps) > 1:
            # do heuristic to check we actually have a description list
            # and not a paragraph
            pass
    for i, p in enumerate(ps):
        # if i:
        #     out += "\n"
        if p.type and re.match(r"\w+:`", p.type):
            print(
                "Warning numpydoc may have misparsed this section.", p.name, p.type
            )
        if p.name and p.type:
            out += f"""{p.name.strip()} : {p.type.strip()}\n"""
        elif p.name:
            out += f"""{p.name.strip()}\n"""
        else:
            out += f"""{p.type.strip()}\n"""
        if p.desc:
            out += indent("\n".join(p.desc), "    ")
            out += "\n"
    return out
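# Illustrative check of the layout produced above, using a stand-in for
# the numpydoc parameter tuple (not the real nds.Parameter); with a
# 4-space description indent the expected output is:
from collections import namedtuple

_P = namedtuple("_P", "name type desc")
_expected = "Returns\n-------\nout : ndarray\n    The result.\n"
# format_RRY(cls, "Returns", [_P("out", "ndarray", ["The result."])]) == _expected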
def parse_examples(self, lines, indent=4):
    # this is bad practice but we do want normalisation here for now,
    # to check that parse->format->parse is idempotent.
    # this could be done if we had a separate "normalize" step.
    global BLACK_REFORMAT
    if BLACK_REFORMAT:
        try:
            lines = reformat_example_lines(lines, indent=indent)
        except Exception:
            print("black failed")
            print("\n".join(lines))
            raise
    return lines
def serve():
    app = QuartTrio(__name__)

    store = Store(str(ingest_dir))
    gstore = GraphStore(ingest_dir)

    async def full(package, version, sub, ref):
        return await _route(ref, store, version, gstore=gstore)

    async def full_gallery(module, version):
        return await gallery(module, store, version, gstore=gstore)

    async def g(module):
        return await gallery(module, store, gstore=gstore)

    async def gr():
        return await gallery("*", store, gstore=gstore)

    async def index():
        import papyri

        v = str(papyri.__version__)
        return redirect(f"/p/papyri/{v}/api/papyri")

    async def ex(module, version, subpath):
        return await examples(
            module=module, store=store, version=version, subpath=subpath
        )

    # return await _route(ref, GHStore(Path('.')))

    app.route("/logo.png")(logo)
    app.route("/favicon.ico")(static("favicon.ico"))
    # sub here is likely incorrect
    app.route("/p/<package>/<version>/img/<path:subpath>")(img)
    app.route("/p/<module>/<version>/examples/<path:subpath>")(ex)
    app.route("/p/<module>/<version>/gallery")(full_gallery)
    app.route("/p/<package>/<version>/<sub>/<ref>")(full)
    app.route("/gallery/")(gr)
    app.route("/gallery/<module>")(g)
    app.route("/")(index)
    port = os.environ.get("PORT", 5000)
    print("Seen config port ", port)
    prod = os.environ.get("PROD", None)
    if prod:
        app.run(port=port, host="0.0.0.0")
    else:
        app.run(port=port)
def load_one(
    bytes_: bytes,
    bytes2_: bytes,
    known_refs: Optional[FrozenSet[RefInfo]] = None,
    strict=False,
) -> IngestedBlobs:
    data = json.loads(bytes_)
    assert "backrefs" not in data
    # OK to mutate, we are the only owners and don't return it.
    data["backrefs"] = json.loads(bytes2_) if bytes2_ else []
    blob = IngestedBlobs.from_json(data)
    # TODO move that one up.
    if known_refs is None:
        known_refs = frozenset()
    if not strict:
        targets = blob.process(known_refs=known_refs, aliases=None)
        if targets:
            print("OA", len(targets))
    return blob
def normalize(self):
    """
    Apply a bunch of heuristics that try to normalise the data.
    """
    if params := self["Parameters"]:
        for i, p in enumerate(params):
            if not p.type and (":" in p.name) and not p.name.endswith(":"):
                if p.name.startswith(".."):
                    continue
                if re.match(r":\w+:`", p.name):
                    print("may have a directive", p.name)
                try:
                    name, type_ = [
                        _.strip() for _ in p.name.split(": ", maxsplit=1)
                    ]
                except Exception as e:
                    raise type(e)(p.name) from e
                params[i] = nds.Parameter(name, type_, p[2])
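# The heuristic split above, applied to a bare string: a parameter line
# like "x : int" that numpydoc failed to split becomes name/type again.
name, type_ = [_.strip() for _ in "x : int".split(": ", maxsplit=1)]
assert (name, type_) == ("x", "int")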
def gen_main(names, infer, exec_):
    """
    main entry point
    """
    import os

    import toml

    conffile = Path("~/.papyri/papyri.toml").expanduser()
    if conffile.exists():
        conf = toml.loads(conffile.read_text())
    else:
        print(
            "Per-library configuration not implemented yet, you may want to symlink"
            " ./papyri.toml at the root of the papyri repo to ~/.papyri/papyri.toml"
        )
        conf = {}
    global_conffile = Path("~/.papyri/config.toml").expanduser()
    if global_conffile.exists():
        global_conf = toml.loads(global_conffile.read_text())
    else:
        global_conf = {}
    # tp = global_conf.get("global", {}).get("target_path", ".")
    tp = os.path.expanduser("~/.papyri/data")
    target_dir = Path(tp).expanduser()
    if not target_dir.exists():
        target_dir.mkdir(parents=True, exist_ok=True)
    print(target_dir)
    g = Gen()
    g.do_one_mod(names, infer, exec_, conf)
    docs_path: str = conf.get(names[0], {}).get("docs_path", None)
    if docs_path is not None:
        path = Path(docs_path).expanduser()
        g.do_docs(path)
    p = target_dir / (g.root + "_" + g.version)
    p.mkdir(exist_ok=True)
    g.clean(p)
    g.write(p)
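# Shape of the per-library configuration consumed here and in
# do_one_mod / get_example_data below; the key names are taken from the
# .get() calls in this module, the values are illustrative only:
_example_conf = {
    "numpy": {
        "submodules": ["fft", "linalg"],
        "docs_path": "~/dev/numpy/doc",
        "examples_folder": "~/dev/numpy/examples",
        "exec_failure": "fallback",
        "execute_exclude_patterns": ["numpy.random"],
        "exclude": [],
        "wait_for_plt_show": True,
    }
}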
def reformat_example_lines(ex, indent=4):
    from there import print

    oo = []
    # print(ex)
    try:
        blocks = splitblank(ex)
        for block in blocks:
            # print(block)
            codes = splitcode(block)
            for (in_, out) in codes:
                oo.extend(insert_promt(reformat(in_, indent=indent)))
                if out:
                    oo.extend(out)
            oo.append("")
        return oo[:-1]
    except Exception:
        print(block)
        raise
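# Rough stand-in for the splitblank/splitcode pair used above (not the
# real implementations): blocks are separated by blank lines, and each
# block mixes ">>>" input lines with output lines.
example = [">>> 1 + 1", "2", "", ">>> print('hi')", "hi"]
blocks, current = [], []
for line in example:
    if line.strip() == "":
        blocks.append(current)
        current = []
    else:
        current.append(line)
blocks.append(current)
assert len(blocks) == 2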
async def _route(ref, store, version=None, env=None, template=None, gstore=None):
    assert not ref.endswith(".html")
    if env is None:
        env = Environment(
            loader=FileSystemLoader(os.path.dirname(__file__)),
            autoescape=select_autoescape(["html", "tpl.j2"]),
            undefined=StrictUndefined,
        )
        env.globals["len"] = len
        env.globals["url"] = url
        env.globals["unreachable"] = unreachable
        # env.globals["unreachable"] = lambda *x: "UNREACHABLELLLLL" + str(x)
    if template is None:
        template = env.get_template("core.tpl.j2")
    if ref == "":
        # root = "*"
        # print("GLOB", f"{root}/*/papyri.json")
        ref = "papyri"
        import papyri

        version = papyri.__version__
    root = ref.split(".")[0]
    # papp_files = store.glob(f"{root}/*/papyri.json")
    # TODO: deal with versions
    # for p in papp_files:
    #     aliases = json.loads(await p.read_text())
    known_refs, ref_map = find_all_refs(store)
    x_, y_ = find_all_refs(gstore)
    assert x_ == known_refs
    assert y_ == ref_map
    assert version is not None
    siblings = compute_siblings_II(ref, known_refs)
    # print(siblings)
    # End computing siblings.
    if version is not None:
        file_ = store / root / version / "module" / f"{ref}"
    else:
        assert False
        # files = list((store / root).glob(f"*/module/{ge(ref)}"))
    if await file_.exists():
        # The reference we are trying to view exists;
        # we will now just render it.
        # bytes_ = await file_.read_text()
        key = (root, version, "module", ref)
        gbytes = gstore.get((root, version, "module", ref)).decode()
        # assert len(gbytes) == len(bytes_), (len(gbytes), len(bytes_))
        # assert gbytes == bytes_, (gbytes[:10], bytes_[:10])
        assert root is not None
        # assert version is not None
        brpath = store / root / version / "module" / f"{ref}.br"
        print(brpath)
        if await brpath.exists():
            br = await brpath.read_text()
            # TODO: update to new way of getting backrefs.
            br = None
        else:
            br = None
        gbr_data = gstore.get_backref(key)
        gbr_bytes = json.dumps([RefInfo(*x).to_json() for x in gbr_data]).encode()
        # print("bytes_", bytes_[:40], "...")
        doc_blob = load_one(gbytes, gbr_bytes, known_refs=known_refs, strict=True)
        data = compute_graph(gstore, doc_blob, (root, version, "module", ref))
        json_str = json.dumps(data)
        parts_links = {}
        acc = ""
        for k in siblings.keys():
            acc += k
            parts_links[k] = acc
            acc += "."
        css_data = HtmlFormatter(style="pastie").get_style_defs(".highlight")
        return render_one(
            template=template,
            doc=doc_blob,
            qa=ref,
            ext="",
            parts=siblings,
            parts_links=parts_links,
            backrefs=doc_blob.backrefs,
            pygment_css=css_data,
            graph=json_str,
        )
    else:
        # The reference we are trying to render does not exist;
        # just try to have a nice error page, try to find local references,
        # and use the phantom file to list the backreferences to this.
        # It might be a page, or a module we do not have documentation about.
        r = ref.split(".")[0]
        this_module_known_refs = [
            str(s.name)
            for s in store.glob(f"{r}/*/module/{ref}")
            if not s.name.endswith(".br")
        ]
        x2 = [x.path for x in gstore.glob((r, None, "module", ref))]
        assert set(x2) == set(this_module_known_refs), (
            set(x2) - set(this_module_known_refs),
            (set(this_module_known_refs) - set(x2)),
        )
        brpath = store / "__phantom__" / f"{ref}.json"
        if await brpath.exists():
            br = json.loads(await brpath.read_text())
        else:
            br = []
        # compute a tree from all the references we have, to get a nice
        # browsing interface.
        tree = {}
        for f in this_module_known_refs:
            sub = tree
            parts = f.split(".")[len(ref.split(".")) :]
            for i, part in enumerate(parts):
                if part not in sub:
                    sub[part] = {}
                sub = sub[part]
            sub["__link__"] = f
        error = env.get_template("404.tpl.j2")
        return error.render(backrefs=list(set(br)), tree=tree, ref=ref, module=root)
async def main(ascii, html, dry_run):
    gstore = GraphStore(ingest_dir, {})
    store = Store(ingest_dir)
    files = store.glob("*/*/module/*.json")
    gfiles = list(gstore.glob((None, None, "module", None)))
    css_data = HtmlFormatter(style="pastie").get_style_defs(".highlight")
    env = Environment(
        loader=FileSystemLoader(os.path.dirname(__file__)),
        autoescape=select_autoescape(["html", "tpl.j2"]),
        undefined=StrictUndefined,
    )
    env.globals["len"] = len
    env.globals["unreachable"] = unreachable
    env.globals["url"] = url
    template = env.get_template("core.tpl.j2")
    if dry_run:
        output_dir = None
    else:
        output_dir = html_dir / "p"
        output_dir.mkdir(exist_ok=True)
    document: Store
    x_, y_ = find_all_refs(store)
    known_refs, ref_map = find_all_refs(gstore)
    assert x_ == known_refs
    assert y_ == ref_map
    # end
    family = frozenset(_.path for _ in known_refs)
    tree = make_tree(family)
    print("going to erase", html_dir)
    # input("press enter to continue...")
    shutil.rmtree(html_dir)
    random.shuffle(files)
    random.shuffle(gfiles)
    # Gallery
    mv2 = gstore.glob((None, None))
    for _, (module, version) in progress(
        set(mv2), description="Rendering galleries..."
    ):
        # version, module = item.path.name, item.path.parent.name
        data = await gallery(module, store, version, ext=".html", gstore=gstore)
        (output_dir / module / version / "gallery").mkdir(parents=True, exist_ok=True)
        with (output_dir / module / version / "gallery" / "index.html").open("w") as f:
            f.write(data)
    for p, key in progress(gfiles, description="Rendering..."):
        module, v = key.module, key.version
        if ascii:
            qa = key.path
            await _ascii_render(qa, store=gstore, version=v)
        if html:
            doc_blob, qa, siblings, parts_links = await loc(
                key,
                store=gstore,
                tree=tree,
                known_refs=known_refs,
                ref_map=ref_map,
            )
            data = compute_graph(gstore, doc_blob, key)
            json_str = json.dumps(data)
            data = render_one(
                template=template,
                doc=doc_blob,
                qa=qa,
                ext=".html",
                parts=siblings,
                parts_links=parts_links,
                backrefs=doc_blob.backrefs,
                pygment_css=css_data,
                graph=json_str,
            )
            if not dry_run:
                (output_dir / module / v / "api").mkdir(parents=True, exist_ok=True)
                with (output_dir / module / v / "api" / f"{qa}.html").open("w") as f:
                    f.write(data)
    import papyri

    key = Key("papyri", str(papyri.__version__), "module", "papyri")
    module, v = "papyri", str(papyri.__version__)
    if html:
        doc_blob, qa, siblings, parts_links = await loc(
            key,
            store=gstore,
            tree=tree,
            known_refs=known_refs,
            ref_map=ref_map,
        )
        data = render_one(
            template=template,
            doc=doc_blob,
            qa=qa,
            ext=".html",
            parts=siblings,
            parts_links=parts_links,
            backrefs=doc_blob.backrefs,
            pygment_css=css_data,
        )
        if not dry_run:
            with (html_dir / "index.html").open("w") as f:
                f.write(data)
    if not dry_run:
        assets_2 = gstore.glob((None, None, "assets", None))
        for _, asset in progress(assets_2, description="Copying assets"):
            b = html_dir / "p" / asset.module / asset.version / "img"
            b.mkdir(parents=True, exist_ok=True)
            data = gstore.get(asset)
            (b / asset.path).write_bytes(data)
def get_example_data(doc, infer=True, obj=None, exec_=True, qa=None, config=None):
    """Extract example section data from a NumpyDocstring

    One of the sections in numpydoc is "Examples"; it usually consists of
    a number of paragraphs interleaved with examples starting with >>> and ...

    This attempts to parse this into structured data, with text, input and
    output, as well as to infer the types of each token in the input examples.

    This is currently relatively limited as the inference does not work
    across code blocks.

    Parameters
    ----------
    doc
        a docstring parsed into a NumpyDoc document.
    infer : bool
        whether to run type inference, which can be time consuming.
    """
    assert qa is not None
    if not config:
        config = {}
    blocks = list(map(splitcode, splitblank(doc["Examples"])))
    example_section_data = Section()
    import matplotlib.pyplot as plt
    from matplotlib import _pylab_helpers

    acc = ""
    import numpy as np

    counter = 0
    ns = {"np": np, "plt": plt, obj.__name__: obj}
    executor = BlockExecutor(ns)
    figs = []
    fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
    assert len(fig_managers) == 0, f"init fail in {qa} {len(fig_managers)}"
    wait_for_show = config.get("wait_for_plt_show", True)
    with executor:
        for b in blocks:
            for item in b:
                if isinstance(item, InOut):
                    script = "\n".join(item.in_)
                    figname = None
                    ce_status = "None"
                    try:
                        compile(script, "<>", "exec")
                        ce_status = "compiled"
                    except SyntaxError:
                        ce_status = "syntax_error"
                    raise_in_fig = "?"
                    did_except = False
                    if exec_:
                        try:
                            if not wait_for_show:
                                assert len(fig_managers) == 0
                            try:
                                res, fig_managers = executor.exec(script)
                                ce_status = "execed"
                            except Exception:
                                ce_status = "exception_in_exec"
                                if config.get("exec_failure", "") != "fallback":
                                    raise
                            if fig_managers and (
                                ("plt.show" in script) or not wait_for_show
                            ):
                                raise_in_fig = True
                                for fig in executor.get_figs():
                                    counter += 1
                                    figname = f"fig-{qa}-{counter}.png"
                                    figs.append((figname, fig))
                                plt.close("all")
                                raise_in_fig = False
                        except Exception:
                            did_except = True
                            print(f"exception executing... {qa}")
                            fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
                            if raise_in_fig:
                                raise
                        finally:
                            if not wait_for_show:
                                if fig_managers:
                                    plt.close("all")
                                fig_managers = (
                                    _pylab_helpers.Gcf.get_all_fig_managers()
                                )
                                assert len(fig_managers) == 0, fig_managers + [
                                    did_except,
                                ]
                    infer_exclude = config.get("exclude_jedi", frozenset())
                    if qa in infer_exclude:
                        print("Turning off type inference for this function:", qa)
                        inf = False
                    else:
                        inf = infer
                    entries = list(
                        parse_script(script, ns=ns, infer=inf, prev=acc, config=config)
                    )
                    acc += "\n" + script
                    example_section_data.append(
                        Code(entries, "\n".join(item.out), ce_status)
                    )
                    if figname:
                        example_section_data.append(Fig(figname))
                else:
                    assert isinstance(item.out, list)
                    example_section_data.append(Text("\n".join(item.out)))
    # TODO: fix this if plt.close was not called and there is still a
    # lingering figure.
    fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
    if len(fig_managers) != 0:
        plt.close("all")
    return processed_example_data(example_section_data), figs
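# Example of the config keys consulted by get_example_data above; the
# key names come from the .get() calls in the function, the values are
# purely illustrative:
example_config = {
    "wait_for_plt_show": True,  # only harvest figures once plt.show is called
    "exec_failure": "fallback",  # don't abort when an example block raises
    "exclude_jedi": frozenset({"scipy.fft.set_workers"}),  # skip slow inference
}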
"plot", "seealso", "moduleauthor", "data", "WARNING", "currentmodule", "important", "code-block", "image", "rubric", "inheritance-diagram", "table", ]: # print("TODO:", block_directive.directive_name) return [block_directive] print(block_directive.directive_name, self.qa) return [block_directive] def _resolve(self, loc, text): assert isinstance(text, str) return resolve_(self.qa, self.known_refs, loc, text, rev_aliases=self.rev_aliases) def replace_Directive(self, directive: Directive): if (directive.domain, directive.role) == ("py", "func"): pass elif (directive.domain, directive.role) == (None, None) and directive.text in (
def post(self):
    if self.config.forward_staging_url:
        try:

            def fn(req, url):
                try:
                    import requests

                    headers = {
                        k: req.headers[k]
                        for k in (
                            "content-type",
                            "User-Agent",
                            "X-GitHub-Delivery",
                            "X-GitHub-Event",
                            "X-Hub-Signature",
                        )
                    }
                    req = requests.Request("POST", url, headers=headers, data=req.body)
                    prepared = req.prepare()
                    with requests.Session() as s:
                        res = s.send(prepared)
                        return res
                except Exception:
                    import traceback

                    traceback.print_exc()

            pool.submit(fn, self.request, self.config.forward_staging_url)
        except Exception:
            print(red + "failure to forward")
            import traceback

            traceback.print_exc()
    if "X-Hub-Signature" not in self.request.headers:
        add_event("attack", {"type": "no X-Hub-Signature"})
        return self.error("WebHook not configured with secret")
    if not verify_signature(
        self.request.body,
        self.request.headers["X-Hub-Signature"],
        self.config.webhook_secret,
    ):
        add_event("attack", {"type": "wrong signature"})
        return self.error(
            "Cannot validate GitHub payload with provided WebHook secret"
        )
    payload = tornado.escape.json_decode(self.request.body)
    org = payload.get("repository", {}).get("owner", {}).get("login")
    if not org:
        org = (
            payload.get("issue", {})
            .get("repository", {})
            .get("owner", {})
            .get("login")
        )
        print("org in issue", org)
    if payload.get("action", None) in [
        "edited",
        "assigned",
        "labeled",
        "opened",
        "created",
        "submitted",
    ]:
        add_event("ignore_org_missing", {"edited": "reason"})
    else:
        if hasattr(self.config, "org_whitelist") and (
            org not in self.config.org_whitelist
        ):
            add_event("post", {"reject_organisation": org})
    sender = payload.get("sender", {}).get("login", {})
    if hasattr(self.config, "user_blacklist") and (
        sender in self.config.user_blacklist
    ):
        add_event("post", {"blocked_user": sender})
        self.finish("Blocked user.")
        return
    action = payload.get("action", None)
    add_event("post", {"accepted_action": action})
    unknown_repo = red + "<unknown repo>" + normal
    repo = payload.get("repository", {}).get("full_name", unknown_repo)
    if repo == unknown_repo:
        import there

        there.print(json.dumps(payload))
    if payload.get("commits"):
        # TODO
        etype = self.request.headers.get("X-GitHub-Event")
        num = payload.get("size")
        ref = payload.get("ref")
        by = payload.get("pusher", {}).get("name")
        print(
            green
            + f"(https://github.com/{repo}) `{num}` commit(s) were pushed to `{ref}` by `{by}` – type: {etype}"
        )
        self.finish(f"commits were pushed to {repo}")
        return
    if action:
        return self.dispatch_action(action, payload)
    else:
        event_type = self.request.headers.get("X-GitHub-Event")
        if event_type == "pull_request":
            return self.finish()
        if event_type in {
            "status",
            "fork",
            "deployment_status",
            "deployment",
            "delete",
            "push",
            "create",
        }:
            print(
                f"(https://github.com/{repo}) Not handling event type `{event_type}` yet."
            )
            return self.finish()
        print(f"({repo}) No action available for the webhook :", event_type)
def ingest(self, path: Path, check: bool):
    gstore = self.gstore
    known_refs, _ = find_all_refs(gstore)
    nvisited_items = {}
    ###
    meta_path = path / "papyri.json"
    data = json.loads(meta_path.read_text())
    version = data["version"]
    root = data["module"]
    logo = data.get("logo", None)
    # long : short
    aliases: Dict[str, str] = data.get("aliases", {})
    rev_aliases = {v: k for k, v in aliases.items()}
    for _, fe in progress(
        (path / "examples/").glob("*"), description=f"Reading {path.name} Examples"
    ):
        s = Section.from_json(json.loads(fe.read_text()))
        gstore.put(
            (root, version, "examples", fe.name),
            json.dumps(s.to_json(), indent=2).encode(),
            [],
        )
    for _, f1 in progress(
        (path / "module").glob("*"),
        description=f"Reading {path.name} doc bundle files ...",
    ):
        assert f1.name.endswith(".json")
        qa = f1.name[:-5]
        if check:
            rqa = normalise_ref(qa)
            if rqa != qa:
                # numpy weird thing
                print(f"skip {qa}")
                continue
            assert rqa == qa, f"{rqa} != {qa}"
        try:
            nvisited_items[qa] = load_one_uningested(
                f1.read_text(),
                None,
                qa=qa,
                known_refs=known_refs,
                aliases=aliases,
            )
            assert hasattr(nvisited_items[qa], "arbitrary")
        except Exception as e:
            raise RuntimeError(f"error reading {f1}") from e
    known_refs_II = frozenset(nvisited_items.keys())
    # TODO: in progress, crosslink needs version information.
    known_ref_info = frozenset(
        RefInfo(root, version, "module", qa) for qa in known_refs_II
    ).union(known_refs)
    for _, (qa, doc_blob) in progress(
        nvisited_items.items(), description="Cross referencing"
    ):
        refs = doc_blob.process(known_ref_info, verbose=False, aliases=aliases)
        doc_blob.logo = logo
        # todo: warning, mutation.
        for sa in doc_blob.see_also:
            r = resolve_(
                qa,
                known_ref_info,
                frozenset(),
                sa.name.name,
                rev_aliases=rev_aliases,
            )
            resolved, exists = r.path, r.kind
            if exists == "module":
                sa.name.exists = True
                sa.name.ref = resolved
    for _, f2 in progress(
        (path / "assets").glob("*"),
        description=f"Reading {path.name} image files ...",
    ):
        gstore.put((root, version, "assets", f2.name), f2.read_bytes(), [])
    gstore.put(
        (root, version, "papyri.json"), json.dumps(aliases, indent=2).encode(), []
    )
    for _, (qa, doc_blob) in progress(
        nvisited_items.items(), description="Writing..."
    ):
        # we might update other modules with backrefs
        for k, v in doc_blob.content.items():
            assert isinstance(v, Section), f"section {k} is not a Section: {v!r}"
        mod_root = qa.split(".")[0]
        assert mod_root == root, f"{mod_root}, {root}"
        doc_blob.version = version
        assert hasattr(doc_blob, "arbitrary")
        js = doc_blob.to_json()
        del js["backrefs"]
        # TODO: FIX
        # when walking the tree of figures we can't properly crosslink
        # as we don't know the version number;
        # fix it at serialisation time.
        rr = []
        for r in js["refs"]:
            if r["version"] == "??":
                r["version"] = version
            rr.append(r)
        js["refs"] = rr
        refs = [
            (b["module"], b["version"], b["kind"], b["path"])
            for b in js.get("refs", [])
        ]
        for r in refs:
            assert None not in r
        try:
            key = (mod_root, version, "module", qa)
            assert mod_root is not None
            assert version is not None
            assert None not in key
            gstore.put(
                key,
                json.dumps(js, indent=2).encode(),
                refs,
            )
        except Exception as e:
            raise RuntimeError(f"error writing to {path}") from e
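# On-disk layout of a doc bundle as consumed by ingest() above,
# inferred from the globs and keys it uses:
#
#   <bundle>/papyri.json       -> {"version": ..., "module": ..., "aliases": {...}}
#   <bundle>/module/<qa>.json  -> one serialized doc blob per documented object
#   <bundle>/examples/<name>   -> serialized Section objects
#   <bundle>/assets/<name>     -> raw binary files (images)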
def main():
    _config = ConfigParser()
    patterns = []
    if Path("setup.cfg").exists():
        _config.read("setup.cfg")
        patterns = [
            SkipPattern(x.strip())
            for x in _config.get("velin", "ignore_patterns", fallback="").split("\n")
            if x
        ]

    parser = argparse.ArgumentParser(
        description="reformat the docstrings of some files"
    )
    parser.add_argument(
        "paths",
        metavar="path",
        type=str,
        nargs="+",
        help="Files or folder to reformat",
    )
    parser.add_argument(
        "--context",
        metavar="context",
        type=int,
        default=3,
        help="Number of context lines in the diff",
    )
    parser.add_argument(
        "--unsafe",
        action="store_true",
        help="Lift some safety features (don't fail if updating the docstring is not idempotent)",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Print the list of files/line numbers and exit with a non-0 exit status; use it for CI.",
    )
    parser.add_argument(
        "--no-diff",
        action="store_false",
        dest="print_diff",
        help="Do not print the diff",
    )
    parser.add_argument(
        "--black",
        action="store_true",
        dest="run_black",
        help="Run black on examples",
    )
    parser.add_argument(
        "--with-placeholder",
        action="store_true",
        dest="with_placeholder",
        help="insert missing sections/parameters placeholders",
    )
    parser.add_argument("--no-color", action="store_false", dest="do_highlight")
    parser.add_argument("--compact", action="store_true", help="Please ignore")
    parser.add_argument("--no-fail", action="store_false", dest="fail")
    parser.add_argument(
        "--space-in-see-also-title", action="store_true", dest="space_in_see_also_title"
    )
    parser.add_argument(
        "--space-in-notes-title", action="store_true", dest="space_in_notes_title"
    )
    parser.add_argument(
        "--no-fixers",
        action="store_false",
        dest="run_fixers",
        help="only reformat and do not run the fixer heuristics",
    )
    parser.add_argument(
        "--write",
        dest="write",
        action="store_true",
        help="Try to write the updated docstring to the files",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="increase the verbosity of the output",
    )

    args = parser.parse_args()

    config = Config(
        {
            "with_placeholder": args.with_placeholder,
            "compact_param": args.compact,
            "space_in_see_also_title": args.space_in_see_also_title,
            "space_in_notes_title": args.space_in_notes_title,
            "run_fixers": args.run_fixers,
        }
    )
    global BLACK_REFORMAT
    if args.run_black:
        BLACK_REFORMAT = True
    else:
        BLACK_REFORMAT = False

    global print
    if args.verbose:
        try:
            from there import print
        except ImportError:
            pass

    to_format = []
    for f in args.paths:
        p = Path(f)
        if p.is_dir():
            for sf in p.glob("**/*.py"):
                to_format.append(sf)
        else:
            to_format.append(p)

    def to_skip(file, patterns):
        for p in patterns:
            if re.match(p.file, file):
                if p.obj_pattern is None:
                    return True
                else:
                    return False
        return False

    need_changes = []
    for file in to_format:
        if to_skip(str(file), patterns):
            print("ignoring", file)
            continue
        try:
            with open(file) as f:
                data = f.read()
        except Exception as e:
            # continue
            continue
            raise RuntimeError(f"Fail reading {file}") from e

        obj_p = [p.obj_pattern for p in patterns if re.match(p.file, str(file))]
        new = reformat_file(
            data,
            file,
            args.compact,
            args.unsafe,
            fail=args.fail,
            config=config,
            obj_p=obj_p,
        )
        # test(docstring, file)
        if new != data:
            need_changes.append(str(file))
            dold = data.splitlines()
            dnew = new.splitlines()
            diffs = list(
                difflib.unified_diff(
                    dold, dnew, n=args.context, fromfile=str(file), tofile=str(file)
                ),
            )
            if args.print_diff and not args.write:
                code = "\n".join(diffs)
                if args.do_highlight:
                    from pygments import highlight
                    from pygments.formatters import TerminalFormatter
                    from pygments.lexers import DiffLexer

                    code = highlight(code, DiffLexer(), TerminalFormatter())
                print(code)

            if args.write:
                with open(file, "w") as f:
                    f.write(new)

    if args.check:
        if len(need_changes) != 0:
            sys.exit(
                "Some files/functions need updates:\n - " + "\n - ".join(need_changes)
            )
        else:
            sys.exit(0)
def reformat_file(data, filename, compact, unsafe, fail=False, config=None, obj_p=None):
    """
    Parameters
    ----------
    data : <Insert Type here>
        <Multiline Description Here>
    filename : <Insert Type here>
        <Multiline Description Here>
    compact : bool
        whether to use compact formatting
    unsafe : bool
        <Multiline Description Here>
    fail : <Insert Type here>
        <Multiline Description Here>
    config : <Insert Type here>
        <Multiline Description Here>
    """
    assert config is not None
    tree = ast.parse(data)
    new = data

    # funcs = [t for t in tree.body if isinstance(t, ast.FunctionDef)]
    funcs = NodeVisitor({"skip": obj_p})
    funcs.visit(tree)
    funcs = funcs.items

    for i, (func, meta, qname) in enumerate(funcs[:]):
        # print(i, "==", func.name, "==")
        try:
            e0 = func.body[0]
            if not isinstance(e0, ast.Expr):
                continue
            # e0.value is _likely_ a Constant node.
            docstring = e0.value.s
            func_name = func.name
        except AttributeError:
            continue
        if not isinstance(docstring, str):
            continue
        start, nindent, stop = (
            func.body[0].lineno,
            func.body[0].col_offset,
            func.body[0].end_lineno,
        )
        # if not docstring in data:
        #     print(f"skip {file}: {func.name}, can't do replacement yet")
        try:
            new_doc, d_, jump_to_loc = compute_new_doc(
                docstring,
                filename,
                level=nindent,
                compact=compact,
                meta=meta,
                func_name=func_name,
                config=config,
            )
            if jump_to_loc:
                print("mvim", f"+{start}", filename)
                # call editor with file and line number
            elif not unsafe:
                _, d2, _ = compute_new_doc(
                    docstring,
                    filename,
                    level=nindent,
                    compact=compact,
                    meta=meta,
                    func_name=func_name,
                    config=config,
                )
                if not d2._parsed_data == d_._parsed_data:
                    secs1 = {
                        k: v
                        for k, v in d2._parsed_data.items()
                        if v != d_._parsed_data[k]
                    }
                    secs2 = {
                        k: v
                        for k, v in d_._parsed_data.items()
                        if v != d2._parsed_data[k]
                    }
                    raise ValueError(
                        "Numpydoc parsing seems to differ after reformatting; "
                        "this may be a reformatting bug. Rerun with `velin --unsafe "
                        + str(filename)
                        + "`\n"
                        + str(secs1)
                        + "\n"
                        + str(secs2),
                    )
        except Exception as e:
            print(f"something went wrong with {filename}:{qname} :\n\n{docstring}")
            if fail:
                raise
            continue
        if not docstring.strip():
            print("DOCSTRING IS EMPTY !!!", func.name)
        # test(docstring, file)
        if new_doc.strip() and new_doc != docstring:
            # need_changes.append(str(filename) + f":{start}:{func.name}")
            if ('"""' in new_doc) or ("'''" in new_doc):
                # print(
                #     "SKIPPING", filename, func.name, "triple quote not handled", new_doc
                # )
                pass
            else:
                # if docstring not in new:
                #     print("ESCAPE issue:", docstring)
                new = new.replace(docstring, new_doc)

    return new
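# Hypothetical driver for reformat_file; the option names mirror the
# argparse flags in main() above, and Config is velin's option container:
if __name__ == "__main__":
    import sys

    cfg = Config(
        {
            "with_placeholder": False,
            "compact_param": False,
            "space_in_see_also_title": False,
            "space_in_notes_title": False,
            "run_fixers": True,
        }
    )
    for path in sys.argv[1:]:
        src = Path(path).read_text()
        print(reformat_file(src, path, compact=False, unsafe=True, config=cfg, obj_p=[]))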
def process(self, known_refs, aliases, verbose=True):
    local_refs = []
    sections_ = [
        "Parameters",
        "Returns",
        "Raises",
        "Yields",
        "Attributes",
        "Other Parameters",
        "Warns",
        ## "Warnings",
        "Methods",
        # "Summary",
        "Receives",
        # "Notes",
        # "Signature",
        #'Extended Summary',
        #'References'
        #'See Also'
        #'Examples'
    ]
    if self.refs is None:
        self.refs = []
    for r in self.refs:
        assert None not in r
    if aliases is None:
        aliases = {}
    for s in sections_:
        local_refs = local_refs + [
            [u.strip() for u in x[0].split(",")]
            for x in self.content[s]
            if isinstance(x, Param)
        ]

    def flat(l):
        return [y for x in l for y in x]

    local_refs = frozenset(flat(local_refs))
    assert isinstance(known_refs, frozenset)
    visitor = DVR(self.qa, known_refs, local_refs, aliases)
    for section in ["Extended Summary", "Summary", "Notes"] + sections_:
        assert section in self.content
        self.content[section] = visitor.visit(self.content[section])
    if (len(visitor.local) or len(visitor.total)) and verbose:
        # TODO: re-enable
        assert len(visitor.local) == 0, f"{visitor.local} | {self.qa}"
        print(f"Newly found {len(visitor.total)} links in {self.qa}:")
        for a, b in visitor.total:
            print("  ", repr(a), "refers to", repr(b))
    self.example_section_data = visitor.visit(self.example_section_data)
    self.arbitrary = [visitor.visit(s) for s in self.arbitrary]
    for d in self.see_also:
        new_desc = []
        for dsc in d.descriptions:
            new_desc.append(visitor.visit(dsc))
        d.descriptions = new_desc
    try:
        for r in visitor._targets:
            assert None not in r, r
        self.refs = list(set(visitor._targets).union(set(self.refs)))
        for r in self.refs:
            assert None not in r
    except Exception as e:
        raise type(e)(self.refs) from e
def compute_graph(gs, blob, key):
    # nodes_names = [b.path for b in blob.backrefs + blob.refs] + [key[3]]
    # nodes_names = [n for n in nodes_names if n.startswith('numpy')]
    weights = {}
    all_nodes = [tuple(x) for x in blob.backrefs + blob.refs]
    raw_edges = []
    for k in blob.backrefs + blob.refs:
        name = tuple(k)[3]
        neighbors_refs = gs.get_backref(tuple(k))
        weights[name] = len(neighbors_refs)
        orig = [x[3] for x in neighbors_refs]
        all_nodes.extend([tuple(x) for x in neighbors_refs])
        for o in orig:
            raw_edges.append((k.path, o))
    data = {"nodes": [], "links": []}
    if len(weights) > 50:
        for thresh in sorted(set(weights.values())):
            print(f"{len(weights)} items ; remove items {thresh} or lower")
            weights = {k: v for k, v in weights.items() if v > thresh}
            print(f"down to {len(weights)} items")
            if len(weights) < 50:
                break
    all_nodes = set(all_nodes)
    nums_ = set()
    edges = list(raw_edges)
    nodes = list(set(weights.keys()))
    for a, b in edges:
        if (a not in nodes) or (b not in nodes):
            continue
        nums_.add(a)
        nums_.add(b)
    nums = {x: i for i, x in enumerate(nodes, start=1)}
    for i, (from_, to) in enumerate(edges):
        if from_ == to:
            continue
        if from_ not in nodes:
            continue
        if to not in nodes:
            continue
        if key[3] in (to, from_):
            continue
        data["links"].append({"source": nums[from_], "target": nums[to], "id": i})
    for node in nodes:
        diam = 8
        if node == key[3]:
            # the central node itself is not drawn
            continue
        elif node in weights:
            import math

            diam = 8 + math.sqrt(weights[node])
        candidates = [n for n in all_nodes if n[3] == node and "??" not in n]
        if not candidates:
            uu = None
        else:
            assert len(candidates) == 1, (candidates, node)
            uu = url(RefInfo(*candidates[0]))
        data["nodes"].append(
            {
                "id": nums[node],
                "val": diam,
                "label": node,
                "mod": ".".join(node.split(".")[0:1]),
                "url": uu,
            }
        )
    return data
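# Illustrative shape of the mapping returned above, as consumed by the
# force-directed graph in the HTML templates (values made up):
_example_graph = {
    "nodes": [
        {"id": 1, "val": 8, "label": "numpy.linspace", "mod": "numpy", "url": None},
        {"id": 2, "val": 12, "label": "numpy.arange", "mod": "numpy", "url": None},
    ],
    "links": [{"source": 1, "target": 2, "id": 0}],
}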
def replace_Directive(self, directive: Directive):
    if (directive.domain, directive.role) == ("py", "func"):
        pass
    elif (directive.domain, directive.role) == (None, None) and directive.text in (
        # TODO: link to stdlib
        "None",
        "True",
        "False",
    ):
        return [Verbatim([directive.text])]
    elif (directive.domain is not None) or (
        directive.role not in (None, "mod", "class", "func", "meth", "any")
    ):
        # TODO: many of these directives need to be implemented
        if directive.role == "math":
            m = Math(directive.value)
            return [m]
        if directive.role not in (
            "attr",
            "meth",
            "doc",
            "ref",
            "func",
            "mod",
            "class",
            "term",
            "exc",
            "obj",
            "data",
            "sub",
            "program",
            "file",
            "command",
            "sup",
            "rc",  # matplotlib
        ):
            print("TODO role:", directive.role)
            return [directive]
    loc: FrozenSet[str]
    if directive.role not in ["any", None]:
        loc = frozenset()
    else:
        loc = self.local_refs
    text = directive.text
    # TODO: wrong, there should not be any ` left; that is likely a
    # verbatim vs directive parsing issue.
    text = text.strip("`")
    to_resolve = text
    if " <" in text and text.endswith(">"):
        try:
            text, to_resolve = text.split(" <")
        except Exception:
            assert False, directive.text
        assert to_resolve.endswith(">"), (text, to_resolve)
        to_resolve = to_resolve.rstrip(">")
    r = self._resolve(loc, to_resolve)
    # this is now likely incorrect as Ref kind should not be "exists",
    # but things like "local", "api", "gallery"...
    ref, exists = r.path, r.kind
    if exists != "missing":
        if exists == "local":
            self.local.append(text)
        else:
            self.total.append((text, ref))
        if r.kind != "local":
            assert None not in r, r
            self._targets.add(r)
        return [Link(text, r, exists, exists != "missing")]
    return [directive]
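# The "title <target>" split performed above, on a plain string:
text = "the generator <papyri.gen>"
text, to_resolve = text.split(" <")
to_resolve = to_resolve.rstrip(">")
assert (text, to_resolve) == ("the generator", "papyri.gen")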
class Gen:
    def __init__(self):
        self.data = {}
        self.bdata = {}
        self.metadata = {}
        self.examples = {}

    def clean(self, where: Path):
        for _, path in progress(
            (where / "module").glob("*.json"), description="cleaning previous bundle"
        ):
            path.unlink()
        for _, path in progress(
            (where / "assets").glob("*"), description="cleaning previous bundle"
        ):
            path.unlink()
        if (where / "module").exists():
            (where / "module").rmdir()
        if (where / "assets").exists():
            (where / "assets").rmdir()
        if (where / "papyri.json").exists():
            (where / "papyri.json").unlink()

    def write(self, where: Path):
        (where / "module").mkdir(exist_ok=True)
        for k, v in self.data.items():
            with (where / "module" / k).open("w") as f:
                f.write(v)
        (where / "examples").mkdir(exist_ok=True)
        for k, v in self.examples.items():
            with (where / "examples" / k).open("w") as f:
                f.write(v)
        assets = where / "assets"
        assets.mkdir()
        for k, v in self.bdata.items():
            with (assets / k).open("wb") as f:
                f.write(v)
        with (where / "papyri.json").open("w") as f:
            f.write(json.dumps(self.metadata, indent=2))

    def put(self, path: str, data):
        self.data[path + ".json"] = data

    def put_raw(self, path: str, data):
        self.bdata[path] = data

    def do_one_item(
        self, target_item: Any, ndoc, infer: bool, exec_: bool, qa: str, config=None
    ) -> Tuple[DocBlob, List]:
        """
        Get documentation information for one item.

        Returns
        -------
        Tuple of two items:
        ndoc:
            DocBundle with info for current object.
        figs:
            dict mapping figure names to figure data.
        """
        if config is None:
            config = {}
        blob = DocBlob()
        blob.content = {k: v for k, v in ndoc._parsed_data.items()}
        item_file = None
        item_line = None
        item_type = None
        try:
            item_file = inspect.getfile(target_item)
            item_line = inspect.getsourcelines(target_item)[1]
            item_type = str(type(target_item))
        except (AttributeError, TypeError):
            pass
        except OSError:
            pass
        if not blob.content["Signature"]:
            sig = None
            try:
                sig = str(inspect.signature(target_item))
                sig = qa.split(".")[-1] + sig
            except (ValueError, TypeError):
                pass
            # mutate argument! BAD
            blob.content["Signature"] = sig

        new_see_also = ndoc["See Also"]
        refs = []
        if new_see_also:
            for line in new_see_also:
                rt, desc = line
                assert isinstance(desc, list), line
                for ref, type_ in rt:
                    refs.append(ref)
        try:
            ndoc.example_section_data, figs = get_example_data(
                ndoc, infer, obj=target_item, exec_=exec_, qa=qa, config=config
            )
            ndoc.figs = figs
        except Exception as e:
            ndoc.example_section_data = Section()
            print("Error getting example data in ", qa)
            raise ValueError("Error getting example data in ", qa) from e
            ndoc.figs = []
        ndoc.refs = list(
            {
                u[1]
                for span in ndoc.example_section_data
                if span.__class__.__name__ == "Code"
                for u in span.entries
                if u[1]
            }
        )
        blob.example_section_data = ndoc.example_section_data
        ndoc.refs.extend(refs)
        ndoc.refs = [normalise_ref(r) for r in sorted(set(ndoc.refs))]
        figs = ndoc.figs
        del ndoc.figs

        blob.ordered_sections = ndoc.ordered_sections
        blob.refs = ndoc.refs
        blob.item_file = item_file
        blob.item_line = item_line
        blob.item_type = item_type
        return blob, figs

    def collect_examples(self, folder):
        acc = []
        examples = list(folder.glob("*.py"))
        for example in examples:
            executor = BlockExecutor({})
            with executor:
                script = example.read_text()
                executor.exec(script)
                figs = [
                    (f"ex-{example.name}-{i}.png", f)
                    for i, f in enumerate(executor.get_figs())
                ]
                entries = list(parse_script(script, ns={}, infer=True, prev=""))
                s = Section(
                    [Code(entries, "", "execed")] + [Fig(name) for name, _ in figs]
                )
                s = processed_example_data(s)
                acc.append(
                    (
                        {example.name: s},
                        figs,
                    )
                )
        return acc

    def do_one_mod(self, names: List[str], infer: bool, exec_: bool, conf: dict):
        """
        Crawl one module and store the resulting docbundle in self.store.

        Parameters
        ----------
        names : List[str]
            list of (sub)module names to generate a docbundle for.
            The first is considered the root module.
        infer : bool
            Whether to run type inference with jedi.
        exec_ : bool
            Whether to try to execute the code blocks and embed resulting
            values like plots.
        """
        p = lambda: Progress(
            TextColumn("[progress.description]{task.description}", justify="right"),
            BarColumn(bar_width=None),
            "[progress.percentage]{task.percentage:>3.1f}%",
            "[progress.completed]{task.completed} / {task.total}",
            TimeElapsedColumn(),
        )

        # step one: collect all the module instances we want to analyse.
        modules = []
        for name in names:
            x, *r = name.split(".")
            n0 = __import__(name)
            for sub in r:
                n0 = getattr(n0, sub)
            modules.append(n0)

        # step 2: try to guess the version number from the top module.
        version = getattr(modules[0], "__version__", "???")

        root = names[0].split(".")[0]
        module_conf = conf.get(root, {})
        examples_folder = module_conf.get("examples_folder", None)
        print("EF", examples_folder)
        if examples_folder is not None:
            examples_folder = Path(examples_folder).expanduser()
            examples_data = self.collect_examples(examples_folder)
            for edoc, figs in examples_data:
                self.examples.update(
                    {k: json.dumps(v.to_json()) for k, v in edoc.items()}
                )
                for name, data in figs:
                    print("put one fig", name)
                    self.put_raw(name, data)
        print("Configuration:", json.dumps(module_conf, indent=2))
        self.root = root
        self.version = version
        subs = module_conf.get("submodules", [])
        extra_from_conf = [root + "." + s for s in subs]
        for name in extra_from_conf:
            x, *r = name.split(".")
            n0 = __import__(name)
            for sub in r:
                n0 = getattr(n0, sub)
            modules.append(n0)
        # print(modules)

        collector = DFSCollector(modules[0], modules[1:])
        collected: Dict[str, Any] = collector.items()

        # collect all items we want to document.
        for qa, item in collected.items():
            if (nqa := full_qual(item)) != qa:
                print(f"after import qa differs : {qa} -> {nqa}")
                if collected[nqa] == item:
                    print("present twice")
                    del collected[nqa]
                else:
                    print(f"differs: {item} != {collected[nqa]}")

        for target in module_conf.get("exclude", []):
            print("exclude tgt:", target)
            del collected[target]
        # p = nullcontext
        with p() as p2:
            # just a nice display of progression.
            taskp = p2.add_task(description="parsing", total=len(collected))

            for qa, target_item in collected.items():
                short_description = (qa[:19] + "..") if len(qa) > 21 else qa
                p2.update(taskp, description=short_description.ljust(17))
                p2.advance(taskp)
                item_docstring = target_item.__doc__

                # TODO: we may not want to skip items as they may have children.
                # right now keep modules, but we may want to keep classes if
                # they have documented descendants.
                if item_docstring is None and not isinstance(target_item, ModuleType):
                    continue
                elif item_docstring is None and isinstance(target_item, ModuleType):
                    item_docstring = """This module has no documentation"""

                # progress.console.print(qa)
                try:
                    if tsparse is None:
                        print(
                            "please see how to install Tree-sitter in the readme to parse complex RST documents"
                        )
                    arbitrary = tsparse(dedent_but_first(item_docstring).encode())
                except Exception as e:
                    print(f"TS could not parse: {qa}")
                    raise ValueError(f"from {qa}") from e
                    arbitrary = []
                    # raise
                try:
                    ndoc = NumpyDocString(dedent_but_first(item_docstring))
                except Exception:
                    if not isinstance(target_item, ModuleType):
                        p2.console.print(
                            "Unexpected error parsing",
                            target_item,
                            target_item.__name__,
                        )
                    if isinstance(target_item, ModuleType):
                        # from .take2 import main
                        # main(item_docstring)
                        ndoc = NumpyDocString(
                            f"Was not able to parse docstring for {qa}"
                        )
                    else:
                        continue
                if not isinstance(target_item, ModuleType):
                    arbitrary = []
                execute_exclude_patterns = module_conf.get(
                    "execute_exclude_patterns", None
                )
                ex = exec_
                if execute_exclude_patterns and exec_:
                    for pat in execute_exclude_patterns:
                        if qa.startswith(pat):
                            ex = False
                            break
                # else:
                #     print("will run", qa)

                try:
                    doc_blob, figs = self.do_one_item(
                        target_item, ndoc, infer, ex, qa, config=module_conf
                    )
                    doc_blob.arbitrary = arbitrary
                except Exception:
                    if module_conf.get("exec_failure", None) == "fallback":
                        print("Re-analysing ", qa, "without execution")
                        # debug:
                        doc_blob, figs = self.do_one_item(
                            target_item, ndoc, infer, False, qa, config=module_conf
                        )
                    else:
                        raise
                doc_blob.aliases = collector.aliases[qa]

                # processing....
                doc_blob.signature = doc_blob.content.pop("Signature")
                try:
                    for section in ["Extended Summary", "Summary", "Notes", "Warnings"]:
                        if section in doc_blob.content:
                            if data := doc_blob.content[section]:
                                PX = P2(data)
                                doc_blob.content[section] = Section(PX)
                            else:
                                doc_blob.content[section] = Section()
                except Exception as e:
                    raise type(e)(f"during {qa}") from e

                doc_blob.references = doc_blob.content.pop("References")
                if isinstance(doc_blob.references, str):
                    if doc_blob.references == "":
                        doc_blob.references = None
                    else:
                        assert False
                        doc_blob.references = list(doc_blob.references)
                assert (
                    isinstance(doc_blob.references, list)
                    or doc_blob.references is None
                )
                del doc_blob.content["Examples"]
                del doc_blob.content["index"]
                sections_ = [
                    "Parameters",
                    "Returns",
                    "Raises",
                    "Yields",
                    "Attributes",
                    "Other Parameters",
                    "Warns",
                    ## "Warnings",
                    "Methods",
                    # "Summary",
                    "Receives",
                ]
                from .take2 import Param

                # new_doc_blob._content["Parameters"] = [
                #     Parameter(a, b, c)
                #     for (a, b, c) in new_doc_blob._content.get("Parameters", [])
                # ]
                for s in sections_:
                    if s in doc_blob.content:
                        assert isinstance(
                            doc_blob.content[s], list
                        ), f"{s}, {doc_blob.content[s]} "
                        new_content = Section()
                        for param, type_, desc in doc_blob.content[s]:
                            assert isinstance(desc, list)
                            items = []
                            if desc:
                                items = P2(desc)
                            new_content.append(Param(param, type_, items))
                        doc_blob.content[s] = new_content

                doc_blob.see_also = []
                if see_also := doc_blob.content.get("See Also", None):
                    for nts, d0 in see_also:
                        try:
                            d = d0
                            for (name, type_or_description) in nts:
                                if type_or_description and not d:
                                    desc = type_or_description
                                    if isinstance(desc, str):
                                        desc = [desc]
                                    assert isinstance(desc, list)
                                    desc = paragraphs(desc)
                                    type_ = None
                                else:
                                    desc = d0
                                    type_ = type_or_description
                                    assert isinstance(desc, list)
                                    desc = paragraphs(desc)
                                sai = SeeAlsoItem(Ref(name, None, None), desc, type_)
                                doc_blob.see_also.append(sai)
                                del desc
                                del type_
                        except Exception as e:
                            raise ValueError(
                                f"Error {qa}: {see_also=} | {nts=} | {d0=}"
                            ) from e
                del doc_blob.content["See Also"]

                for k, v in doc_blob.content.items():
                    assert isinstance(v, Section), f"{k} is not a section {v}"
                # end processing

                self.put(qa, json.dumps(doc_blob.to_json(), indent=2))
                for name, data in figs:
                    self.put_raw(name, data)

        found = {}
        not_found = []
        for k, v in collector.aliases.items():
            if [item for item in v if item != k]:
                if shorter := find_cannonical(k, v):
                    found[k] = shorter
                else:
                    not_found.append((k, v))
    doc = [x for pairs in doc for x in header_pass(pairs)]
    doc = header_level_pass(doc)
    doc = [x for pairs in doc for x in example_pass(pairs)]
    doc = [x for pairs in doc for x in block_directive_pass(pairs)]
    doc = deflist_pass(doc)
    doc = [x for pairs in doc for x in paragraphs_pass(pairs)]
    # TODO: third pass to set the header level for each header.
    # TODO: fourth pass to make sections.

    # print(b)
    # print(ex)
    # for w in [80, 120]:
    #     p = Paragraph.parse_lines(ex.split("\n"))
    #     p.width = w
    #     print(p)
    #     print()
    return doc


if __name__ == "__main__":
    if len(sys.argv) > 1:
        what = sys.argv[1]
    else:
        what = "numpy"
    ex = get_object(what).__doc__
    ex = dedent_but_first(ex)
    doc = main(ex)
    for b in doc:
        print(b)
def parse_script(script, ns=None, infer=None, prev="", config=None): """ Parse a script into tokens and use Jedi to infer the fully qualified names of each token. Parameters ---------- script : str the script to tokenize and infer types on ns : dict extra namespace to use with jedi's Interpreter. infer : bool whether to run jedi type inference that can be quite time consuming. prev : str previous lines that lead to this. Yields ------ index index in the tokenstream type pygments token type text text of the token reference : str fully qualified name of the type of current token """ jeds = [] import warnings warnings.simplefilter("ignore", UserWarning) l_delta = len(prev.split("\n")) contextscript = prev + "\n" + script if ns: jeds.append(jedi.Interpreter(contextscript, namespaces=[ns])) jeds.append(jedi.Script(prev + "\n" + script)) P = PythonLexer() for index, type_, text in P.get_tokens_unprocessed(script): line_n, col_n = pos_to_nl(script, index) line_n += l_delta try: ref = None for jed in jeds: failed = "" try: if infer and (text not in (" .=()[],")) and text.isidentifier(): inf = jed.infer(line_n + 1, col_n) if inf: ref = inf[0].full_name # if ref.startswith('builtins'): # ref = '' else: ref = "" except (AttributeError, TypeError, Exception) as e: raise type( e )(f"{contextscript}, {line_n=}, {col_n=}, {prev=}, {jed=}" ) from e failed = "(jedi failed inference)" print("failed inference on ", script, ns, jed, col_n, line_n + 1) break except IndexError: raise ref = "" yield text + failed, ref warnings.simplefilter("default", UserWarning)
def parameter_fixer(params, meta_arg, meta, fname, func_name, config, doc):
    assert "Parameters" in doc
    jump_to_location = False
    if not config.run_fixers:
        return params, jump_to_location
    pnames = [o.strip() for p in params for o in p.name.split(",") if p.name]
    if meta_arg and meta_arg[0] in ["self", "cls"]:
        meta_arg = meta_arg[1:]
    doc_missing = set(meta_arg) - set(pnames) - {"cls"}
    doc_extra = {x for x in set(pnames) - set(meta_arg) if not x.startswith("*")} - {
        "cls",
    }
    for p in params:
        if p[1].startswith("<"):
            jump_to_location = True
    assert doc_extra != {""}, (set(pnames), set(meta_arg), params)
    # don't consider template parameters from numpy/scipy
    doc_extra = {x for x in doc_extra if not (("$" in x) or ("%" in x))}

    def rename_param(source, target):
        renamed = False
        for i, p in enumerate(params):
            if p.name == source:
                params[i] = nds.Parameter(target, *p[1:])
                renamed = True
                break
        return renamed

    if doc_missing and doc_extra:
        # we need to match them, maybe:
        # are we missing *, ** in args and kwargs?
        for stars in ("*", "**"):
            n_star_missing = doc_missing.intersection({stars + k for k in doc_extra})
            if n_star_missing:
                correct = list(n_star_missing)[0]
                incorrect = correct[len(stars):]
                rename_param(incorrect, correct)
                doc_missing.remove(correct)
                doc_extra.remove(incorrect)
        for param in list(doc_extra):
            if (
                param.startswith(('"', "'"))
                and param.endswith(('"', "'"))
                and param[1:-1] in doc_missing
            ):
                correct = param[1:-1]
                rename_param(param, correct)
                print("unquote", param, "to", correct)
                doc_missing.remove(correct)
                doc_extra.remove(param)
        if len(doc_missing) == len(doc_extra) == 1:
            correct = list(doc_missing)[0]
            incorrect = list(doc_extra)[0]
            do_rename = True
            if "*" in correct and ("*" not in incorrect):
                if correct.replace("*", "") != incorrect.replace("*", ""):
                    # this is likely an undocumented **kwargs.
                    do_rename = False
            if do_rename:
                if rename_param(incorrect, correct):
                    print(f"{fname}:{func_name}")
                    print(f"    renamed {incorrect!r} to {correct!r}")
                    doc_missing = set()
                    doc_extra = set()
                else:
                    print("  could not fix:", doc_missing, doc_extra)
    if doc_missing and not doc_extra and config.with_placeholder:
        for param in doc_missing:
            if "*" in param:
                continue
            annotation_str = "<Insert Type here>"
            current_param = [m for m in meta["simple"] if m.arg == param]
            assert len(current_param) == 1, (current_param, meta, param)
            current_param = current_param[0]
            if type(current_param.annotation).__name__ == "Name":
                annotation_str = str(current_param.annotation.id)
            doc["Parameters"].append(
                nds.Parameter(
                    param,
                    f"{annotation_str}",
                    ["<Multiline Description Here>"],
                )
            )
    elif (
        (not doc_missing)
        and doc_extra
        and ("Parameters" in doc)  # always True
        and (not meta["varkwargs"])
    ):
        print(f"{fname}:{func_name}")
        to_remove = [p for p in doc["Parameters"] if p[0] in doc_extra]
        for remove_me in to_remove:
            if " " in remove_me.name and not remove_me.type and not remove_me.desc:
                # this is likely some extra text
                continue
            print("     removing parameters", remove_me.name)
            params.remove(remove_me)
    elif doc_missing or doc_extra:
        print(f"{fname}:{func_name}")
        if doc_missing:
            print("  missing:", doc_missing)
        if doc_extra:
            print("  extra:", doc_extra)
    return params, jump_to_location
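# The "*"/"**" matching heuristic above, reduced to plain sets: a
# documented `kwargs` is matched back to the real `**kwargs` argument.
doc_missing, doc_extra = {"**kwargs"}, {"kwargs"}
for stars in ("*", "**"):
    hit = doc_missing & {stars + k for k in doc_extra}
    if hit:
        correct = next(iter(hit))
        doc_missing.discard(correct)
        doc_extra.discard(correct[len(stars):])
assert not doc_missing and not doc_extra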