def test_walk_directory(siteroot: SiteRoot) -> None:
    """walk_directory skips ignored patterns and attaches sidecar .props.yml metadata."""
    file1 = siteroot.write_text(siteroot.contents / "dir1/file1", "")
    siteroot.write_text(siteroot.contents / "dir1/file1.props.yml", "name: value")
    file2 = siteroot.write_text(siteroot.contents / "dir1/dir2/file2", "")
    siteroot.write_text(siteroot.contents / "dir1/file3.bak", "")

    found = sorted(
        loader.walk_directory(siteroot.contents, {"*.bak"}),
        key=lambda src: str(src.srcpath),
    )

    # The .bak file is ignored and the .props.yml is folded into file1's
    # metadata, so exactly the two content files remain.
    assert len(found) == 2
    assert found[0] == ContentSrc(
        srcpath=str(file2),
        contentpath=(("dir1", "dir2"), "file2"),
        package="",
        metadata={},
        mtime=found[0].mtime,
    )
    assert found[1] == ContentSrc(
        srcpath=str(file1),
        contentpath=(("dir1",), "file1"),
        package="",
        metadata={"name": "value"},
        mtime=found[1].mtime,
    )
def split_cells(
    src: ContentSrc, cells: List[Dict[str, Any]]
) -> List[Tuple[ContentSrc, List[Dict[str, Any]]]]:
    """Partition notebook cells into sub-sources at cells that carry a filename.

    If the first cell has no filename marker the whole list stays attached to
    ``src``.  Otherwise each filename-bearing cell starts a new sub-source and
    following unnamed cells are appended to the most recent one.
    """
    if not cells:
        return [(src, cells)]

    first_name = get_cellfilename(cells[0])
    if not first_name:
        # No split point at the top: keep everything under the original source.
        return [(src, cells)]

    def make_subsrc(name: str) -> ContentSrc:
        # New source sharing the directory part of the original content path.
        return src.copy()._replace(contentpath=(src.contentpath[0], name))

    result = [(make_subsrc(first_name), [cells[0]])]
    for cell in cells[1:]:
        name = get_cellfilename(cell)
        if name:
            result.append((make_subsrc(name), [cell]))
        else:
            # Unnamed cell: belongs to the sub-source currently being built.
            result[-1][1].append(cell)
    return result
def yamlloader(site: site.Site, src: ContentSrc) -> Sequence[Tuple[ContentSrc, None]]:
    """Load a YAML content file and merge its mapping into ``src.metadata``.

    The content type defaults to ``"config"`` when the YAML does not supply one.
    Returns a single-element list with no body (YAML sources carry metadata only).
    """
    text = src.read_bytes()
    # Empty documents parse to None; substitute an empty mapping.
    metadata = yaml.load(text, Loader=yaml.FullLoader) or {}
    if not isinstance(metadata, (dict, list, tuple)):
        # NOTE(review): this only logs and then falls through — the item
        # assignment below will raise for scalar values.  Also, list/tuple
        # values pass this check but do not support ``metadata["type"] = ...``;
        # confirm whether non-dict YAML is ever expected here.
        logger.error(f"Error: {src.repr_filename()} is not valid YAML file.")
    if "type" not in metadata:
        metadata["type"] = "config"
    src.metadata.update(metadata)
    src.metadata["loader"] = "yaml"
    return [(src, None)]
def walk_package(package: str, path: str, ignores: Set[str]) -> Iterator[ContentSrc]:
    """Yield a ContentSrc for each file under ``path`` inside ``package``.

    Files are enumerated via importlib resources; a sidecar metadata file
    (``<name><METADATA_FILE_SUFFIX>``) next to a source file supplies its
    metadata.  ``mtime`` is None because package resources have no stat.
    """
    logger.info(f"Loading {package}/{path}")
    # Normalize to a trailing slash so relpath computations below are stable.
    if not path.endswith("/"):
        path = path + "/"
    packagepath = importlib_resources.files(package)  # type: ignore
    root = packagepath / path
    if not root.is_dir():
        return
    for srcpath in _iter_package_files(root, ignores):
        # Content path is relative to ``root``; srcpath is kept relative to
        # the package itself.
        destname = posixpath.relpath(srcpath, root)
        # NOTE(review): ``dirname``/``fname`` are computed but never used.
        dirname, fname = posixpath.split(posixpath.relpath(srcpath, packagepath))
        metadatapath = (
            srcpath.parent / f"{srcpath.name}{miyadaiku.METADATA_FILE_SUFFIX}"
        )
        if metadatapath.exists():
            text = metadatapath.read_bytes()
            # Empty metadata documents parse to None; substitute an empty dict.
            metadata = yaml.load(text, Loader=yaml.FullLoader) or {}
        else:
            metadata = {}
        yield ContentSrc(
            package=package,
            srcpath=str(posixpath.relpath(srcpath, packagepath)),
            metadata=metadata,
            contentpath=to_contentpath(destname),
            mtime=None,
        )
def loadfile(
    site: site.Site, src: ContentSrc, bin: bool, filecache: shelve.DbfilenameShelf
) -> List[Tuple[ContentSrc, Optional[bytes]]]:
    """Load a content source, serving from ``filecache`` when its stat is unchanged.

    Text sources are dispatched to a loader chosen by file extension; ``bin``
    forces the binary loader.  String bodies are encoded to UTF-8 so the cache
    stores only bytes (or None for body-less entries).
    """
    curstat = src.stat()
    key = f"{src.package}_::::_{src.srcpath}"
    stat, bodies = filecache.get(key, (None, None))
    if stat:
        if stat == curstat:
            # Cache hit: the file has not changed since it was last loaded.
            return cast(List[Tuple[ContentSrc, Optional[bytes]]], bodies)

    if not bin:
        assert src.srcpath
        ext = os.path.splitext(src.srcpath)[1]
        loader = FILELOADERS.get(ext, binloader)
    else:
        loader = binloader

    ret: List[Tuple[ContentSrc, Optional[bytes]]] = []
    for contentsrc, body in loader(site, src):
        assert contentsrc.metadata["loader"]
        # BUG FIX: these branches were separate ``if`` statements, so a bytes
        # body fell through to the else clause and was appended a second time
        # as (contentsrc, None).  ``elif`` makes the cases mutually exclusive.
        if isinstance(body, bytes):
            ret.append((contentsrc, body))
        elif isinstance(body, str):
            ret.append((contentsrc, body.encode("utf-8")))
        else:
            ret.append((contentsrc, None))

    filecache[key] = curstat, ret
    return ret
def load(src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Parse the source (package resource or file) and merge extracted metadata."""
    if src.package:
        # Package resources are read as text and parsed in memory.
        metadata, body = _load_string(src.read_text())
    else:
        assert src.srcpath
        metadata, body = _load_file(src.srcpath)
    src.metadata.update(metadata)
    return [(src, body)]
def load(src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Split the source text into sub-sources and parse each one into HTML."""
    text = src.read_text()
    results: List[Tuple[ContentSrc, str]] = []
    for subsrc, chunk in parsesrc.splitsrc(src, text):
        meta, html = _load_string(subsrc, chunk)
        subsrc.metadata.update(meta)
        results.append((subsrc, html))
    return results
def test_walkpackage() -> None:
    """walk_package enumerates package resources, honoring ignore patterns."""
    found = sorted(
        loader.walk_package("package1", "contents", {"*.bak", ".*"}),
        key=lambda src: str(src.srcpath),
    )
    assert len(found) == 7
    assert found[0] == ContentSrc(
        package="package1",
        srcpath="contents/dir1/a",
        contentpath=(("dir1",), "a"),
        metadata={"test": "value"},
        mtime=found[0].mtime,
    )
def add_bytes(self, type: str, path: str, body: bytes) -> Content:
    """Register an in-memory content body under ``path`` and return the content."""
    # Synthesize a source record with no backing file or package.
    src = ContentSrc(
        package=None,
        srcpath=None,
        metadata={"type": type},
        contentpath=to_contentpath(path),
        mtime=0,
    )
    built = contents.build_content(src, body)
    self._contentfiles[built.src.contentpath] = built
    return built
def test_package(siteroot: SiteRoot) -> None:
    """The ipynb loader reads a notebook shipped inside a package resource."""
    site = siteroot.load({}, {})
    ipynb.init(site)
    src = ContentSrc(
        package="pkg_ipynb",
        srcpath="files/test.ipynb",
        metadata={},
        contentpath=((), "test.html"),
        mtime=0,
    )
    results = ipynb.load(src)
    assert len(results) == 1
    loaded, text = results[0]
    assert loaded.metadata["type"] == "article"
    print(text)
def test_split(siteroot: SiteRoot) -> None:
    """A notebook with %%% split markers yields independent sub-sources."""
    site = siteroot.load({}, {})
    ipynb.init(site)
    src = ContentSrc(
        package=None,
        srcpath=str(DIR / "test_splitsrc.ipynb"),
        metadata={},
        contentpath=((), "test_splitsrc.ipynb"),
        mtime=0,
    )
    parts = ipynb.load(src)
    assert len(parts) == 2

    # (expected filename, text fragments that must appear, expected meta value)
    expectations = [
        ("file1", ["1+1", "2+2", "test1"], "value1"),
        ("file2", ["3+3"], "value2"),
    ]
    for (subsrc, html), (fname, fragments, metaval) in zip(parts, expectations):
        assert subsrc.contentpath == ((), fname)
        soup = BeautifulSoup(html, "html.parser")
        print(soup.text)
        # Split markers and metadata headers must not leak into the output.
        assert "%%%" not in soup.text
        assert "meta" not in soup.text
        for fragment in fragments:
            assert fragment in soup.text
        assert subsrc.metadata == {
            "type": "article",
            "meta": metaval,
            "has_jinja": True,
            "loader": "ipynb",
        }
def test_load(siteroot: SiteRoot) -> None:
    """Jinja tags in markdown cells survive nbconvert without being wrapped."""
    site = siteroot.load({}, {})
    ipynb.init(site)
    src = ContentSrc(
        package=None,
        srcpath=str(DIR / "test.ipynb"),
        metadata={},
        contentpath=((), "test.html"),
        mtime=0,
    )
    results = ipynb.load(src)
    assert len(results) == 1
    loaded, text = results[0]
    assert loaded.metadata["type"] == "article"
    # The jinja expression is preserved verbatim, not paragraph-wrapped.
    assert "{{ 1+1 }}" in text
    assert "<p>{{ 1+1 }}</p>" not in text
    assert "hidden cell" not in text
def walk_directory(path: Path, ignores: Set[str]) -> Iterator[ContentSrc]:
    """Yield a ContentSrc for every non-ignored file under ``path``.

    A sidecar metadata file (``<name><METADATA_FILE_SUFFIX>``) next to a
    source file supplies its metadata.  Ignored directory names are pruned
    from the walk in place.
    """
    logger.info(f"Loading {path}")
    path = path.expanduser().resolve()
    if not path.is_dir():
        return

    for root, dirs, files in os.walk(path):
        rootpath = Path(root)
        # Skip hidden directories.
        # NOTE(review): this runs before the ``dirs`` pruning below, so the
        # subdirectories of a hidden directory are still traversed unless
        # they match ``ignores`` — confirm whether that is intended.
        if rootpath.stem.startswith("."):
            continue
        # Prune ignored directory names in place so os.walk does not descend.
        dirs[:] = (dirname for dirname in dirs if not is_ignored(ignores, dirname))
        filenames = (
            filename for filename in files if not is_ignored(ignores, filename)
        )
        for name in filenames:
            filename = (rootpath / name).resolve()
            dirname, fname = os.path.split(filename)
            metadatafile = os.path.join(
                dirname, f"{fname}{miyadaiku.METADATA_FILE_SUFFIX}"
            )
            metadata: Dict[Any, Any] = {}
            if os.path.isfile(metadatafile):
                # BUG FIX: the file handle was opened without ever being
                # closed; use a context manager to release it deterministically.
                with open(metadatafile, encoding=miyadaiku.YAML_ENCODING) as f:
                    text = f.read()
                # Empty metadata documents parse to None; keep an empty dict.
                metadata = yaml.load(text, Loader=yaml.FullLoader) or {}
            mtime = filename.stat().st_mtime
            yield ContentSrc(
                package="",
                srcpath=str(filename),
                metadata=metadata,
                contentpath=to_contentpath(str(filename.relative_to(path))),
                mtime=mtime,
            )
def to_contentsrc(path: pathlib.Path) -> ContentSrc:
    """Wrap a filesystem path in a minimal top-level ContentSrc."""
    return ContentSrc(
        package="",
        srcpath=str(path),
        metadata={},
        contentpath=((), path.name),
        mtime=0,
    )
def load(site: site.Site, src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Parse the source text and merge the extracted metadata into the source."""
    text = src.read_text()
    parsed_meta, parsed_html = _load_string(text)
    src.metadata.update(parsed_meta)
    return [(src, parsed_html)]
def load(src: ContentSrc) -> List[Tuple[ContentSrc, str]]:
    """Convert a Jupyter notebook into one or more (source, HTML) fragments.

    The notebook is split into sub-sources (see ``split_cells``); for each
    sub-source the leading markdown/raw cell may carry a YAML metadata header,
    jinja tags are temporarily replaced by md5 digests so nbconvert does not
    mangle them, and trailing empty cells are trimmed before export.
    """
    s = src.read_text()
    json = nbformat.reads(s, nbformat.current_nbformat)
    cells = split_cells(src, json.get("cells", []))
    ret = []
    for subsrc, subcells in cells:
        subjson = copy.deepcopy(json)
        cellmeta: Dict[str, Any] = {}
        if subcells:
            top = subcells[0]
            if top.get("cell_type", "") in ("markdown", "raw"):
                srcstr = top.get("source", "")
                if srcstr:
                    # Extract the '---'-delimited YAML header from the first cell.
                    cellmeta, srcstr = parsesrc.split_yaml(srcstr, "---")
                    top["source"] = srcstr
        # remove raw cells
        newcells = [c for c in subcells if c.get("cell_type", "") != "raw"]
        jinjatags = {}
        idx = 0

        def conv_jinjatag(s: str) -> str:
            # Map each jinja tag to a unique digest so it can be restored
            # verbatim after nbconvert runs; ``idx`` disambiguates duplicates.
            nonlocal idx
            idx += 1
            digest = hashlib.md5(s.encode("utf-8")).hexdigest() + str(idx)
            jinjatags[digest] = s
            return digest

        # save jinja tag (enabled unless the cell metadata opts out)
        if cellmeta.get("has_jinja", True):
            for cell in subcells:
                if cell.get("cell_type", "") == "markdown":
                    newsrc = parsesrc.replace_jinjatag(cell.get("source", ""), conv_jinjatag)
                    cell["source"] = newsrc

        # remove empty cells at bottom (always keep at least one cell)
        while len(newcells) > 1:
            c = newcells[-1]
            celltype = c.get("cell_type", "")
            if celltype == "markdown":
                if c["source"].strip():
                    break
            elif celltype == "code":
                if c["source"].strip() or c["outputs"]:
                    break
            else:
                break
            # remove cell
            del newcells[-1]

        subjson["cells"] = newcells
        meta, html = _export(
            subjson,
            cellmeta.get("nbconvert_template", None),
            cellmeta.get("nbconvert_templatefile", None),
        )
        # Cell metadata wins over metadata produced by the exporter.
        meta.update(cellmeta)
        subsrc.metadata.update(meta)

        # restore jinja tag
        # NOTE(review): this translate maps "{" -> "{" and "}" -> "}", an
        # identity transformation — as written it is a no-op.  It looks like
        # it was meant to escape literal braces (e.g. to HTML entities) before
        # re-inserting the saved jinja tags; confirm against upstream.
        html = html.translate({ord("{"): "{", ord("}"): "}"})
        for hash, s in jinjatags.items():
            # Replace the first occurrence only; a digest wrapped in its own
            # <p> element is consumed together with the wrapper.
            html = re.sub(rf"(<p>\s*{hash}\s*</p>)|{hash}", s, html, 1)

        ret.append((subsrc, html))
    return ret
def binloader(
    site: site.Site, src: ContentSrc
) -> Sequence[Tuple[ContentSrc, Optional[str]]]:
    """Mark the source as opaque binary content; no body text is produced."""
    src.metadata.update({"type": "binary", "loader": "binary"})
    return [(src, None)]