def parse_toc(toc_file):
    """Parse a ToC file to a site-map YAML."""
    # Read the ToC into a site-map, then reduce it to plain JSON-like data.
    site_map_data = parse_toc_yaml(toc_file).as_json()
    # Keep the keys in their original order and force block-style YAML.
    rendered_yaml = yaml.dump(
        site_map_data,
        sort_keys=False,
        default_flow_style=False,
    )
    click.echo(rendered_yaml)
def test_get_lesson_markdown():
    """Print the markdown produced for a single lesson index document."""
    lesson_name = (
        "predictive_modeling_pipeline/01_tabular_data_exploration_index")
    json_info = parse_toc_yaml(root_dir / "jupyter-book/_toc.yml").as_json()
    lesson_document = json_info["documents"][lesson_name]
    print(get_lesson_markdown(lesson_document))
def test_get_module_markdown():
    """Print the markdown produced for one hand-written module description.

    The module dict mimics an entry of the root document's ``subtrees`` list:
    an ``items`` list of document names plus a ``caption``.
    """
    # Anchor the ToC path on ``root_dir`` like the sibling tests do, so the
    # test does not depend on the current working directory.
    toc_path = root_dir / "jupyter-book/_toc.yml"
    site_map = parse_toc_yaml(toc_path)
    json_info = site_map.as_json()
    documents = json_info["documents"]
    module = {
        "items": [
            "predictive_modeling_pipeline/predictive_modeling_module_intro",
            "predictive_modeling_pipeline/01_tabular_data_exploration_index",
            "predictive_modeling_pipeline/02_numerical_pipeline_index",
            "predictive_modeling_pipeline/03_categorical_pipeline_index",
            "predictive_modeling_pipeline/wrap_up_quiz",
            "predictive_modeling_pipeline/predictive_modeling_module_take_away",
        ],
        "caption": "The predictive modeling pipeline",
    }
    print(get_module_markdown(module, documents))
def get_full_index_markdown(toc_path):
    """Return the markdown index built from the ToC file at ``toc_path``.

    Every module listed in the root document's ``subtrees`` is rendered with
    ``get_module_markdown``, except the entries whose caption is ``None`` or
    contains "🚧" or "Appendix". The rendered modules are joined with a blank
    line between them.
    """
    json_info = parse_toc_yaml(toc_path).as_json()
    documents = json_info["documents"]
    root_doc = documents[json_info["root"]]

    def should_keep_module(caption):
        # Not sure exactly why but index is listed and has a None caption ...
        if caption is None:
            return False
        # Drop work-in-progress modules and appendices.
        return not ("🚧" in caption or "Appendix" in caption)

    rendered_modules = []
    for module in root_doc["subtrees"]:
        if should_keep_module(module["caption"]):
            rendered_modules.append(get_module_markdown(module, documents))
    return "\n\n".join(rendered_modules)
def test_json_manipulation():
    """Walk through the JSON site-map format, one notebook-style cell at a time."""
    # %%
    json_info = parse_toc_yaml(root_dir / "jupyter-book/_toc.yml").as_json()
    documents = json_info["documents"]
    root_doc = documents[json_info["root"]]
    root_doc
    # The root document only seems to carry the first level of information
    # (lessons, not individual notebooks). It looks like this:
    # {'docname': 'toc',
    #  'subtrees': [{'items': ['index'],
    #                'caption': None,
    #                ...},
    #               {'items': ['ml_concepts/slides', 'ml_concepts/quiz_intro_01'],
    #                'caption': 'Machine Learning Concepts',
    #                ...},
    #               {'items': ['predictive_modeling_pipeline/predictive_modeling_module_intro',
    #                          'predictive_modeling_pipeline/01_tabular_data_exploration_index',
    #                          'predictive_modeling_pipeline/02_numerical_pipeline_index',
    #                          'predictive_modeling_pipeline/03_categorical_pipeline_index',
    #                          'predictive_modeling_pipeline/wrap_up_quiz',
    #                          'predictive_modeling_pipeline/predictive_modeling_module_take_away'],
    #                'caption': 'The predictive modeling pipeline',
    #                ...},
    #               ...]}

    # %%
    # The root doc has a 'subtrees' key holding a list of module dicts. Each
    # module dict has an 'items' key with only the first level (typically the
    # *_index files).
    root_doc["subtrees"][:3]

    # %%
    # More info about each document is available directly by name. For example
    # the _index files will have a 'subtrees' key with only one element.
    documents["predictive_modeling_pipeline/01_tabular_data_exploration_index"]
    # So for a lesson index, go through 'subtrees' to reach the individual
    # notebooks:
    # {'docname': 'predictive_modeling_pipeline/01_tabular_data_exploration_index',
    #  'title': None,
    #  'subtrees': [{'items': ['python_scripts/01_tabular_data_exploration',
    #                          'python_scripts/01_tabular_data_exploration_ex_01',
    #                          'python_scripts/01_tabular_data_exploration_sol_01',
    #                          'predictive_modeling_pipeline/01_tabular_data_exploration_quiz_m1_01'],
    #                'caption': None,
    #                'hidden': True,
    #                'maxdepth': -1,
    #                'numbered': False,
    #                'reversed': False,
    #                'titlesonly': True}]}

    # %%
    # Exactly one module is expected to carry this caption; the single-element
    # unpacking fails otherwise.
    [pipeline_module] = [
        candidate for candidate in root_doc["subtrees"]
        if candidate["caption"] == "The predictive modeling pipeline"
    ]
    pipeline_module

    # %%
    lesson_names = pipeline_module["items"]
    # without subtrees (predictive modeling intro in this case)
    documents[lesson_names[0]]

    # %%
    # with subtrees (tabular data exploration lesson index)
    documents[lesson_names[1]]
def test_malformed_file_parse(path: Path):
    """Check that parsing ``path`` raises ``MalformedError`` with its message.

    The expected message pattern is looked up in ``ERROR_MESSAGES`` by file
    name.
    """
    expected_pattern = ERROR_MESSAGES[path.name]
    with pytest.raises(MalformedError, match=expected_pattern):
        parse_toc_yaml(path)
def test_create_toc_dict(path: Path, data_regression):
    """Regression-check the ToC dict rebuilt from the site-map parsed at ``path``."""
    toc_dict = create_toc_dict(parse_toc_yaml(path))
    data_regression.check(toc_dict)
def test_file_to_sitemap(path: Path, data_regression):
    """Regression-check the JSON form of the site-map parsed from ``path``."""
    site_map_json = parse_toc_yaml(path).as_json()
    data_regression.check(site_map_json)
def build(
    path_source,
    path_output,
    config,
    toc,
    warningiserror,
    nitpick,
    keep_going,
    freshenv,
    builder,
    custom_builder,
    verbose,
    quiet,
    individualpages,
    get_config_only=False,
):
    """Convert your book's or page's content to HTML or a PDF."""
    # NOTE(review): the docstring above is presumably surfaced as CLI help
    # text (click), so it is left untouched and the details live here.
    #
    # Parameters, as used below:
    #   path_source     book folder, or a single file for a "page" build.
    #   path_output     base folder for the build output; falls back to the
    #                   first element returned by ``find_config_path``.
    #   config          explicit config file path; otherwise ``_config.yml``
    #                   next to the found config location, if one was found.
    #   toc             ToC file path; defaults to ``_toc.yml`` in the source
    #                   folder (book builds only).
    #   warningiserror, keep_going, freshenv, verbose, quiet
    #                   forwarded to ``build_sphinx``.
    #   nitpick         when truthy, sets the ``nitpicky`` config override.
    #   builder         selects the output sub-folder and ``BUILDER_OPTS`` key.
    #   custom_builder  builder name used when ``builder == "custom"``.
    #   individualpages only accepted with the ``pdflatex`` builder.
    #   get_config_only when true, nothing is built and the tuple
    #                   ``(path_config, PATH_SRC_FOLDER, config_overrides)``
    #                   is returned instead.
    #
    # Raises ``IOError`` when a config path is resolved but does not exist.
    from sphinx_external_toc.parsing import MalformedError, parse_toc_yaml

    from jupyter_book import __version__ as jbv
    from jupyter_book.sphinx import build_sphinx

    if not get_config_only:
        click.secho(f"Running Jupyter-Book v{jbv}", bold=True, fg="green")

    # Paths for the notebooks
    PATH_SRC_FOLDER = Path(path_source).absolute()

    config_overrides = {}
    use_external_toc = True
    found_config = find_config_path(PATH_SRC_FOLDER)
    BUILD_PATH = path_output if path_output is not None else found_config[0]

    # Set config for --individualpages option (pages, documents)
    if individualpages:
        if builder != "pdflatex":
            _error(""" Specified option --individualpages only works with the following builders: pdflatex """)

    # Build Page
    if not PATH_SRC_FOLDER.is_dir():
        # it is a single file
        build_type = "page"
        use_external_toc = False
        PATH_SRC = Path(path_source)
        PATH_SRC_FOLDER = PATH_SRC.parent.absolute()
        PAGE_NAME = PATH_SRC.with_suffix("").name

        # checking if the page is inside a sub directory
        # then changing the build_path accordingly.
        # ``relative_to`` raises ValueError when BUILD_PATH is not a real
        # ancestor of the page folder. A plain substring test on the path
        # strings cannot tell "/a/b" from "/a/bc" (or a relative output path
        # from an absolute source path), so rely on the exception instead.
        try:
            subdir = str(PATH_SRC_FOLDER.relative_to(BUILD_PATH))
        except ValueError:
            subdir = None
        if subdir and subdir != ".":
            subdir = subdir.replace("/", "-")
            subdir = subdir + "-" + PAGE_NAME
            BUILD_PATH = Path(BUILD_PATH).joinpath("_build", "_page", subdir)
        else:
            BUILD_PATH = Path(BUILD_PATH).joinpath("_build", "_page", PAGE_NAME)

        # Find all files that *aren't* the page we're building and exclude them
        to_exclude = [
            op.relpath(ifile, PATH_SRC_FOLDER)
            for ifile in iglob(str(PATH_SRC_FOLDER.joinpath("**", "*")),
                               recursive=True)
            if ifile != str(PATH_SRC.absolute())
        ]
        to_exclude.extend(
            ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"])

        # Now call the Sphinx commands to build
        config_overrides = {
            "master_doc": PAGE_NAME,
            "exclude_patterns": to_exclude,
            "html_theme_options": {
                "single_page": True
            },
            # --individualpages option set to True for page call
            "latex_individualpages": True,
        }

    # Build Project
    else:
        build_type = "book"
        PAGE_NAME = None
        BUILD_PATH = Path(BUILD_PATH).joinpath("_build")

        # Table of contents
        toc = PATH_SRC_FOLDER.joinpath("_toc.yml") if toc is None else Path(
            toc)

        if not get_config_only:
            if not toc.exists():
                _error("Couldn't find a Table of Contents file. "
                       "To auto-generate one, run:"
                       f"\n\n\tjupyter-book toc from-project {path_source}")

            # we don't need to read the toc here, but do so to control the error message
            try:
                parse_toc_yaml(toc)
            except MalformedError as exc:
                _error(f"The Table of Contents file is malformed: {exc}\n"
                       "You may need to migrate from the old format, using:"
                       f"\n\n\tjupyter-book toc migrate {toc} -o {toc}")
            # TODO could also check/warn if the format is not set to jb-article/jb-book?

        config_overrides["external_toc_path"] = toc.as_posix()

        # Builder-specific overrides
        if builder == "pdfhtml":
            config_overrides["html_theme_options"] = {"single_page": True}

        # --individualpages option passthrough
        config_overrides["latex_individualpages"] = individualpages

    # Use the specified configuration file, or one found in the root directory
    path_config = config or (found_config[0].joinpath("_config.yml")
                             if found_config[1] else None)
    if path_config and not Path(path_config).exists():
        raise IOError(f"Config file path given, but not found: {path_config}")

    # Each builder family writes into its own sub-folder of the build path.
    if builder in ["html", "pdfhtml", "linkcheck"]:
        OUTPUT_PATH = BUILD_PATH.joinpath("html")
    elif builder in ["latex", "pdflatex"]:
        OUTPUT_PATH = BUILD_PATH.joinpath("latex")
    elif builder in ["dirhtml"]:
        OUTPUT_PATH = BUILD_PATH.joinpath("dirhtml")
    elif builder in ["singlehtml"]:
        OUTPUT_PATH = BUILD_PATH.joinpath("singlehtml")
    elif builder in ["custom"]:
        OUTPUT_PATH = BUILD_PATH.joinpath(custom_builder)
        # Register the custom name so the BUILDER_OPTS lookup below finds it.
        BUILDER_OPTS["custom"] = custom_builder

    if nitpick:
        config_overrides["nitpicky"] = True

    # If we only want config (e.g. for printing/validation), stop here
    if get_config_only:
        return (path_config, PATH_SRC_FOLDER, config_overrides)

    # print information about the build
    click.echo(
        click.style("Source Folder: ", bold=True, fg="blue") +
        click.format_filename(f"{PATH_SRC_FOLDER}"))
    click.echo(
        click.style("Config Path: ", bold=True, fg="blue") +
        click.format_filename(f"{path_config}"))
    click.echo(
        click.style("Output Path: ", bold=True, fg="blue") +
        click.format_filename(f"{OUTPUT_PATH}"))

    # Now call the Sphinx commands to build
    result = build_sphinx(
        PATH_SRC_FOLDER,
        OUTPUT_PATH,
        use_external_toc=use_external_toc,
        noconfig=True,
        path_config=path_config,
        confoverrides=config_overrides,
        builder=BUILDER_OPTS[builder],
        warningiserror=warningiserror,
        keep_going=keep_going,
        freshenv=freshenv,
        verbosity=verbose,
        quiet=quiet > 0,
        really_quiet=quiet > 1,
    )

    builder_specific_actions(result, builder, OUTPUT_PATH, build_type,
                             PAGE_NAME, click.echo)