def check_for_broken_links(notes_folder: str, cache_folder: str) -> None:
    """Fail the run if any recently-changed note links to a missing note.

    Only notes that changed during the current script run are scanned: a note
    counts as changed when its 'last_checked' stamp in the JSON state file
    equals the run's 'runtime' stamp. Every markdown link inside those notes
    is collected and checked for existence on disk.

    Args:
        notes_folder (str): Folder that contains the markdown notes.
        cache_folder (str): Folder that contains the JSON state file.

    Raises:
        Exception: if one or more linked-to markdown files do not exist.
    """
    logger: Logger = get_logger()
    # figure out which files have changed since the last time we ran the
    # static site generator, so that we only check if there are broken links
    # in those files
    state_file: dict = util.read_existing_json_state_file(
        location=cache_folder)
    files_to_check_as_they_may_not_exist: Set[str] = set()
    for file_name in os.listdir(notes_folder):
        if not util.is_md(file_name):
            continue
        key: str = util.strip_file_extension(file_name)
        # NOTE(review): assumes setup_json_state_file() already populated an
        # entry for every markdown file; a file missing from the state file
        # would raise KeyError here — confirm the orchestration order.
        if state_file['files'][key]['last_checked'] != state_file['runtime']:
            continue
        # add all of the markdown links in this file to the set of files
        # to check
        with open(util.path(notes_folder, file_name), 'r') as f:
            contents = f.read()
        # the results of re.findall() will look something like
        # [('Page B', 'pageB.md')]
        # where the link in markdown would've been [Page B](pageB.md)
        for _, link in util.md_links.findall(contents):
            if util.is_md(link):
                files_to_check_as_they_may_not_exist.add(link)

    # os.path.isfile avoids opening every file just to immediately close it
    # (the original open()/FileNotFoundError round-trip)
    to_report: Set[str] = {
        file_name
        for file_name in files_to_check_as_they_may_not_exist
        if not os.path.isfile(util.path(notes_folder, file_name))
    }

    if to_report:
        for missing_file in to_report:
            logger.error('missing file \'%s\' is referenced in a bad link',
                         missing_file)
        # fail with an error, and do not continue with site generation
        raise Exception(
            f"{len(to_report)} broken links were found in your notes")
def setup_json_state_file(location: str, notes_folder: str) -> None:
    """
    The main orchestrator of the state file mechanics.

    Records the current run time in the state file, then refreshes the
    per-file sha256/last_checked metadata for every markdown note that is new
    or whose content hash has changed. This method must be idempotent.

    Args:
        location (str): The relative or absolute location of the folder that
            contains the JSON state file
        notes_folder (str): The relative or absolute location of the folder
            that contains all of your markdown notes
    """
    # every sibling function obtains its own logger; the original relied on a
    # module-level `logger` that is not visible here
    logger: Logger = get_logger()
    state_file: dict = util.read_existing_json_state_file(location=location)

    # record current script runtime
    now: datetime = datetime.utcnow()
    now_str: str = now.strftime(DATE_TIME_FORMAT)
    state_file['runtime'] = now_str

    # ensure that the files section of the state file exists
    state_file.setdefault('files', {})

    # ensure that file data is up to date
    for file_name_ in os.listdir(notes_folder):
        if not util.is_md(file_name_):
            continue
        file_path: str = util.path(notes_folder, file_name_)
        key: str = util.strip_file_extension(file_name_)

        # if it's a new file, populate the metadata in one go
        if key not in state_file['files']:
            logger.info(f'adding new key in files: {key}')
            state_file['files'][key] = {
                'sha256': util.sha256(file_path),
                'last_checked': now_str,
            }
            # we are done processing this file, move to the next one
            continue

        # if the file was modified since we last checked it (which we know
        # has happened if the hash has changed) then update its info
        current_file_hash: str = util.sha256(file_path)
        if current_file_hash != state_file['files'][key]['sha256']:
            logger.info(f'updating changed key: {key}')
            state_file['files'][key]['sha256'] = current_file_hash
            state_file['files'][key]['last_checked'] = now_str

    # save the new state of the JSON file to disk so that we can use it
    # the next time the script is run
    util.persist_json(state_file, location)
def todo_data(folder_path: str) -> List[Tuple[str, List[str]]]:
    """Collect the TODO entries found in each markdown note of *folder_path*.

    Returns:
        A list of (file name, todos) pairs, in directory-listing order,
        including only files that actually contain at least one TODO.
    """
    md_names = (name for name in os.listdir(folder_path) if util.is_md(name))
    with_todos = ((name, util.extract_todos(util.path(folder_path, name)))
                  for name in md_names)
    return [(name, todos) for name, todos in with_todos if todos]
def link_data(folder_path: str) -> List[Tuple[str, str]]:
    """Pair every markdown note with its title, most recently modified first.

    Returns:
        A list of (file name, note title) tuples sorted by each file's
        modification time on disk, newest first.
    """
    entries: List[Tuple[str, str]] = []
    for name in os.listdir(folder_path):
        if util.is_md(name):
            entries.append(
                (name, util.note_title(util.path(folder_path, name))))
    # newest notes float to the top of the generated link list
    return sorted(
        entries,
        key=lambda entry: os.path.getmtime(util.path(folder_path, entry[0])),
        reverse=True)
def generate_backlinks_files(notes_folder: str, backlinks_folder: str) -> None:
    """Regenerate the .backlinks file for every note affected by this run.

    A note needs its backlinks refreshed when it changed during the current
    script run, or when a changed note links to it (links go two ways). For
    each such note, every other note is scanned for links pointing at it and
    the results are written to ``<backlinks_folder>/<note>.backlinks``.

    Args:
        notes_folder (str): Folder that contains the markdown notes.
        backlinks_folder (str): Folder where the .backlinks files are written;
            it also holds the JSON state file used to detect changed notes.
    """
    logger: Logger = get_logger()
    file_names: List[str] = markdown_filenames(folder_path=notes_folder)
    logger.info(f'Found {len(file_names)} files in {notes_folder}')
    util.create_folder(location=backlinks_folder)
    logger.info(f'Will put backlinks into: {backlinks_folder}/')

    # read every note from disk exactly once; the original re-read each
    # note's contents for every relevant file, i.e. O(n*m) file reads
    contents_by_name: dict = {}
    for file_name in file_names:
        with open(util.path(notes_folder, file_name), 'r') as f:
            contents_by_name[file_name] = f.read()

    # find all of the files that have changed since the last script run by
    # looking into the JSON state file to speed up the backlinks generation
    state_file: dict = util.read_existing_json_state_file(
        location=backlinks_folder)
    relevant_file_names: Set[str] = set()
    for file_name in file_names:
        key: str = util.strip_file_extension(file_name)
        if state_file['files'][key]['last_checked'] != state_file['runtime']:
            continue
        relevant_file_names.add(file_name)
        # ensure that we also refresh the backlinks for the files that are
        # referenced by this file (since the links go two ways)
        # the results of re.findall() will look something like
        # [('Page B', 'pageB.md')]
        # where the link in markdown would've been [Page B](pageB.md)
        for _, link in util.md_links.findall(contents_by_name[file_name]):
            if util.is_md(link):
                relevant_file_names.add(link)

    # create the backlinks files
    for file_name in relevant_file_names:
        logger.info(f'refreshing backlinks for {file_name}')
        # (file name, note title) for every note that references this one
        references = []
        for other_file in file_names:
            if other_file == file_name:
                continue
            if other_file == 'index.md':
                # the index file is supposed to reference a lot of stuff
                # so I don't want it to pollute the backlinks
                continue
            links = util.md_links.findall(contents_by_name[other_file])
            # record each referencing note once, even when it links to this
            # note multiple times — the original appended one duplicate
            # backlink entry per link occurrence
            if any(link == file_name for _, link in links):
                logger.debug(f'{file_name}: referenced by {other_file}')
                title = util.note_title(util.path(notes_folder, other_file))
                references.append((other_file, title))

        # write out all of the backlinks using some properly styled markdown.
        # this bit will be appended to the original note later on when it is
        # converted to a standalone HTML page
        backlinks_file_path = f'{backlinks_folder}/{file_name}.backlinks'
        with open(backlinks_file_path, 'w') as f:
            f.write(backlinks_html(refs=references))
def markdown_filenames(folder_path: str) -> List[str]:
    """Return the names of all markdown files directly inside *folder_path*."""
    return list(filter(util.is_md, os.listdir(folder_path)))
def _run_pandoc(input_files: List[str], md_name: str, output_file: str,
                title: str) -> None:
    """Invoke pandoc to convert markdown input(s) into one HTML page.

    Args:
        input_files (List[str]): Markdown input paths, concatenated in order.
        md_name (str): Original markdown file name; used to namespace footnote
            ids so they stay unique across generated pages.
        output_file (str): Path of the HTML file to produce.
        title (str): Value used as the HTML page title.
    """
    util.do_run(cmd=[
        'pandoc',
        *input_files,
        '--defaults=pandoc.yaml',
        f'--id-prefix={util.to_footnote_id(md_name)}',
        f'--output={output_file}',
        f'--metadata=pagetitle:{title}'
    ])


def do_pandoc_generation(notes_folder: str, temp_folder: str,
                         html_folder: str) -> None:
    """Convert every recently-changed note (plus its backlinks) into HTML.

    Ensures the working folders exist, selects the notes that changed during
    the current run (or are linked from one that did) via the JSON state
    file, and runs pandoc for each. A generated index.md in the temp folder,
    if present, is converted as well.

    Args:
        notes_folder (str): Folder that contains the markdown notes.
        temp_folder (str): Folder holding the JSON state file, the .backlinks
            files, and the optional generated index.md.
        html_folder (str): Folder where the HTML output is written.
    """
    logger: Logger = get_logger()
    for folder in [notes_folder, temp_folder, html_folder]:
        logger.info('creating folder: \'%s\' if it doesn\'t exist already',
                    folder)
        util.create_folder(folder)

    # only queue up files for pandoc generation if they (or the files that
    # point to them) have been modified recently, so that we don't have to
    # regenerate everything each time we make one change in one file.
    state_file: dict = util.read_existing_json_state_file(location=temp_folder)
    relevant_file_names: Set[str] = set()
    for file_name in os.listdir(notes_folder):
        if not util.is_md(file_name):
            continue
        key: str = util.strip_file_extension(file_name)
        if state_file['files'][key]['last_checked'] != state_file['runtime']:
            continue
        relevant_file_names.add(file_name)
        # ensure that we also regenerate the files that are referenced by
        # this file (since the links go two ways)
        with open(util.path(notes_folder, file_name), 'r') as f:
            contents = f.read()
        # the results of re.findall() will look something like
        # [('Page B', 'pageB.md')]
        # where the link in markdown would've been [Page B](pageB.md)
        for _, link in util.md_links.findall(contents):
            if util.is_md(link):
                relevant_file_names.add(link)

    for file in relevant_file_names:
        # the path to the note is always gonna be in the notes_folder
        file_full_path: str = util.path(notes_folder, file)
        note_title = util.note_title(file_full_path)
        # the output HTML file should have the same name as the note but with
        # the .html suffix and it should be in the html folder
        file_html: str = util.change_file_extension(
            util.path(html_folder, file), '.html')
        # the backlinks file should have the same name as the note but with
        # the .md.backlinks suffix, and it should be in the temp folder
        file_backlinks: str = util.path(temp_folder, file + '.backlinks')
        logger.info('converting %s to html, title=%s', file, note_title)
        _run_pandoc([file_full_path, file_backlinks], file, file_html,
                    note_title)

    # if the index.md was generated in the temp folder, pandocify it
    index_file_name = 'index.md'
    generated_index_file = util.path(temp_folder, index_file_name)
    if util.check_file_exists(generated_index_file):
        output_file = util.path(
            html_folder, util.change_file_extension(index_file_name, '.html'))
        index_title = util.note_title(generated_index_file)
        logger.debug('converting %s to html, title=%s', generated_index_file,
                     index_title)
        _run_pandoc([generated_index_file], index_file_name, output_file,
                    index_title)