def check_for_broken_links(notes_folder: str, cache_folder: str) -> None:
    """
    Fail the site build if a freshly checked note links to a missing note.

    Only notes whose 'last_checked' value in the JSON state file equals the
    state file's 'runtime' value are scanned, so that links are only checked
    in the files picked up by the current run. Every markdown link found in
    those notes is resolved against ``notes_folder`` and must point at an
    existing file.

    Args:
        notes_folder (str): The relative or absolute location of the folder
        that contains the markdown notes
        cache_folder (str): The relative or absolute location of the folder
        that contains the JSON state file

    Raises:
        Exception: If at least one linked markdown file does not exist. Every
        missing file is logged as an error before the exception is raised, so
        that site generation does not continue.
    """
    logger: Logger = get_logger()

    state_file: dict = util.read_existing_json_state_file(
        location=cache_folder)

    # collect the link targets of every note that was touched by this run
    linked_files: Set[str] = set()
    for file_name in os.listdir(notes_folder):
        if not util.is_md(file_name):
            continue
        key: str = util.strip_file_extension(file_name)
        if state_file['files'][key]['last_checked'] != state_file['runtime']:
            continue
        with open(util.path(notes_folder, file_name), 'r') as f:
            contents = f.read()
        # the results of re.findall() will look something like
        # [('Page B', 'pageB.md')]
        # where the link in markdown would've been [Page B](pageB.md)
        linked_files.update(
            link for _, link in util.md_links.findall(contents)
            if util.is_md(link))

    # a plain existence test is enough here: there is no need to open a
    # handle on every linked file, and a target that is a directory or is
    # unreadable no longer aborts the check with an unrelated exception.
    # sorting keeps the error report stable from one run to the next
    missing_files = sorted(
        link for link in linked_files
        if not os.path.isfile(util.path(notes_folder, link)))
    if not missing_files:
        return

    for missing_file in missing_files:
        logger.error('missing file \'%s\' is referenced in a bad link',
                     missing_file)

    # fail with an error, and do not continue with site generation
    raise Exception(
        f"{len(missing_files)} broken links were found in your notes")
# Example #2
# 0
def setup_json_state_file(location: str, notes_folder: str) -> None:
    """
    Refresh the JSON state file so that it describes the notes on disk.

    The current UTC time is recorded under 'runtime'. Every markdown file in
    the notes folder gets an entry under 'files' that holds its sha256 hash
    and a 'last_checked' timestamp; that timestamp is only moved to the
    current runtime for files that are new or whose hash has changed. This
    method must be idempotent.

    Args:
        location (str): The relative or absolute location of the folder that
        contains the JSON state file
        notes_folder (str): The relative or absolute location of the folder
        that contains all of your markdown notes
    """
    state: dict = util.read_existing_json_state_file(location=location)

    # stamp the state with the time of this script run
    timestamp: str = datetime.utcnow().strftime(DATE_TIME_FORMAT)
    state['runtime'] = timestamp

    # make sure there is a files section to work with
    tracked: dict = state.setdefault('files', {})

    for entry in os.listdir(notes_folder):
        if not util.is_md(entry):
            continue

        note_path: str = util.path(notes_folder, entry)
        note_key: str = util.strip_file_extension(entry)

        if note_key not in tracked:
            # first time we see this note: create its metadata from scratch
            logger.info(f'adding new key in files: {note_key}')
            record: dict = {}
            tracked[note_key] = record
            record['sha256'] = util.sha256(note_path)
            record['last_checked'] = timestamp
        else:
            # a different hash means that the note was edited since the last
            # run, so its metadata has to be brought up to date
            digest: str = util.sha256(note_path)
            record = tracked[note_key]
            if digest != record['sha256']:
                logger.info(f'updating changed key: {note_key}')
                record['sha256'] = digest
                record['last_checked'] = timestamp

    # write the state back to disk for the next script run
    util.persist_json(state, location)
def todo_data(folder_path: str) -> List[Tuple[str, List[str]]]:
    """
    Gather the todos of every markdown file in a folder.

    Args:
        folder_path (str): The folder whose entries are scanned

    Returns:
        One ``(file_name, todos)`` pair for each markdown file for which
        ``util.extract_todos`` returned at least one item, in the order in
        which ``os.listdir`` listed the files.
    """
    collected: List[Tuple[str, List[str]]] = []
    for entry in filter(util.is_md, os.listdir(folder_path)):
        found: List[str] = util.extract_todos(util.path(folder_path, entry))
        if len(found) == 0:
            # nothing to report for this note
            continue
        collected.append((entry, found))
    return collected
def link_data(folder_path: str) -> List[Tuple[str, str]]:
    """
    List the markdown files of a folder together with their note titles.

    Args:
        folder_path (str): The folder whose entries are scanned

    Returns:
        ``(file_name, note_title)`` pairs ordered by file modification time,
        most recently modified first.
    """
    def modified_at(pair: Tuple[str, str]) -> float:
        # the sort key is the mtime of the note the pair was built from
        return os.path.getmtime(util.path(folder_path, pair[0]))

    pairs = [
        (entry, util.note_title(util.path(folder_path, entry)))
        for entry in os.listdir(folder_path)
        if util.is_md(entry)
    ]
    return sorted(pairs, key=modified_at, reverse=True)
# Example #5
# 0
def generate_backlinks_files(notes_folder: str, backlinks_folder: str) -> None:
    """
    Write a ``<note>.backlinks`` file for every note that needs a refresh.

    A note needs a refresh when its 'last_checked' value in the JSON state
    file equals the state file's 'runtime' value, or when such a note links
    to it (since the links go two ways). For each of those notes, all other
    notes except ``index.md`` are searched for links that point at it, and
    the result is rendered with ``backlinks_html`` into
    ``{backlinks_folder}/{file_name}.backlinks``.

    Each referencing note is listed once per target, even if it links to the
    target several times, and every note is read from disk only once while
    the references are collected.

    Args:
        notes_folder (str): The relative or absolute location of the folder
        that contains the markdown notes
        backlinks_folder (str): The folder that receives the backlinks files;
        the JSON state file is also read from this location
    """
    logger: Logger = get_logger()

    file_names: List[str] = markdown_filenames(folder_path=notes_folder)
    logger.info(f'Found {len(file_names)} files in {notes_folder}')

    util.create_folder(location=backlinks_folder)
    logger.info(f'Will put backlinks into: {backlinks_folder}/')

    # find all of the files that have changed since the last script run by
    # looking into the JSON state file to speed up the backlinks generation
    state_file: dict = util.read_existing_json_state_file(
        location=backlinks_folder)
    relevant_file_names: Set[str] = set()
    for file_name in file_names:
        key: str = util.strip_file_extension(file_name)
        if state_file['files'][key]['last_checked'] != state_file['runtime']:
            continue
        relevant_file_names.add(file_name)
        # ensure that we also refresh the backlinks for the files that are
        # referenced by this file (since the links go two ways)
        with open(util.path(notes_folder, file_name), 'r') as f:
            contents = f.read()
        # the results of re.findall() will look something like
        # [('Page B', 'pageB.md')]
        # where the link in markdown would've been [Page B](pageB.md)
        relevant_file_names.update(
            link for _, link in util.md_links.findall(contents)
            if util.is_md(link))

    # read every potential referrer once and remember the set of link targets
    # it contains, instead of re-reading all notes for each refreshed file.
    # the index file is supposed to reference a lot of stuff, so it is left
    # out to keep it from polluting the backlinks
    outgoing_links = {}
    if relevant_file_names:
        for other_file in file_names:
            if other_file == 'index.md':
                continue
            with open(f'{notes_folder}/{other_file}', 'r') as f:
                outgoing_links[other_file] = {
                    link for _, link in util.md_links.findall(f.read())}

    # create the backlinks files
    for file_name in relevant_file_names:
        logger.info(f'refreshing backlinks for {file_name}')
        # a list of all of the files that reference this one; because the
        # link targets are stored as sets, a note that links here more than
        # once still shows up a single time
        references = []
        for other_file, links in outgoing_links.items():
            if other_file == file_name or file_name not in links:
                continue
            logger.debug(f'{file_name}: referenced by {other_file}')
            title = util.note_title(f'{notes_folder}/{other_file}')
            references.append((other_file, title))

        # write out all of the backlinks using some properly styled markdown.
        # this bit will be appended to the original note later on when it is
        # converted to a standalone HTML page
        backlinks_file_path = f'{backlinks_folder}/{file_name}.backlinks'
        with open(backlinks_file_path, 'w') as f:
            f.write(backlinks_html(refs=references))
# Example #6
# 0
def markdown_filenames(folder_path: str) -> List[str]:
    """
    Return the names of the entries in ``folder_path`` that ``util.is_md``
    accepts, in the order in which ``os.listdir`` listed them.
    """
    return list(filter(util.is_md, os.listdir(folder_path)))
# Example #7
# 0
def do_pandoc_generation(notes_folder: str, temp_folder: str, html_folder: str) -> None:
    """
    Run pandoc to turn the notes that need regenerating into HTML pages.

    A note is queued when its 'last_checked' value in the JSON state file
    equals the state file's 'runtime' value, or when such a note links to
    it. Each queued note is passed to pandoc together with its
    ``.backlinks`` file from the temp folder. If the temp folder contains an
    ``index.md``, it is converted to HTML as well.

    Args:
        notes_folder (str): The folder that contains the markdown notes
        temp_folder (str): The folder that holds the JSON state file, the
        backlinks files and, optionally, a generated index.md
        html_folder (str): The folder that receives the HTML output
    """
    logger: Logger = get_logger()

    for required_folder in (notes_folder, temp_folder, html_folder):
        logger.info('creating folder: \'%s\' if it doesn\'t exist already', required_folder)
        util.create_folder(required_folder)

    # regenerating everything after every small edit would be wasteful, so
    # only the notes flagged by the state file, plus the notes they link to,
    # are queued for conversion
    state: dict = util.read_existing_json_state_file(location=temp_folder)
    queued: Set[str] = set()
    for entry in os.listdir(notes_folder):
        if not util.is_md(entry):
            continue
        note_key: str = util.strip_file_extension(entry)
        if state['files'][note_key]['last_checked'] != state['runtime']:
            continue
        queued.add(entry)
        with open(util.path(notes_folder, entry), 'r') as note:
            text = note.read()
        # findall() yields pairs such as ('Page B', 'pageB.md') for a
        # markdown link written as [Page B](pageB.md); the link targets are
        # queued too
        queued.update(
            target for _, target in util.md_links.findall(text)
            if util.is_md(target))

    for note_name in queued:
        # notes are always read from the notes folder
        source_path: str = util.path(notes_folder, note_name)
        title = util.note_title(source_path)

        # same file name as the note, but with the .html suffix and placed
        # in the html folder
        html_path: str = util.change_file_extension(
            util.path(html_folder, note_name), '.html')

        # same file name as the note plus the .backlinks suffix, taken from
        # the temp folder
        backlinks_path: str = util.path(temp_folder, note_name + '.backlinks')

        logger.info('converting %s to html, title=%s', note_name, title)
        note_cmd = [
            'pandoc',
            source_path, backlinks_path,
            '--defaults=pandoc.yaml',
            f'--id-prefix={util.to_footnote_id(note_name)}',
            f'--output={html_path}',
            f'--metadata=pagetitle:{title}'
        ]
        util.do_run(cmd=note_cmd)

    # the index is only converted when an index.md exists in the temp folder
    index_name = 'index.md'
    index_source = util.path(temp_folder, index_name)
    if not util.check_file_exists(index_source):
        return

    index_html = util.path(
        html_folder, util.change_file_extension(index_name, '.html'))
    index_title = util.note_title(index_source)
    logger.debug('converting %s to html, title=%s', index_source, index_title)
    index_cmd = [
        'pandoc',
        index_source,
        '--defaults=pandoc.yaml',
        f'--id-prefix={util.to_footnote_id(index_name)}',
        f'--output={index_html}',
        f'--metadata=pagetitle:{index_title}'
    ]
    util.do_run(cmd=index_cmd)