Python MarkdownReader示例，pelican.readers.MarkdownReader Python示例

示例#1

0

显示文件

    def read(self, filename):
        """Render a reveal.js markdown file to HTML through pandoc.

        pypandoc (https://github.com/bebraw/pypandoc) and pandoc
        (https://pandoc.org/) must both be installed.

        Returns a ``(revealjs_content, metadata)`` tuple; the metadata comes
        from Pelican's ``MarkdownReader`` and has ``template`` forced to
        ``"blank"``.
        """
        # TODO: use markdown reader to parse the reveal.js markdown
        # https://github.com/danielfrg/pelican-ipynb/blob/master/markup.py#L62
        # Only the metadata half of this result is used; the HTML it produces
        # is superseded by the pandoc output below.
        _html, metadata = MarkdownReader(self.settings).read(filename)
        metadata["template"] = "blank"

        # TODO: using the markdown reader converts the file contents to HTML,
        # but we just want plain text because pandoc should be converting it
        # instead. The trouble is, we also want to get the metadata
        converter = Markdown(**self.settings["MARKDOWN"])
        # The converted output is thrown away (written to the null device);
        # the converter is run only so that its ``lines`` can be read back.
        converter.convertFile(filename, output=os.devnull)
        source_text = "\n".join(converter.lines)

        pandoc_args = ["-s", "-V", "revealjs-url=https://revealjs.com"]
        slides = pypandoc.convert_text(
            source_text,
            to="revealjs",
            format="md",
            extra_args=pandoc_args,
        )
        return (slides, metadata)

示例#2

0

显示文件

文件： markup.py 项目： junfenglx/pelican-ipynb

    def read(self, filepath):
        """Read a notebook file and return ``(content, metadata)``.

        Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON.

        Raises:
            Exception: if ``title``, ``date`` and ``slug`` are not all
                present in the collected metadata.
        """
        metadata = {'ipython': True}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader.
            # The dict returned by the reader replaces the one built above.
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; the ``with`` block
            # guarantees the handle is closed even if the JSON is malformed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Change to standard pelican metadata
            wanted = ("title", "date", "category", "tags", "slug", "author")
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in wanted:
                    metadata[key] = self.process_metadata(key, value)

        keys = set(k.lower() for k in metadata.keys())
        if not set(['title', 'date', 'slug']).issubset(keys):
            # Probably using ipynb.liquid mode
            md_filename = filename.split('.')[0] + '.md'
            md_filepath = os.path.join(filedir, md_filename)
            if not os.path.exists(md_filepath):
                raise Exception(
                    "Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file."
                )
            else:
                raise Exception(
                    "Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                    "assuming that this notebook is for liquid tag usage if true ignore this error"
                )

        content, info = get_html_from_filepath(filepath)

        # Generate Summary: Do it before cleaning CSS
        if 'summary' not in [key.lower() for key in self.settings.keys()]:
            content = '<body>{0}</body>'.format(
                content)  # So Pelican HTMLReader works
            parser = MyHTMLParser(self.settings, filename)
            # Python 3 str don't have decode
            try:
                c = content.decode("utf-8")
            except AttributeError:
                c = content
            parser.feed(c)
            parser.close()
            content = parser.body
            metadata['summary'] = parser.summary

        content = fix_css(content, info)
        return content, metadata

示例#3

0

显示文件

文件： pelican_comment_system.py 项目： chenguoming1/lnmp100.github.io

def add_static_comments(gen, content):
    """Attach file-based comments to ``content`` and write their feed.

    Does nothing unless ``gen.settings['PELICAN_COMMENT_SYSTEM']`` equals
    ``True``. Otherwise every file in
    ``PELICAN_COMMENT_SYSTEM_DIR/<content.slug>`` whose extension the
    ``MarkdownReader`` accepts is read as one comment. Comments carrying a
    ``replyto`` metadata entry are attached to the comment with that id.
    The result is stored on ``content.comments`` and
    ``content.comments_count``; a feed is written in every enabled case,
    empty when the comment folder does not exist.
    """
    # Explicit comparison kept on purpose: only a value equal to True enables
    # the plugin, other truthy values do not.
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:
        return

    content.comments_count = 0
    content.comments = []

    # Modify the local context, so we get proper values for the feed
    context = copy.copy(gen.context)
    context['SITEURL'] += "/" + content.url
    context['SITENAME'] = "Comments for: " + content.title
    context['SITESUBTITLE'] = ""
    path = gen.settings['PELICAN_COMMENT_SYSTEM_FEED'] % content.slug
    writer = Writer(gen.output_path, settings=gen.settings)

    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'],
                          content.slug)

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + content.slug)
        writer.write_feed([], context, path)
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() not in reader.file_extensions:
            continue

        com_content, meta = reader.read(os.path.join(folder, entry))
        avatar_path = avatars.getAvatarPath(name, meta)
        com = Comment(entry, avatar_path, com_content, meta, gen.settings,
                      entry, context)

        if 'replyto' in meta:
            replies.append(com)
        else:
            comments.append(com)

    writer.write_feed(comments + replies, context, path)

    # Index every comment by id once so each reply finds its parent(s) with a
    # single lookup instead of rescanning all comments. Lists are used as
    # values so that several comments sharing an id all receive the reply, as
    # before. NOTE(review): assumes comment ids and 'replyto' values are
    # hashable (e.g. strings) -- confirm against Comment.
    by_id = {}
    for comment in chain(comments, replies):
        by_id.setdefault(comment.id, []).append(comment)

    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    content.comments_count = len(comments) + count
    content.comments = comments

示例#4

0

显示文件

文件： ipythonnb.py 项目： 5n1p/pelican-ipythonnb

    def read(self, filepath):
        """Convert a notebook to HTML and return ``(body, metadata)``.

        Metadata comes from a sibling ``<name>.ipynb-meta`` file (parsed with
        Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON with keys lower-cased and
        values passed through ``self.process_metadata``. The returned body is
        the filtered notebook CSS plus ``CUSTOM_CSS`` followed by the parsed
        HTML body; ``metadata['summary']`` is always set from the parser.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Fix metadata to pelican standards. A fresh dict is built because
            # deleting and re-inserting keys while iterating over the same
            # dict raises RuntimeError on Python 3.
            metadata = {}
            for key, value in notebook_metadata.items():
                key = key.lower()
                metadata[key] = self.process_metadata(key, value)
            metadata['ipython'] = True

        # Convert ipython notebook to html
        config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                         'highlight_class': '.highlight-ipynb'}})
        exporter = HTMLExporter(config=config, template_file='basic',
                                filters={'highlight2html': custom_highlighter})

        content, info = exporter.from_filename(filepath)

        # Process using Pelican HTMLReader
        content = '<body>{0}</body>'.format(content)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body
        metadata['summary'] = parser.summary

        # Remove some CSS styles, so it doesn't break the themes.
        exclude = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                   '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                   'code', 'pre', 'div.text_cell_render')

        def filter_tags(style_text):
            """Drop every line that starts with one of the excluded prefixes."""
            kept = [line for line in style_text.split('\n')
                    if not line.startswith(exclude)]
            return '<style type="text/css">{0}</style>'.format('\n'.join(kept))

        css = '\n'.join(filter_tags(sheet) for sheet in info['inlining']['css'])
        css = css + CUSTOM_CSS
        body = css + body

        return body, metadata

示例#5

0

显示文件

文件： markup.py 项目： jlopezra/2020F-AC295

    def read(self, filepath):
        """Read a notebook file and return ``(content, metadata)``.

        Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON. A summary is generated only
        when the settings contain no ``summary`` key, and it is stored only
        when ``IPYNB_USE_META_SUMMARY`` is absent or equal to ``False``.

        Raises:
            Exception: if ``title`` and ``date`` are not both present in the
                collected metadata.
        """
        metadata = {'ipython': True}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        # One stem for both companion files so that names containing extra
        # dots resolve consistently for `.ipynb-meta` and `.md`.
        stem = os.path.splitext(filename)[0]
        metadata_filepath = os.path.join(filedir, stem + '.ipynb-meta')

        if os.path.exists(metadata_filepath):
            # Metadata is on a external file,
            # process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Change to standard pelican metadata
            wanted = ("title", "date", "category", "tags", "slug", "author")
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in wanted:
                    metadata[key] = self.process_metadata(key, value)

        keys = set(k.lower() for k in metadata.keys())
        if not set(['title', 'date']).issubset(keys):
            # Probably using ipynb.liquid mode
            md_filepath = os.path.join(filedir, stem + '.md')
            if not os.path.exists(md_filepath):
                raise Exception("Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file.")
            else:
                raise Exception("Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                      "assuming that this notebook is for liquid tag usage if true ignore this error")

        content, info = get_html_from_filepath(filepath)

        # Generate Summary: Do it before cleaning CSS
        if 'summary' not in [key.lower() for key in self.settings.keys()]:
            parser = MyHTMLParser(self.settings, filename)
            if isinstance(content, six.binary_type):
                # PY2 (str) or PY3 (bytes): unicode_literals makes format()
                # try to decode as ASCII, so decode as UTF-8 explicitly first.
                content = content.decode("utf-8")
            content = '<body>{0}</body>'.format(content)
            parser.feed(content)
            parser.close()
            content = parser.body
            if ('IPYNB_USE_META_SUMMARY' not in self.settings
                    or self.settings['IPYNB_USE_META_SUMMARY'] == False):
                metadata['summary'] = parser.summary

        # Only the presence of the key matters here, not its value.
        ignore_css = 'IPYNB_IGNORE_CSS' in self.settings
        content = fix_css(content, info, ignore_css=ignore_css)
        return content, metadata

示例#6

0

显示文件

    def read(self, filepath):
        """Convert a notebook to HTML and return ``(body, metadata)``.

        Metadata comes from a sibling ``<name>.ipynb-meta`` file (parsed with
        Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON with keys lower-cased and
        values passed through ``self.process_metadata``. The returned body is
        the filtered notebook CSS plus ``CUSTOM_CSS`` followed by the parsed
        HTML body; ``metadata['summary']`` is always set from the parser.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                raw_metadata = json.load(ipynb_file)['metadata']

            # Fix metadata to pelican standards. Collect into a new dict:
            # removing and re-adding keys on the dict being iterated raises
            # RuntimeError on Python 3.
            metadata = {}
            for key, value in raw_metadata.items():
                lowered = key.lower()
                metadata[lowered] = self.process_metadata(lowered, value)
            metadata['ipython'] = True

        # Convert ipython notebook to html
        config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                         'highlight_class': '.highlight-ipynb'}})
        exporter = HTMLExporter(config=config, template_file='basic',
                                filters={'highlight2html': custom_highlighter})

        content, info = exporter.from_filename(filepath)

        # Process using Pelican HTMLReader
        content = '<body>{0}</body>'.format(content)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body
        metadata['summary'] = parser.summary

        # Remove some CSS styles, so it doesn't break the themes.
        exclude = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                   '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                   'code', 'pre', 'div.text_cell_render')

        def filter_tags(style_text):
            """Drop every line that starts with one of the excluded prefixes."""
            kept = [line for line in style_text.split('\n')
                    if not line.startswith(exclude)]
            return '<style type="text/css">{0}</style>'.format('\n'.join(kept))

        css = '\n'.join(filter_tags(sheet) for sheet in info['inlining']['css'])
        css = css + CUSTOM_CSS
        body = css + body

        return body, metadata

示例#7

0

显示文件

文件： markup.py 项目： Huaguiyuan/phys_codes

    def read(self, filepath):
        """Read a notebook file and return ``(content, metadata)``.

        Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON. A summary is generated only
        when the settings contain no ``summary`` key, and it is stored only
        when ``IPYNB_USE_META_SUMMARY`` is absent or equal to ``False``.

        Raises:
            Exception: if ``title`` and ``date`` are not both present in the
                collected metadata.
        """
        metadata = {'ipython': True}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        if os.path.exists(metadata_filepath):
            # Metadata is on a external file,
            # process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Change to standard pelican metadata
            wanted = ("title", "date", "category", "tags", "slug", "author")
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in wanted:
                    metadata[key] = self.process_metadata(key, value)

        keys = set(k.lower() for k in metadata.keys())
        if not set(['title', 'date']).issubset(keys):
            # Probably using ipynb.liquid mode
            md_filename = filename.split('.')[0] + '.md'
            md_filepath = os.path.join(filedir, md_filename)
            if not os.path.exists(md_filepath):
                raise Exception("Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file.")
            else:
                raise Exception("Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                      "assuming that this notebook is for liquid tag usage if true ignore this error")

        content, info = get_html_from_filepath(filepath)

        # Generate Summary: Do it before cleaning CSS
        if 'summary' not in [key.lower() for key in self.settings.keys()]:
            parser = MyHTMLParser(self.settings, filename)
            if isinstance(content, six.binary_type):
                # PY2 (str) or PY3 (bytes): unicode_literals makes format()
                # try to decode as ASCII, so decode as UTF-8 explicitly first.
                content = content.decode("utf-8")
            content = '<body>{0}</body>'.format(content)
            parser.feed(content)
            parser.close()
            content = parser.body
            if ('IPYNB_USE_META_SUMMARY' not in self.settings
                    or self.settings['IPYNB_USE_META_SUMMARY'] == False):
                metadata['summary'] = parser.summary

        # Only the presence of the key matters here, not its value.
        ignore_css = 'IPYNB_IGNORE_CSS' in self.settings
        content = fix_css(content, info, ignore_css=ignore_css)
        return content, metadata

示例#8

0

显示文件

文件： pelican_comment_system.py 项目： Rojok/pelican-plugins

def add_static_comments(gen, content):
	"""Attach file-based comments to ``content`` and write their feed.

	Does nothing unless ``gen.settings['PELICAN_COMMENT_SYSTEM']`` equals
	``True``. Otherwise every file in
	``PELICAN_COMMENT_SYSTEM_DIR/<content.slug>`` whose extension the
	``MarkdownReader`` accepts is read as one comment. Comments carrying a
	``replyto`` metadata entry are attached to the comment with that id.
	The result is stored on ``content.comments`` and
	``content.comments_count``; a feed is written in every enabled case,
	empty when the comment folder does not exist.
	"""
	# Explicit comparison kept on purpose: only a value equal to True enables
	# the plugin, other truthy values do not.
	if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:
		return

	content.comments_count = 0
	content.comments = []

	# Modify the local context, so we get proper values for the feed
	context = copy.copy(gen.context)
	context['SITEURL'] += "/" + content.url
	context['SITENAME'] = "Comments for: " + content.title
	context['SITESUBTITLE'] = ""
	path = gen.settings['PELICAN_COMMENT_SYSTEM_FEED'] % content.slug
	writer = Writer(gen.output_path, settings=gen.settings)

	folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'], content.slug)

	if not os.path.isdir(folder):
		logger.debug("No comments found for: " + content.slug)
		writer.write_feed([], context, path)
		return

	reader = MarkdownReader(gen.settings)
	comments = []
	replies = []

	for entry in os.listdir(folder):
		name, extension = os.path.splitext(entry)
		if extension[1:].lower() not in reader.file_extensions:
			continue

		com_content, meta = reader.read(os.path.join(folder, entry))
		avatar_path = avatars.getAvatarPath(name, meta)
		com = Comment(entry, avatar_path, com_content, meta, gen.settings, entry, context)

		if 'replyto' in meta:
			replies.append(com)
		else:
			comments.append(com)

	writer.write_feed(comments + replies, context, path)

	# Index every comment by id once so each reply finds its parent(s) with a
	# single lookup instead of rescanning all comments. Lists are used as
	# values so that several comments sharing an id all receive the reply, as
	# before. NOTE(review): assumes comment ids and 'replyto' values are
	# hashable (e.g. strings) -- confirm against Comment.
	by_id = {}
	for comment in chain(comments, replies):
		by_id.setdefault(comment.id, []).append(comment)

	for reply in replies:
		for parent in by_id.get(reply.metadata['replyto'], ()):
			parent.addReply(reply)

	count = 0
	for comment in comments:
		comment.sortReplies()
		count += comment.countReplies()

	comments = sorted(comments)

	content.comments_count = len(comments) + count
	content.comments = comments

示例#9

0

显示文件

    def read(self, filepath):
        """Read a notebook file and return ``(content, metadata)``.

        Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON. The HTML obtained from
        ``get_html_from_filepath`` is returned as-is; the parser is used only
        to derive a summary.

        Raises:
            Exception: if ``title`` and ``date`` are not both present in the
                collected metadata.
        """
        metadata = {'ipython': True}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Change to standard pelican metadata
            wanted = ("title", "date", "category", "tags", "slug", "author")
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in wanted:
                    metadata[key] = self.process_metadata(key, value)

        keys = {k.lower() for k in metadata.keys()}
        if not {'title', 'date'}.issubset(keys):
            # Probably using ipynb.liquid mode
            md_filename = filename.split('.')[0] + '.md'
            md_filepath = os.path.join(filedir, md_filename)
            if not os.path.exists(md_filepath):
                raise Exception(
                    "Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file."
                )
            else:
                raise Exception(
                    "Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                    "assuming that this notebook is for liquid tag usage if true ignore this error"
                )

        content, info = get_html_from_filepath(filepath)

        # Generate Summary: Do it before cleaning CSS
        if 'summary' not in [key.lower() for key in self.settings.keys()]:
            parser = MyHTMLParser(self.settings, filename)
            # Feed the body wrapper in pieces so ``content`` itself is left
            # untouched for the return value.
            parser.feed('<body>')
            parser.feed(content)
            parser.feed('</body>')
            parser.close()

            if ('IPYNB_USE_META_SUMMARY' not in self.settings
                    or self.settings['IPYNB_USE_META_SUMMARY'] == False):
                metadata['summary'] = parser.summary

        return content, metadata

示例#10

0

显示文件

文件： markup.py 项目： bokeh-cookbook/bokeh-cookbook

    def read(self, filepath):
        """Read a notebook file and return ``(content, metadata)``.

        Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON. A summary is generated only
        when the settings contain no ``summary`` key, and it is stored only
        when ``IPYNB_USE_META_SUMMARY`` is absent or is ``False``.

        Raises:
            Exception: if ``title`` and ``date`` are not both present in the
                collected metadata.
        """
        metadata = {'ipython': True}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Change to standard pelican metadata
            wanted = ("title", "date", "category", "tags", "slug", "author")
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in wanted:
                    metadata[key] = self.process_metadata(key, value)

        keys = set(k.lower() for k in metadata.keys())
        if not set(['title', 'date']).issubset(keys):
            # Probably using ipynb.liquid mode
            md_filename = filename.split('.')[0] + '.md'
            md_filepath = os.path.join(filedir, md_filename)
            if not os.path.exists(md_filepath):
                raise Exception("Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file.")
            else:
                raise Exception("""Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file,
                                assuming that this notebook is for liquid tag usage if true ignore this error""")

        content, info = get_html_from_filepath(filepath)

        # Generate Summary: Do it before cleaning CSS
        if 'summary' not in [key.lower() for key in self.settings.keys()]:
            parser = MyHTMLParser(self.settings, filename)
            if hasattr(content, 'decode'): # PY2
                content = '<body>%s</body>' % content.encode('utf-8')
                content = content.decode("utf-8")
            else:
                content = '<body>%s</body>' % content
            parser.feed(content)
            parser.close()
            content = parser.body
            if ('IPYNB_USE_META_SUMMARY' not in self.settings
                    or self.settings['IPYNB_USE_META_SUMMARY'] is False):
                metadata['summary'] = parser.summary

        content = fix_css(content, info)
        return content, metadata

示例#11

0

显示文件

文件： watcher.py 项目： karpitsky/simplereddit

 def _read_articles(self):
     """Load every ``.md`` file under ``PROJECT_DIR/PATH`` into ``self.articles``.

     Each file is parsed with a ``MarkdownReader`` and appended as a dict
     holding its ``content`` and ``metadata``.
     """
     articles_dir = os.path.join(PROJECT_DIR, PATH)
     reader_settings = {
         'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra']
     }
     reader = MarkdownReader(reader_settings)
     for entry in os.listdir(articles_dir):
         # Anything that is not a markdown file is skipped.
         if not entry.endswith('.md'):
             continue
         article_path = os.path.join(articles_dir, entry)
         content, metadata = reader.read(article_path)
         article = {'content': content, 'metadata': metadata}
         self.articles.append(article)

示例#12

0

显示文件

文件： pelican_comment_system.py 项目： BenoitDherin/website

def add_static_comments(gen, metadata):
    """Populate ``metadata`` with the file-based comments of an article.

    Does nothing unless ``gen.settings['PELICAN_COMMENT_SYSTEM']`` equals
    ``True``. Otherwise ``metadata['comments']`` and
    ``metadata['comments_count']`` are set (to an empty list and 0 when the
    article has no ``slug`` or no comment folder). Every file in
    ``PELICAN_COMMENT_SYSTEM_DIR/<slug>`` whose extension the
    ``MarkdownReader`` accepts is read as one comment; comments carrying a
    ``replyto`` metadata entry are attached to the comment with that id.
    """
    # Explicit comparison kept on purpose: only a value equal to True enables
    # the plugin, other truthy values do not.
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:
        return

    metadata['comments_count'] = 0
    metadata['comments'] = []

    if 'slug' not in metadata:
        logger.warning(
            "pelican_comment_system: cant't locate comments files without slug tag in the article"
        )
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []
    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'],
                          metadata['slug'])

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + metadata['slug'])
        return

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() not in reader.file_extensions:
            continue

        # os.path.join instead of manual "/" concatenation, matching how
        # ``folder`` itself is built.
        content, meta = reader.read(os.path.join(folder, entry))
        meta['locale_date'] = strftime(meta['date'],
                                       gen.settings['DEFAULT_DATE_FORMAT'])
        com = Comment(name, meta, content)
        if 'replyto' in meta:
            replies.append(com)
        else:
            comments.append(com)

    # Index every comment by id once so each reply finds its parent(s) with a
    # single lookup instead of rescanning all comments. Lists are used as
    # values so that several comments sharing an id all receive the reply, as
    # before. NOTE(review): assumes comment ids and 'replyto' values are
    # hashable (e.g. strings) -- confirm against Comment.
    by_id = {}
    for comment in chain(comments, replies):
        by_id.setdefault(comment.id, []).append(comment)

    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    metadata['comments_count'] = len(comments) + count
    metadata['comments'] = comments

示例#13

0

显示文件

    def read(self, filename):
        """Convert a revealjs markdown file to html and return it

        Requires pandoc (https://pandoc.org/) to be available on the PATH.

        Metadata is read with Pelican's ``MarkdownReader``; the optional
        ``theme`` and ``revealoptions['transition']`` entries are forwarded to
        pandoc as ``--variable`` options, and ``template`` is set to
        ``"revealmd"``.

        Returns:
            A ``(html, metadata)`` tuple.

        Raises:
            RuntimeError: if talking to pandoc fails or pandoc exits with a
                non-zero status.
        """

        # TODO: use markdown reader to parse the reveal.js markdown
        # github.com/danielfrg/pelican-ipynb/blob/master/markup.py#L62
        reader = MarkdownReader(self.settings)
        _md_content, metadata = reader.read(filename)

        # TODO: using the markdown reader converts the file contents to HTML,
        # but we just want plain text because pandoc should be converting it
        # instead. The trouble is, we also want to get the metadata

        # Build the argument vector directly rather than formatting a string
        # and splitting it on whitespace, so that a filename or option value
        # containing spaces stays a single argument.
        command = ["pandoc", "--to", "revealjs", "-f", "markdown"]

        if "theme" in metadata:
            command += ["--variable", "theme=%s" % metadata["theme"]]

        if "revealoptions" in metadata:
            if "transition" in metadata["revealoptions"]:
                command += ["--variable", "transition=%s" %
                            metadata["revealoptions"]["transition"]]

        command.append(filename)

        # Define template for Pelican
        metadata["template"] = "revealmd"

        p = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        try:
            # pandoc reads the file itself; nothing is sent on stdin.
            stdout, stderr = p.communicate()
        except OSError:
            raise RuntimeError(
                'Pandoc died with exitcode "%s" during conversion.' %
                p.returncode)

        # A failed conversion would otherwise be returned silently as empty
        # (or partial) content.
        if p.returncode != 0:
            raise RuntimeError(
                'Pandoc died with exitcode "%s" during conversion: %s' %
                (p.returncode, stderr.decode("utf8", "replace").strip()))

        revealjs_content = stdout.decode("utf8")

        # Patch revealjs_content to convert 'back' "{" and "}"
        returntext = revealjs_content.replace("%7B", "{").replace("%7D", "}")

        return returntext, metadata

示例#14

0

显示文件

文件： pdf.py 项目： wombelix/pdf

    def _create_pdf(self, obj, output_path):
        """Write ``<obj.slug>.pdf`` into ``output_path`` from ``obj``'s source.

        ``.rst`` sources are used verbatim. Sources the ``MarkdownReader``
        handles are converted by it, prefixed with an RST header built from
        the title and the supported metadata fields, and embedded as a raw
        HTML block. Any other source type is skipped with a warning.
        """
        filename = obj.slug + ".pdf"
        output_pdf = os.path.join(output_path, filename)
        mdreader = MarkdownReader(self.settings)
        _, ext = os.path.splitext(obj.source_path)

        if ext == ".rst":
            with open(obj.source_path, encoding="utf-8") as f:
                text = f.read()

            header = ""
        elif ext[1:] in mdreader.file_extensions and mdreader.enabled:
            text, meta = mdreader.read(obj.source_path)
            header = ""

            if "title" in meta:
                title = meta.pop("title")
                header = title + "\n" + "#" * len(title) + "\n\n"

            for k in list(meta):
                # We can't support all fields, so we strip the ones that won't
                # look good
                if k not in self.supported_md_fields:
                    del meta[k]

            header += "\n".join([":{}: {}".format(k, meta[k]) for k in meta])
            header += "\n\n.. raw:: html\n\n\t"
            text = text.replace("\n", "\n\t")

            # rst2pdf casts the text to str and will break if it finds
            # non-escaped characters. Here we nicely escape them to XML/HTML
            # entities before proceeding
            text = text.encode("ascii", "xmlcharrefreplace").decode()
        else:
            # We don't support this format. ``warning`` is used because the
            # ``warn`` alias is deprecated and removed in Python 3.13.
            logger.warning("Ignoring unsupported file %s", obj.source_path)
            return

        # Find intra-site links and replace placeholder with actual path / url
        hrefs = self._get_intrasite_link_regex()
        text = hrefs.sub(lambda m: obj._link_replacer(obj.get_siteurl(), m),
                         text)

        logger.info(" [ok] writing %s", output_pdf)

        self.pdfcreator.createPdf(text=(header + text), output=output_pdf)

示例#15

0

显示文件

    def read(self, filepath):
        """Convert a notebook to HTML and return ``(body, metadata)``.

        Metadata comes from a sibling ``<name>.ipynb-meta`` file (parsed with
        Pelican's ``MarkdownReader``) when it exists, otherwise from the
        ``metadata`` entry of the notebook JSON with keys lower-cased and
        values passed through ``self.process_metadata``.
        ``metadata['summary']`` is always set from the parser.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; ``with`` closes the
            # handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Fix metadata to pelican standards. A fresh dict is built because
            # deleting and re-inserting keys while iterating over the same
            # dict raises RuntimeError on Python 3.
            metadata = {}
            for key, value in notebook_metadata.items():
                key = key.lower()
                metadata[key] = self.process_metadata(key, value)
            metadata['ipython'] = True

        # Convert ipython notebook to html
        config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                                                      'highlight_class': '.highlight-ipynb'}})
        exporter = HTMLExporter(
            config=config,
            template_file='plugins/ipynb/templates/dsbytes_full',
            filters={'highlight2html': custom_highlighter}
        )

        content, info = exporter.from_filename(filepath)

        content = '<html><body>' + content + '</body></html>'
        # Process using Pelican HTMLReader
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body
        metadata['summary'] = parser.summary
        return body, metadata

示例#16

0

显示文件

文件： pelican_comment_system.py 项目： BenoitDherin/data-analysis-with-R

def add_static_comments(gen, metadata):
    """Load the static comments stored for an article into its metadata.

    Returns immediately, leaving ``metadata`` untouched, unless
    ``gen.settings['PELICAN_COMMENT_SYSTEM']`` equals True. Otherwise
    ``metadata['comments_count']`` and ``metadata['comments']`` are first set
    to ``0`` and ``[]``, and are filled in only when the article has a slug
    and a matching folder exists under ``PELICAN_COMMENT_SYSTEM_DIR``.

    Args:
        gen: Object exposing a ``settings`` mapping.
        metadata: Article metadata dict; updated in place.

    Returns:
        None.
    """
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:
        return

    metadata['comments_count'] = 0
    metadata['comments'] = []

    if 'slug' not in metadata:
        logger.warning("pelican_comment_system: cant't locate comments files without slug tag in the article")
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []
    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'], metadata['slug'])

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + metadata['slug'])
        return

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() not in reader.file_extensions:
            continue
        # os.path.join instead of a hard-coded "/" keeps the path portable.
        content, meta = reader.read(os.path.join(folder, entry))
        meta['locale_date'] = strftime(meta['date'], gen.settings['DEFAULT_DATE_FORMAT'])
        com = Comment(name, meta, content)
        if 'replyto' in meta:
            replies.append(com)
        else:
            comments.append(com)

    # Index every comment and reply by id once, so each reply finds its
    # parent(s) with a dict lookup instead of rescanning the whole list.
    # Lists are kept per id so duplicate ids behave as in a linear scan.
    # NOTE(review): assumes comment ids and 'replyto' values are hashable
    # (e.g. strings) -- confirm against the Comment class.
    by_id = {}
    for candidate in chain(comments, replies):
        by_id.setdefault(candidate.id, []).append(candidate)
    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    # Total = top-level comments plus whatever countReplies() reported.
    metadata['comments_count'] = len(comments) + count
    metadata['comments'] = comments

示例#17

0

显示文件

    def _create_pdf(self, obj, output_path):
        """Write a PDF rendering of ``obj`` to ``<output_path>/<slug>.pdf``.

        ``.rst`` sources are passed to the PDF creator verbatim. Sources whose
        extension is handled by an enabled ``MarkdownReader`` are converted to
        HTML, prefixed with a reST title and field list built from the
        metadata, and embedded in a ``.. raw:: html`` directive. Any other
        source is skipped with a warning.

        Args:
            obj: Content object providing ``slug`` and ``source_path``.
            output_path: Directory the PDF is written into.

        Returns:
            None.
        """
        filename = obj.slug + '.pdf'
        output_pdf = os.path.join(output_path, filename)
        mdreader = MarkdownReader(self.settings)
        _, ext = os.path.splitext(obj.source_path)
        if ext == '.rst':
            with open(obj.source_path, encoding='utf-8') as f:
                text = f.read()
            header = ''
        elif ext[1:] in mdreader.file_extensions and mdreader.enabled:
            text, meta = mdreader.read(obj.source_path)
            header = ''

            if 'title' in meta:
                title = meta['title']
                header = title + '\n' + '#' * len(title) + '\n\n'
                del meta['title']

            # We can't support all fields, so we strip the ones that won't
            # look good. Iterate over a snapshot of the keys: deleting from a
            # dict while iterating it raises RuntimeError on Python 3.
            for k in list(meta):
                if k not in self.supported_md_fields:
                    del meta[k]

            header += '\n'.join([':%s: %s' % (k, meta[k]) for k in meta])
            header += '\n\n.. raw:: html\n\n\t'
            # Indent every line so it stays inside the raw directive.
            text = text.replace('\n', '\n\t')

            # rst2pdf casts the text to str and will break if it finds
            # non-escaped characters. Here we nicely escape them to XML/HTML
            # entities before proceeding. Decode back to text afterwards so
            # that ``header + text`` does not mix str and bytes.
            text = text.encode('ascii', 'xmlcharrefreplace').decode('ascii')
        else:
            # We don't support this format
            logger.warning('Ignoring unsupported file ' + obj.source_path)
            return

        logger.info(' [ok] writing %s' % output_pdf)
        self.pdfcreator.createPdf(text=(header+text),
                                  output=output_pdf)

示例#18

0

显示文件

文件： test_toc.py 项目： blinkov/pelican-toc

class TestToCGeneration(unittest.TestCase):
    """Compare ``toc.generate_toc`` output with stored expected HTML files."""

    def setUp(self):
        """Call ``toc.init_default_config``, then load settings and a reader."""
        toc.init_default_config(None)
        self.settings = get_settings()
        self.md_reader = MarkdownReader(self.settings)

    def _handle_article_generation(self, path):
        """Read the Markdown file at ``path`` and wrap it in an ``Article``."""
        html, meta = self.md_reader.read(path)
        return Article(content=html, metadata=meta)

    def _generate_toc(self, article_path, expected_path):
        """Return the article with its ToC generated plus the expected text."""
        article = self._handle_article_generation(article_path)
        toc.generate_toc(article)
        with open(expected_path, 'r') as fixture:
            expected_toc = fixture.read()
        return article, expected_toc

    def test_toc_generation(self):
        """An article with headers yields the stored ToC."""
        article, expected_toc = self._generate_toc(
            "test_data/article_with_headers.md",
            "test_data/article_with_headers_toc.html",
        )
        self.assertEqual(article.toc, expected_toc)

    def test_toc_generation_nonascii(self):
        """Non-ASCII headers yield the stored ToC."""
        article, expected_toc = self._generate_toc(
            "test_data/article_with_headers_nonascii.md",
            "test_data/article_with_headers_toc_nonascii.html",
        )
        self.assertEqual(article.toc, expected_toc)

    def test_toc_generation_exclude_small_headers(self):
        """With EXCLUDE_SMALL_HEADERS enabled the stored ToC is produced."""
        self.settings['TOC']['EXCLUDE_SMALL_HEADERS'] = True
        article, expected_toc = self._generate_toc(
            "test_data/article_with_headers_exclude_small_headers.md",
            "test_data/article_with_headers_toc_exclude_small_headers.html",
        )
        self.assertEqual(article.toc, expected_toc)

    def test_no_toc_generation(self):
        """An article without headers ends up with no ``toc`` attribute."""
        article = self._handle_article_generation(
            "test_data/article_without_headers.md")
        toc.generate_toc(article)
        # Accessing the missing attribute is what must raise here.
        with self.assertRaises(AttributeError):
            self.assertIsNone(article.toc)

示例#19

0

显示文件

    def read(self, filepath):
        """Convert an IPython notebook to HTML and collect its Pelican metadata.

        Metadata is taken from a sibling ``<stem>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, where ``<stem>`` is
        the notebook file name up to its first dot; otherwise from the
        ``metadata`` entry of the notebook JSON, with keys lower-cased and
        values passed through ``self.process_metadata``. In both cases
        ``metadata['ipython']`` is set to True.

        Args:
            filepath: Path of the notebook file to convert.

        Returns:
            A ``(body, metadata)`` tuple. ``body`` is the filtered notebook
            CSS, followed by the parsed HTML body and ``LATEX_CUSTOM_SCRIPT``.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)
        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; the context manager
            # closes the handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Fix metadata to pelican standards. Build a new dict rather than
            # deleting/re-adding keys of the dict being iterated, which raises
            # RuntimeError on Python 3.
            metadata = {}
            for key, value in notebook_metadata.items():
                key = key.lower()
                metadata[key] = self.process_metadata(key, value)
        metadata['ipython'] = True
        # Convert ipython notebook to html
        config = Config({
            'CSSHTMLHeaderTransformer': {
                'enabled': True,
                'highlight_class': '.highlight-ipynb'
            }
        })
        exporter = HTMLExporter(config=config,
                                template_file='basic',
                                filters={'highlight2html': custom_highlighter})

        content, info = exporter.from_filename(filepath)

        if BeautifulSoup:
            # Drop every input cell whose second descendant contains the
            # text '#ignore'.
            soup = BeautifulSoup(content)
            for cell in soup.findAll("div", {"class": "input"}):
                if cell.findChildren()[1].find(text='#ignore') is not None:
                    cell.extract()
        else:
            soup = content

        # Process using Pelican HTMLReader
        content = '<body>{0}</body>'.format(
            soup)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body
        # The parser summary is used when IPYNB_USE_META_SUMMARY is absent or
        # compares equal to False.
        if ('IPYNB_USE_META_SUMMARY' not in self.settings
                or self.settings['IPYNB_USE_META_SUMMARY'] == False):
            metadata['summary'] = parser.summary

        def filter_css(style_text):
            '''
            HACK: IPython returns a lot of CSS including its own bootstrap.
            Get only the IPython Notebook CSS styles.
            '''
            index = style_text.find('/*!\n*\n* IPython notebook\n*\n*/')
            if index > 0:
                style_text = style_text[index:]
            index = style_text.find('/*!\n*\n* IPython notebook webapp\n*\n*/')
            if index > 0:
                style_text = style_text[:index]

            # Strip black colour declarations and .rendered_html rules.
            style_text = re.sub(r'color\:\#0+(;)?', '', style_text)
            style_text = re.sub(
                r'\.rendered_html[a-z0-9,._ ]*\{[a-z0-9:;%.#\-\s\n]+\}', '',
                style_text)

            return '<style type=\"text/css\">{0}</style>'.format(style_text)

        ipython_css = '\n'.join(
            filter_css(css_style) for css_style in info['inlining']['css'])
        body = ipython_css + body + LATEX_CUSTOM_SCRIPT

        return body, metadata

示例#20

0

显示文件

            'css_class': 'highlight'
        },
        'markdown.extensions.extra': {},
        'markdown.extensions.meta': {},
    },
    'output_format': 'html5',
}

# Theme directory, given relative to this settings file.
THEME = "./Flex"
# does nothing?
# THEME_COLOR = 'light'

# defines order of page titles in the header
PAGE_ORDER_BY = 'page-order'

# prevent Pelican from reading files matching the following patterns
IGNORE_FILES = ['.#*', 'includes', 'templates', 'README.md']
# place files replacing theme templates in ./content/templates
THEME_TEMPLATES_OVERRIDES = ['./content/templates']
# Single-element tuple: only the 'index' template is listed.
DIRECT_TEMPLATES = (('index', ))

# copied to /output without modification
STATIC_PATHS = ['images', 'css']

# Render the Markdown contents of every file in ./content/includes and make
# them accessible from the INCLUDES variable in HTML templates, keyed by the
# file name without its extension.
# NOTE(review): `config` is not defined in the visible part of this file;
# confirm it is the settings mapping MarkdownReader expects.
INCLUDES = {}
for fname in glob.glob('./content/includes/*.md'):
    pth = Path(fname)
    # read() returns (content, metadata); only the content is kept.
    INCLUDES[pth.stem], _ = MarkdownReader(config).read(fname)

示例#21

0

显示文件

文件： test_toc.py 项目： blinkov/pelican-toc

 def setUp(self):
     """Call ``toc.init_default_config``, then load settings and a reader."""
     toc.init_default_config(None)
     settings = get_settings()
     self.settings = settings
     self.md_reader = MarkdownReader(settings)

示例#22

0

显示文件

    def read(self, filepath):
        """Convert a Jupyter notebook to HTML and collect its Pelican metadata.

        Metadata is read from a sibling ``<stem>.nbdata`` file when it exists,
        or from the first notebook cell when the ``IPYNB_MARKUP_USE_FIRST_CELL``
        setting is truthy. In both cases it is parsed with Pelican's
        ``MarkdownReader``.

        Args:
            filepath: Path of the notebook file to convert.

        Returns:
            A ``(content, metadata)`` tuple.

        Raises:
            Exception: If neither metadata source is available.
        """
        metadata = {}
        # NOTE(review): every metadata branch below either rebinds
        # ``metadata`` or raises, so this flag never reaches the returned
        # dict -- confirm whether that is intended.
        metadata["jupyter_notebook"] = True
        start = 0
        end = None

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = os.path.splitext(filename)[0] + ".nbdata"
        metadata_filepath = os.path.join(filedir, metadata_filename)

        if os.path.exists(metadata_filepath):
            # Found and .nbdata file
            # Process it using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        elif self.settings.get("IPYNB_MARKUP_USE_FIRST_CELL"):
            # No external .md file:
            # Load metadata from the first cell of the notebook file
            with open(filepath) as ipynb_file:
                nb_json = json.load(ipynb_file)

            metacell = "\n".join(nb_json["cells"][0]["source"])
            # Convert Markdown title and listings to standard metadata items
            metacell = re.sub(r"^#+\s+",
                              "title: ",
                              metacell,
                              flags=re.MULTILINE)
            metacell = re.sub(r"^\s*[*+-]\s+",
                              "",
                              metacell,
                              flags=re.MULTILINE)
            # Unfortunately we can not pass MarkdownReader an in-memory
            # string, so we have to work with a temporary file
            with tempfile.NamedTemporaryFile(
                    "w+", encoding="utf-8") as metadata_file:
                md_reader = MarkdownReader(self.settings)
                metadata_file.write(metacell)
                metadata_file.flush()
                _content, metadata = md_reader.read(metadata_file.name)
            # Skip metacell
            start = 1
        else:
            # The original message contained a literal "f{filepath}" (the
            # f-string prefix was inside the quotes) and ran two sentences
            # together; interpolate the path and add the missing space.
            raise Exception(
                "Error processing {0}: "
                "Could not find metadata in: .nbdata file or in the first cell of the notebook. "
                "If this notebook is used with liquid tags then you can safely ignore this error."
                .format(filepath)
            )

        if "subcells" in metadata:
            # 'subcells' holds a Python literal that unpacks to (start, end).
            start, end = ast.literal_eval(metadata["subcells"])

        preprocessors = self.settings.get("IPYNB_PREPROCESSORS", [])
        template = self.settings.get("IPYNB_EXPORT_TEMPLATE", None)
        content, info = get_html_from_filepath(
            filepath,
            start=start,
            end=end,
            preprocessors=preprocessors,
            template=template,
            colorscheme=self.settings.get("IPYNB_COLORSCHEME"),
        )

        # Generate summary: Do it before cleaning CSS
        keys = [k.lower() for k in metadata.keys()]
        use_meta_summary = self.settings.get("IPYNB_GENERATE_SUMMARY", True)
        if "summary" not in keys and use_meta_summary:
            parser = MyHTMLParser(self.settings, filename)
            content = "<body>{0}</body>".format(content)
            parser.feed(content)
            parser.close()
            # content = parser.body
            metadata["summary"] = parser.summary

        # Write/fix content
        fix_css = self.settings.get("IPYNB_FIX_CSS", True)
        ignore_css = self.settings.get("IPYNB_SKIP_CSS", False)
        content = parse_css(content,
                            info,
                            fix_css=fix_css,
                            ignore_css=ignore_css)
        if self.settings.get("IPYNB_NB_SAVE_AS"):
            # Copy the source notebook into the output tree and record where.
            output_path = self.settings.get("OUTPUT_PATH")
            nb_output_fullpath = self.settings.get("IPYNB_NB_SAVE_AS").format(
                **metadata)
            nb_output_dir = os.path.join(output_path,
                                         os.path.dirname(nb_output_fullpath))
            os.makedirs(nb_output_dir, exist_ok=True)
            copyfile(filepath, os.path.join(output_path, nb_output_fullpath))
            metadata["nb_path"] = nb_output_fullpath
        return content, metadata

示例#23

0

显示文件

文件： markup.py 项目： danielfrg/pelican-ipynb

    def read(self, filepath):
        """Render a Jupyter notebook to HTML and gather its Pelican metadata.

        Metadata sources, in order: a sibling ``<stem>.nbdata`` file; the
        first notebook cell when ``IPYNB_USE_METACELL`` is truthy; otherwise
        selected keys of the notebook's own ``metadata`` entry.

        Args:
            filepath: Path of the notebook file to convert.

        Returns:
            A ``(content, metadata)`` tuple.

        Raises:
            Exception: If the metadata lacks ``title`` or ``date``.
        """
        metadata = {'jupyter_notebook': True}
        start, end = 0, None

        # Locate the optional sidecar metadata file next to the notebook.
        filedir, filename = os.path.split(filepath)
        nbdata_path = os.path.join(
            filedir, os.path.splitext(filename)[0] + '.nbdata')

        if os.path.exists(nbdata_path):
            # Option 1: external file, parsed with the Pelican MD reader.
            _content, metadata = MarkdownReader(self.settings).read(nbdata_path)
        else:
            with open(filepath) as ipynb_file:
                doc = json.load(ipynb_file)
            if not self.settings.get('IPYNB_USE_METACELL'):
                # Option 3: whitelisted keys of the notebook metadata.
                wanted = ("title", "date", "category", "tags", "slug", "author")
                for raw_key, value in doc['metadata'].items():
                    lowered = raw_key.lower()
                    if lowered in wanted:
                        metadata[lowered] = self.process_metadata(lowered, value)
            else:
                # Option 2: the first cell carries the metadata. Headings
                # become "title: " lines and list bullets are stripped.
                metacell = "\n".join(doc['cells'][0]['source'])
                for pattern, replacement in ((r'^#+\s+', 'title: '),
                                             (r'^\s*[*+-]\s+', '')):
                    metacell = re.sub(pattern, replacement, metacell,
                                      flags=re.MULTILINE)
                # MarkdownReader only reads from disk, hence the temp file.
                with tempfile.NamedTemporaryFile('w+', encoding='utf-8') as metadata_file:
                    md_reader = MarkdownReader(self.settings)
                    metadata_file.write(metacell)
                    metadata_file.flush()
                    _content, metadata = md_reader.read(metadata_file.name)
                start = 1  # the metacell itself is not rendered

        keys = [name.lower() for name in metadata]
        if not {'title', 'date'} <= set(keys):
            # Probably using ipynb.liquid mode
            md_filepath = os.path.join(filedir, filename.split('.')[0] + '.md')
            if os.path.exists(md_filepath):
                raise Exception("Could not find metadata in `.nbdata` file or inside `.ipynb` but found `.md` file, "
                      "assuming that this notebook is for liquid tag usage if true ignore this error")
            raise Exception("Could not find metadata in `.nbdata` file or inside `.ipynb`")

        if 'subcells' in metadata:
            start, end = ast.literal_eval(metadata['subcells'])

        content, info = get_html_from_filepath(
            filepath,
            start=start,
            end=end,
            preprocessors=self.settings.get('IPYNB_PREPROCESSORS', []),
            template=self.settings.get('IPYNB_EXPORT_TEMPLATE', None),
        )

        # The summary has to be produced before the CSS is cleaned.
        use_meta_summary = self.settings.get('IPYNB_GENERATE_SUMMARY', True)
        if 'summary' not in keys and use_meta_summary:
            parser = MyHTMLParser(self.settings, filename)
            if isinstance(content, six.binary_type):
                # Force UTF-8: format() would otherwise try ASCII.
                content = content.decode("utf-8")
            content = '<body>{0}</body>'.format(content)
            parser.feed(content)
            parser.close()
            metadata['summary'] = parser.summary

        content = parse_css(
            content,
            info,
            fix_css=self.settings.get('IPYNB_FIX_CSS', True),
            ignore_css=self.settings.get('IPYNB_SKIP_CSS', False),
        )

        nb_save_as = self.settings.get('IPYNB_NB_SAVE_AS')
        if nb_save_as:
            # Copy the notebook into the output tree and record its path.
            output_path = self.settings.get('OUTPUT_PATH')
            nb_output_fullpath = nb_save_as.format(**metadata)
            nb_output_dir = os.path.join(output_path, os.path.dirname(nb_output_fullpath))
            if not os.path.isdir(nb_output_dir):
                os.makedirs(nb_output_dir, exist_ok=True)
            copyfile(filepath, os.path.join(output_path, nb_output_fullpath))
            metadata['nb_path'] = nb_output_fullpath
        return content, metadata

示例#24

0

显示文件

import logging
logger = logging.getLogger(__name__)

import os
import codecs

from pelican.readers import MarkdownReader
from pelican.settings import DEFAULT_CONFIG

# Directory that is scanned (recursively) for article sources.
ARTICLE_DIR = 'content/articles'

# Iterate over all files in ARTICLE_DIR (recursively with walk)
# and try parsing them with pelicans' MarkdownReader
md_reader = MarkdownReader(DEFAULT_CONFIG)
for dirpath, subdirs, files in os.walk(ARTICLE_DIR):
    for file in files:
        # Logger.warn is a deprecated alias (removed in Python 3.13); use
        # warning() and let logging do the formatting lazily.
        logger.warning('[MD] Trying to parse %s...', file)
        path = os.path.join(dirpath, file)
        # The result is discarded: the point is only to see whether
        # parsing raises.
        md_reader.read(path)

示例#25

0

显示文件

文件： test_toc.py 项目： Alephsa/alephsa-blog

 def setUpClass(cls):
     """Call ``toc.init_default_config``, then store settings and a reader on ``cls``."""
     toc.init_default_config(None)
     settings = get_settings()
     cls.settings = settings
     cls.md_reader = MarkdownReader(settings)

示例#26

0

显示文件

    def read(self, filepath):
        """Render a Jupyter notebook to HTML and gather its Pelican metadata.

        Metadata sources, in order: a sibling ``<stem>.nbdata`` file; the
        first notebook cell when ``IPYNB_USE_METACELL`` is truthy; otherwise
        selected keys of the notebook's own ``metadata`` entry.

        Args:
            filepath: Path of the notebook file to convert.

        Returns:
            A ``(content, metadata)`` tuple.

        Raises:
            Exception: If the metadata lacks ``title`` or ``date``.
        """
        settings = self.settings
        metadata = {'jupyter_notebook': True}
        start, end = 0, None

        # Locate the optional sidecar metadata file next to the notebook.
        filedir, filename = os.path.split(filepath)
        stem = os.path.splitext(filename)[0]
        metadata_filepath = os.path.join(filedir, stem + '.nbdata')

        if os.path.exists(metadata_filepath):
            # Option 1: external file, parsed with the Pelican MD reader.
            md_reader = MarkdownReader(settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            with open(filepath) as ipynb_file:
                doc = json.load(ipynb_file)
            if settings.get('IPYNB_USE_METACELL'):
                # Option 2: the first cell carries the metadata. Headings
                # become "title: " lines and list bullets are stripped.
                metacell = "\n".join(doc['cells'][0]['source'])
                metacell = re.sub(r'^#+\s+', 'title: ', metacell,
                                  flags=re.MULTILINE)
                metacell = re.sub(r'^\s*[*+-]\s+', '', metacell,
                                  flags=re.MULTILINE)
                # MarkdownReader only reads from disk, hence the temp file.
                with tempfile.NamedTemporaryFile(
                        'w+', encoding='utf-8') as metadata_file:
                    md_reader = MarkdownReader(settings)
                    metadata_file.write(metacell)
                    metadata_file.flush()
                    _content, metadata = md_reader.read(metadata_file.name)
                start = 1  # the metacell itself is not rendered
            else:
                # Option 3: whitelisted keys of the notebook metadata.
                standard_keys = ("title", "date", "category", "tags", "slug",
                                 "author")
                for raw_key, value in doc['metadata'].items():
                    lowered = raw_key.lower()
                    if lowered not in standard_keys:
                        continue
                    metadata[lowered] = self.process_metadata(lowered, value)

        keys = [name.lower() for name in metadata]
        if not {'title', 'date'}.issubset(keys):
            # Probably using ipynb.liquid mode
            md_filepath = os.path.join(filedir,
                                       filename.split('.')[0] + '.md')
            if os.path.exists(md_filepath):
                raise Exception(
                    "Could not find metadata in `.nbdata` file or inside `.ipynb` but found `.md` file, "
                    "assuming that this notebook is for liquid tag usage if true ignore this error"
                )
            raise Exception(
                "Could not find metadata in `.nbdata` file or inside `.ipynb`"
            )

        if 'subcells' in metadata:
            start, end = ast.literal_eval(metadata['subcells'])

        content, info = get_html_from_filepath(
            filepath,
            start=start,
            end=end,
            preprocessors=settings.get('IPYNB_PREPROCESSORS', []),
            template=settings.get('IPYNB_EXPORT_TEMPLATE', None),
            colorscheme=settings.get('IPYNB_COLORSCHEME'),
        )

        # The summary has to be produced before the CSS is cleaned.
        use_meta_summary = settings.get('IPYNB_GENERATE_SUMMARY', True)
        if 'summary' not in keys and use_meta_summary:
            parser = MyHTMLParser(settings, filename)
            if isinstance(content, six.binary_type):
                # Force UTF-8: format() would otherwise try ASCII.
                content = content.decode("utf-8")
            content = '<body>{0}</body>'.format(content)
            parser.feed(content)
            parser.close()
            metadata['summary'] = parser.summary

        content = parse_css(
            content,
            info,
            fix_css=settings.get('IPYNB_FIX_CSS', True),
            ignore_css=settings.get('IPYNB_SKIP_CSS', False),
        )

        save_pattern = settings.get('IPYNB_NB_SAVE_AS')
        if save_pattern:
            # Copy the notebook into the output tree and record its path.
            output_path = settings.get('OUTPUT_PATH')
            nb_output_fullpath = save_pattern.format(**metadata)
            nb_output_dir = os.path.join(output_path,
                                         os.path.dirname(nb_output_fullpath))
            if not os.path.isdir(nb_output_dir):
                os.makedirs(nb_output_dir, exist_ok=True)
            copyfile(filepath, os.path.join(output_path, nb_output_fullpath))
            metadata['nb_path'] = nb_output_fullpath
        return content, metadata

示例#27

0

显示文件

文件： ipythonnb.py 项目： anemes/pelican-ipythonnb

    def read(self, filepath):
        """Convert an IPython notebook to HTML and collect its Pelican metadata.

        Metadata is taken from a sibling ``<stem>.ipynb-meta`` file (parsed
        with Pelican's ``MarkdownReader``) when it exists, where ``<stem>`` is
        the notebook file name up to its first dot. Otherwise it is taken from
        the ``metadata`` entry of the notebook JSON: keys are lower-cased,
        values go through ``self.process_metadata`` and
        ``metadata['ipython']`` is set to True.

        Input cells containing the text ``#HIDE`` are removed; every other
        input cell gets a ``javascript:toggle(...)`` link in place of its
        first descendant's content.

        NOTE: this block targets Python 2 (it relies on ``unicode``).

        Args:
            filepath: Path of the notebook file to convert.

        Returns:
            A ``(body, metadata)`` tuple; ``body`` is ``CUSTOM_CSS`` followed
            by the parsed HTML body and ``js``.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; the context manager
            # closes the handle even when the JSON cannot be parsed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Fix metadata to pelican standards. Build a new dict instead of
            # deleting/re-adding keys of the dict being looped over, which is
            # only safe where items() returns a list copy.
            metadata = {}
            for key, value in notebook_metadata.items():
                key = key.lower()
                metadata[key] = self.process_metadata(key, value)
            metadata['ipython'] = True

        # Convert ipython notebook to html
        config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                         'highlight_class': '.highlight-ipynb'}})
        exporter = HTMLExporter(config=config, template_file='basic',
                                filters={'highlight2html': custom_highlighter})

        content, info = exporter.from_filename(filepath)

        soup = BeautifulSoup(content)

        # find all the inputs with hide and strip them out
        inputs = 0
        for i in soup.findAll("div", {"class": "input"}):
            if i.findChildren()[1].findChild().findChild().findChild().find(text='#HIDE') is not None:
                i.extract()
            else:
                inputs = inputs + 1
                tag = soup.new_tag('a', href="javascript:toggle('input%s');" % inputs, target='_self')
                tag.string = i.findChildren()[0].text.strip()
                i.findChildren()[0].clear()
                i.findChildren()[0].append(tag)
                try:
                    i.find("div", {"class": "input_area"})['id'] = 'input%s' % inputs
                except Exception:
                    # Best effort: dump the offending cell and carry on.
                    # Catching Exception (not a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    print(i)

        # Process using Pelican HTMLReader
        content = '<body>{0}</body>'.format(unicode(soup))  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body
        summary = parser.summary

        metadata['summary'] = summary

        # Remove some CSS styles, so it doesn't break the themes.
        def filter_tags(style_text):
            """Drop CSS lines that start with any of the excluded selectors."""
            exclude = ('body', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                       '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                       'code', 'pre', 'div.text_cell_render', 'body')
            kept = [line for line in style_text.split('\n')
                    if not line.startswith(exclude)]
            return '<style type=\"text/css\">{0}</style>'.format('\n'.join(kept))

        # NOTE(review): the filtered notebook CSS is computed and then
        # immediately replaced by CUSTOM_CSS -- confirm the override is
        # intentional.
        css = '\n'.join(filter_tags(css) for css in info['inlining']['css'])
        css = CUSTOM_CSS
        body = css + body
        body = body + js
        return body, metadata

示例#28

0

显示文件

文件： markup.py 项目： aakinlalu/jupyter-blog

    def read(self, filepath):
        """Render an IPython notebook to HTML and gather its Pelican metadata.

        Metadata sources, in order: a sibling ``<stem>.ipynb-meta`` file; the
        first notebook cell when ``IPYNB_USE_METACELL`` is truthy; otherwise
        selected keys of the notebook's own ``metadata`` entry.

        Args:
            filepath: Path of the notebook file to convert.

        Returns:
            A ``(content, metadata)`` tuple.

        Raises:
            Exception: If the metadata lacks ``title`` or ``date``.
        """
        metadata = {'ipython': True}
        start, end = 0, None

        # Locate the optional sidecar metadata file next to the notebook.
        filedir, filename = os.path.split(filepath)
        metadata_filepath = os.path.join(
            filedir, os.path.splitext(filename)[0] + '.ipynb-meta')

        # One reader serves both the sidecar file and the metacell temp file.
        md_reader = MarkdownReader(self.settings)

        if os.path.exists(metadata_filepath):
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # No sidecar file: fall back to the notebook JSON itself.
            with open(filepath) as ipynb_file:
                doc = json.load(ipynb_file)
            if not self.settings.get('IPYNB_USE_METACELL'):
                # Only whitelisted keys of the notebook metadata are kept.
                standard_keys = ("title", "date", "category", "tags", "slug",
                                 "author")
                for raw_key, value in doc['metadata'].items():
                    lowered = raw_key.lower()
                    if lowered in standard_keys:
                        metadata[lowered] = self.process_metadata(lowered, value)
            else:
                # The first cell carries the metadata. Headings become
                # "title: " lines and list bullets are stripped.
                metacell = "\n".join(doc['cells'][0]['source'])
                metacell = re.sub(r'^#+\s+', 'title: ', metacell,
                                  flags=re.MULTILINE)
                metacell = re.sub(r'^\s*[*+-]\s+', '', metacell,
                                  flags=re.MULTILINE)
                # MarkdownReader only reads from disk, hence the temp file.
                with tempfile.NamedTemporaryFile(
                        'w+', encoding='utf-8') as metadata_file:
                    metadata_file.write(metacell)
                    metadata_file.flush()
                    _content, metadata = md_reader.read(metadata_file.name)
                start = 1  # the metacell itself is not rendered

        keys = [name.lower() for name in metadata]
        if not {'title', 'date'}.issubset(keys):
            # Probably using ipynb.liquid mode
            md_filepath = os.path.join(filedir,
                                       filename.split('.')[0] + '.md')
            if os.path.exists(md_filepath):
                raise Exception(
                    "Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                    "assuming that this notebook is for liquid tag usage if true ignore this error"
                )
            raise Exception(
                "Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file."
            )

        if 'subcells' in metadata:
            start, end = ast.literal_eval(metadata['subcells'])

        content, info = get_html_from_filepath(
            filepath,
            preprocessors=self.settings.get('IPYNB_PREPROCESSORS', []),
            start=start,
            end=end,
            template=self.settings.get('IPYNB_EXPORT_TEMPLATE'))

        # The summary has to be produced before the CSS is cleaned.
        if 'summary' not in keys:
            parser = MyHTMLParser(self.settings, filename)
            if isinstance(content, six.binary_type):
                # PY2 str / PY3 bytes: force UTF-8, format() would try ASCII.
                content = content.decode("utf-8")
            content = '<body>{0}</body>'.format(content)
            parser.feed(content)
            parser.close()
            content = parser.body
            # The parser summary is used when IPYNB_USE_META_SUMMARY is
            # absent or compares equal to False.
            if ('IPYNB_USE_META_SUMMARY' not in self.settings.keys()
                    or self.settings['IPYNB_USE_META_SUMMARY'] == False):
                metadata['summary'] = parser.summary

        # The mere presence of the key switches CSS off, whatever its value.
        ignore_css = 'IPYNB_IGNORE_CSS' in self.settings.keys()
        content = fix_css(content, info, ignore_css=ignore_css)
        return content, metadata

示例#29

0

显示文件

文件： allPythonContent.py 项目： Mondego/pyreco

    def read(self, filepath):
        """Convert the notebook at ``filepath`` into HTML plus Pelican metadata.

        Metadata is read from a sibling ``<stem>.ipynb-meta`` file (parsed with
        ``MarkdownReader``) when that file exists, where ``<stem>`` is the part
        of the file name before its first dot. Otherwise it is taken from the
        ``"metadata"`` entry of the notebook JSON, with every key lower-cased
        and every value passed through ``self.process_metadata``.

        Args:
            filepath: Path of the ``.ipynb`` file to convert.

        Returns:
            A ``(body, metadata)`` tuple. ``body`` is the parsed notebook HTML
            prefixed with the filtered exporter CSS and ``CUSTOM_CSS``;
            ``metadata["summary"]`` holds the summary produced by
            ``MyHTMLParser``.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split(".")[0] + ".ipynb-meta"
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; the context manager
            # guarantees the handle is closed even if the JSON is malformed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)["metadata"]

            # Fix metadata to pelican standards. Build a fresh dict instead of
            # deleting/re-inserting keys in the dict being iterated, which is
            # unsafe on Python 3 (RuntimeError or entries visited twice).
            metadata = {}
            for key, value in notebook_metadata.items():
                key = key.lower()
                metadata[key] = self.process_metadata(key, value)
            # Only set on this branch: metadata coming from an external
            # .ipynb-meta file is returned without the "ipython" flag.
            metadata["ipython"] = True

        # Convert ipython notebook to html
        config = Config({"CSSHTMLHeaderTransformer": {"enabled": True, "highlight_class": ".highlight-ipynb"}})
        exporter = HTMLExporter(config=config, template_file="basic", filters={"highlight2html": custom_highlighter})

        content, info = exporter.from_filename(filepath)

        # Process using Pelican HTMLReader
        content = "<body>{0}</body>".format(content)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body

        metadata["summary"] = parser.summary

        # Remove some CSS styles, so it doesn't break the themes.
        def filter_tags(style_text):
            """Wrap ``style_text`` in a <style> tag, dropping excluded lines.

            A line is dropped when it starts with any of the prefixes below
            (a plain prefix test, not a CSS selector match).
            """
            excluded_prefixes = (
                "p",
                "h1",
                "h2",
                "h3",
                "h4",
                "h5",
                "h6",
                "a",
                "ul",
                "ol",
                "li",
                ".rendered_html",
                "@media",
                ".navbar",
                "nav.navbar",
                ".navbar-text",
                "code",
                "pre",
                "div.text_cell_render",
            )
            kept_lines = [
                line
                for line in style_text.split("\n")
                if not line.startswith(excluded_prefixes)
            ]
            return '<style type="text/css">{0}</style>'.format("\n".join(kept_lines))

        css = "\n".join(filter_tags(sheet) for sheet in info["inlining"]["css"])
        css = css + CUSTOM_CSS
        body = css + body

        return body, metadata

示例#30

0

显示文件

文件： ipynb.py 项目： TheNeuralBit/theneuralbit

    def read(self, filepath):
        """Convert the notebook at ``filepath`` into HTML plus Pelican metadata.

        Metadata is read from a sibling ``<stem>.ipynb-meta`` file (parsed with
        ``MarkdownReader``) when that file exists, where ``<stem>`` is the part
        of the file name before its first dot. Otherwise it is taken from the
        ``'metadata'`` entry of the notebook JSON, with every key lower-cased
        and every value passed through ``self.process_metadata``. In both
        cases ``metadata['ipython']`` is set to ``True``.

        When ``BeautifulSoup`` is truthy, every ``div`` with class ``input``
        whose second descendant contains the text ``#ignore`` is removed from
        the exported HTML.

        Args:
            filepath: Path of the ``.ipynb`` file to convert.

        Returns:
            A ``(body, metadata)`` tuple. ``body`` is the filtered exporter CSS
            followed by the parsed notebook HTML and ``LATEX_CUSTOM_SCRIPT``;
            ``metadata['summary']`` holds the summary produced by
            ``MyHTMLParser``.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # Load metadata
        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, metadata = md_reader.read(metadata_filepath)
        else:
            # Load metadata from ipython notebook file; the context manager
            # guarantees the handle is closed even if the JSON is malformed.
            with open(filepath) as ipynb_file:
                notebook_metadata = json.load(ipynb_file)['metadata']

            # Fix metadata to pelican standards. Build a fresh dict instead of
            # deleting/re-inserting keys in the dict being iterated, which is
            # unsafe on Python 3 (RuntimeError or entries visited twice).
            metadata = {}
            for key, value in notebook_metadata.items():
                key = key.lower()
                metadata[key] = self.process_metadata(key, value)
        metadata['ipython'] = True

        # Convert ipython notebook to html
        config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                         'highlight_class': '.highlight-ipynb'}})
        exporter = HTMLExporter(config=config, template_file='basic',
                                filters={'highlight2html': custom_highlighter})

        content, info = exporter.from_filename(filepath)

        if BeautifulSoup:
            soup = BeautifulSoup(content)
            # Strip input cells the author tagged with '#ignore'.
            for input_div in soup.findAll("div", {"class": "input"}):
                if input_div.findChildren()[1].find(text='#ignore') is not None:
                    input_div.extract()
        else:
            # No BeautifulSoup available: pass the exporter output through.
            soup = content

        # Process using Pelican HTMLReader
        content = '<body>{0}</body>'.format(soup)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body

        metadata['summary'] = parser.summary

        def filter_css(style_text):
            """Trim one exporter stylesheet and wrap it in a <style> tag.

            HACK: IPython returns a lot of CSS including its own bootstrap.
            Get only the IPython Notebook CSS styles.
            """
            # Keep everything from the "IPython notebook" banner onwards.
            # A banner at offset 0 or a missing banner (-1) both leave the
            # text untouched because of the ``> 0`` test.
            index = style_text.find('/*!\n*\n* IPython notebook\n*\n*/')
            if index > 0:
                style_text = style_text[index:]
            # Cut everything from the "IPython notebook webapp" banner onwards.
            index = style_text.find('/*!\n*\n* IPython notebook webapp\n*\n*/')
            if index > 0:
                style_text = style_text[:index]

            # Remove all-zero colour declarations and simple .rendered_html
            # rules from the remaining CSS.
            style_text = re.sub(r'color\:\#0+(;)?', '', style_text)
            style_text = re.sub(r'\.rendered_html[a-z0-9 ]*\{[a-z0-9:;%.#\-\s\n]+\}', '', style_text)

            return '<style type="text/css">{0}</style>'.format(style_text)

        ipython_css = '\n'.join(filter_css(css_style) for css_style in info['inlining']['css'])
        body = ipython_css + body + LATEX_CUSTOM_SCRIPT

        return body, metadata

示例#31

0

显示文件

    def read(self, filepath):
        """Convert the notebook at ``filepath`` into HTML plus Pelican metadata.

        Metadata is assembled with ``join_metadata`` from, in this order: a
        ``status: draft`` entry when the file name starts with ``draft``
        (case-insensitive), the notebook-level metadata, the ``pelican`` entry
        of the first cell's metadata, and a sibling ``<stem>.ipynb-meta`` file
        when it exists. How conflicts are resolved is decided by
        ``join_metadata`` (defined elsewhere). Keys are then lower-cased and
        values passed through ``self.process_metadata``.

        Settings consulted:
            IPYNB_FIRST_CELL_HEADING_AS_TITLE: when truthy and the first cell
                is a ``heading`` cell, its source becomes the title and the
                cell is removed from the notebook before export.
            IPYNB_FIRST_CONTENT_AS_SUMMARY: when truthy and the first
                remaining cell is a ``markdown`` cell, its rendered Markdown
                becomes the summary; otherwise the ``MyHTMLParser`` summary
                is used.

        Args:
            filepath: Path of the ``.ipynb`` file to convert.

        Returns:
            A ``(body, metadata)`` tuple. ``body`` is the parsed notebook HTML
            prefixed with the filtered exporter CSS and ``CUSTOM_CSS``.
        """
        metadata = {}

        # Files
        filedir = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        metadata_filename = filename.split('.')[0] + '.ipynb-meta'
        metadata_filepath = os.path.join(filedir, metadata_filename)

        # If filename starts with draft, set the status accordingly
        if filename.lower().startswith('draft'):
            metadata['status'] = 'draft'

        with open(filepath) as f:
            nb = nbformat.read(f, 'ipynb')  # read in ipynb content

        first_cell = nb.worksheets[0].cells[0]

        # Read in metadata
        metadata = join_metadata(metadata, nb.metadata)

        if 'pelican' in first_cell.metadata:
            m = first_cell.metadata['pelican']
            metadata = join_metadata(metadata, m)

        if os.path.exists(metadata_filepath):
            # Metadata is on a external file, process using Pelican MD Reader
            md_reader = MarkdownReader(self.settings)
            _content, m = md_reader.read(metadata_filepath)
            metadata = join_metadata(metadata, m)

        # Reformat metadata into pelican acceptable format. Iterate over a
        # snapshot: deleting/re-inserting keys while iterating the live view
        # is unsafe on Python 3 (RuntimeError or entries visited twice).
        for k, v in list(metadata.items()):
            del metadata[k]
            k = k.lower()
            metadata[k] = self.process_metadata(k, v)

        metadata['ipython'] = True

        # use first cell as the title if flag is set
        field = 'IPYNB_FIRST_CELL_HEADING_AS_TITLE'
        if self.settings.get(field, False) and first_cell.cell_type == 'heading':
            metadata['title'] = first_cell.source
            # Truncate the first cell from notebook
            nb.worksheets[0].cells = nb.worksheets[0].cells[1:]

        # Convert ipython notebook to html
        config = Config({'CSSHTMLHeaderPreprocessor': {'enabled': True,
                         'highlight_class': '.highlight-ipynb'}})
        exporter = HTMLExporter(config=config, template_file='basic',
                                filters={'highlight2html': custom_highlighter})

        content, info = exporter.from_notebook_node(nb)

        if BeautifulSoup:
            soup = BeautifulSoup(content)
            # Strip input cells the author tagged with '#ignore'.
            for input_div in soup.findAll("div", {"class": "input"}):
                if input_div.findChildren()[1].find(text='#ignore') is not None:
                    input_div.extract()
        else:
            # No BeautifulSoup available: pass the exporter output through.
            soup = content

        content = '<body>{0}</body>'.format(soup)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        parser.feed(content)
        parser.close()
        body = parser.body
        summary = parser.summary

        field = 'IPYNB_FIRST_CONTENT_AS_SUMMARY'
        # The heading cell may have been removed above, so the notebook can be
        # empty here; fall back to the parser summary instead of raising
        # IndexError in that case.
        remaining_cells = nb.worksheets[0].cells
        first_cell = remaining_cells[0] if remaining_cells else None
        if (self.settings.get(field, False) and first_cell is not None
                and first_cell.cell_type == 'markdown'):
            md = markdown.Markdown()
            metadata['summary'] = md.convert(first_cell.source)
        else:
            metadata['summary'] = summary

        # Remove some CSS styles, so it doesn't break the theme.
        def filter_tags(style_text):
            """Wrap ``style_text`` in a <style> tag, dropping excluded lines.

            A line is dropped when it starts with any of the prefixes below
            (a plain prefix test, not a CSS selector match).
            """
            excluded_prefixes = (
                'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                'code', 'pre', 'div.text_cell_render',
            )
            kept_lines = [
                line
                for line in style_text.split('\n')
                if not line.startswith(excluded_prefixes)
            ]
            return '<style type="text/css">{0}</style>'.format('\n'.join(kept_lines))

        css = '\n'.join(filter_tags(sheet) for sheet in info['inlining']['css'])
        css = css + CUSTOM_CSS
        body = css + body

        return body, metadata