Python publish_from_doctree示例，docutils.core.publish_from_doctree Python示例

示例#1

0

显示文件

文件： plugin_catalog.py 项目： AG4GitHub/leo

    def run(self):
        """Render the collected docstrings to HTML and, optionally, to XML.

        Runs with the supplied options in ``self.opt`` (see make_parser()):
        ``output`` is the HTML destination, ``css_file`` an optional
        stylesheet path and ``xml_output`` an optional XML destination.
        """

        opt = self.opt

        doc_strings = self.get_doc_strings()

        big_doc = self.make_document(doc_strings)

        settings_overrides = {}
        if opt.css_file:
            settings_overrides['stylesheet_path'] = opt.css_file

        # Render first, then write through a context manager: the handle is
        # closed deterministically and the destination is not truncated when
        # rendering fails.
        html = publish_from_doctree(big_doc,
                                    writer_name='html',
                                    settings_overrides=settings_overrides)
        with open(opt.output, 'wb') as html_file:
            html_file.write(html)
        err("Wrote '%s'\n" % opt.output)

        if opt.xml_output:
            xml = publish_from_doctree(big_doc,
                                       writer_name='xml',
                                       settings_overrides={'indents': True})
            with open(opt.xml_output, 'wb') as xml_file:
                xml_file.write(xml)
            err("Wrote '%s'\n" % opt.xml_output)

示例#2

0

显示文件

def generate_documentation(mapping: Dict[str, List[Dict]], items):
    """Build documentation entries for the sections listed in ``mapping``.

    ``mapping`` maps a source URL to a list of section dicts; each section
    dict is read for its ``"name"`` and ``"source"`` keys (the part of
    ``"source"`` after ``#`` is used as the section anchor). ``items`` maps a
    section name to its implementation; sections without one are skipped
    with a warning.

    Returns a dict keyed by ``"<name>(<module>.<impl name>)"`` whose values
    are the section dicts, updated in place with the output of the
    ``MdWriter`` rendering.

    Raises whatever ``requests`` raises on a timeout or an HTTP error status,
    and ``IndexError`` when ``"source"`` has no ``#`` part or no section in
    the fetched document matches the anchor.
    """
    docs = {}

    for url, sections in mapping.items():
        # A timeout keeps a stalled server from hanging the run forever, and
        # failing on HTTP errors avoids parsing an error page as
        # reStructuredText.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        doctree = publish_doctree(source=response.text)

        for section in sections:
            name = section["name"]
            anchor = section["source"].split("#")[1]

            item = items.get(name, None)
            if not item:
                logger.warning("Skipping '%s', missing implementation", name)
                continue

            subtree = list(doctree.traverse(condition=find_section(anchor)))[0]

            # Publish the section on its own, inside a fresh document named
            # after the anchored URL.
            document = new_document(f"{url}#{anchor}")
            document += subtree

            logger.debug(
                "%s",
                publish_from_doctree(document=document).decode("utf8"))
            section.update(
                publish_from_doctree(document=document, writer=MdWriter(url)))

            mod, impl_name = get_impl_name(item)
            dotted_name = f"{mod}.{impl_name}"
            docs[f"{name}({dotted_name})"] = section

    return docs

示例#3

0

显示文件

文件： plugin_catalog.py 项目： chiamingyen/misc

    def run(self):

        """Render the collected docstrings to HTML and, optionally, to XML.

        Runs with the supplied options in ``self.opt`` (see make_parser()):
        ``output`` is the HTML destination, ``css_file`` an optional
        stylesheet path and ``xml_output`` an optional XML destination.
        """

        opt = self.opt

        doc_strings = self.get_doc_strings()

        big_doc = self.make_document(doc_strings)

        settings_overrides = {}
        if opt.css_file:
            settings_overrides['stylesheet_path'] = opt.css_file

        # Render before opening the destination and write through a context
        # manager, so the file handle is always closed and a failed render
        # does not leave a truncated output file behind.
        html = publish_from_doctree(big_doc, writer_name='html',
            settings_overrides=settings_overrides)
        with open(opt.output, 'wb') as html_file:
            html_file.write(html)
        err("Wrote '%s'\n" % opt.output)

        if opt.xml_output:
            xml = publish_from_doctree(big_doc, writer_name='xml',
                settings_overrides={'indents': True})
            with open(opt.xml_output, 'wb') as xml_file:
                xml_file.write(xml)
            err("Wrote '%s'\n" % opt.xml_output)

示例#4

0

显示文件

文件： test_rst.py 项目： cadithealth/pyramid_describe

  def test_ids_generated(self):
    from docutils import utils, nodes
    from docutils.core import publish_from_doctree
    doc = utils.new_document('<program>')
    docsect = nodes.section('')
    docsect['classes'] = ('c1 c2',)
    docsect['ids'] = ('my-test-id',)
    docsect['target-ids'] = ('my-test-id',)
    docsect.append(nodes.title('', '', nodes.Text('Title')))
    docsect.append(nodes.paragraph('', '', nodes.Text('some text.')))
    docsect.append(
      nodes.section(
        '',
        nodes.title('', '', nodes.Text('Sub-Title')),
        nodes.paragraph('', '', nodes.Text('some more text'))))
    doc.append(docsect)
    chk = '''\
.. class:: c1 c2

.. _`my-test-id`:

======
Title
======

some text.

---------
Sub-Title
---------

some more text
'''
    out = publish_from_doctree(doc, writer=rst.Writer())
    self.assertMultiLineEqual(out, chk)

示例#5

0

显示文件

文件： test_rst.py 项目： cadithealth/pyramid_describe

  def test_meta(self):
    from docutils import utils, nodes
    from docutils.parsers.rst.directives.html import MetaBody
    from docutils.core import publish_from_doctree
    doc = utils.new_document('<program>')
    doc.append(nodes.title('', '', nodes.Text('Title')))
    doc.append(nodes.paragraph('', '', nodes.Text('some text.')))
    doc.append(MetaBody('').meta('', name='title', content='Title'))
    doc.append(MetaBody('').meta('', name='generator', content='pyramid_describe/0.0.0'))
    doc.append(MetaBody('').meta('', name='location', content='http://example.com/'))
    doc.append(MetaBody('').meta('', name='one-digit', content='3'))
    chk = '''\
======
Title
======

some text.

.. meta::
    :title: Title
    :generator: pyramid_describe/0.0.0
    :location: http://example.com/
    :one-digit: 3
'''
    out = publish_from_doctree(
      doc, writer=rst.Writer(), settings_overrides={'explicit_title': False})
    self.assertMultiLineEqual(out, chk)

示例#6

0

显示文件

文件： sphinx_xwiki_builder.py 项目： Abstrys/sphinx-xwiki-builder

    def write_doc(self, docname: str, doctree: Node) -> None:
        """Render ``doctree`` with ``self.writer`` and write it into ``self.outdir``.

        The output file name comes from ``config.xwiki_page_name_overrides``
        when that mapping exists and contains ``docname``; otherwise it is
        ``snake2camel(docname) + ".xwiki"``. If the builder has a
        ``page_template`` attribute, the rendered page is passed through it
        (as ``page_contents``, together with ``docname``) before writing.
        """
        # get the output from the writer
        writer_output = publish_from_doctree(doctree, writer=self.writer)

        # Work with text from here on. Decoding only at write time broke as
        # soon as a template was in use: the template received raw bytes and
        # its (text) result was then asked to .decode() again.
        if isinstance(writer_output, bytes):
            writer_output = writer_output.decode("utf-8")

        # make it easy to write code that uses config values
        config = self.app.config

        # choose the output filename (either snake2camel, or through the
        # xwiki_page_name_overrides mapping)
        overrides = getattr(config, 'xwiki_page_name_overrides', None)
        if overrides is not None and docname in overrides:
            # if there was an override, then use it.
            page_name = overrides[docname]
        else:
            # otherwise, do the standard snake2camel mapping.
            page_name = snake2camel(docname) + ".xwiki"
        output_filename = os.path.join(self.outdir, page_name)

        # check if there's a page template. If there is, then pass the page
        # output through that first.
        if hasattr(self, 'page_template'):
            writer_output = self.page_template.render(docname=docname, page_contents=writer_output)
            # Stay tolerant of a template object that hands back bytes.
            if isinstance(writer_output, bytes):
                writer_output = writer_output.decode("utf-8")

        # write the file; the context manager closes it even if the write fails.
        with codecs.open(output_filename, 'w', encoding="utf-8") as output_file:
            output_file.write(writer_output)

示例#7

0

显示文件

文件： test_publisher.py 项目： pombredanne/docutils

    def test_publish_pickle(self):
        # Test publishing a document tree with pickling and unpickling.

        # Produce the document tree.
        doctree = core.publish_doctree(
            source=test_document, reader_name="standalone", parser_name="restructuredtext", settings_spec=self
        )
        # assertIsInstance reports the offending type when it fails, unlike
        # assertTrue(isinstance(...)), which only says "False is not true".
        self.assertIsInstance(doctree, nodes.document)

        # Pickle the document.  Note: if this fails, some unpickleable
        # reference has been added somewhere within the document tree.
        # If so, you need to fix that.
        #
        # Note: Please do not remove this test, this is an important
        # requirement, applications will be built on the assumption
        # that we can pickle the document.

        # Remove the reporter and the transformer before pickling.
        doctree.reporter = None
        doctree.transformer = None

        doctree_pickled = pickle.dumps(doctree)
        self.assertIsInstance(doctree_pickled, bytes)
        del doctree

        # Unpickle the document.
        doctree_zombie = pickle.loads(doctree_pickled)
        self.assertIsInstance(doctree_zombie, nodes.document)

        # Write out the document:
        output = core.publish_from_doctree(doctree_zombie, writer_name="pseudoxml", settings_spec=self)
        self.assertEqual(output, pseudoxml_output)

示例#8

0

显示文件

文件： read_rst.py 项目： suaannihilant/webber

def htmlize(params):
    """Convert the RST contents of ``params.file`` to HTML.

    Returns None without doing anything when the file's ``rel_path`` does
    not end in ".rst"; otherwise returns what ``core.publish_from_doctree``
    produces with a ``WebWriter``.
    """
    source_file = params.file
    if not source_file.rel_path.endswith(".rst"):
        return None

    rst_source = source_file.contents

    # The same overrides are used for both the parse and the write step.
    # TODO: language_code?
    overrides = dict(
        cloak_email_addresses=1,   # cloak email addresses to reduce spam
        doctitle_xform=False,      # emit headers as H2, H1 is already used
        strip_comments='true',
    )

    # http://docutils.sourceforge.net/docs/dev/hacking.html
    # /usr/share/doc/python-docutils/
    doctree = core.publish_doctree(
        source_path=params.file.rel_path,
        source=rst_source,
        settings_overrides=overrides,
    )
    return core.publish_from_doctree(
        doctree,
        writer=WebWriter(),
        writer_name='html',
        destination_path=params.file.rel_path,
        settings_overrides=overrides,
    )

示例#9

0

显示文件

文件： rst.py 项目： cadithealth/pyramid_describe

def rst2html(data, text):
  """Render the reStructuredText string ``text`` to HTML.

  Each entry of ``data.options.cssPath`` is resolved with ``resolvecss`` and
  percent-quoted as a ``data:text/css`` URI; the resulting list is passed to
  docutils as ``stylesheet_path``. The text is first published through a
  Publisher whose destination class is ``io.NullOutput``, then the resulting
  document is walked with ``HtmlDoctreeFixer``, run through the configured
  filters and finally written with the 'html' writer.

  Returns the value of ``core.publish_from_doctree``.
  """

  css = [
    urllib.parse.quote('data:text/css;charset=UTF-8,' + resolvecss(data, e))
    for e in aslist(data.options.cssPath or '')]
  # todo: add the docutils default css as well...

  # Settings shared by the initial parse and the final HTML rendering.
  settings = dict(
    # input_encoding     = 'UTF-8',
    output_encoding      = data.options.encoding,
    embed_stylesheet     = data.options.cssEmbed,
    stylesheet_path      = css,
    doctitle_xform       = False,
    sectsubtitle_xform   = False,
    )

  # First pass: only the document tree (pub.document) is used afterwards.
  pub = core.Publisher(None, None, None,
                       source_class=io.StringInput,
                       destination_class=io.NullOutput)
  pub.set_components('standalone', 'restructuredtext', 'html')
  pub.process_programmatic_settings(None, settings, None)
  pub.set_source(text, None)
  pub.set_destination(None, None)
  pub.publish(enable_exit_status=False)

  doc = pub.document
  doc.walk(HtmlDoctreeFixer(doc))
  doc = runFilters(data.options.filters, doc, data)

  # NOTE(review): the tree published below is pub.document, not the `doc`
  # returned by runFilters. If a filter returns a new document instead of
  # modifying the tree in place, its result is ignored here -- confirm.
  html = core.publish_from_doctree(
    pub.document, writer_name='html', settings_overrides=settings)

  return html

示例#10

0

显示文件

文件： test_publisher.py 项目： danielhanold/dummy-repo

    def test_publish_pickle(self):
        # Test publishing a document tree with pickling and unpickling.

        # Produce the document tree.
        doctree = core.publish_doctree(source=test_document,
                                       reader_name='standalone',
                                       parser_name='restructuredtext',
                                       settings_spec=self)
        # assertIsInstance names the actual type in its failure message,
        # which assertTrue(isinstance(...)) cannot do.
        self.assertIsInstance(doctree, nodes.document)

        # Pickle the document.  Note: if this fails, some unpickleable
        # reference has been added somewhere within the document tree.
        # If so, you need to fix that.
        #
        # Note: Please do not remove this test, this is an important
        # requirement, applications will be built on the assumption
        # that we can pickle the document.

        # Remove the reporter and the transformer before pickling.
        doctree.reporter = None
        doctree.transformer = None

        doctree_pickled = pickle.dumps(doctree)
        self.assertIsInstance(doctree_pickled, bytes)
        del doctree

        # Unpickle the document.
        doctree_zombie = pickle.loads(doctree_pickled)
        self.assertIsInstance(doctree_zombie, nodes.document)

        # Write out the document:
        output = core.publish_from_doctree(doctree_zombie,
                                           writer_name='pseudoxml',
                                           settings_spec=self)
        self.assertEqual(output, pseudoxml_output)

示例#11

0

显示文件

文件： read_rst.py 项目： schors/webber

def htmlize(params):
    """Parse the contents of ``params.file`` as RST and convert them to HTML.

    Files whose ``rel_path`` does not end in ".rst" are left alone and the
    function returns None.
    """
    rst_file = params.file
    if not rst_file.rel_path.endswith(".rst"):
        return None

    text = rst_file.contents

    # Shared by the parsing and the writing step.
    # TODO: language_code?
    docutils_settings = {}
    # cloak email addresses to reduce spam
    docutils_settings['cloak_email_addresses'] = 1
    # Emit headers as H2, because H1 is already used
    docutils_settings['doctitle_xform'] = False
    docutils_settings['strip_comments'] = 'true'

    # http://docutils.sourceforge.net/docs/dev/hacking.html
    # /usr/share/doc/python-docutils/
    parsed = core.publish_doctree(
        source_path=params.file.rel_path,
        source=text,
        settings_overrides=docutils_settings)
    html = core.publish_from_doctree(
        parsed,
        writer=WebWriter(),
        writer_name='html',
        destination_path=params.file.rel_path,
        settings_overrides=docutils_settings)
    return html

示例#12

0

显示文件

def spec_show(name, txt=False):
    """Show the spec ``name`` as plain text (``txt`` true) or as an HTML page.

    The spec is read from ``<SPEC_DIR>/<name>.rst``; ``abort(404)`` is called
    when that path does not exist. The content is always passed through
    ``render_template_string`` before any further processing.
    """
    spec_path = safe_join(SPEC_DIR, name + '.rst')
    if not os.path.exists(spec_path):
        abort(404)

    with codecs.open(spec_path, encoding='utf-8') as spec_file:
        content = spec_file.read()

    if txt:
        # Applied strictly in this order: RST markup is stripped first, then
        # the highlight formatter is switched, then field names are rewritten.
        text_rewrites = (
            ('.. meta::\n', ''),
            ('.. contents::\n\n', ''),
            ('.. raw:: html\n\n', ''),
            ('\n.. [', '\n['),
            (']_.', '].'),
            (']_', '] '),
            ('{% highlight', "{% highlight formatter='textspec'"),
            ('    :accuratefor', '- Accurate for'),
            ('    :category', '- Category'),
            ('    :lastupdated', '- Last updated'),
        )
        for old, new in text_rewrites:
            content = content.replace(old, new)

    # render the post with Jinja2 to handle URLs etc.
    rendered_content = render_template_string(content).replace(
        '</pre></div>', '  </pre></div>')

    if txt:
        response = make_response(rendered_content)
        response.mimetype = 'text/plain'
        return response

    # Render the ToC on its own: reduce the parsed tree to the node found at
    # index [1][1] (assumed to be the contents bullet list) and publish that.
    toc_tree = publish_doctree(source=rendered_content)
    toc_list = toc_tree[1][1]
    toc_tree.clear()
    toc_tree.append(toc_list)
    toc = publish_from_doctree(toc_tree, writer_name='html')

    # The main document is published without the contents directive.
    rendered_content = rendered_content.replace('.. contents::\n', '')
    parts = publish_parts(source=rendered_content,
                          source_path=SPEC_DIR,
                          writer_name="html")

    page = dict(
        title=parts['title'],
        toc=toc,
        body=parts['fragment'],
        name=name,
        meta=get_metadata_from_meta(parts['meta']),
    )
    return render_template('spec/show.html', **page)

示例#13

0

显示文件

文件： tests.py 项目： lovelysystems/lovely.pyrest

def render_doc_node(node, writer_name='pseudoxml'):
    """Render a single docutils node with the writer named ``writer_name``.

    The node is placed in the children list of a document obtained by
    publishing an empty source, and that document is then published.
    """
    host_document = publish_doctree("")
    host_document.children.append(node)
    rendered = publish_from_doctree(host_document, writer_name=writer_name)
    return rendered

示例#14

0

显示文件

文件： test_nodes.py 项目： Akasurde/pylatest

def publish_pseudoxml(doctree):
    """
    Return the pseudo xml rendering of ``doctree`` as a string.

    No writer is named in the call, so ``publish_from_doctree`` uses its
    default writer; the output encoding is overridden to 'unicode'.
    """
    unicode_overrides = {'output_encoding': 'unicode'}
    return publish_from_doctree(doctree, settings_overrides=unicode_overrides)

示例#15

0

显示文件

文件： __init__.py 项目： vilos/rstlibrary

def publish(doctree, template='template.txt'):
    """Render ``doctree`` with ``HTMLWriter`` using a template file.

    ``template`` is a file name resolved relative to the directory of this
    module. The output encoding is overridden to 'unicode'.
    """
    template_path = os.path.join(os.path.dirname(__file__), template)
    overrides = {
        'output_encoding': 'unicode',
        'template': template_path,
    }
    return publish_from_doctree(
        doctree, writer=HTMLWriter(), settings_overrides=overrides)

示例#16

0

显示文件

文件： utils.py 项目： sphinx-contrib/restbuilder

def run_parse_test(src_dir, expected_dir, output_dir, subdir, files):
    """Build ``files`` with Sphinx and compare each result with its expected document.

    All three directories are narrowed to their ``subdir``. When a comparison
    fails, XML dumps of both doctrees are written next to the build output
    (``<file>.output.xml`` and ``<file>.expected.xml``) and the
    AssertionError is re-raised.
    """
    src_dir, expected_dir, output_dir = (
        join(base, subdir) for base in (src_dir, expected_dir, output_dir))
    build_sphinx(src_dir, output_dir, files)

    for filename in files:
        actual_doc = parse_doc(output_dir, filename)
        wanted_doc = parse_doc(expected_dir, filename)
        try:
            assert_doc_equal(actual_doc, wanted_doc)
        except AssertionError:
            # output XML version of doctree for easier debugging
            dumps = (('.output.xml', actual_doc), ('.expected.xml', wanted_doc))
            for suffix, doc in dumps:
                with open(join(output_dir, filename + suffix), 'wb') as fw:
                    fw.write(publish_from_doctree(doc, writer_name='xml'))
            raise

示例#17

0

显示文件

def publish_pseudoxml(doctree):
    """
    Return the pseudo xml rendering of ``doctree`` as a string.

    The call names no writer, so the default writer of
    ``publish_from_doctree`` applies; 'output_encoding' is set to 'unicode'.
    """
    return publish_from_doctree(
        doctree, settings_overrides=dict(output_encoding='unicode'))

示例#18

0

显示文件

def convert_to_html(doc, css_selector=None):
    """Convert a docutils document to parsed HTML.

    Returns the BeautifulSoup tree of the whole page, or, when
    ``css_selector`` is given, the result of ``select_one`` for it.
    """
    html_bytes = publish_from_doctree(doc, writer_name="html")
    soup = BeautifulSoup(html_bytes.decode("utf-8"), features="html.parser")

    if css_selector is None:
        return soup
    return soup.select_one(css_selector)

示例#19

0

显示文件

文件： test_rst.py 项目： cadithealth/pyramid_describe

 def test_altered_title(self):
   from docutils import utils, nodes
   from docutils.core import publish_from_doctree
   # A document-level 'title' attribute is expected to be emitted as a
   # ``.. title::`` directive ahead of the regular heading.
   expected = '.. title:: Altered Title\n\n======\nTitle\n======\n\nsome text.\n'
   document = utils.new_document('<program>')
   document['title'] = 'Altered Title'
   heading = nodes.title('', '', nodes.Text('Title'))
   document.append(heading)
   body = nodes.paragraph('', '', nodes.Text('some text.'))
   document.append(body)
   overrides = {'explicit_title': False}
   actual = publish_from_doctree(
     document, writer=rst.Writer(), settings_overrides=overrides)
   self.assertMultiLineEqual(actual, expected)

示例#20

0

显示文件

文件： test_nodes.py 项目： Akasurde/pylatest

def publish_html(doctree):
    """
    Return the html rendering of ``doctree``.

    The 'stylesheet_path' setting is overridden to None.
    NOTE(review): 'output_encoding' is not overridden to 'unicode' here, so
    the result is presumably encoded bytes rather than text -- confirm.
    """
    overrides = {'stylesheet_path': None}
    return publish_from_doctree(
        doctree, writer_name='html', settings_overrides=overrides)

示例#21

0

显示文件

def publish_html(doctree):
    """
    Return the html rendering of ``doctree``.

    Publishes with the 'html' writer and 'stylesheet_path' set to None.
    NOTE(review): without an 'output_encoding' of 'unicode' the return value
    is presumably bytes, not str -- confirm against callers.
    """
    html_settings = dict(stylesheet_path=None)
    rendered = publish_from_doctree(
        doctree,
        writer_name='html',
        settings_overrides=html_settings,
    )
    return rendered

示例#22

0

显示文件

文件： html.py 项目： pombredanne/rst2a

def doctree_to_html(doctree, stylesheet_url='',
    settings=DEFAULT_HTML_OVERRIDES, tidy_output=True,
    tidy_settings=DEFAULT_TIDY_HTML_OPTIONS, *args, **kwargs):
    """Render ``doctree`` with the 'html4css1' writer, optionally tidying the result.

    ``settings`` is copied before use, so the caller's mapping (and the
    module default) is never modified. Extra positional and keyword
    arguments are forwarded to ``publish_from_doctree``. Tidying is skipped
    when ``tidy_output`` is false or the ``tidy`` module is None.
    """
    overrides = copy(settings)
    if is_url(stylesheet_url, net_loc=('http', 'ftp')):
        # NOTE(review): the key is spelled with a hyphen; docutils setting
        # names normally use underscores ('stylesheet_path') -- confirm
        # that this override actually takes effect.
        overrides['stylesheet-path'] = stylesheet_url

    html_string = publish_from_doctree(
        doctree, *args,
        writer_name='html4css1',
        settings_overrides=overrides,
        **kwargs)

    if tidy_output and tidy is not None:
        html_string = str(tidy.parseString(html_string, **tidy_settings))
    return html_string

示例#23

0

显示文件

def generate_documentation(url, mapping, items):
    """Render the sections of the document at ``url`` named by ``mapping``.

    ``mapping`` maps a section anchor to a name; the name "-" means "use the
    anchor itself". ``items`` maps names to implementations; a tuple value
    is first resolved with ``resolve_directive``. Every section is rendered,
    but only those with a truthy item are returned.

    Returns a dict keyed by ``"<name>(<module>.<item name>)"`` holding the
    ``Directive`` writer output for each such section.
    """
    license_url = "https://docutils.sourceforge.io/docs/"
    docs = {}

    with urlopen(url) as response:
        doctree = publish_doctree(source=response.read())

        for anchor, label in mapping.items():
            if label == "-":
                label = anchor
            item = items.get(label)

            section_node = list(
                doctree.traverse(condition=find_section(anchor)))[0]
            section_url = f"{url}#{anchor}".replace(".txt", ".html")

            # Each section is published from its own fresh document.
            document = new_document(section_url)
            document += section_node

            logger.debug(
                "%s", publish_from_doctree(document=document).decode("utf8"))
            directive_writer = Directive(url, section_url, license_url)
            documentation = publish_from_doctree(
                document=document, writer=directive_writer)

            if isinstance(item, tuple):
                item = resolve_directive(item)

            if not item:
                continue

            # Objects without __name__ are labelled by their class name.
            try:
                dotted_name = f"{item.__module__}.{item.__name__}"
            except AttributeError:
                dotted_name = f"{item.__module__}.{item.__class__.__name__}"

            docs[f"{label}({dotted_name})"] = documentation

    return docs

示例#24

0

显示文件

文件： views.py 项目： openbazarrclone/i2p.www

def spec_show(name, txt=False):
    """Return the spec ``name`` either as plain text or as a rendered page.

    Reads ``<SPEC_DIR>/<name>.rst`` (calling ``abort(404)`` when the path
    does not exist) and passes the content through
    ``render_template_string``. With ``txt`` true, RST markup is stripped
    beforehand and a 'text/plain' response is returned; otherwise the
    'spec/show.html' template is rendered with title, ToC, body and meta.
    """
    # check if that file actually exists
    path = safe_join(SPEC_DIR, name + '.rst')
    if not os.path.exists(path):
        abort(404)

    # read file
    with codecs.open(path, encoding='utf-8') as handle:
        content = handle.read()

    if txt:
        # (old, new) pairs, applied in order: strip RST, change the
        # highlight formatter, then the remaining string changes.
        substitutions = [
            ('.. meta::\n', ''),
            ('.. contents::\n\n', ''),
            ('.. raw:: html\n\n', ''),
            ('\n.. [', '\n['),
            (']_.', '].'),
            (']_', '] '),
            ('{% highlight', "{% highlight formatter='textspec'"),
            ('    :accuratefor', '- Accurate for'),
            ('    :category', '- Category'),
            ('    :lastupdated', '- Last updated'),
        ]
        for old, new in substitutions:
            content = content.replace(old, new)

    # render the post with Jinja2 to handle URLs etc.
    rendered_content = render_template_string(content)
    rendered_content = rendered_content.replace('</pre></div>', '  </pre></div>')

    if txt:
        # Send response
        plain = make_response(rendered_content)
        plain.mimetype = 'text/plain'
        return plain

    # Render the ToC: keep only the node at [1][1] of the parsed tree
    # (assumed to be the contents bullet list) and publish it as HTML.
    tree = publish_doctree(source=rendered_content)
    contents_list = tree[1][1]
    tree.clear()
    tree.append(contents_list)
    toc = publish_from_doctree(tree, writer_name='html')

    # Remove the ToC from the main document
    rendered_content = rendered_content.replace('.. contents::\n', '')

    # publish the spec with docutils
    parts = publish_parts(
        source=rendered_content,
        source_path=SPEC_DIR,
        writer_name="html",
    )
    meta = get_metadata_from_meta(parts['meta'])

    return render_template(
        'spec/show.html',
        title=parts['title'],
        toc=toc,
        body=parts['fragment'],
        name=name,
        meta=meta,
    )

示例#25

0

显示文件

文件： latex.py 项目： pombredanne/rst2a

def doctree_to_latex(doctree, img_localizer, stylesheet_url="", settings=DEFAULT_LATEX_OVERRIDES, *args, **kwargs):
    """Render ``doctree`` with the 'latex' writer and list the temporary files used.

    When ``stylesheet_url`` is not an existing file path, or is file-like,
    it is handed to ``create_temp_file`` (suffix ".tex") and the result is
    used as the stylesheet. Images are localized through ``img_localizer``
    before publishing. ``settings`` is copied, never modified; extra
    positional and keyword arguments go to ``publish_from_doctree``.

    Returns ``(latex_string, temp_files)`` where ``temp_files`` is the
    sorted, de-duplicated ``img_localizer.values()``, preceded by the
    temporary stylesheet when one was created.
    """
    cleanup_stylesheet = False
    if not isfile(str(stylesheet_url)) or is_filelike(stylesheet_url):
        stylesheet_url = create_temp_file(stylesheet_url, suffix=".tex")
        cleanup_stylesheet = True
    conversion_settings = copy(settings)
    # NOTE(review): hyphenated key; docutils setting names normally use
    # underscores ('stylesheet_path') -- confirm this override is honoured.
    conversion_settings["stylesheet-path"] = stylesheet_url
    img_localizer.localize_images(doctree)
    latex_string = publish_from_doctree(
        doctree, writer_name="latex", settings_overrides=conversion_settings, *args, **kwargs
    )
    temp_files = sorted(set(img_localizer.values()))
    if cleanup_stylesheet:
        # The temporary stylesheet lives in `stylesheet_url`; the previous
        # code referenced an undefined `stylesheet_path` and raised NameError.
        temp_files = [stylesheet_url] + temp_files
    return latex_string, temp_files

示例#26

0

显示文件

 def test_publish_from_doctree(self):
     """Ignore the Docutils-generated ToC, when ``use_latex_toc``
     is True. (This did happen when publishing from a doctree.)
     """
     # The same overrides are used for parsing and for writing.
     overrides = dict(output_encoding='unicode', _disable_config=True)
     parsed = core.publish_doctree(
         contents_test_input, settings_overrides=overrides)
     latex = core.publish_from_doctree(
         parsed, writer_name='latex', settings_overrides=overrides)
     unwanted_toc_entry = r'\item \hyperref[foo]{foo}'
     self.assertNotIn(unwanted_toc_entry, latex)

示例#27

0

显示文件

def document2rst(doc):
    """Serialize the docutils doctree `doc` to reStructuredText.

    Returns the empty string for a falsy `doc`; otherwise returns the output
    of the pyramid_describe rst Writer, published with the doctitle and
    sectsubtitle transforms switched off.
    """
    if not doc:
        return ''
    from pyramid_describe.writers.rst import Writer
    overrides = {
        'doctitle_xform': False,
        'sectsubtitle_xform': False,
    }
    rst_writer = Writer()
    return publish_from_doctree(
        doc, writer=rst_writer, settings_overrides=overrides)

示例#28

0

显示文件

文件： describer.py 项目： cadithealth/pyramid_describe

 def render_rst(self, data):
   # Renders `data` to a doctree, applies the configured filters and
   # serializes the result with the writer named by data.options.rstWriter.
   doc = self.doctree_render(data)
   # todo: should this runFilters be moved int doctree_render?...
   #       currently it is only being called from here, so not much
   #       of an issue, but if ever it isn't, then this behaviour might
   #       not be expected.
   doc = runFilters(data.options.filters, doc, data)
   writer_class = resolve(data.options.rstWriter)
   writer = writer_class()
   settings = {
     'doctitle_xform': False,
     'sectsubtitle_xform': False,
   }
   # explicit_title is only switched on when rstMax is set.
   if data.options.rstMax:
     settings.update(explicit_title=True)
   return publish_from_doctree(
     doc, writer=writer, settings_overrides=settings)

示例#29

0

显示文件

文件： test_publisher.py 项目： danielhanold/dummy-repo

    def test_publish_doctree(self):
        # Test `publish_doctree` and `publish_from_doctree`.

        # Produce the document tree.
        doctree = core.publish_doctree(source=test_document,
                                       reader_name='standalone',
                                       parser_name='restructuredtext',
                                       settings_spec=self,
                                       settings_overrides={
                                           'expose_internals':
                                           ['refnames', 'do_not_expose'],
                                           'report_level':
                                           5
                                       })
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(doctree, nodes.document)

        # Confirm that transforms have been applied (in this case, the
        # DocTitle transform):
        self.assertIsInstance(doctree[0], nodes.title)
        self.assertIsInstance(doctree[1], nodes.paragraph)
        # Confirm that the Messages transform has not yet been applied:
        self.assertEqual(len(doctree), 2)

        # The `do_not_expose` attribute may not show up in the
        # pseudoxml output because the expose_internals transform may
        # not be applied twice.
        doctree.do_not_expose = 'test'
        # Write out the document:
        output = core.publish_from_doctree(doctree,
                                           writer_name='pseudoxml',
                                           settings_spec=self,
                                           settings_overrides={
                                               'expose_internals':
                                               ['refnames', 'do_not_expose'],
                                               'report_level':
                                               1
                                           })
        self.assertEqual(output, exposed_pseudoxml_output)

        # Test publishing parts using document as the source.
        parts = core.publish_parts(reader_name='doctree',
                                   source_class=io.DocTreeInput,
                                   source=doctree,
                                   source_path='test',
                                   writer_name='html',
                                   settings_spec=self)
        self.assertIsInstance(parts, dict)

示例#30

0

显示文件

 def render_rst(self, data):
     # Builds the doctree for `data`, runs the configured filters over it
     # and publishes it through the writer named by data.options.rstWriter.
     tree = self.doctree_render(data)
     # todo: should this runFilters be moved int doctree_render?...
     #       currently it is only being called from here, so not much
     #       of an issue, but if ever it isn't, then this behaviour might
     #       not be expected.
     tree = runFilters(data.options.filters, tree, data)
     rst_writer = resolve(data.options.rstWriter)()
     overrides = {'doctitle_xform': False, 'sectsubtitle_xform': False}
     if data.options.rstMax:
         overrides['explicit_title'] = True
     rendered = publish_from_doctree(
         tree, writer=rst_writer, settings_overrides=overrides)
     return rendered

示例#31

0

显示文件

文件： rst2wiki.py 项目： jamert/rst2wiki

def generate_content(filename, tip_lang):
    """Convert the ReST file ``filename`` with the confluence writer.

    Returns ``(content, metadata)`` where ``content`` is the writer output
    prefixed with the auto-generation warning selected by ``tip_lang``.
    When ``tip_lang`` is falsy and metadata was extracted, the metadata's
    'warning' entry is used instead. Any processing error is reported via
    ``click.echo`` and turned into ``click.Abort``.
    """
    click.echo('Preparing content...')
    with open(filename) as source:
        rst_text = source.read()

    try:
        doctree = publish_doctree(rst_text)
        content = publish_from_doctree(doctree, writer=confluence.Writer())
    except Exception:
        click.echo('There was error on processing ReST file')
        raise click.Abort()

    metadata = extract_metadata(doctree)
    if metadata and not tip_lang:
        tip_lang = metadata.get('warning')
    banner = autogen_warning.get(tip_lang, '')

    return banner + content, metadata

示例#32

0

显示文件

文件： rst2wiki.py 项目： jamert/rst2wiki

def generate_content(filename, tip_lang):
    """Render the ReST file ``filename`` for Confluence.

    The file is parsed and published with ``confluence.Writer``; failures
    are announced with ``click.echo`` and re-raised as ``click.Abort``.
    The warning text is looked up in ``autogen_warning`` by ``tip_lang``,
    falling back to the 'warning' entry of the extracted metadata when
    ``tip_lang`` is falsy. Returns ``(warning + content, metadata)``.
    """
    click.echo('Preparing content...')
    with open(filename) as rst_file:
        raw = rst_file.read()

    try:
        tree = publish_doctree(raw)
        body = publish_from_doctree(tree, writer=confluence.Writer())
    except Exception:
        click.echo('There was error on processing ReST file')
        raise click.Abort()

    metadata = extract_metadata(tree)
    language = tip_lang
    if metadata:
        language = language or metadata.get('warning')

    return autogen_warning.get(language, '') + body, metadata

示例#33

0

显示文件

文件： rst2a.py 项目： pombredanne/rst2a

 def to_latex(self, stylesheet_path='', settings=DEFAULT_LATEX_OVERRIDES,
     *args, **kwargs):
     """Render ``self.document`` with the 'latex' writer.

     A ``stylesheet_path`` that is not an existing file, or is file-like,
     is first handed to ``create_temp_file`` (suffix '.tex'). ``settings``
     is layered over a copy of ``self.default_settings``. Returns
     ``(latex_string, files)`` where ``files`` holds the de-duplicated
     ``self.img_localizer.values()``, preceded by the temporary stylesheet
     when one was created.
     """
     made_temp_stylesheet = (
         not isfile(str(stylesheet_path)) or is_filelike(stylesheet_path))
     if made_temp_stylesheet:
         stylesheet_path = create_temp_file(stylesheet_path, suffix='.tex')

     overrides = copy(self.default_settings)
     overrides.update(settings)
     # NOTE(review): hyphenated key; docutils setting names normally use
     # underscores ('stylesheet_path') -- confirm this has an effect.
     overrides['stylesheet-path'] = stylesheet_path

     self.localize_images()
     latex_string = publish_from_doctree(
         self.document, *args,
         writer_name='latex',
         settings_overrides=overrides,
         **kwargs)

     files = list(set(self.img_localizer.values()))
     if made_temp_stylesheet:
         files = [stylesheet_path] + files
     return latex_string, files

示例#34

0

显示文件

文件： gen_doc.py 项目： droslaw/GitHooks

def extract_statuses_table(docstring):
    """Return the rst code of the "Result status" table in ``docstring``.

    The docstring is parsed as rst; the first table found is used and its
    title must be "Result status".

    Raises LookupError when the title differs or when the lookup fails
    with an IndexError.
    """
    not_found = 'Statuses table not found'
    try:
        tree = core.publish_doctree(docstring)
        table = _find_element(tree, nodes.table)
        title = _find_element(table, nodes.title)
        if not (title[0] == 'Result status'):
            raise LookupError(not_found)
        # Publish the table alone, from a fresh document.
        document = utils.new_document('<string>')
        document += table
    except IndexError:
        raise LookupError(not_found)
    rendered = core.publish_from_doctree(document, writer=Writer())
    return rendered.decode()

示例#35

0

显示文件

def extract_statuses_table(docstring):
    """Extract the table titled "Result status" from an rst docstring.

    The docstring is parsed with docutils, the first table found by
    ``_find_element`` is checked for the expected title, and that table is
    published by itself with ``Writer``.  The decoded output is returned.

    Raises LookupError if the title differs from "Result status" or if an
    IndexError is raised while searching for the table or its title.
    """
    try:
        parsed = core.publish_doctree(docstring)
        found_table = _find_element(parsed, nodes.table)
        found_title = _find_element(found_table, nodes.title)
        if found_title[0] != 'Result status':
            raise LookupError('Statuses table not found')
        # A new, empty document holds only the table for publishing.
        document = utils.new_document('<string>')
        document += found_table
    except IndexError:
        raise LookupError('Statuses table not found')
    output = core.publish_from_doctree(document, writer=Writer())
    return output.decode()

示例#36

0

显示文件

文件： test_publisher.py 项目： pombredanne/docutils

    def test_publish_doctree(self):
        """Exercise `publish_doctree`, `publish_from_doctree` and
        `publish_parts` fed from an existing doctree."""

        def overrides(report_level):
            # Build a fresh dict (and list) for every call so the two
            # publish calls never share a settings object.
            return {
                "expose_internals": ["refnames", "do_not_expose"],
                "report_level": report_level,
            }

        # Parse the test document into a document tree.
        doctree = core.publish_doctree(
            source=test_document,
            reader_name="standalone",
            parser_name="restructuredtext",
            settings_spec=self,
            settings_overrides=overrides(5),
        )
        self.assertTrue(isinstance(doctree, nodes.document))

        # Transforms have already run (here: DocTitle), so the first two
        # children are the title and a paragraph ...
        for index, node_class in enumerate((nodes.title, nodes.paragraph)):
            self.assertTrue(isinstance(doctree[index], node_class))
        # ... while the Messages transform has not been applied yet.
        self.assertEqual(len(doctree), 2)

        # `do_not_expose` may not appear in the pseudoxml output, because
        # the expose_internals transform may not be applied a second time.
        doctree.do_not_expose = "test"

        # Serialise the tree and compare with the expected pseudo-XML.
        output = core.publish_from_doctree(
            doctree,
            writer_name="pseudoxml",
            settings_spec=self,
            settings_overrides=overrides(1),
        )
        self.assertEqual(output, exposed_pseudoxml_output)

        # Publish parts with the doctree itself acting as the source.
        parts = core.publish_parts(
            reader_name="doctree",
            source_class=io.DocTreeInput,
            source=doctree,
            source_path="test",
            writer_name="html",
            settings_spec=self,
        )
        self.assertTrue(isinstance(parts, dict))

示例#37

0

显示文件

文件： publish.py 项目： pyzh/lift-tutorial

def render_rst(blob, path):
    """Publish the rst held in ``blob`` with ``MyWriter`` and store the result.

    References that have no network location and whose path basename is
    ``README.rst`` are rewritten to drop that file name (falling back to
    ``./`` when nothing is left).  The published output is wrapped in a new
    ``Blob``, added to ``store``, and the new blob's id is returned.
    """
    doctree = publish_doctree(blob.as_raw_string())

    for ref in doctree.traverse(nodes.reference):
        parsed = urlparse.urlparse(ref['refuri'])
        if parsed.netloc or os.path.basename(parsed.path) != "README.rst":
            continue
        # "README.rst" is 10 characters long; strip it from the path.
        directory = parsed.path[:-10] or "./"
        ref['refuri'] = urlparse.urlunparse(
            (parsed.scheme, parsed.netloc, directory,
             parsed.params, parsed.query, parsed.fragment))

    overrides = {
        'embed_stylesheet': False,
        'xml_declaration': False,
        'math_output': 'mathjax',
    }
    output = publish_from_doctree(
        doctree,
        destination_path=path,
        writer=MyWriter(),
        settings_overrides=overrides)

    rendered_blob = Blob.from_string(output)
    store.add_object(rendered_blob)
    return rendered_blob.id

示例#38

0

显示文件

文件： docbook_builder.py 项目： Abstrys/rst2db

    def write_doc(self, docname, doctree):
        """Publish ``doctree`` with ``DocBookWriter`` and write it to disk.

        The output goes to ``<self.outdir>/<docname>.xml``.  When
        ``self.template_filename`` is set, the writer is told not to emit
        the XML header and the published contents are passed through
        ``self.process_with_template`` before being written.
        """
        use_template = self.template_filename is not None

        docutils_writer = DocBookWriter(self.root_element, docname,
                output_xml_header=not use_template)

        # get the docbook output.
        docbook_contents = publish_from_doctree(doctree,
                writer=docutils_writer)

        # process the output with a template if a template name was supplied.
        if use_template:
            docbook_contents = self.process_with_template(docbook_contents)

        # The context manager closes (and flushes) the file even if the
        # write fails; the handle was previously left open.
        output_path = os.path.join(self.outdir, '%s.xml' % docname)
        with open(output_path, 'w+') as output_file:
            output_file.write(docbook_contents)

示例#39

0

显示文件

    def test_publish_doctree(self):
        """Check `publish_doctree`, `publish_from_doctree` and
        `publish_parts` with a doctree as the source."""

        # Step 1: build the document tree from the test source.
        parse_overrides = {
            'expose_internals': ['refnames', 'do_not_expose'],
            'report_level': 5,
        }
        doctree = core.publish_doctree(
            source=test_document,
            reader_name='standalone',
            parser_name='restructuredtext',
            settings_spec=self,
            settings_overrides=parse_overrides)
        self.assert_(isinstance(doctree, nodes.document))

        # Transforms were applied (the DocTitle transform in this case):
        self.assert_(isinstance(doctree[0], nodes.title))
        self.assert_(isinstance(doctree[1], nodes.paragraph))
        # The Messages transform, however, has not been applied yet:
        self.assertEquals(len(doctree), 2)

        # Step 2: write the tree out.  `do_not_expose` may not show up in
        # the pseudoxml output because the expose_internals transform may
        # not be applied twice.
        doctree.do_not_expose = 'test'
        write_overrides = {
            'expose_internals': ['refnames', 'do_not_expose'],
            'report_level': 1,
        }
        output = core.publish_from_doctree(
            doctree,
            writer_name='pseudoxml',
            settings_spec=self,
            settings_overrides=write_overrides)
        self.assertEquals(output, exposed_pseudoxml_output)

        # Step 3: publish parts using the document itself as the source.
        parts = core.publish_parts(
            reader_name='doctree',
            source_class=io.DocTreeInput,
            source=doctree,
            source_path='test',
            writer_name='html',
            settings_spec=self)
        self.assert_(isinstance(parts, DictType))

示例#40

0

显示文件

文件： rst.py 项目： cadithealth/pyramid_describe

def rst2html(data, text):
    """Convert rst ``text`` to HTML using options taken from ``data.options``.

    The text is first published through a docutils ``Publisher`` with a
    null destination to obtain a doctree, which is then walked with
    ``HtmlDoctreeFixer`` and passed through ``runFilters``.  Finally
    ``pub.document`` is rendered with the ``html`` writer and the result is
    returned.
    """
    # Each stylesheet is embedded as a URL-quoted ``data:`` URI.
    # todo: add the docutils default css as well...
    css = []
    for entry in aslist(data.options.cssPath or ''):
        css.append(urllib.parse.quote(
            'data:text/css;charset=UTF-8,' + resolvecss(data, entry)))

    settings = {
        # 'input_encoding': 'UTF-8',
        'output_encoding': data.options.encoding,
        'embed_stylesheet': data.options.cssEmbed,
        'stylesheet_path': css,
        'doctitle_xform': False,
        'sectsubtitle_xform': False,
    }

    # First pass: parse only (output is discarded via NullOutput).
    pub = core.Publisher(None, None, None,
                         source_class=io.StringInput,
                         destination_class=io.NullOutput)
    pub.set_components('standalone', 'restructuredtext', 'html')
    pub.process_programmatic_settings(None, settings, None)
    pub.set_source(text, None)
    pub.set_destination(None, None)
    pub.publish(enable_exit_status=False)

    doctree = pub.document
    doctree.walk(HtmlDoctreeFixer(doctree))
    # NOTE(review): the value returned by runFilters is not what gets
    # published below -- `pub.document` is.  Confirm that filters are
    # expected to modify the tree in place.
    runFilters(data.options.filters, doctree, data)

    # Second pass: render the (fixed) doctree to HTML.
    return core.publish_from_doctree(pub.document,
                                     writer_name='html',
                                     settings_overrides=settings)

示例#41

0

显示文件

文件： core.py 项目： stancke/gestorfinanceiro

def document(report, fmt='html'):
    '''
    Creates a document in the desired `fmt`. Defaults to html.

    `csv` is delegated to `report2csv`; `pdf` is rendered with rst2pdf
    into an in-memory buffer; `txt` and `rst` go through `gen_rst`; any
    other name listed in `DOCUTILS_WRITER_NAMES` is published with the
    docutils writer of that name (`odt` maps to `odf_odt`).

    Raises Exception when rst2pdf is required but unavailable, or when
    `fmt` is not a supported format.
    '''
    if fmt == 'csv':
        return report2csv(report)

    doctree = doctree_factory(report, fmt=fmt)

    if fmt == 'pdf':
        if not RST2PDF:
            raise Exception('rst2pdf is not available')
        buff = StringIO()
        RstToPdf().createPdf(doctree=doctree, output=buff)
        return buff.getvalue()

    if fmt not in DOCUTILS_WRITER_NAMES:
        raise Exception('Format not supported. Not in %s' % (
                SUPPORTED_FORMATS,
            )
        )

    if fmt in ('txt', 'rst'):
        return gen_rst(doctree, 0)

    # 'odf_odt' is the docutils writer name for the odt format.
    writer_name = 'odf_odt' if fmt == 'odt' else fmt
    return publish_from_doctree(
        doctree,
        writer_name=writer_name,
        settings_overrides={'input_encoding': 'unicode'},
    )

示例#42

0

显示文件

 def __xform_xmlrpc(self, odic):
     """
     Transform dictionary values to be returnable thru xmlrpc.
     Returns a new dictionary; ``odic`` itself is not modified.

     - ``time`` is replaced by its ``isoformat()`` string.
     - ``errors`` and ``source`` are UTF-8 encoded and wrapped in
       ``xmlrpclib.Binary``.
     - ``doctree`` is published as pseudo-XML, stored under
       ``doctree_str`` as ``xmlrpclib.Binary``, and the ``doctree`` key
       is removed.
     """
     dic = odic.copy()
     # Iterate over the untouched original: the 'doctree' branch adds and
     # removes keys in the copy, which is not safe while iterating over
     # that same dictionary.
     for k, v in odic.iteritems():
         if k == 'time':
             dic[k] = v.isoformat()
         elif k in ('errors', 'source'):
             dic[k] = xmlrpclib.Binary(v.encode('UTF-8'))
         elif k == 'doctree':
             # NOTE(review): this unpacks two values from
             # publish_from_doctree -- confirm against the docutils
             # version in use.
             doctree_utf8, parts = core.publish_from_doctree(
                 v,
                 writer_name='pseudoxml',
                 settings_overrides={'output_encoding': 'UTF-8'},
             )
             dic['%s_str' % k] = xmlrpclib.Binary(doctree_utf8)
             del dic[k]
     return dic

示例#43

0

显示文件

文件： page_views.py 项目： hforge/wiki

    def GET(self, resource, context):
        """Publish the resource's doctree with the docutils ``latex`` writer.

        Returns ``context.come_back(...)`` with ``ERR_PDFLATEX_MISSING``
        when ``pdflatex`` cannot be executed, or with ``ERR_PDF_BOOK`` when
        the writer fails on an unknown ``book`` node.  Any other
        ``NotImplementedError`` is re-raised.

        NOTE(review): the visible part of this method does not use
        ``output`` after publishing -- confirm whether more code follows.
        """
        # Check if pdflatex exists
        try:
            call(['pdflatex', '-version'])
        except OSError:
            msg = ERR_PDFLATEX_MISSING
            return context.come_back(msg)

        doctree = resource.get_doctree()
        # We hack a bit the document tree to enhance the PDF produced
        resolve_references(doctree, resource, context)
        resolve_images(doctree, resource, context)

        # Make some modifications (on a copy, so resource.overrides is
        # left untouched)
        overrides = dict(resource.overrides)
        overrides['stylesheet'] = 'style.tex'
        try:
            output = publish_from_doctree(doctree, writer_name='latex',
                    settings_overrides=overrides)
        # "except X as e" is accepted by Python 2.6+ and Python 3, unlike
        # the older "except X, e" spelling.
        except NotImplementedError as e:
            if str(e).endswith('visiting unknown node type: book'):
                message = ERR_PDF_BOOK
                return context.come_back(message)
            raise

示例#44

0

显示文件

文件： test_rst.py 项目： cadithealth/pyramid_describe

 def rt(self, data, writer=None, settings=None):
   """Parse ``data`` into a doctree and publish it back out.

   ``settings`` is used as the settings overrides for both steps.  When
   ``writer`` is not given (or is falsy) a new ``rst.Writer`` is used.
   Returns whatever ``publish_from_doctree`` returns.
   """
   from docutils.core import publish_doctree, publish_from_doctree
   doctree = publish_doctree(data, settings_overrides=settings)
   if not writer:
     writer = rst.Writer()
   return publish_from_doctree(
     doctree, writer=writer, settings_overrides=settings)

示例#45

0

显示文件

文件： render.py 项目： bhuztez/awesome-ancient-chinese-books

        render_list(x,y,d)


import urlparse
from docutils.core import publish_doctree, publish_from_doctree
from docutils import nodes


# Read the rst source that is rendered below.
with open("README.rst", "r") as f:
    rst = f.read()

doc = publish_doctree(rst)
# Rewrite references that have no network location and whose path ends in
# ".yaml" so that they end in ".html" instead (".yaml" is 5 characters).
for node in doc.traverse(nodes.reference):
    uri = urlparse.urlparse(node['refuri'])
    if not uri.netloc and uri.path.endswith(".yaml"):
        node['refuri'] = urlparse.urlunparse(
            (uri.scheme, uri.netloc, uri.path[:-5] + ".html", uri.params, uri.query, uri.fragment))

# Publish the modified doctree with the html4css1 writer: no stylesheet
# paths, no embedded stylesheet, no XML declaration.
output = publish_from_doctree(
    doc,
    writer_name='html4css1',
    destination_path="index.html",
    settings_overrides = {
        'stylesheet_path': [],
        'embed_stylesheet': False,
        'xml_declaration': False})

# NOTE(review): destination_path above is "index.html" while the file is
# written to "html/index.html" -- confirm this difference is intended.
with open("html/index.html", "w") as f:
    f.write(output)

示例#46

0

显示文件

def node2html(node):
    """Return the HTML ``body`` part for the doctree ``node``.

    ``publish_from_doctree`` returns the rendered output as a single
    string, which cannot be indexed by part name.  ``publish_parts`` with
    the ``doctree`` reader is the call that yields the parts dictionary.
    """
    # Local import keeps this function self-contained; docutils is already
    # a dependency of this module through ``core``.
    from docutils.io import DocTreeInput

    parts = core.publish_parts(
        source=node,
        source_class=DocTreeInput,
        reader_name="doctree",
        writer_name="html")
    return parts['body']

示例#47

0

显示文件

文件： gendoc_pdf.py 项目： adam-urbanczyk/chemshapes

def rest2docbook(rest_filename, docbook_filename):
    """Convert an rst file to DocBook XML and post-process the result.

    The rst file is parsed into a doctree, which is adjusted before writing
    (fields removed, line nodes replaced by their children, images scaled
    and copied next to the output).  The DocBook XML is written to
    ``docbook_filename``, parsed back, fixed up (program listings, column
    widths, contents sections, preface, screenshot scaling) and written to
    the same file again.
    """
    # Parenthesised single-argument print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print('Reading %s' % rest_filename)

    input_dir = os.path.dirname(rest_filename)
    output_dir = os.path.dirname(docbook_filename)

    # Read rest doctree (the file is closed as soon as it has been read)
    with open(rest_filename) as rest_file:
        doctree = publish_doctree(rest_file.read(),
                                  source_path=rest_filename)

    # Remove fields (docbook writer ignores but warns)
    for field in [n for n in doctree.traverse() if isinstance(n, nodes.field)]:
        field.parent.remove(field)

    # Remove line nodes (docbook crashes on them)
    for line in [n for n in doctree.traverse() if isinstance(n, nodes.line)]:
        line.parent.replace(line, line.children)

    # Copy images
    for img in [n for n in doctree.traverse() if isinstance(n, nodes.image)]:
        img['scale'] = '50'
        srcfile = os.path.join(input_dir, img['uri'])
        destfile = os.path.join(output_dir, img['uri'])
        shutil.copyfile(srcfile, destfile)

    print('Writing %s' % docbook_filename)

    # Write docbook xml
    writer = docbook.Writer()
    settings = {
        'doctype': 'book',
    }
    docbook_xml = publish_from_doctree(doctree,
                                       writer=writer,
                                       settings_overrides=settings)
    # Close the file before it is parsed again below.
    with open(docbook_filename, 'w') as docbook_file:
        docbook_file.write(docbook_xml)

    # Open docbook xml and fix it
    print('Reading %s' % docbook_filename)
    doc = parse(docbook_filename)

    # Strip leading newline from programlisting
    for elem in elements(doc, 'programlisting'):
        if elem.childNodes and elem.childNodes[0].nodeType == elem.TEXT_NODE:
            elem.childNodes[0].nodeValue = elem.childNodes[0].nodeValue.strip()

    # Dodgy hack to compensate for FOP's lack of table layout.
    # Programming guide tables need more room in the first (header) column than
    # right-hand columns.
    for elem in elements(doc, 'colspec'):
        if elem.getAttribute('colname') == 'col_1':
            elem.attributes['colwidth'] = '2*'
        else:
            elem.attributes['colwidth'] = '1*'

    # Strip table of contents (docbook creates its own)
    for title in elements(doc, 'title'):
        # An empty <title> has no first child to inspect; skip it.
        if title.childNodes and \
           title.childNodes[0].nodeType == title.TEXT_NODE and \
           title.childNodes[0].nodeValue == 'Contents':
            section = title.parentNode
            if section.nodeType == section.ELEMENT_NODE and \
               section.nodeName == 'section':
                section.parentNode.removeChild(section)

    # Strip local contents: drop sections whose first element child is an
    # itemizedlist.  The first element is tracked explicitly so a section
    # without element children is never judged by a node left over from a
    # previous section.
    for section in elements(doc, 'section'):
        first_element = None
        for child in section.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                first_element = child
                break
        if first_element is not None and \
           first_element.nodeName == 'itemizedlist':
            section.parentNode.removeChild(section)

    # Pull everything before first chapter into a preface
    preface_nodes = []
    preface = doc.createElement('preface')
    preface_title = doc.createElement('title')
    preface_title.appendChild(doc.createTextNode('Welcome'))
    preface.appendChild(preface_title)
    for child in doc.documentElement.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            if child.nodeName == 'chapter':
                for node in preface_nodes:
                    doc.documentElement.removeChild(node)
                for node in preface_nodes:
                    preface.appendChild(node)
                doc.documentElement.insertBefore(preface, child)
                break
            elif child.nodeName != 'bookinfo':
                preface_nodes.append(child)

    # Scale screenshots of windows down (programming guide hack to fit in
    # table)
    for imagedata in elements(doc, 'imagedata'):
        fileref = imagedata.getAttribute('fileref')
        if fileref.startswith('window_xp_') or fileref.startswith('window_osx'):
            imagedata.attributes['scale'] = '25'

    # Write fixed docbook
    print('Writing %s' % docbook_filename)
    with open(docbook_filename, 'w') as docbook_file:
        docbook_file.write(doc.toxml())

示例#48

0

显示文件

文件： help_texts_extractor.py 项目： khchine5/lino

def node2html(node):
    """Render the doctree ``node`` to HTML and return its ``body`` part.

    ``publish_from_doctree`` hands back the rendered output as one string,
    so it cannot be indexed with ``'body'``.  The parts dictionary comes
    from ``publish_parts``, fed here through the ``doctree`` reader.
    """
    # Imported locally so the function does not rely on additional
    # module-level imports; ``core`` already comes from docutils.
    from docutils.io import DocTreeInput

    parts = core.publish_parts(
        source=node,
        source_class=DocTreeInput,
        reader_name="doctree",
        writer_name="html")
    return parts['body']

示例#49

0

显示文件

def gendoc_html(input_file, html_dir, api_objects, options):
    """Render the rst file ``input_file`` as one or more HTML pages.

    The source is parsed into a doctree; image URIs are passed through
    ``convert_image`` and literal blocks are replaced by colourised raw
    HTML.  The tree is wrapped in a ``Page`` (split to ``options.depth``
    when set), cross-page reference URIs are added, and navigation is
    attached when ``options.add_navigation`` is set.  For every page,
    title references are turned into links (to ``.py`` files copied into
    ``html_dir``, to API pages listed in ``api_objects``, or to sections)
    and the page is written to ``html_dir`` under ``page.filename``.
    """
    input_dir = os.path.dirname(input_file)

    # XXX Should calculate how apidoc_dir is relative to html_dir...
    apidoc_dir_rel = '../api'

    # Read root doctree (the source file is closed once it has been read)
    with open(input_file) as source_file:
        doctree = publish_doctree(source_file.read(), source_path=input_file)

    # Convert images
    for image in [n for n in doctree.traverse() if isinstance(n, nodes.image)]:
        uri = image.attributes['uri']
        image.attributes['uri'] = convert_image(uri, input_dir, html_dir)

    # Colorize literal blocks
    for block in [n for n in doctree.traverse()
                  if isinstance(n, nodes.literal_block)]:
        pysrc = block.astext()
        html = PythonColorizer().colorize_codeblock(pysrc)
        raw = nodes.raw(text=html, format='html')
        block.replace_self(raw)

    # Recursively split sections down to depth N into separate doctrees
    root_filename = os.path.splitext(os.path.basename(input_file))[0]
    root_page = Page(doctree,
                     '%s.html' % root_filename,
                     None, [])
    if options.depth:
        root_page.split(options.depth)

    # Add refuri to all references that use refid, to point to the
    # appropriate page.
    id_map = {} # Map id of nodes to uri
    root_page.collect_ids(id_map)
    # Only works for explicit section links; see future uses of id_map
    root_page.add_refuri(id_map)

    # Get page connectivity and add navigation
    pages = list(root_page.preorder())
    if options.add_navigation:
        last_index = len(pages) - 1
        for i, page in enumerate(pages):
            previous = pages[i - 1] if i > 0 else None
            # Named `following` so the builtin `next` is not shadowed.
            following = pages[i + 1] if i < last_index else None
            page.add_navigation(previous, following)

    for page in pages:
        # Resolve links
        linked_objects = set()
        for ref in [n for n in page.document.traverse()
                    if isinstance(n, nodes.title_reference)]:
            title = ref.astext()
            url = None
            if title.endswith('.py'):
                # Copy in referenced example program and link.
                shutil.copy(title, html_dir)
                url = os.path.basename(title)
                canonical = title
                link_class = 'filelink'
            elif title in api_objects and api_objects[title][0]:
                # Link to API page
                canonical, uri = api_objects[title]
                url = os.path.join(apidoc_dir_rel, uri)
                link_class = 'apilink'
            elif title.lower().replace(' ', '-') in id_map:
                # Section link (using `xx` instead of `xx`_).
                canonical = title
                url = id_map[title.lower().replace(' ', '-')]
                link_class = 'sectionlink'

            # Only link once per page, to avoid littering the text
            # with too many links
            if url and url not in linked_objects:
                linked_objects.add(url)

                newref = nodes.reference()
                newref.children = [c.deepcopy() for c in ref.children]
                newref['refuri'] = url
                if canonical != title:
                    newref['title'] = canonical # tooltip is canonical name
                if link_class:
                    newref['link_class'] = link_class
                ref.replace_self(newref)

        # Write page
        settings = {
            'embed_stylesheet': False,
            'stylesheet': 'doc.css',
            'stylesheet_path': None,
        }
        writer = HTMLWriter()
        html = publish_from_doctree(page.document,
                                    writer=writer,
                                    settings_overrides=settings)
        # One handle is opened per page; close each one right after
        # writing instead of leaving them all open.
        with open(os.path.join(html_dir, page.filename), 'w') as output_file:
            output_file.write(html)

示例#50

0

显示文件

        curdir = getattr(document.settings, "_source", None) or os.getcwd()
        filepath = os.path.join(curdir, filename)

        with open(filepath) as rf:
            text = rf.read()
            subdocument = new_document(filepath)
            parser.parse(text, subdocument)
        return subdocument.children


# Make the ``mdinclude`` directive available to the rst parser.
directives.register_directive("mdinclude", MDInclude)

# Sample rst source that ends with an ``mdinclude`` of ``sub.md``.
text = """
hello(mdinclude)
========================================

this is content

subsection
----------------------------------------

- foo
- bar
- boo

.. mdinclude:: sub.md
"""

# Parse the sample into a doctree, publish it with the default writer and
# print the decoded output.
doc = publish_doctree(text)
print(publish_from_doctree(doc).decode("utf-8"))

示例#51

0

显示文件

文件： yml_test.py 项目： intelkevinputnam/md-to-pptx

            border: 1px solid #000000;
            text-align: center;
            padding: 8px;
        }
        tr:nth-child(even) {
            background-color: #dddddd;
        }
    </style>
  </head>
  <body>"""

# Optional Markdown content: convert the file named under the 'md' key to
# HTML and append it to the page.
if 'md' in yamlObject['additional_content']:
    with open(yamlObject['additional_content']['md']) as mdF:
        mdContent = mdF.read()
        html += markdown.markdown(mdContent)

# Optional rst content: publish the file named under the 'rst' key as a
# full HTML document, then append only the first element that
# ``findChildren()`` returns for its <body>.
if 'rst' in yamlObject['additional_content']:
    with open(yamlObject['additional_content']['rst']) as rstF:
        rstContent = rstF.read()
        tree = publish_doctree(rstContent)
        htmlOutput = publish_from_doctree(tree, writer_name='html').decode()
        soup = BeautifulSoup(htmlOutput, features='lxml')
        body = soup.find('body')
        htmlOutput = body.findChildren()
        html += str(htmlOutput[0])

# Close the tags opened earlier in the page template.
html += """  </body>
</html>"""

with open("testing.html", 'w') as f:
    f.write(html)

示例#52

0

显示文件

    def _ebookize_all_news(self, parsed_articles):
        """
        Adds the previously processed news data to the ebook.

        For each article: its top image (if any) is downloaded and added to
        the book, the article text is published from rst to HTML, and the
        rendered chapter is added to ``self.book``, ``self.chaps`` and
        ``self.article_toc_list``.  Articles whose title contains an
        excluded keyword, or that are still too short after the justext
        fallback, are skipped.

        :param parsed_articles: The previously processed news data.
        """
        print("* Ebook-izing downloaded headlines. *")
        # some initialization
        template = self.env.get_template('tmpl/article_template.html')
        self.article_toc_list = []

        # put each into ebook
        for a in parsed_articles:
            print("Loading #{} into ebook: {}".format(a["count"], a["title"]))

            # Download the top image and register it in the book; the
            # article then refers to the in-book file name.
            # NOTE(review): this happens before the skip checks below, so
            # images of skipped articles are still added -- confirm intended.
            if a["top_image"] is not None:
                img_file_name = "art_img/image_{:03d}".format(a["count"])
                epimg = epub.EpubImage()
                epimg.file_name = img_file_name
                epimg.media_type = "image/jpeg"
                img_resp = requests.get(a["top_image"])
                img = img_resp.content
                epimg.set_content(img)
                self.book.add_item(epimg)

                a["top_image"] = img_file_name

            c = epub.EpubHtml(title=a["title"], file_name="article_{}.xhtml".format(a["count"]), lang='en')
            # Render the article text (treated as rst) to HTML and keep
            # only the <div class="document"> inside <body>.
            tree = publish_doctree(a["article_text"])
            html = publish_from_doctree(tree, writer_name='html').decode()
            soup = BeautifulSoup(html, 'lxml')
            body_only = soup.find('body').find('div', {"class": "document"})

            # skip articles that have barred keywords
            if any(kw in a["title"].lower() for kw in settings.TITLE_EXCLUSIONS):
                print("\tArticle title contains a barred keyword. Skipping.")
                continue

            if len(body_only.findAll('p')) < settings.MIN_PARAGRAPHS_FOR_AN_ARTICLE:
                print(
                    "\tArticle from {} too short. It may be paywalled or a video. It may also have been parsed incorrectly."
                    "\n\tURL: {}".format(a["source"], a["url"]))
                # fall back to justext to synthesize article
                a["article_text"] = ""
                count = 0
                paragraphs = justext.justext(requests.get(a["url"]).content, justext.get_stoplist("English"))
                # Rebuild the text from every non-boilerplate paragraph.
                for paragraph in paragraphs:
                    if not paragraph.is_boilerplate:
                        count += 1
                        a["article_text"] += "<p>{}</p>".format(paragraph.text)
                if count < settings.MIN_PARAGRAPHS_FOR_AN_ARTICLE:
                    print("\t\tArticle parsed correctly but actually short. Skipping.")
                    continue  # if it's still short, then it's actually short and not parsed incorrectly...continue
                else:
                    print("\t\tArticle was indeed parsed incorrectly. Fallback has parsed it correctly.")
            else:
                # Long enough: use the rendered document element as the text.
                a["article_text"] = body_only

            # Render the chapter from the template and register it with the
            # book, the chapter list and the table of contents.
            c.set_content(template.render(article=a))
            self.chaps.append(c)
            self.book.add_item(c)
            self.article_toc_list.append(
                epub.Link("article_{}.xhtml".format(a["count"]), "{} - {}".format(a["title"], a["source"]),
                          "art%d" % a["count"]))

示例#53

0

显示文件

def parse_rst(content):
    """Parse a multilingual rst document.

    The content should start with a metadata section shared by all
    languages, followed by one section per language.  Sections are
    separated by an rst comment consisting of "--", e.g.::

        :slug: some-post-title-slugified
        :draft: 1
        :datetime: 2012-09-12 16:03:15
        :excerpt: Short description
        :image: /img/some_image.png

        This will be ignored in main meta section

        .. --

        =================
        English title
        =================

        :lang: en
        :tags: Tag1, Tag2

        The content of the English post

        And another paragraph

        .. --

        ====================
        כותרת עברית
        ====================

        :lang: he
        :tags: פייתון|python, Heb Tag2|slug

        The content of the post in Hebrew

    The returned value is a generator yielding one tuple per section::

        (common metadata, '', content),
        (metadata, title, content),
        (metadata, title, content) ...

    Only docinfo fields whose lower-cased name is a key of ``FIELDS`` end
    up in the metadata; a truthy ``FIELDS`` value is called on the field
    text to transform it.
    """
    for section_source in re.split(r'^\.\.\s+--\s*$', content, flags=re.M):
        tree = publish_doctree(section_source, settings_overrides=DEFAULTS)

        # Collect the recognised docinfo fields of this section.
        metadata = {}
        for info in tree.traverse(docinfo):
            for field in info.children:
                name_el, body_el = field.children
                key = name_el.astext().lower()
                if key not in FIELDS:
                    continue
                raw_value = body_el.astext()
                transform = FIELDS[key]
                metadata[key] = transform(raw_value) if transform else raw_value

        # Publishing fills ``writer.parts``; the return value is not needed.
        writer = html5writer.SemanticHTML5Writer()
        publish_from_doctree(tree, writer=writer)
        body_html = writer.parts['body']
        title_html = writer.parts['title']
        yield metadata, title_html, body_html

示例#54

0

显示文件

def rest2docbook(rest_filename, docbook_filename):
    """Convert an rst file to DocBook XML and post-process the result.

    The rst file is parsed into a doctree, which is adjusted before writing
    (fields removed, line nodes replaced by their children, images scaled
    and copied next to the output).  The DocBook XML is written to
    ``docbook_filename``, parsed back, fixed up (program listings, column
    widths, contents sections, preface, screenshot scaling) and written to
    the same file again.
    """
    # Parenthesised single-argument print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print('Reading %s' % rest_filename)

    input_dir = os.path.dirname(rest_filename)
    output_dir = os.path.dirname(docbook_filename)

    # Read rest doctree (the file is closed as soon as it has been read)
    with open(rest_filename) as rest_file:
        doctree = publish_doctree(rest_file.read(),
                                  source_path=rest_filename)

    # Remove fields (docbook writer ignores but warns)
    for field in [n for n in doctree.traverse() if isinstance(n, nodes.field)]:
        field.parent.remove(field)

    # Remove line nodes (docbook crashes on them)
    for line in [n for n in doctree.traverse() if isinstance(n, nodes.line)]:
        line.parent.replace(line, line.children)

    # Copy images
    for img in [n for n in doctree.traverse() if isinstance(n, nodes.image)]:
        img['scale'] = '50'
        srcfile = os.path.join(input_dir, img['uri'])
        destfile = os.path.join(output_dir, img['uri'])
        shutil.copyfile(srcfile, destfile)

    print('Writing %s' % docbook_filename)

    # Write docbook xml
    writer = docbook.Writer()
    settings = {
        'doctype': 'book',
    }
    docbook_xml = publish_from_doctree(doctree,
                                       writer=writer,
                                       settings_overrides=settings)
    # Close the file before it is parsed again below.
    with open(docbook_filename, 'w') as docbook_file:
        docbook_file.write(docbook_xml)

    # Open docbook xml and fix it
    print('Reading %s' % docbook_filename)
    doc = parse(docbook_filename)

    # Strip leading newline from programlisting
    for elem in elements(doc, 'programlisting'):
        if elem.childNodes and elem.childNodes[0].nodeType == elem.TEXT_NODE:
            elem.childNodes[0].nodeValue = elem.childNodes[0].nodeValue.strip()

    # Dodgy hack to compensate for FOP's lack of table layout.
    # Programming guide tables need more room in the first (header) column than
    # right-hand columns.
    for elem in elements(doc, 'colspec'):
        if elem.getAttribute('colname') == 'col_1':
            elem.attributes['colwidth'] = '2*'
        else:
            elem.attributes['colwidth'] = '1*'

    # Strip table of contents (docbook creates its own)
    for title in elements(doc, 'title'):
        # An empty <title> has no first child to inspect; skip it.
        if title.childNodes and \
           title.childNodes[0].nodeType == title.TEXT_NODE and \
           title.childNodes[0].nodeValue == 'Contents':
            section = title.parentNode
            if section.nodeType == section.ELEMENT_NODE and \
               section.nodeName == 'section':
                section.parentNode.removeChild(section)

    # Strip local contents: drop sections whose first element child is an
    # itemizedlist.  The first element is tracked explicitly so a section
    # without element children is never judged by a node left over from a
    # previous section.
    for section in elements(doc, 'section'):
        first_element = None
        for child in section.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                first_element = child
                break
        if first_element is not None and \
           first_element.nodeName == 'itemizedlist':
            section.parentNode.removeChild(section)

    # Pull everything before first chapter into a preface
    preface_nodes = []
    preface = doc.createElement('preface')
    preface_title = doc.createElement('title')
    preface_title.appendChild(doc.createTextNode('Welcome'))
    preface.appendChild(preface_title)
    for child in doc.documentElement.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            if child.nodeName == 'chapter':
                for node in preface_nodes:
                    doc.documentElement.removeChild(node)
                for node in preface_nodes:
                    preface.appendChild(node)
                doc.documentElement.insertBefore(preface, child)
                break
            elif child.nodeName != 'bookinfo':
                preface_nodes.append(child)

    # Scale screenshots of windows down (programming guide hack to fit in
    # table)
    for imagedata in elements(doc, 'imagedata'):
        fileref = imagedata.getAttribute('fileref')
        if fileref.startswith('window_xp_') or fileref.startswith('window_osx'):
            imagedata.attributes['scale'] = '25'

    # Write fixed docbook
    print('Writing %s' % docbook_filename)
    with open(docbook_filename, 'w') as docbook_file:
        docbook_file.write(doc.toxml())