# Package initialisation: import the Du extension sub-packages (parser, reader,
# writer) and adjust the reStructuredText directive registry.

# Imported explicitly because the registry calls below reference the
# ``docutils.parsers.rst.directives`` module by its fully qualified name.
import docutils.parsers.rst.directives

import dotmpe.du
from dotmpe.du.util import get_log
import transform
#import extractor
import node

# import and register
import dotmpe.du.ext.parser
import dotmpe.du.ext.reader
import dotmpe.du.ext.writer
from dotmpe.du.ext.parser.rst.directive.margin import Margin
from dotmpe.du.ext.parser.rst.directive.images import Figure


logger = get_log(__name__)

# Register the Margin directive class under the name 'margin'.
# (The original note read: "Register left_margin/right_margin directives.")
docutils.parsers.rst.directives.register_directive('margin', Margin)

# Override include directive registration (currently disabled).
# FIXME: better use another directive name
#from dotmpe.du.ext.parser.rst.directive.include import Include
#directives.register_directive('include', Include)

# Override figure, enable 'label' for figure directive.
# FIXME: ugly.. need to dream up new directive names..
# Drop any 'figure' entry already present in the registry; a missing entry is
# not an error.
# NOTE(review): Figure is imported above but its registration is not visible
# in this part of the file -- presumably it follows; confirm.
docutils.parsers.rst.directives._directive_registry.pop('figure', None)
from datetime import datetime from pprint import pformat import json from sqlalchemy import Table from docutils import nodes from dotmpe.du import util from dotmpe.du.util import SqlBase, get_session from dotmpe.du.ext import extractor logger = util.get_log(__name__) class HtdocsExtractor(extractor.Extractor): """ TODO: store titles in rel. DB. Record: - value (unicode string) - xml-path (used to infer type?) - file - char_offset (if I can get it from the parser) - line_offset Some global identifiers could be inferred. Make up some schemes.. titles, definition terms, roles.
target_format: %s, tag: %s, action: %s""" % (source_format, target_format, tag, action) print >>sys.stderr, """reader_name: %s, parser_name: %s, writer_name: %s, builder_module: %s""" % (reader_name, parser_name, writer_name, module_name) if source_format == 'mime': parser = comp.get_parser_class('rst')(rfc2822=1) else: parser = comp.get_parser_class(parser_name)() # Main log = util.get_log(None, fout=False, stdout=True) if action == 'proc': log.info("Starting Du processor: "+tag) assert target_format == 'pseudoxml' # TODO: use source_format #frontend.cli_process( # sys.argv[1:], builder_name=module_name) frontend.cli_process(sys.argv[1:], None, 'dotmpe.du.builder.'+tag) #frontend.cli_process( # sys.argv[1:], builder_name=module_name) elif action == 'pub': log.info("Starting Du publish") frontend.cli_render( sys.argv[1:], builder_name=module_name)
2. If scheme. 1. Try and find protocol resolver 2. Resolve resource and note Status, Locator and ID. """ import os, pickle, socket, urlparse from docutils import nodes, frontend import uriref from dotmpe.du import util from dotmpe.du.ext import extractor logger = util.get_log(__name__, fout=False) logger = util.get_log(__name__) class ReferenceExtractor(extractor.Extractor): """ Stores all external references in an index. """ settings_spec = ( 'Reference Extractor Options', """The reference extractor analyzes URLs from the document, and classes them into four types: external references for other domains, local references for inter-document links, and two uncovered rest groups: query/fragment references in the same protocol, and other sorts of URI which do not dereference
import sys import traceback from pprint import pprint, pformat from docutils.core import publish_cmdline from docutils.parsers.rst import Parser from docutils import Component, core, SettingsSpec, frontend #import nabu.server #import nabu.process from dotmpe.du import comp, util import dotmpe.du.ext from dotmpe.du.ext.parser import Inliner logger = util.get_log(__name__, fout_level=logging.INFO, stdout=True) def cli_process(argv, builder=None, builder_name='mpe', description=''): """ - Load builder for given name or use provided instance. Make one or more invocations to process for given source files, process will run all extractors of the given builder. - CLI arguments for subsequent calls are separated by '--'. TODO: Extractors should be initialized only once (ie. using initial options only). Settings are updated from additional options if provided.
import types import StringIO import docutils.core from docutils.core import Publisher from docutils import SettingsSpec, frontend, utils, transforms import sqlite3 #import nabu #import nabu.server import dotmpe from dotmpe.du.util import get_session, SqlBase from dotmpe.du import comp, util logger = util.get_log(__name__)#, stdout=True, fout=False) class Builder(SettingsSpec, Publisher): """ Each builder is a static configuration of Docutils and Nabu components. Useful during development of new docutils publisher chains. Behind it are Du Publisher and Nabu data extraction routines. This implementation tries to stay close to the publisher, but adds the routines needed to process documents from the command line without rendering. It does not borrow much of Nabu except the Extractor interface/base-class. Like the du publisher, it retrieves settings from the commandline arguments using process_command_line.
import types import StringIO import docutils.core from docutils.core import Publisher from docutils import SettingsSpec, frontend, utils, transforms import sqlite3 #import nabu #import nabu.server import dotmpe from dotmpe.du.mpe_du_util import get_session, SqlBase from dotmpe.du import comp, util logger = util.get_log(__name__, fout_level=logging.INFO) class Builder(SettingsSpec, Publisher): """ Each builder is a static configuration of Docutils and Nabu components. Useful during development of new docutils publisher chains. Behind it are Du Publisher and Nabu data extraction routines. This implementation tries to stay close to the publisher, but adds the routines needed to process documents from the command line without rendering. It does not borrow much of Nabu except the Extractor interface/base-class. Like the du publisher, it retrieves settings from the commandline arguments