def inspect(filename, source_path=None):
    """Return the document object before any transforms are applied.

    The source is read with the ``standalone`` reader and the
    ``restructuredtext`` parser; only ``reader.read`` is called, so no
    transforms are run on the returned document.

    :param filename: Path of the file to read. Used as the source path
        when ``source_path`` is not given.
    :param source_path: Optional explicit source path. When provided it
        takes precedence over ``filename``.
    :returns: The result of ``pub.reader.read`` for the configured source.
    """
    from docutils.core import Publisher

    # ``filename`` was previously ignored, so calling ``inspect(path)``
    # passed ``None`` on to ``set_source``. Fall back to it here.
    if source_path is None:
        source_path = filename

    pub = Publisher(source_class=io.FileInput)
    pub.set_reader("standalone", None, "restructuredtext")
    pub.process_programmatic_settings(None, None, None)
    pub.set_source(source_path=source_path)
    pub.set_io()
    return pub.reader.read(pub.source, pub.parser, pub.settings)
def inspect(
    filename,
    source_path=None,
):
    """Return the document object before any transforms are applied.

    The source is read with the ``standalone`` reader and the
    ``restructuredtext`` parser; only ``reader.read`` is called, so no
    transforms are run on the returned document.

    :param filename: Path of the file to read. Used as the source path
        when ``source_path`` is not given.
    :param source_path: Optional explicit source path. When provided it
        takes precedence over ``filename``.
    :returns: The result of ``pub.reader.read`` for the configured source.
    """
    from docutils.core import Publisher

    # ``filename`` was previously ignored, so calling ``inspect(path)``
    # passed ``None`` on to ``set_source``. Fall back to it here.
    if source_path is None:
        source_path = filename

    pub = Publisher(
        source_class=io.FileInput,
    )
    pub.set_reader('standalone', None, "restructuredtext")
    pub.process_programmatic_settings(None, None, None)
    pub.set_source(source_path=source_path)
    pub.set_io()
    return pub.reader.read(pub.source, pub.parser, pub.settings)
def check_for_errors(content, filepath=None):
    """Parse reStructuredText and collect every message docutils reports.

    :param string content: reStructuredText to be linted
    :param string filepath: Optional path to file, this will be returned as the source
    :rtype list: List of reported messages, in the order they were emitted.
        Each one carries ``line``, ``source``, ``level``, ``type``,
        ``message`` (text of its first child) and ``full_message``
        (text of the whole message) attributes.
    """
    # Set up a publisher following the `rst2html.py` flow.
    # DEV: `process_command_line` is avoided because it processes `sys.argv`,
    # which belongs to `rst-lint`, not `docutils`.
    publisher = Publisher(None, None, None, settings=None)
    publisher.set_components('standalone', 'restructuredtext', 'pseudoxml')
    settings = publisher.get_settings(halt_level=5)
    publisher.set_io()

    # DEV: The document is built by hand rather than through `read`, because
    # `read` attempts to use `stdin` when `source` is `None`, and rather than
    # through `parse`, because the observer must be attached before parsing.
    rst_reader = publisher.reader
    document = utils.new_document(filepath, settings)

    # Disable stdout
    # TODO: Find a more proper way to do this
    # TODO: We might exit the program if a certain error level is reached
    document.reporter.stream = None

    found = []

    def _remember(report):
        # The report was just generated, so it is annotated in place
        # before being stored.
        report.line = report.get('line')
        report.source = report['source']
        report.level = report['level']
        report.type = report['type']
        report.message = Element.astext(report.children[0])
        report.full_message = Element.astext(report)
        found.append(report)

    document.reporter.attach_observer(_remember)

    # Parsing reports through the observer attached above.
    rst_reader.parser.parse(content, document)

    # Transforms can report as well.
    # DEV: `apply_transforms` is not used since it has `attach_observer`
    # baked in, and only our own listener should be attached.
    components = (
        publisher.source,
        publisher.reader,
        publisher.reader.parser,
        publisher.writer,
        publisher.destination,
    )
    document.transformer.populate_from_components(components)

    runner = document.transformer
    while runner.transforms:
        if not runner.sorted:
            # Unsorted initially, and whenever a transform is added.
            runner.transforms.sort()
            runner.transforms.reverse()
            runner.sorted = 1
        rank, transform_cls, start_node, options = runner.transforms.pop()
        transform_cls(runner.document, startnode=start_node).apply(**options)
        runner.applied.append((rank, transform_cls, start_node, options))

    return found
def lint(content, filepath=None):
    """Lint reStructuredText and return the messages docutils reported.

    :param string content: reStructuredText to be linted
    :param string filepath: Optional path to file, this will be returned as the source
    :rtype list: List of reported messages, in the order they were emitted.
        Each one carries ``line``, ``source``, ``level``, ``type``,
        ``message`` (text of its first child) and ``full_message``
        (text of the whole message) attributes.
    """
    # Publisher setup mirrors the `rst2html.py` flow.
    # DEV: `process_command_line` cannot be used since it processes
    # `sys.argv`, which is for `rst-lint`, not `docutils`.
    rst_publisher = Publisher(None, None, None, settings=None)
    rst_publisher.set_components('standalone', 'restructuredtext', 'pseudoxml')
    settings = rst_publisher.get_settings(halt_level=5)
    rst_publisher.set_io()

    # DEV: `read` is skipped because with a `None` source it attempts to read
    # from `stdin`, and the content is already known.
    # DEV: The document is created without `parse` so that observers can be
    # attached before parsing starts.
    active_reader = rst_publisher.reader
    document = utils.new_document(filepath, settings)

    # Disable stdout
    # TODO: Find a more proper way to do this
    # TODO: We might exit the program if a certain error level is reached
    document.reporter.stream = None

    messages = []

    def _collect(entry):
        # Copy the interesting fields onto the freshly generated entry.
        entry.line = entry.get('line')
        entry.source = entry['source']
        entry.level = entry['level']
        entry.type = entry['type']
        entry.message = Element.astext(entry.children[0])
        entry.full_message = Element.astext(entry)
        messages.append(entry)

    document.reporter.attach_observer(_collect)

    # Parse the content; anything reported lands in `messages`.
    active_reader.parser.parse(content, document)

    # Apply transforms by hand so that only our listener is attached
    # (`apply_transforms` has `attach_observer` baked in).
    document.transformer.populate_from_components((
        rst_publisher.source,
        rst_publisher.reader,
        rst_publisher.reader.parser,
        rst_publisher.writer,
        rst_publisher.destination,
    ))

    xformer = document.transformer
    while xformer.transforms:
        if not xformer.sorted:
            # Unsorted initially, and whenever a transform is added.
            xformer.transforms.sort()
            xformer.transforms.reverse()
            xformer.sorted = 1
        priority, klass, pending_node, apply_kwargs = xformer.transforms.pop()
        step = klass(xformer.document, startnode=pending_node)
        step.apply(**apply_kwargs)
        xformer.applied.append((priority, klass, pending_node, apply_kwargs))

    return messages
def build_pegtable():
    """
    Search all subdirs of working directory for peg files and parse
    peg metadata from them.

    Every subdirectory of ``config.working_directory`` except ``CVS`` is
    treated as one peg. While a peg is processed,
    ``config.working_directory`` points at that subdirectory; the initial
    value is restored before returning, even if an exception escapes.

    Returns the table containing metadata from all the pegs: a list with
    one dict per peg directory, holding the keys ``authors``, ``status``,
    ``topic``, ``stakeholders``, ``last-modified``, ``dir``, ``files``,
    ``html``, ``rst``, ``rstfiles`` and ``ignore``.
    """
    pegtable = []
    init_working_directory = config.working_directory
    base = slashify(init_working_directory)
    pegdirs = [d for d in os.listdir(init_working_directory)
               if os.path.isdir(base + d) and d != 'CVS']

    try:
        for pegdir in pegdirs:
            dbg('Processing PEG ' + pegdir)
            config.working_directory = base + pegdir
            # finally adds peg's metadata into pegtable
            pegtable.append(_build_peg(pegdir))
    finally:
        # Restore the caller's working directory on every exit path.
        config.working_directory = init_working_directory

    return pegtable


def _build_peg(pegdir):
    """Collect the metadata dict for the peg in ``config.working_directory``."""
    peg = {'authors': [], 'status': config.pegboard_undefined,
           'topic': pegdir, 'stakeholders': [], 'last-modified': '',
           'dir': pegdir, 'files': '', 'html': '', 'rst': '',
           'rstfiles': [], 'ignore': []}

    peg_path = slashify(config.working_directory)
    # Dot files and names containing '#' or '~' are left out.
    peg['files'] = [f for f in os.listdir(config.working_directory)
                    if os.path.isfile(peg_path + f)
                    and not f.startswith('.')
                    and '#' not in f and '~' not in f]

    rstfiles = [f for f in peg['files'] if f.endswith('.rst')]
    if 'peg.rst' in peg['files']:
        peg['rst'] = 'peg.rst'
    elif rstfiles:
        # Without a 'peg.rst', the last '.rst' file in the listing is used.
        peg['rst'] = rstfiles[-1]

    config.dbg.mute('docutils')
    config.mp_generate = 0
    try:
        for rstfile in rstfiles:
            _process_rstfile(peg, pegdir, rstfile)
    finally:
        # Undo the temporary settings even if processing blows up.
        config.dbg.enable('docutils')
        config.mp_generate = 1

    if not peg['html']:
        # No html name came from the peg's main rst file; use the first
        # html/rst/txt file found instead.
        for name in peg['files']:
            if name.endswith(('.html', '.rst', '.txt')):
                peg['html'] = name
                break

    # The original assigned to the misspelled ``config.intput_filename``,
    # which left ``config.input_filename`` at the last processed file.
    config.input_filename = ''
    return peg


def _process_rstfile(peg, pegdir, rstfile):
    """Run one rst file through docutils and record its metadata in ``peg``."""
    config.input_filename = rstfile
    config.output_filename = rstfile[:-4] + config.midfix + '.html'

    pub = Publisher()
    pub.set_reader('standalone', None, 'restructuredtext')
    filename = slashify(config.working_directory) + rstfile
    # Pass argv as a list so paths containing whitespace stay one argument.
    pub.process_command_line(
        argv=['--config', config.docutils_conf, filename])

    # conversion may fail because of bad restructuredtext
    try:
        pub.set_io()
        document = pub.reader.read(pub.source, pub.parser, pub.settings)
        pub.apply_transforms(document)
        peg['ignore'].append(config.output_filename)

        # conversion have succeeded so far, parsing peg's metadata
        # from its document tree
        if rstfile == peg['rst']:
            peg['html'] = rstfile[:-4] + config.midfix + '.html'
            peg['topic'] = getTagValue(document, 'title', always_raw=1)

            last_modified = getFieldTagValue(document, 'last-modified')
            peg['last-modified'] = last_modified
            # we may have got 'rawsource', which needs some tidying
            if last_modified.startswith('$Date'):
                end = len(last_modified) - 11
                peg['last-modified'] = \
                    last_modified[7:end].replace('/', '-')

            peg['status'] = (getTagValue(document, 'status')
                             or config.pegboard_undefined)

            stakeholders = (getFieldTagValue(document, 'stakeholder')
                            or getFieldTagValue(document, 'stakeholders'))
            peg['stakeholders'] = [s.strip()
                                   for s in stakeholders.split(',')]
            peg['authors'] = getTagValue(document, 'author', all=1)
        else:
            status = getTagValue(document, 'status')
            if status:
                peg['rstfiles'].append({'filename': rstfile,
                                        'status': status})
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        dbg_fail('PEG %s: Docutil raised an exception while converting %s. '
                 % (pegdir, rstfile))
        dbg_fail('Conversion failed and pegbaord data could not be collected.\n')