Exemplo n.º 1
0
def introspect_docstring_lineno(api_doc):
    """
    Find the (1-based) line number on which the docstring of the item
    described by C{api_doc} starts.

    A line number already recorded on C{api_doc} is returned as-is.
    Otherwise the source of C{api_doc.pyval} is located with
    C{inspect.findsource}, and the first following line that contains
    something other than whitespace or a comment is taken to be the
    start of the docstring; that line number is also stored in
    C{api_doc.docstring_lineno}.

    @return: The line number, or C{None} if it can't be determined.
    """
    known = api_doc.docstring_lineno
    if known is not UNKNOWN:
        return known
    if not (isinstance(api_doc, ValueDoc) and api_doc.pyval is not UNKNOWN):
        return None

    try:
        source_lines, start = inspect.findsource(api_doc.pyval)
        # For anything but a module, step past the line reported by
        # findsource before starting the search.
        if not isinstance(api_doc, ModuleDoc):
            start += 1
        for index in range(start, len(source_lines)):
            # Ignore everything after the first '#' on the line.
            code_part = source_lines[index].split('#', 1)[0]
            if code_part.strip():
                # `index` is 0-based; reported line numbers are 1-based.
                api_doc.docstring_lineno = index + 1
                return index + 1
    except (IOError, TypeError):
        pass
    except IndexError:
        log.warning('inspect.findsource(%s) raised IndexError' %
                    api_doc.canonical_name)
    return None
Exemplo n.º 2
0
    def load_records(self, records):
        """
        Populate the internal lookup maps from (*name*, *url*) pairs.

        Each accepted name is stored in the exact-match map under both
        its original and its canonical form, and every proper suffix of
        the canonical name is registered in the partial-name map.  Names
        without a canonical form are reported and skipped; names already
        present in the exact-match map are skipped silently.

        :Parameters:
          records : iterable
            the sequence of pairs (*name*, *url*) to add to the maps.
        """
        for label, target in records:
            canonical = self.get_canonical_name(label)
            if not canonical:
                log.warning("invalid object name in '%s': '%s'"
                    % (self._filename, label))
            elif label not in self._exact_matches:
                # First occurrence wins; later duplicates never get here.
                self._exact_matches[label] = target
                self._exact_matches[canonical] = target

                # Let every trailing fragment of the canonical name
                # lead back to the name it came from.
                for start in range(1, len(canonical)):
                    fragment = canonical[start:]
                    self._partial_names.setdefault(fragment, []).append(label)
Exemplo n.º 3
0
def get_canonical_name(value, strict=False):
    """
    Find the canonical dotted name of a Python value by introspection.

    @param value: The Python object whose canonical name is wanted.
    @param strict: Passed on as the C{strict} keyword argument of every
        L{DottedName} built from C{value}'s own names.
    @return: the canonical name for C{value}, or C{UNKNOWN} if no
    canonical name can be found.  Currently, C{get_canonical_name}
    can find canonical names for: modules; functions; non-nested
    classes; methods of non-nested classes; and some class methods
    of non-nested classes.  Values without a C{__name__}, and methods
    or functions whose name starts with C{'<'}, yield C{UNKNOWN}.

    @rtype: L{DottedName} or C{UNKNOWN}
    """
    if not hasattr(value, '__name__'): return UNKNOWN

    # Get the name via introspection.
    if isinstance(value, ModuleType):
        try:
            dotted_name = DottedName(value.__name__, strict=strict)
            # If the module is shadowed by a variable in its parent
            # package(s), then add a prime mark to the end, to
            # differentiate it from the variable that shadows it.
            if verify_name(value, dotted_name) is UNKNOWN:
                log.warning("Module %s is shadowed by a variable with "
                            "the same name." % dotted_name)
                # Note -- this return bypasses verify_name check:
                return DottedName(value.__name__+"'")
        except DottedName.InvalidDottedName:
            # Name is not a valid Python identifier -- treat as script.
            if not hasattr(value, '__file__'):
                # Without a file there is nothing to derive a script
                # name from; report that instead of falling through
                # with `dotted_name` unbound.
                return UNKNOWN
            # The script name is derived from the module's file name
            # (not from the repr of its bound __str__ method).
            filename = '%s' % value.__file__
            dotted_name = DottedName(munge_script_name(filename))

    elif isclass(value):
        # Builtin classes are named without a module prefix.
        if value.__module__ in ('__builtin__', 'builtins'):
            dotted_name = DottedName(value.__name__, strict=strict)
        else:
            dotted_name = DottedName(value.__module__, value.__name__,
                                     strict=strict)

    elif (inspect.ismethod(value) and value.__self__ is not None and
          value.__self__.__class__ is ClassType and
          not value.__name__.startswith('<')): # class method.
        # The method is bound to the class itself, so name it
        # relative to that class.
        class_name = get_canonical_name(value.__self__)
        if class_name is UNKNOWN: return UNKNOWN
        dotted_name = DottedName(class_name, value.__name__, strict=strict)
    elif (inspect.ismethod(value) and
          not value.__name__.startswith('<')):
        # Any other bound method: name it relative to the class of
        # the object it is bound to.
        class_name = get_canonical_name(value.__self__.__class__)
        if class_name is UNKNOWN: return UNKNOWN
        dotted_name = DottedName(class_name, value.__name__, strict=strict)
    elif (isinstance(value, FunctionType) and
          not value.__name__.startswith('<')):
        module_name = _find_function_module(value)
        if module_name is None: return UNKNOWN
        dotted_name = DottedName(module_name, value.__name__, strict=strict)
    else:
        return UNKNOWN

    # Only report the name if verify_name accepts it for this value.
    return verify_name(value, dotted_name)
Exemplo n.º 4
0
def _parse_warn(estr):
    """
    Log C{estr} as a warning, unless that exact message has already
    been logged through this function.
    """
    if estr not in _parse_warnings:
        # Remember the message so that repeats are silently dropped.
        _parse_warnings[estr] = 1
        log.warning(estr)
Exemplo n.º 5
0
def _parse_warn(estr):
    """
    Emit the warning message C{estr} once: the first call with a given
    message logs it, and later calls with the same message do nothing.
    """
    global _parse_warnings
    seen = _parse_warnings
    if estr in seen:
        return None
    # Record the message before logging it.
    seen[estr] = 1
    log.warning(estr)
def get_canonical_name(value, strict=False):
    """
    Find the canonical dotted name of a Python value by introspection.

    @param value: The Python object whose canonical name is wanted.
    @param strict: Passed on as the C{strict} keyword argument of every
        L{DottedName} built from C{value}'s own names.
    @return: the canonical name for C{value}, or C{UNKNOWN} if no
    canonical name can be found.  Currently, C{get_canonical_name}
    can find canonical names for: modules; functions; non-nested
    classes; methods of non-nested classes; and some class methods
    of non-nested classes.  Values without a C{__name__}, and methods
    or functions whose name starts with C{'<'}, yield C{UNKNOWN}.

    @rtype: L{DottedName} or C{UNKNOWN}
    """
    if not hasattr(value, '__name__'): return UNKNOWN

    # Get the name via introspection.
    if isinstance(value, ModuleType):
        try:
            dotted_name = DottedName(value.__name__, strict=strict)
            # If the module is shadowed by a variable in its parent
            # package(s), then add a prime mark to the end, to
            # differentiate it from the variable that shadows it.
            if verify_name(value, dotted_name) is UNKNOWN:
                log.warning("Module %s is shadowed by a variable with "
                            "the same name." % dotted_name)
                # Note -- this return bypasses verify_name check:
                return DottedName(value.__name__+"'")
        except DottedName.InvalidDottedName:
            # Name is not a valid Python identifier -- treat as script.
            if not hasattr(value, '__file__'):
                # Without a file there is nothing to derive a script
                # name from; report that instead of falling through
                # with `dotted_name` unbound.
                return UNKNOWN
            # The script name is derived from the module's file name
            # (not from the repr of its bound __str__ method).
            filename = '%s' % value.__file__
            dotted_name = DottedName(munge_script_name(filename))

    elif isclass(value):
        # Builtin classes are named without a module prefix.
        if value.__module__ == '__builtin__':
            dotted_name = DottedName(value.__name__, strict=strict)
        else:
            dotted_name = DottedName(value.__module__, value.__name__,
                                     strict=strict)

    elif (inspect.ismethod(value) and value.im_self is not None and
          value.im_class is ClassType and
          not value.__name__.startswith('<')): # class method.
        # The method is bound to the class itself, so name it
        # relative to that class.
        class_name = get_canonical_name(value.im_self)
        if class_name is UNKNOWN: return UNKNOWN
        dotted_name = DottedName(class_name, value.__name__, strict=strict)
    elif (inspect.ismethod(value) and
          not value.__name__.startswith('<')):
        # Any other method: name it relative to its im_class.
        class_name = get_canonical_name(value.im_class)
        if class_name is UNKNOWN: return UNKNOWN
        dotted_name = DottedName(class_name, value.__name__, strict=strict)
    elif (isinstance(value, FunctionType) and
          not value.__name__.startswith('<')):
        module_name = _find_function_module(value)
        if module_name is None: return UNKNOWN
        dotted_name = DottedName(module_name, value.__name__, strict=strict)
    else:
        return UNKNOWN

    # Only report the name if verify_name accepts it for this value.
    return verify_name(value, dotted_name)
Exemplo n.º 7
0
 def linesub(match):
     # Append an index-term marker to the matched line for every token
     # on it that is a known name; warn when a token has several targets.
     text = match.group()
     for word in TOKEN_RE.findall(text):
         if word not in names:
             continue
         candidates = names[word]
         fdist.inc(word)
         if len(candidates) > 1:
             listing = ', '.join(
                 str(doc.canonical_name) for doc in names[word])
             log.warning('%s is ambiguous: %s' % (word, listing))
         text += INDEXTERM % word
     return text
Exemplo n.º 8
0
    def _iter_tuples(self, f):
        """Iterate on a file returning 2-tuples."""
        for nrow, row in enumerate(f):
            # skip blank lines
            row = row.rstrip()
            if not row: continue

            rec = row.split('\t', 2)
            if len(rec) == 2:
                yield rec
            else:
                log.warning("invalid row in '%s' row %d: '%s'"
                            % (self._filename, nrow+1, row))
Exemplo n.º 9
0
 def linesub(match):
     # Tag the matched line with an index term for each recognised
     # token found on it, counting every hit in the frequency table.
     result = match.group()
     recognised = [tok for tok in TOKEN_RE.findall(result) if tok in names]
     for tok in recognised:
         fdist.inc(tok)
         if len(names[tok]) > 1:
             # More than one possible target: report all of them.
             choices = [str(v.canonical_name) for v in names[tok]]
             log.warning('%s is ambiguous: %s' % (tok, ', '.join(choices)))
         result += INDEXTERM % tok
     return result
Exemplo n.º 10
0
def get_docstring(value, module_name=None):
    """
    Return the docstring of C{value} as a unicode string; or C{None}
    if it has no docstring, or its docstring is not a string.

    A byte-string docstring is decoded as ascii if possible, otherwise
    with the encoding declared by the module it belongs to, and as a
    last resort as latin-1 (with a warning).

    @param module_name: The name of the module defining C{value}; looked
        up with C{get_containing_module} when needed and not given.
    @rtype: C{unicode}
    """
    doc = getattr(value, "__doc__", None)
    if doc is None:
        return None
    if isinstance(doc, unicode):
        return doc

    if not isinstance(doc, str):
        # Not a string at all.  BuiltinMethodType itself is a known
        # special case and is skipped without a warning.
        if value is not BuiltinMethodType:
            if hasattr(value, "__name__"):
                label = value.__name__
            else:
                label = repr(value)
            log.warning("%s's docstring is not a string -- ignoring it." % label)
        return None

    try:
        return unicode(doc, "ascii")
    except UnicodeDecodeError:
        # Non-ascii bytes: try the encoding of the defining module.
        if module_name is None:
            module_name = get_containing_module(value)
        if module_name is not None:
            try:
                module = get_value_from_name(module_name)
                filename = py_src_filename(module.__file__)
                encoding = epydoc.docparser.get_module_encoding(filename)
                return unicode(doc, encoding)
            except KeyboardInterrupt:
                raise
            except Exception:
                pass
        # Nothing better is known, so fall back to latin-1.
        if hasattr(value, "__name__"):
            label = value.__name__
        else:
            label = repr(value)
        log.warning(
            "%s's docstring is not a unicode string, but it "
            "contains non-ascii data -- treating it as "
            "latin-1." % label
        )
        return unicode(doc, "latin-1")
Exemplo n.º 11
0
def document(options, cancel, done):
    """
    Build the documentation index for C{options['modules']} and write
    it out as HTML to C{options['target']}.  C{document} is designed to
    be started in its own thread by L{EpydocGUI._go}.

    @param options: The options to use for generating documentation.
        This includes keyword options that can be given to
        L{docwriter.html.HTMLWriter}, as well as the option C{target}, which
        controls where the output is written to.
    @type options: C{dictionary}
    @param done: Its first item is set to C{'done'} on success and to
        C{'cancel'} when any exception (which is re-raised) interrupts
        the run.
    """
    from epydoc.docwriter.html import HTMLWriter
    from epydoc.docbuilder import build_doc_index
    import epydoc.docstringparser

    def abort(message):
        # Report the failure and flag the run as cancelled.
        log.error(message)
        done[0] = 'cancel'

    # Set the default docformat.
    epydoc.docstringparser.DEFAULT_DOCFORMAT = options.get('docformat',
                                                           'epytext')

    try:
        mode = options['introspect_or_parse']
        use_parser = mode in ('parse', 'both')
        use_introspection = mode in ('introspect', 'both')
        index = build_doc_index(options['modules'], use_parser,
                                use_introspection)
        writer = HTMLWriter(index, **options)
        target = options['target']
        log.start_progress('Writing HTML docs to %r' % target)
        writer.write(target)
        log.end_progress()

        # We're done.
        log.warning('Finished!')
        done[0] = 'done'

    except SystemExit:
        # Cancel.
        abort('Cancelled!')
        raise
    except Exception as e:
        # We failed.
        abort('Internal error: %s' % e)
        raise
    except:
        # We failed.
        abort('Internal error!')
        raise
Exemplo n.º 12
0
def _fix_self_shadowing_var(var_doc, varname, docindex):
    """
    Repair a variable that shadows its own value.

    For each index from 1 up to (but excluding) the last one of the
    value's canonical name, the element at that index gets a prime
    mark appended and the resulting name is looked up in C{docindex}.
    The first hit replaces C{var_doc.value}.  If nothing is found, the
    canonical name of the current value is deleted instead.
    """
    shadowed = var_doc.value.canonical_name
    for pos in range(1, len(shadowed) - 1):
        primed = shadowed[pos] + "'"
        candidate = shadowed[:pos] + primed + shadowed[pos + 1:]
        replacement = docindex.get_valdoc(candidate)
        if replacement is None:
            continue
        log.warning("%s shadows its own value -- using %s instead" %
                    (varname, candidate))
        var_doc.value = replacement
        return

    # No alternative value was found, so at least invalidate the
    # canonical name.
    log.warning('%s shadows itself' % varname)
    del var_doc.value.canonical_name
Exemplo n.º 13
0
def _fix_self_shadowing_var(var_doc, varname, docindex):
    """
    Try to give a self-shadowing variable its real value.

    Candidate names are built from the value's canonical name by
    appending a prime mark to one element at a time (every index except
    the first and the last).  The first candidate known to C{docindex}
    supplies the new C{var_doc.value}; when there is none, the value's
    canonical name is deleted.
    """
    original = var_doc.value.canonical_name
    index = 1
    stop = len(original) - 1
    while index < stop:
        alternative = (original[:index] + (original[index] + "'") +
                       original[index + 1:])
        found = docindex.get_valdoc(alternative)
        if found is not None:
            log.warning("%s shadows its own value -- using %s instead" %
                        (varname, alternative))
            var_doc.value = found
            return
        index += 1

    # The actual value could not be located: invalidate the
    # canonical name.
    log.warning('%s shadows itself' % varname)
    del var_doc.value.canonical_name
Exemplo n.º 14
0
def get_docstring(value, module_name=None):
    """
    Fetch C{value.__doc__} and return it as a unicode string.

    C{None} is returned when there is no docstring or when it is not a
    string.  Byte strings are decoded as ascii; if that fails, with the
    source encoding of the containing module; and if that fails too, as
    latin-1 after logging a warning.

    @param module_name: Name of the module that defines C{value}.  When
        omitted it is determined with C{get_containing_module}.
    @rtype: C{unicode}
    """
    def describe():
        # Name used for `value` in warning messages.
        if hasattr(value, '__name__'):
            return value.__name__
        return repr(value)

    text = getattr(value, '__doc__', None)
    if text is None:
        return None
    elif isinstance(text, unicode):
        return text
    elif isinstance(text, str):
        try:
            return unicode(text, 'ascii')
        except UnicodeDecodeError:
            if module_name is None:
                module_name = get_containing_module(value)
            if module_name is not None:
                # Use the encoding declared by the module's source file.
                try:
                    owner = get_value_from_name(module_name)
                    source_path = py_src_filename(owner.__file__)
                    codec = epydoc.docparser.get_module_encoding(source_path)
                    return unicode(text, codec)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    pass
            log.warning("%s's docstring is not a unicode string, but it "
                        "contains non-ascii data -- treating it as "
                        "latin-1." % describe())
            return unicode(text, 'latin-1')
    elif value is BuiltinMethodType:
        # Don't issue a warning for this special case.
        return None
    else:
        log.warning("%s's docstring is not a string -- ignoring it."
                    % describe())
        return None
Exemplo n.º 15
0
def document(options, cancel, done):
    """
    Create the documentation for C{modules}, using the options
    specified by C{options}.  C{document} is designed to be started in
    its own thread by L{EpydocGUI._go}.

    @param options: The options to use for generating documentation.
        This includes keyword options that can be given to
        L{docwriter.html.HTMLWriter}, as well as the option C{target}, which
        controls where the output is written to.  The keys
        C{introspect_or_parse} and C{modules} are required; C{docformat}
        defaults to C{'epytext'}.
    @type options: C{dictionary}
    @param cancel: Not used by this function.
    @param done: Its first item is set to C{"done"} when the
        documentation was written, and to C{"cancel"} when a
        C{SystemExit} or another C{Exception} interrupted the run.
    @raise SystemExit: Re-raised after being recorded in C{done}.
    @raise Exception: Any error raised while building or writing the
        documentation is logged, recorded in C{done} and re-raised.
    """
    from epydoc.docwriter.html import HTMLWriter
    from epydoc.docbuilder import build_doc_index
    import epydoc.docstringparser

    # Set the default docformat.
    docformat = options.get("docformat", "epytext")
    epydoc.docstringparser.DEFAULT_DOCFORMAT = docformat

    try:
        parse = options["introspect_or_parse"] in ("parse", "both")
        introspect = options["introspect_or_parse"] in ("introspect", "both")
        docindex = build_doc_index(options["modules"], parse, introspect)
        html_writer = HTMLWriter(docindex, **options)
        log.start_progress("Writing HTML docs to %r" % options["target"])
        html_writer.write(options["target"])
        log.end_progress()

        # We're done.
        log.warning("Finished!")
        done[0] = "done"

    except SystemExit:
        # Cancel.
        log.error("Cancelled!")
        done[0] = "cancel"
        raise
    except Exception as e:
        # We failed.  ("except ... as" is accepted by Python 2.6+ and
        # Python 3, unlike the comma form.)
        log.error("Internal error: %s" % e)
        done[0] = "cancel"
        raise
Exemplo n.º 16
0
def is_future_feature(object):
    """
    Tell whether C{object} is a feature object of the C{__future__}
    module, i.e. the result of a C{from __future__ import feature}
    statement.

    The very first call doubles as a self-test: if inspecting
    C{__future__} fails, a warning is logged and this and every later
    call return C{False}.
    """
    # Guard from unexpected implementation changes of the __future__ module.
    global __future_check_works
    if __future_check_works is None:
        # First call: assume the check works until proven otherwise.
        __future_check_works = True
        try:
            import __future__
            return isinstance(object, __future__._Feature)
        except:
            __future_check_works = False
            log.warning("Troubles inspecting __future__. Python implementation"
                        " may have been changed.")
            return False

    if not __future_check_works:
        return False
    import __future__
    return isinstance(object, __future__._Feature)
def is_future_feature(object):
    """
    Return True if C{object} is an instance of C{__future__._Feature},
    as bound by a C{from __future__ import feature} statement.

    The module-level flag C{__future_check_works} caches whether that
    test can be performed: it is decided on the first call, and once it
    is false the function always answers C{False}.
    """
    # Guard from unexpected implementation changes of the __future__ module.
    global __future_check_works
    if __future_check_works is None:
        # Undecided: optimistically enable the check and run it once.
        __future_check_works = True
        try:
            return is_future_feature(object)
        except:
            __future_check_works = False
            log.warning("Troubles inspecting __future__. Python implementation"
                        " may have been changed.")
            return False

    if __future_check_works:
        import __future__
        return isinstance(object, __future__._Feature)
    return False
def introspect_docstring_lineno(api_doc):
    """
    Determine where the docstring of the item documented by C{api_doc}
    begins, counting the first line of the file as line 1.

    If C{api_doc} does not already carry a line number, the source
    lines of its C{pyval} are fetched with C{inspect.findsource} and
    scanned for the first line holding anything besides whitespace and
    comments.  The result is cached in C{api_doc.docstring_lineno}.

    @return: The line number, or C{None} if it can't be determined.
    """
    if api_doc.docstring_lineno is not UNKNOWN:
        return api_doc.docstring_lineno

    introspectable = (isinstance(api_doc, ValueDoc) and
                      api_doc.pyval is not UNKNOWN)
    if introspectable:
        try:
            src, position = inspect.findsource(api_doc.pyval)
            # Except for modules, begin one line after the line that
            # findsource reported.
            if not isinstance(api_doc, ModuleDoc):
                position += 1
            total = len(src)
            while position < total:
                before_comment = src[position].split('#', 1)[0]
                if before_comment.strip():
                    # Convert the 0-based index to a 1-based line number.
                    api_doc.docstring_lineno = position + 1
                    return position + 1
                position += 1
        except IOError:
            pass
        except TypeError:
            pass
        except IndexError:
            log.warning('inspect.findsource(%s) raised IndexError'
                        % api_doc.canonical_name)
    return None
Exemplo n.º 19
0
    def colorize(self):
        """
        Return an HTML string that renders the source code for the
        module that was specified in the constructor.

        The text of C{self.module_filename} is read, tab-expanded and
        fed through the tokenizer, with C{self.tokeneater} producing
        the output.  If tokenizing fails with a C{TokenError}, the
        plain source text is used instead.  The result is re-encoded as
        ascii with xml character references, and
        C{PYSRC_EXPANDTO_JAVASCRIPT} is appended.

        @return: The generated HTML.
        """
        # Initialize all our state variables
        self.pos = 0
        self.cur_line = []
        self.context = []
        self.context_types = []
        self.indents = []
        self.lineno = 1
        self.def_name = None
        self.def_type = None
        self.has_decorators = False

        # Cache, used so we only need to list the target elements once
        # for each variable.
        self.doclink_targets_cache = {}

        # Load the module's text.  The file is closed as soon as it has
        # been read, instead of being left open until it is collected.
        with open(self.module_filename) as source_file:
            self.text = source_file.read()
        self.text = self.text.expandtabs(self.tab_width).rstrip() + '\n'

        # Construct the line_offsets table.
        self.find_line_offsets()

        num_lines = self.text.count('\n') + 1
        self.linenum_size = len(repr(num_lines + 1))

        # Call the tokenizer, and send tokens to our `tokeneater()`
        # method.  If anything goes wrong, then fall-back to using
        # the input text as-is (with no colorization).
        try:
            output = StringIO()
            self.out = output.write
            tokenize.tokenize(StringIO(self.text).readline, self.tokeneater)
            html = output.getvalue()
            if self.has_decorators:
                html = self._FIX_DECORATOR_RE.sub(r'\2\1', html)
        except tokenize.TokenError:
            html = self.text

        # Check for a unicode encoding declaration.
        m = self.UNICODE_CODING_RE.match(self.text)
        if m: coding = m.group(1)
        else: coding = 'iso-8859-1'

        # Decode the html string into unicode, and then encode it back
        # into ascii, replacing any non-ascii characters with xml
        # character references.
        try:
            html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
        except LookupError:
            # The declared coding is not a known codec; use the default.
            coding = 'iso-8859-1'
            html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
        except UnicodeDecodeError as e:
            log.warning("Unicode error while generating syntax-highlighted "
                        "source code: %s (%s)" % (e, self.module_filename))
            # Drop the undecodable bytes rather than failing.
            html = html.decode(coding,
                               'ignore').encode('ascii', 'xmlcharrefreplace')

        # Call expandto.
        html += PYSRC_EXPANDTO_JAVASCRIPT

        return html
Exemplo n.º 20
0
        write_latex(docindex, options, options.action)
    elif options.action == 'text':
        write_text(docindex, options)
    elif options.action == 'check':
        check_docs(docindex, options)
    else:
        print >>sys.stderr, '\nUnsupported action %s!' % options.action

    # If we supressed docstring warnings, then let the user know.
    if logger is not None and logger.supressed_docstring_warning:
        if logger.supressed_docstring_warning == 1:
            prefix = '1 markup error was found'
        else:
            prefix = ('%d markup errors were found' %
                      logger.supressed_docstring_warning)
        log.warning("%s while processing docstrings.  Use the verbose "
                    "switch (-v) to display markup errors." % prefix)

    # Basic timing breakdown:
    if options.verbosity >= 2 and logger is not None:
        logger.print_times()

def write_html(docindex, options):
    """
    Write HTML documentation for C{docindex} to C{options.target},
    reporting progress through the log.

    @param docindex: The documentation index handed to C{HTMLWriter}.
    @param options: An options object; all of its attributes are passed
        to C{HTMLWriter} as keyword arguments.  The target directory is
        only mentioned in the progress message if C{options.verbose}
        is positive.
    """
    from epydoc.docwriter.html import HTMLWriter
    writer = HTMLWriter(docindex, **options.__dict__)
    progress_message = 'Writing HTML docs'
    if options.verbose > 0:
        progress_message = 'Writing HTML docs to %r' % options.target
    log.start_progress(progress_message)
    writer.write(options.target)
    log.end_progress()
def introspect_docs(value=None, name=None, filename=None, context=None,
                    is_script=False, module_name=None):
    """
    Introspect a Python object and return its API documentation as a
    L{ValueDoc}.  The object is selected by exactly one of C{value},
    C{name} and C{filename}; combining C{name} or C{filename} with
    another of the three is an error.  Passing none of them documents
    the value C{None}.

    Results are cached by object identity, so introspecting the same
    object again returns the C{ValueDoc} created the first time.

    @param value: The python object that should be documented.
    @param filename: The name of the file that contains the python
        source code for a package, module, or script.  If
        C{filename} is specified, then C{introspect} will return a
        C{ModuleDoc} describing its contents.
    @param name: The fully-qualified python dotted name of any
        value (including packages, modules, classes, and
        functions).  C{DocParser} will automatically figure out
        which module(s) it needs to import in order to find the
        documentation for the specified object.
    @param context: The API documentation for the class of module
        that contains C{value} (if available).
    @param is_script: If true, C{filename} is loaded as a script, and
        the canonical name of the result is derived from the file name.
    @param module_name: The name of the module where the value is defined.
        Useful to retrieve the docstring encoding if there is no way to
        detect the module by introspection (such as in properties)
    @raise ValueError: If the combination of C{value}, C{name} and
        C{filename} is not one of the accepted ones.
    """
    name_given = name is not None
    file_given = filename is not None

    if value is None and name_given and not file_given:
        value = get_value_from_name(DottedName(name))
    elif value is None and file_given and not name_given:
        if is_script:
            value = get_value_from_scriptname(filename)
        else:
            value = get_value_from_filename(filename, context)
    elif name_given or file_given:
        raise ValueError("Expected exactly one of the following "
                         "arguments: value, name, filename")
    # Otherwise only `value` was (or may have been) given; None is a
    # perfectly good value to document.

    pyid = id(value)
    script_file_given = is_script and filename is not None

    # A value that was introspected before is served from the cache.
    if pyid in _introspected_values:
        # If the file is a script, then adjust its name.
        if script_file_given:
            script_name = DottedName(munge_script_name(str(filename)))
            _valuedoc_cache[pyid].canonical_name = script_name
        return _valuedoc_cache[pyid]

    # Create an initial value doc for this value & add it to the cache.
    val_doc = _get_valuedoc(value)

    # Mark the value as introspected before actually introspecting it.
    _introspected_values[pyid] = True
    introspecter = _get_introspecter(value)
    introspecter(value, val_doc, module_name=module_name)

    # Set canonical name, if it was given
    if val_doc.canonical_name is UNKNOWN and name_given:
        val_doc.canonical_name = DottedName(name)

    # If the file is a script, then adjust its name.
    if script_file_given:
        val_doc.canonical_name = DottedName(munge_script_name(str(filename)))

    # A value loaded from a file that still has no canonical name gets
    # its __name__ with a prime mark appended.
    if val_doc.canonical_name is UNKNOWN and file_given:
        shadowed_name = DottedName(value.__name__)
        log.warning("Module %s is shadowed by a variable with "
                    "the same name." % shadowed_name)
        val_doc.canonical_name = DottedName(str(shadowed_name)+"'")

    return val_doc
Exemplo n.º 22
0
class PythonSourceColorizer:
    """
    A class that renders a python module's source code into HTML
    pages.  These HTML pages are intended to be provided along with
    the API documentation for a module, in case a user wants to learn
    more about a particular object by examining its source code.
    Links are therefore generated from the API documentation to the
    source code pages, and from the source code pages back into the
    API documentation.

    The HTML generated by C{PythonSourceColorizer} has several notable
    features:

      - CSS styles are used to color tokens according to their type.
        (See L{CSS_CLASSES} for a list of the different token types
        that are identified).

      - Line numbers are included to the left of each line.

      - The first line of each class and function definition includes
        a link to the API source documentation for that object.

      - The first line of each class and function definition includes
        an anchor that can be used to link directly to that class or
        function.

      - If javascript is enabled, and the page is loaded using the
        anchor for a class or function (i.e., if the url ends in
        C{'#I{<name>}'}), then that class or function will automatically
        be highlighted; and all other classes and function definition
        blocks will be 'collapsed'.  These collapsed blocks can be
        expanded by clicking on them.

      - Unicode input is supported (including automatic detection
        of C{'coding:'} declarations).

    """
    #: A look-up table that is used to determine which CSS class
    #: should be used to colorize a given token.  The following keys
    #: may be used:
    #:   - Any token name (e.g., C{'STRING'})
    #:   - Any operator token (e.g., C{'='} or C{'@'}).
    #:   - C{'KEYWORD'} -- Python keywords such as C{'for'} and C{'if'}
    #:   - C{'DEFNAME'} -- the name of a class or function at the top
    #:     of its definition statement.
    #:   - C{'BASECLASS'} -- names of base classes at the top of a class
    #:     definition statement.
    #:   - C{'PARAM'} -- function parameters
    #:   - C{'DOCSTRING'} -- docstrings
    #:   - C{'DECORATOR'} -- decorator names
    #: If no CSS class can be found for a given token, then it won't
    #: be marked with any CSS class.
    CSS_CLASSES = {
        'NUMBER': 'py-number',
        'STRING': 'py-string',
        'COMMENT': 'py-comment',
        'NAME': 'py-name',
        'KEYWORD': 'py-keyword',
        'DEFNAME': 'py-def-name',
        'BASECLASS': 'py-base-class',
        'PARAM': 'py-param',
        'DOCSTRING': 'py-docstring',
        'DECORATOR': 'py-decorator',
        'OP': 'py-op',
        '@': 'py-decorator',
    }

    #: HTML code for the beginning of a collapsable function or class
    #: definition block.  The block contains two <div>...</div>
    #: elements -- a collapsed version and an expanded version -- and
    #: only one of these elements is visible at any given time.  By
    #: default, all definition blocks are expanded.
    #:
    #: This string has four C{%s} placeholders and should be
    #: interpolated with the following values::
    #:   (name, pad, indent, name)
    #: Where C{name} is the anchor name for the function or class
    #: (used as the prefix of both div ids); and C{pad} and C{indent}
    #: fill the C{pad} and C{indent} attributes of the collapsed div.
    START_DEF_BLOCK = ('<div id="%s-collapsed" style="display:none;" '
                       'pad="%s" indent="%s"></div>'
                       '<div id="%s-expanded">')

    #: HTML code for the end of a collapsable function or class
    #: definition block.
    END_DEF_BLOCK = '</div>'

    #: A regular expression used to pick out the unicode encoding for
    #: the source file.
    UNICODE_CODING_RE = re.compile(r'.*?\n?.*?coding[:=]\s*([-\w.]+)')

    #: A configuration constant, used to determine whether or not to add
    #: collapsable <div> elements for definition blocks.
    ADD_DEF_BLOCKS = True

    #: A configuration constant, used to determine whether or not to
    #: add line numbers.
    ADD_LINE_NUMBERS = True

    #: A configuration constant, used to determine whether or not to
    #: add tooltips for linked names.
    ADD_TOOLTIPS = True

    #: If true, then try to guess which target is appropriate for
    #: linked names; if false, then always open a div asking the
    #: user which one they want.
    GUESS_LINK_TARGETS = False

    def __init__(self,
                 module_filename,
                 module_name,
                 docindex=None,
                 url_func=None,
                 name_to_docs=None,
                 tab_width=8):
        """
        Create a new HTML colorizer for the specified module.

        @param module_filename: The name of the file containing the
            module; its text will be loaded from this file.
        @param module_name: The dotted name of the module; this will
            be used to create links back into the API source
            documentation.
        @param docindex: A docindex, used to create href links from
            identifiers to the API documentation for their values.
        @param url_func: A function that maps APIDoc -> URL.
        @param name_to_docs: A mapping from short names to lists of
            ValueDoc.
        @param tab_width: The number of spaces to replace each tab in
            the source code with.
        """
        # Get the source version, if possible.  This is best-effort:
        # on any ordinary failure we keep the filename we were given,
        # but we no longer swallow KeyboardInterrupt/SystemExit.
        try:
            module_filename = py_src_filename(module_filename)
        except Exception:
            pass

        #: The filename of the module we're colorizing.
        self.module_filename = module_filename

        #: The dotted name of the module we're colorizing.
        self.module_name = module_name

        #: A docindex, used to create href links from identifiers to
        #: the API documentation for their values.
        self.docindex = docindex

        #: A mapping from short names to lists of ValueDoc, used to
        #: decide which values an identifier might map to when creating
        #: href links from identifiers to the API docs for their values.
        self.name_to_docs = name_to_docs

        #: A function that maps APIDoc -> URL, used to create href
        #: links from identifiers to the API documentation for their
        #: values.
        self.url_func = url_func

        #: The index in C{text} of the last character of the last
        #: token we've processed.
        self.pos = 0

        #: A list that maps line numbers to character offsets in
        #: C{text}.  In particular, line C{M{i}} begins at character
        #: C{line_offsets[i]} in C{text}.  Since line numbers begin at
        #: 1, the first element of C{line_offsets} is C{None}.
        self.line_offsets = []

        #: A list of C{(toktype, toktext)} for all tokens on the
        #: logical line that we are currently processing.  Once a
        #: complete line of tokens has been collected in C{cur_line},
        #: it is sent to L{handle_line} for processing.
        self.cur_line = []

        #: A list of the names of the class or functions that include
        #: the current block.  C{context} has one element for each
        #: level of indentation; C{context[i]} is the name of the class
        #: or function defined by the C{i}th level of indentation, or
        #: C{None} if that level of indentation doesn't correspond to a
        #: class or function definition.
        self.context = []

        #: A list, corresponding one-to-one with L{self.context},
        #: indicating the type of each entry.  Each element of
        #: C{context_types} is one of: C{'func'}, C{'class'}, C{None}.
        self.context_types = []

        #: A list of indentation strings for each of the current
        #: block's indents.  I.e., the current total indentation can
        #: be found by taking C{''.join(self.indents)}.
        self.indents = []

        #: The line number of the line we're currently processing.
        self.lineno = 0

        #: The name of the class or function whose definition started
        #: on the previous logical line, or C{None} if the previous
        #: logical line was not a class or function definition.
        self.def_name = None

        #: The type of the class or function whose definition started
        #: on the previous logical line, or C{None} if the previous
        #: logical line was not a class or function definition.
        #: Can be C{'func'}, C{'class'}, C{None}.
        self.def_type = None

        #: The number of spaces to replace each tab in source code with
        self.tab_width = tab_width

    def find_line_offsets(self):
        """
        Construct the L{line_offsets} table from C{self.text}.
        """
        # line 0 doesn't exist; line 1 starts at char offset 0.
        self.line_offsets = [None, 0]
        # Find all newlines in `text`, and add an entry to
        # line_offsets for each one.
        pos = self.text.find('\n')
        while pos != -1:
            self.line_offsets.append(pos + 1)
            pos = self.text.find('\n', pos + 1)
        # Add a final entry, marking the end of the string.
        self.line_offsets.append(len(self.text))

    def lineno_to_html(self):
        """
        Return the HTML for the current line number (C{self.lineno}):
        a named anchor C{L<lineno>} followed by the number itself,
        right-justified to C{self.linenum_size} characters.
        """
        # Build a '%<width>s' template so that every line number is
        # padded to the same width.
        template = '%%%ds' % self.linenum_size
        n = template % self.lineno
        return '<a name="L%s"></a><tt class="py-lineno">%s</tt>' \
            % (self.lineno, n)

    def colorize(self):
        """
        Return an HTML string that renders the source code for the
        module that was specified in the constructor.

        @return: The colorized HTML, encoded as ascii (non-ascii
            characters are replaced by xml character references).
        """
        # Initialize all our state variables
        self.pos = 0
        self.cur_line = []
        self.context = []
        self.context_types = []
        self.indents = []
        self.lineno = 1
        self.def_name = None
        self.def_type = None
        self.has_decorators = False

        # Cache, used so we only need to list the target elements once
        # for each variable.
        self.doclink_targets_cache = {}

        # Load the module's text.  The `with` block makes sure the
        # file handle is closed even if reading fails.
        with open(self.module_filename) as source_file:
            self.text = source_file.read()
        self.text = self.text.expandtabs(self.tab_width).rstrip() + '\n'

        # Construct the line_offsets table.
        self.find_line_offsets()

        # The width of the line-number column is the number of digits
        # needed for the largest line number.
        num_lines = self.text.count('\n') + 1
        self.linenum_size = len(repr(num_lines + 1))

        # Call the tokenizer, and send tokens to our `tokeneater()`
        # method.  If anything goes wrong, then fall-back to using
        # the input text as-is (with no colorization).
        try:
            output = StringIO()
            self.out = output.write
            tokenize.tokenize(StringIO(self.text).readline, self.tokeneater)
            html = output.getvalue()
            if self.has_decorators:
                html = self._FIX_DECORATOR_RE.sub(r'\2\1', html)
        except tokenize.TokenError:
            html = self.text

        # Check for a unicode encoding declaration.
        m = self.UNICODE_CODING_RE.match(self.text)
        if m: coding = m.group(1)
        else: coding = 'iso-8859-1'

        # Decode the html string into unicode, and then encode it back
        # into ascii, replacing any non-ascii characters with xml
        # character references.
        try:
            html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
        except LookupError:
            # The declared coding is not a known codec; fall back.
            coding = 'iso-8859-1'
            html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
        except UnicodeDecodeError as e:
            log.warning("Unicode error while generating syntax-highlighted "
                        "source code: %s (%s)" % (e, self.module_filename))
            html = html.decode(coding,
                               'ignore').encode('ascii', 'xmlcharrefreplace')

        # Call expandto.
        html += PYSRC_EXPANDTO_JAVASCRIPT

        return html
def introspect_class(cls, class_doc, module_name=None):
    """
    Add API documentation information about the class C{cls}
    to C{class_doc}.

    @param cls: The class object to introspect.
    @param class_doc: The value doc to fill in.  It is specialized to
        L{ClassDoc} in place, and its docstring, bases, subclasses
        and variables are recorded.
    @param module_name: The name of the module where the class is
        defined.  If it is C{None} and C{class_doc.defining_module}
        is known, that module's canonical name is used instead.
    @return: C{class_doc}.
    """
    class_doc.specialize_to(ClassDoc)

    # Record the class's docstring.
    class_doc.docstring = get_docstring(cls)

    # Record the class's __all__ attribute (public names).  This is
    # best-effort: a malformed __all__ is ignored, but interpreter
    # exits and keyboard interrupts are allowed to propagate.
    public_names = None
    if hasattr(cls, '__all__'):
        try:
            public_names = set(str(name) for name in cls.__all__)
        except Exception:
            pass

    # Start a list of subclasses.
    class_doc.subclasses = []

    # Sometimes users will define a __metaclass__ that copies all
    # class attributes from bases directly into the derived class's
    # __dict__ when the class is created.  (This saves the lookup time
    # needed to search the base tree for an attribute.)  But for the
    # docs, we only want to list these copied attributes in the
    # parent.  So only add an attribute if it is not identical to an
    # attribute of a base class.  (Unfortunately, this can sometimes
    # cause an attribute to look like it was inherited, even though it
    # wasn't, if it happens to have the exact same value as the
    # corresponding base's attribute.)  An example of a case where
    # this helps is PyQt -- subclasses of QWidget get about 300
    # methods injected into them.
    base_children = {}

    # Record the class's base classes; and add the class to its
    # base class's subclass lists.
    if hasattr(cls, '__bases__'):
        try:
            bases = list(cls.__bases__)
        except Exception:
            bases = None
            log.warning("Class '%s' defines __bases__, but it does not "
                        "contain an iterable; ignoring base list."
                        % getattr(cls, '__name__', '??'))
        if bases is not None:
            class_doc.bases = []
            for base in bases:
                basedoc = introspect_docs(base)
                class_doc.bases.append(basedoc)
                basedoc.subclasses.append(class_doc)

            # Update in reverse order, so that attributes of earlier
            # bases overwrite those of later bases.
            bases.reverse()
            for base in bases:
                if hasattr(base, '__dict__'):
                    base_children.update(base.__dict__)

    # The module name is not defined if the class is being introspected
    # as another class base.
    if module_name is None and class_doc.defining_module not in (None, UNKNOWN):
        module_name = class_doc.defining_module.canonical_name

    # Record the class's local variables.
    class_doc.variables = {}
    if hasattr(cls, '__dict__'):
        # Python mangles a private name C{__x} to C{_<classname>__x},
        # where <classname> is the class name with its leading
        # underscores stripped (so C{__x} in C{class _Foo} becomes
        # C{_Foo__x}).  A class name made up only of underscores is
        # never used for mangling.
        mangling_name = ('%s' % getattr(cls, '__name__', '<none>')).lstrip('_')
        private_prefix = '_%s__' % mangling_name
        for child_name, child in cls.__dict__.items():
            if (child_name in base_children
                and base_children[child_name] == child):
                continue

            # Undo the name mangling: drop the '_<classname>' part but
            # keep the two leading underscores of the original name.
            if mangling_name and child_name.startswith(private_prefix):
                child_name = child_name[len(private_prefix)-2:]
            if child_name in UNDOCUMENTED_CLASS_VARS: continue
            val_doc = introspect_docs(child, context=class_doc,
                                      module_name=module_name)
            var_doc = VariableDoc(name=child_name, value=val_doc,
                                  container=class_doc,
                                  docs_extracted_by='introspecter')
            if public_names is not None:
                var_doc.is_public = (child_name in public_names)
            class_doc.variables[child_name] = var_doc

    return class_doc
Exemplo n.º 24
0
def introspect_class(cls, class_doc, module_name=None):
    """
    Add API documentation information about the class C{cls}
    to C{class_doc}.

    @param cls: The class object to introspect.
    @param class_doc: The value doc to fill in.  It is specialized to
        L{ClassDoc} in place, and its docstring, metaclass, bases,
        subclasses and variables are recorded.
    @param module_name: The name of the module where the class is
        defined.  If it is C{None} and C{class_doc.defining_module}
        is known, that module's canonical name is used instead.
    @return: C{class_doc}.
    """
    class_doc.specialize_to(ClassDoc)

    # Record the class's docstring.
    class_doc.docstring = get_docstring(cls)

    # Record the class's __all__ attribute (public names).  This is
    # best-effort: a malformed __all__ is ignored, but interpreter
    # exits and keyboard interrupts are allowed to propagate.
    public_names = None
    if hasattr(cls, '__all__'):
        try:
            public_names = set(str(name) for name in cls.__all__)
        except Exception:
            pass

    # Record the class's metaclass, unless it is the same type as
    # that of C{DummyClass}.
    if type(cls) is not type(DummyClass):
        class_doc.metaclass = introspect_docs(type(cls))

    # Start a list of subclasses.
    class_doc.subclasses = []

    # Sometimes users will define a __metaclass__ that copies all
    # class attributes from bases directly into the derived class's
    # __dict__ when the class is created.  (This saves the lookup time
    # needed to search the base tree for an attribute.)  But for the
    # docs, we only want to list these copied attributes in the
    # parent.  So only add an attribute if it is not identical to an
    # attribute of a base class.  (Unfortunately, this can sometimes
    # cause an attribute to look like it was inherited, even though it
    # wasn't, if it happens to have the exact same value as the
    # corresponding base's attribute.)  An example of a case where
    # this helps is PyQt -- subclasses of QWidget get about 300
    # methods injected into them.
    base_children = {}

    # Record the class's base classes; and add the class to its
    # base class's subclass lists.
    if hasattr(cls, '__bases__'):
        try:
            bases = list(cls.__bases__)
        except Exception:
            bases = None
            log.warning("Class '%s' defines __bases__, but it does not "
                        "contain an iterable; ignoring base list."
                        % getattr(cls, '__name__', '??'))
        if bases is not None:
            class_doc.bases = []
            for base in bases:
                basedoc = introspect_docs(base)
                class_doc.bases.append(basedoc)
                basedoc.subclasses.append(class_doc)

            # Update in reverse order, so that attributes of earlier
            # bases overwrite those of later bases.
            bases.reverse()
            for base in bases:
                if hasattr(base, '__dict__'):
                    base_children.update(base.__dict__)

    # The module name is not defined if the class is being introspected
    # as another class base.
    if module_name is None and class_doc.defining_module not in (None, UNKNOWN):
        module_name = class_doc.defining_module.canonical_name

    # Record the class's local variables.
    class_doc.variables = {}
    if hasattr(cls, '__dict__'):
        # Python mangles a private name C{__x} to C{_<classname>__x},
        # where <classname> is the class name with its leading
        # underscores stripped (so C{__x} in C{class _Foo} becomes
        # C{_Foo__x}).  A class name made up only of underscores is
        # never used for mangling.
        mangling_name = ('%s' % getattr(cls, '__name__', '<none>')).lstrip('_')
        private_prefix = '_%s__' % mangling_name
        for child_name, child in cls.__dict__.items():
            if (child_name in base_children
                and base_children[child_name] == child):
                continue

            # Undo the name mangling: drop the '_<classname>' part but
            # keep the two leading underscores of the original name.
            if mangling_name and child_name.startswith(private_prefix):
                child_name = child_name[len(private_prefix)-2:]
            if child_name in UNDOCUMENTED_CLASS_VARS: continue
            val_doc = introspect_docs(child, context=class_doc,
                                      module_name=module_name)
            var_doc = VariableDoc(name=child_name, value=val_doc,
                                  container=class_doc,
                                  docs_extracted_by='introspecter')
            if public_names is not None:
                var_doc.is_public = (child_name in public_names)
            class_doc.variables[child_name] = var_doc

    return class_doc
Exemplo n.º 25
0
    def colorize(self):
        """
        Return an HTML string that renders the source code for the
        module that was specified in the constructor.

        @return: The colorized HTML, encoded as ascii (non-ascii
            characters are replaced by xml character references),
            followed by C{PYSRC_EXPANDTO_JAVASCRIPT}.
        """
        # Initialize all our state variables
        self.pos = 0
        self.cur_line = []
        self.context = []
        self.context_types = []
        self.indents = []
        self.lineno = 1
        self.def_name = None
        self.def_type = None
        self.has_decorators = False

        # Cache, used so we only need to list the target elements once
        # for each variable.
        self.doclink_targets_cache = {}

        # Load the module's text.  The `with` block makes sure the
        # file handle is closed even if reading fails.
        with open(self.module_filename) as source_file:
            self.text = source_file.read()
        self.text = self.text.expandtabs(self.tab_width).rstrip()+'\n'

        # Construct the line_offsets table.
        self.find_line_offsets()

        # The width of the line-number column is the number of digits
        # needed for the largest line number.
        num_lines = self.text.count('\n')+1
        self.linenum_size = len(repr(num_lines+1))

        # Call the tokenizer, and send tokens to our `tokeneater()`
        # method.  If anything goes wrong, then fall-back to using
        # the input text as-is (with no colorization).
        try:
            output = StringIO()
            self.out = output.write
            tokenize.tokenize(StringIO(self.text).readline, self.tokeneater)
            html = output.getvalue()
            if self.has_decorators:
                html = self._FIX_DECORATOR_RE.sub(r'\2\1', html)
        except tokenize.TokenError:
            html = self.text

        # Check for a unicode encoding declaration.
        m = self.UNICODE_CODING_RE.match(self.text)
        if m: coding = m.group(1)
        else: coding = 'iso-8859-1'

        # Decode the html string into unicode, and then encode it back
        # into ascii, replacing any non-ascii characters with xml
        # character references.
        try:
            html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
        except LookupError:
            # The declared coding is not a known codec; fall back.
            coding = 'iso-8859-1'
            html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
        except UnicodeDecodeError as e:
            log.warning("Unicode error while generating syntax-highlighted "
                        "source code: %s (%s)" % (e, self.module_filename))
            html = html.decode(coding, 'ignore').encode(
                'ascii', 'xmlcharrefreplace')

        # Call expandto.
        html += PYSRC_EXPANDTO_JAVASCRIPT

        return html
Exemplo n.º 26
0
def introspect_docs(value=None, name=None, filename=None, context=None,
                    is_script=False, module_name=None):
    """
    Build the API documentation for one object by introspecting the
    live Python value, and return it as a L{ValueDoc}.  The object is
    selected with exactly one of C{value}, C{name} or C{filename}.
    (Passing none of them documents the value C{None}.)

    @param value: The python object that should be documented.
    @param name: The fully-qualified python dotted name of any
        value (including packages, modules, classes, and
        functions); the value is looked up from this name.
    @param filename: The name of the file that contains the python
        source code for a package, module, or script; the value is
        loaded from this file.
    @param context: The API documentation for the class or module
        that contains C{value} (if available).
    @param is_script: If true, C{filename} is loaded as a script and
        the canonical name of the result is derived from the
        filename.
    @param module_name: The name of the module where the value is defined.
        Useful to retrieve the docstring encoding if there is no way to
        detect the module by introspection (such as in properties)
    @raise ValueError: If more than one of C{value}, C{name} and
        C{filename} is specified.
    """
    # Work out which value we are documenting.
    if name is None and filename is None:
        # Only `value` can have been given -- and it's ok if value is
        # None; that's a value, after all.
        pass
    elif value is not None or (name is not None and filename is not None):
        raise ValueError("Expected exactly one of the following "
                         "arguments: value, name, filename")
    elif name is not None:
        value = get_value_from_name(DottedName(name))
    elif is_script:
        value = get_value_from_scriptname(filename)
    else:
        value = get_value_from_filename(filename, context)

    cache_key = id(value)
    rename_as_script = is_script and filename is not None

    # Values that were introspected before are served from the cache;
    # only a script's canonical name is refreshed.
    if cache_key in _introspected_values:
        if rename_as_script:
            script_name = DottedName(munge_script_name(str(filename)))
            _valuedoc_cache[cache_key].canonical_name = script_name
        return _valuedoc_cache[cache_key]

    # First visit: get the (cached) value doc, mark the value as
    # introspected, and let the matching introspecter fill the doc in.
    val_doc = _get_valuedoc(value)
    _introspected_values[cache_key] = True
    introspecter = _get_introspecter(value)
    introspecter(value, val_doc, module_name=module_name)

    # Use the name we were given if introspection didn't find one.
    if name is not None and val_doc.canonical_name is UNKNOWN:
        val_doc.canonical_name = DottedName(name)

    # Scripts are always named after their (munged) filename.
    if rename_as_script:
        val_doc.canonical_name = DottedName(munge_script_name(str(filename)))

    # A value loaded from a file that still has no name is reported
    # as shadowed, and gets its __name__ with a trailing quote.
    if filename is not None and val_doc.canonical_name is UNKNOWN:
        shadowed_name = DottedName(value.__name__)
        log.warning("Module %s is shadowed by a variable with "
                    "the same name." % shadowed_name)
        val_doc.canonical_name = DottedName(str(shadowed_name)+"'")

    return val_doc
Exemplo n.º 27
0
        write_latex(docindex, options, options.action)
    elif options.action == 'text':
        write_text(docindex, options)
    elif options.action == 'check':
        check_docs(docindex, options)
    else:
        print >> sys.stderr, '\nUnsupported action %s!' % options.action

    # If we supressed docstring warnings, then let the user know.
    if logger is not None and logger.supressed_docstring_warning:
        if logger.supressed_docstring_warning == 1:
            prefix = '1 markup error was found'
        else:
            prefix = ('%d markup errors were found' %
                      logger.supressed_docstring_warning)
        log.warning("%s while processing docstrings.  Use the verbose "
                    "switch (-v) to display markup errors." % prefix)

    # Basic timing breakdown:
    if options.verbosity >= 2 and logger is not None:
        logger.print_times()


def write_html(docindex, options):
    """
    Write the HTML documentation for C{docindex} to C{options.target},
    reporting progress through C{log}.

    @param docindex: The documentation index passed to C{HTMLWriter}.
    @param options: The options object; its attributes are passed to
        C{HTMLWriter} as keyword arguments, and its C{verbose} and
        C{target} attributes are read here.
    """
    from epydoc.docwriter.html import HTMLWriter
    html_writer = HTMLWriter(docindex, **options.__dict__)

    # Only mention the target directory when running verbosely.
    progress_header = 'Writing HTML docs'
    if options.verbose > 0:
        progress_header = 'Writing HTML docs to %r' % options.target
    log.start_progress(progress_header)

    html_writer.write(options.target)
    log.end_progress()