Пример #1
0
class SimpleParse(object):
    """Split source text into typed fragments and render them as HTML.

    This class reads three attributes it does not define itself, so they are
    presumably supplied by subclasses:

    * ``spec`` -- sequence of dicts with ``open`` (regex source), ``close``
      (literal terminator string) and ``type`` (fragment type name) keys;
    * ``reserved`` -- container of reserved words (tested with ``in``);
    * ``identdef`` -- object with a ``split(line)`` method, used by
      :meth:`get_idents`.

    :meth:`_parse_code` and :meth:`_parse_include` are hooks that must be
    overridden before :meth:`out` can render ``code``/``include`` fragments.
    """

    def __init__(self, config, tree):
        """Store per-tree state and compile the fragment-opening regex.

        :param config: configuration object; only stored here.
        :param tree: mapping providing at least ``name`` and ``version``.
        """
        self.config = config
        self.tree = tree
        self.files = Files(tree)
        self.treeid = Tree.query.get_treeid(tree['name'], tree['version'])
        self.filename = ''
        self.release_id = ''
        self.buf = []
        self.pos = 0
        self.start = 0
        self.end = 0
        self.maxchar = 0

        self.frags = []

        # One capturing group per spec entry: the index of the group that
        # matched tells parse() which entry opened the fragment.
        self.open_re = re.compile(
            "|".join(['(%s)' % i['open'] for i in self.spec]), re.M)

    def parse_file(self, filename, release_id):
        """Read *filename* at *release_id* through ``self.files`` and parse it."""
        fp = self.files.getfp(filename, release_id)
        try:
            buf = fp.read()
        finally:
            # Release the handle even when read() fails.
            fp.close()
        self.parse(buf, filename, release_id)

    def get_line_html(self, line, width=4):
        """Return the anchor markup for line number *line*.

        :param line: integer line number.
        :param width: minimum number of digits; the number is zero-padded
            on the left up to this width.
        """
        label = '%0*d' % (width, line)
        html = '''<a class='fline' name="%s">%s</a> ''' % (label, label)
        return html

    def _multilinetwist(self, frag, css):
        """Wrap *frag* in ``<span class=css>``, one span per line.

        For ``string`` and ``comment`` fragments the HTML metacharacters
        ``&``, ``<`` and ``>`` are escaped first.  Spans that end up empty
        (blank lines) are dropped.
        """
        if css == 'string' or css == 'comment':
            # '&' has to be escaped first, otherwise the ampersands produced
            # by the '<' / '>' replacements would be escaped a second time.
            frag = (frag.replace("&", "&amp;")
                        .replace("<", "&lt;")
                        .replace(">", "&gt;"))

        ss = '''<span class="%s">%s</span>''' % (css, frag)
        # Close and reopen the span at every newline so no span crosses lines.
        ss = ss.replace("\n", '</span>\n<span class="%s">' % css)
        ss = ss.replace('<span class="%s"></span>' % css, '')
        return ss

    def get_include_link(self, word, path):
        """Return a link labelled *word* to *path* under ``/lxr/source/<tree>``."""
        html = '''<a class='include' href="/lxr/source/%s%s">%s</a>''' % (
            self.tree['name'], path, word)
        return html

    def get_ident_link(self, ident):
        """Return a link to the identifier page of *ident* for this tree."""
        html = '''<a class='fid' href="/lxr/ident/%s?_i=%s">%s</a>''' % (
            self.tree['name'], ident, ident)
        return html

    def get_reserved_link(self, word):
        """Wrap *word* in a ``reserved`` span if reserved, else return it as is."""
        if self.is_reserved(word):
            return '<span class="reserved">%s</span>' % word
        return word

    def is_ident(self, word):
        """Return True if the symbol cache has a symbol id for *word*."""
        return symbolcache.get_symid(self.treeid, word) is not None

    def is_reserved(self, word):
        """Return True if *word* is in ``self.reserved``."""
        return word in self.reserved

    def get_idents(self, buf):
        """Return ``(token, line_no)`` pairs for non-reserved tokens in *buf*.

        Each non-empty line is split with ``self.identdef.split``; empty and
        reserved tokens are skipped.  Line numbers start at 1.
        """
        idents = []
        for line_no, text in enumerate(buf.split('\n'), 1):
            if not text:
                continue
            for token in self.identdef.split(text):
                if token and not self.is_reserved(token):
                    idents.append((token, line_no))
        return idents

    def _parse_code(self, frag):
        """Hook: render a ``code`` fragment as HTML.  Must be overridden."""
        raise NotImplementedError("Not Impl")

    def _is_package(self, word):
        """Return True if *word* starts with ``.`` or an ASCII letter.

        The keywords ``from`` and ``import`` themselves, and the empty
        string, are rejected.
        """
        if not word or word in ('from', 'import'):
            return False
        return word[0] == '.' or word[0] in string.ascii_letters

    def parse(self, buf, filename='', release_id=''):
        """Split *buf* into ``(type, text)`` fragments stored in ``self.frags``.

        Text between recognised fragments is recorded with type ``code``;
        every other fragment takes the ``type`` of the spec entry whose
        ``open`` pattern matched and extends through that entry's ``close``
        string.  Concatenating the fragment texts reproduces *buf*.

        :param buf: the text to split.
        :param filename: recorded on the instance only when *release_id*
            is given as well.
        :param release_id: see *filename*.
        """
        if filename and release_id:
            self.filename = filename
            self.release_id = release_id

        self.buf = buf
        self.pos = 0
        self.start = 0
        self.end = 0
        self.maxchar = len(buf)

        self.frags = []

        while self.pos < self.maxchar:
            open_match = self.open_re.search(self.buf, self.pos, self.maxchar)
            if not open_match:
                # Nothing else opens: the remainder is plain code.
                self.frags.append(('code', self.buf[self.pos:]))
                self.pos = self.maxchar
                continue

            left, right = open_match.start(), open_match.end()
            if self.pos < left:
                self.frags.append(('code', self.buf[self.pos:left]))

            # Index of the first non-empty group == index of the spec entry.
            match_groups = open_match.groups()
            i = 0
            while i < len(match_groups) and not match_groups[i]:
                i += 1
            fragtype = self.spec[i]['type']
            close_re = self.spec[i]['close']

            close_left = self.buf.find(close_re, right, self.maxchar)
            if close_left < 0:
                # Unterminated fragment (e.g. last line without a newline):
                # it runs to the end of the buffer.
                self.frags.append((fragtype, self.buf[left:]))
                break
            close_right = close_left + len(close_re)

            if close_re == '"' or close_re == "'":
                # NOTE(review): find_escape_char is defined elsewhere;
                # presumably it reports whether the quote found at
                # close_left is escaped -- confirm.  While it does, keep
                # looking for the next candidate closing quote.
                while find_escape_char(self.buf, close_left - 1, right - 1):
                    close_left = self.buf.find(close_re, close_right,
                                               self.maxchar)
                    if close_left < 0:
                        # No closing quote at all: take the rest of the
                        # buffer as the fragment.
                        close_right = self.maxchar
                        print('ERROR.')
                        break
                    close_right = close_left + len(close_re)

            self.frags.append((fragtype, self.buf[left:close_right]))
            self.pos = close_right

        # Sanity check: no input text may be lost or duplicated.
        _result = ''.join([i[1] for i in self.frags])
        assert _result == buf

    def _parse_include(self, frag):
        """Hook: render an ``include`` fragment as HTML.  Must be overridden."""
        raise NotImplementedError("Not Impl")

    def out(self):
        """Render ``self.frags`` as a ``<pre>`` block with line anchors.

        ``comment`` and ``string`` fragments go through
        :meth:`_multilinetwist`, ``include`` fragments through
        :meth:`_parse_include`, everything else through :meth:`_parse_code`.
        Trailing empty lines are dropped before numbering.
        """
        head = '<pre class="filecontent">'
        tail = '</pre>'

        htmls = []
        for fragtype, frag in self.frags:
            if fragtype in ('comment', 'string'):
                htmls.append(self._multilinetwist(frag, fragtype))
            elif fragtype == 'include':
                htmls.append(self._parse_include(frag))
            else:
                htmls.append(self._parse_code(frag))
        # Python 2: the rendered pieces are byte strings; decode them once.
        htmls = [html.decode("utf8") for html in htmls]
        tt = ''.join(htmls).split("\n")
        while tt and not tt[-1]:
            tt.pop()
        linewidth = max(len(str(len(tt))), 4)

        # NOTE(review): an anchor is emitted before the first line and after
        # every line, so the output ends with an anchor for line
        # len(tt) + 1.  Kept as is; confirm whether that is intended.
        line = 1
        htmls = [self.get_line_html(line, linewidth)]
        for i in tt:
            htmls.append(i)
            htmls.append('\n')
            line += 1
            htmls.append(self.get_line_html(line, linewidth))
        htmls.insert(0, head)
        htmls.append(tail)
        return ''.join(htmls)
Пример #2
0
class Genxref(object):
    """Populate the cross-reference database for one source tree.

    :meth:`main` registers every file of a tree version, records the symbol
    definitions reported by ``ctags`` and then records references to those
    symbols found by the per-language parsers.
    """

    def __init__(self, config, tree):
        """Store configuration and reserve the next free symbol id.

        :param config: mapping; ``swishdirbase``, ``swishbin`` and
            ``swishconf`` are read by :meth:`gensearch`.
        :param tree: mapping providing at least ``name`` and ``version``.
        """
        self.files = Files(tree)
        self.filestype = {}
        self.tree = tree
        self.commit_cnt = 0
        # Number of processed items between intermediate commits.
        self.MAX_COMMIT = 1000
        self.config = config
        self.symid = Symbol.next_symid()
        # Maps pathname -> File object; filled by init_files().  Created
        # here as well so init_files() also works without going via main().
        self.pathname_to_obj = {}

    def main(self, version):
        """Run the whole indexing pipeline for *version* of the tree."""
        self.init_tree()
        self.init_lang()
        self.pathname_to_obj = {}

        self.init_files('/', version)

        # Build the swish index (currently disabled).
        # self.gensearch(version)
        # Symbol definitions from ctags.
        self.symbols('/', version)
        # Symbol references.
        self.symref('/', version)

    def init_tree(self):
        """Resolve ``self.treeid``, via the cache first and the DB second."""
        name, version = self.tree['name'], self.tree['version']
        self.treeid = treecache.get_treeid(name, version)
        if self.treeid is None:
            self.treeid = Tree.query.get_treeid(name, version)
            assert self.treeid is not None
            treecache.load()

    def init_lang(self):
        """Instantiate every registered parser and register its lang types."""
        self.parses = {}
        for k, v in parses.iteritems():
            self.parses[k] = v(self.config, self.tree)

            # get_or_create() is called outside the assert on purpose:
            # asserts are stripped under ``python -O`` and the call must
            # still happen.
            lang_type = LangType.query.get_or_create(k, '')
            assert lang_type is not None
            for desc in v.typemap.values():
                lang_type = LangType.query.get_or_create(k, desc)
                assert lang_type is not None
        print(self.parses)
        langcache.load()

    def _walk_files(self, pathname, version):
        """Yield every non-directory path under *pathname*, breadth-first.

        Directory entries are queued in ``dirs + files`` order as returned
        by ``self.files.getdir``.
        """
        from collections import deque

        pending = deque([pathname])
        while pending:
            current = pending.popleft()
            if self.files.isdir(current, version):
                dirs, files = self.files.getdir(current, version)
                for name in dirs + files:
                    pending.append(os.path.join(current, name))
            else:
                yield current

    def init_files(self, pathname, version):
        """Create a ``File`` row for every file below *pathname*.

        The created objects are remembered in ``self.pathname_to_obj`` and
        committed in one transaction, after which the file cache is loaded.
        """
        for path in self._walk_files(pathname, version):
            f = File(self.treeid, path)
            f.filetype = self.files.gettype(path, version)
            db.session.add(f)
            self.pathname_to_obj[path] = f
        db.session.commit()
        filecache.load(self.treeid)

    def feedswish(self, pathname, version, swish):
        """Recursively write the non-empty files below *pathname* to swish.

        Each file is written to ``swish.stdin`` as a header block
        (``Path-Name``, ``Content-Length``, ``Document-Type``), a blank
        line and the file content.

        :param swish: object exposing a writable ``stdin`` (the process
            started by :meth:`gensearch`).
        """
        if self.files.isdir(pathname, version):
            dirs, files = self.files.getdir(pathname, version)
            for i in dirs + files:
                self.feedswish(os.path.join(pathname, i),
                               version,
                               swish)
            return

        # Skip files whose recorded type has no parser.
        _realfile = self.files.toreal(pathname, version)
        if _realfile in self.filestype:
            if self.filestype[_realfile] not in self.parses:
                return

        if self.files.getsize(pathname, version) > 0:
            fp = self.files.getfp(pathname, version)
            try:
                content = fp.read()
                swish_input = [
                    "Path-Name: %s\n" % pathname,
                    "Content-Length:  %s\n" % len(content),
                    "Document-Type: TXT\n",
                    "\n",
                    content]

                swish.stdin.write(''.join(swish_input))
            finally:
                # Close the handle even if reading or writing fails.
                fp.close()

    def gensearch(self, version):
        """Run swish over the tree to build ``<name>.<version>.index``."""
        index_file = "%s.%s.index" % (self.tree['name'], version)
        index_file = os.path.join(self.config['swishdirbase'], index_file)
        # NOTE(review): the command is a shell string built from config
        # values; they must be trusted and must not need shell quoting.
        cmd = '%s -S prog -i stdin -v 1 -c %s -f %s' % (
            self.config['swishbin'],
            self.config['swishconf'],
            index_file)
        swish = subprocess.Popen(cmd, stdin=subprocess.PIPE, shell=True)
        self.feedswish('.', version, swish)
        # communicate() closes stdin and waits for swish to finish.
        swish.communicate()

    def symbols(self, pathname, version):
        """Record ctags symbol definitions for every not-yet-indexed file.

        Files whose type has no parser are skipped.  The session is
        committed every ``self.MAX_COMMIT`` processed files and once at the
        end; the symbol cache is then loaded.
        """
        total_commit = 0
        for path in self._walk_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_indexed():
                continue

            typemap = self.parses[o.filetype].typemap
            tags = ctags(self.files.toreal(path, version), o.filetype)
            for sym, line, lang_type, _ext in tags:
                lang_typeid = langcache.get_typeid(o.filetype,
                                                   typemap[lang_type])
                symbol_obj = Symbol(self.treeid, sym, self.symid)
                defin = Definitions(self.symid, o.fileid, line, lang_typeid)
                db.session.add(symbol_obj)
                db.session.add(defin)
                self.symid += 1
            o.set_indexed()
            db.session.add(o)
            total_commit += 1
            if total_commit % self.MAX_COMMIT == 0:
                print(total_commit)
                db.session.commit()
        db.session.commit()
        print('')
        print('')
        symbolcache.load(self.treeid)

    def symref(self, pathname, version):
        """Record references to known symbols for every unprocessed file.

        Each file's identifiers come from its parser's ``get_idents``;
        identifiers without an entry in the symbol cache are ignored.  The
        session is committed every ``self.MAX_COMMIT`` additions and once
        at the end.
        """
        total_commit = 0
        for path in self._walk_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_refered():
                continue

            with open(self.files.toreal(path, version)) as _fp:
                _buf = _fp.read()
            words = self.parses[o.filetype].get_idents(_buf)
            for word, line in words:
                _symid = symbolcache.get_symid(self.treeid, word)
                if _symid is None:
                    continue
                ref = Ref(_symid, o.fileid, line)
                db.session.add(ref)
                total_commit += 1
                if total_commit % self.MAX_COMMIT == 0:
                    db.session.commit()
                    print(total_commit)
            o.set_refered()
            db.session.add(o)
            total_commit += 1
            if total_commit % self.MAX_COMMIT == 0:
                db.session.commit()
                print(total_commit)

            # Extra progress line every 10000 items (kept from the
            # original; it repeats the line printed just above).
            if total_commit % 10000 == 0:
                print(total_commit)
        db.session.commit()
        print('')