class Genxref(object):
    """Index one project: files, symbol definitions and symbol references.

    ``main()`` opens a database session for the project, records every file
    found under ``start_path``, stores the definitions reported by
    ``find_tags`` and finally stores one reference row per identifier that
    resolves to a known symbol.  Timing and counters are collected in
    ``self.track_info``.
    """

    def __init__(self, project_name, project_path, start_path='/'):
        """Remember the project location and reset all bookkeeping.

        :param project_name: name passed to ``create_session`` and the parsers.
        :param project_path: root handed to ``Files`` and the parsers.
        :param start_path: virtual path at which indexing starts.
        """
        self.files = Files(project_path)
        self.filestype = {}
        self.project_name = project_name
        self.project_path = project_path
        self.commit_cnt = 0
        # Number of added rows after which the session is committed.
        self.MAX_COMMIT = 1000
        self.start_path = start_path
        # symbol id -> filetype of the file that defined it (filled by symbols()).
        self.sym_filetype = {}
        self.track_info = {}

    def main(self):
        """Run the three indexing passes and return ``self.track_info``.

        ``track_info['t1']`` is the whole-second duration of the definition
        pass and ``track_info['t2']`` that of the reference pass.
        """
        self.session = create_session(self.project_name)
        self.symid = 1  # Symbol.next_symid()
        # Imported here, as in the original code, rather than at module level.
        from simpleparse import parses
        self.parses = {
            name: parser_cls(self.project_name, self.project_path)
            for name, parser_cls in parses.items()
        }
        self.pathname_to_obj = {}
        self.init_files(self.start_path)

        t0 = time.time()
        # ctags symbols
        self.symbols(self.start_path)
        t1 = time.time()
        # symbol references
        self.symref(self.start_path)
        t2 = time.time()

        self.track_info['t1'] = int(t1 - t0)
        self.track_info['t2'] = int(t2 - t1)
        return self.track_info

    def _walk_files(self, pathname):
        """Yield every non-directory path reachable from *pathname*.

        Traversal is breadth-first; the children of a directory are queued
        as ``dirs + files`` in the order ``self.files.getdir`` returns them.
        A *pathname* that is not a directory is yielded as-is.
        """
        from collections import deque

        pending = deque([pathname])
        while pending:
            current = pending.popleft()
            if self.files.isdir(current):
                dirs, files = self.files.getdir(current)
                for entry in dirs + files:
                    pending.append(os.path.join(current, entry))
            else:
                yield current

    def init_files(self, pathname):
        """Create a ``File`` row for every file under *pathname*.

        Fills ``self.pathname_to_obj`` and stores ``file_count`` and
        ``line_count`` in ``self.track_info``.  The session is committed
        once, after all files were added.
        """
        file_count = 0
        line_count = 0
        for path in self._walk_files(pathname):
            f = File(path)
            cnt = self.files.getlinecount(path)
            f.filetype = self.files.gettype(path)
            f.linecount = cnt
            self.session.add(f)
            file_count += 1
            line_count += cnt
            self.pathname_to_obj[path] = f
        self.session.commit()
        self.track_info['file_count'] = file_count
        self.track_info['line_count'] = line_count

    def symbols(self, pathname):
        """Store the definitions found in every not-yet-indexed file.

        A symbol name seen more than once during this call reuses the id it
        was first given.  ``track_info['total_symbol']`` receives the number
        of definition rows added (not the number of distinct symbols).
        """
        total_commit = 0
        exist_syms = {}  # symbol name -> symbol id handed out in this call
        for path in self._walk_files(pathname):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_indexed():
                continue
            logger.info('find tags: %s', path)
            tags = find_tags(self.files.toreal(path), o.filetype)
            for sym, line, lang_typeid in tags:
                if sym in exist_syms:
                    sym_id = exist_syms[sym]
                else:
                    sym_id = self.symid
                    exist_syms[sym] = sym_id
                    self.symid += 1
                    self.session.add(Symbol(sym, sym_id))
                self.sym_filetype[sym_id] = o.filetype
                self.session.add(
                    Definitions(sym_id, o.fileid, line, lang_typeid))
                total_commit += 1
                if total_commit % self.MAX_COMMIT == 0:
                    self.session.commit()
            o.set_indexed()
            self.session.add(o)
            logger.info('find %s tags: %s', len(tags), path)
        self.session.commit()
        self.track_info['total_symbol'] = total_commit
        logger.info('finish tags, total = %s', total_commit)

    def symref(self, pathname):
        """Store a ``Ref`` row for every identifier that names a known symbol.

        NOTE(review): the counter is bumped once per reference *and* once
        per processed file, so ``track_info['total_ref']`` is slightly
        larger than the number of ``Ref`` rows.  Kept as found; confirm
        whether consumers rely on it.
        """
        # Imported here, as in the original code, rather than at module level.
        from dbcache import symbolcache

        total_commit = 0
        for path in self._walk_files(pathname):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_refered():
                continue
            with open(self.files.toreal(path),
                      encoding="utf8", errors='ignore') as _fp:
                _buf = _fp.read()
            words = self.parses[o.filetype].get_idents(_buf)
            for word, line in words:
                if o.filetype == 'asm':
                    # Assembly calling a C function: the C name carries a
                    # leading underscore in the assembly source.
                    # startswith() also copes with an empty identifier.
                    name = word[1:] if word.startswith('_') else word
                    _symid = symbolcache.get_symid(self.project_name, name)
                    if _symid is None:
                        continue
                else:
                    _symid = symbolcache.get_symid(self.project_name, word)
                    if _symid is None:
                        continue
                    # NOTE(review): the file-type filter is applied to
                    # non-assembly files only; the original nesting was
                    # lost, confirm against history.
                    if not self.files.is_same_filetype(
                            o.filetype, self.sym_filetype.get(_symid)):
                        continue
                self.session.add(Ref(_symid, o.fileid, line))
                total_commit += 1
                if total_commit % self.MAX_COMMIT == 0:
                    self.session.commit()
                    print(total_commit)
            o.set_refered()
            self.session.add(o)
            total_commit += 1
            if total_commit % self.MAX_COMMIT == 0:
                self.session.commit()
                print(total_commit)
            if total_commit % 10000 == 0:
                print(total_commit)
        self.session.commit()
        self.track_info['total_ref'] = total_commit
        print()
class Genxref(object):
    """Index one version of a source tree into the shared ``db`` session.

    NOTE(review): this file contains an earlier class that is also named
    ``Genxref``; whichever definition comes last wins at import time.
    Confirm which of the two is meant to be kept.
    """

    def __init__(self, config, tree):
        """Remember *config* and *tree* and fetch the next free symbol id.

        :param config: mapping read for the ``swishdirbase``, ``swishbin``
            and ``swishconf`` keys and handed to every parser.
        :param tree: mapping read for the ``name`` and ``version`` keys and
            handed to ``Files`` and every parser.
        """
        self.files = Files(tree)
        self.filestype = {}
        self.tree = tree
        self.commit_cnt = 0
        self.MAX_COMMIT = 1000
        self.config = config
        self.symid = Symbol.next_symid()

    def main(self, version):
        """Run all indexing passes for *version*, starting at ``'/'``."""
        self.init_tree()
        self.init_lang()
        self.pathname_to_obj = {}
        self.init_files('/', version)
        # Building the swish index is currently disabled:
        # self.gensearch(version)
        # ctags symbols
        self.symbols('/', version)
        # symbol references
        self.symref('/', version)

    def init_tree(self):
        """Resolve ``self.treeid`` from the cache, falling back to the DB.

        :raises RuntimeError: when neither lookup knows the tree.
        """
        name = self.tree['name']
        version = self.tree['version']
        self.treeid = treecache.get_treeid(name, version)
        if self.treeid is None:
            self.treeid = Tree.query.get_treeid(name, version)
            if self.treeid is None:
                raise RuntimeError(
                    'unknown tree %s (version %s)' % (name, version))
            # NOTE(review): the cache reload is placed in the miss branch;
            # the original nesting was lost, confirm against history.
            treecache.load()

    def init_lang(self):
        """Instantiate every parser and make sure its language rows exist.

        The ``get_or_create`` calls are made unconditionally (they used to
        sit inside ``assert`` statements, which are removed under ``-O``).

        :raises RuntimeError: when a language type cannot be created.
        """
        self.parses = {}
        for k, v in parses.items():
            self.parses[k] = v(self.config, self.tree)
            for desc in [''] + list(v.typemap.values()):
                if LangType.query.get_or_create(k, desc) is None:
                    raise RuntimeError(
                        'cannot create language type %r/%r' % (k, desc))
        print(self.parses)
        langcache.load()

    def _walk_files(self, pathname, version):
        """Yield every non-directory path reachable from *pathname*.

        Traversal is breadth-first; the children of a directory are queued
        as ``dirs + files`` in the order ``self.files.getdir`` returns them.
        """
        from collections import deque

        pending = deque([pathname])
        while pending:
            current = pending.popleft()
            if self.files.isdir(current, version):
                dirs, files = self.files.getdir(current, version)
                for entry in dirs + files:
                    pending.append(os.path.join(current, entry))
            else:
                yield current

    def init_files(self, pathname, version):
        """Add a ``File`` row for every file under *pathname*.

        Fills ``self.pathname_to_obj``, commits once at the end and then
        reloads ``filecache`` for this tree.
        """
        for path in self._walk_files(pathname, version):
            f = File(self.treeid, path)
            f.filetype = self.files.gettype(path, version)
            db.session.add(f)
            self.pathname_to_obj[path] = f
        db.session.commit()
        filecache.load(self.treeid)

    def feedswish(self, pathname, version, swish):
        """Recursively write the files under *pathname* to ``swish.stdin``.

        Each non-empty file is sent as a ``Path-Name`` / ``Content-Length``
        / ``Document-Type`` header block followed by the file content.
        Files whose recorded type has no parser are skipped.
        """
        if self.files.isdir(pathname, version):
            dirs, files = self.files.getdir(pathname, version)
            for i in dirs + files:
                self.feedswish(os.path.join(pathname, i), version, swish)
            return

        _realfile = self.files.toreal(pathname, version)
        if (_realfile in self.filestype
                and self.filestype[_realfile] not in self.parses):
            return
        if self.files.getsize(pathname, version) <= 0:
            return

        fp = self.files.getfp(pathname, version)
        try:
            content = fp.read()
        finally:
            fp.close()
        # The pipe is binary, so send bytes and count the length in bytes.
        if isinstance(content, str):
            content = content.encode('utf-8')
        header = ''.join([
            "Path-Name: %s\n" % pathname,
            "Content-Length: %s\n" % len(content),
            "Document-Type: TXT\n",
            "\n",
        ]).encode('utf-8')
        swish.stdin.write(header + content)

    def gensearch(self, version):
        """Build ``<swishdirbase>/<tree name>.<version>.index`` with swish.

        The indexer is started with an argument list rather than a shell
        string, so paths containing spaces or shell metacharacters are
        passed through unchanged.
        """
        index_file = "%s.%s.index" % (self.tree['name'], version)
        index_file = os.path.join(self.config['swishdirbase'], index_file)
        cmd = [
            self.config['swishbin'],
            '-S', 'prog',
            '-i', 'stdin',
            '-v', '1',
            '-c', self.config['swishconf'],
            '-f', index_file,
        ]
        swish = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        self.feedswish('.', version, swish)
        swish.communicate()

    def symbols(self, pathname, version):
        """Store a ``Symbol`` and ``Definitions`` row for every ctags tag.

        Every tag gets a fresh symbol id.  The session is committed every
        1000 processed files and once more at the end, after which
        ``symbolcache`` is reloaded for this tree.
        """
        total_commit = 0
        for path in self._walk_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_indexed():
                continue
            typemap = self.parses[o.filetype].typemap
            tags = ctags(self.files.toreal(path, version), o.filetype)
            for sym, line, lang_type, ext in tags:
                lang_typeid = langcache.get_typeid(
                    o.filetype, typemap[lang_type])
                symbol_obj = Symbol(self.treeid, sym, self.symid)
                defin = Definitions(self.symid, o.fileid, line, lang_typeid)
                db.session.add(symbol_obj)
                db.session.add(defin)
                self.symid += 1
            o.set_indexed()
            db.session.add(o)
            # NOTE(review): counted per file; the original nesting was
            # lost, confirm against history.
            total_commit += 1
            if total_commit % 1000 == 0:
                print(total_commit)
                db.session.commit()
        db.session.commit()
        print()
        print()
        symbolcache.load(self.treeid)

    def symref(self, pathname, version):
        """Store a ``Ref`` row for every identifier that names a known symbol.

        Files are read as UTF-8 with undecodable bytes ignored.  The
        progress counter is bumped once per reference and once per file.
        """
        total_commit = 0
        for path in self._walk_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_refered():
                continue
            with open(self.files.toreal(path, version),
                      encoding="utf8", errors='ignore') as _fp:
                _buf = _fp.read()
            words = self.parses[o.filetype].get_idents(_buf)
            for word, line in words:
                _symid = symbolcache.get_symid(self.treeid, word)
                if _symid is None:
                    continue
                db.session.add(Ref(_symid, o.fileid, line))
                total_commit += 1
                if total_commit % 1000 == 0:
                    db.session.commit()
                    print(total_commit)
            o.set_refered()
            db.session.add(o)
            total_commit += 1
            if total_commit % 1000 == 0:
                db.session.commit()
                print(total_commit)
            if total_commit % 10000 == 0:
                print(total_commit)
        db.session.commit()
        print()