def __insert_many(self):
    """Flush the pending action and commit queues to the database.

    Both queues are written with a single executemany() each, then the
    transaction is committed and the queues are reset.
    """
    if not self.actions and not self.commits:
        return

    cursor = self.cursor

    if self.actions:
        # Parameter rows for every queued action.
        action_rows = [(act.id, act.type, act.file_id,
                        act.commit_id, act.branch_id)
                       for act in self.actions]
        profiler_start("Inserting actions for repository %d",
                       (self.repo_id,))
        cursor.executemany(statement(DBAction.__insert__,
                                     self.db.place_holder), action_rows)
        self.actions = []
        profiler_stop("Inserting actions for repository %d",
                      (self.repo_id,))

    if self.commits:
        # Messages are normalised to unicode before insertion.
        commit_rows = [(lg.id, lg.rev, lg.committer, lg.author, lg.date,
                        to_utf8(lg.message).decode("utf-8"),
                        lg.composed_rev, lg.repository_id)
                       for lg in self.commits]
        profiler_start("Inserting commits for repository %d",
                       (self.repo_id,))
        cursor.executemany(statement(DBLog.__insert__,
                                     self.db.place_holder), commit_rows)
        self.commits = []
        profiler_stop("Inserting commits for repository %d",
                      (self.repo_id,))

    profiler_start("Committing inserts for repository %d", (self.repo_id,))
    self.cnn.commit()
    profiler_stop("Committing inserts for repository %d", (self.repo_id,))
def __insert_many(self): if not self.actions and not self.commits: return cursor = self.cursor if self.actions: profiler_start("Inserting actions for repository %d", (self.repo_id, )) for a in self.actions: action_tuple = (a.id, a.type, a.file_id, a.commit_id, a.branch_id, a.current_file_path) if isinstance(self.db, MysqlDatabase): import MySQLdb try: cursor.execute( statement(DBAction.__insert__, self.db.place_holder), action_tuple) except MySQLdb.IntegrityError, e: if e.args[0] == 1062: # Duplicate entry pass else: cursor.execute( statement(DBAction.__insert__, self.db.place_holder), action_tuple) self.actions = [] profiler_stop("Inserting actions for repository %d", (self.repo_id, ))
def __insert_many(self): if not self.actions and not self.commits: return cursor = self.cursor if self.actions: profiler_start("Inserting actions for repository %d", (self.repo_id,)) for a in self.actions: action_tuple = (a.id, a.type, a.file_id, a.commit_id, a.branch_id, a.current_file_path) if isinstance(self.db, MysqlDatabase): import MySQLdb try: cursor.execute(statement(DBAction.__insert__, self.db.place_holder), action_tuple) except MySQLdb.IntegrityError, e: if e.args[0] == 1062: # Duplicate entry pass else: cursor.execute(statement(DBAction.__insert__, self.db.place_holder), action_tuple) self.actions = [] profiler_stop("Inserting actions for repository %d", (self.repo_id,))
def ensure_person(person):
    """Return the database id for *person*, inserting a people row on miss.

    The lookup is by name; the email (when present) is decoded to
    unicode before being stored with a newly inserted row.
    """
    profiler_start("Ensuring person %s for repository %d",
                   (person.name, self.repo_id))
    printdbg("DBContentHandler: ensure_person %s <%s>",
             (person.name, person.email))
    cursor = self.cursor

    # name is the UTF-8 encoded form of the person's name.
    name = to_utf8(person.name)
    email = person.email
    if email is not None:
        email = to_utf8(email).decode("utf-8")

    # NOTE(review): name already went through to_utf8 above, so
    # to_utf8(name) here is redundant at best (double-encoding at
    # worst) — confirm to_utf8 is idempotent before simplifying.
    cursor.execute(statement(
        "SELECT id from people where name = ?",
        self.db.place_holder), (to_utf8(name).decode("utf-8"),))
    rs = cursor.fetchone()

    if not rs:
        # Unknown person: insert a fresh row and use its generated id.
        p = DBPerson(None, person)
        cursor.execute(statement(DBPerson.__insert__, self.db.place_holder),
                       (p.id, to_utf8(p.name).decode("utf-8"), email))
        person_id = p.id
    else:
        person_id = rs[0]

    profiler_stop("Ensuring person %s for repository %d",
                  (person.name, self.repo_id), True)
    return person_id
def __insert_many(self):
    """Flush pending actions and commits to the database.

    Besides inserting the rows, commit messages are scanned for issue
    keys of the form OA-<n> or CCIESC-<n> (case-insensitive) and a link
    row is written to the issue/commit link table for every match.
    """
    if not self.actions and not self.commits:
        return

    cursor = self.cursor

    if self.actions:
        actions = [(a.id, a.type, a.file_id, a.commit_id, a.branch_id)
                   for a in self.actions]
        profiler_start("Inserting actions for repository %d",
                       (self.repo_id,))
        cursor.executemany(statement(DBAction.__insert__,
                                     self.db.place_holder), actions)
        self.actions = []
        profiler_stop("Inserting actions for repository %d",
                      (self.repo_id,))

    if self.commits:
        commits = [
            (c.id, c.rev, c.committer, c.author, c.date, c.date_tz,
             c.author_date, c.author_date_tz, c.message, c.composed_rev,
             c.repository_id) for c in self.commits]
        profiler_start("Inserting commits for repository %d",
                       (self.repo_id,))
        cursor.executemany(statement(DBLog.__insert__,
                                     self.db.place_holder), commits)
        # Fix: raw string literal — "\d" in a plain string is an
        # invalid escape sequence (DeprecationWarning, future error).
        p = re.compile(r'((?:(?:OA)|(?:CCIESC))-\d+)', re.IGNORECASE)
        for commit in commits:
            # commit[8] is the message column of the tuples built above.
            m = p.findall(commit[8])
            for bug in m:
                issue_commit_link = (commit[0], bug)
                cursor.execute(statement(DBIssueCommitLink.__insert__,
                                         self.db.place_holder),
                               issue_commit_link)
        self.commits = []
        profiler_stop("Inserting commits for repository %d",
                      (self.repo_id,))

    profiler_start("Committing inserts for repository %d", (self.repo_id,))
    self.cnn.commit()
    profiler_stop("Committing inserts for repository %d", (self.repo_id,))
def repository(self, uri):
    """Select repository *uri* and cross-check the on-disk cache.

    Sets self.repo_id and self.cache_file.  Raises CacheFileMismatch
    when the cache file and the database disagree about the last
    stored commit (or when data exists but the cache is missing).
    """
    cursor = self.cursor
    cursor.execute(statement("SELECT id from repositories where uri = ?",
                             self.db.place_holder), (uri,))
    self.repo_id = cursor.fetchone()[0]

    last_rev = last_commit = None
    query = "SELECT rev, id from scmlog " + \
            "where id = (select max(id) from scmlog where repository_id = ?)"
    cursor.execute(statement(query, self.db.place_holder), (self.repo_id,))
    rs = cursor.fetchone()
    if rs is not None:
        last_rev, last_commit = rs

    filename = uri.replace("/", "_")
    self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)
    # if there's a previous cache file, just use it
    if os.path.isfile(self.cache_file):
        self.__load_caches_from_disk()
        if last_rev is not None:
            try:
                commit_id = self.revision_cache[last_rev]
            except KeyError:
                # Bug fix: the concatenated sentences previously ran
                # together without separators ("…cache fileIt's…").
                msg = (
                    "Cache file %s is not up to date or it's corrupt: "
                    % (self.cache_file)
                    + "Revision %s was not found in the cache file. "
                    % (last_rev)
                    + "It's not possible to continue, the cache "
                    + "file should be removed and the database cleaned up"
                )
                raise CacheFileMismatch(msg)
            if commit_id != last_commit:
                # Cache and db don't match, removing cache
                msg = (
                    "Cache file %s is not up to date or it's corrupt: "
                    % (self.cache_file)
                    + "Commit id mismatch for revision %s "
                      "(File Cache:%d, Database: %d). "
                    % (last_rev, commit_id, last_commit)
                    + "It's not possible to continue, the cache "
                    + "file should be removed and the database cleaned up"
                )
                raise CacheFileMismatch(msg)
        else:
            # Database looks empty (or corrupt) and we have
            # a cache file. We can just remove it and continue
            # normally
            self.__init_caches()
            os.remove(self.cache_file)
            printout("Database looks empty, removing cache file %s",
                     (self.cache_file,))
    elif last_rev is not None:
        # There are data in the database,
        # but we don't have a cache file!!!
        msg = (
            "Cache file %s is not up to date or it's corrupt: "
            % (self.cache_file)
            + "Cache file cannot be found. "
            + "It's not possible to continue, the database "
            + "should be cleaned up"
        )
        raise CacheFileMismatch(msg)
def repository(self, uri):
    """Select repository *uri* and validate the on-disk cache against it.

    Sets self.repo_id and self.cache_file; raises CacheFileMismatch
    on any cache/database inconsistency.
    """
    cursor = self.cursor
    cursor.execute(
        statement("SELECT id from repositories where uri = ?",
                  self.db.place_holder), (uri, ))
    self.repo_id = cursor.fetchone()[0]

    last_rev = last_commit = None
    query = "SELECT rev, id from scmlog " + \
            "where id = (select max(id) from scmlog where repository_id = ?)"
    cursor.execute(statement(query, self.db.place_holder), (self.repo_id, ))
    rs = cursor.fetchone()
    if rs is not None:
        last_rev, last_commit = rs

    filename = uri.replace('/', '_')
    self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)
    # if there's a previous cache file, just use it
    if os.path.isfile(self.cache_file):
        self.__load_caches_from_disk()
        if last_rev is not None:
            try:
                commit_id = self.revision_cache[last_rev]
            except KeyError:
                # Bug fix: sentence fragments were concatenated without
                # separators, producing an unreadable message.
                msg = "Cache file %s is not up to date or it's corrupt: " % (self.cache_file) + \
                      "Revision %s was not found in the cache file. " % (last_rev) + \
                      "It's not possible to continue, the cache " + \
                      "file should be removed and the database cleaned up"
                raise CacheFileMismatch(msg)
            if commit_id != last_commit:
                # Cache and db don't match, removing cache
                msg = "Cache file %s is not up to date or it's corrupt: " % (self.cache_file) + \
                      "Commit id mismatch for revision %s (File Cache:%d, Database: %d). " % (
                          last_rev, commit_id, last_commit) + \
                      "It's not possible to continue, the cache " + \
                      "file should be removed and the database cleaned up"
                raise CacheFileMismatch(msg)
        else:
            # Database looks empty (or corrupt) and we have
            # a cache file. We can just remove it and continue
            # normally
            self.__init_caches()
            os.remove(self.cache_file)
            printout("Database looks empty, removing cache file %s",
                     (self.cache_file, ))
    elif last_rev is not None:
        # There are data in the database,
        # but we don't have a cache file!!!
        msg = "Cache file %s is not up to date or it's corrupt: " % (self.cache_file) + \
              "Cache file cannot be found. " + \
              "It's not possible to continue, the database " + \
              "should be cleaned up"
        raise CacheFileMismatch(msg)
def __add_new_file_and_link(self, file_name, parent_id, commit_id, file_path):
    """Insert a new file row plus the link attaching it to its parent.

    Returns the id of the freshly inserted file.
    """
    new_file = DBFile(None, file_name)
    new_file.repository_id = self.repo_id
    self.cursor.execute(statement(DBFile.__insert__, self.db.place_holder),
                        (new_file.id, new_file.file_name,
                         new_file.repository_id))

    link = DBFileLink(None, parent_id, new_file.id, file_path)
    link.commit_id = commit_id
    self.cursor.execute(statement(DBFileLink.__insert__,
                                  self.db.place_holder),
                        (link.id, link.parent, link.child,
                         link.commit_id, link.file_path))

    return new_file.id
def foreach(self, cb, order=None):
    """Invoke *cb* on every pickled commit stored in _temp_log."""
    self.flush()
    cnn = self.db.connect()

    if order is None or order == ContentHandler.ORDER_REVISION:
        query = "SELECT object from _temp_log order by id desc"
    else:
        query = "SELECT object from _temp_log order by date asc"

    # We need to split the query to save memory
    icursor = ICursor(cnn.cursor(), self.INTERVAL_SIZE)
    icursor.execute(statement(query, self.db.place_holder))

    batch = icursor.fetchmany()
    while batch:
        for row in batch:
            buf = BytesIO(row[0])
            commit = load(buf)
            buf.close()
            cb(commit)
        batch = icursor.fetchmany()

    icursor.close()
    cnn.close()
def ensure_tag(tag):
    """Return the id of *tag*, inserting a new tags row when unknown."""
    profiler_start("Ensuring tag %s for repository %d", (tag, self.repo_id))
    printdbg("DBContentHandler: ensure_tag %s", (tag,))

    cursor = self.cursor
    cursor.execute(statement("SELECT id from tags where name = ?",
                             self.db.place_holder), (tag,))
    row = cursor.fetchone()

    if row:
        tag_id = row[0]
    else:
        # Unknown tag: create and persist a new row.
        new_tag = DBTag(None, tag)
        cursor.execute(statement(DBTag.__insert__, self.db.place_holder),
                       (new_tag.id, new_tag.name))
        tag_id = new_tag.id

    profiler_stop("Ensuring tag %s for repository %d",
                  (tag, self.repo_id), True)
    return tag_id
def ensure_branch(branch):
    """Return the id of *branch*, inserting a new branches row when unknown."""
    profiler_start("Ensuring branch %s for repository %d",
                   (branch, self.repo_id))
    printdbg("DBContentHandler: ensure_branch %s", (branch,))

    cursor = self.cursor
    cursor.execute(statement("SELECT id from branches where name = ?",
                             self.db.place_holder), (branch,))
    row = cursor.fetchone()

    if row:
        branch_id = row[0]
    else:
        # Unknown branch: create and persist a new row.
        new_branch = DBBranch(None, branch)
        cursor.execute(statement(DBBranch.__insert__, self.db.place_holder),
                       (new_branch.id, new_branch.name))
        branch_id = new_branch.id

    profiler_stop("Ensuring branch %s for repository %d",
                  (branch, self.repo_id), True)
    return branch_id
def __writer(self, queue):
    """Consumer thread: pickle Commit objects from *queue* into _temp_log.

    Any non-Commit item acts as the termination signal.  Rows are
    written in batches of 50 to bound memory usage.
    """
    cnn = self.db.connect()
    cursor = cnn.cursor()

    sql = "INSERT into _temp_log (rev, date, object) values (?, ?, ?)"
    pending = []
    while True:
        item = queue.get()
        if not isinstance(item, Commit):
            # Termination sentinel received.
            queue.done()
            break

        buf = StringIO()
        dump(item, buf, -1)
        blob = buf.getvalue()
        buf.close()
        pending.append((item.revision, item.date, self.db.to_binary(blob)))
        del item

        if len(pending) == 50:
            cursor.executemany(statement(sql, self.db.place_holder),
                               pending)
            cnn.commit()
            pending = []
        queue.done()

    if pending:
        # Flush whatever is left after the sentinel.
        cursor.executemany(statement(sql, self.db.place_holder), pending)
        cnn.commit()

    cursor.close()
    cnn.close()
def __writer(self, queue):
    """Writer thread: serialise commits from *queue* into _temp_log."""
    cnn = self.db.connect()
    cursor = cnn.cursor()
    insert_sql = ("INSERT into _temp_log "
                  "(rev, date, object) values (?, ?, ?)")

    batch = []
    while True:
        commit = queue.get()
        # If we receive a string, assume it's a kill
        # signal and end
        if isinstance(commit, str):
            queue.done()
            break

        stream = BytesIO()
        dump(commit, stream, -1)
        payload = stream.getvalue()
        stream.close()
        batch.append((commit.revision, commit.commit_date,
                      self.db.to_binary(payload)))
        del commit

        # Flush every 50 commits so memory stays bounded.
        if len(batch) == 50:
            cursor.executemany(statement(insert_sql, self.db.place_holder),
                               batch)
            cnn.commit()
            batch = []
        queue.done()

    if batch:
        cursor.executemany(statement(insert_sql, self.db.place_holder),
                           batch)
        cnn.commit()

    cursor.close()
    cnn.close()
def __add_new_copy(self, dbfilecopy):
    """Persist a DBFileCopy row describing a copy/move of a file."""
    params = (dbfilecopy.id, dbfilecopy.to_id, dbfilecopy.from_id,
              dbfilecopy.from_commit, dbfilecopy.new_file_name,
              dbfilecopy.action_id)
    self.cursor.execute(statement(DBFileCopy.__insert__,
                                  self.db.place_holder), params)
def ensure_tag(tag):
    """Look up *tag* in the tags table, creating it on first sight.

    Returns the tag's database id either way.
    """
    profiler_start("Ensuring tag %s for repository %d", (tag, self.repo_id))
    printdbg("DBContentHandler: ensure_tag %s", (tag,))
    cursor = self.cursor

    cursor.execute(statement("SELECT id from tags where name = ?",
                             self.db.place_holder), (tag,))
    existing = cursor.fetchone()
    if existing is None or not existing:
        created = DBTag(None, tag)
        cursor.execute(statement(DBTag.__insert__, self.db.place_holder),
                       (created.id, created.name))
        tag_id = created.id
    else:
        tag_id = existing[0]

    profiler_stop("Ensuring tag %s for repository %d",
                  (tag, self.repo_id), True)
    return tag_id
def __writer(self, queue):
    """Drain *queue*, pickling Commit objects into the _temp_log table."""
    cnn = self.db.connect()
    cursor = cnn.cursor()
    sql = "INSERT into _temp_log (rev, date, object) values (?, ?, ?)"

    def flush(rows):
        # Write the accumulated rows and commit the transaction.
        cursor.executemany(statement(sql, self.db.place_holder), rows)
        cnn.commit()

    rows = []
    while True:
        item = queue.get()
        # Anything that is not a Commit ends the loop.
        if not isinstance(item, Commit):
            queue.done()
            break

        buf = StringIO()
        dump(item, buf, -1)
        blob = buf.getvalue()
        buf.close()
        rows.append((item.revision, item.date, self.db.to_binary(blob)))
        del item

        # Batch writes of 50 keep memory bounded.
        if len(rows) == 50:
            flush(rows)
            rows = []
        queue.done()

    if rows:
        flush(rows)

    cursor.close()
    cnn.close()
def __insert_many(self):
    """Flush pending actions and commits to the database.

    Commit messages are additionally scanned for issue keys of the
    form OA-<n> or CCIESC-<n> (case-insensitive) and a link row is
    written to the issue/commit link table for every match.
    """
    if not self.actions and not self.commits:
        return

    cursor = self.cursor

    if self.actions:
        actions = [(a.id, a.type, a.file_id, a.commit_id, a.branch_id)
                   for a in self.actions]
        profiler_start("Inserting actions for repository %d",
                       (self.repo_id, ))
        cursor.executemany(
            statement(DBAction.__insert__, self.db.place_holder), actions)
        self.actions = []
        profiler_stop("Inserting actions for repository %d",
                      (self.repo_id, ))

    if self.commits:
        commits = [(c.id, c.rev, c.committer, c.author, c.date, c.date_tz,
                    c.author_date, c.author_date_tz, c.message,
                    c.composed_rev, c.repository_id) for c in self.commits]
        profiler_start("Inserting commits for repository %d",
                       (self.repo_id, ))
        cursor.executemany(
            statement(DBLog.__insert__, self.db.place_holder), commits)
        # Fix: raw string literal — "\d" in a plain string is an
        # invalid escape sequence (DeprecationWarning, future error).
        p = re.compile(r'((?:(?:OA)|(?:CCIESC))-\d+)', re.IGNORECASE)
        for commit in commits:
            # commit[8] is the message column of the tuples built above.
            m = p.findall(commit[8])
            for bug in m:
                issue_commit_link = (commit[0], bug)
                cursor.execute(
                    statement(DBIssueCommitLink.__insert__,
                              self.db.place_holder), issue_commit_link)
        self.commits = []
        profiler_stop("Inserting commits for repository %d",
                      (self.repo_id, ))

    profiler_start("Committing inserts for repository %d",
                   (self.repo_id, ))
    self.cnn.commit()
    profiler_stop("Committing inserts for repository %d",
                  (self.repo_id, ))
def __add_file_path(self, commit_id, file_id, path):
    """Add the latest full path of a given file_id and commit_id to the
    table file_paths.

    Strips the "branch://" style prefix from *path* when present.
    """
    if "://" in path:
        file_path = path.split("://", 1)[1]
    else:
        file_path = path

    db_file_path = DBFilePath(None, commit_id, file_id, file_path)
    self.cursor.execute(statement(DBFilePath.__insert__,
                                  self.db.place_holder),
                        (db_file_path.id, db_file_path.commit_id,
                         db_file_path.file_id, db_file_path.file_path))
def __action_rename(self, path, prefix, log, action, dbaction):
    """Process a renamed file.

    Resolves the source path (action.f2) of the rename, records a file
    copy row for it and, when the parent directory changed (i.e. this
    is really a move), also writes a new file link.  Returns the id of
    the renamed file.
    """
    new_parent_path = os.path.dirname(path)
    new_file_name = os.path.basename(path)

    # Commit in which the source revision was seen, if cached.
    from_commit_id = self.revision_cache.get(action.rev, None)
    if action.branch_f2:
        # Source lives on another branch: build a branch-prefixed path.
        branch_f2_id = self.__get_branch(action.branch_f2)
        old_path = "%d://%s" % (branch_f2_id, action.f2)
    else:
        old_path = prefix + action.f2
    file_id, parent_id = self.__get_file_for_path(old_path,
                                                  from_commit_id, True)

    dbfilecopy = DBFileCopy(None, file_id)
    dbfilecopy.action_id = dbaction.id
    dbfilecopy.from_commit = from_commit_id

    # -1 denotes the repository root as the new parent.
    if not new_parent_path or new_parent_path == prefix.strip('/'):
        new_parent_id = -1
    else:
        new_parent_id = self.__get_file_for_path(new_parent_path,
                                                 log.id)[0]
    if new_parent_id != parent_id:
        # It's not a simple rename, but a move operation
        # we have to write down the new link
        parent_id = new_parent_id
        dblink = DBFileLink(None, parent_id, file_id)
        dblink.commit_id = log.id
        self.cursor.execute(statement(DBFileLink.__insert__,
                                      self.db.place_holder),
                            (dblink.id, dblink.parent, dblink.child,
                             dblink.commit_id))

    # Remember the rename so later lookups of *path* resolve correctly.
    self.moves_cache[path] = old_path
    self.file_cache[path] = (file_id, parent_id)

    # Move/rename is a special case of copy.
    # There's not a
    # new file_id
    dbfilecopy.from_id = file_id
    dbfilecopy.new_file_name = new_file_name
    self.__add_new_copy(dbfilecopy)

    # Save also file_path
    self.__add_file_path(log.id, file_id, path)

    return file_id
def do_delete(self, delete_statement, params=None,
              error_message="Delete failed, data needs manual cleanup"):
    """Best-effort execution of *delete_statement* for this repository.

    Returns True immediately when no repository was resolved (nothing
    to delete).  Any exception from the delete is logged via printdbg
    and otherwise swallowed, preserving the original best-effort
    behaviour; the cursor is always closed when it was opened.
    """
    if self.repo_id is None:
        # Repo wasn't found anyway, so continue
        return True
    # You can't reference instance variables in default
    # parameters, so I have to do this.
    if params is None:
        params = (self.repo_id,)
    # Bug fix: delete_cursor must be pre-bound — if cursor() itself
    # raised, the finally clause crashed with NameError.
    delete_cursor = None
    try:
        delete_cursor = self.connection.cursor()
        execute_statement(statement(delete_statement, self.db.place_holder),
                          params, delete_cursor, self.db, error_message)
    except Exception:
        printdbg("Deletion exception")
    finally:
        if delete_cursor is not None:
            delete_cursor.close()
# NOTE(review): this fragment begins inside a try block and ends in the
# middle of a cursor.execute() call; the surrounding context (try header,
# remaining arguments) is not visible here.
    printdbg("Tables not created, database already exists")
    db_exists = True
except DatabaseException, e:
    printerr("Database error: %s", (e.message, ))
    return 1

if config.no_parse and not db_exists:
    # --no-parse only makes sense against a database with data in it.
    printerr("The option --no-parse must be used with an already " + \
             "filled database")
    return 1

# Add repository to Database
if db_exists:
    printdbg("Database exists, so looking for existing repository")
    cursor.execute(
        statement("SELECT id from repositories where uri = ?",
                  db.place_holder), (uri, ))
    rep = cursor.fetchone()
    initialize_ids(db, cursor)
    cursor.close()

if config.no_parse and rep is None:
    printerr("The option --no-parse must be used with an already " + \
             "filled database")
    return 1

if not db_exists or rep is None:
    # We consider the name of the repo as the last item of the root path
    name = uri.rstrip("/").split("/")[-1].strip()
    cursor = cnn.cursor()
    rep = DBRepository(None, uri, name, repo.get_type())
    cursor.execute(statement(DBRepository.__insert__, db.place_holder),
def commit(self, commit):
    """Queue *commit* and all of its actions for insertion.

    Resolves committer/author people rows, branch ids and per-action
    file ids, records tag revisions, and flushes the action queue once
    MAX_ACTIONS is reached.  Already-seen revisions are skipped.
    """
    if commit.revision in self.revision_cache:
        return

    profiler_start("New commit %s for repository %d",
                   (commit.revision, self.repo_id))

    log = DBLog(None, commit)
    log.repository_id = self.repo_id
    self.revision_cache[commit.revision] = log.id

    log.committer = self.__get_person(commit.committer)
    if commit.author == commit.committer:
        log.author = log.committer
    elif commit.author is not None:
        log.author = self.__get_person(commit.author)

    self.commits.append(log)

    printdbg("DBContentHandler: commit: %d rev: %s", (log.id, log.rev))

    # TODO: sort actions? R, A, D, M, V, C
    for action in commit.actions:
        printdbg("DBContentHandler: Action: %s", (action.type,))
        dbaction = DBAction(None, action.type)
        dbaction.commit_id = log.id

        branch = commit.branch or action.branch_f1
        branch_id = self.__get_branch(branch)
        dbaction.branch_id = branch_id

        prefix = "%d://" % (branch_id)
        path = prefix + action.f1

        if action.type == 'A':
            # A file has been added
            file_id = self.__action_add(path, prefix, log)
        elif action.type == 'M':
            # A file has been modified
            file_id = self.__get_file_for_path(path, log.id)[0]
        elif action.type == 'D':
            # A file has been deleted
            file_id = self.__action_delete(path, log)
        elif action.type == 'V':
            # A file has been renamed
            file_id = self.__action_rename(path, prefix, log, action,
                                           dbaction)
        elif action.type == 'C':
            # A file has been copied
            file_id = self.__action_copy(path, prefix, log, action,
                                         dbaction)
        elif action.type == 'R':
            # A file has been replaced
            file_id = self.__action_replace(path, prefix, log, action,
                                            dbaction)
            if file_id is None:
                continue
        else:
            # Bug fix: `assert "Unknown action type %s"` asserted a
            # non-empty string and therefore never fired; fail loudly.
            assert False, "Unknown action type %s" % (action.type)

        dbaction.file_id = file_id
        self.actions.append(dbaction)

    # Tags
    if commit.tags is not None:
        tag_revs = []
        for tag in commit.tags:
            tag_id = self.__get_tag(tag)
            db_tagrev = DBTagRev(None)
            tag_revs.append((db_tagrev.id, tag_id, log.id))
        self.cursor.executemany(statement(DBTagRev.__insert__,
                                          self.db.place_holder), tag_revs)

    if len(self.actions) >= self.MAX_ACTIONS:
        printdbg("DBContentHandler: %d actions inserting",
                 (len(self.actions),))
        self.__insert_many()

    profiler_stop("New commit %s for repository %d",
                  (commit.revision, self.repo_id), True)
# Driver script: wipe the scratch database, register the test repository
# and run DBContentHandler over it.
from io import BytesIO
from Database import create_database, ICursor

# Test repository to import.
uri = "http://svn.test-cvsanaly.org/svn/test"

# NOTE(review): DBRepository, statement, DBContentHandler and sys are used
# below but not imported in this fragment — presumably provided elsewhere.
db = create_database('mysql', 'dbcontenthandler', sys.argv[1], None,
                     'localhost')
cnn = db.connect()

# Tables to empty so every run starts from a clean slate.
tables = ['actions', 'branches', 'file_copies', 'file_links', 'files',
          'people', 'repositories', 'scmlog', 'tag_revisions', 'tags']

cursor = cnn.cursor()
for table in tables:
    query = "delete from %s" % (table)
    cursor.execute(statement(query, db.place_holder))
cursor.close()
cnn.commit()

# The repository name is the last component of the URI path.
name = uri.rstrip("/").split("/")[-1].strip()

cursor = cnn.cursor()
rep = DBRepository(None, uri, name, 'svn')
cursor.execute(statement(DBRepository.__insert__, db.place_holder),
               (rep.id, rep.uri, rep.name, rep.type))
cursor.close()
cnn.commit()

ch = DBContentHandler(db)
ch.begin()
ch.repository(uri)
# NOTE(review): fragment starts at an except clause; its try block and the
# tail of the final cursor.execute() call are outside this view.
except TableAlreadyExists:
    printdbg("Tables not created, database already exists")
    db_exists = True
except DatabaseException, e:
    printerr("Database error: %s", (e.message,))
    return 1

if config.no_parse and not db_exists:
    # --no-parse requires pre-existing data to work with.
    printerr("The option --no-parse must be used with an already " + \
             "filled database")
    return 1

# Add repository to Database
if db_exists:
    printdbg("Database exists, so looking for existing repository")
    cursor.execute(statement("SELECT id from repositories where uri = ?",
                             db.place_holder), (uri,))
    rep = cursor.fetchone()
    initialize_ids(db, cursor)
    cursor.close()

if config.no_parse and rep is None:
    printerr("The option --no-parse must be used with an already " + \
             "filled database")
    return 1

if not db_exists or rep is None:
    # We consider the name of the repo as the last item of the root path
    name = uri.rstrip("/").split("/")[-1].strip()
    cursor = cnn.cursor()
    rep = DBRepository(None, uri, name, repo.get_type())
    cursor.execute(statement(DBRepository.__insert__, db.place_holder),
# Driver script: reset the scratch database, register the test repository
# and run DBContentHandler over it.
from cStringIO import StringIO
from cPickle import dump, load
from Database import create_database, DBRepository, ICursor

# Test repository to import.
uri = "http://svn.test-cvsanaly.org/svn/test"

# NOTE(review): statement, DBContentHandler and sys are used below but not
# imported in this fragment — presumably provided elsewhere.
db = create_database('mysql', 'dbcontenthandler', sys.argv[1], None,
                     'localhost')
cnn = db.connect()

# Tables to empty so the run starts clean.
tables = ['actions', 'branches', 'file_copies', 'file_links', 'files',
          'people', 'repositories', 'scmlog', 'tag_revisions', 'tags']

cursor = cnn.cursor()
for table in tables:
    query = "delete from %s" % (table)
    cursor.execute(statement(query, db.place_holder))
cursor.close()
cnn.commit()

# The repository name is the last component of the URI path.
name = uri.rstrip("/").split("/")[-1].strip()

cursor = cnn.cursor()
rep = DBRepository(None, uri, name, 'svn')
cursor.execute(statement(DBRepository.__insert__, db.place_holder),
               (rep.id, rep.uri, rep.name, rep.type))
cursor.close()
cnn.commit()

ch = DBContentHandler(db)
ch.begin()
ch.repository(uri)
# We need to split the query to save memory
class DBContentHandler(ContentHandler):
    """Content handler that persists parsed repository history rows.

    Keeps in-memory caches (files, moves, deletes, revisions, branches,
    tags, people) that can be pickled to / restored from a per-repository
    cache file, and batches commit/action inserts.
    """

    # Flush queued actions to the database once this many accumulate.
    MAX_ACTIONS = 100

    def __init__(self, db):
        ContentHandler.__init__(self)
        self.db = db
        self.cnn = None      # database connection, opened in begin()
        self.cursor = None   # cursor over self.cnn
        self.__init_caches()

    def __init_caches(self):
        # Reset every in-memory lookup cache to an empty dict.
        self.file_cache = {}
        self.moves_cache = {}
        self.deletes_cache = {}
        self.revision_cache = {}
        self.branch_cache = {}
        self.tags_cache = {}
        self.people_cache = {}

    def __save_caches_to_disk(self):
        """Pickle all caches to self.cache_file as a single list."""
        printdbg("DBContentHandler: Saving caches to disk (%s)",
                 (self.cache_file, ))
        cache = [self.file_cache, self.moves_cache, self.deletes_cache,
                 self.revision_cache, self.branch_cache, self.tags_cache,
                 self.people_cache]
        # NOTE(review): text mode ('w') with a binary pickle protocol
        # (-1) — presumably fine on the platforms this targets; confirm.
        f = open(self.cache_file, 'w')
        dump(cache, f, -1)
        f.close()

    def __load_caches_from_disk(self):
        """Restore the caches written by __save_caches_to_disk()."""
        printdbg("DBContentHandler: Loading caches from disk (%s)",
                 (self.cache_file, ))
        f = open(self.cache_file, 'r')
        (self.file_cache, self.moves_cache, self.deletes_cache,
         self.revision_cache, self.branch_cache, self.tags_cache,
         self.people_cache) = load(f)
        f.close()

    def __del__(self):
        # Close the connection if begin() ever opened one.
        if self.cnn is not None:
            self.cnn.close()

    def begin(self, order=None):
        """Open the connection and reset the pending commit/action queues."""
        self.cnn = self.db.connect()
        self.cursor = self.cnn.cursor()
        self.commits = []
        self.actions = []

    def repository(self, uri):
        """Select repository *uri* and cross-check cache vs database.

        Sets self.repo_id and self.cache_file; raises CacheFileMismatch
        on any inconsistency between the pickled cache and the database.
        """
        cursor = self.cursor
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      self.db.place_holder), (uri, ))
        self.repo_id = cursor.fetchone()[0]

        last_rev = last_commit = None
        query = """SELECT rev, id from scmlog where id = (select max(id) from scmlog where repository_id = ?)"""
        cursor.execute(statement(query, self.db.place_holder),
                       (self.repo_id, ))
        rs = cursor.fetchone()
        if rs is not None:
            last_rev, last_commit = rs

        filename = uri.replace('/', '_')
        self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)

        # if there's a previous cache file, just use it
        if os.path.isfile(self.cache_file):
            self.__load_caches_from_disk()
            if last_rev is not None:
                try:
                    commit_id = self.revision_cache[last_rev]
                except KeyError:
                    msg = "".join([
                        "Cache file %s is not up to date or it's corrupt: " % \
                        (self.cache_file),
                        "Revision %s was not found in the cache file" % \
                        (last_rev),
                        "It's not possible to continue, the cache ",
                        "file should be removed and the database cleaned up"])
                    raise CacheFileMismatch(msg)
                if commit_id != last_commit:
                    # Cache and db don't match, removing cache
                    msg = "".join([
                        "Cache file %s is not up to date or it's corrupt: " % \
                        (self.cache_file),
                        "Commit id mismatch for revision %s " % (last_rev),
                        "(File Cache:%d, Database: %d). " % \
                        (commit_id, last_commit),
                        "It's not possible to continue, the cache ",
                        "file should be removed and the database cleaned up"])
                    raise CacheFileMismatch(msg)
            else:
                # Database looks empty (or corrupt) and we have
                # a cache file. We can just remove it and continue
                # normally
                self.__init_caches()
                os.remove(self.cache_file)
                printout("Database looks empty, removing cache file %s",
                         (self.cache_file, ))
        elif last_rev is not None:
            # There are data in the database,
            # but we don't have a cache file!!!
            msg = "".join([
                "Cache file %s is not up to date or it's corrupt: " % \
                (self.cache_file),
                "Cache file cannot be found",
                "It's not possible to continue, the database ",
                "should be cleaned up"])
            raise CacheFileMismatch(msg)

    def __insert_many(self):
        """Flush the pending action and commit queues to the database."""
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))
            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                if isinstance(self.db, MysqlDatabase):
                    import MySQLdb
                    try:
                        cursor.execute(
                            statement(DBAction.__insert__,
                                      self.db.place_holder), action_tuple)
                    except MySQLdb.IntegrityError, e:
                        if e.args[0] == 1062:
                            # Duplicate entry
                            # NOTE(review): IntegrityErrors other than
                            # 1062 are also silently swallowed by this
                            # structure — confirm that is intended.
                            pass
                else:
                    cursor.execute(
                        statement(DBAction.__insert__,
                                  self.db.place_holder), action_tuple)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))

        if self.commits:
            # Messages are normalised to unicode before insertion.
            commits = [(c.id, c.rev, c.committer, c.author, c.commit_date, \
                        c.author_date, to_utf8(c.message).decode("utf-8"), \
                        c.composed_rev, c.repository_id)
                       for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder), commits)
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id, ))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id, ))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d",
                      (self.repo_id, ))
# Driver script: reset the scratch database, register the test repository
# and run DBContentHandler over it.
from cStringIO import StringIO
from cPickle import dump, load
from Database import create_database, DBRepository, ICursor

# Test repository to import.
uri = "http://svn.test-cvsanaly.org/svn/test"

# NOTE(review): statement, DBContentHandler and sys are used below but not
# imported in this fragment — presumably provided elsewhere.
db = create_database ('mysql', 'dbcontenthandler', sys.argv[1], None,
                      'localhost')
cnn = db.connect ()

# Tables to empty so the run starts clean.
tables = ['actions', 'branches', 'file_copies', 'file_links', 'files',
          'people', 'repositories', 'scmlog', 'tag_revisions', 'tags']

cursor = cnn.cursor ()
for table in tables:
    query = "delete from %s" % (table)
    cursor.execute (statement (query, db.place_holder))
cursor.close ()
cnn.commit ()

# The repository name is the last component of the URI path.
name = uri.rstrip ("/").split ("/")[-1].strip ()

cursor = cnn.cursor ()
rep = DBRepository (None, uri, name, 'svn')
cursor.execute (statement (DBRepository.__insert__, db.place_holder),
                (rep.id, rep.uri, rep.name, rep.type))
cursor.close ()
cnn.commit ()

ch = DBContentHandler (db)
ch.begin ()
ch.repository (uri)
# We need to split the query to save memory