Пример #1
0
        def ensure_person(person):
            """Return the ``people`` row id for *person*, creating the row if absent.

            The lookup is done by name only.  When no row matches, a new
            ``DBPerson`` is built from *person* and inserted together with
            the e-mail address (passed through ``to_utf8`` and decoded as
            UTF-8 when it is not ``None``).
            """
            profile_msg = "Ensuring person %s for repository %d"
            profiler_start(profile_msg, (person.name, self.repo_id))
            printdbg("DBContentHandler: ensure_person %s <%s>",
                     (person.name, person.email))
            db_cursor = self.cursor

            encoded_name = to_utf8(person.name)

            # The e-mail is optional; only convert it when one is present.
            email = person.email
            if email is not None:
                email = to_utf8(email).decode("utf-8")

            select_sql = statement("SELECT id from people where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(select_sql,
                              (to_utf8(encoded_name).decode("utf-8"),))
            row = db_cursor.fetchone()
            if row:
                # Already known: reuse the stored id.
                person_id = row[0]
            else:
                new_person = DBPerson(None, person)
                insert_sql = statement(DBPerson.__insert__,
                                       self.db.place_holder)
                insert_args = (new_person.id,
                               to_utf8(new_person.name).decode("utf-8"),
                               email)
                db_cursor.execute(insert_sql, insert_args)
                person_id = new_person.id

            profiler_stop(profile_msg, (person.name, self.repo_id), True)

            return person_id
Пример #2
0
    def __insert_many(self):
        """Write every queued action to the database and empty the queue.

        Does nothing when both ``self.actions`` and ``self.commits`` are
        empty.  Only ``self.actions`` is written by this method;
        ``self.commits`` is consulted for the early-return check alone.

        On a MySQL backend a duplicate-entry error (MySQL error 1062) for
        a single action is ignored so the remaining actions are still
        inserted.  Any other ``MySQLdb.IntegrityError`` is re-raised
        instead of being silently dropped.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))
            # Loop-invariant work is done once, not once per action.
            insert_sql = statement(DBAction.__insert__, self.db.place_holder)
            on_mysql = isinstance(self.db, MysqlDatabase)
            if on_mysql:
                # Imported lazily so other backends do not need MySQLdb.
                import MySQLdb

            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                if not on_mysql:
                    cursor.execute(insert_sql, action_tuple)
                    continue

                try:
                    cursor.execute(insert_sql, action_tuple)
                except MySQLdb.IntegrityError as e:
                    # 1062 is "Duplicate entry": the row already exists,
                    # so skipping it is safe.  Everything else is a real
                    # integrity problem and must not be hidden.
                    if e.args[0] != 1062:
                        raise
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))
Пример #3
0
    def __insert_many(self):
        """Insert the queued actions one by one, then clear the queue.

        Returns immediately when neither actions nor commits are queued.
        The code shown here writes ``self.actions`` only; ``self.commits``
        is just part of the early-return test.

        With a ``MysqlDatabase`` backend, an insert that fails with MySQL
        error 1062 (duplicate entry) is skipped.  Other
        ``MySQLdb.IntegrityError`` failures propagate to the caller rather
        than being swallowed.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id,))
            # Build the statement and pick the backend once for all rows.
            insert_sql = statement(DBAction.__insert__, self.db.place_holder)
            on_mysql = isinstance(self.db, MysqlDatabase)
            if on_mysql:
                # Local import: MySQLdb is only required for this backend.
                import MySQLdb

            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                if on_mysql:
                    try:
                        cursor.execute(insert_sql, action_tuple)
                    except MySQLdb.IntegrityError as e:
                        # Only "Duplicate entry" (1062) may be ignored;
                        # re-raise any other integrity violation.
                        if e.args[0] != 1062:
                            raise
                else:
                    cursor.execute(insert_sql, action_tuple)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id,))
Пример #4
0
    def end(self):
        """Flush pending inserts, save the caches and release the connection.

        The cursor and the connection are closed even when flushing or
        saving the caches raises, so a failed run does not leave the
        connection open.  The original exception still propagates to the
        caller.  ``self.cnn`` is reset to ``None`` once it is closed.
        """
        try:
            # flush pending inserts
            printdbg("DBContentHandler: flushing pending inserts")
            self.__insert_many()

            # Save the caches to disk
            profiler_start("Saving caches to disk")
            self.__save_caches_to_disk()
            profiler_stop("Saving caches to disk", delete=True)
        finally:
            # Nested so the connection is closed even if closing the
            # cursor itself fails.
            try:
                self.cursor.close()
            finally:
                self.cnn.close()
                self.cnn = None
Пример #5
0
        def ensure_tag(tag):
            """Return the id of the ``tags`` row named *tag*, inserting it if missing."""
            profile_msg = "Ensuring tag %s for repository %d"
            profile_args = (tag, self.repo_id)
            profiler_start(profile_msg, profile_args)
            printdbg("DBContentHandler: ensure_tag %s", (tag,))
            db_cursor = self.cursor

            lookup_sql = statement("SELECT id from tags where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql, (tag,))
            row = db_cursor.fetchone()
            if row:
                # The tag is already stored: reuse its id.
                tag_id = row[0]
            else:
                new_tag = DBTag(None, tag)
                insert_sql = statement(DBTag.__insert__, self.db.place_holder)
                db_cursor.execute(insert_sql, (new_tag.id, new_tag.name))
                tag_id = new_tag.id

            profiler_stop(profile_msg, profile_args, True)

            return tag_id
Пример #6
0
        def ensure_branch(branch):
            """Return the id of the ``branches`` row named *branch*, inserting it if missing."""
            profile_msg = "Ensuring branch %s for repository %d"
            profile_args = (branch, self.repo_id)
            profiler_start(profile_msg, profile_args)
            printdbg("DBContentHandler: ensure_branch %s", (branch,))
            db_cursor = self.cursor

            lookup_sql = statement("SELECT id from branches where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql, (branch,))
            row = db_cursor.fetchone()
            if row:
                # The branch is already stored: reuse its id.
                branch_id = row[0]
            else:
                new_branch = DBBranch(None, branch)
                insert_sql = statement(DBBranch.__insert__,
                                       self.db.place_holder)
                db_cursor.execute(insert_sql,
                                  (new_branch.id, new_branch.name))
                branch_id = new_branch.id

            profiler_stop(profile_msg, profile_args, True)

            return branch_id
Пример #7
0
        def ensure_path(path, commit_id):
            """Make sure every component of *path* has a file node.

            *path* is split at the first ``"://"`` into a prefix and a
            ``/``-separated local path.  For each leading sub-path the
            ``self.file_cache`` dictionary is consulted.  Components that
            are missing are created through ``__add_new_file_and_link``,
            recorded through ``__add_file_path`` (with a leading
            ``<digits>://`` prefix stripped) and added to the cache.

            Returns the ``(node_id, parent_id)`` pair of the last component.
            """
            profiler_start("Ensuring path %s for repository %d",
                           (path, self.repo_id))
            printdbg("DBContentHandler: ensure_path %s", (path,))

            prefix, lpath = path.split("://", 1)
            prefix += "://"
            tokens = lpath.strip('/').split('/')

            # If the repo paths don't start with "/", the extra slash that
            # is added when rebuilding each sub-path has to be removed.
            # This does not change between iterations, so test it once.
            strip_leading_slash = ":///" not in path

            parent = -1
            node_id = None
            # str.split() always yields at least one token, so the loop
            # body runs at least once and parent_id is always bound below.
            for i, token in enumerate(tokens):
                rpath = prefix + '/' + '/'.join(tokens[:i + 1])
                if strip_leading_slash:
                    rpath = rpath.replace(':///', '://')
                printdbg("DBContentHandler: rpath: %s", (rpath,))
                try:
                    node_id, parent_id = self.file_cache[rpath]
                except KeyError:
                    # Only a cache miss is expected here; any other error
                    # must not be hidden.
                    pass
                else:
                    parent = node_id
                    continue

                # Rpath not in cache, add it
                node_id = self.__add_new_file_and_link(token, parent,
                                                       commit_id)
                parent_id = parent
                parent = node_id

                # Also add to file_paths
                self.__add_file_path(commit_id, node_id,
                                     re.sub(r'^\d+://', '', rpath))

                self.file_cache[rpath] = (node_id, parent_id)

            assert node_id is not None

            printdbg("DBContentHandler: path ensured %s = %d (%d)",
                     (path, node_id, parent_id))
            profiler_stop("Ensuring path %s for repository %d",
                          (path, self.repo_id), True)

            return node_id, parent_id
Пример #8
0
        def ensure_tag(tag):
            """Look *tag* up in the ``tags`` table and return its id.

            When no row with that name exists, a ``DBTag`` is created and
            inserted, and the id of the new object is returned instead.
            """
            label = "Ensuring tag %s for repository %d"
            profiler_start(label, (tag, self.repo_id))
            printdbg("DBContentHandler: ensure_tag %s", (tag,))
            cur = self.cursor

            query = statement("SELECT id from tags where name = ?",
                              self.db.place_holder)
            cur.execute(query, (tag,))
            found = cur.fetchone()
            if found:
                tag_id = found[0]
            else:
                # Unknown tag: store it and use the freshly assigned id.
                db_tag = DBTag(None, tag)
                insert = statement(DBTag.__insert__, self.db.place_holder)
                cur.execute(insert, (db_tag.id, db_tag.name))
                tag_id = db_tag.id

            profiler_stop(label, (tag, self.repo_id), True)

            return tag_id
Пример #9
0
        def ensure_branch(branch):
            """Look *branch* up in the ``branches`` table and return its id.

            When no row with that name exists, a ``DBBranch`` is created
            and inserted, and the id of the new object is returned instead.
            """
            label = "Ensuring branch %s for repository %d"
            profiler_start(label, (branch, self.repo_id))
            printdbg("DBContentHandler: ensure_branch %s", (branch,))
            cur = self.cursor

            query = statement("SELECT id from branches where name = ?",
                              self.db.place_holder)
            cur.execute(query, (branch,))
            found = cur.fetchone()
            if found:
                branch_id = found[0]
            else:
                # Unknown branch: store it and use the freshly assigned id.
                db_branch = DBBranch(None, branch)
                insert = statement(DBBranch.__insert__, self.db.place_holder)
                cur.execute(insert, (db_branch.id, db_branch.name))
                branch_id = db_branch.id

            profiler_stop(label, (branch, self.repo_id), True)

            return branch_id
Пример #10
0
    def __insert_many(self):
        """Bulk-insert the queued actions and commits, then commit.

        Returns without touching the database when both queues are empty.
        Each non-empty queue is written with one ``executemany`` call and
        then reset to an empty list.  The connection is committed at the
        end.
        """
        if not (self.actions or self.commits):
            return

        db_cursor = self.cursor

        if self.actions:
            action_rows = []
            for action in self.actions:
                action_rows.append((action.id, action.type, action.file_id,
                                    action.commit_id, action.branch_id))
            actions_label = "Inserting actions for repository %d"
            profiler_start(actions_label, (self.repo_id,))
            action_sql = statement(DBAction.__insert__, self.db.place_holder)
            db_cursor.executemany(action_sql, action_rows)
            self.actions = []
            profiler_stop(actions_label, (self.repo_id,))

        if self.commits:
            commit_rows = []
            for log in self.commits:
                # The message is passed through to_utf8 and decoded as
                # UTF-8 before it is handed to the driver.
                commit_rows.append((log.id, log.rev, log.committer,
                                    log.author, log.date,
                                    to_utf8(log.message).decode("utf-8"),
                                    log.composed_rev, log.repository_id))
            commits_label = "Inserting commits for repository %d"
            profiler_start(commits_label, (self.repo_id,))
            commit_sql = statement(DBLog.__insert__, self.db.place_holder)
            db_cursor.executemany(commit_sql, commit_rows)
            self.commits = []
            profiler_stop(commits_label, (self.repo_id,))

        transaction_label = "Committing inserts for repository %d"
        profiler_start(transaction_label, (self.repo_id,))
        self.cnn.commit()
        profiler_stop(transaction_label, (self.repo_id,))
Пример #11
0
    def __insert_many(self):
        """Bulk-insert queued actions and commits, link commits to issues, commit.

        Returns immediately when both queues are empty.  Actions and
        commits are each written with a single ``executemany`` call.  Every
        commit message is then scanned for issue keys of the form
        ``OA-<digits>`` or ``CCIESC-<digits>`` (case-insensitive), and one
        ``DBIssueCommitLink`` row ``(commit id, issue key)`` is inserted per
        match.  Finally the connection is committed.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            actions = [(a.id, a.type, a.file_id, a.commit_id, a.branch_id)
                       for a in self.actions]
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id,))
            cursor.executemany(
                statement(DBAction.__insert__, self.db.place_holder), actions)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id,))
        if self.commits:
            commits = [(c.id, c.rev, c.committer, c.author, c.date, c.date_tz,
                        c.author_date, c.author_date_tz, c.message,
                        c.composed_rev, c.repository_id)
                       for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id,))
            cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder), commits)

            # Raw string: "\d" must reach the regex engine unchanged.
            issue_key_re = re.compile(r'((?:(?:OA)|(?:CCIESC))-\d+)',
                                      re.IGNORECASE)
            link_sql = statement(DBIssueCommitLink.__insert__,
                                 self.db.place_holder)
            for log in self.commits:
                # Read id/message by name rather than by tuple position.
                # A missing message simply yields no issue links.
                for bug in issue_key_re.findall(log.message or ""):
                    cursor.execute(link_sql, (log.id, bug))
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id,))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id,))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d",
                      (self.repo_id,))
Пример #12
0
    def __insert_many(self):
        """Flush queued actions and commits and record issue/commit links.

        Nothing happens when both ``self.actions`` and ``self.commits``
        are empty.  Otherwise each non-empty queue is inserted with one
        ``executemany`` call and reset.  For every issue key matching
        ``OA-<digits>`` or ``CCIESC-<digits>`` (case-insensitive) found in
        a commit message, a ``DBIssueCommitLink`` row
        ``(commit id, issue key)`` is inserted.  The connection is
        committed last.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            actions = [(a.id, a.type, a.file_id, a.commit_id, a.branch_id)
                       for a in self.actions]
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBAction.__insert__, self.db.place_holder), actions)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))
        if self.commits:
            commits = [(c.id, c.rev, c.committer, c.author, c.date, c.date_tz,
                        c.author_date, c.author_date_tz, c.message,
                        c.composed_rev, c.repository_id) for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder), commits)

            # Raw string so that "\d" is a regex escape, not an (invalid)
            # string escape.
            p = re.compile(r'((?:(?:OA)|(?:CCIESC))-\d+)', re.IGNORECASE)
            link_sql = statement(DBIssueCommitLink.__insert__,
                                 self.db.place_holder)
            for c in self.commits:
                # Use the attributes directly instead of positional
                # indices into the row tuple.  A None message gives no
                # matches rather than a TypeError.
                for bug in p.findall(c.message or ""):
                    issue_commit_link = (c.id, bug)
                    cursor.execute(link_sql, issue_commit_link)
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id, ))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id, ))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d", (self.repo_id, ))
Пример #13
0
    def commit(self, commit):
        """Queue *commit* and its actions for insertion.

        A commit whose revision is already in ``self.revision_cache`` is
        ignored.  Otherwise a ``DBLog`` is created for it, committer and
        author are resolved to person ids, and one ``DBAction`` per action
        is queued with the file id returned by the handler for that
        action type.  Tag revisions are inserted right away.  Once at
        least ``MAX_ACTIONS`` actions are queued, the pending inserts are
        flushed.

        Raises:
            ValueError: if an action has a type other than
                ``A``, ``M``, ``D``, ``V``, ``C`` or ``R``.
        """
        if commit.revision in self.revision_cache:
            return

        profiler_start("New commit %s for repository %d", (commit.revision,
                                                           self.repo_id))

        log = DBLog(None, commit)
        log.repository_id = self.repo_id
        self.revision_cache[commit.revision] = log.id

        log.committer = self.__get_person(commit.committer)

        # Avoid a second person lookup when author and committer match.
        if commit.author == commit.committer:
            log.author = log.committer
        elif commit.author is not None:
            log.author = self.__get_person(commit.author)

        self.commits.append(log)

        printdbg("DBContentHandler: commit: %d rev: %s", (log.id, log.rev))

        # TODO: sort actions? R, A, D, M, V, C
        for action in commit.actions:
            printdbg("DBContentHandler: Action: %s", (action.type,))
            dbaction = DBAction(None, action.type)
            dbaction.commit_id = log.id

            branch = commit.branch or action.branch_f1
            branch_id = self.__get_branch(branch)
            dbaction.branch_id = branch_id

            # Paths are namespaced by branch id: "<branch_id>://<path>".
            prefix = "%d://" % (branch_id)
            path = prefix + action.f1

            if action.type == 'A':
                # A file has been added
                file_id = self.__action_add(path, prefix, log)
            elif action.type == 'M':
                # A file has been modified
                file_id = self.__get_file_for_path(path, log.id)[0]
            elif action.type == 'D':
                # A file has been deleted
                file_id = self.__action_delete(path, log)
            elif action.type == 'V':
                # A file has been renamed
                file_id = self.__action_rename(path, prefix, log, action,
                                               dbaction)
            elif action.type == 'C':
                # A file has been copied
                file_id = self.__action_copy(path, prefix, log, action,
                                             dbaction)
            elif action.type == 'R':
                # A file has been replaced
                file_id = self.__action_replace(path, prefix, log, action,
                                                dbaction)
                if file_id is None:
                    continue
            else:
                # Asserting a non-empty string is always true, so it can
                # never fail.  Raise explicitly so an unknown type cannot
                # fall through and reuse the previous action's file_id.
                raise ValueError("Unknown action type %s" % (action.type,))

            dbaction.file_id = file_id
            self.actions.append(dbaction)

        # Tags
        if commit.tags is not None:
            tag_revs = []
            for tag in commit.tags:
                tag_id = self.__get_tag(tag)
                db_tagrev = DBTagRev(None)
                tag_revs.append((db_tagrev.id, tag_id, log.id))

            self.cursor.executemany(statement(DBTagRev.__insert__,
                                              self.db.place_holder), tag_revs)

        if len(self.actions) >= self.MAX_ACTIONS:
            printdbg("DBContentHandler: %d actions inserting",
                     (len(self.actions),))
            self.__insert_many()

        profiler_stop("New commit %s for repository %d", (commit.revision,
                                                          self.repo_id), True)
Пример #14
0
class DBContentHandler(ContentHandler):
    """Content handler that stores repository data through a database object.

    Actions and commits are queued in ``self.actions`` / ``self.commits``
    and written by the private ``__insert_many`` flush.  Lookup caches are
    plain dictionaries that can be saved to, and restored from, a
    per-repository cache file.
    """

    # NOTE(review): not referenced in the part of the class shown here;
    # presumably the number of queued actions that triggers a flush.
    MAX_ACTIONS = 100

    def __init__(self, db):
        """Remember *db*; the connection is opened later by ``begin``."""
        ContentHandler.__init__(self)

        self.db = db
        self.cnn = None
        self.cursor = None

        self.__init_caches()

    def __init_caches(self):
        """Reset every in-memory cache to an empty dictionary."""
        self.file_cache = {}
        self.moves_cache = {}
        self.deletes_cache = {}
        self.revision_cache = {}
        self.branch_cache = {}
        self.tags_cache = {}
        self.people_cache = {}

    def __save_caches_to_disk(self):
        """Dump all caches, as one list, to ``self.cache_file``."""
        printdbg("DBContentHandler: Saving caches to disk (%s)",
                 (self.cache_file, ))
        cache = [
            self.file_cache, self.moves_cache, self.deletes_cache,
            self.revision_cache, self.branch_cache, self.tags_cache,
            self.people_cache
        ]
        # Protocol -1 is a binary format (assuming ``dump`` is pickle's),
        # so the file must be opened in binary mode.  ``with`` closes it
        # even if dumping fails.
        with open(self.cache_file, 'wb') as f:
            dump(cache, f, -1)

    def __load_caches_from_disk(self):
        """Restore all caches from ``self.cache_file``.

        The file must contain the seven caches in the order written by
        ``__save_caches_to_disk``.
        """
        printdbg("DBContentHandler: Loading caches from disk (%s)",
                 (self.cache_file, ))
        # NOTE(review): if ``load`` is pickle's, the cache file must come
        # from a trusted location, since unpickling can execute code.
        with open(self.cache_file, 'rb') as f:
            (self.file_cache, self.moves_cache, self.deletes_cache,
             self.revision_cache, self.branch_cache, self.tags_cache,
             self.people_cache) = load(f)

    def __del__(self):
        """Close the connection if it is still open."""
        # getattr: __del__ also runs when __init__ failed before
        # self.cnn was assigned.
        cnn = getattr(self, "cnn", None)
        if cnn is not None:
            cnn.close()

    def begin(self, order=None):
        """Open the connection and cursor and start with empty queues.

        *order* is accepted but not used by this implementation.
        """
        self.cnn = self.db.connect()

        self.cursor = self.cnn.cursor()

        self.commits = []
        self.actions = []

    def repository(self, uri):
        """Select the repository identified by *uri* and validate its cache.

        Sets ``self.repo_id`` and ``self.cache_file``.  When a cache file
        exists it is loaded and checked against the newest ``scmlog`` row
        of the repository.  When the database has no commits for the
        repository, an existing cache file is discarded instead.

        Raises:
            CacheFileMismatch: if the database already holds commits but
                the cache file is missing, lacks the last revision, or
                maps it to a different commit id.
        """
        cursor = self.cursor
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      self.db.place_holder), (uri, ))
        # NOTE(review): fails with TypeError when no row matches *uri*;
        # presumably the caller registers the repository beforehand.
        self.repo_id = cursor.fetchone()[0]

        last_rev = last_commit = None
        query = """SELECT rev, id from scmlog
                where id = (select max(id) from scmlog
                            where repository_id = ?)"""
        cursor.execute(statement(query, self.db.place_holder),
                       (self.repo_id, ))
        rs = cursor.fetchone()
        if rs is not None:
            last_rev, last_commit = rs

        filename = uri.replace('/', '_')
        self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)

        # if there's a previous cache file, just use it
        if os.path.isfile(self.cache_file):
            self.__load_caches_from_disk()

            if last_rev is not None:
                try:
                    commit_id = self.revision_cache[last_rev]
                except KeyError:
                    msg = "".join([
                        "Cache file %s is not up to date or it's corrupt: " %
                        (self.cache_file),
                        "Revision %s was not found in the cache file. " %
                        (last_rev),
                        "It's not possible to continue, the cache ",
                        "file should be removed and the database cleaned up"])
                    raise CacheFileMismatch(msg)
                if commit_id != last_commit:
                    # Cache and db don't match, removing cache
                    msg = "".join([
                        "Cache file %s is not up to date or it's corrupt: " %
                        (self.cache_file),
                        "Commit id mismatch for revision %s " % (last_rev),
                        "(File Cache:%d, Database: %d). " %
                        (commit_id, last_commit),
                        "It's not possible to continue, the cache ",
                        "file should be removed and the database cleaned up"])
                    raise CacheFileMismatch(msg)
            else:
                # Database looks empty (or corrupt) and we have
                # a cache file. We can just remove it and continue
                # normally
                self.__init_caches()
                os.remove(self.cache_file)
                printout("Database looks empty, removing cache file %s",
                         (self.cache_file, ))
        elif last_rev is not None:
            # There are data in the database,
            # but we don't have a cache file!!!
            msg = "".join([
                "Cache file %s is not up to date or it's corrupt: " %
                (self.cache_file),
                "Cache file cannot be found. ",
                "It's not possible to continue, the database ",
                "should be cleaned up"])
            raise CacheFileMismatch(msg)

    def __insert_many(self):
        """Write the queued actions and commits, then commit the transaction.

        Returns immediately when both queues are empty.  Actions are
        inserted one by one.  On a MySQL backend a duplicate-entry error
        (MySQL error 1062) for an action is ignored, while any other
        ``MySQLdb.IntegrityError`` is re-raised.  Commits are inserted
        with a single ``executemany`` call.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))
            # Loop-invariant work is done once, not once per action.
            insert_sql = statement(DBAction.__insert__, self.db.place_holder)
            on_mysql = isinstance(self.db, MysqlDatabase)
            if on_mysql:
                # Imported lazily so other backends do not need MySQLdb.
                import MySQLdb

            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                if on_mysql:
                    try:
                        cursor.execute(insert_sql, action_tuple)
                    except MySQLdb.IntegrityError as e:
                        # Only "Duplicate entry" (1062) may be skipped;
                        # other integrity errors must not be hidden.
                        if e.args[0] != 1062:
                            raise
                else:
                    cursor.execute(insert_sql, action_tuple)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))
        if self.commits:
            commits = [(c.id, c.rev, c.committer, c.author, c.commit_date,
                        c.author_date, to_utf8(c.message).decode("utf-8"),
                        c.composed_rev, c.repository_id)
                       for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder), commits)
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id, ))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id, ))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d", (self.repo_id, ))