Example #1
    def run(self, repo, uri, db):
        self.db = db

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cnn = self.db.connect()

        cursor = cnn.cursor()
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (repo_uri, ))
        repo_id = cursor.fetchone()[0]

        files = []

        try:
            self.__create_table(cnn)
        except TableAlreadyExists:
            cursor.execute(
                statement("SELECT max(id) from file_types", db.place_holder))
            id = cursor.fetchone()[0]
            if id is not None:
                DBFileType.id_counter = id + 1

            files = self.__get_files_for_repository(repo_id, cursor)
        except Exception, e:
            raise ExtensionRunError(str(e))
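
Note on the statement() helper used throughout these examples: its definition is not included in any snippet. Judging only from the call sites (a SQL string with "?" markers plus the database's place_holder), it presumably rewrites those markers into whatever parameter style the underlying driver expects, e.g. "%s" for MySQLdb. A minimal sketch under that assumption, not the project's actual implementation:

    def statement(sql, place_holder):
        """Hypothetical sketch: rewrite '?' parameter markers to the
        driver-specific marker (e.g. '%s' for MySQLdb), leaving markers
        inside single-quoted literals untouched."""
        if place_holder == "?" or "?" not in sql:
            return sql
        chunks = sql.split("'")
        # Even-indexed chunks are outside quoted literals.
        for i in range(0, len(chunks), 2):
            chunks[i] = chunks[i].replace("?", place_holder)
        return "'".join(chunks)

With sqlite3 the placeholder is already "?", so queries pass through unchanged; with MySQLdb the same call site yields the "%s" form.
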
Example #2
    def run (self, repo, uri, db):
        self.db = db
        self.repo = repo

        path = uri_to_filename (uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path (path)
        else:
            repo_uri = uri

        path = uri_to_filename (uri)
        self.repo_uri = path or repo.get_uri ()

        cnn = self.db.connect ()

        cursor = cnn.cursor ()
        cursor.execute (statement ("SELECT id from repositories where uri = ?", db.place_holder), (repo_uri,))
        repo_id = cursor.fetchone ()[0]

        # If the table does not exist, the list of commits stays empty;
        # otherwise it is filled in the except block below
        commits = []

        try:
            self.__create_table (cnn)
        except TableAlreadyExists:
            cursor.execute (statement ("SELECT max(id) from patches", db.place_holder))
            id = cursor.fetchone ()[0]
            if id is not None:
                DBPatch.id_counter = id + 1

            commits = self.__get_patches_for_repository (repo_id, cursor)
        except Exception, e:
            raise ExtensionRunError (str (e))
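
The run() methods all open with the same uri_to_filename() check before resolving the repository URI. That helper is also not shown here; a rough sketch of the assumed behaviour, namely that it yields a local filesystem path for local checkouts and None for remote URIs (which is why the callers fall back to the raw uri):

    import os

    def uri_to_filename(uri):
        """Hypothetical sketch: return a local filesystem path for uri,
        or None when it points at a remote repository."""
        if uri.startswith("file://"):
            return uri[len("file://"):]
        if "://" in uri:
            # http://, svn://, git://, ... are treated as remote
            return None
        return os.path.abspath(uri)
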
Example #3
    def run(self, repo, uri, db):
        self.db = db

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cnn = self.db.connect()

        cursor = cnn.cursor()
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (repo_uri, ))
        repo_id = cursor.fetchone()[0]

        # If the table does not exist, the list of commits stays empty;
        # otherwise it is filled in the except block below
        commits = []

        try:
            self.__create_table(cnn)
        except TableAlreadyExists:
            cursor.execute(
                statement("SELECT max(id) from commits_lines",
                          db.place_holder))
            id = cursor.fetchone()[0]
            if id is not None:
                DBCommitLines.id_counter = id + 1

            commits = self.__get_commits_lines_for_repository(repo_id, cursor)
        except Exception, e:
            raise ExtensionRunError(str(e))
Example #4
    def run(self, repo, uri, db):
        self.db = db

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cnn = self.db.connect()

        cursor = cnn.cursor()
        cursor.execute(statement("SELECT id from repositories where uri = ?", db.place_holder), (repo_uri,))
        repo_id = cursor.fetchone()[0]

        files = []

        try:
            self.__create_table(cnn)
        except TableAlreadyExists:
            cursor.execute(statement("SELECT max(id) from file_types", db.place_holder))
            id = cursor.fetchone()[0]
            if id is not None:
                DBFileType.id_counter = id + 1

            files = self.__get_files_for_repository(repo_id, cursor)
        except Exception, e:
            raise ExtensionRunError(str(e))
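
Examples #1 through #4 share the same resume pattern: try to create the extension's table, and when TableAlreadyExists is raised, read max(id) to continue the id counter and reload the rows already processed. The __create_table side of that contract is not included in any snippet; a generic sketch of the idea, with the DDL left to the caller and the error detection deliberately simplified:

    class TableAlreadyExists(Exception):
        pass

    def create_table(cnn, ddl):
        """Hypothetical sketch: run a CREATE TABLE statement and convert
        the driver's 'table already exists' error into TableAlreadyExists
        so callers can switch to resume mode."""
        cursor = cnn.cursor()
        try:
            cursor.execute(ddl)
            cnn.commit()
        except Exception as e:
            # Real extensions check driver-specific errors (OperationalError
            # for sqlite3, error code 1050 for MySQLdb); matching on the
            # message keeps this sketch driver-agnostic.
            if "already exists" in str(e).lower():
                raise TableAlreadyExists()
            raise
        finally:
            cursor.close()
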
Example #5
    def run(self, repo, uri, db):
        """
        Parses all the commit messages from the WordPress repository to identify
        code contributions made by developers without access to the repository.
        """

        self.db = db
        self.db_content_handler = DBContentHandler(self.db)
        self.db_content_handler.begin()

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cnn = self.db.connect()

        cursor = cnn.cursor()
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (repo_uri, ))
        repo_id = cursor.fetchone()[0]

        self.db_content_handler.repo_id = repo_id

        self.__maybe_create_column(cnn)

        cursor.execute(
            statement("SELECT id, message from scmlog where repository_id = ?",
                      db.place_holder), (repo_id, ))
        write_cursor = cnn.cursor()
        rs = cursor.fetchmany()

        while rs:
            for scmlog_id, message in rs:
                person_id = self.__get_person_id_from_message(message)

                if person_id:
                    write_cursor.execute(
                        statement(
                            "UPDATE scmlog SET wordpress_author_id = ? WHERE id = ?",
                            db.place_holder), (person_id, scmlog_id))

            rs = cursor.fetchmany()

        cnn.commit()
        write_cursor.close()
        cursor.close()
        cnn.close()
Example #6
    def __process_finished_jobs(self, job_pool, write_cursor, db):
#        start = datetime.now()
        finished_job = job_pool.get_next_done(0)
        processed_jobs = 0
        # commit_id is the commit ID. For some reason, the 
        # documentation advocates tablename_id as the reference,
        # but in the source, these are referred to as commit IDs.
        # Don't ask me why!
        while finished_job is not None:
            file_contents = None
                        
            if not Config().no_content:
                file_contents = str(finished_job.file_contents)
            
            query = """
                insert into content(commit_id, file_id, content, loc, size) 
                    values(?,?,?,?,?)"""
            insert_statement = statement(query, db.place_holder)
            parameters = (finished_job.commit_id,
                          finished_job.file_id,
                          file_contents,
                          finished_job.file_number_of_lines,
                          finished_job.file_size)
                                
            execute_statement(insert_statement, parameters, write_cursor, db,
                       "Couldn't insert, duplicate record?", 
                       exception=ExtensionRunError)
            
            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)
            
        return processed_jobs
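
Example #6 hands the insert to an execute_statement() wrapper that receives the prepared statement, its parameters, the cursor, the db object, an error message and an exception class. Its body is not part of these snippets; a minimal sketch assuming its job is simply to turn driver errors into the extension's own exception type (the real helper is presumably more careful about duplicate-key errors):

    def execute_statement(sql, parameters, cursor, db,
                          error_message, exception=Exception):
        """Hypothetical sketch: run one parameterized statement, wrapping
        any driver error in the caller-supplied exception class. db is
        accepted only to mirror the call sites; a fuller version would use
        it to tell driver-specific error types apart."""
        try:
            cursor.execute(sql, parameters)
        except Exception as e:
            raise exception("%s (%s)" % (error_message, str(e)))
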
Example #7
    def __get_patches_for_repository(self, repo_id, cursor):
        query = """SELECT p.commit_id from patches p, scmlog s 
                WHERE p.commit_id = s.id and repository_id = ?"""
        cursor.execute(statement(query, self.db.place_holder), (repo_id, ))
        commits = [res[0] for res in cursor.fetchall()]

        return commits
Example #8
    def get_path(self, repo=None, repo_path=None):
        if not self.current:
            return None

        revision, commit_id, file_id, action_type, composed = self.current
        if composed:
            rev = revision.split("|")[0]
        else:
            rev = revision
        cursor = self.cnn.cursor()
        cursor.execute(statement(self.__path_query__, self.db.place_holder),
                       (file_id, commit_id))
        file_link = cursor.fetchone()
        relative_path = None
        if repo is None:
            relative_path = file_link[1]
        else:
            try:
                while file_link:
                    if repo.is_ancestor(repo_path, file_link[0], rev):
                        relative_path = file_link[1]
                        break
                    else:
                        file_link = cursor.fetchone()
            except CommandError as e:
                printerr(str(e) + '\n' + e.error)

        cursor.close()
        if relative_path is None:
            return None
        else:
            return relative_path.strip("/")
Example #9
    def __process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done()

        args = []

        while job is not None:
            authors = job.get_authors()
            file_id = job.get_file_id()
            commit_id = job.get_commit_id()

            a = [(self.id_counter + i, file_id, commit_id, self.authors[key],
                  authors[key]) for i, key in enumerate(authors.keys())]
            args.extend(a)
            self.id_counter += len(a)

            if unlocked:
                job = job_pool.get_next_done_unlocked()
            else:
                job = job_pool.get_next_done(0.5)

        if args:
            write_cursor.executemany(
                statement(self.__insert__, self.db.place_holder), args)
            del args
Example #10
    def fetchReleaseRevisions(self):

        cursor = self.connection.cursor()
        sql = "SELECT scmlog.id, tag_revisions.tag_id " + \
              "FROM scmlog, tag_revisions, actions " + \
              "WHERE scmlog.id = tag_revisions.commit_id " + \
              "AND actions.commit_id = scmlog.id " + \
              "AND actions.branch_id = 1 " + \
              "AND scmlog.repository_id=? " + \
              "ORDER BY tag_revisions.tag_id, scmlog.id"
        cursor.execute(statement(sql, self.db.place_holder),
                       (self.repositoryId, ))

        rs = cursor.fetchmany()

        while rs:
            for row in rs:
                releaseRevision = ReleaseRevisionEntity()
                release = self.releaseDict.getEntity(row[1])
                commit = self.revisionDict.getCommit(row[0])
                action = commit.getActions().values()[0]
                revision = self.revisionDict.getEntity(action.getDbId())
                releaseRevision.setRelease(release)
                releaseRevision.setRevision(revision)
                self.releaseRevisionDict.add(releaseRevision)

            rs = cursor.fetchmany()

        cursor.close()
Example #11
    def run(self, repo, uri, db):            
        # Start the profiler, per every other extension
        profiler_start("Running FileCount extension")
        
        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        read_cursor = connection.cursor()
        write_cursor = connection.cursor()
        
        # Try to get the repository and get its ID from the database
        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(statement( \
                    "SELECT id from repositories where uri = ?", \
                    db.place_holder), (repo_uri,))
            repo_id = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError( \
                    "FileCount extension is not supported for %s repos" % \
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))
Example #12
    def run(self, repo, uri, db):
        profiler_start("Running HunkBlame extension")

        self.db = db

        cnn = self.db.connect()
        read_cursor = cnn.cursor()
        write_cursor = cnn.cursor()
        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(
                statement("SELECT id from repositories where uri = ?",
                          db.place_holder), (repo_uri, ))
            repoid = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError(
                "HunkBlame extension is not supported for %s repositories" %
                (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError(
                "Error creating repository %s. Exception: %s" %
                (repo.get_uri(), str(e)))
Example #13
   def _do_backout(self, repo, uri, db, backout_statement):
       connection = db.connect()
       repo_cursor = connection.cursor()
       repo_uri = get_repo_uri(uri, repo)
       
       try:
           repo_id = get_repo_id(repo_uri, repo_cursor, db)
       except RepoNotFound:
           # Repository isn't in there, so it's likely already backed out
           printerr("Repository not found, is it in the database?")
           return True
       finally:
           repo_cursor.close()
         
       update_cursor = connection.cursor()
 
       execute_statement(statement(backout_statement, db.place_holder),
                           (repo_id,),
                           update_cursor,
                           db,
                           "Couldn't backout extension",
                           exception=ExtensionBackoutError)
       update_cursor.close()
       connection.commit()
       connection.close()
Example #14
    def __get_patches_for_repository (self, repo_id, cursor):
        query = "SELECT p.commit_id from patches p, scmlog s " + \
                "WHERE p.commit_id = s.id and repository_id = ?"
        cursor.execute (statement (query, self.db.place_holder), (repo_id,))
        commits = [res[0] for res in cursor.fetchall ()]

        return commits
Example #15
    def __get_files_for_repository(self, repo_id, cursor):
        query = "SELECT ft.file_id from file_types ft, files f " + \
                "WHERE f.id = ft.file_id and f.repository_id = ?"
        cursor.execute(statement(query, self.db.place_holder), (repo_id, ))
        files = [res[0] for res in cursor.fetchall()]

        return files
Example #16
 def get_file_id(self, file_path, commit_id):
     """Ask for the file_id for a given file_path and commit_id"""
     
     if config.debug:
         profiler_start("Getting file id for file_path %s and commit_id %d",
                         (file_path, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     cursor = cnn.cursor()
     query = """SELECT file_id from actions
                WHERE binary current_file_path = ? AND commit_id = ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder),
                     (file_path, commit_id))
     try:
         file_id = cursor.fetchone()[0]
     except:
         file_id = None
     
     cursor.close()
     cnn.close()
     
     if config.debug:
         profiler_stop("Getting file id for file_path %s and commit_id %d",
                        (file_path, commit_id), True)
     
     return file_id
Example #17
    def __buildReleaseRevision(self, fileId, commitId, releaseId,
                               resolvedFiles):

        cursor = self.connection.cursor()
        sql = "SELECT file_id " + \
              "FROM file_links " + \
              "WHERE parent_id=? " + \
              "AND commit_id<=?"
        cursor.execute(statement(sql, self.db.place_holder),
                       (fileId, commitId))

        fileLinks = []

        rs = cursor.fetchmany()

        while rs:
            for row in rs:
                fileLinks.append(row[0])

            rs = cursor.fetchmany()

        cursor.close()

        if len(fileLinks) == 0:
            resolvedFiles.append([fileId, commitId, releaseId])
        else:
            for fileLink in fileLinks:
                self.__buildReleaseRevision(fileLink, commitId, releaseId,
                                            resolvedFiles)
Example #18
    def fetchReleases(self):

        cursor = self.connection.cursor()
        sql = "SELECT id, name " +\
              "FROM tags " + \
              "WHERE id IN (SELECT DISTINCT tag_revisions.tag_id " + \
              "FROM tag_revisions, scmlog, actions " + \
              "WHERE tag_revisions.commit_id = scmlog.id " + \
              "AND actions.commit_id = scmlog.id " + \
              "AND actions.branch_id = 1 " + \
              "AND scmlog.repository_id=?)"
        cursor.execute(statement(sql, self.db.place_holder),
                       (self.repositoryId, ))

        rs = cursor.fetchmany()

        while rs:
            for row in rs:
                release = ReleaseEntity()
                release.setDbId(row[0])
                release.setName(row[1])
                self.releaseDict.add(release)

            rs = cursor.fetchmany()

        cursor.close()
Example #19
    def __get_files_for_repository(self, repo_id, cursor):
        query = "SELECT ft.file_id from file_types ft, files f " + \
                "WHERE f.id = ft.file_id and f.repository_id = ?"
        cursor.execute(statement(query, self.db.place_holder), (repo_id,))
        files = [res[0] for res in cursor.fetchall()]

        return files
Example #20
 def __find_previous_commit(self, file_id, commit_id):
     query = """select a.commit_id, a.action_type, c.rev from _action_files_cache a,scmlog c
         where a.commit_id=c.id and a.file_id=?
         order by c.date
     """
     cnn = self.db.connect()
     aux_cursor = cnn.cursor()
     aux_cursor.execute(statement(query, self.db.place_holder), (file_id, ))
     all_commits = aux_cursor.fetchall()
     aux_cursor.close()
     cnn.close()
     pre_commit_id = None
     pre_rev = None
     for cur_commit_id, type, cur_rev in all_commits:
         if cur_commit_id == commit_id:
             #Nothing to blame for other types
             if type != 'M' and type != 'R':
                 raise NotValidHunkWarning(
                     "Wrong commit to blame: commit type: %s" % type)
             else:
                 break
         else:
             pre_commit_id = cur_commit_id
             pre_rev = cur_rev
     else:
         raise NotValidHunkWarning(
             "No previous commit found for file %d at commit %d" %
             (file_id, commit_id))
     if pre_commit_id is None or pre_rev is None:
         raise NotValidHunkWarning(
             "No previous commit found for file %d at commit %d" %
             (file_id, commit_id))
     return pre_commit_id, pre_rev
Example #21
    def process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done(0.5)

        args = []

        processed_jobs = 0
        while job is not None:
            if not job.failed:
                a = self.populate_insert_args(job)
                args.extend(a)
                self.id_counter += len(a)
            processed_jobs += 1
            if unlocked:
                job = job_pool.get_next_done_unlocked()
            else:
                job = job_pool.get_next_done(0)

        if len(args) > 0:
            write_cursor.executemany(
                statement(self.__insert__, self.db.place_holder), args)
            del args
        return processed_jobs
Example #22
 def get_path_from_database(self, file_id, commit_id):
     """Returns the last valid path for a given file_id at commit_id
        (May have been removed afterwards!)"""
     
     if config.debug:
         profiler_start("Getting full file path for file_id %d and \
                         commit_id %d", (file_id, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     
     cursor = cnn.cursor()
     query = """SELECT current_file_path from actions
                WHERE file_id=? AND commit_id <= ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder), (file_id, commit_id))
     try:
         file_path = cursor.fetchone()[0]
     except:
         file_path = None
     
     cursor.close()
     cnn.close()
     
     printdbg("get_path_from_database:\
               Path for file_id %d at commit_id %d: %s",
              (file_id, commit_id, file_path))
     if config.debug:
         profiler_stop("Getting full file path for file_id %d and\
                          commit_id %d", (file_id, commit_id), True)
     return file_path
Example #23
    def run(self, repo, uri, db):
        profiler_start("Running Blame extension")

        self.db = db

        cnn = self.db.connect()
        read_cursor = cnn.cursor()
        write_cursor = cnn.cursor()

        blames = []

        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(statement("SELECT id from repositories " + \
                                          "where uri = ?", db.place_holder), 
                                          (repo_uri,))
            repoid = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError("Blame extension is not supported for " + \
                                    "%s repositories" % (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError("Error creating repository %s. " + \
                                    "Exception: %s" % (repo.get_uri(), str(e)))
Example #24
 def get_file_id(self, file_path, commit_id):
     """Ask for the file_id for a given file_path and commit_id"""
     
     if config.debug:
         profiler_start("Getting file id for file_path %s and commit_id %d",
                         (file_path, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     cursor = cnn.cursor()
     query = """SELECT file_id from file_paths
                WHERE file_path = ? AND commit_id <= ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder),
                     (file_path, commit_id))
     try:
         file_id = cursor.fetchone()[0]
     except:
         file_id = None
     
     cursor.close()
     cnn.close()
     
     if config.debug:
         profiler_stop("Getting file id for file_path %s and commit_id %d",
                        (file_path, commit_id), True)
     
     return file_id
Example #25
    def run(self, repo, uri, db):
        # Start the profiler, per every other extension
        profiler_start("Running FileCount extension")

        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        read_cursor = connection.cursor()
        write_cursor = connection.cursor()

        # Try to get the repository and get its ID from the database
        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(statement( \
                    "SELECT id from repositories where uri = ?", \
                    db.place_holder), (repo_uri,))
            repo_id = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError( \
                    "FileCount extension is not supported for %s repos" % \
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))
Example #26
 def get_path_from_database(self, file_id, commit_id):
     """Returns the last valid path for a given file_id at commit_id
        (May have been removed afterwards!)"""
     
     if config.debug:
         profiler_start("Getting full file path for file_id %d and \
                         commit_id %d", (file_id, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     
     cursor = cnn.cursor()
     query = """SELECT file_path from file_paths
                WHERE file_id=? AND commit_id <= ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder), (file_id, commit_id))
     try:
         file_path = cursor.fetchone()[0]
     except:
         file_path = None
     
     cursor.close()
     cnn.close()
     
     printdbg("get_path_from_database:\
               Path for file_id %d at commit_id %d: %s",
              (file_id, commit_id, file_path))
     if config.debug:
         profiler_stop("Getting full file path for file_id %d and\
                          commit_id %d", (file_id, commit_id), True)
     return file_path
Example #27
    def __process_finished_jobs(self, job_pool, write_cursor, db):
        #        start = datetime.now()
        finished_job = job_pool.get_next_done(0)
        processed_jobs = 0
        # commit_id is the commit ID. For some reason, the
        # documentation advocates tablename_id as the reference,
        # but in the source, these are referred to as commit IDs.
        # Don't ask me why!
        while finished_job is not None:
            file_contents = None

            if not Config().no_content:
                file_contents = str(finished_job.file_contents)

            query = """
                insert into content(commit_id, file_id, content, loc, size) 
                    values(?,?,?,?,?)"""
            insert_statement = statement(query, db.place_holder)
            parameters = (finished_job.commit_id, finished_job.file_id,
                          file_contents, finished_job.file_number_of_lines,
                          finished_job.file_size)

            execute_statement(insert_statement,
                              parameters,
                              write_cursor,
                              db,
                              "Couldn't insert, duplicate record?",
                              exception=ExtensionRunError)

            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)

        return processed_jobs
Example #28
    def get_path(self, repo=None, repo_path=None):
        if not self.current:
            return None

        revision, commit_id, file_id, action_type, composed = self.current
        if composed:
            rev = revision.split("|")[0]
        else:
            rev = revision
        cursor = self.cnn.cursor()
        cursor.execute(statement(self.__path_query__, self.db.place_holder),
                       (file_id, commit_id))
        file_link = cursor.fetchone()
        relative_path = None
        if repo is None:
            relative_path = file_link[1]
        else:
            try:
                while file_link:
                    if repo.is_ancestor(repo_path, file_link[0], rev):
                        relative_path = file_link[1]
                        break
                    else:
                        file_link = cursor.fetchone()
            except CommandError as e:
                printerr(str(e) + '\n' + e.error)

        cursor.close()
        if relative_path is None:
            return None
        else:
            return relative_path.strip("/")
Example #29
    def run(self, repo, uri, db):
        profiler_start("Running PatchLOC extension")

        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        cursor = connection.cursor()

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (repo_uri, ))
        repo_id = cursor.fetchone()[0]

        try:
            self.__create_table(connection)
        except TableAlreadyExists:
            pass
        except Exception, e:
            raise ExtensionRunError(str(e))
Example #30
    def __process_finished_jobs (self, job_pool, write_cursor, unlocked = False):
        if unlocked:
            job = job_pool.get_next_done_unlocked ()
        else:
            job = job_pool.get_next_done ()

        args = []

        while job is not None:
            authors = job.get_authors ()
            file_id = job.get_file_id ()
            commit_id = job.get_commit_id ()

            a = [(self.id_counter + i, file_id, commit_id, self.authors[key], authors[key]) \
                     for i, key in enumerate (authors.keys ())]
            args.extend (a)
            self.id_counter += len (a)

            if unlocked:
                job = job_pool.get_next_done_unlocked ()
            else:
                job = job_pool.get_next_done (0.5)

        if args:
            write_cursor.executemany (statement (self.__insert__, self.db.place_holder), args)
            del args
Example #31
 def __find_previous_commit(self, file_id, commit_id):
     query = """select a.commit_id, a.action_type, c.rev from _action_files_cache a,scmlog c
         where a.commit_id=c.id and a.file_id=?
         order by c.date
     """
     cnn = self.db.connect ()
     aux_cursor = cnn.cursor()
     aux_cursor.execute(statement(query, self.db.place_holder),(file_id,))
     all_commits=aux_cursor.fetchall()
     aux_cursor.close()
     cnn.close()
     pre_commit_id = None
     pre_rev = None
     for cur_commit_id,type, cur_rev in all_commits:
         if cur_commit_id == commit_id:
             #Nothing to blame for other types
             if type != 'M' and type != 'R':
                 raise NotValidHunkWarning("Wrong commit to blame: commit type: %s"%type)
             else:
                 break
         else:
             pre_commit_id = cur_commit_id
             pre_rev = cur_rev
     else:
         raise NotValidHunkWarning("No previous commit found for file %d at commit %d"%(file_id, commit_id))
     if pre_commit_id is None or pre_rev is None:
         raise NotValidHunkWarning("No previous commit found for file %d at commit %d"%(file_id, commit_id))
     return pre_commit_id,pre_rev    
Example #32
    def __get_commits_lines_for_repository(self, repo_id, cursor):
        query = "SELECT cm.commit_id from commits_lines cm, scmlog s " + \
                "WHERE cm.commit_id = s.id and repository_id = ?"
        cursor.execute(statement(query, self.db.place_holder), (repo_id, ))
        commits = [res[0] for res in cursor.fetchall()]

        return commits
Example #33
    def process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done(0.5)

        args = []
        
        processed_jobs = 0
        while job is not None:
            if not job.failed:
                a = self.populate_insert_args(job)
                args.extend(a)
                self.id_counter += len(a)
            processed_jobs += 1
            if unlocked:
                job = job_pool.get_next_done_unlocked()
            else:
                job = job_pool.get_next_done(0)

        if len(args) > 0:
            write_cursor.executemany(statement(self.__insert__, 
                                               self.db.place_holder), args)
            del args
        return processed_jobs
Example #34
 def __get_hunk_blames(self, cursor, repoid):
     query = """select distinct b.hunk_id 
         from hunk_blames b 
         join hunks h on b.hunk_id=h.id
         join files f on h.file_id=f.id
         where f.repository_id=?"""
     cursor.execute (statement (query, self.db.place_holder), (repoid,))
     return [h[0] for h in cursor.fetchall()]
Example #35
 def __get_hunk_blames(self, cursor, repoid):
     query = """select distinct b.hunk_id 
         from hunk_blames b 
         join hunks h on b.hunk_id=h.id
         join files f on h.file_id=f.id
         where f.repository_id=?"""
     cursor.execute(statement(query, self.db.place_holder), (repoid, ))
     return [h[0] for h in cursor.fetchall()]
Example #36
 def get_max_id(self, db):
     cnn = self.db.connect()
     cursor = cnn.cursor()
     cursor.execute(statement("SELECT max(id) from blame", db.place_holder))
     max_id = cursor.fetchone()[0]
     cursor.close()
     cnn.close()
     return max_id
Example #37
 def get_max_id(self, db):
     cnn = self.db.connect()
     cursor = cnn.cursor()
     cursor.execute(statement("SELECT max(id) from blame", 
                              db.place_holder))
     max_id = cursor.fetchone()[0]
     cursor.close()
     cnn.close()
     return max_id
Example #38
def getRepositoryType(db, cnn, repositoryID):

    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT type FROM repositories WHERE id=?", db.place_holder),
        (repositoryID, ))
    repositoryType = cursor.fetchone()[0]
    cursor.close()

    return repositoryType
Example #39
    def __get_path_from_db(self, file_id, commit_id):
        cursor = self.cnn.cursor()

        cursor.execute(statement(self.__path_query__, self.db.place_holder),
                       (file_id, commit_id))
        path = cursor.fetchone()[0]

        cursor.close ()

        return "/" + path
Example #40
    def __get_path_from_db(self, file_id, commit_id):
        cursor = self.cnn.cursor()

        cursor.execute(statement(self.__path_query__, self.db.place_holder),
                       (file_id, commit_id))
        path = cursor.fetchone()[0]

        cursor.close()

        return "/" + path
Example #41
    def __init__(self, db, cnn, cursor, repoid):
        self.db = db
        self.cnn = cnn
        self.repoid = repoid

        self.icursor = ICursor(cursor, self.INTERVAL_SIZE)
        self.icursor.execute(statement(self.__query__, db.place_holder), (repoid,))
        self.rs = iter(self.icursor.fetchmany())
        self.prev_commit = -1
        self.current = None
Example #42
    def __init__(self, db, cnn, cursor, repoid):
        self.db = db
        self.cnn = cnn
        self.repoid = repoid

        self.icursor = ICursor(cursor, self.INTERVAL_SIZE)
        self.icursor.execute(statement(self.__query__, db.place_holder),
                             (repoid, ))
        self.rs = iter(self.icursor.fetchmany())
        self.prev_commit = -1
        self.current = None
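
Examples #41, #42, #44, #45 and #56 read their results through an ICursor built with self.INTERVAL_SIZE instead of a plain cursor. The class is not defined in any of these snippets; a minimal sketch under the assumption that it merely batches fetchmany() to a fixed size so large result sets are streamed in chunks:

    class ICursor(object):
        """Hypothetical sketch: thin DB-API cursor wrapper that fetches
        rows in fixed-size intervals."""

        def __init__(self, cursor, interval_size):
            self.cursor = cursor
            self.interval_size = interval_size

        def execute(self, sql, args=None):
            if args is None:
                return self.cursor.execute(sql)
            return self.cursor.execute(sql, args)

        def fetchmany(self):
            return self.cursor.fetchmany(self.interval_size)

        def close(self):
            self.cursor.close()
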
Example #43
def findRepository(db, cnn, projectName):

    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT id FROM repositories WHERE name=?", db.place_holder),
        (projectName, ))
    rep = cursor.fetchone()[0]
    cursor.close()

    if rep is None:
        raise ProjectNotFound

    return rep
Example #44
 def get_patches(self, repo, repo_uri, repo_id, db, cursor):
     icursor = ICursor(cursor, self.INTERVAL_SIZE)
     # Get the patches from this repository
     query = """select p.commit_id, p.file_id, p.patch, s.rev
                 from patches p, scmlog s
                 where p.commit_id = s.id and
                 s.repository_id = ? and
                 p.patch is not NULL"""
     icursor.execute(statement(query, db.place_holder), (repo_id, ))
     rs = icursor.fetchmany()
     while rs:
         for commit_id, file_id, patch_content, rev in rs:
             yield (commit_id, file_id, to_utf8(patch_content), rev)
         rs = icursor.fetchmany()
Example #45
 def get_patches(self, repo, repo_uri, repo_id, db, cursor):
     icursor = ICursor(cursor, self.INTERVAL_SIZE)
     # Get the patches from this repository
     query = """select p.commit_id, p.file_id, p.patch, s.rev
                 from patches p, scmlog s
                 where p.commit_id = s.id and
                 s.repository_id = ? and
                 p.patch is not NULL"""
     icursor.execute(statement(query, db.place_holder), (repo_id,))
     rs = icursor.fetchmany()
     while rs:
         for commit_id, file_id, patch_content, rev in rs:
             yield (commit_id, file_id, to_utf8(patch_content), rev)
         rs = icursor.fetchmany()
Example #46
    def update_all(self, repo_id):
        """
        update_all enables caching of adjacency matrices.
        Pros: File paths in different revisions can be
        accessed randomly, i.e. after calling update_all,
        get_path can be called with any revision in any
        order.
        Cons: It consumes significant memory to store
        the adjacency matrices

        If the config has low_memory set to true, shelve will
        be used instead, to write the cache out to disk.
        """
        profiler_start("Update all file paths")
        
        if Config().low_memory:
            self.shelve_file_name = str(time()) + "-shelve.db"
            
            # If there is an old file, shelf will complain viciously
            if os.path.exists(self.shelve_file_name):
                os.remove(self.shelve_file_name)
            
            self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name, 
                                                        writeback=False)
        
        db = self.__dict__['db']
        cnn = db.connect()

        cursor = cnn.cursor()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.date"""
        cursor.execute(statement(query, db.place_holder), (repo_id,))
        
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall()]
        for id in all_commits:
            if old_id != id:
                adj = self.__dict__['cached_adj'].get(str(id))

                if adj is None:
                    self.update_for_revision(cursor, id, repo_id)
                    self.__dict__['cached_adj'][str(id)] = \
                    deepcopy(self.__dict__['adj'])
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example #47
    def update_all(self, repo_id):
        """
        update_all enables caching of adjacency matrices.
        Pros: File paths in different revisions can be
        accessed randomly, i.e. after calling update_all,
        get_path can be called with any revision in any
        order.
        Cons: It consumes significant memory to store
        the adjacency matrices

        If the config has low_memory set to true, shelve will
        be used instead, to write the cache out to disk.
        """
        profiler_start("Update all file paths")
        
        if Config().low_memory:
            self.shelve_file_name = str(time()) + "-shelve.db"
            
            # If there is an old file, shelf will complain viciously
            if os.path.exists(self.shelve_file_name):
                os.remove(self.shelve_file_name)
            
            self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name, 
                                                        writeback=False)
        
        db = self.__dict__['db']
        cnn = db.connect()

        cursor = cnn.cursor()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.commit_date"""
        cursor.execute(statement(query, db.place_holder), (repo_id,))
        
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall()]
        for id in all_commits:
            if old_id != id:
                adj = self.__dict__['cached_adj'].get(str(id))

                if adj is None:
                    self.update_for_revision(cursor, id, repo_id)
                    self.__dict__['cached_adj'][str(id)] = \
                    deepcopy(self.__dict__['adj'])
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example #48
    def populate_insert_args(self, job):
        args = []
        cnn = self.db.connect()
        cursor = cnn.cursor()
        for h in job.hunks:
            query = "select id from scmlog where rev = ?"
            cursor.execute(statement(query, self.db.place_holder), (h.rev, ))

            fetched_row = cursor.fetchone()

            if fetched_row is not None:
                args.append((job.file_id, job.commit_id, h.start, h.end,
                             fetched_row[0]))

        cursor.close()
        cnn.close()
        return args
Example #49
    def populate_insert_args(self, job):
        args = []
        cnn = self.db.connect()
        cursor = cnn.cursor()
        for h in job.hunks:
            query = "select id from scmlog where rev = ?"
            cursor.execute(statement(query, self.db.place_holder),
                           (h.rev,))

            fetched_row = cursor.fetchone()

            if fetched_row is not None:
                args.append((job.file_id, job.commit_id, h.start, h.end, fetched_row[0]))
            
        cursor.close()
        cnn.close()
        return args
Example #50
    def fetchReleaseRevisions(self):

        cursor = self.connection.cursor()
        sql = "SELECT file_copies.from_id, file_copies.from_commit_id, tag_revisions.tag_id " + \
              "FROM tag_revisions, scmlog, actions, file_copies " + \
              "WHERE scmlog.id = tag_revisions.commit_id " + \
              "AND actions.commit_id = scmlog.id " + \
              "AND file_copies.action_id = actions.id " + \
              "AND actions.branch_id = 1 " + \
              "AND actions.type = 'C' " + \
              "AND scmlog.repository_id=?"
        cursor.execute(statement(sql, self.db.place_holder),
                       (self.repositoryId, ))

        fileLinks = []

        rs = cursor.fetchmany()

        while rs:
            for row in rs:
                fileLinks.append([row[0], row[1], row[2]])

            rs = cursor.fetchmany()

        cursor.close()

        resolvedFileLinks = []

        for fileLink in fileLinks:
            self.__buildReleaseRevision(fileLink[0], fileLink[1], fileLink[2],
                                        resolvedFileLinks)

        for resolved in resolvedFileLinks:
            fileId = resolved[0]
            commitId = resolved[1]
            releaseId = resolved[2]

            sourceFile = self.filesDict.getEntity(fileId, commitId)
            revision = sourceFile.getLastRevisionPriorToCommit(commitId)
            if not (revision is None):
                release = self.releaseDict.getEntity(releaseId)
                releaseRevision = ReleaseRevisionEntity()
                releaseRevision.setRelease(release)
                releaseRevision.setRevision(revision)
                self.releaseRevisionDict.add(releaseRevision)
Example #51
    def __process_finished_jobs(self, job_pool, write_cursor, db):
        finished_job = job_pool.get_next_done()

        # scmlog_id is the commit ID. For some reason, the 
        # documentation advocates tablename_id as the reference,
        # but in the source, these are referred to as commit IDs.
        # Don't ask me why!
        while finished_job is not None:
            p = DBPatch (None, finished_job.commit_id, finished_job.data)

            execute_statement(statement(DBPatch.__insert__, self.db.place_holder),
                              (p.id, p.commit_id, to_utf8(p.patch).decode("utf-8")),
                              write_cursor,
                              db,
                              "Couldn't insert, duplicate patch?",
                              exception=ExtensionRunError)

            finished_job = job_pool.get_next_done(0)
Example #52
    def __process_finished_jobs(self, job_pool, connection, db):
        if isinstance(self.db, SqliteDatabase):
            from sqlite3 import IntegrityError
        elif isinstance(self.db, MysqlDatabase):
            from MySQLdb import IntegrityError
        write_cursor = connection.cursor()
        finished_job = job_pool.get_next_done(0)
        processed_jobs = 0
        # commit_id is the commit ID. For some reason, the
        # documentation advocates tablename_id as the reference,
        # but in the source, these are referred to as commit IDs.
        # Don't ask me why!
        while finished_job is not None:
            query = """
                insert into content(commit_id, file_id, content, loc, size) 
                    values(?,?,?,?,?)"""
            insert_statement = statement(query, db.place_holder)
            parameters = (
                finished_job.commit_id,
                finished_job.file_id,
                finished_job.file_contents,
                finished_job.file_number_of_lines,
                finished_job.file_size,
            )
            try:
                write_cursor.execute(insert_statement, parameters)
            except IntegrityError as e:
                if isinstance(self.db, MysqlDatabase) and e.args[0] == 1062:
                    # Ignore duplicate entry
                    pass
                else:
                    printerr(
                        "Error while inserting content for file %d @ commit %d"
                        % (finished_job.file_id, finished_job.commit_id)
                    )
                    raise

            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)

        connection.commit()
        write_cursor.close()

        return processed_jobs
Example #53
 def populate_insert_args(self, job):
     bug_revs = job.get_bug_revs ()
     cnn = self.db.connect()
     cursor = cnn.cursor()
     args = []
     for hunk_id in bug_revs:
         for rev in bug_revs[hunk_id]:
             printdbg("Find id for rev %s"%rev)
             query = "select id from scmlog where rev = ?"
             cursor.execute(statement(query, self.db.place_holder),(rev,))
             
             fetched_row = cursor.fetchone()
             
             if fetched_row is not None:
                 args.append((hunk_id,fetched_row[0]))
                 
     cursor.close()
     cnn.close()
     return args
Example #54
    def update_all(self, repo_id):
        profiler_start("Update all file paths")
        db = self.__dict__['db']
        cnn = db.connect ()

        cursor = cnn.cursor ()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.id"""
        cursor.execute (statement (query, db.place_holder), (repo_id,))        
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall ()]
        for id in all_commits:
            if old_id != id:
                self.update_for_revision (cursor, id, repo_id)
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example #55
    def __process_finished_jobs(self, job_pool, write_cursor, db):
        finished_job = job_pool.get_next_done(0)
        processed_jobs = 0

        while finished_job is not None:
            query = """update scmlog
                        set file_count = ?
                        where id = ?"""
            insert_statement = statement(query, db.place_holder)
            parameters = (finished_job.ls_line_count, finished_job.row_id)
                                
            execute_statement(insert_statement, parameters, write_cursor, db,
                       "Couldn't update scmlog with ls line count", 
                       exception=ExtensionRunError)
            
            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)
            # print "Before return: %s"%(datetime.now()-start)
            
        return processed_jobs
Example #56
        def patch_generator(repo, repo_uri, repo_id, db, cursor):
            icursor = ICursor(cursor, self.INTERVAL_SIZE)
            icursor.execute(
                statement("SELECT id, rev, composed_rev " + "from scmlog where repository_id = ?", db.place_holder),
                (repo_id,),
            )

            rs = icursor.fetchmany()

            while rs:
                for commit_id, revision, composed_rev in rs:
                    # Get the patch
                    pj = PatchJob(revision, commit_id)

                    path = uri_to_filename(repo_uri)
                    pj.run(repo, path or repo.get_uri())

                    # Yield the patch to hunks
                    yield (pj.commit_id, pj.data, pj.rev)

                rs = icursor.fetchmany()
Example #57
    def run(self, repo, uri, db):
        profiler_start("Running PatchLOC extension")

        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        cursor = connection.cursor()

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cursor.execute(statement("SELECT id from repositories where uri = ?",
                                 db.place_holder), (repo_uri,))
        repo_id = cursor.fetchone()[0]

        try:
            self.__create_table(connection)
        except TableAlreadyExists:
            pass
        except Exception, e:
            raise ExtensionRunError(str(e))
Example #58
File: Blame.py  Project: jsichi/cvsanaly
 def __get_authors (self, cursor):
     query = "select id, name from people"
     cursor.execute (statement (query, self.db.place_holder))
     self.authors = dict ([(name, id) for id, name in cursor.fetchall ()])
Example #59
File: Blame.py  Project: jsichi/cvsanaly
 def __get_blames (self, cursor, repoid):
     query = "select b.file_id, b.commit_id from blame b, files f " + \
             "where b.file_id = f.id and repository_id = ?"
     cursor.execute (statement (query, self.db.place_holder), (repoid,))
     return [(res[0], res[1]) for res in cursor.fetchall ()]