class GitDao(): """ This class handles the persistence and retrieval of Git data """ def __init__(self, config, logger): """ :type config: dict :param config: the DB configuration file :type logger: Object :param logger: logger """ try: self._config = config self._logger = logger self._db_util = DbUtil() self._cnx = self._db_util.get_connection(self._config) except: self._logger.error("GitDao init failed") raise def check_connection_alive(self): try: cursor = self._cnx.cursor() cursor.execute("SELECT VERSION()") results = cursor.fetchone() ver = results[0] cursor.close() if not ver: self._cnx = self._db_util.restart_connection( self._config, self._logger) except: self._cnx = self._db_util.restart_connection( self._config, self._logger) def close_connection(self): self._db_util.close_connection(self._cnx) def restart_connection(self): self._cnx = self._db_util.restart_connection(self._config, self._logger) def get_connection(self): return self._cnx def get_cursor(self): return self._cnx.cursor() def close_cursor(self, cursor): return cursor.close() def fetchone(self, cursor): return cursor.fetchone() def execute(self, cursor, query, arguments): cursor.execute(query, arguments) def array2string(self, array): return ','.join(str(x) for x in array) def line_detail_table_is_empty(self, repo_id): """ checks line detail table is empty :type repo_id: int :param repo_id: id of an existing repository in the DB """ cursor = self._cnx.cursor() query = "SELECT COUNT(*) " \ "FROM commit c " \ "JOIN file_modification fm ON c.id = fm.commit_id " \ "JOIN line_detail l ON fm.id = l.file_modification_id " \ "WHERE l.content IS NOT NULL AND repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() count = 0 if row: count = int(row[0]) cursor.close() return int(count > 0) def file_modification_patch_is_empty(self, repo_id): """ checks patch column in file modification table is empty :type repo_id: int :param repo_id: id of an existing repository in the DB """ cursor = self._cnx.cursor() query = "SELECT COUNT(*) " \ "FROM commit c " \ "JOIN file_modification fm ON c.id = fm.commit_id " \ "WHERE patch IS NOT NULL and repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() count = 0 if row: count = int(row[0]) cursor.close() return int(count > 0) def get_last_commit_id(self, repo_id): """ gets last commit id :type repo_id: int :param repo_id: id of an existing repository in the DB """ found = None cursor = self._cnx.cursor() query = "SELECT MAX(id) as last_commit_id " \ "FROM commit c " \ "WHERE repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_repo_id(self, repo_name): """ selects id of a repository by its name :type repo_name: str :param repo_name: name of an existing repository in the DB """ return self._db_util.select_repo_id(self._cnx, repo_name, self._logger) def insert_repo(self, project_id, repo_name): """ inserts repository to DB :type project_id: int :param project_id: id of an existing project in the DB :type repo_name: str :param repo_name: name of a repository to insert """ return self._db_util.insert_repo(self._cnx, project_id, repo_name, self._logger) def select_project_id(self, project_name): """ selects id of a project by its name :type project_name: str :param project_name: name of an existing project in the DB """ return self._db_util.select_project_id(self._cnx, project_name, self._logger) def get_user_id(self, user_name, user_email): """ gets id of a user :type user_name: str :param user_name: name of the user :type user_email: str :param user_email: email of the user """ if not user_email and not user_name: user_name = "uknonwn_user" user_email = "uknonwn_user" if user_email: user_id = self._db_util.select_user_id_by_email( self._cnx, user_email, self._logger) else: user_id = self._db_util.select_user_id_by_name( self._cnx, user_name, self._logger) if not user_id: self._db_util.insert_user(self._cnx, user_name, user_email, self._logger) user_id = self._db_util.select_user_id_by_email( self._cnx, user_email, self._logger) return user_id def insert_commit_parents(self, parents, commit_id, sha, repo_id): """ inserts commit parents to DB, one by one :type parents: list of Object :param parents: parents of a commit :type commit_id: int :param commit_id: id of the commit :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ cursor = self._cnx.cursor() for parent in parents: parent_id = self.select_commit_id(parent.hexsha, repo_id) if not parent_id: self._logger.warning( "parent commit id not found! SHA parent " + str(parent.hexsha)) query = "INSERT IGNORE INTO commit_parent " \ "VALUES (%s, %s, %s, %s, %s)" if parent_id: arguments = [repo_id, commit_id, sha, parent_id, parent.hexsha] else: arguments = [repo_id, commit_id, sha, None, parent.hexsha] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_all_commit_parents(self, parents, commit_id, sha, repo_id): """ inserts commit parents to DB all together :type parents: list of Object :param parents: parents of a commit :type commit_id: int :param commit_id: id of the commit :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ to_insert = [] for parent in parents: parent_id = self.select_commit_id(parent.hexsha, repo_id) if not parent_id: self._logger.warning( "parent commit id not found! SHA parent " + str(parent.hexsha)) if parent_id: to_insert.append( (repo_id, commit_id, sha, parent_id, parent.hexsha)) else: to_insert.append( (repo_id, commit_id, sha, None, parent.hexsha)) if to_insert: cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit_parent(repo_id, commit_id, commit_sha, parent_id, parent_sha) " \ "VALUES (%s, %s, %s, %s, %s)" cursor.executemany(query, [i for i in to_insert]) self._cnx.commit() cursor.close() def insert_commits_in_reference(self, commits_data): """ inserts commits to DB all together :type commits_data: list of Object :param commits_data: commit data """ if commits_data: cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit_in_reference(repo_id, commit_id, ref_id) VALUES (%s, %s, %s)" cursor.executemany(query, commits_data) self._cnx.commit() cursor.close() def insert_commit_in_reference(self, repo_id, commit_id, ref_id): """ inserts commit to DB :type repo_id: int :param repo_id: id of the repository :type commit_id: int :param commit_id: id of the commit :type ref_id: int :param ref_id: id of the reference """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit_in_reference " \ "VALUES (%s, %s, %s)" arguments = [repo_id, commit_id, ref_id] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_line_details(self, file_modification_id, detail): """ inserts line details to DB :type file_modification_id: int :param file_modification_id: id of the file modification :type detail: str :param detail: line content """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO line_detail " \ "VALUES (%s, %s, %s, %s, %s, %s, %s)" arguments = [ file_modification_id, detail[0], detail[1], detail[2], detail[3], detail[4], detail[5] ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_file_modification_id(self, commit_id, file_id): """ selects file modification id :type commit_id: int :param commit_id: id of the commit :type file_id: int :param file_id: id of the file """ cursor = self._cnx.cursor() query = "SELECT id " \ "FROM file_modification " \ "WHERE commit_id = %s AND file_id = %s" arguments = [commit_id, file_id] cursor.execute(query, arguments) row = cursor.fetchone() found = None if row: found = row[0] cursor.close() return found def insert_file_modification(self, commit_id, file_id, status, additions, deletions, changes, patch_content): """ inserts file modification to DB :type commit_id: int :param commit_id: id of the commit :type file_id: int :param file_id: id of the file :type status: str :param status: type of the modification :type additions: int :param additions: number of additions :type deletions: int :param deletions: number of deletions :type changes: int :param changes: number of changes :type patch_content: str :param patch_content: content of the patch """ # 4194304 is the max statement size for MySQL by default. if patch_content != None and len(patch_content) > 4194304: self._logger.error('Patch content for commit (' + str(commit_id) + ') is too large, not persisted.') patch_content = None cursor = self._cnx.cursor() query = "INSERT IGNORE INTO file_modification " \ "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s)" arguments = [ commit_id, file_id, status, additions, deletions, changes, patch_content ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_file_renamed(self, repo_id, current_file_id, previous_file_id, file_modification_id): """ inserts file renamed information :type repo_id: int :param repo_id: id of the repository :type current_file_id: int :param current_file_id: id of the renamed file :type previous_file_id: int :param previous_file_id: id of the file before renaming :type file_modification_id: int :param file_modification_id: id of the file modification """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO file_renamed " \ "VALUES (%s, %s, %s, %s)" arguments = [ repo_id, current_file_id, previous_file_id, file_modification_id ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_file(self, repo_id, name, ext=None): """ inserts file :type repo_id: int :param repo_id: id of the repository :type name: str :param name: name of the file (full path) :type ext: str :param ext: extension of the file """ if name == None: return cursor = self._cnx.cursor() query = "INSERT IGNORE INTO file " \ "VALUES (%s, %s, %s, %s)" # extract file extension from file path if not passed if not ext: ext = name.split('.')[-1] arguments = [None, repo_id, name, ext] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_file_id_before_date(self, repo_id, name, before_date): """ selects id of the file before date :type repo_id: int :param repo_id: id of the repository :type name: str :param name: name of the file (full path) :type before_date: timestamp :param before_date: date """ cursor = self._cnx.cursor() query = "SELECT DISTINCT f.id " \ "FROM file f JOIN file_modification fm ON f.id = fm.file_id " \ "JOIN commit c ON c.id = fm.commit_id " \ "WHERE f.name = %s AND f.repo_id = %s AND fm.status = 'added' " \ "AND c.authored_date <= '" + str(before_date) + "' " arguments = [name, repo_id] cursor.execute(query, arguments) try: id = cursor.fetchone()[0] except: id = None cursor.close() return id def select_file_id(self, repo_id, name): """ selects id of the file :type repo_id: int :param repo_id: id of the repository :type name: str :param name: name of the file (full path) """ cursor = self._cnx.cursor() query = "SELECT id " \ "FROM file " \ "WHERE name = %s AND repo_id = %s" arguments = [name, repo_id] cursor.execute(query, arguments) try: id = cursor.fetchone()[0] except: id = None cursor.close() return id def insert_reference(self, repo_id, ref_name, ref_type): """ inserts reference :type repo_id: int :param repo_id: id of the repository :type ref_name: str :param ref_name: name of the reference :type ref_type: str :param ref_type: type of the reference (branch or tag) """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO reference " \ "VALUES (%s, %s, %s, %s)" arguments = [None, repo_id, ref_name, ref_type] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_reference_name(self, repo_id, ref_id): """ selects reference name by its id :type repo_id: int :param repo_id: id of the repository :type ref_id: int :param ref_id: id of the reference """ found = None cursor = self._cnx.cursor() query = "SELECT name " \ "FROM reference " \ "WHERE id = %s and repo_id = %s" arguments = [ref_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_reference_id(self, repo_id, ref_name): """ selects reference id by its name :type repo_id: int :param repo_id: id of the repository :type ref_name: str :param ref_name: name of the reference """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM reference " \ "WHERE name = %s and repo_id = %s" arguments = [ref_name, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def insert_commit(self, repo_id, sha, message, author_id, committer_id, authored_date, committed_date, size): """ inserts commit to DB :type repo_id: int :param repo_id: id of the repository :type sha: str :param sha: SHA of the commit :type message: str :param message: message of the commit :type author_id: int :param author_id: author id of the commit :type committer_id: int :param committer_id: committer id of the commit :type authored_date: str :param authored_date: authored date of the commit :type committed_date: str :param committed_date: committed date of the commit :type size: int :param size: size of the commit """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ None, repo_id, sha, message.strip(), author_id, committer_id, authored_date, committed_date, size ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def update_commit_parent(self, parent_id, parent_sha, repo_id): """ inserts commit parent to DB :type parent_id: int :param parent_id: id of the commit parent :type parent_sha: str :param parent_sha: SHA of the commit parent :type repo_id: int :param repo_id: id of the repository """ cursor = self._cnx.cursor() query_update = "UPDATE commit_parent " \ "SET parent_id = %s " \ "WHERE parent_id IS NULL AND parent_sha = %s AND repo_id = %s " arguments = [parent_id, parent_sha, repo_id] cursor.execute(query_update, arguments) self._cnx.commit() cursor.close() def fix_commit_parent_table(self, repo_id): """ checks for missing commit parent information and fixes it :type repo_id: int :param repo_id: id of the repository """ cursor = self._cnx.cursor() query_select = "SELECT parent_sha " \ "FROM commit_parent " \ "WHERE parent_id IS NULL AND repo_id = %s" arguments = [repo_id] cursor.execute(query_select, arguments) row = cursor.fetchone() while row: parent_sha = row[0] parent_id = self.select_commit_id(parent_sha, repo_id) self.update_commit_parent(parent_id, parent_sha, repo_id) row = cursor.fetchone() cursor.close() def select_commit_id(self, sha, repo_id): """ selects id of a commit by its SHA :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM commit " \ "WHERE sha = %s AND repo_id = %s" arguments = [sha, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_commit_id_before_date(self, sha, repo_id, before_date): """ selects id of a commit by its SHA before a given date :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository :type before_date: timestamp :param before_date: date """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM commit " \ "WHERE sha = %s AND repo_id = %s AND authored_date <= '" + str(before_date) + "' " arguments = [sha, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_all_developer_ids(self, repo_id): """ selects all developers (committers or authors) of a given repo :type repo_id: int :param repo_id: id of the repository """ user_ids = [] cursor = self._cnx.cursor() query = "SELECT c.author_id " \ "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.author_id " \ "WHERE repo_id = %s AND u.name IS NOT NULL AND u.email IS NOT NULL " \ "UNION " \ "SELECT c.committer_id " \ "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.committer_id " \ "WHERE repo_id = %s AND u.name IS NOT NULL AND u.email IS NOT NULL " arguments = [repo_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: user_id = row[0] user_ids.append(user_id) row = cursor.fetchone() cursor.close() return user_ids def select_sha_commit_by_user(self, user_id, repo_id): """ selects the SHA of the first commit (authored or committed) by a given user id :type user_id: int :param user_id: id of the user :type repo_id: int :param repo_id: id of the repository """ found = None cursor = self._cnx.cursor() query = "SELECT sha " \ "FROM commit " \ "WHERE (author_id = %s OR committer_id = %s) AND repo_id = %s " \ "LIMIT 1" arguments = [user_id, user_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] return found
class DbSchema(): """ This class initializes the DB schema """ def __init__(self, db_name, config, log_root_path): """ :type db_name: str :param db_name: the name of the DB to initialize/connect to, it cannot be null and must follow the format allowed in MySQL (http://dev.mysql.com/doc/refman/5.7/en/identifiers.html). If a DB having a name equal already exists in Gitana, the existing DB will be dropped and a new one will be created :type config: dict :param config: the DB configuration file :type log_root_path: str :param log_root_path: the log path """ self._db_name = db_name self._config = config self._log_root_path = log_root_path self._db_util = DbUtil() self._logging_util = LoggingUtil() log_path = self._log_root_path + "db-schema-" + db_name self._logger = self._logging_util.get_logger(log_path) self._fileHandler = self._logging_util.get_file_handler( self._logger, log_path, "info") self._cnx = self._db_util.get_connection(self._config) def __del__(self): if self._cnx: self._db_util.close_connection(self._cnx) if self._logger: #deletes the file handler of the logger self._logging_util.remove_file_handler_logger( self._logger, self._fileHandler) def add_git_tables(self): """ initializes git tables if they do not exist """ self.set_database(self._db_name) self._init_git_tables() def add_issue_tracker_tables(self): """ initializes issue tracker tables if they do not exist """ self.set_database(self._db_name) self._init_shared_tables_issue_tracker_communication_channels() self._init_issue_tracker_tables() def add_instant_messaging_tables(self): """ initializes instant messaging tables if they do not exist """ self.set_database(self._db_name) self._init_shared_tables_issue_tracker_communication_channels() self._init_instant_messaging_tables() def add_forum_tables(self): """ initializes forum tables if they do not exist """ self.set_database(self._db_name) self._init_shared_tables_issue_tracker_communication_channels() self._init_forum_tables() def init_database(self, init_git, init_issue_tracker, init_forum, init_instant_messaging): """ initializes the database tables and functions :type init_git: bool :param init_git: if True, it initializes the tables containing git data :type init_issue_tracker: bool :param init_issue_tracker: if True, it initializes the tables containing issue tracker data :type init_forum: bool :param init_forum: if True, it initializes the tables containing forum data :type init_instant_messaging: bool :param init_instant_messaging: if True, it initializes the tables containing instant messaging data """ try: self._logger.info("init database started") start_time = datetime.now() self._create_database() self.set_database(self._db_name) self._set_settings() self._init_common_tables() if init_issue_tracker or init_forum or init_instant_messaging: self._init_shared_tables_issue_tracker_communication_channels() if init_git: self._init_git_tables() if init_issue_tracker: self._init_issue_tracker_tables() if init_forum: self._init_forum_tables() if init_instant_messaging: self._init_instant_messaging_tables() self._init_functions() self._logger.info("database " + self._db_name + " created") end_time = datetime.now() minutes_and_seconds = self._logging_util.calculate_execution_time( end_time, start_time) self._logger.info("Init database finished after " + str(minutes_and_seconds[0]) + " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs") except Exception: self._logger.error("init database failed", exc_info=True) def create_project(self, project_name): """ inserts a project in the DB :type project_name: str :param project_name: the name of the project to create """ self._cnx = self._db_util.get_connection(self._config) self._db_util.insert_project(self._cnx, self._db_name, project_name) self._db_util.close_connection(self._cnx) def create_repository(self, project_name, repo_name): """ inserts a repository in the DB :type project_name: str :param project_name: the name of an existing project :type repo_name: str :param repo_name: the name of the repository to insert """ self._cnx = self._db_util.get_connection(self._config) self.set_database(self._db_name) project_id = self._db_util.select_project_id(self._cnx, project_name, self._logger) try: self._db_util.insert_repo(self._cnx, project_id, repo_name, self._logger) except Exception: self._logger.error("repository " + repo_name + " not inserted", exc_info=True) self._db_util.close_connection(self._cnx) def list_projects(self): """ lists all projects contained in the DB """ self._cnx = self._db_util.get_connection(self._config) project_names = [] self.set_database(self._db_name) cursor = self._cnx.cursor() query = "SELECT name FROM project" cursor.execute(query) row = cursor.fetchone() while row: project_names.append(row[0]) row = cursor.fetchone() cursor.close() return project_names def set_database(self, db_name): """ sets the DB used by the tool :type db_name: str :param db_name: the name of the DB """ try: self._logger.info("set database " + db_name + " started") self._db_util.set_database(self._cnx, db_name) self._logger.info("set database " + db_name + " finished") except Exception: self._logger.error("set database failed", exc_info=True) def _set_settings(self): #sets the settings (max connections, charset, file format, ...) used by the DB self._db_util.set_settings(self._cnx) def _create_database(self): #creates the database cursor = self._cnx.cursor() drop_database_if_exists = "DROP DATABASE IF EXISTS " + self._db_name cursor.execute(drop_database_if_exists) create_database = "CREATE DATABASE " + self._db_name cursor.execute(create_database) cursor.close() def _init_functions(self): #initializes functions cursor = self._cnx.cursor() levenshtein_distance = """ CREATE DEFINER=`root`@`localhost` FUNCTION `levenshtein_distance`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(11) DETERMINISTIC BEGIN DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT; DECLARE s1_char CHAR CHARACTER SET utf8; -- max strlen=255 for this function DECLARE cv0, cv1 VARBINARY(256); SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2), cv1 = 0x00, j = 1, i = 1, c = 0; IF (s1 = s2) THEN RETURN (0); ELSEIF (s1_len = 0) THEN RETURN (s2_len); ELSEIF (s2_len = 0) THEN RETURN (s1_len); END IF; WHILE (j <= s2_len) DO SET cv1 = CONCAT(cv1, CHAR(j)), j = j + 1; END WHILE; WHILE (i <= s1_len) DO SET s1_char = SUBSTRING(s1, i, 1), c = i, cv0 = CHAR(i), j = 1; WHILE (j <= s2_len) DO SET c = c + 1, cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1); SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost; IF (c > c_temp) THEN SET c = c_temp; END IF; SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1; IF (c > c_temp) THEN SET c = c_temp; END IF; SET cv0 = CONCAT(cv0, CHAR(c)), j = j + 1; END WHILE; SET cv1 = cv0, i = i + 1; END WHILE; RETURN (c); END""" soundex_match = """ CREATE DEFINER=`root`@`localhost` FUNCTION `soundex_match`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(1) DETERMINISTIC BEGIN DECLARE _result INT DEFAULT 0; IF SOUNDEX(s1) = SOUNDEX(s2) THEN SET _result = 1; END IF; RETURN _result; END""" cursor.execute(levenshtein_distance) cursor.execute(soundex_match) cursor.close() def _init_common_tables(self): #initializes common tables used by tables modeling git, issue tracker, forum and instant messaging data cursor = self._cnx.cursor() create_table_project = "CREATE TABLE IF NOT EXISTS project( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(255), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_user = "******" \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "email varchar(256), " \ "CONSTRAINT namem UNIQUE (name, email) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_user_alias = "CREATE TABLE IF NOT EXISTS user_alias ( " \ "user_id int(20), " \ "alias_id int(20), " \ "CONSTRAINT a UNIQUE (user_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_project) cursor.execute(create_table_user) cursor.execute(create_table_user_alias) def _init_shared_tables_issue_tracker_communication_channels(self): #initializes shared tables used by tables modeling issue tracker, forum and instant messaging data cursor = self._cnx.cursor() create_table_label = "CREATE TABLE IF NOT EXISTS label ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_message = "CREATE TABLE IF NOT EXISTS message ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "pos int(10), " \ "type_id int(20), " \ "issue_id int(20), " \ "topic_id int(20), " \ "channel_id int(20), " \ "body longblob, " \ "votes int(20), " \ "author_id int(20), " \ "created_at timestamp NULL DEFAULT NULL," \ "CONSTRAINT ip UNIQUE (issue_id, topic_id, channel_id, own_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_message_dependency = "CREATE TABLE IF NOT EXISTS message_dependency ( " \ "source_message_id int(20), " \ "target_message_id int(20), " \ "PRIMARY KEY st (source_message_id, target_message_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_message_type = "CREATE TABLE IF NOT EXISTS message_type ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(255), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" insert_message_types = "INSERT IGNORE INTO message_type VALUES (NULL, 'question'), " \ "(NULL, 'answer'), " \ "(NULL, 'comment'), " \ "(NULL, 'accepted_answer'), " \ "(NULL, 'reply'), " \ "(NULL, 'file_upload'), " \ "(NULL, 'info');" create_table_attachment = "CREATE TABLE IF NOT EXISTS attachment ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "message_id int(20), " \ "name varchar(256), " \ "extension varchar(10), " \ "bytes int(20), " \ "url varchar(512), " \ "CONSTRAINT ip UNIQUE (message_id, own_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_label) cursor.execute(create_table_message) cursor.execute(create_table_message_dependency) cursor.execute(create_table_message_type) cursor.execute(insert_message_types) cursor.execute(create_table_attachment) cursor.close() def _init_git_tables(self): #initializes tables used to model git data cursor = self._cnx.cursor() create_table_repository = "CREATE TABLE IF NOT EXISTS repository( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "project_id int(20), " \ "name varchar(255), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_reference = "CREATE TABLE IF NOT EXISTS reference( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "name varchar(255), " \ "type varchar(255), " \ "CONSTRAINT name UNIQUE (repo_id, name, type) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_commit = "CREATE TABLE IF NOT EXISTS commit(" \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "sha varchar(512), " \ "message varchar(512), " \ "author_id int(20), " \ "committer_id int(20), " \ "authored_date timestamp NULL DEFAULT NULL, " \ "committed_date timestamp NULL DEFAULT NULL, " \ "size int(20), " \ "INDEX sha (sha), " \ "INDEX auth (author_id), " \ "INDEX comm (committer_id), " \ "CONSTRAINT s UNIQUE (sha, repo_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_commit_parent = "CREATE TABLE IF NOT EXISTS commit_parent(" \ "repo_id int(20), " \ "commit_id int(20), " \ "commit_sha varchar(512), " \ "parent_id int(20), " \ "parent_sha varchar(512), " \ "PRIMARY KEY copa (repo_id, commit_id, parent_id), " \ "CONSTRAINT cshapsha UNIQUE (repo_id, commit_id, parent_sha) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_commits2reference = "CREATE TABLE IF NOT EXISTS commit_in_reference(" \ "repo_id int(20), " \ "commit_id int(20), " \ "ref_id int(20), " \ "PRIMARY KEY core (commit_id, ref_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_file = "CREATE TABLE IF NOT EXISTS file( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "name varchar(512), " \ "ext varchar(255), " \ "CONSTRAINT rerena UNIQUE (repo_id, name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_file_renamed = "CREATE TABLE IF NOT EXISTS file_renamed ( " \ "repo_id int(20), " \ "current_file_id int(20), " \ "previous_file_id int(20), " \ "file_modification_id int(20), " \ "PRIMARY KEY cpc (current_file_id, previous_file_id, file_modification_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_file_modification = "CREATE TABLE IF NOT EXISTS file_modification ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "commit_id int(20), " \ "file_id int(20), " \ "status varchar(10), " \ "additions numeric(10), " \ "deletions numeric(10), " \ "changes numeric(10), " \ "patch longblob, " \ "CONSTRAINT cf UNIQUE (commit_id, file_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_line_detail = "CREATE TABLE IF NOT EXISTS line_detail( " \ "file_modification_id int(20)," \ "type varchar(25), " \ "line_number numeric(20), " \ "is_commented numeric(1), " \ "is_partially_commented numeric(1), " \ "is_empty numeric(1), " \ "content longblob, " \ "PRIMARY KEY fityli (file_modification_id, type, line_number) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" # adding it here because "file_dependency" depends on "file" table creation. # @todo: find a way to move the following table creation to separate section # make "extract_dependency_relations" API interface completely independent. create_table_file_dependency = "CREATE TABLE file_dependency ( " \ "repo_id int(20), " \ "ref_id int(20), " \ "source_file_id int(20), " \ "target_file_id int(20), " \ "CONSTRAINT dep UNIQUE (repo_id, ref_id, source_file_id, target_file_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_repository) cursor.execute(create_table_reference) cursor.execute(create_table_commit) cursor.execute(create_table_commit_parent) cursor.execute(create_table_commits2reference) cursor.execute(create_table_file) cursor.execute(create_table_file_renamed) cursor.execute(create_table_file_modification) cursor.execute(create_table_line_detail) cursor.execute(create_table_file_dependency) cursor.close() def _init_issue_tracker_tables(self): #initializes tables used to model issue tracker data cursor = self._cnx.cursor() create_table_issue_tracker = "CREATE TABLE IF NOT EXISTS issue_tracker ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "name varchar(512), " \ "type varchar(512), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue = "CREATE TABLE IF NOT EXISTS issue ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "issue_tracker_id int(20), " \ "summary varchar(512), " \ "component varchar(256), " \ "version varchar(256), " \ "hardware varchar(256), " \ "priority varchar(256), " \ "severity varchar(256), " \ "reference_id int(20), " \ "reporter_id int(20), " \ "created_at timestamp NULL DEFAULT NULL, " \ "last_change_at timestamp NULL DEFAULT NULL, " \ "CONSTRAINT ioi UNIQUE (issue_tracker_id, own_id), " \ "INDEX u (reporter_id), " \ "INDEX r (reference_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_assignee = "CREATE TABLE IF NOT EXISTS issue_assignee ( " \ "issue_id int(20), " \ "assignee_id int(20), " \ "PRIMARY KEY il (issue_id, assignee_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_subscriber = "CREATE TABLE IF NOT EXISTS issue_subscriber ( " \ "issue_id int(20), " \ "subscriber_id int(20), " \ "PRIMARY KEY il (issue_id, subscriber_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_event = "CREATE TABLE IF NOT EXISTS issue_event ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "issue_id int(20), " \ "event_type_id int(20), " \ "detail varchar(256), " \ "creator_id int(20), " \ "created_at timestamp NULL DEFAULT NULL, " \ "target_user_id int(20), " \ "CONSTRAINT iecc UNIQUE (issue_id, event_type_id, creator_id, created_at, detail) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_event_type = "CREATE TABLE IF NOT EXISTS issue_event_type ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_labelled = "CREATE TABLE IF NOT EXISTS issue_labelled ( " \ "issue_id int(20), " \ "label_id int(20), " \ "PRIMARY KEY il (issue_id, label_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_issue_commit_dependency = "CREATE TABLE IF NOT EXISTS issue_commit_dependency ( " \ "issue_id int(20), " \ "commit_id int(20), " \ "PRIMARY KEY ict (issue_id, commit_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_dependency = "CREATE TABLE IF NOT EXISTS issue_dependency ( " \ "issue_source_id int(20), " \ "issue_target_id int(20), " \ "type_id int(20), " \ "PRIMARY KEY st (issue_source_id, issue_target_id, type_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_issue_dependency_type = "CREATE TABLE IF NOT EXISTS issue_dependency_type (" \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" insert_issue_dependency_type = "INSERT IGNORE INTO issue_dependency_type VALUES (NULL, 'block'), " \ "(NULL, 'depends'), " \ "(NULL, 'related'), " \ "(NULL, 'duplicated');" cursor.execute(create_table_issue_tracker) cursor.execute(create_table_issue) cursor.execute(create_table_issue_assignee) cursor.execute(create_table_issue_subscriber) cursor.execute(create_table_issue_event) cursor.execute(create_table_issue_event_type) cursor.execute(create_table_issue_labelled) cursor.execute(create_issue_commit_dependency) cursor.execute(create_table_issue_dependency) cursor.execute(create_issue_dependency_type) cursor.execute(insert_issue_dependency_type) cursor.close() def _init_forum_tables(self): #initializes tables used to model forum data cursor = self._cnx.cursor() create_table_forum = "CREATE TABLE IF NOT EXISTS forum ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "project_id int(20), " \ "name varchar(512), " \ "type varchar(512), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_topic = "CREATE TABLE IF NOT EXISTS topic ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "forum_id int(20), " \ "name varchar(256), " \ "votes int(10), " \ "views int(10), " \ "created_at timestamp NULL DEFAULT NULL, " \ "last_change_at timestamp NULL DEFAULT NULL, " \ "CONSTRAINT name UNIQUE (forum_id, own_id)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_topic_labelled = "CREATE TABLE IF NOT EXISTS topic_labelled ( " \ "topic_id int(20), " \ "label_id int(20), " \ "PRIMARY KEY il (topic_id, label_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_forum) cursor.execute(create_table_topic) cursor.execute(create_table_topic_labelled) cursor.close() def _init_instant_messaging_tables(self): #initializes tables used to model instant messaging data cursor = self._cnx.cursor() create_table_instant_messaging = "CREATE TABLE IF NOT EXISTS instant_messaging ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "project_id int(20), " \ "name varchar(512), " \ "type varchar(512), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_channel = "CREATE TABLE IF NOT EXISTS channel ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "instant_messaging_id int(20), " \ "name varchar(256), " \ "description varchar(512), " \ "created_at timestamp NULL DEFAULT NULL, " \ "last_change_at timestamp NULL DEFAULT NULL, " \ "CONSTRAINT name UNIQUE (instant_messaging_id, own_id)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_instant_messaging) cursor.execute(create_table_channel) cursor.close()
class GitDao(): """ This class handles the persistence and retrieval of Git data """ def __init__(self, config, logger): """ :type config: dict :param config: the DB configuration file :type logger: Object :param logger: logger """ try: self._config = config self._logger = logger self._db_util = DbUtil() self._cnx = self._db_util.get_connection(self._config) except: self._logger.error("GitDao init failed") raise def check_connection_alive(self): try: cursor = self._cnx.cursor() cursor.execute("SELECT VERSION()") results = cursor.fetchone() ver = results[0] cursor.close() if not ver: self._cnx = self._db_util.restart_connection( self._config, self._logger) except: self._cnx = self._db_util.restart_connection( self._config, self._logger) def close_connection(self): self._db_util.close_connection(self._cnx) def restart_connection(self): self._cnx = self._db_util.restart_connection(self._config, self._logger) def get_connection(self): return self._cnx def get_cursor(self): return self._cnx.cursor() def close_cursor(self, cursor): return cursor.close() def fetchone(self, cursor): return cursor.fetchone() def execute(self, cursor, query, arguments): cursor.execute(query, arguments) def array2string(self, array): return ','.join(str(x) for x in array) def function_at_commit_is_empty(self, repo_id): """ checks function at commit table is empty :type repo_id: int :param repo_id: id of an existing repository in the DB """ cursor = self._cnx.cursor() query = "SELECT COUNT(*) " \ "FROM commit c " \ "JOIN function_at_commit fac ON c.id = fac.commit_id " \ "WHERE c.repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() count = 0 if row: count = int(row[0]) cursor.close() return int(count > 0) def select_code_at_commit(self, commit_id, file_id): """ retrieve the code at commit info for a given file and commit :type commit_id: int :param commit_id: id of an existing commit in the DB :type file_id: int :param file_id: id of an existing file in the DB """ found = None cursor = self._cnx.cursor() query = "SELECT * " \ "FROM code_at_commit " \ "WHERE commit_id = %s AND file_id = %s" arguments = [commit_id, file_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row cursor.close() return found def line_detail_table_is_empty(self, repo_id): """ checks line detail table is empty :type repo_id: int :param repo_id: id of an existing repository in the DB """ cursor = self._cnx.cursor() query = "SELECT COUNT(*) " \ "FROM commit c " \ "JOIN file_modification fm ON c.id = fm.commit_id " \ "JOIN line_detail l ON fm.id = l.file_modification_id " \ "WHERE l.content IS NOT NULL AND repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() count = 0 if row: count = int(row[0]) cursor.close() return int(count > 0) def code_at_commit_is_empty(self, repo_id): """ checks code at commit table is empty :type repo_id: int :param repo_id: id of an existing repository in the DB """ cursor = self._cnx.cursor() query = "SELECT COUNT(*) " \ "FROM commit c " \ "JOIN code_at_commit cac ON c.id = cac.commit_id " \ "WHERE c.repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() count = 0 if row: count = int(row[0]) cursor.close() return int(count > 0) def file_modification_patch_is_empty(self, repo_id): """ checks patch column in file modification table is empty :type repo_id: int :param repo_id: id of an existing repository in the DB """ cursor = self._cnx.cursor() query = "SELECT COUNT(*) " \ "FROM commit c " \ "JOIN file_modification fm ON c.id = fm.commit_id " \ "WHERE patch IS NOT NULL and repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() count = 0 if row: count = int(row[0]) cursor.close() return int(count > 0) def get_last_commit_id(self, repo_id): """ gets last commit id :type repo_id: int :param repo_id: id of an existing repository in the DB """ found = None cursor = self._cnx.cursor() query = "SELECT MAX(id) as last_commit_id " \ "FROM commit c " \ "WHERE repo_id = %s" arguments = [repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_repo_id(self, repo_name): """ selects id of a repository by its name :type repo_name: str :param repo_name: name of an existing repository in the DB """ return self._db_util.select_repo_id(self._cnx, repo_name, self._logger) def insert_repo(self, project_id, repo_name): """ inserts repository to DB :type project_id: int :param project_id: id of an existing project in the DB :type repo_name: str :param repo_name: name of a repository to insert """ return self._db_util.insert_repo(self._cnx, project_id, repo_name, self._logger) def select_project_id(self, project_name): """ selects id of a project by its name :type project_name: str :param project_name: name of an existing project in the DB """ return self._db_util.select_project_id(self._cnx, project_name, self._logger) def get_user_id(self, user_name, user_email): """ gets id of a user :type user_name: str :param user_name: name of the user :type user_email: str :param user_email: email of the user """ if user_email == None and user_name == None: user_name = "unknown_user" user_email = "unknown_user" if user_email: user_id = self._db_util.select_user_id_by_email( self._cnx, user_email, self._logger) else: user_id = self._db_util.select_user_id_by_name( self._cnx, user_name, self._logger) if not user_id: self._db_util.insert_user(self._cnx, user_name, user_email, self._logger) user_id = self._db_util.select_user_id_by_email( self._cnx, user_email, self._logger) return user_id def insert_commit_parents(self, parents, commit_id, sha, repo_id): """ inserts commit parents to DB, one by one :type parents: list of Object :param parents: parents of a commit :type commit_id: int :param commit_id: id of the commit :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ cursor = self._cnx.cursor() for parent in parents: parent_id = self.select_commit_id(parent.hexsha, repo_id) if not parent_id: self._logger.warning( "parent commit id not found! SHA parent " + str(parent.hexsha)) query = "INSERT IGNORE INTO commit_parent " \ "VALUES (%s, %s, %s, %s, %s)" if parent_id: arguments = [repo_id, commit_id, sha, parent_id, parent.hexsha] else: arguments = [repo_id, commit_id, sha, None, parent.hexsha] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_all_commit_parents(self, parents, commit_id, sha, repo_id): """ inserts commit parents to DB all together :type parents: list of Object :param parents: parents of a commit :type commit_id: int :param commit_id: id of the commit :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ to_insert = [] for parent in parents: parent_id = self.select_commit_id(parent.hexsha, repo_id) if not parent_id: self._logger.warning( "parent commit id not found! SHA parent " + str(parent.hexsha)) if parent_id: to_insert.append( (repo_id, commit_id, sha, parent_id, parent.hexsha)) else: to_insert.append( (repo_id, commit_id, sha, None, parent.hexsha)) if to_insert: cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit_parent(repo_id, commit_id, commit_sha, parent_id, parent_sha) VALUES (%s, %s, %s, %s, %s)" cursor.executemany(query, [i for i in to_insert]) self._cnx.commit() cursor.close() def insert_commits_in_reference(self, commits_data): """ inserts commits to DB all together :type commits_data: list of Object :param commits_data: commit data """ if commits_data: cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit_in_reference(repo_id, commit_id, ref_id) VALUES (%s, %s, %s)" cursor.executemany(query, commits_data) self._cnx.commit() cursor.close() def insert_commit_in_reference(self, repo_id, commit_id, ref_id): """ inserts commit to DB :type repo_id: int :param repo_id: id of the repository :type commit_id: int :param commit_id: id of the commit :type ref_id: int :param ref_id: id of the reference """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit_in_reference " \ "VALUES (%s, %s, %s)" arguments = [repo_id, commit_id, ref_id] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_line_details(self, file_modification_id, detail): """ inserts line details to DB :type file_modification_id: int :param file_modification_id: id of the file modification :type detail: str :param detail: line content """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO line_detail " \ "VALUES (%s, %s, %s, %s, %s, %s, %s)" arguments = [ file_modification_id, detail[0], detail[1], detail[2], detail[3], detail[4], detail[5] ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_file_modification_id(self, commit_id, file_id): """ selects file modification id :type commit_id: int :param commit_id: id of the commit :type file_id: int :param file_id: id of the file """ cursor = self._cnx.cursor() query = "SELECT id " \ "FROM file_modification " \ "WHERE commit_id = %s AND file_id = %s" arguments = [commit_id, file_id] cursor.execute(query, arguments) row = cursor.fetchone() found = None if row: found = row[0] cursor.close() return found def insert_file_modification(self, commit_id, file_id, status, additions, deletions, changes, patch_content): """ inserts file modification to DB :type commit_id: int :param commit_id: id of the commit :type file_id: int :param file_id: id of the file :type status: str :param status: type of the modification :type additions: int :param additions: number of additions :type deletions: int :param deletions: number of deletions :type changes: int :param changes: number of changes :type patch_content: str :param patch_content: content of the patch """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO file_modification " \ "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s)" arguments = [ commit_id, file_id, status, additions, deletions, changes, patch_content ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_file_renamed(self, repo_id, current_file_id, previous_file_id, file_modification_id): """ inserts file renamed information :type repo_id: int :param repo_id: id of the repository :type current_file_id: int :param current_file_id: id of the renamed file :type previous_file_id: int :param previous_file_id: id of the file before renaming :type file_modification_id: int :param file_modification_id: id of the file modification """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO file_renamed " \ "VALUES (%s, %s, %s, %s)" arguments = [ repo_id, current_file_id, previous_file_id, file_modification_id ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_file(self, repo_id, name, ext): """ inserts file :type repo_id: int :param repo_id: id of the repository :type name: str :param name: name of the file (full path) :type ext: str :param ext: extension of the file """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO file " \ "VALUES (%s, %s, %s, %s)" arguments = [None, repo_id, name, ext] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_file_id(self, repo_id, name): """ selects id of the file :type repo_id: int :param repo_id: id of the repository :type name: str :param name: name of the file (full path) """ cursor = self._cnx.cursor() query = "SELECT id " \ "FROM file " \ "WHERE name = %s AND repo_id = %s" arguments = [name, repo_id] cursor.execute(query, arguments) try: id = cursor.fetchone()[0] except: id = None cursor.close() return id def insert_reference(self, repo_id, ref_name, ref_type): """ inserts reference :type repo_id: int :param repo_id: id of the repository :type ref_name: str :param ref_name: name of the reference :type ref_type: str :param ref_type: type of the reference (branch or tag) """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO reference " \ "VALUES (%s, %s, %s, %s)" arguments = [None, repo_id, ref_name, ref_type] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_reference_name(self, repo_id, ref_id): """ selects reference name by its id :type repo_id: int :param repo_id: id of the repository :type ref_id: int :param ref_id: id of the reference """ found = None cursor = self._cnx.cursor() query = "SELECT name " \ "FROM reference " \ "WHERE id = %s and repo_id = %s" arguments = [ref_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_reference_id(self, repo_id, ref_name): """ selects reference id by its name :type repo_id: int :param repo_id: id of the repository :type ref_name: str :param ref_name: name of the reference """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM reference " \ "WHERE name = %s and repo_id = %s" arguments = [ref_name, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def insert_commit(self, repo_id, sha, message, author_id, committer_id, authored_date, committed_date, size): """ inserts commit to DB :type repo_id: int :param repo_id: id of the repository :type sha: str :param sha: SHA of the commit :type message: str :param message: message of the commit :type author_id: int :param author_id: author id of the commit :type committer_id: int :param committer_id: committer id of the commit :type authored_date: str :param authored_date: authored date of the commit :type committed_date: str :param committed_date: committed date of the commit :type size: int :param size: size of the commit """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO commit " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ None, repo_id, sha, message.strip(), author_id, committer_id, authored_date, committed_date, size ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_function_id(self, file_id, start_line, end_line): """ select DB function id :type file_id: int :param file_id: id of the file :type start_line: int :param start_line: line number where the function starts :type end_line: int :param end_line: line number where the function ends """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM function " \ "WHERE file_id = %s AND start_line = %s AND end_line = %s" arguments = [file_id, start_line, end_line] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def insert_code_at_commit(self, commit_id, file_id, ccn, loc, comments, blanks, funs, tokens, avg_ccn, avg_loc, avg_tokens): """ inserts to DB code information of a file at a given commit :type commit_id: int :param commit_id: id of the commit :type file_id: int :param file_id: id of the file :type ccn: int :param ccn: cyclomatic complexity value :type loc: int :param loc: lines of code :type comments: int :param comments: commented lines :type blanks: int :param blanks: blank lines :type funs: int :param funs: number of functions :type tokens: int :param tokens: tokens in the files :type avg_ccn: int :param avg_ccn: file avg cyclomatic complexity (per function) :type avg_loc: int :param avg_loc: file avg lines of code (per function) :type avg_tokens: int :param avg_tokens: file avg number of tokens (per function) """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO code_at_commit " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ commit_id, file_id, ccn, loc, comments, blanks, funs, tokens, avg_ccn, avg_loc, avg_tokens ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_function(self, function_name, file_id, num_arguments, loc, tokens, total_lines, ccn, start_line, end_line): """ inserts function to DB :type function_name: str :param function_name: name of the function :type file_id: int :param file_id: id of the file :type num_arguments: int :param num_arguments: number of arguments :type loc: int :param loc: lines of code :type tokens: int :param tokens: tokens in the function :type total_lines: int :param total_lines: number of lines (loc + comments + empty lines) :type ccn: int :param ccn: cyclomatic complexity of the function :type start_line: int :param start_line: line number where the function starts :type end_line: int :param end_line: line number where the function ends """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO function " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ None, function_name, file_id, num_arguments, loc, tokens, total_lines, ccn, start_line, end_line ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_function_at_commit(self, fun_id, commit_id): """ inserts link between a function and the commit :type fun_id: int :param fun_id: id of the function :type commit_id: int :param commit_id: id of the commit """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO function_at_commit " \ "VALUES (%s, %s)" arguments = [commit_id, fun_id] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def update_commit_parent(self, parent_id, parent_sha, repo_id): """ inserts commit parent to DB :type parent_id: int :param parent_id: id of the commit parent :type parent_sha: str :param parent_sha: SHA of the commit parent :type repo_id: int :param repo_id: id of the repository """ cursor = self._cnx.cursor() query_update = "UPDATE commit_parent " \ "SET parent_id = %s " \ "WHERE parent_id IS NULL AND parent_sha = %s AND repo_id = %s " arguments = [parent_id, parent_sha, repo_id] cursor.execute(query_update, arguments) self._cnx.commit() cursor.close() def fix_commit_parent_table(self, repo_id): """ checks for missing commit parent information and fixes it :type repo_id: int :param repo_id: id of the repository """ cursor = self._cnx.cursor() query_select = "SELECT parent_sha " \ "FROM commit_parent " \ "WHERE parent_id IS NULL AND repo_id = %s" arguments = [repo_id] cursor.execute(query_select, arguments) row = cursor.fetchone() while row: parent_sha = row[0] parent_id = self.select_commit_id(parent_sha, repo_id) self.update_commit_parent(parent_id, parent_sha, repo_id) row = cursor.fetchone() cursor.close() def select_commit_id(self, sha, repo_id): """ selects id of a commit by its SHA :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM commit " \ "WHERE sha = %s AND repo_id = %s" arguments = [sha, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_all_developer_ids(self, repo_id): """ selects all developers (committers or authors) of a given repo :type repo_id: int :param repo_id: id of the repository """ user_ids = [] cursor = self._cnx.cursor() query = "SELECT c.author_id " \ "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.author_id " \ "WHERE repo_id = %s " \ "UNION " \ "SELECT c.committer_id " \ "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.committer_id " \ "WHERE repo_id = %s " arguments = [repo_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: user_id = row[0] user_ids.append(user_id) row = cursor.fetchone() cursor.close() return user_ids def select_sha_commit_by_user(self, user_id, repo_id, match_on="author"): """ selects the SHA of the first commit (authored or committed) by a given user id :type user_id: int :param user_id: id of the user :type repo_id: int :param repo_id: id of the repository :type match_on: str (author or committer) :param match_on: define whether to perform the match on the author id or committer id """ found = None cursor = self._cnx.cursor() if match_on == "committer": query = "SELECT sha " \ "FROM commit " \ "WHERE committer_id = %s AND repo_id = %s " \ "LIMIT 1" else: query = "SELECT sha " \ "FROM commit " \ "WHERE author_id = %s AND repo_id = %s " \ "LIMIT 1" arguments = [user_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] return found def select_file_changes(self, file_id, ref_id, before_date=None, patch=False, code=False): """ get all file changes, excluding renamings for a given file within a reference :type file_id: int :param file_id: id of the file :type ref_id: int :param ref_id: id of the reference :type before_date: str :param before_date: if not null, it retrieves the changes before the given date :type patch: bool :param: patch: if True, it retrieves also the patch associate to each file change :type code: bool :param: code: if True, it retrieves also code-related information of the file (ccn, loc, commented lines, etc.) """ found = [] before_date_selection = "" if before_date: before_date_selection = " AND c.authored_date <= '" + str( before_date) + "'" patch_selection = ", NULL AS patch" if patch: patch_selection = ", fm.patch" code_selection = "" code_join = "" if code: code_selection = ", " \ "IFNULL(cac.ccn, 0) AS ccn, " \ "IFNULL(cac.loc, 0) AS loc, " \ "IFNULL(cac.commented_lines, 0) AS commented_lines, " \ "IFNULL(cac.blank_lines, 0) AS blank_lines, " \ "IFNULL(cac.funs, 0) AS funs, " \ "IFNULL(cac.tokens, 0) AS tokens, " \ "IFNULL(cac.avg_ccn, 0) AS avg_ccn, " \ "IFNULL(cac.avg_loc, 0) AS avg_loc, " \ "IFNULL(cac.avg_tokens, 0) AS avg_tokens" code_join = "LEFT JOIN code_at_commit cac ON cac.commit_id = c.id AND cac.file_id = f.id " cursor = self._cnx.cursor() query = "SELECT f.id, f.name, c.sha, c.message, IFNULL(ua.alias_id, c.author_id) AS author_id, " \ "c.authored_date, c.committed_date, fm.status, fm.additions, fm.deletions, " \ "fm.changes" + patch_selection + code_selection + " " \ "FROM file f join file_modification fm on f.id = fm.file_id " \ "JOIN commit c ON c.id = fm.commit_id " \ "JOIN commit_in_reference cin ON cin.commit_id = c.id " \ "JOIN reference r ON r.id = cin.ref_id " \ "LEFT JOIN user_alias ua ON ua.user_id = c.author_id " + code_join + " " \ "WHERE f.id = %s and r.id = %s" + before_date_selection arguments = [file_id, ref_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: file_id = row[0] file_name = row[1] sha = row[2] message = row[3] author_id = int(row[4]) authored_date = row[5] committed_date = row[6] status = str(row[7]) additions = int(row[8]) deletions = int(row[9]) changes = int(row[10]) git_data = { 'sha': sha, 'commit_message': message, 'author_id': author_id, 'authored_date': authored_date, 'committed_date': committed_date, 'status': status, 'additions': additions, 'deletions': deletions, 'changes': changes, 'file_name': file_name, 'file_id': file_id } patch_data = {} if patch: patch = str(row[11]) patch_data = {'patch': patch} code_data = {} if code: ccn = int(row[12]) loc = int(row[13]) commented_lines = int(row[14]) blank_lines = int(row[15]) funs = int(row[16]) tokens = int(row[17]) avg_ccn = float(row[18]) avg_loc = float(row[19]) avg_tokens = float(row[20]) code_data = { 'ccn': ccn, 'loc': loc, 'commented_lines': commented_lines, 'blank_lines': blank_lines, 'funs': funs, 'tokens': tokens, 'avg_ccn': avg_ccn, 'avg_loc': avg_loc, 'avg_tokens': avg_tokens } found.append( dict(git_data.items() + patch_data.items() + code_data.items())) row = cursor.fetchone() cursor.close() return found def select_file_renamings(self, file_id, ref_id): """ get file renamings for a given file within a reference :type file_id: int :param file_id: id of the file :type ref_id: int :param ref_id: id of the reference """ renamings = [] cursor = self._cnx.cursor() query = "SELECT fr.current_file_id, fr.previous_file_id, c.authored_date, c.authored_date, c.committed_date " \ "FROM file f JOIN file_modification fm ON f.id = fm.file_id " \ "JOIN commit c ON c.id = fm.commit_id " \ "JOIN commit_in_reference cin ON cin.commit_id = c.id " \ "JOIN reference r ON r.id = cin.ref_id " \ "JOIN file_renamed fr ON fr.file_modification_id = fm.id AND fr.current_file_id = f.id " \ "WHERE f.id = %s and r.id = %s AND fm.status = 'renamed';" arguments = [file_id, ref_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: renamings.append(row[1]) row = cursor.fetchone() cursor.close() if len(renamings) > 1: self._logger.warning("The file with id " + str(file_id) + " has multiple renamings in reference " + str(ref_id) + "!") return renamings