class GitHubUtil():
    """
    This class helps mapping the identities of the users in the vcs and GitHub
    """

    def __init__(self, db_name, project_name, repo_name,
                 github_repo_full_name, tokens, config, log_root_path):
        """
        :type db_name: str
        :param db_name: the name of an existing DB

        :type project_name: str
        :param project_name: the name of an existing project in the DB

        :type repo_name: str
        :param repo_name: the name of an existing repository in the DB

        :type github_repo_full_name: str
        :param github_repo_full_name: full name of the GitHub repository

        :type tokens: list str
        :param tokens: list of GitHub tokens

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._log_path = log_root_path + "map-vcs-github-users-" + db_name + "-" + project_name + "-" + repo_name
        self._project_name = project_name
        self._db_name = db_name
        self._repo_name = repo_name
        self._tokens = tokens
        # index of the token currently in use; rotated by _change_token
        self._active_token = 0
        self._url = github_repo_full_name
        config.update({'database': db_name})
        self._config = config

        self._logging_util = LoggingUtil()
        self._logger = self._logging_util.get_logger(self._log_path)

        self._db_util = DbUtil()
        self._cnx = self._db_util.get_connection(self._config)
        self._git_dao = GitDao(self._config, self._logger)
        self._github_querier = GitHubQuerier(self._url,
                                             self._tokens[self._active_token],
                                             self._logger)

    def _change_token(self):
        # Rotates to the next GitHub token (round-robin) when the current
        # one is no longer usable; a new querier is built with the new token.
        if len(self._tokens) > 1:
            if not self._github_querier._token_util._is_usuable(
                    self._tokens[self._active_token]):
                self._active_token = (self._active_token + 1) % len(self._tokens)
                self._github_querier = GitHubQuerier(
                    self._url, self._tokens[self._active_token], self._logger)

    def _analyse_user(self, user, unmatched_user, sha):
        # Tries to match the Git user `unmatched_user` against the GitHub
        # account `user` (found via commit `sha`); on success, persists the
        # alias relationship in the DB.
        if user:
            user_name = self._github_querier.get_user_name(user)
            user_ids = self._db_util.select_all_user_ids_by_name(
                self._cnx, user_name, self._logger)
            for user_id in user_ids:
                try:
                    matched_id, alias_id = self._db_util._identify_user_and_alias(
                        self._cnx, unmatched_user, user_id, self._logger)
                    if matched_id != alias_id:
                        self._db_util.insert_user_alias(
                            self._cnx, matched_id, alias_id, self._logger)
                        self._logger.info("user ids " + str(matched_id) +
                                          " and " + str(alias_id) +
                                          " successfully matched")
                except Exception:
                    # bug fix: the previous handler referenced alias_id,
                    # which is unbound when _identify_user_and_alias raises
                    # before assignment (NameError inside the handler)
                    self._logger.error("user id " + str(user_id) +
                                       " not matched", exc_info=True)
                    continue
        else:
            self._logger.warning("GitHub user not found for commit " + sha)

    def match(self):
        """
        matches GitHub and Git identities
        """
        try:
            self._fileHandler = self._logging_util.get_file_handler(
                self._logger, self._log_path, "info")
            self._logger.info("GitHubUtil started")
            start_time = datetime.now()

            repo_id = self._git_dao.select_repo_id(self._repo_name)
            user_ids = self._git_dao.select_all_developer_ids(repo_id)
            alias_ids = self._db_util.select_all_aliased_user_ids(
                self._cnx, self._logger)
            # Git users for which no alias has been recorded yet
            unmatched_users = list(set(user_ids) - set(alias_ids))

            for unmatched_user in unmatched_users:
                matched = False
                # first try to find a commit authored by the user ...
                sha = self._git_dao.select_sha_commit_by_user(
                    unmatched_user, repo_id, match_on="author")
                if sha:
                    author = self._github_querier.get_author_by_commit(sha)
                    self._analyse_user(author, unmatched_user, sha)
                    matched = True
                else:
                    # ... otherwise fall back to a commit committed by the user
                    sha = self._git_dao.select_sha_commit_by_user(
                        unmatched_user, repo_id, match_on="committer")
                    if sha:
                        committer = self._github_querier.get_committer_by_commit(sha)
                        self._analyse_user(committer, unmatched_user, sha)
                        matched = True

                if not matched:
                    self._logger.warning("No commits found for user " +
                                         str(unmatched_user))

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(
                end_time, start_time)
            self._logger.info("GitHubUtil finished after " +
                              str(minutes_and_seconds[0]) + " minutes and " +
                              str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(
                self._logger, self._fileHandler)
        except Exception:
            # bug fix: bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt; Exception keeps the logging behaviour while
            # letting those propagate
            self._logger.error("GitHubUtil failed", exc_info=True)
        finally:
            if self._git_dao:
                self._git_dao.close_connection()
            if self._cnx:
                self._db_util.close_connection(self._cnx)
class GitDao():
    """
    This class handles the persistence and retrieval of Git data
    """

    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except Exception:
            # log and re-raise: the DAO is unusable without a connection
            self._logger.error("GitDao init failed")
            raise

    def check_connection_alive(self):
        """
        checks that the DB connection is alive and restarts it if it is not
        """
        try:
            cursor = self._cnx.cursor()
            cursor.execute("SELECT VERSION()")
            results = cursor.fetchone()
            ver = results[0]
            cursor.close()
            if not ver:
                self._cnx = self._db_util.restart_connection(
                    self._config, self._logger)
        except Exception:
            # any failure of the probe query means the connection is gone
            self._cnx = self._db_util.restart_connection(
                self._config, self._logger)

    def close_connection(self):
        """ closes the DB connection held by this DAO """
        self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        """ restarts the DB connection held by this DAO """
        self._cnx = self._db_util.restart_connection(self._config, self._logger)

    def get_connection(self):
        """ returns the underlying DB connection """
        return self._cnx

    def get_cursor(self):
        """ returns a new cursor on the DB connection """
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        """ closes the given cursor """
        return cursor.close()

    def fetchone(self, cursor):
        """ fetches one row from the given cursor """
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        """ executes a parameterized query on the given cursor """
        cursor.execute(query, arguments)

    def array2string(self, array):
        """ joins the elements of an array into a comma-separated string """
        return ','.join(str(x) for x in array)

    def line_detail_table_is_empty(self, repo_id):
        """
        checks line detail table is empty (returns 1 if NOT empty, 0 otherwise)

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "JOIN line_detail l ON fm.id = l.file_modification_id " \
                "WHERE l.content IS NOT NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])
        cursor.close()
        return int(count > 0)

    def file_modification_patch_is_empty(self, repo_id):
        """
        checks patch column in file modification table is empty
        (returns 1 if NOT empty, 0 otherwise)

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "WHERE patch IS NOT NULL and repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])
        cursor.close()
        return int(count > 0)

    def get_last_commit_id(self, repo_id):
        """
        gets last commit id

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT MAX(id) as last_commit_id " \
                "FROM commit c " \
                "WHERE repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_repo_id(self, repo_name):
        """
        selects id of a repository by its name

        :type repo_name: str
        :param repo_name: name of an existing repository in the DB
        """
        return self._db_util.select_repo_id(self._cnx, repo_name, self._logger)

    def insert_repo(self, project_id, repo_name):
        """
        inserts repository to DB

        :type project_id: int
        :param project_id: id of an existing project in the DB

        :type repo_name: str
        :param repo_name: the name of a repository to insert
        """
        return self._db_util.insert_repo(self._cnx, project_id, repo_name,
                                         self._logger)

    def select_project_id(self, project_name):
        """
        selects id of a project by its name

        :type project_name: str
        :param project_name: name of an existing project in the DB
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def get_user_id(self, user_name, user_email):
        """
        gets id of a user, inserting the user first if not already present

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """
        if not user_email and not user_name:
            # NOTE: historical misspelling kept on purpose for compatibility
            # with rows already stored in existing DBs
            user_name = "uknonwn_user"
            user_email = "uknonwn_user"

        if user_email:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            # bug fix: re-select with the same criterion used above; the
            # previous code always re-selected by email, returning None
            # when only a name was available
            if user_email:
                user_id = self._db_util.select_user_id_by_email(
                    self._cnx, user_email, self._logger)
            else:
                user_id = self._db_util.select_user_id_by_name(
                    self._cnx, user_name, self._logger)
        return user_id

    def insert_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB, one by one

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning("parent commit id not found! SHA parent " +
                                     str(parent.hexsha))

            query = "INSERT IGNORE INTO commit_parent " \
                    "VALUES (%s, %s, %s, %s, %s)"

            # parent_id may be NULL here; fix_commit_parent_table fills it later
            if parent_id:
                arguments = [repo_id, commit_id, sha, parent_id, parent.hexsha]
            else:
                arguments = [repo_id, commit_id, sha, None, parent.hexsha]

            cursor.execute(query, arguments)
            self._cnx.commit()
        cursor.close()

    def insert_all_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB all together

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        to_insert = []
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning("parent commit id not found! SHA parent " +
                                     str(parent.hexsha))

            # parent_id may be NULL here; fix_commit_parent_table fills it later
            if parent_id:
                to_insert.append((repo_id, commit_id, sha, parent_id,
                                  parent.hexsha))
            else:
                to_insert.append((repo_id, commit_id, sha, None, parent.hexsha))

        if to_insert:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_parent(repo_id, commit_id, commit_sha, parent_id, parent_sha) " \
                    "VALUES (%s, %s, %s, %s, %s)"
            # idiom fix: pass the list directly, no redundant comprehension
            cursor.executemany(query, to_insert)
            self._cnx.commit()
            cursor.close()

    def insert_commits_in_reference(self, commits_data):
        """
        inserts commits to DB all together

        :type commits_data: list of Object
        :param commits_data: commit data (tuples of repo_id, commit_id, ref_id)
        """
        if commits_data:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_in_reference(repo_id, commit_id, ref_id) VALUES (%s, %s, %s)"
            cursor.executemany(query, commits_data)
            self._cnx.commit()
            cursor.close()

    def insert_commit_in_reference(self, repo_id, commit_id, ref_id):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type commit_id: int
        :param commit_id: id of the commit

        :type ref_id: int
        :param ref_id: id of the reference
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit_in_reference " \
                "VALUES (%s, %s, %s)"
        arguments = [repo_id, commit_id, ref_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_line_details(self, file_modification_id, detail):
        """
        inserts line details to DB

        :type file_modification_id: int
        :param file_modification_id: id of the file modification

        :type detail: str
        :param detail: line content (sequence of 6 fields)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO line_detail " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            file_modification_id, detail[0], detail[1], detail[2], detail[3],
            detail[4], detail[5]
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_modification_id(self, commit_id, file_id):
        """
        selects file modification id

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file_modification " \
                "WHERE commit_id = %s AND file_id = %s"
        arguments = [commit_id, file_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_file_modification(self, commit_id, file_id, status, additions,
                                 deletions, changes, patch_content):
        """
        inserts file modification to DB

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file

        :type status: str
        :param status: type of the modification

        :type additions: int
        :param additions: number of additions

        :type deletions: int
        :param deletions: number of deletions

        :type changes: int
        :param changes: number of changes

        :type patch_content: str
        :param patch_content: content of the patch
        """
        # 4194304 is the max statement size for MySQL by default.
        if patch_content is not None and len(patch_content) > 4194304:
            self._logger.error('Patch content for commit (' + str(commit_id) +
                               ') is too large, not persisted.')
            patch_content = None

        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file_modification " \
                "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            commit_id, file_id, status, additions, deletions, changes,
            patch_content
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file_renamed(self, repo_id, current_file_id, previous_file_id,
                            file_modification_id):
        """
        inserts file renamed information

        :type repo_id: int
        :param repo_id: id of the repository

        :type current_file_id: int
        :param current_file_id: id of the renamed file

        :type previous_file_id: int
        :param previous_file_id: id of the file before renaming

        :type file_modification_id: int
        :param file_modification_id: id of the file modification
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file_renamed " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [
            repo_id, current_file_id, previous_file_id, file_modification_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file(self, repo_id, name, ext=None):
        """
        inserts file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)

        :type ext: str
        :param ext: extension of the file
        """
        if name is None:
            return
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file " \
                "VALUES (%s, %s, %s, %s)"
        # extract file extension from file path if not passed
        # (NOTE: for names without a dot this yields the whole name)
        if not ext:
            ext = name.split('.')[-1]
        arguments = [None, repo_id, name, ext]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_id_before_date(self, repo_id, name, before_date):
        """
        selects id of the file before date

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)

        :type before_date: timestamp
        :param before_date: date
        """
        cursor = self._cnx.cursor()
        # security fix: before_date is now a bound parameter instead of
        # being concatenated into the SQL string
        query = "SELECT DISTINCT f.id " \
                "FROM file f JOIN file_modification fm ON f.id = fm.file_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "WHERE f.name = %s AND f.repo_id = %s AND fm.status = 'added' " \
                "AND c.authored_date <= %s"
        arguments = [name, repo_id, before_date]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = row[0] if row else None
        cursor.close()
        return found

    def select_file_id(self, repo_id, name):
        """
        selects id of the file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file " \
                "WHERE name = %s AND repo_id = %s"
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        # robustness fix: test the row instead of catching the TypeError
        # raised by subscripting None (was a bare except)
        row = cursor.fetchone()
        found = row[0] if row else None
        cursor.close()
        return found

    def insert_reference(self, repo_id, ref_name, ref_type):
        """
        inserts reference

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference

        :type ref_type: str
        :param ref_type: type of the reference (branch or tag)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO reference " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, ref_name, ref_type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_reference_name(self, repo_id, ref_id):
        """
        selects reference name by its id

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_id: int
        :param ref_id: id of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT name " \
                "FROM reference " \
                "WHERE id = %s and repo_id = %s"
        arguments = [ref_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_reference_id(self, repo_id, ref_name):
        """
        selects reference id by its name

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM reference " \
                "WHERE name = %s and repo_id = %s"
        arguments = [ref_name, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_commit(self, repo_id, sha, message, author_id, committer_id,
                      authored_date, committed_date, size):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type sha: str
        :param sha: SHA of the commit

        :type message: str
        :param message: message of the commit

        :type author_id: int
        :param author_id: author id of the commit

        :type committer_id: int
        :param committer_id: committer id of the commit

        :type authored_date: str
        :param authored_date: authored date of the commit

        :type committed_date: str
        :param committed_date: committed date of the commit

        :type size: int
        :param size: size of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, repo_id, sha, message.strip(), author_id, committer_id,
            authored_date, committed_date, size
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_commit_parent(self, parent_id, parent_sha, repo_id):
        """
        fills in the parent_id of commit_parent rows that only have the SHA

        :type parent_id: int
        :param parent_id: id of the commit parent

        :type parent_sha: str
        :param parent_sha: SHA of the commit parent

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_update = "UPDATE commit_parent " \
                       "SET parent_id = %s " \
                       "WHERE parent_id IS NULL AND parent_sha = %s AND repo_id = %s "
        arguments = [parent_id, parent_sha, repo_id]
        cursor.execute(query_update, arguments)
        self._cnx.commit()
        cursor.close()

    def fix_commit_parent_table(self, repo_id):
        """
        checks for missing commit parent information and fixes it

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_select = "SELECT parent_sha " \
                       "FROM commit_parent " \
                       "WHERE parent_id IS NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query_select, arguments)
        row = cursor.fetchone()
        # NOTE(review): update_commit_parent commits on the same connection
        # while this cursor is still being iterated — presumably safe with
        # the connector in use, but worth confirming
        while row:
            parent_sha = row[0]
            parent_id = self.select_commit_id(parent_sha, repo_id)
            self.update_commit_parent(parent_id, parent_sha, repo_id)
            row = cursor.fetchone()
        cursor.close()

    def select_commit_id(self, sha, repo_id):
        """
        selects id of a commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_commit_id_before_date(self, sha, repo_id, before_date):
        """
        selects id of a commit by its SHA before a given date

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository

        :type before_date: timestamp
        :param before_date: date
        """
        found = None
        cursor = self._cnx.cursor()
        # security fix: before_date is now a bound parameter instead of
        # being concatenated into the SQL string
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s AND authored_date <= %s"
        arguments = [sha, repo_id, before_date]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_all_developer_ids(self, repo_id):
        """
        selects all developers (committers or authors) of a given repo

        :type repo_id: int
        :param repo_id: id of the repository
        """
        user_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT c.author_id " \
                "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.author_id " \
                "WHERE repo_id = %s AND u.name IS NOT NULL AND u.email IS NOT NULL " \
                "UNION " \
                "SELECT c.committer_id " \
                "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.committer_id " \
                "WHERE repo_id = %s AND u.name IS NOT NULL AND u.email IS NOT NULL "
        arguments = [repo_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            user_id = row[0]
            user_ids.append(user_id)
            row = cursor.fetchone()
        cursor.close()
        return user_ids

    def select_sha_commit_by_user(self, user_id, repo_id, match_on=None):
        """
        selects the SHA of the first commit (authored or committed) by a given user id

        :type user_id: int
        :param user_id: id of the user

        :type repo_id: int
        :param repo_id: id of the repository

        :type match_on: str
        :param match_on: "author" to match on the author only, "committer"
                         to match on the committer only; any other value
                         matches on either (previous behaviour)
        """
        # bug fix: callers (e.g. GitHubUtil.match) pass match_on="author" /
        # "committer", which the previous signature did not accept
        if match_on == "author":
            condition = "author_id = %s"
            arguments = [user_id, repo_id]
        elif match_on == "committer":
            condition = "committer_id = %s"
            arguments = [user_id, repo_id]
        else:
            condition = "(author_id = %s OR committer_id = %s)"
            arguments = [user_id, user_id, repo_id]

        found = None
        cursor = self._cnx.cursor()
        query = "SELECT sha " \
                "FROM commit " \
                "WHERE " + condition + " AND repo_id = %s " \
                "LIMIT 1"
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        # bug fix: the cursor was previously left open
        cursor.close()
        return found
class DbSchema(): """ This class initializes the DB schema """ def __init__(self, db_name, config, log_root_path): """ :type db_name: str :param db_name: the name of the DB to initialize/connect to, it cannot be null and must follow the format allowed in MySQL (http://dev.mysql.com/doc/refman/5.7/en/identifiers.html). If a DB having a name equal already exists in Gitana, the existing DB will be dropped and a new one will be created :type config: dict :param config: the DB configuration file :type log_root_path: str :param log_root_path: the log path """ self._db_name = db_name self._config = config self._log_root_path = log_root_path self._db_util = DbUtil() self._logging_util = LoggingUtil() log_path = self._log_root_path + "db-schema-" + db_name self._logger = self._logging_util.get_logger(log_path) self._fileHandler = self._logging_util.get_file_handler( self._logger, log_path, "info") self._cnx = self._db_util.get_connection(self._config) def __del__(self): if self._cnx: self._db_util.close_connection(self._cnx) if self._logger: #deletes the file handler of the logger self._logging_util.remove_file_handler_logger( self._logger, self._fileHandler) def add_git_tables(self): """ initializes git tables if they do not exist """ self.set_database(self._db_name) self._init_git_tables() def add_issue_tracker_tables(self): """ initializes issue tracker tables if they do not exist """ self.set_database(self._db_name) self._init_shared_tables_issue_tracker_communication_channels() self._init_issue_tracker_tables() def add_instant_messaging_tables(self): """ initializes instant messaging tables if they do not exist """ self.set_database(self._db_name) self._init_shared_tables_issue_tracker_communication_channels() self._init_instant_messaging_tables() def add_forum_tables(self): """ initializes forum tables if they do not exist """ self.set_database(self._db_name) self._init_shared_tables_issue_tracker_communication_channels() self._init_forum_tables() def 
init_database(self, init_git, init_issue_tracker, init_forum, init_instant_messaging): """ initializes the database tables and functions :type init_git: bool :param init_git: if True, it initializes the tables containing git data :type init_issue_tracker: bool :param init_issue_tracker: if True, it initializes the tables containing issue tracker data :type init_forum: bool :param init_forum: if True, it initializes the tables containing forum data :type init_instant_messaging: bool :param init_instant_messaging: if True, it initializes the tables containing instant messaging data """ try: self._logger.info("init database started") start_time = datetime.now() self._create_database() self.set_database(self._db_name) self._set_settings() self._init_common_tables() if init_issue_tracker or init_forum or init_instant_messaging: self._init_shared_tables_issue_tracker_communication_channels() if init_git: self._init_git_tables() if init_issue_tracker: self._init_issue_tracker_tables() if init_forum: self._init_forum_tables() if init_instant_messaging: self._init_instant_messaging_tables() self._init_functions() self._logger.info("database " + self._db_name + " created") end_time = datetime.now() minutes_and_seconds = self._logging_util.calculate_execution_time( end_time, start_time) self._logger.info("Init database finished after " + str(minutes_and_seconds[0]) + " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs") except Exception: self._logger.error("init database failed", exc_info=True) def create_project(self, project_name): """ inserts a project in the DB :type project_name: str :param project_name: the name of the project to create """ self._cnx = self._db_util.get_connection(self._config) self._db_util.insert_project(self._cnx, self._db_name, project_name) self._db_util.close_connection(self._cnx) def create_repository(self, project_name, repo_name): """ inserts a repository in the DB :type project_name: str :param project_name: the name of an 
existing project :type repo_name: str :param repo_name: the name of the repository to insert """ self._cnx = self._db_util.get_connection(self._config) self.set_database(self._db_name) project_id = self._db_util.select_project_id(self._cnx, project_name, self._logger) try: self._db_util.insert_repo(self._cnx, project_id, repo_name, self._logger) except Exception: self._logger.error("repository " + repo_name + " not inserted", exc_info=True) self._db_util.close_connection(self._cnx) def list_projects(self): """ lists all projects contained in the DB """ self._cnx = self._db_util.get_connection(self._config) project_names = [] self.set_database(self._db_name) cursor = self._cnx.cursor() query = "SELECT name FROM project" cursor.execute(query) row = cursor.fetchone() while row: project_names.append(row[0]) row = cursor.fetchone() cursor.close() return project_names def set_database(self, db_name): """ sets the DB used by the tool :type db_name: str :param db_name: the name of the DB """ try: self._logger.info("set database " + db_name + " started") self._db_util.set_database(self._cnx, db_name) self._logger.info("set database " + db_name + " finished") except Exception: self._logger.error("set database failed", exc_info=True) def _set_settings(self): #sets the settings (max connections, charset, file format, ...) 
used by the DB self._db_util.set_settings(self._cnx) def _create_database(self): #creates the database cursor = self._cnx.cursor() drop_database_if_exists = "DROP DATABASE IF EXISTS " + self._db_name cursor.execute(drop_database_if_exists) create_database = "CREATE DATABASE " + self._db_name cursor.execute(create_database) cursor.close() def _init_functions(self): #initializes functions cursor = self._cnx.cursor() levenshtein_distance = """ CREATE DEFINER=`root`@`localhost` FUNCTION `levenshtein_distance`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(11) DETERMINISTIC BEGIN DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT; DECLARE s1_char CHAR CHARACTER SET utf8; -- max strlen=255 for this function DECLARE cv0, cv1 VARBINARY(256); SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2), cv1 = 0x00, j = 1, i = 1, c = 0; IF (s1 = s2) THEN RETURN (0); ELSEIF (s1_len = 0) THEN RETURN (s2_len); ELSEIF (s2_len = 0) THEN RETURN (s1_len); END IF; WHILE (j <= s2_len) DO SET cv1 = CONCAT(cv1, CHAR(j)), j = j + 1; END WHILE; WHILE (i <= s1_len) DO SET s1_char = SUBSTRING(s1, i, 1), c = i, cv0 = CHAR(i), j = 1; WHILE (j <= s2_len) DO SET c = c + 1, cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1); SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost; IF (c > c_temp) THEN SET c = c_temp; END IF; SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1; IF (c > c_temp) THEN SET c = c_temp; END IF; SET cv0 = CONCAT(cv0, CHAR(c)), j = j + 1; END WHILE; SET cv1 = cv0, i = i + 1; END WHILE; RETURN (c); END""" soundex_match = """ CREATE DEFINER=`root`@`localhost` FUNCTION `soundex_match`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(1) DETERMINISTIC BEGIN DECLARE _result INT DEFAULT 0; IF SOUNDEX(s1) = SOUNDEX(s2) THEN SET _result = 1; END IF; RETURN _result; END""" cursor.execute(levenshtein_distance) cursor.execute(soundex_match) cursor.close() def _init_common_tables(self): #initializes common tables used by tables modeling 
git, issue tracker, forum and instant messaging data cursor = self._cnx.cursor() create_table_project = "CREATE TABLE IF NOT EXISTS project( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(255), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_user = "******" \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "email varchar(256), " \ "CONSTRAINT namem UNIQUE (name, email) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_user_alias = "CREATE TABLE IF NOT EXISTS user_alias ( " \ "user_id int(20), " \ "alias_id int(20), " \ "CONSTRAINT a UNIQUE (user_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_project) cursor.execute(create_table_user) cursor.execute(create_table_user_alias) def _init_shared_tables_issue_tracker_communication_channels(self): #initializes shared tables used by tables modeling issue tracker, forum and instant messaging data cursor = self._cnx.cursor() create_table_label = "CREATE TABLE IF NOT EXISTS label ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_message = "CREATE TABLE IF NOT EXISTS message ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "pos int(10), " \ "type_id int(20), " \ "issue_id int(20), " \ "topic_id int(20), " \ "channel_id int(20), " \ "body longblob, " \ "votes int(20), " \ "author_id int(20), " \ "created_at timestamp NULL DEFAULT NULL," \ "CONSTRAINT ip UNIQUE (issue_id, topic_id, channel_id, own_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_message_dependency = "CREATE TABLE IF NOT EXISTS message_dependency ( " \ "source_message_id int(20), " \ "target_message_id int(20), " \ "PRIMARY KEY st (source_message_id, target_message_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 
ROW_FORMAT=DYNAMIC;" create_table_message_type = "CREATE TABLE IF NOT EXISTS message_type ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(255), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" insert_message_types = "INSERT IGNORE INTO message_type VALUES (NULL, 'question'), " \ "(NULL, 'answer'), " \ "(NULL, 'comment'), " \ "(NULL, 'accepted_answer'), " \ "(NULL, 'reply'), " \ "(NULL, 'file_upload'), " \ "(NULL, 'info');" create_table_attachment = "CREATE TABLE IF NOT EXISTS attachment ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "message_id int(20), " \ "name varchar(256), " \ "extension varchar(10), " \ "bytes int(20), " \ "url varchar(512), " \ "CONSTRAINT ip UNIQUE (message_id, own_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_label) cursor.execute(create_table_message) cursor.execute(create_table_message_dependency) cursor.execute(create_table_message_type) cursor.execute(insert_message_types) cursor.execute(create_table_attachment) cursor.close() def _init_git_tables(self): #initializes tables used to model git data cursor = self._cnx.cursor() create_table_repository = "CREATE TABLE IF NOT EXISTS repository( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "project_id int(20), " \ "name varchar(255), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_reference = "CREATE TABLE IF NOT EXISTS reference( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "name varchar(255), " \ "type varchar(255), " \ "CONSTRAINT name UNIQUE (repo_id, name, type) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_commit = "CREATE TABLE IF NOT EXISTS commit(" \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "sha varchar(512), " \ "message varchar(512), " \ "author_id int(20), " \ "committer_id int(20), " \ 
"authored_date timestamp NULL DEFAULT NULL, " \ "committed_date timestamp NULL DEFAULT NULL, " \ "size int(20), " \ "INDEX sha (sha), " \ "INDEX auth (author_id), " \ "INDEX comm (committer_id), " \ "CONSTRAINT s UNIQUE (sha, repo_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_commit_parent = "CREATE TABLE IF NOT EXISTS commit_parent(" \ "repo_id int(20), " \ "commit_id int(20), " \ "commit_sha varchar(512), " \ "parent_id int(20), " \ "parent_sha varchar(512), " \ "PRIMARY KEY copa (repo_id, commit_id, parent_id), " \ "CONSTRAINT cshapsha UNIQUE (repo_id, commit_id, parent_sha) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_commits2reference = "CREATE TABLE IF NOT EXISTS commit_in_reference(" \ "repo_id int(20), " \ "commit_id int(20), " \ "ref_id int(20), " \ "PRIMARY KEY core (commit_id, ref_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_file = "CREATE TABLE IF NOT EXISTS file( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "name varchar(512), " \ "ext varchar(255), " \ "CONSTRAINT rerena UNIQUE (repo_id, name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_file_renamed = "CREATE TABLE IF NOT EXISTS file_renamed ( " \ "repo_id int(20), " \ "current_file_id int(20), " \ "previous_file_id int(20), " \ "file_modification_id int(20), " \ "PRIMARY KEY cpc (current_file_id, previous_file_id, file_modification_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_file_modification = "CREATE TABLE IF NOT EXISTS file_modification ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "commit_id int(20), " \ "file_id int(20), " \ "status varchar(10), " \ "additions numeric(10), " \ "deletions numeric(10), " \ "changes numeric(10), " \ "patch longblob, " \ "CONSTRAINT cf UNIQUE (commit_id, file_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_line_detail = "CREATE TABLE IF NOT 
EXISTS line_detail( " \ "file_modification_id int(20)," \ "type varchar(25), " \ "line_number numeric(20), " \ "is_commented numeric(1), " \ "is_partially_commented numeric(1), " \ "is_empty numeric(1), " \ "content longblob, " \ "PRIMARY KEY fityli (file_modification_id, type, line_number) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" # adding it here because "file_dependency" depends on "file" table creation. # @todo: find a way to move the following table creation to separate section # make "extract_dependency_relations" API interface completely independent. create_table_file_dependency = "CREATE TABLE file_dependency ( " \ "repo_id int(20), " \ "ref_id int(20), " \ "source_file_id int(20), " \ "target_file_id int(20), " \ "CONSTRAINT dep UNIQUE (repo_id, ref_id, source_file_id, target_file_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_repository) cursor.execute(create_table_reference) cursor.execute(create_table_commit) cursor.execute(create_table_commit_parent) cursor.execute(create_table_commits2reference) cursor.execute(create_table_file) cursor.execute(create_table_file_renamed) cursor.execute(create_table_file_modification) cursor.execute(create_table_line_detail) cursor.execute(create_table_file_dependency) cursor.close() def _init_issue_tracker_tables(self): #initializes tables used to model issue tracker data cursor = self._cnx.cursor() create_table_issue_tracker = "CREATE TABLE IF NOT EXISTS issue_tracker ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "repo_id int(20), " \ "name varchar(512), " \ "type varchar(512), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue = "CREATE TABLE IF NOT EXISTS issue ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "issue_tracker_id int(20), " \ "summary varchar(512), " \ "component varchar(256), " \ "version varchar(256), " \ "hardware varchar(256), " \ 
"priority varchar(256), " \ "severity varchar(256), " \ "reference_id int(20), " \ "reporter_id int(20), " \ "created_at timestamp NULL DEFAULT NULL, " \ "last_change_at timestamp NULL DEFAULT NULL, " \ "CONSTRAINT ioi UNIQUE (issue_tracker_id, own_id), " \ "INDEX u (reporter_id), " \ "INDEX r (reference_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_assignee = "CREATE TABLE IF NOT EXISTS issue_assignee ( " \ "issue_id int(20), " \ "assignee_id int(20), " \ "PRIMARY KEY il (issue_id, assignee_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_subscriber = "CREATE TABLE IF NOT EXISTS issue_subscriber ( " \ "issue_id int(20), " \ "subscriber_id int(20), " \ "PRIMARY KEY il (issue_id, subscriber_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_event = "CREATE TABLE IF NOT EXISTS issue_event ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "issue_id int(20), " \ "event_type_id int(20), " \ "detail varchar(256), " \ "creator_id int(20), " \ "created_at timestamp NULL DEFAULT NULL, " \ "target_user_id int(20), " \ "CONSTRAINT iecc UNIQUE (issue_id, event_type_id, creator_id, created_at, detail) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_event_type = "CREATE TABLE IF NOT EXISTS issue_event_type ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_issue_labelled = "CREATE TABLE IF NOT EXISTS issue_labelled ( " \ "issue_id int(20), " \ "label_id int(20), " \ "PRIMARY KEY il (issue_id, label_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_issue_commit_dependency = "CREATE TABLE IF NOT EXISTS issue_commit_dependency ( " \ "issue_id int(20), " \ "commit_id int(20), " \ "PRIMARY KEY ict (issue_id, commit_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" 
create_table_issue_dependency = "CREATE TABLE IF NOT EXISTS issue_dependency ( " \ "issue_source_id int(20), " \ "issue_target_id int(20), " \ "type_id int(20), " \ "PRIMARY KEY st (issue_source_id, issue_target_id, type_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_issue_dependency_type = "CREATE TABLE IF NOT EXISTS issue_dependency_type (" \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "name varchar(256), " \ "CONSTRAINT name UNIQUE (name) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" insert_issue_dependency_type = "INSERT IGNORE INTO issue_dependency_type VALUES (NULL, 'block'), " \ "(NULL, 'depends'), " \ "(NULL, 'related'), " \ "(NULL, 'duplicated');" cursor.execute(create_table_issue_tracker) cursor.execute(create_table_issue) cursor.execute(create_table_issue_assignee) cursor.execute(create_table_issue_subscriber) cursor.execute(create_table_issue_event) cursor.execute(create_table_issue_event_type) cursor.execute(create_table_issue_labelled) cursor.execute(create_issue_commit_dependency) cursor.execute(create_table_issue_dependency) cursor.execute(create_issue_dependency_type) cursor.execute(insert_issue_dependency_type) cursor.close() def _init_forum_tables(self): #initializes tables used to model forum data cursor = self._cnx.cursor() create_table_forum = "CREATE TABLE IF NOT EXISTS forum ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "project_id int(20), " \ "name varchar(512), " \ "type varchar(512), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_topic = "CREATE TABLE IF NOT EXISTS topic ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "forum_id int(20), " \ "name varchar(256), " \ "votes int(10), " \ "views int(10), " \ "created_at timestamp NULL DEFAULT NULL, " \ "last_change_at timestamp NULL DEFAULT NULL, " \ "CONSTRAINT name UNIQUE (forum_id, own_id)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" 
create_table_topic_labelled = "CREATE TABLE IF NOT EXISTS topic_labelled ( " \ "topic_id int(20), " \ "label_id int(20), " \ "PRIMARY KEY il (topic_id, label_id) " \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_forum) cursor.execute(create_table_topic) cursor.execute(create_table_topic_labelled) cursor.close() def _init_instant_messaging_tables(self): #initializes tables used to model instant messaging data cursor = self._cnx.cursor() create_table_instant_messaging = "CREATE TABLE IF NOT EXISTS instant_messaging ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "project_id int(20), " \ "name varchar(512), " \ "type varchar(512), " \ "CONSTRAINT name UNIQUE (name)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" create_table_channel = "CREATE TABLE IF NOT EXISTS channel ( " \ "id int(20) AUTO_INCREMENT PRIMARY KEY, " \ "own_id varchar(20), " \ "instant_messaging_id int(20), " \ "name varchar(256), " \ "description varchar(512), " \ "created_at timestamp NULL DEFAULT NULL, " \ "last_change_at timestamp NULL DEFAULT NULL, " \ "CONSTRAINT name UNIQUE (instant_messaging_id, own_id)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;" cursor.execute(create_table_instant_messaging) cursor.execute(create_table_channel) cursor.close()
class ActivityReportExporter():
    """
    This class handles the generation of reports
    """

    LOG_FOLDER_PATH = "logs"
    # JSON file shipped with the package that defines the report queries
    INPUT_PATH = os.path.join(os.path.dirname(resources.__file__), 'queries.json')

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param db_name: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._dsl_util = DslUtil()
        self._date_util = DateUtil()
        self._db_util = DbUtil()
        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-report-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")
        self._db_name = db_name
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)
        self._chart_generator = ChartGenerator(self._cnx, self._logger)
        self._html_generator = HtmlGenerator(self._logger)

    def _create_log_folder(self, name):
        # creates the log folder
        if not os.path.exists(name):
            os.makedirs(name)

    def _create_output_file(self, filename):
        # creates the output folder
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:
                # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

    def _load_report_exporter_json(self, json_path):
        # loads the JSON that drives the report export process
        with open(json_path) as json_data:
            data = json.load(json_data)
        return data.get('report')

    def _find_entity_id(self, type, name):
        # finds the id of the tools stored in the DB
        # NOTE(review): parameter "type" shadows the builtin; kept for
        # interface compatibility with existing callers
        found = None
        if type == "project":
            found = self._db_util.select_project_id(self._cnx, name, self._logger)
        elif type == "repo":
            found = self._db_util.select_repo_id(self._cnx, name, self._logger)
        elif type == "issuetracker":
            found = self._db_util.select_issue_tracker_id(self._cnx, name, self._logger)
        elif type == "forum":
            found = self._db_util.select_forum_id(self._cnx, name, self._logger)
        elif type == "instantmessaging":
            found = self._db_util.select_instant_messaging_id(self._cnx, name, self._logger)

        if not found:
            # fix: typo "ReporExporter" -> "ReportExporter" in the error message
            self._logger.error("ReportExporter: entity " + str(type) + " with name " + str(name) + " not found!")

        return found

    def _get_parameter(self, key, parameters):
        # gets parameters of the JSON; placeholder keys ending in "ID" map to
        # the parameter named after the entity (e.g. "PROJECTID" -> "project")
        found = None
        if key in ["AFTERDATE", "INTERVAL"]:
            found = parameters.get(key.lower())
        else:
            if key.endswith("ID"):
                found = parameters.get(key[:-2].lower())

        if not found:
            self._logger.error("ReportExporter: parameter " + str(key) + " not found!")

        return found

    def _load_query_json(self, metric_name, parameters):
        # loads the queries in the JSON configuration file and substitutes the
        # placeholder keys with the concrete parameter values
        with open(ActivityReportExporter.INPUT_PATH) as json_data:
            data = json.load(json_data)
        metrics = data.get('queries')
        try:
            found = [m for m in metrics if m.get('name') == metric_name][0]
            query = found.get('query')
            for k in found.keys():
                if k not in ['name', 'query']:
                    k_value = str(self._get_parameter(k, parameters))
                    query = query.replace(k, k_value)
            return query
        # fix: narrowed bare "except:" — an IndexError here means the metric
        # is missing from queries.json; returns None implicitly in that case
        except Exception:
            self._logger.error("ReportExporter: metric " + str(metric_name) + " not found!")

    def _get_activity_name(self, activity):
        # gets the name of the activity (underscores become spaces)
        return activity.replace("_", " ")

    def _get_activity_type(self, activity):
        # gets the type of the activity ("code_activity" -> "code")
        return activity.replace("_activity", "").replace("_", "")

    def _generate_charts(self, activity, activity_data, project_id, time_span):
        # generates one chart per (entity, measure) pair for the given activity
        entity2charts = {}
        after_date, interval = self._calculate_time_information(time_span)
        activity_type = self._get_activity_type(activity)
        names = activity_data.get('names')
        measures = activity_data.get('measures')

        for entity_name in names:
            entity_id = self._dsl_util.find_entity_id(self._cnx, activity_type, entity_name, self._logger)
            charts = []
            for measure in measures:
                query = self._load_query_json(measure, {activity_type: entity_id,
                                                        'project': project_id,
                                                        'afterdate': after_date,
                                                        'interval': interval})
                charts.append(self._chart_generator.create(query, interval.lower(), measure, time_span))
            entity2charts.update({entity_name: charts})

        return entity2charts

    def _calculate_time_information(self, time_span):
        # calculates the time span information (start date and SQL interval)
        start = None
        interval = None
        current_time = datetime.now()  # test datetime.strptime("2015-10-10", "%Y-%m-%d")
        if time_span == "this_week":
            start = self._date_util.get_start_time_span(current_time, "week", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_month":
            start = self._date_util.get_start_time_span(current_time, "month", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_year":
            start = self._date_util.get_start_time_span(current_time, "year", "%Y-%m-%d")
            interval = "MONTH"
        else:
            self._logger.error("ReportExporter: time span " + str(time_span) +
                               " not recognized! Options are: this_week, this_month, this_year")

        return start, interval

    def export(self, file_path, json_path):
        """
        exports the Gitana data to a report

        :type file_path: str
        :param file_path: the path where to export the report

        :type json_path: str
        :param json_path: the path of the JSON that drives the export process
        """
        try:
            self._logger.info("ReportExporter started")
            start_time = datetime.now()

            exporter_data = self._load_report_exporter_json(json_path)
            project_name = exporter_data.get('project')
            project_id = self._dsl_util.find_entity_id(self._cnx, "project", project_name, self._logger)
            time_span = exporter_data.get('time_span')

            activity2charts = {}
            for activity in [attr for attr in exporter_data.keys() if attr.endswith('activity')]:
                activity_name = self._get_activity_name(activity)
                charts = self._generate_charts(activity, exporter_data.get(activity), project_id, time_span)
                activity2charts.update({activity_name: charts})

            html_page = self._html_generator.create(project_name, activity2charts)

            with codecs.open(file_path, 'w', encoding='utf8') as f:
                f.write(html_page)

            self._db_util.close_connection(self._cnx)
            end_time = datetime.now()

            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("ReportExporter: process finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        # fix: narrowed bare "except:" — failure is logged with traceback
        except Exception:
            self._logger.error("ReportExporter failed", exc_info=True)
class GraphExporter():
    """
    This class exports the Gitana data to a graph representation
    """

    LOG_FOLDER_PATH = "logs"
    # JSON file shipped with the package that defines the graph queries
    INPUT_PATH = os.path.join(os.path.dirname(resources.__file__), 'queries.json')

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param db_name: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._db_util = DbUtil()
        self._dsl_util = DslUtil()
        self._logging_util = LoggingUtil()

        self._log_path = log_root_path + "export-graph-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

        self._db_name = db_name
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)

    def _create_log_folder(self, name):
        # creates the log folder
        if not os.path.exists(name):
            os.makedirs(name)

    def _create_output_file(self, filename):
        # creates the output folder
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:
                # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

    def _load_graph_exporter_json(self, json_path):
        # load the JSON that drives the graph exporter process
        with open(json_path) as json_data:
            data = json.load(json_data)
        return data.get('graph')

    def _get_parameter(self, key, parameters):
        # get JSON parameters; placeholder keys ending in "ID" are resolved
        # to DB ids via the entity name stored in the parameters
        found = None
        if key in ["EDGECOLOR", "NODECOLOR"]:
            found = parameters.get(key.lower())
        else:
            if key.endswith("ID"):
                name = parameters.get(key[:-2].lower())
                found = self._dsl_util.find_entity_id(self._cnx, key[:-2].lower(), name, self._logger)

        if not found:
            self._logger.error("GraphExporter: parameter " + str(key) + " not found!")

        return found

    def _load_query_json(self, metric_name, parameters):
        # loads the query stored in the JSON file and substitutes the
        # placeholder keys with the concrete parameter values
        with open(GraphExporter.INPUT_PATH) as json_data:
            data = json.load(json_data)
        metrics = data.get('queries')
        try:
            found = [m for m in metrics if m.get('name') == metric_name][0]
            nodes_query = found.get('nodes')
            edges_query = found.get('edges')
            for k in found.keys():
                if k not in ['name', 'edges', 'nodes']:
                    k_value = str(self._get_parameter(k, parameters))
                    nodes_query = nodes_query.replace(k, k_value)
                    edges_query = edges_query.replace(k, k_value)
            return (nodes_query, edges_query)
        # fix: narrowed bare "except:" — an IndexError here means the metric
        # is missing from queries.json; returns None implicitly in that case
        except Exception:
            self._logger.error("GraphExporter: metric " + str(metric_name) + " not found!")

    def export(self, file_path, json_path):
        """
        exports the Gitana data to a graph

        :type file_path: str
        :param file_path: the path where to export the graph

        :type json_path: str
        :param json_path: the path of the JSON that drives the export process
        """
        # gtype -> graph type = "undirected", "directed", if null "undirected"
        # gmode -> graph mode = "dynamic", "static", if null "dynamic"
        try:
            self._logger.info("GraphExporter started")
            start_time = datetime.now()

            exporter_data = self._load_graph_exporter_json(json_path)
            metric_name = exporter_data.get("name")
            parameters = exporter_data.get("params")
            (nodes_query, edges_query) = self._load_query_json(metric_name, parameters)

            gexf = GexfGenerator(self._cnx, self._logger)
            gexf.create(nodes_query, edges_query, parameters.get("type"), file_path)
            self._db_util.close_connection(self._cnx)

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("GraphExporter: process finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        # fix: narrowed bare "except:" — failure is logged with traceback
        except Exception:
            self._logger.error("GraphExporter failed", exc_info=True)
class GitHubDao(): """ This class handles the persistence and retrieval of GitHub issue tracker data """ def __init__(self, config, logger): """ :type config: dict :param config: the DB configuration file :type logger: Object :param logger: logger """ try: self._config = config self._logger = logger self._db_util = DbUtil() self._cnx = self._db_util.get_connection(self._config) except: self._logger.error("GitHubDao init failed", exc_info=True) raise def select_issue_tracker_id(self, repo_id, issue_tracker_name): """ gets DB issue tracker id by its name :type repo_id: int :param repo_id: id of the repository associated to the issue tracker :type issue_tracker_name: str :param issue_tracker_name: issue tracker name """ return self._db_util.select_issue_tracker_id(self._cnx, issue_tracker_name, self._logger) def insert_issue_comment(self, own_id, position, type, issue_id, body, votes, author_id, created_at): """ inserts issue comment :type own_id: id :param own_id: data source message id :type position: int :param position: position of the comment :type type: str :param type: type of the message :type issue_id: id :param issue_id: DB issue id :type body: str :param body: body of the comment :type votes: int :param votes: votes of the comment :type author_id: int :param author_id: id of the author :type created_at: str :param created_at: creation date of the comment """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO message " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ None, own_id, position, type, issue_id, 0, 0, 0, body, votes, author_id, created_at ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_proposed_commit_id(self, sha, repo_id): """ selects id of a proposed commit by its SHA :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: id of the repository """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM proposed_commit " \ "WHERE sha = %s AND 
repo_id = %s" arguments = [sha, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def insert_proposed_commit(self, repo_id, sha, message, author_id, committer_id, authored_date, committed_date): """ inserts a proposed commit to DB :type repo_id: int :param repo_id: id of the repository :type sha: str :param sha: SHA of the commit :type message: str :param message: message of the commit :type author_id: int :param author_id: author id of the commit :type committer_id: int :param committer_id: committer id of the commit :type authored_date: str :param authored_date: authored date of the commit :type committed_date: str :param committed_date: committed date of the commit """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO proposed_commit " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ None, repo_id, sha, message.strip(), author_id, committer_id, authored_date, committed_date ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_pull_request_commit(self, pull_request_id, commit_id, proposed_commit_id): """ inserts matched pull request commit to DB :type pull_request_id: int :param pull_request_id: pull request DB id :type commit_id: int :param commit_id: id of the commit stored in Gitana :type proposed_commit_id: int :param proposed_commit_id: id of the commit that was proposed for merge """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO pull_request_commit " \ "VALUES (%s, %s, %s)" arguments = [pull_request_id, commit_id, proposed_commit_id] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_pull_request_review(self, comment_id, pr_id, file_id, proposed_file_id): """ inserts dependency between pull request comment and a file id :type comment_id: int :param comment_id: id of the comment :type pr_id: int :param pr_id: id of the pull request :type file_id: id :param file_id: DB file id :type proposed_file_id: id :param 
proposed_file_id: DB proposed file id """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO pull_request_review " \ "VALUES (%s, %s, %s, %s)" arguments = [comment_id, pr_id, file_id, proposed_file_id] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_pull_request_comment_id(self, own_id, pr_id): """ gets pull request comment id :type own_id: id :param own_id: data source message id """ cursor = self._cnx.cursor() query = "SELECT id FROM message WHERE own_id = %s AND pull_request_id = %s" arguments = [str(own_id), pr_id] cursor.execute(query, arguments) row = cursor.fetchone() found = None if row: found = row[0] cursor.close() return found def insert_pull_request_comment(self, own_id, position, type, pr_id, body, votes, author_id, created_at): """ inserts pull request comment :type own_id: id :param own_id: data source message id :type position: int :param position: position of the comment :type type: str :param type: type of the message :type pr_id: id :param pr_id: DB pull request id :type body: str :param body: body of the comment :type votes: int :param votes: votes of the comment :type author_id: int :param author_id: id of the author :type created_at: str :param created_at: creation date of the comment """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO message " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" arguments = [ None, own_id, position, type, 0, 0, 0, pr_id, body, votes, author_id, created_at ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_event_type(self, name): """ selects event type id by its name :type name: str :param name: name of the event """ cursor = self._cnx.cursor() query = "SELECT id FROM issue_event_type WHERE name = %s" arguments = [name] cursor.execute(query, arguments) row = cursor.fetchone() found = None if row: found = row[0] cursor.close() return found def insert_issue_event(self, issue_id, event_type_id, detail, creator_id, created_at, 
target_user_id): """ inserts issue event :type issue_id: int :param issue_id: DB issue id :type event_type_id: int :param event_type_id: event type id :type detail: str :param detail: detail of the event :type creator_id: int :param creator_id: id of the creator :type created_at: str :param created_at: creation date of the event :type target_user_id: int :param target_user_id: target user id """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO issue_event " \ "VALUES (%s, %s, %s, %s, %s, %s, %s)" arguments = [ None, issue_id, event_type_id, detail, creator_id, created_at, target_user_id ] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_issue_commit_dependency(self, issue_id, commit_id): """ inserts dependency between commit and issue :type issue_id: int :param issue_id: DB issue id :type commit_id: int :param commit_id: DB commit id """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO issue_commit_dependency " \ "VALUES (%s, %s)" arguments = [issue_id, commit_id] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_event_type(self, name): """ inserts event type :type name: str :param name: event type name """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO issue_event_type " \ "VALUES (%s, %s)" arguments = [None, name] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def insert_issue_tracker(self, repo_id, issue_tracker_name, type): """ inserts issue tracker :type repo_id: int :param repo_id: DB repo id :type issue_tracker_name: str :param issue_tracker_name: issue tracker name :type type: str :param type: issue tracker type (github, bugzilla, etc.) 
""" return self._db_util.insert_issue_tracker(self._cnx, repo_id, issue_tracker_name, type, self._logger) def get_already_imported_issue_ids(self, issue_tracker_id, repo_id): """ gets issues already stored in DB :type issue_tracker_id: int :param issue_tracker_id: DB issue tracker id :type repo_id: int :param repo_id: DB repo id """ issue_ids = [] cursor = self._cnx.cursor() query = "SELECT i.own_id FROM issue i " \ "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \ "WHERE issue_tracker_id = %s AND repo_id = %s " \ "ORDER BY i.id ASC;" arguments = [issue_tracker_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: own_id = int(row[0]) issue_ids.append(own_id) row = cursor.fetchone() cursor.close() return issue_ids def get_pull_request_file_id_by_name(self, file_name, repo_id): """ gets the file id linked to a pull request by name :type file_name: str :param file_name: name of the file commented :type repo_id: int :param repo_id: id of the repository """ found = (None, None) cursor = self._cnx.cursor() query = "SELECT f.id, 'codebase' AS type " \ "FROM file f " \ "WHERE f.name = %s AND f.repo_id = %s " \ "UNION " \ "SELECT f.id, 'proposed' AS type " \ "FROM proposed_file f " \ "WHERE f.name = %s AND f.repo_id = %s" arguments = [file_name, repo_id, file_name, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = (row[0], row[1]) cursor.close() return found def get_already_imported_pull_request_ids(self, issue_tracker_id, repo_id): """ gets pull requests already stored in DB :type issue_tracker_id: int :param issue_tracker_id: DB issue tracker id :type repo_id: int :param repo_id: DB repo id """ pr_ids = [] cursor = self._cnx.cursor() query = "SELECT i.own_id FROM pull_request pr " \ "JOIN issue i ON i.id = pr.issue_id " \ "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \ "WHERE it.id = %s AND it.repo_id = %s " \ "ORDER BY i.id ASC;" arguments = [issue_tracker_id, repo_id] cursor.execute(query, 
arguments) row = cursor.fetchone() while row: own_id = row[0] pr_ids.append(own_id) row = cursor.fetchone() cursor.close() return pr_ids def get_message_type_id(self, message_type): """ gets message type id :type message_type: str :param message_type: message type """ return self._db_util.get_message_type_id(self._cnx, message_type) def insert_issue_dependency(self, issue_source_id, issue_target_id, created_at, type): """ inserts dependency between issues :type issue_source_id: int :param issue_source_id: issue source id :type issue_target_id: int :param issue_target_id: issue target id :type created_at: str :param created_at: date of the dependency creation :type type: str :param type: type of dependency """ cursor = self._cnx.cursor() query = "INSERT IGNORE INTO issue_dependency " \ "VALUES (%s, %s, %s, %s)" arguments = [issue_source_id, issue_target_id, created_at, type] cursor.execute(query, arguments) self._cnx.commit() cursor.close() def select_project_id(self, project_name): """ selects project id by its name :type project_name: str :param project_name: name of a project """ return self._db_util.select_project_id(self._cnx, project_name, self._logger) def select_issue_own_id(self, issue_id, issue_tracker_id, repo_id): """ selects data source issue id :type issue_id: int :param issue_id: DB issue id :type issue_tracker_id: int :param issue_tracker_id: issue tracker id :type repo_id: int :param repo_id: repository id """ found = None cursor = self._cnx.cursor() query = "SELECT i.own_id " \ "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \ "WHERE i.id = %s AND issue_tracker_id = %s AND repo_id = %s" arguments = [issue_id, issue_tracker_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = int(row[0]) cursor.close() return found def get_issue_dependency_type_id(self, name): """ get id of the issue dependency type :type name: str :param name: name of the dependency type """ return 
self._db_util.get_issue_dependency_type_id(self._cnx, name) def select_commit(self, sha, repo_id): """ gets commit by its SHA :type sha: str :param sha: SHA of the commit :type repo_id: int :param repo_id: repository id """ found = None cursor = self._cnx.cursor() query = "SELECT id " \ "FROM commit " \ "WHERE sha = %s AND repo_id = %s" arguments = [sha, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_issue_id(self, issue_own_id, issue_tracker_id, repo_id): """ gets issue id :type issue_own_id: int :param issue_own_id: data source issue id :type issue_tracker_id: int :param issue_tracker_id: issue tracker id :type repo_id: int :param repo_id: repository id """ found = None cursor = self._cnx.cursor() query = "SELECT i.id FROM issue i " \ "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \ "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s" arguments = [issue_own_id, issue_tracker_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def select_pull_request_id(self, pr_own_id, issue_tracker_id): """ gets pull request id :type pr_own_id: int :param pr_own_id: data source pull request id :type issue_tracker_id: int :param issue_tracker_id: issue tracker id """ found = None cursor = self._cnx.cursor() query = "SELECT pr.id FROM pull_request pr JOIN issue i ON pr.issue_id = i.id " \ "WHERE i.own_id = %s AND i.issue_tracker_id = %s" arguments = [pr_own_id, issue_tracker_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] cursor.close() return found def update_issue(self, issue_id, issue_tracker_id, summary, component, version, hardware, priority, severity, reference_id, last_change_at): """ updates an issue :type issue_id: int :param issue_id: data source issue id :type issue_tracker_id: int :param issue_tracker_id: issue tracker id :type summary: str :param summary: new issue 
description
        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "UPDATE issue SET last_change_at = %s, summary = %s, component = %s, version = %s, hardware = %s, priority = %s, severity = %s, reference_id = %s WHERE own_id = %s AND issue_tracker_id = %s"
        arguments = [
            last_change_at, summary, component, version, hardware, priority,
            severity, reference_id, issue_id, issue_tracker_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_pull_request(self, pr_own_id, issue_tracker_id, summary,
                            status, merged, last_change_at):
        """
        updates a pull request

        :type pr_own_id: int
        :param pr_own_id: data source pr id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new pull request description

        :type status: str
        :param status: new pull request status

        :type merged: bool
        :param merged: new pull request merged status

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "UPDATE pull_request SET last_change_at = %s, summary = %s, status = %s, merged = %s WHERE own_id = %s AND issue_tracker_id = %s"
        arguments = [
            last_change_at, summary, status, merged, pr_own_id,
            issue_tracker_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue(self, issue_own_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     user_id, created_at, last_change_at):
        """
        inserts an issue

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type user_id: int
        :param user_id: issue creator id

        :type created_at: str
        :param created_at: creation date of the issue

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        # first %s is the auto-increment surrogate key (inserted as NULL)
        query = "INSERT IGNORE INTO issue " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_own_id, issue_tracker_id, summary, component, version,
            hardware, priority, severity, reference_id, user_id, created_at,
            last_change_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_pull_request(self, pr_own_id, author_id, state, target_ref_id,
                            merged_at, merged_by, issue_tracker_id, repo_id):
        """
        inserts a pull request (note that the pull request id is equal to the
        issue id, since that in GitHub every pull request is an issue

        :type pr_own_id: int
        :param pr_own_id: data source pull request id

        :type author_id: int
        :param author_id: author id

        :type state: str
        :param state: pull request state

        :type target_ref_id: int
        :param target_ref_id: target reference

        :type merged_at: timestamp
        :param merged_at: pull request merged time

        :type merged_by: int
        :param merged_by: merger id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        cursor = self._cnx.cursor()
        # resolve the DB id of the issue that backs this pull request;
        # the pull_request row references it instead of the data source id
        issue_id = self.select_issue_id(pr_own_id, issue_tracker_id, repo_id)
        query = "INSERT IGNORE INTO pull_request " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_id, author_id, state, target_ref_id, merged_at,
            merged_by
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_proposed_file_id(self, repo_id, name):
        """
        selects id of the proposed file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM proposed_file " \
                "WHERE name = %s AND repo_id = %s"
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        try:
            # NOTE(review): `id` shadows the builtin and the bare except hides
            # any fetch error, not only the "no row" TypeError — worth
            # narrowing in a follow-up
            id = cursor.fetchone()[0]
        except:
            id = None
        cursor.close()
        return id

    def insert_proposed_file(self, repo_id, filename, ext):
        """
        inserts file not part of the codebase proposed in a pull request

        :type repo_id: int
        :param repo_id: id of the repository

        :type filename: str
        :param filename: name of the file

        :type ext: str
        :param ext: extension of the file
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO proposed_file " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, filename, ext]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_proposed_file_modification(self, proposed_commit_id, file_id,
                                          proposed_file_id, status, additions,
                                          deletions, changes, patch_content):
        """
        inserts a proposed file modification to DB

        :type proposed_commit_id: int
        :param proposed_commit_id: id of the proposed commit

        :type file_id: int
        :param file_id: id of the file

        :type proposed_file_id: int
        :param proposed_file_id: id of the proposed file

        :type status: str
        :param status: type of the modification

        :type additions: int
        :param additions: number of additions

        :type deletions: int
        :param deletions: number of deletions

        :type changes: int
        :param changes: number of changes

        :type patch_content: str
        :param patch_content: content of the patch
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO proposed_file_modification " \
                "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            proposed_commit_id, file_id, proposed_file_id, status, additions,
            deletions, changes, patch_content
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_label(self, name):
        """
        inserts a label

        :type name: str
        :param name: the name of the label
        """
        self._db_util.insert_label(self._cnx, name, self._logger)

    def select_label_id(self, name):
        """
        selects the label id by its name

        :type name: str
        :param name: the name of the label
        """
        return self._db_util.select_label_id(self._cnx, name, self._logger)

    def select_issue_comment_id(self, own_id, issue_id, created_at):
        """
        selects the id of an issue comment

        :type own_id: int
        :param own_id: data source comment id

        :type issue_id: int
        :param issue_id: DB issue id

        :type created_at: str
        :param created_at: creation date of the issue
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND issue_id = %s AND created_at = %s"
        arguments = [own_id, issue_id, created_at]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()
        return found

    def get_user_id(self, user_name, user_email):
        """
        selects the id of a user, inserting it first if not present

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """
        # both missing: fall back to a shared placeholder identity
        if user_email == None and user_name == None:
            user_name = "unknown_user"
            user_email = "unknown_user"

        if user_name:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            # re-select to obtain the id generated by the insert
            if user_email:
                user_id = self._db_util.select_user_id_by_email(
                    self._cnx, user_email, self._logger)
            else:
                user_id = self._db_util.select_user_id_by_name(
                    self._cnx, user_name, self._logger)

        return user_id

    def insert_subscriber(self, issue_id, subscriber_id):
        """
        inserts issue subscriber

        :type issue_id: int
        :param issue_id: db issue id

        :type subscriber_id: int
        :param subscriber_id: subscriber id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_subscriber " \
                "VALUES (%s, %s)"
        arguments = [issue_id, subscriber_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_assignee(self, issue_id, assignee_id):
        """
        inserts issue assignee

        :type issue_id: int
        :param issue_id: db issue id

        :type assignee_id: int
        :param assignee_id: assignee id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_assignee " \
                "VALUES (%s, %s)"
        arguments = [issue_id, assignee_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_attachment(self, attachment_id, issue_comment_id, name, url):
        """
        inserts attachment

        :type attachment_id: int
        :param attachment_id: db attachment id

        :type issue_comment_id: int
        :param issue_comment_id: issue comment id

        :type name: str
        :param name: name of the attachment

        :type url: str
        :param url: url of the attachment
        """
        cursor = self._cnx.cursor()
        # the two literal Nones are the extension and size columns, which are
        # not available for issue attachments
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, attachment_id, issue_comment_id, name, None, None, url
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def assign_label_to_issue(self, issue_id, label_id):
        """
        links label to issue

        :type issue_id: int
        :param issue_id: db issue id

        :type label_id: int
        :param label_id: label id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_labelled " \
                "VALUES (%s, %s)"
        arguments = [issue_id, label_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def find_reference_id(self, version, issue_id, repo_id):
        """
        retrieves reference id

        :type version: str
        :param version: name of the reference

        :type issue_id: int
        :param issue_id: db issue id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        if version:
            try:
                cursor = self._cnx.cursor()
                query = "SELECT id FROM reference WHERE name = %s AND repo_id = %s"
                arguments = [version, repo_id]
                cursor.execute(query, arguments)
                row = cursor.fetchone()
                if row:
                    found = row[0]
                else:
                    #sometimes the version is followed by extra information such as alpha, beta, RC, M.
                    # NOTE(review): this fallback builds SQL by string
                    # concatenation — injectable through `version`; it should
                    # use a parameterized LIKE (see GitHubDao for the fix)
                    query = "SELECT id FROM reference WHERE name LIKE '" + str(
                        version) + "%' AND repo_id = " + str(repo_id)
                    cursor.execute(query)
                    row = cursor.fetchone()
                    if row:
                        found = row[0]
                cursor.close()
            except Exception:
                self._logger.warning("version (" + str(version) +
                                     ") not inserted for issue id: " +
                                     str(issue_id),
                                     exc_info=True)
        return found

    def select_last_change_issue(self, issue_id, issue_tracker_id, repo_id):
        """
        retrieves last change date of an issue

        :type issue_id: int
        :param issue_id: db issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.last_change_at " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_last_change_pull_request(self, pr_id, issue_tracker_id,
                                        repo_id):
        """
        retrieves last change date of a pull request

        :type pr_id: int
        :param pr_id: db pull request id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT pr.last_change_at " \
                "FROM pull_request pr JOIN issue_tracker it ON pr.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [pr_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_repo_id(self, project_id, repo_name):
        """
        selects repository id

        :type project_id: int
        :param project_id: project id (currently unused; the lookup is by
                           repository name only — kept for interface
                           compatibility)

        :type repo_name: int
        :param repo_name: repository name
        """
        return self._db_util.select_repo_id(self._cnx, repo_name,
                                            self._logger)

    def get_cursor(self):
        # expose a raw cursor for callers that run ad-hoc queries
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def close_connection(self):
        # closes DB connection, if one was established
        if self._cnx:
            self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        # re-establishes the DB connection with the stored configuration
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)
class GitHubDao():
    """
    This class handles the persistence and retrieval of GitHub issue tracker data
    """

    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except Exception:
            # narrowed from a bare except: log only real errors, then re-raise
            self._logger.error("GitHubDao init failed", exc_info=True)
            raise

    def select_issue_tracker_id(self, repo_id, issue_tracker_name):
        """
        gets DB issue tracker id by its name

        :type repo_id: int
        :param repo_id: id of the repository associated to the issue tracker
                        (currently unused; the lookup is by name only — kept
                        for interface compatibility)

        :type issue_tracker_name: str
        :param issue_tracker_name: issue tracker name
        """
        return self._db_util.select_issue_tracker_id(self._cnx,
                                                     issue_tracker_name,
                                                     self._logger)

    def insert_issue_comment(self, own_id, position, type, issue_id, body,
                             votes, author_id, created_at):
        """
        inserts issue comment

        :type own_id: id
        :param own_id: data source message id

        :type position: int
        :param position: position of the comment

        :type type: str
        :param type: type of the message

        :type issue_id: id
        :param issue_id: DB issue id

        :type body: str
        :param body: body of the comment

        :type votes: int
        :param votes: votes of the comment

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation date of the comment
        """
        cursor = self._cnx.cursor()
        # the two literal zeros fill the channel/forum columns that do not
        # apply to issue comments
        query = "INSERT IGNORE INTO message " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, own_id, position, type, issue_id, 0, 0, body, votes,
            author_id, created_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_event_type(self, name):
        """
        selects event type id by its name

        :type name: str
        :param name: name of the event
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM issue_event_type WHERE name = %s"
        arguments = [name]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_issue_event(self, issue_id, event_type_id, detail, creator_id,
                           created_at, target_user_id):
        """
        inserts issue event

        :type issue_id: int
        :param issue_id: DB issue id

        :type event_type_id: int
        :param event_type_id: event type id

        :type detail: str
        :param detail: detail of the event

        :type creator_id: int
        :param creator_id: id of the creator

        :type created_at: str
        :param created_at: creation date of the event

        :type target_user_id: int
        :param target_user_id: target user id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_event " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_id, event_type_id, detail, creator_id, created_at,
            target_user_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue_commit_dependency(self, issue_id, commit_id):
        """
        inserts dependency between commit and issue

        :type issue_id: int
        :param issue_id: DB issue id

        :type commit_id: int
        :param commit_id: DB commit id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_commit_dependency " \
                "VALUES (%s, %s)"
        arguments = [issue_id, commit_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_event_type(self, name):
        """
        inserts event type

        :type name: str
        :param name: event type name
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_event_type " \
                "VALUES (%s, %s)"
        arguments = [None, name]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue_tracker(self, repo_id, issue_tracker_name, type):
        """
        inserts issue tracker

        :type repo_id: int
        :param repo_id: DB repo id

        :type issue_tracker_name: str
        :param issue_tracker_name: issue tracker name

        :type type: str
        :param type: issue tracker type (github, bugzilla, etc.)
        """
        return self._db_util.insert_issue_tracker(self._cnx, repo_id,
                                                  issue_tracker_name, type,
                                                  self._logger)

    def get_already_imported_issue_ids(self, issue_tracker_id, repo_id):
        """
        gets issues already stored in DB

        :type issue_tracker_id: int
        :param issue_tracker_id: DB issue tracker id

        :type repo_id: int
        :param repo_id: DB repo id
        """
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE issue_tracker_id = %s AND repo_id = %s " \
                "ORDER BY i.id ASC;"
        arguments = [issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        # comprehension over fetchall replaces the manual fetchone loop
        issue_ids = [int(row[0]) for row in cursor.fetchall()]
        cursor.close()
        return issue_ids

    def get_message_type_id(self, message_type):
        """
        gets message type id

        :type message_type: str
        :param message_type: message type
        """
        return self._db_util.get_message_type_id(self._cnx, message_type)

    def insert_issue_dependency(self, issue_source_id, issue_target_id, type):
        """
        inserts dependency between issues

        :type issue_source_id: int
        :param issue_source_id: issue source id

        :type issue_target_id: int
        :param issue_target_id: issue target id

        :type type: str
        :param type: type of dependency
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_dependency " \
                "VALUES (%s, %s, %s)"
        arguments = [issue_source_id, issue_target_id, type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_project_id(self, project_name):
        """
        selects project id by its name

        :type project_name: str
        :param project_name: name of a project
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def select_issue_own_id(self, issue_id, issue_tracker_id, repo_id):
        """
        selects data source issue id

        :type issue_id: int
        :param issue_id: DB issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE i.id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = int(row[0])
        cursor.close()
        return found

    def get_issue_dependency_type_id(self, name):
        """
        get id of the issue dependency type

        :type name: str
        :param name: name of the dependency type
        """
        return self._db_util.get_issue_dependency_type_id(self._cnx, name)

    def select_commit(self, sha, repo_id):
        """
        gets commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_issue_id(self, issue_own_id, issue_tracker_id, repo_id):
        """
        gets issue id

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_own_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def update_issue(self, issue_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     last_change_at):
        """
        updates an issue

        :type issue_id: int
        :param issue_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "UPDATE issue " \
                "SET last_change_at = %s, summary = %s, component = %s, version = %s, hardware = %s, " \
                "priority = %s, severity = %s, reference_id = %s " \
                "WHERE own_id = %s AND issue_tracker_id = %s"
        arguments = [
            last_change_at, summary, component, version, hardware, priority,
            severity, reference_id, issue_id, issue_tracker_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue(self, issue_own_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     user_id, created_at, last_change_at):
        """
        inserts an issue

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type user_id: int
        :param user_id: issue creator id

        :type created_at: str
        :param created_at: creation date of the issue

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        # first %s is the auto-increment surrogate key (inserted as NULL)
        query = "INSERT IGNORE INTO issue " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_own_id, issue_tracker_id, summary, component, version,
            hardware, priority, severity, reference_id, user_id, created_at,
            last_change_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_label(self, name):
        """
        inserts a label

        :type name: str
        :param name: the name of the label
        """
        self._db_util.insert_label(self._cnx, name, self._logger)

    def select_label_id(self, name):
        """
        selects the label id by its name

        :type name: str
        :param name: the name of the label
        """
        return self._db_util.select_label_id(self._cnx, name, self._logger)

    def select_issue_comment_id(self, own_id, issue_id, created_at):
        """
        selects the id of an issue comment

        :type own_id: int
        :param own_id: data source comment id

        :type issue_id: int
        :param issue_id: DB issue id

        :type created_at: str
        :param created_at: creation date of the issue
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND issue_id = %s AND created_at = %s"
        arguments = [own_id, issue_id, created_at]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()
        return found

    def get_user_id(self, user_name, user_email):
        """
        selects the id of a user, inserting it first if not present

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """
        if not user_email and not user_name:
            # bug fix: was the misspelled "uknonwn_user", which stored the
            # placeholder identity under a different name than the sibling
            # DAOs that use "unknown_user"
            user_name = "unknown_user"
            user_email = "unknown_user"

        if user_name:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            # re-select to obtain the id generated by the insert
            if user_email:
                user_id = self._db_util.select_user_id_by_email(
                    self._cnx, user_email, self._logger)
            else:
                user_id = self._db_util.select_user_id_by_name(
                    self._cnx, user_name, self._logger)

        return user_id

    def insert_subscriber(self, issue_id, subscriber_id):
        """
        inserts issue subscriber

        :type issue_id: int
        :param issue_id: db issue id

        :type subscriber_id: int
        :param subscriber_id: subscriber id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_subscriber " \
                "VALUES (%s, %s)"
        arguments = [issue_id, subscriber_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_assignee(self, issue_id, assignee_id):
        """
        inserts issue assignee

        :type issue_id: int
        :param issue_id: db issue id

        :type assignee_id: int
        :param assignee_id: assignee id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_assignee " \
                "VALUES (%s, %s)"
        arguments = [issue_id, assignee_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_attachment(self, attachment_id, issue_comment_id, name, url):
        """
        inserts attachment

        :type attachment_id: int
        :param attachment_id: db attachment id

        :type issue_comment_id: int
        :param issue_comment_id: issue comment id

        :type name: str
        :param name: name of the attachment

        :type url: str
        :param url: url of the attachment
        """
        cursor = self._cnx.cursor()
        # the two literal Nones are the extension and size columns, which are
        # not available for issue attachments
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, attachment_id, issue_comment_id, name, None, None, url
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def assign_label_to_issue(self, issue_id, label_id):
        """
        links label to issue

        :type issue_id: int
        :param issue_id: db issue id

        :type label_id: int
        :param label_id: label id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_labelled " \
                "VALUES (%s, %s)"
        arguments = [issue_id, label_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def find_reference_id(self, version, issue_id, repo_id):
        """
        retrieves reference id

        :type version: str
        :param version: name of the reference

        :type issue_id: int
        :param issue_id: db issue id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        if version:
            try:
                cursor = self._cnx.cursor()
                query = "SELECT id FROM reference WHERE name = %s AND repo_id = %s"
                arguments = [version, repo_id]
                cursor.execute(query, arguments)
                row = cursor.fetchone()
                if row:
                    found = row[0]
                else:
                    # sometimes the version is followed by extra information such as alpha, beta, RC, M.
                    # security fix: parameterized LIKE instead of string
                    # concatenation, which was injectable through `version`
                    query = "SELECT id " \
                            "FROM reference " \
                            "WHERE name LIKE %s AND repo_id = %s"
                    arguments = [str(version) + "%", repo_id]
                    cursor.execute(query, arguments)
                    row = cursor.fetchone()
                    if row:
                        found = row[0]
                cursor.close()
            except Exception:
                self._logger.warning("version (" + str(version) +
                                     ") not inserted for issue id: " +
                                     str(issue_id),
                                     exc_info=True)
        return found

    def select_last_change_issue(self, issue_id, issue_tracker_id, repo_id):
        """
        retrieves last change date of an issue

        :type issue_id: int
        :param issue_id: db issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.last_change_at " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_repo_id(self, project_id, repo_name):
        """
        selects repository id

        :type project_id: int
        :param project_id: project id (currently unused; the lookup is by
                           repository name only — kept for interface
                           compatibility)

        :type repo_name: int
        :param repo_name: repository name
        """
        return self._db_util.select_repo_id(self._cnx, repo_name,
                                            self._logger)

    def get_cursor(self):
        # expose a raw cursor for callers that run ad-hoc queries
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def close_connection(self):
        # closes DB connection, if one was established
        if self._cnx:
            self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        # re-establishes the DB connection with the stored configuration
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)
class SlackDao():
    """
    This class handles the persistence and retrieval of Slack data
    """

    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except Exception:
            # narrowed from a bare except; exc_info added so the connection
            # failure cause is actually recorded before the re-raise
            self._logger.error("SlackDao init failed", exc_info=True)
            raise

    def close_connection(self):
        """
        closes DB connection
        """
        self._db_util.close_connection(self._cnx)

    def select_project_id(self, project_name):
        """
        gets the project id

        :type project_name: str
        :param project_name: the name of a project
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def select_instant_messaging_id(self, instant_messaging_name, project_id):
        """
        gets DB instant messaging id by its name

        :type instant_messaging_name: str
        :param instant_messaging_name: instant messaging name

        :type project_id: int
        :param project_id: project id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM instant_messaging " \
                "WHERE name = %s AND project_id = %s"
        arguments = [instant_messaging_name, project_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]
        else:
            self._logger.warning("no instant messaging with this name " +
                                 str(instant_messaging_name))
        cursor.close()
        return found

    def get_message_type_id(self, message_type):
        """
        gets the id associated to a given type of message

        :type message_type: str
        :param message_type: message type
        """
        return self._db_util.get_message_type_id(self._cnx, message_type)

    def insert_url_attachment(self, own_id, message_id, name, extension, url):
        """
        insert attachment URL

        :type own_id: int
        :param own_id: data source message id

        :type message_id: int
        :param message_id: DB message id

        :type name: str
        :param name: attachment name

        :type extension: str
        :param extension: attachment extension

        :type url: str
        :param url: attachment url
        """
        cursor = self._cnx.cursor()
        # size column is inserted as NULL because it is unknown for URL-only
        # attachments
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [None, own_id, message_id, name, extension, None, url]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_attachment(self, own_id, message_id, name, extension, bytes,
                          url):
        """
        insert attachment of a message

        :type own_id: int
        :param own_id: data source message id

        :type message_id: int
        :param message_id: DB message id

        :type name: str
        :param name: attachment name

        :type extension: str
        :param extension: attachment extension

        :type bytes: int
        :param bytes: attachment size in bytes

        :type url: str
        :param url: attachment url
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [None, own_id, message_id, name, extension, bytes, url]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_message_id(self, own_id, channel_id):
        """
        gets message id

        :type own_id: int
        :param own_id: data source message id

        :type channel_id: int
        :param channel_id: DB channel id
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND channel_id = %s"
        arguments = [own_id, channel_id]
        cursor.execute(query, arguments)
        found = None
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def get_comments(self, message_id):
        """
        gets comments of a message

        :type message_id: int
        :param message_id: DB message id
        """
        # bug fix: `found` was referenced before assignment on the no-row
        # path, raising NameError instead of returning
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) as count " \
                "FROM message_dependency " \
                "WHERE target_message_id = %s OR source_message_id = %s"
        arguments = [message_id, message_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        else:
            self._logger.warning(
                "no message dependency found for message id " +
                str(message_id))
        cursor.close()
        return found

    def insert_message_dependency(self, source_message_id, target_message_id):
        """
        inserts dependency between two messages

        :type source_message_id: int
        :param source_message_id: DB source message id

        :type target_message_id: int
        :param target_message_id: DB target message id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO message_dependency " \
                "VALUES (%s, %s)"
        arguments = [source_message_id, target_message_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_message(self, own_id, pos, type, channel_id, body, author_id,
                       created_at):
        """
        inserts message to DB

        :type own_id: int
        :param own_id: data source message id

        :type pos: int
        :param pos: position of the message in the topic

        :type type: str
        :param type: type of the message (question, reply)

        :type channel_id: int
        :param channel_id: DB channel id

        :type body: str
        :param body: message body

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation time of the message
        """
        cursor = None
        try:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO message " \
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            arguments = [
                None, own_id, pos, type, 0, 0, channel_id, body, None,
                author_id, created_at
            ]
            cursor.execute(query, arguments)
            self._cnx.commit()
        except Exception:
            # best-effort insert: failures are logged, not propagated
            self._logger.warning("message (" + str(own_id) +
                                 ") for channel id: " + str(channel_id) +
                                 " not inserted",
                                 exc_info=True)
        finally:
            # bug fix: the cursor was never closed, leaking one cursor per
            # inserted message
            if cursor:
                cursor.close()

    def get_user_id(self, user_name, user_email):
        """
        gets the id associated to a user name, inserting it first if not
        present

        :type user_name: str
        :param user_name: user name

        :type user_email: str
        :param user_email: user email
        """
        if user_email:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            # re-select to obtain the id generated by the insert
            if user_email:
                user_id = self._db_util.select_user_id_by_email(
                    self._cnx, user_email, self._logger)
            else:
                user_id = self._db_util.select_user_id_by_name(
                    self._cnx, user_name, self._logger)

        return user_id

    def select_channel_own_id(self, channel_id, instant_messaging_id):
        """
        gets data source channel id

        :type channel_id: int
        :param channel_id: DB channel id

        :type instant_messaging_id: int
        :param instant_messaging_id: DB instant messaging id
        """
        # bug fix: `found` was referenced before assignment on the no-row path
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT own_id " \
                "FROM channel " \
                "WHERE id = %s AND instant_messaging_id = %s"
        arguments = [channel_id, instant_messaging_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]
        else:
            self._logger.warning("no channel found for instant messaging " +
                                 str(instant_messaging_id))
        cursor.close()
        return found

    def insert_channel(self, own_id, instant_messaging_id, name, description,
                       created_at, last_change_at):
        """
        inserts channel to DB and returns its DB id

        :type own_id: int
        :param own_id: data source channel id

        :type instant_messaging_id: int
        :param instant_messaging_id: instant messaging id

        :type name: str
        :param name: title of the channel

        :type description: str
        :param description: channel description

        :type created_at: str
        :param created_at: creation date

        :type last_change_at: str
        :param last_change_at: last change date
        """
        # bug fix: `found` was referenced before assignment on the no-row path
        found = None
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO channel " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, own_id, instant_messaging_id, name, description, created_at,
            last_change_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()

        # read back the id of the (possibly pre-existing) row
        query = "SELECT id " \
                "FROM channel " \
                "WHERE own_id = %s AND instant_messaging_id = %s"
        arguments = [own_id, instant_messaging_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]
        else:
            self._logger.warning("no channel found for instant messaging " +
                                 str(instant_messaging_id))
        cursor.close()
        return found

    def insert_instant_messaging(self, project_id, instant_messaging_name,
                                 type):
        """
        inserts instant messaging to DB and returns its DB id

        :type project_id: int
        :param project_id: project id

        :type instant_messaging_name: str
        :param instant_messaging_name: instant messaging name

        :type type: str
        :param type: instant messaging type (Slack, IRC)
        """
        # bug fix: `found` was referenced before assignment on the no-row path
        found = None
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO instant_messaging " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, project_id, instant_messaging_name, type]
        cursor.execute(query, arguments)
        self._cnx.commit()

        # read back the id of the (possibly pre-existing) row
        query = "SELECT id " \
                "FROM instant_messaging " \
                "WHERE name = %s"
        arguments = [instant_messaging_name]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]
        else:
            self._logger.warning("no instant messaging with name " +
                                 str(instant_messaging_name))
        cursor.close()
        return found

    def get_channel_last_change_at(self, own_id, instant_messaging_id):
        """
        gets last change date of a channel

        :type own_id: int
        :param own_id: data source channel id

        :type instant_messaging_id: int
        :param instant_messaging_id: DB instant messaging id
        """
        cursor = self._cnx.cursor()
        query = "SELECT last_change_at FROM channel WHERE own_id = %s AND instant_messaging_id = %s"
        arguments = [own_id, instant_messaging_id]
        cursor.execute(query, arguments)
        found = None
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def get_channel_ids(self, instant_messaging_id):
        """
        gets list of channel ids in a given instant messaging

        :type instant_messaging_id: int
        :param instant_messaging_id: DB instant messaging id
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM channel WHERE instant_messaging_id = %s"
        arguments = [instant_messaging_id]
        cursor.execute(query, arguments)
        # comprehension over fetchall replaces the manual fetchone loop
        channel_ids = [row[0] for row in cursor.fetchall()]
        cursor.close()
        return channel_ids
class EclipseForumDao():
    """
    This class handles the persistence and retrieval of Eclipse forum data
    """

    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except:
            self._logger.error("EclipseForumDao init failed")
            raise

    def close_connection(self):
        """
        closes DB connection
        """
        self._db_util.close_connection(self._cnx)

    def get_message_type_id(self, message_type):
        """
        gets the id associated to a given type of message

        :type message_type: str
        :param message_type: message type
        """
        return self._db_util.get_message_type_id(self._cnx, message_type)

    def get_user_id(self, user_name):
        """
        gets the id associated to a user name, inserting the user on demand

        :type user_name: str
        :param user_name: user name (None maps to "unknown_user")
        """
        if user_name is None:
            user_name = "unknown_user"

        user_id = self._db_util.select_user_id_by_name(self._cnx, user_name,
                                                       self._logger)
        if not user_id:
            # unseen user: insert it, then read back the generated id
            self._db_util.insert_user(self._cnx, user_name, None, self._logger)
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        return user_id

    def insert_message_attachment(self, url, own_id, name, extension, size,
                                  message_id):
        """
        inserts attachment of a message

        :type url: str
        :param url: url of the attachment

        :type own_id: int
        :param own_id: id in the data source

        :type name: str
        :param name: name of the attachment

        :type extension: str
        :param extension: extension of the attachment

        :type size: int
        :param size: size of the attachment in bytes

        :type message_id: int
        :param message_id: message id of the attachment
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [None, own_id, message_id, name, extension, size, url]
        cursor.execute(query, arguments)
        self._cnx.commit()

    def get_topic_own_id(self, forum_id, topic_id):
        """
        gets data source topic id (None when not found)

        :type forum_id: int
        :param forum_id: DB forum id

        :type topic_id: int
        :param topic_id: DB topic id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT own_id FROM topic WHERE forum_id = %s AND id = %s"
        arguments = [forum_id, topic_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def get_topic_id(self, topic_own_id, forum_id):
        """
        gets DB topic id (None when not found)

        :type topic_own_id: int
        :param topic_own_id: data source topic id

        :type forum_id: int
        :param forum_id: DB forum id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id FROM topic WHERE own_id = %s AND forum_id = %s"
        arguments = [topic_own_id, forum_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def get_topic_ids(self, forum_id):
        """
        gets list of topic ids in a given forum

        :type forum_id: int
        :param forum_id: DB forum id
        """
        topic_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT id FROM topic WHERE forum_id = %s"
        arguments = [forum_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            topic_ids.append(row[0])
            row = cursor.fetchone()
        cursor.close()
        return topic_ids

    def get_topic_own_ids(self, forum_id):
        """
        gets list of data source topic ids

        :type forum_id: int
        :param forum_id: DB forum id
        """
        topic_own_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT own_id FROM topic WHERE forum_id = %s"
        arguments = [forum_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            topic_own_ids.append(row[0])
            row = cursor.fetchone()
        cursor.close()
        return topic_own_ids

    def update_topic_created_at(self, topic_id, created_at, forum_id):
        """
        updates created_at column of a topic

        :type topic_id: int
        :param topic_id: DB topic id

        :type created_at: str
        :param created_at: new created_at value

        :type forum_id: int
        :param forum_id: DB forum id
        """
        cursor = self._cnx.cursor()
        query = "UPDATE topic SET created_at = %s WHERE id = %s AND forum_id = %s"
        arguments = [created_at, topic_id, forum_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_message(self, own_id, pos, type, topic_id, body, votes,
                       author_id, created_at):
        """
        inserts message to DB and returns its DB id (None on failure)

        :type own_id: int
        :param own_id: data source message id

        :type pos: int
        :param pos: position of the message in the topic

        :type type: str
        :param type: type of the message (question, reply)

        :type topic_id: int
        :param topic_id: DB topic id

        :type body: str
        :param body: message body

        :type votes: int
        :param votes: number of votes received

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation time of the message
        """
        try:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO message " \
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            arguments = [None, own_id, pos, type, 0, topic_id, 0, 0, body,
                         votes, author_id, created_at]
            cursor.execute(query, arguments)
            self._cnx.commit()

            # read back the id (works both for a fresh insert and for a
            # pre-existing row skipped by INSERT IGNORE)
            query = "SELECT id FROM message WHERE own_id = %s AND topic_id = %s"
            arguments = [own_id, topic_id]
            cursor.execute(query, arguments)
            found = None
            row = cursor.fetchone()
            if row:
                found = row[0]
            cursor.close()
            return found
        except:
            self._logger.warning("message " + str(own_id) +
                                 " for topic id: " + str(topic_id) +
                                 " not inserted", exc_info=True)

    def select_forum_id(self, forum_name, project_id):
        """
        gets DB forum id by its name (None when not found)

        :type forum_name: str
        :param forum_name: forum name

        :type project_id: int
        :param project_id: project id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM forum " \
                "WHERE name = %s AND project_id = %s"
        arguments = [forum_name, project_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        else:
            self._logger.warning("no forum with this name " + str(forum_name))
        cursor.close()
        return found

    def select_project_id(self, project_name):
        """
        gets project id by its name

        :type project_name: str
        :param project_name: project name
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def insert_forum(self, project_id, forum_name, type):
        """
        inserts forum to DB and returns its DB id

        :type project_id: int
        :param project_id: project id

        :type forum_name: str
        :param forum_name: forum name

        :type type: str
        :param type: forum type (Eclipse forum, Bugzilla)
        """
        # initialise the result so the method returns None (instead of
        # raising NameError) if the follow-up SELECT finds nothing
        found = None
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO forum " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, project_id, forum_name, type]
        cursor.execute(query, arguments)
        self._cnx.commit()

        query = "SELECT id " \
                "FROM forum " \
                "WHERE name = %s"
        arguments = [forum_name]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        else:
            self._logger.warning("no forum with this name " + str(forum_name))
        cursor.close()
        return found

    def select_topic_id(self, forum_id, own_id):
        """
        gets DB topic id by its own id (None when not found)

        :type forum_id: int
        :param forum_id: forum id

        :type own_id: int
        :param own_id: data source topic id
        """
        try:
            found = None
            cursor = self._cnx.cursor()
            query = "SELECT id FROM topic WHERE forum_id = %s AND own_id = %s"
            arguments = [forum_id, own_id]
            cursor.execute(query, arguments)
            row = cursor.fetchone()
            if row:
                found = row[0]
            cursor.close()
            return found
        except Exception:
            self._logger.warning("topic id " + str(own_id) +
                                 " not found for forum id: " + str(forum_id),
                                 exc_info=True)

    def insert_topic(self, own_id, forum_id, title, views, last_change_at):
        """
        inserts topic to DB

        :type own_id: int
        :param own_id: data source topic id

        :type forum_id: int
        :param forum_id: forum id

        :type title: str
        :param title: title of the topic

        :type views: int
        :param views: number of views of the topic

        :type last_change_at: str
        :param last_change_at: last change date
        """
        try:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO topic " \
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
            # created_at and description are unknown at insertion time
            arguments = [None, own_id, forum_id, title.lower(), None, views,
                         None, last_change_at]
            cursor.execute(query, arguments)
            self._cnx.commit()
            cursor.close()
        except Exception:
            self._logger.warning("topic with title " + title.lower() +
                                 " not inserted for forum id: " +
                                 str(forum_id), exc_info=True)

    def update_topic_info(self, topic_id, forum_id, views, last_change_at):
        """
        updates views and last change date

        :type topic_id: int
        :param topic_id: DB topic id

        :type forum_id: int
        :param forum_id: DB forum id

        :type views: int
        :param views: new number of views

        :type last_change_at: str
        :param last_change_at: new last change date
        """
        cursor = self._cnx.cursor()
        query = "UPDATE topic SET views = %s, last_change_at = %s WHERE id = %s AND forum_id = %s"
        arguments = [views, last_change_at, topic_id, forum_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()
class FileJsonExporter:
    """
    This class handles the export of file information via JSON. It allows to
    use a former version of the bus factor tool
    (https://github.com/SOM-Research/busfactor)
    """
    LOG_FOLDER_PATH = "logs"

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param db_name: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._date_util = DateUtil()
        self._db_util = DbUtil()
        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-file-json-" + db_name
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(
            self._logger, self._log_path, "info")

        self._db_name = db_name
        config.update({'database': db_name})
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)
        self._file_util = FileUtil(self._config, self._logger)

    def get_diff_info(self, patch_content):
        """
        extracts the "@@ ... @@" hunk header info from a patch; returns
        "Binary file" when no hunk header is present and "Renamed file"
        when the patch content is empty
        """
        if patch_content:
            first_line = patch_content.split('\n')[0]
            if re.match(r"^@@(\s|\+|\-|\d|,)+@@", first_line, re.M):
                diff_info = first_line.split("@@")[1]
            else:
                diff_info = "Binary file"
        else:
            diff_info = 'Renamed file'

        return diff_info

    def get_diff_content(self, patch_content):
        """
        extracts the patch body (everything after the hunk header); returns
        "No content" for empty or binary patches
        """
        if patch_content:
            lines = patch_content.split('\n')
            if re.match(r"^@@(\s|\+|\-|\d|,)+@@", lines[0], re.M):
                # the first line may carry content after the header; prepend
                # it to the remaining lines
                first_line_content = lines[0].split("@@")[2]
                diff_content = lines[1:]
                diff_content.insert(0, first_line_content)
                diff_content = '\n'.join(diff_content)
            else:
                diff_content = "No content"
        else:
            diff_content = "No content"

        return diff_content

    def get_patch_info(self, content):
        """
        returns a dict with the hunk header info and the patch body
        """
        diff_info = self.get_diff_info(content)
        diff_content = self.get_diff_content(content)
        # NOTE(review): str.decode only exists in Python 2 — this call breaks
        # under Python 3; confirm the target interpreter before porting
        return {
            'info': diff_info,
            'content': diff_content.decode('utf-8', 'ignore')
        }

    def get_changes_for_file(self, file_ids):
        """
        collects all modifications (author, dates, additions, deletions,
        patch) for the given DB file ids, newest first

        :type file_ids: list int
        :param file_ids: DB file ids (trusted, DB-originated integers)
        """
        file_modifications = []
        cursor = self._cnx.cursor()
        # file_ids are integers read from the DB, not user input, so the
        # string-built IN clause is safe here
        query = "SELECT c.author_id, c.committer_id, c.authored_date, c.committed_date, c.sha, fm.additions, fm.deletions, fm.patch " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id IN (" + ",".join([str(id) for id in file_ids]) + ") " \
                "ORDER BY c.authored_date DESC"
        cursor.execute(query)
        row = cursor.fetchone()
        while row:
            author_id = row[0]
            committer_id = row[1]
            authored_date = row[2].strftime('%Y-%m-%d %H:%M:%S')
            committed_date = row[3].strftime('%Y-%m-%d %H:%M:%S')
            additions = str(row[4])
            deletions = str(row[5])
            sha = str(row[6])
            patch = str(row[7])
            patch_info = self.get_patch_info(patch)

            author_name, author_email = self.get_user_identity(author_id)
            # skip the second lookup when author and committer coincide
            if author_id != committer_id:
                committer_name, committer_email = self.get_user_identity(
                    committer_id)
            else:
                committer_name = author_name
                committer_email = author_email

            author = {'name': author_name, 'email': author_email}
            committer = {'name': committer_name, 'email': committer_email}

            file_modifications.append({
                'author': author,
                'authored_date': authored_date,
                'committer': committer,
                'committed_date': committed_date,
                'additions': additions,
                'deletions': deletions,
                'sha': sha,
                'patch': patch_info
            })
            row = cursor.fetchone()
        cursor.close()
        return file_modifications

    def array2string(self, array):
        """
        joins the elements of an iterable into a comma-separated string
        """
        return ','.join(str(x) for x in array)

    def get_user_identity(self, user_id):
        """
        resolves a user id to its aliased (name, email) tuple; returns None
        when the user is unknown

        :type user_id: int
        :param user_id: DB user id
        """
        found = None
        cursor = self._cnx.cursor()
        # follow the user_alias table so aliased identities resolve to the
        # canonical user record
        query = "SELECT u.name, u.email " \
                "FROM user u " \
                "JOIN (SELECT IFNULL(ua.alias_id, u.id) as user_id FROM user u LEFT JOIN user_alias ua ON u.id = ua.user_id WHERE u.id = %s) as aliased " \
                "ON aliased.user_id = u.id"
        arguments = [user_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            name = row[0]
            email = row[1]
            found = (name, email)
        # close the cursor on every path (was leaked before)
        cursor.close()
        return found

    def get_commits_info(self, file_ids):
        """
        collects commit metadata (sha, message, reference, identities, dates)
        for all commits touching the given DB file ids

        :type file_ids: list int
        :param file_ids: DB file ids (trusted, DB-originated integers)
        """
        commits = []
        cursor = self._cnx.cursor()
        query = "SELECT c.sha, c.message, r.name, c.author_id, c.committer_id, c.authored_date, c.committed_date " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id IN (" + ",".join([str(id) for id in file_ids]) + ")"
        cursor.execute(query)
        row = cursor.fetchone()
        while row:
            sha = str(row[0])
            message = str(row[1].encode('utf8'))
            ref = str(row[2])
            author_id = row[3]
            committer_id = row[4]
            authored_date = row[5].strftime('%Y-%m-%d %H:%M:%S')
            committed_date = row[6].strftime('%Y-%m-%d %H:%M:%S')

            author_name, author_email = self.get_user_identity(author_id)
            if author_id != committer_id:
                committer_name, committer_email = self.get_user_identity(
                    committer_id)
            else:
                committer_name = author_name
                committer_email = author_email

            author = {'name': author_name, 'email': author_email}
            committer = {'name': committer_name, 'email': committer_email}

            commits.append({
                'sha': sha,
                'author': author,
                'committer': committer,
                'message': message,
                'ref': ref,
                'authored_date': authored_date,
                'committed_date': committed_date
            })
            row = cursor.fetchone()
        cursor.close()
        return commits

    def get_status_file(self, file_id):
        """
        returns the last modification status and date for a file

        :type file_id: int
        :param file_id: DB file id
        """
        cursor = self._cnx.cursor()
        query = "SELECT fm.status, MAX(c.committed_date) AS last_modification " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id = %s"
        arguments = [file_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        cursor.close()

        status = ""
        last_modification = ""
        if row:
            status = row[0]
            last_modification = row[1].strftime('%Y-%m-%d %H:%M:%S')

        return {'status': status, 'last_modification': last_modification}

    def add_file_info_to_json(self, references, repo_json):
        """
        writes one JSON line per (file, reference) pair of the current repo,
        excluding files whose latest action is a deletion and files that were
        later renamed

        :type references: list str
        :param references: reference names to include

        :type repo_json: file Object
        :param repo_json: output stream (one JSON document per line)
        """
        cursor = self._cnx.cursor()
        query = "SELECT f.id, f.name, f.ext, r.name, r.id " \
                "FROM repository repo JOIN commit_in_reference cin ON repo.id = cin.repo_id " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN file f ON f.id = fm.file_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "WHERE repo.id = %s AND r.name IN (" + ",".join(["'" + ref + "'" for ref in references]) + ") AND " \
                "f.id NOT IN " \
                "(SELECT deletions.file_id FROM " \
                "(SELECT fm.file_id, c.committed_date " \
                "FROM commit_in_reference cin " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "WHERE fm.status = 'deleted' AND cin.repo_id = %s ) as deletions " \
                "JOIN " \
                "(SELECT fm.file_id, max(c.committed_date) as committed_date " \
                "FROM commit_in_reference cin " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "WHERE fm.status <> 'deleted' AND cin.repo_id = %s " \
                "GROUP BY fm.file_id) AS last_action " \
                "ON deletions.file_id = last_action.file_id " \
                "WHERE deletions.committed_date > last_action.committed_date " \
                "UNION " \
                "SELECT fr.previous_file_id " \
                "FROM file_renamed fr JOIN file f ON fr.previous_file_id = f.id " \
                "JOIN file_modification fm ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "WHERE cin.repo_id = %s) " \
                "GROUP BY f.id, r.id"
        arguments = [
            self._repo_id, self._repo_id, self._repo_id, self._repo_id
        ]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            file_id = row[0]
            file_name = row[1]
            file_ext = row[2]
            ref_name = row[3]
            ref_id = row[4]

            status = self.get_status_file(file_id)
            # follow renames: collect the history of this file and gather
            # info for every past id it had
            file_history = self._file_util.get_file_history_by_id(
                file_id, ref_id)
            file_ids = list(set([h.get("file_id") for h in file_history]))
            commits = self.get_commits_info(file_ids)
            directories = self._file_util.get_directories(file_name)
            changes_info = self.get_changes_for_file(file_ids)

            file_info = {
                'repo': self._repo_name,
                'info': status,
                'commits': commits,
                'ref': ref_name,
                'id': str(file_id),
                'name': file_name.split('/')[-1],
                'ext': file_ext,
                'dirs': directories,
                'file_changes': changes_info
            }
            repo_json.write(json.dumps(file_info) + "\n")
            row = cursor.fetchone()
        cursor.close()

    def export(self, repo_name, references, file_path):
        """
        exports the file data to JSON format

        :type repo_name: str
        :param repo_name: name of the repository to analyse

        :type references: list str
        :param references: list of references to analyse

        :type file_path: str
        :param file_path: the path where to export the file information
        """
        try:
            self._logger.info("FileJSONExporter started")
            start_time = datetime.now()

            repo_json = codecs.open(file_path, 'w', "utf-8")
            self._repo_name = repo_name
            self._repo_id = self._db_util.select_repo_id(
                self._cnx, repo_name, self._logger)
            self.add_file_info_to_json(references, repo_json)
            repo_json.close()

            self._db_util.close_connection(self._cnx)
            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(
                end_time, start_time)
            self._logger.info("FileJSONExporter: process finished after " +
                              str(minutes_and_seconds[0]) + " minutes and " +
                              str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(
                self._logger, self._fileHandler)
        except:
            self._logger.error("FileJSONExporter failed", exc_info=True)
class GitDao():
    """
    This class handles the persistence and retrieval of Git data
    """

    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except:
            self._logger.error("GitDao init failed")
            raise

    def check_connection_alive(self):
        # ping the server; if the query fails or returns no version string,
        # transparently re-open the connection
        try:
            cursor = self._cnx.cursor()
            cursor.execute("SELECT VERSION()")
            results = cursor.fetchone()
            ver = results[0]
            cursor.close()
            if not ver:
                self._cnx = self._db_util.restart_connection(
                    self._config, self._logger)
        except:
            self._cnx = self._db_util.restart_connection(
                self._config, self._logger)

    def close_connection(self):
        # closes the DB connection
        self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        # re-opens the DB connection
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)

    def get_connection(self):
        # returns the raw DB connection
        return self._cnx

    def get_cursor(self):
        # returns a fresh cursor on the current connection
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def array2string(self, array):
        # joins the elements of an iterable into a comma-separated string
        return ','.join(str(x) for x in array)

    def function_at_commit_is_empty(self, repo_id):
        """
        checks function at commit table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN function_at_commit fac ON c.id = fac.commit_id " \
                "WHERE c.repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])
        cursor.close()
        # NOTE(review): despite the name, this returns 1 when the table has
        # rows and 0 when it is empty — callers appear to rely on this
        return int(count > 0)

    def select_code_at_commit(self, commit_id, file_id):
        """
        retrieve the code at commit info for a given file and commit

        :type commit_id: int
        :param commit_id: id of an existing commit in the DB

        :type file_id: int
        :param file_id: id of an existing file in the DB
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT * " \
                "FROM code_at_commit " \
                "WHERE commit_id = %s AND file_id = %s"
        arguments = [commit_id, file_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row
        cursor.close()
        return found

    def line_detail_table_is_empty(self, repo_id):
        """
        checks line detail table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "JOIN line_detail l ON fm.id = l.file_modification_id " \
                "WHERE l.content IS NOT NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])
        cursor.close()
        # see note on function_at_commit_is_empty: 1 means "has rows"
        return int(count > 0)

    def code_at_commit_is_empty(self, repo_id):
        """
        checks code at commit table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN code_at_commit cac ON c.id = cac.commit_id " \
                "WHERE c.repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])
        cursor.close()
        # see note on function_at_commit_is_empty: 1 means "has rows"
        return int(count > 0)

    def file_modification_patch_is_empty(self, repo_id):
        """
        checks patch column in file modification table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "WHERE patch IS NOT NULL and repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])
        cursor.close()
        # see note on function_at_commit_is_empty: 1 means "has rows"
        return int(count > 0)

    def get_last_commit_id(self, repo_id):
        """
        gets last commit id

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT MAX(id) as last_commit_id " \
                "FROM commit c " \
                "WHERE repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_repo_id(self, repo_name):
        """
        selects id of a repository by its name

        :type repo_name: str
        :param repo_name: name of an existing repository in the DB
        """
        return self._db_util.select_repo_id(self._cnx, repo_name, self._logger)

    def insert_repo(self, project_id, repo_name):
        """
        inserts repository to DB

        :type project_id: int
        :param project_id: id of an existing project in the DB

        :type repo_name: str
        :param repo_name: name of a repository to insert
        """
        return self._db_util.insert_repo(self._cnx, project_id, repo_name,
                                         self._logger)

    def select_project_id(self, project_name):
        """
        selects id of a project by its name

        :type project_name: str
        :param project_name: name of an existing project in the DB
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def get_user_id(self, user_name, user_email):
        """
        gets id of a user, inserting the user on demand

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """
        # completely anonymous commits are mapped to a sentinel user
        if user_email == None and user_name == None:
            user_name = "unknown_user"
            user_email = "unknown_user"

        # email is the preferred lookup key; fall back to name when missing
        if user_email:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        return user_id

    def insert_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB, one by one

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning(
                    "parent commit id not found! SHA parent " +
                    str(parent.hexsha))

            query = "INSERT IGNORE INTO commit_parent " \
                    "VALUES (%s, %s, %s, %s, %s)"

            # unknown parents are stored with a NULL id and fixed up later
            # by fix_commit_parent_table
            if parent_id:
                arguments = [repo_id, commit_id, sha, parent_id, parent.hexsha]
            else:
                arguments = [repo_id, commit_id, sha, None, parent.hexsha]

            cursor.execute(query, arguments)
            self._cnx.commit()

        cursor.close()

    def insert_all_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB all together

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        to_insert = []
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning(
                    "parent commit id not found! SHA parent " +
                    str(parent.hexsha))

            # unknown parents get a NULL id; fixed later by
            # fix_commit_parent_table
            if parent_id:
                to_insert.append(
                    (repo_id, commit_id, sha, parent_id, parent.hexsha))
            else:
                to_insert.append(
                    (repo_id, commit_id, sha, None, parent.hexsha))

        if to_insert:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_parent(repo_id, commit_id, commit_sha, parent_id, parent_sha) VALUES (%s, %s, %s, %s, %s)"
            cursor.executemany(query, [i for i in to_insert])
            self._cnx.commit()
            cursor.close()

    def insert_commits_in_reference(self, commits_data):
        """
        inserts commits to DB all together

        :type commits_data: list of Object
        :param commits_data: commit data
        """
        if commits_data:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_in_reference(repo_id, commit_id, ref_id) VALUES (%s, %s, %s)"
            cursor.executemany(query, commits_data)
            self._cnx.commit()
            cursor.close()

    def insert_commit_in_reference(self, repo_id, commit_id, ref_id):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type commit_id: int
        :param commit_id: id of the commit

        :type ref_id: int
        :param ref_id: id of the reference
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit_in_reference " \
                "VALUES (%s, %s, %s)"
        arguments = [repo_id, commit_id, ref_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_line_details(self, file_modification_id, detail):
        """
        inserts line details to DB

        :type file_modification_id: int
        :param file_modification_id: id of the file modification

        :type detail: tuple
        :param detail: line detail values (6 fields, positional)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO line_detail " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            file_modification_id, detail[0], detail[1], detail[2], detail[3],
            detail[4], detail[5]
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_modification_id(self, commit_id, file_id):
        """
        selects file modification id

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file_modification " \
                "WHERE commit_id = %s AND file_id = %s"
        arguments = [commit_id, file_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_file_modification(self, commit_id, file_id, status, additions,
                                 deletions, changes, patch_content):
        """
        inserts file modification to DB

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file

        :type status: str
        :param status: type of the modification

        :type additions: int
        :param additions: number of additions

        :type deletions: int
        :param deletions: number of deletions

        :type changes: int
        :param changes: number of changes

        :type patch_content: str
        :param patch_content: content of the patch
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file_modification " \
                "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            commit_id, file_id, status, additions, deletions, changes,
            patch_content
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file_renamed(self, repo_id, current_file_id, previous_file_id,
                            file_modification_id):
        """
        inserts file renamed information

        :type repo_id: int
        :param repo_id: id of the repository

        :type current_file_id: int
        :param current_file_id: id of the renamed file

        :type previous_file_id: int
        :param previous_file_id: id of the file before renaming

        :type file_modification_id: int
        :param file_modification_id: id of the file modification
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file_renamed " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [
            repo_id, current_file_id, previous_file_id, file_modification_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file(self, repo_id, name, ext):
        """
        inserts file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)

        :type ext: str
        :param ext: extension of the file
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, name, ext]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_id(self, repo_id, name):
        """
        selects id of the file (None when not found)

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file " \
                "WHERE name = %s AND repo_id = %s"
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        # fetchone() returns None when no row matches; subscripting it then
        # raises, which the except turns into a None result
        try:
            id = cursor.fetchone()[0]
        except:
            id = None
        cursor.close()
        return id

    def insert_reference(self, repo_id, ref_name, ref_type):
        """
        inserts reference

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference

        :type ref_type: str
        :param ref_type: type of the reference (branch or tag)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO reference " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, ref_name, ref_type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_reference_name(self, repo_id, ref_id):
        """
        selects reference name by its id

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_id: int
        :param ref_id: id of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT name " \
                "FROM reference " \
                "WHERE id = %s and repo_id = %s"
        arguments = [ref_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_reference_id(self, repo_id, ref_name):
        """
        selects reference id by its name

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM reference " \
                "WHERE name = %s and repo_id = %s"
        arguments = [ref_name, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_commit(self, repo_id, sha, message, author_id, committer_id,
                      authored_date, committed_date, size):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type sha: str
        :param sha: SHA of the commit

        :type message: str
        :param message: message of the commit

        :type author_id: int
        :param author_id: author id of the commit

        :type committer_id: int
        :param committer_id: committer id of the commit

        :type authored_date: str
        :param authored_date: authored date of the commit

        :type committed_date: str
        :param committed_date: committed date of the commit

        :type size: int
        :param size: size of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, repo_id, sha, message.strip(), author_id, committer_id,
            authored_date, committed_date, size
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_function_id(self, file_id, start_line, end_line):
        """
        select DB function id

        :type file_id: int
        :param file_id: id of the file

        :type start_line: int
        :param start_line: line number where the function starts

        :type end_line: int
        :param end_line: line number where the function ends
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM function " \
                "WHERE file_id = %s AND start_line = %s AND end_line = %s"
        arguments = [file_id, start_line, end_line]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_code_at_commit(self, commit_id, file_id, ccn, loc, comments,
                              blanks, funs, tokens, avg_ccn, avg_loc,
                              avg_tokens):
        """
        inserts to DB code information of a file at a given commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file

        :type ccn: int
        :param ccn: cyclomatic complexity value

        :type loc: int
        :param loc: lines of code

        :type comments: int
        :param comments: commented lines

        :type blanks: int
        :param blanks: blank lines

        :type funs: int
        :param funs: number of functions

        :type tokens: int
        :param tokens: tokens in the files

        :type avg_ccn: int
        :param avg_ccn: file avg cyclomatic complexity (per function)

        :type avg_loc: int
        :param avg_loc: file avg lines of code (per function)

        :type avg_tokens: int
        :param avg_tokens: file avg number of tokens (per function)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO code_at_commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            commit_id, file_id, ccn, loc, comments, blanks, funs, tokens,
            avg_ccn, avg_loc, avg_tokens
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_function(self, function_name, file_id, num_arguments, loc,
                        tokens, total_lines, ccn, start_line, end_line):
        """
        inserts function to DB

        :type function_name: str
        :param function_name: name of the function

        :type file_id: int
        :param file_id: id of the file

        :type num_arguments: int
        :param num_arguments: number of arguments

        :type loc: int
        :param loc: lines of code

        :type tokens: int
        :param tokens: tokens in the function

        :type total_lines: int
        :param total_lines: number of lines (loc + comments + empty lines)

        :type ccn: int
        :param ccn: cyclomatic complexity of the function

        :type start_line: int
        :param start_line: line number where the function starts

        :type end_line: int
        :param end_line: line number where the function ends
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO function " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, function_name, file_id, num_arguments, loc, tokens,
            total_lines, ccn, start_line, end_line
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_function_at_commit(self, fun_id, commit_id):
        """
        inserts link between a function and the commit

        :type fun_id: int
        :param fun_id: id of the function

        :type commit_id: int
        :param commit_id: id of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO function_at_commit " \
                "VALUES (%s, %s)"
        arguments = [commit_id, fun_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_commit_parent(self, parent_id, parent_sha, repo_id):
        """
        fills in the parent_id of commit_parent rows that were stored with a
        NULL parent id for the given parent SHA

        :type parent_id: int
        :param parent_id: id of the commit parent

        :type parent_sha: str
        :param parent_sha: SHA of the commit parent

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_update = "UPDATE commit_parent " \
                       "SET parent_id = %s " \
                       "WHERE parent_id IS NULL AND parent_sha = %s AND repo_id = %s "
        arguments = [parent_id, parent_sha, repo_id]
        cursor.execute(query_update, arguments)
        self._cnx.commit()
        cursor.close()

    def fix_commit_parent_table(self, repo_id):
        """
        checks for missing commit parent information and fixes it

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_select = "SELECT parent_sha " \
                       "FROM commit_parent " \
                       "WHERE parent_id IS NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query_select, arguments)
        row = cursor.fetchone()
        # second pass: parents that were unknown at insertion time may have
        # been imported since, so resolve and back-fill their ids
        while row:
            parent_sha = row[0]
            parent_id = self.select_commit_id(parent_sha, repo_id)
            self.update_commit_parent(parent_id, parent_sha, repo_id)
            row = cursor.fetchone()
        cursor.close()

    def select_commit_id(self, sha, repo_id):
        """
        selects id of a commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def select_all_developer_ids(self, repo_id):
        """
        selects all developers (committers or authors) of a given repo

        :type repo_id: int
        :param repo_id: id of the repository
        """
        user_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT c.author_id " \
"FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.author_id " \ "WHERE repo_id = %s " \ "UNION " \ "SELECT c.committer_id " \ "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.committer_id " \ "WHERE repo_id = %s " arguments = [repo_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: user_id = row[0] user_ids.append(user_id) row = cursor.fetchone() cursor.close() return user_ids def select_sha_commit_by_user(self, user_id, repo_id, match_on="author"): """ selects the SHA of the first commit (authored or committed) by a given user id :type user_id: int :param user_id: id of the user :type repo_id: int :param repo_id: id of the repository :type match_on: str (author or committer) :param match_on: define whether to perform the match on the author id or committer id """ found = None cursor = self._cnx.cursor() if match_on == "committer": query = "SELECT sha " \ "FROM commit " \ "WHERE committer_id = %s AND repo_id = %s " \ "LIMIT 1" else: query = "SELECT sha " \ "FROM commit " \ "WHERE author_id = %s AND repo_id = %s " \ "LIMIT 1" arguments = [user_id, repo_id] cursor.execute(query, arguments) row = cursor.fetchone() if row: found = row[0] return found def select_file_changes(self, file_id, ref_id, before_date=None, patch=False, code=False): """ get all file changes, excluding renamings for a given file within a reference :type file_id: int :param file_id: id of the file :type ref_id: int :param ref_id: id of the reference :type before_date: str :param before_date: if not null, it retrieves the changes before the given date :type patch: bool :param: patch: if True, it retrieves also the patch associate to each file change :type code: bool :param: code: if True, it retrieves also code-related information of the file (ccn, loc, commented lines, etc.) 
""" found = [] before_date_selection = "" if before_date: before_date_selection = " AND c.authored_date <= '" + str( before_date) + "'" patch_selection = ", NULL AS patch" if patch: patch_selection = ", fm.patch" code_selection = "" code_join = "" if code: code_selection = ", " \ "IFNULL(cac.ccn, 0) AS ccn, " \ "IFNULL(cac.loc, 0) AS loc, " \ "IFNULL(cac.commented_lines, 0) AS commented_lines, " \ "IFNULL(cac.blank_lines, 0) AS blank_lines, " \ "IFNULL(cac.funs, 0) AS funs, " \ "IFNULL(cac.tokens, 0) AS tokens, " \ "IFNULL(cac.avg_ccn, 0) AS avg_ccn, " \ "IFNULL(cac.avg_loc, 0) AS avg_loc, " \ "IFNULL(cac.avg_tokens, 0) AS avg_tokens" code_join = "LEFT JOIN code_at_commit cac ON cac.commit_id = c.id AND cac.file_id = f.id " cursor = self._cnx.cursor() query = "SELECT f.id, f.name, c.sha, c.message, IFNULL(ua.alias_id, c.author_id) AS author_id, " \ "c.authored_date, c.committed_date, fm.status, fm.additions, fm.deletions, " \ "fm.changes" + patch_selection + code_selection + " " \ "FROM file f join file_modification fm on f.id = fm.file_id " \ "JOIN commit c ON c.id = fm.commit_id " \ "JOIN commit_in_reference cin ON cin.commit_id = c.id " \ "JOIN reference r ON r.id = cin.ref_id " \ "LEFT JOIN user_alias ua ON ua.user_id = c.author_id " + code_join + " " \ "WHERE f.id = %s and r.id = %s" + before_date_selection arguments = [file_id, ref_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: file_id = row[0] file_name = row[1] sha = row[2] message = row[3] author_id = int(row[4]) authored_date = row[5] committed_date = row[6] status = str(row[7]) additions = int(row[8]) deletions = int(row[9]) changes = int(row[10]) git_data = { 'sha': sha, 'commit_message': message, 'author_id': author_id, 'authored_date': authored_date, 'committed_date': committed_date, 'status': status, 'additions': additions, 'deletions': deletions, 'changes': changes, 'file_name': file_name, 'file_id': file_id } patch_data = {} if patch: patch = str(row[11]) patch_data = 
{'patch': patch} code_data = {} if code: ccn = int(row[12]) loc = int(row[13]) commented_lines = int(row[14]) blank_lines = int(row[15]) funs = int(row[16]) tokens = int(row[17]) avg_ccn = float(row[18]) avg_loc = float(row[19]) avg_tokens = float(row[20]) code_data = { 'ccn': ccn, 'loc': loc, 'commented_lines': commented_lines, 'blank_lines': blank_lines, 'funs': funs, 'tokens': tokens, 'avg_ccn': avg_ccn, 'avg_loc': avg_loc, 'avg_tokens': avg_tokens } found.append( dict(git_data.items() + patch_data.items() + code_data.items())) row = cursor.fetchone() cursor.close() return found def select_file_renamings(self, file_id, ref_id): """ get file renamings for a given file within a reference :type file_id: int :param file_id: id of the file :type ref_id: int :param ref_id: id of the reference """ renamings = [] cursor = self._cnx.cursor() query = "SELECT fr.current_file_id, fr.previous_file_id, c.authored_date, c.authored_date, c.committed_date " \ "FROM file f JOIN file_modification fm ON f.id = fm.file_id " \ "JOIN commit c ON c.id = fm.commit_id " \ "JOIN commit_in_reference cin ON cin.commit_id = c.id " \ "JOIN reference r ON r.id = cin.ref_id " \ "JOIN file_renamed fr ON fr.file_modification_id = fm.id AND fr.current_file_id = f.id " \ "WHERE f.id = %s and r.id = %s AND fm.status = 'renamed';" arguments = [file_id, ref_id] cursor.execute(query, arguments) row = cursor.fetchone() while row: renamings.append(row[1]) row = cursor.fetchone() cursor.close() if len(renamings) > 1: self._logger.warning("The file with id " + str(file_id) + " has multiple renamings in reference " + str(ref_id) + "!") return renamings
class StackOverflowDao():
    """
    This class handles the persistence and retrieval of Stackoverflow data
    """

    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except Exception:
            self._logger.error("StackOverflowDao failed")
            raise

    def close_connection(self):
        """
        closes DB connection
        """
        self._db_util.close_connection(self._cnx)

    def get_message_type_id(self, message_type):
        """
        gets the id associated to a given type of message

        :type message_type: str
        :param message_type: message type
        """
        return self._db_util.get_message_type_id(self._cnx, message_type)

    def get_user_id(self, user_name):
        """
        gets the id associated to a user name, inserting the user first
        if it does not exist yet

        :type user_name: str
        :param user_name: user name
        """
        if not user_name:
            # NOTE(review): "uknonwn_user" is a typo for "unknown_user", but
            # it is kept as-is: it is a sentinel stored in the DB and changing
            # it would orphan rows written by earlier runs
            user_name = "uknonwn_user"

        user_id = self._db_util.select_user_id_by_name(self._cnx, user_name,
                                                       self._logger)
        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, None, self._logger)
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        return user_id

    def select_project_id(self, project_name):
        """
        gets project id by its name

        :type project_name: str
        :param project_name: project name
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def select_forum_id(self, forum_name, project_id):
        """
        gets DB forum id by its name

        :type forum_name: str
        :param forum_name: forum name

        :type project_id: int
        :param project_id: project id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM forum " \
                "WHERE name = %s AND project_id = %s"
        arguments = [forum_name, project_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        else:
            self._logger.warning("no forum with this name " + str(forum_name))
        cursor.close()
        return found

    def insert_forum(self, project_id, forum_name, type):
        """
        inserts forum to DB and returns its id

        :type project_id: int
        :param project_id: project id

        :type forum_name: str
        :param forum_name: forum name

        :type type: str
        :param type: forum type (Eclipse forum, Bugzilla)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO forum " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, project_id, forum_name, type]
        cursor.execute(query, arguments)
        self._cnx.commit()

        # re-select filtering on project_id as well: forum names are only
        # unique per project (the original matched on name alone)
        query = "SELECT id " \
                "FROM forum " \
                "WHERE name = %s AND project_id = %s"
        arguments = [forum_name, project_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        found = None
        if row:
            found = row[0]
        else:
            self._logger.warning("no forum with name " + str(forum_name))

        cursor.close()
        return found

    def update_topic_created_at(self, topic_id, created_at, forum_id):
        """
        updates created_at column of a topic

        :type topic_id: int
        :param topic_id: DB topic id

        :type created_at: str
        :param created_at: new created_at value

        :type forum_id: int
        :param forum_id: DB forum id
        """
        cursor = self._cnx.cursor()
        query = "UPDATE topic SET created_at = %s WHERE id = %s AND forum_id = %s"
        arguments = [created_at, topic_id, forum_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_message(self, own_id, topic_id, body, votes):
        """
        updates message data

        :type own_id: int
        :param own_id: data source message id

        :type topic_id: int
        :param topic_id: DB topic id

        :type body: str
        :param body: new message body

        :type votes: int
        :param votes: new message votes
        """
        try:
            cursor = self._cnx.cursor()
            query = "UPDATE message " \
                    "SET body = %s, votes = %s WHERE own_id = %s AND topic_id = %s"
            arguments = [body, votes, own_id, topic_id]
            cursor.execute(query, arguments)
            self._cnx.commit()
            # the original leaked this cursor
            cursor.close()
        except Exception:
            # the original logged "not inserted" for this UPDATE
            self._logger.warning("message " + str(own_id) +
                                 " for topic id: " + str(topic_id) +
                                 " not updated", exc_info=True)

    def insert_message(self, own_id, pos, type, topic_id, body, votes,
                       author_id, created_at):
        """
        inserts message to DB

        :type own_id: int
        :param own_id: data source message id

        :type pos: int
        :param pos: position of the message in the topic

        :type type: str
        :param type: type of the message (question, reply)

        :type topic_id: int
        :param topic_id: DB topic id

        :type body: str
        :param body: message body

        :type votes: int
        :param votes: number of votes received

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation time of the message
        """
        try:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO message " \
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            arguments = [None, own_id, pos, type, 0, topic_id, 0, body,
                         votes, author_id, created_at]
            cursor.execute(query, arguments)
            self._cnx.commit()
            # the original leaked this cursor
            cursor.close()
        except Exception:
            self._logger.warning("message " + str(own_id) +
                                 " for topic id: " + str(topic_id) +
                                 " not inserted", exc_info=True)

    def insert_topic(self, own_id, forum_id, name, votes, views, created_at,
                     last_change_at):
        """
        inserts topic to DB and returns its id (None on failure)

        :type own_id: int
        :param own_id: data source topic id

        :type forum_id: int
        :param forum_id: forum id

        :type name: str
        :param name: title of the topic

        :type votes: int
        :param votes: number of votes of the topic

        :type views: int
        :param views: number of views of the topic

        :type created_at: str
        :param created_at: creation date

        :type last_change_at: str
        :param last_change_at: last change date
        """
        cursor = self._cnx.cursor()
        try:
            query = "INSERT IGNORE INTO topic " \
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
            arguments = [None, own_id, forum_id, name, votes, views,
                         created_at, last_change_at]
            cursor.execute(query, arguments)
            self._cnx.commit()

            query = "SELECT id FROM topic WHERE own_id = %s AND forum_id = %s"
            arguments = [own_id, forum_id]
            cursor.execute(query, arguments)

            found = None
            row = cursor.fetchone()
            if row:
                found = row[0]
            return found
        except Exception:
            self._logger.warning("topic " + str(own_id) +
                                 " for forum id: " + str(forum_id) +
                                 " not inserted", exc_info=True)
            return None
        finally:
            # the original leaked this cursor on the exception path
            cursor.close()

    def insert_message_dependency(self, source_message_id, target_message_id):
        """
        inserts dependency between two messages

        :type source_message_id: int
        :param source_message_id: DB source message id

        :type target_message_id: int
        :param target_message_id: DB target message id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO message_dependency " \
                "VALUES (%s, %s)"
        arguments = [source_message_id, target_message_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def get_topic_own_id(self, forum_id, topic_id):
        """
        gets data source topic id

        :type forum_id: int
        :param forum_id: DB forum id

        :type topic_id: int
        :param topic_id: DB topic id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT own_id FROM topic WHERE forum_id = %s AND id = %s"
        arguments = [forum_id, topic_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def assign_label_to_topic(self, topic_id, label_id):
        """
        links label to topic

        :type topic_id: int
        :param topic_id: db topic id

        :type label_id: int
        :param label_id: label id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO topic_labelled " \
                "VALUES (%s, %s)"
        arguments = [topic_id, label_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_label_id(self, name):
        """
        selects the label id by its name

        :type name: str
        :param name: the name of the label
        """
        return self._db_util.select_label_id(self._cnx, name, self._logger)

    def insert_label(self, name):
        """
        inserts a label

        :type name: str
        :param name: the name of the label
        """
        self._db_util.insert_label(self._cnx, name, self._logger)

    def get_topic_own_ids(self, forum_id):
        """
        gets list of data source topic ids

        :type forum_id: int
        :param forum_id: DB forum id
        """
        topic_own_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT own_id FROM topic WHERE forum_id = %s"
        arguments = [forum_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            topic_own_ids.append(row[0])
            row = cursor.fetchone()
        cursor.close()
        return topic_own_ids

    def get_topic_ids(self, forum_id):
        """
        gets list of topic ids in a given forum

        :type forum_id: int
        :param forum_id: DB forum id
        """
        topic_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT id FROM topic WHERE forum_id = %s"
        arguments = [forum_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            topic_ids.append(row[0])
            row = cursor.fetchone()
        cursor.close()
        return topic_ids

    def get_topic_last_change_at(self, own_id, forum_id):
        """
        gets last change date of a topic

        :type own_id: int
        :param own_id: data source topic id

        :type forum_id: int
        :param forum_id: DB forum id
        """
        cursor = self._cnx.cursor()
        query = "SELECT last_change_at FROM topic WHERE own_id = %s AND forum_id = %s"
        arguments = [own_id, forum_id]
        cursor.execute(query, arguments)
        found = None
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found

    def insert_attachment(self, own_id, message_id, name, url):
        """
        insert attachment of a message

        :type own_id: int
        :param own_id: data source message id

        :type message_id: int
        :param message_id: DB message id

        :type name: str
        :param name: attachment name

        :type url: str
        :param url: attachment url
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [None, own_id, message_id, name, None, None, url]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_message_id(self, own_id, topic_id):
        """
        gets message id

        :type own_id: int
        :param own_id: data source message id

        :type topic_id: int
        :param topic_id: DB topic id
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND topic_id = %s"
        arguments = [own_id, topic_id]
        cursor.execute(query, arguments)
        found = None
        row = cursor.fetchone()
        if row:
            found = row[0]
        cursor.close()
        return found