from datetime import datetime
import codecs
import errno
import json
import os
import re

# Project-internal dependencies (import paths are assumptions; adjust them to
# your checkout): DbUtil, LoggingUtil, DslUtil, DateUtil and FileUtil come from
# the project's util package, ChartGenerator, HtmlGenerator and GexfGenerator
# from the exporter packages, and `resources` is the package bundling queries.json.


class DbSchema():
    """
    This class initializes the DB schema
    """

    def __init__(self, db_name, config, log_root_path):
        """
        :type db_name: str
        :param db_name: the name of the DB to initialize/connect to. It cannot be null and must follow
         the format allowed in MySQL (http://dev.mysql.com/doc/refman/5.7/en/identifiers.html).
         If a DB with the same name already exists in Gitana, the existing DB will be dropped and a new one created

        :type config: dict
        :param config: the DB configuration parameters

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._db_name = db_name
        self._config = config
        self._log_root_path = log_root_path
        self._db_util = DbUtil()
        self._logging_util = LoggingUtil()
        log_path = self._log_root_path + "db-schema-" + db_name
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, log_path, "info")
        self._cnx = self._db_util.get_connection(self._config)

    def __del__(self):
        if self._cnx:
            self._db_util.close_connection(self._cnx)
        if self._logger:
            # deletes the file handler of the logger
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)

    def add_git_tables(self):
        """
        initializes git tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_git_tables()

    def add_issue_tracker_tables(self):
        """
        initializes issue tracker tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_shared_tables_issue_tracker_communication_channels()
        self._init_issue_tracker_tables()

    def add_instant_messaging_tables(self):
        """
        initializes instant messaging tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_shared_tables_issue_tracker_communication_channels()
        self._init_instant_messaging_tables()

    def add_forum_tables(self):
        """
        initializes forum tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_shared_tables_issue_tracker_communication_channels()
        self._init_forum_tables()

    def init_database(self, init_git, init_issue_tracker, init_forum, init_instant_messaging):
        """
        initializes the database tables and functions

        :type init_git: bool
        :param init_git: if True, it initializes the tables containing git data

        :type init_issue_tracker: bool
        :param init_issue_tracker: if True, it initializes the tables containing issue tracker data

        :type init_forum: bool
        :param init_forum: if True, it initializes the tables containing forum data

        :type init_instant_messaging: bool
        :param init_instant_messaging: if True, it initializes the tables containing instant messaging data
        """
        try:
            self._logger.info("init database started")
            start_time = datetime.now()
            self._create_database()
            self.set_database(self._db_name)
            self._set_settings()

            self._init_common_tables()

            if init_issue_tracker or init_forum or init_instant_messaging:
                self._init_shared_tables_issue_tracker_communication_channels()

            if init_git:
                self._init_git_tables()

            if init_issue_tracker:
                self._init_issue_tracker_tables()

            if init_forum:
                self._init_forum_tables()

            if init_instant_messaging:
                self._init_instant_messaging_tables()

            self._init_functions()
            self._logger.info("database " + self._db_name + " created")

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("Init database finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
        except Exception:
            self._logger.error("init database failed", exc_info=True)

    def create_project(self, project_name):
        """
        inserts a project in the DB

        :type project_name: str
        :param project_name: the name of the project to create
        """
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.insert_project(self._cnx, self._db_name, project_name)
        self._db_util.close_connection(self._cnx)

    def create_repository(self, project_name, repo_name):
        """
        inserts a repository in the DB

        :type project_name: str
        :param project_name: the name of an existing project

        :type repo_name: str
        :param repo_name: the name of the repository to insert
        """
        self._cnx = self._db_util.get_connection(self._config)
        self.set_database(self._db_name)
        project_id = self._db_util.select_project_id(self._cnx, project_name, self._logger)
        try:
            self._db_util.insert_repo(self._cnx, project_id, repo_name, self._logger)
        except Exception:
            self._logger.error("repository " + repo_name + " not inserted", exc_info=True)
        self._db_util.close_connection(self._cnx)

    def list_projects(self):
        """
        lists all projects contained in the DB
        """
        self._cnx = self._db_util.get_connection(self._config)
        project_names = []
        self.set_database(self._db_name)
        cursor = self._cnx.cursor()
        query = "SELECT name FROM project"
        cursor.execute(query)

        row = cursor.fetchone()
        while row:
            project_names.append(row[0])
            row = cursor.fetchone()

        cursor.close()
        return project_names

    def set_database(self, db_name):
        """
        sets the DB used by the tool

        :type db_name: str
        :param db_name: the name of the DB
        """
        try:
            self._logger.info("set database " + db_name + " started")
            self._db_util.set_database(self._cnx, db_name)
            self._logger.info("set database " + db_name + " finished")
        except Exception:
            self._logger.error("set database failed", exc_info=True)

    def _set_settings(self):
        # sets the settings (max connections, charset, file format, ...) used by the DB
        self._db_util.set_settings(self._cnx)

    def _create_database(self):
        # creates the database
        cursor = self._cnx.cursor()

        drop_database_if_exists = "DROP DATABASE IF EXISTS " + self._db_name
        cursor.execute(drop_database_if_exists)

        create_database = "CREATE DATABASE " + self._db_name
        cursor.execute(create_database)

        cursor.close()

    def _init_functions(self):
        # initializes functions
        cursor = self._cnx.cursor()

        levenshtein_distance = """
        CREATE DEFINER=`root`@`localhost` FUNCTION `levenshtein_distance`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(11)
            DETERMINISTIC
        BEGIN
            DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT;
            DECLARE s1_char CHAR CHARACTER SET utf8;
            -- max strlen=255 for this function
            DECLARE cv0, cv1 VARBINARY(256);

            SET s1_len = CHAR_LENGTH(s1),
                s2_len = CHAR_LENGTH(s2),
                cv1 = 0x00,
                j = 1,
                i = 1,
                c = 0;

            IF (s1 = s2) THEN
                RETURN (0);
            ELSEIF (s1_len = 0) THEN
                RETURN (s2_len);
            ELSEIF (s2_len = 0) THEN
                RETURN (s1_len);
            END IF;

            WHILE (j <= s2_len) DO
                SET cv1 = CONCAT(cv1, CHAR(j)),
                    j = j + 1;
            END WHILE;

            WHILE (i <= s1_len) DO
                SET s1_char = SUBSTRING(s1, i, 1),
                    c = i,
                    cv0 = CHAR(i),
                    j = 1;

                WHILE (j <= s2_len) DO
                    SET c = c + 1,
                        cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1);
                    SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost;

                    IF (c > c_temp) THEN
                        SET c = c_temp;
                    END IF;

                    SET c_temp = ORD(SUBSTRING(cv1, j + 1, 1)) + 1;

                    IF (c > c_temp) THEN
                        SET c = c_temp;
                    END IF;

                    SET cv0 = CONCAT(cv0, CHAR(c)),
                        j = j + 1;
                END WHILE;

                SET cv1 = cv0,
                    i = i + 1;
            END WHILE;

            RETURN (c);
        END"""

        soundex_match = """
        CREATE DEFINER=`root`@`localhost` FUNCTION `soundex_match`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(1)
            DETERMINISTIC
        BEGIN
            DECLARE _result INT DEFAULT 0;

            IF SOUNDEX(s1) = SOUNDEX(s2) THEN
                SET _result = 1;
            END IF;

            RETURN _result;
        END"""

        cursor.execute(levenshtein_distance)
        cursor.execute(soundex_match)
        cursor.close()

    def _init_common_tables(self):
        # initializes common tables used by tables modeling git, issue tracker, forum and instant messaging data
        cursor = self._cnx.cursor()

        create_table_project = "CREATE TABLE IF NOT EXISTS project( " \
                               "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                               "name varchar(255), " \
                               "CONSTRAINT name UNIQUE (name)" \
                               ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_user = "CREATE TABLE IF NOT EXISTS user ( " \
                            "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                            "name varchar(256), " \
                            "email varchar(256), " \
                            "CONSTRAINT namem UNIQUE (name, email) " \
                            ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_user_alias = "CREATE TABLE IF NOT EXISTS user_alias ( " \
                                  "user_id int(20), " \
                                  "alias_id int(20), " \
                                  "CONSTRAINT a UNIQUE (user_id) " \
                                  ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        cursor.execute(create_table_project)
        cursor.execute(create_table_user)
        cursor.execute(create_table_user_alias)
        cursor.close()

    def _init_shared_tables_issue_tracker_communication_channels(self):
        # initializes shared tables used by tables modeling issue tracker, forum and instant messaging data
        cursor = self._cnx.cursor()

        create_table_label = "CREATE TABLE IF NOT EXISTS label ( " \
                             "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                             "name varchar(256), " \
                             "CONSTRAINT name UNIQUE (name) " \
                             ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_message = "CREATE TABLE IF NOT EXISTS message ( " \
                               "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                               "own_id varchar(20), " \
                               "pos int(10), " \
                               "type_id int(20), " \
                               "issue_id int(20), " \
                               "topic_id int(20), " \
                               "channel_id int(20), " \
                               "body longblob, " \
                               "votes int(20), " \
                               "author_id int(20), " \
                               "created_at timestamp NULL DEFAULT NULL," \
                               "CONSTRAINT ip UNIQUE (issue_id, topic_id, channel_id, own_id) " \
                               ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_message_dependency = "CREATE TABLE IF NOT EXISTS message_dependency ( " \
                                          "source_message_id int(20), " \
                                          "target_message_id int(20), " \
                                          "PRIMARY KEY st (source_message_id, target_message_id) " \
                                          ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_message_type = "CREATE TABLE IF NOT EXISTS message_type ( " \
                                    "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                    "name varchar(255), " \
                                    "CONSTRAINT name UNIQUE (name) " \
                                    ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        insert_message_types = "INSERT IGNORE INTO message_type VALUES (NULL, 'question'), " \
                               "(NULL, 'answer'), " \
                               "(NULL, 'comment'), " \
                               "(NULL, 'accepted_answer'), " \
                               "(NULL, 'reply'), " \
                               "(NULL, 'file_upload'), " \
                               "(NULL, 'info');"

        create_table_attachment = "CREATE TABLE IF NOT EXISTS attachment ( " \
                                  "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                  "own_id varchar(20), " \
                                  "message_id int(20), " \
                                  "name varchar(256), " \
                                  "extension varchar(10), " \
                                  "bytes int(20), " \
                                  "url varchar(512), " \
                                  "CONSTRAINT ip UNIQUE (message_id, own_id) " \
                                  ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        cursor.execute(create_table_label)
        cursor.execute(create_table_message)
        cursor.execute(create_table_message_dependency)
        cursor.execute(create_table_message_type)
        cursor.execute(insert_message_types)
        cursor.execute(create_table_attachment)
        cursor.close()

    def _init_git_tables(self):
        # initializes tables used to model git data
        cursor = self._cnx.cursor()

        create_table_repository = "CREATE TABLE IF NOT EXISTS repository( " \
                                  "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                  "project_id int(20), " \
                                  "name varchar(255), " \
                                  "CONSTRAINT name UNIQUE (name)" \
                                  ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_reference = "CREATE TABLE IF NOT EXISTS reference( " \
                                 "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                 "repo_id int(20), " \
                                 "name varchar(255), " \
                                 "type varchar(255), " \
                                 "CONSTRAINT name UNIQUE (repo_id, name, type) " \
                                 ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_commit = "CREATE TABLE IF NOT EXISTS commit(" \
                              "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                              "repo_id int(20), " \
                              "sha varchar(512), " \
                              "message varchar(512), " \
                              "author_id int(20), " \
                              "committer_id int(20), " \
                              "authored_date timestamp NULL DEFAULT NULL, " \
                              "committed_date timestamp NULL DEFAULT NULL, " \
                              "size int(20), " \
                              "INDEX sha (sha), " \
                              "INDEX auth (author_id), " \
                              "INDEX comm (committer_id), " \
                              "CONSTRAINT s UNIQUE (sha, repo_id) " \
                              ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_commit_parent = "CREATE TABLE IF NOT EXISTS commit_parent(" \
                                     "repo_id int(20), " \
                                     "commit_id int(20), " \
                                     "commit_sha varchar(512), " \
                                     "parent_id int(20), " \
                                     "parent_sha varchar(512), " \
                                     "PRIMARY KEY copa (repo_id, commit_id, parent_id), " \
                                     "CONSTRAINT cshapsha UNIQUE (repo_id, commit_id, parent_sha) " \
                                     ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_commits2reference = "CREATE TABLE IF NOT EXISTS commit_in_reference(" \
                                         "repo_id int(20), " \
                                         "commit_id int(20), " \
                                         "ref_id int(20), " \
                                         "PRIMARY KEY core (commit_id, ref_id) " \
                                         ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_file = "CREATE TABLE IF NOT EXISTS file( " \
                            "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                            "repo_id int(20), " \
                            "name varchar(512), " \
                            "ext varchar(255), " \
                            "CONSTRAINT rerena UNIQUE (repo_id, name) " \
                            ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_file_renamed = "CREATE TABLE IF NOT EXISTS file_renamed ( " \
                                    "repo_id int(20), " \
                                    "current_file_id int(20), " \
                                    "previous_file_id int(20), " \
                                    "file_modification_id int(20), " \
                                    "PRIMARY KEY cpc (current_file_id, previous_file_id, file_modification_id) " \
                                    ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_file_modification = "CREATE TABLE IF NOT EXISTS file_modification ( " \
                                         "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                         "commit_id int(20), " \
                                         "file_id int(20), " \
                                         "status varchar(10), " \
                                         "additions numeric(10), " \
                                         "deletions numeric(10), " \
                                         "changes numeric(10), " \
                                         "patch longblob, " \
                                         "CONSTRAINT cf UNIQUE (commit_id, file_id) " \
                                         ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_line_detail = "CREATE TABLE IF NOT EXISTS line_detail( " \
                                   "file_modification_id int(20)," \
                                   "type varchar(25), " \
                                   "line_number numeric(20), " \
                                   "is_commented numeric(1), " \
                                   "is_partially_commented numeric(1), " \
                                   "is_empty numeric(1), " \
                                   "content longblob, " \
                                   "PRIMARY KEY fityli (file_modification_id, type, line_number) " \
                                   ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        # adding it here because "file_dependency" depends on "file" table creation.
        # @todo: find a way to move the following table creation to a separate section and
        # make the "extract_dependency_relations" API interface completely independent.
        create_table_file_dependency = "CREATE TABLE file_dependency ( " \
                                       "repo_id int(20), " \
                                       "ref_id int(20), " \
                                       "source_file_id int(20), " \
                                       "target_file_id int(20), " \
                                       "CONSTRAINT dep UNIQUE (repo_id, ref_id, source_file_id, target_file_id) " \
                                       ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        cursor.execute(create_table_repository)
        cursor.execute(create_table_reference)
        cursor.execute(create_table_commit)
        cursor.execute(create_table_commit_parent)
        cursor.execute(create_table_commits2reference)
        cursor.execute(create_table_file)
        cursor.execute(create_table_file_renamed)
        cursor.execute(create_table_file_modification)
        cursor.execute(create_table_line_detail)
        cursor.execute(create_table_file_dependency)
        cursor.close()

    def _init_issue_tracker_tables(self):
        # initializes tables used to model issue tracker data
        cursor = self._cnx.cursor()

        create_table_issue_tracker = "CREATE TABLE IF NOT EXISTS issue_tracker ( " \
                                     "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                     "repo_id int(20), " \
                                     "name varchar(512), " \
                                     "type varchar(512), " \
                                     "CONSTRAINT name UNIQUE (name)" \
                                     ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue = "CREATE TABLE IF NOT EXISTS issue ( " \
                             "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                             "own_id varchar(20), " \
                             "issue_tracker_id int(20), " \
                             "summary varchar(512), " \
                             "component varchar(256), " \
                             "version varchar(256), " \
                             "hardware varchar(256), " \
                             "priority varchar(256), " \
                             "severity varchar(256), " \
                             "reference_id int(20), " \
                             "reporter_id int(20), " \
                             "created_at timestamp NULL DEFAULT NULL, " \
                             "last_change_at timestamp NULL DEFAULT NULL, " \
                             "CONSTRAINT ioi UNIQUE (issue_tracker_id, own_id), " \
                             "INDEX u (reporter_id), " \
                             "INDEX r (reference_id) " \
                             ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue_assignee = "CREATE TABLE IF NOT EXISTS issue_assignee ( " \
                                      "issue_id int(20), " \
                                      "assignee_id int(20), " \
                                      "PRIMARY KEY il (issue_id, assignee_id) " \
                                      ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue_subscriber = "CREATE TABLE IF NOT EXISTS issue_subscriber ( " \
                                        "issue_id int(20), " \
                                        "subscriber_id int(20), " \
                                        "PRIMARY KEY il (issue_id, subscriber_id) " \
                                        ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue_event = "CREATE TABLE IF NOT EXISTS issue_event ( " \
                                   "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                   "issue_id int(20), " \
                                   "event_type_id int(20), " \
                                   "detail varchar(256), " \
                                   "creator_id int(20), " \
                                   "created_at timestamp NULL DEFAULT NULL, " \
                                   "target_user_id int(20), " \
                                   "CONSTRAINT iecc UNIQUE (issue_id, event_type_id, creator_id, created_at, detail) " \
                                   ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue_event_type = "CREATE TABLE IF NOT EXISTS issue_event_type ( " \
                                        "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                        "name varchar(256), " \
                                        "CONSTRAINT name UNIQUE (name) " \
                                        ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue_labelled = "CREATE TABLE IF NOT EXISTS issue_labelled ( " \
                                      "issue_id int(20), " \
                                      "label_id int(20), " \
                                      "PRIMARY KEY il (issue_id, label_id) " \
                                      ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_issue_commit_dependency = "CREATE TABLE IF NOT EXISTS issue_commit_dependency ( " \
                                         "issue_id int(20), " \
                                         "commit_id int(20), " \
                                         "PRIMARY KEY ict (issue_id, commit_id) " \
                                         ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_issue_dependency = "CREATE TABLE IF NOT EXISTS issue_dependency ( " \
                                        "issue_source_id int(20), " \
                                        "issue_target_id int(20), " \
                                        "type_id int(20), " \
                                        "PRIMARY KEY st (issue_source_id, issue_target_id, type_id) " \
                                        ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_issue_dependency_type = "CREATE TABLE IF NOT EXISTS issue_dependency_type (" \
                                       "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                       "name varchar(256), " \
                                       "CONSTRAINT name UNIQUE (name) " \
                                       ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        insert_issue_dependency_type = "INSERT IGNORE INTO issue_dependency_type VALUES (NULL, 'block'), " \
                                       "(NULL, 'depends'), " \
                                       "(NULL, 'related'), " \
                                       "(NULL, 'duplicated');"

        cursor.execute(create_table_issue_tracker)
        cursor.execute(create_table_issue)
        cursor.execute(create_table_issue_assignee)
        cursor.execute(create_table_issue_subscriber)
        cursor.execute(create_table_issue_event)
        cursor.execute(create_table_issue_event_type)
        cursor.execute(create_table_issue_labelled)
        cursor.execute(create_issue_commit_dependency)
        cursor.execute(create_table_issue_dependency)
        cursor.execute(create_issue_dependency_type)
        cursor.execute(insert_issue_dependency_type)
        cursor.close()

    def _init_forum_tables(self):
        # initializes tables used to model forum data
        cursor = self._cnx.cursor()

        create_table_forum = "CREATE TABLE IF NOT EXISTS forum ( " \
                             "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                             "project_id int(20), " \
                             "name varchar(512), " \
                             "type varchar(512), " \
                             "CONSTRAINT name UNIQUE (name)" \
                             ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_topic = "CREATE TABLE IF NOT EXISTS topic ( " \
                             "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                             "own_id varchar(20), " \
                             "forum_id int(20), " \
                             "name varchar(256), " \
                             "votes int(10), " \
                             "views int(10), " \
                             "created_at timestamp NULL DEFAULT NULL, " \
                             "last_change_at timestamp NULL DEFAULT NULL, " \
                             "CONSTRAINT name UNIQUE (forum_id, own_id)" \
                             ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_topic_labelled = "CREATE TABLE IF NOT EXISTS topic_labelled ( " \
                                      "topic_id int(20), " \
                                      "label_id int(20), " \
                                      "PRIMARY KEY il (topic_id, label_id) " \
                                      ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        cursor.execute(create_table_forum)
        cursor.execute(create_table_topic)
        cursor.execute(create_table_topic_labelled)
        cursor.close()

    def _init_instant_messaging_tables(self):
        # initializes tables used to model instant messaging data
        cursor = self._cnx.cursor()

        create_table_instant_messaging = "CREATE TABLE IF NOT EXISTS instant_messaging ( " \
                                         "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                                         "project_id int(20), " \
                                         "name varchar(512), " \
                                         "type varchar(512), " \
                                         "CONSTRAINT name UNIQUE (name)" \
                                         ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        create_table_channel = "CREATE TABLE IF NOT EXISTS channel ( " \
                               "id int(20) AUTO_INCREMENT PRIMARY KEY, " \
                               "own_id varchar(20), " \
                               "instant_messaging_id int(20), " \
                               "name varchar(256), " \
                               "description varchar(512), " \
                               "created_at timestamp NULL DEFAULT NULL, " \
                               "last_change_at timestamp NULL DEFAULT NULL, " \
                               "CONSTRAINT name UNIQUE (instant_messaging_id, own_id)" \
                               ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

        cursor.execute(create_table_instant_messaging)
        cursor.execute(create_table_channel)
        cursor.close()
class ActivityReportExporter():
    """
    This class handles the generation of reports
    """

    LOG_FOLDER_PATH = "logs"
    INPUT_PATH = os.path.join(os.path.dirname(resources.__file__), 'queries.json')

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration parameters

        :type db_name: str
        :param db_name: the name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._dsl_util = DslUtil()
        self._date_util = DateUtil()
        self._db_util = DbUtil()
        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-report-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")
        self._db_name = db_name
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)
        self._chart_generator = ChartGenerator(self._cnx, self._logger)
        self._html_generator = HtmlGenerator(self._logger)

    def _create_log_folder(self, name):
        # creates the log folder
        if not os.path.exists(name):
            os.makedirs(name)

    def _create_output_file(self, filename):
        # creates the output folder
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:
                # guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

    def _load_report_exporter_json(self, json_path):
        # loads the JSON that drives the report export process
        with open(json_path) as json_data:
            data = json.load(json_data)

        return data.get('report')

    def _find_entity_id(self, type, name):
        # finds the id of the tools stored in the DB
        found = None

        if type == "project":
            found = self._db_util.select_project_id(self._cnx, name, self._logger)
        elif type == "repo":
            found = self._db_util.select_repo_id(self._cnx, name, self._logger)
        elif type == "issuetracker":
            found = self._db_util.select_issue_tracker_id(self._cnx, name, self._logger)
        elif type == "forum":
            found = self._db_util.select_forum_id(self._cnx, name, self._logger)
        elif type == "instantmessaging":
            found = self._db_util.select_instant_messaging_id(self._cnx, name, self._logger)

        if not found:
            self._logger.error("ReportExporter: entity " + str(type) + " with name " + str(name) + " not found!")

        return found

    def _get_parameter(self, key, parameters):
        # gets parameters of the JSON
        found = None

        if key in ["AFTERDATE", "INTERVAL"]:
            found = parameters.get(key.lower())
        else:
            if key.endswith("ID"):
                found = parameters.get(key[:-2].lower())

        if not found:
            self._logger.error("ReportExporter: parameter " + str(key) + " not found!")

        return found

    def _load_query_json(self, metric_name, parameters):
        # loads the queries in the JSON configuration file
        with open(ActivityReportExporter.INPUT_PATH) as json_data:
            data = json.load(json_data)

        metrics = data.get('queries')

        try:
            found = [m for m in metrics if m.get('name') == metric_name][0]

            query = found.get('query')
            for k in found.keys():
                if k not in ['name', 'query']:
                    k_value = str(self._get_parameter(k, parameters))
                    query = query.replace(k, k_value)

            return query
        except Exception:
            self._logger.error("ReportExporter: metric " + str(metric_name) + " not found!")

    def _get_activity_name(self, activity):
        # gets the name of the activity
        return activity.replace("_", " ")

    def _get_activity_type(self, activity):
        # gets the type of the activity
        return activity.replace("_activity", "").replace("_", "")

    def _generate_charts(self, activity, activity_data, project_id, time_span):
        # generates charts
        entity2charts = {}
        after_date, interval = self._calculate_time_information(time_span)
        activity_type = self._get_activity_type(activity)
        names = activity_data.get('names')
        measures = activity_data.get('measures')

        for entity_name in names:
            entity_id = self._dsl_util.find_entity_id(self._cnx, activity_type, entity_name, self._logger)
            charts = []
            for measure in measures:
                query = self._load_query_json(measure, {activity_type: entity_id,
                                                        'project': project_id,
                                                        'afterdate': after_date,
                                                        'interval': interval})
                charts.append(self._chart_generator.create(query, interval.lower(), measure, time_span))

            entity2charts.update({entity_name: charts})

        return entity2charts

    def _calculate_time_information(self, time_span):
        # calculates the time span information
        start = None
        interval = None
        current_time = datetime.now()  # test: datetime.strptime("2015-10-10", "%Y-%m-%d")
        if time_span == "this_week":
            start = self._date_util.get_start_time_span(current_time, "week", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_month":
            start = self._date_util.get_start_time_span(current_time, "month", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_year":
            start = self._date_util.get_start_time_span(current_time, "year", "%Y-%m-%d")
            interval = "MONTH"
        else:
            self._logger.error("ReportExporter: time span " + str(time_span) +
                               " not recognized! Options are: this_week, this_month, this_year")

        return start, interval

    def export(self, file_path, json_path):
        """
        exports the Gitana data to a report

        :type file_path: str
        :param file_path: the path where to export the report

        :type json_path: str
        :param json_path: the path of the JSON that drives the export process
        """
        try:
            self._logger.info("ReportExporter started")
            start_time = datetime.now()

            exporter_data = self._load_report_exporter_json(json_path)
            project_name = exporter_data.get('project')
            project_id = self._dsl_util.find_entity_id(self._cnx, "project", project_name, self._logger)
            time_span = exporter_data.get('time_span')

            activity2charts = {}
            for activity in [attr for attr in exporter_data.keys() if attr.endswith('activity')]:
                activity_name = self._get_activity_name(activity)
                charts = self._generate_charts(activity, exporter_data.get(activity), project_id, time_span)
                activity2charts.update({activity_name: charts})

            html_page = self._html_generator.create(project_name, activity2charts)

            with codecs.open(file_path, 'w', encoding='utf8') as f:
                f.write(html_page)

            self._db_util.close_connection(self._cnx)
            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("ReportExporter: process finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        except Exception:
            self._logger.error("ReportExporter failed", exc_info=True)
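

# export() is driven by a JSON file. A minimal sketch of its expected shape,
# inferred from _load_report_exporter_json and _generate_charts (the project,
# entity and measure names below are illustrative; measures must match query
# names defined in resources/queries.json):
#
#   {
#     "report": {
#       "project": "papyrus",
#       "time_span": "this_month",
#       "repo_activity": {
#         "names": ["papyrus_repo"],
#         "measures": ["commits", "committers"]
#       }
#     }
#   }
#
#   exporter = ActivityReportExporter(config, "papyrus_db", "logs/")
#   exporter.export("./report.html", "./report_conf.json")
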
class GraphExporter():
    """
    This class exports the Gitana data to a graph representation
    """

    LOG_FOLDER_PATH = "logs"
    INPUT_PATH = os.path.join(os.path.dirname(resources.__file__), 'queries.json')

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration parameters

        :type db_name: str
        :param db_name: the name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._db_util = DbUtil()
        self._dsl_util = DslUtil()
        self._logging_util = LoggingUtil()

        self._log_path = log_root_path + "export-graph-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

        self._db_name = db_name
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)

    def _create_log_folder(self, name):
        # creates the log folder
        if not os.path.exists(name):
            os.makedirs(name)

    def _create_output_file(self, filename):
        # creates the output folder
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:
                # guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

    def _load_graph_exporter_json(self, json_path):
        # loads the JSON that drives the graph exporter process
        with open(json_path) as json_data:
            data = json.load(json_data)

        return data.get('graph')

    def _get_parameter(self, key, parameters):
        # gets the JSON parameters
        found = None

        if key in ["EDGECOLOR", "NODECOLOR"]:
            found = parameters.get(key.lower())
        else:
            if key.endswith("ID"):
                name = parameters.get(key[:-2].lower())
                found = self._dsl_util.find_entity_id(self._cnx, key[:-2].lower(), name, self._logger)

        if not found:
            self._logger.error("GraphExporter: parameter " + str(key) + " not found!")

        return found

    def _load_query_json(self, metric_name, parameters):
        # loads the query stored in the JSON file
        with open(GraphExporter.INPUT_PATH) as json_data:
            data = json.load(json_data)

        metrics = data.get('queries')

        try:
            found = [m for m in metrics if m.get('name') == metric_name][0]

            nodes_query = found.get('nodes')
            edges_query = found.get('edges')
            for k in found.keys():
                if k not in ['name', 'edges', 'nodes']:
                    k_value = str(self._get_parameter(k, parameters))
                    nodes_query = nodes_query.replace(k, k_value)
                    edges_query = edges_query.replace(k, k_value)

            return (nodes_query, edges_query)
        except Exception:
            self._logger.error("GraphExporter: metric " + str(metric_name) + " not found!")

    def export(self, file_path, json_path):
        """
        exports the Gitana data to a graph

        :type file_path: str
        :param file_path: the path where to export the graph

        :type json_path: str
        :param json_path: the path of the JSON that drives the export process
        """
        # gtype -> graph type = "undirected", "directed"; if null, "undirected"
        # gmode -> graph mode = "dynamic", "static"; if null, "dynamic"
        try:
            self._logger.info("GraphExporter started")
            start_time = datetime.now()

            exporter_data = self._load_graph_exporter_json(json_path)
            metric_name = exporter_data.get("name")
            parameters = exporter_data.get("params")

            (nodes_query, edges_query) = self._load_query_json(metric_name, parameters)
            gexf = GexfGenerator(self._cnx, self._logger)
            gexf.create(nodes_query, edges_query, parameters.get("type"), file_path)
            self._db_util.close_connection(self._cnx)

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("GraphExporter: process finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        except Exception:
            self._logger.error("GraphExporter failed", exc_info=True)
class FileJsonExporter:
    """
    This class handles the export of file information via JSON. It allows using a former
    version of the bus factor tool (https://github.com/SOM-Research/busfactor)
    """

    LOG_FOLDER_PATH = "logs"

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration parameters

        :type db_name: str
        :param db_name: the name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._date_util = DateUtil()
        self._db_util = DbUtil()
        self._logging_util = LoggingUtil()

        self._log_path = log_root_path + "export-file-json-" + db_name
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

        self._db_name = db_name
        config.update({'database': db_name})
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)
        self._file_util = FileUtil(self._config, self._logger)

    def get_diff_info(self, patch_content):
        # extracts the diff header ("@@ ... @@") from the patch, if present
        if patch_content:
            first_line = patch_content.split('\n')[0]
            if re.match(r"^@@(\s|\+|\-|\d|,)+@@", first_line, re.M):
                diff_info = first_line.split("@@")[1]
            else:
                diff_info = "Binary file"
        else:
            diff_info = "Renamed file"

        return diff_info

    def get_diff_content(self, patch_content):
        # extracts the diff content following the diff header, if present
        if patch_content:
            lines = patch_content.split('\n')
            if re.match(r"^@@(\s|\+|\-|\d|,)+@@", lines[0], re.M):
                first_line_content = lines[0].split("@@")[2]
                diff_content = lines[1:]
                diff_content.insert(0, first_line_content)
                diff_content = '\n'.join(diff_content)
            else:
                diff_content = "No content"
        else:
            diff_content = "No content"

        return diff_content

    def get_patch_info(self, content):
        diff_info = self.get_diff_info(content)
        diff_content = self.get_diff_content(content)
        return {'info': diff_info, 'content': diff_content.decode('utf-8', 'ignore')}

    def get_changes_for_file(self, file_ids):
        file_modifications = []
        cursor = self._cnx.cursor()
        query = "SELECT c.author_id, c.committer_id, c.authored_date, c.committed_date, " \
                "c.sha, fm.additions, fm.deletions, fm.patch " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id IN (" + ",".join([str(id) for id in file_ids]) + ") " \
                "ORDER BY c.authored_date DESC"
        cursor.execute(query)

        row = cursor.fetchone()
        while row:
            # unpack following the SELECT column order
            author_id = row[0]
            committer_id = row[1]
            authored_date = row[2].strftime('%Y-%m-%d %H:%M:%S')
            committed_date = row[3].strftime('%Y-%m-%d %H:%M:%S')
            sha = str(row[4])
            additions = str(row[5])
            deletions = str(row[6])
            patch = str(row[7])

            patch_info = self.get_patch_info(patch)
            author_name, author_email = self.get_user_identity(author_id)
            if author_id != committer_id:
                committer_name, committer_email = self.get_user_identity(committer_id)
            else:
                committer_name = author_name
                committer_email = author_email

            author = {'name': author_name, 'email': author_email}
            committer = {'name': committer_name, 'email': committer_email}
            file_modifications.append({'author': author,
                                       'authored_date': authored_date,
                                       'committer': committer,
                                       'committed_date': committed_date,
                                       'additions': additions,
                                       'deletions': deletions,
                                       'sha': sha,
                                       'patch': patch_info})

            row = cursor.fetchone()

        cursor.close()
        return file_modifications

    def array2string(self, array):
        return ','.join(str(x) for x in array)

    def get_user_identity(self, user_id):
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT u.name, u.email " \
                "FROM user u " \
                "JOIN (SELECT IFNULL(ua.alias_id, u.id) as user_id " \
                "FROM user u LEFT JOIN user_alias ua ON u.id = ua.user_id " \
                "WHERE u.id = %s) as aliased " \
                "ON aliased.user_id = u.id"
        arguments = [user_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        cursor.close()
        if row:
            name = row[0]
            email = row[1]
            found = (name, email)

        return found

    def get_commits_info(self, file_ids):
        commits = []
        cursor = self._cnx.cursor()
        query = "SELECT c.sha, c.message, r.name, c.author_id, c.committer_id, c.authored_date, c.committed_date " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id IN (" + ",".join([str(id) for id in file_ids]) + ")"
        cursor.execute(query)

        row = cursor.fetchone()
        while row:
            sha = str(row[0])
            message = str(row[1].encode('utf8'))
            ref = str(row[2])
            author_id = row[3]
            committer_id = row[4]
            authored_date = row[5].strftime('%Y-%m-%d %H:%M:%S')
            committed_date = row[6].strftime('%Y-%m-%d %H:%M:%S')

            author_name, author_email = self.get_user_identity(author_id)
            if author_id != committer_id:
                committer_name, committer_email = self.get_user_identity(committer_id)
            else:
                committer_name = author_name
                committer_email = author_email

            author = {'name': author_name, 'email': author_email}
            committer = {'name': committer_name, 'email': committer_email}

            commits.append({'sha': sha,
                            'author': author,
                            'committer': committer,
                            'message': message,
                            'ref': ref,
                            'authored_date': authored_date,
                            'committed_date': committed_date})

            row = cursor.fetchone()

        cursor.close()
        return commits

    def get_status_file(self, file_id):
        cursor = self._cnx.cursor()
        query = "SELECT fm.status, MAX(c.committed_date) AS last_modification " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id = %s"
        arguments = [file_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        cursor.close()

        status = ""
        last_modification = ""
        if row:
            status = row[0]
            last_modification = row[1].strftime('%Y-%m-%d %H:%M:%S')

        return {'status': status, 'last_modification': last_modification}

    def add_file_info_to_json(self, references, repo_json):
        cursor = self._cnx.cursor()
        query = "SELECT f.id, f.name, f.ext, r.name, r.id " \
                "FROM repository repo JOIN commit_in_reference cin ON repo.id = cin.repo_id " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN file f ON f.id = fm.file_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "WHERE repo.id = %s AND r.name IN (" + ",".join(["'" + ref + "'" for ref in references]) + ") AND " \
                "f.id NOT IN " \
                "(SELECT deletions.file_id FROM " \
                "(SELECT fm.file_id, c.committed_date " \
                "FROM commit_in_reference cin " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "WHERE fm.status = 'deleted' AND cin.repo_id = %s ) as deletions " \
                "JOIN " \
                "(SELECT fm.file_id, max(c.committed_date) as committed_date " \
                "FROM commit_in_reference cin " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "WHERE fm.status <> 'deleted' AND cin.repo_id = %s " \
                "GROUP BY fm.file_id) AS last_action " \
                "ON deletions.file_id = last_action.file_id " \
                "WHERE deletions.committed_date > last_action.committed_date " \
                "UNION " \
                "SELECT fr.previous_file_id " \
                "FROM file_renamed fr JOIN file f ON fr.previous_file_id = f.id " \
                "JOIN file_modification fm ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "WHERE cin.repo_id = %s) " \
                "GROUP BY f.id, r.id"
        arguments = [self._repo_id, self._repo_id, self._repo_id, self._repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        while row:
            file_id = row[0]
            file_name = row[1]
            file_ext = row[2]
            ref_name = row[3]
            ref_id = row[4]

            status = self.get_status_file(file_id)
            file_history = self._file_util.get_file_history_by_id(file_id, ref_id)
            file_ids = list(set([h.get("file_id") for h in file_history]))
            commits = self.get_commits_info(file_ids)
            directories = self._file_util.get_directories(file_name)
            changes_info = self.get_changes_for_file(file_ids)

            file_info = {'repo': self._repo_name,
                         'info': status,
                         'commits': commits,
                         'ref': ref_name,
                         'id': str(file_id),
                         'name': file_name.split('/')[-1],
                         'ext': file_ext,
                         'dirs': directories,
                         'file_changes': changes_info}

            repo_json.write(json.dumps(file_info) + "\n")
            row = cursor.fetchone()

        cursor.close()

    def export(self, repo_name, references, file_path):
        """
        exports the file data to JSON format

        :type repo_name: str
        :param repo_name: the name of the repository to analyse

        :type references: list of str
        :param references: the list of references to analyse

        :type file_path: str
        :param file_path: the path where to export the file information
        """
        try:
            self._logger.info("FileJSONExporter started")
            start_time = datetime.now()

            repo_json = codecs.open(file_path, 'w', "utf-8")
            self._repo_name = repo_name
            self._repo_id = self._db_util.select_repo_id(self._cnx, repo_name, self._logger)
            self.add_file_info_to_json(references, repo_json)
            repo_json.close()

            self._db_util.close_connection(self._cnx)
            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("FileJSONExporter: process finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        except Exception:
            self._logger.error("FileJSONExporter failed", exc_info=True)
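

# A minimal usage sketch of FileJsonExporter (the repository name and
# reference list are illustrative). It writes one JSON object per file,
# one per line, to the given output path:
#
#   exporter = FileJsonExporter(config, "papyrus_db", "logs/")
#   exporter.export("papyrus_repo", ["master"], "./file_info.json")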