class DslUtil():
    """
    This class provides utilities for the Domain Specific Languages used in the export processes
    """
    def __init__(self):
        self.db_util = DbUtil()

    def find_entity_id(self, cnx, type, name, logger):
        """
        finds id of a entity  stored in the DB (project, repository, issue tracker, forum or instant messaging)

        :type cnx: Object
        :param cnx: DB connection

        :type type: str
        :param type: type of the entity

        :type name: str
        :param name: name of the entity

        :type logger: Object
        :param logger: logger
        """
        found = None

        if type == "project":
            found = self.db_util.select_project_id(cnx, name, logger)
        elif type == "repo":
            found = self.db_util.select_repo_id(cnx, name, logger)
        elif type == "issuetracker":
            found = self.db_util.select_issue_tracker_id(cnx, name, logger)
        elif type == "forum":
            found = self.db_util.select_forum_id(cnx, name, logger)
        elif type == "instantmessaging":
            found = self.db_util.select_instant_messaging_id(cnx, name, logger)

        if not found:
            logger.error("DslUtil: entity " + str(type) + " with name " +
                         str(name) + " not found!")

        return found
示例#2
0
class GitDao():
    """
    This class handles the persistence and retrieval of Git data
    """
    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except:
            self._logger.error("GitDao init failed")
            raise

    def check_connection_alive(self):
        try:
            cursor = self._cnx.cursor()
            cursor.execute("SELECT VERSION()")
            results = cursor.fetchone()
            ver = results[0]
            cursor.close()
            if not ver:
                self._cnx = self._db_util.restart_connection(
                    self._config, self._logger)
        except:
            self._cnx = self._db_util.restart_connection(
                self._config, self._logger)

    def close_connection(self):
        self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)

    def get_connection(self):
        return self._cnx

    def get_cursor(self):
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def array2string(self, array):
        return ','.join(str(x) for x in array)

    def line_detail_table_is_empty(self, repo_id):
        """
        checks line detail table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "JOIN line_detail l ON fm.id = l.file_modification_id " \
                "WHERE l.content IS NOT NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])

        cursor.close()

        return int(count > 0)

    def file_modification_patch_is_empty(self, repo_id):
        """
        checks patch column in file modification table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "WHERE patch IS NOT NULL and repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])

        cursor.close()

        return int(count > 0)

    def get_last_commit_id(self, repo_id):
        """
        gets last commit id

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT MAX(id) as last_commit_id " \
                "FROM commit c " \
                "WHERE repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        if row:
            found = row[0]

        cursor.close()
        return found

    def select_repo_id(self, repo_name):
        """
        selects id of a repository by its name

        :type repo_name: str
        :param repo_name: name of an existing repository in the DB
        """
        return self._db_util.select_repo_id(self._cnx, repo_name, self._logger)

    def insert_repo(self, project_id, repo_name):
        """
        inserts repository to DB

        :type project_id: int
        :param project_id: id of an existing project in the DB

        :type repo_name: str
        :param repo_name: name of a repository to insert
        """
        return self._db_util.insert_repo(self._cnx, project_id, repo_name,
                                         self._logger)

    def select_project_id(self, project_name):
        """
        selects id of a project by its name

        :type project_name: str
        :param project_name: name of an existing project in the DB
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def get_user_id(self, user_name, user_email):
        """
        gets id of a user

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """

        if not user_email and not user_name:
            user_name = "uknonwn_user"
            user_email = "uknonwn_user"

        if user_email:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        return user_id

    def insert_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB, one by one

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning(
                    "parent commit id not found! SHA parent " +
                    str(parent.hexsha))

            query = "INSERT IGNORE INTO commit_parent " \
                    "VALUES (%s, %s, %s, %s, %s)"

            if parent_id:
                arguments = [repo_id, commit_id, sha, parent_id, parent.hexsha]
            else:
                arguments = [repo_id, commit_id, sha, None, parent.hexsha]

            cursor.execute(query, arguments)
            self._cnx.commit()

        cursor.close()

    def insert_all_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB all together

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        to_insert = []
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning(
                    "parent commit id not found! SHA parent " +
                    str(parent.hexsha))

            if parent_id:
                to_insert.append(
                    (repo_id, commit_id, sha, parent_id, parent.hexsha))
            else:
                to_insert.append(
                    (repo_id, commit_id, sha, None, parent.hexsha))

        if to_insert:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_parent(repo_id, commit_id, commit_sha, parent_id, parent_sha) " \
                    "VALUES (%s, %s, %s, %s, %s)"
            cursor.executemany(query, [i for i in to_insert])
            self._cnx.commit()
            cursor.close()

    def insert_commits_in_reference(self, commits_data):
        """
        inserts commits to DB all together

        :type commits_data: list of Object
        :param commits_data: commit data
        """
        if commits_data:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_in_reference(repo_id, commit_id, ref_id) VALUES (%s, %s, %s)"
            cursor.executemany(query, commits_data)
            self._cnx.commit()
            cursor.close()

    def insert_commit_in_reference(self, repo_id, commit_id, ref_id):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type commit_id: int
        :param commit_id: id of the commit

        :type ref_id: int
        :param ref_id: id of the reference
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit_in_reference " \
                "VALUES (%s, %s, %s)"
        arguments = [repo_id, commit_id, ref_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_line_details(self, file_modification_id, detail):
        """
        inserts line details to DB

        :type file_modification_id: int
        :param file_modification_id: id of the file modification

        :type detail: str
        :param detail: line content
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO line_detail " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"

        arguments = [
            file_modification_id, detail[0], detail[1], detail[2], detail[3],
            detail[4], detail[5]
        ]

        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_modification_id(self, commit_id, file_id):
        """
        selects file modification id

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file_modification " \
                "WHERE commit_id = %s AND file_id = %s"
        arguments = [commit_id, file_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]

        cursor.close()
        return found

    def insert_file_modification(self, commit_id, file_id, status, additions,
                                 deletions, changes, patch_content):
        """
        inserts file modification to DB

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file

        :type status: str
        :param status: type of the modification

        :type additions: int
        :param additions: number of additions

        :type deletions: int
        :param deletions: number of deletions

        :type changes: int
        :param changes: number of changes

        :type patch_content: str
        :param patch_content: content of the patch
        """
        # 4194304 is the max statement size for MySQL by default.
        if patch_content != None and len(patch_content) > 4194304:
            self._logger.error('Patch content for commit (' + str(commit_id) +
                               ') is too large, not persisted.')
            patch_content = None

        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file_modification " \
                "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            commit_id, file_id, status, additions, deletions, changes,
            patch_content
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file_renamed(self, repo_id, current_file_id, previous_file_id,
                            file_modification_id):
        """
        inserts file renamed information

        :type repo_id: int
        :param repo_id: id of the repository

        :type current_file_id: int
        :param current_file_id: id of the renamed file

        :type previous_file_id: int
        :param previous_file_id: id of the file before renaming

        :type file_modification_id: int
        :param file_modification_id: id of the file modification
        """
        cursor = self._cnx.cursor()

        query = "INSERT IGNORE INTO file_renamed " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [
            repo_id, current_file_id, previous_file_id, file_modification_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file(self, repo_id, name, ext=None):
        """
        inserts file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)

        :type ext: str
        :param ext: extension of the file
        """
        if name == None:
            return

        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file " \
                "VALUES (%s, %s, %s, %s)"

        # extract file extension from file path if not passed
        if not ext:
            ext = name.split('.')[-1]

        arguments = [None, repo_id, name, ext]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_id_before_date(self, repo_id, name, before_date):
        """
        selects id of the file before date

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)

        :type before_date: timestamp
        :param before_date: date
        """
        cursor = self._cnx.cursor()
        query = "SELECT DISTINCT f.id " \
                "FROM file f JOIN file_modification fm ON f.id = fm.file_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "WHERE f.name = %s AND f.repo_id = %s AND fm.status = 'added' " \
                "AND c.authored_date <= '" + str(before_date) + "' "
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        try:
            id = cursor.fetchone()[0]
        except:
            id = None
        cursor.close()
        return id

    def select_file_id(self, repo_id, name):
        """
        selects id of the file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file " \
                "WHERE name = %s AND repo_id = %s"
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        try:
            id = cursor.fetchone()[0]
        except:
            id = None
        cursor.close()
        return id

    def insert_reference(self, repo_id, ref_name, ref_type):
        """
        inserts reference

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference

        :type ref_type: str
        :param ref_type: type of the reference (branch or tag)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO reference " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, ref_name, ref_type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_reference_name(self, repo_id, ref_id):
        """
        selects reference name by its id

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_id: int
        :param ref_id: id of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT name " \
                "FROM reference " \
                "WHERE id = %s and repo_id = %s"
        arguments = [ref_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()

        return found

    def select_reference_id(self, repo_id, ref_name):
        """
        selects reference id by its name

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM reference " \
                "WHERE name = %s and repo_id = %s"
        arguments = [ref_name, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        if row:
            found = row[0]

        cursor.close()
        return found

    def insert_commit(self, repo_id, sha, message, author_id, committer_id,
                      authored_date, committed_date, size):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type sha: str
        :param sha: SHA of the commit

        :type message: str
        :param message: message of the commit

        :type author_id: int
        :param author_id: author id of the commit

        :type committer_id: int
        :param committer_id: committer id of the commit

        :type authored_date: str
        :param authored_date: authored date of the commit

        :type committed_date: str
        :param committed_date: committed date of the commit

        :type size: int
        :param size: size of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, repo_id, sha,
            message.strip(), author_id, committer_id, authored_date,
            committed_date, size
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_commit_parent(self, parent_id, parent_sha, repo_id):
        """
        inserts commit parent to DB

        :type parent_id: int
        :param parent_id: id of the commit parent

        :type parent_sha: str
        :param parent_sha: SHA of the commit parent

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_update = "UPDATE commit_parent " \
                       "SET parent_id = %s " \
                       "WHERE parent_id IS NULL AND parent_sha = %s AND repo_id = %s "
        arguments = [parent_id, parent_sha, repo_id]
        cursor.execute(query_update, arguments)
        self._cnx.commit()
        cursor.close()

    def fix_commit_parent_table(self, repo_id):
        """
        checks for missing commit parent information and fixes it

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_select = "SELECT parent_sha " \
                       "FROM commit_parent " \
                       "WHERE parent_id IS NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query_select, arguments)
        row = cursor.fetchone()
        while row:
            parent_sha = row[0]
            parent_id = self.select_commit_id(parent_sha, repo_id)
            self.update_commit_parent(parent_id, parent_sha, repo_id)
            row = cursor.fetchone()
        cursor.close()

    def select_commit_id(self, sha, repo_id):
        """
        selects id of a commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_commit_id_before_date(self, sha, repo_id, before_date):
        """
        selects id of a commit by its SHA before a given date

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository

        :type before_date: timestamp
        :param before_date: date
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s AND authored_date <= '" + str(before_date) + "' "
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_all_developer_ids(self, repo_id):
        """
        selects all developers (committers or authors) of a given repo

        :type repo_id: int
        :param repo_id: id of the repository
        """
        user_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT c.author_id " \
                "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.author_id " \
                "WHERE repo_id = %s AND u.name IS NOT NULL AND u.email IS NOT NULL " \
                "UNION " \
                "SELECT c.committer_id " \
                "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.committer_id " \
                "WHERE repo_id = %s AND u.name IS NOT NULL AND u.email IS NOT NULL "
        arguments = [repo_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        while row:
            user_id = row[0]
            user_ids.append(user_id)
            row = cursor.fetchone()

        cursor.close()
        return user_ids

    def select_sha_commit_by_user(self, user_id, repo_id):
        """
        selects the SHA of the first commit (authored or committed) by a given user id

        :type user_id: int
        :param user_id: id of the user

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT sha " \
                "FROM commit " \
                "WHERE (author_id = %s OR committer_id = %s) AND repo_id = %s " \
                "LIMIT 1"
        arguments = [user_id, user_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        return found
class DependencyUtils(object):
    """
    Utilities for dependency extractor
    """
    def __init__(self, config, project_name, repo_name, repo_path, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type project_name: str
        :param project_name: the name of an existing project in the DB

        :param repo_name: repo name
        :type repo_name: str

        :param repo_path: git repo path
        :type repo_name: str

        :type logger: logger object to log messages
        :param logger: logging.Logger
        """
        try:
            self._config = config
            self._project_name = project_name
            self._repo_name = repo_name
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
            self._git_dao = GitDao(config, logger)
            self._git_querier = GitQuerier(repo_path, logger)
        except:
            self._logger.error("DB connection failure")
            raise

    def load_all_references(self, repo_id):
        """
         load all git branches and tags into database

         :param repo_id: repo id of repo name
         :type repo_id: int
        """
        for reference in self._git_querier.get_references():
            ref_name = reference[0]
            ref_type = reference[1]
            self._git_dao.insert_reference(repo_id, ref_name, ref_type)

    def insert_repository(self):
        """
        insert repository into database
        """
        project_id = self._git_dao.select_project_id(self._project_name)
        self._git_dao.insert_repo(project_id, self._repo_name)

    def insert_dependencies(self, source_to_targets):
        """
        inserts source to target file dependencies into datatable

        :param source_to_targets: source and target files mapping
        :type source_to_targets: nested list
        """
        cursor = self._cnx.cursor()

        repo_id = self._db_util.select_repo_id(self._cnx, self._repo_name,
                                               self._logger)

        self.load_all_references(repo_id)

        for ref_name, source_file, target_files in source_to_targets:

            ref_id = self._git_dao.select_reference_id(repo_id, ref_name)

            # get source fileid by creating it if not exists
            source_file_id = self._git_dao.select_file_id(repo_id, source_file)
            if not source_file_id:
                self._git_dao.insert_file(repo_id, source_file)
                source_file_id = self._git_dao.select_file_id(
                    repo_id, source_file)

            for target_file in target_files:

                # get target fileid by creating it if not exists
                target_file_id = self._git_dao.select_file_id(
                    repo_id, target_file)
                if not target_file_id:
                    self._git_dao.insert_file(repo_id, target_file)
                    target_file_id = self._git_dao.select_file_id(
                        repo_id, target_file)

                args = [repo_id, ref_id, source_file_id, target_file_id]

                query = "INSERT IGNORE INTO file_dependency VALUES (%s, %s, %s, %s)"
                cursor.execute(query, args)

        self._cnx.commit()
        cursor.close()
示例#4
0
class ActivityReportExporter():
    """
    This class handles the generation of reports
    """

    LOG_FOLDER_PATH = "logs"
    INPUT_PATH = os.path.join(os.path.dirname(resources.__file__), 'queries.json')

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param config: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._dsl_util = DslUtil()
        self._date_util = DateUtil()
        self._db_util = DbUtil()

        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-report-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

        self._db_name = db_name
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)

        self._chart_generator = ChartGenerator(self._cnx, self._logger)
        self._html_generator = HtmlGenerator(self._logger)

    def _create_log_folder(self, name):
        #creates the log folder
        if not os.path.exists(name):
            os.makedirs(name)

    def _create_output_file(self, filename):
        #creates the output folder
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc: # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

    def _load_report_exporter_json(self, json_path):
        #loads the JSON that drives the report export process
        with open(json_path) as json_data:
            data = json.load(json_data)

        return data.get('report')

    def _find_entity_id(self, type, name):
        #finds the id of the tools stored in the DB
        found = None

        if type == "project":
            found = self._db_util.select_project_id(self._cnx, name, self._logger)
        elif type == "repo":
            found = self._db_util.select_repo_id(self._cnx, name, self._logger)
        elif type == "issuetracker":
            found = self._db_util.select_issue_tracker_id(self._cnx, name, self._logger)
        elif type == "forum":
            found = self._db_util.select_forum_id(self._cnx, name, self._logger)
        elif type == "instantmessaging":
            found = self._db_util.select_instant_messaging_id(self._cnx, name, self._logger)

        if not found:
            self._logger.error("ReporExporter: entity " + str(type) + " with name " + str(name) + " not found!")

        return found

    def _get_parameter(self, key, parameters):
        #gets parameters of the JSON
        found = None
        if key in ["AFTERDATE", "INTERVAL"]:
            found = parameters.get(key.lower())
        else:
            if key.endswith("ID"):
                found = parameters.get(key[:-2].lower())
        if not found:
            self._logger.error("ReportExporter: parameter " + str(key) + " not found!")

        return found

    def _load_query_json(self, metric_name, parameters):
        #loads the queries in the JSON configuration file
        with open(ActivityReportExporter.INPUT_PATH) as json_data:
            data = json.load(json_data)

        metrics = data.get('queries')

        try:
            found = [m for m in metrics if m.get('name') == metric_name][0]
            query = found.get('query')

            for k in found.keys():
                if k not in ['name', 'query']:

                    k_value = str(self._get_parameter(k, parameters))

                    query = query.replace(k, k_value)

            return query
        except:
            self._logger.error("ReportExporter: metric " + str(metric_name) + " not found!")

    def _get_activity_name(self, activity):
        #gets the name of the activity
        return activity.replace("_", " ")

    def _get_activity_type(self, activity):
        #gets the type of the activity
        return activity.replace("_activity", "").replace("_", "")

    def _generate_charts(self, activity, activity_data, project_id, time_span):
        #generates charts
        entity2charts = {}
        after_date, interval = self._calculate_time_information(time_span)
        activity_type = self._get_activity_type(activity)
        names = activity_data.get('names')
        measures = activity_data.get('measures')

        for entity_name in names:
            entity_id = self._dsl_util.find_entity_id(self._cnx, activity_type, entity_name, self._logger)
            charts = []
            for measure in measures:
                query = self._load_query_json(measure, {activity_type: entity_id, 'project': project_id, 'afterdate': after_date, 'interval': interval})
                charts.append(self._chart_generator.create(query, interval.lower(), measure, time_span))

            entity2charts.update({entity_name: charts})

        return entity2charts

    def _calculate_time_information(self, time_span):
        #calculates the time span information
        start = None
        interval = None
        current_time = datetime.now() #test datetime.strptime("2015-10-10", "%Y-%m-%d")
        if time_span == "this_week":
            start = self._date_util.get_start_time_span(current_time, "week", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_month":
            start = self._date_util.get_start_time_span(current_time, "month", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_year":
            start = self._date_util.get_start_time_span(current_time, "year", "%Y-%m-%d")
            interval = "MONTH"
        else:
            self._logger.error("ReportExporter: time span " + str(time_span) + " not recognized! Options are: this_week, this_month, this_year")

        return start, interval

    def export(self, file_path, json_path):
        """
        exports the Gitana data to a report

        :type file_path: str
        :param file_path: the path where to export the report

        :type json_path: str
        :param json_path: the path of the JSON that drives the export process
        """
        try:
            self._logger.info("ReportExporter started")
            start_time = datetime.now()

            exporter_data = self._load_report_exporter_json(json_path)

            project_name = exporter_data.get('project')
            project_id = self._dsl_util.find_entity_id(self._cnx, "project", project_name, self._logger)

            time_span = exporter_data.get('time_span')

            activity2charts = {}
            for activity in [attr for attr in exporter_data.keys() if attr.endswith('activity')]:
                activity_name = self._get_activity_name(activity)
                charts = self._generate_charts(activity, exporter_data.get(activity), project_id, time_span)
                activity2charts.update({activity_name: charts})

            html_page = self._html_generator.create(project_name, activity2charts)

            with codecs.open(file_path, 'w', encoding='utf8') as f:
                f.write(html_page)

            self._db_util.close_connection(self._cnx)

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("ReportExporter: process finished after " + str(minutes_and_seconds[0])
                             + " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        except:
            self._logger.error("ReportExporter failed", exc_info=True)
class GitHubDao():
    """
    This class handles the persistence and retrieval of GitHub issue tracker data
    """
    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except:
            self._logger.error("GitHubDao init failed", exc_info=True)
            raise

    def select_issue_tracker_id(self, repo_id, issue_tracker_name):
        """
        gets DB issue tracker id by its name

        :type repo_id: int
        :param repo_id: id of the repository associated to the issue tracker

        :type issue_tracker_name: str
        :param issue_tracker_name: issue tracker name
        """
        return self._db_util.select_issue_tracker_id(self._cnx,
                                                     issue_tracker_name,
                                                     self._logger)

    def insert_issue_comment(self, own_id, position, type, issue_id, body,
                             votes, author_id, created_at):
        """
        inserts issue comment

        :type own_id: id
        :param own_id: data source message id

        :type position: int
        :param position: position of the comment

        :type type: str
        :param type: type of the message

        :type issue_id: id
        :param issue_id: DB issue id

        :type body: str
        :param body: body of the comment

        :type votes: int
        :param votes: votes of the comment

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation date of the comment
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO message " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, own_id, position, type, issue_id, 0, 0, body, votes,
            author_id, created_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_event_type(self, name):
        """
        selects event type id by its name

        :type name: str
        :param name: name of the event
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM issue_event_type WHERE name = %s"
        arguments = [name]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()

        return found

    def insert_issue_event(self, issue_id, event_type_id, detail, creator_id,
                           created_at, target_user_id):
        """
        inserts issue event

        :type issue_id: int
        :param issue_id: DB issue id

        :type event_type_id: int
        :param event_type_id: event type id

        :type detail: str
        :param detail: detail of the event

        :type creator_id: int
        :param creator_id: id of the creator

        :type created_at: str
        :param created_at: creation date of the event

        :type target_user_id: int
        :param target_user_id: target user id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_event " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_id, event_type_id, detail, creator_id, created_at,
            target_user_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue_commit_dependency(self, issue_id, commit_id):
        """
        inserts dependency between commit and issue

        :type issue_id: int
        :param issue_id: DB issue id

        :type commit_id: int
        :param commit_id: DB commit id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_commit_dependency " \
                "VALUES (%s, %s)"
        arguments = [issue_id, commit_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_event_type(self, name):
        """
        inserts event type

        :type name: str
        :param name: event type name
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_event_type " \
                "VALUES (%s, %s)"
        arguments = [None, name]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue_tracker(self, repo_id, issue_tracker_name, type):
        """
        inserts issue tracker

        :type repo_id: int
        :param repo_id: DB repo id

        :type issue_tracker_name: str
        :param issue_tracker_name: issue tracker name

        :type type: str
        :param type: issue tracker type (github, bugzilla, etc.)
        """
        return self._db_util.insert_issue_tracker(self._cnx, repo_id,
                                                  issue_tracker_name, type,
                                                  self._logger)

    def get_already_imported_issue_ids(self, issue_tracker_id, repo_id):
        """
        gets issues already stored in DB

        :type issue_tracker_id: int
        :param issue_tracker_id: DB issue tracker id

        :type repo_id: int
        :param repo_id: DB repo id
        """
        issue_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE issue_tracker_id = %s AND repo_id = %s " \
                "ORDER BY i.id ASC;"
        arguments = [issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        while row:
            own_id = int(row[0])
            issue_ids.append(own_id)
            row = cursor.fetchone()

        cursor.close()

        return issue_ids

    def get_message_type_id(self, message_type):
        """
        gets message type id

        :type message_type: str
        :param message_type: message type
        """
        return self._db_util.get_message_type_id(self._cnx, message_type)

    def insert_issue_dependency(self, issue_source_id, issue_target_id, type):
        """
        inserts dependency between issues

        :type issue_source_id: int
        :param issue_source_id: issue source id

        :type issue_target_id: int
        :param issue_target_id: issue target id

        :type type: str
        :param type: type of dependency
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_dependency " \
                "VALUES (%s, %s, %s)"
        arguments = [issue_source_id, issue_target_id, type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_project_id(self, project_name):
        """
        selects project id by its name

        :type project_name: str
        :param project_name: name of a project
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def select_issue_own_id(self, issue_id, issue_tracker_id, repo_id):
        """
        selects data source issue id

        :type issue_id: int
        :param issue_id: DB issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE i.id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = int(row[0])

        cursor.close()
        return found

    def get_issue_dependency_type_id(self, name):
        """
        get id of the issue dependency type

        :type name: str
        :param name: name of the dependency type
        """
        return self._db_util.get_issue_dependency_type_id(self._cnx, name)

    def select_commit(self, sha, repo_id):
        """
        gets commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_issue_id(self, issue_own_id, issue_tracker_id, repo_id):
        """
        gets issue id

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_own_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def update_issue(self, issue_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     last_change_at):
        """
        updates an issue

        :type issue_id: int
        :param issue_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "UPDATE issue " \
                "SET last_change_at = %s, summary = %s, component = %s, version = %s, hardware = %s, " \
                "priority = %s, severity = %s, reference_id = %s " \
                "WHERE own_id = %s AND issue_tracker_id = %s"
        arguments = [
            last_change_at, summary, component, version, hardware, priority,
            severity, reference_id, issue_id, issue_tracker_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue(self, issue_own_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     user_id, created_at, last_change_at):
        """
        inserts an issue

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type user_id: int
        :param user_id: issue creator id

        :type created_at: str
        :param created_at: creation date of the issue

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_own_id, issue_tracker_id, summary, component, version,
            hardware, priority, severity, reference_id, user_id, created_at,
            last_change_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_label(self, name):
        """
        inserts a label

        :type name: str
        :param name: the name of the label
        """
        self._db_util.insert_label(self._cnx, name, self._logger)

    def select_label_id(self, name):
        """
        selects the label id by its name

        :type name: str
        :param name: the name of the label
        """
        return self._db_util.select_label_id(self._cnx, name, self._logger)

    def select_issue_comment_id(self, own_id, issue_id, created_at):
        """
        selects the id of an issue comment

        :type own_id: int
        :param own_id: data source comment id

        :type issue_id: int
        :param issue_id: DB issue id

        :type created_at: str
        :param created_at: creation date of the issue
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND issue_id = %s AND created_at = %s"
        arguments = [own_id, issue_id, created_at]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()

        return found

    def get_user_id(self, user_name, user_email):
        """
        selects the id of a user

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """

        if not user_email and not user_name:
            user_name = "uknonwn_user"
            user_email = "uknonwn_user"

        if user_name:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            if user_email:
                user_id = self._db_util.select_user_id_by_email(
                    self._cnx, user_email, self._logger)
            else:
                user_id = self._db_util.select_user_id_by_name(
                    self._cnx, user_name, self._logger)

        return user_id

    def insert_subscriber(self, issue_id, subscriber_id):
        """
        inserts issue subscriber

        :type issue_id: int
        :param issue_id: db issue id

        :type subscriber_id: int
        :param subscriber_id: subscriber id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_subscriber " \
                "VALUES (%s, %s)"
        arguments = [issue_id, subscriber_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_assignee(self, issue_id, assignee_id):
        """
        inserts issue assignee

        :type issue_id: int
        :param issue_id: db issue id

        :type assignee_id: int
        :param assignee_id: assignee id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_assignee " \
                "VALUES (%s, %s)"
        arguments = [issue_id, assignee_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_attachment(self, attachment_id, issue_comment_id, name, url):
        """
        inserts attachment

        :type attachment_id: int
        :param attachment_id: db attachment id

        :type issue_comment_id: int
        :param issue_comment_id: issue comment id

        :type name: str
        :param name: name of the attachment

        :type url: str
        :param url: url of the attachment
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, attachment_id, issue_comment_id, name, None, None, url
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def assign_label_to_issue(self, issue_id, label_id):
        """
        links label to issue

        :type issue_id: int
        :param issue_id: db issue id

        :type label_id: int
        :param label_id: label id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_labelled " \
                "VALUES (%s, %s)"
        arguments = [issue_id, label_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def find_reference_id(self, version, issue_id, repo_id):
        """
        retrieves reference id

        :type version: str
        :param version: name of the reference

        :type issue_id: int
        :param issue_id: db issue id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        if version:
            try:
                cursor = self._cnx.cursor()
                query = "SELECT id FROM reference WHERE name = %s AND repo_id = %s"
                arguments = [version, repo_id]
                cursor.execute(query, arguments)
                row = cursor.fetchone()
                if row:
                    found = row[0]
                else:
                    # sometimes the version is followed by extra information such as alpha, beta, RC, M.
                    query = "SELECT id " \
                            "FROM reference " \
                            "WHERE name LIKE '" + str(version) + "%' AND repo_id = " + str(repo_id)
                    cursor.execute(query)
                    row = cursor.fetchone()

                    if row:
                        found = row[0]

                cursor.close()
            except Exception:
                self._logger.warning("version (" + str(version) +
                                     ") not inserted for issue id: " +
                                     str(issue_id),
                                     exc_info=True)

        return found

    def select_last_change_issue(self, issue_id, issue_tracker_id, repo_id):
        """
        retrieves last change date of an issue

        :type issue_id: int
        :param issue_id: db issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None

        cursor = self._cnx.cursor()
        query = "SELECT i.last_change_at " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_repo_id(self, project_id, repo_name):
        """
        selects repository id

        :type project_id: int
        :param project_id: project id

        :type repo_name: int
        :param repo_name: repository name
        """
        return self._db_util.select_repo_id(self._cnx, repo_name, self._logger)

    def get_cursor(self):
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def close_connection(self):
        if self._cnx:
            self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)
示例#6
0
class GitHubDao():
    """
    This class handles the persistence and retrieval of GitHub issue tracker data
    """
    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except:
            self._logger.error("GitHubDao init failed", exc_info=True)
            raise

    def select_issue_tracker_id(self, repo_id, issue_tracker_name):
        """
        gets DB issue tracker id by its name

        :type repo_id: int
        :param repo_id: id of the repository associated to the issue tracker

        :type issue_tracker_name: str
        :param issue_tracker_name: issue tracker name
        """
        return self._db_util.select_issue_tracker_id(self._cnx,
                                                     issue_tracker_name,
                                                     self._logger)

    def insert_issue_comment(self, own_id, position, type, issue_id, body,
                             votes, author_id, created_at):
        """
        inserts issue comment

        :type own_id: id
        :param own_id: data source message id

        :type position: int
        :param position: position of the comment

        :type type: str
        :param type: type of the message

        :type issue_id: id
        :param issue_id: DB issue id

        :type body: str
        :param body: body of the comment

        :type votes: int
        :param votes: votes of the comment

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation date of the comment
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO message " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, own_id, position, type, issue_id, 0, 0, 0, body, votes,
            author_id, created_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_proposed_commit_id(self, sha, repo_id):
        """
        selects id of a proposed commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM proposed_commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def insert_proposed_commit(self, repo_id, sha, message, author_id,
                               committer_id, authored_date, committed_date):
        """
        inserts a proposed commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type sha: str
        :param sha: SHA of the commit

        :type message: str
        :param message: message of the commit

        :type author_id: int
        :param author_id: author id of the commit

        :type committer_id: int
        :param committer_id: committer id of the commit

        :type authored_date: str
        :param authored_date: authored date of the commit

        :type committed_date: str
        :param committed_date: committed date of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO proposed_commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, repo_id, sha,
            message.strip(), author_id, committer_id, authored_date,
            committed_date
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_pull_request_commit(self, pull_request_id, commit_id,
                                   proposed_commit_id):
        """
        inserts matched pull request commit to DB

        :type pull_request_id: int
        :param pull_request_id: pull request DB id

        :type commit_id: int
        :param commit_id: id of the commit stored in Gitana

        :type proposed_commit_id: int
        :param proposed_commit_id: id of the commit that was proposed for merge
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO pull_request_commit " \
                "VALUES (%s, %s, %s)"
        arguments = [pull_request_id, commit_id, proposed_commit_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_pull_request_review(self, comment_id, pr_id, file_id,
                                   proposed_file_id):
        """
        inserts dependency between pull request comment and a file id

        :type comment_id: int
        :param comment_id: id of the comment

        :type pr_id: int
        :param pr_id: id of the pull request

        :type file_id: id
        :param file_id: DB file id

        :type proposed_file_id: id
        :param proposed_file_id: DB proposed file id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO pull_request_review " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [comment_id, pr_id, file_id, proposed_file_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_pull_request_comment_id(self, own_id, pr_id):
        """
        gets pull request comment id

        :type own_id: id
        :param own_id: data source message id
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND pull_request_id = %s"
        arguments = [str(own_id), pr_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()

        return found

    def insert_pull_request_comment(self, own_id, position, type, pr_id, body,
                                    votes, author_id, created_at):
        """
        inserts pull request comment

        :type own_id: id
        :param own_id: data source message id

        :type position: int
        :param position: position of the comment

        :type type: str
        :param type: type of the message

        :type pr_id: id
        :param pr_id: DB pull request id

        :type body: str
        :param body: body of the comment

        :type votes: int
        :param votes: votes of the comment

        :type author_id: int
        :param author_id: id of the author

        :type created_at: str
        :param created_at: creation date of the comment
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO message " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, own_id, position, type, 0, 0, 0, pr_id, body, votes,
            author_id, created_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_event_type(self, name):
        """
        selects event type id by its name

        :type name: str
        :param name: name of the event
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM issue_event_type WHERE name = %s"
        arguments = [name]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()

        return found

    def insert_issue_event(self, issue_id, event_type_id, detail, creator_id,
                           created_at, target_user_id):
        """
        inserts issue event

        :type issue_id: int
        :param issue_id: DB issue id

        :type event_type_id: int
        :param event_type_id: event type id

        :type detail: str
        :param detail: detail of the event

        :type creator_id: int
        :param creator_id: id of the creator

        :type created_at: str
        :param created_at: creation date of the event

        :type target_user_id: int
        :param target_user_id: target user id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_event " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_id, event_type_id, detail, creator_id, created_at,
            target_user_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue_commit_dependency(self, issue_id, commit_id):
        """
        inserts dependency between commit and issue

        :type issue_id: int
        :param issue_id: DB issue id

        :type commit_id: int
        :param commit_id: DB commit id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_commit_dependency " \
                "VALUES (%s, %s)"
        arguments = [issue_id, commit_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_event_type(self, name):
        """
        inserts event type

        :type name: str
        :param name: event type name
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_event_type " \
                "VALUES (%s, %s)"
        arguments = [None, name]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue_tracker(self, repo_id, issue_tracker_name, type):
        """
        inserts issue tracker

        :type repo_id: int
        :param repo_id: DB repo id

        :type issue_tracker_name: str
        :param issue_tracker_name: issue tracker name

        :type type: str
        :param type: issue tracker type (github, bugzilla, etc.)
        """
        return self._db_util.insert_issue_tracker(self._cnx, repo_id,
                                                  issue_tracker_name, type,
                                                  self._logger)

    def get_already_imported_issue_ids(self, issue_tracker_id, repo_id):
        """
        gets issues already stored in DB

        :type issue_tracker_id: int
        :param issue_tracker_id: DB issue tracker id

        :type repo_id: int
        :param repo_id: DB repo id
        """
        issue_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE issue_tracker_id = %s AND repo_id = %s " \
                "ORDER BY i.id ASC;"
        arguments = [issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        while row:
            own_id = int(row[0])
            issue_ids.append(own_id)
            row = cursor.fetchone()

        cursor.close()

        return issue_ids

    def get_pull_request_file_id_by_name(self, file_name, repo_id):
        """
        gets the file id linked to a pull request by name

        :type file_name: str
        :param file_name: name of the file commented

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = (None, None)
        cursor = self._cnx.cursor()
        query = "SELECT f.id, 'codebase' AS type " \
                "FROM file f " \
                "WHERE f.name = %s AND f.repo_id = %s " \
                "UNION " \
                "SELECT f.id, 'proposed' AS type " \
                "FROM proposed_file f " \
                "WHERE f.name = %s AND f.repo_id = %s"
        arguments = [file_name, repo_id, file_name, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = (row[0], row[1])

        cursor.close()

        return found

    def get_already_imported_pull_request_ids(self, issue_tracker_id, repo_id):
        """
        gets pull requests already stored in DB

        :type issue_tracker_id: int
        :param issue_tracker_id: DB issue tracker id

        :type repo_id: int
        :param repo_id: DB repo id
        """
        pr_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id FROM pull_request pr " \
                "JOIN issue i ON i.id = pr.issue_id " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE it.id = %s AND it.repo_id = %s " \
                "ORDER BY i.id ASC;"
        arguments = [issue_tracker_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        while row:
            own_id = row[0]
            pr_ids.append(own_id)
            row = cursor.fetchone()

        cursor.close()

        return pr_ids

    def get_message_type_id(self, message_type):
        """
        gets message type id

        :type message_type: str
        :param message_type: message type
        """
        return self._db_util.get_message_type_id(self._cnx, message_type)

    def insert_issue_dependency(self, issue_source_id, issue_target_id,
                                created_at, type):
        """
        inserts dependency between issues

        :type issue_source_id: int
        :param issue_source_id: issue source id

        :type issue_target_id: int
        :param issue_target_id: issue target id

        :type created_at: str
        :param created_at: date of the dependency creation

        :type type: str
        :param type: type of dependency
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_dependency " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [issue_source_id, issue_target_id, created_at, type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_project_id(self, project_name):
        """
        selects project id by its name

        :type project_name: str
        :param project_name: name of a project
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def select_issue_own_id(self, issue_id, issue_tracker_id, repo_id):
        """
        selects data source issue id

        :type issue_id: int
        :param issue_id: DB issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.own_id " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE i.id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = int(row[0])

        cursor.close()
        return found

    def get_issue_dependency_type_id(self, name):
        """
        get id of the issue dependency type

        :type name: str
        :param name: name of the dependency type
        """
        return self._db_util.get_issue_dependency_type_id(self._cnx, name)

    def select_commit(self, sha, repo_id):
        """
        gets commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_issue_id(self, issue_own_id, issue_tracker_id, repo_id):
        """
        gets issue id

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT i.id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_own_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_pull_request_id(self, pr_own_id, issue_tracker_id):
        """
        gets pull request id

        :type pr_own_id: int
        :param pr_own_id: data source pull request id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT pr.id FROM pull_request pr JOIN issue i ON pr.issue_id = i.id " \
                "WHERE i.own_id = %s AND i.issue_tracker_id = %s"
        arguments = [pr_own_id, issue_tracker_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def update_issue(self, issue_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     last_change_at):
        """
        updates an issue

        :type issue_id: int
        :param issue_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "UPDATE issue SET last_change_at = %s, summary = %s, component = %s, version = %s, hardware = %s, priority = %s, severity = %s, reference_id = %s WHERE own_id = %s AND issue_tracker_id = %s"
        arguments = [
            last_change_at, summary, component, version, hardware, priority,
            severity, reference_id, issue_id, issue_tracker_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_pull_request(self, pr_own_id, issue_tracker_id, summary, status,
                            merged, last_change_at):
        """
        updates a pull request

        :type pr_own_id: int
        :param pr_own_id: data source pr id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new pull request description

        :type status: str
        :param status: new pull request status

        :type merged: bool
        :param merged: new pull request merged status

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "UPDATE pull_request SET last_change_at = %s, summary = %s, status = %s, merged = %s WHERE own_id = %s AND issue_tracker_id = %s"
        arguments = [
            last_change_at, summary, status, merged, pr_own_id,
            issue_tracker_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_issue(self, issue_own_id, issue_tracker_id, summary, component,
                     version, hardware, priority, severity, reference_id,
                     user_id, created_at, last_change_at):
        """
        inserts an issue

        :type issue_own_id: int
        :param issue_own_id: data source issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type summary: str
        :param summary: new issue description

        :type component: str
        :param component: component where the issue was found

        :type version: str
        :param version: version where the issue was found

        :type hardware: str
        :param hardware: hardware where the issue was found

        :type priority: str
        :param priority: priority of the issue

        :type severity: str
        :param severity: severity of the issue

        :type reference_id: int
        :param reference_id: id of the Git reference where the issue was found

        :type user_id: int
        :param user_id: issue creator id

        :type created_at: str
        :param created_at: creation date of the issue

        :type last_change_at: str
        :param last_change_at: last change date of the issue
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_own_id, issue_tracker_id, summary, component, version,
            hardware, priority, severity, reference_id, user_id, created_at,
            last_change_at
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_pull_request(self, pr_own_id, author_id, state, target_ref_id,
                            merged_at, merged_by, issue_tracker_id, repo_id):
        """
        inserts a pull request (note that the pull request id is equal to the issue id,
        since that in GitHub every pull request is an issue

        :type pr_own_id: int
        :param pr_own_id: data source pull request id

        :type author_id: int
        :param author_id: author id

        :type state: str
        :param state: pull request state

        :type target_ref_id: int
        :param target_ref_id: target reference

        :type merged_at: timestamp
        :param merged_at: pull request merged time

        :type merged_by: int
        :param merged_by: merger id
        """
        cursor = self._cnx.cursor()
        issue_id = self.select_issue_id(pr_own_id, issue_tracker_id, repo_id)
        query = "INSERT IGNORE INTO pull_request " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, issue_id, author_id, state, target_ref_id, merged_at,
            merged_by
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_proposed_file_id(self, repo_id, name):
        """
        selects id of the proposed file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM proposed_file " \
                "WHERE name = %s AND repo_id = %s"
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        try:
            id = cursor.fetchone()[0]
        except:
            id = None
        cursor.close()
        return id

    def insert_proposed_file(self, repo_id, filename, ext):
        """
        inserts file not part of the codebase proposed in a pull request

        :type repo_id: int
        :param repo_id: id of the repository

        :type filename: str
        :param filename: name of the file

        :type ext: str
        :param ext: extension of the file
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO proposed_file " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, filename, ext]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_proposed_file_modification(self, proposed_commit_id, file_id,
                                          proposed_file_id, status, additions,
                                          deletions, changes, patch_content):
        """
        inserts a proposed file modification to DB

        :type proposed_commit_id: int
        :param proposed_commit_id: id of the proposed commit

        :type file_id: int
        :param file_id: id of the file

        :type proposed_file_id: int
        :param proposed_file_id: id of the proposed file

        :type status: str
        :param status: type of the modification

        :type additions: int
        :param additions: number of additions

        :type deletions: int
        :param deletions: number of deletions

        :type changes: int
        :param changes: number of changes

        :type patch_content: str
        :param patch_content: content of the patch
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO proposed_file_modification " \
                "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            proposed_commit_id, file_id, proposed_file_id, status, additions,
            deletions, changes, patch_content
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_label(self, name):
        """
        inserts a label

        :type name: str
        :param name: the name of the label
        """
        self._db_util.insert_label(self._cnx, name, self._logger)

    def select_label_id(self, name):
        """
        selects the label id by its name

        :type name: str
        :param name: the name of the label
        """
        return self._db_util.select_label_id(self._cnx, name, self._logger)

    def select_issue_comment_id(self, own_id, issue_id, created_at):
        """
        selects the id of an issue comment

        :type own_id: int
        :param own_id: data source comment id

        :type issue_id: int
        :param issue_id: DB issue id

        :type created_at: str
        :param created_at: creation date of the issue
        """
        cursor = self._cnx.cursor()
        query = "SELECT id FROM message WHERE own_id = %s AND issue_id = %s AND created_at = %s"
        arguments = [own_id, issue_id, created_at]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]
        cursor.close()

        return found

    def get_user_id(self, user_name, user_email):
        """
        selects the id of a user

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """

        if user_email == None and user_name == None:
            user_name = "unknown_user"
            user_email = "unknown_user"

        if user_name:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            if user_email:
                user_id = self._db_util.select_user_id_by_email(
                    self._cnx, user_email, self._logger)
            else:
                user_id = self._db_util.select_user_id_by_name(
                    self._cnx, user_name, self._logger)

        return user_id

    def insert_subscriber(self, issue_id, subscriber_id):
        """
        inserts issue subscriber

        :type issue_id: int
        :param issue_id: db issue id

        :type subscriber_id: int
        :param subscriber_id: subscriber id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_subscriber " \
                "VALUES (%s, %s)"
        arguments = [issue_id, subscriber_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_assignee(self, issue_id, assignee_id):
        """
        inserts issue assignee

        :type issue_id: int
        :param issue_id: db issue id

        :type assignee_id: int
        :param assignee_id: assignee id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_assignee " \
                "VALUES (%s, %s)"
        arguments = [issue_id, assignee_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_attachment(self, attachment_id, issue_comment_id, name, url):
        """
        inserts attachment

        :type attachment_id: int
        :param attachment_id: db attachment id

        :type issue_comment_id: int
        :param issue_comment_id: issue comment id

        :type name: str
        :param name: name of the attachment

        :type url: str
        :param url: url of the attachment
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO attachment " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, attachment_id, issue_comment_id, name, None, None, url
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def assign_label_to_issue(self, issue_id, label_id):
        """
        links label to issue

        :type issue_id: int
        :param issue_id: db issue id

        :type label_id: int
        :param label_id: label id
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO issue_labelled " \
                "VALUES (%s, %s)"
        arguments = [issue_id, label_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def find_reference_id(self, version, issue_id, repo_id):
        """
        retrieves reference id

        :type version: str
        :param version: name of the reference

        :type issue_id: int
        :param issue_id: db issue id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None
        if version:
            try:
                cursor = self._cnx.cursor()
                query = "SELECT id FROM reference WHERE name = %s AND repo_id = %s"
                arguments = [version, repo_id]
                cursor.execute(query, arguments)
                row = cursor.fetchone()
                if row:
                    found = row[0]
                else:
                    #sometimes the version is followed by extra information such as alpha, beta, RC, M.
                    query = "SELECT id FROM reference WHERE name LIKE '" + str(
                        version) + "%' AND repo_id = " + str(repo_id)
                    cursor.execute(query)
                    row = cursor.fetchone()

                    if row:
                        found = row[0]

                cursor.close()
            except Exception:
                self._logger.warning("version (" + str(version) +
                                     ") not inserted for issue id: " +
                                     str(issue_id),
                                     exc_info=True)

        return found

    def select_last_change_issue(self, issue_id, issue_tracker_id, repo_id):
        """
        retrieves last change date of an issue

        :type issue_id: int
        :param issue_id: db issue id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None

        cursor = self._cnx.cursor()
        query = "SELECT i.last_change_at " \
                "FROM issue i JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [issue_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_last_change_pull_request(self, pr_id, issue_tracker_id,
                                        repo_id):
        """
        retrieves last change date of a pull request

        :type pr_id: int
        :param pr_id: db pull request id

        :type issue_tracker_id: int
        :param issue_tracker_id: issue tracker id

        :type repo_id: int
        :param repo_id: repository id
        """
        found = None

        cursor = self._cnx.cursor()
        query = "SELECT pr.last_change_at " \
                "FROM pull_request pr JOIN issue_tracker it ON pr.issue_tracker_id = it.id " \
                "WHERE own_id = %s AND issue_tracker_id = %s AND repo_id = %s"
        arguments = [pr_id, issue_tracker_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_repo_id(self, project_id, repo_name):
        """
        selects repository id

        :type project_id: int
        :param project_id: project id

        :type repo_name: int
        :param repo_name: repository name
        """
        return self._db_util.select_repo_id(self._cnx, repo_name, self._logger)

    def get_cursor(self):
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def close_connection(self):
        if self._cnx:
            self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)
示例#7
0
class FileJsonExporter:
    """
    This class handles the export of file information via JSON. It allows to use a former version of
    the bus factor tool (https://github.com/SOM-Research/busfactor)
    """

    LOG_FOLDER_PATH = "logs"

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param config: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._date_util = DateUtil()
        self._db_util = DbUtil()

        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-file-json-" + db_name
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(
            self._logger, self._log_path, "info")

        self._db_name = db_name
        config.update({'database': db_name})
        self._config = config

        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)
        self._file_util = FileUtil(self._config, self._logger)

    def get_diff_info(self, patch_content):
        if patch_content:
            first_line = patch_content.split('\n')[0]
            if re.match(r"^@@(\s|\+|\-|\d|,)+@@", first_line, re.M):
                diff_info = first_line.split("@@")[1]
            else:
                diff_info = "Binary file"
        else:
            diff_info = 'Renamed file'

        return diff_info

    def get_diff_content(self, patch_content):
        if patch_content:
            lines = patch_content.split('\n')
            if re.match(r"^@@(\s|\+|\-|\d|,)+@@", lines[0], re.M):
                first_line_content = lines[0].split("@@")[2]
                diff_content = lines[1:]
                diff_content.insert(0, first_line_content)
                diff_content = '\n'.join(diff_content)
            else:
                diff_content = "No content"
        else:
            diff_content = "No content"

        return diff_content

    def get_patch_info(self, content):
        diff_info = self.get_diff_info(content)
        diff_content = self.get_diff_content(content)
        return {
            'info': diff_info,
            'content': diff_content.decode('utf-8', 'ignore')
        }

    def get_changes_for_file(self, file_ids):
        file_modifications = []
        cursor = self._cnx.cursor()
        query = "SELECT c.author_id, c.committer_id, c.authored_date, c.committed_date, c.sha, fm.additions, fm.deletions, fm.patch " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id  " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id IN (" + ",".join([str(id) for id in file_ids]) + ") " \
                "ORDER BY c.authored_date DESC"
        cursor.execute(query)
        row = cursor.fetchone()

        while row:
            author_id = row[0]
            committer_id = row[1]
            authored_date = row[2].strftime('%Y-%m-%d %H:%M:%S')
            committed_date = row[3].strftime('%Y-%m-%d %H:%M:%S')
            additions = str(row[4])
            deletions = str(row[5])
            sha = str(row[6])
            patch = str(row[7])

            patch_info = self.get_patch_info(patch)

            author_name, author_email = self.get_user_identity(author_id)

            if author_id != committer_id:
                committer_name, committer_email = self.get_user_identity(
                    committer_id)
            else:
                committer_name = author_name
                committer_email = author_email

            author = {'name': author_name, 'email': author_email}
            committer = {'name': committer_name, 'email': committer_email}

            file_modifications.append({
                'author': author,
                'authored_date': authored_date,
                'committer': committer,
                'committed_date': committed_date,
                'additions': additions,
                'deletions': deletions,
                'sha': sha,
                'patch': patch_info
            })

            row = cursor.fetchone()
        cursor.close()

        return file_modifications

    def array2string(self, array):
        return ','.join(str(x) for x in array)

    def get_user_identity(self, user_id):
        found = None

        cursor = self._cnx.cursor()
        query = "SELECT u.name, u.email " \
                "FROM user u " \
                "JOIN (SELECT IFNULL(ua.alias_id, u.id) as user_id FROM user u LEFT JOIN user_alias ua ON u.id = ua.user_id WHERE u.id = %s) as aliased " \
                "ON aliased.user_id = u.id"
        arguments = [user_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            name = row[0]
            email = row[1]
            found = (name, email)

        return found

    def get_commits_info(self, file_ids):
        commits = []
        cursor = self._cnx.cursor()
        query = "SELECT c.sha, c.message, r.name, c.author_id, c.committer_id, c.authored_date, c.committed_date " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id IN (" + ",".join([str(id) for id in file_ids]) + ")"
        cursor.execute(query)
        row = cursor.fetchone()
        while row:
            sha = str(row[0])
            message = str(row[1].encode('utf8'))
            ref = str(row[2])
            author_id = row[3]
            committer_id = row[4]
            authored_date = row[5].strftime('%Y-%m-%d %H:%M:%S')
            committed_date = row[6].strftime('%Y-%m-%d %H:%M:%S')

            author_name, author_email = self.get_user_identity(author_id)

            if author_id != committer_id:
                committer_name, committer_email = self.get_user_identity(
                    committer_id)
            else:
                committer_name = author_name
                committer_email = author_email

            author = {'name': author_name, 'email': author_email}
            committer = {'name': committer_name, 'email': committer_email}
            commits.append({
                'sha': sha,
                'author': author,
                'committer': committer,
                'message': message,
                'ref': ref,
                'authored_date': authored_date,
                'committed_date': committed_date
            })
            row = cursor.fetchone()
        cursor.close()

        return commits

    def get_status_file(self, file_id):
        cursor = self._cnx.cursor()
        query = "SELECT fm.status, MAX(c.committed_date) AS last_modification " \
                "FROM file_modification fm JOIN file f ON fm.file_id = f.id " \
                "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                "JOIN commit c ON c.id = cin.commit_id " \
                "WHERE f.id = %s"
        arguments = [file_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        cursor.close()

        status = ""
        last_modification = ""
        if row:
            status = row[0]
            last_modification = row[1].strftime('%Y-%m-%d %H:%M:%S')

        return {'status': status, 'last_modification': last_modification}

    def add_file_info_to_json(self, references, repo_json):
        cursor = self._cnx.cursor()
        query = "SELECT f.id, f.name, f.ext, r.name, r.id " \
                "FROM repository repo JOIN commit_in_reference cin ON repo.id = cin.repo_id " \
                "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                "JOIN file f ON f.id = fm.file_id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "WHERE repo.id = %s AND r.name IN (" + ",".join(["'" + ref + "'" for ref in references]) + ") AND " \
                "f.id NOT IN " \
                            "(SELECT deletions.file_id FROM " \
                                        "(SELECT fm.file_id, c.committed_date " \
                                        "FROM commit_in_reference cin " \
                                        "JOIN file_modification fm ON fm.commit_id = cin.commit_id  " \
                                        "JOIN reference r ON r.id = cin.ref_id " \
                                        "JOIN commit c ON c.id = fm.commit_id " \
                                        "WHERE fm.status = 'deleted' AND cin.repo_id = %s ) as deletions " \
                                        "JOIN " \
                                        "(SELECT fm.file_id, max(c.committed_date) as committed_date " \
                                        "FROM commit_in_reference cin " \
                                        "JOIN file_modification fm ON fm.commit_id = cin.commit_id " \
                                        "JOIN reference r ON r.id = cin.ref_id " \
                                        "JOIN commit c ON c.id = fm.commit_id " \
                                        "WHERE fm.status <> 'deleted' AND cin.repo_id = %s " \
                                        "GROUP BY fm.file_id) AS last_action " \
                                        "ON deletions.file_id = last_action.file_id " \
                                        "WHERE deletions.committed_date > last_action.committed_date " \
                                        "UNION " \
                                        "SELECT fr.previous_file_id " \
                                        "FROM file_renamed fr JOIN file f ON fr.previous_file_id = f.id " \
                                        "JOIN file_modification fm ON fm.file_id = f.id " \
                                        "JOIN commit_in_reference cin ON cin.commit_id = fm.commit_id " \
                                        "JOIN reference r ON r.id = cin.ref_id " \
                                        "WHERE cin.repo_id = %s) " \
                "GROUP BY f.id, r.id"
        arguments = [
            self._repo_id, self._repo_id, self._repo_id, self._repo_id
        ]
        cursor.execute(query, arguments)
        row = cursor.fetchone()
        while row:
            file_id = row[0]
            file_name = row[1]
            file_ext = row[2]
            ref_name = row[3]
            ref_id = row[4]

            status = self.get_status_file(file_id)
            file_history = self._file_util.get_file_history_by_id(
                file_id, ref_id)
            file_ids = list(set([h.get("file_id") for h in file_history]))

            commits = self.get_commits_info(file_ids)
            directories = self._file_util.get_directories(file_name)
            changes_info = self.get_changes_for_file(file_ids)

            file_info = {
                'repo': self._repo_name,
                'info': status,
                'commits': commits,
                'ref': ref_name,
                'id': str(file_id),
                'name': file_name.split('/')[-1],
                'ext': file_ext,
                'dirs': directories,
                'file_changes': changes_info
            }
            repo_json.write(json.dumps(file_info) + "\n")
            row = cursor.fetchone()
        cursor.close()

    def export(self, repo_name, references, file_path):
        """
        exports the file data to JSON format

        :type repo_name: str
        :param repo_name: name of the repository to analyse

        :type references: list str
        :param references: list of references to analyse

        :type file_path: str
        :param file_path: the path where to export the file information
        """
        try:
            self._logger.info("FileJSONExporter started")
            start_time = datetime.now()

            repo_json = codecs.open(file_path, 'w', "utf-8")
            self._repo_name = repo_name
            self._repo_id = self._db_util.select_repo_id(
                self._cnx, repo_name, self._logger)
            self.add_file_info_to_json(references, repo_json)
            repo_json.close()

            self._db_util.close_connection(self._cnx)

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(
                end_time, start_time)
            self._logger.info("FileJSONExporter: process finished after " +
                              str(minutes_and_seconds[0]) + " minutes and " +
                              str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(
                self._logger, self._fileHandler)
        except:
            self._logger.error("FileJSONExporter failed", exc_info=True)
示例#8
0
class GitDao():
    """
    This class handles the persistence and retrieval of Git data
    """
    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        try:
            self._config = config
            self._logger = logger
            self._db_util = DbUtil()
            self._cnx = self._db_util.get_connection(self._config)
        except:
            self._logger.error("GitDao init failed")
            raise

    def check_connection_alive(self):
        try:
            cursor = self._cnx.cursor()
            cursor.execute("SELECT VERSION()")
            results = cursor.fetchone()
            ver = results[0]
            cursor.close()
            if not ver:
                self._cnx = self._db_util.restart_connection(
                    self._config, self._logger)
        except:
            self._cnx = self._db_util.restart_connection(
                self._config, self._logger)

    def close_connection(self):
        self._db_util.close_connection(self._cnx)

    def restart_connection(self):
        self._cnx = self._db_util.restart_connection(self._config,
                                                     self._logger)

    def get_connection(self):
        return self._cnx

    def get_cursor(self):
        return self._cnx.cursor()

    def close_cursor(self, cursor):
        return cursor.close()

    def fetchone(self, cursor):
        return cursor.fetchone()

    def execute(self, cursor, query, arguments):
        cursor.execute(query, arguments)

    def array2string(self, array):
        return ','.join(str(x) for x in array)

    def function_at_commit_is_empty(self, repo_id):
        """
        checks function at commit table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN function_at_commit fac ON c.id = fac.commit_id " \
                "WHERE c.repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])

        cursor.close()

        return int(count > 0)

    def select_code_at_commit(self, commit_id, file_id):
        """
        retrieve the code at commit info for a given file and commit

        :type commit_id: int
        :param commit_id: id of an existing commit in the DB

        :type file_id: int
        :param file_id: id of an existing file in the DB
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT * " \
                "FROM code_at_commit " \
                "WHERE commit_id = %s AND file_id = %s"
        arguments = [commit_id, file_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        if row:
            found = row

        cursor.close()

        return found

    def line_detail_table_is_empty(self, repo_id):
        """
        checks line detail table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "JOIN line_detail l ON fm.id = l.file_modification_id " \
                "WHERE l.content IS NOT NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])

        cursor.close()

        return int(count > 0)

    def code_at_commit_is_empty(self, repo_id):
        """
        checks code at commit table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN code_at_commit cac ON c.id = cac.commit_id " \
                "WHERE c.repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])

        cursor.close()

        return int(count > 0)

    def file_modification_patch_is_empty(self, repo_id):
        """
        checks patch column in file modification table is empty

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        cursor = self._cnx.cursor()
        query = "SELECT COUNT(*) " \
                "FROM commit c " \
                "JOIN file_modification fm ON c.id = fm.commit_id " \
                "WHERE patch IS NOT NULL and repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        count = 0
        if row:
            count = int(row[0])

        cursor.close()

        return int(count > 0)

    def get_last_commit_id(self, repo_id):
        """
        gets last commit id

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT MAX(id) as last_commit_id " \
                "FROM commit c " \
                "WHERE repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        if row:
            found = row[0]

        cursor.close()
        return found

    def select_repo_id(self, repo_name):
        """
        selects id of a repository by its name

        :type repo_name: str
        :param repo_name: name of an existing repository in the DB
        """
        return self._db_util.select_repo_id(self._cnx, repo_name, self._logger)

    def insert_repo(self, project_id, repo_name):
        """
        inserts repository to DB

        :type project_id: int
        :param project_id: id of an existing project in the DB

        :type repo_name: str
        :param repo_name: name of a repository to insert
        """
        return self._db_util.insert_repo(self._cnx, project_id, repo_name,
                                         self._logger)

    def select_project_id(self, project_name):
        """
        selects id of a project by its name

        :type project_name: str
        :param project_name: name of an existing project in the DB
        """
        return self._db_util.select_project_id(self._cnx, project_name,
                                               self._logger)

    def get_user_id(self, user_name, user_email):
        """
        gets id of a user

        :type user_name: str
        :param user_name: name of the user

        :type user_email: str
        :param user_email: email of the user
        """

        if user_email == None and user_name == None:
            user_name = "unknown_user"
            user_email = "unknown_user"

        if user_email:
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)
        else:
            user_id = self._db_util.select_user_id_by_name(
                self._cnx, user_name, self._logger)

        if not user_id:
            self._db_util.insert_user(self._cnx, user_name, user_email,
                                      self._logger)
            user_id = self._db_util.select_user_id_by_email(
                self._cnx, user_email, self._logger)

        return user_id

    def insert_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB, one by one

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning(
                    "parent commit id not found! SHA parent " +
                    str(parent.hexsha))

            query = "INSERT IGNORE INTO commit_parent " \
                    "VALUES (%s, %s, %s, %s, %s)"

            if parent_id:
                arguments = [repo_id, commit_id, sha, parent_id, parent.hexsha]
            else:
                arguments = [repo_id, commit_id, sha, None, parent.hexsha]

            cursor.execute(query, arguments)
            self._cnx.commit()

        cursor.close()

    def insert_all_commit_parents(self, parents, commit_id, sha, repo_id):
        """
        inserts commit parents to DB all together

        :type parents: list of Object
        :param parents: parents of a commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        to_insert = []
        for parent in parents:
            parent_id = self.select_commit_id(parent.hexsha, repo_id)

            if not parent_id:
                self._logger.warning(
                    "parent commit id not found! SHA parent " +
                    str(parent.hexsha))

            if parent_id:
                to_insert.append(
                    (repo_id, commit_id, sha, parent_id, parent.hexsha))
            else:
                to_insert.append(
                    (repo_id, commit_id, sha, None, parent.hexsha))

        if to_insert:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_parent(repo_id, commit_id, commit_sha, parent_id, parent_sha) VALUES (%s, %s, %s, %s, %s)"
            cursor.executemany(query, [i for i in to_insert])
            self._cnx.commit()
            cursor.close()

    def insert_commits_in_reference(self, commits_data):
        """
        inserts commits to DB all together

        :type commits_data: list of Object
        :param commits_data: commit data
        """
        if commits_data:
            cursor = self._cnx.cursor()
            query = "INSERT IGNORE INTO commit_in_reference(repo_id, commit_id, ref_id) VALUES (%s, %s, %s)"
            cursor.executemany(query, commits_data)
            self._cnx.commit()
            cursor.close()

    def insert_commit_in_reference(self, repo_id, commit_id, ref_id):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type commit_id: int
        :param commit_id: id of the commit

        :type ref_id: int
        :param ref_id: id of the reference
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit_in_reference " \
                "VALUES (%s, %s, %s)"
        arguments = [repo_id, commit_id, ref_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_line_details(self, file_modification_id, detail):
        """
        inserts line details to DB

        :type file_modification_id: int
        :param file_modification_id: id of the file modification

        :type detail: str
        :param detail: line content
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO line_detail " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"

        arguments = [
            file_modification_id, detail[0], detail[1], detail[2], detail[3],
            detail[4], detail[5]
        ]

        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_modification_id(self, commit_id, file_id):
        """
        selects file modification id

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file_modification " \
                "WHERE commit_id = %s AND file_id = %s"
        arguments = [commit_id, file_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        found = None
        if row:
            found = row[0]

        cursor.close()
        return found

    def insert_file_modification(self, commit_id, file_id, status, additions,
                                 deletions, changes, patch_content):
        """
        inserts file modification to DB

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file

        :type status: str
        :param status: type of the modification

        :type additions: int
        :param additions: number of additions

        :type deletions: int
        :param deletions: number of deletions

        :type changes: int
        :param changes: number of changes

        :type patch_content: str
        :param patch_content: content of the patch
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file_modification " \
                "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            commit_id, file_id, status, additions, deletions, changes,
            patch_content
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file_renamed(self, repo_id, current_file_id, previous_file_id,
                            file_modification_id):
        """
        inserts file renamed information

        :type repo_id: int
        :param repo_id: id of the repository

        :type current_file_id: int
        :param current_file_id: id of the renamed file

        :type previous_file_id: int
        :param previous_file_id: id of the file before renaming

        :type file_modification_id: int
        :param file_modification_id: id of the file modification
        """
        cursor = self._cnx.cursor()

        query = "INSERT IGNORE INTO file_renamed " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [
            repo_id, current_file_id, previous_file_id, file_modification_id
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_file(self, repo_id, name, ext):
        """
        inserts file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)

        :type ext: str
        :param ext: extension of the file
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO file " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, name, ext]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_file_id(self, repo_id, name):
        """
        selects id of the file

        :type repo_id: int
        :param repo_id: id of the repository

        :type name: str
        :param name: name of the file (full path)
        """
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM file " \
                "WHERE name = %s AND repo_id = %s"
        arguments = [name, repo_id]
        cursor.execute(query, arguments)
        try:
            id = cursor.fetchone()[0]
        except:
            id = None
        cursor.close()
        return id

    def insert_reference(self, repo_id, ref_name, ref_type):
        """
        inserts reference

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference

        :type ref_type: str
        :param ref_type: type of the reference (branch or tag)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO reference " \
                "VALUES (%s, %s, %s, %s)"
        arguments = [None, repo_id, ref_name, ref_type]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_reference_name(self, repo_id, ref_id):
        """
        selects reference name by its id

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_id: int
        :param ref_id: id of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT name " \
                "FROM reference " \
                "WHERE id = %s and repo_id = %s"
        arguments = [ref_id, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()

        return found

    def select_reference_id(self, repo_id, ref_name):
        """
        selects reference id by its name

        :type repo_id: int
        :param repo_id: id of the repository

        :type ref_name: str
        :param ref_name: name of the reference
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM reference " \
                "WHERE name = %s and repo_id = %s"
        arguments = [ref_name, repo_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()
        if row:
            found = row[0]

        cursor.close()
        return found

    def insert_commit(self, repo_id, sha, message, author_id, committer_id,
                      authored_date, committed_date, size):
        """
        inserts commit to DB

        :type repo_id: int
        :param repo_id: id of the repository

        :type sha: str
        :param sha: SHA of the commit

        :type message: str
        :param message: message of the commit

        :type author_id: int
        :param author_id: author id of the commit

        :type committer_id: int
        :param committer_id: committer id of the commit

        :type authored_date: str
        :param authored_date: authored date of the commit

        :type committed_date: str
        :param committed_date: committed date of the commit

        :type size: int
        :param size: size of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, repo_id, sha,
            message.strip(), author_id, committer_id, authored_date,
            committed_date, size
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def select_function_id(self, file_id, start_line, end_line):
        """
        select DB function id

        :type file_id: int
        :param file_id: id of the file

        :type start_line: int
        :param start_line: line number where the function starts

        :type end_line: int
        :param end_line: line number where the function ends
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM function " \
                "WHERE file_id = %s AND start_line = %s AND end_line = %s"
        arguments = [file_id, start_line, end_line]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()

        return found

    def insert_code_at_commit(self, commit_id, file_id, ccn, loc, comments,
                              blanks, funs, tokens, avg_ccn, avg_loc,
                              avg_tokens):
        """
        inserts to DB code information of a file at a given commit

        :type commit_id: int
        :param commit_id: id of the commit

        :type file_id: int
        :param file_id: id of the file

        :type ccn: int
        :param ccn: cyclomatic complexity value

        :type loc: int
        :param loc: lines of code

        :type comments: int
        :param comments: commented lines

        :type blanks: int
        :param blanks: blank lines

        :type funs: int
        :param funs: number of functions

        :type tokens: int
        :param tokens: tokens in the files

        :type avg_ccn: int
        :param avg_ccn: file avg cyclomatic complexity (per function)

        :type avg_loc: int
        :param avg_loc: file avg lines of code (per function)

        :type avg_tokens: int
        :param avg_tokens: file avg number of tokens (per function)
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO code_at_commit " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            commit_id, file_id, ccn, loc, comments, blanks, funs, tokens,
            avg_ccn, avg_loc, avg_tokens
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_function(self, function_name, file_id, num_arguments, loc,
                        tokens, total_lines, ccn, start_line, end_line):
        """
        inserts function to DB

        :type function_name: str
        :param function_name: name of the function

        :type file_id: int
        :param file_id: id of the file

        :type num_arguments: int
        :param num_arguments: number of arguments

        :type loc: int
        :param loc: lines of code

        :type tokens: int
        :param tokens: tokens in the function

        :type total_lines: int
        :param total_lines: number of lines (loc + comments + empty lines)

        :type ccn: int
        :param ccn: cyclomatic complexity of the function

        :type start_line: int
        :param start_line: line number where the function starts

        :type end_line: int
        :param end_line: line number where the function ends
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO function " \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        arguments = [
            None, function_name, file_id, num_arguments, loc, tokens,
            total_lines, ccn, start_line, end_line
        ]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def insert_function_at_commit(self, fun_id, commit_id):
        """
        inserts link between a function and the commit

        :type fun_id: int
        :param fun_id: id of the function

        :type commit_id: int
        :param commit_id: id of the commit
        """
        cursor = self._cnx.cursor()
        query = "INSERT IGNORE INTO function_at_commit " \
                "VALUES (%s, %s)"
        arguments = [commit_id, fun_id]
        cursor.execute(query, arguments)
        self._cnx.commit()
        cursor.close()

    def update_commit_parent(self, parent_id, parent_sha, repo_id):
        """
        inserts commit parent to DB

        :type parent_id: int
        :param parent_id: id of the commit parent

        :type parent_sha: str
        :param parent_sha: SHA of the commit parent

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_update = "UPDATE commit_parent " \
                       "SET parent_id = %s " \
                       "WHERE parent_id IS NULL AND parent_sha = %s AND repo_id = %s "
        arguments = [parent_id, parent_sha, repo_id]
        cursor.execute(query_update, arguments)
        self._cnx.commit()
        cursor.close()

    def fix_commit_parent_table(self, repo_id):
        """
        checks for missing commit parent information and fixes it

        :type repo_id: int
        :param repo_id: id of the repository
        """
        cursor = self._cnx.cursor()
        query_select = "SELECT parent_sha " \
                       "FROM commit_parent " \
                       "WHERE parent_id IS NULL AND repo_id = %s"
        arguments = [repo_id]
        cursor.execute(query_select, arguments)
        row = cursor.fetchone()
        while row:
            parent_sha = row[0]
            parent_id = self.select_commit_id(parent_sha, repo_id)
            self.update_commit_parent(parent_id, parent_sha, repo_id)
            row = cursor.fetchone()
        cursor.close()

    def select_commit_id(self, sha, repo_id):
        """
        selects id of a commit by its SHA

        :type sha: str
        :param sha: SHA of the commit

        :type repo_id: int
        :param repo_id: id of the repository
        """
        found = None
        cursor = self._cnx.cursor()
        query = "SELECT id " \
                "FROM commit " \
                "WHERE sha = %s AND repo_id = %s"
        arguments = [sha, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        cursor.close()
        return found

    def select_all_developer_ids(self, repo_id):
        """
        selects all developers (committers or authors) of a given repo

        :type repo_id: int
        :param repo_id: id of the repository
        """
        user_ids = []
        cursor = self._cnx.cursor()
        query = "SELECT c.author_id " \
                "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.author_id " \
                "WHERE repo_id = %s " \
                "UNION " \
                "SELECT c.committer_id " \
                "FROM commit c JOIN repository r ON c.repo_id = r.id JOIN user u ON u.id = c.committer_id " \
                "WHERE repo_id = %s "
        arguments = [repo_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        while row:
            user_id = row[0]
            user_ids.append(user_id)
            row = cursor.fetchone()

        cursor.close()
        return user_ids

    def select_sha_commit_by_user(self, user_id, repo_id, match_on="author"):
        """
        selects the SHA of the first commit (authored or committed) by a given user id

        :type user_id: int
        :param user_id: id of the user

        :type repo_id: int
        :param repo_id: id of the repository

        :type match_on: str (author or committer)
        :param match_on: define whether to perform the match on the author id or committer id
        """
        found = None
        cursor = self._cnx.cursor()

        if match_on == "committer":
            query = "SELECT sha " \
                    "FROM commit " \
                    "WHERE committer_id = %s AND repo_id = %s " \
                    "LIMIT 1"
        else:
            query = "SELECT sha " \
                    "FROM commit " \
                    "WHERE author_id = %s AND repo_id = %s " \
                    "LIMIT 1"
        arguments = [user_id, repo_id]
        cursor.execute(query, arguments)
        row = cursor.fetchone()

        if row:
            found = row[0]

        return found

    def select_file_changes(self,
                            file_id,
                            ref_id,
                            before_date=None,
                            patch=False,
                            code=False):
        """
        get all file changes, excluding renamings for a given file within a reference

        :type file_id: int
        :param file_id: id of the file

        :type ref_id: int
        :param ref_id: id of the reference

        :type before_date: str
        :param before_date: if not null, it retrieves the changes before the given date

        :type patch: bool
        :param: patch: if True, it retrieves also the patch associate to each file change

        :type code: bool
        :param: code: if True, it retrieves also code-related information of the file (ccn, loc, commented lines, etc.)
        """
        found = []

        before_date_selection = ""
        if before_date:
            before_date_selection = " AND c.authored_date <= '" + str(
                before_date) + "'"

        patch_selection = ", NULL AS patch"
        if patch:
            patch_selection = ", fm.patch"

        code_selection = ""
        code_join = ""
        if code:
            code_selection = ", " \
                             "IFNULL(cac.ccn, 0) AS ccn, " \
                             "IFNULL(cac.loc, 0) AS loc, " \
                             "IFNULL(cac.commented_lines, 0) AS commented_lines, " \
                             "IFNULL(cac.blank_lines, 0) AS blank_lines, " \
                             "IFNULL(cac.funs, 0) AS funs, " \
                             "IFNULL(cac.tokens, 0) AS tokens, " \
                             "IFNULL(cac.avg_ccn, 0) AS avg_ccn, " \
                             "IFNULL(cac.avg_loc, 0) AS avg_loc, " \
                             "IFNULL(cac.avg_tokens, 0) AS avg_tokens"
            code_join = "LEFT JOIN code_at_commit cac ON cac.commit_id = c.id AND cac.file_id = f.id "

        cursor = self._cnx.cursor()
        query = "SELECT f.id, f.name, c.sha, c.message, IFNULL(ua.alias_id, c.author_id) AS author_id, " \
                "c.authored_date, c.committed_date, fm.status, fm.additions, fm.deletions, " \
                "fm.changes" + patch_selection + code_selection + " " \
                "FROM file f join file_modification fm on f.id = fm.file_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "JOIN commit_in_reference cin ON cin.commit_id = c.id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "LEFT JOIN user_alias ua ON ua.user_id = c.author_id " + code_join + " " \
                "WHERE f.id = %s and r.id = %s" + before_date_selection
        arguments = [file_id, ref_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        while row:
            file_id = row[0]
            file_name = row[1]
            sha = row[2]
            message = row[3]
            author_id = int(row[4])

            authored_date = row[5]
            committed_date = row[6]
            status = str(row[7])
            additions = int(row[8])
            deletions = int(row[9])
            changes = int(row[10])

            git_data = {
                'sha': sha,
                'commit_message': message,
                'author_id': author_id,
                'authored_date': authored_date,
                'committed_date': committed_date,
                'status': status,
                'additions': additions,
                'deletions': deletions,
                'changes': changes,
                'file_name': file_name,
                'file_id': file_id
            }

            patch_data = {}
            if patch:
                patch = str(row[11])
                patch_data = {'patch': patch}

            code_data = {}
            if code:
                ccn = int(row[12])
                loc = int(row[13])
                commented_lines = int(row[14])
                blank_lines = int(row[15])
                funs = int(row[16])
                tokens = int(row[17])
                avg_ccn = float(row[18])
                avg_loc = float(row[19])
                avg_tokens = float(row[20])

                code_data = {
                    'ccn': ccn,
                    'loc': loc,
                    'commented_lines': commented_lines,
                    'blank_lines': blank_lines,
                    'funs': funs,
                    'tokens': tokens,
                    'avg_ccn': avg_ccn,
                    'avg_loc': avg_loc,
                    'avg_tokens': avg_tokens
                }

            found.append(
                dict(git_data.items() + patch_data.items() +
                     code_data.items()))
            row = cursor.fetchone()

        cursor.close()

        return found

    def select_file_renamings(self, file_id, ref_id):
        """
        get file renamings for a given file within a reference

        :type file_id: int
        :param file_id: id of the file

        :type ref_id: int
        :param ref_id: id of the reference
        """
        renamings = []
        cursor = self._cnx.cursor()
        query = "SELECT fr.current_file_id, fr.previous_file_id, c.authored_date, c.authored_date, c.committed_date " \
                "FROM file f JOIN file_modification fm ON f.id = fm.file_id " \
                "JOIN commit c ON c.id = fm.commit_id " \
                "JOIN commit_in_reference cin ON cin.commit_id = c.id " \
                "JOIN reference r ON r.id = cin.ref_id " \
                "JOIN file_renamed fr ON fr.file_modification_id = fm.id AND fr.current_file_id = f.id " \
                "WHERE f.id = %s and r.id = %s AND fm.status = 'renamed';"
        arguments = [file_id, ref_id]
        cursor.execute(query, arguments)

        row = cursor.fetchone()

        while row:
            renamings.append(row[1])
            row = cursor.fetchone()

        cursor.close()

        if len(renamings) > 1:
            self._logger.warning("The file with id " + str(file_id) +
                                 " has multiple renamings in reference " +
                                 str(ref_id) + "!")

        return renamings