Пример #1
0
def GetPersonIdentifiers(identities_db, upeople_id):
    """Get profile, organization and country information for one person.

    The full query joins profiles with enrollments, organizations and
    countries. If running it raises, a reduced query without the
    enrollment/organization joins is executed instead.

    Args:
        identities_db: Name of the identities database; used verbatim as
            the schema qualifier of every table.
        upeople_id: uuid of the person; placed verbatim inside the quoted
            literal of the WHERE clause.

    Returns:
        The value returned by ExecuteQuery: columns uuid, name, email,
        country and affiliation from the full query, or the same columns
        without affiliation from the fallback query.

    Raises:
        Exception: whatever ExecuteQuery raises for the fallback query.
    """
    # NOTE(review): both arguments are interpolated into the SQL text
    # without escaping, so they must come from a trusted source.
    q = """
        SELECT pro.uuid, pro.name, pro.email, cou.name as country,
               org.name as affiliation
        FROM %s.profiles pro
        JOIN %s.enrollments enr ON enr.uuid= pro.uuid
        JOIN %s.organizations org ON org.id = enr.organization_id
        LEFT JOIN %s.countries cou ON cou.code = pro.country_code
        WHERE pro.uuid ='%s'
        """ % (identities_db, identities_db, identities_db, identities_db,
               upeople_id)
    try:
        return ExecuteQuery(q)
    except Exception:
        # Only ordinary errors trigger the fallback; a bare "except:" would
        # also swallow KeyboardInterrupt and SystemExit.
        # The failure is taken to mean there is no organization data, so
        # only people data and country data are requested.
        q = """
            SELECT pro.uuid, pro.name, pro.email, cou.name as country
            FROM %s.profiles pro
            LEFT JOIN %s.countries cou ON cou.code = pro.country_code
            WHERE pro.uuid ='%s'
            """ % (identities_db, identities_db, upeople_id)
        return ExecuteQuery(q)
Пример #2
0
 def _remove_issue(cls, issue_id):
     """Delete one issue and the rows that reference it.

     One DELETE statement is executed per table, in the order listed
     below; the row in ``issues`` itself is removed last.

     Args:
         issue_id: id of the issue; interpolated verbatim into each
             statement.
     """
     # The backend's short type code selects the suffix of the
     # backend-specific tables; unknown codes are used unchanged.
     backend_code = cls._get_backend().its_type
     if backend_code == "lp":
         table_suffix = "launchpad"
     elif backend_code == "bg":
         table_suffix = "bugzilla"
     else:
         table_suffix = backend_code

     # (table, column holding the issue id), in execution order.
     targets = (
         ("attachments", "issue_id"),
         ("changes", "issue_id"),
         ("comments", "issue_id"),
         ("related_to", "issue_id"),
         ("issues_ext_%s" % (table_suffix,), "issue_id"),
         ("issues_log_%s" % (table_suffix,), "issue_id"),
         ("issues_watchers", "issue_id"),
         ("issues", "id"),
     )
     for table, key_column in targets:
         statement = "DELETE FROM %s WHERE %s='%s'" % (table, key_column,
                                                       issue_id)
         ExecuteQuery(statement)
Пример #3
0
 def _remove_scmlog(scmlog_id):
     """Delete one commit (scmlog row) together with the rows tied to it.

     Executed in this order: the file_copies rows of every action of the
     commit, the commit's commits_lines and file_links rows, every tag
     referenced by the commit's tag_revisions rows (plus those
     tag_revisions rows), and finally the scmlog row itself.

     Args:
         scmlog_id: id of the commit; interpolated verbatim into each
             statement.
     """

     def column_values(result, name):
         # A result column may be absent, hold a single bare value or hold
         # a list of values; always hand back a list so that iterating
         # never walks over the characters of a lone string id or fails
         # on a lone integer.
         if name not in result:
             return []
         values = result[name]
         return values if isinstance(values, list) else [values]

     # Get actions and remove mappings. action_files is a view, so nothing
     # is deleted from it.
     q = "SELECT * from actions where commit_id='%s'" % (scmlog_id)
     for action_id in column_values(ExecuteQuery(q), 'id'):
         q = "DELETE FROM file_copies WHERE action_id='%s'" % (action_id)
         ExecuteQuery(q)

     # actions_file_names is a view as well, so it is skipped too.
     q = "DELETE FROM commits_lines WHERE commit_id='%s'" % (scmlog_id)
     ExecuteQuery(q)
     q = "DELETE FROM file_links WHERE commit_id='%s'" % (scmlog_id)
     ExecuteQuery(q)

     # Tags are looked up first because tag_revisions is what links them
     # to the commit.
     q = "SELECT tag_id from tag_revisions WHERE commit_id='%s'" % (
         scmlog_id)
     for tag_id in column_values(ExecuteQuery(q), 'tag_id'):
         q = "DELETE FROM tags WHERE id='%s'" % (tag_id)
         ExecuteQuery(q)
         q = "DELETE FROM tag_revisions WHERE tag_id='%s'" % (tag_id)
         ExecuteQuery(q)

     q = "DELETE FROM scmlog WHERE id='%s'" % (scmlog_id)
     ExecuteQuery(q)
Пример #4
0
    def get_url():
        """Return the url and type name of one tracker.

        The query joins trackers with supported_trackers on the tracker
        type and is limited to a single row.
        """
        sql = ("SELECT url, name as type FROM trackers t JOIN "
               "supported_trackers s ON t.type = s.id limit 1")
        tracker = ExecuteQuery(sql)
        return tracker
Пример #5
0
 def verboseThread(self):
     """Return the most verbose thread as an Email.

     The most verbose thread is the one whose message bodies add up to
     the largest number of characters. The root message id of that thread
     is computed on the first call and cached in ``self.verbose``; it
     stays "" when no thread has a positive total length.

     Returns:
         Email(self.verbose, self.i_db)
     """
     # TODO: at some point these numbers should be calculated when
     # retrieving the initial list of message_id, is_response_of values
     if self.verbose is None:
         # The cache has not been initialized yet.
         self.verbose = ""
         biggest_len = 0
         # Iterating through the root messages.
         for root_id in self.threads:
             thread_len = 0  # combined length of all bodies in the thread
             for msg in self.threads[root_id]:
                 query = """
                         select length(message_body) as length
                         from messages
                         where message_ID = '%s'
                         """ % (msg)
                 result = ExecuteQuery(query)
                 length = result["length"]
                 if isinstance(length, list):
                     # Several (or zero) rows matched the same message id;
                     # use the first row, or nothing at all.
                     length = length[0] if length else 0
                 # A NULL body has no length; count it as empty.
                 thread_len += int(length or 0)
             if thread_len > biggest_len:
                 # New bigger thread found.
                 self.verbose = root_id
                 biggest_len = thread_len
     return Email(self.verbose, self.i_db)
Пример #6
0
 def get_date_init(startdate=None, enddate=None, identities_db=None,
                   type_analysis=None):
     """Return the earliest projects.created_on, formatted as YYYY-MM-DD.

     None of the parameters is read by the body.
     """
     first_date_sql = (
         "SELECT DATE_FORMAT (MIN(created_on), '%Y-%m-%d') AS first_date "
         "FROM projects")
     return ExecuteQuery(first_date_sql)
Пример #7
0
 def get_date_end(startdate=None, enddate=None, identities_db=None,
                  type_analysis=None):
     """Return the latest repositories_log.date, formatted as YYYY-MM-DD.

     None of the parameters is read by the body.
     """
     last_date_sql = (
         "SELECT DATE_FORMAT (MAX(date),'%Y-%m-%d') as last_date "
         "FROM repositories_log")
     return ExecuteQuery(last_date_sql)
Пример #8
0
def GetPeopleStaticITS(developer_id, startdate, enddate, closed_condition):
    """Run the non-evolutionary people query of the ITS for one developer.

    The SQL comes from GetPeopleQueryITS, called with no period and with
    the evolutionary flag set to False.
    """
    # FIXME: is this function used only to calculate closed issues? If not
    # it must be fixed.
    sql = GetPeopleQueryITS(developer_id, None, startdate, enddate, False,
                            closed_condition)
    return ExecuteQuery(sql)
Пример #9
0
    def _init_threads(self):
        """Build ``self.threads`` from the messages in the analysed period.

        ``self.threads`` maps the message id of every root message (one
        whose is_response_of is None) to a list of message ids: the root
        first, followed by whatever ``self._build_threads`` returns for
        it (not ordered). The raw columns of the query are also stored in
        ``self.list_message_id`` and ``self.list_is_response_of``.
        """
        # Retrieving all of the messages.
        query = """
                select DISTINCT message_ID, is_response_of
                from messages
                where first_date >= %s and first_date < %s
                """ % (self.initdate, self.enddate)
        list_messages = ExecuteQuery(query)

        def to_list(value):
            # A single-row result holds bare values instead of lists.
            return value if isinstance(value, (list, dict)) else [value]

        self.list_message_id = to_list(list_messages["message_ID"])
        self.list_is_response_of = to_list(list_messages["is_response_of"])

        # Parent of the first row in which each message id appears. This
        # is what a list.index() lookup per message returned, without
        # rescanning the whole list for every message.
        first_parent = {}
        for message_id, parent in zip(self.list_message_id,
                                      self.list_is_response_of):
            first_parent.setdefault(message_id, parent)

        messages = {}
        for message_id in self.list_message_id:
            # Only analyzing those whose is_response_of is None,
            # those are the 'root' message of each thread.
            if first_parent[message_id] is None:
                thread = self._build_threads(message_id)
                # Adding the root message to the list in first place.
                thread.insert(0, message_id)
                messages[message_id] = thread

        self.threads = messages
Пример #10
0
def people():
    """Return the id and identifier columns of every row in upeople.

    These are the people participating in the source code development.
    """
    return ExecuteQuery("select id,identifier from upeople")
Пример #11
0
def GetListPeopleIRC(startdate, enddate):
    """Return IRC people with their number of COMMENT log entries.

    Rows are grouped by nick and ordered by the count, largest first. The
    SQL is assembled by GetSQLGlobal over the 'date' field.
    """
    counted_fields = "DISTINCT(pup.uuid) as id, count(irclog.id) total"
    source_tables = GetTablesOwnUniqueIdsIRC()
    conditions = "".join((
        GetFiltersOwnUniqueIdsIRC(),
        " AND irclog.type='COMMENT' ",
        " GROUP BY nick ORDER BY total desc",
    ))
    sql = GetSQLGlobal('date', counted_fields, source_tables, conditions,
                       startdate, enddate)
    return ExecuteQuery(sql)
Пример #12
0
    def remove_filter_data(filter_):
        """Delete a tracker and the people and issues that belong to it.

        The tracker is looked up by the url returned by
        ``filter_.get_item()``. People who submitted, or were assigned,
        issues in this tracker only are removed through
        ``ITS._remove_people``; every issue of the tracker is removed
        through ``ITS._remove_issue``; finally the trackers row is
        deleted. When the tracker is not found an error is logged and
        nothing is deleted.

        Args:
            filter_: object providing get_name() and get_item().
        """

        def column_values(result, name):
            # A result column may be absent, hold a single bare value or
            # hold a list of values; always hand back a list so a lone
            # string id is never iterated character by character.
            if name not in result:
                return []
            values = result[name]
            return values if isinstance(values, list) else [values]

        def get_people_one_repo(field):
            # Sub-select with the values of `field` seen in one tracker only.
            return """
                SELECT %s FROM (SELECT COUNT(DISTINCT(tracker_id)) AS total, %s
                FROM issues
                GROUP BY %s
                HAVING total=1) t
                """ % (field, field, field)

        uri = filter_.get_item()
        logging.info("Removing ITS filter %s %s" %
                     (filter_.get_name(), uri))
        q = "SELECT * from trackers WHERE url='%s'" % (uri)
        repo = ExecuteQuery(q)
        # A missing column and an empty column both mean "no such tracker".
        if not column_values(repo, 'id'):
            logging.error("%s not found" % (uri))
            return

        logging.info("Removing people")
        # Remove submitted_by that exists only in this repository
        q = """
            SELECT DISTINCT(submitted_by) from issues
            WHERE tracker_id='%s' AND submitted_by in (%s)
        """ % (repo['id'], get_people_one_repo("submitted_by"))
        for people_id in column_values(ExecuteQuery(q), 'submitted_by'):
            ITS._remove_people(people_id)
        # Remove assigned_to that exists only in this repository
        q = """
            SELECT DISTINCT(assigned_to) from issues
            WHERE tracker_id='%s' AND assigned_to in (%s)
        """ % (repo['id'], get_people_one_repo("assigned_to"))
        for people_id in column_values(ExecuteQuery(q), 'assigned_to'):
            ITS._remove_people(people_id)

        # Remove people activity
        logging.info("Removing issues")
        q = "SELECT id from issues WHERE tracker_id='%s'" % (repo['id'])
        for issue_id in column_values(ExecuteQuery(q), 'id'):
            ITS._remove_issue(issue_id)
        # Remove filter
        q = "DELETE from trackers WHERE id='%s'" % (repo['id'])
        ExecuteQuery(q)
Пример #13
0
def GetListPeopleMediaWiki(startdate, enddate):
    """Return MediaWiki people with their number of page revisions.

    Rows are grouped by user and ordered by the count, largest first. The
    SQL is assembled by GetSQLGlobal over the 'date' field.
    """
    counted_fields = "DISTINCT(pup.uuid) as id, count(wiki_pages_revs.id) total"
    source_tables = GetTablesOwnUniqueIdsMediaWiki()
    conditions = (GetFiltersOwnUniqueIdsMediaWiki()
                  + " GROUP BY user ORDER BY total desc")
    sql = GetSQLGlobal('date', counted_fields, source_tables, conditions,
                       startdate, enddate)
    return ExecuteQuery(sql)
Пример #14
0
def GetListPeopleMLS(startdate, enddate):
    """Return mailing list people with their number of messages.

    Rows are grouped by id and ordered by the count, largest first. The
    SQL is assembled by GetSQLGlobal over the 'first_date' field.
    """
    counted_fields = "DISTINCT(pup.uuid) as id, count(m.message_ID) total"
    source_tables = GetTablesOwnUniqueIdsMLS()
    conditions = (GetFiltersOwnUniqueIdsMLS()
                  + " GROUP BY id ORDER BY total desc")
    sql = GetSQLGlobal('first_date', counted_fields, source_tables,
                       conditions, startdate, enddate)
    return ExecuteQuery(sql)
Пример #15
0
def GetPeopleListITS(startdate, enddate):
    """Return ITS people (as pid) with their number of changes.

    Rows are grouped by pid and ordered by the count, largest first. The
    SQL is assembled by GetSQLGlobal over the 'changed_on' field.
    """
    counted_fields = "DISTINCT(pup.uuid) as pid, count(c.id) as total"
    source_tables = GetTablesOwnUniqueIdsITS()
    conditions = (GetFiltersOwnUniqueIdsITS()
                  + " GROUP BY pid ORDER BY total desc")
    sql = GetSQLGlobal('changed_on', counted_fields, source_tables,
                       conditions, startdate, enddate)
    return ExecuteQuery(sql)
Пример #16
0
def GetPeopleListSCM(startdate, enddate):
    """Return SCM people (as pid) with their number of distinct commits.

    Rows are grouped by pid and ordered by the count (largest first) and
    then by pid. The SQL is assembled by GetSQLGlobal over the
    's.author_date' field.
    """
    counted_fields = "DISTINCT(pup.uuid) as pid, COUNT(distinct(s.id)) as total"
    source_tables = GetTablesOwnUniqueIdsSCM()
    conditions = (GetFiltersOwnUniqueIdsSCM()
                  + " GROUP BY pid ORDER BY total desc, pid")
    sql = GetSQLGlobal('s.author_date', counted_fields, source_tables,
                       conditions, startdate, enddate)
    return ExecuteQuery(sql)
Пример #17
0
def reposField():
    """Return the messages column that identifies the mailing list.

    Depending on the mailing list data, the field to be used is
    ``mailing_list`` or ``mailing_list_url``: when the messages table has
    no distinct non-NULL mailing_list value, ``mailing_list_url`` is
    returned; otherwise ``mailing_list``.

    Returns:
        'mailing_list' or 'mailing_list_url'.
    """
    rfield = 'mailing_list'
    sql = "select count(distinct(mailing_list)) as total from messages"
    result = ExecuteQuery(sql)
    if len(result) == 0:
        # Nothing came back at all; same outcome as before.
        return "mailing_list_url"
    if 'total' in result:
        # The length of the result only says that the aggregate query
        # produced a row, which it always does; the decision has to be
        # made on the counted value itself.
        total = result['total']
        if isinstance(total, list):
            total = total[0] if total else 0
        if not total:
            rfield = "mailing_list_url"
    return rfield
Пример #18
0
 def get_date_end(startdate=None, enddate=None, identities_db=None,
                  type_analysis=None):
     """Return the latest create/update date of releases and projects.

     The greatest of MAX(updated_on) and MAX(created_on) over both tables
     is formatted as YYYY-MM-DD. None of the parameters is read by the
     body.
     """
     releases_sql = "SELECT MAX(updated_on) as ru, MAX(created_on) as rc FROM releases"
     projects_sql = "SELECT MAX(updated_on) as pu, MAX(created_on) as pr FROM projects"
     # Only this part goes through %-formatting; the DATE_FORMAT pattern
     # below must keep its single % signs.
     greatest_sql = (
         "(SELECT GREATEST(ru, rc, pu, pr) AS last_date FROM (%s) r, (%s) p) t"
         % (releases_sql, projects_sql))
     select_head = "SELECT DATE_FORMAT (last_date,'%Y-%m-%d') as last_date FROM "
     return ExecuteQuery(select_head + greatest_sql)
Пример #19
0
 def get_date_end(startdate=None, enddate=None, identities_db=None,
                  type_analysis=None):
     """Return the latest date among questions, comments and answers.

     The greatest of MAX(questions.added_at), MAX(comments.submitted_on)
     and MAX(answers.submitted_on) is formatted as YYYY-MM-DD. None of the
     parameters is read by the body.
     """
     newest_per_table = (
         "SELECT MAX(added_at) AS aq FROM questions",
         "SELECT MAX(submitted_on) AS sc FROM comments",
         "SELECT MAX(submitted_on) AS sa FROM answers",
     )
     # %% survives the formatting step as the single % DATE_FORMAT expects.
     template = ("SELECT DATE_FORMAT (GREATEST(aq, sc, sa), '%%Y-%%m-%%d') "
                 "AS last_date FROM (%s) q, (%s) c, (%s) a")
     return ExecuteQuery(template % newest_per_table)
Пример #20
0
def top_files_modified():
    """Return the 10 files with the most 'M' (modification) actions.

    Each row carries file_name and the number of commits counted as
    modifications, ordered by that number, largest first.
    """
    # FIXME: to be updated to use startdate and enddate values
    sql = ("select file_name, count(commit_id) as modifications "
           "from action_files a join files f on a.file_id = f.id  "
           "where action_type='M'  "
           "group by f.id  "
           "order by modifications desc limit 10; ")
    return ExecuteQuery(sql)
Пример #21
0
    def remove_filter_data(filter_):
        """Delete a repository and the people, commits and files tied to it.

        The repository is looked up by the uri returned by
        ``filter_.get_item()``. Committers and authors seen in this
        repository only are removed through ``SCM._remove_people``; every
        commit of the repository is removed through
        ``SCM._remove_scmlog``; its files (and their file_types rows) are
        deleted; finally the repositories row is deleted. When the
        repository is not found an error is logged and nothing is deleted.

        Args:
            filter_: object providing get_name() and get_item().
        """

        def column_values(result, name):
            # A result column may be absent, hold a single bare value or
            # hold a list of values; always hand back a list so a lone
            # string id is never iterated character by character.
            if name not in result:
                return []
            values = result[name]
            return values if isinstance(values, list) else [values]

        def get_people_one_repo(field):
            # Sub-select with the values of `field` seen in one repository
            # only.
            return """
                SELECT %s FROM (SELECT COUNT(DISTINCT(repository_id)) AS total, %s
                FROM scmlog
                GROUP BY %s
                HAVING total=1) t
                """ % (field, field, field)

        uri = filter_.get_item()
        logging.info("Removing SCM filter %s %s" %
                     (filter_.get_name(), uri))
        q = "SELECT * from repositories WHERE uri='%s'" % (uri)
        repo = ExecuteQuery(q)
        # A missing column and an empty column both mean "no such repository".
        if not column_values(repo, 'id'):
            logging.error("%s not found" % (uri))
            return

        # Remove committer_id that exists only in this repository
        q = """
            SELECT DISTINCT(committer_id) from scmlog
            WHERE repository_id='%s' AND committer_id in (%s)
        """ % (repo['id'], get_people_one_repo("committer_id"))
        for people_id in column_values(ExecuteQuery(q), 'committer_id'):
            SCM._remove_people(people_id)
        # Remove author_id that exists only in this repository
        q = """
            SELECT DISTINCT(author_id) from scmlog
            WHERE repository_id='%s' AND author_id in (%s)
        """ % (repo['id'], get_people_one_repo("author_id"))
        for people_id in column_values(ExecuteQuery(q), 'author_id'):
            SCM._remove_people(people_id)

        # Remove people activity
        q = "SELECT id from scmlog WHERE repository_id='%s'" % (repo['id'])
        for scmlog_id in column_values(ExecuteQuery(q), 'id'):
            SCM._remove_scmlog(scmlog_id)

        # Remove files
        q = "SELECT id FROM files WHERE repository_id='%s'" % (repo['id'])
        for file_id in column_values(ExecuteQuery(q), 'id'):
            q = "DELETE FROM file_types WHERE file_id='%s'" % (file_id)
            ExecuteQuery(q)
            q = "DELETE FROM files WHERE id='%s'" % (file_id)
            ExecuteQuery(q)

        # Remove filter
        q = "DELETE from repositories WHERE id='%s'" % (repo['id'])
        ExecuteQuery(q)
Пример #22
0
def GetTopClosersByAssignee(days, startdate, enddate, identities_db, filter):
    """Return the top 10 assignees by number of closed issues.

    An issue counts as closed when a change set its value to
    'Fix Committed' between startdate and enddate. People enrolled in any
    organization named in `filter` are excluded. When `days` is not 0,
    only changes made less than `days` days before the newest change are
    counted.

    Args:
        days: size of the trailing window in days; 0 disables it.
        startdate: SQL date expression, inserted verbatim.
        enddate: SQL date expression, inserted verbatim.
        identities_db: name of the identities database.
        filter: iterable of organization names to exclude.
    """
    # One "<> name and" clause per excluded organization.
    affiliations = "".join(" org.name<>'" + aff + "' and " for aff in filter)

    if days != 0:
        # Stores the newest change date in the @maxdate session variable,
        # which the main query reads back.
        ExecuteQuery("SELECT @maxdate:=max(changed_on) from changes limit 1")
        date_limit = " AND DATEDIFF(@maxdate, changed_on)<" + str(days)
    else:
        date_limit = ""

    pieces = [
        "SELECT up.uuid as id, ",
        "       up.identifier as closers, ",
        "       count(distinct(ill.issue_id)) as closed ",
        "FROM people_uidentities pup,  ",
        "     ", identities_db, ".enrollments enr, ",
        "     ", identities_db, ".uidentities up,  ",
        "     ", identities_db, ".organizations org, ",
        "     issues_log_launchpad ill  ",
        "WHERE ill.assigned_to = pup.people_id and ",
        "      pup.uuid = up.uuid and  ",
        "      up.uuid = enr.uuid and  ",
        "      enr.organization_id = org.id and ",
        "      ", affiliations, " ",
        "      ill.date >= enr.start and ",
        "      ill.date < enr.end and  ",
        "      ill.change_id  in (  ",
        "         select id ",
        "         from changes  ",
        "         where new_value='Fix Committed' and ",
        "               changed_on>=", startdate, " and  ",
        "               changed_on<", enddate, " ", date_limit, ") ",
        "GROUP BY up.identifier ",
        "ORDER BY closed desc, closers limit 10",
    ]
    return ExecuteQuery("".join(pieces))
Пример #23
0
def GetPeopleListSCR(startdate, enddate, bots):
    """Return code review people with their number of submitted issues.

    People whose name is listed in `bots` are excluded. Rows are grouped
    by id and ordered by the count, largest first. The SQL is assembled
    by GetSQLGlobal over the 'submitted_on' field.
    """
    # One "<> name and" clause per bot to leave out.
    bot_exclusions = "".join(" name<>'" + bot + "' and " for bot in bots)

    counted_fields = "DISTINCT(pup.uuid) as id, count(i.id) as total, name"
    source_tables = GetTablesOwnUniqueIdsSCR('issues') + ", people"
    conditions = (bot_exclusions
                  + (GetFiltersOwnUniqueIdsSCR('issues')
                     + " and people.id = pup.people_id")
                  + " GROUP BY id ORDER BY total desc")
    sql = GetSQLGlobal('submitted_on', counted_fields, source_tables,
                       conditions, startdate, enddate)
    return ExecuteQuery(sql)
Пример #24
0
def GetDate(startdate, enddate, identities_db, type_analysis, type):
    """Return the first or last irclog date, formatted as YYYY-MM-DD.

    Args:
        type: "max" selects the latest date (column last_date); any other
            value selects the earliest date (column first_date).
    """
    fields = (" DATE_FORMAT (max(date), '%Y-%m-%d') as last_date"
              if type == "max" else
              " DATE_FORMAT (min(date), '%Y-%m-%d') as first_date")
    tables = " irclog i " + GetIRCSQLReportFrom(identities_db, type_analysis)
    filters = GetIRCSQLReportWhere(type_analysis)

    sql = BuildQuery(None, startdate, enddate, " i.date ", fields, tables,
                     filters, False)
    return ExecuteQuery(sql)
Пример #25
0
def GetEmailsSent(period, startdate, enddate, identities_db, type_analysis,
                  evolutionary, projects_db):
    """Count the distinct emails sent (generic function).

    When `evolutionary` is false the first and last message dates,
    formatted as YYYY-MM-DD, are selected alongside the count.
    """
    fields = " count(distinct(m.message_ID)) as sent "
    if not evolutionary:
        fields = (" count(distinct(m.message_ID)) as sent, "
                  " DATE_FORMAT (min(m.first_date), '%Y-%m-%d') as first_date, "
                  " DATE_FORMAT (max(m.first_date), '%Y-%m-%d') as last_date ")

    tables = " messages m " + GetMLSSQLReportFrom(identities_db, type_analysis)
    filters = GetMLSSQLReportWhere(type_analysis, projects_db)

    sql = BuildQuery(period, startdate, enddate, " m.first_date ", fields,
                     tables, filters, evolutionary)
    return ExecuteQuery(sql)
Пример #26
0
    def topCrowdedThread(self, numTop):
        """Return the `numTop` threads with the most distinct senders.

        For every thread in ``self.threads`` the distinct upeople_id
        values of the senders ('From' recipients) of its messages are
        counted.

        Args:
            numTop: maximum number of threads to return.

        Returns:
            A list of (Email, number_of_people) tuples ordered by number
            of people, largest first. Each Email is built from the root
            message id of its thread.
        """
        # [(root message_id, number of different upeople_id), (...,...), ...]
        top_threads = []

        for root_id, thread in self.threads.items():
            # Distinct people taking part in this thread.
            people = set()
            for message in thread:
                query = """
                        select distinct pup.uuid as upeople_id
                        from messages m,
                             messages_people mp,
                             people_uidentities pup
                        where m.message_ID = '%s' and
                              m.message_ID = mp.message_id and
                              mp.type_of_recipient = 'From' and
                              mp.email_address = pup.people_id
                        """ % (message)
                result = ExecuteQuery(query)
                senders = result["upeople_id"]
                if isinstance(senders, list):
                    # Zero or several rows: a list cannot be added to a
                    # set as a single element, so merge its items.
                    people.update(senders)
                else:
                    people.add(senders)
            # The thread is identified by its root message, not by
            # whichever message the inner loop happened to visit last.
            top_threads.append((root_id, len(people)))

        sorted_threads = sorted(top_threads,
                                key=lambda entry: entry[1],
                                reverse=True)

        # Create the list of emails for the best ranked threads.
        return [(Email(message_id, self.i_db), total)
                for message_id, total in sorted_threads[:numTop]]
Пример #27
0
    def get_people_query(developer_id, startdate, enddate, evol=False,
                         period=None):
        """Count the pull requests submitted by one developer.

        Args:
            developer_id: uuid of the developer.
            startdate: start date passed on to the SQL helper.
            enddate: end date passed on to the SQL helper.
            evol: when true the count is built with GetSQLPeriod for
                `period`; otherwise a single aggregate is built with
                GetSQLGlobal and the first and last pull request dates
                (YYYY-MM-DD) are selected as well.
            period: period handed to GetSQLPeriod.
        """
        # The builder is still requested here although nothing below uses it.
        query_builder = Pullpo.get_query_builder()

        submissions = 'COUNT(distinct(pr.id)) AS submissions'
        tables = 'pull_requests pr, people_uidentities pup'
        filters = ('pr.user_id = pup.people_id'
                   + " AND pup.uuid='" + str(developer_id) + "'")

        if evol:
            sql = GetSQLPeriod(period, 'pr.created_at', submissions, tables,
                               filters, startdate, enddate)
            return ExecuteQuery(sql)

        with_dates = (submissions
                      + ",DATE_FORMAT (min(pr.created_at),'%Y-%m-%d') as first_date, "
                      + "DATE_FORMAT (max(pr.created_at),'%Y-%m-%d') as last_date")
        sql = GetSQLGlobal('pr.created_at', with_dates, tables, filters,
                           startdate, enddate)
        return ExecuteQuery(sql)
Пример #28
0
def GetPersonIdentifiers(identities_db, upeople_id):
    """Get people, company and country information for one person.

    The profile is left-joined with the person's enrollment that has the
    greatest end date, with the organization of that enrollment and with
    the country of the profile. At most one row is requested.

    Args:
        identities_db: name of the identities database, used as the
            schema qualifier of every table.
        upeople_id: uuid of the person.
    """
    # Five schema qualifiers followed by the uuid, in placeholder order.
    substitutions = (identities_db,) * 5 + (upeople_id,)
    sql = """
        SELECT pro.uuid, pro.name, pro.email, cou.name as country,
               org.name as affiliation
        FROM %s.profiles pro
        LEFT JOIN (
            SELECT * FROM %s.enrollments WHERE (uuid, end) IN
                ( SELECT uuid, MAX(end)
                  FROM %s.enrollments
                  GROUP BY uuid
            )) enr ON enr.uuid = pro.uuid
        LEFT JOIN %s.organizations org ON org.id = enr.organization_id
        LEFT JOIN %s.countries cou ON cou.code = pro.country_code
        WHERE pro.uuid ='%s'
        LIMIT 1
        """ % substitutions
    return ExecuteQuery(sql)
Пример #29
0
    def _buildEmail(self):
        """Load the data of the email identified by ``self.message_id``.

        Fills in self.subject, self.body, self.date, self.initiator_name,
        self.initiator_id and self.url from a single row joining the
        message with its 'From' sender and that sender's identity and
        profile in the ``self.i_db`` database.
        """
        # WARNING: repeated emails may appear in some cases, possibly
        # because the same email was sent to different mailing lists.
        # Forcing the query to 1 row avoids the issue until that
        # behaviour is understood.
        query = """
                select distinct m.message_ID,
                       m.subject,
                       m.message_body,
                       m.first_date,
                       pro.name as initiator_name,
                       u.uuid as initiator_id,
                       m.mailing_list_url as url
                from messages m,
                     messages_people mp,
                     people_uidentities pup,
                     %s.uidentities u,
                     %s.profiles pro
                where m.message_ID = '%s' and
                      m.message_ID = mp.message_id and
                      mp.type_of_recipient = 'From' and
                      mp.email_address = pup.people_id and
                      pup.uuid = u.uuid and
                      pup.uuid = pro.uuid
                limit 1
                """ % (self.i_db, self.i_db, self.message_id)
        row = ExecuteQuery(query)

        # (attribute on self, column of the result), in assignment order.
        attribute_columns = (
            ("subject", "subject"),
            ("body", "message_body"),
            ("date", "first_date"),
            ("initiator_name", "initiator_name"),
            ("initiator_id", "initiator_id"),
            ("url", "url"),
        )
        for attribute, column in attribute_columns:
            setattr(self, attribute, row[column])
Пример #30
0
def GetPeopleIRC():
    """Return the ids of the IRC participants.

    The value is the 'members' column of the result: the distinct uuid
    values found in people_upeople.
    """
    result = ExecuteQuery("SELECT DISTINCT(uuid) AS members FROM people_upeople")
    return result['members']