Exemplo n.º 1
0
    def readReviewComments(self):
        """Read review comment records and add the edges they describe.

        For every line the review is registered through ``addReview``. When
        the commenter differs from the review owner, an edge from the
        commenter to the owner is added. When the (normalised) file name is
        present in ``self.fileDict``, the file is attached to the review and
        an edge from the owner to the file is added.

        Lookups in ``self.humanDict`` are unguarded, so both people named on
        a line must already be registered.

        Raises:
            AssertionError: if a line does not contain exactly six fields.
        """
        # File with review, commit, file and developer data from comments in format:
        # relationType/\fileName/\ownerName/\commenterName/\reviewNumber/\commitHash
        # The context manager closes the file even if a line fails to parse.
        with open("Data\\ReviewFilesFromComments.txt", "r") as revFileComm:
            for crtLine in revFileComm:
                # Strip only the line terminator so a final line without a
                # trailing newline does not lose its last character.
                lst = crtLine.rstrip('\n').split("/\\")
                assert len(lst) == 6
                reviewNumber = lst[4]
                self.addReview(reviewNumber)
                ownerName = Ownership.purifyName(lst[2])
                commenterName = Ownership.purifyName(lst[3])

                node1 = self.humanDict[ownerName].index
                node2 = self.humanDict[commenterName].index
                L = Settings.getLayer2('reviewer', 'reviewOwner')
                if ownerName != commenterName:
                    # Add edge from commenter to owner.
                    self.addEdge(node2, L, node1, L, 8)
                if '.' not in lst[1]:
                    # All files should contain at least one '.' from the file type.
                    continue
                # Drop the extension, turn '/' into '.', then drop one more
                # trailing character.
                # NOTE(review): the final [:-1] is kept from the original;
                # confirm against the data file why it is needed.
                fileName = lst[1].rsplit('.', 1)[0].replace('/', '.')[:-1]
                if fileName in self.fileDict:
                    self.reviewDict[reviewNumber].addFile(self.fileDict[fileName])
                    # Add edge between reviewOwner and file on Review layer.
                    self.addEdge(node1, 3, self.fileDict[fileName], 3, 11)
Exemplo n.º 2
0
    def readIssues(self):
        """Read issue records and register the ones that qualify.

        An ``Issue.Issue`` is built for every line. It is registered through
        ``addIssue`` only when its type equals ``self.issueType``, its
        resolution is ``FIXED`` or ``WORKSFORME``, and its name is not
        already a key of ``self.issueDict``.

        Raises:
            AssertionError: if a line does not contain exactly six fields, or
                a timestamp field has no space separating its two parts.
        """
        # Each line of file contains issue data in format:
        # bugID/\version/\creation_ts/\delta_ts/\status/\resolution.
        # The context manager closes the file even if a line fails to parse.
        with open("Data\\BugDetails.txt") as issueFile:
            for crtLine in issueFile:
                # Strip only the line terminator so the resolution on a final
                # line without a trailing newline is not truncated.
                lst = crtLine.rstrip('\n').split('/\\')
                assert len(lst) == 6

                # Fields 2 (creation_ts) and 3 (delta_ts) are "<first> <rest>";
                # any further spaces inside <rest> are removed.
                for pos in (2, 3):
                    dateTime = lst[pos].split(' ', 1)
                    # Validate before indexing, otherwise the check is useless.
                    assert len(dateTime) == 2
                    lst[pos] = Ownership.getTime(dateTime[0],
                                                 dateTime[1].replace(' ', ''))

                issue = Issue.Issue(lst[0], lst[1], lst[2], lst[3], lst[4],
                                    lst[5], self.projectList[self.projectID])

                # Only issues of the requested type with an accepted
                # resolution are added.
                if (issue.getType() == self.issueType
                        and lst[5] in ('FIXED', 'WORKSFORME')):
                    if issue.name not in self.issueDict:
                        self.addIssue(issue)
                        issue.setIndex(self.nrIssues)
                        self.i_R[self.nrNodes] = {}
Exemplo n.º 3
0
    def readReviews(self):
        """Read review edges and add them to the network.

        Three kinds of line are handled, selected by the first field:

        * ``CommentEdge`` / ``PCommentEdge``: adds an edge from the commenter
          to the owner.
        * ``Review2Commit``: registers the review and records it as the
          review of the commit in ``self.reviewIdForCommit``.
        * anything else: collects ``(edgeType, developerName)`` per commit
          hash and, when the commit is in ``self.commitDict`` and the
          developer is in ``self.humanDict``, links the developer to every
          file modified by that commit.

        After the file has been read, the collected edges are handed to
        ``processReviewEdges``.

        Raises:
            AssertionError: if a line does not have exactly three fields, a
                commit is mapped to two different reviews, or an edge type is
                not one of the four known developer edge types.
        """
        # Layer name to use for each developer edge type.
        layerOfEdge = {
            'UploaderEdge': 'patchUploader',
            'OwnerEdge': 'reviewOwner',
            'AuthorEdge': 'author',
            'ApprovalEdge': 'approver',
        }
        reviewEdges = {}
        # The context manager closes the file even if a line fails to parse.
        with open("Data\\ReviewEdges2020.txt", "r") as reviewFile:
            for crtLine in reviewFile:
                # Strip the line terminator once, so the last field is clean
                # in every branch (and a final line without a newline is not
                # truncated).
                lst = crtLine.rstrip('\n').split('/\\')
                assert len(lst) == 3
                edgeType = lst[0]
                if edgeType in ('CommentEdge', 'PCommentEdge'):
                    # The format of current line is "edgeType/\ownerName/\commenterName".
                    name1 = Ownership.purifyName(lst[1])
                    name2 = Ownership.purifyName(lst[2])
                    L = Settings.getLayer2('reviewer', 'reviewOwner')
                    self.addEdge(self.humanDict[name2].index, L,
                                 self.humanDict[name1].index, L, 8)
                elif edgeType == 'Review2Commit':
                    # The format of current line is "edgeType/\reviewNumber/\commitHash".
                    reviewNumber = lst[1]
                    self.addReview(reviewNumber)
                    commitHash = lst[2]
                    # If the commit belongs to multiple reviews, then data is invalid.
                    assert (commitHash not in self.reviewIdForCommit
                            or self.reviewIdForCommit[commitHash] == reviewNumber)
                    self.reviewIdForCommit[commitHash] = reviewNumber
                else:
                    # The format of current line is "edgeType/\commitHash/\developerName", where
                    # edgeType in {AuthorEdge, UploaderEdge, ApprovalEdge, OwnerEdge}
                    commitHash = lst[1]
                    developerName = Ownership.purifyName(lst[2])
                    # reviewEdges[commitHash] = edges that relate to commitHash s.t edges between the
                    # review corresponding to commitHash can be linked with the humans in one pass.
                    reviewEdges.setdefault(commitHash, []).append(
                        (edgeType, developerName))
                    if commitHash not in self.commitDict:
                        continue
                    assert edgeType in layerOfEdge
                    # The fallback mirrors the original default layer.
                    layer = layerOfEdge.get(edgeType, 'patchUploader')
                    L = Settings.getLayer2(layer, 'file')
                    # link humans to the files of the commit
                    if developerName in self.humanDict:
                        humanNode = self.humanDict[developerName].index
                        for fileNode in self.commitDict[commitHash].modifiedFiles:
                            self.addEdge(humanNode, L, fileNode, L, 10)
        self.processReviewEdges(reviewEdges, 3)
Exemplo n.º 4
0
 def readCommits(self, commitDataFile):
     """Read commit records from an open text file and add them to the network.

     The file interleaves two kinds of line. A header line
     (committer, author, commit hash) starts a new commit; it is recognised
     by its first field containing none of '.', '/' or a backslash. Every
     other non-blank line is a file path belonging to the most recent commit.

     Args:
         commitDataFile: open text-mode file object; it is always closed
             before this method returns, even when a line fails to parse.

     Raises:
         AssertionError: if a header line does not have exactly three fields.
     """
     fileListForCrtCommit = []
     crtCommitHash = 0
     committerName = ''
     authorName = ''
     try:
         while True:
             crtLine = commitDataFile.readline()
             if not crtLine:
                 break
             if crtLine == '\n':
                 # Skip blank lines.
                 continue
             lst = crtLine.split('/\\')
             if not any(ch in lst[0] for ch in ('.', '/', '\\')):
                 # committer/\author/\commitHash
                 assert len(lst) == 3
                 committerName = Ownership.purifyName(lst[0])
                 authorName = Ownership.purifyName(lst[1])
                 self.addHuman(committerName, 0)
                 self.addHuman(authorName, 5)
                 # Strip only the line terminator so a final line without a
                 # trailing newline keeps its full hash.
                 crtCommitHash = lst[2].rstrip('\n')
                 self.addCommit(crtCommitHash)
                 if authorName != committerName:
                     # add edge committer->author as cross layer edge
                     self.addEdge(self.humanDict[committerName].index, 1,
                                  self.humanDict[authorName].index, 3, 2)
                 fileListForCrtCommit = []
             else:
                 # Drop the extension and use '.' as the path separator.
                 crtFile = crtLine.rsplit('.', 1)[0].replace("/", '.')
                 if crtFile not in self.fileDict:
                     self.addFile(crtFile)
                 self.commitDict[crtCommitHash].addFile(self.fileDict[crtFile])
                 if fileListForCrtCommit:
                     # Link this file to the previous file of the same commit.
                     L = Settings.getLayer2('file', 'fileC')
                     self.addEdge(self.fileDict[crtFile], L,
                                  self.fileDict[fileListForCrtCommit[-1]], L, 3)
                 # Add edge from author of file to file.
                 self.addEdge(self.humanDict[authorName].index, 1,
                              self.fileDict[crtFile], 1, 4)
                 if authorName != committerName:
                     # Add edge from committer of file to file.
                     self.addEdge(self.humanDict[committerName].index, 1,
                                  self.fileDict[crtFile], 1, 4)
                 fileListForCrtCommit.append(crtFile)
     finally:
         commitDataFile.close()
Exemplo n.º 5
0
 def readNameUsername(self):
     """Read name/username pairs and attach usernames to known humans.

     For every line whose (purified) name is a key of ``self.humanDict``,
     the username is stored in ``self.nameOfUsername`` and set on the human
     through ``setUserName``. Other lines are ignored.

     Raises:
         AssertionError: if a line does not have exactly two fields.
         UnicodeDecodeError: if a line is not valid UTF-8.
     """
     # name/\username from Bugzilla
     # The file is read as bytes and decoded line by line; the context
     # manager closes it even if a line fails to parse.
     with open("Data\\emailName2020B.txt", "rb") as f:
         for rawLine in f:
             lst = rawLine.decode('utf-8').split('/\\')
             assert len(lst) == 2
             name = Ownership.purifyName(lst[0])
             if name in self.humanDict:
                 # Strip only the line terminator so a final line without a
                 # trailing newline keeps its full username.
                 username = lst[1].rstrip('\n')
                 self.nameOfUsername[username] = name
                 self.humanDict[name].setUserName(username)
Exemplo n.º 6
0
 def readHumanRoleFile(self, humanRoleFile, fileId):
     """Register every human listed in an open role file.

     The first field of each line is purified and passed to ``addHuman``
     together with ``fileId``.

     Args:
         humanRoleFile: open file object. For ``fileId`` 6 to 8 it must
             yield bytes (each line is decoded as UTF-8); otherwise it must
             yield text. It is always closed before this method returns.
         fileId: identifier of the role file, forwarded to ``addHuman``.

     Raises:
         AssertionError: if a line has an unexpected number of fields
             (two for ``fileId`` 6 to 8, three otherwise).
     """
     # names in Bugzilla are read from binary files
     isBinary = 6 <= fileId <= 8
     # Text files use name/\email/\index; the binary ones have one field less.
     expectedFields = 2 if isBinary else 3
     try:
         while True:
             crtLine = humanRoleFile.readline()
             if isBinary:
                 crtLine = crtLine.decode('utf-8')
             if not crtLine:
                 break
             lst = crtLine.split('/\\')
             assert len(lst) == expectedFields
             self.addHuman(Ownership.purifyName(lst[0]), fileId)
     finally:
         humanRoleFile.close()
Exemplo n.º 7
0
 def readIssueEdges(self):
     """Read issue edges and link reporters/assignees to their issues.

     The first character of the role field selects the handling: 'C' lines
     ("CC-Assignees") are skipped, 'A' lines add an assignee edge, and the
     remaining lines are expected to start with 'R' and add a reporter edge
     while flagging the human as a reporter. Lines whose bug id is not a key
     of ``self.issueDict`` are ignored.

     Returns:
         The value of ``self.nrIssues``.

     Raises:
         AssertionError: if a line does not have exactly three fields, the
             named human is not in ``self.humanDict``, or the role is not
             one of 'A', 'C', 'R'.
     """
     # Each line of file contains data for an issue in format
     # HumanRole/\name-or-username/\bugID.
     # The file is read as bytes and decoded line by line; the context
     # manager closes it even if a line fails to parse.
     with open("Data\\IssueEdges2020B.txt", "rb") as issueEdgesFile:
         for rawLine in issueEdgesFile:
             lst = rawLine.decode('utf-8').split('/\\')
             assert len(lst) == 3
             role = lst[0][0]
             if role == 'C':
                 # Ignore "CC-Assignees".
                 # TODO: add CC-assignee edges; for these lines lst[1] is a
                 # username that can be resolved through self.nameOfUsername.
                 continue
             name = Ownership.purifyName(lst[1])
             # Strip only the line terminator so a final line without a
             # trailing newline keeps its full bug id.
             bugId = lst[-1].rstrip('\n')
             if bugId not in self.issueDict:
                 # issue was not fixed
                 continue
             # All humans must have been introduced previously.
             assert name in self.humanDict
             if role == 'A':
                 layer = 'issueAssignee'
                 col = 6
             else:
                 assert role == 'R'
                 layer = 'issueReporter'
                 col = 5
                 self.humanDict[name].isReporter = True
             L = Settings.getLayer2(layer, 'issue')
             self.addEdge(self.humanDict[name].index, L,
                          self.issueDict[bugId], L, col)
     return self.nrIssues
Exemplo n.º 8
0
    def readOwnershipFile(self):
        """Read per-file ownership data and add ownership edges.

        The file is a sequence of records: one header line naming a file and
        its number of commits, followed by that many commit lines. Records
        for files that are not keys of ``self.fileDict`` are skipped.

        For each remaining record this adds a major or minor edge from every
        committer to the file, adds edges between each ordered pair of
        distinct committers depending on their ``isRole[0]`` flags, and
        stores the top committer and a commit percentage in
        ``self.ownershipDict``.

        Raises:
            AssertionError: if a committer's changed lines exceed the total
                changed lines recorded for the file.
        """
        # For each modified file in the repository, the format is as follows:
        # fileName/\nrCommits
        # author_name/\self.author_date/\author_timezone/\added/\removed/\complexity
        # Note: author_date = '%Y-%m-%d %H:%M:%S'
        # The context manager closes the file even if a record fails to parse.
        with open("Data\\ownership.txt") as ownershipFile:
            while True:
                # Read the line with file details, then read nrCommits lines
                # with commit details.
                crtLine = ownershipFile.readline()
                if not crtLine:
                    break
                lst = crtLine.split('/\\')
                # Format the file name to contain '.' instead of '/', then
                # remove the file type (i.e "dir.file.java" -> "dir.file").
                compName = lst[0].replace('/', '.').rsplit('.', 1)[0]
                nrCommitLines = int(lst[1])
                if compName not in self.fileDict:
                    # Ignore modified files that are not in the Contribution
                    # Network, but still consume their commit lines.
                    for _ in range(nrCommitLines):
                        ownershipFile.readline()
                    continue
                obj = Ownership.Ownership(compName)

                for _ in range(nrCommitLines):
                    nxtLine = ownershipFile.readline().split('/\\')
                    obj.addModif(Ownership.getModifFromLine(nxtLine))

                allCommitters = obj.authorDex[0]
                L = Settings.getLayer2('committer', 'committer')
                A = Settings.getLayer2('committer', 'author')
                # Line totals are only checked for files in self.posInFiles.
                nrChangedLines = 0
                if self.fileDict[obj.name] in self.posInFiles:
                    nrChangedLines = obj.sumAdd[0] + obj.sumRem[0]

                for c1 in allCommitters:
                    # Percentage of the file's commits made by c1.
                    cp = 100 * allCommitters[c1].nrCommits / obj.nrCommits[0]
                    # More than 50% of the commits -> major edge (14),
                    # otherwise minor edge (-14).
                    edgeType = 14 if cp > 50 else -14
                    self.addEdge(self.humanDict[c1].index, 1,
                                 self.fileDict[obj.name], 1, edgeType)
                    if nrChangedLines != 0:
                        # Sanity check: one committer cannot have changed more
                        # lines than were changed in total.
                        c1ChangedLines = (allCommitters[c1].sumAdd +
                                          allCommitters[c1].sumRem)
                        assert c1ChangedLines <= nrChangedLines
                    for c2 in allCommitters:
                        if c1 == c2:
                            continue
                        c1IsRole0 = self.humanDict[c1].isRole[0]
                        c2IsRole0 = self.humanDict[c2].isRole[0]
                        if c1IsRole0 and c2IsRole0:
                            # Add committer to committer edge.
                            self.addEdge(self.humanDict[c1].index, L,
                                         self.humanDict[c2].index, L, 13)
                        elif c1IsRole0 or c2IsRole0:
                            # Add committer to author edge.
                            self.addEdge(self.humanDict[c1].index, L,
                                         self.humanDict[c2].index, A, 2)

                # Tuple(name, nrCommits) for obj file where name is the name
                # of the committer with the highest number of the commits to obj.
                ownershipTuple = obj.nrCommitsOwner(0)
                self.ownershipDict[self.fileDict[obj.name]] = (
                    ownershipTuple[0], obj.nrCommitsPercentage(0))