示例#1
0
    def dump2db(self, bug_only):
        """Dump one row per changed file of every known commit to the database.

        Returns immediately when ``self.configInfo.DATABASE`` is ``False``.
        Otherwise a ``dumpLogs`` writer is opened, the file-changes table is
        created, and every change of every commit in ``self.sha2commit`` is
        written as a comma-joined row.  A file counts as a test file when its
        name contains the substring ``"test"``.

        Args:
            bug_only: Kept for interface compatibility; this method does not
                read it.
        """
        if self.configInfo.DATABASE is False:
            return

        dl = dumpLogs(self.dbPass, self.configInfo)
        try:
            dl.createFileChangesTable()

            # Only the commit objects are needed, and .values() exists on both
            # Python 2 and Python 3 dictionaries (iteritems() is Python 2 only).
            for co in self.sha2commit.values():
                for ch in co.changes:
                    insertion, deletion, file_name, language = ch.get()
                    is_test = "True" if "test" in file_name else "False"

                    fields = (
                        co.project, co.sha, language, file_name, is_test,
                        co.committer, co.commit_date, co.author,
                        co.author_date, co.isbug, insertion, deletion,
                    )
                    dl.dumpFileChanges(','.join(toStr(f) for f in fields))
        finally:
            # Release the writer even when a dump call raises.
            dl.close()
示例#2
0
    def processLog(self, config = ""):
        """Parse the git log in ``self.log_file`` and output every commit found.

        The log is read line by line.  A commit header starts a new ``Sha``
        object, which is appended to ``self.shas``.  Author and date lines are
        consumed by ``isAuthor``/``isDate``.  Lines seen before the first
        ``diff --git`` of a commit are accumulated into its log message.
        Diff lines either open a new patch object or are handed to
        ``processPatch`` together with the current ``logChunk``.

        Once parsing is done, each collected commit goes to exactly one sink,
        chosen in this order: the database (``config_info.DATABASE``), the CSV
        files under ``../Results`` (``config_info.CSV``), or stdout.

        Args:
            config: An empty string is replaced by ``self.config_info.CONFIG``.
                The resolved value is not read by the rest of this method.

        Returns:
            The ``datetime`` taken right after parsing finished, before any
            commit was written out.
        """
        if config == "":
            config = self.config_info.CONFIG

        signal.signal(signal.SIGALRM, timeout)

        project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
        self.project_name = os.path.basename(project_dir)
        print("---------- %s ------------\n" % (self.project_name))

        if self.config_info.DATABASE:
            dl = dumpLogs(self.dbPass, self.config_info)

        if self.config_info.CSV:
            if not os.path.isdir("../Results"):
                os.mkdir("../Results")
            inf1 = open("../Results/" + str(self.project_name) + "ChangeSummary.csv", 'w')
            fPtrChangeSummary = open("../Results/" + "ChangeSummary.csv", 'w')

            inf1.write("project,sha,author,commit_date,is_bug\n")

            inf2 = open("../Results/" + str(self.project_name) + "PatchSummary.csv", 'w')
            fPtrPatchSummary = open("../Results/" + "PatchSummary.csv", 'w')

            # The keyword list only supplies the extra CSV column names.
            listToDict = {}
            mockChunk = logChunk("", "C", self.config_info)  # TODO: This is C specific, why is this C specific?
            lst = mockChunk.readKeywords([])
            keywords = [k[0] for k in lst if k[1] == INCLUDED]
            for keyword in keywords:
                listToDict[str(keyword) + " Adds"] = 0
                listToDict[str(keyword) + " Dels"] = 0

            inf2.write("project, sha, language, file_name, is_test, method_name,total_add,total_del,%s\n" % ",".join(sorted(listToDict.keys())))

        shaObj = None
        patchObj = None
        is_diff = False
        log_mssg = ""
        is_no_prev_ver = False
        is_no_next_ver = False
        curLogChunk = logChunk("", "C", self.config_info)

        # The context manager closes the log file even if parsing raises.
        with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
            for l in inf:

                try:
                    # Cancel any alarm still pending from the previous line.
                    signal.alarm(0)

                    sha = self.isSha(l)
                    line = l

                    if sha:
                        shaObj = Sha(self.project_name, sha)
                        # All commits are kept and written out at the end; this
                        # will become very memory intensive in large git logs.
                        self.shas.append(shaObj)

                        is_diff = False
                        log_mssg = ""
                        continue

                    elif self.isAuthor(line, shaObj):
                        continue

                    elif self.isDate(line, shaObj):
                        continue

                    # processPatch gets the untouched line; the state machine
                    # below matches on the right-stripped one.
                    fullLine = line
                    line = line.rstrip()

                    if line.startswith('diff --git '):
                        # The first diff header ends the commit message.
                        shaObj.setLog(log_mssg)
                        is_diff = True
                        is_no_prev_ver = False
                        is_no_next_ver = False
                        continue

                    elif not is_diff:
                        if not line.strip():
                            continue
                        log_mssg += line + "\t"

                    if is_diff:
                        if line.startswith("--- a/"):
                            # Finish the changes to the old patch object
                            if patchObj is not None:
                                # If there is an existing chunk to parse, process it
                                if curLogChunk.header != "":
                                    if self.config_info.DEBUG:
                                        print("New diff with previous version: " + line)
                                        print("HEADER: " + curLogChunk.header)
                                    self.processLastChunk(patchObj, curLogChunk)

                                # Reset the current chunk obj
                                if self.config_info.DEBUG:
                                    print("Resetting.")
                                curLogChunk.reset()
                                curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                            patchObj = self.createPatch(line)
                            shaObj.patches.append(patchObj)
                        elif line == '--- /dev/null':  # earlier file was empty
                            is_no_prev_ver = True
                        elif line == '+++ /dev/null':  # next file version was empty
                            is_no_next_ver = True
                            continue
                        elif is_no_prev_ver and line.startswith("+++ b/"):
                            # Finish the changes to the old patch object
                            if patchObj is not None:
                                if curLogChunk.header != "":  # If there is an existing chunk
                                    if self.config_info.DEBUG:
                                        print("New diff with no previous version: " + line)
                                        print("HEADER: " + curLogChunk.header)
                                    self.processLastChunk(patchObj, curLogChunk)

                                    if self.config_info.DEBUG:
                                        print("Resetting.")
                                    curLogChunk.reset()
                                    curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                            patchObj = self.createPatchWithNoPrevVersion(line)
                            shaObj.patches.append(patchObj)
                        else:  # Then we reached a content line.
                            self.processPatch(fullLine, patchObj, curLogChunk)

                except TimeExceededError.TimeExceededError:
                    print("Line Timed out, moving to next.")
                    continue

        # Clear timeouts.
        signal.alarm(0)

        # Make sure to get the last patch in the file!
        if curLogChunk.header != "":  # If there is an existing chunk to parse
            if self.config_info.DEBUG:
                print("Last Patch: " + line)
                print("HEADER: " + curLogChunk.header)
            self.processLastChunk(patchObj, curLogChunk)

        parseFinish = datetime.now()

        if self.shas:  # If the log wasn't empty...
            # Create the change summary table and the method change table now if necessary
            if self.config_info.DATABASE:
                cfg = Config(self.config_info.CONFIG)
                keywordFile = cfg.ConfigSectionMap("Keywords")
                full_title = dumpLogs.getFullTitleString(curLogChunk.getEmptyKeywordDict())

                dl.createSummaryTable()

                # Only create the method changes table when there is a title for it.
                if full_title != "":
                    dl.createMethodChangesTable(full_title)

            for s in self.shas:
                if s is not None:
                    if self.config_info.DATABASE:
                        s.dumpSha(dl)
                    elif self.config_info.CSV:
                        s.shaToCsv(inf1, inf2, fPtrChangeSummary, fPtrPatchSummary)
                    else:
                        s.printSha()

        if self.config_info.DATABASE:
            print("Closing Time.")
            dl.close()

        if self.config_info.CSV:
            inf1.close()
            inf2.close()
            fPtrChangeSummary.close()
            fPtrPatchSummary.close()

        print("Sha's processed:")
        print(len(self.shas))

        return parseFinish
示例#3
0
  def processLog(self):
    """Parse the git log in ``self.log_file`` and dump every commit via ``dumpLogs``.

    The log is read line by line.  A commit header starts a new ``Sha``
    object, which is appended to ``self.shas``.  Author and date lines are
    consumed by ``isAuthor``/``isDate``.  Lines seen before the first
    ``diff --git`` of a commit are accumulated into its log message.  Diff
    lines either open a new patch object, which is attached to the current
    commit as soon as it is created, or are handed to ``processPatch``.

    After parsing, every collected commit is dumped through one ``dumpLogs``
    writer and the number of commits is printed.
    """
    project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
    self.project_name = os.path.basename(project_dir)
    print("---------- %s ------------\n" % (self.project_name))

    dl = dumpLogs()

    shaObj = None
    patchObj = None
    is_diff = False
    log_mssg = ""
    is_no_prev_ver = False
    is_no_next_ver = False

    # The context manager closes the log file even if parsing raises.
    with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
      for l in inf:
        sha = self.isSha(l)
        line = l

        if sha:
          shaObj = Sha(self.project_name, sha)
          self.shas.append(shaObj)
          is_diff = False
          log_mssg = ""
          continue

        elif self.isAuthor(line, shaObj):
          continue

        elif self.isDate(line, shaObj):
          continue

        line = line.strip()

        if line.startswith('diff --git '):
          # The first diff header ends the commit message.
          shaObj.setLog(log_mssg)
          is_diff = True
          is_no_prev_ver = False
          is_no_next_ver = False
          continue

        elif not is_diff:
          if not line:
            continue
          log_mssg += line + "\t"

        if is_diff:
          if line.startswith("--- a/"):
            patchObj = self.createPatch(line)
            shaObj.patches.append(patchObj)
          elif line == '--- /dev/null':  # earlier file was empty
            is_no_prev_ver = True
          elif line == '+++ /dev/null':  # next file version was empty
            is_no_next_ver = True
            continue
          elif is_no_prev_ver and line.startswith("+++ b/"):
            patchObj = self.createPatchWithNoPrevVersion(line)
            shaObj.patches.append(patchObj)
          else:
            self.processPatch(line, patchObj)

    # No trailing append of patchObj here: every patch is attached to its
    # commit when it is created, so appending again would duplicate the last
    # patch (or add None when the log contains no patch at all).

    for s in self.shas:
      if s is not None:
        s.dumpSha(dl)

    dl.close()
    # Parenthesised form prints the same under Python 2 and Python 3.
    print(len(self.shas))
示例#4
0
    def processLog(self, config=""):
        """Parse the git log in ``self.log_file`` and output every commit found.

        The log is read line by line.  A commit header starts a new ``Sha``
        object, which is appended to ``self.shas``.  Author and date lines are
        consumed by ``isAuthor``/``isDate``.  Lines seen before the first
        ``diff --git`` of a commit are accumulated into its log message.
        Diff lines either open a new patch object or are handed to
        ``processPatch`` together with the current ``logChunk``.

        Once parsing is done, each collected commit goes to exactly one sink,
        chosen in this order: the database (``config_info.DATABASE``), the CSV
        files under ``../Results`` (``config_info.CSV``), or stdout.

        Args:
            config: An empty string is replaced by ``self.config_info.CONFIG``.
                The resolved value is not read by the rest of this method.

        Returns:
            The ``datetime`` taken right after parsing finished, before any
            commit was written out.
        """
        if config == "":
            config = self.config_info.CONFIG

        signal.signal(signal.SIGALRM, timeout)

        project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
        self.project_name = os.path.basename(project_dir)
        print("---------- %s ------------\n" % (self.project_name))

        if self.config_info.DATABASE:
            dl = dumpLogs(self.dbPass, self.config_info)

        if self.config_info.CSV:
            if not os.path.isdir("../Results"):
                os.mkdir("../Results")
            inf1 = open(
                "../Results/" + str(self.project_name) + "ChangeSummary.csv",
                'w')
            fPtrChangeSummary = open("../Results/" + "ChangeSummary.csv", 'w')

            inf1.write("project,sha,author,author_email,commit_date,is_bug\n")

            inf2 = open(
                "../Results/" + str(self.project_name) + "PatchSummary.csv",
                'w')
            fPtrPatchSummary = open("../Results/" + "PatchSummary.csv", 'w')

            # The keyword list only supplies the extra CSV column names.
            listToDict = {}
            # TODO: This is C specific, why is this C specific?
            mockChunk = logChunk("", "C", self.config_info)
            lst = mockChunk.readKeywords([])
            keywords = [k[0] for k in lst if k[1] == INCLUDED]
            for keyword in keywords:
                listToDict["\"" + str(keyword) + "\" adds"] = 0
                listToDict["\"" + str(keyword) + "\" dels"] = 0

            inf2.write(
                "project, sha, language, file_name, is_test, method_name,total_add,total_del,%s\n"
                % ",".join(sorted(listToDict.keys())))

        shaObj = None
        patchObj = None
        is_diff = False
        log_mssg = ""
        is_no_prev_ver = False
        is_no_next_ver = False
        curLogChunk = logChunk("", "C", self.config_info)

        # The context manager closes the log file even if parsing raises.
        with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
            for l in inf:

                try:
                    # Cancel any alarm still pending from the previous line.
                    signal.alarm(0)

                    sha = self.isSha(l)
                    line = l

                    if sha:
                        shaObj = Sha(self.project_name, sha)
                        # All commits are kept and written out at the end; this
                        # will become very memory intensive in large git logs.
                        self.shas.append(shaObj)

                        is_diff = False
                        log_mssg = ""
                        continue

                    elif self.isAuthor(line, shaObj):
                        continue

                    elif self.isDate(line, shaObj):
                        continue

                    # processPatch gets the untouched line; the state machine
                    # below matches on the right-stripped one.
                    fullLine = line
                    line = line.rstrip()

                    if line.startswith('diff --git '):
                        # The first diff header ends the commit message.
                        shaObj.setLog(log_mssg)
                        is_diff = True
                        is_no_prev_ver = False
                        is_no_next_ver = False
                        continue

                    elif not is_diff:
                        if not line.strip():
                            continue
                        log_mssg += line + "\t"

                    if is_diff:
                        if line.startswith("--- a/"):
                            # Finish the changes to the old patch object
                            if patchObj is not None:
                                # If there is an existing chunk to parse, process it
                                if curLogChunk.header != "":
                                    if self.config_info.DEBUG:
                                        print("New diff with previous version: " + line)
                                        print("HEADER: " + curLogChunk.header)
                                    self.processLastChunk(patchObj, curLogChunk)

                                # Reset the current chunk obj
                                if self.config_info.DEBUG:
                                    print("Resetting.")
                                curLogChunk.reset()
                                curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                            patchObj = self.createPatch(line)
                            shaObj.patches.append(patchObj)
                        elif line == '--- /dev/null':  # earlier file was empty
                            is_no_prev_ver = True
                        elif line == '+++ /dev/null':  # next file version was empty
                            is_no_next_ver = True
                            continue
                        elif is_no_prev_ver and line.startswith("+++ b/"):
                            # Finish the changes to the old patch object
                            if patchObj is not None:
                                if curLogChunk.header != "":  # If there is an existing chunk
                                    if self.config_info.DEBUG:
                                        print("New diff with no previous version: " + line)
                                        print("HEADER: " + curLogChunk.header)
                                    self.processLastChunk(patchObj, curLogChunk)

                                    if self.config_info.DEBUG:
                                        print("Resetting.")
                                    curLogChunk.reset()
                                    curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                            patchObj = self.createPatchWithNoPrevVersion(line)
                            shaObj.patches.append(patchObj)
                        else:  # Then we reached a content line.
                            self.processPatch(fullLine, patchObj, curLogChunk)

                except TimeExceededError.TimeExceededError:
                    print("Line Timed out, moving to next.")
                    continue

        # Clear timeouts.
        signal.alarm(0)

        # Make sure to get the last patch in the file!
        if curLogChunk.header != "":  # If there is an existing chunk to parse
            if self.config_info.DEBUG:
                print("Last Patch: " + line)
                print("HEADER: " + curLogChunk.header)
            self.processLastChunk(patchObj, curLogChunk)

        parseFinish = datetime.now()

        if self.shas:  # If the log wasn't empty...
            # Create the change summary table and the method change table now if necessary
            if self.config_info.DATABASE:
                cfg = Config(self.config_info.CONFIG)
                keywordFile = cfg.ConfigSectionMap("Keywords")
                full_title = dumpLogs.getFullTitleString(
                    curLogChunk.getEmptyKeywordDict())

                dl.createSummaryTable()

                # Only create the method changes table when there is a title for it.
                if full_title != "":
                    dl.createMethodChangesTable(full_title)

            for s in self.shas:
                if s is not None:
                    if self.config_info.DATABASE:
                        s.dumpSha(dl)
                    elif self.config_info.CSV:
                        s.shaToCsv(inf1, inf2, fPtrChangeSummary,
                                   fPtrPatchSummary)
                    else:
                        s.printSha()

        if self.config_info.DATABASE:
            print("Closing Time.")
            dl.close()

        if self.config_info.CSV:
            inf1.close()
            inf2.close()
            fPtrChangeSummary.close()
            fPtrPatchSummary.close()

        print("Sha's processed:")
        print(len(self.shas))

        return parseFinish
示例#5
0
import sys
示例#6
0
import sys
示例#7
0
    def processLog(self, config = Util.CONFIG):
        """Parse the git log in ``self.log_file``, writing each commit as soon as it is complete.

        The log is read line by line.  When a new commit header is seen, the
        previous commit is flushed: to the database when ``Util.DATABASE`` is
        set, otherwise to stdout and, when ``Util.CSV`` is set, also to the
        CSV files under ``../Results``.  Author and date lines are consumed by
        ``isAuthor``/``isDate``.  Lines seen before the first ``diff --git``
        of a commit are accumulated into its log message.  Diff lines either
        open a new patch object or are handed to ``processPatch`` together
        with the current ``logChunk``.

        The last commit of the log is flushed after the loop.  Commits are
        appended to ``self.shas`` only when ``Util.DEBUGLITE`` is set.

        Args:
            config: Passed to the ``logChunk`` that collects the chunk
                currently being parsed.
        """
        project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
        self.project_name = os.path.basename(project_dir)
        print("---------- %s ------------\n" % (self.project_name))

        if Util.DATABASE == 1:
            dl = dumpLogs()

        if Util.CSV == 1:
            if not os.path.isdir("../Results"):
                os.mkdir("../Results")
            inf1 = open("../Results/" + str(self.project_name) + "ChangeSummary.csv", 'w')
            fPtrChangeSummary = open("../Results/" + "ChangeSummary.csv", 'a')

            inf1.write("project,sha,author,commit_date,is_bug\n")

            inf2 = open("../Results/" + str(self.project_name) + "PatchSummary.csv", 'w')
            fPtrPatchSummary = open("../Results/" + "PatchSummary.csv", 'a')

            # The keyword list only supplies the extra CSV column names;
            # readKeywords is called with a list that is read afterwards.
            lst = []
            listToDict = {}
            mockChunk = logChunk("", "C")
            mockChunk.readKeywords(lst)
            keywords = [sub_list[0] for sub_list in lst]
            for keyword in keywords:
                listToDict[str(keyword) + " Adds"] = 0
                listToDict[str(keyword) + " Dels"] = 0

            inf2.write("project, sha, language, file_name, is_test,isExceptionPatch, method_name,total_add,total_del,%s\n" % ",".join(listToDict.keys()))

        shaObj = None
        patchObj = None
        is_diff = False
        log_mssg = ""
        is_no_prev_ver = False
        is_no_next_ver = False
        curLogChunk = logChunk("", "C", config)

        # The context manager closes the log file even if parsing raises.
        with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
            for l in inf:
                sha = self.isSha(l)
                line = l

                if sha:
                    # A new header means the previous commit is complete.
                    if shaObj is not None:
                        if Util.DATABASE:
                            shaObj.dumpSha(dl)
                        else:
                            shaObj.printSha()
                            if Util.CSV:
                                shaObj.shaToCsv(inf1, inf2, fPtrChangeSummary, fPtrPatchSummary)

                    shaObj = Sha(self.project_name, sha)
                    if Util.DEBUGLITE:  # Save for testing.
                        # This will become very memory intensive in large git logs.
                        self.shas.append(shaObj)
                    is_diff = False
                    log_mssg = ""
                    continue

                elif self.isAuthor(line, shaObj):
                    continue

                elif self.isDate(line, shaObj):
                    continue

                # processPatch gets the untouched line; the state machine
                # below matches on the stripped one.
                fullLine = line
                line = line.strip()

                if line.startswith('diff --git '):
                    # The first diff header ends the commit message.
                    shaObj.setLog(log_mssg)
                    is_diff = True
                    is_no_prev_ver = False
                    is_no_next_ver = False
                    continue

                elif not is_diff:
                    if not line:
                        continue
                    log_mssg += line + "\t"

                if is_diff:
                    if line.startswith("--- a/"):
                        # Finish the changes to the old patch object
                        if patchObj is not None:
                            # If there is an existing chunk to parse, process it
                            if curLogChunk.header != "":
                                if Util.DEBUG == 1:
                                    print("New diff with previous version: " + line)
                                    print("HEADER: " + curLogChunk.header)
                                self.processLastChunk(patchObj, curLogChunk)

                            # Reset the current chunk obj
                            if Util.DEBUG == 1:
                                print("Resetting.")
                            curLogChunk.reset()
                            curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                        patchObj = self.createPatch(line)
                        shaObj.patches.append(patchObj)
                    elif line == '--- /dev/null':  # earlier file was empty
                        is_no_prev_ver = True
                    elif line == '+++ /dev/null':  # next file version was empty
                        is_no_next_ver = True
                        continue
                    elif is_no_prev_ver and line.startswith("+++ b/"):
                        # Finish the changes to the old patch object
                        if patchObj is not None:
                            if curLogChunk.header != "":  # If there is an existing chunk
                                if Util.DEBUG == 1:
                                    print("New diff with no previous version: " + line)
                                    print("HEADER: " + curLogChunk.header)
                                self.processLastChunk(patchObj, curLogChunk)

                                if Util.DEBUG == 1:
                                    print("Resetting.")
                                curLogChunk.reset()
                                curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                        patchObj = self.createPatchWithNoPrevVersion(line)
                        shaObj.patches.append(patchObj)
                    else:  # Then we reached a content line.
                        self.processPatch(fullLine, patchObj, curLogChunk)

        # Make sure to get the last patch in the file!
        if curLogChunk.header != "":  # If there is an existing chunk to parse
            if Util.DEBUG == 1:
                print("Last Patch: " + line)
                print("HEADER: " + curLogChunk.header)
            self.processLastChunk(patchObj, curLogChunk)

        # Write out last sha.
        if shaObj is not None and Util.DATABASE:
            if Util.DEBUGLITE:
                print("Writing to db.")
            shaObj.dumpSha(dl)

        if Util.DATABASE == 1:
            print("Closing Time.")
            dl.close()

        if Util.CSV == 1:
            # An empty log leaves shaObj as None; there is nothing to flush
            # then, but the CSV files still have to be closed.
            if shaObj is not None:
                shaObj.printSha()
                shaObj.shaToCsv(inf1, inf2, fPtrChangeSummary, fPtrPatchSummary)
            inf1.close()
            inf2.close()
            fPtrChangeSummary.close()
            fPtrPatchSummary.close()
        elif shaObj is not None and not Util.DATABASE:
            # Print-only mode: commits are flushed when the next header is
            # seen, so the last one must be printed here or it is lost.
            shaObj.printSha()