Exemplo n.º 1
0
 def _jnlGenerator(self, jnlFile):
     """Yield ``(caseNum, block)`` pairs for the IC blocks of a journal file.

     The journal is read line by line.  A line matching the ``400|... IC
     Start`` pattern opens a new block; its first captured number is used as
     the case number, and the block is only recorded when that number is a
     key of ``self.execResultDict``.  Lines are collected (start and end
     lines included) until a ``410|... IC End`` line is seen.  The block is
     yielded only if at least one collected line contained
     ``self.tcfFullName``.

     :param jnlFile: path handed to ``basicApi.openFile`` in read mode.
     :return: generator of ``(jnlCaseNum, block)`` where ``block`` is the
         list of raw journal lines of that IC.
     """
     # Compiled once: both patterns are loop invariants.
     icStart = re.compile(
         r"^400\|[0-9]+\s+([0-9]+)\s+[0-9]+\s+[0-9:]+\s+[0-9]+\|IC Start")
     icEnd = re.compile(
         r"^410\|[0-9]+\s+([0-9]+)\s+[0-9]+\s+[0-9:]+\s+[0-9]+\|IC End")
     jnlCaseNum = ''
     block = []
     caseNameMatched = False
     fd = basicApi.openFile(jnlFile, 'r')
     # try/finally so the handle is released even when the consumer stops
     # iterating early (generator close) or an exception propagates.
     try:
         for line in fd:
             # Check for block saving begin
             matched = icStart.match(line)
             if matched:
                 # `in` instead of has_key(): works on Python 2 and 3
                 if matched.group(1) in self.execResultDict:
                     jnlCaseNum = matched.group(1)
                 else:
                     jnlCaseNum = ''
                 block = []
             # Append test case journal line to the block
             if jnlCaseNum != '':
                 # Substring test instead of comparing find() with -1 by identity
                 if self.tcfFullName in line:
                     caseNameMatched = True
                 block.append(line)
                 # The block end only needs checking while in append mode.
                 if icEnd.match(line):
                     if caseNameMatched:
                         yield jnlCaseNum, block
                     jnlCaseNum = ''
                     caseNameMatched = False
     finally:
         fd.close()
Exemplo n.º 2
0
 def _execOutGenerator(self, execOutFile):
     """Yield a regex match object for every result line of an exec output file.

     A line qualifies when it matches ``<text>{<digits>}: <word>`` from its
     start; the yielded match exposes those three parts as groups 1-3.

     :param execOutFile: path handed to ``basicApi.openFile`` in read mode.
     :return: generator of ``re`` match objects, one per matching line.
     """
     # Compiled once: the pattern is a loop invariant.
     resultLine = re.compile(r"(.*)\{([0-9]*)\}\: (\w+)")
     fd = basicApi.openFile(execOutFile, "r")
     # try/finally so the handle is released even when the consumer stops
     # iterating early (generator close) or an exception propagates.
     try:
         for line in fd:
             matched = resultLine.match(line)
             if matched:
                 yield matched
     finally:
         fd.close()
Exemplo n.º 3
0
 def analysing(self):
     """Vectorise each case journal and record TF-IDF results in the database.

     For every ``(itemId, CaseName)`` in ``self.task.caseList`` the stored
     journal is filtered, stemmed and turned into a feature vector.  A zero
     vector is reported in ``self.err`` and logged through
     ``insertNotAnalysis``.  Otherwise the vector is stored with
     ``insertTfidfResult``; when that call reports a return id of 1 the
     vector is fuzzy-matched against the vectors fetched beforehand and each
     match is stored with ``insertSimilarity``.

     Problems are appended to ``self.err``; nothing is returned.
     """
     basicApi.d_print('TFIDF_analyzer: analysing()')
     for itemId, CaseName in self.task.caseList:
         jnlPath = self.task.getJnlStoreName(itemId)
         block = basicApi.openFile(jnlPath, "r")
         # Release the journal handle once the text has been extracted; the
         # original never closed it, leaking one handle per case.
         # NOTE(review): assumes tfidf.stemming() returns a fully
         # materialised value (not a lazy reader over `block`) - confirm.
         try:
             # Filter jnl block
             filteBlock = _filteJnl(block)
             # Simplify jnl block
             extractedJnl = tfidf.stemming(filteBlock, self.stopWordDic)
         finally:
             block.close()
         msgVector = tfidf.featureGenerator(extractedJnl, self.featureDic)
         # Check msgVector
         if self._isZeroVec(msgVector):
             text = 'ERROR: Extracted journal file is empty, please manually analysis it.'
             self.err.append('%s\n%s\n'%(CaseName, text))
             # Write log into database
             self.task.db.cateTFIDF.insertNotAnalysis([itemId])
             continue
         # Find out all vectors (fetched before the current one is inserted)
         rtl = self.task.db.cateTFIDF.searchAllVecs(itemId)
         allVecs = rtl.getDict()
         # Translate vector into string
         vecStr = basicApi.vec2Str(msgVector)
         # Insert current vector into database and attach it to an existing
         # CR if there is one.  The same insert is needed whether or not the
         # database already held vectors, so it is issued once here.
         rtl = self.task.db.cateTFIDF.insertTfidfResult([itemId, vecStr, '?'])
         if allVecs == {}:
             # Initial database: nothing to compare against
             continue
         if rtl.getProcRtId() == 1:
             # A return value of 1 means it is a new issue
             if allVecs:
                 # Fuzzy search process begin
                 matched_tb = tfidf.fuzzySearch(msgVector, allVecs)
                 if matched_tb == {}:
                     # Should not run to here
                     text = 'ERROR: Not find any similarity vector.'
                     self.err.append('%s\n%s\n'%(CaseName, text))
                 else:
                     # Insert similarity table into db
                     for simVal, msgId in matched_tb.items():
                         self.task.db.cateTFIDF.insertSimilarity([itemId, str(msgId), str(simVal)])
             else:
                 # Only reachable when allVecs is falsy but not an empty
                 # dict; the current message has already been inserted.
                 text = 'WARNING: Database is empty now, we just directly insert current message as first itme.'
                 self.err.append('%s\n%s\n'%(CaseName, text))
Exemplo n.º 4
0
def wordFrequencyAnalysis(dateTuple):
    """Stem one journal and register its distinct words.

    :param dateTuple: 5-item sequence
        ``(itemId, jnlName, stopWordDic, extJnlDict, kWordDict)``.
        ``extJnlDict[itemId]`` receives the stemmed journal text and every
        word of that text not yet present in ``kWordDict`` is added with the
        value ``True``.  Both mappings are modified in place.
    :return: None.
    """
    itemId, jnlName, stopWordDic, extJnlDict, kWordDict = dateTuple[:5]
    print('Analysis journal of %s' % itemId)
    block = basicApi.openFile(jnlName, "r")
    # Release the journal handle once the text has been extracted; the
    # original never closed it.  stemming() yields a string (it is split
    # below), so nothing reads from `block` after this point.
    try:
        filteBlock = _filteJnl(block)
        extJnl = tfidf.stemming(filteBlock, stopWordDic)
    finally:
        block.close()
    extJnlDict[itemId] = extJnl
    # Find distinct words
    for word in extJnl.split():
        # `in` instead of has_key(): works on Python 2 and 3
        if word not in kWordDict:
            kWordDict[word] = True
Exemplo n.º 5
0
 def _fillStopWordDic(self):
     """Load ``self.stopWordsFile`` into ``self.stopWordDic``.

     Each line of the file, with newline characters stripped from both
     ends, becomes a key mapped to ``True``.
     """
     stopwordsList = basicApi.openFile(self.stopWordsFile, 'r')
     # try/finally so the handle is closed even if reading fails; the
     # original never closed it.
     try:
         for stwd in stopwordsList:
             self.stopWordDic[stwd.strip('\n')] = True
     finally:
         stopwordsList.close()