def _jnlGenerator(self, jnlFile):
    """Yield (caseNum, blockLines) for each matching journal block in *jnlFile*.

    Scans the file for "IC Start" / "IC End" markers (record types 400/410).
    Lines between a start and its end are accumulated; the block is yielded
    only when the case number from the start marker is a key of
    self.execResultDict AND some line in the block contains self.tcfFullName.

    :param jnlFile: path of the journal file to parse (opened via basicApi).
    :yields: (jnlCaseNum, block) — case number string and list of raw lines.
    """
    jnlCaseNum = ''
    block = []
    caseNameMatched = False
    fd = basicApi.openFile(jnlFile, 'r')
    try:
        for line in fd:
            # Block-start marker: "400|<id> <caseNum> <n> <hh:mm:ss> <n>|IC Start"
            matched = re.match(
                r"^400\|[0-9]+\s+([0-9]+)\s+[0-9]+\s+[0-9:]+\s+[0-9]+\|IC Start",
                line)
            if matched:
                # Track only cases we have execution results for.
                # (was has_key(), Python-2-only; `in` works on both 2 and 3)
                if matched.group(1) in self.execResultDict:
                    jnlCaseNum = matched.group(1)
                else:
                    jnlCaseNum = ''
                block = []
            # Accumulate lines while inside a tracked block.
            if jnlCaseNum != '':
                # BUG FIX: was `is not -1` — an identity test between ints,
                # which is implementation-defined; use a value comparison.
                if line.find(self.tcfFullName) != -1:
                    caseNameMatched = True
                block.append(line)
                # Block-end marker only matters while we are appending.
                matched = re.match(
                    r"^410\|[0-9]+\s+([0-9]+)\s+[0-9]+\s+[0-9:]+\s+[0-9]+\|IC End",
                    line)
                if matched:
                    if caseNameMatched:
                        yield jnlCaseNum, block
                    jnlCaseNum = ''
                    caseNameMatched = False
    finally:
        # Close even if the consumer abandons the generator mid-iteration
        # (the original leaked the handle in that case).
        fd.close()
def _execOutGenerator(self, execOutFile):
    """Yield a regex match object for every line of *execOutFile* shaped like
    "<text>{<digits>}: <word>".

    :param execOutFile: path of the exec-output file (opened via basicApi).
    :yields: re match objects with groups (prefix text, digits, word).
    """
    # Compile once; the pattern is applied to every line of the file.
    pattern = re.compile(r"(.*)\{([0-9]*)\}\: (\w+)")
    handle = basicApi.openFile(execOutFile, "r")
    for record in handle:
        hit = pattern.match(record)
        if hit is None:
            continue
        yield hit
    handle.close()
def analysing(self):
    """Run TF-IDF categorisation over every case in self.task.caseList.

    For each (itemId, CaseName): read its stored journal, filter and stem it,
    build a feature vector, and insert the result into the cateTFIDF tables.
    Zero vectors are logged as not-analysable; new issues additionally get
    similarity rows against all previously stored vectors.
    Errors/warnings are accumulated in self.err rather than raised.
    """
    basicApi.d_print('TFIDF_analyzer: analysing()')
    for itemId,CaseName in self.task.caseList:
        jnlPath = self.task.getJnlStoreName(itemId)
        block = basicApi.openFile(jnlPath, "r")
        # Filter the raw journal block.
        # NOTE(review): _filteJnl is called as a bare name, not self._filteJnl —
        # presumably a module-level helper; confirm it exists at module scope.
        filteBlock = _filteJnl(block)
        # Simplify the journal block via stemming / stop-word removal.
        extractedJnl = tfidf.stemming(filteBlock, self.stopWordDic)
        msgVector = tfidf.featureGenerator(extractedJnl, self.featureDic)
        # A zero vector means nothing useful survived filtering.
        if self._isZeroVec(msgVector):
            text = 'ERROR: Extracted journal file is empty, please manually analysis it.'
            self.err.append('%s\n%s\n'%(CaseName, text))
            # Record the not-analysable item in the database.
            self.task.db.cateTFIDF.insertNotAnalysis([itemId])
            continue
        # Fetch all previously stored vectors (excluding this item, presumably).
        rtl = self.task.db.cateTFIDF.searchAllVecs(itemId)
        allVecs = rtl.getDict()
        # Serialise the vector for storage.
        vecStr = basicApi.vec2Str(msgVector)
        if allVecs == {}:
            # First ever entry: seed the database and move on.
            self.task.db.cateTFIDF.insertTfidfResult([itemId, vecStr, '?'])
            continue
        # Insert the current vector; the DB attaches it to an existing CR if one matches.
        rtl = self.task.db.cateTFIDF.insertTfidfResult([itemId, vecStr, '?'])
        if rtl.getProcRtId() == 1:
            # Return value 1 means this is a new issue.
            if allVecs:
                # NOTE(review): this branch is always taken — allVecs == {} was
                # handled by the `continue` above, so the `else` below is dead code.
                matched_tb = tfidf.fuzzySearch(msgVector, allVecs)
                if matched_tb == {}:
                    # Should never happen: fuzzySearch over a non-empty dict.
                    text = 'ERROR: Not find any similarity vector.'
                    self.err.append('%s\n%s\n'%(CaseName, text))
                else:
                    # Persist the similarity table.
                    # NOTE(review): iteration unpacks keys as simVal — i.e. the
                    # dict maps similarity -> msgId; verify against fuzzySearch.
                    for simVal,msgId in matched_tb.items():
                        self.task.db.cateTFIDF.insertSimilarity([itemId, str(msgId), str(simVal)])
            else:
                # Unreachable (see NOTE above); kept byte-identical.
                text = 'WARNING: Database is empty now, we just directly insert current message as first itme.'
                self.err.append('%s\n%s\n'%(CaseName, text))
    # No-op; harmless wherever the original (collapsed) source indented it.
    pass
def wordFrequencyAnalysis(dateTuple):
    """Worker: filter and stem one journal file, recording its distinct words.

    :param dateTuple: packed arguments
        (itemId, jnlName, stopWordDic, extJnlDict, kWordDict) —
        item id, journal file path, stop-word dict, and two output dicts
        (presumably multiprocessing-shared; verify against the caller):
        extJnlDict collects the stemmed text per item, kWordDict collects
        the set of distinct words (dict used as a set, values are True).
    :returns: None; results are written into extJnlDict / kWordDict.
    """
    # Unpack in one statement instead of five indexed reads.
    itemId, jnlName, stopWordDic, extJnlDict, kWordDict = dateTuple
    print('Analysis journal of %s' % itemId)
    # NOTE(review): this handle is never closed — mirrors the sibling
    # analysing() pattern; unclear whether _filteJnl consumes it lazily,
    # so closing here could be unsafe. TODO confirm and close.
    block = basicApi.openFile(jnlName, "r")
    filteBlock = _filteJnl(block)
    extJnl = tfidf.stemming(filteBlock, stopWordDic)
    extJnlDict[itemId] = extJnl
    # Record each distinct word.
    # (was has_key(), Python-2-only; `not in` works on both 2 and 3)
    for word in extJnl.split():
        if word not in kWordDict:
            kWordDict[word] = True
def _fillStopWordDic(self):
    """Load stop words from self.stopWordsFile into self.stopWordDic.

    One word per line; each word maps to True (the dict is used as a set).
    """
    fd = basicApi.openFile(self.stopWordsFile, 'r')
    try:
        for stwd in fd:
            # Strip only the trailing newline; preserve other whitespace.
            self.stopWordDic[stwd.strip('\n')] = True
    finally:
        # BUG FIX: the original never closed this handle, unlike the sibling
        # readers (_jnlGenerator / _execOutGenerator) which do.
        fd.close()