def importMapFiles(BadFileList, GoodFileList, IDmap):
    """Load every map file in both lists and convert each with importMapFile.

    Each path is loaded with utility.loadObject and the loaded object is
    passed to importMapFile together with IDmap (the same IDmap object is
    handed to every call).  len(IDmap) is printed once after the bad files
    and once after the good files.

    Args:
        BadFileList: iterable of paths to map files from bad runs.
        GoodFileList: iterable of paths to map files from good runs.
        IDmap: sized object forwarded unchanged to importMapFile.

    Returns:
        A (good_docs, bad_docs) tuple -- the good list comes first.
    """
    bad_docs = []
    for badFile in BadFileList:
        bad_docs.append(importMapFile(utility.loadObject(badFile), IDmap))
    print(len(IDmap))

    good_docs = []
    for goodFile in GoodFileList:
        # Fix: load goodFile.  The original loaded the stale badFile left
        # over from the loop above, so every "good" document was rebuilt
        # from the last bad file (or raised NameError when BadFileList was
        # empty).
        good_docs.append(importMapFile(utility.loadObject(goodFile), IDmap))
    print(len(IDmap))

    return good_docs, bad_docs
def importMapFiles(BadFileList, GoodFileList, IDmap):
    """Convert the bad-run and good-run map files into document lists.

    Every path is read with utility.loadObject and the result is handed to
    importMapFile along with IDmap; the same IDmap object is shared by all
    calls.  The size of IDmap is printed after each of the two passes.

    Args:
        BadFileList: iterable of map-file paths for bad runs.
        GoodFileList: iterable of map-file paths for good runs.
        IDmap: sized object passed through to importMapFile.

    Returns:
        (good_docs, bad_docs) -- note that the good list is returned first.
    """
    bad_docs = [
        importMapFile(utility.loadObject(badFile), IDmap)
        for badFile in BadFileList
    ]
    print(len(IDmap))

    # Fix: the original loaded `badFile` inside this pass, which reused the
    # last bad file for every good entry and failed with NameError when
    # BadFileList was empty.  Each good path is now loaded itself.
    good_docs = [
        importMapFile(utility.loadObject(goodFile), IDmap)
        for goodFile in GoodFileList
    ]
    print(len(IDmap))

    return good_docs, bad_docs
def importGoodReport(sGoodDirectory, dictCurrent, iBadRun):
    """Feed every 'map.obj' report listed for sGoodDirectory to addGoodSample.

    The report paths are gathered by utility.findDesiredFiles into a fresh
    list, the number of paths is printed, and each loaded report is passed
    to addGoodSample together with dictCurrent.

    Args:
        sGoodDirectory: directory handed to utility.findDesiredFiles.
        dictCurrent: object forwarded to addGoodSample for every report.
        iBadRun: accepted for call compatibility; not used by this function.

    Returns:
        The number of good reports processed.  The original counted the
        reports but dropped the count; it is now returned so the function
        mirrors importBadReport.  Callers that ignored the old None result
        are unaffected.
    """
    listGoodReport = []
    utility.findDesiredFiles(sGoodDirectory, listGoodReport, 'map.obj')
    # One pre-formatted argument prints the same text as the two-item
    # Python 2 print statement and also parses under Python 3.
    print('%s %s' % ('good:', len(listGoodReport)))

    iGoodRun = 0
    for sFile in listGoodReport:
        addGoodSample(utility.loadObject(sFile), dictCurrent)
        iGoodRun += 1
    return iGoodRun
def importGoodReport(sGoodDirectory, dictCurrent, iBadRun):
    """Load each good-run 'map.obj' report and pass it to addGoodSample.

    utility.findDesiredFiles is given sGoodDirectory, an empty list and the
    name 'map.obj'; the list is then iterated, so the helper is expected to
    fill it with report paths.  The list length is printed before loading.

    Args:
        sGoodDirectory: directory passed to utility.findDesiredFiles.
        dictCurrent: object passed to addGoodSample with every report.
        iBadRun: unused here; kept so existing call sites keep working.

    Returns:
        The count of reports handed to addGoodSample.  Previously this
        counter was maintained but never returned (the function returned
        None); returning it matches importBadReport and does not affect
        callers that discard the result.
    """
    listGoodReport = []
    utility.findDesiredFiles(sGoodDirectory, listGoodReport, 'map.obj')
    # Formatted into one argument so the output is identical to the
    # original two-item print statement on Python 2.
    print('%s %s' % ('good:', len(listGoodReport)))

    iGoodRun = 0
    for iGoodRun, sFile in enumerate(listGoodReport, 1):
        dictTmp = utility.loadObject(sFile)
        addGoodSample(dictTmp, dictCurrent)
    return iGoodRun
def importBadReport(sBadDirectory, dictCurrent, setCallSite):
    """Pass every 'map.obj' report listed for sBadDirectory to addBadSample.

    The paths are gathered by utility.findDesiredFiles into a fresh list and
    the number of paths is printed before any report is loaded.

    Args:
        sBadDirectory: directory handed to utility.findDesiredFiles.
        dictCurrent: forwarded to addBadSample for every report.
        setCallSite: forwarded to addBadSample for every report.

    Returns:
        The number of reports processed.
    """
    report_paths = []
    utility.findDesiredFiles(sBadDirectory, report_paths, 'map.obj')
    # Single pre-formatted argument: same text as printing the label and the
    # count as two items.
    print('%s %s' % ('bad:', len(report_paths)))

    processed = 0
    for processed, path in enumerate(report_paths, 1):
        addBadSample(utility.loadObject(path), dictCurrent, setCallSite)
    return processed
def importBadReport(sBadDirectory, dictCurrent, setCallSite):
    """Load each bad-run 'map.obj' report and hand it to addBadSample.

    Args:
        sBadDirectory: directory passed to utility.findDesiredFiles.
        dictCurrent: passed through to addBadSample.
        setCallSite: passed through to addBadSample.

    Returns:
        How many report files were loaded and passed to addBadSample.
    """
    bad_reports = []
    utility.findDesiredFiles(sBadDirectory, bad_reports, 'map.obj')
    # Label and count are joined into one argument; the printed line is the
    # same as with a two-item print.
    print('%s %s' % ('bad:', len(bad_reports)))

    run_count = 0
    for report_path in bad_reports:
        sample = utility.loadObject(report_path)
        addBadSample(sample, dictCurrent, setCallSite)
        run_count += 1
    return run_count
def importBadReport(sBadDirectory, dictCurrent, mapInstruction, mapFunction):
    """Pass every 'map.obj' report listed for sBadDirectory to addBadSample.

    The report paths are gathered by utility.findDesiredFiles into a fresh
    list; the number of paths is printed before any report is loaded.

    Args:
        sBadDirectory: directory handed to utility.findDesiredFiles.
        dictCurrent: forwarded to addBadSample for every report.
        mapInstruction: forwarded to addBadSample for every report.
        mapFunction: forwarded to addBadSample for every report.

    Returns:
        The number of reports processed.
    """
    report_paths = []
    utility.findDesiredFiles(sBadDirectory, report_paths, 'map.obj')
    # One pre-formatted argument prints the same line as the label followed
    # by the count.
    print('%s %s' % ('bad:', len(report_paths)))

    processed = 0
    for processed, path in enumerate(report_paths, 1):
        addBadSample(
            utility.loadObject(path), dictCurrent, mapInstruction, mapFunction
        )
    return processed
def importBadReport(sBadDirectory, dictCurrent, mapInstruction, mapFunction):
    """Load each bad-run 'map.obj' report and hand it to addBadSample.

    Args:
        sBadDirectory: directory passed to utility.findDesiredFiles.
        dictCurrent: passed through to addBadSample.
        mapInstruction: passed through to addBadSample.
        mapFunction: passed through to addBadSample.

    Returns:
        How many report files were loaded and passed to addBadSample.
    """
    bad_reports = []
    utility.findDesiredFiles(sBadDirectory, bad_reports, 'map.obj')
    # Label and count are joined into a single argument; the printed text is
    # unchanged.
    print('%s %s' % ('bad:', len(bad_reports)))

    run_count = 0
    for report_path in bad_reports:
        sample = utility.loadObject(report_path)
        addBadSample(sample, dictCurrent, mapInstruction, mapFunction)
        run_count += 1
    return run_count
import string
import re
import os
import sys
import commands
import pickle
import glob
import math
import utility
import gc
from sets import Set

if __name__ == '__main__':
    # Usage: <script> REPORT_DIRECTORY
    # Sums the first element of every 'sample.count.obj' object found for
    # the directory and prints the file count, the total and the mean.
    if len(sys.argv) < 2:
        # Fix: a missing argument used to surface as a bare IndexError.
        sys.exit('usage: %s REPORT_DIRECTORY' % sys.argv[0])
    sReportDirectory = sys.argv[1]

    listReport = []
    utility.findDesiredFiles(sReportDirectory, listReport, 'sample.count.obj')
    if not listReport:
        # Fix: with no reports the mean below divided by zero.
        sys.exit('no sample.count.obj files found under %s' % sReportDirectory)

    iCount = 0
    for report in listReport:
        print(report)
        sample = utility.loadObject(report)
        print(sample)
        iCount += sample[0]

    # Formatted into one argument so the line matches the original
    # three-item Python 2 print statement.
    print('%s %s %s' % (len(listReport), iCount, iCount * 1.0 / len(listReport)))
def print_rank(finalResult, badDict):
    """Print up to 100 entries of finalResult, highest value first.

    Each key of finalResult is split on '_'.  The printed line holds the
    first field, the second field, the badDict entry looked up by the first
    field parsed as a hexadecimal integer, and the entry's value.

    Args:
        finalResult: dict mapping '<hex>_<suffix>' style keys to sortable
            values.
        badDict: mapping indexed by the integer value of the hex field.
    """
    ordered = sorted(finalResult.iteritems(), key=lambda d: d[1], reverse=True)
    for name, score in ordered[:100]:
        fields = name.split('_')
        # One pre-formatted argument renders the same text as printing the
        # four items separated by spaces.
        print('%s %s %s %s' % (
            fields[0], fields[1], badDict[int(fields[0], 16)], score))


if __name__ == '__main__':
    # argv[1]: object file loaded as the call-site set
    # argv[2]: directory of bad-run reports
    # argv[3]: directory of good-run reports
    setCallSite = utility.loadObject(sys.argv[1])
    sBadDirectory, sGoodDirectory = sys.argv[2], sys.argv[3]

    dictCurrent = {}
    iBadRun = importBadReport(sBadDirectory, dictCurrent, setCallSite)
    importGoodReport(sGoodDirectory, dictCurrent, iBadRun)

    # Count the positive slots among the first six of every entry.  The
    # total is not printed anywhere in the visible code.
    count = 0
    for key in dictCurrent:
        for num in range(6):
            if dictCurrent[key][num] > 0:
                count += 1

    print('%s %s' % ('total predicate:', len(setCallSite) * 6))
    print(len(dictCurrent) * 6)
# NOTE(review): this line has lost its line breaks and starts mid-definition.
# The first three statements (load sFile, call addGoodSample, increment
# iGoodRun) are a loop-body tail whose enclosing `for`/`def` header is not
# visible here -- presumably the end of importGoodReport; confirm against the
# full file.
# It is followed by print_rank(finalResult, badDict), which prints at most 100
# entries of finalResult in descending value order: each key is split on '_',
# and the first field is parsed as hexadecimal to index badDict.
# The trailing __main__ section loads the call-site set from argv[1], imports
# the bad (argv[2]) and good (argv[3]) reports into dictCurrent, counts the
# positive slots among the first six of every entry (the count is not printed
# in the visible code), then prints len(setCallSite) * 6 and
# len(dictCurrent) * 6.
dictTmp = utility.loadObject(sFile) addGoodSample( dictTmp, dictCurrent ) iGoodRun = iGoodRun + 1 def print_rank(finalResult, badDict): rank = 0 for (key, value) in sorted(finalResult.iteritems(), key = lambda d:d[1], reverse = True ): strTmp = key.split('_') print strTmp[0], strTmp[1], badDict[int(strTmp[0], 16)], value rank = rank + 1 if rank == 100: break if __name__ == '__main__': setCallSite = utility.loadObject(sys.argv[1]) sBadDirectory = sys.argv[2] sGoodDirectory = sys.argv[3] dictCurrent = {} iBadRun = importBadReport(sBadDirectory, dictCurrent, setCallSite) importGoodReport(sGoodDirectory, dictCurrent, iBadRun) count = 0 for key in dictCurrent: for num in range(0,6): if dictCurrent[key][num] > 0: count += 1 print 'total predicate:', len(setCallSite) * 6 print len(dictCurrent) * 6
def __init__(self, videoPath):
    """Index the frames in videoPath and select key frames per cluster.

    Steps, in order:
      1. Extract SIFT features from every entry of os.listdir(videoPath).
      2. Turn each frame's normalized features into a histogram over the
         vocabulary loaded from "data/voc.pkl".
      3. Cluster the histograms with KMeans (one centroid per ten frames)
         and group frame indices by the code vq assigns to them.
      4. Run pca.pca on all raw SIFT features stacked together and keep the
         first returned value as self.V.
      5. Ask self.identifyKeyFrame for the key frames of each cluster and
         keep the histograms and names of the selected frames.

    Args:
        videoPath: directory whose entries are the frame images.

    Raises:
        ValueError: if videoPath contains no entries.
        IndexError: if no key frame is selected for any cluster.

    Sets self.videoPath, imageNames, imageHistograms, SIFTfeatures,
    numOfFrames, numOfCentriods, V, keyFrames, compressedHistogram and
    compressedImageName.
    """
    self.videoPath = videoPath

    # Read in video frames.
    # NOTE(review): os.listdir order is arbitrary, so frame indices follow
    # directory order rather than file names -- confirm this is intended.
    SIFTfeatures = []
    imageNames = []  # name of each image
    for item in os.listdir(videoPath):
        imagePath = videoPath + "/" + item
        _locations, features = sift.siftFeature(imagePath)
        SIFTfeatures.append(features)
        imageNames.append(item)

    if not imageNames:
        # Fix: fail early with a clear message instead of an unrelated
        # error further down.
        raise ValueError("no frames found in %s" % videoPath)

    # Histogramize each image
    vocabulary = util.loadObject("data/voc.pkl")
    imageHistograms = np.array([
        self.buildHistogram(util.normalizeSIFT(imageFeature), vocabulary)
        for imageFeature in SIFTfeatures
    ])

    self.imageNames = imageNames
    self.imageHistograms = imageHistograms
    self.SIFTfeatures = SIFTfeatures

    # Cluster frames
    self.numOfFrames = len(imageNames)
    # Fix: fewer than ten frames used to request zero clusters, which KMeans
    # is not expected to accept; always ask for at least one.
    self.numOfCentriods = max(1, self.numOfFrames // 10)
    kmeans = KMeans(init="k-means++", n_clusters=self.numOfCentriods, n_init=10)
    kmeans.fit(self.imageHistograms)
    cluster_centroids = kmeans.cluster_centers_

    # Get components of each cluster: frame indices grouped by assigned code.
    # Keys stay str(code), as before.  setdefault replaces rebuilding and
    # scanning the key list on every frame, and the local no longer shadows
    # the builtin `dict`.
    codes, _distance = vq(self.imageHistograms, cluster_centroids)
    clusters = {}
    for indice, code in enumerate(codes):
        clusters.setdefault(str(code), []).append(indice)

    # Stack all SIFT features to perform PCA.  A single vstack replaces the
    # incremental one that re-copied the growing array for every frame.
    # Assumes each feature array is 2-D (rows are descriptors) -- TODO confirm.
    stackOfSIFTfeatures = np.vstack(SIFTfeatures)
    V, _S, _mean = pca.pca(stackOfSIFTfeatures)
    self.V = V

    # Perform near duplicate detection within each cluster
    KEYFRAMES = []
    for key in clusters.keys():
        cluster = clusters[key]
        clusterFeatures = [self.SIFTfeatures[i] for i in cluster]
        potentialKeyFrames = self.identifyKeyFrame(clusterFeatures, cluster)
        KEYFRAMES += potentialKeyFrames
        print(str(cluster) + ": " + str(potentialKeyFrames))
    self.keyFrames = KEYFRAMES

    # Keep only the key frames.  With a single key frame the histogram stays
    # a bare row (not stacked), exactly as before.
    compressedHistogram = self.imageHistograms[KEYFRAMES[0]]
    if len(KEYFRAMES) > 1:
        compressedHistogram = np.vstack(
            [self.imageHistograms[keyframe] for keyframe in KEYFRAMES])
    compressedImageName = [self.imageNames[keyframe] for keyframe in KEYFRAMES]

    self.compressedHistogram = compressedHistogram
    self.compressedImageName = compressedImageName